How can I render a blend of two videos with alpha channels in real time in Python?


I need to play two videos in real time in Python. One is a background video without an alpha channel; I am using H.264, but it could be any codec. The second is an overlay video with an alpha channel that needs to play on top of the first one in real time; I am using QuickTime 444 with alpha, but it could be any codec.
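
For reference, a quick probe can confirm the overlay really decodes with an alpha plane. This is just a sanity-check sketch; yuva444p10le is the pixel format I would expect FFmpeg to report for ProRes 4444 with alpha:

import ffmpeg

# Inspect the overlay's video stream; an alpha-capable pix_fmt
# (e.g. 'yuva444p10le' for ProRes 4444) means the alpha survives decoding.
info = ffmpeg.probe("test.mov")
video_stream = next(s for s in info["streams"] if s["codec_type"] == "video")
print(video_stream["codec_name"], video_stream["pix_fmt"])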

On the library side I have tried combinations of cv2 and numpy, and also pymovie, PyAV, ffmpeg… with no success so far. When the videos render, the frame rate is well below 30 FPS and the resulting stream glitches.

I also tried rendering the overlay without an alpha channel and doing green-screen chroma keying in real time. Needless to say, that was even worse.
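
The chroma-key attempt looked roughly like this (a sketch rather than my exact code; the HSV bounds passed to cv2.inRange are placeholder values that would need tuning per clip):

import cv2
import numpy as np

def chroma_key(overlay_bgr, base_bgr):
    # Mask the "green enough" pixels of the overlay in HSV space.
    # These bounds are illustrative only and need tuning per footage.
    hsv = cv2.cvtColor(overlay_bgr, cv2.COLOR_BGR2HSV)
    green_mask = cv2.inRange(hsv, (40, 80, 80), (80, 255, 255))
    # Keep the base where the overlay is green, the overlay elsewhere.
    out = base_bgr.copy()
    out[green_mask == 0] = overlay_bgr[green_mask == 0]
    return out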

What solution could I use?

Here is the code I tried using ffmpeg:

import ffmpeg
import cv2
import numpy as np

def decode_video_stream(video_path, pix_fmt, width, height, fps):
    # Decode to raw frames on stdout. Keep FFmpeg quiet instead of piping
    # stderr: an unread stderr pipe eventually fills up and stalls FFmpeg.
    process = (
        ffmpeg
        .input(video_path)
        .output('pipe:', format='rawvideo', pix_fmt=pix_fmt, s=f'{width}x{height}', r=fps)
        .global_args('-loglevel', 'error')
        .run_async(pipe_stdout=True)
    )
    return process

def read_frame(process, width, height, channels):
    frame_size = width * height * channels
    raw_frame = process.stdout.read(frame_size)
    if len(raw_frame) < frame_size:  # EOF, or a truncated final frame
        return None
    frame = np.frombuffer(raw_frame, np.uint8).reshape((height, width, channels))
    return frame

def play_videos_with_alpha(base_video_path, alpha_video_path, resolution=(1280, 720), fps=30):
    width, height = resolution
    frame_time = int(1000 / fps)  # Frame time in milliseconds

    # Initialize FFmpeg decoding processes
    base_process = decode_video_stream(base_video_path, 'rgb24', width, height, fps)
    alpha_process = decode_video_stream(alpha_video_path, 'rgba', width, height, fps)

    cv2.namedWindow("Blended Video", cv2.WINDOW_NORMAL)

    try:
        while True:
            # Read frames
            base_frame = read_frame(base_process, width, height, channels=3)
            alpha_frame = read_frame(alpha_process, width, height, channels=4)

            # Restart processes if end of video is reached (loop playback);
            # wait() reaps the old FFmpeg process before starting a new one
            if base_frame is None:
                base_process.stdout.close()
                base_process.wait()
                base_process = decode_video_stream(base_video_path, 'rgb24', width, height, fps)
                base_frame = read_frame(base_process, width, height, channels=3)

            if alpha_frame is None:
                alpha_process.stdout.close()
                alpha_process.wait()
                alpha_process = decode_video_stream(alpha_video_path, 'rgba', width, height, fps)
                alpha_frame = read_frame(alpha_process, width, height, channels=4)

            # Separate RGB and alpha channels from alpha video
            rgb_image = cv2.cvtColor(alpha_frame[:, :, :3], cv2.COLOR_RGB2BGR)
            alpha_channel = alpha_frame[:, :, 3] / 255.0  # Normalize alpha

            # Convert base frame to BGR format for blending
            base_image = cv2.cvtColor(base_frame, cv2.COLOR_RGB2BGR)

            # Blend the images
            blended_image = (base_image * (1 - alpha_channel[:, :, None]) + rgb_image * alpha_channel[:, :, None]).astype(np.uint8)

            # Display the result
            cv2.imshow("Blended Video", blended_image)

            # NOTE: waitKey(frame_time) waits on top of decode/blend time,
            # so the effective frame rate ends up below the target fps
            if cv2.waitKey(frame_time) & 0xFF == ord('q'):
                break

    except Exception as e:
        print("Error during playback:", e)

    finally:
        # Clean up
        base_process.stdout.close()
        alpha_process.stdout.close()
        cv2.destroyAllWindows()

base_video_path = "test.mp4"  # Background video
alpha_video_path = "test.mov"  # Overlay video
play_videos_with_alpha(base_video_path, alpha_video_path, resolution=(1280, 720), fps=30)

This is the version with the fewest dropped frames so far. I have been considering threading or CUDA, but ideally I would like something that runs on any machine. Without shrinking the frame size (1920 x 1080) and without pre-rendering the blend and exporting a pre-mixed file, what is the least computationally expensive approach? Is there one? Maybe my whole understanding is wrong. I feel lost. Please help. Thank you.
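
One direction I have been looking at but have not benchmarked: let FFmpeg's overlay filter do the alpha blend itself in native code and pipe out a single pre-composited stream, so Python only reads and displays finished frames. A rough sketch with ffmpeg-python, assuming both clips share the same duration and frame rate:

import ffmpeg
import numpy as np
import cv2

def decode_blended_stream(base_path, overlay_path, width, height, fps):
    # FFmpeg's overlay filter alpha-blends the second input onto the first,
    # so only one raw-video pipe has to cross into Python.
    base = ffmpeg.input(base_path)
    overlay = ffmpeg.input(overlay_path)
    return (
        ffmpeg
        .overlay(base, overlay)
        .output('pipe:', format='rawvideo', pix_fmt='bgr24', s=f'{width}x{height}', r=fps)
        .run_async(pipe_stdout=True)
    )

process = decode_blended_stream("test.mp4", "test.mov", 1280, 720, 30)
frame_size = 1280 * 720 * 3
while True:
    raw = process.stdout.read(frame_size)
    if len(raw) < frame_size:
        break
    frame = np.frombuffer(raw, np.uint8).reshape((720, 1280, 3))
    cv2.imshow("Blended Video", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cv2.destroyAllWindows()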

python user-interface ffmpeg graphics alpha-transparency
1 Answer

0 votes
You can use PIL, except for the alpha part:
# Importing libraries
import os
import cv2
from PIL import Image

# Set path to the Google Drive folder with the images
path = "/content/drive/My Drive/Images"
os.chdir(path)

mean_height = 0
mean_width = 0

# Counting the number of images in the directory
num_of_images = len([file for file in os.listdir('.') if file.endswith((".jpg", ".jpeg", ".png"))])
print("Number of Images:", num_of_images)

# Calculating the mean width and height of all images
for file in os.listdir('.'):
    if file.endswith((".jpg", ".jpeg", ".png")):
        im = Image.open(os.path.join(path, file))
        width, height = im.size
        mean_width += width
        mean_height += height

# Averaging width and height
mean_width = int(mean_width / num_of_images)
mean_height = int(mean_height / num_of_images)

# Resizing all images to the mean width and height
for file in os.listdir('.'):
    if file.endswith((".jpg", ".jpeg", ".png")):
        im = Image.open(os.path.join(path, file))
        # Use Image.LANCZOS instead of Image.ANTIALIAS for downsampling
        im_resized = im.resize((mean_width, mean_height), Image.LANCZOS)
        im_resized.save(file, 'JPEG', quality=95)
        print(f"{file} is resized")


# Function to generate video
def generate_video():
    image_folder = path
    video_name = 'mygeneratedvideo.avi'

    images = [img for img in os.listdir(image_folder) if img.endswith((".jpg", ".jpeg", ".png"))]
    print("Images:", images)

    # Set frame from the first image
    frame = cv2.imread(os.path.join(image_folder, images[0]))
    height, width, layers = frame.shape

    # Video writer to create .avi file
    video = cv2.VideoWriter(video_name, cv2.VideoWriter_fourcc(*'DIVX'), 1, (width, height))

    # Appending images to video
    for image in images:
        video.write(cv2.imread(os.path.join(image_folder, image)))

    # Release the video file
    video.release()
    cv2.destroyAllWindows()
    print("Video generated successfully enter code here fully!")

# Calling the function to generate the video
generate_video()