As mentioned in this Code Review question, I am trying to modify the code to remove rain streaks from a video frame by frame. The ffmpeg-python package is used in this code.
import argparse
import os
import time
import cv2
import ffmpeg
import numpy as np
import torch
from skimage import img_as_ubyte
from torch.utils.data import DataLoader
from tqdm import tqdm
import utils
from data_RGB import get_test_data
from MFDNet import HPCNet as mfdnet
def process_video_frame_by_frame(input_file, output_file, model_restoration):
    """
    Decodes a video frame by frame, processes each frame,
    and re-encodes it to a new video.

    Args:
        input_file: Path to the input video file.
        output_file: Path to the output video file.
        model_restoration: Deraining model applied to each frame.
    """
    try:
        # Probe for video information
        probe = ffmpeg.probe(input_file)
        video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
        width = int(video_stream['width'])
        height = int(video_stream['height'])

        # Input: decode to raw RGB frames on stdout
        process1 = (
            ffmpeg
            .input(input_file)
            .output('pipe:', format='rawvideo', pix_fmt='rgb24')
            .run_async(pipe_stdout=True)
        )

        # Output: re-encode raw RGB frames from stdin with libx264
        process2 = (
            ffmpeg
            .input('pipe:', format='rawvideo', pix_fmt='rgb24', s='{}x{}'.format(width, height))
            .output(output_file, vcodec='libx264', pix_fmt='yuv420p')
            .overwrite_output()
            .run_async(pipe_stdin=True)
        )

        # Process frames (deraining)
        while in_bytes := process1.stdout.read(width * height * 3):
            in_frame = torch.frombuffer(in_bytes, dtype=torch.uint8).float().reshape((1, 3, width, height))
            restored = model_restoration(torch.div(in_frame, 255).to(device='cuda'))
            restored = torch.clamp(restored[0], 0, 1)
            restored = restored.cpu().detach().numpy()
            restored *= 255
            out_frame = restored
            np.reshape(out_frame, (3, width, height))
            # Encode and write the frame
            process2.stdin.write(
                out_frame
                .astype(np.uint8)
                .tobytes()
            )

        # Close streams
        process1.stdout.close()
        process2.stdin.close()
        process1.wait()
        process2.wait()
    except ffmpeg.Error as e:
        print('stdout:', e.stdout.decode('utf8'))
        print('stderr:', e.stderr.decode('utf8'))
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Image Deraining using MPRNet')
    parser.add_argument('--weights', default='./checkpoints/checkpoints_mfd.pth', type=str,
                        help='Path to weights')
    parser.add_argument('--gpus', default='0', type=str, help='CUDA_VISIBLE_DEVICES')
    args = parser.parse_args()

    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus

    model_restoration = mfdnet()
    utils.load_checkpoint(model_restoration, args.weights)
    print("===>Testing using weights: ", args.weights)
    model_restoration.eval().cuda()

    input_video = "Input_video.mp4"
    output_video = 'output_video.mp4'
    process_video_frame_by_frame(input_video, output_video, model_restoration)
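For reference, given the argparse flags above, the script is launched along these lines (the filename derain_video.py is hypothetical):

python derain_video.py --weights ./checkpoints/checkpoints_mfd.pth --gpus 0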
Let's focus on the while loop. The version in the snippet above executes correctly. As a next step, I tried to follow 301_Moved_Permanently's answer and use torch.save, so the body of the while loop became:
# Process frames (deraining)
while in_bytes := process1.stdout.read(width * height * 3):
    in_frame = torch.frombuffer(in_bytes, dtype=torch.uint8).float().reshape((1, 3, width, height))
    restored = model_restoration(torch.div(in_frame, 255).to(device='cuda'))
    restored = torch.clamp(restored[0], 0, 1)
    out_frame = torch.mul(restored.cpu().detach(), 255).reshape(3, width, height).byte()
    torch.save(out_frame, process2.stdin)
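As an aside on that substitution: torch.save writes a pickled zip archive rather than raw pixel bytes, so even without the memory error the bytes piped to FFmpeg would no longer be valid rawvideo input. A minimal sketch of the difference (illustrative, not from the original script):

import io
import torch

frame = torch.zeros(3, 4, 4, dtype=torch.uint8)  # tiny stand-in frame

# Raw bytes: exactly C*H*W bytes, which is what rawvideo/rgb24 expects
raw = frame.numpy().tobytes()
print(len(raw))  # 48

# torch.save: a serialized archive with headers and pickle metadata
buf = io.BytesIO()
torch.save(frame, buf)
print(len(buf.getvalue()))  # far more than 48 bytes, and not raw pixels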
An out-of-memory error occurred, with the following message:

torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 676.00 MiB. GPU 0 has a total capacity of 23.99 GiB of which 0 bytes is free. Of the allocated memory 84.09 GiB is allocated by PyTorch, and 1.21 GiB is reserved by PyTorch but unallocated.
To diagnose the error, I removed the last two lines of the loop body:
# Process frames (deraining)
while in_bytes := process1.stdout.read(width * height * 3):
    in_frame = torch.frombuffer(in_bytes, dtype=torch.uint8).float().reshape((1, 3, width, height))
    restored = model_restoration(torch.div(in_frame, 255).to(device='cuda'))
    restored = torch.clamp(restored[0], 0, 1)
The out-of-memory error still occurred, which seems strange to me. My understanding of the working version is that the line
restored = restored.cpu().detach().numpy()
transfers the restored data from GPU memory to main memory and converts it to a NumPy array. Why does removing this line cause an out-of-memory error?
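To check whether GPU memory actually grows across iterations, it helps to log the allocator counters once per frame. torch.cuda.memory_allocated() and torch.cuda.memory_reserved() are standard PyTorch calls; the loop below is a diagnostic sketch of the stripped-down loop, not part of the original script:

frame_idx = 0
while in_bytes := process1.stdout.read(width * height * 3):
    in_frame = torch.frombuffer(in_bytes, dtype=torch.uint8).float().reshape((1, 3, width, height))
    restored = model_restoration(torch.div(in_frame, 255).to(device='cuda'))
    restored = torch.clamp(restored[0], 0, 1)
    frame_idx += 1
    # If these numbers climb frame after frame, something is holding references
    print(f"frame {frame_idx}: "
          f"allocated={torch.cuda.memory_allocated() / 2**20:.1f} MiB, "
          f"reserved={torch.cuda.memory_reserved() / 2**20:.1f} MiB")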
The hardware and software specifications I am using are as follows:

CPU: 12th Gen Intel(R) Core(TM) i9-12900K 3.20 GHz
RAM: 128 GB (128 GB usable)
GPU: NVIDIA GeForce RTX 4090
OS: Windows 11 Pro 22H2, OS build 22621.4317
PyTorch version:
> python -c "import torch; print(torch.__version__)"
2.5.0+cu124
Without more information about the model implementation, the only thing in the snippet that could cause a memory problem is tensor accumulation across iterations. You have a very capable setup, so I don't think a lightweight model would struggle with frame-by-frame processing.
Also, try torch.no_grad(): you are running inference, so there is no need to keep the autograd graph for operations you will never backpropagate through.
while in_bytes := process1.stdout.read(width * height * 3):
    with torch.no_grad():
        in_frame = torch.frombuffer(in_bytes, dtype=torch.uint8).float().reshape((1, 3, width, height))
        in_frame_gpu = torch.div(in_frame, 255).to(device='cuda')
        restored = model_restoration(in_frame_gpu)
        restored = torch.clamp(restored[0], 0, 1)
        out_frame = (restored.cpu() * 255).byte().numpy()

    # Drop the GPU references first, then release the cached blocks they held
    del in_frame_gpu
    del restored
    torch.cuda.empty_cache()

    process2.stdin.write(out_frame.tobytes())
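If torch.no_grad() alone does not cure it, torch.inference_mode() is a slightly stronger alternative on recent PyTorch versions (it disables autograd recording plus the view and version-counter bookkeeping). A sketch of the same loop under it, assuming the surrounding pipeline is unchanged:

with torch.inference_mode():
    while in_bytes := process1.stdout.read(width * height * 3):
        # Same per-frame processing as before; no autograd state is recorded
        in_frame = torch.frombuffer(in_bytes, dtype=torch.uint8).float().reshape((1, 3, width, height))
        restored = model_restoration(in_frame.div(255).to(device='cuda'))
        out_frame = (torch.clamp(restored[0], 0, 1).cpu() * 255).byte().numpy()
        process2.stdin.write(out_frame.tobytes())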