I have been struggling with a real-world problem for several days now. I have spent hours on internet searches, ChatGPT sessions, code reviews, and so on, but I have not gotten it to work as expected.
The setup: Windows 11 Pro x64, my own LGPL build of libav and libmfx, an x64 C++ program built with Visual C++.
The situation: I receive frames from a frame grabber card (a YUAN SDI 2K in the example below) and want to encode them with the hardware Intel Quick Sync H.264 encoder. That part works well so far. The main processing steps are: open the dshow capture input, decode the raw frame, convert it from YUV420P to NV12 with swscale, upload it to a QSV hardware frame, encode it with h264_qsv, and mux the packets into an MP4 file.
This works so far, but I am struggling with the PTS and DTS timestamps. I have tried many combinations of hints from ChatGPT, but I never got it to really work and produce a valid video. I either get errors that PTS/DTS are not monotonically increasing, or they do not change at all, or PTS and DTS end up with completely different values, and so on. Once I got a video where the time base seemed fine and libav reported no errors, but the frames in the video were in the wrong order.
Here is the actual example code (a prototype, with no proper cleanup, no flushing, etc.):
#define __STDC_LIMIT_MACROS
#include <cstdio>
#include <cstdint>
#include <Windows.h>
#ifdef _WIN32
//Windows
extern "C"
{
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libavdevice/avdevice.h"
#include "libavutil/hwcontext_qsv.h"
};
#endif
#include <iostream>
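// Busy-wait for waitTimeInUs microseconds using the performance counter;
// for waits longer than one scheduler quantum, Sleep(1) first to save CPU.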
void uSleep(double waitTimeInUs, LARGE_INTEGER frequency)
{
LARGE_INTEGER startTime, currentTime;
QueryPerformanceCounter(&startTime);
if (waitTimeInUs > 16500.0)
Sleep(1);
do
{
YieldProcessor();
//Sleep(0);
QueryPerformanceCounter(&currentTime);
} while (waitTimeInUs > (currentTime.QuadPart - startTime.QuadPart) * 1000000.0 / frequency.QuadPart);
}
void check_error(int ret)
{
if (ret < 0) {
char errbuf[128];
av_strerror(ret, errbuf, sizeof(errbuf));
std::cerr << "Error: " << errbuf << '\n';
exit(1);
}
}
bool _isRunning = true;
BOOL WINAPI consoleHandler(DWORD signal)
{
if (signal == CTRL_C_EVENT)
{
_isRunning = false;
}
return TRUE;
}
int main(int argc, char* argv[])
{
if (!SetConsoleCtrlHandler(consoleHandler, TRUE))
{
std::cerr << "Could not set control handler!" << '\n';
return 1;
}
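// Open the DirectShow frame grabber input with fixed size, frame rate, and pixel format.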
unsigned int videoIndex = 0;
avdevice_register_all();
av_log_set_level(AV_LOG_TRACE);
const AVInputFormat * pFrameGrabberInputFormat = av_find_input_format("dshow");
constexpr int frameGrabberPixelWidth = 1920;
constexpr int frameGrabberPixelHeight = 1080;
constexpr int frameGrabberFrameRate = 25;
char shortStringBuffer[32];
AVDictionary* pFrameGrabberOptions = nullptr;
_snprintf_s(shortStringBuffer, sizeof(shortStringBuffer), "%dx%d", frameGrabberPixelWidth, frameGrabberPixelHeight);
av_dict_set(&pFrameGrabberOptions, "video_size", shortStringBuffer, 0);
_snprintf_s(shortStringBuffer, sizeof(shortStringBuffer), "%d", frameGrabberFrameRate);
av_dict_set(&pFrameGrabberOptions, "framerate", shortStringBuffer, 0);
av_dict_set(&pFrameGrabberOptions, "pixel_format", "yuv420p", 0);
AVFormatContext* pFrameGrabberFormatContext = avformat_alloc_context();
pFrameGrabberFormatContext->flags = AVFMT_FLAG_NOBUFFER | AVFMT_FLAG_FLUSH_PACKETS;
if(avformat_open_input(&pFrameGrabberFormatContext, "video=MZ0380 PCI, Analog 01 Capture", pFrameGrabberInputFormat, &pFrameGrabberOptions) != 0)
{
std::cerr << "Couldn't open input stream." << '\n';
return -1;
}
if(avformat_find_stream_info(pFrameGrabberFormatContext, nullptr) < 0)
{
std::cerr << "Couldn't find stream information." << '\n';
return -1;
}
bool foundVideoStream = false;
for(unsigned int loop_videoIndex = 0; loop_videoIndex < pFrameGrabberFormatContext->nb_streams; loop_videoIndex++)
{
if(pFrameGrabberFormatContext->streams[loop_videoIndex]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
{
videoIndex = loop_videoIndex;
foundVideoStream = true;
break;
}
}
if(!foundVideoStream)
{
std::cerr << "Couldn't find a video stream." << '\n';
return -1;
}
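// Set up the decoder for the captured video stream.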
const AVCodec* pFrameGrabberCodec = avcodec_find_decoder(pFrameGrabberFormatContext->streams[videoIndex]->codecpar->codec_id);
if(pFrameGrabberCodec == nullptr)
{
std::cerr << "Codec not found." << '\n';
return -1;
}
AVCodecContext* pFrameGrabberCodecContext = avcodec_alloc_context3(pFrameGrabberCodec);
pFrameGrabberCodecContext->pix_fmt = AV_PIX_FMT_YUV420P;
pFrameGrabberCodecContext->width = frameGrabberPixelWidth;
pFrameGrabberCodecContext->height = frameGrabberPixelHeight;
int ret = avcodec_open2(pFrameGrabberCodecContext, pFrameGrabberCodec, nullptr);
if(ret < 0)
{
std::cerr << "Could not open pVideoCodec." << '\n';
return -1;
}
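// Output parameters and the software scaler (YUV420P -> NV12, the sw_format the QSV frames use).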
const char* outputFilePath = "c:\\temp\\output.mp4";
constexpr int outputWidth = frameGrabberPixelWidth;
constexpr int outputHeight = frameGrabberPixelHeight;
constexpr int outputFrameRate = frameGrabberFrameRate;
SwsContext* img_convert_ctx = sws_getContext(frameGrabberPixelWidth, frameGrabberPixelHeight, AV_PIX_FMT_YUV420P, outputWidth, outputHeight, AV_PIX_FMT_NV12, SWS_BICUBIC, nullptr, nullptr, nullptr);
constexpr double frameTimeinUs = 1000000.0 / frameGrabberFrameRate;
LARGE_INTEGER frequency;
LARGE_INTEGER lastTime, currentTime;
QueryPerformanceFrequency(&frequency);
QueryPerformanceCounter(&lastTime);
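// Set up the Intel Quick Sync (h264_qsv) hardware encoder.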
const AVCodec* pVideoCodec = avcodec_find_encoder_by_name("h264_qsv");
if (!pVideoCodec)
{
std::cerr << "Codec not found" << '\n';
return 1;
}
AVCodecContext* pVideoCodecContext = avcodec_alloc_context3(pVideoCodec);
if (!pVideoCodecContext)
{
std::cerr << "Could not allocate video pVideoCodec context" << '\n';
return 1;
}
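// Create the QSV hardware device context and a pool of GPU-side NV12 frames.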
AVBufferRef* pHardwareDeviceContextRef = nullptr;
ret = av_hwdevice_ctx_create(&pHardwareDeviceContextRef, AV_HWDEVICE_TYPE_QSV, nullptr, nullptr, 0);
check_error(ret);
pVideoCodecContext->bit_rate = static_cast<int64_t>(outputWidth * outputHeight) * 2;
pVideoCodecContext->width = outputWidth;
pVideoCodecContext->height = outputHeight;
pVideoCodecContext->framerate = { outputFrameRate, 1 };
pVideoCodecContext->time_base = { 1, outputFrameRate };
pVideoCodecContext->pix_fmt = AV_PIX_FMT_QSV;
pVideoCodecContext->gop_size = 10;
pVideoCodecContext->max_b_frames = 2;
AVBufferRef* pHardwareFramesContextRef = av_hwframe_ctx_alloc(pHardwareDeviceContextRef);
AVHWFramesContext* pHardwareFramesContext = reinterpret_cast<AVHWFramesContext*>(pHardwareFramesContextRef->data);
pHardwareFramesContext->format = AV_PIX_FMT_QSV;
pHardwareFramesContext->sw_format = AV_PIX_FMT_NV12;
pHardwareFramesContext->width = outputWidth;
pHardwareFramesContext->height = outputHeight;
pHardwareFramesContext->initial_pool_size = 32;
ret = av_hwframe_ctx_init(pHardwareFramesContextRef);
check_error(ret);
pVideoCodecContext->hw_frames_ctx = av_buffer_ref(pHardwareFramesContextRef);
pVideoCodecContext->hw_device_ctx = av_buffer_ref(pHardwareDeviceContextRef);
ret = avcodec_open2(pVideoCodecContext, pVideoCodec, nullptr);//&pVideoOptionsDict);
check_error(ret);
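// Set up the MP4 muxer with a single video stream.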
AVFormatContext* pVideoFormatContext = nullptr;
avformat_alloc_output_context2(&pVideoFormatContext, nullptr, nullptr, outputFilePath);
if (!pVideoFormatContext)
{
std::cerr << "Could not create output context" << '\n';
return 1;
}
const AVOutputFormat* pVideoOutputFormat = pVideoFormatContext->oformat;
if (pVideoFormatContext->oformat->flags & AVFMT_GLOBALHEADER)
{
pVideoCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
AVStream* pVideoStream = avformat_new_stream(pVideoFormatContext, pVideoCodec);
if (!pVideoStream)
{
std::cerr << "Could not allocate stream" << '\n';
return 1;
}
pVideoStream->time_base = pVideoCodecContext->time_base;
ret = avcodec_parameters_from_context(pVideoStream->codecpar, pVideoCodecContext);
check_error(ret);
if (!(pVideoOutputFormat->flags & AVFMT_NOFILE))
{
ret = avio_open(&pVideoFormatContext->pb, outputFilePath, AVIO_FLAG_WRITE);
check_error(ret);
}
pVideoFormatContext->flags |= AVFMT_FLAG_GENPTS | AVFMT_FLAG_IGNDTS; // '|', not '&': the '&' of these two distinct flags is 0 and sets nothing
ret = avformat_write_header(pVideoFormatContext, nullptr);
check_error(ret);
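// Allocate the hardware frame (backed by a QSV surface) and the frames/packets reused in the capture/encode loop.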
AVFrame* pHardwareFrame = av_frame_alloc();
if (av_hwframe_get_buffer(pVideoCodecContext->hw_frames_ctx, pHardwareFrame, 0) < 0)
{
std::cerr << "Error allocating a hw frame" << '\n';
return -1;
}
AVFrame* pFrameGrabberFrame = av_frame_alloc();
AVPacket* pFrameGrabberPacket = av_packet_alloc();
AVPacket* pVideoPacket = av_packet_alloc();
AVFrame* pVideoFrame = av_frame_alloc();
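// Main loop: read a packet from the grabber, decode it, pace output to the
// target frame rate, convert to NV12, upload to the GPU, encode, and mux.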
while (_isRunning)
{
if (av_read_frame(pFrameGrabberFormatContext, pFrameGrabberPacket) == 0)
{
if (pFrameGrabberPacket->stream_index == videoIndex)
{
ret = avcodec_send_packet(pFrameGrabberCodecContext, pFrameGrabberPacket);
if (ret < 0)
{
std::cerr << "Error sending a packet for decoding!" << '\n';
return -1;
}
ret = avcodec_receive_frame(pFrameGrabberCodecContext, pFrameGrabberFrame);
// Check EAGAIN/EOF before the generic error path, otherwise those branches
// are unreachable; AVERROR_EOF must not be wrapped in AVERROR().
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
{
std::cout << "End of stream detected. Exiting now." << '\n';
return 0;
}
if (ret != 0)
{
std::cerr << "Receiving frame failed!" << '\n';
return -1;
}
QueryPerformanceCounter(&currentTime);
const double elapsedTime = (currentTime.QuadPart - lastTime.QuadPart) * 1000000.0 / frequency.QuadPart;
if (elapsedTime > 0.0 && elapsedTime < frameTimeinUs)
{
uSleep(frameTimeinUs - elapsedTime, frequency);
}
ret = sws_scale_frame(img_convert_ctx, pVideoFrame, pFrameGrabberFrame);
if (ret < 0)
{
std::cerr << "Scaling frame for Intel QS Encoder did fail!" << '\n';
return -1;
}
if (av_hwframe_transfer_data(pHardwareFrame, pVideoFrame, 0) < 0)
{
std::cerr << "Error transferring frame data to hw frame!" << '\n';
return -1;
}
av_packet_unref(pVideoPacket);
// av_hwframe_get_buffer() leaves pts at AV_NOPTS_VALUE, so derive the pts
// from the captured frame's timestamp instead of from pHardwareFrame itself.
pHardwareFrame->pts = av_rescale_q(pFrameGrabberFrame->pts, pFrameGrabberFormatContext->streams[videoIndex]->time_base, pVideoCodecContext->time_base);
ret = avcodec_send_frame(pVideoCodecContext, pHardwareFrame);
if (ret < 0)
{
std::cerr << "Error sending a frame for encoding" << '\n';
check_error(ret);
}
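// Drain every packet the encoder has ready, rescale its timestamps from the
// codec time base to the stream time base, and hand it to the muxer.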
while (ret >= 0)
{
ret = avcodec_receive_packet(pVideoCodecContext, pVideoPacket);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
{
break;
}
if (ret < 0)
{
std::cerr << "Error during encoding" << '\n';
return 1;
}
pVideoPacket->stream_index = 0;
av_packet_rescale_ts(pVideoPacket, pVideoCodecContext->time_base, pVideoFormatContext->streams[0]->time_base);
ret = av_interleaved_write_frame(pVideoFormatContext, pVideoPacket);
check_error(ret);
av_packet_unref(pVideoPacket);
}
av_packet_unref(pFrameGrabberPacket);
QueryPerformanceCounter(&lastTime);
}
}
}
av_write_trailer(pVideoFormatContext);
av_buffer_unref(&pHardwareDeviceContextRef);
avcodec_free_context(&pVideoCodecContext);
avio_closep(&pVideoFormatContext->pb);
avformat_free_context(pVideoFormatContext);
av_packet_free(&pVideoPacket);
avcodec_free_context(&pFrameGrabberCodecContext);
av_frame_free(&pFrameGrabberFrame);
av_packet_free(&pFrameGrabberPacket);
avformat_close_input(&pFrameGrabberFormatContext);
return 0;
}
I found the solution to the problem.
The QSV encoder does not seem to support B-frames. If you set max_b_frames to 0, it works as expected and the frame order in the resulting video is correct.
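For reference, here is a minimal sketch of the relevant changes, using the identifiers from the prototype above (frameIndex is my own illustrative addition, not part of the original code):

// Encoder setup: the decisive change is disabling B-frames. Without B-frames
// the encoder emits packets in presentation order, so DTS can follow PTS
// monotonically and the muxer stops complaining about the timestamps.
pVideoCodecContext->time_base = { 1, outputFrameRate }; // one tick per frame
pVideoCodecContext->max_b_frames = 0; // QSV: no B-frames

// Inside the capture loop: with a time base of 1/outputFrameRate, a plain
// frame counter is the simplest strictly increasing pts.
int64_t frameIndex = 0; // hypothetical counter, declared once before the loop
pHardwareFrame->pts = frameIndex++; // stamp each frame before avcodec_send_frame()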