I have been struggling with a real-world problem for several days now. I have spent hours on internet searches, ChatGPT sessions, code reviews, and so on, but I have not gotten it to work as expected.
The setup: Windows 11 Pro x64, my own LGPL build of libav and libmfx, an x64 C++ program built with Visual C++.
The situation: I receive frames from a frame grabber card (a YUAN SDI 2K in the example below) and want to encode them with the hardware Intel Quick Sync H.264 encoder. That part works well so far. The main processing steps are: open the dshow capture input, decode the raw frame, convert it from YUV420P to NV12 with swscale, upload it to a QSV hardware frame, encode it with h264_qsv, and mux the packets into an MP4 file.
This works so far, but I am struggling with the PTS and DTS timestamps. I have tried many combinations of hints from ChatGPT, but I never got it to really work and produce a valid video. I either get errors that PTS/DTS are not monotonically increasing, or they do not change at all, or PTS and DTS end up with completely different values, and so on. Once I got a video where the time base seemed fine and libav reported no errors, but the frames in the video were in the wrong order.
Here is the actual example code (a prototype, with no proper cleanup, no flushing, etc.):
#define __STDC_LIMIT_MACROS
#include <cstdio>
#include <cstdint>
#include <Windows.h>
#ifdef _WIN32
//Windows
extern "C"
{
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libavdevice/avdevice.h"
#include "libavutil/hwcontext_qsv.h"
};
#endif
#include <iostream>
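// Busy-wait for waitTimeInUs microseconds using the performance counter;
// for waits longer than one scheduler quantum, Sleep(1) first to save CPU.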
void uSleep(double waitTimeInUs, LARGE_INTEGER frequency)
{
LARGE_INTEGER startTime, currentTime;
QueryPerformanceCounter(&startTime);
if (waitTimeInUs > 16500.0)
Sleep(1);
do
{
YieldProcessor();
//Sleep(0);
QueryPerformanceCounter(&currentTime);
} while (waitTimeInUs > (currentTime.QuadPart - startTime.QuadPart) * 1000000.0 / frequency.QuadPart);
}
void check_error(int ret)
{
if (ret < 0) {
char errbuf[128];
av_strerror(ret, errbuf, sizeof(errbuf));
std::cerr << "Error: " << errbuf << '\n';
exit(1);
}
}
bool _isRunning = true;
BOOL WINAPI consoleHandler(DWORD signal)
{
if (signal == CTRL_C_EVENT)
{
_isRunning = false;
}
return TRUE;
}
int main(int argc, char* argv[])
{
if (!SetConsoleCtrlHandler(consoleHandler, TRUE))
{
std::cerr << "Could not set control handler!" << '\n';
return 1;
}
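// Open the DirectShow frame grabber input with fixed size, frame rate, and pixel format.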
unsigned int videoIndex = 0;
avdevice_register_all();
av_log_set_level(AV_LOG_TRACE);
const AVInputFormat * pFrameGrabberInputFormat = av_find_input_format("dshow");
constexpr int frameGrabberPixelWidth = 1920;
constexpr int frameGrabberPixelHeight = 1080;
constexpr int frameGrabberFrameRate = 25;
char shortStringBuffer[32];
AVDictionary* pFrameGrabberOptions = nullptr;
_snprintf_s(shortStringBuffer, sizeof(shortStringBuffer), "%dx%d", frameGrabberPixelWidth, frameGrabberPixelHeight);
av_dict_set(&pFrameGrabberOptions, "video_size", shortStringBuffer, 0);
_snprintf_s(shortStringBuffer, sizeof(shortStringBuffer), "%d", frameGrabberFrameRate);
av_dict_set(&pFrameGrabberOptions, "framerate", shortStringBuffer, 0);
av_dict_set(&pFrameGrabberOptions, "pixel_format", "yuv420p", 0);
AVFormatContext* pFrameGrabberFormatContext = avformat_alloc_context();
pFrameGrabberFormatContext->flags = AVFMT_FLAG_NOBUFFER | AVFMT_FLAG_FLUSH_PACKETS;
if(avformat_open_input(&pFrameGrabberFormatContext, "video=MZ0380 PCI, Analog 01 Capture", pFrameGrabberInputFormat, &pFrameGrabberOptions) != 0)
{
std::cerr << "Couldn't open input stream." << '\n';
return -1;
}
if(avformat_find_stream_info(pFrameGrabberFormatContext, nullptr) < 0)
{
std::cerr << "Couldn't find stream information." << '\n';
return -1;
}
bool foundVideoStream = false;
for(unsigned int loop_videoIndex = 0; loop_videoIndex < pFrameGrabberFormatContext->nb_streams; loop_videoIndex++)
{
if(pFrameGrabberFormatContext->streams[loop_videoIndex]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
{
videoIndex = loop_videoIndex;
foundVideoStream = true;
break;
}
}
if(!foundVideoStream)
{
std::cerr << "Couldn't find a video stream." << '\n';
return -1;
}
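// Set up the decoder for the captured video stream.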
const AVCodec* pFrameGrabberCodec = avcodec_find_decoder(pFrameGrabberFormatContext->streams[videoIndex]->codecpar->codec_id);
if(pFrameGrabberCodec == nullptr)
{
std::cerr << "Codec not found." << '\n';
return -1;
}
AVCodecContext* pFrameGrabberCodecContext = avcodec_alloc_context3(pFrameGrabberCodec);
pFrameGrabberCodecContext->pix_fmt = AV_PIX_FMT_YUV420P;
pFrameGrabberCodecContext->width = frameGrabberPixelWidth;
pFrameGrabberCodecContext->height = frameGrabberPixelHeight;
int ret = avcodec_open2(pFrameGrabberCodecContext, pFrameGrabberCodec, nullptr);
if(ret < 0)
{
std::cerr << "Could not open pVideoCodec." << '\n';
return -1;
}
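// Output parameters and the software scaler (YUV420P -> NV12, the sw_format the QSV frames use).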
const char* outputFilePath = "c:\\temp\\output.mp4";
constexpr int outputWidth = frameGrabberPixelWidth;
constexpr int outputHeight = frameGrabberPixelHeight;
constexpr int outputFrameRate = frameGrabberFrameRate;
SwsContext* img_convert_ctx = sws_getContext(frameGrabberPixelWidth, frameGrabberPixelHeight, AV_PIX_FMT_YUV420P, outputWidth, outputHeight, AV_PIX_FMT_NV12, SWS_BICUBIC, nullptr, nullptr, nullptr);
constexpr double frameTimeinUs = 1000000.0 / frameGrabberFrameRate;
LARGE_INTEGER frequency;
LARGE_INTEGER lastTime, currentTime;
QueryPerformanceFrequency(&frequency);
QueryPerformanceCounter(&lastTime);
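// Set up the Intel Quick Sync (h264_qsv) hardware encoder.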
const AVCodec* pVideoCodec = avcodec_find_encoder_by_name("h264_qsv");
if (!pVideoCodec)
{
std::cerr << "Codec not found" << '\n';
return 1;
}
AVCodecContext* pVideoCodecContext = avcodec_alloc_context3(pVideoCodec);
if (!pVideoCodecContext)
{
std::cerr << "Could not allocate video pVideoCodec context" << '\n';
return 1;
}
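// Create the QSV hardware device context and a pool of GPU-side NV12 frames.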
AVBufferRef* pHardwareDeviceContextRef = nullptr;
ret = av_hwdevice_ctx_create(&pHardwareDeviceContextRef, AV_HWDEVICE_TYPE_QSV, nullptr, nullptr, 0);
check_error(ret);
pVideoCodecContext->bit_rate = static_cast<int64_t>(outputWidth * outputHeight) * 2;
pVideoCodecContext->width = outputWidth;
pVideoCodecContext->height = outputHeight;
pVideoCodecContext->framerate = { outputFrameRate, 1 };
pVideoCodecContext->time_base = { 1, outputFrameRate };
pVideoCodecContext->pix_fmt = AV_PIX_FMT_QSV;
pVideoCodecContext->gop_size = 10;
pVideoCodecContext->max_b_frames = 2;
AVBufferRef* pHardwareFramesContextRef = av_hwframe_ctx_alloc(pHardwareDeviceContextRef);
AVHWFramesContext* pHardwareFramesContext = reinterpret_cast<AVHWFramesContext*>(pHardwareFramesContextRef->data);
pHardwareFramesContext->format = AV_PIX_FMT_QSV;
pHardwareFramesContext->sw_format = AV_PIX_FMT_NV12;
pHardwareFramesContext->width = outputWidth;
pHardwareFramesContext->height = outputHeight;
pHardwareFramesContext->initial_pool_size = 32;
ret = av_hwframe_ctx_init(pHardwareFramesContextRef);
check_error(ret);
pVideoCodecContext->hw_frames_ctx = av_buffer_ref(pHardwareFramesContextRef);
pVideoCodecContext->hw_device_ctx = av_buffer_ref(pHardwareDeviceContextRef);
ret = avcodec_open2(pVideoCodecContext, pVideoCodec, nullptr);//&pVideoOptionsDict);
check_error(ret);
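// Set up the MP4 muxer with a single video stream.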
AVFormatContext* pVideoFormatContext = nullptr;
avformat_alloc_output_context2(&pVideoFormatContext, nullptr, nullptr, outputFilePath);
if (!pVideoFormatContext)
{
std::cerr << "Could not create output context" << '\n';
return 1;
}
const AVOutputFormat* pVideoOutputFormat = pVideoFormatContext->oformat;
if (pVideoFormatContext->oformat->flags & AVFMT_GLOBALHEADER)
{
pVideoCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
AVStream* pVideoStream = avformat_new_stream(pVideoFormatContext, pVideoCodec);
if (!pVideoStream)
{
std::cerr << "Could not allocate stream" << '\n';
return 1;
}
pVideoStream->time_base = pVideoCodecContext->time_base;
ret = avcodec_parameters_from_context(pVideoStream->codecpar, pVideoCodecContext);
check_error(ret);
if (!(pVideoOutputFormat->flags & AVFMT_NOFILE))
{
ret = avio_open(&pVideoFormatContext->pb, outputFilePath, AVIO_FLAG_WRITE);
check_error(ret);
}
pVideoFormatContext->flags |= AVFMT_FLAG_GENPTS | AVFMT_FLAG_IGNDTS; // '|', not '&': the '&' of these two distinct flags is 0 and sets nothing
ret = avformat_write_header(pVideoFormatContext, nullptr);
check_error(ret);
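// Allocate the hardware frame (backed by a QSV surface) and the frames/packets reused in the capture/encode loop.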
AVFrame* pHardwareFrame = av_frame_alloc();
if (av_hwframe_get_buffer(pVideoCodecContext->hw_frames_ctx, pHardwareFrame, 0) < 0)
{
std::cerr << "Error allocating a hw frame" << '\n';
return -1;
}
AVFrame* pFrameGrabberFrame = av_frame_alloc();
AVPacket* pFrameGrabberPacket = av_packet_alloc();
AVPacket* pVideoPacket = av_packet_alloc();
AVFrame* pVideoFrame = av_frame_alloc();
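// Main loop: read a packet from the grabber, decode it, pace output to the
// target frame rate, convert to NV12, upload to the GPU, encode, and mux.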
while (_isRunning)
{
if (av_read_frame(pFrameGrabberFormatContext, pFrameGrabberPacket) == 0)
{
if (pFrameGrabberPacket->stream_index == videoIndex)
{
ret = avcodec_send_packet(pFrameGrabberCodecContext, pFrameGrabberPacket);
if (ret < 0)
{
std::cerr << "Error sending a packet for decoding!" << '\n';
return -1;
}
ret = avcodec_receive_frame(pFrameGrabberCodecContext, pFrameGrabberFrame);
// Check EAGAIN/EOF before the generic error path, otherwise those branches
// are unreachable; AVERROR_EOF must not be wrapped in AVERROR().
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
{
std::cout << "End of stream detected. Exiting now." << '\n';
return 0;
}
if (ret != 0)
{
std::cerr << "Receiving frame failed!" << '\n';
return -1;
}
QueryPerformanceCounter(&currentTime);
const double elapsedTime = (currentTime.QuadPart - lastTime.QuadPart) * 1000000.0 / frequency.QuadPart;
if (elapsedTime > 0.0 && elapsedTime < frameTimeinUs)
{
uSleep(frameTimeinUs - elapsedTime, frequency);
}
ret = sws_scale_frame(img_convert_ctx, pVideoFrame, pFrameGrabberFrame);
if (ret < 0)
{
std::cerr << "Scaling frame for Intel QS Encoder did fail!" << '\n';
return -1;
}
if (av_hwframe_transfer_data(pHardwareFrame, pVideoFrame, 0) < 0)
{
std::cerr << "Error transferring frame data to hw frame!" << '\n';
return -1;
}
av_packet_unref(pVideoPacket);
// av_hwframe_get_buffer() leaves pts at AV_NOPTS_VALUE, so derive the pts
// from the captured frame's timestamp instead of from pHardwareFrame itself.
pHardwareFrame->pts = av_rescale_q(pFrameGrabberFrame->pts, pFrameGrabberFormatContext->streams[videoIndex]->time_base, pVideoCodecContext->time_base);
ret = avcodec_send_frame(pVideoCodecContext, pHardwareFrame);
if (ret < 0)
{
std::cerr << "Error sending a frame for encoding" << '\n';
check_error(ret);
}
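// Drain every packet the encoder has ready, rescale its timestamps from the
// codec time base to the stream time base, and hand it to the muxer.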
while (ret >= 0)
{
ret = avcodec_receive_packet(pVideoCodecContext, pVideoPacket);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
{
break;
}
if (ret < 0)
{
std::cerr << "Error during encoding" << '\n';
return 1;
}
pVideoPacket->stream_index = 0;
av_packet_rescale_ts(pVideoPacket, pVideoCodecContext->time_base, pVideoFormatContext->streams[0]->time_base);
ret = av_interleaved_write_frame(pVideoFormatContext, pVideoPacket);
check_error(ret);
av_packet_unref(pVideoPacket);
}
av_packet_unref(pFrameGrabberPacket);
QueryPerformanceCounter(&lastTime);
}
}
}
av_write_trailer(pVideoFormatContext);
av_buffer_unref(&pHardwareDeviceContextRef);
avcodec_free_context(&pVideoCodecContext);
avio_closep(&pVideoFormatContext->pb);
avformat_free_context(pVideoFormatContext);
av_packet_free(&pVideoPacket);
avcodec_free_context(&pFrameGrabberCodecContext);
av_frame_free(&pFrameGrabberFrame);
av_packet_free(&pFrameGrabberPacket);
avformat_close_input(&pFrameGrabberFormatContext);
return 0;
}
I found the solution to the problem.
The QSV encoder does not seem to support B-frames. If you set max_b_frames to 0, it works as expected and the frame order in the resulting video is correct.
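For reference, here is a minimal sketch of the relevant changes, using the identifiers from the prototype above (frameIndex is my own illustrative addition, not part of the original code):

// Encoder setup: the decisive change is disabling B-frames. Without B-frames
// the encoder emits packets in presentation order, so DTS can follow PTS
// monotonically and the muxer stops complaining about the timestamps.
pVideoCodecContext->time_base = { 1, outputFrameRate }; // one tick per frame
pVideoCodecContext->max_b_frames = 0; // QSV: no B-frames

// Inside the capture loop: with a time base of 1/outputFrameRate, a plain
// frame counter is the simplest strictly increasing pts.
int64_t frameIndex = 0; // hypothetical counter, declared once before the loop
pHardwareFrame->pts = frameIndex++; // stamp each frame before avcodec_send_frame()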