我一直在尝试使用 v4l 和 c 用 elgato 脸部摄像头捕捉图像。由于缺乏示例教程,我使用了人工智能聊天机器人生成的代码以及我能找到的几种相关示例中的修改代码,然后尝试熟悉代码并使其发挥作用。现在有一些问题,但我目前正在尝试解决的主要问题是捕获的照片是绿色和洋红色的。
我尝试使用不同的 yuv 到 rgb 公式、值范围和像素格式(我相信 UYVY 是正确的)。我设法得到不同的颜色对,有一次是 3 种颜色,但从来不是全部 4 种。(我所说的“颜色”指的是 yuv 平面的象限)我也尝试了其他 SO 问题中提出的解决方案,但没有运气。 这是我的代码:
#include <sys/mman.h>
#include <linux/videodev2.h>
#include <jpeglib.h>
#define VIDEO_DEVICE "/dev/video2"
#define CAPTURE_WIDTH 1920
#define CAPTURE_HEIGHT 1080
#define BUFFER_SIZE (CAPTURE_WIDTH * CAPTURE_HEIGHT * 2)
// Function to convert YUV to RGB
void yuv2rgb(unsigned char *yuv, unsigned char *rgb, int width, int height) {
int i, j;
int y, u, v;
int r, g, b;
int index_yuv, index_rgb;
for (i = 0; i < height; i++) {
for (j = 0; j < width; j += 2) {
index_yuv = (i * width + j) * 2;
index_rgb = (i * width + j) * 3;
y = yuv[index_yuv];
u = yuv[index_yuv + 1];
v = yuv[index_yuv + 3];
// Adjust U and V range to -128 to 127
u -= 128;
v -= 128;
// YUV to RGB conversion
r = y + 1.402 * v;
g = y - 0.344136 * u - 0.714136 * v;
b = y + 1.772 * u;
r = (r < 0) ? 0 : ((r > 255) ? 255 : r);
g = (g < 0) ? 0 : ((g > 255) ? 255 : g);
b = (b < 0) ? 0 : ((b > 255) ? 255 : b);
rgb[index_rgb] = r;
rgb[index_rgb + 1] = g;
rgb[index_rgb + 2] = b;
y = yuv[index_yuv + 2];
// YUV to RGB conversion
r = y + 1.402 * v;
g = y - 0.344136 * u - 0.714136 * v;
b = y + 1.772 * u;
r = (r < 0) ? 0 : ((r > 255) ? 255 : r);
g = (g < 0) ? 0 : ((g > 255) ? 255 : g);
b = (b < 0) ? 0 : ((b > 255) ? 255 : b);
rgb[index_rgb + 3] = r;
rgb[index_rgb + 4] = g;
rgb[index_rgb + 5] = b;
}
}
}
int main() {
int videoFd;
struct v4l2_capability cap;
struct v4l2_format format;
struct v4l2_requestbuffers reqbuf;
struct v4l2_buffer buf;
// Open the video device
videoFd = open(VIDEO_DEVICE, O_RDWR);
if (videoFd < 0) {
perror("Failed to open video device");
return 1;
}
// Check device capabilities
if (ioctl(videoFd, VIDIOC_QUERYCAP, &cap) < 0) {
perror("Failed to query device capabilities");
close(videoFd);
return 1;
}
if (!(cap.capabilities & V4L2_CAP_VIDEO_CAPTURE)) {
fprintf(stderr, "Video capture not supported\n");
close(videoFd);
return 1;
}
// Set the desired capture format
memset(&format, 0, sizeof(format));
format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
format.fmt.pix.width = CAPTURE_WIDTH;
format.fmt.pix.height = CAPTURE_HEIGHT;
format.fmt.pix.pixelformat = V4L2_PIX_FMT_UYVY; // YUV 4:2:2 format
if (ioctl(videoFd, VIDIOC_S_FMT, &format) < 0) {
perror("Failed to set video format");
close(videoFd);
return 1;
}
// Request a single buffer for capture
memset(&reqbuf, 0, sizeof(reqbuf));
reqbuf.count = 1;
reqbuf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
reqbuf.memory = V4L2_MEMORY_MMAP;
if (ioctl(videoFd, VIDIOC_REQBUFS, &reqbuf) < 0) {
perror("Failed to request buffer for capture");
close(videoFd);
return 1;
}
// Map the buffer for user-space access
memset(&buf, 0, sizeof(buf));
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
buf.memory = V4L2_MEMORY_MMAP;
buf.index = 0;
if (ioctl(videoFd, VIDIOC_QUERYBUF, &buf) < 0) {
perror("Failed to query buffer");
close(videoFd);
return 1;
}
void *buffer = mmap(NULL, buf.length, PROT_READ | PROT_WRITE, MAP_SHARED, videoFd, buf.m.offset);
if (buffer == MAP_FAILED) {
perror("Failed to map buffer");
close(videoFd);
return 1;
}
// Queue the buffer for capture
if (ioctl(videoFd, VIDIOC_QBUF, &buf) < 0) {
perror("Failed to queue buffer");
munmap(buffer, buf.length);
close(videoFd);
return 1;
}
// Start capturing frames
enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
if (ioctl(videoFd, VIDIOC_STREAMON, &type) < 0) {
perror("Failed to start capturing");
munmap(buffer, buf.length);
close(videoFd);
return 1;
}
// Wait for the frame to be captured
if (ioctl(videoFd, VIDIOC_DQBUF, &buf) < 0) {
perror("Failed to dequeue buffer");
munmap(buffer, buf.length);
close(videoFd);
return 1;
}
// Convert the captured frame from YUV to RGB
unsigned char *rgbBuffer = (unsigned char *)malloc(CAPTURE_WIDTH * CAPTURE_HEIGHT * 3);
yuv2rgb((unsigned char *)buffer, rgbBuffer, CAPTURE_WIDTH, CAPTURE_HEIGHT);
// Save the RGB frame as a JPEG image
FILE *file = fopen("captured_frame.jpg", "wb");
if (file == NULL) {
perror("Failed to open output file");
munmap(buffer, buf.length);
free(rgbBuffer);
close(videoFd);
return 1;
}
// Use libjpeg to write the RGB data as a JPEG image
struct jpeg_compress_struct cinfo;
struct jpeg_error_mgr jerr;
cinfo.err = jpeg_std_error(&jerr);
jpeg_create_compress(&cinfo);
jpeg_stdio_dest(&cinfo, file);
cinfo.image_width = CAPTURE_WIDTH;
cinfo.image_height = CAPTURE_HEIGHT;
cinfo.input_components = 3;
cinfo.in_color_space = JCS_RGB;
jpeg_set_defaults(&cinfo);
jpeg_set_quality(&cinfo, 80, TRUE);
jpeg_start_compress(&cinfo, TRUE);
JSAMPROW row_pointer[1];
while (cinfo.next_scanline < cinfo.image_height) {
row_pointer[0] = &rgbBuffer[cinfo.next_scanline * cinfo.image_width * cinfo.input_components];
jpeg_write_scanlines(&cinfo, row_pointer, 1);
}
jpeg_finish_compress(&cinfo);
fclose(file);
jpeg_destroy_compress(&cinfo);
// Stop capturing frames
if (ioctl(videoFd, VIDIOC_STREAMOFF, &type) < 0) {
perror("Failed to stop capturing");
munmap(buffer, buf.length);
free(rgbBuffer);
close(videoFd);
return 1;
}
// Clean up resources
munmap(buffer, buf.length);
free(rgbBuffer);
close(videoFd);
printf("Frame captured and saved successfully.\n");
return 0;
}
替代 yuv 到 rgb 函数(产生类似的结果,可能稍差):
void yuv2rgb(unsigned char *uyvy, unsigned char *rgb, int width, int height) {
int i, j;
int y0, u, y1, v;
int r, g, b;
int index_uyvy, index_rgb;
for (i = 0; i < height; i++) {
for (j = 0; j < width; j += 2) {
index_uyvy = (i * width + j) * 2;
index_rgb = (i * width + j) * 3;
y0 = uyvy[index_uyvy];
u = uyvy[index_uyvy + 1];
y1 = uyvy[index_uyvy + 2];
v = uyvy[index_uyvy + 3];
// Adjust U and V range to -128 to 127
u -= 128;
v -= 128;
// UYVY to RGB conversion
r = y0 + 1.402 * v;
g = y0 - 0.344136 * u - 0.714136 * v;
b = y0 + 1.772 * u;
r = (r < 0) ? 0 : ((r > 255) ? 255 : r);
g = (g < 0) ? 0 : ((g > 255) ? 255 : g);
b = (b < 0) ? 0 : ((b > 255) ? 255 : b);
rgb[index_rgb] = r;
rgb[index_rgb + 1] = g;
rgb[index_rgb + 2] = b;
// UYVY to RGB conversion
r = y1 + 1.402 * v;
g = y1 - 0.344136 * u - 0.714136 * v;
b = y1 + 1.772 * u;
r = (r < 0) ? 0 : ((r > 255) ? 255 : r);
g = (g < 0) ? 0 : ((g > 255) ? 255 : g);
b = (b < 0) ? 0 : ((b > 255) ? 255 : b);
rgb[index_rgb + 3] = r;
rgb[index_rgb + 4] = g;
rgb[index_rgb + 5] = b;
}
}
}
与打包 RGB 格式类似,打包 YUV 格式将 Y、Cb 和 Cr 分量连续存储在内存中。它们可能对色度分量应用子采样,因此在交织三个分量的方式上有所不同。
注意
- 在随后的所有表格中,位 7 是字节中的最高有效位。
- “Y”、“Cb”和“Cr”分别表示亮度、蓝色色度(也称为“U”)和红色色度(也称为“V”)分量的位。 “A”表示 alpha 分量的位(如果格式支持),“X”表示填充位。
标识符 | 代码 | 字节0 | 字节1 | 字节2 | 字节3 | 字节4 | 字节5 | 字节6 | 字节7 |
---|---|---|---|---|---|---|---|---|---|
V4L2_PIX_FMT_UYVY | 'UYVY' | Cb0 | Y'0 | Cr0 | Y'1 | Cb2 | Y'2 | Cr2 | Y'3 |
这些格式通常称为 YUYV 或 YUY2,对色度分量水平进行二次采样,在容器中存储 2 个像素。 8 位格式的容器为 32 位,10 位以上格式的容器为 64 位。
每个组件超过 8 位的打包 YUYV 格式存储为四个 16 位小端字。每个字的最高有效位包含一个组成部分,最低有效位是零填充。
检查您的转换代码,我认为您没有读取正确的组件序列。
我只是在摆弄一个红外摄像头,它可以生成
UYVY
格式的彩色框,这就是我想到的:
#include <cstdint> // for: uint8_t
#include <cstddef> // for: size_t
struct rgb_t
{
uint8_t r;
uint8_t g;
uint8_t b;
};
static_assert(sizeof(rgb_t) == sizeof(uint8_t) * 3, "Size mismatch");
/**
* @brief Ensures that a given value is within the specified minimum and maximum
* range.
*
* @tparam T The type of the value to be clamped.
*
* @param[in] value The value to be clamped.
* @param[in] min The minimum allowable value.
* @param[in] max The maximum allowable value.
*
* @return The clamped value, which will be within the range [min, max].
*
* @note This function ensures that the RGB values are within the valid
* range [0, 255]. This is necessary because the conversion formulas can produce
* values outside this range.
*/
template <typename T>
constexpr T clamp(T value, T min, T max)
{
return
/* if */ (value < min) ?
min
/* else if */ : (value > max) ?
max
/* else */ :
value;
}
/**
* @brief Converts UYVY formatted data to RGB888 format.
*
* This function processes 2 pixels at a time from the UYVY formatted buffer.
* The UYVY format packs 2 pixels into 4 bytes in the order: [U0, Y0, V0, Y1].
* - The first pixel uses: Y0, U0, V0
* - The second pixel uses: Y1, U0, V0.
*
* References to YUV format:
* - https://en.wikipedia.org/wiki/YUV
* - https://fourcc.org/yuv.php
*
* References to UYVY format:
* - https://fourcc.org/pixel-format/yuv-uyvy/
* - https://learn.microsoft.com/en-us/windows/win32/medfound/recommended-8-bit-yuv-formats-for-video-rendering#uyvy
*
* @param[in] uyvy_buffer Pointer to the UYVY input buffer.
* @param[in] uyvy_buffer_bytes_count Size of the UYVY buffer in bytes.
* @param[in] width Width of the image.
* @param[in] height Height of the image.
* @param[out] rgb Pointer to the output RGB buffer.
* @param[in] rgb_size Size of the RGB output buffer (in number of rgb_t elements).
*
* @return The number of processed pixels, or 0 if there was an error (e.g., insufficient buffer size).
*/
size_t UYVY_to_RGB888(
const uint8_t* uyvy_buffer, size_t uyvy_buffer_bytes_count,
size_t width, size_t height,
rgb_t* rgb, size_t rgb_size
)
{
const size_t frame_size = (width * height * 2); // Each pixel is 2 bytes in UYVY format
// Check if `UYVY buffer bytes count` is sufficient
if(uyvy_buffer_bytes_count < frame_size)
{
return 0; // Insufficient `UYVY buffer bytes count`
}
// Check if `RGB size` is sufficient
if( rgb_size < (width * height) )
{
return 0; // Insufficient `RGB size`
}
// Reusable variables (not declard inside the loop - for perfomance reason)
uint8_t u, y0, v, y1;
int Y, U, V;
size_t rgb_index = 0;
for (size_t i = 0; i < frame_size; i += 4)
{
u = uyvy_buffer[i + 0];
y0 = uyvy_buffer[i + 1];
v = uyvy_buffer[i + 2];
y1 = uyvy_buffer[i + 3];
// Adjust `Y`, `U`, and `V` components
//
// `Y`, `U`, and `V` components in video systems often have specific
// ranges, so the following offsets are used because the 'YUV color
// space' is often encoded with an offset:
// - `Y` typically ranges from 16 to 235 (so we subtract 16 to center it around 0)
// - `U` and `V` typically range from 16 to 240 (so we subtract 128 to center them around 0)
Y = y0 - 16;
U = u - 128;
V = v - 128;
// Processing the first pixel in the UYVY pair
// YUV to RGB conversion formulas given from here:
// https://learn.microsoft.com/en-us/previous-versions/aa917087(v=msdn.10)#converting-8-bit-yuv-to-rgb888
rgb[rgb_index].r = clamp(((298 * Y) + (409 * V) + 128) >> 8, 0, 255); // R
rgb[rgb_index].g = clamp(((298 * Y) - (100 * U) - (208 * V) + 128) >> 8, 0, 255); // G
rgb[rgb_index].b = clamp(((298 * Y) + (516 * U) + 128) >> 8, 0, 255); // B
rgb_index++;
// Process the second pixel in the UYVY pair
// Using the same `U` and `V` values, but different `Y` (y1) value
Y = y1 - 16;
rgb[rgb_index].r = clamp(((298 * Y) + (409 * V) + 128) >> 8, 0, 255); // R
rgb[rgb_index].g = clamp(((298 * Y) - (100 * U) - (208 * V) + 128) >> 8, 0, 255); // G
rgb[rgb_index].b = clamp(((298 * Y) + (516 * U) + 128) >> 8, 0, 255); // B
rgb_index++;
}
return rgb_index; // Return number of processed pixels
}
使用示例:
#include <vector> // for: std::vector<T>
#include <cstdio> // for: fputs(), fprintf()
int main()
{
const size_t width = 256;
const size_t height = 192;
const size_t uyvy_buffer_bytes_count = width * height * 2; // Each pixel is 2 bytes in UYVY format
// Create an example UYVY buffer with dummy data (for demonstration purposes)
std::vector<uint8_t> uyvy_buffer(uyvy_buffer_bytes_count, 128); // Fill with a neutral gray value
const size_t rgb_buffer_size = width * height; // Each pixel is one rgb_t structure
// Output buffer for RGB data
std::vector<rgb_t> rgb_buffer(rgb_buffer_size);
// Convert UYVY to RGB
const size_t num_processed_pixels = UYVY_to_RGB888(
uyvy_buffer.data(), uyvy_buffer_bytes_count,
width, height,
rgb_buffer.data(), rgb_buffer_size
);
// Check if the conversion was successful
if(num_processed_pixels == 0)
{
fputs("Error: Conversion failed.\n", stderr);
return 1;
}
// Here, we simply print the first 10 pixels
fputs("First 10 RGB pixels:\n", stdout);
for(size_t i = 0; i < 10; ++i) {
fprintf(stdout,
"Pixel %zu: (%d, %d, %d)\n",
i,
rgb_buffer[i].r,
rgb_buffer[i].g,
rgb_buffer[i].b
);
}
return 0;
}