我很早就开始尝试在一台运行 Linux、搭载 Mali 400 GPU 的无头嵌入式设备上执行一些图像处理任务。该 GPU 通过官方驱动程序支持 OpenGL ES 2.0,并且可能通过非官方的 LIMA 驱动程序支持完整 OpenGL 2.1 的大部分功能。
具体来说,我的图像通过外部系统进入 DMA 映射内存,然后我将它们加载到 (MONO8/LUMINANCE) 纹理中,运行一个渲染到另一个纹理的着色器程序,再使用 glReadPixels 将结果读出。如果对任何人有帮助,我可以发布更完整的代码,但现在我只展示设置中的相关部分以避免混乱(我认为都相当标准):
// Setup code: create a pbuffer-backed EGL surface plus an OpenGL ES 2.0
// context and make both current on the calling thread.
display = eglGetDisplay(EGL_DEFAULT_DISPLAY);
eglInitialize(display, &major, &minor);
// Ask for an ES2-renderable, pbuffer-capable config with 8 bits per RGBA channel.
const EGLint configAttributes[] = {
    EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT,
    EGL_SURFACE_TYPE, EGL_PBUFFER_BIT,
    EGL_BLUE_SIZE, 8,
    EGL_GREEN_SIZE, 8,
    EGL_RED_SIZE, 8,
    EGL_ALPHA_SIZE, 8,
    EGL_NONE
};
// Only the first matching config is requested (config_size == 1).
eglChooseConfig(display, configAttributes, &config, 1, &numConfigs);
// Off-screen surface sized to the 1920x1200 frames processed below.
const EGLint pbufferAttributes[] = {
    EGL_WIDTH, 1920,
    EGL_HEIGHT, 1200,
    EGL_NONE
};
surface = eglCreatePbufferSurface(display, config, pbufferAttributes);
// Explicitly request an OpenGL ES 2.x context.
const EGLint contextAttributes[] = {
    EGL_CONTEXT_CLIENT_VERSION, 2,
    EGL_NONE
};
context = eglCreateContext(display, config, EGL_NO_CONTEXT, contextAttributes);
eglMakeCurrent(display, surface, surface, context);
... Setup shaders, VBOs, etc ...
// Input texture: the single-channel image is uploaded into this texture.
glGenTextures(1, &textureID);
glBindTexture(GL_TEXTURE_2D, textureID);
// Clamp at the edges and use linear filtering for both minification and magnification.
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
// One byte per pixel for a 1920x1200 frame; malloc leaves the contents uninitialised.
// NOTE(review): `data` is not freed anywhere in the visible code - confirm it is released elsewhere.
unsigned char* data = (unsigned char*)malloc(1920 * 1200 * sizeof(unsigned char));
glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE, 1920, 1200, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, data); // Bind to dummy data at first, check if we can remove this
// Interleaved vertex layout with a stride of 5 floats:
// attribute 0 reads 3 floats at offset 0, attribute 1 reads 2 floats after them
// (presumably position and texture coordinate - verify against the shader).
glBindBuffer(GL_ARRAY_BUFFER, VBOVertices);
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), (GLvoid*)0);
glEnableVertexAttribArray(0);
glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), (GLvoid*)(3 * sizeof(GLfloat)));
glEnableVertexAttribArray(1);
// Render target: an RGBA texture attached as colour attachment 0 of a framebuffer object.
// NOTE(review): no glCheckFramebufferStatus call is visible here - confirm completeness is checked elsewhere.
GLuint framebuffer;
glGenFramebuffers(1, &framebuffer);
glBindFramebuffer(GL_FRAMEBUFFER, framebuffer);
GLuint texture;
glGenTextures(1, &texture);
glBindTexture(GL_TEXTURE_2D, texture);
// NULL pixel pointer: storage is allocated without uploading any data.
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1920, 1200, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture, 0);
// Re-bind the input texture on unit 0 so that later uploads and sampling target it.
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, textureID);
// LATER: In main loop
// Three intervals are timed per frame: texture upload, draw + glFinish, and glReadPixels.
auto start = std::chrono::steady_clock::now(); // Start timer for image loading
glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE, 1920, 1200, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, image.img); // Load current image into texture from before
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, textureID);
// Point the sampler uniform "texture1" at texture unit 0.
// NOTE(review): glUniform* applies to the program currently in use, but glUseProgram is only
// called below - confirm shaderProgram was already made current during setup.
glUniform1i(glGetUniformLocation(shaderProgram, "texture1"), 0);
auto text_loaded = std::chrono::steady_clock::now(); // Texture is loaded, end timer for image loading
glUseProgram(shaderProgram);
// Draw 4 vertices as a triangle strip into the bound framebuffer.
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
glFinish();
auto gl_finished = std::chrono::steady_clock::now(); // All rendering should be done when glFinish returns?
// Read the full 1920x1200 result back as RGBA bytes (4 bytes per pixel).
glReadPixels(0, 0, 1920, 1200, GL_RGBA, GL_UNSIGNED_BYTE, preprocessed_img_buf);
auto end = std::chrono::steady_clock::now(); // End timer for readout
// This is needed to get back into MONO8 format
// Keeps the first byte of every 4-byte pixel; this loop runs after `end`, so it is not timed.
for (int i = 0; i < 1920 * 1200; i++){
processed_img_buf[i] = preprocessed_img_buf[i * 4];
}
// Print frame times
// All three durations are expressed in microseconds.
auto text_loaded_time = std::chrono::duration_cast<std::chrono::microseconds>(text_loaded - start).count();
auto frame_render_time = std::chrono::duration_cast<std::chrono::microseconds>(gl_finished - text_loaded).count();
auto frame_readout_time = std::chrono::duration_cast<std::chrono::microseconds>(end - gl_finished).count();
我尝试用上述代码做一个粗略的基准测试,看到的时间让我几乎不敢相信:
Image loading time: 7148 us
Render time: 85720 us
Readout time: 158734 us
Total frame time: 251602 us
Image loading time: 4797 us
Render time: 85841 us
Readout time: 152563 us
Total frame time: 243201 us
Image loading time: 6018 us
Render time: 85757 us
Readout time: 158420 us
Total frame time: 250195 us
我本来预料 glReadPixels 会很慢,但没想到它会是渲染时间的两倍、使帧率低于 10 FPS(假设我的基准测试方式还算合理)。这让我觉得我在其他地方做错了什么,但是我尝试过的一切似乎都以某种方式不受支持:
- 我尝试渲染到 GL_LUMINANCE 纹理,但出现错误;看来我的 EGL 没有只含一个 8bpp 通道的配置,因此按我的理解无法以这种方式渲染?
- 我尝试在 glReadPixels 上使用 GL_LUMINANCE 格式,却发现文档说明该函数不支持它,据说仅支持 GL_RGB、GL_RGBA、GL_ALPHA。
- 我尝试以 GL_ALPHA 调用 glReadPixels,但尽管文档如此说明,我还是遇到了无效操作错误(也许是我在更早的某个步骤配置错了?)。
正确的选项/设置组合会是什么样子?我一开始认为这是可行的,但现在我怀疑如果不投入大量时间,这款 GPU 是否对我有用。情况确实如此,还是我距离更合理的帧时间只差几个枚举值?
我还有一些其他的想法/问题,但为了避免分散注意力,不想把它们算作主要问题的一部分:
正如我在评论中提到的,您可以使用 GBM 库来完成您的任务。这是分步指南:
使用一个或多个 GBM BO(缓冲区对象)创建 GBM 表面。
继续从 GBM 表面创建 EGLSurface 以进行渲染。
如果您的图形驱动程序支持,您可以使用 DRM PRIME API 获取每个 BO 的 DMA 文件描述符 (fd)。这将允许您映射这些缓冲区并根据需要读取它们的内容。
需要注意的是,在创建 GBM 表面时,请确保使用线性格式修改器,以便映射内存的内容对您的用例有意义。
下面是一个示例,演示了将内容渲染到 GBM 表面并使用映射的 DMA 文件描述符将结果保存到 PNG 图像中:
meson.build
# Project declaration: a C project that defaults to a debug build at warning level 2.
project(
  'OpenGL DMA Read Example',
  'c',
  meson_version : '>= 0.59.0',
  version : '0.1.0',
  default_options : ['warning_level=2', 'buildtype=debug'],
)
# C compiler object, used further down to look up the libraries to link.
c = meson.get_compiler('c')

# Candidate system include directories; only the ones that exist are added.
include_paths = []
include_paths_sys = [
  '/usr/local/include',
  '/usr/include/drm',
  '/usr/include/libdrm',
  '/usr/include/freetype2'
]
foreach p : include_paths_sys
  # check : false is required here: with check : true Meson aborts the whole
  # configuration as soon as the test exits non-zero, so a missing directory
  # would be a hard error instead of simply being skipped.
  if run_command('[', '-d', p, ']', check : false).returncode() == 0
    include_paths += [include_directories(p)]
  endif
endforeach
# Libraries the example links against, located through the C compiler.
egl_dep       = c.find_library('EGL')
glesv2_dep    = c.find_library('GLESv2')
drm_dep       = c.find_library('drm')
gbm_dep       = c.find_library('gbm')
freeimage_dep = c.find_library('freeimage')

# Single executable built from main.c with the include paths collected above.
executable(
  'dma-read',
  'main.c',
  dependencies : [egl_dep, glesv2_dep, drm_dep, gbm_dep, freeimage_dep],
  include_directories : include_paths,
)
main.c
#include <EGL/egl.h>
#include <errno.h>
#include <gbm.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <xf86drmMode.h>
#include <sys/ioctl.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <GLES2/gl2.h>
#include <FreeImage.h>
#include <linux/dma-buf.h>
#include <linux/dma-heap.h>
// Output file written by savePNG().
#define PNG_PATH "/tmp/dma_read.png"
// DRM device node opened by init().
#define DRM_DEVICE "/dev/dri/card0"
// Dimensions of the GBM surface, in pixels.
#define WIDTH 512
#define HEIGHT 512
// Pixel format of the GBM surface; also passed as the native visual id when choosing the EGL config.
#define FORMAT GBM_FORMAT_ARGB8888
// drmFd: descriptor of DRM_DEVICE; dmaFd: descriptor returned by getDMAFDFromBO() for the buffer object.
static int drmFd, dmaFd;
// CPU mapping of the buffer object, set by mapDMA().
static char *map;
// Offset of plane 0 inside the buffer object, from gbm_bo_get_offset().
static int offset;
// Stride of the buffer object, from gbm_bo_get_stride() (or updated by gbm_bo_map()).
static unsigned int stride;
static struct gbm_device *gbmDevice;
static struct gbm_surface *gbmSurface;
// Buffer object obtained in init() by locking the surface's front buffer.
static struct gbm_bo *gbmBO;
static EGLDisplay eglDisplay;
static EGLContext eglContext;
static EGLSurface eglSurface;
static EGLConfig eglConfig;
// Attributes passed to eglChooseConfig(): window-capable, ES2-renderable, 8 bits per RGBA channel.
static const EGLint eglConfigAttribs[] =
{
EGL_SURFACE_TYPE, EGL_WINDOW_BIT,
EGL_RED_SIZE, 8,
EGL_GREEN_SIZE, 8,
EGL_BLUE_SIZE, 8,
EGL_ALPHA_SIZE, 8,
EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT,
EGL_NONE
};
// Scans the first `count` entries of `configs` and returns the index of the
// first one whose EGL_NATIVE_VISUAL_ID equals `visual_id`, or -1 when none
// matches. Entries whose attribute query fails are skipped.
static int matchConfigToVisual(EGLDisplay egl_display, EGLint visual_id, EGLConfig *configs, int count)
{
    int idx = 0;

    while (idx < count)
    {
        EGLint nativeVisual;

        if (eglGetConfigAttrib(egl_display, configs[idx], EGL_NATIVE_VISUAL_ID, &nativeVisual)
            && nativeVisual == visual_id)
        {
            return idx;
        }

        ++idx;
    }

    return -1;
}
// Picks an EGL config that satisfies `attribs` and stores it in `*config_out`.
// When `visual_id` is non-zero the config must also report that native visual
// id; when it is zero the first config returned by eglChooseConfig is used.
// Returns 1 on success and 0 on failure (`*config_out` is left untouched).
static int chooseEGLConfiguration(EGLDisplay egl_display, const EGLint *attribs, EGLint visual_id, EGLConfig *config_out)
{
    EGLint available = 0;

    if (!eglGetConfigs(egl_display, NULL, 0, &available) || available < 1)
    {
        printf("No EGL configs to choose from.\n");
        return 0;
    }

    // Room for every config the display exposes; eglChooseConfig fills a prefix of it.
    EGLConfig *candidates = (EGLConfig *)malloc(available * sizeof *candidates);
    if (candidates == NULL)
        return 0;

    int chosen = -1;
    EGLint matched = 0;

    if (eglChooseConfig(egl_display, attribs, candidates, available, &matched) && matched)
    {
        chosen = visual_id
            ? matchConfigToVisual(egl_display, visual_id, candidates, matched)
            : 0;

        if (chosen != -1)
            *config_out = candidates[chosen];
    }
    else
    {
        printf("No EGL configs with appropriate attributes.\n");
    }

    free(candidates);
    return chosen != -1;
}
int getDMAFDFromBO(int drmFd, struct gbm_bo *bo)
{
struct drm_prime_handle prime_handle;
memset(&prime_handle, 0, sizeof(prime_handle));
prime_handle.handle = gbm_bo_get_handle(bo).u32;
prime_handle.flags = DRM_CLOEXEC | DRM_RDWR;
prime_handle.fd = -1;
if (ioctl(drmFd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &prime_handle) != 0)
goto fail;
if (prime_handle.fd < 0)
goto fail;
// Set read and write permissions on the file descriptor
if (fcntl(prime_handle.fd, F_SETFL, fcntl(prime_handle.fd, F_GETFL) | O_RDWR) == -1)
{
close(prime_handle.fd);
goto fail;
}
printf("Got BO DMA fd using DRM_IOCTL_PRIME_HANDLE_TO_FD.\n");
return prime_handle.fd;
fail:
prime_handle.fd = gbm_bo_get_fd(bo);
if (prime_handle.fd >= 0)
{
printf("Got BO DMA fd using gbm_bo_get_fd().\n");
return prime_handle.fd;
}
printf("Failed to get fd for handle %u: %s\n", prime_handle.handle, strerror(errno));
return -1;
}
int mapDMA()
{
map = mmap(NULL, HEIGHT * stride, PROT_READ | PROT_WRITE, MAP_SHARED, dmaFd, 0);
if (map == MAP_FAILED)
{
map = mmap(NULL, HEIGHT * stride, PROT_WRITE, MAP_SHARED, dmaFd, 0);
if (map == MAP_FAILED)
{
void **dummy = NULL;
map = gbm_bo_map(gbmBO, 0, 0, WIDTH, HEIGHT, GBM_BO_TRANSFER_READ, &stride, dummy);
if (!map)
{
printf("Failed to map DMA fd.\n");
return 0;
}
}
}
return 1;
}
int init()
{
drmFd = open(DRM_DEVICE, O_RDWR);
if (drmFd < 0)
{
printf("Failed to open DRM device %s.\n", DRM_DEVICE);
return 0;
}
gbmDevice = gbm_create_device(drmFd);
if (!gbmDevice)
{
printf("Failed to create GBM device.\n");
return 0;
}
eglDisplay = eglGetDisplay(gbmDevice);
if (eglDisplay == EGL_NO_DISPLAY)
{
printf("Failed to get EGL display.\n");
return 0;
}
if (!eglInitialize(eglDisplay, NULL, NULL))
{
printf("Failed to initialize EGL display.\n");
return 0;
}
if (!chooseEGLConfiguration(eglDisplay, eglConfigAttribs, FORMAT, &eglConfig))
{
printf("Failed to choose EGL configuration.\n");
return 0;
}
eglContext = eglCreateContext(eglDisplay, eglConfig, EGL_NO_CONTEXT, NULL);
if (eglContext == EGL_NO_CONTEXT)
{
printf("Failed to create EGL context.\n");
return 0;
}
gbmSurface = gbm_surface_create(
gbmDevice,
WIDTH,
HEIGHT,
FORMAT,
GBM_BO_USE_RENDERING | GBM_BO_USE_LINEAR);
if (!gbmSurface)
{
printf("Failed to create GBM surface.\n");
return 0;
}
eglSurface = eglCreateWindowSurface(eglDisplay, eglConfig, (EGLNativeWindowType)gbmSurface, NULL);
if (eglSurface == EGL_NO_SURFACE)
{
printf("Failed to create EGL surface.\n");
return 0;
}
eglMakeCurrent(eglDisplay,
eglSurface,
eglSurface,
eglContext);
eglSwapBuffers(eglDisplay, eglSurface);
// Create a single BO (calling gbm_surface_lock_front_buffer() again before gbm_surface_release_buffer() would create another BO)
gbmBO = gbm_surface_lock_front_buffer(gbmSurface);
gbm_surface_release_buffer(gbmSurface, gbmBO);
stride = gbm_bo_get_stride(gbmBO);
offset = gbm_bo_get_offset(gbmBO, 0);
dmaFd = getDMAFDFromBO(drmFd, gbmBO);
if (dmaFd < 0)
return 0;
if (!mapDMA())
return 0;
return 1;
}
void savePNG()
{
eglSwapBuffers(eglDisplay, eglSurface);
gbm_surface_lock_front_buffer(gbmSurface);
struct dma_buf_sync sync;
sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ;
ioctl(dmaFd, DMA_BUF_IOCTL_SYNC, &sync);
FIBITMAP *image = FreeImage_ConvertFromRawBits((BYTE*)&map[offset],
WIDTH,
HEIGHT,
stride,
32,
0xFF0000, 0x00FF00, 0x0000FF,
false);
if (FreeImage_Save(FIF_PNG, image, PNG_PATH, PNG_DEFAULT))
printf("PNG image saved: %s.\n", PNG_PATH);
else
printf("Failed to save PNG image: %s.\n", PNG_PATH);
FreeImage_Unload(image);
sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ;
ioctl(dmaFd, DMA_BUF_IOCTL_SYNC, &sync);
gbm_surface_release_buffer(gbmSurface, gbmBO);
}
void render()
{
glEnable(GL_SCISSOR_TEST);
// Red
glViewport(0, 0, WIDTH/2, HEIGHT/2);
glScissor(0, 0, WIDTH/2, HEIGHT/2);
glClearColor(1.f, 0.f, 0.f, 1.f);
glClear(GL_COLOR_BUFFER_BIT);
// Green
glViewport(WIDTH/2, 0, WIDTH/2, HEIGHT/2);
glScissor(WIDTH/2, 0, WIDTH/2, HEIGHT/2);
glClearColor(0.f, 1.f, 0.f, 1.f);
glClear(GL_COLOR_BUFFER_BIT);
// Blue
glViewport(0, HEIGHT/2, WIDTH/2, HEIGHT/2);
glScissor(0, HEIGHT/2, WIDTH/2, HEIGHT/2);
glClearColor(0.f, 0.f, 1.f, 1.f);
glClear(GL_COLOR_BUFFER_BIT);
// Black
glViewport(WIDTH/2, HEIGHT/2, WIDTH/2, HEIGHT/2);
glScissor(WIDTH/2, HEIGHT/2, WIDTH/2, HEIGHT/2);
glClearColor(0.f, 0.f, 0.f, 1.f);
glClear(GL_COLOR_BUFFER_BIT);
}
// Entry point: set up the pipeline, draw the test pattern and save it as a PNG.
// Exits with EXIT_FAILURE when initialisation fails, because render() and
// savePNG() rely on the context and mapping that init() creates.
int main()
{
    if (!init())
        return EXIT_FAILURE;

    render();
    savePNG();
    return EXIT_SUCCESS;
}
要测试它,请将文件放在同一目录中并运行以下命令:
$ meson setup build
$ cd build
$ meson compile
$ ./dma-read
如果一切顺利,应将如图所示的 PNG 文件保存在
/tmp/dma_read.png
中。