我有以下脚本尝试在图像上绘制矩形
import cv2
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
from pycuda.compiler import SourceModule
def draw_square(image_gpu, image_width,image_height, x, y, width, height, color):
block_dim = (16, 16) # CUDA block dimensions
grid_dim_x = (image_width + block_dim[0] - 1) // block_dim[0] # CUDA grid dimensions (x-axis)
grid_dim_y = (image_height + block_dim[1] - 1) // block_dim[1] # CUDA grid dimensions (y-axis)
mod = SourceModule("""
__global__ void draw_square_kernel(unsigned char *image, int image_width, int x, int y, int width, int height, unsigned char *color)
{
int row = blockIdx.y * blockDim.y + threadIdx.y;
int col = blockIdx.x * blockDim.x + threadIdx.x;
if (row >= y && row < y + height && col >= x && col < x + width)
{
int pixel_idx = row * image_width * 3 + col * 3;
image[pixel_idx] = color[0];
image[pixel_idx + 1] = color[1];
image[pixel_idx + 2] = color[2];
}
}
""", no_extern_c=True)
draw_square_kernel = mod.get_function("draw_square_kernel")
draw_square_kernel(image_gpu, np.int32(image_width), np.int32(x), np.int32(y),
np.int32(width), np.int32(height), cuda.In(color, block=block_dim, grid=(grid_dim_x, grid_dim_y)))
# Load the image
image_path = 'Lena.png' # Replace with the path to your image
image = cv2.imread(image_path)
# Convert the image to the RGB format
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Upload the image to the GPU
image_gpu = cuda.to_device(image_rgb)
# Define the square coordinates
x, y = 100, 100 # Top-left corner coordinates
width, height = 200, 200 # Width and height of the square
# Define the color of the square (Green in this example)
color = np.array([0, 255, 0], dtype=np.uint8)
# Draw a square on the GPU image
draw_square(image_gpu, image_rgb.shape[1], image_rgb.shape[0],x, y, width, height, color)
# Download the modified image from the GPU
image_with_square = np.empty_like(image_rgb)
cuda.memcpy_dtoh(image_with_square, image_gpu)
# Convert the image back to the BGR format for display
image_with_square_bgr = cv2.cvtColor(image_with_square, cv2.COLOR_RGB2BGR)
# Display the image with the square
cv2.imshow('Image with Square', image_with_square_bgr)
cv2.waitKey(0)
cv2.destroyAllWindows()
但是当我尝试运行它时,我得到了
python 3_rectangle6.py
Traceback (most recent call last):
File "/cbe421fe-1303-4821-9392-a849bfdd00e2/MyStudy/PyCuda/practice/3_rectangle_pycuda3.py", line 52, in <module>
draw_square(image_gpu, image_rgb.shape[1], image_rgb.shape[0],x, y, width, height, color)
File "/cbe421fe-1303-4821-9392-a849bfdd00e2/MyStudy/PyCuda/practice/3_rectangle_pycuda3.py", line 29, in draw_square
draw_square_kernel = mod.get_function("draw_square_kernel")
File "/miniconda3/envs/py39Cuda2/lib/python3.9/site-packages/pycuda/compiler.py", line 332, in get_function
return self.module.get_function(name)
pycuda._driver.LogicError: cuModuleGetFunction failed: named symbol not found
如你所见,这是我的第六次尝试,但仍然无法识别draw_square_kernel...
因为您已将
no_extern_c=True
选项添加到 SourceModule 实例,所以传递给编译器的代码不会自动用 extern "C" {}
括起来,并且生成的编译是使用 C++ 而不是 C 链接进行的。
CUDA 使用Itanium C++ ABI,因此生成的损坏符号名称将类似于:
_Z18draw_square_kernelPhiiiiiS_
.
要使代码正常工作,您可以删除
no_extern_c=True
选项(但花点时间了解其含义),或者在 get_function
调用中使用损坏的符号名称。您可以使用 nvcc 或对象分析工具来获取内核的准确损坏符号名称。