编辑:从回复中,我意识到我真正需要的是 CUDA C++ 中虚函数(virtual 方法)的替代方案。我认为这里提出的解决方案比《如何实现设备端 CUDA 虚函数?》一问中给出的方案更适合我的用例,尤其是因为我必须处理由多态对象组成的数组(vector)。
原始问题:我正在尝试使用 CUDA 在 C++ 中编写路径跟踪器,我现在正尝试将形状数组传递给
render
函数,但即使这些形状已被复制到设备内存,当我尝试调用形状的方法时,仍然会收到非法内存访问(illegal memory access)错误。这可能与我构建类及其继承关系的方式有关。
我有一个主机函数
render
,它接受场景描述符和图像数组
// Host-side entry point of the renderer: receives the scene descriptor and the
// image array. Whether `image` refers to host or device memory is not visible
// from this declaration.
__host__ void render(const std::shared_ptr<Scene> &scene, uchar4 *image);
这个函数调用一个内核
renderImage
,它接受许多参数,特别是我的形状数组
// Rendering kernel (declaration only; the body is not shown here).
//   width, height   - image dimensions (presumably in pixels; confirm)
//   image           - buffer of uchar4 values, one per pixel presumably
//   origin, pixel00,
//   deltaU, deltaV  - Vec3 parameters passed by value; presumably the camera
//                     origin and viewport layout (TODO confirm)
//   shapes          - array of pointers to Shape
//   num_shapes      - number of entries in `shapes`
// NOTE(review): kernel arguments that are pointers must refer to
// device-accessible memory, which here applies to `image`, to `shapes`, and to
// every Shape that `shapes` points at.
__global__ void renderImage(const uint16_t width, const uint16_t height,
uchar4 *image, const Vec3 origin,
const Vec3 pixel00, const Vec3 deltaU,
const Vec3 deltaV, const Shape **shapes,
const size_t num_shapes);
形状定义为
// shape.cuh
#pragma once
#include "cuda_path_tracer/ray.cuh"
// Abstract interface implemented by every renderable shape.
//
// NOTE(review): this type is polymorphic. An object with virtual functions
// that was constructed on the host carries a host-side vtable pointer, so its
// virtual functions must not be called from device code (and vice versa).
class Shape {
public:
  // Default construction and copying use the compiler-generated versions.
  Shape() = default;
  Shape(const Shape &) = default;
  auto operator=(const Shape &) -> Shape & = default;

  // Moving is explicitly disabled.
  __host__ __device__ Shape(Shape &&) = delete;
  __host__ __device__ auto operator=(Shape &&) -> Shape & = delete;

  // Virtual so that derived shapes can be destroyed through a Shape pointer.
  virtual ~Shape() = default;

  // Reports whether ray `r` hits this shape.
  __host__ __device__ virtual auto hit(const Ray &r) const -> bool = 0;

  // Returns the ShapeType tag of the concrete shape.
  __host__ __device__ virtual auto getShapeType() const -> ShapeType = 0;
};
从抽象类派生的形状如下所示:
// sphere.cuh
#pragma once
#include "shape.cuh"
// Sphere shape, stored as a center point and a radius.
class Sphere : public Shape {
public:
  // Builds a sphere from its center and radius.
  __host__ __device__ Sphere(const Vec3 &center, float radius);

  // Reports whether ray `r` hits this sphere.
  __host__ __device__ auto hit(const Ray &r) const -> bool override;

  // Returns the ShapeType tag for this shape (the definition is not shown
  // alongside this declaration).
  __host__ __device__ auto getShapeType() const -> ShapeType override;

private:
  Vec3 center;
  float radius;
};
并且是这样实现的
// sphere.cu
#include "cuda_path_tracer/sphere.cuh"
// Stores the given center and radius. The execution-space qualifiers match the
// in-class declaration (__host__ __device__), so the constructor can be used
// from both host and device code.
__host__ __device__ Sphere::Sphere(const Vec3 &center, const float radius)
    : Shape(), center(center), radius(radius) {}
// Ray/sphere test. Substituting the ray origin + t * direction into
// |p - center|^2 = radius^2 gives a quadratic a*t^2 + b*t + c = 0; the function
// returns true when its discriminant is strictly positive. The sign of t is
// not examined.
__host__ __device__ auto Sphere::hit(const Ray &r) const -> bool {
  const auto &dir = r.getDirection();
  const Vec3 centerToOrigin = r.getOrigin() - center;

  // Quadratic coefficients.
  const float a = dir.dot(dir);
  const float b = 2.0f * centerToOrigin.dot(dir);
  const float c = centerToOrigin.dot(centerToOrigin) - radius * radius;

  const float discriminant = b * b - 4 * a * c;
  return discriminant > 0;
}
通过主机的
render
函数将形状复制到设备内存中:
// Uploads the scene's shapes: builds a device array (d_shapes) of pointers,
// each pointing at a separately allocated device copy of one shape.
const auto num_shapes = scene->getShapes().size();
const Shape **d_shapes;
CUDA_ERROR_CHECK(
cudaMalloc((void **)&d_shapes, num_shapes * sizeof(Shape *)));
// Host-side staging array; every slot receives a device pointer from the
// cudaMalloc call in the loop below.
Shape **h_shapes = new Shape *[num_shapes];
for (size_t i = 0; i < num_shapes; i++) {
// NOTE(review): only sizeof(Shape) bytes are allocated and copied, i.e. the
// size of the abstract base. Members added by a derived class (Sphere's center
// and radius) are not transferred. The byte copy also carries the host-side
// vtable pointer, so a virtual call such as hit() on the device copy
// dereferences a host address; this matches the reported illegal memory access.
// NOTE(review): the element type returned by getShapes() is not visible here;
// cudaMemcpy needs a raw pointer to the object as its source (TODO confirm).
CUDA_ERROR_CHECK(cudaMalloc((void **)&h_shapes[i], sizeof(Shape)));
CUDA_ERROR_CHECK(cudaMemcpy(h_shapes[i], scene->getShapes()[i],
sizeof(Shape), cudaMemcpyHostToDevice));
}
// Upload the table of device pointers itself.
CUDA_ERROR_CHECK(cudaMemcpy(d_shapes, h_shapes, num_shapes * sizeof(Shape *),
cudaMemcpyHostToDevice));
// The staging array is released here. Freeing d_shapes and the per-shape
// device allocations is not part of this snippet.
delete[] h_shapes;
在
renderImage
内核内部,我调用了两个设备函数:一个用于获取与像素对应的光线,另一个用于在给定形状数组的情况下计算该光线对应的颜色。其中 getColor
函数如下所示:
// Placeholder shading: yields make_uchar4(1, 0, 0, UCHAR_MAX) as soon as one of
// the `num_shapes` entries of `shapes` reports a hit for `ray`, and
// make_uchar4(0, 0, 1, UCHAR_MAX) when none does. Shapes after the first hit
// are not tested.
__device__ auto getColor(const Ray &ray, const Shape *const *shapes,
                         const size_t num_shapes) -> uchar4 {
  bool anyHit = false;
  for (size_t i = 0; i < num_shapes && !anyHit; ++i) {
    anyHit = shapes[i]->hit(ray);
  }
  return anyHit ? make_uchar4(1, 0, 0, UCHAR_MAX)
                : make_uchar4(0, 0, 1, UCHAR_MAX);
}
问题是
shapes[i]->hit(ray)
产生 Illegal memory access
异常,为什么呢?我该如何解决它?
感谢评论中的建议,特别是@Homer512的提示,我已经找到了这个解决方案,请告诉我您是否认为可以做得更好。
// shape.cuh
#pragma once
#include <variant>
#include "sphere.cuh"
// Closed set of shape types, replacing the virtual base class; Sphere is
// currently the only alternative.
using Shape = std::variant<Sphere>;
//sphere.cuh
#pragma once
#include "ray.cuh"
#include "vec3.cuh"
// Sphere shape without a virtual base, stored as a center point and a radius.
class Sphere {
public:
  // Builds a sphere from its center and radius.
  __host__ __device__ Sphere(const Vec3 &center, float radius);

  // Reports whether ray `r` hits this sphere. Device-only.
  __device__ auto hit(const Ray &r) const -> bool;

private:
  Vec3 center;
  float radius;
};
h_shapes
初始化为
// Uploads the host-side shapes to the device as one contiguous array of Shape.
const std::vector<Shape> &h_shapes = scene->getShapes();
const size_t num_shapes = h_shapes.size();
// Byte size of the whole array. The element type is Shape (the variant), not
// Sphere: a variant also records which alternative is active, so it is at
// least as large as, and in practice larger than, the type it wraps. The
// allocation and the copy must use the same size.
const size_t shapes_bytes = num_shapes * sizeof(Shape);
Shape *d_shapes;
CUDA_ERROR_CHECK(cudaMalloc((void **)&d_shapes, shapes_bytes));
// NOTE(review): a raw byte copy assumes Shape is trivially copyable, which
// holds only if every alternative is (TODO confirm for Vec3).
CUDA_ERROR_CHECK(cudaMemcpy(d_shapes, h_shapes.data(), shapes_bytes,
                            cudaMemcpyHostToDevice));
最后设备函数获取颜色
// Merges several callables (typically lambdas) into one object whose
// operator() is the overload set of all of them.
template <class... Ts> struct overload : Ts... {
  using Ts::operator()...;
};
// Explicit deduction guide so that `overload{f, g}` deduces Ts under C++17,
// where aggregates get no implicit class template argument deduction (that
// arrived in C++20). Harmless when compiling as C++20.
template <class... Ts> overload(Ts...) -> overload<Ts...>;
// Placeholder shading: returns make_uchar4(1, 0, 0, UCHAR_MAX) as soon as one
// of the `num_shapes` entries of `shapes` is hit by `ray`, and
// make_uchar4(0, 0, 1, UCHAR_MAX) when none is.
//
// Dispatch uses one generic lambda instead of one lambda per shape type: it is
// instantiated for every alternative of Shape, so adding a new alternative
// needs no change here as long as that type provides `hit(const Ray &) const`.
//
// NOTE(review): std::visit is a host-side standard-library facility; calling it
// from device code depends on the compiler flags in use (TODO confirm).
__device__ auto getColor(const Ray &ray, const Shape *shapes,
                         const size_t num_shapes) -> uchar4 {
  for (size_t i = 0; i < num_shapes; i++) {
    const bool hit = std::visit(
        [&ray](const auto &shape) -> bool { return shape.hit(ray); },
        shapes[i]);
    if (hit) {
      return make_uchar4(1, 0, 0, UCHAR_MAX);
    }
  }
  return make_uchar4(0, 0, 1, UCHAR_MAX);
}
这里让我不太满意的是:既然我已经定义了一个表示多种形状之联合的类型,却仍然必须为每一种新形状单独编写
[&ray](const Sphere/Cube/Pyramid &s) { return s.hit(ray); }