在 C++ 中使用 CUDA 时的多态替代方案

Question

编辑：从回复中，我意识到我真正需要的是 CUDA C++ 中虚函数（virtual 方法）的替代方案。我认为这里提出的解决方案比《如何实现设备端 CUDA 虚函数？》一问中提出的方案更适合我的用例，特别是因为我必须处理一个由带虚函数的类对象组成的向量。

原始问题：我正在尝试使用 CUDA 在 C++ 中编写路径跟踪器，我现在正尝试将形状数组传递给

render

函数，但即使这些形状已被复制到设备内存，当我尝试调用形状的方法时，仍然会出现非法内存访问。这可能与我构建类及其继承关系的方式有关。

我有一个主机函数

render

，它接受场景描述符和图像数组

// Host-side entry point: takes the scene descriptor and the image array.
__host__ void render(const std::shared_ptr<Scene> &scene, uchar4 *image);

这个函数调用一个内核

renderImage

，它接受许多参数，特别是我的形状数组

// Kernel called from the host-side render(). Among its parameters it receives
// the array of shape pointers (`shapes`) together with its element count
// (`num_shapes`).
__global__ void renderImage(const uint16_t width, const uint16_t height,
                            uchar4 *image, const Vec3 origin,
                            const Vec3 pixel00, const Vec3 deltaU,
                            const Vec3 deltaV, const Shape **shapes,
                            const size_t num_shapes);

形状定义为

// shape.cuh
#pragma once

#include "cuda_path_tracer/ray.cuh"

// Abstract base class for shapes: hit() and getShapeType() are pure virtual,
// so every concrete shape must override both.
// NOTE(review): virtual dispatch goes through a vtable pointer stored in each
// object. An object constructed on the host carries a host vtable pointer,
// which is not valid when the object is used from device code.
// NOTE(review): ShapeType is not declared in this snippet — presumably it
// comes from an included header; confirm.
class Shape {
public:
  // Default construction and copying are allowed; moving is disabled.
  Shape() = default;
  Shape(const Shape &) = default;
  __host__ __device__ Shape(Shape &&) = delete;
  auto operator=(const Shape &) -> Shape & = default;
  __host__ __device__ auto operator=(Shape &&) -> Shape & = delete;
  // Virtual destructor so derived shapes can be destroyed through a Shape*.
  virtual ~Shape() = default;
  // Intersection test of this shape against ray `r`.
  __host__ __device__ virtual auto hit(const Ray &r) const -> bool = 0;
  // Returns the ShapeType value of the concrete shape.
  __host__ __device__ virtual auto getShapeType() const -> ShapeType = 0;
};

从抽象类派生的形状如下所示：

// sphere.cuh
#pragma once

#include "shape.cuh"

// Concrete shape derived from Shape; stores a center and a radius and
// overrides both pure virtual functions of the base class.
class Sphere : public Shape {
public:
  // Builds a sphere from its center and radius.
  __host__ __device__ Sphere(const Vec3 &center, float radius);
  // Intersection test of this sphere against ray `r`.
  __host__ __device__ auto hit(const Ray &r) const -> bool override;
  // Returns the ShapeType value for this shape.
  __host__ __device__ auto getShapeType() const -> ShapeType override;

private:
  Vec3 center;
  float radius;
};

并且是这样实现的

// sphere.cu
#include "cuda_path_tracer/sphere.cuh"

// Constructs a sphere from its center and radius.
// Qualified __host__ __device__ so the definition agrees with the
// declaration in sphere.cuh.
__host__ __device__ Sphere::Sphere(const Vec3 &center, const float radius)
    : Shape(), center(center), radius(radius) {}

// Reports whether the quadratic obtained by substituting the ray into the
// sphere equation has a strictly positive discriminant. The sign of the
// intersection parameter is not examined.
__host__ __device__ auto Sphere::hit(const Ray &r) const -> bool {
  // Vector from the sphere center to the ray origin.
  const Vec3 offset = r.getOrigin() - center;

  // Coefficients of the quadratic A*t^2 + B*t + C = 0.
  const float quadA = r.getDirection().dot(r.getDirection());
  const float quadB = 2.0f * offset.dot(r.getDirection());
  const float quadC = offset.dot(offset) - radius * radius;

  const float disc = quadB * quadB - 4 * quadA * quadC;
  return disc > 0;
}

通过主机的

render

函数将形状复制到设备内存中

// Number of shapes held by the scene.
const auto num_shapes = scene->getShapes().size();
// Device-side array with one Shape pointer per shape.
const Shape **d_shapes;
CUDA_ERROR_CHECK(
    cudaMalloc((void **)&d_shapes, num_shapes * sizeof(Shape *)));

// Host-side staging array that collects the device address of every shape.
Shape **h_shapes = new Shape *[num_shapes];

for (size_t i = 0; i < num_shapes; i++) {
  // NOTE(review): both the allocation and the copy below use sizeof(Shape),
  // the size of the abstract base; data members added by a derived class
  // (e.g. Sphere's center/radius) are presumably not covered — confirm.
  // NOTE(review): a byte-wise copy of a host-constructed polymorphic object
  // carries a host vtable pointer, so virtual calls made on the device copy
  // are not valid.
  CUDA_ERROR_CHECK(cudaMalloc((void **)&h_shapes[i], sizeof(Shape)));
  CUDA_ERROR_CHECK(cudaMemcpy(h_shapes[i], scene->getShapes()[i],
                              sizeof(Shape), cudaMemcpyHostToDevice));
}
// Upload the table of per-shape device pointers.
CUDA_ERROR_CHECK(cudaMemcpy(d_shapes, h_shapes, num_shapes * sizeof(Shape *),
                            cudaMemcpyHostToDevice));
// Only the host staging array is released here; no cudaFree appears in this
// snippet.
delete[] h_shapes;

在

renderImage

内核内部，我调用了两个设备函数：一个用于获取与像素对应的光线，另一个用于根据给定的形状数组计算该光线对应的颜色。

getColor

函数如下所示：

// Placeholder shading: scans the shapes in order and returns one fixed color
// as soon as a shape reports a hit for `ray`, or another fixed color when no
// shape does.
__device__ auto getColor(const Ray &ray, const Shape *const *shapes,
                         const size_t num_shapes) -> uchar4 {
  // Advance until the first shape that is hit, or until the array ends.
  size_t idx = 0;
  while (idx < num_shapes && !shapes[idx]->hit(ray)) {
    ++idx;
  }

  const bool anyHit = idx < num_shapes;
  return anyHit ? make_uchar4(1, 0, 0, UCHAR_MAX)
                : make_uchar4(0, 0, 1, UCHAR_MAX);
}

问题是

shapes[i]->hit(ray)

产生

Illegal memory access

异常，为什么呢？我该如何解决它？

Answer 1

感谢评论中的建议，特别是@Homer512的提示，我已经找到了这个解决方案，请告诉我您是否认为可以做得更好。

// shape.cuh
#pragma once

#include <variant>
#include "sphere.cuh"

// Shape is a variant over the concrete shape types; Sphere is currently the
// only alternative.
// NOTE(review): std::variant / std::visit come from the host standard library
// — confirm the toolchain accepts them in device code (libcu++ reportedly
// ships cuda::std::variant in recent releases; verify before switching).
using Shape = std::variant<Sphere>;

//sphere.cuh
#pragma once

#include "ray.cuh"
#include "vec3.cuh"

// Plain sphere type with no base class and no virtual functions; it is the
// alternative held by the Shape variant.
class Sphere {
public:
  // Builds a sphere from its center and radius; callable from host and device.
  __host__ __device__ Sphere(const Vec3 &center, float radius);
  // Intersection test of this sphere against ray `r`; device-only.
  __device__ auto hit(const Ray &r) const -> bool;

private:
  Vec3 center;
  float radius;
};

h_shapes

初始化为

// Upload the host-side shape list into one contiguous device array.
const std::vector<Shape> &h_shapes = scene->getShapes();
const size_t num_shapes = h_shapes.size();
Shape *d_shapes;
// The vector stores Shape (the variant) elements, so both the allocation and
// the copy must be sized with sizeof(Shape). Sizing the copy with
// sizeof(Sphere) would transfer fewer bytes than the array occupies.
// NOTE(review): a byte-wise copy presumes Shape is trivially copyable —
// confirm for every alternative added to the variant.
const size_t shapes_bytes = num_shapes * sizeof(Shape);
CUDA_ERROR_CHECK(cudaMalloc((void **)&d_shapes, shapes_bytes));
CUDA_ERROR_CHECK(cudaMemcpy(d_shapes, h_shapes.data(), shapes_bytes,
                            cudaMemcpyHostToDevice));

最后，获取颜色的设备函数如下：

// Visitor helper: inherits from every callable in Ts and re-exports all of
// their call operators, so a set of lambdas behaves as one overloaded
// function object.
template <class... Ts> struct overload : Ts... {
  using Ts::operator()...;
};
// Deduction guide so that `overload{f, g}` also deduces Ts under C++17;
// C++20 can deduce the arguments of an aggregate without it.
template <class... Ts> overload(Ts...) -> overload<Ts...>;

// Placeholder shading over an array of Shape variants: returns one fixed
// color as soon as the alternative held by a shape reports a hit for `ray`,
// or another fixed color when none does.
__device__ auto getColor(const Ray &ray, const Shape *shapes,
                         const size_t num_shapes) -> uchar4 {
  // One visitor for the whole scan, with one lambda per variant alternative.
  const auto hitsRay = overload{
      [&ray](const Sphere &sphere) { return sphere.hit(ray); },
  };

  const Shape *const end = shapes + num_shapes;
  for (const Shape *shape = shapes; shape != end; ++shape) {
    if (std::visit(hitsRay, *shape)) {
      return make_uchar4(1, 0, 0, UCHAR_MAX);
    }
  }
  return make_uchar4(0, 0, 1, UCHAR_MAX);
}

在这里，我不太喜欢的一点是：尽管我已经定义了一个表示多种形状联合的类型，但每增加一种新形状，我仍然必须再写一个

[&ray](const Sphere/Cube/Pyramid &s) { return s.hit(ray); }

在 C++ 中使用 CUDA 时的多态替代方案

问题描述投票：0回答：1

1个回答

最新问题

在 C++ 中使用 CUDA 时的多态替代方案

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1