在我的应用程序中,我使用计算着色器快速地阐述数据。我为模型的每个实例分配一个计算着色器。例如,我有 30 个实例,我调度计算着色器 30 次。
for(int i = 0; i < engineModLoader.instanceNumber; i++)
{
engineRenderer.DispatchCompute(phoenixMesh.totalMeshlets.size(), selectedMeshlet,
engineModLoader.instancesData[i].instancePos);
}
我使用计算着色器的结果来填充全局索引缓冲区,这对于绘制实例很有用。因此,所有调度的计算着色器都必须在渲染实例的
DrawFrame()
调用之前终止。如何在 CPU 上等待计算着色器的终止?
到目前为止,我尝试以这种方式同步我的计算着色器,但我得到了错误的数据:
void Renderer::DispatchCompute(int numberOfElements, std::vector<Phoenix::DataToCompute>& selectedMeshlet,
const glm::vec3& instancePos)
{
VkSubmitInfo computeSubmitInfo{};
computeSubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
vkWaitForFences(engineDevice.logicalDevice, 1, &computeInFlightFences[currentComputeFrame], VK_TRUE, UINT64_MAX);
engineTransform.ubo.instancePos = instancePos;
UpdateUniformBuffer(currentComputeFrame);
vkResetFences(engineDevice.logicalDevice, 1, &computeInFlightFences[currentComputeFrame]);
vkResetCommandBuffer(computeCommandBuffers[currentComputeFrame], 0);
RecordComputeBuffer(numberOfElements, computeCommandBuffers[currentComputeFrame]);
computeSubmitInfo.commandBufferCount = 1;
computeSubmitInfo.pCommandBuffers = &computeCommandBuffers[currentComputeFrame];
computeSubmitInfo.signalSemaphoreCount = 1;
computeSubmitInfo.pSignalSemaphores = &computeSemaphores[currentComputeFrame];
if (vkQueueSubmit(engineDevice.computeQueue, 1, &computeSubmitInfo, computeInFlightFences[currentComputeFrame]) != VK_SUCCESS)
{
throw std::runtime_error("failed to submit compute command buffer!");
}
VkDeviceSize bufferSize = sizeof(Phoenix::DataToCompute) * numberOfElements;
VkBuffer stagingBuffer;
VkDeviceMemory stagingBufferMemory;
CreateBuffer(bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
stagingBuffer, stagingBufferMemory);
CopyBuffer(SSBOBuffers[currentComputeFrame], stagingBuffer, bufferSize,
&computeSemaphores[currentComputeFrame]);
void* bufferData = nullptr;
vkMapMemory(engineDevice.logicalDevice, stagingBufferMemory, 0, bufferSize, 0, &bufferData);
memcpy(selectedMeshlet.data(), bufferData, bufferSize);
vkUnmapMemory(engineDevice.logicalDevice, stagingBufferMemory);
currentComputeFrame = (currentComputeFrame + 1) % MAX_FRAMES_IN_FLIGHT;
vkDestroyBuffer(engineDevice.logicalDevice, stagingBuffer, nullptr);
vkFreeMemory(engineDevice.logicalDevice, stagingBufferMemory, nullptr);
}
void Renderer::RecordComputeBuffer(int numberOfElements, VkCommandBuffer commandBuffer)
{
VkCommandBufferBeginInfo beginInfo{};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
if (vkBeginCommandBuffer(commandBuffer, &beginInfo) != VK_SUCCESS)
{
throw std::runtime_error("failed to begin recording command buffer!");
}
VkDeviceSize ssboSize = sizeof(Phoenix::DataToCompute) * numberOfElements;
vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, enginePipeline.computePipeline);
vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, enginePipeline.computePipelineLayout, 0, 1,
&descriptorSets[currentComputeFrame], 0, 0);
vkCmdDispatch(commandBuffer, numberOfElements / 32, 1, 1);
if (vkEndCommandBuffer(commandBuffer) != VK_SUCCESS)
{
throw std::runtime_error("failed to record command buffer!");
}
}
我哪里错了?
如何在 CPU 上等待计算着色器的终止?
你绝对不应该。
你不需要CPU来等待计算着色器;您需要 GPU 来等待计算着色器。您应该分派计算着色器操作,然后使用依赖项在这些操作和使用它的进程之间进行同步。这种依赖关系存在于您的命令缓冲区或批处理中,而不是 CPU 代码中。
我哪里错了?
有很多错误,但最明显的错误之一是你在实际使用内存之前就破坏了它。您在复制操作中使用了内存,但在将该副本提交到队列并确保队列完成使用它之前删除了内存。
这很糟糕。验证层应该很容易发现这一点。
另外,不要只为这样的短操作分配内存。分配一大块设备内存并从中进行子分配。