我有一个 CMake 项目,其 CMakeLists.txt 大致如下:
cmake_minimum_required(VERSION 3.22)

# Top-level build script: builds a static CUDA kernel library ("kernels") and a WIN32
# executable named after the project that links Direct3D 11, CUDA and libtorch.
project(example
  DESCRIPTION "Example for interop between D3D11, CUDA 11.8 and libtorch 2.0.1 (CUDA 11.8)"
  LANGUAGES CXX C CUDA
)

# Project-wide C++ defaults: C++17, required, without compiler extensions.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# When using Visual Studio generator, CMAKE_BUILD_TYPE is ignored since the type of build is set
# during the build step (e.g. "cmake --build <build-dir> --target ALL_BUILD --config Debug").
# Supported: Release, Debug (not supported: RelWithDebInfo, MinSizeRel).
# No FORCE here, so a value passed with -DCMAKE_CONFIGURATION_TYPES=... is kept.
set(CMAKE_CONFIGURATION_TYPES "debug;release" CACHE STRING "")

# Output layout inside the build tree.
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib/")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/")
set(CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}/bin/")

# Read the GitLab token from token.txt into TOKEN.
# Note: using file(READ) may lead to incorrect parsing of the token.txt file if there are
# multiple lines in that file (including empty terminating line!)
# By employing file(STRINGS) with a LIMIT_COUNT set to 1 we ensure that only the first
# string (the GitLab token) is retrieved.
# The existence check comes first because file(STRINGS) itself aborts with a generic
# "cannot be read" error on a missing file, which would hide the message below.
if(NOT EXISTS "${CMAKE_SOURCE_DIR}/token.txt")
  message(SEND_ERROR "Could not find ${CMAKE_SOURCE_DIR}/token.txt with token string inside")
else()
  file(
    STRINGS
    "${CMAKE_SOURCE_DIR}/token.txt"
    TOKEN
    LIMIT_COUNT 1
  )
  if("${TOKEN}" STREQUAL "")
    message(SEND_ERROR "Found ${CMAKE_SOURCE_DIR}/token.txt but token string is empty")
  endif()
endif()

include("${CMAKE_SOURCE_DIR}/cmake/download.cmake")

# CUDA defaults for targets created below.
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_ARCHITECTURES 75)
set(CUDA_USE_STATIC_CUDA_RUNTIME ON)
# find_package(CUDA 11.8 REQUIRED) is deprecated and intentionally not called here.
set(CUDA_HOME "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8")

# The CUDA language compiler picked by project() is chosen independently of CUDA_HOME.
# If it belongs to another toolkit than the 11.8 one used by libtorch, the link line can
# end up with cudart_static.lib from two toolkits (duplicate-symbol errors such as LNK2005).
if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "11.8"
   OR "${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "11.9")
  message(WARNING
    "CUDA compiler '${CMAKE_CUDA_COMPILER}' reports version "
    "'${CMAKE_CUDA_COMPILER_VERSION}', but this project expects CUDA 11.8 (${CUDA_HOME}). "
    "Mixing toolkits may cause duplicate cudart_static.lib symbols at link time. "
    "With a Visual Studio generator select the toolkit at configure time, e.g. '-T cuda=11.8'."
  )
endif()

# Targets are only defined for multi-config generators; with a single-config generator
# nothing below is executed.
get_property(isMultiConfig GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(isMultiConfig)
  # NOTE(review): the loop body creates the targets "kernels" and ${PROJECT_NAME}. CMake
  # refuses to create a target whose name already exists, so unless the elided parts guard
  # against it, configuring only works when CMAKE_CONFIGURATION_TYPES holds a single entry
  # (e.g. -DCMAKE_CONFIGURATION_TYPES="debug").
  foreach(config ${CMAKE_CONFIGURATION_TYPES})
    # Looked up with the original spelling of the configuration name, before lower-casing.
    # Presumably the property is populated by cmake/download.cmake - confirm.
    get_property(INTERNAL_DEPS_${config} GLOBAL PROPERTY INTERNAL_DEPS_PROP_${config})
    string(TOLOWER "${config}" config)

    # Common for all supported build types
    if (("${config}" STREQUAL "debug") OR ("${config}" STREQUAL "release"))
... # Dependencies such as ImGUI and DirectXTex
      find_package(Torch 2.0.1 REQUIRED)

      # Static library holding the CUDA kernels.
      add_library(kernels
        STATIC
        src/kernels/default.cu
        src/kernels/simple.cu
        src/kernels/grayscale.cu
      )
      target_compile_features(
        kernels
        PUBLIC
        cxx_std_17
      )
      set_target_properties(
        kernels
        PROPERTIES
        CUDA_SEPARABLE_COMPILATION ON
        CUDA_RESOLVE_DEVICE_SYMBOLS ON
      )

      # Windows GUI executable (WIN32 subsystem).
      add_executable(
        ${PROJECT_NAME}
        WIN32
        "${CMAKE_CURRENT_SOURCE_DIR}/src/load_sample.cpp"
        "${CMAKE_CURRENT_SOURCE_DIR}/src/main.cpp"
      )
      target_include_directories(
        ${PROJECT_NAME}
        PUBLIC
        ${CUDA_INCLUDE_DIRS}
        ${TORCH_INCLUDE_DIRS}
        "${CMAKE_BINARY_DIR}/deps/${config}/include"
        "${CMAKE_CURRENT_SOURCE_DIR}/include"
      )
      target_link_directories(
        ${PROJECT_NAME}
        PUBLIC
        "${CMAKE_BINARY_DIR}/deps/${config}/lib"
      )
      target_link_libraries(
        ${PROJECT_NAME}
        PUBLIC
        d3d11.lib dxgi.lib dxguid.lib d3dcompiler.lib
        uuid.lib kernel32.lib user32.lib
        comdlg32.lib advapi32.lib shell32.lib
        ole32.lib oleaut32.lib
        ${CUDA_LIBRARIES}
        ${TORCH_LIBRARIES}
        kernels
... # Dependencies such as ImGUI and DirectXTex
      )
      set_target_properties(
        ${PROJECT_NAME}
        PROPERTIES
        CUDA_SEPARABLE_COMPILATION ON
        CUDA_RESOLVE_DEVICE_SYMBOLS ON
      )

      # Copy all shader files next to the executable of this configuration. This runs at
      # configure time, so changed shaders are only picked up after re-running CMake.
      file(GLOB
        SHADERS
        "${CMAKE_SOURCE_DIR}/shaders/*.*"
      )
      file(COPY
        ${SHADERS}
        DESTINATION "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${config}/"
      )
    endif()
  endforeach()
endif()
该项目使用 Visual Studio 16 2019 生成器进行配置和构建。
配置步骤的日志(此处为 debug 类型)如下所示:
cmake -Bbuild -G "Visual Studio 16 2019" -S. -DCMAKE_CONFIGURATION_TYPES="debug"
-- Selecting Windows SDK version 10.0.19041.0 to target Windows 10.0.19043.
-- The CXX compiler identification is MSVC 19.29.30152.0
-- The C compiler identification is MSVC 19.29.30152.0
-- The CUDA compiler identification is NVIDIA 12.2.91
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.29.30133/bin/Hostx64/x64/cl.exe - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Check for working C compiler: C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.29.30133/bin/Hostx64/x64/cl.exe - skipped
-- Detecting C compile features
-- Detecting C compile features - done
-- Detecting CUDA compiler ABI info
-- Detecting CUDA compiler ABI info - done
-- Check for working CUDA compiler: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.2/bin/nvcc.exe - skipped
-- Detecting CUDA compile features
-- Detecting CUDA compile features - done
Selected libtorch installation type: local
Finding Torch 2.0.1 package
CMake Warning at CMakeLists.txt:286 (message):
Will override variable NVTOOLEXT_HOME and environmental variable
NVTOOLSEXT_PATH
CMake Warning at CMakeLists.txt:287 (message):
Make sure to install NVXT with the CUDA 11.8 installer
CMake Warning at CMakeLists.txt:288 (message):
NVXT for CUDA 12.x has been moved to a headers-only library
-- Found CUDA: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8 (found version "11.8")
-- Caffe2: CUDA detected: 12.2
-- Caffe2: CUDA nvcc is: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8/bin/nvcc.exe
-- Caffe2: CUDA toolkit directory: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8
-- Caffe2: Header version is: 11.8
CMake Warning at C:/Users/example/Projects/Example/LOCAL_DEPS/libtorch/2.0.1/debug/gpu/share/cmake/Caffe2/public/cuda.cmake:166 (message):
Failed to compute shorthash for libnvrtc.so
Call Stack (most recent call first):
C:/Users/example/Projects/Example/LOCAL_DEPS/libtorch/2.0.1/debug/gpu/share/cmake/Caffe2/Caffe2Config.cmake:88 (include)
C:/Users/example/Projects/Example/LOCAL_DEPS/libtorch/2.0.1/debug/gpu/share/cmake/Torch/TorchConfig.cmake:68 (find_package)
CMakeLists.txt:295 (find_package)
-- USE_CUDNN is set to 0. Compiling without cuDNN support
-- Autodetected CUDA architecture(s): 8.6
-- Added CUDA NVCC flags for: -gencode;arch=compute_86,code=sm_86
-- Found Torch: C:/Users/example/Projects/Example/LOCAL_DEPS/libtorch/2.0.1/debug/gpu/lib/torch.lib (Required is at least version "2.0.1")
-- Configuring done (25.1s)
-- Generating done (0.0s)
-- Build files have been written to: C:/Users/example/Projects/Example/build
我设置了以下环境变量:
CUDA_HOME 设置为 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8
CUDA_PATH 设置为 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8
此外还有:
CUDA_PATH_V11_8 设置为 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8
CUDA_PATH_V12_2 设置为 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2
构建步骤是问题出现的地方,所有问题都归结为以下模式:
cudart_static.lib(cudart_generated_cuda_runtime_api.obj) : error LNK2005: <SOME-CUDA-FUNCTION> already defined in cudart_static.lib(cudart_generated_cuda_runtime_api.obj) [C:\Users\example\Projects\Example\build\example.v
cxproj]
下面可以看到一个直接的例子
cudart_static.lib(cudart_generated_cuda_runtime_api.obj) : error LNK2005: cudaArrayGetInfo already defined in cudart_static.lib(cudart_generated_cuda_runtime_api.obj) [C:\Users\example\Projects\Example\build\example.v
cxproj]
cudart_static.lib(cudart_generated_cuda_runtime_api.obj) : error LNK2005: cudaArrayGetMemoryRequirements already defined in cudart_static.lib(cudart_generated_cuda_runtime_api.obj) [C:\Users\example\Projects\Example\build\example.vcxproj]
最终整个构建以如下错误失败:
Creating library C:/Users/example/Projects/Example/build/bin/debug/demo.lib and object C:/Users/example/Projects/Example/build/bin/debug/demo.exp
LINK : warning LNK4098: defaultlib 'LIBCMT' conflicts with use of other libs; use /NODEFAULTLIB:library [C:\Users\example\Projects\Example\build\demo.vcxproj]
C:\Users\example\Projects\Example\build\bin\debug\demo.exe : fatal error LNK1169: one or more multiply defined symbols found [C:\Users\example\Projects\Example\build\demo.vcxproj]
其中一个内核的示例(此处
default.cu
仅传递 D3D11 缓冲区数据):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Pass-through kernel: every thread reads one uchar4 pixel from the input surface and
 * writes it unchanged to the same position of the output surface.
 * No alteration of the data takes place within the CUDA context.
 * @param surface_in  Source, where the data will be loaded from. Requires to be mapped with read access
 * @param surface_out Destination, where the data will be written to. Requires to be mapped with write access
 * @param width       Number of pixels per row; threads at or beyond it do nothing
 * @param height      Number of rows; threads at or beyond it do nothing
 */
__global__ void cuda_kernel_default(const cudaSurfaceObject_t surface_in, cudaSurfaceObject_t surface_out, size_t width, size_t height)
{
    const unsigned int col = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Only threads that map onto a pixel inside the width x height area do any work.
    if (col < width && row < height)
    {
        // Start from an all-zero pixel (x = red, y = green, z = blue, w = alpha).
        uchar4 texel;
        texel.x = 0;
        texel.y = 0;
        texel.z = 0;
        texel.w = 0;

        // The surface functions take the x coordinate as a byte offset, hence the scaling.
        surf2Dread<uchar4>(&texel, surface_in, col * sizeof(uchar4), row, cudaBoundaryModeClamp);
        surf2Dwrite<uchar4>(texel, surface_out, col * sizeof(uchar4), row, cudaBoundaryModeClamp);
    }
}
/*
 * Host-side launcher for cuda_kernel_default, exported with C linkage.
 * Uses 32x32 thread blocks and enough blocks to cover width x height pixels.
 * @return the value of cudaGetLastError() queried right after the launch
 */
extern "C"
cudaError_t cuda_default(const cudaSurfaceObject_t surface_in, cudaSurfaceObject_t surface_out, size_t width, size_t height)
{
    const dim3 threads_per_block(32, 32);

    // Round up so that partially filled blocks at the right/bottom edge are included.
    const dim3 blocks_per_grid(
        (width + threads_per_block.x - 1) / threads_per_block.x,
        (height + threads_per_block.y - 1) / threads_per_block.y);

    cuda_kernel_default<<<blocks_per_grid, threads_per_block>>>(surface_in, surface_out, width, height);

    return cudaGetLastError();
}
我的主程序 main.cpp 包含以下头文件:
// Must be defined before <windows.h> so that it does not define the min/max macros.
#define NOMINMAX
#include <windows.h>
#include <windowsx.h>
#include <dxgi.h> // DirectX graphics infrastructure
#include <d3d11.h> // Direct3D functionality
#include <DirectXMath.h> // Math (vectors and matrices), successor of the deprecated D3DXimage files, successor of the deprecated D3DX11SaveTextureToFile
#include <exception>
#include <string>
#include <cuda_runtime_api.h> // Runtime CUDA API offers better code management but less control compared to the CUDA driver API
#include <cuda_d3d11_interop.h> // Provides facilities for registering and mapping graphics resources (among others) from/to Direct3D to/from CUDA context
#include <torch/torch.h> // Torch general
#include <torch/script.h> // Torch script
... // Other dependencies such as ImGUI and DirectXTex
#include "load_sample.h" // Contains D3D11 stuff, creates and loads the cube and shaders for the scene
我知道,而且从错误信息中也可以清楚地看出,我在某处把 CUDA 运行时(cudart)静态库链接了两次,但我不知道是在哪里。
更新 1:
我按照评论区的建议,使用 --trace-expand 运行了配置步骤以查看完整输出(遗憾的是输出太大,无法粘贴到 Pastebin 上 XD)。我看到 add_library() 和 add_executable() 都受到 CMake 策略 CMP0156 的影响:
CMake Warning (dev) at CMakeLists.txt:306 (add_library):
Policy CMP0156 is not set: De-duplicate libraries on link lines based on
linker capabilities. Run "cmake --help-policy CMP0156" for policy details.
Use the cmake_policy command to set the policy and suppress this warning.
Since the policy is not set, legacy libraries de-duplication strategy will
be applied.
This warning is for project developers. Use -Wno-dev to suppress it.
CMake Warning (dev) at CMakeLists.txt:343 (add_executable):
Policy CMP0156 is not set: De-duplicate libraries on link lines based on
linker capabilities. Run "cmake --help-policy CMP0156" for policy details.
Use the cmake_policy command to set the policy and suppress this warning.
Since the policy is not set, legacy libraries de-duplication strategy will
be applied.
This warning is for project developers. Use -Wno-dev to suppress it.
我才刚开始了解 de-duplication(去重)的含义,但我确信它即使不是我这个问题的罪魁祸首,也至少是原因的一部分。我记得去年我使用(我想是)CMake 3.12 作为最低要求版本时,是能够成功构建代码的。
虽然不是一个非常准确的答案,但我确实发现问题在于 libtorch(特别是它的 CUDA 版本)。
我翻出了一个旧项目,其中包含 D3D11 和 CUDA 的互操作(也就是我当前设置中除 libtorch 之外的部分)。它可以正常构建和运行,没有任何问题。既然我只是把这些代码转移到了新项目(使用 libtorch 的项目)中,它就不太可能突然出错。为了保险起见,我还是把旧代码与新的 CMakeLists.txt 放在一起进行了验证。结果表明,问题与 CMakeLists.txt 中的 ${TORCH_LIBRARIES} 有关。
解决方案是迁移到面向 CUDA 11.8 的 libtorch 2.4.0。除了将 libtorch 的路径指向新版本之外,我没有更改项目中的任何其他内容,现在一切正常。