compute-sanitizer --tool memcheck my_cuda_program
这就是我正在尝试运行的命令。
我之所以尝试它,是因为我遇到了以下错误:
Thread 1 my_cuda_program received signal CUDA_EXCEPTION_5, Warp Out-of-range Address.
当我通过 cuda-gdb 运行我的程序时。
但是,我得到了
========= COMPUTE-SANITIZER
========= Unable to find injection library libsanitizer-collection.so
将此作为输出。但是,libsanitizer-collection.so 存在于我的 /usr/lib/nvidia-cuda-toolkit 中。
我的 nvcc 版本是 12.4,这就是我使用 compute-sanitizer 的原因。我使用 CMake 编译我的程序,CMakeLists.txt 如下所示:
# Build script for the `nnbody` executable (mixed CUDA / C++20 sources).
cmake_minimum_required(VERSION 3.29.2)

# project() already enables the CUDA and CXX languages, so no separate
# enable_language(CUDA) call is needed.
project(nnbody VERSION 0.1.0 LANGUAGES CUDA CXX)

include(CTest)
enable_testing()

add_subdirectory(third-party/yaml-cpp)

# Language standards. CMake derives the matching -std= flag for each compiler
# from these variables, so the flag is not appended to CMAKE_CXX_FLAGS by hand.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 20)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

# OpenMP for the host C++ code. REQUIRED aborts configuration when OpenMP is
# unavailable, so no manual "found" check is necessary. The compile and link
# settings are attached to the target through OpenMP::OpenMP_CXX below.
find_package(OpenMP REQUIRED COMPONENTS CXX)

# Numerical libraries.
find_package(BLAS REQUIRED)
find_package(LAPACK REQUIRED)
find_package(FFTW3 REQUIRED)

# Default VTK location; a value supplied by the user (-DVTK_DIR=...) wins.
# NOTE(review): VTK_DIR is normally the directory that holds vtk-config.cmake
# (e.g. <prefix>/lib/cmake/vtk-9.3); this default is an include directory -
# confirm it is the intended hint.
if(NOT DEFINED VTK_DIR)
  set(VTK_DIR "/usr/local/include/vtk-9.3/")
endif()
find_package(VTK REQUIRED)

# Collect every source and header under src/. CONFIGURE_DEPENDS makes the
# build re-run the glob so newly added files are picked up.
file(GLOB ALL_FILES_SRC CONFIGURE_DEPENDS
  "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cuh"
  "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cu"
  "${CMAKE_CURRENT_SOURCE_DIR}/src/*.h"
  "${CMAKE_CURRENT_SOURCE_DIR}/src/*.hpp"
  "${CMAKE_CURRENT_SOURCE_DIR}/src/*.c"
  "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp"
)

# Each file is compiled according to its extension (.cu -> CUDA, .cpp -> CXX).
# To force host sources through nvcc, set the LANGUAGE property on an expanded
# list of .cpp files using the PROPERTIES keyword, e.g.
#   set_source_files_properties(${host_sources} PROPERTIES LANGUAGE CUDA)
# and never on headers, which would then be compiled as translation units.
add_executable(nnbody ${ALL_FILES_SRC})

target_link_libraries(nnbody
  PRIVATE
    OpenMP::OpenMP_CXX
    BLAS::BLAS
    LAPACK::LAPACK
    ${FFTW3_LIBRARIES}
    ${VTK_LIBRARIES}
    yaml-cpp
)

# Generate device code for compute capability 6.0 and 8.6.
set_target_properties(nnbody PROPERTIES CUDA_ARCHITECTURES "60;86")

# Packaging.
set(CPACK_PROJECT_NAME ${PROJECT_NAME})
set(CPACK_PROJECT_VERSION ${PROJECT_VERSION})
include(CPack)
我不知道该尝试什么..
我有同样的错误,
/usr/bin/compute-sanitizer
会尝试从 /usr/bin
加载 libsanitizer-collection.so
,但它实际位于 /usr/lib/nvidia-cuda-toolkit/compute-sanitizer/libsanitizer-collection.so
。 (*buntu 22.04、NVIDIA 驱动程序版本 550、CUDA 12.4)
您可以通过使用
strace
来确认 compute-sanitizer
尝试在哪里找到该库:
$ strace compute-sanitizer ./main 2>&1 | grep libsanitizer-collection.so
stat("/usr/bin/libsanitizer-collection.so", 0x7ffe926f6c20) = -1 ENOENT (No such file or directory)
write(1, "========= Unable to find injecti"..., 70========= Unable to find injection library libsanitizer-collection.so
可以使用
locate
命令确定 libsanitizer-collection.so
的实际位置:
$ locate libsanitizer-collection.so
/usr/lib/nvidia-cuda-toolkit/compute-sanitizer/libsanitizer-collection.so
我通过将
compute-sanitizer
移动到 /usr/lib/nvidia-cuda-toolkit/compute-sanitizer
目录并将其链接回 /usr/bin
解决了这个问题。
sudo mv /usr/bin/compute-sanitizer /usr/lib/nvidia-cuda-toolkit/compute-sanitizer/compute-sanitizer
sudo ln -s /usr/lib/nvidia-cuda-toolkit/compute-sanitizer/compute-sanitizer /usr/bin/compute-sanitizer
然后我收到一个新错误,但这可能无关:
========= COMPUTE-SANITIZER
========= Target application terminated before first instrumented API call
========= Error: couldn't find exit code.