我想在 GPU 上生成多个线程。就像我附加的代码片段一样。这样做会引发错误
MemoryError: std::bad_alloc: CUDA error at: rapids/include/rmm/mr/device/cuda_memory_resource.hpp:60: cudaErrorIllegalAddress an illegal memory access was encountered
from concurrent.futures import ThreadPoolExecutor

import cudf
import cupy as cp
import numpy as np
import pandas as pd
def func(x):
    """Read ``temp.csv`` into a cuDF DataFrame and return its row count plus *x*.

    Args:
        x: An integer offset (also echoed to stdout for progress tracing).

    Returns:
        ``len(rows in temp.csv) + x``.

    Raises:
        FileNotFoundError: if ``temp.csv`` is not present in the working directory.
    """
    print(x)
    # Fix: the original snippet referenced `pd` without ever importing
    # pandas, so every call died with NameError before touching the GPU.
    # `pandas` is now imported at the top of the file.
    X_test = pd.read_csv("temp.csv")
    # Copy the host DataFrame onto the GPU that the calling thread selected.
    X_test = cudf.from_pandas(X_test)
    return len(X_test) + x
def get_stats_internal_with_gpu(stats_arg):
    """Bind the calling thread to a randomly chosen GPU, then run ``func``.

    Args:
        stats_arg: Passed straight through to ``func``.

    Returns:
        Whatever ``func(stats_arg)`` returns.
    """
    # Pick GPU 0 or 1 uniformly at random and make it the current device
    # for this thread before any cuDF/CuPy work happens.
    # NOTE(review): per-thread random device switching is presumably what
    # triggers the illegal-memory-access crash in the question — confirm
    # against the RMM/cuDF device-affinity rules.
    chosen = np.random.randint(2)
    cp.cuda.Device(chosen).use()
    return func(stats_arg)
# Fan the 50 work items out across a thread pool ("parallelize across GPUs").
# NOTE(review): max_workers=510 far exceeds the 50 submitted tasks and the
# 2 GPUs being targeted — presumably a typo for a much smaller number.
with ThreadPoolExecutor(max_workers=510) as executor:
    mapped = executor.map(get_stats_internal_with_gpu, list(range(50)))
    chunk_results = list(mapped)
print(chunk_results)
任何有关此问题的帮助将不胜感激
对于这种在 Python 中使用 cuDF 的多 GPU 工作,使用 Dask cuDF 可能会更好,它依赖于 Dask 和 Dask-CUDA。
Dask 可以使用 map_partitions 在较大数据集的每个分区上轻松执行任意函数。
from dask_cuda import LocalCUDACluster
from dask.distributed import Client
import dask_cudf
import cudf
# Start one Dask-CUDA worker per GPU (LocalCUDACluster uses every GPU on
# the machine by default) and attach a client to drive it.
cluster = LocalCUDACluster()
client = Client(cluster)

# Lazily read the file into a distributed, GPU-backed DataFrame.
df = dask_cudf.read_csv(...)

# Fix: map_partitions is lazy and the original snippet discarded its
# result, so `func` would never actually run. Capture the task graph and
# call .compute() to execute func on each cuDF partition and collect the
# results on the client.
result = df.map_partitions(func)  # run func on each partition (lazy)
result.compute()