docker build . -t test_ocl
docker run -it --gpus all test_ocl /bin/bash
Clinfo输出:
root@7307c8f6cf60:/# clinfo
Number of platforms 0
ICD loader properties
ICD loader Name OpenCL ICD Loader
ICD loader Vendor OCL Icd free software
ICD loader Version 2.3.2
ICD loader Profile OpenCL 3.0
nvidia-smi
root@7307c8f6cf60:/# nvidia-smi
Thu Feb 6 16:23:01 2025
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.86.09 Driver Version: 571.96 CUDA Version: 12.8 |
|-----------------------------------------+------------------------+----------------------+
| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+========================+======================|
| 0 NVIDIA RTX A1000 6GB Lap... On | 00000000:01:00.0 On | N/A |
| N/A 39C P8 5W / 35W | 194MiB / 6144MiB | 0% Default |
| | | N/A |
+-----------------------------------------+------------------------+----------------------+
+-----------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=========================================================================================|
| No running processes found |
+-----------------------------------------------------------------------------------------+
使用Python来看平台
root@7307c8f6cf60:/# /venv/bin/python
Python 3.12.3 (main, Jan 17 2025, 18:03:48) [GCC 13.3.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import pyopencl as cl
>>> cl.get_platforms()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
pyopencl._cl.LogicError: clGetPlatformIDs failed: PLATFORM_NOT_FOUND_KHR
第二次尝试
docker run --rm --gpus all nvidia/opencl clinfo
Number of platforms 0
第三次尝试
使用 ROCm
docker run -it --gpus all rocm/dev-ubuntu-22.04 /bin/bash
Clinfo输出:
root@f561a9533509:/# clinfo
Number of platforms: 1
Platform Profile: FULL_PROFILE
Platform Version: OpenCL 2.1 AMD-APP (3635.0)
Platform Name: AMD Accelerated Parallel Processing
Platform Vendor: Advanced Micro Devices, Inc.
Platform Extensions: cl_khr_icd cl_amd_event_callback
Platform Name: AMD Accelerated Parallel Processing
Number of devices: 0
至少这次它找到了我的CPU
apt update
apt upgrade -y
pip3 install siphash24 pyopencl
root@f561a9533509:/# python3
Python 3.10.12 (main, Jan 17 2025, 14:35:34) [GCC 11.4.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import pyopencl as cl
>>> cl.get_platforms()
[<pyopencl.Platform 'AMD Accelerated Parallel Processing' at 0x7fa26e577010>]
它找到了平台
>>> p = cl.get_platforms()[0]
>>> p.get_devices()
[]
>>>
但没有设备
第四次尝试 docker run --gpus all nvcr.io/nvidia/k8s/cuda-sample:nbody nbody -gpu -benchmark
Run "nbody -benchmark [-numbodies=<numBodies>]" to measure performance.
-fullscreen (run n-body simulation in fullscreen mode)
-fp64 (use double precision floating point values for simulation)
-hostmem (stores simulation data in host memory)
-benchmark (run benchmark to measure performance)
-numbodies=<N> (number of bodies (>= 1) to run in simulation)
-device=<d> (where d=0,1,2.... for the CUDA device to use)
-numdevices=<i> (where i=(number of CUDA devices > 0) to use for simulation)
-compare (compares simulation results running once on the default GPU and once on the CPU)
-cpu (run n-body simulation on the CPU)
-tipsy=<file.bin> (load a tipsy model file for simulation)
NOTE: The CUDA Samples are not meant for performance measurements. Results may vary when GPU Boost is enabled.
> Windowed mode
> Simulation data stored in video memory
> Single precision floating point simulation
> 1 Devices used for simulation
GPU Device 0: "Ampere" with compute capability 8.6
> Compute 8.6 CUDA device: [NVIDIA RTX A1000 6GB Laptop GPU]
20480 bodies, total time for 10 iterations: 19.150 ms
= 219.026 billion interactions per second
= 4380.514 single-precision GFLOP/s at 20 flops per interaction
也许这成功将命令发送到GPU? 尽管我不确定如何在自己的代码中复制它?
我还没有opencl工作,但是看来我已经设法在Docker内使用了GPU。
src/gpu.py
from numba import cuda
import numpy as np
@cuda.jit
def vector_add(a, b, c):
    """CUDA kernel computing the element-wise sum c[i] = a[i] + b[i].

    Launched with one thread per element; threads whose global index
    falls past the end of the arrays return without touching memory.
    """
    i = cuda.grid(1)  # flattened global thread index
    if i >= a.size:
        return
    c[i] = a[i] + b[i]
def main():
    """Add two random float32 vectors on the GPU and verify against NumPy.

    Prints "Success!" when the device result matches the host-side
    reference sum, "Error!" otherwise.
    """
    # Size of vectors
    n = 1000000

    # Host input vectors (float32 keeps host/device dtypes consistent).
    h_a = np.random.rand(n).astype(np.float32)
    h_b = np.random.rand(n).astype(np.float32)

    # Device memory: copy the inputs, allocate the output on the GPU.
    # (No host-side output buffer is pre-allocated — copy_to_host()
    # below returns a fresh array, so a np.zeros(n) here would be dead.)
    d_a = cuda.to_device(h_a)
    d_b = cuda.to_device(h_b)
    d_c = cuda.device_array(n, dtype=np.float32)

    # One thread per element, rounded up to a whole number of blocks.
    threads_per_block = 256
    blocks_per_grid = (n + threads_per_block - 1) // threads_per_block

    # Launch the kernel.
    vector_add[blocks_per_grid, threads_per_block](d_a, d_b, d_c)

    # copy_to_host() synchronizes with the kernel before returning.
    h_c = d_c.copy_to_host()

    # Verify the result against a host-side reference computation.
    if np.allclose(h_c, h_a + h_b):
        print("Success!")
    else:
        print("Error!")


if __name__ == "__main__":
    main()
Dockerfile
FROM ubuntu:24.04

# One apt layer: install, then clean the package lists to keep the image
# small. 'apt upgrade' was dropped — upgrading inside a build is
# non-reproducible; pin the base image tag instead if newer packages are
# needed. ca-certificates is required for the HTTPS download below.
RUN apt-get update \
 && apt-get install -y --no-install-recommends curl wget ca-certificates \
 && rm -rf /var/lib/apt/lists/*

ENV INSTALLER_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh"
ENV INSTALLER_FILE="Miniconda3-latest-Linux-x86_64.sh"
ENV INSTALL_DIR="/miniconda3"

WORKDIR /home/

# Download and run the Miniconda installer (-b: batch, -p: prefix), then
# delete it so it doesn't bloat the layer. '+x' replaces the overly
# permissive 'chmod 777'. curl -f fails the build on an HTTP error
# instead of saving an error page as the installer.
RUN curl -fsSL -o "$INSTALLER_FILE" "$INSTALLER_URL" \
 && chmod +x "$INSTALLER_FILE" \
 && "./$INSTALLER_FILE" -b -p "$INSTALL_DIR" \
 && rm -f "$INSTALLER_FILE"

ENV PATH=$INSTALL_DIR/bin:$PATH

# SECURITY: disabling TLS verification exposes every conda/pip download
# to man-in-the-middle tampering. Keep this only if you are behind an
# intercepting proxy — and prefer installing the proxy's CA certificate
# instead of turning verification off.
RUN conda config --set ssl_verify false
RUN pip config set global.trusted-host "pypi.org files.pythonhosted.org pypi.python.org"

RUN conda config --append channels conda-forge
# Install numba and cudatoolkit in a single transaction so the solver
# picks mutually compatible versions.
RUN conda install -y numba cudatoolkit

COPY src/gpu.py /home/gpu.py
建造它:
docker build . -t numba_test
运行它:
docker run --rm -it --gpus=all numba_test python /home/gpu.py
Success!