OpenCL GPU 编程中读回的结果始终是初始值

问题描述 投票:0回答:2

我写了一小段代码,通过 OpenCL 库在 GPU 上将两个小向量相加。主要代码 vectorAdd.cc 如下:

#include <time.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

#include <CL/cl.hpp>

/// Fills the first `size` elements of `data` with pseudo-random values.
///
/// Each element is set to rand() / RAND_MAX, i.e. a value in [0, 1]. The
/// sequence depends on the current rand() state (seed it with srand()).
///
/// @param data  Destination array; must hold at least `size` floats.
/// @param size  Number of elements to fill. Zero or negative fills nothing.
void randomInit(float *data, int size)
{
  // Signed index on purpose: comparing an unsigned index against a negative
  // `size` would convert `size` to a huge unsigned bound and overrun `data`.
  for (int i = 0; i < size; ++i)
    data[i] = rand() / static_cast<float>(RAND_MAX);
}


int main()
{

  //get all platforms (drivers)
  std::vector<cl::Platform> platforms;
  cl::Platform::get(&platforms);

  assert(platforms.size() > 0);

  cl::Platform myPlatform = platforms[0];
  std::cout << "Using platform: "<<myPlatform.getInfo<CL_PLATFORM_NAME>()<<"\n";

  //get default device of the default platform
  std::vector<cl::Device> devices;
  myPlatform.getDevices(CL_DEVICE_TYPE_ALL, &devices);

  assert(devices.size() > 0);

  cl::Device myDevice = devices[0];
  std::cout<< "Using device: "<<myDevice.getInfo<CL_DEVICE_NAME>()<<"\n";

  std::ifstream vectorAddFile("vector_add_kernel.cl" );
  std::string src(std::istreambuf_iterator<char>(vectorAddFile), (std::istreambuf_iterator<char>()));

  cl::Program::Sources sources(1, std::make_pair(src.c_str(), src.length() + 1));

  cl::Context context(myDevice);
  cl::Program program(context, sources);

  int szVec = 10;   

  float* A = new float[szVec];
  float* B = new float[szVec];

  randomInit(A,szVec);
  randomInit(B,szVec);

  float* C = new float[szVec];
  std::fill_n(C, szVec, 0);

  // create buffers on the device
  cl::Buffer buffer_A = cl::Buffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, szVec * sizeof(float), A);
  cl::Buffer buffer_B = cl::Buffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, szVec * sizeof(float), B);
  cl::Buffer buffer_C = cl::Buffer(context, CL_MEM_WRITE_ONLY|CL_MEM_COPY_HOST_PTR, szVec * sizeof(float), C);

 //create queue to which we will push commands for the device.
 cl::CommandQueue queue(context, myDevice);

 //write arrays A and B to the device
 //queue.enqueueWriteBuffer(buffer_A, CL_TRUE, 0, sizeof(float) * szVec, A);
 //queue.enqueueWriteBuffer(buffer_B, CL_TRUE, 0, sizeof(float) * szVec, B);

 auto err = program.build("cl.std.CL1.2");

 // run the kernel
 cl::Kernel kernel(program,"vector_add", &err);
 kernel.setArg(0, buffer_A);
 kernel.setArg(1, buffer_B);
 kernel.setArg(2, buffer_C);
 queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(szVec), cl::NullRange);
 queue.finish();

 //read result C from the device to array C
 queue.enqueueReadBuffer(buffer_C, CL_TRUE, 0, sizeof(float) * szVec, C);

 std::cout<<" result: \n";

 for(int i = 0; i < szVec; i++)
 {
   std::cout << A[i] << " + " << B[i] << " = " << C[i] << std::endl;
 }

 std::cout << std::endl;

 return 0;
}

并且内核代码vector_add_kernel.cl如下:

/* Element-wise vector addition: each work-item stores A[k] + B[k] into C[k].
 * There is no length argument, so the host must launch exactly one work-item
 * per element of the buffers. */
__kernel void vector_add(__global float *A, __global float *B, __global float *C) 
{
   // Position of this work-item along dimension 0 selects the element.
   const int gid = get_global_id(0);

   // Sum the two inputs, then write the result to the output buffer.
   const float sum = A[gid] + B[gid];
   C[gid] = sum;
}

我得到的结果是:

Using platform: NVIDIA CUDA
Using device: Tesla K20m
result: 
0.840188 + 0.477397 = 0
0.394383 + 0.628871 = 0
0.783099 + 0.364784 = 0
0.79844 + 0.513401 = 0
0.911647 + 0.95223 = 0
0.197551 + 0.916195 = 0
0.335223 + 0.635712 = 0
0.76823 + 0.717297 = 0
0.277775 + 0.141603 = 0
0.55397 + 0.606969 = 0

如您所见,问题在于结果始终是我初始化向量 C 时的值,我不明白为什么。我还尝试用其他值初始化向量 C,读回的结果仍然是初始值。

c++ gpu opencl nvidia
2个回答
1
投票

可能只是语法错误。


0
投票

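问题源于构建程序时使用的命令 `program.build("cl.std.CL1.2")`:这个构建选项的写法无效,正确的写法是 `program.build("-cl-std=CL1.2")`。由于构建失败且错误码没有被检查,内核实际上从未执行,因此读回的 C 仍是初始值。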

© www.soinside.com 2019 - 2024. All rights reserved.