Почему я получаю 0 как результат?
// Host code
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include "CL\cl.h"
void runCL(double * a, double * b, double * c, const int & n) {
cl_int err;
cl_uint numEntries;
cl_uint numPlatforms;
err = clGetPlatformIDs(0, nullptr, &numPlatforms);
//check err
std::vector<cl_platform_id> platform(numPlatforms);
err = clGetPlatformIDs(numPlatforms, &platform[0], nullptr);
//Let's print the platforms
size_t strLen;
for (auto i = 0; i < numPlatforms; ++i) {
err = clGetPlatformInfo(platform[i], CL_PLATFORM_NAME, 0, nullptr, &strLen);
std::cout << "strLen = " << strLen << std::endl;
//Check err
std::vector<char> platformName(strLen);
err = clGetPlatformInfo(platform[i], CL_PLATFORM_NAME, strLen, &platformName[0], nullptr);
std::cout << "Platform[" << i << "] = " << std::string(platformName.data()) << std::endl;
}
//We now know what the platforms are let's pick a specific device
cl_uint numDevices;
cl_device_id device;
err = clGetDeviceIDs(platform[0],CL_DEVICE_TYPE_GPU,0,nullptr,&numDevices);
//check err
std::vector<cl_device_id> deviceId(numDevices);
err = clGetDeviceIDs(platform[0], CL_DEVICE_TYPE_GPU, numDevices, &deviceId[0],nullptr);
for (auto i = 0; i < numDevices; ++i) {
err = clGetDeviceInfo(deviceId[0], CL_DEVICE_NAME, 0, nullptr, &strLen);
//check err
std::vector<char> deviceName(strLen);
err = clGetDeviceInfo(deviceId[0], CL_DEVICE_NAME, strLen, &deviceName[0], nullptr);
std::cout << "device[" << i << "] = " << std::string(deviceName.data()) << std::endl;
}
//Now I know the device, I can create context and commant queuq
cl_context context;
cl_command_queue cmd_queue;
context = clCreateContext(0, 1, &deviceId[0], nullptr, nullptr, nullptr);
cmd_queue = clCreateCommandQueue(context, deviceId[0], 0, nullptr);
//Let's allocate memory
size_t bufferSize = sizeof(double)*n;
cl_mem a_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, n, nullptr, nullptr);
err = clEnqueueWriteBuffer(cmd_queue, a_mem, CL_TRUE, 0, bufferSize, (void*)a, 0, nullptr, nullptr);
cl_mem b_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, n, nullptr, nullptr);
err |= clEnqueueWriteBuffer(cmd_queue, b_mem, CL_TRUE, 0, bufferSize, (void*)b, 0, nullptr, nullptr);
cl_mem c_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, bufferSize, nullptr, nullptr);
if (CL_SUCCESS != err) {
std::cout << "Error in clEnqueueWriteBuffer" << std::endl;
}
clFinish(cmd_queue);
//Now let's create the program (compiling kernels)
cl_program program[1];
cl_kernel kernel[1];
const char * filename = "device.cl";
program[0] = clCreateProgramWithSource(context, 1, (const char**)&filename, nullptr, &err);
err = clBuildProgram(program[0], 0, nullptr, nullptr, nullptr, nullptr);
if (CL_SUCCESS != err) {
std::cout << "Error in clBuildProgram" << std::endl;
}
kernel[0] = clCreateKernel(program[0], "vecAdd", &err);
//Setting kernel args
err = clSetKernelArg(kernel[0], 0, sizeof(cl_mem), &a_mem);
err |= clSetKernelArg(kernel[0], 1, sizeof(cl_mem), &b_mem);
err |= clSetKernelArg(kernel[0], 2, sizeof(cl_mem), &c_mem);
if (CL_SUCCESS != err) {
std::cout << "Error in clSetKernelArg" << std::endl;
}
//Actual execution
size_t globalWorkSize = n;
err = clEnqueueNDRangeKernel(cmd_queue, kernel[0], 1,nullptr, &globalWorkSize, nullptr,0,nullptr,nullptr);
clFinish(cmd_queue);
err = clEnqueueReadBuffer(cmd_queue, c_mem, CL_TRUE, 0, bufferSize, c, 0, nullptr, nullptr);
clFinish(cmd_queue);
//Teardown
clReleaseCommandQueue(cmd_queue);
clReleaseContext(context);
}
/// Entry point: fills two n-element input vectors, asks runCL to add them on
/// the GPU and stores the sums in c.
int main(int argc, char **argv) {
    // Host-side setup before running OpenCL.
    const int n = 32;
    std::vector<double> a(n);
    std::vector<double> b(n);
    // A vector instead of a raw new[]: the storage is released automatically
    // and starts zero-initialised, so c holds defined values even if runCL
    // fails before writing to it.
    std::vector<double> c(n);
    for (auto i = 0; i < n; ++i) {
        a[i] = static_cast<double>(i + 1);
        b[i] = static_cast<double>(n - i - 1);
    }
    runCL(a.data(), b.data(), c.data(), n);
    // Host-side work after running OpenCL (result printing, currently disabled).
    /*for (auto i = 0; i < n; ++i)
    std::cout << "res[" << i << "] = " << c[i] << std::endl;*/
    return 0;
}
И код ядра:
// Device code (OpenCL C): element-wise vector addition.

// Before OpenCL 1.2, double precision is only available after the fp64
// extension has been enabled explicitly. The guards keep the file compiling
// on implementations that do not define these extension macros.
#if defined(cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#elif defined(cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64 : enable
#endif

// Writes outputC[i] = inputA[i] + inputB[i] for the work-item's global id i
// in dimension 0. There is no bounds check, so the global work size must not
// exceed the number of elements in any of the three buffers.
__kernel void vecAdd(
    __global const double * inputA,
    __global const double * inputB,
    __global double * outputC) {
    size_t idx = get_global_id(0);
    outputC[idx] = inputA[idx] + inputB[idx];
}
Это моя первая программа на OpenCL; я пыталась следовать обучающему видео с YouTube (не копируя код дословно, а взяв его за основу).
Можете ли вы подсказать, что я упускаю? (Надеюсь, это что-то простое, хотя я немного путаюсь в аргументах некоторых функций OpenCL.)
Обновление: я добавила проверку ошибок и получаю следующий вывод:
strLen = 12
Platform[0] = NVIDIA CUDA
strLen = 16
Platform[1] = Intel(R) OpenCL
strLen = 42
Platform[2] = Experimental OpenCL 2.1 CPU Only Platform
device[0] = GeForce GTX 960M
Error in clEnqueueWriteBuffer
Error in clBuildProgram
Error in clSetKernelArg
Значит, ошибка возникает уже при настройке буферов. Можете ли вы помочь мне с этим?