Вспомогательная функция OpenCL C не вызывается и не работает - PullRequest
0 голосов
/ 03 марта 2020

Хотя программа компилируется без каких-либо ошибок, она, похоже, не вызывает вспомогательную функцию в моём ядре.

Эта функция просто вычисляет скалярное произведение двух массивов векторов и должна записывать результат в массив float, но результат остаётся равным значению по умолчанию (0.0).

Что я упускаю? Порядок, в котором я вызываю clEnqueueWriteBuffer / clEnqueueReadBuffer, должен быть рабочим — или я где-то передаю неправильный аргумент?

Вот файл kernel.cl:

/* Scalar aliases used throughout this kernel file.
 * NOTE: despite its name, int64 is an UNSIGNED type (OpenCL C `unsigned long`,
 * i.e. ulong, which the OpenCL C specification defines as 64 bits wide). */
typedef unsigned long int64;
/* 32-bit floating point value (OpenCL C `float`). */
typedef float fp32;

/* Three-component vector made of three consecutive fp32 fields (x, y, z).
 * The host program declares a struct with the same three float members and
 * copies arrays of it into the buffers this kernel reads, so the field order
 * and types here must stay in sync with the host definition. */
typedef struct Vector3
{
    fp32 x;
    fp32 y;
    fp32 z; 
}Vec3;


/*
 * Computes the dot product of each pair of vectors from two arrays.
 *
 * For every index i in [0, len) this stores
 *     vec1[i].x*vec2[i].x + vec1[i].y*vec2[i].y + vec1[i].z*vec2[i].z
 * into resultArr[i].
 *
 * vec1, vec2  - input arrays in global memory; each must hold at least len elements.
 * resultArr   - output array in global memory; must hold at least len elements.
 * len         - number of vector pairs to process.
 */
void DotProduct(__global Vec3* vec1,__global Vec3* vec2,__global fp32* resultArr,int64 len)
{
    /* The index uses the same unsigned 64-bit type as len: comparing a signed
     * int against it was a mixed-sign comparison and the counter would
     * overflow for lengths above INT_MAX. */
    for(int64 i=0;i<len;i++)
    {
        resultArr[i] = (vec1[i].x * vec2[i].x) + (vec1[i].y * vec2[i].y) + (vec1[i].z * vec2[i].z);
    }
}

/*
 * Kernel entry point: writes the per-element dot products of vec1 and vec2
 * into resultArr.
 *
 * vec1 / len1            - first input array and its element count.
 * vec2 / len2            - second input array and its element count.
 * resultArr / resultCount - output array and its capacity in elements.
 *
 * The number of products computed is the smallest of len1, len2 and
 * resultCount, so no buffer is ever indexed past its end. The previous guard
 * required len1 to equal resultCount (and len1 <= len2); any other
 * combination - for example 4 / 5 / 5 - silently skipped the computation and
 * left the result buffer untouched.
 *
 * This kernel does not query a work-item id: every work-item that executes it
 * runs the whole loop, so the host only needs to enqueue a single work-item.
 */
__kernel void CallDotProduct(__global Vec3* vec1,int64 len1,__global Vec3* vec2,int64 len2,__global fp32* resultArr,int64 resultCount)
{
    int64 count = (len1 < len2) ? len1 : len2;
    if(resultCount < count)
    {
        count = resultCount;
    }

    DotProduct(vec1,vec2,resultArr,count);
}

source

#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#define CL_TARGET_OPENCL_VERSION 200
#define _CRT_SECURE_NO_DEPRECATE /*open with FILE*/
#include "AppCL.h"

#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <memory>

// Element count of an array: total size in bytes divided by the size of its
// first element. Only meaningful when x is a real array; if x is a pointer the
// expression divides the pointer size by the element size instead.
#define NELEMS(x)  (sizeof(x) / sizeof((x)[0]))

void GetPlatformInfo(const cl_uint& numPlatforms, cl_platform_id* platforms, std::string& details)
{
    std::string attributeNames[] = { "Name", "Vendor", "Version", "Profile", "Extensions" };
    const cl_platform_info attributeTypes[5] = { CL_PLATFORM_NAME,CL_PLATFORM_VENDOR,CL_PLATFORM_VERSION,CL_PLATFORM_PROFILE,
                                                CL_PLATFORM_EXTENSIONS };
    size_t infoSize;
    int numAttributes = NELEMS(attributeTypes);
    char* info;
    std::string infoContent = "";
    for (size_t i = 0; i < numPlatforms; i++)
    {

        for (size_t j = 0; j < numAttributes; j++)
        {
            clGetPlatformInfo(platforms[i], attributeTypes[j], 0, NULL, &infoSize);
            info = new char[infoSize];
            // get platform attribute value
            clGetPlatformInfo(platforms[i], attributeTypes[j], infoSize, info, NULL);
            infoContent.assign(info, info + strlen(info));
            details += attributeNames[j] + " " + infoContent + '\n';

        }
        std::cout << details << std::endl;
    }

}



/// @brief Reads a whole file as raw bytes.
///
/// The file contents are inserted at the FRONT of @p bytes (existing elements
/// are kept after them), exactly as before.
///
/// @param bytes      Destination vector; nothing happens if it is null.
/// @param pathDecode Path of the file to read.
/// @param fileSize   Receives the file size in bytes, or 0 when the file could
///                   not be opened or measured.
void ReadBytes(std::vector<char>* bytes, const std::string& pathDecode, unsigned long& fileSize)
{
    fileSize = 0;
    if (bytes == nullptr)
        return;

    // Stack object: closed automatically on every path.
    std::ifstream file(pathDecode, std::ios::binary);
    if (!file.is_open())
        return;

    file.seekg(0, std::ios::end);
    const std::streamoff endPos = file.tellg();
    file.seekg(0, std::ios::beg);

    // tellg() reports -1 on failure; converting that to an unsigned size used
    // to request a gigantic reserve().
    if (endPos < 0 || !file)
        return;

    fileSize = static_cast<unsigned long>(endPos);
    bytes->reserve(bytes->size() + static_cast<std::size_t>(endPos));

    // istreambuf_iterator reads unformatted bytes, so no whitespace is skipped.
    bytes->insert(bytes->begin(), std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>());
}

/// @brief Loads the contents of a file into a string.
///
/// The file contents are inserted at the front of @p content.
///
/// @param content  Destination string.
/// @param filename Path of the file to read.
/// @return 1 when @p content holds at most one character afterwards (file
///         missing, unreadable or practically empty), 0 otherwise.
int Program::ParseFile(std::string& content, const std::string& filename)
{
    // Stack object instead of new/delete: closed even if an allocation throws.
    std::ifstream openFile(filename, std::ios::binary);

    if (openFile.is_open())
    {
        openFile.seekg(0, std::ios::end);
        const std::streamoff endPos = openFile.tellg();
        openFile.seekg(0, std::ios::beg);

        // tellg() yields -1 on failure; only reserve for a real, positive size.
        if (endPos > 0)
            content.reserve(content.size() + static_cast<size_t>(endPos));

        content.insert(content.begin(), std::istreambuf_iterator<char>(openFile), std::istreambuf_iterator<char>());
    }

    return (content.size() <= 1) ? 1 : 0;
}
/// @brief Prints the build log of a program, or the error code if there is no log.
///
/// @param status  Error code reported by the failing OpenCL call; printed as
///                "Error: <status>" when no build log text is available.
/// @param program Program whose build log is queried.
/// @param device  Device the build log is queried for.
void GetMessageError(cl_int status, cl_program program, cl_device_id device)
{
    std::string message;

    // The query results are kept in their own variables so the caller's
    // status is still intact when it has to be printed below.
    size_t length = 0;
    const cl_int sizeStatus = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
    if (sizeStatus == CL_SUCCESS && length > 0)
    {
        std::vector<char> buffer(length, '\0');
        const cl_int logStatus = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, length, buffer.data(), NULL);
        if (logStatus == CL_SUCCESS)
        {
            message.assign(buffer.data(), length);
            // The reported length includes the terminating NUL; with it in the
            // string an empty log was never recognised as empty.
            while (!message.empty() && message.back() == '\0')
                message.pop_back();
        }
    }

    if (!message.empty())
        std::cout << message << std::endl;
    else
        std::cout << "Error: " << status << std::endl;
}

/// @brief Reports a failed OpenCL call.
///
/// Does nothing when @p status is 0. Otherwise prints @p message and forwards
/// to GetMessageError() for the given program and device.
///
/// @param status  Return code of the OpenCL call being checked.
/// @param program Program handed to GetMessageError().
/// @param devices Device handed to GetMessageError().
/// @param message Text printed before the error details.
void CheckErrorCode(cl_int status, cl_program& program, cl_device_id& devices, const std::string& message)
{
    const bool succeeded = (status == 0);
    if (succeeded)
        return;

    std::cout << message << std::endl;
    GetMessageError(status, program, devices);
}


/// @brief Selects the first available OpenCL platform.
///
/// @param platform        Receives the first platform id; left unchanged when
///                        no platform could be obtained.
/// @param numberPlatforms Receives the number of platforms reported by OpenCL.
/// @param platforms       Unused. It is passed by value, so the previous code
///                        only ever assigned and freed a local copy; kept for
///                        interface compatibility.
void Program::GetPlatform(cl_platform_id& platform, cl_uint& numberPlatforms, cl_platform_id* platforms)
{
    (void)platforms;

    numberPlatforms = 0;
    cl_int status = clGetPlatformIDs(0, NULL, &numberPlatforms);

    // OpenCL reports success as CL_SUCCESS; the old test `status == 1` could
    // never detect a failure.
    if (status != CL_SUCCESS || numberPlatforms == 0)
    {
        std::cout << "Failed Platforms not found" << std::endl;
        return;
    }

    // Exactly numberPlatforms entries (the old allocation multiplied the
    // element count by sizeof(cl_platform_id) a second time).
    std::vector<cl_platform_id> available(numberPlatforms, nullptr);
    status = clGetPlatformIDs(numberPlatforms, available.data(), NULL);

    if (status != CL_SUCCESS || available[0] == nullptr)
    {
        std::cout << "Error Obtaining platformId" << std::endl;
        return;
    }

    platform = available[0];
}

/// @brief Fetches the device ids of a platform, preferring GPUs over CPUs.
///
/// @param devices  Receives a new[]-allocated array with the ids of all GPU
///                 devices, or of all CPU devices when no GPU exists. Set to
///                 nullptr when no device could be obtained. The caller owns
///                 the array (release with delete[]).
/// @param platform Platform to query.
void Program::GetDeviceIDs(cl_device_id*& devices, const cl_platform_id& platform)
{
    devices = nullptr;

    cl_device_type deviceType = CL_DEVICE_TYPE_GPU;
    cl_uint deviceCount = 0;
    cl_int status = clGetDeviceIDs(platform, deviceType, 0, NULL, &deviceCount);

    if (status != CL_SUCCESS || deviceCount == 0)
    {
        std::cout << "No GPU device available." << std::endl;
        std::cout << "Choose CPU as default device." << std::endl;

        deviceType = CL_DEVICE_TYPE_CPU;
        deviceCount = 0;
        status = clGetDeviceIDs(platform, deviceType, 0, NULL, &deviceCount);

        // Without this check a zero-length array was handed back and later
        // indexed with devices[0].
        if (status != CL_SUCCESS || deviceCount == 0)
        {
            std::cout << "No CPU device available either." << std::endl;
            return;
        }
    }

    // deviceCount elements are enough; the element size must not be
    // multiplied in again.
    cl_device_id* found = new cl_device_id[deviceCount]();
    status = clGetDeviceIDs(platform, deviceType, deviceCount, found, NULL);
    if (status != CL_SUCCESS)
    {
        std::cout << "Error obtaining device ids" << std::endl;
        delete[] found;
        return;
    }

    devices = found;
}

/// @brief Creates an OpenCL context for the first entry of @p devices.
///
/// @param context  Receives the value returned by clCreateContext.
/// @param platform Platform recorded in the context properties.
/// @param devices  Device array; one device is passed to clCreateContext.
///
/// Prints a message when clCreateContext reports a non-zero status.
void Program::GetContext(cl_context& context, const cl_platform_id& platform, cl_device_id*& devices)
{
    cl_int errorCode = 0;

    // Zero-terminated property list: { key, value, 0 }.
    cl_context_properties properties[3];
    properties[0] = CL_CONTEXT_PLATFORM;
    properties[1] = reinterpret_cast<cl_context_properties>(platform);
    properties[2] = 0;

    context = clCreateContext(properties, 1, devices, NULL, NULL, &errorCode);

    const bool created = (errorCode == 0);
    if (!created)
    {
        std::cout << "Error creating context for device" << std::endl;
    }
}

/// @brief Creates a command queue on the first device of @p devices.
///
/// @param commandQueue Receives the new queue, or NULL when creation failed.
/// @param context      Context the queue belongs to.
/// @param devices      Device array; only devices[0] is used.
///
/// Failures are reported on std::cout instead of being silently dropped.
void Program::GetCommandQueue(cl_command_queue& commandQueue, const cl_context& context, cl_device_id*& devices)
{
    commandQueue = NULL;

    // Guard the devices[0] access below.
    if (devices == nullptr)
    {
        std::cout << "Error creating command queue: no device available" << std::endl;
        return;
    }

    cl_int status = CL_SUCCESS;
    commandQueue = clCreateCommandQueue(context, devices[0], 0, &status);
    if (status != CL_SUCCESS)
    {
        std::cout << "Error creating command queue" << std::endl;
        std::cout << status << std::endl;
    }
}


/// @brief Reads a whole file into a newly allocated, NUL-terminated buffer.
///
/// @param filePath Path of the file to read.
/// @param content  Receives a new[]-allocated buffer of filesize + 1 bytes
///                 (file contents followed by '\0'); the caller releases it
///                 with delete[]. Set to nullptr on failure.
/// @param filesize Receives the number of bytes read (without the '\0');
///                 0 on failure.
/// @return 0 on success, 1 when the file cannot be opened, measured or read.
int readfilec(const char* filePath, const char*& content, size_t& filesize)
{
    content = nullptr;
    filesize = 0;

    if (filePath == nullptr)
        return 1;

    // The handle is closed by fclose on every return path. A FILE* comes from
    // fopen, so it must never be released with `delete`.
    std::unique_ptr<std::FILE, int (*)(std::FILE*)> fp(std::fopen(filePath, "rb"), &std::fclose);
    if (!fp)
        return 1;

    if (std::fseek(fp.get(), 0, SEEK_END) != 0)
        return 1;

    const long length = std::ftell(fp.get());
    if (length < 0 || std::fseek(fp.get(), 0, SEEK_SET) != 0)
        return 1;

    const size_t byteCount = static_cast<size_t>(length);

    // Writable buffer owned locally until the read has succeeded.
    std::unique_ptr<char[]> buffer(new char[byteCount + 1]);
    if (std::fread(buffer.get(), sizeof(char), byteCount, fp.get()) != byteCount)
        return 1;

    // The extra byte was allocated before but never written.
    buffer[byteCount] = '\0';

    filesize = byteCount;
    content = buffer.release();
    return 0;
}

/// @brief Loads kernel source from a file and creates an OpenCL program from it.
///
/// @tparam size       Number of elements of @p sourceSize.
/// @param filePath    Path of the kernel source file.
/// @param SourceStr   Receives the buffer allocated by readfilec().
/// @param sourceSize  sourceSize[0] receives the source length in bytes.
/// @param context     Context the program is created in.
/// @param program     Receives the created program; set to NULL when the
///                    source file could not be read.
template<size_t size>
void Program::CreateProgramWithSource(const char*& filePath, const char*& SourceStr, size_t(&sourceSize)[size], cl_context& context, cl_program& program)
{
    // readfilec() returns 0 on success. Stop on failure instead of handing an
    // incomplete or missing buffer to OpenCL.
    const int readResult = readfilec(filePath, SourceStr, sourceSize[0]);
    if (readResult != 0 || SourceStr == nullptr)
    {
        std::cout << "File could not be read: " << (filePath != nullptr ? filePath : "(null)") << std::endl;
        program = NULL;
        return;
    }
    std::cout << "File Parsed" << std::endl;

    // Echo the whole source. Writing an explicit byte count does not rely on
    // NUL termination (`*SourceStr` printed only the first character).
    std::cout.write(SourceStr, static_cast<std::streamsize>(sourceSize[0]));
    std::cout << std::endl;

    cl_int status = 0;
    program = clCreateProgramWithSource(context, 1, &SourceStr, sourceSize, &status);

    if (status != 0)
    {
        std::cout << "Program couldnt be created" << std::endl;
        std::cout << status << std::endl;
    }
}

/// @brief Builds the program for the first device and reports build failures.
///
/// The result of clBuildProgram is stored in the `status` member.
///
/// @param program     Program to build.
/// @param deviceCount Not consulted: the build is always requested for one device.
/// @param devices     Device array; its first entry is the build target.
/// @param oclVersion  Not consulted: no build options are passed to clBuildProgram.
void Program::BuildProgram(cl_program& program, const cl_uint& deviceCount, cl_device_id*& devices, const char* oclVersion)
{
    status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);

    const bool built = (status == 0);
    if (built)
        return;

    std::cout << "Program couldnt be built" << std::endl;
    GetMessageError(status, program, devices[0]);
}

// Host-side three-component vector: three consecutive float members.
// kernel.cl declares a struct with the same three float fields (x, y, z);
// arrays of this type are copied into the kernel's input buffers, so both
// definitions have to stay in sync.
typedef struct Vector3
{
    float x;
    float y;
    float z;

}Vec3;

int main()
{

    cl_uint numPlatf = 0;


    Program p1;
    p1.GetPlatform(p1.platform, numPlatf, p1.platforms);
    p1.GetDeviceIDs(p1.devices, p1.platform);
    p1.GetContext(p1.context, p1.platform, p1.devices);
    p1.GetCommandQueue(p1.commandQueue, p1.context, p1.devices);
    p1.CreateProgramWithSource(p1.filePath, p1.sourceCode, p1.sourceSize, p1.context, p1.program);
    p1.BuildProgram(p1.program, 1, p1.devices, "-cl-std=CL2.0");


    std::string s;
    unsigned long long n = ULLONG_MAX;
    unsigned long long m = 0;

    unsigned long long length1 = 4;
    unsigned long long length2 = 5;
    unsigned long long resultlen = 5;

    Vec3* v1 = (Vec3*)calloc(length1,sizeof(Vec3*) * length1);
    Vec3* v2 = (Vec3*)calloc(length1, sizeof(Vec3*) * length2);
    float* res = (float*)calloc(resultlen, sizeof(float*) * resultlen);

    for (decltype(length1) i = 0; i < resultlen; i++)
    {

        if (resultlen > length1)
            if (i < resultlen - 1)
            {
                v1[i].x = (float)(rand() % 100) + 1;
                v1[i].y = (float)(rand() % 100) + 1;
                v1[i].z = (float)(rand() % 100) + 1;

                std::cout << i + 1 << ") vec1" << std::endl;
                std::cout << "x:" << v1[i].x << " y:" << v1[i].y << " z:" << v1[i].z << std::endl;
            }

        v2[i].x= (float)(rand() % 100) + 1;
        v2[i].y= (float)(rand() % 100) + 1;
        v2[i].z = (float)(rand() % 100) + 1;

        std::cout <<i+1 <<") vec2" << std::endl;
        std::cout << "x:" << v2[i].x << " y:" << v2[i].y << " z:" << v2[i].z << std::endl;


        res[i] = 0.0f;

        std::cout << i + 1 << ") result: "<< res[i] << std::endl;

    }

    cl_mem vec1MemObject = clCreateBuffer(p1.context,CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,sizeof(Vec3*)*length1,&v1,&p1.status);
    CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed Creating Buffer 0");
    cl_mem vec2MemObject = clCreateBuffer(p1.context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(Vec3*) * length2, &v2, &p1.status);
    CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed Creating Buffer 1");
    cl_mem resBuffer = clCreateBuffer(p1.context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(res) * resultlen, &res, &p1.status);
    CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed Creating Buffer 2");

    cl_kernel dotProductKernel = clCreateKernel(p1.program,"CallDotProduct",&p1.status);
    CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed Creating kernel 0");

    p1.status = clSetKernelArg(dotProductKernel,0,sizeof(cl_mem),(void*)&vec1MemObject);
    CheckErrorCode(p1.status,p1.program,p1.devices[0],"Failed to set Argument 0");

    p1.status = clSetKernelArg(dotProductKernel, 1, sizeof(length1), &length1);
    CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to set Argument 1");

    p1.status = clSetKernelArg(dotProductKernel, 2, sizeof(cl_mem), (void*)&vec2MemObject);
    CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to set Argument 2");

    p1.status = clSetKernelArg(dotProductKernel, 3, sizeof(length2), &length2);
    CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to set Argument 3");

    p1.status = clSetKernelArg(dotProductKernel, 4, sizeof(cl_mem), &resBuffer);
    CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to set Argument 4");

    p1.status = clSetKernelArg(dotProductKernel, 5, sizeof(resultlen), &resultlen);
    CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to set Argument 5");

    size_t  globalWorkSize[1] = { 1 };
    size_t localWorkSize = 64;

    p1.status = clEnqueueReadBuffer(p1.commandQueue, vec1MemObject, CL_FALSE, 0, sizeof(Vec3*), &v1, 0, NULL, NULL);
    p1.status = clEnqueueReadBuffer(p1.commandQueue, vec2MemObject, CL_FALSE, 0, sizeof(Vec3*), &v2, 0, NULL, NULL);
    p1.status = clEnqueueWriteBuffer(p1.commandQueue, resBuffer, CL_FALSE, 1, sizeof(res) * resultlen, &res, 0, NULL, NULL);
    p1.status = clEnqueueNDRangeKernel(p1.commandQueue, dotProductKernel, 1, NULL, globalWorkSize, &localWorkSize, 0, NULL, NULL);

    for (decltype(length1) i = 0; i < resultlen; i++)
    {



        if (resultlen > length1)
            if (i < resultlen - 1)
            {
                std::cout <<i + 1 << ") vec1" << std::endl;
                std::cout << "x:" << v1[i].x << " y:" << v1[i].y << " z:" << v1[i].z << std::endl;

            }
        std::cout << '*' << std::endl;
        std::cout << i + 1 << ") vec2" << std::endl;
        std::cout << "x:" << v2[i].x << " y:" << v2[i].y << " z:" << v2[i].z << std::endl;
        std::cout << '=' << std::endl;
        std::cout << res[i] << std::endl;
    }
  return 0;
}

AppCL.h

#ifndef APPLICATIONCL_H
#define APPLICATIONCL_H

#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <iterator>
#include <fstream>
#include <sstream>
#include <CL/cl.h>

/// Bundles the OpenCL handles and source-file state used by the sample, plus
/// the helper methods that fill them in.
///
/// Every data member is public and has a default member initializer, so a
/// default-constructed Program never contains an indeterminate value
/// (fileBytes used to be left uninitialized).
///
/// The class has no destructor: releasing the OpenCL objects and the arrays
/// stored in `devices` / `sourceCode` is left to the code that uses it.
class Program
{
    friend int readfilec(const char* filePath,const char*& content, size_t& filesize);
    friend void CheckErrorCode(cl_int status, cl_program& program, cl_device_id& devices, const std::string& message);
    friend void GetMessageError(cl_int status, cl_program program, cl_device_id device);
public:
    cl_int status = 0;                      ///< Result of the most recent OpenCL call stored here.
    cl_platform_id platform = nullptr;      ///< Platform in use.
    cl_device_id* devices = nullptr;        ///< Array of device ids.
    cl_context context = nullptr;           ///< Context handle.
    cl_command_queue commandQueue = nullptr;///< Command queue handle.
    cl_program program = nullptr;           ///< Program handle.
    size_t sourceSize[1] = { 0 };           ///< Length in bytes of the kernel source.
    const char* sourceCode = nullptr;       ///< Kernel source text.
    std::string strSource;
    const char* filePath = "kernel.cl";     ///< Path of the kernel source file.
    unsigned  char* fileBytes = nullptr;
    cl_platform_id* platforms = nullptr;
    //="kernelApplication.cl";

    /// Sets the path of the kernel source file (the pointer is stored, not copied).
    inline void SetFilePath(const char* path) { filePath = path; }
    /// Returns the path of the kernel source file.
    inline const char* GetFilePath() { return filePath; }
    void GetPlatform(cl_platform_id& platform, cl_uint& numberPlatforms, cl_platform_id* platforms);
    void GetDeviceIDs(cl_device_id*& devices, const cl_platform_id& platform);
    void GetContext(cl_context& context, const cl_platform_id& platform, cl_device_id*& devices);
    void GetCommandQueue(cl_command_queue& commandQueue, const cl_context& context, cl_device_id*& devices);
    template<size_t size>
    void CreateProgramWithSource(const char*& filePath, const char*& SourceStr, size_t(&sourceSize)[size], cl_context& context, cl_program& program);
    void BuildProgram(cl_program& program, const cl_uint& deviceCount, cl_device_id*& devices, const char* oclVersion);

    int ParseFile(std::string& content, const std::string& filename);

    /// All members are set by their default member initializers above, which
    /// also removes the old initializer list whose order did not match the
    /// declaration order.
    Program() = default;

private:


};

#endif // !APPLICATIONCL_H
...