Синхронизация между задачами в очереди в OpenCL - PullRequest
0 голосов
/ 28 апреля 2020

Я создал класс с именем relaxation. relaxation.hpp выглядит следующим образом:


#ifndef RELAXATION_HPP
#define RELAXATION_HPP

#define __CL_ENABLE_EXCEPTIONS

#include <iostream>
#include <CL/cl.hpp>
#include <cmath>
#include "CL_ERROR.hpp"
#include "defines.hpp"


/**
 * Owns the relaxation kernel and the device buffer holding its stencil, and
 * drives repeated relaxation sweeps on an OpenCL command queue.
 *
 * T is the element type of the stencil and of the buffers being relaxed.
 *
 * NOTE(review): the member functions are defined in relaxation.cc and the
 * include of that file below is commented out; as template members they must
 * be visible to (or explicitly instantiated for) every user — confirm how
 * the project arranges this.
 */
template<class T>
class relaxation
{
  protected:
    // Edge length of the cubic stencil.
    int stencilDataWidth = 3;
    // Number of stencil entries (stencilDataWidth cubed). Computed with integer
    // arithmetic instead of pow(), which goes through double and truncates.
    int relaxationStencilSize = stencilDataWidth * stencilDataWidth * stencilDataWidth;
    // Host copy of the stencil; allocated in setStencil(), freed in the destructor.
    T* stencil   = nullptr;
    // Weight used when the stencil coefficients are built in setStencil().
    T omega = 0.8;

    // Device-side copy of the stencil.
    cl::Buffer       relaxationStencilBuffer;
    // The "relaxation" kernel, created by createKernel().
    cl::Kernel       kernel;

  public:
     /// Creates the kernel from the program and uploads the stencil via the queue.
     relaxation(cl::Program& _programInput, cl::CommandQueue& _queue, 
                const cl::Context& _context);
    ~relaxation();

     // The object owns `stencil` through a raw pointer that the destructor
     // deletes; an implicit copy would make two objects delete the same array.
     relaxation(const relaxation&) = delete;
     relaxation& operator=(const relaxation&) = delete;

     /// Returns the current value of omega.
     T getOmega();
     /// Creates the kernel named "relaxation" from the given program.
     void createKernel(cl::Program& program);
     /// Builds the host stencil and writes it into a newly created device buffer.
     void setStencil(const cl::Context& context, cl::CommandQueue& queue);
     /// Runs numberOfRelaxationSweep kernel launches on inoutBuffer, using
     /// intermediateBuffer as scratch; `event` is attached to the final copy
     /// back into inoutBuffer.
     void relaxing(cl::CommandQueue& queue, 
                   cl::Buffer& inoutBuffer,  cl::Buffer& intermediateBuffer, 
                   cl::Buffer& RHSBuffer, 
                   const cl::NDRange& globalRange, const cl::NDRange& localRange,
                   const int numberOfRelaxationSweep,
                   const int& domainHeight, const int& domainWidth,
                   const int& domainDepth, cl::size_t<3> bufferOrigin,
                   cl::size_t<3> hostOrigin, cl::size_t<3> region,
                   const int& deviceWidth, const int& spatialStepSize, cl::Event& event);
};

//#include "../src/relaxation.cc"

#endif

А исходный файл (relaxation.cc) выглядит так:

#include "../includes/relaxation.hpp"

/**
 * Sets the object up in two steps, in this order: the kernel is created from
 * the supplied program, then the stencil is built and written to the device
 * through the supplied queue.
 */
template<class T>
relaxation<T>::relaxation(cl::Program& _program, cl::CommandQueue& _queue,
                          const cl::Context& _context)
{
  this->createKernel(_program);
  this->setStencil(_context, _queue);
}


/**
 * Releases the host-side stencil array allocated by setStencil().
 * Deleting a null pointer is a no-op, so this is safe if it was never set.
 */
template<class T>
relaxation<T>::~relaxation()
{
  delete[] this->stencil;
}

/**
 * Accessor for omega.
 * @return the current omega value, by copy.
 */
template<class T>
inline T relaxation<T>::getOmega()
{
  return this->omega;
}


/**
 * Builds the host-side stencil and uploads it to a new read-only device buffer.
 *
 * The stencil is a cube of stencilDataWidth^3 entries, all zero except the
 * centre entry (1 - omega) and its six face neighbours (omega / 6 each).
 * With stencilDataWidth == 3 these are indices 13 and 4, 10, 12, 14, 16, 22.
 * Assumes stencilDataWidth is odd and at least 3, and that
 * relaxationStencilSize equals stencilDataWidth cubed.
 *
 * @param context context in which the stencil buffer is created
 * @param queue   queue used for the blocking write of the stencil
 *
 * Any cl::Error is logged and the process exits with a failure status.
 */
template<class T>
void relaxation<T>::setStencil(const cl::Context& context, cl::CommandQueue& queue)
{
  // Free the array from an earlier call so that calling this twice does not leak.
  delete[] stencil;
  stencil = new T[relaxationStencilSize];

  for (int i = 0; i < relaxationStencilSize; ++i)
  {
    stencil[i] = 0.0;
  }

  // Strides between neighbouring entries along the three axes of the cube.
  const int rowStride   = stencilDataWidth;
  const int planeStride = stencilDataWidth * stencilDataWidth;
  const int centre      = (stencilDataWidth / 2) * (planeStride + rowStride + 1);

  const T coeff = omega;
  const T neighbourWeight = 1.0 / 6.0 * coeff;

  stencil[centre]               = (1.0 - omega);
  stencil[centre - 1]           = neighbourWeight;
  stencil[centre + 1]           = neighbourWeight;
  stencil[centre - rowStride]   = neighbourWeight;
  stencil[centre + rowStride]   = neighbourWeight;
  stencil[centre - planeStride] = neighbourWeight;
  stencil[centre + planeStride] = neighbourWeight;

  try
  {
    relaxationStencilBuffer = cl::Buffer(context, CL_MEM_READ_ONLY,  
                                         relaxationStencilSize * sizeof(T));
    // CL_TRUE makes the write blocking: the data is on its way before we return.
    queue.enqueueWriteBuffer(relaxationStencilBuffer,  CL_TRUE, 0, 
                             relaxationStencilSize * sizeof(T), stencil);

  }catch (const cl::Error& error)
  {
    std::cout << "  -> Relaxation class, Problem in buffer creation/writing "
                   "data to device " << std::endl;
    std::cout << "  -> " << getErrorString(error) << std::endl;
    // Non-zero status: this is a fatal error, not a successful exit.
    exit(1);
  }
}



/**
 * Creates the kernel named "relaxation" from the given program and stores it
 * in the `kernel` member. Progress is reported on std::cout.
 *
 * @param program program expected to contain a kernel called "relaxation"
 *
 * Any cl::Error is logged and the process exits with a failure status.
 */
template<class T>
void relaxation<T>::createKernel(cl::Program& program)
{
  std::cout << "==> Relaxation class, Creating kernels";
  try
  {
    kernel  = cl::Kernel(program, "relaxation");
    std::cout << "\t-> Done!" << std::endl;
  }catch (const cl::Error& error)
  {
    std::cout << "  -> Relaxation class, Problem in kernel  " << std::endl;
    std::cout << "  -> " << getErrorString(error) << std::endl;
    // Non-zero status: this is a fatal error, not a successful exit.
    exit(1);
  }

}


/*
 * x_n = b - Ax_{n-1}
 * inoutBuffer -> x
 * RHSBuffer   -> b
 * relaxationStencilBuffer -> A
 *
 * Runs numberOfRelaxationSweep launches of the relaxation kernel.
 *
 * Sequence:
 *   1. copy x into intermediateBuffer so both hold the same boundary values;
 *   2. set the kernel arguments once;
 *   3. per sweep: launch the kernel, wait for the queue to drain, then copy
 *      intermediateBuffer back into x.
 *
 * The host waits on every command except the final copy-back. That copy is
 * attached to `event`, so a caller's event.wait() returns only after all the
 * work issued here is complete. With no sweeps requested, `event` is set to
 * the initial copy's event, so the caller always receives a valid event.
 *
 * Both rectangular copies use deviceWidth * sizeof(T) as row pitch and 0 as
 * slice pitch for source and destination.
 *
 * NOTE(review): localMemSize, localHeight, localWidth and localDepth are not
 * declared in this file — presumably they come from defines.hpp; confirm.
 *
 * Any cl::Error caught here is logged and the process exits with a failure
 * status.
 */
template<class T>
void relaxation<T>::relaxing(cl::CommandQueue& queue, 
                         cl::Buffer& inoutBuffer,cl::Buffer& intermediateBuffer, 
                         cl::Buffer& RHSBuffer,
                         const cl::NDRange& globalRange, const cl::NDRange& localRange,
                         const int numberOfRelaxationSweep,
                         const int& domainHeight, const int& domainWidth,
                         const int& domainDepth, cl::size_t<3> bufferOrigin,
                         cl::size_t<3> hostOrigin, cl::size_t<3> region, 
                         const int& deviceWidth, const int& spatialStepSize, 
                         cl::Event& event)
{
  // Single fatal-error path: log the context and the OpenCL error, then stop
  // with a non-zero status so the failure is visible to the caller's shell.
  const auto fail = [](const char* message, const cl::Error& error)
  {
    std::cout << message << std::endl;
    std::cout << "  -> " << getErrorString(error) << std::endl;
    exit(1);
  };

  const auto rowPitch = deviceWidth * sizeof(T);

  // this step is done to have same boundary values in intermediate buffer as x
  cl::Event copyEvent;
  try
  {
    queue.enqueueCopyBufferRect(inoutBuffer, intermediateBuffer, bufferOrigin, 
                hostOrigin, region, rowPitch, 0, 
                rowPitch, 0, NULL, &copyEvent);
  } catch (const cl::Error& error)
  {
    fail("  -> Problem in copying buffer x to y", error);
  }
  copyEvent.wait();

  int argCount = 0;
  try
  {
    kernel.setArg(argCount++, inoutBuffer);
    kernel.setArg(argCount++, domainHeight);
    kernel.setArg(argCount++, domainWidth);
    kernel.setArg(argCount++, domainDepth);
    kernel.setArg(argCount++, relaxationStencilBuffer);
    kernel.setArg(argCount++, stencilDataWidth);
    kernel.setArg(argCount++, RHSBuffer);
    kernel.setArg(argCount++, intermediateBuffer);
    kernel.setArg(argCount++, spatialStepSize);
    // NULL data pointer with a size: a local-memory argument of that many bytes.
    kernel.setArg(argCount++, localMemSize * sizeof(T), NULL);
    kernel.setArg(argCount++, localHeight);
    kernel.setArg(argCount++, localWidth);
    kernel.setArg(argCount++, localDepth);
  } catch (const cl::Error& error)
  {
    fail("  -> Relaxation class, Problem in setting the argument of kernel", error);
  }

  if (numberOfRelaxationSweep <= 0)
  {
    // Nothing to launch: hand back the (already completed) initial copy so
    // the caller's event.wait() operates on a valid event.
    event = copyEvent;
    return;
  }

  for (int i = 0; i < numberOfRelaxationSweep; ++i)
  {
    cl::Event iterationEvent;
    try
    {
      queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalRange, localRange, 
                                 NULL, &iterationEvent);
    }catch (const cl::Error& error)
    {
      fail("  -> Relaxation class, Problem in enqueue kernel", error);
    }

    try
    {
      // Blocks until every command enqueued so far has finished.
      queue.finish();
    }catch (const cl::Error& error)
    {
      fail("  -> Relaxation class, Problem in finishing kernel", error);
    }

    iterationEvent.wait();

    // The last copy-back is reported through the caller's event and is not
    // waited on here; every earlier copy-back must finish before the next sweep.
    const bool lastSweep = (i == numberOfRelaxationSweep - 1);
    cl::Event& copyBackEvent = lastSweep ? event : copyEvent;
    try
    {
      queue.enqueueCopyBufferRect(intermediateBuffer, inoutBuffer, bufferOrigin, 
                hostOrigin, region, rowPitch, 0, 
                rowPitch, 0, NULL, &copyBackEvent);
    } catch (const cl::Error& error)
    {
      fail("  -> Problem in copying buffer y to x", error);
    }

    if (!lastSweep)
    {
      copyBackEvent.wait();
    }
  }
}

Для запуска кода в main.cc я делаю так:

cl::Event event;
// `new` yields a pointer and the object is used through `->` below, so it
// must be declared as a pointer. The owner is responsible for deleting it.
relaxation<T>* fmgRelaxation  = new relaxation<T>(program, queue, context);
fmgRelaxation->relaxing(queue, xBuffer, intermediateBuffer[0], bBuffer, 
                                globalRange[0], localRange, preSweepNumber, 
                                deviceHeight[0], deviceWidth[0], deviceDepth[0],
                                bufferOrigin, hostOrigin, region[0], deviceWidth[0], 
                                spatialStepSize[0], event);
        // Blocks the host until the command attached to `event` has completed.
        // relaxing() attaches it to its final copy-back and waits on all of its
        // earlier commands itself, so everything it enqueued is done after this.
        event.wait();

// Do sth else

Остальные параметры созданы заранее (я не привожу весь код последней части, так как он слишком длинный). Я передаю ссылку на событие из main.cc в метод relaxing класса relaxation, и последний вызов enqueueCopyBufferRect в методе relaxing связывает с ним это событие (см. короткий комментарий в коде). Теперь мой вопрос: блокируется ли код в main.cc (то есть на хосте) на вызове event.wait() до тех пор, пока не завершится всё, что поставлено в очередь в классе relaxation, и только затем продолжает выполнение? Если нет, то как этого добиться?

...