Я создал класс с именем relaxation. Файл relaxation.hpp выглядит следующим образом:
#ifndef RELAXATION_HPP
#define RELAXATION_HPP
#define __CL_ENABLE_EXCEPTIONS
#include <iostream>
#include <CL/cl.hpp>
#include <cmath>
#include "CL_ERROR.hpp"
#include "defines.hpp"
/**
 * Owns the OpenCL kernel object and the device-side stencil buffer used by
 * the relaxation sweeps, plus the host copy of that stencil.
 *
 * The class holds a raw, heap-allocated host array (`stencil`) that the
 * destructor releases, so copying is disabled: an implicit member-wise copy
 * would leave two objects deleting the same array.
 *
 * @tparam T  Element type of the stencil and of the device buffers.
 */
template<class T>
class relaxation
{
protected:
    // Edge length of the cubic stencil.
    int stencilDataWidth = 3;
    // Number of stencil entries (width^3), computed with integer arithmetic
    // so no floating-point rounding is involved.
    int relaxationStencilSize =
        stencilDataWidth * stencilDataWidth * stencilDataWidth;
    // Host copy of the stencil; allocated in setStencil(), freed in the destructor.
    T* stencil = nullptr;
    // Weight used when the stencil coefficients are built in setStencil().
    T omega = 0.8;
    // Device buffer that receives the stencil coefficients.
    cl::Buffer relaxationStencilBuffer;
    // Kernel object created by createKernel().
    cl::Kernel kernel;
public:
    /// Creates the kernel from the given program and uploads the stencil.
    relaxation(cl::Program& _programInput, cl::CommandQueue& _queue,
               const cl::Context& _context);

    // Non-copyable: `stencil` is an owning raw pointer (see class comment).
    relaxation(const relaxation&) = delete;
    relaxation& operator=(const relaxation&) = delete;

    /// Releases the host stencil array.
    ~relaxation();

    /// Returns the current value of omega.
    T getOmega();

    /// Creates the kernel object from `program`.
    void createKernel(cl::Program& program);

    /// Builds the host stencil and writes it to a newly created device buffer.
    void setStencil(const cl::Context& context, cl::CommandQueue& queue);

    /// Runs `numberOfRelaxationSweep` sweeps; `event` receives the event of
    /// the final copy back into `inoutBuffer`.
    void relaxing(cl::CommandQueue& queue,
                  cl::Buffer& inoutBuffer, cl::Buffer& intermediateBuffer,
                  cl::Buffer& RHSBuffer,
                  const cl::NDRange& globalRange, const cl::NDRange& localRange,
                  const int numberOfRelaxationSweep,
                  const int& domainHeight, const int& domainWidth,
                  const int& domainDepth, cl::size_t<3> bufferOrigin,
                  cl::size_t<3> hostOrigin, cl::size_t<3> region,
                  const int& deviceWidth, const int& spatialStepSize, cl::Event& event);
};
//#include "../src/relaxation.cc"
#endif
А исходный файл (relaxation.cc) выглядит так:
#include "../includes/relaxation.hpp"
/**
 * Constructs the relaxation object: first creates the kernel from the given
 * program, then builds the stencil and uploads it through the given queue.
 *
 * @param _program  Program handed to createKernel().
 * @param _queue    Command queue handed to setStencil().
 * @param _context  Context handed to setStencil().
 */
template<class T>
relaxation<T>::relaxation(cl::Program& _program, cl::CommandQueue& _queue,
                          const cl::Context& _context)
{
    this->createKernel(_program);
    this->setStencil(_context, _queue);
}
/**
 * Frees the host-side stencil array allocated by setStencil().
 */
template<class T>
relaxation<T>::~relaxation()
{
    delete[] this->stencil;
}
/**
 * Accessor for omega.
 *
 * @return The value currently stored in the `omega` member.
 */
template<class T>
inline T relaxation<T>::getOmega()
{
    return this->omega;
}
/**
 * Builds the host stencil and uploads it to a newly created read-only device
 * buffer using a blocking write.
 *
 * All entries are zero except index 13, which is (1 - omega), and indices
 * 4, 10, 12, 14, 16 and 22, which are omega / 6.
 * NOTE(review): for a row-major 3x3x3 layout these are the centre cell and
 * its six face neighbours; confirm against the kernel's indexing.
 *
 * On an OpenCL error the message is printed and the process terminates with
 * a non-zero exit status.
 *
 * @param context  Context in which the stencil buffer is created.
 * @param queue    Queue used for the blocking write of the stencil.
 */
template<class T>
void relaxation<T>::setStencil(const cl::Context& context, cl::CommandQueue& queue)
{
    // Release the array from any earlier call so repeated calls do not leak;
    // delete[] on a null pointer is a no-op.
    delete[] stencil;
    stencil = new T[relaxationStencilSize];

    for (int i = 0; i < relaxationStencilSize; ++i)
    {
        stencil[i] = 0.0;
    }

    // Same expression as each original per-entry assignment: evaluated in
    // double, then converted to T.
    const T neighbourWeight = 1.0 / 6.0 * omega;
    const int neighbourIndices[] = {4, 10, 12, 14, 16, 22};
    for (const int index : neighbourIndices)
    {
        stencil[index] = neighbourWeight;
    }
    stencil[13] = (1.0 - omega);

    try
    {
        relaxationStencilBuffer = cl::Buffer(context, CL_MEM_READ_ONLY,
                                             relaxationStencilSize * sizeof(T));
        // CL_TRUE makes the write blocking.
        queue.enqueueWriteBuffer(relaxationStencilBuffer, CL_TRUE, 0,
                                 relaxationStencilSize * sizeof(T), stencil);
    }
    catch (const cl::Error& error)
    {
        std::cout << " -> Relaxaation class, Problem in buffer creation/writing "
                     "data to device " << std::endl;
        std::cout << " -> " << getErrorString(error) << std::endl;
        // Non-zero status: this is a failure path, so it must not report success.
        exit(1);
    }
}
/**
 * Creates the kernel object named "relaxation" from the given program and
 * stores it in the `kernel` member.
 *
 * Progress is printed to standard output. On an OpenCL error the message is
 * printed and the process terminates with a non-zero exit status.
 *
 * @param program  Program that is expected to contain the "relaxation" kernel.
 */
template<class T>
void relaxation<T>::createKernel(cl::Program& program)
{
    std::cout << "==> Relaxation class, Creating kernels";
    try
    {
        kernel = cl::Kernel(program, "relaxation");
        std::cout << "\t-> Done!" << std::endl;
    }
    catch (const cl::Error& error)
    {
        std::cout << " -> Relaxation class, Problem in kernel " << std::endl;
        std::cout << " -> " << getErrorString(error) << std::endl;
        // Non-zero status: this is a failure path, so it must not report success.
        exit(1);
    }
}
/**
 * Runs `numberOfRelaxationSweep` sweeps of the relaxation kernel.
 *
 * Original author's note: x_n = b - A x_{n-1}, where
 *   inoutBuffer             -> x
 *   RHSBuffer               -> b
 *   relaxationStencilBuffer -> A
 *
 * Sequence of operations:
 *   1. Copy the rectangular region of inoutBuffer into intermediateBuffer and
 *      wait for it, so both buffers hold the same boundary values.
 *   2. Set the kernel arguments once.
 *   3. For every sweep: enqueue the kernel, finish the queue, then copy the
 *      region of intermediateBuffer back into inoutBuffer. Every copy-back
 *      except the last is waited on here; the last one is attached to `event`
 *      and is NOT waited on, so the caller must wait on `event`.
 *
 * If `numberOfRelaxationSweep` is not positive, no kernel runs and `event`
 * is set to the already-completed initial copy event, so the caller can
 * always wait on it.
 *
 * On an OpenCL error the message is printed and the process terminates with
 * a non-zero exit status.
 *
 * @param queue                    Queue on which all commands are enqueued.
 * @param inoutBuffer              Kernel argument 0; source of the initial copy
 *                                 and destination of every copy-back.
 * @param intermediateBuffer       Kernel argument 7; destination of the initial
 *                                 copy and source of every copy-back.
 * @param RHSBuffer                Kernel argument 6.
 * @param globalRange              Global work size of the kernel launch.
 * @param localRange               Local work size of the kernel launch.
 * @param numberOfRelaxationSweep  Number of kernel launches.
 * @param domainHeight             Kernel argument 1.
 * @param domainWidth              Kernel argument 2.
 * @param domainDepth              Kernel argument 3.
 * @param bufferOrigin             Source origin passed to every rectangular copy.
 * @param hostOrigin               Destination origin passed to every rectangular copy.
 * @param region                   Region passed to every rectangular copy.
 * @param deviceWidth              Row pitch of both buffers, in elements of T.
 * @param spatialStepSize          Kernel argument 8.
 * @param event                    Receives the event of the final copy-back.
 */
template<class T>
void relaxation<T>::relaxing(cl::CommandQueue& queue,
                             cl::Buffer& inoutBuffer, cl::Buffer& intermediateBuffer,
                             cl::Buffer& RHSBuffer,
                             const cl::NDRange& globalRange, const cl::NDRange& localRange,
                             const int numberOfRelaxationSweep,
                             const int& domainHeight, const int& domainWidth,
                             const int& domainDepth, cl::size_t<3> bufferOrigin,
                             cl::size_t<3> hostOrigin, cl::size_t<3> region,
                             const int& deviceWidth, const int& spatialStepSize,
                             cl::Event& event)
{
    // Shared failure path: print the two diagnostic lines and terminate with
    // a non-zero status so the failure is visible to the caller of the process.
    const auto abortWith = [](const char* message, const cl::Error& error)
    {
        std::cout << message << std::endl;
        std::cout << " -> " << getErrorString(error) << std::endl;
        exit(1);
    };

    // Row pitch in bytes; the slice pitch is passed as 0 in every copy.
    const auto rowPitch = deviceWidth * sizeof(T);

    // This step gives the intermediate buffer the same boundary values as x.
    cl::Event copyEvent;
    cl::Event iterationEvent;
    try
    {
        queue.enqueueCopyBufferRect(inoutBuffer, intermediateBuffer, bufferOrigin,
                                    hostOrigin, region, rowPitch, 0,
                                    rowPitch, 0, nullptr, &copyEvent);
    }
    catch (const cl::Error& error)
    {
        abortWith(" -> Problem in copying buffer x to y", error);
    }
    copyEvent.wait();

    int argCount = 0;
    try
    {
        kernel.setArg(argCount++, inoutBuffer);
        kernel.setArg(argCount++, domainHeight);
        kernel.setArg(argCount++, domainWidth);
        kernel.setArg(argCount++, domainDepth);
        kernel.setArg(argCount++, relaxationStencilBuffer);
        kernel.setArg(argCount++, stencilDataWidth);
        kernel.setArg(argCount++, RHSBuffer);
        kernel.setArg(argCount++, intermediateBuffer);
        kernel.setArg(argCount++, spatialStepSize);
        // NOTE(review): localMemSize, localHeight, localWidth and localDepth are
        // not declared in this function; presumably they come from defines.hpp.
        // A size with a null pointer requests local memory for the kernel.
        kernel.setArg(argCount++, localMemSize * sizeof(T), nullptr);
        kernel.setArg(argCount++, localHeight);
        kernel.setArg(argCount++, localWidth);
        kernel.setArg(argCount++, localDepth);
    }
    catch (const cl::Error& error)
    {
        abortWith(" -> Relaxation class, Problem in setting the argument of kernel", error);
    }

    if (numberOfRelaxationSweep <= 0)
    {
        // No sweep will run, so the loop below would never assign `event`.
        // Hand back the completed initial copy so the caller's wait is valid.
        event = copyEvent;
        return;
    }

    for (int sweep = 0; sweep < numberOfRelaxationSweep; ++sweep)
    {
        try
        {
            queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalRange, localRange,
                                       nullptr, &iterationEvent);
        }
        catch (const cl::Error& error)
        {
            abortWith(" -> Relaxation class, Problem in enqueue kernel", error);
        }

        try
        {
            queue.finish();
        }
        catch (const cl::Error& error)
        {
            abortWith(" -> Relaxation class, Problem in finishing kernel", error);
        }
        iterationEvent.wait();

        // The final copy-back signals the caller's event and is left for the
        // caller to wait on; earlier copy-backs are waited on here so the next
        // sweep reads the updated x.
        const bool isLastSweep = (sweep == numberOfRelaxationSweep - 1);
        cl::Event& copyBackEvent = isLastSweep ? event : copyEvent;
        try
        {
            queue.enqueueCopyBufferRect(intermediateBuffer, inoutBuffer, bufferOrigin,
                                        hostOrigin, region, rowPitch, 0,
                                        rowPitch, 0, nullptr, &copyBackEvent);
        }
        catch (const cl::Error& error)
        {
            abortWith(" -> Problem in copying buffer x to y", error);
        }

        if (!isLastSweep)
        {
            copyEvent.wait();
        }
    }
}
Для запуска кода в main.cc я делаю следующее:
cl::Event event;
// `new` yields a pointer, so the variable has to be a pointer too; the calls
// below already use `->`. The object must be released with `delete` once it
// is no longer needed.
relaxation<T>* fmgRelaxation = new relaxation<T>(program, queue, context);
fmgRelaxation->relaxing(queue, xBuffer, intermediateBuffer[0], bBuffer,
                        globalRange[0], localRange, preSweepNumber,
                        deviceHeight[0], deviceWidth[0], deviceDepth[0],
                        bufferOrigin, hostOrigin, region[0], deviceWidth[0],
                        spatialStepSize[0], event);
// Blocks the host until the last copy-back enqueued by relaxing() completes.
// relaxing() itself waits on the initial copy, finishes the queue after each
// kernel launch and waits on every earlier copy-back, so once this returns
// all work enqueued by relaxing() is done.
event.wait();
// Do sth else
Остальные параметры созданы заранее (я не привожу весь код последней части, так как он слишком длинный). Я передаю ссылку на событие из main.cc в метод relaxing класса relaxation, и последний вызов enqueueCopyBufferRect в методе relaxing сигнализирует это событие (см. мой короткий комментарий в коде). Мой вопрос: будет ли код в main.cc заблокирован на хосте на вызове event.wait() до тех пор, пока не завершится всё, что выполняется в классе relaxation, и только после этого продолжит выполнение? Если нет, то как этого добиться?