Я пытаюсь применить фильтр в OpenCL. Однако кажется, что есть некоторые аспекты копирования изображений на устройство и с него и выполнения вычислений ядра изображения, которые я не понимаю. В результате я получаю C Segmentation fault: 11 error
, который не могу полностью отследить.
Ядро и convolve_image
полностью объясняют ошибку, которую я получаю, но я включаю весь код, который у меня есть, на случай, если кто-то захочет воспроизвести ошибку.
Ниже мое ядро
__kernel void Convolve(__read_only image2d_t imgSrc,
__constant float * kernelValues,
int kernelSize,
__write_only image2d_t imgConvolved)
{
const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | //Natural coordinates
CLK_ADDRESS_CLAMP | //Clamp to zeros
CLK_FILTER_NEAREST; //Don't interpolate
int column = get_global_id(0);
int row = get_global_id(1);
int halfWidth = (int)(kernelSize/2);
float4 sum = {0.0f,0.0f,0.0f,0.0f};
int kernelIndex = 0;
int2 coords;
for(int i = -halfWidth; i<= halfWidth; i++)
{
printf("here");
coords.y = row + i;
for(int j = -halfWidth; j <= halfWidth; j++)
{
coords.x =column+j;
float4 pixel;
pixel = read_imagef(imgSrc,smp,coords);
sum.x += pixel * kernelValues[kernelIndex++];
}
}
coords.x = column;
coords.y= row;
write_imagef(imgConvolved,coords,sum);
}
Вот так я загружаю изображения - convolve_image
:
def convolve_image(imgIn,convolution_kernel,kernel_size):
"apply morphological operation to image using GPU"
# (1) setup OpenCL
platforms = cl.get_platforms() # a platform corresponds to a driver (e.g. AMD)
platform = platforms[0] # take first platform
devices = platform.get_devices(cl.device_type.GPU) # get GPU devices of selected platform
device = devices[0] # take first GPU
context = cl.Context([device]) # put selected GPU into context object
queue = cl.CommandQueue(context, device) # create command queue for selected GPU and context
# (2) get shape of input image, allocate memory for output to which result can be copied to
shape = imgIn.shape
imgOut = numpy.empty_like(imgIn)
print(shape)
h_kernel = convolution_kernel
# Send the data to the guest memory.
d_kernel = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=h_kernel)
#print(h_image_output)
# (2) create image buffers which hold images for OpenCL
imgInBuf = cl.Image(context, cl.mem_flags.READ_ONLY, cl.ImageFormat(cl.channel_order.RGB, cl.channel_type.FLOAT), shape=shape) # holds a gray-valued image of given shape
imgOutBuf = cl.Image(context, cl.mem_flags.WRITE_ONLY, cl.ImageFormat(cl.channel_order.RGB, cl.channel_type.FLOAT), shape=shape) # placeholder for gray-valued image of given shape
kernel_width = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=numpy.int32(kernel_size))
# (3) load and compile OpenCL program
program = cl.Program(context, open('kernel.cl').read()).build()
# (3) from OpenCL program, get kernel object and set arguments (input image, operation type, output image)
kernel = cl.Kernel(program, 'Convolve') # name of function according to kernel.py
kernel.set_arg(0, imgInBuf) # input image buffer
kernel.set_arg(1, d_kernel)
kernel.set_arg(2, kernel_width)
kernel.set_arg(3, imgOutBuf) # output image buffer
# (4) copy image to device, execute kernel, copy data back
cl.enqueue_copy(queue, imgInBuf, imgIn, origin=(0, 0), region=shape, is_blocking=False) # copy image from CPU to GPU
print("here")
cl.enqueue_nd_range_kernel(queue, kernel, shape, None) # execute kernel, work is distributed across shape[0]*shape[1] work-items (one work-item per pixel of the image)
cl.enqueue_copy(queue, imgOut, imgOutBuf, origin=(0, 0), region=shape, is_blocking=True) # wait until finished copying resulting image back from GPU to CPU
return imgOut
Это вспомогательные функции:
def normalize_kernel(kernel, dim):
"""Normalizes a kernel
Args:
kernel: a two-d kernel
"""
for x in range(0, dim):
for y in range(dim):
kernel[x][y] = kernel[x][y] / numpy.sum(kernel)
return kernel
def gaussian_kernel(dim, sigma):
"""
The Guassian blur function is as follows:
x² + y²
G(x,y) = 1 - -------
--------- * e 2σ²
2πσ²
Finally the kernel is normalized to avoid too dark or too light areas.
"""
rows = dim
cols = dim
arr = numpy.empty([rows, cols]).astype(numpy.float32)
center = dim / 2
total = 0.0
for x in range(0, rows):
for y in range(0, cols):
x_ = x - center
y_ = y - center
arr[x][y] = (1 / (2.0 * math.pi * math.pow(sigma, 2))) * math.pow(math.e, -1.0 * (
(math.pow(x_, 2) + math.pow(y_, 2)) / (2.0 * math.pow(sigma, 2))))
total = total + arr[x][y]
return normalize_kernel(arr, dim)
И, наконец, это главное:
def main( ):
path =sys.argv[1]
print(path)
img = cv2.imread(path)
print(type(img))
# Kernel parameters
kernel_dim = 5
kernel_sig = 1
convolution_kernel = gaussian_kernel(kernel_dim, kernel_sig) # gaussian_kernel(kernel_dim, kernel_sig)
print ("The kernel:\n", convolution_kernel)
# dilate
convolved = convolve_image(img,convolution_kernel,kernel_dim)
cv2.imwrite('convolved.png', convolved)
if __name__ == '__main__':
main()