Ошибка сегментации: 11 с изображениями в Pyopencl - PullRequest
0 голосов
/ 05 июня 2019

Я пытаюсь применить фильтр в OpenCL. Однако кажется, что есть некоторые аспекты копирования изображений на устройство и с него и выполнения вычислений ядра изображения, которые я не понимаю. В результате я получаю C Segmentation fault: 11 error, который не могу полностью отследить.

Ядро и convolve_image полностью объясняют ошибку, которую я получаю, но я включаю весь код, который у меня есть, на случай, если кто-то захочет воспроизвести ошибку.

Ниже мое ядро ​​

 __kernel void Convolve(__read_only image2d_t imgSrc,
                                __constant float * kernelValues,
                                int kernelSize,
                                __write_only image2d_t imgConvolved)
    {
        const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | //Natural coordinates
                              CLK_ADDRESS_CLAMP | //Clamp to zeros
                              CLK_FILTER_NEAREST; //Don't interpolate
     int column = get_global_id(0);
     int row = get_global_id(1);

     int halfWidth = (int)(kernelSize/2);

     float4 sum = {0.0f,0.0f,0.0f,0.0f};
     int kernelIndex = 0;
     int2 coords;

     for(int i = -halfWidth; i<= halfWidth; i++)
     {
      printf("here");

        coords.y = row + i;
        for(int j = -halfWidth; j <= halfWidth; j++)
        {
            coords.x =column+j;
            float4 pixel;
            pixel = read_imagef(imgSrc,smp,coords);

            sum.x += pixel * kernelValues[kernelIndex++];
        }
     }
     coords.x = column;
     coords.y= row;
     write_imagef(imgConvolved,coords,sum);
    }

Вот так я загружаю изображения - convolve_image:

def convolve_image(imgIn,convolution_kernel,kernel_size):
    "apply morphological operation to image using GPU"
    # (1) setup OpenCL
    platforms = cl.get_platforms() # a platform corresponds to a driver (e.g. AMD)
    platform = platforms[0] # take first platform
    devices = platform.get_devices(cl.device_type.GPU) # get GPU devices of selected platform
    device = devices[0] # take first GPU
    context = cl.Context([device]) # put selected GPU into context object
    queue = cl.CommandQueue(context, device) # create command queue for selected GPU and context

    # (2) get shape of input image, allocate memory for output to which result can be copied to
    shape = imgIn.shape
    imgOut = numpy.empty_like(imgIn)   
    print(shape)

    h_kernel = convolution_kernel


    # Send the data to the guest memory.
    d_kernel = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=h_kernel)
    #print(h_image_output)
    # (2) create image buffers which hold images for OpenCL
    imgInBuf = cl.Image(context, cl.mem_flags.READ_ONLY, cl.ImageFormat(cl.channel_order.RGB, cl.channel_type.FLOAT), shape=shape) # holds a gray-valued image of given shape
    imgOutBuf = cl.Image(context, cl.mem_flags.WRITE_ONLY, cl.ImageFormat(cl.channel_order.RGB, cl.channel_type.FLOAT), shape=shape) # placeholder for gray-valued image of given shape
    kernel_width = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=numpy.int32(kernel_size))

    # (3) load and compile OpenCL program
    program = cl.Program(context, open('kernel.cl').read()).build()

    # (3) from OpenCL program, get kernel object and set arguments (input image, operation type, output image)
    kernel = cl.Kernel(program, 'Convolve') # name of function according to kernel.py
    kernel.set_arg(0, imgInBuf) # input image buffer
    kernel.set_arg(1, d_kernel)
    kernel.set_arg(2, kernel_width) 
    kernel.set_arg(3, imgOutBuf) # output image buffer

    # (4) copy image to device, execute kernel, copy data back
    cl.enqueue_copy(queue, imgInBuf, imgIn, origin=(0, 0), region=shape, is_blocking=False) # copy image from CPU to GPU
    print("here")
    cl.enqueue_nd_range_kernel(queue, kernel, shape, None) # execute kernel, work is distributed across shape[0]*shape[1] work-items (one work-item per pixel of the image)
    cl.enqueue_copy(queue, imgOut, imgOutBuf, origin=(0, 0), region=shape, is_blocking=True) # wait until finished copying resulting image back from GPU to CPU

    return imgOut

Это вспомогательные функции:

def normalize_kernel(kernel, dim):
    """Normalizes a kernel
    Args:
        kernel: a two-d kernel
    """
    for x in range(0, dim):
        for y in range(dim):
            kernel[x][y] = kernel[x][y] / numpy.sum(kernel)
    return kernel


def gaussian_kernel(dim, sigma):
    """
    The Guassian blur function is as follows:

                           x² + y²
    G(x,y) =    1        - -------
            --------- * e    2σ²
              2πσ²
    Finally the kernel is normalized to avoid too dark or too light areas.
    """
    rows = dim
    cols = dim
    arr = numpy.empty([rows, cols]).astype(numpy.float32)
    center = dim / 2
    total = 0.0
    for x in range(0, rows):
        for y in range(0, cols):
            x_ = x - center
            y_ = y - center
            arr[x][y] = (1 / (2.0 * math.pi * math.pow(sigma, 2))) * math.pow(math.e, -1.0 * (
                (math.pow(x_, 2) + math.pow(y_, 2)) / (2.0 * math.pow(sigma, 2))))
            total = total + arr[x][y]

    return normalize_kernel(arr, dim)

И, наконец, это главное:

def main( ):
    path =sys.argv[1]
    print(path)
    img = cv2.imread(path)
    print(type(img))


    # Kernel parameters
    kernel_dim = 5
    kernel_sig = 1
    convolution_kernel = gaussian_kernel(kernel_dim, kernel_sig)  # gaussian_kernel(kernel_dim, kernel_sig)

    print ("The kernel:\n", convolution_kernel)


    # dilate
    convolved = convolve_image(img,convolution_kernel,kernel_dim)
    cv2.imwrite('convolved.png', convolved)



if __name__ == '__main__':
    main()
...