Как мне исправить CL_INVALID_MEM_OBJECT, когда я передаю буфер в ядро ​​OpenCL и буфер из clCreateFromGLTexture? - PullRequest
1 голос
/ 17 марта 2020

Мне нужно сделать некоторые математические вычисления для пикселей текстуры GL. У меня есть рабочее ядро ​​OpenCL. Я могу преобразовать текстуру GL в буфер OpenCL (по крайней мере, это не дает ошибки). Но когда я пытаюсь установить этот буфер в качестве аргумента для ядра, я получаю ошибку -38 (CL_INVALID_MEM_OBJECT).

Изначально я попробовал его с помощью ржавчины, но когда это не помогло, я переключился на C, чтобы посмотреть, существует ли проблема независимо от оболочек ржавчины.

Это мое приложение C это воздуховод из нескольких примеров (c++ -g -o check2 check2.cxx -lOpenCL -lGL -lX11):

/*
  https://www.khronos.org/opengl/wiki/Tutorial:_OpenGL_3.0_Context_Creation_(GLX)
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <X11/Xlib.h>
#include <X11/Xutil.h>
#include <GL/gl.h>
#include <GL/glx.h>

#include <CL/cl.h>
#include <CL/cl_gl.h>

#define GLX_CONTEXT_MAJOR_VERSION_ARB       0x2091
#define GLX_CONTEXT_MINOR_VERSION_ARB       0x2092
typedef GLXContext (*glXCreateContextAttribsARBProc)(Display*, GLXFBConfig, GLXContext, Bool, const int*);

// Helper to check for extension string presence.  Adapted from:
//   http://www.opengl.org/resources/features/OGLextensions/
static bool isExtensionSupported(const char *extList, const char *extension)
{
    const char *start;
    const char *where, *terminator;

    /* Extension names should not have spaces. */
    where = strchr(extension, ' ');
    if (where || *extension == '\0')
    return false;

    /* It takes a bit of care to be fool-proof about parsing the
       OpenGL extensions string. Don't be fooled by sub-strings,
       etc. */
    for (start=extList;;) {
    where = strstr(start, extension);

    if (!where)
        break;

    terminator = where + strlen(extension);

    if ( where == start || *(where - 1) == ' ' )
        if ( *terminator == ' ' || *terminator == '\0' )
        return true;

    start = terminator;
    }

    return false;
}

static bool ctxErrorOccurred = false;
static int ctxErrorHandler( Display *dpy, XErrorEvent *ev )
{
    ctxErrorOccurred = true;
    return 0;
}



void test_kernel(cl_mem a_mem_obj, cl_mem b_mem_obj, cl_mem c_mem_obj, cl_kernel kernel, int LIST_SIZE, cl_command_queue command_queue)
{
    int err;

    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_mem_obj);
    printf("err? %d\tset kernel arg 0\n", err);
    err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_mem_obj);
    printf("err? %d\n", err);
    err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_mem_obj);
    printf("err? %d\n", err);

    // Execute the OpenCL kernel on the list
    size_t global_item_size = LIST_SIZE; // Process the entire lists
    size_t local_item_size = 64; // Divide work items into groups of 64
    err = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL);
    printf("err? %d\t(kernel diff)\n", err);

    // Read the memory buffer C on the device to the local variable C
    long long *C;
    C= (long long*)malloc(sizeof(*C)*LIST_SIZE);
    err = clEnqueueReadBuffer(command_queue, c_mem_obj, CL_TRUE, 0, 
            LIST_SIZE * sizeof(*C), C, 0, NULL, NULL);
    printf("err? %d(copy result)\n", err);

    for (int i=0; i<LIST_SIZE && i<10; i++) {
    printf("%lld\n", C[i]);
    }

    free(C);
}


int main(int argc, char* argv[])
{
    Display *display = XOpenDisplay(NULL);

    if (!display)
    {
        printf("Failed to open X display\n");
        exit(1);
    }

    // Get a matching FB config
    static int visual_attribs[] =
    {
     GLX_X_RENDERABLE    , True,
     GLX_DRAWABLE_TYPE   , GLX_WINDOW_BIT,
     GLX_RENDER_TYPE     , GLX_RGBA_BIT,
     GLX_X_VISUAL_TYPE   , GLX_TRUE_COLOR,
     GLX_RED_SIZE        , 8,
     GLX_GREEN_SIZE      , 8,
     GLX_BLUE_SIZE       , 8,
     GLX_ALPHA_SIZE      , 8,
     GLX_DEPTH_SIZE      , 24,
     GLX_STENCIL_SIZE    , 8,
     GLX_DOUBLEBUFFER    , True,
     //GLX_SAMPLE_BUFFERS  , 1,
     //GLX_SAMPLES         , 4,
     None
    };

    int glx_major, glx_minor;

    // FBConfigs were added in GLX version 1.3.
    if ( !glXQueryVersion( display, &glx_major, &glx_minor ) || 
     ( ( glx_major == 1 ) && ( glx_minor < 3 ) ) || ( glx_major < 1 ) )
    {
        printf("Invalid GLX version");
        exit(1);
    }

    printf( "Getting matching framebuffer configs\n" );
    int fbcount;
    GLXFBConfig* fbc = glXChooseFBConfig(display, DefaultScreen(display), visual_attribs, &fbcount);
    if (!fbc)
    {
        printf( "Failed to retrieve a framebuffer config\n" );
        exit(1);
    }
    printf( "Found %d matching FB configs.\n", fbcount );

    // Pick the FB config/visual with the most samples per pixel
    printf( "Getting XVisualInfos\n" );
    int best_fbc = -1, worst_fbc = -1, best_num_samp = -1, worst_num_samp = 999;

    int i;
    for (i=0; i<fbcount; ++i)
    {
        XVisualInfo *vi = glXGetVisualFromFBConfig( display, fbc[i] );
        if ( vi )
        {
            int samp_buf, samples;
            glXGetFBConfigAttrib( display, fbc[i], GLX_SAMPLE_BUFFERS, &samp_buf );
            glXGetFBConfigAttrib( display, fbc[i], GLX_SAMPLES       , &samples  );

            printf( "  Matching fbconfig %d, visual ID 0x%2lx: SAMPLE_BUFFERS = %d,"
                " SAMPLES = %d\n", 
                i, vi -> visualid, samp_buf, samples );

            if ( best_fbc < 0 || samp_buf && samples > best_num_samp )
            best_fbc = i, best_num_samp = samples;
            if ( worst_fbc < 0 || !samp_buf || samples < worst_num_samp )
            worst_fbc = i, worst_num_samp = samples;
        }
        XFree( vi );
    }

    GLXFBConfig bestFbc = fbc[ best_fbc ];

    // Be sure to free the FBConfig list allocated by glXChooseFBConfig()
    XFree( fbc );
#if 0
    // Get a visual
    XVisualInfo *vi = glXGetVisualFromFBConfig( display, bestFbc );
    printf( "Chosen visual ID = 0x%lx\n", vi->visualid );

    printf( "Creating colormap\n" );
    XSetWindowAttributes swa;
    Colormap cmap;
    swa.colormap = cmap = XCreateColormap( display,
                       RootWindow( display, vi->screen ), 
                       vi->visual, AllocNone );
    swa.background_pixmap = None ;
    swa.border_pixel      = 0;
    swa.event_mask        = StructureNotifyMask;

    printf( "Creating window\n" );
    Window win = XCreateWindow( display, RootWindow( display, vi->screen ), 
                0, 0, 100, 100, 0, vi->depth, InputOutput, 
                vi->visual, 
                CWBorderPixel|CWColormap|CWEventMask, &swa );
    if ( !win )
    {
        printf( "Failed to create window.\n" );
        exit(1);
    }

    // Done with the visual info data
    XFree( vi );

    XStoreName( display, win, "GL 3.0 Window" );

    printf( "Mapping window\n" );
    XMapWindow( display, win );
#endif

    // Get the default screen's GLX extension list
    const char *glxExts = glXQueryExtensionsString( display,
                            DefaultScreen( display ) );

    // NOTE: It is not necessary to create or make current to a context before
    // calling glXGetProcAddressARB
    glXCreateContextAttribsARBProc glXCreateContextAttribsARB = 0;
    glXCreateContextAttribsARB = (glXCreateContextAttribsARBProc)
    glXGetProcAddressARB( (const GLubyte *) "glXCreateContextAttribsARB" );

    GLXContext ctx = 0;

    // Install an X error handler so the application won't exit if GL 3.0
    // context allocation fails.
    //
    // Note this error handler is global.  All display connections in all threads
    // of a process use the same error handler, so be sure to guard against other
    // threads issuing X commands while this code is running.
    ctxErrorOccurred = false;
    int (*oldHandler)(Display*, XErrorEvent*) =
    XSetErrorHandler(&ctxErrorHandler);

    // Check for the GLX_ARB_create_context extension string and the function.
    // If either is not present, use GLX 1.3 context creation method.
    if ( !isExtensionSupported( glxExts, "GLX_ARB_create_context" ) ||
     !glXCreateContextAttribsARB )
    {
        printf( "glXCreateContextAttribsARB() not found"
            " ... using old-style GLX context\n" );
        ctx = glXCreateNewContext( display, bestFbc, GLX_RGBA_TYPE, 0, True );
    }

    // If it does, try to get a GL 3.0 context!
    else
    {
        int context_attribs[] =
        {
         GLX_CONTEXT_MAJOR_VERSION_ARB, 3,
         GLX_CONTEXT_MINOR_VERSION_ARB, 0,
         //GLX_CONTEXT_FLAGS_ARB        , GLX_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB,
         None
        };

        printf( "Creating context\n" );
        ctx = glXCreateContextAttribsARB( display, bestFbc, 0,
                          True, context_attribs );

        // Sync to ensure any errors generated are processed.
        XSync( display, False );
        if ( !ctxErrorOccurred && ctx )
        printf( "Created GL 3.0 context\n" );
        else
        {
            // Couldn't create GL 3.0 context.  Fall back to old-style 2.x context.
            // When a context version below 3.0 is requested, implementations will
            // return the newest context version compatible with OpenGL versions less
            // than version 3.0.
            // GLX_CONTEXT_MAJOR_VERSION_ARB = 1
            context_attribs[1] = 1;
            // GLX_CONTEXT_MINOR_VERSION_ARB = 0
            context_attribs[3] = 0;

            ctxErrorOccurred = false;

            printf( "Failed to create GL 3.0 context"
                " ... using old-style GLX context\n" );
            ctx = glXCreateContextAttribsARB( display, bestFbc, 0, 
                              True, context_attribs );
        }
    }

    // Sync to ensure any errors generated are processed.
    XSync( display, False );

    // Restore the original error handler
    XSetErrorHandler( oldHandler );

    if ( ctxErrorOccurred || !ctx )
    {
        printf( "Failed to create an OpenGL context\n" );
        exit(1);
    }

    // Verifying that context is a direct context
    if ( ! glXIsDirect ( display, ctx ) )
    {
        printf( "Indirect GLX rendering context obtained\n" );
    }
    else
    {
        printf( "Direct GLX rendering context obtained\n" );
    }


    printf( "Making context current\n" );
    glXMakeCurrent( display, 0, ctx );

#define IMAGE_DIAM 512
#define LIST_SIZE (IMAGE_DIAM*IMAGE_DIAM*3)
    GLuint texture_id;
    {
    glGenTextures(1, &texture_id);
    printf("err? %d\n", glGetError());

    glBindTexture(GL_TEXTURE_2D, texture_id);
    printf("err? %d\t(bind texture)\n", glGetError());

    unsigned char random_stack_crap[LIST_SIZE];
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB8, IMAGE_DIAM, IMAGE_DIAM, 0, GL_RGB, GL_UNSIGNED_BYTE, random_stack_crap);
    printf("err? %d\n", glGetError());
    }

    //
    //
    //



    cl_platform_id platform;
    clGetPlatformIDs(1, &platform, NULL);

    cl_device_id device;
    clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
    printf("device\t%p\n", device);

    printf ("context\t%p\t%p\n", ctx, glXGetCurrentContext());
    printf ("display\t%p\t%p\n", display, glXGetCurrentDisplay());

    cl_context_properties props[] =
    {
     CL_GL_CONTEXT_KHR,
     (cl_context_properties) glXGetCurrentContext(),
     CL_GLX_DISPLAY_KHR,
     (cl_context_properties) glXGetCurrentDisplay(),
     CL_CONTEXT_PLATFORM,
     (cl_context_properties) platform,
     0
    };
    int err=0;
    cl_context cl = clCreateContext(props, 1, &device, NULL, NULL, &err);
    printf("err? %d\n", err);
    printf("cl context %p\n", cl);


    //
    // https://www.eriksmistad.no/getting-started-with-opencl-and-gpu-computing/
    //


    cl_command_queue command_queue = clCreateCommandQueue(cl, device, 0, &err);
    printf("err? %d\n", err);
    printf("cl queue %p\n", command_queue);

    // Create memory buffers on the device for each vector 
    cl_mem z_mem_obj = clCreateFromGLTexture( cl,
                          CL_MEM_READ_WRITE,
                          GL_TEXTURE_2D,
                          0,
                          texture_id,
                          &err);

    cl_mem a_mem_obj = clCreateBuffer(cl, CL_MEM_READ_ONLY,
                      LIST_SIZE , NULL, &err);
    printf("err? %d\n", err);
    cl_mem b_mem_obj = clCreateBuffer(cl, CL_MEM_READ_ONLY,
                      LIST_SIZE , NULL, &err);
    printf("err? %d\n", err);
    cl_mem c_mem_obj = clCreateBuffer(cl, CL_MEM_WRITE_ONLY, 
                      LIST_SIZE*8 , NULL, &err);
    printf("err? %d\n", err);

    // Copy the lists A and B to their respective memory buffers

    {
    unsigned char *A = (unsigned char*) malloc(LIST_SIZE);
    for (int i=0; i<LIST_SIZE; i++) {
        A[i] = i+2;
    }
    unsigned char *B = (unsigned char*) malloc(LIST_SIZE);
    for (int i=0; i<LIST_SIZE; i++) {
        B[i] = 2*i;
    }

    err = clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0, 
                   LIST_SIZE, A, 0, NULL, NULL);
    printf("err? %d\tcopy A\n", err);

    err = clEnqueueWriteBuffer(command_queue, b_mem_obj, CL_TRUE, 0, 
                   LIST_SIZE, B, 0, NULL, NULL);
    printf("err? %d\tcopy B\n", err);
    }

    const char* source_str = "__kernel void diff(__global uchar* rgb_a, __global uchar* rgb_b, __global ulong * diff_out)\n\
{\n\
    int idx = get_global_id(0);\n\
    diff_out[idx] = abs((int)rgb_a[idx] - (int)rgb_b[idx]);\n\
}";
    size_t source_size = strlen(source_str);

    // Create a program from the kernel source
    cl_program program = clCreateProgramWithSource(cl, 1, 
                           (const char **)&source_str, (const size_t *)&source_size, &err);
    printf("err? %d\t(create program)\n", err);

    // Build the program
    err = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
    printf("err? %d\n", err);

    // Create the OpenCL kernel
    cl_kernel kernel = clCreateKernel(program, "diff", &err);
    printf("err? %d\n", err);


    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_mem_obj);
    printf("err? %d\n", err);
    err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_mem_obj);
    printf("err? %d\n", err);
    err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_mem_obj);
    printf("err? %d\n", err);

    // Execute the OpenCL kernel on the list
    size_t global_item_size = LIST_SIZE; // Process the entire lists
    size_t local_item_size = 64; // Divide work items into groups of 64
    err = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL);
    printf("err? %d\t(kernel diff)\n", err);

    // Read the memory buffer C on the device to the local variable C
    long long *C;
    C= (long long*)malloc(sizeof(*C)*LIST_SIZE);
    err = clEnqueueReadBuffer(command_queue, c_mem_obj, CL_TRUE, 0, 
            LIST_SIZE * sizeof(*C), C, 0, NULL, NULL);
    printf("err? %d(copy resul)\n", err);

    for (int i=0; i<LIST_SIZE && i<10; i++) {
    printf("%lld\n", C[i]);
    }

    free(C);

    test_kernel(a_mem_obj, b_mem_obj, c_mem_obj, kernel, LIST_SIZE, command_queue);
    test_kernel(z_mem_obj, b_mem_obj, c_mem_obj, kernel, LIST_SIZE, command_queue);

    return 0;
}

Вывод на мой linux / gentoo box с mesa-progs-8.4.0:

Getting matching framebuffer configs
Found 10 matching FB configs.
Getting XVisualInfos
  Matching fbconfig 0, visual ID 0x24: SAMPLE_BUFFERS = 0, SAMPLES = 0
  Matching fbconfig 1, visual ID 0x7a: SAMPLE_BUFFERS = 0, SAMPLES = 0
  Matching fbconfig 2, visual ID 0x38: SAMPLE_BUFFERS = 1, SAMPLES = 2
  Matching fbconfig 3, visual ID 0x8e: SAMPLE_BUFFERS = 1, SAMPLES = 2
  Matching fbconfig 4, visual ID 0x3a: SAMPLE_BUFFERS = 1, SAMPLES = 4
  Matching fbconfig 5, visual ID 0x90: SAMPLE_BUFFERS = 1, SAMPLES = 4
  Matching fbconfig 6, visual ID 0x44: SAMPLE_BUFFERS = 1, SAMPLES = 8
  Matching fbconfig 7, visual ID 0x9a: SAMPLE_BUFFERS = 1, SAMPLES = 8
  Matching fbconfig 8, visual ID 0x4c: SAMPLE_BUFFERS = 1, SAMPLES = 16
  Matching fbconfig 9, visual ID 0xa2: SAMPLE_BUFFERS = 1, SAMPLES = 16
Creating context
Created GL 3.0 context
Direct GLX rendering context obtained
Making context current
err? 0
err? 0  (bind texture)
err? 0
device  0x557c1755cef0
context 0x557c173e0b68  0x557c173e0b68
display 0x557c172ee950  0x557c172ee950
err? 0
cl context 0x557c17559340
err? 0
cl queue 0x557c177dd080
err? 0
err? 0
err? 0
err? 0  copy A
err? 0  copy B
err? 0  (create program)
err? 0
err? 0
err? 0
err? 0
err? 0
err? 0  (kernel diff)
err? 0(copy resul)
2
1
0
1
2
3
4
5
6
7
err? 0  set kernel arg 0
err? 0
err? 0
err? 0  (kernel diff)
err? 0(copy result)
2
1
0
1
2
3
4
5
6
7
err? -38        set kernel arg 0
err? 0
err? 0
err? 0  (kernel diff)
err? 0(copy result)
2
1
0
1
2
3
4
5
6
7

Обратите внимание на err? -38 set kernel arg 0, который соответствует последнему тесту, в котором я передаю z_mem_obj для использования буфера, преобразованного из текстуры GL.

clinfo | grep khr перечисляет cl_khr_gl_sharing в расширениях платформы и расширениях устройств.

...