Мне нужно сделать некоторые математические вычисления для пикселей текстуры GL. У меня есть рабочее ядро OpenCL. Я могу преобразовать текстуру GL в буфер OpenCL (по крайней мере, это не дает ошибки). Но когда я пытаюсь установить этот буфер в качестве аргумента для ядра, я получаю ошибку -38 (CL_INVALID_MEM_OBJECT).
Изначально я попробовал его с помощью ржавчины, но когда это не помогло, я переключился на C, чтобы посмотреть, существует ли проблема независимо от оболочек ржавчины.
Это мое приложение C это воздуховод из нескольких примеров (c++ -g -o check2 check2.cxx -lOpenCL -lGL -lX11
):
/*
https://www.khronos.org/opengl/wiki/Tutorial:_OpenGL_3.0_Context_Creation_(GLX)
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <X11/Xlib.h>
#include <X11/Xutil.h>
#include <GL/gl.h>
#include <GL/glx.h>
#include <CL/cl.h>
#include <CL/cl_gl.h>
#define GLX_CONTEXT_MAJOR_VERSION_ARB 0x2091
#define GLX_CONTEXT_MINOR_VERSION_ARB 0x2092
typedef GLXContext (*glXCreateContextAttribsARBProc)(Display*, GLXFBConfig, GLXContext, Bool, const int*);
// Helper to check for extension string presence. Adapted from:
// http://www.opengl.org/resources/features/OGLextensions/
static bool isExtensionSupported(const char *extList, const char *extension)
{
const char *start;
const char *where, *terminator;
/* Extension names should not have spaces. */
where = strchr(extension, ' ');
if (where || *extension == '\0')
return false;
/* It takes a bit of care to be fool-proof about parsing the
OpenGL extensions string. Don't be fooled by sub-strings,
etc. */
for (start=extList;;) {
where = strstr(start, extension);
if (!where)
break;
terminator = where + strlen(extension);
if ( where == start || *(where - 1) == ' ' )
if ( *terminator == ' ' || *terminator == '\0' )
return true;
start = terminator;
}
return false;
}
static bool ctxErrorOccurred = false;
static int ctxErrorHandler( Display *dpy, XErrorEvent *ev )
{
ctxErrorOccurred = true;
return 0;
}
void test_kernel(cl_mem a_mem_obj, cl_mem b_mem_obj, cl_mem c_mem_obj, cl_kernel kernel, int LIST_SIZE, cl_command_queue command_queue)
{
int err;
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_mem_obj);
printf("err? %d\tset kernel arg 0\n", err);
err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_mem_obj);
printf("err? %d\n", err);
err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_mem_obj);
printf("err? %d\n", err);
// Execute the OpenCL kernel on the list
size_t global_item_size = LIST_SIZE; // Process the entire lists
size_t local_item_size = 64; // Divide work items into groups of 64
err = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL);
printf("err? %d\t(kernel diff)\n", err);
// Read the memory buffer C on the device to the local variable C
long long *C;
C= (long long*)malloc(sizeof(*C)*LIST_SIZE);
err = clEnqueueReadBuffer(command_queue, c_mem_obj, CL_TRUE, 0,
LIST_SIZE * sizeof(*C), C, 0, NULL, NULL);
printf("err? %d(copy result)\n", err);
for (int i=0; i<LIST_SIZE && i<10; i++) {
printf("%lld\n", C[i]);
}
free(C);
}
int main(int argc, char* argv[])
{
Display *display = XOpenDisplay(NULL);
if (!display)
{
printf("Failed to open X display\n");
exit(1);
}
// Get a matching FB config
static int visual_attribs[] =
{
GLX_X_RENDERABLE , True,
GLX_DRAWABLE_TYPE , GLX_WINDOW_BIT,
GLX_RENDER_TYPE , GLX_RGBA_BIT,
GLX_X_VISUAL_TYPE , GLX_TRUE_COLOR,
GLX_RED_SIZE , 8,
GLX_GREEN_SIZE , 8,
GLX_BLUE_SIZE , 8,
GLX_ALPHA_SIZE , 8,
GLX_DEPTH_SIZE , 24,
GLX_STENCIL_SIZE , 8,
GLX_DOUBLEBUFFER , True,
//GLX_SAMPLE_BUFFERS , 1,
//GLX_SAMPLES , 4,
None
};
int glx_major, glx_minor;
// FBConfigs were added in GLX version 1.3.
if ( !glXQueryVersion( display, &glx_major, &glx_minor ) ||
( ( glx_major == 1 ) && ( glx_minor < 3 ) ) || ( glx_major < 1 ) )
{
printf("Invalid GLX version");
exit(1);
}
printf( "Getting matching framebuffer configs\n" );
int fbcount;
GLXFBConfig* fbc = glXChooseFBConfig(display, DefaultScreen(display), visual_attribs, &fbcount);
if (!fbc)
{
printf( "Failed to retrieve a framebuffer config\n" );
exit(1);
}
printf( "Found %d matching FB configs.\n", fbcount );
// Pick the FB config/visual with the most samples per pixel
printf( "Getting XVisualInfos\n" );
int best_fbc = -1, worst_fbc = -1, best_num_samp = -1, worst_num_samp = 999;
int i;
for (i=0; i<fbcount; ++i)
{
XVisualInfo *vi = glXGetVisualFromFBConfig( display, fbc[i] );
if ( vi )
{
int samp_buf, samples;
glXGetFBConfigAttrib( display, fbc[i], GLX_SAMPLE_BUFFERS, &samp_buf );
glXGetFBConfigAttrib( display, fbc[i], GLX_SAMPLES , &samples );
printf( " Matching fbconfig %d, visual ID 0x%2lx: SAMPLE_BUFFERS = %d,"
" SAMPLES = %d\n",
i, vi -> visualid, samp_buf, samples );
if ( best_fbc < 0 || samp_buf && samples > best_num_samp )
best_fbc = i, best_num_samp = samples;
if ( worst_fbc < 0 || !samp_buf || samples < worst_num_samp )
worst_fbc = i, worst_num_samp = samples;
}
XFree( vi );
}
GLXFBConfig bestFbc = fbc[ best_fbc ];
// Be sure to free the FBConfig list allocated by glXChooseFBConfig()
XFree( fbc );
#if 0
// Get a visual
XVisualInfo *vi = glXGetVisualFromFBConfig( display, bestFbc );
printf( "Chosen visual ID = 0x%lx\n", vi->visualid );
printf( "Creating colormap\n" );
XSetWindowAttributes swa;
Colormap cmap;
swa.colormap = cmap = XCreateColormap( display,
RootWindow( display, vi->screen ),
vi->visual, AllocNone );
swa.background_pixmap = None ;
swa.border_pixel = 0;
swa.event_mask = StructureNotifyMask;
printf( "Creating window\n" );
Window win = XCreateWindow( display, RootWindow( display, vi->screen ),
0, 0, 100, 100, 0, vi->depth, InputOutput,
vi->visual,
CWBorderPixel|CWColormap|CWEventMask, &swa );
if ( !win )
{
printf( "Failed to create window.\n" );
exit(1);
}
// Done with the visual info data
XFree( vi );
XStoreName( display, win, "GL 3.0 Window" );
printf( "Mapping window\n" );
XMapWindow( display, win );
#endif
// Get the default screen's GLX extension list
const char *glxExts = glXQueryExtensionsString( display,
DefaultScreen( display ) );
// NOTE: It is not necessary to create or make current to a context before
// calling glXGetProcAddressARB
glXCreateContextAttribsARBProc glXCreateContextAttribsARB = 0;
glXCreateContextAttribsARB = (glXCreateContextAttribsARBProc)
glXGetProcAddressARB( (const GLubyte *) "glXCreateContextAttribsARB" );
GLXContext ctx = 0;
// Install an X error handler so the application won't exit if GL 3.0
// context allocation fails.
//
// Note this error handler is global. All display connections in all threads
// of a process use the same error handler, so be sure to guard against other
// threads issuing X commands while this code is running.
ctxErrorOccurred = false;
int (*oldHandler)(Display*, XErrorEvent*) =
XSetErrorHandler(&ctxErrorHandler);
// Check for the GLX_ARB_create_context extension string and the function.
// If either is not present, use GLX 1.3 context creation method.
if ( !isExtensionSupported( glxExts, "GLX_ARB_create_context" ) ||
!glXCreateContextAttribsARB )
{
printf( "glXCreateContextAttribsARB() not found"
" ... using old-style GLX context\n" );
ctx = glXCreateNewContext( display, bestFbc, GLX_RGBA_TYPE, 0, True );
}
// If it does, try to get a GL 3.0 context!
else
{
int context_attribs[] =
{
GLX_CONTEXT_MAJOR_VERSION_ARB, 3,
GLX_CONTEXT_MINOR_VERSION_ARB, 0,
//GLX_CONTEXT_FLAGS_ARB , GLX_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB,
None
};
printf( "Creating context\n" );
ctx = glXCreateContextAttribsARB( display, bestFbc, 0,
True, context_attribs );
// Sync to ensure any errors generated are processed.
XSync( display, False );
if ( !ctxErrorOccurred && ctx )
printf( "Created GL 3.0 context\n" );
else
{
// Couldn't create GL 3.0 context. Fall back to old-style 2.x context.
// When a context version below 3.0 is requested, implementations will
// return the newest context version compatible with OpenGL versions less
// than version 3.0.
// GLX_CONTEXT_MAJOR_VERSION_ARB = 1
context_attribs[1] = 1;
// GLX_CONTEXT_MINOR_VERSION_ARB = 0
context_attribs[3] = 0;
ctxErrorOccurred = false;
printf( "Failed to create GL 3.0 context"
" ... using old-style GLX context\n" );
ctx = glXCreateContextAttribsARB( display, bestFbc, 0,
True, context_attribs );
}
}
// Sync to ensure any errors generated are processed.
XSync( display, False );
// Restore the original error handler
XSetErrorHandler( oldHandler );
if ( ctxErrorOccurred || !ctx )
{
printf( "Failed to create an OpenGL context\n" );
exit(1);
}
// Verifying that context is a direct context
if ( ! glXIsDirect ( display, ctx ) )
{
printf( "Indirect GLX rendering context obtained\n" );
}
else
{
printf( "Direct GLX rendering context obtained\n" );
}
printf( "Making context current\n" );
glXMakeCurrent( display, 0, ctx );
#define IMAGE_DIAM 512
#define LIST_SIZE (IMAGE_DIAM*IMAGE_DIAM*3)
GLuint texture_id;
{
glGenTextures(1, &texture_id);
printf("err? %d\n", glGetError());
glBindTexture(GL_TEXTURE_2D, texture_id);
printf("err? %d\t(bind texture)\n", glGetError());
unsigned char random_stack_crap[LIST_SIZE];
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB8, IMAGE_DIAM, IMAGE_DIAM, 0, GL_RGB, GL_UNSIGNED_BYTE, random_stack_crap);
printf("err? %d\n", glGetError());
}
//
//
//
cl_platform_id platform;
clGetPlatformIDs(1, &platform, NULL);
cl_device_id device;
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
printf("device\t%p\n", device);
printf ("context\t%p\t%p\n", ctx, glXGetCurrentContext());
printf ("display\t%p\t%p\n", display, glXGetCurrentDisplay());
cl_context_properties props[] =
{
CL_GL_CONTEXT_KHR,
(cl_context_properties) glXGetCurrentContext(),
CL_GLX_DISPLAY_KHR,
(cl_context_properties) glXGetCurrentDisplay(),
CL_CONTEXT_PLATFORM,
(cl_context_properties) platform,
0
};
int err=0;
cl_context cl = clCreateContext(props, 1, &device, NULL, NULL, &err);
printf("err? %d\n", err);
printf("cl context %p\n", cl);
//
// https://www.eriksmistad.no/getting-started-with-opencl-and-gpu-computing/
//
cl_command_queue command_queue = clCreateCommandQueue(cl, device, 0, &err);
printf("err? %d\n", err);
printf("cl queue %p\n", command_queue);
// Create memory buffers on the device for each vector
cl_mem z_mem_obj = clCreateFromGLTexture( cl,
CL_MEM_READ_WRITE,
GL_TEXTURE_2D,
0,
texture_id,
&err);
cl_mem a_mem_obj = clCreateBuffer(cl, CL_MEM_READ_ONLY,
LIST_SIZE , NULL, &err);
printf("err? %d\n", err);
cl_mem b_mem_obj = clCreateBuffer(cl, CL_MEM_READ_ONLY,
LIST_SIZE , NULL, &err);
printf("err? %d\n", err);
cl_mem c_mem_obj = clCreateBuffer(cl, CL_MEM_WRITE_ONLY,
LIST_SIZE*8 , NULL, &err);
printf("err? %d\n", err);
// Copy the lists A and B to their respective memory buffers
{
unsigned char *A = (unsigned char*) malloc(LIST_SIZE);
for (int i=0; i<LIST_SIZE; i++) {
A[i] = i+2;
}
unsigned char *B = (unsigned char*) malloc(LIST_SIZE);
for (int i=0; i<LIST_SIZE; i++) {
B[i] = 2*i;
}
err = clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0,
LIST_SIZE, A, 0, NULL, NULL);
printf("err? %d\tcopy A\n", err);
err = clEnqueueWriteBuffer(command_queue, b_mem_obj, CL_TRUE, 0,
LIST_SIZE, B, 0, NULL, NULL);
printf("err? %d\tcopy B\n", err);
}
const char* source_str = "__kernel void diff(__global uchar* rgb_a, __global uchar* rgb_b, __global ulong * diff_out)\n\
{\n\
int idx = get_global_id(0);\n\
diff_out[idx] = abs((int)rgb_a[idx] - (int)rgb_b[idx]);\n\
}";
size_t source_size = strlen(source_str);
// Create a program from the kernel source
cl_program program = clCreateProgramWithSource(cl, 1,
(const char **)&source_str, (const size_t *)&source_size, &err);
printf("err? %d\t(create program)\n", err);
// Build the program
err = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
printf("err? %d\n", err);
// Create the OpenCL kernel
cl_kernel kernel = clCreateKernel(program, "diff", &err);
printf("err? %d\n", err);
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_mem_obj);
printf("err? %d\n", err);
err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_mem_obj);
printf("err? %d\n", err);
err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_mem_obj);
printf("err? %d\n", err);
// Execute the OpenCL kernel on the list
size_t global_item_size = LIST_SIZE; // Process the entire lists
size_t local_item_size = 64; // Divide work items into groups of 64
err = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL);
printf("err? %d\t(kernel diff)\n", err);
// Read the memory buffer C on the device to the local variable C
long long *C;
C= (long long*)malloc(sizeof(*C)*LIST_SIZE);
err = clEnqueueReadBuffer(command_queue, c_mem_obj, CL_TRUE, 0,
LIST_SIZE * sizeof(*C), C, 0, NULL, NULL);
printf("err? %d(copy resul)\n", err);
for (int i=0; i<LIST_SIZE && i<10; i++) {
printf("%lld\n", C[i]);
}
free(C);
test_kernel(a_mem_obj, b_mem_obj, c_mem_obj, kernel, LIST_SIZE, command_queue);
test_kernel(z_mem_obj, b_mem_obj, c_mem_obj, kernel, LIST_SIZE, command_queue);
return 0;
}
Вывод на мой linux / gentoo box с mesa-progs-8.4.0:
Getting matching framebuffer configs
Found 10 matching FB configs.
Getting XVisualInfos
Matching fbconfig 0, visual ID 0x24: SAMPLE_BUFFERS = 0, SAMPLES = 0
Matching fbconfig 1, visual ID 0x7a: SAMPLE_BUFFERS = 0, SAMPLES = 0
Matching fbconfig 2, visual ID 0x38: SAMPLE_BUFFERS = 1, SAMPLES = 2
Matching fbconfig 3, visual ID 0x8e: SAMPLE_BUFFERS = 1, SAMPLES = 2
Matching fbconfig 4, visual ID 0x3a: SAMPLE_BUFFERS = 1, SAMPLES = 4
Matching fbconfig 5, visual ID 0x90: SAMPLE_BUFFERS = 1, SAMPLES = 4
Matching fbconfig 6, visual ID 0x44: SAMPLE_BUFFERS = 1, SAMPLES = 8
Matching fbconfig 7, visual ID 0x9a: SAMPLE_BUFFERS = 1, SAMPLES = 8
Matching fbconfig 8, visual ID 0x4c: SAMPLE_BUFFERS = 1, SAMPLES = 16
Matching fbconfig 9, visual ID 0xa2: SAMPLE_BUFFERS = 1, SAMPLES = 16
Creating context
Created GL 3.0 context
Direct GLX rendering context obtained
Making context current
err? 0
err? 0 (bind texture)
err? 0
device 0x557c1755cef0
context 0x557c173e0b68 0x557c173e0b68
display 0x557c172ee950 0x557c172ee950
err? 0
cl context 0x557c17559340
err? 0
cl queue 0x557c177dd080
err? 0
err? 0
err? 0
err? 0 copy A
err? 0 copy B
err? 0 (create program)
err? 0
err? 0
err? 0
err? 0
err? 0
err? 0 (kernel diff)
err? 0(copy resul)
2
1
0
1
2
3
4
5
6
7
err? 0 set kernel arg 0
err? 0
err? 0
err? 0 (kernel diff)
err? 0(copy result)
2
1
0
1
2
3
4
5
6
7
err? -38 set kernel arg 0
err? 0
err? 0
err? 0 (kernel diff)
err? 0(copy result)
2
1
0
1
2
3
4
5
6
7
Обратите внимание на err? -38 set kernel arg 0
, который соответствует последнему тесту, в котором я передаю z_mem_obj
для использования буфера, преобразованного из текстуры GL.
clinfo | grep khr
перечисляет cl_khr_gl_sharing
в расширениях платформы и расширениях устройств.