Hello all,
I've been trying to put together an OpenCL particle demo, but I get an error whose cause I cannot identify, so I have no idea how to fix it.
Once execution reaches the call to clEnqueueNDRangeKernel(...), I get the error "Access violation writing location 0xFFFFFFFD", yet none of the arguments I pass have that value.
I am kind of at a loss here so I have no idea what the problem is.
My particles update code:
void UpdateFunc(int)
{
glutTimerFunc(30, UpdateFunc, 30);
// Get workgroup size
size_t global = NUM_PARTICLES;
size_t local;
cl_int err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, nullptr);
if(err != CL_SUCCESS)
{
std::cout << "Couldn't get work group info!" << std::endl;
getchar();
exit(-1);
}
// Aquire position vbo.
err = clEnqueueAcquireGLObjects(commandQueue, 1, &PositionArray, 0, nullptr, nullptr);
if(err != CL_SUCCESS)
{
std::cout << "Error when aquiring OGL object." << std::endl;
getchar();
exit(-1);
}
// run kernel.
cl_event wait;
err = clEnqueueNDRangeKernel(commandQueue, kernel, 1, nullptr, &global, &local, 0, nullptr, &wait);
if(err != CL_SUCCESS)
{
std::cout << "Error when running kernel." << std::endl;
getchar();
exit(-1);
}
clWaitForEvents(1, &wait);
clFinish(commandQueue);
// release position vbo.
err = clEnqueueReleaseGLObjects(commandQueue, 1, &PositionArray, 0, nullptr, nullptr);
if(err != CL_SUCCESS)
{
std::cout << "Error when releasing OGL object." << std::endl;
getchar();
exit(-1);
}
glutPostRedisplay();
}
And my whole source:
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <vector>
#include <GL/glew.h>
#include <GL/glut.h>
#include <CL/cl.h>
#include <CL/cl_gl.h>
// Number of particles; used for the buffer sizes, the draw count and the
// global work size of the kernel.
#define NUM_PARTICLES 1024
// OpenCL state, created in main() and used by the GLUT callbacks.
cl_platform_id platformID;      // first platform reported by clGetPlatformIDs
cl_device_id deviceID;          // first GPU device of that platform
cl_program program;             // program built from particles.kernel.cl
cl_context context;             // context created with the GL sharing properties
cl_command_queue commandQueue;  // queue all OpenCL commands are enqueued on
cl_kernel kernel;               // the "particle_cycle" kernel
cl_mem VelocityArray;           // OpenCL buffer holding the particle velocities
cl_mem PositionArray;           // OpenCL view of the OpenGL position VBO
cl_event event;                 // NOTE(review): not referenced anywhere in the code shown
// OpenGL buffer object holding the particle positions.
GLuint vbo;
// Forward declarations; definitions follow main().
void RenderFunc();
void UpdateFunc(int);
char* getDeviceName(cl_device_id device);
int main(int argc, char **argv)
{
std::cout << "Starting..." << std::endl;
std::cout << "Using GPU config for acceleration." << std::endl;
std::cout << "Simulating with " << NUM_PARTICLES << " particles." << std::endl;
// Initialize glut.
glutInit(&argc, argv);
glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH);
glutInitWindowSize(1024, 768);
glutInitWindowPosition(0,0);
glutCreateWindow("OpenCL + OpenGL particles");
// intialize glew
glewInit();
// Get first OpenCL platform.
cl_int err = clGetPlatformIDs(1, &platformID, nullptr);
if(err != CL_SUCCESS)
{
std::cout << "Couldn't find an OpenCL platform!" << std::endl;
getchar();
return -1;
}
// Get first OpenCL gpu device.
err = clGetDeviceIDs(platformID, CL_DEVICE_TYPE_GPU, 1, &deviceID, nullptr);
if(err != CL_SUCCESS)
{
std::cout << "Couldn't find a OpenCL enabled GPU!" << std::endl;
getchar();
return -1;
}
std::cout << "Using OpenCL device: " << getDeviceName(deviceID) << std::endl;
// Create OpenCL create properties
cl_context_properties props[] =
{
CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(),
CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(),
CL_CONTEXT_PLATFORM, (cl_context_properties)platformID,
0
};
// Create an OpenCL context.
context = clCreateContext(props, 1, &deviceID, nullptr, nullptr, &err);
if(!context || err != CL_SUCCESS)
{
std::cout << "Couldn't create an OpenCL context." << std::endl;
getchar();
return -1;
}
// Create a command queue.
commandQueue = clCreateCommandQueue(context, deviceID, 0, &err);
if(!commandQueue || err != CL_SUCCESS)
{
std::cout << "Couldn't create a command queue for OpenCL." << std::endl;
getchar();
return -1;
}
// Load OpenCL code.
FILE* stream = fopen("particles.kernel.cl", "rb");
fseek(stream, 0, SEEK_END);
unsigned int len = ftell(stream);
fseek(stream, 0, SEEK_SET);
char * KernelSource = (char*)malloc(len + 1);
fread(KernelSource, sizeof(char), len, stream);
KernelSource[len] = '\0';
fclose(stream);
// Create a program.
program = clCreateProgramWithSource(context, 1, (const char**)&KernelSource, nullptr, &err);
if(!program || err != CL_SUCCESS)
{
std::cout << "Couldn't create a program." << std::endl;
getchar();
return -1;
}
// Build the program.
err = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
if(err != CL_SUCCESS)
{
std::cout << "Couldn't build the program:" << std::endl;
std::cout << "Build log:" << std::endl;
char buffer[2048];
clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, nullptr);
std::cout << buffer << std::endl << "-- End of build log -- " << std::endl;
getchar();
return -1;
}
// Create a kernel.
kernel = clCreateKernel(program, "particle_cycle", &err);
if(!kernel || err != CL_SUCCESS)
{
std::cout << "Couldn't create a kernel from the program.";
getchar();
return -1;
}
glutDisplayFunc(RenderFunc);
glutTimerFunc(30, UpdateFunc, 30);
// initialize opengl
glClearColor(0,0,0,0);
glViewport(0,0,1024, 768);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
gluPerspective(90.0, (float)1024/768, 0.1, 1000.0);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glTranslatef(0,0, -1.0f);
// Create position VBO
glGenBuffers(1, &vbo);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
float* initialPos = new float[4 * sizeof(float) * NUM_PARTICLES];
for(unsigned int i = 0; i < NUM_PARTICLES; i++)
{
initialPos = rand() / (float)RAND_MAX;
}
// Upload data to OGL
glBufferData(GL_ARRAY_BUFFER, 4 * sizeof(float) * NUM_PARTICLES, initialPos, GL_DYNAMIC_DRAW);
// Bind vbo to cl buffer
PositionArray = clCreateFromGLBuffer(context, CL_MEM_READ_WRITE, vbo, nullptr);
// Create velocity array.
float *velocity = new float[4 * sizeof(float) * NUM_PARTICLES];
for(unsigned int i = 0; i < NUM_PARTICLES; i++)
{
initialPos = rand() / (float)RAND_MAX;
}
// Create OpenCL buffer object.
VelocityArray = clCreateBuffer(context, CL_MEM_READ_ONLY, 4 * sizeof(float) * NUM_PARTICLES, nullptr, nullptr);
if(!VelocityArray)
{
std::cout << "Couldn't create a buffer for the particle velocities." << std::endl;
getchar();
return -1;
}
// Upload data and associate buffer with the context.
err = clEnqueueWriteBuffer(commandQueue, VelocityArray, CL_TRUE, 0, 4 * sizeof(float) * NUM_PARTICLES, velocity, 0, nullptr, nullptr);
if(err != CL_SUCCESS)
{
std::cout << "Couldn't upload particle velocities to the context." << std::endl;
getchar();
return -1;
}
// Set the kernel arguments.
float dt = 0.03;
clSetKernelArg(kernel, 0, sizeof(vbo), &vbo);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &VelocityArray);
clSetKernelArg(kernel, 2, sizeof(float), &dt);
// start application
glutMainLoop();
}
void RenderFunc()
{
// Clear
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
//Enable blending, set point size
glEnable(GL_BLEND);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
glEnable(GL_POINT_SMOOTH);
glPointSize(5.0f);
// Bind buffer
glBindBuffer(GL_ARRAY_BUFFER, vbo);
// Enable drawing
glVertexPointer(4, GL_FLOAT, 0, nullptr);
glEnableClientState(GL_VERTEX_ARRAY);
// Draw the array.
glDrawArrays(GL_POINTS, 0, NUM_PARTICLES);
// Disable drawing.
glDisableClientState(GL_VERTEX_ARRAY);
glBindBuffer(GL_ARRAY_BUFFER, 0);
// Swap buffers
glutSwapBuffers();
}
void UpdateFunc(int)
{
glutTimerFunc(30, UpdateFunc, 30);
// Get workgroup size
size_t global = NUM_PARTICLES;
size_t local;
cl_int err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, nullptr);
if(err != CL_SUCCESS)
{
std::cout << "Couldn't get work group info!" << std::endl;
getchar();
exit(-1);
}
// Aquire position vbo.
err = clEnqueueAcquireGLObjects(commandQueue, 1, &PositionArray, 0, nullptr, nullptr);
if(err != CL_SUCCESS)
{
std::cout << "Error when aquiring OGL object." << std::endl;
getchar();
exit(-1);
}
// run kernel.
cl_event wait;
err = clEnqueueNDRangeKernel(commandQueue, kernel, 1, nullptr, &global, &local, 0, nullptr, nullptr);
if(err != CL_SUCCESS)
{
std::cout << "Error when running kernel." << std::endl;
getchar();
exit(-1);
}
clWaitForEvents(1, &wait);
clFinish(commandQueue);
// release position vbo.
err = clEnqueueReleaseGLObjects(commandQueue, 1, &PositionArray, 0, nullptr, nullptr);
if(err != CL_SUCCESS)
{
std::cout << "Error when releasing OGL object." << std::endl;
getchar();
exit(-1);
}
glutPostRedisplay();
}
/**
 * Queries the human-readable name of an OpenCL device.
 *
 * @param device  Device to query.
 * @return A NUL-terminated copy of the device name allocated with strdup();
 *         the caller owns it and must release it with free(). Returns nullptr
 *         if clGetDeviceInfo fails (strdup may also return nullptr when out of
 *         memory).
 */
char* getDeviceName(cl_device_id device)
{
    // Zero-filled, and the query may only use size - 1 bytes, so the buffer is
    // always NUL-terminated no matter what the driver writes.
    char deviceName[512] = {0};
    const cl_int err = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(deviceName) - 1, deviceName, nullptr);
    if(err != CL_SUCCESS)
    {
        return nullptr;
    }
    return strdup(deviceName);
}
The kernel:
/*
 * Advances every particle by one explicit Euler step: pos[i] += vel[i] * dt.
 * Each work-item handles the particle at its global id in dimension 0.
 *
 *   pos - particle positions, updated in place
 *   vel - particle velocities, only read
 *   dt  - time step the velocities are multiplied by
 *
 * There is no bounds check: the global work size must not exceed the number
 * of elements in pos and vel.
 *
 * __read_only is an access qualifier for image objects; read-only buffer and
 * scalar arguments are expressed with const instead.
 */
__kernel void particle_cycle(__global float4* pos, __global const float4* vel, const float dt)
{
    const size_t i = get_global_id(0);
    const float4 v = vel[i];
    float4 p = pos[i];
    p += (v * dt);
    pos[i] = p;
}
EDIT:
I forgot my setup:
- Windows 8 Pro x64
- Visual Studio 2012 Express
- AMD APP SDK v2.8
- AMD Radeon HD 7850
Thanks a lot in advance for any help.