Jump to content

  • Log In with Google      Sign In   
  • Create Account

#Actualassainator

Posted 29 April 2013 - 05:46 AM

Hello all,

I've been trying to put together an OpenCL particle demo, but I get an error and I have no idea what causes it or how to fix it.

 

Once it gets to the call of clEnqueueNDRangeKernel(...) I get an error "Access violation writing location 0xFFFFFFFD", yet none of the arguments I pass have that value.

 

I am kind of at a loss here so I have no idea what the problem is.

 

My particles update code:

void UpdateFunc(int)
{
	glutTimerFunc(30, UpdateFunc, 30);

	// Get workgroup size
	size_t global = NUM_PARTICLES;
	size_t local;
	cl_int err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Couldn't get work group info!" << std::endl;
		getchar();
		exit(-1);
	}

	// Aquire position vbo.
	err = clEnqueueAcquireGLObjects(commandQueue, 1, &PositionArray, 0, nullptr, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Error when aquiring OGL object." << std::endl;
		getchar();
		exit(-1);
	}

	// run kernel.
	cl_event wait;
	err = clEnqueueNDRangeKernel(commandQueue, kernel, 1, nullptr, &global, &local, 0, nullptr, &wait);
	if(err != CL_SUCCESS)
	{
		std::cout << "Error when running kernel." << std::endl;
		getchar();
		exit(-1);
	}
	clWaitForEvents(1, &wait);
	clFinish(commandQueue);

	// release position vbo.
	err = clEnqueueReleaseGLObjects(commandQueue, 1, &PositionArray, 0, nullptr, nullptr);

	if(err != CL_SUCCESS)
	{
		std::cout << "Error when releasing OGL object." << std::endl;
		getchar();
		exit(-1);
	}

	glutPostRedisplay();
}

 

And my whole source:

#include <iostream>
#include <GL/glew.h>
#include <GL/glut.h>
#include <CL/cl.h>
#include <CL/cl_gl.h>

// Number of particles simulated. Used as the OpenCL global work size in
// UpdateFunc() and as the vertex count in RenderFunc().
#define NUM_PARTICLES 1024

// OpenCL state, created in main() and used by the GLUT callbacks.
cl_platform_id platformID;      // first platform returned by clGetPlatformIDs
cl_device_id deviceID;          // first GPU device on that platform
cl_program program;             // program built from particles.kernel.cl
cl_context context;             // context created with the GL sharing properties
cl_command_queue commandQueue;  // queue all OpenCL commands are enqueued on
cl_kernel kernel;               // the "particle_cycle" kernel
cl_mem VelocityArray;           // OpenCL buffer holding the particle velocities
cl_mem PositionArray;           // OpenCL view of the OpenGL position VBO
cl_event event;                 // NOTE(review): not referenced anywhere in the visible source

// OpenGL vertex buffer object holding the particle positions.
GLuint vbo;

// GLUT callbacks: display function and timer function.
void RenderFunc();
void UpdateFunc(int);

// Returns a strdup()-allocated copy of the device name, or nullptr on error.
char* getDeviceName(cl_device_id device);

int main(int argc, char **argv)
{
	std::cout << "Starting..." << std::endl;
	std::cout << "Using GPU config for acceleration." << std::endl;

	std::cout << "Simulating with " << NUM_PARTICLES << " particles." << std::endl;
	

	// Initialize glut.
	glutInit(&argc, argv);
	glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH);
	glutInitWindowSize(1024, 768);
	glutInitWindowPosition(0,0);

	glutCreateWindow("OpenCL + OpenGL particles");

	// intialize glew
	glewInit();


	// Get first OpenCL platform.
	cl_int err = clGetPlatformIDs(1, &platformID, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Couldn't find an OpenCL platform!" << std::endl;
		getchar();
		return -1;
	}

	// Get first OpenCL gpu device.
	err = clGetDeviceIDs(platformID, CL_DEVICE_TYPE_GPU, 1, &deviceID, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Couldn't find a OpenCL enabled GPU!" << std::endl;
		getchar();
		return -1;
	}
	std::cout << "Using OpenCL device: " << getDeviceName(deviceID) << std::endl;

	// Create OpenCL create properties
	cl_context_properties props[] =
	{
		CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(),
		CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(),
		CL_CONTEXT_PLATFORM, (cl_context_properties)platformID,
		0
	};

	// Create an OpenCL context.
	context = clCreateContext(props, 1, &deviceID, nullptr, nullptr, &err);
	if(!context || err != CL_SUCCESS)
	{
		std::cout << "Couldn't create an OpenCL context." << std::endl;
		getchar();
		return -1;
	}

	// Create a command queue.
	commandQueue = clCreateCommandQueue(context, deviceID, 0, &err);
	if(!commandQueue || err != CL_SUCCESS)
	{
		std::cout << "Couldn't create a command queue for OpenCL." << std::endl;
		getchar();
		return -1;
	}

	// Load OpenCL code.
	FILE* stream = fopen("particles.kernel.cl", "rb");
	fseek(stream, 0, SEEK_END);
	unsigned int len = ftell(stream);
	fseek(stream, 0, SEEK_SET);
	char * KernelSource = (char*)malloc(len + 1);
	fread(KernelSource, sizeof(char), len, stream);
	KernelSource[len] = '\0';
	fclose(stream);

	// Create a program.
	program = clCreateProgramWithSource(context, 1, (const char**)&KernelSource, nullptr, &err);
	if(!program || err != CL_SUCCESS)
	{
		std::cout << "Couldn't create a program." << std::endl;
		getchar();
		return -1;
	}

	// Build the program.
	err = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Couldn't build the program:" << std::endl;
		std::cout << "Build log:" << std::endl;

		char buffer[2048];
		clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, nullptr);
		std::cout << buffer << std::endl << "-- End of build log -- " << std::endl;

		getchar();
		return -1;
	}

	// Create a kernel.
	kernel = clCreateKernel(program, "particle_cycle", &err);
	if(!kernel || err != CL_SUCCESS)
	{
		std::cout << "Couldn't create a kernel from the program.";
		getchar();
		return -1;
	}

	
	glutDisplayFunc(RenderFunc);
	glutTimerFunc(30, UpdateFunc, 30);

	// initialize opengl
	glClearColor(0,0,0,0);
	glViewport(0,0,1024, 768);

	glMatrixMode(GL_PROJECTION);
	glLoadIdentity();
	gluPerspective(90.0, (float)1024/768, 0.1, 1000.0);

	glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
	glMatrixMode(GL_MODELVIEW);
	glLoadIdentity();
	glTranslatef(0,0, -1.0f);

	// Create position VBO
	glGenBuffers(1, &vbo);
	glBindBuffer(GL_ARRAY_BUFFER, vbo);

	float* initialPos = new float[4 * sizeof(float) * NUM_PARTICLES];
	for(unsigned int i = 0; i < NUM_PARTICLES; i++)
	{
		initialPos[i] = rand() / (float)RAND_MAX;
	}

	// Upload data to OGL
	glBufferData(GL_ARRAY_BUFFER, 4 * sizeof(float) * NUM_PARTICLES, initialPos, GL_DYNAMIC_DRAW);

	// Bind vbo to cl buffer
	PositionArray = clCreateFromGLBuffer(context, CL_MEM_READ_WRITE, vbo, nullptr);

	// Create velocity array.
	float *velocity = new float[4 * sizeof(float) * NUM_PARTICLES];
	for(unsigned int i = 0; i < NUM_PARTICLES; i++)
	{
		initialPos[i] = rand() / (float)RAND_MAX;
	}

	// Create OpenCL buffer object.
	VelocityArray = clCreateBuffer(context, CL_MEM_READ_ONLY, 4 * sizeof(float) * NUM_PARTICLES, nullptr, nullptr);
	if(!VelocityArray)
	{
		std::cout << "Couldn't create a buffer for the particle velocities." << std::endl;
		getchar();
		return -1;
	}

	// Upload data and associate buffer with the context.
	err = clEnqueueWriteBuffer(commandQueue, VelocityArray, CL_TRUE, 0, 4 * sizeof(float) * NUM_PARTICLES, velocity, 0, nullptr, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Couldn't upload particle velocities to the context." << std::endl;
		getchar();
		return -1;
	}

	// Set the kernel arguments.
	float dt = 0.03;
	clSetKernelArg(kernel, 0, sizeof(vbo), &vbo); 
	clSetKernelArg(kernel, 1, sizeof(cl_mem), &VelocityArray);
	clSetKernelArg(kernel, 2, sizeof(float), &dt);

	// start application
	glutMainLoop();
}

/**
 * GLUT display callback: draws all particles as blended, smoothed points
 * straight from the position VBO and presents the frame.
 */
void RenderFunc()
{
	// Start from an empty colour and depth buffer.
	glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT);

	// Point appearance: size 5, smoothed, standard alpha blending.
	glPointSize(5.0f);
	glEnable(GL_POINT_SMOOTH);
	glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
	glEnable(GL_BLEND);

	// Source vertices from the particle VBO: four tightly packed floats per
	// vertex, starting at offset 0 of the bound buffer.
	glEnableClientState(GL_VERTEX_ARRAY);
	glBindBuffer(GL_ARRAY_BUFFER, vbo);
	glVertexPointer(4, GL_FLOAT, 0, nullptr);

	// One point per particle.
	glDrawArrays(GL_POINTS, 0, NUM_PARTICLES);

	// Leave no buffer bound and the vertex array disabled.
	glBindBuffer(GL_ARRAY_BUFFER, 0);
	glDisableClientState(GL_VERTEX_ARRAY);

	// Present the finished frame.
	glutSwapBuffers();
}

void UpdateFunc(int)
{
	glutTimerFunc(30, UpdateFunc, 30);

	// Get workgroup size
	size_t global = NUM_PARTICLES;
	size_t local;
	cl_int err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Couldn't get work group info!" << std::endl;
		getchar();
		exit(-1);
	}

	// Aquire position vbo.
	err = clEnqueueAcquireGLObjects(commandQueue, 1, &PositionArray, 0, nullptr, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Error when aquiring OGL object." << std::endl;
		getchar();
		exit(-1);
	}

	// run kernel.
	cl_event wait;
	err = clEnqueueNDRangeKernel(commandQueue, kernel, 1, nullptr, &global, &local, 0, nullptr, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Error when running kernel." << std::endl;
		getchar();
		exit(-1);
	}
	clWaitForEvents(1, &wait);
	clFinish(commandQueue);

	// release position vbo.
	err = clEnqueueReleaseGLObjects(commandQueue, 1, &PositionArray, 0, nullptr, nullptr);

	if(err != CL_SUCCESS)
	{
		std::cout << "Error when releasing OGL object." << std::endl;
		getchar();
		exit(-1);
	}

	glutPostRedisplay();
}

/**
 * Queries the CL_DEVICE_NAME string of an OpenCL device.
 *
 * @param device  Device to query.
 * @return A strdup()-allocated copy of the name that the caller must free(),
 *         or nullptr when clGetDeviceInfo reports an error.
 */
char* getDeviceName(cl_device_id device)
{
	// Zero-filled so the buffer is always a terminated string.
	char deviceName[512] = {0};

	// The returned length is not needed, so no size out-parameter is passed.
	const cl_int err = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(deviceName), deviceName, nullptr);
	if(err != CL_SUCCESS)
	{
		return nullptr;
	}

	return strdup(deviceName);
}

The kernel:

/*
 * Advances one particle per work-item by a single explicit Euler step:
 * pos[i] += vel[i] * dt, applied to all four float4 components.
 *
 * pos - particle positions, read and written in place.
 * vel - particle velocities, read only.
 * dt  - time step.
 *
 * The global work size must not exceed the number of elements in pos and vel;
 * there is no bounds check.
 *
 * __read_only is an access qualifier for image arguments only, so read-only
 * intent is expressed with const here.
 */
__kernel void particle_cycle(__global float4* pos, __global const float4* vel, const float dt)
{
	const size_t i = get_global_id(0);

	const float4 v = vel[i];
	float4 p = pos[i];

	p += (v * dt);

	pos[i] = p;
}

 

EDIT:

I forgot my setup:

- Windows 8 Pro x64

- Visual Studio 2012 Express

- AMD APP SDK v2.8

- AMD Radeon HD 7850

 

 

Thanks a lot in advance for any help.


#2assainator

Posted 29 April 2013 - 05:46 AM

Hello all,

I've been trying to put together an OpenCL particle demo, but I get an error and I have no idea what causes it or how to fix it.

 

Once it gets to the call of clEnqueueNDRangeKernel(...) I get an error "Access violation writing location 0xFFFFFFFD", yet none of the arguments I pass have that value.

 

I am kind of at a loss here so I have no idea what the problem is.

 

My particles update code:

void UpdateFunc(int)
{
	glutTimerFunc(30, UpdateFunc, 30);

	// Get workgroup size
	size_t global = NUM_PARTICLES;
	size_t local;
	cl_int err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Couldn't get work group info!" << std::endl;
		getchar();
		exit(-1);
	}

	// Aquire position vbo.
	err = clEnqueueAcquireGLObjects(commandQueue, 1, &PositionArray, 0, nullptr, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Error when aquiring OGL object." << std::endl;
		getchar();
		exit(-1);
	}

	// run kernel.
	cl_event wait;
	err = clEnqueueNDRangeKernel(commandQueue, kernel, 1, nullptr, &global, &local, 0, nullptr, &wait);
	if(err != CL_SUCCESS)
	{
		std::cout << "Error when running kernel." << std::endl;
		getchar();
		exit(-1);
	}
	clWaitForEvents(1, &wait);
	clFinish(commandQueue);

	// release position vbo.
	err = clEnqueueReleaseGLObjects(commandQueue, 1, &PositionArray, 0, nullptr, nullptr);

	if(err != CL_SUCCESS)
	{
		std::cout << "Error when releasing OGL object." << std::endl;
		getchar();
		exit(-1);
	}

	glutPostRedisplay();
}

 

And my whole source:

#include <iostream>
#include <GL/glew.h>
#include <GL/glut.h>
#include <CL/cl.h>
#include <CL/cl_gl.h>

// Number of particles simulated. Used as the OpenCL global work size in
// UpdateFunc() and as the vertex count in RenderFunc().
#define NUM_PARTICLES 1024

// OpenCL state, created in main() and used by the GLUT callbacks.
cl_platform_id platformID;      // first platform returned by clGetPlatformIDs
cl_device_id deviceID;          // first GPU device on that platform
cl_program program;             // program built from particles.kernel.cl
cl_context context;             // context created with the GL sharing properties
cl_command_queue commandQueue;  // queue all OpenCL commands are enqueued on
cl_kernel kernel;               // the "particle_cycle" kernel
cl_mem VelocityArray;           // OpenCL buffer holding the particle velocities
cl_mem PositionArray;           // OpenCL view of the OpenGL position VBO
cl_event event;                 // NOTE(review): not referenced anywhere in the visible source

// OpenGL vertex buffer object holding the particle positions.
GLuint vbo;

// GLUT callbacks: display function and timer function.
void RenderFunc();
void UpdateFunc(int);

// Returns a strdup()-allocated copy of the device name, or nullptr on error.
char* getDeviceName(cl_device_id device);

int main(int argc, char **argv)
{
	std::cout << "Starting..." << std::endl;
	std::cout << "Using GPU config for acceleration." << std::endl;

	std::cout << "Simulating with " << NUM_PARTICLES << " particles." << std::endl;
	

	// Initialize glut.
	glutInit(&argc, argv);
	glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH);
	glutInitWindowSize(1024, 768);
	glutInitWindowPosition(0,0);

	glutCreateWindow("OpenCL + OpenGL particles");

	// intialize glew
	glewInit();


	// Get first OpenCL platform.
	cl_int err = clGetPlatformIDs(1, &platformID, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Couldn't find an OpenCL platform!" << std::endl;
		getchar();
		return -1;
	}

	// Get first OpenCL gpu device.
	err = clGetDeviceIDs(platformID, CL_DEVICE_TYPE_GPU, 1, &deviceID, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Couldn't find a OpenCL enabled GPU!" << std::endl;
		getchar();
		return -1;
	}
	std::cout << "Using OpenCL device: " << getDeviceName(deviceID) << std::endl;

	// Create OpenCL create properties
	cl_context_properties props[] =
	{
		CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(),
		CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(),
		CL_CONTEXT_PLATFORM, (cl_context_properties)platformID,
		0
	};

	// Create an OpenCL context.
	context = clCreateContext(props, 1, &deviceID, nullptr, nullptr, &err);
	if(!context || err != CL_SUCCESS)
	{
		std::cout << "Couldn't create an OpenCL context." << std::endl;
		getchar();
		return -1;
	}

	// Create a command queue.
	commandQueue = clCreateCommandQueue(context, deviceID, 0, &err);
	if(!commandQueue || err != CL_SUCCESS)
	{
		std::cout << "Couldn't create a command queue for OpenCL." << std::endl;
		getchar();
		return -1;
	}

	// Load OpenCL code.
	FILE* stream = fopen("particles.kernel.cl", "rb");
	fseek(stream, 0, SEEK_END);
	unsigned int len = ftell(stream);
	fseek(stream, 0, SEEK_SET);
	char * KernelSource = (char*)malloc(len + 1);
	fread(KernelSource, sizeof(char), len, stream);
	KernelSource[len] = '\0';
	fclose(stream);

	// Create a program.
	program = clCreateProgramWithSource(context, 1, (const char**)&KernelSource, nullptr, &err);
	if(!program || err != CL_SUCCESS)
	{
		std::cout << "Couldn't create a program." << std::endl;
		getchar();
		return -1;
	}

	// Build the program.
	err = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Couldn't build the program:" << std::endl;
		std::cout << "Build log:" << std::endl;

		char buffer[2048];
		clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, nullptr);
		std::cout << buffer << std::endl << "-- End of build log -- " << std::endl;

		getchar();
		return -1;
	}

	// Create a kernel.
	kernel = clCreateKernel(program, "particle_cycle", &err);
	if(!kernel || err != CL_SUCCESS)
	{
		std::cout << "Couldn't create a kernel from the program.";
		getchar();
		return -1;
	}

	
	glutDisplayFunc(RenderFunc);
	glutTimerFunc(30, UpdateFunc, 30);

	// initialize opengl
	glClearColor(0,0,0,0);
	glViewport(0,0,1024, 768);

	glMatrixMode(GL_PROJECTION);
	glLoadIdentity();
	gluPerspective(90.0, (float)1024/768, 0.1, 1000.0);

	glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
	glMatrixMode(GL_MODELVIEW);
	glLoadIdentity();
	glTranslatef(0,0, -1.0f);

	// Create position VBO
	glGenBuffers(1, &vbo);
	glBindBuffer(GL_ARRAY_BUFFER, vbo);

	float* initialPos = new float[4 * sizeof(float) * NUM_PARTICLES];
	for(unsigned int i = 0; i < NUM_PARTICLES; i++)
	{
		initialPos[i] = rand() / (float)RAND_MAX;
	}

	// Upload data to OGL
	glBufferData(GL_ARRAY_BUFFER, 4 * sizeof(float) * NUM_PARTICLES, initialPos, GL_DYNAMIC_DRAW);

	// Bind vbo to cl buffer
	PositionArray = clCreateFromGLBuffer(context, CL_MEM_READ_WRITE, vbo, nullptr);

	// Create velocity array.
	float *velocity = new float[4 * sizeof(float) * NUM_PARTICLES];
	for(unsigned int i = 0; i < NUM_PARTICLES; i++)
	{
		initialPos[i] = rand() / (float)RAND_MAX;
	}

	// Create OpenCL buffer object.
	VelocityArray = clCreateBuffer(context, CL_MEM_READ_ONLY, 4 * sizeof(float) * NUM_PARTICLES, nullptr, nullptr);
	if(!VelocityArray)
	{
		std::cout << "Couldn't create a buffer for the particle velocities." << std::endl;
		getchar();
		return -1;
	}

	// Upload data and associate buffer with the context.
	err = clEnqueueWriteBuffer(commandQueue, VelocityArray, CL_TRUE, 0, 4 * sizeof(float) * NUM_PARTICLES, velocity, 0, nullptr, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Couldn't upload particle velocities to the context." << std::endl;
		getchar();
		return -1;
	}

	// Set the kernel arguments.
	float dt = 0.03;
	clSetKernelArg(kernel, 0, sizeof(vbo), &vbo); 
	clSetKernelArg(kernel, 1, sizeof(cl_mem), &VelocityArray);
	clSetKernelArg(kernel, 2, sizeof(float), &dt);

	// start application
	glutMainLoop();
}

/**
 * GLUT display callback: draws all particles as blended, smoothed points
 * straight from the position VBO and presents the frame.
 */
void RenderFunc()
{
	// Start from an empty colour and depth buffer.
	glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT);

	// Point appearance: size 5, smoothed, standard alpha blending.
	glPointSize(5.0f);
	glEnable(GL_POINT_SMOOTH);
	glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
	glEnable(GL_BLEND);

	// Source vertices from the particle VBO: four tightly packed floats per
	// vertex, starting at offset 0 of the bound buffer.
	glEnableClientState(GL_VERTEX_ARRAY);
	glBindBuffer(GL_ARRAY_BUFFER, vbo);
	glVertexPointer(4, GL_FLOAT, 0, nullptr);

	// One point per particle.
	glDrawArrays(GL_POINTS, 0, NUM_PARTICLES);

	// Leave no buffer bound and the vertex array disabled.
	glBindBuffer(GL_ARRAY_BUFFER, 0);
	glDisableClientState(GL_VERTEX_ARRAY);

	// Present the finished frame.
	glutSwapBuffers();
}

void UpdateFunc(int)
{
	glutTimerFunc(30, UpdateFunc, 30);

	// Get workgroup size
	size_t global = NUM_PARTICLES;
	size_t local;
	cl_int err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Couldn't get work group info!" << std::endl;
		getchar();
		exit(-1);
	}

	// Aquire position vbo.
	err = clEnqueueAcquireGLObjects(commandQueue, 1, &PositionArray, 0, nullptr, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Error when aquiring OGL object." << std::endl;
		getchar();
		exit(-1);
	}

	// run kernel.
	cl_event wait;
	err = clEnqueueNDRangeKernel(commandQueue, kernel, 1, nullptr, &global, &local, 0, nullptr, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Error when running kernel." << std::endl;
		getchar();
		exit(-1);
	}
	clWaitForEvents(1, &wait);
	clFinish(commandQueue);

	// release position vbo.
	err = clEnqueueReleaseGLObjects(commandQueue, 1, &PositionArray, 0, nullptr, nullptr);

	if(err != CL_SUCCESS)
	{
		std::cout << "Error when releasing OGL object." << std::endl;
		getchar();
		exit(-1);
	}

	glutPostRedisplay();
}

/**
 * Queries the CL_DEVICE_NAME string of an OpenCL device.
 *
 * @param device  Device to query.
 * @return A strdup()-allocated copy of the name that the caller must free(),
 *         or nullptr when clGetDeviceInfo reports an error.
 */
char* getDeviceName(cl_device_id device)
{
	// Zero-filled so the buffer is always a terminated string.
	char deviceName[512] = {0};

	// The returned length is not needed, so no size out-parameter is passed.
	const cl_int err = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(deviceName), deviceName, nullptr);
	if(err != CL_SUCCESS)
	{
		return nullptr;
	}

	return strdup(deviceName);
}

The kernel:

/*
 * Advances one particle per work-item by a single explicit Euler step:
 * pos[i] += vel[i] * dt, applied to all four float4 components.
 *
 * pos - particle positions, read and written in place.
 * vel - particle velocities, read only.
 * dt  - time step.
 *
 * The global work size must not exceed the number of elements in pos and vel;
 * there is no bounds check.
 *
 * __read_only is an access qualifier for image arguments only, so read-only
 * intent is expressed with const here.
 */
__kernel void particle_cycle(__global float4* pos, __global const float4* vel, const float dt)
{
	const size_t i = get_global_id(0);

	const float4 v = vel[i];
	float4 p = pos[i];

	p += (v * dt);

	pos[i] = p;
}

 

EDIT:

I forgot my setup:

- Windows 8 Pro x64

- Visual Studio 2012 Express

- AMD APP SDK v2.8

- AMD Radeon HD 7850


#1assainator

Posted 29 April 2013 - 05:40 AM

Hello all,

I've been trying to put together an OpenCL particle demo, but I get an error and I have no idea what causes it or how to fix it.

 

Once it gets to the call of clEnqueueNDRangeKernel(...) I get an error "Access violation writing location 0xFFFFFFFD", yet none of the arguments I pass have that value.

 

I am kind of at a loss here so I have no idea what the problem is.

 

My particles update code:

void UpdateFunc(int)
{
	glutTimerFunc(30, UpdateFunc, 30);

	// Get workgroup size
	size_t global = NUM_PARTICLES;
	size_t local;
	cl_int err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Couldn't get work group info!" << std::endl;
		getchar();
		exit(-1);
	}

	// Aquire position vbo.
	err = clEnqueueAcquireGLObjects(commandQueue, 1, &PositionArray, 0, nullptr, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Error when aquiring OGL object." << std::endl;
		getchar();
		exit(-1);
	}

	// run kernel.
	cl_event wait;
	err = clEnqueueNDRangeKernel(commandQueue, kernel, 1, nullptr, &global, &local, 0, nullptr, &wait);
	if(err != CL_SUCCESS)
	{
		std::cout << "Error when running kernel." << std::endl;
		getchar();
		exit(-1);
	}
	clWaitForEvents(1, &wait);
	clFinish(commandQueue);

	// release position vbo.
	err = clEnqueueReleaseGLObjects(commandQueue, 1, &PositionArray, 0, nullptr, nullptr);

	if(err != CL_SUCCESS)
	{
		std::cout << "Error when releasing OGL object." << std::endl;
		getchar();
		exit(-1);
	}

	glutPostRedisplay();
}

 

And my whole source:

#include <iostream>
#include <GL/glew.h>
#include <GL/glut.h>
#include <CL/cl.h>
#include <CL/cl_gl.h>

// Number of particles simulated. Used as the OpenCL global work size in
// UpdateFunc() and as the vertex count in RenderFunc().
#define NUM_PARTICLES 1024

// OpenCL state, created in main() and used by the GLUT callbacks.
cl_platform_id platformID;      // first platform returned by clGetPlatformIDs
cl_device_id deviceID;          // first GPU device on that platform
cl_program program;             // program built from particles.kernel.cl
cl_context context;             // context created with the GL sharing properties
cl_command_queue commandQueue;  // queue all OpenCL commands are enqueued on
cl_kernel kernel;               // the "particle_cycle" kernel
cl_mem VelocityArray;           // OpenCL buffer holding the particle velocities
cl_mem PositionArray;           // OpenCL view of the OpenGL position VBO
cl_event event;                 // NOTE(review): not referenced anywhere in the visible source

// OpenGL vertex buffer object holding the particle positions.
GLuint vbo;

// GLUT callbacks: display function and timer function.
void RenderFunc();
void UpdateFunc(int);

// Returns a strdup()-allocated copy of the device name, or nullptr on error.
char* getDeviceName(cl_device_id device);

int main(int argc, char **argv)
{
	std::cout << "Starting..." << std::endl;
	std::cout << "Using GPU config for acceleration." << std::endl;

	std::cout << "Simulating with " << NUM_PARTICLES << " particles." << std::endl;
	

	// Initialize glut.
	glutInit(&argc, argv);
	glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH);
	glutInitWindowSize(1024, 768);
	glutInitWindowPosition(0,0);

	glutCreateWindow("OpenCL + OpenGL particles");

	// intialize glew
	glewInit();


	// Get first OpenCL platform.
	cl_int err = clGetPlatformIDs(1, &platformID, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Couldn't find an OpenCL platform!" << std::endl;
		getchar();
		return -1;
	}

	// Get first OpenCL gpu device.
	err = clGetDeviceIDs(platformID, CL_DEVICE_TYPE_GPU, 1, &deviceID, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Couldn't find a OpenCL enabled GPU!" << std::endl;
		getchar();
		return -1;
	}
	std::cout << "Using OpenCL device: " << getDeviceName(deviceID) << std::endl;

	// Create OpenCL create properties
	cl_context_properties props[] =
	{
		CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(),
		CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(),
		CL_CONTEXT_PLATFORM, (cl_context_properties)platformID,
		0
	};

	// Create an OpenCL context.
	context = clCreateContext(props, 1, &deviceID, nullptr, nullptr, &err);
	if(!context || err != CL_SUCCESS)
	{
		std::cout << "Couldn't create an OpenCL context." << std::endl;
		getchar();
		return -1;
	}

	// Create a command queue.
	commandQueue = clCreateCommandQueue(context, deviceID, 0, &err);
	if(!commandQueue || err != CL_SUCCESS)
	{
		std::cout << "Couldn't create a command queue for OpenCL." << std::endl;
		getchar();
		return -1;
	}

	// Load OpenCL code.
	FILE* stream = fopen("particles.kernel.cl", "rb");
	fseek(stream, 0, SEEK_END);
	unsigned int len = ftell(stream);
	fseek(stream, 0, SEEK_SET);
	char * KernelSource = (char*)malloc(len + 1);
	fread(KernelSource, sizeof(char), len, stream);
	KernelSource[len] = '\0';
	fclose(stream);

	// Create a program.
	program = clCreateProgramWithSource(context, 1, (const char**)&KernelSource, nullptr, &err);
	if(!program || err != CL_SUCCESS)
	{
		std::cout << "Couldn't create a program." << std::endl;
		getchar();
		return -1;
	}

	// Build the program.
	err = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Couldn't build the program:" << std::endl;
		std::cout << "Build log:" << std::endl;

		char buffer[2048];
		clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, nullptr);
		std::cout << buffer << std::endl << "-- End of build log -- " << std::endl;

		getchar();
		return -1;
	}

	// Create a kernel.
	kernel = clCreateKernel(program, "particle_cycle", &err);
	if(!kernel || err != CL_SUCCESS)
	{
		std::cout << "Couldn't create a kernel from the program.";
		getchar();
		return -1;
	}

	
	glutDisplayFunc(RenderFunc);
	glutTimerFunc(30, UpdateFunc, 30);

	// initialize opengl
	glClearColor(0,0,0,0);
	glViewport(0,0,1024, 768);

	glMatrixMode(GL_PROJECTION);
	glLoadIdentity();
	gluPerspective(90.0, (float)1024/768, 0.1, 1000.0);

	glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
	glMatrixMode(GL_MODELVIEW);
	glLoadIdentity();
	glTranslatef(0,0, -1.0f);

	// Create position VBO
	glGenBuffers(1, &vbo);
	glBindBuffer(GL_ARRAY_BUFFER, vbo);

	float* initialPos = new float[4 * sizeof(float) * NUM_PARTICLES];
	for(unsigned int i = 0; i < NUM_PARTICLES; i++)
	{
		initialPos[i] = rand() / (float)RAND_MAX;
	}

	// Upload data to OGL
	glBufferData(GL_ARRAY_BUFFER, 4 * sizeof(float) * NUM_PARTICLES, initialPos, GL_DYNAMIC_DRAW);

	// Bind vbo to cl buffer
	PositionArray = clCreateFromGLBuffer(context, CL_MEM_READ_WRITE, vbo, nullptr);

	// Create velocity array.
	float *velocity = new float[4 * sizeof(float) * NUM_PARTICLES];
	for(unsigned int i = 0; i < NUM_PARTICLES; i++)
	{
		initialPos[i] = rand() / (float)RAND_MAX;
	}

	// Create OpenCL buffer object.
	VelocityArray = clCreateBuffer(context, CL_MEM_READ_ONLY, 4 * sizeof(float) * NUM_PARTICLES, nullptr, nullptr);
	if(!VelocityArray)
	{
		std::cout << "Couldn't create a buffer for the particle velocities." << std::endl;
		getchar();
		return -1;
	}

	// Upload data and associate buffer with the context.
	err = clEnqueueWriteBuffer(commandQueue, VelocityArray, CL_TRUE, 0, 4 * sizeof(float) * NUM_PARTICLES, velocity, 0, nullptr, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Couldn't upload particle velocities to the context." << std::endl;
		getchar();
		return -1;
	}

	// Set the kernel arguments.
	float dt = 0.03;
	clSetKernelArg(kernel, 0, sizeof(vbo), &vbo); 
	clSetKernelArg(kernel, 1, sizeof(cl_mem), &VelocityArray);
	clSetKernelArg(kernel, 2, sizeof(float), &dt);

	// start application
	glutMainLoop();
}

/**
 * GLUT display callback: draws all particles as blended, smoothed points
 * straight from the position VBO and presents the frame.
 */
void RenderFunc()
{
	// Start from an empty colour and depth buffer.
	glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT);

	// Point appearance: size 5, smoothed, standard alpha blending.
	glPointSize(5.0f);
	glEnable(GL_POINT_SMOOTH);
	glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
	glEnable(GL_BLEND);

	// Source vertices from the particle VBO: four tightly packed floats per
	// vertex, starting at offset 0 of the bound buffer.
	glEnableClientState(GL_VERTEX_ARRAY);
	glBindBuffer(GL_ARRAY_BUFFER, vbo);
	glVertexPointer(4, GL_FLOAT, 0, nullptr);

	// One point per particle.
	glDrawArrays(GL_POINTS, 0, NUM_PARTICLES);

	// Leave no buffer bound and the vertex array disabled.
	glBindBuffer(GL_ARRAY_BUFFER, 0);
	glDisableClientState(GL_VERTEX_ARRAY);

	// Present the finished frame.
	glutSwapBuffers();
}

void UpdateFunc(int)
{
	glutTimerFunc(30, UpdateFunc, 30);

	// Get workgroup size
	size_t global = NUM_PARTICLES;
	size_t local;
	cl_int err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Couldn't get work group info!" << std::endl;
		getchar();
		exit(-1);
	}

	// Aquire position vbo.
	err = clEnqueueAcquireGLObjects(commandQueue, 1, &PositionArray, 0, nullptr, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Error when aquiring OGL object." << std::endl;
		getchar();
		exit(-1);
	}

	// run kernel.
	cl_event wait;
	err = clEnqueueNDRangeKernel(commandQueue, kernel, 1, nullptr, &global, &local, 0, nullptr, nullptr);
	if(err != CL_SUCCESS)
	{
		std::cout << "Error when running kernel." << std::endl;
		getchar();
		exit(-1);
	}
	clWaitForEvents(1, &wait);
	clFinish(commandQueue);

	// release position vbo.
	err = clEnqueueReleaseGLObjects(commandQueue, 1, &PositionArray, 0, nullptr, nullptr);

	if(err != CL_SUCCESS)
	{
		std::cout << "Error when releasing OGL object." << std::endl;
		getchar();
		exit(-1);
	}

	glutPostRedisplay();
}

/**
 * Queries the CL_DEVICE_NAME string of an OpenCL device.
 *
 * @param device  Device to query.
 * @return A strdup()-allocated copy of the name that the caller must free(),
 *         or nullptr when clGetDeviceInfo reports an error.
 */
char* getDeviceName(cl_device_id device)
{
	// Zero-filled so the buffer is always a terminated string.
	char deviceName[512] = {0};

	// The returned length is not needed, so no size out-parameter is passed.
	const cl_int err = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(deviceName), deviceName, nullptr);
	if(err != CL_SUCCESS)
	{
		return nullptr;
	}

	return strdup(deviceName);
}

The kernel:

/*
 * Advances one particle per work-item by a single explicit Euler step:
 * pos[i] += vel[i] * dt, applied to all four float4 components.
 *
 * pos - particle positions, read and written in place.
 * vel - particle velocities, read only.
 * dt  - time step.
 *
 * The global work size must not exceed the number of elements in pos and vel;
 * there is no bounds check.
 *
 * __read_only is an access qualifier for image arguments only, so read-only
 * intent is expressed with const here.
 */
__kernel void particle_cycle(__global float4* pos, __global const float4* vel, const float dt)
{
	const size_t i = get_global_id(0);

	const float4 v = vel[i];
	float4 p = pos[i];

	p += (v * dt);

	pos[i] = p;
}

 


PARTNERS