• Advertisement
Sign in to follow this  

Optimizing per-pixel lighting

This topic is 3625 days old which is more than the 365 day threshold we allow for new replies. Please post a new topic.

If you intended to correct an error in the post then please contact us.

Recommended Posts

Hi all, I've currently run into a bottleneck when doing multiple per-pixel omni-lights using linear depth cubemaps. I've tried to optimize everything as best I can, but am wondering if there's anything I've missed? So here goes... 1. Prepare lights - For every light in the scene, I calculate its visibility using occlusion queries. I then sort an array of lights using qsort, to order them by visibility. 2. Shadowmap rendering - A linear depth cubemap is rendered for the 4 nearest lights. I am rendering each one in turn to the FBO, using glColorMask so that each channel of the cubemap stores the linear depth for one of the 4 lights. 3. Rendering - I pass the near and far clip planes of the light to the shader, and calculate per-pixel lighting if the texel falls in the near/far clip range and the texel's distance from the light is less than the occluder distance stored in the cubemap channel for that light. And here are the code snippets... cubemap creation
		// Shadow-map resource setup: creates a framebuffer object with a depth
		// renderbuffer, six float 2D textures, and an RGBA16F cube map texture.
		// FBO
		glGenFramebuffersEXT(1, &shadow_fbo);
		glBindFramebufferEXT(GL_FRAMEBUFFER_EXT,shadow_fbo);
		//Gen Depth Buffer
		// A single Width x Height depth renderbuffer is allocated here.
		glGenRenderbuffersEXT(1, &shadow_DepthBuffer);
		glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, shadow_DepthBuffer);
		glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_DEPTH_COMPONENT, Width, Height);
		// One texture handle per cube face, obtained from the texture manager.
		// NOTE(review): CreateFloat is not shown; presumably it allocates a
		// Width x Height floating-point 2D texture -- confirm.
		for(int j=0;j<6;++j){	
			shadow_cubeFaces[j]	=	_textureman->CreateFloat(Width, Height, 1);
		}
		//Attach Depth Buffer
		glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT,GL_DEPTH_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, shadow_DepthBuffer);
		// Attach the six 2D textures to color attachments 0..5.
		// NOTE(review): the rendering snippet re-attaches the cube map faces to
		// these same attachment points, so these 2D textures look unused --
		// confirm before removing them.
		for(int j=0;j<6;++j){
			glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT+j, GL_TEXTURE_2D, shadow_cubeFaces[j], 0);
		}
		//Gen Cubemap Texture
		// NOTE(review): glEnable/glDisable of the cube map target toggles
		// fixed-function texturing; it should not be needed just to create and
		// configure the texture object -- confirm.
		glEnable(GL_TEXTURE_CUBE_MAP_ARB);
		glGenTextures(1, &shadowCubeMap);
		glBindTexture(GL_TEXTURE_CUBE_MAP_ARB, shadowCubeMap);
		glTexParameteri(GL_TEXTURE_CUBE_MAP_ARB, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
		glTexParameteri(GL_TEXTURE_CUBE_MAP_ARB, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
		glTexParameteri(GL_TEXTURE_CUBE_MAP_ARB, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
		// Only mip level 0 is allocated below, so a non-mipmapped minification
		// filter is selected. The magnification filter is left at its default.
		glTexParameteri(GL_TEXTURE_CUBE_MAP_ARB, GL_TEXTURE_MIN_FILTER, GL_LINEAR);

		// Allocate storage (no initial data) for each of the six faces as
		// four-channel 16-bit float.
		for(int j=0;j<6;++j){
			glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB + j, 0, GL_RGBA16F_ARB, Width, Height, 0, GL_RGBA, GL_FLOAT, NULL);
		}
		glDisable(GL_TEXTURE_CUBE_MAP_ARB);
		// Unbind the FBO so later rendering goes to the default framebuffer.
		// NOTE(review): no glCheckFramebufferStatusEXT call is visible in this
		// snippet, so an incomplete FBO would go unnoticed here.
		glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);


and the cubemap rendering...
	// Shadow cube map pass: for each of the six cube faces, renders the scene
	// once per light (at most four lights) with the depth-mapper shader, using
	// the color mask so light n writes only to channel n of the face.
	int	Width		=	512;
	int	Height		=	512;


	// NOTE(review): no matching delete is visible; if this snippet runs every
	// frame, a new camera is allocated each time -- confirm ownership.
	cam	=	new CCamera();

	// Per-face look direction and up vector, indexed by face number.
	CVector3	view[6];
	CVector3	up[6];

	view[0]	=	CVector3(0,0,1);
	view[1]	=	CVector3(0,0,-1);
	view[2]	=	CVector3(0,-1,0);
	view[3]	=	CVector3(0,1,0);
	view[4]	=	CVector3(1,0,0);
	view[5]	=	CVector3(-1,0,0);

	up[0]	=	CVector3(0,1,0);
	up[1]	=	CVector3(0,1,0);
	up[2]	=	CVector3(1,0,0);
	up[3]	=	CVector3(-1,0,0);
	up[4]	=	CVector3(0,1,0);
	up[5]	=	CVector3(0,1,0);



	CShader*	pShader	=	_textureman->getDepthMapper();

	// Remember the active draw buffer so it can be restored after the pass.
	glGetIntegerv(GL_DRAW_BUFFER, &_currentDrawbuf); 

	glCullFace(GL_BACK); 
	// NOTE(review): alpha test is enabled here and never disabled in this
	// snippet -- confirm later passes expect it on.
	glEnable(GL_ALPHA_TEST);

	pShader->enable();
	
	glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, shadow_fbo);

	// Outer loop: one iteration per cube face.
	for(int i=0;i<6;++i){

		// Attach cube face i to color attachment i and render into it.
		glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT+i, GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB+i, shadowCubeMap, 0);
		glViewport(0,0,Width, Height);

		// NOTE(review): GL_DEPTH_ATTACHMENT_EXT is not a color buffer name, so
		// the first glDrawBuffer call is expected to raise GL_INVALID_ENUM; the
		// second call sets the buffer that is actually used.
		glDrawBuffer(GL_DEPTH_ATTACHMENT_EXT);
		glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT+i);
			
		// Clear all four channels of this face to zero before the light passes.
		glClearColor(0.0f,0.0f,0.0f,0.0f);
		glClear(GL_COLOR_BUFFER_BIT);
		glLoadIdentity();

		//	-----------------------------------------------------------------------------------------------
		//	LOOP THROUGH 4 LIGHTS STORING DEPTH IN r g b a
		//	-----------------------------------------------------------------------------------------------

		int numlights	=	_lightman->getNumOfVisibleLights();

		// At most four lights fit in the four channels.
		if(numlights>4)
			numlights=4;

		// Write mask selecting channel n for light n. These values do not
		// depend on the face index, yet are rebuilt for every face.
		CVector4	colormask[4];
		colormask[0]	=	CVector4(1,0,0,0);
		colormask[1]	=	CVector4(0,1,0,0);
		colormask[2]	=	CVector4(0,0,1,0);
		colormask[3]	=	CVector4(0,0,0,1);



		for(int n=0;n<numlights;++n){

			// The depth buffer is cleared per light because every light
			// renders through the same depth attachment.
			glClear(GL_DEPTH_BUFFER_BIT);

			glColorMask(colormask[n].x, colormask[n].y, colormask[n].z, colormask[n].w);

			CLight*		pLight	=	_lightman->getLight(n);
			CVector3 pos	=	 pLight->getPosition();

			// The literal initial values are overwritten immediately by the
			// light's own clip distances. Despite its name, far_near holds the
			// far clip distance.
			float	clip_near	=	0.0001f;
			float	far_near	=	50000.0f;

			clip_near	=	pLight->getNearClip();
			far_near	=	pLight->getFarClip();

			pShader->setUniform1f( "clip_near", clip_near);
			pShader->setUniform1f( "far_near", far_near);

			// The shader was already enabled before the face loop and is not
			// disabled inside it, so this enable repeats per light.
			pShader->enable();
			pShader->setUniform3fv( "sun_pos", pos);
			// r1..r4 are set to the same constants for every face and light.
			// NOTE(review): candidates for hoisting out of both loops if the
			// shader keeps uniform values between draws -- confirm.
			pShader->setUniform4f( "r1", 1,0,0,0 );
			pShader->setUniform4f( "r2", 0,1,0,0 );
			pShader->setUniform4f( "r3", 0,0,1,0 );
			pShader->setUniform4f( "r4", 0,0,0,1 );

			pShader->setUniform1i("pass",0);


			// NOTE(review): view and up are arrays; a face subscript (view[i],
			// up[i]) was probably stripped by the forum's markup -- confirm
			// against the real source.
			cam->PositionCamera( pos, pos+view, up );
			cam->LookClip(1,clip_near,far_near);
			// The frustum is updated here, but no per-object frustum test is
			// visible in the object loop below.
			cam->UpdateFrustumFaster();

			// Pass 0: world objects. The loop variable i shadows the outer
			// face index i for the duration of this loop.
			int numObjects	=	_world.numOfObjects;
			for(int i=0;i<numObjects;++i){
					
				// NOTE(review): as written this takes the address of the
				// pObject member itself; an [i] subscript was probably lost to
				// forum markup -- confirm.
				t3DObject*		pObject     =	&_world.pObject;
				tMaterialInfo*	pMaterial	=	&_world.pMaterials[pObject->materialID];

				// Only objects whose material has bSuperShader set are drawn.
				if(pMaterial->bSuperShader){

					use_Vertices(pObject);
					use_TextureCoords(0, pObject);

					// Diffuse texture is bound on unit 1 for the draw.
					// NOTE(review): presumably sampled for the alpha test.
					_textureman->bindMultiTexture( 1, pMaterial->texureId_diffuse );

					glDrawElements(GL_TRIANGLES, pObject->numOfFaces*3, GL_UNSIGNED_INT, pObject->pIndices);
				
					_textureman->unbindMultiTexture( 1 );

					disable_Vertices(pObject);
					disable_TextureCoords(0);
				}
			}
	
			// Pass 1: object meshes.
			pShader->setUniform1i("pass",1);
			glPushMatrix();
				renderObjectsMesh();
			glPopMatrix();

			// Pass 2: MD5 meshes. The loop index is not used in the call, so
			// the same draw is issued NUM_BOTS times with argument 0.
			// NOTE(review): confirm whether a per-bot argument was intended.
			pShader->setUniform1i("pass",2);
			glPushMatrix();
				for(int i=0;i<NUM_BOTS;++i){
					gunMD5->drawMd5Mesh( 0 );
				}
			glPopMatrix();



			// Re-enable writes to all channels before the next light or face.
			glColorMask(1, 1, 1, 1);

		}

	}

	// Same cull-face setting as before the pass.
	glCullFace(GL_BACK); 

	pShader->disable();

	// Restore the current Draw buffer
	glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);
	glDrawBuffer(_currentDrawbuf);
	glViewport(0 , 0,_gl->m_nWidth ,_gl->m_nHeight);	



...and finally the glsl shader function for per-pixel lighting...
// Computes the shadowed diffuse contribution of one omni light at a texel.
//
// Parameters:
//   lightCap      - light index; selects gl_LightSource[lightCap] and, when
//                   below 4, the cube map channel (0=r, 1=g, 2=b, 3=a).
//   norm          - surface normal used for the N.L term.
//   offset        - bias added to the receiver distance before the shadow test.
//   this_vertex   - position used for the light vector and light distance.
//   cull_vertex   - position used to build the cube map lookup direction.
//   lightPosition - light position, in the same space as the two positions.
//   clipdata      - x = minimum and y = maximum lit distance; z is not read.
//
// Returns the light's RGB contribution, or vec3(0) when the light is not
// active, the texel is outside the distance range, or the texel is shadowed.
//
// Reads globals declared elsewhere: numLights, radiosity_normal,
// enable_radiosity, rC, tex7.
vec3	computeCubeShadowMap(int lightCap, vec3 norm, float offset, vec3 this_vertex, vec3 cull_vertex, vec3 lightPosition, vec3 clipdata){

	vec3 	this_lighting			=	vec3(0,0,0);
	vec3	texel_to_light_vector	=	lightPosition-this_vertex;
	// length() is never negative, so no abs() is needed around it.
	float	d						=	length(texel_to_light_vector);

	// NOTE(review): if lightCap is a zero-based index, the first test probably
	// wants numLights>lightCap; left unchanged pending confirmation.
	if(numLights>=lightCap && (d>clipdata.x) && (d<clipdata.y) ){

		int n	=	lightCap;

		vec3	light_dir	=	normalize(texel_to_light_vector);

		// Both dot products are clamped once here; no second clamp is needed.
		float	thisNdotL	=	clamp(dot( norm, light_dir ),0.0,1.0);
		float	thisRBNdotL	=	clamp(dot( radiosity_normal, light_dir ),0.0,1.0);

		float 	atten = 1.0 / (	gl_LightSource[n].constantAttenuation +
								gl_LightSource[n].linearAttenuation * d +
								gl_LightSource[n].quadraticAttenuation * d* d );

		float attenNdotL	=	atten*thisNdotL;
		float attenRBNdotL	=	atten*thisRBNdotL;
		this_lighting	+=	gl_LightSource[n].diffuse.xyz*attenNdotL;

		if(enable_radiosity==1){
			// NOTE(review): 3.1417 looks like a mistyped pi (3.14159...);
			// value kept as-is so the output does not change.
			this_lighting	+=	gl_LightSource[n].diffuse.xyz*rC*3.1417*attenRBNdotL;
		}

		// Only the first four lights have a channel in the shadow cube map;
		// lights with a higher index are left unshadowed.
		if(lightCap<4){
			// Lookup direction from the light to the texel, remapped to the
			// axis convention used when the cube faces were rendered.
		 	vec3	cubeshadowLookup		=	normalize(cull_vertex.xyz-lightPosition.xyz);
		 	cubeshadowLookup.y		=	-cubeshadowLookup.y;
		 	cubeshadowLookup.xyz	=	cubeshadowLookup.zyx;

			// Sample the cube map once, then pick this light's channel.
			vec4	occluder_depths	=	textureCube(tex7,cubeshadowLookup);

			// Stays 0.0 (fully shadowed) if n matches no channel.
			float	d_occluder	=	0.0;
			if(n==0)
				d_occluder	=	occluder_depths.r;
			else if(n==1)
				d_occluder	=	occluder_depths.g;
			else if(n==2)
				d_occluder	=	occluder_depths.b;
			else if(n==3)
				d_occluder	=	occluder_depths.a;

			// Lit only when the biased receiver distance is closer to the
			// light than the stored occluder distance.
			float	value_light	=	0.0;
			if(d+offset<d_occluder)
				value_light	=	1.0;

			this_lighting	*=	value_light;
		}

	}

	return this_lighting;
}

Share this post


Link to post
Share on other sites
Advertisement
It depends on where your bottleneck is, but a likely candidate is the shadowmap generation. One option is to use dual-paraboloid shadow maps (rather than cubemaps). Your overall lighting scheme sounds similar to what we've used in the past, and we solved the problem with DPSM.

Not sure if you have access to the ACM digital library, but this could help. If you can't get to it, send me a PM.

http://portal.acm.org/citation.cfm?id=1183316.1183331&coll=portal&dl=ACM

Share this post


Link to post
Share on other sites
Sign in to follow this  

  • Advertisement