Help with lighting options

Started by
11 comments, last by Hawkblood 10 years, 1 month ago

I have gone down a long road with per-pixel lighting and I was wondering if it would be worth it to try something different.

Currently I have a scene broken into sections so I can omit them when not seen. It uses HLSL lighting with multiple lights:


//-----------------------------------------------------------------------------
// Copyright (c) 2008 dhpoware. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
//-----------------------------------------------------------------------------
//
// Tangent space normal mapping with multiple lights in a single pass
// using shader model 3.0. This effect file limits the number of lights
// to MAX_ALL_LIGHTS.
//
//
//
//*************** THIS HAS BEEN MODIFIED TO SUPPORT 3 TYPES OF LIGHTS WITHIN 
//					THE SAME EFFECT
//
//
//-----------------------------------------------------------------------------

// Capacity of the lights[] array. numLights selects how many of these entries
// the pixel shader actually loops over.
#define MAX_ALL_LIGHTS 16

// Parameters for one light. A single layout describes all three supported
// light types; PS_AllLighting picks the code path from `type`.
struct AllLight
{
	int type;//0==directional, 1==point, 2==spot
	float3 dir;				// world space direction; negated for directional lights, used as the cone axis for spot lights
	float3 pos;				// world space position
	float4 ambient;			// not read by PS_AllLighting
	float4 diffuse;			// multiplied with material.diffuse
	float4 specular;		// scaled by the specular power term
	float spotInnerCone;	// spot light inner cone (theta) angle; the shader halves it before taking cos()
	float spotOuterCone;	// spot light outer cone (phi) angle; the shader halves it before taking cos()
	float radius;			// distance from pos at which the attenuation term reaches zero
};

// Surface reflectance parameters shared by every pixel of the draw call.
struct Material
{
	float4 ambient;		// multiplied by globalAmbient
	float4 diffuse;		// multiplied by each light's diffuse colour
	float4 emissive;	// added once, independent of the lights
	float4 specular;	// not read by PS_AllLighting
	float shininess;	// specular exponent; scaled per pixel by the normal map's alpha channel
};

//-----------------------------------------------------------------------------
// Globals.
//-----------------------------------------------------------------------------

// Object space -> world space (row-vector convention: mul(v, M)).
float4x4 worldMatrix;
// Its upper 3x3 carries the vertex normal into world space.
float4x4 worldInverseTransposeMatrix;
// Object space -> clip space.
float4x4 worldViewProjectionMatrix;

// Eye position, compared against world space pixel positions.
float3 cameraPos;
// Scene ambient colour; multiplied by material.ambient.
float4 globalAmbient;
// Number of entries of lights[] the pixel shader iterates over.
int numLights;

AllLight lights[MAX_ALL_LIGHTS];
Material material;

//-----------------------------------------------------------------------------
// Textures.
//-----------------------------------------------------------------------------

texture colorMapTexture;
texture normalMapTexture;

// Sampler for the base colour texture; the final lit colour is multiplied by
// this sample.
sampler2D colorMap = sampler_state
{
	Texture = <colorMapTexture>;
    MagFilter = Linear;
    MinFilter = Anisotropic;
    MipFilter = Linear;
    MaxAnisotropy = 16;
};

// Sampler for the tangent space normal map. RGB holds the normal encoded in
// [0,1]; the alpha channel scales material.shininess.
sampler2D normalMap = sampler_state
{
    Texture = <normalMapTexture>;
    MagFilter = Linear;
    MinFilter = Anisotropic;
    MipFilter = Linear;
    MaxAnisotropy = 16;
};

//-----------------------------------------------------------------------------
// Vertex Shaders.
//-----------------------------------------------------------------------------

// Per-vertex attributes consumed by VS_AllLighting.
struct VS_INPUT
{
	float3 position : POSITION;		// object space position
	float2 texCoord : TEXCOORD0;
	float3 normal : NORMAL;
    float4 tangent : TANGENT;		// xyz = tangent direction, w = sign applied to the bitangent
};

// Vertex shader outputs; also used as the pixel shader's input structure.
struct VS_OUTPUT
{
	float4 position : POSITION;		// clip space position
	float3 worldPos : TEXCOORD0;	// world space position, used for per-pixel light vectors
	float2 texCoord : TEXCOORD1;
	float3 normal : TEXCOORD2;		// world space tangent frame (not normalized here)
	float3 tangent : TEXCOORD3;
	float3 bitangent : TEXCOORD4;
};


// Vertex shader: projects the vertex and hands the world space position and
// tangent frame to the pixel shader, which re-normalizes them per pixel.
//
// The normal goes through the inverse-transpose of the world matrix, but the
// tangent is a direction lying in the surface and must go through the world
// matrix itself. That keeps it perpendicular to the transformed normal even
// when the world matrix contains non-uniform scale; for pure rotation and
// uniform scale the result is unchanged after normalization.
VS_OUTPUT VS_AllLighting(VS_INPUT IN)
{
	VS_OUTPUT OUT;

	float4 objectPos = float4(IN.position, 1.0f);

	OUT.position = mul(objectPos, worldViewProjectionMatrix);
	OUT.worldPos = mul(objectPos, worldMatrix).xyz;
	OUT.texCoord = IN.texCoord;

	OUT.normal = mul(IN.normal, (float3x3)worldInverseTransposeMatrix);
	OUT.tangent = mul(IN.tangent.xyz, (float3x3)worldMatrix);
	// IN.tangent.w flips the bitangent; presumably the +/-1 handedness sign.
	OUT.bitangent = cross(OUT.normal, OUT.tangent) * IN.tangent.w;

	return OUT;
}

//-----------------------------------------------------------------------------
// Pixel Shaders.
//-----------------------------------------------------------------------------

// Pixel shader: accumulates tangent space Blinn-Phong lighting from up to
// numLights directional, point and spot lights, then modulates the sum by the
// colour map.
float4 PS_AllLighting(VS_OUTPUT IN) : COLOR
{
	// Re-normalize the interpolated world space basis.
	float3 tangentW = normalize(IN.tangent);
	float3 bitangentW = normalize(IN.bitangent);
	float3 normalW = normalize(IN.normal);

	// T, B and N form the columns, so mul(vec, worldToTangent) yields
	// (dot(vec, T), dot(vec, B), dot(vec, N)).
	float3x3 worldToTangent = float3x3(tangentW.x, bitangentW.x, normalW.x,
	                                   tangentW.y, bitangentW.y, normalW.y,
	                                   tangentW.z, bitangentW.z, normalW.z);

	float3 viewDir = normalize(mul(cameraPos - IN.worldPos, worldToTangent));

	// The normal map is fetched twice: once for the normal, once for the
	// alpha channel that scales the specular exponent.
	float3 bumpNormal = normalize(tex2D(normalMap, IN.texCoord).rgb * 2.0f - 1.0f);
	float4 glossSample = tex2D(normalMap, IN.texCoord);

	float4 lit = float4(0.0f, 0.0f, 0.0f, 0.0f);

	for (int idx = 0; idx < numLights; ++idx)
	{
		float3 toLight = lights[idx].pos - IN.worldPos;
		bool isDirectional = (lights[idx].type == 0);

		// Directional lights always contribute; positional lights only
		// within 1.25x their radius.
		if (isDirectional || (length(toLight) < lights[idx].radius * 1.25f))
		{
			float3 lightDir;
			float falloff;

			if (isDirectional)
			{
				lightDir = mul(-lights[idx].dir, worldToTangent);
				falloff = 1.0f;
			}
			else
			{
				// Distance is expressed in units of the light radius, so the
				// quadratic falloff reaches zero at distance == radius.
				lightDir = mul(toLight / lights[idx].radius, worldToTangent);
				falloff = saturate(1.0f - dot(lightDir, lightDir));
			}
			lightDir = normalize(lightDir);

			if (lights[idx].type == 2)
			{
				// x = cos(outer / 2), y = cos(inner / 2)
				float2 coneCos = cos(float2(lights[idx].spotOuterCone, lights[idx].spotInnerCone) * 0.5f);
				float3 axis = mul(lights[idx].dir, worldToTangent);
				float axisDot = dot(-lightDir, normalize(axis));

				falloff *= smoothstep(coneCos[0], coneCos[1], axisDot);
			}

			float3 halfVec = normalize(lightDir + viewDir);

			float diffuseTerm = saturate(dot(bumpNormal, lightDir));
			float specTerm = saturate(dot(bumpNormal, halfVec));

			float specPower = 0.0f;
			if (diffuseTerm != 0.0f)
			{
				specPower = pow(specTerm, material.shininess * glossSample.w);
			}

			lit += ((material.diffuse * lights[idx].diffuse) +
				(lights[idx].specular * specPower)) * diffuseTerm * falloff;
		}
	}

	lit += material.ambient * globalAmbient + material.emissive;

	return lit * tex2D(colorMap, IN.texCoord);
}



//-----------------------------------------------------------------------------
// Techniques.
//-----------------------------------------------------------------------------

// Single-pass technique: both stages are compiled against shader model 3.0.
technique NormalMappingAllLighting
{
    pass
    {
        VertexShader = compile vs_3_0 VS_AllLighting();
        PixelShader = compile ps_3_0 PS_AllLighting();
    }
}


It looks great, but there is a problem. On certain surfaces, when I get close to them, the frame rate dramatically fluctuates(30-50fps). When I'm not close to anything the frame rate is 60fps. The only thing that seems to keep it at 60 all the time is when I remove ALL the lights....

There are three parts to this question:

1. Am I doing something wrong in the shader? If so, please help me fix it.

2. Is there any better lighting method for normal mapping?

3. I was thinking about trying cube lighting. Will it work with normal mapping? Is it faster than per-pixel lighting?

e
Advertisement

I believe the main problem you have encountered is rather in your approach to the lighting with shaders. You have set all the lights in the one shader, then perform branching (the for loop and if statements create branches in your computations) to calculate all the lighting terms within the single shader. However graphics cards are notorious for having bad performance with computing branches.

A more common solution to your problem is you render all objects once for each light and split the calculations out into several executions of the same shader and let the CPU perform all the branching as to whether an object should be drawn for a specific light etc.

I.e.


for each light:
    for each object:
        should we draw this object? if so:
            set light properties for the shader
            render object

This means that your shader only has to handle one light at a time, so you don't need any branches. The next problem is how to sum the contributions of the individual lights, and we can do this by enabling additive color blending. You could almost say you build your scene out of layers, one for each light, and then add them all up to get the final image on the screen.

-BiT

The problem ONLY occurs when I am close to the object. I could render a single object with a single light and it still happens. It doesn't make sense....

How would I keep from branching? Would I have each light with its own vertex shader and pixel shader pass? If so, I would be rendering each object as many times as I have lights... Right?

You are correct about the second point. The computational complexity would be # Lights x # Objects. But with a simpler shader this wouldn't be too much of a problem. As it is at the moment the complexity of your shader is proportional to the # Lights (because of the for loop) and you execute it once per object. So you are already on par with that anyway, but you will remove branching in the shader by not requiring the for loop, you could also use a different pixel shader for each different type of light removing the need for another if statement. Further more you can check if the object to be rendered is within range of the light and just exclude it from being drawn completely.

You say the problem only occurs when you are close to the object. This is most likely because the object then covers more pixels on the screen, so the pixel shader is executed more often and more time is spent on computation. I would suggest you disable vsync (if you haven't already); then you will probably notice the performance differences and how they change.

-BiT

I see a few problems with this shader.

  • The TBN matrix is calculated and used in the pixel shader?! That makes zero sense, and is a slowdown in performance. You should never be doing transformations in pixel shaders. It's enough to do the necessary transformations in the vertex shader and pass what's really required to the pixel shader, since the rasteriser does the necessary interpolations. So: Don't pass the binormal, normal, and tangent to the pixel shader. Instead, construct your TBN matrix in the vertex shader and transform the necessary vectors required for lighting in the vertex shader, and then only output what the pixel shader needs.
  • You are unnecessarily sampling twice from the normal map. Sampling textures is an expensive operation.
  • The input datatype for your pixel shader is VS_OUTPUT. This means the rasteriser is forced to unnecessarily interpolate registers it doesn't really need (POSITION for instance).
  • On some cards, using integers for for-loops is a huge slowdown. I've observed this before on my laptop when writing a fractal shader. The solution was to use floats (you might want to consider using float16 to save space and computational power).
  • You're always allocating space for 16 lights, even though you might only be using 1. This seems inefficient to me.

You might also want to consider using a lower shader model as well. None of the commands in your shader require shader model 3.0.

"I would try to find halo source code by bungie best fps engine ever created, u see why call of duty loses speed due to its detail." -- GettingNifty

The TBN matrix is calculated and used in the pixel shader

I can't figure out the syntax to put it into the VS.....


You are unnecessarily sampling twice from the normal map. Sampling textures is an expensive operation.

Fixed that. Thanks.


The input datatype for your pixel shader is VS_OUTPUT. This means the rasteriser is forced to unnecessarily interpolate registers it doesn't really need (POSITION for instance).

Please explain.... I tried taking out "float4 position : POSITION" and removing the calculation in the VS, but it won't compile.....


You're always allocating space for 16 lights, even though you might only be using 1. This seems inefficient to me.

Will that make a difference here? I will only be using at most 8 lights (likely) but I was keeping my options open.....


You might also want to consider using a lower shader model as well. None of the commands in your shader require shader model 3.0.

It doesn't compile with anything lower than 3.0.... I tried 2.0 and 1.0 just to test it -- it didn't compile.

Here's a video that shows the "slowdown":

[media]http:

[/media]

There are only 4 lights present in the scene and they are all spot lights: 2 for lighting the rooms and 2 for shining around the light sources themselves (this makes it look better).

Okay, upon closer inspection, I understand why the tangent transformation matrix is being used in the pixel shader. It's because there really isn't any other way to do it.

If you did the transformation in the vertex shader, you'd be limited to the number of texcoord channels, but by moving it to the pixel shader, you can go way over the 8-light-limit. You're just trading that off for speed, because for every single pixel on the screen you're doing light_count+1 number of transforms into tangent space.

My educated guess is that the slowdown has something to do with that transformation matrix.

The way I see it, you have two options:

  1. You could consider multipass lighting. Basically, the pixel shader is called once for every light in your scene, and you blend them all together. It's very speedy because no state changes are involved.
  2. You could optimise the crap out of the current shader, and cross your fingers in the hopes of it being a little bit faster.

If you choose number 1), I need to know something: Is it possible to use for-loops inside a technique?

"I would try to find halo source code by bungie best fps engine ever created, u see why call of duty loses speed due to its detail." -- GettingNifty


If you choose number 1), I need to know something: Is it possible to use for-loops inside a technique?

Wouldn't that be great. I tried it and it won't compile.

The multipass approach *could* work, but the shader would be HUGE because I would have to account for every combination of (light types)*(number of lights).

By the way, you were right about the 16 max lights. I cut it to 8 and it seems to work great. Here is the final product:


//-----------------------------------------------------------------------------
// Copyright (c) 2008 dhpoware. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
//-----------------------------------------------------------------------------
//
// Tangent space normal mapping with multiple lights in a single pass
// using shader model 3.0. This effect file limits the number of lights
// to MAX_ALL_LIGHTS (8).
//
//
//
//*************** THIS HAS BEEN MODIFIED TO SUPPORT 3 TYPES OF LIGHTS WITHIN 
//					THE SAME EFFECT
//
//
//-----------------------------------------------------------------------------

// Capacity of the lights[] array. numLights selects how many of these entries
// the pixel shader actually loops over.
#define MAX_ALL_LIGHTS 8

// Parameters for one light. A single layout describes all three supported
// light types; PS_AllLighting picks the code path from `type`.
struct AllLight
{
	int type;//0==directional, 1==point, 2==spot
	float3 dir;				// world space direction; the spot test uses it without re-normalizing, so it must be unit length
	float3 pos;				// world space position
	float4 diffuse;			// multiplied with material.diffuse
	float4 specular;		// scaled by the specular power term
	float spotInnerCone;	// spot light inner cone (theta) angle; the shader halves it before taking cos()
	float spotOuterCone;	// spot light outer cone (phi) angle; the shader halves it before taking cos()
	float radius;			// distance from pos at which the attenuation term reaches zero
};

// Surface reflectance parameters shared by every pixel of the draw call.
struct Material
{
	float4 ambient;		// multiplied by globalAmbient
	float4 diffuse;		// multiplied by each light's diffuse colour
	float4 emissive;	// added once, independent of the lights
	float4 specular;	// not read by PS_AllLighting
	float shininess;	// specular exponent; scaled per pixel by the normal map's alpha channel
};

//-----------------------------------------------------------------------------
// Globals.
//-----------------------------------------------------------------------------

// Object space -> world space (row-vector convention: mul(v, M)).
float4x4 worldMatrix;
// Its upper 3x3 carries the vertex normal into world space.
float4x4 worldInverseTransposeMatrix;
// Object space -> clip space.
float4x4 worldViewProjectionMatrix;

// Eye position in world space; the vertex shader subtracts worldPos from it.
float3 cameraPos;
// Scene ambient colour; multiplied by material.ambient.
float4 globalAmbient;
// Number of entries of lights[] the pixel shader iterates over.
int numLights;

AllLight lights[MAX_ALL_LIGHTS];
Material material;

//-----------------------------------------------------------------------------
// Textures.
//-----------------------------------------------------------------------------

texture colorMapTexture;
texture normalMapTexture;

// Sampler for the base colour texture; the final lit colour is multiplied by
// this sample.
sampler2D colorMap = sampler_state
{
	Texture = <colorMapTexture>;
    MagFilter = Linear;
    MinFilter = Anisotropic;
    MipFilter = Linear;
    MaxAnisotropy = 16;
};

// Sampler for the tangent space normal map. RGB holds the normal encoded in
// [0,1]; the alpha channel scales material.shininess.
sampler2D normalMap = sampler_state
{
    Texture = <normalMapTexture>;
    MagFilter = Linear;
    MinFilter = Anisotropic;
    MipFilter = Linear;
    MaxAnisotropy = 16;
};

//-----------------------------------------------------------------------------
// Vertex Shaders.
//-----------------------------------------------------------------------------

// Per-vertex attributes consumed by VS_AllLighting.
struct VS_INPUT
{
	float3 position : POSITION;		// object space position
	float2 texCoord : TEXCOORD0;
	float3 normal : NORMAL;
    float4 tangent : TANGENT;		// xyz = tangent direction, w = sign applied to the bitangent
};

// Vertex shader outputs; also used as the pixel shader's input structure.
struct VS_OUTPUT
{
	float4 position : POSITION;		// clip space position
	// Tangent space vector from the surface towards the camera.
	// NOTE(review): this is the only interpolant bound to POSITION1 instead of
	// a TEXCOORDn slot -- confirm the semantic is intentional.
	float3 CamWorld : POSITION1;
	float3 worldPos : TEXCOORD0;	// world space position, used for per-pixel light vectors
	float2 texCoord : TEXCOORD1;
	float3 normal : TEXCOORD2;		// world space tangent frame, unit length at each vertex
	float3 tangent : TEXCOORD3;
	float3 bitangent : TEXCOORD4;
};


// Vertex shader: projects the vertex, moves the position and tangent frame
// into world space, and pre-computes the tangent space view vector.
//
// The normal goes through the inverse-transpose of the world matrix, but the
// tangent is a direction lying in the surface and must go through the world
// matrix itself. That keeps it perpendicular to the transformed normal even
// when the world matrix contains non-uniform scale; for pure rotation and
// uniform scale the normalized result is unchanged.
VS_OUTPUT VS_AllLighting(VS_INPUT IN)
{
	VS_OUTPUT OUT;

	float4 objectPos = float4(IN.position, 1.0f);

	OUT.position = mul(objectPos, worldViewProjectionMatrix);
	OUT.worldPos = mul(objectPos, worldMatrix).xyz;
	OUT.texCoord = IN.texCoord;

	OUT.normal = normalize(mul(IN.normal, (float3x3)worldInverseTransposeMatrix));
	OUT.tangent = normalize(mul(IN.tangent.xyz, (float3x3)worldMatrix));
	// IN.tangent.w flips the bitangent; presumably the +/-1 handedness sign.
	OUT.bitangent = normalize(cross(OUT.normal, OUT.tangent) * IN.tangent.w);

	// T, B and N form the columns, so mul(vec, tbnMatrix) projects a world
	// space vector onto the tangent frame.
	float3x3 tbnMatrix = float3x3(OUT.tangent.x, OUT.bitangent.x, OUT.normal.x,
	                              OUT.tangent.y, OUT.bitangent.y, OUT.normal.y,
	                              OUT.tangent.z, OUT.bitangent.z, OUT.normal.z);
	OUT.CamWorld = normalize(mul(cameraPos - OUT.worldPos, tbnMatrix));

	return OUT;
}

//-----------------------------------------------------------------------------
// Pixel Shaders.
//-----------------------------------------------------------------------------

// Pixel shader: accumulates tangent space Blinn-Phong lighting from up to
// numLights directional, point and spot lights, then modulates the sum by the
// colour map.
//
// Every interpolated direction is re-normalized here. Linear interpolation of
// unit vectors across a triangle does not yield unit vectors, and a filtered
// or mip-mapped normal-map sample is not unit length either. Skipping these
// normalizations skews the world-to-tangent transform, the half vector and
// both dot products.
float4 PS_AllLighting(VS_OUTPUT IN) : COLOR
{
	float3 t = normalize(IN.tangent);
	float3 b = normalize(IN.bitangent);
	float3 nrm = normalize(IN.normal);

	// T, B and N form the columns, so mul(vec, tbnMatrix) projects a world
	// space vector onto the tangent frame.
	float3x3 tbnMatrix = float3x3(t.x, b.x, nrm.x,
	                              t.y, b.y, nrm.y,
	                              t.z, b.z, nrm.z);

	// Tangent space view vector from the vertex shader.
	float3 v = normalize(IN.CamWorld);

	float4 color = float4(0.0f, 0.0f, 0.0f, 0.0f);

	// One fetch serves both the normal (rgb) and the gloss factor (a).
	float4 ShinnyMap = tex2D(normalMap, IN.texCoord);
	float3 n = normalize(ShinnyMap.rgb * 2.0f - 1.0f);
	ShinnyMap.w *= material.shininess;	// final specular exponent

	for (float i = 0; i < numLights; ++i)
	{
		float len = length(lights[i].pos - IN.worldPos);

		// Directional lights always contribute; positional lights only
		// within 1.25x their radius.
		if ((len < lights[i].radius * 1.25f) || (lights[i].type == 0))
		{
			float3 l;
			float atten;

			if (lights[i].type == 0)
			{
				l = mul(-lights[i].dir, tbnMatrix);
				atten = 1.0f;
			}
			else
			{
				// Distance in units of the light radius: the quadratic
				// falloff reaches zero at distance == radius.
				l = mul((lights[i].pos - IN.worldPos) / lights[i].radius, tbnMatrix);
				atten = saturate(1.0f - dot(l, l));
			}
			l = normalize(l);

			if (lights[i].type == 2)
			{
				// x = cos(outer / 2), y = cos(inner / 2)
				float2 cosAngles = cos(float2(lights[i].spotOuterCone, lights[i].spotInnerCone) * 0.5f);
				// lights[i].dir should ALWAYS be normalized on input; the
				// orthonormal tbnMatrix preserves its length.
				float3 dl = mul(lights[i].dir, tbnMatrix);
				float spotDot = dot(-l, dl);
				float spotEffect = smoothstep(cosAngles[0], cosAngles[1], spotDot);

				atten *= spotEffect;
			}

			float3 h = normalize(l + v);

			float nDotL = saturate(dot(n, l));
			float nDotH = saturate(dot(n, h));
			// Guard kept from the original: skips pow() for unlit pixels.
			float power = (nDotL == 0.0f) ? 0.0f : pow(nDotH, ShinnyMap.w);

			color += ((material.diffuse * lights[i].diffuse) +
				(lights[i].specular * power)) * nDotL * atten;
		}
	}

	color += material.ambient * globalAmbient + material.emissive;

	return color * tex2D(colorMap, IN.texCoord);
}



//-----------------------------------------------------------------------------
// Techniques.
//-----------------------------------------------------------------------------

// Single-pass technique: both stages are compiled against shader model 3.0.
technique NormalMappingAllLighting
{
    pass
    {
        VertexShader = compile vs_3_0 VS_AllLighting();
        PixelShader = compile ps_3_0 PS_AllLighting();
    }
}


I made some optimizations too, but I think it can't be optimized any more than I have it already.

Except maybe:


float2 cosAngles = cos(float2(lights[i].spotOuterCone, lights[i].spotInnerCone) * 0.5f);

I tried to calculate this in my program, but I don't think "cos" does the same thing in C++ as in HLSL.... Needless to say, my attempt failed-- I got weird results. perhaps I did it incorrectly. Do you know how to come up with "cosAngles"?

I tried :


				// NOTE(review): this evaluates 0.5f * cos(angle) for each cone. The shader
				// expression it is meant to replace is cos(angle * 0.5f), i.e. the halving
				// is applied to the angle before the cosine, so the two values differ.
				D3DXVECTOR2 cosSpotAngles(cos(IntStructureTemplate[TemplateType].Lights[LN].spotOuterCone),cos(IntStructureTemplate[TemplateType].Lights[LN].spotInnerCone));
				cosSpotAngles*=0.5f;

Of course, I had to modify the shader to take a single float2. Like I said, it didn't give me good results.....


The multipass approach *could* work, but the shader would be HUGE because I would have to account for every combination of (light types)*(number of lights).

What? The shader would be simple, it would just calculate the value for a single light. The drawback is that you have to draw your geometry n times, where n is the number of lights affecting a particular piece of geometry.


By the way, you were right about the 16 max lights. I cut it to 8 and it seems to work great.

So make a different technique (or pass) for each number of lights, and use the appropriate one depending on how many lights affect the object you're currently drawing. Something like this:


// Sketch of a pixel shader specialised per light count. LightCount is a
// uniform parameter that each technique binds to a literal in its compile
// statement, so every technique gets its own compiled variant.
float4 PS_AllLighting(VS_OUTPUT input, uniform int LightCount) : COLOR
{
	for (float i = 0; i < LightCount; ++i)
	{
		// etc..... (per-light accumulation goes here)
	}
	// Placeholder result (opaque red) for the elided lighting code.
    return float4(1, 0, 0, 1);
}

// Variant compiled with LightCount = 1.
technique NormalMappingAllLighting1
{
    pass
    {
        VertexShader = compile vs_3_0 VS_AllLighting();
        PixelShader = compile ps_3_0 PS_AllLighting(1);
    }
}

// Variant compiled with LightCount = 2.
technique NormalMappingAllLighting2
{
    pass
    {
        VertexShader = compile vs_3_0 VS_AllLighting();
        PixelShader = compile ps_3_0 PS_AllLighting(2);
    }
}

// Variant compiled with LightCount = 3.
technique NormalMappingAllLighting3
{
    pass
    {
        VertexShader = compile vs_3_0 VS_AllLighting();
        PixelShader = compile ps_3_0 PS_AllLighting(3);
    }
}

// And so on...

You could also try to get dynamic looping to work in your pixel shader. I'm not sure why it isn't working now (I'm pretty sure it's supported in ps_3_0), but it's clear that it's unrolling the loop and always executing it 16 times I guess. Have you played with different shader compilation flags?

The use of TBN-space lighting here is only harmful to performance.
For each light, you're transforming the light from world coordinates into TBN coordinates.
The alternative would be to transform the normal-map once from TBN coordinates to world coordinates.

Don't do math on constants within the shader:
cos(float2(lights.spotOuterCone, lights.spotInnerCone) * 0.5f);
Do that math on the CPU, so that the shader code becomes:
float2(lights.halfCosSpotOuterCone, lights.halfCosSpotInnerCone)

This is wasteful:
power = (nDotL == 0.0f) ? 0.0f : pow(nDotH, material.shininess*ShinnyMap.w);
Because a few lines later you use:
power * nDotL
(if nDotL is zero, you end up with zero with or without the ?: logic)

These branches are wasteful:
if (lights.type==0){
Make three for loops instead.

These aren't performance issues, but just don't make sense physically:
color += ((material.diffuse * lights.diffuse )+(lights.specular * power ))* nDotL * atten;
Lights can't have one color for diffuse reflections and a different colour for specular reflections. When the light emits photons of a certain colour, it can't know in advance how they're going to bounce, and change their colours accordingly.
i.e. lights.diffuse/lights.specular should just be lights.color
and:
return color * tex2D(colorMap, IN.texCoord);
The color map should only affect the colours of the diffuse/ambient/emissive parts. Specular reflections do not actually enter/touch the material, and so they have no chance to be discoloured by the material.
(Sometimes a separate specular colour map is used, which makes sense for metallic objects.)

Here's a quick attempt at optimizing it:


// Directional light: constant direction, no distance falloff.
struct DirLight
{
	float3 dir;				// world space direction the light travels in; the shader negates it to get the surface-to-light vector
	float3 pos;				// world space position; a directional light's contribution in PS_AllLighting does not depend on it
	float4 color;			// single colour used for the light's contribution
};
// Omnidirectional light with quadratic falloff.
struct PointLight
{
	float3 pos;				// world space position
	float4 ambient;			// not read by PS_AllLighting
	float4 color;			// single colour used for the light's contribution
	float radius;			// distance from pos at which the attenuation term reaches zero
};
// Spot light: point-light falloff multiplied by a smooth cone mask. The cone
// limits are supplied as cosines so the shader does no trigonometry.
struct SpotLight
{
	float3 dir;				// world space cone axis; used without normalization, so presumably unit length
	float3 pos;				// world space position
	float4 color;			// single colour used for the light's contribution
	float cosSpotInnerCone;	// cosine bounding the fully lit region; compared against dot(-l, dir) -- assumed to be cos(theta / 2), computed on the CPU
	float cosSpotOuterCone;	// cosine at which the light has faded to zero -- assumed to be cos(phi / 2), computed on the CPU
	float radius;			// distance from pos at which the attenuation term reaches zero
};

// Surface reflectance parameters; `material` is the single global instance
// read by the pixel shader.
struct Material
{
	float4 ambient;		// multiplied by globalAmbient
	float4 diffuse;		// multiplied by the accumulated diffuse light
	float4 emissive;	// added to the diffuse/ambient sum before the colour map is applied
	float4 specular;	// multiplied by the accumulated specular light
	float shininess;	// specular exponent; scaled per pixel by the normal map's alpha channel
} material;

// Active light counts, stored as floats: x = directional, y = point, z = spot.
// Each loop in PS_AllLighting breaks once its index reaches the matching count.
float3 lightCounts;

// Array capacities, which are also the compile-time loop bounds.
#define MAX_DIR_LIGHTS 2
#define MAX_POINT_LIGHTS 4
#define MAX_SPOT_LIGHTS 4

DirLight dir_lights[MAX_DIR_LIGHTS];
PointLight point_lights[MAX_POINT_LIGHTS];
SpotLight spot_lights[MAX_SPOT_LIGHTS];

// Transform matrices; not referenced by the pixel shader in this listing
// (presumably consumed by a vertex shader that is not shown).
float4x4 worldMatrix;
float4x4 worldInverseTransposeMatrix;
float4x4 worldViewProjectionMatrix;

// Eye position in world space.
float3 cameraPos;
// Scene ambient colour; multiplied by material.ambient.
float4 globalAmbient;

// Sampler for the base colour texture. colorMapTexture is not declared in
// this listing; it is assumed to come from the original effect file.
sampler2D colorMap = sampler_state
{
	Texture = <colorMapTexture>;
	MagFilter = Linear;
	MinFilter = Anisotropic;
	MipFilter = Linear;
	MaxAnisotropy = 16;
};

// Sampler for the tangent space normal map (rgb = encoded normal, a = gloss
// factor). normalMapTexture is not declared in this listing; it is assumed to
// come from the original effect file.
sampler2D normalMap = sampler_state
{
	Texture = <normalMapTexture>;
	MagFilter = Linear;
	MinFilter = Anisotropic;
	MipFilter = Linear;
	MaxAnisotropy = 16;
};

// Interpolants consumed by PS_AllLighting.
struct VS_OUTPUT
{
	float4 position : POSITION;		// clip space position
	float3 worldPos : TEXCOORD0;	// world space position, used for per-pixel light and view vectors
	float2 texCoord : TEXCOORD1;
	float3 normal : TEXCOORD2;		// world space tangent frame; re-normalized in the pixel shader
	float3 tangent : TEXCOORD3;
	float3 bitangent : TEXCOORD4;
};


// Pixel shader: world space Blinn-Phong lighting with one loop per light type.
//
// The normal-map sample is rotated into world space once, so no light vector
// has to be transformed into tangent space. Diffuse and specular light are
// accumulated separately so that the colour map tints only the
// diffuse/ambient/emissive part of the result.
//
// Changes from the first draft of this shader:
//  * each specular contribution is multiplied by the light's colour, the same
//    as the diffuse one; previously every light added white specular;
//  * the unused distance computation in the directional loop is gone;
//  * the "len < radius * 1.25" test in the point and spot loops is gone.
//    atten = saturate(1 - |d / radius|^2) is already zero for every distance
//    >= radius, so the test could never change the result.
float4 PS_AllLighting(VS_OUTPUT IN) : COLOR
{
	// Re-normalize the interpolated basis.
	float3 t = normalize(IN.tangent);
	float3 b = normalize(IN.bitangent);
	float3 n = normalize(IN.normal);

	// Rows are T, B, N: mul(vec, inv_tbnMatrix) = vec.x*T + vec.y*B + vec.z*N,
	// i.e. tangent space -> world space.
	float3x3 inv_tbnMatrix = float3x3(t, b, n);

	float3 v = normalize(cameraPos - IN.worldPos);

	// One fetch serves both the normal (rgb) and the gloss factor (a).
	float4 nmap = tex2D(normalMap, IN.texCoord);
	n = normalize(mul(nmap.rgb * 2.0f - 1.0f, inv_tbnMatrix));
	float specPower = nmap.w * material.shininess;

	float4 diffuse = (float4)0;
	float4 specular = (float4)0;

	// Directional lights: no position, no falloff.
	{[loop]
	for (int i = 0; i < MAX_DIR_LIGHTS; ++i)
	{
		[branch]
		if( (float)i >= lightCounts.x )
			break;
		float3 l = -dir_lights[i].dir;
		float3 h = normalize(l + v);
		float nDotL = saturate(dot(n, l));
		float nDotH = saturate(dot(n, h));
		float spec = pow(nDotH, specPower);
		diffuse += dir_lights[i].color * nDotL;
		specular += dir_lights[i].color * (spec * nDotL);
	}}

	// Point lights: quadratic falloff reaching zero at distance == radius.
	{[loop]
	for (int i = 0; i < MAX_POINT_LIGHTS; ++i)
	{
		[branch]
		if( (float)i >= lightCounts.y )
			break;
		float3 l = (point_lights[i].pos - IN.worldPos) / point_lights[i].radius;
		float atten = saturate(1.0f - dot(l, l));
		l = normalize(l);
		float3 h = normalize(l + v);
		float nDotL = saturate(dot(n, l));
		float nDotH = saturate(dot(n, h));
		float spec = pow(nDotH, specPower);
		atten *= nDotL;
		diffuse += point_lights[i].color * atten;
		specular += point_lights[i].color * (spec * atten);
	}}

	// Spot lights: point-light falloff times a smooth cone mask.
	{[loop]
	for (int i = 0; i < MAX_SPOT_LIGHTS; ++i)
	{
		[branch]
		if( (float)i >= lightCounts.z )
			break;
		float3 l = (spot_lights[i].pos - IN.worldPos) / spot_lights[i].radius;
		float atten = saturate(1.0f - dot(l, l));
		l = normalize(l);
		float3 h = normalize(l + v);
		float nDotL = saturate(dot(n, l));
		float nDotH = saturate(dot(n, h));
		float spec = pow(nDotH, specPower);
		// Cone limits arrive as precomputed cosines; dir is expected to be
		// unit length so that spotDot is a true cosine.
		float spotDot = dot(-l, spot_lights[i].dir);
		float spotEffect = smoothstep(spot_lights[i].cosSpotOuterCone,
		                              spot_lights[i].cosSpotInnerCone,
		                              spotDot);
		atten *= spotEffect;
		atten *= nDotL;
		diffuse += spot_lights[i].color * atten;
		specular += spot_lights[i].color * (spec * atten);
	}}

	// Only the diffuse/ambient/emissive part is tinted by the colour map.
	float4 color = (diffuse * material.diffuse
	                + material.ambient * globalAmbient
	                + material.emissive) * tex2D(colorMap, IN.texCoord)
	             + specular * material.specular;

	return color;
}

From 256 ops, down to 191 (n.b. the 191 op version also has a lot of duplicated code due to having 3 loops!!!)

This topic is closed to new replies.

Advertisement