Hi all,
I've managed to make a lighting shader, taking 1 to 3 directional lights (VS) and up to 32 point lights (PS).
Per frame I check which point lights are within the frustum (using a basic sphere check with the light's radius). If, for example, 20 point lights are within the frustum, I set lights 21 through 32 in the array to range = 0 and process them anyway. Using another int variable (activelights) as the loop bound in the shader doesn't improve performance compared to using a const max lights and looping through all of them, with the invisible lights set to range = 0.
The issue:
- it's terribly slow on fairly up-to-date hardware
I've copied the shader below, so you have an idea.
Before I dive into restructuring my scenegraph etc., I'd like to hear your opinion on the possible optimizations:
1 - currently my render queue sorts objects based on material and then renders them all, using all (point) lights, even if a light doesn't affect the object (I don't check this now). Would it be worthwhile to check and set the lights that affect an object right before I render it? So basically light position + radius versus object position + radius (quick rough check).
2 - would it be worthwhile to introduce a loop in the VS program that checks which lights affect the vertex being processed, and only go through these lights in the PS program? (this would save me from redoing my render queue)
3 - are there any quickwin improvements I could apply to the effect/ VS/PS programs?
4 - I could check the distance of point lights from the camera and, for example, render point lights > 50 units from the camera using the vertex shader instead of the pixel shader.
5 - other possibilities? (I think multiple passes without changing anything else won't help that much.)
Any help is really appreciated.
When I have a scene with for example 4 point lights (and a const MAX LIGHT of 4), the performance is visibly better.
All this I want to do without using a deferred renderer (for learning/ practice and see if I can improve it).
Personally I think I'm GPU bound now, and the most logical step would be to have the shader take 4 or 8 max lights, and set the lights that affect each object, right before I render the object (keep renderqueue the same, sorted by material). This would mean setting say 4 times 3 shader parameters for each object (no commitchanges needed I think).
/*************************************************************/
/** CR GENERIC SHADER 1 DIRECTIONAL, UP TO 32 POINT LIGHTS **/
/** PER PIXEL LIGHTING **/
/** SINGLE TEXTURE, ANISOTROPY **/
/** TECHNIQUES: OPAQUE AND BLENDED **/
/** SHADER MODEL: 3.0, NO BACKWARDS COMPATIBILITY **/
/*************************************************************/
/*******************************************************/
/** UNIFORM INPUT, CONTROLLED BY ENGINE **/
/** TRANSFORMATIONS & MATERIALS **/
/*******************************************************/
// Per-object transforms. VS_function multiplies the object-space position by
// World and the object-space normal by WorldInvTransp.
float4x4 World : WORLD;
float4x4 WorldInvTransp : WORLDINVTRANSP;
// Applied to the world-space position in VS_function to produce the output
// position. Declared `shared`; presumably so the engine can set it once for
// several effects through an effect pool - confirm against the engine code.
shared float4x4 ViewProj : VIEWPROJECTION;
// Scene ambient term: PS_function uses AmbientColor * AmbientIntensity * MatAmb.
float4 AmbientColor : AMB_COLOR;
float AmbientIntensity : AMB_INTENSITY;
// Material colours. PS_function reads MatAmb, MatDiff and MatEmi.
// NOTE(review): MatSpec and MatPower are not referenced by the shader programs
// visible in this file (no specular term is computed).
float4 MatAmb : MATERIAL_AMBIENT;
float4 MatDiff : MATERIAL_DIFFUSE;
float4 MatSpec : MATERIAL_SPECULAR;
float4 MatEmi : MATERIAL_EMISSIVE;
float MatPower : MATERIAL_POWER;
// Texture bound to textureSampler; the annotation names a default image file.
texture Tex0 : TEXTURE0 < string name = "roadblock texture.tga"; >;
// modelfile, just for effectedit
string XFile = "roadblock.x";
/*******************************************************/
/** UNIFORM INPUT, CONTROLLED BY ENGINE **/
/** LIGHT SOURCES AND PROPERTIES **/
/*******************************************************/
// Directional lights are evaluated in VS_function. The array size is a
// compile-time constant, so the loop there always runs
// MaxDirectionalLights times.
#define MaxDirectionalLights 1
float3 DirLightDir[MaxDirectionalLights];
float4 DirLightCol[MaxDirectionalLights];
float DirLightInt[MaxDirectionalLights];
// Point lights are evaluated per pixel in PS_function, which always iterates
// over all MaxPointLights entries.
// NOTE(review): every pixel pays for 32 lights regardless of how many are
// actually in range; lowering this constant and binding only the lights that
// touch each object is the cheapest way to cut pixel shader cost.
#define MaxPointLights 32
// Light position; compared against the interpolated wPos, so presumably world
// space - confirm against the engine code.
float3 PointLightPos[MaxPointLights];
// Attenuation in PS_function is 1 - saturate((dist - FPRange) / Range): it is
// 1 up to PointLightFPRange and reaches 0 at PointLightFPRange + PointLightRange.
float PointLightRange[MaxPointLights];
float PointLightFPRange[MaxPointLights];
// Light colour and a scalar multiplier applied to the squared attenuation.
float4 PointLightCol[MaxPointLights];
float PointLightInt[MaxPointLights];
/*******************************************************/
/** SAMPLER STATES FOR TEXTURING **/
/*******************************************************/
// Sampler used by PS_function to read Tex0: anisotropic minification (up to
// 4 samples), linear magnification and linear blending between mip levels.
sampler2D textureSampler = sampler_state
{
Texture = (Tex0);
MinFilter = ANISOTROPIC;
MagFilter = LINEAR;
MipFilter = LINEAR;
MaxAnisotropy = 4;
};
/*******************************************************/
/** VERTEX SHADER INPUT <= VERTEX DECLARATION **/
/*******************************************************/
// Per-vertex data consumed by VS_function; the semantics must match the
// engine's vertex declaration.
struct VS_INPUT
{
// Position, multiplied by World in VS_function.
float4 Pos : POSITION0;
// Normal, multiplied by WorldInvTransp in VS_function.
// NOTE(review): declared float4 although only a direction is needed -
// confirm what the vertex declaration supplies for w.
float4 Normal : NORMAL0;
// Texture coordinate, passed through unchanged.
float2 TexCoord : TEXCOORD0;
};
/*******************************************************/
/** VERTEX SHADER OUTPUT - PIXEL SHADER INPUT **/
/*******************************************************/
// Values written by VS_function and received, interpolated, by PS_function.
struct VS_OUTPUT
{
// Position after the World and ViewProj transforms.
float4 Pos : POSITION0;
// Summed directional-light contribution computed per vertex.
float4 Color : COLOR0;
// Normal after the WorldInvTransp transform; PS_function normalises it.
float3 Normal : TEXCOORD1;
// Texture coordinate for the tex2D lookup.
float2 TexCoord : TEXCOORD2;
// Position after the World transform, used for point-light distance/direction.
float3 wPos : TEXCOORD3;
};
/*******************************************************/
/** VERTEX SHADER PROGRAM **/
/*******************************************************/
/**
 * Vertex shader.
 *
 * Transforms the vertex by World and then ViewProj, forwards the transformed
 * normal, the world position and the texture coordinate to the pixel shader,
 * and evaluates the directional lights per vertex.
 *
 * input   - position, normal and texture coordinate from the vertex stream.
 * returns - VS_OUTPUT whose Color holds the sum of all directional lights
 *           (each light's contribution is clamped to [0, 1] before summing).
 */
VS_OUTPUT VS_function(VS_INPUT input)
{
    VS_OUTPUT Out = (VS_OUTPUT)0;

    float4 worldPosition = mul(input.Pos, World);
    Out.Pos = mul(worldPosition, ViewProj);

    // A normal is a direction, so only the upper-left 3x3 of the matrix
    // applies; this also removes the implicit float4 -> float3 truncation.
    float3 worldNormal = mul(input.Normal.xyz, (float3x3)WorldInvTransp);
    Out.Normal = worldNormal;
    Out.TexCoord = input.TexCoord;
    Out.wPos = worldPosition.xyz;

    // DIRECTIONAL LIGHTS
    // Normalise once so N.L is not scaled by any object scale contained in
    // the world matrix (the pixel shader normalises its copy as well).
    float3 unitNormal = normalize(worldNormal);

    // Accumulate as float4: a scalar accumulator would keep only the first
    // channel of the light colour.
    float4 dirTotal = 0.0f;
    for (int i = 0; i < MaxDirectionalLights; i++)
    {
        // Each light must be addressed by its index; the sign convention of
        // DirLightDir is kept exactly as the engine supplies it.
        float dirIntensity = dot(unitNormal, DirLightDir[i]);
        dirTotal += saturate(DirLightCol[i] * DirLightInt[i] * dirIntensity);
    }
    Out.Color = dirTotal;
    return Out;
}
/*******************************************************/
/** PIXEL SHADER PROGRAM **/
/*******************************************************/
/**
 * Pixel shader.
 *
 * Combines the per-vertex directional lighting (input.Color), the ambient and
 * emissive material terms and the per-pixel contribution of every point light,
 * then modulates the sum by the texture colour.
 *
 * input   - interpolated VS_OUTPUT.
 * returns - final colour, clamped to [0, 1].
 *
 * All MaxPointLights entries are always evaluated. A light whose range is 0
 * contributes nothing outside its PointLightFPRange radius.
 */
float4 PS_function(VS_OUTPUT input): COLOR0
{
    float4 textureColor = tex2D(textureSampler, input.TexCoord);
    float4 amb = AmbientColor * AmbientIntensity * MatAmb;
    float4 diff = input.Color * MatDiff;

    // Loop-invariant: normalise the interpolated normal once instead of once
    // per light.
    float3 unitNormal = normalize(input.Normal);

    float4 att_total = 0.0f;
    for (int i = 0; i < MaxPointLights; i++)
    {
        // One vector and one length serve both the attenuation and the
        // light direction.
        float3 toLight = PointLightPos[i] - input.wPos;
        float distToLight = length(toLight);

        // 1 inside the full-power radius, falling to 0 at FPRange + Range.
        // max() keeps the division defined for lights disabled with Range = 0.
        float att = 1 - saturate((distToLight - PointLightFPRange[i]) / max(PointLightRange[i], 1e-6f));
        // Squared falloff; a multiply is cheaper than pow(att, 2).
        att = att * att * PointLightInt[i];

        // Lambert term; max() avoids dividing by zero when the pixel sits
        // exactly on the light position.
        float nDotL = saturate(dot(toLight / max(distToLight, 1e-6f), unitNormal));

        att_total += att * PointLightCol[i] * nDotL;
    }
    return saturate((diff + amb + att_total + MatEmi) * textureColor);
}
/*******************************************************/
/** TECHNIQUES & PASSES **/
/*******************************************************/
// Single-pass technique with alpha blending disabled.
// Both programs are compiled for Shader Model 3.0.
technique OpaqueTechnique
{
pass P0
{
AlphaBlendEnable = FALSE;
VertexShader = compile vs_3_0 VS_function();
PixelShader = compile ps_3_0 PS_function();
}
}
/*******************************************************/
/** BLENDED **/
/*******************************************************/
// Single-pass technique with alpha blending enabled, using the same shader
// programs as OpaqueTechnique. SRCALPHA / INVSRCALPHA weights the new pixel by
// its alpha and the existing frame-buffer colour by (1 - alpha).
technique BlendedTechnique
{
pass P0
{
AlphaBlendEnable = TRUE;
SrcBlend = SRCALPHA;
DestBlend = INVSRCALPHA;
// NOTE(review): AlphaOp/AlphaArg1 are texture-stage states; with a ps_3_0
// pixel shader bound the output alpha presumably comes from PS_function's
// return value instead - confirm whether these two lines have any effect.
AlphaOp[0] = SelectArg1;
AlphaArg1[0] = Texture;
VertexShader = compile vs_3_0 VS_function();
PixelShader = compile ps_3_0 PS_function();
}
}