Hi guys,
PS. FPS I GOT IN MY ENGINE: 50 - 70 fps, res: 1080x1920, system can run Crysis 3 at high settings, so that's not the problem.
Scene:
Right now I'm facing another issue: the performance of my engine. I don't think the problem is my architecture, so I tried running a profiler.
Most of the time was spent in initialization, of course, but later on, in the render loop, the rendering of the depth, normal, and diffuse maps is quite fast relative to other things. The real problem comes when swapchain->present(0,0) is called. As I understand it, this takes a long time because it's waiting for the previous frame to finish, right?
I have heard some rumors that if statements in a shader behave oddly: even when the condition is false, the GPU will still semi-run/check all the contents inside the if statement, which makes it quite slow. Is this true or just rubbish?
Also, my FPS is NOT affected if I change the regular shader used for all the objects (I tried stripping it down so that only the positions were calculated in world space and a white color was returned), but if I change my post-processing shader to a very simple version which ONLY returns the diffuse map, my FPS is boosted 2x-3x! But why? (The shader is below if you're interested, but it still contains errors.)
// Input textures for the post-processing pass, bound to shader-resource slots t0-t10.
Texture2D t_dffmap : register(t0); // scene colour; the base image every effect in PShader reads
Texture2D t_depthmap : register(t1); // read as .xyz by getPosition and as a scalar by the soft-particle test
Texture2D t_normalmap : register(t2); // normals stored in [0,1]; getNormal expands them to [-1,1]
Texture2D t_random : register(t3); // noise texture sampled by getRandom for SSAO
Texture2D t_blmextract : register(t4); // bloom image combined with the base colour when bloom == 1
Texture2D t_megaparticles : register(t5); // added onto the base colour when megaparticles == 1
Texture2D t_fractalnoise : register(t6); // perturbs the texture coordinates when fractalNoise == 1
Texture2D t_softp : register(t7); // soft-particle colour
Texture2D t_softp_depth : register(t8); // soft-particle depth, compared against t_depthmap
Texture2D t_shadowmap : register(t9); // not referenced by any function in this file section
Texture2D t_glowmap : register(t10); // added onto the final colour when glowmap == 1
// Single sampler shared by every lookup; no explicit register is assigned.
SamplerState ss;
// Per-frame switches and parameters for the post-processing pixel shader (slot b0).
// The effect toggles are floats that PShader compares against 1.
// Member order determines the buffer layout; keep it in sync with the CPU-side struct.
cbuffer PARAMSBUFFER : register(b0)
{
float time; // divided by 1000 in the pixel-distortion effect (presumably milliseconds - TODO confirm)
float blur; // toggle: directional blur
float bloomExtract; // toggle: output only the bright-pass image and return early
float bloom; // toggle: combine t_blmextract with the base colour
float pixelDisortion; // toggle: pixel distortion effect
float pixelDisorterAmount; // strength factor of the pixel distortion
float ssao; // toggle: screen-space ambient occlusion
float bluramount; // number of blur taps taken when blur == 1
float megaparticles; // toggle: add t_megaparticles to the base colour
float radialblur; // toggle: radial blur (only evaluated when blur != 1)
float radial_start; // scale applied to the first radial-blur sample
float radial_width; // scale range covered by the radial-blur samples
float2 radial_center; // offset added to every radial-blur sample coordinate
float fractalNoise; // toggle: fractal-noise coordinate perturbation
float glowmap; // toggle: add t_glowmap to the final colour
matrix view; // multiplied with sampled positions/noise in getPosition and getRandom
};
// Bloom parameters (slot b1).
// NOTE(review): five floats occupy 20 bytes; D3D11 constant buffers are created in
// multiples of 16 bytes, so the CPU-side struct presumably needs padding - confirm.
cbuffer BloomBuffer : register(b1)
{
float BloomThreshold; // brightness cut-off used by the bloom-extract pass
float BloomSaturation; // saturation applied to the bloom image
float BaseSaturation; // saturation applied to the base image
float BloomIntensity; // multiplier applied to the bloom image
float BaseIntensity; // multiplier applied to the base image
};
// Screen-space ambient occlusion parameters (slot b2).
cbuffer SSAOBuffer : register(b2)
{
float g_scale; // scales the occluder distance in doAmbientOcclusion
float g_bias; // subtracted from the normal/direction dot product
float g_sample_rad; // sampling radius; PShader divides it by the pixel's position z
float g_intensity; // multiplier applied to each occlusion contribution
float ssaoIterations; // loop count; indexes a 4-entry direction table in PShader
float3 pppspace; // not referenced by the shader code (presumably padding - TODO confirm)
};
// God-ray (light shaft) parameters (slot b3).
cbuffer GODRAYBuffer : register(b3)
{
float3 LightPosition; // light position (coordinate space not visible here - TODO confirm)
float pad; // not referenced by the shader code (presumably fills out the 16-byte register)
matrix WorldViewProjection; // presumably transforms LightPosition to clip space - TODO confirm
float GOD_Density; // scales the step between consecutive ray samples
float GOD_Weight; // weight applied to each ray sample
float GOD_Decay; // per-step falloff multiplied into the illumination decay
float GOD_Exposure; // multiplier applied to the final colour when god rays are on
float GOD_NUM_SAMPLES; // number of samples taken along the ray
float GodRays; // toggle: PShader compares it against 1
float2 ppad; // not referenced by the shader code (presumably padding)
};
// Data passed from VShader to PShader.
struct VS_Output
{
float4 Pos : SV_POSITION; // clip-space position of the full-screen triangle vertex
float2 Tex : TEXCOORD0; // texture coordinate used for every screen-space lookup
float2 LightPos : TEXCOORD1; // light position read by the god-ray pass in PShader
};
// Generates a single full-screen triangle from the vertex id alone (no vertex
// buffer required) and forwards the light's screen position to the pixel shader.
//
// id: SV_VertexID, expected to be 0, 1 or 2 (Draw(3, 0)).
// Returns the clip-space position, the matching texture coordinate and the
// light position in texture space.
VS_Output VShader(uint id : SV_VertexID)
{
    VS_Output Output;

    // id 0,1,2 -> Tex (0,0), (2,0), (0,2): one oversized triangle covering the screen.
    Output.Tex = float2((id << 1) & 2, id & 2);
    // Map [0,2] texture space onto clip space, flipping Y.
    Output.Pos = float4(Output.Tex * float2(2,-2) + float2(-1,1), 0, 1);

    // LightPos was previously left uninitialised although PShader reads it for
    // the god-ray pass. Project the light to clip space (row-vector convention,
    // same as getPosition) and remap NDC [-1,1] to texture space [0,1].
    // NOTE(review): assumes LightPosition is in the space WorldViewProjection
    // expects, and that the light is in front of the camera (w > 0) - confirm.
    const float4 lightClip = mul(float4(LightPosition, 1.0f), WorldViewProjection);
    Output.LightPos = (lightClip.xy / lightClip.w) * float2(0.5f, -0.5f) + 0.5f;

    return Output;
}
// Helper for modifying the saturation of a color.
//
// color:      input colour.
// saturation: 0 yields the grey level in every channel, 1 leaves the colour
//             unchanged, values above 1 exaggerate it.
// Returns the colour interpolated between its grey level and itself; alpha is
// interpolated the same way, exactly as before.
float4 AdjustSaturation(float4 color, float saturation)
{
    // The constants 0.3, 0.59, and 0.11 are chosen because the
    // human eye is more sensitive to green light, and less to blue.
    // Use .rgb explicitly instead of relying on implicit float4 -> float3
    // truncation, which the compiler warns about.
    const float grey = dot(color.rgb, float3(0.3, 0.59, 0.11));
    return lerp(grey, color, saturation);
}
// Ambient Occlusion Stuff --------------------------------------------------
// Fetches the value stored in t_depthmap at uv and multiplies it with the
// view matrix from PARAMSBUFFER.
float3 getPosition(in float2 uv)
{
    const float3 stored = t_depthmap.Sample(ss, uv).xyz;
    return mul(stored, view);
}
// Reads the normal stored at uv, expands it from [0,1] to [-1,1] and
// renormalises it.
float3 getNormal(in float2 uv)
{
    const float3 encoded = t_normalmap.Sample(ss, uv).xyz;
    const float3 expanded = encoded * 2.0f - 1.0f;
    return normalize(expanded);
}
// Returns a per-pixel random direction for SSAO by tiling the noise texture
// across the screen.
//
// The tiling factor is screen size / noise size. It used to be hard-coded to
// float2(800, 600) / float2(64, 64), which is only right for an 800x600 target
// with a 64x64 noise texture; both sizes are now queried from the bound
// resources.
// NOTE(review): assumes t_depthmap has the same resolution as the render
// target - confirm.
float2 getRandom(in float2 uv)
{
    float screenW, screenH;
    t_depthmap.GetDimensions(screenW, screenH);

    float noiseW, noiseH;
    t_random.GetDimensions(noiseW, noiseH);

    const float2 tiledUV = float2(screenW, screenH) * uv / float2(noiseW, noiseH);
    return normalize( mul(t_random.Sample(ss, tiledUV).xy * 2.0f - 1.0f, view) );
}
// Occlusion contribution of one neighbouring sample.
//
// tcoord: texture coordinate of the pixel being shaded.
// uv:     offset from tcoord to the neighbour being tested.
// p:      position of the shaded pixel (from getPosition).
// cnorm:  normal of the shaded pixel.
// Returns a value that grows as the neighbour lies further along the normal
// and shrinks with distance; 0 when the neighbour is at or below the bias.
float doAmbientOcclusion(in float2 tcoord,in float2 uv, in float3 p, in float3 cnorm)
{
    const float3 toNeighbour = getPosition(tcoord + uv) - p;

    // How far the neighbour rises along the normal, minus the bias.
    const float facing = dot(cnorm, normalize(toNeighbour)) - g_bias;
    // Distance attenuation term.
    const float scaledDistance = length(toNeighbour) * g_scale;

    return max(0.0, facing) * (1.0 / (1.0 + scaledDistance)) * g_intensity;
}
// End
// Post-processing "uber" pixel shader. Every effect is switched by a float flag
// in the constant buffers (1 = enabled) and applied in this order:
// bloom extract (early out), pixel distortion, fractal noise, mega particles,
// bloom combine, blur or radial blur, SSAO, soft particles, glow map, god rays.
//
// input: interpolated output of VShader.
// Returns the final colour for the render target.
float4 PShader(VS_Output input) : SV_TARGET
{
    // Bloom-extract pass: output only the bright parts of the scene.
    if (bloomExtract == 1)
    {
        // Look up the original image color.
        float4 c = t_dffmap.Sample(ss, input.Tex);
        // Adjust it to keep only values brighter than the specified threshold.
        return saturate((c - BloomThreshold) / (1 - BloomThreshold));
    }

    if (pixelDisortion == 1)
    {
        // Distortion factor
        float NoiseX = pixelDisorterAmount * (time/1000) * sin(input.Tex.x * input.Tex.y+time/1000);
        NoiseX = fmod(NoiseX,8) * fmod(NoiseX,4);
        // Use our distortion factor to compute how much it will affect each
        // texture coordinate
        float DistortX = fmod(NoiseX,5);
        float DistortY = fmod(NoiseX,5+0.002);
        // Create our new texture coordinate based on our distortion factor
        input.Tex = float2(DistortX,DistortY);
    }

    if (fractalNoise == 1)
    {
        // Removed the unused 'offset' local, which cost an extra texture fetch.
        input.Tex += (t_fractalnoise.Sample(ss, input.Tex).xy - 0.5) / 15;
    }

    float4 color = t_dffmap.Sample(ss, input.Tex);
    if (megaparticles == 1)
    {
        color.a = 0.0f;
        color += t_megaparticles.Sample(ss, input.Tex);
    }

    if (bloom == 1)
    {
        // Look up the bloom and original base image colors.
        float4 cbloom = t_blmextract.Sample(ss, input.Tex);
        float4 base = color;
        // Adjust color saturation and intensity.
        cbloom = AdjustSaturation(cbloom, BloomSaturation) * BloomIntensity;
        base = AdjustSaturation(base, BaseSaturation) * BaseIntensity;
        // Darken down the base image in areas where there is a lot of bloom,
        // to prevent things looking excessively burned-out.
        base *= (1 - saturate(cbloom));
        // Combine the two images.
        color = base + cbloom;
    }

    if (blur == 1)
    {
        const float hblur[17] = {0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16};
        // hblur[i + 1] is read below, so at most 16 taps stay inside the table.
        const int taps = clamp((int)ceil(bluramount), 0, 16);
        for (int i = 0; i < taps; i++)
        {
            // The second component used to multiply by the whole array
            // ('hblur') instead of a single element.
            color += t_dffmap.Sample(ss, input.Tex + 0.002f * float2(hblur[i + 1], hblur[i]));
        }
        // Average over the taps plus the colour we started with.
        color /= (taps + 1);
    }
    else if (radialblur == 1)
    {
        const int nsamples = 16;
        // Work on a local copy: the original shifted input.Tex itself by -0.5,
        // which corrupted every later lookup (SSAO, soft particles, glow, god rays).
        const float2 centered = input.Tex - 0.5;
        for (int s = 0; s < nsamples; s++)
        {
            float scale = radial_start + radial_width * (s / (float)(nsamples - 1));
            color += t_dffmap.Sample(ss, centered * scale + radial_center);
        }
        // nsamples taps plus the starting colour were summed, so divide by
        // nsamples + 1 (dividing by nsamples brightened the image).
        color /= (nsamples + 1);
    }

    if (ssao == 1)
    {
        // Apply SSAO
        const float2 vec[4] = {float2(1,0),float2(-1,0),
                               float2(0,1),float2(0,-1)};
        float3 p = getPosition(input.Tex);
        float3 n = getNormal(input.Tex);
        float2 rand = getRandom(input.Tex);
        float ao = 0.0f;
        float rad = g_sample_rad/p.z; // g_s_r
        //**SSAO Calculation**//
        // vec[] has four entries; never index past it.
        const int iterations = clamp((int)ssaoIterations, 0, 4);
        for (int j = 0; j < iterations; ++j)
        {
            float2 coord1 = reflect(vec[j],rand)*rad;
            float2 coord2 = float2(coord1.x*0.707 - coord1.y*0.707,
                                   coord1.x*0.707 + coord1.y*0.707);
            ao += doAmbientOcclusion(input.Tex,coord1*0.25, p, n);
            ao += doAmbientOcclusion(input.Tex,coord2*0.5, p, n);
            ao += doAmbientOcclusion(input.Tex,coord1*0.75, p, n);
            ao += doAmbientOcclusion(input.Tex,coord2, p, n);
        }
        // Skip the divide when no iterations ran (it would be 0/0).
        if (iterations > 0)
        {
            ao /= (float)iterations * 4.0;
            // 'ao' is the amount of occlusion (0 on an unoccluded surface), so
            // the colour must be scaled by its complement; multiplying by 'ao'
            // directly turned unoccluded pixels black.
            color.rgb *= saturate(1.0f - ao);
        }
    }

    // Soft Particles
    // Take the first channel explicitly instead of relying on implicit
    // float4 -> float truncation; the scene depth is fetched once, not twice.
    // NOTE(review): getPosition treats t_depthmap as xyz data, so .x may not be
    // the intended depth channel - confirm.
    const float pDepth = t_softp_depth.Sample(ss, input.Tex).x;
    const float wDepth = t_depthmap.Sample(ss, input.Tex).x;
    // Sampled outside the branch: Sample() needs screen-space derivatives, which
    // are not well defined inside per-pixel (divergent) flow control.
    float4 pColor = t_softp.Sample(ss, input.Tex);
    if ((wDepth < pDepth) || wDepth == 0)
    {
        // NOTE(review): when this runs wDepth < pDepth holds, so the result is
        // always 0 - confirm the intended fade formula.
        if (wDepth != 0)
            pColor.a = saturate(wDepth - pDepth);
        color += pColor;
    }
    // End

    if (glowmap == 1)
    {
        color += t_glowmap.Sample(ss, input.Tex);
    }

    if (GodRays == 1)
    {
        // March from this pixel towards the light, accumulating decayed samples.
        float2 rayUV = input.Tex;
        const float2 deltaTexCoord = (rayUV - input.LightPos.xy) * (GOD_Density / GOD_NUM_SAMPLES);
        float illuminationDecay = 1.0f;
        for (int k = 0; k < GOD_NUM_SAMPLES; k++)
        {
            rayUV -= deltaTexCoord;
            // 'sample' is an HLSL keyword, hence the different local name.
            float4 raySample = t_dffmap.Sample(ss, rayUV);
            raySample *= illuminationDecay * GOD_Weight;
            color += raySample;
            illuminationDecay *= GOD_Decay;
        }
        return color * GOD_Exposure;
    }

    return color;
}
Now I understand that in GameDev there are many experienced programmers in graphics, so now I ask them/you, what can speed up an engine, what should I avoid? Tricks?
And that's basically it, thanks for taking interest!