With this SSAO pixel shader (the whole body is commented out, so it just returns 1.0f):
// SSAO pixel shader.
//
// In this version the entire occlusion computation below is wrapped in a block
// comment, so the only live statement is the final `return 1.0f;` and every
// pixel is written as fully unoccluded.
//
// pin          -- interpolated vertex output; the disabled code reads pin.Tex
//                 and pin.ToFarPlane from it.
// gSampleCount -- uniform sample count; the disabled code uses it as the loop
//                 bound and as the divisor for the occlusion average.
float4 PS(VertexOut pin, uniform int gSampleCount) : SV_Target
{
/* // p -- the point we are computing the ambient occlusion for.
// n -- normal vector at p.
// q -- a random offset from p.
// r -- a potential occluder that might occlude p.
// Get viewspace normal and z-coord of this pixel. The tex-coords for
// the fullscreen quad we drew are already in uv-space.
float4 normalDepth = gNormalDepthMap.SampleLevel(samNormalDepth, pin.Tex, 0.0f);
float3 n = normalDepth.xyz;
float pz = normalDepth.w;
//
// Reconstruct full view space position (x,y,z).
// Find t such that p = t*pin.ToFarPlane.
// p.z = t*pin.ToFarPlane.z
// t = p.z / pin.ToFarPlane.z
//
float3 p = (pz/pin.ToFarPlane.z)*pin.ToFarPlane;
if (p.z > 100)
return 1.0f;
// Extract random vector and map from [0,1] --> [-1, +1].
float3 randVec = 2.0f*gRandomVecMap.SampleLevel(samRandomVec, 4.0f*pin.Tex, 0.0f).rgb - 1.0f;
float occlusionSum = 0.0f;
// Sample neighboring points about p in the hemisphere oriented by n.
[unroll]
for(int i = 0; i < gSampleCount; ++i)
{
// Our offset vectors are fixed and uniformly distributed (so that our offset vectors
// do not clump in the same direction). If we reflect them about a random vector
// then we get a random uniform distribution of offset vectors.
float3 offset = reflect(gOffsetVectors[i].xyz, randVec);
// Flip offset vector if it is behind the plane defined by (p, n).
float flip = sign( dot(offset, n) );
// Sample a point near p within the occlusion radius.
float3 q = p + flip * gOcclusionRadius * offset;
// Project q and generate projective tex-coords.
float4 projQ = mul(float4(q, 1.0f), gViewToTexSpace);
projQ /= projQ.w;
// Find the nearest depth value along the ray from the eye to q (this is not
// the depth of q, as q is just an arbitrary point near p and might
// occupy empty space). To find the nearest depth we look it up in the depthmap.
float rz = gNormalDepthMap.SampleLevel(samNormalDepth, projQ.xy, 0.0f).a;
// Reconstruct full view space position r = (rx,ry,rz). We know r
// lies on the ray of q, so there exists a t such that r = t*q.
// r.z = t*q.z ==> t = r.z / q.z
float3 r = (rz / q.z) * q;
//
// Test whether r occludes p.
// * The product dot(n, normalize(r - p)) measures how much in front
// of the plane(p,n) the occluder point r is. The more in front it is, the
// more occlusion weight we give it. This also prevents self shadowing where
// a point r on an angled plane (p,n) could give a false occlusion since they
// have different depth values with respect to the eye.
// * The weight of the occlusion is scaled based on how far the occluder is from
// the point we are computing the occlusion of. If the occluder r is far away
// from p, then it does not occlude it.
//
float distZ = p.z - r.z;
float dp = max(dot(n, normalize(r - p)), 0.0f);
float occlusion = dp * OcclusionFunction(distZ);
occlusionSum += occlusion;
}
occlusionSum /= gSampleCount;
float access = 1.0f - occlusionSum;
// Sharpen the contrast of the SSAO map to make the SSAO effect more dramatic.
return saturate(pow(access, 4.0f)); */
// The computation above is disabled; report "no occlusion" for every pixel.
return 1.0f;
}
the performance stays the same, so the shader itself is not the problem.
When I'm looking the other way, or when I'm too far away, I skip computing the SSAO map with this code:
bool ComputeSSAOThisFrame = false;
for (USHORT i = 0; i < ModelInstances.size(); ++i)
{
if (ModelInstances[i].ComputeSSAO)
{
if (ModelInstances[i].Visible)
{
XMVECTOR campos = m_Cam.GetPositionXM();
XMVECTOR modelpos = XMLoadFloat3(&XMFLOAT3(ModelInstances[i].World._41,
ModelInstances[i].World._42, ModelInstances[i].World._43));
XMVECTOR dist = modelpos - campos;
float distf;
XMStoreFloat(&distf, XMVector3LengthSq(dist));
if (!(distf > 10000)) //sqrt(10000) = 100
{
ComputeSSAOThisFrame = true;
break;
}
}
}
}
Now the performance when the model is not visible:
and when it is too far away:
Now, when I'm looking at the sky, everything is culled using intersection tests (shadows are culled too, i.e. the rendering to the shadow map), and the terrain is culled in the constant hull shader.
Now the performance is:
This code is causing the performance issue, because when I don't call it the FPS increases from 40 to 136.
void Ssao::ComputeSsao(const Camera& camera)
{
// Bind the ambient map as the render target. Observe that this pass does not bind
// a depth/stencil buffer--it does not need it, and without one, no depth test is
// performed, which is what we want.
ID3D11RenderTargetView* renderTargets[1] = {mAmbientRTV0};
mDC->OMSetRenderTargets(1, renderTargets, 0);
mDC->ClearRenderTargetView(mAmbientRTV0, reinterpret_cast<const float*>(&Colors::Black));
mDC->RSSetViewports(1, &mAmbientMapViewport);
// Transform NDC space [-1,+1]^2 to texture space [0,1]^2
static const XMMATRIX T(
0.5f, 0.0f, 0.0f, 0.0f,
0.0f, -0.5f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
0.5f, 0.5f, 0.0f, 1.0f);
XMMATRIX P = camera.Proj();
XMMATRIX PT = XMMatrixMultiply(P, T);
Effects::SsaoFX->SetViewToTexSpace(PT);
Effects::SsaoFX->SetOffsetVectors(mOffsets);
Effects::SsaoFX->SetFrustumCorners(mFrustumFarCorner);
Effects::SsaoFX->SetNormalDepthMap(mNormalDepthSRV);
Effects::SsaoFX->SetRandomVecMap(mRandomVectorSRV);
UINT stride = sizeof(Vertex::Basic32);
UINT offset = 0;
mDC->IASetInputLayout(InputLayouts::Basic32);
mDC->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
mDC->IASetVertexBuffers(0, 1, &mScreenQuadVB, &stride, &offset);
mDC->IASetIndexBuffer(mScreenQuadIB, DXGI_FORMAT_R16_UINT, 0);
ID3DX11EffectTechnique* tech = Effects::SsaoFX->SsaoTech;
D3DX11_TECHNIQUE_DESC techDesc;
tech->GetDesc( &techDesc );
for(UINT p = 0; p < techDesc.Passes; ++p)
{
tech->GetPassByIndex(p)->Apply(0, mDC);
mDC->DrawIndexed(6, 0, 0);
}
}
void Ssao::BlurAmbientMap(int blurCount)
{
    // Each round is one horizontal blur (ambient map 0 -> 1) followed by one
    // vertical blur (ambient map 1 -> 0), so the result ends up in map 0.
    // A blurCount of zero or less performs no work.
    for (int remaining = blurCount; remaining > 0; --remaining)
    {
        BlurAmbientMap(mAmbientSRV0, mAmbientRTV1, true);
        BlurAmbientMap(mAmbientSRV1, mAmbientRTV0, false);
    }
}
void Ssao::BlurAmbientMap(ID3D11ShaderResourceView* inputSRV, ID3D11RenderTargetView* outputRTV, bool horzBlur)
{
    // Output stage: draw into outputRTV with no depth/stencil buffer bound.
    ID3D11RenderTargetView* blurTarget = outputRTV;
    mDC->OMSetRenderTargets(1, &blurTarget, 0);
    mDC->ClearRenderTargetView(outputRTV, reinterpret_cast<const float*>(&Colors::Black));
    mDC->RSSetViewports(1, &mAmbientMapViewport);

    // Effect inputs: texel size of the ambient map, the normal/depth map and
    // the image to blur.
    Effects::SsaoBlurFX->SetTexelWidth(1.0f / mAmbientMapViewport.Width);
    Effects::SsaoBlurFX->SetTexelHeight(1.0f / mAmbientMapViewport.Height);
    Effects::SsaoBlurFX->SetNormalDepthMap(mNormalDepthSRV);
    Effects::SsaoBlurFX->SetInputImage(inputSRV);

    // Choose the blur direction.
    ID3DX11EffectTechnique* blurTech = horzBlur
        ? Effects::SsaoBlurFX->HorzBlurTech
        : Effects::SsaoBlurFX->VertBlurTech;

    // Input assembler: the screen quad, drawn as an indexed triangle list.
    UINT vertexStride = sizeof(Vertex::Basic32);
    UINT vertexOffset = 0;
    mDC->IASetInputLayout(InputLayouts::Basic32);
    mDC->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
    mDC->IASetVertexBuffers(0, 1, &mScreenQuadVB, &vertexStride, &vertexOffset);
    mDC->IASetIndexBuffer(mScreenQuadIB, DXGI_FORMAT_R16_UINT, 0);

    D3DX11_TECHNIQUE_DESC blurTechDesc;
    blurTech->GetDesc(&blurTechDesc);

    UINT passIndex = 0;
    while (passIndex < blurTechDesc.Passes)
    {
        blurTech->GetPassByIndex(passIndex)->Apply(0, mDC);
        mDC->DrawIndexed(6, 0, 0);

        // Clear the input image and re-apply the pass so the SRV is unbound;
        // the same texture becomes the render target of the next blur.
        Effects::SsaoBlurFX->SetInputImage(0);
        blurTech->GetPassByIndex(passIndex)->Apply(0, mDC);

        ++passIndex;
    }
}
When I'm too far away, or when the model is not visible, I do this and the performance suddenly increases:
// Run the SSAO pass and its blur only when the flag is set, i.e. skip both
// when every model is either not visible or too far away.
// NOTE(review): when skipped, the ambient map is not touched here, so it
// presumably keeps the previous contents -- confirm that is acceptable.
if (ComputeSSAOThisFrame)
{
    // Number of horizontal+vertical blur rounds applied to the ambient map.
    const int ssaoBlurRounds = 2;

    m_Ssao->ComputeSsao(m_Cam);
    m_Ssao->BlurAmbientMap(ssaoBlurRounds);
}
NOTE: The resolution is 1024x768. At 800x600 or 500x500 it reaches about 70-100 FPS with SSAO, and 216 FPS when not computing SSAO. My goal is to reach 100 FPS with SSAO at 1024x768.
EDIT: Here are the SSAO blur and SSAO normal-depth shaders.
SSAO normal depth: http://pastebin.com/DUXjGxYd
SSAO Blur: http://pastebin.com/KQe2MP1C