I'm currently building my new deferred-shading-based renderer and have been testing a depth pre-pass for opaque geometry.
I haven't implemented the GBuffer pass yet (I'm rendering to the back buffer at the moment), so I guess the benefit will be greater later on, but I was testing instancing with this using 50k textured cubes (full input = Pos/UV/Normal/Tangent/BiTangent) with anisotropic filtering on. Here are my results:
Without depth pre pass:
actual rasterized geometry pass = ~ 2.7ms
With depth pre pass:
pre pass = ~ 2.0ms
actual rasterized geometry pass: around 2.4ms
So basically it's cutting my performance almost in half (~4.4ms in total with the pre pass versus ~2.7ms without it).
I'm already doing a very lightweight pre pass.
I've split the vertex information so I can transfer only the vertex position data.
Unfortunately, this means that I'm setting multiple vertex buffers for the actual rasterization pass... could this be the culprit?
I've set the pixel shader to NULL and disabled color writes.
And the shader itself only transforms vertex positions:
// Pack matrices in row-major order for this shader.
#pragma pack_matrix( row_major )
// Single big buffer to store instance transforms.
// Each instance occupies `elementsPerInstance` consecutive float4 entries;
// GetInstanceTransform() reads three of them per instance to rebuild the
// world matrix.
Buffer<float4> InstanceTransformBuffer : register(t0);
// Per-draw constants used to address InstanceTransformBuffer and to project
// the instanced geometry.
cbuffer InstanceTransformsAccessBuffer : register(b0)
{
    // Index of the first float4 element belonging to this draw.
    float    startIndex          : packoffset(c0.x);
    // Number of float4 elements stored per instance.
    float    elementsPerInstance : packoffset(c0.y);
    // Combined view * projection matrix.
    float4x4 ViewProjection      : packoffset(c1);
};
// Vertex shader input: only the position stream plus the system-generated
// instance index are consumed by the depth pre-pass.
struct VSI
{
    float4 Position   : POSITION;       // Vertex position
    uint   InstanceID : SV_InstanceID;  // Index of the instance being drawn
};
// Vertex shader output: the transformed position is the only value written.
struct VSO
{
    float4 Position : SV_POSITION;
};
// Rebuilds the 4x4 world matrix of one instance from InstanceTransformBuffer.
//
// instID : instance index (SV_InstanceID).
// offset : additional element offset inside the instance's slot.
//
// Three float4 entries are loaded and a constant (0, 0, 0, 1) is appended as
// the fourth; the result is the transpose of the matrix whose rows are those
// four vectors.
float4x4 GetInstanceTransform(uint instID, uint offset)
{
    uint firstElement = instID * elementsPerInstance + startIndex + offset;

    float4 stored0 = InstanceTransformBuffer.Load(firstElement + 0);
    float4 stored1 = InstanceTransformBuffer.Load(firstElement + 1);
    float4 stored2 = InstanceTransformBuffer.Load(firstElement + 2);
    float4 stored3 = float4(0.0f, 0.0f, 0.0f, 1.0f);

    // float4x4(a, b, c, d) uses the vectors as rows; transposing turns them
    // into the columns of the returned matrix.
    return transpose(float4x4(stored0, stored1, stored2, stored3));
}
// Depth pre-pass vertex shader: transforms the vertex position by the
// instance's world matrix and the camera's view-projection matrix.
VSO VS(VSI input)
{
    VSO result = (VSO)0;

    // World * ViewProjection is built first, then applied to the
    // position as a row vector.
    float4x4 worldViewProj = mul(GetInstanceTransform(input.InstanceID, 0), ViewProjection);
    result.Position = mul(input.Position, worldViewProj);

    return result;
}
My render function looks like this:
void RendererD3D11::RenderGBuffer(const unsigned int drawcalls,
const unsigned int* culledSceneIDs)
{
// Get instance description
InstanceGroupDescription* instanceGroup = this->contentManager->GetPtrToOpaqueInstanceGroupDesc(drawcalls);
unsigned int numInstances = 0; // Keeps track of how many instances we actually want to draw of this group
if(instanceGroup->entityType == SceneList::Primitive)
{
D3D11_MAPPED_SUBRESOURCE instanceBufferProperties;
// Lock the constant buffer so it can be written to
this->deviceContext->Map(this->instanceTransformBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &instanceBufferProperties);
// Get a pointer to the data in the constant buffer.
XMFLOAT4* pInstanceData = (XMFLOAT4*)instanceBufferProperties.pData;
// Go through each sceneInstance inside this instance group
for(size_t i = 0; i < instanceGroup->instanceSceneIDSize; ++i)
{
if(culledSceneIDs[instanceGroup->instanceSceneIDs] != 0)
{
// Get ScenePrimitiveDescription
ScenePrimitiveDescription* scenePrimitive = &this->sceneManager->GetCurrentScene()->GetDesc()->primitives[instanceGroup->instanceSceneIDs];
// Update buffer
XMMATRIX worldTransform = XMMatrixTranspose(XMLoadFloat4x4(&scenePrimitive->worldTransform));
for(int u = 0; u < 3; u++)
{
XMStoreFloat4(&pInstanceData[(numInstances * 3) + u], worldTransform.r);
}
// This instance should be drawn since it was not culled.
numInstances++;
}
}
// Unlock the constant buffer
this->deviceContext->Unmap(this->instanceTransformBuffer, 0);
D3D11_MAPPED_SUBRESOURCE mappedResourceProperties;
// Lock the constant buffer so it can be written to
this->deviceContext->Map(this->cbInstanceTransformAccessBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResourceProperties);
// Get a pointer to the data in the constant buffer.
cbsAccessInstanceTransforms* pData = (cbsAccessInstanceTransforms*)mappedResourceProperties.pData;
// Copy the matrices into the constant buffer
XMStoreFloat4x4(&pData->ViewProjection, mainCamera->ViewProjectionMatrix());
pData->elementsPerInstance = 3;
pData->startIndex = 0;
pData->padding = XMFLOAT2(0.0f, 0.0f);
// Unlock the constant buffer
this->deviceContext->Unmap(this->cbInstanceTransformAccessBuffer, 0);
// Set Constant buffer
this->deviceContext->VSSetConstantBuffers(0, 1, &this->cbInstanceTransformAccessBuffer);
// Set Shader Resource View for instance transforms
ID3D11ShaderResourceView* resource = this->instanceTransformBuffer_SRV;
this->deviceContext->VSSetShaderResources(0, 1, &resource);
// Set BlendState
this->renderStateContext.SetBlendState(RenderStateDesc::ColorWriteDisabled, &blendStates, deviceContext);
// Set DepthStencilState
this->renderStateContext.SetDepthStencilState(RenderStateDesc::DepthWriteEnabled, &this->depthStencilStates, this->deviceContext);
// Depth Pre-Pass
GenericShader* depthPrePass_shader = this->contentManager->GetShader(ShaderFile::DepthPrePass, this->device);
if(depthPrePass_shader)
{
this->deviceContext->VSSetShader(depthPrePass_shader->VS, 0, 0);
this->deviceContext->PSSetShader(NULL, 0, 0);
}
ID3D11DepthStencilView* main_DSV = this->mainDSV;
deviceContext->OMSetRenderTargets(0, NULL, main_DSV);
// Instanced Draw Call (DepthPrePass)
if(numInstances > 0)
{
this->contentManager->GetPrimitiveFromPool(instanceGroup->groupID)->Draw(this->depthOnlyInputLayout, this->device,
this->deviceContext, numInstances, true);
}
// Get ScenePrimitiveDescription
ScenePrimitiveDescription* scenePrimitive = &this->sceneManager->GetCurrentScene()->GetDesc()->primitives[instanceGroup->instanceSceneIDs[0]];
// Set DiffuseMap
ID3D11ShaderResourceView* diffuseMap_SRV = this->contentManager->GetTextureFromPool(scenePrimitive->material.diffuseMap.ID)->GetResource();
this->deviceContext->PSSetShaderResources(1, 1, &diffuseMap_SRV);
// Set NormalMap
ID3D11ShaderResourceView* normalMap_SRV = this->contentManager->GetTextureFromPool(scenePrimitive->material.normalMap.ID)->GetResource();
this->deviceContext->PSSetShaderResources(2, 1, &normalMap_SRV);
// Set SpecularMap
ID3D11ShaderResourceView* specularMap_SRV = this->contentManager->GetTextureFromPool(scenePrimitive->material.specularMap.ID)->GetResource();
this->deviceContext->PSSetShaderResources(3, 1, &specularMap_SRV);
// Set SamplerStates
this->renderStateContext.SetSamplerState(RenderStateDesc::Anisotropic, &this->samplerstates, this->deviceContext, 0);
this->renderStateContext.SetSamplerState(RenderStateDesc::Linear, &this->samplerstates, this->deviceContext, 1);
// Set BlendState
this->renderStateContext.SetBlendState(RenderStateDesc::BlendDisabled, &blendStates, deviceContext);
// Set DepthStencilState
this->renderStateContext.SetDepthStencilState(RenderStateDesc::DepthEnabled, &this->depthStencilStates, this->deviceContext);
// Set FillMode
if(this->isWireframe)
{
this->renderStateContext.SetRasterizerState(RenderStateDesc::Wireframe, &rasterizerStates, deviceContext);
}
else
{
this->renderStateContext.SetRasterizerState(RenderStateDesc::BackFaceCull, &rasterizerStates, deviceContext);
}
// Set GBuffer shader
GenericShader* gbuffer_shader = this->contentManager->GetShader(ShaderFile::GBuffer, this->device);
if(gbuffer_shader)
{
this->deviceContext->VSSetShader(gbuffer_shader->VS, 0, 0);
this->deviceContext->PSSetShader(gbuffer_shader->PS, 0, 0);
}
ID3D11RenderTargetView* backBuffer_RTV = this->backBufferRTV;
deviceContext->OMSetRenderTargets(1, &backBuffer_RTV, main_DSV);
// Instanced Draw Call (GBuffer)
if(numInstances > 0)
{
this->contentManager->GetPrimitiveFromPool(instanceGroup->groupID)->Draw(this->defaultInputLayout, this->device,
this->deviceContext, numInstances, false);
}
}
else
{
// StaticMesh in here...
}
}