Sign in to follow this  

DX11 Puzzling Fill Rate

This topic is 2139 days old which is more than the 365 day threshold we allow for new replies. Please post a new topic.

If you intended to correct an error in the post then please contact us.

Recommended Posts

I've written a deferred renderer for DX11 and am optimizing its performance. While benchmarking fill rate, I ran into something puzzling. When I stub out one of the shaders used to render the scene (pre-lighting) so that it essentially has a null VS and its PS simply writes out constants to the GBuffers, I see a pretty significant increase in performance when rendering the scene. That seemed reasonable.

So then I started thinking about doing a depth prepass, but before going through the work of implementing it, I decided to first try a simple test to see if it had potential. For the test, I simply cleared the depth buffer to 0 instead of 1. The idea is that every pixel in the scene would then be rejected before the PS and I would see pretty much the same significant increase in performance as with the null shader above. However, I saw absolutely no speed increase at all.

Does this imply that my PS is being executed even if it's occluded by closer depth values? How is that possible?

Here's the PS and the functions it calls (see PSMain for the pixel shader):

[code]
// Structured buffer of parameter records; GetParams() below indexes into it.
StructuredBuffer<SpecBuf_Params_s> SpecBuf_Params;
// Returns the parameter record for the given instance / sub-material pair.
// The buffer index is computed by Spec_GetParamsIndex() (defined elsewhere).
SpecBuf_Params_s GetParams( in uint uInstIndex, in uint uSubMtlIndex ) {
return SpecBuf_Params[ Spec_GetParamsIndex( uInstIndex, uSubMtlIndex ) ];
}

// Resolves a motif description into an RGBA color.
// The table color g_avMotifColor[m_uMotifIndex] is scaled by m_fScale, offset by
// m_fOffset and modulated by m_vBaseColor. Flag bit 0 replaces the RGB result with
// the unmodified base color RGB; flag bit 1 (mask 2) does the same for alpha.
float4 Spec_Motif( SpecMotif_s Motif ) {
    // Scaled/offset table color, then modulated by the base color.
    float4 vScaledTable = g_avMotifColor[ Motif.m_uMotifIndex ] * Motif.m_fScale + Motif.m_fOffset;
    float4 vResult = vScaledTable * Motif.m_vBaseColor;

    // Optional per-channel-group overrides that bypass the table color.
    if( Motif.m_uFlags & 1 ) {
        vResult.rgb = Motif.m_vBaseColor.rgb;
    }
    if( Motif.m_uFlags & 2 ) {
        vResult.a = Motif.m_vBaseColor.a;
    }
    return vResult;
}

// Converts a unit alpha value into an MSAA sample coverage mask.
// With SPEC_MSAA_COUNT == 2 the alpha range is split into thirds (masks 0, 1, 3);
// with SPEC_MSAA_COUNT == 4 it is split into fifths (masks 0, 1, 3, 7, 15).
// For any other sample count every coverage bit is set.
uint Spec_AlphaToCoverage( float fUnitAlpha ) {
#if SPEC_MSAA_COUNT == 2
    if( fUnitAlpha < (1.0f / 3.0f) ) return 0;
    if( fUnitAlpha < (2.0f / 3.0f) ) return 1;
    return 3;
#elif SPEC_MSAA_COUNT == 4
    if( fUnitAlpha < (1.0f / 5.0f) ) return 0;
    if( fUnitAlpha < (2.0f / 5.0f) ) return 1;
    if( fUnitAlpha < (3.0f / 5.0f) ) return 3;
    if( fUnitAlpha < (4.0f / 5.0f) ) return 7;
    return 15;
#else
    return 0xffffffff;
#endif
}

// Packs the unpacked G-buffer source values into the raw render-target layout:
//   Tex0  = diffuse color (w = 0)
//   Tex1  = emissive color, specular intensity in w
//   Tex2  = normal remapped by 127.5 + 127.5 * n (integer), flags in w
SpecRawGBuffer_s Spec_PackGBuffer( in SpecGBufferSource_s Source ) {
    // Flags field for Tex2:
    //   bit 5    : no-AO flag
    //   bit 4    : set when the centroid position's fractional part is not exactly 0.5 in x or y
    //   bits 0-3 : specular sharpness scaled by 15 and limited to the range [1, 15]
    uint uIsEdgePixel = any( frac( Source.m_vCentroidPosXY_SS ) - 0.5f );
    uint uSharpnessBits = max( min( uint( Source.m_fSpecUnitSharpness * 15.0f ), 15 ), 1 );
    uint uPackedFlags = (Source.m_uNoAO << 5) | ((uIsEdgePixel & 1) << 4) | uSharpnessBits;

    // Write the packed targets.
    SpecRawGBuffer_s Packed;
    Packed.m_vTex0 = float4( Source.m_vDiffuseColor, 0 );
    Packed.m_vTex1 = float4( Source.m_vEmissiveColor, Source.m_fSpecUnitIntensity );
    Packed.m_vuTex2 = uint4( (255.0f/2.0f) + (255.0f/2.0f)*Source.m_vUnitNorm_WS, uPackedFlags );
    return Packed;
}

// Pixel shader entry point: evaluates the material for one pixel and returns the
// packed G-buffer values (via Spec_PackGBuffer). Also writes an MSAA coverage mask
// derived from the final opacity to SV_Coverage.
// NOTE(review): writing SV_Coverage may prevent early depth rejection on some
// hardware -- confirm against vendor guidance before relying on early-Z here.
SpecRawGBuffer_s PSMain( VS_Out Input, out uint uCoverage : SV_Coverage ) : SV_TARGET {
// Fetch the parameter record for this instance / sub-material.
SpecBuf_Params_s Params = GetParams( Input.uInstanceID, Input.uSubMtlIndex );
// Isolate the individual feature bits; each value is non-zero when its flag is set.
uint uFlags = Params.m_uFlags;
uint uFlag_VtxRGB_Tint = uFlags & FLAG_VTX_RGB_TINT;
uint uFlag_VtxRGB_Emis = uFlags & FLAG_VTX_RGB_EMIS;
uint uFlag_VtxA_Tint = uFlags & FLAG_VTX_A_TINT;
uint uFlag_VtxA_Emis = uFlags & FLAG_VTX_A_EMIS;
uint uFlag_VtxA_Opac = uFlags & FLAG_VTX_A_OPAC;
uint uFlag_VtxA_Glos = uFlags & FLAG_VTX_A_GLOS;
uint uFlag_BaseA_Emis = uFlags & FLAG_BASE_A_EMIS;
uint uFlag_BaseA_Opac = uFlags & FLAG_BASE_A_OPAC;
uint uFlag_BaseA_Glos = uFlags & FLAG_BASE_A_GLOS;

// Resolve the two motif colors (rgb = tint / emissive, a = opacity / gloss).
float4 vMotifTintOpac = Spec_Motif( Params.m_MotifTintOpac );
float4 vMotifEmisGlos = Spec_Motif( Params.m_MotifEmisGlos );
// Route the vertex color channels to tint, emissive, opacity and gloss according
// to the flags; a disabled contribution falls back to its neutral value (1 for
// multiplicative terms, 0 for additive terms).
float3 vVtxTint = (uFlag_VtxRGB_Tint ? Input.vColorVtx.rgb : 1) * (uFlag_VtxA_Tint ? Input.vColorVtx.a : 1);
float3 vVtxEmis = Params.m_fAddEmis + (uFlag_VtxRGB_Emis ? Input.vColorVtx.rgb : 0) + (uFlag_VtxA_Emis ? Input.vColorVtx.a : 0);
float fVtxOpac = (uFlag_VtxA_Opac ? Input.vColorVtx.a : 1);
float fVtxGlos = (uFlag_VtxA_Glos ? Input.vColorVtx.a : 0);
float4 vVtxTintOpac = float4( vVtxTint, fVtxOpac ) * vMotifTintOpac;
float4 vVtxEmisGlos = float4( vVtxEmis, fVtxGlos );

// Compute normal...
float3 vUnitNorm_WS = normalize( Input.vNormal_WS );

// Compute base color...
// RGB and alpha are sampled separately from TexBase: each uses its own slice
// index as the third coordinate, and the alpha lookup uses the scaled .zw
// texture coordinates (presumably TexBase is a texture array -- confirm).
float3 vTC_BaseRGB = float3( Input.vTC_Base.xy, Params.m_uTexSliceIndexBaseRGB );
float3 vTC_BaseA = float3( Params.m_fTexCoordScale_BaseA * Input.vTC_Base.zw, Params.m_uTexSliceIndexBaseA );
float3 vTexColorBaseRGB = TexBase.Sample( SamplerBase, vTC_BaseRGB ).rgb;
float fTexColorBaseA = TexBase.Sample( SamplerBase, vTC_BaseA ).a;
// Detail modulation: blends between 1 (no effect) and 2 * alpha by the switch value.
float fDetailMult = lerp( 1, 2 * fTexColorBaseA, Params.m_Switch_fBaseA_Detl );
vTexColorBaseRGB *= fDetailMult;

// Compute reflection color...
// Reflects the camera-to-surface direction about the normal and samples TexCube
// with the slice index in the fourth coordinate, tinted by the cube motif color.
float4 vMotifCube = Spec_Motif( Params.m_MotifCube );
float3 vUnitVtxToCam_WS = normalize( Spec_GetCamPos() - Input.vPos_WS );
float3 vUnitReflect_WS = reflect( -vUnitVtxToCam_WS, vUnitNorm_WS );
float3 vReflectionColor = vMotifCube.rgb * TexCube.Sample( SamplerCube, float4( vUnitReflect_WS, Params.m_uTexSliceIndexCube ) ).rgb;

// Compute final values...
// Base-texture alpha optionally feeds opacity, gloss and emissive (per the BaseA flags).
// Diffuse is the saturated tinted base color plus the gloss-weighted reflection.
float fFinalOpac = Input.fUnitFadeAlpha * vVtxTintOpac.a * (uFlag_BaseA_Opac ? fTexColorBaseA : 1);
float fFinalGlos = vMotifEmisGlos.a * (vVtxEmisGlos.a + (uFlag_BaseA_Glos ? fTexColorBaseA : 0));
float3 vFinalDiff = saturate( vVtxTintOpac.rgb * vTexColorBaseRGB ) + fFinalGlos * vReflectionColor;
float3 vFinalEmis = vMotifEmisGlos.rgb * (vVtxEmisGlos.rgb + (uFlag_BaseA_Emis ? fTexColorBaseA : 0));
// Opacity is expressed as an MSAA coverage mask rather than blended.
uCoverage = Spec_AlphaToCoverage( fFinalOpac );

// Store everything into our gbuffers...
// Start from the default source values and override the fields computed above.
// The stored emissive color is the emissive factor modulated by the final diffuse color.
SpecGBufferSource_s GBufSource = Spec_GetDefaultGBufSource();
GBufSource.m_vDiffuseColor = vFinalDiff;
GBufSource.m_vEmissiveColor = vFinalDiff * vFinalEmis;
GBufSource.m_vCentroidPosXY_SS = Input.vPos_HS.xy;
GBufSource.m_vUnitNorm_WS = vUnitNorm_WS;
GBufSource.m_fSpecUnitSharpness = Params.m_fSpecUnitSharpness;
GBufSource.m_fSpecUnitIntensity = fFinalGlos;
return Spec_PackGBuffer( GBufSource );
}
[/code]

Edit: After some more testing, it's really looking like early Z rejection just isn't working. Though, I'm not sure why yet. Do NVIDIA and ATI provide docs that describe the conditions which must be met to keep early Z rejection enabled?

Share this post


Link to post
Share on other sites
Nvidia has some guidelines in [url="http://developer.download.nvidia.com/GPU_Programming_Guide/GPU_Programming_Guide_G80.pdf"]this doc[/url], but they might be a bit out of date depending on which hardware you're working with. Alpha to coverage or outputting SV_Coverage can definitely mess with Z cull, so you might want to try disabling that to see if it makes a difference.

Also, if you want to see whether your pixel shader is actually running you can use the D3D11_QUERY_DATA_PIPELINE_STATISTICS to get the number of pixel shader invocations.

Share this post


Link to post
Share on other sites
Thanks for the info MJP. I was indeed not meeting some of those requirements. However, even after fixing things up the query reports no change in the number of PS invocations.

- PS no longer outputs SV_Coverage.
- Using ClearDepthStencilView() to clear the depth buffer.
- PS doesn't write depth.
- The direction of the depth test is <= while both writing and comparing the depth buffer and doesn't change in between.
- Depth buffer is a Texture2DMS (no array).
- The PS uses the XY components of the SV_Position semantic, but not the z component.
- The PS doesn't use clip, texkill, or discard.
- Alpha to coverage is disabled
- SampleMask is always 0xFFFFFFFF.

The depth buffer is, however, DXGI_FORMAT_D32_FLOAT, but the NVIDIA doc doesn't list that as a reason early Z would be disabled. I don't have a stencil buffer. I am using 2x MSAA render targets. I can see in Pix that the depth buffer has been written to with the pre-pass.

When writing the depth, I bind a read/write DSV to the pipeline and use this state:

D3D11_BLEND_DESC:
AlphaToCoverageEnable = 0
IndependentBlendEnable = 0
BlendEnable = 0
SrcBlend = ONE
DestBlend = ZERO
BlendOp = ADD
SrcBlendAlpha = ONE
DestBlendAlpha = ZERO
BlendOpAlpha = ADD
RenderTargetWriteMask = 0

D3D11_DEPTH_STENCIL_DESC:
DepthEnable = 1
DepthWriteMask = ALL
DepthFunc = LESS_EQUAL
(all stencil members are 0)

D3D11_RASTERIZER_DESC:
FillMode = SOLID
CullMode = BACK
FrontCounterClockwise = 1
DepthBias = 0
DepthBiasClamp = 0
SlopeScaledDepthBias = 0
DepthClipEnable = 1
ScissorEnable = 0
MultisampleEnable = 0
AntialiasedLineEnable = 0


When rendering the scene, I bind a read-only DSV to the pipeline and use this state:

D3D11_BLEND_DESC:
AlphaToCoverageEnable = 0
IndependentBlendEnable = 0
BlendEnable = 0
SrcBlend = ONE
DestBlend = ZERO
BlendOp = ADD
SrcBlendAlpha = ONE
DestBlendAlpha = ZERO
BlendOpAlpha = ADD
RenderTargetWriteMask = 15

D3D11_DEPTH_STENCIL_DESC:
DepthEnable = 1
DepthWriteMask = ZERO
DepthFunc = LESS_EQUAL
(all stencil members are 0)

D3D11_RASTERIZER_DESC:
FillMode = SOLID
CullMode = BACK
FrontCounterClockwise = 1
DepthBias = 0
DepthBiasClamp = 0
SlopeScaledDepthBias = 0
DepthClipEnable = 1
ScissorEnable = 0
MultisampleEnable = 0
AntialiasedLineEnable = 0

Here's the depth prepass shader:

[code]
// Constant buffer with cam info:
// g_ProjCamMtx is declared row_major; the vertex shader in this listing passes it
// as the first argument of mul() with float4( world-space position, 1 ) as the
// second to produce the SV_POSITION output.
cbuffer SpecBuf_Camera {
row_major float4x4 g_ProjCamMtx;
};

// Vertex in:
// The depth pre-pass consumes only the vertex position (POSITION semantic).
struct VS_In {
float3 vPos_WS : POSITION;
};

// Vertex out:
// Only the transformed position is output (SV_POSITION); no other interpolants.
struct VS_Out {
float4 vPos_HS : SV_POSITION;
};

// Vertex shader:
// Extends the input position to a float4 with w = 1, transforms it with
// g_ProjCamMtx and emits the result as SV_POSITION.
VS_Out VS( VS_In Input ) {
    float4 vPos_WS4 = float4( Input.vPos_WS, 1 );

    VS_Out Result;
    Result.vPos_HS = mul( g_ProjCamMtx, vPos_WS4 );
    return Result;
}

// Pixel shader:
// Returns a constant all-zero color; the input is not read.
float4 PS( VS_Out Input ) : SV_TARGET {
    return float4( 0, 0, 0, 0 );
}

// Technique:
// Single pass that compiles and binds the VS and PS above with the vs_5_0 /
// ps_5_0 profiles. No other pipeline state is set by the pass itself.
technique11 Terrain {
pass P1 {
SetVertexShader( CompileShader( vs_5_0, VS() ) );
SetPixelShader( CompileShader( ps_5_0, PS() ) );
}
}
[/code]


What else could I be doing to turn off early Z?

Share this post


Link to post
Share on other sites
Matt, thanks to your book for pointing out the [earlydepthstencil] attribute for pixel shaders! I tried this and the query now reports over a million invocations of the PS have been eliminated (without a depth pre-pass - just drawing the scene from front to back as much as possible). Though, I didn't notice much of a gain in terms of performance. Doing a full depth prepass increases the draw call count substantially which negatively impacts frame rate.

If anyone's interested, the book I'm referring to is called Practical Rendering & Computation with Direct3D 11, and it's proven to be a valuable resource to me during my adventure through DX11.

Share this post


Link to post
Share on other sites
Sign in to follow this  

  • Similar Content

    • By mister345
      Hi, can somebody please tell me in clear simple steps how to debug and step through an hlsl shader file?
      I already did Debug > Start Graphics Debugging > then captured some frames from Visual Studio and
      double clicked on the frame to open it, but no idea where to go from there.
       
      I've been searching for hours and there's no information on this, not even on the Microsoft Website!
      They say "open the  Graphics Pixel History window" but there is no such window!
      Then they say, in the "Pipeline Stages choose Start Debugging"  but the Start Debugging option is nowhere to be found in the whole interface.
      Also, how do I even open the hlsl file that I want to set a break point in from inside the Graphics Debugger?
       
      All I want to do is set a break point in a specific hlsl file, step thru it, and see the data, but this is so unbelievably complicated
      and Microsoft's instructions are horrible! Somebody please, please help.
       
       
       

    • By mister345
      I finally ported Rastertek's tutorial # 42 on soft shadows and blur shading. This tutorial has a ton of really useful effects and there's no working version anywhere online.
      Unfortunately it just draws a black screen. Not sure what's causing it. I'm guessing the camera or ortho matrix transforms are wrong, light directions, or maybe texture resources not being properly initialized.  I didnt change any of the variables though, only upgraded all types and functions DirectX3DVector3 to XMFLOAT3, and used DirectXTK for texture loading. If anyone is willing to take a look at what might be causing the black screen, maybe something pops out to you, let me know, thanks.
      https://github.com/mister51213/DX11Port_SoftShadows
       
      Also, for reference, here's tutorial #40 which has normal shadows but no blur, which I also ported, and it works perfectly.
      https://github.com/mister51213/DX11Port_ShadowMapping
       
    • By xhcao
      Does Direct3D 11 have an API function like glMemoryBarrier in OpenGL? For example, suppose I bind a texture to a compute shader, the compute shader writes some values to the texture, I dispatch the compute work, and after that I read the texture content back on the CPU side. I know that in OpenGL we could call glMemoryBarrier before reading to ensure that all of the texture's content has been updated by the compute shader.
      How to handle incoherent memory access in Direct3D 11? Thank you.
    • By _Engine_
      Atum engine is a newcomer in a row of game engines. Most game engines focus on render
      techniques in features list. The main task of Atum is to deliver the best toolset; that’s why,
      as I hope, Atum will be a good light weighted alternative to Unity for indie games. Atum already
      has fully workable editor that has an ability to play test edited scene. All system code has
      simple ideas behind them and focuses on easy to use functionality. That’s why code is minimized
      as much as possible.
      Currently the engine consists of:
      - Scene Editor with ability to play test edited scene;
      - Powerful system for binding properties into the editor;
      - Render system based on DX11 but created as multi API; so, adding support of another GAPI
        is planned;
      - Controls system based on aliases;
      - Font system based on stb_truetype.h;
      - Support of PhysX 3.0, there are samples in repo that use physics;
      - Network code which allows to create server/client; there is some code in repo which allows
        to create a simple network game
      I plan to use this engine in multiplayer game - so, I definitely will evolve the engine. Also
      I plan to add support for mobile devices. And of course, the main focus is to create a toolset
      that will ease games creation.
      Link to repo on source code is - https://github.com/ENgineE777/Atum
      A video of the work process in the track-based editor can be found at the following link: 
       
       

    • By mister345
      I made a spotlight that
      1. Projects 3d models onto a render target from each light POV to simulate shadows
      2. Cuts a circle out of the square of light that has been projected onto the render target
      as a result of the light frustum, then only lights up the pixels inside that circle 
      (except the shadowed parts of course), so you dont see the square edges of the projected frustum.
       
      After doing an if check to see if the dot product of light direction and light to vertex vector is greater than .95
      to get my initial cutoff, I then multiply the light intensity value inside the resulting circle by the same dot product value,
      which should range between .95 and 1.0.
       
      This should give the light inside that circle a falloff from 100% lit to 0% lit toward the edge of the circle. However,
      there is no falloff. It's just all equally lit inside the circle. Why on earth, I have no idea. If someone could take a gander
      and let me know, please help, thank you so much.
      // Returns the spotlight contribution for a surface point.
      //   LightPos_VertexSpace : light position relative to the shaded point (not normalized by the caller)
      //   LightDirection_WS    : direction the spotlight points in
      //   SurfaceNormal_WS     : surface normal used for the N.L term
      // Returns N.L inside the cone (cosine of angle to the light axis > 0.95), 0 outside it.
      // NOTE(review): the active return statement does not use dotProduct, so the
      // intensity inside the cone has no falloff toward its edge. Multiplying by
      // dotProduct alone would only scale by 0.95..1.0; a visible falloff needs the
      // value remapped from [0.95, 1] to [0, 1] first.
      float CalculateSpotLightIntensity(
          float3 LightPos_VertexSpace,
          float3 LightDirection_WS,
          float3 SurfaceNormal_WS)
      {
          //float3 lightToVertex = normalize(SurfacePosition - LightPos_VertexSpace);
          float3 lightToVertex_WS = -LightPos_VertexSpace;

          // Cosine of the angle between the light-to-point direction and the spotlight axis.
          float dotProduct = saturate(dot(normalize(lightToVertex_WS), normalize(LightDirection_WS)));

          // METALLIC EFFECT (deactivate for now)
          float metalEffect = saturate(dot(SurfaceNormal_WS, normalize(LightPos_VertexSpace)));

          if(dotProduct > .95 /*&& metalEffect > .55*/)
          {
              return saturate(dot(SurfaceNormal_WS, normalize(LightPos_VertexSpace)));
              //return saturate(dot(SurfaceNormal_WS, normalize(LightPos_VertexSpace))) * dotProduct;
              //return dotProduct;
          }
          else
          {
              return 0;
          }
      }

      // Pixel shader: accumulates ambient plus shadow-mapped spotlight lighting using a
      // normal-mapped surface normal, then multiplies it by a blend of two textures.
      float4 LightPixelShader(PixelInputType input) : SV_TARGET
      {
          float2 projectTexCoord;
          float depthValue;
          float lightDepthValue;
          float4 textureColor;

          // Set the bias value for fixing the floating point precision issues.
          float bias = 0.001f;

          // Set the default output color to the ambient light value for all pixels.
          float4 lightColor = cb_ambientColor;

          /////////////////// NORMAL MAPPING //////////////////
          float4 bumpMap = shaderTextures[4].Sample(SampleType, input.tex);

          // Expand the range of the normal value from (0, +1) to (-1, +1).
          bumpMap = (bumpMap * 2.0f) - 1.0f;

          // Change the COORDINATE BASIS of the normal into the space represented by basis vectors tangent, binormal, and normal!
          float3 bumpNormal = normalize((bumpMap.x * input.tangent) + (bumpMap.y * input.binormal) + (bumpMap.z * input.normal));

          //////////////// LIGHT LOOP ////////////////
          for(int i = 0; i < NUM_LIGHTS; ++i)
          {
              // Calculate the projected texture coordinates.
              projectTexCoord.x =  input.vertex_ProjLightSpace[i].x / input.vertex_ProjLightSpace[i].w / 2.0f + 0.5f;
              projectTexCoord.y = -input.vertex_ProjLightSpace[i].y / input.vertex_ProjLightSpace[i].w / 2.0f + 0.5f;

              // Only shade points whose projected coordinates fall inside [0, 1] on both axes.
              if((saturate(projectTexCoord.x) == projectTexCoord.x) && (saturate(projectTexCoord.y) == projectTexCoord.y))
              {
                  // Sample the shadow map depth value from the depth texture using the sampler at the projected texture coordinate location.
                  depthValue = shaderTextures[6 + i].Sample(SampleTypeClamp, projectTexCoord).r;

                  // Calculate the depth of the light.
                  lightDepthValue = input.vertex_ProjLightSpace[i].z / input.vertex_ProjLightSpace[i].w;

                  // Subtract the bias from the lightDepthValue.
                  lightDepthValue = lightDepthValue - bias;

                  // NOTE(review): lightVisibility is computed but not used below.
                  float lightVisibility = shaderTextures[6 + i].SampleCmp(SampleTypeComp, projectTexCoord, lightDepthValue );

                  // Compare the depth of the shadow map value and the depth of the light to determine whether to shadow or to light this pixel.
                  // If the light is in front of the object then light the pixel, if not then shadow this pixel since an object (occluder) is casting a shadow on it.
                  if(lightDepthValue < depthValue)
                  {
                      // Calculate the amount of light on this pixel.
                      float lightIntensity = saturate(dot(bumpNormal, normalize(input.lightPos_LS[i])));
                      if(lightIntensity > 0.0f)
                      {
                          // Determine the final diffuse color based on the diffuse color and the amount of light intensity.
                          float spotLightIntensity = CalculateSpotLightIntensity(
                              input.lightPos_LS[i], // NOTE - this is NOT NORMALIZED!!!
                              cb_lights[i].lightDirection,
                              bumpNormal/*input.normal*/);

                          lightColor += cb_lights[i].diffuseColor*spotLightIntensity* .18f; // spotlight
                          //lightColor += cb_lights[i].diffuseColor*lightIntensity* .2f; // square light
                      }
                  }
              }
          }

          // Saturate the final light color.
          lightColor = saturate(lightColor);
          // lightColor = saturate( CalculateNormalMapIntensity(input, lightColor, cb_lights[0].lightDirection));

          // TEXTURE ANIMATION -  Sample pixel color from texture at this texture coordinate location.
          input.tex.x += textureTranslation;

          // BLENDING
          float4 color1 = shaderTextures[0].Sample(SampleTypeWrap, input.tex);
          float4 color2 = shaderTextures[1].Sample(SampleTypeWrap, input.tex);
          float4 alphaValue = shaderTextures[3].Sample(SampleTypeWrap, input.tex);
          textureColor = saturate((alphaValue * color1) + ((1.0f - alphaValue) * color2));

          // Combine the light and texture color.
          float4 finalColor = lightColor * textureColor;

          /////// TRANSPARENCY /////////
          //finalColor.a = 0.2f;

          return finalColor;
      }
      Light_vs.hlsl
      Light_ps.hlsl
  • Popular Now