# DX11 SSAO Using 32-bit pixel format as NormalDepth Texturemap

This topic is 1518 days old which is more than the 365 day threshold we allow for new replies. Please post a new topic.

If you intended to correct an error in the post then please contact us.

## Recommended Posts

Hey Guys,
I'm doing Exercise5 Ch22 SSAO on Frank Luna's DX11 book, I used DXGI_FORMAT_R8G8B8A8_UNORM to replace DXGI_FORMAT_R16G16B16A16_FLOAT when building normalDepth texture map.
When using DXGI_FORMAT_R16G16B16A16_FLOAT, I store view space normal to RGB channel, the alpha channel stores the view space depth(z-coordinate). Now using DXGI_FORMAT_R8G8B8A8_UNORM, I store normal vector x- and y- coordinate to RG channel, and BA combined store 16-bit depth value.
I construct the normal z-coordinate by nz = -sqrt(1-x^2-y^2).
To store the view space depth over two 8-bit UNORM channels, I normalized z to [0, 1] by dividing by the far plane depth zFar. Then I used a little tricks to save 8 most and 8 least significant digits to BA 16-bit channels(following code below).
When rendering normal and depth values of the scene to the DXGI_FORMAT_R8G8B8A8_UNORM 2D texture, the main code is

cbuffer cbPerScene
{
float gZFar;
};

struct VertexIn
{
float3 PosL    : POSITION;
float3 NormalL : NORMAL;
float2 Tex     : TEXCOORD;
};

struct VertexOut
{
float4 PosH       : SV_POSITION;
float3 PosV       : POSITION;
float3 NormalV    : NORMAL;
float2 Tex        : TEXCOORD0;
};

VertexOut VS(VertexIn vin)
{
VertexOut vout;

// Transform to view space.
vout.PosV    = mul(float4(vin.PosL, 1.0f), gWorldView).xyz;
vout.NormalV = mul(vin.NormalL, (float3x3)gWorldInvTransposeView);

// Transform to homogeneous clip space.
vout.PosH = mul(float4(vin.PosL, 1.0f), gWorldViewProj);

// Output vertex attributes for interpolation across triangle.
vout.Tex = mul(float4(vin.Tex, 0.0f, 1.0f), gTexTransform).xy;

return vout;
}

float4 PS(VertexOut pin, uniform bool gAlphaClip) : SV_Target
{
// Interpolating normal can unnormalize it, so normalize it.
pin.NormalV = normalize(pin.NormalV);

if(gAlphaClip)
{
float4 texColor = gDiffuseMap.Sample( samLinear, pin.Tex );

clip(texColor.a - 0.1f);
}

float4 normalDepth = float4(0, 0, 0, 0);
normalDepth.rg = pin.NormalV.rg;
float depth = pin.PosV.b;
float z = depth / gZFar;
normalDepth.ba = float2(z, frac(256.0f*z));
return normalDepth;
}

technique11 NormalDepth
{
pass P0
{
SetVertexShader( CompileShader( vs_5_0, VS() ) );
SetGeometryShader( NULL );
SetPixelShader( CompileShader( ps_5_0, PS(false) ) );
}
}


When using this DXGI_FORMAT_R8G8B8A8_UNORM texture to build SSAO, the main code is

cbuffer cbPerFrame
{
float4x4 gViewToTexSpace; // Proj*Texture
float4   gOffsetVectors[14];
float4   gFrustumCorners[4];
float     gZFar;

// Coordinates given in view space.
float    gOcclusionRadius    = 0.5f;
float    gOcclusionFadeStart = 0.2f;
float    gOcclusionFadeEnd   = 2.0f;
float    gSurfaceEpsilon     = 0.05f;
};

Texture2D gNormalDepthMap;
Texture2D gRandomVecMap;

SamplerState samNormalDepth
{
Filter = MIN_MAG_LINEAR_MIP_POINT;

// Set a very far depth value if sampling outside of the NormalDepth map
// so we do not get false occlusions.
AddressU = BORDER;
AddressV = BORDER;
BorderColor = float4(0.0f, 0.0f, 0.0f, 1e5f);
};

SamplerState samRandomVec
{
Filter = MIN_MAG_LINEAR_MIP_POINT;
AddressU  = WRAP;
AddressV  = WRAP;
};

struct VertexIn
{
float3 PosL            : POSITION;
float3 ToFarPlaneIndex : NORMAL;
float2 Tex             : TEXCOORD;
};

struct VertexOut
{
float4 PosH       : SV_POSITION;
float3 ToFarPlane : TEXCOORD0;
float2 Tex        : TEXCOORD1;
};

VertexOut VS(VertexIn vin)
{
VertexOut vout;

// Already in NDC space.
vout.PosH = float4(vin.PosL, 1.0f);

// We store the index to the frustum corner in the normal x-coord slot.
vout.ToFarPlane = gFrustumCorners[vin.ToFarPlaneIndex.x].xyz;

// Pass onto pixel shader.
vout.Tex = vin.Tex;

return vout;
}

// Determines how much the sample point q occludes the point p as a function
// of distZ.
float OcclusionFunction(float distZ)
{
//
// If depth(q) is "behind" depth(p), then q cannot occlude p.  Moreover, if
// depth(q) and depth(p) are sufficiently close, then we also assume q cannot
// occlude p because q needs to be in front of p by Epsilon to occlude p.
//
// We use the following function to determine the occlusion.
//
//
//       1.0     -------------\
//               |           |  \
//               |           |    \
//               |           |      \
//               |           |        \
//               |           |          \
//               |           |            \
//  ------|------|-----------|-------------|---------|--> zv
//        0     Eps          z0            z1
//

float occlusion = 0.0f;
if(distZ > gSurfaceEpsilon)
{
float fadeLength = gOcclusionFadeEnd - gOcclusionFadeStart;

// Linearly decrease occlusion from 1 to 0 as distZ goes
// from gOcclusionFadeStart to gOcclusionFadeEnd.
occlusion = saturate( (gOcclusionFadeEnd-distZ)/fadeLength );
}

return occlusion;
}

float4 PS(VertexOut pin, uniform int gSampleCount) : SV_Target
{
// p -- the point we are computing the ambient occlusion for.
// n -- normal vector at p.
// q -- a random offset from p.
// r -- a potential occluder that might occlude p.

// Get viewspace normal and z-coord of this pixel.  The tex-coords for
// the fullscreen quad we drew are already in uv-space.
float4 normalDepth = gNormalDepthMap.SampleLevel(samNormalDepth, pin.Tex, 0.0f);

float2 nxy = normalDepth.rg;
float nz = sqrt(1 - pow(nxy.r, 2) - pow(nxy.g, 2));
nz = -nz;
float3 n = float3(nxy, nz);
float pz = normalDepth.b + normalDepth.a/256.0f;
pz *= gZFar;

//
// Reconstruct full view space position (x,y,z).
// Find t such that p = t*pin.ToFarPlane.
// p.z = t*pin.ToFarPlane.z
// t = p.z / pin.ToFarPlane.z
//
float3 p = (pz/pin.ToFarPlane.z)*pin.ToFarPlane;

// Extract random vector and map from [0,1] --> [-1, +1].
float3 randVec = 2.0f*gRandomVecMap.SampleLevel(samRandomVec, 4.0f*pin.Tex, 0.0f).rgb - 1.0f;

float occlusionSum = 0.0f;

// Sample neighboring points about p in the hemisphere oriented by n.
[unroll]
for(int i = 0; i < gSampleCount; ++i)
{
// Are offset vectors are fixed and uniformly distributed (so that our offset vectors
// do not clump in the same direction).  If we reflect them about a random vector
// then we get a random uniform distribution of offset vectors.
float3 offset = reflect(gOffsetVectors[i].xyz, randVec);

// Flip offset vector if it is behind the plane defined by (p, n).
float flip = sign( dot(offset, n) );

// Sample a point near p within the occlusion radius.
float3 q = p + flip * gOcclusionRadius * offset;

// Project q and generate projective tex-coords.
float4 projQ = mul(float4(q, 1.0f), gViewToTexSpace);
projQ /= projQ.w;

// Find the nearest depth value along the ray from the eye to q (this is not
// the depth of q, as q is just an arbitrary point near p and might
// occupy empty space).  To find the nearest depth we look it up in the depthmap.

float2 rz = gNormalDepthMap.SampleLevel(samNormalDepth, projQ.xy, 0.0f).ba;
float rpz = rz.r + rz.g/256.0f;
rpz *= gZFar;

// Reconstruct full view space position r = (rx,ry,rz).  We know r
// lies on the ray of q, so there exists a t such that r = t*q.
// r.z = t*q.z ==> t = r.z / q.z

float3 r = (rpz / q.z) * q;

//
// Test whether r occludes p.
//   * The product dot(n, normalize(r - p)) measures how much in front
//     of the plane(p,n) the occluder point r is.  The more in front it is, the
//     more occlusion weight we give it.  This also prevents self shadowing where
//     a point r on an angled plane (p,n) could give a false occlusion since they
//     have different depth values with respect to the eye.
//   * The weight of the occlusion is scaled based on how far the occluder is from
//     the point we are computing the occlusion of.  If the occluder r is far away
//     from p, then it does not occlude it.
//

float distZ = p.z - r.z;
float dp = max(dot(n, normalize(r - p)), 0.0f);
float occlusion = dp * OcclusionFunction(distZ);

occlusionSum += occlusion;
}

occlusionSum /= gSampleCount;

float access = 1.0f - occlusionSum;

// Sharpen the contrast of the SSAO map to make the SSAO affect more dramatic.
return saturate(pow(access, 4.0f));
}

technique11 Ssao
{
pass P0
{
SetVertexShader( CompileShader( vs_5_0, VS() ) );
SetGeometryShader( NULL );
SetPixelShader( CompileShader( ps_5_0, PS(14) ) );
}
}


When I check the SSAO texture before bluring with camera to an angle, the image is

[attachment=18446:2013-10-19_164907.jpg]

and then I move camera to the right, the image is

[attachment=18447:2013-10-19_170115.jpg]

Basically, when I move camera, the black and white areas vary heavily in the SSAO image.It's like getting an annoying amount of halo-ing on these surfaces.

The image below is the original SSAO image before bluring using DXGI_FORMAT_R16G16B16A16_FLOAT

[attachment=18448:2013-10-19_170332.jpg]

The false display has something to do with view position and orientation, I tried to modify the cosntants value in OcclusionFunction, such as gOcclusionRadius, but it didn't work, not apparently..

How can I wipe out the wrong dark display when it's not occluded? What could be causing this?

Thank you very much.

#### Share this post

##### Share on other sites

Since the only thing you changed was the format of the texture, and the corresponding mechanisms for reading/writing the texture, then I would assume that there is an issue in the code somewhere, or there is an inherent problem with using a low resolution texture for the data you need.

Have you tried to visualize the depth/normal texture prior to it being used?  This will likely give you a great insight into whether or not you are accurately reproducing the same data.  Create a simple shader to read the normal information and display in both the old code and the new code - then you can quickly see the differences visually.  If that looks reasonably similar, then I would check on the depth channels as well - make a similar before and after comparison.

It is just a hunch, but since you said you use a 'clever' trick for storing the upper and lower 8 bits into separate channels, I would suspect this as a potential issue.  Have you validated your technique with some test values?  Done any shader debugging to watch what value comes out of the reading functions?  Start here, and you should be able to find the issue.

#### Share this post

##### Share on other sites

Could it be that your x,y position values are just too in accurate. Consider that there is only 256 different values for x,y positions and your buffer resolution is already bigger than each of the values?

Why not just store the depth and reconstruct the position from screen space x,y positions and the depth.

Cheers!

#### Share this post

##### Share on other sites

Hey Guys, I'm back

Like Jason Z suggested, I tried to visualize the Normal Depth texture before and after using on next stage. I compare these two on R G B A channel respectively, which indicates normal_x normal_y depthz_hi8bits depthz_lo8bits. the result showing below.

[attachment=18458:2013-10-20_red.jpg]

upper-right is after,  lower-right is before [R]

[attachment=18459:2013-10-20_green.jpg]

upper-right is after,  lower-right is before [G]

[attachment=18460:2013-10-20_blue.jpg]

upper-right is after,  lower-right is before [B]

[attachment=18461:2013-10-20_alpha.jpg]

upper-right is after,  lower-right is before [A]

Observing these results, I found they're all corresponding to each other, However, I found halo-ing thing happening on A channel display just like zebra line, which is added to the final image. It's because A channel stores 8 low bits of depth z value, which varies heavily even on the same surface inherently, hence, I think the final image with halo-ing kind of thing has something related to it. I shall continue research on it

#### Share this post

##### Share on other sites

Could it be that your x,y position values are just too in accurate. Consider that there is only 256 different values for x,y positions and your buffer resolution is already bigger than each of the values?

Why not just store the depth and reconstruct the position from screen space x,y positions and the depth.

Cheers!

Hi kauna

through tests with images above, I think a 2-8bit format storing x,y of normal values is OK. Maybe this weird display is due to some process with the low 8 bits of depth value.

Thanks anyway!

#### Share this post

##### Share on other sites

This topic is 1518 days old which is more than the 365 day threshold we allow for new replies. Please post a new topic.

If you intended to correct an error in the post then please contact us.

## Create an account or sign in to comment

You need to be a member in order to leave a comment

## Create an account

Sign up for a new account in our community. It's easy!

Register a new account

## Sign in

Already have an account? Sign in here.

Sign In Now

• ### Forum Statistics

• Total Topics
628730
• Total Posts
2984427
• ### Similar Content

• Having some issues with a geometry shader in a very basic DX app.
We have an assignment where we are supposed to render a rotating textured quad, and in the geometry shader duplicate this quad and offset it by its normal. Very basic stuff essentially.
My issue is that the duplicated quad, when rendered in front of the original quad, seems to fail the Z test and thus the original quad is rendered on top of it.
Whats even weirder is that this only happens for one of the triangles in the duplicated quad, against one of the original quads triangles.

Here's a video to show you what happens: Video (ignore the stretched textures)

Here's my GS: (VS is simple passthrough shader and PS is just as basic)
struct VS_OUT { float4 Pos : SV_POSITION; float2 UV : TEXCOORD; }; struct VS_IN { float4 Pos : POSITION; float2 UV : TEXCOORD; }; cbuffer cbPerObject : register(b0) { float4x4 WVP; }; [maxvertexcount(6)] void main( triangle VS_IN input[3], inout TriangleStream< VS_OUT > output ) { //Calculate normal float4 faceEdgeA = input[1].Pos - input[0].Pos; float4 faceEdgeB = input[2].Pos - input[0].Pos; float3 faceNormal = normalize(cross(faceEdgeA.xyz, faceEdgeB.xyz)); //Input triangle, transformed for (uint i = 0; i < 3; i++) { VS_OUT element; VS_IN vert = input[i]; element.Pos = mul(vert.Pos, WVP); element.UV = vert.UV; output.Append(element); } output.RestartStrip(); for (uint j = 0; j < 3; j++) { VS_OUT element; VS_IN vert = input[j]; element.Pos = mul(vert.Pos + float4(faceNormal, 0.0f), WVP); element.Pos.xyz; element.UV = vert.UV; output.Append(element); } }
I havent used geometry shaders much so im not 100% on what happens behind the scenes.
Any tips appreciated!

• Hi, I'm building a game engine using DirectX11 in c++.
I need a basic physics engine to handle collisions and motion, and no time to write my own.
What is the easiest solution for this? Bullet and PhysX both seem too complicated and would still require writing my own wrapper classes, it seems.
I found this thing called PAL - physics abstraction layer that can support bullet, physx, etc, but it's so old and no info on how to download or install it.
The simpler the better. Please let me know, thanks!
• By Hexaa
I try to draw lines with different thicknesses using the geometry shader approach from here:
https://forum.libcinder.org/topic/smooth-thick-lines-using-geometry-shader
It seems to work great on my development machine (some Intel HD). However, if I try it on my target (Nvidia NVS 300, yes it's old) I get different results. See the attached images. There
seem to be gaps in my sine signal that the NVS 300 device creates, the intel does what I want and expect in the other picture.
It's a shame, because I just can't figure out why. I expect it to be the same. I get no Error in the debug output, with enabled native debugging. I disabled culling with CullMode.None. Could it be some z-fighting? I have little clue about it but I tested to play around with the RasterizerStateDescription and DepthBias properties with no success, no change at all. Maybe I miss something there?
I develop the application with SharpDX btw.
Any clues or help is very welcome

• Hi,
I'm currently trying to write a shader which shoud compute a fast fourier transform of some data, manipulating the transformed data, do an inverse FFT an then displaying the result as vertex offset and color. I use Unity3d and HLSL as shader language. One of the main problems is that the data should not be passed from CPU to GPU for every frame if possible. My original plan was to use a vertex shader and do the fft there, but I fail to find out how to store changing data betwen shader calls/passes. I found a technique called ping-ponging which seems to be based on writing and exchangeing render targets, but I couldn't find an example for HLSL as a vertex shader yet.
I found https://social.msdn.microsoft.com/Forums/en-US/c79a3701-d028-41d9-ad74-a2b3b3958383/how-to-render-to-multiple-render-targets-in-hlsl?forum=xnaframework
which seem to use COLOR0 and COLOR1 as such render targets.
Is it even possible to do such calculations on the gpu only? (/in this shader stage?, because I need the result of the calculation to modify the vertex offsets there)
I also saw the use of compute shaders in simmilar projects (ocean wave simulation), do they realy copy data between CPU / GPU for every frame?
How does this ping-ponging / rendertarget switching technique work in HLSL?
Have you seen an example of usage?
Any answer would be helpfull.
Thank you
appswert
• By ADDMX
Hi
Just a simple question about compute shaders (CS5, DX11).
Do the atomic operations (InterlockedAdd in my case) should work without any issues on RWByteAddressBuffer and be globaly coherent ?
I'v come back from CUDA world and commited fairly simple kernel that does some job, the pseudo-code is as follows:
(both kernels use that same RWByteAddressBuffer)
first kernel does some job and sets Result[0] = 0;
(using Result.Store(0, 0))
I'v checked with debugger, and indeed the value stored at dword 0 is 0
now my second kernel
RWByteAddressBuffer Result;  [numthreads(8, 8, 8)] void main() {     for (int i = 0; i < 5; i++)     {         uint4 v0 = DoSomeCalculations1();         uint4 v1 = DoSomeCalculations2();         uint4 v2 = DoSomeCalculations3();                  if (v0.w == 0 && v1.w == 0 && v2.w)             continue;         //    increment counter by 3, and get it previous value         // this should basically allocate space for 3 uint4 values in buffer         uint prev;         Result.InterlockedAdd(0, 3, prev);                  // this fills the buffer with 3 uint4 values (+1 is here as the first 16 bytes is occupied by DrawInstancedIndirect data)         Result.Store4((prev+0+1)*16, v0);         Result.Store4((prev+1+1)*16, v1);         Result.Store4((prev+2+1)*16, v2);     } } Now I invoke it with Dispatch(4,4,4)
Now I use DrawInstancedIndirect to draw the buffer, but ocassionaly there is missed triangle here and there for a frame, as if the atomic counter does not work as expected
do I need any additional synchronization there ?
I'v tried 'AllMemoryBarrierWithGroupSync' at the end of kernel, but without effect.
If I do not use atomic counter, and istead just output empty vertices (that will transform into degenerated triangles) the all is OK - as if I'm missing some form of synchronization, but I do not see such a thing in DX11.
I'v tested on both old and new nvidia hardware (680M and 1080, the behaviour is that same).

• 25
• 11
• 10
• 16
• 14