The yakiimo's result:
My result
The SimpleSample11.cpp (render function):
// Per-frame render body: stores all fragments into per-pixel linked lists,
// then resolves (sorts + blends) them into the back buffer, and finally draws the HUD.
HRESULT hr;

// If the settings dialog is being shown, render it instead of the app's scene.
if( g_SettingsDlg.IsActive() )
{
    g_SettingsDlg.OnRender( fElapsedTime );
    return;
}

// Fetch the camera matrices; only world and view*projection are consumed below.
D3DXMATRIX mWorld = *g_Camera.GetWorldMatrix();
D3DXMATRIX mView = *g_Camera.GetViewMatrix();
D3DXMATRIX mProj = *g_Camera.GetProjMatrix();
D3DXMATRIX mViewProjection = mView * mProj;

#pragma region backup RS and DST
// The Get* calls return referenced interfaces (possibly NULL); they are
// restored and released at the end of the scene pass.
ID3D11RasterizerState* pOldState = NULL;
pd3dImmediateContext->RSGetState( &pOldState );
ID3D11DepthStencilState* pDepthStencilStateStored = NULL;
UINT stencilRef = 0;
pd3dImmediateContext->OMGetDepthStencilState( &pDepthStencilStateStored, &stencilRef );
#pragma endregion

const DXGI_SURFACE_DESC* pBackBufferDesc = DXUTGetDXGIBackBufferSurfaceDesc();

#pragma region configure values like constant buffers, rs, dst
// Upload the frame dimensions used by both pixel shaders to address the
// per-pixel start-offset buffer.
D3D11_MAPPED_SUBRESOURCE MappedResource;
V( pd3dImmediateContext->Map( g_pPS_CB, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource ) );
if( SUCCEEDED( hr ) )
{
    // Only touch the mapping when Map succeeded; pData is not valid otherwise.
    PS_CB* pPS_CB = static_cast< PS_CB* >( MappedResource.pData );
    pPS_CB->nFrameWidth = pBackBufferDesc->Width;
    pPS_CB->nFrameHeight = pBackBufferDesc->Height;
    pd3dImmediateContext->Unmap( g_pPS_CB, 0 );
}
pd3dImmediateContext->PSSetConstantBuffers( 0, 1, &g_pPS_CB );
pd3dImmediateContext->RSSetState( g_pRS );
pd3dImmediateContext->OMSetDepthStencilState( g_pDepthStencilState, 0 );
#pragma endregion

// Pass 1: write every fragment into the linked-list UAVs.
g_storeFragments.OnRender( pd3dImmediateContext, pd3dDevice, &mWorld, &mViewProjection,
                           g_pFragmentLinkUAV, g_pStartOffsetUAV, pBackBufferDesc, g_pPS_CB );

// Pass 2: bind the back buffer again and resolve the lists into it.
ID3D11RenderTargetView* pRTV = DXUTGetD3D11RenderTargetView();
ID3D11DepthStencilView* pDSV = DXUTGetD3D11DepthStencilView();
pd3dImmediateContext->OMSetRenderTargets( 1, &pRTV, pDSV );
float ClearColor[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
pd3dImmediateContext->OMSetDepthStencilState( g_pDepthStencilState, 0 );
pd3dImmediateContext->ClearRenderTargetView( pRTV, ClearColor );
gsortFragmentsAndRender.OnRender( pd3dImmediateContext, pd3dDevice, pBackBufferDesc,
                                  g_pFragmentLinkSRV, g_pStartOffsetSRV, g_pPS_CB );

#pragma region restore and release RS with DST
pd3dImmediateContext->RSSetState( pOldState );
SAFE_RELEASE( pOldState );
pd3dImmediateContext->OMSetDepthStencilState( pDepthStencilStateStored, stencilRef );
SAFE_RELEASE( pDepthStencilStateStored );
// pRTV / pDSV come from the DXUT getters, which hand out DXUT's own pointers
// without adding a reference. Releasing them here would drop DXUT's reference
// every frame, so they are intentionally NOT released.
#pragma endregion

#pragma region the others
DXUT_BeginPerfEvent( DXUT_PERFEVENTCOLOR, L"HUD / Stats" );
g_HUD.OnRender( fElapsedTime );
g_SampleUI.OnRender( fElapsedTime );
RenderText();
DXUT_EndPerfEvent();

// Dump the frame statistics to the debugger output roughly every five seconds.
static DWORD dwTimefirst = GetTickCount();
if ( GetTickCount() - dwTimefirst > 5000 )
{
    OutputDebugString( DXUTGetFrameStats( DXUTIsVsyncEnabled() ) );
    OutputDebugString( L"\n" );
    dwTimefirst = GetTickCount();
}
#pragma endregion
The StoreFragments.cpp (render function)
HRESULT hr;
#pragma region clear Render target & depth stencil & UAV
float ClearColor[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
ID3D11RenderTargetView* pRTV = DXUTGetD3D11RenderTargetView();
pd3dImmediateContext->ClearRenderTargetView( pRTV, ClearColor );
ID3D11DepthStencilView* pDSV = DXUTGetD3D11DepthStencilView();
pd3dImmediateContext->ClearDepthStencilView( pDSV, D3D11_CLEAR_DEPTH, 1.0f, 0 );
static const UINT clearValueUINT[1] = { 0xFFFFFFFF };
pd3dImmediateContext->ClearUnorderedAccessViewUint( pStartOffsetUAV, clearValueUINT );
#pragma endregion
#pragma region update constant buffers
D3D11_MAPPED_SUBRESOURCE MappedResource;
V( pd3dImmediateContext->Map( m_pVS_CB, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource ) );
VS_CB* pVS_CB = ( VS_CB* )MappedResource.pData;
D3DXMatrixTranspose( &pVS_CB->m_mWorldViewProjection, pmViewProj );
pd3dImmediateContext->Unmap( m_pVS_CB, 0 );
pd3dImmediateContext->VSSetConstantBuffers( 0, 1, &m_pVS_CB );
#pragma endregion
#pragma region set UAV & RTV
ID3D11UnorderedAccessView* pUAVs[2];
pUAVs[0] = pFragmentLinkUAV;
pUAVs[1] = pStartOffsetUAV;
UINT Indices[] = {0, 0};
pd3dImmediateContext->OMSetRenderTargetsAndUnorderedAccessViews( 0, NULL, pDSV, 0, 2, pUAVs, Indices );
#pragma endregion
#pragma region set VB & render
UINT uStrides = sizeof( SCENE_VERTEX );
UINT uOffsets = 0;
pd3dImmediateContext->IASetVertexBuffers( 0, 1, &m_pVB, &uStrides, &uOffsets );
pd3dImmediateContext->IASetIndexBuffer( NULL, DXGI_FORMAT_R32_UINT, 0 );
pd3dImmediateContext->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP );
pd3dImmediateContext->IASetInputLayout( m_pVertexLayout );
pd3dImmediateContext->VSSetShader( m_pVS, NULL, 0 );
pd3dImmediateContext->PSSetShader( m_pPS, NULL, 0 );
pd3dImmediateContext->Draw( 4, 0 );
uOffsets = 4*sizeof( SCENE_VERTEX );
pd3dImmediateContext->IASetVertexBuffers( 0, 1, &m_pVB, &uStrides, &uOffsets );
pd3dImmediateContext->Draw( 4, 0 );
uOffsets = 8*sizeof( SCENE_VERTEX );
pd3dImmediateContext->IASetVertexBuffers( 0, 1, &m_pVB, &uStrides, &uOffsets );
pd3dImmediateContext->Draw( 4, 0 );
#pragma endregion
The SortAndRenderFragments.cpp (render function)
// Resolve pass: draws a full-screen quad whose pixel shader reads the fragment
// lists (as SRVs), sorts them and blends them into the current render target.
#pragma region Save the old viewport
D3D11_VIEWPORT vpOld[D3D11_VIEWPORT_AND_SCISSORRECT_MAX_INDEX];
UINT nViewPorts = 1;
pd3dImmediateContext->RSGetViewports( &nViewPorts, vpOld );
#pragma endregion

#pragma region Setup the viewport to match the backbuffer
D3D11_VIEWPORT vp;
vp.TopLeftX = 0;
vp.TopLeftY = 0;
vp.Width = static_cast< float >( pBackBufferDesc->Width );
vp.Height = static_cast< float >( pBackBufferDesc->Height );
vp.MinDepth = 0.0f;
vp.MaxDepth = 1.0f;
pd3dImmediateContext->RSSetViewports( 1, &vp );
#pragma endregion

#pragma region setup buffer and render
UINT strides = sizeof( SCREEN_VERTEX );
UINT offsets = 0;
ID3D11Buffer* pBuffers[1] = { m_pVB };
pd3dImmediateContext->IASetInputLayout( m_pQuadLayout );
pd3dImmediateContext->IASetVertexBuffers( 0, 1, pBuffers, &strides, &offsets );
pd3dImmediateContext->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP );

// t0 = fragment/link buffer, t1 = start-offset buffer (matches the shader registers).
ID3D11ShaderResourceView* aRViews[ 2 ] = { pFragmentLinkSRV, pStartOffsetSRV };
pd3dImmediateContext->PSSetShaderResources( 0, 2, aRViews );
pd3dImmediateContext->VSSetShader( m_pVS, NULL, 0 );
pd3dImmediateContext->PSSetShader( m_pPS, NULL, 0 );
pd3dImmediateContext->Draw( 4, 0 );

// Unbind the views again: the same resources are bound as UAVs by the
// fragment-store pass of the next frame, and a resource must not be bound
// for reading and writing at the same time.
ID3D11ShaderResourceView* aNullViews[ 2 ] = { NULL, NULL };
pd3dImmediateContext->PSSetShaderResources( 0, 2, aNullViews );
#pragma endregion

// Restore the old viewport
pd3dImmediateContext->RSSetViewports( nViewPorts, vpOld );
The StoreFragments.hlsl (it is the same as the original one)
#include "OITShaderCommon.h"
// Vertex-shader constants, bound to constant-buffer slot b0.
cbuffer cbPerObject : register( b0 )
{
// Matrix applied to the incoming vertex position in SceneVS.
matrix g_mWorldViewProjection : packoffset( c0 );
}
// Per-vertex input of the scene geometry.
struct SceneVS_Input
{
float4 pos : POSITION;
float4 color : COLOR;
};
// Data passed from SceneVS to StoreFragmentsPS.
struct SceneVS_Output
{
float4 pos : SV_POSITION;
float4 color : COLOR0;
};
// Fragment-and-link buffer (u0): one FragmentLink per stored fragment; its
// hidden counter hands out the next free slot.
RWStructuredBuffer< FragmentLink > FLBuffer : register( u0 );
// Start-offset buffer (u1): one 4-byte entry per pixel holding the index of the
// most recently stored fragment of that pixel.
RWByteAddressBuffer StartOffsetBuffer : register( u1 );
//----------------------------Vertex shader---------------------------
// Transforms the vertex position by g_mWorldViewProjection and forwards the
// vertex colour unchanged.
SceneVS_Output SceneVS( SceneVS_Input input )
{
    SceneVS_Output vsOut;
    vsOut.pos = mul( input.pos, g_mWorldViewProjection );
    vsOut.color = input.color;
    return vsOut;
}
// Appends the current fragment to the linked list of its pixel.
// Writes nothing to a render target; all output goes through the two UAVs.
[earlydepthstencil]
void StoreFragmentsPS( SceneVS_Output input )
{
    // Integer pixel coordinates: x in [0,g_nFrameWidth], y in [0,g_nFrameHeight].
    const uint2 vPixel = (uint2)input.pos.xy;

    // Pack the saturated colour into 8 bits per channel (R in the low byte, A in the high byte).
    const uint4 vChannels = saturate( input.color ) * 255;
    const uint nPackedColor = ( vChannels.x )
                            | ( vChannels.y << 8 )
                            | ( vChannels.z << 16 )
                            | ( vChannels.w << 24 );

    // Reserve the next free slot of the fragment buffer.
    const uint nSlot = FLBuffer.IncrementCounter();

    // Make this slot the new list head of the pixel and fetch the previous head.
    const uint nHeadAddress = 4 * ( vPixel.y * g_nFrameWidth + vPixel.x );
    uint nPreviousHead;
    StartOffsetBuffer.InterlockedExchange( nHeadAddress, nSlot, nPreviousHead );

    // Store the fragment, linked to the previous head.
    FragmentLink link;
    link.fragmentData.nColor = nPackedColor;
    link.fragmentData.fDepth = input.pos.z;
    link.nNext = nPreviousHead;
    FLBuffer[ nSlot ] = link;
}
The SortAndRenderFragments.hlsl
#include "OITShaderCommon.h"
// Read-only view of the fragment-and-link buffer (t0).
StructuredBuffer<FragmentLink> FragmentLinkSRV : register(t0);
// Read-only view of the per-pixel start-offset buffer (t1); 0xFFFFFFFF marks an empty list.
Buffer<uint> StartOffsetSRV : register(t1);
// Vertex input of the full-screen quad.
struct QuadVSinput
{
float4 pos : POSITION;
};
// Data passed from QuadVS to SortFragmentsPS.
struct QuadVS_Output
{
float4 pos : SV_POSITION;
};
// Full-screen quad vertex shader: the incoming position is forwarded as-is.
QuadVS_Output QuadVS( QuadVSinput Input )
{
    QuadVS_Output quadOut;
    quadOut.pos = Input.pos;
    return quadOut;
}
// Capacity of the per-pixel temporary sort buffer. It must be a power of two,
// because the bitonic sort pads the fragment list up to the next power of two.
// Fragments beyond this capacity are ignored for that pixel.
#define TEMPORARY_BUFFER_MAX 32

/**
    Sort and render fragments.

    Reads the linked list of the current pixel, sorts its fragments by depth
    (bitonic sort on an index array) and blends them back to front.
    Returns transparent black when the pixel has no fragments, otherwise the
    blended colour with alpha forced to 1.
*/
float4 SortFragmentsPS( QuadVS_Output input ) : SV_Target0
{
    // Index of the current pixel in the start-offset buffer.
    uint nIndex = (uint)input.pos.y * g_nFrameWidth + (uint)input.pos.x;

    FragmentData aData[ TEMPORARY_BUFFER_MAX ];    // temporary fragment buffer
    int anIndex[ TEMPORARY_BUFFER_MAX ];           // sort permutation over aData

    uint nNumFragment = 0;                         // fragments read for this pixel
    uint nNext = StartOffsetSRV[ nIndex ];         // head of the pixel's linked list

    // Early exit if the list is empty.
    if( nNext == 0xFFFFFFFF )
    {
        return float4( 0.0, 0.0, 0.0, 0.0 );
    }

    // Copy the linked list into the temporary buffer. The extra bound keeps a
    // long list from writing past the end of the fixed-size arrays.
    while( nNext != 0xFFFFFFFF && nNumFragment < TEMPORARY_BUFFER_MAX )
    {
        FragmentLink element = FragmentLinkSRV[ nNext ];
        aData[ nNumFragment ] = element.fragmentData;
        anIndex[ nNumFragment ] = nNumFragment;
        ++nNumFragment;
        nNext = element.nNext;
    }

    // Smallest power of two >= nNumFragment. Computed with integers so that no
    // floating-point rounding can push it past TEMPORARY_BUFFER_MAX.
    uint N2 = 1;
    while( N2 < nNumFragment )
    {
        N2 <<= 1;
    }

    // The bitonic sort needs power-of-two sized data: pad with entries whose
    // depth (1.1) lies behind every real fragment, so they end up last.
    for( uint i = nNumFragment; i < N2; ++i )
    {
        anIndex[ i ] = i;
        aData[ i ].nColor = 0;
        aData[ i ].fDepth = 1.1f;
    }

    // Unoptimized sorting. (Bitonic Sort)
    // Loop from Merge( 2 ) to Merge( N2 ).
    for( uint nMergeSize = 2; nMergeSize <= N2; nMergeSize = nMergeSize * 2 )
    {
        // Merge( n ) requires log2( n ) passes: Merge( n/2 ) -> Merge( 2 ).
        for( uint nMergeSubSize = nMergeSize >> 1; nMergeSubSize > 0; nMergeSubSize = nMergeSubSize >> 1 )
        {
            // Compare and swap elements.
            for( uint nElem = 0; nElem < N2; ++nElem )
            {
                uint nSwapElem = nElem ^ nMergeSubSize;

                // Handle each pair only once.
                if( nSwapElem > nElem )
                {
                    float fDepthElem = aData[ anIndex[ nElem ] ].fDepth;
                    float fDepthSwap = aData[ anIndex[ nSwapElem ] ].fDepth;

                    // Blocks alternate between increasing and decreasing order.
                    bool bAscending = ( nElem & nMergeSize ) == 0;
                    bool bOutOfOrder = bAscending ? ( fDepthElem > fDepthSwap )
                                                  : ( fDepthElem < fDepthSwap );
                    if( bOutOfOrder )
                    {
                        int temp = anIndex[ nElem ];
                        anIndex[ nElem ] = anIndex[ nSwapElem ];
                        anIndex[ nSwapElem ] = temp;
                    }
                }
            }
        }
    }

    // Blend the sorted fragments back to front (largest depth first).
    float4 result = 0.0f;
    for( int x = (int)nNumFragment - 1; x >= 0; --x )
    {
        uint nColor = aData[ anIndex[ x ] ].nColor;
        float4 color;
        color.r = ( (nColor >> 0) & 0xFF ) / 255.0f;
        color.g = ( (nColor >> 8) & 0xFF ) / 255.0f;
        color.b = ( (nColor >> 16) & 0xFF ) / 255.0f;
        color.a = ( (nColor >> 24) & 0xFF ) / 255.0f;
        result = lerp( result, color, color.a );
    }
    result.a = 1.0f;
    return result;
}
Can anybody help me, please? Thanks in advance.