Jump to content
  • Advertisement
Sign in to follow this  
fries

DX11 RWStructuredBuffer CS Issue

Recommended Posts

Hi,

I'm having problems with a compute shader. I am trying to iteratively traverse a tree structure with multiple Dispatch calls.

Each thread reads in a node from an input StructuredBuffer, and using two "counting" RWStructuredBuffers as outputs, each thread either pushes the children of the current node into the output node buffer for the next iteration, or into a triangle output buffer to draw a triangle.

Looking in RenderDoc, everything works fine for the first few iterations, but then some odd stuff starts happening: The triangle buffer starts to get smaller, even though the only operation is to add triangles to it, and I don't reset the count between iterations.

Then, after a few more iterations, RenderDoc can't even keep up with what's going on: it starts reporting random counts for the triangle buffer - every time you view the pipeline state for a particular dispatch, the buffer count is a different random number.

Does anyone have some ideas about what I'm doing wrong?


// One work item of the traversal: a BVH node paired with the view it is being
// processed for. This is the element type of NodeViewBuffer (input) and
// OutNodeViewBuffer (output for the next iteration).
struct sGPUNodeView
{
	uint mNodeIndex;			// Index into BVHNodeBuffer
	uint mViewIndex;			// Index into ViewBuffer; 0xffffffff is treated as invalid by ProcessNodeViews_main
	uint mObjectInstanceIndex;	// Copied to child node-views and to emitted splats
	uint mPadding;				// Unused by the shader; written as 0 when node-views are appended
};

// Read/write index pair plus padding.
// NOTE(review): not referenced by any of the shader code visible here - presumably
// used by another shader or mirrored on the CPU side; confirm before removing.
struct sRingData
{
	uint mReadIndex;
	uint mWriteIndex;
	uint mPadding[2];	// Unused in the visible code
};

// Packed BVH node as stored in BVHNodeBuffer.
//
// mData packing, as decoded by the accessors below:
//   mData.x   : flag bits - bit 0 = node carries an object instance,
//               bit 1 = node carries a triangle
//   mData.yzw : the three triangle indices (see GetTriangleIndices)
//   mData.yz  : the two child node indices (see GetChildIndices)
//   mData.w   : the object instance index (see GetObjectInstance)
// The same components are read by several accessors; which reading is valid
// depends on the flag bits, and the caller is responsible for checking them.
struct sGPUPackedMeshBVHNode
{
	float4 mSphere;
	uint4 mData;

	// True when flag bit 0 of mData.x is set.
	bool HasObjectInstance()
	{
		return (mData.x & 0x1u) != 0u;
	}

	// True when flag bit 1 of mData.x is set.
	bool HasTriangle()
	{
		return (mData.x & 0x2u) != 0u;
	}

	// Returns mData.yzw interpreted as the triangle's three indices.
	uint3 GetTriangleIndices()
	{
		return uint3(mData.y, mData.z, mData.w);
	}

	// Returns mData.yz interpreted as the two child node indices.
	uint2 GetChildIndices()
	{
		return uint2(mData.y, mData.z);
	}

	// Returns mData.w interpreted as the object instance index.
	uint GetObjectInstance()
	{
		return mData.w;
	}

	// Returns the node's sphere exactly as stored.
	float4 GetSphere()
	{
		return mSphere;
	}
};

// Element type of PointSplatBuffer: one point splat emitted by DrawNodeView
// for a node that has no triangle.
struct sPointSplatData
{
	float3 mPoisition;			// (sic) xyz of the node's sphere; spelling kept because DrawNodeView references this name
	uint mViewIndex;			// Copied from the node-view that produced the splat
	uint mObjectInstanceIndex;	// Copied from the node-view that produced the splat
	uint mPadding[3];			// Unused by the shader; zero-initialised by DrawNodeView
};

// Element type of TriangleSplatBuffer: one triangle emitted by DrawNodeView
// for a node that has a triangle.
struct sTriangleSplatData
{
	uint3 mIndices;				// Triangle indices taken from the BVH node (GetTriangleIndices)
	uint mViewIndex;			// Copied from the node-view that produced the splat
	uint mObjectInstanceIndex;	// Copied from the node-view that produced the splat
	uint mPadding[3];			// Unused by the shader; zero-initialised by DrawNodeView
};


StructuredBuffer<sGPUNodeView> NodeViewBuffer;									// Input node-views for this iteration; only the first IterationDataRead[0] entries are processed
globallycoherent RWStructuredBuffer<sGPUNodeView> OutNodeViewBuffer;			// Output node-views for the next iteration; appended via IncrementCounter()
StructuredBuffer<sGPUPackedMeshBVHNode> BVHNodeBuffer;							// BVH nodes, indexed by sGPUNodeView::mNodeIndex
// One matrix per view, indexed by sGPUNodeView::mViewIndex
StructuredBuffer<float4x4> ViewBuffer;
globallycoherent RWStructuredBuffer<sPointSplatData> PointSplatBuffer;			// Output points, appended via IncrementCounter(); not really used at the moment
globallycoherent RWStructuredBuffer<sTriangleSplatData> TriangleSplatBuffer;	// Output triangles, appended via IncrementCounter()
globallycoherent RWBuffer<uint> IndirectDispatchArgs;							// Indirect args for the next dispatch; element 0 is only ever raised with InterlockedMax, never reset by the shader
Buffer<uint> IterationDataRead;													// Element 0 stores the number of nodes to process for the current iteration
RWBuffer<uint> IterationDataWrite;												// Element 0 stores the number of nodes to process for the next iteration; only ever raised with InterlockedMax, never reset by the shader

static const uint gTotalNumThreads = 64;										// Threads per group; also the size of the reduction scratch array below
groupshared uint gNumAppendNodeViews[gTotalNumThreads];							// One slot per thread; used in parallel reduction to figure out how many items were added to OutNodeViewBuffer

// Appends one node-view to OutNodeViewBuffer.
//
// NodeIndex           : BVH node to enqueue; 0xffffffff means "no node" and nothing is appended.
// ViewIndex           : view the node is to be processed for.
// ObjectInstanceIndex : object instance carried along with the node-view.
//
// Returns 0 when nothing was appended, otherwise the slot that was written plus one,
// i.e. an item count rather than an index.
uint AppendNodeView(uint NodeIndex, uint ViewIndex, uint ObjectInstanceIndex)
{
	uint itemNumber = 0;

	const bool hasNode = (NodeIndex != 0xffffffff);
	if (hasNode)
	{
		// Reserve a slot through the buffer's hidden counter.
		const uint slot = OutNodeViewBuffer.IncrementCounter();

		sGPUNodeView entry;
		entry.mNodeIndex = NodeIndex;
		entry.mViewIndex = ViewIndex;
		entry.mObjectInstanceIndex = ObjectInstanceIndex;
		entry.mPadding = 0;
		OutNodeViewBuffer[slot] = entry;

		// Callers want the number of items, not the zero-based slot.
		itemNumber = slot + 1;
	}

	return itemNumber;
}

// Culling test for a node-view. Placeholder: nothing is culled yet, so every
// argument is ignored. Frustum culling is meant to go here.
// Returns true when the node-view should be discarded.
bool ShouldCullNodeView(sGPUNodeView nodeView, sGPUPackedMeshBVHNode node, float4x4 view)
{
	const bool isCulled = false;
	return isCulled;
}

// Emits the node-view into one of the two splat output buffers:
//   - nodes with a triangle go to TriangleSplatBuffer,
//   - all other nodes go to PointSplatBuffer (point splats are not used yet).
// The 'view' argument is currently unused.
void DrawNodeView(sGPUNodeView nodeView, sGPUPackedMeshBVHNode node, float4x4 view)
{
	const bool emitPoint = !node.HasTriangle();
	if (emitPoint)
	{
		// Zero-initialise so the padding is deterministic.
		sPointSplatData pointSplat = (sPointSplatData)0;
		pointSplat.mPoisition = node.GetSphere().xyz;	// only xyz of the sphere is stored
		pointSplat.mViewIndex = nodeView.mViewIndex;
		pointSplat.mObjectInstanceIndex = nodeView.mObjectInstanceIndex;

		const uint pointSlot = PointSplatBuffer.IncrementCounter();
		PointSplatBuffer[pointSlot] = pointSplat;
	}
	else
	{
		// Zero-initialise so the padding is deterministic.
		sTriangleSplatData triangleSplat = (sTriangleSplatData)0;
		triangleSplat.mIndices = node.GetTriangleIndices();
		triangleSplat.mViewIndex = nodeView.mViewIndex;
		triangleSplat.mObjectInstanceIndex = nodeView.mObjectInstanceIndex;

		const uint triangleSlot = TriangleSplatBuffer.IncrementCounter();
		TriangleSplatBuffer[triangleSlot] = triangleSplat;
	}
}

// Approximates the node's screen-space size. Placeholder: both arguments are
// ignored and a constant is returned that is larger than the 1.0 threshold
// used by ShouldDrawNodeView, so no point splats occur.
float ApproximateProjectedSize(float4 Sphere, float4x4 View)
{
	const float placeholderSize = 5.0f;
	return placeholderSize;
}

// Decides whether a node-view is drawn now (true) or expanded into its
// children for the next iteration (false).
// The 'nodeView' argument is currently unused.
bool ShouldDrawNodeView(sGPUNodeView nodeView, sGPUPackedMeshBVHNode node, float4x4 view)
{
	// Nodes with triangles must always be drawn.
	const bool hasTriangle = node.HasTriangle();

	// A node that is too small on screen is drawn as-is instead of processing its children.
	const float projectedSize = ApproximateProjectedSize(node.GetSphere(), view);
	const bool isTooSmall = (projectedSize <= 1.0f);

	return hasTriangle || isTooSmall;
}

// Compute entry point for one traversal iteration. One thread handles one entry of
// NodeViewBuffer:
//   - node-views that should be drawn are emitted through DrawNodeView,
//   - all others have their two children appended to OutNodeViewBuffer.
// Each group then reduces the highest item number it appended and thread 0 merges it,
// with InterlockedMax, into IndirectDispatchArgs[0] (group count for the next dispatch)
// and IterationDataWrite[0] (node-view count for the next iteration).
//
// NOTE: this shader only ever raises IndirectDispatchArgs[0] and IterationDataWrite[0];
// nothing here resets them. The caller must clear both to 0 before every iteration,
// otherwise a stale, larger value from an earlier iteration survives. The item numbers
// come from OutNodeViewBuffer's hidden counter, so they only equal "items appended this
// iteration" if that counter also starts at 0 for each iteration.
//
// dispatchThreadID : .x is the index of the node-view this thread processes.
// GroupIndex       : flattened thread index within the group; selects the reduction slot.
// GroupID          : unused.
[numthreads(gTotalNumThreads, 1, 1)]
void ProcessNodeViews_main(uint3 dispatchThreadID : SV_DispatchThreadID, uint GroupIndex : SV_GroupIndex, uint GroupID : SV_GroupID)
{
	// Every thread publishes a value, including threads that append nothing.
	gNumAppendNodeViews[GroupIndex] = 0;
	
	// Don't process more nodes than what is in the node-view buffer (IterationDataRead[0])
	if (dispatchThreadID.x < IterationDataRead[0])
	{		
		sGPUNodeView nodeView = NodeViewBuffer[dispatchThreadID.x];
		if (nodeView.mViewIndex != 0xffffffff) // an invalid view would be an error condition; it is not expected to happen
		{
			// Retrieve the node and view
			sGPUPackedMeshBVHNode node = BVHNodeBuffer[nodeView.mNodeIndex];
			float4x4 view = ViewBuffer[nodeView.mViewIndex];
			
			if (!ShouldCullNodeView(nodeView, node, view))
			{	
				if (ShouldDrawNodeView(nodeView, node, view))
				{
					DrawNodeView(nodeView, node, view);
				}
				else
				{		
					if (node.HasObjectInstance()) // This should never be true for now
					{
						nodeView.mObjectInstanceIndex = node.GetObjectInstance();
					}
						
					// Append the child node-views to the OutNodeViewBuffer
					uint2 children = node.GetChildIndices();
					uint maxNodes1 = AppendNodeView(children.x, nodeView.mViewIndex, nodeView.mObjectInstanceIndex);
					uint maxNodes2 = AppendNodeView(children.y, nodeView.mViewIndex, nodeView.mObjectInstanceIndex);
					
					// Store the maximum item number that is stored in the OutNodeViewBuffer
					gNumAppendNodeViews[GroupIndex] = max(maxNodes1, maxNodes2);
				}
			}
		}
	}

	// All per-thread values must be visible to the whole group before the first
	// reduction step reads another thread's slot. Threads of a group are not
	// guaranteed to run in lock-step, so without this barrier the reduction can
	// read a slot that has not been written yet.
	GroupMemoryBarrierWithGroupSync();
			
	// Parallel reduction of maximum item number in OutNodeViewBuffer
	[unroll(gTotalNumThreads)]
	for(uint s = gTotalNumThreads / 2; s > 0; s >>= 1)
	{
		if(GroupIndex < s)
		{
			gNumAppendNodeViews[GroupIndex] = max(gNumAppendNodeViews[GroupIndex], gNumAppendNodeViews[GroupIndex + s]);
		}
		
		// Make this step's results visible before the next step (and the final read) uses them.
		GroupMemoryBarrierWithGroupSync();
	}

	// Have the first thread write out the dispatch args, and number of nodes for the next iteration
	if(GroupIndex == 0)
	{
		const uint numNodeViews = gNumAppendNodeViews[0];
		
		// Round up to whole thread groups; derived from gTotalNumThreads so it stays
		// in sync with [numthreads].
		const uint numGroups = (numNodeViews + gTotalNumThreads - 1) / gTotalNumThreads;
		
		// InterlockedMax merges the results of all groups; see the NOTE above about
		// clearing both targets between iterations.
		InterlockedMax(IndirectDispatchArgs[0], numGroups);
		InterlockedMax(IterationDataWrite[0], numNodeViews);
	}
}

 

Share this post


Link to post
Share on other sites
Advertisement

It seems that after about 16 iterations, the value that it writes into IterationDataWrite[0] is 252 but the OutNodeViewBuffer count is only 228... These values should be the same...

Share this post


Link to post
Share on other sites

Ah! I found it...

InterlockedMax(IterationDataWrite[0], gNumAppendNodeViews[0]);

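IterationDataWrite[0] is never reset to 0 between dispatches, so InterlockedMax just keeps the largest value from any earlier iteration and it keeps maxing and maxing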

>.>
<.<

Share this post


Link to post
Share on other sites

Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!

Register a new account

Sign in

Already have an account? Sign in here.

Sign In Now
Sign in to follow this  

  • Advertisement
×

Important Information

By using GameDev.net, you agree to our community Guidelines, Terms of Use, and Privacy Policy.

GameDev.net is your game development community. Create an account for your GameDev Portfolio and participate in the largest developer community in the games industry.

Sign me up!