Hi.
This is how you can clamp out the samples/ pixels at the borders.
Just to make sure we're safe on copyright etc., this is strongly based on the Blur shader implementation in the Frank D Luna D3D11 book.
[numthreads(N, 1, 1)]
void HorzBlurCS(int3 groupThreadID : SV_GroupThreadID,
int3 dispatchThreadID : SV_DispatchThreadID)
{
// fill local thread storage to reduce bandwidth. Blur N N pixels, needs N + 2 * BlurRadius for Blur radius
// thread group runs N threads. To get the extra 2*BlurRadius pixels, have 2*BlurRadius threads sample an extra pixel.
if(groupThreadID.x < gBlurRadius)
{
// clamp out of bound samples that occur at image borders
int x = max(dispatchThreadID.x - gBlurRadius, 0);
gCache[groupThreadID.x] = gInput[int2(x, dispatchThreadID.y)];
}
if(groupThreadID.x >= N-gBlurRadius)
{
// clamp out of bound samples that occur at image borders
int x = min(dispatchThreadID.x + gBlurRadius, gInput.Length.x-1);
gCache[groupThreadID.x+2*gBlurRadius] = gInput[int2(x, dispatchThreadID.y)];
}
// clamp out of bound samples that occur at image borders
gCache[groupThreadID.x+gBlurRadius] = gInput[min(dispatchThreadID.xy, gInput.Length.xy-1)];
// wait for all threads to finish.
GroupMemoryBarrierWithGroupSync();
// Now blur each pixel.
float4 blurColor = float4(0, 0, 0, 0);
[unroll]
for(int i = -gBlurRadius; i <= gBlurRadius; ++i)
{
int k = groupThreadID.x + gBlurRadius + i;
blurColor += gWeights[i+gBlurRadius]*gCache[k];
}
gOutput[dispatchThreadID.xy] = blurColor;
}