I have implemented a Gaussian blur using compute shaders and it works well, but as I increase alpha the result becomes darker and darker. I don't understand why, because the weights sum to one.

This is how I calculate weights :

/// <summary>
/// Fills <c>weights</c> with a normalised 1-D Gaussian kernel of 2*BlurRadius+1 taps.
/// </summary>
/// <remarks>
/// Tap <c>i</c> sits at signed offset <c>i - BlurRadius</c> from the kernel centre and gets the
/// raw weight exp(-offset^2 / (2*Alpha^2)). Every tap is then divided by the sum of all raw
/// weights, so the finished kernel adds up to one.
/// </remarks>
void CalculateWeights()
{
    weights = new float[2 * BlurRadius + 1];

    // Computed once, in floating point, so the exponent's division can never truncate
    // even if Alpha is declared as an integer type.
    double twoAlphaSquared = 2.0 * Alpha * Alpha;

    float sum = 0;
    for (int i = 0; i < weights.Length; i++)
    {
        // Signed offset from the centre tap. An unsigned subtraction here underflows for
        // taps left of centre and only yields the right square through wraparound.
        int offset = i - (int)BlurRadius;
        weights[i] = (float)Math.Exp(-(offset * offset) / twoAlphaSquared);
        sum += weights[i];
    }

    // Normalise so the kernel preserves overall brightness.
    for (int i = 0; i < weights.Length; i++)
    {
        weights[i] /= sum;
    }
}


And this is Horizontal Blur shader:

// Constants and resources for the horizontal blur pass.
// NOTE(review): BlurRadius and ThreadsX are not defined in this snippet — presumably
// compile-time macros; confirm.
cbuffer Globals
	// One weight per kernel tap: 2*BlurRadius+1 entries.
	// NOTE(review): HLSL gives each element of a cbuffer array its own 16-byte register,
	// so a tightly packed CPU-side float[] only lines up if the upload path accounts for
	// that padding — confirm how the weights are set.
	float weights[2*BlurRadius+1];
	// Divisors main uses to turn thread coordinates into sample positions.
	float Width;
	float Height;

// Texture sampled by main.
Texture2D Input;
// Texture written by main, one texel per thread.
RWTexture2D<float4> Output;

// Sampler with clamp addressing on both U and V; no filter is specified here.
SamplerState ClampSampler

	AddressU = clamp;
	AddressV = clamp;

// Per-group cache: ThreadsX slots for the group's own texels plus BlurRadius extra
// slots on each side for the neighbouring texels the kernel reaches into.
groupshared float4 Cache[ThreadsX+2*BlurRadius];

// Horizontal blur: each thread caches texels in groupshared memory, then writes one
// output texel as a weighted sum of cached texels along x.
[numthreads(ThreadsX, 1, 1)]
void main(int3 dispathThreadId:SV_DispatchThreadID,int3 groupThreadId:SV_GroupThreadID)
	//set left pixels
	// The first BlurRadius threads of the group also load the texel BlurRadius to their
	// left into the leading slots of Cache.
	if (groupThreadId.x < BlurRadius)
		float2 SamplePos = float2((dispathThreadId.x - BlurRadius) / Width, dispathThreadId.y / Height);
		Cache[groupThreadId.x] = Input.SampleLevel(ClampSampler, SamplePos,0);

	//set right pixels
	// The last BlurRadius threads of the group also load the texel BlurRadius to their
	// right into the trailing slots of Cache.
	if ((ThreadsX - groupThreadId.x) <= BlurRadius)
		float2 SamplePos = float2((dispathThreadId.x + BlurRadius) / Width, dispathThreadId.y / Height);
			Cache[groupThreadId.x + 2 * BlurRadius] = Input.SampleLevel(ClampSampler, SamplePos,0);

	// NOTE(review): x / Width addresses a texel's edge rather than its centre
	// ((x + 0.5) / Width); with a linear filter that would blend neighbours — confirm.
	float2 SamplePos = float2(dispathThreadId.x / Width, dispathThreadId.y / Height);

	// Every thread stores its own texel, shifted right by BlurRadius past the left extras.
	Cache[BlurRadius + groupThreadId.x] = Input.SampleLevel(ClampSampler, SamplePos,0);
	// Wait for all threads to finish sampling
	// NOTE(review): no barrier call is visible after this comment. The loop below reads
	// Cache entries written by other threads, which needs GroupMemoryBarrierWithGroupSync()
	// — confirm it was not lost from the snippet.

	float4 blurColor = float4(0,0,0,0);

	// NOTE(review): with `i < BlurRadius` the loop visits only 2*BlurRadius of the
	// 2*BlurRadius+1 weights; weights[2*BlurRadius] is never applied, so the weights
	// actually used add up to less than the full kernel sum.
	for (int i = -BlurRadius; i < BlurRadius; i++)
		blurColor += weights[i + BlurRadius] * Cache[groupThreadId.x+i+BlurRadius];

	// NOTE(review): the first argument of float4(...) is missing as shown — presumably
	// the blurred colour's rgb; confirm against the original shader.
	Output[dispathThreadId.xy] = float4(, 1.0f);

// Effect technique whose single pass compiles main as a cs_5_0 compute shader.
technique11 Tech0
	pass P0
		SetComputeShader(CompileShader(cs_5_0, main()));

And the Vertical Blur Shader:

// Constants and resources for the vertical blur pass.
// NOTE(review): BlurRadius and ThreadsY are not defined in this snippet — presumably
// compile-time macros; confirm.
cbuffer Globals
	// One weight per kernel tap: 2*BlurRadius+1 entries.
	// NOTE(review): HLSL gives each element of a cbuffer array its own 16-byte register,
	// so a tightly packed CPU-side float[] only lines up if the upload path accounts for
	// that padding — confirm how the weights are set.
	float weights[2*BlurRadius+1];
	// Divisors main uses to turn thread coordinates into sample positions.
	float Width;
	float Height;

// Texture sampled by main.
Texture2D Input;
// Texture written by main, one texel per thread.
RWTexture2D<float4> Output;

// Sampler with clamp addressing on both U and V; no filter is specified here.
SamplerState ClampSampler

	AddressU = clamp;
	AddressV = clamp;

// Per-group cache: ThreadsY slots for the group's own texels plus BlurRadius extra
// slots above and below for the neighbouring texels the kernel reaches into.
groupshared float4 Cache[ThreadsY+2*BlurRadius];

// Vertical blur: each thread caches texels in groupshared memory, then writes one
// output texel as a weighted sum of cached texels along y.
[numthreads(1, ThreadsY, 1)]
void main(int3 dispathThreadId:SV_DispatchThreadID,int3 groupThreadId:SV_GroupThreadID)
	//set up pixels
	// The first BlurRadius threads of the group also load the texel BlurRadius rows
	// before their own into the leading slots of Cache.
	if (groupThreadId.y < BlurRadius)
		float2 SamplePos = float2(dispathThreadId.x / Width, (dispathThreadId.y - BlurRadius) / Height);
		Cache[groupThreadId.y] = Input.SampleLevel(ClampSampler, SamplePos,0);

	//set down pixels
	// The last BlurRadius threads of the group also load the texel BlurRadius rows
	// after their own into the trailing slots of Cache.
	if ((ThreadsY - groupThreadId.y) <= BlurRadius)
		float2 SamplePos = float2(dispathThreadId.x / Width, (dispathThreadId.y + BlurRadius) / Height);
			Cache[groupThreadId.y + 2 * BlurRadius] = Input.SampleLevel(ClampSampler, SamplePos,0);

	// NOTE(review): y / Height addresses a texel's edge rather than its centre
	// ((y + 0.5) / Height); with a linear filter that would blend neighbours — confirm.
	float2 SamplePos = float2(dispathThreadId.x / Width, dispathThreadId.y / Height);

	// Every thread stores its own texel, shifted by BlurRadius past the leading extras.
	Cache[BlurRadius + groupThreadId.y] = Input.SampleLevel(ClampSampler, SamplePos,0);
	// Wait for all threads to finish sampling
	// NOTE(review): no barrier call is visible after this comment. The loop below reads
	// Cache entries written by other threads, which needs GroupMemoryBarrierWithGroupSync()
	// — confirm it was not lost from the snippet.

	float4 blurColor = float4(0,0,0,0);

	// NOTE(review): with `i < BlurRadius` the loop visits only 2*BlurRadius of the
	// 2*BlurRadius+1 weights; weights[2*BlurRadius] is never applied, so the weights
	// actually used add up to less than the full kernel sum.
	for (int i = -BlurRadius; i < BlurRadius; i++)
		blurColor += weights[i + BlurRadius] * Cache[groupThreadId.y+i+BlurRadius];

	// NOTE(review): the first argument of float4(...) is missing as shown — presumably
	// the blurred colour's rgb; confirm against the original shader.
	Output[dispathThreadId.xy] = float4(, 1.0f);

// Effect technique whose single pass compiles main as a cs_5_0 compute shader.
technique11 Tech0
	pass P0
		SetComputeShader(CompileShader(cs_5_0, main()));

Any idea why this is happening? The darkening is more noticeable when the blur radius is a low number such as 2 or 3.

Thanks in advance


I figured it out myself. The for loops should be as follows:

// Inclusive bound: i runs from -BlurRadius to +BlurRadius, so all 2*BlurRadius+1
// weights are applied. Vertical pass shown; the horizontal pass indexes Cache with
// groupThreadId.x instead.
for (int i = -BlurRadius; i <= BlurRadius; i++)
blurColor += weights[i + BlurRadius] * Cache[groupThreadId.y+i+BlurRadius];

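The loop condition needed to be `i <= BlurRadius`. With `i < BlurRadius` the last of the 2*BlurRadius+1 weights was never applied, so the weights actually used summed to less than one and the image came out darker.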

