Not Quite "Real Time" Ambient Occlusion!!! Help!!!!!

Started by
8 comments, last by Jason Z 15 years, 3 months ago
Well, I finally started learning how to use shaders, render-to-target and all that (it's been 11 days since I started learning DirectX :)), and so I decided to make an "SSAO" shader (if this is one). I'm not sure how exactly SSAO is implemented, but I do know it uses the z-buffer, so here is my attempt at fake AO: I first load the depth map into a texture, and then on a subsequent pass I sample the z values of the surrounding pixels within a particular radius and calculate their effects (4 samples for this). I then double the radius and again randomly sample 4 pixels, with slightly different settings for how they affect the shade of this pixel, hence a total of 4+4 = 8 samples per pixel. For the randomness of the sampling I use a random noise texture created in GIMP; to ensure that the values stay within a definite radius I use fmod. However, the problem now is that it is VERY slow (around 15-20 fps when one quarter of the screen is full, and around 3-4 fps when the entire screen is filled with geometry!). I'm rendering at a resolution of 1024x1024, and the random noise texture has the same dimensions, by the way. Any ideas how to improve efficiency? I also have a question: is it possible to modify the data in a texture pixel by pixel instead of rendering geometry, calling the vertex shader, etc.? In other words, could I directly "pipe" data from a texture to a pixel shader so that it operates on it on a per-pixel basis? Thanks in advance!
// Interpolants passed from Z_VS to Z_PS (depth pass).
struct ZVertexToPixel
{
	// Transformed position consumed by the rasterizer.
	float4 Position     : POSITION;    
	// Copy of Position routed through TEXCOORD0 so the pixel shader can read
	// it; Z_PS uses its w component as the depth value.
	float4 Position2D    : TEXCOORD0;
};

// Output of Z_PS: a single colour written to render target 0.
struct ZPixelToFrame
{
	// Z_PS writes the scaled depth value into every channel.
	float4 Color : COLOR0;
};

// Interpolants passed from AO_VS to AO_PS (ambient-occlusion pass).
struct AOVertexToPixel
{
	// Transformed position consumed by the rasterizer.
	float4 Position     : POSITION;    
	// Copy of Position routed through TEXCOORD0; AO_PS divides x and y by w
	// to derive the texture coordinates it samples with.
	float4 Position2D    : TEXCOORD0;
};

// Output of AO_PS: a single colour written to render target 0.
struct AOPixelToFrame
{
	// AO_PS writes 1 - (accumulated difference / 1.4) here.
	float4 Color : COLOR0;
};

// Matrix applied to incoming vertex positions by both Z_VS and AO_VS.
// NOTE(review): no separate world matrix is used, so positions are presumably
// already in world space - confirm against the application code.
float4x4 xViewProjection;
// Texture bound to ColoredTextureSampler; AO_PS reads it as the per-pixel
// value to compare against its neighbours (presumably the output of the Z
// pass - the application must bind it).
Texture xFrame;
// Noise texture bound to the Rand sampler; AO_PS uses it to jitter its
// sample positions.
Texture xRand;
// Divisor used by Z_PS to scale the w component before writing it out.
float xMaxDepth=40.0f;


// Point-filtered, mirror-addressed sampler over xFrame.
sampler ColoredTextureSampler = sampler_state
{
	texture   = <xFrame>;
	magfilter = POINT;
	minfilter = POINT;
	mipfilter = POINT;
	AddressU  = mirror;
	AddressV  = mirror;
};

// Point-filtered, mirror-addressed sampler over the xRand noise texture.
sampler Rand = sampler_state
{
	texture   = <xRand>;
	magfilter = POINT;
	minfilter = POINT;
	mipfilter = POINT;
	AddressU  = mirror;
	AddressV  = mirror;
};

// Depth-pass vertex shader.
// Transforms the incoming position by xViewProjection and forwards the same
// value twice: once as the rasterizer position and once as an interpolant the
// pixel shader is able to read.
ZVertexToPixel Z_VS(float4 inPos : POSITION)
{
	float4 transformed = mul(inPos, xViewProjection);

	ZVertexToPixel result;
	result.Position   = transformed;
	result.Position2D = transformed;
	return result;
}

// Depth-pass pixel shader.
// Writes the interpolated w component divided by xMaxDepth into all four
// colour channels.
ZPixelToFrame Z_PS(ZVertexToPixel PSIn)
{
	float scaledDepth = PSIn.Position2D.w / xMaxDepth;

	ZPixelToFrame result;
	result.Color = float4(scaledDepth, scaledDepth, scaledDepth, scaledDepth);
	return result;
}

// Ambient-occlusion-pass vertex shader.
// Applies xViewProjection to the incoming position and hands the result to
// the pixel shader both as POSITION and as a readable TEXCOORD0 copy.
AOVertexToPixel AO_VS(float4 inPos : POSITION)
{
	AOVertexToPixel vsOut;
	vsOut.Position   = mul(inPos, xViewProjection);
	vsOut.Position2D = vsOut.Position;
	return vsOut;
}

// Helper for AO_PS: walks a 3x3 grid of jittered taps around centerUV and adds
// (centerDepth - tapDepth) to the running total for every tap whose length
// difference lies strictly between minDiff and maxDiff.
//
//   runningTotal - occlusion accumulated so far (returned with this ring added)
//   centerUV     - texture coordinate of the pixel being shaded
//   centerDepth  - value of ColoredTextureSampler at centerUV
//   radiusScale  - multiplier on the jitter step (1 = preset radius, 2 = double)
//   minDiff      - lower (exclusive) bound on the accepted length difference
//   maxDiff      - upper (exclusive) bound on the accepted length difference
//
// The tap position is deliberately NOT reset between iterations: every tap
// starts from where the previous one ended, exactly as in the original loops.
float4 AO_AccumulateRing(float4 runningTotal, float2 centerUV, float4 centerDepth,
                         float radiusScale, float minDiff, float maxDiff)
{
	float2 tapUV = centerUV;
	float centerLength = length(centerDepth);

	for (int i = -1; i < 2; i++)
	{
		// The vertical jitter depends only on i, so it is fetched once per row
		// instead of once per tap. For i == 0 the step is multiplied by zero,
		// so the noise lookup is skipped entirely.
		float stepY = 0;
		if (i != 0)
			stepY = radiusScale * i * fmod(tex2D(Rand, centerUV + i * cos(i) * 0.233).x, 0.04);

		for (int j = -1; j < 2; j++)
		{
			tapUV.y += stepY;

			// For j == 0 the horizontal step is multiplied by zero, and the
			// lookup coordinate would be i / sin(0) - a division by zero - so
			// that noise fetch is skipped as well.
			if (j != 0)
				tapUV.x += radiusScale * j * fmod(tex2D(Rand, centerUV + i / sin(j * 0.3455)).x, 0.04);

			// One depth fetch per tap, reused for both the range test and the
			// accumulation (the original fetched the same texel twice).
			float4 tapDepth = tex2D(ColoredTextureSampler, tapUV);
			float diff = centerLength - length(tapDepth);
			if (diff > minDiff && diff < maxDiff)
				runningTotal += centerDepth - tapDepth;
		}
	}

	return runningTotal;
}

// Ambient-occlusion-pass pixel shader.
// Converts the interpolated position into [0,1] texture coordinates, reads the
// centre value from ColoredTextureSampler, then accumulates neighbour
// differences over two rings of jittered taps (preset radius and double that
// radius, each with its own acceptance window). The output colour is
// 1 - total / 1.4.
//
// NOTE(review): each ring is a 3x3 loop, i.e. 9 depth taps per ring (18 in
// total), not the "4 + 4" samples mentioned in the accompanying description.
AOPixelToFrame AO_PS(AOVertexToPixel PSIn)
{
	AOPixelToFrame Output = (AOPixelToFrame)0;

	// Perspective divide, then remap [-1,1] to [0,1]; y is flipped.
	float2 ProjectedTexCoords;
	ProjectedTexCoords.x = PSIn.Position2D.x/PSIn.Position2D.w/2.0f +0.5f;
	ProjectedTexCoords.y = -PSIn.Position2D.y/PSIn.Position2D.w/2.0f +0.5f;

	float4 centerDepth = tex2D(ColoredTextureSampler, ProjectedTexCoords);

	float4 occlusion = 0;
	// With preset radius
	occlusion = AO_AccumulateRing(occlusion, ProjectedTexCoords, centerDepth, 1.0f, 0.001f, 0.27f);
	// With double of preset radius
	occlusion = AO_AccumulateRing(occlusion, ProjectedTexCoords, centerDepth, 2.0f, 0.002f, 0.4f);

	Output.Color = 1-occlusion/1.4;
	return Output;
}



// Two-pass technique, both passes compiled for shader model 3.0.
// NOTE(review): nothing in this file switches render targets or binds xFrame
// between the passes - presumably the application renders Pass0 into a texture,
// assigns it to xFrame, and then renders Pass1; confirm against the host code.
technique RTAO
{
	// Pass0: writes scaled depth (Z_VS / Z_PS).
	pass Pass0
	{
		VertexShader = compile vs_3_0 Z_VS();
		PixelShader = compile ps_3_0 Z_PS();
	}
	// Pass1: computes the occlusion term from xFrame (AO_VS / AO_PS).
	pass Pass1
	{
		VertexShader = compile vs_3_0 AO_VS();
		PixelShader = compile ps_3_0 AO_PS();
	}

}

Here are some pics. Without the double-radius samples (only 4 samples): [Free Image Hosting at www.ImageShack.us - QuickPost] [Free Image Hosting at www.ImageShack.us - QuickPost] [Free Image Hosting at www.ImageShack.us - QuickPost] [Free Image Hosting at www.ImageShack.us - QuickPost] With the double-radius samples (4+4 = 8 samples): [Free Image Hosting at www.ImageShack.us - QuickPost] [Free Image Hosting at www.ImageShack.us - QuickPost] With 8 samples plus a selective Gaussian blur in GIMP (probably what it would look like after a blur pass): [Free Image Hosting at www.ImageShack.us - QuickPost] PS: I'm running this on a vanilla GeForce 6600... what kind of performance can I expect from it anyway?
Advertisement
Someone Please Help!!
Come on guys !!! PLzz help!!
I think with your childish "OMFG hlepz pls!!one" and your try to raise attention by posting that every hour you just disqualified yourself. The probable reason why nobody helps at the moment is that, voila, nobody *can* help at the moment (maybe asleep, at work, or, as for me, lack of knowledge).

Bump your threads if you want, but then only every 1-3 days; otherwise most people get pissed off, especially because of the style of writing.
um i think u saw the times but not the date... those two messages were posted more than just an "hour" apart... as for the childish tone ,well i aint exactly 40 am i?? anyway will try to post better..
Quote:um i think u saw the times but not the date...
Quote:Posted - 12/17/2008 3:32:24 PM
Quote:Posted - 12/17/2008 4:47:05 PM


--

Quote:well i aint exactly 40 am i??

I guessed so.

Some hints:

u -> you
!!!!one -> !
?????// -> ?
Thanks in forward! -> <nothing> (it implies you are not going to answer replies)
plz -> Please

As a rule of thumb, start sentences with uppercase letters (though this is not a deep requirement). Never write ALL CAPS ON (it's like yelling). A good alternative is to write in italics, or to make something more outstanding, in bold letters.

But: You're already doing good with the paragraphs.

Fear not, worse examples exist ;)
Also, if you are new to DirectX and have only been learning and trying for about 11 days, I'd suggest reading a bit more about DirectX itself and learning more about its internal workings before heading to more difficult subjects such as SSAO and other kinds of shader techniques. But that's just my opinion.
Quote:Original post by rohith291991
...
i first load the depth map into a texture and then on a subsequent pass with i sample the z values of the surrounding pixels with a particular radius and calculaate their effects(4 samples for this) and then i subsequently double the radius and again randomly sample 4 pixels with slightly different settings as to how they affect the shade of this pixel. hence a total of 4+4 = 8 samples per pixel... for the randomness of the sampling i use a random noise texture created in GIMP.. to ensure that the values are within a definite radius i use fmod..
...
With the double radius samples : (4+4=8 samples)

PS: Im running this on a geforce 6600 vanilla... what kind of performance can i expect from it anyway??


There are two nested loops, each of which samples a texture 3-4 times in its inner loop, giving you a total number of samples much higher than 8.

// 3 times
for(i=-1;i<2;i++)
{
// 3 times
for(j=-1;j<2;j++)
{
// 2 texture lookups
Temp2.y+=i*fmod(tex2D(Rand,ProjectedTexCoords+i*cos(i)*0.233),0.04);
Temp2.x+=j*fmod(tex2D(Rand,ProjectedTexCoords+i/sin(j*0.3455)),0.04);

// one more texture lookup
float a=length(b)-(length(tex2D(ColoredTextureSampler,Temp2)));

// and even one more
if(a>0.001&&a<0.27)
Temp+=b-tex2D(ColoredTextureSampler,Temp2);
}
}

So basically, you have 3x3 iterations, 3-4 samples per iteration ~= 27-36 samples, two times... which is somewhere between 54 and 72 tex2Ds per pixel!

chris
And those samples are complex dependent reads... On an ancient card like a GeForce 6 this shader is a recipe for slowness (it would be slow on anything, though), unless you minimize the number of samples. Most SSAO methods only do about 8-16 samples.
If you are interested in SSAO, take a look at the environmental section of the D3D10 book linked in my signature. There is a chapter on SSAO, with a sample implementation provided for you to reference and compare with your own.

This topic is closed to new replies.

Advertisement