Help me and others solve Real-Time Glow once and for all (screens inside)

212

Author

October 23, 2004 04:16 PM

I have tried many different ways of solving this problem, and they all produce the same result: the Gaussian blur only affects the pixels covered by the model itself and never spreads beyond it. The picture below depicts the problem:

I applied a Tron-like glow. On the right is the mask, generated by an rgb * a computation in a pixel shader. It is rendered in a pre-processing pass before the rest of the scene.


// Glow-mask pixel shader: outputs the diffuse texel scaled by its own alpha,
// so texels with zero alpha write black into the mask.
PS_OUTPUT_NMAPL psCreateMask(PS_INPUT_MASK i)
{
	// Fetch the base (diffuse) texel for this pixel
	const half4 vTexel = tex2D(tDiffuseMap, i.vTex0);

	PS_OUTPUT_NMAPL o;
	// All four channels, alpha included, are multiplied by the texel's alpha
	o.vColor = vTexel.a * vTexel;
	return o;
}

The following are the "bloom" (real-time glow) shaders:


// Texture the blur pixel shader reads from; bound to the tAlphaMap0 sampler below.
texture tAlphaMask0;

// Texture-coordinate step between blur taps, plus its 2x, 3x and 4x multiples.
// The glow vertex shaders add and subtract these from the quad's UVs.
half fTexelIncrement;
half fTexelIncrementx2;
half fTexelIncrementx3;
half fTexelIncrementx4;

/*static const float fBlurWeights[4] = 
{
    0.026995,
    0.064759,
    0.120985,
    0.176033,
};*/

// Sampler used by the blur pixel shader to read the glow mask.
sampler2D tAlphaMap0 = sampler_state
{
	Texture   = <tAlphaMask0>;
	// The blur taps are displaced from the quad's UVs, so taps for pixels near the screen
	// border fall outside [0,1]. Clamp them to the edge texel; the default WRAP addressing
	// would fetch from the opposite side of the mask instead.
	AddressU  = CLAMP;
	AddressV  = CLAMP;
	MinFilter = ANISOTROPIC;
	MagFilter = LINEAR;
	MipFilter = LINEAR;
};

// Vertex layout of the screen-aligned quad fed to the glow vertex shaders.
struct VS_INPUT_GLOW
{
	float4 vPosition : POSITION;	// passed through by vsGlowV / vsGlowH with w forced to 1
	float4 vColor    : COLOR0;	// declared but not read by vsGlowV / vsGlowH
	half2  vTex0     : TEXCOORD0;	// Base Texture
};
// Output of the glow vertex shaders: the quad position plus eight displaced
// texture coordinates, one per blur tap.
struct VS_OUTPUT_GLOW
{
	float4 vPosition : POSITION;
	half2  vTex0     : TEXCOORD0;	// + fTexelIncrement
	half2  vTex1     : TEXCOORD1;	// + fTexelIncrementx2
	half2  vTex2     : TEXCOORD2;	// - fTexelIncrement
	half2  vTex3     : TEXCOORD3;	// - fTexelIncrementx2
	half2  vTex4     : TEXCOORD4;	// + fTexelIncrementx3
	half2  vTex5     : TEXCOORD5;	// + fTexelIncrementx4
	half2  vTex6     : TEXCOORD6;	// - fTexelIncrementx3
	half2  vTex7     : TEXCOORD7;	// - fTexelIncrementx4
};
// Pixel shader view of VS_OUTPUT_GLOW: the eight interpolated tap coordinates
// (same semantics, position omitted).
struct PS_INPUT_GLOW
{
	half2 vTex0 : TEXCOORD0;
	half2 vTex1 : TEXCOORD1;
	half2 vTex2 : TEXCOORD2;
	half2 vTex3 : TEXCOORD3;
	half2 vTex4 : TEXCOORD4;
	half2 vTex5 : TEXCOORD5;
	half2 vTex6 : TEXCOORD6;
	half2 vTex7 : TEXCOORD7;
};
// Result of the glow pixel shader: a single colour written to render target 0.
struct PS_OUTPUT_GLOW
{
	float4 vColor : COLOR0;
};
// Vertex shader for the vertical blur pass: emits eight tap coordinates that
// share the quad's u and are displaced along v.
VS_OUTPUT_GLOW vsGlowV(VS_INPUT_GLOW i)
{
	VS_OUTPUT_GLOW o;

	// No matrix is applied: the incoming xyz is output as-is with w forced to 1
	o.vPosition = float4(i.vPosition.xyz, 1.0f);

	const half fU = i.vTex0.x;
	const half fV = i.vTex0.y;

	// Vertical convolution: taps one and two steps either side of v ...
	o.vTex0 = half2(fU, fV + fTexelIncrement);
	o.vTex1 = half2(fU, fV + fTexelIncrementx2);
	o.vTex2 = half2(fU, fV - fTexelIncrement);
	o.vTex3 = half2(fU, fV - fTexelIncrementx2);

	// ... and three and four steps either side
	o.vTex4 = half2(fU, fV + fTexelIncrementx3);
	o.vTex5 = half2(fU, fV + fTexelIncrementx4);
	o.vTex6 = half2(fU, fV - fTexelIncrementx3);
	o.vTex7 = half2(fU, fV - fTexelIncrementx4);

	return o;
}
// Vertex shader for the horizontal blur pass: emits eight tap coordinates that
// share the quad's v and are displaced along u.
VS_OUTPUT_GLOW vsGlowH(VS_INPUT_GLOW i)
{
	VS_OUTPUT_GLOW o;

	// No matrix is applied: the incoming xyz is output as-is with w forced to 1
	o.vPosition = float4(i.vPosition.xyz, 1.0f);

	const half fU = i.vTex0.x;
	const half fV = i.vTex0.y;

	// Horizontal convolution: taps one and two steps either side of u ...
	o.vTex0 = half2(fU + fTexelIncrement,   fV);
	o.vTex1 = half2(fU + fTexelIncrementx2, fV);
	o.vTex2 = half2(fU - fTexelIncrement,   fV);
	o.vTex3 = half2(fU - fTexelIncrementx2, fV);

	// ... and three and four steps either side
	o.vTex4 = half2(fU + fTexelIncrementx3, fV);
	o.vTex5 = half2(fU + fTexelIncrementx4, fV);
	o.vTex6 = half2(fU - fTexelIncrementx3, fV);
	o.vTex7 = half2(fU - fTexelIncrementx4, fV);

	return o;
}
// Blur pixel shader shared by the horizontal and vertical passes.
//
// Combines all eight taps supplied by the vertex shader into one symmetric,
// normalised kernel. The previous version sampled vTex0 twice, never read
// vTex4..vTex7, and summed its samples without normalising, which gave a
// lopsided kernel only two steps wide with a gain of 5.
//
// i.vTex0/i.vTex2 are the +/-1 step taps, vTex1/vTex3 +/-2, vTex4/vTex6 +/-3,
// vTex5/vTex7 +/-4 (see vsGlowH / vsGlowV).
PS_OUTPUT_GLOW psGlow(PS_INPUT_GLOW i)
{
	PS_OUTPUT_GLOW o;

	// Gaussian weights taken from this file's commented-out fBlurWeights table, largest
	// weight nearest the centre. There is no centre tap among the eight coordinates, so
	// the weights are divided by their two-sided sum to make the kernel add up to 1.
	const half fSum = 2.0 * (0.176033 + 0.120985 + 0.064759 + 0.026995);
	const half fW1  = 0.176033 / fSum;	// +/-1 step
	const half fW2  = 0.120985 / fSum;	// +/-2 steps
	const half fW3  = 0.064759 / fSum;	// +/-3 steps
	const half fW4  = 0.026995 / fSum;	// +/-4 steps

	// Convolution: each symmetric pair of taps shares one weight
	half4 vBlur;
	vBlur  = (tex2D(tAlphaMap0, i.vTex0) + tex2D(tAlphaMap0, i.vTex2)) * fW1;
	vBlur += (tex2D(tAlphaMap0, i.vTex1) + tex2D(tAlphaMap0, i.vTex3)) * fW2;
	vBlur += (tex2D(tAlphaMap0, i.vTex4) + tex2D(tAlphaMap0, i.vTex6)) * fW3;
	vBlur += (tex2D(tAlphaMap0, i.vTex5) + tex2D(tAlphaMap0, i.vTex7)) * fW4;

	o.vColor = vBlur;

	return(o);
};

// Two-pass glow technique: both passes use the same pixel shader and differ
// only in the direction in which the vertex shader displaces the taps.
technique t0
{
	// Pass 0: horizontal taps
	pass p0
	{
		VertexShader = compile vs_1_1 vsGlowH();
		PixelShader  = compile ps_2_0 psGlow();
	}

	// Pass 1: vertical taps
	pass p1
	{
		VertexShader = compile vs_1_1 vsGlowV();
		PixelShader  = compile ps_2_0 psGlow();
	}
}

I have gone through many different sources of information, including Nvidia's real-time glow source code for the little space ship, and tried to model my implementation on it. I have also looked at Microsoft's implementation. But I have not been able to solve this problem. I have tried using separate render targets for each separable convolution, different render states, etc. Nothing has fixed the problem of the blur being "clipped", if you will, by the extents of the model geometry. Is this related to the hardware's alpha testing? What am I doing wrong? Following is the code I use in DirectX 9.0c to actually render the scene:


// Render Pipeline
//
// Draws one frame in three stages:
//   1. glow-mask pre-pass: the client's skinned model is drawn into the effect's render target;
//   2. the regular scene, the 2D sprite and the debug output are drawn into the back buffer;
//   3. the GLOW effect is drawn as a screen-aligned quad with additive (ONE, ONE) blending.
__inline void cSilicon::Render()
{
	// GetRenderTarget() and GetDepthStencilSurface() add a reference to the surface they
	// return, and both are fetched on every frame. Release the references taken on the
	// previous frame first; otherwise one reference per surface is leaked per frame.
	// NOTE(review): assumes both cached pointers start out as NULL - confirm in cEffects.
	if ( m_cEffects.getRenderTargetBackBuffer() )
	{
		m_cEffects.getRenderTargetBackBuffer()->Release();
		m_cEffects.getRenderTargetBackBuffer() = NULL;
	}
	if ( m_cEffects.getStencilBackBuffer() )
	{
		m_cEffects.getStencilBackBuffer()->Release();
		m_cEffects.getStencilBackBuffer() = NULL;
	}
	m_pd3dDevice->GetRenderTarget(0,&m_cEffects.getRenderTargetBackBuffer());
	m_pd3dDevice->GetDepthStencilSurface( &m_cEffects.getStencilBackBuffer() );
	// Pre-Processing Alpha Mask rendering for Real-Time Glow ( Render to Texture )
	m_pd3dDevice->SetRenderTarget(0,m_cEffects.getRenderTarget());
	m_pd3dDevice->SetDepthStencilSurface(m_cEffects.getStencilPP());
	m_pd3dDevice->Clear( 0L, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER | D3DCLEAR_STENCIL, 0x00000000, 1.0f, 0L );
	// Begin the scene - ** Draw Front to Back
    m_pd3dDevice->BeginScene();
	m_pd3dDevice->SetRenderState( D3DRS_ZWRITEENABLE, TRUE );
	m_pd3dDevice->SetRenderState( D3DRS_ZENABLE, D3DZB_TRUE );
	m_pd3dDevice->SetRenderState( D3DRS_CULLMODE, D3DCULL_CCW );
	m_pd3dDevice->SetRenderState( D3DRS_LIGHTING, FALSE );
	m_pd3dDevice->SetRenderState( D3DRS_ALPHABLENDENABLE, FALSE );

	m_cMesh.getClient()->getSkinnedModel().RenderAlphaMask();
	// End the scene
    m_pd3dDevice->EndScene();
	
	// Render the Actual Scene
	m_pd3dDevice->SetRenderTarget(0,m_cEffects.getRenderTargetBackBuffer());
	m_pd3dDevice->SetDepthStencilSurface(m_cEffects.getStencilBackBuffer());
	// Clear the backbuffer
    m_pd3dDevice->Clear( 0L, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0x00000000, 1.0f, 0L );
	m_pd3dDevice->BeginScene();

	// The client is drawn as a rigid model when one exists, otherwise as the skinned model
	if ( m_cMesh.getClient()->getRigidModel() )
		m_cMesh.getClient()->getRigidModel()->Render();
	else
		m_cMesh.getClient()->getSkinnedModel().Render();

	for ( unsigned int iD = 0 ; iD < m_cMesh.getModelsSkinned().size() ; ++iD )
		m_cMesh.getModelsSkinned()[iD].RenderFrustum();

	// All Other Models
	// Lighting OFF, Z <ON>, Alpha Blending <OFF> ( cg Shaders, 3D Objects w/o Lighting )
	for ( unsigned int iD = 0 ; iD < m_cMesh.getModels().size() ; ++iD )
		m_cMesh.getModels()[iD].RenderFrustum();
	for ( unsigned int iD = 0 ; iD < m_cMesh.getModelsRigid().size() ; ++iD )
		m_cMesh.getModelsRigid()[iD].RenderFrustum();
	for ( unsigned int iD = 0 ; iD < m_cMesh.getModelsAnimRigid().size() ; ++iD )
		m_cMesh.getModelsAnimRigid()[iD].RenderFrustum();
		
	// 2D Stuff Here
	m_pd3dDevice->SetRenderState(D3DRS_ZWRITEENABLE,FALSE);
	m_pd3dDevice->SetRenderState( D3DRS_ALPHABLENDENABLE, TRUE );
	m_pd3dDevice->SetRenderState( D3DRS_LIGHTING, FALSE );
	// Sprites -> Rendered Alpha Blending ON
	// (draws the glow render-target texture itself as a sprite at x = 256)
	m_cSprite.BeginBatch();
	m_cSprite.Render(m_cEffects.getGlowRenderTargetTex(),D3DXVECTOR3(256.0f,0.0f,0.0f));
	m_cSprite.EndBatch();

	// Re-Enable RenderStates
	m_pd3dDevice->SetRenderState( D3DRS_ZWRITEENABLE, TRUE );
	m_pd3dDevice->SetRenderState( D3DRS_LIGHTING, TRUE );
    m_pd3dDevice->SetRenderState( D3DRS_ALPHABLENDENABLE, FALSE );

	DebugOut();

	// Post-Processing Alpha Mask rendering for Real-Time Glow ( Render to Texture ) requires ps2.0
	m_pd3dDevice->SetRenderState( D3DRS_ZENABLE, D3DZB_FALSE );
	m_pd3dDevice->SetRenderState(D3DRS_ZWRITEENABLE,FALSE);
	m_pd3dDevice->SetRenderState( D3DRS_CULLMODE, D3DCULL_NONE );
	m_pd3dDevice->SetRenderState( D3DRS_ALPHABLENDENABLE, TRUE );
	m_pd3dDevice->SetRenderState(D3DRS_SRCBLEND, D3DBLEND_ONE);
	m_pd3dDevice->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_ONE);

	m_cEffects.getShader()[GLOW].m_pd3dxEffect->SetTechnique(m_cEffects.getShader()[GLOW].m_pd3dxTechnique[0]);

	// Zero-initialised and only used when Begin() succeeds, so a failed Begin() can never
	// leave the pass loop running on an indeterminate count.
	unsigned int numPasses = 0;
	// A non-negative HRESULT means success (this is the test SUCCEEDED() performs)
	if ( m_cEffects.getShader()[GLOW].m_pd3dxEffect->Begin( &numPasses, D3DXFX_DONOTSAVESTATE|D3DXFX_DONOTSAVESHADERSTATE ) >= 0 )
	{
		m_cEffects.getShader()[GLOW].m_pd3dxEffect->SetTexture( m_cEffects.getShader()[GLOW].m_pd3dxHandle[GLMASKTEX0],
																m_cEffects.getGlowRenderTargetTex() );
		for ( unsigned int uiX = 0 ; uiX < numPasses ; ++uiX )
		{
			// Pass 0 uses render-target resolution 0, every later pass uses resolution 1.
			// The step is half a texel of that resolution; x2..x4 are its multiples.
			const float fTexelStep = 0.5f / m_cEffects.getRTResoltion( uiX == 0 ? 0 : 1 );
			m_cEffects.getShader()[GLOW].m_pd3dxEffect->SetFloat( "fTexelIncrement",   fTexelStep );
			m_cEffects.getShader()[GLOW].m_pd3dxEffect->SetFloat( "fTexelIncrementx2", fTexelStep * 2.0f);
			m_cEffects.getShader()[GLOW].m_pd3dxEffect->SetFloat( "fTexelIncrementx3", fTexelStep * 3.0f);
			m_cEffects.getShader()[GLOW].m_pd3dxEffect->SetFloat( "fTexelIncrementx4", fTexelStep * 4.0f);

			m_cEffects.getShader()[GLOW].m_pd3dxEffect->BeginPass( uiX );
			// Draw the Screen-Aligned Quad
			m_pd3dDevice->SetVertexDeclaration(m_cEffects.getPPDecl());
			m_pd3dDevice->SetStreamSource( 0, m_cEffects.getQuadPostProcess(), 0, sizeof(PPVERT) );
			m_pd3dDevice->DrawPrimitive(D3DPT_TRIANGLESTRIP, 0, 2);
			m_cEffects.getShader()[GLOW].m_pd3dxEffect->EndPass();
		}
		m_cEffects.getShader()[GLOW].m_pd3dxEffect->End();
	}

	m_pd3dDevice->SetRenderState( D3DRS_ALPHABLENDENABLE, FALSE );
	// End the scene
    m_pd3dDevice->EndScene();
	
	// Present() does not reset render states; the states changed above stay in effect
	// until the start of the next frame sets them again.
	// Present the backbuffer contents to the display
    m_pd3dDevice->Present( NULL, NULL, NULL, NULL );
}

I hope someone will look at what I've done; I'm sure the cause is something simple. Please help me and others interested in this technique to solve this problem! Thank you! [Edited by - Carandiru on October 23, 2004 4:34:53 PM]

poly-gone

148

October 23, 2004 11:06 PM

Using a smaller render target for the blurring (something like 1/8 x 1/8 the original) should do the trick. Expanding the blur render target to the screen size will automatically result in color bleeding.

Carandiru

212

Author

November 06, 2004 04:56 PM

Well I have changed the render target to be 256x256, I have also discussed with a professional game developer my problem, and he suggested trying to draw a plane under the character, and applying the same method. Well it works for the plane under the character rather well. A Screenshot, and latest build download below:

I'm wondering if there is something strange going on with my blur, even though it's very standard. I have updated the code a bit since the last post: the mask is now created with a skinning-specific shader first and then a generic shader for all other models, and both feed the same final glow post-processing shader.

// Skinned glow-mask vertex shader: blends the vertex between two bones, then
// transforms the blended position by the view-projection matrix.
//
// The weight of the second bone is implicit (1 - first weight). The previous
// version looked up bone index [0] for the second influence as well, so the
// second bone never affected the vertex; it now uses index [1].
VS_OUTPUT_MASK vsCreateMask(VS_INPUT_MASK i)
{
	VS_OUTPUT_MASK o;
	float4      netPos = 0.0f;			// .w Holds Last Accumlated Weight Value

	// Compensate for lack of UBYTE4 on Geforce3
	int4 vIndexVector = D3DCOLORtoUBYTE4(i.vBlendIndices);

	// calculate the Position/normal using the "normal" weights
	// and accumulate the weights to calculate the last weight
	// This is The Last Accumulated Weight Value
	netPos.w += i.vBlendWeights[0];
	netPos.xyz += mul(i.vPosition, uBoneMatrixArrayL[vIndexVector[0]]) * i.vBlendWeights[0];

	// Calculate the Weight
	netPos.w = 1.0f - netPos.w;

	// Now that we have the calculated weight, add in the final influence,
	// taken from the second bone index
	netPos.xyz += (mul(i.vPosition, uBoneMatrixArrayL[vIndexVector[1]]) * netPos.w);

	// Transform the blended position by the view-projection matrix
	o.vPosition = mul(float4(netPos.xyz, 1.0f), uViewProjL);

	o.vTex0 = i.vTex0;

	return(o);
}

// Glow-mask pixel shader: diffuse texel scaled by its own alpha.
PS_OUTPUT_NMAPL psCreateMask(PS_INPUT_MASK i)
{
	PS_OUTPUT_NMAPL o;
	// Sample Base Texture
	half4 vDiffuseTex = tex2D(tAlphaSourceSampler, i.vTex0);
	o.vColor = vDiffuseTex * vDiffuseTex.a;
	return(o);
}

///////////////////////////////////////////////////////
// Skinning Mask Creation for  Real-Time Glow		 //
///////////////////////////////////////////////////////
technique t4
{
	pass p0
	{
		VertexShader = compile vs_1_1 vsCreateMask();
		PixelShader = compile ps_1_1 psCreateMask();
	}
}

Generic Mask Creation & Post Processing Glow/Blur

// Generic glow-mask creation and glow/blur post-processing effect.

texture tPostProcess;	// source texture of the blur passes
texture tAlphaSource;	// diffuse texture sampled when building the glow mask

// Texture-coordinate step between blur taps and its 2x/3x/4x multiples
half fTexelIncrement;
half fTexelIncrementx2;
half fTexelIncrementx3;
half fTexelIncrementx4;

half4x4    uViewProjL : VIEWPROJECTION;

// Blur weights by distance from the kernel centre (0 = centre, 4 = outermost tap)
#define WT9_0 1.0
#define WT9_1 0.8
#define WT9_2 0.6
#define WT9_3 0.4
#define WT9_4 0.2
// Sum of the centre weight plus both sides, used to make the kernel add up to 1
#define WT9_NORMALIZE (WT9_0+2.0*(WT9_1+WT9_2+WT9_3+WT9_4))

sampler2D tAlphaSourceSampler = sampler_state
{
	Texture   = <tAlphaSource>;
	AddressU = CLAMP;
	AddressV = CLAMP;
	MinFilter = POINT;
	MagFilter = LINEAR;
	MipFilter = POINT;
};

sampler2D tPostProcessSampler = sampler_state
{
	Texture   = <tPostProcess>;
	AddressU = CLAMP;
	AddressV = CLAMP;
	MinFilter = POINT;
	MagFilter = LINEAR;
	MipFilter = NONE;
};

// Alpha Mask Pre-Processing Semantics
struct VS_INPUT_MASK
{
	float4  vPosition		 : POSITION;
	half2   vTex0			 : TEXCOORD0;	// Base Texture
};
struct VS_OUTPUT_MASK
{
	float4  vPosition		 : POSITION;
	half2   vTex0			 : TEXCOORD0;
};
struct PS_INPUT_MASK
{
	half2   vTex0			 : TEXCOORD0;
};
struct PS_OUTPUT_MASK
{
	half4 vColor		  : COLOR0;
};

// Glow Post-Processing Semantics
struct VS_INPUT_GLOW
{
	float4  vPosition		 : POSITION;
	half2   vTex0			 : TEXCOORD0;	// Base Texture
};
struct VS_OUTPUT_GLOW
{
	float4  vPosition    : POSITION;
	half2   vTex0        : TEXCOORD0;
	half2   vTex1		 : TEXCOORD1;
	half2   vTex2		 : TEXCOORD2;
	half2   vTex3		 : TEXCOORD3;
	half2   vTex4		 : TEXCOORD4;
	half2   vTex5        : TEXCOORD5;
	half2   vTex6		 : TEXCOORD6;
	half2   vTex7		 : TEXCOORD7;
};
struct PS_INPUT_GLOW
{
	half2   vTex0        : TEXCOORD0;
	half2   vTex1		 : TEXCOORD1;
	half2   vTex2		 : TEXCOORD2;
	half2   vTex3		 : TEXCOORD3;
	half2   vTex4		 : TEXCOORD4;
	half2   vTex5        : TEXCOORD5;
	half2   vTex6		 : TEXCOORD6;
	half2   vTex7		 : TEXCOORD7;
};
struct PS_OUTPUT_GLOW
{
	float4  vColor		 : COLOR0;
};

// Vertical blur vertex shader: eight taps displaced along v around vCoord.
VS_OUTPUT_GLOW vsGlowV(VS_INPUT_GLOW i)
{
	VS_OUTPUT_GLOW o;
	// The quad position is passed through unchanged, with w forced to 1
	o.vPosition = float4(i.vPosition.xyz,1.0f);

	// NOTE(review): the kernel is centred one fTexelIncrement past the quad's own v;
	// confirm this offset is intended (e.g. as a texel-alignment correction).
	half vCoord = i.vTex0.y + fTexelIncrement;
	// Vertical Convolution
	o.vTex0 = half2(i.vTex0.x, vCoord + fTexelIncrement);
	o.vTex1 = half2(i.vTex0.x, vCoord + fTexelIncrementx2);
	o.vTex2 = half2(i.vTex0.x, vCoord - fTexelIncrement);
	o.vTex3 = half2(i.vTex0.x, vCoord - fTexelIncrementx2);

	o.vTex4 = half2(i.vTex0.x, vCoord + fTexelIncrementx3);
	o.vTex5 = half2(i.vTex0.x, vCoord + fTexelIncrementx4);
	o.vTex6 = half2(i.vTex0.x, vCoord - fTexelIncrementx3);
	o.vTex7 = half2(i.vTex0.x, vCoord - fTexelIncrementx4);

	return(o);
}

// Horizontal blur vertex shader: eight taps displaced along u around vCoord.
VS_OUTPUT_GLOW vsGlowH(VS_INPUT_GLOW i)
{
	VS_OUTPUT_GLOW o;
	// The quad position is passed through unchanged, with w forced to 1
	o.vPosition = float4(i.vPosition.xyz,1.0f);

	// NOTE(review): the kernel is centred one fTexelIncrement past the quad's own u;
	// confirm this offset is intended (e.g. as a texel-alignment correction).
	half vCoord = i.vTex0.x + fTexelIncrement;
	// Horizontal Convolution
	o.vTex0 = half2(vCoord + fTexelIncrement,   i.vTex0.y);
	o.vTex1 = half2(vCoord + fTexelIncrementx2, i.vTex0.y);
	o.vTex2 = half2(vCoord - fTexelIncrement,   i.vTex0.y);
	o.vTex3 = half2(vCoord - fTexelIncrementx2, i.vTex0.y);

	o.vTex4 = half2(vCoord + fTexelIncrementx3, i.vTex0.y);
	o.vTex5 = half2(vCoord + fTexelIncrementx4, i.vTex0.y);
	o.vTex6 = half2(vCoord - fTexelIncrementx3, i.vTex0.y);
	o.vTex7 = half2(vCoord - fTexelIncrementx4, i.vTex0.y);

	return(o);
}

// Blur pixel shader shared by both passes: a symmetric 9-tap kernel whose
// weights fall off with distance and sum to exactly 1.
//
// The previous version paired weights with the wrong taps (WT9_3 twice, WT9_4
// on a 2-step tap, WT9_0 unused) and added an extra unweighted vTex0 sample,
// so the kernel was lopsided and brightened the image.
//
// Tap distances: vTex0/vTex2 = +/-1, vTex1/vTex3 = +/-2, vTex4/vTex6 = +/-3,
// vTex5/vTex7 = +/-4 steps from the kernel centre.
PS_OUTPUT_GLOW psGlow(PS_INPUT_GLOW i)
{
	PS_OUTPUT_GLOW o;

	// The vertex shader supplies no centre coordinate; the +1 and -1 taps are placed
	// symmetrically around it, so their midpoint is the kernel centre.
	half2 vCentre = (i.vTex0 + i.vTex2) * 0.5;

	// Convolution
	half4 vAlphaTex = tex2D(tPostProcessSampler, vCentre) * (WT9_0/WT9_NORMALIZE);
	vAlphaTex += (tex2D(tPostProcessSampler, i.vTex0) + tex2D(tPostProcessSampler, i.vTex2)) * (WT9_1/WT9_NORMALIZE);
	vAlphaTex += (tex2D(tPostProcessSampler, i.vTex1) + tex2D(tPostProcessSampler, i.vTex3)) * (WT9_2/WT9_NORMALIZE);
	vAlphaTex += (tex2D(tPostProcessSampler, i.vTex4) + tex2D(tPostProcessSampler, i.vTex6)) * (WT9_3/WT9_NORMALIZE);
	vAlphaTex += (tex2D(tPostProcessSampler, i.vTex5) + tex2D(tPostProcessSampler, i.vTex7)) * (WT9_4/WT9_NORMALIZE);

	o.vColor = vAlphaTex;

	return(o);
};

// Generic glow-mask vertex shader: position times view-projection, UVs passed through.
VS_OUTPUT_MASK vsCreateMask(VS_INPUT_MASK i)
{
	VS_OUTPUT_MASK o;

	// Transform the position by the view-projection matrix
	o.vPosition = mul(float4(i.vPosition.xyz, 1.0f), uViewProjL);

	o.vTex0 = i.vTex0;

	return(o);
}

// Glow-mask pixel shader: diffuse texel scaled by its own alpha.
PS_OUTPUT_MASK psCreateMask(PS_INPUT_MASK i)
{
	PS_OUTPUT_MASK o;
	// Sample Base Texture
	half4 vDiffuseTex = tex2D(tAlphaSourceSampler, i.vTex0);
	o.vColor = vDiffuseTex * vDiffuseTex.a;
	return(o);
}

///////////////////////////////////////////////////////
// Mask Creation for Real-Time Glow					 //
///////////////////////////////////////////////////////
technique t0
{
	pass p0
	{
		VertexShader = compile vs_1_1 vsCreateMask();
		PixelShader = compile ps_1_1 psCreateMask();
	}
}

///////////////////////////////////////////////////////
// Real-Time Glow PostProcessing Technique			 //
///////////////////////////////////////////////////////
technique t1
{
	pass p0
	{
		cullmode = none;
		ZEnable = false;
		VertexShader = compile vs_1_1 vsGlowH();
		PixelShader = compile ps_2_0 psGlow();
	}
	pass p1
	{
		cullmode = none;
		ZEnable = false;
		VertexShader = compile vs_1_1 vsGlowV();
		PixelShader = compile ps_2_0 psGlow();
	}
}

This is both the mask creation during the pre-processing step for the final glow source texture, and the blur and glow code.

PreProcessing Step:

__inline void cSilicon::PreProcessGlow(){	// Pre-Processing Alpha Mask rendering for Real-Time Glow ( Render to Texture )	m_pd3dDevice->SetRenderTarget(0,m_cEffects.getRenderTarget());	m_pd3dDevice->SetDepthStencilSurface(m_cEffects.getStencilPP());	m_pd3dDevice->Clear( 0L, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER | D3DCLEAR_STENCIL, 0x00000000, 1.0f, 0L );	// Begin the scene - ** Draw Front to Back    m_pd3dDevice->BeginScene();	m_pd3dDevice->SetRenderState( D3DRS_ZWRITEENABLE, TRUE );	m_pd3dDevice->SetRenderState( D3DRS_ZENABLE, D3DZB_TRUE );	m_pd3dDevice->SetRenderState( D3DRS_CULLMODE, D3DCULL_CCW );	m_pd3dDevice->SetRenderState( D3DRS_LIGHTING, FALSE );	m_pd3dDevice->SetRenderState( D3DRS_ALPHABLENDENABLE, FALSE );		// Skinned Models	m_cEffects.BeginPreProcessMaskSkin();	m_cMesh.getClient()->getSkinnedModel().RenderAlphaMask();	m_cEffects.EndPreProcessMaskSkin();	// Everything Else	m_cEffects.BeginPreProcessMask();	for ( unsigned int iD = 0 ; iD < m_cMesh.getModelsAnimRigid().size() ; ++iD )		m_cMesh.getModelsAnimRigid()[iD].RenderAlphaMask();	m_cEffects.EndPreProcessMask();			// End the scene    m_pd3dDevice->EndScene();}

PostProcessing Step:

__inline void cSilicon::PostProcessGlow(){	unsigned int numPasses;	// Post-Processing Alpha Mask rendering for Real-Time Glow ( Render to Texture ) requires ps2.0	m_pd3dDevice->SetRenderState( D3DRS_ZENABLE, D3DZB_FALSE );	m_pd3dDevice->SetRenderState(D3DRS_ZWRITEENABLE,FALSE);	m_pd3dDevice->SetRenderState( D3DRS_CULLMODE, D3DCULL_NONE );	m_pd3dDevice->SetRenderState( D3DRS_ALPHABLENDENABLE, TRUE );	m_pd3dDevice->SetRenderState(D3DRS_SRCBLEND, D3DBLEND_ONE);	m_pd3dDevice->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_ONE);	m_cEffects.getShader()[GLOW].m_pd3dxEffect->SetTechnique(m_cEffects.getShader()[GLOW].m_pd3dxTechnique[GLTECHGLOW]);	m_cEffects.getShader()[GLOW].m_pd3dxEffect->SetTexture( m_cEffects.getShader()[GLOW].m_pd3dxHandle[GLPOSTTEX],															m_cEffects.getGlowRenderTargetTex() );	m_cEffects.getShader()[GLOW].m_pd3dxEffect->Begin( &numPasses, D3DXFX_DONOTSAVESTATE|D3DXFX_DONOTSAVESHADERSTATE );	for ( unsigned int uiX = 0 ; uiX < numPasses ; ++uiX )	{		m_cEffects.getShader()[GLOW].m_pd3dxEffect->BeginPass( uiX );		// Draw the Screen-Aligned Quad		m_pd3dDevice->SetVertexDeclaration(m_cEffects.getPPDecl());		m_pd3dDevice->SetStreamSource( 0, m_cEffects.getQuadPostProcess(), 0, sizeof(PPVERT) );		m_pd3dDevice->DrawPrimitive(D3DPT_TRIANGLESTRIP, 0, 2);		m_cEffects.getShader()[GLOW].m_pd3dxEffect->EndPass();	}	m_cEffects.getShader()[GLOW].m_pd3dxEffect->End();	m_pd3dDevice->SetVertexShader(NULL);	m_pd3dDevice->SetPixelShader(NULL);	m_pd3dDevice->SetRenderState( D3DRS_ALPHABLENDENABLE, FALSE );}

Main Project Render Function:

// Render Pipeline__inline void cSilicon::Render(){	PreProcessGlow();	// Render the Actual Scene	m_pd3dDevice->SetRenderTarget(0,m_cEffects.getRenderTargetBackBuffer());	m_pd3dDevice->SetDepthStencilSurface(m_cEffects.getStencilBackBuffer());	// Clear the backbuffer    m_pd3dDevice->Clear( 0L, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0x00000000, 1.0f, 0L );	m_pd3dDevice->BeginScene();	m_pd3dDevice->SetRenderState( D3DRS_ALPHABLENDENABLE, FALSE );	if ( m_cMesh.getClient()->getRigidModel() )		m_cMesh.getClient()->getRigidModel()->Render();	else		m_cMesh.getClient()->getSkinnedModel().Render();	for ( unsigned int iD = 0 ; iD < m_cMesh.getModelsSkinned().size() ; ++iD )		m_cMesh.getModelsSkinned()[iD].RenderFrustum();	// All Other Models	// Lighting OFF, Z <ON>, Alpha Blending <OFF> ( cg Shaders, 3D Objects w/o Lighting )	for ( unsigned int iD = 0 ; iD < m_cMesh.getModels().size() ; ++iD )		m_cMesh.getModels()[iD].RenderFrustum();	for ( unsigned int iD = 0 ; iD < m_cMesh.getModelsRigid().size() ; ++iD )		m_cMesh.getModelsRigid()[iD].RenderFrustum();	for ( unsigned int iD = 0 ; iD < m_cMesh.getModelsAnimRigid().size() ; ++iD )		m_cMesh.getModelsAnimRigid()[iD].RenderFrustum();	PostProcessGlow();	// 2D Stuff Here	m_pd3dDevice->SetRenderState(D3DRS_ZWRITEENABLE,FALSE);	m_pd3dDevice->SetRenderState( D3DRS_ALPHABLENDENABLE, TRUE );	m_pd3dDevice->SetRenderState( D3DRS_LIGHTING, FALSE );	DebugOut();	// Sprites -> Rendered Alpha Blending ON	m_cSprite.BeginBatch();	m_cSprite.Render(m_cEffects.getGlowRenderTargetTex(),D3DXVECTOR3(544.0f,0.0f,0.0f));	m_cSprite.EndBatch();	// End the scene    m_pd3dDevice->EndScene();		// After Present, Render States are Reset to Defaults	// Present the backbuffer contents to the display    m_pd3dDevice->Present( NULL, NULL, NULL, NULL );}

With the latest screenshot above, I have highlighted the areas to give a better description of what is happening. I was told using a MAGFILTER of bilinear would solve this, seems to work for the plane, but not the mesh!

To better see the problem, download a build of the project in this state. ps 2.0 is required.

Download Better Visualization of Problem

Some controls:
SPACE - Change Render Mode
ENTER - Ragdoll Physics (In Progress...)
ARROWKEYS - MoveMent
MOUSE - Camera Move

Thank you, this is very frustrating!

[Edited by - Carandiru on November 6, 2004 5:56:19 PM]

Carandiru

212

Author

November 07, 2004 01:21 PM

Doesn't this look smashing!!

Thought I would put up the solution, heres the code:
Shaders:

// Real-time glow effect: mask creation, a pass-through quad technique and the
// ps1.1 / ps2.0 blur techniques driven by application-supplied UV offsets.

texture tPostProcess;	// source texture of the blur / pass-through techniques
texture tAlphaSource;	// diffuse texture sampled when building the glow mask

half4x4  uViewProjL : VIEWPROJECTION;
half    fUvOffsetToUse;   // which set of uv offsets to use; should be 0, 1, 2, 3, or 4.
half4   fUvBase[20];      // index = 4*effect[0..4] + texstage[0..3]

// NOTE(review): the WT9_* weights are not referenced by any shader in this listing.
#define WT9_0 1.0
#define WT9_1 0.8
#define WT9_2 0.6
#define WT9_3 0.4
#define WT9_4 0.2
#define WT9_NORMALIZE (WT9_0+2.0*(WT9_1+WT9_2+WT9_3+WT9_4))

sampler2D tAlphaSourceSampler = sampler_state
{
	Texture   = <tAlphaSource>;
	AddressU  = CLAMP;
	AddressV  = CLAMP;
	MinFilter = POINT;
	MagFilter = LINEAR;
	MipFilter = POINT;
};

sampler2D tPostProcessSampler = sampler_state
{
	Texture   = <tPostProcess>;
	AddressU  = CLAMP;
	AddressV  = CLAMP;
	MinFilter = POINT;
	MagFilter = LINEAR;
	MipFilter = NONE;
};

// Alpha Mask Pre-Processing Semantics
struct VS_INPUT_MASK
{
	float4 vPosition : POSITION;
	half2  vTex0     : TEXCOORD0;	// Base Texture
};
struct VS_OUTPUT_MASK
{
	float4 vPosition : POSITION;
	half2  vTex0     : TEXCOORD0;
};
struct PS_INPUT_MASK
{
	half2  vTex0     : TEXCOORD0;
};
struct PS_OUTPUT_MASK
{
	half4  vColor    : COLOR0;
};

// Glow Post-Processing Semantics
struct VS_INPUT_GLOW
{
	float4 vPosition : POSITION;
	half2  vTex0     : TEXCOORD0;	// Base Texture
};

// Four-tap variant (ps1.1 technique)
struct VS_OUTPUT_GLOW_11
{
	float4 vPosition : POSITION;
	half2  vTex0     : TEXCOORD0;
	half2  vTex1     : TEXCOORD1;
	half2  vTex2     : TEXCOORD2;
	half2  vTex3     : TEXCOORD3;
};
struct PS_INPUT_GLOW_11
{
	half2  vTex0     : TEXCOORD0;
	half2  vTex1     : TEXCOORD1;
	half2  vTex2     : TEXCOORD2;
	half2  vTex3     : TEXCOORD3;
};

// Eight-tap variant (ps2.0 technique)
struct VS_OUTPUT_GLOW_20
{
	float4 vPosition : POSITION;
	half2  vTex0     : TEXCOORD0;
	half2  vTex1     : TEXCOORD1;
	half2  vTex2     : TEXCOORD2;
	half2  vTex3     : TEXCOORD3;
	half2  vTex4     : TEXCOORD4;
	half2  vTex5     : TEXCOORD5;
	half2  vTex6     : TEXCOORD6;
	half2  vTex7     : TEXCOORD7;
};
struct PS_INPUT_GLOW_20
{
	half2  vTex0     : TEXCOORD0;
	half2  vTex1     : TEXCOORD1;
	half2  vTex2     : TEXCOORD2;
	half2  vTex3     : TEXCOORD3;
	half2  vTex4     : TEXCOORD4;
	half2  vTex5     : TEXCOORD5;
	half2  vTex6     : TEXCOORD6;
	half2  vTex7     : TEXCOORD7;
};
struct PS_OUTPUT_GLOW
{
	half4  vColor    : COLOR0;
};

// ps1.1 blur vertex shader: adds the four UV offsets of the selected set to the quad's UVs.
VS_OUTPUT_GLOW_11 vsGlow11(VS_INPUT_GLOW i)
{
	VS_OUTPUT_GLOW_11 o;

	// The quad position is passed through unchanged, with w forced to 1
	o.vPosition = float4(i.vPosition.xyz, 1.0f);

	// First fUvBase entry of the selected offset set (four entries per set)
	int iFirst = (int)fUvOffsetToUse * 4;

	o.vTex0 = i.vTex0 + fUvBase[iFirst].xy;
	o.vTex1 = i.vTex0 + fUvBase[iFirst+1].xy;
	o.vTex2 = i.vTex0 + fUvBase[iFirst+2].xy;
	o.vTex3 = i.vTex0 + fUvBase[iFirst+3].xy;

	return o;
}

// ps1.1 blur pixel shader: equal-weight (1/4 each) sum of the four taps.
PS_OUTPUT_GLOW psGlow11(PS_INPUT_GLOW_11 i)
{
	PS_OUTPUT_GLOW o;

	o.vColor  = tex2D(tPostProcessSampler, i.vTex0) * 0.25;
	o.vColor += tex2D(tPostProcessSampler, i.vTex1) * 0.25;
	o.vColor += tex2D(tPostProcessSampler, i.vTex2) * 0.25;
	o.vColor += tex2D(tPostProcessSampler, i.vTex3) * 0.25;

	return o;
}

// ps2.0 blur vertex shader: fills eight tap coordinates from the selected offset set.
// NOTE(review): vTex4..vTex7 receive the same four offsets as vTex0..vTex3 - confirm
// that duplicating the taps (rather than using a second set) is intended.
VS_OUTPUT_GLOW_20 vsGlow20(VS_INPUT_GLOW i)
{
	VS_OUTPUT_GLOW_20 o;

	// The quad position is passed through unchanged, with w forced to 1
	o.vPosition = float4(i.vPosition.xyz, 1.0f);

	// First fUvBase entry of the selected offset set (four entries per set)
	int iFirst = (int)fUvOffsetToUse * 4;

	o.vTex0 = i.vTex0 + fUvBase[iFirst].xy;
	o.vTex1 = i.vTex0 + fUvBase[iFirst+1].xy;
	o.vTex2 = i.vTex0 + fUvBase[iFirst+2].xy;
	o.vTex3 = i.vTex0 + fUvBase[iFirst+3].xy;

	o.vTex4 = i.vTex0 + fUvBase[iFirst].xy;
	o.vTex5 = i.vTex0 + fUvBase[iFirst+1].xy;
	o.vTex6 = i.vTex0 + fUvBase[iFirst+2].xy;
	o.vTex7 = i.vTex0 + fUvBase[iFirst+3].xy;

	return o;
}

// ps2.0 blur pixel shader: sums eight taps, each weighted 0.25 (total weight 2.0).
PS_OUTPUT_GLOW psGlow20(PS_INPUT_GLOW_20 i)
{
	PS_OUTPUT_GLOW o;

	o.vColor  = tex2D(tPostProcessSampler, i.vTex0) * 0.25;
	o.vColor += tex2D(tPostProcessSampler, i.vTex1) * 0.25;
	o.vColor += tex2D(tPostProcessSampler, i.vTex2) * 0.25;
	o.vColor += tex2D(tPostProcessSampler, i.vTex3) * 0.25;

	o.vColor += tex2D(tPostProcessSampler, i.vTex4) * 0.25;
	o.vColor += tex2D(tPostProcessSampler, i.vTex5) * 0.25;
	o.vColor += tex2D(tPostProcessSampler, i.vTex6) * 0.25;
	o.vColor += tex2D(tPostProcessSampler, i.vTex7) * 0.25;

	return o;
}

// Glow-mask vertex shader: position times view-projection, UVs passed through.
VS_OUTPUT_MASK vsCreateMask(VS_INPUT_MASK i)
{
	VS_OUTPUT_MASK o;

	o.vPosition = mul(float4(i.vPosition.xyz, 1.0f), uViewProjL);
	o.vTex0     = i.vTex0;

	return o;
}

// Glow-mask pixel shader: diffuse texel scaled by its own alpha.
PS_OUTPUT_MASK psCreateMask(PS_INPUT_MASK i)
{
	PS_OUTPUT_MASK o;

	// Sample Base Texture
	half4 vTexel = tex2D(tAlphaSourceSampler, i.vTex0);
	o.vColor = vTexel * vTexel.a;

	return o;
}

// Pass-through vertex shader: position (w forced to 1) and UVs are output unchanged.
VS_OUTPUT_MASK vsPassThru(VS_INPUT_MASK i)
{
	VS_OUTPUT_MASK o;

	o.vPosition = float4(i.vPosition.xyz, 1.0f);
	o.vTex0     = i.vTex0;

	return o;
}

///////////////////////////////////////////////////////
// Mask Creation for Real-Time Glow					 //
///////////////////////////////////////////////////////
technique t0
{
	pass p0
	{
		VertexShader = compile vs_1_1 vsCreateMask();
		PixelShader  = compile ps_1_1 psCreateMask();
	}
}

// Pass-through: draws tPostProcess onto the quad unmodified
technique t1
{
	pass p0
	{
		ZEnable = False;
		CullMode = None;
		VertexShader = compile vs_1_1 vsPassThru();
		PixelShader  =
		asm
		{
			; Declare pixel shader version
			ps.1.1

			; just sample a texture and output that color
			tex t0
			mov r0, t0
		};

		Sampler[0] = <tPostProcessSampler>;
	}
}

///////////////////////////////////////////////////////
// Real-Time Glow PostProcessing Technique ps1.1  	 //
///////////////////////////////////////////////////////
technique t2
{
	pass p0
	{
		ZEnable = False;
		CullMode = None;
		VertexShader = compile vs_1_1 vsGlow11();
		PixelShader  = compile ps_1_1 psGlow11();
	}
}

///////////////////////////////////////////////////////
// Real-Time Glow PostProcessing Technique ps2.0 	 //
///////////////////////////////////////////////////////
technique t3
{
	pass p0
	{
		ZEnable = False;
		CullMode = None;
		VertexShader = compile vs_1_1 vsGlow20();
		PixelShader  = compile ps_2_0 psGlow20();
	}
}

CPP Source:

if ( !CreateShader("Shaders\\Glow.fx", 3, 3) )	{		MessageBox(hWnd,"cEffects::CreateShader FAILED - RealTime Glow","projectSilicon",MB_ICONERROR);		return(false);	}		if ( m_bShader20 )		m_uiGlowPasses = 3;	// Glow Alpha Source + 2 Convultion Filter RT's	else		m_uiGlowPasses = 5;	// Glow Alpha Source + 4 Convultion Filter RT's	m_ptexRenderTarget = new LPDIRECT3DTEXTURE9[m_uiGlowPasses];	m_pRT			   = new IDirect3DSurface9*[m_uiGlowPasses];	// Glow Alpha Source + 2 Convultion Filter RT's	for ( unsigned int uiX = 0 ; uiX < m_uiGlowPasses ; ++uiX )	{		if ( FAILED ( m_pd3dDevice->CreateTexture( RTRES, RTRES, 1, D3DUSAGE_RENDERTARGET, D3DFMT_A8R8G8B8,												D3DPOOL_DEFAULT, &m_ptexRenderTarget[uiX], NULL )))		{			MessageBox(hWnd,"cEffects::CreateTexture FAILED - Glow RenderTarget","projectSilicon",MB_ICONERROR);			return(false);		}		m_ptexRenderTarget[uiX]->GetSurfaceLevel( 0, &m_pRT[uiX] );	}	// Screen-sized quad	// Create vertex declaration for post-process    if( FAILED( m_pd3dDevice->CreateVertexDeclaration( vertDeclPP, &m_pVertDeclPP ) ) )		return(false);		if ( FAILED ( m_pd3dDevice->CreateVertexBuffer(  4 * sizeof(PPVERT), 													D3DUSAGE_WRITEONLY | D3DUSAGE_DYNAMIC, 													D3DFVF_PPVERT, D3DPOOL_DEFAULT, &m_pQuadPostProcess, NULL ) ) )		return(false);	PPVERT* pSrc(NULL);	float fU_adjust = 0.5f / (float)m_cD3D.getPP().BackBufferWidth;    float fV_adjust = 0.5f / (float)m_cD3D.getPP().BackBufferWidth;	if ( SUCCEEDED (m_pQuadPostProcess->Lock(0,4*sizeof(PPVERT),(VOID**)&pSrc,0)))	{		for ( unsigned int i = 0; i < 4; ++i)        {            pSrc->Position = D3DXVECTOR3((i==0 || i==3) ? -1.0f : 1.0f,                                          (i<2)          ? -1.0f : 1.0f,                                          0.0f);            pSrc->Tex      = D3DXVECTOR2(((i==0 || i==3) ? 0.0f : 1.0f) + fU_adjust,                                           ((i<2)          ? 
1.0f : 0.0f) + fV_adjust);            pSrc++;        }		m_pQuadPostProcess->Unlock();	}	else		return(false);	if ( !SetParameterOptions_RealTimeGlow( m_vcFXShader.back().m_pd3dxEffect, m_vcFXShader.back().m_pd3dxHandle,											m_vcFXShader.back().m_pd3dxTechnique ) )	{		MessageBox(hWnd,"cEffects::SetParameterOptions_RealTimeGlow FAILED - VertexSkinning","projectSilicon",MB_ICONERROR);		return(false);	}bool cEffects::SetParameterOptions_RealTimeGlow( LPD3DXEFFECT const& pCurEffect, D3DXHANDLE* const& pCurHandles,												 D3DXHANDLE* const& pCurTechniques ){	// Current Diffuse Texture for Alpha Mask Technique to Sample	if ( !(pCurHandles[GLPRETEX] = pCurEffect->GetParameterByName(NULL, "tAlphaSource")) )		return(false);	// Alpha Mask Texture Sampler 	if ( !(pCurHandles[GLPOSTTEX] = pCurEffect->GetParameterByName(NULL, "tPostProcess")) )		return(false);	// ViewProj Matrix (Half Precision)	if ( !(pCurHandles[GLVIEWPROJL] = pCurEffect->GetParameterByName(NULL, "uViewProjL")) )		return(false);	CreateAndWriteUVOffsets(RTRES,RTRES,pCurEffect);	// Alpha Mask - PreProcessing	if ( !(pCurTechniques[GLTECHMASK] = pCurEffect->GetTechnique(GLTECHMASK)) )		return(false);	if ( m_bShader20 )	{		// Glow - PostProcessing (ps2.0)		if ( !(pCurTechniques[GLTECHGLOW] = pCurEffect->GetTechnique(3)) )			return(false);	}	else	{		// Glow - PostProcessing (ps1.1)		if ( !(pCurTechniques[GLTECHGLOW] = pCurEffect->GetTechnique(2)) )			return(false);	}		// Passthru Quad Simple Shader	if ( !(pCurTechniques[GLTECHPASS] = pCurEffect->GetTechnique(GLTECHPASS)) )		return(false);	m_pd3dDevice->GetRenderTarget(0,&m_pRTBackBuffer);	m_pd3dDevice->GetDepthStencilSurface( &m_pStencilBackBuffer );	return(true);}void cEffects::CreateAndWriteUVOffsets(int width, int height, LPD3DXEFFECT const& pCurEffect){    // displace texture-uvs so that the sample points on the     // texture describe     // i)   a square around the texel to sample.    
//      the edges of the square are distance s from the center texel.    //      Due to bilinear filtering and application of equal weights (1/4)     //      in the pixel shader, the following filter is implemented for the 9 samples    //          abc    //          def    //          ghi:    //      filtered pixel = (s*s)/4 (a+c+g+i) + (s-s*s)/2 (b+d+f+h) + (1-s)^2 e    //         Thus, choosing s = 0 means no filtering (also no offsets)    //      s = 2/3 results in an equally weighted, 9-sample box-filter (and is called    //      type4) and s = 1/2 results in a circular cone-filter (and is called type1).    // ii) a square around the texel to sample, so as to include sixteen texels:    //          abcd    //          efgh    //          ijkl    //          mnop    //      Center texel is assumed to be "j", and offsets are made so that the texels    //      are the combinations of (a, b, e, f), (c, d, g, h), (i, j, m, n), and     //      (k, l, o, p)    // iii) A quad-sample filter:    //         a    //         b    //        cde    //      Center texel is "b" and sampled dead center.  The second sample is     //      dead-center "a", and the last two samples are interpolations between    //      (c,d) and (d,e).  Connecting the samples with the center pixel should     //      produce three lines that measure the same angle (120 deg) between them.    //      This sampling pattern may be rotated around "b".    
// first the easy one: no offsets    float const     noOffsetX[4] = { 0.0f, 0.0f, 0.0f, 0.0f};     float const     noOffsetY[4] = { 0.0f, 0.0f, 0.0f, 0.0f};    float const     kPerTexelWidth  = 1.0f/static_cast<float>(width);    float const     kPerTexelHeight = 1.0f/static_cast<float>(height);    float           s               = 0.5f;    float const     eps             = 10.0e-4f;    float const     rotAngle1       = D3DXToRadian( 0.0f );    float const     rotAngle2       = rotAngle1 + D3DXToRadian(120.0f);     float const     rotAngle3       = rotAngle1 + D3DXToRadian(240.0f);     // Change filter kernel for 9-sample box filtering, but for edge-detection we are     // going to use interpolated texels.  Why?  Because we detect diagonal edges only    // and the vertical and horizontal filtering seems to help.            float const type1OffsetX[4] = { -s * kPerTexelWidth,                                     -s * kPerTexelWidth,                                       s * kPerTexelWidth,                                        s * kPerTexelWidth  };    float const type1OffsetY[4] = { -s * kPerTexelHeight,                                      s * kPerTexelHeight,                                      s * kPerTexelHeight,                                     -s * kPerTexelHeight };    // we have to bring the 16 texel-sample-filter a bit closer to the center to avoid     // separation due to floating point inaccuracies.    
float const type2OffsetX[4] = { -1 * kPerTexelWidth + eps,                                      -1 * kPerTexelWidth + eps,                                     1.0f * kPerTexelWidth - eps,                                     1.0f * kPerTexelWidth - eps };    float const type2OffsetY[4] = { -1 * kPerTexelHeight+ eps,                                     1.0f * kPerTexelHeight- eps,                                     1.0f * kPerTexelHeight- eps,                                     -1 * kPerTexelHeight+ eps };    float const type3OffsetX[4] = {0.0f,  sinf(rotAngle1)*kPerTexelWidth,                                            sinf(rotAngle2)*kPerTexelWidth,                                            sinf(rotAngle3)*kPerTexelWidth  };    float const type3OffsetY[4] = {0.0f, -cosf(rotAngle1)*kPerTexelHeight,                                          -cosf(rotAngle2)*kPerTexelHeight,                                          -cosf(rotAngle3)*kPerTexelHeight };    s = 2.0f/3.0f;      // same as type 1, except s is different    float const type4OffsetX[4] = { -s * kPerTexelWidth,                                     -s * kPerTexelWidth,                                       s * kPerTexelWidth,                                        s * kPerTexelWidth  };    float const type4OffsetY[4] = { -s * kPerTexelHeight,                                      s * kPerTexelHeight,                                      s * kPerTexelHeight,                                     -s * kPerTexelHeight };    // write all these offsets to constant memory    for (int i = 0; i < 4; ++i)    {        D3DXVECTOR4  noOffset(      noOffsetX,    noOffsetY, 0.0f, 0.0f);        D3DXVECTOR4  type1Offset(type1OffsetX, type1OffsetY, 0.0f, 0.0f);        D3DXVECTOR4  type2Offset(type2OffsetX, type2OffsetY, 0.0f, 0.0f);        D3DXVECTOR4  type3Offset(type3OffsetX, type3OffsetY, 0.0f, 0.0f);        D3DXVECTOR4  type4Offset(type4OffsetX, type4OffsetY, 0.0f, 0.0f);        // helpful comment:        // the first 4 UvBase 
vectors are the 4 texture stage u/v's for "no-offset" sampling.        // the next 4 UvBase vectors are the 4 texture stage u/v's for 9-sample box filter sampling,        // and so on.        char str[64];        sprintf(str, "fUvBase[%d]", i     );         pCurEffect->SetVector(str, &noOffset);        sprintf(str, "fUvBase[%d]", i +  4);         pCurEffect->SetVector(str, &type1Offset);        sprintf(str, "fUvBase[%d]", i +  8);         pCurEffect->SetVector(str, &type2Offset);        sprintf(str, "fUvBase[%d]", i + 12);         pCurEffect->SetVector(str, &type3Offset);        sprintf(str, "fUvBase[%d]", i + 16);         pCurEffect->SetVector(str, &type4Offset);    }}void cEffects::PreProcessGlow(){	// Pre-Processing Alpha Mask rendering for Real-Time Glow ( Render to Texture )	m_pd3dDevice->SetRenderTarget(0,m_pRT[0]);	m_pd3dDevice->SetDepthStencilSurface(NULL);	m_pd3dDevice->Clear( 0L, NULL, D3DCLEAR_TARGET, 0x00000000, 1.0f, 0L );	m_pd3dDevice->SetRenderState( D3DRS_ZWRITEENABLE, TRUE );	m_pd3dDevice->SetRenderState( D3DRS_ZENABLE, D3DZB_TRUE );	m_pd3dDevice->SetRenderState( D3DRS_CULLMODE, D3DCULL_CCW );	m_pd3dDevice->SetRenderState( D3DRS_LIGHTING, FALSE );	m_pd3dDevice->SetRenderState( D3DRS_ALPHABLENDENABLE, FALSE );		// Everything Else	m_vcFXShader[GLOW].m_pd3dxEffect->SetTechnique(m_vcFXShader[GLOW].m_pd3dxTechnique[GLTECHMASK]);	m_vcFXShader[GLOW].m_pd3dxEffect->Begin( NULL, D3DXFX_DONOTSAVESTATE|D3DXFX_DONOTSAVESHADERSTATE );	for ( unsigned int iD = 0 ; iD < m_cMesh.getModelsAnimRigid().size() ; ++iD )		m_cMesh.getModelsAnimRigid()[iD].RenderAlphaMask();	m_vcFXShader[GLOW].m_pd3dxEffect->End();	m_pd3dDevice->SetVertexShader(NULL);	m_pd3dDevice->SetPixelShader(NULL);	// Skinned Models	// Set View Projection Matrix	m_cEffects.getShader()[SKINNING].m_pd3dxEffect->SetMatrix( m_vcFXShader[SKINNING].m_pd3dxHandle[SKVIEWPROJL],															   &m_matViewProj );	
m_vcFXShader[SKINNING].m_pd3dxEffect->SetTechnique(m_vcFXShader[SKINNING].m_pd3dxTechnique[SKTECHMASK]);	m_vcFXShader[SKINNING].m_pd3dxEffect->Begin( NULL, D3DXFX_DONOTSAVESTATE|D3DXFX_DONOTSAVESHADERSTATE );		m_cMesh.getClient()->getSkinnedModel().RenderAlphaMask();		m_vcFXShader[SKINNING].m_pd3dxEffect->End();	m_pd3dDevice->SetVertexShader(NULL);	m_pd3dDevice->SetPixelShader(NULL);}void cEffects::PostProcessGlow(){	// Post-Processing Alpha Mask rendering for Real-Time Glow ( Render to Texture ) requires ps2.0	m_pd3dDevice->SetVertexDeclaration(m_pVertDeclPP);	m_pd3dDevice->SetStreamSource( 0, m_pQuadPostProcess, 0, sizeof(PPVERT) );	m_pd3dDevice->SetRenderState( D3DRS_ZENABLE, D3DZB_FALSE );	m_pd3dDevice->SetRenderState(D3DRS_ZWRITEENABLE,FALSE);	m_pd3dDevice->SetRenderState( D3DRS_CULLMODE, D3DCULL_NONE );	float fOffset = 4.0f;	m_vcFXShader[GLOW].m_pd3dxEffect->SetValue("fUvOffsetToUse", &fOffset, sizeof(float));		m_vcFXShader[GLOW].m_pd3dxEffect->SetTechnique(m_vcFXShader[GLOW].m_pd3dxTechnique[GLTECHGLOW]);		for ( unsigned int uiX = 1 ; uiX < m_uiGlowPasses ; ++uiX )	{		m_pd3dDevice->SetRenderTarget(0,m_pRT[uiX]);		m_pd3dDevice->SetDepthStencilSurface(NULL);		m_pd3dDevice->Clear( 0L, NULL, D3DCLEAR_TARGET, 0x00000000, 1.0f, 0L );		m_vcFXShader[GLOW].m_pd3dxEffect->SetTexture( m_vcFXShader[GLOW].m_pd3dxHandle[GLPOSTTEX],													  m_ptexRenderTarget[uiX-1] );		m_vcFXShader[GLOW].m_pd3dxEffect->Begin( NULL, 0 );		m_vcFXShader[GLOW].m_pd3dxEffect->BeginPass( 0 );		// Draw the Screen-Aligned Quad		m_pd3dDevice->DrawPrimitive(D3DPT_TRIANGLEFAN, 0, 2);		m_vcFXShader[GLOW].m_pd3dxEffect->EndPass();		m_vcFXShader[GLOW].m_pd3dxEffect->End();	}		m_pd3dDevice->SetVertexShader(NULL);	m_pd3dDevice->SetPixelShader(NULL);	m_pd3dDevice->SetRenderTarget(0,m_pRTBackBuffer);	m_pd3dDevice->SetDepthStencilSurface(m_pStencilBackBuffer);		m_pd3dDevice->SetRenderState( D3DRS_ALPHABLENDENABLE, TRUE );	m_pd3dDevice->SetRenderState(D3DRS_SRCBLEND, D3DBLEND_ONE);	
m_pd3dDevice->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_ONE);	m_vcFXShader[GLOW].m_pd3dxEffect->SetTechnique(m_vcFXShader[GLOW].m_pd3dxTechnique[GLTECHPASS]);		for ( unsigned int uiX = 1 ; uiX < m_uiGlowPasses ; ++uiX )	{		m_vcFXShader[GLOW].m_pd3dxEffect->SetTexture( m_vcFXShader[GLOW].m_pd3dxHandle[GLPOSTTEX],													  m_ptexRenderTarget[uiX] );		m_vcFXShader[GLOW].m_pd3dxEffect->Begin( NULL, 0 );		m_vcFXShader[GLOW].m_pd3dxEffect->BeginPass( 0 );		// Draw the Screen-Aligned Quad		m_pd3dDevice->DrawPrimitive(D3DPT_TRIANGLEFAN, 0, 2);		m_vcFXShader[GLOW].m_pd3dxEffect->EndPass();		m_vcFXShader[GLOW].m_pd3dxEffect->End();	}	m_pd3dDevice->SetRenderState( D3DRS_ALPHABLENDENABLE, FALSE );}

If anyone is interested, I have uploaded the latest build; please test it out and tell me if it works. Requires Shader Model 1.1 or greater.
projectSiliconBuild.rar - Glow Test