Vertex Texture Fetch SM3.0

Started by
5 comments, last by Tispe 11 years, 7 months ago
Hi

I have trouble getting VTF to work. What I have is a texture (pAnimationTexture) created in the default pool that will be sampled by the vertex shader. I also have another texture (pTempTexture) created in the systemmem pool; otherwise the two are identical. The format is D3DFMT_A32B32G32R32F.

I have a test matrix I want to get to the vertex shader without using the Float Constant Registers (once I get this working I will start passing arrays of skinning data this way). I fill this test matrix with values chosen so that it will be recognizable in PIX.

I lock the texture located in the systemmem pool and memcpy the matrix into the texture, then unlock.

I use the IDirect3DDevice9::UpdateTexture method on the two textures since locking won't work on the texture in the default pool. This transfers the matrix to the texture which I sample.

My problem is that I don't get the right values when debugging in PIX. I expect to get my values, but instead the first four floats of the sample are "r2 (0.000, 0.000, 0.000, 1.000) float4".

Hope someone here can see my errors.

VS:

// Vertex shader input structure
// Per-vertex data consumed by vs_main: a position and one texture coordinate set.
struct VS_INPUT
{
float4 Position : POSITION; // vertex position; vs_main multiplies it by the combined World/ViewProj matrix
float2 Texture : TEXCOORD0; // texture coordinates; vs_main copies them to the output
};

// Vertex shader output structure
// Values written by vs_main for each vertex.
struct VS_OUTPUT
{
float4 Position : POSITION; // position after the WorldViewProj transform
float2 Texture : TEXCOORD0; // input texture coordinates, with a tiny offset added to y from the fetched texel
};

// Global variables
float4x4 World; // first factor of the WorldViewProj product built in vs_main
float4x4 ViewProj; // second factor of the WorldViewProj product built in vs_main
sampler2D Tex1 : register(s1); // sampler pinned to register s1; read in vs_main with tex2Dlod
VS_OUTPUT vs_main( in VS_INPUT In )
{
    VS_OUTPUT Out;

    // Build the combined matrix and transform the incoming position with it.
    float4x4 WorldViewProj = mul(World, ViewProj);
    Out.Position = mul(In.Position, WorldViewProj);

    // Texture coordinates start out as a straight copy of the input.
    Out.Texture = In.Texture;

    // Fetch a single texel from Tex1 at (0.5/480, 0.5/100) with an explicit LOD of 0.
    float4 SampleBone = tex2Dlod(Tex1, float4(0.5/480.0, 0.5/100.0, 0, 0));

    // Sum the four fetched components and fold a tiny fraction of the sum into
    // the output; presumably this only exists so the fetch contributes to the
    // result and is not removed by the compiler.
    float temp2 = SampleBone.x + SampleBone.y + SampleBone.z + SampleBone.w;
    Out.Texture.y += temp2 * 0.00001;

    return Out;
}



// Parameters:
//
// sampler2D Tex1;
// float4x4 ViewProj;
// float4x4 World;
//
//
// Registers:
//
// Name Reg Size
// ------------ ----- ----
// World c0 4
// ViewProj c4 4
// Tex1 s1 1
//
vs_3_0
def c8, 0.00104166672, 0.00499999989, 0, 9.99999975e-006
dcl_position v0 // In<0,1,2,3>
dcl_texcoord v1 // In<4,5>
dcl_2d s1
dcl_position o0
dcl_texcoord o1.xy
mov r0, c0 // ::World<0,4,8,12>
mul r1, r0, c4.x
mov r2, c1 // ::World<1,5,9,13>
mul r3, r2, c4.y
add r1, r1, r3
mov r3, c2 // ::World<2,6,10,14>
mul r4, r3, c4.z
add r1, r1, r4
mov r4, c3 // ::World<3,7,11,15>
mul r5, r4, c4.w
add r1, r1, r5 // ::WorldViewProj<0,4,8,12>
mul r5, r0, c5.x
mul r6, r2, c5.y
add r5, r5, r6
mul r6, r3, c5.z
add r5, r5, r6
mul r6, r4, c5.w
add r5, r5, r6 // ::WorldViewProj<1,5,9,13>
mul r6, r0, c6.x
mul r7, r2, c6.y
add r6, r6, r7
mul r7, r3, c6.z
add r6, r6, r7
mul r7, r4, c6.w
add r6, r6, r7 // ::WorldViewProj<2,6,10,14>
mul r0, r0, c7.x
mul r2, r2, c7.y
add r0, r0, r2
mul r2, r3, c7.z
add r0, r0, r2
mul r2, r4, c7.w
add r0, r0, r2 // ::WorldViewProj<3,7,11,15>
dp4 r1.x, v0, r1 // ::Out<0>
dp4 r1.y, v0, r5 // ::Out<1>
dp4 r1.z, v0, r6 // ::Out<2>
dp4 r1.w, v0, r0 // ::Out<3>
mov r0.xy, v1 // ::Out<4,5>
texldl r2, c8.xyzz, s1 // ::SampleBone<0,1,2,3>
add r0.z, r2.y, r2.x
add r0.z, r2.z, r0.z
add r0.z, r2.w, r0.z // ::temp2<0>
mul r0.z, r0.z, c8.w
add r0.y, r0.z, r0.y // ::Out<5>
mov o0, r1 // ::vs_main<0,1,2,3>
mov o1.x, r0.x // ::vs_main<4>
mov o1.y, r0.y // ::vs_main<5>
// approximately 47 instruction slots used (2 texture, 45 arithmetic)


Before render:

// Stage the test matrix in the system-memory texture: lock level 0, copy, unlock.
hr = pTempTexture->LockRect(0,&rc,NULL,0); //Might pass flag D3DLOCK_DISCARD
if(hr==D3D_OK)
{
// Copies sizeof(D3DXMATRIX) bytes to the start of the locked level.
memcpy(rc.pBits, pTestMatrix, sizeof(D3DXMATRIX));
pTempTexture->UnlockRect(0);
}

// Transfer the staged contents from pTempTexture to pAnimationTexture.
// The return value of UpdateTexture is not checked here.
pGraphicsDevice->GetDevice()->UpdateTexture(pTempTexture, pAnimationTexture);

// Bind the texture and configure point sampling without mip filtering, all on stage index 1.
// NOTE(review): the shader reads Tex1 from the vertex shader; confirm whether the
// vertex-texture stage constants (D3DVERTEXTEXTURESAMPLERn) are required here
// instead of the plain index 1.
pGraphicsDevice->GetDevice()->SetTexture(1, pAnimationTexture);
pGraphicsDevice->GetDevice()->SetSamplerState(1, D3DSAMP_MAXANISOTROPY, 1); // anisotropic level
pGraphicsDevice->GetDevice()->SetSamplerState(1, D3DSAMP_MINFILTER, D3DTEXF_POINT); // minification
pGraphicsDevice->GetDevice()->SetSamplerState(1, D3DSAMP_MAGFILTER, D3DTEXF_POINT); // magnification
pGraphicsDevice->GetDevice()->SetSamplerState(1, D3DSAMP_MIPFILTER, D3DTEXF_NONE); // No mipmaps
Advertisement
I would check the texture itself in PIX and see if it has values you expect, or just (0, 0, 0, 1) everywhere. You can't directly view the floating point values and instead just see colors, but that should be enough to determine if the data you're setting is getting through.

It's been a while but when I used to do this in D3D9 I used to use a dynamic texture instead, and just Lock that directly. Dynamic textures should be optimized for the use case of CPU->GPU data flow.
"D3DUSAGE_DYNAMIC and D3DPOOL_MANAGED are incompatible and should not be used together".

"Textures created with D3DPOOL_DEFAULT are not lockable. Textures created in video memory are lockable when created with USAGE_DYNAMIC."

"Video memory textures cannot be locked, but must be modified by calling IDirect3DDevice9::UpdateSurface or IDirect3DDevice9::UpdateTexture. There are exceptions for some proprietary driver pixel formats that Direct3D 9 does not recognize. These can be locked."

What does this mean? Does the second statement make exception to the first statement?
That documentation is confusing; AFAIK it should say -- Textures created with D3DPOOL_DEFAULT are not lockable except when created with USAGE_DYNAMIC.
The code I use to calculate texture creation arguments is:
DWORD usage = 0;
usage |= cpuWrite ? D3DUSAGE_DYNAMIC : 0;
usage |= gpuWrite && !isDepth ? D3DUSAGE_RENDERTARGET : 0;
usage |= isDepth ? D3DUSAGE_DEPTHSTENCIL : 0;
D3DPOOL pool = (cpuWrite||gpuWrite||isDepth) ? D3DPOOL_DEFAULT : D3DPOOL_MANAGED;


Also, you need to use `CheckDeviceFormat` with `D3DUSAGE_QUERY_VERTEXTEXTURE` to see what texture formats are compatible with VTF on your GPU.
IIRC, some earlier GPUs will only support FP16, while others will only support FP32, etc...
It's a requirement that all SM3.0 GPUs support VTF of some kind, but there are even a few rare ATI cards that say they support VTF and then do not return OK for any texture format with the above test!
Thank you, I will try this.
Ok, I updated to using a dynamic texture which I lock directly.

Even though I don't get the values I wanted from sampling in PIX, the first 4 pixels are grey, as they should be because all the floats have the same value. If I change the first 4 floats to some other number, the pixels get lighter, but stay grey as long as I keep them all the same. And if I change the 4 floats to different values, I get color on the pixel.

So yes, I do think I get the texture updated with the right values, but I can't get those values out to the shader!

texldl r2, c8.xyzz, s1

This samples the texture (first 4 floats) to r2, which becomes (0.000, 0.000, 0.000, 1.000) float4. So something goes wrong during sampling...

Are all my sampler states correct?
What about my texture creation?

pAnimationTexture = NULL;
D3DLOCKED_RECT rc;
HRESULT hr = pGraphicsDevice->GetDevice()->CreateTexture(TexWidth, TexHeight,
1, D3DUSAGE_DYNAMIC, D3DFMT_A32B32G32R32F,
D3DPOOL_DEFAULT, &pAnimationTexture, NULL);
if( hr!=D3D_OK )
{
MessageBox(NULL, L"Failed to create AnimTex", L"Error", MB_OK);
PostQuitMessage(0);
}
hr = pAnimationTexture->LockRect(0,&rc,NULL,D3DLOCK_DISCARD);
if(hr==D3D_OK)
{
ZeroMemory(rc.pBits, TexWidth*TexHeight*4);
pAnimationTexture->UnlockRect(0);
}



// Upload the test matrix: lock level 0 of the dynamic texture with the
// discard flag, overwrite the start of its data, then unlock.
D3DLOCKED_RECT rc;
HRESULT hr = pAnimationTexture->LockRect(0, &rc, NULL, D3DLOCK_DISCARD);
if (hr == D3D_OK)
{
    // sizeof(D3DXMATRIX) bytes are written at the very beginning of the locked data.
    memcpy(rc.pBits, pTestMatrix, sizeof(D3DXMATRIX));
    pAnimationTexture->UnlockRect(0);
}
Solved it!


pGraphicsDevice->GetDevice()->SetTexture(1, pAnimationTexture);

Changed to:

pGraphicsDevice->GetDevice()->SetTexture(D3DVERTEXTEXTURESAMPLER0, pAnimationTexture);

This topic is closed to new replies.

Advertisement