Are float4x4 arrays supported in Shader Model 2 (D3D9, vs_4_0_level_9_3)?

18 comments, last by Stefan Fischlschweiger 8 years, 12 months ago

Hi,

I have this Vertex Shader...

Common.hlsl:


// Constant buffer to be updated by application per object
cbuffer PerObject : register(b0)
{
    // WorldViewProjection matrix
    float4x4 WorldViewProjection;
    
    // We need the world matrix so that we can
    // calculate the lighting in world space
    float4x4 World;
    
    // Inverse transpose of world, used for
    // bringing normals into world space, especially
    // necessary where non-uniform scaling has been applied
    float4x4 WorldInverseTranspose;
};

// A simple directional light (e.g. the sun)
struct DirectionalLight
{
    float4 Color;
    float3 Direction;
};

// Constant buffer - updated once per frame
// Note: HLSL data is packed in such a
// way that it does not cross a 16-byte boundary
cbuffer PerFrame: register (b1)
{
    DirectionalLight Light;
    float3 CameraPosition;
};

// Constant buffer to hold our material configuration
// Note: HLSL data is packed in such a
// way that it does not cross a 16-byte boundary
cbuffer PerMaterial : register (b2)
{
    float4 MaterialAmbient;
    float4 MaterialDiffuse;
    float4 MaterialSpecular;
    float MaterialSpecularPower;
    bool HasTexture;
    float4 MaterialEmissive;
    float4x4 UVTransform;
};

// Constant buffer to hold our skin matrices for each bone.
// Note: 1024*64 = maximum bytes for a constant buffer in SM5
cbuffer PerArmature : register(b3)
{
    float4x4 Bones[50];
};

// Vertex Shader input structure (from Application)
struct VertexShaderInput
{
    float4 Position : SV_Position;// Position - xyzw
    float3 Normal : NORMAL;    // Normal - for lighting and mapping operations
    float4 Color : COLOR0;     // Color - vertex color, used to generate a diffuse color
    float2 TextureUV: TEXCOORD0; // UV - texture coordinate
    uint4 SkinIndices : BLENDINDICES0; // blend indices
    float4 SkinWeights : BLENDWEIGHT0; // blend weights
};

// Pixel Shader input structure (from Vertex Shader)
struct PixelShaderInput
{
    float4 Position : SV_Position;
    // Interpolation of combined vertex and material diffuse
    float4 Diffuse : COLOR;
    // Interpolation of vertex UV texture coordinate
    float2 TextureUV: TEXCOORD0;

    // We need the World Position and normal for light calculations
    float3 WorldNormal : NORMAL;
    float3 WorldPosition : WORLDPOS;
};

float3 Lambert(float4 pixelDiffuse, float3 normal, float3 toLight)
{
    // Calculate diffuse color (using Lambert's Cosine Law - dot product of 
    // light and normal) Saturate to clamp the value within 0 to 1.
    float3 diffuseAmount = saturate(dot(normal, toLight));
    return pixelDiffuse.rgb * diffuseAmount;
}

float3 SpecularPhong(float3 normal, float3 toLight, float3 toEye)
{
    // R = reflect(i,n) => R = i - 2 * n * dot(i,n)
    float3 reflection = reflect(-toLight, normal);

    // Calculate the specular amount (smaller specular power = larger specular highlight)
    // Cannot allow a power of 0 otherwise the model will appear black and white
    float specularAmount = pow(saturate(dot(reflection,toEye)), max(MaterialSpecularPower,0.00001f));
    return MaterialSpecular.rgb * specularAmount;
}

float3 SpecularBlinnPhong(float3 normal, float3 toLight, float3 toEye)
{
    // Calculate the half vector
    float3 halfway = normalize(toLight + toEye);

    // Saturate is used to prevent backface light reflection
    // Calculate specular (smaller specular power = larger specular highlight)
    float specularAmount = pow(saturate(dot(normal, halfway)), max(MaterialSpecularPower,0.00001f));
    return MaterialSpecular.rgb * specularAmount;
}

VS.hlsl:


#include "Common.hlsl"

void SkinVertex(float4 weights, uint4 bones, inout float4 position, inout float3 normal)
{
    // If there are skin weights apply vertex skinning
    if (weights.x != 0)
    {
        // Calculate the skin transform from up to four bones and weights
        float4x4 skinTransform = Bones[bones.x] * weights.x +
            Bones[bones.y] * weights.y +
            Bones[bones.z] * weights.z +
            Bones[bones.w] * weights.w;
   
        // Apply skinning to vertex and normal
        position = mul(position, skinTransform);
        
        // We assume here that the skin transform includes only uniform scaling (if any)
        normal = mul(normal, (float3x3)skinTransform);
    }
}

// Vertex shader main function
PixelShaderInput VSMain(VertexShaderInput vertex)
{
    PixelShaderInput result = (PixelShaderInput)0;

    // Apply vertex skinning if any
    SkinVertex(vertex.SkinWeights, (uint4)vertex.SkinIndices, vertex.Position, vertex.Normal);

    result.Position = mul(vertex.Position, WorldViewProjection);
    result.Diffuse = vertex.Color * MaterialDiffuse;
    // Apply material UV transformation
    result.TextureUV = mul(float4(vertex.TextureUV.x, vertex.TextureUV.y, 0, 1), (float4x2)UVTransform).xy;

    // We use the inverse transpose of the world so that if there is non uniform
    // scaling the normal is transformed correctly. We also use a 3x3 so that 
    // the normal is not affected by translation (i.e. a vector has the same direction
    // and magnitude regardless of translation)
    result.WorldNormal = mul(vertex.Normal, (float3x3)WorldInverseTranspose);
    
    result.WorldPosition = mul(vertex.Position, World).xyz;
    
    return result;
}

Can anyone please tell me if Shader Model 2, namely shader profile vs_4_0_level_9_3, supports arrays in constant buffers? I am speaking about the Bones array in the PerArmature cbuffer in Common.hlsl. I have declared a float4x4 Bones[50] array to store the bones for character skinning. But when I run the renderer on Windows Phone 8.1, the character is not skinned correctly (it is not animating, only staying in T-pose and moving through space). After days of debugging I have found out that when I load the bones into the PerArmature cbuffer, only one bone gets in there. That is what causes the weird movement instead of actual skinning.

So can anybody confirm that you cannot store a float4x4 array in a cbuffer (Shader Model 2, D3D9)? Or explain to me what I might be doing wrong, so that only one bone ever loads into the PerArmature cbuffer?

Thank you in advance.

float4x4 for bone matrices is not appropriate, since a bone matrix is a 3x4 matrix. I would strongly advise you to rework your code to send your matrices as a float4 matricesRows[50*3] array if you want to conform to older compiler versions smoothly without overloading them. This way, if you send rows, you get the 3x4 optimization: constructing float3x4 or float4x4 objects in the vertex shader from the rows is a trivially cheap operation, or you can use the rows for transforming right away. You will also be using the most native and stable uniform setters on the CPU side, since setting an array of float4s is what every GPU is best prepared for.
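
For reference, here is a minimal sketch (not from the thread) of what that row-packed layout could look like on the shader side. It assumes each bone is stored as three consecutive float4 rows of its 3x4 transform, with the translation in the fourth component of each row; GetBoneMatrix is an illustrative helper name and the bone count of 50 matches the cbuffer posted above.

// Sketch only - rebuilds a full matrix so the existing
// mul(position, skinTransform) code in SkinVertex keeps working.
cbuffer PerArmature : register(b3)
{
    float4 matricesRows[50 * 3]; // three float4 rows per bone
};

float4x4 GetBoneMatrix(uint bone)
{
    uint i = bone * 3;
    // The transpose makes the reconstructed matrix compatible with
    // mul(position, matrix) as used in VSMain; whether it is needed
    // depends on how the rows are written on the CPU side.
    return transpose(float4x4(matricesRows[i + 0],
                              matricesRows[i + 1],
                              matricesRows[i + 2],
                              float4(0, 0, 0, 1)));
}

The result can then be weighted and summed per vertex exactly as SkinVertex already does.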

Thank you for your answer. Do you think that was the reason for the problem I was experiencing? I still do not know whether using a float4x4 array is supported at DirectX feature level 9_3 (Shader Model 2). You're saying that a float4 array should be fine, then?

Is there a way to check the content of a constant buffer in the Graphics Debugger (Visual Studio 2013)? I rewrote the code so the PerArmature cbuffer now stores float4 matricesRows[numberOfBones * 3]. In the shader code I assemble the skinTransform matrix from the three rows and then do the skinning.

This still did not solve the problem. The character still skins as if only one bone were loaded in the cbuffer. I need to check what is really loaded into the constant buffer. Is there a way to do that?
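
Before digging further into the shader, it might be worth ruling out the CPU side, because exactly this "one bone" symptom appears if the buffer update only copies a single matrix or if the constant buffer was created only 64 bytes large. A hypothetical SharpDX sketch of updating the whole PerArmature array in one call - device, context and perArmatureBuffer are illustrative names, not taken from the code in this thread:

// Sketch only - the constant buffer needs room for the whole array,
// and the update should copy all of it in one call.
var perArmatureBuffer = new SharpDX.Direct3D11.Buffer(device,
    SharpDX.Utilities.SizeOf<SharpDX.Matrix>() * 50, // 50 * 64 bytes
    SharpDX.Direct3D11.ResourceUsage.Default,
    SharpDX.Direct3D11.BindFlags.ConstantBuffer,
    SharpDX.Direct3D11.CpuAccessFlags.None,
    SharpDX.Direct3D11.ResourceOptionFlags.None, 0);

var bones = new SharpDX.Matrix[50];
// ... fill bones with the final skin matrices ...
for (var i = 0; i < bones.Length; i++)
    bones[i] = SharpDX.Matrix.Transpose(bones[i]); // HLSL cbuffers default to column-major

// Copies the entire array in one go; updating one element at a time
// into offset 0 would leave only a single bone in the buffer.
context.UpdateSubresource(bones, perArmatureBuffer);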

I guess then - since the character behaves as if it were influenced by only one bone - that your uniform matrices and the way you set them from the CPU are probably correct; you should check your bone indices and weights in the vertex attributes, their declarations for the shader, and so on.

A weird thing is that if I run it in the emulator, it works fine. The character animates just as it should. But when I run the exact same code on the real device, it does this weird thing. Anyway, I will recheck the indices and weights in the vertex attributes.

Could you please look over these for me? This is how I have it at the moment:

In shader:


// Vertex Shader input structure (from Application)
struct VertexShaderInput
{
    float4 Position : SV_Position;// Position - xyzw
    float3 Normal : NORMAL;    // Normal - for lighting and mapping operations
    float4 Color : COLOR0;     // Color - vertex color, used to generate a diffuse color
    float2 TextureUV: TEXCOORD0; // UV - texture coordinate
    uint4 SkinIndices : BLENDINDICES0; // blend indices
    float4 SkinWeights : BLENDWEIGHT0; // blend weights
};

This is in the engine:


[StructLayout(LayoutKind.Sequential, Pack = 1)]
public struct Vertex
{
    public Vector3 Position;
    public Vector3 Normal;
    public Color Color;
    public Vector2 UV;
    public Common.Mesh.SkinningVertex Skin;
}

...

[StructLayout(LayoutKind.Sequential, Pack = 1)]
public struct SkinningVertex
{
    public uint BoneIndex0;
    public uint BoneIndex1;
    public uint BoneIndex2;
    public uint BoneIndex3;
    public float BoneWeight0;
    public float BoneWeight1;
    public float BoneWeight2;
    public float BoneWeight3;
}

I find this strange: if this struct really describes the GPU vertex buffer memory, then a uint per index and a float per weight is an incredible waste, since the standard is 4 bytes for all 4 indices and 4 bytes for all 4 weights. You would then usually declare the vertex shader input as a float4 vector for the four weights and a float4 vector for the four indices, and in the vertex function convert the floating-point indices member to an int type before using it as the index into the uniform array, such as:

struct VertexShaderInput
{
    float4 Position : SV_Position;      // Position - xyzw
    float3 Normal : NORMAL;             // Normal - for lighting and mapping operations
    float4 Color : COLOR0;              // Color - vertex color, used to generate a diffuse color
    float2 TextureUV : TEXCOORD0;       // UV - texture coordinate
    float4 SkinIndices : BLENDINDICES0; // blend indices
    float4 SkinWeights : BLENDWEIGHT0;  // blend weights
};

...

int4 indx = (int4)SkinIndices; // or check the proper float-to-int conversion in HLSL

u_matricies[indx.x] // use it to index the uniform array

With the weights you usually have to do something like

float influence = SkinWeights.x * (1.0f / 255.0f); // since weights tend to be unnormalized 0-255 byte values

All in all, you should post the vertex buffer declaration you establish on the CPU side. On lower-end GPU devices, only floating-point values actually enter the vertex function, whatever the declaration of the real vertex buffer memory is - 16-bit integers, 8-bit integers or whatever; the CPU-side declaration just explains how to interpret the stored value to produce the actual floating-point value for the shader.
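
For illustration only, here is what such a CPU-side declaration could look like with SharpDX input elements. The offsets of the first four elements follow the Vertex struct posted earlier; the skin attributes assume the compacted byte layout described here (which would also mean changing SkinningVertex to bytes). Whether R8G8B8A8_UInt is accepted by the input assembler at feature level 9_3 is worth verifying; if not, R8G8B8A8_UNorm plus a *255 rescale in the shader is the usual fallback.

// Sketch only - compact skinning attributes: 4 bytes of indices, 4 bytes of weights.
var layout = new SharpDX.Direct3D11.InputElement[]
{
    new SharpDX.Direct3D11.InputElement("SV_Position",  0, SharpDX.DXGI.Format.R32G32B32_Float,  0, 0),
    new SharpDX.Direct3D11.InputElement("NORMAL",       0, SharpDX.DXGI.Format.R32G32B32_Float, 12, 0),
    new SharpDX.Direct3D11.InputElement("COLOR",        0, SharpDX.DXGI.Format.R8G8B8A8_UNorm,  24, 0),
    new SharpDX.Direct3D11.InputElement("TEXCOORD",     0, SharpDX.DXGI.Format.R32G32_Float,    28, 0),
    new SharpDX.Direct3D11.InputElement("BLENDINDICES", 0, SharpDX.DXGI.Format.R8G8B8A8_UInt,   36, 0),
    // UNorm weights arrive in the shader already normalized to 0..1,
    // so no 1/255 rescale is needed with this format.
    new SharpDX.Direct3D11.InputElement("BLENDWEIGHT",  0, SharpDX.DXGI.Format.R8G8B8A8_UNorm,  40, 0),
};

With the UInt index format the shader can keep the original uint4 SkinIndices declaration; with the UNorm fallback the indices arrive as float4 and need the conversion described above.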


I have it declared as I wrote in my previous post - the wasteful way, as you said - but I use it in the shader as a float4 for the weights and a uint4 for the indices; see the VSMain function in my very first post. I will make the changes according to your advice, though.


I'm sorry, I'm not 100% sure what you mean about posting the vertex buffer declaration I establish on the CPU side. You want me to do what?

This topic is closed to new replies.
