Sign in to follow this  
dmtuan

[HLSL] Decreasing Shader Model from SM5 to SM2 (vs_5_0 to vs_4_0_level_9_3)

Recommended Posts

Hi, 

 

I am not an expert on HLSL. Hope someone can help me rewrite this Shader, so it can work on Shader Model 2 (vs_4_0_level_9_3). I have an engine running on Windows 8.1 (App Store), which uses DirectX 11.2 (feature level 11_1). I am trying to rewrite this engine, so it runs on Windows Phone 8.1. In order to accomplish that, I had to decrease the DirectX feature level to 9_3, which only uses Shader Model 2 (unlike feature level 11_1, which supports Shader Model 5).

 

I am having difficulties to rewrite my vertex shader (originally written for vs_5_0), so that it's now for vs_4_0_level_9_3. Here are my shaders written for vs_5_0 profile:

 

 

Common.hlsl:

// Constant buffer to be updated by application per object.
// Bound to slot b0; each float4x4 packs into 4 constant
// registers, so this buffer consumes 12 registers of the
// 256 available to a vs_4_0_level_9_3 shader.
cbuffer PerObject : register(b0)
{
    // Combined World*View*Projection matrix - takes object-space
    // positions straight to clip space
    float4x4 WorldViewProjection;
    
    // We need the world matrix so that we can
    // calculate the lighting in world space
    float4x4 World;
    
    // Inverse transpose of world, used for
    // bringing normals into world space, especially
    // necessary where non-uniform scaling has been applied
    float4x4 WorldInverseTranspose;
};

// A simple directional light (e.g. the sun): a light at infinity
// described only by a colour and a direction, no position/attenuation.
struct DirectionalLight
{
    float4 Color;     // light colour (rgb used by the pixel shader)
    float3 Direction; // direction the light travels (negated to get "to light")
};

// Constant buffer - updated once per frame
// Note: HLSL data is packed in such a
// way that it does not cross a 16-byte boundary, so Light.Direction
// (float3) and CameraPosition (float3) land in separate registers.
cbuffer PerFrame: register (b1)
{
    DirectionalLight Light;   // the single scene light
    float3 CameraPosition;    // eye position in world space, for specular
};

// Constant buffer to hold our material configuration
// Note: HLSL data is packed in such a
// way that it does not cross a 16-bytes boundary; the scalar
// MaterialSpecularPower and bool HasTexture share one register,
// and MaterialEmissive starts on a fresh one.
cbuffer PerMaterial : register (b2)
{
    float4 MaterialAmbient;       // ambient reflectance
    float4 MaterialDiffuse;       // diffuse reflectance (modulates vertex colour)
    float4 MaterialSpecular;      // specular reflectance
    float MaterialSpecularPower;  // Phong/Blinn exponent (clamped > 0 in use)
    bool HasTexture;              // whether the pixel shader should sample Texture0
    float4 MaterialEmissive;      // self-illumination added after lighting
    float4x4 UVTransform;         // per-material texture coordinate transform
};

// Constant buffer to hold our skin matrices for each bone.
// Note: under SM5 a constant buffer may be up to 64 KB (room for
// 1024 float4x4s), but vs_4_0_level_9_3 caps the *total* constant
// registers at 256; 63 matrices = 252 registers, leaving just enough
// for the other buffers above (see the X4507 errors quoted in this
// thread when larger counts were tried).
cbuffer PerArmature : register(b3)
{
    float4x4 Bones[63];
};

// Vertex Shader input structure (from Application).
// Must match the C# InputLayout element-for-element.
struct VertexShaderInput
{
    float4 Position : SV_Position;// Position - xyzw
    float3 Normal : NORMAL;    // Normal - for lighting and mapping operations
    float4 Color : COLOR0;     // Color - vertex color, used to generate a diffuse color
    float2 TextureUV: TEXCOORD0; // UV - texture coordinate
    uint4 SkinIndices : BLENDINDICES0; // blend indices (up to 4 bones per vertex)
    float4 SkinWeights : BLENDWEIGHT0; // blend weights - assumed normalized; TODO confirm exporter
};

// Pixel Shader input structure (from Vertex Shader).
// Everything except SV_Position is interpolated across the triangle.
struct PixelShaderInput
{
    float4 Position : SV_Position;
    // Interpolation of combined vertex and material diffuse
    float4 Diffuse : COLOR;
    // Interpolation of vertex UV texture coordinate
    float2 TextureUV: TEXCOORD0;

    // We need the World Position and normal for light calculations
    // (re-normalized in the pixel shader after interpolation)
    float3 WorldNormal : NORMAL;
    float3 WorldPosition : WORLDPOS;
};

// Lambertian (cosine-weighted) diffuse contribution.
// pixelDiffuse - interpolated diffuse colour (alpha ignored here)
// normal       - unit surface normal
// toLight      - unit vector from the surface towards the light
// Returns the diffuse colour scaled by the clamped N.L term.
float3 Lambert(float4 pixelDiffuse, float3 normal, float3 toLight)
{
    // Lambert's Cosine Law: intensity is proportional to N.L,
    // clamped to [0,1] so back-facing geometry receives no light
    float cosineTerm = saturate(dot(normal, toLight));
    return cosineTerm * pixelDiffuse.rgb;
}

// Classic Phong specular highlight.
// normal  - unit surface normal
// toLight - unit vector towards the light
// toEye   - unit vector towards the camera
// Returns the material specular colour scaled by (R.V)^power.
float3 SpecularPhong(float3 normal, float3 toLight, float3 toEye)
{
    // Mirror the incoming light about the normal:
    // reflect(i, n) = i - 2 * n * dot(i, n)
    float3 mirrored = reflect(-toLight, normal);

    // Clamp the exponent away from zero: pow(x, 0) == 1 everywhere,
    // which would wash the model out to black and white
    float exponent = max(MaterialSpecularPower, 0.00001f);

    // Smaller exponents produce a broader highlight
    float highlight = pow(saturate(dot(mirrored, toEye)), exponent);
    return highlight * MaterialSpecular.rgb;
}

// Blinn-Phong specular highlight (half-vector variant of Phong).
// normal  - unit surface normal
// toLight - unit vector towards the light
// toEye   - unit vector towards the camera
// Returns the material specular colour scaled by (N.H)^power.
float3 SpecularBlinnPhong(float3 normal, float3 toLight, float3 toEye)
{
    // Blinn-Phong replaces the reflection vector with the
    // normalized halfway vector between light and view directions
    float3 halfVector = normalize(toLight + toEye);

    // saturate() prevents back-facing light reflection; the exponent
    // is clamped away from zero because pow(x, 0) == 1 would flatten
    // the shading (smaller power = larger highlight)
    float exponent = max(MaterialSpecularPower, 0.00001f);
    float highlight = pow(saturate(dot(normal, halfVector)), exponent);
    return highlight * MaterialSpecular.rgb;
}

VS.hlsl:

#include "Common.hlsl"

// Blends up to four bone transforms into one skin matrix and applies
// it to the vertex position and normal in place.
// weights  - per-vertex blend weights (assumed to sum to 1; TODO confirm)
// bones    - indices into the Bones[] constant array
// position - object-space position, replaced by the skinned position
// normal   - object-space normal, replaced by the skinned normal
// NOTE(review): uint4 indices and the dynamic branch below are SM4-style
// constructs; behaviour on level 9_x hardware should be verified - the
// thread reports skinning working in the emulator but not on device.
void SkinVertex(float4 weights, uint4 bones, inout float4 position, inout float3 normal)
{
    // If there are skin weights apply vertex skinning
    // (weights.x == 0 is used as "this vertex is not skinned")
    if (weights.x != 0)
    {
        // Calculate the skin transform from up to four bones and weights
        float4x4 skinTransform = Bones[bones.x] * weights.x +
            Bones[bones.y] * weights.y +
            Bones[bones.z] * weights.z +
            Bones[bones.w] * weights.w;
   
        // Apply skinning to vertex and normal
        position = mul(position, skinTransform);
        
        // We assume here that the skin transform includes only uniform scaling (if any)
        // - otherwise the inverse transpose would be needed for the normal
        normal = mul(normal, (float3x3)skinTransform);
    }
}

// Vertex shader main function: skins the vertex, projects it to clip
// space, and passes diffuse colour, transformed UVs, world-space normal
// and world-space position on to the pixel shader for lighting.
PixelShaderInput VSMain(VertexShaderInput vertex)
{
    PixelShaderInput result = (PixelShaderInput)0;

    // Apply vertex skinning if any (mutates Position and Normal in place)
    SkinVertex(vertex.SkinWeights, vertex.SkinIndices, vertex.Position, vertex.Normal);

    result.Position = mul(vertex.Position, WorldViewProjection);
    // Vertex colour modulated by the material's diffuse colour
    result.Diffuse = vertex.Color * MaterialDiffuse;
    // Apply material UV transformation (only the first two columns of
    // UVTransform are needed for a 2D texture coordinate)
    result.TextureUV = mul(float4(vertex.TextureUV.x, vertex.TextureUV.y, 0, 1), (float4x2)UVTransform).xy;

    // We use the inverse transpose of the world so that if there is non uniform
    // scaling the normal is transformed correctly. We also use a 3x3 so that 
    // the normal is not affected by translation (i.e. a vector has the same direction
    // and magnitude regardless of translation)
    result.WorldNormal = mul(vertex.Normal, (float3x3)WorldInverseTranspose);
    
    // World-space position for the pixel shader's to-eye vector
    result.WorldPosition = mul(vertex.Position, World).xyz;
    
    return result;
}

When I try to compile this vertex shader with vs_4_0_level_9_3 profile, I get this error:

Common.hlsl(76,14-18): error X4507: maximum vs_4_0_level_9_3 constant register index (256) exceeded - Try reducing number of constants referenced

 

I see the error refers to the line "float4x4 Bones[1024];" in Common.hlsl. I tried to decrease it to "float4x4 Bones[63];", but I got another error:

Common.hlsl(30,14-32): error X4507: maximum vs_4_0_level_9_3 constant register index (256) exceeded - Try reducing number of constants referenced

... the line "float4x4 WorldViewProjection;"

 

I really don't know what now.. :/ any help would be appreciated.

 

Thank you in advance for any pointers and advices.

Edited by dmtuan

Share this post


Link to post
Share on other sites


You're using a total of 276 registers. 252 for the bones, and 24 for the other stuff. Like imoogiBG says, you need to reduce it more.
 
Note that if you still need 63 bones, you can probably pack the needed info into 3 float4 registers per bone, and reconstruct the matrix in the vertex shader (since some of the bone matrix elements will always be 0 or 1).

 

Thank you all for your advices. You were right. I was using 276, so it was 20 more than allowed. I decreased it to 43 bones, and it compiled fine. It is a little bit less than I need, but I will figure something out with the bones number.

 

I have one more question. If I decrease float4x4 Bones[1024] to float4x4 Bones[43], do I need to change something in the Input Layout for Vertex Shader? This is what I have now:

new[]
                {
                    // "SV_Position" = vertex coordinate in object space
                    // (3 floats at offset 0)
                    new InputElement("SV_Position", 0, Format.R32G32B32_Float, 0, 0),
                    // "NORMAL" = the vertex normal (3 floats at offset 12)
                    new InputElement("NORMAL", 0, Format.R32G32B32_Float, 12, 0),
                    // "COLOR" = vertex colour packed as 4 unsigned-normalized bytes
                    new InputElement("COLOR", 0, Format.R8G8B8A8_UNorm, 24, 0),
                    // "UV" = 2D texture coordinate
                    new InputElement("TEXCOORD", 0, Format.R32G32_Float, 28, 0),
                    // "BLENDINDICES" = four bone indices
                    // NOTE(review): R32G32B32A32_UInt is not among the vertex
                    // buffer formats supported at feature level 9_3 (D3D9-era
                    // hardware used UBYTE4, i.e. R8G8B8A8_UInt) - likely cause
                    // of the E_INVALIDARG from CreateInputLayout; confirm with
                    // the D3D11 debug layer
                    new InputElement("BLENDINDICES", 0, Format.R32G32B32A32_UInt, 36, 0),
                    // "BLENDWEIGHT" = four bone weights
                    new InputElement("BLENDWEIGHT", 0, Format.R32G32B32A32_Float, 52, 0),
                }));

After changing bone number to 43, i get this error while defining the Input Layout. I'd like to know if the error is related to the bones number change or if it's related to something else.

Share this post


Link to post
Share on other sites

What's the error? SV_Position is a DX10 and above thing, maybe that's the problem? My guess is that something in your layout is not supported in the feature level you're using.

Share this post


Link to post
Share on other sites
I have one more question. If I decrease float4x4 Bones[1024] to float4x4 Bones[43], do I need to change something in the Input Layout for Vertex Shader? This is what I have now:

Shader resources are not part of the Input Assembler, so there is no need to update the Input Layout; the layout only specifies how you're going to pass mesh vertices to the shader.
 

 

are you using D3D11_CREATE_DEVICE_DEBUG, there should be an error?

 

Edited by imoogiBG

Share this post


Link to post
Share on other sites


What's the error? SV_Position is a DX10 and above thing, maybe that's the problem? My guess is that something in your layout is not supported in the feature level you're using.

 

It says: Message = "HRESULT: [0x80070057], Module: [General], ApiCode: [E_INVALIDARG/Invalid Arguments], Message: The parameter is incorrect.\r\n"

 

I think it is because of new InputElement("BLENDINDICES", 0, Format.R32G32B32A32_UInt, 36, 0)

I'm looking into https://msdn.microsoft.com/en-us/library/windows/desktop/ff471324(v=vs.85).aspx, and there is no reference of R32G32B32A32_UInt. 

Share this post


Link to post
Share on other sites


Shader resources are not part of the Input Assembler, so there is no need to update the Input Layout; the layout only specifies how you're going to pass mesh vertices to the shader.
 
 
are you using D3D11_CREATE_DEVICE_DEBUG, there should be an error?

 

I see, so it's not related to the change in the bones number.

 

No, Im not using D3D11_CREATE_DEVICE_DEBUG.

 

Anyway, I think I got pass this. Thank you all for your help.

Share this post


Link to post
Share on other sites


Note that if you still need 63 bones, you can probably pack the needed info into 3 float4 registers per bone, and reconstruct the matrix in the vertex shader (since some of the bone matrix elements will always be 0 or 1).

 

Hi, could you please walk me through this? It seems I will need more bones after all. I'm not sure I entirely understand what you mean. Thanks

Share this post


Link to post
Share on other sites

Old video cards (respectively, old feature levels) had only 256 vector registers for uniforms (equal to the size of 256 float4 variables, or 64 mat4 matrices). If you want to render with more bones, you definitely can't fit them in those 256 registers. The options are:

- try to shrink your matrices: use 3 registers for translation, scaling and rotation — maybe scaling is always 1,1,1?

- CPU skinning.

- Split the mesh by bones in order to need less bones in the vertex shader.

- try to store matrices in a texture, and use texture lookups. Textures will be slower but with a lot of space for matrices.

Edited by imoogiBG

Share this post


Link to post
Share on other sites


- try to shrink your matrices: use 3 registers for translation, scaling and rotation — maybe scaling is always 1,1,1?
- CPU skinning.
- Split the mesh by bones in order to need less bones in the vertex shader.
- try to store matrices in a texture, and use texture lookups. Textures will be slower but with a lot of space for matrices.

 

How costly do u think CPU skinning would be? It means to perform skinTransforms in the render loop before you send the vertex into a vertex shader, right?

 

I am thinking about what I can shrink, but I think it will not be enough.

 

Other 2 methods you mentioned seem rather time consuming. I am not sure I wanna go into that.

Share this post


Link to post
Share on other sites

Well it depends on the number of unique instances that use skinning and the target platform.
If you have few models per frame that use skinning, then CPU skinning is a good choice.

Otherwise using a texture may sound a bit odd, but this technique has been used in many games.

 

In most cases data shrinking isn't a good choice, because you're locking features away from the artists.

Splitting a mesh is a tricky process, but if you can offload that work to an external tool, that would save a lot of headaches.

Edited by imoogiBG

Share this post


Link to post
Share on other sites

Knowing why 9_3 doesn't like Format.R32G32B32A32_UInt would really help. As said, create the device with the debug flag, then watch the  output log. Well explained here.

 

I guess it fails because SM2/3 never had such a format for vertices (see here for D3D9: D3DDECLTYPE). IIRC blend indices used UBYTE4, so R8G8B8A8_UINT might work (Edit: Yeah, it's actually listed in that link you posted). Alternatively: the old SDK samples used actual float4 and cast it back to int in the shader.

 

32bit for bone indices is too much anyway. Billions of bones ? Unlikely.

Edited by unbird

Share this post


Link to post
Share on other sites


Well it depends on the number of unique instances that use skinning and the target platform.
If you have few models per frame that use skinning, then CPU skinning is a good choice.
Otherwise using a texture may sound a bit odd, but this technique has been used in many games.

 

I'll probably try to implement that CPU skinning. I expect to have 2 moving characters at most per frame, so we will see how it performs on a Phone

Share this post


Link to post
Share on other sites

Hi, could you please walk me through this? It seems I will need more bones after all. I'm not sure I entirely understand what you mean. Thanks

 

Basically, instead of passing the 4x4 matrix as a transform, you pass the translation/scale/rotation components used to create that matrix (that's 3 + 3 + 4 floats, so 10, which fits in 3 float4's), and then you recreate it in the vertex shader.

 

Or a slightly different option (for a simpler vertex shader and less changes to your CPU code): pluck out the non-varying terms from the actual transform matrix (they should fit in 3 float4's, although that might require uniform x-y-z scaling). It's been a while since I've done this, but looking at some old XNA code, here's what I do on the CPU:

                // Compose the per-bone skinning matrix: inverse bind pose
                // first, then the bone's current world transform
                Matrix m = skinningDataValue.InverseBindPose[bone] *
                                            worldTransforms[bone];

                skinTransforms[bone] = m;
                // Here are the 12 terms I pluck out from the transform matrix:
                // each Vector4 holds one of the first three columns of m
                // (read down the rows). The fourth column of an affine
                // transform is the constant (0, 0, 0, 1), so the shader
                // reconstructs it instead of it being uploaded.
                skinTransformsCompact[bone * 3] = new Vector4(m.M11, m.M21, m.M31, m.M41);
                skinTransformsCompact[bone * 3 + 1] = new Vector4(m.M12, m.M22, m.M32, m.M42);
                skinTransformsCompact[bone * 3 + 2] = new Vector4(m.M13, m.M23, m.M33, m.M43);

And then the shader snippet that calculates the blend matrix:


// 78 bones * 3 registers = 234 constant registers, versus
// 59 * 4 = 236 if full float4x4 matrices were stored.
#define MaxBones 78	// With float4x4, we could only handle 59
// Using triplets of float4s lets us get more bones in than if we used float4x4
float4 BonesCompact[MaxBones * 3];




// Rebuilds the weighted blend of the (up to) four bone transforms
// influencing a vertex. Each bone is stored as three float4s - the
// three varying columns of its matrix; the fourth column of an affine
// transform is always (0, 0, 0, 1), so it is appended here for free.
float4x4 CalcBlendMatrix(VS_INPUT data)
{
  // Each bone occupies three consecutive float4s, so scale the
  // per-vertex bone indices into BonesCompact element indices
  int4 ix = int4(data.BoneIndices * 3);

  // Reconstruct the 4x3 transform of each influencing bone,
  // one stored column at a time
  float4x3 fx;
  fx._m00_m10_m20_m30 = BonesCompact[ix.x];
  fx._m01_m11_m21_m31 = BonesCompact[ix.x+1];
  fx._m02_m12_m22_m32 = BonesCompact[ix.x+2];

  float4x3 fy;
  fy._m00_m10_m20_m30 = BonesCompact[ix.y];
  fy._m01_m11_m21_m31 = BonesCompact[ix.y+1];
  fy._m02_m12_m22_m32 = BonesCompact[ix.y+2];

  float4x3 fz;
  fz._m00_m10_m20_m30 = BonesCompact[ix.z];
  fz._m01_m11_m21_m31 = BonesCompact[ix.z+1];
  fz._m02_m12_m22_m32 = BonesCompact[ix.z+2];

  float4x3 fw;
  fw._m00_m10_m20_m30 = BonesCompact[ix.w];
  fw._m01_m11_m21_m31 = BonesCompact[ix.w+1];
  fw._m02_m12_m22_m32 = BonesCompact[ix.w+2];

  // Blend the four transforms by their vertex weights
  float4x3 ret = fx * data.BoneWeights.x;
  ret += fy * data.BoneWeights.y;
  ret += fz * data.BoneWeights.z;
  ret += fw * data.BoneWeights.w;

  // Widen back to 4x4, restoring the constant fourth column
  float4x4 mat;
  mat._m00_m10_m20_m30 = ret._m00_m10_m20_m30;
  mat._m01_m11_m21_m31 = ret._m01_m11_m21_m31;
  mat._m02_m12_m22_m32 = ret._m02_m12_m22_m32;
  mat._m03_m13_m23_m33 = float4(0, 0, 0, 1);

  return mat;
}

 

 

(It looks like you're using sharpDX or something, so keep in mind the matrices might have a different row-major vs column-major, etc... so you might not be able to just copy verbatim).

Edited by phil_t

Share this post


Link to post
Share on other sites


Basically, instead of passing the 4x4 matrix as a transform, you pass the translation/scale/rotation components used to create that matrix (that's 3 + 3 + 4 floats, so 10, which fits in 3 float4's), and then you recreate it in the vertex shader.

 

If I understand this correctly.. I load the skinTransform matrix (4x4) from the .fbx file, where animations are baked to the skeleton and model. I take this 4x4 matrix and I have to deconstruct them to translation, scale and rotation components (I get 3 float4's)... and instead of sending the whole float4x4 skinTransform matrix, I only send float3 translate, float3 rotate, float3 scale to a shader.. and in the shader I reconstruct the skinTransform matrix from these 3 components... is that right?

Share this post


Link to post
Share on other sites


pluck out the non-varying terms from the actual transform matrix (they should fit in 3 float4's, although that might require uniform x-y-z scaling)

 

Can I ask how did u figure out which terms to pluck out? Also in the snippet you posted, you did not do the uniform x-y-z scaling, right? Why is the uniform scaling needed anyway?

Share this post


Link to post
Share on other sites


Can I ask how did u figure out which terms to pluck out? Also in the snippet you posted, you did not do the uniform x-y-z scaling, right? Why is the uniform scaling needed anyway?

 

I probably popped open the linear algebra section of my computer graphics book to brush up on things, and then verified in the debugger :P

 

This post shows what the various transformation matrices look like. You can see the bottom row is always (0, 0, 0, 1).

 

So it looks like uniform scaling or not doesn't really matter in this case.

Share this post


Link to post
Share on other sites


This post shows what the various transformation matrices look like. You can see the bottom row is always (0, 0, 0, 1).
 
So it looks like uniform scaling or not doesn't really matter in this case.

 

Ok... so it seems I just have to take the first 3 rows, send them as 3 float4's to the vertex shader.. and in the shader I will reconstruct the original matrix, since I know the last row is always (0, 0, 0, 1)...

Share this post


Link to post
Share on other sites

Ah, I have another issue. If you look at the Vertex Shader I posted in the 1st post here.. can any of you think of any functions or instructions, that are not valid in Shader Model 2? Namely, Shader Model vs_4_0_level_9_3.

 

I am experiencing this strange thing. When I run the program in the emulator, the character loads fine and animates fine. But when I do it in a real device, the character is not animating. I suspect the problem is in the SkinVertex() function.. but I cannot pin point exact problem. In the device, the character is moving in space in a rotation-like trajectory, but it stays in TPose. It is suppose to be standing and waving his hand. 

 

Debugging in the emulator is not accurate, since it might support some high level functions, even thou the engine is set on only feature level 9_3, shader model 2.. at least I read so on msdn. 

Share this post


Link to post
Share on other sites


What's the error? SV_Position is a DX10 and above thing, maybe that's the problem? My guess is that something in your layout is not supported in the feature level you're using.

 

Hi, could u please help me to rewrite my Vertex Shader and Pixel Shader, so that they do not use SV_Position and SV_Target? It seems to be like you said. SV_Position and SV_Target are not supported in Shader Model 2.

 

Here is the snippet from my hlsl code:

// Vertex Shader input structure (from Application).
// NOTE(review): SV_Position as a vertex *input* semantic is a D3D10+
// convention; this thread reports problems with it when targeting
// level 9_x - POSITION0 may be required there. Confirm with fxc.
struct VertexShaderInput
{
    float4 Position : SV_Position;// Position - xyzw
    float3 Normal : NORMAL;    // Normal - for lighting and mapping operations
    float4 Color : COLOR0;     // Color - vertex color, used to generate a diffuse color
    float2 TextureUV: TEXCOORD0; // UV - texture coordinate
    uint4 SkinIndices : BLENDINDICES0; // blend indices
    float4 SkinWeights : BLENDWEIGHT0; // blend weights
};

// Pixel Shader input structure (from Vertex Shader).
// All fields except SV_Position are interpolated across the triangle.
struct PixelShaderInput
{
    float4 Position : SV_Position;
    // Interpolation of combined vertex and material diffuse
    float4 Diffuse : COLOR;
    // Interpolation of vertex UV texture coordinate
    float2 TextureUV: TEXCOORD0;

    // We need the World Position and normal for light calculations
    // (re-normalized in the pixel shader after interpolation)
    float3 WorldNormal : NORMAL;
    float3 WorldPosition : WORLDPOS;
};

// Vertex shader entry point: applies skinning, projects the vertex to
// clip space, and forwards the lighting inputs to the pixel shader.
PixelShaderInput VSMain(VertexShaderInput vertex)
{
    PixelShaderInput result = (PixelShaderInput)0;

    // Apply vertex skinning if any (mutates Position and Normal in place)
    SkinVertex(vertex.SkinWeights, vertex.SkinIndices, vertex.Position, vertex.Normal);

    result.Position = mul(vertex.Position, WorldViewProjection);
    // Vertex colour modulated by the material's diffuse colour
    result.Diffuse = vertex.Color * MaterialDiffuse;
    // Apply material UV transformation
    result.TextureUV = mul(float4(vertex.TextureUV.x, vertex.TextureUV.y, 0, 1), (float4x2)UVTransform).xy;

    // We use the inverse transpose of the world so that if there is non uniform
    // scaling the normal is transformed correctly. We also use a 3x3 so that 
    // the normal is not affected by translation (i.e. a vector has the same direction
    // and magnitude regardless of translation)
    result.WorldNormal = mul(vertex.Normal, (float3x3)WorldInverseTranspose);
    
    // World-space position for the pixel shader's to-eye vector
    result.WorldPosition = mul(vertex.Position, World).xyz;
    
    return result;

}

// Pixel shader entry point: per-pixel Blinn-Phong lighting with an
// optional diffuse texture (white is used when HasTexture is false).
// NOTE(review): SV_Target and Texture0.Sample are SM4-style syntax;
// the thread reports SV_Target being rejected for the 9_x/SM2 target,
// where COLOR is the legacy output semantic - confirm with fxc.
float4 PSMain(PixelShaderInput pixel) : SV_Target
{
    // Normalize our vectors as they are not 
    // guaranteed to be unit vectors after interpolation
    float3 normal = normalize(pixel.WorldNormal);
    float3 toEye = normalize(CameraPosition - pixel.WorldPosition);
    float3 toLight = normalize(-Light.Direction);

    // Texture sample here (use white if no texture)
    // (Texture0 and Sampler are declared elsewhere in the shader set)
    float4 sample = (float4)1.0f;
    if (HasTexture)
        sample = Texture0.Sample(Sampler, pixel.TextureUV);

    float3 ambient = MaterialAmbient.rgb;
    float3 emissive = MaterialEmissive.rgb;
    float3 diffuse = Lambert(pixel.Diffuse, normal, toLight);
    float3 specular = SpecularBlinnPhong(normal, toLight, toEye);

    // Calculate final color component
    float3 color = (saturate(ambient+diffuse) * sample.rgb + specular) * Light.Color.rgb + emissive;
    // We saturate ambient+diffuse to ensure there is no over-
    // brightness on the texture sample if the sum is greater than 1
    
    // Calculate final alpha value
    float alpha = pixel.Diffuse.a * sample.a;

    // Return result
    return float4(color, alpha);

}

I am not sure if it's ok just to replace SV_position with POSITION or POSITION0 or if I need to make any other adjusments...

Share this post


Link to post
Share on other sites

I tried to change line 4 to float4 Position : POSITION;// Position - xyzw

 

Line 15 to float4 Position : VPOS;

 

And line 50 to float4 PSMain(PixelShaderInput pixel) : COLOR

 

But when I try to compile these shaders with fxc.exe, I get this error:

error X4541: vertex shader must minimally write all four components of SV_Position

 

I don't know how to deal with this...

Share this post


Link to post
Share on other sites

Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!

Register a new account

Sign in

Already have an account? Sign in here.

Sign In Now

Sign in to follow this