Sign in to follow this  
Shnoutz

skip optimization + static const variable = bug?

Recommended Posts

Hello,

 

I have trouble with one of my shaders: it works flawlessly when optimized, but if I compile it with skip optimization, it fails.

I tracked down the issue: it seems one of my static const variables (stored in r0 in the asm) is being overwritten at some point.

 

Worth noting that this happens in debug and release, with or without the WARP device, and to the best of my knowledge it is only related to the compilation process. The problem is clearly visible in the disassembled code.

 

The shader is quite long so I will post snippets of the shader with corresponding asm.

(I can post the whole shader or make a small program to illustrate the issue if needed).

// 1 / sqrt(2): the offset subtracted in decompressQuaternion.
static const float g_oneOverSqrt2 = 0.7071067811865475f;
// 2 / sqrt(2) (= sqrt(2)): the scale applied in decompressQuaternion.
static const float g_twoOverSqrt2 = 1.414213562373095f;

...

// Unpacks a quaternion stored in a single 32-bit value.
// Layout read by the shifts/masks below: three 10-bit fields at bits 22-31,
// 12-21 and 2-11, plus a 2-bit selector in bits 0-1.
// The three stored components are rescaled from [0, 1023] to
// [-g_oneOverSqrt2, +g_oneOverSqrt2]; the fourth component is rebuilt as
// sqrt(1 - dot(xyz, xyz)) and the selector decides which slot of the
// returned float4 receives it.
float4 decompressQuaternion(uint compressedQuaternion)
{
	// The scalar is replicated into all four lanes so each lane can extract its own field.
	uint4 uc = compressedQuaternion;
	// Per-lane shift brings each field down to bit 0 ...
	uc >>= uint4(22, 12, 2, 0);
	// ... and the mask keeps 10 bits for x/y/z and 2 bits for the selector in w.
	uc &= uint4(1023, 1023, 1023, 3);
	float4 fc;
	// Normalize to [0, 1], then scale and bias into [-1/sqrt(2), +1/sqrt(2)].
	fc.xyz = (float3)uc.xyz / 1023.0f * g_twoOverSqrt2 - g_oneOverSqrt2;
	// Rebuild the component that was not stored.
	// NOTE(review): if the three decoded components have squared length > 1 the
	// sqrt argument is negative; presumably the encoder only emits unit
	// quaternions - confirm, or clamp the argument.
	fc.w = sqrt(1 - dot(fc.xyz, fc.xyz));
	// Selector 0/1/2/other: the rebuilt component goes into x/y/z/w respectively,
	// and the stored components fill the remaining slots in order.
	if(uc.w == 0) return fc.wxyz;
	else if(uc.w == 1) return fc.xwyz;
	else if(uc.w == 2) return fc.xywz;
	else return fc.xyzw;
}

...

	const uint componentIndex = DTid.x;
	if(componentIndex < b_parameters.count)
	{
...
		float4 rotation = decompressQuaternion(p_data.rotation);
...
	}

The same sections in ASM look like this:

// Init static const variables
mov r0.x, l(0.707107)  // NOTE: r0.x <- g_oneOverSqrt2
// At this point r0.x == g_oneOverSqrt2

// The asm version of the if(componentIndex < b_parameters.count)
mov r0.w, vThreadID.x  // r0.w <- componentIndex
ult r1.x, r0.w, CB0[0][0].w
if_nz r1.x
  mov r0.x, CB0[0][0].w  // r0.x <- b_parameters.count
// Just above, r0.x is overwritten by CB0[0][0].w

// The asm version of decompressQuaternion,
// note the use of the r0.x
  mov r7.xyzw, r7.xyzw
  mov r8.xyz, l(22,12,2,0)
  ushr r7.xyz, r7.xyzx, r8.xyzx
  mov r8.xyzw, l(1023,1023,1023,3)
  and r7.xyzw, r7.xyzw, r8.xyzw
  utof r7.xyz, r7.xyzx
  div r7.xyz, r7.xyzx, l(1023.000000, 1023.000000, 1023.000000, 0.000000)
  mul r7.xyz, r0.yyyy, r7.xyzx
  mov r8.xyz, -r0.xxxx              // NOTE: use of r0.x
  add r7.xyz, r7.xyzx, r8.xyzx
  itof r0.x, l(1)
  dp3 r0.y, r7.xyzx, r7.xyzx
  mov r0.y, -r0.y
  add r0.x, r0.y, r0.x
  sqrt r8.w, r0.x
  if_z r7.w
    mov r8.x, r8.w
    mov r8.yzw, r7.xxyz
  else
    mov r0.x, l(1)
    ieq r0.x, r0.x, r7.w
    if_nz r0.x
      mov r8.y, r8.w
      mov r8.zw, r7.yyyz
    else
      mov r0.x, l(2)
      ieq r0.x, r0.x, r7.w
      if_nz r0.x
        mov r8.z, r8.w
        mov r8.w, r7.z
      else
        mov r8.z, r7.z
        mov r8.w, r8.w
      endif 
      mov r8.y, r7.y
    endif 
    mov r8.x, r7.x
  endif 

I have avoided the issue by using literals in the shader code for oneOverSqrt2 and twoOverSqrt2, but I tend to use static const variables to share code between C++ and HLSL, and using macros instead is messing up my design.

 

Has anyone else had trouble with static const variables in un-optimized shaders?

 

Cheers!

Share this post


Link to post
Share on other sites

Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!

Register a new account

Sign in

Already have an account? Sign in here.

Sign In Now

Sign in to follow this