__declspec(align(16)) unsigned int dcAntiLogScaleLookUpTableUShort[65536];
//zNear and zFar are the clipping planes in model space (not z-buffer space).
int _dcBuildAntiLogScaleLookUpTable(double zNear, double zFar, bool bufferCorrection)
{
__declspec(align(16)) double ushrtRange[2] = {65535.0f, 65535.0f};
__declspec(align(16)) double ushrtFloor[2] = {0.0f, 0.0f};
__declspec(align(16)) double countBy[2] = {2.0f, 2.0f};
__declspec(align(16)) double increment[2] = {0.0f,1.0f};
__declspec(align(16)) double nearZ[2] = {zNear, zNear};
__declspec(align(16)) double farZ[2] = {zFar, zFar};
__declspec(align(16)) double zNearTimesZFar[2] = {zNear * zFar, zNear * zFar};
__declspec(align(16)) double zFarMinusZNear[2] = {zFar - zNear, zFar - zNear};
__declspec(align(16)) double zNearMinusZFar[2] = {-zFar + zNear, -zFar + zNear};
__asm
{
mov ecx, 65536 ; counter set to 65536
shr ecx, 1 ; div by 2 XMM==2 doubles
mov eax, 0 ; pointer offset
movapd XMM2, [increment] ;
loop_main: ;
movapd XMM0, XMM2 ; preserve the increment
movapd XMM1, [zNearTimesZFar]; load zNearTimesZFar
;--------------------------------
divpd XMM0, [ushrtRange] ; x = i / 65535
mulpd XMM0, [zNearMinusZFar]; x = zNearMinusZFar * x
addpd XMM0, [farZ] ; x+= zFar
divpd XMM0, [zNearTimesZFar]; x = zNearTimesZFar / x
subpd XMM1, [nearZ] ; x -= zNear
divpd XMM1, [zFarMinusZNear]; x /= zFarMinusZNear
mulpd XMM1, [ushrtRange] ; x *= 65535
minpd XMM1, [ushrtRange] ;
maxpd XMM1, [floorAll] ;
cvttpd2dq XMM0, XMM1 ; convert to unsigned ints
movdq2q MM0, XMM0 ; move them to an MM register
movq [dcAntiLogScaleLookUpTableUShort + eax], MM0; move to LUT
addps XMM2, [countBy] ;
add eax, 8 ; increment pointer offset
dec ecx ;
jnz loop_main ;
emms;
};
return 0;
}
[edited by - Ren Aissanz on June 7, 2004 4:52:16 PM]
SSE2 and logarithmic to linear conversions
OK, here's what I'm trying to do:
3D programming APIs typically use a logarithmic mapping to represent depth in world space (things further away are spaced along a log ramp).
My software uses the depth component to represent, well, DEPTH, but it has to be linear or it's no good for my purposes. To this end, I have a function that constructs a log-to-linear lookup table (which saves on-the-fly CPU time). It seems to be broken, however: once I have clamped the output, it produces nothing but 65535, whereas it should produce a range of values from 0 to 65535.
I have debugged the HECK out of this thing, and finding nothing, am looking for a fresh set of eyes. If any of you geniuses out there are fluent in SSE2 assembler, check it out, and tell me what I'm doing wrong. (code follows)
This topic is closed to new replies.
Advertisement
Popular Topics
Advertisement