Jump to content
  • Advertisement

Archived

This topic is now archived and is closed to further replies.

Ren Aissanz

SSE2 and logarithmic to linear conversions

This topic is 5273 days old which is more than the 365 day threshold we allow for new replies. Please post a new topic.

If you intended to correct an error in the post then please contact us.

Recommended Posts

OK, here's what I'm trying to do: 3D programming APIs typically store the depth of the world space non-linearly (things further away are spaced on a log-like ramp). My software uses the depth component to represent, well, DEPTH, but it has to be linear or it's no good for my purposes. To this end, I have a function that constructs a log-to-linear lookup table (which saves computing the conversion on the fly). It seems to be broken, however: it produces nothing but 65535 once I have clamped it, whereas it should produce a range of values from 0 to 65535. I have debugged the HECK out of this thing and, finding nothing, am looking for a fresh set of eyes. If any of you geniuses out there are fluent in SSE2 assembler, check it out and tell me what I'm doing wrong. (code follows)
__declspec(align(16)) unsigned int	dcAntiLogScaleLookUpTableUShort[65536];
//zNear and zFar are the clipping planes in model space (not z-buffer space).

int _dcBuildAntiLogScaleLookUpTable(double zNear, double zFar, bool bufferCorrection)
{
	__declspec(align(16)) double ushrtRange[2]		= {65535.0f, 65535.0f};
	__declspec(align(16)) double ushrtFloor[2]		= {0.0f, 0.0f};
	__declspec(align(16)) double countBy[2]			= {2.0f, 2.0f};
	__declspec(align(16)) double increment[2]		= {0.0f,1.0f};
	__declspec(align(16)) double nearZ[2]			= {zNear, zNear};
	__declspec(align(16)) double farZ[2]			= {zFar, zFar};
	__declspec(align(16)) double zNearTimesZFar[2]	= {zNear * zFar, zNear * zFar};
	__declspec(align(16)) double zFarMinusZNear[2]	= {zFar - zNear, zFar - zNear};
	__declspec(align(16)) double zNearMinusZFar[2]	= {-zFar + zNear, -zFar + zNear};

	__asm
	{
		mov		ecx,		65536			;	counter	set to 65536
		shr		ecx,		1				;	div by 2 XMM==2 doubles
		mov		eax,		0				;	pointer offset
		movapd	XMM2,		[increment]		;	
		loop_main:							;
			movapd	XMM0,	XMM2			;	preserve the increment
			movapd	XMM1,	[zNearTimesZFar];	load zNearTimesZFar
			;--------------------------------
			divpd	XMM0,	[ushrtRange]	;	x = i / 65535
			mulpd	XMM0,	[zNearMinusZFar];	x = zNearMinusZFar * x
			addpd	XMM0,	[farZ]			;	x+= zFar
			divpd	XMM0,	[zNearTimesZFar];	x = zNearTimesZFar / x
			subpd	XMM1,	[nearZ]			;	x -= zNear
			divpd	XMM1,	[zFarMinusZNear];	x /= zFarMinusZNear
			mulpd	XMM1,	[ushrtRange]	;	x *= 65535
			minpd	XMM1,	[ushrtRange]	;
			maxpd	XMM1,	[floorAll]		;
			cvttpd2dq	XMM0,	XMM1		;	convert to unsigned ints
			movdq2q		MM0,	XMM0		;	move them to an MM register
			movq [dcAntiLogScaleLookUpTableUShort + eax], MM0;	move to LUT
			addps	XMM2,	[countBy]		;
			add		eax,	8				;	increment pointer offset
			dec		ecx						;
		jnz loop_main						;
		emms;
	};

	return 0;
}
[edited by - Ren Aissanz on June 7, 2004 4:52:16 PM]

Share this post


Link to post
Share on other sites
Advertisement

  • Advertisement
×

Important Information

By using GameDev.net, you agree to our community Guidelines, Terms of Use, and Privacy Policy.

We are the game development community.

Whether you are an indie, hobbyist, AAA developer, or just trying to learn, GameDev.net is the place for you to learn, share, and connect with the games industry. Learn more About Us or sign up!

Sign me up!