Sign in to follow this  

Extreme DXT Compression paper

This topic is 1993 days old which is more than the 365 day threshold we allow for new replies. Please post a new topic.

If you intended to correct an error in the post then please contact us.

Recommended Posts

Has anybody gotten the code in the Extreme DXT Compression paper to work?
[url="http://www.cauldron.sk/files/extreme_dxt_compression.pdf"]http://www.cauldron.sk/files/extreme_dxt_compression.pdf[/url]

There are no explicit requirements concerning the alignment of the input arrays. I've tried both default alignment and aligning them to 16-bit boundaries; however, the code crashes when it gets to the last line I listed below. I've also defined USE_SSE3 and FIX_DXT1_BUG.

[CODE]
// NOTE(review): excerpt of CompressImageDXT1 from the "Extreme DXT Compression"
// paper (Cauldron), MSVC 32-bit inline assembly, Intel syntax. The excerpt is
// TRUNCATED: the esi/edi pointer advances, the x_loop/y_loop back-branches and
// decrement of x_count/y_count, and the closing braces are not shown, so this
// cannot compile or be verified as a unit.
//
// Visible intent: for each 4x4-texel block of a 32-bpp (BYTE-per-channel) image,
// compute per-channel min/max over the 16 texels, derive two quantized endpoint
// colors, and store them into the DXT1 output. Two adjacent 4x4 blocks are
// processed per iteration (byte offsets +0 and +16 within each row).
//
// ALIGNMENT: every movdqa below faults (#GP -> access violation) unless its
// memory operand is 16-BYTE aligned. That means: `argb` itself, each row start
// (so width*4 must be a multiple of 16, i.e. width a multiple of 4), and the
// sse2_minimum / sse2_range / sse2_bounds scratch buffers and SSE2_* constants
// declared elsewhere (not shown here) — all 16-byte aligned. The post above
// mentions aligning to "16 bit" boundaries; 2-byte alignment is NOT sufficient.
// The reported crash at the final `mov [edi + 8]` store suggests edi/dst may
// also be the problem (DXT1 output needs width*height/2 bytes) — TODO confirm.
void CompressImageDXT1(const BYTE* argb, BYTE* dxt1, int width, int height) {
int x_count;
int y_count;
__asm {
mov esi, DWORD PTR argb // src
mov edi, DWORD PTR dxt1 // dst
mov eax, DWORD PTR height
mov DWORD PTR y_count, eax
y_loop:
mov eax, DWORD PTR width
mov DWORD PTR x_count, eax
x_loop:
// eax = width (pixels); rows are width*4 bytes apart, so [esi + eax*4]
// is one row down, [esi + eax*8] two rows, [esi + ebx*4] (ebx = width*3)
// three rows down.
mov eax, DWORD PTR width // width * 1
lea ebx, DWORD PTR [eax + eax*2] // width * 3
// --- Block A (bytes 0..15 of each row = 4 texels) -----------------------
// Vertical reduction: xmm0 = per-byte unsigned max of the 4 rows,
// xmm1 = per-byte unsigned min of the 4 rows (pmaxub/pminub).
movdqa xmm0, XMMWORD PTR [esi + 0] // src + width * 0 + 0
movdqa xmm3, XMMWORD PTR [esi + eax*4 + 0] // src + width * 4 + 0
movdqa xmm1, xmm0
pmaxub xmm0, xmm3
pmaxub xmm0, XMMWORD PTR [esi + eax*8 + 0] // src + width * 8 + 0
pmaxub xmm0, XMMWORD PTR [esi + ebx*4 + 0] // src + width * 12 + 0
pminub xmm1, xmm3
pminub xmm1, XMMWORD PTR [esi + eax*8 + 0] // src + width * 8 + 0
pminub xmm1, XMMWORD PTR [esi + ebx*4 + 0] // src + width * 12 + 0
// Horizontal reduction across the 4 texels in the register:
// pshufd 0x4E swaps the two 64-bit halves; 0xB1 swaps adjacent dwords.
// After both rounds every dword of xmm0/xmm1 holds the block-wide
// per-channel max/min.
pshufd xmm2, xmm0, 0x4E
pshufd xmm3, xmm1, 0x4E
pmaxub xmm0, xmm2
pminub xmm1, xmm3
pshufd xmm2, xmm0, 0xB1
pshufd xmm3, xmm1, 0xB1
pmaxub xmm0, xmm2
pminub xmm1, xmm3
// --- Block B (bytes 16..31 of each row = next 4 texels), same scheme ----
movdqa xmm4, XMMWORD PTR [esi + 16] // src + width * 0 + 16
movdqa xmm7, XMMWORD PTR [esi + eax*4 + 16] // src + width * 4 + 16
movdqa xmm5, xmm4
pmaxub xmm4, xmm7
pmaxub xmm4, XMMWORD PTR [esi + eax*8 + 16] // src + width * 8 + 16
pmaxub xmm4, XMMWORD PTR [esi + ebx*4 + 16] // src + width * 12 + 16
pminub xmm5, xmm7
pminub xmm5, XMMWORD PTR [esi + eax*8 + 16] // src + width * 8 + 16
pminub xmm5, XMMWORD PTR [esi + ebx*4 + 16] // src + width * 12 + 16
pshufd xmm6, xmm4, 0x4E
pshufd xmm7, xmm5, 0x4E
pmaxub xmm4, xmm6
pminub xmm5, xmm7
pshufd xmm6, xmm4, 0xB1
pshufd xmm7, xmm5, 0xB1
pmaxub xmm4, xmm6
pminub xmm5, xmm7
// Spill the two per-block minima to a scratch buffer (declared elsewhere;
// must be 16-byte aligned for these movdqa stores).
movdqa XMMWORD PTR sse2_minimum[ 0], xmm1
movdqa XMMWORD PTR sse2_minimum[16], xmm5
// Widen min/max bytes to 16-bit words (interleave with zero bytes).
movdqa xmm7, XMMWORD PTR SSE2_BYTE_0
punpcklbw xmm0, xmm7
punpcklbw xmm4, xmm7
punpcklbw xmm1, xmm7
punpcklbw xmm5, xmm7
// range = max - min, per channel, per block.
movdqa xmm2, xmm0
movdqa xmm6, xmm4
psubw xmm2, xmm1
psubw xmm6, xmm5
movq MMWORD PTR sse2_range[ 0], xmm2
movq MMWORD PTR sse2_range[16], xmm6
// Inset the bounding box by range/16: max -= range>>4, min += range>>4.
psrlw xmm2, 4
psrlw xmm6, 4
psubw xmm0, xmm2
psubw xmm4, xmm6
paddw xmm1, xmm2
paddw xmm5, xmm6
// Interleave max/min words, then scale+mask to pack the endpoint color
// (presumably into R5G6B5 fields — depends on the SSE2_BOUNDS_* constants
// defined elsewhere; TODO confirm against the paper).
punpcklwd xmm0, xmm1
pmullw xmm0, XMMWORD PTR SSE2_BOUNDS_SCALE
pand xmm0, XMMWORD PTR SSE2_BOUNDS_MASK
movdqa XMMWORD PTR sse2_bounds[ 0], xmm0
punpcklwd xmm4, xmm5
pmullw xmm4, XMMWORD PTR SSE2_BOUNDS_SCALE
pand xmm4, XMMWORD PTR SSE2_BOUNDS_MASK
movdqa XMMWORD PTR sse2_bounds[16], xmm4
// ecx/edx pick up the low range word per block; not consumed in the
// visible excerpt (presumably used for index selection further down).
movzx ecx, WORD PTR sse2_range [ 0]
movzx edx, WORD PTR sse2_range [16]
// Merge the packed bound dwords into the two 16-bit endpoint colors
// (color0:color1) per block.
mov eax, DWORD PTR sse2_bounds[ 0]
mov ebx, DWORD PTR sse2_bounds[16]
shr eax, 8
shr ebx, 8
or eax, DWORD PTR sse2_bounds[ 4]
or ebx, DWORD PTR sse2_bounds[20]
or eax, DWORD PTR sse2_bounds[ 8]
or ebx, DWORD PTR sse2_bounds[24]
// Write endpoints of the two DXT1 blocks (8 bytes per block in the
// output stream). This is the store the post reports as the crash site —
// check edi validity and that dxt1 holds width*height/2 bytes.
mov DWORD PTR [edi + 0], eax
mov DWORD PTR [edi + 8], ebx
[/CODE]

Share this post


Link to post
Share on other sites

This topic is 1993 days old which is more than the 365 day threshold we allow for new replies. Please post a new topic.

If you intended to correct an error in the post then please contact us.

Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!

Register a new account

Sign in

Already have an account? Sign in here.

Sign In Now

Sign in to follow this