OK, I'm not an MMX programmer... (LONG)
Here's my problem:
I've been using an alpha-blend blitter routine written in MMX by someone else for a while now. But what I have found is that it doesn't properly handle surface widths which are not a multiple of 4. Here is the code it uses:
// Alpha-blend a sprite onto the destination one row at a time, processing four
// 16-bit pixels (8 bytes) per MMX iteration. The shifts of 11 and 5 and the
// SPAN_RUN_565 label indicate an RGB 5-6-5 pixel layout.
// NOTE(review): j = width/4 truncates, so the trailing width % 4 pixels of each
// row are never processed by this loop.
// NOTE(review): the inner loop tests its counter only after the body has run,
// so it executes once even when j is 0 (width < 4) and touches 8 bytes anyway.
// NOTE(review): lpDest/lpSprite advance by 8 * (width/4) bytes per row before
// dbuf/sbuf are added; presumably dbuf/sbuf are the row padding for the full
// width -- confirm, since rows would then skew whenever width % 4 != 0.
i = height;
do
{
j = width/4; // number of 4-pixel groups to blend in this row
// Author's note: instructions were grouped in pairs that were guessed to
// issue together in the Pentium U and V pipes; keep the instruction order
// unchanged if that scheduling matters.
_asm
{
push edi ; Preserve edi; restored by the pop at the end of this block
push esi ; Preserve esi; restored by the pop at the end of this block
mov edi,lpDest ; edi = current destination pointer
mov esi,lpSprite ; esi = current source (sprite) pointer
SPAN_RUN_565: movq mm7,[edi] ; mm7 = 4 destination pixels (8 bytes at edi)
movq mm6,[esi] ; mm6 = 4 source pixels (8 bytes at esi)
movq mm2,ALPHA64 ; mm2 = ALPHA64, the per-word multiplier used by every pmullw below
movq mm0,mm7 ; RED - mm0 = copy of the destination pixels
pand mm0,MASKRED ; RED - keep only the destination red bits [0r00 0r00 0r00 0r00]
movq mm1,mm6 ; RED - mm1 = copy of the source pixels
pand mm1,MASKRED ; RED - keep only the source red bits [0r00 0r00 0r00 0r00]
psrlw mm0,11 ; RED - move destination red down to the low bits [000r 000r 000r 000r]
movq mm5,mm7 ; GRN - mm5 = copy of the destination pixels
psrlw mm1,11 ; RED - move source red down to the low bits [000r 000r 000r 000r]
paddw mm1, ADD64 ; RED - add ADD64 to the source (presumably a bias that keeps src - dst non-negative -- confirm)
movq mm3,mm6 ; GRN - mm3 = copy of the source pixels
psubsw mm1,mm0 ; RED - (source + ADD64) - destination, signed saturating
pand mm5,MASKGREEN ; GRN - keep only the destination green bits [00g0 00g0 00g0 00g0]
pmullw mm1,mm2 ; RED - multiply the difference by the alpha words (low 16 bits kept)
pand mm3,MASKGREEN ; GRN - keep only the source green bits [00g0 00g0 00g0 00g0]
psrlw mm5,5 ; GRN - move destination green down to the low bits [000g 000g 000g 000g]
psrlw mm3,5 ; GRN - move source green down to the low bits [000g 000g 000g 000g]
nop ; Filler instruction (presumably for instruction pairing -- confirm)
paddw mm3, ADD64 ; GRN - add ADD64 to the source (same bias as for red)
psrlw mm1,8 ; RED - divide by 256 (fixed-point scale, avoids floating point math)
psubsw mm3,mm5 ; GRN - (source + ADD64) - destination, signed saturating
pmullw mm3,mm2 ; GRN - multiply the difference by the alpha words (low 16 bits kept)
paddw mm1,mm0 ; RED - add the destination red component back
psubw mm1, ALPHABY4 ; RED - subtract ALPHABY4 (presumably cancels the scaled ADD64 bias -- confirm)
psllw mm1,11 ; RED - shift the blended red back into place, 11 to the left
movq mm0,mm7 ; BLU - mm0 = copy of the destination pixels
pand mm0, MASKBLUE ; BLU - keep only the destination blue bits [000b 000b 000b 000b]
psrlw mm3,8 ; GRN - divide by 256 (fixed-point scale, avoids floating point math)
paddw mm3,mm5 ; GRN - add the destination green component back
movq mm4, mm6 ; BLU - mm4 = copy of the source pixels
psubw mm3, ALPHABY4 ; GRN - subtract ALPHABY4 (same correction as for red)
pand mm4, MASKBLUE ; BLU - keep only the source blue bits in mm4 [000b 000b 000b 000b]
psllw mm3,5 ; GRN - shift the blended green back into place, 5 to the left
paddw mm4, ADD64 ; BLU - add ADD64 to the source (same bias as for red)
psubsw mm4,mm0 ; BLU - (source + ADD64) - destination, signed saturating
por mm1,mm3 ; mm1 = blended red | blended green
pmullw mm4,mm2 ; BLU - multiply the difference by the alpha words (low 16 bits kept)
movq mm3,COLORKEY64 ; mm3 = COLORKEY64 (presumably the colour key replicated in each word -- confirm)
psrlw mm4,8 ; BLU - divide by 256 (fixed-point scale, avoids floating point math)
pcmpeqw mm3,mm6 ; mm3 word = 0xFFFF where the source pixel equals the key word, else 0
paddw mm4,mm0 ; BLU - add the destination blue component back
movq mm5,mm3 ; mm5 = copy of the colour-key mask
psubw mm4, ALPHABY4 ; BLU - subtract ALPHABY4 (same correction as for red)
por mm1,mm4 ; mm1 = blended red | green | blue
pand mm5,mm7 ; mm5 = destination pixels where the source matched the colour key
pandn mm3,mm1 ; mm3 = (NOT mask) AND blended pixels, i.e. blend where the source did not match
por mm3,mm5 ; OR the two halves: keyed pixels keep the destination, the rest get the blend
movq [edi],mm3 ; Store the 4 resulting pixels to the destination
add edi,8 ; Advance the destination pointer by 8 bytes (4 pixels)
mov eax,dword ptr [j] ; eax = remaining group count j
add esi,8 ; Advance the source pointer by 8 bytes (4 pixels)
sub eax,1 ; eax = j - 1
mov dword ptr [lpDest], edi ; Write the advanced destination pointer back to lpDest
mov dword ptr [j],eax ; Write the decremented count back to j
mov dword ptr [lpSprite], esi ; Write the advanced source pointer back to lpSprite
cmp eax,0 ; Compare the remaining count with zero
jg SPAN_RUN_565 ; Loop again while the count is still greater than zero (signed)
emms ; Clear the MMX state; inside the row loop, so it runs once per row
pop esi ; Restore esi
pop edi ; Restore edi
}
lpDest += dbuf; // add dbuf to the destination pointer (presumably skips to the next row -- confirm)
lpSprite += sbuf; // add sbuf to the source pointer (presumably skips to the next row -- confirm)
}while (--i > 0); // repeat until all `height` rows have been processed
Okay, so I need a clean-up routine after every line which will handle the 1, 2, or 3 pixels left over by the move-4-at-a-time loop. Something like this:
if (j*4 != width)
{
handle the pixels here
}
Can anybody give me a hand in filling in the blank?
I am the author of that code. I'm surprised I did not code for that case. My bad. If you look in the source file where you got that code from, you will find something like:
// Scalar fallback for one leftover 16-bit RGB 5-6-5 pixel: blends the source
// pixel into the destination unless it equals the colour key, then advances
// both pointers by one pixel (2 bytes).
// Fix: the three channels are now combined with bitwise OR. The previous '/'
// operators divided the channels (with a possible division by zero) instead of
// packing them back into a pixel.
// NOTE(review): handles a single pixel only; widths that leave 2 or 3 pixels
// over need this repeated once per leftover pixel.
// NOTE(review): assumes ALPHA, sb/db, sg/dg, sr/dr are signed ints so that a
// negative (src - dst) difference scales correctly -- confirm the declarations.
if (oddWidth)
{
    sTemp = *((WORD*)lpSprite);
    if (sTemp != ColorKey)
    {
        dTemp = *((WORD*)lpDest);

        // Split both pixels into 5-bit blue, 6-bit green and 5-bit red.
        sb = sTemp & 0x1f;
        db = dTemp & 0x1f;
        sg = (sTemp >> 5) & 0x3f;
        dg = (dTemp >> 5) & 0x3f;
        sr = (sTemp >> 11) & 0x1f;
        dr = (dTemp >> 11) & 0x1f;

        // Per channel: dst + ALPHA * (src - dst) / 256, then repack as 5-6-5.
        *((WORD*)lpDest) = (WORD)(((ALPHA * (sb - db) >> 8) + db) |
                                  (((ALPHA * (sg - dg) >> 8) + dg) << 5) |
                                  (((ALPHA * (sr - dr) >> 8) + dr) << 11));
    }
    lpDest += 2;
    lpSprite += 2;
}
This is not 100% what you need but you should be able to figure it out from there.
John
// Scalar fallback for one leftover 16-bit RGB 5-6-5 pixel: blends the source
// pixel into the destination unless it equals the colour key, then advances
// both pointers by one pixel (2 bytes).
// Fix: the three channels are now combined with bitwise OR. The previous '/'
// operators divided the channels (with a possible division by zero) instead of
// packing them back into a pixel.
// NOTE(review): handles a single pixel only; widths that leave 2 or 3 pixels
// over need this repeated once per leftover pixel.
// NOTE(review): assumes ALPHA, sb/db, sg/dg, sr/dr are signed ints so that a
// negative (src - dst) difference scales correctly -- confirm the declarations.
if (oddWidth)
{
    sTemp = *((WORD*)lpSprite);
    if (sTemp != ColorKey)
    {
        dTemp = *((WORD*)lpDest);

        // Split both pixels into 5-bit blue, 6-bit green and 5-bit red.
        sb = sTemp & 0x1f;
        db = dTemp & 0x1f;
        sg = (sTemp >> 5) & 0x3f;
        dg = (dTemp >> 5) & 0x3f;
        sr = (sTemp >> 11) & 0x1f;
        dr = (dTemp >> 11) & 0x1f;

        // Per channel: dst + ALPHA * (src - dst) / 256, then repack as 5-6-5.
        *((WORD*)lpDest) = (WORD)(((ALPHA * (sb - db) >> 8) + db) |
                                  (((ALPHA * (sg - dg) >> 8) + dg) << 5) |
                                  (((ALPHA * (sr - dr) >> 8) + dr) << 11));
    }
    lpDest += 2;
    lpSprite += 2;
}
This is not 100% what you need but you should be able to figure it out from there.
John
This topic is closed to new replies.
Advertisement
Popular Topics
Advertisement