#### Archived

This topic is now archived and is closed to further replies.

# Optimizations

This topic is 6552 days old which is more than the 365 day threshold we allow for new replies. Please post a new topic.

## Recommended Posts

I have designed this function to alpha blend using lookup tables, but it isn't fast enough. I have found that the slowest part is the actual plotting of the pixel. Is there any way to speed this up, maybe using assembly? The code to plot the pixel on a 640x480 16bit surface:
/* Pack red, green and blue components into a single 16-bit pixel value.
 * Blue occupies the low bits, green is shifted left by 6 and red by 11;
 * the three fields are combined with bitwise OR. Each argument is
 * parenthesized so that expressions such as (a | b) can be passed safely.
 * NOTE(review): a 5-6-5 layout normally places green at bit 5; the shift
 * of 6 is kept exactly as written -- confirm the range of g that callers pass. */
#define _RGB16BIT565(r,g,b) ((b) | ((g) << 6) | ((r) << 11))

...

// Store the packed 16-bit colour at column (index_x + dx) of row (index_y + dy);
// l_Pitch[] is the lookup table of precomputed y*lPitch row offsets.
dvidbuffer[index_x + dx + l_Pitch[index_y + dy]] = _RGB16BIT565(fred,fgreen,fblue);

where dvidbuffer is a video buffer in video memory, and l_Pitch is a lookup table containing all possible values for y*lPitch. Thanks in advance. Edited by - ziplux on 2/13/00 9:07:16 AM

##### Share on other sites
I found some alpha-blending code somewhere; maybe it will help you out. It only changes the colors slightly and mixes them a bit.
Here is the code; it processes 2 pixels at a time:

// water alpha blending

DWORD PLUS64 = 64 / (64 << 16);
DWORD ALPHABY4 = (ALPHA / 4) / ((ALPHA / 4) << 16;
DWORD doubleColorKey = sColorKey / (sColorKey << 16);

...

//is the sprite with odd or even ( calculation )

if ( width % 2 == 1 )
{
oddWidth = TRUE;
width = (width - 1) / 2; //div by 2, 2 pixels at a time
}
else
{
width = width / 2; //div by 2, 2 pixels at a time

}

i = height;
do
{
if ( oddWidth )
{
sTemp = *((word*)lpSprite);

if ( sTemp != sColorkey )
{
dTemp = *((word*)lpDest);

sb = sTemp & 0x1f;
db = dTemp & 0x1f;
sg = (sTemp >> 5) & 0x3F;
dg = (dTemp >> 5) & 0x3F;
sr = (sTemp >> 11) & 0x1F;
dr = (dTemp >> 11) & 0x1F;

*((word*)lpDest) = (DWORD)((ALPHA * (sb - db)
>> 8) + db /
((ALPHA * (sg - dg) >> 8) + dg) << 5 /
((ALPHA * (sr - dr) >> 8) + dr) <<
11);
}

lpDest += 2;
lpSprite += 2;
}
j = width
do
{
sTemp = *((DWORD*)lpSprite);

if ( sTemp != doubleColorKey )
{
dTemp = *((DWORD*)lpDest);

sb = sTemp & 0x001f001f;
db = dTemp & 0x001f001f;
sg = (sTemp >> 5) & 0x003F003F;
dg = (dTemp >> 5) & 0x003F003F;
sr = (sTemp >> 11) & 0x001F001F;
dr = (dTemp >> 11) & 0x001F001F;

BLUEC = ((((ALPHA * ((sb + PLUS64) - db)) >>
8) + db) - ALPHABY4) & 0x001F001F;
GREENC = (((((ALPHA * ((sg + PLUS64) - dg))
>> 8) + dg) - ALPHABY4) & 0x003F003F) << 5;
REDC = (((((ALPHA * ((sr + PLUS64) - dr))
>> 8) + dr) - ALPHABY4) & 0x001F001F) << 1;

Result = BLUEC / GREENC / REDC;

if ( (sTemp >> 16) == sColorKey )
Result = (Result & 0xFFFF) /
(dTemp & 0xFFFF0000);
else if ( (sTemp & 0xFFFF) == sColorkey )
Result = (Result &
0xFFFF0000) / (dTemp & 0xFFFF);

*((DWORD*)lpDest) = Result;
}
lpDest += 4;
lpSprite += 4;

}while ( --j > 0 );
lpDest += dbuf;
lpSprite += sbuf;

}while ( --i > 0);

##### Share on other sites
Maybe you should try doing all the processing on a system memory surface, then blitting it to your video memory surface. Accessing video memory directly is very slow.

##### Share on other sites
Exactly what Alex said. Accessing video memory randomly is VERY slow, so you're better off doing all the work in system memory and then blitting the whole thing to video memory at once.

##### Share on other sites
In addition to working on a surface in system memory, you should consider, instead of reindexing the surface for every pixel, acquiring a pointer to the beginning of the line that you're working on, and incrementing the pointer across the line. That should save you a few additions and a dereference per pixel plotted.