Archived

This topic is now archived and is closed to further replies.

Assembler015

custom memset faster than Microsoft's

Recommended Posts

I wrote these a while ago, and in my benchmarks (timed with clock counts) they came out faster than Microsoft's versions. If anyone has a profiler, I would like to see the results you get. Here's the code:
    
// Copy routines. _length is counted in elements of the width named by the
// function suffix: dwords for memcpy32, words for memcpy16, bytes for memcpy8.
void memcpy32 (void* _dest, void* _source, unsigned int _length);
void memcpy16 (void* _dest, void* _source, unsigned int _length);
void memcpy8  (void* _dest, void* _source, unsigned int _length);

// Fill routines. _source is the value stored into memory; the last argument
// is the number of dword / word / byte elements to write.
void memset32 (void* _memory, unsigned int _source, unsigned int _dwords);
void memset16 (void* _memory, unsigned int _source, unsigned int _words);
void memset8  (void* _memory, unsigned int _source, unsigned int _bytes);

// memcpy32 - copy _length 32-bit dwords from _source to _dest.
//
// ABI:     32-bit x86 cdecl, naked (no compiler prologue/epilogue).
// In:      [esp+4] = _dest, [esp+8] = _source, [esp+12] = _length (in dwords).
// Out:     nothing.
// Clobbers: eax, ecx, edx, flags. esi and edi are saved and restored because
//          the calling convention requires the callee to preserve them.
//
// The copy runs from low to high addresses, two dwords per iteration, with a
// single trailing dword handled afterwards. The remaining count is checked
// BEFORE each two-dword copy, so every length (0, 1, even, odd) copies exactly
// _length dwords.
void __declspec(naked)memcpy32 (void* _dest, void* _source, unsigned int _length)
{
	__asm
	{
		push esi
		push edi

		// Two pushes moved the arguments 8 bytes further from esp.
		mov edi, [esp+12]		// edi = _dest
		mov esi, [esp+16]		// esi = _source
		mov ecx, [esp+20]		// ecx = dwords remaining

MEMCPY32_CPYNEXT:
		cmp ecx, 2
		jb short MEMCPY32_LAST		// fewer than two dwords left

		mov eax, [esi]
		mov edx, [esi+4]
		mov [edi], eax
		mov [edi+4], edx

		add esi, 8
		add edi, 8
		sub ecx, 2
		jmp short MEMCPY32_CPYNEXT

MEMCPY32_LAST:
		// Here ecx is 0 or 1.
		test ecx, ecx
		jz short MEMCPY32_DONE

		mov eax, [esi]
		mov [edi], eax

MEMCPY32_DONE:
		pop edi
		pop esi
		ret
	}
}

// memcpy16 - copy _length 16-bit words from _source to _dest.
//
// ABI:     32-bit x86 cdecl, naked (no compiler prologue/epilogue).
// In:      [esp+4] = _dest, [esp+8] = _source, [esp+12] = _length (in words).
// Out:     nothing.
// Clobbers: eax, ecx, edx, flags. esi and edi are saved and restored because
//          the calling convention requires the callee to preserve them.
//
// Bulk of the data moves four words (8 bytes) per iteration; the 0..3 words
// left over are copied one at a time. A remainder of exactly four words is
// handled by the bulk loop rather than being skipped.
void __declspec(naked)memcpy16 (void* _dest, void* _source, unsigned int _length)
{
	__asm
	{
		push esi
		push edi

		// Two pushes moved the arguments 8 bytes further from esp.
		mov edi, [esp+12]		// edi = _dest
		mov esi, [esp+16]		// esi = _source
		mov ecx, [esp+20]		// ecx = words remaining

MEMCPY16_CPYNEXT:
		cmp ecx, 4
		jb short MEMCPY16_LAST		// fewer than four words left

		mov eax, [esi]
		mov edx, [esi+4]
		mov [edi], eax
		mov [edi+4], edx

		add esi, 8
		add edi, 8
		sub ecx, 4
		jmp short MEMCPY16_CPYNEXT

MEMCPY16_LAST:
		// Here ecx is 0..3.
		test ecx, ecx
		jz short MEMCPY16_DONE

MEMCPY16_TAIL:
		mov ax, [esi]
		mov [edi], ax
		add esi, 2
		add edi, 2
		dec ecx
		jnz short MEMCPY16_TAIL

MEMCPY16_DONE:
		pop edi
		pop esi
		ret
	}
}

// memcpy8 - copy _length bytes from _source to _dest.
//
// ABI:     32-bit x86 cdecl, naked (no compiler prologue/epilogue).
// In:      [esp+4] = _dest, [esp+8] = _source, [esp+12] = _length (in bytes).
// Out:     nothing.
// Clobbers: eax, ecx, edx, flags. esi and edi are saved and restored because
//          the calling convention requires the callee to preserve them.
//
// Bulk of the data moves 8 bytes per iteration; the 0..7 bytes left over are
// copied one at a time. A remainder of exactly eight bytes is handled by the
// bulk loop rather than being skipped.
void __declspec(naked)memcpy8 (void* _dest, void* _source, unsigned int _length)
{
	__asm
	{
		push esi
		push edi

		// Two pushes moved the arguments 8 bytes further from esp.
		mov edi, [esp+12]		// edi = _dest
		mov esi, [esp+16]		// esi = _source
		mov ecx, [esp+20]		// ecx = bytes remaining

MEMCPY8_CPYNEXT:
		cmp ecx, 8
		jb short MEMCPY8_LAST		// fewer than eight bytes left

		mov eax, [esi]
		mov edx, [esi+4]
		mov [edi], eax
		mov [edi+4], edx

		add esi, 8
		add edi, 8
		sub ecx, 8
		jmp short MEMCPY8_CPYNEXT

MEMCPY8_LAST:
		// Here ecx is 0..7.
		test ecx, ecx
		jz short MEMCPY8_DONE

MEMCPY8_TAIL:
		mov al, [esi]
		mov [edi], al
		inc esi
		inc edi
		dec ecx
		jnz short MEMCPY8_TAIL

MEMCPY8_DONE:
		pop edi
		pop esi
		ret
	}
}

// memset32 - store the 32-bit value _source into _dwords consecutive dwords
// starting at _memory.
//
// ABI:     32-bit x86 cdecl, naked (no compiler prologue/epilogue).
// In:      [esp+4] = _memory, [esp+8] = _source, [esp+12] = _dwords.
// Out:     nothing.
// Clobbers: eax, ecx, edx, flags. Only caller-saved registers are used, so
//          nothing has to be pushed or restored.
void __declspec(naked)memset32 (void* _memory, unsigned int _source, unsigned int _dwords)
{
	__asm
	{
		mov edx, [esp+4]		// edx = write pointer
		mov eax, [esp+8]		// eax = fill value
		mov ecx, [esp+12]		// ecx = dwords remaining

		test ecx, ecx
		jz short MEMSET32_DONE

MEMSET32_SETNEXT:
		mov [edx], eax
		add edx, 4
		dec ecx
		jnz short MEMSET32_SETNEXT

MEMSET32_DONE:
		ret
	}
}

// memset16 - store the low 16 bits of _source into _words consecutive words
// starting at _memory.
//
// ABI:     32-bit x86 cdecl, naked (no compiler prologue/epilogue).
// In:      [esp+4] = _memory, [esp+8] = _source, [esp+12] = _words.
// Out:     nothing.
// Clobbers: eax, ecx, edx, flags. Only caller-saved registers are used, so
//          nothing has to be pushed or restored.
//
// The word is replicated into both halves of eax so two words can be written
// with a single dword store; ax still holds the single word for the odd tail.
void __declspec(naked)memset16 (void* _memory, unsigned int _source, unsigned int _words)
{
	__asm
	{
		mov edx, [esp+4]			// edx = write pointer

		movzx eax, word ptr [esp+8]		// eax = 0000vvvv
		mov ecx, eax
		shl ecx, 16				// ecx = vvvv0000
		or eax, ecx				// eax = vvvvvvvv (word in both halves)

		mov ecx, [esp+12]			// ecx = words remaining

MEMSET16_SETNEXT:
		cmp ecx, 2
		jb short MEMSET16_LAST			// fewer than two words left

		mov [edx], eax
		add edx, 4
		sub ecx, 2
		jmp short MEMSET16_SETNEXT

MEMSET16_LAST:
		// Here ecx is 0 or 1.
		test ecx, ecx
		jz short MEMSET16_DONE

		mov [edx], ax

MEMSET16_DONE:
		ret
	}
}

// memset8 - store the low 8 bits of _source into _bytes consecutive bytes
// starting at _memory.
//
// ABI:     32-bit x86 cdecl, naked (no compiler prologue/epilogue).
// In:      [esp+4] = _memory, [esp+8] = _source, [esp+12] = _bytes.
// Out:     nothing.
// Clobbers: eax, ecx, edx, flags. Only caller-saved registers are used, so
//          nothing has to be pushed or restored.
//
// The byte is replicated into all four lanes of eax so four bytes can be
// written with a single dword store; al still holds the single byte for the
// 0..3 byte tail.
void __declspec(naked)memset8 (void* _memory, unsigned int _source, unsigned int _bytes)
{
	__asm
	{
		mov edx, [esp+4]			// edx = write pointer

		movzx eax, byte ptr [esp+8]		// eax = 000000vv
		imul eax, eax, 01010101h		// eax = vvvvvvvv (byte in every lane)

		mov ecx, [esp+12]			// ecx = bytes remaining

MEMSET8_SETNEXT:
		cmp ecx, 4
		jb short MEMSET8_LAST			// fewer than four bytes left

		mov [edx], eax
		add edx, 4
		sub ecx, 4
		jmp short MEMSET8_SETNEXT

MEMSET8_LAST:
		// Here ecx is 0..3.
		test ecx, ecx
		jz short MEMSET8_DONE

MEMSET8_TAIL:
		mov [edx], al
		inc edx
		dec ecx
		jnz short MEMSET8_TAIL

MEMSET8_DONE:
		ret
	}
}

    
Edited by - Assembler015 on January 13, 2002 8:13:00 PM Edited by - Assembler015 on January 13, 2002 8:13:43 PM

Share this post


Link to post
Share on other sites