• Advertisement

Archived

This topic is now archived and is closed to further replies.

custom memset faster than Microsoft's

This topic is 5883 days old which is more than the 365 day threshold we allow for new replies. Please post a new topic.

If you intended to correct an error in the post then please contact us.

Recommended Posts

I wrote these a while ago, and in my benchmarks against Microsoft's versions (using clocks) they came out faster. If anyone has a profiler, I would like to see the results you get. Here's the code:
    
void memcpy32 (void* _dest, void* _source, unsigned int _length);
void memcpy16 (void* _dest, void* _source, unsigned int _length);
void memcpy8  (void* _dest, void* _source, unsigned int _length);

void memset32 (void* _memory, unsigned int _source, unsigned int _dwords);
void memset16 (void* _memory, unsigned int _source, unsigned int _words);
void memset8  (void* _memory, unsigned int _source, unsigned int _bytes);

void __declspec(naked)memcpy32 (void* _dest, void* _source, unsigned int _length)
{
	__asm
	{
		mov ebx, [esp+4]
		mov esi, [esp+8]
		mov ecx, [esp+12]

		test ecx, ecx
		jz short MEMCPY32_DONE

MEMCPY32_CPYNEXT:
		mov eax, [esi]
		mov edx, [esi+4]
		mov [ebx], eax
		sub ecx, 2
		mov [ebx+4], edx

		add esi, 8
		add ebx, 8

		cmp ecx, 2
		jz short MEMCPY32_DONE
		jb short MEMCPY32_LAST
		jmp short MEMCPY32_CPYNEXT

MEMCPY32_LAST:
		mov eax, [esi]
		mov [ebx], eax

MEMCPY32_DONE:
		ret
	}
}

void __declspec(naked)memcpy16 (void* _dest, void* _source, unsigned int _length)
{
	__asm
	{
		mov ebx, [esp+4]
		mov esi, [esp+8]
		mov ecx, [esp+12]

		test ecx, ecx
		jz short MEMCPY16_DONE

MEMCPY16_CPYNEXT:
		cmp ecx, 4
		jz short MEMCPY16_DONE
		jb short MEMCPY16_LAST

		mov eax, [esi]
		mov edx, [esi+4]
		sub ecx, 4
		mov [ebx], eax
		mov [ebx+4], edx

		add esi, 8
		add ebx, 8
		jmp short MEMCPY16_CPYNEXT

MEMCPY16_LAST:
		mov ax, [esi]
		mov [ebx], ax
		dec ecx
		jz short MEMCPY16_DONE

		mov ax, [esi+2]
		mov [ebx+2], ax
		dec ecx
		jz short MEMCPY16_DONE

		mov ax, [esi+4]
		mov [ebx+4], ax

MEMCPY16_DONE:
		ret
	}
}

void __declspec(naked)memcpy8 (void* _dest, void* _source, unsigned int _length)
{
	__asm
	{
		mov ebx, [esp+4]
		mov esi, [esp+8]
		mov ecx, [esp+12]

		test ecx, ecx
		jz short MEMCPY8_DONE

MEMCPY8_CPYNEXT:
		cmp ecx, 8
		jz short MEMCPY8_DONE
		jb short MEMCPY8_LAST

		mov eax, [esi]
		mov edx, [esi+4]
		sub ecx, 8
		mov [ebx], eax
		mov [ebx+4], edx

		add esi, 8
		add ebx, 8
		jmp short MEMCPY8_CPYNEXT

MEMCPY8_LAST:
		mov al, [esi]
		mov [ebx], al
		dec ecx
		jz short MEMCPY8_DONE

		mov al, [esi+1]
		mov [ebx+1], al
		dec ecx
		jz short MEMCPY8_DONE

		mov al, [esi+2]
		mov [ebx+2], al
		dec ecx
		jz short MEMCPY8_DONE

		mov al, [esi+3]
		mov [ebx+3], al
		dec ecx
		jz short MEMCPY8_DONE

		mov al, [esi+4]
		mov [ebx+4], al
		dec ecx
		jz short MEMCPY8_DONE

		mov al, [esi+5]
		mov [ebx+5], al
		dec ecx
		jz short MEMCPY8_DONE

		mov al, [esi+6]
		mov [ebx+6], al

MEMCPY8_DONE:
		ret
	}
}

void __declspec(naked)memset32 (void* _memory, unsigned int _source, unsigned int _dwords)
{
	__asm
	{
		mov ebx, [esp+4]
		mov eax, [esp+8]
		mov ecx, [esp+12]

		test ecx, ecx
		jz MEMSET32_DONE

MEMSET32_SETNEXT:
		mov [ebx], eax

		dec ecx
		jz MEMSET32_DONE

		add ebx, 4
		jmp short MEMSET32_SETNEXT

MEMSET32_DONE:
		ret
	}
}

void __declspec(naked)memset16 (void* _memory, unsigned int _source, unsigned int _words)
{
	__asm
	{
		mov ebx, [esp+4]
		mov eax, [esp+8]
		mov ecx, [esp+12]
		
		mov dx, ax
		shl edx, 16
		mov dx, ax

		test ecx, ecx
		jz MEMSET16_DONE

MEMSET16_SETNEXT:
		cmp ecx, 2
		jb short MEMSET16_LAST

		mov [ebx], edx

		sub ecx, 2

		add ebx, 4
		jmp short MEMSET16_SETNEXT

MEMSET16_LAST:
		test ecx, ecx
		jz short MEMSET16_DONE

		mov [ebx], ax

MEMSET16_DONE:
		ret
	}
}

void __declspec(naked)memset8 (void* _memory, unsigned int _source, unsigned int _bytes)
{
	__asm
	{
		mov ebx, [esp+4]
		mov eax, [esp+8]
		mov ecx, [esp+12]
		
		mov dl, al
		shl edx, 8
		mov dl, al
		shl edx, 8
		mov dl, al
		shl edx, 8
		mov dl, al

		test ecx, ecx
		jz MEMSET8_DONE

MEMSET8_SETNEXT:
		cmp ecx, 4
		jb short MEMSET8_LAST

		mov [ebx], edx

		sub ecx, 4

		add ebx, 4
		jmp short MEMSET8_SETNEXT

MEMSET8_LAST:
		test ecx, ecx
		jz short MEMSET8_DONE

		mov [ebx], al
		dec ecx
		jz short MEMSET8_DONE

		mov [ebx+1], al
		dec ecx
		jz short MEMSET8_DONE

		mov [ebx+2], al


MEMSET8_DONE:
		ret
	}
}

    
Edited by - Assembler015 on January 13, 2002 8:13:00 PM Edited by - Assembler015 on January 13, 2002 8:13:43 PM

Share this post


Link to post
Share on other sites
Advertisement

  • Advertisement