I was interested in seeing what the performance different between C++, C# and ASM with a C++ wrapper would be. So far the results have surprised me since C# is the fastest, this must be because my C++/ASM isn't good enough so maybe you guys can point out where I'm going wrong.
The program I'm testing with is quite simple all it does is:
a) get a number from the user
b) calculate all the primes between 1 and the number
c) output the number of primes found and the time taken to find them.
All 3 programs use the same algorithm:
if x is 1, 2, or 3 return true
if x devided by 2 has a remainder of zero return false
otherwise devide by all odd numbers starting with 3 until the square root of x is reached or the remainder is zero (indicating a composite number)
I compiled all of the programs with full optimizations and ran them on my computer (AthlonXP 2800+ with 1GB of RAM). The C++ version and my ASM version both took 19.5-20secs to find all the primes between 1 and 10,000,000.
The C# version took 20-20.5secs when I was using ints, but after I changed it to use doubles to reduce the ammount of type conversions, it does it in 16-17secs!
When I looked at the disassembly for the C# version I found that it's using almost all floating point processer instructions, but then I calls some functions that I couldn't step into (well I'm sure I could if I spent some time figuring out how).
Here's a short part of the disassembly. I haven't used floating point instructions much and I don't really understand what it's doing in the beginning there. It looks to me like it's just moving data through the ALU by pushing and then poping it somewhere else.
if (TestNumber % x == 0)
0000008c fld qword ptr [ebp+8]
0000008f sub esp,8
00000092 fstp qword ptr [esp]
00000095 fld qword ptr [esp+10h]
00000099 sub esp,8
0000009c fstp qword ptr [esp]
0000009f call 79235954
000000a4 fldz
000000a6 fcomip st,st(1)
000000a8 fstp st(0)
000000aa jp 000000B9
000000ac jne 000000B9
Here's the source of my C# version
using System;
using System.Collections.Generic;
using System.Text;
namespace Csharp_Test_console_application
{
class Program
{
static void Main(string[] args)
{
double x;
int PrimesFound = 0;
DateTime start, end;
Console.WriteLine("Find primes up to 'x'. Please enter x:");
x = Convert.ToDouble(Console.ReadLine());
start = System.DateTime.Now;
for (double y = 1; y <= x; y++)
{
if (IsPrime(y))
{
PrimesFound++;
}
}
end = System.DateTime.Now;
Console.WriteLine("Found {0} primes in {1}", PrimesFound, end - start);
Console.Read();
}
static bool IsPrime(double TestNumber)
{
double limit = System.Math.Sqrt(TestNumber) + 1;
if (TestNumber == 2)
return true;
if(TestNumber % 2 == 0) //Check if TestNumber is even
return false;
double x = 3;
//Devide until the square root is reached
for (; x < limit; x += 2)
{
if (TestNumber % x == 0)
return false;
}
return true;
}
}
}
Source of my C++ version
// Cpp primes.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
using std::cin;
using std::cout;
bool IsPrime(int TestNumber);
int _tmain(int argc, _TCHAR* argv[])
{
int x, PrimesFound = 0;
DWORD start, end;
cout << "Find primes up to 'x'. Please enter x:";
cin >> x;
start = GetTickCount();
for (int y = 1; y <= x; y++)
{
if (IsPrime(y))
{
PrimesFound++;
}
}
end = GetTickCount();
cout << "Found " << PrimesFound << " primes in " << end - start;
while(!_kbhit());
return 0;
}
_inline bool IsPrime(int TestNumber)
{
int limit = (int)sqrt((double)TestNumber) + 1;
if (TestNumber == 2)
return true;
if(TestNumber % 2 == 0) //Check if TestNumber is even
return false;
int x = 3;
//Devide until the square root is reached
for (; x < limit; x += 2)
{
if (TestNumber % x == 0)
return false;
}
return true;
}
Assembly with C++ for the input and output
// asm primes.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
using std::cout;
using std::cin;
int _tmain(int argc, _TCHAR* argv[])
{
int x, PrimesFound = 0;
DWORD start, end;
cout << "Find primes up to 'x'. Please enter x:";
cin >> x;
start = GetTickCount();
int limit, temp;
_asm
{
pusha //Save registers
mov esi, 1 //Start checking numbers from 1
mov edi, x //Loop until X is reached
Forloop: cmp esi, edi
ja Done //Exit if X has been reached
//Check if this number is one, two, or three
cmp esi, 3
ja Next
inc PrimesFound
jmp CheckDone
//Check if divisible by two
Next: test esi, 1 //If ESI is odd it's not divisible by 2
jnz Next2 // so continuing checking for primality
jmp CheckDone
Next2: mov temp, esi
fild temp //Convert
fsqrt // to square
fistp limit // root
mov ebx, 3 //Set EBX to 3
mov ecx, limit //Set up the loop counter
shr ecx, 1 // devide by 2 since we'll be incrementing by 2
Check: mov eax, esi //Restore test number
xor edx, edx // and clear remainder
div ebx //Divide and
or edx, edx // test for
jz CheckDone // zero
inc ebx //increment divisor
inc ebx // ""
loop Check //Continue until the square root is reached
//Number is prime
inc PrimesFound
CheckDone: inc esi //Increment ESI so the next number will be checked
jmp Forloop // for primality
Done: popa //Restore registers
}
end = GetTickCount();
cout << "Found " << PrimesFound << " primes in " << end - start;
while(!_kbhit());
return 0;
}
stdafx.h for C++ versions
// stdafx.h : include file for standard system include files,
// or project specific include files that are used frequently, but
// are changed infrequently
//
#pragma once
#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers
#include <stdio.h>
#include <tchar.h>
#include <math.h>
#include <windows.h>
#include <conio.h>
#include <iostream>
// TODO: reference additional headers your program requires here