My C++ / D / C# benchmark!

Started by
70 comments, last by Raghar 17 years, 6 months ago
Today I saw D's pi-computing sample (\dmd\samples\d\pi.d) and thought that porting it to C++ and C# to make a little benchmark should be easy. D:
import std.c.stdio;
import std.c.stdlib;
import std.c.time;

// Precision (in digits) above which main() warns that the run will take a while.
const int LONG_TIME=4000;

// Decimal digits of the running result; p[0] is printed before the decimal point.
byte[] p;
// Decimal digits of the current series term, laid out like p.
byte[] t;
// Index of the last digit processed by the helpers (both arrays get length q + 1).
int q;

/**
 * Entry point: reads the requested precision from the command line, computes
 * pi as 4 * (arctan(1/2) + arctan(1/3)) in the digit arrays p[] / t[], and
 * prints the digits together with the elapsed wall-clock seconds.
 *
 * Params:
 *   args = program name followed by exactly one argument, the precision.
 * Returns: 0 on success. Exits with status 55 when the argument count is wrong.
 */
int main(char[][] args)
{
	int startime, endtime;
	int i;

	if (args.length == 2) {
		sscanf(&args[1][0],"%d",&q);
	} else {
		printf("Usage: pi [precision]\n");
		exit(55);
	}

	if (q < 0)
	{
		printf("Precision was too low, running with precision of 0.\n");
		q = 0;
	}

	if (q > LONG_TIME)
	{
	    printf("Be prepared to wait a while...\n");
	}

	// Compute one more digit than we display to compensate for rounding
	q++;

	// Setting .length allocates the digit arrays (indices 0 .. q).
	p.length = q + 1;
	t.length = q + 1;

	/* compute pi */

	std.c.time.time(&startime);
	arctan(2);
	arctan(3);
	mul4();
	std.c.time.time(&endtime);

	// Return to the number of digits we want to display
	q--;

	/* print pi */

	printf("pi = %d.",cast(int)(p[0]));
	// Print one digit per iteration; the index on p is required here.
	for (i = 1; i <= q; i++)
		printf("%d",cast(int)(p[i]));
	printf("\n");
	printf("%ld seconds to compute pi with a precision of %d digits.\n",endtime-startime,q);

	return 0;
}

/*
 * Adds the alternating series for arctan(1/s) into p[], using t[] as the
 * current term. The loop ends once every digit of t[] is zero.
 */
void arctan(int s)
{
	int sSquared = s * s;
	int odd = 1;
	bool subtractNext = true;	/* the term after 1/s is subtracted */

	t[0] = 1;
	div(s); /* t[] = 1/s */
	add();

	for (;;) {
		/* next term magnitude: t * odd / (s*s) / (odd + 2) */
		mul(odd);
		div(sSquared);
		odd += 2;
		div(odd);

		if (subtractNext)
			sub();
		else
			add();
		subtractNext = !subtractNext;

		if (tiszero())
			break;
	}
}

/*
 * p[] += t[], one decimal digit at a time, walking from index q down to 0.
 * A digit sum above 9 is reduced by 10 and 1 is carried into the next
 * higher digit (index k-1).
 */
void add()
{
	for (int k = q; k >= 0; k--)
	{
		int sum = t[k] + p[k];
		if (sum <= 9) {
			p[k] = cast(byte) sum;
			continue;
		}
		p[k] = cast(byte)(sum - 10);
		p[k-1] += 1;	/* carry */
	}
}

/*
 * p[] -= t[], one decimal digit at a time, walking from index q down to 0.
 * When a digit of p is smaller than the digit of t, 10 is added to it and
 * 1 is borrowed from the next higher digit (index k-1).
 */
void sub()
{
	for (int k = q; k >= 0; k--)
	{
		int diff = p[k] - t[k];
		if (diff >= 0) {
			p[k] = cast(byte) diff;
			continue;
		}
		p[k] = cast(byte)(diff + 10);
		p[k-1] -= 1;	/* borrow */
	}
}

/*
 * t[] *= multiplier, processed from the last digit (index q) up to index 0.
 * Any carry left after index 0 is discarded.
 */
void mul(int multiplier)
{
	int b;
	int i;
	int carry = 0, digit = 0;

	for (i = q; i >= 0; i--) {
		/* each digit must be indexed; multiplying the array itself is invalid */
		b = (t[i] * multiplier + carry);
		digit = b % 10;
		carry = b / 10;
		t[i] = cast(byte) digit;
	}
}

/* t[] /= l */

/*
 * t[] /= divisor: schoolbook long division from index 0 to index q.
 * The remainder left after the last digit is discarded.
 */
void div(int divisor)
{
	int i, b;
	int quotient, remainder = 0;

	for (i = 0; i <= q; i++) {
		/* bring down the next digit; the index on t is required */
		b = (10 * remainder + t[i]);
		quotient = b / divisor;
		remainder = b % divisor;
		t[i] = cast(byte) quotient;
	}
}

/*
 * p[] /= 4: long division from index 0 to index q; d carries the running
 * remainder between digits. Not called by main() in this file.
 */
void div4()
{
	int i, c, d = 0;

	for (i = 0; i <= q; i++) {
		c = (10 * d + p[i]) / 4;
		d = (10 * d + p[i]) % 4;
		p[i] = cast(byte) c;
	}
}

/*
 * p[] *= 4, processed from index q up to index 0; c is the carry into the
 * next higher digit. Any carry left after index 0 is discarded.
 */
void mul4()
{
	int i, c, d;

	d = c = 0;

	for (i = q; i >= 0; i--) {
		d = (p[i] * 4 + c) % 10;
		c = (p[i] * 4 + c) / 10;
		p[i] = cast(byte) d;
	}
}

/* Returns true (non-zero) when every digit t[0] .. t[q] is zero. */
int tiszero()
{
	foreach (byte digit; t[0 .. q + 1])
	{
		if (digit != 0)
			return false;
	}
	return true;
}

(I compiled pi.d with dmd -O -release pi.d) c++:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

// Precision (in digits) above which main() warns that the run will take a while.
#define LONG_TIME 4000

// Decimal digits of the running result; p[0] is printed before the decimal point.
__int8* p;
// Decimal digits of the current series term, laid out like p.
__int8* t;
// Index of the last digit processed by the helpers (both arrays hold q + 1 digits).
int q;


// Forward declarations of the digit-array helpers defined after main().
void arctan(int s);
void add();
void sub();
void mul(int);
void div(int);
void mul4();
void div4();
int tiszero();

/**
 * Entry point: reads the requested precision from the command line, computes
 * pi as 4 * (arctan(1/2) + arctan(1/3)) in the digit arrays p[] / t[], and
 * prints the digits together with the elapsed wall-clock seconds.
 *
 * @param argv  Argument count (must be 2: program name plus precision).
 * @param args  Argument vector; args[1] is parsed as the precision.
 * @return 0 on success. Exits with status 55 when the argument count is wrong.
 */
int main(int argv,char** args)
{
	time_t startime, endtime;
	int i;

	if (argv == 2) {
		sscanf(&args[1][0],"%d",&q);
	} else {
		printf("Usage: pi [precision]\n");
		exit(55);
	}

	if (q < 0)
	{
		printf("Precision was too low, running with precision of 0.\n");
		q = 0;
	}

	if (q > LONG_TIME)
	{
	    printf("Be prepared to wait a while...\n");
	}

	// Compute one more digit than we display to compensate for rounding
	q++;

	p = new __int8[ q + 1 ];
	t = new __int8[ q + 1 ];
	// Zero every digit. The element index is essential: assigning 0 to the
	// pointers themselves would discard the arrays allocated just above.
	for(int tt=0;tt<=q;tt++) 
	{
		p[tt]=0;
		t[tt]=0;
	}
	/* compute pi */

	time(&startime);
	arctan(2);
	arctan(3);
	mul4();
	time(&endtime);

	// Return to the number of digits we want to display
	q--;

	/* print pi */

	printf("pi = %d.",(int)(p[0]));
	// Print one digit per iteration; the index on p is required here.
	for (i = 1; i <= q; i++)
		printf("%d",(int)(p[i]));
	printf("\n");
	printf("%ld seconds to compute pi with a precision of %d digits.\n",(long)endtime-(long)startime,q);

	delete [] p;
	delete [] t;
	return 0;
}

/*
 * Adds the alternating series for arctan(1/s) into p[], using t[] as the
 * current term. The loop ends once every digit of t[] is zero.
 */
void arctan(int s)
{
	const int sSquared = s * s;
	bool subtractNext = true;	/* the term after 1/s is subtracted */

	t[0] = 1;
	div(s); /* t[] = 1/s */
	add();

	int odd = 1;
	for (;;) {
		/* next term magnitude: t * odd / (s*s) / (odd + 2) */
		mul(odd);
		div(sSquared);
		odd += 2;
		div(odd);

		if (subtractNext)
			sub();
		else
			add();
		subtractNext = !subtractNext;

		if (tiszero())
			break;
	}
}

/*
 * p[] += t[], one decimal digit at a time, walking from index q down to 0.
 * A digit sum above 9 is reduced by 10 and 1 is carried into the next
 * higher digit (index k-1).
 */
void add()
{
	for (int k = q; k >= 0; --k)
	{
		const bool carry = (t[k] + p[k] > 9);
		p[k] += carry ? t[k] - 10 : t[k];
		if (carry)
			p[k - 1] += 1;
	}
}

/*
 * p[] -= t[], one decimal digit at a time, walking from index q down to 0.
 * When a digit of p is smaller than the digit of t, 10 is added to it and
 * 1 is borrowed from the next higher digit (index k-1).
 */
void sub()
{
	for (int k = q; k >= 0; --k)
	{
		const bool borrow = (p[k] < t[k]);
		p[k] -= borrow ? t[k] - 10 : t[k];
		if (borrow)
			p[k - 1] -= 1;
	}
}

/*
 * t[] *= multiplier, processed from the last digit (index q) up to index 0.
 * Any carry left after index 0 is discarded.
 */
void mul(int multiplier)
{
	int b;
	int i;
	int carry = 0, digit = 0;

	for (i = q; i >= 0; i--) {
		/* each digit must be indexed; arithmetic on the pointer itself is invalid */
		b = (t[i] * multiplier + carry);
		digit = b % 10;
		carry = b / 10;
		t[i] = digit;
	}
}

/* t[] /= l */

/*
 * t[] /= divisor: schoolbook long division from index 0 to index q.
 * The remainder left after the last digit is discarded.
 */
void div(int divisor)
{
	int i, b;
	int quotient, remainder = 0;

	for (i = 0; i <= q; i++) {
		/* bring down the next digit; the index on t is required */
		b = (10 * remainder + t[i]);
		quotient = b / divisor;
		remainder = b % divisor;
		t[i] = quotient;
	}
}

/*
 * p[] /= 4: long division from index 0 to index q; d carries the running
 * remainder between digits. Not called by main() in this file.
 */
void div4()
{
	int i, c, d = 0;

	for (i = 0; i <= q; i++) {
		c = (10 * d + p[i]) / 4;
		d = (10 * d + p[i]) % 4;
		p[i] = c;
	}
}

/*
 * p[] *= 4, processed from index q up to index 0; c is the carry into the
 * next higher digit. Any carry left after index 0 is discarded.
 */
void mul4()
{
	int i, c, d;

	d = c = 0;

	for (i = q; i >= 0; i--) {
		d = (p[i] * 4 + c) % 10;
		c = (p[i] * 4 + c) / 10;
		p[i] = d;
	}
}

/* Returns 1 when every digit t[0] .. t[q] is zero, otherwise 0. */
int tiszero()
{
	int idx = 0;

	/* advance past leading zero digits; stop at the first non-zero one */
	while (idx <= q && t[idx] == 0)
		++idx;

	return idx > q;
}

c#:
using System;
using System.Diagnostics;

/// <summary>
/// Computes pi as 4 * (arctan(1/2) + arctan(1/3)) using arrays of decimal
/// digits, and reports how long the computation took.
/// </summary>
class Pi
{
    static void Main(string[] args)
    {
        Pi pi = new Pi();
        pi.run(args);
    }

    /// <summary>Precision above which the user is warned about a long run.</summary>
    private const int LONG_TIME = 4000;

    /// <summary>Digits of the running result; p[0] is printed before the decimal point.</summary>
    sbyte[] p;
    /// <summary>Digits of the current series term, laid out like <c>p</c>.</summary>
    sbyte[] t;
    /// <summary>Index of the last digit processed (both arrays hold q + 1 digits).</summary>
    int q;

    /// <summary>
    /// Parses the precision from <paramref name="args"/>, runs the computation
    /// and prints the digits and the elapsed time.
    /// </summary>
    /// <param name="args">Exactly one argument: the number of digits.</param>
    void run(string[] args)
    {
        Stopwatch timer = new Stopwatch();
        int i;

        if (args.Length == 1)
        {
            q = int.Parse(args[0]);
        }
        else
        {
            Console.WriteLine("Usage: pi [precision]");
            return;
        }
        
        if (q < 0)
        {
            Console.WriteLine("Precision was too low, running with precision of 0.");
            q = 0;
        }

        if (q > LONG_TIME)
        {
            Console.WriteLine("Be prepared to wait a while...");
        }

        // Compute one more digit than we display to compensate for rounding
        q++;

        p = new sbyte[q + 1];
        t = new sbyte[q + 1];
        /* compute pi */

        timer.Start();
        arctan(2);
        arctan(3);
        mul4();
        timer.Stop();

        // Return to the number of digits we want to display
        q--;

        /* print pi */

        Console.Write("pi = {0}.", p[0]);
        // Print one digit per iteration; writing the array itself would print its type name.
        for (i = 1; i <= q; i++)
            Console.Write(p[i]);
        Console.WriteLine();
        Console.WriteLine("{0} seconds to compute pi with a precision of {1} digits.", timer.ElapsedMilliseconds / 1000.0, q);

        return;
    }

    /// <summary>
    /// Adds the alternating series for arctan(1/s) into p[], using t[] as the
    /// current term, until every digit of t[] is zero.
    /// </summary>
    void arctan(int s)
    {
        int n;

        t[0] = 1;
        div(s); /* t[] = 1/s */
        add();
        n = 1;
        do
        {
            mul(n);
            div(s * s);
            div(n += 2);
            if (((n - 1) / 2) % 2 == 0)
                add();
            else
                sub();
        } while (!tiszero());
    }

    /// <summary>p[] += t[], digit by digit from index q down to 0, carrying into index j - 1.</summary>
    void add()
    {
        int j;

        for (j = q; j >= 0; j--)
        {
            if (t[j] + p[j] > 9)
            {
                p[j] += (sbyte)(t[j] - 10);
                p[j - 1] += 1;
            }
            else
                p[j] += t[j];
        }
    }

    /// <summary>p[] -= t[], digit by digit from index q down to 0, borrowing from index j - 1.</summary>
    void sub()
    {
        int j;

        for (j = q; j >= 0; j--)
            if (p[j] < t[j])
            {
                p[j] -= (sbyte)(t[j] - 10);
                p[j - 1] -= 1;
            }
            else
                p[j] -= t[j];
    }

    /// <summary>t[] *= multiplier, from index q up to index 0; the final carry is discarded.</summary>
    void mul(int multiplier)
    {
        int b;
        int i;
        int carry = 0, digit = 0;

        for (i = q; i >= 0; i--)
        {
            b = (t[i] * multiplier + carry);
            digit = b % 10;
            carry = b / 10;
            t[i] = (sbyte)digit;
        }
    }

    /* t[] /= l */

    /// <summary>t[] /= divisor by long division from index 0 to q; the final remainder is discarded.</summary>
    void div(int divisor)
    {
        int i, b;
        int quotient, remainder = 0;

        for (i = 0; i <= q; i++)
        {
            b = (10 * remainder + t[i]);
            quotient = b / divisor;
            remainder = b % divisor;
            t[i] = (sbyte)quotient;
        }
    }

    /// <summary>p[] /= 4 by long division from index 0 to q. Not called by run().</summary>
    void div4()
    {
        int i, c, d = 0;

        for (i = 0; i <= q; i++)
        {
            c = (10 * d + p[i]) / 4;
            d = (10 * d + p[i]) % 4;
            p[i] = (sbyte)c;
        }
    }

    /// <summary>p[] *= 4, from index q up to index 0; the final carry is discarded.</summary>
    void mul4()
    {
        int i, c, d;

        d = c = 0;

        for (i = q; i >= 0; i--)
        {
            d = (p[i] * 4 + c) % 10;
            c = (p[i] * 4 + c) / 10;
            p[i] = (sbyte)d;
        }
    }

    /// <summary>Returns true when every digit t[0] .. t[q] is zero.</summary>
    bool tiszero()
    {
        int k;

        for (k = 0; k <= q; k++)
            if (t[k] != 0)
                return false;
        return true;
    }

 
}

I used the release builds for the benchmark; here are the results: d : 26 seconds to compute pi with a precision of 10000 digits. c# : 34,745(34.745) seconds to compute pi with a precision of 10000 digits. c++ : 15 seconds to compute pi with a precision of 10000 digits. I do not want to start a language war... we have had enough such wars in the past few days, and I only made this benchmark because I saw the “What do you think of the D language?” thread and was testing D. I'm just a little surprised: I thought that D shouldn't be much slower than C++, and I thought that C# would be much faster. Maybe there is some optimization option I have not used(?) What do you think about the results? (Maybe someone can test Java.) (I really like the invariant and unittest features of D; are there equivalents for C#?) -I can't reply until tomorrow- [Edited by - Kambiz on October 7, 2006 2:02:45 AM]
Advertisement
34,745 seconds! You really ran the program for 9 hours?
The comma is the new dot. (At least in Germany.)
In most/all of Europe, actually. So, 3.14159 would be 3,14159. You really should know that, since chances are it will bite you one time (as you saw, it made a vast difference here).

What compiler were you using for C++ and what optimizations did you enable?
One of the traditions in computer technology is the use of "." as the decimal separator, as in 0.030. The "," was used for something else.

"," was used traditionally only in math classes. However teachers would survive "." instead if they weren't completely retarded.

BTW if you'd like to test Java, should it be a direct port, or should it use BigDecimal instead? You might like to add a 64-bit ASM program as well.
Alright, managed code haters -- don't feel too vindicated yet. I sat down with the C# code for about 15 minutes, and made some modifications to the mul and div functions.
using System;
using System.Diagnostics;

/// <summary>
/// Variant of the pi benchmark in which mul() and div() replace integer
/// division and modulo with a multiplication by a float reciprocal.
/// </summary>
class Pi
{
	static void Main( string[] args )
	{
		Pi pi = new Pi();
		pi.run( args );
	}

	/// <summary>Precision above which the user is warned about a long run.</summary>
	private const int LONG_TIME = 4000;

	/// <summary>Digits of the running result; p[0] is printed before the decimal point.</summary>
	sbyte[] p;
	/// <summary>Digits of the current series term, laid out like <c>p</c>.</summary>
	sbyte[] t;
	/// <summary>Index of the last digit processed (both arrays hold q + 1 digits).</summary>
	int q;

	/// <summary>
	/// Parses the precision from <paramref name="args"/>, runs the computation
	/// and prints the digits and the elapsed time.
	/// </summary>
	/// <param name="args">Exactly one argument: the number of digits.</param>
	void run( string[] args )
	{
		Stopwatch timer = new Stopwatch();
		int i;

		if( args.Length == 1 )
		{
			q = int.Parse( args[0] );
		}
		else
		{
			Console.WriteLine( "Usage: pi [precision]" );
			return;
		}

		if( q < 0 )
		{
			Console.WriteLine( "Precision was too low, running with precision of 0." );
			q = 0;
		}

		if( q > LONG_TIME )
		{
			Console.WriteLine( "Be prepared to wait a while..." );
		}

		// Compute one more digit than we display to compensate for rounding
		q++;

		p = new sbyte[q + 1];
		t = new sbyte[q + 1];

		/* compute pi */
		timer.Start();
		arctan( 2 );
		arctan( 3 );
		mul4();
		timer.Stop();

		// Return to the number of digits we want to display
		q--;

		/* print pi */
		Console.Write( "pi = {0}.", p[0] );
		// Print one digit per iteration; writing the array itself would print its type name.
		for( i = 1; i <= q; i++ )
			Console.Write( p[i] );
		Console.WriteLine();
		Console.WriteLine( "{0} seconds to compute pi with a precision of {1} digits.", timer.ElapsedMilliseconds / 1000.0, q );

		return;
	}

	/// <summary>
	/// Adds the alternating series for arctan(1/s) into p[], using t[] as the
	/// current term, until every digit of t[] is zero.
	/// </summary>
	void arctan( int s )
	{
		int n;

		t[0] = 1;
		div( s ); /* t[] = 1/s */
		add();
		n = 1;
		do
		{
			mul( n );
			div( s * s );
			div( n += 2 );
			if( ( ( n - 1 ) / 2 ) % 2 == 0 )
				add();
			else
				sub();
		} while( !tiszero() );
	}

	/// <summary>p[] += t[], digit by digit from index q down to 0, carrying into index j - 1.</summary>
	void add()
	{
		int j;

		for( j = q; j >= 0; j-- )
		{
			if( t[j] + p[j] > 9 )
			{
				p[j] += (sbyte) ( t[j] - 10 );
				p[j - 1] += 1;
			}
			else
				p[j] += t[j];
		}
	}

	/// <summary>p[] -= t[], digit by digit from index q down to 0, borrowing from index j - 1.</summary>
	void sub()
	{
		int j;

		for( j = q; j >= 0; j-- )
			if( p[j] < t[j] )
			{
				p[j] -= (sbyte) ( t[j] - 10 );
				p[j - 1] -= 1;
			}
			else
				p[j] -= t[j];
	}

	/// <summary>
	/// t[] *= multiplier, from index q up to index 0. The carry is obtained by
	/// multiplying by 0.1f instead of dividing by 10; the final carry is discarded.
	/// </summary>
	void mul( int multiplier )
	{
		int b;
		int i;
		int carry = 0, digit;

		for( i = q; i >= 0; i-- )
		{
			b = ( t[i] * multiplier + carry );
			// NOTE(review): relies on float precision being sufficient for b;
			// presumably safe for the precisions benchmarked here - confirm for larger runs.
			carry = (int) ( b * 0.1f );
			digit = b - 10 * carry;
			t[i] = (sbyte) digit;
		}
	}

	/* t[] /= l */

	/// <summary>
	/// t[] /= divisor by long division from index 0 to q, using a precomputed
	/// float reciprocal in place of integer division. The final remainder is discarded.
	/// </summary>
	void div( int divisor )
	{
		int i, b;
		int quotient, remainder = 0;
		float fdiv = 1.0f / divisor;

		for( i = 0; i <= q; i++ )
		{
			b = ( 10 * remainder + t[i] );
			quotient = (int) ( b * fdiv );
			remainder = b - divisor * quotient;

			// The rounded reciprocal can put b * fdiv marginally on the wrong
			// side of an exact integer, making the truncated quotient off by
			// one. Restore the invariant 0 <= remainder < divisor.
			if( remainder >= divisor )
			{
				quotient++;
				remainder -= divisor;
			}
			else if( remainder < 0 )
			{
				quotient--;
				remainder += divisor;
			}

			t[i] = (sbyte) quotient;
		}
	}

	/// <summary>p[] /= 4 by long division from index 0 to q. Not called by run().</summary>
	void div4()
	{
		int i, c, d = 0;

		for( i = 0; i <= q; i++ )
		{
			c = ( 10 * d + p[i] ) / 4;
			d = ( 10 * d + p[i] ) % 4;
			p[i] = (sbyte) c;
		}
	}

	/// <summary>p[] *= 4, from index q up to index 0; the final carry is discarded.</summary>
	void mul4()
	{
		int i, c, d;

		d = c = 0;

		for( i = q; i >= 0; i-- )
		{
			d = ( p[i] * 4 + c ) % 10;
			c = ( p[i] * 4 + c ) / 10;
			p[i] = (sbyte) d;
		}
	}

	/// <summary>Returns true when every digit t[0] .. t[q] is zero.</summary>
	bool tiszero()
	{
		int k;

		for( k = 0; k <= q; k++ )
			if( t[k] != 0 )
				return false;
		return true;
	}
}

Initial figures on my system before modification were in the vicinity of about 19s (probably closer to 20) for the C++ and 38s for the C#. This version of the C# runs in just under 22s. I brought the same optimizations back to the C++ version, but they didn't appear to have any particular effect.

Perhaps someone can step up and defend D's honor. 26s is much worse than I expected. (Although the fact that the D code was written by Digital Mars does not bode well.)

[Edited by - Promit on October 6, 2006 9:28:51 PM]
SlimDX | Ventspace Blog | Twitter | Diverse teams make better games. I am currently hiring capable C++ engine developers in Baltimore, MD.
The fastest way to calculate PI in C is of course:

// Obfuscated pi program: main() calls F_OO() and then prints 4.*-F/OO/OO with "%1.3f".
// NOTE(review): the line breaks of this snippet appear to have been lost in the post.
// As written, everything after "#define _" sits on the directive's own line and is
// therefore part of the macro body, so no code remains to compile. Restore the
// original multi-line layout before building - confirm against the original source.
// NOTE(review): presumably also depends on pre-ANSI preprocessor behaviour for "-_" - verify.
#define _ -F<00||--F-OO--;int F=00,OO=00;main(){F_OO();printf("%1.3f\n",4.*-F/OO/OO);}F_OO(){            _-_-_-_       _-_-_-_-_-_-_-_-_    _-_-_-_-_-_-_-_-_-_-_-_  _-_-_-_-_-_-_-_-_-_-_-_-_-_ _-_-_-_-_-_-_-_-_-_-_-_-_-_-_ _-_-_-_-_-_-_-_-_-_-_-_-_-_-__-_-_-_-_-_-_-_-_-_-_-_-_-_-_-__-_-_-_-_-_-_-_-_-_-_-_-_-_-_-__-_-_-_-_-_-_-_-_-_-_-_-_-_-_-__-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_ _-_-_-_-_-_-_-_-_-_-_-_-_-_-_ _-_-_-_-_-_-_-_-_-_-_-_-_-_-_  _-_-_-_-_-_-_-_-_-_-_-_-_-_    _-_-_-_-_-_-_-_-_-_-_-_        _-_-_-_-_-_-_-_            _-_-_-_}
deathkrushPS3/Xbox360 Graphics Programmer, Mass Media.Completed Projects: Stuntman Ignition (PS3), Saints Row 2 (PS3), Darksiders(PS3, 360)
Quote:Original post by TrueTom
The comma is the new dot. (At least in germany.)

yes... (I have a German Windows.)

Quote:Original post by Promit
Alright, managed code haters -- don't feel too vindicated yet. I sat down with the C# code for about 15 minutes, and made some modifications to the mul and div functions.
...

19.443 seconds to compute pi with a precision of 10000 digits.
But because of the modifications we should not compare these results with the others.

Quote:Original post by Ezbez
What compiler were you using for C++ and what optimizations did you enable?

MSVC++ 2005
/O2 /Ot /GL /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_UNICODE" /D "UNICODE" /FD /EHsc /MD /Fo"Release\\" /Fd"Release\vc80.pdb" /W3 /nologo /c /Wp64 /Zi /TP /errorReport:prompt
Linker:
/OUT:"D:\kambiz\c++\pi\Release\pi.exe" /INCREMENTAL:NO /NOLOGO /MANIFEST /MANIFESTFILE:"Release\pi.exe.intermediate.manifest" /DEBUG /PDB:"d:\kambiz\c++\pi\Release\pi.pdb" /SUBSYSTEM:CONSOLE /OPT:REF /OPT:ICF /LTCG /MACHINE:X86 /ERRORREPORT:PROMPT kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib
Quote:Original post by Kambiz
But because of the modifications we should not compare these results with the others.


Under which conditions may we compare one result with another?

Quote:Original post by ToohrVyk
Quote:Original post by Kambiz
But because of the modifications we should not compare these results with the others.


Under which conditions may we compare one result with another?
Presumably, one is only allowed to use the exact implementation originally written in C, without taking into account the fact that a good C implementation may be a terrible implementation for another language.

In short, when comparing the performance of languages, one must assume that all programmers are competent at no languages other than C.
SlimDX | Ventspace Blog | Twitter | Diverse teams make better games. I am currently hiring capable C++ engine developers in Baltimore, MD.

This topic is closed to new replies.

Advertisement