My C++ / D / C# benchmark!


Today I saw D's pi-computing sample (\dmd\samples\d\pi.d) and thought that porting it to C++ and C# to make a little benchmark should be easy. D:
import std.c.stdio;
import std.c.stdlib;
import std.c.time;

const int LONG_TIME=4000;

byte[] p;
byte[] t;
int q;

int main(char[][] args)
{
	int startime, endtime;
	int i;

	if (args.length == 2) {
		sscanf(&args[1][0],"%d",&q);
	} else {
		printf("Usage: pi [precision]\n");
		exit(55);
	}

	if (q < 0)
	{
		printf("Precision was too low, running with precision of 0.\n");
		q = 0;
	}

	if (q > LONG_TIME)
	{
	    printf("Be prepared to wait a while...\n");
	}

	// Compute one more digit than we display to compensate for rounding
	q++;

	p.length = q + 1;
	t.length = q + 1;

	/* compute pi */

	std.c.time.time(&startime);
	arctan(2);
	arctan(3);
	mul4();
	std.c.time.time(&endtime);

	// Return to the number of digits we want to display
	q--;

	/* print pi */

	printf("pi = %d.",cast(int)(p[0]));
	for (i = 1; i <= q; i++)
	printf("%d",cast(int)(p));
	printf("\n");
	printf("%ld seconds to compute pi with a precision of %d digits.\n",endtime-startime,q);

	return 0;
}

void arctan(int s)
{
	int n;

	t[0] = 1;
	div(s); /* t[] = 1/s */
	add();
	n = 1;
	do {
		mul(n);
		div(s * s);
		div(n += 2);
		if (((n-1) / 2) % 2 == 0)
			add();
		else
			sub();
	} while (!tiszero());
}

void add()
{
	int j;

	for (j = q; j >= 0; j--)
	{
		if (t[j] + p[j] > 9) {
			p[j] += t[j] - 10;
			p[j-1] += 1;
		} else
			p[j] += t[j];
	}
}

void sub()
{
	int j;

	for (j = q; j >= 0; j--)
		if (p[j] < t[j]) {
			p[j] -= t[j] - 10;
			p[j-1] -= 1;
		} else
			p[j] -= t[j];
}

void mul(int multiplier)
{
	int b;
	int i;
	int carry = 0, digit = 0;

	for (i = q; i >= 0; i--) {
		b = (t[i] * multiplier + carry);
		digit = b % 10;
		carry = b / 10;
		t[i] = digit;
	}
}

/* t[] /= l */

void div(int divisor)
{
	int i, b;
	int quotient, remainder = 0;

	for (i = 0; i <= q; i++) {
		b = (10 * remainder + t[i]);
		quotient = b / divisor;
		remainder = b % divisor;
		t[i] = quotient;
	}
}

void div4()
{
	int i, c, d = 0;

	for (i = 0; i <= q; i++) {
		c = (10 * d + p[i]) / 4;
		d = (10 * d + p[i]) % 4;
		p[i] = c;
	}
}

void mul4()
{
	int i, c, d;

	d = c = 0;

	for (i = q; i >= 0; i--) {
		d = (p[i] * 4 + c) % 10;
		c = (p[i] * 4 + c) / 10;
		p[i] = d;
	}
}

int tiszero()
{
	int k;

	for (k = 0; k <= q; k++)
		if (t[k] != 0)
			return false;
	return true;
}

(I compiled pi.d with dmd -O -release pi.d) C++:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define LONG_TIME 4000

__int8* p;
__int8* t;
int q;


void arctan(int s);
void add();
void sub();
void mul(int);
void div(int);
void mul4();
void div4();
int tiszero();

int main(int argv,char** args)
{
	time_t startime, endtime;
	int i;

	if (argv == 2) {
		sscanf(&args[1][0],"%d",&q);
	} else {
		printf("Usage: pi [precision]\n");
		exit(55);
	}

	if (q < 0)
	{
		printf("Precision was too low, running with precision of 0.\n");
		q = 0;
	}

	if (q > LONG_TIME)
	{
	    printf("Be prepared to wait a while...\n");
	}

	// Compute one more digit than we display to compensate for rounding
	q++;

	p = new __int8[ q + 1 ];
	t = new __int8[ q + 1 ];
	for(int tt=0;tt<=q;tt++) 
	{
		p[tt]=0;
		t[tt]=0;
	}
	/* compute pi */

	time(&startime);
	arctan(2);
	arctan(3);
	mul4();
	time(&endtime);

	// Return to the number of digits we want to display
	q--;

	/* print pi */

	printf("pi = %d.",(int)(p[0]));
	for (i = 1; i <= q; i++)
	printf("%d",(int)(p));
	printf("\n");
	printf("%ld seconds to compute pi with a precision of %d digits.\n",(long)endtime-(long)startime,q);

	delete [] p;
	delete [] t;
	return 0;
}

void arctan(int s)
{
	int n;

	t[0] = 1;
	div(s); /* t[] = 1/s */
	add();
	n = 1;
	do {
		mul(n);
		div(s * s);
		div(n += 2);
		if (((n-1) / 2) % 2 == 0)
			add();
		else
			sub();
	} while (!tiszero());
}

void add()
{
	int j;

	for (j = q; j >= 0; j--)
	{
		if (t[j] + p[j] > 9) {
			p[j] += t[j] - 10;
			p[j-1] += 1;
		} else
			p[j] += t[j];
	}
}

void sub()
{
	int j;

	for (j = q; j >= 0; j--)
		if (p[j] < t[j]) {
			p[j] -= t[j] - 10;
			p[j-1] -= 1;
		} else
			p[j] -= t[j];
}

void mul(int multiplier)
{
	int b;
	int i;
	int carry = 0, digit = 0;

	for (i = q; i >= 0; i--) {
		b = (t[i] * multiplier + carry);
		digit = b % 10;
		carry = b / 10;
		t[i] = digit;
	}
}

/* t[] /= l */

void div(int divisor)
{
	int i, b;
	int quotient, remainder = 0;

	for (i = 0; i <= q; i++) {
		b = (10 * remainder + t[i]);
		quotient = b / divisor;
		remainder = b % divisor;
		t[i] = quotient;
	}
}

void div4()
{
	int i, c, d = 0;

	for (i = 0; i <= q; i++) {
		c = (10 * d + p[i]) / 4;
		d = (10 * d + p[i]) % 4;
		p[i] = c;
	}
}

void mul4()
{
	int i, c, d;

	d = c = 0;

	for (i = q; i >= 0; i--) {
		d = (p[i] * 4 + c) % 10;
		c = (p[i] * 4 + c) / 10;
		p[i] = d;
	}
}

int tiszero()
{
	int k;

	for (k = 0; k <= q; k++)
		if (t[k] != 0)
			return false;
	return true;
}

C#:
using System;
using System.Diagnostics;

class Pi
{
    static void Main(string[] args)
    {
        Pi pi = new Pi();
        pi.run(args);
    }
    private const int LONG_TIME = 4000;
    sbyte[] p;
    sbyte[] t;
    int q;

    void run(string[] args)
    {
        Stopwatch timer = new Stopwatch();
        int i;

        if (args.Length == 1)
        {
            q = int.Parse(args[0]);
        }
        else
        {
            Console.WriteLine("Usage: pi [precision]");
            return;
        }
        
        if (q < 0)
        {
            Console.WriteLine("Precision was too low, running with precision of 0.");
            q = 0;
        }

        if (q > LONG_TIME)
        {
            Console.WriteLine("Be prepared to wait a while...");
        }

        // Compute one more digit than we display to compensate for rounding
        q++;

        p = new sbyte[q + 1];
        t = new sbyte[q + 1];
        /* compute pi */

        timer.Start();
        arctan(2);
        arctan(3);
        mul4();
        timer.Stop();

        // Return to the number of digits we want to display
        q--;

        /* print pi */

        Console.Write("pi = {0}.", p[0]);
        for (i = 1; i <= q; i++)
            Console.Write(p[i]);
        Console.WriteLine();
        Console.WriteLine("{0} seconds to compute pi with a precision of {1} digits.", timer.ElapsedMilliseconds / 1000.0, q);

        return;
    }

    void arctan(int s)
    {
        int n;

        t[0] = 1;
        div(s); /* t[] = 1/s */
        add();
        n = 1;
        do
        {
            mul(n);
            div(s * s);
            div(n += 2);
            if (((n - 1) / 2) % 2 == 0)
                add();
            else
                sub();
        } while (!tiszero());
    }

    void add()
    {
        int j;

        for (j = q; j >= 0; j--)
        {
            if (t[j] + p[j] > 9)
            {
                p[j] += (sbyte)(t[j] - 10);
                p[j - 1] += 1;
            }
            else
                p[j] += t[j];
        }
    }

    void sub()
    {
        int j;

        for (j = q; j >= 0; j--)
            if (p[j] < t[j])
            {
                p[j] -= (sbyte)(t[j] - 10);
                p[j - 1] -= 1;
            }
            else
                p[j] -= t[j];
    }

    void mul(int multiplier)
    {
        int b;
        int i;
        int carry = 0, digit = 0;

        for (i = q; i >= 0; i--)
        {
            b = (t[i] * multiplier + carry);
            digit = b % 10;
            carry = b / 10;
            t[i] = (sbyte)digit;
        }
    }

    /* t[] /= l */

    void div(int divisor)
    {
        int i, b;
        int quotient, remainder = 0;

        for (i = 0; i <= q; i++)
        {
            b = (10 * remainder + t[i]);
            quotient = b / divisor;
            remainder = b % divisor;
            t[i] = (sbyte)quotient;
        }
    }

    void div4()
    {
        int i, c, d = 0;

        for (i = 0; i <= q; i++)
        {
            c = (10 * d + p[i]) / 4;
            d = (10 * d + p[i]) % 4;
            p[i] = (sbyte)c;
        }
    }

    void mul4()
    {
        int i, c, d;

        d = c = 0;

        for (i = q; i >= 0; i--)
        {
            d = (p[i] * 4 + c) % 10;
            c = (p[i] * 4 + c) / 10;
            p[i] = (sbyte)d;
        }
    }

    bool tiszero()
    {
        int k;

        for (k = 0; k <= q; k++)
            if (t[k] != 0)
                return false;
        return true;
    }

 
}

I used the release builds for the benchmark; here are the results:

d : 26 seconds to compute pi with a precision of 10000 digits.
c# : 34,745(34.745) seconds to compute pi with a precision of 10000 digits.
c++ : 15 seconds to compute pi with a precision of 10000 digits.

I do not want to start a language war; we had enough such wars in the past few days, and I only made this benchmark because I saw the "What do you think of the D language?" thread and was trying out D. I'm just a little surprised: I thought that D shouldn't be much slower than C++, and I thought that C# would be much faster. Maybe there is some optimization option I have not used(?) What do you think about the results? (Maybe someone can test Java.) (I really like the invariant and unittest features of D; are there equivalents for C#?) I can't reply until tomorrow. [Edited by - Kambiz on October 7, 2006 2:02:45 AM]

Guest Anonymous Poster
34,745 seconds! You really ran the program for 9 hours?

In most/all of Europe, actually. So, 3.14159 would be 3,14159. You really should know that, since chances are it will bite you one time (as you saw, it made a vast difference here).
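For reference, the comma is purely a formatting issue: .NET formats numbers with the current culture's decimal separator, so on a German system the C# program prints "34,745" where an English locale would print "34.745". A quick sketch showing the difference (the class name is mine, added for illustration):

using System;
using System.Globalization;

class DecimalSeparatorDemo
{
    static void Main()
    {
        double seconds = 34.745;

        // Console.Write and ToString use the operating system's current culture
        // by default; with a German culture this prints "34,745".
        Console.WriteLine(seconds.ToString(new CultureInfo("de-DE")));

        // The invariant culture always uses "." as the decimal separator: "34.745".
        Console.WriteLine(seconds.ToString(CultureInfo.InvariantCulture));
    }
}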

What compiler were you using for C++ and what optimizations did you enable?

One of the traditions in computer technology was the use of "." in numbers like 0.030; the "," was used for something else.

The "," was traditionally used only in math classes, though teachers would usually put up with "." instead.

BTW, if you'd like to test Java, should it be a direct port, or should it use BigDecimal instead? You might like to add a 64-bit ASM program as well.

Alright, managed code haters -- don't feel too vindicated yet. I sat down with the C# code for about 15 minutes, and made some modifications to the mul and div functions.
using System;
using System.Diagnostics;

class Pi
{
static void Main( string[] args )
{
Pi pi = new Pi();
pi.run( args );
}

private const int LONG_TIME = 4000;
sbyte[] p;
sbyte[] t;
int q;

void run( string[] args )
{
Stopwatch timer = new Stopwatch();
int i;

if( args.Length == 1 )
{
q = int.Parse( args[0] );
}
else
{
Console.WriteLine( "Usage: pi [precision]" );
return;
}

if( q < 0 )
{
Console.WriteLine( "Precision was too low, running with precision of 0." );
q = 0;
}

if( q > LONG_TIME )
{
Console.WriteLine( "Be prepared to wait a while..." );
}

// Compute one more digit than we display to compensate for rounding
q++;

p = new sbyte[q + 1];
t = new sbyte[q + 1];
/* compute pi */

timer.Start();
arctan( 2 );
arctan( 3 );
mul4();
timer.Stop();

// Return to the number of digits we want to display
q--;

/* print pi */

Console.Write( "pi = {0}.", p[0] );
for( i = 1; i <= q; i++ )
Console.Write( p[i] );
Console.WriteLine();
Console.WriteLine( "{0} seconds to compute pi with a precision of {1} digits.", timer.ElapsedMilliseconds / 1000.0, q );

return;
}

void arctan( int s )
{
int n;

t[0] = 1;
div( s ); /* t[] = 1/s */
add();
n = 1;
do
{
mul( n );
div( s * s );
div( n += 2 );
if( ( ( n - 1 ) / 2 ) % 2 == 0 )
add();
else
sub();
} while( !tiszero() );
}

void add()
{
int j;

for( j = q; j >= 0; j-- )
{
if( t[j] + p[j] > 9 )
{
p[j] += (sbyte) ( t[j] - 10 );
p[j - 1] += 1;
}
else
p[j] += t[j];
}
}

void sub()
{
int j;

for( j = q; j >= 0; j-- )
if( p[j] < t[j] )
{
p[j] -= (sbyte) ( t[j] - 10 );
p[j - 1] -= 1;
}
else
p[j] -= t[j];
}

void mul( int multiplier )
{
int b;
int i;
int carry = 0, digit;

for( i = q; i >= 0; i-- )
{
b = ( t[i] * multiplier + carry );
carry = (int) ( b * 0.1f );
digit = b - 10 * carry;
t[i] = (sbyte) digit;
}
}

/* t[] /= l */

void div( int divisor )
{
int i, b;
int quotient, remainder = 0;
float fdiv = 1.0f / divisor;

for( i = 0; i <= q; i++ )
{
b = ( 10 * remainder + t[i] );
quotient = (int) ( b * fdiv );
remainder = b - divisor * quotient;
t[i] = (sbyte) quotient;
}
}

void div4()
{
int i, c, d = 0;

for( i = 0; i <= q; i++ )
{
c = ( 10 * d + p[i] ) / 4;
d = ( 10 * d + p[i] ) % 4;
p[i] = (sbyte) c;
}
}

void mul4()
{
int i, c, d;

d = c = 0;

for( i = q; i >= 0; i-- )
{
d = ( p[i] * 4 + c ) % 10;
c = ( p[i] * 4 + c ) / 10;
p[i] = (sbyte) d;
}
}

bool tiszero()
{
int k;

for( k = 0; k <= q; k++ )
if( t[k] != 0 )
return false;
return true;
}
}







Initial figures on my system before modification were in the vicinity of about 19s (probably closer to 20) for the C++ and 38s for the C#. This version of the C# runs in just under 22s. I brought the same optimizations back to the C++ version, but they didn't appear to have any particular effect.
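If anyone wants to see the effect in isolation, here is a rough micro-benchmark of just the quotient/remainder idioms the mul/div changes are about. It's a sketch I'm adding for illustration (class name, divisor, and iteration count are arbitrary), not the code behind the numbers above; it also leaves out the float-reciprocal trick so both variants stay exact.

using System;
using System.Diagnostics;

class DivModBench
{
    const int Iterations = 100000000;

    static void Main(string[] args)
    {
        // A divisor the JIT cannot treat as a compile-time constant,
        // like the runtime divisor passed to div().
        int divisor = 7 + args.Length;

        // Variant 1: separate / and %, as in the original port.
        Stopwatch sw = Stopwatch.StartNew();
        long sum1 = 0;
        for (int b = 1; b <= Iterations; b++)
            sum1 += (b / divisor) + (b % divisor);
        sw.Stop();
        Console.WriteLine("div and mod : {0} ms (check {1})", sw.ElapsedMilliseconds, sum1);

        // Variant 2: one integer division, remainder by multiply-subtract.
        // This is the exact-arithmetic half of the change; the posted C# goes
        // further and replaces the division with a float-reciprocal multiply.
        sw = Stopwatch.StartNew();
        long sum2 = 0;
        for (int b = 1; b <= Iterations; b++)
        {
            int q = b / divisor;
            sum2 += q + (b - q * divisor);
        }
        sw.Stop();
        Console.WriteLine("single div  : {0} ms (check {1})", sw.ElapsedMilliseconds, sum2);
    }
}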

Perhaps someone can step up and defend D's honor. 26s is much worse than I expected. (Although the fact that the D code was written by Digital Mars does not bode well.)

[Edited by - Promit on October 6, 2006 9:28:51 PM]

The fastest way to calculate PI in C is of course:


#define _ -F<00||--F-OO--;
int F=00,OO=00;main(){F_OO();printf("%1.3f\n",4.*-F/OO/OO);}F_OO()
{
_-_-_-_
_-_-_-_-_-_-_-_-_
_-_-_-_-_-_-_-_-_-_-_-_
_-_-_-_-_-_-_-_-_-_-_-_-_-_
_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
_-_-_-_-_-_-_-_-_-_-_-_-_-_
_-_-_-_-_-_-_-_-_-_-_-_
_-_-_-_-_-_-_-_
_-_-_-_
}

Quote:
Original post by TrueTom
The comma is the new dot. (At least in germany.)

yes... (I have a German Windows.)

Quote:
Original post by Promit
Alright, managed code haters -- don't feel too vindicated yet. I sat down with the C# code for about 15 minutes, and made some modifications to the mul and div functions.
...

19.443 seconds to compute pi with a precision of 10000 digits.
But because of the modifications we should not compare these results with the others.

Quote:
Original post by Ezbez
What compiler where you using for C++ and what optimizations did you enable?

MSVC++ 2005
/O2 /Ot /GL /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_UNICODE" /D "UNICODE" /FD /EHsc /MD /Fo"Release\\" /Fd"Release\vc80.pdb" /W3 /nologo /c /Wp64 /Zi /TP /errorReport:prompt
Linker:
/OUT:"D:\kambiz\c++\pi\Release\pi.exe" /INCREMENTAL:NO /NOLOGO /MANIFEST /MANIFESTFILE:"Release\pi.exe.intermediate.manifest" /DEBUG /PDB:"d:\kambiz\c++\pi\Release\pi.pdb" /SUBSYSTEM:CONSOLE /OPT:REF /OPT:ICF /LTCG /MACHINE:X86 /ERRORREPORT:PROMPT kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib

Quote:
Original post by Kambiz
But because of the modifications we should not compare these results with the others.


Under which conditions may we compare one result with another?

Quote:
Original post by ToohrVyk
Quote:
Original post by Kambiz
But because of the modifications we should not compare these results with the others.


Under which conditions may we compare one result with another?
Presumably, one is only allowed to use the exact implementation originally written in C, without taking into account the fact that a good C implementation may be a terrible implementation for another language.

In short, when comparing the performance of languages, one must assume that all programmers are competent at no languages other than C.

Quote:
Original post by Promit
Presumably, one is only allowed to use the exact implementation originally written in C, without taking into account the fact that a good C implementation may be a terrible implementation for another language.


You seem to be right. I've only just noticed that the version advertised as C++ isn't C++, but is in fact C.

Quote:
Original post by Kambiz
I'm just a little surprised: I thought that D shouldn't be much slower than C++, and I thought that C# would be much faster. Maybe there is some optimization option I have not used(?)
What do you think about the results?
Interesting, but you're comparing the Microsoft C++ optimizer with the Digital Mars C++ optimizer. DMD, the Digital Mars D compiler, is built using the Digital Mars C++ optimizer.

To compare apples-apples, it makes sense to compare DMD versus DMC++, or GDC versus g++, as then the optimizer and code generator would be the same for each language. I tried DMD and DMC++, and got essentially the same times for each.



I actually have a suspicion that some of the modifications I applied in the C# version will improve the D version's performance as well. Anyone want to try my edits on D and see what happens? I really don't feel like downloading DMD.

Quote:
Original post by Kambiz
d : 26 seconds to compute pi with a precision of 10000 digits.
c# : 34,745(34.745) seconds to compute pi with a precision of 10000 digits.
c++ : 15 seconds to compute pi with a precision of 10000 digits.
...
I do not want to start a language war...

Yeah, right. The fact that you gave a decimal point only for the C# version (using a comma to represent it, making it look like a huge number) and your dismissal of Promit's benchmark implies that you hold a certain prejudice toward C#. Now, there's nothing inherently wrong with that, but trying to spread propaganda in the form of a benchmark is pretty sad if you ask me.

Guest Anonymous Poster
I'll make it fair, barakus.

d : 26 seconds to compute pi with a precision of 10000 digits.
c# : 35 seconds to compute pi with a precision of 10000 digits.
c++ : 15 seconds to compute pi with a precision of 10000 digits.

As for propaganda, I see none - just the results of a benchmark.

Guest Anonymous Poster
I don't want to comment on the benchmark much; quite frankly, these kinds of performance benchmarks don't mean very much, though they can be interesting in some cases. A pure number-crunching benchmark should be winnable by a native language like C++, no surprise there. A realistic number cruncher would resort to SIMD instructions anyway, which (for now) leaves all the other alternatives far behind, but that's only a matter of time. (For me that is the main reason not to use managed languages for these kinds of applications: I need the SIMD capabilities the metal offers.)
I like the fact, though, that people are starting to defend C# like most of us here do with C++ and the SC++L. That kind of advocacy for a certain language is a very good sign for the language (and - admit it! - its creators). I think that on its own is a good process.

cheers,
simon

Guest Anonymous Poster
Quote:
Original post by barakus
Yeah, right. The fact that you gave a decimal point only for the C# version (using a comma to represent it, making it look like a huge number) [...] implies that you hold a certain prejudice toward C#.
I think you're reading too much into that. In Europe (where he's from), believe it or not, people use a comma to separate decimals (mind-boggling, I know!). And the fact that only the C# version had decimals is likely because in C# he had to write the output code slightly differently; the C version's timing and output is more of a direct port from the original D code (using the same printf statement).
Quote:
your dismissal of Promit's benchmark
I think this was fair. It's not like Promit's code is more idiomatic C#, it's just a tweak that happens to make .NET run faster. Similar tweaks could probably be found in C++ and D versions as well, and we could spend weeks optimizing each one. Why not actually compare versions that are near identical in code to save the trouble? It even looks to me like his code would make the algorithm wrong due to the fact that floating point math isn't as accurate, but I didn't check this..

I've deleted the AP replies that seem intent on derailing this thread. So far the discussion is clean and interesting - whilst these benchmarks may not be the best way to compare languages, they are a good platform for learning and discussing various characteristics.

The whole "," or "." thing is a bit pointless - why bother arguing over something so silly [smile]

Cheers,
Jack

I'm not much of a .NET freak, but to me it seems like .NET is going to be slower no matter what in this specific case. It's basically a series of tight loops with bounds-checked array accesses, which will generate at least an extra branch for each array access, probably with stack-unwinding support as well.

I see nothing wrong with the test, and it demonstrates a fairly well-known weakness of .NET: tight loops with a lot of array accesses.
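As a rough sketch of what that means (method names are mine, and whether the check is actually removed depends on the JIT in use): the CLR can usually drop the per-access bounds check when the loop bound is the array's own Length, but the pi code indexes against a separate field q and walks some arrays backwards, which tends to keep a compare-and-branch on every access.

using System;

class BoundsCheckSketch
{
    // Pattern the JIT can typically recognize: i is provably in range because
    // the loop bound is a.Length, so the per-element bounds check can go away.
    static int SumForward(sbyte[] a)
    {
        int sum = 0;
        for (int i = 0; i < a.Length; i++)
            sum += a[i];
        return sum;
    }

    // Pattern closer to the pi code: the bound is an unrelated variable (q)
    // and the loop runs backwards, so each a[j] generally keeps its range check
    // (and an IndexOutOfRangeException path if q is ever too large).
    static int SumBackward(sbyte[] a, int q)
    {
        int sum = 0;
        for (int j = q; j >= 0; j--)
            sum += a[j];
        return sum;
    }

    static void Main()
    {
        sbyte[] digits = new sbyte[10001];
        Console.WriteLine(SumForward(digits));
        Console.WriteLine(SumBackward(digits, digits.Length - 1));
    }
}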

Quote:
Original post by Anonymous Poster
It's not like Promit's code is more idiomatic C#, it's just a tweak that happens to make .NET run faster. Similar tweaks could probably be found in C++ and D versions as well, and we could spend weeks optimizing each one. Why not actually compare versions that are near identical in code to save the trouble? It even looks to me like his code would make the algorithm wrong due to the fact that floating point math isn't as accurate, but I didn't check this..


Different languages work in different ways. It is extremely biased to write a program in one language (C++) and expect a direct port to run equally fast in other languages (D or C#, in this case). Spending "weeks" (hopefully less) optimizing for each language according to its strengths would provide the best benchmarks. After all, I couldn't care less how well C# or D implements C-specific algorithms; what I care about is the performance I'll achieve using those languages in the way they're designed.

- Mike

Looks like DMD is missing some optimization opportunities in the div and mul functions. I tried using a modification similar to what Promit had posted:

import std.c.stdio;
import std.c.stdlib;
import std.c.time;

const int LONG_TIME=4000;

byte[] p;
byte[] t;
int q;

int main(char[][] args)
{
int startime, endtime;
int i;

if (args.length == 2) {
sscanf(&args[1][0],"%d",&q);
} else {
printf("Usage: pi [precision]\n");
exit(55);
}

if (q < 0)
{
printf("Precision was too low, running with precision of 0.\n");
q = 0;
}

if (q > LONG_TIME)
{
printf("Be prepared to wait a while...\n");
}

// Compute one more digit than we display to compensate for rounding
q++;

p.length = q + 1;
t.length = q + 1;

/* compute pi */

std.c.time.time(&startime);
arctan(2);
arctan(3);
mul4();
std.c.time.time(&endtime);

// Return to the number of digits we want to display
q--;

/* print pi */

printf("pi = %d.",cast(int)(p[0]));
for (i = 1; i <= q; i++)
printf("%d",cast(int)(p));
printf("\n");
printf("%ld seconds to compute pi with a precision of %d digits.\n",endtime-startime,q);

return 0;
}

void arctan(int s)
{
int n;

t[0] = 1;
div(s); /* t[] = 1/s */
add();
n = 1;
do {
mul(n);
div(s * s);
div(n += 2);
if (((n-1) / 2) % 2 == 0)
add();
else
sub();
} while (!tiszero());
}

void add()
{
int j;

for (j = q; j >= 0; j--)
{
if (t[j] + p[j] > 9) {
p[j] += t[j] - 10;
p[j-1] += 1;
} else
p[j] += t[j];
}
}

void sub()
{
int j;

for (j = q; j >= 0; j--)
if (p[j] < t[j]) {
p[j] -= t[j] - 10;
p[j-1] -= 1;
} else
p[j] -= t[j];
}

void mul(int multiplier)
{
int b;
int i;
int carry = 0, digit = 0;

for (i = q; i >= 0; i--) {
b = (t[i] * multiplier + carry);
carry = b / 10;
digit = b - carry * 10;
t[i] = digit;
}
}

/* t[] /= l */

void div(int divisor)
{
int i, b;
int quotient, remainder = 0;

for (i = 0; i <= q; i++) {
b = (10 * remainder + t[i]);
quotient = b / divisor;
remainder = b - divisor * quotient;
t[i] = quotient;
}
}

void div4()
{
int i, c, d = 0;

for (i = 0; i <= q; i++) {
c = (10 * d + p[i]) / 4;
d = (10 * d + p[i]) % 4;
p[i] = c;
}
}

void mul4()
{
int i, c, d;

d = c = 0;

for (i = q; i >= 0; i--) {
d = (p[i] * 4 + c) % 10;
c = (p[i] * 4 + c) / 10;
p[i] = d;
}
}

int tiszero()
{
int k;

for (k = 0; k <= q; k++)
if (t[k] != 0)
return false;
return true;
}


Then I tested the original and modified programs on two systems: a Linux machine at my university and my laptop running Windows. Here are my results:

Linux 2.6.17-1.2142_FC4smp

Digital Mars D Compiler v0.168
dmd, no modifications: 50 seconds
dmd, modifications: 33 seconds
dmd flags: -release -inline -O

g++ (GCC) 4.0.2 20051125 (Red Hat 4.0.2-8)
g++, no modifications: 29 seconds
g++, modifications: 32 seconds
g++ flags: -O3



Windows XP SP2 Pro

Digital Mars D Compiler v0.168
dmd, no modifications: 20 seconds
dmd, modifications: 15 seconds
dmd flags: -release -inline -O

g++ (GCC) 3.4.2 (mingw-special)
g++, no modifications: 12 seconds
g++, modifications: 14 seconds
g++ flags: -O3

gdc (GCC) 3.4.2 (mingw-special) (gdc 0.19, using dmd 0.162)
gdc, no modifications: 12 seconds
gdc, modifications: 14 seconds
gdc flags: -frelease -finline -O3



I ran each test twice and took the better result. I used a Windows build of the GCC D compiler, available from the following page: http://gdcwin.sourceforge.net/

Quote:
Original post by Anonymous Poster
I think this was fair. It's not like Promit's code is more idiomatic C#, it's just a tweak that happens to make .NET run faster. Similar tweaks could probably be found in C++ and D versions as well, and we could spend weeks optimizing each one. Why not actually compare versions that are near identical in code to save the trouble? It even looks to me like his code would make the algorithm wrong due to the fact that floating point math isn't as accurate, but I didn't check this..

Comparing identical versions is pointless unless you're comparing different compilers of the same language. In other words, it's only fair if you're comparing VC++ to g++. When comparing different languages, you have to make changes for each language in order to keep the test fair... a tightly optimized program for language X may well run like shit in language Y. You should keep the algorithm the same, and that's it.

CM

Guest Anonymous Poster
Quote:
Original post by doctorsixstring
Different languages work in different ways.
It's more of a comparison of compilers than languages here. As you can see, the codes are near identical. What we could conclude from Promit's tests is that the C++ compiler relieves the writer of the code from micro-optimizations more than the .NET compiler does. He did the same fix for C++ and the speed was unaffected. So when writing code casually for C++ without regard to micro-optimizations, you'd get 34/15 times faster code than with the .NET compiler. With .NET you'd have to spend extra effort doing the compiler's job to get down to 22/20. I think this already tells us something useful.
Quote:
What I care about is the performance I'll achieve using those languages in the way they're designed.
Can you claim with a straight face that Promit's optimization is how you would've written the code in the first place in C#, and that you would've used the modulo operator in C++ instead? It's not like C# was "designed" to perform worse with the integer modulo/div combo than C++. It's just a quirk.
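To make the "quirk" concrete: the micro-optimization in question is just avoiding a second hardware divide. Quotient and remainder can be derived from one division, and for a compile-time-constant divisor an optimizer can remove the division entirely. A sketch for illustration (names are mine; the magic constant shown is the standard one for dividing non-negative 32-bit ints by 10, and a C++ compiler typically does this transformation for you):

using System;

class DivModSketch
{
    // Both b / d and b % d from a single integer division; a decent C/C++
    // compiler emits one divide for the pair anyway, so writing it by hand
    // only matters when the compiler or JIT does not.
    static void DivMod(int b, int d, out int quotient, out int remainder)
    {
        quotient = b / d;
        remainder = b - quotient * d;   // no second division
    }

    // For a constant divisor such as 10, optimizers go further and replace
    // the divide with a multiply and a shift ("reciprocal" strength reduction).
    // Valid for 0 <= b <= int.MaxValue.
    static int DivideBy10(int b)
    {
        return (int)(((long)b * 0x66666667L) >> 34);
    }

    static void Main()
    {
        int q, r;
        DivMod(1234, 7, out q, out r);
        Console.WriteLine("{0} {1}", q, r);       // prints "176 2"
        Console.WriteLine(DivideBy10(1234567));   // prints "123456"
    }
}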

Guest Anonymous Poster
Quote:
Original post by Conner McCloud
Comparing identical versions is pointless unless you're comparing different compilers of the same language. In other words, it's only fair if you're comparing VC++ to g++. When comparing different languages, you have to make changes for each language in order to keep the test fair... a tightly optimized program for language X may well run like shit in language Y.
Even if we were comparing different compilers of the same language, one could use the same reasoning you do. One would want to optimize one's code for the particular compiler's quirks, no? And code tightly optimized for compiler X could run like shit when compiled with compiler Y.

For code like this, all three are essentially identical. None of them provides some super-speed construct, missing from the other languages, that should be exploited here. Promit's changes still work for D and C++ directly. The original D code worked for C++ and C# (essentially) directly. I know there are tons of actual cases where language differences matter for speed; this just isn't one of them. Promit's change was only there to make the compiler happy, not to make the code "better C#".

Quote:
Original post by Anonymous Poster
It's more of a comparison of compilers than languages here.


Comparing a C# compiler to a C or C++ compiler to a D compiler is a bit like comparing apples to oranges, don't you think?

Quote:
Original post by Anonymous Poster
As you can see, the codes are near identical.


As I mentioned earlier, I think this is the wrong way to run a benchmark. What if the original example is written in a non-C language, and then ported to C in a way that is beneficial to the original language? Obviously that would be unfair.

Quote:
Original post by Anonymous Poster
What we could conclude from Promit's tests is that the C++ compiler relieves the writer of the code from micro-optimizations more than the .NET compiler does. He did the same fix for C++ and the speed was unaffected. So when writing code casually for C++ without regard to micro-optimizations, you'd get 34/15 times faster code than with the .NET compiler. With .NET you'd have to spend extra effort doing the compiler's job to get down to 22/20. I think this already tells us something useful.


I agree that C++ has the edge over C# in this case. It is interesting that C#'s modulus operator is so much slower than C++'s. What version of .NET was used for the tests? Has anyone compared it to non-MS implementations like Mono?

Quote:
Original post by Anonymous Poster
Can you claim with a straight face that Promit's optimization is how you would've written the code in the first place in C#, and that you would've used the modulo operator in C++ instead?


No, I honestly would never have thought to use Promit's code instead of the modulus.

Quote:
Original post by Anonymous Poster
It's not like C# was "designed" to perform worse with the integer modulo/div combo than C++. It's just a quirk.


I agree. It would be interesting to see if this quirk is common across all versions and implementations of .NET.

- Mike
