Is there any high performance maths library

Started by
11 comments, last by erwincoumans 16 years, 1 month ago
Quote:Original post by WillC

You might like to check out the open source Sony vectorized math library, which is available to download as part of the Bullet dynamics engine.


I second the Sony vector math libs.

Indy
Advertisement
Hmm, I did a few tests. While the Sony lib was faster, its accuracy looks questionable. I compared it with Ogre's maths, which also has SIMD, and with some hacked-together custom maths code.

Here's my test. Note, I just did some data IO to ensure it doesn't get compiled away in Release mode.

#include <iostream>#include "vectormath/cpp/vectormath_aos.h"#include <Ogre.h>#ifdef _DEBUG	#pragma comment (lib, "OgreMain_d.lib")#else	#pragma comment (lib, "OgreMain.lib")#endif#include <cmath>#include <float.h>#include <fstream>using namespace std;using namespace Vectormath::Aos;ostream & operator << (ostream & os, const Vector3 & v){	return os << v.getX () << "," << v.getY () << "," << v.getZ ();}#include <windows.h>/// Create a Timer, which will immediately begin counting/// up from 0.0 seconds./// You can call reset() to make it start over.class Timer{public:	Timer()	{		reset();	}	/// reset() makes the timer start over counting from 0.0 seconds.	void reset()	{		unsigned __int64 pf;		QueryPerformanceFrequency( (LARGE_INTEGER *)&pf );		freq_ = 1.0 / (double)pf;		QueryPerformanceCounter( (LARGE_INTEGER *)&baseTime_ );	}	/// seconds() returns the number of seconds (to very high resolution)	/// elapsed since the timer was last created or reset().	double seconds()	{		unsigned __int64 val;		QueryPerformanceCounter( (LARGE_INTEGER *)&val );		return (val - baseTime_) * freq_;	}	/// seconds() returns the number of milliseconds (to very high resolution)	/// elapsed since the timer was last created or reset().	
double milliseconds()	{		return seconds() * 1000.0;	}private:	double freq_;	unsigned __int64 baseTime_;};struct MyVec{	union	{		struct		{			float x,y,z;		};		float data [3];	};	MyVec () : x(0), y(0), z(0)	{}	MyVec (float x, float y, float z)	 : x (x), y (y), z (z)	{}	MyVec & operator = (const MyVec & v)	{		memcpy (data, v.data, sizeof (float) * 3);		return *this;	}};ostream & operator << (ostream & os, const MyVec & v){	return os << v.x << "," << v.y << "," << v.z;}MyVec MyCross (const MyVec & v1, const MyVec & v2){	return (MyVec (v1.y * v2.z - v1.z * v2.y				  ,v1.z * v2.x - v1.x * v2.z				  ,v1.x * v2.y - v1.y * v2.x));}MyVec MyNormalize (const MyVec & v){	MyVec result = v;	const float distance = sqrt (v.x*v.x + v.y*v.y + v.z*v.z);	if (FLT_EPSILON < distance)	{		result.x /= distance;		result.y /= distance;		result.z /= distance;	}	return result;}int main (){	Timer timer;	const int  max_iteration = 10000000;	// this causes integer overflow in new in VS2005?	//	const int  max_iteration = 100000000;	{		double startTime = timer.seconds ();		Vectormath::Aos::Vector3 *array = new Vectormath::Aos::Vector3 [max_iteration];		Vectormath::Aos::Vector3 result;		for (int i = 0; i < max_iteration ;++i)		{			Vectormath::Aos::Vector3 v1 (i,2,3);			Vectormath::Aos::Vector3 v2 (i,3,4);			Vectormath::Aos::Vector3 v3 = cross (v1, v2);			Vectormath::Aos::Vector3 v4 = normalize (v3);			result = v4;			array  = result;		}		double endTime = timer.seconds ();		ofstream ofs ("1.txt");		for (int i = 0; i < 1000; ++i)		{			ofs << array  << endl;		}		delete [] array;		cout << result << endl;		cout << "Time taken is for SIMD is " << endTime - startTime << endl;	}	{		double startTime = timer.seconds ();		MyVec *array = new MyVec [max_iteration];		MyVec result;		for (int i = 0; i < max_iteration ;++i)		{			MyVec v1 (i,2,3);			MyVec v2 (i,3,4);			MyVec v3 = MyCross (v1, v2);			MyVec v4 = MyNormalize (v3);			result = v4;			array  = result;		}		double endTime = timer.seconds ();		ofstream ofs 
("2.txt");		for (int i = 0; i < 1000; ++i)		{			ofs << array  << endl;		}		delete [] array;		cout << result << endl;		cout << "Time taken is for MyVec is " << endTime - startTime << endl;	}	{		double startTime = timer.seconds ();		Ogre::Vector3 *array = new Ogre::Vector3 [max_iteration];		Ogre::Vector3 result;		for (int i = 0; i < max_iteration ;++i)		{			Ogre::Vector3 v1 (i,2,3);			Ogre::Vector3 v2 (i,3,4);			Ogre::Vector3 v3 = v1.crossProduct (v2);			Ogre::Vector3 v4 = v3.normalisedCopy ();			result = v4;			array  = result;		}		double endTime = timer.seconds ();		ofstream ofs ("3.txt");		for (int i = 0; i < 1000; ++i)		{			ofs << array  << endl;		}		delete [] array;		cout << result << endl;		cout << "Time taken is for MyVec is " << endTime - startTime << endl;	}	return 0;}
You can add a Newton-Raphson iteration to provide more precision when using _mm_rsqrt_ps. Note that this only applies to the SSE version of the Sony Vectormath library. The Playstation 3 Cell SPU and PPU versions should be fine.

It has been improved, so the SSE version of Sony Vectormath in Bullet 2.67 will have higher precision, at the cost of some performance. There is still the normalizeApprox for the faster version. You can download the SSE fix here:

http://www.bulletphysics.com/Bullet/phpBB3/viewtopic.php?f=9&t=1551

Thanks for the feedback, hope this helps,
Erwin
http://bulletphysics.com

[Edited by - erwincoumans on February 28, 2008 1:12:02 PM]

This topic is closed to new replies.

Advertisement