fast vector class (source) * not so fast

Started by
78 comments, last by Abominacion 17 years, 1 month ago
I just wrote a fast vector class using SSE. It is free to use for anyone who needs it. It only uses instructions up to SSE2; the dot product could be further improved with an SSE3 instruction that sums the values in the array, but I haven't found the name of this instruction yet. Suggestions for improvement are welcome! EDIT: changed some stuff. EDIT2: added some stuff and fixed crash bugs. EDIT3: it seems that people trying this code get worse results than with regular code, which means that this is a not-so-fast vector class.


//////////////////////////////////////
/// vec3 BY Dragon_Strike, Robert Nagy
/// 2007-03-01 
//////////////////////////////////////


#ifndef vec3_H
#define vec3_H

#include <math.h>

// Run-time switch: when true, length(), cross() and normalize() take their
// inline-assembly SSE path; when false they use the scalar fallback.
// NOTE(review): `static` at namespace scope in a header gives every
// translation unit that includes it a separate copy, so assigning this flag
// in one .cpp does not affect the others -- confirm that is intended.
static bool UseSSE = true;

// Tolerance used by the approximate comparisons (operator== / operator!=).
// Guarded so that an EPSILON macro defined before this header is included
// takes precedence over this default.
#ifndef EPSILON
#define EPSILON 0.0001f
#endif

// 16-byte alignment specifier. The SSE code paths in this header access vec3
// objects with movaps, which requires 16-byte aligned addresses.
#ifndef VEC3_ALIGN16
#if defined(_MSC_VER)
#define VEC3_ALIGN16 __declspec(align(16))
#else
#define VEC3_ALIGN16 __attribute__((aligned(16)))
#endif
#endif

// Three-component float vector padded with a fourth component (w) so that it
// fills one 16-byte SSE register.
//
// All arithmetic operates on x, y and z only. Vectors produced by the
// arithmetic operators get w = 1 (the constructor default); Set() and the
// assignment operators leave w untouched.
class VEC3_ALIGN16 vec3
{

	public:

		float x,y,z,w;

		// Component-wise construction; defaults give (0, 0, 0, 1).
		vec3(float _x = 0, float _y = 0, float _z = 0, float _w = 1)
			: x(_x), y(_y), z(_z), w(_w)
		{
		}

		// Copies all four components, including w.
		vec3(const vec3 &vec)
			: x(vec.x), y(vec.y), z(vec.z), w(vec.w)
		{
		}

		~vec3() {}

		// Overwrites x, y and z; w is left as it was.
		void Set(float ex, float why, float zee)
		{
			x = ex; y = why; z = zee;
		}

		// --- Binary arithmetic (component-wise; result has w = 1) ---

		vec3 operator*(float num) const
		{
			return vec3(x*num, y*num, z*num);
		}

		vec3 operator*(const vec3 &vec) const
		{
			return vec3(x*vec.x, y*vec.y, z*vec.z);
		}

		vec3 operator/(float num) const
		{
			return vec3(x/num, y/num, z/num);
		}

		vec3 operator/(const vec3 &vec) const
		{
			return vec3(x/vec.x, y/vec.y, z/vec.z);
		}

		vec3 operator+(const vec3 &vec) const
		{
			return vec3(x+vec.x, y+vec.y, z+vec.z);
		}

		vec3 operator+(float num) const
		{
			return vec3(x+num, y+num, z+num);
		}

		vec3 operator-(const vec3 &vec) const
		{
			return vec3(x-vec.x, y-vec.y, z-vec.z);
		}

		vec3 operator-(float num) const
		{
			return vec3(x-num, y-num, z-num);
		}

		// Unary negation of x, y and z.
		vec3 operator-() const
		{
			return vec3(-x, -y, -z);
		}

		// --- Compound assignment (x, y, z only) ---
		// These return *this so that expressions such as (a += b) *= 2 and
		// a = b = c compile, as they do for built-in types.

		vec3& operator+=(const vec3 &vec)
		{
			x += vec.x; y += vec.y; z += vec.z;
			return *this;
		}

		vec3& operator+=(float f)
		{
			x += f; y += f; z += f;
			return *this;
		}

		vec3& operator-=(const vec3 &vec)
		{
			x -= vec.x; y -= vec.y; z -= vec.z;
			return *this;
		}

		vec3& operator-=(float f)
		{
			x -= f; y -= f; z -= f;
			return *this;
		}

		vec3& operator/=(float f)
		{
			x /= f; y /= f; z /= f;
			return *this;
		}

		vec3& operator*=(float f)
		{
			x *= f; y *= f; z *= f;
			return *this;
		}

		// Assigns x, y and z from another vector. Note: w is not copied.
		vec3& operator=(const vec3 &vec)
		{
			x = vec.x ; y = vec.y ; z = vec.z;
			return *this;
		}

		// Broadcasts one scalar into x, y and z; w is left as it was.
		vec3& operator=(float f)
		{
			x = f ; y = f ; z = f;
			return *this;
		}

		// --- Approximate comparison ---
		// Two vectors differ when any of x, y, z differs by more than EPSILON.
		// Both operators are const so they can be used on const operands.

		bool operator != (const vec3& vec) const
		{
			return ((fabs(x - vec.x) > EPSILON) || (fabs(y - vec.y) > EPSILON) || (fabs(z - vec.z) > EPSILON));
		}

		bool operator == (const vec3& vec) const
		{
			return !(*this != vec);
		}

};

// Returns the Euclidean length sqrt(x*x + y*y + z*z) of vec.
//
// The w component is padding and must not contribute: a full four-lane
// horizontal add would also sum w*w (w defaults to 1) and make the SSE result
// disagree with the scalar path, so only lanes 0..2 are accumulated here.
inline float length(const vec3 &vec)
{
	float f;
	if (UseSSE)
	{
		__asm {
			mov esi, vec
			movaps xmm0, [esi]            // xmm0 = (x, y, z, w); needs 16-byte alignment

			mulps xmm0, xmm0              // xmm0 = (x*x, y*y, z*z, w*w)
			movaps xmm1, xmm0
			shufps xmm1, xmm1, 01010101b  // lane 0 of xmm1 = y*y
			movaps xmm2, xmm0
			shufps xmm2, xmm2, 10101010b  // lane 0 of xmm2 = z*z
			addss xmm0, xmm1              // lane 0 = x*x + y*y
			addss xmm0, xmm2              // lane 0 = x*x + y*y + z*z (w*w excluded)
			sqrtss xmm0, xmm0
			movss f, xmm0
		}
	}
	else
	{
		f = sqrt(vec.x*vec.x + vec.y*vec.y + vec.z*vec.z);
	}

	return f;
}

inline float distance(const vec3 &vec1, const vec3 &vec2)
{
	return length(vec1-vec2);
}

// Returns the cross product vec1 x vec2.
//
// SSE path: uses the "shuffle, multiply, subtract" form
//   (a.yzx * b.zxy) - (a.zxy * b.yzx)
// computed on whole registers. The fourth lane evaluates to
// w1*w2 - w1*w2, so the SSE result has w = 0 for finite w, while the scalar
// path leaves w = 1 (the vec3 constructor default).
// movaps requires both operands and the local result to be 16-byte aligned.
inline vec3 cross(const vec3 &vec1, const vec3 &vec2)
{
	vec3 vec;

	if (UseSSE)
	{
	
		__asm {

			// esi -> vec1, edi -> vec2
			mov esi, vec1
			mov edi, vec2
			// xmm0 = a = (ax, ay, az, aw), xmm1 = b = (bx, by, bz, bw)
			movaps xmm0, [esi]
			movaps xmm1, [edi]

			// keep untouched copies of a and b for the second product
			movaps xmm2, xmm0
			movaps xmm3, xmm1
			// xmm0 = (ay, az, ax, aw)
			shufps xmm0, xmm0, 11001001b
			// xmm1 = (bz, bx, by, bw)
			shufps xmm1, xmm1, 11010010b
			// xmm0 = (ay*bz, az*bx, ax*by, aw*bw)
			mulps xmm0, xmm1
			// xmm2 = (az, ax, ay, aw)
			shufps xmm2, xmm2, 11010010b
			// xmm3 = (by, bz, bx, bw)
			shufps xmm3, xmm3, 11001001b
			// xmm2 = (az*by, ax*bz, ay*bx, aw*bw)
			mulps xmm2, xmm3
			// xmm0 = (ay*bz - az*by, az*bx - ax*bz, ax*by - ay*bx, 0)
			subps xmm0, xmm2

			// store all four lanes into the local result
			movaps vec, xmm0
		}
		
	}
	else
	{
		// Scalar reference implementation of the same formula.
		vec = vec3(vec1.y * vec2.z - vec1.z * vec2.y, 
				   vec1.z * vec2.x - vec1.x * vec2.z, 
				   vec1.x * vec2.y - vec1.y * vec2.x);

	}

	return vec;	
}

inline float dot(const vec3 &vec1, const vec3 &vec2)
{
	return (vec1.x*vec2.x + vec1.y*vec2.y + vec1.z*vec2.z );
}

// Returns vec1 scaled to unit length (x, y, z only).
//
// A zero-length (or NaN) input yields the default vector (0, 0, 0, 1) in both
// the SSE and the scalar path. The result always has w = 1.
//
// The SSE path uses rsqrtps, which is an approximate reciprocal square root,
// so its result is slightly less precise than the scalar path.
//
// The parameter is taken by const reference (the function never modifies it),
// which also allows temporaries such as normalize(a - b).
inline vec3 normalize(const vec3 &vec1)
{
	vec3 vec;
	if (UseSSE)
	{
		// Guard the degenerate case first: rsqrt(0) is +inf and 0 * inf is
		// NaN, whereas the scalar path returns the default vector.
		const float lengthSq = vec1.x*vec1.x + vec1.y*vec1.y + vec1.z*vec1.z;
		if (lengthSq > 0)
		{
			__asm {
				mov esi, vec1
				movaps xmm2, [esi]            // xmm2 = (x, y, z, w); needs 16-byte alignment

				movaps xmm0, xmm2
				mulps xmm0, xmm0              // xmm0 = (x*x, y*y, z*z, w*w)
				movaps xmm1, xmm0
				shufps xmm1, xmm1, 01010101b  // xmm1 = y*y in every lane
				movaps xmm3, xmm0
				shufps xmm3, xmm3, 10101010b  // xmm3 = z*z in every lane
				shufps xmm0, xmm0, 00000000b  // xmm0 = x*x in every lane
				addps xmm0, xmm1
				addps xmm0, xmm3              // x*x + y*y + z*z in every lane (w*w excluded)

				rsqrtps xmm0, xmm0            // approximate 1 / length in every lane
				mulps xmm2, xmm0
				movaps vec, xmm2
			}
			// The multiply above also scaled the w lane; restore the value
			// the scalar path produces.
			vec.w = 1.0f;
		}
	}
	else
	{
		float l = length(vec1);
		if (l > 0)
			vec = vec1 / l;
	}

	return vec;
}

// Returns N when dot(Nref, I) is negative, otherwise returns -N.
inline vec3 faceforward(const vec3 &N, const vec3 &I, const vec3 &Nref)
{
	const bool keepOrientation = dot(Nref, I) < 0;
	if (!keepOrientation)
		return -N;
	return N;
}

// Returns true when at least one of x, y, z is non-zero, i.e. its magnitude
// exceeds EPSILON. The magnitude is tested so that negative components count
// as non-zero too (a plain `> EPSILON` test treats them as zero).
inline bool any(const vec3 &vec)
{
	return (fabs(vec.x) > EPSILON || fabs(vec.y) > EPSILON || fabs(vec.z) > EPSILON );
}

// Returns true when every one of x, y, z is non-zero, i.e. its magnitude
// exceeds EPSILON. The magnitude is tested so that negative components count
// as non-zero too (a plain `> EPSILON` test treats them as zero).
inline bool all(const vec3 &vec)
{
	return (fabs(vec.x) > EPSILON && fabs(vec.y) > EPSILON && fabs(vec.z) > EPSILON );
}

// Returns the component-wise absolute value of x, y and z (w of the result is
// the constructor default, 1).
//
// Every component is written unconditionally: copying only the negative ones
// would leave the non-negative components at the default 0, so that
// abs((1, -2, 3)) would come out as (0, 2, 0).
inline vec3 abs(const vec3 &vec1)
{
	return vec3(fabs(vec1.x), fabs(vec1.y), fabs(vec1.z));
}

#endif







[Edited by - Dragon_Strike on March 3, 2007 8:09:58 PM]
Advertisement
vec3& operator=(const vec3 &vec) {   x = vec.x ; y = vec.y ; z = vec.z;	   return *this;}
Just my 2 cents on a more abstract, but still high performance, data structure:

enum {x, y, z, w};
struct vec3
{
...
   float m[3];
...
   float& operator[](int i)
   {
       return m[i];
   }
   float operator[](int i) const
   {
       return m[i];
   }


Then you can do stuff like:

vec3 foo;height = foo[y]; // etc...


And RE: the overloaded == function, you may want to take an epsilon into account:

#define EPSILON 0.0001f   friend bool operator != (const vec3& a, const vec3& b)   {      const float epsilon = 0.0001f;      return ((fabs(a[x] - b[x]) > EPSILON) || (fabs(a[y] - b[y]) > EPSILON) || (fabs(a[z] - b[z]) > EPSILON));   }   friend bool operator == (const vec3& a, const vec3& b)   {      return !(a != b); // Do this so you only need to update one function if == or != changes...   }


HTH
The binary operators should be implemented as non-member functions.

I don't know a lot about the x86 vector unit, but would it be faster if the vector elements were stored in the vector registers rather than main memory? That would eliminate copies to and from memory between consecutive operations (though perhaps the optimizer already does this).
John BoltonLocomotive Games (THQ)Current Project: Destroy All Humans (Wii). IN STORES NOW!
Quote:Original post by x452Alba
Just my 2 cents on a more abstract, but still high performance, data structure:

*** Source Snippet Removed ***

Why is that better?
Quote:
Then you can do stuff like:

*** Source Snippet Removed ***

And RE: the overloaded == function, you may want to take an epsilon into account:

I don't quite understand what that would be good for.
Quote:
*** Source Snippet Removed ***

HTH


thx for the suggestions
Quote:Original post by JohnBolton
The binary operators should be implemented as non-member functions.

I don't know a lot about the x86 vector unit, but would it be faster if the vector elements were stored in the vector registers rather than main memory? That would eliminate copies to and from memory between consecutive operations (though perhaps the optimizer already does this).


Could you explain this in a bit more detail?
Quote:Original post by x452Alba
And RE: the overloaded == function, you may want to take an epsilon into account


- this goes for the "any(..)" functions too.

I'm a lazy bum so I'll ask: have you benchmarked this against say, the DirectX vectors?
the epsilon code is wrong :P

to compare two floats:
inline bool float_equal( float a, float b ) { return fabsf( a - b ) <= std::max( 1.0f, std::max(a,b) ) * FLT_EPSILON; } 


Comparing the individual elements in the vectors against each other probably isn't the best way to compare two vectors. Should probably compare the length and angle between the vectors.

And it's much better to use templates for creating vector classes, then you can have vectors of arbitrary size and only need to code a single class :)
Quote:
Original post by _swx_
the epsilon code is wrong :P


Ummm... They're both correct, used in the industry, and acceptable. It's just that yours is more *mathematically* correct. No big deal. :)

This topic is closed to new replies.

Advertisement