# fast vector class (source) * not so fast

This topic is 4123 days old which is more than the 365 day threshold we allow for new replies. Please post a new topic.

## Recommended Posts

I just wrote a fast vector class using SSE. It is free to use for anyone who needs it. It only uses instructions up to SSE2; the dot product could be further improved with an SSE3 instruction that sums the values in the array, but I haven't found the name of this instruction yet. Suggestions for improvement are welcome! EDIT: changed some stuff. EDIT2: added some stuff and fixed crash bugs. EDIT3: it seems that people trying this code get worse results than with regular code, which means that this is a not-so-fast vector class.

//////////////////////////////////////
/// vec3 BY Dragon_Strike, Robert Nagy
/// 2007-03-01
//////////////////////////////////////

#ifndef vec3_H
#define vec3_H

#include <math.h>

// Runtime switch read by the functions below that have two implementations:
// when true they take their inline-assembly SSE path, when false they use the
// plain C++ expression instead.
// NOTE(review): this is `static` at namespace scope, so if this file is used
// as a header every translation unit that includes it gets its own copy of
// the flag -- confirm that is intended.
static bool UseSSE = true;

// Tolerance used by the approximate comparisons in this file. Guarded so a
// project-wide EPSILON defined before this point takes precedence.
#ifndef EPSILON
#define EPSILON 0.0001f
#endif

// 16-byte alignment attribute, spelled for the compiler in use. The aligned
// SSE loads/stores (movaps) used elsewhere in this file rely on it.
#ifndef VEC3_ALIGN16
#  if defined(_MSC_VER)
#    define VEC3_ALIGN16 __declspec(align(16))
#  elif defined(__GNUC__)
#    define VEC3_ALIGN16 __attribute__((aligned(16)))
#  else
#    define VEC3_ALIGN16
#  endif
#endif

// Three-component float vector stored as four floats (x, y, z, w) so that it
// fills exactly one 16-byte SSE register.
//
// Only x, y and z take part in arithmetic, assignment and comparison. `w` is
// set by the constructors (default 1) and is otherwise left untouched by the
// member functions; arithmetic operators return vectors with w == 1.
class VEC3_ALIGN16 vec3
{
public:
    float x, y, z, w;

    // Component-wise constructor; also serves as the default constructor
    // (0, 0, 0, 1).
    vec3(float _x = 0, float _y = 0, float _z = 0, float _w = 1)
        : x(_x), y(_y), z(_z), w(_w)
    {
    }

    // Copies all four components, including w.
    vec3(const vec3 &vec)
        : x(vec.x), y(vec.y), z(vec.z), w(vec.w)
    {
    }

    ~vec3() {}

    // Overwrites x, y and z; w keeps its current value.
    void Set(float ex, float why, float zee)
    {
        x = ex; y = why; z = zee;
    }

    // ---- arithmetic: every result has w == 1 (constructor default) --------

    vec3 operator*(float num) const       { return vec3(x * num, y * num, z * num); }
    vec3 operator*(const vec3 &vec) const { return vec3(x * vec.x, y * vec.y, z * vec.z); }

    vec3 operator/(float num) const       { return vec3(x / num, y / num, z / num); }
    vec3 operator/(const vec3 &vec) const { return vec3(x / vec.x, y / vec.y, z / vec.z); }

    vec3 operator+(const vec3 &vec) const { return vec3(x + vec.x, y + vec.y, z + vec.z); }
    vec3 operator+(float num) const       { return vec3(x + num, y + num, z + num); }

    vec3 operator-(const vec3 &vec) const { return vec3(x - vec.x, y - vec.y, z - vec.z); }
    vec3 operator-(float num) const       { return vec3(x - num, y - num, z - num); }
    vec3 operator-() const                { return vec3(-x, -y, -z); }

    // ---- compound assignment: modify x/y/z in place and return *this so ---
    // ---- the expressions can be chained (previously they returned void) ---

    vec3 &operator+=(const vec3 &vec)
    {
        x += vec.x; y += vec.y; z += vec.z;
        return *this;
    }
    vec3 &operator+=(float f)
    {
        x += f; y += f; z += f;
        return *this;
    }

    vec3 &operator-=(const vec3 &vec)
    {
        x -= vec.x; y -= vec.y; z -= vec.z;
        return *this;
    }
    vec3 &operator-=(float f)
    {
        x -= f; y -= f; z -= f;
        return *this;
    }

    vec3 &operator/=(float f)
    {
        x /= f; y /= f; z /= f;
        return *this;
    }

    vec3 &operator*=(float f)
    {
        x *= f; y *= f; z *= f;
        return *this;
    }

    // Assigns x, y and z from `vec`; w keeps its current value (unchanged
    // from the original behaviour). Returns *this to allow `a = b = c`.
    vec3 &operator=(const vec3 &vec)
    {
        x = vec.x; y = vec.y; z = vec.z;
        return *this;
    }
    // Sets x, y and z to the same scalar; w keeps its current value.
    vec3 &operator=(float f)
    {
        x = f; y = f; z = f;
        return *this;
    }

    // True when any of x, y, z differs from the other vector's component by
    // more than EPSILON. const-qualified so const vectors can be compared.
    bool operator!=(const vec3 &vec) const
    {
        return (fabs(x - vec.x) > EPSILON)
            || (fabs(y - vec.y) > EPSILON)
            || (fabs(z - vec.z) > EPSILON);
    }

    // Defined through operator!= so the tolerance logic lives in one place.
    bool operator==(const vec3 &vec) const
    {
        return !(*this != vec);
    }
};

// Returns the Euclidean length of the x/y/z part of `vec`; w is ignored.
//
// On 32-bit MSVC builds with UseSSE set, the result is computed with SSE
// inline assembly. Every other configuration uses the scalar expression, so
// the function also builds on toolchains without `__asm { }` support.
//
// The previous assembly shuffled the squared components but never added them
// together, so it returned sqrt(x*x) instead of the vector length.
inline float length(const vec3 &vec)
{
#if defined(_MSC_VER) && defined(_M_IX86)
    if (UseSSE)
    {
        float f;
        __asm {
            mov    esi, vec                 // address of the referenced vec3
            movaps xmm0, [esi]              // xmm0 = (x, y, z, w); aligned load

            mulps  xmm0, xmm0               // xmm0 = (x*x, y*y, z*z, w*w)
            movaps xmm1, xmm0
            shufps xmm1, xmm1, 01010101b    // lane 0 of xmm1 = y*y
            movaps xmm2, xmm0
            shufps xmm2, xmm2, 10101010b    // lane 0 of xmm2 = z*z
            addss  xmm0, xmm1               // lane 0 = x*x + y*y
            addss  xmm0, xmm2               // lane 0 = x*x + y*y + z*z (w*w is never added)
            sqrtss xmm0, xmm0
            movss  f, xmm0
        }
        return f;
    }
#endif
    return sqrt(vec.x*vec.x + vec.y*vec.y + vec.z*vec.z);
}

inline float distance(const vec3 &vec1, const vec3 &vec2)
{
return length(vec1-vec2);
}

// Returns the cross product vec1 x vec2 in x/y/z, with w == 1.
//
// On 32-bit MSVC builds with UseSSE set, the product is computed with SSE
// inline assembly; every other configuration uses the scalar expression.
// The assembly leaves 0 in the w lane (w1*w2 - w1*w2), so w is reset to 1
// afterwards to make both paths return the same vector.
inline vec3 cross(const vec3 &vec1, const vec3 &vec2)
{
#if defined(_MSC_VER) && defined(_M_IX86)
    if (UseSSE)
    {
        vec3 vec;

        __asm {
            mov esi, vec1
            mov edi, vec2
            movaps xmm0, [esi]              // xmm0 = a = (ax, ay, az, aw)
            movaps xmm1, [edi]              // xmm1 = b = (bx, by, bz, bw)

            movaps xmm2, xmm0
            movaps xmm3, xmm1
            shufps xmm0, xmm0, 11001001b    // (ay, az, ax, aw)
            shufps xmm1, xmm1, 11010010b    // (bz, bx, by, bw)
            mulps xmm0, xmm1                // (ay*bz, az*bx, ax*by, aw*bw)
            shufps xmm2, xmm2, 11010010b    // (az, ax, ay, aw)
            shufps xmm3, xmm3, 11001001b    // (by, bz, bx, bw)
            mulps xmm2, xmm3                // (az*by, ax*bz, ay*bx, aw*bw)
            subps xmm0, xmm2                // cross product in lanes 0..2, 0 in lane 3

            movaps vec, xmm0
        }

        vec.w = 1;  // match the scalar path, which returns w == 1
        return vec;
    }
#endif
    return vec3(vec1.y * vec2.z - vec1.z * vec2.y,
                vec1.z * vec2.x - vec1.x * vec2.z,
                vec1.x * vec2.y - vec1.y * vec2.x);
}

// Dot product of the x/y/z components; w does not take part.
inline float dot(const vec3 &vec1, const vec3 &vec2)
{
    const float xx = vec1.x * vec2.x;
    const float yy = vec1.y * vec2.y;
    const float zz = vec1.z * vec2.z;
    return xx + yy + zz;
}

// Returns `vec1` scaled to unit length in x/y/z, with w == 1.
// A vector whose squared length is not greater than zero (zero vector, or
// NaN input) yields the default vector (0, 0, 0, 1).
//
// On 32-bit MSVC builds with UseSSE set, the scale factor comes from the SSE
// reciprocal-square-root approximation (rsqrtss), so the result is slightly
// less precise than the scalar path used everywhere else.
//
// Fixes over the previous version:
//  * the assembly never summed the squared components, so each component was
//    divided by its own magnitude, giving a vector of +/-1 (NaN for zeros);
//  * the assembly also rescaled w, while the scalar path leaves it at 1;
//  * the parameter is now a const reference so temporaries such as
//    normalize(a - b) are accepted.
inline vec3 normalize(const vec3 &vec1)
{
    vec3 vec;   // (0, 0, 0, 1): returned as-is for zero-length input

    const float lenSq = vec1.x*vec1.x + vec1.y*vec1.y + vec1.z*vec1.z;
    if (!(lenSq > 0))
        return vec;

#if defined(_MSC_VER) && defined(_M_IX86)
    if (UseSSE)
    {
        __asm {
            movss   xmm0, lenSq             // lane 0 = x*x + y*y + z*z
            rsqrtss xmm0, xmm0              // lane 0 ~= 1 / length
            shufps  xmm0, xmm0, 00000000b   // broadcast 1/length to all lanes

            mov     esi, vec1               // address of the referenced vec3
            movaps  xmm1, [esi]             // xmm1 = (x, y, z, w); aligned load
            mulps   xmm1, xmm0              // scale every lane by 1/length
            movaps  vec, xmm1
        }

        vec.w = 1;  // undo the scaling of w so both paths agree
        return vec;
    }
#endif
    const float l = sqrt(lenSq);
    vec.x = vec1.x / l;
    vec.y = vec1.y / l;
    vec.z = vec1.z / l;
    return vec;
}

// Returns N when dot(Nref, I) is negative, otherwise the negated N.
inline vec3 faceforward(const vec3 &N, const vec3 &I, const vec3 &Nref)
{
    const bool keepOrientation = dot(Nref, I) < 0;
    if (!keepOrientation)
    {
        const vec3 normal = N;
        return -normal;
    }
    return N;
}

// True when at least one of x, y, z is non-zero, i.e. its magnitude exceeds
// EPSILON. The magnitude is tested (not the signed value) so that negative
// components count as non-zero too; previously any((-1, 0, 0)) was false.
inline bool any(const vec3 &vec)
{
    return fabs(vec.x) > EPSILON
        || fabs(vec.y) > EPSILON
        || fabs(vec.z) > EPSILON;
}

// True when every one of x, y, z is non-zero, i.e. its magnitude exceeds
// EPSILON. The magnitude is tested (not the signed value) so that negative
// components count as non-zero too; previously all((-1, -1, -1)) was false.
inline bool all(const vec3 &vec)
{
    return fabs(vec.x) > EPSILON
        && fabs(vec.y) > EPSILON
        && fabs(vec.z) > EPSILON;
}

// Returns the component-wise absolute value of x, y and z; the result has
// w == 1 (constructor default), as before.
//
// The previous version only wrote a component when it was negative and left
// the default 0 otherwise, so abs((1, 2, 3)) came back as (0, 0, 0).
inline vec3 abs(const vec3 &vec1)
{
    return vec3(fabs(vec1.x), fabs(vec1.y), fabs(vec1.z));
}

#endif


[Edited by - Dragon_Strike on March 3, 2007 8:09:58 PM]

##### Share on other sites
// Assignment that copies x, y and z and hands back *this, so that
// assignments can be chained (a = b = c).
vec3& operator=(const vec3 &vec)
{
    x = vec.x;
    y = vec.y;
    z = vec.z;
    return *this;
}

##### Share on other sites
Just my 2 cents on a more abstract, but still high performance, data structure:

enum {x, y, z, w};struct vec3{...   float m[3];...   float& operator[](int i)   {       return m[i];   }   float operator[](int i) const   {       return m[i];   }

Then you can do stuff like:

vec3 foo;height = foo[y]; // etc...

And RE: the overloaded == function, you may want to take an epsilon into account:

#define EPSILON 0.0001f   friend bool operator != (const vec3& a, const vec3& b)   {      const float epsilon = 0.0001f;      return ((fabs(a[x] - b[x]) > EPSILON) || (fabs(a[y] - b[y]) > EPSILON) || (fabs(a[z] - b[z]) > EPSILON));   }   friend bool operator == (const vec3& a, const vec3& b)   {      return !(a != b); // Do this so you only need to update one function if == or != changes...   }

HTH

##### Share on other sites
The binary operators should be implemented as non-member functions.

I don't know a lot about the x86 vector unit, but would it be faster if the vector elements were stored in the vector registers rather than main memory? That would eliminate copies to and from memory between consecutive operations (though perhaps the optimizer already does this).

##### Share on other sites
Quote:
 Original post by x452Alba: Just my 2 cents on a more abstract, but still high performance, data structure: *** Source Snippet Removed ***

Why is that better?
Quote:
 Then you can do stuff like:*** Source Snippet Removed ***And RE: the overloaded == function, you may want to take an epsilon into account:

I don't quite understand what that would be good for.
Quote:
 *** Source Snippet Removed ***HTH

thx for the suggestions

##### Share on other sites
Quote:
 Original post by JohnBolton: The binary operators should be implemented as non-member functions. I don't know a lot about the x86 vector unit, but would it be faster if the vector elements were stored in the vector registers rather than main memory? That would eliminate copies to and from memory between consecutive operations (though perhaps the optimizer already does this).

Could you explain this in a bit more detail?

##### Share on other sites
Quote:
 Original post by x452AlbaAnd RE: the overloaded == function, you may want to take an epsilon into account

- this goes for the "any(..)" functions too.

##### Share on other sites
I'm a lazy bum so I'll ask: have you benchmarked this against say, the DirectX vectors?

##### Share on other sites
the epsilon code is wrong :P

to compare two floats:
// Approximate float equality with a tolerance of FLT_EPSILON scaled by the
// larger operand magnitude (never less than 1).
// The scale uses |a| and |b|: taking max(a, b) on the signed values made the
// tolerance collapse to FLT_EPSILON whenever both operands were negative.
inline bool float_equal( float a, float b )
{
    const float scale = std::max( 1.0f, std::max( fabsf( a ), fabsf( b ) ) );
    return fabsf( a - b ) <= scale * FLT_EPSILON;
}

Comparing the individual elements in the vectors against each other probably isn't the best way to compare two vectors. Should probably compare the length and angle between the vectors.

And it's much better to use templates for creating vector classes, then you can have vectors of arbitrary size and only need to code a single class :)

##### Share on other sites
Quote:
 Original post by _swx_the epsilon code is wrong :P

Ummm... They're both correct, used in the industry, and acceptable. It's just that yours is more *mathematically* correct. No big deal. :)

• 37
• 12
• 10
• 10
• 9
• ### Forum Statistics

• Total Topics
631360
• Total Posts
2999554
×