Vector\Matrix library design help...

Started by
18 comments, last by Zahlman 17 years, 1 month ago
Quote:Original post by sirGustav
what's wrong with writing your own vector library?


There's nothing wrong with that. However, it takes time, and when you've written it four times for four different languages, plus an additional two for learning purposes, it definitely starts to get old.

My free time is limited (two Master's Doctorates plus an additional part-time job, and lots of people I care about that I see daily), and the three hours and a half of development time that I get weekly are too precious to me to be spent on anything other than interesting actions. That might be because I'm jaded, though.
Advertisement
Quote:Original post by Timkin
Zahlman, what would be wrong with using std::valarray<> over boost::array? I.e., what does the latter give that makes up for the additional dependency on the boost library at compile time?


Well, the name is ugly ('value' doesn't add extra information, and then on top of that, they abbreviate it?), but otherwise it should be fine. I just keep forgetting it exists, is all :)
OMG I've just asked if I should do a specific sized mathematical Vector\Matrix or a general one, I did mean for OpenGL, which only uses mostly 3d vectors and 4d matrices... I answered myself after a while...

U've been talking about performance and space and all, now why the hell would I need the STL vector class when my data structures are STATIC not DYNAMIC... vector is a dynamic structure like a list and all of those that are related to it...

Think about it, whats faster?
Accesing an array or accesing a list?

GLTVector.h:

#ifndef LG_VECTOR_H#define LG_VECTOR_H#ifdef _cplusplusnamespace glUtillities {#endif // _cplusplus	typedef GLfloat  vector3f[3];#ifdef _cplusplus	class GLTVector3f {	public:		GLTVector3f(GLfloat a = .0f, GLfloat b = .0f, GLfloat c = .0f);		GLTVector3f& operator+=(GLTVector3f vec);		GLTVector3f& operator+=(vector3f vec);		GLTVector3f& operator+=(GLfloat scal);		GLTVector3f& operator-=(GLTVector3f vec);		GLTVector3f& operator-=(vector3f vec);		GLTVector3f& operator-=(GLfloat scal);		bool operator==(GLTVector3f vec);		bool operator!=(GLTVector3f vec);		bool operator==(vector3f vec);		bool operator!=(vector3f vec);	private:		vector3f m_vec;	};	extern "C" {#endif // _cplusplus		/*		 * Add, Substruct and Multiply a vec by a scalar, the result will be updated in vec.		 */		extern void gltAddScalarToVector(vector3f vec, GLfloat scal);		extern void gltSubScalarOfVector(vector3f vec, GLfloat scal);		extern void gltScalarProductVectors(vector3f vec, GLfloat scal);		/*		 * Add, Substruct vec1 and vec2 and insert the result in vecOut.		 */		extern void gltAddVectors(vector3f vec1, vector3f vec2, vector3f vecOut);		extern void gltSubVectors(vector3f vec1, vector3f vec2, vector3f vecOut);		/*		 * Returns the length of vec.		 */		extern GLfloat gltVectorLength(vector3f vec);		/*		 * Returns the dot product of vec1 and vec2.		 */		extern GLfloat gltDotProductVector(vector3f vec1, vector3f vec2);		/*		 * Do a cross product of vec1 and vec2 and store it in vecOut.		 */		extern void	gltCrossProductVector(vector3f vec1, vector3f vec2, vector3f vecOut);		/*		 * Normalize vec and store the result in it as well.		 */		extern void gltNormalizeVector(vector3f vec);		/*		 * Get the normal vector of a plane which contains the points vec1, vec2 and vec3		 * when they do not form a line and are in CCW winding and store the result in vecOut.		 */		extern void gltGetNormalVector(vector3f vec1, vector3f vec2, vector3f vec3, vector3f vecOut);		/*		 * See if vec1 equals to vec2, if so return GL_TRUE, otherwise, GL_FALSE		 */		extern GLboolean gltVectorsEqual(vector3f vec1, vector3f vec2);#ifdef _cplusplus	}  // extern "C"}	   // namespace glUtillities#endif // _cplusplus#endif // LG_VECTOR_H


GLTVector.c:

#include <windows.h>#include <GL\gl.h>#include <math.h>#include "GLTVector.h"void gltAddScalarToVector(vector3f vec, GLfloat scal) {	GLint i;	for(i=0; i<3; ++i)		vec += scal;}void gltSubScalarOfVector(vector3f vec, GLfloat scal) {	GLint i;	for(i=0; i<3; ++i)		vec -= scal;}void gltScalarProductVectors(vector3f vec, GLfloat scal) {	GLint i;	for(i=0; i<3; ++i)		vec *= scal;}void gltAddVectors(vector3f vec1, vector3f vec2, vector3f vecOut) {	GLint i;	for(i=0; i<3; ++i)		vecOut = vec1 * vec2;}void gltSubVectors(vector3f vec1, vector3f vec2, vector3f vecOut) {	GLint i;	for(i=0; i<3; ++i)		vecOut = vec1 - vec2;}GLfloat gltVectorLength(vector3f vec) {	GLfloat length = 0;	GLint	i;	for(i=0; i<3; ++i)		length += (vec * vec);	return (GLfloat)sqrt(length);}GLfloat gltDotProductVector(vector3f vec1, vector3f vec2) {	GLfloat prod = 0;	GLint	i;	for(i=0; i<3; ++i)		prod += (vec1 * vec2);	return prod;}void gltCrossProductVector(vector3f vec1, vector3f vec2, vector3f vecOut) {	vecOut[0] = (vec1[1] * vec2[2]) - (vec1[2] * vec2[1]);	vecOut[1] = (vec1[2] * vec2[0]) - (vec1[0] * vec2[2]);	vecOut[2] = (vec1[0] * vec2[1]) - (vec1[1] * vec2[0]);}void gltNormalizeVector(vector3f vec) {	GLint i;	for(i=0; i<3; ++i)		vec /= gltVectorLength(vec);}void gltGetNormalVector(vector3f vec1, vector3f vec2, vector3f vec3, vector3f vecOut) {	vector3f v1, v2;	gltSubVectors(vec2, vec1, v1);	gltSubVectors(vec3, vec1, v2);	gltCrossProductVector(v1, v2, vecOut);	gltNormalizeVector(vecOut);}GLboolean gltVectorsEqual(vector3f vec1, vector3f vec2) {	GLint i;	for(i=0; i<3; ++i)		if(vec1 != vec2)			return GL_FALSE;	return GL_TRUE;}


GLTVector.cpp:

#include <windows.h>#include <GL\gl.h>#include <math.h>#include "GLTVector.h"using namespace glUtillities;GLTVector3f::GLTVector3f(GLfloat a, GLfloat b, GLfloat c) {	m_vec[0] = a;	m_vec[1] = b;	m_vec[2] = c;}GLTVector3f& GLTVector3f::operator+=(GLTVector3f vec) {	vector3f	temp;	GLTVector3f tmpObj;	gltAddVectors(m_vec, vec.m_vec, temp);	memcpy(tmpObj.m_vec, temp, sizeof(vector3f));	return tmpObj;}GLTVector3f& GLTVector3f::operator+=(vector3f vec) {	vector3f temp;	GLTVector3f tmpObj;	gltAddVectors(m_vec, m_vec, temp);	memcpy(tmpObj.m_vec, temp, sizeof(vector3f));	return tmpObj;}GLTVector3f& GLTVector3f::operator+=(GLfloat scal) {	gltAddScalarToVector(m_vec, scal);	return *this;}GLTVector3f& GLTVector3f::operator-=(GLTVector3f vec) {	vector3f temp;	GLTVector3f tmpObj;	gltSubVectors(m_vec, vec.m_vec, temp);	memcpy(tmpObj.m_vec, temp, sizeof(vector3f));	return tmpObj;}GLTVector3f& GLTVector3f::operator-=(vector3f vec) {	vector3f temp;	GLTVector3f tmpObj;	gltSubVectors(m_vec, vec, temp);	memcpy(tmpObj.m_vec, temp, sizeof(vector3f));	return tmpObj;}GLTVector3f& GLTVector3f::operator-=(GLfloat scal) {	gltSubScalarOfVector(m_vec, scal);	return *this;}bool GLTVector3f::operator==(GLTVector3f vec) {	GLboolean ret = gltVectorsEqual(m_vec, vec.m_vec);	if(ret == GL_TRUE)		return true;	return false;}bool GLTVector3f::operator!=(GLTVector3f vec) {	return !(*this == vec);}bool GLTVector3f::operator==(vector3f vec) {	GLboolean ret = gltVectorsEqual(m_vec, vec);	if(ret == GL_TRUE)		return true;	return false;}bool GLTVector3f::operator!=(vector3f vec) {	return !(*this == vec);}
Quote:Original post by Fimbulvetr
U've been talking about performance and space and all, now why the hell would I need the STL vector class when my data structures are STATIC not DYNAMIC... vector is a dynamic structure like a list and all of those that are related to it...

Think about it, whats faster?
Accesing an array or accesing a list?


I'd say, Zahlman has already answered that question before you even asked it. You probably missed the part where he mentioned boost::array, too.
Whether you find a mathematical vector library which suits you, or decide to roll your own, I'd suggest sticking to 4-component vectors and 4x4 or 4x3 matrices. If you do this at the start, it's much easier to switch to a SIMD version later. For example SSE2 on PC and Xbox.

It means your code has to decide what to set the W value to, but better to handle that up front than work out what to do in hundreds of places later. These days I think I'd only use 3-vectors for space-critical storage, and load the value into a 4-vector for manipulation.

Footnote: I don't have a strong opinion about all the boost suggestions posted earlier - if you find it convenient to derive from, embed, or just plain use a {std,boost}::{val}array<float, 4> (or whatever) feel free. I must admit haven't found many situations where I've wanted to refer to a vector's components by index (0..3) as opposed to by name (x..w) though.

(Obviously if/when you do go the SIMD route, you'll likely be using a single language-extension-provided 4-vector member, e.g. __m128.)

Cheers,

Will
Hey All,

I'm just doing the design for my Math classes at the moment and have used almost the exact same method as Fimbulvetr, but stuck with the bog standard .x, .y etc.

My question is that i noticed while scanning the DirectX math header files that they use a memcpy to clear or copy a matrix. Is this a great deal faster than say m11 = in.m11?

Cheers
Just FYI you're code is very inefficient and slow:

GLTVector3f& GLTVector3f::operator+=(GLTVector3f vec) {
vector3f temp;
GLTVector3f tmpObj;

gltAddVectors(m_vec, vec.m_vec, temp);

memcpy(tmpObj.m_vec, temp, sizeof(vector3f));

return tmpObj;
}

Total waste, that memcpy and function call are SLOW compared to just doing the math in this function.

Also, why the glt before everything? Get rid of the GLFloat, it's just a typedef too. Make it generic so you can use Direct3D later too. There is no real difference between them. This way you only have to make this thing once, not a few dozen times.

If you want, make a template class and make life even easier.
Personally, I've only found the need for 2/3/4 vectors so far, but I've heard that 5 vectors have some use in physics. My own view is to have an optimized class for each of the common cases - 2/3/4 - and 5 if you're using them alot for physics. Then I'd have a generic, templatized version handy just in case you ever need a vector of another size.

I'll also agree with with what someone said above: Not every operation should rightly be a member function.

Also, you can make your "class" in a dual C / C++ manner using "ifdef __cplusplus", here's an exerpt from a 16bit color class using this technique:
// Shift values for building and reading rgb16 componentsstatic const DWORD RGB16_SHIFT_R = 11;static const DWORD RGB16_SHIFT_G = 5;static const DWORD RGB16_SHIFT_B = 0;// Forward declare the rgb16 color structstruct rgb16;/***************************************************************************************************** Declare the C API to the rgb16 color struct *****************************************************************************************************/WORD rgb16_build(const BYTE r, const BYTE g, const BYTE b);// ... more function declarations ...struct rgb16{#ifdef __cplusplus	rgb16()						        { }	rgb16(WORD _rgb)                    { RGB = _rgb; }	rgb16(const BYTE _r, const BYTE _g, const BYTE _b)	                                    { RGB = rgb16_build(_r, _g, _b); }	// Build color	static WORD Build(const BYTE r, const BYTE g, const BYTE b)	                                    { return rgb16_build(r, g, b); }		// ... more methods ...#endif	WORD RGB;};inline WORD rgb16_build(const BYTE r, const BYTE g, const BYTE b){	return ((WORD)r << RGB16_SHIFT_R) | ((WORD)g << RGB16_SHIFT_G) | ((WORD)b << RGB16_SHIFT_B);}


With inlining, this code generates optimal assembly language regardless of whether the C or C++ api is used (at least on Microsoft's compilers.) The constuctors and member functions (which simply call the C API functions) are conditionally compiled when the __CPLUSPLUS macro has been defined (Note: other pre-processors may use a different macro for this, I'm not sure.) and compiling as vanilla C will only create the C API functions.

Back to the vectors/matrices, the most performant approach is expression templates + SSE2 assembly (or SSE2 intrinsics, if you're using Intel's compiler.) but expression templates are a fairly advanced subject.

throw table_exception("(? ???)? ? ???");

The cplusplus thing is also pointless unless you're using a very old compiler. Which case it's time to upgrade anyways as it will be out of date. You don't need C compatible code and if you do, wow... C isn't a horrid language but C++ is SO much better.
Quote:Original post by Fimbulvetr
U've been talking about performance and space and all,


Because you're the one who seems to be concerned about it and I wanted to make sure you didn't make any wrong assumptions about the overhead of standard library components.

Quote:now why the hell would I need the STL vector class when my data structures are STATIC not DYNAMIC... vector is a dynamic structure like a list and all of those that are related to it...


Well yes, it's dynamic in the sense that it can be resized, which property a list also has. But it's not a list, it's an array. An array which can be resized. There is a list class, too, for those situations when you really need the properties of a list (such as: nodes don't get moved around in memory; need to splice sublists; need frequent insertion not at the end). It's called - big surprise! - std::list.

Quote:Think about it, whats faster?
Accesing an array or accesing a list?


1) Depends what you mean by "access".
2) As mentioned, std::vector isn't a list anyway. I asked if you knew the overhead; you evidently don't, so I will explain how a typical std::vector implementation works in detail.

Of course, if you don't need the resizing, then by all means, use a container that isn't resizable and which takes advantage of that to trim down memory even more. I already mentioned one - boost::array - and Timkin mentioned another: std::valarray.



The basics of a std::vector internally look something like this (with LOTS of useful stuff removed - NONE of which costs you any extra space or performance, because of how classes are implemented in C++):

namespace fake_std {  // the REAL std::vector lets you replace the way memory allocation is done,  // too. You can base it off of malloc()/free() if you want, or make it take  // memory out of a specific pool... the Boost library again has pre-written  // "pool allocators" that you can plug in to the standard library containers.  template <typename T>  class vector {    T* allocation; // The data.    size_t size; // the number of elements currently "in use".    size_t capacity; // the size of the allocated chunk of memory.    // Total constant overhead, 12 bytes, on typical systems.    // Proportional overhead varies. The vector implements an optimization    // whereby, when the vector needs to resize, the allocation is basically    // increased by some multiple, rather than by 1 or by a constant amount.     // The multiple is typically either 1.5 or 2, but other values are known     // (I will use 2, because it's marginally easier to code).    // The net result is a proportional overhead that is at most 100% (i.e.    // doubling the actual needed space). This is a reason for using something    // like boost::array etc. when it's appropriate ;) But on average, it will    // be more like 50% - probably less, because short vectors are more common    // than long vectors, and also because you can explicitly resize vectors    // when you know how much you need, which avoids the wasted space.    // There is also a standard technique for "trimming" a vector if you don't    // know how much space you'll need, but you will know when you're done    // filling it:    // std::vector<T>(current).swap(current);    // The vector normally implements an optimization whereby only the    // elements up to 'size' get initialized (i.e. get their constructors    // called). This is something you can't do with a plain old new[] array    // allocation. Instead, it manually calls constructors and destructors    // "in place" in a "raw" chunk of memory. We'll want some helper functions    // to implement that...    void construct(int pos) {      new (allocation + pos) T(); // "placement new"    }        void copy(T* src, T* dst) {      for (int i = 0; i < size; ++i) {        new (dst + i) T(src);      }    }    void cleanup(T* array) {      for (int i = 0; i < size; ++i) {        array.~T(); // explicitly invokes the destructor.      } // The arrays that will be used always have size-many *initialized*      // elements, even if they are a bigger *allocation*.      // This is something you need to do with placement new, which doesn't      // itself allocate anything - the vector has separated the processes of      // memory allocation/deallocation and object initialization/cleanup.    }    void realloc(int new_capacity, bool preserve) {      T* old = allocation;      allocation = reinterpret_cast<T*>(new char[new_capacity * sizeof(T)]);      if (preserve) { copy(old, allocation); }      cleanup(old);      capacity = new_capacity;    }    public:    vector(int amount) : size(amount) { realloc(size, true); }    vector(const vector& other) : size(other.size) {      realloc(size, false);      copy(other.allocation, allocation);    }    vector& operator=(const vector& rhs) {      vector other(rhs);      swap(other);      return *this;    }    void swap(const vector& other) {      std::swap(allocation, other.allocation);      std::swap(size, other.size);      std::swap(capacity, other.capacity);    }    ~vector() { cleanup(allocation); }    // OK, how about indexing?    T& operator[](size_t index) { return allocation[index]; }    const T& operator[](size_t index) const { return allocation[index]; }    // See? *It's just an array*.    // Your compiler can easily inline the function call, and also cache the    // 'allocation' pointer if you repeatedly index in a loop (both of these    // are child's play when it comes to compiler optimizations; I can tell you    // this because I actually took a course on the stuff in university).    // It will perform just as fast as using a plain old array. Really.    // OK, now how about appending something, the infamous push_back that lets    // us resize the contents implicitly if necessary?    void push_back(const T& to_push) {      if (size == capacity) {        realloc((capacity ? 2 * capacity : 1), true);      }      construct(size);      allocation[size++] = to_push;    } // Again pretty simple.  }}


I probably made some mistakes in there, because I didn't test anything or refer to the standard, and was also deliberately leaving out huge chunks of the interface to show only the most basic principles of it. But you get the idea. There's no list here. There's an array, which automatically resizes when needed (by a push_back or insert, or by an explicit resize or reserve), and cleans up after its own memory allocation. And also (normally - i.e. if your compiler isn't completely brain-dead and/or ancient) implements at least two clever optimizations that most programmers probably wouldn't ever even think of, let alone know how to implement. (Again, I might well have gotten them subtly wrong ;) )

Of course, if you want real details of a real implementation, they're probably available to you. Normally the implementation of standard library stuff is contained in actual source code files on your hard drive; you should be able to find them fairly easily. (Understanding them is another matter. They tend to use really stupid variable names.)

This topic is closed to new replies.

Advertisement