3D Vector class using template expressions : need help !

Started by
13 comments, last by Bakura 16 years, 11 months ago
Hi everyone !

First of all, thanks for all your answers !

the_edd > The source code is on the first message !

Julian90 > Ok, I've put your code, now the result is much better with Visual, but not the same as GCC :/.

Vec3 <double> A (1.0, 1.0, 1.0);Vec3 <double> B (2.0, 2.0, 2.0);Vec3 <double> C (3.0, 3.0, 3.0);maStructure.Start ();for (size_t i = 0 ; i != 10000000 ; ++i)   A = (B+A);maStructure.End ();


With Visual : 0.103777
With GCC : 0.0663422

And with that code :

Trial maStructure;srand(time(0));Vec3 <double> A ((double)rand(), (double)rand(), (double)rand());Vec3 <double> B ((double)rand(), (double)rand(), (double)rand());Vec3 <double> C ((double)rand(), (double)rand(), (double)rand());maStructure.Start ();for (size_t i = 0 ; i != 10000000 ; ++i)   A = (B+A);maStructure.End ();


GCC : 0.0642897
Visual : 0.0996674

It's still slightly faster on GCC, don't know why, and don't know why also why on Visual it is faster with values get with rand...


Extrarius >

for (size_t i = 0; i != 10000000; ++i)    {        // Select random values from the array to prevent the compiler        // performing any tricks based on how we loop over the values        // since we will be adding each thing numerous times        // (you may want to vary this to reflect how you would iterate        // over an array in a real application)        int index = rand() % size;        A[index] = A[index] + B[index] + C[index] + A[index];        B[index] = A[index] + B[index] + C[index] + B[index];        C[index] = A[index] + B[index] + C[index] + C[index];    }


It's useless to do that I think, because the goal of expression object is to avoid many and many temporary objects for expression like A = B+C+D, if you add them by an index, you lose the attraction of expression templates :/.

Thanks everyone :).
Advertisement
Hi :)

I spent the afternoon the code again my class, but this time in a much more light manner ! And, guess why, it's incredibly fast, even on GCC, and rox the naïve manner ;).

Thanks for everyone, I'm gonna continue coding :p.
Hi !

In fact, it's not better. If I read some tutorials, they all said that expression templates have to reach the same level of performance that hand rolled C loop, for exemple :

a = b+c+a with ET should have the same performance (or around), that :

for (size_t i = 0 ; i != 3 ; ++i)
a = b+c+a

But it's really not the case, and I really don't know why. Here is the code.

Class Vec3 (only one operator is overloaded, to keep it simple) :

#ifndef VEC3_HPP#define VEC3_HPP#include <iostream>#include "MathOps.hpp"class Vec3{	// Surcharge de l'opérateur <<	friend std::ostream & operator<< (std::ostream &, const Vec3 &);	public:		// Constructeurs / Destructeur		inline Vec3 ();		inline Vec3 (const Vec3 &);		inline Vec3 (const float, const float, const float);		inline ~Vec3 ();		// Surcharge de l'opérateur [] en écriture et en lecture		inline float operator[] (const size_t) const;		inline float & operator[] (const size_t);		// Surcharge de l'opérateur = avec un autre objet Vec3		inline Vec3 & operator= (const Vec3 &);		// Surcharge de l'opérateur = avec une expression template		template <typename Expr>		inline Vec3 & operator= (const Expr &);	private:		float val [3];};// Constructeur par défaut, ne fait rieninline Vec3::Vec3 (){}// Constructeur de copieinline Vec3::Vec3 (const Vec3 & vec){// Change by val[0] = vec[0], val[1]=vec[1], val[2]=vec[2] doesn't change// anything	for (size_t i = 0 ; i != 3 ; ++i)	   val = vec;}// Constructeur prenant trois entiers flottantsinline Vec3::Vec3 (const float x, const float y, const float z){	val[0] = x;	val[1] = y;	val[2] = z;}// Destructeurinline Vec3::~Vec3 (){}// Surcharge de l'opérateur [] en lectureinline float Vec3::operator[] (const size_t idx) const{	return val[idx];}// Surcharge de l'opérateur [] en écritureinline float & Vec3::operator[] (const size_t idx){	return val[idx];}// Surcharge de l'opérateur = avec un autre objet Vec3inline Vec3 & Vec3::operator= (const Vec3 & vec){// Change by val[0] = vec[0], val[1]=vec[1], val[2]=vec[2] doesn't change // anything	for (size_t i = 0 ; i != 3 ; ++i)	   val = vec;	return *this;}// Surcharge de l'opérateur = avec une expression templatetemplate <typename Expr>inline Vec3 & Vec3::operator= (const Expr & expr){	//Assigment<>::Assign (*this, expr);	for (size_t i = 0 ; i != 3 ; ++i)		val = expr;	return *this;}// ---------------------------------- FONCTION AMIES ---------------------------------------std::ostream & operator<< (std::ostream & out, const Vec3 & vec){	out << vec[0] << ' ' << vec[1] << ' ' << vec[2];   return out;}#endif // VEC3_HPP


And here is the file MathOps, with the Expression struct, and overloaded operator :

#ifndef MATHOPS_HPP#define MATHOPS_HPP// ------------------------------- Déclaration des structures -------------------------------struct opAdd; // Addition// --------------------------- Définition de la classe Expression --------------------------- template <typename L, typename OpTag, typename R>struct Expression{	// Constructeur / Destructeur	inline Expression (const L &, const R &);	inline ~Expression ();	// Surcharge de l'opérateur []. Permet l'évaluation de l'expression template	inline float operator[] (const size_t) const;	   private:		const L & l; // Opérande gauche		const R & r; // Opérande droite};// Constructeurtemplate <typename L, typename OpTag, typename R>inline Expression <L, OpTag, R>::Expression (const L & l, const R & r)   : l (l), r (r){}// Destructeurtemplate <typename L, typename OpTag, typename R>inline Expression <L, OpTag, R>::~Expression (){}// Surcharge de l'opérateur []. Permet l'évaluation de l'expression templatetemplate <typename L, typename OpTag, typename R>inline float Expression <L, OpTag, R>::operator[] (const size_t idx) const{	return OpTag::Compute (l[idx], r[idx]);}// ------------------------ Définition des structures mathématiques ------------------------// Additionstruct opAdd{	// Unique fonction de la structure. Se contente de renvoyer la somme de ses deux paramètres	static float Compute (const float a, const float b)	{		return a+b;	}};   // -------------------------------- Surcharge des opérateurs --------------------------------// Opérateur d'addition (+)template <typename L, typename R>inline Expression <L, opAdd, R> operator+ (const L & l, const R & r){	return Expression <L, opAdd, R> (l, r);}#endif // MATHOPS_HPP



Moreover, I've download the CML library which uses expression templates tambien, and, guess why, I have really good result with it, even better than hand-loop C. I tried to read their source code, but it's quite complicated and not very easy to read that sort of source code.

Some benchmarks (with Visual, every optimizations are set) :

With my class :

for (size_t i = 0 ; i != 200000000 ; ++i){   a = a+b;		}

= 2.25236

for (size_t i = 0 ; i != 200000000 ; ++i){    // Directly creating the expression    a = Expression <Vec3, opAdd, Vec3>::Expression (a, b);}

= 1.35986

for (size_t i = 0 ; i != 200000000 ; ++i){   for (size_t j = 0 ; j != 3 ; ++j)      a[j] = a[j]+b[j];		}

= 1.35905

With CML :

for (size_t i = 0 ; i != 200000000 ; ++i){   a = a+b;		}

= 0.501524 (How the hell can they have such a result ? :|)

for (size_t i = 0 ; i != 200000000 ; ++i){   for (size_t j = 0 ; j != 3 ; ++j)      a[j] = a[j]+b[j];		}

= 1.38717 (here they have around the same result of mine class if I use directly the expression (a = Expression <Vec3, opAdd, Vec3>::Expression (a, b);), so I guess they have other optimizations to reach such performance, but the only thing I'd like to have for now is to have the same performance if I write : a = Expression <Vec3, opAdd, Vec3>::Expression (a, b); and a=a+b, which is not the case right now.

Thanks

Bakura, who is really confused by those ET.
Quote:Original post by Bakura
the_edd > The source code is on the first message !


Vec3<> please?

EDIT: have you tested whether GCC is removing the loop entirely? "Benchmark" the program with the loop commented-out and compare to your current results.
Hi everyone !

I found the way to solve this fu**** head-ache problem ! Now, I got exactly the same result with a hand-rolled C loop and with template expression. In fact, the only thing I do is replace that sort of code :

struct MaClasse
{
inline MaClasse ();
// Other functions, but just declaration here !
}

inline MaClasse::MaClasse ()
{
}

// Other functions here !

By :

struct MaClasse
{
MaClasse () {}
// Other functions, declared and defined inside the struct
}


Before, when I ran the profiler with my code, I obtained everything (like the constructor to OpAdd <Vec3, Vec3>... Now, when I ran the profiler with all optimizations, I have... nothing :p. So it seems that the two compilers (Visual and GCC) couldn't inline my functions...

So now it works, but I try the excellent CML library and they have really better performance and I don't know why. I post a message on the forum, and he said he'll take a look at my code and answer here ;). For now, for those who are interested, here is the code :

[source code="cpp"]#ifndef VEC3_HPP#define VEC3_HPP#include "MathOps.hpp"class Vec3{   typedef double * iterator;   typedef const double * const_iterator;   public:      // Constructeur par défaudouble      Vec3 () {};      // Constructeur de copie      Vec3 (const Vec3 & vec)      {         myValues[0] = vec.myValues[0];         myValues[1] = vec.myValues[1];         myValues[2] = vec.myValues[2];      }      // Constructeur à pardoubleir de doublerois valeurs      Vec3 (const double x, const double y, const double z)      {         myValues[0] = x;         myValues[1] = y;         myValues[2] = z;      }      // Initialise chaque composante du vecteur à 0      void Zero ()      {         myValues[0] = myValues[1] = myValues[2] = 0.0;      }      // Surcharge de l'opérateur [] en lecture      double operator[] (const size_t idx) const      {         return myValues[idx];      }      // Surcharge de l'opérateur [] en écriture      double & operator[] (const size_t idx)      {         return myValues[idx];      }      // Surcharge de l'opérateur d'affectation avec un autre objet Vec3      Vec3 & operator= (const Vec3 & vec)      {         myValues[0] = vec.myValues[0];         myValues[1] = vec.myValues[1];         myValues[2] = vec.myValues[2];         return *this;      }      // Surcharge de l'opérateur d'affectation avec une expression template      template <typename Expr>      Vec3 & operator= (const Expr & myExpr)      {         // On affecte les valeurs grâce à la structure Assign			//Assign<>::Assignment (myValues, myExpr);			iterator i = begin();			//const_iterator iend = end();			iterator i2 = myExpr.begin();			*i = *i2;         return *this;      }      // Renvoit un pointeur sur le premier élément      iterator begin () {return myValues;}      iterator end () {return myValues + 3;}      const_iterator begin () const {return myValues;}      const_iterator end () const {return myValues + 3;}   private:      double myValues [3];};#endif // VEC3_HPP


[source code="cpp"]#ifndef MATHOPS_HPP#define MATHOPS_HPPtemplate <typename L, typename R, typename OpTag>class Expression{   typedef double * iterator;   typedef const double * const_iterator;   public:      // Constructeur      Expression (const L & l, const R & r)         : op1 (l), op2 (r)      {      }      // Surcharge de l'opérateur []. Se charge d'évaluer l'expression      double operator[] (const size_t idx) const      {         return OpTag::Evaluate (idx, op1, op2);      }      // Renvoit un pointeur sur le premier élément      iterator begin () {return OpTag::Evaluate (0, op1, op2);}      iterator end () {return OpTag::Evaluate (3, op1, op2);}      const_iterator begin () const {return OpTag::Evaluate (0, op1, op2);}      const_iterator end () const {return OpTag::Evaluate (3, op1, op2);}   private:      const L & op1; // Première opérande      const R & op2; // Seconde opérande};// Additionstruct OpAdd{   template <typename L, typename R>   static double Evaluate (const size_t idx, const L & l, const R & r)   {      return l[idx] + r[idx];   }};// Soustractionstruct OpSub{   template <typename L, typename R>   static double Evaluate (const size_t idx, const L & l, const R & r)   {      return l[idx] - r[idx];   }};// Surcharge de l'opérateur +template <typename L, typename R>inline Expression <L, R, OpAdd> operator+ (const L & l, const R & r){   return Expression <L, R, OpAdd> (l, r);}// Structure Assign.template <int N = 0>struct Assign{	template <typename L, typename R>	static void Assignment (L & l, const R & r)	{      l[N] = r[N];		Assign<N+1>::Assignment (l, r);	}};// Spécialisation d'Assign lorsque N=3template <>struct Assign<3>{	template <typename L, typename R>	static void Assignment (L & l, const R & r)	{	}};#endif // MATHOPS_HPP

This topic is closed to new replies.

Advertisement