You can't be sure that GCC inlines certain functions (that's a good thing, because it also optimizes for the instruction cache, etc.), and I don't think there is much to speed up in a two-line function. Maybe change the divisions to multiplications, or eliminate every branch?
Here are my dot, normalize, and cross functions:
static float scalarNumber(const CVector3f *const v1, const CVector3f *const v2) {return (v1->x * v2->x + v1->y * v2->y + v1->z * v2->z);}
//********************************************************************
// Divides each component of this vector by the vector's Euclidean length,
// modifying the vector in place.
// Returns the length measured before the division. When
// CMathUtilEA::isNearZero reports that length as near zero, the components
// are left untouched and 0.0f is returned instead.
float CVector3f::normalize(){
    float magnitude = sqrt( x*x + y*y + z*z);

    // Guard clause: bail out before dividing by a (near-)zero length.
    // The branch is marked unlikely, so the scaling path is the expected one.
    if( unlikely( CMathUtilEA::isNearZero( magnitude)))
        return 0.0f;

    x /= magnitude;
    y /= magnitude;
    z /= magnitude;
    return magnitude;
}
//********************************************************************
void CVector3f::crossP(const CVector3f *const A, const CVector3f *const B, const CVector3f *const C){
CVector3f v1( A, B);
CVector3f v2( A, C);
x = v1.y * v2.z - v1.z * v2.y;
y = v1.z * v2.x - v1.x * v2.z;
z = v1.x * v2.y - v1.y * v2.x;
}