Jump to content

  • Log In with Google      Sign In   
  • Create Account


#Actuallipsryme

Posted 20 September 2013 - 02:48 PM

Yes, it is a release build, and the compiler is MSVC (Visual Studio 2008).

 

 

Edit: Doing it like this makes it a little bit faster than before:

// 4-component vector class using SIMD instructions.
//
// Wraps a single __m128 holding (x, y, z, w) in lanes 0..3. Dot() and
// Normalize() only use the xyz lanes when forming the dot product (see the
// _mm_dp_ps masks below). _mm_dp_ps is an SSE4.1 intrinsic.
__declspec(align(16))
struct Vec4
{
    __m128 v;

    // Leaves v uninitialized; assign a value before reading it.
    Vec4()
    {
    }

    // _mm_set_ps takes its arguments from the highest lane down, hence the
    // reversed order: x lands in lane 0 and w in lane 3.
    Vec4(float x, float y, float z, float w)
    {
        v = _mm_set_ps(w, z, y, x);
    }

    // Component accessors. They are const so they can be called on a
    // const Vec4 (e.g. the argument passed to Dot()).
    inline float X() const { return Lane(0); }
    inline float Y() const { return Lane(1); }
    inline float Z() const { return Lane(2); }
    inline float W() const { return Lane(3); }

    // Scales the vector in place by 1 / sqrt(dot(xyz, xyz)).
    //
    // Notes:
    //  - _mm_rsqrt_ps is a hardware approximation, so the resulting length
    //    is only approximately 1.
    //  - All four lanes are multiplied, so w is scaled as well.
    //  - There is no guard for a zero-length xyz.
    inline void Normalize()
    {
        // Mask 0x7F: the high nibble (0111) selects the x, y, z lanes for
        // the products; the low nibble (1111) writes the sum to all 4 lanes.
        __m128 dp = _mm_dp_ps(v, v, 0x7F);

        // Approximate reciprocal square root of the dot product.
        dp = _mm_rsqrt_ps(dp);

        // vec * rsqrt(dot(vec, vec))
        v = _mm_mul_ps(v, dp);
    }

    // Returns the 3-component dot product x*x2 + y*y2 + z*z2; w is ignored.
    inline float Dot(const Vec4 &v2) const
    {
        // Mask 0x7F: the high nibble (0111) selects the x, y, z lanes for
        // the products; the low nibble (1111) writes the sum to all 4 lanes.
        __m128 dp = _mm_dp_ps(v, v2.v, 0x7F);

        // Every lane holds the same sum; read it back from lane 0.
        float result;
        _mm_store_ss(&result, dp);

        return result;
    }

private:
    // Reads one lane (0 = x ... 3 = w) by viewing the register as 4 floats.
    inline float Lane(int index) const
    {
        union { __m128 v; float f[4]; } uf; // to access the 4 floats
        uf.v = v;
        return uf.f[index];
    }
};

#2lipsryme

Posted 20 September 2013 - 02:48 PM

Yes, it is a release build, and the compiler is MSVC (Visual Studio 2008).

 

 

Doing it like this makes it a little bit faster than before:

// 4-component vector class using SIMD instructions.
//
// Wraps a single __m128 holding (x, y, z, w) in lanes 0..3. Dot() and
// Normalize() only use the xyz lanes when forming the dot product (see the
// _mm_dp_ps masks below). _mm_dp_ps is an SSE4.1 intrinsic.
__declspec(align(16))
struct Vec4
{
    __m128 v;

    // Leaves v uninitialized; assign a value before reading it.
    Vec4()
    {
    }

    // _mm_set_ps takes its arguments from the highest lane down, hence the
    // reversed order: x lands in lane 0 and w in lane 3.
    Vec4(float x, float y, float z, float w)
    {
        v = _mm_set_ps(w, z, y, x);
    }

    // Component accessors. They are const so they can be called on a
    // const Vec4 (e.g. the argument passed to Dot()).
    inline float X() const { return Lane(0); }
    inline float Y() const { return Lane(1); }
    inline float Z() const { return Lane(2); }
    inline float W() const { return Lane(3); }

    // Scales the vector in place by 1 / sqrt(dot(xyz, xyz)).
    //
    // Notes:
    //  - _mm_rsqrt_ps is a hardware approximation, so the resulting length
    //    is only approximately 1.
    //  - All four lanes are multiplied, so w is scaled as well.
    //  - There is no guard for a zero-length xyz.
    inline void Normalize()
    {
        // Mask 0x7F: the high nibble (0111) selects the x, y, z lanes for
        // the products; the low nibble (1111) writes the sum to all 4 lanes.
        __m128 dp = _mm_dp_ps(v, v, 0x7F);

        // Approximate reciprocal square root of the dot product.
        dp = _mm_rsqrt_ps(dp);

        // vec * rsqrt(dot(vec, vec))
        v = _mm_mul_ps(v, dp);
    }

    // Returns the 3-component dot product x*x2 + y*y2 + z*z2; w is ignored.
    inline float Dot(const Vec4 &v2) const
    {
        // Mask 0x7F: the high nibble (0111) selects the x, y, z lanes for
        // the products; the low nibble (1111) writes the sum to all 4 lanes.
        __m128 dp = _mm_dp_ps(v, v2.v, 0x7F);

        // Every lane holds the same sum; read it back from lane 0.
        float result;
        _mm_store_ss(&result, dp);

        return result;
    }

private:
    // Reads one lane (0 = x ... 3 = w) by viewing the register as 4 floats.
    inline float Lane(int index) const
    {
        union { __m128 v; float f[4]; } uf; // to access the 4 floats
        uf.v = v;
        return uf.f[index];
    }
};

#1lipsryme

Posted 20 September 2013 - 02:22 PM

Yes, it is a release build, and the compiler is MSVC (Visual Studio 2008).


PARTNERS