Maybe I'm doing it wrong?
Profiler.hpp
#pragma once
#ifndef _PROFILER_HPP_
#define _PROFILER_HPP_
#pragma comment (lib,"Winmm.lib")
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <mmsystem.h>
/// Singleton interval timer built on the Windows multimedia timer.
///
/// Each call to operator()() samples the clock and records how much time has
/// passed since the previous sample (or since construction for the first
/// call). GetDTime() reports that recorded interval.
class Profiler
{
public:
    /// Returns the single shared instance, creating it on the first call.
    static Profiler* Instance();

    /// Deletes the shared instance if one has been created.
    static void Destroy();

    /// Samples the clock and stores the time elapsed since the last sample.
    void operator()();

    /// Returns the interval stored by the most recent operator()() call
    /// (0 if it has never been called). Does not modify the profiler.
    UINT64 GetDTime() const
    {
        return DTime;
    }

    // A singleton must not be duplicated; only Instance() hands it out.
    Profiler(const Profiler&) = delete;
    Profiler& operator=(const Profiler&) = delete;

private:
    // Construction and destruction are restricted to Instance()/Destroy().
    Profiler();
    ~Profiler();

    static Profiler *instance;  // the shared instance, or null when absent
    UINT64 lastGameTime;        // clock value captured at the previous sample
    UINT64 DTime;               // interval between the last two samples
};
#endif
Benchmarks.hpp
#pragma once
#ifndef _TEST_FUNCTIONS_HPP_
#define _TEST_FUNCTIONS_HPP_
#include <Windows.h>
#include <iostream>
#include <cstdlib>
#include <time.h>
#include <vector>
#include <fstream>
#include <ostream>
#include <istream>
#include <DirectXMath.h>
using namespace DirectX;
// Two implementations of the same computation -- inverse(inputMat * rotation(inputVec)) --
// plus a driver loop for each, so their generated code and run time can be compared.
// The "Unopt" functions pass XMFLOAT4X4/XMFLOAT4 storage types and load/store them in
// every function; the "Opt" functions pass XMMATRIX/XMVECTOR values between functions.
namespace Benchmark
{
// Multiplies inputMat by the rotation matrix built from the quaternion inputVec,
// stores the product, and returns UnoptB() of it.
XMFLOAT4X4 UnoptA(XMFLOAT4X4 inputMat, XMFLOAT4 inputVec);
// Loads inputMat, inverts it with XMMatrixInverse, and returns the stored result.
XMFLOAT4X4 UnoptB(XMFLOAT4X4 inputMat);
// Returns OptB() of inputMat multiplied by the rotation matrix built from the quaternion inputVec.
XMMATRIX OptA(CXMMATRIX inputMat, FXMVECTOR inputVec);
// Returns the XMMatrixInverse of inputMat; the determinant it computes is discarded.
XMMATRIX OptB(CXMMATRIX inputMat);
// Calls UnoptA(inputMat, inputVec) count times, appending every result to buffer.
void BenchUnopt(XMFLOAT4X4 inputMat, XMFLOAT4 inputVec, UINT count, std::vector<XMFLOAT4X4> &buffer);
// Loads the inputs once, then calls OptA count times, storing and appending every result to buffer.
void BenchOpt(XMFLOAT4X4 inputMat, XMFLOAT4 inputVec, UINT count, std::vector<XMFLOAT4X4> &buffer);
}
#endif
Profiler.cpp
#include "Profiler.hpp"
// Storage for the singleton pointer; null until Instance() allocates it.
Profiler *Profiler::instance = nullptr;

// Returns the shared Profiler, allocating it the first time it is requested.
Profiler *Profiler::Instance()
{
    if (instance == nullptr)
    {
        instance = new Profiler;
    }
    return instance;
}
// Releases the shared Profiler, if any, and forgets it.
// Safe to call repeatedly; a later Instance() call allocates a fresh object.
void Profiler::Destroy()
{
    delete instance;     // deleting a null pointer is a no-op, so no check is needed
    instance = nullptr;  // never leave a dangling pointer for Instance()/Destroy() to reuse
}
// Starts the profiler: remembers the current timeGetTime() value as the
// reference point for the first interval and reports 0 until the first sample.
Profiler::Profiler()
    : lastGameTime(static_cast<UINT64>(timeGetTime())),
      DTime(0)
{
}
// Nothing to release: the profiler only holds two integer timestamps.
Profiler::~Profiler() = default;
// Samples the clock, stores the milliseconds elapsed since the previous sample
// in DTime, and makes this sample the new reference point.
void Profiler::operator()()
{
    const UINT64 ticks = static_cast<UINT64>(timeGetTime());

    // timeGetTime() is a 32-bit millisecond counter that wraps to 0 roughly
    // every 49.7 days. Reducing the difference modulo 2^32 keeps the interval
    // correct when a wrap happens between two samples; a plain 64-bit
    // subtraction would report an enormous bogus value instead.
    DTime = (ticks - lastGameTime) & 0xFFFFFFFFu;

    lastGameTime = ticks;
}
BenchmarksOpt.cpp
#include "Benchmarks.hpp"
namespace Benchmark
{
    // Combines inputMat with the rotation described by the quaternion inputVec
    // and hands the product to OptB for inversion. Everything stays in
    // XMMATRIX/XMVECTOR form; nothing is stored to XMFLOAT4X4 here.
    XMMATRIX OptA(CXMMATRIX inputMat, FXMVECTOR inputVec)
    {
        const XMMATRIX rotation = XMMatrixRotationQuaternion(inputVec);
        const XMMATRIX combined = XMMatrixMultiply(inputMat, rotation);
        return OptB(combined);
    }

    // Returns the inverse of inputMat. XMMatrixInverse also reports the
    // determinant, which is not needed here and is simply dropped.
    XMMATRIX OptB(CXMMATRIX inputMat)
    {
        XMVECTOR unusedDeterminant;
        const XMMATRIX inverse = XMMatrixInverse(&unusedDeterminant, inputMat);
        return inverse;
    }

    // Runs OptA count times on the same inputs and appends each result to
    // buffer. The inputs are loaded once, before the loop; only the result is
    // stored back to XMFLOAT4X4 on each iteration.
    void BenchOpt(XMFLOAT4X4 inputMat, XMFLOAT4 inputVec, UINT count, std::vector<XMFLOAT4X4> &buffer)
    {
        const XMMATRIX loadedMatrix = XMLoadFloat4x4(&inputMat);
        const XMVECTOR loadedQuaternion = XMLoadFloat4(&inputVec);

        for (UINT done = 0; done != count; ++done)
        {
            XMFLOAT4X4 stored;
            XMStoreFloat4x4(&stored, OptA(loadedMatrix, loadedQuaternion));
            buffer.push_back(stored);
        }
    }
}
BenchmarksUnopt.cpp
#include "Benchmarks.hpp"
namespace Benchmark
{
    // Loads both inputs, multiplies inputMat by the rotation built from the
    // quaternion inputVec, stores the product, and passes the stored product
    // to UnoptB for inversion.
    XMFLOAT4X4 UnoptA(XMFLOAT4X4 inputMat, XMFLOAT4 inputVec)
    {
        const XMMATRIX base = XMLoadFloat4x4(&inputMat);
        const XMVECTOR quaternion = XMLoadFloat4(&inputVec);
        const XMMATRIX product = XMMatrixMultiply(base, XMMatrixRotationQuaternion(quaternion));

        XMFLOAT4X4 storedProduct;
        XMStoreFloat4x4(&storedProduct, product);
        return UnoptB(storedProduct);
    }

    // Loads inputMat, inverts it, and returns the inverse in stored form.
    // The determinant reported by XMMatrixInverse is not used.
    XMFLOAT4X4 UnoptB(XMFLOAT4X4 inputMat)
    {
        XMVECTOR unusedDeterminant;
        const XMMATRIX inverse = XMMatrixInverse(&unusedDeterminant, XMLoadFloat4x4(&inputMat));

        XMFLOAT4X4 storedInverse;
        XMStoreFloat4x4(&storedInverse, inverse);
        return storedInverse;
    }

    // Runs UnoptA count times on the same inputs, appending each result to buffer.
    void BenchUnopt(XMFLOAT4X4 inputMat, XMFLOAT4 inputVec, UINT count, std::vector<XMFLOAT4X4> &buffer)
    {
        for (UINT done = 0; done != count; ++done)
        {
            const XMFLOAT4X4 result = UnoptA(inputMat, inputVec);
            buffer.push_back(result);
        }
    }
}
main.cpp
#include "Benchmarks.hpp"
#include "Profiler.hpp"
using namespace std;
using namespace Benchmark;
int main()
{
Profiler &ProfilerRef = *Profiler::Instance();
UINT count;
cout<<"Select iteration amount:";
cin>>count;
cout<<endl<<"Test will now begin"<<endl<<endl;
std::vector<XMFLOAT4X4> bufferUnopt;
std::vector<XMFLOAT4X4> bufferOpt;
bufferUnopt.reserve(count);
bufferOpt.reserve(count);
XMFLOAT4 testVector = XMFLOAT4(1.0f, 3.0f, 2.0f, 4.0f);
XMFLOAT4X4 testMatrix = XMFLOAT4X4(3.0f,3.0f,3.0f,3.0f,
3.0f,3.0f,3.0f,3.0f,
3.0f,3.0f,3.0f,3.0f,
3.0f,3.0f,3.0f,3.0f);
ProfilerRef();
BenchUnopt(testMatrix, testVector, count, bufferUnopt);
ProfilerRef();
cout<<"Unoptimized Benchmkar: "<<ProfilerRef.GetDTime()<<endl;
ProfilerRef();
BenchOpt(testMatrix, testVector, count, bufferOpt);
ProfilerRef();
cout<<"Optimized Benchmkar: "<<ProfilerRef.GetDTime()<<endl;
ofstream myfile;
myfile.open ("example.txt");
for(UINT i = 0; i < bufferOpt.size(); i++)
{
myfile<<bufferOpt[i]._11;
}
for(UINT i = 0; i < bufferUnopt.size(); i++)
{
myfile<<bufferUnopt[i]._11;
}
myfile.close();
return 0;
}
I split the benchmarks into two .cpp files so that I can get two separate ASM/machine-code listings. The Unopt version always produces over 20 times more output, but the speed difference is almost 0.