I am posting the code here and would like to ask whether you see anything obviously flawed in it.
The performance I am seeing is not anywhere close to what I expected.
/**
 * Abstract unit of work that JobManager hands to one of its worker threads.
 *
 * A job is "active" from the moment JobManager::addJob() accepts it until the
 * worker that executed run() calls deactivate(). The m_active flag is guarded
 * by m_mutex; m_inactive is signalled when the flag goes back to false.
 */
class Job : public Object < Job >
{
public:
    /** Creates a job in the inactive state. */
    Job() : m_active(false)
    {
    }

    /** Blocks the calling thread until the job is not active. */
    void finish()
    {
        ScopedMutex lock(&m_mutex);
        while(m_active)
            m_inactive.wait(&m_mutex);
        // deactivate() issues a single signal(), so when several threads are
        // blocked in finish()/activate() only one is woken. Pass the wake-up
        // on; every waiter re-checks m_active, so an extra signal is harmless.
        m_inactive.signal();
    }

private:
    friend class JobManager;

    /**
     * Waits until the job is inactive, then marks it active.
     * Called by JobManager::addJob() before the job is given to a worker.
     */
    void activate()
    {
        ScopedMutex lock(&m_mutex);
        while(m_active)
            m_inactive.wait(&m_mutex);
        m_active = true;
    }

    /**
     * The work to perform; implemented by concrete jobs and invoked by a
     * worker thread. Must be virtual: a pure-specifier (= 0) on a non-virtual
     * member function is ill-formed.
     */
    virtual void run() = 0;

    /** Marks the job inactive and wakes a thread waiting on that condition. */
    void deactivate()
    {
        ScopedMutex lock(&m_mutex);
        m_active = false;
        m_inactive.signal();
    }

    Mutex m_mutex;                 // guards m_active
    bool m_active;                 // true between activate() and deactivate()
    ConditionVariable m_inactive;  // signalled when m_active becomes false
};
/**
 * Fixed-size pool of worker threads that execute Job objects.
 *
 * Idle workers sit in m_free. addJob() takes one, hands it the job, and the
 * worker returns itself to m_free through freeWorker() once the job is done.
 */
class JobManager : public Object < JobManager >
{
public:
    /**
     * Creates the pool and starts p_workers worker threads.
     * Note: with zero workers addJob() never returns, because nothing ever
     * calls freeWorker().
     */
    JobManager(size_t const p_workers) : m_workerCount(p_workers)
    {
        for(size_t i = 0; i < p_workers; ++i)
            m_free.push(new Worker(this));
    }

    /**
     * Waits for every worker to become idle, then stops and deletes it.
     * The workers are allocated with new in the constructor and were
     * previously never released.
     */
    virtual ~JobManager()
    {
        for(size_t i = 0; i < m_workerCount; ++i)
            delete getWorker(); // ~Worker() terminates and joins the thread
    }

    // The pool owns raw Worker pointers and running threads; copying it
    // would lead to double deletion.
    JobManager(JobManager const &) = delete;
    JobManager & operator=(JobManager const &) = delete;

    /**
     * Schedules p_job on a worker thread.
     * Blocks while p_job is still active from a previous submission, and
     * then while no worker is free.
     */
    void addJob(Job * const p_job)
    {
        p_job->activate();
        getWorker()->assign(p_job);
    }

private:
    /** A thread that runs one assigned job at a time. */
    class Worker : public Object < Worker, Thread >, public LinkBase < Worker >
    {
    public:
        /** Stores the owning manager and starts the thread. */
        explicit Worker(JobManager * const p_scheduler) : m_scheduler(p_scheduler), m_active(true), m_Job(nullptr)
        {
            start();
        }

        /** Asks the thread to stop and waits for it to exit. */
        virtual ~Worker()
        {
            terminate();
            join();
        }

        /** Hands p_Job to this worker and wakes its thread. */
        void assign(Job * const p_Job)
        {
            ScopedMutex lock(&m_mutex);
            m_Job = p_Job;
            m_assigned.signal();
        }

    private:
        /**
         * Thread body: wait for a job, run it, report back as free, repeat
         * until terminate() has been called and no job is pending.
         */
        void run() override
        {
            for(;;)
            {
                Job * job = nullptr;
                {
                    // Hold the mutex only while inspecting/claiming m_Job.
                    // Keeping it locked for the whole job made assign() and
                    // terminate() block for the job's duration and nested
                    // this lock around the manager's lock in freeWorker().
                    ScopedMutex lock(&m_mutex);
                    while(m_active && !m_Job)
                        m_assigned.wait(&m_mutex);
                    if(!m_Job)
                        return; // only reachable once m_active is false
                    job = m_Job;
                    m_Job = nullptr;
                }
                // An already assigned job is always completed, even if
                // terminate() was called in the meantime, so that a thread
                // blocked in Job::finish() is released.
                job->run();
                job->deactivate();
                m_scheduler->freeWorker(this);
            }
        }

        /** Clears the running flag and wakes the thread so it can exit. */
        void terminate()
        {
            ScopedMutex lock(&m_mutex);
            m_active = false;
            m_assigned.signal();
        }

        JobManager * const m_scheduler; // owning manager, target of freeWorker()
        Mutex m_mutex;                  // guards m_active and m_Job
        bool m_active;                  // false once terminate() was called
        Job * m_Job;                    // job waiting to be run, or nullptr
        ConditionVariable m_assigned;   // signalled by assign() and terminate()
    };

    /** Blocks until a worker is free and removes it from the free list. */
    Worker * getWorker()
    {
        ScopedMutex lock(&m_mutex);
        while(!m_free.count())
            m_workerFree.wait(&m_mutex);
        return m_free.pop();
    }

    /** Returns p_worker to the free list and wakes one thread in getWorker(). */
    void freeWorker(Worker * const p_worker)
    {
        ScopedMutex lock(&m_mutex);
        m_free.push(p_worker);
        m_workerFree.signal();
    }

    Mutex m_mutex;                  // guards m_free
    List < Worker > m_free;         // workers currently without a job
    ConditionVariable m_workerFree; // signalled when a worker is pushed to m_free
    size_t const m_workerCount;     // number of workers created by the constructor
};
I am using pthreads-win32 inside the wrapper classes, and with 8 worker threads I can only queue 2x 16ms jobs in a 33ms frame on an 8-core computer. I was not expecting to see 8x 16ms in 33ms, but I have the feeling something is wrong here.