I'm getting about 10 FPS with only about 20 draw calls of very small meshes.
Clearly I'm doing something very very wrong.
Is there an issue with populating a single command list to draw 20 items? I figured that even if that isn't optimal, it shouldn't be THIS terrible. At this point, my iPad running OpenGL ES is outperforming my D3D12 engine by a massive margin.
The following code runs before most of my draw calls to set up necessary state:
Sorry for the messed-up tab spacing.
void CoreStateMachine::prepareToDraw()
{
auto cl = continueRenderingCommands();
cl->SetGraphicsRootSignature(rootSignature.Get());
//prepare any buffers required by current shader program
auto prog = currentProgram;
if(prog)
{
if(!prog->globalCBuffer)
{
prog->globalCBuffer = make_shared<BufferArray>();
prog->globalCBufferDirty = true;
}
else
{
//this should be cleaned up at some point
preserveResourceUntilRenderComplete(prog->globalCBuffer->uploadBuffers[0]);
preserveResourceUntilRenderComplete(prog->globalCBuffer->buffers[0]);
prog->globalCBuffer = make_shared<BufferArray>();
prog->globalCBufferDirty = true;
}
if(prog->globalCBufferDirty)
{
prog->globalCBuffer->provideData(0, prog->globalCBufferSize, prog->globalCBufferData, BufferArray::UT_DYNAMIC);
prog->globalCBufferDirty = false;
}
cl->SetGraphicsRootConstantBufferView(2, prog->globalCBuffer->buffers[0]->GetGPUVirtualAddress());
}
auto currentVA = VertexArray::current();
assert(currentVA != nullptr);
currentVA->prepareForDraw();
device->CopyDescriptorsSimple(textureTableSize, cbSrvHeaps[descriptorHeapIndex]->hCPU(textureTableIndex), cpuCbSrvHeap->hCPU(0), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
device->CopyDescriptorsSimple(textureTableSize, samplerHeaps[descriptorHeapIndex]->hCPU(textureTableIndex), cpuSamplerHeap->hCPU(0), D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
if(descriptorHeapsChanged)
{
ID3D12DescriptorHeap *descHeaps[] = { cbSrvHeaps[descriptorHeapIndex]->get(), samplerHeaps[descriptorHeapIndex]->get() };
cl->SetDescriptorHeaps(ARRAYSIZE(descHeaps), descHeaps);
descriptorHeapsChanged = false;
}
//might be smarter to set this up earlier if I can.. not sure what the tradeoff is here
if(pipelineState)
{
preserveResourceUntilRenderComplete(pipelineState);
pipelineState = nullptr;
}
ThrowIfFailed(device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&pipelineState)));
cl->SetPipelineState(pipelineState.Get());
if(descriptorTablesChanged)
{
cl->SetGraphicsRootDescriptorTable(0, cbSrvHeaps[descriptorHeapIndex]->hGPU(textureTableIndex));
cl->SetGraphicsRootDescriptorTable(1, samplerHeaps[descriptorHeapIndex]->hGPU(textureTableIndex));
descriptorTablesChanged = false;
}
}