I profiled my engine and found that matrix multiplication and constant-buffer (cbuffer) commits take the most time of all operations.
With all lights turned off, the process is simple: for every object, update the transform cbuffer, commit the cbuffer, and draw.
In my engine, all shaders share 10 cbuffers (5 for the vertex shader, 5 for the pixel shader).
One of the cbuffers looks like this:
// CPU-side layout of the per-object transform constant buffer that
// setTransform() fills through a mapped pointer. The member order is part of
// the layout, so it must not be rearranged.
// NOTE(review): the commented-out register(b0) suggests this mirrors the
// shader cbuffer bound at slot b0 -- confirm against the shader source.
struct CBTransform /*: register(b0)*/
{
// Object world matrix (identity when the node has bones).
Matrix4f world_matrix;
// Currently receives the same value as world_matrix in setTransform().
// NOTE(review): the name suggests the inverse-transpose of the world matrix
// (for transforming normals) -- confirm whether the plain world matrix is intended.
Matrix4f world_invTrans_matrix;
// world * view * projection, using the main camera.
Matrix4f world_view_proj_matrix;
// world * the current light's transform; only written when a light is set.
Matrix4f light_view_proj_matrix;
};
// Uploads the per-object transform constants for `node` into _cbtrans.
//
// The complete CBTransform is assembled in a local first and copied into the
// mapped buffer in one assignment. Two reasons:
//  * The buffer is mapped with D3D11_MAP_WRITE_DISCARD, which leaves the
//    previous contents undefined, so EVERY field must be written on each
//    update -- including light_view_proj_matrix when no light is active.
//  * The resource stays mapped only for the duration of the copy, and the
//    mapped memory is written once, sequentially, and never read.
//
// node: renderable whose transform is uploaded. When node->_hasBone is set the
//       world matrix is identity (presumably because skinning supplies the
//       placement elsewhere -- confirm).
setTransform(Renderable node)
{
    CBTransform constants;

    Matrix4f world;
    if (node->_hasBone)
        world.initIdentity();
    else
        world = node->getTransform();

    constants.world_matrix = world;
    // NOTE(review): the field name suggests the inverse-transpose of the world
    // matrix, but the plain world matrix is stored (unchanged from the original
    // behavior). Confirm whether this is intentional.
    constants.world_invTrans_matrix = world;

    // NOTE(review): view * proj does not depend on the object; if this is hot,
    // the caller could compute it once per frame instead of once per draw.
    Camera *camera = Engine::sceneManager().getMainCamera();
    Matrix4f view = camera->getView();
    Matrix4f proj = camera->getProjection();
    constants.world_view_proj_matrix = world * view * proj;

    if (_curlight)
        constants.light_view_proj_matrix = world * _curlight->getLightTransform(); // world * viewproj
    else
        constants.light_view_proj_matrix.initIdentity(); // defined value instead of discarded garbage

    // Map as late as possible and guard against a failed map.
    // Presumably MapResource returns null on failure -- confirm.
    CBTransform *mapped = reinterpret_cast<CBTransform*>(
        renderbase->MapResource(_cbtrans, 0, D3D11_MAP_WRITE_DISCARD, 0));
    if (mapped) {
        *mapped = constants;
        renderbase->unMapResource(_cbtrans, 0);
    }
}
// Since all shaders share the same 10 cbuffers, I bind all of them to the GPU on every draw call. I'm not sure whether this approach is right.
// Bind the whole constant-buffer group, starting at slot 0, first to the
// vertex-shader stage and then to the pixel-shader stage.
_context->VSSetConstantBuffers(
    0,
    static_cast<int>(CBufType::MAX_CBUF_GROUP),
    _cBufs[static_cast<int>(ShaderType::VERTEX_SHADER)]);
_context->PSSetConstantBuffers(
    0,
    static_cast<int>(CBufType::MAX_CBUF_GROUP),
    _cBufs[static_cast<int>(ShaderType::PIXEL_SHADER)]);