Why is my batch drawing method slower than my one draw call per model method?

Started by
4 comments, last by Hyunkel 13 years, 3 months ago
I'm testing out my model batch drawing method using the following loop:

            // Benchmark loop: places the same model at 500 positions, 2 units apart along X,
            // and hands it to whichever draw path is enabled below.
            Matrix worldTransform = Matrix.Identity;
            for (int i = 0; i < 500; ++i)
            {
                // M41 is the X translation component of the world matrix.
                worldTransform.M41 = i * 2;
                customModel.WorldTransform = worldTransform;

                // Enable exactly one of the two calls below to choose the path being timed.
                //modelDrawer.DrawInstancedModel(customModel);
                // OR
                //modelDrawer.DrawModel(customModel);
            }


My standard model drawing method is as follows:

        /// <summary>
        /// Draws a model directly from its own vertex and index buffers, issuing one
        /// DrawIndexedPrimitives call per mesh part inside a single effect pass.
        /// </summary>
        /// <param name="customModel">
        /// The model to draw. Its Effect key selects the effect, and its WorldTransform is
        /// combined with each mesh's parent bone transform to build the "World" parameter.
        /// </param>
        public void DrawModel(CustomModel customModel)
        {
            Effect effect = game.Effects[customModel.Effect];
            GraphicsDevice graphicsDevice = game.GraphicsDevice;
            Matrix[] transforms = customModel.ModelData.Transforms;

            // The string-keyed parameter lookup is loop-invariant, so do it once.
            EffectParameter worldParameter = effect.Parameters["World"];

            effect.Begin();
            effect.CurrentTechnique.Passes[0].Begin();

            for (int i = 0; i < customModel.Model.Meshes.Count; ++i)
            {
                ModelMesh mesh = customModel.Model.Meshes[i];

                // The world matrix and index buffer depend only on the mesh, not on the
                // individual mesh part, so set them once per mesh.
                worldParameter.SetValue(transforms[mesh.ParentBone.Index] * customModel.WorldTransform);

                // The parameter changed inside an active pass, so it must be committed
                // before the next draw call.
                effect.CommitChanges();

                graphicsDevice.Indices = mesh.IndexBuffer;

                for (int ii = 0; ii < mesh.MeshParts.Count; ++ii)
                {
                    ModelMeshPart meshPart = mesh.MeshParts[ii];

                    graphicsDevice.VertexDeclaration = meshPart.VertexDeclaration;
                    graphicsDevice.Vertices[0].SetSource(mesh.VertexBuffer, meshPart.StreamOffset, meshPart.VertexStride);

                    graphicsDevice.DrawIndexedPrimitives(
                        PrimitiveType.TriangleList,
                        meshPart.BaseVertex,
                        0,
                        meshPart.NumVertices,
                        meshPart.StartIndex,
                        meshPart.PrimitiveCount
                        );
                }
            }

            effect.CurrentTechnique.Passes[0].End();
            effect.End();
        }


My batch drawing method is as follows:

        /// <summary>
        /// Appends one model to the current batch. Each mesh part gets a slot in
        /// <c>worldTransforms</c>, the part's vertices are tagged with that slot index, and the
        /// model's vertices and rebased indices are copied into the shared batch arrays.
        /// Nothing is drawn here; drawing happens when the batch is flushed.
        /// </summary>
        /// <remarks>
        /// All of the model's vertices are copied on the CPU every time this is called, so the
        /// cost grows with vertex count rather than with the number of draw calls saved.
        /// </remarks>
        /// <param name="customModel">The model to append to the batch.</param>
        /// <exception cref="InvalidOperationException">
        /// Thrown when called outside a Begin/End pair.
        /// </exception>
        public void DrawInstancedModel(CustomModel customModel)
        {
            if (!isInBeginEndPair)
            {
                throw new InvalidOperationException("Begin must be called successfully before a Draw can be called.");
            }

            int numModelIndices = customModel.ModelData.Indices.Length;
            int numModelVertices = customModel.ModelData.Vertices.Length;

            if (!Reserve(numModelVertices, numModelIndices))
            {
                // The model does not fit in the batch buffers at all; skip it.
                return;
            }

            // Reserve only guarantees a single free transform slot, but one slot is used per
            // mesh part. Make sure the whole model fits so worldTransforms cannot overflow.
            int numModelParts = 0;
            for (int i = 0; i < customModel.Model.Meshes.Count; ++i)
            {
                numModelParts += customModel.Model.Meshes[i].MeshParts.Count;
            }

            if (numModelParts > worldTransforms.Length)
            {
                // More mesh parts than a single batch can address; skip it.
                return;
            }

            if (transformIndex + numModelParts > worldTransforms.Length)
            {
                // Flushing resets transformIndex, numVertices and numIndices to zero.
                FlushDrawing();
            }

            Matrix[] transforms = customModel.ModelData.Transforms;

            // Running start position of the current mesh part within ModelData.Vertices.
            int vertexOffset = 0;

            for (int i = 0; i < customModel.Model.Meshes.Count; ++i)
            {
                ModelMesh mesh = customModel.Model.Meshes[i];

                for (int ii = 0; ii < mesh.MeshParts.Count; ++ii)
                {
                    ModelMeshPart meshPart = mesh.MeshParts[ii];

                    // Tag every vertex of this mesh part with the slot that will hold the
                    // part's world transform.
                    int vertexEnd = vertexOffset + meshPart.NumVertices;
                    for (int v = vertexOffset; v < vertexEnd; ++v)
                    {
                        customModel.ModelData.Vertices[v].Index = transformIndex;
                    }
                    vertexOffset = vertexEnd;

                    // [World Transform]
                    worldTransforms[transformIndex++] =
                        transforms[mesh.ParentBone.Index] *
                        customModel.WorldTransform;
                }
            }

            // Rebase the model's indices onto its position in the shared vertex array.
            for (int i = 0; i < numModelIndices; ++i)
            {
                indices[numIndices++] = (ushort)(numVertices + customModel.ModelData.Indices[i]);
            }

            customModel.ModelData.Vertices.CopyTo(vertices, numVertices);
            numVertices += numModelVertices;
        }
   /// <summary>
   /// Makes sure the batch can accept a model with the given vertex and index counts,
   /// flushing the pending batch first when there is not enough room left.
   /// </summary>
   /// <param name="numV">Number of vertices the model will add.</param>
   /// <param name="numI">Number of indices the model will add.</param>
   /// <returns>
   /// <c>false</c> when the model exceeds MAXVERTICES or MAXINDICES on its own and can
   /// never be batched; otherwise <c>true</c>.
   /// </returns>
   private bool Reserve(int numV, int numI)
   {
       if (numV > MAXVERTICES || numI > MAXINDICES)
       {
           // Whatever it is, we can't draw it
           return false;
       }

       // NOTE(review): the >= comparisons flush one element earlier than an exact fit
       // would need; confirm whether that is intentional.
       if (numVertices + numV >= MAXVERTICES || numIndices + numI >= MAXINDICES || (transformIndex + 1) >= MAXMODELSPERBATCH)
       {
           // We can draw it, but we need to make room first
           FlushDrawing();
       }

       return true;
   }

   /// <summary>
   /// Uploads the pending vertices, indices and world transforms, draws them with the
   /// "ModelBatch" effect in a single DrawIndexedPrimitives call, and resets the batch
   /// counters. When no indices are pending, only the counters are reset.
   /// </summary>
   private void FlushDrawing()
   {
       if (numIndices > 0)
       {
           Effect effect = game.Effects["ModelBatch"];
           GraphicsDevice device = game.GraphicsDevice;

           // Set parameters for current camera
           game.SetSharedParameters(camera);

           vertexBuffer.SetData(vertices, 0, numVertices, SetDataOptions.Discard);
           indexBuffer.SetData(indices, 0, numIndices, SetDataOptions.Discard);

           device.Vertices[0].SetSource(vertexBuffer, 0, VertexPositionNormalTextureIndex.SizeInBytes);
           device.Indices = indexBuffer;
           device.VertexDeclaration = game.VertexDeclarations["VertexPositionNormalTextureIndex"];

           // Set effect parameters. This happens before Begin, so the values are picked
           // up when the pass begins; CommitChanges is only required for parameters
           // changed inside an active pass and is therefore not called here.
           parameter_WorldTransforms.SetValue(worldTransforms);

           effect.Begin();
           effect.CurrentTechnique.Passes[0].Begin();

           device.DrawIndexedPrimitives(PrimitiveType.TriangleList, 0, 0, numVertices, 0, numIndices / 3);

           effect.CurrentTechnique.Passes[0].End();
           effect.End();
       }

       numIndices = 0;
       numVertices = 0;
       transformIndex = 0;
   }


Why would the one-draw-call-per-model method be 8 times faster than the batch drawing method? How has my approach failed in this case?
Advertisement
Quote:Why would the one draw call per model be 8 times faster...

What delta-time are you measuring? Are you using PIX or some other profiler to get the DrawPrimitive time, or are you including the rebuilding of the vertex buffer? In batch mode, how many times do you call effect->Begin/End? If you call FlushDrawing several times, that may account for the difference. It may take more time to set up the effect than you save by combining buffers.

Please don't PM me with questions. Post them in the forums for everyone's benefit, and I can embarrass myself publicly.

You don't forget how to play when you grow old; you grow old when you forget how to play.

It looks to me like you are not instancing in the traditional way. You appear to be copying all of the vertices into a single buffer and then drawing that buffer. The slowdown is because you are copying all of that data into a single massive vertex buffer.

For instancing, you have your model's vertex data, and then you create a separate vertex buffer that contains the instance-specific matrices.


Wisdom is knowing when to shut up, so try it.
--Game Development http://nolimitsdesigns.com: Reliable UDP library, Threading library, Math Library, UI Library. Take a look, its all free.
Quote:
What delta-time are you measuring? Are you using PIX or some other profiler to get the DrawPrimitive time, or are you including the rebuilding of the vertex buffer?


I'm not using anything too fancy to measure the delta time. I've just got a rudimentary method of counting frames per second:

    /// <summary>
    /// Displays the number of frames rendered during the last full second.
    /// Call <see cref="Update"/> once per game update and <see cref="Draw"/> once per
    /// rendered frame, inside an active SpriteBatch Begin/End pair.
    /// </summary>
    class FPSCounter
    {
        private static readonly TimeSpan OneSecond = TimeSpan.FromSeconds(1);

        private readonly SpriteFont font;
        private readonly Game1 game;
        private int frameRate = 0;
        private int numFrames = 0;
        private TimeSpan timeElapsed = TimeSpan.Zero;

        // Cached label text; rebuilt once per second instead of on every frame.
        private string fpsText = "FPS: 0";

        /// <summary>Creates a counter that draws with the given game's SpriteBatch.</summary>
        /// <param name="game">Game that owns the SpriteBatch used for drawing.</param>
        /// <param name="font">Font used to render the counter text.</param>
        public FPSCounter(Game1 game, SpriteFont font)
        {
            this.game = game;
            this.font = font;
        }

        /// <summary>
        /// Counts one rendered frame and draws the most recent FPS value in the top-left
        /// corner, as white text over a black copy offset by one pixel.
        /// </summary>
        public void Draw()
        {
            // Frames are counted here rather than in Update: with a fixed time step the
            // game can call Update a different number of times than Draw, so counting
            // updates would not reflect the real rendering rate.
            numFrames++;

            // Remove automatic anti aliasing
            //sprite_Batch.GraphicsDevice.SamplerStates[0].MagFilter = TextureFilter.None;
            //sprite_Batch.GraphicsDevice.SamplerStates[0].MinFilter = TextureFilter.None;
            //sprite_Batch.GraphicsDevice.SamplerStates[0].MipFilter = TextureFilter.None;

            game.SpriteBatch.DrawString(font, fpsText, new Vector2(0, 0), Color.Black);
            game.SpriteBatch.DrawString(font, fpsText, new Vector2(1, 0), Color.White);
        }

        /// <summary>
        /// Accumulates elapsed game time and, once more than a second has passed,
        /// publishes the number of frames drawn in that period.
        /// </summary>
        /// <param name="gameTime">Timing values for the current update.</param>
        public void Update(GameTime gameTime)
        {
            timeElapsed += gameTime.ElapsedGameTime;

            if (timeElapsed > OneSecond)
            {
                timeElapsed -= OneSecond;
                frameRate = numFrames;
                numFrames = 0;
                fpsText = string.Format("FPS: {0}", frameRate);
            }
        }
    }


Quote:
In batch mode, how many times do you do effect->Begin/End? If you call FlushDrawing several times, that may account for the difference.


FlushDrawing is being called 12 times per frame.

Quote:
It may take more time to setup the effect than you save by combining buffers.


How then can I speed up the process? I've reached the limit for registers in my HLSL code:

#define MAX_MODELS 50

//=============================================
//---[XNA to HLSL Variables]-------------------
//=============================================
shared float4x4 ViewProjection;
shared float2 HalfPixel;

texture Texture0;

// Also keep in mind that if you're setting your matrices into shader constants in
// column-major format (which is the default), then you don't need the last row since it
// will always be (0, 0, 0, 1). So you can pack your matrices as float3x4's, and then each
// world matrix will only use 3 registers instead of 4.
float4x4 WorldTransforms[MAX_MODELS];

//=============================================
//---[Texture Samplers]------------------------
//=============================================
sampler TextureSampler0 = sampler_state
{
    Texture = <Texture0>;

    // Point sampling. "None" is only defined for the mip filter (where it disables
    // mipmapping), so the min/mag filters use Point instead.
    MinFilter = Point;
    MagFilter = Point;
    MipFilter = None;

    AddressU = Clamp;
    AddressV = Clamp;
};

//=============================================
//---[Structs]---------------------------------
//=============================================
struct VertexShaderInput
{
    float4 Position             : POSITION0;
    float3 Normal               : NORMAL0;
    float2 TextureCoordinates   : TEXCOORD0;
    float Index                 : PSIZE0;   // Slot in WorldTransforms for this vertex
};

struct VertexShaderOutput
{
    float4 Position             : POSITION0;
    float3 Normal               : NORMAL0;
    float2 TextureCoordinates   : TEXCOORD0;
};

//=============================================
//---[Vertex Shaders]--------------------------
//=============================================
VertexShaderOutput VertexShaderFunction(VertexShaderInput input)
{
    VertexShaderOutput output = (VertexShaderOutput)0;

    // The slot index arrives as a float vertex attribute; round it before using it as
    // an array index so interpolation/precision error cannot select a neighbouring slot.
    int transformSlot = (int)round(input.Index);
    float4x4 worldTransform = WorldTransforms[transformSlot];

    // [Transformation]
    // - Multiplying input.Position by World then ViewProjection is faster than
    //   concatenating World and ViewProjection matrices then multiplying input.Position
    //   by the result
    input.Position = mul(input.Position, worldTransform);
    output.Position = mul(input.Position, ViewProjection);

    // Align texels to pixels (Half pixel offset for correct texel centering [DX9 Only])
    // Should be done AFTER transformation
    // NOTE(review): the offset is applied to the clip-space position before the divide by
    // w; with a perspective projection its on-screen size would vary with depth. Confirm
    // whether it should be scaled by output.Position.w.
    output.Position.xy -= HalfPixel;

    output.TextureCoordinates = input.TextureCoordinates;

    // Only the upper 3x3 (rotation/scale) part of the world matrix applies to a normal;
    // the explicit cast avoids an implicit truncation of the 4x4 matrix.
    output.Normal = mul(input.Normal, (float3x3)worldTransform);

    return output;
}

//=============================================
//---[Pixel Shaders]---------------------------
//=============================================
float4 PixelShaderFunction(VertexShaderOutput input) : COLOR0
{
    return tex2D(TextureSampler0, input.TextureCoordinates);
}

//=============================================
//---[Techniques]------------------------------
//=============================================
technique Textured
{
    pass Pass0
    {
        AlphaBlendEnable = false;       // No transparency
        ZEnable = true;
        ZWriteEnable = true;

        // Shader Model 3.0 required for model instancing
        VertexShader = compile vs_3_0 VertexShaderFunction();
        PixelShader = compile ps_3_0 PixelShaderFunction();
    }
}

technique Textured_Transparent
{
    pass Pass0
    {
        AlphaBlendEnable = true;        // Turn on Alpha Blending
        ZEnable = true;
        ZWriteEnable = true;

        SrcBlend = One;                 // Additive Blending
        DestBlend = One;                // Additive Blending
        BlendOp = Add;                  // Additive Blending

        // Shader Model 3.0 required for model instancing
        VertexShader = compile vs_3_0 VertexShaderFunction();
        PixelShader = compile ps_3_0 PixelShaderFunction();
    }
}


What changes would you suggest and is my method not close to that of instancing?

EDIT:

This was posted during my reply

Quote:
For instancing, you have your model's vertex data, and then you create a separate vertex buffer that contains the instance-specific matrices.


How would this setup work? How can it be possible with one draw call if I don't fill a single buffer?
http://msdn.microsoft.com/en-us/library/bb173349%28v=vs.85%29.aspx
Wisdom is knowing when to shut up, so try it.
--Game Development http://nolimitsdesigns.com: Reliable UDP library, Threading library, Math Library, UI Library. Take a look, its all free.
Quote: it looks to me like you are not instancing in the traditional way. You appear to be copying all of the vertices into a single buffer then drawing that buffer. The slowdown is because you are copying all of that data into a single massive vertex buffer.

For instancing, you have your model's vertex data, and then you create a separate vertex buffer that contains the instance-specific matrices.


This.

You need 2 vertex buffers, the first contains the usual vertex data for the model.
This first vertex buffer and the index buffer don't ever need to be updated!

You need a second vertex buffer, which you will update every frame, that contains all the instance specific data.
This is usually a WorldViewProjection matrix per instance, but you can add whatever information you need.

This topic is closed to new replies.

Advertisement