Sign in to follow this  
ktuluorion

Truly terrible rendering performance...

Recommended Posts

I'm starting to pull my hair out over this terrible performance I am getting. If anyone is willing to take a look at this stuff and see if they can figure out why it is rendering so slowly, I would appreciate it. Here is some information that can help: Here is some of the pix information from a frame. This is similar to what is rendered: My main rendering function:
int gameclass::render()
{

if( NULL == d3ddevice )
        return 1;

d3ddevice->Clear( 0, NULL, D3DCLEAR_TARGET|D3DCLEAR_ZBUFFER,               
				 D3DCOLOR_XRGB(0,0,0), 1.0f, 0 );


 d3ddevice->BeginScene();
setupmatrices(x,20,y);//y);
                   

my=my-300; //  convert the world x and y coordinates into a
mx=mx-400; //  grid


if (bx<10) bx=10;
if (bx>13) bx=13;

if (by<10) by=10;
if (by>13) by=13;


rendermodel(skybox,camx,15,camy,0.0f,0.0f,0.0f,5.0f,false);  
if (alive) rendermodel(tanks[0],(float)x,(float)(height-1),(float)y,0.0f,(float)(rot),0.0f,1.0f,false);

for (int i =by-10;i<by+7;i++)
 for (int j=bx-10;j<bx+7;j++)	 
 	 for (int c=0;c<heights[j][i];c++)	   
       	 if (blocks[j][i]>-1)rendermodel(block[blocks[j][i]],(float)(j-10)*2,(float)c,(float)(i-10)*2,0.0f,0.0f,0.0f,1.1f,false);  
   

for ( i =by-10;i<by+7;i++)
 for ( int j=bx-10;j<bx+7;j++)	 
 { 	 
	 if (build[j][i]==1) rendermodel(city,(float)(j-10)*2,(float)heights[j][i]-1,(float)(i-10)*2,0.0f,0.0f,0.0f,1.1f,false);  
	 if (build[j][i]==2) rendermodel(xxx,(float)(j-10)*2,(float)heights[j][i]-1,(float)(i-10)*2,0.0f,0.0f,0.0f,1.1f,false);  
 }

for ( i =by-10;i<by+7;i++)
 for (int j=bx-10;j<bx+7;j++)	  
     if ((tops[j][i]>0)||(heights[j][i]>0)){
			rendermodel(top[tops[j][i]],(float)(j-10)*2,(float)heights[j][i]-1,(float)(i-10)*2,0.0f,0.0f,0.0f,1.1f,false);  
	 }



//rendermodel(expl,(float)2,(float)heights[2][2]-1,(float)(2),0.0f,0.0f,0.0f,1.0f,true);  

 


	  
RECT srcrect;
srcrect.top=0;
srcrect.left=0;
srcrect.right=800;
srcrect.bottom=50;
hudsprite->Draw(hud,&srcrect,NULL,NULL,NULL,&D3DXVECTOR2(0,548.0),D3DCOLOR_ARGB(200,255,255,255));

srcrect.right=51;
srcrect.bottom=45;
srcrect.top=0;
srcrect.left=0;
hudselsprite->Draw(hudsel,&srcrect,NULL,NULL,NULL,&D3DXVECTOR2((sel*50)+2,550.0),D3DCOLOR_ARGB(75,255,255,255));
char buffer[10];

char buffer2[5];
sprintf(buffer2,"%d",fps);
sprintf(buffer,"FPS: %s",&buffer2);//[strlen(buffer)-8]);	

drawtext(buffer,D3DCOLOR_RGBA(255,0,0,105),0,0,600,50);

  char buffer3[10];
char buffer4[5];

setupmatrices(40,20,0);
POINT lp;
GetCursorPos(&lp);
mousex=lp.x;
mousey=lp.y;
D3DXVECTOR3 v,vf,vn;
D3DVIEWPORT9 vp;
d3ddevice->GetViewport(&vp);
D3DXMATRIX matproj;
D3DXMATRIX matview;
D3DXMATRIX matworld;
D3DXMatrixIdentity(&matworld);
if (d3ddevice->GetTransform(D3DTS_VIEW,&matview)!=D3D_OK) quit=1;
if (d3ddevice->GetTransform(D3DTS_PROJECTION,&matproj)!=D3D_OK)quit=1;

bool b=false;
for (int I =0;I<20;I++)
 for (int J=0;J<20;J++)	 
	 {


int C=heights[J][I];


	// int	C=heights[J*2+11][I*2+9];
	 
D3DXVec3Unproject(&vn,&D3DXVECTOR3(lp.x,lp.y,0),&vp,&matproj,&matview,&matworld);
D3DXVec3Unproject(&vf,&D3DXVECTOR3(lp.x,lp.y,1),&vp,&matproj,&matview,&matworld);
D3DXVec3Subtract(&v,&vf,&vn);


int close=0;
BOOL hit1=false;
BOOL hit2=false;
float dist;
hit1= D3DXBoxBoundProbe(&D3DXVECTOR3((J-10)*2-1,1,(I-10)*2-1),
    &D3DXVECTOR3((J-10)*2+1,C,(I-10)*2+1),
    &D3DXVECTOR3(cos(rad(camrot))*20+camx,camh+20,sin(rad(camrot))*20+camy),
    &v
);


if (((b==false) || (I<close))&& ((hit1)||(hit2))) {mxa=J;mya=I;close=I;b=true; rendermodel(select,(J-10)*2,C-1,(I-10)*2,0.0f,0,0.0f,1.1f,false);	
 sprintf(buffer4,"%d",C);
 
 
 sprintf(buffer3,"Coords: %s",&buffer4);//[strlen(buffer)-8]);	
    
	 drawtext(buffer3,D3DCOLOR_RGBA(255,0,0,105),0,50,150,100);
}	 
}	

	 d3ddevice->EndScene();
d3ddevice->Present(NULL,NULL,NULL,NULL);

return 0;
}




Which calls this to render models:
// Draws every subset of `mod` with a world matrix built from the arguments.
//
//   x, y, z     translation
//   rx, ry, rz  pitch, yaw and roll, converted with rad() before use
//   sc          uniform scale factor
//   tr          when true the material alpha is set to 0.5 and the stage-0
//               alpha operation is switched to modulate for this model
//
// The world matrix is rotation * translation * scale, so the scale is applied
// to the translation as well as to the geometry.
//
// Always returns 0.
int gameclass::rendermodel(model mod,float x=0,float y=0,float z=0, float rx=0, float ry=0, float rz=0,float sc=0, bool tr=false)
{
    D3DXMATRIX matWorld,matTrans;

    D3DXVECTOR3 position(x,y,z);
    D3DXMatrixTransformation( &matTrans,NULL,NULL,NULL,NULL,NULL,&position );

    D3DXMatrixRotationYawPitchRoll( &matWorld,(float)rad(ry),(float)rad(rx),(float)rad(rz) );
    D3DXMatrixMultiply( &matWorld, &matWorld, &matTrans );

    D3DXMatrixScaling( &matTrans,sc,sc,sc );
    D3DXMatrixMultiply( &matWorld, &matWorld, &matTrans );

    d3ddevice->SetTransform( D3DTS_WORLD, &matWorld );

    //d3ddevice->SetRenderState( D3DRS_DIFFUSEMATERIALSOURCE,   D3DMCS_MATERIAL );

    // The alpha value is the same for every subset, so pick it once.
    const float alpha = tr ? 0.5f : 1.0f;

    // The previous code passed .5 here, which truncates to 0 and is not a
    // valid D3DTEXTUREOP. Modulate multiplies the texture alpha by the
    // material alpha set below.
    // NOTE(review): alpha blending itself is not enabled in this function;
    // presumably D3DRS_ALPHABLENDENABLE is set elsewhere - confirm.
    if (tr) d3ddevice->SetTextureStageState( 0,D3DTSS_ALPHAOP,D3DTOP_MODULATE );

    for( DWORD i=0; i<mod.num; i++ )
    {
        // Work on a copy: the stored material must not be modified, otherwise
        // the ambient boost below would accumulate on every call.
        D3DMATERIAL9 m = mod.materials[i];

        m.Diffuse.a  = alpha;

        // Slightly brighten the ambient colour.
        m.Ambient.r  = (float)(mod.materials[i].Ambient.r+.02);
        m.Ambient.g  = (float)(mod.materials[i].Ambient.g+.02);
        m.Ambient.b  = (float)(mod.materials[i].Ambient.b+.02);
        m.Ambient.a  = alpha;

        m.Specular.a = alpha;

        //Set the material and texture for this subset
        d3ddevice->SetMaterial( &m );
        d3ddevice->SetTexture( 0, mod.textures[i] );

        //Draw the mesh subset
        mod.mesh->DrawSubset( i );
    }

    // Put stage 0 back to the documented default alpha operation.
    if (tr) d3ddevice->SetTextureStageState( 0,D3DTSS_ALPHAOP,D3DTOP_SELECTARG1 );
    return 0;
}





Which are stored in this:
// Loads the .x file `file` into `mod`: the mesh, one material per subset and
// one texture per subset (NULL where no texture could be loaded). Textures
// are looked up in the "textures" sub-directory of the current directory.
// After loading, normals are recomputed and the mesh is cleaned and optimised.
//
// Returns 1 if the mesh file cannot be loaded, 0 otherwise.
int gameclass::createmesh(model &mod, char *file)
{
    LPD3DXBUFFER pD3DXMtrlBuffer = NULL;

    // Managed pool rather than system memory, so the runtime can keep a copy
    // of the buffers in video memory for drawing.
    if( FAILED( D3DXLoadMeshFromX( file, D3DXMESH_MANAGED,
                                   d3ddevice, NULL,
                                   &pD3DXMtrlBuffer,NULL, &mod.num,
                                   &mod.mesh)))
    {
        return 1;
    }

    D3DXMATERIAL* d3dxMaterials = (D3DXMATERIAL*)pD3DXMtrlBuffer->GetBufferPointer();
    mod.materials = new D3DMATERIAL9[mod.num];
    mod.textures  = new LPDIRECT3DTEXTURE9[mod.num];

    // Remember whether the directory change worked so that we only step back
    // up when we actually stepped down.
    const bool inTextureDir = ( _chdir("textures") == 0 );

    for( DWORD i=0; i<mod.num; i++ )
    {
        mod.materials[i] = d3dxMaterials[i].MatD3D;

        //mod.materials[i].Ambient = mod.materials[i].Diffuse;

        // A subset without a texture file name, or whose file fails to load,
        // is drawn untextured.
        if( d3dxMaterials[i].pTextureFilename == NULL ||
            FAILED( D3DXCreateTextureFromFile( d3ddevice,
                                               d3dxMaterials[i].pTextureFilename,
                                               &mod.textures[i] ) ) )
        {
            mod.textures[i] = NULL;
        }
    }

    if( inTextureDir )
        _chdir("..");

    pD3DXMtrlBuffer->Release();

    D3DXComputeNormals(mod.mesh,NULL);

    // Three adjacency entries per face.
    const DWORD adjCount = mod.mesh->GetNumFaces()*3;
    DWORD *pAdj      = new DWORD[adjCount];
    DWORD *pAdjClean = new DWORD[adjCount];

    if( SUCCEEDED( mod.mesh->GenerateAdjacency(0.0f,pAdj) ) )
    {
        const DWORD *pAdjForOptimize = pAdj;

        // Clean first: D3DXCleanMesh returns a separate mesh interface, so
        // the one loaded above has to be released once it is replaced, and
        // the optimisation below then applies to the mesh that is kept.
        LPD3DXMESH pCleanMesh = NULL;
        if( SUCCEEDED( D3DXCleanMesh( mod.mesh, pAdj, &pCleanMesh, pAdjClean, NULL ) ) &&
            pCleanMesh != NULL )
        {
            mod.mesh->Release();
            mod.mesh = pCleanMesh;
            pAdjForOptimize = pAdjClean;
        }

        // The flags are bit masks and must be combined with bitwise OR; the
        // previous logical OR collapsed them to the value 1.
        mod.mesh->OptimizeInplace( D3DXMESHOPT_COMPACT | D3DXMESHOPT_ATTRSORT |
                                   D3DXMESHOPT_STRIPREORDER,
                                   pAdjForOptimize,NULL,NULL,NULL );
    }

    /*
    D3DXSimplifyMesh(mod.mesh,
      pAdj,
      NULL,
      NULL,
      (mod.mesh->GetNumVertices()/5)*4,
      D3DXMESHSIMP_FACE,
      &mod.mesh);
    */

    // Arrays allocated with new[] must be freed with delete[].
    delete[] pAdjClean;
    delete[] pAdj;
    return 0;
}




Share this post


Link to post
Share on other sites
Hey,

Your top clickster is broken :).

I am finding it hard to tell what methods you are using for the rendering from that code.

- Are you rendering in big batches?
- Are you rendering using triangle strips?
- Are you creating/manually modifying the contents of non-dynamically created vertex buffers at run time?

Dave

Share this post


Link to post
Share on other sites
Top imageryOhtehyay fixed.

The middle code segment should show you how I am rendering using DrawSubset. that is the function I call each time I want to render a model (gameclass::rendermodel)

Share this post


Link to post
Share on other sites
Quote:
Original post by Demirug
I am only guessing, but is it possible that you render the blocks of your “terrain“ one by one?


Yes, I render each block by calling rendermodel. Is there some more efficient way that I should be rendering all of the blocks?

Share this post


Link to post
Share on other sites
That means that each block is done in at least one Draw[Indexed]Primitive() call, probably more. How many times is DrawSubset called per frame? Try adding a counter to check that. Any more than 500/frame is bad, anything more than 1000 is really bad.

The best way would be to not use meshes to render like this, and use raw triangles instead. Then you could render the whole map in N calls to Draw[Indexed]Primitive, where N is the number of textures in your map.

Share this post


Link to post
Share on other sites
While I'm no expert in optimization, I can see that you're constantly setting your materials and textures with each call to the renderMesh function. You should at the very least batch your rendering calls by texture/material.

This pseudo-code really is not the best way to do this (some far more knowledgeable people on here can probably point out the best way) but it should give a rough idea:

void renderMesh(Mesh *mesh)
{
// Draw polygons...
}

void setShader(Shader *shader)
{
// Set material, set texture.
}

void sortByShader()
{
// Sort each model/mesh and group the ones that use the same texture.
// i.e.
// Mesh tank uses Tank_Shader (combination of your material/texture)
// Bricks 1 to (maximum brick count) uses Brick_Shader
// so you group the bricks together and set the texture/material ONCE.
}

void Render()
{
sortByShader(); // Call this only when you need it.

for every mesh in the sorted mesh list
{
if (lastShader != mesh->shader)
{
lastShader = mesh->shader;
setShader(mesh->shader);
}

// Set your transforms , blah
renderMesh(mesh);
}
}




Hopefully this will bring up those frame rates even just a little. Another thing to do is to batch your DrawPrimitive calls, that is put as much common vertex data/index data you can into a single vertex/index buffer and draw it once (this however will mean you MAY have to pre-transform your vertices). Again, I'm no expert and I'm probably way off. So use at your own risk.

Share this post


Link to post
Share on other sites
It looks like I am using DrawSubset once for each "block"; then when I draw a house or something it is more like 19.

So I'm guessing I'm calling DrawSubset ~800 times per frame. Is that too much?

Share this post


Link to post
Share on other sites
Clicky
"10k - 40k batches/s = 100% 1GHz CPU"
I.e. 10,000 to 40,000 calls to Draw[Indexed]Primitive per second (not frame) uses up 100% of a 1GHz CPU, meaning the CPU is so busy submitting batches to the GPU, it doesn't have time for anything else.

Share this post


Link to post
Share on other sites
Does anyone have a resource on how I would take all of these blocks and "batch" them so that there are fewer calls?

I'm confused, are the texture changes the problem, or the amount of drawsubsets? Or is that the same problem?

I think there is something that I am missing.

Share this post


Link to post
Share on other sites
Guest Anonymous Poster
your goal is to always push as many triangles to the graphics card with the least amount of stalls. This is usually accomplished by the following:

1) Have the minimal amount of set texture calls

2) Have the minimal amount of index/vertex changes


To do this you usually do the following:

1) Group by texture, and render all the identically textured triangles after the SetTexture call. This way you only need one SetTexture call before you render all your similarly textured triangles. Each time you use SetTexture, the texture is uploaded to your graphics card; the less you do this, the better.

2) Render as many triangles in one batch as possible. Sending 12 triangles (a simple cube) over and over to the card will kill your performance. Your card likes to take the biggest possible bite at a time.


Share this post


Link to post
Share on other sites
I notice the following code in your rendermodel method:
D3DMATERIAL9 m = mod.materials[i];
Do you really need to make a copy of the material? How about
D3DMATERIAL9& m = mod.materials[i];
instead?

Share this post


Link to post
Share on other sites

Drawing huge amount of boxes can be done by batching or Geometry Instancing. First one can be done on every hardware, latter with recent hardware.

For batching, I'd suggest splitting your world into cells of some size and then creating a buffer for the blocks inside each cell. Of course you need to group the blocks according to their materials. This method lacks flexibility for dynamic content: if a block is changed in some way (rotated, moved, deleted, added), you need to recreate the whole batch.

Geometry Instancing works pretty close to the basic "1 draw call per object" but it uses streams in order to accomplish the whole rendering task with 1 draw call. Practically first stream contains the object and the second stream matrices for each block to be rendered.

If you plan the block rendering part carefully, you can create an interface which is able to use Geometry Instancing if available and batching as a fall back method.

Regards

Share this post


Link to post
Share on other sites
Quote:
Original post by ktuluorion
Does anyone have a resource on how I would take all of these blocks and "batch" them so that there are fewer calls?

I'm confused, are the texture changes the problem, or the amount of drawsubsets? Or is that the same problem?

I think there is something that I am missing.


The amount of calls to the hardware in general is the problem. While some state changes are more expensive than others, it is still relatively expensive to call anything across the operating system rings.

I just offered one guy advice on a similar problem; see here. Even though it seems that the topic is about VB6, the advice applies to Direct3D in general.

Share this post


Link to post
Share on other sites
Quote:
Original post by Anonymous Poster
2) Render as many triangles in one batch as possible. Sending 12 triangles (a simple cube) over and over to the card will kill your performance. Your card likes to take the biggest possible bite at a time.


I'm pretty new to DX myself after moving over from GL.
What you described here is my next step in optimizations, but I'm having a hard time finding any source code for this to familiarize myself with DX.

Figuring out how to build one big buffer is not a problem.
However, I'm not sure about the matrices (translation, rotation, scaling) of each cube (in this example) and how to send them along with the vertex buffer.
Is there a similar buffer to store matrices for each object as well?

Share this post


Link to post
Share on other sites
Sadly there is no trivial way of combining multiple transforms and single draw calls. There is a technique called "Geometry Instancing" that can achieve this, but it's fairly advanced and isn't available on all hardware - thus it doesn't make sense to be relying on it for such a basic feature [oh]

If your game level is static, then I would suggest pre-transforming each block to its correct position when you create your "super buffer" containing all geometry. That way you can despatch the entire level as a single draw call yet still have the view/proj matrices control the view of it (so it's not completely transformed).

hth
Jack

Share this post


Link to post
Share on other sites
Quote:
Original post by jollyjeffers
Sadly there is no trivial way of combining multiple transforms and single draw calls. There is a technique called "Geometry Instancing" that can achieve this, but it's fairly advanced and isn't available on all hardware - thus it doesn't make sense to be relying on it for such a basic feature [oh]

If your game level is static, then I would suggest pre-transforming each block to its correct position when you create your "super buffer" containing all geometry. That way you can despatch the entire level as a single draw call yet still have the view/proj matrices control the view of it (so it's not completely transformed).

hth
Jack


Hmmm, I would like my engine to be able to run on all computers, so I will have to figure out something else (alternatively, check for and use geometry instancing if it's available on the system and fall back on something else if not).

I get about 60 fps rendering 730 cubes with 2 random materials (sorted by material with minimal state changes) on a Radeon 9800 Pro, AMD 3000+.
Sure, I have an old card, but I'm pretty sure there's more to squeeze out of it.
I only set the vertex buffer to use once, but the 730 calls to DrawPrimitive appear to be the big bottleneck here.

The cubes are nodes in a scene graph which can be assigned custom transformations, rotations etc., so they can't be treated as static geometry.

Share this post


Link to post
Share on other sites
So let me get this straight -- somehow I will create a vertex buffer that contains all of my cubes combined? What kind of calls would I use to get several cubes combined and transformed?

Share this post


Link to post
Share on other sites
Quote:
Original post by ktuluorion
So let me get this straight -- somehow I will create a vertex buffer that contains all of my cubes combined? What kind of calls would I use to get several cubes combined and transformed?


Well, with an indexed triangle list it is pretty straightforward. Just check the amount of space you need for the cubes (indices and vertices) and then create the buffers of the calculated size.

Then comes a slightly harder part. The vertices you store in the VB need to be in world space, so you transform the positions (and normals) manually and store that data in the VB. For the indices, you have to take into account that each cube's vertices start at a different location in the VB, so you'll need to add that cube's starting vertex offset to every one of its indices. For example, if each cube has 24 vertices, the indices of the third cube are the original cube indices plus 48.

Draw it on the paper and you'll see the logic. It is pretty simple. Then you can draw all the boxes with a single drawing call.

Share this post


Link to post
Share on other sites
I see conceptually what is going on.. but unfortunately I do not see how it would be coded. I'll have to go through the sdk docs a bit and see how I can transform these in world space.

The only thing I don't get is that starting vertex for each index part. Is there any good tutorial on how to do this stuff out there? Something with code that is easily understood?

Share this post


Link to post
Share on other sites
Quote:
Original post by jollyjeffers
Sadly there is no trivial way of combining multiple transforms and single draw calls. There is a technique called "Geometry Instancing" that can achieve this, but it's fairly advanced and isn't available on all hardware - thus it doesn't make sense to be relying on it for such a basic feature [oh]



Geometry instancing is available on all ATI SM2 cards and up (Radeon 9500 and up) and GeForce 6x00 and up, so these days I'd say that most users are likely to have it, and it's worth implementing. That said, for ATI pre-SM3 cards it's a kludge, and it won't run in Direct3D Debug, because it's defined to be an SM3 feature.

Note also that in my experience instancing becomes slower at higher numbers. On my Radeon 9800 Pro it became horrendously slow over 1000 instances (with the drivers of a few months ago). On the GeForce 6600 it also became slower, but to a lesser extent and at a higher number. Still, I wouldn't recommend more than about 1000 instances at a time.

For simple objects, like your boxes, the constant instancing method works well. This works by storing the transforms in shader constants. The number of constants changes by the shader model, but you should be able to get a few tens of transforms in any shader model, let's say 40. Your vertex buffer contains your box duplicated these 40 times, with an instance index appended to the position (therefore xyzw instead of xyz). This index is then used for indexing into the constants.

For a good demo of instancing methods, including const instancing and how to enable instancing on SM2 ATI cards, see http://www.humus.ca/index.php?page=3D&ID=52 (Humus' is a great site).

Share this post


Link to post
Share on other sites

Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!

Register a new account

Sign in

Already have an account? Sign in here.

Sign In Now

Sign in to follow this