VertexBuffer performance issue. Idea for a strategy?

Started by
17 comments, last by Wartime 11 years, 11 months ago
Here is my code to create a chunk:


void WorldChunk::createChunk()
{
vert_count = 0;
index_count = 0;
index_number = 0;
CUSTOMVERTEX* Vertices;
int* Indices;
cdevice->CreateVertexBuffer( 24 * 16 * 16 * 16 * sizeof( CUSTOMVERTEX ), D3DUSAGE_WRITEONLY, D3DFVF_CUSTOMVERTEX, D3DPOOL_DEFAULT, &VB, NULL );
cdevice->CreateIndexBuffer(36 * 16 * 16 * 16 *sizeof(int),D3DUSAGE_WRITEONLY,D3DFMT_INDEX32,D3DPOOL_DEFAULT,&IB,NULL);
VB->Lock( 0, 0, ( void** )&Vertices, D3DLOCK_DISCARD);
IB->Lock(0,0,(void **)&Indices, D3DLOCK_DISCARD);
for(int x = 0; x < 16; x++)
{
for(int y = 0; y < 16; y++)
{
for(int z = 0; z < 16; z++)
{
//Ist da kein Block zeichnen wir den nicht...
if(chunk[x][y][z] == 0)
continue;
block_type = chunk[x][y][z];
//Befinden wir uns am linken Rand? Dann haben wir keinen linken Nachbarn ansonsten holen wir den aus dem Chunk-Array
//Dasselbe gilt für alle anderen Richtungen (Hab keine Lust, das für jede Abfrage zu wiederholen ;-) )
if(x > 0)
{
testblock = chunk[x-1][y][z];
}else{
testblock = 0;
}
if(testblock == 0)
{
Vertices[vert_count].position = D3DXVECTOR3(x,y,z+1);
Vertices[vert_count].tu = 0.0f+((float)(block_type-1))*0.25f;
Vertices[vert_count].tv = 1.0f;
ver.push_back(Vertices[vert_count]);
Indices[index_count] = index_number;
ind.push_back(Indices[index_count]);
index_count++;

Vertices[vert_count+1].position = D3DXVECTOR3(x,y+1,z+1);
Vertices[vert_count+1].tu = 0.0f+((float)(block_type-1))*0.25f;
Vertices[vert_count+1].tv = 0.0f;
ver.push_back(Vertices[vert_count+1]);
Indices[index_count] = index_number+1;
ind.push_back(Indices[index_count]);
index_count++;
Vertices[vert_count+2].position = D3DXVECTOR3(x,y,z);
Vertices[vert_count+2].tu = 0.25f+(block_type-1)*0.25f;
Vertices[vert_count+2].tv = 1.0f;
ver.push_back(Vertices[vert_count+2]);
Indices[index_count] = index_number+2;
ind.push_back(Indices[index_count]);
index_count++;
Indices[index_count] = index_number+2;
ind.push_back(Indices[index_count]);
index_count++;
Indices[index_count] = index_number+1;
ind.push_back(Indices[index_count]);
index_count++;
Vertices[vert_count+3].position = D3DXVECTOR3(x,y+1,z);
Vertices[vert_count+3].tu = 0.25f+(block_type-1)*0.25f;
Vertices[vert_count+3].tv = 0.0f;
ver.push_back(Vertices[vert_count+3]);
Indices[index_count] = index_number+3;
ind.push_back(Indices[index_count]);
index_count++;
index_number += 4;
vert_count += 4; //Erhöhe den Zähler um 6 , weil wir 6 Vertices gezeichnet haben...
}

if(x < 16-1)
{
testblock = chunk[x+1][y][z];
}else{
testblock = 0;
}
if(testblock == 0)
{
Vertices[vert_count].position = D3DXVECTOR3(x+1,y,z+1);
Vertices[vert_count].tu = 0.0f+(block_type-1)*0.25f;
Vertices[vert_count].tv = 1.0f;
ver.push_back(Vertices[vert_count]);
Indices[index_count] = index_number;
ind.push_back(Indices[index_count]);
index_count++;

Vertices[vert_count+1].position = D3DXVECTOR3(x+1,y+1,z+1);
Vertices[vert_count+1].tu = 0.0f+(block_type-1)*0.25f;
Vertices[vert_count+1].tv = 0.0f;
ver.push_back(Vertices[vert_count+1]);
Indices[index_count] = index_number+1;
ind.push_back(Indices[index_count]);
index_count++;
Vertices[vert_count+2].position = D3DXVECTOR3(x+1,y,z);
Vertices[vert_count+2].tu = 0.25f+(block_type-1)*0.25f;
Vertices[vert_count+2].tv = 1.0f;
ver.push_back(Vertices[vert_count+2]);
Indices[index_count] = index_number+2;
ind.push_back(Indices[index_count]);
index_count++;
Indices[index_count] = index_number+2;
ind.push_back(Indices[index_count]);
index_count++;
Indices[index_count] = index_number+1;
ind.push_back(Indices[index_count]);
index_count++;
Vertices[vert_count+3].position = D3DXVECTOR3(x+1,y+1,z);
Vertices[vert_count+3].tu = 0.25f+(block_type-1)*0.25f;
Vertices[vert_count+3].tv = 0.0f;
ver.push_back(Vertices[vert_count+3]);
Indices[index_count] = index_number+3;
ind.push_back(Indices[index_count]);
index_count++;
index_number += 4;
vert_count += 4; //Erhöhe den Zähler um 6 , weil wir 6 Vertices gezeichnet haben...
}
if(y > 0)
{
testblock = chunk[x][y-1][z];
}else{
testblock = 0;
}
if(testblock == 0)
{
Vertices[vert_count].position = D3DXVECTOR3(x,y,z);
Vertices[vert_count].tu = 0.0f+(block_type-1)*0.25f;
Vertices[vert_count].tv = 1.0f;
ver.push_back(Vertices[vert_count]);
Indices[index_count] = index_number;
ind.push_back(Indices[index_count]);
index_count++;

Vertices[vert_count+1].position = D3DXVECTOR3(x,y,z+1);
Vertices[vert_count+1].tu = 0.0f+(block_type-1)*0.25f;
Vertices[vert_count+1].tv = 0.0f;
ver.push_back(Vertices[vert_count+1]);
Indices[index_count] = index_number+1;
ind.push_back(Indices[index_count]);
index_count++;
Vertices[vert_count+2].position = D3DXVECTOR3(x+1,y,z);
Vertices[vert_count+2].tu = 0.25f+(block_type-1)*0.25f;
Vertices[vert_count+2].tv = 1.0f;
ver.push_back(Vertices[vert_count+2]);
Indices[index_count] = index_number+2;
ind.push_back(Indices[index_count]);
index_count++;
Indices[index_count] = index_number+2;
ind.push_back(Indices[index_count]);
index_count++;
Indices[index_count] = index_number+1;
ind.push_back(Indices[index_count]);
index_count++;
Vertices[vert_count+3].position = D3DXVECTOR3(x+1,y,z+1);
Vertices[vert_count+3].tu = 0.25f+(block_type-1)*0.25f;
Vertices[vert_count+3].tv = 0.0f;
ver.push_back(Vertices[vert_count+3]);
Indices[index_count] = index_number+3;
ind.push_back(Indices[index_count]);
index_count++;
index_number += 4;
vert_count += 4; //Erhöhe den Zähler um 6 , weil wir 6 Vertices gezeichnet haben...
}
if(y < 16-1)
{
testblock = chunk[x][y+1][z];
}else{
testblock = 0;
}
if(testblock == 0)
{
Vertices[vert_count].position = D3DXVECTOR3(x,y+1,z);
Vertices[vert_count].tu = 0.0f+(block_type-1)*0.25f;
Vertices[vert_count].tv = 1.0f;
ver.push_back(Vertices[vert_count]);
Indices[index_count] = index_number;
ind.push_back(Indices[index_count]);
index_count++;

Vertices[vert_count+1].position = D3DXVECTOR3(x,y+1,z+1);
Vertices[vert_count+1].tu = 0.0f+(block_type-1)*0.25f;
Vertices[vert_count+1].tv = 0.0f;
ver.push_back(Vertices[vert_count+1]);
Indices[index_count] = index_number+1;
ind.push_back(Indices[index_count]);
index_count++;
Vertices[vert_count+2].position = D3DXVECTOR3(x+1,y+1,z);
Vertices[vert_count+2].tu = 0.25f+(block_type-1)*0.25f;
Vertices[vert_count+2].tv = 1.0f;
ver.push_back(Vertices[vert_count+2]);
Indices[index_count] = index_number+2;
ind.push_back(Indices[index_count]);
index_count++;
Indices[index_count] = index_number+2;
ind.push_back(Indices[index_count]);
index_count++;
Indices[index_count] = index_number+1;
ind.push_back(Indices[index_count]);
index_count++;
Vertices[vert_count+3].position = D3DXVECTOR3(x+1,y+1,z+1);
Vertices[vert_count+3].tu = 0.25f+(block_type-1)*0.25f;
Vertices[vert_count+3].tv = 0.0f;
ver.push_back(Vertices[vert_count+3]);
Indices[index_count] = index_number+3;
ind.push_back(Indices[index_count]);
index_count++;
index_number += 4;
vert_count += 4; //Erhöhe den Zähler um 6 , weil wir 6 Vertices gezeichnet haben...
}
if(z > 0)
{
testblock = chunk[x][y][z-1];
}else{
testblock = 0;
}
if(testblock == 0)
{
Vertices[vert_count].position = D3DXVECTOR3(x,y,z);
Vertices[vert_count].tu = 0.0f+(block_type-1)*0.25f;
Vertices[vert_count].tv = 1.0f;
ver.push_back(Vertices[vert_count]);
Indices[index_count] = index_number;
ind.push_back(Indices[index_count]);
index_count++;

Vertices[vert_count+1].position = D3DXVECTOR3(x,y+1,z);
Vertices[vert_count+1].tu = 0.0f+(block_type-1)*0.25f;
Vertices[vert_count+1].tv = 0.0f;
ver.push_back(Vertices[vert_count+1]);
Indices[index_count] = index_number+1;
ind.push_back(Indices[index_count]);
index_count++;
Vertices[vert_count+2].position = D3DXVECTOR3(x+1,y,z);
Vertices[vert_count+2].tu = 0.25f+(block_type-1)*0.25f;
Vertices[vert_count+2].tv = 1.0f;
ver.push_back(Vertices[vert_count+2]);
Indices[index_count] = index_number+2;
ind.push_back(Indices[index_count]);
index_count++;
Indices[index_count] = index_number+2;
ind.push_back(Indices[index_count]);
index_count++;
Indices[index_count] = index_number+1;
ind.push_back(Indices[index_count]);
index_count++;
Vertices[vert_count+3].position = D3DXVECTOR3(x+1,y+1,z);
Vertices[vert_count+3].tu = 0.25f+(block_type-1)*0.25f;
Vertices[vert_count+3].tv = 0.0f;
ver.push_back(Vertices[vert_count+3]);
Indices[index_count] = index_number+3;
ind.push_back(Indices[index_count]);
index_count++;
index_number += 4;
vert_count += 4;//Erhöhe den Zähler um 6 , weil wir 6 Vertices gezeichnet haben...
}
if(z < 16-1)
{
testblock = chunk[x][y][z+1];
}else{
testblock = 0;
}
if(testblock == 0)
{
Vertices[vert_count].position = D3DXVECTOR3(x,y,z+1);
Vertices[vert_count].tu = 0.0f+(block_type-1)*0.25f;
Vertices[vert_count].tv = 1.0f;
ver.push_back(Vertices[vert_count]);
Indices[index_count] = index_number;
ind.push_back(Indices[index_count]);
index_count++;

Vertices[vert_count+1].position = D3DXVECTOR3(x,y+1,z+1);
Vertices[vert_count+1].tu = 0.0f+(block_type-1)*0.25f;
Vertices[vert_count+1].tv = 0.0f;
ver.push_back(Vertices[vert_count+1]);
Indices[index_count] = index_number+1;
ind.push_back(Indices[index_count]);
index_count++;
Vertices[vert_count+2].position = D3DXVECTOR3(x+1,y,z+1);
Vertices[vert_count+2].tu = 0.25f+(block_type-1)*0.25f;
Vertices[vert_count+2].tv = 1.0f;
ver.push_back(Vertices[vert_count+2]);
Indices[index_count] = index_number+2;
ind.push_back(Indices[index_count]);
index_count++;
Indices[index_count] = index_number+2;
ind.push_back(Indices[index_count]);
index_count++;
Indices[index_count] = index_number+1;
ind.push_back(Indices[index_count]);
index_count++;
Vertices[vert_count+3].position = D3DXVECTOR3(x+1,y+1,z+1);
Vertices[vert_count+3].tu = 0.25f+(block_type-1)*0.25f;
Vertices[vert_count+3].tv = 0.0f;
ver.push_back(Vertices[vert_count+3]);
Indices[index_count] = index_number+3;
ind.push_back(Indices[index_count]);
index_count++;
index_number += 4;
vert_count += 4; //Erhöhe den Zähler um 6 , weil wir 6 Vertices gezeichnet haben...
}
}
}
}
//****************************************************************************************************************
IB->Unlock();
VB->Unlock();


Once the chunk has been created, I use the vectors to "quick fill" the buffers in the next frame (the vertices don't change):


// Creates a vertex and an index buffer sized exactly for the cached geometry
// in `ver` / `ind` and copies that geometry into them.
//
// Does nothing when either container is empty. If a buffer cannot be created
// the function returns early; if a lock fails, that buffer's upload is skipped.
void WorldChunk::QuickFill()
{
    // &ver[0] / &ind[0] are not valid on an empty container, and there is
    // nothing to upload anyway.
    if (ver.empty() || ind.empty())
        return;

    const unsigned int vertexBytes = static_cast<unsigned int>(ver.size() * sizeof(CUSTOMVERTEX));
    const unsigned int indexBytes = static_cast<unsigned int>(ind.size() * sizeof(int));

    // NOTE(review): VB and IB are overwritten here. If they still refer to
    // buffers from an earlier call, those buffers are never released, and
    // calling this every frame also pays the buffer-creation cost every
    // frame. Confirm how VB/IB are declared and initialised, then release or
    // reuse the existing buffers.
    // HRESULT failure codes are negative (this is what FAILED() tests).
    if (cdevice->CreateVertexBuffer(vertexBytes, D3DUSAGE_DYNAMIC, D3DFVF_CUSTOMVERTEX, D3DPOOL_DEFAULT, &VB, NULL) < 0)
        return;
    if (cdevice->CreateIndexBuffer(indexBytes, D3DUSAGE_DYNAMIC, D3DFMT_INDEX32, D3DPOOL_DEFAULT, &IB, NULL) < 0)
        return;

    // Lock exactly the bytes that were allocated; asking for more than the
    // buffer holds is out of range.
    void* vv = NULL;
    if (VB->Lock(0, vertexBytes, &vv, D3DLOCK_DISCARD) >= 0)
    {
        memcpy(vv, &ver[0], vertexBytes);
        VB->Unlock();
    }

    void* ii = NULL;
    if (IB->Lock(0, indexBytes, &ii, D3DLOCK_DISCARD) >= 0)
    {
        memcpy(ii, &ind[0], indexBytes);
        IB->Unlock();
    }
}
Advertisement
If the verts don't change you do not need to update the vertex buffer, and for what you are doing you really want to use a dynamic vertex buffer. Basically any mesh that updates fairly frequently should be stored in a dynamic vertex buffer, any mesh that doesn't should be in a static one.

One more thing that will help you is to not update the vertex buffers when they haven't changed since the last frame; the fastest data you send to the GPU is the data you never send. Rendering with the same vertex buffer as in the last frame, when it hasn't changed, gives the same result as sending the same buffer again.

Worked on titles: CMR:DiRT2, DiRT 3, DiRT: Showdown, GRID 2, theHunter, theHunter: Primal, Mad Max, Watch Dogs: Legion

If you're calling CreateVertexBuffer and CreateIndexBuffer every frame, that explains why things slow down. Object creation is an expensive operation and should only be done during startup. In this particular case, you could use DrawPrimitiveUP instead of DrawPrimitive and it would run a lot faster - although completely reworking your code to use vertex buffers properly would be the real solution.

Direct3D has need of instancing, but we do not. We have plenty of glVertexAttrib calls.

Thank you both.

If I understand you the solution is:

  1. Create the vertex and index buffers once at startup.
  2. Fill the buffers until they are full
  3. Draw the Primitives
  4. Clear Buffers
  5. If there are more Vertices go to 2



If this is right, I've got another question:

What happens when I walk around the map?
The chunks I see change constantly when I turn around or walk forward for a long time, so I have to change the buffer contents all the time.
How can I do this? Or is there another solution?
You've got the idea, yes, but you only need to fill the buffers when they change: no change, no update. A change happens either when a chunk enters or leaves the view area, or when a block in a visible chunk changes.

Also, it isn't bad to keep a system-memory copy of the vertices in the list; it's just that you only send this list when the chunk is visible or a change has happened to it.

// Sketch of a chunk that keeps its geometry in system memory and only
// re-uploads it to the GPU buffers when something has changed.
class Chunk
{
public:
    // A new chunk has never been uploaded, so it starts out dirty.
    Chunk() : m_dirty(true) {}

    void update()
    {
        // If you add or remove blocks from this chunk, set m_dirty = true so
        // that the VB and IB are re-uploaded.
    }

private:
    bool m_dirty; // Only update the render buffers when this flag is set.
    // Modify these in update(); they need to be re-uploaded to the GPU
    // afterwards, so only modify them when the geometry actually changes.
    std::vector<Vertex> m_localChunkVertexData;
    std::vector<unsigned int> m_localIndexData;
};


This will allow you to change the vertex list without having to lock the vertex or index buffers until you are ready to upload to the device. Those vectors can also be local variables of the update function, which you write to the VB and IB once you have filled them with the update you wanted.

Worked on titles: CMR:DiRT2, DiRT 3, DiRT: Showdown, GRID 2, theHunter, theHunter: Primal, Mad Max, Watch Dogs: Legion

OK,

Still one question:

Situation:
I fill the buffer until it's full. Then I draw the vertices and flush the buffer.
Next, I fill it again with other vertices (because the buffer was full) and render.

If I re-render the frame (nothing has changed), I have to fill the buffer twice:
once with the first data and then with the second, to redraw all vertices. Is that right?
One option is to just create a bigger buffer - make it large enough to hold data for an entire frame worth of drawing, and don't bother worrying about this.

That may not always be possible. Depending on how much you're drawing a full frame's worth of data may be too much. In that case don't worry about it either - just fill and flush the buffer as you need.

The important thing to remember is that there is no guaranteed one-size-fits-all approach to this. Depending on your application's needs you'll be making adjustments to the recommended basic approach. Sometimes you'll keep a system memory copy, sometimes you won't, sometimes you don't bother refilling the buffer if data doesn't change, sometimes it's not that big a deal and is cheaper to just fill the buffer anyway, and sometimes using a group of smaller static vertex buffers is preferable to using one big dynamic buffer.

Direct3D has need of instancing, but we do not. We have plenty of glVertexAttrib calls.


One option is to just create a bigger buffer - make it large enough to hold data for an entire frame worth of drawing, and don't bother worrying about this.

That may not always be possible. Depending on how much you're drawing a full frame's worth of data may be too much. In that case don't worry about it either - just fill and flush the buffer as you need.

The important thing to remember is that there is no guaranteed one-size-fits-all approach to this. Depending on your application's needs you'll be making adjustments to the recommended basic approach. Sometimes you'll keep a system memory copy, sometimes you won't, sometimes you don't bother refilling the buffer if data doesn't change, sometimes it's not that big a deal and is cheaper to just fill the buffer anyway, and sometimes using a group of smaller static vertex buffers is preferable to using one big dynamic buffer.

With VBs and IBs the trick is to find the best batch size that works across the set of cards you want to support. I am making a maze-crawling game with a lot of 4-vert squares which make up the walls. When I submitted these as separate draw calls, my performance tanked massively with fewer than 100,000 verts on screen. When I batched them up into a single vertex buffer, performance jumped back to a solid 60 (vsync) on an HD4850.

A good rule of thumb is to try to get about 10,000 verts per vertex buffer if you have massive amounts of vertices to draw; this number can of course change according to the situation and profiling.

GPUs are bad at drawing buffers with a small number of verts in them, as most of the card is then doing nothing; but go over a threshold and performance dies as well, because the card is too busy dealing with all the data you give it. It's a balance you have to find through some trial and error and profiling.

Worked on titles: CMR:DiRT2, DiRT 3, DiRT: Showdown, GRID 2, theHunter, theHunter: Primal, Mad Max, Watch Dogs: Legion

Thank you for your help.

Our game is going full speed ahead.

Here is the newest version:

[attachment=8473:Kubos.rar]

This topic is closed to new replies.

Advertisement