/*
 * Starts a batch of textured quads: locks the vertex buffer for writing,
 * empties the batch and binds `texture` to texture stage 0.
 * Every quad added until end_quad_set() is drawn with that texture.
 */
void d3d_class::begin_quad_set(LPDIRECT3DTEXTURE8 texture) {
    /* lock with offset 0 / size 0; `vertices` receives the write pointer */
    vertex_buffer->Lock(0, 0, (uchar **) &vertices, 0);

    /* nothing has been queued for this batch yet */
    vertices_count = 0;

    /* texture used by every quad of this set */
    device->SetTexture(0, texture);
}
/*
 * Adds one quad covering the rectangle `dest` to the locked vertex buffer
 * and draws the whole batch as soon as the buffer is full.
 *
 * Writes through this->vertices, which is only valid while the vertex
 * buffer is locked (begin_quad_set() locks it, end_quad_set() unlocks it).
 *
 *   0 ----- 1
 *   | \     |
 *   |   \   |
 *   |     \ |
 *   3 ----- 2
 *
 *   0: x,         y          -> left,  top
 *   1: x + width, y          -> right, top
 *   2: x + width, y + height -> right, bottom
 *   3: x,         y + height -> left,  bottom
 *
 * 4 vertices and 6 indices per quad; the index buffer is set up elsewhere
 * (the original note says vertices 1 and 2 are the doubled ones -- confirm
 * against the index buffer contents).
 */
void d3d_class::add_quad(RECT *dest) {
    /* index of this quad's first vertex inside the locked buffer */
    int i = this->vertices_count;

    /* 0: left, top */
    this->vertices[i].x = (float) dest->left;
    this->vertices[i].y = (float) dest->top;
    this->vertices[i].z = 1.0f;
    this->vertices[i].rhw = 1.0f;
    this->vertices[i].color = D3DCOLOR_XRGB(255, 255, 255);
    this->vertices[i].u = 0.0f;
    this->vertices[i].v = 0.0f;

    /* 1: right, top */
    this->vertices[i + 1].x = (float) dest->right;
    this->vertices[i + 1].y = (float) dest->top;
    this->vertices[i + 1].z = 1.0f;
    this->vertices[i + 1].rhw = 1.0f;
    this->vertices[i + 1].color = D3DCOLOR_XRGB(255, 255, 255);
    this->vertices[i + 1].u = 1.0f;
    this->vertices[i + 1].v = 0.0f;

    /* 2: right, bottom */
    this->vertices[i + 2].x = (float) dest->right;
    this->vertices[i + 2].y = (float) dest->bottom;
    this->vertices[i + 2].z = 1.0f;
    this->vertices[i + 2].rhw = 1.0f;
    this->vertices[i + 2].color = D3DCOLOR_XRGB(255, 255, 255);
    this->vertices[i + 2].u = 1.0f;
    this->vertices[i + 2].v = 1.0f;

    /* 3: left, bottom */
    this->vertices[i + 3].x = (float) dest->left;
    this->vertices[i + 3].y = (float) dest->bottom;
    this->vertices[i + 3].z = 1.0f;
    this->vertices[i + 3].rhw = 1.0f;
    this->vertices[i + 3].color = D3DCOLOR_XRGB(255, 255, 255);
    this->vertices[i + 3].u = 0.0f;
    this->vertices[i + 3].v = 1.0f;

    /* increase vertices count */
    this->vertices_count += 4;

    /* flush the buffer if it's full */
    if (this->vertices_count == (VERTEX_BUFFER_SIZE * 4)) {
        /* the buffer must be unlocked before it can be drawn from */
        this->vertex_buffer->Unlock();

        /* 4 vertices make 2 triangles, hence vertices_count / 2 primitives */
        this->device->DrawIndexedPrimitive(D3DPT_TRIANGLELIST, 0, this->vertices_count, 0, this->vertices_count / 2);

        /* reset vertices count */
        this->vertices_count = 0;

        /* lock again so the next add_quad() call can keep writing */
        this->vertex_buffer->Lock(0, 0, (uchar **) &(this->vertices), 0);
    }
}
<span class="cpp-comment">/* ends a set of textured quads */</span>
<span class="cpp-keyword">void</span> d3d_class::end_quad_set(<span class="cpp-keyword">void</span>) {
<span class="cpp-comment">/* unlock vertex buffer */</span>
<span class="cpp-keyword">this</span>->vertex_buffer->Unlock();
<span class="cpp-comment">/* flush the buffer if it isn't empty */</span>
<span class="cpp-keyword">if</span> (<span class="cpp-keyword">this</span>->vertices_count != <span class="cpp-number">0</span>) {
<span class="cpp-keyword">this</span>->device->DrawIndexedPrimitive(D3DPT_TRIANGLELIST, <span class="cpp-number">0</span>, <span class="cpp-keyword">this</span>->vertices_count, <span class="cpp-number">0</span>, <span class="cpp-keyword">this</span>->vertices_count / <span class="cpp-number">2</span>);
<span class="cpp-comment">/* reset vertices count */</span>
<span class="cpp-keyword">this</span>->vertices_count = <span class="cpp-number">0</span>;
}
}
<span class="cpp-comment">/* begins scene */</span>
<span class="cpp-keyword">inline</span> <span class="cpp-keyword">void</span> d3d_class::begin_scene(<span class="cpp-keyword">void</span>) {
<span class="cpp-comment">/* clear the screen */</span>
<span class="cpp-keyword">this</span>->device->Clear(<span class="cpp-number">0</span>, NULL, D3DCLEAR_TARGET, D3DCOLOR_XRGB(<span class="cpp-number">0</span>, <span class="cpp-number">0</span>, <span class="cpp-number">0</span>), <span class="cpp-number">0</span>.0f, <span class="cpp-number">0</span>);
<span class="cpp-comment">/* begin scene */</span>
<span class="cpp-keyword">this</span>->device->BeginScene();
}
<span class="cpp-comment">/* ends scene */</span>
<span class="cpp-keyword">inline</span> <span class="cpp-keyword">void</span> d3d_class::end_scene(<span class="cpp-keyword">void</span>) {
<span class="cpp-comment">/* end the scene */</span>
<span class="cpp-keyword">this</span>->device->EndScene();
<span class="cpp-comment">/* present the scene */</span>
<span class="cpp-keyword">this</span>->device->Present(NULL, NULL, NULL, NULL);
}
</pre></div><!–ENDSCRIPT–>
Does anybody know what I'm doing wrong?
I call begin_scene, begin_quad_set once, add_quad 150 times and then end_quad_set and end_scene.
Thanks,
Gerben VV
Slow drawing textured quads with direct3D8
Hi,
I don't know what I'm doing wrong, but my game runs extremely slow.
I've got 40 FPS, which isn't much. (I've got an ATI Radeon 7000)
I've implemented an index buffer, and I work with textured quads.
In my game loop, I'm drawing 150 quads.
These are my d3d8 functions:
Hi.
Setting the vertices 150 times per render loop is much slower. Instead, fill one large vertex buffer once, when you initialize your application.
Then you just call render vertex buffer once per render loop.
Setting the vertices 150 times per render loop is much slower. Instead, fill one large vertex buffer once, when you initialize your application.
Then you just call render vertex buffer once per render loop.
From a quick look at your code it seems that you'll be holding the lock on the buffer for a relatively long period of time. Whilst you hold that lock you could be stalling the pipeline - not good [smile]
Try re-writing it so that you compose the buffer entirely in system memory and then do a quick lock-memcpy-unlock operation when you need to.
Also, look into the various locking flags (discard for example) and verify that you're creating the vertex buffer with the correct usage flags. The Debug runtimes will usually scream-and-shout at you if you're doing anything obviously wrong here (you have run it against the debugs, right?)
One final note - the best optimization in this sort of case is at the algorithmic level. Resource modification is painful - you can alleviate some of the pain, but it's never going to be "nice"... so micro-optimization might get you somewhere, but the biggest gains are likely to come from changing the way the application uses/needs locks.
A trivial example - some sort of double-buffering approach so that you don't keep changing the same VB repeatedly. Maybe try to take advantage of any temporal coherancy and cache common results between frames so that you only do work when something needs changing.
hth
Jack
Try re-writing it so that you compose the buffer entirely in system memory and then do a quick lock-memcpy-unlock operation when you need to.
Also, look into the various locking flags (discard for example) and verify that you're creating the vertex buffer with the correct usage flags. The Debug runtimes will usually scream-and-shout at you if you're doing anything obviously wrong here (you have run it against the debugs, right?)
One final note - the best optimization in this sort of case is at the algorithmic level. Resource modification is painful - you can alleviate some of the pain, but it's never going to be "nice"... so micro-optimization might get you somewhere, but the biggest gains are likely to come from changing the way the application uses/needs locks.
A trivial example - some sort of double-buffering approach so that you don't keep changing the same VB repeatedly. Maybe try to take advantage of any temporal coherancy and cache common results between frames so that you only do work when something needs changing.
hth
Jack
Thanks,
I tried to lock the VB as little as possible, since every lock takes time.
What I figured out is that it is the Present() call that takes a long time.
I don't really understand the stalling pipeline problem.
I'll try setting the VB content in system-memory and then memcpy it, but it takes extra memory.
Can you actually setup 2 VBs?
algorithmic level?? You really should explain this to me, coz I don't understand this.
Finally, I don't know how to set different textures if I would copy the VB from system to video memory.
Quote:From a quick look at your code it seems that you'll be holding the lock on the buffer for a relatively long period of time. Whilst you hold that lock you could be stalling the pipeline - not good [smile]
I tried to lock the VB as little as possible, since every lock takes time.
What I figured out is that it is the Present() call that takes a long time.
I don't really understand the stalling pipeline problem.
Quote:Try re-writing it so that you compose the buffer entirely in system memory and then do a quick lock-memcpy-unlock operation when you need to.
I'll try setting the VB content in system-memory and then memcpy it, but it takes extra memory.
Quote:A trivial example - some sort of double-buffering approach so that you don't keep changing the same VB repeatedly. Maybe try to take advantage of any temporal coherancy and cache common results between frames so that you only do work when something needs changing.
Can you actually setup 2 VBs?
algorithmic level?? You really should explain this to me, coz I don't understand this.
Finally, I don't know how to set different textures if I would copy the VB from system to video memory.
This'll have to be quick - I've gotta go out in 5 [smile]
Direct3D's rendering is a pipeline - a series of connected stages, taking input and then passing on it's output to the next stage. If one stage has to wait for something else to finish, or for it's input data to be ready, then it is considered "stalled" - it's not doing anything useful.
If you have a lock on a vertex buffer it, by definition, means that any part of the graphics pipeline cannot actually render from it (because it might be in the process of being changed).
Yup, it'll take extra memory - but that really shouldn't be a problem. Even a large vertex buffer will only take a few hundred kilobytes of system memory - which is a drop-in-the-ocean for modern 256-512-1024mb machines.
Yup, create as many IDirect3DVertexBuffer9's as you want [smile] You typically tend to use one at a time (although even this can be "broken") but there's nothing wrong with having many vertex buffers.
The algorithmic level is how you design your code - it's what steps you implement to solve your problem. You might wan to look up "Big Oh" notation if you're not familiar with it. It's mathematically provable that some algorithms are faster than others - for example, "Quick Sort" is almost always going to be much faster than "Bubble Sort" no matter how much you try and optimize both [smile]
For each texture you have to set up the new texture, there's not really any sensible way around this. Although, remember that if you set the parameters to DrawPrimitive() or DrawIndexedPrimitive() appropriately you don't have to render the entire vertex buffer. You could quite conceivably render a large buffer in 4 different draw-calls and change the texture between them.
hth
Jack
Quote:Original post by gerbenvvQuote:From a quick look at your code it seems that you'll be holding the lock on the buffer for a relatively long period of time. Whilst you hold that lock you could be stalling the pipeline - not good [smile]
I tried to lock the VB as little as possible, since every lock takes time.
What I figured out is that it is the Present() call that takes a long time.
I don't really understand the stalling pipeline problem.
Direct3D's rendering is a pipeline - a series of connected stages, taking input and then passing on it's output to the next stage. If one stage has to wait for something else to finish, or for it's input data to be ready, then it is considered "stalled" - it's not doing anything useful.
If you have a lock on a vertex buffer it, by definition, means that any part of the graphics pipeline cannot actually render from it (because it might be in the process of being changed).
Quote:Original post by gerbenvvQuote:Try re-writing it so that you compose the buffer entirely in system memory and then do a quick lock-memcpy-unlock operation when you need to.
I'll try setting the VB content in system-memory and then memcpy it, but it takes extra memory.
Yup, it'll take extra memory - but that really shouldn't be a problem. Even a large vertex buffer will only take a few hundred kilobytes of system memory - which is a drop-in-the-ocean for modern 256-512-1024mb machines.
Quote:Original post by gerbenvvQuote:A trivial example - some sort of double-buffering approach so that you don't keep changing the same VB repeatedly. Maybe try to take advantage of any temporal coherancy and cache common results between frames so that you only do work when something needs changing.
Can you actually setup 2 VBs?
Yup, create as many IDirect3DVertexBuffer9's as you want [smile] You typically tend to use one at a time (although even this can be "broken") but there's nothing wrong with having many vertex buffers.
Quote:Original post by gerbenvv
algorithmic level?? You really should explain this to me, coz I don't understand this.
The algorithmic level is how you design your code - it's what steps you implement to solve your problem. You might wan to look up "Big Oh" notation if you're not familiar with it. It's mathematically provable that some algorithms are faster than others - for example, "Quick Sort" is almost always going to be much faster than "Bubble Sort" no matter how much you try and optimize both [smile]
Quote:Original post by gerbenvv
Finally, I don't know how to set different textures if I would copy the VB from system to video memory.
For each texture you have to set up the new texture, there's not really any sensible way around this. Although, remember that if you set the parameters to DrawPrimitive() or DrawIndexedPrimitive() appropriately you don't have to render the entire vertex buffer. You could quite conceivably render a large buffer in 4 different draw-calls and change the texture between them.
hth
Jack
I just want to clear up a common problem with people using Big-O notation and "proofs".
Big-O notation is used to measure and compare "complexity" in terms of "if you have a very large number of elements, then two algorithms with the same runtime / space complexity can converge to theoretically the same speed".
this works in the classroom and on paper when doing research in computer science. But in the real world those constants that get ignored ( O ( 2198120938 x n ) ~= O ( n ) ) are very very important... for example the original poster is running some code on 150 textured quads... so n = 150 for simplicity sake... and lets say in one algorithm, it runs in O (n) time and uses 150 instructions per quad... that sounds fast, linear time, O(n), etc... but lets say you have an O (nlogn) algorithm ("slower") but only uses 10 instructions per iteration...
O(n) < O(n log n)... but 150 * n(= 150) is worse than 10 * 150 * log(150)...
sorry about the O(n) rant but in the work place far too often Ill show optimizations for an algorithm that greatly reduce operational constants on a known algorithm that has the "best proven runtime complexity" and people just ignore it, "the constants are insignificant"... which is usually not the case... unless you can prove that your n is going to be much larger than a couple thousand... try optimizing the algorithms for constants as well..
to gerbenvv:
if you still care about optimizing code in general, every bit helps... you might want to avoid doign somethings you have there...
every time you have a "this->vertices[i + 3].some variable = x;" the compiler is generating a lot of instructions for you that you don't need…

try setting a temporary variable = to a reference of the vertex in question.

Vertex &tempVert = vertices[i + 3]; for example… now not only are you doing i + 3 only once instead of 8 times, you are also not dereferencing the array pointer that many times… sometimes the compiler can optimize this out, but in your case it's based on a for loop index, so it will probably expect there is a reason you are dereferencing that many times.

if you are only doing it once or twice, an extra 40 instructions per rendering loop means nothing to computers of today… but you are doing it 40 times per loop iteration, that really adds up… especially if you want to have time to do game logic, input, sound… etc.. or port it to GBA where the processor has far fewer cycles available per second.

the second thing is using the D3DCOLOR_XRGB() macro to set all your colours to white… doesn't look like those change, so you could do a COLORREF whiteColour = D3DCOLOR_XRGB(255, 255, 255); and then just use that variable… this isn't a big deal, but the macro in question does a few bit shifts and will use some scratch registers to do it, I'm all about optimizing out as many instructions as you can… bit shifts are usually fast on all machines, but you never know…

Sorry that these are all terribly small things, just keep them in mind if you apply to game development studios doing work on non-PC hardware… they are often fighting for clock cycles, especially when the AI guys keep thinking they should be writing physics code in their AI code ;) damn AI….

and I agree with jollyjeffers on the locking issue… stalling the graphics pipeline is always bad, you want to hold that lock for as short a time as possible, and don't worry, memcpy is fast… and if not, write an MMX / SSE version to be faster :P

btw I love your commenting style, I always draw little ascii pictures in methods, it's fun :)
Big-O notation is used to measure and compare "complexity" in terms of "if you have a very large number of elements, then two algorithms with the same runtime / space complexity can converge to theoretically the same speed".
this works in the classroom and on paper when doing research in computer science. But in the real world those constants that get ignored ( O ( 2198120938 x n ) ~= O ( n ) ) are very very important... for example the original poster is running some code on 150 textured quads... so n = 150 for simplicity sake... and lets say in one algorithm, it runs in O (n) time and uses 150 instructions per quad... that sounds fast, linear time, O(n), etc... but lets say you have an O (nlogn) algorithm ("slower") but only uses 10 instructions per iteration...
O(n) < O(n log n)... but 150 * n(= 150) is worse than 10 * 150 * log(150)...
sorry about the O(n) rant but in the work place far too often Ill show optimizations for an algorithm that greatly reduce operational constants on a known algorithm that has the "best proven runtime complexity" and people just ignore it, "the constants are insignificant"... which is usually not the case... unless you can prove that your n is going to be much larger than a couple thousand... try optimizing the algorithms for constants as well..
to gerbenvv:
if you still care about optimizing code in general, every bit helps... you might want to avoid doign somethings you have there...
every time you have a "this->vertices[i + 3].some variable = x;" the compiler is generating a lot of instructions for you that you don't need…

try setting a temporary variable = to a reference of the vertex in question.

Vertex &tempVert = vertices[i + 3]; for example… now not only are you doing i + 3 only once instead of 8 times, you are also not dereferencing the array pointer that many times… sometimes the compiler can optimize this out, but in your case it's based on a for loop index, so it will probably expect there is a reason you are dereferencing that many times.

if you are only doing it once or twice, an extra 40 instructions per rendering loop means nothing to computers of today… but you are doing it 40 times per loop iteration, that really adds up… especially if you want to have time to do game logic, input, sound… etc.. or port it to GBA where the processor has far fewer cycles available per second.

the second thing is using the D3DCOLOR_XRGB() macro to set all your colours to white… doesn't look like those change, so you could do a COLORREF whiteColour = D3DCOLOR_XRGB(255, 255, 255); and then just use that variable… this isn't a big deal, but the macro in question does a few bit shifts and will use some scratch registers to do it, I'm all about optimizing out as many instructions as you can… bit shifts are usually fast on all machines, but you never know…

Sorry that these are all terribly small things, just keep them in mind if you apply to game development studios doing work on non-PC hardware… they are often fighting for clock cycles, especially when the AI guys keep thinking they should be writing physics code in their AI code ;) damn AI….

and I agree with jollyjeffers on the locking issue… stalling the graphics pipeline is always bad, you want to hold that lock for as short a time as possible, and don't worry, memcpy is fast… and if not, write an MMX / SSE version to be faster :P

btw I love your commenting style, I always draw little ascii pictures in methods, it's fun :)
Well, thank you LEET_developer!
This is my new idea to do it:
Any ideas to improve this? [smile]
This is my new idea to do it:
setup quads -> realloc each time I add a new quad
sort quads by texture -> so I don't have to set texture many times
for each n vertices that fit in VB
    copy n vertices in sys memory
    lock VB
    copy n vertices to VB
    unlock VB
    for each n vertices that have same texture
        SetTexture
        DrawIndexedPrimitive n vertices
    end
end
Any ideas to improve this? [smile]
I wont go picking at your pseudo code :) but you sorted by textures then called a set texture every iteration in the loop... Im not sure if direct X has a check to see if you've already set that texture or if it just goes and does it, in openGl I usually set a texture with a method I make that checks if the texture is the current one, its like a "free" optimization.
but sorting is a good idea... another thing you could do, I dont know if you are familiar wtih data structures, but you could have a hash table of buckets.
a hash table is basically an associative array. think of it like doing this
quadList["texture name"] and that would return you a quad (or list of quads in my bucket example) that all have the same texture... this way you wouldnt have to sort them, you just add them to the appropriate list as they are created...
with every implementation there are tradeoffs and it usually comes down to speed vs memory in the end... so you could even just allocate a big array for quads (if you want speed instead of space) and then just have the first 200 be the first text and then the next 200 the next texture, and so on... that also allows you to eliminate the sorting step, I dont know how often you add quads, but it could turn out to be an issue.
I dont know what limitations direct X has on vertex buffers, but you could try to make one big enough for your max number of quads... then at least you'll only do one lock and one memcpy... but again, I'm fairly new to direct X so maybe there is a limitation that wont allow you to make them big enough.
but sorting is a good idea... another thing you could do, I dont know if you are familiar wtih data structures, but you could have a hash table of buckets.
a hash table is basically an associative array. think of it like doing this
quadList["texture name"] and that would return you a quad (or list of quads in my bucket example) that all have the same texture... this way you wouldnt have to sort them, you just add them to the appropriate list as they are created...
with every implementation there are tradeoffs and it usually comes down to speed vs memory in the end... so you could even just allocate a big array for quads (if you want speed instead of space) and then just have the first 200 be the first text and then the next 200 the next texture, and so on... that also allows you to eliminate the sorting step, I dont know how often you add quads, but it could turn out to be an issue.
I dont know what limitations direct X has on vertex buffers, but you could try to make one big enough for your max number of quads... then at least you'll only do one lock and one memcpy... but again, I'm fairly new to direct X so maybe there is a limitation that wont allow you to make them big enough.
I didn't know you could use associative arrays in C++. (like PHP has)
But anyway, I'm gonna take a look in the msdn stuff of SetTexture.
And about the size of the VB, I don't know how many vertices I'm gonna draw, so I think I'm gonna code my pseudo code. [smile]
But anyway, I'm gonna take a look in the msdn stuff of SetTexture.
And about the size of the VB, I don't know how many vertices I'm gonna draw, so I think I'm gonna code my pseudo code. [smile]
LEET_Developer - you make some good/valid points, but I have to disagree in some ways. Micro-Optimization is indeed a useful trick, but in all my years of programming (not just games/graphics) it's yielded less returns that algorithmic-optimization.
Two examples off the top of my head:
1. Terrain rendering. In my early days I spent *ages* micro-optimizing my brute-force approach. Eliminated almost every unecessary cycle and I optimized every data structure to be properly aligned. I spent a day implementing Quadtree's - using some heavy data duplication and performance went up 10-20 fold.
2. General object culling. Had a highly optimized frustum culling algorithm that went through all the objects and checked them - but only when the object had/or camera had moved. It was about as tight as I could get it. I later went in and implemented heirarchical culling (with groups of objects as well as sub-objects) and it was almost as if culling was for free - it no longer even registered on my profiler [grin]
Then there is always the absolute classic argument against micro-optimization: Compilers. If you study the theory behind even a simple compiler you'll see that they'll optimize away a lot of the things that you've mentioned. Expression simplification, constant folding, dead-code removal.. Not only does this mean that you don't have to worry about it, but it means you don't need to confuse your source code and make it harder to read.
Also, if you start implementing your own low-level optimizations (especially if you break-out to assembly [oh]) then you're limiting the choices that a compiler has - and if you're going to try and out-optimize an optimizing compiler you really need to be very sure that what you're writing is better than what it can generate.
Based on your original code, that looks good to me. The only ways I can see of improving it are along the lines of temporal optimization - only update things if/when you have to. Whether you can actually do this very much depends on the rest of your programs flow/structure [smile]
hth
Jack
EDIT: Missed that SetTexture [headshake]. It's likely that the runtime or the driver will spot the duplicate SetTexture calls, but it's best not to rely on such things.
EDIT 2: I need to pay more attention [headshake]
[Edited by - jollyjeffers on December 8, 2005 8:45:02 AM]
Two examples off the top of my head:
1. Terrain rendering. In my early days I spent *ages* micro-optimizing my brute-force approach. Eliminated almost every unecessary cycle and I optimized every data structure to be properly aligned. I spent a day implementing Quadtree's - using some heavy data duplication and performance went up 10-20 fold.
2. General object culling. Had a highly optimized frustum culling algorithm that went through all the objects and checked them - but only when the object had/or camera had moved. It was about as tight as I could get it. I later went in and implemented heirarchical culling (with groups of objects as well as sub-objects) and it was almost as if culling was for free - it no longer even registered on my profiler [grin]
Then there is always the absolute classic argument against micro-optimization: Compilers. If you study the theory behind even a simple compiler you'll see that they'll optimize away a lot of the things that you've mentioned. Expression simplification, constant folding, dead-code removal.. Not only does this mean that you don't have to worry about it, but it means you don't need to confuse your source code and make it harder to read.
Also, if you start implementing your own low-level optimizations (especially if you break-out to assembly [oh]) then you're limiting the choices that a compiler has - and if you're going to try and out-optimize an optimizing compiler you really need to be very sure that what you're writing is better than what it can generate.
Quote:Any ideas to improve this?
Based on your original code, that looks good to me. The only ways I can see of improving it are along the lines of temporal optimization - only update things if/when you have to. Whether you can actually do this very much depends on the rest of your programs flow/structure [smile]
hth
Jack
EDIT: Missed that SetTexture [headshake]. It's likely that the runtime or the driver will spot the duplicate SetTexture calls, but it's best not to rely on such things.
EDIT 2: I need to pay more attention [headshake]
[Edited by - jollyjeffers on December 8, 2005 8:45:02 AM]
This topic is closed to new replies.
Advertisement
Popular Topics
Advertisement