Optimizing Generation

Started by
31 comments, last by Pink Horror 7 years, 11 months ago

I've almost optimized everything I can imagine, but being more of a novice there's probably some tricks that I don't yet know of. Here's some pieces of code I believe could be faster, and I'll explain what I've done for each one.

Generation:

Starting Generation:


    // Use this for initialization
    public IEnumerator Generate()
    {

        blocks = new Block[16, 16, 16];

        posx = (int)Position.x;
        posy = (int)Position.y;
        posz = (int)Position.z;


        loadthread = UnityThreadHelper.TaskDistributor.Dispatch(() => blocks = GeneratePlanet.Generate("rock", planet.planetSize, planet.planetSeed, planet.cave, planet.cracked, posx, posy, posz));

        while (loadthread.IsSucceeded == false)
        {
            yield return new WaitForSeconds(0.1f);
        }


        loadthread = UnityThreadHelper.TaskDistributor.Dispatch(() => blocks = GeneratePlanet.GenerateOres(blocks, planet.planetSize, planet.planetSeed, posx, posy, posz));

        while (loadthread.IsSucceeded == false)
        {
            yield return new WaitForSeconds(0.1f);
        }

        Generated = true;
        UpdatePlanetChunk();

    }

- Used threading to increase performance

- Used Coroutines to spread out processing

Actual Generation (Kinda Complicated! ...And the most intensive code):


   public static  Block[,,] Generate(string PlanetType, int planetSize, int seed, bool cave, bool cracked, int posx, int posy, int posz)
    {

        Block[,,] blocks = new Block[16, 16, 16]; 

        int PlanetSize = planetSize;

        int r =  PlanetSize / 2;
        int offsetx = PlanetSize / 2;
        int offsety = PlanetSize / 2;
        int offsetz = PlanetSize / 2;

        for (int x = 0; x < 16; x++)
        {
            for (int y = 0; y < 16; y++)
            {
                for (int z = 0; z < 16; z++)
                {

                    if (blocks[x, y, z] == null)
                    {
                        blocks[x, y, z] = new BlockEmpty();
                    }

                }

            }

        }

        for (int tx = 0; tx < 16; tx++)
        {
            for (int ty = 0; ty < 16; ty++)
            {
                for (int tz = 0; tz < 16; tz++)
                {

                        if (Mathf.Sqrt(Mathf.Pow(tx + (posx * 16) - r, 2) + Mathf.Pow(ty + (posy * 16) - r, 2) + Mathf.Pow(tz + (posz * 16) - r, 2)) <= r - PlanetSize / 2.5)
                        {
                            blocks[tx, ty, tz] = new BlockCore();
                        }

                        else if (Mathf.Sqrt(Mathf.Pow(tx + (posx * 16) - r, 2) + Mathf.Pow(ty + (posy * 16) - r, 2) + Mathf.Pow(tz + (posz * 16) - r, 2)) <= r - PlanetSize / 3.5)
                        {
                            blocks[tx, ty, tz] = new BlockBedrock();
                        }

                        else if (Mathf.Sqrt(Mathf.Pow(tx + (posx * 16) - r, 2) + Mathf.Pow(ty + (posy * 16) - r, 2) + Mathf.Pow(tz + (posz * 16) - r, 2)) <= r - PlanetSize / 5)
                        {
                            blocks[tx, ty, tz] = new BlockSubstone();
                        }

                        else if (Mathf.Sqrt(Mathf.Pow(tx + (posx * 16) - r, 2) + Mathf.Pow(ty + (posy * 16) - r, 2) + Mathf.Pow(tz + (posz * 16) - r, 2)) <= r - PlanetSize / 6)
                        {
                            blocks[tx, ty, tz] = new BlockStone();
                        }


                    }

                }
            }


        



        if (cracked == true)
        {

            var CrackGen = new RidgeNoise(seed)
            {
                Frequency = (float)0.03,
                Lacunarity = 2,
                OctaveCount = 2,

                Exponent = 2,
                Offset = 1,
                Gain = (float)1,

            } * 1;


            for (int x = 0; x < 16; x++)
            {
                for (int y = 0; y < 16; y++)
                {
                    for (int z = 0; z < 16; z++)
                    {



                        float value = CrackGen.GetValue(new Vector3(x + (posx * 16), y + (posy * 16), z + (posz * 16)));

                        if (value > 1.00)
                        {
                            blocks[x, y, z] = new BlockEmpty();
                        }
                    }
                }
            }

        }


        if (cave == true)
        {

            var CaveGen = new BillowNoise(seed);

            CaveGen.Frequency = 0.08f;
            CaveGen.Persistence = 0.4f;

            for (int x = 0; x < 16; x++)
            {
                for (int y = 0; y < 16; y++)
                {
                    for (int z = 0; z < 16; z++)
                    {



                        float value = CaveGen.GetValue(new Vector3(x + (posx * 16), y + (posy * 16), z + (posz * 16)));

                        if (value > 0)
                        {
                            blocks[x, y, z] = new BlockEmpty();
                        }
                    }
                }
            }

        }

        return blocks;


    }


    public static Block[,,] GenerateOres(Block[,,] blocks, int planetSize, int seed, int posx, int posy, int posz)
    {

        var OreGen = new BillowNoise(seed);

        OreGen.Frequency = 0.16f;
        OreGen.Persistence = 0.4f;

        List<Block> OreTypes = new List<Block>();
        OreTypes.Add(new BlockOreUranium());

        foreach (Block ore in OreTypes)
        {


            for (int x = 0; x < 16; x++)
            {
                for (int y = 0; y < 16; y++)
                {
                    for (int z = 0; z < 16; z++)
                    {



                        float value = OreGen.GetValue(new Vector3(x + (posx * 16), y + (posy * 16), z + (posz * 16)));

                        if (value > ore.Rarity() && blocks[x, y, z].BlockName() == ore.BaseBlock())
                        {
                            blocks[x, y, z] = ore;

                        }
                    }
                }
            }
        }

        return blocks;

    }

- Used else-ifs to reduce testing of variables

- Changed to a chunk system to generate one bit at a time

If anyone sees any tips to improve this, thanks!

I'm making an open source voxel space game called V0xel_Sp4ce! Help me out with the repository here: https://github.com/SpikeViper/V0xel_Sp4ce

Advertisement

You need to use a profiler for this. What does your profiler tell you? Where are you actually spending your time? Is this really the bottleneck?

Using threads by itself distributes loads among processors but does not actually reduce the work, typically it causes a slight increase in total work but a division in wall clock time. Distributing your workload can help and is one of the easiest changes to make, but it is also one of the smallest ways to improve code.

After you've used your profiler to determine exactly where the actual bottlenecks are, use Unity's profiler commands Profiler.BeginSample() and Profiler.EndSample() to help isolate exactly where the problems are. Is the problem coming from a large number of calls that can be eliminated? Is it coming from unnecessary processing being done? Is it work that can be done with a different algorithm?

After you've profiled it, and used the profiler to isolate an exact section needing change, make the change and profile some more to make sure it actually improved the situation.

Without reviewing it in a profiler everything about it is guesswork. The change might have an improvement, or it may have no effect at all.

I agree with Frob, but that said, just looking at it, there are a couple of things that I can guess at:

Your sqrt is unneccessary, you can square both sides of the equation, and you can move the calculation of the various radii your checking against out into variables before the for loops. And then you can give them meaningful names too =) EDIT: If its not clear, I'm referring to (r - PlanetSize / 6)^2, which could be renamed minStoneRadiusSquared.

Also, you'll probably want to invert your for loop order, and do z, y, x, it's more memory friendly. If you are wondering why, imagine if you had a single array that was of length 16*16*16, and think of how you're jumping around in it as you travel through your innermost for loops.

EDIT: Though taking a deeper look, it looks like you're just trying to make blocks of a band of a sphere set type, which seems like something you could make separate iterations, without any radial checks at all, especially since those are going to be constant per planetsize, and your dealing with integers. (Which is why you should really deeply consider frob's questions)

You need to use a profiler for this. What does your profiler tell you? Where are you actually spending your time? Is this really the bottleneck?

Using threads by itself distributes loads among processors but does not actually reduce the work, typically it causes a slight increase in total work but a division in wall clock time. Distributing your workload can help and is one of the easiest changes to make, but it is also one of the smallest ways to improve code.

After you've used your profiler to determine exactly where the actual bottlenecks are, use Unity's profiler commands Profiler.BeginSample() and Profiler.EndSample() to help isolate exactly where the problems are. Is the problem coming from a large number of calls that can be eliminated? Is it coming from unnecessary processing being done? Is it work that can be done with a different algorithm?

After you've profiled it, and used the profiler to isolate an exact section needing change, make the change and profile some more to make sure it actually improved the situation.

Without reviewing it in a profiler everything about it is guesswork. The change might have an improvement, or it may have no effect at all.

Currently, 82.9% of my lag (during lag spikes) is due to PlanetChunk.Generate() [Coroutine: MoveNext]. I'll try to pinpoint it further and post more, but I am sure it is the generation code.

I'm making an open source voxel space game called V0xel_Sp4ce! Help me out with the repository here: https://github.com/SpikeViper/V0xel_Sp4ce

Shoot. I can only call on the profiler in the main thread.

I'm making an open source voxel space game called V0xel_Sp4ce! Help me out with the repository here: https://github.com/SpikeViper/V0xel_Sp4ce

I agree with Frob, but that said, just looking at it, there are a couple of things that I can guess at:

Your sqrt is unneccessary, you can square both sides of the equation, and you can move the calculation of the various radii your checking against out into variables before the for loops. And then you can give them meaningful names too =) EDIT: If its not clear, I'm referring to (r - PlanetSize / 6)^2, which could be renamed minStoneRadiusSquared.

Also, you'll probably want to invert your for loop order, and do z, y, x, it's more memory friendly. If you are wondering why, imagine if you had a single array that was of length 16*16*16, and think of how you're jumping around in it as you travel through your innermost for loops.

EDIT: Though taking a deeper look, it looks like you're just trying to make blocks of a band of a sphere set type, which seems like something you could make separate iterations, without any radial checks at all, especially since those are going to be constant per planetsize, and your dealing with integers. (Which is why you should really deeply consider frob's questions)

Those ideas actually did increase the speed quite a bit, and cut out some lag spikes.

I'm making an open source voxel space game called V0xel_Sp4ce! Help me out with the repository here: https://github.com/SpikeViper/V0xel_Sp4ce

The first time I didn't even bother reading the code, just pointing out the need for a profiler. So often people come on asking "Why is this slow?" and the profiler reveals that was not the problem at all.

If those comments applied, and looking over the code, seems like a few items could help.

It seems you are traversing a giant collection of data and applying a transformation or assignment or other piece of work. Don't run in a bunch of tiny zigzags, follow linearly across all the data. A large 1D array serving as your pool of objects is usually faster to manipulate in bulk linearly. If you break it up in parallel, start each one at 1/n intervals traveling linearly across their block.

Looks like you are intentionally creating a large number of objects. In nearly all game engines -- and unity is no exception -- adding and removing objects to the game world incurs a severe cost as items are placed into spatial trees, physics systems, event listeners, and other systems. The calls are expensive. Better to create a large pool of objects with a position that is considered "outside the world" and then incur the relatively small cost of moving the object within the world.

Strength reduction is your friend. The Pow() function for squaring is overkill, do the multiplication because it is faster than Pow(x,2); the exponentiation functions are primarily for harder cases like x^pi or x^e or other more difficult numbers than squared or cubed. The square root is unnecessary as was pointed out.

Loop invariants should be hoisted where reasonable. If you know you are operating inside a long sequence and all of them will satisfy the condition, test the condition once and use it for the longer run rather than re-testing on each item.

Ensure your objects are a cache friendly size. While it is harder with big engines like Unity and Unreal, you should try to minimize the size of the Block objects. If possible, get them down to the size of a cache line or a friendly multiple of it. 16 bytes, 32 bytes, 64 bytes, 128 bytes, 256 bytes, make it easy for the cache to recognize your stride as you walk along the array. If the cache prefetch can see you are stepping in 256 byte intervals it can preload your data for you. If you are stepping in variable increments or cache blocks in a 3-block/2-block stride, it will likely make it difficult to follow.

While these are good patterns generally, focusing effort on a specific set of code should usually wait until after the profiler identifies it as a bottleneck.

Don't intentionally write bad code, but at the same time, the first answers to performance tuning questions should generally be "Don't do it", followed by "Don't do it yet", followed by "Don't do it until you've measured, and measure after." Make sure it is an actual problem that needs to be addressed rather than speculatively adjusting things blindly. That is why my first response was "What does your profiler tell you?"

Yeah, pull anything you can farther up out of a loop if you can, for example:

Mathf.Pow(ty + (posy * 16) - r, 2) (which in addition to being overkill expensive, especially for integers, only changes when ty changes, yet you are calculating it every time in your inner most loop, so instead of only doing it 16*16 times, you are executing that code 16*16*16 times.

I also don't think your multithreaded code is helpful at the moment, you might want to strip it temporarily, especially if Unity's profiler is having a hard time with it.

And again, I think you should probably rethink how your iterating. For example, your cracked code. Rather than iterating over every block, why not randomly generate how many empty squares you think their should be. Then randomly generate a set of indices for the number of empty squares, index into those locations and set the block to empty. Instead of iterating over 16*16*16 squares and mostly doing nothing, you're looping only as many times as you have empty squares.

EDIT: Horrible pseudo code:

int numCrackedSquares = Random.GetValue()

for(int i = 0; i < numCrackedSquares; ++i)

{

int x = Random.GetInt()

int y = Random.GetInt()

int z = Random.GetInt()

blocks[x,y,z] = new BlockEmpty();

}

Found it! Using the profiler in deep profile mode, I figured out my noise being used for cave generation and ore gen was using 6 octaves. Cut it down to one octave, 60fps.

I'm making an open source voxel space game called V0xel_Sp4ce! Help me out with the repository here: https://github.com/SpikeViper/V0xel_Sp4ce

This bit...

if (Mathf.Sqrt(Mathf.Pow(tx + (posx * 16) - r, 2) + Mathf.Pow(ty + (posy * 16) - r, 2) + Mathf.Pow(tz + (posz * 16) - r, 2)) <= r - PlanetSize / 2.5)
{
blocks[tx, ty, tz] = new BlockCore();
}

else if (Mathf.Sqrt(Mathf.Pow(tx + (posx * 16) - r, 2) + Mathf.Pow(ty + (posy * 16) - r, 2) + Mathf.Pow(tz + (posz * 16) - r, 2)) <= r - PlanetSize / 3.5)
{
blocks[tx, ty, tz] = new BlockBedrock();
}

else if (Mathf.Sqrt(Mathf.Pow(tx + (posx * 16) - r, 2) + Mathf.Pow(ty + (posy * 16) - r, 2) + Mathf.Pow(tz + (posz * 16) - r, 2)) <= r - PlanetSize / 5)
{
blocks[tx, ty, tz] = new BlockSubstone();
}

else if (Mathf.Sqrt(Mathf.Pow(tx + (posx * 16) - r, 2) + Mathf.Pow(ty + (posy * 16) - r, 2) + Mathf.Pow(tz + (posz * 16) - r, 2)) <= r - PlanetSize / 6)
{
blocks[tx, ty, tz] = new BlockStone();
}

Could be

var posFactor = Mathf.Sqrt(Mathf.Pow(tx + (posx * 16) - r, 2) + Mathf.Pow(ty + (posy * 16) - r, 2) + Mathf.Pow(tz + (posz * 16) - r, 2));

if (posFactor <= r - PlanetSize / 2.5)
{
blocks[tx, ty, tz] = new BlockCore();
}

else if (posFactor <= r - PlanetSize / 3.5)
{
blocks[tx, ty, tz] = new BlockBedrock();
}

else if (posFactor <= r - PlanetSize / 5)
{
blocks[tx, ty, tz] = new BlockSubstone();
}

else if (posFactor <= r - PlanetSize / 6)
{
blocks[tx, ty, tz] = new BlockStone();
}

Otherwise exactly the same maths could be done upto 4 times per iteration each time giving the same result. Think DRY... Don't Repeat Yourself :)

This topic is closed to new replies.

Advertisement