Boost Serialization for Resource loading is extremely slow

Started by
18 comments, last by Hodgman 10 years, 9 months ago
I am making a game and have a resource file format for loading 3d models, with textures and meshes etc defined like this:

/* PackageHeader definition */
        // Identifies a package: a signature string plus the major/minor version
        // of the package format.
        struct PackageHeader
        {
            std::string mSignature;     // the default constructor sets this to "jons"
            uint8_t mMajorVersion;      // the default constructor sets this to LatestMajorVersion
            uint8_t mMinorVersion;      // the default constructor sets this to LatestMinorVersion
    
    
            PackageHeader();
        };
    
        /* PackageMesh definition */
        // Geometry of one mesh, kept as separate per-attribute arrays plus an
        // optional material reference.
        struct PackageMesh
        {
            std::vector<Vec3> mVertexData;          // presumably vertex positions - confirm against the importer
            std::vector<Vec3> mNormalData;
            std::vector<Vec2> mTexCoordsData;
            std::vector<uint32_t> mIndiceData;      // 32-bit index values
            uint16_t mMaterialIndex;                // defaults to 0; presumably an index into JonsPackage::mMaterials - confirm
            bool mHasMaterial;                      // defaults to false
    
    
            PackageMesh();
        };
    
        /* PackageTexture definition */
        // A named texture: raw byte payload plus its dimensions, format and type.
        struct PackageTexture
        {
            std::string mName;
            std::vector<uint8_t> mTextureData;      // raw texture bytes; layout presumably described by mTextureFormat - confirm
            uint32_t mTextureWidth;         // width/height in pixels
            uint32_t mTextureHeight;
            ITexture::TextureFormat mTextureFormat; // defaults to ITexture::UNKNOWN_FORMAT
            ITexture::TextureType mTextureType;     // defaults to ITexture::UNKNOWN_TYPE
    
    
            PackageTexture();
        };
    
        /* PackageMaterial definition */
        // A named material: one embedded diffuse texture and four colour terms.
        struct PackageMaterial
        {
            std::string mName;
            PackageTexture mDiffuseTexture;     // stored by value, so the texture bytes live inside the material
            Vec3 mDiffuseColor;                 // all four colours are initialised with 0.0f by the default constructor
            Vec3 mAmbientColor;
            Vec3 mSpecularColor;
            Vec3 mEmissiveColor;
    
    
            PackageMaterial();
        };
    
        /* PackageModel definition */
        // A node in a model hierarchy: a name, child models, the meshes attached
        // to this node and a transform.
        struct PackageModel
        {
            std::string mName;
            std::vector<PackageModel> mChildren;    // recursive: each child is itself a full PackageModel
            std::vector<PackageMesh> mMeshes;
            Mat4 mTransform;                        // the default constructor initialises this with Mat4(1.0f)
    
    
            PackageModel();
        };
    
        /* JonsPackage definition */
        // Root object of a resource package; this is the type that ReadJonsPkg
        // and WriteJonsPkg (de)serialize as a whole.
        struct JonsPackage
        {
            PackageHeader mHeader;
            std::vector<PackageModel> mModels;
            std::vector<PackageMaterial> mMaterials;
    
    
            JonsPackage();
        };
I am using Boost Serialization to save/load from the filesystem, which up until now has been absolutely wonderful, as it requires almost no code to do it.
However, after importing some 3D models and then trying to load them again, the loading times are enormous; it takes almost 30 seconds to load from the filesystem and then deserialize.
This is the code to serialize/deserialize:

/*
 * Loads a JonsPackage from the binary Boost.Serialization archive stored in
 * the file jonsPkgName.
 *
 * The returned pointer is never null: the package is allocated from the default
 * HeapAllocator up front and is returned default-constructed when the file
 * cannot be opened.
 *
 * The archive reads straight from the file stream. The previous version first
 * copied the entire file into a std::stringstream, which doubled the memory
 * traffic for no benefit.
 *
 * NOTE(review): Boost.Serialization reports malformed input by throwing
 * (boost::archive::archive_exception); that is not caught here - confirm
 * callers expect it.
 */
JonsPackagePtr ReadJonsPkg(const std::string& jonsPkgName)
        {
            // TODO: support opening of older resource packages
            std::ifstream jonsPkgStream(jonsPkgName.c_str(), std::ios::in | std::ios::binary);
            JonsPackagePtr pkg(HeapAllocator::GetDefaultHeapAllocator().AllocateObject<JonsPackage>(), boost::bind(&HeapAllocator::DeallocateObject<JonsPackage>, &HeapAllocator::GetDefaultHeapAllocator(), _1));

            // is_open() is false when the file could not be opened; nothing has
            // been read yet, so no further state checks are needed at this point.
            if (jonsPkgStream.is_open())
            {
                boost::archive::binary_iarchive iar(jonsPkgStream);

                iar >> *pkg;
            }

            // jonsPkgStream closes itself when it goes out of scope.
            return pkg;
        }
    
        bool WriteJonsPkg(const std::string& jonsPkgName, const JonsPackagePtr pkg)
        {
            std::ofstream outStream(jonsPkgName.c_str(), std::ios::out | std::ios::binary | std::ios::trunc);
            bool ret = false;
    
            if (outStream.is_open())
            {
                boost::archive::binary_oarchive oar(outStream);
                oar << (*pkg.get());
    
                ret = true;
            }
            
            return ret;
        }
Here is an image of the VS2012 performance analysis:
The resource file I am using is about 26 MB on disk and contains 3 package models and 14 package textures. What could I possibly do about this? Is my file format design a dead end?
EDIT:
Added default constructors/boost::serialization::serialize
Constructors:

/* JonsPackagePtr definition */
    // Shared-ownership handle to a JonsPackage. ReadJonsPkg creates it with a
    // custom deleter that returns the object to the default HeapAllocator.
    typedef boost::shared_ptr<JonsPackage> JonsPackagePtr;


    /*
     * Reading/writing JonsPackage files
     *
     * ReadJonsPkg:  deserializes the package stored in the file jonsPkgName.
     *               A package object is returned even when the file cannot be
     *               opened; in that case it is default-constructed.
     * WriteJonsPkg: serializes *pkg into the file jonsPkgName, truncating any
     *               existing file. Returns false if the file cannot be opened.
     */
    JonsPackagePtr ReadJonsPkg(const std::string& jonsPkgName);
    bool WriteJonsPkg(const std::string& jonsPkgName, const JonsPackagePtr pkg);


    /* PackageHeader inlines: "jons" signature, latest major/minor format version */
    inline PackageHeader::PackageHeader()
        : mSignature("jons"),
          mMajorVersion(LatestMajorVersion),
          mMinorVersion(LatestMinorVersion)
    {
    }

    /* PackageModel inlines: empty name, transform built from 1.0f; the child and mesh lists start empty */
    inline PackageModel::PackageModel()
        : mName(""),
          mTransform(1.0f)
    {
    }

    /* PackageMesh inlines: no material assigned; all attribute arrays start empty */
    inline PackageMesh::PackageMesh()
        : mMaterialIndex(0),
          mHasMaterial(false)
    {
    }

    /* PackageTexture inlines: empty name, 0x0 size, unknown format and type; no texture bytes */
    inline PackageTexture::PackageTexture()
        : mName(""),
          mTextureWidth(0),
          mTextureHeight(0),
          mTextureFormat(ITexture::UNKNOWN_FORMAT),
          mTextureType(ITexture::UNKNOWN_TYPE)
    {
    }

    /* PackageMaterial inlines: empty name, every colour built from 0.0f; the diffuse texture is default-constructed */
    inline PackageMaterial::PackageMaterial()
        : mName(""),
          mDiffuseColor(0.0f),
          mAmbientColor(0.0f),
          mSpecularColor(0.0f),
          mEmissiveColor(0.0f)
    {
    }

    /* JonsPackage inlines: every member is default-constructed */
    inline JonsPackage::JonsPackage() {}

Boost::serialization::serialize, non-intrusive:


// Non-intrusive Boost.Serialization hook for PackageHeader.
// Archive order: major version, minor version, signature.
template<class Archive>
        void serialize(Archive & ar, JonsEngine::PackageHeader& header, const unsigned int /*version*/)
        {
            ar & header.mMajorVersion
               & header.mMinorVersion
               & header.mSignature;
        }

        // Non-intrusive Boost.Serialization hook for PackageModel.
        // Archive order: name, children (recursively), meshes, transform.
        template<class Archive>
        void serialize(Archive & ar, JonsEngine::PackageModel& model, const unsigned int /*version*/)
        {
            ar & model.mName
               & model.mChildren
               & model.mMeshes
               & model.mTransform;
        }

        // Non-intrusive Boost.Serialization hook for PackageMesh.
        // Archive order: vertices, normals, tex coords, indices, material index, has-material flag.
        template<class Archive>
        void serialize(Archive & ar, JonsEngine::PackageMesh& mesh, const unsigned int /*version*/)
        {
            ar & mesh.mVertexData
               & mesh.mNormalData
               & mesh.mTexCoordsData
               & mesh.mIndiceData
               & mesh.mMaterialIndex
               & mesh.mHasMaterial;
        }

        // Non-intrusive Boost.Serialization hook for PackageTexture.
        // Archive order: name, texture bytes, width, height, format, type.
        template<class Archive>
        void serialize(Archive & ar, JonsEngine::PackageTexture& texture, const unsigned int /*version*/)
        {
            ar & texture.mName
               & texture.mTextureData
               & texture.mTextureWidth
               & texture.mTextureHeight
               & texture.mTextureFormat
               & texture.mTextureType;
        }

        // Non-intrusive Boost.Serialization hook for PackageMaterial.
        // Archive order: name, diffuse texture, then the diffuse, ambient,
        // specular and emissive colours.
        //
        // mEmissiveColor used to be left out, so it was silently reset to the
        // constructor default on every save/load round trip.
        // NOTE(review): adding it changes the on-disk layout. Packages written
        // by the old code must be re-exported, or this field gated on `version`
        // together with a BOOST_CLASS_VERSION bump - confirm which is wanted.
        template<class Archive>
        void serialize(Archive & ar, JonsEngine::PackageMaterial& material, const unsigned int version)
        {
            ar & material.mName;
            ar & material.mDiffuseTexture;
            ar & material.mDiffuseColor;
            ar & material.mAmbientColor;
            ar & material.mSpecularColor;
            ar & material.mEmissiveColor;
        }

        // Non-intrusive Boost.Serialization hook for the package root.
        // Archive order: header, models, materials.
        template<class Archive>
        void serialize(Archive & ar, JonsEngine::JonsPackage& pkg, const unsigned int /*version*/)
        {
            ar & pkg.mHeader
               & pkg.mModels
               & pkg.mMaterials;
        }

        // Non-intrusive Boost.Serialization hook for a glm 4x4 float matrix:
        // archives transform[0] through transform[3] in index order.
        template<class Archive>
        void serialize(Archive & ar, glm::detail::tmat4x4<glm::mediump_float>& transform, const unsigned int /*version*/)
        {
            for (int column = 0; column < 4; ++column)
            {
                ar & transform[column];
            }
        }

        // Non-intrusive Boost.Serialization hook for a glm 4-component float
        // vector: archives x, y, z, w in that order.
        template<class Archive>
        void serialize(Archive & ar, glm::detail::tvec4<glm::mediump_float>& vec, const unsigned int /*version*/)
        {
            ar & vec.x & vec.y & vec.z & vec.w;
        }

        // Non-intrusive Boost.Serialization hook for a glm 3-component float
        // vector: archives x, y, z in that order.
        template<class Archive>
        void serialize(Archive & ar, glm::detail::tvec3<glm::mediump_float>& vec, const unsigned int /*version*/)
        {
            ar & vec.x & vec.y & vec.z;
        }

        // Non-intrusive Boost.Serialization hook for a glm 2-component float
        // vector: archives x, then y.
        template<class Archive>
        void serialize(Archive & ar, glm::detail::tvec2<glm::mediump_float>& vec, const unsigned int /*version*/)
        {
            ar & vec.x & vec.y;
        }
Advertisement

- You may see a performance benefit from disabling iterator checking (#define _HAS_ITERATOR_DEBUGGING 0), or by testing a release build.

- Why are you reading the whole file into a stringstream before loading the archive from it?

- boost::serialization is feature-rich, but not all that fast. Consider a simpler alternative, like Google's Protocol Buffers.

Tristam MacDonald. Ex-BigTech Software Engineer. Future farmer. [https://trist.am]

With 10% of the time being spent in std::vector::iterator::operator!=(), that is a very clear sign that iterator debugging is killing performance.

Chances are good that you are also spending a huge amount of time in stack frame checks. You find them in VC++ debug builds, and in real-world tests they decrease performance by about 5 times. (That is, a function that calls lots of other functions that should require 10 microseconds requires 50 microseconds with stack frame checks enabled.)

It is usually good to have (at least) 3 builds. One is fully debug. Don't use it unless you absolutely must. One is fully release with all optimizations enabled. That is what you sell and QA against.

The third is a build with some optimizations turned on (such as automatic inlining) and some debugging info turned off (such as checked iterators and stack frame checks). That is a good one for general development.

I'm going to give it a try in a release build. I can't help but feel that if it takes 30-ish seconds already, even cutting the loading time by a factor of 5 still leaves it far too long for such a small number of assets, but I'll try it first.

Microsoft decided to have checked iterators in release builds, too. You need to specifically turn them off. It is a speed/safety decision, but most everyone decides the speed is more important. See this MSDN article for details.

Are you sure? Reading http://msdn.microsoft.com/en-us/library/vstudio/hh697468.aspx, it seems to imply that checked iterators are disabled by default in Release mode, but can be turned on if necessary.

I am making a game and have a resource file format for loading 3d models, with textures and meshes etc defined like this:


...
// Quoted excerpt of the package header: a signature string plus the
// major/minor format version.
struct PackageHeader
        {
            std::string mSignature;
            uint8_t mMajorVersion;
            uint8_t mMinorVersion;
    
    
            PackageHeader();
        };
...

I am using Boost Serialization to save/load from the filesystem, which up until now has been absolutely wonderful, as it requires almost no code to do it.

However, after importing some 3D models and then trying to load them again, the loading times are enormous; it takes almost 30 seconds to load from the filesystem and then deserialize.

This is the code to serialize/deserialize:

    
...
iar >> (*pkg.get());
...


Are you kidding? Your post doesn't include the struct constructors or the actual serialization and deserialization code (I assume it is defined in the missing JonsPackagePtr class), which are the only places where you might be doing something slow. Post a real code sample.

Omae Wa Mou Shindeiru

Added it, I don't see what it would change though as nothing fancy is done, I would assume the problem is in the design layout

edit: JonsPackagePtr is just a typedef for shared_ptr

Do you make modifications to the contents of those std::vectors during runtime, or are they read-only assets once they're loaded?

I've largely given up on serialization libraries for use with assets, and just load them in-place, e.g.

//https://code.google.com/p/eight/source/browse/include/eight/core/blob/types.h
#include "eight/core/blob/types.h"

// In-place (blob) variant of the header: the std::string signature is replaced
// by a StringOffset from eight/core/blob/types.h, and no constructor is declared.
struct PackageHeader
{
	StringOffset mSignature;	// presumably an offset to the string bytes inside the loaded blob - see blob/types.h
	uint8_t mMajorVersion;
	uint8_t mMinorVersion;
};
// In-place (blob) variant of the mesh: each std::vector<T> becomes an
// Offset<List<T>>, and the bool flag becomes a uint16_t.
struct PackageMesh
{
	Offset<List<Vec3>> mVertexData;
	Offset<List<Vec3>> mNormalData;
	Offset<List<Vec2>> mTexCoordsData;
	Offset<List<uint32_t>> mIndiceData;
	uint16_t mMaterialIndex;
	uint16_t mHasMaterial;	// fixed-width integer in place of the original bool
};
// In-place (blob) variant of the texture: the name and byte payload are held
// through offset types, and the format/type enums are stored as uint32_t.
struct PackageTexture
{
	StringOffset mName;
	Offset<List<uint8_t>> mTextureData;
	uint32_t mTextureWidth;//in pixels
	uint32_t mTextureHeight;
	uint32_t mTextureFormat;	// uint32_t in place of ITexture::TextureFormat
	uint32_t mTextureType;	// uint32_t in place of ITexture::TextureType
};
// In-place (blob) variant of the material: only the name changes type; the
// diffuse texture is still embedded by value and the colours remain Vec3.
struct PackageMaterial
{
	StringOffset mName;
	PackageTexture mDiffuseTexture;
	Vec3 mDiffuseColor;
	Vec3 mAmbientColor;
	Vec3 mSpecularColor;
	Vec3 mEmissiveColor;
};
// In-place (blob) variant of the model node: the child and mesh vectors become
// Offset<List<...>> members.
struct PackageModel
{
	StringOffset mName;
	Offset<List<PackageModel>> mChildren;	// still recursive: a list of PackageModel
	Offset<List<PackageMesh>> mMeshes;
	Mat4 mTransform;
};
// In-place (blob) variant of the package root. Load() casts the raw file bytes
// directly to this type.
struct JonsPackage
{
	PackageHeader mHeader;
	Offset<List<PackageModel>> mModels;
	Offset<List<PackageMaterial>> mMaterials;
};

// Reads the whole file `name` through `files` and reinterprets the returned
// bytes as a JonsPackage. No serialization step: the format on disc is the
// same as the format in memory.
JonsPackage* Load( FileStuff& files, const std::string& name )
{
	char* const rawBytes = files.ReadAllTheBytes( name );
	return reinterpret_cast<JonsPackage*>( rawBytes );
}

They are read-only once they are deserialized.

That solution looks awesome; if I can skip the serialization library I'm all for it, as I can then see no reason to keep it. Can you give a brief explanation of how this solution would work?

This topic is closed to new replies.

Advertisement