Allocator crash after alloc/dealloc

Started by
11 comments, last by l0k0 11 years, 3 months ago

As a learning exercise I wrote my own version of several STL components, this allocator generally works however while I was testing particles with it, it crashes after a variable number of iterations with alloc's and dealloc's . I've been playing with the code for ours and I'm sure there's something I'm doing wrong. Perhaps someone can point it out to me.

During the loop, I save the address of particles that will need to be dealloc'd in a vector, then later dealloc the pointers.


The problem always occurs with a segfault at ln. 76 and happens because the MemChunk's pointers point to memory that looks like it should be NULL but is something like 0x1, 0x5, etc.

This is the allocator's code


template<class T = void>
class PoolAllocator: public AllocBase<T>
{
public:
	typedef typename AllocBase<T>::AllocType AllocType;
	typedef typename AllocBase<T>::AllocTypePtr AllocTypePtr;

	PoolAllocator(size_t sizeInUnits = 1024) :
			mMemory(NULL), mUsedSize(0), mMaxSize(sizeInUnits), mOffset(
					sizeof(MemChunk) + sizeof(T))
	{
		mMemory = new (std::nothrow) char[mMaxSize * mOffset];
		// mMemory = malloc(mMaxSize * mOffset);
		if (!mMemory)
		{
			//Do something more informational here
			throw std::bad_alloc();
			return;
		}

		for (size_t i = 0; i < mMaxSize; i++)
		{
			MemChunk *pCurUnit = (MemChunk *) ((char *) mMemory + i * mOffset);

			pCurUnit->mPrevChunk = mFreeList.mLast;
			pCurUnit->mNextChunk = NULL;

			if (mFreeList.mLast)
				mFreeList.mLast->mNextChunk = pCurUnit;
			mFreeList.mLast = pCurUnit;

			if (!mFreeList.mFirst)
				mFreeList.mFirst = pCurUnit;
		}

	}

	virtual ~PoolAllocator()
	{
		delete[] mMemory;
	}

	template<typename T1>
	struct rebind
	{
		typedef PoolAllocator<T1> other;
	};

	struct MemChunk
	{
		MemChunk* mNextChunk, *mPrevChunk;
	};

	struct ChunkList
	{
		ChunkList() :
				mFirst(0), mLast(0)
		{
		}
		MemChunk* mFirst, *mLast;
	};

	virtual AllocTypePtr allocate(size_t n, const AllocTypePtr = 0)
	{

		if ((mUsedSize + n) > (mMaxSize))
			_resize((mUsedSize * mOffset) + (mOffset * n) * 2);

		//Get the last chunk of unused memory
		MemChunk* pCurChunk = mFreeList.mLast;

		if (mFreeList.mFirst == pCurChunk)
			mFreeList.mFirst = pCurChunk->mNextChunk;

		//This shouldn't happen, since it's the back of the free list
		if (pCurChunk->mNextChunk)
			pCurChunk->mNextChunk->mPrevChunk = pCurChunk->mPrevChunk;

		//This should only _not_ happen if this is the last available piece of memory
		//When it does, the value of pCurChunk->mNextChunk should be NULL
		if (pCurChunk->mPrevChunk)
			pCurChunk->mPrevChunk->mNextChunk = NULL;

		//Set the new value of the last chunk in the free list
		mFreeList.mLast = pCurChunk->mPrevChunk;

		//Place it at the end of the AllocList
		pCurChunk->mPrevChunk = mAllocList.mLast;
		pCurChunk->mNextChunk = NULL;

		if (mAllocList.mLast)
			mAllocList.mLast->mNextChunk = pCurChunk;

		mAllocList.mLast = pCurChunk;
		if (!mAllocList.mFirst)
			mAllocList.mFirst = pCurChunk;

		mUsedSize += 1;

		return reinterpret_cast<AllocTypePtr>((reinterpret_cast<char*>(pCurChunk) + sizeof(MemChunk)));
	}

	//Returns a pointer to the memory at a specific offset. Do NOT modify this pointer
	inline const AllocTypePtr getPtr(unsigned i) const
	{
		return reinterpret_cast<AllocTypePtr>(reinterpret_cast<char*>(mAllocList.mFirst)
				- (mOffset * i) + sizeof(MemChunk));
	}

	virtual void dealloc(AllocTypePtr ptr, size_t = 0)
	{
		MemChunk *pCurChunk = (MemChunk*) ((char*) ptr - sizeof(MemChunk));

		std::cout << pCurChunk << "\n";
		//Remove the chunk from the AllocList
		if (pCurChunk->mNextChunk)
			pCurChunk->mNextChunk->mPrevChunk = pCurChunk->mPrevChunk;

		if (pCurChunk->mPrevChunk)
			pCurChunk->mPrevChunk->mNextChunk = pCurChunk->mNextChunk;

		if (mAllocList.mLast == pCurChunk)
			mAllocList.mLast = pCurChunk->mPrevChunk;

		if (mAllocList.mFirst == pCurChunk)
			mAllocList.mFirst = pCurChunk->mNextChunk;

		//Put it back on the FreeList
		pCurChunk->mPrevChunk = mFreeList.mLast;
		pCurChunk->mNextChunk = NULL;

		if (mFreeList.mLast)
			mFreeList.mLast->mNextChunk = pCurChunk;

		mFreeList.mLast = pCurChunk;

		if (!mFreeList.mFirst)
			mFreeList.mFirst = pCurChunk;

		mUsedSize -= 1;

	}
protected:

	/*
	 * NOTE: This is a slow operation and can be avoided by preallocating enough memory
	 * to hold your data.
	 */
	void _resize(unsigned size)
	{
		//Simple array reallocation
		size_t newSize = size;
		char* newArr = new (std::nothrow) char[newSize * mOffset];

		for (unsigned counter = 0; counter < mUsedSize; ++counter)
			newArr[counter] = mMemory[counter];

		//memcpy(newArr, mMemory, mMaxSize * mOffset);

		mMaxSize = newSize;

		delete[] mMemory;

		mMemory = newArr;

		//Relink the list
		for (size_t i = 0; i < mMaxSize; i++)
		{
			MemChunk *pCurUnit = (MemChunk *) ((char *) mMemory + i * mOffset);

			pCurUnit->mPrevChunk = mFreeList.mLast;
			pCurUnit->mNextChunk = NULL;

			if (mFreeList.mLast)
				mFreeList.mLast->mNextChunk = pCurUnit;

			mFreeList.mLast = pCurUnit;

			if (!mFreeList.mFirst)
				mFreeList.mFirst = pCurUnit;
		}
	}

protected:
	char* mMemory; //The big chunk of allocated memory from the OS

	ChunkList mFreeList;
	ChunkList mAllocList;

	size_t mUsedSize; //current used size in chunks
	size_t mMaxSize; //peak size in chunks
	size_t mOffset; //The total step in memory from one chunk to another (also equal to the sizeof(MemChunk) + sizeof(T))

};

Here is the loop


for (unsigned i = 0; i < mNumParticles; i++)
	{
		// Check particle's lifespan
		//Particle* pTemp = reinterpret_cast<Particle*>(mBuffer) + i;
		Particle* pTemp = mAllocator.getPtr(i);

		if (pTemp->m_Life-- < 0)
		{
			//Keep up with the particles to delete
			m_ParticleList.push_back(pTemp);
			continue;
		}

		// Update particle's position
		pTemp->m_Position.fX += pTemp->m_Velocity.fX;
		pTemp->m_Position.fY += pTemp->m_Velocity.fY;

		glColor3f(float(pTemp->m_Colour.r) / 255, float(pTemp->m_Colour.g) / 255,
				float(pTemp->m_Colour.b) / 255);
		glVertex2fv(pTemp->m_Position.ptr());
	}

	bool clear = false;
	for (unsigned i = 0; i < m_ParticleList.size(); i++)
	{
		Particle* p = m_ParticleList[i];

		mAllocator.destroy(p);
		mAllocator.dealloc(p);

		mNumParticles--;
		clear = true;
	}

	if (clear)
		m_ParticleList.clear();

	// If there are less particles than the maximum allowed create another
	if (mNumParticles < m_MaxParticles)
		createParticles(m_MaxParticles - mNumParticles);

the createParticles method calls allocate.

Advertisement
Where is 'destroy()' defined? (used on like 28 of the second code section)

Also, is the 'dealloc()' call on line 29 there the one triggering the segfault? (based on the stack trace)
void hurrrrrrrr() {__asm sub [ebp+4],5;}

There are ten kinds of people in this world: those who understand binary and those who don't.

destroy is defined in AllocBase, it simply calls the destructor of the object,

and yes the dealloc call is triggering the segfault, although it will work for a variable amount of times before causing a crash.

allocate() calls _resize() in a way that basically boils down to:
size_t currentByteLength = (mUsedSize * mOffset);
size_t byteLengthOfNewData = (mOffset * n);
size_t resizeArg = currentByteLength + (byteLengthOfNewData * 2);
_resize(resizeArg);

But _resize() begins with:
size_t newSize = size;
    char* newArr = new (std::nothrow) char[newSize * mOffset]; //multiplying by mOffset again
    //there's no check of (newArr == NULL)

_resize() does not destroy the contents of mFreeList before attempting to use the C&P code from the ctor to reset it.

//This shouldn't happen, since .....
Then it's an error condition and should be handled as such.

Is this subtraction correct?
inline const AllocTypePtr getPtr(unsigned i) const{
  return reinterpret_cast<AllocTypePtr>(reinterpret_cast<char*>(mAllocList.mFirst)
    - (mOffset * i) + sizeof(MemChunk));}

Also, what is the purpose of argument 'n' in allocate()? It's used in the _resize() check but not after that.

Unless I'm quite more confused than usual this means that you have a large block of contiguous memory which contains MemChunk nodes interpolated with single data payloads. If you add the payload to the MemChunk then you can just cast the memory as a MemChunk pointer and use array indexing to fetch nodes. If you plan to have dynamic sizing using 'n' then I don't think you can set up all your nodes during allocation, since you don't know how much space is needed between them. (I don't understand why that's being done anyway.)

(OMG this forum bug... TO THE MOON, ALICE!)
void hurrrrrrrr() {__asm sub [ebp+4],5;}

There are ten kinds of people in this world: those who understand binary and those who don't.

Sorry I'm in a hurry at the moment, but I think STD allocators should be copyable. You don't have copy constuctor.

I think the answer is to have mMemory wrapped in std::shared_ptr.

<blockquote class="ipsBlockquote" data-author="Codarki" data-cid="5016113"><p>Sorry I'm in a hurry at the moment, but I think STD allocators should be copyable. You don't have copy constuctor.<br />&nbsp;<br />I think the answer is to have mMemory wrapped in std::shared_ptr.</p></blockquote><br />They are also supposed to be stateless (except for the new scoped ones in C++11), as in they cannot contain instance data members, as in two different instances (with potentially different states, if they are not stateless as specified) of the same type should always be equal:<pre class="_prettyXprint _lang-code _linenums:NaN">
CustomNamedAllocator&lt;T&gt; a1("Mama");
CustomNamedAllocator&lt;T&gt; a2("Papa");
// This should always be true, according to the standard.
static_assert(a1 == a2, "Mama is not Papa! Surprise!");
</pre><br />Although I am pretty sure that most STL allocation implementations are made state aware (at least MSVC and GCC are), but since none has mentioned a specific compiler, I thought I would bring this to attention.

'ptr' does not point to itself. Even if it did this would not be correct, since the arg is a copy with the same value rather than the same pointer. You need to search out the pointer value manually.


virtual void dealloc(char* ptr, size_t = 0) {
MemChunk *pCurChunk = (MemChunk*) ((char*) ptr - sizeof(MemChunk));

You must have edited this out, but I looked into it and finally made sense of it. After looking it over, I realized that this line of code was going back 8 bytes from 'ptr' but my real intention was to basically subtract 8 [ sizeof (MemChunk* )* 2 ] from the actual address of the pointer. I _think_ I accomplished that, like this.


MemChunk *pCurChunk = (MemChunk*) reinterpret_cast<char*>(reinterpret_cast<ptrdiff_t>(ptr)
- static_cast<ptrdiff_t>(sizeof(MemChunk*) * 2));

Whether this fixed my problem is still yet to be tested extensively, but it passed the quick test I put together for it.

I also took your advice and added a data member to the MemChunk struct which does allow me to use an array index, making things a lot more readable.

Lastly, I realized that the addresses from mFirst to mLast were descending, which was the opposite of what I wanted it to do (which accounts for the weird subtraction in the getPtr() function). To fix that, I made the loop that links the nodes together run backwards (from mMaxSize to 0). That allowed me to directly access the memory in the getPtr function the way I intended.

Does this not strike you as being a little silly?:
		mMemory = new (std::nothrow) char[mMaxSize * mOffset];
		// mMemory = malloc(mMaxSize * mOffset);
		if (!mMemory)
		{
			//Do something more informational here
			throw std::bad_alloc();
			return;
		}
I mean why explicitly use nothrow only to ultimately end up replicating the throw behaviour?
The return is also pointless since it can never be reached.
"In order to understand recursion, you must first understand recursion."
My website dedicated to sorting algorithms

I'm not sure if I should create new topic about this, I don't like creating topics around here.. I experimented with std allocators somewhat.

I did some benchmarks some years ago. It's easy to beat standard allocator (like 400% faster and in 25% memory overhead vs node allocation) if you can restrict to single threaded, and pool in user space.std::new is fast, and its very difficult to create benchmark to measure real usage. delete and repeated realloc is harder to beat. I did bunch of hand crafted free lists for x86 and x64 builds.

The effort, and risk for bugs is so great that IMO for consumer product it's not worth it.

Here's partial spam of the code. The idea I had was to separate allocation and construction, and have one or few shared pooled and paged allocators in user memory space, for std::vector, std::list and others. A lot of hand crafted containers omitted. You could craft multithreaded per thread allocator which would be faster, but I didn't have time to figure the synch for multithreaded deallocation. The point was to separate the allocation and construction of objects.

[source]

#define _HAS_ITERATOR_DEBUGGING 0
#define _SCL_SECURE 0

#include "block_memory.h"
#include "pt/std/map.h"
#include <iostream>

using namespace boost;

namespace pt {

template<typename T>
struct construction
{
construction() {}
construction(int) {}

__forceinline void construct(T* __restrict ptr, T const& t)
{
void* __restrict vptr = ptr;
::new (vptr) T(t);
}
__forceinline void destroy(T* __restrict ptr)
{
ptr->~T();
ptr;
}
};

template<typename T>
struct memory
{
typedef T value_type;
typedef value_type* pointer;
typedef value_type& reference;
typedef value_type const* const_pointer;
typedef value_type const& const_reference;

typedef std::size_t size_type;
typedef std::ptrdiff_t difference_type;

inline size_type max_size() const
{
// estimate maximum array size
size_type count = (size_type)(-1) / sizeof(T);
return (0 < count ? count : 1);
}
inline pointer allocate(size_type count)
{
// allocate storage for count elements of type T
return (T*)::operator new(count * sizeof(T));
}
inline void deallocate(pointer ptr, size_type)
{
::operator delete(ptr);
}

inline pointer address(reference r) const
{
return &r;
}
inline const_pointer address(const_reference r) const
{
return &r;
}
};

#include "paged_block_data.h"
#include "pt/boost/shared_ptr.h"

namespace pt {

#define PT_INLINE __forceinline
// Tells the compiler that the function returns an object that will not be
// aliased with any other pointers. Will propagate.
#define PT_RESTRICT_FUNC __declspec(restrict)

class block_memory
{
public: // type definitions
typedef int value_type;
typedef value_type* pointer;
typedef value_type& reference;
typedef value_type const* const_pointer;
typedef value_type const& const_reference;

typedef std::size_t size_type;
typedef std::ptrdiff_t difference_type;

static size_t const data_size = sizeof(value_type);

public:
block_memory(shared_ptr<paged_block_data> const& data);
shared_ptr<paged_block_data> const& data() const;

// std::allocator interface
size_type max_size() const;
pointer address(reference r) const;
const_pointer address(const_reference r) const;

template<size_t ElementSize>
PT_INLINE PT_RESTRICT_FUNC pointer allocate_element();
template<>
PT_INLINE PT_RESTRICT_FUNC pointer allocate_element<12>()
{
return reinterpret_cast<pointer>(m_block_data->allocate_16());
}
template<>
PT_INLINE PT_RESTRICT_FUNC pointer allocate_element<16>()
{
return reinterpret_cast<pointer>(m_block_data->allocate_16());
}
template<>
PT_INLINE PT_RESTRICT_FUNC pointer allocate_element<24>()
{
return reinterpret_cast<pointer>(m_block_data->allocate_32());
}

template<size_t ElementSize>
PT_INLINE void deallocate_element(value_type* __restrict ptr);
template<>
PT_INLINE void deallocate_element<12>(value_type* __restrict ptr)
{
m_block_data->deallocate_16(ptr);
}
template<>
PT_INLINE void deallocate_element<16>(value_type* __restrict ptr)
{
m_block_data->deallocate_16(ptr);
}
template<>
PT_INLINE void deallocate_element<24>(value_type* __restrict ptr)
{
m_block_data->deallocate_32(ptr);
}
private: // data members
shared_ptr<paged_block_data> m_block_data;
};

inline block_memory::block_memory(shared_ptr<paged_block_data> const& data)
: m_block_data(data)
{
}

inline shared_ptr<paged_block_data> const& block_memory::data() const
{
return m_block_data;
}

inline block_memory::size_type block_memory::max_size() const
{
// estimate maximum array size
size_type count = (size_type)(-1) / data_size;
return (0 < count ? count : 1);
}

PT_INLINE block_memory::pointer block_memory::address(reference r) const
{
return &r;
}

PT_INLINE block_memory::const_pointer block_memory::address(
const_reference r) const
{
return &r;
}


template<typename T>
struct typed_block_memory : public block_memory
{
typedef T value_type;
typedef value_type* __restrict pointer;
typedef value_type& reference;
typedef value_type const* __restrict const_pointer;
typedef value_type const& const_reference;

typedef std::size_t size_type;
typedef std::ptrdiff_t difference_type;

explicit typed_block_memory(shared_ptr<paged_block_data> const& data)
: block_memory(data)
{
}
inline size_type max_size() const
{
// estimate maximum array size
size_type count = (size_type)(-1) / sizeof(value_type);
return (0 < count ? count : 1);
}
inline pointer allocate(size_type count)
{
assert(count == 1); count;
block_memory::pointer ptr
= block_memory::allocate_element<sizeof(T)>();

return reinterpret_cast<pointer>(ptr);
}
inline void deallocate(pointer ptr, size_type count)
{
block_memory::pointer block_ptr
= reinterpret_cast<block_memory::pointer >(ptr);
count;
block_memory::deallocate_element<sizeof(T)>(block_ptr);
}
inline pointer address(reference r) const
{
return &r;
}
inline const_pointer address(const_reference r) const
{
return &r;
}
};

template<typename T>
class node_allocator : public construction<T>, public typed_block_memory<T>
{
public:
typedef T* pointer;

// convert an allocator<T> to an allocator<Other>
template<class Other>
struct rebind
{
typedef node_allocator<Other> other;
};

template<typename U>
node_allocator(node_allocator<U> other)
: typed_block_memory(other.data())
{
}

template<typename C, typename M>
node_allocator(C const& c, M const& m)
: construction(c)
, typed_block_memory(m)
{
}
};

class untyped_node_allocator
{
public:
typedef void* pointer;

// convert an untyped_node_allocator to an allocator<Other>
template<class Other>
struct rebind
{
typedef node_allocator<Other> other;
};

template<typename U>
untyped_node_allocator(node_allocator<U> other)
: m_memory_parameter(other.data())
{
}

template<typename C, typename M>
untyped_node_allocator(C const&, M const& m)
: m_memory_parameter(m)
{
}

template<class Other>
operator node_allocator<Other>()
{
return node_allocator<Other>(0, m_memory_parameter);
}

shared_ptr<pt::paged_block_data> m_memory_parameter;
};

void test_main()
{
shared_ptr<paged_block_data> block_data(new paged_block_data);
block_data->reserve();

untyped_node_allocator untyped_alloc(0, block_data);

std::list<int,untyped_node_allocator> int_test(allocator);

} // namespace pt

[/source]

<p>*edit: deleted bunch of code, becouse its most likely wrong and forum formatting is so ugly.</p>

This topic is closed to new replies.

Advertisement