Jump to content

  • Log In with Google      Sign In   
  • Create Account

Banner advertising on our site currently available from just $5!


1. Learn about the promo. 2. Sign up for GDNet+. 3. Set up your advert!


Aressera

Member Since 25 Mar 2007
Offline Last Active Yesterday, 09:22 PM

#5222483 C++ cant find a match for 16 bit float and how to convert 32 bit float to 16...

Posted by Aressera on 10 April 2015 - 01:35 PM

You may find the following well-commented code useful for converting between half-float and float types while handling special values like infinity and NAN. For me, these functions are wrapped in a HalfFloat class that has all of the standard arithmetic and conversion operators.


/// A static constant for a half float with a value of zero.
static const UInt16 ZERO = 0x0000;


/// A static constant for a half float with a value of not-a-number.
static const UInt16 NOT_A_NUMBER = 0xFFFF;


/// A static constant for a half float with a value of positive infinity.
static const UInt16 POSITIVE_INFINITY = 0x7C00;


/// A static constant for a half float with a value of negative infinity.
static const UInt16 NEGATIVE_INFINITY = 0xFC00;


/// A mask which isolates the sign of a half float number.
static const UInt16 HALF_FLOAT_SIGN_MASK = 0x8000;


/// A mask which isolates the exponent of a half float number.
static const UInt16 HALF_FLOAT_EXPONENT_MASK = 0x7C00;


/// A mask which isolates the significand of a half float number.
static const UInt16 HALF_FLOAT_SIGNIFICAND_MASK = 0x03FF;


/// A mask which isolates the sign of a single precision float number.
static const UInt32 FLOAT_SIGN_MASK = 0x80000000;


/// A mask which isolates the exponent of a single precision float number.
static const UInt32 FLOAT_EXPONENT_MASK = 0x7F800000;


/// A mask which isolates the significand of a single precision float number.
static const UInt32 FLOAT_SIGNIFICAND_MASK = 0x007FFFFF;


/// Convert the specified single precision float number to a half precision float number.
static UInt16 floatToHalfFloat( Float floatValue )
{
	// Catch special case floating point values.
	if ( math::isNAN( floatValue ) )
		return NOT_A_NUMBER;
	else if ( math::isInfinity( floatValue ) )
		return POSITIVE_INFINITY;
	else if ( math::isNegativeInfinity( floatValue ) )
		return NEGATIVE_INFINITY;
	
	UInt32 value = *((UInt32*)&floatValue);
	
	if ( floatValue == Float(0) )
		return UInt16( value >> 16 );
	else
	{
		// Start by computing the significand in half precision format.
		UInt16 output = UInt16((value & FLOAT_SIGNIFICAND_MASK) >> 13);
		
		register UInt32 exponent = ((value & FLOAT_EXPONENT_MASK) >> 23);
		
		// Check for subnormal numbers.
		if ( exponent != 0 )
		{
			// Check for overflow when converting large numbers, returning positive or negative infinity.
			if ( exponent > 142 )
				return UInt16((value & FLOAT_SIGN_MASK) >> 16) | UInt16(0x7C00);
			
			// Add the exponent of the half float, converting the offset binary formats of the representations.
			output |= (((exponent - 112) << 10) & HALF_FLOAT_EXPONENT_MASK);
		}
		
		// Add the sign bit.
		output |= UInt16((value & FLOAT_SIGN_MASK) >> 16);
		
		return output;
	}
}




/// Convert the specified half float number to a single precision float number.
static Float halfFloatToFloat( UInt16 halfFloat )
{
	// Catch special case half floating point values.
	switch ( halfFloat )
	{
		case NOT_A_NUMBER:
			return math::nan<Float>();
		case POSITIVE_INFINITY:
			return math::infinity<Float>();
		case NEGATIVE_INFINITY:
			return math::negativeInfinity<Float>();
	}
	
	// Start by computing the significand in single precision format.
	UInt32 value = UInt32(halfFloat & HALF_FLOAT_SIGNIFICAND_MASK) << 13;
	
	register UInt32 exponent = UInt32(halfFloat & HALF_FLOAT_EXPONENT_MASK) >> 10;
	
	if ( exponent != 0 )
	{
		// Add the exponent of the float, converting the offset binary formats of the representations.
		value |= (((exponent - 15 + 127) << 23) & FLOAT_EXPONENT_MASK);
	}
	
	// Add the sign bit.
	value |= UInt32(halfFloat & HALF_FLOAT_SIGN_MASK) << 16;
	
	return *((Float*)&value);
}




#5221010 check it out, real-time opencl ray tracer

Posted by Aressera on 02 April 2015 - 03:59 PM

Nice, now try in a non-trivial scene like Sponza, Sibenik, etc.




#5213396 Ray vs Sphere Issue

Posted by Aressera on 27 February 2015 - 04:01 PM

You need to check the values of d and d2 (in your code) to use the smallest one, these are distance along the ray's direction to the intersection. If the smallest one is negative, there is no intersection since the sphere is behind.

 

Something like this (note line "if ( t1 < T(0) )"):

template < typename T >
Bool Ray3D<T>:: intersectsSphere( const Vector3D<T>& position, T radius, T& distance ) const
{
	Vector3D<T> d = position - origin;
	T dSquared = d.getMagnitudeSquared();
	T rSquared = radius*radius;
	
	if ( dSquared < rSquared )
	{
		// The ray starts inside the sphere and so there is an intersection.
		distance = T(0);
		return true;
	}
	else
	{
		// Find the closest point on the ray to the sphere's center.
		T t1 = math::dot( d, direction );
		
		if ( t1 < T(0) )
		{
			// The ray points away from the sphere so there is no intersection.
			return false;
		}
		
		// Find the distance from the closest point to the sphere's surface.
		T t2Squared = rSquared - dSquared + t1*t1;
		
		if ( t2Squared < T(0) )
			return false;
		
		// Compute the distance along the ray of the intersection.
		distance = t1 - math::sqrt(t2Squared);
		
		return true;
	}
}



#5212961 Handling of modifier keys

Posted by Aressera on 25 February 2015 - 02:52 PM

A far simpler way to handle this behavior is to handle key events as just that - raw keyboard input. There shouldn't be any sort of translation of modifier keys done in the input system, that is application-level behavior. The input system collects the raw key presses/releases/repeats from the operating system and then forwards them to other systems via callbacks or events.

 

To handle modifiers, you just keep track of the current state of every key on the keyboard (or mouse or gamepad). This state is updated by the raw events sent by the input system (e.g. Shift key event, then Q key event).

 

Whenever you wish to handle a key action that can have a modifier (such as pressing Q), you check to see what the current known state of the Shift key is. If pressed, you handle the modified action or not. This is the job of the input translation system that translates from raw events to high-level actions dependent on the current application or game state.




#5210912 Disassociate mouse with its position

Posted by Aressera on 15 February 2015 - 08:43 PM

Raw input is the way to go to emulate the OS X way of handling mouse deltas, it's what I have done in my engine/GUI system to have parity on windows.




#5203211 OpenGL and OO approaches

Posted by Aressera on 09 January 2015 - 09:15 PM

I also do this in my engine as an API abstraction layer and to handle RAII. I have a class for Texture, Buffer, Framebuffer, Shader, ShaderPipeline, and Context that wraps the global context state (e.g. viewport, scissor test, bound framebuffers) and handles creation of the other resource classes. These resource classes provide a minimal simplified interface to all of the functionality for each object type. Right now it's full of virtual functions, but they are not called during rendering, only when modifying resources.

 

I've tried to hide as much of the state-machine aspect of OpenGL as possible because this is at odds with the other APIs and how the hardware works. The resource classes wrap the different resource types and the Context handles submitting rendering commands. There are no parts of OpenGL or other APIs exposed in header files to the end user. The rest of my rendering framework uses the base resource interfaces to implement more complex things like meshes that are composed of buffers, textures, and shaders. Meshes (and other renderable shapes) decide how they should be rendered and submit rendering commands to a queue that can be sorted and rendered by iterating through it and passing the data to the Context to be rendered.




#5203209 How can sample rate be 44000Hz?

Posted by Aressera on 09 January 2015 - 08:57 PM

You only need 2 samples to produce a sine-like oscillation, hence the Nyquist theorem. Imagine alternating -1 and 1 samples. The sample rate is the sample rate.

 

Most humans can't hear above around 15-16kHz once you're an adult. Only the very young can hear up to 20kHz. The extra frequency range is there so that there is some extra headroom above what is audible. This is needed for low-pass filtering when converting from higher sample rates or when low-pass filtering in the digital to analogue converters.




#5202112 a better fix your timestep?

Posted by Aressera on 05 January 2015 - 06:53 PM

So after player B at 15 FPS has updated 80 times (80 accumulated tiny errors), player A has updated 320 times, and has 4 times as many accumulation errors. Player B’s rocket is more correct than player A’s. Because Brazil.

 

Not true! Player B has more truncation error, and so the solution of player A for the rocket's differential equation is probably more accurate than player B, even though player A has performed more numerical computations. The accumulation error is several orders of magnitude smaller than those resulting from a 4x increase in timestep (the order of error depends on the order of the diff eq. governing the rocket's path). At best the error is O(dt^2) for first-order differential equations, so 16x worse local truncation error for player B!




#5200890 Bizarre 60x slowdown with DSP filter

Posted by Aressera on 30 December 2014 - 05:18 PM

How would I go about doing that?

 

Edit: Like this? Seems to work! Could you explain why the denormalized numbers cause a slowdown? (and if there is any general use cases where I should watch out for their effects)


// Sanitize the history.
for ( Index i = 0; i < numFilterSets; i++ )
{
	FilterHistory& history = localHistory.filters[i];
	
	for ( Index j = 0; j < 4; j++ )
	{
		history.input[j] = math::select( math::abs(history.input[j]) < SIMDType(math::epsilon<Float>()),
											SIMDType(Float32(0)), history.input[j] );
		history.output[j] = math::select( math::abs(history.output[j]) < SIMDType(math::epsilon<Float>()),
											SIMDType(Float32(0)), history.output[j] );
	}
}




#5192737 Calculate volume of non-convex polyhedron

Posted by Aressera on 13 November 2014 - 04:39 PM

I don't think it will work with intersecting triangles either (i.e. self-intersecting volume). In the worst case you can voxelize your mesh and use the total number of filled voxels as an approximation of the volume.




#5178192 my SIMD implementation is very slow :(

Posted by Aressera on 04 September 2014 - 07:21 PM

Check out this code for my SIMD ray tracer. It will show you how to do the traversal and intersection tests fast. (also see Embree stuff, that's the reference I used). The important thing to note is that rather than tracing a ray packet vs 1 AABB, you flatten the tree and intersect 1 ray (4-wide) with 4 child AABBs at once. This is great for incoherent rays, since there is no logic needed to handle splitting up ray packets. My code provides a generic interface for arbitrary primitives, and also can cache certain primitive types (i.e. triangles) for faster access and better storage for SIMD. This code gets me around 10 million incoherent rays/s on an i7 4770k with 8 threads in a scene with 80k triangles (sibenik cathedral model).

//##########################################################################################
//##########################################################################################
//############		
//############		Fat SIMD Ray Class Declaration
//############		
//##########################################################################################
//##########################################################################################




class RIM_ALIGN(16) AABBTree4:: FatSIMDRay : public math::SIMDRay3D<Float32,4>
{
	public:
		
		//********************************************************************************
		//********************************************************************************
		//********************************************************************************
		//******	Constructors
			
			
			
			
			RIM_INLINE FatSIMDRay( const Ray3f& ray )
				:	math::SIMDRay3D<Float32,4>( ray ),
					inverseDirection( Float32(1) / ray.direction )
			{
				sign[0] = ray.direction.x < Float32(0);
				sign[1] = ray.direction.y < Float32(0);
				sign[2] = ray.direction.z < Float32(0);
			}
			
			
			
			
		//********************************************************************************
		//********************************************************************************
		//********************************************************************************
		//******	Public Data Members
			
			
			
			
			/// The inverse of the direction vector of this SIMD Ray.
			SIMDVector3f inverseDirection;
			
			
			
			
			/// Indices of the sign of the ray's direction along the 3 axes: 0 for positive, 1 for negative.
			/**
			  * The axes are enumerated: 0 = X, 1 = Y, 2 = Z.
			  */
			Index sign[3];
			
			
			
			
};




//##########################################################################################
//##########################################################################################
//############		
//############		Node Class Declaration
//############		
//##########################################################################################
//##########################################################################################




class RIM_ALIGN(128) AABBTree4:: Node
{
	public:
		
		//********************************************************************************
		//********************************************************************************
		//********************************************************************************
		//******	Constructors
			
			
			
			
			/// Create a new inner node with the specified child offsets and child AABBs.
			RIM_FORCE_INLINE Node( IndexType child0, IndexType child1, IndexType child2, IndexType child3,
									const StaticArray<AABB3f,4>& newAABB )
			{
				for ( Index i = 0; i < 4; i++ )
					setChildAABB( i, newAABB[i] );
				
				child[0] = this + child0;
				child[1] = this + child1;
				child[2] = this + child2;
				child[3] = this + child3;
			}
			
			
			
			
			/// Create a new leaf node for the specified primitive offset and primitive count.
			RIM_FORCE_INLINE Node( IndexType primitiveOffset, IndexType primitiveCount )
			{
				indices[0] = 0;
				indices[1] = primitiveOffset;
				indices[2] = primitiveCount;
			}
			
			
			
			
		//********************************************************************************
		//********************************************************************************
		//********************************************************************************
		//******	Leaf Node Attribute Accessor Methods
			
			
			
			
			/// Return whether or not this is a leaf node.
			RIM_FORCE_INLINE Bool isLeaf() const
			{
				return indices[0] == 0;
			}
			
			
			
			
			/// Return the offset in the primitive array of this leaf node's primitives.
			RIM_FORCE_INLINE IndexType getPrimitiveOffset() const
			{
				return indices[1];
			}
			
			
			
			
			/// Set the offset in the primitive array of this leaf node's primitives.
			RIM_FORCE_INLINE void setPrimitiveOffset( IndexType newOffset )
			{
				indices[1] = newOffset;
			}
			
			
			
			
			/// Return the number of primitives that are part of this leaf node.
			RIM_FORCE_INLINE IndexType getPrimitiveCount() const
			{
				return indices[2];
			}
			
			
			
			
			/// Set the number of primitives that are part of this leaf node.
			RIM_FORCE_INLINE void setPrimitiveCount( IndexType newCount )
			{
				indices[2] = newCount;
			}
			
			
			
			
		//********************************************************************************
		//********************************************************************************
		//********************************************************************************
		//******	Child Accessor Methods
			
			
			
			
			/// Return a pointer to the child
			RIM_FORCE_INLINE Node* getChild( Index i )
			{
				return child[i];
			}
			
			
			
			
			RIM_FORCE_INLINE const Node* getChild( Index i ) const
			{
				return child[i];
			}
			
			
			
			
			RIM_FORCE_INLINE void setChildAABB( Index i, const AABB3f& newAABB )
			{
				bounds[0][i] = newAABB.min.x;
				bounds[1][i] = newAABB.max.x;
				bounds[2][i] = newAABB.min.y;
				bounds[3][i] = newAABB.max.y;
				bounds[4][i] = newAABB.min.z;
				bounds[5][i] = newAABB.max.z;
			}
			
			
			
			
		//********************************************************************************
		//********************************************************************************
		//********************************************************************************
		//******	Ray Intersection Methods
			
			
			
			
			RIM_FORCE_INLINE void intersectRay( const FatSIMDRay& ray, SIMDFloat4& near, SIMDFloat4& far ) const
			{
				SIMDFloat4 txmin = (bounds[0 + ray.sign[0]] - ray.origin.x) * ray.inverseDirection.x;
				SIMDFloat4 txmax = (bounds[1 - ray.sign[0]] - ray.origin.x) * ray.inverseDirection.x;
				SIMDFloat4 tymin = (bounds[2 + ray.sign[1]] - ray.origin.y) * ray.inverseDirection.y;
				SIMDFloat4 tymax = (bounds[3 - ray.sign[1]] - ray.origin.y) * ray.inverseDirection.y;
				SIMDFloat4 tzmin = (bounds[4 + ray.sign[2]] - ray.origin.z) * ray.inverseDirection.z;
				SIMDFloat4 tzmax = (bounds[5 - ray.sign[2]] - ray.origin.z) * ray.inverseDirection.z;
				
				const SIMDFloat4 zero( 0.0f );
				const SIMDFloat4 negativeInfinity( math::negativeInfinity<float>() );
				
				near = math::max( math::max( txmin, tymin ), math::max( tzmin, zero ) );
				far = math::max( math::min( math::min( txmax, tymax ), tzmax ), negativeInfinity );
			}
			
			
			
			
		//********************************************************************************
		//********************************************************************************
		//********************************************************************************
		//******	Public Data Members
			
			
			
			/// A set of 4 SIMD axis-aligned bounding boxes for this quad node.
			/**
			  * The bounding boxes are stored in the following format:
			  *	- 0: xMin
			  * - 1: xMax
			  * - 2: yMin
			  * - 3: yMax
			  * - 4: zMin
			  * - 5: zMax
			  */
			SIMDFloat4 bounds[6];
			
			
			
			
			/// The indices of the first 3 child nodes of this node.
			/**
			  * By convention, the last child (index == 3) is always the next node after this one,
			  * so its index is not stored.
			  */
			union
			{
				Node* child[4];
				
				IndexType indices[4];
			};
			
			
			
};




//##########################################################################################
//##########################################################################################
//############		
//############		Primitive AABB Class Declaration
//############		
//##########################################################################################
//##########################################################################################




class RIM_ALIGN(16) AABBTree4:: PrimitiveAABB
{
	public:
		
		RIM_FORCE_INLINE PrimitiveAABB( const AABB3f& aabb, Index newPrimitiveIndex )
			:	min( aabb.min ),
				max( aabb.max ),
				primitiveIndex( newPrimitiveIndex )
		{
			centroid = (min + max)*Float(0.5);
		}
		
		
		
		
		/// The minimum coordinate of the primitive's axis-aligned bounding box.
		RIM_ALIGN(16) SIMDFloat4 min;
		
		
		
		
		/// The maximum coordinate of the primitive's axis-aligned bounding box.
		RIM_ALIGN(16) SIMDFloat4 max;
		
		
		
		
		/// The centroid of the primitive's axis-aligned bounding box.
		RIM_ALIGN(16) SIMDFloat4 centroid;
		
		
		
		
		/// The index of this primitive in the primitive set.
		Index primitiveIndex;
		
		
		
};




//##########################################################################################
//##########################################################################################
//############		
//############		Split Bin Class Declaration
//############		
//##########################################################################################
//##########################################################################################




class RIM_ALIGN(16) AABBTree4:: SplitBin
{
	public:
		
		RIM_INLINE SplitBin()
			:	min( math::max<Float32>() ),
				max( math::min<Float32>() ),
				numPrimitives( 0 )
		{
		}
		
		RIM_ALIGN(16) SIMDFloat4 min;
		RIM_ALIGN(16) SIMDFloat4 max;
		
		
		Size numPrimitives;
		
};




//##########################################################################################
//##########################################################################################
//############		
//############		Cached Triangle Class Declaration
//############		
//##########################################################################################
//##########################################################################################




class RIM_ALIGN(16) AABBTree4:: CachedTriangle
{
	public:
		
		RIM_INLINE CachedTriangle( const SIMDVector3f& newV0,
									const SIMDVector3f& newE1,
									const SIMDVector3f& newE2,
									const StaticArray<IndexType,4>& newIndices  )
			:	v0( newV0 ),
				e1( newE1 ),
				e2( newE2 )
		{
			indices[0] = newIndices[0];
			indices[1] = newIndices[1];
			indices[2] = newIndices[2];
			indices[3] = newIndices[3];
		}
		
		/// The vertex of this triangle with index 0.
		SIMDVector3f v0;
		
		/// The edge vector between vertex 0 and vertex 1.
		SIMDVector3f e1;
		
		/// The edge vector between vertex 0 and vertex 2.
		SIMDVector3f e2;
		
		/// The indices of the 4 packed triangles.
		IndexType indices[4];
		
		
};




//##########################################################################################
//##########################################################################################
//############		
//############		Constructors
//############		
//##########################################################################################
//##########################################################################################




AABBTree4:: AABBTree4()
	:	nodes( NULL ),
		primitiveData( NULL ),
		primitiveDataCapacity( 0 ),
		primitiveSet( NULL ),
		cachedPrimitiveType( PrimitiveInterfaceType::UNDEFINED ),
		numPrimitives( 0 ),
		numNodes( 0 ),
		maxDepth( 0 ),
		maxNumPrimitivesPerLeaf( DEFAULT_MAX_PRIMITIVES_PER_LEAF ),
		numSplitCandidates( DEFAULT_NUM_SPLIT_CANDIDATES )
{
}




AABBTree4:: AABBTree4( const Pointer<const PrimitiveInterface>& newPrimitives )
	:	nodes( NULL ),
		primitiveData( NULL ),
		primitiveDataCapacity( 0 ),
		primitiveSet( newPrimitives ),
		cachedPrimitiveType( PrimitiveInterfaceType::UNDEFINED ),
		numPrimitives( 0 ),
		numNodes( 0 ),
		maxDepth( 0 ),
		maxNumPrimitivesPerLeaf( DEFAULT_MAX_PRIMITIVES_PER_LEAF ),
		numSplitCandidates( DEFAULT_NUM_SPLIT_CANDIDATES )
{
}




AABBTree4:: AABBTree4( const AABBTree4& other )
	:	primitiveSet( other.primitiveSet ),
		cachedPrimitiveType( other.cachedPrimitiveType ),
		numPrimitives( other.numPrimitives ),
		numNodes( other.numNodes ),
		maxDepth( other.maxDepth ),
		maxNumPrimitivesPerLeaf( other.maxNumPrimitivesPerLeaf ),
		numSplitCandidates( other.numSplitCandidates )
{
	if ( numNodes > 0 )
		nodes = util::copyArrayAligned( other.nodes, other.numNodes, sizeof(Node) );
	else
		nodes = NULL;
	
	if ( numPrimitives > 0 )
		primitiveData = other.copyPrimitiveData( primitiveDataCapacity );
	else
	{
		primitiveData = NULL;
		primitiveDataCapacity = 0;
	}
}




//##########################################################################################
//##########################################################################################
//############		
//############		Destructor
//############		
//##########################################################################################
//##########################################################################################




AABBTree4:: ~AABBTree4()
{
	if ( nodes )
		util::deallocateAligned( nodes );
	
	if ( primitiveData )
		util::deallocateAligned( primitiveData );
}




//##########################################################################################
//##########################################################################################
//############		
//############		Assignment Operator
//############		
//##########################################################################################
//##########################################################################################





AABBTree4& AABBTree4:: operator = ( const AABBTree4& other )
{
	if ( this != &other )
	{
		if ( numNodes < other.numNodes )
		{
			if ( nodes )
				util::deallocateAligned( nodes );
			
			nodes = util::copyArrayAligned( other.nodes, other.numNodes, sizeof(Node) );
		}
		else if ( other.numNodes > 0 )
			rim::util::copy( nodes, other.nodes, other.numNodes );
		
		if ( primitiveData )
				util::deallocateAligned( primitiveData );
		
		if ( other.numPrimitives > 0 )
			primitiveData = other.copyPrimitiveData( primitiveDataCapacity );
		else
		{
			primitiveData = NULL;
			primitiveDataCapacity = 0;
		}
		
		primitiveSet = other.primitiveSet;
		numPrimitives = other.numPrimitives;
		numNodes = other.numNodes;
		maxDepth = other.maxDepth;
		maxNumPrimitivesPerLeaf = other.maxNumPrimitivesPerLeaf;
		numSplitCandidates = other.numSplitCandidates;
	}
	
	return *this;
}




//##########################################################################################
//##########################################################################################
//############		
//############		Primitive Accessor Methods
//############		
//##########################################################################################
//##########################################################################################




void AABBTree4:: setPrimitives( const Pointer<const PrimitiveInterface>& newPrimitives )
{
	primitiveSet = newPrimitives;
	
	// Set the number of nodes and primitives to 0 to signal that the BVH needs to be rebuilt.
	numNodes = 0;
	numPrimitives = 0;
}




const Pointer<const PrimitiveInterface>& AABBTree4:: getPrimitives() const
{
	return primitiveSet;
}




//##########################################################################################
//##########################################################################################
//############		
//############		BVH Attribute Accessor Methods
//############		
//##########################################################################################
//##########################################################################################




Size AABBTree4:: getMaxDepth() const
{
	return maxDepth;
}





Bool AABBTree4:: isValid() const
{
	return numNodes > 0;
}




//##########################################################################################
//##########################################################################################
//############		
//############		BVH Building Methods
//############		
//##########################################################################################
//##########################################################################################




void AABBTree4:: rebuild()
{
	const Size newNumPrimitives = primitiveSet->getSize();
	
	// Don't build the tree if there are no primitives.
	if ( primitiveSet.isNull() || newNumPrimitives == 0 )
		return;
	
	//**************************************************************************************
	
	// Allocate an array to hold the list of TriangleAABB primitives.
	PrimitiveAABB* primitiveAABBs = rim::util::allocateAligned<PrimitiveAABB>( newNumPrimitives, 16 );
	
	// Initialize all PrimitiveAABB primitives with the primitives for this tree.
	for ( Index i = 0; i < newNumPrimitives; i++ )
		new (primitiveAABBs + i) PrimitiveAABB( primitiveSet->getAABB(i), i );
	
	//**************************************************************************************
	
	Size numSplitBins = numSplitCandidates + 1;
	
	// Allocate a temporary array to hold the split bins.
	SplitBin* splitBins = rim::util::allocateAligned<SplitBin>( numSplitBins, 16 );
	
	//**************************************************************************************
	
	// Compute the number of nodes needed for this tree.
	Size newNumNodes = newNumPrimitives*Size(2) - 1;
	
	// Allocate space for the nodes in this tree.
	if ( newNumNodes > numNodes )
	{
		if ( nodes )
			rim::util::deallocateAligned( nodes );
		
		nodes = rim::util::allocateAligned<Node>( newNumNodes, sizeof(Node) );
	}
	
	numNodes = newNumNodes;
	
	// Build the tree, starting with the root node.
	buildTreeRecursive( nodes, primitiveAABBs, 0, newNumPrimitives,
						splitBins, numSplitBins, maxNumPrimitivesPerLeaf, 1, maxDepth );
	
	//**************************************************************************************
	
	// Determine if the BVH should cache the primitives based on their type.
	numPrimitives = newNumPrimitives;
	Size newPrimitiveDataSize = 0;
	
	switch ( primitiveSet->getType() )
	{
		case PrimitiveInterfaceType::TRIANGLES:
			newPrimitiveDataSize = getTriangleArraySize( nodes )*sizeof(CachedTriangle);
			break;
		default:
			newPrimitiveDataSize = numPrimitives*sizeof(Index);
			break;
	}
	
	// Allocate an array to hold the primitive data.
	if ( newPrimitiveDataSize > primitiveDataCapacity )
	{
		if ( primitiveData )
			rim::util::deallocateAligned( primitiveData );
		
		primitiveData = rim::util::allocateAligned<UByte>( newPrimitiveDataSize, 16 );
		primitiveDataCapacity = newPrimitiveDataSize;
	}
	
	// Copy the current order of the TriangleAABB list into the tree's list of primitive pointers.
	switch ( primitiveSet->getType() )
	{
		case PrimitiveInterfaceType::TRIANGLES:
			fillTriangleArray( (CachedTriangle*)primitiveData, primitiveSet, primitiveAABBs, nodes, 0 );
			cachedPrimitiveType = PrimitiveInterfaceType::TRIANGLES;
			break;
			
		default:
			fillPrimitiveIndices( (Index*)primitiveData, primitiveAABBs, numPrimitives );
			cachedPrimitiveType = PrimitiveInterfaceType::UNDEFINED;
			break;
	}
	
	//**************************************************************************************
	// Clean up the temporary arrays of TriangleAABB primitives and split bins.
	
	rim::util::deallocateAligned( primitiveAABBs );
	rim::util::deallocateAligned( splitBins );
}




void AABBTree4:: refit()
{
	if ( numNodes == 0 )
		return;
	
	// If the number or type of primitives has changed, rebuild the tree instead.
	if ( numPrimitives != primitiveSet->getSize() || cachedPrimitiveType != primitiveSet->getType() )
	{
		this->rebuild();
		return;
	}
	
	// Refit the tree for different kinds of primitives.
	switch ( cachedPrimitiveType )
	{
		case PrimitiveInterfaceType::TRIANGLES:
			this->refitTreeTriangles( nodes );
			break;
		default:
			this->refitTreeGeneric( nodes );
	}
}




//##########################################################################################
//##########################################################################################
//############		
//############		Ray Tracing Methods
//############		
//##########################################################################################
//##########################################################################################




static unsigned int bitCount( unsigned int mask )
{
#if defined(RIM_COMPILER_GCC)
	return __builtin_popcount( mask );
#elif defined(RIM_COMPILER_MSVC)
	return __popcnt( mask );
#else
	mask = mask - ((mask >> 1) & 0x55555555);
	mask = (mask & 0x33333333) + ((mask >> 2) & 0x33333333);
	return (((mask + (mask >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
#endif
}




static unsigned long firstSetBit( unsigned long mask )
{
#if defined(RIM_COMPILER_GCC)
	return __builtin_ctz( mask );
#elif defined(RIM_COMPILER_MSVC)
	unsigned long index;
	_BitScanForward( &index, mask );
	return index;
#else
	#error
#endif
}




static Int minIndex( const SIMDFloat4& x )
{
	const SIMDInt4 indices1( 0, 1, 2, 3 );
	const SIMDInt4 indices2( 2, 3, 0, 1 );
	
	// Shuffle the value once to find the minimum of 0 & 2, 1 & 3.
	SIMDFloat4 x2 = math::shuffle<2,3,0,1>( x );
	
	// Determine the indices of the values which are the minimum of 0 & 2, 1 & 3.
	SIMDInt4 indices3 = math::select( x < x2, indices1, indices2 );
	
	// Find the minimum of 0 & 2, 1 & 3.
	x2 = math::min( x, x2 );
	
	// Shuffle the values again to determine the minimum value.
	SIMDFloat4 x3 = math::shuffle<1,0,3,2>( x2 );
	
	// Compute the index of the closest intersection.
	SIMDInt4 minimumIndex = math::select( x2 < x3, indices3, math::shuffle<1,0,3,2>( indices3 ) );
	
	return minimumIndex[0];
}




static Int minIndex( const SIMDFloat4& x, SIMDFloat4& wideMin )
{
	const SIMDInt4 indices1( 0, 1, 2, 3 );
	const SIMDInt4 indices2( 2, 3, 0, 1 );
	
	// Shuffle the value once to find the minimum of 0 & 2, 1 & 3.
	SIMDFloat4 x2 = math::shuffle<2,3,0,1>( x );
	
	// Determine the indices of the values which are the minimum of 0 & 2, 1 & 3.
	SIMDInt4 indices3 = math::select( x < x2, indices1, indices2 );
	
	// Find the minimum of 0 & 2, 1 & 3.
	x2 = math::min( x, x2 );
	
	// Shuffle the values again to determine the minimum value.
	SIMDFloat4 x3 = math::shuffle<1,0,3,2>( x2 );
	
	// Compute the index of the closest intersection.
	SIMDInt4 minimumIndex = math::select( x2 < x3, indices3, math::shuffle<1,0,3,2>( indices3 ) );
	
	// Compute a 4-wide vector of the minimum value.
	wideMin = math::min( x2, x3 );
	
	return minimumIndex[0];
}




Bool AABBTree4:: traceRay( const Ray3f& newRay, Float maxDistance, TraversalStack& traversalStack,
							Float& closestIntersection, Index& closestPrimitive ) const
{
	if ( numNodes == 0 )
		return false;
	
	switch ( cachedPrimitiveType )
	{
		case PrimitiveInterfaceType::TRIANGLES:
			return this->traceRayVsTriangles( newRay, maxDistance, traversalStack, closestIntersection, closestPrimitive );
		
		default:
			return this->traceRayVsGeneric( newRay, maxDistance, traversalStack, closestIntersection, closestPrimitive );
	}
	
	return false;
}




Bool AABBTree4:: traceRay( const Ray3f& ray, Float maxDistance, TraversalStack& stack ) const
{
	Float d;
	Index primitiveIndex;
	
	return traceRay( ray, maxDistance, stack, d, primitiveIndex );
}




//##########################################################################################
//##########################################################################################
//############		
//############		Generic Ray Tracing Method
//############		
//##########################################################################################
//##########################################################################################




Bool AABBTree4:: traceRayVsGeneric( const Ray3f& newRay, Float maxDistance, TraversalStack& traversalStack,
									Float& closestIntersection, Index& closestPrimitive ) const
{
	const void** stackBase = traversalStack.getRoot();
	const void** stack = stackBase + 1;
	*stack = nodes;
	
	const PrimitiveInterface* const primitives = primitiveSet;
	const Index* const indices = (const Index*)primitiveData;
	
	FatSIMDRay ray( newRay );
	closestIntersection = maxDistance;
	SIMDFloat4 closestDistance( maxDistance );
	SIMDFloat4 near;
	SIMDFloat4 far;
	Float primitiveDistance;
	Index closestPrimitiveIndex;
	const Node* node = nodes;
	
	while ( true )
	{
		nextNode:
		
		if ( node->isLeaf() )
		{
			if ( primitives->intersectRay( indices + node->getPrimitiveOffset(),
											node->getPrimitiveCount(), newRay,
											primitiveDistance, closestPrimitiveIndex ) &&
				primitiveDistance < closestIntersection )
			{
				closestIntersection = primitiveDistance;
				closestDistance = SIMDFloat4( primitiveDistance );
				closestPrimitive = closestPrimitiveIndex;
			}
		}
		else
		{
			node->intersectRay( ray, near, far );
			
			SIMDBool4 intersectionResult = (near <= far) & (near < closestDistance);
			Int mask = intersectionResult.getMask();
			
			switch ( mask )
			{
				// No hits. Backtrack on the stack.
				case 0:
					break;
				
				// 1 Hit. Replace the current node with the hit child.
				case 1 << 0:	node = node->getChild(0);	goto nextNode;
				case 1 << 1:	node = node->getChild(1);	goto nextNode;
				case 1 << 2:	node = node->getChild(2);	goto nextNode;
				case 1 << 3:	node = node->getChild(3);	goto nextNode;
				
				// 2 Hits.
				case 3: // 0011
				{
					near = math::select( intersectionResult, near, closestDistance );
					stack++;
					
					if ( near[1] < near[0] )
					{
						*stack = node->getChild(0);
						node = node->getChild(1);
					}
					else
					{
						*stack = node->getChild(1);
						node = node->getChild(0);
					}
					goto nextNode;
				}
				
				case 5: // 0101
				{
					near = math::select( intersectionResult, near, closestDistance );
					stack++;
					
					if ( near[2] < near[0] )
					{
						*stack = node->getChild(0);
						node = node->getChild(2);
					}
					else
					{
						*stack = node->getChild(2);
						node = node->getChild(0);
					}
					goto nextNode;
				}
				
				case 6: // 0110
				{
					near = math::select( intersectionResult, near, closestDistance );
					stack++;
					
					if ( near[2] < near[1] )
					{
						*stack = node->getChild(1);
						node = node->getChild(2);
					}
					else
					{
						*stack = node->getChild(2);
						node = node->getChild(1);
					}
					goto nextNode;
				}
				
				case 9: // 1001
				{
					near = math::select( intersectionResult, near, closestDistance );
					stack++;
					
					if ( near[3] < near[0] )
					{
						*stack = node->getChild(0);
						node = node->getChild(3);
					}
					else
					{
						*stack = node->getChild(3);
						node = node->getChild(0);
					}
					goto nextNode;
				}
				
				case 10: // 1010
				{
					near = math::select( intersectionResult, near, closestDistance );
					stack++;
					
					if ( near[3] < near[1] )
					{
						*stack = node->getChild(1);
						node = node->getChild(3);
					}
					else
					{
						*stack = node->getChild(3);
						node = node->getChild(1);
					}
					goto nextNode;
				}
				
				case 12: // 1100
				{
					near = math::select( intersectionResult, near, closestDistance );
					stack++;
					
					if ( near[3] < near[2] )
					{
						*stack = node->getChild(2);
						node = node->getChild(3);
					}
					else
					{
						*stack = node->getChild(3);
						node = node->getChild(2);
					}
					goto nextNode;
				}
				
				
				default:
				{
					// There are more than 2 hit children.
					// Determine the index of the closest hit child.
					Int closestChildIndex = minIndex( math::select( intersectionResult, near, closestDistance ) );
					
					// Clear the bit of the closest hit child.
					mask &= ~(1 << closestChildIndex);
					
					//****************************************************
					// Second hit.
					
					Int i = firstSetBit( mask );
					
					// Put the child onto the stack.
					stack++;
					*stack = node->getChild(i);
					
					// Clear the bit.
					mask &= ~(1 << i);
					
					//****************************************************
					// Third hit.
					
					i = firstSetBit( mask );
					
					// Put the child onto the stack.
					stack++;
					*stack = node->getChild(i);
					
					// Clear the bit.
					mask &= ~(1 << i);
					
					//****************************************************
					// Fourth hit, if necessary.
					
					if ( mask )
					{
						i = firstSetBit( mask );
						
						// Put the child onto the stack.
						stack++;
						*stack = node->getChild(i);
						
						// Clear the bit.
						mask &= ~(1 << i);
					}
					
					// Determine the next node to traverse.
					node = node->getChild(closestChildIndex);
					goto nextNode;
				}
			}
		}
		
		node = (const Node*)*stack;
		stack--;
		
		if ( stack == stackBase )
			break;
	}
	
	// If the distance is less than the maximum distance which we started with, there was an intersection.
	return closestIntersection < maxDistance;
}




//##########################################################################################
//##########################################################################################
//############		
//############		Triangle Ray Tracing Method
//############		
//##########################################################################################
//##########################################################################################




Bool AABBTree4:: traceRayVsTriangles( const Ray3f& newRay, Float maxDistance, TraversalStack& traversalStack,
									Float& closestIntersection, Index& closestPrimitive ) const
{
	const void** stackBase = traversalStack.getRoot();
	const void** stack = stackBase + 1;
	*stack = nodes;
	
	const CachedTriangle* const triangles = (const CachedTriangle*)primitiveData;
	
	FatSIMDRay ray( newRay );
	closestIntersection = maxDistance;
	SIMDFloat4 closestDistance( maxDistance );
	SIMDFloat4 near;
	SIMDFloat4 far;
	SIMDFloat4 triangleDistance;
	const Node* node = nodes;
	
	while ( true )
	{
		nextNode:
		
		if ( node->isLeaf() )
		{
			const IndexType numNodePrimitives = node->getPrimitiveCount();
			
			if ( numNodePrimitives == 1 )
			{
				// Fast case for a single quad triangle.
				const CachedTriangle* triangle = triangles + node->getPrimitiveOffset();
				
				// Find the intersections.
				SIMDBool4 triangleMask = rayIntersectsTriangles( ray, *triangle, triangleDistance );
				triangleMask &= (triangleDistance < closestDistance);
				
				// Find the closest intersection index if there was an intersection.
				if ( triangleMask.getMask() )
				{
					// Set all non-intersecting triangles to have a very large distance
					// so that they won't affect the closest intersection computation.
					triangleDistance = math::select( triangleMask, triangleDistance, SIMDFloat4(math::max<Float>()) );
					
					Int minTIndex = minIndex( triangleDistance, closestDistance );
					
					closestPrimitive = triangle->indices[minTIndex];
					closestIntersection = closestDistance[minTIndex];
				}
			}
			else
			{
				// General case for many triangles.
				const CachedTriangle* triangle = triangles + node->getPrimitiveOffset();
				const CachedTriangle* const trianglesEnd = triangle + numNodePrimitives;
				
				while ( triangle != trianglesEnd )
				{
					// Compute the intersection distance for all 4 triangles.
					SIMDBool4 triangleMask = rayIntersectsTriangles( ray, *triangle, triangleDistance );
					triangleMask &= (triangleDistance < closestDistance);
					
					// Find the closest intersection index if there was an intersection.
					if ( triangleMask.getMask() )
					{
						// Set all non-intersecting triangles to have a very large distance
						// so that they won't affect the closest intersection computation.
						triangleDistance = math::select( triangleMask, triangleDistance, SIMDFloat4(math::max<Float>()) );
						
						Int minTIndex = minIndex( triangleDistance, closestDistance );
						
						closestPrimitive = triangle->indices[minTIndex];
						closestIntersection = closestDistance[minTIndex];
					}
					
					triangle++;
				}
			}
		}
		else
		{
			node->intersectRay( ray, near, far );
			
			SIMDBool4 intersectionResult = (near <= far) & (near < closestDistance);
			Int mask = intersectionResult.getMask();
			
			switch ( mask )
			{
				// No hits. Backtrack on the stack.
				case 0:
					break;
				
				// 1 Hit. Replace the current node with the hit child.
				case 1 << 0:	node = node->getChild(0);	goto nextNode;
				case 1 << 1:	node = node->getChild(1);	goto nextNode;
				case 1 << 2:	node = node->getChild(2);	goto nextNode;
				case 1 << 3:	node = node->getChild(3);	goto nextNode;
				
				// 2 Hits.
				case 3: // 0011
				{
					near = math::select( intersectionResult, near, closestDistance );
					stack++;
					
					if ( near[1] < near[0] )
					{
						*stack = node->getChild(0);
						node = node->getChild(1);
					}
					else
					{
						*stack = node->getChild(1);
						node = node->getChild(0);
					}
					goto nextNode;
				}
				
				case 5: // 0101
				{
					near = math::select( intersectionResult, near, closestDistance );
					stack++;
					
					if ( near[2] < near[0] )
					{
						*stack = node->getChild(0);
						node = node->getChild(2);
					}
					else
					{
						*stack = node->getChild(2);
						node = node->getChild(0);
					}
					goto nextNode;
				}
				
				case 6: // 0110
				{
					near = math::select( intersectionResult, near, closestDistance );
					stack++;
					
					if ( near[2] < near[1] )
					{
						*stack = node->getChild(1);
						node = node->getChild(2);
					}
					else
					{
						*stack = node->getChild(2);
						node = node->getChild(1);
					}
					goto nextNode;
				}
				
				case 9: // 1001
				{
					near = math::select( intersectionResult, near, closestDistance );
					stack++;
					
					if ( near[3] < near[0] )
					{
						*stack = node->getChild(0);
						node = node->getChild(3);
					}
					else
					{
						*stack = node->getChild(3);
						node = node->getChild(0);
					}
					goto nextNode;
				}
				
				case 10: // 1010
				{
					near = math::select( intersectionResult, near, closestDistance );
					stack++;
					
					if ( near[3] < near[1] )
					{
						*stack = node->getChild(1);
						node = node->getChild(3);
					}
					else
					{
						*stack = node->getChild(3);
						node = node->getChild(1);
					}
					goto nextNode;
				}
				
				case 12: // 1100
				{
					near = math::select( intersectionResult, near, closestDistance );
					stack++;
					
					if ( near[3] < near[2] )
					{
						*stack = node->getChild(2);
						node = node->getChild(3);
					}
					else
					{
						*stack = node->getChild(3);
						node = node->getChild(2);
					}
					goto nextNode;
				}
				
				
				default:
				{
					// There are more than 2 hit children.
					// Determine the index of the closest hit child.
					Int closestChildIndex = minIndex( math::select( intersectionResult, near, closestDistance ) );
					
					// Clear the bit of the closest hit child.
					mask &= ~(1 << closestChildIndex);
					
					//****************************************************
					// Second hit.
					
					Int i = firstSetBit( mask );
					
					// Put the child onto the stack.
					stack++;
					*stack = node->getChild(i);
					
					// Clear the bit.
					mask &= ~(1 << i);
					
					//****************************************************
					// Third hit.
					
					i = firstSetBit( mask );
					
					// Put the child onto the stack.
					stack++;
					*stack = node->getChild(i);
					
					// Clear the bit.
					mask &= ~(1 << i);
					
					//****************************************************
					// Fourth hit, if necessary.
					
					if ( mask )
					{
						i = firstSetBit( mask );
						
						// Put the child onto the stack.
						stack++;
						*stack = node->getChild(i);
						
						// Clear the bit.
						mask &= ~(1 << i);
					}
					
					// Determine the next node to traverse.
					node = node->getChild(closestChildIndex);
					goto nextNode;
				}
			}
		}
		
		node = (const Node*)*stack;
		stack--;
		
		if ( stack == stackBase )
			break;
	}
	
	// If the distance is less than the maximum distance which we started with, there was an intersection.
	return closestIntersection < maxDistance;
}




//##########################################################################################
//##########################################################################################
//############		
//############		Ray Vs. Triangle Intersection Method
//############		
//##########################################################################################
//##########################################################################################




SIMDBool4 AABBTree4:: rayIntersectsTriangles( const SIMDRay3f& ray, const CachedTriangle& triangle, SIMDFloat4& distance )
{
	// the vector perpendicular to edge 2 and the ray's direction
	SIMDVector3f pvec = math::cross( ray.direction, triangle.e2 );
	SIMDFloat4 det = math::dot( triangle.e1, pvec );
	
	// Do the first rejection test for the triangles, test to see if the ray is in the same plane as the triangle.
	SIMDBool4 result = math::abs(det) >= math::epsilon<Float>();
	
	//************************************************************************************
	
	SIMDFloat4 inverseDet = Float(1) / det;
	
	SIMDVector3f v0ToSource = ray.origin - triangle.v0;
	
	SIMDFloat4 u = math::dot( v0ToSource, pvec ) * inverseDet;
	
	// Do the second rejection test for the triangles. See if the UV coordinate is within the valid range.
	result &= (u >= Float(0)) & (u <= Float(1));
	
	//************************************************************************************
	
	SIMDVector3f qvec = math::cross( v0ToSource, triangle.e1 );
	
	SIMDFloat4 v = math::dot( ray.direction, qvec ) * inverseDet;
	
	// Do the third rejection test for the triangles. See if the UV coordinate is within the valid range.
	result &= (v >= Float(0)) & (u + v <= Float(1));
	
	//************************************************************************************
	
	distance = math::dot( triangle.e2, qvec ) * inverseDet;
	
	// Make sure that the triangles are hit by the forward side of the ray.
	return result & (distance > math::epsilon<Float>());
}




//##########################################################################################
//##########################################################################################
//############		
//############		Recursive Tree Construction Method
//############		
//##########################################################################################
//##########################################################################################




Size AABBTree4:: buildTreeRecursive( Node* node, PrimitiveAABB* primitiveAABBs,
										Index start, Size numPrimitives,
										SplitBin* splitBins, Size numSplitBins, 
										Size maxNumPrimitivesPerLeaf, Size depth, Size& maxDepth )
{
	// The split axis used for each split (0 = X, 1 = Y, 2 = Z).
	StaticArray<Index,3> splitAxis;
	
	// The number of primitives in a child node (leaf or not).
	StaticArray<Index,4> numChildPrimitives;
	
	// The 4 volumes of the child nodes.
	StaticArray<AABB3f,4> volumes;
	
	//***************************************************************************
	// Partition the set of primitives into two sets.
	
	PrimitiveAABB* const primitiveAABBStart = primitiveAABBs + start;
	Size numLesserPrimitives = 0;
	
	partitionPrimitivesSAH( primitiveAABBStart, numPrimitives,
						splitBins, numSplitBins,
						splitAxis[0], numLesserPrimitives, volumes[0], volumes[2] );
	
	// Compute the number of primitives greater than the split plane along the split axis.
	Size numGreaterPrimitives = numPrimitives - numLesserPrimitives;
	
	//***************************************************************************
	// Partition the primitive subsets into four sets based on the next two splitting planes.
	
	// If the number of primitives on this side of the first partition is less than the max number of
	// primitives per leaf, put all the primitives in the first child.
	if ( numLesserPrimitives < maxNumPrimitivesPerLeaf )
	{
		numChildPrimitives[0] = numLesserPrimitives;
		numChildPrimitives[1] = 0;
		volumes[0] = computeAABBForPrimitives( primitiveAABBStart, numLesserPrimitives );
	}
	else
	{
		partitionPrimitivesSAH( primitiveAABBStart, numLesserPrimitives,
							splitBins, numSplitBins,
							splitAxis[1], numChildPrimitives[0], volumes[0], volumes[1] );
	}
	
	// If the number of primitives on this side of the first partition is less than the max number of
	// primitives per leaf, put all the primitives in the first child.
	if ( numGreaterPrimitives < maxNumPrimitivesPerLeaf )
	{
		numChildPrimitives[2] = numGreaterPrimitives;
		numChildPrimitives[3] = 0;
		volumes[2] = computeAABBForPrimitives( primitiveAABBStart + numLesserPrimitives, numGreaterPrimitives );
	}
	else
	{
		partitionPrimitivesSAH( primitiveAABBStart + numLesserPrimitives, numGreaterPrimitives,
							splitBins, numSplitBins,
							splitAxis[2], numChildPrimitives[2], volumes[2], volumes[3] );
	}
	
	// Compute the number of primitives greater than the split plane along the split axis.
	numChildPrimitives[1] = numLesserPrimitives - numChildPrimitives[0];
	numChildPrimitives[3] = numGreaterPrimitives - numChildPrimitives[2];
	
	//***************************************************************************
	// Determine for each child whether to create a leaf node or an inner node.
	
	// The 4 indices of either the location in the primitive list of a leaf's primitives,
	// or the relative offset of the child node from the parent.
	StaticArray<Index,4> indices;
	
	//***************************************************************************
	// Determine the type and attributes for each node.
	
	// Keep track of the total number of nodes in the subtree.
	Size numTreeNodes = 1;
	Size primitiveStartIndex = start;
	
	for ( Index i = 0; i < 4; i++ )
	{
		if ( numChildPrimitives[i] <= maxNumPrimitivesPerLeaf )
		{
			// This child is a leaf node.
			new (node + numTreeNodes) Node( primitiveStartIndex, numChildPrimitives[i] );
			indices[i] = numTreeNodes;
			
			numTreeNodes++;
		}
		else
		{
			// This child is an inner node, construct it recursively.
			Size numChildNodes = buildTreeRecursive( node + numTreeNodes, primitiveAABBs,
													primitiveStartIndex, numChildPrimitives[i],
													splitBins, numSplitBins, maxNumPrimitivesPerLeaf,
													depth + 1, maxDepth );
			
			// The relative index of this child from the parent node.
			indices[i] = numTreeNodes;
			
			numTreeNodes += numChildNodes;
		}
		
		primitiveStartIndex += numChildPrimitives[i];
	}
	
	//***************************************************************************
	// Create the node.
	
	new (node) Node( indices[0], indices[1], indices[2], indices[3], volumes );
	
	// Update the maximum tree depth.
	if ( depth > maxDepth )
		maxDepth = depth;
	
	// Return the number of nodes in this subtree.
	return numTreeNodes;
}




//##########################################################################################
//##########################################################################################
//############		
//############		Surface Area Heuristic Object Partition Method
//############		
//##########################################################################################
//##########################################################################################




void AABBTree4:: partitionPrimitivesSAH( PrimitiveAABB* primitiveAABBs, Size numPrimitives,
											SplitBin* splitBins, Size numSplitBins,
											Index& splitAxis, Size& numLesserPrimitives,
											AABB3f& lesserVolume, AABB3f& greaterVolume )
{
	// If there are no primitives to partition, return immediately.
	if ( numPrimitives < 2 )
	{
		splitAxis = 0;
		numLesserPrimitives = numPrimitives;
		lesserVolume = computeAABBForPrimitives( primitiveAABBs, numPrimitives );
		return;
	}
	
	//**************************************************************************************
	// Compute the AABB of the primitive centroids.
	
	// We use the centroids as the 'keys' in splitting primitives.
	const AABB3f centroidAABB = computeAABBForPrimitiveCentroids( primitiveAABBs, numPrimitives );
	const Vector3f aabbDimension = centroidAABB.max - centroidAABB.min;
	
	//**************************************************************************************
	// Initialize the split bins.
	
	const Size numSplitCandidates = numSplitBins - 1;
	
	const Float binningConstant1 = Float(numSplitBins)*(Float(1) - Float(0.00001));
	Float minSplitCost = math::max<Float>();
	Float minSplitPlane = 0;
	math::SIMDScalar<float,4> lesserMin;
	math::SIMDScalar<float,4> lesserMax;
	math::SIMDScalar<float,4> greaterMin;
	math::SIMDScalar<float,4> greaterMax;
	numLesserPrimitives = 0;
	
	splitAxis = 0;
	
	for ( Index axis = 0; axis < 3; axis++ )
	{
		// Compute some constants that are valid for all bins/primitives.
		const Float binningConstant = binningConstant1 / aabbDimension[axis];
		const Float binWidth = aabbDimension[axis] / Float(numSplitBins);
		const Float binsStart = centroidAABB.min[axis];
		
		// Initialize the split bins to their starting values.
		for ( Index i = 0; i < numSplitBins; i++ )
			new (splitBins + i) SplitBin();
		
		//**************************************************************************************
		// For each primitive, determine which bin it overlaps and increase that bin's counter.
		
		for ( Index i = 0; i < numPrimitives; i++ )
		{
			const PrimitiveAABB& t = primitiveAABBs[i];
			
			Index binIndex = (Index)(binningConstant*(t.centroid[axis] - binsStart));
			SplitBin& bin = splitBins[binIndex];
			
			// Update the number of primitives that this bin contains, as well as the AABB for those primitives.
			bin.numPrimitives++;
			bin.min = math::min( bin.min, t.min );
			bin.max = math::max( bin.max, t.max );
		}
		
		//**************************************************************************************
		// Find the split plane with the smallest SAH cost.
		
		Size numLeftPrimitives = 0;
		math::SIMDScalar<float,4> leftMin( math::max<float>() );
		math::SIMDScalar<float,4> leftMax( math::min<float>() );
		
		for ( Index i = 0; i < numSplitCandidates; i++ )
		{
			// Since the left candidate is only growing, we can incrementally construct the AABB for this side.
			// Incrementally enlarge the bounding box for this side, and compute the number of primitives
			// on this side of the split.
			{
				SplitBin& bin = splitBins[i];
				numLeftPrimitives += bin.numPrimitives;
				leftMin = math::min( leftMin, bin.min );
				leftMax = math::max( leftMax, bin.max );
			}
			
			Size numRightPrimitives = 0;
			math::SIMDScalar<float,4> rightMin( math::max<float>() );
			math::SIMDScalar<float,4> rightMax( math::min<float>() );
			
			// Compute the bounding box for this side, and compute the number of primitives
			// on this side of the split.
			for ( Index j = i + 1; j < numSplitBins; j++ )
			{
				SplitBin& bin = splitBins[j];
				numRightPrimitives += bin.numPrimitives;
				rightMin = math::min( rightMin, bin.min );
				rightMax = math::max( rightMax, bin.max );
			}
			
			// Compute the cost for this split candidate.
			Float splitCost = Float(numLeftPrimitives)*getAABBSurfaceArea( leftMin, leftMax ) + 
							Float(numRightPrimitives)*getAABBSurfaceArea( rightMin, rightMax );
			
			// If the split cost is the lowest so far, use it as the new minimum split.
			if ( splitCost <= minSplitCost )
			{
				minSplitCost = splitCost;
				minSplitPlane = binsStart + binWidth*Float(i + 1);
				
				// Save the bounding boxes for this split candidate.
				lesserMin = leftMin;
				lesserMax = leftMax;
				greaterMin = rightMin;
				greaterMax = rightMax;
				
				// Save the number of primitives to the left of the split.
				numLesserPrimitives = numLeftPrimitives;
				
				// Save the axis of the minimum cost split candidate.
				splitAxis = axis;
			}
		}
	}
	
	//**************************************************************************************
	
	// If the split was unsuccessful, try a median split which is guaranteed to split the primitives.
	if ( numLesserPrimitives == 0 || numLesserPrimitives == numPrimitives )
	{
		// Choose to split along the axis with the largest extent.
		Size splitAxis = aabbDimension[0] > aabbDimension[1] ? 
						aabbDimension[0] > aabbDimension[2] ? 0 : 2 :
						aabbDimension[1] > aabbDimension[2] ? 1 : 2;
		
		// Use a median-based partition to split the primitives.
		partitionPrimitivesMedian( primitiveAABBs, numPrimitives, splitAxis, numLesserPrimitives, lesserVolume, greaterVolume );
		
		return;
	}
	
	//**************************************************************************************
	// Partition the primitives into two sets based on the minimal cost split plane.
	
	Index left = 0;
	Index right = numPrimitives - 1;
	
	while ( left < right )
	{
		// Move right while primitive < split plane.
		while ( primitiveAABBs[left].centroid[splitAxis] <= minSplitPlane && left < right )
			left++;
		
		// Move left while primitive > split plane.
		while ( primitiveAABBs[right].centroid[splitAxis] > minSplitPlane && left < right )
			right--;
		
		if ( left < right )
		{
			// Swap the primitives because they are out of order.
			const PrimitiveAABB temp = primitiveAABBs[left];
			primitiveAABBs[left] = primitiveAABBs[right];
			primitiveAABBs[right] = temp;
		}
	}
	
	// Set the number of primitives that are to the left of the split plane.
	lesserVolume = AABB3f( lesserMin[0], lesserMax[0], lesserMin[1], lesserMax[1], lesserMin[2], lesserMax[2] );
	greaterVolume = AABB3f( greaterMin[0], greaterMax[0], greaterMin[1], greaterMax[1], greaterMin[2], greaterMax[2] );
}




//##########################################################################################
//##########################################################################################
//############		
//############		Median Object Partition Method
//############		
//##########################################################################################
//##########################################################################################




void AABBTree4:: partitionPrimitivesMedian( PrimitiveAABB* primitiveAABBs, Size numPrimitives,
												Index splitAxis, Size& numLesserPrimitives,
												AABB3f& lesserVolume, AABB3f& greaterVolume )
{
	if ( numPrimitives == 2 )
	{
		numLesserPrimitives = 1;
		lesserVolume = computeAABBForPrimitives( primitiveAABBs, 1 );
		greaterVolume = computeAABBForPrimitives( primitiveAABBs + 1, 1 );
		return;
	}
	
	Index first = 0;
	Index last = numPrimitives - 1;
	Index middle = (first + last)/2;
	
	while ( 1 )
	{
		Index mid = first;
		const math::SIMDScalar<float,4>& key = primitiveAABBs[mid].centroid;
		
		for ( Index j = first + 1; j <= last; j ++)
		{
			if ( primitiveAABBs[j].centroid[splitAxis] > key[splitAxis] )
			{
				mid++;
				
				// interchange values.
				const PrimitiveAABB temp = primitiveAABBs[mid];
				primitiveAABBs[mid] = primitiveAABBs[j];
				primitiveAABBs[j] = temp;
			}
		}
		
		// interchange the first and mid value.
		const PrimitiveAABB temp = primitiveAABBs[mid];
		primitiveAABBs[mid] = primitiveAABBs[first];
		primitiveAABBs[first] = temp;
		
		if ( mid + 1 == middle )
			break;
		
		if ( mid + 1 > middle )
			last = mid - 1;
		else
			first = mid + 1;
	}
	
	numLesserPrimitives = numPrimitives / 2;
	
	lesserVolume = computeAABBForPrimitives( primitiveAABBs, numLesserPrimitives );
	greaterVolume = computeAABBForPrimitives( primitiveAABBs + numLesserPrimitives, numPrimitives - numLesserPrimitives );
}




//##########################################################################################
//##########################################################################################
//############		
//############		Generic Tree Refit Method
//############		
//##########################################################################################
//##########################################################################################




AABB3f AABBTree4:: refitTreeGeneric( Node* node )
{
	if ( node->isLeaf() )
	{
		if ( node->getPrimitiveCount() == 0 )
			return AABB3f();
		
		// Compute the bounding box of this leaf's primitives.
		const Index* primitive = (const Index*)primitiveData + node->getPrimitiveOffset();
		const Size primitiveCount = node->getPrimitiveCount();
		
		AABB3f result = primitiveSet->getAABB( primitive[0] );
		
		for ( Index i = 1; i < primitiveCount; i++ )
			result.enlargeFor( primitiveSet->getAABB( primitive[i] ) );
		
		return result;
	}
	else
	{
		AABB3f result;
		
		// Resursively find the new bounding box for the children of this node.
		for ( Index i = 0; i < 4; i++ )
		{
			AABB3f childAABB = refitTreeGeneric( node->getChild(i) );
			
			// Store the bounding box for the child in this node.
			node->setChildAABB( i, childAABB );
			
			// Find the bounding box containing all children.
			if ( i == 0 )
				result = childAABB;
			else
				result.enlargeFor( childAABB );
		}
		
		return result;
	}
}




//##########################################################################################
//##########################################################################################
//############		
//############		Triangle Tree Refit Method
//############		
//##########################################################################################
//##########################################################################################




AABB3f AABBTree4:: refitTreeTriangles( Node* node )
{
	if ( node->isLeaf() )
	{
		if ( node->getPrimitiveCount() == 0 )
			return AABB3f();
		
		// Compute the bounding box of this leaf's primitives.
		CachedTriangle* triangle = (CachedTriangle*)primitiveData + node->getPrimitiveOffset();
		const Size primitiveCount = node->getPrimitiveCount();
		
		AABB3f result = primitiveSet->getAABB( triangle[0].indices[0] );
		Vector3f v0, v1, v2;
		
		for ( Index i = 0; i < primitiveCount; i++ )
		{
			for ( Index j = 0; j < 4; j++ )
			{
				result.enlargeFor( primitiveSet->getAABB( triangle[i].indices[j] ) );
				
				// Update cached triangles.
				primitiveSet->getTriangle( triangle[i].indices[j], v0, v1, v2 );
				Vector3f e1 = v1 - v0;
				Vector3f e2 = v2 - v0;
				
				triangle[i].v0.x[j] = v0.x;
				triangle[i].v0.y[j] = v0.y;
				triangle[i].v0.z[j] = v0.z;
				triangle[i].e1.x[j] = e1.x;
				triangle[i].e1.y[j] = e1.y;
				triangle[i].e1.z[j] = e1.z;
				triangle[i].e2.x[j] = e2.x;
				triangle[i].e2.y[j] = e2.y;
				triangle[i].e2.z[j] = e2.z;
			}
		}
		
		return result;
	}
	else
	{
		AABB3f result;
		
		// Resursively find the new bounding box for the children of this node.
		for ( Index i = 0; i < 4; i++ )
		{
			AABB3f childAABB = refitTreeGeneric( node->getChild(i) );
			
			// Store the bounding box for the child in this node.
			node->setChildAABB( i, childAABB );
			
			// Find the bounding box containing all children.
			if ( i == 0 )
				result = childAABB;
			else
				result.enlargeFor( childAABB );
		}
		
		return result;
	}
}




//##########################################################################################
//##########################################################################################
//############		
//############		Primitive Index List Building Method
//############		
//##########################################################################################
//##########################################################################################




void AABBTree4:: fillPrimitiveIndices( Index* primitiveIndices, const PrimitiveAABB* primitiveAABBs, Size numPrimitives )
{
	for ( Index i = 0; i < numPrimitives; i++ )
		primitiveIndices[i] = primitiveAABBs[i].primitiveIndex;
}




//##########################################################################################
//##########################################################################################
//############		
//############		Axis-Aligned Bound Box Calculation Methods
//############		
//##########################################################################################
//##########################################################################################




AABB3f AABBTree4:: computeAABBForPrimitives( const PrimitiveAABB* primitiveAABBs, Size numPrimitives )
{
	/// Create a bounding box with the minimum at the max float value and visce versa.
	SIMDFloat4 min( math::max<float>() );
	SIMDFloat4 max( math::min<float>() );
	
	const PrimitiveAABB* const primitiveAABBsEnd = primitiveAABBs + numPrimitives;
	
	while ( primitiveAABBs != primitiveAABBsEnd )
	{
		min = math::min( min, primitiveAABBs->min );
		max = math::max( max, primitiveAABBs->max );
		
		primitiveAABBs++;
	}
	
	return AABB3f( min[0], max[0], min[1], max[1], min[2], max[2] );
}




AABB3f AABBTree4:: computeAABBForPrimitiveCentroids( const PrimitiveAABB* primitiveAABBs, Size numPrimitives )
{
	/// Create a bounding box with the minimum at the max float value and visce versa.
	SIMDFloat4 min( math::max<float>() );
	SIMDFloat4 max( math::min<float>() );
	
	const PrimitiveAABB* const primitiveAABBsEnd = primitiveAABBs + numPrimitives;
	
	while ( primitiveAABBs != primitiveAABBsEnd )
	{
		min = math::min( min, primitiveAABBs->centroid );
		max = math::max( max, primitiveAABBs->centroid );
		
		primitiveAABBs++;
	}
	
	return AABB3f( min[0], max[0], min[1], max[1], min[2], max[2] );
}




float AABBTree4:: getAABBSurfaceArea( const SIMDFloat4& min,
										const SIMDFloat4& max )
{
	const SIMDFloat4 aabbDimension = max - min;
	
	return float(2)*(aabbDimension[0]*aabbDimension[1] +
					aabbDimension[0]*aabbDimension[2] +
					aabbDimension[1]*aabbDimension[2]);
}




//##########################################################################################
//##########################################################################################
//############		
//############		Triangle List Building Methods
//############		
//##########################################################################################
//##########################################################################################




Size AABBTree4:: getTriangleArraySize( const Node* node ) const
{
	if ( node->isLeaf() )
		return math::nextMultiple( node->getPrimitiveCount(), IndexType(4) ) >> 2;
	else
	{
		Size result = 0;
		
		for ( Index i = 0; i < 4; i++ )
			result += getTriangleArraySize( node->getChild(i) );
		
		return result;
	}
}




Size AABBTree4:: fillTriangleArray( CachedTriangle* triangles, const PrimitiveInterface* primitiveInterface,
									const PrimitiveAABB* aabbs, Node* node, Size numFilled )
{
	Size currentOutputIndex = numFilled;
	
	if ( node->isLeaf() )
	{
		Size numLeafTriangles = node->getPrimitiveCount();
		Size numTruncatedTriangles = ((numLeafTriangles >> 2) << 2);
		Size numPaddedTriangles = numTruncatedTriangles == numLeafTriangles ? 
									numTruncatedTriangles : numTruncatedTriangles + 4;
		
		// Update the per-node primitive count to reflect that 4 regular triangles = 1 cached triangle.
		Index currentOffset = node->getPrimitiveOffset();
		node->setPrimitiveOffset( currentOutputIndex );
		node->setPrimitiveCount( numPaddedTriangles >> 2 );
		
		Size numIterations = numPaddedTriangles >> 2;
		
		for ( Index k = 0; k < numIterations; k++ )
		{
			// Determine the number of triangles to go into this cached triangle, 4 or less.
			Size numRemainingTriangles = math::min( numLeafTriangles - k*4, Size(4) );
			
			Vector3f v0, v1, v2;
			SIMDVector3f simdV0;
			SIMDVector3f simdE1;
			SIMDVector3f simdE2;
			StaticArray<Index,4> indices;
			
			// Get the triangle from the primitive set.
			for ( Index t = 0; t < 4; t++ )
			{
				// If there are no more remaining triangles, use the last valid one.
				if ( t < numRemainingTriangles )
					indices[t] = aabbs[currentOffset + t].primitiveIndex;
				else
					indices[t] = aabbs[currentOffset + numRemainingTriangles - 1].primitiveIndex;
				
				primitiveInterface->getTriangle( indices[t], v0, v1, v2 );
				Vector3f e1 = v1 - v0;
				Vector3f e2 = v2 - v0;
				
				// Convert to SIMD layout.
				simdV0.x[t] = v0.x;
				simdV0.y[t] = v0.y;
				simdV0.z[t] = v0.z;
				simdE1.x[t] = e1.x;
				simdE1.y[t] = e1.y;
				simdE1.z[t] = e1.z;
				simdE2.x[t] = e2.x;
				simdE2.y[t] = e2.y;
				simdE2.z[t] = e2.z;
			}
			
			// Create the new triangle.
			new (triangles + currentOutputIndex) CachedTriangle( simdV0, simdE1, simdE2, indices );
			
			currentOffset += 4;
			currentOutputIndex++;
		}
	}
	else
	{
		for ( Index i = 0; i < 4; i++ )
			currentOutputIndex += fillTriangleArray( triangles, primitiveInterface, aabbs, node->getChild(i), currentOutputIndex );
	}
	
	return currentOutputIndex - numFilled;
}




//##########################################################################################
//##########################################################################################
//############		
//############		Primitive Data Copy Method
//############		
//##########################################################################################
//##########################################################################################




UByte* AABBTree4:: copyPrimitiveData( Size& newCapacity ) const
{
	switch ( cachedPrimitiveType )
	{
		case PrimitiveInterfaceType::TRIANGLES:
			return (UByte*)util::copyArrayAligned( (const CachedTriangle*)primitiveData, getTriangleArraySize(nodes), 16 );
		
		default:
			return (UByte*)util::copyArrayAligned( (const Index*)primitiveData, numPrimitives, 16 );
	}
}



#5174881 Thesis idea: offline collision detection

Posted by Aressera on 19 August 2014 - 06:16 PM

A similar idea was explored by this paper, which samples the high DOF configuration space between an object pair. In practice it doesn't seem to be very robust and requires a long precompute, high storage. Geometric techniques are much more accurate and robust.




#5165085 Bone parent indices to linked list

Posted by Aressera on 06 July 2014 - 11:58 AM

I don't think it is necessary to store any explicit hierarchy of the bones - In my animation system I just have a list of bones, and each bone has an associated parent bone index (in the list). This stores the hierarchy implicitly. Then, when computing the final matrices for each bone, I just iterate through the list and lazily compute the concatenated object-space matrices of each bone (and any parent bones, if they have not yet been computed). There doesn't seem to be any reason to store the skeleton hierarchy in a traditional tree structure.




#5163493 The doubt of a bounding sphere as the bounding volume

Posted by Aressera on 28 June 2014 - 02:38 PM

There is rarely one bounding volume to fit them all. You should probably have separate bounding volumes for physics and graphics, since the needs for both are different. The graphics bounding volume can be a little looser because it's only used for culling, while the physics BV should tightly bound the visual representation. You may also use a series of progressively more detailed representations: first test against a bounding sphere, then versus convex hull of the mesh, then versus the actual mesh itself. This avoids checking the detailed representation more often than necessary.




#5153228 High Speed Quadric Mesh Simplification without problems (Resolved)

Posted by Aressera on 13 May 2014 - 12:01 AM

That's why you need to update the error for neighboring edges after each collapse and re-sort the edge collapse list at each iteration after adding newly created edges - there are collapses that you could still make but you don't know about them because they're not at the top of the heap, or they correspond to new edges that weren't in the original mesh, and therefore don't even exist in the collapse list.

 

Here's my working edge collapse code (won't compile as-is), but it's well-commented and should give you an idea of how to handle the resorting efficiently.


class FatVertex
{
	public:
		
		inline FatVertex( const Vector3& newPosition )
			:	position( newPosition ),
				finalIndex( 0 ),
				collapsed( false ),
				checked( false )
		{
		}
		
		Vector3 position;
		
		/// A list of the indices of the vertices that share an edge with vertex.
		ShortArrayList<Index,4> vertexNeighbors;
		
		/// A list of the indices of the triangles that use this vertex.
		ShortArrayList<Index,6> triangleNeighbors;
		
		/// The final index of this vertex in the output list.
		Index finalIndex;
		
		/// A boolean value indicating whether or not this vertex has been collapsed.
		Bool collapsed;
		
		/// A boolean value indicating whether or not this vertex has been collapsed.
		Bool checked;
		
};




class FatTriangle
{
	public:
		
		inline FatTriangle( Index newV0, Index newV1, Index newV2, const Plane3& newPlane )
			:	plane( newPlane ),
				finalIndex( 0 ),
				collapsed( false )
		{
			v[0] = newV0;
			v[1] = newV1;
			v[2] = newV2;
		}
		
		inline Bool hasVertex( const Index vertexIndex ) const
		{
			return v[0] == vertexIndex || v[1] == vertexIndex || v[2] == vertexIndex;
		}
		
		inline Bool getVertexIndex( const Index vertexIndex, Index& index ) const
		{
			for ( Index i = 0; i < 3; i++ )
			{
				if ( v[i] == vertexIndex )
				{
					index = i;
					return true;
				}
			}
			return false;
		}
		
		inline Bool replaceVertex( Index replaceIndex, Index newIndex )
		{
			if ( v[0] == replaceIndex )
				v[0] = newIndex;
			else if ( v[1] == replaceIndex )
				v[1] = newIndex;
			else if ( v[2] == replaceIndex )
				v[2] = newIndex;
			else
				return false;
			
			return true;
		}
		
		
		/// The indices of the vertices of this triangle.
		Index v[3];
		
		/// The plane equation for this triangle.
		Plane3 plane;
		
		/// The final index of this triangle in the output list.
		Index finalIndex;
		
		/// A boolean value indicating whether or not this triangle has been collapsed.
		Bool collapsed;
		
		
};




class EdgeCollapse
{
	public:
		
		inline EdgeCollapse( Index newV1, Index newV2, const Vector3& newTarget, Real newCost )
			:	v1( newV1 ),
				v2( newV2 ),
				target( newTarget ),
				cost( newCost ),
				queueIndex( math::max<Index>() )
		{
		}
		
		/// Return whether or not this edge collapse is the same as another.
		inline Bool operator == ( const EdgeCollapse& other ) const
		{
			return (v1 == other.v1 && v2 == other.v2) || (v1 == other.v2 && v2 == other.v1);
		}
		
		Index v1;
		Index v2;
		
		Vector3 target;
		Real cost;
		
		/// The index position of this edge collapse in the edge collapse queue.
		Index queueIndex;
		
};




class EdgeCollapseReference
{
	public:
		
		inline EdgeCollapseReference( EdgeCollapse* newCollapse )
			:	collapse( newCollapse )
		{
		}
		
		/// Return whether or not this edge collapse is the same as another.
		inline Bool operator == ( const EdgeCollapseReference& other ) const
		{
			return collapse == other.collapse;
		}
		
		/// Return whether or not this edge collapse has a lower cost than another.
		inline Bool operator < ( const EdgeCollapseReference& other ) const
		{
			return collapse->cost > other.collapse->cost;
		}
		
		inline Real getCost() const
		{
			return collapse->cost;
		}
		
		EdgeCollapse* collapse;
		
		
};




class EdgeCollapseQueue
{
	public:
		
		inline EdgeCollapseQueue( Size newCapacity )
			:	array( util::allocate<EdgeCollapseReference>( newCapacity ) ),
				capacity( newCapacity ),
				numCollapses( 0 )
		{
		}
		
		
		inline ~EdgeCollapseQueue()
		{
			this->clear();
			util::deallocate( array );
		}
		
		
		inline void add( const EdgeCollapseReference& newCollapse )
		{
			if ( numCollapses == capacity )
				doubleCapacity();
			
			Index i = numCollapses;
			Index parent = getParentIndex(i);
			
			new (array + numCollapses) EdgeCollapseReference( newCollapse );
			array[i].collapse->queueIndex = i;
			numCollapses++;
			
			// reorder the queue's heap so that the new element is in the correct place.
			while ( array[parent] < array[i] )
			{
				swap( parent, i );
				
				i = parent;
				parent = getParentIndex(i);
			}
		}
		
		
		inline void remove()
		{
			// Decrement the number of elements in the queue.
			numCollapses--;
			
			// Copy the last element in the queue's array into the largest element's location.
			array[0] = array[numCollapses];
			array[0].collapse->queueIndex = 0;
			
			// Call the destructor for the old last element.
			array[numCollapses].~EdgeCollapseReference();
			
			// Convert the new array into a heap, starting at the first element.
			this->heapify( 0 );
		}
		
		/// Ensure that the heap is propery ordered after the specified element was changed.
		inline void update( Index i )
		{
			if ( i > 0 )
			{
				Index parent = getParentIndex(i);
				
				// Did the entry increase its value?
				if ( array[parent] < array[i] )
				{
					do
					{
						swap( parent, i );
						
						i = parent;
						parent = getParentIndex(i);
					}
					while ( i > 0 && array[parent] < array[i] );
					
					return;
				}
			}
			
			this->heapify( i );
		}
		
		
		inline const EdgeCollapseReference& getFirst() const
		{
			return *array;
		}
		
		
		inline Size getSize() const
		{
			return numCollapses;
		}
		
		
		inline void clear()
		{
			if ( array != NULL )
				callDestructors( array, numCollapses );
			
			numCollapses = Size(0);
		}
		
		
	private:
		
		/// Get the index of a child elements's parent element given the child element's index.
		/**
		  * If the child index is zero, the method returns 0 because this child element is
		  * at the top of the heap and has no parent by definition.
		  */
		inline static Index getParentIndex( Index child )
		{
			if ( child == Index(0) )
				return Index(0);
			
			return (child - Index(1))/Index(2);
		}
		
		
		/// Get the index of the left child element given the parent element's index.
		inline static Index getChildIndex1( Index parent )
		{
			return (parent << 1) + Index(1);
		}
		
		
		/// Get the index of the right child element given the parent element's index.
		inline static Index getChildIndex2( Index parent )
		{
			return (parent << 1) + Index(2);
		}
		
		
		/// Convert the specified sub-heap, with root at index i, into a heap.
		inline void heapify( Index i )
		{
			while ( i < numCollapses )
			{
				Index childIndex1 = getChildIndex1(i);
				Index childIndex2 = getChildIndex2(i);
				Index max;
				
				if ( childIndex1 < numCollapses && array[i] < array[childIndex1] )
					max = childIndex1;
				else
					max = i;
				
				if ( childIndex2 < numCollapses && array[max] < array[childIndex2] )
					max = childIndex2;
				
				if ( max == i )
					break;
				
				swap( max, i );
				i = max;
			}
		}
		
		// Swap two elements in the heap.
		GSOUND_FORCE_INLINE void swap( Index index1, Index index2 )
		{
			EdgeCollapseReference temp = array[index1];
			array[index1] = array[index2];
			array[index2] = temp;
			
			// Update the indices for the swapped elements.
			array[index1].collapse->queueIndex = index1;
			array[index2].collapse->queueIndex = index2;
		}
		
		
		inline void doubleCapacity()
		{
			// check to see if the array has not yet been allocated.
			if ( capacity == 0 )
			{
				// allocate the array to hold elements.
				capacity = 8;
				array = util::allocate<EdgeCollapseReference>( capacity );
			}
			else
				resize( capacity << 1 );
		}
		
		
		/// Double the size of the element array to increase the capacity of the priority queue.
		/**
		  * This method deallocates the previously used array, and then allocates
		  * a new block of memory with a size equal to double the previous size.
		  * It then copies over all of the previous elements into the new array.
		  */
		void resize( Size newCapacity )
		{
			// Update the capacity and allocate a new array.
			capacity = newCapacity;
			EdgeCollapseReference* oldArray = array;
			array = util::allocate<EdgeCollapseReference>( capacity );
			
			// copy the elements from the old array to the new array.
			moveObjects( array, oldArray, numCollapses );
			
			// deallocate the old array.
			util::deallocate( oldArray );
		}
		
		
		inline static void moveObjects( EdgeCollapseReference* destination,
												const EdgeCollapseReference* source, Size number )
		{
			const EdgeCollapseReference* const sourceEnd = source + number;
			
			while ( source != sourceEnd )
			{
				// copy the object from the source to destination
				new (destination) EdgeCollapseReference(*source);
				
				// call the destructors on the source
				source->~EdgeCollapseReference();
				
				destination++;
				source++;
			}
		}
		
		inline static void callDestructors( EdgeCollapseReference* array, Size number )
		{
			const EdgeCollapseReference* const arrayEnd = array + number;
			
			while ( array != arrayEnd )
			{
				array->~EdgeCollapseReference();
				array++;
			}
		}
		
		
		EdgeCollapseReference* array;
		
		Size capacity;
		
		
		Size numCollapses;
		
		
		
};




class QEMVertex
{
	public:
		
		inline QEMVertex( const Matrix4& newQ )
			:	Q( newQ )
		{
		}
		
		
		/// The quadric error metric matrix Q for this vertex.
		Matrix4 Q;
		
		
		/// A list of the edge collapses that include this vertex.
		ShortArrayList<EdgeCollapseReference,4> collapses;
		
		
};




Bool MeshPreprocessor:: collapseEdges( ArrayList<FatVertex>& vertices, ArrayList<FatTriangle>& triangles, Real maxCost )
{
	//***************************************************************************
	// Compute the error matrix for each vertex in the mesh.
	
	const Size numVertices = vertices.getSize();
	ArrayList<QEMVertex> qemVertices( numVertices );
	
	for ( Index i = 0; i < numVertices; i++ )
	{
		FatVertex& vertex = vertices[i];
		vertex.checked = false;
		qemVertices.add( computeQ( vertex, triangles ) );
	}
	
	//***************************************************************************
	// Compute the target vertices and initial costs for all edges in the mesh.
	
	ArrayList<EdgeCollapse> edgeCollapses;
	
	for ( Index i = 0; i < numVertices; i++ )
	{
		FatVertex& vertex = vertices[i];
		QEMVertex& qemVertex = qemVertices[i];
		const Size numNeighbors = vertex.vertexNeighbors.getSize();
		
		// Skip previously collapsed vertices.
		if ( vertex.collapsed )
			continue;
		
		for ( Index n = 0; n < numNeighbors; n++ )
		{
			const Index neighborIndex = vertex.vertexNeighbors[n];
			FatVertex& vertex2 = vertices[neighborIndex];
			
			// Skip neighbors that have already been checked.
			if ( vertex2.checked || vertex2.collapsed )
				continue;
			
			const QEMVertex& qemVertex2 = qemVertices[neighborIndex];
			
			// Compute the combined error matrix for these vertices.
			Matrix4 Q12 = qemVertex.Q + qemVertex2.Q;
	
			// Compute the optimal collapse vertex and the cost for this edge collapse.
			Vector3 target = computeCollapseVertex( Q12, vertex.position, vertex2.position );
			
			// Compute the error for this target vertex.
			Real cost = computeQError( Q12, target );
			
			// Add this edge collapse to the edge collapse list.
			edgeCollapses.add( EdgeCollapse( i, neighborIndex, target, cost ) );
		}
		
		vertex.checked = true;
	}
	
	EdgeCollapseQueue edgeCollapseQueue( edgeCollapses.getSize() );
	
	for ( Index i = 0; i < edgeCollapses.getSize(); i++ )
	{
		EdgeCollapse& collapse = edgeCollapses[i];
		QEMVertex& qemV1 = qemVertices[collapse.v1];
		QEMVertex& qemV2 = qemVertices[collapse.v2];
		
		qemV1.collapses.add( &collapse );
		qemV2.collapses.add( &collapse );
		edgeCollapseQueue.add( &collapse );
	}
	
	//***************************************************************************
	// Collapse edges until the maximum cost is reached.
	
	nextEdgeCollapse:
	
	while ( edgeCollapseQueue.getSize() > 0 )
	{
		// Get the next edge collapse, then remove it from the queue.
		EdgeCollapseReference collapseReference = edgeCollapseQueue.getFirst();
		edgeCollapseQueue.remove();
		
		// Check to see if all further collapses exceed the maximum cost parameter.
		// If so, we are done collapsing edges.
		if ( collapseReference.getCost() > maxCost )
			break;
		
		EdgeCollapse& collapse = *collapseReference.collapse;
		
		// Skip degenerate edge collapses.
		if ( collapse.v1 == collapse.v2 )
			continue;
		
		const Index fromIndex = collapse.v1;
		FatVertex& fromVertex = vertices[fromIndex];
		
		const Index toIndex = collapse.v2;
		FatVertex& toVertex = vertices[toIndex];
		
		// Skip 'from' or 'to' vertices that have already been collapsed.
		if ( fromVertex.collapsed || toVertex.collapsed )
			continue;
		
		if ( vertexIsBorder( fromVertex, triangles ) || vertexIsBorder( toVertex, triangles ) )
			continue;
		
		//***************************************************************************
		// Check to make sure this edge collapse won't invert any triangles.
		
		for ( Index t = 0; t < fromVertex.triangleNeighbors.getSize(); t++ )
		{
			const Index triangleIndex = fromVertex.triangleNeighbors[t];
			const FatTriangle& triangle = triangles[triangleIndex];
			
			// Check only triangles that aren't removed.
			if ( !triangle.hasVertex( toIndex ) )
			{
				// Recompute the triangle plane equation.
				Plane3 newPlane( triangle.v[0] == fromIndex ? collapse.target : vertices[triangle.v[0]].position,
								triangle.v[1] == fromIndex ? collapse.target : vertices[triangle.v[1]].position,
								triangle.v[2] == fromIndex ? collapse.target : vertices[triangle.v[2]].position );
				
				// If the normals don't point in the same direction, then the triangle
				// would be reversed by this collapse and the mesh would fold over.
				// Don't do this edge collapse because it makes the model degenerate.
				if ( math::dot( triangle.plane.normal, newPlane.normal ) < Real(0.0) )
					goto nextEdgeCollapse;
			}
		}
		
		for ( Index t = 0; t < toVertex.triangleNeighbors.getSize(); t++ )
		{
			const Index triangleIndex = toVertex.triangleNeighbors[t];
			const FatTriangle& triangle = triangles[triangleIndex];
			
			// Check only triangles that aren't removed.
			if ( !triangle.hasVertex( fromIndex ) )
			{
				// Recompute the triangle plane equation.
				Plane3 newPlane( triangle.v[0] == toIndex ? collapse.target : vertices[triangle.v[0]].position,
								triangle.v[1] == toIndex ? collapse.target : vertices[triangle.v[1]].position,
								triangle.v[2] == toIndex ? collapse.target : vertices[triangle.v[2]].position );
				
				// If the normals don't point in the same direction, then the triangle
				// would be reversed by this collapse and the mesh would fold over.
				// Don't do this edge collapse because it makes the model degenerate.
				if ( math::dot( triangle.plane.normal, newPlane.normal ) < Real(0.0) )
					goto nextEdgeCollapse;
			}
		}
		
		//***************************************************************************
		
		// Mark the 'from' vertex as collapsed.
		fromVertex.collapsed = true;
		
		// Update the 'to' vertex to have the optimal collapsed position.
		toVertex.position = collapse.target;
		
		
		// Update the triangles that shared the from vertex.
		for ( Index t = 0; t < fromVertex.triangleNeighbors.getSize(); t++ )
		{
			const Index triangleIndex = fromVertex.triangleNeighbors[t];
			FatTriangle& triangle = triangles[triangleIndex];
			
			if ( triangle.hasVertex( toIndex ) )
			{
				// This triangle becomes degenerate after the edge collapse.
				// Mark it as collapsed.
				triangle.collapsed = true;
				
				// Remove this triangle from the list of neighbors for its vertices.
				if ( triangle.v[0] != fromIndex )
					vertices[triangle.v[0]].triangleNeighbors.remove( triangleIndex );
				
				if ( triangle.v[1] != fromIndex )
					vertices[triangle.v[1]].triangleNeighbors.remove( triangleIndex );
				
				if ( triangle.v[2] != fromIndex )
					vertices[triangle.v[2]].triangleNeighbors.remove( triangleIndex );
			}
			else
			{
				// This triangle needs to be updated with the new vertex index.
				triangle.replaceVertex( fromIndex, toIndex );
				
				// Recompute the triangle plane equation.
				triangle.plane = Plane3( vertices[triangle.v[0]].position,
										vertices[triangle.v[1]].position,
										vertices[triangle.v[2]].position );
				
				// Make sure the to vertex has this triangle as its neighbor.
				toVertex.triangleNeighbors.add( triangleIndex );
			}
		}
		
		// Clear the triangle neighbor list for the 'from' vertex since it is no longer used.
		fromVertex.triangleNeighbors.clear();
		
		toVertex.vertexNeighbors.removeUnordered( fromIndex );
		
		for ( Index i = 0; i < fromVertex.vertexNeighbors.getSize(); i++ )
		{
			const Index neighborIndex = fromVertex.vertexNeighbors[i];
			
			if ( neighborIndex == toIndex )
				continue;
			
			// Add this neighbor of the 'from' vertex to the list of neighbors for the 'to' vertex.
			if ( !toVertex.vertexNeighbors.contains( neighborIndex ) )
			{
				// Update the 'to' vertex.
				toVertex.vertexNeighbors.add( neighborIndex );
				
				// Update the neighbor vertex.
				FatVertex& neighbor = vertices[neighborIndex];
				neighbor.vertexNeighbors.removeUnordered( fromIndex );
				neighbor.vertexNeighbors.add( toIndex );
			}
		}
		
		fromVertex.vertexNeighbors.clear();
		
		QEMVertex& fromQEMVertex = qemVertices[fromIndex];
		QEMVertex& toQEMVertex = qemVertices[toIndex];
		
		// Compute the error matrix for the collapsed vertex.
		toQEMVertex.Q += fromQEMVertex.Q;
		
		// Remove the current collapse from the 'to' vertex.
		toQEMVertex.collapses.remove( collapseReference );
		
		// Merge the collapses associated with the 'from' vertex with those from the 'to' vertex.
		for ( Index i = 0; i < fromQEMVertex.collapses.getSize(); i++ )
		{
			const EdgeCollapseReference& ref = fromQEMVertex.collapses[i];
			
			// Replace the 'from' vertex in the collapse structures.
			if ( ref.collapse->v1 == fromIndex )
				ref.collapse->v1 = toIndex;
			else if ( ref.collapse->v2 == fromIndex )
				ref.collapse->v2 = toIndex;
			
			if ( ref.collapse->v1 == ref.collapse->v2 )
				continue;
			
			// Add the collapse to the collapses for the 'to' vertex if it is not a duplicate.
			if ( !toQEMVertex.collapses.contains( ref ) )
				toQEMVertex.collapses.add( ref );
		}
		
		// Clear the collapses from the 'from' vertex.
		fromQEMVertex.collapses.clear();
		
		// Recompute the cost for all edge collapses that involve the 'to' vertex.
		for ( Index i = 0; i < toQEMVertex.collapses.getSize(); )
		{
			const EdgeCollapseReference& ref = toQEMVertex.collapses[i];
			EdgeCollapse& newCollapse = *ref.collapse;
			const Index v1Index = newCollapse.v1;
			const Index v2Index = newCollapse.v2;
			
			// Compute the combined error matrix for these vertices.
			Matrix4 Q12 = qemVertices[v1Index].Q + qemVertices[v2Index].Q;
			
			// Compute the new optimal collapse vertex and the cost for this edge collapse.
			newCollapse.target = computeCollapseVertex( Q12, vertices[v1Index].position, vertices[v2Index].position );
			
			// Compute the error for this target vertex.
			newCollapse.cost = computeQError( Q12, newCollapse.target );
			
			// Update the recomputed edge collapse in the heap.
			edgeCollapseQueue.update( newCollapse.queueIndex );
			
			i++;
		}
	}
	
	//***************************************************************************
	
	return true;
}




Matrix4 MeshPreprocessor:: computeQ( const FatVertex& vertex, const ArrayList<FatTriangle>& triangles )
{
	const Size numTriangleNeighbors = vertex.triangleNeighbors.getSize();
	Matrix4 Q;
	
	for ( Index i = 0; i < numTriangleNeighbors; i++ )
	{
		const Index triangleIndex = vertex.triangleNeighbors[i];
		const FatTriangle& triangle = triangles[triangleIndex];
		
		// Compute the error matrix for this triangle's plane.
		Vector4 p( triangle.plane.normal, triangle.plane.offset );
		Matrix4 Kp( p.x*p.x, p.y*p.x, p.z*p.x, p.w*p.x,
					p.x*p.y, p.y*p.y, p.z*p.y, p.w*p.y,
					p.x*p.z, p.y*p.z, p.z*p.z, p.w*p.z,
					p.x*p.w, p.y*p.w, p.z*p.w, p.w*p.w );
		
		// Accumulate the error matrix.
		Q += Kp;
	}
	
	return Q;
}





Real MeshPreprocessor:: computeQError( const Matrix4& Q, const Vector3& v )
{
	Vector4 v4( v, 1 );
	
	return math::abs( math::dot( v4, Q*v4 ) );
}




Vector3 MeshPreprocessor:: computeCollapseVertex( const Matrix4& Q12, const Vector3& v1, const Vector3& v2 )
{/*
	// Compute the matrix to solve for the optimal collapse location.
	Matrix4 q = Q12;
	q.x.w = q.y.w = q.z.w = 0;
	q.w.w = 1;
	
	// Try inverting the matrix Q to compute the minimum-cost collapsed vertex.
	Matrix4 qInverse;
	
	Vector3 midpoint = math::midpoint( v1, v2 );
	Real midpointCost = computeQError( Q12, midpoint );
	
	if ( q.invert( qInverse, 0.1 ) && computeQError( Q12, qInverse.w.getXYZ() ) < midpointCost )
	{
		return qInverse.w.getXYZ();
	}
	else*/
	{
		// Inversion failed so pick the vertex or midpoint with the lowest cost.
		Vector3 midpoint = math::midpoint( v1, v2 );
		Real midpointCost = computeQError( Q12, midpoint );
		Real v1Cost = computeQError( Q12, v1 );
		Real v2Cost = computeQError( Q12, v2 );
		
		// Return the vertex or midpoint with the least cost.
		if ( v1Cost < v2Cost )
		{
			if ( v1Cost < midpointCost )
				return v1;
			else
				return midpoint;
		}
		else
		{
			if ( v2Cost < midpointCost )
				return v2;
			else
				return midpoint;
		}
	}
}




Bool MeshPreprocessor:: vertexIsBorder( const FatVertex& vertex, const ArrayList<FatTriangle>& triangles )
{
	const Size numVertexNeighbors = vertex.vertexNeighbors.getSize();
	const Size numTriangleNeighbors = vertex.triangleNeighbors.getSize();
	
	for ( Index v = 0; v < numVertexNeighbors; v++ )
	{
		const Index neighborIndex = vertex.vertexNeighbors[v];
		Size numNeighborTriangles = 0;
		
		for ( Index t = 0; t < numTriangleNeighbors; t++ )
		{
			const FatTriangle& triangle = triangles[vertex.triangleNeighbors[t]];
			
			if ( triangle.hasVertex( neighborIndex ) )
				numNeighborTriangles++;
		}
		
		if ( numNeighborTriangles == Size(1) )
			return true;
	}
	
	return false;
}





PARTNERS