Jump to content

  • Log In with Google      Sign In   
  • Create Account


LarryKing

Member Since 25 Jan 2011
Offline Last Active Jul 14 2014 07:59 PM
-----

#5112046 PinWheel Encryption

Posted by LarryKing on 25 November 2013 - 09:47 PM

Edit: See the newest version in this post

New phases have been added: Turbulence and Avalanche.

 

--

 

Hello everybody, I'm looking for some feedback on an encryption technique/algorithm I've been working on for the past few days: PinWheel (PNWL) encryption.
Now, I've found that explaining how the technique works is a challenge in and of itself, so please bear with me – I've even included lots of pictures.
Before I get to how the algorithm works, here are some statistics:
 
Table1_zpsdac853d8.png

Basic Information About PNWL:

  • Operates on 256 Byte Blocks
  • Makes heavy use of XOR
  • “Spins” the data to achieve encryption
  • Strength of encryption is exponentially proportional to the password length

Essentially PNWL works by splitting up 256 bytes of data into sized blocks. Sort of like this:
 
Diagram2_zps845df3cd.png
 
Thus, one block of 256 bytes (the main block) contains four blocks of 64 bytes; each of these blocks contains four blocks of 16 bytes, and similarly each of these blocks contains four blocks of 4 bytes.
 
To encrypt the data each block's content is quartered and then spun clockwise. As the quartered block spins, its content is internally XOR'd.
 
ForwardSpin_zps728b53a3.png
This hierarchy of spins is repeated for each character in the password. Furthermore, the magnitude of each spin is determined by the respective char.
 
Diagram4_zps62f8d79b.png
 
The only exception to the “Spin” technique is the Block4's, which instead “roll.” The amount of roll is determined by a set of magic numbers:

MAGIC[4][4] = { { 1, 3, 5, 7}, { 1, 7, 2, 9}, { 2, 3, 5, 7}, { 1, 9, 9, 6} };

Diagram3_zps49b6b914.png
To encrypt:
 
For each character in the password:

  • Roll Block4's Left
  • Spin Block16's
  • Spin Block64's
  • Spin the Block256

To decrypt:
 
Reverse the password, and then for each character:

  • Spin the Block256 in reverse
  • Spin the Block64's in reverse
  • Spin the Block16's in reverse
  • Roll Block4's Right

Anyways enough talk; here's the code, which is also attached (note: requires SSE3)

//Copyright (C) 2013 Laurence King
//
//Permission is hereby granted, free of charge, to any person obtaining a
//copy of this software and associated documentation files (the "Software"),
//to deal in the Software without restriction, including without limitation
//the rights to use, copy, modify, merge, publish, distribute, sublicense,
//and/or sell copies of the Software, and to permit persons to whom the
//Software is furnished to do so, subject to the following conditions:
//
//The above copyright notice and this permission notice shall be included 
//in all copies or substantial portions of the Software.
//
//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
//INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
//PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
//HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
//OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
//SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

#pragma once

#include <intrin.h>

// ALIGN( n ): compiler-specific spelling of an n-byte alignment request
// (__declspec(align) on MSVC, alignas everywhere else).
#ifdef _MSC_VER
#define ALIGN( n )	__declspec( align( n ) )
#else
#define ALIGN( n ) alignas( n )
#endif

// Masks that split one key character into four 2-bit fields.
// Forward()/Reverse() use them as follows:
//   PNWL_MASK1 (bits 0-1) -> which Block16 spin to run
//   PNWL_MASK2 (bits 2-3) -> which Block64 spin to run
//   PNWL_MASK3 (bits 4-5) -> which Block256 spin to run
//   PNWL_MASK4 (bits 6-7) -> which MAGIC row supplies the word-roll amounts
#define PNWL_MASK1 0x3
#define PNWL_MASK2 0xC
#define PNWL_MASK3 0x30
#define PNWL_MASK4 0xC0

namespace PinWheel
{
	// Integer aliases used throughout the cipher.
	// NOTE(review): the rotate macros hard-code a width of 32, so this assumes int is 32 bits — confirm for new targets.
	typedef				int int32;
	typedef unsigned	int uint32;

	// PNWL Magic constants
	// MAGIC[row][k] is the bit-rotation amount applied to the k-th 32-bit word of each
	// 4-word group; Forward()/Reverse() pick the row from the top two bits of the key character.
	const uint32 MAGIC[4][4] = { { 1, 3, 5, 7}, { 1, 7, 2, 9}, { 2, 3, 5, 7}, { 1, 9, 9, 6} };

	ALIGN(16)
	struct Block16
	{
		union
		{
			uint32		Data[4];
			__m128i		vData;
		};

		void Spin0 (void);
		void Spin1 (void);
		void Spin2 (void);
		void Spin3 (void);

		void rSpin0 (void);
		void rSpin1 (void);
		void rSpin2 (void);
		void rSpin3 (void);
	};

	ALIGN(16)
	struct Block64
	{
		union
		{
			uint32		_Data[16];
			Block16		Blocks[4];
			__m128i		vData [4];
		};

		void Spin0 (void);
		void Spin1 (void);
		void Spin2 (void);
		void Spin3 (void);
		
		void rSpin0 (void);
		void rSpin1 (void);
		void rSpin2 (void);
		void rSpin3 (void);

	};

	ALIGN(16)
	struct Block256
	{
		union
		{
			uint32		_Data[64];
			__m128i		_vData[16];
			Block16		_Block16[16];
			Block64		Blocks[4];
		};

		void Spin0 (void);
		void Spin1 (void);
		void Spin2 (void);
		void Spin3 (void);

		void rSpin0 (void);
		void rSpin1 (void);
		void rSpin2 (void);
		void rSpin3 (void);

		void Forward(const char *);
		void Reverse(const char *);
	};

	// Rotate the 32-bit unsigned value x left / right by n bits.
	//
	// Both shift counts are reduced modulo 32, so n == 0 (or n == 32) returns x
	// unchanged instead of shifting by the full width of the type, which is undefined.
	// For 1 <= n <= 31 the result is identical to the plain two-shift formula.
	//
	// NOTE: x and n are expanded more than once - pass expressions without side effects.
	#define ROTATE_LEFT(x, n) (((x) << ((n) & 31)) | ((x) >> ((32 - (n)) & 31)))
	#define ROTATE_RIGHT(x, n) (((x) >> ((n) & 31)) | ((x) << ((32 - (n)) & 31)))

	void Block16::Spin0(void)
	{
		Data[3] ^= Data[0];
		Data[2] ^= Data[0];
		Data[1] ^= Data[0];
		Data[0] = ~Data[0];
	}
	// Spin variant 1: XOR word 0 into word 3, then rotate the four words one
	// position: (d0, d1, d2, d3) -> (d3, d0, d1, d2). Undone by rSpin1().
	// Declared inline because this definition lives in a header.
	inline void Block16::Spin1(void)
	{
		Data[3] ^= Data[0];

		// Result word k takes source word (3, 0, 1, 2)[k].
		vData = _mm_shuffle_epi32(vData, _MM_SHUFFLE(2, 1, 0, 3));
	}
	void Block16::Spin2(void)
	{
		Data[3] ^= Data[0];
		Data[2] ^= Data[0];
		Data[0] = ~Data[0];

		vData = _mm_shuffle_epi32(vData, _MM_SHUFFLE(1, 0, 3, 2));
	}
	void Block16::Spin3(void)
	{
		Data[3] ^= Data[0];
		Data[2] ^= Data[0];
		Data[1] ^= Data[0];

		vData = _mm_shuffle_epi32(vData, _MM_SHUFFLE(0, 3, 2, 1));
	}
	void Block16::rSpin0(void)
	{
		Data[0] = ~Data[0];	
		Data[1] ^= Data[0];
		Data[2] ^= Data[0];
		Data[3] ^= Data[0];
	}
	// Inverse of Spin1(): rotate the words back, (d0, d1, d2, d3) -> (d1, d2, d3, d0),
	// then XOR word 0 out of word 3.
	// Declared inline because this definition lives in a header.
	inline void Block16::rSpin1(void)
	{
		// Result word k takes source word (1, 2, 3, 0)[k].
		vData = _mm_shuffle_epi32(vData, _MM_SHUFFLE(0, 3, 2, 1));

		Data[3] ^= Data[0];
	}
	void Block16::rSpin2(void)
	{
		vData = _mm_shuffle_epi32(vData, _MM_SHUFFLE(1, 0, 3, 2));

		Data[0] = ~Data[0];
		Data[2] ^= Data[0];
		Data[3] ^= Data[0];	
	}
	void Block16::rSpin3(void)
	{
		vData = _mm_shuffle_epi32(vData, _MM_SHUFFLE(2, 1, 0, 3));

		Data[3] ^= Data[0];
		Data[2] ^= Data[0];
		Data[1] ^= Data[0];
	}

	// Spin variant 0: no quarter moves; quarter 0 is XORed into quarters 1..3.
	// Running the same step again restores the data (rSpin0 performs the same XORs).
	// Declared inline because this definition lives in a header.
	inline void Block64::Spin0(void)
	{
		const __m128i pivot = vData[0];

		vData[1] = _mm_xor_si128(pivot, vData[1]);
		vData[2] = _mm_xor_si128(pivot, vData[2]);
		vData[3] = _mm_xor_si128(pivot, vData[3]);
	}
	// Spin variant 1: rotate the quarters one position, (q0, q1, q2, q3) ->
	// (q3 ^ q0, q0, q1, q2). Only the quarter that wraps around is mixed with q0.
	// Undone by rSpin1().
	// Declared inline because this definition lives in a header.
	inline void Block64::Spin1(void)
	{
		const __m128i first = vData[0];

		vData[0] = _mm_xor_si128(vData[3], first);
		vData[3] = vData[2];
		vData[2] = vData[1];
		vData[1] = first;
	}
	// Spin variant 2: swap the halves while mixing, (q0, q1, q2, q3) ->
	// (q2 ^ q0, q3 ^ q1, q0, q1). Undone by rSpin2().
	// Declared inline because this definition lives in a header.
	inline void Block64::Spin2(void)
	{
		const __m128i first = vData[0];
		const __m128i second = vData[1];

		vData[0] = _mm_xor_si128(vData[2], first);
		vData[1] = _mm_xor_si128(vData[3], second);
		vData[2] = first;
		vData[3] = second;
	}
	// Spin variant 3: rotate the quarters the other way while mixing each with q0,
	// (q0, q1, q2, q3) -> (q1 ^ q0, q2 ^ q0, q3 ^ q0, q0). Undone by rSpin3().
	// Declared inline because this definition lives in a header.
	inline void Block64::Spin3(void)
	{
		const __m128i first = vData[0];

		vData[0] = _mm_xor_si128(vData[1], first);
		vData[1] = _mm_xor_si128(vData[2], first);
		vData[2] = _mm_xor_si128(vData[3], first);
		vData[3] = first;
	}
	// Inverse of Spin0(): XOR quarter 0 out of quarters 1..3 again
	// (the step is its own inverse because quarter 0 is never modified).
	// Declared inline because this definition lives in a header.
	inline void Block64::rSpin0(void)
	{
		const __m128i pivot = vData[0];

		vData[1] = _mm_xor_si128(pivot, vData[1]);
		vData[2] = _mm_xor_si128(pivot, vData[2]);
		vData[3] = _mm_xor_si128(pivot, vData[3]);
	}
	// Inverse of Spin1(): quarter 1 holds the original q0; move every quarter back
	// one position and XOR q0 out of the quarter that had wrapped around.
	// Declared inline because this definition lives in a header.
	inline void Block64::rSpin1(void)
	{
		const __m128i first = vData[1];

		vData[1] = vData[2];
		vData[2] = vData[3];
		vData[3] = _mm_xor_si128(vData[0], first);
		vData[0] = first;
	}
	// Inverse of Spin2(): quarters 2 and 3 hold the original q0 and q1; XOR them
	// out of quarters 0 and 1 and swap the halves back.
	// Declared inline because this definition lives in a header.
	inline void Block64::rSpin2(void)
	{
		const __m128i first = vData[2];
		const __m128i second = vData[3];

		vData[2] = _mm_xor_si128(vData[0], first);
		vData[3] = _mm_xor_si128(vData[1], second);
		vData[0] = first;
		vData[1] = second;
	}
	// Inverse of Spin3(): quarter 3 holds the original q0; XOR it out of the other
	// quarters while moving each back one position.
	// Declared inline because this definition lives in a header.
	inline void Block64::rSpin3(void)
	{
		const __m128i first = vData[3];

		vData[3] = _mm_xor_si128(vData[2], first);
		vData[2] = _mm_xor_si128(vData[1], first);
		vData[1] = _mm_xor_si128(vData[0], first);
		vData[0] = first;
	}

	// Spin variant 0 on the four 64-byte quarters: nothing moves; quarter 0 is XORed
	// into quarters 1..3. Registers 4q + lane belong to quarter q, and the four lanes
	// are independent, so they are handled one lane at a time.
	// Declared inline because this definition lives in a header.
	inline void Block256::Spin0(void)
	{
		for (int lane = 0; lane < 4; ++lane)
		{
			const __m128i pivot = _vData[lane];

			_vData[0x4 + lane] = _mm_xor_si128(pivot, _vData[0x4 + lane]);
			_vData[0x8 + lane] = _mm_xor_si128(pivot, _vData[0x8 + lane]);
			_vData[0xC + lane] = _mm_xor_si128(pivot, _vData[0xC + lane]);
		}
	}
	// Spin variant 1 on the four 64-byte quarters:
	// (Q0, Q1, Q2, Q3) -> (Q0 ^ Q3, Q0, Q1, Q2). Undone by rSpin1().
	// Registers 4q + lane belong to quarter q; each lane is processed independently.
	// Declared inline because this definition lives in a header.
	inline void Block256::Spin1(void)
	{
		for (int lane = 0; lane < 4; ++lane)
		{
			const __m128i first = _vData[lane];

			_vData[lane]       = _mm_xor_si128(first, _vData[0xC + lane]);
			_vData[0xC + lane] = _vData[0x8 + lane];
			_vData[0x8 + lane] = _vData[0x4 + lane];
			_vData[0x4 + lane] = first;
		}
	}
	// Spin variant 2 on the four 64-byte quarters:
	// (Q0, Q1, Q2, Q3) -> (Q0 ^ Q2, Q0 ^ Q3, Q0, Q1). Undone by rSpin2().
	//
	// Note that the new second quarter is mixed with the original Q0 (read back from
	// the third quarter after it has been overwritten), not with Q1 as in
	// Block64::Spin2. rSpin2() mirrors this exactly, so the pair stays invertible.
	//
	// Registers 4q + lane belong to quarter q; each lane is processed independently.
	// Declared inline because this definition lives in a header.
	inline void Block256::Spin2(void)
	{
		for (int lane = 0; lane < 4; ++lane)
		{
			const __m128i first = _vData[lane];

			_vData[lane]       = _mm_xor_si128(first, _vData[0x8 + lane]);
			_vData[0x8 + lane] = first;

			const __m128i second = _vData[0x4 + lane];

			// _vData[0x8 + lane] already holds the original first quarter here.
			_vData[0x4 + lane] = _mm_xor_si128(_vData[0x8 + lane], _vData[0xC + lane]);
			_vData[0xC + lane] = second;
		}
	}
	// Spin variant 3 on the four 64-byte quarters:
	// (Q0, Q1, Q2, Q3) -> (Q0 ^ Q1, Q0 ^ Q2, Q0 ^ Q3, Q0). Undone by rSpin3().
	// Registers 4q + lane belong to quarter q; each lane is processed independently.
	// Declared inline because this definition lives in a header.
	inline void Block256::Spin3(void)
	{
		for (int lane = 0; lane < 4; ++lane)
		{
			const __m128i first = _vData[lane];

			_vData[lane]       = _mm_xor_si128(first, _vData[0x4 + lane]);
			_vData[0x4 + lane] = _mm_xor_si128(first, _vData[0x8 + lane]);
			_vData[0x8 + lane] = _mm_xor_si128(first, _vData[0xC + lane]);
			_vData[0xC + lane] = first;
		}
	}
	// Inverse of Spin0(): XOR quarter 0 out of quarters 1..3 again (the step is its
	// own inverse because quarter 0 is never modified).
	// Registers 4q + lane belong to quarter q; each lane is processed independently.
	// Declared inline because this definition lives in a header.
	inline void Block256::rSpin0(void)
	{
		for (int lane = 0; lane < 4; ++lane)
		{
			const __m128i pivot = _vData[lane];

			_vData[0x4 + lane] = _mm_xor_si128(pivot, _vData[0x4 + lane]);
			_vData[0x8 + lane] = _mm_xor_si128(pivot, _vData[0x8 + lane]);
			_vData[0xC + lane] = _mm_xor_si128(pivot, _vData[0xC + lane]);
		}
	}
	// Inverse of Spin1(): the second quarter holds the original Q0; shift every
	// quarter back one position and XOR Q0 out of the quarter that had wrapped around.
	// Registers 4q + lane belong to quarter q; each lane is processed independently.
	// Declared inline because this definition lives in a header.
	inline void Block256::rSpin1(void)
	{
		for (int lane = 0; lane < 4; ++lane)
		{
			const __m128i first = _vData[0x4 + lane];

			_vData[0x4 + lane] = _vData[0x8 + lane];
			_vData[0x8 + lane] = _vData[0xC + lane];
			_vData[0xC + lane] = _mm_xor_si128(first, _vData[lane]);
			_vData[lane]       = first;
		}
	}
	// Inverse of Spin2(): the third and fourth quarters hold the original Q0 and Q1.
	// First recover Q0 and Q2, then use the recovered Q0 to strip the mixing from the
	// second quarter (which yields Q3) and put Q1 back in place.
	// Registers 4q + lane belong to quarter q; each lane is processed independently.
	// Declared inline because this definition lives in a header.
	inline void Block256::rSpin2(void)
	{
		for (int lane = 0; lane < 4; ++lane)
		{
			const __m128i first = _vData[0x8 + lane];

			_vData[0x8 + lane] = _mm_xor_si128(first, _vData[lane]);
			_vData[lane]       = first;

			const __m128i second = _vData[0xC + lane];

			// _vData[lane] already holds the recovered first quarter here.
			_vData[0xC + lane] = _mm_xor_si128(_vData[lane], _vData[0x4 + lane]);
			_vData[0x4 + lane] = second;
		}
	}
	// Inverse of Spin3(): the fourth quarter holds the original Q0; XOR it out of the
	// other quarters while moving each back one position.
	// Registers 4q + lane belong to quarter q; each lane is processed independently.
	// Declared inline because this definition lives in a header.
	inline void Block256::rSpin3(void)
	{
		for (int lane = 0; lane < 4; ++lane)
		{
			const __m128i first = _vData[0xC + lane];

			_vData[0xC + lane] = _mm_xor_si128(first, _vData[0x8 + lane]);
			_vData[0x8 + lane] = _mm_xor_si128(first, _vData[0x4 + lane]);
			_vData[0x4 + lane] = _mm_xor_si128(first, _vData[lane]);
			_vData[lane]       = first;
		}
	}

	void Block256::Forward(const char * key)
	{
		for(char c = *(key++); c != 0; c = *(key++))
		{
			uint32 amnt0 =	c & PNWL_MASK1;
			uint32 amnt1 = (c & PNWL_MASK2 ) >> 2;
			uint32 amnt2 = (c & PNWL_MASK3 ) >> 4;
			uint32 amnt3 = (c & PNWL_MASK4 ) >> 6;

			#pragma region BLOCK4

			for(int i = 0; i < 64; i+=4)
			{
				_Data[i] =		ROTATE_LEFT( _Data[i] ,		MAGIC[amnt3][0] );
				_Data[i + 1] =	ROTATE_LEFT( _Data[i + 1] ,	MAGIC[amnt3][1] );
				_Data[i + 2] =	ROTATE_LEFT( _Data[i + 2] ,	MAGIC[amnt3][2] );
				_Data[i + 3] =	ROTATE_LEFT( _Data[i + 3] ,	MAGIC[amnt3][3] );
			}
			#pragma endregion

			#pragma region BLOCK16
			switch (amnt0)
			{
			case 0:
				for(int i = 0; i < 16; i++)
					_Block16[i].Spin0();
				break;
			case 1:
				for(int i = 0; i < 16; i++)
					_Block16[i].Spin1();
				break;
			case 2:
				for(int i = 0; i < 16; i++)
					_Block16[i].Spin2();
				break;
			case 3:
				for(int i = 0; i < 16; i++)
					_Block16[i].Spin3();
				break;
			}
			#pragma endregion

			#pragma region BLOCK64
			switch (amnt1)
			{
			case 0:
				Blocks[0].Spin0();
				Blocks[1].Spin0();
				Blocks[2].Spin0();
				Blocks[3].Spin0();
				break;
			case 1:
				Blocks[0].Spin1();
				Blocks[1].Spin1();
				Blocks[2].Spin1();
				Blocks[3].Spin1();
				break;
			case 2:
				Blocks[0].Spin2();
				Blocks[1].Spin2();
				Blocks[2].Spin2();
				Blocks[3].Spin2();
				break;
			case 3:
				Blocks[0].Spin3();
				Blocks[1].Spin3();
				Blocks[2].Spin3();
				Blocks[3].Spin3();
				break;
			}
			#pragma endregion

			#pragma region BLOCK256
			switch (amnt2)
			{
			case 0:
				Spin0();
				break;
			case 1:
				Spin1();
				break;
			case 2:
				Spin2();
				break;
			case 3:
				Spin3();
				break;
			}
			#pragma endregion

		}

	}

	// Expects the key to already have been reversed
	void Block256::Reverse(const char * rKey)
	{
		for(char c = *(rKey++); c != 0; c = *(rKey++))
		{
			uint32 amnt0 =	c & PNWL_MASK1;
			uint32 amnt1 = (c & PNWL_MASK2 ) >> 2;
			uint32 amnt2 = (c & PNWL_MASK3 ) >> 4;
			uint32 amnt3 = (c & PNWL_MASK4 ) >> 6;

			#pragma region BLOCK256
			switch (amnt2)
			{
			case 0:
				rSpin0();
				break;
			case 1:
				rSpin1();
				break;
			case 2:
				rSpin2();
				break;
			case 3:
				rSpin3();
				break;
			}
			#pragma endregion

			#pragma region BLOCK64
			switch (amnt1)
			{
			case 0:
				Blocks[0].rSpin0();
				Blocks[1].rSpin0();
				Blocks[2].rSpin0();
				Blocks[3].rSpin0();
				break;
			case 1:
				Blocks[0].rSpin1();
				Blocks[1].rSpin1();
				Blocks[2].rSpin1();
				Blocks[3].rSpin1();
				break;
			case 2:
				Blocks[0].rSpin2();
				Blocks[1].rSpin2();
				Blocks[2].rSpin2();
				Blocks[3].rSpin2();
				break;
			case 3:
				Blocks[0].rSpin3();
				Blocks[1].rSpin3();
				Blocks[2].rSpin3();
				Blocks[3].rSpin3();
				break;
			}
			#pragma endregion

			#pragma region BLOCK16
			switch (amnt0)
			{
			case 0:
				for(int i = 0; i < 16; i++)
					_Block16[i].rSpin0();
				break;
			case 1:
				for(int i = 0; i < 16; i++)
					_Block16[i].rSpin1();
				break;
			case 2:
				for(int i = 0; i < 16; i++)
					_Block16[i].rSpin2();
				break;
			case 3:
				for(int i = 0; i < 16; i++)
					_Block16[i].rSpin3();
				break;
			}
			#pragma endregion

			#pragma region BLOCK4
			for(int i = 0; i < 64; i+=4)
			{
				_Data[i] =		ROTATE_RIGHT( _Data[i] ,		MAGIC[amnt3][0] );
				_Data[i + 1] =	ROTATE_RIGHT( _Data[i + 1] ,	MAGIC[amnt3][1] );
				_Data[i + 2] =	ROTATE_RIGHT( _Data[i + 2] ,	MAGIC[amnt3][2] );
				_Data[i + 3] =	ROTATE_RIGHT( _Data[i + 3] ,	MAGIC[amnt3][3] );
			}
			#pragma endregion

		}
	}
}

And here is how you would encrypt some data:

// Reinterpret the raw buffer as an array of 256-byte PinWheel blocks.
// NOTE(review): Block256 contains __m128i members, so memblock presumably has to be
// 16-byte aligned and at least blockcount * 256 bytes long — confirm at the allocation site.
PinWheel::Block256 * blocks = reinterpret_cast<PinWheel::Block256 *>(memblock);

// Encrypt every block in place with the same password.
// NOTE(review): Forward() reads the key up to the first 0 byte — verify that
// password.data() is NUL-terminated for the container type used here.
for(int i = 0; i < blockcount; i++)
{
	blocks[i].Forward(password.data());
}

Now for some visual examples of PNWL encryption in action:
(For illustration purposes, these were created by encrypting the image portion of either 24bpp bitmaps or grayscale bitmaps)

 

Mona:

Spoiler

 

Simple Triangles:

Spoiler

 

Flower ( grayscale bitmap)

Spoiler

 

Where I can see improvement: PNWL was designed to make use of SIMD commands, however it can be done without them.

I don't have a processor that supports AVX2, but I predict a 30% boost if it was used, for example, on the Roll portion. Furthermore, multithreading could yield excellent returns

 

Attached is the source code for PNWL and a quick console app to test it out.

 

Thank you

Attached Files




#5017316 Model Format: Concept and Loading

Posted by LarryKing on 03 January 2013 - 06:37 PM

Hello all, I'm looking for some feedback regarding a model format and loader I've been working on over the past few days. I'm transitioning to C++, coming from C# and this has been a great exercise thus far, but I feel this post might end up a bit lengthy.
 
Some background as to what I wanted to accomplish with the file format:

  • The model can contain a large number of meshes.
  • Each mesh can have an arbitrary number of vertex buffers; not that a mesh should have 8, 16, or even 65,535 vertex streams, just that it could.
  • Each mesh can have an arbitrary number of textures; again, see above.
  • Each mesh and the model itself should have some sort of bounding volume.
  • Fast loading; the model should use local pointers and require minimal live processing.
  • Possibility for compression

 
After some research, it appears that loading directly into memory and then adjusting local pointers seems to be one of the fastest ways to load an object. So the entire format reflects the objects that make up my model.
 
A model contains: a pointer to some ModelTextures, a pointer to the MeshHeaders, a pointer to the MeshCullDatas and a pointer to the MeshDrawDatas.
 
I've tried to implement some Data Oriented Design – a very different concept coming from C#. I've split up the meshes into arrays of data needed for different operations: Culling and Drawing.
 
Furthermore, I'm attempting to implement this as part of my content manager, so a ModelTexture, is really just a wrapper around a shared_ptr<Texture2D> that is retrieved from another content cache.
 
All right, so here is what the model format looks like, I made a diagram!
*sorry it's so tall...
 
ModelFormat_zps271dc65c.png
 
The actual files are exported from a tool I've written in C#. I'm loading Collada files via Assimp, calculating any user requested data and displaying the model via SharpDX in a WinForms app.
 
In the end, the model gets exported to the file by the exporter first writing each mesh data object to “virtual memory,” adjusting all of the pointers and finally using a binary writer to spit out the finished file.
 
Pretty straight forward for me as I'm used to C#, but the scary stuff happens when we get to actually loading the file in C++.
 
First I load the entire file with an ifstream into a char[]. Then I cast the char[] to a Model. Now I need to offset the local pointers so that the model will work in memory; however! I read somewhere that you can't add pointers in C++, only subtract them, but to offset local pointers you needed to add!
After much internet searching, I finally found an object ptrdiff_t that I could retrieve from a pointer, add to, and then cast back to a pointer. The question then became, “Is this legal, what I'm doing?” For a full day I pondered before quizzically deciding that it should? be legal. I mean how else would you offset pointers when you shouldn't just cast to an int?
The next problem arrived when I realized that I needed to somehow delete the model from memory as well. Again not sure, as I had casted a char[] to a model, if I could delete the model. I pretended I could and wrote the destructor. Miraculously it seemed to work! The “Memory” window in Visual Studio seemed to show that the object had successfully been deleted, although I'm still not sure if I need to call delete on the model's pointers as they weren't created with new.
 
So now, I have all this code for loading a model, but I'm not sure if it's legal, safe, or even sensible!
 
Enough talk though, here's the code for loading the model:
 

std::shared_ptr<Ruined::Graphics::Model> ModelLoader::Load(const std::string &name)
{
	Ruined::Graphics::Model * model;

	std::ifstream file (m_BaseDirectory + name, std::ios::in|std::ios::binary|std::ios::ate);
	if (file.is_open())
	{
		// Get the file's total size
		unsigned int size = file.tellg();
		// Create a char[] of the size to load the file into
		char* memblock = new char [size];
		// Seek to the beginning and read the file
		file.seekg (0, std::ios::beg);
		file.read (memblock, size);
		// Finally close the file
		file.close();

		// Cast the char[] to a Ruined::Graphics::Model pointer
		model = static_cast<Ruined::Graphics::Model *>((void*)memblock);

		// The location of the model in memory
		ptrdiff_t memOffset = (ptrdiff_t)model;

		// Offset the model's local pointers
		// Mesh Headers
		ptrdiff_t intOffset;
		model->MeshHeaders = (Ruined::Graphics::MeshHeader*)(memOffset + (ptrdiff_t)model->MeshHeaders);

		// Mesh Culling Datas
		// intOffset = (ptrdiff_t)model->MeshCullDatas;
		model->MeshCullDatas = (Ruined::Graphics::MeshCullData*)(memOffset + (ptrdiff_t)model->MeshCullDatas);

		// Mesh Drawing Datas
		// intOffset = (ptrdiff_t)model->MeshDrawDatas;
		model->MeshDrawDatas = (Ruined::Graphics::MeshDrawData*)(memOffset + (ptrdiff_t)model->MeshDrawDatas);

		// Model's Ruined::Graphics::ModelTexture pointer
		// intOffset = (ptrdiff_t)model->Textures;
		model->Textures = (Ruined::Graphics::ModelTexture*)(memOffset + (ptrdiff_t)model->Textures);
				
		// Load the model's textures
		for(int t = 0; t < model->TextureCount; t++)
		{
			// Offset TextureName pointers
			// intOffset = (ptrdiff_t)(model->Textures[t].TextureName);
			model->Textures[t].TextureName = (char*)(memOffset + (ptrdiff_t)(model->Textures[t].TextureName));
			// Load the texture
			model->Textures[t].TextureContent = p_TextureCache->Load(model->Textures[t].TextureName);
		}

		HRESULT hresult;
		Ruined::Graphics::MeshDrawData * tempMeshD = nullptr;
		for(int m = 0; m < model->MeshCount; m++)
		{

			// Build the buffers
			tempMeshD = &model->MeshDrawDatas[m];

			// Offset Index Buffer
			// intOffset = (ptrdiff_t)tempMeshD->IndexBuffer;
			tempMeshD->IndexBuffer = (ID3D11Buffer*)(memOffset + (ptrdiff_t)tempMeshD->IndexBuffer);

			// Offset Vertex Buffer
			// intOffset = (ptrdiff_t)tempMeshD->VertexBuffer;
			tempMeshD->VertexBuffers = (ID3D11Buffer**)(memOffset + (ptrdiff_t)tempMeshD->VertexBuffers);

			// Offset Strides
			// intOffset = (ptrdiff_t)tempMeshD->Strides;
			tempMeshD->Strides = (unsigned int*)(memOffset + (ptrdiff_t)tempMeshD->Strides);

			// Offset Resources
			intOffset = (ptrdiff_t)tempMeshD->Resources;
			tempMeshD->Resources = (ID3D11ShaderResourceView**)(memOffset + intOffset);

			// Convert Resources * to unsigned int *
			unsigned int * index = (unsigned int*)(memOffset + intOffset);

			// Assign to the poingters from the model's textures
			for(int t = 0; t < model->MeshHeaders[m].ResourceCount; t++)		
				tempMeshD->Resources[t] = model->Textures[index[t]].TextureContent.get()->p_shaderResourceView;

			// Desc for the index buffer
			D3D11_BUFFER_DESC indexBufferDesc;		
			indexBufferDesc.Usage = D3D11_USAGE_DEFAULT;
			indexBufferDesc.ByteWidth = tempMeshD->IndexCount * (tempMeshD->IndexFormat == DXGI_FORMAT_R16_UINT ? sizeof(unsigned short) : sizeof(unsigned int));
			indexBufferDesc.BindFlags = D3D11_BIND_INDEX_BUFFER;
			indexBufferDesc.CPUAccessFlags = 0;
			indexBufferDesc.MiscFlags = 0;

			D3D11_SUBRESOURCE_DATA indexData;
			indexData.pSysMem = tempMeshD->IndexBuffer;
			indexData.SysMemPitch = 0;
			indexData.SysMemSlicePitch = 0;

			hresult = p_Graphics->GetDevice()->CreateBuffer(&indexBufferDesc, &indexData, &tempMeshD->IndexBuffer);
			if(FAILED(hresult))
			{
				OutputDebugStringA("Failed to create Index Buffer");
			}

						
			// Create each vertex buffer
			Ruined::Graphics::MeshBufferDesc * tempDesc = (Ruined::Graphics::MeshBufferDesc*)(tempMeshD->VertexBuffers);
			for(unsigned int b = 0; b < tempMeshD->VertexBufferCount; b++)
			{
							
				// Each buffer gets a desc
				D3D11_BUFFER_DESC bufferDesc;
				bufferDesc.Usage = D3D11_USAGE_DEFAULT;
				bufferDesc.ByteWidth = tempDesc[b].BufferWidth;
				bufferDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
				bufferDesc.CPUAccessFlags = 0;
				bufferDesc.MiscFlags = 0;

				// Each buffer needs a subresource data
				D3D11_SUBRESOURCE_DATA subData;
				subData.pSysMem = (void*)((ptrdiff_t)tempDesc[b].Data + memOffset);
				subData.SysMemPitch = 0;
				subData.SysMemSlicePitch = 0;

				hresult = p_Graphics->GetDevice()->CreateBuffer(&bufferDesc, &subData, &(tempMeshD->VertexBuffers[b]));
				if(FAILED(hresult))
				{
					OutputDebugStringA("Failed to create Vertex Buffer");
				}
			}

						
		}

					
	}
	else
	{
		std::string errorMsg = "Failed to load Model: ";
		errorMsg += m_BaseDirectory + name + "\n";
		OutputDebugStringA(errorMsg.c_str());

		// Set model equal to something
		model = new Ruined::Graphics::Model();
	}

	std::shared_ptr<Ruined::Graphics::Model> sModel(model);

	return sModel;
}

So that makes a little more sense, here is Model.h:

#include "MeshDrawData.h"
#include "ModelTexture.h"

#include <memory>

namespace Ruined
{
	namespace Graphics
	{
		// Combine Model Header and Model Pointers
		// In-memory image of a model file: ModelLoader::Load reads the whole file into
		// one buffer and casts the start of that buffer to this struct, so the member
		// order and sizes double as the on-disk layout.
		// NOTE(review): ~Model releases the D3D buffers, so copying a Model would make
		// two objects release the same buffers — presumably instances are only handled
		// through pointers; confirm.
		struct __declspec(dllexport) Model
		{

		public:
			// Header
			unsigned short _FILETYPE;
			unsigned short _FILEVERSION;
			unsigned int ModelSize;
		
			// Number of entries in Textures / in each of the three mesh arrays.
			unsigned short TextureCount;
			unsigned short MeshCount;
			DirectX::BoundingBox BoundingBox;

			// Pointers
			// Stored in the file as byte offsets from the start of the model;
			// the loader adds the buffer address to turn them into real pointers.
			ModelTexture * Textures;
			MeshHeader * MeshHeaders;
			MeshCullData * MeshCullDatas;
			MeshDrawData * MeshDrawDatas;

		public:
			Model(void);
			
			// Resets the texture references and releases the index/vertex buffers.
			~Model(void);
		};
	}
}

#endif

 
Here is ModelTexture.h:

#pragma once
// The guard now tests the same macro that it defines; previously it tested
// _MODELTEXTURE_H while defining _MODELTEXTURE_H_, so the guard never took effect.
#ifndef _MODELTEXTURE_H_
#define _MODELTEXTURE_H_

// Includes //
#include "Texture2D.h"

#include <memory>

namespace Ruined
{
	namespace Graphics
	{
		// One texture referenced by a model: its name as stored in the model file
		// and the texture object the loader resolved for that name.
		struct __declspec(dllexport) ModelTexture
		{
		public:
			// Points into the loaded file image (an offset until the loader rebases it);
			// the memory is not owned by this struct.
			char* TextureName;
			// Shared handle obtained from the texture cache at load time.
			std::shared_ptr<Texture2D> TextureContent;
		};
	}
}

#endif

 
 
Here are the mesh objects:

#ifndef _MESHCULLDATA_H_
#define _MESHCULLDATA_H_

#include <DirectXCollision.h>

namespace Ruined
{
	namespace Graphics
	{
		// Per-mesh data kept in its own array (Model::MeshCullDatas), separate from
		// the draw data; it holds only the mesh's bounding box.
		struct __declspec(dllexport) MeshCullData
		{
		public:
			DirectX::BoundingBox BoundingBox;
		};
	}
}

#endif
#ifndef _MESHHEADER_H_
#define _MESHHEADER_H_

#include "MeshBufferDesc.h"

namespace Ruined
{
	namespace Graphics
	{
		// Bit flags describing a mesh; values are distinct bits of a 16-bit mask
		// so several can be combined.
		// NOTE(review): the exact meaning of each flag is not visible here — presumably
		// they mark which vertex attributes/features the mesh provides; confirm against the exporter.
		enum MeshMask : unsigned short
		{
			Undefined       = 0x0000,
			Texture         = 0x0001,
			UVCoord         = 0x0002,
			Color           = 0x0004,
			Normal          = 0x0008,
			Tangent         = 0x0010,
			Binormal        = 0x0020,
			BoneIndices     = 0x0040,
			BoneWeights     = 0x0080,
			SplitBuffers    = 0x0100,
			AlphaBlend      = 0x4000
		};

		// Per-mesh header record (one entry of Model::MeshHeaders).
		struct __declspec(dllexport) MeshHeader
		{
			MeshMask Mask;
			unsigned char UVStreamCount;
			unsigned char ColorStreamCount;
			// Number of entries in the matching MeshDrawData::Resources array
			// (the loader iterates that array up to this count).
			unsigned short ResourceCount;
		};
	}
}

#endif
#ifndef _MESHDRAWDATA_H_
#define _MESHDRAWDATA_H_

#include <d3d11.h>

namespace Ruined
{
	namespace Graphics
	{
		// Per-mesh data needed for drawing (one entry of Model::MeshDrawDatas).
		// In the file the pointer members hold offsets to raw data; ModelLoader::Load
		// rebases them and replaces the raw data with created D3D objects.
		struct __declspec(dllexport) MeshDrawData
		{
		public:
			// Number of entries in VertexBuffers.
			unsigned int VertexBufferCount;
			// Before loading: array of MeshBufferDesc records; after loading: created buffers.
			ID3D11Buffer ** VertexBuffers;
			unsigned int * Strides;
			// Before loading: raw index data; after loading: the created index buffer.
			ID3D11Buffer * IndexBuffer;
			// DXGI_FORMAT_R16_UINT selects 16-bit indices; anything else is treated as 32-bit by the loader.
			DXGI_FORMAT IndexFormat;
			// Before loading: texture indices; after loading: shader resource views of those textures.
			ID3D11ShaderResourceView ** Resources;
			unsigned int IndexCount;
		};
	}
}

#endif
#ifndef _MESHBUFFERDESC_H_
#define _MESHBUFFERDESC_H_


namespace Ruined
{
	namespace Graphics
	{
		// Used for creating vertex buffers.
		// Only accessed at load time.
		struct __declspec(dllexport) MeshBufferDesc
		{
		public:
			// Size of the vertex data in bytes (becomes D3D11_BUFFER_DESC::ByteWidth).
			unsigned int BufferWidth;
			// Offset of the vertex data within the file; the loader adds the buffer address.
			void * Data;
		};
	}
}

#endif

Lastly here is the Model destructor:

// Drops the texture references and releases every Direct3D buffer created for the
// model's meshes. The arrays themselves are not freed here: they live inside the
// buffer the model was loaded into.
Model::~Model(void)
{
	if (Textures != nullptr)
	{
		for (int t = 0; t < TextureCount; t++)
		{
			Textures[t].TextureContent.reset();
			Textures[t].TextureName = nullptr;
		}
	}

	if (MeshDrawDatas == nullptr)
		return;

	for (int m = 0; m < MeshCount; m++)
	{
		MeshDrawData & mesh = MeshDrawDatas[m];

		if (mesh.IndexBuffer != nullptr)
		{
			mesh.IndexBuffer->Release();
			mesh.IndexBuffer = nullptr;
		}

		// A mesh without a vertex-buffer array has nothing further to release.
		if (mesh.VertexBuffers == nullptr)
			continue;

		// VertexBufferCount is unsigned, so the index is unsigned as well.
		for (unsigned int v = 0; v < mesh.VertexBufferCount; v++)
		{
			if (mesh.VertexBuffers[v] != nullptr)
			{
				mesh.VertexBuffers[v]->Release();
				mesh.VertexBuffers[v] = nullptr;
			}
		}
	}
}

 
Holy cow! That's one long post.
If anyone could take the time to read this, even just part of it, and lend me a hand, I would be very thankful.




#4950584 Point-light normal issue

Posted by LarryKing on 19 June 2012 - 08:31 AM

Well, I feel like a complete idiot!
I found the error... I wasn't sending the normal-buffer to the graphics card!

I have no idea how I missed that :P

Oh, well. Live and learn!

-Thank you to everyone who looked this over


PARTNERS