Show differencesHistory of post edits
#ActualCodarki
Posted 31 December 2012 - 09:29 PM
<p>*edit: deleted bunch of code, becouse its most likely wrong and forum formatting is so ugly.</p>
#1Codarki
Posted 31 December 2012 - 09:18 PM
and as example for the test for basic building blocks for cache friendly memory indices, 64 element list (of integers of numbers 0-63) which fits in one 64bit register. Only usable in 64 bit builds though (if I remember correctly). Also very much tied to single cpu architecture, and most likely have some bugs.
#ifndef PT_FIXED_INT_LIST_64_H
#define PT_FIXED_INT_LIST_64_H
#include "popcount.h"
#include <cassert>
namespace pt {
#define PT_INLINE __forceinline
#define PT_ASSUME(e) (__assume(e))
typedef signed __int64 int64;
typedef unsigned char uint8;
typedef unsigned short uint16;
typedef unsigned int uint32;
typedef unsigned __int64 uint64;
/// List of integers, fixed capacity of 64 elements.
///
/// Consumes 8 bytes of memory, each node is one bit. Whole list fits in a 64
/// bit register to enable compiler to apply heavy optimizations.
///
class fixed_int_list_64
{
public:
typedef uint32 value_type;
enum filled_type{ filled };
class iterator;
/// Constructs new empty list.
fixed_int_list_64();
/// Constructs new list filled with indices 0 to 31.
fixed_int_list_64(filled_type);
bool empty() const;
bool full() const;
value_type size() const;
iterator begin();
iterator end();
//iterator iterator_at(uint32 index);
void insert(value_type index);
void insert_2(value_type start_index);
void insert_4(value_type start_index);
/// Returns true if next 2 elements from index are available.
bool next_2_available(value_type index) const;
/// Returns true if next 4 elements from index are available.
bool next_4_available(value_type index) const;
/// Returns iterator which is greater than or equal to \a index.
iterator find_ge(value_type index) const;
iterator erase(value_type index);
iterator erase(iterator it);
void erase_no_return_value(value_type index);
void pop_front();
void pop_back();
private: // data members
uint64 m_data;
};
class fixed_int_list_64::iterator
{
public:
typedef uint32 value_type;
iterator(uint32 index, uint64 const& data)
: m_data(&data)
, m_index(index)
{
}
value_type operator*() const
{
return static_cast<value_type>(m_index);
}
iterator& operator++()
{
// Index of 64 is end iterator, and it will zero out the data at
// shift.
uint64 const data = *m_data;
// Shift out one past the current bit towards LSB.
// For some reason >> 64u doesnt clear MSB, so mask out MSB.
uint64 shifted_data = (data >> (m_index + 1u))
& ~uint64(0x8000000000000000);
// Count zeros to skip.
uint32 const skip_count = trailing_zero_count(shifted_data);
// m_index = (skip_count < 64) ? index + 1 + skip_count : 64;
uint32 const next_index = m_index + 1u + skip_count;
int32 const a = 63 - skip_count;
// index = (a >= 0) ? next_index : 32;
m_index = select(a, next_index, 64);
return *this;
}
bool operator!=(iterator const& other) const
{
return m_index != other.m_index
|| m_data != other.m_data;
}
bool operator==(iterator const& other) const
{
return m_index == other.m_index
&& m_data == other.m_data;
}
bool is_end() const
{
return m_index == 64u;
}
private: // data members
uint64 const* __restrict m_data;
uint32 m_index;
};
// .inl
inline fixed_int_list_64::fixed_int_list_64()
: m_data(0)
{
}
inline fixed_int_list_64::fixed_int_list_64(filled_type)
: m_data(~static_cast<uint64>(0))
{
}
inline bool fixed_int_list_64::empty() const
{
return m_data == 0;
}
inline bool fixed_int_list_64::full() const
{
return m_data == ~static_cast<uint64>(0);
}
inline uint32 fixed_int_list_64::size() const
{
return popcount_wiki_3(m_data);
}
inline fixed_int_list_64::iterator fixed_int_list_64::begin()
{
uint32 const first_bit_index = trailing_zero_count(m_data);
return iterator(first_bit_index, m_data);
}
inline fixed_int_list_64::iterator fixed_int_list_64::end()
{
return iterator(64, m_data);
}
//iterator fixed_int_list_32::iterator_at(uint32 index)
//{
//}
inline void fixed_int_list_64::insert(uint32 index)
{
PT_ASSUME(index < 64);
m_data |= uint64(1) << index; // set bit
}
inline void fixed_int_list_64::insert_2(value_type start_index)
{
PT_ASSUME(start_index < 63);
m_data |= uint64(3) << start_index;
}
inline void fixed_int_list_64::insert_4(value_type start_index)
{
PT_ASSUME(start_index < 61);
m_data |= uint64(15) << start_index;
}
inline bool fixed_int_list_64::next_2_available(value_type index) const
{
// return true if two bits are set
return ((m_data >> index) & 3u) == 3u;
}
inline bool fixed_int_list_64::next_4_available(value_type index) const
{
// return true if four bits are set
return ((m_data >> index) & 15u) == 15u;
}
/// Returns iterator which is greater than or equal to \a index.
inline fixed_int_list_64::iterator fixed_int_list_64::find_ge(uint32 index) const
{
assert(index <= 64);
PT_ASSUME(index <= 64);
// on an i386 "x shr/shl 32" = "x"
// ie m_data >> 32 must have msb masked out.
uint64 const data = m_data;
uint64 const mask = ~static_cast<uint64>(0) >> (index - 1);
uint64 const masked_bits = (data >> index) & mask;
uint32 const first_bit_index = trailing_zero_count(masked_bits);
int32 const next_index = index + first_bit_index;
uint32 const clamped_next_index = branchless_min(next_index, int32(64));
return iterator(clamped_next_index, m_data);
}
inline fixed_int_list_64::iterator fixed_int_list_64::erase(uint32 index)
{
PT_ASSUME(index < 64);
iterator const next = find_ge(index + 1);
m_data &= ~(uint64(1) << index); // clear bit
return next;
}
inline fixed_int_list_64::iterator fixed_int_list_64::erase(iterator it)
{
uint32 const index = *it;
++it;
m_data &= ~(uint64(1) << index); // clear bit
return it;
}
inline void fixed_int_list_64::erase_no_return_value(value_type index)
{
PT_ASSUME(index < 64);
m_data &= ~(uint64(1) << index); // clear bit
}
inline void fixed_int_list_64::pop_front()
{
uint32 const first_bit_index = trailing_zero_count(m_data);
PT_ASSUME(first_bit_index < 64);
m_data &= ~(uint64(1) << first_bit_index); // clear bit
}
inline void fixed_int_list_64::pop_back()
{
uint64 const data = m_data;
uint32 const last_bit_index = 63 - leading_zero_count(data);
PT_ASSUME(last_bit_index < 64);
m_data &= ~(uint64(1) << last_bit_index); // clear bit
}
///
template<int Count>
class fixed_int_list_64_pool
{
public:
static size_t const list_count = Count;
fixed_int_list_64_pool()
{
uintptr_t ptr = reinterpret_cast<uintptr_t>(&m_data[0]);
ptr += alignment - 1;
ptr -= ptr % alignment;
m_aligned_data = reinterpret_cast<fixed_int_list_64* __restrict>(ptr);
}
fixed_int_list_64& at(size_t index)
{
assert(index < list_count);
return m_aligned_data[index];
}
private: // data members
static size_t const alignment = 64;
fixed_int_list_64 m_data[list_count + (alignment - 1)];
fixed_int_list_64* __restrict m_aligned_data;
};
} // namespace pt
#endif