Jump to content

  • Log In with Google      Sign In   
  • Create Account

#ActualCodarki

Posted 31 December 2012 - 09:29 PM

<p>*edit: deleted bunch of code, becouse its most likely wrong and forum formatting is so ugly.</p>

#1Codarki

Posted 31 December 2012 - 09:18 PM

and as example for the test for basic building blocks for cache friendly memory indices, 64 element list (of integers of numbers 0-63) which fits in one 64bit register. Only usable in 64 bit builds though (if I remember correctly). Also very much tied to single cpu architecture, and most likely have some bugs.

 

#ifndef PT_FIXED_INT_LIST_64_H

#define PT_FIXED_INT_LIST_64_H


#include "popcount.h"

#include <cassert>


namespace pt {


#define PT_INLINE __forceinline

#define PT_ASSUME(e) (__assume(e))


typedef signed __int64 int64;

typedef unsigned char uint8;

typedef unsigned short uint16;

typedef unsigned int uint32;

typedef unsigned __int64 uint64;


/// List of integers, fixed capacity of 64 elements.

///

/// Consumes 8 bytes of memory, each node is one bit. Whole list fits in a 64

/// bit register to enable compiler to apply heavy optimizations.

///

class fixed_int_list_64

{

public:

    typedef uint32 value_type;

    enum filled_type{ filled };

    class iterator;


    /// Constructs new empty list.

    fixed_int_list_64();

    /// Constructs new list filled with indices 0 to 31.

    fixed_int_list_64(filled_type);


    bool empty() const;

    bool full() const;

    value_type size() const;


    iterator begin();

    iterator end();

    //iterator iterator_at(uint32 index);


    void insert(value_type index);


    void insert_2(value_type start_index);

    void insert_4(value_type start_index);


    /// Returns true if next 2 elements from index are available.

    bool next_2_available(value_type index) const;

    /// Returns true if next 4 elements from index are available.

    bool next_4_available(value_type index) const;


    /// Returns iterator which is greater than or equal to \a index.

    iterator find_ge(value_type index) const;


    iterator erase(value_type index);

    iterator erase(iterator it);


    void erase_no_return_value(value_type index);


    void pop_front();

    void pop_back();


private: // data members

    uint64 m_data;

};


class fixed_int_list_64::iterator

{

public:

    typedef uint32 value_type;


    iterator(uint32 index, uint64 const& data)

    : m_data(&data)

    , m_index(index)

    {

    }


    value_type operator*() const

    {

        return static_cast<value_type>(m_index);

    }

    iterator& operator++()

    {

        // Index of 64 is end iterator, and it will zero out the data at

        // shift.


        uint64 const data = *m_data;


        // Shift out one past the current bit towards LSB.

        // For some reason >> 64u doesnt clear MSB, so mask out MSB.

        uint64 shifted_data = (data >> (m_index + 1u))

            & ~uint64(0x8000000000000000);


        // Count zeros to skip.

        uint32 const skip_count = trailing_zero_count(shifted_data);


        // m_index = (skip_count < 64) ? index + 1 + skip_count : 64;


        uint32 const next_index = m_index + 1u + skip_count;

        int32 const a = 63 - skip_count;


        // index = (a >= 0) ? next_index : 32;

        m_index = select(a, next_index, 64);


        return *this;

    }


    bool operator!=(iterator const& other) const

    {

        return m_index != other.m_index

            || m_data != other.m_data;

    }

    bool operator==(iterator const& other) const

    {

        return m_index == other.m_index

            && m_data == other.m_data;

    }

    bool is_end() const

    {

        return m_index == 64u;

    }


private: // data members

    uint64 const* __restrict m_data;

    uint32 m_index;

};


// .inl


inline fixed_int_list_64::fixed_int_list_64()

: m_data(0)

{

}


inline fixed_int_list_64::fixed_int_list_64(filled_type)

: m_data(~static_cast<uint64>(0))

{

}


inline bool fixed_int_list_64::empty() const

{

    return m_data == 0;

}


inline bool fixed_int_list_64::full() const

{

    return m_data == ~static_cast<uint64>(0);

}


inline uint32 fixed_int_list_64::size() const

{

    return popcount_wiki_3(m_data);

}


inline fixed_int_list_64::iterator fixed_int_list_64::begin()

{

    uint32 const first_bit_index = trailing_zero_count(m_data);

    return iterator(first_bit_index, m_data);

}


inline fixed_int_list_64::iterator fixed_int_list_64::end()

{

    return iterator(64, m_data);

}


//iterator fixed_int_list_32::iterator_at(uint32 index)

//{

//}


inline void fixed_int_list_64::insert(uint32 index)

{

    PT_ASSUME(index < 64);

    m_data |= uint64(1) << index; // set bit

}


inline void fixed_int_list_64::insert_2(value_type start_index)

{

    PT_ASSUME(start_index < 63);

    m_data |= uint64(3) << start_index;

}


inline void fixed_int_list_64::insert_4(value_type start_index)

{

    PT_ASSUME(start_index < 61);

    m_data |= uint64(15) << start_index;

}


inline bool fixed_int_list_64::next_2_available(value_type index) const

{

    // return true if two bits are set

    return ((m_data >> index) & 3u) == 3u;

}


inline bool fixed_int_list_64::next_4_available(value_type index) const

{

    // return true if four bits are set

    return ((m_data >> index) & 15u) == 15u;

}


/// Returns iterator which is greater than or equal to \a index.

inline fixed_int_list_64::iterator fixed_int_list_64::find_ge(uint32 index) const

{

    assert(index <= 64);

    PT_ASSUME(index <= 64);


    // on an i386 "x shr/shl 32" = "x"

    // ie m_data >> 32 must have msb masked out.


    uint64 const data = m_data;

    uint64 const mask = ~static_cast<uint64>(0) >> (index - 1);

    uint64 const masked_bits = (data >> index) & mask;


    uint32 const first_bit_index = trailing_zero_count(masked_bits);

    int32 const next_index = index + first_bit_index;

    uint32 const clamped_next_index = branchless_min(next_index, int32(64));

    return iterator(clamped_next_index, m_data);

}


inline fixed_int_list_64::iterator fixed_int_list_64::erase(uint32 index)

{

    PT_ASSUME(index < 64);

    iterator const next = find_ge(index + 1);

    m_data &= ~(uint64(1) << index); // clear bit

    return next;

}


inline fixed_int_list_64::iterator fixed_int_list_64::erase(iterator it)

{

    uint32 const index = *it;

    ++it;


    m_data &= ~(uint64(1) << index); // clear bit

    return it;

}


inline void fixed_int_list_64::erase_no_return_value(value_type index)

{

    PT_ASSUME(index < 64);

    m_data &= ~(uint64(1) << index); // clear bit

}


inline void fixed_int_list_64::pop_front()

{

    uint32 const first_bit_index = trailing_zero_count(m_data);

    PT_ASSUME(first_bit_index < 64);

    m_data &= ~(uint64(1) << first_bit_index); // clear bit

}


inline void fixed_int_list_64::pop_back()

{

    uint64 const data = m_data;

    uint32 const last_bit_index = 63 - leading_zero_count(data);

    PT_ASSUME(last_bit_index < 64);

    m_data &= ~(uint64(1) << last_bit_index); // clear bit

}


///


template<int Count>

class fixed_int_list_64_pool

{

public:

    static size_t const list_count = Count;

    fixed_int_list_64_pool()

    {

        uintptr_t ptr = reinterpret_cast<uintptr_t>(&m_data[0]);

        ptr += alignment - 1;

        ptr -= ptr % alignment;

        m_aligned_data = reinterpret_cast<fixed_int_list_64* __restrict>(ptr);

    }


    fixed_int_list_64& at(size_t index)

    {

        assert(index < list_count);

        return m_aligned_data[index];

    }

private: // data members

    static size_t const alignment = 64;


    fixed_int_list_64 m_data[list_count + (alignment - 1)];

    fixed_int_list_64* __restrict m_aligned_data;

};


} // namespace pt


#endif

 


PARTNERS