I can't always do that (it's dependent on endianness), but yeah looks like shifting by 32 isn't defined behaviour. I've got around it using a 'special condition' which I hate. Here's the whole function, I need to work on the logic and try to remove the condition.
// ------------------------------------------------------
// copy memory bit stream from source to destination.
// ------------------------------------------------------
// u8* out : destination begining of stream.
// u32 outptr : bit pointer into the destination bitstream.
// u32 outlimit : destination container size in bits.
// const u8* src : source begining of stream.
// u32 srcptr : bit pointer into the source bitstream.
// u32 srclen : number of bits to copy from source to destination.
// return bool : success or failure (overflow).
//
// source bits :
// byte 1 byte 2 byte 3 byte 4 byte 5
// +--------+--------+--------+--------+--------+
// |........|......DE|FGHIJKLM|NOPQRST.|........|
// +--------+--------+--------+--------+--------+
//
// destination bits :
// byte 1 byte 2 byte 3 byte 4 byte 5
// +--------+--------+--------+--------+--------+
// |ABC.....|........|........|........|........|
// +--------+--------+--------+--------+--------+
//
// result :
// byte 1 byte 2 byte 3 byte 4 byte 5
// +--------+--------+--------+--------+--------+
// |ABCDEFGH|IJKLMNOP|QRST....|........|........|
// +--------+--------+--------+--------+--------+
// ------------------------------------------------------
bool bit_copy_32(void* out, u32 outptr, u32 outlimit, const void* src, u32 srcptr, u32 srclen)
{
NE_ASSERT(((u32)out & 3) == 0);
NE_ASSERT(((u32)src & 3) == 0);
NE_ASSERT((outlimit & 3) == 0);
const u32* src32 = (const u32*) src;
u32* out32 = (u32*) out;
if(outptr + srclen > outlimit)
return false;
// word-aligned copy. Use faster word / word copy mechanism.
if( (srcptr & 31) == 0 && (outptr & 31) == 0)
{
u32 srcword = (srcptr >> 5); // srouce word address.
u32 outword = (outptr >> 5); // destination word address.
u32 words = (srclen >> 5); // number of words to copy.
memcpy(out32 + outword, src32 + srcword, words * 4); // copy words.
// move to the end of the words to copy remaining bits.
u32 bits = (words << 5);
srclen -= bits;
outptr += bits;
srcptr += bits;
}
// bit copy.
while(srclen > 0)
{
// extract portions of words of similar size from the source.
u32 srcword = (srcptr >> 5); // source word address.
u32 outword = (outptr >> 5); // destination word address.
u32 srcbitp = (srcptr & 31); // source bit address.
u32 outbitp = (outptr & 31); // destination bit address.
u32 outarea = (32 - outbitp); // number of bits we need to clear at the destination to override with source bits.
// copy bits from source word to destination word.
#if(ENDIAN_ORDER == ENDIAN_LITTLE)
{
if(outarea == 32)
{
out32[outword] = ((src32[srcword] >> srcbitp) << outbitp); // paste bits from source word.
}
else
{
out32[outword] = ((out32[outword] << outarea) >> outarea); // clear area in destination word.
out32[outword] |= ((src32[srcword] >> srcbitp) << outbitp); // paste bits from source word.
}
}
#elif(ENDIAN_ORDER == ENDIAN_BIG)
{
if(outarea == 32)
{
out32[outword] = ((src32[srcword] << srcbitp) >> outbitp); // paste bits from source word.
}
else
{
out32[outword] = ((out32[outword] >> outarea) << outarea); // clear area in destination word.
out32[outword] |= ((src32[srcword] << srcbitp) >> outbitp); // paste bits from source word.
}
}
#else
{
NE_ERROR("unknown endianness");
return false;
}
#endif
// how many bits we copied from source to destination.
u32 srcarea = (32 - srcbitp); // number of bits we copied from source.
u32 cpycount = min3(srcarea, outarea, srclen); // smallest portion we copied.
// move to next bits in streams.
srclen -= cpycount;
srcptr += cpycount;
outptr += cpycount;
}
return true;
}