I don't have a Rijndael implementation at hand right now, but this is a working version of Rijndael, and you can just add some code to it to get the expanded key.

That looks so different from my implementation that I don't know where to begin.

The expanded key material for encryption and decryption is the same, as far as I know (unless some strange variant exists in which it differs), so if your encryption routines work, that suggests your decryption routines are wrong. Perhaps you could post them so we can take a look.

My implementation, and the implementation I used as a reference, uses two key expansion functions per key size, one for encryption, one for decryption.

My 128 bit pair of functions are:

/*
 * Advance a 16-byte round key, in place, to the next round key.
 *
 * k  - 16-byte round key; overwritten with its successor.
 * rc - round constant; consumed by this step and then doubled
 *      (shift left, XOR 0x1b when the top bit was set).
 */
static void expandEncKey128(ui8 * k, ui8 * rc){
    /* First word: fold in the S-box image of the last word, rotated by
     * one byte, together with the round constant. */
    k[0] ^= sbox[k[13]] ^ *rc;
    k[1] ^= sbox[k[14]];
    k[2] ^= sbox[k[15]];
    k[3] ^= sbox[k[12]];

    /* Step the round constant forward for the next call. */
    const unsigned cur = *rc;
    *rc = ((cur << 1) ^ (((cur >> 7) & 1) * 0x1bu));

    /* Remaining three words: every byte absorbs the byte four positions
     * before it, walking upwards so each word sees its updated predecessor. */
    for(int pos = 4; pos < 16; ++pos)
        k[pos] ^= k[pos - 4];
}
/*
 * Step a 16-byte round key, in place, back to the previous round key.
 * Performs the operations of expandEncKey128 in reverse order.
 *
 * k  - 16-byte round key; overwritten with its predecessor.
 * rc - round constant; halved first (shift right, XOR 0x8d when the low
 *      bit was set) and the halved value is the one applied to k[0].
 */
static void expandDecKey128(ui8 * k, ui8 * rc){
    /* Undo the word chaining from the top down, so every byte is XORed
     * with a byte that has not been restored yet. */
    for(int pos = 15; pos >= 4; --pos)
        k[pos] ^= k[pos - 4];

    /* Step the round constant backwards before using it. */
    const unsigned cur = *rc;
    *rc = (cur >> 1) ^ ((cur & 1u) * 0x8du);

    /* Undo the first-word transformation; k[12..15] now hold the
     * previous round's last word again. */
    k[0] ^= sbox[k[13]] ^ *rc;
    k[1] ^= sbox[k[14]];
    k[2] ^= sbox[k[15]];
    k[3] ^= sbox[k[12]];
}

They combine a lot of steps into few operations; my implementation is byte-oriented, if it isn't obvious. From my perspective, these are inverses of each other. The (obfuscated) encryption routine looks like this:

/*
 * Shift x left by one bit and, if the top bit (0x80) was set, XOR the
 * result with 0x1b. The value is narrowed to ui8 on return.
 */
static ui8 gfXTime(ui8 x){
    unsigned doubled = x << 1;
    if(x & 0x80u)
        doubled ^= 0x1bu;
    return doubled;
}
/*
 * Shift rc right by one bit and, if the low bit was set, XOR the result
 * with 0x8d. This is the same step expandDecKey128 applies to its round
 * constant.
 */
static ui8 rconInv(ui8 rc){
    unsigned halved = rc >> 1;
    if(rc & 1u)
        halved ^= 0x8du;
    return halved;
}
/*
* Byte indices of the 16-byte state, laid out column by column:
* 0 4 8 12
* 1 5 9 13
* 2 6 10 14
* 3 7 11 15
*/
/* Replace each of the 16 bytes at a with its sbox entry, last byte first. */
static void subBytes(ui8 * a){
    for(ui8 * p = a + 16; p != a; ){
        --p;
        *p = sbox[*p];
    }
}
/* Replace each of the 16 bytes at a with its sboxInv entry, last byte first. */
static void subBytesInv(ui8 * a){
    for(ui8 * p = a + 16; p != a; ){
        --p;
        *p = sboxInv[*p];
    }
}
/*
 * XOR the 16 bytes at key into the 16 bytes at a, in place.
 * Bytes are processed from index 15 down to 0.
 */
static void addRoundKey(ui8 * a, ui8 * key){
    int idx = 16;
    while(idx-- > 0)
        a[idx] ^= key[idx];
}
/*
 * XOR the first 16 bytes of key into buf while copying 32 bytes of key
 * into copyKey.
 *
 * buf     - 16-byte state, modified in place.
 * key     - source key; bytes 0..31 are read.
 * copyKey - destination; bytes 0..31 are written.
 *
 * NOTE(review): 32 bytes are read and written even though only 16 are
 * XORed into buf. Confirm that both key buffers are at least 32 bytes
 * long when this is used with a 128-bit key.
 */
static void addRoundKeyCopy(ui8 * buf, ui8 * key, ui8 * copyKey){
    int idx = 16;
    while(idx-- > 0){
        copyKey[idx] = key[idx];
        buf[idx] ^= key[idx];
        copyKey[16 + idx] = key[16 + idx];
    }
}
/*
 * Rotate the rows of the 16-byte state in place. With byte index
 * row + 4 * column, row 0 is untouched, row 1 moves left by one column,
 * row 2 by two, and row 3 by three.
 */
static void shiftRows(ui8 * a){
    ui8 saved;

    /* Row 1 (indices 1, 5, 9, 13): rotate left by one. */
    saved = a[1];
    a[1] = a[5];
    a[5] = a[9];
    a[9] = a[13];
    a[13] = saved;

    /* Row 2 (indices 2, 6, 10, 14): rotate by two, i.e. two swaps. */
    saved = a[2];
    a[2] = a[10];
    a[10] = saved;
    saved = a[6];
    a[6] = a[14];
    a[14] = saved;

    /* Row 3 (indices 3, 7, 11, 15): rotate left by three, which is the
     * same as rotating right by one. */
    saved = a[3];
    a[3] = a[15];
    a[15] = a[11];
    a[11] = a[7];
    a[7] = saved;
}
/*
 * Undo shiftRows on the 16-byte state in place. With byte index
 * row + 4 * column, row 0 is untouched, row 1 moves right by one column,
 * row 2 by two, and row 3 by three.
 */
static void shiftRowsInv(ui8 * a){
    ui8 saved;

    /* Row 1 (indices 1, 5, 9, 13): rotate right by one. */
    saved = a[1];
    a[1] = a[13];
    a[13] = a[9];
    a[9] = a[5];
    a[5] = saved;

    /* Row 2 (indices 2, 6, 10, 14): rotate by two, i.e. two swaps. */
    saved = a[2];
    a[2] = a[10];
    a[10] = saved;
    saved = a[6];
    a[6] = a[14];
    a[14] = saved;

    /* Row 3 (indices 3, 7, 11, 15): rotate right by three, which is the
     * same as rotating left by one. */
    saved = a[3];
    a[3] = a[7];
    a[7] = a[11];
    a[11] = a[15];
    a[15] = saved;
}
/*
 * Mix each 4-byte column of the 16-byte state in place. Writing 2*v for
 * gfXTime(v) and 3*v for gfXTime(v) ^ v, each column (s0, s1, s2, s3)
 * becomes:
 *   2*s0 ^ 3*s1 ^   s2 ^   s3
 *     s0 ^ 2*s1 ^ 3*s2 ^   s3
 *     s0 ^   s1 ^ 2*s2 ^ 3*s3
 *   3*s0 ^   s1 ^   s2 ^ 2*s3
 */
static void mixColumns(ui8 * r){
    for(ui8 * col = r; col != r + 16; col += 4){
        /* Snapshot the column before any byte is overwritten. */
        const ui8 s0 = col[0];
        const ui8 s1 = col[1];
        const ui8 s2 = col[2];
        const ui8 s3 = col[3];

        /* Doubled copies of each input byte. */
        const ui8 d0 = gfXTime(s0);
        const ui8 d1 = gfXTime(s1);
        const ui8 d2 = gfXTime(s2);
        const ui8 d3 = gfXTime(s3);

        col[0] = d0 ^ d1 ^ s1 ^ s2 ^ s3;
        col[1] = s0 ^ d1 ^ d2 ^ s2 ^ s3;
        col[2] = s0 ^ s1 ^ d2 ^ d3 ^ s3;
        col[3] = d0 ^ s0 ^ s1 ^ s2 ^ d3;
    }
}
/*
 * Apply the inverse column mix to each 4-byte column of the 16-byte
 * state in place. Every output byte is the input byte XORed with a
 * correction built from repeated gfXTime calls on sums of the column's
 * bytes; even rows share one correction term and odd rows the other.
 */
static void mixColumnsInv(ui8 * r){
    for(int base = 0; base < 16; base += 4){
        /* Snapshot the column before any byte is overwritten. */
        const ui32 c0 = r[base];
        const ui32 c1 = r[base + 1];
        const ui32 c2 = r[base + 2];
        const ui32 c3 = r[base + 3];

        const ui32 sum = c0 ^ c1 ^ c2 ^ c3;
        const ui32 sum2 = gfXTime(sum);

        /* Shared correction for rows 0 and 2, and for rows 1 and 3. */
        const ui32 even = sum ^ gfXTime(gfXTime(sum2 ^ c0 ^ c2));
        const ui32 odd = sum ^ gfXTime(gfXTime(sum2 ^ c1 ^ c3));

        r[base] ^= even ^ gfXTime(c0 ^ c1);
        r[base + 1] ^= odd ^ gfXTime(c1 ^ c2);
        r[base + 2] ^= even ^ gfXTime(c2 ^ c3);
        r[base + 3] ^= odd ^ gfXTime(c3 ^ c0);
    }
}

/*
 * Encrypt one 16-byte block.
 *
 * out - receives the 16 output bytes.
 * in  - the 16 input bytes.
 *
 * Round keys are generated on the fly: key_ is loaded from encryptKey_
 * and advanced once per round by expandEncKey, with the round constant
 * starting at 1. There are nine full rounds followed by one final round
 * that skips mixColumns, so expandEncKey is called ten times in total.
 */
void encrypt(ui8 * out, const ui8 * in){
    ui8 state[16];
    for(sizeType i = 16; i--;)
        state[i] = in[i];

    ui8 rcon = 1;

    /* Initial key addition; also seeds the working key key_. */
    addRoundKeyCopy(state, encryptKey_, key_);

    /* Nine full rounds. The loop must end here: the final round and the
     * output copy below run exactly once, after the loop. */
    for(sizeType round = 1; round < 10; ++round){
        subBytes(state);
        shiftRows(state);
        mixColumns(state);
        expandEncKey(key_, &rcon);
        addRoundKey(state, key_);
    }

    /* Final round: same as above but without mixColumns. */
    subBytes(state);
    shiftRows(state);
    expandEncKey(key_, &rcon);
    addRoundKey(state, key_);

    for(sizeType i = 16; i--;)
        out[i] = state[i];

    /* Clear the local state through a volatile pointer so the stores
     * are not removed as dead writes. */
    volatile ui8 * wipe = state;
    for(sizeType i = 16; i--;)
        wipe[i] = 0;
}

This encryption method produces results that match the published test vectors. However, decryption does not work; I can't tell whether the fault lies in the decryption function itself, in the key expansion, or somewhere else.

void decrypt(void * out, const void * in){
ui8 state[16];
for(sizeType i = 16; i--;)
state[i] = in[i];
ui8 rcon = 0x6c;
addRoundKeyCopy(state, decryptKey_, key_);
for(sizeType i = 1; i < 10; ++i){
shiftRowsInv(state);
subBytesInv(state);
expandDecKey(key_, &rcon);
addRoundKey(state, key_);
mixColumnsInv(state);
}
shiftRowsInv(state);
subBytesInv(state);
addRoundKey(state, key_);
for(sizeType i = 16; i--;)
out[i] = state[i];
for(sizeType i = 16; i--;)
state[i] = 0;
}

encryptKey_ and decryptKey_ are both initialized to the bytes of the key by the caller.

**Edited by Ectara, 30 March 2014 - 10:38 PM.**