I wish to make my project runnable under Windows (DX client, server) and Linux (server only).
Target architectures are x86, x86-64 recommended.
I have a few functions, mostly those related to fixed point arithmetics, which would benefit from inline assembly.
but MSVC and GCC are very different in terms of declaring an asm block, not to mention different in the asm syntax itself...
I was only able to think of the following workaround :
1) declare each asm-instruction to be used with a #define, probably 1 for each size/type of operands combinaison
2) use those macros one by one in the c++ code
3) pray
macro declaration example :
// COMPILER_MSVC or COMPILER_GCC are defined in some other header...
#if (defined COMPILER_MSVC)
#define ASM86_MOV_R32toR32(dst, src) __asm { mov dst, src }
#define ASM86_MOV_R32toV32(dst, src) __asm { mov dst, src }
#define ASM86_MOV_V32toR32(dst, src) __asm { mov dst, src }
// etc.
#elif (defined COMPILER_GCC)
#define ASM86_MOV_R32toR32(dst, src) asm("mov %%"#src#", %%"#dst#";" : : : "%"#dst)
#define ASM86_MOV_R32toV32(dst, src) asm("mov %%"#src#", %0;" : "=r"(dst) : : )
#define ASM86_MOV_V32toR32(dst, src) asm("mov %0, %%"#dst#";" : : "r"(src) : "%"#dst)
// etc.
#endif
usage example :
// int64 and uint64 are typedef'd in some other header...
// Returns the encoding nr of a value vr such as : vr=va/vb,
// given na and nb, respectively the encodings of two values va and vb.
// 'Scalar' type is 64b fixed point with 30b after point.
// => vr = nr.2^-30 ; va = na.2^-30 ; vb = nb.2^-30
inline int64 ScalarFixedImpl::div(int64 na, int64 nb) {
bool negA = na<0;
bool negB = nb<0;
#ifdef ARCH_64
uint64 absna = negA? static_cast<uint64>(-na) : static_cast<uint64>(na);
uint64 absnb = negB? static_cast<uint64>(-nb) : static_cast<uint64>(nb);
uint64 absResult;
ASM8664_XORQ_R64(rdx, rdx); // zeroes rdx
ASM8664_MOVQ_V64toR64(rax, absna); // puts na in rax => rdx:rax = 0:na
ASM8664_DIVQ_V64(absnb); // rdx = rdx:rax mod nb = na mod nb, i.e the first rest (r)
// rax = rdx:rax div nb = na div nb, i.e the first quotient (q1)
ASM8664_SHLQ_R64(rax, 30); // rax = q1<<30
ASM8664_MOVQ_R64toV64(absResult, rax); // stores q1<<30 in absResult
ASM8664_XORQ_R64(rax, rax); // zeroes rax => rdx:rax = r:0
ASM8664_DIVQ_V64(absnb); // rdx = rdx:rax mod nb = (r<<64) mod nb, i.e the second rest (discarded)
// rax = rdx:rax div nb = (r<<64) div nb, i.e the second quotient (q2)
ASM8664_SHRQ_R64(rax, 34); // rax = q2>>34 --- logical shift to leave zeroes in left bits
ASM8664_ORQ_R64toV64(absResult, rax); // absResult = absResult|rax = q1<<30 | q2>>34 = (uint64) q1.q2 with a 30b fixed point.
return (negA^negB)? -static_cast<int64>(absResult) : static_cast<int64>(absResult);
#else
// here comes the pain...
assert(false, "ScalarFixedImpl::div : not yet implemented for 32b");
#endif
}
So, here are my questions ^^ :
- Will that work ?
- Will both compilers be able to optimize, say, the use of the input variables (not the asm itself, of course) ?
- Do you think of another solution ?
I guess a few guys out there won't be able to refrain from answering to the unspoken question : "Is it worth the pain ?"
As I am a very kind person, here you are :
- Is it worth the pain ?
Thanks in advance







