Very strange bug (when running basic C array code)

Started by
39 comments, last by Washu 9 years, 10 months ago
No, it's not a matter of 'what values' it's a matter of YOU learning to use the RIGHT TOOLS to debug this.

Given the call stack, memory dumps and other information this would probably be trivial to work out - use the right tool and stop trying to get others to debug things blindly for you.

This kind of thing is PRECISELY why the tools exist.
(Same with using a real profiler to work out performance problems.)
Advertisement

this is assembly output

 
 
# Compiler-generated listing (GAS, AT&T operand order) for tests.c.
.file "tests.c"
# Read-only section holding the NUL-terminated string literals.
# Each label below is stored to (%esp) as the argument of a
# __Z5alertPcz call in __Z14matrix_mul_ssev.
.section .rdata,"dr"
LC0:
.ascii "yo1\0"
LC1:
.ascii "yo2\0"
LC2:
.ascii "yo3\0"
LC3:
.ascii "ay\0"
.text
.p2align 4,,15
.globl __Z14matrix_mul_ssev
.def __Z14matrix_mul_ssev; .scl 2; .type 32; .endef
#-----------------------------------------------------------------------
# __Z14matrix_mul_ssev  (source shown with this listing: void matrix_mul_sse())
#
# For every group of four floats at index i = 0, 4, ..., 99996:
#   _normal_x[i..i+3] = n_x*modelRight_4x + n_y*modelRight_4y + n_z*modelRight_4z
#   _normal_y[i..i+3] = n_x*modelUp_4x    + n_y*modelUp_4y    + n_z*modelUp_4z
#   _normal_z[i..i+3] = n_x*modelDir_4x   + n_y*modelDir_4y   + n_z*modelDir_4z
# where n_x/n_y/n_z are the four floats at _n_x/_n_y/_n_z + 4*i.
# __Z5alertPcz is called three times during setup and once per iteration.
#
# Registers:
#   %ebx        element index i; saved on entry, restored before ret,
#               and live across every call to __Z5alertPcz
#   %xmm0-%xmm6 scratch
#
# Stack frame after the prologue (offsets from %esp, 168 bytes):
#     0(%esp)   outgoing argument slot for __Z5alertPcz
#    16(%esp)   copy of _modelRight_4x      64(%esp)  copy of _modelUp_4x
#    32(%esp)   copy of _modelRight_4y      80(%esp)  copy of _modelUp_4y
#    48(%esp)   copy of _modelRight_4z      96(%esp)  copy of _modelUp_4z
#   112(%esp)   copy of _modelDir_4x
#   128(%esp)   copy of _modelDir_4y
#   144(%esp)   copy of _modelDir_4z
#
# Alignment assumption: every access to these slots is a movaps or a
# mulps with a memory operand, both of which require a 16-byte-aligned
# address. The prologue only does pushl (4) + subl (168) = 172 bytes and
# never realigns %esp (there is no "andl $-16, %esp"), so the slots are
# aligned only when %esp on entry is congruent to 12 mod 16, i.e. when
# the caller's %esp was 16-byte aligned at its call instruction.
# NOTE(review): if some caller up the chain does not guarantee that, the
# first "movaps %xmm0, 16(%esp)" would fault -- confirm with a debugger.
#-----------------------------------------------------------------------
__Z14matrix_mul_ssev:
pushl %ebx
# i = 0
xorl %ebx, %ebx
subl $168, %esp
# alert("yo1")
movl $LC0, (%esp)
call __Z5alertPcz
# Copy the nine 4-lane constants from .data into the stack slots listed
# above; the loop reloads them from the stack on every iteration.
# NOTE(review): presumably they are kept in memory because xmm registers
# are not preserved across the calls to __Z5alertPcz -- confirm against
# the ABI in use.
movaps _modelRight_4x, %xmm0
# alert("yo2"); the store below is the first aligned access to the frame
movl $LC1, (%esp)
movaps %xmm0, 16(%esp)
call __Z5alertPcz
movaps _modelRight_4y, %xmm0
# alert("yo3")
movl $LC2, (%esp)
movaps %xmm0, 32(%esp)
call __Z5alertPcz
movaps _modelRight_4z, %xmm0
movaps %xmm0, 48(%esp)
movaps _modelUp_4x, %xmm0
movaps %xmm0, 64(%esp)
movaps _modelUp_4y, %xmm0
movaps %xmm0, 80(%esp)
movaps _modelUp_4z, %xmm0
movaps %xmm0, 96(%esp)
movaps _modelDir_4x, %xmm0
movaps %xmm0, 112(%esp)
movaps _modelDir_4y, %xmm0
movaps %xmm0, 128(%esp)
movaps _modelDir_4z, %xmm0
movaps %xmm0, 144(%esp)
.p2align 4,,7
# Loop top: %ebx = i, a multiple of 4 in [0, 100000).
L2:
# alert("ay")
movl $LC3, (%esp)
call __Z5alertPcz
# xmm0 = n_x, xmm3 = n_y, xmm4 = n_z (four floats each, at byte offset 4*i).
# The three dot-product chains below are interleaved by the compiler:
#   xmm2 accumulates the _normal_x result,
#   xmm1 (after the reload from 64(%esp)) the _normal_y result,
#   xmm0 ends up holding the _normal_z result.
movaps _n_x(,%ebx,4), %xmm0
movaps _n_y(,%ebx,4), %xmm3
movaps 32(%esp), %xmm5
movaps 16(%esp), %xmm2
mulps %xmm3, %xmm5
movaps _n_z(,%ebx,4), %xmm4
mulps %xmm0, %xmm2
movaps 48(%esp), %xmm1
mulps %xmm4, %xmm1
movaps 80(%esp), %xmm6
addps %xmm5, %xmm2
mulps %xmm3, %xmm6
movaps 96(%esp), %xmm5
addps %xmm1, %xmm2
movaps 64(%esp), %xmm1
mulps %xmm4, %xmm5
# xmm2 = n_x*Rx + n_y*Ry + n_z*Rz
movaps %xmm2, _normal_x(,%ebx,4)
mulps %xmm0, %xmm1
# n_x, n_y and n_z are consumed in place for the last row
mulps 128(%esp), %xmm3
mulps 112(%esp), %xmm0
addps %xmm6, %xmm1
mulps 144(%esp), %xmm4
addps %xmm5, %xmm1
addps %xmm3, %xmm0
# xmm1 = n_x*Ux + n_y*Uy + n_z*Uz
movaps %xmm1, _normal_y(,%ebx,4)
addps %xmm4, %xmm0
# xmm0 = n_x*Dx + n_y*Dy + n_z*Dz
movaps %xmm0, _normal_z(,%ebx,4)
# i += 4; repeat until i == 100000 (25000 iterations)
addl $4, %ebx
cmpl $100000, %ebx
jne L2
# Epilogue: release the frame and restore the saved %ebx.
addl $168, %esp
popl %ebx
ret
.p2align 4,,15
.globl __Z5testsv
.def __Z5testsv; .scl 2; .type 32; .endef
# __Z5testsv  (source shown with this listing: void tests())
# Calls __Z14matrix_mul_ssev, then _exit with argument 0.
__Z5testsv:
# 28 bytes of frame + the 4-byte return address = 32, so %esp mod 16 at
# the call below equals the value the caller of this function had at its
# own call instruction; no realignment is performed here either.
subl $28, %esp
call __Z14matrix_mul_ssev
# exit(0); no ret follows -- presumably _exit does not return.
movl $0, (%esp)
call _exit
# Initialized 4-lane vector constants, each 16-byte aligned so that the
# movaps loads in __Z14matrix_mul_ssev can read them directly.
# Every symbol holds four identical 32-bit words; read as IEEE-754
# single-precision floats the three bit patterns used are:
#   1067869798 = 0x3FA66666  (~1.3)
#   1067030938 = 0x3F99999A  (~1.2)
#   1066192077 = 0x3F8CCCCD  (~1.1)
.globl _modelDir_4z
.data
.align 16
_modelDir_4z:
.long 1067869798
.long 1067869798
.long 1067869798
.long 1067869798
.globl _modelDir_4y
.align 16
_modelDir_4y:
.long 1067030938
.long 1067030938
.long 1067030938
.long 1067030938
.globl _modelDir_4x
.align 16
_modelDir_4x:
.long 1066192077
.long 1066192077
.long 1066192077
.long 1066192077
.globl _modelUp_4z
.align 16
_modelUp_4z:
.long 1067869798
.long 1067869798
.long 1067869798
.long 1067869798
.globl _modelUp_4y
.align 16
_modelUp_4y:
.long 1067030938
.long 1067030938
.long 1067030938
.long 1067030938
.globl _modelUp_4x
.align 16
_modelUp_4x:
.long 1066192077
.long 1066192077
.long 1066192077
.long 1066192077
.globl _modelRight_4z
.align 16
_modelRight_4z:
.long 1067869798
.long 1067869798
.long 1067869798
.long 1067869798
.globl _modelRight_4y
.align 16
_modelRight_4y:
.long 1067030938
.long 1067030938
.long 1067030938
.long 1067030938
.globl _modelRight_4x
.align 16
_modelRight_4x:
.long 1066192077
.long 1066192077
.long 1066192077
.long 1066192077
# Zero-initialized input (_n_*) and output (_normal_*) arrays.
# Each is 400000 bytes, i.e. 100000 4-byte elements, matching the index
# range of the loop in __Z14matrix_mul_ssev, and each is 16-byte aligned
# as required by the movaps accesses that index them.
.globl _n_z
.bss
.align 16
_n_z:
.space 400000
.globl _n_y
.align 16
_n_y:
.space 400000
.globl _n_x
.align 16
_n_x:
.space 400000
.globl _normal_z
.align 16
_normal_z:
.space 400000
.globl _normal_y
.align 16
_normal_y:
.space 400000
.globl _normal_x
.align 16
_normal_x:
.space 400000
# Scalar (single 32-bit word, 4-byte aligned) counterparts of the vector
# constants; the bit patterns are the same three values (~1.3, ~1.2,
# ~1.1 as IEEE-754 single-precision floats). None of them is referenced
# by the code in this listing.
.globl _modelDir_z
.data
.align 4
_modelDir_z:
.long 1067869798
.globl _modelDir_y
.align 4
_modelDir_y:
.long 1067030938
.globl _modelDir_x
.align 4
_modelDir_x:
.long 1066192077
.globl _modelUp_z
.align 4
_modelUp_z:
.long 1067869798
.globl _modelUp_y
.align 4
_modelUp_y:
.long 1067030938
.globl _modelUp_x
.align 4
_modelUp_x:
.long 1066192077
.globl _modelRight_z
.align 4
_modelRight_z:
.long 1067869798
.globl _modelRight_y
.align 4
_modelRight_y:
.long 1067030938
.globl _modelRight_x
.align 4
_modelRight_x:
.long 1066192077
# Symbol records for the two external functions called above.
.def __Z5alertPcz; .scl 2; .type 32; .endef
.def _exit; .scl 2; .type 32; .endef
 
for this code

// Multiplies every normal (n_x[i], n_y[i], n_z[i]) by the 3x3 matrix
// whose rows are the modelRight / modelUp / modelDir vectors, four
// elements at a time with SSE, and writes the result to
// normal_x / normal_y / normal_z. Processes 100*1000 elements.
//
// The alert() calls look like debug tracing used to locate the crash
// discussed in this thread -- TODO confirm.
//
// _mm_load_ps / _mm_store_ps require 16-byte-aligned addresses. The
// __m128 locals may also be spilled to the stack (the assembly listing
// for this function keeps all nine matrix vectors in stack slots), and
// those spills use aligned moves as well.
// NOTE(review): if a caller cannot guarantee a 16-byte-aligned stack,
// GCC's -mstackrealign option or __attribute__((force_align_arg_pointer))
// on the entry function is the usual remedy -- confirm for this toolchain.
void matrix_mul_sse()
{
 
  alert("yo1");
 
   // Each modelXxx_4c global is loaded as four packed floats.
   __m128 mRx = _mm_load_ps((float*) &modelRight_4x);
 
  alert("yo2");
 
   __m128 mRy = _mm_load_ps((const float*) &modelRight_4y);
 
  alert("yo3");
 
   __m128 mRz = _mm_load_ps((const float*) &modelRight_4z);
   __m128 mUx = _mm_load_ps((const float*) &modelUp_4x);
   __m128 mUy = _mm_load_ps((const float*) &modelUp_4y);
   __m128 mUz = _mm_load_ps((const float*) &modelUp_4z);
   __m128 mDx = _mm_load_ps((const float*) &modelDir_4x);
   __m128 mDy = _mm_load_ps((const float*) &modelDir_4y);
   __m128 mDz = _mm_load_ps((const float*) &modelDir_4z);
 
  // i advances by 4 because each __m128 holds four floats.
  for(int i=0; i<100*1000; i+=4)
  {
 
    alert("ay");
 
   // Load four consecutive x, y and z components.
   __m128 nx = _mm_load_ps( &n_x[i]);
   __m128 ny = _mm_load_ps( &n_y[i]);
   __m128 nz = _mm_load_ps( &n_z[i]);
 
   // One dot product per output component: (nx, ny, nz) . matrix row.
   __m128 normalx = _mm_add_ps(_mm_add_ps(_mm_mul_ps(nx,mRx), _mm_mul_ps(ny,mRy)), _mm_mul_ps(nz,mRz));
   __m128 normaly = _mm_add_ps(_mm_add_ps(_mm_mul_ps(nx,mUx), _mm_mul_ps(ny,mUy)), _mm_mul_ps(nz,mUz));
   __m128 normalz = _mm_add_ps(_mm_add_ps(_mm_mul_ps(nx,mDx), _mm_mul_ps(ny,mDy)), _mm_mul_ps(nz,mDz));
 
    // Store the four results for each component.
    _mm_store_ps(  normal_x + i, normalx );
    _mm_store_ps(  normal_y + i, normaly );
    _mm_store_ps(  normal_z + i, normalz );
 
 }
 
}
 
// Test driver: runs matrix_mul_sse() once and then terminates the
// process with exit status 0.
void tests()
{
    matrix_mul_sse();
 
    exit(0);
 
}
 
 
It seems that the compiler is reading the data into xmm registers and then storing it to the stack; maybe this stack is unaligned or something? I thought it would keep the values in registers - reading things into xmm0 and then storing them to local variables seems like a waste.
Is the esp misalignment the reason?

*ps some bug makes code tags very annoying

PS: it seems that this really is the reason

__m128 mRx ...

the values are unaligned on the stack - how could it be so stupid?

is there some explanation for this?

there are only 8 xmm registers (16 in 64 bit mode).

At the beginning you are already calling _mm_load_ps 10 times.

The call stack is of interest because it may be corrupted by some operation that you don't see at the moment. Maybe the stack frame is corrupted or something else is wrong. The debugger may show you other information, exceptions or the like. Because of the nature of optimized code, no one will debug the assembler output that the compiler generates after doing massive optimizations, because the assembler statements normally correlate little with the source code.

So dont expect anyone to look into a bunch of assembler statements to tell you whats wrong with it.

there are only 8 xmm registers (16 in 64 bit mode).

At the beginning you are already calling _mm_load_ps 10 times.

I forgot this; sadly 8 is not much - but there is still this misalignment problem - it seems abnormal not to align this on the stack

The call stack is of interest because it may be corrupted by some operation that you don't see at the moment. Maybe the stack frame is corrupted or something else is wrong. The debugger may show you other information, exceptions or the like. Because of the nature of optimized code, no one will debug the assembler output that the compiler generates after doing massive optimizations, because the assembler statements normally correlate little with the source code.

So dont expect anyone to look into a bunch of assembler statements to tell you whats wrong with it.

It is not so unclear - I have only weak-to-medium assembly skills, but this is not so hard

movaps _modelRight_4x, %xmm0
movaps %xmm0, 16(%esp)

movaps _modelRight_4y, %xmm0
movaps %xmm0, 32(%esp)

movaps _modelRight_4z, %xmm0
movaps %xmm0, 48(%esp)

movaps _modelUp_4x, %xmm0
movaps %xmm0, 64(%esp)

movaps _modelUp_4y, %xmm0
movaps %xmm0, 80(%esp)

movaps _modelUp_4z, %xmm0
movaps %xmm0, 96(%esp)

movaps _modelDir_4x, %xmm0
movaps %xmm0, 112(%esp)

movaps _modelDir_4y, %xmm0
movaps %xmm0, 128(%esp)

movaps _modelDir_4z, %xmm0
movaps %xmm0, 144(%esp)

this was generated for

__m128 mRx = _mm_load_ps((float*) &modelRight_4x);

__m128 mRy = _mm_load_ps((const float*) &modelRight_4y);
__m128 mRz = _mm_load_ps((const float*) &modelRight_4z);
__m128 mUx = _mm_load_ps((const float*) &modelUp_4x);
__m128 mUy = _mm_load_ps((const float*) &modelUp_4y);
__m128 mUz = _mm_load_ps((const float*) &modelUp_4z);
__m128 mDx = _mm_load_ps((const float*) &modelDir_4x);
__m128 mDy = _mm_load_ps((const float*) &modelDir_4y);
__m128 mDz = _mm_load_ps((const float*) &modelDir_4z);

Try adding a volatile local variable to the stack and use some fancy declspec to align it to 16 bytes.

Maybe the compiler places the locals behind the variable.

It would be important to tell the compiler not to remove the 16Byte aligned local variable.

I forgot this; sadly 8 is not much - but there is still this misalignment problem - it seems abnormal not to align this on the stack


Maybe it is aligned.
Maybe it isn't.

Do you know what would tell you?

A DEBUGGER

It REALLY isn't that hard...

I forgot this; sadly 8 is not much - but there is still this misalignment problem - it seems abnormal not to align this on the stack


Maybe it is aligned.
Maybe it isn't.

Do you know what would tell you?

A DEBUGGER

It REALLY isn't that hard...

I checked this at runtime - it was not aligned. Do not preach to me, as this bores me and has no value. Do I preach to you to use or not to use something? That would be silly IMO; this is your choice, I think - so use whatever you want, but do not bore me.

- the point is different - why it is unaligned and how to align this?

This topic is closed to new replies.

Advertisement