Jump to content

  • Log In with Google      Sign In   
  • Create Account


#Actualtanzanite7

Posted 12 November 2012 - 10:47 AM

Not a burning problem - just curiosity:

Using "static std::function<bool(int64u)> fnAdd = [&](int64u tag)->bool { ... fnAdd(...) ... }" produces the suspected/expected, but unnecessary, inline initialization / shutdown code + other pointless overhead:
0000000140021BA2  mov		 eax,dword ptr [$S2 (0141678770h)] // static initialization check overhead ...
0000000140021BA8  and		 eax,1 
0000000140021BAB  test	    eax,eax 
0000000140021BAD  jne		 ResGlShader::update+85h (0140021BF5h) // ... end of check.
0000000140021BAF  mov		 eax,dword ptr [$S2 (0141678770h)] // one time overhead ...
0000000140021BB5  or		  eax,1
0000000140021BB8  mov		 dword ptr [$S2 (0141678770h)],eax // this whole flag check/update looks completely bonkers - i suspect some crazy cache optimization (bit-packing multiple flags into one 32bit memory location?).
// capture code ... all i need is "this" and one local ("valid"), but it stores also what i am using via this - probably for performance reasons.
0000000140021BBE  lea		 r9,[valid] 
0000000140021BC3  mov		 r8,qword ptr [this] 
0000000140021BCB  lea		 rdx,[sources] 
0000000140021BD0  lea		 rcx,[rsp+60h] 
// the storing part is moved elsewhere (over a dozen loads and stores) - probably for instruction cache reasons (it should have moved most of the leading garbage there too though)
0000000140021BD5  call	    <lambda_ccd461a2e723c170260c9697d9a65f88>::<lambda_ccd461a2e723c170260c9697d9a65f88> (0140021EA0h) 
0000000140021BDA  mov		 rdx,rax 
0000000140021BDD  lea		 rcx,[fnAdd (0141678780h)]
// this is terrible ... introduces "malloc"/exceptions ... ffs. None of that is needed ... argh!
0000000140021BE4  call	    std::function<bool __cdecl(unsigned __int64)>::function<bool __cdecl(unsigned __int64)><<lambda_ccd461a2e723c170260c9697d9a65f88> > (014003A2E0h) 
// destruction at program exit ... there is nothing in lambda function to destroy - dealing with std::function ("free") is probably all it does.
0000000140021BE9  lea		 rcx,[`ResGlShader::update'::`2'::`dynamic atexit destructor for 'fnAdd'' (01400C6380h)] 
0000000140021BF0  call	    atexit (014003E7F0h)
0000000140021BF5 // ... end of skip

// The "fnAdd" function call has some extra std::function overhead too (argh*2):
// /.../ input: int64u param + "this". Stack pointer for local variable does not need passing. Clean and neat.
0000000140021C0C  call	    std::_Func_class<bool,unsigned __int64,std::_Nil,std::_Nil,std::_Nil,std::_Nil,std::_Nil,std::_Nil>::operator() (0140036E00h)
// ... call will land in (std::function check for valid contained function ... completely redundant):
0000000140036E00  mov		 qword ptr [rsp+10h],rdx 
0000000140036E05  sub		 rsp,28h 
0000000140036E09  mov		 rcx,qword ptr [rcx+18h] 
0000000140036E0D  test	    rcx,rcx 
0000000140036E10  je		  std::_Func_class<bool,unsigned __int64,std::_Nil,std::_Nil,std::_Nil,std::_Nil,std::_Nil,std::_Nil>::operator()+22h (0140036E22h) 
0000000140036E12  mov		 rax,qword ptr [rcx] 
0000000140036E15  lea		 rdx,[_Vx0] 
0000000140036E1A  call	    qword ptr [rax+10h] // call for contained function object ... used as variable and not a fixed constant which it actually is x_x. It will land ...
0000000140036E1D  add		 rsp,28h 
0000000140036E21  ret
// ... here:
000000014003D430  mov		 rdx,qword ptr [rdx] 
000000014003D433  add		 rcx,8 
000000014003D437  jmp		 <lambda_ccd461a2e723c170260c9697d9a65f88>::operator() (0140021EF0h) // finally ...

VC does the smart thing under certain conditions (mainly for stuff that does not have side effects) and moves such static initializations out of the function body - but it cannot do it here. Can I help it?

The "std::function" has many side effects and hence cannot be optimized out, but I cannot do recursion without it (or did I miss some other workaround that works with VC2012?)!

This is not a performance-related question (I do not see any situation where it would matter ... if it did matter, then I would make a real member function that would have none of the considerable overhead) - it is just for my own educational purposes with regard to lambda functions. So far I am very pleased with them - just that doing recursion with them is a f* disaster (works fine, just looks like a disaster).

edit:
Well, one often cannot use "static" with lambdas, so the overhead there is mostly unavoidable - except for getting rid of std::function. Giving up for now (aka. using a real function) - but leaving the question open in case a better solution shows up.

#4tanzanite7

Posted 12 November 2012 - 10:47 AM

Not a burning problem - just curiosity:

Using "static std::function<bool(int64u)> fnAdd = [&](int64u tag)->bool { ... fnAdd(...) ... }" produces the suspected/expected, but unnecessary, inline initialization / shutdown code + other pointless overhead:
0000000140021BA2  mov		 eax,dword ptr [$S2 (0141678770h)] // static initialization check overhead ...
0000000140021BA8  and		 eax,1 
0000000140021BAB  test	    eax,eax 
0000000140021BAD  jne		 ResGlShader::update+85h (0140021BF5h) // ... end of check.
0000000140021BAF  mov		 eax,dword ptr [$S2 (0141678770h)] // one time overhead ...
0000000140021BB5  or		  eax,1
0000000140021BB8  mov		 dword ptr [$S2 (0141678770h)],eax // this whole flag check/update looks completely bonkers - i suspect some crazy cache optimization (bit-packing multiple flags into one 32bit memory location?).
// capture code ... all i need is "this" and one local ("valid"), but it stores also what i am using via this - probably for performance reasons.
0000000140021BBE  lea		 r9,[valid] 
0000000140021BC3  mov		 r8,qword ptr [this] 
0000000140021BCB  lea		 rdx,[sources] 
0000000140021BD0  lea		 rcx,[rsp+60h] 
// the storing part is moved elsewhere (over a dozen loads and stores) - probably for instruction cache reasons (it should have moved most of the leading garbage there too though)
0000000140021BD5  call	    <lambda_ccd461a2e723c170260c9697d9a65f88>::<lambda_ccd461a2e723c170260c9697d9a65f88> (0140021EA0h) 
0000000140021BDA  mov		 rdx,rax 
0000000140021BDD  lea		 rcx,[fnAdd (0141678780h)]
// this is terrible ... introduces "malloc"/exceptions ... ffs. None of that is needed ... argh!
0000000140021BE4  call	    std::function<bool __cdecl(unsigned __int64)>::function<bool __cdecl(unsigned __int64)><<lambda_ccd461a2e723c170260c9697d9a65f88> > (014003A2E0h) 
// destruction at program exit ... there is nothing in lambda function to destroy - dealing with std::function ("free") is probably all it does.
0000000140021BE9  lea		 rcx,[`ResGlShader::update'::`2'::`dynamic atexit destructor for 'fnAdd'' (01400C6380h)] 
0000000140021BF0  call	    atexit (014003E7F0h)
0000000140021BF5 // ... end of skip

// The "fnAdd" function call has some extra std::function overhead too (argh*2):
// /.../ input: int64u param + "this". Stack pointer for local variable does not need passing. Clean and neat.
0000000140021C0C  call	    std::_Func_class<bool,unsigned __int64,std::_Nil,std::_Nil,std::_Nil,std::_Nil,std::_Nil,std::_Nil>::operator() (0140036E00h)
// ... call will land in (std::function check for valid contained function ... completely redundant):
0000000140036E00  mov		 qword ptr [rsp+10h],rdx 
0000000140036E05  sub		 rsp,28h 
0000000140036E09  mov		 rcx,qword ptr [rcx+18h] 
0000000140036E0D  test	    rcx,rcx 
0000000140036E10  je		  std::_Func_class<bool,unsigned __int64,std::_Nil,std::_Nil,std::_Nil,std::_Nil,std::_Nil,std::_Nil>::operator()+22h (0140036E22h) 
0000000140036E12  mov		 rax,qword ptr [rcx] 
0000000140036E15  lea		 rdx,[_Vx0] 
0000000140036E1A  call	    qword ptr [rax+10h] // call for contained function object ... used as variable and not a fixed constant which it actually is x_x. It will land ...
0000000140036E1D  add		 rsp,28h 
0000000140036E21  ret
// ... here:
000000014003D430  mov		 rdx,qword ptr [rdx] 
000000014003D433  add		 rcx,8 
000000014003D437  jmp		 <lambda_ccd461a2e723c170260c9697d9a65f88>::operator() (0140021EF0h) // finally ...

VC does the smart thing under certain conditions (mainly for stuff that does not have side effects) and moves such static initializations out of the function body - but it cannot do it here. Can I help it?

The "std::function" has many side effects and hence cannot be optimized out, but I cannot do recursion without it (or did I miss some other workaround that works with VC2012?)!

This is not a performance-related question (I do not see any situation where it would matter ... if it did matter, then I would make a real member function that would have none of the considerable overhead) - it is just for my own educational purposes with regard to lambda functions. So far I am very pleased with them - just that doing recursion with them is a f* disaster (works fine, just looks like a disaster).

edit:
Well, one often cannot use "static" with lambdas, so the overhead there is mostly unavoidable - except for getting rid of std::function. Giving up for now (aka. using a real function) - but leaving the question open in case a better solution shows up.

#3tanzanite7

Posted 12 November 2012 - 10:47 AM

Not a burning problem - just curiosity:

Using "static std::function<bool(int64u)> fnAdd = [&](int64u tag)->bool { ... fnAdd(...) ... }" produces the suspected/expected, but unnecessary, inline initialization / shutdown code + other pointless overhead:
0000000140021BA2  mov		 eax,dword ptr [$S2 (0141678770h)] // static initialization check overhead ...
0000000140021BA8  and		 eax,1 
0000000140021BAB  test	    eax,eax 
0000000140021BAD  jne		 ResGlShader::update+85h (0140021BF5h) // ... end of check.
0000000140021BAF  mov		 eax,dword ptr [$S2 (0141678770h)] // one time overhead ...
0000000140021BB5  or		  eax,1
0000000140021BB8  mov		 dword ptr [$S2 (0141678770h)],eax // this whole flag check/update looks completely bonkers - i suspect some crazy cache optimization (bit-packing multiple flags into one 32bit memory location?).
// capture code ... all i need is "this" and one local ("valid"), but it stores also what i am using via this - probably for performance reasons.
0000000140021BBE  lea		 r9,[valid] 
0000000140021BC3  mov		 r8,qword ptr [this] 
0000000140021BCB  lea		 rdx,[sources] 
0000000140021BD0  lea		 rcx,[rsp+60h] 
// the storing part is moved elsewhere (over a dozen loads and stores) - probably for instruction cache reasons (it should have moved most of the leading garbage there too though)
0000000140021BD5  call	    <lambda_ccd461a2e723c170260c9697d9a65f88>::<lambda_ccd461a2e723c170260c9697d9a65f88> (0140021EA0h) 
0000000140021BDA  mov		 rdx,rax 
0000000140021BDD  lea		 rcx,[fnAdd (0141678780h)]
// this is terrible ... introduces "malloc"/exceptions ... ffs. None of that is needed ... argh!
0000000140021BE4  call	    std::function<bool __cdecl(unsigned __int64)>::function<bool __cdecl(unsigned __int64)><<lambda_ccd461a2e723c170260c9697d9a65f88> > (014003A2E0h) 
// destruction at program exit ... there is nothing in lambda function to destroy - dealing with std::function ("free") is probably all it does.
0000000140021BE9  lea		 rcx,[`ResGlShader::update'::`2'::`dynamic atexit destructor for 'fnAdd'' (01400C6380h)] 
0000000140021BF0  call	    atexit (014003E7F0h)
0000000140021BF5 // ... end of skip

// The "fnAdd" function call has some extra std::function overhead too (argh*2):
// /.../ input: int64u param + "this". Stack pointer for local variable does not need passing. Clean and neat.
0000000140021C0C  call	    std::_Func_class<bool,unsigned __int64,std::_Nil,std::_Nil,std::_Nil,std::_Nil,std::_Nil,std::_Nil>::operator() (0140036E00h)
// ... call will land in (std::function check for valid contained function ... completely redundant):
0000000140036E00  mov		 qword ptr [rsp+10h],rdx 
0000000140036E05  sub		 rsp,28h 
0000000140036E09  mov		 rcx,qword ptr [rcx+18h] 
0000000140036E0D  test	    rcx,rcx 
0000000140036E10  je		  std::_Func_class<bool,unsigned __int64,std::_Nil,std::_Nil,std::_Nil,std::_Nil,std::_Nil,std::_Nil>::operator()+22h (0140036E22h) 
0000000140036E12  mov		 rax,qword ptr [rcx] 
0000000140036E15  lea		 rdx,[_Vx0] 
0000000140036E1A  call	    qword ptr [rax+10h] // call for contained function object ... used as variable and not a fixed constant which it actually is x_x. It will land ...
0000000140036E1D  add		 rsp,28h 
0000000140036E21  ret
// ... here:
000000014003D430  mov		 rdx,qword ptr [rdx] 
000000014003D433  add		 rcx,8 
000000014003D437  jmp		 <lambda_ccd461a2e723c170260c9697d9a65f88>::operator() (0140021EF0h) // finally ...

VC does the smart thing under certain conditions (mainly for stuff that does not have side effects) and moves such static initializations out of the function body - but it cannot do it here. Can I help it?

The "std::function" has many side effects and hence cannot be optimized out, but I cannot do recursion without it (or did I miss some other workaround that works with VC2012?)!

This is not a performance-related question (I do not see any situation where it would matter ... if it did matter, then I would make a real member function that would have none of the considerable overhead) - it is just for my own educational purposes with regard to lambda functions. So far I am very pleased with them - just that doing recursion with them is a f* disaster (works fine, just looks like a disaster).

edit:
Well, one often cannot use "static" with lambdas, so the overhead there is mostly unavoidable - except for getting rid of std::function. Giving up for now (aka. using a real function) - but leaving the question open in case a better solution shows up.

#2tanzanite7

Posted 12 November 2012 - 10:43 AM

Not a burning problem - just curiosity:

Using "static std::function<bool(int64u)> fnAdd = [&](int64u tag)->bool { ... fnAdd(...) ... }" produces the suspected/expected, but unnecessary, inline initialization / shutdown code + other pointless overhead:
0000000140021BA2  mov		 eax,dword ptr [$S2 (0141678770h)] // static initialization check overhead ...
0000000140021BA8  and		 eax,1 
0000000140021BAB  test	    eax,eax 
0000000140021BAD  jne		 ResGlShader::update+85h (0140021BF5h) // ... end of check.
0000000140021BAF  mov		 eax,dword ptr [$S2 (0141678770h)] // one time overhead ...
0000000140021BB5  or		  eax,1
0000000140021BB8  mov		 dword ptr [$S2 (0141678770h)],eax // this whole flag check/update looks completely bonkers - i suspect some crazy cache optimization (bit-packing multiple flags into one 32bit memory location?).
// capture code ... all i need is "this" and one local ("valid"), but it stores also what i am using via this - probably for performance reasons.
0000000140021BBE  lea		 r9,[valid] 
0000000140021BC3  mov		 r8,qword ptr [this] 
0000000140021BCB  lea		 rdx,[sources] 
0000000140021BD0  lea		 rcx,[rsp+60h] 
// the storing part is moved elsewhere (over a dozen loads and stores) - probably for instruction cache reasons (it should have moved most of the leading garbage there too though)
0000000140021BD5  call	    <lambda_ccd461a2e723c170260c9697d9a65f88>::<lambda_ccd461a2e723c170260c9697d9a65f88> (0140021EA0h) 
0000000140021BDA  mov		 rdx,rax 
0000000140021BDD  lea		 rcx,[fnAdd (0141678780h)]
// this is terrible ... introduces "malloc"/exceptions ... ffs. None of that is needed ... argh!
0000000140021BE4  call	    std::function<bool __cdecl(unsigned __int64)>::function<bool __cdecl(unsigned __int64)><<lambda_ccd461a2e723c170260c9697d9a65f88> > (014003A2E0h) 
// destruction at program exit ... there is nothing in lambda function to destroy - dealing with std::function ("free") is probably all it does.
0000000140021BE9  lea		 rcx,[`ResGlShader::update'::`2'::`dynamic atexit destructor for 'fnAdd'' (01400C6380h)] 
0000000140021BF0  call	    atexit (014003E7F0h)
0000000140021BF5 // ... end of skip

// The "fnAdd" function call has some extra std::function overhead too (argh*2):
// /.../ input: int64u param + "this". Stack pointer for local variable does not need passing. Clean and neat.
0000000140021C0C  call	    std::_Func_class<bool,unsigned __int64,std::_Nil,std::_Nil,std::_Nil,std::_Nil,std::_Nil,std::_Nil>::operator() (0140036E00h)
// ... call will land in (std::function check for valid contained function ... completely redundant):
0000000140036E00  mov		 qword ptr [rsp+10h],rdx 
0000000140036E05  sub		 rsp,28h 
0000000140036E09  mov		 rcx,qword ptr [rcx+18h] 
0000000140036E0D  test	    rcx,rcx 
0000000140036E10  je		  std::_Func_class<bool,unsigned __int64,std::_Nil,std::_Nil,std::_Nil,std::_Nil,std::_Nil,std::_Nil>::operator()+22h (0140036E22h) 
0000000140036E12  mov		 rax,qword ptr [rcx] 
0000000140036E15  lea		 rdx,[_Vx0] 
0000000140036E1A  call	    qword ptr [rax+10h] // call for contained function object ... used as variable and not a fixed constant which it actually is x_x. It will land ...
0000000140036E1D  add		 rsp,28h 
0000000140036E21  ret
// ... here:
000000014003D430  mov		 rdx,qword ptr [rdx] 
000000014003D433  add		 rcx,8 
000000014003D437  jmp		 <lambda_ccd461a2e723c170260c9697d9a65f88>::operator() (0140021EF0h) // finally ...

VC does the smart thing under certain conditions (mainly for stuff that does not have side effects) and moves such static initializations out of the function body - but it cannot do it here. Can I help it?

The "std::function" has many side effects and hence cannot be optimized out, but I cannot do recursion without it (or did I miss some other workaround that works with VC2012?)!

This is not a performance-related question (I do not see any situation where it would matter ... if it did matter, then I would make a real member function that would have none of the considerable overhead) - it is just for my own educational purposes with regard to lambda functions. So far I am very pleased with them - just that doing recursion with them is a f* disaster (works fine, just looks like a disaster).

edit:
Well, one often cannot use "static" with lambdas, so the overhead there is mostly unavoidable - except for getting rid of std::function. Giving up for now (aka. using a real function) - but leaving the question open in case a better solution shows up.

#1tanzanite7

Posted 12 November 2012 - 10:18 AM

Not a burning problem - just curiosity:

Using "static std::function<bool(int64u)> fnAdd = [&](int64u tag)->bool { ... fnAdd(...) ... }" produces the suspected/expected, but unnecessary, inline initialization / shutdown code + other pointless overhead:
0000000140021BA2  mov		 eax,dword ptr [$S2 (0141678770h)] // static initialization check overhead ...
0000000140021BA8  and		 eax,1 
0000000140021BAB  test	    eax,eax 
0000000140021BAD  jne		 ResGlShader::update+85h (0140021BF5h) // ... end of check.
0000000140021BAF  mov		 eax,dword ptr [$S2 (0141678770h)] // one time overhead ...
0000000140021BB5  or		  eax,1
0000000140021BB8  mov		 dword ptr [$S2 (0141678770h)],eax // this whole flag check/update looks completely bonkers - i suspect some crazy cache optimization (bit-packing multiple flags into one 32bit memory location?).
// capture code ... all i need is "this" and one local ("valid"), but it stores also what i am using via this - probably for performance reasons.
0000000140021BBE  lea		 r9,[valid] 
0000000140021BC3  mov		 r8,qword ptr [this] 
0000000140021BCB  lea		 rdx,[sources] 
0000000140021BD0  lea		 rcx,[rsp+60h] 
// the storing part is moved elsewhere (over a dozen loads and stores) - probably for instruction cache reasons (it should have moved most of the leading garbage there too though)
0000000140021BD5  call	    <lambda_ccd461a2e723c170260c9697d9a65f88>::<lambda_ccd461a2e723c170260c9697d9a65f88> (0140021EA0h) 
0000000140021BDA  mov		 rdx,rax 
0000000140021BDD  lea		 rcx,[fnAdd (0141678780h)]
// this is terrible ... introduces "malloc"/exceptions ... ffs. None of that is needed ... argh!
0000000140021BE4  call	    std::function<bool __cdecl(unsigned __int64)>::function<bool __cdecl(unsigned __int64)><<lambda_ccd461a2e723c170260c9697d9a65f88> > (014003A2E0h) 
// destruction at program exit ... there is nothing in lambda function to destroy - dealing with std::function ("free") is probably all it does.
0000000140021BE9  lea		 rcx,[`ResGlShader::update'::`2'::`dynamic atexit destructor for 'fnAdd'' (01400C6380h)] 
0000000140021BF0  call	    atexit (014003E7F0h)
0000000140021BF5 // ... end of skip

// The "fnAdd" function call has some extra std::function overhead too (argh*2):
// /.../ input: int64u param + "this". Stack pointer for local variable does not need passing. Clean and neat.
0000000140021C0C  call	    std::_Func_class<bool,unsigned __int64,std::_Nil,std::_Nil,std::_Nil,std::_Nil,std::_Nil,std::_Nil>::operator() (0140036E00h)
// ... call will land in (std::function check for valid contained function ... completely redundant):
0000000140036E00  mov		 qword ptr [rsp+10h],rdx 
0000000140036E05  sub		 rsp,28h 
0000000140036E09  mov		 rcx,qword ptr [rcx+18h] 
0000000140036E0D  test	    rcx,rcx 
0000000140036E10  je		  std::_Func_class<bool,unsigned __int64,std::_Nil,std::_Nil,std::_Nil,std::_Nil,std::_Nil,std::_Nil>::operator()+22h (0140036E22h) 
0000000140036E12  mov		 rax,qword ptr [rcx] 
0000000140036E15  lea		 rdx,[_Vx0] 
0000000140036E1A  call	    qword ptr [rax+10h] // call for contained function object ... used as variable and not a fixed constant which it actually is x_x. It will land ...
0000000140036E1D  add		 rsp,28h 
0000000140036E21  ret
// ... here:
000000014003D430  mov		 rdx,qword ptr [rdx] 
000000014003D433  add		 rcx,8 
000000014003D437  jmp		 <lambda_ccd461a2e723c170260c9697d9a65f88>::operator() (0140021EF0h) // finally ...

VC does the smart thing under certain conditions (mainly for stuff that does not have side effects) and moves such static initializations out of the function body - but it cannot do it here. Can I help it?

The "std::function" has many side effects and hence cannot be optimized out, but I cannot do recursion without it (or did I miss some other workaround that works with VC2012?)!

This is not a performance-related question (I do not see any situation where it would matter ... if it did matter, then I would make a real member function that would have none of the considerable overhead) - it is just for my own educational purposes with regard to lambda functions. So far I am very pleased with them - just that doing recursion with them is a f* disaster (works fine, just looks like a disaster).

PARTNERS