Author: Gerd Isenberg
Date: 11:48:57 06/13/04
Go up one level in this thread
The same loops, call by reference and call by value, this time with asm bsf versions of the bitscans.
Terrible, but with less register pressure, and thanks to the store/load units not that
bad. First with the bitscans inlined, then without inlining them.
The _asm inlines:
// Bit scan forward with reset: finds the least significant set bit of the
// 64-bit board referenced by bb, clears that bit in place, and leaves its
// index (0..63) in eax as the function result.
// There is no C++ return statement; the value left in eax by the __asm block
// is what the caller receives.
// NOTE(review): for bb == 0 both bsf instructions get a zero source, so the
// result is meaningless and one bit of the upper dword is toggled anyway -
// only call this while bb != 0 (the traversal loops in this post do).
__forceinline
int bitScanAndResetAsm(BitBoard &bb)
{
__asm
{
mov ebx, [bb] // ebx = address of the board (bb is a reference)
xor eax, eax // eax = index offset, 0 while the low dword is examined
mov edx, 1 // edx becomes the single-bit mask after the shift below
bsf ecx, [ebx] // ecx = lowest set bit of the low dword; ZF=1 if that dword is 0
jnz found // low dword non-zero: ecx already holds the position
bsf ecx, [ebx + 4] // otherwise scan the high dword
lea ebx, [ebx + 4] // and point ebx at the high dword for the reset
xor eax, 32 // index offset becomes 32
found:
shl edx, cl // edx = 1 << (bit position within the dword)
xor eax, ecx // eax = offset combined with position (offset is 0 or 32, position < 32)
xor [ebx], edx // toggle the found bit; it was set, so this clears it
}
}
// Bit scan forward by value: leaves the index (0..63) of the least
// significant set bit of bb in eax as the function result. bb itself is not
// modified. There is no C++ return statement; eax is the return value.
// The high dword is scanned first and mapped to 32..63; the scan of the low
// dword then replaces that value whenever the low dword is non-zero.
// NOTE(review): when the low dword is zero this depends on bsf leaving eax
// unchanged for a zero source operand. Check the vendor manuals for the
// target CPUs - this is not guaranteed to be architecturally defined everywhere.
// NOTE(review): bb == 0 gives no meaningful result; callers must test first.
__forceinline
int bitScanForewardAsm(BitBoard bb) // last one
{
__asm
{
bsf eax, [bb+4] // eax = lowest set bit of the high dword (when that dword is non-zero)
xor eax, 32 // map it to the range 32..63
bsf eax, [bb] // low dword non-zero: overwrite with its bit position 0..31
}
}
; Compiler listing: traverseByReference with bitScanAndResetAsm inlined.
; Stores the index of every set bit of bb into successive ints at pSquares
; and terminates the list with -1.
; In the loop: esi = output cursor, ebx = pointer into bb, ecx = bit position
; within the dword, edx = bit mask, eax = resulting index.
?traverseByReference@@YAX_KPAH@Z PROC NEAR ; traverseByReference, COMDAT
; 68 : {
00000 55 push ebp
00001 8b ec mov ebp, esp
; The push below appears to reserve the 4-byte slot for the temporary $T647
; (its offset is not shown in this excerpt).
00003 51 push ecx
; 69 : while (bb)
; OR of both halves of bb: zero means the board is empty, skip the loop.
00004 8b 45 08 mov eax, DWORD PTR _bb$[ebp]
00007 8b 4d 0c mov ecx, DWORD PTR _bb$[ebp+4]
0000a 0b c1 or eax, ecx
0000c 74 45 je SHORT $L653
; ebx is overwritten by the inlined asm, so it is saved here and restored at exit.
0000e 53 push ebx
; 70 : {
; 71 : *pSquares++ = bitScanAndResetAsm(bb);
; The address of bb is computed once and parked in $T647; the inlined code
; reloads it into ebx on every iteration.
0000f 8d 4d 08 lea ecx, DWORD PTR _bb$[ebp]
00012 56 push esi
00013 8b 75 10 mov esi, DWORD PTR _pSquares$[ebp]
00016 89 4d fc mov DWORD PTR $T647[ebp], ecx
$L603:
; --- start of inlined bitScanAndResetAsm ---
00019 8b 5d fc mov ebx, DWORD PTR $T647[ebp]
0001c 33 c0 xor eax, eax
0001e ba 01 00 00 00 mov edx, 1
; Scan the low dword; ZF=0 (jne taken) when it contains a set bit.
00023 0f bc 0b bsf ecx, DWORD PTR [ebx]
00026 75 0a jne SHORT $found$645
; Low dword empty: scan the high dword, retarget ebx, index offset = 32.
00028 0f bc 4b 04 bsf ecx, DWORD PTR [ebx+4]
0002c 8d 5b 04 lea ebx, DWORD PTR [ebx+4]
0002f 83 f0 20 xor eax, 32 ; 00000020H
$found$645:
; edx = 1 << cl, eax = offset combined with position, then clear the bit in memory.
00032 d3 e2 shl edx, cl
00034 33 c1 xor eax, ecx
00036 31 13 xor DWORD PTR [ebx], edx
; --- end of inlined bitScanAndResetAsm ---
; bb was modified in memory, so both halves are reloaded for the loop test,
; interleaved with the store of the index and the pointer increment.
00038 8b 55 08 mov edx, DWORD PTR _bb$[ebp]
0003b 89 06 mov DWORD PTR [esi], eax
0003d 8b 45 0c mov eax, DWORD PTR _bb$[ebp+4]
00040 83 c6 04 add esi, 4
00043 0b d0 or edx, eax
00045 75 d2 jne SHORT $L603
; 72 : }
; 73 : *pSquares = -1;
; Terminator written through the advanced cursor in esi.
00047 c7 06 ff ff ff
ff mov DWORD PTR [esi], -1
0004d 5e pop esi
0004e 5b pop ebx
; 74 : }
0004f 8b e5 mov esp, ebp
00051 5d pop ebp
00052 c3 ret 0
$L653:
; Empty-board exit: esi/ebx were never pushed on this path, so pSquares is
; loaded directly and only the terminator is written.
; 72 : }
; 73 : *pSquares = -1;
00053 8b 45 10 mov eax, DWORD PTR _pSquares$[ebp]
00056 c7 00 ff ff ff
ff mov DWORD PTR [eax], -1
; 74 : }
0005c 8b e5 mov esp, ebp
0005e 5d pop ebp
0005f c3 ret 0
?traverseByReference@@YAX_KPAH@Z ENDP ; traverseByReference
; Compiler listing: traverseByValue with bitScanForewardAsm inlined.
; Stores the index of every set bit of bb into successive ints at pSquares
; and terminates the list with -1.
; bb lives in ecx (low dword) and edx (high dword); esi = output cursor.
; No frame pointer is used: the esp-relative displacements change after the
; two pushes below.
?traverseByValue@@YAX_KPAH@Z PROC NEAR ; traverseByValue, COMDAT
; 78 : while (bb)
00000 8b 4c 24 04 mov ecx, DWORD PTR _bb$[esp-4]
00004 8b 54 24 08 mov edx, DWORD PTR _bb$[esp]
; Test bb == 0 on a copy so ecx keeps the low dword.
00008 8b c1 mov eax, ecx
0000a 0b c2 or eax, edx
0000c 74 3d je SHORT $L661
0000e 56 push esi
0000f 8b 74 24 10 mov esi, DWORD PTR _pSquares$[esp]
00013 57 push edi
$L610:
; 79 : {
; 80 : *pSquares++ = bitScanForewardAsm(bb);
; --- inlined bitScanForewardAsm: operates on the stack copy of bb ---
; Scan the high dword, map to 32..63, then let the low-dword scan replace the
; result when the low dword is non-zero.
00014 0f bc 44 24 10 bsf eax, DWORD PTR _bb$[esp+8]
00019 83 f0 20 xor eax, 32 ; 00000020H
0001c 0f bc 44 24 0c bsf eax, DWORD PTR _bb$[esp+4]
00021 89 06 mov DWORD PTR [esi], eax
; 81 : bb &= bb-1; // clear bit
; 64-bit bb-1 as add/adc of -1 on the low/high halves (eax/edi), then AND
; back into ecx/edx.
00023 8b c1 mov eax, ecx
00025 83 c6 04 add esi, 4
00028 83 c0 ff add eax, -1
0002b 8b fa mov edi, edx
0002d 83 d7 ff adc edi, -1
00030 23 c8 and ecx, eax
00032 23 d7 and edx, edi
; Loop test on the updated value. The two mov stores do not change the flags
; set by the or, and they write bb back to its stack slot because the inlined
; bsf instructions read it from memory.
00034 8b c1 mov eax, ecx
00036 0b c2 or eax, edx
00038 89 4c 24 0c mov DWORD PTR _bb$[esp+4], ecx
0003c 89 54 24 10 mov DWORD PTR _bb$[esp+8], edx
00040 75 d2 jne SHORT $L610
; 82 : }
; 83 : *pSquares = -1;
00042 c7 06 ff ff ff
ff mov DWORD PTR [esi], -1
00048 5f pop edi
00049 5e pop esi
; 84 : }
0004a c3 ret 0
$L661:
; Empty-board exit: nothing was pushed on this path, only the terminator is written.
; 82 : }
; 83 : *pSquares = -1;
0004b 8b 4c 24 0c mov ecx, DWORD PTR _pSquares$[esp-4]
0004f c7 01 ff ff ff
ff mov DWORD PTR [ecx], -1
; 84 : }
00055 c3 ret 0
?traverseByValue@@YAX_KPAH@Z ENDP ; traverseByValue
The same loops without inlining the bitscans:
; Compiler listing: traverseByReference calling bitScanAndResetAsm as a real
; function. Stores the index of every set bit of bb into successive ints at
; pSquares and terminates the list with -1.
; esi = output cursor; bb stays in its stack slot and is passed by address.
?traverseByReference@@YAX_KPAH@Z PROC NEAR ; traverseByReference, COMDAT
; 67 : while (bb)
00000 8b 44 24 04 mov eax, DWORD PTR _bb$[esp-4]
00004 8b 4c 24 08 mov ecx, DWORD PTR _bb$[esp]
; OR of both halves: zero means the board is empty, skip the loop.
00008 0b c1 or eax, ecx
0000a 74 2b je SHORT $L650
0000c 56 push esi
0000d 8b 74 24 10 mov esi, DWORD PTR _pSquares$[esp]
$L603:
; 68 : {
; 69 : *pSquares++ = bitScanAndResetAsm(bb);
; Push the address of bb as the single argument; the index comes back in eax.
00011 8d 4c 24 08 lea ecx, DWORD PTR _bb$[esp]
00015 51 push ecx
00016 e8 00 00 00 00 call ?bitScanAndResetAsm@@YAHAA_K@Z ;
bitScanAndResetAsm
; The callee changed bb in memory: reload both halves for the loop test. The
; displacements are 4 larger here because the argument is still on the stack.
0001b 8b 54 24 0c mov edx, DWORD PTR _bb$[esp+4]
0001f 89 06 mov DWORD PTR [esi], eax
00021 8b 44 24 10 mov eax, DWORD PTR _bb$[esp+8]
; The caller removes the pushed argument.
00025 83 c4 04 add esp, 4
00028 83 c6 04 add esi, 4
0002b 0b d0 or edx, eax
0002d 75 e2 jne SHORT $L603
; 70 : }
; 71 : *pSquares = -1;
0002f c7 06 ff ff ff
ff mov DWORD PTR [esi], -1
00035 5e pop esi
; 72 : }
00036 c3 ret 0
$L650:
; Empty-board exit: esi was never pushed on this path, only the terminator is written.
; 70 : }
; 71 : *pSquares = -1;
00037 8b 44 24 0c mov eax, DWORD PTR _pSquares$[esp-4]
0003b c7 00 ff ff ff
ff mov DWORD PTR [eax], -1
; 72 : }
00041 c3 ret 0
?traverseByReference@@YAX_KPAH@Z ENDP ; traverseByReference
; Compiler listing: traverseByValue calling bitScanForewardAsm as a real
; function. Stores the index of every set bit of bb into successive ints at
; pSquares and terminates the list with -1.
; bb lives in ecx (low dword) and edx (high dword); esi = output cursor.
PUBLIC ?traverseByValue@@YAX_KPAH@Z ; traverseByValue
; Function compile flags: /Ogty
; COMDAT ?traverseByValue@@YAX_KPAH@Z
_TEXT SEGMENT
; Argument displacements used by the _bb$[...] / _pSquares$[...] operands below.
_bb$ = 8
_pSquares$ = 16
?traverseByValue@@YAX_KPAH@Z PROC NEAR ; traverseByValue, COMDAT
; 76 : while (bb)
00000 8b 4c 24 04 mov ecx, DWORD PTR _bb$[esp-4]
00004 8b 54 24 08 mov edx, DWORD PTR _bb$[esp]
; Test bb == 0 on a copy so ecx keeps the low dword.
00008 8b c1 mov eax, ecx
0000a 0b c2 or eax, edx
0000c 74 32 je SHORT $L655
0000e 56 push esi
0000f 8b 74 24 10 mov esi, DWORD PTR _pSquares$[esp]
00013 57 push edi
$L610:
; 77 : {
; 78 : *pSquares++ = bitScanForewardAsm(bb);
; Pass the 64-bit value on the stack, high dword pushed first; the index comes back in eax.
00014 52 push edx
00015 51 push ecx
00016 e8 00 00 00 00 call ?bitScanForewardAsm@@YAH_K@Z ; bitScanForewardAsm
0001b 89 06 mov DWORD PTR [esi], eax
; 79 : bb &= bb-1; // clear bit
; NOTE(review): ecx and edx are used here without being reloaded, so this code
; depends on the call above leaving both registers intact.
0001d 8b c1 mov eax, ecx
; The caller removes the two pushed argument dwords.
0001f 83 c4 08 add esp, 8
00022 83 c6 04 add esi, 4
; 64-bit bb-1 as add/adc of -1 on the low/high halves (eax/edi), then AND
; back into ecx/edx.
00025 83 c0 ff add eax, -1
00028 8b fa mov edi, edx
0002a 83 d7 ff adc edi, -1
0002d 23 c8 and ecx, eax
0002f 23 d7 and edx, edi
; Loop test on the updated value; no store back to memory in this variant.
00031 8b c1 mov eax, ecx
00033 0b c2 or eax, edx
00035 75 dd jne SHORT $L610
; 80 : }
; 81 : *pSquares = -1;
00037 c7 06 ff ff ff
ff mov DWORD PTR [esi], -1
0003d 5f pop edi
0003e 5e pop esi
; 82 : }
0003f c3 ret 0
$L655:
; Empty-board exit: nothing was pushed on this path, only the terminator is written.
; 80 : }
; 81 : *pSquares = -1;
00040 8b 4c 24 0c mov ecx, DWORD PTR _pSquares$[esp-4]
00044 c7 01 ff ff ff
ff mov DWORD PTR [ecx], -1
; 82 : }
0004a c3 ret 0
?traverseByValue@@YAX_KPAH@Z ENDP ; traverseByValue
This page took 0 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.