Author: Gerd Isenberg
Date: 10:15:22 06/13/04
Go up one level in this thread
MSC6 seems to have no problems with references - at least with those simple
functions!
In favour to the "passed by value" version is the common bb-1 expression.
Using 1ULL<<sq is probably a bit more expensive for x86-32.
If you inspect the following assembly the difference seems negligible.
Matt Taylors 32-bit folded super de Bruijn.
#include <stdio.h>
typedef unsigned __int64 BitBoard;
const unsigned int lsz64_tbl[64] =
{
63,30, 3,32,59,14,11,33,
60,24,50, 9,55,19,21,34,
61,29, 2,53,51,23,41,18,
56,28, 1,43,46,27, 0,35,
62,31,58, 4, 5,49,54, 6,
15,52,12,40, 7,42,45,16,
25,57,48,13,10,39, 8,44,
20,47,38,22,17,37,36,26,
};
__forceinline
unsigned int bitScanAndReset(BitBoard &bb)
{
BitBoard lsb = bb ^ (bb - 1);
bb &= ~lsb; // omit this line to retain current LSB
unsigned int foldedLSB = ((int) lsb) ^ ((int)(lsb>>32));
return lsz64_tbl[foldedLSB * 0x78291ACF >> 26];
}
__forceinline
unsigned int bitScanForeward(BitBoard bb)
{
BitBoard lsb = bb ^ (bb - 1);
unsigned int foldedLSB = ((int) lsb) ^ ((int)(lsb>>32));
return lsz64_tbl[foldedLSB * 0x78291ACF >> 26];
}
void traverseByReference(BitBoard bb, int* pSquares)
{
while (bb)
{
*pSquares++ = bitScanAndReset(bb);
}
*pSquares = -1; // terminate
}
void traverseByValue(BitBoard bb, int* pSquares)
{
while (bb)
{
*pSquares++ = bitScanForeward(bb);
bb &= bb-1; // clear bit
}
*pSquares = -1; // terminate
}
int main(int argc, char* argv[])
{
BitBoard bb1 = -1, bb2 = -1;
int squares1[66], squares2[66], i;
traverseByReference(bb1, squares1);
traverseByValue(bb2, squares2);
printf("\nsquares1\n");
for (i=0; i < 64 && squares1[i] >= 0; ++i)
printf("%d ", squares1[i]);
printf("\nsquares2\n");
for (i=0; i < 64 && squares2[i] >= 0; ++i)
printf("%d ", squares2[i]);
printf("\n");
return 0;
}
here the generated assembly of both traverse functions:
?traverseByReference@@YAX_KPAH@Z PROC NEAR ; traverseByReference, COMDAT
; 36 : while (bb)
00000 8b 54 24 04 mov edx, DWORD PTR _bb$[esp-4]
00004 56 push esi
00005 8b 74 24 0c mov esi, DWORD PTR _bb$[esp+4]
00009 8b c2 mov eax, edx
0000b 0b c6 or eax, esi
0000d 74 49 je SHORT $L646
0000f 53 push ebx
00010 55 push ebp
00011 57 push edi
00012 8b 7c 24 1c mov edi, DWORD PTR _pSquares$[esp+12]
$L596:
; 37 : {
; 38 : *pSquares++ = bitScanAndReset(bb);
00016 8b ca mov ecx, edx
00018 8b c6 mov eax, esi
0001a 83 c1 ff add ecx, -1
0001d 83 d0 ff adc eax, -1
00020 33 ca xor ecx, edx
00022 33 c6 xor eax, esi
00024 8b d9 mov ebx, ecx
00026 8b e8 mov ebp, eax
00028 33 c1 xor eax, ecx
0002a 69 c0 cf 1a 29
78 imul eax, 2015959759 ; 78291acfH
00030 c1 e8 1a shr eax, 26 ; 0000001aH
00033 f7 d3 not ebx
00035 8b 0c 85 00 00
00 00 mov ecx, DWORD PTR _lsz64_tbl[eax*4]
0003c 23 d3 and edx, ebx
0003e f7 d5 not ebp
00040 23 f5 and esi, ebp
00042 89 0f mov DWORD PTR [edi], ecx
00044 8b c2 mov eax, edx
00046 83 c7 04 add edi, 4
00049 0b c6 or eax, esi
0004b 75 c9 jne SHORT $L596
; 39 : }
; 40 : *pSquares = -1;
0004d c7 07 ff ff ff
ff mov DWORD PTR [edi], -1
00053 5f pop edi
00054 5d pop ebp
00055 5b pop ebx
00056 5e pop esi
; 41 : }
00057 c3 ret 0
$L646:
; 39 : }
; 40 : *pSquares = -1;
00058 8b 4c 24 10 mov ecx, DWORD PTR _pSquares$[esp]
0005c 5e pop esi
0005d c7 01 ff ff ff
ff mov DWORD PTR [ecx], -1
; 41 : }
00063 c3 ret 0
?traverseByReference@@YAX_KPAH@Z ENDP ; traverseByReference
?traverseByValue@@YAX_KPAH@Z PROC NEAR ; traverseByValue, COMDAT
; 44 : {
00000 56 push esi
; 45 : while (bb)
00001 8b 74 24 08 mov esi, DWORD PTR _bb$[esp]
00005 57 push edi
00006 8b 7c 24 10 mov edi, DWORD PTR _bb$[esp+8]
0000a 8b c6 mov eax, esi
0000c 0b c7 or eax, edi
0000e 74 46 je SHORT $L658
00010 53 push ebx
00011 55 push ebp
00012 8b 6c 24 1c mov ebp, DWORD PTR _pSquares$[esp+12]
$L603:
; 46 : {
; 47 : *pSquares++ = bitScan(bb);
00016 8b ce mov ecx, esi
00018 8b d7 mov edx, edi
0001a 83 c1 ff add ecx, -1
0001d 83 d2 ff adc edx, -1
00020 8b d9 mov ebx, ecx
00022 8b c2 mov eax, edx
00024 33 de xor ebx, esi
00026 33 c7 xor eax, edi
; 48 : bb &= bb-1; // clear bit
00028 23 f1 and esi, ecx
0002a 33 c3 xor eax, ebx
0002c 23 fa and edi, edx
0002e 69 c0 cf 1a 29
78 imul eax, 2015959759 ; 78291acfH
00034 c1 e8 1a shr eax, 26 ; 0000001aH
00037 8b ce mov ecx, esi
00039 83 c5 04 add ebp, 4
0003c 8b 04 85 00 00
00 00 mov eax, DWORD PTR _lsz64_tbl[eax*4]
00043 0b cf or ecx, edi
00045 89 45 fc mov DWORD PTR [ebp-4], eax
00048 75 cc jne SHORT $L603
; 49 : }
; 50 : *pSquares = -1;
0004a c7 45 00 ff ff
ff ff mov DWORD PTR [ebp], -1
00051 5d pop ebp
00052 5b pop ebx
00053 5f pop edi
00054 5e pop esi
; 51 : }
00055 c3 ret 0
$L658:
; 49 : }
; 50 : *pSquares = -1;
00056 8b 54 24 14 mov edx, DWORD PTR _pSquares$[esp+4]
0005a 5f pop edi
0005b 5e pop esi
0005c c7 02 ff ff ff
ff mov DWORD PTR [edx], -1
; 51 : }
00062 c3 ret 0
?traverseByValue@@YAX_KPAH@Z ENDP ; traverseByValue
This page took 0 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.