Computer Chess Club Archives


Search

Terms

Messages

Subject: Re: FirstOne/LastOne

Author: Gerd Isenberg

Date: 10:15:22 06/13/04

Go up one level in this thread


MSC6 seems to have no problems with references - at least with those simple
functions!

In favour to the "passed by value" version is the common bb-1 expression.
Using 1ULL<<sq is probably a bit more expensive for x86-32.
If you inspect the following assembly the difference seems negligible.

Matt Taylors 32-bit folded super de Bruijn.

#include <stdio.h>

typedef unsigned __int64 BitBoard;

const unsigned int lsz64_tbl[64] =
{
 63,30, 3,32,59,14,11,33,
 60,24,50, 9,55,19,21,34,
 61,29, 2,53,51,23,41,18,
 56,28, 1,43,46,27, 0,35,
 62,31,58, 4, 5,49,54, 6,
 15,52,12,40, 7,42,45,16,
 25,57,48,13,10,39, 8,44,
 20,47,38,22,17,37,36,26,
};

__forceinline
unsigned int bitScanAndReset(BitBoard &bb)
{
    BitBoard lsb = bb ^ (bb - 1);
    bb &= ~lsb;  // omit this line to retain current LSB
    unsigned int foldedLSB = ((int) lsb) ^ ((int)(lsb>>32));
    return lsz64_tbl[foldedLSB * 0x78291ACF >> 26];
}

__forceinline
unsigned int bitScanForeward(BitBoard bb)
{
    BitBoard lsb = bb ^ (bb - 1);
    unsigned int foldedLSB = ((int) lsb) ^ ((int)(lsb>>32));
    return lsz64_tbl[foldedLSB * 0x78291ACF >> 26];
}

void traverseByReference(BitBoard bb, int* pSquares)
{
    while (bb)
    {
        *pSquares++ = bitScanAndReset(bb);
    }
    *pSquares = -1; // terminate
}

void traverseByValue(BitBoard bb, int* pSquares)
{
    while (bb)
    {
        *pSquares++ = bitScanForeward(bb);
        bb &= bb-1; // clear bit
    }
    *pSquares = -1; // terminate
}


int main(int argc, char* argv[])
{
    BitBoard bb1 = -1, bb2 = -1;
    int squares1[66], squares2[66], i;
    traverseByReference(bb1, squares1);
    traverseByValue(bb2, squares2);
    printf("\nsquares1\n");
    for (i=0; i < 64 && squares1[i] >= 0; ++i)
        printf("%d ", squares1[i]);
    printf("\nsquares2\n");
    for (i=0; i < 64 && squares2[i] >= 0; ++i)
        printf("%d ", squares2[i]);
    printf("\n");
    return 0;
}


here the generated assembly of both traverse functions:

?traverseByReference@@YAX_KPAH@Z PROC NEAR		; traverseByReference, COMDAT

; 36   : 	while (bb)

  00000	8b 54 24 04	 mov	 edx, DWORD PTR _bb$[esp-4]
  00004	56		 push	 esi
  00005	8b 74 24 0c	 mov	 esi, DWORD PTR _bb$[esp+4]
  00009	8b c2		 mov	 eax, edx
  0000b	0b c6		 or	 eax, esi
  0000d	74 49		 je	 SHORT $L646
  0000f	53		 push	 ebx
  00010	55		 push	 ebp
  00011	57		 push	 edi
  00012	8b 7c 24 1c	 mov	 edi, DWORD PTR _pSquares$[esp+12]
$L596:

; 37   : 	{
; 38   : 		*pSquares++ = bitScanAndReset(bb);

  00016	8b ca		 mov	 ecx, edx
  00018	8b c6		 mov	 eax, esi
  0001a	83 c1 ff	 add	 ecx, -1
  0001d	83 d0 ff	 adc	 eax, -1
  00020	33 ca		 xor	 ecx, edx
  00022	33 c6		 xor	 eax, esi
  00024	8b d9		 mov	 ebx, ecx
  00026	8b e8		 mov	 ebp, eax
  00028	33 c1		 xor	 eax, ecx
  0002a	69 c0 cf 1a 29
	78		 imul	 eax, 2015959759		; 78291acfH
  00030	c1 e8 1a	 shr	 eax, 26			; 0000001aH
  00033	f7 d3		 not	 ebx
  00035	8b 0c 85 00 00
	00 00		 mov	 ecx, DWORD PTR _lsz64_tbl[eax*4]
  0003c	23 d3		 and	 edx, ebx
  0003e	f7 d5		 not	 ebp
  00040	23 f5		 and	 esi, ebp
  00042	89 0f		 mov	 DWORD PTR [edi], ecx
  00044	8b c2		 mov	 eax, edx
  00046	83 c7 04	 add	 edi, 4
  00049	0b c6		 or	 eax, esi
  0004b	75 c9		 jne	 SHORT $L596

; 39   : 	}
; 40   : 	*pSquares = -1;

  0004d	c7 07 ff ff ff
	ff		 mov	 DWORD PTR [edi], -1
  00053	5f		 pop	 edi
  00054	5d		 pop	 ebp
  00055	5b		 pop	 ebx
  00056	5e		 pop	 esi

; 41   : }

  00057	c3		 ret	 0
$L646:

; 39   : 	}
; 40   : 	*pSquares = -1;

  00058	8b 4c 24 10	 mov	 ecx, DWORD PTR _pSquares$[esp]
  0005c	5e		 pop	 esi
  0005d	c7 01 ff ff ff
	ff		 mov	 DWORD PTR [ecx], -1

; 41   : }

  00063	c3		 ret	 0
?traverseByReference@@YAX_KPAH@Z ENDP			; traverseByReference



?traverseByValue@@YAX_KPAH@Z PROC NEAR			; traverseByValue, COMDAT

; 44   : {

  00000	56		 push	 esi

; 45   : 	while (bb)

  00001	8b 74 24 08	 mov	 esi, DWORD PTR _bb$[esp]
  00005	57		 push	 edi
  00006	8b 7c 24 10	 mov	 edi, DWORD PTR _bb$[esp+8]
  0000a	8b c6		 mov	 eax, esi
  0000c	0b c7		 or	 eax, edi
  0000e	74 46		 je	 SHORT $L658
  00010	53		 push	 ebx
  00011	55		 push	 ebp
  00012	8b 6c 24 1c	 mov	 ebp, DWORD PTR _pSquares$[esp+12]
$L603:

; 46   : 	{
; 47   : 		*pSquares++ = bitScan(bb);

  00016	8b ce		 mov	 ecx, esi
  00018	8b d7		 mov	 edx, edi
  0001a	83 c1 ff	 add	 ecx, -1
  0001d	83 d2 ff	 adc	 edx, -1
  00020	8b d9		 mov	 ebx, ecx
  00022	8b c2		 mov	 eax, edx
  00024	33 de		 xor	 ebx, esi
  00026	33 c7		 xor	 eax, edi

; 48   : 		bb &= bb-1; // clear bit

  00028	23 f1		 and	 esi, ecx
  0002a	33 c3		 xor	 eax, ebx
  0002c	23 fa		 and	 edi, edx
  0002e	69 c0 cf 1a 29
	78		 imul	 eax, 2015959759		; 78291acfH
  00034	c1 e8 1a	 shr	 eax, 26			; 0000001aH
  00037	8b ce		 mov	 ecx, esi
  00039	83 c5 04	 add	 ebp, 4
  0003c	8b 04 85 00 00
	00 00		 mov	 eax, DWORD PTR _lsz64_tbl[eax*4]
  00043	0b cf		 or	 ecx, edi
  00045	89 45 fc	 mov	 DWORD PTR [ebp-4], eax
  00048	75 cc		 jne	 SHORT $L603

; 49   : 	}
; 50   : 	*pSquares = -1;

  0004a	c7 45 00 ff ff
	ff ff		 mov	 DWORD PTR [ebp], -1
  00051	5d		 pop	 ebp
  00052	5b		 pop	 ebx
  00053	5f		 pop	 edi
  00054	5e		 pop	 esi

; 51   : }

  00055	c3		 ret	 0
$L658:
; 49   : 	}
; 50   : 	*pSquares = -1;

  00056	8b 54 24 14	 mov	 edx, DWORD PTR _pSquares$[esp+4]
  0005a	5f		 pop	 edi
  0005b	5e		 pop	 esi
  0005c	c7 02 ff ff ff
	ff		 mov	 DWORD PTR [edx], -1

; 51   : }

  00062	c3		 ret	 0
?traverseByValue@@YAX_KPAH@Z ENDP			; traverseByValue






This page took 0 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.