Computer Chess Club Archives


Search

Terms

Messages

Subject: Re: Matt Taylor's Bitscan with reset improved

Author: Gerd Isenberg

Date: 10:51:25 05/04/05

Go up one level in this thread


This "ugly" intrinsic bitscan with reset one seems faster than inline assembly.
In the mentioned serialize loop it takes 1100 cycles for 64 set bits,
~17 cycles per scan.

Gerd


extern "C"
unsigned char _BitScanForward(unsigned long* index, unsigned long mask);
#pragma intrinsic(_BitScanForward)

__forceinline
int _bitScanWithReset(BitBoard &bb)
{ // assumes bb != 0
  unsigned long idx;
  unsigned int* pbb = (unsigned int*)&bb;
  if ( _BitScanForward(&idx, *pbb )
  {
    *pbb = *pbb & (*pbb - 1);
    return idx;
  }
  pbb++;
  _BitScanForward(&idx, *pbb);
  *pbb = *pbb & (*pbb - 1);
  return idx + 32;
}

int serialize(BitBoard bb, int sq[], int foo)
{
  for (int n=0; bb; sq[n++] = _bitScanWithReset(bb) | foo);
  return n;
}

?serialize@@YAH_KQAHH@Z PROC				; serialize
  00510	8b 54 24 08	 mov	 edx, DWORD PTR _bb$[esp]
  00514	56		 push	 esi
  00515	8b 74 24 08	 mov	 esi, DWORD PTR _bb$[esp]
  00519	8b ce		 mov	 ecx, esi
  0051b	33 c0		 xor	 eax, eax
  0051d	0b ca		 or	 ecx, edx
  0051f	74 3f		 je	 SHORT $LN1@serialize
  00521	53		 push	 ebx
  00522	8b 5c 24 14	 mov	 ebx, DWORD PTR _sq$[esp+4]
  00526	55		 push	 ebp
  00527	57		 push	 edi
  00528	8b 7c 24 20	 mov	 edi, DWORD PTR _foo$[esp+12]
  0052c	8d 64 24 00	 npad	 4
$LL3@serialize:
  00530	0f bc ce	 bsf	 ecx, esi
  00533	74 07		 je	 SHORT $LN6@serialize
  00535	8d 6e ff	 lea	 ebp, DWORD PTR [esi-1]
  00538	23 f5		 and	 esi, ebp
  0053a	eb 13		 jmp	 SHORT $LN7@serialize
$LN6@serialize:
  0053c	0f bc ca	 bsf	 ecx, edx
  0053f	89 4c 24 14	 mov	 DWORD PTR _idx$4119[esp+12], ecx
  00543	8d 4a ff	 lea	 ecx, DWORD PTR [edx-1]
  00546	23 d1		 and	 edx, ecx
  00548	8b 4c 24 14	 mov	 ecx, DWORD PTR _idx$4119[esp+12]
  0054c	83 c1 20	 add	 ecx, 32
$LN7@serialize:
  0054f	0b cf		 or	 ecx, edi
  00551	89 0c 83	 mov	 DWORD PTR [ebx+eax*4], ecx
  00554	8b ce		 mov	 ecx, esi
  00556	83 c0 01	 add	 eax, 1
  00559	0b ca		 or	 ecx, edx
  0055b	75 d3		 jne	 SHORT $LL3@serialize
  0055d	5f		 pop	 edi
  0055e	5d		 pop	 ebp
  0055f	5b		 pop	 ebx
$LN1@serialize:
  00560	5e		 pop	 esi
  00561	c3		 ret	 0
?serialize@@YAH_KQAHH@Z ENDP				; serialize



This page took 0 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.