Computer Chess Club Archives

Search

Terms

Messages

Subject: Re: BitScan with reset - not so impressive with 3DNow!

Author: Walter Faxon

Date: 15:49:10 12/02/02

On December 02, 2002 at 16:48:52, Gerd Isenberg wrote:

>Maybe I have not found the best way to do b&(-b) with mmx, but building the 64-bit
>two's complement with mmx-dword is not so nice. First you have to do the one's
>complement by pxor -1, then comparing low dword with -1 and building an
>conditional overflow, adding 00:01 or 01:01...
>So using 32-bit registers was the fastest so far, but that may require some
>additional push/pop.
>
>Some times measured in seconds with this dumb loop (nothing inlined):
>K7XP2.1+ ~1.8GHz
>
>10-bit pattern		bsf	PI2FD	btr	c
>0x0000000011111133	15.3	18.0	19.1	22.8
>0x1010111010101110	19.7	18.5	19.6	23.4
>0x1111113300000000	20.6	18.0	19.1	22.8
>
>inlined are ~5 seconds faster
>
>Cheers,
>Gerd
>
>-----------------------------------------------------------------------------
>int main()
>{
>	DWORD start = GetTickCount();
>	for (int i=0; i < 100000000; i++) // 10^8
>	{
>		BitBoard bb = 0x1010111010101110; // 10 bits set
>		while (bb)
>			bitSearchAndReset_PI2FD(bb); // 10^9 runs in total
>	}
>	DWORD stop = GetTickCount();
>	printf("Time in seconds: %d.%03d\n", (stop-start)/1000, (stop-start)%1000 );
>}
>
>-----------------------------------------------------------------------------
>
>int bitSearchAndReset_bsf(BitBoard &bb)
>{
>	__asm
>	{
>		xor		edx, edx
>		mov		esi, [bb]
>		xor		eax, eax
>		inc		edx
>		bsf		ecx, [esi]
>		jnz		found
>		bsf		ecx, [esi + 4]
>		lea		esi, [esi + 4]
>		xor		eax, 32
>	found:
>		shl		edx, cl
>		xor		eax, ecx
>		xor		[esi], edx
>	}
>}
>
>-----------------------------------------------------------------------------
>
>int bitSearchAndReset_PI2FD(BitBoard &bb)
>{
>	__asm
>	{
>		mov	ebx, [bb]	; get the reference (like a pointer)
>		pxor	mm1, mm1	; 0, to get the dword mask
>
>		mov	edx, [ebx]	; get bb
>		mov	esi, [ebx+4]; bb -> esi:edx
>
>		mov	ecx, edx
>		mov	eax, esi	; bb -> eax:ecx
>
>		pcmpeqd	mm6, mm6	; -1 to complement the dword mask
>		pxor	mm7, mm7	; 0, to add both final dwords
>
>		neg	ecx		; low -bb
>		adc	eax, 0		; consider borrow
>		and	ecx, edx	; low (bb & -bb)
>		neg	eax		; high -bb
>		movd	mm0, ecx	; low (bb & -bb)
>		and	eax, esi	; high (bb & -bb)
>		xor	edx, ecx	; reset low
>		movd	mm2, eax	; high (bb & -bb)
>		xor	esi, eax	; reset high
>		punpckldq mm0, mm2	; bb & -bb -> single bit in mm0
>
>		mov	[ebx],   edx	; write modified bb back
>		mov	[ebx+4], esi
>
>		pcmpeqd	mm1, mm0	; mask of the zero dword
>		PI2FD	mm0, mm0	; 3f8..,400..
>		pxor	mm1, mm6	; mask of the none zero dword
>		psrlq	mm6, 63		; 00:01
>		psrld	mm0, 23		; 3f8 to 7f
>		psrld	mm1, 25		; 7f mask
>		psllq	mm6, 32+5	; 20:00
>		psubd	mm0, mm1	; - 7f mask
>		por	mm0, mm6	; + 32 in high dword
>		pand	mm0, mm1	; & 7f mask
>		psadbw	mm0, mm7	; add all bytes
>		movd	eax, mm0
>	}
>}
>
>-----------------------------------------------------------------------------
>
>int bitSearchAndReset_btr(BitBoard &bb)
>{
>	__asm
>	{
>		mov	edx, [bb]
>		bsf	eax, [edx+4]
>		xor	eax, 32
>		bsf	eax, [edx]
>		btr	[edx],eax
>	}
>}
>
>-----------------------------------------------------------------------------
>
>int bitSearchAndReset_C(BitBoard &bb)
>{
>	BitBoard lsbb = bb & (-(__int64)bb);
>	bb ^= lsbb;
>	unsigned int lsb = LOWBOARD(lsbb) | HIGHBOARD(lsbb);
>	return ((((((((((HIGHBOARD(lsbb)!=0) <<1)
>		    ^((lsb & 0xffff0000)!=0))<<1)
>		    ^((lsb & 0xff00ff00)!=0))<<1)
>		    ^((lsb & 0xf0f0f0f0)!=0))<<1)
>		    ^((lsb & 0xcccccccc)!=0))<<1)
>		    ^((lsb & 0xaaaaaaaa)!=0);
>}
==============================================================================

Hi, Gerd.

As long as you're test-comparing bit-search-and-reset codes, I wonder if you
could please consider also comparing my C code version, posted on CCC with the
subject "Another hacky method for bitboard bit extraction" on November 17.  I
repeat it below.  You can of course make those changes required for proper
compilation and comparison in your setup.  Thanks!

-- Walter

Code follows:
// ---------------------------------------------------------------------------

#include <stdint.h>

// Fixed-width aliases: the folding arithmetic below is written for exactly
// these widths, so they are tied to <stdint.h> rather than to 'long'.
typedef uint64_t            u64;
typedef uint32_t            u32;
typedef uint8_t             u8;

extern const u8 LSB_64_table[154];              // bit number table
#define LSB_64_adj  (-51)                       // offset to table base
#define LSB_64_magic  ( (u32)0x01C5FC81 )       // magic constant

// ---------------------------------------------------------------------------
// LSB_64() -- find, remove, report least-significant bit of 64.
// Method:  fold then table lookup.
// Written by Walter Faxon, June 2002.  No copyright.  No warranty.
//
// Parameters:
//   bb  pointer to the bitboard; must not be NULL and *bb must be nonzero
//       (with *bb == 0 the returned index is not meaningful).
// Returns:
//   number of the lowest set bit of *bb (0 = low-order, 63 = high-order).
// Side effect:
//   the reported bit is cleared in *bb.
//
// 'static inline' is used so the function also links when the compiler
// decides not to inline a call (a plain C99/C11 'inline' definition provides
// no external definition to fall back on).
//
static inline
u8 LSB_64( u64* bb )
    {
    u64 t64;
    u32 t32;
    t64 = *bb - 1;      // flips the lowest set bit and every bit below it
    *bb &= t64;         // omit this line to retain current LSB
    t64 ^= *bb;         // ones exactly in the positions below the old LSB
    t32 = (u32)t64 ^ (u32)(t64 >> 32);  // fold the two halves into 32 bits
    t32 ^= LSB_64_magic;
    t32 += t32 >> 16;   // fold down further; only the low byte is used
    t32 -= t32 >> 8;
    // The low byte, shifted by LSB_64_adj, selects the table entry.
    return LSB_64_table [LSB_64_adj + (u8)t32];
    }

// ---------------------------------------------------------------------------
// Table reports number of low-order bit as 0, high-order as 63.
// (Numbering can be reversed by changing this table.)
// Entries written as NA are filler slots stored as 0.
// Important:  arrange storage so that this table is kept in the cache.
const u8 LSB_64_table[154] =
    {
#define NA  0
    23,NA,NA,NA,31,NA,NA,39,19,NA, 17,16,18,NA,47,10,20, 9, 8,11,
     1, 0, 2,57,56,58, 3,12,NA,59, NA,NA,21,NA, 4,NA,NA,60,NA,NA,
    NA,NA,NA,13,NA,NA,NA,NA,NA,NA,  5,NA,NA,61,NA,NA,NA,NA,NA,NA,
    NA,NA,NA,NA,22,NA,NA,NA,30,NA, NA,38,NA,NA,NA,14,NA,NA,46,NA,
    NA,NA, 6,NA,NA,62,NA,NA,NA,54, NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,
    29,NA,NA,37,NA,NA,NA,NA,NA,NA, 45,NA,NA,NA,NA,NA,28,NA,NA,36,
    NA,53,NA,NA,27,NA,44,35,26,24, 25,34,32,33,43,40,41,52,42,15,
    NA,50,48,49,NA,51, 7,NA,NA,63, NA,NA,NA,55
#undef NA
    };

//eof

P.S.  You can even avoid the table lookup if you're willing to deal with
scrambled square indices in the range 0 to 153.

P.P.S.  I feel a little like the unlucky scientist whose results were lost
because they were only published _four_ times... :)

Re: BitScan with reset - not so impressive with 3DNow! Russell Reagan 12:50:15 12/03/02
- Re: BitScan with reset - not so impressive with 3DNow! Russell Reagan 20:30:50 12/03/02
  - Re: BitScan with reset - not so impressive with 3DNow! Sune Fischer 05:20:35 12/05/02
    - Re: BitScan with reset - not so impressive with 3DNow! Russell Reagan 05:51:14 12/05/02
      - Re: BitScan with reset - not so impressive with 3DNow! Sune Fischer 05:56:46 12/05/02
  - Re: BitScan with reset - not so impressive with 3DNow! Sune Fischer 01:39:05 12/04/02
    - Re: BitScan with reset - not so impressive with 3DNow! Russell Reagan 14:13:02 12/04/02
      - Re: BitScan with reset - not so impressive with 3DNow! Russell Reagan 20:27:46 12/04/02
        
        Re: BitScan with reset - not so impressive with 3DNow! Sune Fischer 01:15:35 12/05/02
        
        Re: BitScan with reset - not so impressive with 3DNow! Russell Reagan 06:03:09 12/05/02
        
        Re: BitScan with reset - not so impressive with 3DNow! Sune Fischer 07:12:55 12/05/02
        
        Re: BitScan with reset - not so impressive with 3DNow! Matt Taylor 04:34:37 12/05/02
        
        Re: BitScan with reset - not so impressive with 3DNow! Sune Fischer 05:10:42 12/05/02
        
        Re: BitScan with reset - not so impressive with 3DNow! Dan Newman 05:27:54 12/05/02
        
        Re: BitScan with reset - not so impressive with 3DNow! Gerd Isenberg 08:25:27 12/05/02
        
        Re: BitScan with reset - not so impressive with 3DNow! Dieter Buerssner 09:48:02 12/05/02
        
        Re: BitScan with reset - not so impressive with 3DNow! Gerd Isenberg 12:41:02 12/05/02
        
        Re: BitScan with reset - not so impressive with 3DNow! Dieter Buerssner 13:06:06 12/05/02
        
        Re: BitScan with reset - not so impressive with 3DNow! Matt Taylor 12:44:53 12/05/02
        
        Re: BitScan with reset - not so impressive with 3DNow! Matt Taylor 12:39:40 12/05/02
        
        Re: BitScan with reset - not so impressive with 3DNow! Dieter Buerssner 13:07:53 12/05/02
        
        Re: BitScan with reset - not so impressive with 3DNow! Miguel A. Ballicora 07:40:18 12/05/02
        
        Re: BitScan with reset - not so impressive with 3DNow! Dieter Buerssner 09:21:18 12/05/02
        
        Re: BitScan with reset - not so impressive with 3DNow! Miguel A. Ballicora 10:07:53 12/06/02
        
        Re: BitScan with reset - not so impressive with 3DNow! Dieter Buerssner 15:06:30 12/06/02
- Re: BitScan with reset - not so impressive with 3DNow! Jeremiah Penery 13:14:57 12/03/02
  - Re: BitScan with reset - not so impressive with 3DNow! Gian-Carlo Pascutto 01:39:10 12/04/02
    - Re: BitScan with reset - not so impressive with 3DNow! Jeremiah Penery 08:03:09 12/04/02
      - Re: BitScan with reset - not so impressive with 3DNow! Dezhi Zhao 13:54:05 12/04/02
  - Re: BitScan with reset - not so impressive with 3DNow! Gerd Isenberg 15:07:26 12/03/02
Re: BitScan with reset - not so impressive with 3DNow! Gerd Isenberg 17:27:39 12/02/02
- Possible small improvement to hacky method Walter Faxon 21:49:11 12/05/02
  - Re: Possible small improvement to hacky method Matt Taylor 12:57:13 12/10/02
    - Re: Possible small improvement to hacky method Gerd Isenberg 16:14:18 12/10/02
      - Re: Possible small improvement to hacky method Matt Taylor 18:27:15 12/10/02
  - Re: Possible small improvement to hacky method Frank Phillips 03:33:00 12/07/02
    - Re: Possible small improvement to hacky method Walter Faxon 11:03:04 12/07/02
      - Re: Possible small improvement to hacky method Frank Phillips 12:20:16 12/07/02
  - Re: Possible small improvement to hacky method Matt Taylor 02:33:42 12/06/02
    - Re: Possible small improvement to hacky method Walter Faxon 19:50:40 12/06/02
      - Re: Possible small improvement to hacky method Matt Taylor 11:14:32 12/07/02
        
        Re: Possible small improvement to hacky method Walter Faxon 20:40:38 12/07/02
        
        Re: Possible small improvement to hacky method Matt Taylor 02:39:16 12/08/02
        
        Re: Possible small improvement to hacky method Walter Faxon 22:59:51 12/08/02
        
        Re: Possible small improvement to hacky method Matt Taylor 00:07:49 12/09/02
Re: BitScan with reset - not so impressive with 3DNow! Gerd Isenberg 16:43:15 12/02/02
- Re: BitScan with reset - not so impressive with 3DNow! Matt Taylor 06:46:27 12/03/02
- Re: BitScan with reset - not so impressive with 3DNow! Gian-Carlo Pascutto 03:40:36 12/03/02
  - Re: BitScan with reset - not so impressive with 3DNow! Gerd Isenberg 07:09:31 12/03/02
    - Re: BitScan with reset - not so impressive with 3DNow! Russell Reagan 12:15:27 12/03/02
      - Re: BitScan with reset - not so impressive with 3DNow! Matt Taylor 14:27:07 12/03/02
    - Re: BitScan with reset - not so impressive with 3DNow! Miguel A. Ballicora 10:35:54 12/03/02
      - Re: BitScan with reset - not so impressive with 3DNow! Gerd Isenberg 14:00:45 12/03/02
        
        Re: BitScan with reset - not so impressive with 3DNow! Miguel A. Ballicora 09:00:03 12/04/02
        
        Oups, null as terminator is not so smart! Gerd Isenberg 15:19:22 12/03/02
  - Re: BitScan with reset - not so impressive with 3DNow! Russell Reagan 06:32:16 12/03/02
    - Re: BitScan with reset - not so impressive with 3DNow! Gian-Carlo Pascutto 08:47:54 12/03/02
    - Re: BitScan with reset - not so impressive with 3DNow! Gerd Isenberg 07:50:32 12/03/02
  - Re: BitScan with reset - not so impressive with 3DNow! Matt Taylor 06:05:30 12/03/02
  - Re: BitScan with reset - not so impressive with 3DNow! Sune Fischer 04:00:37 12/03/02
    - Re: BitScan with reset - not so impressive with 3DNow! Matt Taylor 07:29:50 12/04/02
      - Re: BitScan with reset - not so impressive with 3DNow! Sune Fischer 07:39:34 12/04/02
        
        Re: BitScan with reset - not so impressive with 3DNow! Matt Taylor 09:03:07 12/04/02
- Re: BitScan with reset - not so impressive with 3DNow! Walter Faxon 18:01:05 12/02/02
  - Re: BitScan with reset - not so impressive with 3DNow! Alessandro Damiani 02:05:24 12/03/02

This page took 0.03 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.