Computer Chess Club Archives


Search

Terms

Messages

Subject: Re: Preliminary 14% Increase in Speed!

Author: Walter Faxon

Date: 21:29:42 12/23/02

Go up one level in this thread


On December 23, 2002 at 15:10:29, Matt Taylor wrote:

<snip>

>>Since the code only finds the LSB for a 32-bit integer, I converted it to an LSB
>>64 routine by adding a front stage. My initial estimate is that this routine
>>will suffer a little from the table-based rotuine you posted previously.
>>
>>I've got a bunch of stuff to do today (including picking up my fixed car and
>>fixing someone's computer), but the page should be updated with the new routines
>>and scores tonight.
>>
>>-Matt
>
>My hand-optimized integer version is 14% faster over the BEST routine thus far!
>I'm going to give VC 7 a shot now. VC 7 usually does a little better than I can
>do by hand.
>
>-Matt


Yikes!

Here is the original 64-bit source posted by Gerd on CCC (maybe also posted
earlier; I don't know if the generic C algorithm is original to him).  Though
this version doesn't remove the bit (he later posted a version which does), have
we all been wasting a year or more trying to improve it??

--------------------
Subject : Re: Bitboards with BSF/BSR
Posted by : Gerd Isenberg on December 12, 2001 at 05:37:21

I use bsf in the way that assumes that undefined means unchanged here, simply by
empirical evidence. So long as i play on my own hardware it's OK for me. Has
anybody found that this doesn't work on other x86-CPUs?

Gerd


// precondition: bb not null
__forceinline unsigned int BitSearch(BitBoard bb)
{
#ifdef    _M_IX86
__asm
{
    bsf        eax,[bb+4]
    add        al, 32
    bsf        eax,[bb]
}
#else
BitBoard lsbb = bb & (-(__int64)bb);
UINT32 lsb = LOWBOARD(lsbb) | HIGHBOARD(lsbb);
return ((((((((((HIGHBOARD(lsbb)!=0) <<1)
            +((lsb & 0xffff0000)!=0))<<1)
            +((lsb & 0xff00ff00)!=0))<<1)
            +((lsb & 0xf0f0f0f0)!=0))<<1)
            +((lsb & 0xcccccccc)!=0))<<1)
            +((lsb & 0xaaaaaaaa)!=0);
#endif
}

--------------------
Later he posted a version with bit removal (with other code):
--------------------
Subject: Re: LSB and POPCOUNT optimization
Posted by Gerd Isenberg on June 18, 2002 at 13:14:28:

On June 18, 2002 at 12:15:09, Adriano Bedeschi de Souza wrote:

>Could someone help me with LSB (MSB) and POPCOUNT optimization?
>
>thx,
>Bedeschi

some sample source code for msc6 with sone inline asm:

PopCount / BitCount :

#ifdef  _M_IX86
__forceinline int _fastcall BitCount (BitBoard bb)
{
        __asm
        {
                mov     ecx, dword ptr bb
                xor     eax, eax
                test    ecx, ecx
                jz      l1
        l0: lea     edx, [ecx-1]
                inc     eax
                and     ecx, edx
                jnz     l0
        l1: mov     ecx, dword ptr bb+4
                test    ecx, ecx
                jz      l3
        l2: lea     edx, [ecx-1]
                inc     eax
                and     ecx, edx
                jnz     l2
        l3:
        }
}
#else
__forceinline int _fastcall BitCount (BitBoard bb)
{
        static const UINT32 m1 = 0x55555555;
        static const UINT32 m2 = 0x33333333;
        static const UINT32 m3 = 0x0f0f0f0f;
        UINT32 l = LOWBOARD(bb);
        UINT32 h = HIGHBOARD(bb);
        l = l - ((l>>1) & m1);
        h = h - ((h>>1) & m1);
        l = (l & m2) + ((l>>2) & m2);
        h = (h & m2) + ((h>>2) & m2);
        l = (l & m3) + ((l>>4) & m3);
        h = (h & m3) + ((h>>4) & m3);
        l = (l & 0x0ffff) + (l>>16);
        h = (h & 0x0ffff) + (h>>16);
        return ((l & 0x0ff) + (l>>8) + (h & 0x0ff) + (h>>8));
}
#endif

LSB / BitSearch / BitScan:

// precondition: bb not null
__forceinline unsigned int BitSearch(BitBoard bb)
{
#ifdef  _M_IX86
        __asm
        {
                bsf             eax,[bb+4]
                xor             eax, 32
                bsf             eax,[bb]
        }
#else
        BitBoard lsbb = bb & (-(__int64)bb);
        UINT32 lsb = LOWBOARD(lsbb) | HIGHBOARD(lsbb);
        return ((((((((((HIGHBOARD(lsbb)!=0) <<1)
                                +((lsb & 0xffff0000)!=0))<<1)
                                +((lsb & 0xff00ff00)!=0))<<1)
                                +((lsb & 0xf0f0f0f0)!=0))<<1)
                                +((lsb & 0xcccccccc)!=0))<<1)
                                +((lsb & 0xaaaaaaaa)!=0);
#endif
}

LSB with reset found bit:

// precondition: bb not null
__forceinline unsigned int BitSearchAndReset(BitBoard &bb)
{
#ifdef  _M_IX86
        __asm
        {
                mov             edx, [bb]
                bsf             eax, [edx+4]
                xor             eax, 32
                bsf             eax, [edx]
                btr             [edx],eax
        }
#else
        BitBoard lsbb = bb & (-(__int64)bb);
        bb ^= lsbb;
        UINT32 lsb = LOWBOARD(lsbb) | HIGHBOARD(lsbb);
        return ((((((((((HIGHBOARD(lsbb)!=0) <<1)
                                +((lsb & 0xffff0000)!=0))<<1)
                                +((lsb & 0xff00ff00)!=0))<<1)
                                +((lsb & 0xf0f0f0f0)!=0))<<1)
                                +((lsb & 0xcccccccc)!=0))<<1)
                                +((lsb & 0xaaaaaaaa)!=0);
#endif
}

to traverse BitBoards:

   while (bb)
   {
      unsigned int sq = BitSearchAndReset(bb);
      ....
   }

// msb in with inline asm

#ifdef  _M_IX86
__forceinline unsigned int BitSearchReverse(BitBoard bb)
{
        __asm
        {
                bsr             eax,[bb]
                bsr             eax,[bb+4]
                setnz   dl
                shl             dl, 5
                add             al, dl
        }
}
#endif

regards,
Gerd
--------------------

(Walter again:  <Heavy sigh>)

I'm going to be away for most of this week.  A good chance to clear my head...

-- Walter



This page took 0 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.