Computer Chess Club Archives


Search

Terms

Messages

Subject: Re: PopCnt and Leadz

Author: David Rasmussen

Date: 05:53:34 12/03/00

Go up one level in this thread


On December 02, 2000 at 11:09:03, Vincent Diepeveen wrote:

>On December 02, 2000 at 07:23:16, David Rasmussen wrote:
>
>>Hi there.
>>
>>Someone, please post inline assembler functions for me to just cut and paste
>>into my existing files for my chess program, for these functions, in VC++6.
>>
>>I don't bother to write them myself, I don't even bother to take the crafty
>>stuff and put that in, in case I have to change anything. I'm just curious about
>>the speed gain.
>>
>>P.S. My bitboard implementation has first bit (0) at A8 and last bit (63) at H1.
>>
>>Thanks in advance.
>
>Sorry to dissappoint you but PopCnt and Leadz are pretty slow
>in their current crafty implementations, basically because
>bitboards are for 64 bits machines and intel is 32 bits for a long
>period of time still.
>
> vcinline.h:
>
>extern unsigned char  first_ones[65536];
>extern unsigned char  last_ones[65536];
>
>#if _MSC_VER >= 1200
>#define FORCEINLINE __forceinline
>#else
>#define FORCEINLINE __inline
>#endif
>
>
>FORCEINLINE int PopCnt(BITBOARD a) {
>
>/* Because Crafty bitboards are typically sparsely populated, we use a
>   streamlined version of the boolean.c algorithm instead of the one in x86.s */
>
>  __asm {
>        mov     ecx, dword ptr a
>        xor     eax, eax
>        test    ecx, ecx
>        jz      l1
>    l0: lea     edx, [ecx-1]
>        inc     eax
>        and     ecx, edx
>        jnz     l0
>    l1: mov     ecx, dword ptr a+4
>        test    ecx, ecx
>        jz      l3
>    l2: lea     edx, [ecx-1]
>        inc     eax
>        and     ecx, edx
>        jnz     l2
>    l3:
>  }
>}
>
>
>FORCEINLINE int FirstOne(BITBOARD a) {
>
>#if _M_IX86 <= 500 /* on plain Pentiums, use boolean.c algorithm */
>  __asm {
>        movzx   edx, word ptr a+6
>        xor     eax, eax
>        test    edx, edx
>        jnz     l1
>        mov     dx, word ptr a+4
>        mov     eax, 16
>        test    edx, edx
>        jnz     l1
>        mov     dx, word ptr a+2
>        mov     eax, 32
>        test    edx, edx
>        jnz     l1
>        mov     dx, word ptr a
>        mov     eax, 48
>  l1:   add     al, byte ptr first_ones[edx]
>  }
>#else /* BSF and BSR are *fast* instructions on PPro/PII */
>  __asm {
>        bsr     edx, dword ptr a+4
>        mov     eax, 31
>        jnz     l1
>        bsr     edx, dword ptr a
>        mov     eax, 63
>        jnz     l1
>        mov     edx, -1
>  l1:   sub     eax, edx
>  }
>#endif /* _M_IX86 > 500 */
>}
>
>FORCEINLINE int LastOne(BITBOARD a) {
>
>#if _M_IX86 <= 500 /* on plain Pentiums, use boolean.c algorithm */
>  __asm {
>        movzx   edx, word ptr a
>        mov     eax, 48
>        test    edx, edx
>        jnz     l1
>        mov     dx, word ptr a+2
>        mov     eax, 32
>        test    edx, edx
>        jnz     l1
>        mov     dx, word ptr a+4
>        mov     eax, 16
>        test    edx, edx
>        jnz     l1
>        mov     dx, word ptr a+6
>        xor     eax, eax
>        test    edx, edx
>        jnz     l1
>        mov     eax, 48
>l1:     add     al, byte ptr last_ones[edx]
>  }
>#else /* BSF and BSR are *fast* instructions on PPro/PII */
>  __asm {
>        bsf     edx, dword ptr a
>        mov     eax, 63
>        jnz     l1
>        bsf     edx, dword ptr a+4
>        mov     eax, 31
>        jnz     l1
>        mov     edx, -33
>  l1:   sub     eax, edx
>  }
>#endif /* _M_IX386 > 500 */
>}

Not to be unthankful, but I believe that that implementation assumes A1=0, H8=63



This page took 0 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.