Computer Chess Club Archives


Search

Terms

Messages

Subject: Re: Bitboards and Piece Lists

Author: Dann Corbit

Date: 02:03:59 06/15/01

Go up one level in this thread


This is the test driver I used to bench bit operations:

#include "amd3dx.h"

/* Athlon MMX optimized popcount function, based on AMD's Athlon
   optimization manuals */

unsigned DensePopulationCount (unsigned __int64 v)
{
 static const __int64 C55 = 0x5555555555555555;
 static const __int64 C33 = 0x3333333333333333;
 static const __int64 C0F = 0x0F0F0F0F0F0F0F0F;

 __asm {
  movd mm0, word ptr v;
  punpckldq mm0, word ptr v +  4;
  movq mm1,mm0;
  psrld mm0,1;
  pand mm0,[C55];
  psubd mm1,mm0;
  movq mm0,mm1;
  psrld mm1,2;
  pand mm0,[C33];
  pand mm1,[C33];
  paddd mm0,mm1;
  movq mm1,mm0;
  psrld mm0,4;
  paddd mm0,mm1;
  pand mm0,[C0F];
  pxor mm1,mm1;
  psadbw (mm0,mm1);
  movd eax,mm0;
  emms;
 }
}


/* From Crafty, this is for sparsely populated bitmaps */
unsigned SparsePopulationCount (unsigned __int64 a) {
  __asm {
        mov     ecx, dword ptr a
        xor     eax, eax
        test    ecx, ecx
        jz      l1
    l0: lea     edx, [ecx-1]
        inc     eax
        and     ecx, edx
        jnz     l0
    l1: mov     ecx, dword ptr a+4
        test    ecx, ecx
        jz      l3
    l2: lea     edx, [ecx-1]
        inc     eax
        and     ecx, edx
        jnz     l2
    l3:
  }
}

unsigned FirstBit(unsigned __int64 a) {
  __asm {
        bsr     edx, dword ptr a+4
        mov     eax, 31
        jnz     l1
        bsr     edx, dword ptr a
        mov     eax, 63
        jnz     l1
        mov     edx, -1
  l1:   sub     eax, edx
  }
}

unsigned LastBit(unsigned __int64 a) {
  __asm {
        bsf     edx, dword ptr a
        mov     eax, 63
        jnz     l1
        bsf     edx, dword ptr a+4
        mov     eax, 31
        jnz     l1
        mov     edx, -33
  l1:   sub     eax, edx
  }
}
unsigned int             first_piece[65536];
#define TwoFullRanks (65535)
static  unsigned __int64    FPMask1;
static  unsigned __int64    FPMask2;
int ifp()
{

	unsigned i, j;

FPMask1 = ((unsigned __int64 ) TwoFullRanks << 16);
FPMask2 = ((unsigned __int64 ) TwoFullRanks << 32);

    /* Lookup tables for the first- & last- piece functions */
    for (i = 0; i < 65536; i++) {
        /* First piece in a two rank chunk */
        for (j = 0; j < 16; j++) {
            if (i & (1 << j)) {
                first_piece[i] = j;
                break;
            }
        }
	}
}
unsigned             FirstPiece(unsigned __int64  B)
{
    if (B & TwoFullRanks)
        return first_piece[B & TwoFullRanks];
    if (B & FPMask1)
        return first_piece[(B >> 16) & TwoFullRanks] + 16;
    if (B & FPMask2)
        return first_piece[(B >> 32) & TwoFullRanks] + 32;
    return first_piece[B >> 48] + 48;
}

unsigned firstb(unsigned __int64 bits)
{
__asm {

;
; This very clever first bit routine (much too clever for me)
; is due to Terje Mathisen
;

        mov     ecx, DWORD PTR [bits+4]
	    mov     edx, 0
        mov     ebx, DWORD PTR [bits]
	    mov     eax, 0

; The result should be [0..63] if the input was non-zero, -1 otherwise

; Input in ECX:EBX
        bsf edx,ecx ; Bit index for second half
        bsf eax,ebx ; Bit index of first half

        add edx,32 ; Add 32 to the second half index
        sub ebx,1

        sbb ebx,ebx ; -1 if EBX (first half) was zero
        sub ecx,1

        sbb ecx,ecx ; -1 if ECX (second half) was zero
        and edx,ebx ; EDX will be zero unless low half was zero

        add eax,edx ; return (EAX != 0): EAX : EDX+32
        and ebx,ecx ; -1 if all 64 bits were zero

        or eax,ebx
      }
}

unsigned fb(unsigned __int64 x)
{
return (unsigned)(x & (-x));
}



unsigned firstbit(unsigned __int64 bits)
{
__asm {
;       64-bit number to move into ECX:EBX, will never be zero!
        mov ecx,dword ptr [bits+4]
        mov ebx,dword ptr [bits]
        bsf eax,ecx
        add eax,32
        bsf eax,ebx
      }
}
unsigned first(unsigned __int64 bits)
{
__asm {
        mov     ecx, DWORD PTR [bits+4]
    	mov     edx, 0
        mov     ebx, DWORD PTR [bits]
    	mov     eax, 0

; The result should be [0..63] if the input was non-zero, -1 otherwise

        bsf edx,ecx      ; Bit index for second half
        mov eax, -33     ; Need edx to be -1 if ecx was zero

        cmovz edx, eax   ; Now we are ready for an empty qword
        bsf eax,ebx      ; Bit index of first half

        lea edx,+32[edx] ; Add 32 to the second half index
        cmovz eax, edx   ; Predicated move and we are done
    }
}

/*
 * Find number of nonzero bits in a unsigned __int64.
 * Defined by Ernst A. Heinz, University of Karlsruhe.
 */
unsigned int CPopulationCount(unsigned __int64 b)
  {
    unsigned int n;

    for (n = 0; b != 0; n++, b &= (b-1));
        return n;
  }

/*
This one sometimes gives the WRONG ANSWER!!!
#define mm1 ((unsigned __int64) 0x5555555555555555)
#define mm2 ((unsigned __int64) 0x3333333333333333)

unsigned int NCPopulationCount(unsigned __int64 b)
{
    unsigned int n;
    const unsigned __int64 a = b - ((b >> 1) & mm1);
    const unsigned __int64 c = (a & mm2) + ((a >> 2) & mm2);
    n = ((unsigned int)c) + ((unsigned int)(c >> 32));
    n = (n & 0x0F0F0F0F) + ((n >> 4) & 0x0F0F0F);
    n = (n & 0xFFFF) + (n >> 16);
    n = (n & 0xFF) + (n >> 8);
    return n;
}

*/
/*
;  population count of a 32 bit register
;
;  input:
;    esi = number to population count
;
;  output:
;    eax = population count
;
;  destroys:
;   esi, eax
;   eflags
*/
unsigned popcount32fm(unsigned int input)
{
__asm {
        mov     esi, input
        mov     eax,esi
        shr     eax,1
        and     eax,055555555h           ;  (n >> 1) & 0x55555555
        sub     esi,eax                  ;  n - ((n >> 1) & 0x55555555)
        mov     eax,esi
        shr     eax,2                    ;  n >> 2
        and     esi,033333333h           ;  n & 0x33333333
        and     eax,033333333h           ;  (n >> 2)  & 0x33333333
        add     esi,eax                  ;  n = (n & 0x33333333) + ((n >> 2) &
0x33333333)
        mov     eax,esi
        shr     eax,4                    ;  n >> 4
        add     eax,esi                  ;  n + (n >> 4)
        and     eax,00F0F0F0Fh           ;  n = (n + (n >> 4) & 0x0F0F0F0F)
        imul    eax,001010101h           ;  add by multiplying with a "magic
number"
        shr     eax,24                   ;  shift result into place
      }
}

unsigned popcount32(unsigned int input)
{
__asm {
        mov     esi, input
        mov     eax,esi
        shr     eax,1
        and     eax,055555555h           ;  (n >> 1) & 0x55555555
        sub     esi,eax                  ;  n - ((n >> 1) & 0x55555555)
        mov     eax,esi
        shr     eax,2                    ;  n >> 2
        and     esi,033333333h           ;  n & 0x33333333
        and     eax,033333333h           ;  (n >> 2)  & 0x33333333
        add     esi,eax                  ;  n = (n & 0x33333333) + ((n >> 2) &
0x33333333)
        mov     eax,esi
        shr     eax,4                    ;  n >> 4
        add     eax,esi                  ;  n + (n >> 4)
        and     eax,00F0F0F0Fh           ;  n = (n + (n >> 4) & 0x0F0F0F0F)
        mov     esi,eax
        shr     esi,8                    ;  n >> 8
        add     eax,esi                  ;  n = n + (n >> 8)
        mov     esi,eax
        shr     esi,16                   ;  n >> 16
        add     eax,esi                  ;  n = n + (n >> 16)
        and     eax,00000003Fh           ;  popcount
      }
}

typedef union {
    unsigned __int64 i64;
    unsigned        i32[2];
}               bb;

unsigned        pc(bb b)
{
    return (popcount32fm(b.i32[0]) + popcount32fm(b.i32[1]));
}

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <limits.h>

int             rand_range(int N)
{
    return (int) ((double) rand() / ((double) RAND_MAX + 1) * N);
}

char            string[32767];

#define BITMASK(b) (1 << ((b) % CHAR_BIT))
#define BITSLOT(b) ((b) / CHAR_BIT)
#define BITSET(a, b) ((a)[BITSLOT(b)] |= BITMASK(b))

int             main(void)
{
    int             i, j;
    int             mbc;
    int             ntr;
    int             c1,
                    c2,
                    c3,
                    c4,
                    c5,
                    c6,
                    c7;
    int             a1=1,
                    a2=1,
                    a3=1,
                    a4=1,
                    a5=1,
                    a6=1,
                    a7=1;

    bb              a;
    unsigned char  *bar = (unsigned char *) &a.i64;
    ifp();
    puts("Please enter the maximum number of bits:");
    if (fgets(string, sizeof string, stdin))
        mbc = atoi(string);
    mbc = max(mbc, 1);
    mbc = min(mbc, 32);
    puts("Please enter the maximum number of trials:");
    if (fgets(string, sizeof string, stdin))
        ntr = atoi(string);


    for (i = 0; i < ntr; i++) {
        a.i64=0;
        for (j = 0; j < mbc; j++)
            BITSET(bar, rand_range(63));

#ifdef TEST_BIT_COUNT
        c1 = DensePopulationCount(a.i64);
        c2 = SparsePopulationCount(a.i64);
        c3 = CPopulationCount(a.i64);
//        c4 = NCPopulationCount(a.i64);
        c5 = popcount32fm(a.i32[0]) + popcount32fm(a.i32[1]);
        c6 = popcount32(a.i32[0]) + popcount32(a.i32[1]);
//        c7 = pc(a);
        if (c1 != c2)
            printf("c1=%u, c2 = %u\n", c1, c2);
        if (c1 != c3)
            printf("c1=%u, c3 = %u\n", c1, c3);
//        if (c1 != c4)
//            printf("c1=%u, c4 = %u\n", c1, c4);
        if (c1 != c5)
            printf("c1=%u, c5 = %u\n", c1, c5);
        if (c1 != c6)
            printf("c1=%u, c6 = %u\n", c1, c6);
//        if (c1 != c7)
//            printf("c1=%u, c7 = %u\n", c1, c7);
#else
c3 = FirstPiece(a.i64);
c4 = firstb(a.i64);
c6 = firstbit(a.i64);
c7 = first(a.i64);
a3 &= c3 == c3;
a4 &= c4 == c3;
a6 &= c6 == c3;
a7 &= c7 == c3;

#ifdef _DEBUG
printf("n=%x %x,  c3 = %d, c4 = %d, c6 = %d, c7 = %d\n",
	   a.i32[0], a.i32[1], c3, c4, c6, c7);
#endif
#endif
    }
if (a3) puts("c3 agrees with c3");
if (a4) puts("c4 agrees with c3");
if (a6) puts("c6 agrees with c3");
if (a7) puts("c7 agrees with c3");


    return 0;
}



This page took 0 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.