Author: Gerd Isenberg
Date: 06:50:34 10/08/03
Hi all bitboard freaks,
after playing a bit with SSE2 integer intrinsics, I got rid of the clumsy
intrinsic call syntax by writing a wrapper class around the __m128i intrinsic
type with some constructors and operators - plus a small test frame. The 128-bit
Kogge-Stone routines are readable now and seem quite well optimized in release mode.
Comments and suggestions welcome.
Cheers,
Gerd
//====================================================================
#include <stdio.h>
#include <emmintrin.h>
// 64-bit unsigned board: one bit per square. `unsigned long long` is the same
// type as MSVC's `unsigned __int64`, but is also accepted by other compilers.
typedef unsigned long long BitBoard;
//====CDblBB==========================================================
// Thin wrapper around the SSE2 __m128i type holding two 64-bit bitboards
// (a "low" board in bits 0..63 and a "high" board in bits 64..127) so that
// bitboard code can be written with ordinary operators.
//
// Notes:
//  - Shifts operate on each 64-bit half independently (no bits cross halves).
//  - operator+ / operator- are BYTE-wise (8 independent lanes per board), i.e.
//    carries/borrows never cross a byte boundary.
//  - The pointer constructors and store() use aligned 128-bit loads/stores,
//    so the pointers must be 16-byte aligned.
//  - Element access goes through SSE2 intrinsics rather than the MSVC-only
//    m128i_u64 / m128i_u8 union members, so the class is not tied to MSVC.
class CDblBB
{
public:
    // Default construction deliberately leaves the register uninitialized.
    CDblBB() {}

    // Builds the pair from two separate boards: `low` -> bits 0..63,
    // `high` -> bits 64..127.
    CDblBB(const BitBoard &low, const BitBoard &high) {
        dbl = _mm_set_epi64x(static_cast<long long>(high),
                             static_cast<long long>(low));
    }

    CDblBB(__m128i a) { dbl = a; }
    CDblBB(const CDblBB& a) { dbl = a.dbl; }

    // Aligned loads from memory (pointer must be 16-byte aligned).
    CDblBB(const __m128i* ptr) { dbl = _mm_load_si128(ptr); }
    CDblBB(const CDblBB* ptr) { dbl = _mm_load_si128(&ptr->dbl); }

    // Returns bits 0..63.
    BitBoard getLowBoard() const {
        BitBoard low;
        _mm_storel_epi64(reinterpret_cast<__m128i*>(&low), dbl);
        return low;
    }

    // Returns bits 64..127 (the high half is moved down before storing).
    BitBoard getHighBoard() const {
        BitBoard high;
        _mm_storel_epi64(reinterpret_cast<__m128i*>(&high),
                         _mm_unpackhi_epi64(dbl, dbl));
        return high;
    }

    // Aligned stores to memory (pointer must be 16-byte aligned).
    void store(__m128i* ptr) const { _mm_store_si128(ptr, dbl); }
    void store(CDblBB* ptr) const { _mm_store_si128(&ptr->dbl, dbl); }

    // Logical shifts of both 64-bit halves by `shift` bits, zero-filling.
    CDblBB& operator>>=(int shift) {
        dbl = _mm_srli_epi64(dbl, shift); return *this; }
    CDblBB& operator<<=(int shift) {
        dbl = _mm_slli_epi64(dbl, shift); return *this; }

    // Bitwise combination over all 128 bits.
    CDblBB& operator&=(const CDblBB &dbb) {
        dbl = _mm_and_si128(dbl, dbb.dbl); return *this; }
    CDblBB& operator|=(const CDblBB &dbb) {
        dbl = _mm_or_si128(dbl, dbb.dbl); return *this; }
    CDblBB& operator^=(const CDblBB &dbb) {
        dbl = _mm_xor_si128(dbl, dbb.dbl); return *this; }

    // Byte-wise add / subtract (16 independent 8-bit lanes, wrapping).
    CDblBB& operator+=(const CDblBB &dbb) {
        dbl = _mm_add_epi8(dbl, dbb.dbl); return *this; }
    CDblBB& operator-=(const CDblBB &dbb) {
        dbl = _mm_sub_epi8(dbl, dbb.dbl); return *this; }

    // this & snotA  /  this & snotH
    CDblBB notA() const {
        return CDblBB(_mm_and_si128(dbl, snotA)); }
    CDblBB notH() const {
        return CDblBB(_mm_and_si128(dbl, snotH)); }

    // (~this) & snotA  /  (~this) & snotH  -- note that *this* is the
    // operand that gets complemented.
    CDblBB andNotA() const {
        return CDblBB(_mm_andnot_si128(dbl, snotA)); }
    CDblBB andNotH() const {
        return CDblBB(_mm_andnot_si128(dbl, snotH)); }

    friend CDblBB operator>>(const CDblBB &dbb, int shift) {
        return CDblBB(_mm_srli_epi64(dbb.dbl, shift)); }
    friend CDblBB operator<<(const CDblBB &dbb, int shift) {
        return CDblBB(_mm_slli_epi64(dbb.dbl, shift)); }
    friend CDblBB operator&(const CDblBB &a, const CDblBB &b) {
        return CDblBB(_mm_and_si128(a.dbl, b.dbl)); }
    friend CDblBB operator|(const CDblBB &a, const CDblBB &b) {
        return CDblBB(_mm_or_si128(a.dbl, b.dbl)); }
    friend CDblBB operator^(const CDblBB &a, const CDblBB &b) {
        return CDblBB(_mm_xor_si128(a.dbl, b.dbl)); }
    friend CDblBB operator+(const CDblBB &a, const CDblBB &b) {
        return CDblBB(_mm_add_epi8(a.dbl, b.dbl)); }
    friend CDblBB operator-(const CDblBB &a, const CDblBB &b) {
        return CDblBB(_mm_sub_epi8(a.dbl, b.dbl)); }
    // Complement of all 128 bits (xor with an all-ones constant).
    friend CDblBB operator~(const CDblBB &a) {
        return CDblBB(_mm_xor_si128(a.dbl, minusOne)); }

    // some output routines

    // Prints both boards side by side, one byte ("rank") per line, from byte 7
    // down to byte 0 of each board; low board on the left, high board on the
    // right. initRankStr() must have been called first.
    void printBoards() const
    {
        unsigned char ranks[16];
        _mm_storeu_si128(reinterpret_cast<__m128i*>(ranks), dbl);
        for (int i = 7; i >= 0; --i)
            printf("%s %s\n", rankStr[ranks[i]], rankStr[ranks[i + 8]]);
        printf("\n");
    }

    // Fills rankStr: for every byte value, an 8-character string with one
    // glyph per bit, least significant bit first. rankStr is used by
    // printBoards.
    static void initRankStr()
    {
        for (int i = 0; i < 256; ++i)
        {
            for (int j = 0; j < 8; ++j)
                rankStr[i][j] = binDigit[(i & (1 << j)) != 0];
            // Terminate after the 8 glyphs. The original indexed with the loop
            // variable here, which is out of scope in standard C++.
            rankStr[i][8] = '\0';
        }
    }

protected:
    __m128i dbl; // this intrinsic type is wrapped here

protected:
    static const __m128i snotA;    // mask used by notA()/andNotA()
    static const __m128i snotH;    // mask used by notH()/andNotH()
    static const __m128i minusOne; // all bits set, used by operator~

protected:
    static const char binDigit[2]; // glyphs for a clear / set bit
    static char rankStr[256][10];  // pre-rendered strings, see initRankStr()
};
// Storage for the per-byte display strings filled in by CDblBB::initRankStr().
char CDblBB::rankStr[256][10];

// Glyphs for a clear / set bit, given as raw single bytes 0xB0 and 0xDB.
// The first one had been mangled into a multi-byte character by a text
// re-encoding; as an octal escape it is a plain `char` again.
// NOTE(review): these look like the IBM PC console shade/block characters --
// confirm on the target terminal, or switch to the ASCII pair below.
const char CDblBB::binDigit[2] = { '\260', '\333' }; // { '0','1'};

// Every byte 0xFE: bit 0 of each byte cleared.
const __m128i CDblBB::snotA = _mm_set1_epi8('\376');

// Every byte 0x7F: bit 7 of each byte cleared.
const __m128i CDblBB::snotH = _mm_set1_epi8('\177');

// Every byte 0xFF: all 128 bits set.
const __m128i CDblBB::minusOne = _mm_set1_epi8('\377');
//====CDblBB==========================================================
// some test frame
// Input for the attack generators: two rook sets processed in parallel.
struct sSource
{
    // r1 goes into the low half and r2 into the high half of `rooks`;
    // both halves of `occup` receive the combined set r1|r2.
    sSource(BitBoard r1, BitBoard r2)
        : rooks(r1, r2)
        , occup(r1 | r2, r1 | r2)
    {
    }

    CDblBB rooks; // the two rook boards (low = r1, high = r2)
    CDblBB occup; // occupancy, identical in both halves
};
// Output of the attack generators: one double-board per direction.
struct sTarget
{
    CDblBB left;   // written by leftAttacks()
    CDblBB right;  // written by rightAttacks()
    CDblBB up;     // written by upAttacks()
    CDblBB down;   // written by downAttacks()
};
// Computes the attack set of pSource->rooks in the "right" direction
// (towards higher bit indices within each byte) and stores it in
// pTarget->right.
//
// Two implementations are kept; the first one is compiled.
void rightAttacks(sTarget* pTarget, const sSource* pSource)
{
#if 1
    // Subtraction variant: occ ^ (occ - 2*rooks). CDblBB's minus is byte-wise,
    // so borrows stay inside a single byte.
    CDblBB occupied(&pSource->occup);
    CDblBB sliders(&pSource->rooks);

    CDblBB diff = occupied - sliders;
    diff -= sliders;

    CDblBB attacks(occupied ^ diff);
    attacks.store(&pTarget->right);
#else
    // Parallel-prefix (Kogge-Stone) variant: `fill` spreads the rooks through
    // `open`, i.e. the complement of the occupancy ANDed with the snotA mask.
    CDblBB fill(&pSource->rooks);
    CDblBB open(pSource->occup.andNotA());

    fill |= (fill << 1) & open;
    open &= (open << 1);
    fill |= (fill << 2) & open;
    open &= (open << 2);
    fill |= (fill << 4) & open;

    // One more step onto the first square beyond the fill, masked with snotA.
    CDblBB attacks((fill << 1).notA());
    attacks.store(&pTarget->right);
#endif
}
// Computes the attack set of pSource->rooks in the "left" direction
// (towards lower bit indices within each byte) with a parallel-prefix
// (Kogge-Stone) fill and stores it in pTarget->left.
void leftAttacks(sTarget* pTarget, const sSource* pSource)
{
    CDblBB fill(&pSource->rooks);
    // Squares the fill may pass through: complement of the occupancy,
    // ANDed with the snotH mask.
    CDblBB open(pSource->occup.andNotH());

    // Doubling steps: 1, 2, 4 bits.
    fill |= (fill >> 1) & open;
    open &= (open >> 1);
    fill |= (fill >> 2) & open;
    open &= (open >> 2);
    fill |= (fill >> 4) & open;

    // One more step onto the first square beyond the fill, masked with snotH.
    CDblBB attacks((fill >> 1).notH());
    attacks.store(&pTarget->left);
}
// Computes the attack set of pSource->rooks in the "up" direction
// (shifting by whole bytes towards higher bit indices) with a
// parallel-prefix (Kogge-Stone) fill and stores it in pTarget->up.
void upAttacks(sTarget* pTarget, const sSource* pSource)
{
    CDblBB fill(&pSource->rooks);
    // Squares the fill may pass through: complement of the occupancy.
    CDblBB open(~pSource->occup);

    // Doubling steps: 8, 16, 32 bits.
    fill |= (fill << 8) & open;
    open &= (open << 8);
    fill |= (fill << 16) & open;
    open &= (open << 16);
    fill |= (fill << 32) & open;

    // One more step onto the first square beyond the fill.
    CDblBB attacks(fill << 8);
    attacks.store(&pTarget->up);
}
// Computes the attack set of pSource->rooks in the "down" direction
// (shifting by whole bytes towards lower bit indices) with a
// parallel-prefix (Kogge-Stone) fill and stores it in pTarget->down.
void downAttacks(sTarget* pTarget, const sSource* pSource)
{
    CDblBB fill(&pSource->rooks);
    // Squares the fill may pass through: complement of the occupancy.
    CDblBB open(~pSource->occup);

    // Doubling steps: 8, 16, 32 bits.
    fill |= (fill >> 8) & open;
    open &= (open >> 8);
    fill |= (fill >> 16) & open;
    open &= (open >> 16);
    fill |= (fill >> 32) & open;

    // One more step onto the first square beyond the fill.
    CDblBB attacks(fill >> 8);
    attacks.store(&pTarget->down);
}
int main(int argc, char* argv[])
{
CDblBB::initRankStr();
// some sample rooks e.g. white/black
sSource source(0x8040201008040211, 0x0102040810204088);
sTarget target;
source.rooks.printBoards();
source.occup.printBoards();
rightAttacks(&target, &source);
leftAttacks(&target, &source);
upAttacks(&target, &source);
downAttacks(&target, &source);
target.right.printBoards();
target.left.printBoards();
target.up.printBoards();
target.down.printBoards();
getchar();
return 0;
}
This page took 0.01 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.