Author: Gerd Isenberg
Date: 01:15:21 03/31/04
Go up one level in this thread
Looks fine ;-)
Curious about the call to __chkstk in isDeBruijnN, but not in the recursive function
genDeBruijn. Yes, isDeBruijnN has a local 4 KByte array on its frame, and has to
clear it too, so as far as runtime is concerned the call to __chkstk doesn't matter much.
Instead of the call to memset I would prefer an inlined intrinsic for it, e.g. with an
8-byte aligned bool array and REP STOSQ with rcx=4096/8. I guess there are some
additional compiler flags for that...
If you have some additional time, it would be nice to see the assembly of a
Kogge-Stone filler with a bit more register pressure:
Thanks again,
Gerd
#include <stdio.h>
#include <emmintrin.h>
// 64-bit unsigned word holding one board; BBPair::printBoards renders it as
// 8 rows of 8 bits. __int64 is a Microsoft-specific type.
typedef unsigned __int64 BitBoard;
class BBPair
{
friend class XMM;
friend class GPR;
public:
__forceinline BBPair(){}
__forceinline BBPair(const BitBoard &low, const BitBoard &high) {
sgl[0] = low; sgl[1] = high;}
// some output routines
void printBoards()
{
for ( int i = 7; i >= 0; --i)
printf("%s %s\n", rankStr[dbl.m128i_u8[i]],rankStr[dbl.m128i_u8[i+8]]);
printf("\n");
}
static void initRankStr() // used by printBoards
{
for (int i = 0; i < 256; ++i)
{
for (int j = 0; j < 8; ++j)
rankStr[i][j] = binDigit[(i & (1<<j)) != 0];
rankStr[i][j] = '\0';
}
}
protected:
union
{
__m128i dbl; // this intrinsice type is wrapped here
BitBoard sgl[2];
};
protected:
static const char binDigit[2];
static char rankStr[256][10];
};
// Storage for the pre-rendered row strings; filled by BBPair::initRankStr().
char BBPair::rankStr[256][10];
// Glyphs printed for a clear and a set bit. Both are written as octal escapes
// so the values do not depend on the encoding of this source file: '\260' is
// the single-byte (Latin-1) value of the degree sign that used to be spelled
// literally here. NOTE(review): presumably these are the code page 437
// light-shade and full-block characters - confirm on the target console.
const char BBPair::binDigit[2] = { '\260','\333'}; // { '0','1'};
// BBPair flavour whose operations are carried out with SSE2 intrinsics on the
// 128-bit member dbl, i.e. on both boards at once.
// Shifts act on each 64-bit board separately; + and - are byte-wise.
class XMM : public BBPair
{
public:
    // Leaves both boards uninitialized.
    __forceinline XMM(){}

    // Stores low in board 0 and high in board 1.
    __forceinline XMM(const BitBoard &low, const BitBoard &high) {
        dbl.m128i_u64[0] = low; dbl.m128i_u64[1] = high;}

    // Stores low in board 0; the high board is zeroed so that it never holds
    // an indeterminate value.
    __forceinline XMM(const BitBoard &low) {
        dbl.m128i_u64[0] = low; dbl.m128i_u64[1] = 0;}

    __forceinline XMM(__m128i a){dbl = a;}
    __forceinline XMM(const BBPair& a){dbl = a.dbl;}

    // Both pointer constructors use _mm_load_si128, which requires the
    // address to be 16-byte aligned.
    __forceinline XMM(const __m128i* ptr){dbl = _mm_load_si128(ptr);}
    __forceinline XMM(const BBPair* ptr){dbl = _mm_load_si128(&ptr->dbl);}

    __forceinline BitBoard getLowBoard() const {return dbl.m128i_u64[0];}
    __forceinline BitBoard getHighBoard() const {return dbl.m128i_u64[1];}

    // Both store overloads use _mm_store_si128 (16-byte aligned destination).
    __forceinline void store(__m128i* ptr) const {_mm_store_si128 (ptr, dbl);}
    __forceinline void store(BBPair* ptr) const {_mm_store_si128 (&ptr->dbl, dbl);}

    // Broadcast 32-bit word 0 (resp. word 1) into all four 32-bit lanes.
    __forceinline XMM copyWord0() const {return XMM(_mm_shuffle_epi32 (dbl, 0));}
    __forceinline XMM copyWord1() const {return XMM(_mm_shuffle_epi32 (dbl, 0x55));}

    // Returns the lowest 16-bit word.
    __forceinline int getInt16() const {return _mm_extract_epi16(dbl,0);}

    // Replaces *this with the lane-wise 32-bit equality mask of *this and dbb.
    __forceinline XMM& compare32(const XMM &dbb) {
        dbl = _mm_cmpeq_epi32(dbl, dbb.dbl); return *this;}

    // Replaces *this with _mm_sad_epu8(*this, dbb): per 64-bit half, the sum
    // of absolute differences of the eight unsigned bytes.
    __forceinline XMM& addAbsByteDiff(const XMM &dbb) {
        dbl = _mm_sad_epu8(dbl, dbb.dbl); return *this;}

    // Adds, as 32-bit lanes, dbb shuffled with selector 0x56 to *this
    // (lane 0 receives dbb's lane 2, lanes 1..3 receive dbb's lane 1).
    __forceinline XMM& addWords(const XMM &dbb) {
        dbl = _mm_add_epi32(_mm_shuffle_epi32(dbb.dbl,0x56), dbl); return *this;}

    // Logical shifts of each 64-bit board by the same count.
    __forceinline XMM& operator>>=(int shift) {
        dbl = _mm_srli_epi64(dbl, shift); return *this;}
    __forceinline XMM& operator<<=(int shift) {
        dbl = _mm_slli_epi64(dbl, shift); return *this;}

    // Bitwise operations on all 128 bits.
    __forceinline XMM& operator&=(const XMM &dbb) {
        dbl = _mm_and_si128(dbl, dbb.dbl); return *this;}
    __forceinline XMM& operator|=(const XMM &dbb) {
        dbl = _mm_or_si128(dbl, dbb.dbl); return *this;}
    __forceinline XMM& operator^=(const XMM &dbb) {
        dbl = _mm_xor_si128(dbl, dbb.dbl); return *this;}

    // Byte-wise (8-bit lane) wrap-around addition / subtraction.
    __forceinline XMM& operator+=(const XMM &dbb) {
        dbl = _mm_add_epi8(dbl, dbb.dbl); return *this;}
    __forceinline XMM& operator-=(const XMM &dbb) {
        dbl = _mm_sub_epi8(dbl, dbb.dbl); return *this;}

    // Copies of *this masked with snotA (0xFE in every byte) resp.
    // snotH (0x7F in every byte).
    __forceinline XMM notA() const {return XMM(_mm_and_si128(dbl, snotA));}
    __forceinline XMM notH() const {return XMM(_mm_and_si128(dbl, snotH));}

    // Non-modifying counterparts of the compound operators above.
    __forceinline friend XMM operator>>(const XMM &dbb, int shift) {
        return XMM(_mm_srli_epi64(dbb.dbl, shift));}
    __forceinline friend XMM operator<<(const XMM &dbb, int shift) {
        return XMM(_mm_slli_epi64(dbb.dbl, shift));}
    __forceinline friend XMM operator&(const XMM &a, const XMM &b) {
        return XMM(_mm_and_si128(a.dbl, b.dbl));}
    __forceinline friend XMM operator|(const XMM &a, const XMM &b) {
        return XMM(_mm_or_si128(a.dbl, b.dbl));}
    __forceinline friend XMM operator^(const XMM &a, const XMM &b) {
        return XMM(_mm_xor_si128(a.dbl, b.dbl));}
    __forceinline friend XMM operator+(const XMM &a, const XMM &b) {
        return XMM(_mm_add_epi8(a.dbl, b.dbl));}
    __forceinline friend XMM operator-(const XMM &a, const XMM &b) {
        return XMM(_mm_sub_epi8(a.dbl, b.dbl));}

    // Complement, implemented as XOR with an all-ones constant.
    __forceinline friend XMM operator~(const XMM &a) {
        return XMM(_mm_xor_si128(a.dbl, minusOne));}
protected:
    static const __m128i snotA;     // 0xFE in every byte
    static const __m128i snotH;     // 0x7F in every byte
    static const __m128i minusOne;  // 0xFF in every byte
};
// 0xFE ('\376') in each of the 16 bytes, i.e. bit 0 of every byte is clear.
// XMM::notA() ANDs with this mask; it matches the 0xfefefefefefefefe constant
// used by GPR::notA(). One row below corresponds to one 64-bit board.
const __m128i XMM::snotA =
{
    '\376', '\376', '\376', '\376', '\376', '\376', '\376', '\376',
    '\376', '\376', '\376', '\376', '\376', '\376', '\376', '\376',
};
// 0x7F ('\177') in each of the 16 bytes, i.e. bit 7 of every byte is clear.
// XMM::notH() ANDs with this mask; it matches the 0x7f7f7f7f7f7f7f7f constant
// used by GPR::notH(). One row below corresponds to one 64-bit board.
const __m128i XMM::snotH =
{
    '\177', '\177', '\177', '\177', '\177', '\177', '\177', '\177',
    '\177', '\177', '\177', '\177', '\177', '\177', '\177', '\177',
};
// 0xFF ('\377') in each of the 16 bytes, i.e. all 128 bits set.
// XMM's operator~ XORs with this constant to form the complement.
// One row below corresponds to one 64-bit board.
const __m128i XMM::minusOne =
{
    '\377', '\377', '\377', '\377', '\377', '\377', '\377', '\377',
    '\377', '\377', '\377', '\377', '\377', '\377', '\377', '\377',
};
// BBPair flavour whose operations are carried out on the two 64-bit words
// sgl[0] / sgl[1] with ordinary integer operators. It offers the same
// operator set as XMM so that templates such as updownAttacks can be
// instantiated with either class. As in XMM, shifts act on each board
// separately and + / - are byte-wise.
class GPR : public BBPair
{
public:
    // Leaves both boards uninitialized.
    __forceinline GPR(){}

    // Stores low in board 0 and high in board 1.
    __forceinline GPR(const BitBoard &low, const BitBoard &high) {
        sgl[0] = low; sgl[1] = high;}
    __forceinline GPR(const BBPair& a) {sgl[0] = a.sgl[0]; sgl[1] = a.sgl[1];}
    __forceinline GPR(const BBPair* ptr){
        sgl[0] = ptr->sgl[0]; sgl[1] = ptr->sgl[1];}

    __forceinline BitBoard getLowBoard() const {return sgl[0];}
    __forceinline BitBoard getHighBoard() const {return sgl[1];}

    // Writes both boards to *ptr.
    __forceinline void store(BBPair* ptr) const {
        ptr->sgl[0] = sgl[0]; ptr->sgl[1] = sgl[1];}

    // Shifts of each 64-bit board by the same count.
    __forceinline GPR& operator>>=(int shift) {
        sgl[0]>>=shift; sgl[1]>>=shift; return *this;}
    __forceinline GPR& operator<<=(int shift) {
        sgl[0]<<=shift; sgl[1]<<=shift; return *this;}

    // Bitwise operations on both boards.
    __forceinline GPR& operator&=(const GPR &dbb) {
        sgl[0]&=dbb.sgl[0]; sgl[1]&=dbb.sgl[1]; return *this;}
    __forceinline GPR& operator|=(const GPR &dbb) {
        sgl[0]|=dbb.sgl[0]; sgl[1]|=dbb.sgl[1]; return *this;}
    __forceinline GPR& operator^=(const GPR &dbb) {
        sgl[0]^=dbb.sgl[0]; sgl[1]^=dbb.sgl[1]; return *this;}

    // Byte-wise wrap-around addition / subtraction, mirroring XMM's
    // operator+= / operator-= (which use _mm_add_epi8 / _mm_sub_epi8).
    __forceinline GPR& operator+=(const GPR &dbb)
    {
        for (int i=0; i < 16; i++)
            dbl.m128i_u8[i] = dbl.m128i_u8[i] + dbb.dbl.m128i_u8[i];
        return *this;
    }
    __forceinline GPR& operator-=(const GPR &dbb)
    {
        for (int i=0; i < 16; i++)
            dbl.m128i_u8[i] = dbl.m128i_u8[i] - dbb.dbl.m128i_u8[i];
        return *this;
    }

    // Copies of *this with bit 0 (notA) resp. bit 7 (notH) of every byte
    // cleared.
    __forceinline GPR notA() const {
        return GPR(sgl[0]&0xfefefefefefefefe, sgl[1]&0xfefefefefefefefe);}
    __forceinline GPR notH() const {
        return GPR(sgl[0]&0x7f7f7f7f7f7f7f7f, sgl[1]&0x7f7f7f7f7f7f7f7f);}

    // Non-modifying counterparts of the compound operators above.
    __forceinline friend GPR operator>>(const GPR &dbb, int shift) {
        return GPR(dbb.sgl[0]>>shift, dbb.sgl[1]>>shift);}
    __forceinline friend GPR operator<<(const GPR &dbb, int shift) {
        return GPR(dbb.sgl[0]<<shift, dbb.sgl[1]<<shift);}
    __forceinline friend GPR operator&(const GPR &a, const GPR &b) {
        return GPR(a.sgl[0]&b.sgl[0], a.sgl[1]&b.sgl[1]);}
    __forceinline friend GPR operator|(const GPR &a, const GPR &b) {
        return GPR(a.sgl[0]|b.sgl[0], a.sgl[1]|b.sgl[1]);}
    __forceinline friend GPR operator^(const GPR &a, const GPR &b) {
        return GPR(a.sgl[0]^b.sgl[0], a.sgl[1]^b.sgl[1]);}

    // Byte-wise sum of a and b (each of the 16 bytes wraps independently).
    __forceinline friend GPR operator+(const GPR &a, const GPR &b)
    {
        GPR c;
        for (int i=0; i < 16; i++)
            c.dbl.m128i_u8[i] = a.dbl.m128i_u8[i] + b.dbl.m128i_u8[i];
        return c;
    }

    // Byte-wise difference of a and b (each of the 16 bytes wraps
    // independently).
    __forceinline friend GPR operator-(const GPR &a, const GPR &b)
    {
        GPR c;
        for (int i=0; i < 16; i++)
            c.dbl.m128i_u8[i] = a.dbl.m128i_u8[i] - b.dbl.m128i_u8[i];
        return c;
    }

    // Complement of both boards.
    __forceinline friend GPR operator~(const GPR &a) {
        return GPR(~a.sgl[0], ~a.sgl[1]);}
};
// Input record for updownAttacks.
//   rooks : r1 in the low board, r2 in the high board (used as fill origins).
//   occup : r1|r2 in BOTH boards; updownAttacks complements it to obtain the
//           set of squares the fill may pass through.
struct sSource
{
    BBPair rooks;
    BBPair occup;

    sSource(BitBoard r1, BitBoard r2)
        : rooks(r1, r2)
        , occup(r1 | r2, r1 | r2)
    {
    }
};
// Output record written by updownAttacks.
struct sTarget
{
BBPair up;   // result of the fill that shifts left (towards higher bits)
BBPair down; // result of the fill that shifts right (towards lower bits)
};
// Kogge-Stone style fill in two directions at once, for both boards of a
// pair. T supplies the bitboard-pair arithmetic (GPR or XMM in this file).
//
//   pSource->rooks : fill origins ("generators")
//   pSource->occup : occupied squares; its complement is the "propagator"
//   pTarget->up    : fill towards higher bits, shifted once more by 8
//   pTarget->down  : fill towards lower bits, shifted once more by 8
//
// The up and down computations are independent of each other but are kept
// interleaved, statement by statement.
template <class T> void updownAttacks(sTarget* pTarget, const sSource* pSource)
{
    T genUp(&pSource->rooks);
    const T empty = ~T(&pSource->occup);   // empty
    T proUp(empty);
    T genDown(genUp);
    T proDown(empty);

    // Step 1: distance 8.
    genUp   = genUp   | (proUp   & (genUp   << 8));
    genDown = genDown | (proDown & (genDown >> 8));
    proUp   = proUp   & (proUp   << 8);
    proDown = proDown & (proDown >> 8);

    // Step 2: distance 16.
    genUp   = genUp   | (proUp   & (genUp   << 16));
    genDown = genDown | (proDown & (genDown >> 16));
    proUp   = proUp   & (proUp   << 16);
    proDown = proDown & (proDown >> 16);

    // Step 3: distance 32 (the propagators are not needed afterwards).
    genUp   = genUp   | (proUp   & (genUp   << 32));
    genDown = genDown | (proDown & (genDown >> 32));

    // One final shift turns the filled sets into the stored results.
    T upResult(genUp << 8);
    upResult.store(&pTarget->up);
    T downResult(genDown >> 8);
    downResult.store(&pTarget->down);
}
int main(int argc, char* argv[])
{
BBPair::initRankStr();
sSource source1(0x0000200000000000, 0x0102040810204088);
sSource source2(0x0000004000000000, 0x0102040810204088);
sTarget target1;
sTarget target2;
updownAttacks<GPR>(&target1, &source1);
updownAttacks<XMM>(&target2, &source2);
target1.down.printBoards();
target1.up.printBoards();
target2.down.printBoards();
target2.up.printBoards();
return 0;
}
This page took 0 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.