Author: Gerd Isenberg
Date: 01:15:21 03/31/04
Go up one level in this thread
Looks fine ;-) Curious about call __chkstk in isDeBruijnN, but not in the recursive function genDeBruijn. Yes, isDeBruijnN has a local 4KByte array on the frame, and has to clear it too, so under runtime considerations call __chkstk doesn't matter much. Instead of call memset i would prefere an inlined intrinsic of that e.g. with a 8-byte aligned bool array and REP STOSQ with rcx=4096/8. I guess there are some additional compiler flags... If you have some additional time, it would be nice to see the assembly of a kogge-stone filler with a bit more register pressure: Thanks again, Gerd #include <stdio.h> #include <emmintrin.h> typedef unsigned __int64 BitBoard; class BBPair { friend class XMM; friend class GPR; public: __forceinline BBPair(){} __forceinline BBPair(const BitBoard &low, const BitBoard &high) { sgl[0] = low; sgl[1] = high;} // some output routines void printBoards() { for ( int i = 7; i >= 0; --i) printf("%s %s\n", rankStr[dbl.m128i_u8[i]],rankStr[dbl.m128i_u8[i+8]]); printf("\n"); } static void initRankStr() // used by printBoards { for (int i = 0; i < 256; ++i) { for (int j = 0; j < 8; ++j) rankStr[i][j] = binDigit[(i & (1<<j)) != 0]; rankStr[i][j] = '\0'; } } protected: union { __m128i dbl; // this intrinsice type is wrapped here BitBoard sgl[2]; }; protected: static const char binDigit[2]; static char rankStr[256][10]; }; char BBPair::rankStr[256][10]; const char BBPair::binDigit[2] = { '°','\333'}; // { '0','1'}; class XMM : public BBPair { public: __forceinline XMM(){} __forceinline XMM(const BitBoard &low, const BitBoard &high) { dbl.m128i_u64[0] = low; dbl.m128i_u64[1] = high;} __forceinline XMM(const BitBoard &low) {dbl.m128i_u64[0] = low;} __forceinline XMM(__m128i a){dbl = a;} __forceinline XMM(const BBPair& a){dbl = a.dbl;} __forceinline XMM(const __m128i* ptr){dbl = _mm_load_si128(ptr);} __forceinline XMM(const BBPair* ptr){dbl = _mm_load_si128(&ptr->dbl);} __forceinline BitBoard getLowBoard() const {return dbl.m128i_u64[0];} __forceinline BitBoard getHighBoard() const {return dbl.m128i_u64[1];} __forceinline void store(__m128i* ptr) {_mm_store_si128 (ptr, dbl);} __forceinline void store(BBPair* ptr) {_mm_store_si128 (&ptr->dbl, dbl);} __forceinline XMM copyWord0() {return XMM(_mm_shuffle_epi32 (dbl, 0));} __forceinline XMM copyWord1() {return XMM(_mm_shuffle_epi32 (dbl, 0x55));} __forceinline int getInt16() {return _mm_extract_epi16(dbl,0);} __forceinline XMM& compare32(const XMM &dbb) {dbl = _mm_cmpeq_epi32(dbl, dbb.dbl); return *this;} __forceinline XMM& addAbsByteDiff(const XMM &dbb) {dbl = _mm_sad_epu8(dbl, dbb.dbl); return *this;} __forceinline XMM& addWords(const XMM &dbb) {dbl = _mm_add_epi32(_mm_shuffle_epi32(dbb.dbl,0x56), dbl); return *this;} __forceinline XMM& operator>>=(int shift) { dbl = _mm_srli_epi64(dbl, shift); return *this;} __forceinline XMM& operator<<=(int shift) {dbl = _mm_slli_epi64(dbl, shift); return *this;} __forceinline XMM& operator&=(const XMM &dbb) {dbl = _mm_and_si128(dbl, dbb.dbl); return *this;} __forceinline XMM& operator|=(const XMM &dbb) {dbl = _mm_or_si128(dbl, dbb.dbl); return *this;} __forceinline XMM& operator^=(const XMM &dbb) {dbl = _mm_xor_si128(dbl, dbb.dbl); return *this;} __forceinline XMM& operator+=(const XMM &dbb) {dbl = _mm_add_epi8(dbl, dbb.dbl); return *this;} __forceinline XMM& operator-=(const XMM &dbb) {dbl = _mm_sub_epi8(dbl, dbb.dbl); return *this;} __forceinline XMM notA() const {return XMM(_mm_and_si128(dbl, snotA));} __forceinline XMM notH() const {return XMM(_mm_and_si128(dbl, snotH));} __forceinline friend XMM operator>>(const XMM &dbb, int shift) {return XMM(_mm_srli_epi64(dbb.dbl, shift));} __forceinline friend XMM operator<<(const XMM &dbb, int shift) {return XMM(_mm_slli_epi64(dbb.dbl, shift));} __forceinline friend XMM operator&(const XMM &a, const XMM &b) {return XMM(_mm_and_si128(a.dbl, b.dbl));} __forceinline friend XMM operator|(const XMM &a, const XMM &b) {return XMM(_mm_or_si128(a.dbl, b.dbl));} __forceinline friend XMM operator^(const XMM &a, const XMM &b) {return XMM(_mm_xor_si128(a.dbl, b.dbl));} __forceinline friend XMM operator+(const XMM &a, const XMM &b) {return XMM(_mm_add_epi8(a.dbl, b.dbl));} __forceinline friend XMM operator-(const XMM &a, const XMM &b) {return XMM(_mm_sub_epi8(a.dbl, b.dbl));} __forceinline friend XMM operator~(const XMM &a) {return XMM(_mm_xor_si128(a.dbl, minusOne));} protected: static const __m128i snotA; static const __m128i snotH; static const __m128i minusOne; }; const __m128i XMM::snotA = { '\376', '\376', '\376', '\376', '\376', '\376', '\376', '\376', '\376', '\376', '\376', '\376', '\376', '\376', '\376', '\376', }; const __m128i XMM::snotH = { '\177', '\177', '\177', '\177', '\177', '\177', '\177', '\177', '\177', '\177', '\177', '\177', '\177', '\177', '\177', '\177', }; const __m128i XMM::minusOne = { '\377', '\377', '\377', '\377', '\377', '\377', '\377', '\377', '\377', '\377', '\377', '\377', '\377', '\377', '\377', '\377', }; class GPR : public BBPair { public: __forceinline GPR(){} __forceinline GPR(const BitBoard &low, const BitBoard &high) {sgl[0] = low; sgl[1] = high;} __forceinline GPR(const BBPair& a) {sgl[0] = a.sgl[0]; sgl[1] = a.sgl[1];} __forceinline GPR(const BBPair* ptr){sgl[0] = ptr->sgl[0]; sgl[1] = ptr->sgl[1];} __forceinline BitBoard getLowBoard() const {return sgl[0];} __forceinline BitBoard getHighBoard() const {return sgl[1];} __forceinline void store(BBPair* ptr) {ptr->sgl[0] = sgl[0]; ptr->sgl[1] = sgl[1];} __forceinline GPR& operator>>=(int shift) { sgl[0]>>=shift; sgl[1]>>=shift; return *this;} __forceinline GPR& operator<<=(int shift) { sgl[0]<<=shift; sgl[1]<<=shift; return *this;} __forceinline GPR& operator&=(const GPR &dbb) {sgl[0]&=dbb.sgl[0]; sgl[1]&=dbb.sgl[1]; return *this;} __forceinline GPR& operator|=(const GPR &dbb) {sgl[0]|=dbb.sgl[0]; sgl[1]|=dbb.sgl[1]; return *this;} __forceinline GPR& operator^=(const GPR &dbb) {sgl[0]^=dbb.sgl[0]; sgl[1]^=dbb.sgl[1]; return *this;} __forceinline GPR notA() const { return GPR(sgl[0]&0xfefefefefefefefe, sgl[1]&0xfefefefefefefefe);} __forceinline GPR notH() const { return GPR(sgl[0]&0x7f7f7f7f7f7f7f7f, sgl[1]&0x7f7f7f7f7f7f7f7f);} __forceinline friend GPR operator>>(const GPR &dbb, int shift) { return GPR(dbb.sgl[0]>>shift, dbb.sgl[1]>>shift);} __forceinline friend GPR operator<<(const GPR &dbb, int shift) { return GPR(dbb.sgl[0]<<shift, dbb.sgl[1]<<shift);} __forceinline friend GPR operator&(const GPR &a, const GPR &b) { return GPR(a.sgl[0]&b.sgl[0], a.sgl[1]&b.sgl[1]);} __forceinline friend GPR operator|(const GPR &a, const GPR &b) { return GPR(a.sgl[0]|b.sgl[0], a.sgl[1]|b.sgl[1]);} __forceinline friend GPR operator^(const GPR &a, const GPR &b) { return GPR(a.sgl[0]^b.sgl[0], a.sgl[1]^b.sgl[1]);} __forceinline friend GPR operator-(const GPR &a, const GPR &b) { GPR c; for (int i=0; i < 16; i++) c.dbl.m128i_u8[i] = a.dbl.m128i_u8[i] - b.dbl.m128i_u8[i]; return c; } __forceinline friend GPR operator~(const GPR &a) { return GPR(~a.sgl[0], ~a.sgl[1]);} }; struct sSource { sSource(BitBoard r1, BitBoard r2) : rooks(r1,r2), occup(r1|r2, r1|r2) {} BBPair rooks; BBPair occup; }; struct sTarget { BBPair up; BBPair down; }; template <class T> void updownAttacks(sTarget* pTarget, const sSource* pSource) { T gu(&pSource->rooks); T pu(&pSource->occup); pu = ~pu; // empty T gd(gu); T pd(pu); gu |= pu & (gu<<8); gd |= pd & (gd>>8); pu &= pu<<8; pd &= pd>>8; gu |= pu & (gu<<16); gd |= pd & (gd>>16); pu &= pu<<16; pd &= pd>>16; gu |= pu & (gu<<32); gd |= pd & (gd>>32); (gu<<8).store(&pTarget->up); (gd>>8).store(&pTarget->down); } int main(int argc, char* argv[]) { BBPair::initRankStr(); sSource source1(0x0000200000000000, 0x0102040810204088); sSource source2(0x0000004000000000, 0x0102040810204088); sTarget target1; sTarget target2; updownAttacks<GPR>(&target1, &source1); updownAttacks<XMM>(&target2, &source2); target1.down.printBoards(); target1.up.printBoards(); target2.down.printBoards(); target2.up.printBoards(); return 0; }
This page took 0 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.