Computer Chess Club Archives


Search

Terms

Messages

Subject: Thanks

Author: Gerd Isenberg

Date: 01:15:21 03/31/04

Go up one level in this thread


Looks fine ;-)

I'm curious about the call to __chkstk in isDeBruijnN, which is absent from the recursive function
genDeBruijn. Yes, isDeBruijnN has a local 4 KByte array on its frame and has to
clear it too, so as far as runtime is concerned the call to __chkstk doesn't matter much.
Instead of the call to memset I would prefer an inlined intrinsic for it, e.g. with an
8-byte aligned bool array and REP STOSQ with rcx=4096/8. I guess there are some
additional compiler flags...

If you have some additional time, it would be nice to see the assembly of a
Kogge-Stone filler with a bit more register pressure:

Thanks again,
Gerd


#include <stdio.h>
#include <emmintrin.h>

// 64-bit unsigned integer (MSVC `__int64`) holding one bitboard.
typedef unsigned __int64 BitBoard;


class BBPair
{
	friend class XMM;
	friend class GPR;
public:
	__forceinline BBPair(){}
	__forceinline BBPair(const BitBoard &low, const BitBoard &high) {
		sgl[0] = low; sgl[1] = high;}

	// some output routines
	void printBoards()
	{
		for ( int i = 7; i >= 0; --i)
			printf("%s %s\n", rankStr[dbl.m128i_u8[i]],rankStr[dbl.m128i_u8[i+8]]);
		printf("\n");
	}

	static void initRankStr() // used by printBoards
	{
		for (int i = 0; i < 256; ++i)
		{
			for (int j = 0; j < 8; ++j)
				rankStr[i][j]   = binDigit[(i & (1<<j)) != 0];
			rankStr[i][j] = '\0';
		}
	}

protected:
	union
	{
		__m128i	 dbl; // this intrinsice type is wrapped here
		BitBoard sgl[2];
	};
protected:
	static const char binDigit[2];
	static char rankStr[256][10];
};

// Storage for the per-byte strings built by BBPair::initRankStr().
char BBPair::rankStr[256][10];
// Glyph for a clear bit (index 0) and for a set bit (index 1). Both are
// written as octal escapes so the byte values do not depend on the encoding
// of the source file (presumably the code page 437 light-shade and
// full-block characters).
const char BBPair::binDigit[2] = { '\260','\333'};	// { '0','1'};

// SSE2 flavour of a bitboard pair: both boards are kept in the __m128i member
// and each operator forwards to one 128-bit integer intrinsic.
class XMM : public BBPair
{
public:
	// Leaves both boards uninitialized.
	__forceinline XMM(){}
	__forceinline XMM(const BitBoard &low, const BitBoard &high) {
		dbl.m128i_u64[0] = low; dbl.m128i_u64[1] = high;}

	// Sets the low board only; the high board is zeroed so that no
	// indeterminate bits can be read back later.
	__forceinline XMM(const BitBoard &low) {
		dbl.m128i_u64[0] = low; dbl.m128i_u64[1] = 0;}

	__forceinline XMM(__m128i a){dbl = a;}
	__forceinline XMM(const BBPair& a){dbl = a.dbl;}
	// The pointer overloads use the aligned load, so the pointee has to be
	// 16-byte aligned.
	__forceinline XMM(const __m128i* ptr){dbl = _mm_load_si128(ptr);}
	__forceinline XMM(const BBPair* ptr){dbl = _mm_load_si128(&ptr->dbl);}

	__forceinline BitBoard getLowBoard() const {return dbl.m128i_u64[0];}
	__forceinline BitBoard getHighBoard() const {return dbl.m128i_u64[1];}

	// Aligned stores: the destination has to be 16-byte aligned.
	__forceinline void store(__m128i* ptr) const {_mm_store_si128 (ptr, dbl);}
	__forceinline void store(BBPair* ptr) const {_mm_store_si128 (&ptr->dbl, dbl);}

	// Broadcast 32-bit lane 0 (selector 0) or lane 1 (selector 0x55) to all
	// four lanes.
	__forceinline XMM copyWord0() const {return XMM(_mm_shuffle_epi32 (dbl, 0));}
	__forceinline XMM copyWord1() const {return XMM(_mm_shuffle_epi32 (dbl, 0x55));}
	// Returns the lowest 16-bit element.
	__forceinline int getInt16() const {return _mm_extract_epi16(dbl,0);}

	// Lane-wise 32-bit equality compare, result stored in place.
	__forceinline XMM& compare32(const XMM &dbb) {
		dbl = _mm_cmpeq_epi32(dbl, dbb.dbl); return *this;}
	// Replaces this value by the sums of absolute byte differences against dbb.
	__forceinline XMM& addAbsByteDiff(const XMM &dbb) {
		dbl = _mm_sad_epu8(dbl, dbb.dbl); return *this;}
	// Adds dbb, shuffled with selector 0x56 (lane 0 receives dbb's lane 2,
	// the other lanes receive lane 1), to this value per 32-bit lane.
	__forceinline XMM& addWords(const XMM &dbb) {
		dbl = _mm_add_epi32(_mm_shuffle_epi32(dbb.dbl,0x56), dbl); return *this;}

	// Shifts act on each 64-bit board separately.
	__forceinline XMM& operator>>=(int shift) {
		dbl = _mm_srli_epi64(dbl, shift); return *this;}
	__forceinline XMM& operator<<=(int shift) {
		dbl = _mm_slli_epi64(dbl, shift); return *this;}
	__forceinline XMM& operator&=(const XMM &dbb) {
		dbl = _mm_and_si128(dbl, dbb.dbl); return *this;}
	__forceinline XMM& operator|=(const XMM &dbb) {
		dbl = _mm_or_si128(dbl, dbb.dbl); return *this;}
	__forceinline XMM& operator^=(const XMM &dbb) {
		dbl = _mm_xor_si128(dbl, dbb.dbl); return *this;}
	// Addition and subtraction are byte-wise: no carry or borrow crosses a
	// byte boundary.
	__forceinline XMM& operator+=(const XMM &dbb) {
		dbl = _mm_add_epi8(dbl, dbb.dbl); return *this;}
	__forceinline XMM& operator-=(const XMM &dbb) {
		dbl = _mm_sub_epi8(dbl, dbb.dbl); return *this;}

	// Clear bit 0 (notA) or bit 7 (notH) of every byte.
	__forceinline XMM notA() const {return XMM(_mm_and_si128(dbl, snotA));}
	__forceinline XMM notH() const {return XMM(_mm_and_si128(dbl, snotH));}

	__forceinline friend XMM operator>>(const XMM &dbb, int shift) {
		return XMM(_mm_srli_epi64(dbb.dbl, shift));}
	__forceinline friend XMM operator<<(const XMM &dbb, int shift) {
		return XMM(_mm_slli_epi64(dbb.dbl, shift));}
	__forceinline friend XMM operator&(const XMM &a, const XMM &b) {
		return XMM(_mm_and_si128(a.dbl, b.dbl));}
	__forceinline friend XMM operator|(const XMM &a, const XMM &b) {
		return XMM(_mm_or_si128(a.dbl, b.dbl));}
	__forceinline friend XMM operator^(const XMM &a, const XMM &b) {
		return XMM(_mm_xor_si128(a.dbl, b.dbl));}
	__forceinline friend XMM operator+(const XMM &a, const XMM &b) {
		return XMM(_mm_add_epi8(a.dbl, b.dbl));}
	__forceinline friend XMM operator-(const XMM &a, const XMM &b) {
		return XMM(_mm_sub_epi8(a.dbl, b.dbl));}
	// Complement implemented as XOR with the all-ones constant.
	__forceinline friend XMM operator~(const XMM &a) {
		return XMM(_mm_xor_si128(a.dbl, minusOne));}

protected:
	static const __m128i snotA;     // 0xFE in every byte
	static const __m128i snotH;     // 0x7F in every byte
	static const __m128i minusOne;  // 0xFF in every byte

};

// Sixteen bytes of 0xFE: ANDing with it clears bit 0 of every byte (XMM::notA).
const __m128i XMM::snotA =
{
	'\376', '\376', '\376', '\376', '\376', '\376', '\376', '\376',
	'\376', '\376', '\376', '\376', '\376', '\376', '\376', '\376',
};

// Sixteen bytes of 0x7F: ANDing with it clears bit 7 of every byte (XMM::notH).
const __m128i XMM::snotH =
{
	'\177', '\177', '\177', '\177', '\177', '\177', '\177', '\177',
	'\177', '\177', '\177', '\177', '\177', '\177', '\177', '\177',
};

// Sixteen bytes of 0xFF: XORing with it complements all 128 bits (operator~).
const __m128i XMM::minusOne =
{
	'\377', '\377', '\377', '\377', '\377', '\377', '\377', '\377',
	'\377', '\377', '\377', '\377', '\377', '\377', '\377', '\377',
};

// General-purpose-register flavour of a bitboard pair: every operation is
// carried out on the two 64-bit words separately. Offers the same operator
// set as XMM so that templates can be instantiated with either class.
class GPR : public BBPair
{
public:
	// Leaves both boards uninitialized.
	__forceinline GPR(){}
	__forceinline GPR(const BitBoard &low, const BitBoard &high) {
		sgl[0] = low; sgl[1] = high;}
	__forceinline GPR(const BBPair& a) {sgl[0] = a.sgl[0]; sgl[1] = a.sgl[1];}
	__forceinline GPR(const BBPair* ptr) {
		sgl[0] = ptr->sgl[0]; sgl[1] = ptr->sgl[1];}

	__forceinline BitBoard getLowBoard() const {return sgl[0];}
	__forceinline BitBoard getHighBoard() const {return sgl[1];}

	// Copies both boards into *ptr.
	__forceinline void store(BBPair* ptr) const {
		ptr->sgl[0] = sgl[0]; ptr->sgl[1] = sgl[1];}

	__forceinline GPR& operator>>=(int shift) {
		sgl[0]>>=shift; sgl[1]>>=shift; return *this;}
	__forceinline GPR& operator<<=(int shift) {
		sgl[0]<<=shift; sgl[1]<<=shift; return *this;}
	__forceinline GPR& operator&=(const GPR &dbb) {
		sgl[0]&=dbb.sgl[0]; sgl[1]&=dbb.sgl[1]; return *this;}
	__forceinline GPR& operator|=(const GPR &dbb) {
		sgl[0]|=dbb.sgl[0]; sgl[1]|=dbb.sgl[1]; return *this;}
	__forceinline GPR& operator^=(const GPR &dbb) {
		sgl[0]^=dbb.sgl[0]; sgl[1]^=dbb.sgl[1]; return *this;}

	// Byte-wise add/subtract in place, matching XMM's operator+= / operator-=:
	// each of the 16 bytes wraps on its own, nothing carries into a neighbour.
	__forceinline GPR& operator+=(const GPR &dbb)
	{
		for (int i=0; i < 16; i++)
			dbl.m128i_u8[i] = dbl.m128i_u8[i] + dbb.dbl.m128i_u8[i];
		return *this;
	}
	__forceinline GPR& operator-=(const GPR &dbb)
	{
		for (int i=0; i < 16; i++)
			dbl.m128i_u8[i] = dbl.m128i_u8[i] - dbb.dbl.m128i_u8[i];
		return *this;
	}

	// Clear bit 0 of every byte.
	__forceinline GPR notA() const {
		return GPR(sgl[0]&0xfefefefefefefefe, sgl[1]&0xfefefefefefefefe);}

	// Clear bit 7 of every byte.
	__forceinline GPR notH() const {
		return GPR(sgl[0]&0x7f7f7f7f7f7f7f7f, sgl[1]&0x7f7f7f7f7f7f7f7f);}

	__forceinline friend GPR operator>>(const GPR &dbb, int shift) {
		return GPR(dbb.sgl[0]>>shift, dbb.sgl[1]>>shift);}
	__forceinline friend GPR operator<<(const GPR &dbb, int shift) {
		return GPR(dbb.sgl[0]<<shift, dbb.sgl[1]<<shift);}
	__forceinline friend GPR operator&(const GPR &a, const GPR &b) {
		return GPR(a.sgl[0]&b.sgl[0], a.sgl[1]&b.sgl[1]);}
	__forceinline friend GPR operator|(const GPR &a, const GPR &b) {
		return GPR(a.sgl[0]|b.sgl[0], a.sgl[1]|b.sgl[1]);}
	__forceinline friend GPR operator^(const GPR &a, const GPR &b) {
		return GPR(a.sgl[0]^b.sgl[0], a.sgl[1]^b.sgl[1]);}

	// Byte-wise sum of a and b (counterpart of XMM's operator+).
	__forceinline friend GPR operator+(const GPR &a, const GPR &b)
	{
		GPR c;
		for (int i=0; i < 16; i++)
			c.dbl.m128i_u8[i] = a.dbl.m128i_u8[i] + b.dbl.m128i_u8[i];
		return c;
	}

	// Byte-wise difference of a and b.
	__forceinline friend GPR operator-(const GPR &a, const GPR &b)
	{
		GPR c;
		for (int i=0; i < 16; i++)
			c.dbl.m128i_u8[i] = a.dbl.m128i_u8[i] - b.dbl.m128i_u8[i];
		return c;
	}

	__forceinline friend GPR operator~(const GPR &a) {
		return GPR(~a.sgl[0], ~a.sgl[1]);}

};


// Input record for updownAttacks: a pair of rook boards plus a pair of
// occupancy boards.
struct sSource
{
	// r1 and r2 become the two rook boards; both occupancy boards are set to
	// the union r1|r2.
	sSource(BitBoard r1, BitBoard r2)
		: rooks(r1, r2)
		, occup(r1 | r2, r1 | r2)
	{
	}

	BBPair rooks;
	BBPair occup;
};

// Output record filled by updownAttacks.
struct sTarget
{
	BBPair up;   // receives the result of the left-shifting (<<) fill
	BBPair down; // receives the result of the right-shifting (>>) fill
};

// Fills the rook sets of *pSource by 8-bit shifts in both directions, using a
// parallel-prefix scheme with shift distances 8, 16 and 32, and writes the
// once-more-shifted results to *pTarget.
// T supplies the arithmetic (GPR or XMM); it must be constructible from a
// const BBPair* and provide ~, &, |=, &=, <<, >> and store(BBPair*).
template <class T> void updownAttacks(sTarget* pTarget, const sSource* pSource)
{
	T fillUp(&pSource->rooks);          // squares reached so far, << direction
	T occupied(&pSource->occup);
	T openUp(~occupied);                // empty squares the fill may pass through
	T fillDown(fillUp);                 // same start set, >> direction
	T openDown(openUp);

	// distance 8
	fillUp   |= openUp   & (fillUp   << 8);
	fillDown |= openDown & (fillDown >> 8);
	openUp   &= openUp   << 8;
	openDown &= openDown >> 8;

	// distance 16
	fillUp   |= openUp   & (fillUp   << 16);
	fillDown |= openDown & (fillDown >> 16);
	openUp   &= openUp   << 16;
	openDown &= openDown >> 16;

	// distance 32
	fillUp   |= openUp   & (fillUp   << 32);
	fillDown |= openDown & (fillDown >> 32);

	// One final shift in each direction before the results are stored.
	T upResult(fillUp << 8);
	T downResult(fillDown >> 8);
	upResult.store(&pTarget->up);
	downResult.store(&pTarget->down);
}

int main(int argc, char* argv[])
{
	BBPair::initRankStr();

	sSource source1(0x0000200000000000, 0x0102040810204088);
	sSource source2(0x0000004000000000, 0x0102040810204088);
	sTarget target1;
	sTarget target2;

	updownAttacks<GPR>(&target1, &source1);
	updownAttacks<XMM>(&target2, &source2);

	target1.down.printBoards();
	target1.up.printBoards();
	target2.down.printBoards();
	target2.up.printBoards();

	return 0;
}




This page took 0 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.