Computer Chess Club Archives


Search

Terms

Messages

Subject: BitScan with reset - not so impressive with 3DNow!

Author: Gerd Isenberg

Date: 13:48:52 12/02/02

Go up one level in this thread


May be i found not the best way to to b&(-b) with mmx, but building the 64-bit
two's complement with mmx-dword is not so nice. First you have to do the one's
complement by pxor -1, then comparing low dword with -1 and building an
conditional overflow, adding 00:01 or 01:01...
So using 32-bit registers was the fastest so far, but that may require some
additional push/pop.

Some times mesured in seconds with this dumb loop (nothing inlined):
K7XP2.1+ ~1.8GHz

10-bit pattern		bsf	PI2FD	btr	c
0x0000000011111133	15.3	18.0	19.1	22.8
0x1010111010101110	19.7	18.5	19.6	23.4
0x1111113300000000	20.6	18.0	19.1	22.8

inlined are ~5 seconds faster

Cheers,
Gerd

-----------------------------------------------------------------------------
int main()
{
	DWORD start = GetTickCount();
	for (int i=0; i < 100000000; i++) // 10^8
	{
		BitBoard bb = 0x1010111010101110; // 10 bits set
		while (bb)
			bitSearchAndReset_PI2FD(bb); // 10^9 runs in total
	}
	DWORD stop = GetTickCount();
	printf("Time in seconds: %d.%03d\n", (stop-start)/1000, (stop-start)%1000 );
}

-----------------------------------------------------------------------------

int bitSearchAndReset_bsf(BitBoard &bb)
{
	__asm
	{
		xor		edx, edx
		mov		esi, [bb]
		xor		eax, eax
		inc		edx
		bsf		ecx, [esi]
		jnz		found
		bsf		ecx, [esi + 4]
		lea		esi, [esi + 4]
		xor		eax, 32
	found:
		shl		edx, cl
		xor		eax, ecx
		xor		[esi], edx
	}
}

-----------------------------------------------------------------------------

int bitSearchAndReset_PI2FD(BitBoard &bb)
{
	__asm
	{
		mov	ebx, [bb]	; get the reference (like a pointer)
		pxor	mm1, mm1	; 0, to get the dword mask

		mov	edx, [ebx]	; get bb
		mov	esi, [ebx+4]; bb -> esi:edx

		mov	ecx, edx
		mov	eax, esi	; bb -> eax:esi

		pcmpeqd	mm6, mm6	; -1 to complement the dword mask
		pxor	mm7, mm7	; 0, to add both final dwords

		neg	ecx		; low -bb
		adc	eax, 0		; consider borrow
		and	ecx, edx	; low (bb & -bb)
		neg	eax		; high -bb
		movd	mm0, ecx	; low (bb & -bb)
		and	eax, esi	; high (bb & -bb)
		xor	edx, ecx	; reset low
		movd	mm2, eax	; high (bb & -bb)
		xor	esi, eax	; reset high
		punpckldq mm0, mm2	; bb & -bb -> single bit in mm0

		mov	[ebx],   edx	; write modified bb back
		mov	[ebx+4], esi

		pcmpeqd	mm1, mm0	; mask of the zero dword
		PI2FD	mm0, mm0	; 3f8..,400..
		pxor	mm1, mm6	; mask of the none zero dword
		psrlq	mm6, 63		; 00:01
		psrld	mm0, 23		; 3f8 to 7f
		psrld	mm1, 25		; 7f mask
		psllq	mm6, 32+5	; 20:00
		psubd	mm0, mm1	; - 7f mask
		por	mm0, mm6	; + 32 in high dword
		pand	mm0, mm1	; & 7f mask
		psadbw	mm0, mm7	; add all bytes
		movd	eax, mm0
	}
}

-----------------------------------------------------------------------------

int bitSearchAndReset_btr(BitBoard &bb)
{
	__asm
	{
		mov	edx, [bb]
		bsf	eax, [edx+4]
		xor	eax, 32
		bsf	eax, [edx]
		btr	[edx],eax
	}
}

-----------------------------------------------------------------------------

int bitSearchAndReset_C(BitBoard &bb)
{
	BitBoard lsbb = bb & (-(__int64)bb);
	bb ^= lsbb;
	unsigned int lsb = LOWBOARD(lsbb) | HIGHBOARD(lsbb);
	return ((((((((((HIGHBOARD(lsbb)!=0) <<1)
		    ^((lsb & 0xffff0000)!=0))<<1)
		    ^((lsb & 0xff00ff00)!=0))<<1)
		    ^((lsb & 0xf0f0f0f0)!=0))<<1)
		    ^((lsb & 0xcccccccc)!=0))<<1)
		    ^((lsb & 0xaaaaaaaa)!=0);
}



This page took 0.26 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.