Author: Gerd Isenberg
Date: 13:48:52 12/02/02
Go up one level in this thread
Maybe I have not found the best way to do b&(-b) with MMX, but building the 64-bit
two's complement with MMX dword operations is not so nice. First you have to do the one's
complement by pxor -1, then compare the low dword with -1 and build a
conditional overflow, adding 00:01 or 01:01...
So using 32-bit registers was the fastest so far, but that may require some
additional push/pop.
Some times measured in seconds with this dumb loop (nothing inlined):
K7XP2.1+ ~1.8GHz
10-bit pattern bsf PI2FD btr c
0x0000000011111133 15.3 18.0 19.1 22.8
0x1010111010101110 19.7 18.5 19.6 23.4
0x1111113300000000 20.6 18.0 19.1 22.8
inlined are ~5 seconds faster
Cheers,
Gerd
-----------------------------------------------------------------------------
int main()
{
DWORD start = GetTickCount();
for (int i=0; i < 100000000; i++) // 10^8
{
BitBoard bb = 0x1010111010101110; // 10 bits set
while (bb)
bitSearchAndReset_PI2FD(bb); // 10^9 runs in total
}
DWORD stop = GetTickCount();
printf("Time in seconds: %d.%03d\n", (stop-start)/1000, (stop-start)%1000 );
}
-----------------------------------------------------------------------------
// Finds the index of the lowest set bit of bb, clears that bit in bb, and
// returns the index. The low dword is searched first; only if it is zero is
// the high dword at offset +4 searched (index is then offset by 32).
// There is no C++ return statement: the result is whatever the asm block
// leaves in eax.
// NOTE(review): assumes bb is non-zero and BitBoard is a 64-bit value stored
// low dword first -- with both dwords zero the BSF result in ecx is not
// meaningful. Confirm against callers.
int bitSearchAndReset_bsf(BitBoard &bb)
{
__asm
{
// edx will become the single-bit mask (set to 1 by the inc below).
xor edx, edx
// bb is a reference, so this loads the address of the bitboard.
mov esi, [bb]
// eax = 0: index offset used when the bit is found in the low dword.
xor eax, eax
inc edx
// Search the low dword; ZF is set when the low dword is zero.
bsf ecx, [esi]
jnz found
// Low dword is empty: search the high dword instead, move esi to it
// and set the index offset to 32.
bsf ecx, [esi + 4]
lea esi, [esi + 4]
xor eax, 32
found:
// edx = 1 << (bit index within the selected dword).
shl edx, cl
// Combine offset (0 or 32) with the in-dword index (0..31); the bit ranges
// do not overlap, so xor acts as addition here.
xor eax, ecx
// Toggle the found bit in memory; it was set, so this clears it.
xor [esi], edx
}
}
-----------------------------------------------------------------------------
// Finds the index of the lowest set bit of bb, clears that bit in bb, and
// returns the index, without using BSF. The lowest bit is isolated as
// bb & -bb with 32-bit integer registers, then converted to a float with the
// 3DNow! instruction PI2FD so that the bit index can be read from the float
// exponent. The result is left in eax (there is no C++ return statement).
// NOTE(review): the block modifies ebx and esi and mm0, mm1, mm2, mm6, mm7,
// and does not execute EMMS/FEMMS afterwards -- code that uses x87 floating
// point after calling this must clear the MMX state itself.
// NOTE(review): assumes BitBoard is a 64-bit value stored low dword first.
int bitSearchAndReset_PI2FD(BitBoard &bb)
{
__asm
{
// --- Load bb and compute the 64-bit two's complement in eax:ecx ---------
// After the loads and copies below, bb is held twice: in esi:edx (kept as
// the original value) and in eax:ecx (to be negated). The inline remark
// "eax:esi" on the mov eax line refers to this second copy, eax:ecx.
mov ebx, [bb] ; get the reference (like a pointer)
pxor mm1, mm1 ; 0, to get the dword mask
mov edx, [ebx] ; get bb
mov esi, [ebx+4]; bb -> esi:edx
mov ecx, edx
mov eax, esi ; bb -> eax:esi
pcmpeqd mm6, mm6 ; -1 to complement the dword mask
pxor mm7, mm7 ; 0, to add both final dwords
// neg sets CF when the low dword was non-zero; adc folds that into the
// high dword before it is negated, giving the high dword of -bb.
neg ecx ; low -bb
adc eax, 0 ; consider borrow
and ecx, edx ; low (bb & -bb)
neg eax ; high -bb
// --- Isolate the lowest bit, clear it in bb, and move it into mm0 --------
movd mm0, ecx ; low (bb & -bb)
and eax, esi ; high (bb & -bb)
xor edx, ecx ; reset low
movd mm2, eax ; high (bb & -bb)
xor esi, eax ; reset high
punpckldq mm0, mm2 ; bb & -bb -> single bit in mm0
mov [ebx], edx ; write modified bb back
mov [ebx+4], esi
// --- Turn the single bit into its index ----------------------------------
// mm1 becomes all-ones in the dword of mm0 that is zero; PI2FD converts
// each dword of mm0 from int32 to float, so the non-zero dword 2^n gets a
// biased exponent of 0x7f + n (a zero dword stays zero).
pcmpeqd mm1, mm0 ; mask of the zero dword
PI2FD mm0, mm0 ; 3f8..,400..
pxor mm1, mm6 ; mask of the none zero dword
psrlq mm6, 63 ; 00:01
// Shift the exponent down to the low bits of each dword and reduce the
// non-zero-dword mask to 0x7f, the exponent bias to subtract.
psrld mm0, 23 ; 3f8 to 7f
psrld mm1, 25 ; 7f mask
psllq mm6, 32+5 ; 20:00
psubd mm0, mm1 ; - 7f mask
// OR 32 into the high dword, then keep only the dword that held the bit.
// The 0x7f mask also drops the float sign bit that bit 31 (a negative
// int32) leaves above the exponent after the shift.
por mm0, mm6 ; + 32 in high dword
pand mm0, mm1 ; & 7f mask
// Exactly one dword is non-zero and it is < 64, so summing all bytes
// against zero (mm7) yields the final index in the low word of mm0.
psadbw mm0, mm7 ; add all bytes
movd eax, mm0
}
}
-----------------------------------------------------------------------------
// Branch-free variant: finds the index of the lowest set bit of bb, clears
// that bit in bb with BTR, and returns the index in eax (there is no C++
// return statement).
// NOTE(review): this relies on BSF leaving its destination register
// unchanged when the source operand is zero. The second BSF only overwrites
// eax when the low dword is non-zero; otherwise eax must still hold
// 32 + (index in the high dword). Intel's documentation describes the
// destination as undefined in that case, so verify on the target CPU.
// NOTE(review): assumes bb is non-zero and BitBoard is a 64-bit value stored
// low dword first.
int bitSearchAndReset_btr(BitBoard &bb)
{
__asm
{
// bb is a reference, so this loads the address of the bitboard.
mov edx, [bb]
// Candidate result from the high dword: index 0..31, then set bit 5 (+32).
bsf eax, [edx+4]
xor eax, 32
// If the low dword has a set bit, its index replaces the candidate.
bsf eax, [edx]
// BTR with a memory operand and a register bit offset addresses the bit
// string starting at [edx], so offsets 32..63 reach the high dword.
btr [edx],eax
}
}
-----------------------------------------------------------------------------
// Portable C++ variant without inline assembly: isolates the lowest set bit
// of bb, clears it in bb, and returns its index.
// The index is assembled one bit at a time, most significant bit first:
// bit 5 says whether the isolated bit lies in the high dword, and bits 4..0
// are obtained by testing the folded 32-bit value against the usual
// binary-search masks.
int bitSearchAndReset_C(BitBoard &bb)
{
    // bb & -bb keeps only the lowest set bit; xor then removes it from bb.
    BitBoard lowestBit = bb & (-(__int64)bb);
    bb ^= lowestBit;

    // Only one dword is non-zero, so OR-ing both halves folds the single bit
    // into one 32-bit word without losing its position inside the dword.
    unsigned int folded = LOWBOARD(lowestBit) | HIGHBOARD(lowestBit);

    int index = (HIGHBOARD(lowestBit) != 0);             // index bit 5
    index = (index << 1) ^ ((folded & 0xffff0000) != 0); // index bit 4
    index = (index << 1) ^ ((folded & 0xff00ff00) != 0); // index bit 3
    index = (index << 1) ^ ((folded & 0xf0f0f0f0) != 0); // index bit 2
    index = (index << 1) ^ ((folded & 0xcccccccc) != 0); // index bit 1
    index = (index << 1) ^ ((folded & 0xaaaaaaaa) != 0); // index bit 0
    return index;
}
This page took 0.02 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.