Author: Gerd Isenberg
Date: 13:48:52 12/02/02
Go up one level in this thread
May be i found not the best way to to b&(-b) with mmx, but building the 64-bit two's complement with mmx-dword is not so nice. First you have to do the one's complement by pxor -1, then comparing low dword with -1 and building an conditional overflow, adding 00:01 or 01:01... So using 32-bit registers was the fastest so far, but that may require some additional push/pop. Some times mesured in seconds with this dumb loop (nothing inlined): K7XP2.1+ ~1.8GHz 10-bit pattern bsf PI2FD btr c 0x0000000011111133 15.3 18.0 19.1 22.8 0x1010111010101110 19.7 18.5 19.6 23.4 0x1111113300000000 20.6 18.0 19.1 22.8 inlined are ~5 seconds faster Cheers, Gerd ----------------------------------------------------------------------------- int main() { DWORD start = GetTickCount(); for (int i=0; i < 100000000; i++) // 10^8 { BitBoard bb = 0x1010111010101110; // 10 bits set while (bb) bitSearchAndReset_PI2FD(bb); // 10^9 runs in total } DWORD stop = GetTickCount(); printf("Time in seconds: %d.%03d\n", (stop-start)/1000, (stop-start)%1000 ); } ----------------------------------------------------------------------------- int bitSearchAndReset_bsf(BitBoard &bb) { __asm { xor edx, edx mov esi, [bb] xor eax, eax inc edx bsf ecx, [esi] jnz found bsf ecx, [esi + 4] lea esi, [esi + 4] xor eax, 32 found: shl edx, cl xor eax, ecx xor [esi], edx } } ----------------------------------------------------------------------------- int bitSearchAndReset_PI2FD(BitBoard &bb) { __asm { mov ebx, [bb] ; get the reference (like a pointer) pxor mm1, mm1 ; 0, to get the dword mask mov edx, [ebx] ; get bb mov esi, [ebx+4]; bb -> esi:edx mov ecx, edx mov eax, esi ; bb -> eax:esi pcmpeqd mm6, mm6 ; -1 to complement the dword mask pxor mm7, mm7 ; 0, to add both final dwords neg ecx ; low -bb adc eax, 0 ; consider borrow and ecx, edx ; low (bb & -bb) neg eax ; high -bb movd mm0, ecx ; low (bb & -bb) and eax, esi ; high (bb & -bb) xor edx, ecx ; reset low movd mm2, eax ; high (bb & -bb) xor esi, eax ; reset high punpckldq mm0, mm2 ; bb & -bb -> single bit in mm0 mov [ebx], edx ; write modified bb back mov [ebx+4], esi pcmpeqd mm1, mm0 ; mask of the zero dword PI2FD mm0, mm0 ; 3f8..,400.. pxor mm1, mm6 ; mask of the none zero dword psrlq mm6, 63 ; 00:01 psrld mm0, 23 ; 3f8 to 7f psrld mm1, 25 ; 7f mask psllq mm6, 32+5 ; 20:00 psubd mm0, mm1 ; - 7f mask por mm0, mm6 ; + 32 in high dword pand mm0, mm1 ; & 7f mask psadbw mm0, mm7 ; add all bytes movd eax, mm0 } } ----------------------------------------------------------------------------- int bitSearchAndReset_btr(BitBoard &bb) { __asm { mov edx, [bb] bsf eax, [edx+4] xor eax, 32 bsf eax, [edx] btr [edx],eax } } ----------------------------------------------------------------------------- int bitSearchAndReset_C(BitBoard &bb) { BitBoard lsbb = bb & (-(__int64)bb); bb ^= lsbb; unsigned int lsb = LOWBOARD(lsbb) | HIGHBOARD(lsbb); return ((((((((((HIGHBOARD(lsbb)!=0) <<1) ^((lsb & 0xffff0000)!=0))<<1) ^((lsb & 0xff00ff00)!=0))<<1) ^((lsb & 0xf0f0f0f0)!=0))<<1) ^((lsb & 0xcccccccc)!=0))<<1) ^((lsb & 0xaaaaaaaa)!=0); }
This page took 0.05 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.