Author: Matt Taylor
Date: 10:12:56 02/11/03
I think I've finally worked all the bugs out of the inline assembly version of
the bitscan and reset routines. As promised, the code follows. I have it
available for download at:
ftp://24.73.126.6/lsb.c
ftp://24.73.126.6/lsb.h
The actual files follow as in-line text.
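Before the listings, here's roughly how the routine is meant to be driven.
This is only a sketch: it assumes the 32-bit GCC branch of lsb.h (so the
bitboard is an unsigned long long), DoSomethingWithSquare is a made-up
placeholder, and lsb.c still has to be compiled and linked in for the
lookup table.

#define __LSB_FAST
#define __BSF_FAST
#include "lsb.h"

extern void DoSomethingWithSquare(unsigned long sq); /* placeholder only */

void WalkBits(unsigned long long pieces)
{
    while (pieces != 0)
    {
        /* Returns the LSB index (0..63) and clears that bit in pieces */
        unsigned long sq = BitSearchReset(&pieces);
        DoSomethingWithSquare(sq);
    }
}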
lsb.c:
#include "test.h"
#include "lsb.h"
#ifdef __LSB_USE_TABLE
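/* Lookup table for the folded bitscan in lsb.h: it maps the folded hash
byte back to a bit index (0-31) within a dword. The __ entries are filler
for hash values that no single-bit input should produce. */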
const unsigned char LSB_32_table[LSB_32_SIZE] =
{
#define __ 0
23,16,17,__,18,10, 8, 9,19,11, 31,__,__,__,__,__,20,12,__,__,
__,__,__,__,__,__,__,__,__,__, __,__,21,13,__,__,__,__,__,__,
__,__,__,__,__,__,__,__,__,__, __,__,__,__,__,__,__,__,__,__,
__,__,__,__,22,14,__,__,__,__, 6,__,__,__,30,__,__,__,__,__,
__,__,__,__,__,__,__,__,__,__, __,__,__,__,__,__,__,__,__,__,
__,__, 5,__,__,__,29,__,__,__, 3,__,__,__, 2,__, 1, 0, 4,__,
__,__,28,__,__,__,26,24,25,15, 27,__,__, 7,
#undef __
};
#endif /* __LSB_USE_TABLE */
lsb.h:
/*
lsb.h - LSB manipulation in 64-bit bitboard
How the function works:
BitSearchReset takes a 64-bit bitboard and searches for the LSB.
Once it finds the LSB, it resets that bit and returns its index,
counting up from 0 to 63. It assumes its parameter is a valid pointer
to a non-zero bitboard (i.e. at least 1 bit must be set).
How to use this file:
* Modify _I64_TYPE to alias onto your 64-bit bitboard type
* Modify _I32_TYPE to alias onto your 32-bit int type
* Modify _I8_TYPE to alias onto your 8-bit int type
* Modify _INLINE to alias onto your inline keyword if desired
* Define either __LSB_SMALL or __LSB_FAST
* Define either __BSF_SLOW or __BSF_FAST
* Use either GNU C, MS VC, or compatible compilers
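For example (illustrative only; pick whichever switches match your build),
a 32-bit GCC build for a P6-class machine can pass the switches on the
command line instead of editing this file:
gcc -c -D__LSB_FAST -D__BSF_FAST lsb.c
The same -D switches must be given for every file that includes lsb.h.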
Things left to do:
* Processor-specific optimizations
* Support processors prior to K6-2 and Pentium 2...
* Support for x86-64
* Support for 64-bit C intrinsic
Other notes:
* __LSB_SMALL means size-optimization (still quite fast, though)
* __LSB_FAST means speed-optimization
* __BSF_SLOW means processor has a slow bsf instruction (non-P6)
* __BSF_FAST means processor has a fast bsf instruction (P6)
Bugs:
* Not sure if gcc uses "__inline" as the inline function keyword
* Possible gcc assembly bugs
*/
#ifndef __LSB_H
#define __LSB_H
// Don't touch below this unless you know what you're doing!
#if defined(_MSC_VER)
#define _I64_TYPE unsigned __int64
#define _I32_TYPE unsigned __int32
#define _I8_TYPE unsigned __int8
#define _INLINE __inline
#elif defined(__GNUC__)
#define _I64_TYPE unsigned long long
#define _I32_TYPE unsigned long
#define _I8_TYPE unsigned char
#define _INLINE __inline
#else
#error Please modify lsb.h for your compiler!
#endif /* defined(_MSC_VER) */
#define LSB_32_SIZE 134
#undef __LSB_USE_C_INLINE
#undef __LSB_USE_TABLE
typedef union
{
_I64_TYPE full;
struct
{
_I32_TYPE low;
_I32_TYPE high;
} parts;
} _LSB_I64;
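/* Note: listing parts.low first assumes a little-endian target, which holds
for the x86 machines this file supports. For full == 0x0000000100000000,
parts.low is 0 and parts.high is 1. */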
// Heh heh, probably not the best way to do it, but works.
extern const unsigned char LSB_32_table[LSB_32_SIZE];
static _INLINE _I32_TYPE BitSearchReset(_I64_TYPE *bb);
static _INLINE _I32_TYPE BitSearchResetInternal(_LSB_I64 *bb);
static _INLINE _I32_TYPE BitSearchReset(_I64_TYPE *bb)
{
return BitSearchResetInternal((_LSB_I64 *) bb);
}
#if defined(__LSB_SMALL)
#if defined(__BSF_FAST)
#if defined(_MSC_VER)
static _INLINE _I32_TYPE BitSearchResetInternal(_LSB_I64 *bb)
{
_LSB_I64 nTemp;
_I32_TYPE nBit;
nTemp = *bb;
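/* Scan the high dword and bias its index by 32; if the low dword is
non-zero, its unbiased index wins via cmovnz. shl masks the count to 5
bits, so 1 << (index & 31) is xored into the matching dword of nTemp. */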
__asm
{
mov ecx, [nTemp.parts.high]
mov eax, [nTemp.parts.low]
mov edx, 1
bsf ecx, ecx
or ecx, 32
bsf eax, eax
cmovnz ecx, eax
mov [nBit], ecx
shl edx, cl
shr ecx, 5
xor DWORD PTR [nTemp+ecx*4], edx
}
*bb = nTemp;
return nBit;
}
#elif defined(__GNUC__)
static _INLINE _I32_TYPE BitSearchResetInternal(_LSB_I64 *bb)
{
_I32_TYPE nBit, nDummy;
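/* Mirrors the MSC sequence above: %0 carries the low dword in (via the "0"
tie), ecx holds the high dword, and the final "r" operand is the bitboard
pointer used to clear the found bit in memory. */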
__asm__ __volatile__(
"bsfl %%ecx, %%ecx\n\t"
"orl $32, %%ecx\n\t"
"bsfl %0, %0\n\t"
"cmovnzl %0, %%ecx\n\t"
"movl $1, %1\n\t"
"movl %%ecx, %0\n\t"
"shll %%cl, %1\n\t"
"shrl $5, %%ecx\n\t"
"xorl %1, (%3,%%ecx,4)"
: "=&r" (nBit), "=&r" (nDummy)
: "0" (bb->parts.low), "c" (bb->parts.high), "r" (bb)
: "memory");
return nBit;
}
#else
#warning No intrinsic assembly __LSB_SMALL/__BSF_FAST available for your compiler!
#warning Using fast inline C routine
#define __LSB_USE_C_INLINE
#endif /* defined(_MSC_VER) */
#elif defined(__BSF_SLOW)
#if defined(_MSC_VER)
static _INLINE _I32_TYPE BitSearchResetInternal(_LSB_I64 *bb)
{
_LSB_I64 nTemp;
_I32_TYPE nBit;
nTemp = *bb;
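/* cmp ecx, 1 sets the carry flag exactly when the low dword is zero, so
cmovc selects the high dword and adc eax, eax records which half was
chosen (0 or 1). Only one bsf is needed; the half index supplies bit 5
of the result and picks the dword of nTemp to clear. */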
__asm
{
mov ecx, [nTemp.parts.low]
mov edx, [nTemp.parts.high]
xor eax, eax
cmp ecx, 1
cmovc ecx, edx
adc eax, eax
mov edx, 1
bsf ecx, ecx
mov [nBit], ecx
shl edx, cl
xor DWORD PTR [nTemp+eax*4], edx
shl eax, 5
or [nBit], eax
}
*bb = nTemp;
return nBit;
}
#elif defined(__GNUC__)
static _INLINE _I32_TYPE BitSearchResetInternal(_LSB_I64 *bb)
{
_I32_TYPE nBit;
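/* GNU version of the __LSB_SMALL/__BSF_SLOW routine above; the last "r"
operand is the bitboard pointer, adjusted by lea to address the dword
holding the found bit. */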
__asm__ __volatile__(
"xorl %0, %0\n\t"
"cmpl $1, %%ecx\n\t"
"cmovcl %2, %%ecx\n\t"
"adcl %0, %0\n\t"
"movl $1, %2\n\t"
"leal (%3,%0,4), %3\n\t"
"bsfl %%ecx, %%ecx\n\t"
"shll %%cl, %2\n\t"
"shll $5, %0\n\t"
"xorl %2, (%3)\n\t"
"orl %%ecx, %0"
: "=&r" (nBit)
: "c" (bb->parts.low), "r" (bb->parts.high), "r" (bb)
: "memory");
return nBit;
}
#else
#warning No intrinsic assembly __LSB_SMALL/__BSF_SLOW available for your compiler!
#warning Using fast inline C routine
#define __LSB_USE_C_INLINE
#endif /* defined(_MSC_VER) */
#else
#error Please define either __BSF_SLOW or __BSF_FAST!
#endif /* defined(__BSF_FAST) */
#elif defined(__LSB_FAST)
#define __LSB_USE_TABLE
#if defined(_MSC_VER)
static _INLINE _I32_TYPE BitSearchResetInternal(_LSB_I64 *bb)
{
_LSB_I64 nTemp;
_I32_TYPE nBit;
nTemp = *bb;
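/* Same half-selection trick as the __LSB_SMALL routine: pick the non-zero
dword, isolate its lowest set bit with neg/and, and clear it in nTemp.
The bit-minus-one mask is then xored with the magic constant and folded
(16-bit add, byte subtract) into a byte that indexes LSB_32_table. */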
_asm
{
mov edx, [nTemp.parts.low]
mov ecx, [nTemp.parts.high]
xor eax, eax
cmp edx, 1
cmovc edx, ecx
adc eax, eax
mov ecx, edx
neg edx
and edx, ecx
xor DWORD PTR [nTemp+eax*4], edx
dec edx
shl eax, 5
xor edx, 0x05407DB7
mov ecx, edx
shr edx, 16
mov [nBit], eax
add edx, ecx
sub dl, dh
movzx ecx, dl
xor edx, edx
movzx eax, BYTE PTR [LSB_32_table+ecx]
or [nBit], eax
}
*bb = nTemp;
return nBit;
}
#elif defined(__GNUC__)
static _INLINE _I32_TYPE BitSearchResetInternal(_LSB_I64 *bb)
{
_I32_TYPE nBit;
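/* GNU version of the table-driven routine above; %1 and %2 carry the low
and high dwords in, and %3 is the bitboard pointer. */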
__asm__ __volatile__(
"xorl %0, %0\n\t"
"cmpl $1, %1\n\t"
"cmovcl %2, %1\n\t"
"adcl %0, %0\n\t"
"movl %1, %2\n\t"
"negl %1\n\t"
"andl %2, %1\n\t"
"xorl %1, (%3,%0,4)\n\t"
"decl %1\n\t"
"xorl $0x05407DB7, %1\n\t"
"movl %1, %2\n\t"
"shrl $16, %1\n\t"
"shll $5, %0\n\t"
"addl %2, %1\n\t"
"subb %h1, %b1\n\t"
"movzbl %b1, %2\n\t"
"movzbl LSB_32_table(%2), %1\n\t"
"orl %1, %0"
: "=&r" (nBit)
: "r" (bb->parts.low), "r" (bb->parts.high), "r" (bb)
: "memory");
return nBit;
}
#else
#warning No intrinsic assembly __LSB_FAST available for your compiler!
#warning Using fast inline C routine
#define __LSB_USE_C_INLINE
#endif /* defined(_MSC_VER) */
#else
#error Please define either __LSB_SMALL or __LSB_FAST!
#endif /* defined(__LSB_SMALL) */
#ifdef __LSB_USE_C_INLINE
#define __LSB_USE_TABLE
static _INLINE _I32_TYPE BitSearchResetInternal(_LSB_I64 *bb)
{
_I32_TYPE bit, index, half;
index = (bb->parts.low == 0);
if (index)
half = bb->parts.high;
else
half = bb->parts.low;
bit = half & -half;
/* reset the found bit in the caller's bitboard */
if (index)
bb->parts.high ^= bit;
else
bb->parts.low ^= bit;
/* binary search: each mask test contributes one bit of the index */
index <<= 1;
index |= ((bit & 0xFFFF0000) != 0);
index <<= 1;
index |= ((bit & 0xFF00FF00) != 0);
index <<= 1;
index |= ((bit & 0xF0F0F0F0) != 0);
index <<= 1;
index |= ((bit & 0xCCCCCCCC) != 0);
index <<= 1;
index |= ((bit & 0xAAAAAAAA) != 0);
return index;
}
#endif /* __LSB_USE_C_INLINE */
/* Clean up (don't clean up __LSB_USE_TABLE or LSB_32_SIZE!) */
#undef __LSB_USE_C_INLINE
#undef _I64_TYPE
#undef _I32_TYPE
#undef _I8_TYPE
#undef _INLINE
#endif /* __LSB_H */
-Matt