Computer Chess Club Archives


Search

Terms

Messages

Subject: Bitscan

Author: Matt Taylor

Date: 10:12:56 02/11/03


I think I've finally worked all the bugs out of the inline assembly version of
the bitscan and reset routines. As promised, the code follows. I have it
downloadable online at:
ftp://24.73.126.6/lsb.c
ftp://24.73.126.6/lsb.h

The actual files follow as in-line text.
lsb.c:
#include "test.h"
#include "lsb.h"

#ifdef __LSB_USE_TABLE
const unsigned char LSB_32_table[LSB_32_SIZE] =
{
#define __  0
	23,16,17,__,18,10, 8, 9,19,11, 31,__,__,__,__,__,20,12,__,__,
	__,__,__,__,__,__,__,__,__,__, __,__,21,13,__,__,__,__,__,__,
	__,__,__,__,__,__,__,__,__,__, __,__,__,__,__,__,__,__,__,__,
	__,__,__,__,22,14,__,__,__,__,  6,__,__,__,30,__,__,__,__,__,
	__,__,__,__,__,__,__,__,__,__, __,__,__,__,__,__,__,__,__,__,
	__,__, 5,__,__,__,29,__,__,__,  3,__,__,__, 2,__, 1, 0, 4,__,
	__,__,28,__,__,__,26,24,25,15, 27,__,__, 7,
#undef __
};
#endif /* __LSB_USE_TABLE */

lsb.h:
/*
	lsb.h - LSB manipulation in 64-bit bitboard

	How the function works:
	   BitSearchReset takes a 64-bit bitboard and searches for the LSB.
	Once it finds the LSB, it resets it, and it converts it to an index
	counting up from 0 to 31. It assumes its parameter is a valid pointer
	to a non-zero bitboard (i.e. at least 1 bit must be set).

	How to use this file:
	   * Modify _BB64_TYPE to alias onto your bitboard type
	   * Modify _I32_TYPE to alias onto your 32-bit int type
	   * Modify _I8_TYPE to alias onto your 8-bit int type
	   * Modify _INLINE to alias onto your inline keyword if desired
	   * Define either __LSB_SMALL or __LSB_FAST
	   * Define either __BSF_SLOW or __BSF_FAST
	   * Use either GNU C, MS VC, or compatible compilers

	Things left to do:
	   * Processor-specific optimizations
	   * Support processors prior to K6-2 and Pentium 2...
	   * Support for x86-64
	   * Support for 64-bit C intrinsic

	Other notes:
	   * __LSB_SMALL means size-optimization (still quite fast, though)
	   * __LSB_FAST means speed-optimization
	   * __BSF_SLOW means processor has a slow bsf instruction (non-P6)
	   * __BSF_FAST means processor has a fast bsf instruction (P6)

	Bugs:
	   * Not sure if gcc uses "__inline" as the inline function keyword
	   * Possible gcc assembly bugs
*/

#ifndef __LSB_H
#define __LSB_H

// Don't touch below this unless you know what you're doing!
#if defined(_MSC_VER)
#define _I64_TYPE               unsigned __int64
#define _I32_TYPE               unsigned __int32
#define _I8_TYPE                unsigned __int8
#define _INLINE                 __inline
#elif defined(__GNUC__)
#define _I64_TYPE               unsigned long long
#define _I32_TYPE               unsigned long
#define _I8_TYPE                unsigned char
#define _INLINE                 __inline
#else
#error Please modify lsb.h for your compiler!
#endif /* defined(_MSC_VER) */

#define LSB_32_SIZE             134

#undef __LSB_USE_C_INLINE
#undef __LSB_USE_TABLE

typedef union
{
	_I64_TYPE full;

	struct
	{
		_I32_TYPE low;
		_I32_TYPE high;
	} parts;
} _LSB_I64;

// Heh heh, probably not the best way to do it, but works.
extern const unsigned char LSB_32_table[LSB_32_SIZE];

static _INLINE _I32_TYPE BitSearchReset(_I64_TYPE *bb);
static _INLINE _I32_TYPE BitSearchResetInternal(_LSB_I64 *bb);

static _INLINE _I32_TYPE BitSearchReset(_I64_TYPE *bb)
{
	return BitSearchResetInternal((_LSB_I64 *) bb);
}

#if defined(__LSB_SMALL)
#if defined(__BSF_FAST)
#if defined(_MSC_VER)
static _INLINE _I32_TYPE BitSearchResetInternal(_LSB_I64 *bb)
{
	_LSB_I64 nTemp;
	_I32_TYPE nBit;

	nTemp = *bb;

	__asm
	{
		mov	ecx, [nTemp.parts.high]
		mov	eax, [nTemp.parts.low]
		mov	edx, 1
		bsf	ecx, ecx
		or	ecx, 32
		bsf	eax, eax
		cmovnz	ecx, eax
		mov	[nBit], ecx
		shl	edx, cl
		shr	ecx, 5
		xor	DWORD PTR [nTemp+ecx*4], edx
	}

	*bb = nTemp;

	return nBit;
}
#elif defined(__GNUC__)
static _INLINE _I32_TYPE BitSearchResetInternal(_LSB_I64 *bb)
{
	_I32_TYPE nBit, nDummy;

	__asm__ __volatile__(
	    "bsfl	%%ecx, %%ecx\n\t"
	    "orl	$32, %%ecx\n\t"
	    "bsfl	%0, %0\n\t"
	    "cmovnzl	%0, %%ecx\n\t"
	    "movl	$1, %1\n\t"
	    "movl	%%ecx, %0\n\t"
	    "shll	%%cl, %1\n\t"
	    "shrl	$5, %%ecx\n\t"
	    "xorl	%1, (%3,%%ecx,4)"

	    : "=&r" (nBit), "=&r" (nDummy)
	    : "0" (bb->parts.low), "c" (bb->parts.high), "r" (bb)
	    : "memory");

	return nBit;
}
#else
#warning No intrinsic assembly __LSB_SMALL/__BSF_FAST available for your
compiler!
#warning Using fast inline C routine
#define __LSB_USE_C_INLINE
#endif /* defined(_MSC_VER) */
#elif defined(__BSF_SLOW)
#if defined(_MSC_VER)
static _INLINE _I32_TYPE BitSearchResetInternal(_LSB_I64 *bb)
{
	_LSB_I64 nTemp;
	_I32_TYPE nBit;

	nTemp = *bb;

	__asm
	{
		mov	ecx, [nTemp.parts.low]
		mov	edx, [nTemp.parts.high]
		xor	eax, eax
		cmp	ecx, 1
		cmovc	ecx, edx
		adc	eax, eax
		mov	edx, 1
		bsf	ecx, ecx
		mov	[nBit], ecx
		shl	edx, cl
		xor	DWORD PTR [nTemp+eax*4], edx
		shl	eax, 5
		or	[nBit], eax
	}

	*bb = nTemp;

	return nBit;
}
#elif defined(__GNUC__)
static _INLINE _I32_TYPE BitSearchResetInternal(_LSB_I64 *bb)
{
	_I32_TYPE nBit;

	__asm__ __volatile__(
	    "xorl	%0, %0\n\t"
	    "cmpl	$1, %%ecx\n\t"
	    "cmovcl	%2, %%ecx\n\t"
	    "adcl	%0, %0\n\t"
	    "movl	$1, %2\n\t"
	    "leal	(%3,%0,4), %3\n\t"
	    "bsfl	%%ecx, %%ecx\n\t"
	    "shll	%%cl, %2\n\t"
	    "shll	$5, %0\n\t"
	    "xorl	%2, (%3)\n\t"
	    "orl	%%ecx, %0"

	    : "=&r" (nBit)
	    : "c" (bb->parts.low), "r" (bb->parts.high), "r" (bb)
	    : "memory");

	return nBit;
}
#else
#warning No intrinsic assembly __LSB_SMALL/__BSF_SLOW available for your
compiler!
#warning Using fast inline C routine
#define __LSB_USE_C_INLINE
#endif /* defined(_MSC_VER) */
#else
#error Please define either __BSF_SLOW or __BSF_FAST!
#endif /* defined(__BSF_FAST) */
#elif defined(__LSB_FAST)
#define __LSB_USE_TABLE
#if defined(_MSC_VER)
static _INLINE _I32_TYPE BitSearchResetInternal(_LSB_I64 *bb)
{
	_LSB_I64 nTemp;
	_I32_TYPE nBit;

	nTemp = *bb;

	_asm
	{
		mov	edx, [nTemp.parts.low]
		mov	ecx, [nTemp.parts.high]
		xor	eax, eax
		cmp	edx, 1
		cmovc	edx, ecx
		adc	eax, eax
		mov	ecx, edx
		neg	edx
		and	edx, ecx
		xor	DWORD PTR [nTemp+eax*4], edx
		dec	edx
		shl	eax, 5
		xor	edx, 0x05407DB7
		mov	ecx, edx
		shr	edx, 16
		mov	[nBit], eax
		add	edx, ecx
		sub	dl, dh
		movzx	ecx, dl
		xor	edx, edx
		movzx	eax, BYTE PTR [LSB_32_table+ecx]
		or	[nBit], eax
	}

	*bb = nTemp;

	return nBit;
}
#elif defined(__GNUC__)
static _INLINE _I32_TYPE BitSearchResetInternal(_LSB_I64 *bb)
{
	_I32_TYPE nBit;

	__asm__ __volatile__(
	    "xorl	%0, %0\n\t"
	    "cmpl	$1, %1\n\t"
	    "cmovcl	%2, %1\n\t"
	    "adcl	%0, %0\n\t"
	    "movl	%1, %2\n\t"
	    "negl	%1\n\t"
	    "andl	%2, %1\n\t"
	    "xorl	%1, (%3,%0,4)\n\t"
	    "decl	%1\n\t"
	    "xorl	$0x05407DB7, %1\n\t"
	    "movl	%1, %2\n\t"
	    "shrl	$16, %1\n\t"
	    "shll	$5, %0\n\t"
	    "addl	%2, %1\n\t"
	    "subb	%h1, %b1\n\t"
	    "movzbl	%b1, %2\n\t"
	    "movzbl	LSB_32_table(%2), %1\n\t"
	    "orl	%1, %0"

	    : "=&r" (nBit)
	    : "r" (bb->parts.low), "r" (bb->parts.high), "r" (bb)
	    : "memory");

	return nBit;
}
#else
#warning No intrinsic assembly __LSB_FAST available for your compiler!
#warning Using fast inline C routine
#define __LSB_USE_C_INLINE
#endif /* defined(_MSC_VER) */
#else
#error Please define either __LSB_SMALL or __LSB_FAST!
#endif /* defined(__LSB_SMALL) */

#ifdef __LSB_USE_C_INLINE
#define __LSB_USE_TABLE

static _INLINE _I32_TYPE BitSearchResetInternal(_LSB_I64 *bb)
{
	_I32_TYPE bit, index, half;

	index = (bb->parts.low == 0);
	if (bb->parts.low == 0)
		half = bb->parts.high;
	else
		half = bb->parts.low;

	bit = half & -half;

	index <<= 1;
	index |= ((bit & 0xFFFF0000) != 0);
	index <<= 1;
	index |= ((bit & 0xFF00FF00) != 0);
	index <<= 1;
	index |= ((bit & 0xF0F0F0F0) != 0);
	index <<= 1;
	index |= ((bit & 0xCCCCCCCC) != 0);
	index <<= 1;
	index |= ((bit & 0xAAAAAAAA) != 0);

	return index;
}
#endif /* __LSB_USE_C_INLINE */

/* Clean up (don't clean up __LSB_USE_TABLE or LSB_32_SIZE!) */
#undef __LSB_USE_C_INLINE
#undef _I64_TYPE
#undef _I32_TYPE
#undef _I8_TYPE
#undef _INLINE

#endif /* __LSB_H */

-Matt



This page took 0 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.