Computer Chess Club Archives


Search

Terms

Messages

Subject: Re: Arasan 7.4 64-bit Speedup

Author: Eugene Nalimov

Date: 19:56:40 03/30/04

Go up one level in this thread


On March 30, 2004 at 16:56:47, Gerd Isenberg wrote:

>// oups, not so easy out of the class context
>// two routines i played with some time ago:
>// probably not so good to test the compiler ;-)
>
>typedef unsigned __int64 BitBoard; // 64-bit unsigned word (MSVC-specific __int64 spelling)
>
>bool isDeBruijnN(const BitBoard &magic, int n) // returns true when the 64 n-bit windows (magic << bitpos) >> (64-n), bitpos = 0..63, are pairwise distinct
>{
>	bool isdbjn[64*64]; // "already seen" flag per window value; 4096 entries, so only n <= 12 fits -- NOTE(review): n is not range-checked here
>	int bitpos;
>	unsigned int deBruijn;
>
>	for (bitpos = 0; bitpos < 64*64; bitpos++) // clear every flag before scanning
>		isdbjn[bitpos] = false;
>
>	for (bitpos = 0; bitpos < 64; bitpos++)
>	{
>		deBruijn = (unsigned int) ((magic << bitpos) >> (64-n)); // top n bits of the left-shifted word; zeros enter from the right, there is no wrap-around. NOTE(review): n == 0 would shift by 64, which C++ leaves undefined
>		if ( isdbjn[deBruijn] )
>			return false; // this window value occurred at an earlier bitpos
>		isdbjn[deBruijn] = true;
>	}
>	return true; // all 64 windows were different
>}
>
>
>// and a recursive one
>
>static unsigned int nodeCount = 0; // incremented once per genDeBruijn call
>static unsigned int foundCount = 0; // incremented each time the recursion reaches bitpos == 0 with a still-unused index
>
>void genDeBruijn(BitBoard sequence, int bitpos) // backtracking search: tries bit (bitpos-1) of sequence both set and clear; the initial call is not shown in this post
>{
>	static BitBoard uniqueCheck = 0; // bit i set <=> 6-bit index i is in use on the current recursion path; shared by all calls, so not reentrant
>	nodeCount++;
>	unsigned int uniqueIdx = (unsigned int) (sequence>>bitpos) & 63; // low 6 bits of sequence >> bitpos
>	BitBoard uniqueBit = (BitBoard)1 << uniqueIdx;
>
>	if ( (uniqueCheck & uniqueBit) == 0 && uniqueIdx != 32 ) // prune when the index is already used; NOTE(review): index 32 is always rejected, the reason is not stated in this post
>	{
>		if ( bitpos == 0 )
>		{
>			foundCount++; // complete sequence reached
>                        // notify(sequence);
>		}
>		else
>		{
>			uniqueCheck |= uniqueBit; // mark the index as used for the whole subtree
>			if ( bitpos == 1 )
>			{
>				genDeBruijn(sequence|1, 0); // last level: only the variant with bit 0 set is explored
>			}
>			else
>			{
>				bitpos--;
>				genDeBruijn(sequence|((BitBoard)1<<bitpos), bitpos); // branch with the next lower bit set
>				genDeBruijn(sequence, bitpos); // branch with the next lower bit clear
>			}
>			uniqueCheck &= ~uniqueBit; // backtrack: release the index again
>		}
>	}
>}

; Listing generated by Microsoft (R) Optimizing Compiler Version 14.00.40316

include listing.inc

INCLUDELIB LIBCMT
INCLUDELIB OLDNAMES

; Storage for the two file-scope counters of the C++ source. Each symbol
; reserves one uninitialised DWORD (32 bits) in _BSS; the genDeBruijn code
; later in this listing increments both with "inc DWORD PTR ...".
_BSS	SEGMENT
foundCount DD	01H DUP (?)
nodeCount DD	01H DUP (?)
_BSS	ENDS
; ---------------------------------------------------------------------------
; isDeBruijnN: exported symbol, imported helpers and Win64 unwind tables.
;
; The function body is described by three pdata records; the ranges are byte
; offsets from $LN15 (the function entry label):
;   [0, 13)    primary record  - covers the frame allocation
;   [13, 96)   chained record 1 - adds the rbx / rdi spill descriptions
;   [96, 154)  chained record 2 - no additional unwind codes
; Each xdata DD below is decoded according to the documented UNWIND_INFO /
; UNWIND_CODE layout (little-endian bytes inside every DWORD).
; ---------------------------------------------------------------------------
PUBLIC	?isDeBruijnN@@YA_NAEB_KH@Z			; isDeBruijnN
EXTRN	__chkstk:PROC
EXTRN	memset:PROC
xdata	SEGMENT
; Primary UNWIND_INFO: version 1, flags 0, prologue size 0dH (13 bytes),
; 2 unwind-code slots, no frame register.
$unwind$?isDeBruijnN@@YA_NAEB_KH@Z DD 020d01H
; Code at prologue offset 0dH: UWOP_ALLOC_LARGE (op 1, info 0); the second
; slot holds the size in 8-byte units: 0207H * 8 = 4152 bytes.
	DD	0207010dH
xdata	ENDS
pdata	SEGMENT
; RUNTIME_FUNCTION { begin, end, unwind info } for bytes [0, 13).
$pdata$?isDeBruijnN@@YA_NAEB_KH@Z DD @imagerel($LN15#)
	DD	@imagerel($LN15#+13)
	DD	@imagerel($unwind$?isDeBruijnN@@YA_NAEB_KH@Z#)
pdata	ENDS
xdata	SEGMENT
; Chained UNWIND_INFO #1: version 1, UNW_FLAG_CHAININFO, prologue size 1cH
; (28 bytes, counted from the start of this range), 4 unwind-code slots.
$chain$1$?isDeBruijnN@@YA_NAEB_KH@Z DD 041c21H
; Offset 1cH: UWOP_SAVE_NONVOL rdi (info 7) at [rsp + 0205H*8] = [rsp+4136].
	DD	0205741cH
; Offset 08H: UWOP_SAVE_NONVOL rbx (info 3) at [rsp + 0206H*8] = [rsp+4144].
	DD	02063408H
; Chained-to RUNTIME_FUNCTION: the primary record for bytes [0, 13).
	DD	@imagerel($LN15#)
	DD	@imagerel($LN15#+13)
	DD	@imagerel($unwind$?isDeBruijnN@@YA_NAEB_KH@Z#)
xdata	ENDS
pdata	SEGMENT
; RUNTIME_FUNCTION for bytes [13, 96): region in which rbx / rdi live in
; their stack slots.
$pdata$1$?isDeBruijnN@@YA_NAEB_KH@Z DD @imagerel($LN15#+13)
	DD	@imagerel($LN15#+96)
	DD	@imagerel($chain$1$?isDeBruijnN@@YA_NAEB_KH@Z#)
pdata	ENDS
xdata	SEGMENT
; Chained UNWIND_INFO #2: version 1, UNW_FLAG_CHAININFO, no prologue and no
; unwind codes of its own; only the frame allocation of the primary record
; applies.
$chain$2$?isDeBruijnN@@YA_NAEB_KH@Z DD 021H
	DD	@imagerel($LN15#)
	DD	@imagerel($LN15#+13)
	DD	@imagerel($unwind$?isDeBruijnN@@YA_NAEB_KH@Z#)
xdata	ENDS
pdata	SEGMENT
; RUNTIME_FUNCTION for bytes [96, 154). Presumably the scan loop and both
; epilogues, i.e. the code after rbx / rdi have been reloaded - TODO confirm
; against the assembled byte offsets.
$pdata$2$?isDeBruijnN@@YA_NAEB_KH@Z DD @imagerel($LN15#+96)
	DD	@imagerel($LN15#+154)
	DD	@imagerel($chain$2$?isDeBruijnN@@YA_NAEB_KH@Z#)
; Function compile flags: /Ogtpy
pdata	ENDS
;-----------------------------------------------------------------------
; bool isDeBruijnN(const BitBoard &magic, int n)
; ABI:   Microsoft x64 (MASM / Intel operand order)
; In:    rcx = address of magic (reference argument), edx = n
; Out:   al  = 1 if the 64 values (magic << bitpos) >> (64 - n),
;              bitpos = 0..63, are all different, else 0
; Stack: 4152-byte frame, probed through __chkstk
;          [rsp+0    .. rsp+31  ]  shadow space for the memset call
;          [rsp+32   .. rsp+4127]  isdbjn[4096], one byte per flag
;          [rsp+4136]              saved rdi
;          [rsp+4144]              saved rbx
;        magic$ / n$ (4160 / 4168) are the offsets of the caller-provided
;        home slots above the return address; this code never uses them.
; Regs:  rbx = &magic and edi = n only until the scan loop starts; both
;        are reloaded from the stack before the loop, so the epilogues
;        just release the frame.
;        Loop: rdx = magic, r8 = 64 - n, r11d = bitpos, rcx = table index.
; NOTE:  the exact instruction bytes are referenced by the pdata ranges
;        ($LN15#+13 / +96 / +154); do not reorder or re-encode.
;-----------------------------------------------------------------------
_TEXT	SEGMENT
isdbjn$ = 32
magic$ = 4160
n$ = 4168
?isDeBruijnN@@YA_NAEB_KH@Z PROC				; isDeBruijnN
; File c:\repro\f.cpp
; Line 8
$LN15:
	; Allocate the frame: eax carries the size into __chkstk and is
	; still 4152 afterwards, since the next instruction subtracts it.
	mov	eax, 4152				; 00001038H
	call	__chkstk
	sub	rsp, rax
	mov	QWORD PTR [rsp+4144], rbx
	mov	rbx, rcx	; rbx = &magic, must survive the memset call
; Line 14
	; The source-level clearing loop is emitted as
	; memset(isdbjn, 0, 4096): rcx = table, edx = 0, r8d = count.
	lea	r8d, QWORD PTR [rax-56]	; 4152 - 56 = 4096 bytes
	lea	rcx, QWORD PTR isdbjn$[rsp]
	mov	QWORD PTR [rsp+4136], rdi
	mov	edi, edx	; edi = n, must survive the memset call
	xor	edx, edx
	call	memset
	mov	rdx, QWORD PTR [rbx]	; rdx = magic, loaded through the reference
	mov	rbx, QWORD PTR [rsp+4144]	; rbx no longer needed: restore early
; Line 16
	mov	eax, 64					; 00000040H
	xor	r11d, r11d	; bitpos = 0
	sub	eax, edi	; eax = 64 - n
	mov	rdi, QWORD PTR [rsp+4136]	; rdi no longer needed: restore early
	movsxd	r8, eax	; r8 = (64 - n), loop-invariant right-shift count
	npad	14	; padding macro from listing.inc; presumably aligns the loop head - TODO confirm
$LL4@isDeBruijn:
; Line 18
	mov	ecx, r11d
	mov	rax, rdx
	shl	rax, cl	; magic << bitpos
	mov	rcx, r8
	shr	rax, cl	; ... >> (64 - n)
; Line 19
	mov	ecx, eax	; keep the low 32 bits (the unsigned int cast); upper half of rcx becomes 0
	cmp	BYTE PTR isdbjn$[rsp+rcx], 0
	jne	SHORT $LN12@isDeBruijn	; value seen before -> return false
	inc	r11d	; bitpos++
; Line 21
	mov	BYTE PTR isdbjn$[rsp+rcx], 1	; mark this value as seen
	cmp	r11d, 64				; 00000040H
	jl	SHORT $LL4@isDeBruijn	; signed compare, bitpos is an int
; Line 23
	mov	al, 1	; all 64 values were distinct
; Line 24
	add	rsp, 4152				; 00001038H
	ret	0
$LN12@isDeBruijn:
; Line 20
	xor	al, al	; duplicate found: return false
; Line 24
	add	rsp, 4152				; 00001038H
	ret	0
?isDeBruijnN@@YA_NAEB_KH@Z ENDP				; isDeBruijnN
_TEXT	ENDS
PUBLIC	?genDeBruijn@@YAX_KH@Z				; genDeBruijn
;	COMDAT ?uniqueCheck@?1??genDeBruijn@@YAX_KH@Z@4_KA
; One uninitialised QWORD in _BSS backing the function-local static
; `uniqueCheck' of genDeBruijn. The demangled name is kept as a trailing
; comment on the DQ line; it must stay behind the ';' because a bare
; continuation line is not valid MASM.
_BSS	SEGMENT
?uniqueCheck@?1??genDeBruijn@@YAX_KH@Z@4_KA DQ 01H DUP (?) ; `genDeBruijn'::`2'::uniqueCheck
_BSS	ENDS
; ---------------------------------------------------------------------------
; Win64 unwind tables for genDeBruijn (single pdata record, bytes [0, 184)).
; Each DD is decoded according to the documented UNWIND_INFO / UNWIND_CODE
; layout; prologue offsets are the byte offsets just past the instruction
; they describe.
; ---------------------------------------------------------------------------
xdata	SEGMENT
; UNWIND_INFO: version 1, flags 0, prologue size 25H (37 bytes),
; 7 unwind-code slots, no frame register.
$unwind$?genDeBruijn@@YAX_KH@Z DD 072501H
; Offset 25H: UWOP_SAVE_NONVOL rdi (info 7) at [rsp + 4*8] = [rsp+32].
	DD	047425H
; Offset 1bH: UWOP_SAVE_NONVOL rsi (info 6) at [rsp + 5*8] = [rsp+40].
	DD	05641bH
; Offset 16H: UWOP_SAVE_NONVOL rbx (info 3) at [rsp + 6*8] = [rsp+48].
	DD	063416H
; Offset 04H: UWOP_ALLOC_SMALL, info 6 -> 6*8 + 8 = 56 bytes; the upper
; 16 bits of this DWORD are padding after the 7th slot.
	DD	06204H
xdata	ENDS
pdata	SEGMENT
; RUNTIME_FUNCTION { begin, end, unwind info } for the whole function.
$pdata$?genDeBruijn@@YAX_KH@Z DD @imagerel($LN9#)
	DD	@imagerel($LN9#+184)
	DD	@imagerel($unwind$?genDeBruijn@@YAX_KH@Z#)
; Function compile flags: /Ogtpy
pdata	ENDS
;-----------------------------------------------------------------------
; void genDeBruijn(BitBoard sequence, int bitpos)
; ABI:   Microsoft x64 (MASM / Intel operand order)
; In:    rcx = sequence, edx = bitpos
; Out:   none; updates nodeCount, foundCount and the static uniqueCheck
; Stack: 56-byte frame
;          [rsp+0 .. rsp+31]  shadow space for the recursive calls
;          [rsp+32] saved rdi, [rsp+40] saved rsi, [rsp+48] saved rbx
;        sequence$ / bitpos$ (64 / 72) are the offsets of the caller's
;        home slots above the return address; this code never uses them.
; Regs:  rsi = sequence, edi = bitpos, rbx = uniqueBit - all three are
;        callee-saved, so they stay valid across the recursive calls.
;        r8 = constant 1 until it is reused to build 1 << bitpos.
; NOTE:  the unwind table records prologue offsets 04H/16H/1bH/25H;
;        do not reorder or re-encode the prologue.
;-----------------------------------------------------------------------
_TEXT	SEGMENT
sequence$ = 64
bitpos$ = 72
?genDeBruijn@@YAX_KH@Z PROC				; genDeBruijn
; Line 33
$LN9:
	sub	rsp, 56					; 00000038H
; Line 39
	mov	rax, QWORD PTR ?uniqueCheck@?1??genDeBruijn@@YAX_KH@Z@4_KA	; rax = current uniqueCheck
	inc	DWORD PTR nodeCount	; nodeCount++
	mov	QWORD PTR [rsp+48], rbx
	mov	QWORD PTR [rsp+40], rsi
	mov	rsi, rcx	; rsi = sequence
	mov	ecx, edx	; cl = bitpos (shift count)
	mov	QWORD PTR [rsp+32], rdi
	mov	edi, edx	; edi = bitpos
	mov	rdx, rsi
	shr	rdx, cl	; sequence >> bitpos
	mov	r8d, 1
	and	edx, 63					; 0000003fH
	mov	rbx, r8
	mov	ecx, edx	; edx = uniqueIdx, used as shift count
	shl	rbx, cl	; rbx = uniqueBit = 1 << uniqueIdx
	test	rax, rbx
	jne	SHORT $LN3@genDeBruij	; index already in use -> return
	cmp	edx, 32					; 00000020H
	je	SHORT $LN3@genDeBruij	; uniqueIdx == 32 -> return
; Line 41
	test	edi, edi
	jne	SHORT $LN4@genDeBruij	; bitpos != 0 -> recurse
; Line 43
	inc	DWORD PTR foundCount	; bitpos == 0: count a complete sequence
	mov	rdi, QWORD PTR [rsp+32]
	mov	rsi, QWORD PTR [rsp+40]
	mov	rbx, QWORD PTR [rsp+48]
; Line 62
	add	rsp, 56					; 00000038H
	ret	0
$LN4@genDeBruij:
; Line 48
	or	rax, rbx	; uniqueCheck | uniqueBit
; Line 49
	cmp	edi, r8d	; bitpos == 1 ?  (r8d still holds 1)
	mov	QWORD PTR ?uniqueCheck@?1??genDeBruijn@@YAX_KH@Z@4_KA, rax	; store the updated mask; mov leaves the flags intact
	jne	SHORT $LN2@genDeBruij
; Line 51
	or	rsi, r8	; sequence | 1
	xor	edx, edx	; second argument: bitpos = 0
; Line 53
	jmp	SHORT $LN8@genDeBruij	; share the call sequence at $LN8
$LN2@genDeBruij:
; Line 55
	dec	edi	; bitpos--
; Line 56
	mov	ecx, edi
	mov	edx, edi	; second argument: bitpos
	shl	r8, cl	; 1 << bitpos
	or	r8, rsi
	mov	rcx, r8	; first argument: sequence | (1 << bitpos)
	call	?genDeBruijn@@YAX_KH@Z			; genDeBruijn
; Line 57
	mov	edx, edi	; second argument: bitpos
$LN8@genDeBruij:
	mov	rcx, rsi	; first argument: sequence (or sequence | 1 when coming from Line 51)
	call	?genDeBruijn@@YAX_KH@Z			; genDeBruijn
; Line 59
	not	rbx
	and	QWORD PTR ?uniqueCheck@?1??genDeBruijn@@YAX_KH@Z@4_KA, rbx	; uniqueCheck &= ~uniqueBit
$LN3@genDeBruij:
	; Common exit: restore the callee-saved registers and free the frame.
	mov	rdi, QWORD PTR [rsp+32]
	mov	rsi, QWORD PTR [rsp+40]
	mov	rbx, QWORD PTR [rsp+48]
; Line 62
	add	rsp, 56					; 00000038H
	ret	0
?genDeBruijn@@YAX_KH@Z ENDP				; genDeBruijn
_TEXT	ENDS
END




This page took 0.01 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.