Computer Chess Club Archives


Search

Terms

Messages

Subject: Re: FirstOne/LastOne

Author: Gerd Isenberg

Date: 11:48:57 06/13/04

Go up one level in this thread


Here are the same loops, one calling by reference and one calling by value, using inline-asm bsf versions of the bitscan.
The code looks terrible, but it has less register pressure and, thanks to the store/load units, is not that
bad. First with the bitscans inlined, then without inlining them:


The __asm inline functions:

// Finds the lowest set bit of the 64-bit board referenced by bb, clears that
// bit in place, and leaves its index (0..63) in eax.
// There is no C++ return statement: the function's result is whatever the
// asm block leaves in eax.
// NOTE(review): if bb == 0 both bsf instructions see a zero source, so ecx is
// not a valid bit index and a bit of the high dword gets toggled anyway; the
// loops in this post only call this while bb != 0.
__forceinline
int bitScanAndResetAsm(BitBoard &bb)
{
  __asm
  {
    // bb is a reference, so this loads the address of the board into ebx
    mov    ebx, [bb]
    // eax = index bias (0 for the low dword), edx = single-bit mask seed
    xor    eax, eax
    mov	   edx, 1
    // scan the low dword; ZF is clear when a set bit was found
    bsf	   ecx, [ebx]
    jnz	   found
    // low dword was empty: scan the high dword instead, point ebx at it
    // and bias the result by 32
    bsf	   ecx, [ebx + 4]
    lea	   ebx, [ebx + 4]
    xor	   eax, 32
found:
    // edx = 1 << index-within-dword
    shl	   edx, cl
    // eax is 0 or 32 and ecx is below 32, so xor acts as an addition here
    xor    eax, ecx
    // toggle (i.e. clear) the found bit in the dword it was found in
    xor    [ebx], edx
  }
}

// Returns (in eax, without a C++ return statement) the index 0..63 of the
// lowest set bit of bb. bb is passed by value and is not modified.
// The high dword is scanned first and its index biased by 32; the scan of the
// low dword afterwards replaces that result only when the low dword has a
// set bit.
// NOTE(review): this depends on bsf leaving its destination register
// untouched when the source operand is zero. AMD documents that behavior;
// Intel's manual calls the destination undefined in that case - confirm for
// the target CPU.
__forceinline
int bitScanForewardAsm(BitBoard bb) // "last one" in the thread's FirstOne/LastOne naming; bsf scans upward from bit 0
{
  __asm
  {
    // candidate result from the high dword: index + 32
    bsf    eax, [bb+4]
    xor    eax, 32
    // a set bit in the low dword takes precedence
    bsf    eax, [bb]
  }
}



; Compiler listing of traverseByReference with bitScanAndResetAsm inlined.
; Uses an ebp frame plus one 4-byte temporary ($T647) that holds the address
; of the bb argument; the inlined asm loads that address into ebx.
; Register roles inside the loop: esi = pSquares write pointer, ebx = address
; of the dword being modified, eax = bit index, ecx = bsf result, edx = mask.
?traverseByReference@@YAX_KPAH@Z PROC NEAR		; traverseByReference, COMDAT

; 68   : {

  00000	55		 push	 ebp
  00001	8b ec		 mov	 ebp, esp
; push ecx only reserves the stack slot that is addressed as $T647[ebp] below
  00003	51		 push	 ecx

; 69   : 	while (bb)

; bb == 0 is tested by OR-ing its two 32-bit halves
  00004	8b 45 08	 mov	 eax, DWORD PTR _bb$[ebp]
  00007	8b 4d 0c	 mov	 ecx, DWORD PTR _bb$[ebp+4]
  0000a	0b c1		 or	 eax, ecx
  0000c	74 45		 je	 SHORT $L653
; ebx is written by the inlined asm, so it is saved here and restored on exit
  0000e	53		 push	 ebx

; 70   : 	{
; 71   : 		*pSquares++ = bitScanAndResetAsm(bb);

; the "reference" to bb is materialized as the address of the stack argument
; and parked in the temporary
  0000f	8d 4d 08	 lea	 ecx, DWORD PTR _bb$[ebp]
  00012	56		 push	 esi
  00013	8b 75 10	 mov	 esi, DWORD PTR _pSquares$[ebp]
  00016	89 4d fc	 mov	 DWORD PTR $T647[ebp], ecx
$L603:
; start of the inlined bitScanAndResetAsm body; ebx is reloaded from the
; temporary on every iteration
  00019	8b 5d fc	 mov	 ebx, DWORD PTR $T647[ebp]
  0001c	33 c0		 xor	 eax, eax
  0001e	ba 01 00 00 00	 mov	 edx, 1
  00023	0f bc 0b	 bsf	 ecx, DWORD PTR [ebx]
  00026	75 0a		 jne	 SHORT $found$645
  00028	0f bc 4b 04	 bsf	 ecx, DWORD PTR [ebx+4]
  0002c	8d 5b 04	 lea	 ebx, DWORD PTR [ebx+4]
  0002f	83 f0 20	 xor	 eax, 32			; 00000020H
$found$645:
  00032	d3 e2		 shl	 edx, cl
  00034	33 c1		 xor	 eax, ecx
  00036	31 13		 xor	 DWORD PTR [ebx], edx
; end of the inlined body: eax = bit index, bb has been updated in memory.
; Both halves of bb are re-read from the stack for the loop test, interleaved
; with the store of the index and the pointer increment.
  00038	8b 55 08	 mov	 edx, DWORD PTR _bb$[ebp]
  0003b	89 06		 mov	 DWORD PTR [esi], eax
  0003d	8b 45 0c	 mov	 eax, DWORD PTR _bb$[ebp+4]
  00040	83 c6 04	 add	 esi, 4
  00043	0b d0		 or	 edx, eax
  00045	75 d2		 jne	 SHORT $L603

; 72   : 	}
; 73   : 	*pSquares = -1;

; (the six opcode bytes of the next instruction wrap onto a second line)
  00047	c7 06 ff ff ff
	ff		 mov	 DWORD PTR [esi], -1
  0004d	5e		 pop	 esi
  0004e	5b		 pop	 ebx

; 74   : }

; mov esp, ebp also discards the $T647 slot
  0004f	8b e5		 mov	 esp, ebp
  00051	5d		 pop	 ebp
  00052	c3		 ret	 0
$L653:
; exit path for bb == 0 on entry: esi/ebx were never pushed, and pSquares is
; still only in its argument slot

; 72   : 	}
; 73   : 	*pSquares = -1;

  00053	8b 45 10	 mov	 eax, DWORD PTR _pSquares$[ebp]
  00056	c7 00 ff ff ff
	ff		 mov	 DWORD PTR [eax], -1

; 74   : }

  0005c	8b e5		 mov	 esp, ebp
  0005e	5d		 pop	 ebp
  0005f	c3		 ret	 0
?traverseByReference@@YAX_KPAH@Z ENDP			; traverseByReference


; Compiler listing of traverseByValue with bitScanForewardAsm inlined.
; No frame pointer: arguments are addressed relative to esp.
; Register roles inside the loop: ecx:edx = low:high halves of bb,
; esi = pSquares write pointer, eax/edi = scratch.
?traverseByValue@@YAX_KPAH@Z PROC NEAR			; traverseByValue, COMDAT

; 78   : 	while (bb)

  00000	8b 4c 24 04	 mov	 ecx, DWORD PTR _bb$[esp-4]
  00004	8b 54 24 08	 mov	 edx, DWORD PTR _bb$[esp]
  00008	8b c1		 mov	 eax, ecx
  0000a	0b c2		 or	 eax, edx
  0000c	74 3d		 je	 SHORT $L661
  0000e	56		 push	 esi
  0000f	8b 74 24 10	 mov	 esi, DWORD PTR _pSquares$[esp]
  00013	57		 push	 edi
$L610:

; 79   : 	{
; 80   : 		*pSquares++ = bitScanForewardAsm(bb);

; inlined asm body: it reads bb from its stack slots (memory operands), not
; from ecx:edx. High dword first, bias by 32, then the low dword.
  00014	0f bc 44 24 10	 bsf	 eax, DWORD PTR _bb$[esp+8]
  00019	83 f0 20	 xor	 eax, 32			; 00000020H
  0001c	0f bc 44 24 0c	 bsf	 eax, DWORD PTR _bb$[esp+4]
  00021	89 06		 mov	 DWORD PTR [esi], eax

; 81   : 		bb &= bb-1; // clear bit

; 64-bit bb-1 is formed as add -1 on the low half and adc -1 on the high
; half (eax:edi), then AND-ed into ecx:edx
  00023	8b c1		 mov	 eax, ecx
  00025	83 c6 04	 add	 esi, 4
  00028	83 c0 ff	 add	 eax, -1
  0002b	8b fa		 mov	 edi, edx
  0002d	83 d7 ff	 adc	 edi, -1
  00030	23 c8		 and	 ecx, eax
  00032	23 d7		 and	 edx, edi
  00034	8b c1		 mov	 eax, ecx
  00036	0b c2		 or	 eax, edx
; bb is written back to its stack slots so that the memory-operand bsf at the
; top of the loop sees the updated value; mov does not alter the flags set by
; the or above, which the jne consumes
  00038	89 4c 24 0c	 mov	 DWORD PTR _bb$[esp+4], ecx
  0003c	89 54 24 10	 mov	 DWORD PTR _bb$[esp+8], edx
  00040	75 d2		 jne	 SHORT $L610

; 82   : 	}
; 83   : 	*pSquares = -1;

; (the six opcode bytes of the next instruction wrap onto a second line)
  00042	c7 06 ff ff ff
	ff		 mov	 DWORD PTR [esi], -1
  00048	5f		 pop	 edi
  00049	5e		 pop	 esi

; 84   : }

  0004a	c3		 ret	 0
$L661:
; exit path for bb == 0 on entry: nothing was pushed, pSquares is read
; straight from its argument slot

; 82   : 	}
; 83   : 	*pSquares = -1;

  0004b	8b 4c 24 0c	 mov	 ecx, DWORD PTR _pSquares$[esp-4]
  0004f	c7 01 ff ff ff
	ff		 mov	 DWORD PTR [ecx], -1

; 84   : }

  00055	c3		 ret	 0
?traverseByValue@@YAX_KPAH@Z ENDP			; traverseByValue




loops without inlining:



; Compiler listing of traverseByReference with bitScanAndResetAsm called as a
; real function (not inlined). No frame pointer; esi = pSquares write pointer.
?traverseByReference@@YAX_KPAH@Z PROC NEAR		; traverseByReference, COMDAT

; 67   : 	while (bb)

  00000	8b 44 24 04	 mov	 eax, DWORD PTR _bb$[esp-4]
  00004	8b 4c 24 08	 mov	 ecx, DWORD PTR _bb$[esp]
  00008	0b c1		 or	 eax, ecx
  0000a	74 2b		 je	 SHORT $L650
  0000c	56		 push	 esi
  0000d	8b 74 24 10	 mov	 esi, DWORD PTR _pSquares$[esp]
$L603:

; 68   : 	{
; 69   : 		*pSquares++ = bitScanAndResetAsm(bb);

; the reference argument is passed as the address of bb's own stack slot
  00011	8d 4c 24 08	 lea	 ecx, DWORD PTR _bb$[esp]
  00015	51		 push	 ecx
  00016	e8 00 00 00 00	 call	 ?bitScanAndResetAsm@@YAHAA_K@Z ;
bitScanAndResetAsm
; eax = returned bit index. Both halves of bb are re-read from the stack for
; the loop test; the reads happen before add esp, 4 removes the pushed
; argument, hence the +4 / +8 in their displacements.
  0001b	8b 54 24 0c	 mov	 edx, DWORD PTR _bb$[esp+4]
  0001f	89 06		 mov	 DWORD PTR [esi], eax
  00021	8b 44 24 10	 mov	 eax, DWORD PTR _bb$[esp+8]
  00025	83 c4 04	 add	 esp, 4
  00028	83 c6 04	 add	 esi, 4
  0002b	0b d0		 or	 edx, eax
  0002d	75 e2		 jne	 SHORT $L603

; 70   : 	}
; 71   : 	*pSquares = -1;

; (the six opcode bytes of the next instruction wrap onto a second line)
  0002f	c7 06 ff ff ff
	ff		 mov	 DWORD PTR [esi], -1
  00035	5e		 pop	 esi

; 72   : }

  00036	c3		 ret	 0
$L650:
; exit path for bb == 0 on entry: esi was never pushed

; 70   : 	}
; 71   : 	*pSquares = -1;

  00037	8b 44 24 0c	 mov	 eax, DWORD PTR _pSquares$[esp-4]
  0003b	c7 00 ff ff ff
	ff		 mov	 DWORD PTR [eax], -1

; 72   : }

  00041	c3		 ret	 0
?traverseByReference@@YAX_KPAH@Z ENDP			; traverseByReference



; Compiler listing of traverseByValue with bitScanForewardAsm called as a real
; function (not inlined). No frame pointer.
; Register roles inside the loop: ecx:edx = low:high halves of bb,
; esi = pSquares write pointer, eax/edi = scratch.
PUBLIC	?traverseByValue@@YAX_KPAH@Z			; traverseByValue
; Function compile flags: /Ogty
;	COMDAT ?traverseByValue@@YAX_KPAH@Z
_TEXT	SEGMENT
; argument offsets used by the _bb$[...] / _pSquares$[...] operands below
_bb$ = 8
_pSquares$ = 16
?traverseByValue@@YAX_KPAH@Z PROC NEAR			; traverseByValue, COMDAT

; 76   : 	while (bb)

  00000	8b 4c 24 04	 mov	 ecx, DWORD PTR _bb$[esp-4]
  00004	8b 54 24 08	 mov	 edx, DWORD PTR _bb$[esp]
  00008	8b c1		 mov	 eax, ecx
  0000a	0b c2		 or	 eax, edx
  0000c	74 32		 je	 SHORT $L655
  0000e	56		 push	 esi
  0000f	8b 74 24 10	 mov	 esi, DWORD PTR _pSquares$[esp]
  00013	57		 push	 edi
$L610:

; 77   : 	{
; 78   : 		*pSquares++ = bitScanForewardAsm(bb);

; bb is passed by value: high half pushed first, then the low half
  00014	52		 push	 edx
  00015	51		 push	 ecx
  00016	e8 00 00 00 00	 call	 ?bitScanForewardAsm@@YAH_K@Z ; bitScanForewardAsm
  0001b	89 06		 mov	 DWORD PTR [esi], eax

; 79   : 		bb &= bb-1; // clear bit

; ecx and edx are used here without being reloaded after the call, i.e. this
; code expects the callee to leave them intact (the asm body of
; bitScanForewardAsm shown earlier writes only eax).
; add esp, 8 removes the two pushed halves of bb.
; 64-bit bb-1 is formed as add -1 / adc -1 in eax:edi, then AND-ed into
; ecx:edx.
  0001d	8b c1		 mov	 eax, ecx
  0001f	83 c4 08	 add	 esp, 8
  00022	83 c6 04	 add	 esi, 4
  00025	83 c0 ff	 add	 eax, -1
  00028	8b fa		 mov	 edi, edx
  0002a	83 d7 ff	 adc	 edi, -1
  0002d	23 c8		 and	 ecx, eax
  0002f	23 d7		 and	 edx, edi
  00031	8b c1		 mov	 eax, ecx
  00033	0b c2		 or	 eax, edx
; unlike the inlined version, bb is not written back to its stack slot: the
; next iteration passes it through the pushes above
  00035	75 dd		 jne	 SHORT $L610

; 80   : 	}
; 81   : 	*pSquares = -1;

; (the six opcode bytes of the next instruction wrap onto a second line)
  00037	c7 06 ff ff ff
	ff		 mov	 DWORD PTR [esi], -1
  0003d	5f		 pop	 edi
  0003e	5e		 pop	 esi

; 82   : }

  0003f	c3		 ret	 0
$L655:
; exit path for bb == 0 on entry: nothing was pushed

; 80   : 	}
; 81   : 	*pSquares = -1;

  00040	8b 4c 24 0c	 mov	 ecx, DWORD PTR _pSquares$[esp-4]
  00044	c7 01 ff ff ff
	ff		 mov	 DWORD PTR [ecx], -1

; 82   : }

  0004a	c3		 ret	 0
?traverseByValue@@YAX_KPAH@Z ENDP			; traverseByValue





This page took 0 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.