Author: Dezhi Zhao
Date: 18:38:48 09/30/05
Go up one level in this thread
PUBLIC ?diagonal1Attacks@@YA_K_K0@Z ; diagonal1Attacks
; Function compile flags: /Ogtpy
; COMDAT ?diagonal1Attacks@@YA_K_K0@Z
_TEXT SEGMENT
; deo$ / pro$ are defined here but never referenced by the body below.
deo$ = 8
pro$ = 16
;-----------------------------------------------------------------------
; diagonal1Attacks(deo, pro) -- MASM syntax, compiler-generated listing.
; Both shift-by-9 fills (>> 9 and << 9) are inlined and interleaved in
; one straight-line sequence.
; In:      rcx = deo, rdx = pro (as consumed by the body)
; Out:     rax = (>> 9 direction result) | (<< 9 direction result)
; Scratch: rax, rcx, rdx, r8, r9, r10
; The body has no memory operands, no stack adjustment and no calls.
;-----------------------------------------------------------------------
?diagonal1Attacks@@YA_K_K0@Z PROC ; diagonal1Attacks, COMDAT
; 36 : return leftDownAttacks(deo, pro) | rightUpAttacks(deo, pro);
mov r10, rdx ; r10 = pro, working copy for the >> direction
mov r9, rcx ; r9 = deo, accumulator for the >> direction
mov r8, rcx ; r8 = untouched deo, needed later by the << direction
shr r9, 9 ; r9 = deo >> 9
mov rax, 9187201950435737471 ; 7f7f7f7f7f7f7f7fH (bit 7 of every byte clear)
and r10, rax ; r10 = pro & 7f7f...7fH
and r9, r10 ; r9 = pro & (deo >> 9)
mov rax, r10
shr rax, 9 ; rax = pro >> 9
or r9, rcx ; >> fill step 1: r9 = deo | (pro & (deo >> 9))
shl rcx, 9 ; rcx = deo << 9 (the << direction starts here, interleaved)
and r10, rax ; r10 = pro & (pro >> 9)
mov rax, r9
shr rax, 18
and rax, r10 ; rax = pro & (deo >> 18)
or r9, rax ; >> fill step 2
mov rax, -72340172838076674 ; fefefefefefefefeH (bit 0 of every byte clear)
and rdx, rax ; rdx = pro & fefe...feH
and rcx, rdx ; rcx = pro & (deo << 9)
mov rax, rdx
shl rax, 9 ; rax = pro << 9
or rcx, r8 ; << fill step 1: rcx = deo | (pro & (deo << 9))
and rdx, rax ; rdx = pro & (pro << 9)
mov rax, rcx
shl rax, 18
and rax, rdx ; rax = pro & (deo << 18)
or rcx, rax ; << fill step 2
mov rax, rcx
; Bits 3 and 11 are cleared up front: the shifts below total 18+18+9 = 45,
; which would move them to bits 48 and 56, positions the fefe...feH mask
; would clear after the final shift anyway.
and rax, -2057 ; fffffffffffff7f7H
shl rax, 18
and rax, rdx
shl rax, 18
and rax, rdx ; rax = (masked deo << 36) & pro & (pro << 18); the second-level pro is never materialised separately
; Mask applied BEFORE the final << 9: clearing bits 7, 15, ..., 47 here is
; equivalent to clearing bits 16, 24, ..., 56 after the shift.
mov rdx, -141289400074369 ; ffff7f7f7f7f7f7fH
and rcx, rdx ; rcx = pre-masked result of << fill steps 1-2
mov rdx, r9 ; rdx is free again: reuse it for the >> direction
or rax, rcx ; rax = pre-masked << fill after step 3
shr rdx, 36 ; 00000024H
mov rcx, r10
shr rcx, 18 ; rcx = (narrowed pro) >> 18
shl rax, 9 ; rax = finished << direction result
and rdx, rcx
mov rcx, 35887507618889599 ; 007f7f7f7f7f7f7fH
and rdx, r10 ; rdx = (deo >> 36) & pro & (pro >> 18)
or rdx, r9 ; >> fill step 3
shr rdx, 9 ; final one-step shift of the >> fill
and rdx, rcx ; top byte of the mask is 00 rather than 7f; harmless, shr 9 already zeroed bits 55-63
or rax, rdx ; rax = union of both directions
; 37 : }
ret 0
?diagonal1Attacks@@YA_K_K0@Z ENDP ; diagonal1Attacks
_TEXT ENDS
On September 30, 2005 at 12:24:50, Gerd Isenberg wrote:
>
>>Yes, it looks fine. I prefer to use one more volatile register (r8/r9) to hold
>>the manifest constant and save one load operation.
>>
>
>Hmm... yes, probably due to the long opcode for loading an immediate 64-bit
>constant into a register. OTOH, even though r8/r9 are volatile registers, if they
>are not used by a leaf callee, a caller using those registers need not save/restore them.
>I also have the impression that msvc is a bit conservative in using architectural
>registers - maybe for a good reason.
>
>And in this special case, the second constant is changed to a pre mask value
>(ffff7f7f7f7f7f7fH) by some nice optimization process, see the shift left 36.
>
>As already mentioned, if more directions are processed in parallel with inlined
>Kogge-Stone routines, instructions may hopefully be scheduled more in parallel with,
>let's say, 2*3 registers...
>
>Gerd
>
>
>typedef unsigned __int64 BitBoard;
>
>BitBoard notA = 0xfefefefefefefefe;
>BitBoard notH = 0x7f7f7f7f7f7f7f7f;
>
>// deo - bishops/queens, pro - set of empty squares
>
>__forceinline // Fill along << 9 in three doubling steps (9, 18, 36), then shift once more to get the attacked squares.
>BitBoard rightUpAttacks(BitBoard deo, BitBoard pro) { // deo: bishops/queens, pro: set of empty squares
> pro = pro & notA; // drop bit 0 of every byte from the empty set so a << 9 from bit 7 of a byte cannot carry into the next byte's bit 0
> deo |= pro & (deo << 9); // fill reaches 1 step through empty squares
> pro = pro & (pro << 9); // bit stays set only if this square and the previous one along the ray are both empty
> deo |= pro & (deo << 18); // fill reaches up to 3 steps
> pro = pro & (pro << 18); // bit stays set only if 4 consecutive squares along the ray are empty
> deo |= pro & (deo << 36); // fill reaches up to 7 steps
> return (deo << 9) & notA; // step the whole fill once more (no pro mask, so a first non-empty square is included) and drop wrapped bits
>}
>
>__forceinline // Fill along >> 9 in three doubling steps (9, 18, 36), then shift once more to get the attacked squares.
>BitBoard leftDownAttacks(BitBoard deo, BitBoard pro) { // deo: bishops/queens, pro: set of empty squares
> pro = pro & notH; // drop bit 7 of every byte from the empty set so a >> 9 from bit 0 of a byte cannot borrow into the previous byte's bit 7
> deo |= pro & (deo >> 9); // fill reaches 1 step through empty squares
> pro = pro & (pro >> 9); // bit stays set only if this square and the previous one along the ray are both empty
> deo |= pro & (deo >> 18); // fill reaches up to 3 steps
> pro = pro & (pro >> 18); // bit stays set only if 4 consecutive squares along the ray are empty
> deo |= pro & (deo >> 36); // fill reaches up to 7 steps
> return (deo >> 9) & notH; // step the whole fill once more (no pro mask, so a first non-empty square is included) and drop wrapped bits
>}
>
>BitBoard diagonal1Attacks(BitBoard deo, BitBoard pro) { // deo: bishops/queens, pro: set of empty squares
> return leftDownAttacks(deo, pro) | rightUpAttacks(deo, pro); // union of the >> 9 ray and the << 9 ray, i.e. both ends of the same diagonal
>}
This page took 0 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.