Author: Eugene Nalimov
Date: 09:57:09 03/31/04
Go up one level in this thread
On March 31, 2004 at 04:15:21, Gerd Isenberg wrote:
>Looks fine ;-)
>
>Curious about call __chkstk in isDeBruijnN, but not in the recursive function
>genDeBruijn. Yes, isDeBruijnN has a local 4KByte array on the frame, and has to
>clear it too, so under runtime considerations call __chkstk doesn't matter much.
>Instead of call memset I would prefer an inlined intrinsic of that e.g. with a
>8-byte aligned bool array and REP STOSQ with rcx=4096/8. I guess there are some
>additional compiler flags...
>
>If you have some additional time, it would be nice to see the assembly of a
>kogge-stone filler with a bit more register pressure:
>
>Thanks again,
>Gerd
_chkstk() call is necessary if function allocates more than 4k on stack.
memset() call is faster than REP STOSQ. Trust me. BTW, the old version of the
compiler would generate REP STOSQ.
And here is your assembly:
; Listing generated by Microsoft (R) Optimizing Compiler Version 14.00.40316
include listing.inc
INCLUDELIB LIBCMT
INCLUDELIB OLDNAMES
PUBLIC ?minusOne@XMM@@1T__m128i@@B ; XMM::minusOne
PUBLIC ?snotH@XMM@@1T__m128i@@B ; XMM::snotH
PUBLIC ?snotA@XMM@@1T__m128i@@B ; XMM::snotA
PUBLIC ?binDigit@BBPair@@1QBDB ; BBPair::binDigit
PUBLIC ?rankStr@BBPair@@1PAY09DA ; BBPair::rankStr
; Uninitialised storage for BBPair::rankStr. 0a00H = 2560 bytes, which matches
; the 256 rows x 10 chars of `char rankStr[256][10]` in the quoted C++ source.
; The rows are filled in at run time (see BBPair::initRankStr).
_BSS SEGMENT
?rankStr@BBPair@@1PAY09DA DB 0a00H DUP (?) ; BBPair::rankStr
_BSS ENDS
; Read-only constants referenced by the code below.
CONST SEGMENT
; binDigit: the two characters used to render a clear bit (0b0H) and a set
; bit (0dbH); indexed with 0/1 by initRankStr.
?binDigit@BBPair@@1QBDB DB 0b0H ; BBPair::binDigit
DB 0dbH
; Skip 14 bytes so that the next constant starts 16 bytes after binDigit.
; NOTE(review): presumably this keeps the 16-byte XMM constants aligned for
; the aligned SSE2 memory operands that read them -- confirm.
ORG $+14
; snotA: sixteen bytes of 0feH (bit 0 of every byte clear).
?snotA@XMM@@1T__m128i@@B DB 0feH ; XMM::snotA
DB 0feH
DB 0feH
DB 0feH
DB 0feH
DB 0feH
DB 0feH
DB 0feH
DB 0feH
DB 0feH
DB 0feH
DB 0feH
DB 0feH
DB 0feH
DB 0feH
DB 0feH
; snotH: sixteen bytes of 07fH (bit 7 of every byte clear).
?snotH@XMM@@1T__m128i@@B DB 07fH ; XMM::snotH
DB 07fH
DB 07fH
DB 07fH
DB 07fH
DB 07fH
DB 07fH
DB 07fH
DB 07fH
DB 07fH
DB 07fH
DB 07fH
DB 07fH
DB 07fH
DB 07fH
DB 07fH
; minusOne: sixteen bytes of 0ffH (all 128 bits set); XORing with it
; implements operator~ for XMM.
?minusOne@XMM@@1T__m128i@@B DB 0ffH ; XMM::minusOne
DB 0ffH
DB 0ffH
DB 0ffH
DB 0ffH
DB 0ffH
DB 0ffH
DB 0ffH
DB 0ffH
DB 0ffH
DB 0ffH
DB 0ffH
DB 0ffH
DB 0ffH
DB 0ffH
DB 0ffH
CONST ENDS
;-----------------------------------------------------------------------
; BBPair::BBPair()  -- default constructor (empty body in the C++ source)
; ABI:  Microsoft x64
; In:   rcx = this
; Out:  rax = this
; The object is left uninitialised; the only work is returning `this`.
;-----------------------------------------------------------------------
PUBLIC ??0BBPair@@QEAA@XZ ; BBPair::BBPair
; Function compile flags: /Ogtpy
; COMDAT ??0BBPair@@QEAA@XZ
_TEXT SEGMENT
this$ = 8
??0BBPair@@QEAA@XZ PROC ; BBPair::BBPair, COMDAT
; File c:\repro\q.cpp
; Line 12
mov rax, rcx
ret 0
??0BBPair@@QEAA@XZ ENDP ; BBPair::BBPair
_TEXT ENDS
;-----------------------------------------------------------------------
; BBPair::BBPair(const BitBoard &low, const BitBoard &high)
; ABI:  Microsoft x64
; In:   rcx = this, rdx = &low, r8 = &high
; Out:  rax = this
; Copies *low to [this] and *high to [this+8] (sgl[0] and sgl[1]).
;-----------------------------------------------------------------------
PUBLIC ??0BBPair@@QEAA@AEB_K0@Z ; BBPair::BBPair
; Function compile flags: /Ogtpy
; COMDAT ??0BBPair@@QEAA@AEB_K0@Z
_TEXT SEGMENT
this$ = 8
low$ = 16
high$ = 24
??0BBPair@@QEAA@AEB_K0@Z PROC ; BBPair::BBPair, COMDAT
; Line 14
; sgl[0] = low
mov rax, QWORD PTR [rdx]
mov QWORD PTR [rcx], rax
; sgl[1] = high
mov rax, QWORD PTR [r8]
mov QWORD PTR [rcx+8], rax
mov rax, rcx
ret 0
??0BBPair@@QEAA@AEB_K0@Z ENDP ; BBPair::BBPair
_TEXT ENDS
;-----------------------------------------------------------------------
; void BBPair::printBoards()
; ABI:  Microsoft x64
; In:   rcx = this (16-byte board pair)
; Prints eight lines, for i = 7 down to 0:
;     printf("%s %s\n", rankStr[byte i], rankStr[byte i+8])
; followed by printf("\n"), which is issued as a tail call (jmp).
; Registers: rdi = this, rbx = i, rsi = &rankStr[0][0]
; Stack: 56 bytes = 32 bytes of shadow space for printf + three 8-byte
;        save slots (rdi at rsp+32, rsi at rsp+40, rbx at rsp+48).
;-----------------------------------------------------------------------
PUBLIC ??_C@_01EEMJAFIK@?6?$AA@ ; `string'
PUBLIC ??_C@_06IINFAHFO@?$CFs?5?$CFs?6?$AA@ ; `string'
PUBLIC ?printBoards@BBPair@@QEAAXXZ ; BBPair::printBoards
EXTRN printf:PROC
; COMDAT ??_C@_06IINFAHFO@?$CFs?5?$CFs?6?$AA@
CONST SEGMENT
; Format string "%s %s\n" used for each rank line.
??_C@_06IINFAHFO@?$CFs?5?$CFs?6?$AA@ DB '%s %s', 0aH, 00H ; `string'
CONST ENDS
; COMDAT ??_C@_01EEMJAFIK@?6?$AA@
CONST SEGMENT
; The string "\n" printed after the eight rank lines.
??_C@_01EEMJAFIK@?6?$AA@ DB 0aH, 00H ; `string'
CONST ENDS
; COMDAT xdata
xdata SEGMENT
; Unwind information for printBoards. NOTE(review): read against the x64
; UNWIND_INFO encoding these dwords appear to describe a 56-byte allocation
; and saves of rdi/rsi/rbx at rsp+32/40/48, matching the prologue below --
; confirm before relying on this decoding.
$unwind$?printBoards@BBPair@@QEAAXXZ DD 071301H
DD 047413H
DD 05640eH
DD 063409H
DD 06204H
xdata ENDS
; COMDAT pdata
pdata SEGMENT
; Function-table entry: start of the function, start+121, and the unwind
; data above.
$pdata$?printBoards@BBPair@@QEAAXXZ DD @imagerel($LN8#)
DD @imagerel($LN8#+121)
DD @imagerel($unwind$?printBoards@BBPair@@QEAAXXZ#)
; Function compile flags: /Ogtpy
pdata ENDS
; COMDAT ?printBoards@BBPair@@QEAAXXZ
_TEXT SEGMENT
this$ = 64
?printBoards@BBPair@@QEAAXXZ PROC ; BBPair::printBoards, COMDAT
; Line 18
$LN8:
; Prologue: allocate the frame, then save the callee-saved registers that
; must survive the printf calls.
sub rsp, 56 ; 00000038H
mov QWORD PTR [rsp+48], rbx
mov QWORD PTR [rsp+40], rsi
mov QWORD PTR [rsp+32], rdi
mov rdi, rcx
; Line 19
; i = 7; rsi = base of rankStr
mov ebx, 7
lea rsi, OFFSET FLAT:?rankStr@BBPair@@1PAY09DA ; BBPair::rankStr
; NOTE(review): npad presumably emits padding so the loop label is aligned.
npad 14
$LL3@printBoard:
; Line 20
; r8  = &rankStr[ this->byte[i+8] ]   (third printf argument)
; rdx = &rankStr[ this->byte[i]   ]   (second printf argument)
; Each row is 10 bytes: index*5 via lea, then *2 in the final lea.
movzx eax, BYTE PTR [rbx+rdi+8]
lea rcx, OFFSET FLAT:??_C@_06IINFAHFO@?$CFs?5?$CFs?6?$AA@
lea rdx, QWORD PTR [rax+rax*4]
movzx eax, BYTE PTR [rbx+rdi]
lea r8, QWORD PTR [rsi+rdx*2]
lea rdx, QWORD PTR [rax+rax*4]
lea rdx, QWORD PTR [rsi+rdx*2]
call printf
; --i; loop while i >= 0 (sign flag clear after the decrement)
dec rbx
jns SHORT $LL3@printBoard
; Line 21
; Set up printf("\n"), restore the saved registers and release the frame,
; then jump to printf so that it returns directly to our caller.
lea rcx, OFFSET FLAT:??_C@_01EEMJAFIK@?6?$AA@
mov rdi, QWORD PTR [rsp+32]
mov rsi, QWORD PTR [rsp+40]
mov rbx, QWORD PTR [rsp+48]
; Line 22
add rsp, 56 ; 00000038H
jmp printf
?printBoards@BBPair@@QEAAXXZ ENDP ; BBPair::printBoards
_TEXT ENDS
;-----------------------------------------------------------------------
; static void BBPair::initRankStr()
; ABI:  Microsoft x64, leaf function, no stack frame
; Fills rankStr[i] for i = 0..255: character j (j = 0..7) is
; binDigit[bit j of i], and character 8 is the terminating 0.
; The inner j-loop of the C++ source is fully unrolled.
; Registers: ecx = i, rdx = row pointer, r8 = &binDigit[0], rax = scratch
;-----------------------------------------------------------------------
PUBLIC ?initRankStr@BBPair@@SAXXZ ; BBPair::initRankStr
; Function compile flags: /Ogtpy
; COMDAT ?initRankStr@BBPair@@SAXXZ
_TEXT SEGMENT
?initRankStr@BBPair@@SAXXZ PROC ; BBPair::initRankStr, COMDAT
; Line 26
xor ecx, ecx
lea rdx, OFFSET FLAT:?rankStr@BBPair@@1PAY09DA ; BBPair::rankStr
lea r8, OFFSET FLAT:?binDigit@BBPair@@1QBDB ; BBPair::binDigit
$LL6@initRankSt:
; Line 29
; rdx is advanced to the next row up front, so the current row is addressed
; with negative displacements: [rdx-10] is column 0 ... [rdx-3] is column 7.
mov eax, ecx
add rdx, 10
and eax, 1
movzx eax, BYTE PTR [rax+r8]
mov BYTE PTR [rdx-10], al
mov eax, ecx
; Line 30
; column 8 = '\0' (string terminator), scheduled early by the compiler
mov BYTE PTR [rdx-2], 0
; bit 1 -> column 1
and eax, 2
shr rax, 1
movzx eax, BYTE PTR [rax+r8]
mov BYTE PTR [rdx-9], al
; bit 2 -> column 2
mov eax, ecx
and eax, 4
shr rax, 2
movzx eax, BYTE PTR [rax+r8]
mov BYTE PTR [rdx-8], al
; bit 3 -> column 3
mov eax, ecx
and eax, 8
shr rax, 3
movzx eax, BYTE PTR [rax+r8]
mov BYTE PTR [rdx-7], al
; bit 4 -> column 4
mov eax, ecx
and eax, 16
shr rax, 4
movzx eax, BYTE PTR [rax+r8]
mov BYTE PTR [rdx-6], al
; bit 5 -> column 5
mov eax, ecx
and eax, 32 ; 00000020H
shr rax, 5
movzx eax, BYTE PTR [rax+r8]
mov BYTE PTR [rdx-5], al
; bit 6 -> column 6
mov eax, ecx
and eax, 64 ; 00000040H
shr rax, 6
movzx eax, BYTE PTR [rax+r8]
mov BYTE PTR [rdx-4], al
; bit 7 -> column 7; the ++i and the loop compare are interleaved here
mov eax, ecx
inc ecx
and eax, 128 ; 00000080H
shr rax, 7
cmp ecx, 256 ; 00000100H
movzx eax, BYTE PTR [rax+r8]
mov BYTE PTR [rdx-3], al
; loop while i < 256 (flags come from the cmp above; movzx/mov keep them)
jl $LL6@initRankSt
; Line 32
ret 0
?initRankStr@BBPair@@SAXXZ ENDP ; BBPair::initRankStr
_TEXT ENDS
;-----------------------------------------------------------------------
; XMM::XMM(__m128i a)
; ABI:  Microsoft x64
; In:   rcx = this, rdx = address of the 16-byte argument `a`
; Out:  rax = this
; Copies the 16 bytes at [rdx] to [this]. movdqa is the aligned form, so
; both addresses must be 16-byte aligned.
;-----------------------------------------------------------------------
PUBLIC ??0XMM@@QEAA@T__m128i@@@Z ; XMM::XMM
; Function compile flags: /Ogtpy
; COMDAT ??0XMM@@QEAA@T__m128i@@@Z
_TEXT SEGMENT
this$ = 8
a$ = 16
??0XMM@@QEAA@T__m128i@@@Z PROC ; XMM::XMM, COMDAT
; Line 57
movdqa xmm0, XMMWORD PTR [rdx]
mov rax, rcx
movdqa XMMWORD PTR [rcx], xmm0
ret 0
??0XMM@@QEAA@T__m128i@@@Z ENDP ; XMM::XMM
_TEXT ENDS
;-----------------------------------------------------------------------
; XMM::XMM(const BBPair* ptr)   -- dbl = _mm_load_si128(&ptr->dbl)
; ABI:  Microsoft x64
; In:   rcx = this, rdx = ptr
; Out:  rax = this
; Aligned 16-byte load from [ptr], aligned 16-byte store to [this].
;-----------------------------------------------------------------------
PUBLIC ??0XMM@@QEAA@PEBVBBPair@@@Z ; XMM::XMM
; Function compile flags: /Ogtpy
; COMDAT ??0XMM@@QEAA@PEBVBBPair@@@Z
_TEXT SEGMENT
this$ = 8
ptr$ = 16
??0XMM@@QEAA@PEBVBBPair@@@Z PROC ; XMM::XMM, COMDAT
; Line 60
movdqa xmm0, XMMWORD PTR [rdx]
mov rax, rcx
movdqa XMMWORD PTR [rcx], xmm0
ret 0
??0XMM@@QEAA@PEBVBBPair@@@Z ENDP ; XMM::XMM
_TEXT ENDS
;-----------------------------------------------------------------------
; void XMM::store(BBPair* ptr)   -- _mm_store_si128(&ptr->dbl, dbl)
; ABI:  Microsoft x64
; In:   rcx = this, rdx = ptr
; Copies the 16 bytes of *this to *ptr using aligned moves.
;-----------------------------------------------------------------------
PUBLIC ?store@XMM@@QEAAXPEAVBBPair@@@Z ; XMM::store
; Function compile flags: /Ogtpy
; COMDAT ?store@XMM@@QEAAXPEAVBBPair@@@Z
_TEXT SEGMENT
this$ = 8
ptr$ = 16
?store@XMM@@QEAAXPEAVBBPair@@@Z PROC ; XMM::store, COMDAT
; Line 66
movdqa xmm0, XMMWORD PTR [rcx]
movdqa XMMWORD PTR [rdx], xmm0
ret 0
?store@XMM@@QEAAXPEAVBBPair@@@Z ENDP ; XMM::store
_TEXT ENDS
;-----------------------------------------------------------------------
; XMM& XMM::operator&=(const XMM &dbb)
; ABI:  Microsoft x64
; In:   rcx = this, rdx = &dbb
; Out:  rax = this
; *this = *this AND dbb over all 128 bits.
;-----------------------------------------------------------------------
PUBLIC ??_4XMM@@QEAAAEAV0@AEBV0@@Z ; XMM::operator&=
; Function compile flags: /Ogtpy
; COMDAT ??_4XMM@@QEAAAEAV0@AEBV0@@Z
_TEXT SEGMENT
this$ = 8
dbb$ = 16
??_4XMM@@QEAAAEAV0@AEBV0@@Z PROC ; XMM::operator&=, COMDAT
; Line 85
movdqa xmm0, XMMWORD PTR [rcx]
mov rax, rcx
pand xmm0, XMMWORD PTR [rdx]
movdqa XMMWORD PTR [rcx], xmm0
ret 0
??_4XMM@@QEAAAEAV0@AEBV0@@Z ENDP ; XMM::operator&=
_TEXT ENDS
;-----------------------------------------------------------------------
; XMM& XMM::operator|=(const XMM &dbb)
; ABI:  Microsoft x64
; In:   rcx = this, rdx = &dbb
; Out:  rax = this
; *this = *this OR dbb over all 128 bits.
;-----------------------------------------------------------------------
PUBLIC ??_5XMM@@QEAAAEAV0@AEBV0@@Z ; XMM::operator|=
; Function compile flags: /Ogtpy
; COMDAT ??_5XMM@@QEAAAEAV0@AEBV0@@Z
_TEXT SEGMENT
this$ = 8
dbb$ = 16
??_5XMM@@QEAAAEAV0@AEBV0@@Z PROC ; XMM::operator|=, COMDAT
; Line 87
movdqa xmm0, XMMWORD PTR [rcx]
mov rax, rcx
por xmm0, XMMWORD PTR [rdx]
movdqa XMMWORD PTR [rcx], xmm0
ret 0
??_5XMM@@QEAAAEAV0@AEBV0@@Z ENDP ; XMM::operator|=
_TEXT ENDS
;-----------------------------------------------------------------------
; XMM operator>>(const XMM &dbb, int shift)
; ABI:  Microsoft x64 (result returned through a hidden pointer)
; In:   rcx = address of the return object, rdx = &dbb, r8d = shift
; Out:  rax = address of the return object
; Logical right shift of each 64-bit half of dbb by `shift` (psrlq).
;-----------------------------------------------------------------------
PUBLIC ??5@YA?AVXMM@@AEBV0@H@Z ; operator>>
; Function compile flags: /Ogtpy
; COMDAT ??5@YA?AVXMM@@AEBV0@H@Z
_TEXT SEGMENT
__$ReturnUdt$ = 8
dbb$ = 16
shift$ = 24
??5@YA?AVXMM@@AEBV0@H@Z PROC ; operator>>, COMDAT
; Line 99
movdqa xmm1, XMMWORD PTR [rdx]
; the variable shift count has to be supplied in an XMM register
movd xmm0, r8d
mov rax, rcx
psrlq xmm1, xmm0
movdqa XMMWORD PTR [rcx], xmm1
ret 0
??5@YA?AVXMM@@AEBV0@H@Z ENDP ; operator>>
_TEXT ENDS
;-----------------------------------------------------------------------
; XMM operator<<(const XMM &dbb, int shift)
; ABI:  Microsoft x64 (result returned through a hidden pointer)
; In:   rcx = address of the return object, rdx = &dbb, r8d = shift
; Out:  rax = address of the return object
; Left shift of each 64-bit half of dbb by `shift` (psllq).
;-----------------------------------------------------------------------
PUBLIC ??6@YA?AVXMM@@AEBV0@H@Z ; operator<<
; Function compile flags: /Ogtpy
; COMDAT ??6@YA?AVXMM@@AEBV0@H@Z
_TEXT SEGMENT
__$ReturnUdt$ = 8
dbb$ = 16
shift$ = 24
??6@YA?AVXMM@@AEBV0@H@Z PROC ; operator<<, COMDAT
; Line 101
movdqa xmm1, XMMWORD PTR [rdx]
; the variable shift count has to be supplied in an XMM register
movd xmm0, r8d
mov rax, rcx
psllq xmm1, xmm0
movdqa XMMWORD PTR [rcx], xmm1
ret 0
??6@YA?AVXMM@@AEBV0@H@Z ENDP ; operator<<
_TEXT ENDS
;-----------------------------------------------------------------------
; XMM operator&(const XMM &a, const XMM &b)
; ABI:  Microsoft x64 (result returned through a hidden pointer)
; In:   rcx = address of the return object, rdx = &a, r8 = &b
; Out:  rax = address of the return object
; Stores a AND b (128 bits) into the return object.
;-----------------------------------------------------------------------
PUBLIC ??I@YA?AVXMM@@AEBV0@0@Z ; operator&
; Function compile flags: /Ogtpy
; COMDAT ??I@YA?AVXMM@@AEBV0@0@Z
_TEXT SEGMENT
__$ReturnUdt$ = 8
a$ = 16
b$ = 24
??I@YA?AVXMM@@AEBV0@0@Z PROC ; operator&, COMDAT
; Line 103
movdqa xmm0, XMMWORD PTR [rdx]
mov rax, rcx
pand xmm0, XMMWORD PTR [r8]
movdqa XMMWORD PTR [rcx], xmm0
ret 0
??I@YA?AVXMM@@AEBV0@0@Z ENDP ; operator&
_TEXT ENDS
;-----------------------------------------------------------------------
; XMM operator~(const XMM &a)
; ABI:  Microsoft x64 (result returned through a hidden pointer)
; In:   rcx = address of the return object, rdx = &a
; Out:  rax = address of the return object
; Complements all 128 bits by XORing with the all-ones constant
; XMM::minusOne.
;-----------------------------------------------------------------------
PUBLIC ??S@YA?AVXMM@@AEBV0@@Z ; operator~
; Function compile flags: /Ogtpy
; COMDAT ??S@YA?AVXMM@@AEBV0@@Z
_TEXT SEGMENT
__$ReturnUdt$ = 8
a$ = 16
??S@YA?AVXMM@@AEBV0@@Z PROC ; operator~, COMDAT
; Line 113
movdqa xmm0, XMMWORD PTR [rdx]
mov rax, rcx
pxor xmm0, XMMWORD PTR ?minusOne@XMM@@1T__m128i@@B ; XMM::minusOne
movdqa XMMWORD PTR [rcx], xmm0
ret 0
??S@YA?AVXMM@@AEBV0@@Z ENDP ; operator~
_TEXT ENDS
;-----------------------------------------------------------------------
; GPR::GPR(const BitBoard &low, const BitBoard &high)
; ABI:  Microsoft x64
; In:   rcx = this, rdx = &low, r8 = &high
; Out:  rax = this
; sgl[0] = low ([this]); sgl[1] = high ([this+8]).
;-----------------------------------------------------------------------
PUBLIC ??0GPR@@QEAA@AEB_K0@Z ; GPR::GPR
; Function compile flags: /Ogtpy
; COMDAT ??0GPR@@QEAA@AEB_K0@Z
_TEXT SEGMENT
this$ = 8
low$ = 16
high$ = 24
??0GPR@@QEAA@AEB_K0@Z PROC ; GPR::GPR, COMDAT
; Line 152
mov rax, QWORD PTR [rdx]
mov QWORD PTR [rcx], rax
; Line 153
mov rax, QWORD PTR [r8]
mov QWORD PTR [rcx+8], rax
mov rax, rcx
ret 0
??0GPR@@QEAA@AEB_K0@Z ENDP ; GPR::GPR
_TEXT ENDS
;-----------------------------------------------------------------------
; GPR::GPR(const BBPair* ptr)
; ABI:  Microsoft x64
; In:   rcx = this, rdx = ptr
; Out:  rax = this
; Copies both 64-bit halves of *ptr into *this with two qword moves.
;-----------------------------------------------------------------------
PUBLIC ??0GPR@@QEAA@PEBVBBPair@@@Z ; GPR::GPR
; Function compile flags: /Ogtpy
; COMDAT ??0GPR@@QEAA@PEBVBBPair@@@Z
_TEXT SEGMENT
this$ = 8
ptr$ = 16
??0GPR@@QEAA@PEBVBBPair@@@Z PROC ; GPR::GPR, COMDAT
; Line 155
mov rax, QWORD PTR [rdx]
mov QWORD PTR [rcx], rax
; Line 156
mov rax, QWORD PTR [rdx+8]
mov QWORD PTR [rcx+8], rax
mov rax, rcx
ret 0
??0GPR@@QEAA@PEBVBBPair@@@Z ENDP ; GPR::GPR
_TEXT ENDS
;-----------------------------------------------------------------------
; void GPR::store(BBPair* ptr)
; ABI:  Microsoft x64
; In:   rcx = this, rdx = ptr
; Copies both 64-bit halves of *this into *ptr.
;-----------------------------------------------------------------------
PUBLIC ?store@GPR@@QEAAXPEAVBBPair@@@Z ; GPR::store
; Function compile flags: /Ogtpy
; COMDAT ?store@GPR@@QEAAXPEAVBBPair@@@Z
_TEXT SEGMENT
this$ = 8
ptr$ = 16
?store@GPR@@QEAAXPEAVBBPair@@@Z PROC ; GPR::store, COMDAT
; Line 161
mov rax, QWORD PTR [rcx]
mov QWORD PTR [rdx], rax
; Line 162
mov rax, QWORD PTR [rcx+8]
mov QWORD PTR [rdx+8], rax
ret 0
?store@GPR@@QEAAXPEAVBBPair@@@Z ENDP ; GPR::store
_TEXT ENDS
;-----------------------------------------------------------------------
; GPR& GPR::operator&=(const GPR &dbb)
; ABI:  Microsoft x64
; In:   rcx = this, rdx = &dbb
; Out:  rax = this
; ANDs each 64-bit half of dbb into the matching half of *this, in memory.
;-----------------------------------------------------------------------
PUBLIC ??_4GPR@@QEAAAEAV0@AEBV0@@Z ; GPR::operator&=
; Function compile flags: /Ogtpy
; COMDAT ??_4GPR@@QEAAAEAV0@AEBV0@@Z
_TEXT SEGMENT
this$ = 8
dbb$ = 16
??_4GPR@@QEAAAEAV0@AEBV0@@Z PROC ; GPR::operator&=, COMDAT
; Line 168
mov rax, QWORD PTR [rdx]
and QWORD PTR [rcx], rax
; Line 169
mov rax, QWORD PTR [rdx+8]
and QWORD PTR [rcx+8], rax
mov rax, rcx
ret 0
??_4GPR@@QEAAAEAV0@AEBV0@@Z ENDP ; GPR::operator&=
_TEXT ENDS
;-----------------------------------------------------------------------
; GPR& GPR::operator|=(const GPR &dbb)
; ABI:  Microsoft x64
; In:   rcx = this, rdx = &dbb
; Out:  rax = this
; ORs each 64-bit half of dbb into the matching half of *this, in memory.
;-----------------------------------------------------------------------
PUBLIC ??_5GPR@@QEAAAEAV0@AEBV0@@Z ; GPR::operator|=
; Function compile flags: /Ogtpy
; COMDAT ??_5GPR@@QEAAAEAV0@AEBV0@@Z
_TEXT SEGMENT
this$ = 8
dbb$ = 16
??_5GPR@@QEAAAEAV0@AEBV0@@Z PROC ; GPR::operator|=, COMDAT
; Line 170
mov rax, QWORD PTR [rdx]
or QWORD PTR [rcx], rax
; Line 171
mov rax, QWORD PTR [rdx+8]
or QWORD PTR [rcx+8], rax
mov rax, rcx
ret 0
??_5GPR@@QEAAAEAV0@AEBV0@@Z ENDP ; GPR::operator|=
_TEXT ENDS
;-----------------------------------------------------------------------
; GPR operator>>(const GPR &dbb, int shift)
; ABI:  Microsoft x64 (result returned through a hidden pointer)
; In:   rcx = address of the return object, rdx = &dbb, r8d = shift
; Out:  rax = address of the return object
; Logical right shift of each 64-bit half of dbb by `shift`.
;-----------------------------------------------------------------------
PUBLIC ??5@YA?AVGPR@@AEBV0@H@Z ; operator>>
; Function compile flags: /Ogtpy
; COMDAT ??5@YA?AVGPR@@AEBV0@H@Z
_TEXT SEGMENT
__$ReturnUdt$ = 8
dbb$ = 16
shift$ = 24
??5@YA?AVGPR@@AEBV0@H@Z PROC ; operator>>, COMDAT
; Line 182
mov rax, QWORD PTR [rdx]
; The variable shift count must be in cl, so the return-object pointer is
; moved out of rcx into r9 before rcx is overwritten with the count.
mov r9, rcx
movsxd rcx, r8d
shr rax, cl
mov QWORD PTR [r9], rax
mov rax, QWORD PTR [rdx+8]
shr rax, cl
mov QWORD PTR [r9+8], rax
mov rax, r9
ret 0
??5@YA?AVGPR@@AEBV0@H@Z ENDP ; operator>>
_TEXT ENDS
;-----------------------------------------------------------------------
; GPR operator<<(const GPR &dbb, int shift)
; ABI:  Microsoft x64 (result returned through a hidden pointer)
; In:   rcx = address of the return object, rdx = &dbb, r8d = shift
; Out:  rax = address of the return object
; Left shift of each 64-bit half of dbb by `shift`.
;-----------------------------------------------------------------------
PUBLIC ??6@YA?AVGPR@@AEBV0@H@Z ; operator<<
; Function compile flags: /Ogtpy
; COMDAT ??6@YA?AVGPR@@AEBV0@H@Z
_TEXT SEGMENT
__$ReturnUdt$ = 8
dbb$ = 16
shift$ = 24
??6@YA?AVGPR@@AEBV0@H@Z PROC ; operator<<, COMDAT
; Line 184
mov rax, QWORD PTR [rdx]
; The variable shift count must be in cl, so the return-object pointer is
; moved out of rcx into r9 before rcx is overwritten with the count.
mov r9, rcx
movsxd rcx, r8d
shl rax, cl
mov QWORD PTR [r9], rax
mov rax, QWORD PTR [rdx+8]
shl rax, cl
mov QWORD PTR [r9+8], rax
mov rax, r9
ret 0
??6@YA?AVGPR@@AEBV0@H@Z ENDP ; operator<<
_TEXT ENDS
;-----------------------------------------------------------------------
; GPR operator&(const GPR &a, const GPR &b)
; ABI:  Microsoft x64 (result returned through a hidden pointer)
; In:   rcx = address of the return object, rdx = &a, r8 = &b
; Out:  rax = address of the return object
; Stores a AND b, one 64-bit half at a time.
;-----------------------------------------------------------------------
PUBLIC ??I@YA?AVGPR@@AEBV0@0@Z ; operator&
; Function compile flags: /Ogtpy
; COMDAT ??I@YA?AVGPR@@AEBV0@0@Z
_TEXT SEGMENT
__$ReturnUdt$ = 8
a$ = 16
b$ = 24
??I@YA?AVGPR@@AEBV0@0@Z PROC ; operator&, COMDAT
; Line 186
mov rax, QWORD PTR [rdx]
and rax, QWORD PTR [r8]
mov QWORD PTR [rcx], rax
mov rax, QWORD PTR [rdx+8]
and rax, QWORD PTR [r8+8]
mov QWORD PTR [rcx+8], rax
mov rax, rcx
ret 0
??I@YA?AVGPR@@AEBV0@0@Z ENDP ; operator&
_TEXT ENDS
;-----------------------------------------------------------------------
; GPR operator~(const GPR &a)
; ABI:  Microsoft x64 (result returned through a hidden pointer)
; In:   rcx = address of the return object, rdx = &a
; Out:  rax = address of the return object
; Stores the bitwise complement of a, one 64-bit half at a time.
;-----------------------------------------------------------------------
PUBLIC ??S@YA?AVGPR@@AEBV0@@Z ; operator~
; Function compile flags: /Ogtpy
; COMDAT ??S@YA?AVGPR@@AEBV0@@Z
_TEXT SEGMENT
__$ReturnUdt$ = 8
a$ = 16
??S@YA?AVGPR@@AEBV0@@Z PROC ; operator~, COMDAT
; Line 201
mov rax, QWORD PTR [rdx]
not rax
mov QWORD PTR [rcx], rax
mov rax, QWORD PTR [rdx+8]
not rax
mov QWORD PTR [rcx+8], rax
mov rax, rcx
ret 0
??S@YA?AVGPR@@AEBV0@@Z ENDP ; operator~
_TEXT ENDS
;-----------------------------------------------------------------------
; sSource::sSource(BitBoard r1, BitBoard r2)
;     : rooks(r1, r2), occup(r1|r2, r1|r2)
; ABI:  Microsoft x64
; In:   rcx = this, rdx = r1, r8 = r2
; Out:  rax = this
; Layout written: [this] = r1, [this+8] = r2 (rooks),
;                 [this+16] = [this+24] = r1 | r2 (occup).
;-----------------------------------------------------------------------
PUBLIC ??0sSource@@QEAA@_K0@Z ; sSource::sSource
; Function compile flags: /Ogtpy
; COMDAT ??0sSource@@QEAA@_K0@Z
_TEXT SEGMENT
this$ = 8
r1$ = 16
r2$ = 24
??0sSource@@QEAA@_K0@Z PROC ; sSource::sSource, COMDAT
; Line 208
; r1 is stored before rdx is reused to hold r1 | r2
mov QWORD PTR [rcx], rdx
or rdx, r8
mov QWORD PTR [rcx+8], r8
mov QWORD PTR [rcx+16], rdx
mov QWORD PTR [rcx+24], rdx
mov rax, rcx
ret 0
??0sSource@@QEAA@_K0@Z ENDP ; sSource::sSource
_TEXT ENDS
;-----------------------------------------------------------------------
; sTarget::sTarget()  -- compiler-generated default constructor
; ABI:  Microsoft x64
; In:   rcx = this
; Out:  rax = this
; No member is written; the function only returns `this`.
;-----------------------------------------------------------------------
PUBLIC ??0sTarget@@QEAA@XZ ; sTarget::sTarget
; Function compile flags: /Ogtpy
; COMDAT ??0sTarget@@QEAA@XZ
_TEXT SEGMENT
this$ = 8
??0sTarget@@QEAA@XZ PROC ; sTarget::sTarget, COMDAT
mov rax, rcx
ret 0
??0sTarget@@QEAA@XZ ENDP ; sTarget::sTarget
_TEXT ENDS
;-----------------------------------------------------------------------
; void updownAttacks<GPR>(sTarget* pTarget, const sSource* pSource)
; ABI:  Microsoft x64
; In:   rcx = pTarget, rdx = pSource
; Out:  pTarget->up   = gu << 8   ([pTarget],    [pTarget+8])
;       pTarget->down = gd >> 8   ([pTarget+16], [pTarget+24])
; Computes the up/down fill of the C++ template with 64-bit general
; registers, low and high board interleaved by the scheduler.
;
; Register roles once the set-up is done (low half : high half):
;       gu = rbp : rsi        gd = rdi : rbx
;       pu = r10 : rdx        pd = r11 : r8
;       r12 = pTarget         rax, rcx = scratch
;       r8/r9 initially hold ~occup (low/high) before being reused.
; Stack: 88-byte frame; rbx/rbp/rsi/rdi/r12 are saved at
;        rsp+80/72/64/56/48, the three 16-byte temporaries pu$/gd$/pd$
;        live at rsp+0/16/32.
; Note: this listing repairs trailing comments that had been wrapped onto
;       separate lines; the instruction stream is unchanged.
;-----------------------------------------------------------------------
PUBLIC ??$updownAttacks@VGPR@@@@YAXPEAUsTarget@@PEBUsSource@@@Z ; updownAttacks<GPR>
; COMDAT xdata
xdata SEGMENT
; Unwind information. NOTE(review): read against the x64 UNWIND_INFO
; encoding these dwords appear to describe the 88-byte allocation and the
; saves of r12/rdi/rsi/rbp/rbx at rsp+48/56/64/72/80 performed by the
; prologue below -- confirm before relying on this decoding.
$unwind$??$updownAttacks@VGPR@@@@YAXPEAUsTarget@@PEBUsSource@@@Z DD 0b3201H
DD 06c432H
DD 07742aH
DD 08641eH
DD 095417H
DD 0a3413H
DD 0a207H
xdata ENDS
; COMDAT pdata
pdata SEGMENT
; Function-table entry: start of the function, start+408, and the unwind
; data above.
$pdata$??$updownAttacks@VGPR@@@@YAXPEAUsTarget@@PEBUsSource@@@Z DD @imagerel($LN149#)
DD @imagerel($LN149#+408)
DD @imagerel($unwind$??$updownAttacks@VGPR@@@@YAXPEAUsTarget@@PEBUsSource@@@Z#)
; Function compile flags: /Ogtpy
pdata ENDS
; COMDAT ??$updownAttacks@VGPR@@@@YAXPEAUsTarget@@PEBUsSource@@@Z
_TEXT SEGMENT
$T2643 = 0
pu$ = 0
gd$ = 16
pd$ = 32
pTarget$ = 96
pSource$ = 104
??$updownAttacks@VGPR@@@@YAXPEAUsTarget@@PEBUsSource@@@Z PROC ; updownAttacks<GPR>, COMDAT
; Line 220
$LN149:
; rax keeps the entry value of rsp; the register saves and the first
; temporaries below are addressed relative to it ([rax-8] == [rsp+80] etc.)
; until rax is reused as scratch.
mov rax, rsp
sub rsp, 88 ; 00000058H
; Line 222
; r8:r9 = occup (low:high); rbp:rsi = rooks (low:high) = gu.
; The callee-saved registers are spilled in between the loads.
mov r8, QWORD PTR [rdx+16]
mov r9, QWORD PTR [rdx+24]
mov QWORD PTR [rax-8], rbx
mov QWORD PTR [rax-16], rbp
mov rbp, QWORD PTR [rdx]
mov QWORD PTR [rax-24], rsi
mov rsi, QWORD PTR [rdx+8]
; write gu into the gd$ slot ([rax-72] == gd$[rsp]): gd starts as a copy
mov QWORD PTR [rax-72], rbp
mov QWORD PTR [rax-32], rdi
mov QWORD PTR [rax-64], rsi
mov QWORD PTR [rax-40], r12
mov r12, rcx
; Line 224
; 16-byte copy of the slot onto itself (the `T gd(gu)` copy construction)
movaps xmm0, XMMWORD PTR [rax-72]
movdqa XMMWORD PTR [rax-72], xmm0
; Line 227
; rdi:rbx = gd (low:high), reloaded from the gd$ slot
mov rbx, QWORD PTR gd$[rsp+8]
mov rdi, QWORD PTR gd$[rsp]
; pu = ~occup (the empty squares)
not r8
not r9
mov rcx, rbp
shl rcx, 8
; spill pu to the temporary at rsp+0 ([rax-80] == rsp+8) and copy it to
; the pd$ slot ([rax-56] == pd$[rsp]): pd starts as a copy of pu
mov QWORD PTR $T2643[rsp], r8
mov QWORD PTR [rax-80], r9
movaps xmm1, XMMWORD PTR $T2643[rsp]
movdqa XMMWORD PTR [rax-56], xmm1
and rcx, r8
; Line 228
mov r10, r8
; gu.lo |= pu.lo & (gu.lo << 8)
or rbp, rcx
; Line 229
mov r11, r8
; rax stops being the frame base from here on
mov rax, rsi
movdqa XMMWORD PTR pu$[rsp], xmm1
mov rdx, QWORD PTR pu$[rsp+8]
shl rax, 8
mov rcx, rdi
and rax, r9
shr rcx, 8
shl r10, 8
; gu.hi |= pu.hi & (gu.hi << 8)
or rsi, rax
and rcx, r8
; pu.lo &= pu.lo << 8
and r10, r8
; gd.lo |= pd.lo & (gd.lo >> 8)
or rdi, rcx
mov rax, rbx
shr r11, 8
shr rax, 8
; pd.lo &= pd.lo >> 8
and r11, r8
; r8 is now reused for pd.hi, loaded from the pd$ slot
mov r8, QWORD PTR pd$[rsp+8]
and rax, r9
; Line 230
mov rcx, rbp
; gd.hi |= pd.hi & (gd.hi >> 8)
or rbx, rax
shl rcx, 16
mov rax, r9
shl rax, 8
and rcx, r10
shr r9, 8
; pu.hi &= pu.hi << 8
and rdx, rax
; gu.lo |= pu.lo & (gu.lo << 16)
or rbp, rcx
; pd.hi &= pd.hi >> 8
and r8, r9
mov rax, rsi
; Line 231
mov rcx, rdi
shl rax, 16
shr rcx, 16
and rax, rdx
and rcx, r11
; gu.hi |= pu.hi & (gu.hi << 16)
or rsi, rax
; gd.lo |= pd.lo & (gd.lo >> 16)
or rdi, rcx
mov rax, rbx
shr rax, 16
; Line 232
mov rcx, r10
and rax, r8
shl rcx, 16
; gd.hi |= pd.hi & (gd.hi >> 16)
or rbx, rax
; pu.lo &= pu.lo << 16
and r10, rcx
mov rax, rdx
shl rax, 16
; Line 233
mov rcx, r11
; pu.hi &= pu.hi << 16
and rdx, rax
shr rcx, 16
mov rax, r8
; pd.lo &= pd.lo >> 16
and r11, rcx
shr rax, 16
; Line 234
mov rcx, rbp
; pd.hi &= pd.hi >> 16
and r8, rax
shl rcx, 32 ; 00000020H
mov rax, rsi
shl rax, 32 ; 00000020H
and rcx, r10
; gu.lo |= pu.lo & (gu.lo << 32)
or rbp, rcx
and rax, rdx
; Line 235
mov rcx, rdi
; gu.hi |= pu.hi & (gu.hi << 32)
or rsi, rax
mov rax, rbx
; Line 236
; pTarget->up = gu << 8; each saved register is restored as soon as its
; value has been stored.
shl rbp, 8
mov QWORD PTR [r12], rbp
mov rbp, QWORD PTR [rsp+72]
shl rsi, 8
mov QWORD PTR [r12+8], rsi
mov rsi, QWORD PTR [rsp+64]
shr rcx, 32 ; 00000020H
shr rax, 32 ; 00000020H
and rcx, r11
; gd.lo |= pd.lo & (gd.lo >> 32)
or rdi, rcx
and rax, r8
; Line 237
; pTarget->down = gd >> 8
shr rdi, 8
; gd.hi |= pd.hi & (gd.hi >> 32)
or rbx, rax
shr rbx, 8
mov QWORD PTR [r12+16], rdi
mov rdi, QWORD PTR [rsp+56]
mov QWORD PTR [r12+24], rbx
mov r12, QWORD PTR [rsp+48]
mov rbx, QWORD PTR [rsp+80]
; Line 238
add rsp, 88 ; 00000058H
ret 0
??$updownAttacks@VGPR@@@@YAXPEAUsTarget@@PEBUsSource@@@Z ENDP ; updownAttacks<GPR>
_TEXT ENDS
;-----------------------------------------------------------------------
; void updownAttacks<XMM>(sTarget* pTarget, const sSource* pSource)
; ABI:  Microsoft x64, leaf function, no stack frame
; In:   rcx = pTarget, rdx = pSource
; Out:  pTarget->up   = gu << 8   (16 bytes at [pTarget])
;       pTarget->down = gd >> 8   (16 bytes at [pTarget+16])
; Same fill as the GPR instantiation, but both boards of a pair are
; processed at once in one XMM register (psllq/psrlq shift each 64-bit
; half independently).
;
; Register roles once the set-up is done:
;       gu = xmm3    gd = xmm5    pu = xmm2    pd = xmm4
;       xmm0, xmm1 = scratch
; Only xmm0-xmm5 are written, which are volatile in this ABI, so nothing
; has to be saved. All memory accesses use aligned forms, so pSource and
; pTarget must be 16-byte aligned.
; Note: this listing repairs trailing comments that had been wrapped onto
;       separate lines; the instruction stream is unchanged.
;-----------------------------------------------------------------------
PUBLIC ??$updownAttacks@VXMM@@@@YAXPEAUsTarget@@PEBUsSource@@@Z ; updownAttacks<XMM>
; Function compile flags: /Ogtpy
; COMDAT ??$updownAttacks@VXMM@@@@YAXPEAUsTarget@@PEBUsSource@@@Z
_TEXT SEGMENT
pTarget$ = 8
pSource$ = 16
??$updownAttacks@VXMM@@@@YAXPEAUsTarget@@PEBUsSource@@@Z PROC ; updownAttacks<XMM>, COMDAT
; Line 221
; xmm5 = rooks (becomes gd)
movdqa xmm5, XMMWORD PTR [rdx]
; Line 222
; xmm4 = occup
movdqa xmm4, XMMWORD PTR [rdx+16]
; Line 226
movdqa xmm0, xmm5
; xmm3 = gu (copy of rooks)
movdqa xmm3, xmm5
; xmm4 = ~occup = pu/pd start value (the empty squares)
pxor xmm4, XMMWORD PTR ?minusOne@XMM@@1T__m128i@@B ; XMM::minusOne
movdqa xmm1, xmm4
; Line 228
movdqa xmm2, xmm4
psllq xmm0, 8
pand xmm1, xmm0
movdqa xmm0, xmm5
; gu |= pu & (gu << 8)
por xmm3, xmm1
psrlq xmm0, 8
movdqa xmm1, xmm4
pand xmm1, xmm0
movdqa xmm0, xmm4
; gd |= pd & (gd >> 8)
por xmm5, xmm1
psllq xmm0, 8
; pu &= pu << 8   (pu now lives in xmm2)
pand xmm2, xmm0
; Line 229
movdqa xmm0, xmm4
psrlq xmm0, 8
; Line 230
movdqa xmm1, xmm2
; pd &= pd >> 8   (pd stays in xmm4)
pand xmm4, xmm0
movdqa xmm0, xmm3
psllq xmm0, 16
pand xmm1, xmm0
; gu |= pu & (gu << 16)
por xmm3, xmm1
; Line 231
movdqa xmm0, xmm5
movdqa xmm1, xmm4
psrlq xmm0, 16
pand xmm1, xmm0
; gd |= pd & (gd >> 16)
por xmm5, xmm1
; Line 232
movdqa xmm0, xmm2
; Line 234
movdqa xmm1, xmm3
psllq xmm0, 16
psllq xmm1, 32 ; 00000020H
; pu &= pu << 16, then masked with gu << 32; pu is dead afterwards, so
; xmm2 directly becomes the term pu & (gu << 32)
pand xmm2, xmm0
pand xmm2, xmm1
movdqa xmm0, xmm4
; Line 235
movdqa xmm1, xmm5
psrlq xmm0, 16
psrlq xmm1, 32 ; 00000020H
; gu |= pu & (gu << 32)
por xmm3, xmm2
; pd &= pd >> 16
pand xmm4, xmm0
; Line 236
; pTarget->up = gu << 8
psllq xmm3, 8
pand xmm4, xmm1
movdqa XMMWORD PTR [rcx], xmm3
; gd |= pd & (gd >> 32)
por xmm5, xmm4
; Line 237
; pTarget->down = gd >> 8
psrlq xmm5, 8
movdqa XMMWORD PTR [rcx+16], xmm5
; Line 238
ret 0
??$updownAttacks@VXMM@@@@YAXPEAUsTarget@@PEBUsSource@@@Z ENDP ; updownAttacks<XMM>
_TEXT ENDS
;-----------------------------------------------------------------------
; int main(int argc, char* argv[])
; ABI:  Microsoft x64
; Out:  eax = 0
; Everything the C++ main calls has been inlined:
;   1. BBPair::initRankStr()                      ($LL8@main loop)
;   2. construction of source2 on the stack and updownAttacks<XMM> on it,
;      interleaved with the stores that produce target1
;   3. four printBoards() loops: target1.down, target1.up, target2.down,
;      target2.up, each followed by printf("\n")
; NOTE(review): most of updownAttacks<GPR>(&target1, &source1) appears to
; have been evaluated at compile time -- three of target1's four qwords
; are stored as immediates and only the high qword of target1.down is
; finished at run time in rcx. Confirm against the compiler's behaviour.
; Stack: 120-byte frame. target1 is at rsp+32; source2 and target2 share
; the slot at rsp+64 (both symbols are defined as 64), so the target2
; stores overwrite source2 after it has been loaded into registers.
; rdi/rsi/rbx are saved at rsp+96/104/112; rbx and rdi are saved later
; than rsi, which is what the chained unwind records below describe.
;-----------------------------------------------------------------------
PUBLIC main
xdata SEGMENT
; NOTE(review): read against the x64 UNWIND_INFO encoding, this primary
; record appears to describe the 120-byte allocation and the save of rsi
; at rsp+104 -- confirm before relying on this decoding.
$unwind$main DD 030901H
DD 0d6409H
DD 0e204H
xdata ENDS
pdata SEGMENT
; Range 1: from the start of main up to offset 276.
$pdata$main DD @imagerel($LN359#)
DD @imagerel($LN359#+276)
DD @imagerel($unwind$main#)
pdata ENDS
xdata SEGMENT
; NOTE(review): chained record; appears to add the save of rbx at rsp+112
; on top of the primary record -- confirm.
$chain$1$main DD 020521H
DD 0e3405H
DD @imagerel($LN359#)
DD @imagerel($LN359#+276)
DD @imagerel($unwind$main#)
xdata ENDS
pdata SEGMENT
; Range 2: offsets 276 to 303.
$pdata$1$main DD @imagerel($LN359#+276)
DD @imagerel($LN359#+303)
DD @imagerel($chain$1$main#)
pdata ENDS
xdata SEGMENT
; NOTE(review): chained record; appears to add the save of rdi at rsp+96
; on top of chain 1 -- confirm.
$chain$2$main DD 020521H
DD 0c7405H
DD @imagerel($LN359#+276)
DD @imagerel($LN359#+303)
DD @imagerel($chain$1$main#)
xdata ENDS
pdata SEGMENT
; Range 3: offsets 303 to 784.
$pdata$2$main DD @imagerel($LN359#+303)
DD @imagerel($LN359#+784)
DD @imagerel($chain$2$main#)
pdata ENDS
xdata SEGMENT
; NOTE(review): chained record with no unwind codes of its own, linking
; back to chain 1; this covers the tail of main after rdi has been
; restored -- confirm.
$chain$3$main DD 021H
DD @imagerel($LN359#+276)
DD @imagerel($LN359#+303)
DD @imagerel($chain$1$main#)
xdata ENDS
pdata SEGMENT
; Range 4: offsets 784 to 856 (end of main).
$pdata$3$main DD @imagerel($LN359#+784)
DD @imagerel($LN359#+856)
DD @imagerel($chain$3$main#)
; Function compile flags: /Ogtpy
pdata ENDS
_TEXT SEGMENT
target1$ = 32
target2$ = 64
source2$ = 64
argc$ = 128
argv$ = 136
main PROC
; Line 241
$LN359:
sub rsp, 120 ; 00000078H
mov QWORD PTR [rsp+104], rsi
; Line 242
; Inlined BBPair::initRankStr(): ecx = i, rdx = row pointer,
; r8 = &binDigit[0]. rsi keeps the base of rankStr for the print loops.
lea rsi, OFFSET FLAT:?rankStr@BBPair@@1PAY09DA ; BBPair::rankStr
xor ecx, ecx
mov rdx, rsi
lea r8, OFFSET FLAT:?binDigit@BBPair@@1QBDB ; BBPair::binDigit
npad 4
$LL8@main:
; One iteration fills one 10-byte row: columns 0..7 get binDigit[bit j of
; i] and column 8 gets 0. rdx has already been advanced, hence the
; negative displacements.
mov eax, ecx
add rdx, 10
and eax, 1
movzx eax, BYTE PTR [rax+r8]
mov BYTE PTR [rdx-10], al
mov eax, ecx
mov BYTE PTR [rdx-2], 0
and eax, 2
shr rax, 1
movzx eax, BYTE PTR [rax+r8]
mov BYTE PTR [rdx-9], al
mov eax, ecx
and eax, 4
shr rax, 2
movzx eax, BYTE PTR [rax+r8]
mov BYTE PTR [rdx-8], al
mov eax, ecx
and eax, 8
shr rax, 3
movzx eax, BYTE PTR [rax+r8]
mov BYTE PTR [rdx-7], al
mov eax, ecx
and eax, 16
shr rax, 4
movzx eax, BYTE PTR [rax+r8]
mov BYTE PTR [rdx-6], al
mov eax, ecx
and eax, 32 ; 00000020H
shr rax, 5
movzx eax, BYTE PTR [rax+r8]
mov BYTE PTR [rdx-5], al
mov eax, ecx
and eax, 64 ; 00000040H
shr rax, 6
movzx eax, BYTE PTR [rax+r8]
mov BYTE PTR [rdx-4], al
mov eax, ecx
inc ecx
and eax, 128 ; 00000080H
shr rax, 7
cmp ecx, 256 ; 00000100H
movzx eax, BYTE PTR [rax+r8]
mov BYTE PTR [rdx-3], al
jl $LL8@main
; Line 245
; Build source2 on the stack: rooks = (rax, rcx), occup = r1|r2 twice.
mov rax, 274877906944 ; 0000004000000000H
mov rcx, 72624976668147848 ; 0102040810204088H
; Line 249
mov rdx, 71171198281555767 ; 00fcd9d3e7cf9f37H
mov QWORD PTR source2$[rsp], rax
mov QWORD PTR source2$[rsp+8], rcx
mov rax, 72625251546054792 ; 0102044810204088H
; Line 250
; Inlined updownAttacks<XMM>(&target2, &source2) starts here (xmm5 = rooks,
; xmm4 = ~occup, xmm3 = gu, xmm2 = pu), interleaved with the integer code
; that produces target1.
movdqa xmm5, XMMWORD PTR source2$[rsp]
mov QWORD PTR source2$[rsp+16], rax
mov QWORD PTR source2$[rsp+24], rax
movdqa xmm0, xmm5
movdqa xmm3, xmm5
psllq xmm0, 8
mov rax, 283691315109952 ; 0001020408102040H
; delayed save of rbx; ebx = 7 is the start index for the print loops
mov QWORD PTR [rsp+112], rbx
mov ebx, 7
movdqa xmm4, XMMWORD PTR source2$[rsp+16]
pxor xmm4, XMMWORD PTR ?minusOne@XMM@@1T__m128i@@B ; XMM::minusOne
or rcx, rax
; delayed save of rdi; rdi = 7 is the counter of the first print loop
mov QWORD PTR [rsp+96], rdi
mov rdi, rbx
movdqa xmm1, xmm4
movdqa xmm2, xmm4
pand xmm1, xmm0
movdqa xmm0, xmm5
por xmm3, xmm1
psrlq xmm0, 8
movdqa xmm1, xmm4
mov rax, rcx
pand xmm1, xmm0
movdqa xmm0, xmm4
shr rax, 16
por xmm5, xmm1
psllq xmm0, 8
and rax, rdx
or rcx, rax
mov rdx, 896604014343 ; 000000d0c1c38707H
pand xmm2, xmm0
mov rax, rcx
shr rax, 32 ; 00000020H
movdqa xmm0, xmm4
psrlq xmm0, 8
and rax, rdx
movdqa xmm1, xmm2
pand xmm4, xmm0
movdqa xmm0, xmm3
or rcx, rax
mov rax, 2314850208468434944 ; 2020000000000000H
; rcx now holds the value stored below at target1$[rsp+24]
shr rcx, 8
psllq xmm0, 16
pand xmm1, xmm0
; target1 qwords at +0, +8 and +16 are stored from immediates
mov QWORD PTR target1$[rsp], rax
mov rax, -2387759954472171520 ; dedcf8f8e8c88800H
por xmm3, xmm1
movdqa xmm0, xmm5
movdqa xmm1, xmm4
psrlq xmm0, 16
mov QWORD PTR target1$[rsp+8], rax
mov rax, 137977921536 ; 0000002020200000H
pand xmm1, xmm0
por xmm5, xmm1
mov QWORD PTR target1$[rsp+16], rax
mov QWORD PTR target1$[rsp+24], rcx
movdqa xmm0, xmm2
movdqa xmm1, xmm3
psllq xmm0, 16
psllq xmm1, 32 ; 00000020H
pand xmm2, xmm0
pand xmm2, xmm1
movdqa xmm0, xmm4
movdqa xmm1, xmm5
psrlq xmm0, 16
psrlq xmm1, 32 ; 00000020H
por xmm3, xmm2
pand xmm4, xmm0
psllq xmm3, 8
pand xmm4, xmm1
; target2.up = gu << 8 (this overwrites the source2 slot)
movdqa XMMWORD PTR target2$[rsp], xmm3
por xmm5, xmm4
psrlq xmm5, 8
; target2.down = gd >> 8
movdqa XMMWORD PTR target2$[rsp+16], xmm5
npad 4
; Line 252
; target1.down.printBoards(): bytes at target1+16+i and target1+24+i,
; i = rdi = 7..0. rdx/r8 = &rankStr[byte] (row size 10 = 5*2).
$LL321@main:
movzx eax, BYTE PTR target1$[rsp+rdi+24]
lea rcx, QWORD PTR [rax+rax*4]
movzx eax, BYTE PTR target1$[rsp+rdi+16]
lea r8, QWORD PTR [rsi+rcx*2]
lea rcx, QWORD PTR [rax+rax*4]
lea rdx, QWORD PTR [rsi+rcx*2]
lea rcx, OFFSET FLAT:??_C@_06IINFAHFO@?$CFs?5?$CFs?6?$AA@
call printf
dec rdi
jns SHORT $LL321@main
lea rcx, OFFSET FLAT:??_C@_01EEMJAFIK@?6?$AA@
call printf
; reset the loop counter to 7 (rbx still holds 7)
mov rdi, rbx
npad 6
; Line 253
; target1.up.printBoards(): bytes at target1+i and target1+8+i
$LL326@main:
movzx eax, BYTE PTR target1$[rsp+rdi+8]
lea rcx, QWORD PTR [rax+rax*4]
movzx eax, BYTE PTR target1$[rsp+rdi]
lea r8, QWORD PTR [rsi+rcx*2]
lea rcx, QWORD PTR [rax+rax*4]
lea rdx, QWORD PTR [rsi+rcx*2]
lea rcx, OFFSET FLAT:??_C@_06IINFAHFO@?$CFs?5?$CFs?6?$AA@
call printf
dec rdi
jns SHORT $LL326@main
lea rcx, OFFSET FLAT:??_C@_01EEMJAFIK@?6?$AA@
call printf
mov rdi, rbx
npad 6
; Line 254
; target2.down.printBoards(): bytes at target2+16+i and target2+24+i
$LL331@main:
movzx eax, BYTE PTR target2$[rsp+rdi+24]
lea rcx, QWORD PTR [rax+rax*4]
movzx eax, BYTE PTR target2$[rsp+rdi+16]
lea r8, QWORD PTR [rsi+rcx*2]
lea rcx, QWORD PTR [rax+rax*4]
lea rdx, QWORD PTR [rsi+rcx*2]
lea rcx, OFFSET FLAT:??_C@_06IINFAHFO@?$CFs?5?$CFs?6?$AA@
call printf
dec rdi
jns SHORT $LL331@main
lea rcx, OFFSET FLAT:??_C@_01EEMJAFIK@?6?$AA@
call printf
; rdi is no longer needed as a counter: restore the caller's value now;
; the last loop counts with rbx itself.
mov rdi, QWORD PTR [rsp+96]
npad 4
; Line 255
; target2.up.printBoards(): bytes at target2+i and target2+8+i, i = rbx
$LL336@main:
movzx eax, BYTE PTR target2$[rsp+rbx+8]
lea rcx, OFFSET FLAT:??_C@_06IINFAHFO@?$CFs?5?$CFs?6?$AA@
lea rdx, QWORD PTR [rax+rax*4]
movzx eax, BYTE PTR target2$[rsp+rbx]
lea r8, QWORD PTR [rsi+rdx*2]
lea rdx, QWORD PTR [rax+rax*4]
lea rdx, QWORD PTR [rsi+rdx*2]
call printf
dec rbx
jns SHORT $LL336@main
lea rcx, OFFSET FLAT:??_C@_01EEMJAFIK@?6?$AA@
call printf
; Epilogue: restore the remaining callee-saved registers, return 0.
mov rsi, QWORD PTR [rsp+104]
mov rbx, QWORD PTR [rsp+112]
; Line 257
xor eax, eax
; Line 258
add rsp, 120 ; 00000078H
ret 0
main ENDP
_TEXT ENDS
END
>#include <stdio.h>
>#include <emmintrin.h>
>
>typedef unsigned __int64 BitBoard;
>
>
>class BBPair
>{
> friend class XMM;
> friend class GPR;
>public:
> __forceinline BBPair(){}
> __forceinline BBPair(const BitBoard &low, const BitBoard &high) {
> sgl[0] = low; sgl[1] = high;}
>
> // some output routines
> void printBoards()
> {
> for ( int i = 7; i >= 0; --i)
> printf("%s %s\n", rankStr[dbl.m128i_u8[i]],rankStr[dbl.m128i_u8[i+8]]);
> printf("\n");
> }
>
> static void initRankStr() // used by printBoards
> {
> for (int i = 0; i < 256; ++i)
> {
> for (int j = 0; j < 8; ++j)
> rankStr[i][j] = binDigit[(i & (1<<j)) != 0];
> rankStr[i][j] = '\0';
> }
> }
>
>protected:
> union
> {
> __m128i dbl; // this intrinsic type is wrapped here
> BitBoard sgl[2];
> };
>protected:
> static const char binDigit[2];
> static char rankStr[256][10];
>};
>
>char BBPair::rankStr[256][10];
>const char BBPair::binDigit[2] = { '°','\333'}; // { '0','1'};
>
>class XMM : public BBPair
>{
>public:
> __forceinline XMM(){}
> __forceinline XMM(const BitBoard &low, const BitBoard &high) {
> dbl.m128i_u64[0] = low; dbl.m128i_u64[1] = high;}
>
> __forceinline XMM(const BitBoard &low) {dbl.m128i_u64[0] = low;}
>
> __forceinline XMM(__m128i a){dbl = a;}
> __forceinline XMM(const BBPair& a){dbl = a.dbl;}
> __forceinline XMM(const __m128i* ptr){dbl = _mm_load_si128(ptr);}
> __forceinline XMM(const BBPair* ptr){dbl = _mm_load_si128(&ptr->dbl);}
>
> __forceinline BitBoard getLowBoard() const {return dbl.m128i_u64[0];}
> __forceinline BitBoard getHighBoard() const {return dbl.m128i_u64[1];}
>
> __forceinline void store(__m128i* ptr) {_mm_store_si128 (ptr, dbl);}
> __forceinline void store(BBPair* ptr) {_mm_store_si128 (&ptr->dbl, dbl);}
>
> __forceinline XMM copyWord0() {return XMM(_mm_shuffle_epi32 (dbl, 0));}
> __forceinline XMM copyWord1() {return XMM(_mm_shuffle_epi32 (dbl, 0x55));}
> __forceinline int getInt16() {return _mm_extract_epi16(dbl,0);}
>
> __forceinline XMM& compare32(const XMM &dbb) {dbl = _mm_cmpeq_epi32(dbl,
>dbb.dbl); return *this;}
> __forceinline XMM& addAbsByteDiff(const XMM &dbb) {dbl = _mm_sad_epu8(dbl,
>dbb.dbl); return *this;}
> __forceinline XMM& addWords(const XMM &dbb) {dbl =
>_mm_add_epi32(_mm_shuffle_epi32(dbb.dbl,0x56), dbl); return *this;}
>
>
> __forceinline XMM& operator>>=(int shift) { dbl = _mm_srli_epi64(dbl, shift);
>return *this;}
> __forceinline XMM& operator<<=(int shift) {dbl = _mm_slli_epi64(dbl, shift);
>return *this;}
> __forceinline XMM& operator&=(const XMM &dbb) {dbl = _mm_and_si128(dbl,
>dbb.dbl); return *this;}
> __forceinline XMM& operator|=(const XMM &dbb) {dbl = _mm_or_si128(dbl,
>dbb.dbl); return *this;}
> __forceinline XMM& operator^=(const XMM &dbb) {dbl = _mm_xor_si128(dbl,
>dbb.dbl); return *this;}
> __forceinline XMM& operator+=(const XMM &dbb) {dbl = _mm_add_epi8(dbl,
>dbb.dbl); return *this;}
> __forceinline XMM& operator-=(const XMM &dbb) {dbl = _mm_sub_epi8(dbl,
>dbb.dbl); return *this;}
>
> __forceinline XMM notA() const {return XMM(_mm_and_si128(dbl, snotA));}
> __forceinline XMM notH() const {return XMM(_mm_and_si128(dbl, snotH));}
>
> __forceinline friend XMM operator>>(const XMM &dbb, int shift) {return
>XMM(_mm_srli_epi64(dbb.dbl, shift));}
> __forceinline friend XMM operator<<(const XMM &dbb, int shift) {return
>XMM(_mm_slli_epi64(dbb.dbl, shift));}
> __forceinline friend XMM operator&(const XMM &a, const XMM &b) {return
>XMM(_mm_and_si128(a.dbl, b.dbl));}
> __forceinline friend XMM operator|(const XMM &a, const XMM &b) {return
>XMM(_mm_or_si128(a.dbl, b.dbl));}
> __forceinline friend XMM operator^(const XMM &a, const XMM &b) {return
>XMM(_mm_xor_si128(a.dbl, b.dbl));}
> __forceinline friend XMM operator+(const XMM &a, const XMM &b) {return
>XMM(_mm_add_epi8(a.dbl, b.dbl));}
> __forceinline friend XMM operator-(const XMM &a, const XMM &b) {return
>XMM(_mm_sub_epi8(a.dbl, b.dbl));}
> __forceinline friend XMM operator~(const XMM &a) {return
>XMM(_mm_xor_si128(a.dbl, minusOne));}
>
>
>
>protected:
> static const __m128i snotA;
> static const __m128i snotH;
> static const __m128i minusOne;
>
>};
>
>const __m128i XMM::snotA =
>{
> '\376', '\376', '\376', '\376',
> '\376', '\376', '\376', '\376',
> '\376', '\376', '\376', '\376',
> '\376', '\376', '\376', '\376',
>};
>
>const __m128i XMM::snotH =
>{
> '\177', '\177', '\177', '\177',
> '\177', '\177', '\177', '\177',
> '\177', '\177', '\177', '\177',
> '\177', '\177', '\177', '\177',
>};
>
>const __m128i XMM::minusOne =
>{
> '\377', '\377', '\377', '\377',
> '\377', '\377', '\377', '\377',
> '\377', '\377', '\377', '\377',
> '\377', '\377', '\377', '\377',
>};
>
>class GPR : public BBPair
>{
>public:
> __forceinline GPR(){}
> __forceinline GPR(const BitBoard &low, const BitBoard &high) {sgl[0] = low;
>sgl[1] = high;}
> __forceinline GPR(const BBPair& a) {sgl[0] = a.sgl[0]; sgl[1] = a.sgl[1];}
> __forceinline GPR(const BBPair* ptr){sgl[0] = ptr->sgl[0]; sgl[1] =
>ptr->sgl[1];}
>
> __forceinline BitBoard getLowBoard() const {return sgl[0];}
> __forceinline BitBoard getHighBoard() const {return sgl[1];}
>
> __forceinline void store(BBPair* ptr) {ptr->sgl[0] = sgl[0]; ptr->sgl[1] =
>sgl[1];}
>
> __forceinline GPR& operator>>=(int shift) { sgl[0]>>=shift; sgl[1]>>=shift;
>return *this;}
> __forceinline GPR& operator<<=(int shift) { sgl[0]<<=shift; sgl[1]<<=shift;
>return *this;}
> __forceinline GPR& operator&=(const GPR &dbb) {sgl[0]&=dbb.sgl[0];
>sgl[1]&=dbb.sgl[1]; return *this;}
> __forceinline GPR& operator|=(const GPR &dbb) {sgl[0]|=dbb.sgl[0];
>sgl[1]|=dbb.sgl[1]; return *this;}
> __forceinline GPR& operator^=(const GPR &dbb) {sgl[0]^=dbb.sgl[0];
>sgl[1]^=dbb.sgl[1]; return *this;}
>
> __forceinline GPR notA() const {
> return GPR(sgl[0]&0xfefefefefefefefe, sgl[1]&0xfefefefefefefefe);}
>
> __forceinline GPR notH() const {
> return GPR(sgl[0]&0x7f7f7f7f7f7f7f7f, sgl[1]&0x7f7f7f7f7f7f7f7f);}
>
> __forceinline friend GPR operator>>(const GPR &dbb, int shift) {
> return GPR(dbb.sgl[0]>>shift, dbb.sgl[1]>>shift);}
> __forceinline friend GPR operator<<(const GPR &dbb, int shift) {
> return GPR(dbb.sgl[0]<<shift, dbb.sgl[1]<<shift);}
> __forceinline friend GPR operator&(const GPR &a, const GPR &b) {
> return GPR(a.sgl[0]&b.sgl[0], a.sgl[1]&b.sgl[1]);}
> __forceinline friend GPR operator|(const GPR &a, const GPR &b) {
> return GPR(a.sgl[0]|b.sgl[0], a.sgl[1]|b.sgl[1]);}
> __forceinline friend GPR operator^(const GPR &a, const GPR &b) {
> return GPR(a.sgl[0]^b.sgl[0], a.sgl[1]^b.sgl[1]);}
>
> __forceinline friend GPR operator-(const GPR &a, const GPR &b)
> {
> GPR c;
> for (int i=0; i < 16; i++)
> c.dbl.m128i_u8[i] = a.dbl.m128i_u8[i] - b.dbl.m128i_u8[i];
> return c;
> }
>
> __forceinline friend GPR operator~(const GPR &a) {
> return GPR(~a.sgl[0], ~a.sgl[1]);}
>
>};
>
>
>struct sSource
>{
> sSource(BitBoard r1, BitBoard r2) : rooks(r1,r2), occup(r1|r2, r1|r2) {}
> BBPair rooks;
> BBPair occup;
>};
>
>struct sTarget
>{
> BBPair up;
> BBPair down;
>};
>
>template <class T> void updownAttacks(sTarget* pTarget, const sSource* pSource)
>{
> T gu(&pSource->rooks);
> T pu(&pSource->occup);
> pu = ~pu; // empty
> T gd(gu);
> T pd(pu);
> gu |= pu & (gu<<8);
> gd |= pd & (gd>>8);
> pu &= pu<<8;
> pd &= pd>>8;
> gu |= pu & (gu<<16);
> gd |= pd & (gd>>16);
> pu &= pu<<16;
> pd &= pd>>16;
> gu |= pu & (gu<<32);
> gd |= pd & (gd>>32);
> (gu<<8).store(&pTarget->up);
> (gd>>8).store(&pTarget->down);
>}
>
>int main(int argc, char* argv[])
>{
> BBPair::initRankStr();
>
> sSource source1(0x0000200000000000, 0x0102040810204088);
> sSource source2(0x0000004000000000, 0x0102040810204088);
> sTarget target1;
> sTarget target2;
>
> updownAttacks<GPR>(&target1, &source1);
> updownAttacks<XMM>(&target2, &source2);
>
> target1.down.printBoards();
> target1.up.printBoards();
> target2.down.printBoards();
> target2.up.printBoards();
>
> return 0;
>}
This page took 0 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.