Computer Chess Club Archives


Search

Terms

Messages

Subject: Re: Thanks

Author: Eugene Nalimov

Date: 09:57:09 03/31/04

Go up one level in this thread


On March 31, 2004 at 04:15:21, Gerd Isenberg wrote:

>Looks fine ;-)
>
>Curious about call __chkstk in isDeBruijnN, but not in the recursive function
>genDeBruijn. Yes, isDeBruijnN has a local 4KByte array on the frame, and has to
>clear it too, so under runtime considerations call __chkstk doesn't matter much.
>Instead of the call to memset I would prefer an inlined intrinsic, e.g. with an
>8-byte aligned bool array and REP STOSQ with rcx=4096/8. I guess there are some
>additional compiler flags...
>
>If you have some additional time, it would be nice to see the assembly of a
>kogge-stone filler with a bit more register pressure:
>
>Thanks again,
>Gerd

A _chkstk() call is necessary if a function allocates more than 4K on the stack.

A memset() call is faster than REP STOSQ. Trust me. BTW, the old version of the
compiler would generate REP STOSQ.

And here is your assembly:

; Listing generated by Microsoft (R) Optimizing Compiler Version 14.00.40316

include listing.inc

INCLUDELIB LIBCMT
INCLUDELIB OLDNAMES

; Static data used by the code in this listing.
;   rankStr  : 0a00H (2560) uninitialised bytes in _BSS; the code below
;              addresses it in 10-byte records (index*5*2).
;   binDigit : two bytes (0b0H, 0dbH), selected by a 0/1 index.
;   snotA    : sixteen bytes of 0feH.
;   snotH    : sixteen bytes of 07fH.
;   minusOne : sixteen bytes of 0ffH (used as the XOR mask for operator~).
PUBLIC	?minusOne@XMM@@1T__m128i@@B			; XMM::minusOne
PUBLIC	?snotH@XMM@@1T__m128i@@B			; XMM::snotH
PUBLIC	?snotA@XMM@@1T__m128i@@B			; XMM::snotA
PUBLIC	?binDigit@BBPair@@1QBDB				; BBPair::binDigit
PUBLIC	?rankStr@BBPair@@1PAY09DA			; BBPair::rankStr
_BSS	SEGMENT
?rankStr@BBPair@@1PAY09DA DB 0a00H DUP (?)		; BBPair::rankStr
_BSS	ENDS
CONST	SEGMENT
?binDigit@BBPair@@1QBDB DB 0b0H				; BBPair::binDigit
	DB	0dbH
; Skip 14 bytes: binDigit (2 bytes) + 14 = 16, so snotA starts exactly
; 16 bytes after binDigit (presumably to keep the 16-byte constants aligned).
	ORG $+14
?snotA@XMM@@1T__m128i@@B DB 0feH			; XMM::snotA
	DB	0feH
	DB	0feH
	DB	0feH
	DB	0feH
	DB	0feH
	DB	0feH
	DB	0feH
	DB	0feH
	DB	0feH
	DB	0feH
	DB	0feH
	DB	0feH
	DB	0feH
	DB	0feH
	DB	0feH
?snotH@XMM@@1T__m128i@@B DB 07fH			; XMM::snotH
	DB	07fH
	DB	07fH
	DB	07fH
	DB	07fH
	DB	07fH
	DB	07fH
	DB	07fH
	DB	07fH
	DB	07fH
	DB	07fH
	DB	07fH
	DB	07fH
	DB	07fH
	DB	07fH
	DB	07fH
?minusOne@XMM@@1T__m128i@@B DB 0ffH			; XMM::minusOne
	DB	0ffH
	DB	0ffH
	DB	0ffH
	DB	0ffH
	DB	0ffH
	DB	0ffH
	DB	0ffH
	DB	0ffH
	DB	0ffH
	DB	0ffH
	DB	0ffH
	DB	0ffH
	DB	0ffH
	DB	0ffH
	DB	0ffH
CONST	ENDS
PUBLIC	??0BBPair@@QEAA@XZ				; BBPair::BBPair
; Function compile flags: /Ogtpy
;	COMDAT ??0BBPair@@QEAA@XZ
; BBPair::BBPair() -- default constructor; it initialises nothing.
; In:  rcx = this
; Out: rax = this
; The this$ symbol below is not referenced by the instructions.
_TEXT	SEGMENT
this$ = 8
??0BBPair@@QEAA@XZ PROC					; BBPair::BBPair, COMDAT
; File c:\repro\q.cpp
; Line 12
	mov	rax, rcx				; return the object pointer unchanged
	ret	0
??0BBPair@@QEAA@XZ ENDP					; BBPair::BBPair
_TEXT	ENDS
PUBLIC	??0BBPair@@QEAA@AEB_K0@Z			; BBPair::BBPair
; Function compile flags: /Ogtpy
;	COMDAT ??0BBPair@@QEAA@AEB_K0@Z
; BBPair::BBPair(low, high) -- both arguments arrive as pointers and are
; dereferenced here.
; In:  rcx = this, rdx = address of low, r8 = address of high
; Out: rax = this; [this] = low, [this+8] = high
; The this$/low$/high$ symbols below are not referenced by the instructions.
_TEXT	SEGMENT
this$ = 8
low$ = 16
high$ = 24
??0BBPair@@QEAA@AEB_K0@Z PROC				; BBPair::BBPair, COMDAT
; Line 14
	mov	rax, QWORD PTR [rdx]			; rax = low
	mov	QWORD PTR [rcx], rax			; first quadword of the object
	mov	rax, QWORD PTR [r8]			; rax = high
	mov	QWORD PTR [rcx+8], rax			; second quadword of the object
	mov	rax, rcx				; return this
	ret	0
??0BBPair@@QEAA@AEB_K0@Z ENDP				; BBPair::BBPair
_TEXT	ENDS
PUBLIC	??_C@_01EEMJAFIK@?6?$AA@			; `string'
PUBLIC	??_C@_06IINFAHFO@?$CFs?5?$CFs?6?$AA@		; `string'
PUBLIC	?printBoards@BBPair@@QEAAXXZ			; BBPair::printBoards
EXTRN	printf:PROC
;	COMDAT ??_C@_06IINFAHFO@?$CFs?5?$CFs?6?$AA@
CONST	SEGMENT
??_C@_06IINFAHFO@?$CFs?5?$CFs?6?$AA@ DB '%s %s', 0aH, 00H ; `string'
CONST	ENDS
;	COMDAT ??_C@_01EEMJAFIK@?6?$AA@
CONST	SEGMENT
??_C@_01EEMJAFIK@?6?$AA@ DB 0aH, 00H			; `string'
CONST	ENDS
;	COMDAT xdata
; Unwind record for printBoards (raw encoded DWORDs emitted by the compiler).
xdata	SEGMENT
$unwind$?printBoards@BBPair@@QEAAXXZ DD 071301H
	DD	047413H
	DD	05640eH
	DD	063409H
	DD	06204H
xdata	ENDS
;	COMDAT pdata
; Function-table entry: code range $LN8 .. $LN8+121 and its unwind record.
pdata	SEGMENT
$pdata$?printBoards@BBPair@@QEAAXXZ DD @imagerel($LN8#)
	DD	@imagerel($LN8#+121)
	DD	@imagerel($unwind$?printBoards@BBPair@@QEAAXXZ#)
; Function compile flags: /Ogtpy
pdata	ENDS
;	COMDAT ?printBoards@BBPair@@QEAAXXZ
; BBPair::printBoards() -- prints eight "%s %s\n" lines followed by "\n".
; In:  rcx = this (16 bytes are read: [this+0..7] and [this+8..15])
; For index rbx = 7 down to 0 the bytes [this+rbx] and [this+rbx+8] each
; select a 10-byte record in rankStr (byte*5*2); the two record addresses
; are passed to printf as the first and second %s arguments.
; Frame: 56 bytes; rbx, rsi and rdi are saved at rsp+48, rsp+40 and rsp+32
; and restored before the final printf, which is reached with jmp
; (tail call) after the frame has been released.
_TEXT	SEGMENT
this$ = 64
?printBoards@BBPair@@QEAAXXZ PROC			; BBPair::printBoards, COMDAT
; Line 18
$LN8:
	sub	rsp, 56					; 00000038H
	mov	QWORD PTR [rsp+48], rbx
	mov	QWORD PTR [rsp+40], rsi
	mov	QWORD PTR [rsp+32], rdi
	mov	rdi, rcx				; rdi = this, kept across the printf calls
; Line 19
	mov	ebx, 7					; loop index, counts down to 0
	lea	rsi, OFFSET FLAT:?rankStr@BBPair@@1PAY09DA ; BBPair::rankStr
	npad	14
$LL3@printBoard:
; Line 20
	movzx	eax, BYTE PTR [rbx+rdi+8]		; byte rbx of the second quadword
	lea	rcx, OFFSET FLAT:??_C@_06IINFAHFO@?$CFs?5?$CFs?6?$AA@
	lea	rdx, QWORD PTR [rax+rax*4]		; byte * 5
	movzx	eax, BYTE PTR [rbx+rdi]			; byte rbx of the first quadword
	lea	r8, QWORD PTR [rsi+rdx*2]		; r8 = rankStr + byte*10 (second %s)
	lea	rdx, QWORD PTR [rax+rax*4]		; byte * 5
	lea	rdx, QWORD PTR [rsi+rdx*2]		; rdx = rankStr + byte*10 (first %s)
	call	printf
	dec	rbx
	jns	SHORT $LL3@printBoard			; loop while rbx >= 0
; Line 21
	lea	rcx, OFFSET FLAT:??_C@_01EEMJAFIK@?6?$AA@
	mov	rdi, QWORD PTR [rsp+32]
	mov	rsi, QWORD PTR [rsp+40]
	mov	rbx, QWORD PTR [rsp+48]
; Line 22
	add	rsp, 56					; 00000038H
	jmp	printf					; tail call: printf("\n") returns to our caller
?printBoards@BBPair@@QEAAXXZ ENDP			; BBPair::printBoards
_TEXT	ENDS
PUBLIC	?initRankStr@BBPair@@SAXXZ			; BBPair::initRankStr
; Function compile flags: /Ogtpy
;	COMDAT ?initRankStr@BBPair@@SAXXZ
; BBPair::initRankStr() -- fills the rankStr table.
; For each i = 0..255 one 10-byte record is written: bytes 0..7 are
; binDigit[bit j of i] for j = 0..7, byte 8 is zero; byte 9 is not written.
; Registers: ecx = i, rdx = address one record past the one being written
; (it is advanced by 10 at the top of the loop, hence the negative
; displacements), r8 = address of binDigit, rax = scratch.
; Leaf function: no stack frame, no arguments.
_TEXT	SEGMENT
?initRankStr@BBPair@@SAXXZ PROC				; BBPair::initRankStr, COMDAT
; Line 26
	xor	ecx, ecx				; i = 0
	lea	rdx, OFFSET FLAT:?rankStr@BBPair@@1PAY09DA ; BBPair::rankStr
	lea	r8, OFFSET FLAT:?binDigit@BBPair@@1QBDB	; BBPair::binDigit
$LL6@initRankSt:
; Line 29
	mov	eax, ecx
	add	rdx, 10					; advance to the next record early
	and	eax, 1					; bit 0 of i
	movzx	eax, BYTE PTR [rax+r8]
	mov	BYTE PTR [rdx-10], al			; record byte 0
	mov	eax, ecx
; Line 30
	mov	BYTE PTR [rdx-2], 0			; record byte 8 = terminator
	and	eax, 2
	shr	rax, 1					; bit 1 of i
	movzx	eax, BYTE PTR [rax+r8]
	mov	BYTE PTR [rdx-9], al			; record byte 1
	mov	eax, ecx
	and	eax, 4
	shr	rax, 2					; bit 2 of i
	movzx	eax, BYTE PTR [rax+r8]
	mov	BYTE PTR [rdx-8], al			; record byte 2
	mov	eax, ecx
	and	eax, 8
	shr	rax, 3					; bit 3 of i
	movzx	eax, BYTE PTR [rax+r8]
	mov	BYTE PTR [rdx-7], al			; record byte 3
	mov	eax, ecx
	and	eax, 16
	shr	rax, 4					; bit 4 of i
	movzx	eax, BYTE PTR [rax+r8]
	mov	BYTE PTR [rdx-6], al			; record byte 4
	mov	eax, ecx
	and	eax, 32					; 00000020H
	shr	rax, 5
	movzx	eax, BYTE PTR [rax+r8]
	mov	BYTE PTR [rdx-5], al			; record byte 5
	mov	eax, ecx
	and	eax, 64					; 00000040H
	shr	rax, 6
	movzx	eax, BYTE PTR [rax+r8]
	mov	BYTE PTR [rdx-4], al			; record byte 6
	mov	eax, ecx
	inc	ecx					; ++i (eax still holds the old i)
	and	eax, 128				; 00000080H
	shr	rax, 7
	cmp	ecx, 256				; 00000100H
	movzx	eax, BYTE PTR [rax+r8]			; movzx/mov leave the cmp flags intact
	mov	BYTE PTR [rdx-3], al			; record byte 7
	jl	$LL6@initRankSt				; loop while i < 256
; Line 32
	ret	0
?initRankStr@BBPair@@SAXXZ ENDP				; BBPair::initRankStr
_TEXT	ENDS
PUBLIC	??0XMM@@QEAA@T__m128i@@@Z			; XMM::XMM
; Function compile flags: /Ogtpy
;	COMDAT ??0XMM@@QEAA@T__m128i@@@Z
; XMM::XMM(__m128i a) -- copies 16 bytes into the object.
; In:  rcx = this, rdx = address of the 16-byte argument a
; Out: rax = this
; Both addresses are accessed with movdqa, i.e. treated as 16-byte aligned.
_TEXT	SEGMENT
this$ = 8
a$ = 16
??0XMM@@QEAA@T__m128i@@@Z PROC				; XMM::XMM, COMDAT
; Line 57
	movdqa	xmm0, XMMWORD PTR [rdx]			; load a
	mov	rax, rcx				; return this
	movdqa	XMMWORD PTR [rcx], xmm0			; store into the object
	ret	0
??0XMM@@QEAA@T__m128i@@@Z ENDP				; XMM::XMM
_TEXT	ENDS
PUBLIC	??0XMM@@QEAA@PEBVBBPair@@@Z			; XMM::XMM
; Function compile flags: /Ogtpy
;	COMDAT ??0XMM@@QEAA@PEBVBBPair@@@Z
; XMM::XMM(const BBPair* ptr) -- loads 16 bytes from *ptr into the object.
; In:  rcx = this, rdx = ptr
; Out: rax = this
; Both addresses are accessed with movdqa, i.e. treated as 16-byte aligned.
_TEXT	SEGMENT
this$ = 8
ptr$ = 16
??0XMM@@QEAA@PEBVBBPair@@@Z PROC			; XMM::XMM, COMDAT
; Line 60
	movdqa	xmm0, XMMWORD PTR [rdx]			; load *ptr
	mov	rax, rcx				; return this
	movdqa	XMMWORD PTR [rcx], xmm0			; store into the object
	ret	0
??0XMM@@QEAA@PEBVBBPair@@@Z ENDP			; XMM::XMM
_TEXT	ENDS
PUBLIC	?store@XMM@@QEAAXPEAVBBPair@@@Z			; XMM::store
; Function compile flags: /Ogtpy
;	COMDAT ?store@XMM@@QEAAXPEAVBBPair@@@Z
; XMM::store(BBPair* ptr) -- copies the object's 16 bytes to *ptr.
; In:  rcx = this, rdx = ptr
; Out: nothing (rax is not set)
_TEXT	SEGMENT
this$ = 8
ptr$ = 16
?store@XMM@@QEAAXPEAVBBPair@@@Z PROC			; XMM::store, COMDAT
; Line 66
	movdqa	xmm0, XMMWORD PTR [rcx]			; load the object
	movdqa	XMMWORD PTR [rdx], xmm0			; store to *ptr
	ret	0
?store@XMM@@QEAAXPEAVBBPair@@@Z ENDP			; XMM::store
_TEXT	ENDS
PUBLIC	??_4XMM@@QEAAAEAV0@AEBV0@@Z			; XMM::operator&=
; Function compile flags: /Ogtpy
;	COMDAT ??_4XMM@@QEAAAEAV0@AEBV0@@Z
; XMM::operator&=(const XMM& dbb) -- 128-bit AND of the object with dbb,
; result written back to the object.
; In:  rcx = this, rdx = address of dbb
; Out: rax = this
_TEXT	SEGMENT
this$ = 8
dbb$ = 16
??_4XMM@@QEAAAEAV0@AEBV0@@Z PROC			; XMM::operator&=, COMDAT
; Line 85
	movdqa	xmm0, XMMWORD PTR [rcx]			; load the object
	mov	rax, rcx				; return this
	pand	xmm0, XMMWORD PTR [rdx]			; AND with dbb (memory operand)
	movdqa	XMMWORD PTR [rcx], xmm0			; write the result back
	ret	0
??_4XMM@@QEAAAEAV0@AEBV0@@Z ENDP			; XMM::operator&=
_TEXT	ENDS
PUBLIC	??_5XMM@@QEAAAEAV0@AEBV0@@Z			; XMM::operator|=
; Function compile flags: /Ogtpy
;	COMDAT ??_5XMM@@QEAAAEAV0@AEBV0@@Z
; XMM::operator|=(const XMM& dbb) -- 128-bit OR of the object with dbb,
; result written back to the object.
; In:  rcx = this, rdx = address of dbb
; Out: rax = this
_TEXT	SEGMENT
this$ = 8
dbb$ = 16
??_5XMM@@QEAAAEAV0@AEBV0@@Z PROC			; XMM::operator|=, COMDAT
; Line 87
	movdqa	xmm0, XMMWORD PTR [rcx]			; load the object
	mov	rax, rcx				; return this
	por	xmm0, XMMWORD PTR [rdx]			; OR with dbb (memory operand)
	movdqa	XMMWORD PTR [rcx], xmm0			; write the result back
	ret	0
??_5XMM@@QEAAAEAV0@AEBV0@@Z ENDP			; XMM::operator|=
_TEXT	ENDS
PUBLIC	??5@YA?AVXMM@@AEBV0@H@Z				; operator>>
; Function compile flags: /Ogtpy
;	COMDAT ??5@YA?AVXMM@@AEBV0@H@Z
; operator>>(const XMM& dbb, int shift) -- logical right shift of both
; 64-bit halves of dbb by the same run-time count.
; In:  rcx = address of the result object, rdx = address of dbb, r8d = shift
; Out: rax = address of the result object
_TEXT	SEGMENT
__$ReturnUdt$ = 8
dbb$ = 16
shift$ = 24
??5@YA?AVXMM@@AEBV0@H@Z PROC				; operator>>, COMDAT
; Line 99
	movdqa	xmm1, XMMWORD PTR [rdx]			; load dbb
	movd	xmm0, r8d				; shift count into an xmm register
	mov	rax, rcx				; return the result address
	psrlq	xmm1, xmm0				; shift each quadword right
	movdqa	XMMWORD PTR [rcx], xmm1			; store the result
	ret	0
??5@YA?AVXMM@@AEBV0@H@Z ENDP				; operator>>
_TEXT	ENDS
PUBLIC	??6@YA?AVXMM@@AEBV0@H@Z				; operator<<
; Function compile flags: /Ogtpy
;	COMDAT ??6@YA?AVXMM@@AEBV0@H@Z
; operator<<(const XMM& dbb, int shift) -- left shift of both 64-bit
; halves of dbb by the same run-time count.
; In:  rcx = address of the result object, rdx = address of dbb, r8d = shift
; Out: rax = address of the result object
_TEXT	SEGMENT
__$ReturnUdt$ = 8
dbb$ = 16
shift$ = 24
??6@YA?AVXMM@@AEBV0@H@Z PROC				; operator<<, COMDAT
; Line 101
	movdqa	xmm1, XMMWORD PTR [rdx]			; load dbb
	movd	xmm0, r8d				; shift count into an xmm register
	mov	rax, rcx				; return the result address
	psllq	xmm1, xmm0				; shift each quadword left
	movdqa	XMMWORD PTR [rcx], xmm1			; store the result
	ret	0
??6@YA?AVXMM@@AEBV0@H@Z ENDP				; operator<<
_TEXT	ENDS
PUBLIC	??I@YA?AVXMM@@AEBV0@0@Z				; operator&
; Function compile flags: /Ogtpy
;	COMDAT ??I@YA?AVXMM@@AEBV0@0@Z
; operator&(const XMM& a, const XMM& b) -- 128-bit AND of a and b.
; In:  rcx = address of the result object, rdx = address of a, r8 = address of b
; Out: rax = address of the result object
_TEXT	SEGMENT
__$ReturnUdt$ = 8
a$ = 16
b$ = 24
??I@YA?AVXMM@@AEBV0@0@Z PROC				; operator&, COMDAT
; Line 103
	movdqa	xmm0, XMMWORD PTR [rdx]			; load a
	mov	rax, rcx				; return the result address
	pand	xmm0, XMMWORD PTR [r8]			; AND with b
	movdqa	XMMWORD PTR [rcx], xmm0			; store the result
	ret	0
??I@YA?AVXMM@@AEBV0@0@Z ENDP				; operator&
_TEXT	ENDS
PUBLIC	??S@YA?AVXMM@@AEBV0@@Z				; operator~
; Function compile flags: /Ogtpy
;	COMDAT ??S@YA?AVXMM@@AEBV0@@Z
; operator~(const XMM& a) -- XORs a with the static constant XMM::minusOne.
; In:  rcx = address of the result object, rdx = address of a
; Out: rax = address of the result object
_TEXT	SEGMENT
__$ReturnUdt$ = 8
a$ = 16
??S@YA?AVXMM@@AEBV0@@Z PROC				; operator~, COMDAT
; Line 113
	movdqa	xmm0, XMMWORD PTR [rdx]			; load a
	mov	rax, rcx				; return the result address
	pxor	xmm0, XMMWORD PTR ?minusOne@XMM@@1T__m128i@@B ; XMM::minusOne
	movdqa	XMMWORD PTR [rcx], xmm0			; store the result
	ret	0
??S@YA?AVXMM@@AEBV0@@Z ENDP				; operator~
_TEXT	ENDS
PUBLIC	??0GPR@@QEAA@AEB_K0@Z				; GPR::GPR
; Function compile flags: /Ogtpy
;	COMDAT ??0GPR@@QEAA@AEB_K0@Z
; GPR::GPR(low, high) -- both arguments arrive as pointers and are
; dereferenced here.
; In:  rcx = this, rdx = address of low, r8 = address of high
; Out: rax = this; [this] = low, [this+8] = high
_TEXT	SEGMENT
this$ = 8
low$ = 16
high$ = 24
??0GPR@@QEAA@AEB_K0@Z PROC				; GPR::GPR, COMDAT
; Line 152
	mov	rax, QWORD PTR [rdx]			; rax = low
	mov	QWORD PTR [rcx], rax
; Line 153
	mov	rax, QWORD PTR [r8]			; rax = high
	mov	QWORD PTR [rcx+8], rax
	mov	rax, rcx				; return this
	ret	0
??0GPR@@QEAA@AEB_K0@Z ENDP				; GPR::GPR
_TEXT	ENDS
PUBLIC	??0GPR@@QEAA@PEBVBBPair@@@Z			; GPR::GPR
; Function compile flags: /Ogtpy
;	COMDAT ??0GPR@@QEAA@PEBVBBPair@@@Z
; GPR::GPR(const BBPair* ptr) -- copies two quadwords from *ptr into the
; object using 64-bit general-purpose moves.
; In:  rcx = this, rdx = ptr
; Out: rax = this
_TEXT	SEGMENT
this$ = 8
ptr$ = 16
??0GPR@@QEAA@PEBVBBPair@@@Z PROC			; GPR::GPR, COMDAT
; Line 155
	mov	rax, QWORD PTR [rdx]			; first quadword of *ptr
	mov	QWORD PTR [rcx], rax
; Line 156
	mov	rax, QWORD PTR [rdx+8]			; second quadword of *ptr
	mov	QWORD PTR [rcx+8], rax
	mov	rax, rcx				; return this
	ret	0
??0GPR@@QEAA@PEBVBBPair@@@Z ENDP			; GPR::GPR
_TEXT	ENDS
PUBLIC	?store@GPR@@QEAAXPEAVBBPair@@@Z			; GPR::store
; Function compile flags: /Ogtpy
;	COMDAT ?store@GPR@@QEAAXPEAVBBPair@@@Z
; GPR::store(BBPair* ptr) -- copies the object's two quadwords to *ptr.
; In:  rcx = this, rdx = ptr
; Out: nothing (rax is left holding the second quadword copied)
_TEXT	SEGMENT
this$ = 8
ptr$ = 16
?store@GPR@@QEAAXPEAVBBPair@@@Z PROC			; GPR::store, COMDAT
; Line 161
	mov	rax, QWORD PTR [rcx]			; first quadword of the object
	mov	QWORD PTR [rdx], rax
; Line 162
	mov	rax, QWORD PTR [rcx+8]			; second quadword of the object
	mov	QWORD PTR [rdx+8], rax
	ret	0
?store@GPR@@QEAAXPEAVBBPair@@@Z ENDP			; GPR::store
_TEXT	ENDS
PUBLIC	??_4GPR@@QEAAAEAV0@AEBV0@@Z			; GPR::operator&=
; Function compile flags: /Ogtpy
;	COMDAT ??_4GPR@@QEAAAEAV0@AEBV0@@Z
; GPR::operator&=(const GPR& dbb) -- ANDs each of the object's two
; quadwords with the matching quadword of dbb, in place.
; In:  rcx = this, rdx = address of dbb
; Out: rax = this
_TEXT	SEGMENT
this$ = 8
dbb$ = 16
??_4GPR@@QEAAAEAV0@AEBV0@@Z PROC			; GPR::operator&=, COMDAT
; Line 168
	mov	rax, QWORD PTR [rdx]
	and	QWORD PTR [rcx], rax			; read-modify-write on the first quadword
; Line 169
	mov	rax, QWORD PTR [rdx+8]
	and	QWORD PTR [rcx+8], rax			; read-modify-write on the second quadword
	mov	rax, rcx				; return this
	ret	0
??_4GPR@@QEAAAEAV0@AEBV0@@Z ENDP			; GPR::operator&=
_TEXT	ENDS
PUBLIC	??_5GPR@@QEAAAEAV0@AEBV0@@Z			; GPR::operator|=
; Function compile flags: /Ogtpy
;	COMDAT ??_5GPR@@QEAAAEAV0@AEBV0@@Z
; GPR::operator|=(const GPR& dbb) -- ORs each of the object's two
; quadwords with the matching quadword of dbb, in place.
; In:  rcx = this, rdx = address of dbb
; Out: rax = this
_TEXT	SEGMENT
this$ = 8
dbb$ = 16
??_5GPR@@QEAAAEAV0@AEBV0@@Z PROC			; GPR::operator|=, COMDAT
; Line 170
	mov	rax, QWORD PTR [rdx]
	or	QWORD PTR [rcx], rax			; read-modify-write on the first quadword
; Line 171
	mov	rax, QWORD PTR [rdx+8]
	or	QWORD PTR [rcx+8], rax			; read-modify-write on the second quadword
	mov	rax, rcx				; return this
	ret	0
??_5GPR@@QEAAAEAV0@AEBV0@@Z ENDP			; GPR::operator|=
_TEXT	ENDS
PUBLIC	??5@YA?AVGPR@@AEBV0@H@Z				; operator>>
; Function compile flags: /Ogtpy
;	COMDAT ??5@YA?AVGPR@@AEBV0@H@Z
; operator>>(const GPR& dbb, int shift) -- logical right shift of both
; quadwords of dbb by the same run-time count.
; In:  rcx = address of the result object, rdx = address of dbb, r8d = shift
; Out: rax = address of the result object
; The result address is moved to r9 because the variable shift needs cl.
_TEXT	SEGMENT
__$ReturnUdt$ = 8
dbb$ = 16
shift$ = 24
??5@YA?AVGPR@@AEBV0@H@Z PROC				; operator>>, COMDAT
; Line 182
	mov	rax, QWORD PTR [rdx]			; first quadword of dbb
	mov	r9, rcx					; free rcx for the shift count
	movsxd	rcx, r8d				; rcx = shift (sign-extended); cl is used below
	shr	rax, cl
	mov	QWORD PTR [r9], rax
	mov	rax, QWORD PTR [rdx+8]			; second quadword of dbb
	shr	rax, cl
	mov	QWORD PTR [r9+8], rax
	mov	rax, r9					; return the result address
	ret	0
??5@YA?AVGPR@@AEBV0@H@Z ENDP				; operator>>
_TEXT	ENDS
PUBLIC	??6@YA?AVGPR@@AEBV0@H@Z				; operator<<
; Function compile flags: /Ogtpy
;	COMDAT ??6@YA?AVGPR@@AEBV0@H@Z
; operator<<(const GPR& dbb, int shift) -- left shift of both quadwords
; of dbb by the same run-time count.
; In:  rcx = address of the result object, rdx = address of dbb, r8d = shift
; Out: rax = address of the result object
; The result address is moved to r9 because the variable shift needs cl.
_TEXT	SEGMENT
__$ReturnUdt$ = 8
dbb$ = 16
shift$ = 24
??6@YA?AVGPR@@AEBV0@H@Z PROC				; operator<<, COMDAT
; Line 184
	mov	rax, QWORD PTR [rdx]			; first quadword of dbb
	mov	r9, rcx					; free rcx for the shift count
	movsxd	rcx, r8d				; rcx = shift (sign-extended); cl is used below
	shl	rax, cl
	mov	QWORD PTR [r9], rax
	mov	rax, QWORD PTR [rdx+8]			; second quadword of dbb
	shl	rax, cl
	mov	QWORD PTR [r9+8], rax
	mov	rax, r9					; return the result address
	ret	0
??6@YA?AVGPR@@AEBV0@H@Z ENDP				; operator<<
_TEXT	ENDS
PUBLIC	??I@YA?AVGPR@@AEBV0@0@Z				; operator&
; Function compile flags: /Ogtpy
;	COMDAT ??I@YA?AVGPR@@AEBV0@0@Z
; operator&(const GPR& a, const GPR& b) -- ANDs the two quadwords of a
; with the matching quadwords of b.
; In:  rcx = address of the result object, rdx = address of a, r8 = address of b
; Out: rax = address of the result object
_TEXT	SEGMENT
__$ReturnUdt$ = 8
a$ = 16
b$ = 24
??I@YA?AVGPR@@AEBV0@0@Z PROC				; operator&, COMDAT
; Line 186
	mov	rax, QWORD PTR [rdx]
	and	rax, QWORD PTR [r8]			; first quadwords
	mov	QWORD PTR [rcx], rax
	mov	rax, QWORD PTR [rdx+8]
	and	rax, QWORD PTR [r8+8]			; second quadwords
	mov	QWORD PTR [rcx+8], rax
	mov	rax, rcx				; return the result address
	ret	0
??I@YA?AVGPR@@AEBV0@0@Z ENDP				; operator&
_TEXT	ENDS
PUBLIC	??S@YA?AVGPR@@AEBV0@@Z				; operator~
; Function compile flags: /Ogtpy
;	COMDAT ??S@YA?AVGPR@@AEBV0@@Z
; operator~(const GPR& a) -- bitwise complement of both quadwords of a.
; In:  rcx = address of the result object, rdx = address of a
; Out: rax = address of the result object
_TEXT	SEGMENT
__$ReturnUdt$ = 8
a$ = 16
??S@YA?AVGPR@@AEBV0@@Z PROC				; operator~, COMDAT
; Line 201
	mov	rax, QWORD PTR [rdx]
	not	rax					; complement the first quadword
	mov	QWORD PTR [rcx], rax
	mov	rax, QWORD PTR [rdx+8]
	not	rax					; complement the second quadword
	mov	QWORD PTR [rcx+8], rax
	mov	rax, rcx				; return the result address
	ret	0
??S@YA?AVGPR@@AEBV0@@Z ENDP				; operator~
_TEXT	ENDS
PUBLIC	??0sSource@@QEAA@_K0@Z				; sSource::sSource
; Function compile flags: /Ogtpy
;	COMDAT ??0sSource@@QEAA@_K0@Z
; sSource::sSource(r1, r2) -- fills the 32-byte object.
; In:  rcx = this, rdx = r1, r8 = r2 (passed by value)
; Out: rax = this
;      [this]    = r1        [this+8]  = r2
;      [this+16] = r1 | r2   [this+24] = r1 | r2
_TEXT	SEGMENT
this$ = 8
r1$ = 16
r2$ = 24
??0sSource@@QEAA@_K0@Z PROC				; sSource::sSource, COMDAT
; Line 208
	mov	QWORD PTR [rcx], rdx			; store r1 before rdx is overwritten
	or	rdx, r8					; rdx = r1 | r2
	mov	QWORD PTR [rcx+8], r8
	mov	QWORD PTR [rcx+16], rdx
	mov	QWORD PTR [rcx+24], rdx
	mov	rax, rcx				; return this
	ret	0
??0sSource@@QEAA@_K0@Z ENDP				; sSource::sSource
_TEXT	ENDS
PUBLIC	??0sTarget@@QEAA@XZ				; sTarget::sTarget
; Function compile flags: /Ogtpy
;	COMDAT ??0sTarget@@QEAA@XZ
; sTarget::sTarget() -- default constructor; it initialises nothing.
; In:  rcx = this
; Out: rax = this
_TEXT	SEGMENT
this$ = 8
??0sTarget@@QEAA@XZ PROC				; sTarget::sTarget, COMDAT
	mov	rax, rcx				; return the object pointer unchanged
	ret	0
??0sTarget@@QEAA@XZ ENDP				; sTarget::sTarget
_TEXT	ENDS
PUBLIC	??$updownAttacks@VGPR@@@@YAXPEAUsTarget@@PEBUsSource@@@Z ; updownAttacks<GPR>
;	COMDAT xdata
; Unwind record for updownAttacks<GPR> (raw encoded DWORDs emitted by the compiler).
xdata	SEGMENT
$unwind$??$updownAttacks@VGPR@@@@YAXPEAUsTarget@@PEBUsSource@@@Z DD 0b3201H
	DD	06c432H
	DD	07742aH
	DD	08641eH
	DD	095417H
	DD	0a3413H
	DD	0a207H
xdata	ENDS
;	COMDAT pdata
; Function-table entry: code range $LN149 .. $LN149+408 and its unwind record.
pdata	SEGMENT
$pdata$??$updownAttacks@VGPR@@@@YAXPEAUsTarget@@PEBUsSource@@@Z DD @imagerel($LN149#)
	DD	@imagerel($LN149#+408)
	DD	@imagerel($unwind$??$updownAttacks@VGPR@@@@YAXPEAUsTarget@@PEBUsSource@@@Z#)
; Function compile flags: /Ogtpy
pdata	ENDS
;	COMDAT ??$updownAttacks@VGPR@@@@YAXPEAUsTarget@@PEBUsSource@@@Z
;-----------------------------------------------------------------------
; updownAttacks<GPR>(sTarget* pTarget, const sSource* pSource)
; Fill in both vertical directions (shifts by 8, 16, 32) on two 64-bit
; boards at once, using general-purpose registers only.
; In:  rcx = pTarget, rdx = pSource
;      [pSource+0/+8]   = generator boards (low/high)
;      [pSource+16/+24] = occupancy boards (low/high), complemented here
; Out: [pTarget+0/+8]   = up-fill   << 8 (low/high)
;      [pTarget+16/+24] = down-fill >> 8 (low/high)
; Frame (88 bytes, addressed via rax = entry rsp as well as via rsp):
;      rsp+0  .. +15  $T2643 / pu$  (complemented occupancy)
;      rsp+16 .. +31  gd$           (copy of the generators)
;      rsp+32 .. +47  pd$           (complemented occupancy)
;      rsp+48 r12, +56 rdi, +64 rsi, +72 rbp, +80 rbx (saved registers)
; Register roles once set up:
;      rbp / rsi = up generator   (low / high)
;      rdi / rbx = down generator (low / high)
;      r10 / rdx = up propagator  (low / high)
;      r11 / r8  = down propagator(low / high)
;      r12 = pTarget, rax / rcx = scratch
; Before the first propagator step r8 / r9 hold the complemented
; occupancy (low / high).
;-----------------------------------------------------------------------
_TEXT	SEGMENT
$T2643 = 0
pu$ = 0
gd$ = 16
pd$ = 32
pTarget$ = 96
pSource$ = 104
??$updownAttacks@VGPR@@@@YAXPEAUsTarget@@PEBUsSource@@@Z PROC ; updownAttacks<GPR>, COMDAT
; Line 220
$LN149:
	mov	rax, rsp				; rax = entry rsp, used to address the frame
	sub	rsp, 88					; 00000058H
; Line 222
	mov	r8, QWORD PTR [rdx+16]			; occupancy, low board
	mov	r9, QWORD PTR [rdx+24]			; occupancy, high board
	mov	QWORD PTR [rax-8], rbx			; save rbx  (rsp+80)
	mov	QWORD PTR [rax-16], rbp			; save rbp  (rsp+72)
	mov	rbp, QWORD PTR [rdx]			; up generator, low board
	mov	QWORD PTR [rax-24], rsi			; save rsi  (rsp+64)
	mov	rsi, QWORD PTR [rdx+8]			; up generator, high board
	mov	QWORD PTR [rax-72], rbp			; gd$ low   (rsp+16)
	mov	QWORD PTR [rax-32], rdi			; save rdi  (rsp+56)
	mov	QWORD PTR [rax-64], rsi			; gd$ high  (rsp+24)
	mov	QWORD PTR [rax-40], r12			; save r12  (rsp+48)
	mov	r12, rcx				; r12 = pTarget
; Line 224
	movaps	xmm0, XMMWORD PTR [rax-72]		; reload and re-store the same 16 bytes
	movdqa	XMMWORD PTR [rax-72], xmm0		; (no net change to gd$)
; Line 227
	mov	rbx, QWORD PTR gd$[rsp+8]		; down generator, high board
	mov	rdi, QWORD PTR gd$[rsp]			; down generator, low board
	not	r8					; empty squares, low board
	not	r9					; empty squares, high board
	mov	rcx, rbp
	shl	rcx, 8
	mov	QWORD PTR $T2643[rsp], r8
	mov	QWORD PTR [rax-80], r9			; second half of $T2643 (rsp+8)
	movaps	xmm1, XMMWORD PTR $T2643[rsp]
	movdqa	XMMWORD PTR [rax-56], xmm1		; pd$ = complemented occupancy (rsp+32)
	and	rcx, r8
; Line 228
	mov	r10, r8
	or	rbp, rcx				; up-low |= empty & (up-low << 8)
; Line 229
	mov	r11, r8
	mov	rax, rsi				; rax is scratch from here on
	movdqa	XMMWORD PTR pu$[rsp], xmm1
	mov	rdx, QWORD PTR pu$[rsp+8]		; up propagator, high board
	shl	rax, 8
	mov	rcx, rdi
	and	rax, r9
	shr	rcx, 8
	shl	r10, 8
	or	rsi, rax				; up-high |= empty & (up-high << 8)
	and	rcx, r8
	and	r10, r8					; up propagator low &= itself << 8
	or	rdi, rcx				; down-low |= empty & (down-low >> 8)
	mov	rax, rbx
	shr	r11, 8
	shr	rax, 8
	and	r11, r8					; down propagator low &= itself >> 8
	mov	r8, QWORD PTR pd$[rsp+8]		; down propagator, high board
	and	rax, r9
; Line 230
	mov	rcx, rbp
	or	rbx, rax				; down-high |= empty & (down-high >> 8)
	shl	rcx, 16
	mov	rax, r9
	shl	rax, 8
	and	rcx, r10
	shr	r9, 8
	and	rdx, rax				; up propagator high &= itself << 8
	or	rbp, rcx				; up-low |= prop & (up-low << 16)
	and	r8, r9					; down propagator high &= itself >> 8
	mov	rax, rsi
; Line 231
	mov	rcx, rdi
	shl	rax, 16
	shr	rcx, 16
	and	rax, rdx
	and	rcx, r11
	or	rsi, rax				; up-high |= prop & (up-high << 16)
	or	rdi, rcx				; down-low |= prop & (down-low >> 16)
	mov	rax, rbx
	shr	rax, 16
; Line 232
	mov	rcx, r10
	and	rax, r8
	shl	rcx, 16
	or	rbx, rax				; down-high |= prop & (down-high >> 16)
	and	r10, rcx				; up propagator low &= itself << 16
	mov	rax, rdx
	shl	rax, 16
; Line 233
	mov	rcx, r11
	and	rdx, rax				; up propagator high &= itself << 16
	shr	rcx, 16
	mov	rax, r8
	and	r11, rcx				; down propagator low &= itself >> 16
	shr	rax, 16
; Line 234
	mov	rcx, rbp
	and	r8, rax					; down propagator high &= itself >> 16
	shl	rcx, 32					; 00000020H
	mov	rax, rsi
	shl	rax, 32					; 00000020H
	and	rcx, r10
	or	rbp, rcx				; up-low |= prop & (up-low << 32)
	and	rax, rdx
; Line 235
	mov	rcx, rdi
	or	rsi, rax				; up-high |= prop & (up-high << 32)
	mov	rax, rbx
; Line 236
	shl	rbp, 8
	mov	QWORD PTR [r12], rbp			; pTarget+0  = up-low << 8
	mov	rbp, QWORD PTR [rsp+72]			; restore rbp
	shl	rsi, 8
	mov	QWORD PTR [r12+8], rsi			; pTarget+8  = up-high << 8
	mov	rsi, QWORD PTR [rsp+64]			; restore rsi
	shr	rcx, 32					; 00000020H
	shr	rax, 32					; 00000020H
	and	rcx, r11
	or	rdi, rcx				; down-low |= prop & (down-low >> 32)
	and	rax, r8
; Line 237
	shr	rdi, 8
	or	rbx, rax				; down-high |= prop & (down-high >> 32)
	shr	rbx, 8
	mov	QWORD PTR [r12+16], rdi			; pTarget+16 = down-low >> 8
	mov	rdi, QWORD PTR [rsp+56]			; restore rdi
	mov	QWORD PTR [r12+24], rbx			; pTarget+24 = down-high >> 8
	mov	r12, QWORD PTR [rsp+48]			; restore r12
	mov	rbx, QWORD PTR [rsp+80]			; restore rbx
; Line 238
	add	rsp, 88					; 00000058H
	ret	0
??$updownAttacks@VGPR@@@@YAXPEAUsTarget@@PEBUsSource@@@Z ENDP ; updownAttacks<GPR>
_TEXT	ENDS
PUBLIC	??$updownAttacks@VXMM@@@@YAXPEAUsTarget@@PEBUsSource@@@Z ; updownAttacks<XMM>
; Function compile flags: /Ogtpy
;	COMDAT ??$updownAttacks@VXMM@@@@YAXPEAUsTarget@@PEBUsSource@@@Z
;-----------------------------------------------------------------------
; updownAttacks<XMM>(sTarget* pTarget, const sSource* pSource)
; Same fill as the GPR instantiation, but both 64-bit boards are
; processed together in 128-bit SSE2 registers.
; In:  rcx = pTarget, rdx = pSource
;      [pSource+0  .. +15] = generator boards
;      [pSource+16 .. +31] = occupancy boards (XORed with XMM::minusOne)
; Out: [pTarget+0  .. +15] = up-fill   << 8 (per quadword)
;      [pTarget+16 .. +31] = down-fill >> 8 (per quadword)
; Register roles:
;      xmm3 = up generator      xmm5 = down generator
;      xmm2 = up propagator     xmm4 = complemented occupancy, then
;                                      down propagator
;      xmm0, xmm1 = scratch
; Leaf function with no stack frame; only xmm0-xmm5 are written.
; All memory accesses use movdqa / aligned memory operands, so both
; pointers are treated as 16-byte aligned.
;-----------------------------------------------------------------------
_TEXT	SEGMENT
pTarget$ = 8
pSource$ = 16
??$updownAttacks@VXMM@@@@YAXPEAUsTarget@@PEBUsSource@@@Z PROC ; updownAttacks<XMM>, COMDAT
; Line 221
	movdqa	xmm5, XMMWORD PTR [rdx]			; generators
; Line 222
	movdqa	xmm4, XMMWORD PTR [rdx+16]		; occupancy
; Line 226
	movdqa	xmm0, xmm5
	movdqa	xmm3, xmm5				; up generator starts as a copy
	pxor	xmm4, XMMWORD PTR ?minusOne@XMM@@1T__m128i@@B ; XMM::minusOne
	movdqa	xmm1, xmm4
; Line 228
	movdqa	xmm2, xmm4				; up propagator starts as the empty set
	psllq	xmm0, 8
	pand	xmm1, xmm0
	movdqa	xmm0, xmm5
	por	xmm3, xmm1				; up |= empty & (up << 8)
	psrlq	xmm0, 8
	movdqa	xmm1, xmm4
	pand	xmm1, xmm0
	movdqa	xmm0, xmm4
	por	xmm5, xmm1				; down |= empty & (down >> 8)
	psllq	xmm0, 8
	pand	xmm2, xmm0				; up propagator &= itself << 8
; Line 229
	movdqa	xmm0, xmm4
	psrlq	xmm0, 8
; Line 230
	movdqa	xmm1, xmm2
	pand	xmm4, xmm0				; down propagator &= itself >> 8
	movdqa	xmm0, xmm3
	psllq	xmm0, 16
	pand	xmm1, xmm0
	por	xmm3, xmm1				; up |= prop & (up << 16)
; Line 231
	movdqa	xmm0, xmm5
	movdqa	xmm1, xmm4
	psrlq	xmm0, 16
	pand	xmm1, xmm0
	por	xmm5, xmm1				; down |= prop & (down >> 16)
; Line 232
	movdqa	xmm0, xmm2
; Line 234
	movdqa	xmm1, xmm3
	psllq	xmm0, 16
	psllq	xmm1, 32				; 00000020H
	pand	xmm2, xmm0				; up propagator &= itself << 16
	pand	xmm2, xmm1				; ... then masked with up << 32
	movdqa	xmm0, xmm4
; Line 235
	movdqa	xmm1, xmm5
	psrlq	xmm0, 16
	psrlq	xmm1, 32				; 00000020H
	por	xmm3, xmm2				; up |= prop & (up << 32)
	pand	xmm4, xmm0				; down propagator &= itself >> 16
; Line 236
	psllq	xmm3, 8
	pand	xmm4, xmm1				; ... then masked with down >> 32
	movdqa	XMMWORD PTR [rcx], xmm3			; pTarget+0  = up << 8
	por	xmm5, xmm4				; down |= prop & (down >> 32)
; Line 237
	psrlq	xmm5, 8
	movdqa	XMMWORD PTR [rcx+16], xmm5		; pTarget+16 = down >> 8
; Line 238
	ret	0
??$updownAttacks@VXMM@@@@YAXPEAUsTarget@@PEBUsSource@@@Z ENDP ; updownAttacks<XMM>
_TEXT	ENDS
PUBLIC	main
; Unwind (xdata) and function-table (pdata) records for main.
; The pdata entries describe main in four address ranges:
; +0..+276, +276..+303, +303..+784 and +784..+856; each $chain$ record
; refers back to an earlier record.
xdata	SEGMENT
$unwind$main DD	030901H
	DD	0d6409H
	DD	0e204H
xdata	ENDS
pdata	SEGMENT
$pdata$main DD	@imagerel($LN359#)
	DD	@imagerel($LN359#+276)
	DD	@imagerel($unwind$main#)
pdata	ENDS
xdata	SEGMENT
$chain$1$main DD 020521H
	DD	0e3405H
	DD	@imagerel($LN359#)
	DD	@imagerel($LN359#+276)
	DD	@imagerel($unwind$main#)
xdata	ENDS
pdata	SEGMENT
$pdata$1$main DD @imagerel($LN359#+276)
	DD	@imagerel($LN359#+303)
	DD	@imagerel($chain$1$main#)
pdata	ENDS
xdata	SEGMENT
$chain$2$main DD 020521H
	DD	0c7405H
	DD	@imagerel($LN359#+276)
	DD	@imagerel($LN359#+303)
	DD	@imagerel($chain$1$main#)
xdata	ENDS
pdata	SEGMENT
$pdata$2$main DD @imagerel($LN359#+303)
	DD	@imagerel($LN359#+784)
	DD	@imagerel($chain$2$main#)
pdata	ENDS
xdata	SEGMENT
$chain$3$main DD 021H
	DD	@imagerel($LN359#+276)
	DD	@imagerel($LN359#+303)
	DD	@imagerel($chain$1$main#)
xdata	ENDS
pdata	SEGMENT
$pdata$3$main DD @imagerel($LN359#+784)
	DD	@imagerel($LN359#+856)
	DD	@imagerel($chain$3$main#)
; Function compile flags: /Ogtpy
pdata	ENDS
;-----------------------------------------------------------------------
; main -- everything is expanded in place; the only calls are to printf.
;   1. Fills rankStr (same loop as BBPair::initRankStr).
;   2. Builds source2 on the stack and runs the SSE2 up/down fill on it;
;      the result goes to target2, which shares stack offset 64 with
;      source2 (both symbols are 64).
;   3. Three quadwords of target1 are stored as immediate constants; the
;      fourth is finished at run time in rcx, interleaved with step 2.
;   4. Four print loops of eight printf("%s %s\n") calls each, every
;      loop followed by printf("\n").
; Out: eax = 0
; Frame: 120 bytes; rdi, rsi and rbx are saved at rsp+96, rsp+104 and
; rsp+112. rbx and rdi are saved part-way through the function rather
; than at entry.
;-----------------------------------------------------------------------
_TEXT	SEGMENT
target1$ = 32
target2$ = 64
source2$ = 64
argc$ = 128
argv$ = 136
main	PROC
; Line 241
$LN359:
	sub	rsp, 120				; 00000078H
	mov	QWORD PTR [rsp+104], rsi		; save rsi
; Line 242
	lea	rsi, OFFSET FLAT:?rankStr@BBPair@@1PAY09DA ; BBPair::rankStr
	xor	ecx, ecx				; i = 0
	mov	rdx, rsi				; rdx walks the table; rsi stays at its base
	lea	r8, OFFSET FLAT:?binDigit@BBPair@@1QBDB	; BBPair::binDigit
	npad	4
; Table fill: one 10-byte record per i in 0..255 (bytes 0..7 from binDigit
; by bit, byte 8 = 0).
$LL8@main:
	mov	eax, ecx
	add	rdx, 10
	and	eax, 1
	movzx	eax, BYTE PTR [rax+r8]
	mov	BYTE PTR [rdx-10], al
	mov	eax, ecx
	mov	BYTE PTR [rdx-2], 0
	and	eax, 2
	shr	rax, 1
	movzx	eax, BYTE PTR [rax+r8]
	mov	BYTE PTR [rdx-9], al
	mov	eax, ecx
	and	eax, 4
	shr	rax, 2
	movzx	eax, BYTE PTR [rax+r8]
	mov	BYTE PTR [rdx-8], al
	mov	eax, ecx
	and	eax, 8
	shr	rax, 3
	movzx	eax, BYTE PTR [rax+r8]
	mov	BYTE PTR [rdx-7], al
	mov	eax, ecx
	and	eax, 16
	shr	rax, 4
	movzx	eax, BYTE PTR [rax+r8]
	mov	BYTE PTR [rdx-6], al
	mov	eax, ecx
	and	eax, 32					; 00000020H
	shr	rax, 5
	movzx	eax, BYTE PTR [rax+r8]
	mov	BYTE PTR [rdx-5], al
	mov	eax, ecx
	and	eax, 64					; 00000040H
	shr	rax, 6
	movzx	eax, BYTE PTR [rax+r8]
	mov	BYTE PTR [rdx-4], al
	mov	eax, ecx
	inc	ecx
	and	eax, 128				; 00000080H
	shr	rax, 7
	cmp	ecx, 256				; 00000100H
	movzx	eax, BYTE PTR [rax+r8]
	mov	BYTE PTR [rdx-3], al
	jl	$LL8@main
; Line 245
; source2 is built on the stack; from here on the general-purpose
; instructions (rax/rcx/rdx) and the SSE2 instructions are interleaved.
	mov	rax, 274877906944			; 0000004000000000H
	mov	rcx, 72624976668147848			; 0102040810204088H
; Line 249
	mov	rdx, 71171198281555767			; 00fcd9d3e7cf9f37H
	mov	QWORD PTR source2$[rsp], rax		; source2 quadword 0
	mov	QWORD PTR source2$[rsp+8], rcx		; source2 quadword 1
	mov	rax, 72625251546054792			; 0102044810204088H
; Line 250
	movdqa	xmm5, XMMWORD PTR source2$[rsp]		; generators (quadwords 0 and 1)
	mov	QWORD PTR source2$[rsp+16], rax		; source2 quadword 2
	mov	QWORD PTR source2$[rsp+24], rax		; source2 quadword 3 (same value)
	movdqa	xmm0, xmm5
	movdqa	xmm3, xmm5
	psllq	xmm0, 8
	mov	rax, 283691315109952			; 0001020408102040H
	mov	QWORD PTR [rsp+112], rbx		; save rbx
	mov	ebx, 7					; starting index for the print loops
	movdqa	xmm4, XMMWORD PTR source2$[rsp+16]	; occupancy (quadwords 2 and 3)
	pxor	xmm4, XMMWORD PTR ?minusOne@XMM@@1T__m128i@@B ; XMM::minusOne
	or	rcx, rax
	mov	QWORD PTR [rsp+96], rdi			; save rdi
	mov	rdi, rbx				; rdi = 7, index of the first print loop
	movdqa	xmm1, xmm4
	movdqa	xmm2, xmm4
	pand	xmm1, xmm0
	movdqa	xmm0, xmm5
	por	xmm3, xmm1
	psrlq	xmm0, 8
	movdqa	xmm1, xmm4
	mov	rax, rcx
	pand	xmm1, xmm0
	movdqa	xmm0, xmm4
	shr	rax, 16
	por	xmm5, xmm1
	psllq	xmm0, 8
	and	rax, rdx
	or	rcx, rax
	mov	rdx, 896604014343			; 000000d0c1c38707H
	pand	xmm2, xmm0
	mov	rax, rcx
	shr	rax, 32					; 00000020H
	movdqa	xmm0, xmm4
	psrlq	xmm0, 8
	and	rax, rdx
	movdqa	xmm1, xmm2
	pand	xmm4, xmm0
	movdqa	xmm0, xmm3
	or	rcx, rax
	mov	rax, 2314850208468434944		; 2020000000000000H
	shr	rcx, 8					; rcx = value stored to target1 quadword 3 below
	psllq	xmm0, 16
	pand	xmm1, xmm0
	mov	QWORD PTR target1$[rsp], rax		; target1 quadword 0 (immediate)
	mov	rax, -2387759954472171520		; dedcf8f8e8c88800H
	por	xmm3, xmm1
	movdqa	xmm0, xmm5
	movdqa	xmm1, xmm4
	psrlq	xmm0, 16
	mov	QWORD PTR target1$[rsp+8], rax		; target1 quadword 1 (immediate)
	mov	rax, 137977921536			; 0000002020200000H
	pand	xmm1, xmm0
	por	xmm5, xmm1
	mov	QWORD PTR target1$[rsp+16], rax		; target1 quadword 2 (immediate)
	mov	QWORD PTR target1$[rsp+24], rcx		; target1 quadword 3 (computed above)
	movdqa	xmm0, xmm2
	movdqa	xmm1, xmm3
	psllq	xmm0, 16
	psllq	xmm1, 32				; 00000020H
	pand	xmm2, xmm0
	pand	xmm2, xmm1
	movdqa	xmm0, xmm4
	movdqa	xmm1, xmm5
	psrlq	xmm0, 16
	psrlq	xmm1, 32				; 00000020H
	por	xmm3, xmm2
	pand	xmm4, xmm0
	psllq	xmm3, 8
	pand	xmm4, xmm1
	movdqa	XMMWORD PTR target2$[rsp], xmm3		; target2 bytes 0..15 (overwrites source2)
	por	xmm5, xmm4
	psrlq	xmm5, 8
	movdqa	XMMWORD PTR target2$[rsp+16], xmm5	; target2 bytes 16..31
	npad	4
; Line 252
; Print loop 1: target1 bytes 16..23 (first %s) and 24..31 (second %s),
; index rdi = 7 down to 0.
$LL321@main:
	movzx	eax, BYTE PTR target1$[rsp+rdi+24]
	lea	rcx, QWORD PTR [rax+rax*4]
	movzx	eax, BYTE PTR target1$[rsp+rdi+16]
	lea	r8, QWORD PTR [rsi+rcx*2]		; rankStr + byte*10
	lea	rcx, QWORD PTR [rax+rax*4]
	lea	rdx, QWORD PTR [rsi+rcx*2]		; rankStr + byte*10
	lea	rcx, OFFSET FLAT:??_C@_06IINFAHFO@?$CFs?5?$CFs?6?$AA@
	call	printf
	dec	rdi
	jns	SHORT $LL321@main
	lea	rcx, OFFSET FLAT:??_C@_01EEMJAFIK@?6?$AA@
	call	printf
	mov	rdi, rbx				; reset the index to 7
	npad	6
; Line 253
; Print loop 2: target1 bytes 0..7 and 8..15.
$LL326@main:
	movzx	eax, BYTE PTR target1$[rsp+rdi+8]
	lea	rcx, QWORD PTR [rax+rax*4]
	movzx	eax, BYTE PTR target1$[rsp+rdi]
	lea	r8, QWORD PTR [rsi+rcx*2]
	lea	rcx, QWORD PTR [rax+rax*4]
	lea	rdx, QWORD PTR [rsi+rcx*2]
	lea	rcx, OFFSET FLAT:??_C@_06IINFAHFO@?$CFs?5?$CFs?6?$AA@
	call	printf
	dec	rdi
	jns	SHORT $LL326@main
	lea	rcx, OFFSET FLAT:??_C@_01EEMJAFIK@?6?$AA@
	call	printf
	mov	rdi, rbx				; reset the index to 7
	npad	6
; Line 254
; Print loop 3: target2 bytes 16..23 and 24..31.
$LL331@main:
	movzx	eax, BYTE PTR target2$[rsp+rdi+24]
	lea	rcx, QWORD PTR [rax+rax*4]
	movzx	eax, BYTE PTR target2$[rsp+rdi+16]
	lea	r8, QWORD PTR [rsi+rcx*2]
	lea	rcx, QWORD PTR [rax+rax*4]
	lea	rdx, QWORD PTR [rsi+rcx*2]
	lea	rcx, OFFSET FLAT:??_C@_06IINFAHFO@?$CFs?5?$CFs?6?$AA@
	call	printf
	dec	rdi
	jns	SHORT $LL331@main
	lea	rcx, OFFSET FLAT:??_C@_01EEMJAFIK@?6?$AA@
	call	printf
	mov	rdi, QWORD PTR [rsp+96]			; restore rdi; the last loop indexes with rbx
	npad	4
; Line 255
; Print loop 4: target2 bytes 0..7 and 8..15, index rbx = 7 down to 0.
$LL336@main:
	movzx	eax, BYTE PTR target2$[rsp+rbx+8]
	lea	rcx, OFFSET FLAT:??_C@_06IINFAHFO@?$CFs?5?$CFs?6?$AA@
	lea	rdx, QWORD PTR [rax+rax*4]
	movzx	eax, BYTE PTR target2$[rsp+rbx]
	lea	r8, QWORD PTR [rsi+rdx*2]
	lea	rdx, QWORD PTR [rax+rax*4]
	lea	rdx, QWORD PTR [rsi+rdx*2]
	call	printf
	dec	rbx
	jns	SHORT $LL336@main
	lea	rcx, OFFSET FLAT:??_C@_01EEMJAFIK@?6?$AA@
	call	printf
	mov	rsi, QWORD PTR [rsp+104]		; restore rsi
	mov	rbx, QWORD PTR [rsp+112]		; restore rbx
; Line 257
	xor	eax, eax				; return 0
; Line 258
	add	rsp, 120				; 00000078H
	ret	0
main	ENDP
_TEXT	ENDS
END

>#include <stdio.h>
>#include <emmintrin.h>
>
>typedef unsigned __int64 BitBoard;
>
>
>class BBPair
>{
>	friend class XMM;
>	friend class GPR;
>public:
>	__forceinline BBPair(){}
>	__forceinline BBPair(const BitBoard &low, const BitBoard &high) {
>		sgl[0] = low; sgl[1] = high;}
>
>	// some output routines
>	void printBoards()
>	{
>		for ( int i = 7; i >= 0; --i)
>			printf("%s %s\n", rankStr[dbl.m128i_u8[i]],rankStr[dbl.m128i_u8[i+8]]);
>		printf("\n");
>	}
>
>	static void initRankStr() // used by printBoards
>	{
>		for (int i = 0; i < 256; ++i)
>		{
>			for (int j = 0; j < 8; ++j)
>				rankStr[i][j]   = binDigit[(i & (1<<j)) != 0];
>			rankStr[i][j] = '\0';
>		}
>	}
>
>protected:
>	union
>	{
>		__m128i	 dbl; // this intrinsic type is wrapped here
>		BitBoard sgl[2];
>	};
>protected:
>	static const char binDigit[2];
>	static char rankStr[256][10];
>};
>
>char BBPair::rankStr[256][10];
>const char BBPair::binDigit[2] = { '\260','\333'};	// { '0','1'};
>
>class XMM : public BBPair
>{
>public:
>	__forceinline XMM(){}
>	__forceinline XMM(const BitBoard &low, const BitBoard &high) {
>		dbl.m128i_u64[0] = low; dbl.m128i_u64[1] = high;}
>
>	__forceinline XMM(const BitBoard &low) {dbl.m128i_u64[0] = low;}
>
>	__forceinline XMM(__m128i a){dbl = a;}
>	__forceinline XMM(const BBPair& a){dbl = a.dbl;}
>	__forceinline XMM(const __m128i* ptr){dbl = _mm_load_si128(ptr);}
>	__forceinline XMM(const BBPair* ptr){dbl = _mm_load_si128(&ptr->dbl);}
>
>	__forceinline BitBoard getLowBoard() const {return dbl.m128i_u64[0];}
>	__forceinline BitBoard getHighBoard() const {return dbl.m128i_u64[1];}
>
>	__forceinline void store(__m128i* ptr) {_mm_store_si128 (ptr, dbl);}
>	__forceinline void store(BBPair* ptr) {_mm_store_si128 (&ptr->dbl, dbl);}
>
>	__forceinline XMM copyWord0() {return XMM(_mm_shuffle_epi32 (dbl, 0));}
>	__forceinline XMM copyWord1() {return XMM(_mm_shuffle_epi32 (dbl, 0x55));}
>	__forceinline int getInt16() {return _mm_extract_epi16(dbl,0);}
>
>	__forceinline XMM& compare32(const XMM &dbb) {dbl = _mm_cmpeq_epi32(dbl,
>dbb.dbl); return *this;}
>	__forceinline XMM& addAbsByteDiff(const XMM &dbb) {dbl = _mm_sad_epu8(dbl,
>dbb.dbl); return *this;}
>	__forceinline XMM& addWords(const XMM &dbb) {dbl =
>_mm_add_epi32(_mm_shuffle_epi32(dbb.dbl,0x56), dbl); return *this;}
>
>
>	__forceinline XMM& operator>>=(int shift) {	dbl = _mm_srli_epi64(dbl, shift);
>return *this;}
>	__forceinline XMM& operator<<=(int shift) {dbl = _mm_slli_epi64(dbl, shift);
>return *this;}
>	__forceinline XMM& operator&=(const XMM &dbb) {dbl = _mm_and_si128(dbl,
>dbb.dbl); return *this;}
>	__forceinline XMM& operator|=(const XMM &dbb) {dbl = _mm_or_si128(dbl,
>dbb.dbl); return *this;}
>	__forceinline XMM& operator^=(const XMM &dbb) {dbl = _mm_xor_si128(dbl,
>dbb.dbl); return *this;}
>	__forceinline XMM& operator+=(const XMM &dbb) {dbl = _mm_add_epi8(dbl,
>dbb.dbl); return *this;}
>	__forceinline XMM& operator-=(const XMM &dbb) {dbl = _mm_sub_epi8(dbl,
>dbb.dbl); return *this;}
>
>	__forceinline XMM notA() const {return XMM(_mm_and_si128(dbl, snotA));}
>	__forceinline XMM notH() const {return XMM(_mm_and_si128(dbl, snotH));}
>
>	__forceinline friend XMM operator>>(const XMM &dbb, int shift) {return
>XMM(_mm_srli_epi64(dbb.dbl, shift));}
>	__forceinline friend XMM operator<<(const XMM &dbb, int shift) {return
>XMM(_mm_slli_epi64(dbb.dbl, shift));}
>	__forceinline friend XMM operator&(const XMM &a, const XMM &b) {return
>XMM(_mm_and_si128(a.dbl, b.dbl));}
>	__forceinline friend XMM operator|(const XMM &a, const XMM &b) {return
>XMM(_mm_or_si128(a.dbl, b.dbl));}
>	__forceinline friend XMM operator^(const XMM &a, const XMM &b) {return
>XMM(_mm_xor_si128(a.dbl, b.dbl));}
>	__forceinline friend XMM operator+(const XMM &a, const XMM &b) {return
>XMM(_mm_add_epi8(a.dbl, b.dbl));}
>	__forceinline friend XMM operator-(const XMM &a, const XMM &b) {return
>XMM(_mm_sub_epi8(a.dbl, b.dbl));}
>	__forceinline friend XMM operator~(const XMM &a) {return
>XMM(_mm_xor_si128(a.dbl, minusOne));}
>
>
>
>protected:
>	static const __m128i snotA;
>	static const __m128i snotH;
>	static const __m128i minusOne;
>
>};
>
>const __m128i XMM::snotA =
>{
>	'\376', '\376', '\376', '\376',
>	'\376', '\376', '\376', '\376',
>	'\376', '\376', '\376', '\376',
>	'\376', '\376', '\376', '\376',
>};
>
>const __m128i XMM::snotH =
>{
>	'\177', '\177', '\177', '\177',
>	'\177', '\177', '\177', '\177',
>	'\177', '\177', '\177', '\177',
>	'\177', '\177', '\177', '\177',
>};
>
>const __m128i XMM::minusOne =
>{
>	'\377', '\377', '\377', '\377',
>	'\377', '\377', '\377', '\377',
>	'\377', '\377', '\377', '\377',
>	'\377', '\377', '\377', '\377',
>};
>
>class GPR : public BBPair
>{
>public:
>	__forceinline GPR(){}
>	__forceinline GPR(const BitBoard &low, const BitBoard &high) {sgl[0] = low;
>sgl[1] = high;}
>	__forceinline GPR(const BBPair& a) {sgl[0] = a.sgl[0]; sgl[1] = a.sgl[1];}
>	__forceinline GPR(const BBPair* ptr){sgl[0] = ptr->sgl[0]; sgl[1] =
>ptr->sgl[1];}
>
>	__forceinline BitBoard getLowBoard() const {return sgl[0];}
>	__forceinline BitBoard getHighBoard() const {return sgl[1];}
>
>	__forceinline void store(BBPair* ptr) {ptr->sgl[0] = sgl[0]; ptr->sgl[1] =
>sgl[1];}
>
>	__forceinline GPR& operator>>=(int shift) {	sgl[0]>>=shift; sgl[1]>>=shift;
>return *this;}
>	__forceinline GPR& operator<<=(int shift) {	sgl[0]<<=shift; sgl[1]<<=shift;
>return *this;}
>	__forceinline GPR& operator&=(const GPR &dbb) {sgl[0]&=dbb.sgl[0];
>sgl[1]&=dbb.sgl[1]; return *this;}
>	__forceinline GPR& operator|=(const GPR &dbb) {sgl[0]|=dbb.sgl[0];
>sgl[1]|=dbb.sgl[1]; return *this;}
>	__forceinline GPR& operator^=(const GPR &dbb) {sgl[0]^=dbb.sgl[0];
>sgl[1]^=dbb.sgl[1]; return *this;}
>
>	__forceinline GPR notA() const {
>		return GPR(sgl[0]&0xfefefefefefefefe, sgl[1]&0xfefefefefefefefe);}
>
>	__forceinline GPR notH() const {
>		return GPR(sgl[0]&0x7f7f7f7f7f7f7f7f, sgl[1]&0x7f7f7f7f7f7f7f7f);}
>
>	__forceinline friend GPR operator>>(const GPR &dbb, int shift) {
>		return GPR(dbb.sgl[0]>>shift, dbb.sgl[1]>>shift);}
>	__forceinline friend GPR operator<<(const GPR &dbb, int shift) {
>		return GPR(dbb.sgl[0]<<shift, dbb.sgl[1]<<shift);}
>	__forceinline friend GPR operator&(const GPR &a, const GPR &b) {
>		return GPR(a.sgl[0]&b.sgl[0], a.sgl[1]&b.sgl[1]);}
>	__forceinline friend GPR operator|(const GPR &a, const GPR &b) {
>		return GPR(a.sgl[0]|b.sgl[0], a.sgl[1]|b.sgl[1]);}
>	__forceinline friend GPR operator^(const GPR &a, const GPR &b) {
>		return GPR(a.sgl[0]^b.sgl[0], a.sgl[1]^b.sgl[1]);}
>
>	__forceinline friend GPR operator-(const GPR &a, const GPR &b)
>	{
>		GPR c;
>		for (int i=0; i < 16; i++)
>			c.dbl.m128i_u8[i] = a.dbl.m128i_u8[i] - b.dbl.m128i_u8[i];
>		return c;
>	}
>
>	__forceinline friend GPR operator~(const GPR &a) {
>		return GPR(~a.sgl[0], ~a.sgl[1]);}
>
>};
>
>
>struct sSource
>{
>	sSource(BitBoard r1, BitBoard r2) : rooks(r1,r2), occup(r1|r2, r1|r2) {}
>	BBPair rooks;
>	BBPair occup;
>};
>
>struct sTarget
>{
>	BBPair up;
>	BBPair down;
>};
>
>template <class T> void updownAttacks(sTarget* pTarget, const sSource* pSource)
>{
>	T gu(&pSource->rooks);
>	T pu(&pSource->occup);
>	pu = ~pu; // empty
>	T gd(gu);
>	T pd(pu);
>	gu |= pu & (gu<<8);
>	gd |= pd & (gd>>8);
>	pu &= pu<<8;
>	pd &= pd>>8;
>	gu |= pu & (gu<<16);
>	gd |= pd & (gd>>16);
>	pu &= pu<<16;
>	pd &= pd>>16;
>	gu |= pu & (gu<<32);
>	gd |= pd & (gd>>32);
>	(gu<<8).store(&pTarget->up);
>	(gd>>8).store(&pTarget->down);
>}
>
>int main(int argc, char* argv[])
>{
>	BBPair::initRankStr();
>
>	sSource source1(0x0000200000000000, 0x0102040810204088);
>	sSource source2(0x0000004000000000, 0x0102040810204088);
>	sTarget target1;
>	sTarget target2;
>
>	updownAttacks<GPR>(&target1, &source1);
>	updownAttacks<XMM>(&target2, &source2);
>
>	target1.down.printBoards();
>	target1.up.printBoards();
>	target2.down.printBoards();
>	target2.up.printBoards();
>
>	return 0;
>}



This page took 0 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.