Computer Chess Club Archives


Search

Terms

Messages

Subject: Re: yes, i'm learning C++

Author: Gerd Isenberg

Date: 13:53:04 12/21/05

Go up one level in this thread


Or the other way around: a cheaper loop and break condition, and a few bytes shorter.
It works for all types that define assignment (a constructor) and the binary AND operator&.

// Element-wise AND of two int buffers into a third, processed in chunks of
// type T.
//
// Template parameters:
//   T      - chunk type; must provide assignment and a binary operator&.
//   nint   - length of each buffer, counted in ints.
//   i1, i2 - the two input buffers.
//   i3     - the output buffer; chunk k receives (chunk k of i1) & (chunk k of i2).
//
// The buffers are walked from the highest chunk index downwards, four chunks
// per pass, so the loop test reduces to a sign check on the counter.
template <class T, unsigned int nint, const int* i1, const int* i2, int* i3>
__forceinline
void andIntVector()
{
  // With a 4-byte int this is exactly the condition for the chunk count to be
  // a multiple of four, which the four-way unrolled body below relies on.
  assert(nint % sizeof(T) == 0);

  // View the int buffers as arrays of T.
  const T* const lhs = reinterpret_cast<const T*>(i1);
  const T* const rhs = reinterpret_cast<const T*>(i2);
  T* const dst = reinterpret_cast<T*>(i3);

  // Index of the lowest chunk in the topmost group of four.
  unsigned int base = (nint * sizeof(int)) / sizeof(T) - 4;

  // The counter is unsigned; reading it as int turns the wrap below zero into
  // a negative value, which ends the loop.
  while (static_cast<int>(base) >= 0) {
    dst[base+3] = lhs[base+3] & rhs[base+3];
    dst[base+2] = lhs[base+2] & rhs[base+2];
    dst[base+1] = lhs[base+1] & rhs[base+1];
    dst[base+0] = lhs[base+0] & rhs[base+0];
    base -= 4;
  }
}

// Instantiate with T = MMX, treating a1, a2 and a3 as buffers of 2048 ints.
// MMX and the three arrays are declared elsewhere (not shown in this post);
// the listing that follows uses 8-byte movq/pand, so MMX is presumably a
// 64-bit wrapper type.
andIntVector<MMX, 2048, a1, a2, a3>();

; Compiler listing for andIntVector<MMX,2048,&a1,&a2,&a3>.
; eax is a byte offset that starts at 0 and drops by 32 on every pass; the
; fixed displacements 8184/8176/8168/8160 select four consecutive 8-byte
; elements, highest address first.
_TEXT	SEGMENT
??$andIntVector@UMMX@@$0IAA@$1?a1@@3PAHA$1?a2@@3PAHA$1?a3@@3PAHA@@YAXXZ PROC ;
andIntVector<MMX,2048,&a1,&a2,&a3>, COMDAT
; eax = 0 (running byte offset)
  00000	33 c0		 xor	 eax, eax
$LL3@andIntVect:
; element at +8184: load a1 and a2, AND them, store to a3
  00002	0f 6f 80 f8 1f
	00 00		 movq	 mm0, MMWORD PTR ?a1@@3PAHA[eax+8184]
  00009	0f 6f 88 f8 1f
	00 00		 movq	 mm1, MMWORD PTR ?a2@@3PAHA[eax+8184]
  00010	0f db c1	 pand	 mm0, mm1
  00013	0f 7f 80 f8 1f
	00 00		 movq	 MMWORD PTR ?a3@@3PAHA[eax+8184], mm0
; element at +8176
  0001a	0f 6f 80 f0 1f
	00 00		 movq	 mm0, MMWORD PTR ?a1@@3PAHA[eax+8176]
  00021	0f 6f 88 f0 1f
	00 00		 movq	 mm1, MMWORD PTR ?a2@@3PAHA[eax+8176]
  00028	0f db c1	 pand	 mm0, mm1
  0002b	0f 7f 80 f0 1f
	00 00		 movq	 MMWORD PTR ?a3@@3PAHA[eax+8176], mm0
; element at +8168
  00032	0f 6f 80 e8 1f
	00 00		 movq	 mm0, MMWORD PTR ?a1@@3PAHA[eax+8168]
  00039	0f 6f 88 e8 1f
	00 00		 movq	 mm1, MMWORD PTR ?a2@@3PAHA[eax+8168]
  00040	0f db c1	 pand	 mm0, mm1
  00043	0f 7f 80 e8 1f
	00 00		 movq	 MMWORD PTR ?a3@@3PAHA[eax+8168], mm0
; element at +8160
  0004a	0f 6f 80 e0 1f
	00 00		 movq	 mm0, MMWORD PTR ?a1@@3PAHA[eax+8160]
  00051	0f 6f 88 e0 1f
	00 00		 movq	 mm1, MMWORD PTR ?a2@@3PAHA[eax+8160]
  00058	0f db c1	 pand	 mm0, mm1
  0005b	0f 7f 80 e0 1f
	00 00		 movq	 MMWORD PTR ?a3@@3PAHA[eax+8160], mm0
; step down by four 8-byte elements and loop while eax >= -8160; the last
; pass (eax = -8160) therefore covers byte offsets 0..31 of the arrays
  00062	83 e8 20	 sub	 eax, 32			; 00000020H
  00065	3d 20 e0 ff ff	 cmp	 eax, -8160		; ffffe020H
  0006a	7d 96		 jge	 SHORT $LL3@andIntVect
  0006c	c3		 ret	 0
??$andIntVector@UMMX@@$0IAA@$1?a1@@3PAHA$1?a2@@3PAHA$1?a3@@3PAHA@@YAXXZ ENDP ;
andIntVector<MMX,2048,&a1,&a2,&a3>
_TEXT	ENDS


// Instantiate with T = XMM, treating a1, a2 and a3 as buffers of 2048 ints.
// XMM and the three arrays are declared elsewhere (not shown in this post);
// the listing that follows uses 16-byte movdqa/pand, so XMM is presumably a
// 128-bit wrapper type.
andIntVector<XMM, 2048, a1, a2, a3>();


_TEXT	ENDS
; Compiler listing for andIntVector<XMM,2048,&a1,&a2,&a3>.
; eax is a byte offset that starts at 0 and drops by 64 on every pass; the
; fixed displacements 8176/8160/8144/8128 select four consecutive 16-byte
; elements, highest address first.
PUBLIC	??$andIntVector@VXMM@@$0IAA@$1?a1@@3PAHA$1?a2@@3PAHA$1?a3@@3PAHA@@YAXXZ ;
andIntVector<XMM,2048,&a1,&a2,&a3>
; Function compile flags: /Ogtpy
;	COMDAT ??$andIntVector@VXMM@@$0IAA@$1?a1@@3PAHA$1?a2@@3PAHA$1?a3@@3PAHA@@YAXXZ
_TEXT	SEGMENT
??$andIntVector@VXMM@@$0IAA@$1?a1@@3PAHA$1?a2@@3PAHA$1?a3@@3PAHA@@YAXXZ PROC ;
andIntVector<XMM,2048,&a1,&a2,&a3>, COMDAT
; eax = 0 (running byte offset)
  00000	33 c0		 xor	 eax, eax
; 14 bytes of padding so that the loop label lands at offset 00010
  00002	eb 0c 8d a4 24
	00 00 00 00 eb
	03 8d 49 00	 npad	 14
$LL3@andIntVect:
; element at +8176: load a2 and a1, AND them
  00010	66 0f 6f 88 f0
	1f 00 00	 movdqa	 xmm1, XMMWORD PTR ?a2@@3PAHA[eax+8176]
  00018	66 0f 6f 80 f0
	1f 00 00	 movdqa	 xmm0, XMMWORD PTR ?a1@@3PAHA[eax+8176]
  00020	66 0f db c1	 pand	 xmm0, xmm1
; the a2 load for the next element (+8160) is issued before the +8176 result
; is stored; the same interleaving repeats for the following elements
  00024	66 0f 6f 88 e0
	1f 00 00	 movdqa	 xmm1, XMMWORD PTR ?a2@@3PAHA[eax+8160]
  0002c	66 0f 7f 80 f0
	1f 00 00	 movdqa	 XMMWORD PTR ?a3@@3PAHA[eax+8176], xmm0
  00034	66 0f 6f 80 e0
	1f 00 00	 movdqa	 xmm0, XMMWORD PTR ?a1@@3PAHA[eax+8160]
  0003c	66 0f db c1	 pand	 xmm0, xmm1
  00040	66 0f 6f 88 d0
	1f 00 00	 movdqa	 xmm1, XMMWORD PTR ?a2@@3PAHA[eax+8144]
  00048	66 0f 7f 80 e0
	1f 00 00	 movdqa	 XMMWORD PTR ?a3@@3PAHA[eax+8160], xmm0
  00050	66 0f 6f 80 d0
	1f 00 00	 movdqa	 xmm0, XMMWORD PTR ?a1@@3PAHA[eax+8144]
  00058	66 0f db c1	 pand	 xmm0, xmm1
  0005c	66 0f 6f 88 c0
	1f 00 00	 movdqa	 xmm1, XMMWORD PTR ?a2@@3PAHA[eax+8128]
  00064	66 0f 7f 80 d0
	1f 00 00	 movdqa	 XMMWORD PTR ?a3@@3PAHA[eax+8144], xmm0
  0006c	66 0f 6f 80 c0
	1f 00 00	 movdqa	 xmm0, XMMWORD PTR ?a1@@3PAHA[eax+8128]
  00074	66 0f db c1	 pand	 xmm0, xmm1
  00078	66 0f 7f 80 c0
	1f 00 00	 movdqa	 XMMWORD PTR ?a3@@3PAHA[eax+8128], xmm0
; step down by four 16-byte elements and loop while eax >= -8128; the last
; pass (eax = -8128) therefore covers byte offsets 0..63 of the arrays
  00080	83 e8 40	 sub	 eax, 64			; 00000040H
  00083	3d 40 e0 ff ff	 cmp	 eax, -8128		; ffffe040H
  00088	7d 86		 jge	 SHORT $LL3@andIntVect
  0008a	c3		 ret	 0
??$andIntVector@VXMM@@$0IAA@$1?a1@@3PAHA$1?a2@@3PAHA$1?a3@@3PAHA@@YAXXZ ENDP ;
andIntVector<XMM,2048,&a1,&a2,&a3>
_TEXT	ENDS



This page took 0 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.