Computer Chess Club Archives


Search

Terms

Messages

Subject: yes, i'm learning C++

Author: Gerd Isenberg

Date: 11:57:01 12/21/05

Go up one level in this thread


with all compile time parameters:

// Bitwise-ANDs two int arrays into a third, with every parameter fixed at
// compile time so the compiler sees constant addresses and a constant trip
// count.
//
//   T       element type used for the loads, the AND and the stores. It must
//           provide operator& and assignment (e.g. int, or a wider vector
//           wrapper such as XMM -- presumably a 128-bit SSE2 type declared
//           elsewhere; confirm).
//   nint    length of each array, counted in ints.
//   i1, i2  source arrays.
//   i3      destination array.
//
// The arrays are reinterpreted as arrays of T, so they must be suitably
// aligned for T. Trailing bytes that do not fill a whole T are not processed.
template <class T, unsigned int nint, const int* i1, const int* i2, int* i3>
__forceinline
void andIntVector()
{
  const T* t1 = reinterpret_cast<const T*>(i1);
  const T* t2 = reinterpret_cast<const T*>(i2);
  T* t3 = reinterpret_cast<T*>(i3);

  // Number of whole T elements covered by nint ints.
  const unsigned int count =
      static_cast<unsigned int>((nint * sizeof(int)) / sizeof(T));
  // Largest multiple of 4 not exceeding count: the 4x-unrolled loop must stop
  // here, otherwise its last pass would touch elements beyond the arrays.
  const unsigned int unrolled = count - (count % 4);

  unsigned int i = 0;
  for (; i < unrolled; i += 4) {
    t3[i+0] = t1[i+0] & t2[i+0];
    t3[i+1] = t1[i+1] & t2[i+1];
    t3[i+2] = t1[i+2] & t2[i+2];
    t3[i+3] = t1[i+3] & t2[i+3];
  }
  // Remaining 0..3 elements; both bounds are compile-time constants, so this
  // loop disappears entirely when count is a multiple of 4.
  for (; i < count; ++i) {
    t3[i] = t1[i] & t2[i];
  }
}

a few 32-bit compiles with msvc2005 express:

// Global buffers handed to andIntVector as compile-time template arguments:
// a1 and a2 are the two inputs, a3 receives the AND result.
// NOTE(review): XMM_ALIGN is defined elsewhere -- presumably a 16-byte
// alignment specifier, which the movdqa accesses in the XMM listing below
// would require; confirm.
int XMM_ALIGN a1[2048];
int XMM_ALIGN a2[2048];
int XMM_ALIGN a3[2048];

---------------------------------------------------------------------------
andIntVector<XMM, 2048, a1, a2, a3>();

The assembly looks nice, but IMHO the register usage is too conservative with
this Express compiler. I would prefer:

4*movdqa xmmi, [source1+reg]
4*pand   xmmi, [source2+reg]
4*movdqa [target+reg], xmmi

??$andIntVector@VXMM@@$0IAA@$1?a1@@3PAHA$1?a2@@3PAHA$1?a3@@3PAHA@@YAXXZ PROC ;
andIntVector<XMM,2048,&a1,&a2,&a3>, COMDAT
  00000	33 c0		 xor	 eax, eax
  00002	b9 80 00 00 00	 mov	 ecx, 128		; 00000080H
  00007	eb 07 8d a4 24
	00 00 00 00	 npad	 9
$LL3@andIntVect:
  00010	66 0f 6f 88 00
	00 00 00	 movdqa	 xmm1, XMMWORD PTR ?a2@@3PAHA[eax]
  00018	66 0f 6f 80 00
	00 00 00	 movdqa	 xmm0, XMMWORD PTR ?a1@@3PAHA[eax]
  00020	66 0f db c1	 pand	 xmm0, xmm1
  00024	66 0f 6f 88 10
	00 00 00	 movdqa	 xmm1, XMMWORD PTR ?a2@@3PAHA[eax+16]
  0002c	66 0f 7f 80 00
	00 00 00	 movdqa	 XMMWORD PTR ?a3@@3PAHA[eax], xmm0
  00034	66 0f 6f 80 10
	00 00 00	 movdqa	 xmm0, XMMWORD PTR ?a1@@3PAHA[eax+16]
  0003c	66 0f db c1	 pand	 xmm0, xmm1
  00040	66 0f 6f 88 20
	00 00 00	 movdqa	 xmm1, XMMWORD PTR ?a2@@3PAHA[eax+32]
  00048	66 0f 7f 80 10
	00 00 00	 movdqa	 XMMWORD PTR ?a3@@3PAHA[eax+16], xmm0
  00050	66 0f 6f 80 20
	00 00 00	 movdqa	 xmm0, XMMWORD PTR ?a1@@3PAHA[eax+32]
  00058	66 0f db c1	 pand	 xmm0, xmm1
  0005c	66 0f 6f 88 30
	00 00 00	 movdqa	 xmm1, XMMWORD PTR ?a2@@3PAHA[eax+48]
  00064	66 0f 7f 80 20
	00 00 00	 movdqa	 XMMWORD PTR ?a3@@3PAHA[eax+32], xmm0
  0006c	66 0f 6f 80 30
	00 00 00	 movdqa	 xmm0, XMMWORD PTR ?a1@@3PAHA[eax+48]
  00074	66 0f db c1	 pand	 xmm0, xmm1
  00078	66 0f 7f 80 30
	00 00 00	 movdqa	 XMMWORD PTR ?a3@@3PAHA[eax+48], xmm0
  00080	83 c0 40	 add	 eax, 64			; 00000040H
  00083	83 e9 01	 sub	 ecx, 1
  00086	75 88		 jne	 SHORT $LL3@andIntVect
  00088	c3		 ret	 0
??$andIntVector@VXMM@@$0IAA@$1?a1@@3PAHA$1?a2@@3PAHA$1?a3@@3PAHA@@YAXXZ ENDP ;
andIntVector<XMM,2048,&a1,&a2,&a3>
_TEXT	ENDS

---------------------------------------------------------------------------
andIntVector<int, 2048, a1, a2, a3>();

_TEXT	SEGMENT
??$andIntVector@H$0IAA@$1?a1@@3PAHA$1?a2@@3PAHA$1?a3@@3PAHA@@YAXXZ PROC ;
andIntVector<int,2048,&a1,&a2,&a3>, COMDAT
  00000	33 c0		 xor	 eax, eax
  00002	b9 00 02 00 00	 mov	 ecx, 512		; 00000200H
  00007	eb 07 8d a4 24
	00 00 00 00	 npad	 9
$LL3@andIntVect@4:
  00010	8b 90 00 00 00
	00		 mov	 edx, DWORD PTR ?a2@@3PAHA[eax]
  00016	23 90 00 00 00
	00		 and	 edx, DWORD PTR ?a1@@3PAHA[eax]
  0001c	83 c0 10	 add	 eax, 16			; 00000010H
  0001f	89 90 f0 ff ff
	ff		 mov	 DWORD PTR ?a3@@3PAHA[eax-16], edx
  00025	8b 90 f4 ff ff
	ff		 mov	 edx, DWORD PTR ?a2@@3PAHA[eax-12]
  0002b	23 90 f4 ff ff
	ff		 and	 edx, DWORD PTR ?a1@@3PAHA[eax-12]
  00031	89 90 f4 ff ff
	ff		 mov	 DWORD PTR ?a3@@3PAHA[eax-12], edx
  00037	8b 90 f8 ff ff
	ff		 mov	 edx, DWORD PTR ?a2@@3PAHA[eax-8]
  0003d	23 90 f8 ff ff
	ff		 and	 edx, DWORD PTR ?a1@@3PAHA[eax-8]
  00043	89 90 f8 ff ff
	ff		 mov	 DWORD PTR ?a3@@3PAHA[eax-8], edx
  00049	8b 90 fc ff ff
	ff		 mov	 edx, DWORD PTR ?a2@@3PAHA[eax-4]
  0004f	23 90 fc ff ff
	ff		 and	 edx, DWORD PTR ?a1@@3PAHA[eax-4]
  00055	83 e9 01	 sub	 ecx, 1
  00058	89 90 fc ff ff
	ff		 mov	 DWORD PTR ?a3@@3PAHA[eax-4], edx
  0005e	75 b0		 jne	 SHORT $LL3@andIntVect@4
  00060	c3		 ret	 0
??$andIntVector@H$0IAA@$1?a1@@3PAHA$1?a2@@3PAHA$1?a3@@3PAHA@@YAXXZ ENDP ;
andIntVector<int,2048,&a1,&a2,&a3>
_TEXT	ENDS



This page took 0 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.