Author: Gerd Isenberg
Date: 13:53:04 12/21/05
Go up one level in this thread
Or the other way around - a cheaper loop and break condition, and a few bytes shorter - for all types that have assignment (a constructor) and the binary AND operator& defined.

template <class T, unsigned int nint, const int* i1, const int* i2, int* i3>
__forceinline void andIntVector()
{
    assert(nint % sizeof(T) == 0);
    const T* t1 = (const T*)i1;
    const T* t2 = (const T*)i2;
    T* t3 = (T*)i3;
    for (unsigned int i=(nint * sizeof(int))/sizeof(T) - 4; (int)i >= 0; i-=4)
    {
        t3[i+3] = t1[i+3] & t2[i+3];
        t3[i+2] = t1[i+2] & t2[i+2];
        t3[i+1] = t1[i+1] & t2[i+1];
        t3[i+0] = t1[i+0] & t2[i+0];
    }
}

andIntVector<MMX, 2048, a1, a2, a3>();

_TEXT SEGMENT
??$andIntVector@UMMX@@$0IAA@$1?a1@@3PAHA$1?a2@@3PAHA$1?a3@@3PAHA@@YAXXZ PROC ; andIntVector<MMX,2048,&a1,&a2,&a3>, COMDAT
  00000 33 c0 xor eax, eax
$LL3@andIntVect:
  00002 0f 6f 80 f8 1f 00 00 movq mm0, MMWORD PTR ?a1@@3PAHA[eax+8184]
  00009 0f 6f 88 f8 1f 00 00 movq mm1, MMWORD PTR ?a2@@3PAHA[eax+8184]
  00010 0f db c1 pand mm0, mm1
  00013 0f 7f 80 f8 1f 00 00 movq MMWORD PTR ?a3@@3PAHA[eax+8184], mm0
  0001a 0f 6f 80 f0 1f 00 00 movq mm0, MMWORD PTR ?a1@@3PAHA[eax+8176]
  00021 0f 6f 88 f0 1f 00 00 movq mm1, MMWORD PTR ?a2@@3PAHA[eax+8176]
  00028 0f db c1 pand mm0, mm1
  0002b 0f 7f 80 f0 1f 00 00 movq MMWORD PTR ?a3@@3PAHA[eax+8176], mm0
  00032 0f 6f 80 e8 1f 00 00 movq mm0, MMWORD PTR ?a1@@3PAHA[eax+8168]
  00039 0f 6f 88 e8 1f 00 00 movq mm1, MMWORD PTR ?a2@@3PAHA[eax+8168]
  00040 0f db c1 pand mm0, mm1
  00043 0f 7f 80 e8 1f 00 00 movq MMWORD PTR ?a3@@3PAHA[eax+8168], mm0
  0004a 0f 6f 80 e0 1f 00 00 movq mm0, MMWORD PTR ?a1@@3PAHA[eax+8160]
  00051 0f 6f 88 e0 1f 00 00 movq mm1, MMWORD PTR ?a2@@3PAHA[eax+8160]
  00058 0f db c1 pand mm0, mm1
  0005b 0f 7f 80 e0 1f 00 00 movq MMWORD PTR ?a3@@3PAHA[eax+8160], mm0
  00062 83 e8 20 sub eax, 32 ; 00000020H
  00065 3d 20 e0 ff ff cmp eax, -8160 ; ffffe020H
  0006a 7d 96 jge SHORT $LL3@andIntVect
  0006c c3 ret 0
??$andIntVector@UMMX@@$0IAA@$1?a1@@3PAHA$1?a2@@3PAHA$1?a3@@3PAHA@@YAXXZ ENDP ; andIntVector<MMX,2048,&a1,&a2,&a3>
_TEXT ENDS
andIntVector<XMM, 2048, a1, a2, a3>();

_TEXT ENDS
PUBLIC ??$andIntVector@VXMM@@$0IAA@$1?a1@@3PAHA$1?a2@@3PAHA$1?a3@@3PAHA@@YAXXZ ; andIntVector<XMM,2048,&a1,&a2,&a3>
; Function compile flags: /Ogtpy
; COMDAT ??$andIntVector@VXMM@@$0IAA@$1?a1@@3PAHA$1?a2@@3PAHA$1?a3@@3PAHA@@YAXXZ
_TEXT SEGMENT
??$andIntVector@VXMM@@$0IAA@$1?a1@@3PAHA$1?a2@@3PAHA$1?a3@@3PAHA@@YAXXZ PROC ; andIntVector<XMM,2048,&a1,&a2,&a3>, COMDAT
  00000 33 c0 xor eax, eax
  00002 eb 0c 8d a4 24 00 00 00 00 eb 03 8d 49 00 npad 14
$LL3@andIntVect:
  00010 66 0f 6f 88 f0 1f 00 00 movdqa xmm1, XMMWORD PTR ?a2@@3PAHA[eax+8176]
  00018 66 0f 6f 80 f0 1f 00 00 movdqa xmm0, XMMWORD PTR ?a1@@3PAHA[eax+8176]
  00020 66 0f db c1 pand xmm0, xmm1
  00024 66 0f 6f 88 e0 1f 00 00 movdqa xmm1, XMMWORD PTR ?a2@@3PAHA[eax+8160]
  0002c 66 0f 7f 80 f0 1f 00 00 movdqa XMMWORD PTR ?a3@@3PAHA[eax+8176], xmm0
  00034 66 0f 6f 80 e0 1f 00 00 movdqa xmm0, XMMWORD PTR ?a1@@3PAHA[eax+8160]
  0003c 66 0f db c1 pand xmm0, xmm1
  00040 66 0f 6f 88 d0 1f 00 00 movdqa xmm1, XMMWORD PTR ?a2@@3PAHA[eax+8144]
  00048 66 0f 7f 80 e0 1f 00 00 movdqa XMMWORD PTR ?a3@@3PAHA[eax+8160], xmm0
  00050 66 0f 6f 80 d0 1f 00 00 movdqa xmm0, XMMWORD PTR ?a1@@3PAHA[eax+8144]
  00058 66 0f db c1 pand xmm0, xmm1
  0005c 66 0f 6f 88 c0 1f 00 00 movdqa xmm1, XMMWORD PTR ?a2@@3PAHA[eax+8128]
  00064 66 0f 7f 80 d0 1f 00 00 movdqa XMMWORD PTR ?a3@@3PAHA[eax+8144], xmm0
  0006c 66 0f 6f 80 c0 1f 00 00 movdqa xmm0, XMMWORD PTR ?a1@@3PAHA[eax+8128]
  00074 66 0f db c1 pand xmm0, xmm1
  00078 66 0f 7f 80 c0 1f 00 00 movdqa XMMWORD PTR ?a3@@3PAHA[eax+8128], xmm0
  00080 83 e8 40 sub eax, 64 ; 00000040H
  00083 3d 40 e0 ff ff cmp eax, -8128 ; ffffe040H
  00088 7d 86 jge SHORT $LL3@andIntVect
  0008a c3 ret 0
??$andIntVector@VXMM@@$0IAA@$1?a1@@3PAHA$1?a2@@3PAHA$1?a3@@3PAHA@@YAXXZ ENDP ; andIntVector<XMM,2048,&a1,&a2,&a3>
_TEXT ENDS
This page took 0 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.