Author: Gerd Isenberg
Date: 08:34:32 01/13/05
Go up one level in this thread
On January 13, 2005 at 09:27:03, Daniel Mehrmannn wrote:
>If you're sending me a source, with compiler flags ;), i could do the job for
>you and resending the binary or whatever you want.
>
>Daniel
Hi Daniel,
I have no idea about compiler flags.
I guess some of those you mentioned recently will do.
Consider the following code snippet:
/*
 * Weighted population count of a 64-bit set: bit i of bb selects weight[i]
 * (i = 0..63), and the selected weights are added up.
 *
 * bb      64-bit set whose one-bits pick the entries to add
 * weight  table indexed by bit number; entries 0..63 may be read
 *
 * Returns the sum of weight[i] over all set bits i of bb.
 */
int dotProduct(unsigned __int64 bb, unsigned char weight[] )
{
    int total = 0;
    int square = 0;

    /* Walk bb from the least significant bit upward. Once no set bits
       remain, none of the higher entries can contribute, so stop. */
    while (bb != 0)
    {
        if (bb & 1)
        {
            total += weight[square];
        }
        /* Branch-free alternative, possibly a better hint for the compiler:
           total += -(int)(bb & 1) & weight[square]; */
        bb >>= 1;
        ++square;
    }
    return total;
}
If Intel C is able to "vectorize" this in the following manner,
it would be really great:
/*
 * SSE2 formulation of dotProduct, written as 32-bit MSVC-style inline
 * assembly: bit i of bb selects weight[i] (i = 0..63) and the selected
 * weights are summed.
 *
 * Method: every byte of bb is replicated eight times, each copy is masked
 * with a different single bit and compared against that bit, which turns
 * the 64 bits into 64 bytes of 0xFF / 0x00. Those bytes are ANDed with the
 * weights and summed with psadbw against zero.
 *
 * bb      64-bit set whose one-bits pick the entries to add
 * weight  64 byte weights; they are used as memory operands of pand, so the
 *         table must be 16-byte aligned
 *
 * The result is left in eax and there is no return statement: this relies
 * on eax being the integer return register of the 32-bit calling
 * conventions. The pointer is loaded into a 32-bit register, so the code
 * is for 32-bit targets only.
 *
 * The hex values in the trailing comments show the register contents for
 * the sample input bb = 0x8040201008040201.
 */
int dotProduct(unsigned __int64 bb, unsigned char weight[] )
{
    /* One distinct bit per byte lane. Loaded with movdqa, which requires a
       16-byte aligned operand, hence the explicit alignment. */
    __declspec(align(16)) static const unsigned __int64 bits[2] =
        {0x8040201008040201, 0x8040201008040201};
    __asm
    {
        movq xmm0, [bb] ; 00000000000000008040201008040201
        punpcklbw xmm0, xmm0 ; 80804040202010100808040402020101
        movdqa xmm4, [bits]
        mov eax, [weight] ; address of the weight table
        movdqa xmm2, xmm0
        punpcklwd xmm0, xmm0 ; 08080808040404040202020201010101
        punpckhwd xmm2, xmm2 ; 80808080404040402020202010101010
        movdqa xmm1, xmm0
        movdqa xmm3, xmm2
        punpckldq xmm0, xmm0 ; 02020202020202020101010101010101
        punpckhdq xmm1, xmm1 ; 08080808080808080404040404040404
        punpckldq xmm2, xmm2 ; 20202020202020201010101010101010
        punpckhdq xmm3, xmm3 ; 80808080808080804040404040404040
        pand xmm0, xmm4 ; mask the bits
        pand xmm1, xmm4
        pand xmm2, xmm4
        pand xmm3, xmm4
        pcmpeqb xmm0, xmm4 ; extend bits to bytes
        pcmpeqb xmm1, xmm4
        pcmpeqb xmm2, xmm4
        pcmpeqb xmm3, xmm4
        pxor xmm4, xmm4 ; zero
        pand xmm0, [eax+0*16] ; multiply by "and" with -1 or 0
        pand xmm1, [eax+1*16]
        pand xmm2, [eax+2*16]
        pand xmm3, [eax+3*16]
        psadbw xmm0, xmm4 ; horizontal adds
        psadbw xmm1, xmm4
        psadbw xmm2, xmm4
        psadbw xmm3, xmm4
        paddw xmm0, xmm1 ; vertical adds (at most 64*255, fits in 16 bits)
        paddw xmm0, xmm2
        paddw xmm0, xmm3
        pextrw edx, xmm0, 4 ; extract both intermediate sums to gp
        pextrw eax, xmm0, 0
        add eax, edx ; final add, result returned in eax
    }
}
Gerd
This page took 0 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.