Author: Gerd Isenberg
Date: 11:36:21 09/13/02
Go up one level in this thread
On September 13, 2002 at 04:07:55, Sune Fischer wrote:
>On September 12, 2002 at 21:19:08, Gerd Isenberg wrote:
>
>>Hi all,
>>
>>My first MMX-Routine (MSC inline asm) that gains considerable performance!
>
>Wow, that's great. How much faster do think it is?
>
A loop of 100,000,000 * 2 calls with the two sample pathes take (XP2.1):
C-Version with inlined shift 80 seconds
shlightly improved mmx version 56 seconds
10fold unrolled boolean mmx 35 seconds
I find with paddb a nice replacement for left shift one (board right) with no
need to mask for byte-wraps.
regards,
Gerd
<snip>
//===========================================================
// int getPathDistanceMMX - mmx flood fill algorithm
// ATHLON version because of pswapd
// path - a set of squares that directs the flood fill
// from - the square the flood fill starts
// !should be member of path,
// or for tempo reasons one adjacent!
// to - the target of the flood fill
// !should be member of path,
// otherwise we never reach it!
// return - 0 (FALSE) if no from,to connection exists
// > 0 the shortest distance(from,to) along the path
//===========================================================
#pragma warning(disable:4035) // no return value (eax)
int getPathDistanceMMX (BitBoard path, int from, int to)
{
static BitBoard notH = 0x7f7f7f7f7f7f7f7f;
__asm
{
movd mm0, [path]
punpckldq mm0, [path+4]
mov eax, [from]
mov edx, [to]
movq mm1, [scBBOfBit+8*eax] ; sq1 := initial from-bitboard
movq mm2, [scBBOfBit+8*edx] ; sq2 := initial to-bitboard
xor eax, eax ; distance := 0
movq mm6, [notH] ; makes loop 4 bytes shorter
floodfill:
// flood fill in left and right direction
movq mm3, mm1 ; := sq1
movq mm4, mm1 ; := sq1
movq mm5, mm1 ; store sq1 to measure progress later
psrlq mm3, 1 ; sq1>>1 board shift left in h->a direction
paddb mm4, mm4 ; board shift right in a->h direction, but no wrap
pand mm3, mm6 ; clear h-file due to wrapped bits from a file
por mm1, mm4 ; sq1 |= sq1<<1
por mm1, mm3 ; sq1 |= sq1>>1
// flood fill in up and down direction
inc eax ; distance := distance + 1
movq mm3, mm1 ; := sq1
movq mm4, mm1 ; := sq1
psrlq mm3, 8 ; sq1>>8 shift down in 8->1 direction
psllq mm4, 8 ; sq1<<8 shift up in 1->8 direction
por mm1, mm3 ; sq1 |= sq1>>8
por mm1, mm4 ; sq1 |= sq1<<8
pand mm1, mm0 ; sq1 & path -> drop flood not in path
// if there is an intersection between sq1-flood and sq2,
// shortest connection is found
// if the flood makes no progress, there is no connection
// both conditions are processed simultaniously
movq mm4, mm2 ; intersec := sq2 ...
pxor mm5, mm1 ; progress := previous sq1 ^ sq1
pand mm4, mm1 ; intersec := sq1 & sq2
pswapd mm3, mm5 ; high(progress)
pswapd mm7, mm4 ; high(intersec)
por mm3, mm5 ; high(progress) | low(progress)
por mm7, mm4 ; high(intersec) | low(intersec)
movd edx, mm7 ; packed(intersec)
test edx, edx ; intersection ?
movd edx, mm3 ; packed(progress)
jnz connected ; intersection ==> connected
test edx, edx ; progress ?
jnz floodfill ; yes, do next flood fill iteration
xor eax, eax ; otherwise disconnected, return 0
connected:
}
}
This page took 0 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.