Author: Dezhi Zhao
Date: 07:29:47 09/05/03
Go up one level in this thread
MMX would be a clear win if it could go without the dirty emms instruction.
Here are the results without emms:
#1
old_key = 18be678400294823, new_key = 4512153f17260b03, c++ = 30s
old_key = 18be678400294823, new_key = 4512153f17260b03, asm = 26s
old_key = 18be678400294823, new_key = 4512153f17260b03, mmx = 19s
old_key = 18be678400294823, new_key = 4512153f17260b03, sse = 22s
old_key = 18be678400294823, new_key = 4512153f17260b03, sse2 = 23s
#2
old_key = 18be678400294823, new_key = 4512153f17260b03, c++ = 31s
old_key = 18be678400294823, new_key = 4512153f17260b03, asm = 25s
old_key = 18be678400294823, new_key = 4512153f17260b03, mmx = 19s
old_key = 18be678400294823, new_key = 4512153f17260b03, sse = 23s
old_key = 18be678400294823, new_key = 4512153f17260b03, sse2 = 23s
#3
old_key = 18be678400294823, new_key = 4512153f17260b03, c++ = 30s
old_key = 18be678400294823, new_key = 4512153f17260b03, asm = 26s
old_key = 18be678400294823, new_key = 4512153f17260b03, mmx = 19s
old_key = 18be678400294823, new_key = 4512153f17260b03, sse = 22s
old_key = 18be678400294823, new_key = 4512153f17260b03, sse2 = 23s
If emms is included, the time goes up to 39s.
__declspec(naked) void __fastcall update_key_non_capture_mmx(int move)
{
    // Computes  new_key = old_key ^ type_rnd[type*64 + from] ^ type_rnd[type*64 + to]
    // using 64-bit MMX loads/stores (MSVC inline assembly, 32-bit x86).
    //
    // In:    ecx = move (the single __fastcall argument)
    //          bits  0..7  -> "from" index
    //          bits  8..15 -> "to" index
    //          bits 16..31 -> "type"
    //        (field names follow the original author's comments)
    // Out:   new_key receives the 64-bit result; nothing is returned.
    // Clobb: eax, ecx, edx, mm0, mm1, mm2, flags
    //
    // old_key, new_key and type_rnd are defined outside this block; they are
    // accessed here as 64-bit quantities (type_rnd as an array of 8-byte entries).
    //
    // The function is naked: no prologue/epilogue is generated, so the
    // explicit ret below is the only way out.
    __asm
    {
        movzx   edx, ch                     // edx = to
        movzx   eax, cl                     // eax = from
        shr     ecx, 16                     // ecx = type (drops from/to bits)
        shl     ecx, 6                      // ecx = type * 64
        add     eax, ecx                    // eax = type * 64 + from
        add     edx, ecx                    // edx = type * 64 + to
        movq    mm1, type_rnd[edx*8]        // mm1 = random value for (type, to)
        movq    mm0, type_rnd[eax*8]        // mm0 = random value for (type, from)
        movq    mm2, [old_key]              // mm2 = old_key
        pxor    mm0, mm1                    // mm0 = rnd(from) ^ rnd(to)
        pxor    mm0, mm2                    // mm0 ^= old_key
        movq    [new_key], mm0              // new_key = mm0
        // emms is deliberately left out: the author measured it as a
        // performance killer. NOTE(review): the MMX registers alias the x87
        // register stack, so the caller must issue emms before any x87
        // floating-point code runs after this function.
        // emms
        ret
    }
}
Regards,
Zhao
This page took 0 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.