Computer Chess Club Archives


Search

Terms

Messages

Subject: Here is some test data

Author: Dezhi Zhao

Date: 12:08:37 09/04/03

Go up one level in this thread


Yes, the SSE version can beat the regular integer code, but only by a small margin.

Test hardware: P4 1.6 GHz, Intel motherboard, FSB400, SDRAM
Test run results:
&old_key = 40d2d0, &new_key = 40d2c0,  &type_rnd = 40bac0
old_key = 18be678400294823, new_key = 4512153f17260b03,  c++ = 31s
old_key = 18be678400294823, new_key = 4512153f17260b03,  asm = 25s
old_key = 18be678400294823, new_key = 4512153f17260b03,  sse = 23s


// sse.cpp
//

#include "stdafx.h"
#include <stdlib.h>
#include <time.h>

// Number of calls made in each timed loop. The value is larger than INT_MAX,
// so the literal is given an explicit unsigned long type.
#define COUNT_RUN 2500000000UL

// Hash state shared by all three update routines. Everything is 16-byte
// aligned; the SSE routine loads old_key with an aligned 128-bit move.
__declspec(align(16)) unsigned __int64 old_key;		// key before the move
__declspec(align(16)) unsigned __int64 new_key;		// key written by the update routines
__declspec(align(16)) unsigned __int64 type_rnd[12][64];	// indexed by [type][square]

// Unpacked view of a move that callers pass around as a single 32-bit int:
//   bits 0-7 = from, bits 8-15 = to, bits 16-23 = type, bits 24-31 = pad.
struct Move
{
	unsigned char from;		// from square
	unsigned char to;		// to square
	unsigned char type;		// piece type
	unsigned char pad;		// unused

	// Implicit converter from the packed int form (used as "Move m = move;").
	// Each field is extracted with an unsigned shift and truncated to a byte,
	// instead of writing the int over *this, so the result does not depend on
	// byte order or on type-punning through an int pointer.
	Move(int move)
		: from((unsigned char) ((unsigned) move)),
		  to((unsigned char) ((unsigned) move >> 8)),
		  type((unsigned char) ((unsigned) move >> 16)),
		  pad((unsigned char) ((unsigned) move >> 24))
	{
	}
};

// Plain C++ reference version of the key update for a non-capturing move:
// XORs the random numbers of the moving piece on its from and to squares
// into old_key and stores the result in new_key.
//   move - packed move: from in bits 0-7, to in bits 8-15, type in bits 16+
void __fastcall update_key_non_capture(int move)
{
	const Move unpacked(move);	// byte-wise view of the packed move

	// Row of the random table for this piece type; everything above bit 15
	// of move is taken as the type.
	const unsigned __int64 *row = type_rnd[move >> 16];

	new_key = old_key ^ row[unpacked.from] ^ row[unpacked.to];
}


// Hand-written 32-bit x86 version of update_key_non_capture():
//   new_key = old_key ^ type_rnd[type][from] ^ type_rnd[type][to]
// computed as two separate 32-bit halves (low half in ECX, high half in EAX).
//
// The function is declared naked, so the instructions below are the whole
// body. It reads the packed move from ECX (from = bits 0-7, to = bits 8-15,
// type = bits 16 and up) and uses only EAX, ECX and EDX.
//
// NOTE(review): only the low 6 bits are masked after the shift, so bits
// 24-31 of move (the pad byte) end up in the table index; callers presumably
// always pass pad = 0 -- confirm.
__declspec(naked) void __fastcall update_key_non_capture_asm(int move)
{
	__asm
	{
		// Split the packed move into its from and to bytes.
		movzx	eax, cl				// from
		movzx	edx, ch				// to
		// Shifting by 10 instead of 16 leaves type already multiplied by 64
		// (the row length); the mask removes the leftover bits of "to".
		shr ecx, 10				// type * 64
		and ecx, ~63				// mask off
		// Element indices of type_rnd[type][from] and type_rnd[type][to];
		// each element is 8 bytes, hence the *8 scaling below.
		add eax, ecx				// type from index
		add edx, ecx				// type to index
		// Low half accumulates in ECX. EAX is reused for the high half as
		// soon as the "from" index is no longer needed.
		mov ecx, dword ptr [old_key]		// old low32
		xor ecx, dword ptr type_rnd[eax*8]	// from low32
		mov eax, dword ptr type_rnd[eax*8+4]	// from high32
		xor ecx, dword ptr type_rnd[edx*8]	// xor to low32
		xor eax, dword ptr type_rnd[edx*8+4]	// xor to high32
		xor eax, dword ptr [old_key + 4]	// old high32

		mov dword ptr [new_key], ecx		// store low32
		mov dword ptr [new_key+4], eax		// store high32
		ret
	}
}

// SSE version of update_key_non_capture():
//   new_key = old_key ^ type_rnd[type][from] ^ type_rnd[type][to]
// The three operands are loaded as 128-bit values and XORed in XMM registers,
// but only the low 64 bits of the result are stored, so the upper 64 bits of
// every load are ignored.
//
// The function is declared naked, so the instructions below are the whole
// body. It reads the packed move from ECX (from = bits 0-7, to = bits 8-15,
// type = bits 16 and up) and uses EAX, ECX, EDX and XMM0-XMM2.
//
// NOTE(review): each 128-bit load reads 8 bytes beyond the 8-byte object it
// addresses. For type_rnd[11][63] that is 8 bytes past the end of the table,
// and for old_key it is whatever follows old_key in memory -- confirm this
// over-read is acceptable.
// NOTE(review): as in the integer asm version, bits 24-31 of move are not
// masked out of the index; callers presumably always pass pad = 0 -- confirm.
__declspec(naked) void __fastcall update_key_non_capture_sse(int move)
{
	__asm
	{
		// Split the packed move into its from and to bytes.
		movzx	eax, cl				// from
		movzx	edx, ch				// to
		// Shifting by 10 instead of 16 leaves type already multiplied by 64
		// (the row length); the mask removes the leftover bits of "to".
		shr ecx, 10				// type * 64
		and	ecx, ~63			// mask off
		// Aligned load: old_key is declared with 16-byte alignment.
		movaps	xmm2, [old_key]			// old_key 128

		add eax, ecx				// type from index
		add edx, ecx				// type to index

		// Unaligned loads: table elements are 8 bytes each, so an element
		// at an odd index is not 16-byte aligned.
		movups	xmm0, type_rnd[eax*8]		// from 128
		movups	xmm1, type_rnd[edx*8]		// to 128
		xorps	xmm0, xmm2
		xorps	xmm0, xmm1

		// Only the low 64 bits hold the key; the upper half is discarded.
		movlps	[new_key], xmm0			// store 64
		ret
	}
}

// Returns a 64-bit pseudo-random value built from the C library rand().
//
// rand() is only guaranteed to deliver 15 random bits per call
// (RAND_MAX >= 32767), so the value is assembled from five 15-bit chunks
// (5 * 15 = 75 >= 64). Combining two calls as rand() | (rand() << 16) would
// leave bits 15, 31, 47 and 63 permanently zero with a 15-bit rand().
// Calling rand() once per statement also fixes the order of the calls.
__int64 rand64()
{
	unsigned __int64 bits = 0;

	for (int chunk = 0; chunk < 5; chunk++)
	{
		// Unsigned shift: the oldest bits simply fall off the top.
		bits = (bits << 15) ^ (unsigned __int64) (rand() & 0x7fff);
	}

	return (__int64) bits;
}

void init()
{
	old_key = rand64();

	for (int i = 0; i < 12; i++)
		for (int j = 0; j < 64; j++)
			type_rnd[i][j] = rand64();
};


// Benchmark driver. Prints the addresses of the shared data, fills it with
// random values, then times COUNT_RUN calls of each of the three key-update
// routines on the same move and prints the keys and the elapsed whole seconds.
int main(int argc, char* argv[])
{
	time_t t0, t1;
	// COUNT_RUN (2.5e9) does not fit in an int; unsigned long is at least
	// 32 bits wide and unsigned, so the counter can reach it without
	// relying on signed overflow.
	unsigned long i;

	// %p with void* is the matching conversion for printing addresses.
	printf("&old_key = %p, &new_key = %p,  &type_rnd = %p\n",
			(void *) &old_key, (void *) &new_key, (void *) &type_rnd[0][0]);
	init();

	// The time_t differences are cast to int to match the %d conversion,
	// whatever the width of time_t is.
	time(&t0);
	for (i = 0; i < COUNT_RUN; i++)
		update_key_non_capture((12) | (28 << 8) | (0 << 16));	// test e4
	time(&t1);
	printf("old_key = %I64x, new_key = %I64x,  c++ = %ds\n", old_key, new_key, (int) (t1 - t0));

	time(&t0);
	for (i = 0; i < COUNT_RUN; i++)
		update_key_non_capture_asm((12) | (28 << 8) | (0 << 16));	// test e4
	time(&t1);
	printf("old_key = %I64x, new_key = %I64x,  asm = %ds\n", old_key, new_key, (int) (t1 - t0));

	time(&t0);
	for (i = 0; i < COUNT_RUN; i++)
		update_key_non_capture_sse((12) | (28 << 8) | (0 << 16));	// test e4
	time(&t1);
	printf("old_key = %I64x, new_key = %I64x,  sse = %ds\n", old_key, new_key, (int) (t1 - t0));

	return 0;
}



This page took 0.01 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.