Author: Robert Hyatt
Date: 08:50:51 06/11/02
Go up one level in this thread
On June 10, 2002 at 17:21:32, Peter Fendrich wrote:
>On June 10, 2002 at 15:47:14, Dann Corbit wrote:
>
>Dann,
>why do you collect these rotating functions?
>When it comes to me I don't make any rotating during search just during the
>initial phase when the program starts.
>During search it is lookups in order to get the rotated file or diagonal.
>
>Peter
>
It is an attempt to avoid the incremental updates in Make/Unmake. The cost
of incremental updating is not free. Whether it is less or more expensive
than the cost of direct rotating is another issue...
>>#include <stdio.h>
>>#include <stdlib.h>
>>
>>#ifdef _MSC_VER
>>typedef unsigned __int64 uint64;
>>#else
>>typedef unsigned long long uint64;
>>#endif
>>#ifndef ASSEMBLY_GOO
>>extern double long get_time(void);
>>extern double time_diff(double t1, double t2);
>>
>>#endif
>>static uint64 LUT[256];
>>
>>extern uint64 NOP(uint64 a);
>>extern uint64 rotate90a(uint64 a);
>>extern uint64 rotate90b(uint64 a);
>>extern uint64 rotateLa(uint64 x);
>>extern uint64 rotateLb(uint64 x);
>>extern uint64 rotateLc(uint64 x);
>>
>>extern void dump(uint64 x);
>>extern void fillLUT(void);
>>
>>typedef uint64(*rotfunc) (uint64);
>>
>>typedef struct tag_rotlist {
>> char *description;
>> rotfunc r;
>>} rotlist;
>>
>>static rotlist rlist[] = {
>> {"NOP", NOP},
>> {"rotate90a", rotate90a},
>> {"rotate90b", rotate90b},
>> {"rotateLa", rotateLa},
>> /* {"rotateLb", rotateLb}, */
>> {"rotateLc", rotateLc},
>> {0, 0}
>>};
>>
>>uint64 rotate90a(uint64 a)
>>{
>> a = (a & 0xf0f0f0f000000000) >> 32
>> | (a & 0x0f0f0f0f00000000) << 4
>> | (a & 0x00000000f0f0f0f0) >> 4
>> | (a & 0x000000000f0f0f0f) << 32;
>> a = (a & 0xcccc0000cccc0000) >> 16
>> | (a & 0x3333000033330000) << 2
>> | (a & 0x0000cccc0000cccc) >> 2
>> | (a & 0x0000333300003333) << 16;
>> a = (a & 0xaa00aa00aa00aa00) >> 8
>> | (a & 0x5500550055005500) << 1
>> | (a & 0x00aa00aa00aa00aa) >> 1
>> | (a & 0x0055005500550055) << 8;
>> return a;
>>}
>>
>>uint64 rotate90b(uint64 a)
>>{
>> a = (a & 0xf0f0f0f000000000) >> 32
>> | (a << 4 & 0xf0f0f0f000000000)
>> | (a >> 4 & 0x000000000f0f0f0f)
>> | (a & 0x000000000f0f0f0f) << 32;
>> a = (a & 0xcccc0000cccc0000) >> 16
>> | (a << 2 & 0xcccc0000cccc0000)
>> | (a >> 2 & 0x0000333300003333)
>> | (a & 0x0000333300003333) << 16;
>> a = (a & 0xaa00aa00aa00aa00) >> 8
>> | (a << 1 & 0xaa00aa00aa00aa00)
>> | (a >> 1 & 0x0055005500550055)
>> | (a & 0x0055005500550055) << 8;
>> return a;
>>}
>>
>>uint64 rotateLa(uint64 x)
>>{
>> return LUT[x >> 56 & 0xff] << 7
>> | LUT[x >> 48 & 0xff] << 6
>> | LUT[x >> 40 & 0xff] << 5
>> | LUT[x >> 32 & 0xff] << 4
>> | LUT[x >> 24 & 0xff] << 3
>> | LUT[x >> 16 & 0xff] << 2
>> | LUT[x >> 8 & 0xff] << 1
>> | LUT[x & 0xff];
>>}
>>
>>static const uint64 bytemask = 0xff;
>>static const uint64 longone = 1;
>>/* This one is broken
>>uint64 rotateLb(uint64 x)
>>{
>> return LUT[x >> 56 & 0xff] & 0x80808080808080
>> | LUT[x >> 48 & 0xff] & 0x40404040404040
>> | LUT[x >> 40 & 0xff] & 0x20202020202020
>> | LUT[x >> 32 & 0xff] & 0x10101010101010
>> | LUT[x >> 24 & 0xff] & 0x08080808080808
>> | LUT[x >> 16 & 0xff] & 0x04040404040404
>> | LUT[x >> 8 & 0xff] & 0x02020202020202
>> | LUT[x & 0xff] & 0x01010101010101;
>>}
>>*/
>>uint64 rotateLc(uint64 x)
>>{
>> uint64 c7 = (x >> 56) & bytemask;
>> uint64 c6 = (x >> 48) & bytemask;
>> uint64 c5 = (x >> 40) & bytemask;
>> uint64 c4 = (x >> 32) & bytemask;
>> uint64 c3 = (x >> 24) & bytemask;
>> uint64 c2 = (x >> 16) & bytemask;
>> uint64 c1 = (x >> 8) & bytemask;
>> uint64 c0 = (x) & bytemask;
>> c7 = LUT[c7];
>> c6 = LUT[c6];
>> c5 = LUT[c5];
>> c4 = LUT[c4];
>> c3 = LUT[c3];
>> c2 = LUT[c2];
>> c1 = LUT[c1];
>> c0 = LUT[c0];
>> c5 = (c7 << 2) + c5;
>> c4 = (c6 << 2) + c4;
>> c3 = (c5 << 2) + c3;
>> c2 = (c4 << 2) + c2;
>> c1 = (c3 << 2) + c1;
>> c0 = (c2 << 2) + c0;
>> return (c1 << 1) + c0;
>>}
>>
>>
>>uint64 NOP(uint64 a)
>>{
>> return a;
>>}
>>
>>#ifdef ASSEMBLY_GOO
>>uint64 getrpc(uint64 from)
>>{
>> register uint64 result,
>> scratch;
>> __asm__ volatile ("rpcc %0":
>> "=r" (result));
>> scratch = result >> 32;
>> result += scratch - from;
>> result &= 0xffffffff;
>> return result;
>>}
>>#endif
>>
>>extern uint64 NOP(uint64);
>>extern uint64 rotateL(uint64);
>>extern uint64 rotate90(uint64);
>>extern void fillLUT(void);
>>
>>#define B1(s) (*(s)&1u)
>>#define B2(s) (B1(s)*2+B1(s+1))
>>#define B4(s) (B2(s)*4+B2(s+2))
>>#define B8(s) (B4(s)*16+B4(s+4))
>>#define B16(s) (B8(s)*256+B8(s+8))
>>#define B32(s) ((B16(s)<<16)+B16(s+16))
>>#define B64(s) (((uint64)B32(s)<<32)+B32(s+32))
>>
>>#define NUMTESTS 1
>>static uint64 test[NUMTESTS] =
>>#ifndef BROKEN_COMPILER
>>{
>> B64(" #### "
>> " # ### "
>> "# ####"
>> "# ####"
>> "# #"
>> "# #"
>> " # # "
>> " #### ")
>>};
>>#else
>>{0};
>>#endif
>>
>>void dump(uint64 x)
>>{
>> int b = 64;
>> while (--b >= 0) {
>> putchar(x & (longone << b) ? '#' : ' ');
>> if ((b % 8) == 0)
>> putchar('\n');
>> }
>>}
>>
>>int main(int argc, char **argv)
>>{
>> int r,
>> i;
>>#ifdef ASSEMBLY_GOO
>> uint64 tick;
>>#else
>> double start,
>> end,
>> difference;
>>#endif
>> int index = 0;
>> fillLUT();
>>
>>#define NUMREPS 10000000L
>> while (rlist[index].description) {
>>
>>#ifdef ASSEMBLY_GOO
>> tick = getrpc(0);
>>#else
>> start = get_time();
>>#endif
>> puts(rlist[index].description);
>> for (r = 0; r < NUMREPS; ++r) {
>> for (i = 0; i < NUMTESTS; ++i) {
>> uint64 t = test[i];
>> t = rlist[index].r(t);
>> t = rlist[index].r(t);
>> t = rlist[index].r(t);
>> t = rlist[index].r(t);
>> if (t != test[i]) {
>> printf("%s test %i failed, %016llx->%016llx\n",
>>rlist[index].description, i, test[i], t);
>> t = test[i];
>> dump(t);
>> t = rlist[index].r(t);
>> dump(t);
>> t = rlist[index].r(t);
>> dump(t);
>> t = rlist[index].r(t);
>> dump(t);
>> t = rlist[index].r(t);
>> dump(t);
>> exit(EXIT_FAILURE);
>> }
>> }
>> }
>>#ifdef ASSEMBLY_GOO
>> tick = getrpc(0);
>> printf("%s test time %llu %g\n", rlist[index].description, tick,
>> tick * (1.0 / (4 * NUMREPS * NUMTESTS)));
>>#else
>> end = get_time();
>> difference = time_diff(start, end);
>>
>> printf("%s test time %f %g\n", rlist[index].description, difference,
>> difference * (0.25 / (NUMREPS * NUMTESTS)));
>>#endif
>> index++;
>> }
>>
>> return 0;
>>}
>>
>>void fillLUT()
>>{
>> int i;
>> for (i = 0; i < 256; ++i) {
>> if (i & 1)
>> LUT[i] |= longone << 56;
>> if (i & 2)
>> LUT[i] |= longone << 48;
>> if (i & 4)
>> LUT[i] |= longone << 40;
>> if (i & 8)
>> LUT[i] |= longone << 32;
>> if (i & 16)
>> LUT[i] |= longone << 24;
>> if (i & 32)
>> LUT[i] |= longone << 16;
>> if (i & 64)
>> LUT[i] |= longone << 8;
>> if (i & 128)
>> LUT[i] |= longone << 0;
>> }
>>#ifdef BROKEN_COMPILER
>> test[0] =
>> B64(" #### "
>> " # ### "
>> "# ####"
>> "# ####"
>> "# #"
>> "# #"
>> " # # "
>> " #### ");
>>#endif
>>}
>>
>>/*
>>A couple runs produce this:
>>
>>NOP
>>NOP test time 0.321925 8.04812e-009
>>rotate90a
>>rotate90a test time 2.485504 6.21376e-008
>>rotate90b
>>rotate90b test time 2.367185 5.91796e-008
>>rotateLa
>>rotateLa test time 2.721986 6.80496e-008
>>rotateLc
>>rotateLc test time 3.308488 8.27122e-008
>>
>>NOP
>>NOP test time 0.340500 8.51249e-009
>>rotate90a
>>rotate90a test time 2.520052 6.30013e-008
>>rotate90b
>>rotate90b test time 2.463799 6.1595e-008
>>rotateLa
>>rotateLa test time 2.732862 6.83215e-008
>>rotateLc
>>rotateLc test time 3.330550 8.32637e-008
>>*/
This page took 0 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.