Computer Chess Club Archives


Search

Terms

Messages

Subject: Re: 64 bit 90 degree rotations -- a summary from USENET

Author: Peter Fendrich

Date: 14:21:32 06/10/02

Go up one level in this thread


On June 10, 2002 at 15:47:14, Dann Corbit wrote:

Dann,
why do you collect these rotating functions?
When it comes to me I don't make any rotating during search just during the
initial phase when the program starts.
During search it is lookups in order to get the rotated file or diagonal.

Peter

>#include <stdio.h>
>#include <stdlib.h>
>
>#ifdef _MSC_VER
>typedef unsigned __int64 uint64;
>#else
>typedef unsigned long long uint64;
>#endif
>#ifndef ASSEMBLY_GOO
>extern double long get_time(void);
>extern double   time_diff(double t1, double t2);
>
>#endif
>static uint64   LUT[256];
>
>extern uint64   NOP(uint64 a);
>extern uint64   rotate90a(uint64 a);
>extern uint64   rotate90b(uint64 a);
>extern uint64   rotateLa(uint64 x);
>extern uint64   rotateLb(uint64 x);
>extern uint64   rotateLc(uint64 x);
>
>extern void     dump(uint64 x);
>extern void     fillLUT(void);
>
>typedef         uint64(*rotfunc) (uint64);
>
>typedef struct tag_rotlist {
>    char           *description;
>    rotfunc         r;
>}               rotlist;
>
>static rotlist  rlist[] = {
>    {"NOP", NOP},
>    {"rotate90a", rotate90a},
>    {"rotate90b", rotate90b},
>    {"rotateLa", rotateLa},
>    /* {"rotateLb", rotateLb}, */
>    {"rotateLc", rotateLc},
>    {0, 0}
>};
>
>uint64          rotate90a(uint64 a)
>{
>    a = (a & 0xf0f0f0f000000000) >> 32
>        | (a & 0x0f0f0f0f00000000) << 4
>        | (a & 0x00000000f0f0f0f0) >> 4
>        | (a & 0x000000000f0f0f0f) << 32;
>    a = (a & 0xcccc0000cccc0000) >> 16
>        | (a & 0x3333000033330000) << 2
>        | (a & 0x0000cccc0000cccc) >> 2
>        | (a & 0x0000333300003333) << 16;
>    a = (a & 0xaa00aa00aa00aa00) >> 8
>        | (a & 0x5500550055005500) << 1
>        | (a & 0x00aa00aa00aa00aa) >> 1
>        | (a & 0x0055005500550055) << 8;
>    return a;
>}
>
>uint64          rotate90b(uint64 a)
>{
>    a = (a & 0xf0f0f0f000000000) >> 32
>        | (a << 4 & 0xf0f0f0f000000000)
>        | (a >> 4 & 0x000000000f0f0f0f)
>        | (a & 0x000000000f0f0f0f) << 32;
>    a = (a & 0xcccc0000cccc0000) >> 16
>        | (a << 2 & 0xcccc0000cccc0000)
>        | (a >> 2 & 0x0000333300003333)
>        | (a & 0x0000333300003333) << 16;
>    a = (a & 0xaa00aa00aa00aa00) >> 8
>        | (a << 1 & 0xaa00aa00aa00aa00)
>        | (a >> 1 & 0x0055005500550055)
>        | (a & 0x0055005500550055) << 8;
>    return a;
>}
>
>uint64          rotateLa(uint64 x)
>{
>    return LUT[x >> 56 & 0xff] << 7
>         | LUT[x >> 48 & 0xff] << 6
>         | LUT[x >> 40 & 0xff] << 5
>         | LUT[x >> 32 & 0xff] << 4
>         | LUT[x >> 24 & 0xff] << 3
>         | LUT[x >> 16 & 0xff] << 2
>         | LUT[x >> 8 & 0xff] << 1
>         | LUT[x & 0xff];
>}
>
>static const uint64 bytemask = 0xff;
>static const uint64 longone = 1;
>/* This one is broken
>uint64          rotateLb(uint64 x)
>{
>    return LUT[x >> 56 & 0xff] & 0x80808080808080
>    | LUT[x >> 48 & 0xff] & 0x40404040404040
>    | LUT[x >> 40 & 0xff] & 0x20202020202020
>    | LUT[x >> 32 & 0xff] & 0x10101010101010
>    | LUT[x >> 24 & 0xff] & 0x08080808080808
>    | LUT[x >> 16 & 0xff] & 0x04040404040404
>    | LUT[x >> 8 & 0xff] & 0x02020202020202
>    | LUT[x & 0xff] & 0x01010101010101;
>}
>*/
>uint64          rotateLc(uint64 x)
>{
>    uint64          c7 = (x >> 56) & bytemask;
>    uint64          c6 = (x >> 48) & bytemask;
>    uint64          c5 = (x >> 40) & bytemask;
>    uint64          c4 = (x >> 32) & bytemask;
>    uint64          c3 = (x >> 24) & bytemask;
>    uint64          c2 = (x >> 16) & bytemask;
>    uint64          c1 = (x >> 8) & bytemask;
>    uint64          c0 = (x) & bytemask;
>    c7 = LUT[c7];
>    c6 = LUT[c6];
>    c5 = LUT[c5];
>    c4 = LUT[c4];
>    c3 = LUT[c3];
>    c2 = LUT[c2];
>    c1 = LUT[c1];
>    c0 = LUT[c0];
>    c5 = (c7 << 2) + c5;
>    c4 = (c6 << 2) + c4;
>    c3 = (c5 << 2) + c3;
>    c2 = (c4 << 2) + c2;
>    c1 = (c3 << 2) + c1;
>    c0 = (c2 << 2) + c0;
>    return (c1 << 1) + c0;
>}
>
>
>uint64          NOP(uint64 a)
>{
>    return a;
>}
>
>#ifdef ASSEMBLY_GOO
>uint64          getrpc(uint64 from)
>{
>    register uint64 result,
>                    scratch;
> __asm__ volatile ("rpcc %0":
>                "=r" (result));
>    scratch = result >> 32;
>    result += scratch - from;
>    result &= 0xffffffff;
>    return result;
>}
>#endif
>
>extern uint64   NOP(uint64);
>extern uint64   rotateL(uint64);
>extern uint64   rotate90(uint64);
>extern void     fillLUT(void);
>
>#define B1(s) (*(s)&1u)
>#define B2(s) (B1(s)*2+B1(s+1))
>#define B4(s) (B2(s)*4+B2(s+2))
>#define B8(s) (B4(s)*16+B4(s+4))
>#define B16(s) (B8(s)*256+B8(s+8))
>#define B32(s) ((B16(s)<<16)+B16(s+16))
>#define B64(s) (((uint64)B32(s)<<32)+B32(s+32))
>
>#define NUMTESTS 1
>static uint64   test[NUMTESTS] =
>#ifndef BROKEN_COMPILER
>{
>    B64("  ####  "
>        " #  ### "
>        "#   ####"
>        "#   ####"
>        "#      #"
>        "#      #"
>        " #    # "
>        "  ####  ")
>};
>#else
>{0};
>#endif
>
>void            dump(uint64 x)
>{
>    int             b = 64;
>    while (--b >= 0) {
>        putchar(x & (longone << b) ? '#' : ' ');
>        if ((b % 8) == 0)
>            putchar('\n');
>    }
>}
>
>int             main(int argc, char **argv)
>{
>    int             r,
>                    i;
>#ifdef ASSEMBLY_GOO
>    uint64          tick;
>#else
>    double          start,
>                    end,
>                    difference;
>#endif
>    int             index = 0;
>    fillLUT();
>
>#define NUMREPS 10000000L
>    while (rlist[index].description) {
>
>#ifdef ASSEMBLY_GOO
>        tick = getrpc(0);
>#else
>        start = get_time();
>#endif
>        puts(rlist[index].description);
>        for (r = 0; r < NUMREPS; ++r) {
>            for (i = 0; i < NUMTESTS; ++i) {
>                uint64          t = test[i];
>                t = rlist[index].r(t);
>                t = rlist[index].r(t);
>                t = rlist[index].r(t);
>                t = rlist[index].r(t);
>                if (t != test[i]) {
>                    printf("%s test %i failed, %016llx->%016llx\n",
>rlist[index].description, i, test[i], t);
>                    t = test[i];
>                    dump(t);
>                    t = rlist[index].r(t);
>                    dump(t);
>                    t = rlist[index].r(t);
>                    dump(t);
>                    t = rlist[index].r(t);
>                    dump(t);
>                    t = rlist[index].r(t);
>                    dump(t);
>                    exit(EXIT_FAILURE);
>                }
>            }
>        }
>#ifdef ASSEMBLY_GOO
>        tick = getrpc(0);
>        printf("%s test time %llu %g\n", rlist[index].description, tick,
>               tick * (1.0 / (4 * NUMREPS * NUMTESTS)));
>#else
>        end = get_time();
>        difference = time_diff(start, end);
>
>        printf("%s test time %f %g\n", rlist[index].description, difference,
>               difference * (0.25 / (NUMREPS * NUMTESTS)));
>#endif
>        index++;
>    }
>
>    return 0;
>}
>
>void            fillLUT()
>{
>    int             i;
>    for (i = 0; i < 256; ++i) {
>        if (i & 1)
>            LUT[i] |= longone << 56;
>        if (i & 2)
>            LUT[i] |= longone << 48;
>        if (i & 4)
>            LUT[i] |= longone << 40;
>        if (i & 8)
>            LUT[i] |= longone << 32;
>        if (i & 16)
>            LUT[i] |= longone << 24;
>        if (i & 32)
>            LUT[i] |= longone << 16;
>        if (i & 64)
>            LUT[i] |= longone << 8;
>        if (i & 128)
>            LUT[i] |= longone << 0;
>    }
>#ifdef BROKEN_COMPILER
>    test[0] =
>        B64("  ####  "
>            " #  ### "
>            "#   ####"
>            "#   ####"
>            "#      #"
>            "#      #"
>            " #    # "
>            "  ####  ");
>#endif
>}
>
>/*
>A couple runs produce this:
>
>NOP
>NOP test time 0.321925 8.04812e-009
>rotate90a
>rotate90a test time 2.485504 6.21376e-008
>rotate90b
>rotate90b test time 2.367185 5.91796e-008
>rotateLa
>rotateLa test time 2.721986 6.80496e-008
>rotateLc
>rotateLc test time 3.308488 8.27122e-008
>
>NOP
>NOP test time 0.340500 8.51249e-009
>rotate90a
>rotate90a test time 2.520052 6.30013e-008
>rotate90b
>rotate90b test time 2.463799 6.1595e-008
>rotateLa
>rotateLa test time 2.732862 6.83215e-008
>rotateLc
>rotateLc test time 3.330550 8.32637e-008
>*/



This page took 0 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.