Author: Dann Corbit
Date: 12:47:14 06/10/02
#include <stdio.h>
#include <stdlib.h>
#ifdef _MSC_VER
typedef unsigned __int64 uint64;
#else
typedef unsigned long long uint64;
#endif
#ifndef ASSEMBLY_GOO
extern double long get_time(void);
extern double time_diff(double t1, double t2);
#endif
static uint64 LUT[256];
extern uint64 NOP(uint64 a);
extern uint64 rotate90a(uint64 a);
extern uint64 rotate90b(uint64 a);
extern uint64 rotateLa(uint64 x);
extern uint64 rotateLb(uint64 x);
extern uint64 rotateLc(uint64 x);
extern void dump(uint64 x);
extern void fillLUT(void);
typedef uint64(*rotfunc) (uint64);
typedef struct tag_rotlist {
char *description;
rotfunc r;
} rotlist;
static rotlist rlist[] = {
{"NOP", NOP},
{"rotate90a", rotate90a},
{"rotate90b", rotate90b},
{"rotateLa", rotateLa},
/* {"rotateLb", rotateLb}, */
{"rotateLc", rotateLc},
{0, 0}
};
uint64 rotate90a(uint64 a)
{
a = (a & 0xf0f0f0f000000000) >> 32
| (a & 0x0f0f0f0f00000000) << 4
| (a & 0x00000000f0f0f0f0) >> 4
| (a & 0x000000000f0f0f0f) << 32;
a = (a & 0xcccc0000cccc0000) >> 16
| (a & 0x3333000033330000) << 2
| (a & 0x0000cccc0000cccc) >> 2
| (a & 0x0000333300003333) << 16;
a = (a & 0xaa00aa00aa00aa00) >> 8
| (a & 0x5500550055005500) << 1
| (a & 0x00aa00aa00aa00aa) >> 1
| (a & 0x0055005500550055) << 8;
return a;
}
uint64 rotate90b(uint64 a)
{
a = (a & 0xf0f0f0f000000000) >> 32
| (a << 4 & 0xf0f0f0f000000000)
| (a >> 4 & 0x000000000f0f0f0f)
| (a & 0x000000000f0f0f0f) << 32;
a = (a & 0xcccc0000cccc0000) >> 16
| (a << 2 & 0xcccc0000cccc0000)
| (a >> 2 & 0x0000333300003333)
| (a & 0x0000333300003333) << 16;
a = (a & 0xaa00aa00aa00aa00) >> 8
| (a << 1 & 0xaa00aa00aa00aa00)
| (a >> 1 & 0x0055005500550055)
| (a & 0x0055005500550055) << 8;
return a;
}
uint64 rotateLa(uint64 x)
{
return LUT[x >> 56 & 0xff] << 7
| LUT[x >> 48 & 0xff] << 6
| LUT[x >> 40 & 0xff] << 5
| LUT[x >> 32 & 0xff] << 4
| LUT[x >> 24 & 0xff] << 3
| LUT[x >> 16 & 0xff] << 2
| LUT[x >> 8 & 0xff] << 1
| LUT[x & 0xff];
}
static const uint64 bytemask = 0xff;
static const uint64 longone = 1;
/* This one is broken
uint64 rotateLb(uint64 x)
{
return LUT[x >> 56 & 0xff] & 0x80808080808080
| LUT[x >> 48 & 0xff] & 0x40404040404040
| LUT[x >> 40 & 0xff] & 0x20202020202020
| LUT[x >> 32 & 0xff] & 0x10101010101010
| LUT[x >> 24 & 0xff] & 0x08080808080808
| LUT[x >> 16 & 0xff] & 0x04040404040404
| LUT[x >> 8 & 0xff] & 0x02020202020202
| LUT[x & 0xff] & 0x01010101010101;
}
*/
uint64 rotateLc(uint64 x)
{
uint64 c7 = (x >> 56) & bytemask;
uint64 c6 = (x >> 48) & bytemask;
uint64 c5 = (x >> 40) & bytemask;
uint64 c4 = (x >> 32) & bytemask;
uint64 c3 = (x >> 24) & bytemask;
uint64 c2 = (x >> 16) & bytemask;
uint64 c1 = (x >> 8) & bytemask;
uint64 c0 = (x) & bytemask;
c7 = LUT[c7];
c6 = LUT[c6];
c5 = LUT[c5];
c4 = LUT[c4];
c3 = LUT[c3];
c2 = LUT[c2];
c1 = LUT[c1];
c0 = LUT[c0];
c5 = (c7 << 2) + c5;
c4 = (c6 << 2) + c4;
c3 = (c5 << 2) + c3;
c2 = (c4 << 2) + c2;
c1 = (c3 << 2) + c1;
c0 = (c2 << 2) + c0;
return (c1 << 1) + c0;
}
uint64 NOP(uint64 a)
{
return a;
}
#ifdef ASSEMBLY_GOO
uint64 getrpc(uint64 from)
{
register uint64 result,
scratch;
__asm__ volatile ("rpcc %0":
"=r" (result));
scratch = result >> 32;
result += scratch - from;
result &= 0xffffffff;
return result;
}
#endif
extern uint64 NOP(uint64);
extern uint64 rotateL(uint64);
extern uint64 rotate90(uint64);
extern void fillLUT(void);
#define B1(s) (*(s)&1u)
#define B2(s) (B1(s)*2+B1(s+1))
#define B4(s) (B2(s)*4+B2(s+2))
#define B8(s) (B4(s)*16+B4(s+4))
#define B16(s) (B8(s)*256+B8(s+8))
#define B32(s) ((B16(s)<<16)+B16(s+16))
#define B64(s) (((uint64)B32(s)<<32)+B32(s+32))
#define NUMTESTS 1
static uint64 test[NUMTESTS] =
#ifndef BROKEN_COMPILER
{
B64(" #### "
" # ### "
"# ####"
"# ####"
"# #"
"# #"
" # # "
" #### ")
};
#else
{0};
#endif
void dump(uint64 x)
{
int b = 64;
while (--b >= 0) {
putchar(x & (longone << b) ? '#' : ' ');
if ((b % 8) == 0)
putchar('\n');
}
}
int main(int argc, char **argv)
{
int r,
i;
#ifdef ASSEMBLY_GOO
uint64 tick;
#else
double start,
end,
difference;
#endif
int index = 0;
fillLUT();
#define NUMREPS 10000000L
while (rlist[index].description) {
#ifdef ASSEMBLY_GOO
tick = getrpc(0);
#else
start = get_time();
#endif
puts(rlist[index].description);
for (r = 0; r < NUMREPS; ++r) {
for (i = 0; i < NUMTESTS; ++i) {
uint64 t = test[i];
t = rlist[index].r(t);
t = rlist[index].r(t);
t = rlist[index].r(t);
t = rlist[index].r(t);
if (t != test[i]) {
printf("%s test %i failed, %016llx->%016llx\n",
rlist[index].description, i, test[i], t);
t = test[i];
dump(t);
t = rlist[index].r(t);
dump(t);
t = rlist[index].r(t);
dump(t);
t = rlist[index].r(t);
dump(t);
t = rlist[index].r(t);
dump(t);
exit(EXIT_FAILURE);
}
}
}
#ifdef ASSEMBLY_GOO
tick = getrpc(0);
printf("%s test time %llu %g\n", rlist[index].description, tick,
tick * (1.0 / (4 * NUMREPS * NUMTESTS)));
#else
end = get_time();
difference = time_diff(start, end);
printf("%s test time %f %g\n", rlist[index].description, difference,
difference * (0.25 / (NUMREPS * NUMTESTS)));
#endif
index++;
}
return 0;
}
void fillLUT()
{
int i;
for (i = 0; i < 256; ++i) {
if (i & 1)
LUT[i] |= longone << 56;
if (i & 2)
LUT[i] |= longone << 48;
if (i & 4)
LUT[i] |= longone << 40;
if (i & 8)
LUT[i] |= longone << 32;
if (i & 16)
LUT[i] |= longone << 24;
if (i & 32)
LUT[i] |= longone << 16;
if (i & 64)
LUT[i] |= longone << 8;
if (i & 128)
LUT[i] |= longone << 0;
}
#ifdef BROKEN_COMPILER
test[0] =
B64(" #### "
" # ### "
"# ####"
"# ####"
"# #"
"# #"
" # # "
" #### ");
#endif
}
/*
A couple runs produce this:
NOP
NOP test time 0.321925 8.04812e-009
rotate90a
rotate90a test time 2.485504 6.21376e-008
rotate90b
rotate90b test time 2.367185 5.91796e-008
rotateLa
rotateLa test time 2.721986 6.80496e-008
rotateLc
rotateLc test time 3.308488 8.27122e-008
NOP
NOP test time 0.340500 8.51249e-009
rotate90a
rotate90a test time 2.520052 6.30013e-008
rotate90b
rotate90b test time 2.463799 6.1595e-008
rotateLa
rotateLa test time 2.732862 6.83215e-008
rotateLc
rotateLc test time 3.330550 8.32637e-008
*/
This page took 0.01 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.