Computer Chess Club Archives


Search

Terms

Messages

Subject: Re: Implementation of the abs() function [o.t.]

Author: Dieter Buerssner

Date: 17:32:05 07/05/03

Go up one level in this thread


On July 05, 2003 at 16:16:54, Russell Reagan wrote:

>int sbb_abs (int a) {
>    _asm {
>        mov ebx, [a]
>        cmp    ebx, 8000h
                     ^^^^^
>        sbb    eax, eax
>        xor    ebx, eax
>        sub    eax, ebx
>    }
>}

Should be 80000000h

I get a very strange table when I let MSVC inline - omid is *much* faster than
nothing! And this is reproducible, and totally unexplainable by looking at the
generated assembly. Very strange.

Well, I thought I would recode it again. To make sure that all functions are
handled exactly the same, I used some macro tricks. I also made the inner loop as
tight as possible (see the code later, which also includes GCC assembly), so that
the optimizer should have the easiest way to figure out what to do. Same result ...
(but not with Gcc; there, anything is slower than nothing). I can only suspect
some strange pairing between rand() and the rest. I included a small PRNG, and then
the results look more reasonable. One can also see that on my computer, the
register setup stuff needed for MSVC inlining gives a significant overhead,
much more than on your or Gerd's computer. Gcc results are very consistent (abs,
simple_abs and fish_abs are the same; my gcc does not use tricks for abs). There
is no overhead for the register setup with inline assembly.

MSVC, Russell's code, -Ox2 -Ob2 -G6 -Gr -GF

       nothing 3951541892 13.309
         abs() 1713113360 14.400
  simple_abs() 1713113360 17.936
    omid_abs() 1713113360 7.932   !!! Yes, reproducible
     sbb_abs() 1713113360 17.144
     cdq_abs() 1713113360 17.555
    fish_abs() 1713113360 20.900
     sar_abs() 1713113360 16.464
   cmovl_abs() 1713113360 17.365
   cmovs_abs() 1713113360 17.345

MSVC, my code, using "own" PRNG (pretty much the same results as above with rand())

       nothing 1305123480 5.858
           abs 2955546426 6.099
    simple_abs 2955546426 12.558
      omid_abs 2955546426 6.119
       sbb_abs 2955546426 16.233
       cdq_abs 2955546426 15.372
      fish_abs 2955546426 20.419
       sar_abs 2955546426 16.033
     cmovl_abs 2955546426 15.262
     cmovs_abs 2955546426 15.272

Ditto, Gcc

       nothing 1305123480 5.714
           abs 2955546426 12.857
    simple_abs 2955546426 12.857
      omid_abs 2955546426 5.879
       sbb_abs 2955546426 6.484
       cdq_abs 2955546426 5.824
      fish_abs 2955546426 12.857
       sar_abs 2955546426 6.099
     cmovl_abs 2955546426 6.209
     cmovs_abs 2955546426 6.264
       nothing 1305123480 5.714

Times are more or less reproducible. P4, 2.53 GHz.

Regards,
Dieter

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>

/* Note, almost everything (even the C code) is not really portable.
   Assumes arithmetic right shift for signed int, two's complement,
   32-bit ints, 32-bit long, perhaps more */

/* Number of loop iterations each generated tfunc_<name>() test loop runs. */
#define N_ITERATIONS    1000000000

/* Fast, but not good PRNG (shift register).
   The state is kept within 32 bits so the sequence is the same whether
   unsigned long is 32 or 64 bits wide. */
static unsigned long sr32_seed=0x87654321UL;

/* Advance the generator one step and return the new 32-bit state.
   Step: tmp = s ^ (s << 15), truncated to 32 bits; s = tmp ^ (tmp >> 17). */
unsigned long sr32_rand(void)
{
  /* Mask after the left shift: with a 64-bit unsigned long the shift would
     otherwise carry bits above bit 31 into the state. */
  unsigned long tmp = (sr32_seed ^ (sr32_seed << 15)) & 0xFFFFFFFFUL;
  return sr32_seed = tmp ^ (tmp >> 17);
}

/* Reset the generator state.  A non-zero seed is stored as given; a seed of
   zero selects the built-in default state 0x87654321. */
void seed_sr32_rand(unsigned long seed)
{
  sr32_seed = (seed != 0) ? seed : 0x87654321UL;
}

/* Selection of the random source used by the test loops.
   SEED(n)    re-seeds the chosen generator.
   RAND_VAL() yields the next test input.
   Both RAND_VAL() expansions are fully parenthesized so the macro can be
   used inside a larger expression without precedence surprises. */
#if 1 /*  0 to get  Russell's numbers */
#define SEED(n) seed_sr32_rand(n)
#define RAND_VAL() ((int)sr32_rand()) /* Will be pos and neg */
#else
/* Russell: libc rand(), offset so that both negative and non-negative
   values occur */
#define SEED(n) srand(n)
#define RAND_VAL() (rand()-(RAND_MAX/2+1))
#endif

/* DECLARE_TEST_FUNC(name) defines `unsigned long tfunc_<name>(void)`:
   a loop that runs N_ITERATIONS times, each time taking one value from
   RAND_VAL(), passing it to name(), and adding the result to an unsigned
   long sum.  The sum is returned; time_func() prints it next to the timing.
   Every candidate is wrapped by this same macro so that all of them are
   driven by exactly the same loop. */
#define DECLARE_TEST_FUNC(name) \
unsigned long tfunc_##name(void) \
{                                \
  unsigned long n=N_ITERATIONS;  \
  unsigned long sum=0;           \
  do                             \
  {                              \
    int a = RAND_VAL();          \
    sum += name(a);              \
  } while (--n != 0);            \
  return sum;                    \
}

/* Plain C absolute value: a negative argument is negated, anything else is
   returned unchanged. */
int simple_abs (int i) {
    if (i < 0) {
        return -i;
    }
    return i;
}

/* Defines tfunc_simple_abs(), the test loop around simple_abs(). */
DECLARE_TEST_FUNC(simple_abs)

/* Branch-free absolute value.  x >> 31 is used as a sign mask (0 for
   non-negative x, all ones for negative x when the shift is arithmetic and
   int has 32 bits); (x ^ mask) - mask then negates x only when the mask is
   all ones. */
int omid_abs (int x) {
    const int sign_mask = x >> 31;
    const int flipped = x ^ sign_mask;

    return flipped - sign_mask;
}

/* Defines tfunc_omid_abs(), the test loop around omid_abs(). */
DECLARE_TEST_FUNC(omid_abs)

/* sbb_abs: absolute value via compare + subtract-with-borrow (x86 only).
   cmp a, 0x80000000 sets the carry flag exactly when a, viewed as unsigned,
   is below 0x80000000, i.e. when a is non-negative.
   sbb r, r then yields r = -carry: -1 for a >= 0, 0 for a < 0.
   a ^= r ; result = r - a gives a for a >= 0 and -a for a < 0. */
#if __GNUC__
int sbb_abs(int a)
{
  int res, tmp;
  /* The template is written as adjacent string literals joined by "\n\t";
     a raw line break inside a string literal is not valid C. */
  __asm__ volatile(
    "cmpl $0x80000000, %1\n\t"
    "sbbl %0, %0\n\t"
    "xorl %0, %1\n\t"
    "subl %1, %0"
    : "=&r" (res), "=&r" (tmp)   /* %0 result, %1 scratch copy of a */
    : "1" (a)                    /* a enters in the same register as %1 */
    : "cc");
  return res;
}
#else
/* MSVC inline assembly version.  There is no return statement: the result
   is left in EAX. */
int sbb_abs (int a) {
    _asm {
        mov ebx, [a]
        cmp    ebx, 80000000h
        sbb    eax, eax
        xor    ebx, eax
        sub    eax, ebx
    }
}
#endif

/* Defines tfunc_sbb_abs(), the test loop around sbb_abs(). */
DECLARE_TEST_FUNC(sbb_abs)

/* cdq_abs: absolute value via cdq (x86 only).
   cdq sign-extends EAX into EDX, so EDX becomes 0 for a >= 0 and -1 for
   a < 0; (a ^ EDX) - EDX then negates a only when it was negative. */
#if __GNUC__
int cdq_abs(int a)
{
  /* The template is written as adjacent string literals joined by "\n\t";
     a raw line break inside a string literal is not valid C. */
  __asm__ volatile(
    "cdq\n\t"
    "xorl %%edx, %%eax\n\t"
    "subl %%edx, %%eax"
    : "=a" (a)        /* result comes back in EAX */
    : "0" (a)         /* a is passed in EAX as well */
    : "edx", "cc");   /* cdq overwrites EDX */
  return a;
}
#else
/* MSVC inline assembly version.  There is no return statement: the result
   is left in EAX. */
int cdq_abs (int a) {
    _asm {
        mov eax, [a]
        cdq
        xor eax, edx
        sub eax, edx
    }
}
#endif

/* Defines tfunc_cdq_abs(), the test loop around cdq_abs(). */
DECLARE_TEST_FUNC(cdq_abs)

/* fish_abs: absolute value with a conditional jump (x86 only).
   or r, r leaves the value unchanged but sets the flags from it; jns skips
   the neg when the sign flag is clear, so only negative values are negated. */
#if __GNUC__
int fish_abs(int a)
{
  /* The template is written as adjacent string literals joined by "\n\t";
     a raw line break inside a string literal is not valid C. */
  __asm__ volatile(
    "orl %0, %0\n\t"
    "jns 1f\n\t"
    "negl %0\n"
    "1:"
    : "=r" (a)
    : "0" (a)     /* a is modified in place in one register */
    : "cc");
  return a;
}
#else
/* MSVC inline assembly version.  There is no return statement: the result
   is left in EAX. */
int fish_abs (int a) {
    _asm {
        mov    eax, [a]
        or     eax, eax
        jns    fish
        neg    eax
        fish:
    }
}
#endif

/* Defines tfunc_fish_abs(), the test loop around fish_abs(). */
DECLARE_TEST_FUNC(fish_abs)

/* sar_abs: absolute value via arithmetic shift (x86 only).
   m = a >> 31 (sar) is 0 for a >= 0 and -1 for a < 0;
   the result is (a + m) ^ m, which equals -a when m is -1 and a otherwise. */
#if __GNUC__
int sar_abs(int a)
{
  int res, tmp;
  /* The template is written as adjacent string literals joined by "\n\t";
     a raw line break inside a string literal is not valid C. */
  __asm__ volatile(
    "movl %1, %0\n\t"
    "sarl $31, %0\n\t"
    "addl %0, %1\n\t"
    "xorl %1, %0"
    : "=&r" (res), "=&r" (tmp)   /* %0 mask/result, %1 scratch copy of a */
    : "1" (a)                    /* a enters in the same register as %1 */
    : "cc");
  return res;
}
#else
/* MSVC inline assembly version.  There is no return statement: the result
   is left in EAX. */
int sar_abs (int a) {
    _asm {
        mov  ebx, [a]
        mov  eax,ebx
        sar  eax,31
        add  ebx,eax
        xor  eax,ebx
    }
}
#endif

/* Defines tfunc_sar_abs(), the test loop around sar_abs(). */
DECLARE_TEST_FUNC(sar_abs)

/* cmovl_abs: absolute value via neg + cmovl (x86 with CMOV only).
   The copy of a is negated; neg sets the flags from -a, and cmovl
   ("less": SF != OF) moves the original a back when -a is negative,
   i.e. when a was positive. */
#if __GNUC__
int cmovl_abs(int a)
{
  int res;
  /* The template is written as adjacent string literals joined by "\n\t";
     a raw line break inside a string literal is not valid C. */
  __asm__ volatile(
    "movl %1, %0\n\t"
    "negl %0\n\t"
    "cmovll %1, %0"
    : "=&r" (res)   /* written before %1 is last read, hence earlyclobber */
    : "r" (a)
    : "cc");
  return res;
}
#else
/* MSVC inline assembly version.  There is no return statement: the result
   is left in EAX. */
int cmovl_abs (int a) {
    _asm {
        mov   eax, [a]
        mov   ecx, eax
        neg   eax
        cmovl eax, ecx
    }
}
#endif

/* Defines tfunc_cmovl_abs(), the test loop around cmovl_abs(). */
DECLARE_TEST_FUNC(cmovl_abs)

/* cmovs_abs: absolute value via neg + cmovs (x86 with CMOV only).
   The copy of a is negated; neg sets the flags from -a, and cmovs
   ("sign": SF set) moves the original a back when -a has its sign bit set. */
#if __GNUC__
int cmovs_abs(int a)
{
  int res;
  /* The template is written as adjacent string literals joined by "\n\t";
     a raw line break inside a string literal is not valid C. */
  __asm__ volatile(
    "movl %1, %0\n\t"
    "negl %0\n\t"
    "cmovsl %1, %0"
    : "=&r" (res)   /* written before %1 is last read, hence earlyclobber */
    : "r" (a)
    : "cc");
  return res;
}
#else
/* MSVC inline assembly version.  There is no return statement: the result
   is left in EAX. */
int cmovs_abs (int a) {
    _asm {
        mov     eax, [a]
        mov     ecx,eax
        neg     eax
        cmovs   eax,ecx
    }
}
#endif

/* Defines tfunc_cmovs_abs(), the test loop around cmovs_abs(). */
DECLARE_TEST_FUNC(cmovs_abs)

/* Identity function: returns its argument unchanged.
   NOTE(review): presumably the baseline that shows the cost of the test
   loop and random number generation alone - confirm. */
int nothing(int a)
{
  return a;
}

/* Define tfunc_nothing() and tfunc_abs(), the test loops around nothing()
   and around abs() as declared by <stdlib.h>. */
DECLARE_TEST_FUNC(nothing)
DECLARE_TEST_FUNC(abs)

/* Don't inline this */
/* Runs test_function once and prints name, the returned sum and the
   elapsed clock() time in seconds.  Defined after main(). */
void time_func(unsigned long (*test_function)(), const char *name);

/* TIME_FUNCTION(name) calls time_func() on tfunc_<name>, using the
   stringized name as the label of the output line. */
#define TIME_FUNCTION(name) time_func(tfunc_##name, #name)

/* Times every abs() candidate in turn, one output line per candidate, in
   the order listed below.  Always returns 0. */
int main (void)
{
  TIME_FUNCTION(nothing);
  TIME_FUNCTION(abs);
  TIME_FUNCTION(simple_abs);
  TIME_FUNCTION(omid_abs);
  TIME_FUNCTION(sbb_abs);
  TIME_FUNCTION(cdq_abs);
  TIME_FUNCTION(fish_abs);
  TIME_FUNCTION(sar_abs);
  TIME_FUNCTION(cmovl_abs);
  TIME_FUNCTION(cmovs_abs);
  return 0;
}

/* Time one test loop and print a result line.
   test_function: one of the tfunc_<name>() loops; called exactly once.
   name:          label printed in the first column.
   The random source is re-seeded with SEED(0) before the run, so each
   test loop starts from the same generator state.  The printed line is
   "<name> <sum> <seconds>", with seconds derived from clock(). */
void time_func(unsigned long (*test_function)(), const char *name)
{
  clock_t clk;
  unsigned long sum;
  SEED(0);
  clk = clock();
  sum = test_function();
  clk = clock() - clk;

  /* sum is unsigned long, so the conversion must be %lu; %u expects an
     unsigned int and mismatches wherever long is wider than int. */
  printf("%14s %10lu %.3f\n", name, sum, (double)clk / CLOCKS_PER_SEC);
}




This page took 0.01 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.