Computer Chess Club Archives


Search

Terms

Messages

Subject: Re: Implementation of the abs() function [o.t.]

Author: Russell Reagan

Date: 13:16:54 07/05/03

Go up one level in this thread


On July 05, 2003 at 10:17:38, Omid David Tabibi wrote:

>In Genesis I heavily use the abs() function, and so tried to optimize it.
>Instead of using the abs() function defined in <math.h>, I wrote the following
>fucntion:
>
>long abs(long x) {
>    long y;
>    y = x >> 31;
>    return (x ^ y) - y;
>}
>
>Testing it using a profiler, I found out that my implementation is about twice
>slower than the math.h implementation of abs(). I haven't looked at the
>implementation in math.h, but I can't see how a more optimized version of abs()
>can be written.
>
>Any ideas?

Maybe some of the instructions are very processor dependant, because yours was
just as fast as the math.h abs() on my Athlon 2GHz.

I grabbed several absolute value functions from various newsgroup archives and
timed them. I ran the program five times to try and get an accurate timing, then
took the fastest time (as long as it was consistent with the other times), then
I sorted them.

Regarding the table below: The checksum ensures that the function is working
correctly, and that the compiler didn't inline away the calls because the result
wasn't being used. The "nothing" timing just summed up the random numbers
without calling any absolute value function, so we could get a kind of "base
time". abs() is the math.h version. I used MSVC++ 6 Pro with "maximize speed",
as well as "inline any suitable".

     Function   Checksum   Time
============== ========== ======
       nothing 3951541892 4.776
         abs() 1713113360 6.039
    omid_abs() 1713113360 6.039
     cdq_abs() 1713113360 6.279
   cmovs_abs() 1713113360 6.289
     sbb_abs() 1713113360 6.539
   cmovl_abs() 1713113360 6.789
     sar_abs() 1713113360 7.531
  simple_abs() 1713113360 9.654
    fish_abs() 1713113360 11.376

You can get the file that with all of the functions and tests from:
http://home.comcast.net/~russell.reagan/abs.c

Maybe someone can run it on a different processor or with a different compiler
and we can compare results (see which is fastest, not compare timings). I'll
also post it here.

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>

#define MAX_ITERATIONS    1000000000

// ------------ //
// math.h abs() //
// ------------ //

void test_abs () {
    int i;
    unsigned long sum = 0;
    clock_t start, stop;

    srand(0);
    start = clock();
    for (i = 0; i < MAX_ITERATIONS; ++i) {

        // subtract so we get both positive and negative numbers
        int a = rand() - 16384;

        sum += abs(a);
    }
    stop = clock();

    printf("%14s %10u %.3f\n", "abs()", sum, (float)(stop - start) /
CLOCKS_PER_SEC);
}

// ------------- //
// simple_abs () //
// ------------- //

int simple_abs (int i) {
    return (i >= 0) ? i : -i;
}

void test_simple_abs () {
    int i;
    unsigned long sum = 0;
    clock_t start, stop;

    srand(0);
    start = clock();
    for (i = 0; i < MAX_ITERATIONS; ++i) {

        // subtract so we get both positive and negative numbers
        int a = rand() - 16384;

        sum += simple_abs(a);
    }
    stop = clock();

    printf("%14s %10u %.3f\n", "simple_abs()", sum, (float)(stop - start) /
CLOCKS_PER_SEC);
}

// ----------- //
// omid_abs () //
// ----------- //

int omid_abs (int x) {
    int y;
    y = x >> 31;
    return (x ^ y) - y;
}

void test_omid_abs () {
    int i;
    unsigned long sum = 0;
    clock_t start, stop;

    srand(0);
    start = clock();
    for (i = 0; i < MAX_ITERATIONS; ++i) {

        // subtract so we get both positive and negative numbers
        int a = rand() - 16384;

        sum += omid_abs(a);
    }
    stop = clock();

    printf("%14s %10u %.3f\n", "omid_abs()", sum, (float)(stop - start) /
CLOCKS_PER_SEC);
}

// ----------- //
// sbb_abs () //
// ----------- //

int sbb_abs (int a) {
    _asm {
        mov ebx, [a]
        cmp    ebx, 8000h
        sbb    eax, eax
        xor    ebx, eax
        sub    eax, ebx
    }
}

void test_sbb_abs () {
    int i;
    unsigned long sum = 0;
    clock_t start, stop;

    srand(0);
    start = clock();
    for (i = 0; i < MAX_ITERATIONS; ++i) {

        // subtract so we get both positive and negative numbers
        int a = rand() - 16384;

        sum += sbb_abs(a);
    }
    stop = clock();

    printf("%14s %10u %.3f\n", "sbb_abs()", sum, (float)(stop - start) /
CLOCKS_PER_SEC);
}

// ---------- //
// cdq_abs () //
// ---------- //

int cdq_abs (int a) {
    _asm {
        mov eax, [a]
        cdq
        xor eax, edx
        sub eax, edx
    }
}

void test_cdq_abs () {
    int i;
    unsigned long sum = 0;
    clock_t start, stop;

    srand(0);
    start = clock();
    for (i = 0; i < MAX_ITERATIONS; ++i) {

        // subtract so we get both positive and negative numbers
        int a = rand() - 16384;

        sum += cdq_abs(a);
    }
    stop = clock();

    printf("%14s %10u %.3f\n", "cdq_abs()", sum, (float)(stop - start) /
CLOCKS_PER_SEC);
}

// ----------- //
// fish_abs () //
// ----------- //

int fish_abs (int a) {
    _asm {
        mov    eax, [a]
        or     eax, eax
        jns    fish
        neg    eax
        fish:
    }
}

void test_fish_abs () {
    int i;
    unsigned long sum = 0;
    clock_t start, stop;

    srand(0);
    start = clock();
    for (i = 0; i < MAX_ITERATIONS; ++i) {

        // subtract so we get both positive and negative numbers
        int a = rand() - 16384;

        sum += fish_abs(a);
    }
    stop = clock();

    printf("%14s %10u %.3f\n", "fish_abs()", sum, (float)(stop - start) /
CLOCKS_PER_SEC);
}

// ---------- //
// sar_abs () //
// ---------- //

int sar_abs (int a) {
    _asm {
        mov  ebx, [a]
        mov  eax,ebx
        sar  eax,31
        add  ebx,eax
        xor  eax,ebx
    }
}

void test_sar_abs () {
    int i;
    unsigned long sum = 0;
    clock_t start, stop;

    srand(0);
    start = clock();
    for (i = 0; i < MAX_ITERATIONS; ++i) {

        // subtract so we get both positive and negative numbers
        int a = rand() - 16384;

        sum += sar_abs(a);
    }
    stop = clock();

    printf("%14s %10u %.3f\n", "sar_abs()", sum, (float)(stop - start) /
CLOCKS_PER_SEC);
}

// ------------ //
// cmovl_abs () //
// ------------ //

int cmovl_abs (int a) {
    _asm {
        mov   eax, [a]
        mov   ecx, eax
        neg   eax
        cmovl eax, ecx
    }
}

void test_cmovl_abs () {
    int i;
    unsigned long sum = 0;
    clock_t start, stop;

    srand(0);
    start = clock();
    for (i = 0; i < MAX_ITERATIONS; ++i) {

        // subtract so we get both positive and negative numbers
        int a = rand() - 16384;

        sum += cmovl_abs(a);
    }
    stop = clock();

    printf("%14s %10u %.3f\n", "cmovl_abs()", sum, (float)(stop - start) /
CLOCKS_PER_SEC);
}

// ------------ //
// cmovs_abs () //
// ------------ //

int cmovs_abs (int a) {
    _asm {
        mov     eax, [a]
        mov     ecx,eax
        neg     eax
        cmovs   eax,ecx
    }
}

void test_cmovs_abs () {
    int i;
    unsigned long sum = 0;
    clock_t start, stop;

    srand(0);
    start = clock();
    for (i = 0; i < MAX_ITERATIONS; ++i) {

        // subtract so we get both positive and negative numbers
        int a = rand() - 16384;

        sum += cmovs_abs(a);
    }
    stop = clock();

    printf("%14s %10u %.3f\n", "cmovs_abs()", sum, (float)(stop - start) /
CLOCKS_PER_SEC);
}

void test_nothing () {
    int i;
    unsigned long sum = 0;
    clock_t start, stop;

    srand(0);
    start = clock();
    for (i = 0; i < MAX_ITERATIONS; ++i) {

        // subtract so we get both positive and negative numbers
        int a = rand() - 16384;

        sum += a;
    }
    stop = clock();

    printf("%14s %10u %.3f\n", "nothing", sum, (float)(stop - start) /
CLOCKS_PER_SEC);
}

void main () {
    test_nothing();
    test_abs();
    test_simple_abs();
    test_omid_abs();
    test_sbb_abs();
    test_cdq_abs();
    test_fish_abs();
    test_sar_abs();
    test_cmovl_abs();
    test_cmovs_abs();
}



This page took 0.01 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.