Author: Russell Reagan
Date: 13:16:54 07/05/03
Go up one level in this thread
On July 05, 2003 at 10:17:38, Omid David Tabibi wrote:
>In Genesis I heavily use the abs() function, and so tried to optimize it.
>Instead of using the abs() function defined in <math.h>, I wrote the following
>fucntion:
>
>long abs(long x) {
> long y;
> y = x >> 31;
> return (x ^ y) - y;
>}
>
>Testing it using a profiler, I found out that my implementation is about twice
>slower than the math.h implementation of abs(). I haven't looked at the
>implementation in math.h, but I can't see how a more optimized version of abs()
>can be written.
>
>Any ideas?
Maybe some of the instructions are very processor dependant, because yours was
just as fast as the math.h abs() on my Athlon 2GHz.
I grabbed several absolute value functions from various newsgroup archives and
timed them. I ran the program five times to try and get an accurate timing, then
took the fastest time (as long as it was consistent with the other times), then
I sorted them.
Regarding the table below: The checksum ensures that the function is working
correctly, and that the compiler didn't inline away the calls because the result
wasn't being used. The "nothing" timing just summed up the random numbers
without calling any absolute value function, so we could get a kind of "base
time". abs() is the math.h version. I used MSVC++ 6 Pro with "maximize speed",
as well as "inline any suitable".
Function Checksum Time
============== ========== ======
nothing 3951541892 4.776
abs() 1713113360 6.039
omid_abs() 1713113360 6.039
cdq_abs() 1713113360 6.279
cmovs_abs() 1713113360 6.289
sbb_abs() 1713113360 6.539
cmovl_abs() 1713113360 6.789
sar_abs() 1713113360 7.531
simple_abs() 1713113360 9.654
fish_abs() 1713113360 11.376
You can get the file that with all of the functions and tests from:
http://home.comcast.net/~russell.reagan/abs.c
Maybe someone can run it on a different processor or with a different compiler
and we can compare results (see which is fastest, not compare timings). I'll
also post it here.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#define MAX_ITERATIONS 1000000000
// ------------ //
// math.h abs() //
// ------------ //
void test_abs () {
int i;
unsigned long sum = 0;
clock_t start, stop;
srand(0);
start = clock();
for (i = 0; i < MAX_ITERATIONS; ++i) {
// subtract so we get both positive and negative numbers
int a = rand() - 16384;
sum += abs(a);
}
stop = clock();
printf("%14s %10u %.3f\n", "abs()", sum, (float)(stop - start) /
CLOCKS_PER_SEC);
}
// ------------- //
// simple_abs () //
// ------------- //
int simple_abs (int i) {
return (i >= 0) ? i : -i;
}
void test_simple_abs () {
int i;
unsigned long sum = 0;
clock_t start, stop;
srand(0);
start = clock();
for (i = 0; i < MAX_ITERATIONS; ++i) {
// subtract so we get both positive and negative numbers
int a = rand() - 16384;
sum += simple_abs(a);
}
stop = clock();
printf("%14s %10u %.3f\n", "simple_abs()", sum, (float)(stop - start) /
CLOCKS_PER_SEC);
}
// ----------- //
// omid_abs () //
// ----------- //
int omid_abs (int x) {
int y;
y = x >> 31;
return (x ^ y) - y;
}
void test_omid_abs () {
int i;
unsigned long sum = 0;
clock_t start, stop;
srand(0);
start = clock();
for (i = 0; i < MAX_ITERATIONS; ++i) {
// subtract so we get both positive and negative numbers
int a = rand() - 16384;
sum += omid_abs(a);
}
stop = clock();
printf("%14s %10u %.3f\n", "omid_abs()", sum, (float)(stop - start) /
CLOCKS_PER_SEC);
}
// ----------- //
// sbb_abs () //
// ----------- //
int sbb_abs (int a) {
_asm {
mov ebx, [a]
cmp ebx, 8000h
sbb eax, eax
xor ebx, eax
sub eax, ebx
}
}
void test_sbb_abs () {
int i;
unsigned long sum = 0;
clock_t start, stop;
srand(0);
start = clock();
for (i = 0; i < MAX_ITERATIONS; ++i) {
// subtract so we get both positive and negative numbers
int a = rand() - 16384;
sum += sbb_abs(a);
}
stop = clock();
printf("%14s %10u %.3f\n", "sbb_abs()", sum, (float)(stop - start) /
CLOCKS_PER_SEC);
}
// ---------- //
// cdq_abs () //
// ---------- //
int cdq_abs (int a) {
_asm {
mov eax, [a]
cdq
xor eax, edx
sub eax, edx
}
}
void test_cdq_abs () {
int i;
unsigned long sum = 0;
clock_t start, stop;
srand(0);
start = clock();
for (i = 0; i < MAX_ITERATIONS; ++i) {
// subtract so we get both positive and negative numbers
int a = rand() - 16384;
sum += cdq_abs(a);
}
stop = clock();
printf("%14s %10u %.3f\n", "cdq_abs()", sum, (float)(stop - start) /
CLOCKS_PER_SEC);
}
// ----------- //
// fish_abs () //
// ----------- //
int fish_abs (int a) {
_asm {
mov eax, [a]
or eax, eax
jns fish
neg eax
fish:
}
}
void test_fish_abs () {
int i;
unsigned long sum = 0;
clock_t start, stop;
srand(0);
start = clock();
for (i = 0; i < MAX_ITERATIONS; ++i) {
// subtract so we get both positive and negative numbers
int a = rand() - 16384;
sum += fish_abs(a);
}
stop = clock();
printf("%14s %10u %.3f\n", "fish_abs()", sum, (float)(stop - start) /
CLOCKS_PER_SEC);
}
// ---------- //
// sar_abs () //
// ---------- //
int sar_abs (int a) {
_asm {
mov ebx, [a]
mov eax,ebx
sar eax,31
add ebx,eax
xor eax,ebx
}
}
void test_sar_abs () {
int i;
unsigned long sum = 0;
clock_t start, stop;
srand(0);
start = clock();
for (i = 0; i < MAX_ITERATIONS; ++i) {
// subtract so we get both positive and negative numbers
int a = rand() - 16384;
sum += sar_abs(a);
}
stop = clock();
printf("%14s %10u %.3f\n", "sar_abs()", sum, (float)(stop - start) /
CLOCKS_PER_SEC);
}
// ------------ //
// cmovl_abs () //
// ------------ //
int cmovl_abs (int a) {
_asm {
mov eax, [a]
mov ecx, eax
neg eax
cmovl eax, ecx
}
}
void test_cmovl_abs () {
int i;
unsigned long sum = 0;
clock_t start, stop;
srand(0);
start = clock();
for (i = 0; i < MAX_ITERATIONS; ++i) {
// subtract so we get both positive and negative numbers
int a = rand() - 16384;
sum += cmovl_abs(a);
}
stop = clock();
printf("%14s %10u %.3f\n", "cmovl_abs()", sum, (float)(stop - start) /
CLOCKS_PER_SEC);
}
// ------------ //
// cmovs_abs () //
// ------------ //
int cmovs_abs (int a) {
_asm {
mov eax, [a]
mov ecx,eax
neg eax
cmovs eax,ecx
}
}
void test_cmovs_abs () {
int i;
unsigned long sum = 0;
clock_t start, stop;
srand(0);
start = clock();
for (i = 0; i < MAX_ITERATIONS; ++i) {
// subtract so we get both positive and negative numbers
int a = rand() - 16384;
sum += cmovs_abs(a);
}
stop = clock();
printf("%14s %10u %.3f\n", "cmovs_abs()", sum, (float)(stop - start) /
CLOCKS_PER_SEC);
}
void test_nothing () {
int i;
unsigned long sum = 0;
clock_t start, stop;
srand(0);
start = clock();
for (i = 0; i < MAX_ITERATIONS; ++i) {
// subtract so we get both positive and negative numbers
int a = rand() - 16384;
sum += a;
}
stop = clock();
printf("%14s %10u %.3f\n", "nothing", sum, (float)(stop - start) /
CLOCKS_PER_SEC);
}
void main () {
test_nothing();
test_abs();
test_simple_abs();
test_omid_abs();
test_sbb_abs();
test_cdq_abs();
test_fish_abs();
test_sar_abs();
test_cmovl_abs();
test_cmovs_abs();
}
This page took 0.03 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.