Author: Russell Reagan
Date: 13:16:54 07/05/03
Go up one level in this thread
On July 05, 2003 at 10:17:38, Omid David Tabibi wrote: >In Genesis I heavily use the abs() function, and so tried to optimize it. >Instead of using the abs() function defined in <math.h>, I wrote the following >fucntion: > >long abs(long x) { > long y; > y = x >> 31; > return (x ^ y) - y; >} > >Testing it using a profiler, I found out that my implementation is about twice >slower than the math.h implementation of abs(). I haven't looked at the >implementation in math.h, but I can't see how a more optimized version of abs() >can be written. > >Any ideas? Maybe some of the instructions are very processor dependant, because yours was just as fast as the math.h abs() on my Athlon 2GHz. I grabbed several absolute value functions from various newsgroup archives and timed them. I ran the program five times to try and get an accurate timing, then took the fastest time (as long as it was consistent with the other times), then I sorted them. Regarding the table below: The checksum ensures that the function is working correctly, and that the compiler didn't inline away the calls because the result wasn't being used. The "nothing" timing just summed up the random numbers without calling any absolute value function, so we could get a kind of "base time". abs() is the math.h version. I used MSVC++ 6 Pro with "maximize speed", as well as "inline any suitable". Function Checksum Time ============== ========== ====== nothing 3951541892 4.776 abs() 1713113360 6.039 omid_abs() 1713113360 6.039 cdq_abs() 1713113360 6.279 cmovs_abs() 1713113360 6.289 sbb_abs() 1713113360 6.539 cmovl_abs() 1713113360 6.789 sar_abs() 1713113360 7.531 simple_abs() 1713113360 9.654 fish_abs() 1713113360 11.376 You can get the file that with all of the functions and tests from: http://home.comcast.net/~russell.reagan/abs.c Maybe someone can run it on a different processor or with a different compiler and we can compare results (see which is fastest, not compare timings). I'll also post it here. #include <stdio.h> #include <stdlib.h> #include <math.h> #include <time.h> #define MAX_ITERATIONS 1000000000 // ------------ // // math.h abs() // // ------------ // void test_abs () { int i; unsigned long sum = 0; clock_t start, stop; srand(0); start = clock(); for (i = 0; i < MAX_ITERATIONS; ++i) { // subtract so we get both positive and negative numbers int a = rand() - 16384; sum += abs(a); } stop = clock(); printf("%14s %10u %.3f\n", "abs()", sum, (float)(stop - start) / CLOCKS_PER_SEC); } // ------------- // // simple_abs () // // ------------- // int simple_abs (int i) { return (i >= 0) ? i : -i; } void test_simple_abs () { int i; unsigned long sum = 0; clock_t start, stop; srand(0); start = clock(); for (i = 0; i < MAX_ITERATIONS; ++i) { // subtract so we get both positive and negative numbers int a = rand() - 16384; sum += simple_abs(a); } stop = clock(); printf("%14s %10u %.3f\n", "simple_abs()", sum, (float)(stop - start) / CLOCKS_PER_SEC); } // ----------- // // omid_abs () // // ----------- // int omid_abs (int x) { int y; y = x >> 31; return (x ^ y) - y; } void test_omid_abs () { int i; unsigned long sum = 0; clock_t start, stop; srand(0); start = clock(); for (i = 0; i < MAX_ITERATIONS; ++i) { // subtract so we get both positive and negative numbers int a = rand() - 16384; sum += omid_abs(a); } stop = clock(); printf("%14s %10u %.3f\n", "omid_abs()", sum, (float)(stop - start) / CLOCKS_PER_SEC); } // ----------- // // sbb_abs () // // ----------- // int sbb_abs (int a) { _asm { mov ebx, [a] cmp ebx, 8000h sbb eax, eax xor ebx, eax sub eax, ebx } } void test_sbb_abs () { int i; unsigned long sum = 0; clock_t start, stop; srand(0); start = clock(); for (i = 0; i < MAX_ITERATIONS; ++i) { // subtract so we get both positive and negative numbers int a = rand() - 16384; sum += sbb_abs(a); } stop = clock(); printf("%14s %10u %.3f\n", "sbb_abs()", sum, (float)(stop - start) / CLOCKS_PER_SEC); } // ---------- // // cdq_abs () // // ---------- // int cdq_abs (int a) { _asm { mov eax, [a] cdq xor eax, edx sub eax, edx } } void test_cdq_abs () { int i; unsigned long sum = 0; clock_t start, stop; srand(0); start = clock(); for (i = 0; i < MAX_ITERATIONS; ++i) { // subtract so we get both positive and negative numbers int a = rand() - 16384; sum += cdq_abs(a); } stop = clock(); printf("%14s %10u %.3f\n", "cdq_abs()", sum, (float)(stop - start) / CLOCKS_PER_SEC); } // ----------- // // fish_abs () // // ----------- // int fish_abs (int a) { _asm { mov eax, [a] or eax, eax jns fish neg eax fish: } } void test_fish_abs () { int i; unsigned long sum = 0; clock_t start, stop; srand(0); start = clock(); for (i = 0; i < MAX_ITERATIONS; ++i) { // subtract so we get both positive and negative numbers int a = rand() - 16384; sum += fish_abs(a); } stop = clock(); printf("%14s %10u %.3f\n", "fish_abs()", sum, (float)(stop - start) / CLOCKS_PER_SEC); } // ---------- // // sar_abs () // // ---------- // int sar_abs (int a) { _asm { mov ebx, [a] mov eax,ebx sar eax,31 add ebx,eax xor eax,ebx } } void test_sar_abs () { int i; unsigned long sum = 0; clock_t start, stop; srand(0); start = clock(); for (i = 0; i < MAX_ITERATIONS; ++i) { // subtract so we get both positive and negative numbers int a = rand() - 16384; sum += sar_abs(a); } stop = clock(); printf("%14s %10u %.3f\n", "sar_abs()", sum, (float)(stop - start) / CLOCKS_PER_SEC); } // ------------ // // cmovl_abs () // // ------------ // int cmovl_abs (int a) { _asm { mov eax, [a] mov ecx, eax neg eax cmovl eax, ecx } } void test_cmovl_abs () { int i; unsigned long sum = 0; clock_t start, stop; srand(0); start = clock(); for (i = 0; i < MAX_ITERATIONS; ++i) { // subtract so we get both positive and negative numbers int a = rand() - 16384; sum += cmovl_abs(a); } stop = clock(); printf("%14s %10u %.3f\n", "cmovl_abs()", sum, (float)(stop - start) / CLOCKS_PER_SEC); } // ------------ // // cmovs_abs () // // ------------ // int cmovs_abs (int a) { _asm { mov eax, [a] mov ecx,eax neg eax cmovs eax,ecx } } void test_cmovs_abs () { int i; unsigned long sum = 0; clock_t start, stop; srand(0); start = clock(); for (i = 0; i < MAX_ITERATIONS; ++i) { // subtract so we get both positive and negative numbers int a = rand() - 16384; sum += cmovs_abs(a); } stop = clock(); printf("%14s %10u %.3f\n", "cmovs_abs()", sum, (float)(stop - start) / CLOCKS_PER_SEC); } void test_nothing () { int i; unsigned long sum = 0; clock_t start, stop; srand(0); start = clock(); for (i = 0; i < MAX_ITERATIONS; ++i) { // subtract so we get both positive and negative numbers int a = rand() - 16384; sum += a; } stop = clock(); printf("%14s %10u %.3f\n", "nothing", sum, (float)(stop - start) / CLOCKS_PER_SEC); } void main () { test_nothing(); test_abs(); test_simple_abs(); test_omid_abs(); test_sbb_abs(); test_cdq_abs(); test_fish_abs(); test_sar_abs(); test_cmovl_abs(); test_cmovs_abs(); }
This page took 0.01 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.