Author: Dieter Buerssner
Date: 17:32:05 07/05/03
Go up one level in this thread
On July 05, 2003 at 16:16:54, Russell Reagan wrote: >int sbb_abs (int a) { > _asm { > mov ebx, [a] > cmp ebx, 8000h ^^^^^ > sbb eax, eax > xor ebx, eax > sub eax, ebx > } >} Should be 80000000h I get a very strange table, when I let MSVC inline - omid is *much* faster than nothing! And this reproducable. And totally unexplainable by looking at the generated assembly. Very strange. Well, I thought, I recode it again. To make sure, that all functions are handled exactly the same, I used some macro tricks. Also made the inner loop as tight as possible (see code later, which also includes GCC-assembly), so that the optimizer should have the easiest way to figure out what to do. Same result ... (but not with Gcc, there anything is slower than nothing). I can only supsect some strange pairing between rand() and the rest. I included a small PRNG, then the results look more reasonable. One can also see, that on my computer, the register setup stuff needed for MSVC inlining gives an significant overhead, much more than on your or Gerd's computer. Gcc results are very consistent (abs, simple_abs and fish_abs are the same, my gcc does not use tricks for abs). No overhead for the register setup with inline assembly. MSVC, Russel's code, -Ox2 -Ob2 -G6 -Gr -GF nothing 3951541892 13.309 abs() 1713113360 14.400 simple_abs() 1713113360 17.936 omid_abs() 1713113360 7.932 !!! Yes, reprocucable sbb_abs() 1713113360 17.144 cdq_abs() 1713113360 17.555 fish_abs() 1713113360 20.900 sar_abs() 1713113360 16.464 cmovl_abs() 1713113360 17.365 cmovs_abs() 1713113360 17.345 MSVC, my code, using "own" PRNG (pretty same results as above with rand()) nothing 1305123480 5.858 abs 2955546426 6.099 simple_abs 2955546426 12.558 omid_abs 2955546426 6.119 sbb_abs 2955546426 16.233 cdq_abs 2955546426 15.372 fish_abs 2955546426 20.419 sar_abs 2955546426 16.033 cmovl_abs 2955546426 15.262 cmovs_abs 2955546426 15.272 Ditto, Gcc nothing 1305123480 5.714 abs 2955546426 12.857 simple_abs 2955546426 12.857 omid_abs 2955546426 5.879 sbb_abs 2955546426 6.484 cdq_abs 2955546426 5.824 fish_abs 2955546426 12.857 sar_abs 2955546426 6.099 cmovl_abs 2955546426 6.209 cmovs_abs 2955546426 6.264 nothing 1305123480 5.714 times are more or less reproducable. P4, 2.53 GHz. Regards, Dieter #include <stdio.h> #include <stdlib.h> #include <math.h> #include <time.h> /* Note, almost everything (even C code) not really portable. Assumes arithmetic right shift for signed int, 2 complement, 32 bit ints, 32 bit long, perhaps more */ #define N_ITERATIONS 1000000000 /* Fast, but not good PRNG (shift register) */ static unsigned long sr32_seed=0x87654321UL; unsigned long sr32_rand(void) { unsigned long tmp = sr32_seed ^ (sr32_seed << 15); return sr32_seed = tmp ^ (tmp >> 17); } void seed_sr32_rand(unsigned long seed) { sr32_seed = 0x87654321UL; if (seed != 0) sr32_seed = seed; } #if 1 /* 0 to get Russel's numbers */ #define SEED(n) seed_sr32_rand(n) #define RAND_VAL() (int)sr32_rand() /* Will be pos and neg */ #else /* Russel */ #define SEED(n) srand(n) #define RAND_VAL() rand()-(RAND_MAX/2+1) #endif #define DECLARE_TEST_FUNC(name) \ unsigned long tfunc_##name(void) \ { \ unsigned long n=N_ITERATIONS; \ unsigned long sum=0; \ do \ { \ int a = RAND_VAL(); \ sum += name(a); \ } while (--n != 0); \ return sum; \ } int simple_abs (int i) { return (i >= 0) ? i : -i; } DECLARE_TEST_FUNC(simple_abs) int omid_abs (int x) { int y; y = x >> 31; return (x ^ y) - y; } DECLARE_TEST_FUNC(omid_abs) #if __GNUC__ int sbb_abs(int a) { int res, tmp; __asm__ volatile( "cmpl $0x80000000, %1 sbbl %0, %0 xorl %0, %1 subl %1, %0" : "=r&" (res), "=r&" (tmp) : "1" (a) : "cc"); return res; } #else int sbb_abs (int a) { _asm { mov ebx, [a] cmp ebx, 80000000h sbb eax, eax xor ebx, eax sub eax, ebx } } #endif DECLARE_TEST_FUNC(sbb_abs) #if __GNUC__ int cdq_abs(int a) { __asm__ volatile( "cdq xorl %%edx, %%eax subl %%edx, %%eax" : "=a" (a) : "0" (a) : "edx", "cc"); return a; } #else int cdq_abs (int a) { _asm { mov eax, [a] cdq xor eax, edx sub eax, edx } } #endif DECLARE_TEST_FUNC(cdq_abs) #if __GNUC__ int fish_abs(int a) { __asm__ volatile( "orl %0, %0 jns 1f negl %0 1:" : "=r" (a) : "0" (a) : "cc"); return a; } #else int fish_abs (int a) { _asm { mov eax, [a] or eax, eax jns fish neg eax fish: } } #endif DECLARE_TEST_FUNC(fish_abs) #if __GNUC__ int sar_abs(int a) { int res, tmp; __asm__ volatile( "movl %1, %0 sarl $31, %0 addl %0, %1 xorl %1, %0" : "=r&" (res), "=r&" (tmp) : "1" (a) : "cc"); return res; } #else int sar_abs (int a) { _asm { mov ebx, [a] mov eax,ebx sar eax,31 add ebx,eax xor eax,ebx } } #endif DECLARE_TEST_FUNC(sar_abs) #if __GNUC__ int cmovl_abs(int a) { int res; __asm__ volatile( "movl %1, %0 negl %0 cmovll %1, %0" : "=r&" (res) : "r" (a) : "cc"); return res; } #else int cmovl_abs (int a) { _asm { mov eax, [a] mov ecx, eax neg eax cmovl eax, ecx } } #endif DECLARE_TEST_FUNC(cmovl_abs) #if __GNUC__ int cmovs_abs(int a) { int res; __asm__ volatile( "movl %1, %0 negl %0 cmovsl %1, %0" : "=r&" (res) : "r" (a) : "cc"); return res; } #else int cmovs_abs (int a) { _asm { mov eax, [a] mov ecx,eax neg eax cmovs eax,ecx } } #endif DECLARE_TEST_FUNC(cmovs_abs) int nothing(int a) { return a; } DECLARE_TEST_FUNC(nothing) DECLARE_TEST_FUNC(abs) /* Don't inline this */ void time_func(unsigned long (*test_function)(), const char *name); #define TIME_FUNCTION(name) time_func(tfunc_##name, #name) int main (void) { TIME_FUNCTION(nothing); TIME_FUNCTION(abs); TIME_FUNCTION(simple_abs); TIME_FUNCTION(omid_abs); TIME_FUNCTION(sbb_abs); TIME_FUNCTION(cdq_abs); TIME_FUNCTION(fish_abs); TIME_FUNCTION(sar_abs); TIME_FUNCTION(cmovl_abs); TIME_FUNCTION(cmovs_abs); return 0; } void time_func(unsigned long (*test_function)(), const char *name) { clock_t clk; unsigned long sum; SEED(0); clk = clock(); sum = test_function(); clk = clock() - clk; printf("%14s %10u %.3f\n", name, sum, (double)clk / CLOCKS_PER_SEC); }
This page took 0.01 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.