Author: Dieter Buerssner
Date: 17:32:05 07/05/03
Go up one level in this thread
On July 05, 2003 at 16:16:54, Russell Reagan wrote:
>int sbb_abs (int a) {
> _asm {
> mov ebx, [a]
> cmp ebx, 8000h
^^^^^
> sbb eax, eax
> xor ebx, eax
> sub eax, ebx
> }
>}
Should be 80000000h
I get a very strange table when I let MSVC inline - omid is *much* faster than
nothing! And this is reproducible, and totally unexplainable by looking at the
generated assembly. Very strange.
Well, I thought, I recode it again. To make sure that all functions are handled
exactly the same, I used some macro tricks. I also made the inner loop as tight as
possible (see code later, which also includes GCC assembly), so that the
optimizer should have the easiest way to figure out what to do. Same result ...
(but not with Gcc; there, anything is slower than nothing). I can only suspect
some strange pairing between rand() and the rest. I included a small PRNG; then
the results look more reasonable. One can also see that on my computer, the
register setup stuff needed for MSVC inlining gives a significant overhead,
much more than on your or Gerd's computer. Gcc results are very consistent (abs,
simple_abs and fish_abs are the same; my gcc does not use tricks for abs). No
overhead for the register setup with inline assembly.
MSVC, Russell's code, -Ox2 -Ob2 -G6 -Gr -GF
nothing 3951541892 13.309
abs() 1713113360 14.400
simple_abs() 1713113360 17.936
omid_abs() 1713113360 7.932 !!! Yes, reproducible
sbb_abs() 1713113360 17.144
cdq_abs() 1713113360 17.555
fish_abs() 1713113360 20.900
sar_abs() 1713113360 16.464
cmovl_abs() 1713113360 17.365
cmovs_abs() 1713113360 17.345
MSVC, my code, using "own" PRNG (with rand(), the results are pretty much the same as above)
nothing 1305123480 5.858
abs 2955546426 6.099
simple_abs 2955546426 12.558
omid_abs 2955546426 6.119
sbb_abs 2955546426 16.233
cdq_abs 2955546426 15.372
fish_abs 2955546426 20.419
sar_abs 2955546426 16.033
cmovl_abs 2955546426 15.262
cmovs_abs 2955546426 15.272
Ditto, Gcc
nothing 1305123480 5.714
abs 2955546426 12.857
simple_abs 2955546426 12.857
omid_abs 2955546426 5.879
sbb_abs 2955546426 6.484
cdq_abs 2955546426 5.824
fish_abs 2955546426 12.857
sar_abs 2955546426 6.099
cmovl_abs 2955546426 6.209
cmovs_abs 2955546426 6.264
nothing 1305123480 5.714
Times are more or less reproducible. P4, 2.53 GHz.
Regards,
Dieter
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
/* Note: almost everything (even the C code) is not really portable.
It assumes arithmetic right shift for signed int, two's complement,
32-bit ints, 32-bit long, and perhaps more. */
#define N_ITERATIONS 1000000000
/* Fast, but not good PRNG (shift register).
   The state is explicitly kept within 32 bits, so the generated sequence is
   the same whether unsigned long is 32 or 64 bits wide. */
#define SR32_MASK 0xFFFFFFFFUL
#define SR32_DEFAULT_SEED 0x87654321UL

static unsigned long sr32_seed = SR32_DEFAULT_SEED;

/* Advances the generator one step and returns the new 32-bit state. */
unsigned long sr32_rand(void)
{
    unsigned long state = sr32_seed & SR32_MASK;

    /* Mask after the left shift: with a 64-bit unsigned long the shifted-out
       bits would otherwise stay in the state and change the sequence. */
    state ^= (state << 15) & SR32_MASK;
    state ^= state >> 17;
    sr32_seed = state;
    return state;
}

/* Re-seeds the generator. Only the low 32 bits of seed are used; if they are
   all zero the built-in default seed is installed instead, because an
   all-zero state would make sr32_rand() return 0 forever. */
void seed_sr32_rand(unsigned long seed)
{
    seed &= SR32_MASK;
    sr32_seed = (seed != 0) ? seed : SR32_DEFAULT_SEED;
}
#if 1 /* 0 to get Russell's numbers */
/* Default input source: the small shift-register PRNG defined in this file. */
#define SEED(n) seed_sr32_rand(n)
#define RAND_VAL() ((int)sr32_rand()) /* Will be pos and neg */
#else
/* Russell: C library rand(), offset by RAND_MAX/2+1 so that the values are
   both positive and negative. */
#define SEED(n) srand(n)
#define RAND_VAL() (rand() - (RAND_MAX / 2 + 1))
#endif
/* Defines `unsigned long tfunc_<name>(void)`, the timed loop for one
   candidate: it draws N_ITERATIONS values with RAND_VAL(), passes each one
   to name(), and returns the sum of the results accumulated in unsigned
   long arithmetic. Every candidate gets the exact same loop, so only the
   call to name() differs between the generated functions. */
#define DECLARE_TEST_FUNC(name) \
unsigned long tfunc_##name(void) \
{ \
unsigned long n=N_ITERATIONS; \
unsigned long sum=0; \
do \
{ \
int a = RAND_VAL(); \
sum += name(a); \
} while (--n != 0); \
return sum; \
}
/* Plain C absolute value, written as an explicit test on the sign. */
int simple_abs(int i)
{
    if (i < 0) {
        return -i;
    }
    return i;
}
DECLARE_TEST_FUNC(simple_abs)
/* Absolute value without a conditional, using a sign mask.
   Relies on 32-bit int and an arithmetic right shift of negative values. */
int omid_abs(int x)
{
    /* 0 when x >= 0, all ones (-1) when x < 0, given an arithmetic shift. */
    const int sign = x >> 31;
    /* XOR with the mask complements a negative x and leaves others alone. */
    const int flipped = x ^ sign;

    /* Subtracting the mask adds 1 in the negative case: ~x + 1 == -x. */
    return flipped - sign;
}
DECLARE_TEST_FUNC(omid_abs)
/* Absolute value via CMP/SBB (x86 only).
   Comparing a with 0x80000000 sets the carry flag exactly when a is
   non-negative; SBB of a register with itself then yields -1 (a >= 0) or
   0 (a < 0). XOR-ing a with that mask and subtracting gives |a|. */
#if __GNUC__
int sbb_abs(int a)
{
    int res, tmp;

    /* The template is built from adjacent string literals: a raw newline
       inside a string literal is not valid C. */
    __asm__ volatile(
        "cmpl $0x80000000, %1\n\t"
        "sbbl %0, %0\n\t"
        "xorl %0, %1\n\t"
        "subl %1, %0"
        : "=&r" (res), "=&r" (tmp)
        : "1" (a)
        : "cc");
    return res;
}
#else
/* MSVC inline assembler version. There is no return statement: the result
   is left in EAX, which this code relies on being taken as the return value. */
int sbb_abs (int a) {
    _asm {
        mov ebx, [a]
        cmp ebx, 80000000h
        sbb eax, eax
        xor ebx, eax
        sub eax, ebx
    }
}
#endif
DECLARE_TEST_FUNC(sbb_abs)
/* Absolute value via CDQ (x86 only).
   CDQ sign-extends EAX into EDX, so EDX becomes 0 or -1; XOR with that mask
   followed by subtracting it negates a negative value and leaves the others
   unchanged. */
#if __GNUC__
int cdq_abs(int a)
{
    /* The template is built from adjacent string literals: a raw newline
       inside a string literal is not valid C. The value is pinned to EAX
       ("a" constraint) because CDQ works on EAX/EDX implicitly. */
    __asm__ volatile(
        "cdq\n\t"
        "xorl %%edx, %%eax\n\t"
        "subl %%edx, %%eax"
        : "=a" (a)
        : "0" (a)
        : "edx", "cc");
    return a;
}
#else
/* MSVC inline assembler version. There is no return statement: the result
   is left in EAX, which this code relies on being taken as the return value. */
int cdq_abs (int a) {
    _asm {
        mov eax, [a]
        cdq
        xor eax, edx
        sub eax, edx
    }
}
#endif
DECLARE_TEST_FUNC(cdq_abs)
/* Absolute value with a conditional jump (x86 only).
   OR of the value with itself sets the flags without changing it; JNS skips
   the NEG when the sign flag is clear. */
#if __GNUC__
int fish_abs(int a)
{
    /* The template is built from adjacent string literals: a raw newline
       inside a string literal is not valid C. "1:" is a local numeric label,
       so the template can be expanded more than once in one object file. */
    __asm__ volatile(
        "orl %0, %0\n\t"
        "jns 1f\n\t"
        "negl %0\n"
        "1:"
        : "=r" (a)
        : "0" (a)
        : "cc");
    return a;
}
#else
/* MSVC inline assembler version. There is no return statement: the result
   is left in EAX, which this code relies on being taken as the return value. */
int fish_abs (int a) {
    _asm {
        mov eax, [a]
        or eax, eax
        jns fish
        neg eax
    fish:
    }
}
#endif
DECLARE_TEST_FUNC(fish_abs)
/* Absolute value via SAR (x86 only).
   An arithmetic shift right by 31 turns the value into a sign mask (0 or
   -1); adding the mask to the value and then XOR-ing with the mask gives
   |a|. */
#if __GNUC__
int sar_abs(int a)
{
    int res, tmp;

    /* The template is built from adjacent string literals: a raw newline
       inside a string literal is not valid C. res is early-clobbered
       because it is written before the last read of operand 1. */
    __asm__ volatile(
        "movl %1, %0\n\t"
        "sarl $31, %0\n\t"
        "addl %0, %1\n\t"
        "xorl %1, %0"
        : "=&r" (res), "=&r" (tmp)
        : "1" (a)
        : "cc");
    return res;
}
#else
/* MSVC inline assembler version. There is no return statement: the result
   is left in EAX, which this code relies on being taken as the return value. */
int sar_abs (int a) {
    _asm {
        mov ebx, [a]
        mov eax,ebx
        sar eax,31
        add ebx,eax
        xor eax,ebx
    }
}
#endif
DECLARE_TEST_FUNC(sar_abs)
/* Absolute value via NEG + CMOVL (x86 with CMOV support only).
   The value is copied and negated; if the flags left by NEG indicate
   "less" (the negated value is negative), the original value is moved back
   over the result. */
#if __GNUC__
int cmovl_abs(int a)
{
    int res;

    /* The template is built from adjacent string literals: a raw newline
       inside a string literal is not valid C. res is early-clobbered
       because it is written before the last read of the input operand. */
    __asm__ volatile(
        "movl %1, %0\n\t"
        "negl %0\n\t"
        "cmovll %1, %0"
        : "=&r" (res)
        : "r" (a)
        : "cc");
    return res;
}
#else
/* MSVC inline assembler version. There is no return statement: the result
   is left in EAX, which this code relies on being taken as the return value. */
int cmovl_abs (int a) {
    _asm {
        mov eax, [a]
        mov ecx, eax
        neg eax
        cmovl eax, ecx
    }
}
#endif
DECLARE_TEST_FUNC(cmovl_abs)
/* Absolute value via NEG + CMOVS (x86 with CMOV support only).
   The value is copied and negated; if the sign flag left by NEG is set
   (the negated value is negative), the original value is moved back over
   the result. */
#if __GNUC__
int cmovs_abs(int a)
{
    int res;

    /* The template is built from adjacent string literals: a raw newline
       inside a string literal is not valid C. res is early-clobbered
       because it is written before the last read of the input operand. */
    __asm__ volatile(
        "movl %1, %0\n\t"
        "negl %0\n\t"
        "cmovsl %1, %0"
        : "=&r" (res)
        : "r" (a)
        : "cc");
    return res;
}
#else
/* MSVC inline assembler version. There is no return statement: the result
   is left in EAX, which this code relies on being taken as the return value. */
int cmovs_abs (int a) {
    _asm {
        mov eax, [a]
        mov ecx,eax
        neg eax
        cmovs eax,ecx
    }
}
#endif
DECLARE_TEST_FUNC(cmovs_abs)
/* Baseline candidate: hands its argument back unchanged. */
int nothing(int a)
{
    const int passthrough = a;

    return passthrough;
}
DECLARE_TEST_FUNC(nothing)
DECLARE_TEST_FUNC(abs)
/* Don't inline this */
void time_func(unsigned long (*test_function)(), const char *name);
#define TIME_FUNCTION(name) time_func(tfunc_##name, #name)
/* Times every candidate in a fixed order; each TIME_FUNCTION line expands to
   one time_func() call on the matching tfunc_<name> loop. */
int main(void)
{
    /* Baseline and library abs() first. */
    TIME_FUNCTION(nothing);
    TIME_FUNCTION(abs);

    /* Plain C variants. */
    TIME_FUNCTION(simple_abs);
    TIME_FUNCTION(omid_abs);

    /* Inline assembly variants. */
    TIME_FUNCTION(sbb_abs);
    TIME_FUNCTION(cdq_abs);
    TIME_FUNCTION(fish_abs);
    TIME_FUNCTION(sar_abs);
    TIME_FUNCTION(cmovl_abs);
    TIME_FUNCTION(cmovs_abs);

    return 0;
}
/* Runs one benchmark loop and prints a result row.
   test_function: the loop to time; its return value is printed as a checksum.
   name: label printed in the first column.
   The row contains the label, the checksum and the elapsed clock() time in
   seconds. */
void time_func(unsigned long (*test_function)(), const char *name)
{
    clock_t start, elapsed;
    unsigned long sum;

    /* Re-seed with the same value before every run. */
    SEED(0);
    start = clock();
    sum = test_function();
    elapsed = clock() - start;
    /* sum is an unsigned long, so the conversion must be %lu; %u with an
       unsigned long argument is undefined behavior. */
    printf("%14s %10lu %.3f\n", name, sum, (double)elapsed / CLOCKS_PER_SEC);
}
This page took 0.02 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.