Author: Gerd Isenberg
Date: 12:25:08 07/16/03
Go up one level in this thread
On July 16, 2003 at 14:41:45, Dieter Buerssner wrote:
>Just few comments about the thread.
>
>An interesting test would be, to do lmbench type linked list test with Vincent's
>idea of real random access. I may try it out later. No PRNG calls will be
>needed. The linked list will be initialized "pseudo randomly". In this case, it
>would mean, that it will not be too close to real random, because in one cycle
>every memory adress will be read once. (This could easily happen anyway, with
>not so decent PRNGs).
>
>An perhaps interesting comment from lmbench source:
>
>
> /*
> * First create a list of pointers.
> *
> * This used to go forwards, we want to go backwards to try and defeat
> * HP's fetch ahead.
> *
> * We really need to do a random pattern once we are doing one hit per
> * page.
> */
>
>So, the authors did not seem too confident with the sequential like access? Or
>did I misunderstand.
>
>The PRNG Vincent uses is fine. I will do some tests on it. Lagged Fibonacci type
>generators don't have problems with mod (often rand() uses a linear congruential
>generator, which can have severe problem, especially when used with mod. Anyway,
>for this sort of test, I think even very bad PRNGs would do well. There is no
>way, the hardware can guess the access pattern.
>
>Regards,
>Dieter
Hi Dieter,
what about the hashsize and the mod?
I tried Vincent's test with some loop unrolling - very strange.
int DoNrng(BITBOARD n) {
BITBOARD i,i1,i2,dummyres,nents;
int t1,t2;
nents = nentries; /* hopefully this gets into a register */
dummyres = globaldummy;
n >>= 1;
t1 = GetClock();
for (i=0; i < n; i += 2) {
i1 = RanrotA()%nents;
dummyres ^= i1;
i2 = RanrotA()%nents;
dummyres ^= i2;
}
t2 = GetClock();
globaldummy = dummyres;
return(t2-t1);
}
int DoNreads(BITBOARD n) {
BITBOARD i=0,dummyres,nents, i1, i2;
int t1,t2;
nents = nentries; /* hopefully this gets into a register */
dummyres = globaldummy;
n >>= 1;
t1 = GetClock();
for (i=0; i < n; i += 2) {
i1 = RanrotA()%nents;
dummyres ^= hashtable[i1];
i2 = RanrotA()%nents;
dummyres ^= hashtable[i2];
}
t2 = GetClock();
globaldummy = dummyres;
return(t2-t1);
}
Even more strance with more agressive unrolling ;-)
What happens here, did i made an error with unrolling here?
Cheers,
Gerd
I switched off optimization but got results like this (even with longer times):
C:\Source\latency\Release>latency 300000000 1
Welcome to RASM Latency!
RASML measures the RANDOM AVERAGE SHARED MEMORY LATENCY!
Stored in rasmexename = C:\Source\latency\Release\latency.exe
Trying to allocate 37500000 entries. In total 300000000 bytes
Benchmarking Pseudo Random Number Generator speed, RanRot type 'B'!
Speed depends upon CPU and compile options from RASML,
therefore we benchmark the RNG
Please wait a few seconds.. ..took 5368 milliseconds to generate numbers
Speed of RNG = 19076005 numbers a second
So 1 RNG call takes 52.421878 nanoseconds
Benchmarking random RNG test. Please wait..
timetaken=2424
Machine needs 48.480002 ns for RND loop
Trying to Allocate Buffer
Took 0.000 seconds to allocate Hash
Clearing hashtable
Took 0.821 seconds to clear Hash
Starting Other processes
Took 0 milliseconds to start 0 additional processes
Read latency measurement STARTS NOW using steps of 2 * 1.000 seconds :
Raw Average measured read read time at 1 processes = 155.465361 ns
Now for the final calculation it gets compensated:
Average measured read read time at 1 processes = 106.985360 ns
The assembler listing of DoNreads:
?DoNreads@@YAH_K@Z PROC NEAR ; DoNreads
; 427 : int DoNreads(BITBOARD n) {
push ebp
mov ebp, esp
sub esp, 48 ; 00000030H
push esi
; 428 : BITBOARD i,dummyres,nents, i1, i2;
; 429 : int t1,t2;
; 430 :
; 431 : nents = nentries; /* hopefully this gets into a register */
mov eax, DWORD PTR ?nentries@@3_KA
mov DWORD PTR _nents$[ebp], eax
mov ecx, DWORD PTR ?nentries@@3_KA+4
mov DWORD PTR _nents$[ebp+4], ecx
; 432 : dummyres = globaldummy;
mov edx, DWORD PTR ?globaldummy@@3_KA
mov DWORD PTR _dummyres$[ebp], edx
mov eax, DWORD PTR ?globaldummy@@3_KA+4
mov DWORD PTR _dummyres$[ebp+4], eax
; 433 :
; 434 : n >>= 1;
mov eax, DWORD PTR _n$[ebp]
mov edx, DWORD PTR _n$[ebp+4]
mov ecx, 1
call __aullshr
mov DWORD PTR _n$[ebp], eax
mov DWORD PTR _n$[ebp+4], edx
; 435 : t1 = GetClock();
call ?GetClock@@YAHXZ ; GetClock
mov DWORD PTR _t1$[ebp], eax
; 436 : for (i=0; i < n; i += 2) {
mov DWORD PTR _i$[ebp], 0
mov DWORD PTR _i$[ebp+4], 0
jmp SHORT $L43049
$L43050:
mov ecx, DWORD PTR _i$[ebp]
add ecx, 2
mov edx, DWORD PTR _i$[ebp+4]
adc edx, 0
mov DWORD PTR _i$[ebp], ecx
mov DWORD PTR _i$[ebp+4], edx
$L43049:
mov eax, DWORD PTR _i$[ebp+4]
cmp eax, DWORD PTR _n$[ebp+4]
ja $L43051
jb SHORT $L43322
mov ecx, DWORD PTR _i$[ebp]
cmp ecx, DWORD PTR _n$[ebp]
jae $L43051
$L43322:
; 437 : i1 = RanrotA()%nents;
call ?RanrotA@@YA_KXZ ; RanrotA
mov ecx, DWORD PTR _nents$[ebp+4]
push ecx
mov ecx, DWORD PTR _nents$[ebp]
push ecx
push edx
push eax
call __aullrem
mov DWORD PTR _i1$[ebp], eax
mov DWORD PTR _i1$[ebp+4], edx
; 438 : dummyres ^= hashtable[i1];
push 0
push 8
mov edx, DWORD PTR _i1$[ebp+4]
push edx
mov eax, DWORD PTR _i1$[ebp]
push eax
call __allmul
mov ecx, DWORD PTR ?hashtable@@3PA_KA ; hashtable
mov edx, DWORD PTR _dummyres$[ebp]
xor edx, DWORD PTR [ecx+eax]
mov esi, DWORD PTR _dummyres$[ebp+4]
xor esi, DWORD PTR [ecx+eax+4]
mov DWORD PTR _dummyres$[ebp], edx
mov DWORD PTR _dummyres$[ebp+4], esi
; 439 : i2 = RanrotA()%nents;
call ?RanrotA@@YA_KXZ ; RanrotA
mov ecx, DWORD PTR _nents$[ebp+4]
push ecx
mov ecx, DWORD PTR _nents$[ebp]
push ecx
push edx
push eax
call __aullrem
mov DWORD PTR _i2$[ebp], eax
mov DWORD PTR _i2$[ebp+4], edx
; 440 : dummyres ^= hashtable[i2];
push 0
push 8
mov edx, DWORD PTR _i2$[ebp+4]
push edx
mov eax, DWORD PTR _i2$[ebp]
push eax
call __allmul
mov ecx, DWORD PTR ?hashtable@@3PA_KA ; hashtable
mov edx, DWORD PTR _dummyres$[ebp]
xor edx, DWORD PTR [ecx+eax]
mov esi, DWORD PTR _dummyres$[ebp+4]
xor esi, DWORD PTR [ecx+eax+4]
mov DWORD PTR _dummyres$[ebp], edx
mov DWORD PTR _dummyres$[ebp+4], esi
; 441 : }
jmp $L43050
$L43051:
; 442 : t2 = GetClock();
call ?GetClock@@YAHXZ ; GetClock
mov DWORD PTR _t2$[ebp], eax
; 443 : globaldummy = dummyres;
mov eax, DWORD PTR _dummyres$[ebp]
mov DWORD PTR ?globaldummy@@3_KA, eax
mov ecx, DWORD PTR _dummyres$[ebp+4]
mov DWORD PTR ?globaldummy@@3_KA+4, ecx
; 444 : return(t2-t1);
mov eax, DWORD PTR _t2$[ebp]
sub eax, DWORD PTR _t1$[ebp]
; 445 : }
pop esi
mov esp, ebp
pop ebp
ret 0
?DoNreads@@YAH_K@Z ENDP ; DoNreads
This page took 0 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.