Author: Dieter Buerssner
Date: 06:16:10 07/17/03
I use an inner loop, that just translates to a stream of move memory to register instructions (one for each access). Here are some results (source at the end of the posting, not well tested, please report the errors/flaws ...) C:\yace\vincent>dblat 300000000 Setting up a random access pattern, may take a while Finished Random access: 30.864 s, 308.640 ns/access Testing same pattern again Random access: 30.744 s, 307.440 ns/access Setting up a different random access pattern, may take a while Finished Random access: 30.735 s, 307.350 ns/access Testing same pattern again Random access: 30.724 s, 307.240 ns/access Sequential access offset 1: 0.310 s, 3.100 ns/access Sequential access offset 2: 0.601 s, 6.010 ns/access Sequential access offset 4: 1.182 s, 11.820 ns/access Sequential access offset 8: 2.263 s, 22.630 ns/access Sequential access offset 16: 4.847 s, 48.470 ns/access Sequential access offset 32: 17.315 s, 173.150 ns/access Sequential access offset 64: 16.984 s, 169.840 ns/access Sequential access offset 128: 17.475 s, 174.750 ns/access Sequential access offset 256: 18.296 s, 182.960 ns/access Sequential access offset 512: 19.688 s, 196.880 ns/access Sequential access offset 1024: 22.513 s, 225.130 ns/access Sequential access offset 2048: 23.013 s, 230.130 ns/access Sequential access offset 4096: 22.883 s, 228.830 ns/access Sequential access offset 8192: 23.603 s, 236.030 ns/access Sequential access offset -1: 0.330 s, 3.300 ns/access Sequential access offset -2: 0.620 s, 6.200 ns/access Sequential access offset -4: 1.222 s, 12.220 ns/access Sequential access offset -8: 2.453 s, 24.530 ns/access Sequential access offset -16: 4.847 s, 48.470 ns/access Sequential access offset -32: 17.355 s, 173.550 ns/access Sequential access offset -64: 16.944 s, 169.440 ns/access Sequential access offset -128: 17.455 s, 174.550 ns/access Sequential access offset -256: 18.206 s, 182.060 ns/access Sequential access offset -512: 19.538 s, 195.380 ns/access Sequential access 
offset -1024: 22.282 s, 222.820 ns/access Sequential access offset -2048: 22.663 s, 226.630 ns/access Sequential access offset -4096: 22.653 s, 226.530 ns/access Sequential access offset -8192: 23.444 s, 234.440 ns/access Vincent's program reports 325 ns, which is not too far off from the random access number. C:\yace\vincent>dblat 100000000 Setting up a random access pattern, may take a while Finished Random access: 24.315 s, 243.150 ns/access Testing same pattern again Random access: 24.175 s, 241.750 ns/access Setting up a different random access pattern, may take a while Finished Random access: 24.165 s, 241.650 ns/access Testing same pattern again Random access: 24.174 s, 241.740 ns/access Sequential access offset 1: 0.320 s, 3.200 ns/access Sequential access offset 2: 0.601 s, 6.010 ns/access Sequential access offset 4: 1.162 s, 11.620 ns/access Sequential access offset 8: 2.263 s, 22.630 ns/access Sequential access offset 16: 4.857 s, 48.570 ns/access Sequential access offset 32: 17.345 s, 173.450 ns/access Sequential access offset 64: 16.974 s, 169.740 ns/access Sequential access offset 128: 17.456 s, 174.560 ns/access Sequential access offset 256: 18.126 s, 181.260 ns/access Sequential access offset 512: 19.509 s, 195.090 ns/access Sequential access offset 1024: 22.252 s, 222.520 ns/access Sequential access offset 2048: 22.753 s, 227.530 ns/access Sequential access offset 4096: 22.772 s, 227.720 ns/access Sequential access offset 8192: 23.353 s, 233.530 ns/access Sequential access offset -1: 0.330 s, 3.300 ns/access Sequential access offset -2: 0.631 s, 6.310 ns/access Sequential access offset -4: 1.242 s, 12.420 ns/access Sequential access offset -8: 2.454 s, 24.540 ns/access Sequential access offset -16: 4.827 s, 48.270 ns/access Sequential access offset -32: 17.364 s, 173.640 ns/access Sequential access offset -64: 16.964 s, 169.640 ns/access Sequential access offset -128: 17.465 s, 174.650 ns/access Sequential access offset -256: 18.106 s, 181.060 
ns/access Sequential access offset -512: 19.498 s, 194.980 ns/access Sequential access offset -1024: 22.262 s, 222.620 ns/access Sequential access offset -2048: 22.682 s, 226.820 ns/access Sequential access offset -4096: 22.652 s, 226.520 ns/access Sequential access offset -8192: 23.433 s, 234.330 ns/access Vincent: 256 ns. Note, random access is faster than before. I get similar numbers for smaller sizes bigger than the cache. One final example, for everything in L2 cache: C:\yace\vincent>dblat 250000 Setting up a random access pattern, may take a while Finished Random access: 0.751 s, 7.510 ns/access Testing same pattern again Random access: 0.751 s, 7.510 ns/access Setting up a different random access pattern, may take a while Finished Random access: 0.751 s, 7.510 ns/access Testing same pattern again Random access: 0.751 s, 7.510 ns/access Sequential access offset 1: 0.100 s, 1.000 ns/access Sequential access offset 2: 0.120 s, 1.200 ns/access Sequential access offset 4: 0.180 s, 1.800 ns/access Sequential access offset 8: 0.481 s, 4.810 ns/access Sequential access offset 16: 0.751 s, 7.510 ns/access Sequential access offset 32: 0.751 s, 7.510 ns/access Sequential access offset 64: 0.771 s, 7.710 ns/access Sequential access offset 128: 0.751 s, 7.510 ns/access Sequential access offset 256: 0.762 s, 7.620 ns/access Sequential access offset 512: 0.751 s, 7.510 ns/access Sequential access offset 1024: 0.751 s, 7.510 ns/access Sequential access offset 2048: 0.761 s, 7.610 ns/access Sequential access offset 4096: 0.811 s, 8.110 ns/access Sequential access offset 8192: 0.751 s, 7.510 ns/access Sequential access offset -1: 0.120 s, 1.200 ns/access Sequential access offset -2: 0.150 s, 1.500 ns/access Sequential access offset -4: 0.381 s, 3.810 ns/access Sequential access offset -8: 0.751 s, 7.510 ns/access Sequential access offset -16: 0.741 s, 7.410 ns/access Sequential access offset -32: 0.741 s, 7.410 ns/access Sequential access offset -64: 0.741 s, 7.410 ns/access 
Sequential access offset -128: 0.771 s, 7.710 ns/access Sequential access offset -256: 0.741 s, 7.410 ns/access Sequential access offset -512: 0.752 s, 7.520 ns/access Sequential access offset -1024: 0.751 s, 7.510 ns/access Sequential access offset -2048: 0.751 s, 7.510 ns/access Sequential access offset -4096: 0.861 s, 8.610 ns/access Sequential access offset -8192: 0.811 s, 8.110 ns/access Summary: There is more, than just an lmbench number. Actually the comment in lmbench source suggests, that they actually wanted to get the random access times. I don't want to argue about defenition of the "real" memory latency. But for chess programs/hash the Vinent type number is the most interesting. Regards, Dieter /* dblat.c * In the current form, it will not work with memory sizes * of 4 Gb or bigger. * But it can easily be fixed, by changing the PRNG * run as "dblat memory_size_in_bytes" */ #include <stdio.h> #include <stdlib.h> #include <time.h> #define N_LOOPS 100000000UL double time_stamp(void) { /* Use the timing method, you like */ return (double)clock()/CLOCKS_PER_SEC; } void *access_loop(void **buf) { size_t n; void **p = buf; /* Unroll by 10, change to your liking */ n = N_LOOPS/10; /* We dont care about possible remainder */ do { p = (void **)*p; p = (void **)*p; p = (void **)*p; p = (void **)*p; p = (void **)*p; p = (void **)*p; p = (void **)*p; p = (void **)*p; p = (void **)*p; p = (void **)*p; } while (--n != 0); return (void *)p; } void time_access(const char *prompt, void **buf) { double ts = time_stamp(); access_loop(buf); ts = time_stamp()-ts; printf("%s: %7.3f s, %7.3f ns/access\n", prompt, ts, ts/N_LOOPS*1e9); } void setup_seq(void **buf, size_t n, int offset) { size_t i; for (i=0; i<n; i++) buf[i] = buf+((i+offset)%n); /* No need to opt. 
away the % for ini */ } /* Vinc 325, 254 */ /* PRNG Only for 32 bit pointers */ #define MY_RAND_MAX 0xffffffffUL #define MY_RAND() mwc1616() static unsigned long zseed = 0x12345678UL; static unsigned long wseed = 0x87654321UL; /* Combination of 2 multiply with carry generators, just because it does not need much source code */ static unsigned long mwc1616(void) { unsigned long t = zseed; zseed=30903*(t&0xffff)+(t>>16); t = wseed; wseed=18000*(t&0xffff)+(t>>16); return ((wseed<<16)&0xffffffffUL) + (zseed&0xffff); } /* Do it as careful as possible */ /* If you have 64 bit pointers, and unsigned long is smaller 64 bits, and you want to test memory sizes >= 4 GB, this has to be changed */ static unsigned long rand_range(unsigned long range) { unsigned long rmax, r, d; /* find the largest number rmax <= MY_RAND_MAX, for which (rmax+1) % range == 0. All returns from rand() > rmax will be skipped, to guarantee equal probability for all return values. */ d = (MY_RAND_MAX+1U-range) / range + 1; /* Note, the overflow is ok */ rmax = d * range - 1; /* -1 to avoid "overflow to zero" */ do r = MY_RAND(); while (r > rmax); return r/d; } void setup_random(void **buf, size_t n) { size_t i, r; void *tmp; setup_seq(buf, n, 1); for (i=n-1; i>0; i--) { do { r = rand_range(i+1); tmp = buf[r]; } while (tmp == buf+i); /* Can this happen? 
*/ buf[r] = buf[i]; buf[i] = tmp; } } int main(int argc, char *argv[]) { int offset; size_t memsiz, n; void **buf; char prompt[256]; if (argc != 2) return EXIT_FAILURE; memsiz = atol(argv[1]); n = memsiz/sizeof *buf; buf = malloc(memsiz); if (buf == NULL) return EXIT_FAILURE; printf("Setting up a random access pattern, may take a while\n"); setup_random(buf, n); printf("Finished\n"); sprintf(prompt, "Random access"); time_access(prompt, buf); printf("Testing same pattern again\n"); time_access(prompt, buf); printf("Setting up a different random access pattern, may take a while\n"); setup_random(buf, n); printf("Finished\n"); time_access(prompt, buf); printf("Testing same pattern again\n"); time_access(prompt, buf); for (offset=1; offset <= 8192 && offset < n; offset*=2) { setup_seq(buf, n, offset); sprintf(prompt, "Sequential access offset %5d", offset); time_access(prompt, buf); } for (offset=-1; offset >= -8192 && -offset < n; offset*=2) { setup_seq(buf, n, offset); sprintf(prompt, "Sequential access offset %5d", offset); time_access(prompt, buf); } free(buf); return EXIT_SUCCESS; }
This page took 0.03 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.