#include #include #include /* Test a U(0,1) (uniformly distributed numbers between 0 and 1) random number generator to see if it starts repeating when it is expected to do so. If the test does not pass because the first repetition happens too soon, (the computed fr is below the expected one), this means that the random number generator is likely to be using only a subset of all possible values (for example, the numbers are not fully double precision, the last 20 bits are zero -- a VERY common mistake). If the test does not pass because the first repetition happens too late (and in many cases the hashing table will overflow), this means that the numbers are unusually uniformly spread. A common cause for this lack of randomnes comes from the use of modular methods which will go over all the elements of a field before starting to repeat. In other words, a good U(0,1) floating point number generator should pass this test. In particular if large numbers of random values will be generated and their difference matters. What follows is a brief explanation of the math involved. Gaston H. Gonnet (May 3rd, 2003) Please cite as: @techreport{ Gonnet-repeating-test-03, author = {Gaston Gonnet}, title = {Repeating Time Test for U(0,1) Random Number Generators}, month = { May }, year = {2003}, number = { }, howpublished = { Electronic publication }, copyright = {GNU General Public License}, institution = {Informatik, ETH, Zurich}, abstract = {A runnable C program to test a U(0,1) random number generator for the time of first repetition. }, URL = { http://www.inf.ethz.ch/personal/gonnet/RepetitionTest.html } } The expected time of finding a repetition (fr), assuming that all n elements are equally likely to appear is: fr := sum( n!/(n-i)!/n^i, i=0..n ); n ----- \ n! fr := ) ----------- / i ----- (n - i)! n i = 0 fr := hypergeom([1, -n], [], - 1/n) This is Knuth's Q(n) function. For large n, this is: 1/2 1/2 1/2 1/2 2 Pi fr := 1/2 2 (Pi n) + 2/3 + 1/24 ---------- - 4/135 1/n 1/2 n 1/2 1/2 1/2 1/2 2 Pi 1 139 2 Pi + 1/576 ---------- + 8/2835 ---- - ------ ---------- + ... 3/2 2 103680 5/2 n n n The inverse of this (to approximate n using fr) is 16 2 --- - 1/144 Pi fr fr 1 1 405 n = 2 --- - 8/3 ---- + 8/9 ---- - 1/6 + 8/135 ---- + -------------- Pi Pi Pi fr 2 fr 32 ---- - 1/140 Pi 1215 + --------------- + ... 3 fr The second moment of fr is fr2 := sum( (2*i+1)*n!/(n-i)!/n^i, i=0..n ); fr2 := hypergeom([1, -n], [], - 1/n) + 2 hypergeom([2, -n + 1], [], - 1/n) which simplifies to fr2 := 2*n+fr For a floating point system, n should be the same as (B-1)/DBL_EPSILON. (notice that DBL_EPSILON is the smallest value such that 1+DBL_EPSILON > 1, and hence its inverse gives the number of possible values for a fixed exponent. B is the base.) */ #define A(x) ((x)&(BATCH-1)) #define ABS(x) ((x)>=0?(x):-(x)) /* Macro to add b to a and adding the error of the addition in c */ /* Used for more accurate summations. */ #define ADD_ERR(a,b,c) { double tmpa, tmpb, tmpe; \ tmpa = a; tmpb = b;\ a += tmpb; \ if( ABS(tmpa) > ABS(tmpb) ) \ { tmpe = a-tmpa; c += tmpb-tmpe; } \ else { tmpe = a-tmpb; c += tmpa-tmpe; } } #define B FLT_RADIX #define n ((B-1)/DBL_EPSILON) #define Pi 3.1415926535897932385 #define BATCH 2048 double a[BATCH]; /***********************************/ /* parameters which can be changed */ /***********************************/ /* approximately 1.8Gb of hashing area, which is good for testing doubles with 53 bit mantissas. For other floating point systems, MAXA should be about 4/sqrt(DBL_EPSILON/(B-1)). NOTICE: Do not run on hardware with less than MAXA doubles of main memory -- it is hopeless. For a given MAXA, the hashing table may be too short with probability exp( - MAXA^2 * DBL_EPSILON / 2 / (B-1) ) */ #define MAXA (128*1824*1024) /* Number of times that the first repetition will be computed. The bigger, the longer it takes, but the more subtle problems that may be discovered. The variance of fr is quite large so very small samples are not very revealing. */ int nexp={100}; /* Lower limit of the range to be analyzed. The test studies one swath of numbers at a time. The swath is between lowerlimit and lowerlimit*B. The random numbers generated are sieved for this range. If lowerlimit is too small, expect running time O(1/lowerlimit), just due to generating enough numbers in the right range. This test does not work if different precisions are mixed. It has to be done like this. */ double lowerlimit={0.5}; /* generate random numbers in batches (of BATCH) so that we make sure that they are stored in a double in memory (and not kept in a register with extra bits that will ruin equality comparisons) */ void randbatch() { double r0, r1; int i; for( i=0; i 1 ) a[i] -= 1; else if( a[i] < 0 ) a[i] += 1; } } /******************************/ /* end of settable parameters */ /******************************/ /* Copyright (C) 2003 Gaston H. Gonnet This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ main( int argc, char *argv[] ) { int i, iter, j, k; double *h, tot1={0}, tot2={0}; double ave, var, fr, fr2, varfr, stdfr; /* initialize batch */ a[0] = 0.10347613976419651; for( i=1; i= lowerlimit && a[j] < B*lowerlimit ) { k++; if( k > MAXA-2000000 ) { printf( "exceeded hash table size (adding 10%%)\n" ); k += k/10; goto collision; /* too many iterations */ } for( i = rint( MAXA*(a[j]/lowerlimit-1) ); 1; i++ ) { if( i >= MAXA ) i -= MAXA; if( h[i] == -1.0 ) { h[i] = a[j]; break; } if( h[i] == a[j] ) goto collision; } } } } collision: tot1 += k; tot2 += (double)k * (double)k; printf( "iter %d finished, %d to go, k=%d\n", iter+1, nexp-iter-1, k ); } var = (nexp*tot2-tot1*tot1) / (nexp-1) / nexp; ave = tot1/nexp; printf( "%d tests\n", nexp ); printf( "testing random numbers between %.10g and %.10g\n", lowerlimit, B*lowerlimit ); printf( "average numbers to a collision: %.5g +- %.5g\n", ave, sqrt( var/nexp ) * 1.96 ); printf( "observed average corresponds to a size: %.5g\n", 2*ave*ave/Pi - 8/3.0/Pi*ave + (8/9.0/Pi-1/6.0) + 8/135.0/ave ); fr = 2/3.0 + 1.253314137315500251208 * sqrt(n); fr2 = 2*n + fr; varfr = fr2 - fr*fr; stdfr = sqrt(varfr); printf( "the expected first repetition is %.5g, std=%.5g\n", fr, stdfr ); if( ave >= fr-1.96*stdfr/sqrt(nexp) && ave <= fr+1.96*stdfr/sqrt(nexp) ) printf( "The random number generator passes the test !!!\n" ); else printf( "The random number generator failed the test by %.5g standard deviations\n", (ave-fr) / stdfr ); } Repeating Time Test for U(0,1) Random Number Generators

Test a U(0,1) (uniformly distributed numbers between 0 and 1) random number generator to see if it starts repeating when it is expected to do so.


by Gaston H. Gonnet

     This is running code to test a random number generator. See the comments for all details.

© 2003 by Gaston Gonnet, Informatik, ETH Zurich

Last updated on Thu May 1 22:29:26 2003 by GhG