1  /* stat.c -- statistical tests of random number sequences. */
       2  
       3  /*
       4  Copyright 1999, 2000 Free Software Foundation, Inc.
       5  
       6  This file is part of the GNU MP Library test suite.
       7  
       8  The GNU MP Library test suite is free software; you can redistribute it
       9  and/or modify it under the terms of the GNU General Public License as
      10  published by the Free Software Foundation; either version 3 of the License,
      11  or (at your option) any later version.
      12  
      13  The GNU MP Library test suite is distributed in the hope that it will be
      14  useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
      15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
      16  Public License for more details.
      17  
      18  You should have received a copy of the GNU General Public License along with
      19  the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
      20  
      21  /* Examples:
      22  
      23    $ gen 1000 | stat
      24  Test 1000 real numbers.
      25  
      26    $ gen 30000 | stat -2 1000
      27  Test 1000 real numbers 30 times and then test the 30 results in a
      28  ``second level''.
      29  
      30    $ gen -f mpz_urandomb 1000 | stat -i 0xffffffff
      31  Test 1000 integers 0 <= X <= 2^32-1.
      32  
      33    $ gen -f mpz_urandomb -z 34 1000 | stat -i 0x3ffffffff
      34  Test 1000 integers 0 <= X <= 2^34-1.
      35  
      36  */
      37  
      38  #include <stdio.h>
      39  #include <stdlib.h>
      40  #include <unistd.h>
      41  #include <math.h>
      42  #include "gmpstat.h"
      43  
      44  #if !HAVE_DECL_OPTARG
      45  extern char *optarg;
      46  extern int optind, opterr;
      47  #endif
      48  
      49  #define FVECSIZ (100000L)
      50  
      51  int g_debug = 0;
      52  
      53  static void
      54  print_ks_results (mpf_t f_p, mpf_t f_p_prob,
      55  		  mpf_t f_m, mpf_t f_m_prob,
      56  		  FILE *fp)
      57  {
      58    double p, pp, m, mp;
      59  
      60    p = mpf_get_d (f_p);
      61    m = mpf_get_d (f_m);
      62    pp = mpf_get_d (f_p_prob);
      63    mp = mpf_get_d (f_m_prob);
      64  
      65    fprintf (fp, "%.4f (%.0f%%)\t", p, pp * 100.0);
      66    fprintf (fp, "%.4f (%.0f%%)\n", m, mp * 100.0);
      67  }
      68  
      69  static void
      70  print_x2_table (unsigned int v, FILE *fp)
      71  {
      72    double t[7];
      73    int f;
      74  
      75  
      76    fprintf (fp, "Chi-square table for v=%u\n", v);
      77    fprintf (fp, "1%%\t5%%\t25%%\t50%%\t75%%\t95%%\t99%%\n");
      78    x2_table (t, v);
      79    for (f = 0; f < 7; f++)
      80      fprintf (fp, "%.2f\t", t[f]);
      81    fputs ("\n", fp);
      82  }
      83  
      84  
      85  
      86  /* Pks () -- Distribution function for KS results with a big n (like 1000
      87     or so):  F(x) = 1 - pow(e, -2*x^2) [Knuth, vol 2, p.51]. */
      88  /* gnuplot: plot [0:1] Pks(x), Pks(x) = 1-exp(-2*x**2)  */
      89  
      90  static void
      91  Pks (mpf_t p, mpf_t x)
      92  {
      93    double dt;			/* temp double */
      94  
      95    mpf_set (p, x);
      96    mpf_mul (p, p, p);		/* p = x^2 */
      97    mpf_mul_ui (p, p, 2);		/* p = 2*x^2 */
      98    mpf_neg (p, p);		/* p = -2*x^2 */
      99    /* No pow() in gmp.  Use doubles. */
     100    /* FIXME: Use exp()? */
     101    dt = pow (M_E, mpf_get_d (p));
     102    mpf_set_d (p, dt);
     103    mpf_ui_sub (p, 1, p);
     104  }
     105  
     106  /* f_freq() -- frequency test on real numbers 0<=f<1*/
     107  static void
     108  f_freq (const unsigned l1runs, const unsigned l2runs,
     109  	mpf_t fvec[], const unsigned long n)
     110  {
     111    unsigned f;
     112    mpf_t f_p, f_p_prob;
     113    mpf_t f_m, f_m_prob;
     114    mpf_t *l1res;			/* level 1 result array */
     115  
     116    mpf_init (f_p);  mpf_init (f_m);
     117    mpf_init (f_p_prob);  mpf_init (f_m_prob);
     118  
     119  
     120    /* Allocate space for 1st level results. */
     121    l1res = (mpf_t *) malloc (l2runs * 2 * sizeof (mpf_t));
     122    if (NULL == l1res)
     123      {
     124        fprintf (stderr, "stat: malloc failure\n");
     125        exit (1);
     126      }
     127  
     128    printf ("\nEquidistribution/Frequency test on real numbers (0<=X<1):\n");
     129    printf ("\tKp\t\tKm\n");
     130  
     131    for (f = 0; f < l2runs; f++)
     132      {
     133        /*  f_printvec (fvec, n); */
     134        mpf_freqt (f_p, f_m, fvec + f * n, n);
     135  
     136        /* what's the probability of getting these results? */
     137        ks_table (f_p_prob, f_p, n);
     138        ks_table (f_m_prob, f_m, n);
     139  
     140        if (l1runs == 0)
     141  	{
     142  	  /*printf ("%u:\t", f + 1);*/
     143  	  print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
     144  	}
     145        else
     146  	{
     147  	  /* save result */
     148  	  mpf_init_set (l1res[f], f_p);
     149  	  mpf_init_set (l1res[f + l2runs], f_m);
     150  	}
     151      }
     152  
     153    /* Now, apply the KS test on the results from the 1st level rounds
     154       with the distribution
     155       F(x) = 1 - pow(e, -2*x^2)	[Knuth, vol 2, p.51] */
     156  
     157    if (l1runs != 0)
     158      {
     159        /*printf ("-------------------------------------\n");*/
     160  
     161        /* The Kp's. */
     162        ks (f_p, f_m, l1res, Pks, l2runs);
     163        ks_table (f_p_prob, f_p, l2runs);
     164        ks_table (f_m_prob, f_m, l2runs);
     165        printf ("Kp:\t");
     166        print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
     167  
     168        /* The Km's. */
     169        ks (f_p, f_m, l1res + l2runs, Pks, l2runs);
     170        ks_table (f_p_prob, f_p, l2runs);
     171        ks_table (f_m_prob, f_m, l2runs);
     172        printf ("Km:\t");
     173        print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
     174      }
     175  
     176    mpf_clear (f_p);  mpf_clear (f_m);
     177    mpf_clear (f_p_prob);  mpf_clear (f_m_prob);
     178    free (l1res);
     179  }
     180  
     181  /* z_freq(l1runs, l2runs, zvec, n, max) -- frequency test on integers
     182     0<=z<=MAX */
     183  static void
     184  z_freq (const unsigned l1runs,
     185  	const unsigned l2runs,
     186  	mpz_t zvec[],
     187  	const unsigned long n,
     188  	unsigned int max)
     189  {
     190    mpf_t V;			/* result */
     191    double d_V;			/* result as a double */
     192  
     193    mpf_init (V);
     194  
     195  
     196    printf ("\nEquidistribution/Frequency test on integers (0<=X<=%u):\n", max);
     197    print_x2_table (max, stdout);
     198  
     199    mpz_freqt (V, zvec, max, n);
     200  
     201    d_V = mpf_get_d (V);
     202    printf ("V = %.2f (n = %lu)\n", d_V, n);
     203  
     204    mpf_clear (V);
     205  }
     206  
     207  unsigned int stat_debug = 0;
     208  
     209  int
     210  main (argc, argv)
     211       int argc;
     212       char *argv[];
     213  {
     214    const char usage[] =
     215      "usage: stat [-d] [-2 runs] [-i max | -r max] [file]\n" \
     216      "       file     filename\n" \
     217      "       -2 runs  perform 2-level test with RUNS runs on 1st level\n" \
     218      "       -d       increase debugging level\n" \
     219      "       -i max   input is integers 0 <= Z <= MAX\n" \
     220      "       -r max   input is real numbers 0 <= R < 1 and use MAX as\n" \
     221      "                maximum value when converting real numbers to integers\n" \
     222      "";
     223  
     224    mpf_t fvec[FVECSIZ];
     225    mpz_t zvec[FVECSIZ];
     226    unsigned long int f, n, vecentries;
     227    char *filen;
     228    FILE *fp;
     229    int c;
     230    int omitoutput = 0;
     231    int realinput = -1;		/* 1: input is real numbers 0<=R<1;
     232  				   0: input is integers 0 <= Z <= MAX. */
     233    long l1runs = 0,		/* 1st level runs */
     234      l2runs = 1;			/* 2nd level runs */
     235    mpf_t f_temp;
     236    mpz_t z_imax;			/* max value when converting between
     237  				   real number and integer. */
     238    mpf_t f_imax_plus1;		/* f_imax + 1 stored in an mpf_t for
     239  				   convenience */
     240    mpf_t f_imax_minus1;		/* f_imax - 1 stored in an mpf_t for
     241  				   convenience */
     242  
     243  
     244    mpf_init (f_temp);
     245    mpz_init_set_ui (z_imax, 0x7fffffff);
     246    mpf_init (f_imax_plus1);
     247    mpf_init (f_imax_minus1);
     248  
     249    while ((c = getopt (argc, argv, "d2:i:r:")) != -1)
     250      switch (c)
     251        {
     252        case '2':
     253  	l1runs = atol (optarg);
     254  	l2runs = -1;		/* set later on */
     255  	break;
     256        case 'd':			/* increase debug level */
     257  	stat_debug++;
     258  	break;
     259        case 'i':
     260  	if (1 == realinput)
     261  	  {
     262  	    fputs ("stat: options -i and -r are mutually exclusive\n", stderr);
     263  	    exit (1);
     264  	  }
     265  	if (mpz_set_str (z_imax, optarg, 0))
     266  	  {
     267  	    fprintf (stderr, "stat: bad max value %s\n", optarg);
     268  	    exit (1);
     269  	  }
     270  	realinput = 0;
     271  	break;
     272        case 'r':
     273  	if (0 == realinput)
     274  	  {
     275  	    fputs ("stat: options -i and -r are mutually exclusive\n", stderr);
     276  	    exit (1);
     277  	  }
     278  	if (mpz_set_str (z_imax, optarg, 0))
     279  	  {
     280  	    fprintf (stderr, "stat: bad max value %s\n", optarg);
     281  	    exit (1);
     282  	  }
     283  	realinput = 1;
     284  	break;
     285        case 'o':
     286  	omitoutput = atoi (optarg);
     287  	break;
     288        case '?':
     289        default:
     290  	fputs (usage, stderr);
     291  	exit (1);
     292        }
     293    argc -= optind;
     294    argv += optind;
     295  
     296    if (argc < 1)
     297      fp = stdin;
     298    else
     299      filen = argv[0];
     300  
     301    if (fp != stdin)
     302      if (NULL == (fp = fopen (filen, "r")))
     303        {
     304  	perror (filen);
     305  	exit (1);
     306        }
     307  
     308    if (-1 == realinput)
     309      realinput = 1;		/* default is real numbers */
     310  
     311    /* read file and fill appropriate vec */
     312    if (1 == realinput)		/* real input */
     313      {
     314        for (f = 0; f < FVECSIZ ; f++)
     315  	{
     316  	  mpf_init (fvec[f]);
     317  	  if (!mpf_inp_str (fvec[f], fp, 10))
     318  	    break;
     319  	}
     320      }
     321    else				/* integer input */
     322      {
     323        for (f = 0; f < FVECSIZ ; f++)
     324  	{
     325  	  mpz_init (zvec[f]);
     326  	  if (!mpz_inp_str (zvec[f], fp, 10))
     327  	    break;
     328  	}
     329      }
     330    vecentries = n = f;		/* number of entries read */
     331    fclose (fp);
     332  
     333    if (FVECSIZ == f)
     334      fprintf (stderr, "stat: warning: discarding input due to lazy allocation "\
     335  	     "of only %ld entries.  sorry.\n", FVECSIZ);
     336  
     337    printf ("Got %lu numbers.\n", n);
     338  
     339    /* convert and fill the other vec */
     340    /* since fvec[] contains 0<=f<1 and we want ivec[] to contain
     341       0<=z<=imax and we are truncating all fractions when
     342       converting float to int, we have to add 1 to imax.*/
     343    mpf_set_z (f_imax_plus1, z_imax);
     344    mpf_add_ui (f_imax_plus1, f_imax_plus1, 1);
     345    if (1 == realinput)		/* fill zvec[] */
     346      {
     347        for (f = 0; f < n; f++)
     348  	{
     349  	  mpf_mul (f_temp, fvec[f], f_imax_plus1);
     350  	  mpz_init (zvec[f]);
     351  	  mpz_set_f (zvec[f], f_temp); /* truncating fraction */
     352  	  if (stat_debug > 1)
     353  	    {
     354  	      mpz_out_str (stderr, 10, zvec[f]);
     355  	      fputs ("\n", stderr);
     356  	    }
     357  	}
     358      }
     359    else				/* integer input; fill fvec[] */
     360      {
     361        /*    mpf_set_z (f_imax_minus1, z_imax);
     362  	    mpf_sub_ui (f_imax_minus1, f_imax_minus1, 1);*/
     363        for (f = 0; f < n; f++)
     364  	{
     365  	  mpf_init (fvec[f]);
     366  	  mpf_set_z (fvec[f], zvec[f]);
     367  	  mpf_div (fvec[f], fvec[f], f_imax_plus1);
     368  	  if (stat_debug > 1)
     369  	    {
     370  	      mpf_out_str (stderr, 10, 0, fvec[f]);
     371  	      fputs ("\n", stderr);
     372  	    }
     373  	}
     374      }
     375  
     376    /* 2 levels? */
     377    if (1 != l2runs)
     378      {
     379        l2runs = n / l1runs;
     380        printf ("Doing %ld second level rounds "\
     381  	      "with %ld entries in each round", l2runs, l1runs);
     382        if (n % l1runs)
     383  	printf (" (discarding %ld entr%s)", n % l1runs,
     384  		n % l1runs == 1 ? "y" : "ies");
     385        puts (".");
     386        n = l1runs;
     387      }
     388  
     389  #ifndef DONT_FFREQ
     390    f_freq (l1runs, l2runs, fvec, n);
     391  #endif
     392  #ifdef DO_ZFREQ
     393    z_freq (l1runs, l2runs, zvec, n, mpz_get_ui (z_imax));
     394  #endif
     395  
     396    mpf_clear (f_temp); mpz_clear (z_imax);
     397    mpf_clear (f_imax_plus1);
     398    mpf_clear (f_imax_minus1);
     399    for (f = 0; f < vecentries; f++)
     400      {
     401        mpf_clear (fvec[f]);
     402        mpz_clear (zvec[f]);
     403      }
     404  
     405    return 0;
     406  }