(root)/
coreutils-9.4/
lib/
randperm.c
       1  /* Generate random permutations.
       2  
       3     Copyright (C) 2006-2023 Free Software Foundation, Inc.
       4  
       5     This program is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU General Public License as published by
       7     the Free Software Foundation, either version 3 of the License, or
       8     (at your option) any later version.
       9  
      10     This program is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU General Public License for more details.
      14  
      15     You should have received a copy of the GNU General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  /* Written by Paul Eggert.  */
      19  
      20  #include <config.h>
      21  
      22  #include "randperm.h"
      23  
      24  #include <limits.h>
      25  #include <stdint.h>
      26  #include <stdlib.h>
      27  
      28  #include "attribute.h"
      29  #include "count-leading-zeros.h"
      30  #include "hash.h"
      31  #include "xalloc.h"
      32  
      33  /* Return the floor of the log base 2 of N.  If N is zero, return -1.  */
      34  
      35  ATTRIBUTE_CONST static int
      36  floor_lg (size_t n)
      37  {
      38    static_assert (SIZE_WIDTH <= ULLONG_WIDTH);
      39    return (n == 0 ? -1
      40            : SIZE_WIDTH <= UINT_WIDTH
      41            ? UINT_WIDTH - 1 - count_leading_zeros (n)
      42            : SIZE_WIDTH <= ULONG_WIDTH
      43            ? ULONG_WIDTH - 1 - count_leading_zeros_l (n)
      44            : ULLONG_WIDTH - 1 - count_leading_zeros_ll (n));
      45  }
      46  
      47  /* Return an upper bound on the number of random bytes needed to
      48     generate the first H elements of a random permutation of N
      49     elements.  H must not exceed N.  */
      50  
      51  size_t
      52  randperm_bound (size_t h, size_t n)
      53  {
      54    /* Upper bound on number of bits needed to generate the first number
      55       of the permutation.  */
      56    uintmax_t lg_n = floor_lg (n) + 1;
      57  
      58    /* Upper bound on number of bits needed to generated the first H elements.  */
      59    uintmax_t ar = lg_n * h;
      60  
      61    /* Convert the bit count to a byte count.  */
      62    size_t bound = (ar + CHAR_BIT - 1) / CHAR_BIT;
      63  
      64    return bound;
      65  }
      66  
      67  /* Swap elements I and J in array V.  */
      68  
      69  static void
      70  swap (size_t *v, size_t i, size_t j)
      71  {
      72    size_t t = v[i];
      73    v[i] = v[j];
      74    v[j] = t;
      75  }
      76  
      77  /* Structures and functions for a sparse_map abstract data type that's
      78     used to effectively swap elements I and J in array V like swap(),
      79     but in a more memory efficient manner (when the number of permutations
      80     performed is significantly less than the size of the input).  */
      81  
      82  struct sparse_ent_
      83  {
      84     size_t index;
      85     size_t val;
      86  };
      87  
      88  static size_t
      89  sparse_hash_ (void const *x, size_t table_size)
      90  {
      91    struct sparse_ent_ const *ent = x;
      92    return ent->index % table_size;
      93  }
      94  
      95  static bool
      96  sparse_cmp_ (void const *x, void const *y)
      97  {
      98    struct sparse_ent_ const *ent1 = x;
      99    struct sparse_ent_ const *ent2 = y;
     100    return ent1->index == ent2->index;
     101  }
     102  
     103  typedef Hash_table sparse_map;
     104  
     105  /* Initialize the structure for the sparse map,
     106     when a best guess as to the number of entries
     107     specified with SIZE_HINT.  */
     108  
     109  static sparse_map *
     110  sparse_new (size_t size_hint)
     111  {
     112    return hash_initialize (size_hint, nullptr, sparse_hash_, sparse_cmp_, free);
     113  }
     114  
     115  /* Swap the values for I and J.  If a value is not already present
     116     then assume it's equal to the index.  Update the value for
     117     index I in array V.  */
     118  
     119  static void
     120  sparse_swap (sparse_map *sv, size_t *v, size_t i, size_t j)
     121  {
     122    struct sparse_ent_ *v1 = hash_remove (sv, &(struct sparse_ent_) {i,0});
     123    struct sparse_ent_ *v2 = hash_remove (sv, &(struct sparse_ent_) {j,0});
     124  
     125    /* FIXME: reduce the frequency of these mallocs.  */
     126    if (!v1)
     127      {
     128        v1 = xmalloc (sizeof *v1);
     129        v1->index = v1->val = i;
     130      }
     131    if (!v2)
     132      {
     133        v2 = xmalloc (sizeof *v2);
     134        v2->index = v2->val = j;
     135      }
     136  
     137    size_t t = v1->val;
     138    v1->val = v2->val;
     139    v2->val = t;
     140    if (!hash_insert (sv, v1))
     141      xalloc_die ();
     142    if (!hash_insert (sv, v2))
     143      xalloc_die ();
     144  
     145    v[i] = v1->val;
     146  }
     147  
     148  static void
     149  sparse_free (sparse_map *sv)
     150  {
     151    hash_free (sv);
     152  }
     153  
     154  
     155  /* From R, allocate and return a malloc'd array of the first H elements
     156     of a random permutation of N elements.  H must not exceed N.
     157     Return nullptr if H is zero.  */
     158  
     159  size_t *
     160  randperm_new (struct randint_source *r, size_t h, size_t n)
     161  {
     162    size_t *v;
     163  
     164    switch (h)
     165      {
     166      case 0:
     167        v = nullptr;
     168        break;
     169  
     170      case 1:
     171        v = xmalloc (sizeof *v);
     172        v[0] = randint_choose (r, n);
     173        break;
     174  
     175      default:
     176        {
     177          /* The algorithm is essentially the same in both
     178             the sparse and non sparse case.  In the sparse case we use
     179             a hash to implement sparse storage for the set of n numbers
     180             we're shuffling.  When to use the sparse method was
     181             determined with the help of this script:
     182  
     183             #!/bin/sh
     184             for n in $(seq 2 32); do
     185               for h in $(seq 2 32); do
     186                 test $h -gt $n && continue
     187                 for s in o n; do
     188                   test $s = o && shuf=shuf || shuf=./shuf
     189                   num=$(env time -f "$s:${h},${n} = %e,%M" \
     190                         $shuf -i0-$((2**$n-2)) -n$((2**$h-2)) | wc -l)
     191                   test $num = $((2**$h-2)) || echo "$s:${h},${n} = failed" >&2
     192                 done
     193               done
     194             done
     195  
     196             This showed that if sparseness = n/h, then:
     197  
     198             sparseness = 128 => .125 mem used, and about same speed
     199             sparseness =  64 => .25  mem used, but 1.5 times slower
     200             sparseness =  32 => .5   mem used, but 2 times slower
     201  
     202             Also the memory usage was only significant when n > 128Ki
     203          */
     204          bool sparse = (n >= (128 * 1024)) && (n / h >= 32);
     205  
     206          size_t i;
     207          sparse_map *sv;
     208  
     209          if (sparse)
     210            {
     211              sv = sparse_new (h * 2);
     212              if (sv == nullptr)
     213                xalloc_die ();
     214              v = xnmalloc (h, sizeof *v);
     215            }
     216          else
     217            {
     218              sv = nullptr; /* To placate GCC's -Wuninitialized.  */
     219              v = xnmalloc (n, sizeof *v);
     220              for (i = 0; i < n; i++)
     221                v[i] = i;
     222            }
     223  
     224          for (i = 0; i < h; i++)
     225            {
     226              size_t j = i + randint_choose (r, n - i);
     227              if (sparse)
     228                sparse_swap (sv, v, i, j);
     229              else
     230                swap (v, i, j);
     231            }
     232  
     233          if (sparse)
     234            sparse_free (sv);
     235          else
     236            v = xnrealloc (v, h, sizeof *v);
     237        }
     238        break;
     239      }
     240  
     241    return v;
     242  }