(root)/
binutils-2.41/
zlib/
examples/
enough.c
       1  /* enough.c -- determine the maximum size of inflate's Huffman code tables over
       2   * all possible valid and complete prefix codes, subject to a length limit.
       3   * Copyright (C) 2007, 2008, 2012, 2018 Mark Adler
       4   * Version 1.5  5 August 2018  Mark Adler
       5   */
       6  
       7  /* Version history:
       8     1.0   3 Jan 2007  First version (derived from codecount.c version 1.4)
       9     1.1   4 Jan 2007  Use faster incremental table usage computation
      10                       Prune examine() search on previously visited states
      11     1.2   5 Jan 2007  Comments clean up
      12                       As inflate does, decrease root for short codes
      13                       Refuse cases where inflate would increase root
      14     1.3  17 Feb 2008  Add argument for initial root table size
      15                       Fix bug for initial root table size == max - 1
      16                       Use a macro to compute the history index
      17     1.4  18 Aug 2012  Avoid shifts more than bits in type (caused endless loop!)
      18                       Clean up comparisons of different types
      19                       Clean up code indentation
      20     1.5   5 Aug 2018  Clean up code style, formatting, and comments
      21                       Show all the codes for the maximum, and only the maximum
      22   */
      23  
      24  /*
      25     Examine all possible prefix codes for a given number of symbols and a
      26     maximum code length in bits to determine the maximum table size for zlib's
      27     inflate. Only complete prefix codes are counted.
      28  
      29     Two codes are considered distinct if the vectors of the number of codes per
      30     length are not identical. So permutations of the symbol assignments result
      31     in the same code for the counting, as do permutations of the assignments of
      32     the bit values to the codes (i.e. only canonical codes are counted).
      33  
      34     We build a code from shorter to longer lengths, determining how many symbols
      35     are coded at each length. At each step, we have how many symbols remain to
      36     be coded, what the last code length used was, and how many bit patterns of
      37     that length remain unused. Then we add one to the code length and double the
      38     number of unused patterns to graduate to the next code length. We then
      39     assign all portions of the remaining symbols to that code length that
      40     preserve the properties of a correct and eventually complete code. Those
      41     properties are: we cannot use more bit patterns than are available; and when
      42     all the symbols are used, there are exactly zero possible bit patterns left
      43     unused.
      44  
      45     The inflate Huffman decoding algorithm uses two-level lookup tables for
      46     speed. There is a single first-level table to decode codes up to root bits
      47     in length (root == 9 for literal/length codes and root == 6 for distance
      48     codes, in the current inflate implementation). The base table has 1 << root
      49     entries and is indexed by the next root bits of input. Codes shorter than
      50     root bits have replicated table entries, so that the correct entry is
      51     pointed to regardless of the bits that follow the short code. If the code is
      52     longer than root bits, then the table entry points to a second-level table.
      53     The size of that table is determined by the longest code with that root-bit
      54     prefix. If that longest code has length len, then the table has size 1 <<
      55     (len - root), to index the remaining bits in that set of codes. Each
      56     subsequent root-bit prefix then has its own sub-table. The total number of
      57     table entries required by the code is calculated incrementally as the number
      58     of codes at each bit length is populated. When all of the codes are shorter
      59     than root bits, then root is reduced to the longest code length, resulting
      60     in a single, smaller, one-level table.
      61  
      62     The inflate algorithm also provides for small values of root (relative to
      63     the log2 of the number of symbols), where the shortest code has more bits
      64     than root. In that case, root is increased to the length of the shortest
      65     code. This program, by design, does not handle that case, so it is verified
      66     that the number of symbols is less than 1 << (root + 1).
      67  
      68     In order to speed up the examination (by about ten orders of magnitude for
      69     the default arguments), the intermediate states in the build-up of a code
      70     are remembered and previously visited branches are pruned. The memory
      71     required for this will increase rapidly with the total number of symbols and
      72     the maximum code length in bits. However this is a very small price to pay
      73     for the vast speedup.
      74  
      75     First, all of the possible prefix codes are counted, and reachable
      76     intermediate states are noted by a non-zero count in a saved-results array.
      77     Second, the intermediate states that lead to (root + 1) bit or longer codes
      78     are used to look at all sub-codes from those junctures for their inflate
      79     memory usage. (The amount of memory used is not affected by the number of
      80     codes of root bits or less in length.)  Third, the visited states in the
      81     construction of those sub-codes and the associated calculation of the table
      82     size is recalled in order to avoid recalculating from the same juncture.
      83     Beginning the code examination at (root + 1) bit codes, which is enabled by
      84     identifying the reachable nodes, accounts for about six of the orders of
      85     magnitude of improvement for the default arguments. About another four
      86     orders of magnitude come from not revisiting previous states. Out of
      87     approximately 2x10^16 possible prefix codes, only about 2x10^6 sub-codes
      88     need to be examined to cover all of the possible table memory usage cases
      89     for the default arguments of 286 symbols limited to 15-bit codes.
      90  
      91     Note that the uintmax_t type is used for counting. It is quite easy to
      92     exceed the capacity of an eight-byte integer with a large number of symbols
      93     and a large maximum code length, so multiple-precision arithmetic would need
      94     to replace the integer arithmetic in that case. This program will abort if
      95     an overflow occurs. The big_t type identifies where the counting takes
      96     place.
      97  
      98     The uintmax_t type is also used for calculating the number of possible codes
      99     remaining at the maximum length. This limits the maximum code length to the
     100     number of bits in a long long minus the number of bits needed to represent
     101     the symbols in a flat code. The code_t type identifies where the bit-pattern
     102     counting takes place.
     103   */
     104  
     105  #include <stdio.h>
     106  #include <stdlib.h>
     107  #include <string.h>
     108  #include <stdarg.h>
     109  #include <stdint.h>
     110  #include <assert.h>
     111  
     112  #define local static
     113  
     114  // Special data types.
     115  typedef uintmax_t big_t;    // type for code counting
     116  #define PRIbig "ju"         // printf format for big_t
     117  typedef uintmax_t code_t;   // type for bit pattern counting
     118  struct tab {                // type for been-here check
     119      size_t len;             // allocated length of bit vector in octets
     120      char *vec;              // allocated bit vector
     121  };
     122  
     123  /* The array for saving results, num[], is indexed with this triplet:
     124  
     125        syms: number of symbols remaining to code
     126        left: number of available bit patterns at length len
     127        len: number of bits in the codes currently being assigned
     128  
     129     Those indices are constrained thusly when saving results:
     130  
     131        syms: 3..totsym (totsym == total symbols to code)
     132        left: 2..syms - 1, but only the evens (so syms == 8 -> 2, 4, 6)
     133        len: 1..max - 1 (max == maximum code length in bits)
     134  
     135     syms == 2 is not saved since that immediately leads to a single code. left
     136     must be even, since it represents the number of available bit patterns at
     137     the current length, which is double the number at the previous length. left
     138     ends at syms-1 since left == syms immediately results in a single code.
     139     (left > sym is not allowed since that would result in an incomplete code.)
     140     len is less than max, since the code completes immediately when len == max.
     141  
     142     The offset into the array is calculated for the three indices with the first
     143     one (syms) being outermost, and the last one (len) being innermost. We build
     144     the array with length max-1 lists for the len index, with syms-3 of those
     145     for each symbol. There are totsym-2 of those, with each one varying in
     146     length as a function of sym. See the calculation of index in map() for the
     147     index, and the calculation of size in main() for the size of the array.
     148  
     149     For the deflate example of 286 symbols limited to 15-bit codes, the array
     150     has 284,284 entries, taking up 2.17 MB for an 8-byte big_t. More than half
     151     of the space allocated for saved results is actually used -- not all
     152     possible triplets are reached in the generation of valid prefix codes.
     153   */
     154  
     155  /* The array for tracking visited states, done[], is itself indexed identically
     156     to the num[] array as described above for the (syms, left, len) triplet.
     157     Each element in the array is further indexed by the (mem, rem) doublet,
     158     where mem is the amount of inflate table space used so far, and rem is the
     159     remaining unused entries in the current inflate sub-table. Each indexed
     160     element is simply one bit indicating whether the state has been visited or
     161     not. Since the ranges for mem and rem are not known a priori, each bit
     162     vector is of a variable size, and grows as needed to accommodate the visited
     163     states. mem and rem are used to calculate a single index in a triangular
     164     array. Since the range of mem is expected in the default case to be about
     165     ten times larger than the range of rem, the array is skewed to reduce the
     166     memory usage, with eight times the range for mem than for rem. See the
     167     calculations for offset and bit in been_here() for the details.
     168  
     169     For the deflate example of 286 symbols limited to 15-bit codes, the bit
     170     vectors grow to total 5.5 MB, in addition to the 4.3 MB done array itself.
     171   */
     172  
     173  // Type for a variable-length, allocated string.
     174  typedef struct {
     175      char *str;          // pointer to allocated string
     176      size_t size;        // size of allocation
     177      size_t len;         // length of string, not including terminating zero
     178  } string_t;
     179  
     180  // Clear a string_t.
     181  local void string_clear(string_t *s) {
     182      s->str[0] = 0;
     183      s->len = 0;
     184  }
     185  
     186  // Initialize a string_t.
     187  local void string_init(string_t *s) {
     188      s->size = 16;
     189      s->str = malloc(s->size);
     190      assert(s->str != NULL && "out of memory");
     191      string_clear(s);
     192  }
     193  
     194  // Release the allocation of a string_t.
     195  local void string_free(string_t *s) {
     196      free(s->str);
     197      s->str = NULL;
     198      s->size = 0;
     199      s->len = 0;
     200  }
     201  
     202  // Save the results of printf with fmt and the subsequent argument list to s.
     203  // Each call appends to s. The allocated space for s is increased as needed.
     204  local void string_printf(string_t *s, char *fmt, ...) {
     205      va_list ap;
     206      va_start(ap, fmt);
     207      size_t len = s->len;
     208      int ret = vsnprintf(s->str + len, s->size - len, fmt, ap);
     209      assert(ret >= 0 && "out of memory");
     210      s->len += ret;
     211      if (s->size < s->len + 1) {
     212          do {
     213              s->size <<= 1;
     214              assert(s->size != 0 && "overflow");
     215          } while (s->size < s->len + 1);
     216          s->str = realloc(s->str, s->size);
     217          assert(s->str != NULL && "out of memory");
     218          vsnprintf(s->str + len, s->size - len, fmt, ap);
     219      }
     220      va_end(ap);
     221  }
     222  
     223  // Globals to avoid propagating constants or constant pointers recursively.
     224  struct {
     225      int max;            // maximum allowed bit length for the codes
     226      int root;           // size of base code table in bits
     227      int large;          // largest code table so far
     228      size_t size;        // number of elements in num and done
     229      big_t tot;          // total number of codes with maximum tables size
     230      string_t out;       // display of subcodes for maximum tables size
     231      int *code;          // number of symbols assigned to each bit length
     232      big_t *num;         // saved results array for code counting
     233      struct tab *done;   // states already evaluated array
     234  } g;
     235  
     236  // Index function for num[] and done[].
     237  local inline size_t map(int syms, int left, int len) {
     238      return ((size_t)((syms - 1) >> 1) * ((syms - 2) >> 1) +
     239              (left >> 1) - 1) * (g.max - 1) +
     240             len - 1;
     241  }
     242  
     243  // Free allocated space in globals.
     244  local void cleanup(void) {
     245      if (g.done != NULL) {
     246          for (size_t n = 0; n < g.size; n++)
     247              if (g.done[n].len)
     248                  free(g.done[n].vec);
     249          g.size = 0;
     250          free(g.done);   g.done = NULL;
     251      }
     252      free(g.num);    g.num = NULL;
     253      free(g.code);   g.code = NULL;
     254      string_free(&g.out);
     255  }
     256  
     257  // Return the number of possible prefix codes using bit patterns of lengths len
     258  // through max inclusive, coding syms symbols, with left bit patterns of length
     259  // len unused -- return -1 if there is an overflow in the counting. Keep a
     260  // record of previous results in num to prevent repeating the same calculation.
     261  local big_t count(int syms, int left, int len) {
     262      // see if only one possible code
     263      if (syms == left)
     264          return 1;
     265  
     266      // note and verify the expected state
     267      assert(syms > left && left > 0 && len < g.max);
     268  
     269      // see if we've done this one already
     270      size_t index = map(syms, left, len);
     271      big_t got = g.num[index];
     272      if (got)
     273          return got;         // we have -- return the saved result
     274  
     275      // we need to use at least this many bit patterns so that the code won't be
     276      // incomplete at the next length (more bit patterns than symbols)
     277      int least = (left << 1) - syms;
     278      if (least < 0)
     279          least = 0;
     280  
     281      // we can use at most this many bit patterns, lest there not be enough
     282      // available for the remaining symbols at the maximum length (if there were
     283      // no limit to the code length, this would become: most = left - 1)
     284      int most = (((code_t)left << (g.max - len)) - syms) /
     285                 (((code_t)1 << (g.max - len)) - 1);
     286  
     287      // count all possible codes from this juncture and add them up
     288      big_t sum = 0;
     289      for (int use = least; use <= most; use++) {
     290          got = count(syms - use, (left - use) << 1, len + 1);
     291          sum += got;
     292          if (got == (big_t)-1 || sum < got)      // overflow
     293              return (big_t)-1;
     294      }
     295  
     296      // verify that all recursive calls are productive
     297      assert(sum != 0);
     298  
     299      // save the result and return it
     300      g.num[index] = sum;
     301      return sum;
     302  }
     303  
     304  // Return true if we've been here before, set to true if not. Set a bit in a
     305  // bit vector to indicate visiting this state. Each (syms,len,left) state has a
     306  // variable size bit vector indexed by (mem,rem). The bit vector is lengthened
     307  // as needed to allow setting the (mem,rem) bit.
     308  local int been_here(int syms, int left, int len, int mem, int rem) {
     309      // point to vector for (syms,left,len), bit in vector for (mem,rem)
     310      size_t index = map(syms, left, len);
     311      mem -= 1 << g.root;             // mem always includes the root table
     312      mem >>= 1;                      // mem and rem are always even
     313      rem >>= 1;
     314      size_t offset = (mem >> 3) + rem;
     315      offset = ((offset * (offset + 1)) >> 1) + rem;
     316      int bit = 1 << (mem & 7);
     317  
     318      // see if we've been here
     319      size_t length = g.done[index].len;
     320      if (offset < length && (g.done[index].vec[offset] & bit) != 0)
     321          return 1;       // done this!
     322  
     323      // we haven't been here before -- set the bit to show we have now
     324  
     325      // see if we need to lengthen the vector in order to set the bit
     326      if (length <= offset) {
     327          // if we have one already, enlarge it, zero out the appended space
     328          char *vector;
     329          if (length) {
     330              do {
     331                  length <<= 1;
     332              } while (length <= offset);
     333              vector = realloc(g.done[index].vec, length);
     334              assert(vector != NULL && "out of memory");
     335              memset(vector + g.done[index].len, 0, length - g.done[index].len);
     336          }
     337  
     338          // otherwise we need to make a new vector and zero it out
     339          else {
     340              length = 16;
     341              while (length <= offset)
     342                  length <<= 1;
     343              vector = calloc(length, 1);
     344              assert(vector != NULL && "out of memory");
     345          }
     346  
     347          // install the new vector
     348          g.done[index].len = length;
     349          g.done[index].vec = vector;
     350      }
     351  
     352      // set the bit
     353      g.done[index].vec[offset] |= bit;
     354      return 0;
     355  }
     356  
     357  // Examine all possible codes from the given node (syms, len, left). Compute
     358  // the amount of memory required to build inflate's decoding tables, where the
     359  // number of code structures used so far is mem, and the number remaining in
     360  // the current sub-table is rem.
     361  local void examine(int syms, int left, int len, int mem, int rem) {
     362      // see if we have a complete code
     363      if (syms == left) {
     364          // set the last code entry
     365          g.code[len] = left;
     366  
     367          // complete computation of memory used by this code
     368          while (rem < left) {
     369              left -= rem;
     370              rem = 1 << (len - g.root);
     371              mem += rem;
     372          }
     373          assert(rem == left);
     374  
     375          // if this is at the maximum, show the sub-code
     376          if (mem >= g.large) {
     377              // if this is a new maximum, update the maximum and clear out the
     378              // printed sub-codes from the previous maximum
     379              if (mem > g.large) {
     380                  g.large = mem;
     381                  string_clear(&g.out);
     382              }
     383  
     384              // compute the starting state for this sub-code
     385              syms = 0;
     386              left = 1 << g.max;
     387              for (int bits = g.max; bits > g.root; bits--) {
     388                  syms += g.code[bits];
     389                  left -= g.code[bits];
     390                  assert((left & 1) == 0);
     391                  left >>= 1;
     392              }
     393  
     394              // print the starting state and the resulting sub-code to g.out
     395              string_printf(&g.out, "<%u, %u, %u>:",
     396                            syms, g.root + 1, ((1 << g.root) - left) << 1);
     397              for (int bits = g.root + 1; bits <= g.max; bits++)
     398                  if (g.code[bits])
     399                      string_printf(&g.out, " %d[%d]", g.code[bits], bits);
     400              string_printf(&g.out, "\n");
     401          }
     402  
     403          // remove entries as we drop back down in the recursion
     404          g.code[len] = 0;
     405          return;
     406      }
     407  
     408      // prune the tree if we can
     409      if (been_here(syms, left, len, mem, rem))
     410          return;
     411  
     412      // we need to use at least this many bit patterns so that the code won't be
     413      // incomplete at the next length (more bit patterns than symbols)
     414      int least = (left << 1) - syms;
     415      if (least < 0)
     416          least = 0;
     417  
     418      // we can use at most this many bit patterns, lest there not be enough
     419      // available for the remaining symbols at the maximum length (if there were
     420      // no limit to the code length, this would become: most = left - 1)
     421      int most = (((code_t)left << (g.max - len)) - syms) /
     422                 (((code_t)1 << (g.max - len)) - 1);
     423  
     424      // occupy least table spaces, creating new sub-tables as needed
     425      int use = least;
     426      while (rem < use) {
     427          use -= rem;
     428          rem = 1 << (len - g.root);
     429          mem += rem;
     430      }
     431      rem -= use;
     432  
     433      // examine codes from here, updating table space as we go
     434      for (use = least; use <= most; use++) {
     435          g.code[len] = use;
     436          examine(syms - use, (left - use) << 1, len + 1,
     437                  mem + (rem ? 1 << (len - g.root) : 0), rem << 1);
     438          if (rem == 0) {
     439              rem = 1 << (len - g.root);
     440              mem += rem;
     441          }
     442          rem--;
     443      }
     444  
     445      // remove entries as we drop back down in the recursion
     446      g.code[len] = 0;
     447  }
     448  
     449  // Look at all sub-codes starting with root + 1 bits. Look at only the valid
     450  // intermediate code states (syms, left, len). For each completed code,
     451  // calculate the amount of memory required by inflate to build the decoding
     452  // tables. Find the maximum amount of memory required and show the codes that
     453  // require that maximum.
     454  local void enough(int syms) {
     455      // clear code
     456      for (int n = 0; n <= g.max; n++)
     457          g.code[n] = 0;
     458  
     459      // look at all (root + 1) bit and longer codes
     460      string_clear(&g.out);           // empty saved results
     461      g.large = 1 << g.root;          // base table
     462      if (g.root < g.max)             // otherwise, there's only a base table
     463          for (int n = 3; n <= syms; n++)
     464              for (int left = 2; left < n; left += 2) {
     465                  // look at all reachable (root + 1) bit nodes, and the
     466                  // resulting codes (complete at root + 2 or more)
     467                  size_t index = map(n, left, g.root + 1);
     468                  if (g.root + 1 < g.max && g.num[index]) // reachable node
     469                      examine(n, left, g.root + 1, 1 << g.root, 0);
     470  
     471                  // also look at root bit codes with completions at root + 1
     472                  // bits (not saved in num, since complete), just in case
     473                  if (g.num[index - 1] && n <= left << 1)
     474                      examine((n - left) << 1, (n - left) << 1, g.root + 1,
     475                              1 << g.root, 0);
     476              }
     477  
     478      // done
     479      printf("maximum of %d table entries for root = %d\n", g.large, g.root);
     480      fputs(g.out.str, stdout);
     481  }
     482  
     483  // Examine and show the total number of possible prefix codes for a given
     484  // maximum number of symbols, initial root table size, and maximum code length
     485  // in bits -- those are the command arguments in that order. The default values
     486  // are 286, 9, and 15 respectively, for the deflate literal/length code. The
     487  // possible codes are counted for each number of coded symbols from two to the
     488  // maximum. The counts for each of those and the total number of codes are
     489  // shown. The maximum number of inflate table entires is then calculated across
     490  // all possible codes. Each new maximum number of table entries and the
     491  // associated sub-code (starting at root + 1 == 10 bits) is shown.
     492  //
     493  // To count and examine prefix codes that are not length-limited, provide a
     494  // maximum length equal to the number of symbols minus one.
     495  //
     496  // For the deflate literal/length code, use "enough". For the deflate distance
     497  // code, use "enough 30 6".
     498  int main(int argc, char **argv) {
     499      // set up globals for cleanup()
     500      g.code = NULL;
     501      g.num = NULL;
     502      g.done = NULL;
     503      string_init(&g.out);
     504  
     505      // get arguments -- default to the deflate literal/length code
     506      int syms = 286;
     507      g.root = 9;
     508      g.max = 15;
     509      if (argc > 1) {
     510          syms = atoi(argv[1]);
     511          if (argc > 2) {
     512              g.root = atoi(argv[2]);
     513              if (argc > 3)
     514                  g.max = atoi(argv[3]);
     515          }
     516      }
     517      if (argc > 4 || syms < 2 || g.root < 1 || g.max < 1) {
     518          fputs("invalid arguments, need: [sym >= 2 [root >= 1 [max >= 1]]]\n",
     519                stderr);
     520          return 1;
     521      }
     522  
     523      // if not restricting the code length, the longest is syms - 1
     524      if (g.max > syms - 1)
     525          g.max = syms - 1;
     526  
     527      // determine the number of bits in a code_t
     528      int bits = 0;
     529      for (code_t word = 1; word; word <<= 1)
     530          bits++;
     531  
     532      // make sure that the calculation of most will not overflow
     533      if (g.max > bits || (code_t)(syms - 2) >= ((code_t)-1 >> (g.max - 1))) {
     534          fputs("abort: code length too long for internal types\n", stderr);
     535          return 1;
     536      }
     537  
     538      // reject impossible code requests
     539      if ((code_t)(syms - 1) > ((code_t)1 << g.max) - 1) {
     540          fprintf(stderr, "%d symbols cannot be coded in %d bits\n",
     541                  syms, g.max);
     542          return 1;
     543      }
     544  
     545      // allocate code vector
     546      g.code = calloc(g.max + 1, sizeof(int));
     547      assert(g.code != NULL && "out of memory");
     548  
     549      // determine size of saved results array, checking for overflows,
     550      // allocate and clear the array (set all to zero with calloc())
     551      if (syms == 2)              // iff max == 1
     552          g.num = NULL;           // won't be saving any results
     553      else {
     554          g.size = syms >> 1;
     555          int n = (syms - 1) >> 1;
     556          assert(g.size <= (size_t)-1 / n && "overflow");
     557          g.size *= n;
     558          n = g.max - 1;
     559          assert(g.size <= (size_t)-1 / n && "overflow");
     560          g.size *= n;
     561          g.num = calloc(g.size, sizeof(big_t));
     562          assert(g.num != NULL && "out of memory");
     563      }
     564  
     565      // count possible codes for all numbers of symbols, add up counts
     566      big_t sum = 0;
     567      for (int n = 2; n <= syms; n++) {
     568          big_t got = count(n, 2, 1);
     569          sum += got;
     570          assert(got != (big_t)-1 && sum >= got && "overflow");
     571      }
     572      printf("%"PRIbig" total codes for 2 to %d symbols", sum, syms);
     573      if (g.max < syms - 1)
     574          printf(" (%d-bit length limit)\n", g.max);
     575      else
     576          puts(" (no length limit)");
     577  
     578      // allocate and clear done array for been_here()
     579      if (syms == 2)
     580          g.done = NULL;
     581      else {
     582          g.done = calloc(g.size, sizeof(struct tab));
     583          assert(g.done != NULL && "out of memory");
     584      }
     585  
     586      // find and show maximum inflate table usage
     587      if (g.root > g.max)             // reduce root to max length
     588          g.root = g.max;
     589      if ((code_t)syms < ((code_t)1 << (g.root + 1)))
     590          enough(syms);
     591      else
     592          fputs("cannot handle minimum code lengths > root", stderr);
     593  
     594      // done
     595      cleanup();
     596      return 0;
     597  }