(root)/
gzip-1.13/
unpack.c
       1  /* unpack.c -- decompress files in pack format.
       2  
       3     Copyright (C) 1997, 1999, 2006, 2009-2023 Free Software Foundation, Inc.
       4     Copyright (C) 1992-1993 Jean-loup Gailly
       5  
       6     This program is free software; you can redistribute it and/or modify
       7     it under the terms of the GNU General Public License as published by
       8     the Free Software Foundation; either version 3, or (at your option)
       9     any later version.
      10  
      11     This program is distributed in the hope that it will be useful,
      12     but WITHOUT ANY WARRANTY; without even the implied warranty of
      13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14     GNU General Public License for more details.
      15  
      16     You should have received a copy of the GNU General Public License
      17     along with this program; if not, write to the Free Software Foundation,
      18     Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
      19  
      20  #include <config.h>
      21  #include "tailor.h"
      22  #include "gzip.h"
      23  
      24  #define MIN(a,b) ((a) <= (b) ? (a) : (b))
      25  /* The arguments must not have side effects. */
      26  
      27  #define MAX_BITLEN 25
      28  /* Maximum length of Huffman codes. (Minor modifications to the code
      29   * would be needed to support 32 bits codes, but pack never generates
      30   * more than 24 bits anyway.)
      31   */
      32  
      33  #define LITERALS 256
      34  /* Number of literals, excluding the End of Block (EOB) code */
      35  
      36  #define MAX_PEEK 12
      37  /* Maximum number of 'peek' bits used to optimize traversal of the
      38   * Huffman tree.
      39   */
      40  
      41  static ulg orig_len;       /* original uncompressed length */
      42  static int max_len;        /* maximum bit length of Huffman codes */
      43  
      44  static uch literal[LITERALS];
      45  /* The literal bytes present in the Huffman tree. The EOB code is not
      46   * represented.
      47   */
      48  
      49  static int lit_base[MAX_BITLEN+1];
      50  /* All literals of a given bit length are contiguous in literal[] and
      51   * have contiguous codes. literal[code+lit_base[len]] is the literal
      52   * for a code of len bits.
      53   */
      54  
      55  static int leaves [MAX_BITLEN+1]; /* Number of leaves for each bit length */
      56  static int parents[MAX_BITLEN+1]; /* Number of parents for each bit length */
      57  
      58  static int peek_bits; /* Number of peek bits currently used */
      59  
      60  /* static uch prefix_len[1 << MAX_PEEK]; */
      61  #define prefix_len outbuf
      62  /* For each bit pattern b of peek_bits bits, prefix_len[b] is the length
      63   * of the Huffman code starting with a prefix of b (upper bits), or 0
      64   * if all codes of prefix b have more than peek_bits bits. It is not
      65   * necessary to have a huge table (large MAX_PEEK) because most of the
      66   * codes encountered in the input stream are short codes (by construction).
      67   * So for most codes a single lookup will be necessary.
      68   */
      69  #if (1<<MAX_PEEK) > OUTBUFSIZ
      70      error cannot overlay prefix_len and outbuf
      71  #endif
      72  
      73  static ulg bitbuf;
      74  /* Bits are added on the low part of bitbuf and read from the high part. */
      75  
      76  static int valid;                  /* number of valid bits in bitbuf */
      77  /* all bits above the last valid bit are always zero */
      78  
      79  /* Read an input byte, reporting an error at EOF.  */
      80  static unsigned char
      81  read_byte (void)
      82  {
      83    int b = get_byte ();
      84    if (b < 0)
      85      gzip_error ("invalid compressed data -- unexpected end of file");
      86    return b;
      87  }
      88  
      89  /* Set code to the next 'bits' input bits without skipping them. code
      90   * must be the name of a simple variable and bits must not have side effects.
      91   * IN assertions: bits <= 25 (so that we still have room for an extra byte
      92   * when valid is only 24), and mask = (1<<bits)-1.
      93   */
      94  #define look_bits(code,bits,mask) \
      95  { \
      96    while (valid < (bits)) bitbuf = (bitbuf<<8) | read_byte(), valid += 8; \
      97    code = (bitbuf >> (valid-(bits))) & (mask); \
      98  }
      99  
     100  /* Skip the given number of bits (after having peeked at them): */
     101  #define skip_bits(bits)  (valid -= (bits))
     102  
     103  #define clear_bitbuf() (valid = 0, bitbuf = 0)
     104  
     105  /* Local functions */
     106  
     107  static void read_tree (void);
     108  static void build_tree (void);
     109  
     110  /* ===========================================================================
     111   * Read the Huffman tree.
     112   */
     113  static void
     114  read_tree ()
     115  {
     116      int len;  /* bit length */
     117      int base; /* base offset for a sequence of leaves */
     118      int n;
     119      int max_leaves = 1;
     120  
     121      /* Read the original input size, MSB first */
     122      orig_len = 0;
     123      for (n = 1; n <= 4; n++)
     124        orig_len = (orig_len << 8) | read_byte ();
     125  
     126      /* Read the maximum bit length of Huffman codes.  */
     127      max_len = read_byte ();
     128      if (! (0 < max_len && max_len <= MAX_BITLEN))
     129        gzip_error ("invalid compressed data -- "
     130                    "Huffman code bit length out of range");
     131  
     132      /* Get the number of leaves at each bit length */
     133      n = 0;
     134      for (len = 1; len <= max_len; len++) {
     135          leaves[len] = read_byte ();
     136          if (max_leaves - (len == max_len) < leaves[len])
     137            gzip_error ("too many leaves in Huffman tree");
     138          max_leaves = (max_leaves - leaves[len] + 1) * 2 - 1;
     139          n += leaves[len];
     140      }
     141      if (LITERALS <= n) {
     142          gzip_error ("too many leaves in Huffman tree");
     143      }
     144      Trace((stderr, "orig_len %lu, max_len %d, leaves %d\n",
     145             orig_len, max_len, n));
     146      /* There are at least 2 and at most 256 leaves of length max_len.
     147       * (Pack arbitrarily rejects empty files and files consisting of
     148       * a single byte even repeated.) To fit the last leaf count in a
     149       * byte, it is offset by 2. However, the last literal is the EOB
     150       * code, and is not transmitted explicitly in the tree, so we must
     151       * adjust here by one only.
     152       */
     153      leaves[max_len]++;
     154  
     155      /* Now read the leaves themselves */
     156      base = 0;
     157      for (len = 1; len <= max_len; len++) {
     158          /* Remember where the literals of this length start in literal[] : */
     159          lit_base[len] = base;
     160          /* And read the literals: */
     161          for (n = leaves[len]; n > 0; n--) {
     162              literal[base++] = read_byte ();
     163          }
     164      }
     165      leaves[max_len]++; /* Now include the EOB code in the Huffman tree */
     166  }
     167  
     168  /* ===========================================================================
     169   * Build the Huffman tree and the prefix table.
     170   */
     171  static void
     172  build_tree ()
     173  {
     174      int nodes = 0; /* number of nodes (parents+leaves) at current bit length */
     175      int len;       /* current bit length */
     176      uch *prefixp;  /* pointer in prefix_len */
     177  
     178      for (len = max_len; len >= 1; len--) {
     179          /* The number of parent nodes at this level is half the total
     180           * number of nodes at parent level:
     181           */
     182          nodes >>= 1;
     183          parents[len] = nodes;
     184          /* Update lit_base by the appropriate bias to skip the parent nodes
     185           * (which are not represented in the literal array):
     186           */
     187          lit_base[len] -= nodes;
     188          /* Restore nodes to be parents+leaves: */
     189          nodes += leaves[len];
     190      }
     191      if ((nodes >> 1) != 1)
     192        gzip_error ("too few leaves in Huffman tree");
     193  
     194      /* Construct the prefix table, from shortest leaves to longest ones.
     195       * The shortest code is all ones, so we start at the end of the table.
     196       */
     197      peek_bits = MIN(max_len, MAX_PEEK);
     198      prefixp = &prefix_len[1<<peek_bits];
     199      for (len = 1; len <= peek_bits; len++) {
     200          int prefixes = leaves[len] << (peek_bits-len); /* may be 0 */
     201          while (prefixes--) *--prefixp = (uch)len;
     202      }
     203      /* The length of all other codes is unknown: */
     204      while (prefixp > prefix_len) *--prefixp = 0;
     205  }
     206  
     207  /* ===========================================================================
     208   * Unpack in to out.  This routine does not support the old pack format
     209   * with magic header \037\037.
     210   *
     211   * IN assertions: the buffer inbuf contains already the beginning of
     212   *   the compressed data, from offsets inptr to insize-1 included.
     213   *   The magic header has already been checked. The output buffer is cleared.
     214   *
     215   * 'in' and 'out' are the input and output file descriptors.
     216   */
     217  int
     218  unpack (int in, int out)
     219  {
     220      int len;                /* Bit length of current code */
     221      unsigned eob;           /* End Of Block code */
     222      register unsigned peek; /* lookahead bits */
     223      unsigned peek_mask;     /* Mask for peek_bits bits */
     224  
     225      ifd = in;
     226      ofd = out;
     227  
     228      read_tree();     /* Read the Huffman tree */
     229      build_tree();    /* Build the prefix table */
     230      clear_bitbuf();  /* Initialize bit input */
     231      peek_mask = (1<<peek_bits)-1;
     232  
     233      /* The eob code is the largest code among all leaves of maximal length: */
     234      eob = leaves[max_len]-1;
     235      Trace((stderr, "eob %d %x\n", max_len, eob));
     236  
     237      /* Decode the input data: */
     238      for (;;) {
     239          /* Since eob is the longest code and not shorter than max_len,
     240           * we can peek at max_len bits without having the risk of reading
     241           * beyond the end of file.
     242           */
     243          look_bits(peek, peek_bits, peek_mask);
     244          len = prefix_len[peek];
     245          if (len > 0) {
     246              peek >>= peek_bits - len; /* discard the extra bits */
     247          } else {
     248              /* Code of more than peek_bits bits, we must traverse the tree */
     249              ulg mask = peek_mask;
     250              len = peek_bits;
     251  
     252              /* Loop as long as peek is a parent node.  */
     253              while (peek < parents[len])
     254                {
     255                  len++, mask = (mask<<1)+1;
     256                  look_bits(peek, len, mask);
     257                }
     258          }
     259          /* At this point, peek is the next complete code, of len bits */
     260          if (peek == eob && len == max_len)
     261            break; /* End of file.  */
     262          put_ubyte(literal[peek+lit_base[len]]);
     263          Tracev((stderr,"%02d %04x %c\n", len, peek,
     264                  literal[peek+lit_base[len]]));
     265          skip_bits(len);
     266      } /* for (;;) */
     267  
     268      flush_window();
     269      if (orig_len != (ulg)(bytes_out & 0xffffffff)) {
     270          gzip_error ("invalid compressed data--length error");
     271      }
     272      return OK;
     273  }