(root)/
gzip-1.13/
unlzw.c
       1  /* unlzw.c -- decompress files in LZW format.
       2   * The code in this file is directly derived from the public domain 'compress'
       3   * written by Spencer Thomas, Joe Orost, James Woods, Jim McKie, Steve Davies,
       4   * Ken Turkowski, Dave Mack and Peter Jannesen.
       5   *
       6   * This is a temporary version which will be rewritten in some future version
       7   * to accommodate in-memory decompression.
       8   */
       9  
      10  #include <config.h>
      11  
      12  #include <unistd.h>
      13  #include <fcntl.h>
      14  
      15  #include "tailor.h"
      16  #include "gzip.h"
      17  #include "lzw.h"
      18  
      19  typedef unsigned char char_type;
      20  typedef          long code_int;
      21  typedef unsigned long count_int;
      22  typedef unsigned short count_short;
      23  typedef unsigned long cmp_code_int;
      24  
/* MAXCODE(n) is 2^n as a long.  unlzw() uses MAXCODE(n_bits)-1 as the
 * largest code of the current width and MAXCODE(maxbits) as the upper
 * bound on table entries.  n must be smaller than the width of long.
 */
#define MAXCODE(n)	(1L << (n))

/* BYTEORDER may be preset by the build to 4321 or 1234; those are the only
 * two values tested below.  The default 0000 selects neither.
 */
#ifndef	BYTEORDER
#	define	BYTEORDER	0000
#endif

/* NOALLIGN == 1 (together with BYTEORDER == 4321) selects the input()
 * variant that loads a long directly from an arbitrary byte address.
 * The default 0 selects the byte-by-byte variant.
 */
#ifndef	NOALLIGN
#	define	NOALLIGN	0
#endif
      34  
      35  
/* Overlay of a long with its individual bytes.  The member order depends
 * on BYTEORDER: b1..b4 for 4321, b4..b1 for 1234.  For any other value
 * BYTEORDER is #undef'd (so later "#if BYTEORDER == ..." tests see 0) and
 * the struct only holds a placeholder member.
 * NOTE(review): this union is not referenced in the visible part of this
 * file -- confirm whether it is still needed.
 */
union	bytes {
    long  word;
    struct {
#if BYTEORDER == 4321
        char_type	b1;
        char_type	b2;
        char_type	b3;
        char_type	b4;
#else
#if BYTEORDER == 1234
        char_type	b4;
        char_type	b3;
        char_type	b2;
        char_type	b1;
#else
        /* Unknown byte order: drop the macro and keep the struct non-empty. */
#	undef	BYTEORDER
        int  dummy;
#endif
#endif
    } bytes;
};
      57  
      58  #if BYTEORDER == 4321 && NOALLIGN == 1
      59  #  define input(b,o,c,n,m){ \
      60       (c) = (*(long *)(&(b)[(o)>>3])>>((o)&0x7))&(m); \
      61       (o) += (n); \
      62     }
      63  #else
      64  #  define input(b,o,c,n,m){ \
      65       char_type *p = &(b)[(o)>>3]; \
      66       (c) = ((((long)(p[0]))|((long)(p[1])<<8)| \
      67       ((long)(p[2])<<16))>>((o)&0x7))&(m); \
      68       (o) += (n); \
      69     }
      70  #endif
      71  
      72  #ifndef MAXSEG_64K
      73     /* DECLARE(ush, tab_prefix, (1<<BITS)); -- prefix code */
      74  #  define tab_prefixof(i) tab_prefix[i]
      75  #  define clear_tab_prefixof()	memzero(tab_prefix, 256);
      76  #else
      77     /* DECLARE(ush, tab_prefix0, (1<<(BITS-1)); -- prefix for even codes */
      78     /* DECLARE(ush, tab_prefix1, (1<<(BITS-1)); -- prefix for odd  codes */
      79     ush *tab_prefix[2];
      80  #  define tab_prefixof(i) tab_prefix[(i)&1][(i)>>1]
      81  #  define clear_tab_prefixof()	\
      82        memzero(tab_prefix0, 128), \
      83        memzero(tab_prefix1, 128);
      84  #endif
      85  #define de_stack        ((char_type *)(&d_buf[DIST_BUFSIZE-1]))
      86  #define tab_suffixof(i) tab_suffix[i]
      87  
/* Block compress mode (-C), compatible with 2.0.
 * Starts out as BLOCK_MODE and is overwritten by unlzw() with the
 * BLOCK_MODE bit of the flag byte read from the input.  When non-zero,
 * unlzw() starts the code table at FIRST instead of 256 and treats the
 * CLEAR code as a request to reset the table.
 */
static int block_mode = BLOCK_MODE;
      90  
      91  /* ============================================================================
      92   * Decompress in to out.  This routine adapts to the codes in the
      93   * file building the "string" table on-the-fly; requiring no table to
      94   * be stored in the compressed file.
      95   * IN assertions: the buffer inbuf contains already the beginning of
      96   *   the compressed data, from offsets iptr to insize-1 included.
      97   *   The magic header has already been checked and skipped.
      98   *   bytes_in and bytes_out have been initialized.
      99   * 'in' and 'out' are the input and output file descriptors.
     100   */
     101  int
     102  unlzw (int in, int out)
     103  {
     104      char_type  *stackp;
     105      code_int   code;
     106      int        finchar;
     107      code_int   oldcode;
     108      code_int   incode;
     109      long       inbits;
     110      long       posbits;
     111      int        outpos;
     112  /*  int        insize; (global) */
     113      unsigned   bitmask;
     114      code_int   free_ent;
     115      code_int   maxcode;
     116      code_int   maxmaxcode;
     117      int        n_bits;
     118      int        rsize;
     119  
     120  #ifdef MAXSEG_64K
     121      tab_prefix[0] = tab_prefix0;
     122      tab_prefix[1] = tab_prefix1;
     123  #endif
     124      maxbits = get_byte();
     125      block_mode = maxbits & BLOCK_MODE;
     126      if ((maxbits & LZW_RESERVED) != 0) {
     127          WARN((stderr, "\n%s: %s: warning, unknown flags 0x%x\n",
     128                program_name, ifname, (unsigned int) maxbits & LZW_RESERVED));
     129      }
     130      maxbits &= BIT_MASK;
     131      maxmaxcode = MAXCODE(maxbits);
     132  
     133      if (maxbits > BITS) {
     134          fprintf(stderr,
     135                  "\n%s: %s: compressed with %d bits, can only handle %d bits\n",
     136                  program_name, ifname, maxbits, BITS);
     137          exit_code = ERROR;
     138          return ERROR;
     139      }
     140      rsize = insize;
     141      maxcode = MAXCODE(n_bits = INIT_BITS)-1;
     142      bitmask = (1<<n_bits)-1;
     143      oldcode = -1;
     144      finchar = 0;
     145      outpos = 0;
     146      posbits = inptr<<3;
     147  
     148      free_ent = ((block_mode) ? FIRST : 256);
     149  
     150      clear_tab_prefixof(); /* Initialize the first 256 entries in the table. */
     151  
     152      for (code = 255 ; code >= 0 ; --code) {
     153          tab_suffixof(code) = (char_type)code;
     154      }
     155      do {
     156          int i;
     157          int  e;
     158          int  o;
     159  
     160      resetbuf:
     161          o = posbits >> 3;
     162          e = o <= insize ? insize - o : 0;
     163  
     164          for (i = 0 ; i < e ; ++i) {
     165              inbuf[i] = inbuf[i+o];
     166          }
     167          insize = e;
     168          posbits = 0;
     169  
     170          if (insize < INBUF_EXTRA) {
     171              rsize = read_buffer (in, (char *) inbuf + insize, INBUFSIZ);
     172              if (rsize == -1) {
     173                  read_error();
     174              }
     175              insize += rsize;
     176              bytes_in += (off_t)rsize;
     177          }
     178          inbits = ((rsize != 0) ? ((long)insize - insize%n_bits)<<3 :
     179                    ((long)insize<<3)-(n_bits-1));
     180  
     181          while (inbits > posbits) {
     182              if (free_ent > maxcode) {
     183                  posbits = ((posbits-1) +
     184                             ((n_bits<<3)-(posbits-1+(n_bits<<3))%(n_bits<<3)));
     185                  ++n_bits;
     186                  if (n_bits == maxbits) {
     187                      maxcode = maxmaxcode;
     188                  } else {
     189                      maxcode = MAXCODE(n_bits)-1;
     190                  }
     191                  bitmask = (1<<n_bits)-1;
     192                  goto resetbuf;
     193              }
     194              input(inbuf,posbits,code,n_bits,bitmask);
     195              Tracev((stderr, "%ld ", code));
     196  
     197              if (oldcode == -1) {
     198                  if (256 <= code)
     199                    gzip_error ("corrupt input.");
     200                  outbuf[outpos++] = (char_type)(finchar = (int)(oldcode=code));
     201                  continue;
     202              }
     203              if (code == CLEAR && block_mode) {
     204                  clear_tab_prefixof();
     205                  free_ent = FIRST - 1;
     206                  posbits = ((posbits-1) +
     207                             ((n_bits<<3)-(posbits-1+(n_bits<<3))%(n_bits<<3)));
     208                  maxcode = MAXCODE(n_bits = INIT_BITS)-1;
     209                  bitmask = (1<<n_bits)-1;
     210                  goto resetbuf;
     211              }
     212              incode = code;
     213              stackp = de_stack;
     214  
     215              if (code >= free_ent) { /* Special case for KwKwK string. */
     216                  if (code > free_ent) {
     217  #ifdef DEBUG
     218                      char_type *p;
     219  
     220                      posbits -= n_bits;
     221                      p = &inbuf[posbits>>3];
     222                      fprintf(stderr,
     223                              "code:%ld free_ent:%ld n_bits:%d insize:%u\n",
     224                              code, free_ent, n_bits, insize);
     225                      fprintf(stderr,
     226                              "posbits:%ld inbuf:%02X %02X %02X %02X %02X\n",
     227                              posbits, p[-1],p[0],p[1],p[2],p[3]);
     228  #endif
     229                      if (outpos > 0)
     230                        write_buf (out, outbuf, outpos);
     231                      gzip_error (to_stdout
     232                                  ? "corrupt input."
     233                                  : "corrupt input. Use zcat to recover some data.");
     234                  }
     235                  *--stackp = (char_type)finchar;
     236                  code = oldcode;
     237              }
     238  
     239              while ((cmp_code_int)code >= (cmp_code_int)256) {
     240                  /* Generate output characters in reverse order */
     241                  *--stackp = tab_suffixof(code);
     242                  code = tab_prefixof(code);
     243              }
     244              *--stackp =	(char_type)(finchar = tab_suffixof(code));
     245  
     246              /* And put them out in forward order */
     247              {
     248                  int i;
     249  
     250                  if (outpos+(i = (de_stack-stackp)) >= OUTBUFSIZ) {
     251                      do {
     252                          if (i > OUTBUFSIZ-outpos) i = OUTBUFSIZ-outpos;
     253  
     254                          if (i > 0) {
     255                              memcpy(outbuf+outpos, stackp, i);
     256                              outpos += i;
     257                          }
     258                          if (outpos >= OUTBUFSIZ) {
     259                              write_buf (out, outbuf, outpos);
     260                              outpos = 0;
     261                          }
     262                          stackp+= i;
     263                      } while ((i = (de_stack-stackp)) > 0);
     264                  } else {
     265                      memcpy(outbuf+outpos, stackp, i);
     266                      outpos += i;
     267                  }
     268              }
     269  
     270              if ((code = free_ent) < maxmaxcode) { /* Generate the new entry. */
     271  
     272                  tab_prefixof(code) = (unsigned short)oldcode;
     273                  tab_suffixof(code) = (char_type)finchar;
     274                  free_ent = code+1;
     275              }
     276              oldcode = incode;	/* Remember previous code.	*/
     277          }
     278      } while (rsize != 0);
     279  
     280      if (outpos > 0)
     281        write_buf (out, outbuf, outpos);
     282      return OK;
     283  }