(root)/
binutils-2.41/
zlib/
examples/
gzappend.c
       1  /* gzappend -- command to append to a gzip file
       2  
       3    Copyright (C) 2003, 2012 Mark Adler, all rights reserved
       4    version 1.2, 11 Oct 2012
       5  
       6    This software is provided 'as-is', without any express or implied
       7    warranty.  In no event will the author be held liable for any damages
       8    arising from the use of this software.
       9  
      10    Permission is granted to anyone to use this software for any purpose,
      11    including commercial applications, and to alter it and redistribute it
      12    freely, subject to the following restrictions:
      13  
      14    1. The origin of this software must not be misrepresented; you must not
      15       claim that you wrote the original software. If you use this software
      16       in a product, an acknowledgment in the product documentation would be
      17       appreciated but is not required.
      18    2. Altered source versions must be plainly marked as such, and must not be
      19       misrepresented as being the original software.
      20    3. This notice may not be removed or altered from any source distribution.
      21  
      22    Mark Adler    madler@alumni.caltech.edu
      23   */
      24  
      25  /*
      26   * Change history:
      27   *
      28   * 1.0  19 Oct 2003     - First version
      29   * 1.1   4 Nov 2003     - Expand and clarify some comments and notes
      30   *                      - Add version and copyright to help
      31   *                      - Send help to stdout instead of stderr
      32   *                      - Add some preemptive typecasts
      33   *                      - Add L to constants in lseek() calls
      34   *                      - Remove some debugging information in error messages
      35   *                      - Use new data_type definition for zlib 1.2.1
      36   *                      - Simplify and unify file operations
      37   *                      - Finish off gzip file in gztack()
      38   *                      - Use deflatePrime() instead of adding empty blocks
      39   *                      - Keep gzip file clean on appended file read errors
      40   *                      - Use in-place rotate instead of auxiliary buffer
      41   *                        (Why you ask?  Because it was fun to write!)
      42   * 1.2  11 Oct 2012     - Fix for proper z_const usage
      43   *                      - Check for input buffer malloc failure
      44   */
      45  
      46  /*
      47     gzappend takes a gzip file and appends to it, compressing files from the
      48     command line or data from stdin.  The gzip file is written to directly, to
      49     avoid copying that file, in case it's large.  Note that this results in the
      50     unfriendly behavior that if gzappend fails, the gzip file is corrupted.
      51  
      52     This program was written to illustrate the use of the new Z_BLOCK option of
      53     zlib 1.2.x's inflate() function.  This option returns from inflate() at each
      54     block boundary to facilitate locating and modifying the last block bit at
      55     the start of the final deflate block.  Also whether using Z_BLOCK or not,
      56     another required feature of zlib 1.2.x is that inflate() now provides the
      57     number of unused bits in the last input byte used.  gzappend will not work
      58     with versions of zlib earlier than 1.2.1.
      59  
      60     gzappend first decompresses the gzip file internally, discarding all but
      61     the last 32K of uncompressed data, and noting the location of the last block
      62     bit and the number of unused bits in the last byte of the compressed data.
      63     The gzip trailer containing the CRC-32 and length of the uncompressed data
      64     is verified.  This trailer will be later overwritten.
      65  
      66     Then the last block bit is cleared by seeking back in the file and rewriting
      67     the byte that contains it.  Seeking forward, the last byte of the compressed
      68     data is saved along with the number of unused bits to initialize deflate.
      69  
      70     A deflate process is initialized, using the last 32K of the uncompressed
      71     data from the gzip file to initialize the dictionary.  If the total
      72     uncompressed data was less than 32K, then all of it is used to initialize
      73     the dictionary.  The deflate output bit buffer is also initialized with the
      74     last bits from the original deflate stream.  From here on, the data to
      75     append is simply compressed using deflate, and written to the gzip file.
      76     When that is complete, the new CRC-32 and uncompressed length are written
      77     as the trailer of the gzip file.
      78   */
      79  
      80  #include <stdio.h>
      81  #include <stdlib.h>
      82  #include <string.h>
      83  #include <fcntl.h>
      84  #include <unistd.h>
      85  #include "zlib.h"
      86  
      87  #define local static
      88  #define LGCHUNK 14
      89  #define CHUNK (1U << LGCHUNK)
      90  #define DSIZE 32768U
      91  
      92  /* print an error message and terminate with extreme prejudice */
      93  local void bye(char *msg1, char *msg2)
      94  {
      95      fprintf(stderr, "gzappend error: %s%s\n", msg1, msg2);
      96      exit(1);
      97  }
      98  
      99  /* return the greatest common divisor of a and b using Euclid's algorithm,
     100     modified to be fast when one argument much greater than the other, and
     101     coded to avoid unnecessary swapping */
     102  local unsigned gcd(unsigned a, unsigned b)
     103  {
     104      unsigned c;
     105  
     106      while (a && b)
     107          if (a > b) {
     108              c = b;
     109              while (a - c >= c)
     110                  c <<= 1;
     111              a -= c;
     112          }
     113          else {
     114              c = a;
     115              while (b - c >= c)
     116                  c <<= 1;
     117              b -= c;
     118          }
     119      return a + b;
     120  }
     121  
     122  /* rotate list[0..len-1] left by rot positions, in place */
     123  local void rotate(unsigned char *list, unsigned len, unsigned rot)
     124  {
     125      unsigned char tmp;
     126      unsigned cycles;
     127      unsigned char *start, *last, *to, *from;
     128  
     129      /* normalize rot and handle degenerate cases */
     130      if (len < 2) return;
     131      if (rot >= len) rot %= len;
     132      if (rot == 0) return;
     133  
     134      /* pointer to last entry in list */
     135      last = list + (len - 1);
     136  
     137      /* do simple left shift by one */
     138      if (rot == 1) {
     139          tmp = *list;
     140          memmove(list, list + 1, len - 1);
     141          *last = tmp;
     142          return;
     143      }
     144  
     145      /* do simple right shift by one */
     146      if (rot == len - 1) {
     147          tmp = *last;
     148          memmove(list + 1, list, len - 1);
     149          *list = tmp;
     150          return;
     151      }
     152  
     153      /* otherwise do rotate as a set of cycles in place */
     154      cycles = gcd(len, rot);             /* number of cycles */
     155      do {
     156          start = from = list + cycles;   /* start index is arbitrary */
     157          tmp = *from;                    /* save entry to be overwritten */
     158          for (;;) {
     159              to = from;                  /* next step in cycle */
     160              from += rot;                /* go right rot positions */
     161              if (from > last) from -= len;   /* (pointer better not wrap) */
     162              if (from == start) break;   /* all but one shifted */
     163              *to = *from;                /* shift left */
     164          }
     165          *to = tmp;                      /* complete the circle */
     166      } while (--cycles);
     167  }
     168  
     169  /* structure for gzip file read operations */
     170  typedef struct {
     171      int fd;                     /* file descriptor */
     172      int size;                   /* 1 << size is bytes in buf */
     173      unsigned left;              /* bytes available at next */
     174      unsigned char *buf;         /* buffer */
     175      z_const unsigned char *next;    /* next byte in buffer */
     176      char *name;                 /* file name for error messages */
     177  } file;
     178  
     179  /* reload buffer */
     180  local int readin(file *in)
     181  {
     182      int len;
     183  
     184      len = read(in->fd, in->buf, 1 << in->size);
     185      if (len == -1) bye("error reading ", in->name);
     186      in->left = (unsigned)len;
     187      in->next = in->buf;
     188      return len;
     189  }
     190  
     191  /* read from file in, exit if end-of-file */
     192  local int readmore(file *in)
     193  {
     194      if (readin(in) == 0) bye("unexpected end of ", in->name);
     195      return 0;
     196  }
     197  
     198  #define read1(in) (in->left == 0 ? readmore(in) : 0, \
     199                     in->left--, *(in->next)++)
     200  
     201  /* skip over n bytes of in */
     202  local void skip(file *in, unsigned n)
     203  {
     204      unsigned bypass;
     205  
     206      if (n > in->left) {
     207          n -= in->left;
     208          bypass = n & ~((1U << in->size) - 1);
     209          if (bypass) {
     210              if (lseek(in->fd, (off_t)bypass, SEEK_CUR) == -1)
     211                  bye("seeking ", in->name);
     212              n -= bypass;
     213          }
     214          readmore(in);
     215          if (n > in->left)
     216              bye("unexpected end of ", in->name);
     217      }
     218      in->left -= n;
     219      in->next += n;
     220  }
     221  
     222  /* read a four-byte unsigned integer, little-endian, from in */
     223  unsigned long read4(file *in)
     224  {
     225      unsigned long val;
     226  
     227      val = read1(in);
     228      val += (unsigned)read1(in) << 8;
     229      val += (unsigned long)read1(in) << 16;
     230      val += (unsigned long)read1(in) << 24;
     231      return val;
     232  }
     233  
     234  /* skip over gzip header */
     235  local void gzheader(file *in)
     236  {
     237      int flags;
     238      unsigned n;
     239  
     240      if (read1(in) != 31 || read1(in) != 139) bye(in->name, " not a gzip file");
     241      if (read1(in) != 8) bye("unknown compression method in", in->name);
     242      flags = read1(in);
     243      if (flags & 0xe0) bye("unknown header flags set in", in->name);
     244      skip(in, 6);
     245      if (flags & 4) {
     246          n = read1(in);
     247          n += (unsigned)(read1(in)) << 8;
     248          skip(in, n);
     249      }
     250      if (flags & 8) while (read1(in) != 0) ;
     251      if (flags & 16) while (read1(in) != 0) ;
     252      if (flags & 2) skip(in, 2);
     253  }
     254  
     255  /* decompress gzip file "name", return strm with a deflate stream ready to
     256     continue compression of the data in the gzip file, and return a file
     257     descriptor pointing to where to write the compressed data -- the deflate
     258     stream is initialized to compress using level "level" */
     259  local int gzscan(char *name, z_stream *strm, int level)
     260  {
     261      int ret, lastbit, left, full;
     262      unsigned have;
     263      unsigned long crc, tot;
     264      unsigned char *window;
     265      off_t lastoff, end;
     266      file gz;
     267  
     268      /* open gzip file */
     269      gz.name = name;
     270      gz.fd = open(name, O_RDWR, 0);
     271      if (gz.fd == -1) bye("cannot open ", name);
     272      gz.buf = malloc(CHUNK);
     273      if (gz.buf == NULL) bye("out of memory", "");
     274      gz.size = LGCHUNK;
     275      gz.left = 0;
     276  
     277      /* skip gzip header */
     278      gzheader(&gz);
     279  
     280      /* prepare to decompress */
     281      window = malloc(DSIZE);
     282      if (window == NULL) bye("out of memory", "");
     283      strm->zalloc = Z_NULL;
     284      strm->zfree = Z_NULL;
     285      strm->opaque = Z_NULL;
     286      ret = inflateInit2(strm, -15);
     287      if (ret != Z_OK) bye("out of memory", " or library mismatch");
     288  
     289      /* decompress the deflate stream, saving append information */
     290      lastbit = 0;
     291      lastoff = lseek(gz.fd, 0L, SEEK_CUR) - gz.left;
     292      left = 0;
     293      strm->avail_in = gz.left;
     294      strm->next_in = gz.next;
     295      crc = crc32(0L, Z_NULL, 0);
     296      have = full = 0;
     297      do {
     298          /* if needed, get more input */
     299          if (strm->avail_in == 0) {
     300              readmore(&gz);
     301              strm->avail_in = gz.left;
     302              strm->next_in = gz.next;
     303          }
     304  
     305          /* set up output to next available section of sliding window */
     306          strm->avail_out = DSIZE - have;
     307          strm->next_out = window + have;
     308  
     309          /* inflate and check for errors */
     310          ret = inflate(strm, Z_BLOCK);
     311          if (ret == Z_STREAM_ERROR) bye("internal stream error!", "");
     312          if (ret == Z_MEM_ERROR) bye("out of memory", "");
     313          if (ret == Z_DATA_ERROR)
     314              bye("invalid compressed data--format violated in", name);
     315  
     316          /* update crc and sliding window pointer */
     317          crc = crc32(crc, window + have, DSIZE - have - strm->avail_out);
     318          if (strm->avail_out)
     319              have = DSIZE - strm->avail_out;
     320          else {
     321              have = 0;
     322              full = 1;
     323          }
     324  
     325          /* process end of block */
     326          if (strm->data_type & 128) {
     327              if (strm->data_type & 64)
     328                  left = strm->data_type & 0x1f;
     329              else {
     330                  lastbit = strm->data_type & 0x1f;
     331                  lastoff = lseek(gz.fd, 0L, SEEK_CUR) - strm->avail_in;
     332              }
     333          }
     334      } while (ret != Z_STREAM_END);
     335      inflateEnd(strm);
     336      gz.left = strm->avail_in;
     337      gz.next = strm->next_in;
     338  
     339      /* save the location of the end of the compressed data */
     340      end = lseek(gz.fd, 0L, SEEK_CUR) - gz.left;
     341  
     342      /* check gzip trailer and save total for deflate */
     343      if (crc != read4(&gz))
     344          bye("invalid compressed data--crc mismatch in ", name);
     345      tot = strm->total_out;
     346      if ((tot & 0xffffffffUL) != read4(&gz))
     347          bye("invalid compressed data--length mismatch in", name);
     348  
     349      /* if not at end of file, warn */
     350      if (gz.left || readin(&gz))
     351          fprintf(stderr,
     352              "gzappend warning: junk at end of gzip file overwritten\n");
     353  
     354      /* clear last block bit */
     355      lseek(gz.fd, lastoff - (lastbit != 0), SEEK_SET);
     356      if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name);
     357      *gz.buf = (unsigned char)(*gz.buf ^ (1 << ((8 - lastbit) & 7)));
     358      lseek(gz.fd, -1L, SEEK_CUR);
     359      if (write(gz.fd, gz.buf, 1) != 1) bye("writing after seek to ", name);
     360  
     361      /* if window wrapped, build dictionary from window by rotating */
     362      if (full) {
     363          rotate(window, DSIZE, have);
     364          have = DSIZE;
     365      }
     366  
     367      /* set up deflate stream with window, crc, total_in, and leftover bits */
     368      ret = deflateInit2(strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY);
     369      if (ret != Z_OK) bye("out of memory", "");
     370      deflateSetDictionary(strm, window, have);
     371      strm->adler = crc;
     372      strm->total_in = tot;
     373      if (left) {
     374          lseek(gz.fd, --end, SEEK_SET);
     375          if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name);
     376          deflatePrime(strm, 8 - left, *gz.buf);
     377      }
     378      lseek(gz.fd, end, SEEK_SET);
     379  
     380      /* clean up and return */
     381      free(window);
     382      free(gz.buf);
     383      return gz.fd;
     384  }
     385  
     386  /* append file "name" to gzip file gd using deflate stream strm -- if last
     387     is true, then finish off the deflate stream at the end */
     388  local void gztack(char *name, int gd, z_stream *strm, int last)
     389  {
     390      int fd, len, ret;
     391      unsigned left;
     392      unsigned char *in, *out;
     393  
     394      /* open file to compress and append */
     395      fd = 0;
     396      if (name != NULL) {
     397          fd = open(name, O_RDONLY, 0);
     398          if (fd == -1)
     399              fprintf(stderr, "gzappend warning: %s not found, skipping ...\n",
     400                      name);
     401      }
     402  
     403      /* allocate buffers */
     404      in = malloc(CHUNK);
     405      out = malloc(CHUNK);
     406      if (in == NULL || out == NULL) bye("out of memory", "");
     407  
     408      /* compress input file and append to gzip file */
     409      do {
     410          /* get more input */
     411          len = read(fd, in, CHUNK);
     412          if (len == -1) {
     413              fprintf(stderr,
     414                      "gzappend warning: error reading %s, skipping rest ...\n",
     415                      name);
     416              len = 0;
     417          }
     418          strm->avail_in = (unsigned)len;
     419          strm->next_in = in;
     420          if (len) strm->adler = crc32(strm->adler, in, (unsigned)len);
     421  
     422          /* compress and write all available output */
     423          do {
     424              strm->avail_out = CHUNK;
     425              strm->next_out = out;
     426              ret = deflate(strm, last && len == 0 ? Z_FINISH : Z_NO_FLUSH);
     427              left = CHUNK - strm->avail_out;
     428              while (left) {
     429                  len = write(gd, out + CHUNK - strm->avail_out - left, left);
     430                  if (len == -1) bye("writing gzip file", "");
     431                  left -= (unsigned)len;
     432              }
     433          } while (strm->avail_out == 0 && ret != Z_STREAM_END);
     434      } while (len != 0);
     435  
     436      /* write trailer after last entry */
     437      if (last) {
     438          deflateEnd(strm);
     439          out[0] = (unsigned char)(strm->adler);
     440          out[1] = (unsigned char)(strm->adler >> 8);
     441          out[2] = (unsigned char)(strm->adler >> 16);
     442          out[3] = (unsigned char)(strm->adler >> 24);
     443          out[4] = (unsigned char)(strm->total_in);
     444          out[5] = (unsigned char)(strm->total_in >> 8);
     445          out[6] = (unsigned char)(strm->total_in >> 16);
     446          out[7] = (unsigned char)(strm->total_in >> 24);
     447          len = 8;
     448          do {
     449              ret = write(gd, out + 8 - len, len);
     450              if (ret == -1) bye("writing gzip file", "");
     451              len -= ret;
     452          } while (len);
     453          close(gd);
     454      }
     455  
     456      /* clean up and return */
     457      free(out);
     458      free(in);
     459      if (fd > 0) close(fd);
     460  }
     461  
     462  /* process the compression level option if present, scan the gzip file, and
     463     append the specified files, or append the data from stdin if no other file
     464     names are provided on the command line -- the gzip file must be writable
     465     and seekable */
     466  int main(int argc, char **argv)
     467  {
     468      int gd, level;
     469      z_stream strm;
     470  
     471      /* ignore command name */
     472      argc--; argv++;
     473  
     474      /* provide usage if no arguments */
     475      if (*argv == NULL) {
     476          printf(
     477              "gzappend 1.2 (11 Oct 2012) Copyright (C) 2003, 2012 Mark Adler\n"
     478                 );
     479          printf(
     480              "usage: gzappend [-level] file.gz [ addthis [ andthis ... ]]\n");
     481          return 0;
     482      }
     483  
     484      /* set compression level */
     485      level = Z_DEFAULT_COMPRESSION;
     486      if (argv[0][0] == '-') {
     487          if (argv[0][1] < '0' || argv[0][1] > '9' || argv[0][2] != 0)
     488              bye("invalid compression level", "");
     489          level = argv[0][1] - '0';
     490          if (*++argv == NULL) bye("no gzip file name after options", "");
     491      }
     492  
     493      /* prepare to append to gzip file */
     494      gd = gzscan(*argv++, &strm, level);
     495  
     496      /* append files on command line, or from stdin if none */
     497      if (*argv == NULL)
     498          gztack(NULL, gd, &strm, 1);
     499      else
     500          do {
     501              gztack(*argv, gd, &strm, argv[1] == NULL);
     502          } while (*++argv != NULL);
     503      return 0;
     504  }