(root)/
binutils-2.41/
zlib/
examples/
gzlog.c
       1  /*
       2   * gzlog.c
       3   * Copyright (C) 2004, 2008, 2012, 2016, 2019 Mark Adler, all rights reserved
       4   * For conditions of distribution and use, see copyright notice in gzlog.h
       5   * version 2.3, 25 May 2019
       6   */
       7  
       8  /*
       9     gzlog provides a mechanism for frequently appending short strings to a gzip
      10     file that is efficient both in execution time and compression ratio.  The
      11     strategy is to write the short strings in an uncompressed form to the end of
      12     the gzip file, only compressing when the amount of uncompressed data has
      13     reached a given threshold.
      14  
      15     gzlog also provides protection against interruptions in the process due to
      16     system crashes.  The status of the operation is recorded in an extra field
      17     in the gzip file, and is only updated once the gzip file is brought to a
      18     valid state.  The last data to be appended or compressed is saved in an
      19     auxiliary file, so that if the operation is interrupted, it can be completed
      20     the next time an append operation is attempted.
      21  
      22     gzlog maintains another auxiliary file with the last 32K of data from the
      23     compressed portion, which is preloaded for the compression of the subsequent
      24     data.  This minimizes the impact to the compression ratio of appending.
      25   */
      26  
      27  /*
      28     Operations Concept:
      29  
      30     Files (log name "foo"):
      31     foo.gz -- gzip file with the complete log
      32     foo.add -- last message to append or last data to compress
      33     foo.dict -- dictionary of the last 32K of data for next compression
      34     foo.temp -- temporary dictionary file for compression after this one
      35     foo.lock -- lock file for reading and writing the other files
      36     foo.repairs -- log file for log file recovery operations (not compressed)
      37  
      38     gzip file structure:
      39     - fixed-length (no file name) header with extra field (see below)
      40     - compressed data ending initially with empty stored block
      41     - uncompressed data filling out originally empty stored block and
      42       subsequent stored blocks as needed (16K max each)
      43     - gzip trailer
      44     - no junk at end (no other gzip streams)
      45  
      46     When appending data, the information in the first three items above plus the
      47     foo.add file are sufficient to recover an interrupted append operation.  The
      48     extra field has the necessary information to restore the start of the last
      49     stored block and determine where to append the data in the foo.add file, as
      50     well as the crc and length of the gzip data before the append operation.
      51  
      52     The foo.add file is created before the gzip file is marked for append, and
      53     deleted after the gzip file is marked as complete.  So if the append
      54     operation is interrupted, the data to add will still be there.  If due to
      55     some external force, the foo.add file gets deleted between when the append
      56     operation was interrupted and when recovery is attempted, the gzip file will
      57     still be restored, but without the appended data.
      58  
      59     When compressing data, the information in the first two items above plus the
      60     foo.add file are sufficient to recover an interrupted compress operation.
      61     The extra field has the necessary information to find the end of the
      62     compressed data, and contains both the crc and length of just the compressed
      63     data and of the complete set of data including the contents of the foo.add
      64     file.
      65  
      66     Again, the foo.add file is maintained during the compress operation in case
      67     of an interruption.  If in the unlikely event the foo.add file with the data
      68     to be compressed is missing due to some external force, a gzip file with
      69     just the previous compressed data will be reconstructed.  In this case, all
      70     of the data that was to be compressed is lost (approximately one megabyte).
      71     This will not occur if all that happened was an interruption of the compress
      72     operation.
      73  
      74     The third state that is marked is the replacement of the old dictionary with
      75     the new dictionary after a compress operation.  Once compression is
      76     complete, the gzip file is marked as being in the replace state.  This
      77     completes the gzip file, so an interrupt after being so marked does not
      78     result in recompression.  Then the dictionary file is replaced, and the gzip
      79     file is marked as completed.  This state prevents the possibility of
      80     restarting compression with the wrong dictionary file.
      81  
      82     All three operations are wrapped by a lock/unlock procedure.  In order to
      83     gain exclusive access to the log files, first a foo.lock file must be
      84     exclusively created.  When all operations are complete, the lock is
      85     released by deleting the foo.lock file.  If when attempting to create the
      86     lock file, it already exists and the modify time of the lock file is more
      87     than five minutes old (set by the PATIENCE define below), then the old
      88     lock file is considered stale and deleted, and the exclusive creation of
      89     the lock file is retried.  To assure that there are no false assessments
      90     of the staleness of the lock file, the operations periodically touch the
      91     lock file to update the modified date.
      92  
      93     Following is the definition of the extra field with all of the information
      94     required to enable the above append and compress operations and their
      95     recovery if interrupted.  Multi-byte values are stored little endian
      96     (consistent with the gzip format).  File pointers are eight bytes long.
      97     The crc's and lengths for the gzip trailer are four bytes long.  (Note that
      98     the length at the end of a gzip file is used for error checking only, and
      99     for large files is actually the length modulo 2^32.)  The stored block
     100     length is two bytes long.  The gzip extra field two-byte identification is
     101     "ap" for append.  It is assumed that writing the extra field to the file is
     102     an "atomic" operation.  That is, either all of the extra field is written
     103     to the file, or none of it is, if the operation is interrupted right at the
     104     point of updating the extra field.  This is a reasonable assumption, since
     105     the extra field is within the first 52 bytes of the file, which is smaller
     106     than any expected block size for a mass storage device (usually 512 bytes or
     107     larger).
     108  
     109     Extra field (35 bytes):
     110     - Pointer to first stored block length -- this points to the two-byte length
     111       of the first stored block, which is followed by the two-byte, one's
     112       complement of that length.  The stored block length is preceded by the
     113       three-bit header of the stored block, which is the actual start of the
     114       stored block in the deflate format.  See the bit offset field below.
     115     - Pointer to the last stored block length.  This is the same as above, but
     116       for the last stored block of the uncompressed data in the gzip file.
     117       Initially this is the same as the first stored block length pointer.
     118       When the stored block gets to 16K (see the MAX_STORE define), then a new
     119       stored block as added, at which point the last stored block length pointer
     120       is different from the first stored block length pointer.  When they are
     121       different, the first bit of the last stored block header is eight bits, or
     122       one byte back from the block length.
     123     - Compressed data crc and length.  This is the crc and length of the data
     124       that is in the compressed portion of the deflate stream.  These are used
     125       only in the event that the foo.add file containing the data to compress is
     126       lost after a compress operation is interrupted.
     127     - Total data crc and length.  This is the crc and length of all of the data
     128       stored in the gzip file, compressed and uncompressed.  It is used to
     129       reconstruct the gzip trailer when compressing, as well as when recovering
     130       interrupted operations.
     131     - Final stored block length.  This is used to quickly find where to append,
     132       and allows the restoration of the original final stored block state when
     133       an append operation is interrupted.
     134     - First stored block start as the number of bits back from the final stored
     135       block first length byte.  This value is in the range of 3..10, and is
     136       stored as the low three bits of the final byte of the extra field after
     137       subtracting three (0..7).  This allows the last-block bit of the stored
     138       block header to be updated when a new stored block is added, for the case
     139       when the first stored block and the last stored block are the same.  (When
     140       they are different, the numbers of bits back is known to be eight.)  This
     141       also allows for new compressed data to be appended to the old compressed
     142       data in the compress operation, overwriting the previous first stored
     143       block, or for the compressed data to be terminated and a valid gzip file
     144       reconstructed on the off chance that a compression operation was
     145       interrupted and the data to compress in the foo.add file was deleted.
     146     - The operation in process.  This is the next two bits in the last byte (the
     147       bits under the mask 0x18).  The are interpreted as 0: nothing in process,
     148       1: append in process, 2: compress in process, 3: replace in process.
     149     - The top three bits of the last byte in the extra field are reserved and
     150       are currently set to zero.
     151  
     152     Main procedure:
     153     - Exclusively create the foo.lock file using the O_CREAT and O_EXCL modes of
     154       the system open() call.  If the modify time of an existing lock file is
     155       more than PATIENCE seconds old, then the lock file is deleted and the
     156       exclusive create is retried.
     157     - Load the extra field from the foo.gz file, and see if an operation was in
     158       progress but not completed.  If so, apply the recovery procedure below.
     159     - Perform the append procedure with the provided data.
     160     - If the uncompressed data in the foo.gz file is 1MB or more, apply the
     161       compress procedure.
     162     - Delete the foo.lock file.
     163  
     164     Append procedure:
     165     - Put what to append in the foo.add file so that the operation can be
     166       restarted if this procedure is interrupted.
     167     - Mark the foo.gz extra field with the append operation in progress.
     168     + Restore the original last-block bit and stored block length of the last
     169       stored block from the information in the extra field, in case a previous
     170       append operation was interrupted.
     171     - Append the provided data to the last stored block, creating new stored
     172       blocks as needed and updating the stored blocks last-block bits and
     173       lengths.
     174     - Update the crc and length with the new data, and write the gzip trailer.
     175     - Write over the extra field (with a single write operation) with the new
     176       pointers, lengths, and crc's, and mark the gzip file as not in process.
     177       Though there is still a foo.add file, it will be ignored since nothing
     178       is in process.  If a foo.add file is leftover from a previously
     179       completed operation, it is truncated when writing new data to it.
     180     - Delete the foo.add file.
     181  
     182     Compress and replace procedures:
     183     - Read all of the uncompressed data in the stored blocks in foo.gz and write
     184       it to foo.add.  Also write foo.temp with the last 32K of that data to
     185       provide a dictionary for the next invocation of this procedure.
     186     - Rewrite the extra field marking foo.gz with a compression in process.
     187     * If there is no data provided to compress (due to a missing foo.add file
     188       when recovering), reconstruct and truncate the foo.gz file to contain
     189       only the previous compressed data and proceed to the step after the next
     190       one.  Otherwise ...
     191     - Compress the data with the dictionary in foo.dict, and write to the
     192       foo.gz file starting at the bit immediately following the last previously
     193       compressed block.  If there is no foo.dict, proceed anyway with the
     194       compression at slightly reduced efficiency.  (For the foo.dict file to be
     195       missing requires some external failure beyond simply the interruption of
     196       a compress operation.)  During this process, the foo.lock file is
     197       periodically touched to assure that that file is not considered stale by
     198       another process before we're done.  The deflation is terminated with a
     199       non-last empty static block (10 bits long), that is then located and
     200       written over by a last-bit-set empty stored block.
     201     - Append the crc and length of the data in the gzip file (previously
     202       calculated during the append operations).
     203     - Write over the extra field with the updated stored block offsets, bits
     204       back, crc's, and lengths, and mark foo.gz as in process for a replacement
     205       of the dictionary.
     206     @ Delete the foo.add file.
     207     - Replace foo.dict with foo.temp.
     208     - Write over the extra field, marking foo.gz as complete.
     209  
     210     Recovery procedure:
     211     - If not a replace recovery, read in the foo.add file, and provide that data
     212       to the appropriate recovery below.  If there is no foo.add file, provide
     213       a zero data length to the recovery.  In that case, the append recovery
     214       restores the foo.gz to the previous compressed + uncompressed data state.
     215       For the the compress recovery, a missing foo.add file results in foo.gz
     216       being restored to the previous compressed-only data state.
     217     - Append recovery:
     218       - Pick up append at + step above
     219     - Compress recovery:
     220       - Pick up compress at * step above
     221     - Replace recovery:
     222       - Pick up compress at @ step above
     223     - Log the repair with a date stamp in foo.repairs
     224   */
     225  
     226  #include <sys/types.h>
     227  #include <stdio.h>      /* rename, fopen, fprintf, fclose */
     228  #include <stdlib.h>     /* malloc, free */
     229  #include <string.h>     /* strlen, strrchr, strcpy, strncpy, strcmp */
     230  #include <fcntl.h>      /* open */
     231  #include <unistd.h>     /* lseek, read, write, close, unlink, sleep, */
     232                          /* ftruncate, fsync */
     233  #include <errno.h>      /* errno */
     234  #include <time.h>       /* time, ctime */
     235  #include <sys/stat.h>   /* stat */
     236  #include <sys/time.h>   /* utimes */
     237  #include "zlib.h"       /* crc32 */
     238  
     239  #include "gzlog.h"      /* header for external access */
     240  
     241  #define local static
     242  typedef unsigned int uint;
     243  typedef unsigned long ulong;
     244  
     245  /* Macro for debugging to deterministically force recovery operations */
     246  #ifdef GZLOG_DEBUG
     247      #include <setjmp.h>         /* longjmp */
     248      jmp_buf gzlog_jump;         /* where to go back to */
     249      int gzlog_bail = 0;         /* which point to bail at (1..8) */
     250      int gzlog_count = -1;       /* number of times through to wait */
     251  #   define BAIL(n) do { if (n == gzlog_bail && gzlog_count-- == 0) \
     252                              longjmp(gzlog_jump, gzlog_bail); } while (0)
     253  #else
     254  #   define BAIL(n)
     255  #endif
     256  
     257  /* how old the lock file can be in seconds before considering it stale */
     258  #define PATIENCE 300
     259  
     260  /* maximum stored block size in Kbytes -- must be in 1..63 */
     261  #define MAX_STORE 16
     262  
     263  /* number of stored Kbytes to trigger compression (must be >= 32 to allow
     264     dictionary construction, and <= 204 * MAX_STORE, in order for >> 10 to
     265     discard the stored block headers contribution of five bytes each) */
     266  #define TRIGGER 1024
     267  
     268  /* size of a deflate dictionary (this cannot be changed) */
     269  #define DICT 32768U
     270  
     271  /* values for the operation (2 bits) */
     272  #define NO_OP 0
     273  #define APPEND_OP 1
     274  #define COMPRESS_OP 2
     275  #define REPLACE_OP 3
     276  
     277  /* macros to extract little-endian integers from an unsigned byte buffer */
     278  #define PULL2(p) ((p)[0]+((uint)((p)[1])<<8))
     279  #define PULL4(p) (PULL2(p)+((ulong)PULL2(p+2)<<16))
     280  #define PULL8(p) (PULL4(p)+((off_t)PULL4(p+4)<<32))
     281  
     282  /* macros to store integers into a byte buffer in little-endian order */
     283  #define PUT2(p,a) do {(p)[0]=a;(p)[1]=(a)>>8;} while(0)
     284  #define PUT4(p,a) do {PUT2(p,a);PUT2(p+2,a>>16);} while(0)
     285  #define PUT8(p,a) do {PUT4(p,a);PUT4(p+4,a>>32);} while(0)
     286  
     287  /* internal structure for log information */
     288  #define LOGID "\106\035\172"    /* should be three non-zero characters */
     289  struct log {
     290      char id[4];     /* contains LOGID to detect inadvertent overwrites */
     291      int fd;         /* file descriptor for .gz file, opened read/write */
     292      char *path;     /* allocated path, e.g. "/var/log/foo" or "foo" */
     293      char *end;      /* end of path, for appending suffices such as ".gz" */
     294      off_t first;    /* offset of first stored block first length byte */
     295      int back;       /* location of first block id in bits back from first */
     296      uint stored;    /* bytes currently in last stored block */
     297      off_t last;     /* offset of last stored block first length byte */
     298      ulong ccrc;     /* crc of compressed data */
     299      ulong clen;     /* length (modulo 2^32) of compressed data */
     300      ulong tcrc;     /* crc of total data */
     301      ulong tlen;     /* length (modulo 2^32) of total data */
     302      time_t lock;    /* last modify time of our lock file */
     303  };
     304  
     305  /* gzip header for gzlog */
     306  local unsigned char log_gzhead[] = {
     307      0x1f, 0x8b,                 /* magic gzip id */
     308      8,                          /* compression method is deflate */
     309      4,                          /* there is an extra field (no file name) */
     310      0, 0, 0, 0,                 /* no modification time provided */
     311      0, 0xff,                    /* no extra flags, no OS specified */
     312      39, 0, 'a', 'p', 35, 0      /* extra field with "ap" subfield */
     313                                  /* 35 is EXTRA, 39 is EXTRA + 4 */
     314  };
     315  
     316  #define HEAD sizeof(log_gzhead)     /* should be 16 */
     317  
     318  /* initial gzip extra field content (52 == HEAD + EXTRA + 1) */
     319  local unsigned char log_gzext[] = {
     320      52, 0, 0, 0, 0, 0, 0, 0,    /* offset of first stored block length */
     321      52, 0, 0, 0, 0, 0, 0, 0,    /* offset of last stored block length */
     322      0, 0, 0, 0, 0, 0, 0, 0,     /* compressed data crc and length */
     323      0, 0, 0, 0, 0, 0, 0, 0,     /* total data crc and length */
     324      0, 0,                       /* final stored block data length */
     325      5                           /* op is NO_OP, last bit 8 bits back */
     326  };
     327  
     328  #define EXTRA sizeof(log_gzext)     /* should be 35 */
     329  
     330  /* initial gzip data and trailer */
     331  local unsigned char log_gzbody[] = {
     332      1, 0, 0, 0xff, 0xff,        /* empty stored block (last) */
     333      0, 0, 0, 0,                 /* crc */
     334      0, 0, 0, 0                  /* uncompressed length */
     335  };
     336  
     337  #define BODY sizeof(log_gzbody)
     338  
     339  /* Exclusively create foo.lock in order to negotiate exclusive access to the
     340     foo.* files.  If the modify time of an existing lock file is greater than
     341     PATIENCE seconds in the past, then consider the lock file to have been
     342     abandoned, delete it, and try the exclusive create again.  Save the lock
     343     file modify time for verification of ownership.  Return 0 on success, or -1
     344     on failure, usually due to an access restriction or invalid path.  Note that
     345     if stat() or unlink() fails, it may be due to another process noticing the
     346     abandoned lock file a smidge sooner and deleting it, so those are not
     347     flagged as an error. */
     348  local int log_lock(struct log *log)
     349  {
     350      int fd;
     351      struct stat st;
     352  
     353      strcpy(log->end, ".lock");
     354      while ((fd = open(log->path, O_CREAT | O_EXCL, 0644)) < 0) {
     355          if (errno != EEXIST)
     356              return -1;
     357          if (stat(log->path, &st) == 0 && time(NULL) - st.st_mtime > PATIENCE) {
     358              unlink(log->path);
     359              continue;
     360          }
     361          sleep(2);       /* relinquish the CPU for two seconds while waiting */
     362      }
     363      close(fd);
     364      if (stat(log->path, &st) == 0)
     365          log->lock = st.st_mtime;
     366      return 0;
     367  }
     368  
     369  /* Update the modify time of the lock file to now, in order to prevent another
     370     task from thinking that the lock is stale.  Save the lock file modify time
     371     for verification of ownership. */
     372  local void log_touch(struct log *log)
     373  {
     374      struct stat st;
     375  
     376      strcpy(log->end, ".lock");
     377      utimes(log->path, NULL);
     378      if (stat(log->path, &st) == 0)
     379          log->lock = st.st_mtime;
     380  }
     381  
     382  /* Check the log file modify time against what is expected.  Return true if
     383     this is not our lock.  If it is our lock, touch it to keep it. */
     384  local int log_check(struct log *log)
     385  {
     386      struct stat st;
     387  
     388      strcpy(log->end, ".lock");
     389      if (stat(log->path, &st) || st.st_mtime != log->lock)
     390          return 1;
     391      log_touch(log);
     392      return 0;
     393  }
     394  
     395  /* Unlock a previously acquired lock, but only if it's ours. */
     396  local void log_unlock(struct log *log)
     397  {
     398      if (log_check(log))
     399          return;
     400      strcpy(log->end, ".lock");
     401      unlink(log->path);
     402      log->lock = 0;
     403  }
     404  
     405  /* Check the gzip header and read in the extra field, filling in the values in
     406     the log structure.  Return op on success or -1 if the gzip header was not as
     407     expected.  op is the current operation in progress last written to the extra
     408     field.  This assumes that the gzip file has already been opened, with the
     409     file descriptor log->fd. */
     410  local int log_head(struct log *log)
     411  {
     412      int op;
     413      unsigned char buf[HEAD + EXTRA];
     414  
     415      if (lseek(log->fd, 0, SEEK_SET) < 0 ||
     416          read(log->fd, buf, HEAD + EXTRA) != HEAD + EXTRA ||
     417          memcmp(buf, log_gzhead, HEAD)) {
     418          return -1;
     419      }
     420      log->first = PULL8(buf + HEAD);
     421      log->last = PULL8(buf + HEAD + 8);
     422      log->ccrc = PULL4(buf + HEAD + 16);
     423      log->clen = PULL4(buf + HEAD + 20);
     424      log->tcrc = PULL4(buf + HEAD + 24);
     425      log->tlen = PULL4(buf + HEAD + 28);
     426      log->stored = PULL2(buf + HEAD + 32);
     427      log->back = 3 + (buf[HEAD + 34] & 7);
     428      op = (buf[HEAD + 34] >> 3) & 3;
     429      return op;
     430  }
     431  
     432  /* Write over the extra field contents, marking the operation as op.  Use fsync
     433     to assure that the device is written to, and in the requested order.  This
     434     operation, and only this operation, is assumed to be atomic in order to
     435     assure that the log is recoverable in the event of an interruption at any
     436     point in the process.  Return -1 if the write to foo.gz failed. */
     437  local int log_mark(struct log *log, int op)
     438  {
     439      int ret;
     440      unsigned char ext[EXTRA];
     441  
     442      PUT8(ext, log->first);
     443      PUT8(ext + 8, log->last);
     444      PUT4(ext + 16, log->ccrc);
     445      PUT4(ext + 20, log->clen);
     446      PUT4(ext + 24, log->tcrc);
     447      PUT4(ext + 28, log->tlen);
     448      PUT2(ext + 32, log->stored);
     449      ext[34] = log->back - 3 + (op << 3);
     450      fsync(log->fd);
     451      ret = lseek(log->fd, HEAD, SEEK_SET) < 0 ||
     452            write(log->fd, ext, EXTRA) != EXTRA ? -1 : 0;
     453      fsync(log->fd);
     454      return ret;
     455  }
     456  
     457  /* Rewrite the last block header bits and subsequent zero bits to get to a byte
     458     boundary, setting the last block bit if last is true, and then write the
     459     remainder of the stored block header (length and one's complement).  Leave
     460     the file pointer after the end of the last stored block data.  Return -1 if
     461     there is a read or write failure on the foo.gz file */
     462  local int log_last(struct log *log, int last)
     463  {
     464      int back, len, mask;
     465      unsigned char buf[6];
     466  
     467      /* determine the locations of the bytes and bits to modify */
     468      back = log->last == log->first ? log->back : 8;
     469      len = back > 8 ? 2 : 1;                 /* bytes back from log->last */
     470      mask = 0x80 >> ((back - 1) & 7);        /* mask for block last-bit */
     471  
     472      /* get the byte to modify (one or two back) into buf[0] -- don't need to
     473         read the byte if the last-bit is eight bits back, since in that case
     474         the entire byte will be modified */
     475      buf[0] = 0;
     476      if (back != 8 && (lseek(log->fd, log->last - len, SEEK_SET) < 0 ||
     477                        read(log->fd, buf, 1) != 1))
     478          return -1;
     479  
     480      /* change the last-bit of the last stored block as requested -- note
     481         that all bits above the last-bit are set to zero, per the type bits
     482         of a stored block being 00 and per the convention that the bits to
     483         bring the stream to a byte boundary are also zeros */
     484      buf[1] = 0;
     485      buf[2 - len] = (*buf & (mask - 1)) + (last ? mask : 0);
     486  
     487      /* write the modified stored block header and lengths, move the file
     488         pointer to after the last stored block data */
     489      PUT2(buf + 2, log->stored);
     490      PUT2(buf + 4, log->stored ^ 0xffff);
     491      return lseek(log->fd, log->last - len, SEEK_SET) < 0 ||
     492             write(log->fd, buf + 2 - len, len + 4) != len + 4 ||
     493             lseek(log->fd, log->stored, SEEK_CUR) < 0 ? -1 : 0;
     494  }
     495  
     496  /* Append len bytes from data to the locked and open log file.  len may be zero
     497     if recovering and no .add file was found.  In that case, the previous state
     498     of the foo.gz file is restored.  The data is appended uncompressed in
     499     deflate stored blocks.  Return -1 if there was an error reading or writing
     500     the foo.gz file. */
     501  local int log_append(struct log *log, unsigned char *data, size_t len)
     502  {
     503      uint put;
     504      off_t end;
     505      unsigned char buf[8];
     506  
     507      /* set the last block last-bit and length, in case recovering an
     508         interrupted append, then position the file pointer to append to the
     509         block */
     510      if (log_last(log, 1))
     511          return -1;
     512  
     513      /* append, adding stored blocks and updating the offset of the last stored
     514         block as needed, and update the total crc and length */
     515      while (len) {
     516          /* append as much as we can to the last block */
     517          put = (MAX_STORE << 10) - log->stored;
     518          if (put > len)
     519              put = (uint)len;
     520          if (put) {
     521              if (write(log->fd, data, put) != put)
     522                  return -1;
     523              BAIL(1);
     524              log->tcrc = crc32(log->tcrc, data, put);
     525              log->tlen += put;
     526              log->stored += put;
     527              data += put;
     528              len -= put;
     529          }
     530  
     531          /* if we need to, add a new empty stored block */
     532          if (len) {
     533              /* mark current block as not last */
     534              if (log_last(log, 0))
     535                  return -1;
     536  
     537              /* point to new, empty stored block */
     538              log->last += 4 + log->stored + 1;
     539              log->stored = 0;
     540          }
     541  
     542          /* mark last block as last, update its length */
     543          if (log_last(log, 1))
     544              return -1;
     545          BAIL(2);
     546      }
     547  
     548      /* write the new crc and length trailer, and truncate just in case (could
     549         be recovering from partial append with a missing foo.add file) */
     550      PUT4(buf, log->tcrc);
     551      PUT4(buf + 4, log->tlen);
     552      if (write(log->fd, buf, 8) != 8 ||
     553          (end = lseek(log->fd, 0, SEEK_CUR)) < 0 || ftruncate(log->fd, end))
     554          return -1;
     555  
     556      /* write the extra field, marking the log file as done, delete .add file */
     557      if (log_mark(log, NO_OP))
     558          return -1;
     559      strcpy(log->end, ".add");
     560      unlink(log->path);          /* ignore error, since may not exist */
     561      return 0;
     562  }
     563  
     564  /* Replace the foo.dict file with the foo.temp file.  Also delete the foo.add
     565     file, since the compress operation may have been interrupted before that was
     566     done.  Returns 1 if memory could not be allocated, or -1 if reading or
     567     writing foo.gz fails, or if the rename fails for some reason other than
     568     foo.temp not existing.  foo.temp not existing is a permitted error, since
     569     the replace operation may have been interrupted after the rename is done,
     570     but before foo.gz is marked as complete. */
     571  local int log_replace(struct log *log)
     572  {
     573      int ret;
     574      char *dest;
     575  
     576      /* delete foo.add file */
     577      strcpy(log->end, ".add");
     578      unlink(log->path);         /* ignore error, since may not exist */
     579      BAIL(3);
     580  
     581      /* rename foo.name to foo.dict, replacing foo.dict if it exists */
     582      strcpy(log->end, ".dict");
     583      dest = malloc(strlen(log->path) + 1);
     584      if (dest == NULL)
     585          return -2;
     586      strcpy(dest, log->path);
     587      strcpy(log->end, ".temp");
     588      ret = rename(log->path, dest);
     589      free(dest);
     590      if (ret && errno != ENOENT)
     591          return -1;
     592      BAIL(4);
     593  
     594      /* mark the foo.gz file as done */
     595      return log_mark(log, NO_OP);
     596  }
     597  
     598  /* Compress the len bytes at data and append the compressed data to the
     599     foo.gz deflate data immediately after the previous compressed data.  This
     600     overwrites the previous uncompressed data, which was stored in foo.add
     601     and is the data provided in data[0..len-1].  If this operation is
     602     interrupted, it picks up at the start of this routine, with the foo.add
     603     file read in again.  If there is no data to compress (len == 0), then we
     604     simply terminate the foo.gz file after the previously compressed data,
     605     appending a final empty stored block and the gzip trailer.  Return -1 if
     606     reading or writing the log.gz file failed, or -2 if there was a memory
     607     allocation failure. */
     608  local int log_compress(struct log *log, unsigned char *data, size_t len)
     609  {
     610      int fd;
     611      uint got, max;
     612      ssize_t dict;
     613      off_t end;
     614      z_stream strm;
     615      unsigned char buf[DICT];
     616  
     617      /* compress and append compressed data */
     618      if (len) {
     619          /* set up for deflate, allocating memory */
     620          strm.zalloc = Z_NULL;
     621          strm.zfree = Z_NULL;
     622          strm.opaque = Z_NULL;
     623          if (deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -15, 8,
     624                           Z_DEFAULT_STRATEGY) != Z_OK)
     625              return -2;
     626  
     627          /* read in dictionary (last 32K of data that was compressed) */
     628          strcpy(log->end, ".dict");
     629          fd = open(log->path, O_RDONLY, 0);
     630          if (fd >= 0) {
     631              dict = read(fd, buf, DICT);
     632              close(fd);
     633              if (dict < 0) {
     634                  deflateEnd(&strm);
     635                  return -1;
     636              }
     637              if (dict)
     638                  deflateSetDictionary(&strm, buf, (uint)dict);
     639          }
     640          log_touch(log);
     641  
     642          /* prime deflate with last bits of previous block, position write
     643             pointer to write those bits and overwrite what follows */
     644          if (lseek(log->fd, log->first - (log->back > 8 ? 2 : 1),
     645                  SEEK_SET) < 0 ||
     646              read(log->fd, buf, 1) != 1 || lseek(log->fd, -1, SEEK_CUR) < 0) {
     647              deflateEnd(&strm);
     648              return -1;
     649          }
     650          deflatePrime(&strm, (8 - log->back) & 7, *buf);
     651  
     652          /* compress, finishing with a partial non-last empty static block */
     653          strm.next_in = data;
     654          max = (((uint)0 - 1) >> 1) + 1; /* in case int smaller than size_t */
     655          do {
     656              strm.avail_in = len > max ? max : (uint)len;
     657              len -= strm.avail_in;
     658              do {
     659                  strm.avail_out = DICT;
     660                  strm.next_out = buf;
     661                  deflate(&strm, len ? Z_NO_FLUSH : Z_PARTIAL_FLUSH);
     662                  got = DICT - strm.avail_out;
     663                  if (got && write(log->fd, buf, got) != got) {
     664                      deflateEnd(&strm);
     665                      return -1;
     666                  }
     667                  log_touch(log);
     668              } while (strm.avail_out == 0);
     669          } while (len);
     670          deflateEnd(&strm);
     671          BAIL(5);
     672  
     673          /* find start of empty static block -- scanning backwards the first one
     674             bit is the second bit of the block, if the last byte is zero, then
     675             we know the byte before that has a one in the top bit, since an
     676             empty static block is ten bits long */
     677          if ((log->first = lseek(log->fd, -1, SEEK_CUR)) < 0 ||
     678              read(log->fd, buf, 1) != 1)
     679              return -1;
     680          log->first++;
     681          if (*buf) {
     682              log->back = 1;
     683              while ((*buf & ((uint)1 << (8 - log->back++))) == 0)
     684                  ;       /* guaranteed to terminate, since *buf != 0 */
     685          }
     686          else
     687              log->back = 10;
     688  
     689          /* update compressed crc and length */
     690          log->ccrc = log->tcrc;
     691          log->clen = log->tlen;
     692      }
     693      else {
     694          /* no data to compress -- fix up existing gzip stream */
     695          log->tcrc = log->ccrc;
     696          log->tlen = log->clen;
     697      }
     698  
     699      /* complete and truncate gzip stream */
     700      log->last = log->first;
     701      log->stored = 0;
     702      PUT4(buf, log->tcrc);
     703      PUT4(buf + 4, log->tlen);
     704      if (log_last(log, 1) || write(log->fd, buf, 8) != 8 ||
     705          (end = lseek(log->fd, 0, SEEK_CUR)) < 0 || ftruncate(log->fd, end))
     706          return -1;
     707      BAIL(6);
     708  
     709      /* mark as being in the replace operation */
     710      if (log_mark(log, REPLACE_OP))
     711          return -1;
     712  
     713      /* execute the replace operation and mark the file as done */
     714      return log_replace(log);
     715  }
     716  
     717  /* log a repair record to the .repairs file */
     718  local void log_log(struct log *log, int op, char *record)
     719  {
     720      time_t now;
     721      FILE *rec;
     722  
     723      now = time(NULL);
     724      strcpy(log->end, ".repairs");
     725      rec = fopen(log->path, "a");
     726      if (rec == NULL)
     727          return;
     728      fprintf(rec, "%.24s %s recovery: %s\n", ctime(&now), op == APPEND_OP ?
     729              "append" : (op == COMPRESS_OP ? "compress" : "replace"), record);
     730      fclose(rec);
     731      return;
     732  }
     733  
     734  /* Recover the interrupted operation op.  First read foo.add for recovering an
     735     append or compress operation.  Return -1 if there was an error reading or
     736     writing foo.gz or reading an existing foo.add, or -2 if there was a memory
     737     allocation failure. */
     738  local int log_recover(struct log *log, int op)
     739  {
     740      int fd, ret = 0;
     741      unsigned char *data = NULL;
     742      size_t len = 0;
     743      struct stat st;
     744  
     745      /* log recovery */
     746      log_log(log, op, "start");
     747  
     748      /* load foo.add file if expected and present */
     749      if (op == APPEND_OP || op == COMPRESS_OP) {
     750          strcpy(log->end, ".add");
     751          if (stat(log->path, &st) == 0 && st.st_size) {
     752              len = (size_t)(st.st_size);
     753              if ((off_t)len != st.st_size ||
     754                      (data = malloc(st.st_size)) == NULL) {
     755                  log_log(log, op, "allocation failure");
     756                  return -2;
     757              }
     758              if ((fd = open(log->path, O_RDONLY, 0)) < 0) {
     759                  free(data);
     760                  log_log(log, op, ".add file read failure");
     761                  return -1;
     762              }
     763              ret = (size_t)read(fd, data, len) != len;
     764              close(fd);
     765              if (ret) {
     766                  free(data);
     767                  log_log(log, op, ".add file read failure");
     768                  return -1;
     769              }
     770              log_log(log, op, "loaded .add file");
     771          }
     772          else
     773              log_log(log, op, "missing .add file!");
     774      }
     775  
     776      /* recover the interrupted operation */
     777      switch (op) {
     778      case APPEND_OP:
     779          ret = log_append(log, data, len);
     780          break;
     781      case COMPRESS_OP:
     782          ret = log_compress(log, data, len);
     783          break;
     784      case REPLACE_OP:
     785          ret = log_replace(log);
     786      }
     787  
     788      /* log status */
     789      log_log(log, op, ret ? "failure" : "complete");
     790  
     791      /* clean up */
     792      if (data != NULL)
     793          free(data);
     794      return ret;
     795  }
     796  
     797  /* Close the foo.gz file (if open) and release the lock. */
     798  local void log_close(struct log *log)
     799  {
     800      if (log->fd >= 0)
     801          close(log->fd);
     802      log->fd = -1;
     803      log_unlock(log);
     804  }
     805  
     806  /* Open foo.gz, verify the header, and load the extra field contents, after
     807     first creating the foo.lock file to gain exclusive access to the foo.*
     808     files.  If foo.gz does not exist or is empty, then write the initial header,
     809     extra, and body content of an empty foo.gz log file.  If there is an error
     810     creating the lock file due to access restrictions, or an error reading or
     811     writing the foo.gz file, or if the foo.gz file is not a proper log file for
     812     this object (e.g. not a gzip file or does not contain the expected extra
     813     field), then return true.  If there is an error, the lock is released.
     814     Otherwise, the lock is left in place. */
     815  local int log_open(struct log *log)
     816  {
     817      int op;
     818  
     819      /* release open file resource if left over -- can occur if lock lost
     820         between gzlog_open() and gzlog_write() */
     821      if (log->fd >= 0)
     822          close(log->fd);
     823      log->fd = -1;
     824  
     825      /* negotiate exclusive access */
     826      if (log_lock(log) < 0)
     827          return -1;
     828  
     829      /* open the log file, foo.gz */
     830      strcpy(log->end, ".gz");
     831      log->fd = open(log->path, O_RDWR | O_CREAT, 0644);
     832      if (log->fd < 0) {
     833          log_close(log);
     834          return -1;
     835      }
     836  
     837      /* if new, initialize foo.gz with an empty log, delete old dictionary */
     838      if (lseek(log->fd, 0, SEEK_END) == 0) {
     839          if (write(log->fd, log_gzhead, HEAD) != HEAD ||
     840              write(log->fd, log_gzext, EXTRA) != EXTRA ||
     841              write(log->fd, log_gzbody, BODY) != BODY) {
     842              log_close(log);
     843              return -1;
     844          }
     845          strcpy(log->end, ".dict");
     846          unlink(log->path);
     847      }
     848  
     849      /* verify log file and load extra field information */
     850      if ((op = log_head(log)) < 0) {
     851          log_close(log);
     852          return -1;
     853      }
     854  
     855      /* check for interrupted process and if so, recover */
     856      if (op != NO_OP && log_recover(log, op)) {
     857          log_close(log);
     858          return -1;
     859      }
     860  
     861      /* touch the lock file to prevent another process from grabbing it */
     862      log_touch(log);
     863      return 0;
     864  }
     865  
     866  /* See gzlog.h for the description of the external methods below */
     867  gzlog *gzlog_open(char *path)
     868  {
     869      size_t n;
     870      struct log *log;
     871  
     872      /* check arguments */
     873      if (path == NULL || *path == 0)
     874          return NULL;
     875  
     876      /* allocate and initialize log structure */
     877      log = malloc(sizeof(struct log));
     878      if (log == NULL)
     879          return NULL;
     880      strcpy(log->id, LOGID);
     881      log->fd = -1;
     882  
     883      /* save path and end of path for name construction */
     884      n = strlen(path);
     885      log->path = malloc(n + 9);              /* allow for ".repairs" */
     886      if (log->path == NULL) {
     887          free(log);
     888          return NULL;
     889      }
     890      strcpy(log->path, path);
     891      log->end = log->path + n;
     892  
     893      /* gain exclusive access and verify log file -- may perform a
     894         recovery operation if needed */
     895      if (log_open(log)) {
     896          free(log->path);
     897          free(log);
     898          return NULL;
     899      }
     900  
     901      /* return pointer to log structure */
     902      return log;
     903  }
     904  
     905  /* gzlog_compress() return values:
     906      0: all good
     907     -1: file i/o error (usually access issue)
     908     -2: memory allocation failure
     909     -3: invalid log pointer argument */
     910  int gzlog_compress(gzlog *logd)
     911  {
     912      int fd, ret;
     913      uint block;
     914      size_t len, next;
     915      unsigned char *data, buf[5];
     916      struct log *log = logd;
     917  
     918      /* check arguments */
     919      if (log == NULL || strcmp(log->id, LOGID))
     920          return -3;
     921  
     922      /* see if we lost the lock -- if so get it again and reload the extra
     923         field information (it probably changed), recover last operation if
     924         necessary */
     925      if (log_check(log) && log_open(log))
     926          return -1;
     927  
     928      /* create space for uncompressed data */
     929      len = ((size_t)(log->last - log->first) & ~(((size_t)1 << 10) - 1)) +
     930            log->stored;
     931      if ((data = malloc(len)) == NULL)
     932          return -2;
     933  
     934      /* do statement here is just a cheap trick for error handling */
     935      do {
     936          /* read in the uncompressed data */
     937          if (lseek(log->fd, log->first - 1, SEEK_SET) < 0)
     938              break;
     939          next = 0;
     940          while (next < len) {
     941              if (read(log->fd, buf, 5) != 5)
     942                  break;
     943              block = PULL2(buf + 1);
     944              if (next + block > len ||
     945                  read(log->fd, (char *)data + next, block) != block)
     946                  break;
     947              next += block;
     948          }
     949          if (lseek(log->fd, 0, SEEK_CUR) != log->last + 4 + log->stored)
     950              break;
     951          log_touch(log);
     952  
     953          /* write the uncompressed data to the .add file */
     954          strcpy(log->end, ".add");
     955          fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
     956          if (fd < 0)
     957              break;
     958          ret = (size_t)write(fd, data, len) != len;
     959          if (ret | close(fd))
     960              break;
     961          log_touch(log);
     962  
     963          /* write the dictionary for the next compress to the .temp file */
     964          strcpy(log->end, ".temp");
     965          fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
     966          if (fd < 0)
     967              break;
     968          next = DICT > len ? len : DICT;
     969          ret = (size_t)write(fd, (char *)data + len - next, next) != next;
     970          if (ret | close(fd))
     971              break;
     972          log_touch(log);
     973  
     974          /* roll back to compressed data, mark the compress in progress */
     975          log->last = log->first;
     976          log->stored = 0;
     977          if (log_mark(log, COMPRESS_OP))
     978              break;
     979          BAIL(7);
     980  
     981          /* compress and append the data (clears mark) */
     982          ret = log_compress(log, data, len);
     983          free(data);
     984          return ret;
     985      } while (0);
     986  
     987      /* broke out of do above on i/o error */
     988      free(data);
     989      return -1;
     990  }
     991  
     992  /* gzlog_write() return values:
     993      0: all good
     994     -1: file i/o error (usually access issue)
     995     -2: memory allocation failure
     996     -3: invalid log pointer argument */
     997  int gzlog_write(gzlog *logd, void *data, size_t len)
     998  {
     999      int fd, ret;
    1000      struct log *log = logd;
    1001  
    1002      /* check arguments */
    1003      if (log == NULL || strcmp(log->id, LOGID))
    1004          return -3;
    1005      if (data == NULL || len <= 0)
    1006          return 0;
    1007  
    1008      /* see if we lost the lock -- if so get it again and reload the extra
    1009         field information (it probably changed), recover last operation if
    1010         necessary */
    1011      if (log_check(log) && log_open(log))
    1012          return -1;
    1013  
    1014      /* create and write .add file */
    1015      strcpy(log->end, ".add");
    1016      fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
    1017      if (fd < 0)
    1018          return -1;
    1019      ret = (size_t)write(fd, data, len) != len;
    1020      if (ret | close(fd))
    1021          return -1;
    1022      log_touch(log);
    1023  
    1024      /* mark log file with append in progress */
    1025      if (log_mark(log, APPEND_OP))
    1026          return -1;
    1027      BAIL(8);
    1028  
    1029      /* append data (clears mark) */
    1030      if (log_append(log, data, len))
    1031          return -1;
    1032  
    1033      /* check to see if it's time to compress -- if not, then done */
    1034      if (((log->last - log->first) >> 10) + (log->stored >> 10) < TRIGGER)
    1035          return 0;
    1036  
    1037      /* time to compress */
    1038      return gzlog_compress(log);
    1039  }
    1040  
    1041  /* gzlog_close() return values:
    1042      0: ok
    1043     -3: invalid log pointer argument */
    1044  int gzlog_close(gzlog *logd)
    1045  {
    1046      struct log *log = logd;
    1047  
    1048      /* check arguments */
    1049      if (log == NULL || strcmp(log->id, LOGID))
    1050          return -3;
    1051  
    1052      /* close the log file and release the lock */
    1053      log_close(log);
    1054  
    1055      /* free structure and return */
    1056      if (log->path != NULL)
    1057          free(log->path);
    1058      strcpy(log->id, "bad");
    1059      free(log);
    1060      return 0;
    1061  }