(root)/
gzip-1.13/
deflate.c
       1  /* deflate.c -- compress data using the deflation algorithm
       2  
       3     Copyright (C) 1999, 2006, 2009-2023 Free Software Foundation, Inc.
       4     Copyright (C) 1992-1993 Jean-loup Gailly
       5  
       6     This program is free software; you can redistribute it and/or modify
       7     it under the terms of the GNU General Public License as published by
       8     the Free Software Foundation; either version 3, or (at your option)
       9     any later version.
      10  
      11     This program is distributed in the hope that it will be useful,
      12     but WITHOUT ANY WARRANTY; without even the implied warranty of
      13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14     GNU General Public License for more details.
      15  
      16     You should have received a copy of the GNU General Public License
      17     along with this program; if not, write to the Free Software Foundation,
      18     Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
      19  
      20  /*
      21   *  PURPOSE
      22   *
      23   *      Identify new text as repetitions of old text within a fixed-
      24   *      length sliding window trailing behind the new text.
      25   *
      26   *  DISCUSSION
      27   *
      28   *      The "deflation" process depends on being able to identify portions
      29   *      of the input text which are identical to earlier input (within a
      30   *      sliding window trailing behind the input currently being processed).
      31   *
      32   *      The most straightforward technique turns out to be the fastest for
      33   *      most input files: try all possible matches and select the longest.
      34   *      The key feature of this algorithm is that insertions into the string
      35   *      dictionary are very simple and thus fast, and deletions are avoided
      36   *      completely. Insertions are performed at each input character, whereas
      37   *      string matches are performed only when the previous match ends. So it
      38   *      is preferable to spend more time in matches to allow very fast string
      39   *      insertions and avoid deletions. The matching algorithm for small
      40   *      strings is inspired from that of Rabin & Karp. A brute force approach
      41   *      is used to find longer strings when a small match has been found.
      42   *      A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
      43   *      (by Leonid Broukhis).
      44   *         A previous version of this file used a more sophisticated algorithm
      45   *      (by Fiala and Greene) which is guaranteed to run in linear amortized
      46   *      time, but has a larger average cost, uses more memory and is patented.
      47   *      However the F&G algorithm may be faster for some highly redundant
      48   *      files if the parameter max_chain_length (described below) is too large.
      49   *
      50   *  ACKNOWLEDGEMENTS
      51   *
      52   *      The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
      53   *      I found it in 'freeze' written by Leonid Broukhis.
      54   *      Thanks to many info-zippers for bug reports and testing.
      55   *
      56   *  REFERENCES
      57   *
      58   *      APPNOTE.TXT documentation file in PKZIP 1.93a distribution.
      59   *
      60   *      A description of the Rabin and Karp algorithm is given in the book
      61   *         "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
      62   *
      63   *      Fiala,E.R., and Greene,D.H.
      64   *         Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595
      65   *
      66   *  INTERFACE
      67   *
      68   *      void lm_init (int pack_level, ush *flags)
      69   *          Initialize the "longest match" routines for a new file
      70   *
      71   *      off_t deflate ()
      72   *          Processes a new input file and return its compressed length. Sets
      73   *          the compressed length, crc, deflate flags and internal file
      74   *          attributes.
      75   */
      76  
      77  #include <config.h>
      78  #include <stdio.h>
      79  
      80  #include "tailor.h"
      81  #include "gzip.h"
      82  #include "lzw.h" /* just for consistency checking */
      83  #include "verify.h"
      84  
      85  /* ===========================================================================
      86   * Configuration parameters
      87   */
      88  
      89  /* Compile with MEDIUM_MEM to reduce the memory requirements or
      90   * with SMALL_MEM to use as little memory as possible. Use BIG_MEM if the
      91   * entire input file can be held in memory (not possible on 16 bit systems).
      92   * Warning: defining these symbols affects HASH_BITS (see below) and thus
      93   * affects the compression ratio. The compressed output
      94   * is still correct, and might even be smaller in some cases.
      95   */
      96  
      97  #ifdef SMALL_MEM
      98  #   define HASH_BITS  13  /* Number of bits used to hash strings */
      99  #endif
     100  #ifdef MEDIUM_MEM
     101  #   define HASH_BITS  14
     102  #endif
     103  #ifndef HASH_BITS
     104  #   define HASH_BITS  15
     105     /* For portability to 16 bit machines, do not use values above 15. */
     106  #endif
     107  
     108  /* To save space (see unlzw.c), we overlay prev+head with tab_prefix and
     109   * window with tab_suffix. Check that we can do this:
     110   */
     111  #if (WSIZE<<1) > (1<<BITS)
     112     error: cannot overlay window with tab_suffix and prev with tab_prefix0
     113  #endif
     114  #if HASH_BITS > BITS-1
     115     error: cannot overlay head with tab_prefix1
     116  #endif
     117  
     118  #define HASH_SIZE (unsigned)(1<<HASH_BITS)
     119  #define HASH_MASK (HASH_SIZE-1)
     120  #define WMASK     (WSIZE-1)
     121  /* HASH_SIZE and WSIZE must be powers of two */
     122  
     123  #define NIL 0
     124  /* Tail of hash chains */
     125  
     126  #ifndef TOO_FAR
     127  #  define TOO_FAR 4096
     128  #endif
     129  /* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
     130  
     131  #ifndef RSYNC_WIN
     132  #  define RSYNC_WIN 4096
     133  #endif
     134  verify(RSYNC_WIN < MAX_DIST);
     135  
     136  #define RSYNC_SUM_MATCH(sum) ((sum) % RSYNC_WIN == 0)
     137  /* Whether window sum matches magic value */
     138  
     139  /* ===========================================================================
     140   * Local data used by the "longest match" routines.
     141   */
     142  
     143  typedef ush Pos;
     144  typedef unsigned IPos;
     145  /* A Pos is an index in the character window. We use short instead of int to
     146   * save space in the various tables. IPos is used only for parameter passing.
     147   */
     148  
     149  /* DECLARE(uch, window, 2L*WSIZE); */
     150  /* Sliding window. Input bytes are read into the second half of the window,
     151   * and move to the first half later to keep a dictionary of at least WSIZE
     152   * bytes. With this organization, matches are limited to a distance of
     153   * WSIZE-MAX_MATCH bytes, but this ensures that IO is always
     154   * performed with a length multiple of the block size. Also, it limits
     155   * the window size to 64K, which is quite useful on MSDOS.
     156   * To do: limit the window size to WSIZE+BSZ if SMALL_MEM (the code would
     157   * be less efficient).
     158   */
     159  
     160  /* DECLARE(Pos, prev, WSIZE); */
     161  /* Link to older string with same hash index. To limit the size of this
     162   * array to 64K, this link is maintained only for the last 32K strings.
     163   * An index in this array is thus a window index modulo 32K.
     164   */
     165  
     166  /* DECLARE(Pos, head, 1<<HASH_BITS); */
     167  /* Heads of the hash chains or NIL. */
     168  
     169  static ulg window_size = (ulg)2*WSIZE;
     170  /* window size, 2*WSIZE except for MMAP or BIG_MEM, where it is the
     171   * input file length plus MIN_LOOKAHEAD.
     172   */
     173  
     174  long block_start;
     175  /* window position at the beginning of the current output block. Gets
     176   * negative when the window is moved backwards.
     177   */
     178  
     179  static unsigned ins_h;  /* hash index of string to be inserted */
     180  
     181  #define H_SHIFT  ((HASH_BITS+MIN_MATCH-1)/MIN_MATCH)
     182  /* Number of bits by which ins_h and del_h must be shifted at each
     183   * input step. It must be such that after MIN_MATCH steps, the oldest
     184   * byte no longer takes part in the hash key, that is:
     185   *   H_SHIFT * MIN_MATCH >= HASH_BITS
     186   */
     187  
     188         unsigned int near prev_length;
     189  /* Length of the best match at previous step. Matches not greater than this
     190   * are discarded. This is used in the lazy match evaluation.
     191   */
     192  
     193        unsigned near strstart;      /* start of string to insert */
     194        unsigned near match_start;   /* start of matching string */
     195  static int eofile;		   /* flag set at end of input file */
     196  static unsigned lookahead;	   /* number of valid bytes ahead in window */
     197  
     198         unsigned max_chain_length;
     199  /* To speed up deflation, hash chains are never searched beyond this length.
     200   * A higher limit improves compression ratio but degrades the speed.
     201   */
     202  
     203  static unsigned int max_lazy_match;
     204  /* Attempt to find a better match only when the current match is strictly
     205   * smaller than this value. This mechanism is used only for compression
     206   * levels >= 4.
     207   */
     208  #define max_insert_length  max_lazy_match
     209  /* Insert new strings in the hash table only if the match length
     210   * is not greater than this length. This saves time but degrades compression.
     211   * max_insert_length is used only for compression levels <= 3.
     212   */
     213  
     214  unsigned good_match;
     215  /* Use a faster search when the previous match is longer than this */
     216  
     217  static ulg rsync_sum;  /* rolling sum of rsync window */
     218  static ulg rsync_chunk_end; /* next rsync sequence point */
     219  
     220  /* Values for max_lazy_match, good_match and max_chain_length, depending on
     221   * the desired pack level (0..9). The values given below have been tuned to
     222   * exclude worst case performance for pathological files. Better values may be
     223   * found for specific files.
     224   */
     225  
     226  typedef struct config {
     227     ush good_length; /* reduce lazy search above this match length */
     228     ush max_lazy;    /* do not perform lazy search above this match length */
     229     ush nice_length; /* quit search above this match length */
     230     ush max_chain;
     231  } config;
     232  
     233  #ifdef ASMV
     234  # define static_unless_ASMV
     235  #else
     236  # define static_unless_ASMV static
     237  #endif
     238  
     239  #ifdef  FULL_SEARCH
     240  # define nice_match MAX_MATCH
     241  #else
     242    /* Stop searching when current match exceeds this */
     243    static_unless_ASMV int nice_match;
     244  #endif
     245  
     246  static config configuration_table[10] = {
     247  /*      good lazy nice chain */
     248  /* 0 */ {0,    0,  0,    0},  /* store only */
     249  /* 1 */ {4,    4,  8,    4},  /* maximum speed, no lazy matches */
     250  /* 2 */ {4,    5, 16,    8},
     251  /* 3 */ {4,    6, 32,   32},
     252  
     253  /* 4 */ {4,    4, 16,   16},  /* lazy matches */
     254  /* 5 */ {8,   16, 32,   32},
     255  /* 6 */ {8,   16, 128, 128},
     256  /* 7 */ {8,   32, 128, 256},
     257  /* 8 */ {32, 128, 258, 1024},
     258  /* 9 */ {32, 258, 258, 4096}}; /* maximum compression */
     259  
     260  /* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4
     261   * For deflate_fast() (levels <= 3) good is ignored and lazy has a different
     262   * meaning.
     263   */
     264  
     265  /* ===========================================================================
     266   *  Prototypes for local functions.
     267   */
     268  static void fill_window (void);
     269  static off_t deflate_fast (void);
     270  
     271  #ifdef ASMV
     272        int  longest_match (IPos cur_match);
     273        void match_init (void); /* asm code initialization */
     274  #endif
     275  
     276  #ifdef DEBUG
     277  static void check_match (IPos start, IPos match, int length);
     278  #endif
     279  
     280  /* ===========================================================================
     281   * Update a hash value with the given input byte
     282   * IN  assertion: all calls to UPDATE_HASH are made with consecutive
     283   *    input characters, so that a running hash key can be computed from the
     284   *    previous key instead of complete recalculation each time.
     285   */
     286  #define UPDATE_HASH(h,c) (h = (((h)<<H_SHIFT) ^ (c)) & HASH_MASK)
     287  
     288  /* ===========================================================================
     289   * Insert string s in the dictionary and set match_head to the previous head
     290   * of the hash chain (the most recent string with same hash key). Return
     291   * the previous length of the hash chain.
     292   * IN  assertion: all calls to INSERT_STRING are made with consecutive
     293   *    input characters and the first MIN_MATCH bytes of s are valid
     294   *    (except for the last MIN_MATCH-1 bytes of the input file).
     295   */
     296  #define INSERT_STRING(s, match_head) \
     297     (UPDATE_HASH(ins_h, window[(s) + MIN_MATCH-1]), \
     298      prev[(s) & WMASK] = match_head = head[ins_h], \
     299      head[ins_h] = (s))
     300  
     301  /* ===========================================================================
     302   * Initialize the "longest match" routines for a new file
     303   * PACK_LEVEL values: 0: store, 1: best speed, 9: best compression
     304   */
     305  static void
     306  lm_init (int pack_level)
     307  {
     308      register unsigned j;
     309  
     310      if (pack_level < 1 || pack_level > 9) gzip_error ("bad pack level");
     311  
     312      /* Initialize the hash table. */
     313  #if defined MAXSEG_64K && HASH_BITS == 15
     314      for (j = 0;  j < HASH_SIZE; j++) head[j] = NIL;
     315  #else
     316      memzero((char*)head, HASH_SIZE*sizeof(*head));
     317  #endif
     318      /* prev will be initialized on the fly */
     319  
     320      /* rsync params */
     321      rsync_chunk_end = 0xFFFFFFFFUL;
     322      rsync_sum = 0;
     323  
     324      /* Set the default configuration parameters:
     325       */
     326      max_lazy_match   = configuration_table[pack_level].max_lazy;
     327      good_match       = configuration_table[pack_level].good_length;
     328  #ifndef FULL_SEARCH
     329      nice_match       = configuration_table[pack_level].nice_length;
     330  #endif
     331      max_chain_length = configuration_table[pack_level].max_chain;
     332      /* ??? reduce max_chain_length for binary files */
     333  
     334      strstart = 0;
     335      block_start = 0L;
     336  #ifdef ASMV
     337      match_init(); /* initialize the asm code */
     338  #endif
     339  
     340      lookahead = read_buf((char*)window,
     341                           sizeof(int) <= 2 ? (unsigned)WSIZE : 2*WSIZE);
     342  
     343      if (lookahead == 0 || lookahead == (unsigned)EOF) {
     344         eofile = 1, lookahead = 0;
     345         return;
     346      }
     347      eofile = 0;
     348      /* Make sure that we always have enough lookahead. This is important
     349       * if input comes from a device such as a tty.
     350       */
     351      while (lookahead < MIN_LOOKAHEAD && !eofile) fill_window();
     352  
     353      ins_h = 0;
     354      for (j=0; j<MIN_MATCH-1; j++) UPDATE_HASH(ins_h, window[j]);
     355      /* If lookahead < MIN_MATCH, ins_h is garbage, but this is
     356       * not important since only literal bytes will be emitted.
     357       */
     358  }
     359  
     360  /* ===========================================================================
     361   * Set match_start to the longest match starting at the given string and
     362   * return its length. Matches shorter or equal to prev_length are discarded,
     363   * in which case the result is equal to prev_length and match_start is
     364   * garbage.
     365   * IN assertions: cur_match is the head of the hash chain for the current
     366   *   string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
     367   */
     368  #ifndef ASMV
     369  /* For MSDOS, OS/2 and 386 Unix, an optimized version is in match.asm or
     370   * match.s. The code is functionally equivalent, so you can use the C version
     371   * if desired.
     372   */
     373  static int
     374  longest_match(IPos cur_match)
     375  {
     376      unsigned chain_length = max_chain_length;   /* max hash chain length */
     377      register uch *scan = window + strstart;     /* current string */
     378      register uch *match;                        /* matched string */
     379      register int len;                           /* length of current match */
     380      int best_len = prev_length;                 /* best match length so far */
     381      IPos limit = strstart > (IPos)MAX_DIST ? strstart - (IPos)MAX_DIST : NIL;
     382      /* Stop when cur_match becomes <= limit. To simplify the code,
     383       * we prevent matches with the string of window index 0.
     384       */
     385  
     386  /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
     387   * It is easy to get rid of this optimization if necessary.
     388   */
     389  #if HASH_BITS < 8 || MAX_MATCH != 258
     390     error: Code too clever
     391  #endif
     392  
     393  #ifdef UNALIGNED_OK
     394      /* Compare two bytes at a time. Note: this is not always beneficial.
     395       * Try with and without -DUNALIGNED_OK to check.
     396       */
     397      register uch *strend = window + strstart + MAX_MATCH - 1;
     398      register ush scan_start = *(ush*)scan;
     399      register ush scan_end   = *(ush*)(scan+best_len-1);
     400  #else
     401      register uch *strend = window + strstart + MAX_MATCH;
     402      register uch scan_end1  = scan[best_len-1];
     403      register uch scan_end   = scan[best_len];
     404  #endif
     405  
     406      /* Do not waste too much time if we already have a good match: */
     407      if (prev_length >= good_match) {
     408          chain_length >>= 2;
     409      }
     410      Assert(strstart <= window_size-MIN_LOOKAHEAD, "insufficient lookahead");
     411  
     412      do {
     413          Assert(cur_match < strstart, "no future");
     414          match = window + cur_match;
     415  
     416          /* Skip to next match if the match length cannot increase
     417           * or if the match length is less than 2:
     418           */
     419  #if defined UNALIGNED_OK && MAX_MATCH == 258
     420          /* This code assumes sizeof(unsigned short) == 2. Do not use
     421           * UNALIGNED_OK if your compiler uses a different size.
     422           */
     423          if (*(ush*)(match+best_len-1) != scan_end ||
     424              *(ush*)match != scan_start) continue;
     425  
     426          /* It is not necessary to compare scan[2] and match[2] since they are
     427           * always equal when the other bytes match, given that the hash keys
     428           * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at
     429           * strstart+3, +5, ... up to strstart+257. We check for insufficient
     430           * lookahead only every 4th comparison; the 128th check will be made
     431           * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is
     432           * necessary to put more guard bytes at the end of the window, or
     433           * to check more often for insufficient lookahead.
     434           */
     435          scan++, match++;
     436          do {
     437          } while (*(ush*)(scan+=2) == *(ush*)(match+=2) &&
     438                   *(ush*)(scan+=2) == *(ush*)(match+=2) &&
     439                   *(ush*)(scan+=2) == *(ush*)(match+=2) &&
     440                   *(ush*)(scan+=2) == *(ush*)(match+=2) &&
     441                   scan < strend);
     442          /* The funny "do {}" generates better code on most compilers */
     443  
     444          /* Here, scan <= window+strstart+257 */
     445          Assert(scan <= window+(unsigned)(window_size-1), "wild scan");
     446          if (*scan == *match) scan++;
     447  
     448          len = (MAX_MATCH - 1) - (int)(strend-scan);
     449          scan = strend - (MAX_MATCH-1);
     450  
     451  #else /* UNALIGNED_OK */
     452  
     453          if (match[best_len]   != scan_end  ||
     454              match[best_len-1] != scan_end1 ||
     455              *match            != *scan     ||
     456              *++match          != scan[1])      continue;
     457  
     458          /* The check at best_len-1 can be removed because it will be made
     459           * again later. (This heuristic is not always a win.)
     460           * It is not necessary to compare scan[2] and match[2] since they
     461           * are always equal when the other bytes match, given that
     462           * the hash keys are equal and that HASH_BITS >= 8.
     463           */
     464          scan += 2, match++;
     465  
     466          /* We check for insufficient lookahead only every 8th comparison;
     467           * the 256th check will be made at strstart+258.
     468           */
     469          do {
     470          } while (*++scan == *++match && *++scan == *++match &&
     471                   *++scan == *++match && *++scan == *++match &&
     472                   *++scan == *++match && *++scan == *++match &&
     473                   *++scan == *++match && *++scan == *++match &&
     474                   scan < strend);
     475  
     476          len = MAX_MATCH - (int)(strend - scan);
     477          scan = strend - MAX_MATCH;
     478  
     479  #endif /* UNALIGNED_OK */
     480  
     481          if (len > best_len) {
     482              match_start = cur_match;
     483              best_len = len;
     484              if (len >= nice_match) break;
     485  #ifdef UNALIGNED_OK
     486              scan_end = *(ush*)(scan+best_len-1);
     487  #else
     488              scan_end1  = scan[best_len-1];
     489              scan_end   = scan[best_len];
     490  #endif
     491          }
     492      } while ((cur_match = prev[cur_match & WMASK]) > limit
     493               && --chain_length != 0);
     494  
     495      return best_len;
     496  }
     497  #endif /* ASMV */
     498  
     499  #ifdef DEBUG
     500  /* ===========================================================================
     501   * Check that the match at match_start is indeed a match.
     502   */
     503  static void
     504  check_match (IPos start, IPos match, int length)
     505  {
     506      /* check that the match is indeed a match */
     507      if (memcmp((char*)window + match,
     508                  (char*)window + start, length) != 0) {
     509          fprintf(stderr,
     510              " start %u, match %u, length %d\n",
     511              start, match, length);
     512          gzip_error ("invalid match");
     513      }
     514      if (verbose > 1) {
     515          fprintf (stderr, "\\[%u,%d]", start - match, length);
     516          do { putc(window[start++], stderr); } while (--length != 0);
     517      }
     518  }
     519  #else
     520  #  define check_match(start, match, length)
     521  #endif
     522  
     523  /* ===========================================================================
     524   * Fill the window when the lookahead becomes insufficient.
     525   * Updates strstart and lookahead, and sets eofile if end of input file.
     526   * IN assertion: lookahead < MIN_LOOKAHEAD && strstart + lookahead > 0
     527   * OUT assertions: at least one byte has been read, or eofile is set;
     528   *    file reads are performed for at least two bytes (required for the
     529   *    translate_eol option).
     530   */
     531  static void
     532  fill_window ()
     533  {
     534      register unsigned n, m;
     535      unsigned more = (unsigned)(window_size - (ulg)lookahead - (ulg)strstart);
     536      /* Amount of free space at the end of the window. */
     537  
     538      /* If the window is almost full and there is insufficient lookahead,
     539       * move the upper half to the lower one to make room in the upper half.
     540       */
     541      if (more == (unsigned)EOF) {
     542          /* Very unlikely, but possible on 16 bit machine if strstart == 0
     543           * and lookahead == 1 (input done one byte at time)
     544           */
     545          more--;
     546      } else if (strstart >= WSIZE+MAX_DIST) {
     547          /* By the IN assertion, the window is not empty so we can't confuse
     548           * more == 0 with more == 64K on a 16 bit machine.
     549           */
     550          Assert(window_size == (ulg)2*WSIZE, "no sliding with BIG_MEM");
     551  
     552          memcpy((char*)window, (char*)window+WSIZE, (unsigned)WSIZE);
     553          match_start -= WSIZE;
     554          strstart    -= WSIZE; /* we now have strstart >= MAX_DIST: */
     555          if (rsync_chunk_end != 0xFFFFFFFFUL)
     556              rsync_chunk_end -= WSIZE;
     557  
     558          block_start -= (long) WSIZE;
     559  
     560          for (n = 0; n < HASH_SIZE; n++) {
     561              m = head[n];
     562              head[n] = (Pos)(m >= WSIZE ? m-WSIZE : NIL);
     563          }
     564          for (n = 0; n < WSIZE; n++) {
     565              m = prev[n];
     566              prev[n] = (Pos)(m >= WSIZE ? m-WSIZE : NIL);
     567              /* If n is not on any hash chain, prev[n] is garbage but
     568               * its value will never be used.
     569               */
     570          }
     571          more += WSIZE;
     572      }
     573      /* At this point, more >= 2 */
     574      if (!eofile) {
     575          n = read_buf((char*)window+strstart+lookahead, more);
     576          if (n == 0 || n == (unsigned)EOF) {
     577              eofile = 1;
     578              /* Don't let garbage pollute the dictionary.  */
     579              memzero (window + strstart + lookahead, MIN_MATCH - 1);
     580          } else {
     581              lookahead += n;
     582          }
     583      }
     584  }
     585  
     586  /* With an initial offset of START, advance rsync's rolling checksum
     587     by NUM bytes.  */
     588  static void
     589  rsync_roll (unsigned int start, unsigned int num)
     590  {
     591      unsigned i;
     592  
     593      if (start < RSYNC_WIN) {
     594          /* before window fills. */
     595          for (i = start; i < RSYNC_WIN; i++) {
     596              if (i == start + num)
     597                  return;
     598              rsync_sum += (ulg)window[i];
     599          }
     600          num -= (RSYNC_WIN - start);
     601          start = RSYNC_WIN;
     602      }
     603  
     604      /* buffer after window full */
     605      for (i = start; i < start+num; i++) {
     606          /* New character in */
     607          rsync_sum += (ulg)window[i];
     608          /* Old character out */
     609          rsync_sum -= (ulg)window[i - RSYNC_WIN];
     610          if (rsync_chunk_end == 0xFFFFFFFFUL && RSYNC_SUM_MATCH(rsync_sum))
     611              rsync_chunk_end = i;
     612      }
     613  }
     614  
     615  /* ===========================================================================
     616   * Set rsync_chunk_end if window sum matches magic value.
     617   */
     618  #define RSYNC_ROLL(s, n) \
     619     do { if (rsync) rsync_roll((s), (n)); } while(0)
     620  
     621  /* ===========================================================================
     622   * Flush the current block, with given end-of-file flag.
     623   * IN assertion: strstart is set to the end of the current match.
     624   */
     625  #define FLUSH_BLOCK(eof) \
     626     flush_block(block_start >= 0L ? (char*)&window[(unsigned)block_start] : \
     627                  (char*)NULL, (long)strstart - block_start, flush-1, (eof))
     628  
     629  /* ===========================================================================
     630   * Processes a new input file and return its compressed length. This
     631   * function does not perform lazy evaluationof matches and inserts
     632   * new strings in the dictionary only for unmatched strings or for short
     633   * matches. It is used only for the fast compression options.
     634   */
     635  static off_t
     636  deflate_fast ()
     637  {
     638      IPos hash_head; /* head of the hash chain */
     639      int flush = 0;  /* set if current block must be flushed, 2=>and padded  */
     640      unsigned match_length = 0;  /* length of best match */
     641  
     642      prev_length = MIN_MATCH-1;
     643      while (lookahead != 0) {
     644          /* Insert the string window[strstart .. strstart+2] in the
     645           * dictionary, and set hash_head to the head of the hash chain:
     646           */
     647          INSERT_STRING(strstart, hash_head);
     648  
     649          /* Find the longest match, discarding those <= prev_length.
     650           * At this point we have always match_length < MIN_MATCH
     651           */
     652          if (hash_head != NIL && strstart - hash_head <= MAX_DIST
     653              && strstart <= window_size - MIN_LOOKAHEAD) {
     654              /* To simplify the code, we prevent matches with the string
     655               * of window index 0 (in particular we have to avoid a match
     656               * of the string with itself at the start of the input file).
     657               */
     658              match_length = longest_match (hash_head);
     659              /* longest_match() sets match_start */
     660              if (match_length > lookahead) match_length = lookahead;
     661          }
     662          if (match_length >= MIN_MATCH) {
     663              check_match(strstart, match_start, match_length);
     664  
     665              flush = ct_tally(strstart-match_start, match_length - MIN_MATCH);
     666  
     667              lookahead -= match_length;
     668  
     669              RSYNC_ROLL(strstart, match_length);
     670              /* Insert new strings in the hash table only if the match length
     671               * is not too large. This saves time but degrades compression.
     672               */
     673              if (match_length <= max_insert_length) {
     674                  match_length--; /* string at strstart already in hash table */
     675                  do {
     676                      strstart++;
     677                      INSERT_STRING(strstart, hash_head);
     678                      /* strstart never exceeds WSIZE-MAX_MATCH, so there are
     679                       * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH
     680                       * these bytes are garbage, but it does not matter since
     681                       * the next lookahead bytes will be emitted as literals.
     682                       */
     683                  } while (--match_length != 0);
     684                  strstart++;
     685              } else {
     686                  strstart += match_length;
     687                  match_length = 0;
     688                  ins_h = window[strstart];
     689                  UPDATE_HASH(ins_h, window[strstart+1]);
     690  #if MIN_MATCH != 3
     691                  Call UPDATE_HASH() MIN_MATCH-3 more times
     692  #endif
     693              }
     694          } else {
     695              /* No match, output a literal byte */
     696              Tracevv((stderr,"%c",window[strstart]));
     697              flush = ct_tally (0, window[strstart]);
     698              RSYNC_ROLL(strstart, 1);
     699              lookahead--;
     700              strstart++;
     701          }
     702          if (rsync && strstart > rsync_chunk_end) {
     703              rsync_chunk_end = 0xFFFFFFFFUL;
     704              flush = 2;
     705          }
     706          if (flush) FLUSH_BLOCK(0), block_start = strstart;
     707  
     708          /* Make sure that we always have enough lookahead, except
     709           * at the end of the input file. We need MAX_MATCH bytes
     710           * for the next match, plus MIN_MATCH bytes to insert the
     711           * string following the next match.
     712           */
     713          while (lookahead < MIN_LOOKAHEAD && !eofile) fill_window();
     714  
     715      }
     716      return FLUSH_BLOCK(1); /* eof */
     717  }
     718  
     719  /* ===========================================================================
     720   * Same as above, but achieves better compression. We use a lazy
     721   * evaluation for matches: a match is finally adopted only if there is
     722   * no better match at the next window position.
     723   */
     724  off_t
     725  deflate (int pack_level)
     726  {
     727      IPos hash_head;          /* head of hash chain */
     728      IPos prev_match;         /* previous match */
     729      int flush = 0;           /* set if current block must be flushed */
     730      int match_available = 0; /* set if previous match exists */
     731      register unsigned match_length = MIN_MATCH-1; /* length of best match */
     732  
     733      lm_init (pack_level);
     734      if (pack_level <= 3)
     735        return deflate_fast();
     736  
     737      /* Process the input block. */
     738      while (lookahead != 0) {
     739          /* Insert the string window[strstart .. strstart+2] in the
     740           * dictionary, and set hash_head to the head of the hash chain:
     741           */
     742          INSERT_STRING(strstart, hash_head);
     743  
     744          /* Find the longest match, discarding those <= prev_length.
     745           */
     746          prev_length = match_length, prev_match = match_start;
     747          match_length = MIN_MATCH-1;
     748  
     749          if (hash_head != NIL && prev_length < max_lazy_match &&
     750              strstart - hash_head <= MAX_DIST &&
     751              strstart <= window_size - MIN_LOOKAHEAD) {
     752              /* To simplify the code, we prevent matches with the string
     753               * of window index 0 (in particular we have to avoid a match
     754               * of the string with itself at the start of the input file).
     755               */
     756              match_length = longest_match (hash_head);
     757              /* longest_match() sets match_start */
     758              if (match_length > lookahead) match_length = lookahead;
     759  
     760              /* Ignore a length 3 match if it is too distant: */
     761              if (match_length == MIN_MATCH && strstart-match_start > TOO_FAR){
     762                  /* If prev_match is also MIN_MATCH, match_start is garbage
     763                   * but we will ignore the current match anyway.
     764                   */
     765                  match_length--;
     766              }
     767          }
     768          /* If there was a match at the previous step and the current
     769           * match is not better, output the previous match:
     770           */
     771          if (prev_length >= MIN_MATCH && match_length <= prev_length) {
     772  
     773              check_match(strstart-1, prev_match, prev_length);
     774  
     775              flush = ct_tally(strstart-1-prev_match, prev_length - MIN_MATCH);
     776  
     777              /* Insert in hash table all strings up to the end of the match.
     778               * strstart-1 and strstart are already inserted.
     779               */
     780              lookahead -= prev_length-1;
     781              prev_length -= 2;
     782              RSYNC_ROLL(strstart, prev_length+1);
     783              do {
     784                  strstart++;
     785                  INSERT_STRING(strstart, hash_head);
     786                  /* strstart never exceeds WSIZE-MAX_MATCH, so there are
     787                   * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH
     788                   * these bytes are garbage, but it does not matter since the
     789                   * next lookahead bytes will always be emitted as literals.
     790                   */
     791              } while (--prev_length != 0);
     792              match_available = 0;
     793              match_length = MIN_MATCH-1;
     794              strstart++;
     795  
     796              if (rsync && strstart > rsync_chunk_end) {
     797                  rsync_chunk_end = 0xFFFFFFFFUL;
     798                  flush = 2;
     799              }
     800              if (flush) FLUSH_BLOCK(0), block_start = strstart;
     801          } else if (match_available) {
     802              /* If there was no match at the previous position, output a
     803               * single literal. If there was a match but the current match
     804               * is longer, truncate the previous match to a single literal.
     805               */
     806              Tracevv((stderr,"%c",window[strstart-1]));
     807              flush = ct_tally (0, window[strstart-1]);
     808              if (rsync && strstart > rsync_chunk_end) {
     809                  rsync_chunk_end = 0xFFFFFFFFUL;
     810                  flush = 2;
     811              }
     812              if (flush) FLUSH_BLOCK(0), block_start = strstart;
     813              RSYNC_ROLL(strstart, 1);
     814              strstart++;
     815              lookahead--;
     816          } else {
     817              /* There is no previous match to compare with, wait for
     818               * the next step to decide.
     819               */
     820              if (rsync && strstart > rsync_chunk_end) {
     821                  /* Reset huffman tree */
     822                  rsync_chunk_end = 0xFFFFFFFFUL;
     823                  flush = 2;
     824                  FLUSH_BLOCK(0), block_start = strstart;
     825              }
     826  
     827              match_available = 1;
     828              RSYNC_ROLL(strstart, 1);
     829              strstart++;
     830              lookahead--;
     831          }
     832          Assert (strstart <= bytes_in && lookahead <= bytes_in, "a bit too far");
     833  
     834          /* Make sure that we always have enough lookahead, except
     835           * at the end of the input file. We need MAX_MATCH bytes
     836           * for the next match, plus MIN_MATCH bytes to insert the
     837           * string following the next match.
     838           */
     839          while (lookahead < MIN_LOOKAHEAD && !eofile) fill_window();
     840      }
     841      if (match_available) ct_tally (0, window[strstart-1]);
     842  
     843      return FLUSH_BLOCK(1); /* eof */
     844  }