(root)/
xz-5.4.5/
src/
liblzma/
lzma/
lzma_common.h
       1  ///////////////////////////////////////////////////////////////////////////////
       2  //
       3  /// \file       lzma_common.h
       4  /// \brief      Private definitions common to LZMA encoder and decoder
       5  ///
       6  //  Authors:    Igor Pavlov
       7  //              Lasse Collin
       8  //
       9  //  This file has been put into the public domain.
      10  //  You can do whatever you want with this file.
      11  //
      12  ///////////////////////////////////////////////////////////////////////////////
      13  
      14  #ifndef LZMA_LZMA_COMMON_H
      15  #define LZMA_LZMA_COMMON_H
      16  
      17  #include "common.h"
      18  #include "range_common.h"
      19  
      20  
      21  ///////////////////
      22  // Miscellaneous //
      23  ///////////////////
      24  
      25  /// Maximum number of position states. A position state is the lowest pos bits
      26  /// number of bits of the current uncompressed offset. In some places there
      27  /// are different sets of probabilities for different pos states.
      28  #define POS_STATES_MAX (1 << LZMA_PB_MAX)
      29  
      30  
      31  /// Validates lc, lp, and pb.
      32  static inline bool
      33  is_lclppb_valid(const lzma_options_lzma *options)
      34  {
      35  	return options->lc <= LZMA_LCLP_MAX && options->lp <= LZMA_LCLP_MAX
      36  			&& options->lc + options->lp <= LZMA_LCLP_MAX
      37  			&& options->pb <= LZMA_PB_MAX;
      38  }
      39  
      40  
      41  ///////////
      42  // State //
      43  ///////////
      44  
      45  /// This enum is used to track which events have occurred most recently and
      46  /// in which order. This information is used to predict the next event.
      47  ///
      48  /// Events:
      49  ///  - Literal: One 8-bit byte
      50  ///  - Match: Repeat a chunk of data at some distance
      51  ///  - Long repeat: Multi-byte match at a recently seen distance
      52  ///  - Short repeat: One-byte repeat at a recently seen distance
      53  ///
      54  /// The event names are in from STATE_oldest_older_previous. REP means
      55  /// either short or long repeated match, and NONLIT means any non-literal.
      56  typedef enum {
      57  	STATE_LIT_LIT,
      58  	STATE_MATCH_LIT_LIT,
      59  	STATE_REP_LIT_LIT,
      60  	STATE_SHORTREP_LIT_LIT,
      61  	STATE_MATCH_LIT,
      62  	STATE_REP_LIT,
      63  	STATE_SHORTREP_LIT,
      64  	STATE_LIT_MATCH,
      65  	STATE_LIT_LONGREP,
      66  	STATE_LIT_SHORTREP,
      67  	STATE_NONLIT_MATCH,
      68  	STATE_NONLIT_REP,
      69  } lzma_lzma_state;
      70  
      71  
      72  /// Total number of states
      73  #define STATES 12
      74  
      75  /// The lowest 7 states indicate that the previous state was a literal.
      76  #define LIT_STATES 7
      77  
      78  
      79  /// Indicate that the latest state was a literal.
      80  #define update_literal(state) \
      81  	state = ((state) <= STATE_SHORTREP_LIT_LIT \
      82  			? STATE_LIT_LIT \
      83  			: ((state) <= STATE_LIT_SHORTREP \
      84  				? (state) - 3 \
      85  				: (state) - 6))
      86  
      87  /// Indicate that the latest state was a match.
      88  #define update_match(state) \
      89  	state = ((state) < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH)
      90  
      91  /// Indicate that the latest state was a long repeated match.
      92  #define update_long_rep(state) \
      93  	state = ((state) < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP)
      94  
      95  /// Indicate that the latest state was a short match.
      96  #define update_short_rep(state) \
      97  	state = ((state) < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP)
      98  
      99  /// Test if the previous state was a literal.
     100  #define is_literal_state(state) \
     101  	((state) < LIT_STATES)
     102  
     103  
     104  /////////////
     105  // Literal //
     106  /////////////
     107  
     108  /// Each literal coder is divided in three sections:
     109  ///   - 0x001-0x0FF: Without match byte
     110  ///   - 0x101-0x1FF: With match byte; match bit is 0
     111  ///   - 0x201-0x2FF: With match byte; match bit is 1
     112  ///
     113  /// Match byte is used when the previous LZMA symbol was something else than
     114  /// a literal (that is, it was some kind of match).
     115  #define LITERAL_CODER_SIZE 0x300
     116  
     117  /// Maximum number of literal coders
     118  #define LITERAL_CODERS_MAX (1 << LZMA_LCLP_MAX)
     119  
     120  /// Locate the literal coder for the next literal byte. The choice depends on
     121  ///   - the lowest literal_pos_bits bits of the position of the current
     122  ///     byte; and
     123  ///   - the highest literal_context_bits bits of the previous byte.
     124  #define literal_subcoder(probs, lc, lp_mask, pos, prev_byte) \
     125  	((probs)[(((pos) & (lp_mask)) << (lc)) \
     126  			+ ((uint32_t)(prev_byte) >> (8U - (lc)))])
     127  
     128  
     129  static inline void
     130  literal_init(probability (*probs)[LITERAL_CODER_SIZE],
     131  		uint32_t lc, uint32_t lp)
     132  {
     133  	assert(lc + lp <= LZMA_LCLP_MAX);
     134  
     135  	const uint32_t coders = 1U << (lc + lp);
     136  
     137  	for (uint32_t i = 0; i < coders; ++i)
     138  		for (uint32_t j = 0; j < LITERAL_CODER_SIZE; ++j)
     139  			bit_reset(probs[i][j]);
     140  
     141  	return;
     142  }
     143  
     144  
     145  //////////////////
     146  // Match length //
     147  //////////////////
     148  
     149  // Minimum length of a match is two bytes.
     150  #define MATCH_LEN_MIN 2
     151  
     152  // Match length is encoded with 4, 5, or 10 bits.
     153  //
     154  // Length   Bits
     155  //  2-9      4 = Choice=0 + 3 bits
     156  // 10-17     5 = Choice=1 + Choice2=0 + 3 bits
     157  // 18-273   10 = Choice=1 + Choice2=1 + 8 bits
     158  #define LEN_LOW_BITS 3
     159  #define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
     160  #define LEN_MID_BITS 3
     161  #define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
     162  #define LEN_HIGH_BITS 8
     163  #define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
     164  #define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
     165  
     166  // Maximum length of a match is 273 which is a result of the encoding
     167  // described above.
     168  #define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)
     169  
     170  
     171  ////////////////////
     172  // Match distance //
     173  ////////////////////
     174  
     175  // Different sets of probabilities are used for match distances that have very
     176  // short match length: Lengths of 2, 3, and 4 bytes have a separate set of
     177  // probabilities for each length. The matches with longer length use a shared
     178  // set of probabilities.
     179  #define DIST_STATES 4
     180  
     181  // Macro to get the index of the appropriate probability array.
     182  #define get_dist_state(len) \
     183  	((len) < DIST_STATES + MATCH_LEN_MIN \
     184  		? (len) - MATCH_LEN_MIN \
     185  		: DIST_STATES - 1)
     186  
     187  // The highest two bits of a match distance (distance slot) are encoded
     188  // using six bits. See fastpos.h for more explanation.
     189  #define DIST_SLOT_BITS 6
     190  #define DIST_SLOTS (1 << DIST_SLOT_BITS)
     191  
     192  // Match distances up to 127 are fully encoded using probabilities. Since
     193  // the highest two bits (distance slot) are always encoded using six bits,
     194  // the distances 0-3 don't need any additional bits to encode, since the
     195  // distance slot itself is the same as the actual distance. DIST_MODEL_START
     196  // indicates the first distance slot where at least one additional bit is
     197  // needed.
     198  #define DIST_MODEL_START 4
     199  
     200  // Match distances greater than 127 are encoded in three pieces:
     201  //   - distance slot: the highest two bits
     202  //   - direct bits: 2-26 bits below the highest two bits
     203  //   - alignment bits: four lowest bits
     204  //
     205  // Direct bits don't use any probabilities.
     206  //
     207  // The distance slot value of 14 is for distances 128-191 (see the table in
     208  // fastpos.h to understand why).
     209  #define DIST_MODEL_END 14
     210  
     211  // Distance slots that indicate a distance <= 127.
     212  #define FULL_DISTANCES_BITS (DIST_MODEL_END / 2)
     213  #define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
     214  
     215  // For match distances greater than 127, only the highest two bits and the
     216  // lowest four bits (alignment) is encoded using probabilities.
     217  #define ALIGN_BITS 4
     218  #define ALIGN_SIZE (1 << ALIGN_BITS)
     219  #define ALIGN_MASK (ALIGN_SIZE - 1)
     220  
     221  // LZMA remembers the four most recent match distances. Reusing these distances
     222  // tends to take less space than re-encoding the actual distance value.
     223  #define REPS 4
     224  
     225  #endif