1  ///////////////////////////////////////////////////////////////////////////////
       2  //
       3  /// \file       lzma_decoder.c
       4  /// \brief      LZMA decoder
       5  ///
       6  //  Authors:    Igor Pavlov
       7  //              Lasse Collin
       8  //
       9  //  This file has been put into the public domain.
      10  //  You can do whatever you want with this file.
      11  //
      12  ///////////////////////////////////////////////////////////////////////////////
      13  
      14  #include "lz_decoder.h"
      15  #include "lzma_common.h"
      16  #include "lzma_decoder.h"
      17  #include "range_decoder.h"
      18  
      19  // The macros unroll loops with switch statements.
      20  // Silence warnings about missing fall-through comments.
      21  #if TUKLIB_GNUC_REQ(7, 0)
      22  #	pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
      23  #endif
      24  
      25  
      26  #ifdef HAVE_SMALL
      27  
      28  // Macros for (somewhat) size-optimized code.
      29  #define seq_4(seq) seq
      30  
      31  #define seq_6(seq) seq
      32  
      33  #define seq_8(seq) seq
      34  
      35  #define seq_len(seq) \
      36  	seq ## _CHOICE, \
      37  	seq ## _CHOICE2, \
      38  	seq ## _BITTREE
      39  
      40  #define len_decode(target, ld, pos_state, seq) \
      41  do { \
      42  case seq ## _CHOICE: \
      43  	rc_if_0(ld.choice, seq ## _CHOICE) { \
      44  		rc_update_0(ld.choice); \
      45  		probs = ld.low[pos_state];\
      46  		limit = LEN_LOW_SYMBOLS; \
      47  		target = MATCH_LEN_MIN; \
      48  	} else { \
      49  		rc_update_1(ld.choice); \
      50  case seq ## _CHOICE2: \
      51  		rc_if_0(ld.choice2, seq ## _CHOICE2) { \
      52  			rc_update_0(ld.choice2); \
      53  			probs = ld.mid[pos_state]; \
      54  			limit = LEN_MID_SYMBOLS; \
      55  			target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
      56  		} else { \
      57  			rc_update_1(ld.choice2); \
      58  			probs = ld.high; \
      59  			limit = LEN_HIGH_SYMBOLS; \
      60  			target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \
      61  					+ LEN_MID_SYMBOLS; \
      62  		} \
      63  	} \
      64  	symbol = 1; \
      65  case seq ## _BITTREE: \
      66  	do { \
      67  		rc_bit(probs[symbol], , , seq ## _BITTREE); \
      68  	} while (symbol < limit); \
      69  	target += symbol - limit; \
      70  } while (0)
      71  
      72  #else // HAVE_SMALL
      73  
      74  // Unrolled versions
      75  #define seq_4(seq) \
      76  	seq ## 0, \
      77  	seq ## 1, \
      78  	seq ## 2, \
      79  	seq ## 3
      80  
      81  #define seq_6(seq) \
      82  	seq ## 0, \
      83  	seq ## 1, \
      84  	seq ## 2, \
      85  	seq ## 3, \
      86  	seq ## 4, \
      87  	seq ## 5
      88  
      89  #define seq_8(seq) \
      90  	seq ## 0, \
      91  	seq ## 1, \
      92  	seq ## 2, \
      93  	seq ## 3, \
      94  	seq ## 4, \
      95  	seq ## 5, \
      96  	seq ## 6, \
      97  	seq ## 7
      98  
      99  #define seq_len(seq) \
     100  	seq ## _CHOICE, \
     101  	seq ## _LOW0, \
     102  	seq ## _LOW1, \
     103  	seq ## _LOW2, \
     104  	seq ## _CHOICE2, \
     105  	seq ## _MID0, \
     106  	seq ## _MID1, \
     107  	seq ## _MID2, \
     108  	seq ## _HIGH0, \
     109  	seq ## _HIGH1, \
     110  	seq ## _HIGH2, \
     111  	seq ## _HIGH3, \
     112  	seq ## _HIGH4, \
     113  	seq ## _HIGH5, \
     114  	seq ## _HIGH6, \
     115  	seq ## _HIGH7
     116  
     117  #define len_decode(target, ld, pos_state, seq) \
     118  do { \
     119  	symbol = 1; \
     120  case seq ## _CHOICE: \
     121  	rc_if_0(ld.choice, seq ## _CHOICE) { \
     122  		rc_update_0(ld.choice); \
     123  		rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW0); \
     124  		rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW1); \
     125  		rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW2); \
     126  		target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \
     127  	} else { \
     128  		rc_update_1(ld.choice); \
     129  case seq ## _CHOICE2: \
     130  		rc_if_0(ld.choice2, seq ## _CHOICE2) { \
     131  			rc_update_0(ld.choice2); \
     132  			rc_bit_case(ld.mid[pos_state][symbol], , , \
     133  					seq ## _MID0); \
     134  			rc_bit_case(ld.mid[pos_state][symbol], , , \
     135  					seq ## _MID1); \
     136  			rc_bit_case(ld.mid[pos_state][symbol], , , \
     137  					seq ## _MID2); \
     138  			target = symbol - LEN_MID_SYMBOLS \
     139  					+ MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
     140  		} else { \
     141  			rc_update_1(ld.choice2); \
     142  			rc_bit_case(ld.high[symbol], , , seq ## _HIGH0); \
     143  			rc_bit_case(ld.high[symbol], , , seq ## _HIGH1); \
     144  			rc_bit_case(ld.high[symbol], , , seq ## _HIGH2); \
     145  			rc_bit_case(ld.high[symbol], , , seq ## _HIGH3); \
     146  			rc_bit_case(ld.high[symbol], , , seq ## _HIGH4); \
     147  			rc_bit_case(ld.high[symbol], , , seq ## _HIGH5); \
     148  			rc_bit_case(ld.high[symbol], , , seq ## _HIGH6); \
     149  			rc_bit_case(ld.high[symbol], , , seq ## _HIGH7); \
     150  			target = symbol - LEN_HIGH_SYMBOLS \
     151  					+ MATCH_LEN_MIN \
     152  					+ LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \
     153  		} \
     154  	} \
     155  } while (0)
     156  
     157  #endif // HAVE_SMALL
     158  
     159  
     160  /// Length decoder probabilities; see comments in lzma_common.h.
     161  typedef struct {
     162  	probability choice;
     163  	probability choice2;
     164  	probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
     165  	probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
     166  	probability high[LEN_HIGH_SYMBOLS];
     167  } lzma_length_decoder;
     168  
     169  
     170  typedef struct {
     171  	///////////////////
     172  	// Probabilities //
     173  	///////////////////
     174  
     175  	/// Literals; see comments in lzma_common.h.
     176  	probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
     177  
     178  	/// If 1, it's a match. Otherwise it's a single 8-bit literal.
     179  	probability is_match[STATES][POS_STATES_MAX];
     180  
     181  	/// If 1, it's a repeated match. The distance is one of rep0 .. rep3.
     182  	probability is_rep[STATES];
     183  
     184  	/// If 0, distance of a repeated match is rep0.
     185  	/// Otherwise check is_rep1.
     186  	probability is_rep0[STATES];
     187  
     188  	/// If 0, distance of a repeated match is rep1.
     189  	/// Otherwise check is_rep2.
     190  	probability is_rep1[STATES];
     191  
     192  	/// If 0, distance of a repeated match is rep2. Otherwise it is rep3.
     193  	probability is_rep2[STATES];
     194  
     195  	/// If 1, the repeated match has length of one byte. Otherwise
     196  	/// the length is decoded from rep_len_decoder.
     197  	probability is_rep0_long[STATES][POS_STATES_MAX];
     198  
     199  	/// Probability tree for the highest two bits of the match distance.
     200  	/// There is a separate probability tree for match lengths of
     201  	/// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
     202  	probability dist_slot[DIST_STATES][DIST_SLOTS];
     203  
     204  	/// Probability trees for additional bits for match distance when the
     205  	/// distance is in the range [4, 127].
     206  	probability pos_special[FULL_DISTANCES - DIST_MODEL_END];
     207  
     208  	/// Probability tree for the lowest four bits of a match distance
     209  	/// that is equal to or greater than 128.
     210  	probability pos_align[ALIGN_SIZE];
     211  
     212  	/// Length of a normal match
     213  	lzma_length_decoder match_len_decoder;
     214  
     215  	/// Length of a repeated match
     216  	lzma_length_decoder rep_len_decoder;
     217  
     218  	///////////////////
     219  	// Decoder state //
     220  	///////////////////
     221  
     222  	// Range coder
     223  	lzma_range_decoder rc;
     224  
     225  	// Types of the most recently seen LZMA symbols
     226  	lzma_lzma_state state;
     227  
     228  	uint32_t rep0;      ///< Distance of the latest match
     229  	uint32_t rep1;      ///< Distance of second latest match
     230  	uint32_t rep2;      ///< Distance of third latest match
     231  	uint32_t rep3;      ///< Distance of fourth latest match
     232  
     233  	uint32_t pos_mask; // (1U << pb) - 1
     234  	uint32_t literal_context_bits;
     235  	uint32_t literal_pos_mask;
     236  
     237  	/// Uncompressed size as bytes, or LZMA_VLI_UNKNOWN if end of
     238  	/// payload marker is expected.
     239  	lzma_vli uncompressed_size;
     240  
     241  	/// True if end of payload marker (EOPM) is allowed even when
     242  	/// uncompressed_size is known; false if EOPM must not be present.
     243  	/// This is ignored if uncompressed_size == LZMA_VLI_UNKNOWN.
     244  	bool allow_eopm;
     245  
     246  	////////////////////////////////
     247  	// State of incomplete symbol //
     248  	////////////////////////////////
     249  
     250  	/// Position where to continue the decoder loop
     251  	enum {
     252  		SEQ_NORMALIZE,
     253  		SEQ_IS_MATCH,
     254  		seq_8(SEQ_LITERAL),
     255  		seq_8(SEQ_LITERAL_MATCHED),
     256  		SEQ_LITERAL_WRITE,
     257  		SEQ_IS_REP,
     258  		seq_len(SEQ_MATCH_LEN),
     259  		seq_6(SEQ_DIST_SLOT),
     260  		SEQ_DIST_MODEL,
     261  		SEQ_DIRECT,
     262  		seq_4(SEQ_ALIGN),
     263  		SEQ_EOPM,
     264  		SEQ_IS_REP0,
     265  		SEQ_SHORTREP,
     266  		SEQ_IS_REP0_LONG,
     267  		SEQ_IS_REP1,
     268  		SEQ_IS_REP2,
     269  		seq_len(SEQ_REP_LEN),
     270  		SEQ_COPY,
     271  	} sequence;
     272  
     273  	/// Base of the current probability tree
     274  	probability *probs;
     275  
     276  	/// Symbol being decoded. This is also used as an index variable in
     277  	/// bittree decoders: probs[symbol]
     278  	uint32_t symbol;
     279  
     280  	/// Used as a loop termination condition on bittree decoders and
     281  	/// direct bits decoder.
     282  	uint32_t limit;
     283  
     284  	/// Matched literal decoder: 0x100 or 0 to help avoiding branches.
     285  	/// Bittree reverse decoders: Offset of the next bit: 1 << offset
     286  	uint32_t offset;
     287  
     288  	/// If decoding a literal: match byte.
     289  	/// If decoding a match: length of the match.
     290  	uint32_t len;
     291  } lzma_lzma1_decoder;
     292  
     293  
     294  static lzma_ret
     295  lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
     296  		const uint8_t *restrict in,
     297  		size_t *restrict in_pos, size_t in_size)
     298  {
     299  	lzma_lzma1_decoder *restrict coder = coder_ptr;
     300  
     301  	////////////////////
     302  	// Initialization //
     303  	////////////////////
     304  
     305  	{
     306  		const lzma_ret ret = rc_read_init(
     307  				&coder->rc, in, in_pos, in_size);
     308  		if (ret != LZMA_STREAM_END)
     309  			return ret;
     310  	}
     311  
     312  	///////////////
     313  	// Variables //
     314  	///////////////
     315  
     316  	// Making local copies of often-used variables improves both
     317  	// speed and readability.
     318  
     319  	lzma_dict dict = *dictptr;
     320  
     321  	const size_t dict_start = dict.pos;
     322  
     323  	// Range decoder
     324  	rc_to_local(coder->rc, *in_pos);
     325  
     326  	// State
     327  	uint32_t state = coder->state;
     328  	uint32_t rep0 = coder->rep0;
     329  	uint32_t rep1 = coder->rep1;
     330  	uint32_t rep2 = coder->rep2;
     331  	uint32_t rep3 = coder->rep3;
     332  
     333  	const uint32_t pos_mask = coder->pos_mask;
     334  
     335  	// These variables are actually needed only if we last time ran
     336  	// out of input in the middle of the decoder loop.
     337  	probability *probs = coder->probs;
     338  	uint32_t symbol = coder->symbol;
     339  	uint32_t limit = coder->limit;
     340  	uint32_t offset = coder->offset;
     341  	uint32_t len = coder->len;
     342  
     343  	const uint32_t literal_pos_mask = coder->literal_pos_mask;
     344  	const uint32_t literal_context_bits = coder->literal_context_bits;
     345  
     346  	// Temporary variables
     347  	uint32_t pos_state = dict.pos & pos_mask;
     348  
     349  	lzma_ret ret = LZMA_OK;
     350  
     351  	// This is true when the next LZMA symbol is allowed to be EOPM.
     352  	// That is, if this is false, then EOPM is considered
     353  	// an invalid symbol and we will return LZMA_DATA_ERROR.
     354  	//
     355  	// EOPM is always required (not just allowed) when
     356  	// the uncompressed size isn't known. When uncompressed size
     357  	// is known, eopm_is_valid may be set to true later.
     358  	bool eopm_is_valid = coder->uncompressed_size == LZMA_VLI_UNKNOWN;
     359  
     360  	// If uncompressed size is known and there is enough output space
     361  	// to decode all the data, limit the available buffer space so that
     362  	// the main loop won't try to decode past the end of the stream.
     363  	bool might_finish_without_eopm = false;
     364  	if (coder->uncompressed_size != LZMA_VLI_UNKNOWN
     365  			&& coder->uncompressed_size <= dict.limit - dict.pos) {
     366  		dict.limit = dict.pos + (size_t)(coder->uncompressed_size);
     367  		might_finish_without_eopm = true;
     368  	}
     369  
     370  	// The main decoder loop. The "switch" is used to restart the decoder at
     371  	// correct location. Once restarted, the "switch" is no longer used.
     372  	switch (coder->sequence)
     373  	while (true) {
     374  		// Calculate new pos_state. This is skipped on the first loop
     375  		// since we already calculated it when setting up the local
     376  		// variables.
     377  		pos_state = dict.pos & pos_mask;
     378  
     379  	case SEQ_NORMALIZE:
     380  	case SEQ_IS_MATCH:
     381  		if (unlikely(might_finish_without_eopm
     382  				&& dict.pos == dict.limit)) {
     383  			// In rare cases there is a useless byte that needs
     384  			// to be read anyway.
     385  			rc_normalize(SEQ_NORMALIZE);
     386  
     387  			// If the range decoder state is such that we can
     388  			// be at the end of the LZMA stream, then the
     389  			// decoding is finished.
     390  			if (rc_is_finished(rc)) {
     391  				ret = LZMA_STREAM_END;
     392  				goto out;
     393  			}
     394  
     395  			// If the caller hasn't allowed EOPM to be present
     396  			// together with known uncompressed size, then the
     397  			// LZMA stream is corrupt.
     398  			if (!coder->allow_eopm) {
     399  				ret = LZMA_DATA_ERROR;
     400  				goto out;
     401  			}
     402  
     403  			// Otherwise continue decoding with the expectation
     404  			// that the next LZMA symbol is EOPM.
     405  			eopm_is_valid = true;
     406  		}
     407  
     408  		rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) {
     409  			rc_update_0(coder->is_match[state][pos_state]);
     410  
     411  			// It's a literal i.e. a single 8-bit byte.
     412  
     413  			probs = literal_subcoder(coder->literal,
     414  					literal_context_bits, literal_pos_mask,
     415  					dict.pos, dict_get(&dict, 0));
     416  			symbol = 1;
     417  
     418  			if (is_literal_state(state)) {
     419  				// Decode literal without match byte.
     420  #ifdef HAVE_SMALL
     421  	case SEQ_LITERAL:
     422  				do {
     423  					rc_bit(probs[symbol], , , SEQ_LITERAL);
     424  				} while (symbol < (1 << 8));
     425  #else
     426  				rc_bit_case(probs[symbol], , , SEQ_LITERAL0);
     427  				rc_bit_case(probs[symbol], , , SEQ_LITERAL1);
     428  				rc_bit_case(probs[symbol], , , SEQ_LITERAL2);
     429  				rc_bit_case(probs[symbol], , , SEQ_LITERAL3);
     430  				rc_bit_case(probs[symbol], , , SEQ_LITERAL4);
     431  				rc_bit_case(probs[symbol], , , SEQ_LITERAL5);
     432  				rc_bit_case(probs[symbol], , , SEQ_LITERAL6);
     433  				rc_bit_case(probs[symbol], , , SEQ_LITERAL7);
     434  #endif
     435  			} else {
     436  				// Decode literal with match byte.
     437  				//
     438  				// We store the byte we compare against
     439  				// ("match byte") to "len" to minimize the
     440  				// number of variables we need to store
     441  				// between decoder calls.
     442  				len = (uint32_t)(dict_get(&dict, rep0)) << 1;
     443  
     444  				// The usage of "offset" allows omitting some
     445  				// branches, which should give tiny speed
     446  				// improvement on some CPUs. "offset" gets
     447  				// set to zero if match_bit didn't match.
     448  				offset = 0x100;
     449  
     450  #ifdef HAVE_SMALL
     451  	case SEQ_LITERAL_MATCHED:
     452  				do {
     453  					const uint32_t match_bit
     454  							= len & offset;
     455  					const uint32_t subcoder_index
     456  							= offset + match_bit
     457  							+ symbol;
     458  
     459  					rc_bit(probs[subcoder_index],
     460  							offset &= ~match_bit,
     461  							offset &= match_bit,
     462  							SEQ_LITERAL_MATCHED);
     463  
     464  					// It seems to be faster to do this
     465  					// here instead of putting it to the
     466  					// beginning of the loop and then
     467  					// putting the "case" in the middle
     468  					// of the loop.
     469  					len <<= 1;
     470  
     471  				} while (symbol < (1 << 8));
     472  #else
     473  				// Unroll the loop.
     474  				uint32_t match_bit;
     475  				uint32_t subcoder_index;
     476  
     477  #	define d(seq) \
     478  		case seq: \
     479  			match_bit = len & offset; \
     480  			subcoder_index = offset + match_bit + symbol; \
     481  			rc_bit(probs[subcoder_index], \
     482  					offset &= ~match_bit, \
     483  					offset &= match_bit, \
     484  					seq)
     485  
     486  				d(SEQ_LITERAL_MATCHED0);
     487  				len <<= 1;
     488  				d(SEQ_LITERAL_MATCHED1);
     489  				len <<= 1;
     490  				d(SEQ_LITERAL_MATCHED2);
     491  				len <<= 1;
     492  				d(SEQ_LITERAL_MATCHED3);
     493  				len <<= 1;
     494  				d(SEQ_LITERAL_MATCHED4);
     495  				len <<= 1;
     496  				d(SEQ_LITERAL_MATCHED5);
     497  				len <<= 1;
     498  				d(SEQ_LITERAL_MATCHED6);
     499  				len <<= 1;
     500  				d(SEQ_LITERAL_MATCHED7);
     501  #	undef d
     502  #endif
     503  			}
     504  
     505  			//update_literal(state);
     506  			// Use a lookup table to update to literal state,
     507  			// since compared to other state updates, this would
     508  			// need two branches.
     509  			static const lzma_lzma_state next_state[] = {
     510  				STATE_LIT_LIT,
     511  				STATE_LIT_LIT,
     512  				STATE_LIT_LIT,
     513  				STATE_LIT_LIT,
     514  				STATE_MATCH_LIT_LIT,
     515  				STATE_REP_LIT_LIT,
     516  				STATE_SHORTREP_LIT_LIT,
     517  				STATE_MATCH_LIT,
     518  				STATE_REP_LIT,
     519  				STATE_SHORTREP_LIT,
     520  				STATE_MATCH_LIT,
     521  				STATE_REP_LIT
     522  			};
     523  			state = next_state[state];
     524  
     525  	case SEQ_LITERAL_WRITE:
     526  			if (unlikely(dict_put(&dict, symbol))) {
     527  				coder->sequence = SEQ_LITERAL_WRITE;
     528  				goto out;
     529  			}
     530  
     531  			continue;
     532  		}
     533  
     534  		// Instead of a new byte we are going to get a byte range
     535  		// (distance and length) which will be repeated from our
     536  		// output history.
     537  
     538  		rc_update_1(coder->is_match[state][pos_state]);
     539  
     540  	case SEQ_IS_REP:
     541  		rc_if_0(coder->is_rep[state], SEQ_IS_REP) {
     542  			// Not a repeated match
     543  			rc_update_0(coder->is_rep[state]);
     544  			update_match(state);
     545  
     546  			// The latest three match distances are kept in
     547  			// memory in case there are repeated matches.
     548  			rep3 = rep2;
     549  			rep2 = rep1;
     550  			rep1 = rep0;
     551  
     552  			// Decode the length of the match.
     553  			len_decode(len, coder->match_len_decoder,
     554  					pos_state, SEQ_MATCH_LEN);
     555  
     556  			// Prepare to decode the highest two bits of the
     557  			// match distance.
     558  			probs = coder->dist_slot[get_dist_state(len)];
     559  			symbol = 1;
     560  
     561  #ifdef HAVE_SMALL
     562  	case SEQ_DIST_SLOT:
     563  			do {
     564  				rc_bit(probs[symbol], , , SEQ_DIST_SLOT);
     565  			} while (symbol < DIST_SLOTS);
     566  #else
     567  			rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT0);
     568  			rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT1);
     569  			rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT2);
     570  			rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT3);
     571  			rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT4);
     572  			rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT5);
     573  #endif
     574  			// Get rid of the highest bit that was needed for
     575  			// indexing of the probability array.
     576  			symbol -= DIST_SLOTS;
     577  			assert(symbol <= 63);
     578  
     579  			if (symbol < DIST_MODEL_START) {
     580  				// Match distances [0, 3] have only two bits.
     581  				rep0 = symbol;
     582  			} else {
     583  				// Decode the lowest [1, 29] bits of
     584  				// the match distance.
     585  				limit = (symbol >> 1) - 1;
     586  				assert(limit >= 1 && limit <= 30);
     587  				rep0 = 2 + (symbol & 1);
     588  
     589  				if (symbol < DIST_MODEL_END) {
     590  					// Prepare to decode the low bits for
     591  					// a distance of [4, 127].
     592  					assert(limit <= 5);
     593  					rep0 <<= limit;
     594  					assert(rep0 <= 96);
     595  					// -1 is fine, because we start
     596  					// decoding at probs[1], not probs[0].
     597  					// NOTE: This violates the C standard,
     598  					// since we are doing pointer
     599  					// arithmetic past the beginning of
     600  					// the array.
     601  					assert((int32_t)(rep0 - symbol - 1)
     602  							>= -1);
     603  					assert((int32_t)(rep0 - symbol - 1)
     604  							<= 82);
     605  					probs = coder->pos_special + rep0
     606  							- symbol - 1;
     607  					symbol = 1;
     608  					offset = 0;
     609  	case SEQ_DIST_MODEL:
     610  #ifdef HAVE_SMALL
     611  					do {
     612  						rc_bit(probs[symbol], ,
     613  							rep0 += 1U << offset,
     614  							SEQ_DIST_MODEL);
     615  					} while (++offset < limit);
     616  #else
     617  					switch (limit) {
     618  					case 5:
     619  						assert(offset == 0);
     620  						rc_bit(probs[symbol], ,
     621  							rep0 += 1U,
     622  							SEQ_DIST_MODEL);
     623  						++offset;
     624  						--limit;
     625  					case 4:
     626  						rc_bit(probs[symbol], ,
     627  							rep0 += 1U << offset,
     628  							SEQ_DIST_MODEL);
     629  						++offset;
     630  						--limit;
     631  					case 3:
     632  						rc_bit(probs[symbol], ,
     633  							rep0 += 1U << offset,
     634  							SEQ_DIST_MODEL);
     635  						++offset;
     636  						--limit;
     637  					case 2:
     638  						rc_bit(probs[symbol], ,
     639  							rep0 += 1U << offset,
     640  							SEQ_DIST_MODEL);
     641  						++offset;
     642  						--limit;
     643  					case 1:
     644  						// We need "symbol" only for
     645  						// indexing the probability
     646  						// array, thus we can use
     647  						// rc_bit_last() here to omit
     648  						// the unneeded updating of
     649  						// "symbol".
     650  						rc_bit_last(probs[symbol], ,
     651  							rep0 += 1U << offset,
     652  							SEQ_DIST_MODEL);
     653  					}
     654  #endif
     655  				} else {
     656  					// The distance is >= 128. Decode the
     657  					// lower bits without probabilities
     658  					// except the lowest four bits.
     659  					assert(symbol >= 14);
     660  					assert(limit >= 6);
     661  					limit -= ALIGN_BITS;
     662  					assert(limit >= 2);
     663  	case SEQ_DIRECT:
     664  					// Not worth manual unrolling
     665  					do {
     666  						rc_direct(rep0, SEQ_DIRECT);
     667  					} while (--limit > 0);
     668  
     669  					// Decode the lowest four bits using
     670  					// probabilities.
     671  					rep0 <<= ALIGN_BITS;
     672  					symbol = 1;
     673  #ifdef HAVE_SMALL
     674  					offset = 0;
     675  	case SEQ_ALIGN:
     676  					do {
     677  						rc_bit(coder->pos_align[
     678  								symbol], ,
     679  							rep0 += 1U << offset,
     680  							SEQ_ALIGN);
     681  					} while (++offset < ALIGN_BITS);
     682  #else
     683  	case SEQ_ALIGN0:
     684  					rc_bit(coder->pos_align[symbol], ,
     685  							rep0 += 1, SEQ_ALIGN0);
     686  	case SEQ_ALIGN1:
     687  					rc_bit(coder->pos_align[symbol], ,
     688  							rep0 += 2, SEQ_ALIGN1);
     689  	case SEQ_ALIGN2:
     690  					rc_bit(coder->pos_align[symbol], ,
     691  							rep0 += 4, SEQ_ALIGN2);
     692  	case SEQ_ALIGN3:
     693  					// Like in SEQ_DIST_MODEL, we don't
     694  					// need "symbol" for anything else
     695  					// than indexing the probability array.
     696  					rc_bit_last(coder->pos_align[symbol], ,
     697  							rep0 += 8, SEQ_ALIGN3);
     698  #endif
     699  
     700  					if (rep0 == UINT32_MAX) {
     701  						// End of payload marker was
     702  						// found. It may only be
     703  						// present if
     704  						//   - uncompressed size is
     705  						//     unknown or
     706  						//   - after known uncompressed
     707  						//     size amount of bytes has
     708  						//     been decompressed and
     709  						//     caller has indicated
     710  						//     that EOPM might be used
     711  						//     (it's not allowed in
     712  						//     LZMA2).
     713  						if (!eopm_is_valid) {
     714  							ret = LZMA_DATA_ERROR;
     715  							goto out;
     716  						}
     717  
     718  	case SEQ_EOPM:
     719  						// LZMA1 stream with
     720  						// end-of-payload marker.
     721  						rc_normalize(SEQ_EOPM);
     722  						ret = rc_is_finished(rc)
     723  							? LZMA_STREAM_END
     724  							: LZMA_DATA_ERROR;
     725  						goto out;
     726  					}
     727  				}
     728  			}
     729  
     730  			// Validate the distance we just decoded.
     731  			if (unlikely(!dict_is_distance_valid(&dict, rep0))) {
     732  				ret = LZMA_DATA_ERROR;
     733  				goto out;
     734  			}
     735  
     736  		} else {
     737  			rc_update_1(coder->is_rep[state]);
     738  
     739  			// Repeated match
     740  			//
     741  			// The match distance is a value that we have had
     742  			// earlier. The latest four match distances are
     743  			// available as rep0, rep1, rep2 and rep3. We will
     744  			// now decode which of them is the new distance.
     745  			//
     746  			// There cannot be a match if we haven't produced
     747  			// any output, so check that first.
     748  			if (unlikely(!dict_is_distance_valid(&dict, 0))) {
     749  				ret = LZMA_DATA_ERROR;
     750  				goto out;
     751  			}
     752  
     753  	case SEQ_IS_REP0:
     754  			rc_if_0(coder->is_rep0[state], SEQ_IS_REP0) {
     755  				rc_update_0(coder->is_rep0[state]);
     756  				// The distance is rep0.
     757  
     758  	case SEQ_IS_REP0_LONG:
     759  				rc_if_0(coder->is_rep0_long[state][pos_state],
     760  						SEQ_IS_REP0_LONG) {
     761  					rc_update_0(coder->is_rep0_long[
     762  							state][pos_state]);
     763  
     764  					update_short_rep(state);
     765  
     766  	case SEQ_SHORTREP:
     767  					if (unlikely(dict_put(&dict, dict_get(
     768  							&dict, rep0)))) {
     769  						coder->sequence = SEQ_SHORTREP;
     770  						goto out;
     771  					}
     772  
     773  					continue;
     774  				}
     775  
     776  				// Repeating more than one byte at
     777  				// distance of rep0.
     778  				rc_update_1(coder->is_rep0_long[
     779  						state][pos_state]);
     780  
     781  			} else {
     782  				rc_update_1(coder->is_rep0[state]);
     783  
     784  	case SEQ_IS_REP1:
     785  				// The distance is rep1, rep2 or rep3. Once
     786  				// we find out which one of these three, it
     787  				// is stored to rep0 and rep1, rep2 and rep3
     788  				// are updated accordingly.
     789  				rc_if_0(coder->is_rep1[state], SEQ_IS_REP1) {
     790  					rc_update_0(coder->is_rep1[state]);
     791  
     792  					const uint32_t distance = rep1;
     793  					rep1 = rep0;
     794  					rep0 = distance;
     795  
     796  				} else {
     797  					rc_update_1(coder->is_rep1[state]);
     798  	case SEQ_IS_REP2:
     799  					rc_if_0(coder->is_rep2[state],
     800  							SEQ_IS_REP2) {
     801  						rc_update_0(coder->is_rep2[
     802  								state]);
     803  
     804  						const uint32_t distance = rep2;
     805  						rep2 = rep1;
     806  						rep1 = rep0;
     807  						rep0 = distance;
     808  
     809  					} else {
     810  						rc_update_1(coder->is_rep2[
     811  								state]);
     812  
     813  						const uint32_t distance = rep3;
     814  						rep3 = rep2;
     815  						rep2 = rep1;
     816  						rep1 = rep0;
     817  						rep0 = distance;
     818  					}
     819  				}
     820  			}
     821  
     822  			update_long_rep(state);
     823  
     824  			// Decode the length of the repeated match.
     825  			len_decode(len, coder->rep_len_decoder,
     826  					pos_state, SEQ_REP_LEN);
     827  		}
     828  
     829  		/////////////////////////////////
     830  		// Repeat from history buffer. //
     831  		/////////////////////////////////
     832  
     833  		// The length is always between these limits. There is no way
     834  		// to trigger the algorithm to set len outside this range.
     835  		assert(len >= MATCH_LEN_MIN);
     836  		assert(len <= MATCH_LEN_MAX);
     837  
     838  	case SEQ_COPY:
     839  		// Repeat len bytes from distance of rep0.
     840  		if (unlikely(dict_repeat(&dict, rep0, &len))) {
     841  			coder->sequence = SEQ_COPY;
     842  			goto out;
     843  		}
     844  	}
     845  
     846  out:
     847  	// Save state
     848  
     849  	// NOTE: Must not copy dict.limit.
     850  	dictptr->pos = dict.pos;
     851  	dictptr->full = dict.full;
     852  
     853  	rc_from_local(coder->rc, *in_pos);
     854  
     855  	coder->state = state;
     856  	coder->rep0 = rep0;
     857  	coder->rep1 = rep1;
     858  	coder->rep2 = rep2;
     859  	coder->rep3 = rep3;
     860  
     861  	coder->probs = probs;
     862  	coder->symbol = symbol;
     863  	coder->limit = limit;
     864  	coder->offset = offset;
     865  	coder->len = len;
     866  
     867  	// Update the remaining amount of uncompressed data if uncompressed
     868  	// size was known.
     869  	if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) {
     870  		coder->uncompressed_size -= dict.pos - dict_start;
     871  
     872  		// If we have gotten all the output but the decoder wants
     873  		// to write more output, the file is corrupt. There are
     874  		// three SEQ values where output is produced.
     875  		if (coder->uncompressed_size == 0 && ret == LZMA_OK
     876  				&& (coder->sequence == SEQ_LITERAL_WRITE
     877  					|| coder->sequence == SEQ_SHORTREP
     878  					|| coder->sequence == SEQ_COPY))
     879  			ret = LZMA_DATA_ERROR;
     880  	}
     881  
     882  	if (ret == LZMA_STREAM_END) {
     883  		// Reset the range decoder so that it is ready to reinitialize
     884  		// for a new LZMA2 chunk.
     885  		rc_reset(coder->rc);
     886  		coder->sequence = SEQ_IS_MATCH;
     887  	}
     888  
     889  	return ret;
     890  }
     891  
     892  
     893  
     894  static void
     895  lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size,
     896  		bool allow_eopm)
     897  {
     898  	lzma_lzma1_decoder *coder = coder_ptr;
     899  	coder->uncompressed_size = uncompressed_size;
     900  	coder->allow_eopm = allow_eopm;
     901  }
     902  
     903  
     904  static void
     905  lzma_decoder_reset(void *coder_ptr, const void *opt)
     906  {
     907  	lzma_lzma1_decoder *coder = coder_ptr;
     908  	const lzma_options_lzma *options = opt;
     909  
     910  	// NOTE: We assume that lc/lp/pb are valid since they were
     911  	// successfully decoded with lzma_lzma_decode_properties().
     912  
     913  	// Calculate pos_mask. We don't need pos_bits as is for anything.
     914  	coder->pos_mask = (1U << options->pb) - 1;
     915  
     916  	// Initialize the literal decoder.
     917  	literal_init(coder->literal, options->lc, options->lp);
     918  
     919  	coder->literal_context_bits = options->lc;
     920  	coder->literal_pos_mask = (1U << options->lp) - 1;
     921  
     922  	// State
     923  	coder->state = STATE_LIT_LIT;
     924  	coder->rep0 = 0;
     925  	coder->rep1 = 0;
     926  	coder->rep2 = 0;
     927  	coder->rep3 = 0;
     928  	coder->pos_mask = (1U << options->pb) - 1;
     929  
     930  	// Range decoder
     931  	rc_reset(coder->rc);
     932  
     933  	// Bit and bittree decoders
     934  	for (uint32_t i = 0; i < STATES; ++i) {
     935  		for (uint32_t j = 0; j <= coder->pos_mask; ++j) {
     936  			bit_reset(coder->is_match[i][j]);
     937  			bit_reset(coder->is_rep0_long[i][j]);
     938  		}
     939  
     940  		bit_reset(coder->is_rep[i]);
     941  		bit_reset(coder->is_rep0[i]);
     942  		bit_reset(coder->is_rep1[i]);
     943  		bit_reset(coder->is_rep2[i]);
     944  	}
     945  
     946  	for (uint32_t i = 0; i < DIST_STATES; ++i)
     947  		bittree_reset(coder->dist_slot[i], DIST_SLOT_BITS);
     948  
     949  	for (uint32_t i = 0; i < FULL_DISTANCES - DIST_MODEL_END; ++i)
     950  		bit_reset(coder->pos_special[i]);
     951  
     952  	bittree_reset(coder->pos_align, ALIGN_BITS);
     953  
     954  	// Len decoders (also bit/bittree)
     955  	const uint32_t num_pos_states = 1U << options->pb;
     956  	bit_reset(coder->match_len_decoder.choice);
     957  	bit_reset(coder->match_len_decoder.choice2);
     958  	bit_reset(coder->rep_len_decoder.choice);
     959  	bit_reset(coder->rep_len_decoder.choice2);
     960  
     961  	for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) {
     962  		bittree_reset(coder->match_len_decoder.low[pos_state],
     963  				LEN_LOW_BITS);
     964  		bittree_reset(coder->match_len_decoder.mid[pos_state],
     965  				LEN_MID_BITS);
     966  
     967  		bittree_reset(coder->rep_len_decoder.low[pos_state],
     968  				LEN_LOW_BITS);
     969  		bittree_reset(coder->rep_len_decoder.mid[pos_state],
     970  				LEN_MID_BITS);
     971  	}
     972  
     973  	bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS);
     974  	bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS);
     975  
     976  	coder->sequence = SEQ_IS_MATCH;
     977  	coder->probs = NULL;
     978  	coder->symbol = 0;
     979  	coder->limit = 0;
     980  	coder->offset = 0;
     981  	coder->len = 0;
     982  
     983  	return;
     984  }
     985  
     986  
     987  extern lzma_ret
     988  lzma_lzma_decoder_create(lzma_lz_decoder *lz, const lzma_allocator *allocator,
     989  		const lzma_options_lzma *options, lzma_lz_options *lz_options)
     990  {
     991  	if (lz->coder == NULL) {
     992  		lz->coder = lzma_alloc(sizeof(lzma_lzma1_decoder), allocator);
     993  		if (lz->coder == NULL)
     994  			return LZMA_MEM_ERROR;
     995  
     996  		lz->code = &lzma_decode;
     997  		lz->reset = &lzma_decoder_reset;
     998  		lz->set_uncompressed = &lzma_decoder_uncompressed;
     999  	}
    1000  
    1001  	// All dictionary sizes are OK here. LZ decoder will take care of
    1002  	// the special cases.
    1003  	lz_options->dict_size = options->dict_size;
    1004  	lz_options->preset_dict = options->preset_dict;
    1005  	lz_options->preset_dict_size = options->preset_dict_size;
    1006  
    1007  	return LZMA_OK;
    1008  }
    1009  
    1010  
    1011  /// Allocate and initialize LZMA decoder. This is used only via LZ
    1012  /// initialization (lzma_lzma_decoder_init() passes function pointer to
    1013  /// the LZ initialization).
    1014  static lzma_ret
    1015  lzma_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator,
    1016  		lzma_vli id, const void *options, lzma_lz_options *lz_options)
    1017  {
    1018  	if (!is_lclppb_valid(options))
    1019  		return LZMA_PROG_ERROR;
    1020  
    1021  	lzma_vli uncomp_size = LZMA_VLI_UNKNOWN;
    1022  	bool allow_eopm = true;
    1023  
    1024  	if (id == LZMA_FILTER_LZMA1EXT) {
    1025  		const lzma_options_lzma *opt = options;
    1026  
    1027  		// Only one flag is supported.
    1028  		if (opt->ext_flags & ~LZMA_LZMA1EXT_ALLOW_EOPM)
    1029  			return LZMA_OPTIONS_ERROR;
    1030  
    1031  		// FIXME? Using lzma_vli instead of uint64_t is weird because
    1032  		// this has nothing to do with .xz headers and variable-length
    1033  		// integer encoding. On the other hand, using LZMA_VLI_UNKNOWN
    1034  		// instead of UINT64_MAX is clearer when unknown size is
    1035  		// meant. A problem with using lzma_vli is that now we
    1036  		// allow > LZMA_VLI_MAX which is fine in this file but
    1037  		// it's still confusing. Note that alone_decoder.c also
    1038  		// allows > LZMA_VLI_MAX when setting uncompressed size.
    1039  		uncomp_size = opt->ext_size_low
    1040  				+ ((uint64_t)(opt->ext_size_high) << 32);
    1041  		allow_eopm = (opt->ext_flags & LZMA_LZMA1EXT_ALLOW_EOPM) != 0
    1042  				|| uncomp_size == LZMA_VLI_UNKNOWN;
    1043  	}
    1044  
    1045  	return_if_error(lzma_lzma_decoder_create(
    1046  			lz, allocator, options, lz_options));
    1047  
    1048  	lzma_decoder_reset(lz->coder, options);
    1049  	lzma_decoder_uncompressed(lz->coder, uncomp_size, allow_eopm);
    1050  
    1051  	return LZMA_OK;
    1052  }
    1053  
    1054  
    1055  extern lzma_ret
    1056  lzma_lzma_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
    1057  		const lzma_filter_info *filters)
    1058  {
    1059  	// LZMA can only be the last filter in the chain. This is enforced
    1060  	// by the raw_decoder initialization.
    1061  	assert(filters[1].init == NULL);
    1062  
    1063  	return lzma_lz_decoder_init(next, allocator, filters,
    1064  			&lzma_decoder_init);
    1065  }
    1066  
    1067  
    1068  extern bool
    1069  lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte)
    1070  {
    1071  	if (byte > (4 * 5 + 4) * 9 + 8)
    1072  		return true;
    1073  
    1074  	// See the file format specification to understand this.
    1075  	options->pb = byte / (9 * 5);
    1076  	byte -= options->pb * 9 * 5;
    1077  	options->lp = byte / 9;
    1078  	options->lc = byte - options->lp * 9;
    1079  
    1080  	return options->lc + options->lp > LZMA_LCLP_MAX;
    1081  }
    1082  
    1083  
    1084  extern uint64_t
    1085  lzma_lzma_decoder_memusage_nocheck(const void *options)
    1086  {
    1087  	const lzma_options_lzma *const opt = options;
    1088  	return sizeof(lzma_lzma1_decoder)
    1089  			+ lzma_lz_decoder_memusage(opt->dict_size);
    1090  }
    1091  
    1092  
    1093  extern uint64_t
    1094  lzma_lzma_decoder_memusage(const void *options)
    1095  {
    1096  	if (!is_lclppb_valid(options))
    1097  		return UINT64_MAX;
    1098  
    1099  	return lzma_lzma_decoder_memusage_nocheck(options);
    1100  }
    1101  
    1102  
    1103  extern lzma_ret
    1104  lzma_lzma_props_decode(void **options, const lzma_allocator *allocator,
    1105  		const uint8_t *props, size_t props_size)
    1106  {
    1107  	if (props_size != 5)
    1108  		return LZMA_OPTIONS_ERROR;
    1109  
    1110  	lzma_options_lzma *opt
    1111  			= lzma_alloc(sizeof(lzma_options_lzma), allocator);
    1112  	if (opt == NULL)
    1113  		return LZMA_MEM_ERROR;
    1114  
    1115  	if (lzma_lzma_lclppb_decode(opt, props[0]))
    1116  		goto error;
    1117  
    1118  	// All dictionary sizes are accepted, including zero. LZ decoder
    1119  	// will automatically use a dictionary at least a few KiB even if
    1120  	// a smaller dictionary is requested.
    1121  	opt->dict_size = read32le(props + 1);
    1122  
    1123  	opt->preset_dict = NULL;
    1124  	opt->preset_dict_size = 0;
    1125  
    1126  	*options = opt;
    1127  
    1128  	return LZMA_OK;
    1129  
    1130  error:
    1131  	lzma_free(opt, allocator);
    1132  	return LZMA_OPTIONS_ERROR;
    1133  }