(root)/
xz-5.4.5/
src/
liblzma/
common/
lzip_decoder.c
       1  ///////////////////////////////////////////////////////////////////////////////
       2  //
       3  /// \file       lzip_decoder.c
       4  /// \brief      Decodes .lz (lzip) files
       5  //
       6  //  Author:     Michał Górny
       7  //              Lasse Collin
       8  //
       9  //  This file has been put into the public domain.
      10  //  You can do whatever you want with this file.
      11  //
      12  ///////////////////////////////////////////////////////////////////////////////
      13  
      14  #include "lzip_decoder.h"
      15  #include "lzma_decoder.h"
      16  #include "check.h"
      17  
      18  
// Sizes of the .lz member footer in bytes. The footer holds a 32-bit
// CRC32 followed by a 64-bit Data size (4 + 8 = 12 bytes).
// .lz format version 0 lacks the 64-bit Member size field in the footer;
// version 1 appends it (12 + 8 = 20 bytes).
#define LZIP_V0_FOOTER_SIZE 12
#define LZIP_V1_FOOTER_SIZE 20
#define LZIP_FOOTER_SIZE_MAX LZIP_V1_FOOTER_SIZE

// lc/lp/pb are hardcoded in the .lz format.
#define LZIP_LC 3
#define LZIP_LP 0
#define LZIP_PB 2
      28  
      29  
/// State of the .lz decoder. It is kept between calls to the decoding
/// function so that decoding can resume in the middle of any field.
typedef struct {
	/// Which part of the .lz member is being processed
	enum {
		SEQ_ID_STRING,
		SEQ_VERSION,
		SEQ_DICT_SIZE,
		SEQ_CODER_INIT,
		SEQ_LZMA_STREAM,
		SEQ_MEMBER_FOOTER,
	} sequence;

	/// .lz member format version
	uint32_t version;

	/// CRC32 of the uncompressed data in the .lz member
	uint32_t crc32;

	/// Uncompressed size of the .lz member
	uint64_t uncompressed_size;

	/// Size of the whole .lz member in bytes as seen so far: the header
	/// fields, the LZMA stream, and the footer are all counted.
	uint64_t member_size;

	/// Memory usage limit
	uint64_t memlimit;

	/// Amount of memory actually needed
	uint64_t memusage;

	/// If true, LZMA_GET_CHECK is returned after decoding the header
	/// fields. As all files use CRC32 this is redundant but it's
	/// implemented anyway since the initialization function supports
	/// all other flags in addition to LZMA_TELL_ANY_CHECK.
	bool tell_any_check;

	/// If true, we won't calculate or verify the CRC32 of
	/// the uncompressed data.
	bool ignore_check;

	/// If true, we will decode concatenated .lz members and stop if
	/// non-.lz data is seen after at least one member has been
	/// successfully decoded.
	bool concatenated;

	/// When decoding concatenated .lz members, this is true as long as
	/// we are decoding the first .lz member. This is needed to avoid
	/// incorrect LZMA_FORMAT_ERROR in case there is non-.lz data at
	/// the end of the file.
	bool first_member;

	/// Reading position in the header and footer fields
	size_t pos;

	/// Buffer to hold the .lz footer fields
	uint8_t buffer[LZIP_FOOTER_SIZE_MAX];

	/// Options decoded from the .lz header that are needed to
	/// initialize the LZMA1 decoder.
	lzma_options_lzma options;

	/// LZMA1 decoder
	lzma_next_coder lzma_decoder;

} lzma_lzip_coder;
      93  
      94  
/// \brief      Decode .lz member(s) as a resumable state machine
///
/// Processes one member in the order: ID string, version byte, dictionary
/// size byte, LZMA1 decoder initialization, LZMA stream, member footer.
/// coder->sequence and coder->pos are stored in the coder, so the function
/// can return when the input runs out and continue from the same spot on
/// the next call.
///
/// \return     - LZMA_OK: Input ran out in a header or footer field.
///             - LZMA_STREAM_END: The member was decoded and its footer
///               matched (non-concatenated mode), or, after at least one
///               complete member, non-.lz data was seen or the input
///               ended with action == LZMA_FINISH.
///             - LZMA_GET_CHECK: Returned once after the version byte
///               if tell_any_check is set.
///             - LZMA_FORMAT_ERROR: ID string mismatch in the first member.
///             - LZMA_OPTIONS_ERROR: Version byte is greater than 1.
///             - LZMA_DATA_ERROR: Invalid dictionary size byte or a footer
///               field doesn't match the decoded data.
///             - LZMA_MEMLIMIT_ERROR: memusage exceeds memlimit.
///             - LZMA_PROG_ERROR: coder->sequence has an invalid value.
///             - Any value other than LZMA_STREAM_END from the LZMA1
///               decoder is returned as is; an error from
///               lzma_next_filter_init() is passed on via
///               return_if_error().
static lzma_ret
lzip_decode(void *coder_ptr, const lzma_allocator *allocator,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size, uint8_t *restrict out,
		size_t *restrict out_pos, size_t out_size, lzma_action action)
{
	lzma_lzip_coder *coder = coder_ptr;

	// The loop is needed only for concatenated members: after a footer
	// has been verified, "break" leaves the switch and the loop starts
	// over from SEQ_ID_STRING. All other paths return or fall through.
	while (true)
	switch (coder->sequence) {
	case SEQ_ID_STRING: {
		// The "ID string" or magic bytes are "LZIP" in US-ASCII.
		const uint8_t lzip_id_string[4] = { 0x4C, 0x5A, 0x49, 0x50 };

		// coder->pos counts how many ID string bytes have matched
		// so far, possibly across several calls.
		while (coder->pos < sizeof(lzip_id_string)) {
			if (*in_pos >= in_size) {
				// If we are on the 2nd+ concatenated member
				// and the input ends before we can read
				// the magic bytes, we discard the bytes that
				// were already read (up to 3) and finish.
				// See the reasoning below.
				return !coder->first_member
						&& action == LZMA_FINISH
					? LZMA_STREAM_END : LZMA_OK;
			}

			if (in[*in_pos] != lzip_id_string[coder->pos]) {
				// The .lz format allows putting non-.lz data
				// at the end of the file. If we have seen
				// at least one valid .lz member already,
				// then we won't consume the byte at *in_pos
				// and will return LZMA_STREAM_END. This way
				// apps can easily locate and read the non-.lz
				// data after the .lz member(s).
				//
				// NOTE: If the first 1-3 bytes of the non-.lz
				// data match the .lz ID string then the first
				// 1-3 bytes of the junk will get ignored by
				// us. If apps want to properly locate the
				// trailing data they must ensure that the
				// first byte of their custom data isn't the
				// same as the first byte of .lz ID string.
				// With the liblzma API we cannot rewind the
				// input position across calls to lzma_code().
				return !coder->first_member
					? LZMA_STREAM_END : LZMA_FORMAT_ERROR;
			}

			++*in_pos;
			++coder->pos;
		}

		// coder->pos is used again when reading the footer.
		coder->pos = 0;

		// Reset the per-member counters. The ID string bytes that
		// were just consumed are already part of the member size.
		coder->crc32 = 0;
		coder->uncompressed_size = 0;
		coder->member_size = sizeof(lzip_id_string);

		coder->sequence = SEQ_VERSION;
	}

	// Fall through

	case SEQ_VERSION:
		if (*in_pos >= in_size)
			return LZMA_OK;

		coder->version = in[(*in_pos)++];

		// We support version 0 and unextended version 1.
		if (coder->version > 1)
			return LZMA_OPTIONS_ERROR;

		++coder->member_size;

		// The sequence is advanced before the possible return below
		// so that the next call continues from the dictionary size.
		coder->sequence = SEQ_DICT_SIZE;

		// .lz versions 0 and 1 use CRC32 as the integrity check
		// so if the application wanted to know that
		// (LZMA_TELL_ANY_CHECK) we can tell it now.
		if (coder->tell_any_check)
			return LZMA_GET_CHECK;

	// Fall through

	case SEQ_DICT_SIZE: {
		if (*in_pos >= in_size)
			return LZMA_OK;

		const uint32_t ds = in[(*in_pos)++];
		++coder->member_size;

		// The five lowest bits are for the base-2 logarithm of
		// the dictionary size and the highest three bits are
		// the fractional part (0/16 to 7/16) that will be
		// subtracted to get the final value.
		//
		// For example, with 0xB5:
		//     b2log = 21
		//     fracnum = 5
		//     dict_size = 2^21 - 2^21 * 5 / 16 = 1408 KiB
		const uint32_t b2log = ds & 0x1F;
		const uint32_t fracnum = ds >> 5;

		// The format versions 0 and 1 allow dictionary size in the
		// range [4 KiB, 512 MiB]. With b2log == 12 any non-zero
		// fraction would make the size smaller than 4 KiB.
		if (b2log < 12 || b2log > 29 || (b2log == 12 && fracnum > 0))
			return LZMA_DATA_ERROR;

		//   2^[b2log] - 2^[b2log] * [fracnum] / 16
		// = 2^[b2log] - [fracnum] * 2^([b2log] - 4)
		coder->options.dict_size = (UINT32_C(1) << b2log)
				- (fracnum << (b2log - 4));

		assert(coder->options.dict_size >= 4096);
		assert(coder->options.dict_size <= (UINT32_C(512) << 20));

		coder->options.preset_dict = NULL;
		coder->options.lc = LZIP_LC;
		coder->options.lp = LZIP_LP;
		coder->options.pb = LZIP_PB;

		// Calculate the memory usage.
		coder->memusage = lzma_lzma_decoder_memusage(&coder->options)
				+ LZMA_MEMUSAGE_BASE;

		// Initialization is a separate step because if we return
		// LZMA_MEMLIMIT_ERROR we need to be able to restart after
		// the memlimit has been increased.
		coder->sequence = SEQ_CODER_INIT;
	}

	// Fall through

	case SEQ_CODER_INIT: {
		if (coder->memusage > coder->memlimit)
			return LZMA_MEMLIMIT_ERROR;

		// A filter chain with only the LZMA1 decoder; the entry
		// with .init = NULL terminates the array.
		const lzma_filter_info filters[2] = {
			{
				.id = LZMA_FILTER_LZMA1,
				.init = &lzma_lzma_decoder_init,
				.options = &coder->options,
			}, {
				.init = NULL,
			}
		};

		return_if_error(lzma_next_filter_init(&coder->lzma_decoder,
				allocator, filters));

		coder->crc32 = 0;
		coder->sequence = SEQ_LZMA_STREAM;
	}

	// Fall through

	case SEQ_LZMA_STREAM: {
		// Remember the starting positions so that the amount of
		// input consumed and output produced by this call can be
		// added to the member totals.
		const size_t in_start = *in_pos;
		const size_t out_start = *out_pos;

		const lzma_ret ret = coder->lzma_decoder.code(
				coder->lzma_decoder.coder, allocator,
				in, in_pos, in_size, out, out_pos, out_size,
				action);

		const size_t out_used = *out_pos - out_start;

		coder->member_size += *in_pos - in_start;
		coder->uncompressed_size += out_used;

		// Don't update the CRC32 if the integrity check will be
		// ignored or if there was no new output. The latter is
		// important in case out == NULL to avoid null pointer + 0
		// which is undefined behavior.
		if (!coder->ignore_check && out_used > 0)
			coder->crc32 = lzma_crc32(out + out_start, out_used,
					coder->crc32);

		// Anything other than LZMA_STREAM_END (including LZMA_OK
		// and errors) is passed to the caller as is.
		if (ret != LZMA_STREAM_END)
			return ret;

		coder->sequence = SEQ_MEMBER_FOOTER;
	}

	// Fall through

	case SEQ_MEMBER_FOOTER: {
		// The footer of .lz version 0 lacks the Member size field.
		// This is the only difference between version 0 and
		// unextended version 1 formats.
		const size_t footer_size = coder->version == 0
				? LZIP_V0_FOOTER_SIZE
				: LZIP_V1_FOOTER_SIZE;

		// Copy the CRC32, Data size, and Member size fields to
		// the internal buffer.
		lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
				footer_size);

		// Return if we didn't get the whole footer yet.
		if (coder->pos < footer_size)
			return LZMA_OK;

		// Reset for the ID string of a possible next member. The
		// footer bytes are included in the member size before it
		// is compared against the Member size field.
		coder->pos = 0;
		coder->member_size += footer_size;

		// Check that the footer fields match the observed data.
		// Footer layout: CRC32 at offset 0, Data size at offset 4,
		// and (version 1 only) Member size at offset 12.
		if (!coder->ignore_check
				&& coder->crc32 != read32le(&coder->buffer[0]))
			return LZMA_DATA_ERROR;

		if (coder->uncompressed_size != read64le(&coder->buffer[4]))
			return LZMA_DATA_ERROR;

		if (coder->version > 0) {
			// .lz version 0 has no Member size field.
			if (coder->member_size != read64le(&coder->buffer[12]))
				return LZMA_DATA_ERROR;
		}

		// Decoding is finished if we weren't requested to decode
		// more than one .lz member.
		if (!coder->concatenated)
			return LZMA_STREAM_END;

		// Look for the next member. From now on trailing non-.lz
		// data ends decoding instead of being a format error.
		coder->first_member = false;
		coder->sequence = SEQ_ID_STRING;
		break;
	}

	default:
		assert(0);
		return LZMA_PROG_ERROR;
	}

	// Never reached
}
     332  
     333  
     334  static void
     335  lzip_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
     336  {
     337  	lzma_lzip_coder *coder = coder_ptr;
     338  	lzma_next_end(&coder->lzma_decoder, allocator);
     339  	lzma_free(coder, allocator);
     340  	return;
     341  }
     342  
     343  
     344  static lzma_check
     345  lzip_decoder_get_check(const void *coder_ptr lzma_attribute((__unused__)))
     346  {
     347  	return LZMA_CHECK_CRC32;
     348  }
     349  
     350  
     351  static lzma_ret
     352  lzip_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
     353  		uint64_t *old_memlimit, uint64_t new_memlimit)
     354  {
     355  	lzma_lzip_coder *coder = coder_ptr;
     356  
     357  	*memusage = coder->memusage;
     358  	*old_memlimit = coder->memlimit;
     359  
     360  	if (new_memlimit != 0) {
     361  		if (new_memlimit < coder->memusage)
     362  			return LZMA_MEMLIMIT_ERROR;
     363  
     364  		coder->memlimit = new_memlimit;
     365  	}
     366  
     367  	return LZMA_OK;
     368  }
     369  
     370  
     371  extern lzma_ret
     372  lzma_lzip_decoder_init(
     373  		lzma_next_coder *next, const lzma_allocator *allocator,
     374  		uint64_t memlimit, uint32_t flags)
     375  {
     376  	lzma_next_coder_init(&lzma_lzip_decoder_init, next, allocator);
     377  
     378  	if (flags & ~LZMA_SUPPORTED_FLAGS)
     379  		return LZMA_OPTIONS_ERROR;
     380  
     381  	lzma_lzip_coder *coder = next->coder;
     382  	if (coder == NULL) {
     383  		coder = lzma_alloc(sizeof(lzma_lzip_coder), allocator);
     384  		if (coder == NULL)
     385  			return LZMA_MEM_ERROR;
     386  
     387  		next->coder = coder;
     388  		next->code = &lzip_decode;
     389  		next->end = &lzip_decoder_end;
     390  		next->get_check = &lzip_decoder_get_check;
     391  		next->memconfig = &lzip_decoder_memconfig;
     392  
     393  		coder->lzma_decoder = LZMA_NEXT_CODER_INIT;
     394  	}
     395  
     396  	coder->sequence = SEQ_ID_STRING;
     397  	coder->memlimit = my_max(1, memlimit);
     398  	coder->memusage = LZMA_MEMUSAGE_BASE;
     399  	coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0;
     400  	coder->ignore_check = (flags & LZMA_IGNORE_CHECK) != 0;
     401  	coder->concatenated = (flags & LZMA_CONCATENATED) != 0;
     402  	coder->first_member = true;
     403  	coder->pos = 0;
     404  
     405  	return LZMA_OK;
     406  }
     407  
     408  
/// \brief      Public API: initialize \a strm as a .lz decoder
///
/// \param      strm        Stream to initialize
/// \param      memlimit    Memory usage limit passed on to
///                         lzma_lzip_decoder_init()
/// \param      flags       Decoder flags passed on to
///                         lzma_lzip_decoder_init()
///
/// \return     LZMA_OK when this function reaches its end.
///             NOTE(review): lzma_next_strm_init() is a macro defined
///             outside this file; presumably it returns early with an
///             error code if \a strm is unusable or if
///             lzma_lzip_decoder_init() fails -- confirm against the
///             macro definition.
extern LZMA_API(lzma_ret)
lzma_lzip_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags)
{
	lzma_next_strm_init(lzma_lzip_decoder_init, strm, memlimit, flags);

	// Mark LZMA_RUN and LZMA_FINISH as supported actions.
	strm->internal->supported_actions[LZMA_RUN] = true;
	strm->internal->supported_actions[LZMA_FINISH] = true;

	return LZMA_OK;
}