(root)/
xz-5.4.5/
src/
liblzma/
common/
block_buffer_encoder.c
       1  ///////////////////////////////////////////////////////////////////////////////
       2  //
       3  /// \file       block_buffer_encoder.c
       4  /// \brief      Single-call .xz Block encoder
       5  //
       6  //  Author:     Lasse Collin
       7  //
       8  //  This file has been put into the public domain.
       9  //  You can do whatever you want with this file.
      10  //
      11  ///////////////////////////////////////////////////////////////////////////////
      12  
      13  #include "block_buffer_encoder.h"
      14  #include "block_encoder.h"
      15  #include "filter_encoder.h"
      16  #include "lzma2_encoder.h"
      17  #include "check.h"
      18  
      19  
       20  /// Estimate the maximum size of the Block Header and Check fields for
       21  /// a Block that uses LZMA2 uncompressed chunks. We could use
       22  /// lzma_block_header_size() but this is simpler.
       23  ///
       24  /// Block Header Size + Block Flags + Compressed Size
       25  /// + Uncompressed Size + Filter Flags for LZMA2 + CRC32 + Check
       26  /// and round up to the next multiple of four to take Header Padding
       27  /// into account.
           ///
           /// The terms of the expression appear in the same order as in the
           /// list above:
           ///   1                        Block Header Size
           ///   1                        Block Flags
           ///   2 * LZMA_VLI_BYTES_MAX   Compressed Size and Uncompressed Size
           ///   3                        Filter Flags for LZMA2
           ///   4                        CRC32
           ///   LZMA_CHECK_SIZE_MAX      Check
           /// The trailing "+ 3" combined with "& ~3" is what rounds the sum up
           /// to a multiple of four.
           ///
           /// Used by lzma_block_buffer_bound64(), which adds this value to the
           /// padded LZMA2 bound.
       28  #define HEADERS_BOUND ((1 + 1 + 2 * LZMA_VLI_BYTES_MAX + 3 + 4 \
       29  		+ LZMA_CHECK_SIZE_MAX + 3) & ~3)
      30  
      31  
      32  static uint64_t
      33  lzma2_bound(uint64_t uncompressed_size)
      34  {
      35  	// Prevent integer overflow in overhead calculation.
      36  	if (uncompressed_size > COMPRESSED_SIZE_MAX)
      37  		return 0;
      38  
      39  	// Calculate the exact overhead of the LZMA2 headers: Round
      40  	// uncompressed_size up to the next multiple of LZMA2_CHUNK_MAX,
      41  	// multiply by the size of per-chunk header, and add one byte for
      42  	// the end marker.
      43  	const uint64_t overhead = ((uncompressed_size + LZMA2_CHUNK_MAX - 1)
      44  				/ LZMA2_CHUNK_MAX)
      45  			* LZMA2_HEADER_UNCOMPRESSED + 1;
      46  
      47  	// Catch the possible integer overflow.
      48  	if (COMPRESSED_SIZE_MAX - overhead < uncompressed_size)
      49  		return 0;
      50  
      51  	return uncompressed_size + overhead;
      52  }
      53  
      54  
      55  extern uint64_t
      56  lzma_block_buffer_bound64(uint64_t uncompressed_size)
      57  {
      58  	// If the data doesn't compress, we always use uncompressed
      59  	// LZMA2 chunks.
      60  	uint64_t lzma2_size = lzma2_bound(uncompressed_size);
      61  	if (lzma2_size == 0)
      62  		return 0;
      63  
      64  	// Take Block Padding into account.
      65  	lzma2_size = (lzma2_size + 3) & ~UINT64_C(3);
      66  
      67  	// No risk of integer overflow because lzma2_bound() already takes
      68  	// into account the size of the headers in the Block.
      69  	return HEADERS_BOUND + lzma2_size;
      70  }
      71  
      72  
      73  extern LZMA_API(size_t)
      74  lzma_block_buffer_bound(size_t uncompressed_size)
      75  {
      76  	uint64_t ret = lzma_block_buffer_bound64(uncompressed_size);
      77  
      78  #if SIZE_MAX < UINT64_MAX
      79  	// Catch the possible integer overflow on 32-bit systems.
      80  	if (ret > SIZE_MAX)
      81  		return 0;
      82  #endif
      83  
      84  	return ret;
      85  }
      86  
      87  
      88  static lzma_ret
      89  block_encode_uncompressed(lzma_block *block, const uint8_t *in, size_t in_size,
      90  		uint8_t *out, size_t *out_pos, size_t out_size)
      91  {
      92  	// Use LZMA2 uncompressed chunks. We wouldn't need a dictionary at
      93  	// all, but LZMA2 always requires a dictionary, so use the minimum
      94  	// value to minimize memory usage of the decoder.
      95  	lzma_options_lzma lzma2 = {
      96  		.dict_size = LZMA_DICT_SIZE_MIN,
      97  	};
      98  
      99  	lzma_filter filters[2];
     100  	filters[0].id = LZMA_FILTER_LZMA2;
     101  	filters[0].options = &lzma2;
     102  	filters[1].id = LZMA_VLI_UNKNOWN;
     103  
     104  	// Set the above filter options to *block temporarily so that we can
     105  	// encode the Block Header.
     106  	lzma_filter *filters_orig = block->filters;
     107  	block->filters = filters;
     108  
     109  	if (lzma_block_header_size(block) != LZMA_OK) {
     110  		block->filters = filters_orig;
     111  		return LZMA_PROG_ERROR;
     112  	}
     113  
     114  	// Check that there's enough output space. The caller has already
     115  	// set block->compressed_size to what lzma2_bound() has returned,
     116  	// so we can reuse that value. We know that compressed_size is a
     117  	// known valid VLI and header_size is a small value so their sum
     118  	// will never overflow.
     119  	assert(block->compressed_size == lzma2_bound(in_size));
     120  	if (out_size - *out_pos
     121  			< block->header_size + block->compressed_size) {
     122  		block->filters = filters_orig;
     123  		return LZMA_BUF_ERROR;
     124  	}
     125  
     126  	if (lzma_block_header_encode(block, out + *out_pos) != LZMA_OK) {
     127  		block->filters = filters_orig;
     128  		return LZMA_PROG_ERROR;
     129  	}
     130  
     131  	block->filters = filters_orig;
     132  	*out_pos += block->header_size;
     133  
     134  	// Encode the data using LZMA2 uncompressed chunks.
     135  	size_t in_pos = 0;
     136  	uint8_t control = 0x01; // Dictionary reset
     137  
     138  	while (in_pos < in_size) {
     139  		// Control byte: Indicate uncompressed chunk, of which
     140  		// the first resets the dictionary.
     141  		out[(*out_pos)++] = control;
     142  		control = 0x02; // No dictionary reset
     143  
     144  		// Size of the uncompressed chunk
     145  		const size_t copy_size
     146  				= my_min(in_size - in_pos, LZMA2_CHUNK_MAX);
     147  		out[(*out_pos)++] = (copy_size - 1) >> 8;
     148  		out[(*out_pos)++] = (copy_size - 1) & 0xFF;
     149  
     150  		// The actual data
     151  		assert(*out_pos + copy_size <= out_size);
     152  		memcpy(out + *out_pos, in + in_pos, copy_size);
     153  
     154  		in_pos += copy_size;
     155  		*out_pos += copy_size;
     156  	}
     157  
     158  	// End marker
     159  	out[(*out_pos)++] = 0x00;
     160  	assert(*out_pos <= out_size);
     161  
     162  	return LZMA_OK;
     163  }
     164  
     165  
     166  static lzma_ret
     167  block_encode_normal(lzma_block *block, const lzma_allocator *allocator,
     168  		const uint8_t *in, size_t in_size,
     169  		uint8_t *out, size_t *out_pos, size_t out_size)
     170  {
     171  	// Find out the size of the Block Header.
     172  	return_if_error(lzma_block_header_size(block));
     173  
     174  	// Reserve space for the Block Header and skip it for now.
     175  	if (out_size - *out_pos <= block->header_size)
     176  		return LZMA_BUF_ERROR;
     177  
     178  	const size_t out_start = *out_pos;
     179  	*out_pos += block->header_size;
     180  
     181  	// Limit out_size so that we stop encoding if the output would grow
     182  	// bigger than what uncompressed Block would be.
     183  	if (out_size - *out_pos > block->compressed_size)
     184  		out_size = *out_pos + block->compressed_size;
     185  
     186  	// TODO: In many common cases this could be optimized to use
     187  	// significantly less memory.
     188  	lzma_next_coder raw_encoder = LZMA_NEXT_CODER_INIT;
     189  	lzma_ret ret = lzma_raw_encoder_init(
     190  			&raw_encoder, allocator, block->filters);
     191  
     192  	if (ret == LZMA_OK) {
     193  		size_t in_pos = 0;
     194  		ret = raw_encoder.code(raw_encoder.coder, allocator,
     195  				in, &in_pos, in_size, out, out_pos, out_size,
     196  				LZMA_FINISH);
     197  	}
     198  
     199  	// NOTE: This needs to be run even if lzma_raw_encoder_init() failed.
     200  	lzma_next_end(&raw_encoder, allocator);
     201  
     202  	if (ret == LZMA_STREAM_END) {
     203  		// Compression was successful. Write the Block Header.
     204  		block->compressed_size
     205  				= *out_pos - (out_start + block->header_size);
     206  		ret = lzma_block_header_encode(block, out + out_start);
     207  		if (ret != LZMA_OK)
     208  			ret = LZMA_PROG_ERROR;
     209  
     210  	} else if (ret == LZMA_OK) {
     211  		// Output buffer became full.
     212  		ret = LZMA_BUF_ERROR;
     213  	}
     214  
     215  	// Reset *out_pos if something went wrong.
     216  	if (ret != LZMA_OK)
     217  		*out_pos = out_start;
     218  
     219  	return ret;
     220  }
     221  
     222  
     223  static lzma_ret
     224  block_buffer_encode(lzma_block *block, const lzma_allocator *allocator,
     225  		const uint8_t *in, size_t in_size,
     226  		uint8_t *out, size_t *out_pos, size_t out_size,
     227  		bool try_to_compress)
     228  {
     229  	// Validate the arguments.
     230  	if (block == NULL || (in == NULL && in_size != 0) || out == NULL
     231  			|| out_pos == NULL || *out_pos > out_size)
     232  		return LZMA_PROG_ERROR;
     233  
     234  	// The contents of the structure may depend on the version so
     235  	// check the version before validating the contents of *block.
     236  	if (block->version > 1)
     237  		return LZMA_OPTIONS_ERROR;
     238  
     239  	if ((unsigned int)(block->check) > LZMA_CHECK_ID_MAX
     240  			|| (try_to_compress && block->filters == NULL))
     241  		return LZMA_PROG_ERROR;
     242  
     243  	if (!lzma_check_is_supported(block->check))
     244  		return LZMA_UNSUPPORTED_CHECK;
     245  
     246  	// Size of a Block has to be a multiple of four, so limit the size
     247  	// here already. This way we don't need to check it again when adding
     248  	// Block Padding.
     249  	out_size -= (out_size - *out_pos) & 3;
     250  
     251  	// Get the size of the Check field.
     252  	const size_t check_size = lzma_check_size(block->check);
     253  	assert(check_size != UINT32_MAX);
     254  
     255  	// Reserve space for the Check field.
     256  	if (out_size - *out_pos <= check_size)
     257  		return LZMA_BUF_ERROR;
     258  
     259  	out_size -= check_size;
     260  
     261  	// Initialize block->uncompressed_size and calculate the worst-case
     262  	// value for block->compressed_size.
     263  	block->uncompressed_size = in_size;
     264  	block->compressed_size = lzma2_bound(in_size);
     265  	if (block->compressed_size == 0)
     266  		return LZMA_DATA_ERROR;
     267  
     268  	// Do the actual compression.
     269  	lzma_ret ret = LZMA_BUF_ERROR;
     270  	if (try_to_compress)
     271  		ret = block_encode_normal(block, allocator,
     272  				in, in_size, out, out_pos, out_size);
     273  
     274  	if (ret != LZMA_OK) {
     275  		// If the error was something else than output buffer
     276  		// becoming full, return the error now.
     277  		if (ret != LZMA_BUF_ERROR)
     278  			return ret;
     279  
     280  		// The data was incompressible (at least with the options
     281  		// given to us) or the output buffer was too small. Use the
     282  		// uncompressed chunks of LZMA2 to wrap the data into a valid
     283  		// Block. If we haven't been given enough output space, even
     284  		// this may fail.
     285  		return_if_error(block_encode_uncompressed(block, in, in_size,
     286  				out, out_pos, out_size));
     287  	}
     288  
     289  	assert(*out_pos <= out_size);
     290  
     291  	// Block Padding. No buffer overflow here, because we already adjusted
     292  	// out_size so that (out_size - out_start) is a multiple of four.
     293  	// Thus, if the buffer is full, the loop body can never run.
     294  	for (size_t i = (size_t)(block->compressed_size); i & 3; ++i) {
     295  		assert(*out_pos < out_size);
     296  		out[(*out_pos)++] = 0x00;
     297  	}
     298  
     299  	// If there's no Check field, we are done now.
     300  	if (check_size > 0) {
     301  		// Calculate the integrity check. We reserved space for
     302  		// the Check field earlier so we don't need to check for
     303  		// available output space here.
     304  		lzma_check_state check;
     305  		lzma_check_init(&check, block->check);
     306  		lzma_check_update(&check, block->check, in, in_size);
     307  		lzma_check_finish(&check, block->check);
     308  
     309  		memcpy(block->raw_check, check.buffer.u8, check_size);
     310  		memcpy(out + *out_pos, check.buffer.u8, check_size);
     311  		*out_pos += check_size;
     312  	}
     313  
     314  	return LZMA_OK;
     315  }
     316  
     317  
     318  extern LZMA_API(lzma_ret)
     319  lzma_block_buffer_encode(lzma_block *block, const lzma_allocator *allocator,
     320  		const uint8_t *in, size_t in_size,
     321  		uint8_t *out, size_t *out_pos, size_t out_size)
     322  {
     323  	return block_buffer_encode(block, allocator,
     324  			in, in_size, out, out_pos, out_size, true);
     325  }
     326  
     327  
           // Everything up to the matching #endif is compiled only when
           // HAVE_SYMBOL_VERSIONS_LINUX is defined.
      328  #ifdef HAVE_SYMBOL_VERSIONS_LINUX
      329  // This is for compatibility with binaries linked against liblzma that
      330  // has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7.
           //
           // lzma_block_uncomp_encode_522 is declared with the alias attribute
           // naming lzma_block_uncomp_encode_52, so it has no body of its own.
           // NOTE(review): LZMA_SYMVER_API is defined elsewhere; presumably it
           // ties the C identifier to the versioned symbol name given as the
           // first argument -- confirm against its definition.
      331  LZMA_SYMVER_API("lzma_block_uncomp_encode@XZ_5.2.2",
      332  	lzma_ret, lzma_block_uncomp_encode_522)(lzma_block *block,
      333  		const uint8_t *in, size_t in_size,
      334  		uint8_t *out, size_t *out_pos, size_t out_size)
      335  		lzma_nothrow lzma_attr_warn_unused_result
      336  		__attribute__((__alias__("lzma_block_uncomp_encode_52")));
      337  
           // Declaration of the function that the alias above refers to. The
           // version string here uses "@@" whereas the compat one above uses "@".
      338  LZMA_SYMVER_API("lzma_block_uncomp_encode@@XZ_5.2",
      339  	lzma_ret, lzma_block_uncomp_encode_52)(lzma_block *block,
      340  		const uint8_t *in, size_t in_size,
      341  		uint8_t *out, size_t *out_pos, size_t out_size)
      342  		lzma_nothrow lzma_attr_warn_unused_result;
      343  
           // The definition following this conditional section is spelled
           // lzma_block_uncomp_encode; with this macro in effect it defines
           // lzma_block_uncomp_encode_52 instead.
      344  #define lzma_block_uncomp_encode lzma_block_uncomp_encode_52
      345  #endif
     346  extern LZMA_API(lzma_ret)
     347  lzma_block_uncomp_encode(lzma_block *block,
     348  		const uint8_t *in, size_t in_size,
     349  		uint8_t *out, size_t *out_pos, size_t out_size)
     350  {
     351  	// It won't allocate any memory from heap so no need
     352  	// for lzma_allocator.
     353  	return block_buffer_encode(block, NULL,
     354  			in, in_size, out, out_pos, out_size, false);
     355  }