(root)/
xz-5.4.5/
src/
liblzma/
common/
index_hash.c
       1  ///////////////////////////////////////////////////////////////////////////////
       2  //
       3  /// \file       index_hash.c
       4  /// \brief      Validates Index by using a hash function
       5  //
       6  //  Author:     Lasse Collin
       7  //
       8  //  This file has been put into the public domain.
       9  //  You can do whatever you want with this file.
      10  //
      11  ///////////////////////////////////////////////////////////////////////////////
      12  
      13  #include "common.h"
      14  #include "index.h"
      15  #include "check.h"
      16  
      17  
      18  typedef struct {
          	// One set of running totals. Two instances exist in
          	// struct lzma_index_hash_s: "blocks" is filled by
          	// lzma_index_hash_append() and "records" by
          	// lzma_index_hash_decode(). Both go through hash_append(),
          	// which updates every member below once per
          	// (Unpadded Size, Uncompressed Size) pair.

      19  	/// Sum of the Block sizes (including Block Padding): each
          	/// Unpadded Size is passed through vli_ceil4() before adding.
      20  	lzma_vli blocks_size;
      21  
      22  	/// Sum of the Uncompressed Size fields
      23  	lzma_vli uncompressed_size;
      24  
      25  	/// Number of Records, that is, the number of hash_append() calls
          	/// made on this structure since it was initialized.
      26  	lzma_vli count;
      27  
      28  	/// Size of the List of Index Records as bytes: the sum of
          	/// lzma_vli_size() of both fields of every Record.
      29  	lzma_vli index_list_size;
      30  
      31  	/// Check calculated from Unpadded Sizes and Uncompressed Sizes.
          	/// The check type is always LZMA_CHECK_BEST.
      32  	lzma_check_state check;
      33  
      34  } lzma_index_hash_info;
      35  
      36  
      37  struct lzma_index_hash_s {
          	// State of the Index hash. Blocks are reported with
          	// lzma_index_hash_append() while sequence == SEQ_BLOCK; after
          	// that lzma_index_hash_decode() steps through the remaining
          	// sequence values while it reads the Index field.

          	/// Current position in the Index field. The values are listed
          	/// in the order in which lzma_index_hash_decode() visits them.
      38  	enum {
          		/// Blocks may still be appended; the decoder expects
          		/// the Index Indicator byte next.
      39  		SEQ_BLOCK,
          		/// Number of Records
      40  		SEQ_COUNT,
          		/// Unpadded Size of a Record
      41  		SEQ_UNPADDED,
          		/// Uncompressed Size of a Record
      42  		SEQ_UNCOMPRESSED,
          		/// Calculate how many Index Padding bytes to expect
      43  		SEQ_PADDING_INIT,
          		/// Index Padding bytes, followed by the comparison of
          		/// the sizes and the hashes
      44  		SEQ_PADDING,
          		/// The four bytes of the CRC32 field
      45  		SEQ_CRC32,
      46  	} sequence;
      47  
      48  	/// Information collected while decoding the actual Blocks.
          	/// Updated by lzma_index_hash_append().
      49  	lzma_index_hash_info blocks;
      50  
      51  	/// Information collected from the Index field.
          	/// Updated by lzma_index_hash_decode() after each Record.
      52  	lzma_index_hash_info records;
      53  
      54  	/// Number of Records not fully decoded. It is read from the
          	/// Number of Records field and decremented after each Record.
      55  	lzma_vli remaining;
      56  
      57  	/// Unpadded Size currently being read from an Index Record.
      58  	lzma_vli unpadded_size;
      59  
      60  	/// Uncompressed Size currently being read from an Index Record.
      61  	lzma_vli uncompressed_size;
      62  
      63  	/// Position in variable-length integers when decoding them from
      64  	/// the List of Records.
          	///
          	/// The same variable is reused in SEQ_PADDING as the number of
          	/// Index Padding bytes still expected, and in SEQ_CRC32 as the
          	/// index (0-3) of the next CRC32 byte to compare.
      65  	size_t pos;
      66  
      67  	/// CRC32 of the Index, accumulated over the input consumed by
          	/// lzma_index_hash_decode() before the CRC32 field.
      68  	uint32_t crc32;
      69  };
      70  
      71  
      72  extern LZMA_API(lzma_index_hash *)
      73  lzma_index_hash_init(lzma_index_hash *index_hash,
      74  		const lzma_allocator *allocator)
      75  {
      76  	if (index_hash == NULL) {
      77  		index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator);
      78  		if (index_hash == NULL)
      79  			return NULL;
      80  	}
      81  
      82  	index_hash->sequence = SEQ_BLOCK;
      83  	index_hash->blocks.blocks_size = 0;
      84  	index_hash->blocks.uncompressed_size = 0;
      85  	index_hash->blocks.count = 0;
      86  	index_hash->blocks.index_list_size = 0;
      87  	index_hash->records.blocks_size = 0;
      88  	index_hash->records.uncompressed_size = 0;
      89  	index_hash->records.count = 0;
      90  	index_hash->records.index_list_size = 0;
      91  	index_hash->unpadded_size = 0;
      92  	index_hash->uncompressed_size = 0;
      93  	index_hash->pos = 0;
      94  	index_hash->crc32 = 0;
      95  
      96  	// These cannot fail because LZMA_CHECK_BEST is known to be supported.
      97  	(void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST);
      98  	(void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST);
      99  
     100  	return index_hash;
     101  }
     102  
     103  
     104  extern LZMA_API(void)
     105  lzma_index_hash_end(lzma_index_hash *index_hash,
     106  		const lzma_allocator *allocator)
     107  {
     108  	lzma_free(index_hash, allocator);
     109  	return;
     110  }
     111  
     112  
     113  extern LZMA_API(lzma_vli)
     114  lzma_index_hash_size(const lzma_index_hash *index_hash)
     115  {
     116  	// Get the size of the Index from ->blocks instead of ->records for
     117  	// cases where application wants to know the Index Size before
     118  	// decoding the Index.
     119  	return index_size(index_hash->blocks.count,
     120  			index_hash->blocks.index_list_size);
     121  }
     122  
     123  
     124  /// Updates the sizes and the hash without any validation.
     125  static void
     126  hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size,
     127  		lzma_vli uncompressed_size)
     128  {
     129  	info->blocks_size += vli_ceil4(unpadded_size);
     130  	info->uncompressed_size += uncompressed_size;
     131  	info->index_list_size += lzma_vli_size(unpadded_size)
     132  			+ lzma_vli_size(uncompressed_size);
     133  	++info->count;
     134  
     135  	const lzma_vli sizes[2] = { unpadded_size, uncompressed_size };
     136  	lzma_check_update(&info->check, LZMA_CHECK_BEST,
     137  			(const uint8_t *)(sizes), sizeof(sizes));
     138  
     139  	return;
     140  }
     141  
     142  
     143  extern LZMA_API(lzma_ret)
     144  lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size,
     145  		lzma_vli uncompressed_size)
     146  {
     147  	// Validate the arguments.
     148  	if (index_hash == NULL || index_hash->sequence != SEQ_BLOCK
     149  			|| unpadded_size < UNPADDED_SIZE_MIN
     150  			|| unpadded_size > UNPADDED_SIZE_MAX
     151  			|| uncompressed_size > LZMA_VLI_MAX)
     152  		return LZMA_PROG_ERROR;
     153  
     154  	// Update the hash.
     155  	hash_append(&index_hash->blocks, unpadded_size, uncompressed_size);
     156  
     157  	// Validate the properties of *info are still in allowed limits.
     158  	if (index_hash->blocks.blocks_size > LZMA_VLI_MAX
     159  			|| index_hash->blocks.uncompressed_size > LZMA_VLI_MAX
     160  			|| index_size(index_hash->blocks.count,
     161  					index_hash->blocks.index_list_size)
     162  				> LZMA_BACKWARD_SIZE_MAX
     163  			|| index_stream_size(index_hash->blocks.blocks_size,
     164  					index_hash->blocks.count,
     165  					index_hash->blocks.index_list_size)
     166  				> LZMA_VLI_MAX)
     167  		return LZMA_DATA_ERROR;
     168  
     169  	return LZMA_OK;
     170  }
     171  
     172  
     173  extern LZMA_API(lzma_ret)
     174  lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
     175  		size_t *in_pos, size_t in_size)
     176  {
          	// Reads the Index field from in[*in_pos] up to in[in_size - 1]
          	// and compares it against what was collected with
          	// lzma_index_hash_append(). All state is kept in *index_hash,
          	// so the Index can be fed in over several calls. *in_pos is
          	// advanced past the bytes that were consumed.
          	//
          	// Return values produced in this function:
          	//   - LZMA_OK: the input was used up before the end of the
          	//     CRC32 field
          	//   - LZMA_STREAM_END: all four CRC32 bytes were read (and
          	//     they matched, except in fuzzing builds)
          	//   - LZMA_DATA_ERROR: the Index is malformed or doesn't
          	//     match the appended Blocks
          	//   - LZMA_BUF_ERROR: no input was given
          	//   - LZMA_PROG_ERROR: index_hash->sequence is not any of
          	//     the known values
          	// Additionally, if lzma_vli_decode() returns something else
          	// than LZMA_STREAM_END, that value is returned as is.

     177  	// Catch zero input buffer here, because in contrast to Index encoder
     178  	// and decoder functions, applications call this function directly
     179  	// instead of via lzma_code(), which does the buffer checking.
     180  	if (*in_pos >= in_size)
     181  		return LZMA_BUF_ERROR;
     182  
     183  	// NOTE: This function has many similarities to index_encode() and
     184  	// index_decode() functions found from index_encoder.c and
     185  	// index_decoder.c. See the comments especially in index_encoder.c.
          	//
          	// in_start remembers where this call began so that the CRC32
          	// can be updated over exactly the bytes consumed by this call.
     186  	const size_t in_start = *in_pos;
     187  	lzma_ret ret = LZMA_OK;
     188  
          	// The switch is the whole body of the while-loop. A "break"
          	// only leaves the switch; the loop condition is then checked
          	// again and the switch is entered with the updated sequence.
     189  	while (*in_pos < in_size)
     190  	switch (index_hash->sequence) {
     191  	case SEQ_BLOCK:
     192  		// Check the Index Indicator is present.
     193  		if (in[(*in_pos)++] != INDEX_INDICATOR)
     194  			return LZMA_DATA_ERROR;
     195  
          		// Leaving SEQ_BLOCK also means that
          		// lzma_index_hash_append() won't accept more Blocks.
     196  		index_hash->sequence = SEQ_COUNT;
     197  		break;
     198  
     199  	case SEQ_COUNT: {
          		// index_hash->pos holds the position inside the integer,
          		// so decoding can continue in the next call if the input
          		// ends in the middle of the field.
     200  		ret = lzma_vli_decode(&index_hash->remaining,
     201  				&index_hash->pos, in, in_pos, in_size);
     202  		if (ret != LZMA_STREAM_END)
     203  			goto out;
     204  
     205  		// The count must match the count of the Blocks decoded.
     206  		if (index_hash->remaining != index_hash->blocks.count)
     207  			return LZMA_DATA_ERROR;
     208  
          		// The integer is complete: reset ret and pos for the
          		// next field.
     209  		ret = LZMA_OK;
     210  		index_hash->pos = 0;
     211  
     212  		// Handle the special case when there are no Blocks.
     213  		index_hash->sequence = index_hash->remaining == 0
     214  				? SEQ_PADDING_INIT : SEQ_UNPADDED;
     215  		break;
     216  	}
     217  
     218  	case SEQ_UNPADDED:
     219  	case SEQ_UNCOMPRESSED: {
          		// Both fields of a Record are decoded with the same
          		// code; only the destination variable differs.
     220  		lzma_vli *size = index_hash->sequence == SEQ_UNPADDED
     221  				? &index_hash->unpadded_size
     222  				: &index_hash->uncompressed_size;
     223  
     224  		ret = lzma_vli_decode(size, &index_hash->pos,
     225  				in, in_pos, in_size);
     226  		if (ret != LZMA_STREAM_END)
     227  			goto out;
     228  
     229  		ret = LZMA_OK;
     230  		index_hash->pos = 0;
     231  
     232  		if (index_hash->sequence == SEQ_UNPADDED) {
          			// Same range as what lzma_index_hash_append()
          			// requires from its unpadded_size argument.
     233  			if (index_hash->unpadded_size < UNPADDED_SIZE_MIN
     234  					|| index_hash->unpadded_size
     235  						> UNPADDED_SIZE_MAX)
     236  				return LZMA_DATA_ERROR;
     237  
     238  			index_hash->sequence = SEQ_UNCOMPRESSED;
     239  		} else {
     240  			// Update the hash.
     241  			hash_append(&index_hash->records,
     242  					index_hash->unpadded_size,
     243  					index_hash->uncompressed_size);
     244  
     245  			// Verify that we don't go over the known sizes. Note
     246  			// that this validation is simpler than the one used
     247  			// in lzma_index_hash_append(), because here we know
     248  			// that values in index_hash->blocks are already
     249  			// validated and we are fine as long as we don't
     250  			// exceed them in index_hash->records.
     251  			if (index_hash->blocks.blocks_size
     252  					< index_hash->records.blocks_size
     253  					|| index_hash->blocks.uncompressed_size
     254  					< index_hash->records.uncompressed_size
     255  					|| index_hash->blocks.index_list_size
     256  					< index_hash->records.index_list_size)
     257  				return LZMA_DATA_ERROR;
     258  
     259  			// Check if this was the last Record.
     260  			index_hash->sequence = --index_hash->remaining == 0
     261  					? SEQ_PADDING_INIT : SEQ_UNPADDED;
     262  		}
     263  
     264  		break;
     265  	}
     266  
     267  	case SEQ_PADDING_INIT:
          		// From here on pos is the number of Index Padding bytes
          		// still to be read: the amount that rounds the value of
          		// index_size_unpadded() up to a multiple of four (0-3).
     268  		index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded(
     269  				index_hash->records.count,
     270  				index_hash->records.index_list_size)) & 3;
     271  		index_hash->sequence = SEQ_PADDING;
     272  
     273  	// Fall through
     274  
     275  	case SEQ_PADDING:
          		// Consume one padding byte per loop iteration. Every
          		// padding byte must be zero.
     276  		if (index_hash->pos > 0) {
     277  			--index_hash->pos;
     278  			if (in[(*in_pos)++] != 0x00)
     279  				return LZMA_DATA_ERROR;
     280  
     281  			break;
     282  		}
     283  
          		// pos == 0: all padding has been read, so the whole
          		// Index before the CRC32 field has now been seen.

     284  		// Compare the sizes.
     285  		if (index_hash->blocks.blocks_size
     286  				!= index_hash->records.blocks_size
     287  				|| index_hash->blocks.uncompressed_size
     288  				!= index_hash->records.uncompressed_size
     289  				|| index_hash->blocks.index_list_size
     290  				!= index_hash->records.index_list_size)
     291  			return LZMA_DATA_ERROR;
     292  
     293  		// Finish the hashes and compare them.
     294  		lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST);
     295  		lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST);
     296  		if (memcmp(index_hash->blocks.check.buffer.u8,
     297  				index_hash->records.check.buffer.u8,
     298  				lzma_check_size(LZMA_CHECK_BEST)) != 0)
     299  			return LZMA_DATA_ERROR;
     300  
     301  		// Finish the CRC32 calculation.
          		//
          		// The code below never jumps to the "out" label, so the
          		// bytes consumed during this call have to be added to
          		// the CRC32 here. The CRC32 field itself isn't included.
     302  		index_hash->crc32 = lzma_crc32(in + in_start,
     303  				*in_pos - in_start, index_hash->crc32);
     304  
     305  		index_hash->sequence = SEQ_CRC32;
     306  
     307  	// Fall through
     308  
     309  	case SEQ_CRC32:
          		// pos is zero when this state is entered for the first
          		// time. It now counts the CRC32 bytes already compared;
          		// the stored bytes are compared against the calculated
          		// value starting from the least significant byte.
     310  		do {
          			// Out of input in the middle of the CRC32 field:
          			// pos stays in index_hash for the next call.
     311  			if (*in_pos == in_size)
     312  				return LZMA_OK;
     313  
     314  			if (((index_hash->crc32 >> (index_hash->pos * 8))
     315  					& 0xFF) != in[(*in_pos)++]) {
          				// In fuzzing builds a CRC32 mismatch is
          				// ignored and decoding continues.
     316  #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
     317  				return LZMA_DATA_ERROR;
     318  #endif
     319  			}
     320  
     321  		} while (++index_hash->pos < 4);
     322  
     323  		return LZMA_STREAM_END;
     324  
     325  	default:
     326  		assert(0);
     327  		return LZMA_PROG_ERROR;
     328  	}
     329  
          	// This point is reached when the loop ran out of input before
          	// the end of the Index Padding or when lzma_vli_decode() didn't
          	// finish an integer.
     330  out:
     331  	// Update the CRC32.
     332  	//
     333  	// Avoid null pointer + 0 (undefined behavior) in "in + in_start".
     334  	// In such a case we had no input and thus in_used == 0.
     335  	{
     336  		const size_t in_used = *in_pos - in_start;
     337  		if (in_used > 0)
     338  			index_hash->crc32 = lzma_crc32(in + in_start,
     339  					in_used, index_hash->crc32);
     340  	}
     341  
     342  	return ret;
     343  }