(root)/
xz-5.4.5/
src/
liblzma/
lzma/
fastpos.h
       1  ///////////////////////////////////////////////////////////////////////////////
       2  //
       3  /// \file       fastpos.h
       4  /// \brief      Kind of two-bit version of bit scan reverse
       5  ///
       6  //  Authors:    Igor Pavlov
       7  //              Lasse Collin
       8  //
       9  //  This file has been put into the public domain.
      10  //  You can do whatever you want with this file.
      11  //
      12  ///////////////////////////////////////////////////////////////////////////////
      13  
      14  #ifndef LZMA_FASTPOS_H
      15  #define LZMA_FASTPOS_H
      16  
      17  // LZMA encodes match distances by storing the highest two bits using
      18  // a six-bit value [0, 63], and then the missing lower bits.
      19  // Dictionary size is also stored using this encoding in the .xz
      20  // file format header.
      21  //
      22  // fastpos.h provides a way to quickly find out the correct six-bit
      23  // values. The following table gives some examples of this encoding:
      24  //
      25  //     dist   return
      26  //       0       0
      27  //       1       1
      28  //       2       2
      29  //       3       3
      30  //       4       4
      31  //       5       4
      32  //       6       5
      33  //       7       5
      34  //       8       6
      35  //      11       6
      36  //      12       7
      37  //     ...      ...
      38  //      15       7
      39  //      16       8
      40  //      17       8
      41  //     ...      ...
      42  //      23       8
      43  //      24       9
      44  //      25       9
      45  //     ...      ...
      46  //
      47  //
      48  // Provided functions or macros
      49  // ----------------------------
      50  //
      51  // get_dist_slot(dist) is the basic version. get_dist_slot_2(dist)
      52  // assumes that dist >= FULL_DISTANCES, thus the result is at least
      53  // FULL_DISTANCES_BITS * 2. Using get_dist_slot(dist) instead of
      54  // get_dist_slot_2(dist) would give the same result, but get_dist_slot_2(dist)
      55  // should be tiny bit faster due to the assumption being made.
      56  //
      57  //
      58  // Size vs. speed
      59  // --------------
      60  //
      61  // With some CPUs that have fast BSR (bit scan reverse) instruction, the
      62  // size optimized version is slightly faster than the bigger table based
      63  // approach. Such CPUs include Intel Pentium Pro, Pentium II, Pentium III
      64  // and Core 2 (possibly others). AMD K7 seems to have slower BSR, but that
      65  // would still have speed roughly comparable to the table version. Older
      66  // x86 CPUs like the original Pentium have very slow BSR; on those systems
      67  // the table version is a lot faster.
      68  //
      69  // On some CPUs, the table version is a lot faster when using position
      70  // dependent code, but with position independent code the size optimized
      71  // version is slightly faster. This occurs at least on 32-bit SPARC (no
      72  // ASM optimizations).
      73  //
      74  // I'm making the table version the default, because that has good speed
      75  // on all systems I have tried. The size optimized version is sometimes
      76  // slightly faster, but sometimes it is a lot slower.
      77  
      78  #ifdef HAVE_SMALL
      79  #	define get_dist_slot(dist) \
      80  		((dist) <= 4 ? (dist) : get_dist_slot_2(dist))
      81  
      82  static inline uint32_t
      83  get_dist_slot_2(uint32_t dist)
      84  {
      85  	const uint32_t i = bsr32(dist);
      86  	return (i + i) + ((dist >> (i - 1)) & 1);
      87  }
      88  
      89  
      90  #else
      91  
      92  #define FASTPOS_BITS 13
      93  
      94  lzma_attr_visibility_hidden
      95  extern const uint8_t lzma_fastpos[1 << FASTPOS_BITS];
      96  
      97  
      98  #define fastpos_shift(extra, n) \
      99  	((extra) + (n) * (FASTPOS_BITS - 1))
     100  
     101  #define fastpos_limit(extra, n) \
     102  	(UINT32_C(1) << (FASTPOS_BITS + fastpos_shift(extra, n)))
     103  
     104  #define fastpos_result(dist, extra, n) \
     105  	(uint32_t)(lzma_fastpos[(dist) >> fastpos_shift(extra, n)]) \
     106  			+ 2 * fastpos_shift(extra, n)
     107  
     108  
     109  static inline uint32_t
     110  get_dist_slot(uint32_t dist)
     111  {
     112  	// If it is small enough, we can pick the result directly from
     113  	// the precalculated table.
     114  	if (dist < fastpos_limit(0, 0))
     115  		return lzma_fastpos[dist];
     116  
     117  	if (dist < fastpos_limit(0, 1))
     118  		return fastpos_result(dist, 0, 1);
     119  
     120  	return fastpos_result(dist, 0, 2);
     121  }
     122  
     123  
     124  #ifdef FULL_DISTANCES_BITS
     125  static inline uint32_t
     126  get_dist_slot_2(uint32_t dist)
     127  {
     128  	assert(dist >= FULL_DISTANCES);
     129  
     130  	if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 0))
     131  		return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 0);
     132  
     133  	if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 1))
     134  		return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 1);
     135  
     136  	return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 2);
     137  }
     138  #endif
     139  
     140  #endif
     141  
     142  #endif