python (3.11.7)

(root)/
include/
python3.11/
internal/
pycore_bitutils.h
       1  /* Bit and bytes utilities.
       2  
       3     Bytes swap functions, reverse order of bytes:
       4  
       5     - _Py_bswap16(uint16_t)
       6     - _Py_bswap32(uint32_t)
       7     - _Py_bswap64(uint64_t)
       8  */
       9  
      10  #ifndef Py_INTERNAL_BITUTILS_H
      11  #define Py_INTERNAL_BITUTILS_H
      12  #ifdef __cplusplus
      13  extern "C" {
      14  #endif
      15  
      16  #ifndef Py_BUILD_CORE
      17  #  error "this header requires Py_BUILD_CORE define"
      18  #endif
      19  
      20  #if defined(__GNUC__) \
      21        && ((__GNUC__ >= 5) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 8))
      22     /* __builtin_bswap16() is available since GCC 4.8,
      23        __builtin_bswap32() is available since GCC 4.3,
      24        __builtin_bswap64() is available since GCC 4.3. */
      25  #  define _PY_HAVE_BUILTIN_BSWAP
      26  #endif
      27  
      28  #ifdef _MSC_VER
      29     /* Get _byteswap_ushort(), _byteswap_ulong(), _byteswap_uint64() */
      30  #  include <intrin.h>
      31  #endif
      32  
      33  static inline uint16_t
      34  _Py_bswap16(uint16_t word)
      35  {
      36  #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap16)
      37      return __builtin_bswap16(word);
      38  #elif defined(_MSC_VER)
      39      Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned short));
      40      return _byteswap_ushort(word);
      41  #else
      42      // Portable implementation which doesn't rely on circular bit shift
      43      return ( ((word & UINT16_C(0x00FF)) << 8)
      44             | ((word & UINT16_C(0xFF00)) >> 8));
      45  #endif
      46  }
      47  
      48  static inline uint32_t
      49  _Py_bswap32(uint32_t word)
      50  {
      51  #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap32)
      52      return __builtin_bswap32(word);
      53  #elif defined(_MSC_VER)
      54      Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned long));
      55      return _byteswap_ulong(word);
      56  #else
      57      // Portable implementation which doesn't rely on circular bit shift
      58      return ( ((word & UINT32_C(0x000000FF)) << 24)
      59             | ((word & UINT32_C(0x0000FF00)) <<  8)
      60             | ((word & UINT32_C(0x00FF0000)) >>  8)
      61             | ((word & UINT32_C(0xFF000000)) >> 24));
      62  #endif
      63  }
      64  
      65  static inline uint64_t
      66  _Py_bswap64(uint64_t word)
      67  {
      68  #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap64)
      69      return __builtin_bswap64(word);
      70  #elif defined(_MSC_VER)
      71      return _byteswap_uint64(word);
      72  #else
      73      // Portable implementation which doesn't rely on circular bit shift
      74      return ( ((word & UINT64_C(0x00000000000000FF)) << 56)
      75             | ((word & UINT64_C(0x000000000000FF00)) << 40)
      76             | ((word & UINT64_C(0x0000000000FF0000)) << 24)
      77             | ((word & UINT64_C(0x00000000FF000000)) <<  8)
      78             | ((word & UINT64_C(0x000000FF00000000)) >>  8)
      79             | ((word & UINT64_C(0x0000FF0000000000)) >> 24)
      80             | ((word & UINT64_C(0x00FF000000000000)) >> 40)
      81             | ((word & UINT64_C(0xFF00000000000000)) >> 56));
      82  #endif
      83  }
      84  
      85  
      86  // Population count: count the number of 1's in 'x'
      87  // (number of bits set to 1), also known as the hamming weight.
      88  //
      89  // Implementation note. CPUID is not used, to test if x86 POPCNT instruction
      90  // can be used, to keep the implementation simple. For example, Visual Studio
      91  // __popcnt() is not used this reason. The clang and GCC builtin function can
      92  // use the x86 POPCNT instruction if the target architecture has SSE4a or
      93  // newer.
      94  static inline int
      95  _Py_popcount32(uint32_t x)
      96  {
      97  #if (defined(__clang__) || defined(__GNUC__))
      98  
      99  #if SIZEOF_INT >= 4
     100      Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned int));
     101      return __builtin_popcount(x);
     102  #else
     103      // The C standard guarantees that unsigned long will always be big enough
     104      // to hold a uint32_t value without losing information.
     105      Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned long));
     106      return __builtin_popcountl(x);
     107  #endif
     108  
     109  #else
     110      // 32-bit SWAR (SIMD Within A Register) popcount
     111  
     112      // Binary: 0 1 0 1 ...
     113      const uint32_t M1 = 0x55555555;
     114      // Binary: 00 11 00 11. ..
     115      const uint32_t M2 = 0x33333333;
     116      // Binary: 0000 1111 0000 1111 ...
     117      const uint32_t M4 = 0x0F0F0F0F;
     118  
     119      // Put count of each 2 bits into those 2 bits
     120      x = x - ((x >> 1) & M1);
     121      // Put count of each 4 bits into those 4 bits
     122      x = (x & M2) + ((x >> 2) & M2);
     123      // Put count of each 8 bits into those 8 bits
     124      x = (x + (x >> 4)) & M4;
     125      // Sum of the 4 byte counts.
     126      // Take care when considering changes to the next line. Portability and
     127      // correctness are delicate here, thanks to C's "integer promotions" (C99
     128      // §6.3.1.1p2). On machines where the `int` type has width greater than 32
     129      // bits, `x` will be promoted to an `int`, and following C's "usual
     130      // arithmetic conversions" (C99 §6.3.1.8), the multiplication will be
     131      // performed as a multiplication of two `unsigned int` operands. In this
     132      // case it's critical that we cast back to `uint32_t` in order to keep only
     133      // the least significant 32 bits. On machines where the `int` type has
     134      // width no greater than 32, the multiplication is of two 32-bit unsigned
     135      // integer types, and the (uint32_t) cast is a no-op. In both cases, we
     136      // avoid the risk of undefined behaviour due to overflow of a
     137      // multiplication of signed integer types.
     138      return (uint32_t)(x * 0x01010101U) >> 24;
     139  #endif
     140  }
     141  
     142  
     143  // Return the index of the most significant 1 bit in 'x'. This is the smallest
     144  // integer k such that x < 2**k. Equivalent to floor(log2(x)) + 1 for x != 0.
     145  static inline int
     146  _Py_bit_length(unsigned long x)
     147  {
     148  #if (defined(__clang__) || defined(__GNUC__))
     149      if (x != 0) {
     150          // __builtin_clzl() is available since GCC 3.4.
     151          // Undefined behavior for x == 0.
     152          return (int)sizeof(unsigned long) * 8 - __builtin_clzl(x);
     153      }
     154      else {
     155          return 0;
     156      }
     157  #elif defined(_MSC_VER)
     158      // _BitScanReverse() is documented to search 32 bits.
     159      Py_BUILD_ASSERT(sizeof(unsigned long) <= 4);
     160      unsigned long msb;
     161      if (_BitScanReverse(&msb, x)) {
     162          return (int)msb + 1;
     163      }
     164      else {
     165          return 0;
     166      }
     167  #else
     168      const int BIT_LENGTH_TABLE[32] = {
     169          0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
     170          5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
     171      };
     172      int msb = 0;
     173      while (x >= 32) {
     174          msb += 6;
     175          x >>= 6;
     176      }
     177      msb += BIT_LENGTH_TABLE[x];
     178      return msb;
     179  #endif
     180  }
     181  
     182  
     183  #ifdef __cplusplus
     184  }
     185  #endif
     186  #endif /* !Py_INTERNAL_BITUTILS_H */