(root)/
Python-3.12.0/
Objects/
stringlib/
find_max_char.h
       1  /* Finding the optimal width of unicode characters in a buffer */
       2  
/* Refuse to compile unless the including stringlib configuration sets
   STRINGLIB_IS_UNICODE to a non-zero value. */
#if !STRINGLIB_IS_UNICODE
# error "find_max_char.h is specific to Unicode"
#endif
       6  
/* Mask with the top bit (0x80) of every byte of a C 'size_t' set.
   AND-ing a size_t-sized word of 1-byte characters with it gives a
   non-zero result exactly when at least one byte of that word is
   non-ASCII (>= 0x80).  Only 4- and 8-byte 'size_t' are supported. */
#if (SIZEOF_SIZE_T == 8)
# define UCS1_ASCII_CHAR_MASK 0x8080808080808080ULL
#elif (SIZEOF_SIZE_T == 4)
# define UCS1_ASCII_CHAR_MASK 0x80808080U
#else
# error C 'size_t' size should be either 4 or 8!
#endif
      16  
      17  #if STRINGLIB_SIZEOF_CHAR == 1
      18  
      19  Py_LOCAL_INLINE(Py_UCS4)
      20  STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
      21  {
      22      const unsigned char *p = (const unsigned char *) begin;
      23  
      24      while (p < end) {
      25          if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
      26              /* Help register allocation */
      27              const unsigned char *_p = p;
      28              while (_p + SIZEOF_SIZE_T <= end) {
      29                  size_t value = *(const size_t *) _p;
      30                  if (value & UCS1_ASCII_CHAR_MASK)
      31                      return 255;
      32                  _p += SIZEOF_SIZE_T;
      33              }
      34              p = _p;
      35              if (p == end)
      36                  break;
      37          }
      38          if (*p++ & 0x80)
      39              return 255;
      40      }
      41      return 127;
      42  }
      43  
/* NOTE(review): the mask defined in this header is UCS1_ASCII_CHAR_MASK,
   not ASCII_CHAR_MASK, so this #undef does not remove it.  Confirm whether
   it is meant to drop a macro of that name coming from another header. */
#undef ASCII_CHAR_MASK
      45  
      46  #else /* STRINGLIB_SIZEOF_CHAR == 1 */
      47  
/* Bit masks used by find_max_char() below: a character shares at least one
   set bit with MASK_ASCII / MASK_UCS1 / MASK_UCS2 exactly when its value is
   larger than MAX_CHAR_ASCII / MAX_CHAR_UCS1 / MAX_CHAR_UCS2 respectively. */
#define MASK_ASCII 0xFFFFFF80
#define MASK_UCS1 0xFFFFFF00
#define MASK_UCS2 0xFFFF0000

/* Upper bounds that find_max_char() below can return, one per width class. */
#define MAX_CHAR_ASCII 0x7f
#define MAX_CHAR_UCS1  0xff
#define MAX_CHAR_UCS2  0xffff
#define MAX_CHAR_UCS4  0x10ffff
      56  
      57  Py_LOCAL_INLINE(Py_UCS4)
      58  STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
      59  {
      60  #if STRINGLIB_SIZEOF_CHAR == 2
      61      const Py_UCS4 mask_limit = MASK_UCS1;
      62      const Py_UCS4 max_char_limit = MAX_CHAR_UCS2;
      63  #elif STRINGLIB_SIZEOF_CHAR == 4
      64      const Py_UCS4 mask_limit = MASK_UCS2;
      65      const Py_UCS4 max_char_limit = MAX_CHAR_UCS4;
      66  #else
      67  #error Invalid STRINGLIB_SIZEOF_CHAR (must be 1, 2 or 4)
      68  #endif
      69      Py_UCS4 mask;
      70      Py_ssize_t n = end - begin;
      71      const STRINGLIB_CHAR *p = begin;
      72      const STRINGLIB_CHAR *unrolled_end = begin + _Py_SIZE_ROUND_DOWN(n, 4);
      73      Py_UCS4 max_char;
      74  
      75      max_char = MAX_CHAR_ASCII;
      76      mask = MASK_ASCII;
      77      while (p < unrolled_end) {
      78          STRINGLIB_CHAR bits = p[0] | p[1] | p[2] | p[3];
      79          if (bits & mask) {
      80              if (mask == mask_limit) {
      81                  /* Limit reached */
      82                  return max_char_limit;
      83              }
      84              if (mask == MASK_ASCII) {
      85                  max_char = MAX_CHAR_UCS1;
      86                  mask = MASK_UCS1;
      87              }
      88              else {
      89                  /* mask can't be MASK_UCS2 because of mask_limit above */
      90                  assert(mask == MASK_UCS1);
      91                  max_char = MAX_CHAR_UCS2;
      92                  mask = MASK_UCS2;
      93              }
      94              /* We check the new mask on the same chars in the next iteration */
      95              continue;
      96          }
      97          p += 4;
      98      }
      99      while (p < end) {
     100          if (p[0] & mask) {
     101              if (mask == mask_limit) {
     102                  /* Limit reached */
     103                  return max_char_limit;
     104              }
     105              if (mask == MASK_ASCII) {
     106                  max_char = MAX_CHAR_UCS1;
     107                  mask = MASK_UCS1;
     108              }
     109              else {
     110                  /* mask can't be MASK_UCS2 because of mask_limit above */
     111                  assert(mask == MASK_UCS1);
     112                  max_char = MAX_CHAR_UCS2;
     113                  mask = MASK_UCS2;
     114              }
     115              /* We check the new mask on the same chars in the next iteration */
     116              continue;
     117          }
     118          p++;
     119      }
     120      return max_char;
     121  }
     122  
/* Remove the helper macros defined for the multi-byte variant above so
   they are not visible after this header. */
#undef MASK_ASCII
#undef MASK_UCS1
#undef MASK_UCS2
#undef MAX_CHAR_ASCII
#undef MAX_CHAR_UCS1
#undef MAX_CHAR_UCS2
#undef MAX_CHAR_UCS4
     130  
     131  #endif /* STRINGLIB_SIZEOF_CHAR == 1 */
     132