1  #ifndef Py_CPYTHON_UNICODEOBJECT_H
       2  #  error "this header file must not be included directly"
       3  #endif
       4  
/* Py_UNICODE was the native Unicode storage format (code unit) used by
   Python and represents a single Unicode element in the Unicode type.
   With PEP 393, Py_UNICODE is deprecated and is now only a typedef
   for wchar_t. */
/* Underlying C type behind Py_UNICODE. */
#define PY_UNICODE_TYPE wchar_t
/* Py_DEPRECATED(3.3) */ typedef wchar_t Py_UNICODE;
      11  
      12  /* --- Internal Unicode Operations ---------------------------------------- */
      13  
/* Default USE_UNICODE_WCHAR_CACHE to 1 unless it was already defined before
   this header is processed.
   NOTE(review): presumably selects whether the wchar_t (wstr) cache is used;
   confirm against the implementation file. */
#ifndef USE_UNICODE_WCHAR_CACHE
#  define USE_UNICODE_WCHAR_CACHE 1
#endif /* USE_UNICODE_WCHAR_CACHE */
      17  
/* Since splitting on whitespace is an important use case, and
   whitespace in most situations is solely ASCII whitespace, we
   optimize for the common case by using a quick look-up table
   _Py_ascii_whitespace (see below) with an inlined check.

   Code points below 128 index the table directly; everything else is
   forwarded to _PyUnicode_IsWhitespace().

   Caution: `ch` is expanded in the range test and again in whichever branch
   is selected, so it is evaluated twice.  Do not pass an expression with
   side effects.
 */
#define Py_UNICODE_ISSPACE(ch) \
    ((Py_UCS4)(ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
      26  
/* Case predicates: each forwards to the _PyUnicode_* helper of the same
   meaning. */
#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)

/* Case conversions. */
#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)

/* Numeric and printable predicates. */
#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)

/* Numeric value conversions. */
#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)

#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)

/* True if ch is alphabetic, a decimal digit, a digit or numeric.
   The tests are chained with ||, so `ch` is evaluated between one and four
   times; do not pass an expression with side effects. */
#define Py_UNICODE_ISALNUM(ch) \
   (Py_UNICODE_ISALPHA(ch) || \
    Py_UNICODE_ISDECIMAL(ch) || \
    Py_UNICODE_ISDIGIT(ch) || \
    Py_UNICODE_ISNUMERIC(ch))
      52  
      53  /* macros to work with surrogates */
      54  #define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
      55  #define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF)
      56  #define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF)
      57  /* Join two surrogate characters and return a single Py_UCS4 value. */
      58  #define Py_UNICODE_JOIN_SURROGATES(high, low)  \
      59      (((((Py_UCS4)(high) & 0x03FF) << 10) |      \
      60        ((Py_UCS4)(low) & 0x03FF)) + 0x10000)
      61  /* high surrogate = top 10 bits added to D800 */
      62  #define Py_UNICODE_HIGH_SURROGATE(ch) (0xD800 - (0x10000 >> 10) + ((ch) >> 10))
      63  /* low surrogate = bottom 10 bits added to DC00 */
      64  #define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF))
      65  
      66  /* --- Unicode Type ------------------------------------------------------- */
      67  
/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
   structure. state.ascii and state.compact are set, and the data
   immediately follow the structure. utf8_length and wstr_length can be found
   in the length field; the utf8 pointer is equal to the data pointer. */
typedef struct {
    /* There are 4 forms of Unicode strings:

       - compact ascii:

         * structure = PyASCIIObject
         * test: PyUnicode_IS_COMPACT_ASCII(op)
         * kind = PyUnicode_1BYTE_KIND
         * compact = 1
         * ascii = 1
         * ready = 1
         * (length is the length of the utf8 and wstr strings)
         * (data starts just after the structure)
         * (since ASCII is decoded from UTF-8, the utf8 string is the data)

       - compact:

         * structure = PyCompactUnicodeObject
         * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
           PyUnicode_4BYTE_KIND
         * compact = 1
         * ready = 1
         * ascii = 0
         * utf8 is not shared with data
         * utf8_length = 0 if utf8 is NULL
         * wstr is shared with data and wstr_length=length
           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
         * wstr_length = 0 if wstr is NULL
         * (data starts just after the structure)

       - legacy string, not ready:

         * structure = PyUnicodeObject
         * test: kind == PyUnicode_WCHAR_KIND
         * length = 0 (use wstr_length)
         * hash = -1
         * kind = PyUnicode_WCHAR_KIND
         * compact = 0
         * ascii = 0
         * ready = 0
         * interned = SSTATE_NOT_INTERNED
         * wstr is not NULL
         * data.any is NULL
         * utf8 is NULL
         * utf8_length = 0

       - legacy string, ready:

         * structure = PyUnicodeObject
         * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
           PyUnicode_4BYTE_KIND
         * compact = 0
         * ready = 1
         * data.any is not NULL
         * utf8 is shared with data.any and utf8_length = length if ascii = 1
         * utf8_length = 0 if utf8 is NULL
         * wstr is shared with data.any and wstr_length = length
           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
         * wstr_length = 0 if wstr is NULL

       Compact strings use only one memory block (structure + characters),
       whereas legacy strings use one block for the structure and one block
       for characters.

       Legacy strings are created by PyUnicode_FromUnicode() and
       PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
       when PyUnicode_READY() is called.

       See also _PyUnicode_CheckConsistency().
    */
    PyObject_HEAD
    Py_ssize_t length;          /* Number of code points in the string */
    Py_hash_t hash;             /* Hash value; -1 if not set */
    /* Packed flags; the named fields use 8 bits and the unnamed padding
       field the remaining 24. */
    struct {
        /*
           SSTATE_NOT_INTERNED (0)
           SSTATE_INTERNED_MORTAL (1)
           SSTATE_INTERNED_IMMORTAL (2)

           If interned != SSTATE_NOT_INTERNED, the two references from the
           dictionary to this object are *not* counted in ob_refcnt.
         */
        unsigned int interned:2;
        /* Character size:

           - PyUnicode_WCHAR_KIND (0):

             * character type = wchar_t (16 or 32 bits, depending on the
               platform)

           - PyUnicode_1BYTE_KIND (1):

             * character type = Py_UCS1 (8 bits, unsigned)
             * all characters are in the range U+0000-U+00FF (latin1)
             * if ascii is set, all characters are in the range U+0000-U+007F
               (ASCII), otherwise at least one character is in the range
               U+0080-U+00FF

           - PyUnicode_2BYTE_KIND (2):

             * character type = Py_UCS2 (16 bits, unsigned)
             * all characters are in the range U+0000-U+FFFF (BMP)
             * at least one character is in the range U+0100-U+FFFF

           - PyUnicode_4BYTE_KIND (4):

             * character type = Py_UCS4 (32 bits, unsigned)
             * all characters are in the range U+0000-U+10FFFF
             * at least one character is in the range U+10000-U+10FFFF
         */
        unsigned int kind:3;
        /* Compact is with respect to the allocation scheme. Compact unicode
           objects only require one memory block while non-compact objects use
           one block for the PyUnicodeObject struct and another for its data
           buffer. */
        unsigned int compact:1;
        /* The string only contains characters in the range U+0000-U+007F (ASCII)
           and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
           set, use the PyASCIIObject structure. */
        unsigned int ascii:1;
        /* The ready flag indicates whether the object layout is initialized
           completely. This means that this is either a compact object, or
           the data pointer is filled out. The bit is redundant, and helps
           to minimize the test in PyUnicode_IS_READY(). */
        unsigned int ready:1;
        /* Padding to ensure that PyUnicode_DATA() is always aligned to
           4 bytes (see issue #19537 on m68k). */
        unsigned int :24;
    } state;
    wchar_t *wstr;              /* wchar_t representation (null-terminated) */
} PyASCIIObject;
     207  
/* Non-ASCII strings allocated through PyUnicode_New use the
   PyCompactUnicodeObject structure. state.compact is set, and the data
   immediately follow the structure.

   The structure embeds PyASCIIObject as its first member and adds the
   cached UTF-8 representation and the length of the wstr representation. */
typedef struct {
    PyASCIIObject _base;
    Py_ssize_t utf8_length;     /* Number of bytes in utf8, excluding the
                                 * terminating \0. */
    char *utf8;                 /* UTF-8 representation (null-terminated) */
    Py_ssize_t wstr_length;     /* Number of code points in wstr, possible
                                 * surrogates count as two code points. */
} PyCompactUnicodeObject;
     219  
/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
   PyUnicodeObject structure. The actual string data is initially in the wstr
   block, and copied into the data block using _PyUnicode_Ready.

   The structure embeds PyCompactUnicodeObject as its first member and adds
   a pointer to the separately stored character buffer.  The union members
   are differently typed views of that one pointer. */
typedef struct {
    PyCompactUnicodeObject _base;
    union {
        void *any;              /* untyped view of the buffer pointer */
        Py_UCS1 *latin1;        /* view as Py_UCS1 elements */
        Py_UCS2 *ucs2;          /* view as Py_UCS2 elements */
        Py_UCS4 *ucs4;          /* view as Py_UCS4 elements */
    } data;                     /* Canonical, smallest-form Unicode buffer */
} PyUnicodeObject;
     232  
/* Consistency check for a Unicode object (implemented outside this header).
   NOTE(review): presumably verifies the layout invariants listed in the
   PyASCIIObject comment, and the characters too when check_content is
   non-zero; confirm the return convention in the implementation. */
PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
    PyObject *op,
    int check_content);
     236  
     237  
/* Cast op to a pointer to one of the three Unicode structures.
   Each macro first asserts PyUnicode_Check(op), then casts with _Py_CAST().
   No check is made that the object actually uses the requested structure.
   `op` appears twice in each expansion (once inside the assertion). */
#define _PyASCIIObject_CAST(op) \
    (assert(PyUnicode_Check(op)), \
     _Py_CAST(PyASCIIObject*, (op)))
#define _PyCompactUnicodeObject_CAST(op) \
    (assert(PyUnicode_Check(op)), \
     _Py_CAST(PyCompactUnicodeObject*, (op)))
#define _PyUnicodeObject_CAST(op) \
    (assert(PyUnicode_Check(op)), \
     _Py_CAST(PyUnicodeObject*, (op)))
     247  
     248  
     249  /* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
     250  
/* Values for PyASCIIObject.state: */

/* Interning state, stored in the 2-bit state.interned field and returned
   by PyUnicode_CHECK_INTERNED(). */
#define SSTATE_NOT_INTERNED 0
#define SSTATE_INTERNED_MORTAL 1
#define SSTATE_INTERNED_IMMORTAL 2
     257  
     258  /* Use only if you know it's a string */
     259  static inline unsigned int PyUnicode_CHECK_INTERNED(PyObject *op) {
     260      return _PyASCIIObject_CAST(op)->state.interned;
     261  }
     262  #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
     263  #  define PyUnicode_CHECK_INTERNED(op) PyUnicode_CHECK_INTERNED(_PyObject_CAST(op))
     264  #endif
     265  
     266  /* Fast check to determine whether an object is ready. Equivalent to:
     267     PyUnicode_IS_COMPACT(op) || _PyUnicodeObject_CAST(op)->data.any */
     268  static inline unsigned int PyUnicode_IS_READY(PyObject *op) {
     269      return _PyASCIIObject_CAST(op)->state.ready;
     270  }
     271  #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
     272  #  define PyUnicode_IS_READY(op) PyUnicode_IS_READY(_PyObject_CAST(op))
     273  #endif
     274  
     275  /* Return true if the string contains only ASCII characters, or 0 if not. The
     276     string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be
     277     ready. */
     278  static inline unsigned int PyUnicode_IS_ASCII(PyObject *op) {
     279      assert(PyUnicode_IS_READY(op));
     280      return _PyASCIIObject_CAST(op)->state.ascii;
     281  }
     282  #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
     283  #  define PyUnicode_IS_ASCII(op) PyUnicode_IS_ASCII(_PyObject_CAST(op))
     284  #endif
     285  
     286  /* Return true if the string is compact or 0 if not.
     287     No type checks or Ready calls are performed. */
     288  static inline unsigned int PyUnicode_IS_COMPACT(PyObject *op) {
     289      return _PyASCIIObject_CAST(op)->state.compact;
     290  }
     291  #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
     292  #  define PyUnicode_IS_COMPACT(op) PyUnicode_IS_COMPACT(_PyObject_CAST(op))
     293  #endif
     294  
     295  /* Return true if the string is a compact ASCII string (use PyASCIIObject
     296     structure), or 0 if not.  No type checks or Ready calls are performed. */
     297  static inline int PyUnicode_IS_COMPACT_ASCII(PyObject *op) {
     298      return (_PyASCIIObject_CAST(op)->state.ascii && PyUnicode_IS_COMPACT(op));
     299  }
     300  #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
     301  #  define PyUnicode_IS_COMPACT_ASCII(op) PyUnicode_IS_COMPACT_ASCII(_PyObject_CAST(op))
     302  #endif
     303  
/* Storage kind of a string, as kept in the state.kind bit field. */
enum PyUnicode_Kind {
/* The string only has a wstr (wchar_t) buffer.  This is only possible
   when the string was created with a legacy API and _PyUnicode_Ready()
   has not been called yet.  */
    PyUnicode_WCHAR_KIND = 0,
/* Values produced by PyUnicode_KIND() for ready strings: */
    PyUnicode_1BYTE_KIND = 1,
    PyUnicode_2BYTE_KIND = 2,
    PyUnicode_4BYTE_KIND = 4
};
     314  
/* Return one of the PyUnicode_*_KIND values defined above.
   The string must be ready; this is checked by an assertion.
   `op` appears in both the assertion and the field access, so avoid
   arguments with side effects. */
#define PyUnicode_KIND(op) \
    (assert(PyUnicode_IS_READY(op)), \
     _PyASCIIObject_CAST(op)->state.kind)
     319  
     320  /* Return a void pointer to the raw unicode buffer. */
     321  static inline void* _PyUnicode_COMPACT_DATA(PyObject *op) {
     322      if (PyUnicode_IS_ASCII(op)) {
     323          return _Py_STATIC_CAST(void*, (_PyASCIIObject_CAST(op) + 1));
     324      }
     325      return _Py_STATIC_CAST(void*, (_PyCompactUnicodeObject_CAST(op) + 1));
     326  }
     327  
     328  static inline void* _PyUnicode_NONCOMPACT_DATA(PyObject *op) {
     329      void *data;
     330      assert(!PyUnicode_IS_COMPACT(op));
     331      data = _PyUnicodeObject_CAST(op)->data.any;
     332      assert(data != NULL);
     333      return data;
     334  }
     335  
     336  static inline void* PyUnicode_DATA(PyObject *op) {
     337      if (PyUnicode_IS_COMPACT(op)) {
     338          return _PyUnicode_COMPACT_DATA(op);
     339      }
     340      return _PyUnicode_NONCOMPACT_DATA(op);
     341  }
     342  #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
     343  #  define PyUnicode_DATA(op) PyUnicode_DATA(_PyObject_CAST(op))
     344  #endif
     345  
/* Return a pointer to the canonical representation, cast to Py_UCS1,
   Py_UCS2 or Py_UCS4 for direct character access.
   No checks are performed: the macros only cast the result of
   PyUnicode_DATA().  Check PyUnicode_KIND() first to pick the macro that
   matches the string. */

#define PyUnicode_1BYTE_DATA(op) _Py_STATIC_CAST(Py_UCS1*, PyUnicode_DATA(op))
#define PyUnicode_2BYTE_DATA(op) _Py_STATIC_CAST(Py_UCS2*, PyUnicode_DATA(op))
#define PyUnicode_4BYTE_DATA(op) _Py_STATIC_CAST(Py_UCS4*, PyUnicode_DATA(op))
     354  
     355  /* Returns the length of the unicode string. The caller has to make sure that
     356     the string has it's canonical representation set before calling
     357     this function.  Call PyUnicode_(FAST_)Ready to ensure that. */
     358  static inline Py_ssize_t PyUnicode_GET_LENGTH(PyObject *op) {
     359      assert(PyUnicode_IS_READY(op));
     360      return _PyASCIIObject_CAST(op)->length;
     361  }
     362  #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
     363  #  define PyUnicode_GET_LENGTH(op) PyUnicode_GET_LENGTH(_PyObject_CAST(op))
     364  #endif
     365  
     366  /* Write into the canonical representation, this function does not do any sanity
     367     checks and is intended for usage in loops.  The caller should cache the
     368     kind and data pointers obtained from other function calls.
     369     index is the index in the string (starts at 0) and value is the new
     370     code point value which should be written to that location. */
     371  static inline void PyUnicode_WRITE(int kind, void *data,
     372                                     Py_ssize_t index, Py_UCS4 value)
     373  {
     374      if (kind == PyUnicode_1BYTE_KIND) {
     375          assert(value <= 0xffU);
     376          _Py_STATIC_CAST(Py_UCS1*, data)[index] = _Py_STATIC_CAST(Py_UCS1, value);
     377      }
     378      else if (kind == PyUnicode_2BYTE_KIND) {
     379          assert(value <= 0xffffU);
     380          _Py_STATIC_CAST(Py_UCS2*, data)[index] = _Py_STATIC_CAST(Py_UCS2, value);
     381      }
     382      else {
     383          assert(kind == PyUnicode_4BYTE_KIND);
     384          assert(value <= 0x10ffffU);
     385          _Py_STATIC_CAST(Py_UCS4*, data)[index] = value;
     386      }
     387  }
     388  #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
     389  #define PyUnicode_WRITE(kind, data, index, value) \
     390      PyUnicode_WRITE(_Py_STATIC_CAST(int, kind), _Py_CAST(void*, data), \
     391                      (index), _Py_STATIC_CAST(Py_UCS4, value))
     392  #endif
     393  
     394  /* Read a code point from the string's canonical representation.  No checks
     395     or ready calls are performed. */
     396  static inline Py_UCS4 PyUnicode_READ(int kind,
     397                                       const void *data, Py_ssize_t index)
     398  {
     399      if (kind == PyUnicode_1BYTE_KIND) {
     400          return _Py_STATIC_CAST(const Py_UCS1*, data)[index];
     401      }
     402      if (kind == PyUnicode_2BYTE_KIND) {
     403          return _Py_STATIC_CAST(const Py_UCS2*, data)[index];
     404      }
     405      assert(kind == PyUnicode_4BYTE_KIND);
     406      return _Py_STATIC_CAST(const Py_UCS4*, data)[index];
     407  }
     408  #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
     409  #define PyUnicode_READ(kind, data, index) \
     410      PyUnicode_READ(_Py_STATIC_CAST(int, kind), \
     411                     _Py_STATIC_CAST(const void*, data), \
     412                     (index))
     413  #endif
     414  
     415  /* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it
     416     calls PyUnicode_KIND() and might call it twice.  For single reads, use
     417     PyUnicode_READ_CHAR, for multiple consecutive reads callers should
     418     cache kind and use PyUnicode_READ instead. */
     419  static inline Py_UCS4 PyUnicode_READ_CHAR(PyObject *unicode, Py_ssize_t index)
     420  {
     421      int kind;
     422      assert(PyUnicode_IS_READY(unicode));
     423      kind = PyUnicode_KIND(unicode);
     424      if (kind == PyUnicode_1BYTE_KIND) {
     425          return PyUnicode_1BYTE_DATA(unicode)[index];
     426      }
     427      if (kind == PyUnicode_2BYTE_KIND) {
     428          return PyUnicode_2BYTE_DATA(unicode)[index];
     429      }
     430      assert(kind == PyUnicode_4BYTE_KIND);
     431      return PyUnicode_4BYTE_DATA(unicode)[index];
     432  }
     433  #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
     434  #  define PyUnicode_READ_CHAR(unicode, index) \
     435         PyUnicode_READ_CHAR(_PyObject_CAST(unicode), (index))
     436  #endif
     437  
     438  /* Return a maximum character value which is suitable for creating another
     439     string based on op.  This is always an approximation but more efficient
     440     than iterating over the string. */
     441  static inline Py_UCS4 PyUnicode_MAX_CHAR_VALUE(PyObject *op)
     442  {
     443      int kind;
     444  
     445      assert(PyUnicode_IS_READY(op));
     446      if (PyUnicode_IS_ASCII(op)) {
     447          return 0x7fU;
     448      }
     449  
     450      kind = PyUnicode_KIND(op);
     451      if (kind == PyUnicode_1BYTE_KIND) {
     452         return 0xffU;
     453      }
     454      if (kind == PyUnicode_2BYTE_KIND) {
     455          return 0xffffU;
     456      }
     457      assert(kind == PyUnicode_4BYTE_KIND);
     458      return 0x10ffffU;
     459  }
     460  #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
     461  #  define PyUnicode_MAX_CHAR_VALUE(op) \
     462         PyUnicode_MAX_CHAR_VALUE(_PyObject_CAST(op))
     463  #endif
     464  
     465  /* === Public API ========================================================= */
     466  
     467  /* --- Plain Py_UNICODE --------------------------------------------------- */
     468  
/* With PEP 393, this is the recommended way to allocate a new unicode object.
   This function will allocate the object and its buffer in a single memory
   block.  Objects created using this function are not resizable.

   size is the number of code points of the new string; maxchar is the
   maximum code point value that will be stored in it. */
PyAPI_FUNC(PyObject*) PyUnicode_New(
    Py_ssize_t size,            /* Number of code points in the new string */
    Py_UCS4 maxchar             /* maximum code point value in the string */
    );
     476  
/* Initialize the canonical string representation from the deprecated
   wstr/Py_UNICODE representation. This function is used to convert Unicode
   objects which were created using the old API to the new flexible format
   introduced with PEP 393.

   Don't call this function directly, use the public PyUnicode_READY() function
   instead.  PyUnicode_READY() passes this function's return value through
   unchanged when the string is not ready yet. */
PyAPI_FUNC(int) _PyUnicode_Ready(
    PyObject *unicode           /* Unicode object */
    );
     487  
     488  /* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
     489     case.  If the canonical representation is not yet set, it will still call
     490     _PyUnicode_Ready().
     491     Returns 0 on success and -1 on errors. */
     492  static inline int PyUnicode_READY(PyObject *op)
     493  {
     494      if (PyUnicode_IS_READY(op)) {
     495          return 0;
     496      }
     497      return _PyUnicode_Ready(op);
     498  }
     499  #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
     500  #  define PyUnicode_READY(op) PyUnicode_READY(_PyObject_CAST(op))
     501  #endif
     502  
/* Get a copy of a Unicode string.
   NOTE(review): ownership of the result and the error convention are not
   visible in this header; confirm in the implementation. */
PyAPI_FUNC(PyObject*) _PyUnicode_Copy(
    PyObject *unicode
    );
     507  
/* Copy characters from one unicode object into another. This function
   performs character conversion when necessary and falls back to memcpy()
   if possible.

   Fail if to is too small (smaller than *how_many* or smaller than
   len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
   kind(to), or if *to* has more than 1 reference.

   Return the number of written characters, or return -1 and raise an
   exception on error.

   Pseudo-code:

       how_many = min(how_many, len(from) - from_start)
       to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
       return how_many

   Note: The function doesn't write a terminating null character.
   */
PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
    PyObject *to,
    Py_ssize_t to_start,
    PyObject *from,
    Py_ssize_t from_start,
    Py_ssize_t how_many
    );
     533  
/* Unsafe version of PyUnicode_CopyCharacters(): the arguments are not
   checked, so the function may crash if parameters are invalid (e.g. if the
   output string is too short).  It returns nothing, so no error can be
   reported to the caller. */
PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
    PyObject *to,
    Py_ssize_t to_start,
    PyObject *from,
    Py_ssize_t from_start,
    Py_ssize_t how_many
    );
     544  
/* Fill a string with a character: write fill_char into
   unicode[start:start+length].

   Fail if fill_char is bigger than the string maximum character, or if the
   string has more than 1 reference.

   Return the number of written characters, or return -1 and raise an
   exception on error. */
PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
    PyObject *unicode,
    Py_ssize_t start,
    Py_ssize_t length,
    Py_UCS4 fill_char
    );
     559  
/* Unsafe version of PyUnicode_Fill(): the arguments are not checked, so the
   function may crash if parameters are invalid (e.g. if length is longer
   than the string).  It returns nothing, so no error can be reported to the
   caller. */
PyAPI_FUNC(void) _PyUnicode_FastFill(
    PyObject *unicode,
    Py_ssize_t start,
    Py_ssize_t length,
    Py_UCS4 fill_char
    );
     568  
/* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4
   characters; kind tells which of the three element types buffer holds.
   The buffer is scanned to find the maximum character.
   NOTE(review): size is presumably a count of characters, not bytes;
   confirm. */
PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
    int kind,
    const void *buffer,
    Py_ssize_t size);
     575  
/* Create a new string from a buffer of ASCII characters.
   WARNING: the function does not check whether the buffer contains any
   non-ASCII character; the caller must guarantee that it does not. */
PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
    const char *buffer,
    Py_ssize_t size);
     581  
/* Compute the maximum character of the substring unicode[start:end]
   (slice notation: start included, end excluded).
   Return 127 for an empty string. */
PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
    PyObject *unicode,
    Py_ssize_t start,
    Py_ssize_t end);
     588  
     589  /* --- Legacy deprecated API ---------------------------------------------- */
     590  
/* Create a Unicode Object from the Py_UNICODE buffer u of the given
   size.  Deprecated since 3.3.

   u may be NULL, in which case the contents are undefined and it is the
   user's responsibility to fill in the needed data afterwards. Note
   that modifying the Unicode object contents after construction is
   only allowed if u was set to NULL.

   The buffer is copied into the new object. */
Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
    const Py_UNICODE *u,        /* Unicode buffer */
    Py_ssize_t size             /* size of buffer */
    );
     604  
/* Return a read-only pointer to the Unicode object's internal
   Py_UNICODE buffer.  Deprecated since 3.3.
   If the wchar_t/Py_UNICODE representation is not yet available, this
   function will calculate it. */
Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
    PyObject *unicode           /* Unicode object */
    );
     612  
/* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string
   contains null characters.  Unlike PyUnicode_AsUnicode(), the returned
   pointer is const-qualified. */
PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
    PyObject *unicode           /* Unicode object */
    );
     618  
/* Return a read-only pointer to the Unicode object's internal
   Py_UNICODE buffer and save its length in *size.  Deprecated since 3.3.
   If the wchar_t/Py_UNICODE representation is not yet available, this
   function will calculate it. */

Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
    PyObject *unicode,          /* Unicode object */
    Py_ssize_t *size            /* location where to save the length */
    );
     628  
     629  
     630  /* Fast access macros */
     631  
     632  Py_DEPRECATED(3.3)
     633  static inline Py_ssize_t PyUnicode_WSTR_LENGTH(PyObject *op)
     634  {
     635      if (PyUnicode_IS_COMPACT_ASCII(op)) {
     636          return _PyASCIIObject_CAST(op)->length;
     637      }
     638      else {
     639          return _PyCompactUnicodeObject_CAST(op)->wstr_length;
     640      }
     641  }
     642  #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
     643  #  define PyUnicode_WSTR_LENGTH(op) PyUnicode_WSTR_LENGTH(_PyObject_CAST(op))
     644  #endif
     645  
     646  /* Returns the deprecated Py_UNICODE representation's size in code units
     647     (this includes surrogate pairs as 2 units).
     648     If the Py_UNICODE representation is not available, it will be computed
     649     on request.  Use PyUnicode_GET_LENGTH() for the length in code points. */
     650  
     651  Py_DEPRECATED(3.3)
     652  static inline Py_ssize_t PyUnicode_GET_SIZE(PyObject *op)
     653  {
     654      _Py_COMP_DIAG_PUSH
     655      _Py_COMP_DIAG_IGNORE_DEPR_DECLS
     656      if (_PyASCIIObject_CAST(op)->wstr == _Py_NULL) {
     657          (void)PyUnicode_AsUnicode(op);
     658          assert(_PyASCIIObject_CAST(op)->wstr != _Py_NULL);
     659      }
     660      return PyUnicode_WSTR_LENGTH(op);
     661      _Py_COMP_DIAG_POP
     662  }
     663  #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
     664  #  define PyUnicode_GET_SIZE(op) PyUnicode_GET_SIZE(_PyObject_CAST(op))
     665  #endif
     666  
     667  Py_DEPRECATED(3.3)
     668  static inline Py_ssize_t PyUnicode_GET_DATA_SIZE(PyObject *op)
     669  {
     670      _Py_COMP_DIAG_PUSH
     671      _Py_COMP_DIAG_IGNORE_DEPR_DECLS
     672      return PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE;
     673      _Py_COMP_DIAG_POP
     674  }
     675  #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
     676  #  define PyUnicode_GET_DATA_SIZE(op) PyUnicode_GET_DATA_SIZE(_PyObject_CAST(op))
     677  #endif
     678  
     679  /* Alias for PyUnicode_AsUnicode().  This will create a wchar_t/Py_UNICODE
     680     representation on demand.  Using this macro is very inefficient now,
     681     try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
     682     use PyUnicode_WRITE() and PyUnicode_READ(). */
     683  
     684  Py_DEPRECATED(3.3)
     685  static inline Py_UNICODE* PyUnicode_AS_UNICODE(PyObject *op)
     686  {
     687      wchar_t *wstr = _PyASCIIObject_CAST(op)->wstr;
     688      if (wstr != _Py_NULL) {
     689          return wstr;
     690      }
     691  
     692      _Py_COMP_DIAG_PUSH
     693      _Py_COMP_DIAG_IGNORE_DEPR_DECLS
     694      return PyUnicode_AsUnicode(op);
     695      _Py_COMP_DIAG_POP
     696  }
     697  #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
     698  #  define PyUnicode_AS_UNICODE(op) PyUnicode_AS_UNICODE(_PyObject_CAST(op))
     699  #endif
     700  
     701  Py_DEPRECATED(3.3)
     702  static inline const char* PyUnicode_AS_DATA(PyObject *op)
     703  {
     704      _Py_COMP_DIAG_PUSH
     705      _Py_COMP_DIAG_IGNORE_DEPR_DECLS
     706      Py_UNICODE *data = PyUnicode_AS_UNICODE(op);
     707      // In C++, casting directly PyUnicode* to const char* is not valid
     708      return _Py_STATIC_CAST(const char*, _Py_STATIC_CAST(const void*, data));
     709      _Py_COMP_DIAG_POP
     710  }
     711  #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
     712  #  define PyUnicode_AS_DATA(op) PyUnicode_AS_DATA(_PyObject_CAST(op))
     713  #endif
     714  
     715  
     716  /* --- _PyUnicodeWriter API ----------------------------------------------- */
     717  
/* State shared by the _PyUnicodeWriter_*() functions and macros.
   Field order is part of the struct layout; do not reorder. */
typedef struct {
    /* When 'readonly' is set this is a shared string (see below).
       NOTE(review): presumably the string object under construction
       otherwise -- confirm against the implementation. */
    PyObject *buffer;
    /* NOTE(review): presumably the character storage of 'buffer' -- confirm. */
    void *data;
    /* Compared against the requested kind by _PyUnicodeWriter_PrepareKind():
       a request <= kind needs no further preparation. */
    enum PyUnicode_Kind kind;
    /* Compared against the requested maximum character by
       _PyUnicodeWriter_Prepare(). */
    Py_UCS4 maxchar;
    /* _PyUnicodeWriter_Prepare() treats (size - pos) as the number of
       characters that can be written without further preparation. */
    Py_ssize_t size;
    Py_ssize_t pos;

    /* minimum number of allocated characters (default: 0) */
    Py_ssize_t min_length;

    /* minimum character (default: 127, ASCII) */
    Py_UCS4 min_char;

    /* If non-zero, overallocate the buffer (default: 0). */
    unsigned char overallocate;

    /* If readonly is 1, buffer is a shared string (cannot be modified)
       and size is set to 0. */
    unsigned char readonly;
} _PyUnicodeWriter ;
     739  
     740  /* Initialize a Unicode writer.
     741   *
     742   * By default, the minimum buffer size is 0 character and overallocation is
     743   * disabled. Set min_length, min_char and overallocate attributes to control
     744   * the allocation of the buffer. */
     745  PyAPI_FUNC(void)
     746  _PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
     747  
     748  /* Prepare the buffer to write 'length' characters
     749     with the specified maximum character.
     750  
     751     Return 0 on success, raise an exception and return -1 on error. */
     752  #define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR)             \
     753      (((MAXCHAR) <= (WRITER)->maxchar                                  \
     754        && (LENGTH) <= (WRITER)->size - (WRITER)->pos)                  \
     755       ? 0                                                              \
     756       : (((LENGTH) == 0)                                               \
     757          ? 0                                                           \
     758          : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
     759  
     760  /* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
     761     instead. */
     762  PyAPI_FUNC(int)
     763  _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
     764                                   Py_ssize_t length, Py_UCS4 maxchar);
     765  
     766  /* Prepare the buffer to have at least the kind KIND.
     767     For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
     768     support characters in range U+000-U+FFFF.
     769  
     770     Return 0 on success, raise an exception and return -1 on error. */
     771  #define _PyUnicodeWriter_PrepareKind(WRITER, KIND)                    \
     772      (assert((KIND) != PyUnicode_WCHAR_KIND),                          \
     773       (KIND) <= (WRITER)->kind                                         \
     774       ? 0                                                              \
     775       : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
     776  
     777  /* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
     778     macro instead. */
     779  PyAPI_FUNC(int)
     780  _PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
     781                                       enum PyUnicode_Kind kind);
     782  
     783  /* Append a Unicode character.
     784     Return 0 on success, raise an exception and return -1 on error. */
     785  PyAPI_FUNC(int)
     786  _PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
     787      Py_UCS4 ch
     788      );
     789  
     790  /* Append a Unicode string.
     791     Return 0 on success, raise an exception and return -1 on error. */
     792  PyAPI_FUNC(int)
     793  _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
     794      PyObject *str               /* Unicode string */
     795      );
     796  
     797  /* Append a substring of a Unicode string.
     798     Return 0 on success, raise an exception and return -1 on error. */
     799  PyAPI_FUNC(int)
     800  _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
     801      PyObject *str,              /* Unicode string */
     802      Py_ssize_t start,
     803      Py_ssize_t end
     804      );
     805  
     806  /* Append an ASCII-encoded byte string.
     807     Return 0 on success, raise an exception and return -1 on error. */
     808  PyAPI_FUNC(int)
     809  _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
     810      const char *str,           /* ASCII-encoded byte string */
     811      Py_ssize_t len             /* number of bytes, or -1 if unknown */
     812      );
     813  
     814  /* Append a latin1-encoded byte string.
     815     Return 0 on success, raise an exception and return -1 on error. */
     816  PyAPI_FUNC(int)
     817  _PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
     818      const char *str,           /* latin1-encoded byte string */
     819      Py_ssize_t len             /* length in bytes */
     820      );
     821  
     822  /* Get the value of the writer as a Unicode string. Clear the
     823     buffer of the writer. Raise an exception and return NULL
     824     on error. */
     825  PyAPI_FUNC(PyObject *)
     826  _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
     827  
     828  /* Deallocate memory of a writer (clear its internal buffer). */
     829  PyAPI_FUNC(void)
     830  _PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
     831  
     832  
     833  /* Format the object based on the format_spec, as defined in PEP 3101
     834     (Advanced String Formatting). */
     835  PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
     836      _PyUnicodeWriter *writer,
     837      PyObject *obj,
     838      PyObject *format_spec,
     839      Py_ssize_t start,
     840      Py_ssize_t end);
     841  
     842  /* --- Manage the default encoding ---------------------------------------- */
     843  
     844  /* Returns a pointer to the default encoding (UTF-8) of the
     845     Unicode object unicode.
     846  
     847     Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
     848     in the unicodeobject.
     849  
     850     _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
     851     support the previous internal function with the same behaviour.
     852  
     853     Use of this API is DEPRECATED since no size information can be
     854     extracted from the returned data.
     855  */
     856  
     857  PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode);
     858  
     859  #define _PyUnicode_AsString PyUnicode_AsUTF8
     860  
     861  /* --- UTF-7 Codecs ------------------------------------------------------- */
     862  
     863  PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
     864      PyObject *unicode,          /* Unicode object */
     865      int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
     866      int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
     867      const char *errors          /* error handling */
     868      );
     869  
     870  /* --- UTF-8 Codecs ------------------------------------------------------- */
     871  
     872  PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
     873      PyObject *unicode,
     874      const char *errors);
     875  
     876  /* --- UTF-32 Codecs ------------------------------------------------------ */
     877  
     878  PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
     879      PyObject *object,           /* Unicode object */
     880      const char *errors,         /* error handling */
     881      int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
     882      );
     883  
     884  /* --- UTF-16 Codecs ------------------------------------------------------ */
     885  
     886  /* Returns a Python string object holding the UTF-16 encoded value of
     887     the Unicode data.
     888  
     889     If byteorder is not 0, output is written according to the following
     890     byte order:
     891  
     892     byteorder == -1: little endian
     893     byteorder == 0:  native byte order (writes a BOM mark)
     894     byteorder == 1:  big endian
     895  
     896     If byteorder is 0, the output string will always start with the
     897     Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
     898     prepended.
     899  */
     900  PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
     901      PyObject* unicode,          /* Unicode object */
     902      const char *errors,         /* error handling */
     903      int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
     904      );
     905  
     906  /* --- Unicode-Escape Codecs ---------------------------------------------- */
     907  
     908  /* Variant of PyUnicode_DecodeUnicodeEscape that supports partial decoding. */
     909  PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeStateful(
     910          const char *string,     /* Unicode-Escape encoded string */
     911          Py_ssize_t length,      /* size of string */
     912          const char *errors,     /* error handling */
     913          Py_ssize_t *consumed    /* bytes consumed */
     914  );
     915  /* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
     916     chars. */
     917  PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
     918          const char *string,     /* Unicode-Escape encoded string */
     919          Py_ssize_t length,      /* size of string */
     920          const char *errors,     /* error handling */
     921          Py_ssize_t *consumed,   /* bytes consumed */
     922          const char **first_invalid_escape  /* on return, points to first
     923                                                invalid escaped char in
     924                                                string. */
     925  );
     926  
     927  /* --- Raw-Unicode-Escape Codecs ---------------------------------------------- */
     928  
     929  /* Variant of PyUnicode_DecodeRawUnicodeEscape that supports partial decoding. */
     930  PyAPI_FUNC(PyObject*) _PyUnicode_DecodeRawUnicodeEscapeStateful(
     931          const char *string,     /* Unicode-Escape encoded string */
     932          Py_ssize_t length,      /* size of string */
     933          const char *errors,     /* error handling */
     934          Py_ssize_t *consumed    /* bytes consumed */
     935  );
     936  
     937  /* --- Latin-1 Codecs ----------------------------------------------------- */
     938  
     939  PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
     940      PyObject* unicode,
     941      const char* errors);
     942  
     943  /* --- ASCII Codecs ------------------------------------------------------- */
     944  
     945  PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
     946      PyObject* unicode,
     947      const char* errors);
     948  
     949  /* --- Character Map Codecs ----------------------------------------------- */
     950  
     951  /* Translate an Unicode object by applying a character mapping table to
     952     it and return the resulting Unicode object.
     953  
     954     The mapping table must map Unicode ordinal integers to Unicode strings,
     955     Unicode ordinal integers or None (causing deletion of the character).
     956  
     957     Mapping tables may be dictionaries or sequences. Unmapped character
     958     ordinals (ones which cause a LookupError) are left untouched and
     959     are copied as-is.
     960  */
     961  PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
     962      PyObject *unicode,          /* Unicode object */
     963      PyObject *mapping,          /* encoding mapping */
     964      const char *errors          /* error handling */
     965      );
     966  
     967  /* --- Decimal Encoder ---------------------------------------------------- */
     968  
     969  /* Coverts a Unicode object holding a decimal value to an ASCII string
     970     for using in int, float and complex parsers.
     971     Transforms code points that have decimal digit property to the
     972     corresponding ASCII digit code points.  Transforms spaces to ASCII.
     973     Transforms code points starting from the first non-ASCII code point that
     974     is neither a decimal digit nor a space to the end into '?'. */
     975  
     976  PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
     977      PyObject *unicode           /* Unicode object */
     978      );
     979  
     980  /* --- Methods & Slots ---------------------------------------------------- */
     981  
     982  PyAPI_FUNC(PyObject *) _PyUnicode_JoinArray(
     983      PyObject *separator,
     984      PyObject *const *items,
     985      Py_ssize_t seqlen
     986      );
     987  
     988  /* Test whether a unicode is equal to ASCII identifier.  Return 1 if true,
     989     0 otherwise.  The right argument must be ASCII identifier.
     990     Any error occurs inside will be cleared before return. */
     991  PyAPI_FUNC(int) _PyUnicode_EqualToASCIIId(
     992      PyObject *left,             /* Left string */
     993      _Py_Identifier *right       /* Right identifier */
     994      );
     995  
     996  /* Test whether a unicode is equal to ASCII string.  Return 1 if true,
     997     0 otherwise.  The right argument must be ASCII-encoded string.
     998     Any error occurs inside will be cleared before return. */
     999  PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString(
    1000      PyObject *left,
    1001      const char *right           /* ASCII-encoded string */
    1002      );
    1003  
    1004  /* Externally visible for str.strip(unicode) */
    1005  PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
    1006      PyObject *self,
    1007      int striptype,
    1008      PyObject *sepobj
    1009      );
    1010  
    1011  /* Using explicit passed-in values, insert the thousands grouping
    1012     into the string pointed to by buffer.  For the argument descriptions,
    1013     see Objects/stringlib/localeutil.h */
    1014  PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
    1015      _PyUnicodeWriter *writer,
    1016      Py_ssize_t n_buffer,
    1017      PyObject *digits,
    1018      Py_ssize_t d_pos,
    1019      Py_ssize_t n_digits,
    1020      Py_ssize_t min_width,
    1021      const char *grouping,
    1022      PyObject *thousands_sep,
    1023      Py_UCS4 *maxchar);
    1024  
    1025  /* === Characters Type APIs =============================================== */
    1026  
/* Helper array used by Py_UNICODE_ISSPACE(): that macro indexes it with the
   character itself whenever the character is below 128, and treats a
   non-zero entry as "is whitespace". */

PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
    1030  
    1031  /* These should not be used directly. Use the Py_UNICODE_IS* and
    1032     Py_UNICODE_TO* macros instead.
    1033  
    1034     These APIs are implemented in Objects/unicodectype.c.
    1035  
    1036  */
    1037  
    1038  PyAPI_FUNC(int) _PyUnicode_IsLowercase(
    1039      Py_UCS4 ch       /* Unicode character */
    1040      );
    1041  
    1042  PyAPI_FUNC(int) _PyUnicode_IsUppercase(
    1043      Py_UCS4 ch       /* Unicode character */
    1044      );
    1045  
    1046  PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
    1047      Py_UCS4 ch       /* Unicode character */
    1048      );
    1049  
    1050  PyAPI_FUNC(int) _PyUnicode_IsXidStart(
    1051      Py_UCS4 ch       /* Unicode character */
    1052      );
    1053  
    1054  PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
    1055      Py_UCS4 ch       /* Unicode character */
    1056      );
    1057  
    1058  PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
    1059      const Py_UCS4 ch         /* Unicode character */
    1060      );
    1061  
    1062  PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
    1063      const Py_UCS4 ch         /* Unicode character */
    1064      );
    1065  
    1066  /* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
    1067      Py_UCS4 ch       /* Unicode character */
    1068      );
    1069  
    1070  /* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
    1071      Py_UCS4 ch       /* Unicode character */
    1072      );
    1073  
    1074  Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
    1075      Py_UCS4 ch       /* Unicode character */
    1076      );
    1077  
    1078  PyAPI_FUNC(int) _PyUnicode_ToLowerFull(
    1079      Py_UCS4 ch,       /* Unicode character */
    1080      Py_UCS4 *res
    1081      );
    1082  
    1083  PyAPI_FUNC(int) _PyUnicode_ToTitleFull(
    1084      Py_UCS4 ch,       /* Unicode character */
    1085      Py_UCS4 *res
    1086      );
    1087  
    1088  PyAPI_FUNC(int) _PyUnicode_ToUpperFull(
    1089      Py_UCS4 ch,       /* Unicode character */
    1090      Py_UCS4 *res
    1091      );
    1092  
    1093  PyAPI_FUNC(int) _PyUnicode_ToFoldedFull(
    1094      Py_UCS4 ch,       /* Unicode character */
    1095      Py_UCS4 *res
    1096      );
    1097  
    1098  PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable(
    1099      Py_UCS4 ch         /* Unicode character */
    1100      );
    1101  
    1102  PyAPI_FUNC(int) _PyUnicode_IsCased(
    1103      Py_UCS4 ch         /* Unicode character */
    1104      );
    1105  
    1106  PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
    1107      Py_UCS4 ch       /* Unicode character */
    1108      );
    1109  
    1110  PyAPI_FUNC(int) _PyUnicode_ToDigit(
    1111      Py_UCS4 ch       /* Unicode character */
    1112      );
    1113  
    1114  PyAPI_FUNC(double) _PyUnicode_ToNumeric(
    1115      Py_UCS4 ch       /* Unicode character */
    1116      );
    1117  
    1118  PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
    1119      Py_UCS4 ch       /* Unicode character */
    1120      );
    1121  
    1122  PyAPI_FUNC(int) _PyUnicode_IsDigit(
    1123      Py_UCS4 ch       /* Unicode character */
    1124      );
    1125  
    1126  PyAPI_FUNC(int) _PyUnicode_IsNumeric(
    1127      Py_UCS4 ch       /* Unicode character */
    1128      );
    1129  
    1130  PyAPI_FUNC(int) _PyUnicode_IsPrintable(
    1131      Py_UCS4 ch       /* Unicode character */
    1132      );
    1133  
    1134  PyAPI_FUNC(int) _PyUnicode_IsAlpha(
    1135      Py_UCS4 ch       /* Unicode character */
    1136      );
    1137  
/* NOTE(review): the parameters are unnamed in this header; their meaning
   must be taken from the definition. */
PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int);

/* Return an interned Unicode object for an Identifier; may fail if there is
   no memory. */
PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);

/* Fast equality check when the inputs are known to be exact unicode types
   and where the hash values are equal (i.e. a very probable match) */
PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *);

/* Equality check. Returns -1 on failure. */
PyAPI_FUNC(int) _PyUnicode_Equal(PyObject *, PyObject *);

/* NOTE(review): the (PyObject *, void *) -> int shape suggests argument
   converter callbacks -- confirm against the definitions. */
PyAPI_FUNC(int) _PyUnicode_WideCharString_Converter(PyObject *, void *);
PyAPI_FUNC(int) _PyUnicode_WideCharString_Opt_Converter(PyObject *, void *);

/* NOTE(review): no contract is documented here; see the definition for the
   meaning of the Py_ssize_t result. */
PyAPI_FUNC(Py_ssize_t) _PyUnicode_ScanIdentifier(PyObject *);