(root)/
Python-3.11.7/
Modules/
_codecsmodule.c
       1  /* ------------------------------------------------------------------------
       2  
       3     _codecs -- Provides access to the codec registry and the builtin
       4                codecs.
       5  
       6     This module should never be imported directly. The standard library
       7     module "codecs" wraps this builtin module for use within Python.
       8  
       9     The codec registry is accessible via:
      10  
      11       register(search_function) -> None
      12  
      13       lookup(encoding) -> CodecInfo object
      14  
      15     The builtin Unicode codecs use the following interface:
      16  
     <encoding>_encode(Unicode_object[,errors='strict']) ->
        (bytes object, characters consumed)
      19  
      20       <encoding>_decode(char_buffer_obj[,errors='strict']) ->
      21          (Unicode object, bytes consumed)
      22  
   These <encoding>s are available: utf_7, utf_8, utf_16, utf_32,
   unicode_escape, raw_unicode_escape, latin_1, ascii (7-bit), charmap,
   and mbcs, oem and code_page (on win32).
      25  
      26  
      27  Written by Marc-Andre Lemburg (mal@lemburg.com).
      28  
      29  Copyright (c) Corporation for National Research Initiatives.
      30  
      31     ------------------------------------------------------------------------ */
      32  
      33  #define PY_SSIZE_T_CLEAN
      34  #include "Python.h"
      35  
      36  #ifdef MS_WINDOWS
      37  #include <windows.h>
      38  #endif
      39  
      40  /*[clinic input]
      41  module _codecs
      42  [clinic start generated code]*/
      43  /*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
      44  
      45  #include "clinic/_codecsmodule.c.h"
      46  
      47  /* --- Registry ----------------------------------------------------------- */
      48  
      49  /*[clinic input]
      50  _codecs.register
      51      search_function: object
      52      /
      53  
      54  Register a codec search function.
      55  
      56  Search functions are expected to take one argument, the encoding name in
      57  all lower case letters, and either return None, or a tuple of functions
      58  (encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object).
      59  [clinic start generated code]*/
      60  
      61  static PyObject *
      62  _codecs_register(PyObject *module, PyObject *search_function)
      63  /*[clinic end generated code: output=d1bf21e99db7d6d3 input=369578467955cae4]*/
      64  {
      65      if (PyCodec_Register(search_function))
      66          return NULL;
      67  
      68      Py_RETURN_NONE;
      69  }
      70  
      71  /*[clinic input]
      72  _codecs.unregister
      73      search_function: object
      74      /
      75  
      76  Unregister a codec search function and clear the registry's cache.
      77  
      78  If the search function is not registered, do nothing.
      79  [clinic start generated code]*/
      80  
      81  static PyObject *
      82  _codecs_unregister(PyObject *module, PyObject *search_function)
      83  /*[clinic end generated code: output=1f0edee9cf246399 input=dd7c004c652d345e]*/
      84  {
      85      if (PyCodec_Unregister(search_function) < 0) {
      86          return NULL;
      87      }
      88  
      89      Py_RETURN_NONE;
      90  }
      91  
      92  /*[clinic input]
      93  _codecs.lookup
      94      encoding: str
      95      /
      96  
      97  Looks up a codec tuple in the Python codec registry and returns a CodecInfo object.
      98  [clinic start generated code]*/
      99  
     100  static PyObject *
     101  _codecs_lookup_impl(PyObject *module, const char *encoding)
     102  /*[clinic end generated code: output=9f0afa572080c36d input=3c572c0db3febe9c]*/
     103  {
     104      return _PyCodec_Lookup(encoding);
     105  }
     106  
     107  /*[clinic input]
     108  _codecs.encode
     109      obj: object
     110      encoding: str(c_default="NULL") = "utf-8"
     111      errors: str(c_default="NULL") = "strict"
     112  
     113  Encodes obj using the codec registered for encoding.
     114  
     115  The default encoding is 'utf-8'.  errors may be given to set a
     116  different error handling scheme.  Default is 'strict' meaning that encoding
     117  errors raise a ValueError.  Other possible values are 'ignore', 'replace'
     118  and 'backslashreplace' as well as any other name registered with
     119  codecs.register_error that can handle ValueErrors.
     120  [clinic start generated code]*/
     121  
     122  static PyObject *
     123  _codecs_encode_impl(PyObject *module, PyObject *obj, const char *encoding,
     124                      const char *errors)
     125  /*[clinic end generated code: output=385148eb9a067c86 input=cd5b685040ff61f0]*/
     126  {
     127      if (encoding == NULL)
     128          encoding = PyUnicode_GetDefaultEncoding();
     129  
     130      /* Encode via the codec registry */
     131      return PyCodec_Encode(obj, encoding, errors);
     132  }
     133  
     134  /*[clinic input]
     135  _codecs.decode
     136      obj: object
     137      encoding: str(c_default="NULL") = "utf-8"
     138      errors: str(c_default="NULL") = "strict"
     139  
     140  Decodes obj using the codec registered for encoding.
     141  
     142  Default encoding is 'utf-8'.  errors may be given to set a
     143  different error handling scheme.  Default is 'strict' meaning that encoding
     144  errors raise a ValueError.  Other possible values are 'ignore', 'replace'
     145  and 'backslashreplace' as well as any other name registered with
     146  codecs.register_error that can handle ValueErrors.
     147  [clinic start generated code]*/
     148  
     149  static PyObject *
     150  _codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding,
     151                      const char *errors)
     152  /*[clinic end generated code: output=679882417dc3a0bd input=7702c0cc2fa1add6]*/
     153  {
     154      if (encoding == NULL)
     155          encoding = PyUnicode_GetDefaultEncoding();
     156  
     157      /* Decode via the codec registry */
     158      return PyCodec_Decode(obj, encoding, errors);
     159  }
     160  
     161  /* --- Helpers ------------------------------------------------------------ */
     162  
     163  static
     164  PyObject *codec_tuple(PyObject *decoded,
     165                        Py_ssize_t len)
     166  {
     167      if (decoded == NULL)
     168          return NULL;
     169      return Py_BuildValue("Nn", decoded, len);
     170  }
     171  
     172  /* --- String codecs ------------------------------------------------------ */
     173  /*[clinic input]
     174  _codecs.escape_decode
     175      data: Py_buffer(accept={str, buffer})
     176      errors: str(accept={str, NoneType}) = None
     177      /
     178  [clinic start generated code]*/
     179  
     180  static PyObject *
     181  _codecs_escape_decode_impl(PyObject *module, Py_buffer *data,
     182                             const char *errors)
     183  /*[clinic end generated code: output=505200ba8056979a input=77298a561c90bd82]*/
     184  {
     185      PyObject *decoded = PyBytes_DecodeEscape(data->buf, data->len,
     186                                               errors, 0, NULL);
     187      return codec_tuple(decoded, data->len);
     188  }
     189  
     190  /*[clinic input]
     191  _codecs.escape_encode
     192      data: object(subclass_of='&PyBytes_Type')
     193      errors: str(accept={str, NoneType}) = None
     194      /
     195  [clinic start generated code]*/
     196  
     197  static PyObject *
     198  _codecs_escape_encode_impl(PyObject *module, PyObject *data,
     199                             const char *errors)
     200  /*[clinic end generated code: output=4af1d477834bab34 input=8f4b144799a94245]*/
     201  {
     202      Py_ssize_t size;
     203      Py_ssize_t newsize;
     204      PyObject *v;
     205  
     206      size = PyBytes_GET_SIZE(data);
     207      if (size > PY_SSIZE_T_MAX / 4) {
     208          PyErr_SetString(PyExc_OverflowError,
     209              "string is too large to encode");
     210              return NULL;
     211      }
     212      newsize = 4*size;
     213      v = PyBytes_FromStringAndSize(NULL, newsize);
     214  
     215      if (v == NULL) {
     216          return NULL;
     217      }
     218      else {
     219          Py_ssize_t i;
     220          char c;
     221          char *p = PyBytes_AS_STRING(v);
     222  
     223          for (i = 0; i < size; i++) {
     224              /* There's at least enough room for a hex escape */
     225              assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
     226              c = PyBytes_AS_STRING(data)[i];
     227              if (c == '\'' || c == '\\')
     228                  *p++ = '\\', *p++ = c;
     229              else if (c == '\t')
     230                  *p++ = '\\', *p++ = 't';
     231              else if (c == '\n')
     232                  *p++ = '\\', *p++ = 'n';
     233              else if (c == '\r')
     234                  *p++ = '\\', *p++ = 'r';
     235              else if (c < ' ' || c >= 0x7f) {
     236                  *p++ = '\\';
     237                  *p++ = 'x';
     238                  *p++ = Py_hexdigits[(c & 0xf0) >> 4];
     239                  *p++ = Py_hexdigits[c & 0xf];
     240              }
     241              else
     242                  *p++ = c;
     243          }
     244          *p = '\0';
     245          if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
     246              return NULL;
     247          }
     248      }
     249  
     250      return codec_tuple(v, size);
     251  }
     252  
     253  /* --- Decoder ------------------------------------------------------------ */
     254  /*[clinic input]
     255  _codecs.utf_7_decode
     256      data: Py_buffer
     257      errors: str(accept={str, NoneType}) = None
     258      final: bool(accept={int}) = False
     259      /
     260  [clinic start generated code]*/
     261  
     262  static PyObject *
     263  _codecs_utf_7_decode_impl(PyObject *module, Py_buffer *data,
     264                            const char *errors, int final)
     265  /*[clinic end generated code: output=0cd3a944a32a4089 input=22c395d357815d26]*/
     266  {
     267      Py_ssize_t consumed = data->len;
     268      PyObject *decoded = PyUnicode_DecodeUTF7Stateful(data->buf, data->len,
     269                                                       errors,
     270                                                       final ? NULL : &consumed);
     271      return codec_tuple(decoded, consumed);
     272  }
     273  
     274  /*[clinic input]
     275  _codecs.utf_8_decode
     276      data: Py_buffer
     277      errors: str(accept={str, NoneType}) = None
     278      final: bool(accept={int}) = False
     279      /
     280  [clinic start generated code]*/
     281  
     282  static PyObject *
     283  _codecs_utf_8_decode_impl(PyObject *module, Py_buffer *data,
     284                            const char *errors, int final)
     285  /*[clinic end generated code: output=10f74dec8d9bb8bf input=f611b3867352ba59]*/
     286  {
     287      Py_ssize_t consumed = data->len;
     288      PyObject *decoded = PyUnicode_DecodeUTF8Stateful(data->buf, data->len,
     289                                                       errors,
     290                                                       final ? NULL : &consumed);
     291      return codec_tuple(decoded, consumed);
     292  }
     293  
     294  /*[clinic input]
     295  _codecs.utf_16_decode
     296      data: Py_buffer
     297      errors: str(accept={str, NoneType}) = None
     298      final: bool(accept={int}) = False
     299      /
     300  [clinic start generated code]*/
     301  
     302  static PyObject *
     303  _codecs_utf_16_decode_impl(PyObject *module, Py_buffer *data,
     304                             const char *errors, int final)
     305  /*[clinic end generated code: output=783b442abcbcc2d0 input=191d360bd7309180]*/
     306  {
     307      int byteorder = 0;
     308      /* This is overwritten unless final is true. */
     309      Py_ssize_t consumed = data->len;
     310      PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
     311                                                        errors, &byteorder,
     312                                                        final ? NULL : &consumed);
     313      return codec_tuple(decoded, consumed);
     314  }
     315  
     316  /*[clinic input]
     317  _codecs.utf_16_le_decode
     318      data: Py_buffer
     319      errors: str(accept={str, NoneType}) = None
     320      final: bool(accept={int}) = False
     321      /
     322  [clinic start generated code]*/
     323  
     324  static PyObject *
     325  _codecs_utf_16_le_decode_impl(PyObject *module, Py_buffer *data,
     326                                const char *errors, int final)
     327  /*[clinic end generated code: output=899b9e6364379dcd input=c6904fdc27fb4724]*/
     328  {
     329      int byteorder = -1;
     330      /* This is overwritten unless final is true. */
     331      Py_ssize_t consumed = data->len;
     332      PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
     333                                                        errors, &byteorder,
     334                                                        final ? NULL : &consumed);
     335      return codec_tuple(decoded, consumed);
     336  }
     337  
     338  /*[clinic input]
     339  _codecs.utf_16_be_decode
     340      data: Py_buffer
     341      errors: str(accept={str, NoneType}) = None
     342      final: bool(accept={int}) = False
     343      /
     344  [clinic start generated code]*/
     345  
     346  static PyObject *
     347  _codecs_utf_16_be_decode_impl(PyObject *module, Py_buffer *data,
     348                                const char *errors, int final)
     349  /*[clinic end generated code: output=49f6465ea07669c8 input=e49012400974649b]*/
     350  {
     351      int byteorder = 1;
     352      /* This is overwritten unless final is true. */
     353      Py_ssize_t consumed = data->len;
     354      PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
     355                                                        errors, &byteorder,
     356                                                        final ? NULL : &consumed);
     357      return codec_tuple(decoded, consumed);
     358  }
     359  
     360  /* This non-standard version also provides access to the byteorder
     361     parameter of the builtin UTF-16 codec.
     362  
     363     It returns a tuple (unicode, bytesread, byteorder) with byteorder
     364     being the value in effect at the end of data.
     365  
     366  */
     367  /*[clinic input]
     368  _codecs.utf_16_ex_decode
     369      data: Py_buffer
     370      errors: str(accept={str, NoneType}) = None
     371      byteorder: int = 0
     372      final: bool(accept={int}) = False
     373      /
     374  [clinic start generated code]*/
     375  
     376  static PyObject *
     377  _codecs_utf_16_ex_decode_impl(PyObject *module, Py_buffer *data,
     378                                const char *errors, int byteorder, int final)
     379  /*[clinic end generated code: output=0f385f251ecc1988 input=5a9c19f2e6b6cf0e]*/
     380  {
     381      /* This is overwritten unless final is true. */
     382      Py_ssize_t consumed = data->len;
     383  
     384      PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
     385                                                        errors, &byteorder,
     386                                                        final ? NULL : &consumed);
     387      if (decoded == NULL)
     388          return NULL;
     389      return Py_BuildValue("Nni", decoded, consumed, byteorder);
     390  }
     391  
     392  /*[clinic input]
     393  _codecs.utf_32_decode
     394      data: Py_buffer
     395      errors: str(accept={str, NoneType}) = None
     396      final: bool(accept={int}) = False
     397      /
     398  [clinic start generated code]*/
     399  
     400  static PyObject *
     401  _codecs_utf_32_decode_impl(PyObject *module, Py_buffer *data,
     402                             const char *errors, int final)
     403  /*[clinic end generated code: output=2fc961807f7b145f input=fd7193965627eb58]*/
     404  {
     405      int byteorder = 0;
     406      /* This is overwritten unless final is true. */
     407      Py_ssize_t consumed = data->len;
     408      PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
     409                                                        errors, &byteorder,
     410                                                        final ? NULL : &consumed);
     411      return codec_tuple(decoded, consumed);
     412  }
     413  
     414  /*[clinic input]
     415  _codecs.utf_32_le_decode
     416      data: Py_buffer
     417      errors: str(accept={str, NoneType}) = None
     418      final: bool(accept={int}) = False
     419      /
     420  [clinic start generated code]*/
     421  
     422  static PyObject *
     423  _codecs_utf_32_le_decode_impl(PyObject *module, Py_buffer *data,
     424                                const char *errors, int final)
     425  /*[clinic end generated code: output=ec8f46b67a94f3e6 input=9078ec70acfe7613]*/
     426  {
     427      int byteorder = -1;
     428      /* This is overwritten unless final is true. */
     429      Py_ssize_t consumed = data->len;
     430      PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
     431                                                        errors, &byteorder,
     432                                                        final ? NULL : &consumed);
     433      return codec_tuple(decoded, consumed);
     434  }
     435  
     436  /*[clinic input]
     437  _codecs.utf_32_be_decode
     438      data: Py_buffer
     439      errors: str(accept={str, NoneType}) = None
     440      final: bool(accept={int}) = False
     441      /
     442  [clinic start generated code]*/
     443  
     444  static PyObject *
     445  _codecs_utf_32_be_decode_impl(PyObject *module, Py_buffer *data,
     446                                const char *errors, int final)
     447  /*[clinic end generated code: output=ff82bae862c92c4e input=f1ae1bbbb86648ff]*/
     448  {
     449      int byteorder = 1;
     450      /* This is overwritten unless final is true. */
     451      Py_ssize_t consumed = data->len;
     452      PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
     453                                                        errors, &byteorder,
     454                                                        final ? NULL : &consumed);
     455      return codec_tuple(decoded, consumed);
     456  }
     457  
     458  /* This non-standard version also provides access to the byteorder
     459     parameter of the builtin UTF-32 codec.
     460  
     461     It returns a tuple (unicode, bytesread, byteorder) with byteorder
     462     being the value in effect at the end of data.
     463  
     464  */
     465  /*[clinic input]
     466  _codecs.utf_32_ex_decode
     467      data: Py_buffer
     468      errors: str(accept={str, NoneType}) = None
     469      byteorder: int = 0
     470      final: bool(accept={int}) = False
     471      /
     472  [clinic start generated code]*/
     473  
     474  static PyObject *
     475  _codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data,
     476                                const char *errors, int byteorder, int final)
     477  /*[clinic end generated code: output=6bfb177dceaf4848 input=e46a73bc859d0bd0]*/
     478  {
     479      Py_ssize_t consumed = data->len;
     480      PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
     481                                                        errors, &byteorder,
     482                                                        final ? NULL : &consumed);
     483      if (decoded == NULL)
     484          return NULL;
     485      return Py_BuildValue("Nni", decoded, consumed, byteorder);
     486  }
     487  
     488  /*[clinic input]
     489  _codecs.unicode_escape_decode
     490      data: Py_buffer(accept={str, buffer})
     491      errors: str(accept={str, NoneType}) = None
     492      final: bool(accept={int}) = True
     493      /
     494  [clinic start generated code]*/
     495  
     496  static PyObject *
     497  _codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
     498                                     const char *errors, int final)
     499  /*[clinic end generated code: output=b284f97b12c635ee input=6154f039a9f7c639]*/
     500  {
     501      Py_ssize_t consumed = data->len;
     502      PyObject *decoded = _PyUnicode_DecodeUnicodeEscapeStateful(data->buf, data->len,
     503                                                                 errors,
     504                                                                 final ? NULL : &consumed);
     505      return codec_tuple(decoded, consumed);
     506  }
     507  
     508  /*[clinic input]
     509  _codecs.raw_unicode_escape_decode
     510      data: Py_buffer(accept={str, buffer})
     511      errors: str(accept={str, NoneType}) = None
     512      final: bool(accept={int}) = True
     513      /
     514  [clinic start generated code]*/
     515  
     516  static PyObject *
     517  _codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
     518                                         const char *errors, int final)
     519  /*[clinic end generated code: output=11dbd96301e2879e input=2d166191beb3235a]*/
     520  {
     521      Py_ssize_t consumed = data->len;
     522      PyObject *decoded = _PyUnicode_DecodeRawUnicodeEscapeStateful(data->buf, data->len,
     523                                                                    errors,
     524                                                                    final ? NULL : &consumed);
     525      return codec_tuple(decoded, consumed);
     526  }
     527  
     528  /*[clinic input]
     529  _codecs.latin_1_decode
     530      data: Py_buffer
     531      errors: str(accept={str, NoneType}) = None
     532      /
     533  [clinic start generated code]*/
     534  
     535  static PyObject *
     536  _codecs_latin_1_decode_impl(PyObject *module, Py_buffer *data,
     537                              const char *errors)
     538  /*[clinic end generated code: output=07f3dfa3f72c7d8f input=76ca58fd6dcd08c7]*/
     539  {
     540      PyObject *decoded = PyUnicode_DecodeLatin1(data->buf, data->len, errors);
     541      return codec_tuple(decoded, data->len);
     542  }
     543  
     544  /*[clinic input]
     545  _codecs.ascii_decode
     546      data: Py_buffer
     547      errors: str(accept={str, NoneType}) = None
     548      /
     549  [clinic start generated code]*/
     550  
     551  static PyObject *
     552  _codecs_ascii_decode_impl(PyObject *module, Py_buffer *data,
     553                            const char *errors)
     554  /*[clinic end generated code: output=2627d72058d42429 input=e428a267a04b4481]*/
     555  {
     556      PyObject *decoded = PyUnicode_DecodeASCII(data->buf, data->len, errors);
     557      return codec_tuple(decoded, data->len);
     558  }
     559  
     560  /*[clinic input]
     561  _codecs.charmap_decode
     562      data: Py_buffer
     563      errors: str(accept={str, NoneType}) = None
     564      mapping: object = None
     565      /
     566  [clinic start generated code]*/
     567  
     568  static PyObject *
     569  _codecs_charmap_decode_impl(PyObject *module, Py_buffer *data,
     570                              const char *errors, PyObject *mapping)
     571  /*[clinic end generated code: output=2c335b09778cf895 input=15b69df43458eb40]*/
     572  {
     573      PyObject *decoded;
     574  
     575      if (mapping == Py_None)
     576          mapping = NULL;
     577  
     578      decoded = PyUnicode_DecodeCharmap(data->buf, data->len, mapping, errors);
     579      return codec_tuple(decoded, data->len);
     580  }
     581  
     582  #ifdef MS_WINDOWS
     583  
     584  /*[clinic input]
     585  _codecs.mbcs_decode
     586      data: Py_buffer
     587      errors: str(accept={str, NoneType}) = None
     588      final: bool(accept={int}) = False
     589      /
     590  [clinic start generated code]*/
     591  
     592  static PyObject *
     593  _codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data,
     594                           const char *errors, int final)
     595  /*[clinic end generated code: output=39b65b8598938c4b input=1c1d50f08fa53789]*/
     596  {
     597      Py_ssize_t consumed = data->len;
     598      PyObject *decoded = PyUnicode_DecodeMBCSStateful(data->buf, data->len,
     599              errors, final ? NULL : &consumed);
     600      return codec_tuple(decoded, consumed);
     601  }
     602  
     603  /*[clinic input]
     604  _codecs.oem_decode
     605      data: Py_buffer
     606      errors: str(accept={str, NoneType}) = None
     607      final: bool(accept={int}) = False
     608      /
     609  [clinic start generated code]*/
     610  
     611  static PyObject *
     612  _codecs_oem_decode_impl(PyObject *module, Py_buffer *data,
     613                          const char *errors, int final)
     614  /*[clinic end generated code: output=da1617612f3fcad8 input=81b67cba811022e5]*/
     615  {
     616      Py_ssize_t consumed = data->len;
     617      PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP,
     618          data->buf, data->len, errors, final ? NULL : &consumed);
     619      return codec_tuple(decoded, consumed);
     620  }
     621  
     622  /*[clinic input]
     623  _codecs.code_page_decode
     624      codepage: int
     625      data: Py_buffer
     626      errors: str(accept={str, NoneType}) = None
     627      final: bool(accept={int}) = False
     628      /
     629  [clinic start generated code]*/
     630  
     631  static PyObject *
     632  _codecs_code_page_decode_impl(PyObject *module, int codepage,
     633                                Py_buffer *data, const char *errors, int final)
     634  /*[clinic end generated code: output=53008ea967da3fff input=c5f58d036cb63575]*/
     635  {
     636      Py_ssize_t consumed = data->len;
     637      PyObject *decoded = PyUnicode_DecodeCodePageStateful(codepage,
     638                                                           data->buf, data->len,
     639                                                           errors,
     640                                                           final ? NULL : &consumed);
     641      return codec_tuple(decoded, consumed);
     642  }
     643  
     644  #endif /* MS_WINDOWS */
     645  
     646  /* --- Encoder ------------------------------------------------------------ */
     647  
     648  /*[clinic input]
     649  _codecs.readbuffer_encode
     650      data: Py_buffer(accept={str, buffer})
     651      errors: str(accept={str, NoneType}) = None
     652      /
     653  [clinic start generated code]*/
     654  
     655  static PyObject *
     656  _codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,
     657                                 const char *errors)
     658  /*[clinic end generated code: output=c645ea7cdb3d6e86 input=aa10cfdf252455c5]*/
     659  {
     660      PyObject *result = PyBytes_FromStringAndSize(data->buf, data->len);
     661      return codec_tuple(result, data->len);
     662  }
     663  
     664  /*[clinic input]
     665  _codecs.utf_7_encode
     666      str: unicode
     667      errors: str(accept={str, NoneType}) = None
     668      /
     669  [clinic start generated code]*/
     670  
     671  static PyObject *
     672  _codecs_utf_7_encode_impl(PyObject *module, PyObject *str,
     673                            const char *errors)
     674  /*[clinic end generated code: output=0feda21ffc921bc8 input=2546dbbb3fa53114]*/
     675  {
     676      return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
     677                         PyUnicode_GET_LENGTH(str));
     678  }
     679  
     680  /*[clinic input]
     681  _codecs.utf_8_encode
     682      str: unicode
     683      errors: str(accept={str, NoneType}) = None
     684      /
     685  [clinic start generated code]*/
     686  
     687  static PyObject *
     688  _codecs_utf_8_encode_impl(PyObject *module, PyObject *str,
     689                            const char *errors)
     690  /*[clinic end generated code: output=02bf47332b9c796c input=a3e71ae01c3f93f3]*/
     691  {
     692      return codec_tuple(_PyUnicode_AsUTF8String(str, errors),
     693                         PyUnicode_GET_LENGTH(str));
     694  }
     695  
     696  /* This version provides access to the byteorder parameter of the
     697     builtin UTF-16 codecs as optional third argument. It defaults to 0
     698     which means: use the native byte order and prepend the data with a
     699     BOM mark.
     700  
     701  */
     702  
     703  /*[clinic input]
     704  _codecs.utf_16_encode
     705      str: unicode
     706      errors: str(accept={str, NoneType}) = None
     707      byteorder: int = 0
     708      /
     709  [clinic start generated code]*/
     710  
     711  static PyObject *
     712  _codecs_utf_16_encode_impl(PyObject *module, PyObject *str,
     713                             const char *errors, int byteorder)
     714  /*[clinic end generated code: output=c654e13efa2e64e4 input=68cdc2eb8338555d]*/
     715  {
     716      return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
     717                         PyUnicode_GET_LENGTH(str));
     718  }
     719  
     720  /*[clinic input]
     721  _codecs.utf_16_le_encode
     722      str: unicode
     723      errors: str(accept={str, NoneType}) = None
     724      /
     725  [clinic start generated code]*/
     726  
     727  static PyObject *
     728  _codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str,
     729                                const char *errors)
     730  /*[clinic end generated code: output=431b01e55f2d4995 input=83d042706eed6798]*/
     731  {
     732      return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
     733                         PyUnicode_GET_LENGTH(str));
     734  }
     735  
     736  /*[clinic input]
     737  _codecs.utf_16_be_encode
     738      str: unicode
     739      errors: str(accept={str, NoneType}) = None
     740      /
     741  [clinic start generated code]*/
     742  
     743  static PyObject *
     744  _codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str,
     745                                const char *errors)
     746  /*[clinic end generated code: output=96886a6fd54dcae3 input=6f1e9e623b03071b]*/
     747  {
     748      return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
     749                         PyUnicode_GET_LENGTH(str));
     750  }
     751  
/* This version provides access to the byteorder parameter of the
   builtin UTF-32 codecs as an optional third argument. It defaults to 0,
   which means: use the native byte order and prepend a BOM (byte order
   mark) to the data.
*/
     758  
     759  /*[clinic input]
     760  _codecs.utf_32_encode
     761      str: unicode
     762      errors: str(accept={str, NoneType}) = None
     763      byteorder: int = 0
     764      /
     765  [clinic start generated code]*/
     766  
     767  static PyObject *
     768  _codecs_utf_32_encode_impl(PyObject *module, PyObject *str,
     769                             const char *errors, int byteorder)
     770  /*[clinic end generated code: output=5c760da0c09a8b83 input=8ec4c64d983bc52b]*/
     771  {
     772      return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
     773                         PyUnicode_GET_LENGTH(str));
     774  }
     775  
     776  /*[clinic input]
     777  _codecs.utf_32_le_encode
     778      str: unicode
     779      errors: str(accept={str, NoneType}) = None
     780      /
     781  [clinic start generated code]*/
     782  
     783  static PyObject *
     784  _codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str,
     785                                const char *errors)
     786  /*[clinic end generated code: output=b65cd176de8e36d6 input=f0918d41de3eb1b1]*/
     787  {
     788      return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
     789                         PyUnicode_GET_LENGTH(str));
     790  }
     791  
     792  /*[clinic input]
     793  _codecs.utf_32_be_encode
     794      str: unicode
     795      errors: str(accept={str, NoneType}) = None
     796      /
     797  [clinic start generated code]*/
     798  
     799  static PyObject *
     800  _codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str,
     801                                const char *errors)
     802  /*[clinic end generated code: output=1d9e71a9358709e9 input=967a99a95748b557]*/
     803  {
     804      return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
     805                         PyUnicode_GET_LENGTH(str));
     806  }
     807  
     808  /*[clinic input]
     809  _codecs.unicode_escape_encode
     810      str: unicode
     811      errors: str(accept={str, NoneType}) = None
     812      /
     813  [clinic start generated code]*/
     814  
     815  static PyObject *
     816  _codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str,
     817                                     const char *errors)
     818  /*[clinic end generated code: output=66271b30bc4f7a3c input=8c4de07597054e33]*/
     819  {
     820      return codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
     821                         PyUnicode_GET_LENGTH(str));
     822  }
     823  
     824  /*[clinic input]
     825  _codecs.raw_unicode_escape_encode
     826      str: unicode
     827      errors: str(accept={str, NoneType}) = None
     828      /
     829  [clinic start generated code]*/
     830  
     831  static PyObject *
     832  _codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str,
     833                                         const char *errors)
     834  /*[clinic end generated code: output=a66a806ed01c830a input=4aa6f280d78e4574]*/
     835  {
     836      return codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
     837                         PyUnicode_GET_LENGTH(str));
     838  }
     839  
     840  /*[clinic input]
     841  _codecs.latin_1_encode
     842      str: unicode
     843      errors: str(accept={str, NoneType}) = None
     844      /
     845  [clinic start generated code]*/
     846  
     847  static PyObject *
     848  _codecs_latin_1_encode_impl(PyObject *module, PyObject *str,
     849                              const char *errors)
     850  /*[clinic end generated code: output=2c28c83a27884e08 input=ec3ef74bf85c5c5d]*/
     851  {
     852      return codec_tuple(_PyUnicode_AsLatin1String(str, errors),
     853                         PyUnicode_GET_LENGTH(str));
     854  }
     855  
     856  /*[clinic input]
     857  _codecs.ascii_encode
     858      str: unicode
     859      errors: str(accept={str, NoneType}) = None
     860      /
     861  [clinic start generated code]*/
     862  
     863  static PyObject *
     864  _codecs_ascii_encode_impl(PyObject *module, PyObject *str,
     865                            const char *errors)
     866  /*[clinic end generated code: output=b5e035182d33befc input=93e6e602838bd3de]*/
     867  {
     868      return codec_tuple(_PyUnicode_AsASCIIString(str, errors),
     869                         PyUnicode_GET_LENGTH(str));
     870  }
     871  
     872  /*[clinic input]
     873  _codecs.charmap_encode
     874      str: unicode
     875      errors: str(accept={str, NoneType}) = None
     876      mapping: object = None
     877      /
     878  [clinic start generated code]*/
     879  
     880  static PyObject *
     881  _codecs_charmap_encode_impl(PyObject *module, PyObject *str,
     882                              const char *errors, PyObject *mapping)
     883  /*[clinic end generated code: output=047476f48495a9e9 input=2a98feae73dadce8]*/
     884  {
     885      if (mapping == Py_None)
     886          mapping = NULL;
     887  
     888      return codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
     889                         PyUnicode_GET_LENGTH(str));
     890  }
     891  
     892  /*[clinic input]
     893  _codecs.charmap_build
     894      map: unicode
     895      /
     896  [clinic start generated code]*/
     897  
     898  static PyObject *
     899  _codecs_charmap_build_impl(PyObject *module, PyObject *map)
     900  /*[clinic end generated code: output=bb073c27031db9ac input=d91a91d1717dbc6d]*/
     901  {
     902      return PyUnicode_BuildEncodingMap(map);
     903  }
     904  
     905  #ifdef MS_WINDOWS
     906  
     907  /*[clinic input]
     908  _codecs.mbcs_encode
     909      str: unicode
     910      errors: str(accept={str, NoneType}) = None
     911      /
     912  [clinic start generated code]*/
     913  
     914  static PyObject *
     915  _codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors)
     916  /*[clinic end generated code: output=76e2e170c966c080 input=2e932fc289ea5a5b]*/
     917  {
     918      return codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
     919                         PyUnicode_GET_LENGTH(str));
     920  }
     921  
     922  /*[clinic input]
     923  _codecs.oem_encode
     924      str: unicode
     925      errors: str(accept={str, NoneType}) = None
     926      /
     927  [clinic start generated code]*/
     928  
     929  static PyObject *
     930  _codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors)
     931  /*[clinic end generated code: output=65d5982c737de649 input=9eac86dc21eb14f2]*/
     932  {
     933      return codec_tuple(PyUnicode_EncodeCodePage(CP_OEMCP, str, errors),
     934          PyUnicode_GET_LENGTH(str));
     935  }
     936  
     937  /*[clinic input]
     938  _codecs.code_page_encode
     939      code_page: int
     940      str: unicode
     941      errors: str(accept={str, NoneType}) = None
     942      /
     943  [clinic start generated code]*/
     944  
     945  static PyObject *
     946  _codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str,
     947                                const char *errors)
     948  /*[clinic end generated code: output=45673f6085657a9e input=7d18a33bc8cd0f94]*/
     949  {
     950      return codec_tuple(PyUnicode_EncodeCodePage(code_page, str, errors),
     951                         PyUnicode_GET_LENGTH(str));
     952  }
     953  
     954  #endif /* MS_WINDOWS */
     955  
     956  /* --- Error handler registry --------------------------------------------- */
     957  
     958  /*[clinic input]
     959  _codecs.register_error
     960      errors: str
     961      handler: object
     962      /
     963  
     964  Register the specified error handler under the name errors.
     965  
     966  handler must be a callable object, that will be called with an exception
     967  instance containing information about the location of the encoding/decoding
     968  error and must return a (replacement, new position) tuple.
     969  [clinic start generated code]*/
     970  
     971  static PyObject *
     972  _codecs_register_error_impl(PyObject *module, const char *errors,
     973                              PyObject *handler)
     974  /*[clinic end generated code: output=fa2f7d1879b3067d input=5e6709203c2e33fe]*/
     975  {
     976      if (PyCodec_RegisterError(errors, handler))
     977          return NULL;
     978      Py_RETURN_NONE;
     979  }
     980  
     981  /*[clinic input]
     982  _codecs.lookup_error
     983      name: str
     984      /
     985  
     986  lookup_error(errors) -> handler
     987  
     988  Return the error handler for the specified error handling name or raise a
     989  LookupError, if no handler exists under this name.
     990  [clinic start generated code]*/
     991  
     992  static PyObject *
     993  _codecs_lookup_error_impl(PyObject *module, const char *name)
     994  /*[clinic end generated code: output=087f05dc0c9a98cc input=4775dd65e6235aba]*/
     995  {
     996      return PyCodec_LookupError(name);
     997  }
     998  
     999  /* --- Module API --------------------------------------------------------- */
    1000  
/* Method table of the _codecs module, referenced by the module definition
   below.  The *_METHODDEF macros are not defined in this part of the file;
   presumably they come from the Argument Clinic generated header
   "clinic/_codecsmodule.c.h" included near the top -- confirm there.
   Entries are not separated by commas here, so each macro is expected to
   supply its own trailing comma (or expand to nothing). */
static PyMethodDef _codecs_functions[] = {
    /* Codec registry and generic encode/decode entry points */
    _CODECS_REGISTER_METHODDEF
    _CODECS_UNREGISTER_METHODDEF
    _CODECS_LOOKUP_METHODDEF
    _CODECS_ENCODE_METHODDEF
    _CODECS_DECODE_METHODDEF
    /* Bytes escape codec */
    _CODECS_ESCAPE_ENCODE_METHODDEF
    _CODECS_ESCAPE_DECODE_METHODDEF
    /* UTF-8 / UTF-7 */
    _CODECS_UTF_8_ENCODE_METHODDEF
    _CODECS_UTF_8_DECODE_METHODDEF
    _CODECS_UTF_7_ENCODE_METHODDEF
    _CODECS_UTF_7_DECODE_METHODDEF
    /* UTF-16 family */
    _CODECS_UTF_16_ENCODE_METHODDEF
    _CODECS_UTF_16_LE_ENCODE_METHODDEF
    _CODECS_UTF_16_BE_ENCODE_METHODDEF
    _CODECS_UTF_16_DECODE_METHODDEF
    _CODECS_UTF_16_LE_DECODE_METHODDEF
    _CODECS_UTF_16_BE_DECODE_METHODDEF
    _CODECS_UTF_16_EX_DECODE_METHODDEF
    /* UTF-32 family */
    _CODECS_UTF_32_ENCODE_METHODDEF
    _CODECS_UTF_32_LE_ENCODE_METHODDEF
    _CODECS_UTF_32_BE_ENCODE_METHODDEF
    _CODECS_UTF_32_DECODE_METHODDEF
    _CODECS_UTF_32_LE_DECODE_METHODDEF
    _CODECS_UTF_32_BE_DECODE_METHODDEF
    _CODECS_UTF_32_EX_DECODE_METHODDEF
    /* Unicode escape codecs */
    _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
    _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
    _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
    _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
    /* Single-byte and charmap codecs */
    _CODECS_LATIN_1_ENCODE_METHODDEF
    _CODECS_LATIN_1_DECODE_METHODDEF
    _CODECS_ASCII_ENCODE_METHODDEF
    _CODECS_ASCII_DECODE_METHODDEF
    _CODECS_CHARMAP_ENCODE_METHODDEF
    _CODECS_CHARMAP_DECODE_METHODDEF
    _CODECS_CHARMAP_BUILD_METHODDEF
    _CODECS_READBUFFER_ENCODE_METHODDEF
    /* NOTE(review): the mbcs/oem/code_page *_encode implementations in
       this file are compiled only under MS_WINDOWS, yet these entries are
       listed unconditionally; presumably the generated header defines the
       macros as empty on other platforms -- confirm. */
    _CODECS_MBCS_ENCODE_METHODDEF
    _CODECS_MBCS_DECODE_METHODDEF
    _CODECS_OEM_ENCODE_METHODDEF
    _CODECS_OEM_DECODE_METHODDEF
    _CODECS_CODE_PAGE_ENCODE_METHODDEF
    _CODECS_CODE_PAGE_DECODE_METHODDEF
    /* Error handler registry */
    _CODECS_REGISTER_ERROR_METHODDEF
    _CODECS_LOOKUP_ERROR_METHODDEF
    {NULL, NULL}                /* sentinel */
};
    1049  
    1050  static PyModuleDef_Slot _codecs_slots[] = {
    1051      {0, NULL}
    1052  };
    1053  
    1054  static struct PyModuleDef codecsmodule = {
    1055          PyModuleDef_HEAD_INIT,
    1056          "_codecs",
    1057          NULL,
    1058          0,
    1059          _codecs_functions,
    1060          _codecs_slots,
    1061          NULL,
    1062          NULL,
    1063          NULL
    1064  };
    1065  
    1066  PyMODINIT_FUNC
    1067  PyInit__codecs(void)
    1068  {
    1069      return PyModuleDef_Init(&codecsmodule);
    1070  }