(root)/
Python-3.12.0/
Modules/
_codecsmodule.c
       1  /* ------------------------------------------------------------------------
       2  
       3     _codecs -- Provides access to the codec registry and the builtin
       4                codecs.
       5  
       6     This module should never be imported directly. The standard library
       7     module "codecs" wraps this builtin module for use within Python.
       8  
       9     The codec registry is accessible via:
      10  
      11       register(search_function) -> None
      12  
      13       lookup(encoding) -> CodecInfo object
      14  
      15     The builtin Unicode codecs use the following interface:
      16  
     <encoding>_encode(Unicode_object[,errors='strict']) ->
        (bytes object, characters consumed)
      19  
      20       <encoding>_decode(char_buffer_obj[,errors='strict']) ->
      21          (Unicode object, bytes consumed)
      22  
      23     These <encoding>s are available: utf_8, unicode_escape,
      24     raw_unicode_escape, latin_1, ascii (7-bit), mbcs (on win32).
      25  
      26  
      27  Written by Marc-Andre Lemburg (mal@lemburg.com).
      28  
      29  Copyright (c) Corporation for National Research Initiatives.
      30  
      31     ------------------------------------------------------------------------ */
      32  
      33  #define PY_SSIZE_T_CLEAN
      34  #include "Python.h"
      35  
      36  #ifdef MS_WINDOWS
      37  #include <windows.h>
      38  #endif
      39  
      40  /*[clinic input]
      41  module _codecs
      42  [clinic start generated code]*/
      43  /*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
      44  
      45  #include "pycore_runtime.h"
      46  #include "clinic/_codecsmodule.c.h"
      47  
      48  /* --- Registry ----------------------------------------------------------- */
      49  
      50  /*[clinic input]
      51  _codecs.register
      52      search_function: object
      53      /
      54  
      55  Register a codec search function.
      56  
      57  Search functions are expected to take one argument, the encoding name in
      58  all lower case letters, and either return None, or a tuple of functions
      59  (encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object).
      60  [clinic start generated code]*/
      61  
      62  static PyObject *
      63  _codecs_register(PyObject *module, PyObject *search_function)
      64  /*[clinic end generated code: output=d1bf21e99db7d6d3 input=369578467955cae4]*/
      65  {
      66      if (PyCodec_Register(search_function))
      67          return NULL;
      68  
      69      Py_RETURN_NONE;
      70  }
      71  
      72  /*[clinic input]
      73  _codecs.unregister
      74      search_function: object
      75      /
      76  
      77  Unregister a codec search function and clear the registry's cache.
      78  
      79  If the search function is not registered, do nothing.
      80  [clinic start generated code]*/
      81  
      82  static PyObject *
      83  _codecs_unregister(PyObject *module, PyObject *search_function)
      84  /*[clinic end generated code: output=1f0edee9cf246399 input=dd7c004c652d345e]*/
      85  {
      86      if (PyCodec_Unregister(search_function) < 0) {
      87          return NULL;
      88      }
      89  
      90      Py_RETURN_NONE;
      91  }
      92  
      93  /*[clinic input]
      94  _codecs.lookup
      95      encoding: str
      96      /
      97  
      98  Looks up a codec tuple in the Python codec registry and returns a CodecInfo object.
      99  [clinic start generated code]*/
     100  
     101  static PyObject *
     102  _codecs_lookup_impl(PyObject *module, const char *encoding)
     103  /*[clinic end generated code: output=9f0afa572080c36d input=3c572c0db3febe9c]*/
     104  {
     105      return _PyCodec_Lookup(encoding);
     106  }
     107  
     108  /*[clinic input]
     109  _codecs.encode
     110      obj: object
     111      encoding: str(c_default="NULL") = "utf-8"
     112      errors: str(c_default="NULL") = "strict"
     113  
     114  Encodes obj using the codec registered for encoding.
     115  
     116  The default encoding is 'utf-8'.  errors may be given to set a
     117  different error handling scheme.  Default is 'strict' meaning that encoding
     118  errors raise a ValueError.  Other possible values are 'ignore', 'replace'
     119  and 'backslashreplace' as well as any other name registered with
     120  codecs.register_error that can handle ValueErrors.
     121  [clinic start generated code]*/
     122  
     123  static PyObject *
     124  _codecs_encode_impl(PyObject *module, PyObject *obj, const char *encoding,
     125                      const char *errors)
     126  /*[clinic end generated code: output=385148eb9a067c86 input=cd5b685040ff61f0]*/
     127  {
     128      if (encoding == NULL)
     129          encoding = PyUnicode_GetDefaultEncoding();
     130  
     131      /* Encode via the codec registry */
     132      return PyCodec_Encode(obj, encoding, errors);
     133  }
     134  
     135  /*[clinic input]
     136  _codecs.decode
     137      obj: object
     138      encoding: str(c_default="NULL") = "utf-8"
     139      errors: str(c_default="NULL") = "strict"
     140  
     141  Decodes obj using the codec registered for encoding.
     142  
     143  Default encoding is 'utf-8'.  errors may be given to set a
     144  different error handling scheme.  Default is 'strict' meaning that encoding
     145  errors raise a ValueError.  Other possible values are 'ignore', 'replace'
     146  and 'backslashreplace' as well as any other name registered with
     147  codecs.register_error that can handle ValueErrors.
     148  [clinic start generated code]*/
     149  
     150  static PyObject *
     151  _codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding,
     152                      const char *errors)
     153  /*[clinic end generated code: output=679882417dc3a0bd input=7702c0cc2fa1add6]*/
     154  {
     155      if (encoding == NULL)
     156          encoding = PyUnicode_GetDefaultEncoding();
     157  
     158      /* Decode via the codec registry */
     159      return PyCodec_Decode(obj, encoding, errors);
     160  }
     161  
     162  /* --- Helpers ------------------------------------------------------------ */
     163  
     164  static
     165  PyObject *codec_tuple(PyObject *decoded,
     166                        Py_ssize_t len)
     167  {
     168      if (decoded == NULL)
     169          return NULL;
     170      return Py_BuildValue("Nn", decoded, len);
     171  }
     172  
     173  /* --- String codecs ------------------------------------------------------ */
     174  /*[clinic input]
     175  _codecs.escape_decode
     176      data: Py_buffer(accept={str, buffer})
     177      errors: str(accept={str, NoneType}) = None
     178      /
     179  [clinic start generated code]*/
     180  
     181  static PyObject *
     182  _codecs_escape_decode_impl(PyObject *module, Py_buffer *data,
     183                             const char *errors)
     184  /*[clinic end generated code: output=505200ba8056979a input=77298a561c90bd82]*/
     185  {
     186      PyObject *decoded = PyBytes_DecodeEscape(data->buf, data->len,
     187                                               errors, 0, NULL);
     188      return codec_tuple(decoded, data->len);
     189  }
     190  
     191  /*[clinic input]
     192  _codecs.escape_encode
     193      data: object(subclass_of='&PyBytes_Type')
     194      errors: str(accept={str, NoneType}) = None
     195      /
     196  [clinic start generated code]*/
     197  
     198  static PyObject *
     199  _codecs_escape_encode_impl(PyObject *module, PyObject *data,
     200                             const char *errors)
     201  /*[clinic end generated code: output=4af1d477834bab34 input=8f4b144799a94245]*/
     202  {
     203      Py_ssize_t size;
     204      Py_ssize_t newsize;
     205      PyObject *v;
     206  
     207      size = PyBytes_GET_SIZE(data);
     208      if (size > PY_SSIZE_T_MAX / 4) {
     209          PyErr_SetString(PyExc_OverflowError,
     210              "string is too large to encode");
     211              return NULL;
     212      }
     213      newsize = 4*size;
     214      v = PyBytes_FromStringAndSize(NULL, newsize);
     215  
     216      if (v == NULL) {
     217          return NULL;
     218      }
     219      else {
     220          Py_ssize_t i;
     221          char c;
     222          char *p = PyBytes_AS_STRING(v);
     223  
     224          for (i = 0; i < size; i++) {
     225              /* There's at least enough room for a hex escape */
     226              assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
     227              c = PyBytes_AS_STRING(data)[i];
     228              if (c == '\'' || c == '\\')
     229                  *p++ = '\\', *p++ = c;
     230              else if (c == '\t')
     231                  *p++ = '\\', *p++ = 't';
     232              else if (c == '\n')
     233                  *p++ = '\\', *p++ = 'n';
     234              else if (c == '\r')
     235                  *p++ = '\\', *p++ = 'r';
     236              else if (c < ' ' || c >= 0x7f) {
     237                  *p++ = '\\';
     238                  *p++ = 'x';
     239                  *p++ = Py_hexdigits[(c & 0xf0) >> 4];
     240                  *p++ = Py_hexdigits[c & 0xf];
     241              }
     242              else
     243                  *p++ = c;
     244          }
     245          *p = '\0';
     246          if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
     247              return NULL;
     248          }
     249      }
     250  
     251      return codec_tuple(v, size);
     252  }
     253  
     254  /* --- Decoder ------------------------------------------------------------ */
     255  /*[clinic input]
     256  _codecs.utf_7_decode
     257      data: Py_buffer
     258      errors: str(accept={str, NoneType}) = None
     259      final: bool = False
     260      /
     261  [clinic start generated code]*/
     262  
     263  static PyObject *
     264  _codecs_utf_7_decode_impl(PyObject *module, Py_buffer *data,
     265                            const char *errors, int final)
     266  /*[clinic end generated code: output=0cd3a944a32a4089 input=dbf8c8998102dc7d]*/
     267  {
     268      Py_ssize_t consumed = data->len;
     269      PyObject *decoded = PyUnicode_DecodeUTF7Stateful(data->buf, data->len,
     270                                                       errors,
     271                                                       final ? NULL : &consumed);
     272      return codec_tuple(decoded, consumed);
     273  }
     274  
     275  /*[clinic input]
     276  _codecs.utf_8_decode
     277      data: Py_buffer
     278      errors: str(accept={str, NoneType}) = None
     279      final: bool = False
     280      /
     281  [clinic start generated code]*/
     282  
     283  static PyObject *
     284  _codecs_utf_8_decode_impl(PyObject *module, Py_buffer *data,
     285                            const char *errors, int final)
     286  /*[clinic end generated code: output=10f74dec8d9bb8bf input=ca06bc8a9c970e25]*/
     287  {
     288      Py_ssize_t consumed = data->len;
     289      PyObject *decoded = PyUnicode_DecodeUTF8Stateful(data->buf, data->len,
     290                                                       errors,
     291                                                       final ? NULL : &consumed);
     292      return codec_tuple(decoded, consumed);
     293  }
     294  
     295  /*[clinic input]
     296  _codecs.utf_16_decode
     297      data: Py_buffer
     298      errors: str(accept={str, NoneType}) = None
     299      final: bool = False
     300      /
     301  [clinic start generated code]*/
     302  
     303  static PyObject *
     304  _codecs_utf_16_decode_impl(PyObject *module, Py_buffer *data,
     305                             const char *errors, int final)
     306  /*[clinic end generated code: output=783b442abcbcc2d0 input=5b0f52071ba6cadc]*/
     307  {
     308      int byteorder = 0;
     309      /* This is overwritten unless final is true. */
     310      Py_ssize_t consumed = data->len;
     311      PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
     312                                                        errors, &byteorder,
     313                                                        final ? NULL : &consumed);
     314      return codec_tuple(decoded, consumed);
     315  }
     316  
     317  /*[clinic input]
     318  _codecs.utf_16_le_decode
     319      data: Py_buffer
     320      errors: str(accept={str, NoneType}) = None
     321      final: bool = False
     322      /
     323  [clinic start generated code]*/
     324  
     325  static PyObject *
     326  _codecs_utf_16_le_decode_impl(PyObject *module, Py_buffer *data,
     327                                const char *errors, int final)
     328  /*[clinic end generated code: output=899b9e6364379dcd input=115bd8c7b783d0bf]*/
     329  {
     330      int byteorder = -1;
     331      /* This is overwritten unless final is true. */
     332      Py_ssize_t consumed = data->len;
     333      PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
     334                                                        errors, &byteorder,
     335                                                        final ? NULL : &consumed);
     336      return codec_tuple(decoded, consumed);
     337  }
     338  
     339  /*[clinic input]
     340  _codecs.utf_16_be_decode
     341      data: Py_buffer
     342      errors: str(accept={str, NoneType}) = None
     343      final: bool = False
     344      /
     345  [clinic start generated code]*/
     346  
     347  static PyObject *
     348  _codecs_utf_16_be_decode_impl(PyObject *module, Py_buffer *data,
     349                                const char *errors, int final)
     350  /*[clinic end generated code: output=49f6465ea07669c8 input=63131422b01f9cb4]*/
     351  {
     352      int byteorder = 1;
     353      /* This is overwritten unless final is true. */
     354      Py_ssize_t consumed = data->len;
     355      PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
     356                                                        errors, &byteorder,
     357                                                        final ? NULL : &consumed);
     358      return codec_tuple(decoded, consumed);
     359  }
     360  
     361  /* This non-standard version also provides access to the byteorder
     362     parameter of the builtin UTF-16 codec.
     363  
     364     It returns a tuple (unicode, bytesread, byteorder) with byteorder
     365     being the value in effect at the end of data.
     366  
     367  */
     368  /*[clinic input]
     369  _codecs.utf_16_ex_decode
     370      data: Py_buffer
     371      errors: str(accept={str, NoneType}) = None
     372      byteorder: int = 0
     373      final: bool = False
     374      /
     375  [clinic start generated code]*/
     376  
     377  static PyObject *
     378  _codecs_utf_16_ex_decode_impl(PyObject *module, Py_buffer *data,
     379                                const char *errors, int byteorder, int final)
     380  /*[clinic end generated code: output=0f385f251ecc1988 input=f368a51cf384bf4c]*/
     381  {
     382      /* This is overwritten unless final is true. */
     383      Py_ssize_t consumed = data->len;
     384  
     385      PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
     386                                                        errors, &byteorder,
     387                                                        final ? NULL : &consumed);
     388      if (decoded == NULL)
     389          return NULL;
     390      return Py_BuildValue("Nni", decoded, consumed, byteorder);
     391  }
     392  
     393  /*[clinic input]
     394  _codecs.utf_32_decode
     395      data: Py_buffer
     396      errors: str(accept={str, NoneType}) = None
     397      final: bool = False
     398      /
     399  [clinic start generated code]*/
     400  
     401  static PyObject *
     402  _codecs_utf_32_decode_impl(PyObject *module, Py_buffer *data,
     403                             const char *errors, int final)
     404  /*[clinic end generated code: output=2fc961807f7b145f input=fcdf3658c5e9b5f3]*/
     405  {
     406      int byteorder = 0;
     407      /* This is overwritten unless final is true. */
     408      Py_ssize_t consumed = data->len;
     409      PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
     410                                                        errors, &byteorder,
     411                                                        final ? NULL : &consumed);
     412      return codec_tuple(decoded, consumed);
     413  }
     414  
     415  /*[clinic input]
     416  _codecs.utf_32_le_decode
     417      data: Py_buffer
     418      errors: str(accept={str, NoneType}) = None
     419      final: bool = False
     420      /
     421  [clinic start generated code]*/
     422  
     423  static PyObject *
     424  _codecs_utf_32_le_decode_impl(PyObject *module, Py_buffer *data,
     425                                const char *errors, int final)
     426  /*[clinic end generated code: output=ec8f46b67a94f3e6 input=12220556e885f817]*/
     427  {
     428      int byteorder = -1;
     429      /* This is overwritten unless final is true. */
     430      Py_ssize_t consumed = data->len;
     431      PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
     432                                                        errors, &byteorder,
     433                                                        final ? NULL : &consumed);
     434      return codec_tuple(decoded, consumed);
     435  }
     436  
     437  /*[clinic input]
     438  _codecs.utf_32_be_decode
     439      data: Py_buffer
     440      errors: str(accept={str, NoneType}) = None
     441      final: bool = False
     442      /
     443  [clinic start generated code]*/
     444  
     445  static PyObject *
     446  _codecs_utf_32_be_decode_impl(PyObject *module, Py_buffer *data,
     447                                const char *errors, int final)
     448  /*[clinic end generated code: output=ff82bae862c92c4e input=2bc669b4781598db]*/
     449  {
     450      int byteorder = 1;
     451      /* This is overwritten unless final is true. */
     452      Py_ssize_t consumed = data->len;
     453      PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
     454                                                        errors, &byteorder,
     455                                                        final ? NULL : &consumed);
     456      return codec_tuple(decoded, consumed);
     457  }
     458  
     459  /* This non-standard version also provides access to the byteorder
     460     parameter of the builtin UTF-32 codec.
     461  
     462     It returns a tuple (unicode, bytesread, byteorder) with byteorder
     463     being the value in effect at the end of data.
     464  
     465  */
     466  /*[clinic input]
     467  _codecs.utf_32_ex_decode
     468      data: Py_buffer
     469      errors: str(accept={str, NoneType}) = None
     470      byteorder: int = 0
     471      final: bool = False
     472      /
     473  [clinic start generated code]*/
     474  
     475  static PyObject *
     476  _codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data,
     477                                const char *errors, int byteorder, int final)
     478  /*[clinic end generated code: output=6bfb177dceaf4848 input=4a2323d0013620df]*/
     479  {
     480      Py_ssize_t consumed = data->len;
     481      PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
     482                                                        errors, &byteorder,
     483                                                        final ? NULL : &consumed);
     484      if (decoded == NULL)
     485          return NULL;
     486      return Py_BuildValue("Nni", decoded, consumed, byteorder);
     487  }
     488  
     489  /*[clinic input]
     490  _codecs.unicode_escape_decode
     491      data: Py_buffer(accept={str, buffer})
     492      errors: str(accept={str, NoneType}) = None
     493      final: bool = True
     494      /
     495  [clinic start generated code]*/
     496  
     497  static PyObject *
     498  _codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
     499                                     const char *errors, int final)
     500  /*[clinic end generated code: output=b284f97b12c635ee input=15019f081ffe272b]*/
     501  {
     502      Py_ssize_t consumed = data->len;
     503      PyObject *decoded = _PyUnicode_DecodeUnicodeEscapeStateful(data->buf, data->len,
     504                                                                 errors,
     505                                                                 final ? NULL : &consumed);
     506      return codec_tuple(decoded, consumed);
     507  }
     508  
     509  /*[clinic input]
     510  _codecs.raw_unicode_escape_decode
     511      data: Py_buffer(accept={str, buffer})
     512      errors: str(accept={str, NoneType}) = None
     513      final: bool = True
     514      /
     515  [clinic start generated code]*/
     516  
     517  static PyObject *
     518  _codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
     519                                         const char *errors, int final)
     520  /*[clinic end generated code: output=11dbd96301e2879e input=b93f823aa8c343ad]*/
     521  {
     522      Py_ssize_t consumed = data->len;
     523      PyObject *decoded = _PyUnicode_DecodeRawUnicodeEscapeStateful(data->buf, data->len,
     524                                                                    errors,
     525                                                                    final ? NULL : &consumed);
     526      return codec_tuple(decoded, consumed);
     527  }
     528  
     529  /*[clinic input]
     530  _codecs.latin_1_decode
     531      data: Py_buffer
     532      errors: str(accept={str, NoneType}) = None
     533      /
     534  [clinic start generated code]*/
     535  
     536  static PyObject *
     537  _codecs_latin_1_decode_impl(PyObject *module, Py_buffer *data,
     538                              const char *errors)
     539  /*[clinic end generated code: output=07f3dfa3f72c7d8f input=76ca58fd6dcd08c7]*/
     540  {
     541      PyObject *decoded = PyUnicode_DecodeLatin1(data->buf, data->len, errors);
     542      return codec_tuple(decoded, data->len);
     543  }
     544  
     545  /*[clinic input]
     546  _codecs.ascii_decode
     547      data: Py_buffer
     548      errors: str(accept={str, NoneType}) = None
     549      /
     550  [clinic start generated code]*/
     551  
     552  static PyObject *
     553  _codecs_ascii_decode_impl(PyObject *module, Py_buffer *data,
     554                            const char *errors)
     555  /*[clinic end generated code: output=2627d72058d42429 input=e428a267a04b4481]*/
     556  {
     557      PyObject *decoded = PyUnicode_DecodeASCII(data->buf, data->len, errors);
     558      return codec_tuple(decoded, data->len);
     559  }
     560  
     561  /*[clinic input]
     562  _codecs.charmap_decode
     563      data: Py_buffer
     564      errors: str(accept={str, NoneType}) = None
     565      mapping: object = None
     566      /
     567  [clinic start generated code]*/
     568  
     569  static PyObject *
     570  _codecs_charmap_decode_impl(PyObject *module, Py_buffer *data,
     571                              const char *errors, PyObject *mapping)
     572  /*[clinic end generated code: output=2c335b09778cf895 input=15b69df43458eb40]*/
     573  {
     574      PyObject *decoded;
     575  
     576      if (mapping == Py_None)
     577          mapping = NULL;
     578  
     579      decoded = PyUnicode_DecodeCharmap(data->buf, data->len, mapping, errors);
     580      return codec_tuple(decoded, data->len);
     581  }
     582  
     583  #ifdef MS_WINDOWS
     584  
     585  /*[clinic input]
     586  _codecs.mbcs_decode
     587      data: Py_buffer
     588      errors: str(accept={str, NoneType}) = None
     589      final: bool = False
     590      /
     591  [clinic start generated code]*/
     592  
     593  static PyObject *
     594  _codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data,
     595                           const char *errors, int final)
     596  /*[clinic end generated code: output=39b65b8598938c4b input=f144ad1ed6d8f5a6]*/
     597  {
     598      Py_ssize_t consumed = data->len;
     599      PyObject *decoded = PyUnicode_DecodeMBCSStateful(data->buf, data->len,
     600              errors, final ? NULL : &consumed);
     601      return codec_tuple(decoded, consumed);
     602  }
     603  
     604  /*[clinic input]
     605  _codecs.oem_decode
     606      data: Py_buffer
     607      errors: str(accept={str, NoneType}) = None
     608      final: bool = False
     609      /
     610  [clinic start generated code]*/
     611  
     612  static PyObject *
     613  _codecs_oem_decode_impl(PyObject *module, Py_buffer *data,
     614                          const char *errors, int final)
     615  /*[clinic end generated code: output=da1617612f3fcad8 input=629bf87376d211b4]*/
     616  {
     617      Py_ssize_t consumed = data->len;
     618      PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP,
     619          data->buf, data->len, errors, final ? NULL : &consumed);
     620      return codec_tuple(decoded, consumed);
     621  }
     622  
     623  /*[clinic input]
     624  _codecs.code_page_decode
     625      codepage: int
     626      data: Py_buffer
     627      errors: str(accept={str, NoneType}) = None
     628      final: bool = False
     629      /
     630  [clinic start generated code]*/
     631  
     632  static PyObject *
     633  _codecs_code_page_decode_impl(PyObject *module, int codepage,
     634                                Py_buffer *data, const char *errors, int final)
     635  /*[clinic end generated code: output=53008ea967da3fff input=6a32589b0658c277]*/
     636  {
     637      Py_ssize_t consumed = data->len;
     638      PyObject *decoded = PyUnicode_DecodeCodePageStateful(codepage,
     639                                                           data->buf, data->len,
     640                                                           errors,
     641                                                           final ? NULL : &consumed);
     642      return codec_tuple(decoded, consumed);
     643  }
     644  
     645  #endif /* MS_WINDOWS */
     646  
     647  /* --- Encoder ------------------------------------------------------------ */
     648  
     649  /*[clinic input]
     650  _codecs.readbuffer_encode
     651      data: Py_buffer(accept={str, buffer})
     652      errors: str(accept={str, NoneType}) = None
     653      /
     654  [clinic start generated code]*/
     655  
     656  static PyObject *
     657  _codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,
     658                                 const char *errors)
     659  /*[clinic end generated code: output=c645ea7cdb3d6e86 input=aa10cfdf252455c5]*/
     660  {
     661      PyObject *result = PyBytes_FromStringAndSize(data->buf, data->len);
     662      return codec_tuple(result, data->len);
     663  }
     664  
     665  /*[clinic input]
     666  _codecs.utf_7_encode
     667      str: unicode
     668      errors: str(accept={str, NoneType}) = None
     669      /
     670  [clinic start generated code]*/
     671  
     672  static PyObject *
     673  _codecs_utf_7_encode_impl(PyObject *module, PyObject *str,
     674                            const char *errors)
     675  /*[clinic end generated code: output=0feda21ffc921bc8 input=2546dbbb3fa53114]*/
     676  {
     677      return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
     678                         PyUnicode_GET_LENGTH(str));
     679  }
     680  
     681  /*[clinic input]
     682  _codecs.utf_8_encode
     683      str: unicode
     684      errors: str(accept={str, NoneType}) = None
     685      /
     686  [clinic start generated code]*/
     687  
     688  static PyObject *
     689  _codecs_utf_8_encode_impl(PyObject *module, PyObject *str,
     690                            const char *errors)
     691  /*[clinic end generated code: output=02bf47332b9c796c input=a3e71ae01c3f93f3]*/
     692  {
     693      return codec_tuple(_PyUnicode_AsUTF8String(str, errors),
     694                         PyUnicode_GET_LENGTH(str));
     695  }
     696  
     697  /* This version provides access to the byteorder parameter of the
     698     builtin UTF-16 codecs as optional third argument. It defaults to 0
     699     which means: use the native byte order and prepend the data with a
     700     BOM mark.
     701  
     702  */
     703  
     704  /*[clinic input]
     705  _codecs.utf_16_encode
     706      str: unicode
     707      errors: str(accept={str, NoneType}) = None
     708      byteorder: int = 0
     709      /
     710  [clinic start generated code]*/
     711  
     712  static PyObject *
     713  _codecs_utf_16_encode_impl(PyObject *module, PyObject *str,
     714                             const char *errors, int byteorder)
     715  /*[clinic end generated code: output=c654e13efa2e64e4 input=68cdc2eb8338555d]*/
     716  {
     717      return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
     718                         PyUnicode_GET_LENGTH(str));
     719  }
     720  
     721  /*[clinic input]
     722  _codecs.utf_16_le_encode
     723      str: unicode
     724      errors: str(accept={str, NoneType}) = None
     725      /
     726  [clinic start generated code]*/
     727  
     728  static PyObject *
     729  _codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str,
     730                                const char *errors)
     731  /*[clinic end generated code: output=431b01e55f2d4995 input=83d042706eed6798]*/
     732  {
     733      return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
     734                         PyUnicode_GET_LENGTH(str));
     735  }
     736  
     737  /*[clinic input]
     738  _codecs.utf_16_be_encode
     739      str: unicode
     740      errors: str(accept={str, NoneType}) = None
     741      /
     742  [clinic start generated code]*/
     743  
     744  static PyObject *
     745  _codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str,
     746                                const char *errors)
     747  /*[clinic end generated code: output=96886a6fd54dcae3 input=6f1e9e623b03071b]*/
     748  {
     749      return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
     750                         PyUnicode_GET_LENGTH(str));
     751  }
     752  
     753  /* This version provides access to the byteorder parameter of the
     754     builtin UTF-32 codecs as optional third argument. It defaults to 0
     755     which means: use the native byte order and prepend the data with a
     756     BOM mark.
     757  
     758  */
     759  
     760  /*[clinic input]
     761  _codecs.utf_32_encode
     762      str: unicode
     763      errors: str(accept={str, NoneType}) = None
     764      byteorder: int = 0
     765      /
     766  [clinic start generated code]*/
     767  
     768  static PyObject *
     769  _codecs_utf_32_encode_impl(PyObject *module, PyObject *str,
     770                             const char *errors, int byteorder)
     771  /*[clinic end generated code: output=5c760da0c09a8b83 input=8ec4c64d983bc52b]*/
     772  {
     773      return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
     774                         PyUnicode_GET_LENGTH(str));
     775  }
     776  
     777  /*[clinic input]
     778  _codecs.utf_32_le_encode
     779      str: unicode
     780      errors: str(accept={str, NoneType}) = None
     781      /
     782  [clinic start generated code]*/
     783  
     784  static PyObject *
     785  _codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str,
     786                                const char *errors)
     787  /*[clinic end generated code: output=b65cd176de8e36d6 input=f0918d41de3eb1b1]*/
     788  {
     789      return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
     790                         PyUnicode_GET_LENGTH(str));
     791  }
     792  
     793  /*[clinic input]
     794  _codecs.utf_32_be_encode
     795      str: unicode
     796      errors: str(accept={str, NoneType}) = None
     797      /
     798  [clinic start generated code]*/
     799  
     800  static PyObject *
     801  _codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str,
     802                                const char *errors)
     803  /*[clinic end generated code: output=1d9e71a9358709e9 input=967a99a95748b557]*/
     804  {
     805      return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
     806                         PyUnicode_GET_LENGTH(str));
     807  }
     808  
     809  /*[clinic input]
     810  _codecs.unicode_escape_encode
     811      str: unicode
     812      errors: str(accept={str, NoneType}) = None
     813      /
     814  [clinic start generated code]*/
     815  
     816  static PyObject *
     817  _codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str,
     818                                     const char *errors)
     819  /*[clinic end generated code: output=66271b30bc4f7a3c input=8c4de07597054e33]*/
     820  {
     821      return codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
     822                         PyUnicode_GET_LENGTH(str));
     823  }
     824  
     825  /*[clinic input]
     826  _codecs.raw_unicode_escape_encode
     827      str: unicode
     828      errors: str(accept={str, NoneType}) = None
     829      /
     830  [clinic start generated code]*/
     831  
     832  static PyObject *
     833  _codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str,
     834                                         const char *errors)
     835  /*[clinic end generated code: output=a66a806ed01c830a input=4aa6f280d78e4574]*/
     836  {
     837      return codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
     838                         PyUnicode_GET_LENGTH(str));
     839  }
     840  
     841  /*[clinic input]
     842  _codecs.latin_1_encode
     843      str: unicode
     844      errors: str(accept={str, NoneType}) = None
     845      /
     846  [clinic start generated code]*/
     847  
     848  static PyObject *
     849  _codecs_latin_1_encode_impl(PyObject *module, PyObject *str,
     850                              const char *errors)
     851  /*[clinic end generated code: output=2c28c83a27884e08 input=ec3ef74bf85c5c5d]*/
     852  {
     853      return codec_tuple(_PyUnicode_AsLatin1String(str, errors),
     854                         PyUnicode_GET_LENGTH(str));
     855  }
     856  
     857  /*[clinic input]
     858  _codecs.ascii_encode
     859      str: unicode
     860      errors: str(accept={str, NoneType}) = None
     861      /
     862  [clinic start generated code]*/
     863  
     864  static PyObject *
     865  _codecs_ascii_encode_impl(PyObject *module, PyObject *str,
     866                            const char *errors)
     867  /*[clinic end generated code: output=b5e035182d33befc input=93e6e602838bd3de]*/
     868  {
     869      return codec_tuple(_PyUnicode_AsASCIIString(str, errors),
     870                         PyUnicode_GET_LENGTH(str));
     871  }
     872  
     873  /*[clinic input]
     874  _codecs.charmap_encode
     875      str: unicode
     876      errors: str(accept={str, NoneType}) = None
     877      mapping: object = None
     878      /
     879  [clinic start generated code]*/
     880  
     881  static PyObject *
     882  _codecs_charmap_encode_impl(PyObject *module, PyObject *str,
     883                              const char *errors, PyObject *mapping)
     884  /*[clinic end generated code: output=047476f48495a9e9 input=2a98feae73dadce8]*/
     885  {
     886      if (mapping == Py_None)
     887          mapping = NULL;
     888  
     889      return codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
     890                         PyUnicode_GET_LENGTH(str));
     891  }
     892  
     893  /*[clinic input]
     894  _codecs.charmap_build
     895      map: unicode
     896      /
     897  [clinic start generated code]*/
     898  
     899  static PyObject *
     900  _codecs_charmap_build_impl(PyObject *module, PyObject *map)
     901  /*[clinic end generated code: output=bb073c27031db9ac input=d91a91d1717dbc6d]*/
     902  {
     903      return PyUnicode_BuildEncodingMap(map);
     904  }
     905  
     906  #ifdef MS_WINDOWS
     907  
     908  /*[clinic input]
     909  _codecs.mbcs_encode
     910      str: unicode
     911      errors: str(accept={str, NoneType}) = None
     912      /
     913  [clinic start generated code]*/
     914  
     915  static PyObject *
     916  _codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors)
     917  /*[clinic end generated code: output=76e2e170c966c080 input=2e932fc289ea5a5b]*/
     918  {
     919      return codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
     920                         PyUnicode_GET_LENGTH(str));
     921  }
     922  
     923  /*[clinic input]
     924  _codecs.oem_encode
     925      str: unicode
     926      errors: str(accept={str, NoneType}) = None
     927      /
     928  [clinic start generated code]*/
     929  
     930  static PyObject *
     931  _codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors)
     932  /*[clinic end generated code: output=65d5982c737de649 input=9eac86dc21eb14f2]*/
     933  {
     934      return codec_tuple(PyUnicode_EncodeCodePage(CP_OEMCP, str, errors),
     935          PyUnicode_GET_LENGTH(str));
     936  }
     937  
     938  /*[clinic input]
     939  _codecs.code_page_encode
     940      code_page: int
     941      str: unicode
     942      errors: str(accept={str, NoneType}) = None
     943      /
     944  [clinic start generated code]*/
     945  
     946  static PyObject *
     947  _codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str,
     948                                const char *errors)
     949  /*[clinic end generated code: output=45673f6085657a9e input=7d18a33bc8cd0f94]*/
     950  {
     951      return codec_tuple(PyUnicode_EncodeCodePage(code_page, str, errors),
     952                         PyUnicode_GET_LENGTH(str));
     953  }
     954  
     955  #endif /* MS_WINDOWS */
     956  
     957  /* --- Error handler registry --------------------------------------------- */
     958  
     959  /*[clinic input]
     960  _codecs.register_error
     961      errors: str
     962      handler: object
     963      /
     964  
     965  Register the specified error handler under the name errors.
     966  
     967  handler must be a callable object, that will be called with an exception
     968  instance containing information about the location of the encoding/decoding
     969  error and must return a (replacement, new position) tuple.
     970  [clinic start generated code]*/
     971  
     972  static PyObject *
     973  _codecs_register_error_impl(PyObject *module, const char *errors,
     974                              PyObject *handler)
     975  /*[clinic end generated code: output=fa2f7d1879b3067d input=5e6709203c2e33fe]*/
     976  {
     977      if (PyCodec_RegisterError(errors, handler))
     978          return NULL;
     979      Py_RETURN_NONE;
     980  }
     981  
     982  /*[clinic input]
     983  _codecs.lookup_error
     984      name: str
     985      /
     986  
     987  lookup_error(errors) -> handler
     988  
     989  Return the error handler for the specified error handling name or raise a
     990  LookupError, if no handler exists under this name.
     991  [clinic start generated code]*/
     992  
     993  static PyObject *
     994  _codecs_lookup_error_impl(PyObject *module, const char *name)
     995  /*[clinic end generated code: output=087f05dc0c9a98cc input=4775dd65e6235aba]*/
     996  {
     997      return PyCodec_LookupError(name);
     998  }
     999  
    1000  /* --- Module API --------------------------------------------------------- */
    1001  
/* Method table for the _codecs module; it is referenced from the module
   definition further down in this file.

   Every entry is a *_METHODDEF macro that is not defined in this file
   (presumably generated by Argument Clinic into clinic/_codecsmodule.c.h,
   which this file includes -- confirm).  Note that no comma follows the
   macro invocations, so each macro must supply its own separator.

   NOTE(review): the MBCS/OEM/CODE_PAGE entries are listed unconditionally
   even though their encode implementations are compiled only under
   MS_WINDOWS; presumably the macros expand to nothing on other
   platforms -- verify against the clinic header. */
static PyMethodDef _codecs_functions[] = {
    /* Codec registry access and generic encode/decode entry points. */
    _CODECS_REGISTER_METHODDEF
    _CODECS_UNREGISTER_METHODDEF
    _CODECS_LOOKUP_METHODDEF
    _CODECS_ENCODE_METHODDEF
    _CODECS_DECODE_METHODDEF
    /* Built-in codecs, one encode/decode pair per encoding. */
    _CODECS_ESCAPE_ENCODE_METHODDEF
    _CODECS_ESCAPE_DECODE_METHODDEF
    _CODECS_UTF_8_ENCODE_METHODDEF
    _CODECS_UTF_8_DECODE_METHODDEF
    _CODECS_UTF_7_ENCODE_METHODDEF
    _CODECS_UTF_7_DECODE_METHODDEF
    _CODECS_UTF_16_ENCODE_METHODDEF
    _CODECS_UTF_16_LE_ENCODE_METHODDEF
    _CODECS_UTF_16_BE_ENCODE_METHODDEF
    _CODECS_UTF_16_DECODE_METHODDEF
    _CODECS_UTF_16_LE_DECODE_METHODDEF
    _CODECS_UTF_16_BE_DECODE_METHODDEF
    _CODECS_UTF_16_EX_DECODE_METHODDEF
    _CODECS_UTF_32_ENCODE_METHODDEF
    _CODECS_UTF_32_LE_ENCODE_METHODDEF
    _CODECS_UTF_32_BE_ENCODE_METHODDEF
    _CODECS_UTF_32_DECODE_METHODDEF
    _CODECS_UTF_32_LE_DECODE_METHODDEF
    _CODECS_UTF_32_BE_DECODE_METHODDEF
    _CODECS_UTF_32_EX_DECODE_METHODDEF
    _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
    _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
    _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
    _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
    _CODECS_LATIN_1_ENCODE_METHODDEF
    _CODECS_LATIN_1_DECODE_METHODDEF
    _CODECS_ASCII_ENCODE_METHODDEF
    _CODECS_ASCII_DECODE_METHODDEF
    _CODECS_CHARMAP_ENCODE_METHODDEF
    _CODECS_CHARMAP_DECODE_METHODDEF
    _CODECS_CHARMAP_BUILD_METHODDEF
    _CODECS_READBUFFER_ENCODE_METHODDEF
    /* Code-page based codecs (see the NOTE(review) above). */
    _CODECS_MBCS_ENCODE_METHODDEF
    _CODECS_MBCS_DECODE_METHODDEF
    _CODECS_OEM_ENCODE_METHODDEF
    _CODECS_OEM_DECODE_METHODDEF
    _CODECS_CODE_PAGE_ENCODE_METHODDEF
    _CODECS_CODE_PAGE_DECODE_METHODDEF
    /* Error handler registry. */
    _CODECS_REGISTER_ERROR_METHODDEF
    _CODECS_LOOKUP_ERROR_METHODDEF
    {NULL, NULL}                /* sentinel */
};
    1050  
    1051  static PyModuleDef_Slot _codecs_slots[] = {
    1052      {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
    1053      {0, NULL}
    1054  };
    1055  
    1056  static struct PyModuleDef codecsmodule = {
    1057          PyModuleDef_HEAD_INIT,
    1058          "_codecs",
    1059          NULL,
    1060          0,
    1061          _codecs_functions,
    1062          _codecs_slots,
    1063          NULL,
    1064          NULL,
    1065          NULL
    1066  };
    1067  
    1068  PyMODINIT_FUNC
    1069  PyInit__codecs(void)
    1070  {
    1071      return PyModuleDef_Init(&codecsmodule);
    1072  }