(root)/
Python-3.12.0/
Modules/
_json.c
/* JSON accelerator C extension: _json module.
       2   *
       3   * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
       4   * and as an extension module (Py_BUILD_CORE_MODULE define) on other
       5   * platforms. */
       6  
       7  #ifndef Py_BUILD_CORE_BUILTIN
       8  #  define Py_BUILD_CORE_MODULE 1
       9  #endif
      10  
      11  #include "Python.h"
      12  #include "pycore_ceval.h"           // _Py_EnterRecursiveCall()
      13  #include "pycore_runtime.h"         // _PyRuntime
      14  #include "structmember.h"           // PyMemberDef
      15  #include "pycore_global_objects.h"  // _Py_ID()
      16  #include <stdbool.h>                // bool
      17  
      18  
/* Instance layout of the scanner (decoder) type.
   All fields except memo are exposed read-only through scanner_members. */
typedef struct _PyScannerObject {
    PyObject_HEAD
    signed char strict;           /* exposed as a T_BOOL member; forwarded to scanstring_unicode() */
    PyObject *object_hook;        /* called with each decoded object unless it is Py_None */
    PyObject *object_pairs_hook;  /* when not Py_None, receives the list of (key, value) pairs instead */
    PyObject *parse_float;
    PyObject *parse_int;
    PyObject *parse_constant;
    PyObject *memo;               /* passed to PyDict_SetDefault() so equal object keys share one str */
} PyScannerObject;
      29  
      30  static PyMemberDef scanner_members[] = {
      31      {"strict", T_BOOL, offsetof(PyScannerObject, strict), READONLY, "strict"},
      32      {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
      33      {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
      34      {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
      35      {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
      36      {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
      37      {NULL}
      38  };
      39  
/* Instance layout of the encoder type.
   The object fields and the two char flags are exposed read-only through
   encoder_members; allow_nan and fast_encode have no entry in that table. */
typedef struct _PyEncoderObject {
    PyObject_HEAD
    PyObject *markers;
    PyObject *defaultfn;        /* exposed to Python under the name "default" */
    PyObject *encoder;
    PyObject *indent;
    PyObject *key_separator;
    PyObject *item_separator;
    char sort_keys;             /* exposed as a T_BOOL member */
    char skipkeys;              /* exposed as a T_BOOL member */
    int allow_nan;
    PyCFunction fast_encode;
} PyEncoderObject;
      53  
      54  static PyMemberDef encoder_members[] = {
      55      {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
      56      {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
      57      {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
      58      {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
      59      {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
      60      {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
      61      {"sort_keys", T_BOOL, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
      62      {"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
      63      {NULL}
      64  };
      65  
      66  /* Forward decls */
      67  
      68  static PyObject *
      69  ascii_escape_unicode(PyObject *pystr);
      70  static PyObject *
      71  py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr);
      72  static PyObject *
      73  scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
      74  static PyObject *
      75  _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
      76  static PyObject *
      77  scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
      78  static void
      79  scanner_dealloc(PyObject *self);
      80  static int
      81  scanner_clear(PyScannerObject *self);
      82  static PyObject *
      83  encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
      84  static void
      85  encoder_dealloc(PyObject *self);
      86  static int
      87  encoder_clear(PyEncoderObject *self);
      88  static int
      89  encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level);
      90  static int
      91  encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, Py_ssize_t indent_level);
      92  static int
      93  encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level);
      94  static PyObject *
      95  _encoded_const(PyObject *obj);
      96  static void
      97  raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
      98  static PyObject *
      99  encoder_encode_string(PyEncoderObject *s, PyObject *obj);
     100  static PyObject *
     101  encoder_encode_float(PyEncoderObject *s, PyObject *obj);
     102  
     103  #define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"')
     104  #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
     105  
     106  static Py_ssize_t
     107  ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
     108  {
     109      /* Escape unicode code point c to ASCII escape sequences
     110      in char *output. output must have at least 12 bytes unused to
     111      accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
     112      output[chars++] = '\\';
     113      switch (c) {
     114          case '\\': output[chars++] = c; break;
     115          case '"': output[chars++] = c; break;
     116          case '\b': output[chars++] = 'b'; break;
     117          case '\f': output[chars++] = 'f'; break;
     118          case '\n': output[chars++] = 'n'; break;
     119          case '\r': output[chars++] = 'r'; break;
     120          case '\t': output[chars++] = 't'; break;
     121          default:
     122              if (c >= 0x10000) {
     123                  /* UTF-16 surrogate pair */
     124                  Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
     125                  output[chars++] = 'u';
     126                  output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
     127                  output[chars++] = Py_hexdigits[(v >>  8) & 0xf];
     128                  output[chars++] = Py_hexdigits[(v >>  4) & 0xf];
     129                  output[chars++] = Py_hexdigits[(v      ) & 0xf];
     130                  c = Py_UNICODE_LOW_SURROGATE(c);
     131                  output[chars++] = '\\';
     132              }
     133              output[chars++] = 'u';
     134              output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
     135              output[chars++] = Py_hexdigits[(c >>  8) & 0xf];
     136              output[chars++] = Py_hexdigits[(c >>  4) & 0xf];
     137              output[chars++] = Py_hexdigits[(c      ) & 0xf];
     138      }
     139      return chars;
     140  }
     141  
     142  static PyObject *
     143  ascii_escape_unicode(PyObject *pystr)
     144  {
     145      /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
     146      Py_ssize_t i;
     147      Py_ssize_t input_chars;
     148      Py_ssize_t output_size;
     149      Py_ssize_t chars;
     150      PyObject *rval;
     151      const void *input;
     152      Py_UCS1 *output;
     153      int kind;
     154  
     155      if (PyUnicode_READY(pystr) == -1)
     156          return NULL;
     157  
     158      input_chars = PyUnicode_GET_LENGTH(pystr);
     159      input = PyUnicode_DATA(pystr);
     160      kind = PyUnicode_KIND(pystr);
     161  
     162      /* Compute the output size */
     163      for (i = 0, output_size = 2; i < input_chars; i++) {
     164          Py_UCS4 c = PyUnicode_READ(kind, input, i);
     165          Py_ssize_t d;
     166          if (S_CHAR(c)) {
     167              d = 1;
     168          }
     169          else {
     170              switch(c) {
     171              case '\\': case '"': case '\b': case '\f':
     172              case '\n': case '\r': case '\t':
     173                  d = 2; break;
     174              default:
     175                  d = c >= 0x10000 ? 12 : 6;
     176              }
     177          }
     178          if (output_size > PY_SSIZE_T_MAX - d) {
     179              PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
     180              return NULL;
     181          }
     182          output_size += d;
     183      }
     184  
     185      rval = PyUnicode_New(output_size, 127);
     186      if (rval == NULL) {
     187          return NULL;
     188      }
     189      output = PyUnicode_1BYTE_DATA(rval);
     190      chars = 0;
     191      output[chars++] = '"';
     192      for (i = 0; i < input_chars; i++) {
     193          Py_UCS4 c = PyUnicode_READ(kind, input, i);
     194          if (S_CHAR(c)) {
     195              output[chars++] = c;
     196          }
     197          else {
     198              chars = ascii_escape_unichar(c, output, chars);
     199          }
     200      }
     201      output[chars++] = '"';
     202  #ifdef Py_DEBUG
     203      assert(_PyUnicode_CheckConsistency(rval, 1));
     204  #endif
     205      return rval;
     206  }
     207  
     208  static PyObject *
     209  escape_unicode(PyObject *pystr)
     210  {
     211      /* Take a PyUnicode pystr and return a new escaped PyUnicode */
     212      Py_ssize_t i;
     213      Py_ssize_t input_chars;
     214      Py_ssize_t output_size;
     215      Py_ssize_t chars;
     216      PyObject *rval;
     217      const void *input;
     218      int kind;
     219      Py_UCS4 maxchar;
     220  
     221      if (PyUnicode_READY(pystr) == -1)
     222          return NULL;
     223  
     224      maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
     225      input_chars = PyUnicode_GET_LENGTH(pystr);
     226      input = PyUnicode_DATA(pystr);
     227      kind = PyUnicode_KIND(pystr);
     228  
     229      /* Compute the output size */
     230      for (i = 0, output_size = 2; i < input_chars; i++) {
     231          Py_UCS4 c = PyUnicode_READ(kind, input, i);
     232          Py_ssize_t d;
     233          switch (c) {
     234          case '\\': case '"': case '\b': case '\f':
     235          case '\n': case '\r': case '\t':
     236              d = 2;
     237              break;
     238          default:
     239              if (c <= 0x1f)
     240                  d = 6;
     241              else
     242                  d = 1;
     243          }
     244          if (output_size > PY_SSIZE_T_MAX - d) {
     245              PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
     246              return NULL;
     247          }
     248          output_size += d;
     249      }
     250  
     251      rval = PyUnicode_New(output_size, maxchar);
     252      if (rval == NULL)
     253          return NULL;
     254  
     255      kind = PyUnicode_KIND(rval);
     256  
     257  #define ENCODE_OUTPUT do { \
     258          chars = 0; \
     259          output[chars++] = '"'; \
     260          for (i = 0; i < input_chars; i++) { \
     261              Py_UCS4 c = PyUnicode_READ(kind, input, i); \
     262              switch (c) { \
     263              case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
     264              case '"':  output[chars++] = '\\'; output[chars++] = c; break; \
     265              case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
     266              case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
     267              case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
     268              case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
     269              case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
     270              default: \
     271                  if (c <= 0x1f) { \
     272                      output[chars++] = '\\'; \
     273                      output[chars++] = 'u'; \
     274                      output[chars++] = '0'; \
     275                      output[chars++] = '0'; \
     276                      output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
     277                      output[chars++] = Py_hexdigits[(c     ) & 0xf]; \
     278                  } else { \
     279                      output[chars++] = c; \
     280                  } \
     281              } \
     282          } \
     283          output[chars++] = '"'; \
     284      } while (0)
     285  
     286      if (kind == PyUnicode_1BYTE_KIND) {
     287          Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
     288          ENCODE_OUTPUT;
     289      } else if (kind == PyUnicode_2BYTE_KIND) {
     290          Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
     291          ENCODE_OUTPUT;
     292      } else {
     293          Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
     294          assert(kind == PyUnicode_4BYTE_KIND);
     295          ENCODE_OUTPUT;
     296      }
     297  #undef ENCODE_OUTPUT
     298  
     299  #ifdef Py_DEBUG
     300      assert(_PyUnicode_CheckConsistency(rval, 1));
     301  #endif
     302      return rval;
     303  }
     304  
     305  static void
     306  raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
     307  {
     308      /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
     309      _Py_DECLARE_STR(json_decoder, "json.decoder");
     310      PyObject *JSONDecodeError =
     311           _PyImport_GetModuleAttr(&_Py_STR(json_decoder), &_Py_ID(JSONDecodeError));
     312      if (JSONDecodeError == NULL) {
     313          return;
     314      }
     315  
     316      PyObject *exc;
     317      exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
     318      Py_DECREF(JSONDecodeError);
     319      if (exc) {
     320          PyErr_SetObject(JSONDecodeError, exc);
     321          Py_DECREF(exc);
     322      }
     323  }
     324  
     325  static void
     326  raise_stop_iteration(Py_ssize_t idx)
     327  {
     328      PyObject *value = PyLong_FromSsize_t(idx);
     329      if (value != NULL) {
     330          PyErr_SetObject(PyExc_StopIteration, value);
     331          Py_DECREF(value);
     332      }
     333  }
     334  
     335  static PyObject *
     336  _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
     337      /* return (rval, idx) tuple, stealing reference to rval */
     338      PyObject *tpl;
     339      PyObject *pyidx;
     340      /*
     341      steal a reference to rval, returns (rval, idx)
     342      */
     343      if (rval == NULL) {
     344          return NULL;
     345      }
     346      pyidx = PyLong_FromSsize_t(idx);
     347      if (pyidx == NULL) {
     348          Py_DECREF(rval);
     349          return NULL;
     350      }
     351      tpl = PyTuple_New(2);
     352      if (tpl == NULL) {
     353          Py_DECREF(pyidx);
     354          Py_DECREF(rval);
     355          return NULL;
     356      }
     357      PyTuple_SET_ITEM(tpl, 0, rval);
     358      PyTuple_SET_ITEM(tpl, 1, pyidx);
     359      return tpl;
     360  }
     361  
     362  static PyObject *
     363  scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
     364  {
     365      /* Read the JSON string from PyUnicode pystr.
     366      end is the index of the first character after the quote.
     367      if strict is zero then literal control characters are allowed
     368      *next_end_ptr is a return-by-reference index of the character
     369          after the end quote
     370  
     371      Return value is a new PyUnicode
     372      */
     373      PyObject *rval = NULL;
     374      Py_ssize_t len;
     375      Py_ssize_t begin = end - 1;
     376      Py_ssize_t next /* = begin */;
     377      const void *buf;
     378      int kind;
     379  
     380      if (PyUnicode_READY(pystr) == -1)
     381          return 0;
     382  
     383      _PyUnicodeWriter writer;
     384      _PyUnicodeWriter_Init(&writer);
     385      writer.overallocate = 1;
     386  
     387      len = PyUnicode_GET_LENGTH(pystr);
     388      buf = PyUnicode_DATA(pystr);
     389      kind = PyUnicode_KIND(pystr);
     390  
     391      if (end < 0 || len < end) {
     392          PyErr_SetString(PyExc_ValueError, "end is out of bounds");
     393          goto bail;
     394      }
     395      while (1) {
     396          /* Find the end of the string or the next escape */
     397          Py_UCS4 c;
     398          {
     399              // Use tight scope variable to help register allocation.
     400              Py_UCS4 d = 0;
     401              for (next = end; next < len; next++) {
     402                  d = PyUnicode_READ(kind, buf, next);
     403                  if (d == '"' || d == '\\') {
     404                      break;
     405                  }
     406                  if (d <= 0x1f && strict) {
     407                      raise_errmsg("Invalid control character at", pystr, next);
     408                      goto bail;
     409                  }
     410              }
     411              c = d;
     412          }
     413  
     414          if (c == '"') {
     415              // Fast path for simple case.
     416              if (writer.buffer == NULL) {
     417                  PyObject *ret = PyUnicode_Substring(pystr, end, next);
     418                  if (ret == NULL) {
     419                      goto bail;
     420                  }
     421                  *next_end_ptr = next + 1;;
     422                  return ret;
     423              }
     424          }
     425          else if (c != '\\') {
     426              raise_errmsg("Unterminated string starting at", pystr, begin);
     427              goto bail;
     428          }
     429  
     430          /* Pick up this chunk if it's not zero length */
     431          if (next != end) {
     432              if (_PyUnicodeWriter_WriteSubstring(&writer, pystr, end, next) < 0) {
     433                  goto bail;
     434              }
     435          }
     436          next++;
     437          if (c == '"') {
     438              end = next;
     439              break;
     440          }
     441          if (next == len) {
     442              raise_errmsg("Unterminated string starting at", pystr, begin);
     443              goto bail;
     444          }
     445          c = PyUnicode_READ(kind, buf, next);
     446          if (c != 'u') {
     447              /* Non-unicode backslash escapes */
     448              end = next + 1;
     449              switch (c) {
     450                  case '"': break;
     451                  case '\\': break;
     452                  case '/': break;
     453                  case 'b': c = '\b'; break;
     454                  case 'f': c = '\f'; break;
     455                  case 'n': c = '\n'; break;
     456                  case 'r': c = '\r'; break;
     457                  case 't': c = '\t'; break;
     458                  default: c = 0;
     459              }
     460              if (c == 0) {
     461                  raise_errmsg("Invalid \\escape", pystr, end - 2);
     462                  goto bail;
     463              }
     464          }
     465          else {
     466              c = 0;
     467              next++;
     468              end = next + 4;
     469              if (end >= len) {
     470                  raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
     471                  goto bail;
     472              }
     473              /* Decode 4 hex digits */
     474              for (; next < end; next++) {
     475                  Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
     476                  c <<= 4;
     477                  switch (digit) {
     478                      case '0': case '1': case '2': case '3': case '4':
     479                      case '5': case '6': case '7': case '8': case '9':
     480                          c |= (digit - '0'); break;
     481                      case 'a': case 'b': case 'c': case 'd': case 'e':
     482                      case 'f':
     483                          c |= (digit - 'a' + 10); break;
     484                      case 'A': case 'B': case 'C': case 'D': case 'E':
     485                      case 'F':
     486                          c |= (digit - 'A' + 10); break;
     487                      default:
     488                          raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
     489                          goto bail;
     490                  }
     491              }
     492              /* Surrogate pair */
     493              if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
     494                  PyUnicode_READ(kind, buf, next++) == '\\' &&
     495                  PyUnicode_READ(kind, buf, next++) == 'u') {
     496                  Py_UCS4 c2 = 0;
     497                  end += 6;
     498                  /* Decode 4 hex digits */
     499                  for (; next < end; next++) {
     500                      Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
     501                      c2 <<= 4;
     502                      switch (digit) {
     503                          case '0': case '1': case '2': case '3': case '4':
     504                          case '5': case '6': case '7': case '8': case '9':
     505                              c2 |= (digit - '0'); break;
     506                          case 'a': case 'b': case 'c': case 'd': case 'e':
     507                          case 'f':
     508                              c2 |= (digit - 'a' + 10); break;
     509                          case 'A': case 'B': case 'C': case 'D': case 'E':
     510                          case 'F':
     511                              c2 |= (digit - 'A' + 10); break;
     512                          default:
     513                              raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
     514                              goto bail;
     515                      }
     516                  }
     517                  if (Py_UNICODE_IS_LOW_SURROGATE(c2))
     518                      c = Py_UNICODE_JOIN_SURROGATES(c, c2);
     519                  else
     520                      end -= 6;
     521              }
     522          }
     523          if (_PyUnicodeWriter_WriteChar(&writer, c) < 0) {
     524              goto bail;
     525          }
     526      }
     527  
     528      rval = _PyUnicodeWriter_Finish(&writer);
     529      *next_end_ptr = end;
     530      return rval;
     531  
     532  bail:
     533      *next_end_ptr = -1;
     534      _PyUnicodeWriter_Dealloc(&writer);
     535      return NULL;
     536  }
     537  
     538  PyDoc_STRVAR(pydoc_scanstring,
     539      "scanstring(string, end, strict=True) -> (string, end)\n"
     540      "\n"
     541      "Scan the string s for a JSON string. End is the index of the\n"
     542      "character in s after the quote that started the JSON string.\n"
     543      "Unescapes all valid JSON string escape sequences and raises ValueError\n"
     544      "on attempt to decode an invalid string. If strict is False then literal\n"
     545      "control characters are allowed in the string.\n"
     546      "\n"
     547      "Returns a tuple of the decoded string and the index of the character in s\n"
     548      "after the end quote."
     549  );
     550  
     551  static PyObject *
     552  py_scanstring(PyObject* Py_UNUSED(self), PyObject *args)
     553  {
     554      PyObject *pystr;
     555      PyObject *rval;
     556      Py_ssize_t end;
     557      Py_ssize_t next_end = -1;
     558      int strict = 1;
     559      if (!PyArg_ParseTuple(args, "On|p:scanstring", &pystr, &end, &strict)) {
     560          return NULL;
     561      }
     562      if (PyUnicode_Check(pystr)) {
     563          rval = scanstring_unicode(pystr, end, strict, &next_end);
     564      }
     565      else {
     566          PyErr_Format(PyExc_TypeError,
     567                       "first argument must be a string, not %.80s",
     568                       Py_TYPE(pystr)->tp_name);
     569          return NULL;
     570      }
     571      return _build_rval_index_tuple(rval, next_end);
     572  }
     573  
     574  PyDoc_STRVAR(pydoc_encode_basestring_ascii,
     575      "encode_basestring_ascii(string) -> string\n"
     576      "\n"
     577      "Return an ASCII-only JSON representation of a Python string"
     578  );
     579  
     580  static PyObject *
     581  py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr)
     582  {
     583      PyObject *rval;
     584      /* Return an ASCII-only JSON representation of a Python string */
     585      /* METH_O */
     586      if (PyUnicode_Check(pystr)) {
     587          rval = ascii_escape_unicode(pystr);
     588      }
     589      else {
     590          PyErr_Format(PyExc_TypeError,
     591                       "first argument must be a string, not %.80s",
     592                       Py_TYPE(pystr)->tp_name);
     593          return NULL;
     594      }
     595      return rval;
     596  }
     597  
     598  
     599  PyDoc_STRVAR(pydoc_encode_basestring,
     600      "encode_basestring(string) -> string\n"
     601      "\n"
     602      "Return a JSON representation of a Python string"
     603  );
     604  
     605  static PyObject *
     606  py_encode_basestring(PyObject* Py_UNUSED(self), PyObject *pystr)
     607  {
     608      PyObject *rval;
     609      /* Return a JSON representation of a Python string */
     610      /* METH_O */
     611      if (PyUnicode_Check(pystr)) {
     612          rval = escape_unicode(pystr);
     613      }
     614      else {
     615          PyErr_Format(PyExc_TypeError,
     616                       "first argument must be a string, not %.80s",
     617                       Py_TYPE(pystr)->tp_name);
     618          return NULL;
     619      }
     620      return rval;
     621  }
     622  
     623  static void
     624  scanner_dealloc(PyObject *self)
     625  {
     626      PyTypeObject *tp = Py_TYPE(self);
     627      /* bpo-31095: UnTrack is needed before calling any callbacks */
     628      PyObject_GC_UnTrack(self);
     629      scanner_clear((PyScannerObject *)self);
     630      tp->tp_free(self);
     631      Py_DECREF(tp);
     632  }
     633  
/* GC traversal: report the scanner's type and every object reference stored
   in the instance to the visit callback.  Py_VISIT returns early with the
   callback's result when that result is non-zero; otherwise 0 is returned. */
static int
scanner_traverse(PyScannerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->object_hook);
    Py_VISIT(self->object_pairs_hook);
    Py_VISIT(self->parse_float);
    Py_VISIT(self->parse_int);
    Py_VISIT(self->parse_constant);
    Py_VISIT(self->memo);
    return 0;
}
     646  
/* Drop every object reference held by the scanner and reset the fields to
   NULL.  Also called from scanner_dealloc().  Always returns 0. */
static int
scanner_clear(PyScannerObject *self)
{
    Py_CLEAR(self->object_hook);
    Py_CLEAR(self->object_pairs_hook);
    Py_CLEAR(self->parse_float);
    Py_CLEAR(self->parse_int);
    Py_CLEAR(self->parse_constant);
    Py_CLEAR(self->memo);
    return 0;
}
     658  
static PyObject *
_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
{
    /* Read a JSON object from PyUnicode pystr.
    idx is the index of the first character after the opening curly brace.
    *next_idx_ptr is a return-by-reference index to the first character after
        the closing curly brace.

    Returns a new PyObject (usually a dict, but object_hook can change that)

    When s->object_pairs_hook is not None the members are collected as a
    list of (key, value) tuples and the hook's result is returned; in that
    case object_hook is not consulted.  Returns NULL with an exception set
    on error; the parse-error paths leave *next_idx_ptr unwritten.
    */
    const void *str;
    int kind;
    Py_ssize_t end_idx;
    PyObject *val = NULL;
    PyObject *rval = NULL;
    PyObject *key = NULL;
    int has_pairs_hook = (s->object_pairs_hook != Py_None);
    Py_ssize_t next_idx;

    if (PyUnicode_READY(pystr) == -1)
        return NULL;

    str = PyUnicode_DATA(pystr);
    kind = PyUnicode_KIND(pystr);
    /* end_idx is the index of the last character, so valid reads satisfy idx <= end_idx */
    end_idx = PyUnicode_GET_LENGTH(pystr) - 1;

    /* the container type depends on which hook will receive the result */
    if (has_pairs_hook)
        rval = PyList_New(0);
    else
        rval = PyDict_New();
    if (rval == NULL)
        return NULL;

    /* skip whitespace after { */
    while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;

    /* only loop if the object is non-empty */
    /* (running off the end of the input also enters the loop, where the
       key check below reports the error) */
    if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
        while (1) {
            PyObject *memokey;

            /* read key */
            if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
                raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
                goto bail;
            }
            key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
            if (key == NULL)
                goto bail;
            /* look the key up in the memo, inserting it if absent, so that
               equal keys are represented by one shared string object */
            memokey = PyDict_SetDefault(s->memo, key, key);
            if (memokey == NULL) {
                goto bail;
            }
            /* replace our key with a new reference to the memoized object */
            Py_SETREF(key, Py_NewRef(memokey));
            idx = next_idx;

            /* skip whitespace between key and : delimiter, read :, skip whitespace */
            while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
            if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
                raise_errmsg("Expecting ':' delimiter", pystr, idx);
                goto bail;
            }
            idx++;
            while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;

            /* read any JSON term */
            val = scan_once_unicode(s, pystr, idx, &next_idx);
            if (val == NULL)
                goto bail;

            if (has_pairs_hook) {
                PyObject *item = PyTuple_Pack(2, key, val);
                if (item == NULL)
                    goto bail;
                /* the tuple holds its own references; clear ours so the
                   bail path below does not release them a second time */
                Py_CLEAR(key);
                Py_CLEAR(val);
                if (PyList_Append(rval, item) == -1) {
                    Py_DECREF(item);
                    goto bail;
                }
                Py_DECREF(item);
            }
            else {
                if (PyDict_SetItem(rval, key, val) < 0)
                    goto bail;
                Py_CLEAR(key);
                Py_CLEAR(val);
            }
            idx = next_idx;

            /* skip whitespace before } or , */
            while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;

            /* bail if the object is closed or we didn't get the , delimiter */
            if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
                break;
            if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
                raise_errmsg("Expecting ',' delimiter", pystr, idx);
                goto bail;
            }
            idx++;

            /* skip whitespace after , delimiter */
            while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
        }
    }

    /* idx is on the closing '}' here; report the position just past it */
    *next_idx_ptr = idx + 1;

    if (has_pairs_hook) {
        /* the hook's result (or NULL on error) replaces the pair list */
        val = PyObject_CallOneArg(s->object_pairs_hook, rval);
        Py_DECREF(rval);
        return val;
    }

    /* if object_hook is not None: rval = object_hook(rval) */
    if (s->object_hook != Py_None) {
        val = PyObject_CallOneArg(s->object_hook, rval);
        Py_DECREF(rval);
        return val;
    }
    return rval;
bail:
    /* release whatever is still owned at the point of failure */
    Py_XDECREF(key);
    Py_XDECREF(val);
    Py_XDECREF(rval);
    return NULL;
}
     787  
     788  static PyObject *
     789  _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
     790      /* Read a JSON array from PyUnicode pystr.
     791      idx is the index of the first character after the opening brace.
     792      *next_idx_ptr is a return-by-reference index to the first character after
     793          the closing brace.
     794  
     795      Returns a new PyList
     796      */
     797      const void *str;
     798      int kind;
     799      Py_ssize_t end_idx;
     800      PyObject *val = NULL;
     801      PyObject *rval;
     802      Py_ssize_t next_idx;
     803  
     804      if (PyUnicode_READY(pystr) == -1)
     805          return NULL;
     806  
     807      rval = PyList_New(0);
     808      if (rval == NULL)
     809          return NULL;
     810  
     811      str = PyUnicode_DATA(pystr);
     812      kind = PyUnicode_KIND(pystr);
     813      end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
     814  
     815      /* skip whitespace after [ */
     816      while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
     817  
     818      /* only loop if the array is non-empty */
     819      if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
     820          while (1) {
     821  
     822              /* read any JSON term  */
     823              val = scan_once_unicode(s, pystr, idx, &next_idx);
     824              if (val == NULL)
     825                  goto bail;
     826  
     827              if (PyList_Append(rval, val) == -1)
     828                  goto bail;
     829  
     830              Py_CLEAR(val);
     831              idx = next_idx;
     832  
     833              /* skip whitespace between term and , */
     834              while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
     835  
     836              /* bail if the array is closed or we didn't get the , delimiter */
     837              if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
     838                  break;
     839              if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
     840                  raise_errmsg("Expecting ',' delimiter", pystr, idx);
     841                  goto bail;
     842              }
     843              idx++;
     844  
     845              /* skip whitespace after , */
     846              while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
     847          }
     848      }
     849  
     850      /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
     851      if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
     852          raise_errmsg("Expecting value", pystr, end_idx);
     853          goto bail;
     854      }
     855      *next_idx_ptr = idx + 1;
     856      return rval;
     857  bail:
     858      Py_XDECREF(val);
     859      Py_DECREF(rval);
     860      return NULL;
     861  }
     862  
     863  static PyObject *
     864  _parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
     865      /* Read a JSON constant.
     866      constant is the constant string that was found
     867          ("NaN", "Infinity", "-Infinity").
     868      idx is the index of the first character of the constant
     869      *next_idx_ptr is a return-by-reference index to the first character after
     870          the constant.
     871  
     872      Returns the result of parse_constant
     873      */
     874      PyObject *cstr;
     875      PyObject *rval;
     876      /* constant is "NaN", "Infinity", or "-Infinity" */
     877      cstr = PyUnicode_InternFromString(constant);
     878      if (cstr == NULL)
     879          return NULL;
     880  
     881      /* rval = parse_constant(constant) */
     882      rval = PyObject_CallOneArg(s->parse_constant, cstr);
     883      idx += PyUnicode_GET_LENGTH(cstr);
     884      Py_DECREF(cstr);
     885      *next_idx_ptr = idx;
     886      return rval;
     887  }
     888  
     889  static PyObject *
     890  _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
     891      /* Read a JSON number from PyUnicode pystr.
     892      idx is the index of the first character of the number
     893      *next_idx_ptr is a return-by-reference index to the first character after
     894          the number.
     895  
     896      Returns a new PyObject representation of that number:
     897          PyLong, or PyFloat.
     898          May return other types if parse_int or parse_float are set
     899      */
     900      const void *str;
     901      int kind;
     902      Py_ssize_t end_idx;
     903      Py_ssize_t idx = start;
     904      int is_float = 0;
     905      PyObject *rval;
     906      PyObject *numstr = NULL;
     907      PyObject *custom_func;
     908  
     909      if (PyUnicode_READY(pystr) == -1)
     910          return NULL;
     911  
     912      str = PyUnicode_DATA(pystr);
     913      kind = PyUnicode_KIND(pystr);
     914      end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
     915  
     916      /* read a sign if it's there, make sure it's not the end of the string */
     917      if (PyUnicode_READ(kind, str, idx) == '-') {
     918          idx++;
     919          if (idx > end_idx) {
     920              raise_stop_iteration(start);
     921              return NULL;
     922          }
     923      }
     924  
     925      /* read as many integer digits as we find as long as it doesn't start with 0 */
     926      if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
     927          idx++;
     928          while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
     929      }
     930      /* if it starts with 0 we only expect one integer digit */
     931      else if (PyUnicode_READ(kind, str, idx) == '0') {
     932          idx++;
     933      }
     934      /* no integer digits, error */
     935      else {
     936          raise_stop_iteration(start);
     937          return NULL;
     938      }
     939  
     940      /* if the next char is '.' followed by a digit then read all float digits */
     941      if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
     942          is_float = 1;
     943          idx += 2;
     944          while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
     945      }
     946  
     947      /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
     948      if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
     949          Py_ssize_t e_start = idx;
     950          idx++;
     951  
     952          /* read an exponent sign if present */
     953          if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
     954  
     955          /* read all digits */
     956          while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
     957  
     958          /* if we got a digit, then parse as float. if not, backtrack */
     959          if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
     960              is_float = 1;
     961          }
     962          else {
     963              idx = e_start;
     964          }
     965      }
     966  
     967      if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
     968          custom_func = s->parse_float;
     969      else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
     970          custom_func = s->parse_int;
     971      else
     972          custom_func = NULL;
     973  
     974      if (custom_func) {
     975          /* copy the section we determined to be a number */
     976          numstr = PyUnicode_FromKindAndData(kind,
     977                                             (char*)str + kind * start,
     978                                             idx - start);
     979          if (numstr == NULL)
     980              return NULL;
     981          rval = PyObject_CallOneArg(custom_func, numstr);
     982      }
     983      else {
     984          Py_ssize_t i, n;
     985          char *buf;
     986          /* Straight conversion to ASCII, to avoid costly conversion of
     987             decimal unicode digits (which cannot appear here) */
     988          n = idx - start;
     989          numstr = PyBytes_FromStringAndSize(NULL, n);
     990          if (numstr == NULL)
     991              return NULL;
     992          buf = PyBytes_AS_STRING(numstr);
     993          for (i = 0; i < n; i++) {
     994              buf[i] = (char) PyUnicode_READ(kind, str, i + start);
     995          }
     996          if (is_float)
     997              rval = PyFloat_FromString(numstr);
     998          else
     999              rval = PyLong_FromString(buf, NULL, 10);
    1000      }
    1001      Py_DECREF(numstr);
    1002      *next_idx_ptr = idx;
    1003      return rval;
    1004  }
    1005  
    1006  static PyObject *
    1007  scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
    1008  {
    1009      /* Read one JSON term (of any kind) from PyUnicode pystr.
    1010      idx is the index of the first character of the term
    1011      *next_idx_ptr is a return-by-reference index to the first character after
    1012          the number.
    1013  
    1014      Returns a new PyObject representation of the term.
    1015      */
    1016      PyObject *res;
    1017      const void *str;
    1018      int kind;
    1019      Py_ssize_t length;
    1020  
    1021      if (PyUnicode_READY(pystr) == -1)
    1022          return NULL;
    1023  
    1024      str = PyUnicode_DATA(pystr);
    1025      kind = PyUnicode_KIND(pystr);
    1026      length = PyUnicode_GET_LENGTH(pystr);
    1027  
    1028      if (idx < 0) {
    1029          PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
    1030          return NULL;
    1031      }
    1032      if (idx >= length) {
    1033          raise_stop_iteration(idx);
    1034          return NULL;
    1035      }
    1036  
    1037      switch (PyUnicode_READ(kind, str, idx)) {
    1038          case '"':
    1039              /* string */
    1040              return scanstring_unicode(pystr, idx + 1, s->strict, next_idx_ptr);
    1041          case '{':
    1042              /* object */
    1043              if (_Py_EnterRecursiveCall(" while decoding a JSON object "
    1044                                         "from a unicode string"))
    1045                  return NULL;
    1046              res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
    1047              _Py_LeaveRecursiveCall();
    1048              return res;
    1049          case '[':
    1050              /* array */
    1051              if (_Py_EnterRecursiveCall(" while decoding a JSON array "
    1052                                         "from a unicode string"))
    1053                  return NULL;
    1054              res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
    1055              _Py_LeaveRecursiveCall();
    1056              return res;
    1057          case 'n':
    1058              /* null */
    1059              if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
    1060                  *next_idx_ptr = idx + 4;
    1061                  Py_RETURN_NONE;
    1062              }
    1063              break;
    1064          case 't':
    1065              /* true */
    1066              if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
    1067                  *next_idx_ptr = idx + 4;
    1068                  Py_RETURN_TRUE;
    1069              }
    1070              break;
    1071          case 'f':
    1072              /* false */
    1073              if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
    1074                  PyUnicode_READ(kind, str, idx + 2) == 'l' &&
    1075                  PyUnicode_READ(kind, str, idx + 3) == 's' &&
    1076                  PyUnicode_READ(kind, str, idx + 4) == 'e') {
    1077                  *next_idx_ptr = idx + 5;
    1078                  Py_RETURN_FALSE;
    1079              }
    1080              break;
    1081          case 'N':
    1082              /* NaN */
    1083              if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
    1084                  PyUnicode_READ(kind, str, idx + 2) == 'N') {
    1085                  return _parse_constant(s, "NaN", idx, next_idx_ptr);
    1086              }
    1087              break;
    1088          case 'I':
    1089              /* Infinity */
    1090              if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
    1091                  PyUnicode_READ(kind, str, idx + 2) == 'f' &&
    1092                  PyUnicode_READ(kind, str, idx + 3) == 'i' &&
    1093                  PyUnicode_READ(kind, str, idx + 4) == 'n' &&
    1094                  PyUnicode_READ(kind, str, idx + 5) == 'i' &&
    1095                  PyUnicode_READ(kind, str, idx + 6) == 't' &&
    1096                  PyUnicode_READ(kind, str, idx + 7) == 'y') {
    1097                  return _parse_constant(s, "Infinity", idx, next_idx_ptr);
    1098              }
    1099              break;
    1100          case '-':
    1101              /* -Infinity */
    1102              if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
    1103                  PyUnicode_READ(kind, str, idx + 2) == 'n' &&
    1104                  PyUnicode_READ(kind, str, idx + 3) == 'f' &&
    1105                  PyUnicode_READ(kind, str, idx + 4) == 'i' &&
    1106                  PyUnicode_READ(kind, str, idx + 5) == 'n' &&
    1107                  PyUnicode_READ(kind, str, idx + 6) == 'i' &&
    1108                  PyUnicode_READ(kind, str, idx + 7) == 't' &&
    1109                  PyUnicode_READ(kind, str, idx + 8) == 'y') {
    1110                  return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
    1111              }
    1112              break;
    1113      }
    1114      /* Didn't find a string, object, array, or named constant. Look for a number. */
    1115      return _match_number_unicode(s, pystr, idx, next_idx_ptr);
    1116  }
    1117  
    1118  static PyObject *
    1119  scanner_call(PyScannerObject *self, PyObject *args, PyObject *kwds)
    1120  {
    1121      /* Python callable interface to scan_once_{str,unicode} */
    1122      PyObject *pystr;
    1123      PyObject *rval;
    1124      Py_ssize_t idx;
    1125      Py_ssize_t next_idx = -1;
    1126      static char *kwlist[] = {"string", "idx", NULL};
    1127      if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
    1128          return NULL;
    1129  
    1130      if (PyUnicode_Check(pystr)) {
    1131          rval = scan_once_unicode(self, pystr, idx, &next_idx);
    1132      }
    1133      else {
    1134          PyErr_Format(PyExc_TypeError,
    1135                   "first argument must be a string, not %.80s",
    1136                   Py_TYPE(pystr)->tp_name);
    1137          return NULL;
    1138      }
    1139      PyDict_Clear(self->memo);
    1140      if (rval == NULL)
    1141          return NULL;
    1142      return _build_rval_index_tuple(rval, next_idx);
    1143  }
    1144  
    1145  static PyObject *
    1146  scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
    1147  {
    1148      PyScannerObject *s;
    1149      PyObject *ctx;
    1150      PyObject *strict;
    1151      static char *kwlist[] = {"context", NULL};
    1152  
    1153      if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
    1154          return NULL;
    1155  
    1156      s = (PyScannerObject *)type->tp_alloc(type, 0);
    1157      if (s == NULL) {
    1158          return NULL;
    1159      }
    1160  
    1161      s->memo = PyDict_New();
    1162      if (s->memo == NULL)
    1163          goto bail;
    1164  
    1165      /* All of these will fail "gracefully" so we don't need to verify them */
    1166      strict = PyObject_GetAttrString(ctx, "strict");
    1167      if (strict == NULL)
    1168          goto bail;
    1169      s->strict = PyObject_IsTrue(strict);
    1170      Py_DECREF(strict);
    1171      if (s->strict < 0)
    1172          goto bail;
    1173      s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
    1174      if (s->object_hook == NULL)
    1175          goto bail;
    1176      s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
    1177      if (s->object_pairs_hook == NULL)
    1178          goto bail;
    1179      s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
    1180      if (s->parse_float == NULL)
    1181          goto bail;
    1182      s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
    1183      if (s->parse_int == NULL)
    1184          goto bail;
    1185      s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
    1186      if (s->parse_constant == NULL)
    1187          goto bail;
    1188  
    1189      return (PyObject *)s;
    1190  
    1191  bail:
    1192      Py_DECREF(s);
    1193      return NULL;
    1194  }
    1195  
    1196  PyDoc_STRVAR(scanner_doc, "JSON scanner object");
    1197  
    1198  static PyType_Slot PyScannerType_slots[] = {
    1199      {Py_tp_doc, (void *)scanner_doc},
    1200      {Py_tp_dealloc, scanner_dealloc},
    1201      {Py_tp_call, scanner_call},
    1202      {Py_tp_traverse, scanner_traverse},
    1203      {Py_tp_clear, scanner_clear},
    1204      {Py_tp_members, scanner_members},
    1205      {Py_tp_new, scanner_new},
    1206      {0, 0}
    1207  };
    1208  
    1209  static PyType_Spec PyScannerType_spec = {
    1210      .name = "_json.Scanner",
    1211      .basicsize = sizeof(PyScannerObject),
    1212      .itemsize = 0,
    1213      .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
    1214      .slots = PyScannerType_slots,
    1215  };
    1216  
    1217  static PyObject *
    1218  encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
    1219  {
    1220      static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
    1221  
    1222      PyEncoderObject *s;
    1223      PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
    1224      PyObject *item_separator;
    1225      int sort_keys, skipkeys, allow_nan;
    1226  
    1227      if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
    1228          &markers, &defaultfn, &encoder, &indent,
    1229          &key_separator, &item_separator,
    1230          &sort_keys, &skipkeys, &allow_nan))
    1231          return NULL;
    1232  
    1233      if (markers != Py_None && !PyDict_Check(markers)) {
    1234          PyErr_Format(PyExc_TypeError,
    1235                       "make_encoder() argument 1 must be dict or None, "
    1236                       "not %.200s", Py_TYPE(markers)->tp_name);
    1237          return NULL;
    1238      }
    1239  
    1240      s = (PyEncoderObject *)type->tp_alloc(type, 0);
    1241      if (s == NULL)
    1242          return NULL;
    1243  
    1244      s->markers = Py_NewRef(markers);
    1245      s->defaultfn = Py_NewRef(defaultfn);
    1246      s->encoder = Py_NewRef(encoder);
    1247      s->indent = Py_NewRef(indent);
    1248      s->key_separator = Py_NewRef(key_separator);
    1249      s->item_separator = Py_NewRef(item_separator);
    1250      s->sort_keys = sort_keys;
    1251      s->skipkeys = skipkeys;
    1252      s->allow_nan = allow_nan;
    1253      s->fast_encode = NULL;
    1254  
    1255      if (PyCFunction_Check(s->encoder)) {
    1256          PyCFunction f = PyCFunction_GetFunction(s->encoder);
    1257          if (f == (PyCFunction)py_encode_basestring_ascii ||
    1258                  f == (PyCFunction)py_encode_basestring) {
    1259              s->fast_encode = f;
    1260          }
    1261      }
    1262  
    1263      return (PyObject *)s;
    1264  }
    1265  
    1266  static PyObject *
    1267  encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
    1268  {
    1269      /* Python callable interface to encode_listencode_obj */
    1270      static char *kwlist[] = {"obj", "_current_indent_level", NULL};
    1271      PyObject *obj, *result;
    1272      Py_ssize_t indent_level;
    1273      _PyUnicodeWriter writer;
    1274  
    1275      if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
    1276          &obj, &indent_level))
    1277          return NULL;
    1278  
    1279      _PyUnicodeWriter_Init(&writer);
    1280      writer.overallocate = 1;
    1281  
    1282      if (encoder_listencode_obj(self, &writer, obj, indent_level)) {
    1283          _PyUnicodeWriter_Dealloc(&writer);
    1284          return NULL;
    1285      }
    1286  
    1287      result = PyTuple_New(1);
    1288      if (result == NULL ||
    1289              PyTuple_SetItem(result, 0, _PyUnicodeWriter_Finish(&writer)) < 0) {
    1290          Py_XDECREF(result);
    1291          return NULL;
    1292      }
    1293      return result;
    1294  }
    1295  
    1296  static PyObject *
    1297  _encoded_const(PyObject *obj)
    1298  {
    1299      /* Return the JSON string representation of None, True, False */
    1300      if (obj == Py_None) {
    1301          return Py_NewRef(&_Py_ID(null));
    1302      }
    1303      else if (obj == Py_True) {
    1304          return Py_NewRef(&_Py_ID(true));
    1305      }
    1306      else if (obj == Py_False) {
    1307          return Py_NewRef(&_Py_ID(false));
    1308      }
    1309      else {
    1310          PyErr_SetString(PyExc_ValueError, "not a const");
    1311          return NULL;
    1312      }
    1313  }
    1314  
    1315  static PyObject *
    1316  encoder_encode_float(PyEncoderObject *s, PyObject *obj)
    1317  {
    1318      /* Return the JSON representation of a PyFloat. */
    1319      double i = PyFloat_AS_DOUBLE(obj);
    1320      if (!Py_IS_FINITE(i)) {
    1321          if (!s->allow_nan) {
    1322              PyErr_Format(
    1323                      PyExc_ValueError,
    1324                      "Out of range float values are not JSON compliant: %R",
    1325                      obj
    1326                      );
    1327              return NULL;
    1328          }
    1329          if (i > 0) {
    1330              return PyUnicode_FromString("Infinity");
    1331          }
    1332          else if (i < 0) {
    1333              return PyUnicode_FromString("-Infinity");
    1334          }
    1335          else {
    1336              return PyUnicode_FromString("NaN");
    1337          }
    1338      }
    1339      return PyFloat_Type.tp_repr(obj);
    1340  }
    1341  
    1342  static PyObject *
    1343  encoder_encode_string(PyEncoderObject *s, PyObject *obj)
    1344  {
    1345      /* Return the JSON representation of a string */
    1346      PyObject *encoded;
    1347  
    1348      if (s->fast_encode) {
    1349          return s->fast_encode(NULL, obj);
    1350      }
    1351      encoded = PyObject_CallOneArg(s->encoder, obj);
    1352      if (encoded != NULL && !PyUnicode_Check(encoded)) {
    1353          PyErr_Format(PyExc_TypeError,
    1354                       "encoder() must return a string, not %.80s",
    1355                       Py_TYPE(encoded)->tp_name);
    1356          Py_DECREF(encoded);
    1357          return NULL;
    1358      }
    1359      return encoded;
    1360  }
    1361  
    1362  static int
    1363  _steal_accumulate(_PyUnicodeWriter *writer, PyObject *stolen)
    1364  {
    1365      /* Append stolen and then decrement its reference count */
    1366      int rval = _PyUnicodeWriter_WriteStr(writer, stolen);
    1367      Py_DECREF(stolen);
    1368      return rval;
    1369  }
    1370  
    1371  static int
    1372  encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
    1373                         PyObject *obj, Py_ssize_t indent_level)
    1374  {
    1375      /* Encode Python object obj to a JSON term */
    1376      PyObject *newobj;
    1377      int rv;
    1378  
    1379      if (obj == Py_None) {
    1380        return _PyUnicodeWriter_WriteASCIIString(writer, "null", 4);
    1381      }
    1382      else if (obj == Py_True) {
    1383        return _PyUnicodeWriter_WriteASCIIString(writer, "true", 4);
    1384      }
    1385      else if (obj == Py_False) {
    1386        return _PyUnicodeWriter_WriteASCIIString(writer, "false", 5);
    1387      }
    1388      else if (PyUnicode_Check(obj)) {
    1389          PyObject *encoded = encoder_encode_string(s, obj);
    1390          if (encoded == NULL)
    1391              return -1;
    1392          return _steal_accumulate(writer, encoded);
    1393      }
    1394      else if (PyLong_Check(obj)) {
    1395          PyObject *encoded = PyLong_Type.tp_repr(obj);
    1396          if (encoded == NULL)
    1397              return -1;
    1398          return _steal_accumulate(writer, encoded);
    1399      }
    1400      else if (PyFloat_Check(obj)) {
    1401          PyObject *encoded = encoder_encode_float(s, obj);
    1402          if (encoded == NULL)
    1403              return -1;
    1404          return _steal_accumulate(writer, encoded);
    1405      }
    1406      else if (PyList_Check(obj) || PyTuple_Check(obj)) {
    1407          if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
    1408              return -1;
    1409          rv = encoder_listencode_list(s, writer, obj, indent_level);
    1410          _Py_LeaveRecursiveCall();
    1411          return rv;
    1412      }
    1413      else if (PyDict_Check(obj)) {
    1414          if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
    1415              return -1;
    1416          rv = encoder_listencode_dict(s, writer, obj, indent_level);
    1417          _Py_LeaveRecursiveCall();
    1418          return rv;
    1419      }
    1420      else {
    1421          PyObject *ident = NULL;
    1422          if (s->markers != Py_None) {
    1423              int has_key;
    1424              ident = PyLong_FromVoidPtr(obj);
    1425              if (ident == NULL)
    1426                  return -1;
    1427              has_key = PyDict_Contains(s->markers, ident);
    1428              if (has_key) {
    1429                  if (has_key != -1)
    1430                      PyErr_SetString(PyExc_ValueError, "Circular reference detected");
    1431                  Py_DECREF(ident);
    1432                  return -1;
    1433              }
    1434              if (PyDict_SetItem(s->markers, ident, obj)) {
    1435                  Py_DECREF(ident);
    1436                  return -1;
    1437              }
    1438          }
    1439          newobj = PyObject_CallOneArg(s->defaultfn, obj);
    1440          if (newobj == NULL) {
    1441              Py_XDECREF(ident);
    1442              return -1;
    1443          }
    1444  
    1445          if (_Py_EnterRecursiveCall(" while encoding a JSON object")) {
    1446              Py_DECREF(newobj);
    1447              Py_XDECREF(ident);
    1448              return -1;
    1449          }
    1450          rv = encoder_listencode_obj(s, writer, newobj, indent_level);
    1451          _Py_LeaveRecursiveCall();
    1452  
    1453          Py_DECREF(newobj);
    1454          if (rv) {
    1455              Py_XDECREF(ident);
    1456              return -1;
    1457          }
    1458          if (ident != NULL) {
    1459              if (PyDict_DelItem(s->markers, ident)) {
    1460                  Py_XDECREF(ident);
    1461                  return -1;
    1462              }
    1463              Py_XDECREF(ident);
    1464          }
    1465          return rv;
    1466      }
    1467  }
    1468  
    1469  static int
    1470  encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first,
    1471                           PyObject *key, PyObject *value, Py_ssize_t indent_level)
    1472  {
    1473      PyObject *keystr = NULL;
    1474      PyObject *encoded;
    1475  
    1476      if (PyUnicode_Check(key)) {
    1477          keystr = Py_NewRef(key);
    1478      }
    1479      else if (PyFloat_Check(key)) {
    1480          keystr = encoder_encode_float(s, key);
    1481      }
    1482      else if (key == Py_True || key == Py_False || key == Py_None) {
    1483                      /* This must come before the PyLong_Check because
    1484                         True and False are also 1 and 0.*/
    1485          keystr = _encoded_const(key);
    1486      }
    1487      else if (PyLong_Check(key)) {
    1488          keystr = PyLong_Type.tp_repr(key);
    1489      }
    1490      else if (s->skipkeys) {
    1491          return 0;
    1492      }
    1493      else {
    1494          PyErr_Format(PyExc_TypeError,
    1495                       "keys must be str, int, float, bool or None, "
    1496                       "not %.100s", Py_TYPE(key)->tp_name);
    1497          return -1;
    1498      }
    1499  
    1500      if (keystr == NULL) {
    1501          return -1;
    1502      }
    1503  
    1504      if (*first) {
    1505          *first = false;
    1506      }
    1507      else {
    1508          if (_PyUnicodeWriter_WriteStr(writer, s->item_separator) < 0) {
    1509              Py_DECREF(keystr);
    1510              return -1;
    1511          }
    1512      }
    1513  
    1514      encoded = encoder_encode_string(s, keystr);
    1515      Py_DECREF(keystr);
    1516      if (encoded == NULL) {
    1517          return -1;
    1518      }
    1519  
    1520      if (_steal_accumulate(writer, encoded) < 0) {
    1521          return -1;
    1522      }
    1523      if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
    1524          return -1;
    1525      }
    1526      if (encoder_listencode_obj(s, writer, value, indent_level) < 0) {
    1527          return -1;
    1528      }
    1529      return 0;
    1530  }
    1531  
    1532  static int
    1533  encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
    1534                          PyObject *dct, Py_ssize_t indent_level)
    1535  {
    1536      /* Encode Python dict dct a JSON term */
    1537      PyObject *ident = NULL;
    1538      PyObject *items = NULL;
    1539      PyObject *key, *value;
    1540      bool first = true;
    1541  
    1542      if (PyDict_GET_SIZE(dct) == 0)  /* Fast path */
    1543          return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2);
    1544  
    1545      if (s->markers != Py_None) {
    1546          int has_key;
    1547          ident = PyLong_FromVoidPtr(dct);
    1548          if (ident == NULL)
    1549              goto bail;
    1550          has_key = PyDict_Contains(s->markers, ident);
    1551          if (has_key) {
    1552              if (has_key != -1)
    1553                  PyErr_SetString(PyExc_ValueError, "Circular reference detected");
    1554              goto bail;
    1555          }
    1556          if (PyDict_SetItem(s->markers, ident, dct)) {
    1557              goto bail;
    1558          }
    1559      }
    1560  
    1561      if (_PyUnicodeWriter_WriteChar(writer, '{'))
    1562          goto bail;
    1563  
    1564      if (s->indent != Py_None) {
    1565          /* TODO: DOES NOT RUN */
    1566          indent_level += 1;
    1567          /*
    1568              newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
    1569              separator = _item_separator + newline_indent
    1570              buf += newline_indent
    1571          */
    1572      }
    1573  
    1574      if (s->sort_keys || !PyDict_CheckExact(dct)) {
    1575          items = PyMapping_Items(dct);
    1576          if (items == NULL || (s->sort_keys && PyList_Sort(items) < 0))
    1577              goto bail;
    1578  
    1579          for (Py_ssize_t  i = 0; i < PyList_GET_SIZE(items); i++) {
    1580              PyObject *item = PyList_GET_ITEM(items, i);
    1581  
    1582              if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
    1583                  PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
    1584                  goto bail;
    1585              }
    1586  
    1587              key = PyTuple_GET_ITEM(item, 0);
    1588              value = PyTuple_GET_ITEM(item, 1);
    1589              if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0)
    1590                  goto bail;
    1591          }
    1592          Py_CLEAR(items);
    1593  
    1594      } else {
    1595          Py_ssize_t pos = 0;
    1596          while (PyDict_Next(dct, &pos, &key, &value)) {
    1597              if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0)
    1598                  goto bail;
    1599          }
    1600      }
    1601  
    1602      if (ident != NULL) {
    1603          if (PyDict_DelItem(s->markers, ident))
    1604              goto bail;
    1605          Py_CLEAR(ident);
    1606      }
    1607      /* TODO DOES NOT RUN; dead code
    1608      if (s->indent != Py_None) {
    1609          indent_level -= 1;
    1610  
    1611          yield '\n' + (' ' * (_indent * _current_indent_level))
    1612      }*/
    1613      if (_PyUnicodeWriter_WriteChar(writer, '}'))
    1614          goto bail;
    1615      return 0;
    1616  
    1617  bail:
    1618      Py_XDECREF(items);
    1619      Py_XDECREF(ident);
    1620      return -1;
    1621  }
    1622  
    1623  static int
    1624  encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
    1625                          PyObject *seq, Py_ssize_t indent_level)
    1626  {
    1627      PyObject *ident = NULL;
    1628      PyObject *s_fast = NULL;
    1629      Py_ssize_t i;
    1630  
    1631      ident = NULL;
    1632      s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
    1633      if (s_fast == NULL)
    1634          return -1;
    1635      if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
    1636          Py_DECREF(s_fast);
    1637          return _PyUnicodeWriter_WriteASCIIString(writer, "[]", 2);
    1638      }
    1639  
    1640      if (s->markers != Py_None) {
    1641          int has_key;
    1642          ident = PyLong_FromVoidPtr(seq);
    1643          if (ident == NULL)
    1644              goto bail;
    1645          has_key = PyDict_Contains(s->markers, ident);
    1646          if (has_key) {
    1647              if (has_key != -1)
    1648                  PyErr_SetString(PyExc_ValueError, "Circular reference detected");
    1649              goto bail;
    1650          }
    1651          if (PyDict_SetItem(s->markers, ident, seq)) {
    1652              goto bail;
    1653          }
    1654      }
    1655  
    1656      if (_PyUnicodeWriter_WriteChar(writer, '['))
    1657          goto bail;
    1658      if (s->indent != Py_None) {
    1659          /* TODO: DOES NOT RUN */
    1660          indent_level += 1;
    1661          /*
    1662              newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
    1663              separator = _item_separator + newline_indent
    1664              buf += newline_indent
    1665          */
    1666      }
    1667      for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
    1668          PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
    1669          if (i) {
    1670              if (_PyUnicodeWriter_WriteStr(writer, s->item_separator))
    1671                  goto bail;
    1672          }
    1673          if (encoder_listencode_obj(s, writer, obj, indent_level))
    1674              goto bail;
    1675      }
    1676      if (ident != NULL) {
    1677          if (PyDict_DelItem(s->markers, ident))
    1678              goto bail;
    1679          Py_CLEAR(ident);
    1680      }
    1681  
    1682      /* TODO: DOES NOT RUN
    1683      if (s->indent != Py_None) {
    1684          indent_level -= 1;
    1685  
    1686          yield '\n' + (' ' * (_indent * _current_indent_level))
    1687      }*/
    1688      if (_PyUnicodeWriter_WriteChar(writer, ']'))
    1689          goto bail;
    1690      Py_DECREF(s_fast);
    1691      return 0;
    1692  
    1693  bail:
    1694      Py_XDECREF(ident);
    1695      Py_DECREF(s_fast);
    1696      return -1;
    1697  }
    1698  
    1699  static void
    1700  encoder_dealloc(PyObject *self)
    1701  {
    1702      PyTypeObject *tp = Py_TYPE(self);
    1703      /* bpo-31095: UnTrack is needed before calling any callbacks */
    1704      PyObject_GC_UnTrack(self);
    1705      encoder_clear((PyEncoderObject *)self);
    1706      tp->tp_free(self);
    1707      Py_DECREF(tp);
    1708  }
    1709  
    1710  static int
    1711  encoder_traverse(PyEncoderObject *self, visitproc visit, void *arg)
    1712  {
    1713      Py_VISIT(Py_TYPE(self));
    1714      Py_VISIT(self->markers);
    1715      Py_VISIT(self->defaultfn);
    1716      Py_VISIT(self->encoder);
    1717      Py_VISIT(self->indent);
    1718      Py_VISIT(self->key_separator);
    1719      Py_VISIT(self->item_separator);
    1720      return 0;
    1721  }
    1722  
    1723  static int
    1724  encoder_clear(PyEncoderObject *self)
    1725  {
    1726      /* Deallocate Encoder */
    1727      Py_CLEAR(self->markers);
    1728      Py_CLEAR(self->defaultfn);
    1729      Py_CLEAR(self->encoder);
    1730      Py_CLEAR(self->indent);
    1731      Py_CLEAR(self->key_separator);
    1732      Py_CLEAR(self->item_separator);
    1733      return 0;
    1734  }
    1735  
    1736  PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
    1737  
    1738  static PyType_Slot PyEncoderType_slots[] = {
    1739      {Py_tp_doc, (void *)encoder_doc},
    1740      {Py_tp_dealloc, encoder_dealloc},
    1741      {Py_tp_call, encoder_call},
    1742      {Py_tp_traverse, encoder_traverse},
    1743      {Py_tp_clear, encoder_clear},
    1744      {Py_tp_members, encoder_members},
    1745      {Py_tp_new, encoder_new},
    1746      {0, 0}
    1747  };
    1748  
    1749  static PyType_Spec PyEncoderType_spec = {
    1750      .name = "_json.Encoder",
    1751      .basicsize = sizeof(PyEncoderObject),
    1752      .itemsize = 0,
    1753      .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
    1754      .slots = PyEncoderType_slots
    1755  };
    1756  
    1757  static PyMethodDef speedups_methods[] = {
    1758      {"encode_basestring_ascii",
    1759          (PyCFunction)py_encode_basestring_ascii,
    1760          METH_O,
    1761          pydoc_encode_basestring_ascii},
    1762      {"encode_basestring",
    1763          (PyCFunction)py_encode_basestring,
    1764          METH_O,
    1765          pydoc_encode_basestring},
    1766      {"scanstring",
    1767          (PyCFunction)py_scanstring,
    1768          METH_VARARGS,
    1769          pydoc_scanstring},
    1770      {NULL, NULL, 0, NULL}
    1771  };
    1772  
    1773  PyDoc_STRVAR(module_doc,
    1774  "json speedups\n");
    1775  
    1776  static int
    1777  _json_exec(PyObject *module)
    1778  {
    1779      PyObject *PyScannerType = PyType_FromSpec(&PyScannerType_spec);
    1780      if (PyScannerType == NULL) {
    1781          return -1;
    1782      }
    1783      int rc = PyModule_AddObjectRef(module, "make_scanner", PyScannerType);
    1784      Py_DECREF(PyScannerType);
    1785      if (rc < 0) {
    1786          return -1;
    1787      }
    1788  
    1789      PyObject *PyEncoderType = PyType_FromSpec(&PyEncoderType_spec);
    1790      if (PyEncoderType == NULL) {
    1791          return -1;
    1792      }
    1793      rc = PyModule_AddObjectRef(module, "make_encoder", PyEncoderType);
    1794      Py_DECREF(PyEncoderType);
    1795      if (rc < 0) {
    1796          return -1;
    1797      }
    1798  
    1799      return 0;
    1800  }
    1801  
    1802  static PyModuleDef_Slot _json_slots[] = {
    1803      {Py_mod_exec, _json_exec},
    1804      {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
    1805      {0, NULL}
    1806  };
    1807  
    1808  static struct PyModuleDef jsonmodule = {
    1809      .m_base = PyModuleDef_HEAD_INIT,
    1810      .m_name = "_json",
    1811      .m_doc = module_doc,
    1812      .m_methods = speedups_methods,
    1813      .m_slots = _json_slots,
    1814  };
    1815  
    1816  PyMODINIT_FUNC
    1817  PyInit__json(void)
    1818  {
    1819      return PyModuleDef_Init(&jsonmodule);
    1820  }