/* JSON accelerator C extension: _json module.
2 *
3 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
4 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
5 * platforms. */
6
7 #ifndef Py_BUILD_CORE_BUILTIN
8 # define Py_BUILD_CORE_MODULE 1
9 #endif
10
11 #include "Python.h"
12 #include "pycore_ceval.h" // _Py_EnterRecursiveCall()
13 #include "pycore_runtime.h" // _PyRuntime
14 #include "structmember.h" // PyMemberDef
15 #include "pycore_global_objects.h" // _Py_ID()
16 #include <stdbool.h> // bool
17
18
/* Instance layout of the JSON scanner object used by the decoding
   functions in this file. */
typedef struct _PyScannerObject {
    PyObject_HEAD
    /* Exposed as the read-only T_BOOL member "strict"; forwarded to
       scanstring_unicode(), where a nonzero value rejects literal control
       characters inside strings. */
    signed char strict;
    /* Called with each decoded object unless it is Py_None. */
    PyObject *object_hook;
    /* When not Py_None, called with the list of (key, value) tuples
       instead of building a dict. */
    PyObject *object_pairs_hook;
    /* Called with the number text unless it is exactly PyFloat_Type, in
       which case the built-in conversion is used. */
    PyObject *parse_float;
    /* Called with the number text unless it is exactly PyLong_Type, in
       which case the built-in conversion is used. */
    PyObject *parse_int;
    /* Called with the interned name "NaN", "Infinity" or "-Infinity". */
    PyObject *parse_constant;
    /* Passed to PyDict_SetDefault() so equal object keys share one
       string object. */
    PyObject *memo;
} PyScannerObject;
29
30 static PyMemberDef scanner_members[] = {
31 {"strict", T_BOOL, offsetof(PyScannerObject, strict), READONLY, "strict"},
32 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
33 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
34 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
35 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
36 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
37 {NULL}
38 };
39
/* Instance layout of the JSON encoder object.  The functions that consume
   these fields are defined later in the file. */
typedef struct _PyEncoderObject {
    PyObject_HEAD
    PyObject *markers;          /* exposed as read-only member "markers" */
    PyObject *defaultfn;        /* exposed as read-only member "default" */
    PyObject *encoder;          /* exposed as read-only member "encoder" */
    PyObject *indent;           /* exposed as read-only member "indent" */
    PyObject *key_separator;    /* exposed as read-only member "key_separator" */
    PyObject *item_separator;   /* exposed as read-only member "item_separator" */
    char sort_keys;             /* exposed as read-only T_BOOL member "sort_keys" */
    char skipkeys;              /* exposed as read-only T_BOOL member "skipkeys" */
    int allow_nan;              /* not listed in encoder_members */
    PyCFunction fast_encode;    /* not listed in encoder_members;
                                   NOTE(review): presumably a C fast path
                                   for string encoding - confirm in
                                   encoder_new() */
} PyEncoderObject;
53
54 static PyMemberDef encoder_members[] = {
55 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
56 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
57 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
58 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
59 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
60 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
61 {"sort_keys", T_BOOL, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
62 {"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
63 {NULL}
64 };
65
66 /* Forward decls */
67
68 static PyObject *
69 ascii_escape_unicode(PyObject *pystr);
70 static PyObject *
71 py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr);
72 static PyObject *
73 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
74 static PyObject *
75 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
76 static PyObject *
77 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
78 static void
79 scanner_dealloc(PyObject *self);
80 static int
81 scanner_clear(PyScannerObject *self);
82 static PyObject *
83 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
84 static void
85 encoder_dealloc(PyObject *self);
86 static int
87 encoder_clear(PyEncoderObject *self);
88 static int
89 encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level);
90 static int
91 encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, Py_ssize_t indent_level);
92 static int
93 encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level);
94 static PyObject *
95 _encoded_const(PyObject *obj);
96 static void
97 raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
98 static PyObject *
99 encoder_encode_string(PyEncoderObject *s, PyObject *obj);
100 static PyObject *
101 encoder_encode_float(PyEncoderObject *s, PyObject *obj);
102
/* True when c is a printable ASCII character (' ' through '~') that can be
   copied into a JSON string without escaping, i.e. anything in that range
   except backslash and double quote.  The argument is parenthesized so a
   compound expression is treated as one value; it is evaluated several
   times, so it must not have side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* True for the four characters the scanner skips as insignificant
   whitespace: space, tab, line feed and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
105
106 static Py_ssize_t
107 ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
108 {
109 /* Escape unicode code point c to ASCII escape sequences
110 in char *output. output must have at least 12 bytes unused to
111 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
112 output[chars++] = '\\';
113 switch (c) {
114 case '\\': output[chars++] = c; break;
115 case '"': output[chars++] = c; break;
116 case '\b': output[chars++] = 'b'; break;
117 case '\f': output[chars++] = 'f'; break;
118 case '\n': output[chars++] = 'n'; break;
119 case '\r': output[chars++] = 'r'; break;
120 case '\t': output[chars++] = 't'; break;
121 default:
122 if (c >= 0x10000) {
123 /* UTF-16 surrogate pair */
124 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
125 output[chars++] = 'u';
126 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
127 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
128 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
129 output[chars++] = Py_hexdigits[(v ) & 0xf];
130 c = Py_UNICODE_LOW_SURROGATE(c);
131 output[chars++] = '\\';
132 }
133 output[chars++] = 'u';
134 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
135 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
136 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
137 output[chars++] = Py_hexdigits[(c ) & 0xf];
138 }
139 return chars;
140 }
141
142 static PyObject *
143 ascii_escape_unicode(PyObject *pystr)
144 {
145 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
146 Py_ssize_t i;
147 Py_ssize_t input_chars;
148 Py_ssize_t output_size;
149 Py_ssize_t chars;
150 PyObject *rval;
151 const void *input;
152 Py_UCS1 *output;
153 int kind;
154
155 if (PyUnicode_READY(pystr) == -1)
156 return NULL;
157
158 input_chars = PyUnicode_GET_LENGTH(pystr);
159 input = PyUnicode_DATA(pystr);
160 kind = PyUnicode_KIND(pystr);
161
162 /* Compute the output size */
163 for (i = 0, output_size = 2; i < input_chars; i++) {
164 Py_UCS4 c = PyUnicode_READ(kind, input, i);
165 Py_ssize_t d;
166 if (S_CHAR(c)) {
167 d = 1;
168 }
169 else {
170 switch(c) {
171 case '\\': case '"': case '\b': case '\f':
172 case '\n': case '\r': case '\t':
173 d = 2; break;
174 default:
175 d = c >= 0x10000 ? 12 : 6;
176 }
177 }
178 if (output_size > PY_SSIZE_T_MAX - d) {
179 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
180 return NULL;
181 }
182 output_size += d;
183 }
184
185 rval = PyUnicode_New(output_size, 127);
186 if (rval == NULL) {
187 return NULL;
188 }
189 output = PyUnicode_1BYTE_DATA(rval);
190 chars = 0;
191 output[chars++] = '"';
192 for (i = 0; i < input_chars; i++) {
193 Py_UCS4 c = PyUnicode_READ(kind, input, i);
194 if (S_CHAR(c)) {
195 output[chars++] = c;
196 }
197 else {
198 chars = ascii_escape_unichar(c, output, chars);
199 }
200 }
201 output[chars++] = '"';
202 #ifdef Py_DEBUG
203 assert(_PyUnicode_CheckConsistency(rval, 1));
204 #endif
205 return rval;
206 }
207
208 static PyObject *
209 escape_unicode(PyObject *pystr)
210 {
211 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
212 Py_ssize_t i;
213 Py_ssize_t input_chars;
214 Py_ssize_t output_size;
215 Py_ssize_t chars;
216 PyObject *rval;
217 const void *input;
218 int kind;
219 Py_UCS4 maxchar;
220
221 if (PyUnicode_READY(pystr) == -1)
222 return NULL;
223
224 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
225 input_chars = PyUnicode_GET_LENGTH(pystr);
226 input = PyUnicode_DATA(pystr);
227 kind = PyUnicode_KIND(pystr);
228
229 /* Compute the output size */
230 for (i = 0, output_size = 2; i < input_chars; i++) {
231 Py_UCS4 c = PyUnicode_READ(kind, input, i);
232 Py_ssize_t d;
233 switch (c) {
234 case '\\': case '"': case '\b': case '\f':
235 case '\n': case '\r': case '\t':
236 d = 2;
237 break;
238 default:
239 if (c <= 0x1f)
240 d = 6;
241 else
242 d = 1;
243 }
244 if (output_size > PY_SSIZE_T_MAX - d) {
245 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
246 return NULL;
247 }
248 output_size += d;
249 }
250
251 rval = PyUnicode_New(output_size, maxchar);
252 if (rval == NULL)
253 return NULL;
254
255 kind = PyUnicode_KIND(rval);
256
257 #define ENCODE_OUTPUT do { \
258 chars = 0; \
259 output[chars++] = '"'; \
260 for (i = 0; i < input_chars; i++) { \
261 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
262 switch (c) { \
263 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
264 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
265 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
266 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
267 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
268 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
269 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
270 default: \
271 if (c <= 0x1f) { \
272 output[chars++] = '\\'; \
273 output[chars++] = 'u'; \
274 output[chars++] = '0'; \
275 output[chars++] = '0'; \
276 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
277 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
278 } else { \
279 output[chars++] = c; \
280 } \
281 } \
282 } \
283 output[chars++] = '"'; \
284 } while (0)
285
286 if (kind == PyUnicode_1BYTE_KIND) {
287 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
288 ENCODE_OUTPUT;
289 } else if (kind == PyUnicode_2BYTE_KIND) {
290 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
291 ENCODE_OUTPUT;
292 } else {
293 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
294 assert(kind == PyUnicode_4BYTE_KIND);
295 ENCODE_OUTPUT;
296 }
297 #undef ENCODE_OUTPUT
298
299 #ifdef Py_DEBUG
300 assert(_PyUnicode_CheckConsistency(rval, 1));
301 #endif
302 return rval;
303 }
304
305 static void
306 raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
307 {
308 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
309 _Py_DECLARE_STR(json_decoder, "json.decoder");
310 PyObject *JSONDecodeError =
311 _PyImport_GetModuleAttr(&_Py_STR(json_decoder), &_Py_ID(JSONDecodeError));
312 if (JSONDecodeError == NULL) {
313 return;
314 }
315
316 PyObject *exc;
317 exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
318 Py_DECREF(JSONDecodeError);
319 if (exc) {
320 PyErr_SetObject(JSONDecodeError, exc);
321 Py_DECREF(exc);
322 }
323 }
324
325 static void
326 raise_stop_iteration(Py_ssize_t idx)
327 {
328 PyObject *value = PyLong_FromSsize_t(idx);
329 if (value != NULL) {
330 PyErr_SetObject(PyExc_StopIteration, value);
331 Py_DECREF(value);
332 }
333 }
334
335 static PyObject *
336 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
337 /* return (rval, idx) tuple, stealing reference to rval */
338 PyObject *tpl;
339 PyObject *pyidx;
340 /*
341 steal a reference to rval, returns (rval, idx)
342 */
343 if (rval == NULL) {
344 return NULL;
345 }
346 pyidx = PyLong_FromSsize_t(idx);
347 if (pyidx == NULL) {
348 Py_DECREF(rval);
349 return NULL;
350 }
351 tpl = PyTuple_New(2);
352 if (tpl == NULL) {
353 Py_DECREF(pyidx);
354 Py_DECREF(rval);
355 return NULL;
356 }
357 PyTuple_SET_ITEM(tpl, 0, rval);
358 PyTuple_SET_ITEM(tpl, 1, pyidx);
359 return tpl;
360 }
361
362 static PyObject *
363 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
364 {
365 /* Read the JSON string from PyUnicode pystr.
366 end is the index of the first character after the quote.
367 if strict is zero then literal control characters are allowed
368 *next_end_ptr is a return-by-reference index of the character
369 after the end quote
370
371 Return value is a new PyUnicode
372 */
373 PyObject *rval = NULL;
374 Py_ssize_t len;
375 Py_ssize_t begin = end - 1;
376 Py_ssize_t next /* = begin */;
377 const void *buf;
378 int kind;
379
380 if (PyUnicode_READY(pystr) == -1)
381 return 0;
382
383 _PyUnicodeWriter writer;
384 _PyUnicodeWriter_Init(&writer);
385 writer.overallocate = 1;
386
387 len = PyUnicode_GET_LENGTH(pystr);
388 buf = PyUnicode_DATA(pystr);
389 kind = PyUnicode_KIND(pystr);
390
391 if (end < 0 || len < end) {
392 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
393 goto bail;
394 }
395 while (1) {
396 /* Find the end of the string or the next escape */
397 Py_UCS4 c;
398 {
399 // Use tight scope variable to help register allocation.
400 Py_UCS4 d = 0;
401 for (next = end; next < len; next++) {
402 d = PyUnicode_READ(kind, buf, next);
403 if (d == '"' || d == '\\') {
404 break;
405 }
406 if (d <= 0x1f && strict) {
407 raise_errmsg("Invalid control character at", pystr, next);
408 goto bail;
409 }
410 }
411 c = d;
412 }
413
414 if (c == '"') {
415 // Fast path for simple case.
416 if (writer.buffer == NULL) {
417 PyObject *ret = PyUnicode_Substring(pystr, end, next);
418 if (ret == NULL) {
419 goto bail;
420 }
421 *next_end_ptr = next + 1;;
422 return ret;
423 }
424 }
425 else if (c != '\\') {
426 raise_errmsg("Unterminated string starting at", pystr, begin);
427 goto bail;
428 }
429
430 /* Pick up this chunk if it's not zero length */
431 if (next != end) {
432 if (_PyUnicodeWriter_WriteSubstring(&writer, pystr, end, next) < 0) {
433 goto bail;
434 }
435 }
436 next++;
437 if (c == '"') {
438 end = next;
439 break;
440 }
441 if (next == len) {
442 raise_errmsg("Unterminated string starting at", pystr, begin);
443 goto bail;
444 }
445 c = PyUnicode_READ(kind, buf, next);
446 if (c != 'u') {
447 /* Non-unicode backslash escapes */
448 end = next + 1;
449 switch (c) {
450 case '"': break;
451 case '\\': break;
452 case '/': break;
453 case 'b': c = '\b'; break;
454 case 'f': c = '\f'; break;
455 case 'n': c = '\n'; break;
456 case 'r': c = '\r'; break;
457 case 't': c = '\t'; break;
458 default: c = 0;
459 }
460 if (c == 0) {
461 raise_errmsg("Invalid \\escape", pystr, end - 2);
462 goto bail;
463 }
464 }
465 else {
466 c = 0;
467 next++;
468 end = next + 4;
469 if (end >= len) {
470 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
471 goto bail;
472 }
473 /* Decode 4 hex digits */
474 for (; next < end; next++) {
475 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
476 c <<= 4;
477 switch (digit) {
478 case '0': case '1': case '2': case '3': case '4':
479 case '5': case '6': case '7': case '8': case '9':
480 c |= (digit - '0'); break;
481 case 'a': case 'b': case 'c': case 'd': case 'e':
482 case 'f':
483 c |= (digit - 'a' + 10); break;
484 case 'A': case 'B': case 'C': case 'D': case 'E':
485 case 'F':
486 c |= (digit - 'A' + 10); break;
487 default:
488 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
489 goto bail;
490 }
491 }
492 /* Surrogate pair */
493 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
494 PyUnicode_READ(kind, buf, next++) == '\\' &&
495 PyUnicode_READ(kind, buf, next++) == 'u') {
496 Py_UCS4 c2 = 0;
497 end += 6;
498 /* Decode 4 hex digits */
499 for (; next < end; next++) {
500 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
501 c2 <<= 4;
502 switch (digit) {
503 case '0': case '1': case '2': case '3': case '4':
504 case '5': case '6': case '7': case '8': case '9':
505 c2 |= (digit - '0'); break;
506 case 'a': case 'b': case 'c': case 'd': case 'e':
507 case 'f':
508 c2 |= (digit - 'a' + 10); break;
509 case 'A': case 'B': case 'C': case 'D': case 'E':
510 case 'F':
511 c2 |= (digit - 'A' + 10); break;
512 default:
513 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
514 goto bail;
515 }
516 }
517 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
518 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
519 else
520 end -= 6;
521 }
522 }
523 if (_PyUnicodeWriter_WriteChar(&writer, c) < 0) {
524 goto bail;
525 }
526 }
527
528 rval = _PyUnicodeWriter_Finish(&writer);
529 *next_end_ptr = end;
530 return rval;
531
532 bail:
533 *next_end_ptr = -1;
534 _PyUnicodeWriter_Dealloc(&writer);
535 return NULL;
536 }
537
538 PyDoc_STRVAR(pydoc_scanstring,
539 "scanstring(string, end, strict=True) -> (string, end)\n"
540 "\n"
541 "Scan the string s for a JSON string. End is the index of the\n"
542 "character in s after the quote that started the JSON string.\n"
543 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
544 "on attempt to decode an invalid string. If strict is False then literal\n"
545 "control characters are allowed in the string.\n"
546 "\n"
547 "Returns a tuple of the decoded string and the index of the character in s\n"
548 "after the end quote."
549 );
550
551 static PyObject *
552 py_scanstring(PyObject* Py_UNUSED(self), PyObject *args)
553 {
554 PyObject *pystr;
555 PyObject *rval;
556 Py_ssize_t end;
557 Py_ssize_t next_end = -1;
558 int strict = 1;
559 if (!PyArg_ParseTuple(args, "On|p:scanstring", &pystr, &end, &strict)) {
560 return NULL;
561 }
562 if (PyUnicode_Check(pystr)) {
563 rval = scanstring_unicode(pystr, end, strict, &next_end);
564 }
565 else {
566 PyErr_Format(PyExc_TypeError,
567 "first argument must be a string, not %.80s",
568 Py_TYPE(pystr)->tp_name);
569 return NULL;
570 }
571 return _build_rval_index_tuple(rval, next_end);
572 }
573
574 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
575 "encode_basestring_ascii(string) -> string\n"
576 "\n"
577 "Return an ASCII-only JSON representation of a Python string"
578 );
579
580 static PyObject *
581 py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr)
582 {
583 PyObject *rval;
584 /* Return an ASCII-only JSON representation of a Python string */
585 /* METH_O */
586 if (PyUnicode_Check(pystr)) {
587 rval = ascii_escape_unicode(pystr);
588 }
589 else {
590 PyErr_Format(PyExc_TypeError,
591 "first argument must be a string, not %.80s",
592 Py_TYPE(pystr)->tp_name);
593 return NULL;
594 }
595 return rval;
596 }
597
598
599 PyDoc_STRVAR(pydoc_encode_basestring,
600 "encode_basestring(string) -> string\n"
601 "\n"
602 "Return a JSON representation of a Python string"
603 );
604
605 static PyObject *
606 py_encode_basestring(PyObject* Py_UNUSED(self), PyObject *pystr)
607 {
608 PyObject *rval;
609 /* Return a JSON representation of a Python string */
610 /* METH_O */
611 if (PyUnicode_Check(pystr)) {
612 rval = escape_unicode(pystr);
613 }
614 else {
615 PyErr_Format(PyExc_TypeError,
616 "first argument must be a string, not %.80s",
617 Py_TYPE(pystr)->tp_name);
618 return NULL;
619 }
620 return rval;
621 }
622
623 static void
624 scanner_dealloc(PyObject *self)
625 {
626 PyTypeObject *tp = Py_TYPE(self);
627 /* bpo-31095: UnTrack is needed before calling any callbacks */
628 PyObject_GC_UnTrack(self);
629 scanner_clear((PyScannerObject *)self);
630 tp->tp_free(self);
631 Py_DECREF(tp);
632 }
633
/* GC traversal for scanner objects: reports the object's type and every
   PyObject field of PyScannerObject to visit.  Returns 0 unless a
   Py_VISIT() call returns early with a nonzero result. */
static int
scanner_traverse(PyScannerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->object_hook);
    Py_VISIT(self->object_pairs_hook);
    Py_VISIT(self->parse_float);
    Py_VISIT(self->parse_int);
    Py_VISIT(self->parse_constant);
    Py_VISIT(self->memo);
    return 0;
}
646
/* Release every object reference held by the scanner, leaving the fields
   NULL.  Called from scanner_dealloc().  Always returns 0. */
static int
scanner_clear(PyScannerObject *self)
{
    Py_CLEAR(self->object_hook);
    Py_CLEAR(self->object_pairs_hook);
    Py_CLEAR(self->parse_float);
    Py_CLEAR(self->parse_int);
    Py_CLEAR(self->parse_constant);
    Py_CLEAR(self->memo);
    return 0;
}
658
659 static PyObject *
660 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
661 {
662 /* Read a JSON object from PyUnicode pystr.
663 idx is the index of the first character after the opening curly brace.
664 *next_idx_ptr is a return-by-reference index to the first character after
665 the closing curly brace.
666
667 Returns a new PyObject (usually a dict, but object_hook can change that)
668 */
669 const void *str;
670 int kind;
671 Py_ssize_t end_idx;
672 PyObject *val = NULL;
673 PyObject *rval = NULL;
674 PyObject *key = NULL;
675 int has_pairs_hook = (s->object_pairs_hook != Py_None);
676 Py_ssize_t next_idx;
677
678 if (PyUnicode_READY(pystr) == -1)
679 return NULL;
680
681 str = PyUnicode_DATA(pystr);
682 kind = PyUnicode_KIND(pystr);
683 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
684
685 if (has_pairs_hook)
686 rval = PyList_New(0);
687 else
688 rval = PyDict_New();
689 if (rval == NULL)
690 return NULL;
691
692 /* skip whitespace after { */
693 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
694
695 /* only loop if the object is non-empty */
696 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
697 while (1) {
698 PyObject *memokey;
699
700 /* read key */
701 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
702 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
703 goto bail;
704 }
705 key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
706 if (key == NULL)
707 goto bail;
708 memokey = PyDict_SetDefault(s->memo, key, key);
709 if (memokey == NULL) {
710 goto bail;
711 }
712 Py_SETREF(key, Py_NewRef(memokey));
713 idx = next_idx;
714
715 /* skip whitespace between key and : delimiter, read :, skip whitespace */
716 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
717 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
718 raise_errmsg("Expecting ':' delimiter", pystr, idx);
719 goto bail;
720 }
721 idx++;
722 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
723
724 /* read any JSON term */
725 val = scan_once_unicode(s, pystr, idx, &next_idx);
726 if (val == NULL)
727 goto bail;
728
729 if (has_pairs_hook) {
730 PyObject *item = PyTuple_Pack(2, key, val);
731 if (item == NULL)
732 goto bail;
733 Py_CLEAR(key);
734 Py_CLEAR(val);
735 if (PyList_Append(rval, item) == -1) {
736 Py_DECREF(item);
737 goto bail;
738 }
739 Py_DECREF(item);
740 }
741 else {
742 if (PyDict_SetItem(rval, key, val) < 0)
743 goto bail;
744 Py_CLEAR(key);
745 Py_CLEAR(val);
746 }
747 idx = next_idx;
748
749 /* skip whitespace before } or , */
750 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
751
752 /* bail if the object is closed or we didn't get the , delimiter */
753 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
754 break;
755 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
756 raise_errmsg("Expecting ',' delimiter", pystr, idx);
757 goto bail;
758 }
759 idx++;
760
761 /* skip whitespace after , delimiter */
762 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
763 }
764 }
765
766 *next_idx_ptr = idx + 1;
767
768 if (has_pairs_hook) {
769 val = PyObject_CallOneArg(s->object_pairs_hook, rval);
770 Py_DECREF(rval);
771 return val;
772 }
773
774 /* if object_hook is not None: rval = object_hook(rval) */
775 if (s->object_hook != Py_None) {
776 val = PyObject_CallOneArg(s->object_hook, rval);
777 Py_DECREF(rval);
778 return val;
779 }
780 return rval;
781 bail:
782 Py_XDECREF(key);
783 Py_XDECREF(val);
784 Py_XDECREF(rval);
785 return NULL;
786 }
787
788 static PyObject *
789 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
790 /* Read a JSON array from PyUnicode pystr.
791 idx is the index of the first character after the opening brace.
792 *next_idx_ptr is a return-by-reference index to the first character after
793 the closing brace.
794
795 Returns a new PyList
796 */
797 const void *str;
798 int kind;
799 Py_ssize_t end_idx;
800 PyObject *val = NULL;
801 PyObject *rval;
802 Py_ssize_t next_idx;
803
804 if (PyUnicode_READY(pystr) == -1)
805 return NULL;
806
807 rval = PyList_New(0);
808 if (rval == NULL)
809 return NULL;
810
811 str = PyUnicode_DATA(pystr);
812 kind = PyUnicode_KIND(pystr);
813 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
814
815 /* skip whitespace after [ */
816 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
817
818 /* only loop if the array is non-empty */
819 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
820 while (1) {
821
822 /* read any JSON term */
823 val = scan_once_unicode(s, pystr, idx, &next_idx);
824 if (val == NULL)
825 goto bail;
826
827 if (PyList_Append(rval, val) == -1)
828 goto bail;
829
830 Py_CLEAR(val);
831 idx = next_idx;
832
833 /* skip whitespace between term and , */
834 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
835
836 /* bail if the array is closed or we didn't get the , delimiter */
837 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
838 break;
839 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
840 raise_errmsg("Expecting ',' delimiter", pystr, idx);
841 goto bail;
842 }
843 idx++;
844
845 /* skip whitespace after , */
846 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
847 }
848 }
849
850 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
851 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
852 raise_errmsg("Expecting value", pystr, end_idx);
853 goto bail;
854 }
855 *next_idx_ptr = idx + 1;
856 return rval;
857 bail:
858 Py_XDECREF(val);
859 Py_DECREF(rval);
860 return NULL;
861 }
862
863 static PyObject *
864 _parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
865 /* Read a JSON constant.
866 constant is the constant string that was found
867 ("NaN", "Infinity", "-Infinity").
868 idx is the index of the first character of the constant
869 *next_idx_ptr is a return-by-reference index to the first character after
870 the constant.
871
872 Returns the result of parse_constant
873 */
874 PyObject *cstr;
875 PyObject *rval;
876 /* constant is "NaN", "Infinity", or "-Infinity" */
877 cstr = PyUnicode_InternFromString(constant);
878 if (cstr == NULL)
879 return NULL;
880
881 /* rval = parse_constant(constant) */
882 rval = PyObject_CallOneArg(s->parse_constant, cstr);
883 idx += PyUnicode_GET_LENGTH(cstr);
884 Py_DECREF(cstr);
885 *next_idx_ptr = idx;
886 return rval;
887 }
888
889 static PyObject *
890 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
891 /* Read a JSON number from PyUnicode pystr.
892 idx is the index of the first character of the number
893 *next_idx_ptr is a return-by-reference index to the first character after
894 the number.
895
896 Returns a new PyObject representation of that number:
897 PyLong, or PyFloat.
898 May return other types if parse_int or parse_float are set
899 */
900 const void *str;
901 int kind;
902 Py_ssize_t end_idx;
903 Py_ssize_t idx = start;
904 int is_float = 0;
905 PyObject *rval;
906 PyObject *numstr = NULL;
907 PyObject *custom_func;
908
909 if (PyUnicode_READY(pystr) == -1)
910 return NULL;
911
912 str = PyUnicode_DATA(pystr);
913 kind = PyUnicode_KIND(pystr);
914 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
915
916 /* read a sign if it's there, make sure it's not the end of the string */
917 if (PyUnicode_READ(kind, str, idx) == '-') {
918 idx++;
919 if (idx > end_idx) {
920 raise_stop_iteration(start);
921 return NULL;
922 }
923 }
924
925 /* read as many integer digits as we find as long as it doesn't start with 0 */
926 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
927 idx++;
928 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
929 }
930 /* if it starts with 0 we only expect one integer digit */
931 else if (PyUnicode_READ(kind, str, idx) == '0') {
932 idx++;
933 }
934 /* no integer digits, error */
935 else {
936 raise_stop_iteration(start);
937 return NULL;
938 }
939
940 /* if the next char is '.' followed by a digit then read all float digits */
941 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
942 is_float = 1;
943 idx += 2;
944 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
945 }
946
947 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
948 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
949 Py_ssize_t e_start = idx;
950 idx++;
951
952 /* read an exponent sign if present */
953 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
954
955 /* read all digits */
956 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
957
958 /* if we got a digit, then parse as float. if not, backtrack */
959 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
960 is_float = 1;
961 }
962 else {
963 idx = e_start;
964 }
965 }
966
967 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
968 custom_func = s->parse_float;
969 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
970 custom_func = s->parse_int;
971 else
972 custom_func = NULL;
973
974 if (custom_func) {
975 /* copy the section we determined to be a number */
976 numstr = PyUnicode_FromKindAndData(kind,
977 (char*)str + kind * start,
978 idx - start);
979 if (numstr == NULL)
980 return NULL;
981 rval = PyObject_CallOneArg(custom_func, numstr);
982 }
983 else {
984 Py_ssize_t i, n;
985 char *buf;
986 /* Straight conversion to ASCII, to avoid costly conversion of
987 decimal unicode digits (which cannot appear here) */
988 n = idx - start;
989 numstr = PyBytes_FromStringAndSize(NULL, n);
990 if (numstr == NULL)
991 return NULL;
992 buf = PyBytes_AS_STRING(numstr);
993 for (i = 0; i < n; i++) {
994 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
995 }
996 if (is_float)
997 rval = PyFloat_FromString(numstr);
998 else
999 rval = PyLong_FromString(buf, NULL, 10);
1000 }
1001 Py_DECREF(numstr);
1002 *next_idx_ptr = idx;
1003 return rval;
1004 }
1005
1006 static PyObject *
1007 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1008 {
1009 /* Read one JSON term (of any kind) from PyUnicode pystr.
1010 idx is the index of the first character of the term
1011 *next_idx_ptr is a return-by-reference index to the first character after
1012 the number.
1013
1014 Returns a new PyObject representation of the term.
1015 */
1016 PyObject *res;
1017 const void *str;
1018 int kind;
1019 Py_ssize_t length;
1020
1021 if (PyUnicode_READY(pystr) == -1)
1022 return NULL;
1023
1024 str = PyUnicode_DATA(pystr);
1025 kind = PyUnicode_KIND(pystr);
1026 length = PyUnicode_GET_LENGTH(pystr);
1027
1028 if (idx < 0) {
1029 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1030 return NULL;
1031 }
1032 if (idx >= length) {
1033 raise_stop_iteration(idx);
1034 return NULL;
1035 }
1036
1037 switch (PyUnicode_READ(kind, str, idx)) {
1038 case '"':
1039 /* string */
1040 return scanstring_unicode(pystr, idx + 1, s->strict, next_idx_ptr);
1041 case '{':
1042 /* object */
1043 if (_Py_EnterRecursiveCall(" while decoding a JSON object "
1044 "from a unicode string"))
1045 return NULL;
1046 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1047 _Py_LeaveRecursiveCall();
1048 return res;
1049 case '[':
1050 /* array */
1051 if (_Py_EnterRecursiveCall(" while decoding a JSON array "
1052 "from a unicode string"))
1053 return NULL;
1054 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1055 _Py_LeaveRecursiveCall();
1056 return res;
1057 case 'n':
1058 /* null */
1059 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
1060 *next_idx_ptr = idx + 4;
1061 Py_RETURN_NONE;
1062 }
1063 break;
1064 case 't':
1065 /* true */
1066 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
1067 *next_idx_ptr = idx + 4;
1068 Py_RETURN_TRUE;
1069 }
1070 break;
1071 case 'f':
1072 /* false */
1073 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1074 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1075 PyUnicode_READ(kind, str, idx + 3) == 's' &&
1076 PyUnicode_READ(kind, str, idx + 4) == 'e') {
1077 *next_idx_ptr = idx + 5;
1078 Py_RETURN_FALSE;
1079 }
1080 break;
1081 case 'N':
1082 /* NaN */
1083 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1084 PyUnicode_READ(kind, str, idx + 2) == 'N') {
1085 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1086 }
1087 break;
1088 case 'I':
1089 /* Infinity */
1090 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1091 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1092 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
1093 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
1094 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1095 PyUnicode_READ(kind, str, idx + 6) == 't' &&
1096 PyUnicode_READ(kind, str, idx + 7) == 'y') {
1097 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1098 }
1099 break;
1100 case '-':
1101 /* -Infinity */
1102 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
1103 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1104 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
1105 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
1106 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
1107 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1108 PyUnicode_READ(kind, str, idx + 7) == 't' &&
1109 PyUnicode_READ(kind, str, idx + 8) == 'y') {
1110 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1111 }
1112 break;
1113 }
1114 /* Didn't find a string, object, array, or named constant. Look for a number. */
1115 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1116 }
1117
1118 static PyObject *
1119 scanner_call(PyScannerObject *self, PyObject *args, PyObject *kwds)
1120 {
1121 /* Python callable interface to scan_once_{str,unicode} */
1122 PyObject *pystr;
1123 PyObject *rval;
1124 Py_ssize_t idx;
1125 Py_ssize_t next_idx = -1;
1126 static char *kwlist[] = {"string", "idx", NULL};
1127 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
1128 return NULL;
1129
1130 if (PyUnicode_Check(pystr)) {
1131 rval = scan_once_unicode(self, pystr, idx, &next_idx);
1132 }
1133 else {
1134 PyErr_Format(PyExc_TypeError,
1135 "first argument must be a string, not %.80s",
1136 Py_TYPE(pystr)->tp_name);
1137 return NULL;
1138 }
1139 PyDict_Clear(self->memo);
1140 if (rval == NULL)
1141 return NULL;
1142 return _build_rval_index_tuple(rval, next_idx);
1143 }
1144
1145 static PyObject *
1146 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1147 {
1148 PyScannerObject *s;
1149 PyObject *ctx;
1150 PyObject *strict;
1151 static char *kwlist[] = {"context", NULL};
1152
1153 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1154 return NULL;
1155
1156 s = (PyScannerObject *)type->tp_alloc(type, 0);
1157 if (s == NULL) {
1158 return NULL;
1159 }
1160
1161 s->memo = PyDict_New();
1162 if (s->memo == NULL)
1163 goto bail;
1164
1165 /* All of these will fail "gracefully" so we don't need to verify them */
1166 strict = PyObject_GetAttrString(ctx, "strict");
1167 if (strict == NULL)
1168 goto bail;
1169 s->strict = PyObject_IsTrue(strict);
1170 Py_DECREF(strict);
1171 if (s->strict < 0)
1172 goto bail;
1173 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1174 if (s->object_hook == NULL)
1175 goto bail;
1176 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1177 if (s->object_pairs_hook == NULL)
1178 goto bail;
1179 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1180 if (s->parse_float == NULL)
1181 goto bail;
1182 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1183 if (s->parse_int == NULL)
1184 goto bail;
1185 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1186 if (s->parse_constant == NULL)
1187 goto bail;
1188
1189 return (PyObject *)s;
1190
1191 bail:
1192 Py_DECREF(s);
1193 return NULL;
1194 }
1195
/* Docstring installed on the scanner type through the Py_tp_doc slot. */
PyDoc_STRVAR(scanner_doc, "JSON scanner object");

/* Slot table of the scanner type (referenced by PyScannerType_spec):
   wires up deallocation, calling, GC traverse/clear, the read-only
   member table and construction.  Terminated by a {0, 0} sentinel. */
static PyType_Slot PyScannerType_slots[] = {
    {Py_tp_doc, (void *)scanner_doc},
    {Py_tp_dealloc, scanner_dealloc},
    {Py_tp_call, scanner_call},
    {Py_tp_traverse, scanner_traverse},
    {Py_tp_clear, scanner_clear},
    {Py_tp_members, scanner_members},
    {Py_tp_new, scanner_new},
    {0, 0}
};
1208
/* Type specification for "_json.Scanner": instances are PyScannerObject
   structs with no variable-size part, and the type takes part in cyclic
   garbage collection (Py_TPFLAGS_HAVE_GC).  _json_exec() turns this spec
   into a type object with PyType_FromSpec(). */
static PyType_Spec PyScannerType_spec = {
    .name = "_json.Scanner",
    .basicsize = sizeof(PyScannerObject),
    .itemsize = 0,
    .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
    .slots = PyScannerType_slots,
};
1216
1217 static PyObject *
1218 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1219 {
1220 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1221
1222 PyEncoderObject *s;
1223 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1224 PyObject *item_separator;
1225 int sort_keys, skipkeys, allow_nan;
1226
1227 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
1228 &markers, &defaultfn, &encoder, &indent,
1229 &key_separator, &item_separator,
1230 &sort_keys, &skipkeys, &allow_nan))
1231 return NULL;
1232
1233 if (markers != Py_None && !PyDict_Check(markers)) {
1234 PyErr_Format(PyExc_TypeError,
1235 "make_encoder() argument 1 must be dict or None, "
1236 "not %.200s", Py_TYPE(markers)->tp_name);
1237 return NULL;
1238 }
1239
1240 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1241 if (s == NULL)
1242 return NULL;
1243
1244 s->markers = Py_NewRef(markers);
1245 s->defaultfn = Py_NewRef(defaultfn);
1246 s->encoder = Py_NewRef(encoder);
1247 s->indent = Py_NewRef(indent);
1248 s->key_separator = Py_NewRef(key_separator);
1249 s->item_separator = Py_NewRef(item_separator);
1250 s->sort_keys = sort_keys;
1251 s->skipkeys = skipkeys;
1252 s->allow_nan = allow_nan;
1253 s->fast_encode = NULL;
1254
1255 if (PyCFunction_Check(s->encoder)) {
1256 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1257 if (f == (PyCFunction)py_encode_basestring_ascii ||
1258 f == (PyCFunction)py_encode_basestring) {
1259 s->fast_encode = f;
1260 }
1261 }
1262
1263 return (PyObject *)s;
1264 }
1265
1266 static PyObject *
1267 encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
1268 {
1269 /* Python callable interface to encode_listencode_obj */
1270 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1271 PyObject *obj, *result;
1272 Py_ssize_t indent_level;
1273 _PyUnicodeWriter writer;
1274
1275 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1276 &obj, &indent_level))
1277 return NULL;
1278
1279 _PyUnicodeWriter_Init(&writer);
1280 writer.overallocate = 1;
1281
1282 if (encoder_listencode_obj(self, &writer, obj, indent_level)) {
1283 _PyUnicodeWriter_Dealloc(&writer);
1284 return NULL;
1285 }
1286
1287 result = PyTuple_New(1);
1288 if (result == NULL ||
1289 PyTuple_SetItem(result, 0, _PyUnicodeWriter_Finish(&writer)) < 0) {
1290 Py_XDECREF(result);
1291 return NULL;
1292 }
1293 return result;
1294 }
1295
1296 static PyObject *
1297 _encoded_const(PyObject *obj)
1298 {
1299 /* Return the JSON string representation of None, True, False */
1300 if (obj == Py_None) {
1301 return Py_NewRef(&_Py_ID(null));
1302 }
1303 else if (obj == Py_True) {
1304 return Py_NewRef(&_Py_ID(true));
1305 }
1306 else if (obj == Py_False) {
1307 return Py_NewRef(&_Py_ID(false));
1308 }
1309 else {
1310 PyErr_SetString(PyExc_ValueError, "not a const");
1311 return NULL;
1312 }
1313 }
1314
1315 static PyObject *
1316 encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1317 {
1318 /* Return the JSON representation of a PyFloat. */
1319 double i = PyFloat_AS_DOUBLE(obj);
1320 if (!Py_IS_FINITE(i)) {
1321 if (!s->allow_nan) {
1322 PyErr_Format(
1323 PyExc_ValueError,
1324 "Out of range float values are not JSON compliant: %R",
1325 obj
1326 );
1327 return NULL;
1328 }
1329 if (i > 0) {
1330 return PyUnicode_FromString("Infinity");
1331 }
1332 else if (i < 0) {
1333 return PyUnicode_FromString("-Infinity");
1334 }
1335 else {
1336 return PyUnicode_FromString("NaN");
1337 }
1338 }
1339 return PyFloat_Type.tp_repr(obj);
1340 }
1341
1342 static PyObject *
1343 encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1344 {
1345 /* Return the JSON representation of a string */
1346 PyObject *encoded;
1347
1348 if (s->fast_encode) {
1349 return s->fast_encode(NULL, obj);
1350 }
1351 encoded = PyObject_CallOneArg(s->encoder, obj);
1352 if (encoded != NULL && !PyUnicode_Check(encoded)) {
1353 PyErr_Format(PyExc_TypeError,
1354 "encoder() must return a string, not %.80s",
1355 Py_TYPE(encoded)->tp_name);
1356 Py_DECREF(encoded);
1357 return NULL;
1358 }
1359 return encoded;
1360 }
1361
1362 static int
1363 _steal_accumulate(_PyUnicodeWriter *writer, PyObject *stolen)
1364 {
1365 /* Append stolen and then decrement its reference count */
1366 int rval = _PyUnicodeWriter_WriteStr(writer, stolen);
1367 Py_DECREF(stolen);
1368 return rval;
1369 }
1370
1371 static int
1372 encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
1373 PyObject *obj, Py_ssize_t indent_level)
1374 {
1375 /* Encode Python object obj to a JSON term */
1376 PyObject *newobj;
1377 int rv;
1378
1379 if (obj == Py_None) {
1380 return _PyUnicodeWriter_WriteASCIIString(writer, "null", 4);
1381 }
1382 else if (obj == Py_True) {
1383 return _PyUnicodeWriter_WriteASCIIString(writer, "true", 4);
1384 }
1385 else if (obj == Py_False) {
1386 return _PyUnicodeWriter_WriteASCIIString(writer, "false", 5);
1387 }
1388 else if (PyUnicode_Check(obj)) {
1389 PyObject *encoded = encoder_encode_string(s, obj);
1390 if (encoded == NULL)
1391 return -1;
1392 return _steal_accumulate(writer, encoded);
1393 }
1394 else if (PyLong_Check(obj)) {
1395 PyObject *encoded = PyLong_Type.tp_repr(obj);
1396 if (encoded == NULL)
1397 return -1;
1398 return _steal_accumulate(writer, encoded);
1399 }
1400 else if (PyFloat_Check(obj)) {
1401 PyObject *encoded = encoder_encode_float(s, obj);
1402 if (encoded == NULL)
1403 return -1;
1404 return _steal_accumulate(writer, encoded);
1405 }
1406 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
1407 if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
1408 return -1;
1409 rv = encoder_listencode_list(s, writer, obj, indent_level);
1410 _Py_LeaveRecursiveCall();
1411 return rv;
1412 }
1413 else if (PyDict_Check(obj)) {
1414 if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
1415 return -1;
1416 rv = encoder_listencode_dict(s, writer, obj, indent_level);
1417 _Py_LeaveRecursiveCall();
1418 return rv;
1419 }
1420 else {
1421 PyObject *ident = NULL;
1422 if (s->markers != Py_None) {
1423 int has_key;
1424 ident = PyLong_FromVoidPtr(obj);
1425 if (ident == NULL)
1426 return -1;
1427 has_key = PyDict_Contains(s->markers, ident);
1428 if (has_key) {
1429 if (has_key != -1)
1430 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1431 Py_DECREF(ident);
1432 return -1;
1433 }
1434 if (PyDict_SetItem(s->markers, ident, obj)) {
1435 Py_DECREF(ident);
1436 return -1;
1437 }
1438 }
1439 newobj = PyObject_CallOneArg(s->defaultfn, obj);
1440 if (newobj == NULL) {
1441 Py_XDECREF(ident);
1442 return -1;
1443 }
1444
1445 if (_Py_EnterRecursiveCall(" while encoding a JSON object")) {
1446 Py_DECREF(newobj);
1447 Py_XDECREF(ident);
1448 return -1;
1449 }
1450 rv = encoder_listencode_obj(s, writer, newobj, indent_level);
1451 _Py_LeaveRecursiveCall();
1452
1453 Py_DECREF(newobj);
1454 if (rv) {
1455 Py_XDECREF(ident);
1456 return -1;
1457 }
1458 if (ident != NULL) {
1459 if (PyDict_DelItem(s->markers, ident)) {
1460 Py_XDECREF(ident);
1461 return -1;
1462 }
1463 Py_XDECREF(ident);
1464 }
1465 return rv;
1466 }
1467 }
1468
1469 static int
1470 encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first,
1471 PyObject *key, PyObject *value, Py_ssize_t indent_level)
1472 {
1473 PyObject *keystr = NULL;
1474 PyObject *encoded;
1475
1476 if (PyUnicode_Check(key)) {
1477 keystr = Py_NewRef(key);
1478 }
1479 else if (PyFloat_Check(key)) {
1480 keystr = encoder_encode_float(s, key);
1481 }
1482 else if (key == Py_True || key == Py_False || key == Py_None) {
1483 /* This must come before the PyLong_Check because
1484 True and False are also 1 and 0.*/
1485 keystr = _encoded_const(key);
1486 }
1487 else if (PyLong_Check(key)) {
1488 keystr = PyLong_Type.tp_repr(key);
1489 }
1490 else if (s->skipkeys) {
1491 return 0;
1492 }
1493 else {
1494 PyErr_Format(PyExc_TypeError,
1495 "keys must be str, int, float, bool or None, "
1496 "not %.100s", Py_TYPE(key)->tp_name);
1497 return -1;
1498 }
1499
1500 if (keystr == NULL) {
1501 return -1;
1502 }
1503
1504 if (*first) {
1505 *first = false;
1506 }
1507 else {
1508 if (_PyUnicodeWriter_WriteStr(writer, s->item_separator) < 0) {
1509 Py_DECREF(keystr);
1510 return -1;
1511 }
1512 }
1513
1514 encoded = encoder_encode_string(s, keystr);
1515 Py_DECREF(keystr);
1516 if (encoded == NULL) {
1517 return -1;
1518 }
1519
1520 if (_steal_accumulate(writer, encoded) < 0) {
1521 return -1;
1522 }
1523 if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
1524 return -1;
1525 }
1526 if (encoder_listencode_obj(s, writer, value, indent_level) < 0) {
1527 return -1;
1528 }
1529 return 0;
1530 }
1531
1532 static int
1533 encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
1534 PyObject *dct, Py_ssize_t indent_level)
1535 {
1536 /* Encode Python dict dct a JSON term */
1537 PyObject *ident = NULL;
1538 PyObject *items = NULL;
1539 PyObject *key, *value;
1540 bool first = true;
1541
1542 if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
1543 return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2);
1544
1545 if (s->markers != Py_None) {
1546 int has_key;
1547 ident = PyLong_FromVoidPtr(dct);
1548 if (ident == NULL)
1549 goto bail;
1550 has_key = PyDict_Contains(s->markers, ident);
1551 if (has_key) {
1552 if (has_key != -1)
1553 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1554 goto bail;
1555 }
1556 if (PyDict_SetItem(s->markers, ident, dct)) {
1557 goto bail;
1558 }
1559 }
1560
1561 if (_PyUnicodeWriter_WriteChar(writer, '{'))
1562 goto bail;
1563
1564 if (s->indent != Py_None) {
1565 /* TODO: DOES NOT RUN */
1566 indent_level += 1;
1567 /*
1568 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1569 separator = _item_separator + newline_indent
1570 buf += newline_indent
1571 */
1572 }
1573
1574 if (s->sort_keys || !PyDict_CheckExact(dct)) {
1575 items = PyMapping_Items(dct);
1576 if (items == NULL || (s->sort_keys && PyList_Sort(items) < 0))
1577 goto bail;
1578
1579 for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) {
1580 PyObject *item = PyList_GET_ITEM(items, i);
1581
1582 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
1583 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1584 goto bail;
1585 }
1586
1587 key = PyTuple_GET_ITEM(item, 0);
1588 value = PyTuple_GET_ITEM(item, 1);
1589 if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0)
1590 goto bail;
1591 }
1592 Py_CLEAR(items);
1593
1594 } else {
1595 Py_ssize_t pos = 0;
1596 while (PyDict_Next(dct, &pos, &key, &value)) {
1597 if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0)
1598 goto bail;
1599 }
1600 }
1601
1602 if (ident != NULL) {
1603 if (PyDict_DelItem(s->markers, ident))
1604 goto bail;
1605 Py_CLEAR(ident);
1606 }
1607 /* TODO DOES NOT RUN; dead code
1608 if (s->indent != Py_None) {
1609 indent_level -= 1;
1610
1611 yield '\n' + (' ' * (_indent * _current_indent_level))
1612 }*/
1613 if (_PyUnicodeWriter_WriteChar(writer, '}'))
1614 goto bail;
1615 return 0;
1616
1617 bail:
1618 Py_XDECREF(items);
1619 Py_XDECREF(ident);
1620 return -1;
1621 }
1622
1623 static int
1624 encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
1625 PyObject *seq, Py_ssize_t indent_level)
1626 {
1627 PyObject *ident = NULL;
1628 PyObject *s_fast = NULL;
1629 Py_ssize_t i;
1630
1631 ident = NULL;
1632 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1633 if (s_fast == NULL)
1634 return -1;
1635 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
1636 Py_DECREF(s_fast);
1637 return _PyUnicodeWriter_WriteASCIIString(writer, "[]", 2);
1638 }
1639
1640 if (s->markers != Py_None) {
1641 int has_key;
1642 ident = PyLong_FromVoidPtr(seq);
1643 if (ident == NULL)
1644 goto bail;
1645 has_key = PyDict_Contains(s->markers, ident);
1646 if (has_key) {
1647 if (has_key != -1)
1648 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1649 goto bail;
1650 }
1651 if (PyDict_SetItem(s->markers, ident, seq)) {
1652 goto bail;
1653 }
1654 }
1655
1656 if (_PyUnicodeWriter_WriteChar(writer, '['))
1657 goto bail;
1658 if (s->indent != Py_None) {
1659 /* TODO: DOES NOT RUN */
1660 indent_level += 1;
1661 /*
1662 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1663 separator = _item_separator + newline_indent
1664 buf += newline_indent
1665 */
1666 }
1667 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1668 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
1669 if (i) {
1670 if (_PyUnicodeWriter_WriteStr(writer, s->item_separator))
1671 goto bail;
1672 }
1673 if (encoder_listencode_obj(s, writer, obj, indent_level))
1674 goto bail;
1675 }
1676 if (ident != NULL) {
1677 if (PyDict_DelItem(s->markers, ident))
1678 goto bail;
1679 Py_CLEAR(ident);
1680 }
1681
1682 /* TODO: DOES NOT RUN
1683 if (s->indent != Py_None) {
1684 indent_level -= 1;
1685
1686 yield '\n' + (' ' * (_indent * _current_indent_level))
1687 }*/
1688 if (_PyUnicodeWriter_WriteChar(writer, ']'))
1689 goto bail;
1690 Py_DECREF(s_fast);
1691 return 0;
1692
1693 bail:
1694 Py_XDECREF(ident);
1695 Py_DECREF(s_fast);
1696 return -1;
1697 }
1698
1699 static void
1700 encoder_dealloc(PyObject *self)
1701 {
1702 PyTypeObject *tp = Py_TYPE(self);
1703 /* bpo-31095: UnTrack is needed before calling any callbacks */
1704 PyObject_GC_UnTrack(self);
1705 encoder_clear((PyEncoderObject *)self);
1706 tp->tp_free(self);
1707 Py_DECREF(tp);
1708 }
1709
/* tp_traverse of the encoder type: report the object's type and every
   object reference stored in the struct to the garbage collector's
   `visit` callback.  Py_VISIT returns early with the callback's result if
   it is non-zero; otherwise 0 is returned. */
static int
encoder_traverse(PyEncoderObject *self, visitproc visit, void *arg)
{
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->markers);
    Py_VISIT(self->defaultfn);
    Py_VISIT(self->encoder);
    Py_VISIT(self->indent);
    Py_VISIT(self->key_separator);
    Py_VISIT(self->item_separator);
    return 0;
}
1722
/* tp_clear of the encoder type, also called from encoder_dealloc():
   release every object reference held by the encoder and reset the
   fields to NULL.  Always returns 0. */
static int
encoder_clear(PyEncoderObject *self)
{
    /* Deallocate Encoder */
    Py_CLEAR(self->markers);
    Py_CLEAR(self->defaultfn);
    Py_CLEAR(self->encoder);
    Py_CLEAR(self->indent);
    Py_CLEAR(self->key_separator);
    Py_CLEAR(self->item_separator);
    return 0;
}
1735
/* Docstring installed on the encoder type through the Py_tp_doc slot. */
PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");

/* Slot table of the encoder type (referenced by PyEncoderType_spec):
   wires up deallocation, calling, GC traverse/clear, the member table and
   construction.  Terminated by a {0, 0} sentinel. */
static PyType_Slot PyEncoderType_slots[] = {
    {Py_tp_doc, (void *)encoder_doc},
    {Py_tp_dealloc, encoder_dealloc},
    {Py_tp_call, encoder_call},
    {Py_tp_traverse, encoder_traverse},
    {Py_tp_clear, encoder_clear},
    {Py_tp_members, encoder_members},
    {Py_tp_new, encoder_new},
    {0, 0}
};
1748
/* Type specification for "_json.Encoder": instances are PyEncoderObject
   structs with no variable-size part, and the type takes part in cyclic
   garbage collection (Py_TPFLAGS_HAVE_GC).  _json_exec() turns this spec
   into a type object with PyType_FromSpec(). */
static PyType_Spec PyEncoderType_spec = {
    .name = "_json.Encoder",
    .basicsize = sizeof(PyEncoderObject),
    .itemsize = 0,
    .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
    .slots = PyEncoderType_slots
};
1756
/* Module-level functions of _json, installed through
   jsonmodule.m_methods.  The two string encoders take exactly one
   argument (METH_O); scanstring takes positional arguments
   (METH_VARARGS).  Terminated by an all-NULL sentinel entry. */
static PyMethodDef speedups_methods[] = {
    {"encode_basestring_ascii",
        (PyCFunction)py_encode_basestring_ascii,
        METH_O,
        pydoc_encode_basestring_ascii},
    {"encode_basestring",
        (PyCFunction)py_encode_basestring,
        METH_O,
        pydoc_encode_basestring},
    {"scanstring",
        (PyCFunction)py_scanstring,
        METH_VARARGS,
        pydoc_scanstring},
    {NULL, NULL, 0, NULL}
};
1772
/* Docstring of the _json module (used as jsonmodule.m_doc). */
PyDoc_STRVAR(module_doc,
"json speedups\n");
1775
1776 static int
1777 _json_exec(PyObject *module)
1778 {
1779 PyObject *PyScannerType = PyType_FromSpec(&PyScannerType_spec);
1780 if (PyScannerType == NULL) {
1781 return -1;
1782 }
1783 int rc = PyModule_AddObjectRef(module, "make_scanner", PyScannerType);
1784 Py_DECREF(PyScannerType);
1785 if (rc < 0) {
1786 return -1;
1787 }
1788
1789 PyObject *PyEncoderType = PyType_FromSpec(&PyEncoderType_spec);
1790 if (PyEncoderType == NULL) {
1791 return -1;
1792 }
1793 rc = PyModule_AddObjectRef(module, "make_encoder", PyEncoderType);
1794 Py_DECREF(PyEncoderType);
1795 if (rc < 0) {
1796 return -1;
1797 }
1798
1799 return 0;
1800 }
1801
/* Module slots: _json_exec() populates the module, and the module
   declares support for interpreters that have their own GIL.  Terminated
   by a {0, NULL} sentinel. */
static PyModuleDef_Slot _json_slots[] = {
    {Py_mod_exec, _json_exec},
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
    {0, NULL}
};
1807
/* Definition of the "_json" module: docstring, module-level functions
   and the slot table.  Handed to PyModuleDef_Init() by PyInit__json(). */
static struct PyModuleDef jsonmodule = {
    .m_base = PyModuleDef_HEAD_INIT,
    .m_name = "_json",
    .m_doc = module_doc,
    .m_methods = speedups_methods,
    .m_slots = _json_slots,
};
1815
/* Module initialization entry point for _json: returns the result of
   PyModuleDef_Init() on the module definition; the module contents are
   filled in by the Py_mod_exec slot (_json_exec). */
PyMODINIT_FUNC
PyInit__json(void)
{
    return PyModuleDef_Init(&jsonmodule);
}