1 /* csv module */
2
3 /*
4
5 This module provides the low-level underpinnings of a CSV reading/writing
6 module. Users should not use this module directly, but import the csv.py
7 module instead.
8
9 */
10
11 #define MODULE_VERSION "1.0"
12
13 #include "Python.h"
14 #include "structmember.h" // PyMemberDef
15 #include <stdbool.h>
16
17 /*[clinic input]
18 module _csv
19 [clinic start generated code]*/
20 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=385118b71aa43706]*/
21
22 #include "clinic/_csv.c.h"
/* Sentinel stored in a Py_UCS4 dialect character that has no value;
 * get_char_or_None() reports it to Python as None. */
#define NOT_SET ((Py_UCS4)-1)
/* Pseudo-character handed to parse_process_char() after the last real
 * character of every input line. */
#define EOL ((Py_UCS4)-2)
25
26
/* Per-module state of the _csv extension module (see get_csv_state()). */
typedef struct {
    PyObject *error_obj;   /* CSV exception */
    PyObject *dialects;   /* Dialect registry */
    PyTypeObject *dialect_type;   /* type called by _call_dialect() */
    PyTypeObject *reader_type;    /* type instantiated by csv_reader() */
    PyTypeObject *writer_type;    /* NOTE(review): presumably the writer type; not used in this part of the file */
    long field_limit;   /* max parsed field size */
    PyObject *str_write;   /* released in _csv_clear(); NOTE(review): its use is not visible in this part of the file */
} _csvstate;
36
/* Forward declaration: _csv_state_from_type() looks the module up by this
 * definition. */
static struct PyModuleDef _csvmodule;
38
39 static inline _csvstate*
40 get_csv_state(PyObject *module)
41 {
42 void *state = PyModule_GetState(module);
43 assert(state != NULL);
44 return (_csvstate *)state;
45 }
46
47 static int
48 _csv_clear(PyObject *module)
49 {
50 _csvstate *module_state = PyModule_GetState(module);
51 Py_CLEAR(module_state->error_obj);
52 Py_CLEAR(module_state->dialects);
53 Py_CLEAR(module_state->dialect_type);
54 Py_CLEAR(module_state->reader_type);
55 Py_CLEAR(module_state->writer_type);
56 Py_CLEAR(module_state->str_write);
57 return 0;
58 }
59
60 static int
61 _csv_traverse(PyObject *module, visitproc visit, void *arg)
62 {
63 _csvstate *module_state = PyModule_GetState(module);
64 Py_VISIT(module_state->error_obj);
65 Py_VISIT(module_state->dialects);
66 Py_VISIT(module_state->dialect_type);
67 Py_VISIT(module_state->reader_type);
68 Py_VISIT(module_state->writer_type);
69 return 0;
70 }
71
72 static void
73 _csv_free(void *module)
74 {
75 _csv_clear((PyObject *)module);
76 }
77
/*
 * States of the reader state machine driven by parse_process_char():
 *   START_RECORD            - start of record
 *   START_FIELD             - expecting a field
 *   ESCAPED_CHAR            - escapechar seen outside a quoted field
 *   IN_FIELD                - inside an unquoted field
 *   IN_QUOTED_FIELD         - inside a quoted field
 *   ESCAPE_IN_QUOTED_FIELD  - escapechar seen inside a quoted field
 *   QUOTE_IN_QUOTED_FIELD   - quotechar seen inside a quoted field
 *                             (doublequote handling)
 *   EAT_CRNL                - line end seen; only '\r', '\n' or EOL may follow
 *   AFTER_ESCAPED_CRNL      - an escaped '\r' or '\n' was just stored
 */
typedef enum {
    START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
    IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,AFTER_ESCAPED_CRNL
} ParserState;
83
/*
 * Quoting styles stored in DialectObj.quoting.  dialect_check_quoting()
 * accepts exactly these values (via the quote_styles table).
 */
typedef enum {
    QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE,
    QUOTE_STRINGS, QUOTE_NOTNULL
} QuoteStyle;
88
/* Pairs a quoting style with its symbolic name. */
typedef struct {
    QuoteStyle style;
    const char *name;
} StyleDesc;
93
/* Table of all valid quoting styles.  The list is terminated by an entry
 * whose name is NULL; dialect_check_quoting() relies on that terminator. */
static const StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { QUOTE_STRINGS,    "QUOTE_STRINGS" },
    { QUOTE_NOTNULL,    "QUOTE_NOTNULL" },
    { 0 }
};
103
/* Instance layout of _csv.Dialect.  Character options hold NOT_SET when
 * they have no value (see _set_char_or_none()). */
typedef struct {
    PyObject_HEAD

    char doublequote;           /* is " represented by ""? */
    char skipinitialspace;      /* ignore spaces following delimiter? */
    char strict;                /* raise exception on bad CSV */
    int quoting;                /* style of quoting to write */
    Py_UCS4 delimiter;          /* field separator */
    Py_UCS4 quotechar;          /* quote character */
    Py_UCS4 escapechar;         /* escape character */
    PyObject *lineterminator;   /* string to write between records */

} DialectObj;
117
/* Instance layout of _csv.reader objects (created by csv_reader()). */
typedef struct {
    PyObject_HEAD

    PyObject *input_iter;   /* iterate over this for input lines */

    DialectObj *dialect;    /* parsing dialect */

    PyObject *fields;       /* field list for current record */
    ParserState state;      /* current CSV parse state */
    Py_UCS4 *field;         /* temporary buffer */
    Py_ssize_t field_size;  /* size of allocated buffer */
    Py_ssize_t field_len;   /* length of current field */
    int numeric_field;      /* treat field as numeric */
    unsigned long line_num; /* Source-file line number; incremented once per
                               line object pulled from input_iter */
} ReaderObj;
133
/* Instance layout of the CSV writer object. */
typedef struct {
    PyObject_HEAD

    PyObject *write;    /* write output lines to this file */

    DialectObj *dialect;    /* parsing dialect */

    Py_UCS4 *rec;            /* buffer for parser.join */
    Py_ssize_t rec_size;        /* size of allocated record */
    Py_ssize_t rec_len;         /* length of record */
    int num_fields;             /* number of fields in record */

    PyObject *error_obj;       /* cached error object */
} WriterObj;
148
149 /*
150 * DIALECT class
151 */
152
153 static PyObject *
154 get_dialect_from_registry(PyObject *name_obj, _csvstate *module_state)
155 {
156 PyObject *dialect_obj;
157
158 dialect_obj = PyDict_GetItemWithError(module_state->dialects, name_obj);
159 if (dialect_obj == NULL) {
160 if (!PyErr_Occurred())
161 PyErr_Format(module_state->error_obj, "unknown dialect");
162 }
163 else
164 Py_INCREF(dialect_obj);
165
166 return dialect_obj;
167 }
168
169 static PyObject *
170 get_char_or_None(Py_UCS4 c)
171 {
172 if (c == NOT_SET) {
173 Py_RETURN_NONE;
174 }
175 else
176 return PyUnicode_FromOrdinal(c);
177 }
178
179 static PyObject *
180 Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
181 {
182 return Py_XNewRef(self->lineterminator);
183 }
184
185 static PyObject *
186 Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
187 {
188 return get_char_or_None(self->delimiter);
189 }
190
191 static PyObject *
192 Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
193 {
194 return get_char_or_None(self->escapechar);
195 }
196
197 static PyObject *
198 Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
199 {
200 return get_char_or_None(self->quotechar);
201 }
202
203 static PyObject *
204 Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
205 {
206 return PyLong_FromLong(self->quoting);
207 }
208
209 static int
210 _set_bool(const char *name, char *target, PyObject *src, bool dflt)
211 {
212 if (src == NULL)
213 *target = dflt;
214 else {
215 int b = PyObject_IsTrue(src);
216 if (b < 0)
217 return -1;
218 *target = (char)b;
219 }
220 return 0;
221 }
222
223 static int
224 _set_int(const char *name, int *target, PyObject *src, int dflt)
225 {
226 if (src == NULL)
227 *target = dflt;
228 else {
229 int value;
230 if (!PyLong_CheckExact(src)) {
231 PyErr_Format(PyExc_TypeError,
232 "\"%s\" must be an integer", name);
233 return -1;
234 }
235 value = _PyLong_AsInt(src);
236 if (value == -1 && PyErr_Occurred()) {
237 return -1;
238 }
239 *target = value;
240 }
241 return 0;
242 }
243
244 static int
245 _set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
246 {
247 if (src == NULL) {
248 *target = dflt;
249 }
250 else {
251 *target = NOT_SET;
252 if (src != Py_None) {
253 if (!PyUnicode_Check(src)) {
254 PyErr_Format(PyExc_TypeError,
255 "\"%s\" must be string or None, not %.200s", name,
256 Py_TYPE(src)->tp_name);
257 return -1;
258 }
259 Py_ssize_t len = PyUnicode_GetLength(src);
260 if (len < 0) {
261 return -1;
262 }
263 if (len != 1) {
264 PyErr_Format(PyExc_TypeError,
265 "\"%s\" must be a 1-character string",
266 name);
267 return -1;
268 }
269 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
270 *target = PyUnicode_READ_CHAR(src, 0);
271 }
272 }
273 return 0;
274 }
275
276 static int
277 _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
278 {
279 if (src == NULL) {
280 *target = dflt;
281 }
282 else {
283 if (!PyUnicode_Check(src)) {
284 PyErr_Format(PyExc_TypeError,
285 "\"%s\" must be string, not %.200s", name,
286 Py_TYPE(src)->tp_name);
287 return -1;
288 }
289 Py_ssize_t len = PyUnicode_GetLength(src);
290 if (len < 0) {
291 return -1;
292 }
293 if (len != 1) {
294 PyErr_Format(PyExc_TypeError,
295 "\"%s\" must be a 1-character string",
296 name);
297 return -1;
298 }
299 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
300 *target = PyUnicode_READ_CHAR(src, 0);
301 }
302 return 0;
303 }
304
305 static int
306 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
307 {
308 if (src == NULL)
309 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
310 else {
311 if (src == Py_None)
312 *target = NULL;
313 else if (!PyUnicode_Check(src)) {
314 PyErr_Format(PyExc_TypeError,
315 "\"%s\" must be a string", name);
316 return -1;
317 }
318 else {
319 if (PyUnicode_READY(src) == -1)
320 return -1;
321 Py_XSETREF(*target, Py_NewRef(src));
322 }
323 }
324 return 0;
325 }
326
327 static int
328 dialect_check_quoting(int quoting)
329 {
330 const StyleDesc *qs;
331
332 for (qs = quote_styles; qs->name; qs++) {
333 if ((int)qs->style == quoting)
334 return 0;
335 }
336 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
337 return -1;
338 }
339
/* Byte offset of field `x` inside DialectObj. */
#define D_OFF(x) offsetof(DialectObj, x)

/* Read-only boolean attributes served directly from DialectObj fields. */
static struct PyMemberDef Dialect_memberlist[] = {
    { "skipinitialspace",   T_BOOL, D_OFF(skipinitialspace), READONLY },
    { "doublequote",        T_BOOL, D_OFF(doublequote), READONLY },
    { "strict",             T_BOOL, D_OFF(strict), READONLY },
    { NULL }
};
348
/* Computed attributes of Dialect.  Only getters are supplied, so no setter
 * is registered for any of them. */
static PyGetSetDef Dialect_getsetlist[] = {
    { "delimiter",          (getter)Dialect_get_delimiter},
    { "escapechar",             (getter)Dialect_get_escapechar},
    { "lineterminator",         (getter)Dialect_get_lineterminator},
    { "quotechar",              (getter)Dialect_get_quotechar},
    { "quoting",                (getter)Dialect_get_quoting},
    {NULL},
};
357
358 static void
359 Dialect_dealloc(DialectObj *self)
360 {
361 PyTypeObject *tp = Py_TYPE(self);
362 PyObject_GC_UnTrack(self);
363 tp->tp_clear((PyObject *)self);
364 PyObject_GC_Del(self);
365 Py_DECREF(tp);
366 }
367
/* Keyword names for dialect_new(); the order must match the argument
 * pointers passed to PyArg_ParseTupleAndKeywords() there. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL
};
380
381 static _csvstate *
382 _csv_state_from_type(PyTypeObject *type, const char *name)
383 {
384 PyObject *module = PyType_GetModuleByDef(type, &_csvmodule);
385 if (module == NULL) {
386 return NULL;
387 }
388 _csvstate *module_state = PyModule_GetState(module);
389 if (module_state == NULL) {
390 PyErr_Format(PyExc_SystemError,
391 "%s: No _csv module state found", name);
392 return NULL;
393 }
394 return module_state;
395 }
396
397 static PyObject *
398 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
399 {
400 DialectObj *self;
401 PyObject *ret = NULL;
402 PyObject *dialect = NULL;
403 PyObject *delimiter = NULL;
404 PyObject *doublequote = NULL;
405 PyObject *escapechar = NULL;
406 PyObject *lineterminator = NULL;
407 PyObject *quotechar = NULL;
408 PyObject *quoting = NULL;
409 PyObject *skipinitialspace = NULL;
410 PyObject *strict = NULL;
411
412 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
413 "|OOOOOOOOO", dialect_kws,
414 &dialect,
415 &delimiter,
416 &doublequote,
417 &escapechar,
418 &lineterminator,
419 "echar,
420 "ing,
421 &skipinitialspace,
422 &strict))
423 return NULL;
424
425 _csvstate *module_state = _csv_state_from_type(type, "dialect_new");
426 if (module_state == NULL) {
427 return NULL;
428 }
429
430 if (dialect != NULL) {
431 if (PyUnicode_Check(dialect)) {
432 dialect = get_dialect_from_registry(dialect, module_state);
433 if (dialect == NULL)
434 return NULL;
435 }
436 else
437 Py_INCREF(dialect);
438 /* Can we reuse this instance? */
439 if (PyObject_TypeCheck(dialect, module_state->dialect_type) &&
440 delimiter == NULL &&
441 doublequote == NULL &&
442 escapechar == NULL &&
443 lineterminator == NULL &&
444 quotechar == NULL &&
445 quoting == NULL &&
446 skipinitialspace == NULL &&
447 strict == NULL)
448 return dialect;
449 }
450
451 self = (DialectObj *)type->tp_alloc(type, 0);
452 if (self == NULL) {
453 Py_CLEAR(dialect);
454 return NULL;
455 }
456 self->lineterminator = NULL;
457
458 Py_XINCREF(delimiter);
459 Py_XINCREF(doublequote);
460 Py_XINCREF(escapechar);
461 Py_XINCREF(lineterminator);
462 Py_XINCREF(quotechar);
463 Py_XINCREF(quoting);
464 Py_XINCREF(skipinitialspace);
465 Py_XINCREF(strict);
466 if (dialect != NULL) {
467 #define DIALECT_GETATTR(v, n) \
468 do { \
469 if (v == NULL) { \
470 v = PyObject_GetAttrString(dialect, n); \
471 if (v == NULL) \
472 PyErr_Clear(); \
473 } \
474 } while (0)
475 DIALECT_GETATTR(delimiter, "delimiter");
476 DIALECT_GETATTR(doublequote, "doublequote");
477 DIALECT_GETATTR(escapechar, "escapechar");
478 DIALECT_GETATTR(lineterminator, "lineterminator");
479 DIALECT_GETATTR(quotechar, "quotechar");
480 DIALECT_GETATTR(quoting, "quoting");
481 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
482 DIALECT_GETATTR(strict, "strict");
483 }
484
485 /* check types and convert to C values */
486 #define DIASET(meth, name, target, src, dflt) \
487 if (meth(name, target, src, dflt)) \
488 goto err
489 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
490 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
491 DIASET(_set_char_or_none, "escapechar", &self->escapechar, escapechar, NOT_SET);
492 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
493 DIASET(_set_char_or_none, "quotechar", &self->quotechar, quotechar, '"');
494 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
495 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
496 DIASET(_set_bool, "strict", &self->strict, strict, false);
497
498 /* validate options */
499 if (dialect_check_quoting(self->quoting))
500 goto err;
501 if (self->delimiter == NOT_SET) {
502 PyErr_SetString(PyExc_TypeError,
503 "\"delimiter\" must be a 1-character string");
504 goto err;
505 }
506 if (quotechar == Py_None && quoting == NULL)
507 self->quoting = QUOTE_NONE;
508 if (self->quoting != QUOTE_NONE && self->quotechar == NOT_SET) {
509 PyErr_SetString(PyExc_TypeError,
510 "quotechar must be set if quoting enabled");
511 goto err;
512 }
513 if (self->lineterminator == NULL) {
514 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
515 goto err;
516 }
517
518 ret = Py_NewRef(self);
519 err:
520 Py_CLEAR(self);
521 Py_CLEAR(dialect);
522 Py_CLEAR(delimiter);
523 Py_CLEAR(doublequote);
524 Py_CLEAR(escapechar);
525 Py_CLEAR(lineterminator);
526 Py_CLEAR(quotechar);
527 Py_CLEAR(quoting);
528 Py_CLEAR(skipinitialspace);
529 Py_CLEAR(strict);
530 return ret;
531 }
532
/* Since dialect is now a heap type, it inherits the pickling methods for
 * protocols 0 and 1 from object; they are therefore overridden to refuse
 * pickling (see Dialect_reduce). */

PyDoc_STRVAR(dialect_reduce_doc, "raises an exception to avoid pickling");
537
538 static PyObject *
539 Dialect_reduce(PyObject *self, PyObject *args) {
540 PyErr_Format(PyExc_TypeError,
541 "cannot pickle '%.100s' instances", _PyType_Name(Py_TYPE(self)));
542 return NULL;
543 }
544
/* Method table of Dialect: both pickling hooks map to Dialect_reduce. */
static struct PyMethodDef dialect_methods[] = {
    {"__reduce__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
    {"__reduce_ex__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
    {NULL, NULL}
};
550
/* Docstring of the Dialect type (installed through Dialect_Type_slots). */
PyDoc_STRVAR(Dialect_Type_doc,
"CSV dialect\n"
"\n"
"The Dialect type records CSV parsing and generation options.\n");
555
/* tp_clear for Dialect: release the only object reference an instance owns
 * (the line terminator string).  Always returns 0. */
static int
Dialect_clear(DialectObj *self)
{
    Py_CLEAR(self->lineterminator);
    return 0;
}
562
/* tp_traverse for Dialect: visit the line terminator and the instance's
 * type object. */
static int
Dialect_traverse(DialectObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->lineterminator);
    Py_VISIT(Py_TYPE(self));
    return 0;
}
570
/* Slot table used by Dialect_Type_spec to build the Dialect heap type. */
static PyType_Slot Dialect_Type_slots[] = {
    {Py_tp_doc, (char*)Dialect_Type_doc},
    {Py_tp_members, Dialect_memberlist},
    {Py_tp_getset, Dialect_getsetlist},
    {Py_tp_new, dialect_new},
    {Py_tp_methods, dialect_methods},
    {Py_tp_dealloc, Dialect_dealloc},
    {Py_tp_clear, Dialect_clear},
    {Py_tp_traverse, Dialect_traverse},
    {0, NULL}
};
582
/* Type specification for _csv.Dialect: a GC-tracked, subclassable type
 * flagged as immutable. */
PyType_Spec Dialect_Type_spec = {
    .name = "_csv.Dialect",
    .basicsize = sizeof(DialectObj),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = Dialect_Type_slots,
};
590
591
592 /*
593 * Return an instance of the dialect type, given a Python instance or kwarg
594 * description of the dialect
595 */
596 static PyObject *
597 _call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs)
598 {
599 PyObject *type = (PyObject *)module_state->dialect_type;
600 if (dialect_inst) {
601 return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs);
602 }
603 else {
604 return PyObject_VectorcallDict(type, NULL, 0, kwargs);
605 }
606 }
607
608 /*
609 * READER
610 */
611 static int
612 parse_save_field(ReaderObj *self)
613 {
614 PyObject *field;
615
616 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
617 (void *) self->field, self->field_len);
618 if (field == NULL)
619 return -1;
620 self->field_len = 0;
621 if (self->numeric_field) {
622 PyObject *tmp;
623
624 self->numeric_field = 0;
625 tmp = PyNumber_Float(field);
626 Py_DECREF(field);
627 if (tmp == NULL)
628 return -1;
629 field = tmp;
630 }
631 if (PyList_Append(self->fields, field) < 0) {
632 Py_DECREF(field);
633 return -1;
634 }
635 Py_DECREF(field);
636 return 0;
637 }
638
639 static int
640 parse_grow_buff(ReaderObj *self)
641 {
642 assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
643
644 Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
645 Py_UCS4 *field_new = self->field;
646 PyMem_Resize(field_new, Py_UCS4, field_size_new);
647 if (field_new == NULL) {
648 PyErr_NoMemory();
649 return 0;
650 }
651 self->field = field_new;
652 self->field_size = field_size_new;
653 return 1;
654 }
655
656 static int
657 parse_add_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
658 {
659 if (self->field_len >= module_state->field_limit) {
660 PyErr_Format(module_state->error_obj,
661 "field larger than field limit (%ld)",
662 module_state->field_limit);
663 return -1;
664 }
665 if (self->field_len == self->field_size && !parse_grow_buff(self))
666 return -1;
667 self->field[self->field_len++] = c;
668 return 0;
669 }
670
/*
 * Advance the reader state machine by one input character.
 *
 * `c` is either a character of the current input line or the EOL sentinel
 * that Reader_iternext() feeds after the last character of each line.
 * Depending on self->state the character is stored in the current field,
 * finishes a field (parse_save_field), or only changes the state.  A record
 * is complete when the state is back at START_RECORD after EOL.
 *
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int
parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
{
    DialectObj *dialect = self->dialect;

    switch (self->state) {
    case START_RECORD:
        /* start of record */
        if (c == EOL)
            /* empty line - return [] */
            break;
        else if (c == '\n' || c == '\r') {
            self->state = EAT_CRNL;
            break;
        }
        /* normal character - handle as START_FIELD */
        self->state = START_FIELD;
        /* fallthru */
    case START_FIELD:
        /* expecting field */
        if (c == '\n' || c == '\r' || c == EOL) {
            /* save empty field - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == EOL ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            /* start quoted field */
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == ' ' && dialect->skipinitialspace)
            /* ignore spaces at start of field */
            ;
        else if (c == dialect->delimiter) {
            /* save empty field */
            if (parse_save_field(self) < 0)
                return -1;
        }
        else {
            /* begin new unquoted field */
            if (dialect->quoting == QUOTE_NONNUMERIC)
                self->numeric_field = 1;
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        break;

    case ESCAPED_CHAR:
        /* an escaped line break is stored as-is and remembered, so that the
         * EOL that follows it does not end the record */
        if (c == '\n' || c=='\r') {
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
            self->state = AFTER_ESCAPED_CRNL;
            break;
        }
        /* escapechar was the last character of the line: store a newline */
        if (c == EOL)
            c = '\n';
        if (parse_add_char(self, module_state, c) < 0)
            return -1;
        self->state = IN_FIELD;
        break;

    case AFTER_ESCAPED_CRNL:
        /* EOL right after an escaped line break: stay in this state and
         * continue with the next line */
        if (c == EOL)
            break;
        /*fallthru*/

    case IN_FIELD:
        /* in unquoted field */
        if (c == '\n' || c == '\r' || c == EOL) {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == EOL ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
        }
        break;

    case IN_QUOTED_FIELD:
        /* in quoted field */
        if (c == EOL)
            /* end of line inside quotes: the field continues on the next
             * line */
            ;
        else if (c == dialect->escapechar) {
            /* Possible escape character */
            self->state = ESCAPE_IN_QUOTED_FIELD;
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            if (dialect->doublequote) {
                /* doublequote; " represented by "" */
                self->state = QUOTE_IN_QUOTED_FIELD;
            }
            else {
                /* end of quote part of field */
                self->state = IN_FIELD;
            }
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
        }
        break;

    case ESCAPE_IN_QUOTED_FIELD:
        /* escapechar was the last character of the line: store a newline */
        if (c == EOL)
            c = '\n';
        if (parse_add_char(self, module_state, c) < 0)
            return -1;
        self->state = IN_QUOTED_FIELD;
        break;

    case QUOTE_IN_QUOTED_FIELD:
        /* doublequote - seen a quote in a quoted field */
        if (dialect->quoting != QUOTE_NONE &&
            c == dialect->quotechar) {
            /* save "" as " */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else if (c == '\n' || c == '\r' || c == EOL) {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == EOL ? START_RECORD : EAT_CRNL);
        }
        else if (!dialect->strict) {
            /* lenient mode: keep the stray character and carry on as an
             * unquoted field */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        else {
            /* illegal */
            PyErr_Format(module_state->error_obj, "'%c' expected after '%c'",
                            dialect->delimiter,
                            dialect->quotechar);
            return -1;
        }
        break;

    case EAT_CRNL:
        /* after a line break only further line breaks or EOL are allowed */
        if (c == '\n' || c == '\r')
            ;
        else if (c == EOL)
            self->state = START_RECORD;
        else {
            PyErr_Format(module_state->error_obj,
                         "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
            return -1;
        }
        break;

    }
    return 0;
}
852
853 static int
854 parse_reset(ReaderObj *self)
855 {
856 Py_XSETREF(self->fields, PyList_New(0));
857 if (self->fields == NULL)
858 return -1;
859 self->field_len = 0;
860 self->state = START_RECORD;
861 self->numeric_field = 0;
862 return 0;
863 }
864
865 static PyObject *
866 Reader_iternext(ReaderObj *self)
867 {
868 PyObject *fields = NULL;
869 Py_UCS4 c;
870 Py_ssize_t pos, linelen;
871 int kind;
872 const void *data;
873 PyObject *lineobj;
874
875 _csvstate *module_state = _csv_state_from_type(Py_TYPE(self),
876 "Reader.__next__");
877 if (module_state == NULL) {
878 return NULL;
879 }
880
881 if (parse_reset(self) < 0)
882 return NULL;
883 do {
884 lineobj = PyIter_Next(self->input_iter);
885 if (lineobj == NULL) {
886 /* End of input OR exception */
887 if (!PyErr_Occurred() && (self->field_len != 0 ||
888 self->state == IN_QUOTED_FIELD)) {
889 if (self->dialect->strict)
890 PyErr_SetString(module_state->error_obj,
891 "unexpected end of data");
892 else if (parse_save_field(self) >= 0)
893 break;
894 }
895 return NULL;
896 }
897 if (!PyUnicode_Check(lineobj)) {
898 PyErr_Format(module_state->error_obj,
899 "iterator should return strings, "
900 "not %.200s "
901 "(the file should be opened in text mode)",
902 Py_TYPE(lineobj)->tp_name
903 );
904 Py_DECREF(lineobj);
905 return NULL;
906 }
907 if (PyUnicode_READY(lineobj) == -1) {
908 Py_DECREF(lineobj);
909 return NULL;
910 }
911 ++self->line_num;
912 kind = PyUnicode_KIND(lineobj);
913 data = PyUnicode_DATA(lineobj);
914 pos = 0;
915 linelen = PyUnicode_GET_LENGTH(lineobj);
916 while (linelen--) {
917 c = PyUnicode_READ(kind, data, pos);
918 if (parse_process_char(self, module_state, c) < 0) {
919 Py_DECREF(lineobj);
920 goto err;
921 }
922 pos++;
923 }
924 Py_DECREF(lineobj);
925 if (parse_process_char(self, module_state, EOL) < 0)
926 goto err;
927 } while (self->state != START_RECORD);
928
929 fields = self->fields;
930 self->fields = NULL;
931 err:
932 return fields;
933 }
934
935 static void
936 Reader_dealloc(ReaderObj *self)
937 {
938 PyTypeObject *tp = Py_TYPE(self);
939 PyObject_GC_UnTrack(self);
940 tp->tp_clear((PyObject *)self);
941 if (self->field != NULL) {
942 PyMem_Free(self->field);
943 self->field = NULL;
944 }
945 PyObject_GC_Del(self);
946 Py_DECREF(tp);
947 }
948
/* tp_traverse for reader objects: visit the dialect, the input iterator,
 * the current field list and the instance's type object. */
static int
Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->input_iter);
    Py_VISIT(self->fields);
    Py_VISIT(Py_TYPE(self));
    return 0;
}
958
/* tp_clear for reader objects: release the object references held by the
 * instance.  The raw field buffer is freed separately in Reader_dealloc(). */
static int
Reader_clear(ReaderObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->input_iter);
    Py_CLEAR(self->fields);
    return 0;
}
967
/* Docstring of the reader type (installed through Reader_Type_slots). */
PyDoc_STRVAR(Reader_Type_doc,
"CSV reader\n"
"\n"
"Reader objects are responsible for reading and parsing tabular data\n"
"in CSV format.\n"
);
974
/* Method table of the reader type; it contains only the terminator. */
static struct PyMethodDef Reader_methods[] = {
    { NULL, NULL }
};
/* Byte offset of field `x` inside ReaderObj. */
#define R_OFF(x) offsetof(ReaderObj, x)

/* Read-only attributes served directly from ReaderObj fields. */
static struct PyMemberDef Reader_memberlist[] = {
    { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
    { "line_num", T_ULONG, R_OFF(line_num), READONLY },
    { NULL }
};
985
986
/* Slot table used by Reader_Type_spec.  A reader is its own iterator
 * (PyObject_SelfIter) and yields records through Reader_iternext. */
static PyType_Slot Reader_Type_slots[] = {
    {Py_tp_doc, (char*)Reader_Type_doc},
    {Py_tp_traverse, Reader_traverse},
    {Py_tp_iter, PyObject_SelfIter},
    {Py_tp_iternext, Reader_iternext},
    {Py_tp_methods, Reader_methods},
    {Py_tp_members, Reader_memberlist},
    {Py_tp_clear, Reader_clear},
    {Py_tp_dealloc, Reader_dealloc},
    {0, NULL}
};
998
/* Type specification for _csv.reader.  The type is flagged
 * Py_TPFLAGS_DISALLOW_INSTANTIATION; instances are created by csv_reader(). */
PyType_Spec Reader_Type_spec = {
    .name = "_csv.reader",
    .basicsize = sizeof(ReaderObj),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION),
    .slots = Reader_Type_slots
};
1006
1007
1008 static PyObject *
1009 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
1010 {
1011 PyObject * iterator, * dialect = NULL;
1012 _csvstate *module_state = get_csv_state(module);
1013 ReaderObj * self = PyObject_GC_New(
1014 ReaderObj,
1015 module_state->reader_type);
1016
1017 if (!self)
1018 return NULL;
1019
1020 self->dialect = NULL;
1021 self->fields = NULL;
1022 self->input_iter = NULL;
1023 self->field = NULL;
1024 self->field_size = 0;
1025 self->line_num = 0;
1026
1027 if (parse_reset(self) < 0) {
1028 Py_DECREF(self);
1029 return NULL;
1030 }
1031
1032 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
1033 Py_DECREF(self);
1034 return NULL;
1035 }
1036 self->input_iter = PyObject_GetIter(iterator);
1037 if (self->input_iter == NULL) {
1038 Py_DECREF(self);
1039 return NULL;
1040 }
1041 self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1042 keyword_args);
1043 if (self->dialect == NULL) {
1044 Py_DECREF(self);
1045 return NULL;
1046 }
1047
1048 PyObject_GC_Track(self);
1049 return (PyObject *)self;
1050 }
1051
1052 /*
1053 * WRITER
1054 */
1055 /* ---------------------------------------------------------------- */
1056 static void
1057 join_reset(WriterObj *self)
1058 {
1059 self->rec_len = 0;
1060 self->num_fields = 0;
1061 }
1062
/* Allocation granularity (in characters) of the writer's record buffer;
 * join_check_rec_size() rounds requested sizes up to a multiple of it. */
#define MEM_INCR 32768
1064
1065 /* Calculate new record length or append field to record. Return new
1066 * record length.
1067 */
1068 static Py_ssize_t
1069 join_append_data(WriterObj *self, int field_kind, const void *field_data,
1070 Py_ssize_t field_len, int *quoted,
1071 int copy_phase)
1072 {
1073 DialectObj *dialect = self->dialect;
1074 int i;
1075 Py_ssize_t rec_len;
1076
1077 #define INCLEN \
1078 do {\
1079 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1080 goto overflow; \
1081 } \
1082 rec_len++; \
1083 } while(0)
1084
1085 #define ADDCH(c) \
1086 do {\
1087 if (copy_phase) \
1088 self->rec[rec_len] = c;\
1089 INCLEN;\
1090 } while(0)
1091
1092 rec_len = self->rec_len;
1093
1094 /* If this is not the first field we need a field separator */
1095 if (self->num_fields > 0)
1096 ADDCH(dialect->delimiter);
1097
1098 /* Handle preceding quote */
1099 if (copy_phase && *quoted)
1100 ADDCH(dialect->quotechar);
1101
1102 /* Copy/count field data */
1103 /* If field is null just pass over */
1104 for (i = 0; field_data && (i < field_len); i++) {
1105 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
1106 int want_escape = 0;
1107
1108 if (c == dialect->delimiter ||
1109 c == dialect->escapechar ||
1110 c == dialect->quotechar ||
1111 PyUnicode_FindChar(
1112 dialect->lineterminator, c, 0,
1113 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
1114 if (dialect->quoting == QUOTE_NONE)
1115 want_escape = 1;
1116 else {
1117 if (c == dialect->quotechar) {
1118 if (dialect->doublequote)
1119 ADDCH(dialect->quotechar);
1120 else
1121 want_escape = 1;
1122 }
1123 else if (c == dialect->escapechar) {
1124 want_escape = 1;
1125 }
1126 if (!want_escape)
1127 *quoted = 1;
1128 }
1129 if (want_escape) {
1130 if (dialect->escapechar == NOT_SET) {
1131 PyErr_Format(self->error_obj,
1132 "need to escape, but no escapechar set");
1133 return -1;
1134 }
1135 ADDCH(dialect->escapechar);
1136 }
1137 }
1138 /* Copy field character into record buffer.
1139 */
1140 ADDCH(c);
1141 }
1142
1143 if (*quoted) {
1144 if (copy_phase)
1145 ADDCH(dialect->quotechar);
1146 else {
1147 INCLEN; /* starting quote */
1148 INCLEN; /* ending quote */
1149 }
1150 }
1151 return rec_len;
1152
1153 overflow:
1154 PyErr_NoMemory();
1155 return -1;
1156 #undef ADDCH
1157 #undef INCLEN
1158 }
1159
1160 static int
1161 join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
1162 {
1163 assert(rec_len >= 0);
1164
1165 if (rec_len > self->rec_size) {
1166 size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1167 Py_UCS4 *rec_new = self->rec;
1168 PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1169 if (rec_new == NULL) {
1170 PyErr_NoMemory();
1171 return 0;
1172 }
1173 self->rec = rec_new;
1174 self->rec_size = (Py_ssize_t)rec_size_new;
1175 }
1176 return 1;
1177 }
1178
1179 static int
1180 join_append(WriterObj *self, PyObject *field, int quoted)
1181 {
1182 int field_kind = -1;
1183 const void *field_data = NULL;
1184 Py_ssize_t field_len = 0;
1185 Py_ssize_t rec_len;
1186
1187 if (field != NULL) {
1188 if (PyUnicode_READY(field) == -1)
1189 return 0;
1190 field_kind = PyUnicode_KIND(field);
1191 field_data = PyUnicode_DATA(field);
1192 field_len = PyUnicode_GET_LENGTH(field);
1193 }
1194 rec_len = join_append_data(self, field_kind, field_data, field_len,
1195 "ed, 0);
1196 if (rec_len < 0)
1197 return 0;
1198
1199 /* grow record buffer if necessary */
1200 if (!join_check_rec_size(self, rec_len))
1201 return 0;
1202
1203 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1204 "ed, 1);
1205 self->num_fields++;
1206
1207 return 1;
1208 }
1209
1210 static int
1211 join_append_lineterminator(WriterObj *self)
1212 {
1213 Py_ssize_t terminator_len, i;
1214 int term_kind;
1215 const void *term_data;
1216
1217 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
1218 if (terminator_len == -1)
1219 return 0;
1220
1221 /* grow record buffer if necessary */
1222 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1223 return 0;
1224
1225 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1226 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1227 for (i = 0; i < terminator_len; i++)
1228 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
1229 self->rec_len += terminator_len;
1230
1231 return 1;
1232 }
1233
/* Docstring for the writer's writerow() method (csv_writerow). */
PyDoc_STRVAR(csv_writerow_doc,
"writerow(iterable)\n"
"\n"
"Construct and write a CSV record from an iterable of fields.  Non-string\n"
"elements will be converted to string.");
1239
1240 static PyObject *
1241 csv_writerow(WriterObj *self, PyObject *seq)
1242 {
1243 DialectObj *dialect = self->dialect;
1244 PyObject *iter, *field, *line, *result;
1245
1246 iter = PyObject_GetIter(seq);
1247 if (iter == NULL) {
1248 if (PyErr_ExceptionMatches(PyExc_TypeError)) {
1249 PyErr_Format(self->error_obj,
1250 "iterable expected, not %.200s",
1251 Py_TYPE(seq)->tp_name);
1252 }
1253 return NULL;
1254 }
1255
1256 /* Join all fields in internal buffer.
1257 */
1258 join_reset(self);
1259 while ((field = PyIter_Next(iter))) {
1260 int append_ok;
1261 int quoted;
1262
1263 switch (dialect->quoting) {
1264 case QUOTE_NONNUMERIC:
1265 quoted = !PyNumber_Check(field);
1266 break;
1267 case QUOTE_ALL:
1268 quoted = 1;
1269 break;
1270 case QUOTE_STRINGS:
1271 quoted = PyUnicode_Check(field);
1272 break;
1273 case QUOTE_NOTNULL:
1274 quoted = field != Py_None;
1275 break;
1276 default:
1277 quoted = 0;
1278 break;
1279 }
1280
1281 if (PyUnicode_Check(field)) {
1282 append_ok = join_append(self, field, quoted);
1283 Py_DECREF(field);
1284 }
1285 else if (field == Py_None) {
1286 append_ok = join_append(self, NULL, quoted);
1287 Py_DECREF(field);
1288 }
1289 else {
1290 PyObject *str;
1291
1292 str = PyObject_Str(field);
1293 Py_DECREF(field);
1294 if (str == NULL) {
1295 Py_DECREF(iter);
1296 return NULL;
1297 }
1298 append_ok = join_append(self, str, quoted);
1299 Py_DECREF(str);
1300 }
1301 if (!append_ok) {
1302 Py_DECREF(iter);
1303 return NULL;
1304 }
1305 }
1306 Py_DECREF(iter);
1307 if (PyErr_Occurred())
1308 return NULL;
1309
1310 if (self->num_fields > 0 && self->rec_len == 0) {
1311 if (dialect->quoting == QUOTE_NONE) {
1312 PyErr_Format(self->error_obj,
1313 "single empty field record must be quoted");
1314 return NULL;
1315 }
1316 self->num_fields--;
1317 if (!join_append(self, NULL, 1))
1318 return NULL;
1319 }
1320
1321 /* Add line terminator.
1322 */
1323 if (!join_append_lineterminator(self)) {
1324 return NULL;
1325 }
1326
1327 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1328 (void *) self->rec, self->rec_len);
1329 if (line == NULL) {
1330 return NULL;
1331 }
1332 result = PyObject_CallOneArg(self->write, line);
1333 Py_DECREF(line);
1334 return result;
1335 }
1336
/* Docstring for the writer object's writerows() method (see Writer_methods). */
PyDoc_STRVAR(csv_writerows_doc,
"writerows(iterable of iterables)\n"
"\n"
"Construct and write a series of iterables to a csv file. Non-string\n"
"elements will be converted to string.");
1342
1343 static PyObject *
1344 csv_writerows(WriterObj *self, PyObject *seqseq)
1345 {
1346 PyObject *row_iter, *row_obj, *result;
1347
1348 row_iter = PyObject_GetIter(seqseq);
1349 if (row_iter == NULL) {
1350 return NULL;
1351 }
1352 while ((row_obj = PyIter_Next(row_iter))) {
1353 result = csv_writerow(self, row_obj);
1354 Py_DECREF(row_obj);
1355 if (!result) {
1356 Py_DECREF(row_iter);
1357 return NULL;
1358 }
1359 else
1360 Py_DECREF(result);
1361 }
1362 Py_DECREF(row_iter);
1363 if (PyErr_Occurred())
1364 return NULL;
1365 Py_RETURN_NONE;
1366 }
1367
1368 static struct PyMethodDef Writer_methods[] = {
1369 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1370 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1371 { NULL, NULL }
1372 };
1373
1374 #define W_OFF(x) offsetof(WriterObj, x)
1375
1376 static struct PyMemberDef Writer_memberlist[] = {
1377 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1378 { NULL }
1379 };
1380
/* GC traverse slot for writer objects: reports the objects referenced by
 * the dialect, write and error_obj fields to `visit`. */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->write);
    Py_VISIT(self->error_obj);
    /* The instance's type is visited as well; Writer_dealloc releases a
     * reference to it. */
    Py_VISIT(Py_TYPE(self));
    return 0;
}
1390
/* GC clear slot for writer objects: drops the references held in the
 * dialect, write and error_obj fields.  The record buffer (self->rec) is
 * not touched here; Writer_dealloc frees it. */
static int
Writer_clear(WriterObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->write);
    Py_CLEAR(self->error_obj);
    return 0;
}
1399
1400 static void
1401 Writer_dealloc(WriterObj *self)
1402 {
1403 PyTypeObject *tp = Py_TYPE(self);
1404 PyObject_GC_UnTrack(self);
1405 tp->tp_clear((PyObject *)self);
1406 if (self->rec != NULL) {
1407 PyMem_Free(self->rec);
1408 }
1409 PyObject_GC_Del(self);
1410 Py_DECREF(tp);
1411 }
1412
/* Docstring of the writer type (installed through Writer_Type_slots). */
PyDoc_STRVAR(Writer_Type_doc,
"CSV writer\n"
"\n"
"Writer objects are responsible for generating tabular data\n"
"in CSV format from sequence input.\n"
);
1419
/* Slot table for the writer type: docstring, GC traverse/clear hooks,
 * deallocator, methods and members. */
static PyType_Slot Writer_Type_slots[] = {
    {Py_tp_doc, (char*)Writer_Type_doc},
    {Py_tp_traverse, Writer_traverse},
    {Py_tp_clear, Writer_clear},
    {Py_tp_dealloc, Writer_dealloc},
    {Py_tp_methods, Writer_methods},
    {Py_tp_members, Writer_memberlist},
    {0, NULL}  /* sentinel */
};
1429
/* Type spec for "_csv.writer"; csv_exec turns it into a heap type.
 * Py_TPFLAGS_DISALLOW_INSTANTIATION is set: instances are allocated
 * directly by csv_writer() with PyObject_GC_New(). */
PyType_Spec Writer_Type_spec = {
    .name = "_csv.writer",
    .basicsize = sizeof(WriterObj),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION),
    .slots = Writer_Type_slots,
};
1437
1438
1439 static PyObject *
1440 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1441 {
1442 PyObject * output_file, * dialect = NULL;
1443 _csvstate *module_state = get_csv_state(module);
1444 WriterObj * self = PyObject_GC_New(WriterObj, module_state->writer_type);
1445
1446 if (!self)
1447 return NULL;
1448
1449 self->dialect = NULL;
1450 self->write = NULL;
1451
1452 self->rec = NULL;
1453 self->rec_size = 0;
1454 self->rec_len = 0;
1455 self->num_fields = 0;
1456
1457 self->error_obj = Py_NewRef(module_state->error_obj);
1458
1459 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1460 Py_DECREF(self);
1461 return NULL;
1462 }
1463 if (_PyObject_LookupAttr(output_file,
1464 module_state->str_write,
1465 &self->write) < 0) {
1466 Py_DECREF(self);
1467 return NULL;
1468 }
1469 if (self->write == NULL || !PyCallable_Check(self->write)) {
1470 PyErr_SetString(PyExc_TypeError,
1471 "argument 1 must have a \"write\" method");
1472 Py_DECREF(self);
1473 return NULL;
1474 }
1475 self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1476 keyword_args);
1477 if (self->dialect == NULL) {
1478 Py_DECREF(self);
1479 return NULL;
1480 }
1481 PyObject_GC_Track(self);
1482 return (PyObject *)self;
1483 }
1484
1485 /*
1486 * DIALECT REGISTRY
1487 */
1488
1489 /*[clinic input]
1490 _csv.list_dialects
1491
1492 Return a list of all known dialect names.
1493
1494 names = csv.list_dialects()
1495 [clinic start generated code]*/
1496
1497 static PyObject *
1498 _csv_list_dialects_impl(PyObject *module)
1499 /*[clinic end generated code: output=a5b92b215b006a6d input=8953943eb17d98ab]*/
1500 {
1501 return PyDict_Keys(get_csv_state(module)->dialects);
1502 }
1503
1504 static PyObject *
1505 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1506 {
1507 PyObject *name_obj, *dialect_obj = NULL;
1508 _csvstate *module_state = get_csv_state(module);
1509 PyObject *dialect;
1510
1511 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1512 return NULL;
1513 if (!PyUnicode_Check(name_obj)) {
1514 PyErr_SetString(PyExc_TypeError,
1515 "dialect name must be a string");
1516 return NULL;
1517 }
1518 if (PyUnicode_READY(name_obj) == -1)
1519 return NULL;
1520 dialect = _call_dialect(module_state, dialect_obj, kwargs);
1521 if (dialect == NULL)
1522 return NULL;
1523 if (PyDict_SetItem(module_state->dialects, name_obj, dialect) < 0) {
1524 Py_DECREF(dialect);
1525 return NULL;
1526 }
1527 Py_DECREF(dialect);
1528 Py_RETURN_NONE;
1529 }
1530
1531
1532 /*[clinic input]
1533 _csv.unregister_dialect
1534
1535 name: object
1536
1537 Delete the name/dialect mapping associated with a string name.
1538
1539 csv.unregister_dialect(name)
1540 [clinic start generated code]*/
1541
1542 static PyObject *
1543 _csv_unregister_dialect_impl(PyObject *module, PyObject *name)
1544 /*[clinic end generated code: output=0813ebca6c058df4 input=6b5c1557bf60c7e7]*/
1545 {
1546 _csvstate *module_state = get_csv_state(module);
1547 if (PyDict_DelItem(module_state->dialects, name) < 0) {
1548 if (PyErr_ExceptionMatches(PyExc_KeyError)) {
1549 PyErr_Format(module_state->error_obj, "unknown dialect");
1550 }
1551 return NULL;
1552 }
1553 Py_RETURN_NONE;
1554 }
1555
1556 /*[clinic input]
1557 _csv.get_dialect
1558
1559 name: object
1560
1561 Return the dialect instance associated with name.
1562
1563 dialect = csv.get_dialect(name)
1564 [clinic start generated code]*/
1565
1566 static PyObject *
1567 _csv_get_dialect_impl(PyObject *module, PyObject *name)
1568 /*[clinic end generated code: output=aa988cd573bebebb input=edf9ddab32e448fb]*/
1569 {
1570 return get_dialect_from_registry(name, get_csv_state(module));
1571 }
1572
1573 /*[clinic input]
1574 _csv.field_size_limit
1575
1576 new_limit: object = NULL
1577
1578 Sets an upper limit on parsed fields.
1579
1580 csv.field_size_limit([limit])
1581
1582 Returns old limit. If limit is not given, no new limit is set and
1583 the old limit is returned
1584 [clinic start generated code]*/
1585
1586 static PyObject *
1587 _csv_field_size_limit_impl(PyObject *module, PyObject *new_limit)
1588 /*[clinic end generated code: output=f2799ecd908e250b input=cec70e9226406435]*/
1589 {
1590 _csvstate *module_state = get_csv_state(module);
1591 long old_limit = module_state->field_limit;
1592 if (new_limit != NULL) {
1593 if (!PyLong_CheckExact(new_limit)) {
1594 PyErr_Format(PyExc_TypeError,
1595 "limit must be an integer");
1596 return NULL;
1597 }
1598 module_state->field_limit = PyLong_AsLong(new_limit);
1599 if (module_state->field_limit == -1 && PyErr_Occurred()) {
1600 module_state->field_limit = old_limit;
1601 return NULL;
1602 }
1603 }
1604 return PyLong_FromLong(old_limit);
1605 }
1606
/* Slot table for the exception type: it contains only the sentinel. */
static PyType_Slot error_slots[] = {
    {0, NULL},
};

/* Type spec for "_csv.Error"; csv_exec creates the type from it with
 * PyExc_Exception as its only base. */
PyType_Spec error_spec = {
    .name = "_csv.Error",
    .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
    .slots = error_slots,
};
1616
1617 /*
1618 * MODULE
1619 */
1620
/* Module docstring; referenced from the _csvmodule definition. */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305. Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail. The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings. When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
" class excel:\n"
" delimiter = ','\n"
" quotechar = '\"'\n"
" escapechar = None\n"
" doublequote = True\n"
" skipinitialspace = False\n"
" lineterminator = '\\r\\n'\n"
" quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
" * quotechar - specifies a one-character string to use as the\n"
" quoting character. It defaults to '\"'.\n"
" * delimiter - specifies a one-character string to use as the\n"
" field separator. It defaults to ','.\n"
" * skipinitialspace - specifies how to interpret spaces which\n"
" immediately follow a delimiter. It defaults to False, which\n"
" means that spaces immediately following a delimiter is part\n"
" of the following field.\n"
" * lineterminator - specifies the character sequence which should\n"
" terminate rows.\n"
" * quoting - controls when quotes should be generated by the writer.\n"
" It can take on any of the following module constants:\n"
"\n"
" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
" field contains either the quotechar or the delimiter\n"
" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
" fields which do not parse as integers or floating point\n"
" numbers.\n"
" csv.QUOTE_STRINGS means that quotes are always placed around\n"
" fields which are strings. Note that the Python value None\n"
" is not a string.\n"
" csv.QUOTE_NOTNULL means that quotes are only placed around fields\n"
" that are not the Python value None.\n"
" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
" * escapechar - specifies a one-character string used to escape\n"
" the delimiter when quoting is set to QUOTE_NONE.\n"
" * doublequote - controls the handling of quotes inside fields. When\n"
" True, two consecutive quotes are interpreted as one during read,\n"
" and when writing, each quote character embedded in the data is\n"
" written as two quotes\n");
1683
/* Docstring for the module-level reader() function (see csv_methods). */
PyDoc_STRVAR(csv_reader_doc,
" csv_reader = reader(iterable [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in csv_reader:\n"
" process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list. The\n"
"optional \"dialect\" parameter is discussed below. The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator. Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines).\n");
1698
/* Docstring for the module-level writer() function (see csv_methods). */
PyDoc_STRVAR(csv_writer_doc,
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in sequence:\n"
" csv_writer.writerow(row)\n"
"\n"
" [or]\n"
"\n"
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1712
/* Docstring for the module-level register_dialect() function (see
 * csv_methods).
 * NOTE(review): the usage line shows the call's result being assigned,
 * but csv_register_dialect() returns None -- consider rewording. */
PyDoc_STRVAR(csv_register_dialect_doc,
"Create a mapping from a string name to a dialect class.\n"
" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
1716
/* Module-level function table.  reader(), writer() and register_dialect()
 * are declared by hand with METH_VARARGS | METH_KEYWORDS; the remaining
 * entries come from the *_METHODDEF macros. */
static struct PyMethodDef csv_methods[] = {
    { "reader", _PyCFunction_CAST(csv_reader),
        METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
    { "writer", _PyCFunction_CAST(csv_writer),
        METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
    { "register_dialect", _PyCFunction_CAST(csv_register_dialect),
        METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
    _CSV_LIST_DIALECTS_METHODDEF
    _CSV_UNREGISTER_DIALECT_METHODDEF
    _CSV_GET_DIALECT_METHODDEF
    _CSV_FIELD_SIZE_LIMIT_METHODDEF
    { NULL, NULL }  /* sentinel */
};
1730
1731 static int
1732 csv_exec(PyObject *module) {
1733 const StyleDesc *style;
1734 PyObject *temp;
1735 _csvstate *module_state = get_csv_state(module);
1736
1737 temp = PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL);
1738 module_state->dialect_type = (PyTypeObject *)temp;
1739 if (PyModule_AddObjectRef(module, "Dialect", temp) < 0) {
1740 return -1;
1741 }
1742
1743 temp = PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL);
1744 module_state->reader_type = (PyTypeObject *)temp;
1745 if (PyModule_AddObjectRef(module, "Reader", temp) < 0) {
1746 return -1;
1747 }
1748
1749 temp = PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL);
1750 module_state->writer_type = (PyTypeObject *)temp;
1751 if (PyModule_AddObjectRef(module, "Writer", temp) < 0) {
1752 return -1;
1753 }
1754
1755 /* Add version to the module. */
1756 if (PyModule_AddStringConstant(module, "__version__",
1757 MODULE_VERSION) == -1) {
1758 return -1;
1759 }
1760
1761 /* Set the field limit */
1762 module_state->field_limit = 128 * 1024;
1763
1764 /* Add _dialects dictionary */
1765 module_state->dialects = PyDict_New();
1766 if (PyModule_AddObjectRef(module, "_dialects", module_state->dialects) < 0) {
1767 return -1;
1768 }
1769
1770 /* Add quote styles into dictionary */
1771 for (style = quote_styles; style->name; style++) {
1772 if (PyModule_AddIntConstant(module, style->name,
1773 style->style) == -1)
1774 return -1;
1775 }
1776
1777 /* Add the CSV exception object to the module. */
1778 PyObject *bases = PyTuple_Pack(1, PyExc_Exception);
1779 if (bases == NULL) {
1780 return -1;
1781 }
1782 module_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec,
1783 bases);
1784 Py_DECREF(bases);
1785 if (module_state->error_obj == NULL) {
1786 return -1;
1787 }
1788 if (PyModule_AddType(module, (PyTypeObject *)module_state->error_obj) != 0) {
1789 return -1;
1790 }
1791
1792 module_state->str_write = PyUnicode_InternFromString("write");
1793 if (module_state->str_write == NULL) {
1794 return -1;
1795 }
1796 return 0;
1797 }
1798
/* Module slots: csv_exec is registered as the Py_mod_exec hook, and the
 * Py_mod_multiple_interpreters slot is set to
 * Py_MOD_PER_INTERPRETER_GIL_SUPPORTED. */
static PyModuleDef_Slot csv_slots[] = {
    {Py_mod_exec, csv_exec},
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
    {0, NULL}  /* sentinel */
};
1804
1805 static struct PyModuleDef _csvmodule = {
1806 PyModuleDef_HEAD_INIT,
1807 "_csv",
1808 csv_module_doc,
1809 sizeof(_csvstate),
1810 csv_methods,
1811 csv_slots,
1812 _csv_traverse,
1813 _csv_clear,
1814 _csv_free
1815 };
1816
/* Module initialization entry point: hands the module definition to
 * PyModuleDef_Init(); the module is populated later by the csv_exec slot
 * listed in csv_slots. */
PyMODINIT_FUNC
PyInit__csv(void)
{
    return PyModuleDef_Init(&_csvmodule);
}