1  /* csv module */
       2  
       3  /*
       4  
       5  This module provides the low-level underpinnings of a CSV reading/writing
       6  module.  Users should not use this module directly, but import the csv.py
       7  module instead.
       8  
       9  */
      10  
      11  #define MODULE_VERSION "1.0"
      12  
      13  #include "Python.h"
      14  #include "structmember.h"         // PyMemberDef
      15  #include <stdbool.h>
      16  
      17  /*[clinic input]
      18  module _csv
      19  [clinic start generated code]*/
      20  /*[clinic end generated code: output=da39a3ee5e6b4b0d input=385118b71aa43706]*/
      21  
      22  #include "clinic/_csv.c.h"
      23  #define NOT_SET ((Py_UCS4)-1)
      24  #define EOL ((Py_UCS4)-2)
      25  
      26  
      27  typedef struct {
      28      PyObject *error_obj;   /* CSV exception */
      29      PyObject *dialects;   /* Dialect registry */
      30      PyTypeObject *dialect_type;
      31      PyTypeObject *reader_type;
      32      PyTypeObject *writer_type;
      33      long field_limit;   /* max parsed field size */
      34      PyObject *str_write;
      35  } _csvstate;
      36  
      37  static struct PyModuleDef _csvmodule;
      38  
      39  static inline _csvstate*
      40  get_csv_state(PyObject *module)
      41  {
      42      void *state = PyModule_GetState(module);
      43      assert(state != NULL);
      44      return (_csvstate *)state;
      45  }
      46  
      47  static int
      48  _csv_clear(PyObject *module)
      49  {
      50      _csvstate *module_state = PyModule_GetState(module);
      51      Py_CLEAR(module_state->error_obj);
      52      Py_CLEAR(module_state->dialects);
      53      Py_CLEAR(module_state->dialect_type);
      54      Py_CLEAR(module_state->reader_type);
      55      Py_CLEAR(module_state->writer_type);
      56      Py_CLEAR(module_state->str_write);
      57      return 0;
      58  }
      59  
      60  static int
      61  _csv_traverse(PyObject *module, visitproc visit, void *arg)
      62  {
      63      _csvstate *module_state = PyModule_GetState(module);
      64      Py_VISIT(module_state->error_obj);
      65      Py_VISIT(module_state->dialects);
      66      Py_VISIT(module_state->dialect_type);
      67      Py_VISIT(module_state->reader_type);
      68      Py_VISIT(module_state->writer_type);
      69      return 0;
      70  }
      71  
      72  static void
      73  _csv_free(void *module)
      74  {
      75     _csv_clear((PyObject *)module);
      76  }
      77  
      78  typedef enum {
      79      START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
      80      IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
      81      EAT_CRNL,AFTER_ESCAPED_CRNL
      82  } ParserState;
      83  
      84  typedef enum {
      85      QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE,
      86      QUOTE_STRINGS, QUOTE_NOTNULL
      87  } QuoteStyle;
      88  
      89  typedef struct {
      90      QuoteStyle style;
      91      const char *name;
      92  } StyleDesc;
      93  
      94  static const StyleDesc quote_styles[] = {
      95      { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
      96      { QUOTE_ALL,        "QUOTE_ALL" },
      97      { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
      98      { QUOTE_NONE,       "QUOTE_NONE" },
      99      { QUOTE_STRINGS,    "QUOTE_STRINGS" },
     100      { QUOTE_NOTNULL,    "QUOTE_NOTNULL" },
     101      { 0 }
     102  };
     103  
     104  typedef struct {
     105      PyObject_HEAD
     106  
     107      char doublequote;           /* is " represented by ""? */
     108      char skipinitialspace;      /* ignore spaces following delimiter? */
     109      char strict;                /* raise exception on bad CSV */
     110      int quoting;                /* style of quoting to write */
     111      Py_UCS4 delimiter;          /* field separator */
     112      Py_UCS4 quotechar;          /* quote character */
     113      Py_UCS4 escapechar;         /* escape character */
     114      PyObject *lineterminator;   /* string to write between records */
     115  
     116  } DialectObj;
     117  
     118  typedef struct {
     119      PyObject_HEAD
     120  
     121      PyObject *input_iter;   /* iterate over this for input lines */
     122  
     123      DialectObj *dialect;    /* parsing dialect */
     124  
     125      PyObject *fields;           /* field list for current record */
     126      ParserState state;          /* current CSV parse state */
     127      Py_UCS4 *field;             /* temporary buffer */
     128      Py_ssize_t field_size;      /* size of allocated buffer */
     129      Py_ssize_t field_len;       /* length of current field */
     130      int numeric_field;          /* treat field as numeric */
     131      unsigned long line_num;     /* Source-file line number */
     132  } ReaderObj;
     133  
     134  typedef struct {
     135      PyObject_HEAD
     136  
     137      PyObject *write;    /* write output lines to this file */
     138  
     139      DialectObj *dialect;    /* parsing dialect */
     140  
     141      Py_UCS4 *rec;            /* buffer for parser.join */
     142      Py_ssize_t rec_size;        /* size of allocated record */
     143      Py_ssize_t rec_len;         /* length of record */
     144      int num_fields;             /* number of fields in record */
     145  
     146      PyObject *error_obj;       /* cached error object */
     147  } WriterObj;
     148  
     149  /*
     150   * DIALECT class
     151   */
     152  
     153  static PyObject *
     154  get_dialect_from_registry(PyObject *name_obj, _csvstate *module_state)
     155  {
     156      PyObject *dialect_obj;
     157  
     158      dialect_obj = PyDict_GetItemWithError(module_state->dialects, name_obj);
     159      if (dialect_obj == NULL) {
     160          if (!PyErr_Occurred())
     161              PyErr_Format(module_state->error_obj, "unknown dialect");
     162      }
     163      else
     164          Py_INCREF(dialect_obj);
     165  
     166      return dialect_obj;
     167  }
     168  
     169  static PyObject *
     170  get_char_or_None(Py_UCS4 c)
     171  {
     172      if (c == NOT_SET) {
     173          Py_RETURN_NONE;
     174      }
     175      else
     176          return PyUnicode_FromOrdinal(c);
     177  }
     178  
     179  static PyObject *
     180  Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
     181  {
     182      return Py_XNewRef(self->lineterminator);
     183  }
     184  
     185  static PyObject *
     186  Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
     187  {
     188      return get_char_or_None(self->delimiter);
     189  }
     190  
     191  static PyObject *
     192  Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
     193  {
     194      return get_char_or_None(self->escapechar);
     195  }
     196  
     197  static PyObject *
     198  Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
     199  {
     200      return get_char_or_None(self->quotechar);
     201  }
     202  
     203  static PyObject *
     204  Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
     205  {
     206      return PyLong_FromLong(self->quoting);
     207  }
     208  
     209  static int
     210  _set_bool(const char *name, char *target, PyObject *src, bool dflt)
     211  {
     212      if (src == NULL)
     213          *target = dflt;
     214      else {
     215          int b = PyObject_IsTrue(src);
     216          if (b < 0)
     217              return -1;
     218          *target = (char)b;
     219      }
     220      return 0;
     221  }
     222  
     223  static int
     224  _set_int(const char *name, int *target, PyObject *src, int dflt)
     225  {
     226      if (src == NULL)
     227          *target = dflt;
     228      else {
     229          int value;
     230          if (!PyLong_CheckExact(src)) {
     231              PyErr_Format(PyExc_TypeError,
     232                           "\"%s\" must be an integer", name);
     233              return -1;
     234          }
     235          value = _PyLong_AsInt(src);
     236          if (value == -1 && PyErr_Occurred()) {
     237              return -1;
     238          }
     239          *target = value;
     240      }
     241      return 0;
     242  }
     243  
     244  static int
     245  _set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
     246  {
     247      if (src == NULL) {
     248          *target = dflt;
     249      }
     250      else {
     251          *target = NOT_SET;
     252          if (src != Py_None) {
     253              if (!PyUnicode_Check(src)) {
     254                  PyErr_Format(PyExc_TypeError,
     255                      "\"%s\" must be string or None, not %.200s", name,
     256                      Py_TYPE(src)->tp_name);
     257                  return -1;
     258              }
     259              Py_ssize_t len = PyUnicode_GetLength(src);
     260              if (len < 0) {
     261                  return -1;
     262              }
     263              if (len != 1) {
     264                  PyErr_Format(PyExc_TypeError,
     265                      "\"%s\" must be a 1-character string",
     266                      name);
     267                  return -1;
     268              }
     269              /* PyUnicode_READY() is called in PyUnicode_GetLength() */
     270              *target = PyUnicode_READ_CHAR(src, 0);
     271          }
     272      }
     273      return 0;
     274  }
     275  
     276  static int
     277  _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
     278  {
     279      if (src == NULL) {
     280          *target = dflt;
     281      }
     282      else {
     283          if (!PyUnicode_Check(src)) {
     284              PyErr_Format(PyExc_TypeError,
     285                           "\"%s\" must be string, not %.200s", name,
     286                           Py_TYPE(src)->tp_name);
     287                  return -1;
     288          }
     289          Py_ssize_t len = PyUnicode_GetLength(src);
     290          if (len < 0) {
     291              return -1;
     292          }
     293          if (len != 1) {
     294              PyErr_Format(PyExc_TypeError,
     295                           "\"%s\" must be a 1-character string",
     296                           name);
     297              return -1;
     298          }
     299          /* PyUnicode_READY() is called in PyUnicode_GetLength() */
     300          *target = PyUnicode_READ_CHAR(src, 0);
     301      }
     302      return 0;
     303  }
     304  
     305  static int
     306  _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
     307  {
     308      if (src == NULL)
     309          *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
     310      else {
     311          if (src == Py_None)
     312              *target = NULL;
     313          else if (!PyUnicode_Check(src)) {
     314              PyErr_Format(PyExc_TypeError,
     315                           "\"%s\" must be a string", name);
     316              return -1;
     317          }
     318          else {
     319              if (PyUnicode_READY(src) == -1)
     320                  return -1;
     321              Py_XSETREF(*target, Py_NewRef(src));
     322          }
     323      }
     324      return 0;
     325  }
     326  
     327  static int
     328  dialect_check_quoting(int quoting)
     329  {
     330      const StyleDesc *qs;
     331  
     332      for (qs = quote_styles; qs->name; qs++) {
     333          if ((int)qs->style == quoting)
     334              return 0;
     335      }
     336      PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
     337      return -1;
     338  }
     339  
     340  #define D_OFF(x) offsetof(DialectObj, x)
     341  
     342  static struct PyMemberDef Dialect_memberlist[] = {
     343      { "skipinitialspace",   T_BOOL, D_OFF(skipinitialspace), READONLY },
     344      { "doublequote",        T_BOOL, D_OFF(doublequote), READONLY },
     345      { "strict",             T_BOOL, D_OFF(strict), READONLY },
     346      { NULL }
     347  };
     348  
     349  static PyGetSetDef Dialect_getsetlist[] = {
     350      { "delimiter",          (getter)Dialect_get_delimiter},
     351      { "escapechar",             (getter)Dialect_get_escapechar},
     352      { "lineterminator",         (getter)Dialect_get_lineterminator},
     353      { "quotechar",              (getter)Dialect_get_quotechar},
     354      { "quoting",                (getter)Dialect_get_quoting},
     355      {NULL},
     356  };
     357  
     358  static void
     359  Dialect_dealloc(DialectObj *self)
     360  {
     361      PyTypeObject *tp = Py_TYPE(self);
     362      PyObject_GC_UnTrack(self);
     363      tp->tp_clear((PyObject *)self);
     364      PyObject_GC_Del(self);
     365      Py_DECREF(tp);
     366  }
     367  
     368  static char *dialect_kws[] = {
     369      "dialect",
     370      "delimiter",
     371      "doublequote",
     372      "escapechar",
     373      "lineterminator",
     374      "quotechar",
     375      "quoting",
     376      "skipinitialspace",
     377      "strict",
     378      NULL
     379  };
     380  
     381  static _csvstate *
     382  _csv_state_from_type(PyTypeObject *type, const char *name)
     383  {
     384      PyObject *module = PyType_GetModuleByDef(type, &_csvmodule);
     385      if (module == NULL) {
     386          return NULL;
     387      }
     388      _csvstate *module_state = PyModule_GetState(module);
     389      if (module_state == NULL) {
     390          PyErr_Format(PyExc_SystemError,
     391                       "%s: No _csv module state found", name);
     392          return NULL;
     393      }
     394      return module_state;
     395  }
     396  
     397  static PyObject *
     398  dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
     399  {
     400      DialectObj *self;
     401      PyObject *ret = NULL;
     402      PyObject *dialect = NULL;
     403      PyObject *delimiter = NULL;
     404      PyObject *doublequote = NULL;
     405      PyObject *escapechar = NULL;
     406      PyObject *lineterminator = NULL;
     407      PyObject *quotechar = NULL;
     408      PyObject *quoting = NULL;
     409      PyObject *skipinitialspace = NULL;
     410      PyObject *strict = NULL;
     411  
     412      if (!PyArg_ParseTupleAndKeywords(args, kwargs,
     413                                       "|OOOOOOOOO", dialect_kws,
     414                                       &dialect,
     415                                       &delimiter,
     416                                       &doublequote,
     417                                       &escapechar,
     418                                       &lineterminator,
     419                                       &quotechar,
     420                                       &quoting,
     421                                       &skipinitialspace,
     422                                       &strict))
     423          return NULL;
     424  
     425      _csvstate *module_state = _csv_state_from_type(type, "dialect_new");
     426      if (module_state == NULL) {
     427          return NULL;
     428      }
     429  
     430      if (dialect != NULL) {
     431          if (PyUnicode_Check(dialect)) {
     432              dialect = get_dialect_from_registry(dialect, module_state);
     433              if (dialect == NULL)
     434                  return NULL;
     435          }
     436          else
     437              Py_INCREF(dialect);
     438          /* Can we reuse this instance? */
     439          if (PyObject_TypeCheck(dialect, module_state->dialect_type) &&
     440              delimiter == NULL &&
     441              doublequote == NULL &&
     442              escapechar == NULL &&
     443              lineterminator == NULL &&
     444              quotechar == NULL &&
     445              quoting == NULL &&
     446              skipinitialspace == NULL &&
     447              strict == NULL)
     448              return dialect;
     449      }
     450  
     451      self = (DialectObj *)type->tp_alloc(type, 0);
     452      if (self == NULL) {
     453          Py_CLEAR(dialect);
     454          return NULL;
     455      }
     456      self->lineterminator = NULL;
     457  
     458      Py_XINCREF(delimiter);
     459      Py_XINCREF(doublequote);
     460      Py_XINCREF(escapechar);
     461      Py_XINCREF(lineterminator);
     462      Py_XINCREF(quotechar);
     463      Py_XINCREF(quoting);
     464      Py_XINCREF(skipinitialspace);
     465      Py_XINCREF(strict);
     466      if (dialect != NULL) {
     467  #define DIALECT_GETATTR(v, n)                            \
     468          do {                                             \
     469              if (v == NULL) {                             \
     470                  v = PyObject_GetAttrString(dialect, n);  \
     471                  if (v == NULL)                           \
     472                      PyErr_Clear();                       \
     473              }                                            \
     474          } while (0)
     475          DIALECT_GETATTR(delimiter, "delimiter");
     476          DIALECT_GETATTR(doublequote, "doublequote");
     477          DIALECT_GETATTR(escapechar, "escapechar");
     478          DIALECT_GETATTR(lineterminator, "lineterminator");
     479          DIALECT_GETATTR(quotechar, "quotechar");
     480          DIALECT_GETATTR(quoting, "quoting");
     481          DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
     482          DIALECT_GETATTR(strict, "strict");
     483      }
     484  
     485      /* check types and convert to C values */
     486  #define DIASET(meth, name, target, src, dflt) \
     487      if (meth(name, target, src, dflt)) \
     488          goto err
     489      DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
     490      DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
     491      DIASET(_set_char_or_none, "escapechar", &self->escapechar, escapechar, NOT_SET);
     492      DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
     493      DIASET(_set_char_or_none, "quotechar", &self->quotechar, quotechar, '"');
     494      DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
     495      DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
     496      DIASET(_set_bool, "strict", &self->strict, strict, false);
     497  
     498      /* validate options */
     499      if (dialect_check_quoting(self->quoting))
     500          goto err;
     501      if (self->delimiter == NOT_SET) {
     502          PyErr_SetString(PyExc_TypeError,
     503                          "\"delimiter\" must be a 1-character string");
     504          goto err;
     505      }
     506      if (quotechar == Py_None && quoting == NULL)
     507          self->quoting = QUOTE_NONE;
     508      if (self->quoting != QUOTE_NONE && self->quotechar == NOT_SET) {
     509          PyErr_SetString(PyExc_TypeError,
     510                          "quotechar must be set if quoting enabled");
     511          goto err;
     512      }
     513      if (self->lineterminator == NULL) {
     514          PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
     515          goto err;
     516      }
     517  
     518      ret = Py_NewRef(self);
     519  err:
     520      Py_CLEAR(self);
     521      Py_CLEAR(dialect);
     522      Py_CLEAR(delimiter);
     523      Py_CLEAR(doublequote);
     524      Py_CLEAR(escapechar);
     525      Py_CLEAR(lineterminator);
     526      Py_CLEAR(quotechar);
     527      Py_CLEAR(quoting);
     528      Py_CLEAR(skipinitialspace);
     529      Py_CLEAR(strict);
     530      return ret;
     531  }
     532  
/* Since Dialect is now a heap type, it inherits the pickling methods for
 * protocols 0 and 1 from object; they must therefore be overridden. */
     535  
     536  PyDoc_STRVAR(dialect_reduce_doc, "raises an exception to avoid pickling");
     537  
     538  static PyObject *
     539  Dialect_reduce(PyObject *self, PyObject *args) {
     540      PyErr_Format(PyExc_TypeError,
     541          "cannot pickle '%.100s' instances", _PyType_Name(Py_TYPE(self)));
     542      return NULL;
     543  }
     544  
     545  static struct PyMethodDef dialect_methods[] = {
     546      {"__reduce__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
     547      {"__reduce_ex__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
     548      {NULL, NULL}
     549  };
     550  
     551  PyDoc_STRVAR(Dialect_Type_doc,
     552  "CSV dialect\n"
     553  "\n"
     554  "The Dialect type records CSV parsing and generation options.\n");
     555  
     556  static int
     557  Dialect_clear(DialectObj *self)
     558  {
     559      Py_CLEAR(self->lineterminator);
     560      return 0;
     561  }
     562  
     563  static int
     564  Dialect_traverse(DialectObj *self, visitproc visit, void *arg)
     565  {
     566      Py_VISIT(self->lineterminator);
     567      Py_VISIT(Py_TYPE(self));
     568      return 0;
     569  }
     570  
     571  static PyType_Slot Dialect_Type_slots[] = {
     572      {Py_tp_doc, (char*)Dialect_Type_doc},
     573      {Py_tp_members, Dialect_memberlist},
     574      {Py_tp_getset, Dialect_getsetlist},
     575      {Py_tp_new, dialect_new},
     576      {Py_tp_methods, dialect_methods},
     577      {Py_tp_dealloc, Dialect_dealloc},
     578      {Py_tp_clear, Dialect_clear},
     579      {Py_tp_traverse, Dialect_traverse},
     580      {0, NULL}
     581  };
     582  
     583  PyType_Spec Dialect_Type_spec = {
     584      .name = "_csv.Dialect",
     585      .basicsize = sizeof(DialectObj),
     586      .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
     587                Py_TPFLAGS_IMMUTABLETYPE),
     588      .slots = Dialect_Type_slots,
     589  };
     590  
     591  
     592  /*
     593   * Return an instance of the dialect type, given a Python instance or kwarg
     594   * description of the dialect
     595   */
     596  static PyObject *
     597  _call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs)
     598  {
     599      PyObject *type = (PyObject *)module_state->dialect_type;
     600      if (dialect_inst) {
     601          return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs);
     602      }
     603      else {
     604          return PyObject_VectorcallDict(type, NULL, 0, kwargs);
     605      }
     606  }
     607  
     608  /*
     609   * READER
     610   */
     611  static int
     612  parse_save_field(ReaderObj *self)
     613  {
     614      PyObject *field;
     615  
     616      field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
     617                                        (void *) self->field, self->field_len);
     618      if (field == NULL)
     619          return -1;
     620      self->field_len = 0;
     621      if (self->numeric_field) {
     622          PyObject *tmp;
     623  
     624          self->numeric_field = 0;
     625          tmp = PyNumber_Float(field);
     626          Py_DECREF(field);
     627          if (tmp == NULL)
     628              return -1;
     629          field = tmp;
     630      }
     631      if (PyList_Append(self->fields, field) < 0) {
     632          Py_DECREF(field);
     633          return -1;
     634      }
     635      Py_DECREF(field);
     636      return 0;
     637  }
     638  
     639  static int
     640  parse_grow_buff(ReaderObj *self)
     641  {
     642      assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
     643  
     644      Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
     645      Py_UCS4 *field_new = self->field;
     646      PyMem_Resize(field_new, Py_UCS4, field_size_new);
     647      if (field_new == NULL) {
     648          PyErr_NoMemory();
     649          return 0;
     650      }
     651      self->field = field_new;
     652      self->field_size = field_size_new;
     653      return 1;
     654  }
     655  
     656  static int
     657  parse_add_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
     658  {
     659      if (self->field_len >= module_state->field_limit) {
     660          PyErr_Format(module_state->error_obj,
     661                       "field larger than field limit (%ld)",
     662                       module_state->field_limit);
     663          return -1;
     664      }
     665      if (self->field_len == self->field_size && !parse_grow_buff(self))
     666          return -1;
     667      self->field[self->field_len++] = c;
     668      return 0;
     669  }
     670  
     671  static int
     672  parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
     673  {
     674      DialectObj *dialect = self->dialect;
     675  
     676      switch (self->state) {
     677      case START_RECORD:
     678          /* start of record */
     679          if (c == EOL)
     680              /* empty line - return [] */
     681              break;
     682          else if (c == '\n' || c == '\r') {
     683              self->state = EAT_CRNL;
     684              break;
     685          }
     686          /* normal character - handle as START_FIELD */
     687          self->state = START_FIELD;
     688          /* fallthru */
     689      case START_FIELD:
     690          /* expecting field */
     691          if (c == '\n' || c == '\r' || c == EOL) {
     692              /* save empty field - return [fields] */
     693              if (parse_save_field(self) < 0)
     694                  return -1;
     695              self->state = (c == EOL ? START_RECORD : EAT_CRNL);
     696          }
     697          else if (c == dialect->quotechar &&
     698                   dialect->quoting != QUOTE_NONE) {
     699              /* start quoted field */
     700              self->state = IN_QUOTED_FIELD;
     701          }
     702          else if (c == dialect->escapechar) {
     703              /* possible escaped character */
     704              self->state = ESCAPED_CHAR;
     705          }
     706          else if (c == ' ' && dialect->skipinitialspace)
     707              /* ignore spaces at start of field */
     708              ;
     709          else if (c == dialect->delimiter) {
     710              /* save empty field */
     711              if (parse_save_field(self) < 0)
     712                  return -1;
     713          }
     714          else {
     715              /* begin new unquoted field */
     716              if (dialect->quoting == QUOTE_NONNUMERIC)
     717                  self->numeric_field = 1;
     718              if (parse_add_char(self, module_state, c) < 0)
     719                  return -1;
     720              self->state = IN_FIELD;
     721          }
     722          break;
     723  
     724      case ESCAPED_CHAR:
     725          if (c == '\n' || c=='\r') {
     726              if (parse_add_char(self, module_state, c) < 0)
     727                  return -1;
     728              self->state = AFTER_ESCAPED_CRNL;
     729              break;
     730          }
     731          if (c == EOL)
     732              c = '\n';
     733          if (parse_add_char(self, module_state, c) < 0)
     734              return -1;
     735          self->state = IN_FIELD;
     736          break;
     737  
     738      case AFTER_ESCAPED_CRNL:
     739          if (c == EOL)
     740              break;
     741          /*fallthru*/
     742  
     743      case IN_FIELD:
     744          /* in unquoted field */
     745          if (c == '\n' || c == '\r' || c == EOL) {
     746              /* end of line - return [fields] */
     747              if (parse_save_field(self) < 0)
     748                  return -1;
     749              self->state = (c == EOL ? START_RECORD : EAT_CRNL);
     750          }
     751          else if (c == dialect->escapechar) {
     752              /* possible escaped character */
     753              self->state = ESCAPED_CHAR;
     754          }
     755          else if (c == dialect->delimiter) {
     756              /* save field - wait for new field */
     757              if (parse_save_field(self) < 0)
     758                  return -1;
     759              self->state = START_FIELD;
     760          }
     761          else {
     762              /* normal character - save in field */
     763              if (parse_add_char(self, module_state, c) < 0)
     764                  return -1;
     765          }
     766          break;
     767  
     768      case IN_QUOTED_FIELD:
     769          /* in quoted field */
     770          if (c == EOL)
     771              ;
     772          else if (c == dialect->escapechar) {
     773              /* Possible escape character */
     774              self->state = ESCAPE_IN_QUOTED_FIELD;
     775          }
     776          else if (c == dialect->quotechar &&
     777                   dialect->quoting != QUOTE_NONE) {
     778              if (dialect->doublequote) {
     779                  /* doublequote; " represented by "" */
     780                  self->state = QUOTE_IN_QUOTED_FIELD;
     781              }
     782              else {
     783                  /* end of quote part of field */
     784                  self->state = IN_FIELD;
     785              }
     786          }
     787          else {
     788              /* normal character - save in field */
     789              if (parse_add_char(self, module_state, c) < 0)
     790                  return -1;
     791          }
     792          break;
     793  
     794      case ESCAPE_IN_QUOTED_FIELD:
     795          if (c == EOL)
     796              c = '\n';
     797          if (parse_add_char(self, module_state, c) < 0)
     798              return -1;
     799          self->state = IN_QUOTED_FIELD;
     800          break;
     801  
     802      case QUOTE_IN_QUOTED_FIELD:
     803          /* doublequote - seen a quote in a quoted field */
     804          if (dialect->quoting != QUOTE_NONE &&
     805              c == dialect->quotechar) {
     806              /* save "" as " */
     807              if (parse_add_char(self, module_state, c) < 0)
     808                  return -1;
     809              self->state = IN_QUOTED_FIELD;
     810          }
     811          else if (c == dialect->delimiter) {
     812              /* save field - wait for new field */
     813              if (parse_save_field(self) < 0)
     814                  return -1;
     815              self->state = START_FIELD;
     816          }
     817          else if (c == '\n' || c == '\r' || c == EOL) {
     818              /* end of line - return [fields] */
     819              if (parse_save_field(self) < 0)
     820                  return -1;
     821              self->state = (c == EOL ? START_RECORD : EAT_CRNL);
     822          }
     823          else if (!dialect->strict) {
     824              if (parse_add_char(self, module_state, c) < 0)
     825                  return -1;
     826              self->state = IN_FIELD;
     827          }
     828          else {
     829              /* illegal */
     830              PyErr_Format(module_state->error_obj, "'%c' expected after '%c'",
     831                              dialect->delimiter,
     832                              dialect->quotechar);
     833              return -1;
     834          }
     835          break;
     836  
     837      case EAT_CRNL:
     838          if (c == '\n' || c == '\r')
     839              ;
     840          else if (c == EOL)
     841              self->state = START_RECORD;
     842          else {
     843              PyErr_Format(module_state->error_obj,
     844                           "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
     845              return -1;
     846          }
     847          break;
     848  
     849      }
     850      return 0;
     851  }
     852  
     853  static int
     854  parse_reset(ReaderObj *self)
     855  {
     856      Py_XSETREF(self->fields, PyList_New(0));
     857      if (self->fields == NULL)
     858          return -1;
     859      self->field_len = 0;
     860      self->state = START_RECORD;
     861      self->numeric_field = 0;
     862      return 0;
     863  }
     864  
/* tp_iternext implementation for reader objects (installed through the
 * Py_tp_iternext slot).
 *
 * Pulls lines from self->input_iter and feeds them, one code point at a
 * time, to parse_process_char() until the parser is back in START_RECORD,
 * i.e. until a complete record has been assembled.  A record may therefore
 * consume more than one input line.
 *
 * Returns a new reference to the list of parsed fields, or NULL.  NULL is
 * returned with an exception set on error, and without an exception when
 * the input iterator is exhausted and no partial record is pending.
 */
static PyObject *
Reader_iternext(ReaderObj *self)
{
    PyObject *fields = NULL;
    Py_UCS4 c;
    Py_ssize_t pos, linelen;
    int kind;
    const void *data;
    PyObject *lineobj;

    _csvstate *module_state = _csv_state_from_type(Py_TYPE(self),
                                                   "Reader.__next__");
    if (module_state == NULL) {
        return NULL;
    }

    /* Start every record with an empty field list and a clean state. */
    if (parse_reset(self) < 0)
        return NULL;
    do {
        lineobj = PyIter_Next(self->input_iter);
        if (lineobj == NULL) {
            /* End of input OR exception */
            /* Input ended in the middle of a field (or inside an open
             * quoted field): strict dialects report an error, lenient
             * ones save what was collected and return the record. */
            if (!PyErr_Occurred() && (self->field_len != 0 ||
                                      self->state == IN_QUOTED_FIELD)) {
                if (self->dialect->strict)
                    PyErr_SetString(module_state->error_obj,
                                    "unexpected end of data");
                else if (parse_save_field(self) >= 0)
                    break;
            }
            return NULL;
        }
        if (!PyUnicode_Check(lineobj)) {
            PyErr_Format(module_state->error_obj,
                         "iterator should return strings, "
                         "not %.200s "
                         "(the file should be opened in text mode)",
                         Py_TYPE(lineobj)->tp_name
                );
            Py_DECREF(lineobj);
            return NULL;
        }
        if (PyUnicode_READY(lineobj) == -1) {
            Py_DECREF(lineobj);
            return NULL;
        }
        /* line_num counts lines fetched from the iterator, not records. */
        ++self->line_num;
        kind = PyUnicode_KIND(lineobj);
        data = PyUnicode_DATA(lineobj);
        pos = 0;
        linelen = PyUnicode_GET_LENGTH(lineobj);
        while (linelen--) {
            c = PyUnicode_READ(kind, data, pos);
            if (parse_process_char(self, module_state, c) < 0) {
                Py_DECREF(lineobj);
                goto err;
            }
            pos++;
        }
        Py_DECREF(lineobj);
        /* Tell the parser that the current line is finished. */
        if (parse_process_char(self, module_state, EOL) < 0)
            goto err;
    } while (self->state != START_RECORD);

    /* Hand ownership of the field list over to the caller. */
    fields = self->fields;
    self->fields = NULL;
err:
    /* On the error path 'fields' is still NULL. */
    return fields;
}
     934  
     935  static void
     936  Reader_dealloc(ReaderObj *self)
     937  {
     938      PyTypeObject *tp = Py_TYPE(self);
     939      PyObject_GC_UnTrack(self);
     940      tp->tp_clear((PyObject *)self);
     941      if (self->field != NULL) {
     942          PyMem_Free(self->field);
     943          self->field = NULL;
     944      }
     945      PyObject_GC_Del(self);
     946      Py_DECREF(tp);
     947  }
     948  
/* tp_traverse slot: report every object reference held by the reader
 * (its dialect, the input iterator, the current field list and the type
 * itself) to the garbage collector's visit callback.
 */
static int
Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->input_iter);
    Py_VISIT(self->fields);
    Py_VISIT(Py_TYPE(self));
    return 0;
}
     958  
/* tp_clear slot: drop the reader's references to its dialect, input
 * iterator and field list.  The raw self->field buffer is not touched
 * here; Reader_dealloc releases it.  Always returns 0.
 */
static int
Reader_clear(ReaderObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->input_iter);
    Py_CLEAR(self->fields);
    return 0;
}
     967  
/* Docstring of the reader type (referenced by the Py_tp_doc slot). */
PyDoc_STRVAR(Reader_Type_doc,
"CSV reader\n"
"\n"
"Reader objects are responsible for reading and parsing tabular data\n"
"in CSV format.\n"
);
     974  
/* Method table of the reader type.  Readers define no methods of their
 * own, so the table holds only the terminating sentinel entry. */
static struct PyMethodDef Reader_methods[] = {
    { NULL, NULL }
};
/* Byte offset of member x inside ReaderObj, used by the member table. */
#define R_OFF(x) offsetof(ReaderObj, x)

/* Read-only attributes exposed on reader objects:
 *   dialect  - the dialect object in use
 *   line_num - counter incremented by Reader_iternext for every line
 *              fetched from the input iterator
 */
static struct PyMemberDef Reader_memberlist[] = {
    { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
    { "line_num", T_ULONG, R_OFF(line_num), READONLY },
    { NULL }
};
     985  
     986  
/* Slot table of the reader heap type.  A reader is its own iterator
 * (PyObject_SelfIter); Reader_iternext produces the records.  The list is
 * terminated by the {0, NULL} sentinel. */
static PyType_Slot Reader_Type_slots[] = {
    {Py_tp_doc, (char*)Reader_Type_doc},
    {Py_tp_traverse, Reader_traverse},
    {Py_tp_iter, PyObject_SelfIter},
    {Py_tp_iternext, Reader_iternext},
    {Py_tp_methods, Reader_methods},
    {Py_tp_members, Reader_memberlist},
    {Py_tp_clear, Reader_clear},
    {Py_tp_dealloc, Reader_dealloc},
    {0, NULL}
};
     998  
/* Type specification for "_csv.reader".  Instances take part in cyclic
 * garbage collection (HAVE_GC).  The DISALLOW_INSTANTIATION flag means the
 * type cannot be called to create objects; csv_reader() allocates them
 * with PyObject_GC_New() instead. */
PyType_Spec Reader_Type_spec = {
    .name = "_csv.reader",
    .basicsize = sizeof(ReaderObj),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION),
    .slots = Reader_Type_slots
};
    1006  
    1007  
    1008  static PyObject *
    1009  csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
    1010  {
    1011      PyObject * iterator, * dialect = NULL;
    1012      _csvstate *module_state = get_csv_state(module);
    1013      ReaderObj * self = PyObject_GC_New(
    1014          ReaderObj,
    1015          module_state->reader_type);
    1016  
    1017      if (!self)
    1018          return NULL;
    1019  
    1020      self->dialect = NULL;
    1021      self->fields = NULL;
    1022      self->input_iter = NULL;
    1023      self->field = NULL;
    1024      self->field_size = 0;
    1025      self->line_num = 0;
    1026  
    1027      if (parse_reset(self) < 0) {
    1028          Py_DECREF(self);
    1029          return NULL;
    1030      }
    1031  
    1032      if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
    1033          Py_DECREF(self);
    1034          return NULL;
    1035      }
    1036      self->input_iter = PyObject_GetIter(iterator);
    1037      if (self->input_iter == NULL) {
    1038          Py_DECREF(self);
    1039          return NULL;
    1040      }
    1041      self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
    1042                                                  keyword_args);
    1043      if (self->dialect == NULL) {
    1044          Py_DECREF(self);
    1045          return NULL;
    1046      }
    1047  
    1048      PyObject_GC_Track(self);
    1049      return (PyObject *)self;
    1050  }
    1051  
    1052  /*
    1053   * WRITER
    1054   */
    1055  /* ---------------------------------------------------------------- */
    1056  static void
    1057  join_reset(WriterObj *self)
    1058  {
    1059      self->rec_len = 0;
    1060      self->num_fields = 0;
    1061  }
    1062  
    1063  #define MEM_INCR 32768
    1064  
    1065  /* Calculate new record length or append field to record.  Return new
    1066   * record length.
    1067   */
    1068  static Py_ssize_t
    1069  join_append_data(WriterObj *self, int field_kind, const void *field_data,
    1070                   Py_ssize_t field_len, int *quoted,
    1071                   int copy_phase)
    1072  {
    1073      DialectObj *dialect = self->dialect;
    1074      int i;
    1075      Py_ssize_t rec_len;
    1076  
    1077  #define INCLEN \
    1078      do {\
    1079          if (!copy_phase && rec_len == PY_SSIZE_T_MAX) {    \
    1080              goto overflow; \
    1081          } \
    1082          rec_len++; \
    1083      } while(0)
    1084  
    1085  #define ADDCH(c)                                \
    1086      do {\
    1087          if (copy_phase) \
    1088              self->rec[rec_len] = c;\
    1089          INCLEN;\
    1090      } while(0)
    1091  
    1092      rec_len = self->rec_len;
    1093  
    1094      /* If this is not the first field we need a field separator */
    1095      if (self->num_fields > 0)
    1096          ADDCH(dialect->delimiter);
    1097  
    1098      /* Handle preceding quote */
    1099      if (copy_phase && *quoted)
    1100          ADDCH(dialect->quotechar);
    1101  
    1102      /* Copy/count field data */
    1103      /* If field is null just pass over */
    1104      for (i = 0; field_data && (i < field_len); i++) {
    1105          Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
    1106          int want_escape = 0;
    1107  
    1108          if (c == dialect->delimiter ||
    1109              c == dialect->escapechar ||
    1110              c == dialect->quotechar  ||
    1111              PyUnicode_FindChar(
    1112                  dialect->lineterminator, c, 0,
    1113                  PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
    1114              if (dialect->quoting == QUOTE_NONE)
    1115                  want_escape = 1;
    1116              else {
    1117                  if (c == dialect->quotechar) {
    1118                      if (dialect->doublequote)
    1119                          ADDCH(dialect->quotechar);
    1120                      else
    1121                          want_escape = 1;
    1122                  }
    1123                  else if (c == dialect->escapechar) {
    1124                      want_escape = 1;
    1125                  }
    1126                  if (!want_escape)
    1127                      *quoted = 1;
    1128              }
    1129              if (want_escape) {
    1130                  if (dialect->escapechar == NOT_SET) {
    1131                      PyErr_Format(self->error_obj,
    1132                                   "need to escape, but no escapechar set");
    1133                      return -1;
    1134                  }
    1135                  ADDCH(dialect->escapechar);
    1136              }
    1137          }
    1138          /* Copy field character into record buffer.
    1139           */
    1140          ADDCH(c);
    1141      }
    1142  
    1143      if (*quoted) {
    1144          if (copy_phase)
    1145              ADDCH(dialect->quotechar);
    1146          else {
    1147              INCLEN; /* starting quote */
    1148              INCLEN; /* ending quote */
    1149          }
    1150      }
    1151      return rec_len;
    1152  
    1153    overflow:
    1154      PyErr_NoMemory();
    1155      return -1;
    1156  #undef ADDCH
    1157  #undef INCLEN
    1158  }
    1159  
    1160  static int
    1161  join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
    1162  {
    1163      assert(rec_len >= 0);
    1164  
    1165      if (rec_len > self->rec_size) {
    1166          size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
    1167          Py_UCS4 *rec_new = self->rec;
    1168          PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
    1169          if (rec_new == NULL) {
    1170              PyErr_NoMemory();
    1171              return 0;
    1172          }
    1173          self->rec = rec_new;
    1174          self->rec_size = (Py_ssize_t)rec_size_new;
    1175      }
    1176      return 1;
    1177  }
    1178  
    1179  static int
    1180  join_append(WriterObj *self, PyObject *field, int quoted)
    1181  {
    1182      int field_kind = -1;
    1183      const void *field_data = NULL;
    1184      Py_ssize_t field_len = 0;
    1185      Py_ssize_t rec_len;
    1186  
    1187      if (field != NULL) {
    1188          if (PyUnicode_READY(field) == -1)
    1189              return 0;
    1190          field_kind = PyUnicode_KIND(field);
    1191          field_data = PyUnicode_DATA(field);
    1192          field_len = PyUnicode_GET_LENGTH(field);
    1193      }
    1194      rec_len = join_append_data(self, field_kind, field_data, field_len,
    1195                                 &quoted, 0);
    1196      if (rec_len < 0)
    1197          return 0;
    1198  
    1199      /* grow record buffer if necessary */
    1200      if (!join_check_rec_size(self, rec_len))
    1201          return 0;
    1202  
    1203      self->rec_len = join_append_data(self, field_kind, field_data, field_len,
    1204                                       &quoted, 1);
    1205      self->num_fields++;
    1206  
    1207      return 1;
    1208  }
    1209  
    1210  static int
    1211  join_append_lineterminator(WriterObj *self)
    1212  {
    1213      Py_ssize_t terminator_len, i;
    1214      int term_kind;
    1215      const void *term_data;
    1216  
    1217      terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
    1218      if (terminator_len == -1)
    1219          return 0;
    1220  
    1221      /* grow record buffer if necessary */
    1222      if (!join_check_rec_size(self, self->rec_len + terminator_len))
    1223          return 0;
    1224  
    1225      term_kind = PyUnicode_KIND(self->dialect->lineterminator);
    1226      term_data = PyUnicode_DATA(self->dialect->lineterminator);
    1227      for (i = 0; i < terminator_len; i++)
    1228          self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
    1229      self->rec_len += terminator_len;
    1230  
    1231      return 1;
    1232  }
    1233  
/* Docstring of the writer's writerow() method (see Writer_methods). */
PyDoc_STRVAR(csv_writerow_doc,
"writerow(iterable)\n"
"\n"
"Construct and write a CSV record from an iterable of fields.  Non-string\n"
"elements will be converted to string.");
    1239  
/* writer.writerow(seq): format one record and pass it to the write
 * callable.
 *
 * Every item of seq becomes a field: str objects are used as they are,
 * None contributes an empty field, and anything else is converted with
 * PyObject_Str().  The fields are joined in the writer's internal record
 * buffer, the line terminator is appended, and the resulting string is
 * handed to self->write.
 *
 * Returns whatever self->write returned (a new reference), or NULL with
 * an exception set.
 */
static PyObject *
csv_writerow(WriterObj *self, PyObject *seq)
{
    DialectObj *dialect = self->dialect;
    PyObject *iter, *field, *line, *result;

    iter = PyObject_GetIter(seq);
    if (iter == NULL) {
        /* Report a non-iterable argument as the module's own error type;
         * other exceptions are passed through unchanged. */
        if (PyErr_ExceptionMatches(PyExc_TypeError)) {
            PyErr_Format(self->error_obj,
                         "iterable expected, not %.200s",
                         Py_TYPE(seq)->tp_name);
        }
        return NULL;
    }

    /* Join all fields in internal buffer.
     */
    join_reset(self);
    while ((field = PyIter_Next(iter))) {
        int append_ok;
        int quoted;

        /* Decide from the quoting mode and the original object whether
         * the field must be quoted regardless of its content. */
        switch (dialect->quoting) {
        case QUOTE_NONNUMERIC:
            quoted = !PyNumber_Check(field);
            break;
        case QUOTE_ALL:
            quoted = 1;
            break;
        case QUOTE_STRINGS:
            quoted = PyUnicode_Check(field);
            break;
        case QUOTE_NOTNULL:
            quoted = field != Py_None;
            break;
        default:
            quoted = 0;
            break;
        }

        if (PyUnicode_Check(field)) {
            append_ok = join_append(self, field, quoted);
            Py_DECREF(field);
        }
        else if (field == Py_None) {
            /* None is written as a field without content. */
            append_ok = join_append(self, NULL, quoted);
            Py_DECREF(field);
        }
        else {
            PyObject *str;

            str = PyObject_Str(field);
            Py_DECREF(field);
            if (str == NULL) {
                Py_DECREF(iter);
                return NULL;
            }
            append_ok = join_append(self, str, quoted);
            Py_DECREF(str);
        }
        if (!append_ok) {
            Py_DECREF(iter);
            return NULL;
        }
    }
    Py_DECREF(iter);
    /* PyIter_Next() returns NULL both at the end and on error. */
    if (PyErr_Occurred())
        return NULL;

    /* At least one field was appended but nothing was written, i.e. the
     * record consists of a single empty field.  Redo that field with
     * quoting forced on, or fail if the dialect never quotes. */
    if (self->num_fields > 0 && self->rec_len == 0) {
        if (dialect->quoting == QUOTE_NONE) {
            PyErr_Format(self->error_obj,
                "single empty field record must be quoted");
            return NULL;
        }
        self->num_fields--;
        if (!join_append(self, NULL, 1))
            return NULL;
    }

    /* Add line terminator.
     */
    if (!join_append_lineterminator(self)) {
        return NULL;
    }

    /* The record buffer holds Py_UCS4 code points. */
    line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
                                     (void *) self->rec, self->rec_len);
    if (line == NULL) {
        return NULL;
    }
    result = PyObject_CallOneArg(self->write, line);
    Py_DECREF(line);
    return result;
}
    1336  
/* Docstring of the writer's writerows() method (see Writer_methods). */
PyDoc_STRVAR(csv_writerows_doc,
"writerows(iterable of iterables)\n"
"\n"
"Construct and write a series of iterables to a csv file.  Non-string\n"
"elements will be converted to string.");
    1342  
    1343  static PyObject *
    1344  csv_writerows(WriterObj *self, PyObject *seqseq)
    1345  {
    1346      PyObject *row_iter, *row_obj, *result;
    1347  
    1348      row_iter = PyObject_GetIter(seqseq);
    1349      if (row_iter == NULL) {
    1350          return NULL;
    1351      }
    1352      while ((row_obj = PyIter_Next(row_iter))) {
    1353          result = csv_writerow(self, row_obj);
    1354          Py_DECREF(row_obj);
    1355          if (!result) {
    1356              Py_DECREF(row_iter);
    1357              return NULL;
    1358          }
    1359          else
    1360               Py_DECREF(result);
    1361      }
    1362      Py_DECREF(row_iter);
    1363      if (PyErr_Occurred())
    1364          return NULL;
    1365      Py_RETURN_NONE;
    1366  }
    1367  
/* Method table of the writer type.  Both methods take exactly one
 * argument (METH_O); the table ends with the sentinel entry. */
static struct PyMethodDef Writer_methods[] = {
    { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
    { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
    { NULL, NULL }
};
    1373  
/* Byte offset of member x inside WriterObj, used by the member table. */
#define W_OFF(x) offsetof(WriterObj, x)

/* Read-only attributes exposed on writer objects: only the dialect. */
static struct PyMemberDef Writer_memberlist[] = {
    { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
    { NULL }
};
    1380  
/* tp_traverse slot: report every object reference held by the writer
 * (its dialect, the write callable, the error class and the type itself)
 * to the garbage collector's visit callback.
 */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->write);
    Py_VISIT(self->error_obj);
    Py_VISIT(Py_TYPE(self));
    return 0;
}
    1390  
/* tp_clear slot: drop the writer's references to its dialect, write
 * callable and error class.  The raw self->rec buffer is not touched
 * here; Writer_dealloc releases it.  Always returns 0.
 */
static int
Writer_clear(WriterObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->write);
    Py_CLEAR(self->error_obj);
    return 0;
}
    1399  
    1400  static void
    1401  Writer_dealloc(WriterObj *self)
    1402  {
    1403      PyTypeObject *tp = Py_TYPE(self);
    1404      PyObject_GC_UnTrack(self);
    1405      tp->tp_clear((PyObject *)self);
    1406      if (self->rec != NULL) {
    1407          PyMem_Free(self->rec);
    1408      }
    1409      PyObject_GC_Del(self);
    1410      Py_DECREF(tp);
    1411  }
    1412  
/* Docstring of the writer type (referenced by the Py_tp_doc slot). */
PyDoc_STRVAR(Writer_Type_doc,
"CSV writer\n"
"\n"
"Writer objects are responsible for generating tabular data\n"
"in CSV format from sequence input.\n"
);
    1419  
/* Slot table of the writer heap type: docstring, GC support, destructor,
 * methods and members.  The list is terminated by the {0, NULL}
 * sentinel. */
static PyType_Slot Writer_Type_slots[] = {
    {Py_tp_doc, (char*)Writer_Type_doc},
    {Py_tp_traverse, Writer_traverse},
    {Py_tp_clear, Writer_clear},
    {Py_tp_dealloc, Writer_dealloc},
    {Py_tp_methods, Writer_methods},
    {Py_tp_members, Writer_memberlist},
    {0, NULL}
};
    1429  
/* Type specification for "_csv.writer".  Instances take part in cyclic
 * garbage collection (HAVE_GC).  The DISALLOW_INSTANTIATION flag means the
 * type cannot be called to create objects; csv_writer() allocates them
 * with PyObject_GC_New() instead. */
PyType_Spec Writer_Type_spec = {
    .name = "_csv.writer",
    .basicsize = sizeof(WriterObj),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION),
    .slots = Writer_Type_slots,
};
    1437  
    1438  
    1439  static PyObject *
    1440  csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
    1441  {
    1442      PyObject * output_file, * dialect = NULL;
    1443      _csvstate *module_state = get_csv_state(module);
    1444      WriterObj * self = PyObject_GC_New(WriterObj, module_state->writer_type);
    1445  
    1446      if (!self)
    1447          return NULL;
    1448  
    1449      self->dialect = NULL;
    1450      self->write = NULL;
    1451  
    1452      self->rec = NULL;
    1453      self->rec_size = 0;
    1454      self->rec_len = 0;
    1455      self->num_fields = 0;
    1456  
    1457      self->error_obj = Py_NewRef(module_state->error_obj);
    1458  
    1459      if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
    1460          Py_DECREF(self);
    1461          return NULL;
    1462      }
    1463      if (_PyObject_LookupAttr(output_file,
    1464                               module_state->str_write,
    1465                               &self->write) < 0) {
    1466          Py_DECREF(self);
    1467          return NULL;
    1468      }
    1469      if (self->write == NULL || !PyCallable_Check(self->write)) {
    1470          PyErr_SetString(PyExc_TypeError,
    1471                          "argument 1 must have a \"write\" method");
    1472          Py_DECREF(self);
    1473          return NULL;
    1474      }
    1475      self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
    1476                                                  keyword_args);
    1477      if (self->dialect == NULL) {
    1478          Py_DECREF(self);
    1479          return NULL;
    1480      }
    1481      PyObject_GC_Track(self);
    1482      return (PyObject *)self;
    1483  }
    1484  
    1485  /*
    1486   * DIALECT REGISTRY
    1487   */
    1488  
    1489  /*[clinic input]
    1490  _csv.list_dialects
    1491  
    1492  Return a list of all known dialect names.
    1493  
    1494      names = csv.list_dialects()
    1495  [clinic start generated code]*/
    1496  
    1497  static PyObject *
    1498  _csv_list_dialects_impl(PyObject *module)
    1499  /*[clinic end generated code: output=a5b92b215b006a6d input=8953943eb17d98ab]*/
    1500  {
    1501      return PyDict_Keys(get_csv_state(module)->dialects);
    1502  }
    1503  
    1504  static PyObject *
    1505  csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
    1506  {
    1507      PyObject *name_obj, *dialect_obj = NULL;
    1508      _csvstate *module_state = get_csv_state(module);
    1509      PyObject *dialect;
    1510  
    1511      if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
    1512          return NULL;
    1513      if (!PyUnicode_Check(name_obj)) {
    1514          PyErr_SetString(PyExc_TypeError,
    1515                          "dialect name must be a string");
    1516          return NULL;
    1517      }
    1518      if (PyUnicode_READY(name_obj) == -1)
    1519          return NULL;
    1520      dialect = _call_dialect(module_state, dialect_obj, kwargs);
    1521      if (dialect == NULL)
    1522          return NULL;
    1523      if (PyDict_SetItem(module_state->dialects, name_obj, dialect) < 0) {
    1524          Py_DECREF(dialect);
    1525          return NULL;
    1526      }
    1527      Py_DECREF(dialect);
    1528      Py_RETURN_NONE;
    1529  }
    1530  
    1531  
    1532  /*[clinic input]
    1533  _csv.unregister_dialect
    1534  
    1535      name: object
    1536  
    1537  Delete the name/dialect mapping associated with a string name.
    1538  
    1539      csv.unregister_dialect(name)
    1540  [clinic start generated code]*/
    1541  
    1542  static PyObject *
    1543  _csv_unregister_dialect_impl(PyObject *module, PyObject *name)
    1544  /*[clinic end generated code: output=0813ebca6c058df4 input=6b5c1557bf60c7e7]*/
    1545  {
    1546      _csvstate *module_state = get_csv_state(module);
    1547      if (PyDict_DelItem(module_state->dialects, name) < 0) {
    1548          if (PyErr_ExceptionMatches(PyExc_KeyError)) {
    1549              PyErr_Format(module_state->error_obj, "unknown dialect");
    1550          }
    1551          return NULL;
    1552      }
    1553      Py_RETURN_NONE;
    1554  }
    1555  
    1556  /*[clinic input]
    1557  _csv.get_dialect
    1558  
    1559      name: object
    1560  
    1561  Return the dialect instance associated with name.
    1562  
    1563      dialect = csv.get_dialect(name)
    1564  [clinic start generated code]*/
    1565  
    1566  static PyObject *
    1567  _csv_get_dialect_impl(PyObject *module, PyObject *name)
    1568  /*[clinic end generated code: output=aa988cd573bebebb input=edf9ddab32e448fb]*/
    1569  {
    1570      return get_dialect_from_registry(name, get_csv_state(module));
    1571  }
    1572  
    1573  /*[clinic input]
    1574  _csv.field_size_limit
    1575  
    1576      new_limit: object = NULL
    1577  
    1578  Sets an upper limit on parsed fields.
    1579  
    1580      csv.field_size_limit([limit])
    1581  
    1582  Returns old limit. If limit is not given, no new limit is set and
    1583  the old limit is returned
    1584  [clinic start generated code]*/
    1585  
    1586  static PyObject *
    1587  _csv_field_size_limit_impl(PyObject *module, PyObject *new_limit)
    1588  /*[clinic end generated code: output=f2799ecd908e250b input=cec70e9226406435]*/
    1589  {
    1590      _csvstate *module_state = get_csv_state(module);
    1591      long old_limit = module_state->field_limit;
    1592      if (new_limit != NULL) {
    1593          if (!PyLong_CheckExact(new_limit)) {
    1594              PyErr_Format(PyExc_TypeError,
    1595                           "limit must be an integer");
    1596              return NULL;
    1597          }
    1598          module_state->field_limit = PyLong_AsLong(new_limit);
    1599          if (module_state->field_limit == -1 && PyErr_Occurred()) {
    1600              module_state->field_limit = old_limit;
    1601              return NULL;
    1602          }
    1603      }
    1604      return PyLong_FromLong(old_limit);
    1605  }
    1606  
/* The error type defines no slots of its own; the table holds only the
 * terminating sentinel. */
static PyType_Slot error_slots[] = {
    {0, NULL},
};

/* Type specification for "_csv.Error".
 * NOTE(review): the base class is not part of the spec; it is presumably
 * supplied where the type is created, outside this section - confirm. */
PyType_Spec error_spec = {
    .name = "_csv.Error",
    .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
    .slots = error_slots,
};
    1616  
    1617  /*
    1618   * MODULE
    1619   */
    1620  
/* Documentation text describing the module as a whole: the three APIs
 * (reading, writing, dialect registration) and the dialect settings.
 * NOTE(review): presumably installed as the module docstring; the place
 * where it is referenced is outside this section - confirm. */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305.  Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail.  The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings.  When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
"    class excel:\n"
"        delimiter = ','\n"
"        quotechar = '\"'\n"
"        escapechar = None\n"
"        doublequote = True\n"
"        skipinitialspace = False\n"
"        lineterminator = '\\r\\n'\n"
"        quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
"    * quotechar - specifies a one-character string to use as the\n"
"        quoting character.  It defaults to '\"'.\n"
"    * delimiter - specifies a one-character string to use as the\n"
"        field separator.  It defaults to ','.\n"
"    * skipinitialspace - specifies how to interpret spaces which\n"
"        immediately follow a delimiter.  It defaults to False, which\n"
"        means that spaces immediately following a delimiter is part\n"
"        of the following field.\n"
"    * lineterminator -  specifies the character sequence which should\n"
"        terminate rows.\n"
"    * quoting - controls when quotes should be generated by the writer.\n"
"        It can take on any of the following module constants:\n"
"\n"
"        csv.QUOTE_MINIMAL means only when required, for example, when a\n"
"            field contains either the quotechar or the delimiter\n"
"        csv.QUOTE_ALL means that quotes are always placed around fields.\n"
"        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
"            fields which do not parse as integers or floating point\n"
"            numbers.\n"
"        csv.QUOTE_STRINGS means that quotes are always placed around\n"
"            fields which are strings.  Note that the Python value None\n"
"            is not a string.\n"
"        csv.QUOTE_NOTNULL means that quotes are only placed around fields\n"
"            that are not the Python value None.\n"
"        csv.QUOTE_NONE means that quotes are never placed around fields.\n"
"    * escapechar - specifies a one-character string used to escape\n"
"        the delimiter when quoting is set to QUOTE_NONE.\n"
"    * doublequote - controls the handling of quotes inside fields.  When\n"
"        True, two consecutive quotes are interpreted as one during read,\n"
"        and when writing, each quote character embedded in the data is\n"
"        written as two quotes\n");
    1683  
/* Docstring for the module-level "reader" function; it is attached to that
 * entry in the csv_methods table below. */
PyDoc_STRVAR(csv_reader_doc,
"    csv_reader = reader(iterable [, dialect='excel']\n"
"                        [optional keyword args])\n"
"    for row in csv_reader:\n"
"        process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list.  The\n"
"optional \"dialect\" parameter is discussed below.  The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator.  Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines).\n");
    1698  
/* Docstring for the module-level "writer" function; it is attached to that
 * entry in the csv_methods table below. */
PyDoc_STRVAR(csv_writer_doc,
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
"                            [optional keyword args])\n"
"    for row in sequence:\n"
"        csv_writer.writerow(row)\n"
"\n"
"    [or]\n"
"\n"
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
"                            [optional keyword args])\n"
"    csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
    1712  
/* Docstring for the module-level "register_dialect" function; it is attached
 * to that entry in the csv_methods table below. */
PyDoc_STRVAR(csv_register_dialect_doc,
"Create a mapping from a string name to a dialect class.\n"
"    dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
    1716  
/* Table of module-level functions, referenced from the _csvmodule definition.
 *
 * "reader", "writer" and "register_dialect" are spelled out by hand and accept
 * both positional and keyword arguments (METH_VARARGS | METH_KEYWORDS).  The
 * remaining entries are supplied by the *_METHODDEF macros (presumably
 * generated by Argument Clinic into clinic/_csv.c.h -- confirm there).
 */
static struct PyMethodDef csv_methods[] = {
    { "reader", _PyCFunction_CAST(csv_reader),
        METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
    { "writer", _PyCFunction_CAST(csv_writer),
        METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
    { "register_dialect", _PyCFunction_CAST(csv_register_dialect),
        METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
    _CSV_LIST_DIALECTS_METHODDEF
    _CSV_UNREGISTER_DIALECT_METHODDEF
    _CSV_GET_DIALECT_METHODDEF
    _CSV_FIELD_SIZE_LIMIT_METHODDEF
    { NULL, NULL }  /* sentinel: marks the end of the table */
};
    1730  
    1731  static int
    1732  csv_exec(PyObject *module) {
    1733      const StyleDesc *style;
    1734      PyObject *temp;
    1735      _csvstate *module_state = get_csv_state(module);
    1736  
    1737      temp = PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL);
    1738      module_state->dialect_type = (PyTypeObject *)temp;
    1739      if (PyModule_AddObjectRef(module, "Dialect", temp) < 0) {
    1740          return -1;
    1741      }
    1742  
    1743      temp = PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL);
    1744      module_state->reader_type = (PyTypeObject *)temp;
    1745      if (PyModule_AddObjectRef(module, "Reader", temp) < 0) {
    1746          return -1;
    1747      }
    1748  
    1749      temp = PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL);
    1750      module_state->writer_type = (PyTypeObject *)temp;
    1751      if (PyModule_AddObjectRef(module, "Writer", temp) < 0) {
    1752          return -1;
    1753      }
    1754  
    1755      /* Add version to the module. */
    1756      if (PyModule_AddStringConstant(module, "__version__",
    1757                                     MODULE_VERSION) == -1) {
    1758          return -1;
    1759      }
    1760  
    1761      /* Set the field limit */
    1762      module_state->field_limit = 128 * 1024;
    1763  
    1764      /* Add _dialects dictionary */
    1765      module_state->dialects = PyDict_New();
    1766      if (PyModule_AddObjectRef(module, "_dialects", module_state->dialects) < 0) {
    1767          return -1;
    1768      }
    1769  
    1770      /* Add quote styles into dictionary */
    1771      for (style = quote_styles; style->name; style++) {
    1772          if (PyModule_AddIntConstant(module, style->name,
    1773                                      style->style) == -1)
    1774              return -1;
    1775      }
    1776  
    1777      /* Add the CSV exception object to the module. */
    1778      PyObject *bases = PyTuple_Pack(1, PyExc_Exception);
    1779      if (bases == NULL) {
    1780          return -1;
    1781      }
    1782      module_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec,
    1783                                                         bases);
    1784      Py_DECREF(bases);
    1785      if (module_state->error_obj == NULL) {
    1786          return -1;
    1787      }
    1788      if (PyModule_AddType(module, (PyTypeObject *)module_state->error_obj) != 0) {
    1789          return -1;
    1790      }
    1791  
    1792      module_state->str_write = PyUnicode_InternFromString("write");
    1793      if (module_state->str_write == NULL) {
    1794          return -1;
    1795      }
    1796      return 0;
    1797  }
    1798  
/* Module slots used by the _csvmodule definition: csv_exec is run as the
 * module's exec step, and the module declares that it supports running with
 * a per-interpreter GIL. */
static PyModuleDef_Slot csv_slots[] = {
    {Py_mod_exec, csv_exec},
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
    {0, NULL}  /* sentinel: marks the end of the slot list */
};
    1804  
    1805  static struct PyModuleDef _csvmodule = {
    1806      PyModuleDef_HEAD_INIT,
    1807      "_csv",
    1808      csv_module_doc,
    1809      sizeof(_csvstate),
    1810      csv_methods,
    1811      csv_slots,
    1812      _csv_traverse,
    1813      _csv_clear,
    1814      _csv_free
    1815  };
    1816  
/* Module initialization entry point for "_csv".  It only hands the module
 * definition to PyModuleDef_Init() and returns the result; the module's
 * contents are filled in later by csv_exec via the Py_mod_exec slot. */
PyMODINIT_FUNC
PyInit__csv(void)
{
    return PyModuleDef_Init(&_csvmodule);
}