(root)/
Python-3.11.7/
Modules/
_io/
stringio.c
       1  #define PY_SSIZE_T_CLEAN
       2  #include "Python.h"
       3  #include <stddef.h>               // offsetof()
       4  #include "pycore_accu.h"
       5  #include "pycore_object.h"
       6  #include "_iomodule.h"
       7  
       8  /* Implementation note: the buffer is always at least one character longer
       9     than the enclosed string, for proper functioning of _PyIO_find_line_ending.
      10  */
      11  
      12  #define STATE_REALIZED 1
      13  #define STATE_ACCUMULATING 2
      14  
      15  /*[clinic input]
      16  module _io
      17  class _io.StringIO "stringio *" "&PyStringIO_Type"
      18  [clinic start generated code]*/
      19  /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c17bc0f42165cd7d]*/
      20  
      21  typedef struct {
      22      PyObject_HEAD
      23      Py_UCS4 *buf;
      24      Py_ssize_t pos;
      25      Py_ssize_t string_size;
      26      size_t buf_size;
      27  
      28      /* The stringio object can be in two states: accumulating or realized.
      29         In accumulating state, the internal buffer contains nothing and
      30         the contents are given by the embedded _PyAccu structure.
      31         In realized state, the internal buffer is meaningful and the
      32         _PyAccu is destroyed.
      33      */
      34      int state;
      35      _PyAccu accu;
      36  
      37      char ok; /* initialized? */
      38      char closed;
      39      char readuniversal;
      40      char readtranslate;
      41      PyObject *decoder;
      42      PyObject *readnl;
      43      PyObject *writenl;
      44  
      45      PyObject *dict;
      46      PyObject *weakreflist;
      47  } stringio;
      48  
      49  static int _io_StringIO___init__(PyObject *self, PyObject *args, PyObject *kwargs);
      50  
      51  #define CHECK_INITIALIZED(self) \
      52      if (self->ok <= 0) { \
      53          PyErr_SetString(PyExc_ValueError, \
      54              "I/O operation on uninitialized object"); \
      55          return NULL; \
      56      }
      57  
      58  #define CHECK_CLOSED(self) \
      59      if (self->closed) { \
      60          PyErr_SetString(PyExc_ValueError, \
      61              "I/O operation on closed file"); \
      62          return NULL; \
      63      }
      64  
      65  #define ENSURE_REALIZED(self) \
      66      if (realize(self) < 0) { \
      67          return NULL; \
      68      }
      69  
      70  
      71  /* Internal routine for changing the size, in terms of characters, of the
      72     buffer of StringIO objects.  The caller should ensure that the 'size'
      73     argument is non-negative.  Returns 0 on success, -1 otherwise. */
      74  static int
      75  resize_buffer(stringio *self, size_t size)
      76  {
      77      /* Here, unsigned types are used to avoid dealing with signed integer
      78         overflow, which is undefined in C. */
      79      size_t alloc = self->buf_size;
      80      Py_UCS4 *new_buf = NULL;
      81  
      82      assert(self->buf != NULL);
      83  
      84      /* Reserve one more char for line ending detection. */
      85      size = size + 1;
      86      /* For simplicity, stay in the range of the signed type. Anyway, Python
      87         doesn't allow strings to be longer than this. */
      88      if (size > PY_SSIZE_T_MAX)
      89          goto overflow;
      90  
      91      if (size < alloc / 2) {
      92          /* Major downsize; resize down to exact size. */
      93          alloc = size + 1;
      94      }
      95      else if (size < alloc) {
      96          /* Within allocated size; quick exit */
      97          return 0;
      98      }
      99      else if (size <= alloc * 1.125) {
     100          /* Moderate upsize; overallocate similar to list_resize() */
     101          alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
     102      }
     103      else {
     104          /* Major upsize; resize up to exact size */
     105          alloc = size + 1;
     106      }
     107  
     108      if (alloc > PY_SIZE_MAX / sizeof(Py_UCS4))
     109          goto overflow;
     110      new_buf = (Py_UCS4 *)PyMem_Realloc(self->buf, alloc * sizeof(Py_UCS4));
     111      if (new_buf == NULL) {
     112          PyErr_NoMemory();
     113          return -1;
     114      }
     115      self->buf_size = alloc;
     116      self->buf = new_buf;
     117  
     118      return 0;
     119  
     120    overflow:
     121      PyErr_SetString(PyExc_OverflowError,
     122                      "new buffer size too large");
     123      return -1;
     124  }
     125  
     126  static PyObject *
     127  make_intermediate(stringio *self)
     128  {
     129      PyObject *intermediate = _PyAccu_Finish(&self->accu);
     130      self->state = STATE_REALIZED;
     131      if (intermediate == NULL)
     132          return NULL;
     133      if (_PyAccu_Init(&self->accu) ||
     134          _PyAccu_Accumulate(&self->accu, intermediate)) {
     135          Py_DECREF(intermediate);
     136          return NULL;
     137      }
     138      self->state = STATE_ACCUMULATING;
     139      return intermediate;
     140  }
     141  
     142  static int
     143  realize(stringio *self)
     144  {
     145      Py_ssize_t len;
     146      PyObject *intermediate;
     147  
     148      if (self->state == STATE_REALIZED)
     149          return 0;
     150      assert(self->state == STATE_ACCUMULATING);
     151      self->state = STATE_REALIZED;
     152  
     153      intermediate = _PyAccu_Finish(&self->accu);
     154      if (intermediate == NULL)
     155          return -1;
     156  
     157      /* Append the intermediate string to the internal buffer.
     158         The length should be equal to the current cursor position.
     159       */
     160      len = PyUnicode_GET_LENGTH(intermediate);
     161      if (resize_buffer(self, len) < 0) {
     162          Py_DECREF(intermediate);
     163          return -1;
     164      }
     165      if (!PyUnicode_AsUCS4(intermediate, self->buf, len, 0)) {
     166          Py_DECREF(intermediate);
     167          return -1;
     168      }
     169  
     170      Py_DECREF(intermediate);
     171      return 0;
     172  }
     173  
     174  /* Internal routine for writing a whole PyUnicode object to the buffer of a
     175     StringIO object. Returns 0 on success, or -1 on error. */
     176  static Py_ssize_t
     177  write_str(stringio *self, PyObject *obj)
     178  {
     179      Py_ssize_t len;
     180      PyObject *decoded = NULL;
     181  
     182      assert(self->buf != NULL);
     183      assert(self->pos >= 0);
     184  
     185      if (self->decoder != NULL) {
     186          decoded = _PyIncrementalNewlineDecoder_decode(
     187              self->decoder, obj, 1 /* always final */);
     188      }
     189      else {
     190          decoded = obj;
     191          Py_INCREF(decoded);
     192      }
     193      if (self->writenl) {
     194          PyObject *translated = PyUnicode_Replace(
     195              decoded, &_Py_STR(newline), self->writenl, -1);
     196          Py_DECREF(decoded);
     197          decoded = translated;
     198      }
     199      if (decoded == NULL)
     200          return -1;
     201  
     202      assert(PyUnicode_Check(decoded));
     203      if (PyUnicode_READY(decoded)) {
     204          Py_DECREF(decoded);
     205          return -1;
     206      }
     207      len = PyUnicode_GET_LENGTH(decoded);
     208      assert(len >= 0);
     209  
     210      /* This overflow check is not strictly necessary. However, it avoids us to
     211         deal with funky things like comparing an unsigned and a signed
     212         integer. */
     213      if (self->pos > PY_SSIZE_T_MAX - len) {
     214          PyErr_SetString(PyExc_OverflowError,
     215                          "new position too large");
     216          goto fail;
     217      }
     218  
     219      if (self->state == STATE_ACCUMULATING) {
     220          if (self->string_size == self->pos) {
     221              if (_PyAccu_Accumulate(&self->accu, decoded))
     222                  goto fail;
     223              goto success;
     224          }
     225          if (realize(self))
     226              goto fail;
     227      }
     228  
     229      if (self->pos + len > self->string_size) {
     230          if (resize_buffer(self, self->pos + len) < 0)
     231              goto fail;
     232      }
     233  
     234      if (self->pos > self->string_size) {
     235          /* In case of overseek, pad with null bytes the buffer region between
     236             the end of stream and the current position.
     237  
     238            0   lo      string_size                           hi
     239            |   |<---used--->|<----------available----------->|
     240            |   |            <--to pad-->|<---to write--->    |
     241            0   buf                   position
     242  
     243          */
     244          memset(self->buf + self->string_size, '\0',
     245                 (self->pos - self->string_size) * sizeof(Py_UCS4));
     246      }
     247  
     248      /* Copy the data to the internal buffer, overwriting some of the
     249         existing data if self->pos < self->string_size. */
     250      if (!PyUnicode_AsUCS4(decoded,
     251                            self->buf + self->pos,
     252                            self->buf_size - self->pos,
     253                            0))
     254          goto fail;
     255  
     256  success:
     257      /* Set the new length of the internal string if it has changed. */
     258      self->pos += len;
     259      if (self->string_size < self->pos)
     260          self->string_size = self->pos;
     261  
     262      Py_DECREF(decoded);
     263      return 0;
     264  
     265  fail:
     266      Py_XDECREF(decoded);
     267      return -1;
     268  }
     269  
     270  /*[clinic input]
     271  _io.StringIO.getvalue
     272  
     273  Retrieve the entire contents of the object.
     274  [clinic start generated code]*/
     275  
     276  static PyObject *
     277  _io_StringIO_getvalue_impl(stringio *self)
     278  /*[clinic end generated code: output=27b6a7bfeaebce01 input=d23cb81d6791cf88]*/
     279  {
     280      CHECK_INITIALIZED(self);
     281      CHECK_CLOSED(self);
     282      if (self->state == STATE_ACCUMULATING)
     283          return make_intermediate(self);
     284      return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf,
     285                                       self->string_size);
     286  }
     287  
     288  /*[clinic input]
     289  _io.StringIO.tell
     290  
     291  Tell the current file position.
     292  [clinic start generated code]*/
     293  
     294  static PyObject *
     295  _io_StringIO_tell_impl(stringio *self)
     296  /*[clinic end generated code: output=2e87ac67b116c77b input=ec866ebaff02f405]*/
     297  {
     298      CHECK_INITIALIZED(self);
     299      CHECK_CLOSED(self);
     300      return PyLong_FromSsize_t(self->pos);
     301  }
     302  
     303  /*[clinic input]
     304  _io.StringIO.read
     305      size: Py_ssize_t(accept={int, NoneType}) = -1
     306      /
     307  
     308  Read at most size characters, returned as a string.
     309  
     310  If the argument is negative or omitted, read until EOF
     311  is reached. Return an empty string at EOF.
     312  [clinic start generated code]*/
     313  
     314  static PyObject *
     315  _io_StringIO_read_impl(stringio *self, Py_ssize_t size)
     316  /*[clinic end generated code: output=ae8cf6002f71626c input=0921093383dfb92d]*/
     317  {
     318      Py_ssize_t n;
     319      Py_UCS4 *output;
     320  
     321      CHECK_INITIALIZED(self);
     322      CHECK_CLOSED(self);
     323  
     324      /* adjust invalid sizes */
     325      n = self->string_size - self->pos;
     326      if (size < 0 || size > n) {
     327          size = n;
     328          if (size < 0)
     329              size = 0;
     330      }
     331  
     332      /* Optimization for seek(0); read() */
     333      if (self->state == STATE_ACCUMULATING && self->pos == 0 && size == n) {
     334          PyObject *result = make_intermediate(self);
     335          self->pos = self->string_size;
     336          return result;
     337      }
     338  
     339      ENSURE_REALIZED(self);
     340      output = self->buf + self->pos;
     341      self->pos += size;
     342      return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size);
     343  }
     344  
     345  /* Internal helper, used by stringio_readline and stringio_iternext */
     346  static PyObject *
     347  _stringio_readline(stringio *self, Py_ssize_t limit)
     348  {
     349      Py_UCS4 *start, *end, old_char;
     350      Py_ssize_t len, consumed;
     351  
     352      /* In case of overseek, return the empty string */
     353      if (self->pos >= self->string_size)
     354          return PyUnicode_New(0, 0);
     355  
     356      start = self->buf + self->pos;
     357      if (limit < 0 || limit > self->string_size - self->pos)
     358          limit = self->string_size - self->pos;
     359  
     360      end = start + limit;
     361      old_char = *end;
     362      *end = '\0';
     363      len = _PyIO_find_line_ending(
     364          self->readtranslate, self->readuniversal, self->readnl,
     365          PyUnicode_4BYTE_KIND, (char*)start, (char*)end, &consumed);
     366      *end = old_char;
     367      /* If we haven't found any line ending, we just return everything
     368         (`consumed` is ignored). */
     369      if (len < 0)
     370          len = limit;
     371      self->pos += len;
     372      return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, start, len);
     373  }
     374  
     375  /*[clinic input]
     376  _io.StringIO.readline
     377      size: Py_ssize_t(accept={int, NoneType}) = -1
     378      /
     379  
     380  Read until newline or EOF.
     381  
     382  Returns an empty string if EOF is hit immediately.
     383  [clinic start generated code]*/
     384  
     385  static PyObject *
     386  _io_StringIO_readline_impl(stringio *self, Py_ssize_t size)
     387  /*[clinic end generated code: output=cabd6452f1b7e85d input=a5bd70bf682aa276]*/
     388  {
     389      CHECK_INITIALIZED(self);
     390      CHECK_CLOSED(self);
     391      ENSURE_REALIZED(self);
     392  
     393      return _stringio_readline(self, size);
     394  }
     395  
     396  static PyObject *
     397  stringio_iternext(stringio *self)
     398  {
     399      PyObject *line;
     400  
     401      CHECK_INITIALIZED(self);
     402      CHECK_CLOSED(self);
     403      ENSURE_REALIZED(self);
     404  
     405      if (Py_IS_TYPE(self, &PyStringIO_Type)) {
     406          /* Skip method call overhead for speed */
     407          line = _stringio_readline(self, -1);
     408      }
     409      else {
     410          /* XXX is subclassing StringIO really supported? */
     411          line = PyObject_CallMethodNoArgs((PyObject *)self,
     412                                               &_Py_ID(readline));
     413          if (line && !PyUnicode_Check(line)) {
     414              PyErr_Format(PyExc_OSError,
     415                           "readline() should have returned a str object, "
     416                           "not '%.200s'", Py_TYPE(line)->tp_name);
     417              Py_DECREF(line);
     418              return NULL;
     419          }
     420      }
     421  
     422      if (line == NULL)
     423          return NULL;
     424  
     425      if (PyUnicode_GET_LENGTH(line) == 0) {
     426          /* Reached EOF */
     427          Py_DECREF(line);
     428          return NULL;
     429      }
     430  
     431      return line;
     432  }
     433  
     434  /*[clinic input]
     435  _io.StringIO.truncate
     436      pos as size: Py_ssize_t(accept={int, NoneType}, c_default="self->pos") = None
     437      /
     438  
     439  Truncate size to pos.
     440  
     441  The pos argument defaults to the current file position, as
     442  returned by tell().  The current file position is unchanged.
     443  Returns the new absolute position.
     444  [clinic start generated code]*/
     445  
     446  static PyObject *
     447  _io_StringIO_truncate_impl(stringio *self, Py_ssize_t size)
     448  /*[clinic end generated code: output=eb3aef8e06701365 input=5505cff90ca48b96]*/
     449  {
     450      CHECK_INITIALIZED(self);
     451      CHECK_CLOSED(self);
     452  
     453      if (size < 0) {
     454          PyErr_Format(PyExc_ValueError,
     455                       "Negative size value %zd", size);
     456          return NULL;
     457      }
     458  
     459      if (size < self->string_size) {
     460          ENSURE_REALIZED(self);
     461          if (resize_buffer(self, size) < 0)
     462              return NULL;
     463          self->string_size = size;
     464      }
     465  
     466      return PyLong_FromSsize_t(size);
     467  }
     468  
     469  /*[clinic input]
     470  _io.StringIO.seek
     471      pos: Py_ssize_t
     472      whence: int = 0
     473      /
     474  
     475  Change stream position.
     476  
     477  Seek to character offset pos relative to position indicated by whence:
     478      0  Start of stream (the default).  pos should be >= 0;
     479      1  Current position - pos must be 0;
     480      2  End of stream - pos must be 0.
     481  Returns the new absolute position.
     482  [clinic start generated code]*/
     483  
     484  static PyObject *
     485  _io_StringIO_seek_impl(stringio *self, Py_ssize_t pos, int whence)
     486  /*[clinic end generated code: output=e9e0ac9a8ae71c25 input=e3855b24e7cae06a]*/
     487  {
     488      CHECK_INITIALIZED(self);
     489      CHECK_CLOSED(self);
     490  
     491      if (whence != 0 && whence != 1 && whence != 2) {
     492          PyErr_Format(PyExc_ValueError,
     493                       "Invalid whence (%i, should be 0, 1 or 2)", whence);
     494          return NULL;
     495      }
     496      else if (pos < 0 && whence == 0) {
     497          PyErr_Format(PyExc_ValueError,
     498                       "Negative seek position %zd", pos);
     499          return NULL;
     500      }
     501      else if (whence != 0 && pos != 0) {
     502          PyErr_SetString(PyExc_OSError,
     503                          "Can't do nonzero cur-relative seeks");
     504          return NULL;
     505      }
     506  
     507      /* whence = 0: offset relative to beginning of the string.
     508         whence = 1: no change to current position.
     509         whence = 2: change position to end of file. */
     510      if (whence == 1) {
     511          pos = self->pos;
     512      }
     513      else if (whence == 2) {
     514          pos = self->string_size;
     515      }
     516  
     517      self->pos = pos;
     518  
     519      return PyLong_FromSsize_t(self->pos);
     520  }
     521  
     522  /*[clinic input]
     523  _io.StringIO.write
     524      s as obj: object
     525      /
     526  
     527  Write string to file.
     528  
     529  Returns the number of characters written, which is always equal to
     530  the length of the string.
     531  [clinic start generated code]*/
     532  
     533  static PyObject *
     534  _io_StringIO_write(stringio *self, PyObject *obj)
     535  /*[clinic end generated code: output=0deaba91a15b94da input=cf96f3b16586e669]*/
     536  {
     537      Py_ssize_t size;
     538  
     539      CHECK_INITIALIZED(self);
     540      if (!PyUnicode_Check(obj)) {
     541          PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'",
     542                       Py_TYPE(obj)->tp_name);
     543          return NULL;
     544      }
     545      if (PyUnicode_READY(obj))
     546          return NULL;
     547      CHECK_CLOSED(self);
     548      size = PyUnicode_GET_LENGTH(obj);
     549  
     550      if (size > 0 && write_str(self, obj) < 0)
     551          return NULL;
     552  
     553      return PyLong_FromSsize_t(size);
     554  }
     555  
     556  /*[clinic input]
     557  _io.StringIO.close
     558  
     559  Close the IO object.
     560  
     561  Attempting any further operation after the object is closed
     562  will raise a ValueError.
     563  
     564  This method has no effect if the file is already closed.
     565  [clinic start generated code]*/
     566  
     567  static PyObject *
     568  _io_StringIO_close_impl(stringio *self)
     569  /*[clinic end generated code: output=04399355cbe518f1 input=cbc10b45f35d6d46]*/
     570  {
     571      self->closed = 1;
     572      /* Free up some memory */
     573      if (resize_buffer(self, 0) < 0)
     574          return NULL;
     575      _PyAccu_Destroy(&self->accu);
     576      Py_CLEAR(self->readnl);
     577      Py_CLEAR(self->writenl);
     578      Py_CLEAR(self->decoder);
     579      Py_RETURN_NONE;
     580  }
     581  
     582  static int
     583  stringio_traverse(stringio *self, visitproc visit, void *arg)
     584  {
     585      Py_VISIT(self->dict);
     586      return 0;
     587  }
     588  
     589  static int
     590  stringio_clear(stringio *self)
     591  {
     592      Py_CLEAR(self->dict);
     593      return 0;
     594  }
     595  
     596  static void
     597  stringio_dealloc(stringio *self)
     598  {
     599      _PyObject_GC_UNTRACK(self);
     600      self->ok = 0;
     601      if (self->buf) {
     602          PyMem_Free(self->buf);
     603          self->buf = NULL;
     604      }
     605      _PyAccu_Destroy(&self->accu);
     606      Py_CLEAR(self->readnl);
     607      Py_CLEAR(self->writenl);
     608      Py_CLEAR(self->decoder);
     609      Py_CLEAR(self->dict);
     610      if (self->weakreflist != NULL)
     611          PyObject_ClearWeakRefs((PyObject *) self);
     612      Py_TYPE(self)->tp_free(self);
     613  }
     614  
     615  static PyObject *
     616  stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
     617  {
     618      stringio *self;
     619  
     620      assert(type != NULL && type->tp_alloc != NULL);
     621      self = (stringio *)type->tp_alloc(type, 0);
     622      if (self == NULL)
     623          return NULL;
     624  
     625      /* tp_alloc initializes all the fields to zero. So we don't have to
     626         initialize them here. */
     627  
     628      self->buf = (Py_UCS4 *)PyMem_Malloc(0);
     629      if (self->buf == NULL) {
     630          Py_DECREF(self);
     631          return PyErr_NoMemory();
     632      }
     633  
     634      return (PyObject *)self;
     635  }
     636  
     637  /*[clinic input]
     638  _io.StringIO.__init__
     639      initial_value as value: object(c_default="NULL") = ''
     640      newline as newline_obj: object(c_default="NULL") = '\n'
     641  
     642  Text I/O implementation using an in-memory buffer.
     643  
     644  The initial_value argument sets the value of object.  The newline
     645  argument is like the one of TextIOWrapper's constructor.
     646  [clinic start generated code]*/
     647  
     648  static int
     649  _io_StringIO___init___impl(stringio *self, PyObject *value,
     650                             PyObject *newline_obj)
     651  /*[clinic end generated code: output=a421ea023b22ef4e input=cee2d9181b2577a3]*/
     652  {
     653      const char *newline = "\n";
     654      Py_ssize_t value_len;
     655  
     656      /* Parse the newline argument. We only want to allow unicode objects or
     657         None. */
     658      if (newline_obj == Py_None) {
     659          newline = NULL;
     660      }
     661      else if (newline_obj) {
     662          if (!PyUnicode_Check(newline_obj)) {
     663              PyErr_Format(PyExc_TypeError,
     664                           "newline must be str or None, not %.200s",
     665                           Py_TYPE(newline_obj)->tp_name);
     666              return -1;
     667          }
     668          newline = PyUnicode_AsUTF8(newline_obj);
     669          if (newline == NULL)
     670              return -1;
     671      }
     672  
     673      if (newline && newline[0] != '\0'
     674          && !(newline[0] == '\n' && newline[1] == '\0')
     675          && !(newline[0] == '\r' && newline[1] == '\0')
     676          && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
     677          PyErr_Format(PyExc_ValueError,
     678                       "illegal newline value: %R", newline_obj);
     679          return -1;
     680      }
     681      if (value && value != Py_None && !PyUnicode_Check(value)) {
     682          PyErr_Format(PyExc_TypeError,
     683                       "initial_value must be str or None, not %.200s",
     684                       Py_TYPE(value)->tp_name);
     685          return -1;
     686      }
     687  
     688      self->ok = 0;
     689  
     690      _PyAccu_Destroy(&self->accu);
     691      Py_CLEAR(self->readnl);
     692      Py_CLEAR(self->writenl);
     693      Py_CLEAR(self->decoder);
     694  
     695      assert((newline != NULL && newline_obj != Py_None) ||
     696             (newline == NULL && newline_obj == Py_None));
     697  
     698      if (newline) {
     699          self->readnl = PyUnicode_FromString(newline);
     700          if (self->readnl == NULL)
     701              return -1;
     702      }
     703      self->readuniversal = (newline == NULL || newline[0] == '\0');
     704      self->readtranslate = (newline == NULL);
     705      /* If newline == "", we don't translate anything.
     706         If newline == "\n" or newline == None, we translate to "\n", which is
     707         a no-op.
     708         (for newline == None, TextIOWrapper translates to os.linesep, but it
     709         is pointless for StringIO)
     710      */
     711      if (newline != NULL && newline[0] == '\r') {
     712          self->writenl = self->readnl;
     713          Py_INCREF(self->writenl);
     714      }
     715  
     716      if (self->readuniversal) {
     717          self->decoder = PyObject_CallFunctionObjArgs(
     718              (PyObject *)&PyIncrementalNewlineDecoder_Type,
     719              Py_None, self->readtranslate ? Py_True : Py_False, NULL);
     720          if (self->decoder == NULL)
     721              return -1;
     722      }
     723  
     724      /* Now everything is set up, resize buffer to size of initial value,
     725         and copy it */
     726      self->string_size = 0;
     727      if (value && value != Py_None)
     728          value_len = PyUnicode_GetLength(value);
     729      else
     730          value_len = 0;
     731      if (value_len > 0) {
     732          /* This is a heuristic, for newline translation might change
     733             the string length. */
     734          if (resize_buffer(self, 0) < 0)
     735              return -1;
     736          self->state = STATE_REALIZED;
     737          self->pos = 0;
     738          if (write_str(self, value) < 0)
     739              return -1;
     740      }
     741      else {
     742          /* Empty stringio object, we can start by accumulating */
     743          if (resize_buffer(self, 0) < 0)
     744              return -1;
     745          if (_PyAccu_Init(&self->accu))
     746              return -1;
     747          self->state = STATE_ACCUMULATING;
     748      }
     749      self->pos = 0;
     750  
     751      self->closed = 0;
     752      self->ok = 1;
     753      return 0;
     754  }
     755  
     756  /* Properties and pseudo-properties */
     757  
     758  /*[clinic input]
     759  _io.StringIO.readable
     760  
     761  Returns True if the IO object can be read.
     762  [clinic start generated code]*/
     763  
     764  static PyObject *
     765  _io_StringIO_readable_impl(stringio *self)
     766  /*[clinic end generated code: output=b19d44dd8b1ceb99 input=39ce068b224c21ad]*/
     767  {
     768      CHECK_INITIALIZED(self);
     769      CHECK_CLOSED(self);
     770      Py_RETURN_TRUE;
     771  }
     772  
     773  /*[clinic input]
     774  _io.StringIO.writable
     775  
     776  Returns True if the IO object can be written.
     777  [clinic start generated code]*/
     778  
     779  static PyObject *
     780  _io_StringIO_writable_impl(stringio *self)
     781  /*[clinic end generated code: output=13e4dd77187074ca input=7a691353aac38835]*/
     782  {
     783      CHECK_INITIALIZED(self);
     784      CHECK_CLOSED(self);
     785      Py_RETURN_TRUE;
     786  }
     787  
     788  /*[clinic input]
     789  _io.StringIO.seekable
     790  
     791  Returns True if the IO object can be seeked.
     792  [clinic start generated code]*/
     793  
     794  static PyObject *
     795  _io_StringIO_seekable_impl(stringio *self)
     796  /*[clinic end generated code: output=4d20b4641c756879 input=4c606d05b32952e6]*/
     797  {
     798      CHECK_INITIALIZED(self);
     799      CHECK_CLOSED(self);
     800      Py_RETURN_TRUE;
     801  }
     802  
     803  /* Pickling support.
     804  
     805     The implementation of __getstate__ is similar to the one for BytesIO,
     806     except that we also save the newline parameter. For __setstate__ and unlike
     807     BytesIO, we call __init__ to restore the object's state. Doing so allows us
     808     to avoid decoding the complex newline state while keeping the object
     809     representation compact.
     810  
     811     See comment in bytesio.c regarding why only pickle protocol 2 and onward
     812     are supported.
     813  */
     814  
     815  static PyObject *
     816  stringio_getstate(stringio *self, PyObject *Py_UNUSED(ignored))
     817  {
     818      PyObject *initvalue = _io_StringIO_getvalue_impl(self);
     819      PyObject *dict;
     820      PyObject *state;
     821  
     822      if (initvalue == NULL)
     823          return NULL;
     824      if (self->dict == NULL) {
     825          Py_INCREF(Py_None);
     826          dict = Py_None;
     827      }
     828      else {
     829          dict = PyDict_Copy(self->dict);
     830          if (dict == NULL) {
     831              Py_DECREF(initvalue);
     832              return NULL;
     833          }
     834      }
     835  
     836      state = Py_BuildValue("(OOnN)", initvalue,
     837                            self->readnl ? self->readnl : Py_None,
     838                            self->pos, dict);
     839      Py_DECREF(initvalue);
     840      return state;
     841  }
     842  
     843  static PyObject *
     844  stringio_setstate(stringio *self, PyObject *state)
     845  {
     846      PyObject *initarg;
     847      PyObject *position_obj;
     848      PyObject *dict;
     849      Py_ssize_t pos;
     850  
     851      assert(state != NULL);
     852      CHECK_CLOSED(self);
     853  
     854      /* We allow the state tuple to be longer than 4, because we may need
     855         someday to extend the object's state without breaking
     856         backward-compatibility. */
     857      if (!PyTuple_Check(state) || PyTuple_GET_SIZE(state) < 4) {
     858          PyErr_Format(PyExc_TypeError,
     859                       "%.200s.__setstate__ argument should be 4-tuple, got %.200s",
     860                       Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
     861          return NULL;
     862      }
     863  
     864      /* Initialize the object's state. */
     865      initarg = PyTuple_GetSlice(state, 0, 2);
     866      if (initarg == NULL)
     867          return NULL;
     868      if (_io_StringIO___init__((PyObject *)self, initarg, NULL) < 0) {
     869          Py_DECREF(initarg);
     870          return NULL;
     871      }
     872      Py_DECREF(initarg);
     873  
     874      /* Restore the buffer state. Even if __init__ did initialize the buffer,
     875         we have to initialize it again since __init__ may translate the
     876         newlines in the initial_value string. We clearly do not want that
     877         because the string value in the state tuple has already been translated
     878         once by __init__. So we do not take any chance and replace object's
     879         buffer completely. */
     880      {
     881          PyObject *item;
     882          Py_UCS4 *buf;
     883          Py_ssize_t bufsize;
     884  
     885          item = PyTuple_GET_ITEM(state, 0);
     886          buf = PyUnicode_AsUCS4Copy(item);
     887          if (buf == NULL)
     888              return NULL;
     889          bufsize = PyUnicode_GET_LENGTH(item);
     890  
     891          if (resize_buffer(self, bufsize) < 0) {
     892              PyMem_Free(buf);
     893              return NULL;
     894          }
     895          memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4));
     896          PyMem_Free(buf);
     897          self->string_size = bufsize;
     898      }
     899  
     900      /* Set carefully the position value. Alternatively, we could use the seek
     901         method instead of modifying self->pos directly to better protect the
     902         object internal state against erroneous (or malicious) inputs. */
     903      position_obj = PyTuple_GET_ITEM(state, 2);
     904      if (!PyLong_Check(position_obj)) {
     905          PyErr_Format(PyExc_TypeError,
     906                       "third item of state must be an integer, got %.200s",
     907                       Py_TYPE(position_obj)->tp_name);
     908          return NULL;
     909      }
     910      pos = PyLong_AsSsize_t(position_obj);
     911      if (pos == -1 && PyErr_Occurred())
     912          return NULL;
     913      if (pos < 0) {
     914          PyErr_SetString(PyExc_ValueError,
     915                          "position value cannot be negative");
     916          return NULL;
     917      }
     918      self->pos = pos;
     919  
     920      /* Set the dictionary of the instance variables. */
     921      dict = PyTuple_GET_ITEM(state, 3);
     922      if (dict != Py_None) {
     923          if (!PyDict_Check(dict)) {
     924              PyErr_Format(PyExc_TypeError,
     925                           "fourth item of state should be a dict, got a %.200s",
     926                           Py_TYPE(dict)->tp_name);
     927              return NULL;
     928          }
     929          if (self->dict) {
     930              /* Alternatively, we could replace the internal dictionary
     931                 completely. However, it seems more practical to just update it. */
     932              if (PyDict_Update(self->dict, dict) < 0)
     933                  return NULL;
     934          }
     935          else {
     936              Py_INCREF(dict);
     937              self->dict = dict;
     938          }
     939      }
     940  
     941      Py_RETURN_NONE;
     942  }
     943  
     944  
     945  static PyObject *
     946  stringio_closed(stringio *self, void *context)
     947  {
     948      CHECK_INITIALIZED(self);
     949      return PyBool_FromLong(self->closed);
     950  }
     951  
     952  static PyObject *
     953  stringio_line_buffering(stringio *self, void *context)
     954  {
     955      CHECK_INITIALIZED(self);
     956      CHECK_CLOSED(self);
     957      Py_RETURN_FALSE;
     958  }
     959  
     960  static PyObject *
     961  stringio_newlines(stringio *self, void *context)
     962  {
     963      CHECK_INITIALIZED(self);
     964      CHECK_CLOSED(self);
     965      if (self->decoder == NULL)
     966          Py_RETURN_NONE;
     967      return PyObject_GetAttr(self->decoder, &_Py_ID(newlines));
     968  }
     969  
     970  #include "clinic/stringio.c.h"
     971  
     972  static struct PyMethodDef stringio_methods[] = {
     973      _IO_STRINGIO_CLOSE_METHODDEF
     974      _IO_STRINGIO_GETVALUE_METHODDEF
     975      _IO_STRINGIO_READ_METHODDEF
     976      _IO_STRINGIO_READLINE_METHODDEF
     977      _IO_STRINGIO_TELL_METHODDEF
     978      _IO_STRINGIO_TRUNCATE_METHODDEF
     979      _IO_STRINGIO_SEEK_METHODDEF
     980      _IO_STRINGIO_WRITE_METHODDEF
     981  
     982      _IO_STRINGIO_SEEKABLE_METHODDEF
     983      _IO_STRINGIO_READABLE_METHODDEF
     984      _IO_STRINGIO_WRITABLE_METHODDEF
     985  
     986      {"__getstate__", (PyCFunction)stringio_getstate, METH_NOARGS},
     987      {"__setstate__", (PyCFunction)stringio_setstate, METH_O},
     988      {NULL, NULL}        /* sentinel */
     989  };
     990  
     991  static PyGetSetDef stringio_getset[] = {
     992      {"closed",         (getter)stringio_closed,         NULL, NULL},
     993      {"newlines",       (getter)stringio_newlines,       NULL, NULL},
     994      /*  (following comments straight off of the original Python wrapper:)
     995          XXX Cruft to support the TextIOWrapper API. This would only
     996          be meaningful if StringIO supported the buffer attribute.
     997          Hopefully, a better solution, than adding these pseudo-attributes,
     998          will be found.
     999      */
    1000      {"line_buffering", (getter)stringio_line_buffering, NULL, NULL},
    1001      {NULL}
    1002  };
    1003  
    1004  PyTypeObject PyStringIO_Type = {
    1005      PyVarObject_HEAD_INIT(NULL, 0)
    1006      "_io.StringIO",                            /*tp_name*/
    1007      sizeof(stringio),                    /*tp_basicsize*/
    1008      0,                                         /*tp_itemsize*/
    1009      (destructor)stringio_dealloc,              /*tp_dealloc*/
    1010      0,                                         /*tp_vectorcall_offset*/
    1011      0,                                         /*tp_getattr*/
    1012      0,                                         /*tp_setattr*/
    1013      0,                                         /*tp_as_async*/
    1014      0,                                         /*tp_repr*/
    1015      0,                                         /*tp_as_number*/
    1016      0,                                         /*tp_as_sequence*/
    1017      0,                                         /*tp_as_mapping*/
    1018      0,                                         /*tp_hash*/
    1019      0,                                         /*tp_call*/
    1020      0,                                         /*tp_str*/
    1021      0,                                         /*tp_getattro*/
    1022      0,                                         /*tp_setattro*/
    1023      0,                                         /*tp_as_buffer*/
    1024      Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
    1025                         | Py_TPFLAGS_HAVE_GC,   /*tp_flags*/
    1026      _io_StringIO___init____doc__,              /*tp_doc*/
    1027      (traverseproc)stringio_traverse,           /*tp_traverse*/
    1028      (inquiry)stringio_clear,                   /*tp_clear*/
    1029      0,                                         /*tp_richcompare*/
    1030      offsetof(stringio, weakreflist),            /*tp_weaklistoffset*/
    1031      0,                                         /*tp_iter*/
    1032      (iternextfunc)stringio_iternext,           /*tp_iternext*/
    1033      stringio_methods,                          /*tp_methods*/
    1034      0,                                         /*tp_members*/
    1035      stringio_getset,                           /*tp_getset*/
    1036      0,                                         /*tp_base*/
    1037      0,                                         /*tp_dict*/
    1038      0,                                         /*tp_descr_get*/
    1039      0,                                         /*tp_descr_set*/
    1040      offsetof(stringio, dict),                  /*tp_dictoffset*/
    1041      _io_StringIO___init__,                     /*tp_init*/
    1042      0,                                         /*tp_alloc*/
    1043      stringio_new,                              /*tp_new*/
    1044  };