(root)/
Python-3.11.7/
Modules/
pyexpat.c
       1  #include "Python.h"
       2  #include <ctype.h>
       3  
       4  #include "structmember.h"         // PyMemberDef
       5  #include "expat.h"
       6  
       7  #include "pyexpat.h"
       8  
       9  /* Do not emit Clinic output to a file as that wreaks havoc with conditionally
      10     included methods. */
      11  /*[clinic input]
      12  module pyexpat
      13  [clinic start generated code]*/
      14  /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/
      15  
/* Expat's version folded into a single integer
   (major*10000 + minor*100 + micro) so that it can be compared in
   preprocessor conditionals such as `#if XML_COMBINED_VERSION >= 19504`. */
#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
      17  
/* Memory handling suite built from PyObject_Malloc, PyObject_Realloc and
   PyObject_Free.
   NOTE(review): presumably handed to Expat when a parser is created; the
   use site is outside this section -- confirm. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
      20  
/* Identifiers of the handler slots.  The values are used as indices into
   xmlparseobject.handlers (see have_handler()).
   NOTE(review): the order presumably has to match the entries of
   handler_info[] -- confirm before reordering. */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
    /* Only available when building against Expat 1.95.4 or newer. */
#if XML_COMBINED_VERSION >= 19504
    SkippedEntity,
#endif
    /* NOTE(review): looks like an end-of-list placeholder -- confirm. */
    _DummyDecl
};
      48  
/* Per-module state; obtained from a module object with pyexpat_get_state()
   or from a defining class with PyType_GetModuleState(). */
typedef struct {
    /* NOTE(review): presumably the xmlparser type object -- confirm. */
    PyTypeObject *xml_parse_type;
    /* Exception class that set_error() instantiates and raises. */
    PyObject *error;
    /* Attribute name used by ParseFile() to look up the read method of
       the file-like object. */
    PyObject *str_read;
} pyexpat_state;
      54  
      55  static inline pyexpat_state*
      56  pyexpat_get_state(PyObject *module)
      57  {
      58      void *state = PyModule_GetState(module);
      59      assert(state != NULL);
      60      return (pyexpat_state *)state;
      61  }
      62  
      63  /* ----------------------------------------------------- */
      64  
      65  /* Declarations for objects of type xmlparser */
      66  
/* Instance data of a parser object: the wrapped Expat parser plus the
   Python-level settings and handler table that the callbacks consult. */
typedef struct {
    PyObject_HEAD

    XML_Parser itself;          /* The wrapped Expat parser. */
    int ordered_attributes;     /* Return attributes as a list. */
    int specified_attributes;   /* Report only specified attributes. */
    int in_callback;            /* Is a callback active? */
    int ns_prefixes;            /* Namespace-triplets mode? */
    XML_Char *buffer;           /* Buffer used when accumulating characters */
                                /* NULL if not enabled */
    int buffer_size;            /* Size of buffer, in XML_Char units */
    int buffer_used;            /* Buffer units in use */
    PyObject *intern;           /* Dictionary to intern strings */
    PyObject **handlers;        /* Python handlers, indexed by enum
                                   HandlerTypes; an entry is NULL when no
                                   handler is set */
} xmlparseobject;
      82  
      83  #include "clinic/pyexpat.c.h"
      84  
/* NOTE(review): presumably the default size, in XML_Char units, of the
   character data buffer (xmlparseobject.buffer); the use site is outside
   this section -- confirm. */
#define CHARACTER_DATA_BUFFER_SIZE 8192
      86  
/* Function that installs the C callback METH on an Expat parser.
   NOTE(review): presumably matches the shape of Expat's XML_Set*Handler
   functions -- confirm. */
typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
/* Untyped pointer used to store a C callback. */
typedef void* xmlhandler;

/* Static description of one handler slot.
   NOTE(review): the field meanings below are inferred from their names and
   types; the code that fills and reads the table is outside this section. */
struct HandlerInfo {
    const char *name;           /* presumably the Python attribute name */
    xmlhandlersetter setter;    /* presumably registers `handler` with Expat */
    xmlhandler handler;         /* presumably the C trampoline (my_*Handler) */
    PyGetSetDef getset;         /* presumably the attribute's getter/setter */
};

/* NOTE(review): table of handler descriptions; it is populated elsewhere in
   the file, and the reason for the fixed capacity of 64 is not visible
   here -- confirm it exceeds the number of HandlerTypes. */
static struct HandlerInfo handler_info[64];
      98  
      99  /* Set an integer attribute on the error object; return true on success,
     100   * false on an exception.
     101   */
     102  static int
     103  set_error_attr(PyObject *err, const char *name, int value)
     104  {
     105      PyObject *v = PyLong_FromLong(value);
     106  
     107      if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
     108          Py_XDECREF(v);
     109          return 0;
     110      }
     111      Py_DECREF(v);
     112      return 1;
     113  }
     114  
     115  /* Build and set an Expat exception, including positioning
     116   * information.  Always returns NULL.
     117   */
     118  static PyObject *
     119  set_error(pyexpat_state *state, xmlparseobject *self, enum XML_Error code)
     120  {
     121      PyObject *err;
     122      PyObject *buffer;
     123      XML_Parser parser = self->itself;
     124      int lineno = XML_GetErrorLineNumber(parser);
     125      int column = XML_GetErrorColumnNumber(parser);
     126  
     127      buffer = PyUnicode_FromFormat("%s: line %i, column %i",
     128                                    XML_ErrorString(code), lineno, column);
     129      if (buffer == NULL)
     130          return NULL;
     131      err = PyObject_CallOneArg(state->error, buffer);
     132      Py_DECREF(buffer);
     133      if (  err != NULL
     134            && set_error_attr(err, "code", code)
     135            && set_error_attr(err, "offset", column)
     136            && set_error_attr(err, "lineno", lineno)) {
     137          PyErr_SetObject(state->error, err);
     138      }
     139      Py_XDECREF(err);
     140      return NULL;
     141  }
     142  
     143  static int
     144  have_handler(xmlparseobject *self, int type)
     145  {
     146      PyObject *handler = self->handlers[type];
     147      return handler != NULL;
     148  }
     149  
     150  /* Convert a string of XML_Chars into a Unicode string.
     151     Returns None if str is a null pointer. */
     152  
     153  static PyObject *
     154  conv_string_to_unicode(const XML_Char *str)
     155  {
     156      /* XXX currently this code assumes that XML_Char is 8-bit,
     157         and hence in UTF-8.  */
     158      /* UTF-8 from Expat, Unicode desired */
     159      if (str == NULL) {
     160          Py_RETURN_NONE;
     161      }
     162      return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
     163  }
     164  
     165  static PyObject *
     166  conv_string_len_to_unicode(const XML_Char *str, int len)
     167  {
     168      /* XXX currently this code assumes that XML_Char is 8-bit,
     169         and hence in UTF-8.  */
     170      /* UTF-8 from Expat, Unicode desired */
     171      if (str == NULL) {
     172          Py_RETURN_NONE;
     173      }
     174      return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
     175  }
     176  
     177  /* Callback routines */
     178  
     179  static void clear_handlers(xmlparseobject *self, int initial);
     180  
     181  /* This handler is used when an error has been detected, in the hope
     182     that actual parsing can be terminated early.  This will only help
     183     if an external entity reference is encountered. */
     184  static int
     185  error_external_entity_ref_handler(XML_Parser parser,
     186                                    const XML_Char *context,
     187                                    const XML_Char *base,
     188                                    const XML_Char *systemId,
     189                                    const XML_Char *publicId)
     190  {
     191      return 0;
     192  }
     193  
     194  /* Dummy character data handler used when an error (exception) has
     195     been detected, and the actual parsing can be terminated early.
     196     This is needed since character data handler can't be safely removed
     197     from within the character data handler, but can be replaced.  It is
     198     used only from the character data handler trampoline, and must be
     199     used right after `flag_error()` is called. */
     200  static void
     201  noop_character_data_handler(void *userData, const XML_Char *data, int len)
     202  {
     203      /* Do nothing. */
     204  }
     205  
     206  static void
     207  flag_error(xmlparseobject *self)
     208  {
     209      clear_handlers(self, 0);
     210      XML_SetExternalEntityRefHandler(self->itself,
     211                                      error_external_entity_ref_handler);
     212  }
     213  
     214  static PyObject*
     215  call_with_frame(const char *funcname, int lineno, PyObject* func, PyObject* args,
     216                  xmlparseobject *self)
     217  {
     218      PyObject *res;
     219  
     220      res = PyObject_Call(func, args, NULL);
     221      if (res == NULL) {
     222          _PyTraceback_Add(funcname, __FILE__, lineno);
     223          XML_StopParser(self->itself, XML_FALSE);
     224      }
     225      return res;
     226  }
     227  
     228  static PyObject*
     229  string_intern(xmlparseobject *self, const char* str)
     230  {
     231      PyObject *result = conv_string_to_unicode(str);
     232      PyObject *value;
     233      /* result can be NULL if the unicode conversion failed. */
     234      if (!result)
     235          return result;
     236      if (!self->intern)
     237          return result;
     238      value = PyDict_GetItemWithError(self->intern, result);
     239      if (!value) {
     240          if (!PyErr_Occurred() &&
     241              PyDict_SetItem(self->intern, result, result) == 0)
     242          {
     243              return result;
     244          }
     245          else {
     246              Py_DECREF(result);
     247              return NULL;
     248          }
     249      }
     250      Py_INCREF(value);
     251      Py_DECREF(result);
     252      return value;
     253  }
     254  
     255  /* Return 0 on success, -1 on exception.
     256   * flag_error() will be called before return if needed.
     257   */
     258  static int
     259  call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
     260  {
     261      PyObject *args;
     262      PyObject *temp;
     263  
     264      if (!have_handler(self, CharacterData))
     265          return -1;
     266  
     267      args = PyTuple_New(1);
     268      if (args == NULL)
     269          return -1;
     270      temp = (conv_string_len_to_unicode(buffer, len));
     271      if (temp == NULL) {
     272          Py_DECREF(args);
     273          flag_error(self);
     274          XML_SetCharacterDataHandler(self->itself,
     275                                      noop_character_data_handler);
     276          return -1;
     277      }
     278      PyTuple_SET_ITEM(args, 0, temp);
     279      /* temp is now a borrowed reference; consider it unused. */
     280      self->in_callback = 1;
     281      temp = call_with_frame("CharacterData", __LINE__,
     282                             self->handlers[CharacterData], args, self);
     283      /* temp is an owned reference again, or NULL */
     284      self->in_callback = 0;
     285      Py_DECREF(args);
     286      if (temp == NULL) {
     287          flag_error(self);
     288          XML_SetCharacterDataHandler(self->itself,
     289                                      noop_character_data_handler);
     290          return -1;
     291      }
     292      Py_DECREF(temp);
     293      return 0;
     294  }
     295  
     296  static int
     297  flush_character_buffer(xmlparseobject *self)
     298  {
     299      int rc;
     300      if (self->buffer == NULL || self->buffer_used == 0)
     301          return 0;
     302      rc = call_character_handler(self, self->buffer, self->buffer_used);
     303      self->buffer_used = 0;
     304      return rc;
     305  }
     306  
     307  static void
     308  my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
     309  {
     310      xmlparseobject *self = (xmlparseobject *) userData;
     311  
     312      if (PyErr_Occurred())
     313          return;
     314  
     315      if (self->buffer == NULL)
     316          call_character_handler(self, data, len);
     317      else {
     318          if ((self->buffer_used + len) > self->buffer_size) {
     319              if (flush_character_buffer(self) < 0)
     320                  return;
     321              /* handler might have changed; drop the rest on the floor
     322               * if there isn't a handler anymore
     323               */
     324              if (!have_handler(self, CharacterData))
     325                  return;
     326          }
     327          if (len > self->buffer_size) {
     328              call_character_handler(self, data, len);
     329              self->buffer_used = 0;
     330          }
     331          else {
     332              memcpy(self->buffer + self->buffer_used,
     333                     data, len * sizeof(XML_Char));
     334              self->buffer_used += len;
     335          }
     336      }
     337  }
     338  
     339  static void
     340  my_StartElementHandler(void *userData,
     341                         const XML_Char *name, const XML_Char *atts[])
     342  {
     343      xmlparseobject *self = (xmlparseobject *)userData;
     344  
     345      if (have_handler(self, StartElement)) {
     346          PyObject *container, *rv, *args;
     347          int i, max;
     348  
     349          if (PyErr_Occurred())
     350              return;
     351  
     352          if (flush_character_buffer(self) < 0)
     353              return;
     354          /* Set max to the number of slots filled in atts[]; max/2 is
     355           * the number of attributes we need to process.
     356           */
     357          if (self->specified_attributes) {
     358              max = XML_GetSpecifiedAttributeCount(self->itself);
     359          }
     360          else {
     361              max = 0;
     362              while (atts[max] != NULL)
     363                  max += 2;
     364          }
     365          /* Build the container. */
     366          if (self->ordered_attributes)
     367              container = PyList_New(max);
     368          else
     369              container = PyDict_New();
     370          if (container == NULL) {
     371              flag_error(self);
     372              return;
     373          }
     374          for (i = 0; i < max; i += 2) {
     375              PyObject *n = string_intern(self, (XML_Char *) atts[i]);
     376              PyObject *v;
     377              if (n == NULL) {
     378                  flag_error(self);
     379                  Py_DECREF(container);
     380                  return;
     381              }
     382              v = conv_string_to_unicode((XML_Char *) atts[i+1]);
     383              if (v == NULL) {
     384                  flag_error(self);
     385                  Py_DECREF(container);
     386                  Py_DECREF(n);
     387                  return;
     388              }
     389              if (self->ordered_attributes) {
     390                  PyList_SET_ITEM(container, i, n);
     391                  PyList_SET_ITEM(container, i+1, v);
     392              }
     393              else if (PyDict_SetItem(container, n, v)) {
     394                  flag_error(self);
     395                  Py_DECREF(n);
     396                  Py_DECREF(v);
     397                  Py_DECREF(container);
     398                  return;
     399              }
     400              else {
     401                  Py_DECREF(n);
     402                  Py_DECREF(v);
     403              }
     404          }
     405          args = string_intern(self, name);
     406          if (args == NULL) {
     407              Py_DECREF(container);
     408              return;
     409          }
     410          args = Py_BuildValue("(NN)", args, container);
     411          if (args == NULL) {
     412              return;
     413          }
     414          /* Container is now a borrowed reference; ignore it. */
     415          self->in_callback = 1;
     416          rv = call_with_frame("StartElement", __LINE__,
     417                               self->handlers[StartElement], args, self);
     418          self->in_callback = 0;
     419          Py_DECREF(args);
     420          if (rv == NULL) {
     421              flag_error(self);
     422              return;
     423          }
     424          Py_DECREF(rv);
     425      }
     426  }
     427  
/* RC_HANDLER defines the Expat callback my_<NAME>Handler.
 *
 *   RC            return type of the generated function
 *   NAME          HandlerTypes entry; it selects the slot in self->handlers
 *                 and, stringified, is the name used for the traceback entry
 *   PARAMS        parenthesized parameter list of the generated function
 *   INIT          statement(s) placed after the local declarations
 *   PARAM_FORMAT  parenthesized argument list passed to Py_BuildValue
 *   CONVERSION    statement(s) run after a successful call, while the
 *                 handler's result is still available as `rv`
 *   RETURN        expression used by every `return`
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing but `return RETURN` when no handler
 * is installed, when a Python exception is already pending, or when
 * flushing the character buffer fails.  Otherwise it builds the argument
 * tuple and calls the handler with self->in_callback set; flag_error() is
 * called if building the arguments or the call itself fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (PyErr_Occurred()) \
            return RETURN; \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (!args) { flag_error(self); return RETURN;} \
        self->in_callback = 1; \
        rv = call_with_frame(#NAME,__LINE__, \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}
     458  
/* RC_HANDLER specialization for callbacks that return nothing.  The
   generated function takes the parser object from its `userData`
   parameter and discards the Python handler's result. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
        (xmlparseobject *)userData)

/* RC_HANDLER specialization for callbacks that return an int.  The Python
   handler's result is converted with PyLong_AsLong() and returned; 0 is
   returned when the handler was not called or failed. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
                        rc = PyLong_AsLong(rv);, rc, \
        (xmlparseobject *)userData)
     467  
/* my_EndElementHandler: calls the EndElement handler with the interned
   element name. */
VOID_HANDLER(EndElement,
             (void *userData, const XML_Char *name),
             ("(N)", string_intern(self, name)))
     471  
     472  VOID_HANDLER(ProcessingInstruction,
     473               (void *userData,
     474                const XML_Char *target,
     475                const XML_Char *data),
     476               ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))
     477  
/* my_UnparsedEntityDeclHandler: calls the UnparsedEntityDecl handler with
   (entityName, base, systemId, publicId, notationName); every string is
   interned. */
VOID_HANDLER(UnparsedEntityDecl,
             (void *userData,
              const XML_Char *entityName,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("(NNNNN)",
              string_intern(self, entityName), string_intern(self, base),
              string_intern(self, systemId), string_intern(self, publicId),
              string_intern(self, notationName)))
     489  
/* my_EntityDeclHandler: calls the EntityDecl handler with (entityName,
   is_parameter_entity, value, base, systemId, publicId, notationName).
   The value is passed with an explicit length (value_length), so it is
   converted with conv_string_len_to_unicode(); the other strings are
   interned. */
VOID_HANDLER(EntityDecl,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity,
              const XML_Char *value,
              int value_length,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("NiNNNNN",
              string_intern(self, entityName), is_parameter_entity,
              (conv_string_len_to_unicode(value, value_length)),
              string_intern(self, base), string_intern(self, systemId),
              string_intern(self, publicId),
              string_intern(self, notationName)))
     506  
     507  VOID_HANDLER(XmlDecl,
     508               (void *userData,
     509                const XML_Char *version,
     510                const XML_Char *encoding,
     511                int standalone),
     512               ("(O&O&i)",
     513                conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
     514                standalone))
     515  
     516  static PyObject *
     517  conv_content_model(XML_Content * const model,
     518                     PyObject *(*conv_string)(const XML_Char *))
     519  {
     520      PyObject *result = NULL;
     521      PyObject *children = PyTuple_New(model->numchildren);
     522      int i;
     523  
     524      if (children != NULL) {
     525          assert(model->numchildren < INT_MAX);
     526          for (i = 0; i < (int)model->numchildren; ++i) {
     527              PyObject *child = conv_content_model(&model->children[i],
     528                                                   conv_string);
     529              if (child == NULL) {
     530                  Py_XDECREF(children);
     531                  return NULL;
     532              }
     533              PyTuple_SET_ITEM(children, i, child);
     534          }
     535          result = Py_BuildValue("(iiO&N)",
     536                                 model->type, model->quant,
     537                                 conv_string,model->name, children);
     538      }
     539      return result;
     540  }
     541  
     542  static void
     543  my_ElementDeclHandler(void *userData,
     544                        const XML_Char *name,
     545                        XML_Content *model)
     546  {
     547      xmlparseobject *self = (xmlparseobject *)userData;
     548      PyObject *args = NULL;
     549  
     550      if (have_handler(self, ElementDecl)) {
     551          PyObject *rv = NULL;
     552          PyObject *modelobj, *nameobj;
     553  
     554          if (PyErr_Occurred())
     555              return;
     556  
     557          if (flush_character_buffer(self) < 0)
     558              goto finally;
     559          modelobj = conv_content_model(model, (conv_string_to_unicode));
     560          if (modelobj == NULL) {
     561              flag_error(self);
     562              goto finally;
     563          }
     564          nameobj = string_intern(self, name);
     565          if (nameobj == NULL) {
     566              Py_DECREF(modelobj);
     567              flag_error(self);
     568              goto finally;
     569          }
     570          args = Py_BuildValue("NN", nameobj, modelobj);
     571          if (args == NULL) {
     572              flag_error(self);
     573              goto finally;
     574          }
     575          self->in_callback = 1;
     576          rv = call_with_frame("ElementDecl", __LINE__,
     577                               self->handlers[ElementDecl], args, self);
     578          self->in_callback = 0;
     579          if (rv == NULL) {
     580              flag_error(self);
     581              goto finally;
     582          }
     583          Py_DECREF(rv);
     584      }
     585   finally:
     586      Py_XDECREF(args);
     587      XML_FreeContentModel(self->itself, model);
     588      return;
     589  }
     590  
     591  VOID_HANDLER(AttlistDecl,
     592               (void *userData,
     593                const XML_Char *elname,
     594                const XML_Char *attname,
     595                const XML_Char *att_type,
     596                const XML_Char *dflt,
     597                int isrequired),
     598               ("(NNO&O&i)",
     599                string_intern(self, elname), string_intern(self, attname),
     600                conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
     601                isrequired))
     602  
/* my_SkippedEntityHandler: calls the SkippedEntity handler with
   (entityName, is_parameter_entity); the name is interned.  Only compiled
   when building against Expat 1.95.4 or newer, matching the conditional
   SkippedEntity entry of enum HandlerTypes. */
#if XML_COMBINED_VERSION >= 19504
VOID_HANDLER(SkippedEntity,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity),
             ("Ni",
              string_intern(self, entityName), is_parameter_entity))
#endif
     611  
/* my_NotationDeclHandler: calls the NotationDecl handler with
   (notationName, base, systemId, publicId); every string is interned. */
VOID_HANDLER(NotationDecl,
                (void *userData,
                        const XML_Char *notationName,
                        const XML_Char *base,
                        const XML_Char *systemId,
                        const XML_Char *publicId),
                ("(NNNN)",
                 string_intern(self, notationName), string_intern(self, base),
                 string_intern(self, systemId), string_intern(self, publicId)))
     621  
/* my_StartNamespaceDeclHandler: calls the StartNamespaceDecl handler with
   (prefix, uri); both strings are interned. */
VOID_HANDLER(StartNamespaceDecl,
                (void *userData,
                      const XML_Char *prefix,
                      const XML_Char *uri),
                ("(NN)",
                 string_intern(self, prefix), string_intern(self, uri)))
     628  
/* my_EndNamespaceDeclHandler: calls the EndNamespaceDecl handler with the
   interned prefix. */
VOID_HANDLER(EndNamespaceDecl,
                (void *userData,
                    const XML_Char *prefix),
                ("(N)", string_intern(self, prefix)))
     633  
     634  VOID_HANDLER(Comment,
     635                 (void *userData, const XML_Char *data),
     636                  ("(O&)", conv_string_to_unicode ,data))
     637  
/* my_StartCdataSectionHandler: calls the StartCdataSection handler without
   arguments. */
VOID_HANDLER(StartCdataSection,
               (void *userData),
                ("()"))

/* my_EndCdataSectionHandler: calls the EndCdataSection handler without
   arguments. */
VOID_HANDLER(EndCdataSection,
               (void *userData),
                ("()"))
     645  
/* my_DefaultHandler: calls the Default handler with the LEN XML_Chars at S
   converted to a string. */
VOID_HANDLER(Default,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (conv_string_len_to_unicode(s,len))))

/* my_DefaultHandlerExpandHandler: same as above, but for the
   DefaultHandlerExpand slot. */
VOID_HANDLER(DefaultHandlerExpand,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (conv_string_len_to_unicode(s,len))))
/* Alias without the "Handler" suffix that the macro appends.
   NOTE(review): presumably needed because other code refers to the
   callback by this name -- confirm. */
#define my_DefaultHandlerExpand my_DefaultHandlerExpandHandler
     654  
/* my_NotStandaloneHandler: calls the NotStandalone handler without
   arguments and returns its result converted with PyLong_AsLong(); returns
   0 when the handler was not called or failed. */
INT_HANDLER(NotStandalone,
                (void *userData),
                ("()"))
     658  
     659  RC_HANDLER(int, ExternalEntityRef,
     660                  (XML_Parser parser,
     661                      const XML_Char *context,
     662                      const XML_Char *base,
     663                      const XML_Char *systemId,
     664                      const XML_Char *publicId),
     665                  int rc=0;,
     666                  ("(O&NNN)",
     667                   conv_string_to_unicode ,context, string_intern(self, base),
     668                   string_intern(self, systemId), string_intern(self, publicId)),
     669                  rc = PyLong_AsLong(rv);, rc,
     670                  XML_GetUserData(parser))
     671  
     672  /* XXX UnknownEncodingHandler */
     673  
/* my_StartDoctypeDeclHandler: calls the StartDoctypeDecl handler with
   (doctypeName, sysid, pubid, has_internal_subset); the strings are
   interned. */
VOID_HANDLER(StartDoctypeDecl,
             (void *userData, const XML_Char *doctypeName,
              const XML_Char *sysid, const XML_Char *pubid,
              int has_internal_subset),
             ("(NNNi)", string_intern(self, doctypeName),
              string_intern(self, sysid), string_intern(self, pubid),
              has_internal_subset))

/* my_EndDoctypeDeclHandler: calls the EndDoctypeDecl handler without
   arguments. */
VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
     683  
     684  /* ---------------------------------------------------------------- */
     685  /*[clinic input]
     686  class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype"
     687  [clinic start generated code]*/
     688  /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2393162385232e1c]*/
     689  
     690  
     691  static PyObject *
     692  get_parse_result(pyexpat_state *state, xmlparseobject *self, int rv)
     693  {
     694      if (PyErr_Occurred()) {
     695          return NULL;
     696      }
     697      if (rv == 0) {
     698          return set_error(state, self, XML_GetErrorCode(self->itself));
     699      }
     700      if (flush_character_buffer(self) < 0) {
     701          return NULL;
     702      }
     703      return PyLong_FromLong(rv);
     704  }
     705  
/* Largest number of bytes that Parse() hands to a single XML_Parse() call;
   longer input is fed to Expat in pieces of this size. */
#define MAX_CHUNK_SIZE (1 << 20)
     707  
     708  /*[clinic input]
     709  pyexpat.xmlparser.Parse
     710  
     711      cls: defining_class
     712      data: object
     713      isfinal: bool(accept={int}) = False
     714      /
     715  
     716  Parse XML data.
     717  
     718  `isfinal' should be true at end of input.
     719  [clinic start generated code]*/
     720  
     721  static PyObject *
     722  pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyTypeObject *cls,
     723                               PyObject *data, int isfinal)
     724  /*[clinic end generated code: output=8faffe07fe1f862a input=fc97f833558ca715]*/
     725  {
     726      const char *s;
     727      Py_ssize_t slen;
     728      Py_buffer view;
     729      int rc;
     730      pyexpat_state *state = PyType_GetModuleState(cls);
     731  
     732      if (PyUnicode_Check(data)) {
     733          view.buf = NULL;
     734          s = PyUnicode_AsUTF8AndSize(data, &slen);
     735          if (s == NULL)
     736              return NULL;
     737          /* Explicitly set UTF-8 encoding. Return code ignored. */
     738          (void)XML_SetEncoding(self->itself, "utf-8");
     739      }
     740      else {
     741          if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
     742              return NULL;
     743          s = view.buf;
     744          slen = view.len;
     745      }
     746  
     747      static_assert(MAX_CHUNK_SIZE <= INT_MAX,
     748                    "MAX_CHUNK_SIZE is larger than INT_MAX");
     749      while (slen > MAX_CHUNK_SIZE) {
     750          rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
     751          if (!rc)
     752              goto done;
     753          s += MAX_CHUNK_SIZE;
     754          slen -= MAX_CHUNK_SIZE;
     755      }
     756  
     757      assert(slen <= INT_MAX);
     758      rc = XML_Parse(self->itself, s, (int)slen, isfinal);
     759  
     760  done:
     761      if (view.buf != NULL) {
     762          PyBuffer_Release(&view);
     763      }
     764      return get_parse_result(state, self, rc);
     765  }
     766  
     767  /* File reading copied from cPickle */
     768  
/* Number of bytes ParseFile() requests from the file object's read method
   per iteration; also the size of the Expat buffer it asks for. */
#define BUF_SIZE 2048
     770  
     771  static int
     772  readinst(char *buf, int buf_size, PyObject *meth)
     773  {
     774      PyObject *str;
     775      Py_ssize_t len;
     776      const char *ptr;
     777  
     778      str = PyObject_CallFunction(meth, "i", buf_size);
     779      if (str == NULL)
     780          goto error;
     781  
     782      if (PyBytes_Check(str))
     783          ptr = PyBytes_AS_STRING(str);
     784      else if (PyByteArray_Check(str))
     785          ptr = PyByteArray_AS_STRING(str);
     786      else {
     787          PyErr_Format(PyExc_TypeError,
     788                       "read() did not return a bytes object (type=%.400s)",
     789                       Py_TYPE(str)->tp_name);
     790          goto error;
     791      }
     792      len = Py_SIZE(str);
     793      if (len > buf_size) {
     794          PyErr_Format(PyExc_ValueError,
     795                       "read() returned too much data: "
     796                       "%i bytes requested, %zd returned",
     797                       buf_size, len);
     798          goto error;
     799      }
     800      memcpy(buf, ptr, len);
     801      Py_DECREF(str);
     802      /* len <= buf_size <= INT_MAX */
     803      return (int)len;
     804  
     805  error:
     806      Py_XDECREF(str);
     807      return -1;
     808  }
     809  
     810  /*[clinic input]
     811  pyexpat.xmlparser.ParseFile
     812  
     813      cls: defining_class
     814      file: object
     815      /
     816  
     817  Parse XML data from file-like object.
     818  [clinic start generated code]*/
     819  
     820  static PyObject *
     821  pyexpat_xmlparser_ParseFile_impl(xmlparseobject *self, PyTypeObject *cls,
     822                                   PyObject *file)
     823  /*[clinic end generated code: output=34780a094c8ca3ae input=ba4bc9c541684793]*/
     824  {
     825      int rv = 1;
     826      PyObject *readmethod = NULL;
     827  
     828      pyexpat_state *state = PyType_GetModuleState(cls);
     829  
     830      if (_PyObject_LookupAttr(file, state->str_read, &readmethod) < 0) {
     831          return NULL;
     832      }
     833      if (readmethod == NULL) {
     834          PyErr_SetString(PyExc_TypeError,
     835                          "argument must have 'read' attribute");
     836          return NULL;
     837      }
     838      for (;;) {
     839          int bytes_read;
     840          void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
     841          if (buf == NULL) {
     842              Py_XDECREF(readmethod);
     843              return get_parse_result(state, self, 0);
     844          }
     845  
     846          bytes_read = readinst(buf, BUF_SIZE, readmethod);
     847          if (bytes_read < 0) {
     848              Py_DECREF(readmethod);
     849              return NULL;
     850          }
     851          rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
     852          if (PyErr_Occurred()) {
     853              Py_XDECREF(readmethod);
     854              return NULL;
     855          }
     856  
     857          if (!rv || bytes_read == 0)
     858              break;
     859      }
     860      Py_XDECREF(readmethod);
     861      return get_parse_result(state, self, rv);
     862  }
     863  
     864  /*[clinic input]
     865  pyexpat.xmlparser.SetBase
     866  
     867      base: str
     868      /
     869  
     870  Set the base URL for the parser.
     871  [clinic start generated code]*/
     872  
     873  static PyObject *
     874  pyexpat_xmlparser_SetBase_impl(xmlparseobject *self, const char *base)
     875  /*[clinic end generated code: output=c212ddceb607b539 input=c684e5de895ee1a8]*/
     876  {
     877      if (!XML_SetBase(self->itself, base)) {
     878          return PyErr_NoMemory();
     879      }
     880      Py_RETURN_NONE;
     881  }
     882  
     883  /*[clinic input]
     884  pyexpat.xmlparser.GetBase
     885  
     886  Return base URL string for the parser.
     887  [clinic start generated code]*/
     888  
     889  static PyObject *
     890  pyexpat_xmlparser_GetBase_impl(xmlparseobject *self)
     891  /*[clinic end generated code: output=2886cb21f9a8739a input=918d71c38009620e]*/
     892  {
     893      return Py_BuildValue("z", XML_GetBase(self->itself));
     894  }
     895  
     896  /*[clinic input]
     897  pyexpat.xmlparser.GetInputContext
     898  
     899  Return the untranslated text of the input that caused the current event.
     900  
     901  If the event was generated by a large amount of text (such as a start tag
     902  for an element with many attributes), not all of the text may be available.
     903  [clinic start generated code]*/
     904  
     905  static PyObject *
     906  pyexpat_xmlparser_GetInputContext_impl(xmlparseobject *self)
     907  /*[clinic end generated code: output=a88026d683fc22cc input=034df8712db68379]*/
     908  {
     909      if (self->in_callback) {
     910          int offset, size;
     911          const char *buffer
     912              = XML_GetInputContext(self->itself, &offset, &size);
     913  
     914          if (buffer != NULL)
     915              return PyBytes_FromStringAndSize(buffer + offset,
     916                                                size - offset);
     917          else
     918              Py_RETURN_NONE;
     919      }
     920      else
     921          Py_RETURN_NONE;
     922  }
     923  
     924  /*[clinic input]
     925  pyexpat.xmlparser.ExternalEntityParserCreate
     926  
     927      cls: defining_class
     928      context: str(accept={str, NoneType})
     929      encoding: str = NULL
     930      /
     931  
     932  Create a parser for parsing an external entity based on the information passed to the ExternalEntityRefHandler.
     933  [clinic start generated code]*/
     934  
     935  static PyObject *
     936  pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
     937                                                    PyTypeObject *cls,
     938                                                    const char *context,
     939                                                    const char *encoding)
     940  /*[clinic end generated code: output=01d4472b49cb3f92 input=ec70c6b9e6e9619a]*/
     941  {
     942      xmlparseobject *new_parser;
     943      int i;
     944  
     945      pyexpat_state *state = PyType_GetModuleState(cls);
     946  
     947      new_parser = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
     948      if (new_parser == NULL) {
     949          return NULL;
     950      }
     951  
     952      new_parser->buffer_size = self->buffer_size;
     953      new_parser->buffer_used = 0;
     954      new_parser->buffer = NULL;
     955      new_parser->ordered_attributes = self->ordered_attributes;
     956      new_parser->specified_attributes = self->specified_attributes;
     957      new_parser->in_callback = 0;
     958      new_parser->ns_prefixes = self->ns_prefixes;
     959      new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
     960                                                          encoding);
     961      new_parser->handlers = 0;
     962      new_parser->intern = self->intern;
     963      Py_XINCREF(new_parser->intern);
     964  
     965      if (self->buffer != NULL) {
     966          new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
     967          if (new_parser->buffer == NULL) {
     968              Py_DECREF(new_parser);
     969              return PyErr_NoMemory();
     970          }
     971      }
     972      if (!new_parser->itself) {
     973          Py_DECREF(new_parser);
     974          return PyErr_NoMemory();
     975      }
     976  
     977      XML_SetUserData(new_parser->itself, (void *)new_parser);
     978  
     979      /* allocate and clear handlers first */
     980      for (i = 0; handler_info[i].name != NULL; i++)
     981          /* do nothing */;
     982  
     983      new_parser->handlers = PyMem_New(PyObject *, i);
     984      if (!new_parser->handlers) {
     985          Py_DECREF(new_parser);
     986          return PyErr_NoMemory();
     987      }
     988      clear_handlers(new_parser, 1);
     989  
     990      /* then copy handlers from self */
     991      for (i = 0; handler_info[i].name != NULL; i++) {
     992          PyObject *handler = self->handlers[i];
     993          if (handler != NULL) {
     994              Py_INCREF(handler);
     995              new_parser->handlers[i] = handler;
     996              handler_info[i].setter(new_parser->itself,
     997                                     handler_info[i].handler);
     998          }
     999      }
    1000  
    1001      PyObject_GC_Track(new_parser);
    1002      return (PyObject *)new_parser;
    1003  }
    1004  
    1005  /*[clinic input]
    1006  pyexpat.xmlparser.SetParamEntityParsing
    1007  
    1008      flag: int
    1009      /
    1010  
    1011  Controls parsing of parameter entities (including the external DTD subset).
    1012  
    1013  Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,
    1014  XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and
    1015  XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag
    1016  was successful.
    1017  [clinic start generated code]*/
    1018  
    1019  static PyObject *
    1020  pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject *self, int flag)
    1021  /*[clinic end generated code: output=18668ee8e760d64c input=8aea19b4b15e9af1]*/
    1022  {
    1023      flag = XML_SetParamEntityParsing(self->itself, flag);
    1024      return PyLong_FromLong(flag);
    1025  }
    1026  
    1027  
    1028  #if XML_COMBINED_VERSION >= 19505
    1029  /*[clinic input]
    1030  pyexpat.xmlparser.UseForeignDTD
    1031  
    1032      cls: defining_class
    1033      flag: bool = True
    1034      /
    1035  
    1036  Allows the application to provide an artificial external subset if one is not specified as part of the document instance.
    1037  
    1038  This readily allows the use of a 'default' document type controlled by the
    1039  application, while still getting the advantage of providing document type
    1040  information to the parser. 'flag' defaults to True if not provided.
    1041  [clinic start generated code]*/
    1042  
    1043  static PyObject *
    1044  pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, PyTypeObject *cls,
    1045                                       int flag)
    1046  /*[clinic end generated code: output=d7d98252bd25a20f input=23440ecb0573fb29]*/
    1047  {
    1048      pyexpat_state *state = PyType_GetModuleState(cls);
    1049      enum XML_Error rc;
    1050  
    1051      rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
    1052      if (rc != XML_ERROR_NONE) {
    1053          return set_error(state, self, rc);
    1054      }
    1055      Py_RETURN_NONE;
    1056  }
    1057  #endif
    1058  
/* Method table of the xmlparser type.  Every entry is a *_METHODDEF macro
   defined outside this part of the file (presumably generated by Argument
   Clinic).  UseForeignDTD is only listed when the Expat headers are
   version 1.95.5 or newer, matching the #if around its implementation. */
static struct PyMethodDef xmlparse_methods[] = {
    PYEXPAT_XMLPARSER_PARSE_METHODDEF
    PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF
    PYEXPAT_XMLPARSER_SETBASE_METHODDEF
    PYEXPAT_XMLPARSER_GETBASE_METHODDEF
    PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF
    PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF
    PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF
#if XML_COMBINED_VERSION >= 19505
    PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
#endif
    {NULL, NULL}  /* sentinel */
};
    1072  
    1073  /* ---------- */
    1074  
    1075  
    1076  
    1077  /* pyexpat international encoding support.
    1078     Make it as simple as possible.
    1079  */
    1080  
    1081  static int
    1082  PyUnknownEncodingHandler(void *encodingHandlerData,
    1083                           const XML_Char *name,
    1084                           XML_Encoding *info)
    1085  {
    1086      static unsigned char template_buffer[256] = {0};
    1087      PyObject* u;
    1088      int i;
    1089      const void *data;
    1090      unsigned int kind;
    1091  
    1092      if (PyErr_Occurred())
    1093          return XML_STATUS_ERROR;
    1094  
    1095      if (template_buffer[1] == 0) {
    1096          for (i = 0; i < 256; i++)
    1097              template_buffer[i] = i;
    1098      }
    1099  
    1100      u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
    1101      if (u == NULL || PyUnicode_READY(u)) {
    1102          Py_XDECREF(u);
    1103          return XML_STATUS_ERROR;
    1104      }
    1105  
    1106      if (PyUnicode_GET_LENGTH(u) != 256) {
    1107          Py_DECREF(u);
    1108          PyErr_SetString(PyExc_ValueError,
    1109                          "multi-byte encodings are not supported");
    1110          return XML_STATUS_ERROR;
    1111      }
    1112  
    1113      kind = PyUnicode_KIND(u);
    1114      data = PyUnicode_DATA(u);
    1115      for (i = 0; i < 256; i++) {
    1116          Py_UCS4 ch = PyUnicode_READ(kind, data, i);
    1117          if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
    1118              info->map[i] = ch;
    1119          else
    1120              info->map[i] = -1;
    1121      }
    1122  
    1123      info->data = NULL;
    1124      info->convert = NULL;
    1125      info->release = NULL;
    1126      Py_DECREF(u);
    1127  
    1128      return XML_STATUS_OK;
    1129  }
    1130  
    1131  
    1132  static PyObject *
    1133  newxmlparseobject(pyexpat_state *state, const char *encoding,
    1134                    const char *namespace_separator, PyObject *intern)
    1135  {
    1136      int i;
    1137      xmlparseobject *self;
    1138  
    1139      self = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
    1140      if (self == NULL)
    1141          return NULL;
    1142  
    1143      self->buffer = NULL;
    1144      self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
    1145      self->buffer_used = 0;
    1146      self->ordered_attributes = 0;
    1147      self->specified_attributes = 0;
    1148      self->in_callback = 0;
    1149      self->ns_prefixes = 0;
    1150      self->handlers = NULL;
    1151      self->intern = intern;
    1152      Py_XINCREF(self->intern);
    1153  
    1154      /* namespace_separator is either NULL or contains one char + \0 */
    1155      self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
    1156                                         namespace_separator);
    1157      if (self->itself == NULL) {
    1158          PyErr_SetString(PyExc_RuntimeError,
    1159                          "XML_ParserCreate failed");
    1160          Py_DECREF(self);
    1161          return NULL;
    1162      }
    1163  #if XML_COMBINED_VERSION >= 20100
    1164      /* This feature was added upstream in libexpat 2.1.0. */
    1165      XML_SetHashSalt(self->itself,
    1166                      (unsigned long)_Py_HashSecret.expat.hashsalt);
    1167  #endif
    1168      XML_SetUserData(self->itself, (void *)self);
    1169      XML_SetUnknownEncodingHandler(self->itself,
    1170                    (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
    1171  
    1172      for (i = 0; handler_info[i].name != NULL; i++)
    1173          /* do nothing */;
    1174  
    1175      self->handlers = PyMem_New(PyObject *, i);
    1176      if (!self->handlers) {
    1177          Py_DECREF(self);
    1178          return PyErr_NoMemory();
    1179      }
    1180      clear_handlers(self, 1);
    1181  
    1182      PyObject_GC_Track(self);
    1183      return (PyObject*)self;
    1184  }
    1185  
    1186  static int
    1187  xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
    1188  {
    1189      for (int i = 0; handler_info[i].name != NULL; i++) {
    1190          Py_VISIT(op->handlers[i]);
    1191      }
    1192      Py_VISIT(Py_TYPE(op));
    1193      return 0;
    1194  }
    1195  
    1196  static int
    1197  xmlparse_clear(xmlparseobject *op)
    1198  {
    1199      clear_handlers(op, 0);
    1200      Py_CLEAR(op->intern);
    1201      return 0;
    1202  }
    1203  
    1204  static void
    1205  xmlparse_dealloc(xmlparseobject *self)
    1206  {
    1207      PyObject_GC_UnTrack(self);
    1208      (void)xmlparse_clear(self);
    1209      if (self->itself != NULL)
    1210          XML_ParserFree(self->itself);
    1211      self->itself = NULL;
    1212  
    1213      if (self->handlers != NULL) {
    1214          PyMem_Free(self->handlers);
    1215          self->handlers = NULL;
    1216      }
    1217      if (self->buffer != NULL) {
    1218          PyMem_Free(self->buffer);
    1219          self->buffer = NULL;
    1220      }
    1221      PyTypeObject *tp = Py_TYPE(self);
    1222      PyObject_GC_Del(self);
    1223      Py_DECREF(tp);
    1224  }
    1225  
    1226  
    1227  static PyObject *
    1228  xmlparse_handler_getter(xmlparseobject *self, struct HandlerInfo *hi)
    1229  {
    1230      assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
    1231      int handlernum = (int)(hi - handler_info);
    1232      PyObject *result = self->handlers[handlernum];
    1233      if (result == NULL)
    1234          result = Py_None;
    1235      Py_INCREF(result);
    1236      return result;
    1237  }
    1238  
    1239  static int
    1240  xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *hi)
    1241  {
    1242      assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
    1243      int handlernum = (int)(hi - handler_info);
    1244      if (v == NULL) {
    1245          PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
    1246          return -1;
    1247      }
    1248      if (handlernum == CharacterData) {
    1249          /* If we're changing the character data handler, flush all
    1250           * cached data with the old handler.  Not sure there's a
    1251           * "right" thing to do, though, but this probably won't
    1252           * happen.
    1253           */
    1254          if (flush_character_buffer(self) < 0)
    1255              return -1;
    1256      }
    1257  
    1258      xmlhandler c_handler = NULL;
    1259      if (v == Py_None) {
    1260          /* If this is the character data handler, and a character
    1261             data handler is already active, we need to be more
    1262             careful.  What we can safely do is replace the existing
    1263             character data handler callback function with a no-op
    1264             function that will refuse to call Python.  The downside
    1265             is that this doesn't completely remove the character
    1266             data handler from the C layer if there's any callback
    1267             active, so Expat does a little more work than it
    1268             otherwise would, but that's really an odd case.  A more
    1269             elaborate system of handlers and state could remove the
    1270             C handler more effectively. */
    1271          if (handlernum == CharacterData && self->in_callback)
    1272              c_handler = noop_character_data_handler;
    1273          v = NULL;
    1274      }
    1275      else if (v != NULL) {
    1276          Py_INCREF(v);
    1277          c_handler = handler_info[handlernum].handler;
    1278      }
    1279      Py_XSETREF(self->handlers[handlernum], v);
    1280      handler_info[handlernum].setter(self->itself, c_handler);
    1281      return 0;
    1282  }
    1283  
/* INT_GETTER(name) defines xmlparse_<name>_getter(), a getter that calls
   XML_Get<name>() on the wrapped Expat parser, casts the result to long
   and returns it as a Python int.  The macro is only needed for the seven
   instantiations below and is undefined again right after them.
   NOTE(review): the cast to long could truncate if an Expat build returns
   a type wider than long from one of these functions -- confirm. */
#define INT_GETTER(name) \
    static PyObject * \
    xmlparse_##name##_getter(xmlparseobject *self, void *closure) \
    { \
        return PyLong_FromLong((long) XML_Get##name(self->itself)); \
    }
INT_GETTER(ErrorCode)
INT_GETTER(ErrorLineNumber)
INT_GETTER(ErrorColumnNumber)
INT_GETTER(ErrorByteIndex)
INT_GETTER(CurrentLineNumber)
INT_GETTER(CurrentColumnNumber)
INT_GETTER(CurrentByteIndex)

#undef INT_GETTER
    1299  
    1300  static PyObject *
    1301  xmlparse_buffer_text_getter(xmlparseobject *self, void *closure)
    1302  {
    1303      return PyBool_FromLong(self->buffer != NULL);
    1304  }
    1305  
    1306  static int
    1307  xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
    1308  {
    1309      if (v == NULL) {
    1310          PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
    1311          return -1;
    1312      }
    1313      int b = PyObject_IsTrue(v);
    1314      if (b < 0)
    1315          return -1;
    1316      if (b) {
    1317          if (self->buffer == NULL) {
    1318              self->buffer = PyMem_Malloc(self->buffer_size);
    1319              if (self->buffer == NULL) {
    1320                  PyErr_NoMemory();
    1321                  return -1;
    1322              }
    1323              self->buffer_used = 0;
    1324          }
    1325      }
    1326      else if (self->buffer != NULL) {
    1327          if (flush_character_buffer(self) < 0)
    1328              return -1;
    1329          PyMem_Free(self->buffer);
    1330          self->buffer = NULL;
    1331      }
    1332      return 0;
    1333  }
    1334  
    1335  static PyObject *
    1336  xmlparse_buffer_size_getter(xmlparseobject *self, void *closure)
    1337  {
    1338      return PyLong_FromLong((long) self->buffer_size);
    1339  }
    1340  
    1341  static int
    1342  xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
    1343  {
    1344      if (v == NULL) {
    1345          PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
    1346          return -1;
    1347      }
    1348      long new_buffer_size;
    1349      if (!PyLong_Check(v)) {
    1350          PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
    1351          return -1;
    1352      }
    1353  
    1354      new_buffer_size = PyLong_AsLong(v);
    1355      if (new_buffer_size <= 0) {
    1356          if (!PyErr_Occurred())
    1357              PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
    1358          return -1;
    1359      }
    1360  
    1361      /* trivial case -- no change */
    1362      if (new_buffer_size == self->buffer_size) {
    1363          return 0;
    1364      }
    1365  
    1366      /* check maximum */
    1367      if (new_buffer_size > INT_MAX) {
    1368          char errmsg[100];
    1369          sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
    1370          PyErr_SetString(PyExc_ValueError, errmsg);
    1371          return -1;
    1372      }
    1373  
    1374      if (self->buffer != NULL) {
    1375          /* there is already a buffer */
    1376          if (self->buffer_used != 0) {
    1377              if (flush_character_buffer(self) < 0) {
    1378                  return -1;
    1379              }
    1380          }
    1381          /* free existing buffer */
    1382          PyMem_Free(self->buffer);
    1383      }
    1384      self->buffer = PyMem_Malloc(new_buffer_size);
    1385      if (self->buffer == NULL) {
    1386          PyErr_NoMemory();
    1387          return -1;
    1388      }
    1389      self->buffer_size = new_buffer_size;
    1390      return 0;
    1391  }
    1392  
    1393  static PyObject *
    1394  xmlparse_buffer_used_getter(xmlparseobject *self, void *closure)
    1395  {
    1396      return PyLong_FromLong((long) self->buffer_used);
    1397  }
    1398  
    1399  static PyObject *
    1400  xmlparse_namespace_prefixes_getter(xmlparseobject *self, void *closure)
    1401  {
    1402      return PyBool_FromLong(self->ns_prefixes);
    1403  }
    1404  
    1405  static int
    1406  xmlparse_namespace_prefixes_setter(xmlparseobject *self, PyObject *v, void *closure)
    1407  {
    1408      if (v == NULL) {
    1409          PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
    1410          return -1;
    1411      }
    1412      int b = PyObject_IsTrue(v);
    1413      if (b < 0)
    1414          return -1;
    1415      self->ns_prefixes = b;
    1416      XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
    1417      return 0;
    1418  }
    1419  
    1420  static PyObject *
    1421  xmlparse_ordered_attributes_getter(xmlparseobject *self, void *closure)
    1422  {
    1423      return PyBool_FromLong(self->ordered_attributes);
    1424  }
    1425  
    1426  static int
    1427  xmlparse_ordered_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
    1428  {
    1429      if (v == NULL) {
    1430          PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
    1431          return -1;
    1432      }
    1433      int b = PyObject_IsTrue(v);
    1434      if (b < 0)
    1435          return -1;
    1436      self->ordered_attributes = b;
    1437      return 0;
    1438  }
    1439  
    1440  static PyObject *
    1441  xmlparse_specified_attributes_getter(xmlparseobject *self, void *closure)
    1442  {
    1443      return PyBool_FromLong((long) self->specified_attributes);
    1444  }
    1445  
    1446  static int
    1447  xmlparse_specified_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
    1448  {
    1449      if (v == NULL) {
    1450          PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
    1451          return -1;
    1452      }
    1453      int b = PyObject_IsTrue(v);
    1454      if (b < 0)
    1455          return -1;
    1456      self->specified_attributes = b;
    1457      return 0;
    1458  }
    1459  
/* Member table of the xmlparser type: exposes the `intern` field of the
   object as a read-only attribute of the same name. */
static PyMemberDef xmlparse_members[] = {
    {"intern", T_OBJECT, offsetof(xmlparseobject, intern), READONLY, NULL},
    {NULL}
};
    1464  
/* Helpers for the table below.  XMLPARSE_GETTER_DEF(name) produces a
   read-only entry backed by xmlparse_<name>_getter();
   XMLPARSE_GETTER_SETTER_DEF(name) additionally wires up
   xmlparse_<name>_setter().  Both leave doc and closure as NULL. */
#define XMLPARSE_GETTER_DEF(name) \
    {#name, (getter)xmlparse_##name##_getter, NULL, NULL},
#define XMLPARSE_GETTER_SETTER_DEF(name) \
    {#name, (getter)xmlparse_##name##_getter, \
            (setter)xmlparse_##name##_setter, NULL},

/* Computed attributes of the xmlparser type.  The handler attributes are
   not part of this table; init_handler_descrs() adds them to the type's
   dict separately. */
static PyGetSetDef xmlparse_getsetlist[] = {
    XMLPARSE_GETTER_DEF(ErrorCode)
    XMLPARSE_GETTER_DEF(ErrorLineNumber)
    XMLPARSE_GETTER_DEF(ErrorColumnNumber)
    XMLPARSE_GETTER_DEF(ErrorByteIndex)
    XMLPARSE_GETTER_DEF(CurrentLineNumber)
    XMLPARSE_GETTER_DEF(CurrentColumnNumber)
    XMLPARSE_GETTER_DEF(CurrentByteIndex)
    XMLPARSE_GETTER_SETTER_DEF(buffer_size)
    XMLPARSE_GETTER_SETTER_DEF(buffer_text)
    XMLPARSE_GETTER_DEF(buffer_used)
    XMLPARSE_GETTER_SETTER_DEF(namespace_prefixes)
    XMLPARSE_GETTER_SETTER_DEF(ordered_attributes)
    XMLPARSE_GETTER_SETTER_DEF(specified_attributes)
    {NULL},
};

#undef XMLPARSE_GETTER_DEF
#undef XMLPARSE_GETTER_SETTER_DEF
    1490  
/* Docstring of the xmlparser type. */
PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");

/* Slot table of the xmlparser type: deallocation and garbage-collection
   hooks, the docstring, and the method, member and getset tables defined
   above.  Terminated by the {0, 0} entry. */
static PyType_Slot _xml_parse_type_spec_slots[] = {
    {Py_tp_dealloc, xmlparse_dealloc},
    {Py_tp_doc, (void *)Xmlparsetype__doc__},
    {Py_tp_traverse, xmlparse_traverse},
    {Py_tp_clear, xmlparse_clear},
    {Py_tp_methods, xmlparse_methods},
    {Py_tp_members, xmlparse_members},
    {Py_tp_getset, xmlparse_getsetlist},
    {0, 0}
};
    1503  
/* Type specification for "pyexpat.xmlparser".  The flags make the type
   GC-aware, prevent direct instantiation from Python and mark the type as
   immutable. */
static PyType_Spec _xml_parse_type_spec = {
    .name = "pyexpat.xmlparser",
    .basicsize = sizeof(xmlparseobject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE),
    .slots = _xml_parse_type_spec_slots,
};
    1511  
    1512  /* End of code for xmlparser objects */
    1513  /* -------------------------------------------------------- */
    1514  
    1515  /*[clinic input]
    1516  pyexpat.ParserCreate
    1517  
    1518      encoding: str(accept={str, NoneType}) = None
    1519      namespace_separator: str(accept={str, NoneType}) = None
    1520      intern: object = NULL
    1521  
    1522  Return a new XML parser object.
    1523  [clinic start generated code]*/
    1524  
    1525  static PyObject *
    1526  pyexpat_ParserCreate_impl(PyObject *module, const char *encoding,
    1527                            const char *namespace_separator, PyObject *intern)
    1528  /*[clinic end generated code: output=295c0cf01ab1146c input=e8da8e8d7122cb5d]*/
    1529  {
    1530      PyObject *result;
    1531      int intern_decref = 0;
    1532  
    1533      if (namespace_separator != NULL
    1534          && strlen(namespace_separator) > 1) {
    1535          PyErr_SetString(PyExc_ValueError,
    1536                          "namespace_separator must be at most one"
    1537                          " character, omitted, or None");
    1538          return NULL;
    1539      }
    1540      /* Explicitly passing None means no interning is desired.
    1541         Not passing anything means that a new dictionary is used. */
    1542      if (intern == Py_None)
    1543          intern = NULL;
    1544      else if (intern == NULL) {
    1545          intern = PyDict_New();
    1546          if (!intern)
    1547              return NULL;
    1548          intern_decref = 1;
    1549      }
    1550      else if (!PyDict_Check(intern)) {
    1551          PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
    1552          return NULL;
    1553      }
    1554  
    1555      pyexpat_state *state = pyexpat_get_state(module);
    1556      result = newxmlparseobject(state, encoding, namespace_separator, intern);
    1557      if (intern_decref) {
    1558          Py_DECREF(intern);
    1559      }
    1560      return result;
    1561  }
    1562  
    1563  /*[clinic input]
    1564  pyexpat.ErrorString
    1565  
    1566      code: long
    1567      /
    1568  
    1569  Returns string error for given number.
    1570  [clinic start generated code]*/
    1571  
    1572  static PyObject *
    1573  pyexpat_ErrorString_impl(PyObject *module, long code)
    1574  /*[clinic end generated code: output=2feae50d166f2174 input=cc67de010d9e62b3]*/
    1575  {
    1576      return Py_BuildValue("z", XML_ErrorString((int)code));
    1577  }
    1578  
    1579  /* List of methods defined in the module */
    1580  
/* Module-level function table: ParserCreate() and ErrorString(), each
   contributed by a *_METHODDEF macro defined outside this part of the
   file (presumably generated by Argument Clinic). */
static struct PyMethodDef pyexpat_methods[] = {
    PYEXPAT_PARSERCREATE_METHODDEF
    PYEXPAT_ERRORSTRING_METHODDEF
    {NULL, NULL}  /* sentinel */
};
    1586  
    1587  /* Module docstring */
    1588  
    1589  PyDoc_STRVAR(pyexpat_module_documentation,
    1590  "Python wrapper for Expat parser.");
    1591  
    1592  /* Initialization function for the module */
    1593  
    1594  #ifndef MODULE_NAME
    1595  #define MODULE_NAME "pyexpat"
    1596  #endif
    1597  
    1598  static int init_handler_descrs(pyexpat_state *state)
    1599  {
    1600      int i;
    1601      assert(!PyType_HasFeature(state->xml_parse_type, Py_TPFLAGS_VALID_VERSION_TAG));
    1602      for (i = 0; handler_info[i].name != NULL; i++) {
    1603          struct HandlerInfo *hi = &handler_info[i];
    1604          hi->getset.name = hi->name;
    1605          hi->getset.get = (getter)xmlparse_handler_getter;
    1606          hi->getset.set = (setter)xmlparse_handler_setter;
    1607          hi->getset.closure = &handler_info[i];
    1608  
    1609          PyObject *descr = PyDescr_NewGetSet(state->xml_parse_type, &hi->getset);
    1610          if (descr == NULL)
    1611              return -1;
    1612  
    1613          if (PyDict_SetDefault(state->xml_parse_type->tp_dict, PyDescr_NAME(descr), descr) == NULL) {
    1614              Py_DECREF(descr);
    1615              return -1;
    1616          }
    1617          Py_DECREF(descr);
    1618      }
    1619      return 0;
    1620  }
    1621  
    1622  static PyObject *
    1623  add_submodule(PyObject *mod, const char *fullname)
    1624  {
    1625      const char *name = strrchr(fullname, '.') + 1;
    1626  
    1627      PyObject *submodule = PyModule_New(fullname);
    1628      if (submodule == NULL) {
    1629          return NULL;
    1630      }
    1631  
    1632      PyObject *mod_name = PyUnicode_FromString(fullname);
    1633      if (mod_name == NULL) {
    1634          Py_DECREF(submodule);
    1635          return NULL;
    1636      }
    1637  
    1638      if (_PyImport_SetModule(mod_name, submodule) < 0) {
    1639          Py_DECREF(submodule);
    1640          Py_DECREF(mod_name);
    1641          return NULL;
    1642      }
    1643      Py_DECREF(mod_name);
    1644  
    1645      /* gives away the reference to the submodule */
    1646      if (PyModule_AddObject(mod, name, submodule) < 0) {
    1647          Py_DECREF(submodule);
    1648          return NULL;
    1649      }
    1650  
    1651      return submodule;
    1652  }
    1653  
    1654  struct ErrorInfo {
    1655      const char * name;  /* Error constant name, e.g. "XML_ERROR_NO_MEMORY" */
    1656      const char * description;  /* Error description as returned by XML_ErrorString(<int>) */
    1657  };
    1658  
/* Table of error constants, indexed by numeric error code: the entry at
   index N describes error code N (add_error() passes the index directly to
   XML_ErrorString()).  The position of every entry is therefore significant.
   An entry whose name is NULL is skipped when the errors submodule is
   populated.  The description strings are only used as a fallback when
   XML_ErrorString() returns NULL for a code. */
static
struct ErrorInfo error_info_of[] = {
    {NULL, NULL},  /* XML_ERROR_NONE (value 0) is not exposed */

    {"XML_ERROR_NO_MEMORY", "out of memory"},
    {"XML_ERROR_SYNTAX", "syntax error"},
    {"XML_ERROR_NO_ELEMENTS", "no element found"},
    {"XML_ERROR_INVALID_TOKEN", "not well-formed (invalid token)"},
    {"XML_ERROR_UNCLOSED_TOKEN", "unclosed token"},
    {"XML_ERROR_PARTIAL_CHAR", "partial character"},
    {"XML_ERROR_TAG_MISMATCH", "mismatched tag"},
    {"XML_ERROR_DUPLICATE_ATTRIBUTE", "duplicate attribute"},
    {"XML_ERROR_JUNK_AFTER_DOC_ELEMENT", "junk after document element"},
    {"XML_ERROR_PARAM_ENTITY_REF", "illegal parameter entity reference"},
    {"XML_ERROR_UNDEFINED_ENTITY", "undefined entity"},
    {"XML_ERROR_RECURSIVE_ENTITY_REF", "recursive entity reference"},
    {"XML_ERROR_ASYNC_ENTITY", "asynchronous entity"},
    {"XML_ERROR_BAD_CHAR_REF", "reference to invalid character number"},
    {"XML_ERROR_BINARY_ENTITY_REF", "reference to binary entity"},
    {"XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF", "reference to external entity in attribute"},
    {"XML_ERROR_MISPLACED_XML_PI", "XML or text declaration not at start of entity"},
    {"XML_ERROR_UNKNOWN_ENCODING", "unknown encoding"},
    {"XML_ERROR_INCORRECT_ENCODING", "encoding specified in XML declaration is incorrect"},
    {"XML_ERROR_UNCLOSED_CDATA_SECTION", "unclosed CDATA section"},
    {"XML_ERROR_EXTERNAL_ENTITY_HANDLING", "error in processing external entity reference"},
    {"XML_ERROR_NOT_STANDALONE", "document is not standalone"},
    {"XML_ERROR_UNEXPECTED_STATE", "unexpected parser state - please send a bug report"},
    {"XML_ERROR_ENTITY_DECLARED_IN_PE", "entity declared in parameter entity"},
    {"XML_ERROR_FEATURE_REQUIRES_XML_DTD", "requested feature requires XML_DTD support in Expat"},
    {"XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING", "cannot change setting once parsing has begun"},

    /* Added in Expat 1.95.7. */
    {"XML_ERROR_UNBOUND_PREFIX", "unbound prefix"},

    /* Added in Expat 1.95.8. */
    {"XML_ERROR_UNDECLARING_PREFIX", "must not undeclare prefix"},
    {"XML_ERROR_INCOMPLETE_PE", "incomplete markup in parameter entity"},
    {"XML_ERROR_XML_DECL", "XML declaration not well-formed"},
    {"XML_ERROR_TEXT_DECL", "text declaration not well-formed"},
    {"XML_ERROR_PUBLICID", "illegal character(s) in public id"},
    {"XML_ERROR_SUSPENDED", "parser suspended"},
    {"XML_ERROR_NOT_SUSPENDED", "parser not suspended"},
    {"XML_ERROR_ABORTED", "parsing aborted"},
    {"XML_ERROR_FINISHED", "parsing finished"},
    {"XML_ERROR_SUSPEND_PE", "cannot suspend in external parameter entity"},

    /* Added in 2.0.0. */
    {"XML_ERROR_RESERVED_PREFIX_XML", "reserved prefix (xml) must not be undeclared or bound to another namespace name"},
    {"XML_ERROR_RESERVED_PREFIX_XMLNS", "reserved prefix (xmlns) must not be declared or undeclared"},
    {"XML_ERROR_RESERVED_NAMESPACE_URI", "prefix must not be bound to one of the reserved namespace names"},

    /* Added in 2.2.1. */
    {"XML_ERROR_INVALID_ARGUMENT", "invalid argument"},

    /* Added in 2.3.0. */
    {"XML_ERROR_NO_BUFFER", "a successful prior call to function XML_GetBuffer is required"},

    /* Added in 2.4.0. */
    {"XML_ERROR_AMPLIFICATION_LIMIT_BREACH", "limit on input amplification factor (from DTD and entities) breached"}
};
    1719  
    1720  static int
    1721  add_error(PyObject *errors_module, PyObject *codes_dict,
    1722            PyObject *rev_codes_dict, size_t error_index)
    1723  {
    1724      const char * const name = error_info_of[error_index].name;
    1725      const int error_code = (int)error_index;
    1726  
    1727      /* NOTE: This keeps the source of truth regarding error
    1728       *       messages with libexpat and (by definiton) in bulletproof sync
    1729       *       with the other uses of the XML_ErrorString function
    1730       *       elsewhere within this file.  pyexpat's copy of the messages
    1731       *       only acts as a fallback in case of outdated runtime libexpat,
    1732       *       where it returns NULL. */
    1733      const char *error_string = XML_ErrorString(error_code);
    1734      if (error_string == NULL) {
    1735          error_string = error_info_of[error_index].description;
    1736      }
    1737  
    1738      if (PyModule_AddStringConstant(errors_module, name, error_string) < 0) {
    1739          return -1;
    1740      }
    1741  
    1742      PyObject *num = PyLong_FromLong(error_code);
    1743      if (num == NULL) {
    1744          return -1;
    1745      }
    1746  
    1747      if (PyDict_SetItemString(codes_dict, error_string, num) < 0) {
    1748          Py_DECREF(num);
    1749          return -1;
    1750      }
    1751  
    1752      PyObject *str = PyUnicode_FromString(error_string);
    1753      if (str == NULL) {
    1754          Py_DECREF(num);
    1755          return -1;
    1756      }
    1757  
    1758      int res = PyDict_SetItem(rev_codes_dict, num, str);
    1759      Py_DECREF(str);
    1760      Py_DECREF(num);
    1761      if (res < 0) {
    1762          return -1;
    1763      }
    1764  
    1765      return 0;
    1766  }
    1767  
    1768  static int
    1769  add_errors_module(PyObject *mod)
    1770  {
    1771      // add_submodule() returns a borrowed ref.
    1772      PyObject *errors_module = add_submodule(mod, MODULE_NAME ".errors");
    1773      if (errors_module == NULL) {
    1774          return -1;
    1775      }
    1776  
    1777      PyObject *codes_dict = PyDict_New();
    1778      if (codes_dict == NULL) {
    1779          return -1;
    1780      }
    1781      PyObject *rev_codes_dict = PyDict_New();
    1782      if (rev_codes_dict == NULL) {
    1783          goto error;
    1784      }
    1785  
    1786      size_t error_index = 0;
    1787      for (; error_index < sizeof(error_info_of) / sizeof(struct ErrorInfo); error_index++) {
    1788          if (error_info_of[error_index].name == NULL) {
    1789              continue;
    1790          }
    1791  
    1792          if (add_error(errors_module, codes_dict, rev_codes_dict, error_index) < 0) {
    1793              goto error;
    1794          }
    1795      }
    1796  
    1797      if (PyModule_AddStringConstant(errors_module, "__doc__",
    1798                                     "Constants used to describe "
    1799                                     "error conditions.") < 0) {
    1800          goto error;
    1801      }
    1802  
    1803      int rc = PyModule_AddObjectRef(errors_module, "codes", codes_dict);
    1804      Py_CLEAR(codes_dict);
    1805      if (rc < 0) {
    1806          goto error;
    1807      }
    1808  
    1809      rc = PyModule_AddObjectRef(errors_module, "messages", rev_codes_dict);
    1810      Py_CLEAR(rev_codes_dict);
    1811      if (rc < 0) {
    1812          goto error;
    1813      }
    1814  
    1815      return 0;
    1816  
    1817  error:
    1818      Py_XDECREF(codes_dict);
    1819      Py_XDECREF(rev_codes_dict);
    1820      return -1;
    1821  }
    1822  
    1823  static int
    1824  add_model_module(PyObject *mod)
    1825  {
    1826      PyObject *model_module = add_submodule(mod, MODULE_NAME ".model");
    1827      if (model_module == NULL) {
    1828          return -1;
    1829      }
    1830  
    1831  #define MYCONST(c)  do {                                        \
    1832          if (PyModule_AddIntConstant(model_module, #c, c) < 0) { \
    1833              return -1;                                          \
    1834          }                                                       \
    1835      } while(0)
    1836  
    1837      if (PyModule_AddStringConstant(
    1838          model_module, "__doc__",
    1839          "Constants used to interpret content model information.") < 0) {
    1840          return -1;
    1841      }
    1842  
    1843      MYCONST(XML_CTYPE_EMPTY);
    1844      MYCONST(XML_CTYPE_ANY);
    1845      MYCONST(XML_CTYPE_MIXED);
    1846      MYCONST(XML_CTYPE_NAME);
    1847      MYCONST(XML_CTYPE_CHOICE);
    1848      MYCONST(XML_CTYPE_SEQ);
    1849  
    1850      MYCONST(XML_CQUANT_NONE);
    1851      MYCONST(XML_CQUANT_OPT);
    1852      MYCONST(XML_CQUANT_REP);
    1853      MYCONST(XML_CQUANT_PLUS);
    1854  #undef MYCONST
    1855      return 0;
    1856  }
    1857  
    1858  #if XML_COMBINED_VERSION > 19505
    1859  static int
    1860  add_features(PyObject *mod)
    1861  {
    1862      PyObject *list = PyList_New(0);
    1863      if (list == NULL) {
    1864          return -1;
    1865      }
    1866  
    1867      const XML_Feature *features = XML_GetFeatureList();
    1868      for (size_t i = 0; features[i].feature != XML_FEATURE_END; ++i) {
    1869          PyObject *item = Py_BuildValue("si", features[i].name,
    1870                                         features[i].value);
    1871          if (item == NULL) {
    1872              goto error;
    1873          }
    1874          int ok = PyList_Append(list, item);
    1875          Py_DECREF(item);
    1876          if (ok < 0) {
    1877              goto error;
    1878          }
    1879      }
    1880      if (PyModule_AddObject(mod, "features", list) < 0) {
    1881          goto error;
    1882      }
    1883      return 0;
    1884  
    1885  error:
    1886      Py_DECREF(list);
    1887      return -1;
    1888  }
    1889  #endif
    1890  
    1891  static int
    1892  pyexpat_exec(PyObject *mod)
    1893  {
    1894      pyexpat_state *state = pyexpat_get_state(mod);
    1895      state->str_read = PyUnicode_InternFromString("read");
    1896      if (state->str_read == NULL) {
    1897          return -1;
    1898      }
    1899      state->xml_parse_type = (PyTypeObject *)PyType_FromModuleAndSpec(
    1900          mod, &_xml_parse_type_spec, NULL);
    1901  
    1902      if (state->xml_parse_type == NULL) {
    1903          return -1;
    1904      }
    1905  
    1906      if (init_handler_descrs(state) < 0) {
    1907          return -1;
    1908      }
    1909      state->error = PyErr_NewException("xml.parsers.expat.ExpatError",
    1910                                        NULL, NULL);
    1911      if (state->error == NULL) {
    1912          return -1;
    1913      }
    1914  
    1915      /* Add some symbolic constants to the module */
    1916  
    1917      if (PyModule_AddObjectRef(mod, "error", state->error) < 0) {
    1918          return -1;
    1919      }
    1920  
    1921      if (PyModule_AddObjectRef(mod, "ExpatError", state->error) < 0) {
    1922          return -1;
    1923      }
    1924  
    1925      if (PyModule_AddObjectRef(mod, "XMLParserType",
    1926                             (PyObject *) state->xml_parse_type) < 0) {
    1927          return -1;
    1928      }
    1929  
    1930      if (PyModule_AddStringConstant(mod, "EXPAT_VERSION",
    1931                                     XML_ExpatVersion()) < 0) {
    1932          return -1;
    1933      }
    1934      {
    1935          XML_Expat_Version info = XML_ExpatVersionInfo();
    1936          PyObject *versionInfo = Py_BuildValue("(iii)",
    1937                                                info.major,
    1938                                                info.minor,
    1939                                                info.micro);
    1940          if (PyModule_AddObject(mod, "version_info", versionInfo) < 0) {
    1941              Py_DECREF(versionInfo);
    1942              return -1;
    1943          }
    1944      }
    1945      /* XXX When Expat supports some way of figuring out how it was
    1946         compiled, this should check and set native_encoding
    1947         appropriately.
    1948      */
    1949      if (PyModule_AddStringConstant(mod, "native_encoding", "UTF-8") < 0) {
    1950          return -1;
    1951      }
    1952  
    1953      if (add_errors_module(mod) < 0) {
    1954          return -1;
    1955      }
    1956  
    1957      if (add_model_module(mod) < 0) {
    1958          return -1;
    1959      }
    1960  
    1961  #if XML_COMBINED_VERSION > 19505
    1962      if (add_features(mod) < 0) {
    1963          return -1;
    1964      }
    1965  #endif
    1966  
    1967  #define MYCONST(c) do {                                 \
    1968          if (PyModule_AddIntConstant(mod, #c, c) < 0) {  \
    1969              return -1;                                  \
    1970          }                                               \
    1971      } while(0)
    1972  
    1973      MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
    1974      MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
    1975      MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
    1976  #undef MYCONST
    1977  
    1978      static struct PyExpat_CAPI capi;
    1979      /* initialize pyexpat dispatch table */
    1980      capi.size = sizeof(capi);
    1981      capi.magic = PyExpat_CAPI_MAGIC;
    1982      capi.MAJOR_VERSION = XML_MAJOR_VERSION;
    1983      capi.MINOR_VERSION = XML_MINOR_VERSION;
    1984      capi.MICRO_VERSION = XML_MICRO_VERSION;
    1985      capi.ErrorString = XML_ErrorString;
    1986      capi.GetErrorCode = XML_GetErrorCode;
    1987      capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
    1988      capi.GetErrorLineNumber = XML_GetErrorLineNumber;
    1989      capi.Parse = XML_Parse;
    1990      capi.ParserCreate_MM = XML_ParserCreate_MM;
    1991      capi.ParserFree = XML_ParserFree;
    1992      capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
    1993      capi.SetCommentHandler = XML_SetCommentHandler;
    1994      capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
    1995      capi.SetElementHandler = XML_SetElementHandler;
    1996      capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
    1997      capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
    1998      capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
    1999      capi.SetUserData = XML_SetUserData;
    2000      capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
    2001      capi.SetEncoding = XML_SetEncoding;
    2002      capi.DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
    2003  #if XML_COMBINED_VERSION >= 20100
    2004      capi.SetHashSalt = XML_SetHashSalt;
    2005  #else
    2006      capi.SetHashSalt = NULL;
    2007  #endif
    2008  
    2009      /* export using capsule */
    2010      PyObject *capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
    2011      if (capi_object == NULL) {
    2012          return -1;
    2013      }
    2014  
    2015      if (PyModule_AddObject(mod, "expat_CAPI", capi_object) < 0) {
    2016          Py_DECREF(capi_object);
    2017          return -1;
    2018      }
    2019  
    2020      return 0;
    2021  }
    2022  
    2023  static int
    2024  pyexpat_traverse(PyObject *module, visitproc visit, void *arg)
    2025  {
    2026      pyexpat_state *state = pyexpat_get_state(module);
    2027      Py_VISIT(state->xml_parse_type);
    2028      Py_VISIT(state->error);
    2029      Py_VISIT(state->str_read);
    2030      return 0;
    2031  }
    2032  
    2033  static int
    2034  pyexpat_clear(PyObject *module)
    2035  {
    2036      pyexpat_state *state = pyexpat_get_state(module);
    2037      Py_CLEAR(state->xml_parse_type);
    2038      Py_CLEAR(state->error);
    2039      Py_CLEAR(state->str_read);
    2040      return 0;
    2041  }
    2042  
    2043  static void
    2044  pyexpat_free(void *module)
    2045  {
    2046      pyexpat_clear((PyObject *)module);
    2047  }
    2048  
/* Module slots: pyexpat_exec is registered as the Py_mod_exec slot.  The
   {0, NULL} entry terminates the array. */
static PyModuleDef_Slot pyexpat_slots[] = {
    {Py_mod_exec, pyexpat_exec},
    {0, NULL}
};
    2053  
/* Module definition.  m_size reserves room for one pyexpat_state per module
   object; the traverse/clear/free hooks manage the references stored in
   that state. */
static struct PyModuleDef pyexpatmodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = MODULE_NAME,
    .m_doc = pyexpat_module_documentation,
    .m_size = sizeof(pyexpat_state),
    .m_methods = pyexpat_methods,
    .m_slots = pyexpat_slots,
    .m_traverse = pyexpat_traverse,
    .m_clear = pyexpat_clear,
    .m_free = pyexpat_free
};
    2065  
/* Module entry point: returns the result of PyModuleDef_Init() for
   pyexpatmodule.  The module contents are filled in by the Py_mod_exec slot
   (pyexpat_exec) listed in pyexpat_slots. */
PyMODINIT_FUNC
PyInit_pyexpat(void)
{
    return PyModuleDef_Init(&pyexpatmodule);
}
    2071  
    2072  static void
    2073  clear_handlers(xmlparseobject *self, int initial)
    2074  {
    2075      int i = 0;
    2076  
    2077      for (; handler_info[i].name != NULL; i++) {
    2078          if (initial)
    2079              self->handlers[i] = NULL;
    2080          else {
    2081              Py_CLEAR(self->handlers[i]);
    2082              handler_info[i].setter(self->itself, NULL);
    2083          }
    2084      }
    2085  }
    2086  
/* Table of supported handlers, terminated by an all-NULL sentinel entry.
   clear_handlers() uses an entry's index in this table as the index into the
   parser object's handlers array.
   NOTE(review): the entries appear in the same order as enum HandlerTypes
   near the top of the file; presumably the two must be kept in sync --
   confirm before reordering. */
static struct HandlerInfo handler_info[] = {

/* Expands to one table entry for handler `name`: the attribute name as a
   string, the Expat XML_Set<name> function, and the my_<name> callback. */
#define HANDLER_INFO(name) \
    {#name, (xmlhandlersetter)XML_Set##name, (xmlhandler)my_##name},

    HANDLER_INFO(StartElementHandler)
    HANDLER_INFO(EndElementHandler)
    HANDLER_INFO(ProcessingInstructionHandler)
    HANDLER_INFO(CharacterDataHandler)
    HANDLER_INFO(UnparsedEntityDeclHandler)
    HANDLER_INFO(NotationDeclHandler)
    HANDLER_INFO(StartNamespaceDeclHandler)
    HANDLER_INFO(EndNamespaceDeclHandler)
    HANDLER_INFO(CommentHandler)
    HANDLER_INFO(StartCdataSectionHandler)
    HANDLER_INFO(EndCdataSectionHandler)
    HANDLER_INFO(DefaultHandler)
    HANDLER_INFO(DefaultHandlerExpand)
    HANDLER_INFO(NotStandaloneHandler)
    HANDLER_INFO(ExternalEntityRefHandler)
    HANDLER_INFO(StartDoctypeDeclHandler)
    HANDLER_INFO(EndDoctypeDeclHandler)
    HANDLER_INFO(EntityDeclHandler)
    HANDLER_INFO(XmlDeclHandler)
    HANDLER_INFO(ElementDeclHandler)
    HANDLER_INFO(AttlistDeclHandler)
#if XML_COMBINED_VERSION >= 19504
    HANDLER_INFO(SkippedEntityHandler)
#endif

#undef HANDLER_INFO

    {NULL, NULL, NULL} /* sentinel */
};