python (3.12.0)

Browse
Build Log
Usage
       1  """Lightweight XML support for Python.
       2  
       3   XML is an inherently hierarchical data format, and the most natural way to
       4   represent it is with a tree.  This module has two classes for this purpose:
       5  
       6      1. ElementTree represents the whole XML document as a tree and
       7  
       8      2. Element represents a single node in this tree.
       9  
      10   Interactions with the whole document (reading and writing to/from files) are
      11   usually done on the ElementTree level.  Interactions with a single XML element
      12   and its sub-elements are done on the Element level.
      13  
      14   Element is a flexible container object designed to store hierarchical data
      15   structures in memory. It can be described as a cross between a list and a
      16   dictionary.  Each Element has a number of properties associated with it:
      17  
      18      'tag' - a string containing the element's name.
      19  
      20      'attributes' - a Python dictionary storing the element's attributes.
      21  
      22      'text' - a string containing the element's text content.
      23  
      24      'tail' - an optional string containing text after the element's end tag.
      25  
      26      And a number of child elements stored in a Python sequence.
      27  
      28   To create an element instance, use the Element constructor,
      29   or the SubElement factory function.
      30  
      31   You can also use the ElementTree class to wrap an element structure
      32   and convert it to and from XML.
      33  
      34  """
      35  
      36  #---------------------------------------------------------------------
      37  # Licensed to PSF under a Contributor Agreement.
      38  # See https://www.python.org/psf/license for licensing details.
      39  #
      40  # ElementTree
      41  # Copyright (c) 1999-2008 by Fredrik Lundh.  All rights reserved.
      42  #
      43  # fredrik@pythonware.com
      44  # http://www.pythonware.com
      45  # --------------------------------------------------------------------
      46  # The ElementTree toolkit is
      47  #
      48  # Copyright (c) 1999-2008 by Fredrik Lundh
      49  #
      50  # By obtaining, using, and/or copying this software and/or its
      51  # associated documentation, you agree that you have read, understood,
      52  # and will comply with the following terms and conditions:
      53  #
      54  # Permission to use, copy, modify, and distribute this software and
      55  # its associated documentation for any purpose and without fee is
      56  # hereby granted, provided that the above copyright notice appears in
      57  # all copies, and that both that copyright notice and this permission
      58  # notice appear in supporting documentation, and that the name of
      59  # Secret Labs AB or the author not be used in advertising or publicity
      60  # pertaining to distribution of the software without specific, written
      61  # prior permission.
      62  #
      63  # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
      64  # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
      65  # ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
      66  # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
      67  # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
      68  # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
      69  # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
      70  # OF THIS SOFTWARE.
      71  # --------------------------------------------------------------------
      72  
      73  __all__ = [
      74      # public symbols
      75      "Comment",
      76      "dump",
      77      "Element", "ElementTree",
      78      "fromstring", "fromstringlist",
      79      "indent", "iselement", "iterparse",
      80      "parse", "ParseError",
      81      "PI", "ProcessingInstruction",
      82      "QName",
      83      "SubElement",
      84      "tostring", "tostringlist",
      85      "TreeBuilder",
      86      "VERSION",
      87      "XML", "XMLID",
      88      "XMLParser", "XMLPullParser",
      89      "register_namespace",
      90      "canonicalize", "C14NWriterTarget",
      91      ]
      92  
      93  VERSION = "1.3.0"
      94  
      95  import sys
      96  import re
      97  import warnings
      98  import io
      99  import collections
     100  import collections.abc
     101  import contextlib
     102  
     103  from . import ElementPath
     104  
     105  
     106  class ESC[4;38;5;81mParseError(ESC[4;38;5;149mSyntaxError):
     107      """An error when parsing an XML document.
     108  
     109      In addition to its exception value, a ParseError contains
     110      two extra attributes:
     111          'code'     - the specific exception code
     112          'position' - the line and column of the error
     113  
     114      """
     115      pass
     116  
     117  # --------------------------------------------------------------------
     118  
     119  
     120  def iselement(element):
     121      """Return True if *element* appears to be an Element."""
     122      return hasattr(element, 'tag')
     123  
     124  
     125  class ESC[4;38;5;81mElement:
     126      """An XML element.
     127  
     128      This class is the reference implementation of the Element interface.
     129  
     130      An element's length is its number of subelements.  That means if you
     131      want to check if an element is truly empty, you should check BOTH
     132      its length AND its text attribute.
     133  
     134      The element tag, attribute names, and attribute values can be either
     135      bytes or strings.
     136  
     137      *tag* is the element name.  *attrib* is an optional dictionary containing
     138      element attributes. *extra* are additional element attributes given as
     139      keyword arguments.
     140  
     141      Example form:
     142          <tag attrib>text<child/>...</tag>tail
     143  
     144      """
     145  
     146      tag = None
     147      """The element's name."""
     148  
     149      attrib = None
     150      """Dictionary of the element's attributes."""
     151  
     152      text = None
     153      """
     154      Text before first subelement. This is either a string or the value None.
     155      Note that if there is no text, this attribute may be either
     156      None or the empty string, depending on the parser.
     157  
     158      """
     159  
     160      tail = None
     161      """
     162      Text after this element's end tag, but before the next sibling element's
     163      start tag.  This is either a string or the value None.  Note that if there
     164      was no text, this attribute may be either None or an empty string,
     165      depending on the parser.
     166  
     167      """
     168  
     169      def __init__(self, tag, attrib={}, **extra):
     170          if not isinstance(attrib, dict):
     171              raise TypeError("attrib must be dict, not %s" % (
     172                  attrib.__class__.__name__,))
     173          self.tag = tag
     174          self.attrib = {**attrib, **extra}
     175          self._children = []
     176  
     177      def __repr__(self):
     178          return "<%s %r at %#x>" % (self.__class__.__name__, self.tag, id(self))
     179  
     180      def makeelement(self, tag, attrib):
     181          """Create a new element with the same type.
     182  
     183          *tag* is a string containing the element name.
     184          *attrib* is a dictionary containing the element attributes.
     185  
     186          Do not call this method, use the SubElement factory function instead.
     187  
     188          """
     189          return self.__class__(tag, attrib)
     190  
     191      def __copy__(self):
     192          elem = self.makeelement(self.tag, self.attrib)
     193          elem.text = self.text
     194          elem.tail = self.tail
     195          elem[:] = self
     196          return elem
     197  
     198      def __len__(self):
     199          return len(self._children)
     200  
     201      def __bool__(self):
     202          warnings.warn(
     203              "Testing an element's truth value will raise an exception in "
     204              "future versions.  "
     205              "Use specific 'len(elem)' or 'elem is not None' test instead.",
     206              DeprecationWarning, stacklevel=2
     207              )
     208          return len(self._children) != 0 # emulate old behaviour, for now
     209  
     210      def __getitem__(self, index):
     211          return self._children[index]
     212  
     213      def __setitem__(self, index, element):
     214          if isinstance(index, slice):
     215              for elt in element:
     216                  self._assert_is_element(elt)
     217          else:
     218              self._assert_is_element(element)
     219          self._children[index] = element
     220  
     221      def __delitem__(self, index):
     222          del self._children[index]
     223  
     224      def append(self, subelement):
     225          """Add *subelement* to the end of this element.
     226  
     227          The new element will appear in document order after the last existing
     228          subelement (or directly after the text, if it's the first subelement),
     229          but before the end tag for this element.
     230  
     231          """
     232          self._assert_is_element(subelement)
     233          self._children.append(subelement)
     234  
     235      def extend(self, elements):
     236          """Append subelements from a sequence.
     237  
     238          *elements* is a sequence with zero or more elements.
     239  
     240          """
     241          for element in elements:
     242              self._assert_is_element(element)
     243              self._children.append(element)
     244  
     245      def insert(self, index, subelement):
     246          """Insert *subelement* at position *index*."""
     247          self._assert_is_element(subelement)
     248          self._children.insert(index, subelement)
     249  
     250      def _assert_is_element(self, e):
     251          # Need to refer to the actual Python implementation, not the
     252          # shadowing C implementation.
     253          if not isinstance(e, _Element_Py):
     254              raise TypeError('expected an Element, not %s' % type(e).__name__)
     255  
     256      def remove(self, subelement):
     257          """Remove matching subelement.
     258  
     259          Unlike the find methods, this method compares elements based on
     260          identity, NOT ON tag value or contents.  To remove subelements by
     261          other means, the easiest way is to use a list comprehension to
     262          select what elements to keep, and then use slice assignment to update
     263          the parent element.
     264  
     265          ValueError is raised if a matching element could not be found.
     266  
     267          """
     268          # assert iselement(element)
     269          self._children.remove(subelement)
     270  
     271      def find(self, path, namespaces=None):
     272          """Find first matching element by tag name or path.
     273  
     274          *path* is a string having either an element tag or an XPath,
     275          *namespaces* is an optional mapping from namespace prefix to full name.
     276  
     277          Return the first matching element, or None if no element was found.
     278  
     279          """
     280          return ElementPath.find(self, path, namespaces)
     281  
     282      def findtext(self, path, default=None, namespaces=None):
     283          """Find text for first matching element by tag name or path.
     284  
     285          *path* is a string having either an element tag or an XPath,
     286          *default* is the value to return if the element was not found,
     287          *namespaces* is an optional mapping from namespace prefix to full name.
     288  
     289          Return text content of first matching element, or default value if
     290          none was found.  Note that if an element is found having no text
     291          content, the empty string is returned.
     292  
     293          """
     294          return ElementPath.findtext(self, path, default, namespaces)
     295  
     296      def findall(self, path, namespaces=None):
     297          """Find all matching subelements by tag name or path.
     298  
     299          *path* is a string having either an element tag or an XPath,
     300          *namespaces* is an optional mapping from namespace prefix to full name.
     301  
     302          Returns list containing all matching elements in document order.
     303  
     304          """
     305          return ElementPath.findall(self, path, namespaces)
     306  
     307      def iterfind(self, path, namespaces=None):
     308          """Find all matching subelements by tag name or path.
     309  
     310          *path* is a string having either an element tag or an XPath,
     311          *namespaces* is an optional mapping from namespace prefix to full name.
     312  
     313          Return an iterable yielding all matching elements in document order.
     314  
     315          """
     316          return ElementPath.iterfind(self, path, namespaces)
     317  
     318      def clear(self):
     319          """Reset element.
     320  
     321          This function removes all subelements, clears all attributes, and sets
     322          the text and tail attributes to None.
     323  
     324          """
     325          self.attrib.clear()
     326          self._children = []
     327          self.text = self.tail = None
     328  
     329      def get(self, key, default=None):
     330          """Get element attribute.
     331  
     332          Equivalent to attrib.get, but some implementations may handle this a
     333          bit more efficiently.  *key* is what attribute to look for, and
     334          *default* is what to return if the attribute was not found.
     335  
     336          Returns a string containing the attribute value, or the default if
     337          attribute was not found.
     338  
     339          """
     340          return self.attrib.get(key, default)
     341  
     342      def set(self, key, value):
     343          """Set element attribute.
     344  
     345          Equivalent to attrib[key] = value, but some implementations may handle
     346          this a bit more efficiently.  *key* is what attribute to set, and
     347          *value* is the attribute value to set it to.
     348  
     349          """
     350          self.attrib[key] = value
     351  
     352      def keys(self):
     353          """Get list of attribute names.
     354  
     355          Names are returned in an arbitrary order, just like an ordinary
     356          Python dict.  Equivalent to attrib.keys()
     357  
     358          """
     359          return self.attrib.keys()
     360  
     361      def items(self):
     362          """Get element attributes as a sequence.
     363  
     364          The attributes are returned in arbitrary order.  Equivalent to
     365          attrib.items().
     366  
     367          Return a list of (name, value) tuples.
     368  
     369          """
     370          return self.attrib.items()
     371  
     372      def iter(self, tag=None):
     373          """Create tree iterator.
     374  
     375          The iterator loops over the element and all subelements in document
     376          order, returning all elements with a matching tag.
     377  
     378          If the tree structure is modified during iteration, new or removed
     379          elements may or may not be included.  To get a stable set, use the
     380          list() function on the iterator, and loop over the resulting list.
     381  
     382          *tag* is what tags to look for (default is to return all elements)
     383  
     384          Return an iterator containing all the matching elements.
     385  
     386          """
     387          if tag == "*":
     388              tag = None
     389          if tag is None or self.tag == tag:
     390              yield self
     391          for e in self._children:
     392              yield from e.iter(tag)
     393  
     394      def itertext(self):
     395          """Create text iterator.
     396  
     397          The iterator loops over the element and all subelements in document
     398          order, returning all inner text.
     399  
     400          """
     401          tag = self.tag
     402          if not isinstance(tag, str) and tag is not None:
     403              return
     404          t = self.text
     405          if t:
     406              yield t
     407          for e in self:
     408              yield from e.itertext()
     409              t = e.tail
     410              if t:
     411                  yield t
     412  
     413  
     414  def SubElement(parent, tag, attrib={}, **extra):
     415      """Subelement factory which creates an element instance, and appends it
     416      to an existing parent.
     417  
     418      The element tag, attribute names, and attribute values can be either
     419      bytes or Unicode strings.
     420  
     421      *parent* is the parent element, *tag* is the subelements name, *attrib* is
     422      an optional directory containing element attributes, *extra* are
     423      additional attributes given as keyword arguments.
     424  
     425      """
     426      attrib = {**attrib, **extra}
     427      element = parent.makeelement(tag, attrib)
     428      parent.append(element)
     429      return element
     430  
     431  
     432  def Comment(text=None):
     433      """Comment element factory.
     434  
     435      This function creates a special element which the standard serializer
     436      serializes as an XML comment.
     437  
     438      *text* is a string containing the comment string.
     439  
     440      """
     441      element = Element(Comment)
     442      element.text = text
     443      return element
     444  
     445  
     446  def ProcessingInstruction(target, text=None):
     447      """Processing Instruction element factory.
     448  
     449      This function creates a special element which the standard serializer
     450      serializes as an XML comment.
     451  
     452      *target* is a string containing the processing instruction, *text* is a
     453      string containing the processing instruction contents, if any.
     454  
     455      """
     456      element = Element(ProcessingInstruction)
     457      element.text = target
     458      if text:
     459          element.text = element.text + " " + text
     460      return element
     461  
     462  PI = ProcessingInstruction
     463  
     464  
     465  class ESC[4;38;5;81mQName:
     466      """Qualified name wrapper.
     467  
     468      This class can be used to wrap a QName attribute value in order to get
     469      proper namespace handing on output.
     470  
     471      *text_or_uri* is a string containing the QName value either in the form
     472      {uri}local, or if the tag argument is given, the URI part of a QName.
     473  
     474      *tag* is an optional argument which if given, will make the first
     475      argument (text_or_uri) be interpreted as a URI, and this argument (tag)
     476      be interpreted as a local name.
     477  
     478      """
     479      def __init__(self, text_or_uri, tag=None):
     480          if tag:
     481              text_or_uri = "{%s}%s" % (text_or_uri, tag)
     482          self.text = text_or_uri
     483      def __str__(self):
     484          return self.text
     485      def __repr__(self):
     486          return '<%s %r>' % (self.__class__.__name__, self.text)
     487      def __hash__(self):
     488          return hash(self.text)
     489      def __le__(self, other):
     490          if isinstance(other, QName):
     491              return self.text <= other.text
     492          return self.text <= other
     493      def __lt__(self, other):
     494          if isinstance(other, QName):
     495              return self.text < other.text
     496          return self.text < other
     497      def __ge__(self, other):
     498          if isinstance(other, QName):
     499              return self.text >= other.text
     500          return self.text >= other
     501      def __gt__(self, other):
     502          if isinstance(other, QName):
     503              return self.text > other.text
     504          return self.text > other
     505      def __eq__(self, other):
     506          if isinstance(other, QName):
     507              return self.text == other.text
     508          return self.text == other
     509  
     510  # --------------------------------------------------------------------
     511  
     512  
     513  class ESC[4;38;5;81mElementTree:
     514      """An XML element hierarchy.
     515  
     516      This class also provides support for serialization to and from
     517      standard XML.
     518  
     519      *element* is an optional root element node,
     520      *file* is an optional file handle or file name of an XML file whose
     521      contents will be used to initialize the tree with.
     522  
     523      """
     524      def __init__(self, element=None, file=None):
     525          # assert element is None or iselement(element)
     526          self._root = element # first node
     527          if file:
     528              self.parse(file)
     529  
     530      def getroot(self):
     531          """Return root element of this tree."""
     532          return self._root
     533  
     534      def _setroot(self, element):
     535          """Replace root element of this tree.
     536  
     537          This will discard the current contents of the tree and replace it
     538          with the given element.  Use with care!
     539  
     540          """
     541          # assert iselement(element)
     542          self._root = element
     543  
     544      def parse(self, source, parser=None):
     545          """Load external XML document into element tree.
     546  
     547          *source* is a file name or file object, *parser* is an optional parser
     548          instance that defaults to XMLParser.
     549  
     550          ParseError is raised if the parser fails to parse the document.
     551  
     552          Returns the root element of the given source document.
     553  
     554          """
     555          close_source = False
     556          if not hasattr(source, "read"):
     557              source = open(source, "rb")
     558              close_source = True
     559          try:
     560              if parser is None:
     561                  # If no parser was specified, create a default XMLParser
     562                  parser = XMLParser()
     563                  if hasattr(parser, '_parse_whole'):
     564                      # The default XMLParser, when it comes from an accelerator,
     565                      # can define an internal _parse_whole API for efficiency.
     566                      # It can be used to parse the whole source without feeding
     567                      # it with chunks.
     568                      self._root = parser._parse_whole(source)
     569                      return self._root
     570              while data := source.read(65536):
     571                  parser.feed(data)
     572              self._root = parser.close()
     573              return self._root
     574          finally:
     575              if close_source:
     576                  source.close()
     577  
     578      def iter(self, tag=None):
     579          """Create and return tree iterator for the root element.
     580  
     581          The iterator loops over all elements in this tree, in document order.
     582  
     583          *tag* is a string with the tag name to iterate over
     584          (default is to return all elements).
     585  
     586          """
     587          # assert self._root is not None
     588          return self._root.iter(tag)
     589  
     590      def find(self, path, namespaces=None):
     591          """Find first matching element by tag name or path.
     592  
     593          Same as getroot().find(path), which is Element.find()
     594  
     595          *path* is a string having either an element tag or an XPath,
     596          *namespaces* is an optional mapping from namespace prefix to full name.
     597  
     598          Return the first matching element, or None if no element was found.
     599  
     600          """
     601          # assert self._root is not None
     602          if path[:1] == "/":
     603              path = "." + path
     604              warnings.warn(
     605                  "This search is broken in 1.3 and earlier, and will be "
     606                  "fixed in a future version.  If you rely on the current "
     607                  "behaviour, change it to %r" % path,
     608                  FutureWarning, stacklevel=2
     609                  )
     610          return self._root.find(path, namespaces)
     611  
     612      def findtext(self, path, default=None, namespaces=None):
     613          """Find first matching element by tag name or path.
     614  
     615          Same as getroot().findtext(path),  which is Element.findtext()
     616  
     617          *path* is a string having either an element tag or an XPath,
     618          *namespaces* is an optional mapping from namespace prefix to full name.
     619  
     620          Return the first matching element, or None if no element was found.
     621  
     622          """
     623          # assert self._root is not None
     624          if path[:1] == "/":
     625              path = "." + path
     626              warnings.warn(
     627                  "This search is broken in 1.3 and earlier, and will be "
     628                  "fixed in a future version.  If you rely on the current "
     629                  "behaviour, change it to %r" % path,
     630                  FutureWarning, stacklevel=2
     631                  )
     632          return self._root.findtext(path, default, namespaces)
     633  
     634      def findall(self, path, namespaces=None):
     635          """Find all matching subelements by tag name or path.
     636  
     637          Same as getroot().findall(path), which is Element.findall().
     638  
     639          *path* is a string having either an element tag or an XPath,
     640          *namespaces* is an optional mapping from namespace prefix to full name.
     641  
     642          Return list containing all matching elements in document order.
     643  
     644          """
     645          # assert self._root is not None
     646          if path[:1] == "/":
     647              path = "." + path
     648              warnings.warn(
     649                  "This search is broken in 1.3 and earlier, and will be "
     650                  "fixed in a future version.  If you rely on the current "
     651                  "behaviour, change it to %r" % path,
     652                  FutureWarning, stacklevel=2
     653                  )
     654          return self._root.findall(path, namespaces)
     655  
     656      def iterfind(self, path, namespaces=None):
     657          """Find all matching subelements by tag name or path.
     658  
     659          Same as getroot().iterfind(path), which is element.iterfind()
     660  
     661          *path* is a string having either an element tag or an XPath,
     662          *namespaces* is an optional mapping from namespace prefix to full name.
     663  
     664          Return an iterable yielding all matching elements in document order.
     665  
     666          """
     667          # assert self._root is not None
     668          if path[:1] == "/":
     669              path = "." + path
     670              warnings.warn(
     671                  "This search is broken in 1.3 and earlier, and will be "
     672                  "fixed in a future version.  If you rely on the current "
     673                  "behaviour, change it to %r" % path,
     674                  FutureWarning, stacklevel=2
     675                  )
     676          return self._root.iterfind(path, namespaces)
     677  
     678      def write(self, file_or_filename,
     679                encoding=None,
     680                xml_declaration=None,
     681                default_namespace=None,
     682                method=None, *,
     683                short_empty_elements=True):
     684          """Write element tree to a file as XML.
     685  
     686          Arguments:
     687            *file_or_filename* -- file name or a file object opened for writing
     688  
     689            *encoding* -- the output encoding (default: US-ASCII)
     690  
     691            *xml_declaration* -- bool indicating if an XML declaration should be
     692                                 added to the output. If None, an XML declaration
     693                                 is added if encoding IS NOT either of:
     694                                 US-ASCII, UTF-8, or Unicode
     695  
     696            *default_namespace* -- sets the default XML namespace (for "xmlns")
     697  
     698            *method* -- either "xml" (default), "html, "text", or "c14n"
     699  
     700            *short_empty_elements* -- controls the formatting of elements
     701                                      that contain no content. If True (default)
     702                                      they are emitted as a single self-closed
     703                                      tag, otherwise they are emitted as a pair
     704                                      of start/end tags
     705  
     706          """
     707          if not method:
     708              method = "xml"
     709          elif method not in _serialize:
     710              raise ValueError("unknown method %r" % method)
     711          if not encoding:
     712              if method == "c14n":
     713                  encoding = "utf-8"
     714              else:
     715                  encoding = "us-ascii"
     716          with _get_writer(file_or_filename, encoding) as (write, declared_encoding):
     717              if method == "xml" and (xml_declaration or
     718                      (xml_declaration is None and
     719                       encoding.lower() != "unicode" and
     720                       declared_encoding.lower() not in ("utf-8", "us-ascii"))):
     721                  write("<?xml version='1.0' encoding='%s'?>\n" % (
     722                      declared_encoding,))
     723              if method == "text":
     724                  _serialize_text(write, self._root)
     725              else:
     726                  qnames, namespaces = _namespaces(self._root, default_namespace)
     727                  serialize = _serialize[method]
     728                  serialize(write, self._root, qnames, namespaces,
     729                            short_empty_elements=short_empty_elements)
     730  
     731      def write_c14n(self, file):
     732          # lxml.etree compatibility.  use output method instead
     733          return self.write(file, method="c14n")
     734  
     735  # --------------------------------------------------------------------
     736  # serialization support
     737  
@contextlib.contextmanager
def _get_writer(file_or_filename, encoding):
    """Context manager yielding ``(write, declared_encoding)`` for output.

    *file_or_filename* is either an object with a ``write`` attribute or
    something accepted by open().  *encoding* is the requested output
    encoding; the special value "unicode" (case-insensitive) selects text
    output.

    The yielded *write* callable accepts str.  *declared_encoding* is the
    encoding name yielded alongside it: for a file name, *encoding* with
    "unicode" replaced by "utf-8"; for a text writer, its ``encoding``
    attribute or "utf-8"; otherwise *encoding* itself.

    Files opened here are closed on exit; wrappers created around a
    caller-supplied object are detached on exit rather than closed.
    """
    # returns text write method and release all resources after using
    try:
        write = file_or_filename.write
    except AttributeError:
        # file_or_filename is a file name
        if encoding.lower() == "unicode":
            encoding="utf-8"
        # Characters the encoding cannot represent are written as XML
        # character references instead of raising.
        with open(file_or_filename, "w", encoding=encoding,
                  errors="xmlcharrefreplace") as file:
            yield file.write, encoding
    else:
        # file_or_filename is a file-like object
        # encoding determines if it is a text or binary writer
        if encoding.lower() == "unicode":
            # use a text writer as is
            yield write, getattr(file_or_filename, "encoding", None) or "utf-8"
        else:
            # wrap a binary writer with TextIOWrapper
            with contextlib.ExitStack() as stack:
                if isinstance(file_or_filename, io.BufferedIOBase):
                    file = file_or_filename
                elif isinstance(file_or_filename, io.RawIOBase):
                    file = io.BufferedWriter(file_or_filename)
                    # Keep the original file open when the BufferedWriter is
                    # destroyed
                    stack.callback(file.detach)
                else:
                    # This is to handle passed objects that aren't in the
                    # IOBase hierarchy, but just have a write method
                    file = io.BufferedIOBase()
                    file.writable = lambda: True
                    file.write = write
                    try:
                        # TextIOWrapper uses these methods to determine
                        # if BOM (for UTF-16, etc) should be added
                        file.seekable = file_or_filename.seekable
                        file.tell = file_or_filename.tell
                    except AttributeError:
                        # The object offers no seekable()/tell(); keep the
                        # BufferedIOBase defaults.
                        pass
                file = io.TextIOWrapper(file,
                                        encoding=encoding,
                                        errors="xmlcharrefreplace",
                                        newline="\n")
                # Keep the original file open when the TextIOWrapper is
                # destroyed
                stack.callback(file.detach)
                yield file.write, encoding
     787  
     788  def _namespaces(elem, default_namespace=None):
     789      # identify namespaces used in this tree
     790  
     791      # maps qnames to *encoded* prefix:local names
     792      qnames = {None: None}
     793  
     794      # maps uri:s to prefixes
     795      namespaces = {}
     796      if default_namespace:
     797          namespaces[default_namespace] = ""
     798  
     799      def add_qname(qname):
     800          # calculate serialized qname representation
     801          try:
     802              if qname[:1] == "{":
     803                  uri, tag = qname[1:].rsplit("}", 1)
     804                  prefix = namespaces.get(uri)
     805                  if prefix is None:
     806                      prefix = _namespace_map.get(uri)
     807                      if prefix is None:
     808                          prefix = "ns%d" % len(namespaces)
     809                      if prefix != "xml":
     810                          namespaces[uri] = prefix
     811                  if prefix:
     812                      qnames[qname] = "%s:%s" % (prefix, tag)
     813                  else:
     814                      qnames[qname] = tag # default element
     815              else:
     816                  if default_namespace:
     817                      # FIXME: can this be handled in XML 1.0?
     818                      raise ValueError(
     819                          "cannot use non-qualified names with "
     820                          "default_namespace option"
     821                          )
     822                  qnames[qname] = qname
     823          except TypeError:
     824              _raise_serialization_error(qname)
     825  
     826      # populate qname and namespaces table
     827      for elem in elem.iter():
     828          tag = elem.tag
     829          if isinstance(tag, QName):
     830              if tag.text not in qnames:
     831                  add_qname(tag.text)
     832          elif isinstance(tag, str):
     833              if tag not in qnames:
     834                  add_qname(tag)
     835          elif tag is not None and tag is not Comment and tag is not PI:
     836              _raise_serialization_error(tag)
     837          for key, value in elem.items():
     838              if isinstance(key, QName):
     839                  key = key.text
     840              if key not in qnames:
     841                  add_qname(key)
     842              if isinstance(value, QName) and value.text not in qnames:
     843                  add_qname(value.text)
     844          text = elem.text
     845          if isinstance(text, QName) and text.text not in qnames:
     846              add_qname(text.text)
     847      return qnames, namespaces
     848  
     849  def _serialize_xml(write, elem, qnames, namespaces,
     850                     short_empty_elements, **kwargs):
     851      tag = elem.tag
     852      text = elem.text
     853      if tag is Comment:
     854          write("<!--%s-->" % text)
     855      elif tag is ProcessingInstruction:
     856          write("<?%s?>" % text)
     857      else:
     858          tag = qnames[tag]
     859          if tag is None:
     860              if text:
     861                  write(_escape_cdata(text))
     862              for e in elem:
     863                  _serialize_xml(write, e, qnames, None,
     864                                 short_empty_elements=short_empty_elements)
     865          else:
     866              write("<" + tag)
     867              items = list(elem.items())
     868              if items or namespaces:
     869                  if namespaces:
     870                      for v, k in sorted(namespaces.items(),
     871                                         key=lambda x: x[1]):  # sort on prefix
     872                          if k:
     873                              k = ":" + k
     874                          write(" xmlns%s=\"%s\"" % (
     875                              k,
     876                              _escape_attrib(v)
     877                              ))
     878                  for k, v in items:
     879                      if isinstance(k, QName):
     880                          k = k.text
     881                      if isinstance(v, QName):
     882                          v = qnames[v.text]
     883                      else:
     884                          v = _escape_attrib(v)
     885                      write(" %s=\"%s\"" % (qnames[k], v))
     886              if text or len(elem) or not short_empty_elements:
     887                  write(">")
     888                  if text:
     889                      write(_escape_cdata(text))
     890                  for e in elem:
     891                      _serialize_xml(write, e, qnames, None,
     892                                     short_empty_elements=short_empty_elements)
     893                  write("</" + tag + ">")
     894              else:
     895                  write(" />")
     896      if elem.tail:
     897          write(_escape_cdata(elem.tail))
     898  
# Lower-case tag names for which the "html" serializer writes no end tag.
HTML_EMPTY = {"area", "base", "basefont", "br", "col", "embed", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param", "source",
              "track", "wbr"}
     902  
     903  def _serialize_html(write, elem, qnames, namespaces, **kwargs):
     904      tag = elem.tag
     905      text = elem.text
     906      if tag is Comment:
     907          write("<!--%s-->" % _escape_cdata(text))
     908      elif tag is ProcessingInstruction:
     909          write("<?%s?>" % _escape_cdata(text))
     910      else:
     911          tag = qnames[tag]
     912          if tag is None:
     913              if text:
     914                  write(_escape_cdata(text))
     915              for e in elem:
     916                  _serialize_html(write, e, qnames, None)
     917          else:
     918              write("<" + tag)
     919              items = list(elem.items())
     920              if items or namespaces:
     921                  if namespaces:
     922                      for v, k in sorted(namespaces.items(),
     923                                         key=lambda x: x[1]):  # sort on prefix
     924                          if k:
     925                              k = ":" + k
     926                          write(" xmlns%s=\"%s\"" % (
     927                              k,
     928                              _escape_attrib(v)
     929                              ))
     930                  for k, v in items:
     931                      if isinstance(k, QName):
     932                          k = k.text
     933                      if isinstance(v, QName):
     934                          v = qnames[v.text]
     935                      else:
     936                          v = _escape_attrib_html(v)
     937                      # FIXME: handle boolean attributes
     938                      write(" %s=\"%s\"" % (qnames[k], v))
     939              write(">")
     940              ltag = tag.lower()
     941              if text:
     942                  if ltag == "script" or ltag == "style":
     943                      write(text)
     944                  else:
     945                      write(_escape_cdata(text))
     946              for e in elem:
     947                  _serialize_html(write, e, qnames, None)
     948              if ltag not in HTML_EMPTY:
     949                  write("</" + tag + ">")
     950      if elem.tail:
     951          write(_escape_cdata(elem.tail))
     952  
     953  def _serialize_text(write, elem):
     954      for part in elem.itertext():
     955          write(part)
     956      if elem.tail:
     957          write(elem.tail)
     958  
# Dispatch table from output method name to serializer function.  Its keys
# are the method names ElementTree.write() accepts.
_serialize = {
    "xml": _serialize_xml,
    "html": _serialize_html,
    "text": _serialize_text,
# this optional method is imported at the end of the module
#   "c14n": _serialize_c14n,
}
     966  
     967  
     968  def register_namespace(prefix, uri):
     969      """Register a namespace prefix.
     970  
     971      The registry is global, and any existing mapping for either the
     972      given prefix or the namespace URI will be removed.
     973  
     974      *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
     975      attributes in this namespace will be serialized with prefix if possible.
     976  
     977      ValueError is raised if prefix is reserved or is invalid.
     978  
     979      """
     980      if re.match(r"ns\d+$", prefix):
     981          raise ValueError("Prefix format reserved for internal use")
     982      for k, v in list(_namespace_map.items()):
     983          if k == uri or v == prefix:
     984              del _namespace_map[k]
     985      _namespace_map[uri] = prefix
     986  
# Global registry mapping namespace URIs to their preferred prefixes.
# register_namespace() adds to and removes from this dict.
_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # dublin core
    "http://purl.org/dc/elements/1.1/": "dc",
}
# For tests and troubleshooting: expose the registry as an attribute of
# the register_namespace function.
register_namespace._namespace_map = _namespace_map
    1001  
    1002  def _raise_serialization_error(text):
    1003      raise TypeError(
    1004          "cannot serialize %r (type %s)" % (text, type(text).__name__)
    1005          )
    1006  
    1007  def _escape_cdata(text):
    1008      # escape character data
    1009      try:
    1010          # it's worth avoiding do-nothing calls for strings that are
    1011          # shorter than 500 characters, or so.  assume that's, by far,
    1012          # the most common case in most applications.
    1013          if "&" in text:
    1014              text = text.replace("&", "&amp;")
    1015          if "<" in text:
    1016              text = text.replace("<", "&lt;")
    1017          if ">" in text:
    1018              text = text.replace(">", "&gt;")
    1019          return text
    1020      except (TypeError, AttributeError):
    1021          _raise_serialization_error(text)
    1022  
    1023  def _escape_attrib(text):
    1024      # escape attribute value
    1025      try:
    1026          if "&" in text:
    1027              text = text.replace("&", "&amp;")
    1028          if "<" in text:
    1029              text = text.replace("<", "&lt;")
    1030          if ">" in text:
    1031              text = text.replace(">", "&gt;")
    1032          if "\"" in text:
    1033              text = text.replace("\"", "&quot;")
    1034          # Although section 2.11 of the XML specification states that CR or
    1035          # CR LN should be replaced with just LN, it applies only to EOLNs
    1036          # which take part of organizing file into lines. Within attributes,
    1037          # we are replacing these with entity numbers, so they do not count.
    1038          # http://www.w3.org/TR/REC-xml/#sec-line-ends
    1039          # The current solution, contained in following six lines, was
    1040          # discussed in issue 17582 and 39011.
    1041          if "\r" in text:
    1042              text = text.replace("\r", "&#13;")
    1043          if "\n" in text:
    1044              text = text.replace("\n", "&#10;")
    1045          if "\t" in text:
    1046              text = text.replace("\t", "&#09;")
    1047          return text
    1048      except (TypeError, AttributeError):
    1049          _raise_serialization_error(text)
    1050  
    1051  def _escape_attrib_html(text):
    1052      # escape attribute value
    1053      try:
    1054          if "&" in text:
    1055              text = text.replace("&", "&amp;")
    1056          if ">" in text:
    1057              text = text.replace(">", "&gt;")
    1058          if "\"" in text:
    1059              text = text.replace("\"", "&quot;")
    1060          return text
    1061      except (TypeError, AttributeError):
    1062          _raise_serialization_error(text)
    1063  
    1064  # --------------------------------------------------------------------
    1065  
    1066  def tostring(element, encoding=None, method=None, *,
    1067               xml_declaration=None, default_namespace=None,
    1068               short_empty_elements=True):
    1069      """Generate string representation of XML element.
    1070  
    1071      All subelements are included.  If encoding is "unicode", a string
    1072      is returned. Otherwise a bytestring is returned.
    1073  
    1074      *element* is an Element instance, *encoding* is an optional output
    1075      encoding defaulting to US-ASCII, *method* is an optional output which can
    1076      be one of "xml" (default), "html", "text" or "c14n", *default_namespace*
    1077      sets the default XML namespace (for "xmlns").
    1078  
    1079      Returns an (optionally) encoded string containing the XML data.
    1080  
    1081      """
    1082      stream = io.StringIO() if encoding == 'unicode' else io.BytesIO()
    1083      ElementTree(element).write(stream, encoding,
    1084                                 xml_declaration=xml_declaration,
    1085                                 default_namespace=default_namespace,
    1086                                 method=method,
    1087                                 short_empty_elements=short_empty_elements)
    1088      return stream.getvalue()
    1089  
    1090  class ESC[4;38;5;81m_ListDataStream(ESC[4;38;5;149mioESC[4;38;5;149m.ESC[4;38;5;149mBufferedIOBase):
    1091      """An auxiliary stream accumulating into a list reference."""
    1092      def __init__(self, lst):
    1093          self.lst = lst
    1094  
    1095      def writable(self):
    1096          return True
    1097  
    1098      def seekable(self):
    1099          return True
    1100  
    1101      def write(self, b):
    1102          self.lst.append(b)
    1103  
    1104      def tell(self):
    1105          return len(self.lst)
    1106  
    1107  def tostringlist(element, encoding=None, method=None, *,
    1108                   xml_declaration=None, default_namespace=None,
    1109                   short_empty_elements=True):
    1110      lst = []
    1111      stream = _ListDataStream(lst)
    1112      ElementTree(element).write(stream, encoding,
    1113                                 xml_declaration=xml_declaration,
    1114                                 default_namespace=default_namespace,
    1115                                 method=method,
    1116                                 short_empty_elements=short_empty_elements)
    1117      return lst
    1118  
    1119  
    1120  def dump(elem):
    1121      """Write element tree or element structure to sys.stdout.
    1122  
    1123      This function should be used for debugging only.
    1124  
    1125      *elem* is either an ElementTree, or a single Element.  The exact output
    1126      format is implementation dependent.  In this version, it's written as an
    1127      ordinary XML file.
    1128  
    1129      """
    1130      # debugging
    1131      if not isinstance(elem, ElementTree):
    1132          elem = ElementTree(elem)
    1133      elem.write(sys.stdout, encoding="unicode")
    1134      tail = elem.getroot().tail
    1135      if not tail or tail[-1] != "\n":
    1136          sys.stdout.write("\n")
    1137  
    1138  
    1139  def indent(tree, space="  ", level=0):
    1140      """Indent an XML document by inserting newlines and indentation space
    1141      after elements.
    1142  
    1143      *tree* is the ElementTree or Element to modify.  The (root) element
    1144      itself will not be changed, but the tail text of all elements in its
    1145      subtree will be adapted.
    1146  
    1147      *space* is the whitespace to insert for each indentation level, two
    1148      space characters by default.
    1149  
    1150      *level* is the initial indentation level. Setting this to a higher
    1151      value than 0 can be used for indenting subtrees that are more deeply
    1152      nested inside of a document.
    1153      """
    1154      if isinstance(tree, ElementTree):
    1155          tree = tree.getroot()
    1156      if level < 0:
    1157          raise ValueError(f"Initial indentation level must be >= 0, got {level}")
    1158      if not len(tree):
    1159          return
    1160  
    1161      # Reduce the memory consumption by reusing indentation strings.
    1162      indentations = ["\n" + level * space]
    1163  
    1164      def _indent_children(elem, level):
    1165          # Start a new indentation level for the first child.
    1166          child_level = level + 1
    1167          try:
    1168              child_indentation = indentations[child_level]
    1169          except IndexError:
    1170              child_indentation = indentations[level] + space
    1171              indentations.append(child_indentation)
    1172  
    1173          if not elem.text or not elem.text.strip():
    1174              elem.text = child_indentation
    1175  
    1176          for child in elem:
    1177              if len(child):
    1178                  _indent_children(child, child_level)
    1179              if not child.tail or not child.tail.strip():
    1180                  child.tail = child_indentation
    1181  
    1182          # Dedent after the last child by overwriting the previous indentation.
    1183          if not child.tail.strip():
    1184              child.tail = indentations[level]
    1185  
    1186      _indent_children(tree, 0)
    1187  
    1188  
    1189  # --------------------------------------------------------------------
    1190  # parsing
    1191  
    1192  
    1193  def parse(source, parser=None):
    1194      """Parse XML document into element tree.
    1195  
    1196      *source* is a filename or file object containing XML data,
    1197      *parser* is an optional parser instance defaulting to XMLParser.
    1198  
    1199      Return an ElementTree instance.
    1200  
    1201      """
    1202      tree = ElementTree()
    1203      tree.parse(source, parser)
    1204      return tree
    1205  
    1206  
    1207  def iterparse(source, events=None, parser=None):
    1208      """Incrementally parse XML document into ElementTree.
    1209  
    1210      This class also reports what's going on to the user based on the
    1211      *events* it is initialized with.  The supported events are the strings
    1212      "start", "end", "start-ns" and "end-ns" (the "ns" events are used to get
    1213      detailed namespace information).  If *events* is omitted, only
    1214      "end" events are reported.
    1215  
    1216      *source* is a filename or file object containing XML data, *events* is
    1217      a list of events to report back, *parser* is an optional parser instance.
    1218  
    1219      Returns an iterator providing (event, elem) pairs.
    1220  
    1221      """
    1222      # Use the internal, undocumented _parser argument for now; When the
    1223      # parser argument of iterparse is removed, this can be killed.
    1224      pullparser = XMLPullParser(events=events, _parser=parser)
    1225  
    1226      def iterator(source):
    1227          close_source = False
    1228          try:
    1229              if not hasattr(source, "read"):
    1230                  source = open(source, "rb")
    1231                  close_source = True
    1232              yield None
    1233              while True:
    1234                  yield from pullparser.read_events()
    1235                  # load event buffer
    1236                  data = source.read(16 * 1024)
    1237                  if not data:
    1238                      break
    1239                  pullparser.feed(data)
    1240              root = pullparser._close_and_return_root()
    1241              yield from pullparser.read_events()
    1242              it.root = root
    1243          finally:
    1244              if close_source:
    1245                  source.close()
    1246  
    1247      class ESC[4;38;5;81mIterParseIterator(ESC[4;38;5;149mcollectionsESC[4;38;5;149m.ESC[4;38;5;149mabcESC[4;38;5;149m.ESC[4;38;5;149mIterator):
    1248          __next__ = iterator(source).__next__
    1249      it = IterParseIterator()
    1250      it.root = None
    1251      del iterator, IterParseIterator
    1252  
    1253      next(it)
    1254      return it
    1255  
    1256  
    1257  class ESC[4;38;5;81mXMLPullParser:
    1258  
    1259      def __init__(self, events=None, *, _parser=None):
    1260          # The _parser argument is for internal use only and must not be relied
    1261          # upon in user code. It will be removed in a future release.
    1262          # See https://bugs.python.org/issue17741 for more details.
    1263  
    1264          self._events_queue = collections.deque()
    1265          self._parser = _parser or XMLParser(target=TreeBuilder())
    1266          # wire up the parser for event reporting
    1267          if events is None:
    1268              events = ("end",)
    1269          self._parser._setevents(self._events_queue, events)
    1270  
    1271      def feed(self, data):
    1272          """Feed encoded data to parser."""
    1273          if self._parser is None:
    1274              raise ValueError("feed() called after end of stream")
    1275          if data:
    1276              try:
    1277                  self._parser.feed(data)
    1278              except SyntaxError as exc:
    1279                  self._events_queue.append(exc)
    1280  
    1281      def _close_and_return_root(self):
    1282          # iterparse needs this to set its root attribute properly :(
    1283          root = self._parser.close()
    1284          self._parser = None
    1285          return root
    1286  
    1287      def close(self):
    1288          """Finish feeding data to parser.
    1289  
    1290          Unlike XMLParser, does not return the root element. Use
    1291          read_events() to consume elements from XMLPullParser.
    1292          """
    1293          self._close_and_return_root()
    1294  
    1295      def read_events(self):
    1296          """Return an iterator over currently available (event, elem) pairs.
    1297  
    1298          Events are consumed from the internal event queue as they are
    1299          retrieved from the iterator.
    1300          """
    1301          events = self._events_queue
    1302          while events:
    1303              event = events.popleft()
    1304              if isinstance(event, Exception):
    1305                  raise event
    1306              else:
    1307                  yield event
    1308  
    1309  
    1310  def XML(text, parser=None):
    1311      """Parse XML document from string constant.
    1312  
    1313      This function can be used to embed "XML Literals" in Python code.
    1314  
    1315      *text* is a string containing XML data, *parser* is an
    1316      optional parser instance, defaulting to the standard XMLParser.
    1317  
    1318      Returns an Element instance.
    1319  
    1320      """
    1321      if not parser:
    1322          parser = XMLParser(target=TreeBuilder())
    1323      parser.feed(text)
    1324      return parser.close()
    1325  
    1326  
    1327  def XMLID(text, parser=None):
    1328      """Parse XML document from string constant for its IDs.
    1329  
    1330      *text* is a string containing XML data, *parser* is an
    1331      optional parser instance, defaulting to the standard XMLParser.
    1332  
    1333      Returns an (Element, dict) tuple, in which the
    1334      dict maps element id:s to elements.
    1335  
    1336      """
    1337      if not parser:
    1338          parser = XMLParser(target=TreeBuilder())
    1339      parser.feed(text)
    1340      tree = parser.close()
    1341      ids = {}
    1342      for elem in tree.iter():
    1343          id = elem.get("id")
    1344          if id:
    1345              ids[id] = elem
    1346      return tree, ids
    1347  
# Parse XML document from string constant.  Alias for XML(): both names
# refer to the same function object.
fromstring = XML
    1350  
    1351  def fromstringlist(sequence, parser=None):
    1352      """Parse XML document from sequence of string fragments.
    1353  
    1354      *sequence* is a list of other sequence, *parser* is an optional parser
    1355      instance, defaulting to the standard XMLParser.
    1356  
    1357      Returns an Element instance.
    1358  
    1359      """
    1360      if not parser:
    1361          parser = XMLParser(target=TreeBuilder())
    1362      for text in sequence:
    1363          parser.feed(text)
    1364      return parser.close()
    1365  
    1366  # --------------------------------------------------------------------
    1367  
    1368  
    1369  class ESC[4;38;5;81mTreeBuilder:
    1370      """Generic element structure builder.
    1371  
    1372      This builder converts a sequence of start, data, and end method
    1373      calls to a well-formed element structure.
    1374  
    1375      You can use this class to build an element structure using a custom XML
    1376      parser, or a parser for some other XML-like format.
    1377  
    1378      *element_factory* is an optional element factory which is called
    1379      to create new Element instances, as necessary.
    1380  
    1381      *comment_factory* is a factory to create comments to be used instead of
    1382      the standard factory.  If *insert_comments* is false (the default),
    1383      comments will not be inserted into the tree.
    1384  
    1385      *pi_factory* is a factory to create processing instructions to be used
    1386      instead of the standard factory.  If *insert_pis* is false (the default),
    1387      processing instructions will not be inserted into the tree.
    1388      """
    def __init__(self, element_factory=None, *,
                 comment_factory=None, pi_factory=None,
                 insert_comments=False, insert_pis=False):
        """Set up an empty builder.

        Each factory left as None falls back to the module default:
        Element, Comment and ProcessingInstruction respectively.  The two
        insert flags are stored as given.
        """
        # builder state
        self._data = []  # data collector
        self._elem = []  # element stack
        self._last = None  # last element
        self._root = None  # root element
        self._tail = None  # true if we're after an end tag

        # factories, with the standard ones as fallback
        self._factory = (
            Element if element_factory is None else element_factory)
        self._comment_factory = (
            Comment if comment_factory is None else comment_factory)
        self._pi_factory = (
            ProcessingInstruction if pi_factory is None else pi_factory)

        # whether comments / processing instructions go into the tree
        self.insert_comments = insert_comments
        self.insert_pis = insert_pis
    1408  
    def close(self):
        """Return the toplevel document Element.

        Asserts that no element is still open and that a root element
        has been seen.
        """
        open_elements = len(self._elem)
        assert open_elements == 0, "missing end tags"
        root = self._root
        assert root is not None, "missing toplevel element"
        return root
    1414  
    def _flush(self):
        # Move the collected character data onto the last element: into
        # its tail when we are after an end tag, into its text otherwise.
        # Without a last element the data is simply discarded.
        if not self._data:
            return
        target = self._last
        if target is not None:
            text = "".join(self._data)
            if self._tail:
                assert target.tail is None, "internal error (tail)"
                target.tail = text
            else:
                assert target.text is None, "internal error (text)"
                target.text = text
        self._data = []
    1426  
    1427      def data(self, data):
    1428          """Add text to current element."""
    1429          self._data.append(data)
    1430  
    1431      def start(self, tag, attrs):
    1432          """Open new element and return it.
    1433  
    1434          *tag* is the element name, *attrs* is a dict containing element
    1435          attributes.
    1436  
    1437          """
    1438          self._flush()
    1439          self._last = elem = self._factory(tag, attrs)
    1440          if self._elem:
    1441              self._elem[-1].append(elem)
    1442          elif self._root is None:
    1443              self._root = elem
    1444          self._elem.append(elem)
    1445          self._tail = 0
    1446          return elem
    1447  
    1448      def end(self, tag):
    1449          """Close and return current Element.
    1450  
    1451          *tag* is the element name.
    1452  
    1453          """
    1454          self._flush()
    1455          self._last = self._elem.pop()
    1456          assert self._last.tag == tag,\
    1457                 "end tag mismatch (expected %s, got %s)" % (
    1458                     self._last.tag, tag)
    1459          self._tail = 1
    1460          return self._last
    1461  
    1462      def comment(self, text):
    1463          """Create a comment using the comment_factory.
    1464  
    1465          *text* is the text of the comment.
    1466          """
    1467          return self._handle_single(
    1468              self._comment_factory, self.insert_comments, text)
    1469  
    1470      def pi(self, target, text=None):
    1471          """Create a processing instruction using the pi_factory.
    1472  
    1473          *target* is the target name of the processing instruction.
    1474          *text* is the data of the processing instruction, or ''.
    1475          """
    1476          return self._handle_single(
    1477              self._pi_factory, self.insert_pis, target, text)
    1478  
    1479      def _handle_single(self, factory, insert, *args):
    1480          elem = factory(*args)
    1481          if insert:
    1482              self._flush()
    1483              self._last = elem
    1484              if self._elem:
    1485                  self._elem[-1].append(elem)
    1486              self._tail = 1
    1487          return elem
    1488  
    1489  
    1490  # also see ElementTree and TreeBuilder
    1491  class ESC[4;38;5;81mXMLParser:
    1492      """Element structure builder for XML source data based on the expat parser.
    1493  
    1494      *target* is an optional target object which defaults to an instance of the
    1495      standard TreeBuilder class, *encoding* is an optional encoding string
    1496      which if given, overrides the encoding specified in the XML file:
    1497      http://www.iana.org/assignments/character-sets
    1498  
    1499      """
    1500  
    1501      def __init__(self, *, target=None, encoding=None):
    1502          try:
    1503              from xml.parsers import expat
    1504          except ImportError:
    1505              try:
    1506                  import pyexpat as expat
    1507              except ImportError:
    1508                  raise ImportError(
    1509                      "No module named expat; use SimpleXMLTreeBuilder instead"
    1510                      )
    1511          parser = expat.ParserCreate(encoding, "}")
    1512          if target is None:
    1513              target = TreeBuilder()
    1514          # underscored names are provided for compatibility only
    1515          self.parser = self._parser = parser
    1516          self.target = self._target = target
    1517          self._error = expat.error
    1518          self._names = {} # name memo cache
    1519          # main callbacks
    1520          parser.DefaultHandlerExpand = self._default
    1521          if hasattr(target, 'start'):
    1522              parser.StartElementHandler = self._start
    1523          if hasattr(target, 'end'):
    1524              parser.EndElementHandler = self._end
    1525          if hasattr(target, 'start_ns'):
    1526              parser.StartNamespaceDeclHandler = self._start_ns
    1527          if hasattr(target, 'end_ns'):
    1528              parser.EndNamespaceDeclHandler = self._end_ns
    1529          if hasattr(target, 'data'):
    1530              parser.CharacterDataHandler = target.data
    1531          # miscellaneous callbacks
    1532          if hasattr(target, 'comment'):
    1533              parser.CommentHandler = target.comment
    1534          if hasattr(target, 'pi'):
    1535              parser.ProcessingInstructionHandler = target.pi
    1536          # Configure pyexpat: buffering, new-style attribute handling.
    1537          parser.buffer_text = 1
    1538          parser.ordered_attributes = 1
    1539          self._doctype = None
    1540          self.entity = {}
    1541          try:
    1542              self.version = "Expat %d.%d.%d" % expat.version_info
    1543          except AttributeError:
    1544              pass # unknown
    1545  
    1546      def _setevents(self, events_queue, events_to_report):
    1547          # Internal API for XMLPullParser
    1548          # events_to_report: a list of events to report during parsing (same as
    1549          # the *events* of XMLPullParser's constructor.
    1550          # events_queue: a list of actual parsing events that will be populated
    1551          # by the underlying parser.
    1552          #
    1553          parser = self._parser
    1554          append = events_queue.append
    1555          for event_name in events_to_report:
    1556              if event_name == "start":
    1557                  parser.ordered_attributes = 1
    1558                  def handler(tag, attrib_in, event=event_name, append=append,
    1559                              start=self._start):
    1560                      append((event, start(tag, attrib_in)))
    1561                  parser.StartElementHandler = handler
    1562              elif event_name == "end":
    1563                  def handler(tag, event=event_name, append=append,
    1564                              end=self._end):
    1565                      append((event, end(tag)))
    1566                  parser.EndElementHandler = handler
    1567              elif event_name == "start-ns":
    1568                  # TreeBuilder does not implement .start_ns()
    1569                  if hasattr(self.target, "start_ns"):
    1570                      def handler(prefix, uri, event=event_name, append=append,
    1571                                  start_ns=self._start_ns):
    1572                          append((event, start_ns(prefix, uri)))
    1573                  else:
    1574                      def handler(prefix, uri, event=event_name, append=append):
    1575                          append((event, (prefix or '', uri or '')))
    1576                  parser.StartNamespaceDeclHandler = handler
    1577              elif event_name == "end-ns":
    1578                  # TreeBuilder does not implement .end_ns()
    1579                  if hasattr(self.target, "end_ns"):
    1580                      def handler(prefix, event=event_name, append=append,
    1581                                  end_ns=self._end_ns):
    1582                          append((event, end_ns(prefix)))
    1583                  else:
    1584                      def handler(prefix, event=event_name, append=append):
    1585                          append((event, None))
    1586                  parser.EndNamespaceDeclHandler = handler
    1587              elif event_name == 'comment':
    1588                  def handler(text, event=event_name, append=append, self=self):
    1589                      append((event, self.target.comment(text)))
    1590                  parser.CommentHandler = handler
    1591              elif event_name == 'pi':
    1592                  def handler(pi_target, data, event=event_name, append=append,
    1593                              self=self):
    1594                      append((event, self.target.pi(pi_target, data)))
    1595                  parser.ProcessingInstructionHandler = handler
    1596              else:
    1597                  raise ValueError("unknown event %r" % event_name)
    1598  
    1599      def _raiseerror(self, value):
    1600          err = ParseError(value)
    1601          err.code = value.code
    1602          err.position = value.lineno, value.offset
    1603          raise err
    1604  
    1605      def _fixname(self, key):
    1606          # expand qname, and convert name string to ascii, if possible
    1607          try:
    1608              name = self._names[key]
    1609          except KeyError:
    1610              name = key
    1611              if "}" in name:
    1612                  name = "{" + name
    1613              self._names[key] = name
    1614          return name
    1615  
    1616      def _start_ns(self, prefix, uri):
    1617          return self.target.start_ns(prefix or '', uri or '')
    1618  
    1619      def _end_ns(self, prefix):
    1620          return self.target.end_ns(prefix or '')
    1621  
    1622      def _start(self, tag, attr_list):
    1623          # Handler for expat's StartElementHandler. Since ordered_attributes
    1624          # is set, the attributes are reported as a list of alternating
    1625          # attribute name,value.
    1626          fixname = self._fixname
    1627          tag = fixname(tag)
    1628          attrib = {}
    1629          if attr_list:
    1630              for i in range(0, len(attr_list), 2):
    1631                  attrib[fixname(attr_list[i])] = attr_list[i+1]
    1632          return self.target.start(tag, attrib)
    1633  
    1634      def _end(self, tag):
    1635          return self.target.end(self._fixname(tag))
    1636  
    1637      def _default(self, text):
    1638          prefix = text[:1]
    1639          if prefix == "&":
    1640              # deal with undefined entities
    1641              try:
    1642                  data_handler = self.target.data
    1643              except AttributeError:
    1644                  return
    1645              try:
    1646                  data_handler(self.entity[text[1:-1]])
    1647              except KeyError:
    1648                  from xml.parsers import expat
    1649                  err = expat.error(
    1650                      "undefined entity %s: line %d, column %d" %
    1651                      (text, self.parser.ErrorLineNumber,
    1652                      self.parser.ErrorColumnNumber)
    1653                      )
    1654                  err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
    1655                  err.lineno = self.parser.ErrorLineNumber
    1656                  err.offset = self.parser.ErrorColumnNumber
    1657                  raise err
    1658          elif prefix == "<" and text[:9] == "<!DOCTYPE":
    1659              self._doctype = [] # inside a doctype declaration
    1660          elif self._doctype is not None:
    1661              # parse doctype contents
    1662              if prefix == ">":
    1663                  self._doctype = None
    1664                  return
    1665              text = text.strip()
    1666              if not text:
    1667                  return
    1668              self._doctype.append(text)
    1669              n = len(self._doctype)
    1670              if n > 2:
    1671                  type = self._doctype[1]
    1672                  if type == "PUBLIC" and n == 4:
    1673                      name, type, pubid, system = self._doctype
    1674                      if pubid:
    1675                          pubid = pubid[1:-1]
    1676                  elif type == "SYSTEM" and n == 3:
    1677                      name, type, system = self._doctype
    1678                      pubid = None
    1679                  else:
    1680                      return
    1681                  if hasattr(self.target, "doctype"):
    1682                      self.target.doctype(name, pubid, system[1:-1])
    1683                  elif hasattr(self, "doctype"):
    1684                      warnings.warn(
    1685                          "The doctype() method of XMLParser is ignored.  "
    1686                          "Define doctype() method on the TreeBuilder target.",
    1687                          RuntimeWarning)
    1688  
    1689                  self._doctype = None
    1690  
    1691      def feed(self, data):
    1692          """Feed encoded data to parser."""
    1693          try:
    1694              self.parser.Parse(data, False)
    1695          except self._error as v:
    1696              self._raiseerror(v)
    1697  
    1698      def close(self):
    1699          """Finish feeding data to parser and return element structure."""
    1700          try:
    1701              self.parser.Parse(b"", True) # end of data
    1702          except self._error as v:
    1703              self._raiseerror(v)
    1704          try:
    1705              close_handler = self.target.close
    1706          except AttributeError:
    1707              pass
    1708          else:
    1709              return close_handler()
    1710          finally:
    1711              # get rid of circular references
    1712              del self.parser, self._parser
    1713              del self.target, self._target
    1714  
    1715  
    1716  # --------------------------------------------------------------------
    1717  # C14N 2.0
    1718  
    1719  def canonicalize(xml_data=None, *, out=None, from_file=None, **options):
    1720      """Convert XML to its C14N 2.0 serialised form.
    1721  
    1722      If *out* is provided, it must be a file or file-like object that receives
    1723      the serialised canonical XML output (text, not bytes) through its ``.write()``
    1724      method.  To write to a file, open it in text mode with encoding "utf-8".
    1725      If *out* is not provided, this function returns the output as text string.
    1726  
    1727      Either *xml_data* (an XML string) or *from_file* (a file path or
    1728      file-like object) must be provided as input.
    1729  
    1730      The configuration options are the same as for the ``C14NWriterTarget``.
    1731      """
    1732      if xml_data is None and from_file is None:
    1733          raise ValueError("Either 'xml_data' or 'from_file' must be provided as input")
    1734      sio = None
    1735      if out is None:
    1736          sio = out = io.StringIO()
    1737  
    1738      parser = XMLParser(target=C14NWriterTarget(out.write, **options))
    1739  
    1740      if xml_data is not None:
    1741          parser.feed(xml_data)
    1742          parser.close()
    1743      elif from_file is not None:
    1744          parse(from_file, parser=parser)
    1745  
    1746      return sio.getvalue() if sio is not None else None
    1747  
    1748  
# Matcher for strings made of exactly two runs of word characters separated
# by a single colon ("prefix:name"); returns a match object or None.
_looks_like_prefix_name = re.compile(r'^\w+:\w+$', re.UNICODE).match
    1750  
    1751  
class C14NWriterTarget:
    """
    Canonicalization writer target for the XMLParser.

    Serialises parse events to XML C14N 2.0.

    The *write* function is used for writing out the resulting data stream
    as text (not bytes).  To write to a file, open it in text mode with encoding
    "utf-8" and pass its ``.write`` method.

    Configuration options:

    - *with_comments*: set to true to include comments
    - *strip_text*: set to true to strip whitespace before and after text content
    - *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}"
    - *qname_aware_tags*: a set of qname aware tag names in which prefixes
                          should be replaced in text content
    - *qname_aware_attrs*: a set of qname aware attribute names in which prefixes
                           should be replaced in text content
    - *exclude_attrs*: a set of attribute names that should not be serialised
    - *exclude_tags*: a set of tag names that should not be serialised
    """
    def __init__(self, write, *,
                 with_comments=False, strip_text=False, rewrite_prefixes=False,
                 qname_aware_tags=None, qname_aware_attrs=None,
                 exclude_attrs=None, exclude_tags=None):
        self._write = write
        # Text chunks buffered until the next structural event.
        self._data = []
        self._with_comments = with_comments
        self._strip_text = strip_text
        # Empty/None option collections are normalised to None so the hot
        # paths can test "is not None" instead of doing set lookups.
        self._exclude_attrs = set(exclude_attrs) if exclude_attrs else None
        self._exclude_tags = set(exclude_tags) if exclude_tags else None

        self._rewrite_prefixes = rewrite_prefixes
        if qname_aware_tags:
            self._qname_aware_tags = set(qname_aware_tags)
        else:
            self._qname_aware_tags = None
        if qname_aware_attrs:
            # Stored as a bound method: calling it with an attribute dict
            # yields the qname-aware attribute names that are present.
            self._find_qname_aware_attrs = set(qname_aware_attrs).intersection
        else:
            self._find_qname_aware_attrs = None

        # Stack with globally and newly declared namespaces as (uri, prefix) pairs.
        self._declared_ns_stack = [[
            ("http://www.w3.org/XML/1998/namespace", "xml"),
        ]]
        # Stack with user declared namespace prefixes as (uri, prefix) pairs.
        self._ns_stack = []
        if not rewrite_prefixes:
            self._ns_stack.append(list(_namespace_map.items()))
        self._ns_stack.append([])
        # URI -> generated prefix, used only when rewriting prefixes.
        self._prefix_map = {}
        # One entry per open element (plus a base entry): whether
        # whitespace is to be preserved there.
        self._preserve_space = [False]
        # Arguments of a start event deferred until its text is known.
        self._pending_start = None
        self._root_seen = False
        self._root_done = False
        # Nesting depth inside an excluded subtree; 0 when not ignoring.
        self._ignored_depth = 0

    def _iter_namespaces(self, ns_stack, _reversed=reversed):
        """Yield the (uri, prefix) pairs of *ns_stack*, last level first."""
        for namespaces in _reversed(ns_stack):
            if namespaces:  # almost no element declares new namespaces
                yield from namespaces

    def _resolve_prefix_name(self, prefixed_name):
        """Return "prefix:name" expanded to "{uri}name".

        The prefix is looked up in the user declared namespaces; ValueError
        is raised when no declaration for it is found.
        """
        prefix, name = prefixed_name.split(':', 1)
        for uri, p in self._iter_namespaces(self._ns_stack):
            if p == prefix:
                return f'{{{uri}}}{name}'
        raise ValueError(f'Prefix {prefix} of QName "{prefixed_name}" is not declared in scope')

    def _qname(self, qname, uri=None):
        """Return a ``(serialised name, local tag, uri)`` tuple for *qname*.

        *qname* is either "{uri}tag" / plain "tag", or - when *uri* is passed
        explicitly - just the local tag.  A namespace that has no usable
        declaration yet is added to the innermost level of the declared
        namespace stack.  Raises ValueError when a non-empty namespace has
        no declaration at all.
        """
        if uri is None:
            uri, tag = qname[1:].rsplit('}', 1) if qname[:1] == '{' else ('', qname)
        else:
            tag = qname

        # A declaration is only usable if its prefix was not already met
        # earlier in this walk paired with a different URI.
        prefixes_seen = set()
        for u, prefix in self._iter_namespaces(self._declared_ns_stack):
            if u == uri and prefix not in prefixes_seen:
                return f'{prefix}:{tag}' if prefix else tag, tag, uri
            prefixes_seen.add(prefix)

        # Not declared yet => add new declaration.
        if self._rewrite_prefixes:
            if uri in self._prefix_map:
                prefix = self._prefix_map[uri]
            else:
                prefix = self._prefix_map[uri] = f'n{len(self._prefix_map)}'
            self._declared_ns_stack[-1].append((uri, prefix))
            return f'{prefix}:{tag}', tag, uri

        if not uri and '' not in prefixes_seen:
            # No default namespace declared => no prefix needed.
            return tag, tag, uri

        for u, prefix in self._iter_namespaces(self._ns_stack):
            if u == uri:
                self._declared_ns_stack[-1].append((uri, prefix))
                return f'{prefix}:{tag}' if prefix else tag, tag, uri

        if not uri:
            # As soon as a default namespace is defined,
            # anything that has no namespace (and thus, no prefix) goes there.
            return tag, tag, uri

        raise ValueError(f'Namespace "{uri}" is not declared in scope')

    def data(self, data):
        """Buffer character data, unless inside an excluded subtree."""
        if not self._ignored_depth:
            self._data.append(data)

    def _flush(self, _join_text=''.join):
        """Emit a deferred start tag, if any, and the buffered text."""
        data = _join_text(self._data)
        del self._data[:]
        if self._strip_text and not self._preserve_space[-1]:
            data = data.strip()
        if self._pending_start is not None:
            # The start tag of a qname-aware element was held back until its
            # text content became available.
            args, self._pending_start = self._pending_start, None
            qname_text = data if data and _looks_like_prefix_name(data) else None
            self._start(*args, qname_text)
            if qname_text is not None:
                # _start() already wrote the resolved text content.
                return
        if data and self._root_seen:
            self._write(_escape_cdata_c14n(data))

    def start_ns(self, prefix, uri):
        """Record a user declared namespace prefix for the current level."""
        if self._ignored_depth:
            return
        # we may have to resolve qnames in text content
        if self._data:
            self._flush()
        self._ns_stack[-1].append((uri, prefix))

    def start(self, tag, attrs):
        """Handle an element start event.

        Elements in an excluded subtree are only counted.  For qname-aware
        tags the output is deferred until the text content has been seen.
        """
        if self._exclude_tags is not None and (
                self._ignored_depth or tag in self._exclude_tags):
            self._ignored_depth += 1
            return
        if self._data:
            self._flush()

        new_namespaces = []
        self._declared_ns_stack.append(new_namespaces)

        if self._qname_aware_tags is not None and tag in self._qname_aware_tags:
            # Need to parse text first to see if it requires a prefix declaration.
            self._pending_start = (tag, attrs, new_namespaces)
            return
        self._start(tag, attrs, new_namespaces)

    def _start(self, tag, attrs, new_namespaces, qname_text=None):
        """Write the start tag with its namespace declarations and attributes.

        *new_namespaces* is the list pushed on the declared namespace stack
        for this element.  *qname_text*, if given, is "prefix:name" text
        content that is resolved and written right after the start tag.
        """
        if self._exclude_attrs is not None and attrs:
            attrs = {k: v for k, v in attrs.items() if k not in self._exclude_attrs}

        qnames = {tag, *attrs}
        resolved_names = {}

        # Resolve prefixes in attribute and tag text.
        if qname_text is not None:
            qname = resolved_names[qname_text] = self._resolve_prefix_name(qname_text)
            qnames.add(qname)
        if self._find_qname_aware_attrs is not None and attrs:
            qattrs = self._find_qname_aware_attrs(attrs)
            if qattrs:
                for attr_name in qattrs:
                    value = attrs[attr_name]
                    if _looks_like_prefix_name(value):
                        qname = resolved_names[value] = self._resolve_prefix_name(value)
                        qnames.add(qname)
            else:
                qattrs = None
        else:
            qattrs = None

        # Assign prefixes in lexicographical order of used URIs.
        parse_qname = self._qname
        parsed_qnames = {n: parse_qname(n) for n in sorted(
            qnames, key=lambda n: n.split('}', 1))}

        # Write namespace declarations in prefix order ...
        if new_namespaces:
            attr_list = [
                ('xmlns:' + prefix if prefix else 'xmlns', uri)
                for uri, prefix in new_namespaces
            ]
            attr_list.sort()
        else:
            # almost always empty
            attr_list = []

        # ... followed by attributes in URI+name order
        if attrs:
            for k, v in sorted(attrs.items()):
                if qattrs is not None and k in qattrs and v in resolved_names:
                    v = parsed_qnames[resolved_names[v]][0]
                attr_qname, attr_name, uri = parsed_qnames[k]
                # No prefix for attributes in default ('') namespace.
                attr_list.append((attr_qname if uri else attr_name, v))

        # Honour xml:space attributes.
        space_behaviour = attrs.get('{http://www.w3.org/XML/1998/namespace}space')
        self._preserve_space.append(
            space_behaviour == 'preserve' if space_behaviour
            else self._preserve_space[-1])

        # Write the tag.
        write = self._write
        write('<' + parsed_qnames[tag][0])
        if attr_list:
            write(''.join([f' {k}="{_escape_attrib_c14n(v)}"' for k, v in attr_list]))
        write('>')

        # Write the resolved qname text content.
        if qname_text is not None:
            write(_escape_cdata_c14n(parsed_qnames[resolved_names[qname_text]][0]))

        self._root_seen = True
        self._ns_stack.append([])

    def end(self, tag):
        """Handle an element end event: write the end tag and pop one level."""
        if self._ignored_depth:
            self._ignored_depth -= 1
            return
        if self._data:
            self._flush()
        self._write(f'</{self._qname(tag)[0]}>')
        self._preserve_space.pop()
        # Only the base entry is left once the outermost element has closed.
        self._root_done = len(self._preserve_space) == 1
        self._declared_ns_stack.pop()
        self._ns_stack.pop()

    def comment(self, text):
        """Write a comment, if comments are enabled and not being ignored."""
        if not self._with_comments:
            return
        if self._ignored_depth:
            return
        # After the root element a newline precedes the comment; before the
        # root element a newline follows it.
        if self._root_done:
            self._write('\n')
        elif self._root_seen and self._data:
            self._flush()
        self._write(f'<!--{_escape_cdata_c14n(text)}-->')
        if not self._root_seen:
            self._write('\n')

    def pi(self, target, data):
        """Write a processing instruction, unless inside an excluded subtree."""
        if self._ignored_depth:
            return
        # Same newline placement around the root element as for comments.
        if self._root_done:
            self._write('\n')
        elif self._root_seen and self._data:
            self._flush()
        self._write(
            f'<?{target} {_escape_cdata_c14n(data)}?>' if data else f'<?{target}?>')
        if not self._root_seen:
            self._write('\n')
    2008  
    2009  
    2010  def _escape_cdata_c14n(text):
    2011      # escape character data
    2012      try:
    2013          # it's worth avoiding do-nothing calls for strings that are
    2014          # shorter than 500 character, or so.  assume that's, by far,
    2015          # the most common case in most applications.
    2016          if '&' in text:
    2017              text = text.replace('&', '&amp;')
    2018          if '<' in text:
    2019              text = text.replace('<', '&lt;')
    2020          if '>' in text:
    2021              text = text.replace('>', '&gt;')
    2022          if '\r' in text:
    2023              text = text.replace('\r', '&#xD;')
    2024          return text
    2025      except (TypeError, AttributeError):
    2026          _raise_serialization_error(text)
    2027  
    2028  
    2029  def _escape_attrib_c14n(text):
    2030      # escape attribute value
    2031      try:
    2032          if '&' in text:
    2033              text = text.replace('&', '&amp;')
    2034          if '<' in text:
    2035              text = text.replace('<', '&lt;')
    2036          if '"' in text:
    2037              text = text.replace('"', '&quot;')
    2038          if '\t' in text:
    2039              text = text.replace('\t', '&#x9;')
    2040          if '\n' in text:
    2041              text = text.replace('\n', '&#xA;')
    2042          if '\r' in text:
    2043              text = text.replace('\r', '&#xD;')
    2044          return text
    2045      except (TypeError, AttributeError):
    2046          _raise_serialization_error(text)
    2047  
    2048  
    2049  # --------------------------------------------------------------------
    2050  
    2051  # Import the C accelerators
    2052  try:
    # Element is going to be shadowed by the C implementation. We need to keep
    # the Python version of it accessible for some "creative" uses by external
    # code (see tests)
    2056      _Element_Py = Element
    2057  
    2058      # Element, SubElement, ParseError, TreeBuilder, XMLParser, _set_factories
    2059      from _elementtree import *
    2060      from _elementtree import _set_factories
    2061  except ImportError:
    2062      pass
    2063  else:
    2064      _set_factories(Comment, ProcessingInstruction)