python (3.11.7)

(root)/
lib/
python3.11/
xml/
sax/
saxutils.py
       1  """\
       2  A library of useful helper classes to the SAX classes, for the
       3  convenience of application and driver writers.
       4  """
       5  
       6  import os, urllib.parse, urllib.request
       7  import io
       8  import codecs
       9  from . import handler
      10  from . import xmlreader
      11  
      12  def __dict_replace(s, d):
      13      """Replace substrings of a string using a dictionary."""
      14      for key, value in d.items():
      15          s = s.replace(key, value)
      16      return s
      17  
      18  def escape(data, entities={}):
      19      """Escape &, <, and > in a string of data.
      20  
      21      You can escape other strings of data by passing a dictionary as
      22      the optional entities parameter.  The keys and values must all be
      23      strings; each key will be replaced with its corresponding value.
      24      """
      25  
      26      # must do ampersand first
      27      data = data.replace("&", "&amp;")
      28      data = data.replace(">", "&gt;")
      29      data = data.replace("<", "&lt;")
      30      if entities:
      31          data = __dict_replace(data, entities)
      32      return data
      33  
      34  def unescape(data, entities={}):
      35      """Unescape &amp;, &lt;, and &gt; in a string of data.
      36  
      37      You can unescape other strings of data by passing a dictionary as
      38      the optional entities parameter.  The keys and values must all be
      39      strings; each key will be replaced with its corresponding value.
      40      """
      41      data = data.replace("&lt;", "<")
      42      data = data.replace("&gt;", ">")
      43      if entities:
      44          data = __dict_replace(data, entities)
      45      # must do ampersand last
      46      return data.replace("&amp;", "&")
      47  
      48  def quoteattr(data, entities={}):
      49      """Escape and quote an attribute value.
      50  
      51      Escape &, <, and > in a string of data, then quote it for use as
      52      an attribute value.  The \" character will be escaped as well, if
      53      necessary.
      54  
      55      You can escape other strings of data by passing a dictionary as
      56      the optional entities parameter.  The keys and values must all be
      57      strings; each key will be replaced with its corresponding value.
      58      """
      59      entities = {**entities, '\n': '&#10;', '\r': '&#13;', '\t':'&#9;'}
      60      data = escape(data, entities)
      61      if '"' in data:
      62          if "'" in data:
      63              data = '"%s"' % data.replace('"', "&quot;")
      64          else:
      65              data = "'%s'" % data
      66      else:
      67          data = '"%s"' % data
      68      return data
      69  
      70  
      71  def _gettextwriter(out, encoding):
      72      if out is None:
      73          import sys
      74          return sys.stdout
      75  
      76      if isinstance(out, io.TextIOBase):
      77          # use a text writer as is
      78          return out
      79  
      80      if isinstance(out, (codecs.StreamWriter, codecs.StreamReaderWriter)):
      81          # use a codecs stream writer as is
      82          return out
      83  
      84      # wrap a binary writer with TextIOWrapper
      85      if isinstance(out, io.RawIOBase):
      86          # Keep the original file open when the TextIOWrapper is
      87          # destroyed
      88          class ESC[4;38;5;81m_wrapper:
      89              __class__ = out.__class__
      90              def __getattr__(self, name):
      91                  return getattr(out, name)
      92          buffer = _wrapper()
      93          buffer.close = lambda: None
      94      else:
      95          # This is to handle passed objects that aren't in the
      96          # IOBase hierarchy, but just have a write method
      97          buffer = io.BufferedIOBase()
      98          buffer.writable = lambda: True
      99          buffer.write = out.write
     100          try:
     101              # TextIOWrapper uses this methods to determine
     102              # if BOM (for UTF-16, etc) should be added
     103              buffer.seekable = out.seekable
     104              buffer.tell = out.tell
     105          except AttributeError:
     106              pass
     107      return io.TextIOWrapper(buffer, encoding=encoding,
     108                              errors='xmlcharrefreplace',
     109                              newline='\n',
     110                              write_through=True)
     111  
     112  class ESC[4;38;5;81mXMLGenerator(ESC[4;38;5;149mhandlerESC[4;38;5;149m.ESC[4;38;5;149mContentHandler):
     113  
     114      def __init__(self, out=None, encoding="iso-8859-1", short_empty_elements=False):
     115          handler.ContentHandler.__init__(self)
     116          out = _gettextwriter(out, encoding)
     117          self._write = out.write
     118          self._flush = out.flush
     119          self._ns_contexts = [{}] # contains uri -> prefix dicts
     120          self._current_context = self._ns_contexts[-1]
     121          self._undeclared_ns_maps = []
     122          self._encoding = encoding
     123          self._short_empty_elements = short_empty_elements
     124          self._pending_start_element = False
     125  
     126      def _qname(self, name):
     127          """Builds a qualified name from a (ns_url, localname) pair"""
     128          if name[0]:
     129              # Per http://www.w3.org/XML/1998/namespace, The 'xml' prefix is
     130              # bound by definition to http://www.w3.org/XML/1998/namespace.  It
     131              # does not need to be declared and will not usually be found in
     132              # self._current_context.
     133              if 'http://www.w3.org/XML/1998/namespace' == name[0]:
     134                  return 'xml:' + name[1]
     135              # The name is in a non-empty namespace
     136              prefix = self._current_context[name[0]]
     137              if prefix:
     138                  # If it is not the default namespace, prepend the prefix
     139                  return prefix + ":" + name[1]
     140          # Return the unqualified name
     141          return name[1]
     142  
     143      def _finish_pending_start_element(self,endElement=False):
     144          if self._pending_start_element:
     145              self._write('>')
     146              self._pending_start_element = False
     147  
     148      # ContentHandler methods
     149  
     150      def startDocument(self):
     151          self._write('<?xml version="1.0" encoding="%s"?>\n' %
     152                          self._encoding)
     153  
     154      def endDocument(self):
     155          self._flush()
     156  
     157      def startPrefixMapping(self, prefix, uri):
     158          self._ns_contexts.append(self._current_context.copy())
     159          self._current_context[uri] = prefix
     160          self._undeclared_ns_maps.append((prefix, uri))
     161  
     162      def endPrefixMapping(self, prefix):
     163          self._current_context = self._ns_contexts[-1]
     164          del self._ns_contexts[-1]
     165  
     166      def startElement(self, name, attrs):
     167          self._finish_pending_start_element()
     168          self._write('<' + name)
     169          for (name, value) in attrs.items():
     170              self._write(' %s=%s' % (name, quoteattr(value)))
     171          if self._short_empty_elements:
     172              self._pending_start_element = True
     173          else:
     174              self._write(">")
     175  
     176      def endElement(self, name):
     177          if self._pending_start_element:
     178              self._write('/>')
     179              self._pending_start_element = False
     180          else:
     181              self._write('</%s>' % name)
     182  
     183      def startElementNS(self, name, qname, attrs):
     184          self._finish_pending_start_element()
     185          self._write('<' + self._qname(name))
     186  
     187          for prefix, uri in self._undeclared_ns_maps:
     188              if prefix:
     189                  self._write(' xmlns:%s="%s"' % (prefix, uri))
     190              else:
     191                  self._write(' xmlns="%s"' % uri)
     192          self._undeclared_ns_maps = []
     193  
     194          for (name, value) in attrs.items():
     195              self._write(' %s=%s' % (self._qname(name), quoteattr(value)))
     196          if self._short_empty_elements:
     197              self._pending_start_element = True
     198          else:
     199              self._write(">")
     200  
     201      def endElementNS(self, name, qname):
     202          if self._pending_start_element:
     203              self._write('/>')
     204              self._pending_start_element = False
     205          else:
     206              self._write('</%s>' % self._qname(name))
     207  
     208      def characters(self, content):
     209          if content:
     210              self._finish_pending_start_element()
     211              if not isinstance(content, str):
     212                  content = str(content, self._encoding)
     213              self._write(escape(content))
     214  
     215      def ignorableWhitespace(self, content):
     216          if content:
     217              self._finish_pending_start_element()
     218              if not isinstance(content, str):
     219                  content = str(content, self._encoding)
     220              self._write(content)
     221  
     222      def processingInstruction(self, target, data):
     223          self._finish_pending_start_element()
     224          self._write('<?%s %s?>' % (target, data))
     225  
     226  
     227  class ESC[4;38;5;81mXMLFilterBase(ESC[4;38;5;149mxmlreaderESC[4;38;5;149m.ESC[4;38;5;149mXMLReader):
     228      """This class is designed to sit between an XMLReader and the
     229      client application's event handlers.  By default, it does nothing
     230      but pass requests up to the reader and events on to the handlers
     231      unmodified, but subclasses can override specific methods to modify
     232      the event stream or the configuration requests as they pass
     233      through."""
     234  
     235      def __init__(self, parent = None):
     236          xmlreader.XMLReader.__init__(self)
     237          self._parent = parent
     238  
     239      # ErrorHandler methods
     240  
     241      def error(self, exception):
     242          self._err_handler.error(exception)
     243  
     244      def fatalError(self, exception):
     245          self._err_handler.fatalError(exception)
     246  
     247      def warning(self, exception):
     248          self._err_handler.warning(exception)
     249  
     250      # ContentHandler methods
     251  
     252      def setDocumentLocator(self, locator):
     253          self._cont_handler.setDocumentLocator(locator)
     254  
     255      def startDocument(self):
     256          self._cont_handler.startDocument()
     257  
     258      def endDocument(self):
     259          self._cont_handler.endDocument()
     260  
     261      def startPrefixMapping(self, prefix, uri):
     262          self._cont_handler.startPrefixMapping(prefix, uri)
     263  
     264      def endPrefixMapping(self, prefix):
     265          self._cont_handler.endPrefixMapping(prefix)
     266  
     267      def startElement(self, name, attrs):
     268          self._cont_handler.startElement(name, attrs)
     269  
     270      def endElement(self, name):
     271          self._cont_handler.endElement(name)
     272  
     273      def startElementNS(self, name, qname, attrs):
     274          self._cont_handler.startElementNS(name, qname, attrs)
     275  
     276      def endElementNS(self, name, qname):
     277          self._cont_handler.endElementNS(name, qname)
     278  
     279      def characters(self, content):
     280          self._cont_handler.characters(content)
     281  
     282      def ignorableWhitespace(self, chars):
     283          self._cont_handler.ignorableWhitespace(chars)
     284  
     285      def processingInstruction(self, target, data):
     286          self._cont_handler.processingInstruction(target, data)
     287  
     288      def skippedEntity(self, name):
     289          self._cont_handler.skippedEntity(name)
     290  
     291      # DTDHandler methods
     292  
     293      def notationDecl(self, name, publicId, systemId):
     294          self._dtd_handler.notationDecl(name, publicId, systemId)
     295  
     296      def unparsedEntityDecl(self, name, publicId, systemId, ndata):
     297          self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)
     298  
     299      # EntityResolver methods
     300  
     301      def resolveEntity(self, publicId, systemId):
     302          return self._ent_handler.resolveEntity(publicId, systemId)
     303  
     304      # XMLReader methods
     305  
     306      def parse(self, source):
     307          self._parent.setContentHandler(self)
     308          self._parent.setErrorHandler(self)
     309          self._parent.setEntityResolver(self)
     310          self._parent.setDTDHandler(self)
     311          self._parent.parse(source)
     312  
     313      def setLocale(self, locale):
     314          self._parent.setLocale(locale)
     315  
     316      def getFeature(self, name):
     317          return self._parent.getFeature(name)
     318  
     319      def setFeature(self, name, state):
     320          self._parent.setFeature(name, state)
     321  
     322      def getProperty(self, name):
     323          return self._parent.getProperty(name)
     324  
     325      def setProperty(self, name, value):
     326          self._parent.setProperty(name, value)
     327  
     328      # XMLFilter methods
     329  
     330      def getParent(self):
     331          return self._parent
     332  
     333      def setParent(self, parent):
     334          self._parent = parent
     335  
     336  # --- Utility functions
     337  
     338  def prepare_input_source(source, base=""):
     339      """This function takes an InputSource and an optional base URL and
     340      returns a fully resolved InputSource object ready for reading."""
     341  
     342      if isinstance(source, os.PathLike):
     343          source = os.fspath(source)
     344      if isinstance(source, str):
     345          source = xmlreader.InputSource(source)
     346      elif hasattr(source, "read"):
     347          f = source
     348          source = xmlreader.InputSource()
     349          if isinstance(f.read(0), str):
     350              source.setCharacterStream(f)
     351          else:
     352              source.setByteStream(f)
     353          if hasattr(f, "name") and isinstance(f.name, str):
     354              source.setSystemId(f.name)
     355  
     356      if source.getCharacterStream() is None and source.getByteStream() is None:
     357          sysid = source.getSystemId()
     358          basehead = os.path.dirname(os.path.normpath(base))
     359          sysidfilename = os.path.join(basehead, sysid)
     360          if os.path.isfile(sysidfilename):
     361              source.setSystemId(sysidfilename)
     362              f = open(sysidfilename, "rb")
     363          else:
     364              source.setSystemId(urllib.parse.urljoin(base, sysid))
     365              f = urllib.request.urlopen(source.getSystemId())
     366  
     367          source.setByteStream(f)
     368  
     369      return source