(root)/
Python-3.11.7/
Lib/
xml/
dom/
pulldom.py
       1  import xml.sax
       2  import xml.sax.handler
       3  
# Event tokens.  Every event queued by PullDOM is a tuple whose first item
# is one of these strings; pending (not yet materialised) comment and
# processing-instruction events use the same tokens.
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"
COMMENT = "COMMENT"
START_DOCUMENT = "START_DOCUMENT"
END_DOCUMENT = "END_DOCUMENT"
PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
CHARACTERS = "CHARACTERS"
      12  
      13  class ESC[4;38;5;81mPullDOM(ESC[4;38;5;149mxmlESC[4;38;5;149m.ESC[4;38;5;149msaxESC[4;38;5;149m.ESC[4;38;5;149mContentHandler):
      14      _locator = None
      15      document = None
      16  
      17      def __init__(self, documentFactory=None):
      18          from xml.dom import XML_NAMESPACE
      19          self.documentFactory = documentFactory
      20          self.firstEvent = [None, None]
      21          self.lastEvent = self.firstEvent
      22          self.elementStack = []
      23          self.push = self.elementStack.append
      24          try:
      25              self.pop = self.elementStack.pop
      26          except AttributeError:
      27              # use class' pop instead
      28              pass
      29          self._ns_contexts = [{XML_NAMESPACE:'xml'}] # contains uri -> prefix dicts
      30          self._current_context = self._ns_contexts[-1]
      31          self.pending_events = []
      32  
      33      def pop(self):
      34          result = self.elementStack[-1]
      35          del self.elementStack[-1]
      36          return result
      37  
      38      def setDocumentLocator(self, locator):
      39          self._locator = locator
      40  
      41      def startPrefixMapping(self, prefix, uri):
      42          if not hasattr(self, '_xmlns_attrs'):
      43              self._xmlns_attrs = []
      44          self._xmlns_attrs.append((prefix or 'xmlns', uri))
      45          self._ns_contexts.append(self._current_context.copy())
      46          self._current_context[uri] = prefix or None
      47  
      48      def endPrefixMapping(self, prefix):
      49          self._current_context = self._ns_contexts.pop()
      50  
      51      def startElementNS(self, name, tagName , attrs):
      52          # Retrieve xml namespace declaration attributes.
      53          xmlns_uri = 'http://www.w3.org/2000/xmlns/'
      54          xmlns_attrs = getattr(self, '_xmlns_attrs', None)
      55          if xmlns_attrs is not None:
      56              for aname, value in xmlns_attrs:
      57                  attrs._attrs[(xmlns_uri, aname)] = value
      58              self._xmlns_attrs = []
      59          uri, localname = name
      60          if uri:
      61              # When using namespaces, the reader may or may not
      62              # provide us with the original name. If not, create
      63              # *a* valid tagName from the current context.
      64              if tagName is None:
      65                  prefix = self._current_context[uri]
      66                  if prefix:
      67                      tagName = prefix + ":" + localname
      68                  else:
      69                      tagName = localname
      70              if self.document:
      71                  node = self.document.createElementNS(uri, tagName)
      72              else:
      73                  node = self.buildDocument(uri, tagName)
      74          else:
      75              # When the tagname is not prefixed, it just appears as
      76              # localname
      77              if self.document:
      78                  node = self.document.createElement(localname)
      79              else:
      80                  node = self.buildDocument(None, localname)
      81  
      82          for aname,value in attrs.items():
      83              a_uri, a_localname = aname
      84              if a_uri == xmlns_uri:
      85                  if a_localname == 'xmlns':
      86                      qname = a_localname
      87                  else:
      88                      qname = 'xmlns:' + a_localname
      89                  attr = self.document.createAttributeNS(a_uri, qname)
      90                  node.setAttributeNodeNS(attr)
      91              elif a_uri:
      92                  prefix = self._current_context[a_uri]
      93                  if prefix:
      94                      qname = prefix + ":" + a_localname
      95                  else:
      96                      qname = a_localname
      97                  attr = self.document.createAttributeNS(a_uri, qname)
      98                  node.setAttributeNodeNS(attr)
      99              else:
     100                  attr = self.document.createAttribute(a_localname)
     101                  node.setAttributeNode(attr)
     102              attr.value = value
     103  
     104          self.lastEvent[1] = [(START_ELEMENT, node), None]
     105          self.lastEvent = self.lastEvent[1]
     106          self.push(node)
     107  
     108      def endElementNS(self, name, tagName):
     109          self.lastEvent[1] = [(END_ELEMENT, self.pop()), None]
     110          self.lastEvent = self.lastEvent[1]
     111  
     112      def startElement(self, name, attrs):
     113          if self.document:
     114              node = self.document.createElement(name)
     115          else:
     116              node = self.buildDocument(None, name)
     117  
     118          for aname,value in attrs.items():
     119              attr = self.document.createAttribute(aname)
     120              attr.value = value
     121              node.setAttributeNode(attr)
     122  
     123          self.lastEvent[1] = [(START_ELEMENT, node), None]
     124          self.lastEvent = self.lastEvent[1]
     125          self.push(node)
     126  
     127      def endElement(self, name):
     128          self.lastEvent[1] = [(END_ELEMENT, self.pop()), None]
     129          self.lastEvent = self.lastEvent[1]
     130  
     131      def comment(self, s):
     132          if self.document:
     133              node = self.document.createComment(s)
     134              self.lastEvent[1] = [(COMMENT, node), None]
     135              self.lastEvent = self.lastEvent[1]
     136          else:
     137              event = [(COMMENT, s), None]
     138              self.pending_events.append(event)
     139  
     140      def processingInstruction(self, target, data):
     141          if self.document:
     142              node = self.document.createProcessingInstruction(target, data)
     143              self.lastEvent[1] = [(PROCESSING_INSTRUCTION, node), None]
     144              self.lastEvent = self.lastEvent[1]
     145          else:
     146              event = [(PROCESSING_INSTRUCTION, target, data), None]
     147              self.pending_events.append(event)
     148  
     149      def ignorableWhitespace(self, chars):
     150          node = self.document.createTextNode(chars)
     151          self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None]
     152          self.lastEvent = self.lastEvent[1]
     153  
     154      def characters(self, chars):
     155          node = self.document.createTextNode(chars)
     156          self.lastEvent[1] = [(CHARACTERS, node), None]
     157          self.lastEvent = self.lastEvent[1]
     158  
     159      def startDocument(self):
     160          if self.documentFactory is None:
     161              import xml.dom.minidom
     162              self.documentFactory = xml.dom.minidom.Document.implementation
     163  
     164      def buildDocument(self, uri, tagname):
     165          # Can't do that in startDocument, since we need the tagname
     166          # XXX: obtain DocumentType
     167          node = self.documentFactory.createDocument(uri, tagname, None)
     168          self.document = node
     169          self.lastEvent[1] = [(START_DOCUMENT, node), None]
     170          self.lastEvent = self.lastEvent[1]
     171          self.push(node)
     172          # Put everything we have seen so far into the document
     173          for e in self.pending_events:
     174              if e[0][0] == PROCESSING_INSTRUCTION:
     175                  _,target,data = e[0]
     176                  n = self.document.createProcessingInstruction(target, data)
     177                  e[0] = (PROCESSING_INSTRUCTION, n)
     178              elif e[0][0] == COMMENT:
     179                  n = self.document.createComment(e[0][1])
     180                  e[0] = (COMMENT, n)
     181              else:
     182                  raise AssertionError("Unknown pending event ",e[0][0])
     183              self.lastEvent[1] = e
     184              self.lastEvent = e
     185          self.pending_events = None
     186          return node.firstChild
     187  
     188      def endDocument(self):
     189          self.lastEvent[1] = [(END_DOCUMENT, self.document), None]
     190          self.pop()
     191  
     192      def clear(self):
     193          "clear(): Explicitly release parsing structures"
     194          self.document = None
     195  
     196  class ESC[4;38;5;81mErrorHandler:
     197      def warning(self, exception):
     198          print(exception)
     199      def error(self, exception):
     200          raise exception
     201      def fatalError(self, exception):
     202          raise exception
     203  
     204  class ESC[4;38;5;81mDOMEventStream:
     205      def __init__(self, stream, parser, bufsize):
     206          self.stream = stream
     207          self.parser = parser
     208          self.bufsize = bufsize
     209          if not hasattr(self.parser, 'feed'):
     210              self.getEvent = self._slurp
     211          self.reset()
     212  
     213      def reset(self):
     214          self.pulldom = PullDOM()
     215          # This content handler relies on namespace support
     216          self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
     217          self.parser.setContentHandler(self.pulldom)
     218  
     219      def __next__(self):
     220          rc = self.getEvent()
     221          if rc:
     222              return rc
     223          raise StopIteration
     224  
     225      def __iter__(self):
     226          return self
     227  
     228      def expandNode(self, node):
     229          event = self.getEvent()
     230          parents = [node]
     231          while event:
     232              token, cur_node = event
     233              if cur_node is node:
     234                  return
     235              if token != END_ELEMENT:
     236                  parents[-1].appendChild(cur_node)
     237              if token == START_ELEMENT:
     238                  parents.append(cur_node)
     239              elif token == END_ELEMENT:
     240                  del parents[-1]
     241              event = self.getEvent()
     242  
     243      def getEvent(self):
     244          # use IncrementalParser interface, so we get the desired
     245          # pull effect
     246          if not self.pulldom.firstEvent[1]:
     247              self.pulldom.lastEvent = self.pulldom.firstEvent
     248          while not self.pulldom.firstEvent[1]:
     249              buf = self.stream.read(self.bufsize)
     250              if not buf:
     251                  self.parser.close()
     252                  return None
     253              self.parser.feed(buf)
     254          rc = self.pulldom.firstEvent[1][0]
     255          self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1]
     256          return rc
     257  
     258      def _slurp(self):
     259          """ Fallback replacement for getEvent() using the
     260              standard SAX2 interface, which means we slurp the
     261              SAX events into memory (no performance gain, but
     262              we are compatible to all SAX parsers).
     263          """
     264          self.parser.parse(self.stream)
     265          self.getEvent = self._emit
     266          return self._emit()
     267  
     268      def _emit(self):
     269          """ Fallback replacement for getEvent() that emits
     270              the events that _slurp() read previously.
     271          """
     272          rc = self.pulldom.firstEvent[1][0]
     273          self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1]
     274          return rc
     275  
     276      def clear(self):
     277          """clear(): Explicitly release parsing objects"""
     278          self.pulldom.clear()
     279          del self.pulldom
     280          self.parser = None
     281          self.stream = None
     282  
     283  class ESC[4;38;5;81mSAX2DOM(ESC[4;38;5;149mPullDOM):
     284  
     285      def startElementNS(self, name, tagName , attrs):
     286          PullDOM.startElementNS(self, name, tagName, attrs)
     287          curNode = self.elementStack[-1]
     288          parentNode = self.elementStack[-2]
     289          parentNode.appendChild(curNode)
     290  
     291      def startElement(self, name, attrs):
     292          PullDOM.startElement(self, name, attrs)
     293          curNode = self.elementStack[-1]
     294          parentNode = self.elementStack[-2]
     295          parentNode.appendChild(curNode)
     296  
     297      def processingInstruction(self, target, data):
     298          PullDOM.processingInstruction(self, target, data)
     299          node = self.lastEvent[0][1]
     300          parentNode = self.elementStack[-1]
     301          parentNode.appendChild(node)
     302  
     303      def ignorableWhitespace(self, chars):
     304          PullDOM.ignorableWhitespace(self, chars)
     305          node = self.lastEvent[0][1]
     306          parentNode = self.elementStack[-1]
     307          parentNode.appendChild(node)
     308  
     309      def characters(self, chars):
     310          PullDOM.characters(self, chars)
     311          node = self.lastEvent[0][1]
     312          parentNode = self.elementStack[-1]
     313          parentNode.appendChild(node)
     314  
     315  
     316  default_bufsize = (2 ** 14) - 20
     317  
     318  def parse(stream_or_string, parser=None, bufsize=None):
     319      if bufsize is None:
     320          bufsize = default_bufsize
     321      if isinstance(stream_or_string, str):
     322          stream = open(stream_or_string, 'rb')
     323      else:
     324          stream = stream_or_string
     325      if not parser:
     326          parser = xml.sax.make_parser()
     327      return DOMEventStream(stream, parser, bufsize)
     328  
     329  def parseString(string, parser=None):
     330      from io import StringIO
     331  
     332      bufsize = len(string)
     333      buf = StringIO(string)
     334      if not parser:
     335          parser = xml.sax.make_parser()
     336      return DOMEventStream(buf, parser, bufsize)