(root)/
Python-3.11.7/
Lib/
test/
test_pulldom.py
       1  import io
       2  import unittest
       3  import xml.sax
       4  
       5  from xml.sax.xmlreader import AttributesImpl
       6  from xml.sax.handler import feature_external_ges
       7  from xml.dom import pulldom
       8  
       9  from test.support import findfile
      10  
      11  
# Location of the "test.xml" fixture, resolved by findfile() within the
# "xmltestdata" subdirectory.
tstfile = findfile("test.xml", subdir="xmltestdata")

# A handy XML snippet, containing attributes, a namespace prefix, a comment,
# and a self-closing tag:
SMALL_SAMPLE = """<?xml version="1.0"?>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books">
<!-- A comment -->
<title>Introduction to XSL</title>
<hr/>
<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p>
</html>"""
      23  
      24  
      25  class ESC[4;38;5;81mPullDOMTestCase(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
      26  
      27      def test_parse(self):
      28          """Minimal test of DOMEventStream.parse()"""
      29  
      30          # This just tests that parsing from a stream works. Actual parser
      31          # semantics are tested using parseString with a more focused XML
      32          # fragment.
      33  
      34          # Test with a filename:
      35          handler = pulldom.parse(tstfile)
      36          self.addCleanup(handler.stream.close)
      37          list(handler)
      38  
      39          # Test with a file object:
      40          with open(tstfile, "rb") as fin:
      41              list(pulldom.parse(fin))
      42  
      43      def test_parse_semantics(self):
      44          """Test DOMEventStream parsing semantics."""
      45  
      46          items = pulldom.parseString(SMALL_SAMPLE)
      47          evt, node = next(items)
      48          # Just check the node is a Document:
      49          self.assertTrue(hasattr(node, "createElement"))
      50          self.assertEqual(pulldom.START_DOCUMENT, evt)
      51          evt, node = next(items)
      52          self.assertEqual(pulldom.START_ELEMENT, evt)
      53          self.assertEqual("html", node.tagName)
      54          self.assertEqual(2, len(node.attributes))
      55          self.assertEqual(node.attributes.getNamedItem("xmlns:xdc").value,
      56                "http://www.xml.com/books")
      57          evt, node = next(items)
      58          self.assertEqual(pulldom.CHARACTERS, evt) # Line break
      59          evt, node = next(items)
      60          # XXX - A comment should be reported here!
      61          # self.assertEqual(pulldom.COMMENT, evt)
      62          # Line break after swallowed comment:
      63          self.assertEqual(pulldom.CHARACTERS, evt)
      64          evt, node = next(items)
      65          self.assertEqual("title", node.tagName)
      66          title_node = node
      67          evt, node = next(items)
      68          self.assertEqual(pulldom.CHARACTERS, evt)
      69          self.assertEqual("Introduction to XSL", node.data)
      70          evt, node = next(items)
      71          self.assertEqual(pulldom.END_ELEMENT, evt)
      72          self.assertEqual("title", node.tagName)
      73          self.assertTrue(title_node is node)
      74          evt, node = next(items)
      75          self.assertEqual(pulldom.CHARACTERS, evt)
      76          evt, node = next(items)
      77          self.assertEqual(pulldom.START_ELEMENT, evt)
      78          self.assertEqual("hr", node.tagName)
      79          evt, node = next(items)
      80          self.assertEqual(pulldom.END_ELEMENT, evt)
      81          self.assertEqual("hr", node.tagName)
      82          evt, node = next(items)
      83          self.assertEqual(pulldom.CHARACTERS, evt)
      84          evt, node = next(items)
      85          self.assertEqual(pulldom.START_ELEMENT, evt)
      86          self.assertEqual("p", node.tagName)
      87          evt, node = next(items)
      88          self.assertEqual(pulldom.START_ELEMENT, evt)
      89          self.assertEqual("xdc:author", node.tagName)
      90          evt, node = next(items)
      91          self.assertEqual(pulldom.CHARACTERS, evt)
      92          evt, node = next(items)
      93          self.assertEqual(pulldom.END_ELEMENT, evt)
      94          self.assertEqual("xdc:author", node.tagName)
      95          evt, node = next(items)
      96          self.assertEqual(pulldom.END_ELEMENT, evt)
      97          evt, node = next(items)
      98          self.assertEqual(pulldom.CHARACTERS, evt)
      99          evt, node = next(items)
     100          self.assertEqual(pulldom.END_ELEMENT, evt)
     101          # XXX No END_DOCUMENT item is ever obtained:
     102          #evt, node = next(items)
     103          #self.assertEqual(pulldom.END_DOCUMENT, evt)
     104  
     105      def test_expandItem(self):
     106          """Ensure expandItem works as expected."""
     107          items = pulldom.parseString(SMALL_SAMPLE)
     108          # Loop through the nodes until we get to a "title" start tag:
     109          for evt, item in items:
     110              if evt == pulldom.START_ELEMENT and item.tagName == "title":
     111                  items.expandNode(item)
     112                  self.assertEqual(1, len(item.childNodes))
     113                  break
     114          else:
     115              self.fail("No \"title\" element detected in SMALL_SAMPLE!")
     116          # Loop until we get to the next start-element:
     117          for evt, node in items:
     118              if evt == pulldom.START_ELEMENT:
     119                  break
     120          self.assertEqual("hr", node.tagName,
     121              "expandNode did not leave DOMEventStream in the correct state.")
     122          # Attempt to expand a standalone element:
     123          items.expandNode(node)
     124          self.assertEqual(next(items)[0], pulldom.CHARACTERS)
     125          evt, node = next(items)
     126          self.assertEqual(node.tagName, "p")
     127          items.expandNode(node)
     128          next(items) # Skip character data
     129          evt, node = next(items)
     130          self.assertEqual(node.tagName, "html")
     131          with self.assertRaises(StopIteration):
     132              next(items)
     133          items.clear()
     134          self.assertIsNone(items.parser)
     135          self.assertIsNone(items.stream)
     136  
     137      @unittest.expectedFailure
     138      def test_comment(self):
     139          """PullDOM does not receive "comment" events."""
     140          items = pulldom.parseString(SMALL_SAMPLE)
     141          for evt, _ in items:
     142              if evt == pulldom.COMMENT:
     143                  break
     144          else:
     145              self.fail("No comment was encountered")
     146  
     147      @unittest.expectedFailure
     148      def test_end_document(self):
     149          """PullDOM does not receive "end-document" events."""
     150          items = pulldom.parseString(SMALL_SAMPLE)
     151          # Read all of the nodes up to and including </html>:
     152          for evt, node in items:
     153              if evt == pulldom.END_ELEMENT and node.tagName == "html":
     154                  break
     155          try:
     156              # Assert that the next node is END_DOCUMENT:
     157              evt, node = next(items)
     158              self.assertEqual(pulldom.END_DOCUMENT, evt)
     159          except StopIteration:
     160              self.fail(
     161                  "Ran out of events, but should have received END_DOCUMENT")
     162  
     163      def test_external_ges_default(self):
     164          parser = pulldom.parseString(SMALL_SAMPLE)
     165          saxparser = parser.parser
     166          ges = saxparser.getFeature(feature_external_ges)
     167          self.assertEqual(ges, False)
     168  
     169  
     170  class ESC[4;38;5;81mThoroughTestCase(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     171      """Test the hard-to-reach parts of pulldom."""
     172  
     173      def test_thorough_parse(self):
     174          """Test some of the hard-to-reach parts of PullDOM."""
     175          self._test_thorough(pulldom.parse(None, parser=SAXExerciser()))
     176  
     177      @unittest.expectedFailure
     178      def test_sax2dom_fail(self):
     179          """SAX2DOM can"t handle a PI before the root element."""
     180          pd = SAX2DOMTestHelper(None, SAXExerciser(), 12)
     181          self._test_thorough(pd)
     182  
     183      def test_thorough_sax2dom(self):
     184          """Test some of the hard-to-reach parts of SAX2DOM."""
     185          pd = SAX2DOMTestHelper(None, SAX2DOMExerciser(), 12)
     186          self._test_thorough(pd, False)
     187  
     188      def _test_thorough(self, pd, before_root=True):
     189          """Test some of the hard-to-reach parts of the parser, using a mock
     190          parser."""
     191  
     192          evt, node = next(pd)
     193          self.assertEqual(pulldom.START_DOCUMENT, evt)
     194          # Just check the node is a Document:
     195          self.assertTrue(hasattr(node, "createElement"))
     196  
     197          if before_root:
     198              evt, node = next(pd)
     199              self.assertEqual(pulldom.COMMENT, evt)
     200              self.assertEqual("a comment", node.data)
     201              evt, node = next(pd)
     202              self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt)
     203              self.assertEqual("target", node.target)
     204              self.assertEqual("data", node.data)
     205  
     206          evt, node = next(pd)
     207          self.assertEqual(pulldom.START_ELEMENT, evt)
     208          self.assertEqual("html", node.tagName)
     209  
     210          evt, node = next(pd)
     211          self.assertEqual(pulldom.COMMENT, evt)
     212          self.assertEqual("a comment", node.data)
     213          evt, node = next(pd)
     214          self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt)
     215          self.assertEqual("target", node.target)
     216          self.assertEqual("data", node.data)
     217  
     218          evt, node = next(pd)
     219          self.assertEqual(pulldom.START_ELEMENT, evt)
     220          self.assertEqual("p", node.tagName)
     221  
     222          evt, node = next(pd)
     223          self.assertEqual(pulldom.CHARACTERS, evt)
     224          self.assertEqual("text", node.data)
     225          evt, node = next(pd)
     226          self.assertEqual(pulldom.END_ELEMENT, evt)
     227          self.assertEqual("p", node.tagName)
     228          evt, node = next(pd)
     229          self.assertEqual(pulldom.END_ELEMENT, evt)
     230          self.assertEqual("html", node.tagName)
     231          evt, node = next(pd)
     232          self.assertEqual(pulldom.END_DOCUMENT, evt)
     233  
     234  
     235  class ESC[4;38;5;81mSAXExerciser(ESC[4;38;5;149mobject):
     236      """A fake sax parser that calls some of the harder-to-reach sax methods to
     237      ensure it emits the correct events"""
     238  
     239      def setContentHandler(self, handler):
     240          self._handler = handler
     241  
     242      def parse(self, _):
     243          h = self._handler
     244          h.startDocument()
     245  
     246          # The next two items ensure that items preceding the first
     247          # start_element are properly stored and emitted:
     248          h.comment("a comment")
     249          h.processingInstruction("target", "data")
     250  
     251          h.startElement("html", AttributesImpl({}))
     252  
     253          h.comment("a comment")
     254          h.processingInstruction("target", "data")
     255  
     256          h.startElement("p", AttributesImpl({"class": "paraclass"}))
     257          h.characters("text")
     258          h.endElement("p")
     259          h.endElement("html")
     260          h.endDocument()
     261  
     262      def stub(self, *args, **kwargs):
     263          """Stub method. Does nothing."""
     264          pass
     265      setProperty = stub
     266      setFeature = stub
     267  
     268  
     269  class ESC[4;38;5;81mSAX2DOMExerciser(ESC[4;38;5;149mSAXExerciser):
     270      """The same as SAXExerciser, but without the processing instruction and
     271      comment before the root element, because S2D can"t handle it"""
     272  
     273      def parse(self, _):
     274          h = self._handler
     275          h.startDocument()
     276          h.startElement("html", AttributesImpl({}))
     277          h.comment("a comment")
     278          h.processingInstruction("target", "data")
     279          h.startElement("p", AttributesImpl({"class": "paraclass"}))
     280          h.characters("text")
     281          h.endElement("p")
     282          h.endElement("html")
     283          h.endDocument()
     284  
     285  
     286  class ESC[4;38;5;81mSAX2DOMTestHelper(ESC[4;38;5;149mpulldomESC[4;38;5;149m.ESC[4;38;5;149mDOMEventStream):
     287      """Allows us to drive SAX2DOM from a DOMEventStream."""
     288  
     289      def reset(self):
     290          self.pulldom = pulldom.SAX2DOM()
     291          # This content handler relies on namespace support
     292          self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
     293          self.parser.setContentHandler(self.pulldom)
     294  
     295  
     296  class ESC[4;38;5;81mSAX2DOMTestCase(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     297  
     298      def confirm(self, test, testname="Test"):
     299          self.assertTrue(test, testname)
     300  
     301      def test_basic(self):
     302          """Ensure SAX2DOM can parse from a stream."""
     303          with io.StringIO(SMALL_SAMPLE) as fin:
     304              sd = SAX2DOMTestHelper(fin, xml.sax.make_parser(),
     305                                     len(SMALL_SAMPLE))
     306              for evt, node in sd:
     307                  if evt == pulldom.START_ELEMENT and node.tagName == "html":
     308                      break
     309              # Because the buffer is the same length as the XML, all the
     310              # nodes should have been parsed and added:
     311              self.assertGreater(len(node.childNodes), 0)
     312  
     313      def testSAX2DOM(self):
     314          """Ensure SAX2DOM expands nodes as expected."""
     315          sax2dom = pulldom.SAX2DOM()
     316          sax2dom.startDocument()
     317          sax2dom.startElement("doc", {})
     318          sax2dom.characters("text")
     319          sax2dom.startElement("subelm", {})
     320          sax2dom.characters("text")
     321          sax2dom.endElement("subelm")
     322          sax2dom.characters("text")
     323          sax2dom.endElement("doc")
     324          sax2dom.endDocument()
     325  
     326          doc = sax2dom.document
     327          root = doc.documentElement
     328          (text1, elm1, text2) = root.childNodes
     329          text3 = elm1.childNodes[0]
     330  
     331          self.assertIsNone(text1.previousSibling)
     332          self.assertIs(text1.nextSibling, elm1)
     333          self.assertIs(elm1.previousSibling, text1)
     334          self.assertIs(elm1.nextSibling, text2)
     335          self.assertIs(text2.previousSibling, elm1)
     336          self.assertIsNone(text2.nextSibling)
     337          self.assertIsNone(text3.previousSibling)
     338          self.assertIsNone(text3.nextSibling)
     339  
     340          self.assertIs(root.parentNode, doc)
     341          self.assertIs(text1.parentNode, root)
     342          self.assertIs(elm1.parentNode, root)
     343          self.assertIs(text2.parentNode, root)
     344          self.assertIs(text3.parentNode, elm1)
     345          doc.unlink()
     346  
     347  
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()