(root)/
Python-3.12.0/
Lib/
test/
test_sax.py
       1  # regression test for SAX 2.0
       2  # $Id$
       3  
       4  from xml.sax import make_parser, ContentHandler, \
       5                      SAXException, SAXReaderNotAvailable, SAXParseException
       6  import unittest
       7  from unittest import mock
       8  try:
       9      make_parser()
      10  except SAXReaderNotAvailable:
      11      # don't try to test this module if we cannot create a parser
      12      raise unittest.SkipTest("no XML parsers available")
      13  from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \
      14                               XMLFilterBase, prepare_input_source
      15  from xml.sax.expatreader import create_parser
      16  from xml.sax.handler import (feature_namespaces, feature_external_ges,
      17                               LexicalHandler)
      18  from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
      19  from io import BytesIO, StringIO
      20  import codecs
      21  import os.path
      22  import shutil
      23  import sys
      24  from urllib.error import URLError
      25  import urllib.request
      26  from test.support import os_helper
      27  from test.support import findfile
      28  from test.support.os_helper import FakePath, TESTFN
      29  
      30  
# Locate the XML fixture and its expected-output companion in the
# "xmltestdata" test-data subdirectory.
TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata")
TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata")
try:
    # Probe only: the encoded bytes are discarded.  The whole module is
    # skipped when either fixture path cannot be expressed in UTF-8.
    TEST_XMLFILE.encode("utf-8")
    TEST_XMLFILE_OUT.encode("utf-8")
except UnicodeEncodeError:
    raise unittest.SkipTest("filename is not encodable to utf8")

# Assume non-ASCII file names work; only when the platform does not
# advertise Unicode file-name support do we probe the file system encoding.
supports_nonascii_filenames = True
if not os.path.supports_unicode_filenames:
    try:
        os_helper.TESTFN_UNICODE.encode(sys.getfilesystemencoding())
    except (UnicodeError, TypeError):
        # Either the file system encoding is None, or the file name
        # cannot be encoded in the file system encoding.
        supports_nonascii_filenames = False
# Decorator that skips a test unless the probe above succeeded.
requires_nonascii_filenames = unittest.skipUnless(
        supports_nonascii_filenames,
        'Requires non-ascii filenames support')

# Namespace URI shared by the namespace-aware tests in this module.
ns_uri = "http://www.python.org/xml-ns/saxtest/"
      52  
      53  class ESC[4;38;5;81mXmlTestBase(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
      54      def verify_empty_attrs(self, attrs):
      55          self.assertRaises(KeyError, attrs.getValue, "attr")
      56          self.assertRaises(KeyError, attrs.getValueByQName, "attr")
      57          self.assertRaises(KeyError, attrs.getNameByQName, "attr")
      58          self.assertRaises(KeyError, attrs.getQNameByName, "attr")
      59          self.assertRaises(KeyError, attrs.__getitem__, "attr")
      60          self.assertEqual(attrs.getLength(), 0)
      61          self.assertEqual(attrs.getNames(), [])
      62          self.assertEqual(attrs.getQNames(), [])
      63          self.assertEqual(len(attrs), 0)
      64          self.assertNotIn("attr", attrs)
      65          self.assertEqual(list(attrs.keys()), [])
      66          self.assertEqual(attrs.get("attrs"), None)
      67          self.assertEqual(attrs.get("attrs", 25), 25)
      68          self.assertEqual(list(attrs.items()), [])
      69          self.assertEqual(list(attrs.values()), [])
      70  
      71      def verify_empty_nsattrs(self, attrs):
      72          self.assertRaises(KeyError, attrs.getValue, (ns_uri, "attr"))
      73          self.assertRaises(KeyError, attrs.getValueByQName, "ns:attr")
      74          self.assertRaises(KeyError, attrs.getNameByQName, "ns:attr")
      75          self.assertRaises(KeyError, attrs.getQNameByName, (ns_uri, "attr"))
      76          self.assertRaises(KeyError, attrs.__getitem__, (ns_uri, "attr"))
      77          self.assertEqual(attrs.getLength(), 0)
      78          self.assertEqual(attrs.getNames(), [])
      79          self.assertEqual(attrs.getQNames(), [])
      80          self.assertEqual(len(attrs), 0)
      81          self.assertNotIn((ns_uri, "attr"), attrs)
      82          self.assertEqual(list(attrs.keys()), [])
      83          self.assertEqual(attrs.get((ns_uri, "attr")), None)
      84          self.assertEqual(attrs.get((ns_uri, "attr"), 25), 25)
      85          self.assertEqual(list(attrs.items()), [])
      86          self.assertEqual(list(attrs.values()), [])
      87  
      88      def verify_attrs_wattr(self, attrs):
      89          self.assertEqual(attrs.getLength(), 1)
      90          self.assertEqual(attrs.getNames(), ["attr"])
      91          self.assertEqual(attrs.getQNames(), ["attr"])
      92          self.assertEqual(len(attrs), 1)
      93          self.assertIn("attr", attrs)
      94          self.assertEqual(list(attrs.keys()), ["attr"])
      95          self.assertEqual(attrs.get("attr"), "val")
      96          self.assertEqual(attrs.get("attr", 25), "val")
      97          self.assertEqual(list(attrs.items()), [("attr", "val")])
      98          self.assertEqual(list(attrs.values()), ["val"])
      99          self.assertEqual(attrs.getValue("attr"), "val")
     100          self.assertEqual(attrs.getValueByQName("attr"), "val")
     101          self.assertEqual(attrs.getNameByQName("attr"), "attr")
     102          self.assertEqual(attrs["attr"], "val")
     103          self.assertEqual(attrs.getQNameByName("attr"), "attr")
     104  
     105  
     106  def xml_str(doc, encoding=None):
     107      if encoding is None:
     108          return doc
     109      return '<?xml version="1.0" encoding="%s"?>\n%s' % (encoding, doc)
     110  
     111  def xml_bytes(doc, encoding, decl_encoding=...):
     112      if decl_encoding is ...:
     113          decl_encoding = encoding
     114      return xml_str(doc, decl_encoding).encode(encoding, 'xmlcharrefreplace')
     115  
     116  def make_xml_file(doc, encoding, decl_encoding=...):
     117      if decl_encoding is ...:
     118          decl_encoding = encoding
     119      with open(TESTFN, 'w', encoding=encoding, errors='xmlcharrefreplace') as f:
     120          f.write(xml_str(doc, decl_encoding))
     121  
     122  
     123  class ESC[4;38;5;81mParseTest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     124      data = '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>'
     125  
     126      def tearDown(self):
     127          os_helper.unlink(TESTFN)
     128  
     129      def check_parse(self, f):
     130          from xml.sax import parse
     131          result = StringIO()
     132          parse(f, XMLGenerator(result, 'utf-8'))
     133          self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8'))
     134  
     135      def test_parse_text(self):
     136          encodings = ('us-ascii', 'iso-8859-1', 'utf-8',
     137                       'utf-16', 'utf-16le', 'utf-16be')
     138          for encoding in encodings:
     139              self.check_parse(StringIO(xml_str(self.data, encoding)))
     140              make_xml_file(self.data, encoding)
     141              with open(TESTFN, 'r', encoding=encoding) as f:
     142                  self.check_parse(f)
     143              self.check_parse(StringIO(self.data))
     144              make_xml_file(self.data, encoding, None)
     145              with open(TESTFN, 'r', encoding=encoding) as f:
     146                  self.check_parse(f)
     147  
     148      def test_parse_bytes(self):
     149          # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
     150          # UTF-16 is autodetected
     151          encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
     152          for encoding in encodings:
     153              self.check_parse(BytesIO(xml_bytes(self.data, encoding)))
     154              make_xml_file(self.data, encoding)
     155              self.check_parse(TESTFN)
     156              with open(TESTFN, 'rb') as f:
     157                  self.check_parse(f)
     158              self.check_parse(BytesIO(xml_bytes(self.data, encoding, None)))
     159              make_xml_file(self.data, encoding, None)
     160              self.check_parse(TESTFN)
     161              with open(TESTFN, 'rb') as f:
     162                  self.check_parse(f)
     163          # accept UTF-8 with BOM
     164          self.check_parse(BytesIO(xml_bytes(self.data, 'utf-8-sig', 'utf-8')))
     165          make_xml_file(self.data, 'utf-8-sig', 'utf-8')
     166          self.check_parse(TESTFN)
     167          with open(TESTFN, 'rb') as f:
     168              self.check_parse(f)
     169          self.check_parse(BytesIO(xml_bytes(self.data, 'utf-8-sig', None)))
     170          make_xml_file(self.data, 'utf-8-sig', None)
     171          self.check_parse(TESTFN)
     172          with open(TESTFN, 'rb') as f:
     173              self.check_parse(f)
     174          # accept data with declared encoding
     175          self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1')))
     176          make_xml_file(self.data, 'iso-8859-1')
     177          self.check_parse(TESTFN)
     178          with open(TESTFN, 'rb') as f:
     179              self.check_parse(f)
     180          # fail on non-UTF-8 incompatible data without declared encoding
     181          with self.assertRaises(SAXException):
     182              self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1', None)))
     183          make_xml_file(self.data, 'iso-8859-1', None)
     184          with self.assertRaises(SAXException):
     185              self.check_parse(TESTFN)
     186          with open(TESTFN, 'rb') as f:
     187              with self.assertRaises(SAXException):
     188                  self.check_parse(f)
     189  
     190      def test_parse_path_object(self):
     191          make_xml_file(self.data, 'utf-8', None)
     192          self.check_parse(FakePath(TESTFN))
     193  
     194      def test_parse_InputSource(self):
     195          # accept data without declared but with explicitly specified encoding
     196          make_xml_file(self.data, 'iso-8859-1', None)
     197          with open(TESTFN, 'rb') as f:
     198              input = InputSource()
     199              input.setByteStream(f)
     200              input.setEncoding('iso-8859-1')
     201              self.check_parse(input)
     202  
     203      def test_parse_close_source(self):
     204          builtin_open = open
     205          fileobj = None
     206  
     207          def mock_open(*args):
     208              nonlocal fileobj
     209              fileobj = builtin_open(*args)
     210              return fileobj
     211  
     212          with mock.patch('xml.sax.saxutils.open', side_effect=mock_open):
     213              make_xml_file(self.data, 'iso-8859-1', None)
     214              with self.assertRaises(SAXException):
     215                  self.check_parse(TESTFN)
     216              self.assertTrue(fileobj.closed)
     217  
     218      def check_parseString(self, s):
     219          from xml.sax import parseString
     220          result = StringIO()
     221          parseString(s, XMLGenerator(result, 'utf-8'))
     222          self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8'))
     223  
     224      def test_parseString_text(self):
     225          encodings = ('us-ascii', 'iso-8859-1', 'utf-8',
     226                       'utf-16', 'utf-16le', 'utf-16be')
     227          for encoding in encodings:
     228              self.check_parseString(xml_str(self.data, encoding))
     229          self.check_parseString(self.data)
     230  
     231      def test_parseString_bytes(self):
     232          # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
     233          # UTF-16 is autodetected
     234          encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
     235          for encoding in encodings:
     236              self.check_parseString(xml_bytes(self.data, encoding))
     237              self.check_parseString(xml_bytes(self.data, encoding, None))
     238          # accept UTF-8 with BOM
     239          self.check_parseString(xml_bytes(self.data, 'utf-8-sig', 'utf-8'))
     240          self.check_parseString(xml_bytes(self.data, 'utf-8-sig', None))
     241          # accept data with declared encoding
     242          self.check_parseString(xml_bytes(self.data, 'iso-8859-1'))
     243          # fail on non-UTF-8 incompatible data without declared encoding
     244          with self.assertRaises(SAXException):
     245              self.check_parseString(xml_bytes(self.data, 'iso-8859-1', None))
     246  
     247  class ESC[4;38;5;81mMakeParserTest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     248      def test_make_parser2(self):
     249          # Creating parsers several times in a row should succeed.
     250          # Testing this because there have been failures of this kind
     251          # before.
     252          from xml.sax import make_parser
     253          p = make_parser()
     254          from xml.sax import make_parser
     255          p = make_parser()
     256          from xml.sax import make_parser
     257          p = make_parser()
     258          from xml.sax import make_parser
     259          p = make_parser()
     260          from xml.sax import make_parser
     261          p = make_parser()
     262          from xml.sax import make_parser
     263          p = make_parser()
     264  
     265      def test_make_parser3(self):
     266          # Testing that make_parser can handle different types of
     267          # iterables.
     268          make_parser(['module'])
     269          make_parser(('module', ))
     270          make_parser({'module'})
     271          make_parser(frozenset({'module'}))
     272          make_parser({'module': None})
     273          make_parser(iter(['module']))
     274  
     275      def test_make_parser4(self):
     276          # Testing that make_parser can handle empty iterables.
     277          make_parser([])
     278          make_parser(tuple())
     279          make_parser(set())
     280          make_parser(frozenset())
     281          make_parser({})
     282          make_parser(iter([]))
     283  
     284      def test_make_parser5(self):
     285          # Testing that make_parser can handle iterables with more than
     286          # one item.
     287          make_parser(['module1', 'module2'])
     288          make_parser(('module1', 'module2'))
     289          make_parser({'module1', 'module2'})
     290          make_parser(frozenset({'module1', 'module2'}))
     291          make_parser({'module1': None, 'module2': None})
     292          make_parser(iter(['module1', 'module2']))
     293  
     294  # ===========================================================================
     295  #
     296  #   saxutils tests
     297  #
     298  # ===========================================================================
     299  
     300  class ESC[4;38;5;81mSaxutilsTest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     301      # ===== escape
     302      def test_escape_basic(self):
     303          self.assertEqual(escape("Donald Duck & Co"), "Donald Duck &amp; Co")
     304  
     305      def test_escape_all(self):
     306          self.assertEqual(escape("<Donald Duck & Co>"),
     307                           "&lt;Donald Duck &amp; Co&gt;")
     308  
     309      def test_escape_extra(self):
     310          self.assertEqual(escape("Hei på deg", {"å" : "&aring;"}),
     311                           "Hei p&aring; deg")
     312  
     313      # ===== unescape
     314      def test_unescape_basic(self):
     315          self.assertEqual(unescape("Donald Duck &amp; Co"), "Donald Duck & Co")
     316  
     317      def test_unescape_all(self):
     318          self.assertEqual(unescape("&lt;Donald Duck &amp; Co&gt;"),
     319                           "<Donald Duck & Co>")
     320  
     321      def test_unescape_extra(self):
     322          self.assertEqual(unescape("Hei på deg", {"å" : "&aring;"}),
     323                           "Hei p&aring; deg")
     324  
     325      def test_unescape_amp_extra(self):
     326          self.assertEqual(unescape("&amp;foo;", {"&foo;": "splat"}), "&foo;")
     327  
     328      # ===== quoteattr
     329      def test_quoteattr_basic(self):
     330          self.assertEqual(quoteattr("Donald Duck & Co"),
     331                           '"Donald Duck &amp; Co"')
     332  
     333      def test_single_quoteattr(self):
     334          self.assertEqual(quoteattr('Includes "double" quotes'),
     335                           '\'Includes "double" quotes\'')
     336  
     337      def test_double_quoteattr(self):
     338          self.assertEqual(quoteattr("Includes 'single' quotes"),
     339                           "\"Includes 'single' quotes\"")
     340  
     341      def test_single_double_quoteattr(self):
     342          self.assertEqual(quoteattr("Includes 'single' and \"double\" quotes"),
     343                           "\"Includes 'single' and &quot;double&quot; quotes\"")
     344  
     345      # ===== make_parser
     346      def test_make_parser(self):
     347          # Creating a parser should succeed - it should fall back
     348          # to the expatreader
     349          p = make_parser(['xml.parsers.no_such_parser'])
     350  
     351  
     352  class ESC[4;38;5;81mPrepareInputSourceTest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     353  
     354      def setUp(self):
     355          self.file = os_helper.TESTFN
     356          with open(self.file, "w") as tmp:
     357              tmp.write("This was read from a file.")
     358  
     359      def tearDown(self):
     360          os_helper.unlink(self.file)
     361  
     362      def make_byte_stream(self):
     363          return BytesIO(b"This is a byte stream.")
     364  
     365      def make_character_stream(self):
     366          return StringIO("This is a character stream.")
     367  
     368      def checkContent(self, stream, content):
     369          self.assertIsNotNone(stream)
     370          self.assertEqual(stream.read(), content)
     371          stream.close()
     372  
     373  
     374      def test_character_stream(self):
     375          # If the source is an InputSource with a character stream, use it.
     376          src = InputSource(self.file)
     377          src.setCharacterStream(self.make_character_stream())
     378          prep = prepare_input_source(src)
     379          self.assertIsNone(prep.getByteStream())
     380          self.checkContent(prep.getCharacterStream(),
     381                            "This is a character stream.")
     382  
     383      def test_byte_stream(self):
     384          # If the source is an InputSource that does not have a character
     385          # stream but does have a byte stream, use the byte stream.
     386          src = InputSource(self.file)
     387          src.setByteStream(self.make_byte_stream())
     388          prep = prepare_input_source(src)
     389          self.assertIsNone(prep.getCharacterStream())
     390          self.checkContent(prep.getByteStream(),
     391                            b"This is a byte stream.")
     392  
     393      def test_system_id(self):
     394          # If the source is an InputSource that has neither a character
     395          # stream nor a byte stream, open the system ID.
     396          src = InputSource(self.file)
     397          prep = prepare_input_source(src)
     398          self.assertIsNone(prep.getCharacterStream())
     399          self.checkContent(prep.getByteStream(),
     400                            b"This was read from a file.")
     401  
     402      def test_string(self):
     403          # If the source is a string, use it as a system ID and open it.
     404          prep = prepare_input_source(self.file)
     405          self.assertIsNone(prep.getCharacterStream())
     406          self.checkContent(prep.getByteStream(),
     407                            b"This was read from a file.")
     408  
     409      def test_path_objects(self):
     410          # If the source is a Path object, use it as a system ID and open it.
     411          prep = prepare_input_source(FakePath(self.file))
     412          self.assertIsNone(prep.getCharacterStream())
     413          self.checkContent(prep.getByteStream(),
     414                            b"This was read from a file.")
     415  
     416      def test_binary_file(self):
     417          # If the source is a binary file-like object, use it as a byte
     418          # stream.
     419          prep = prepare_input_source(self.make_byte_stream())
     420          self.assertIsNone(prep.getCharacterStream())
     421          self.checkContent(prep.getByteStream(),
     422                            b"This is a byte stream.")
     423  
     424      def test_text_file(self):
     425          # If the source is a text file-like object, use it as a character
     426          # stream.
     427          prep = prepare_input_source(self.make_character_stream())
     428          self.assertIsNone(prep.getByteStream())
     429          self.checkContent(prep.getCharacterStream(),
     430                            "This is a character stream.")
     431  
     432  
     433  # ===== XMLGenerator
     434  
     435  class ESC[4;38;5;81mXmlgenTest:
     436      def test_xmlgen_basic(self):
     437          result = self.ioclass()
     438          gen = XMLGenerator(result)
     439          gen.startDocument()
     440          gen.startElement("doc", {})
     441          gen.endElement("doc")
     442          gen.endDocument()
     443  
     444          self.assertEqual(result.getvalue(), self.xml("<doc></doc>"))
     445  
     446      def test_xmlgen_basic_empty(self):
     447          result = self.ioclass()
     448          gen = XMLGenerator(result, short_empty_elements=True)
     449          gen.startDocument()
     450          gen.startElement("doc", {})
     451          gen.endElement("doc")
     452          gen.endDocument()
     453  
     454          self.assertEqual(result.getvalue(), self.xml("<doc/>"))
     455  
     456      def test_xmlgen_content(self):
     457          result = self.ioclass()
     458          gen = XMLGenerator(result)
     459  
     460          gen.startDocument()
     461          gen.startElement("doc", {})
     462          gen.characters("huhei")
     463          gen.endElement("doc")
     464          gen.endDocument()
     465  
     466          self.assertEqual(result.getvalue(), self.xml("<doc>huhei</doc>"))
     467  
     468      def test_xmlgen_content_empty(self):
     469          result = self.ioclass()
     470          gen = XMLGenerator(result, short_empty_elements=True)
     471  
     472          gen.startDocument()
     473          gen.startElement("doc", {})
     474          gen.characters("huhei")
     475          gen.endElement("doc")
     476          gen.endDocument()
     477  
     478          self.assertEqual(result.getvalue(), self.xml("<doc>huhei</doc>"))
     479  
     480      def test_xmlgen_pi(self):
     481          result = self.ioclass()
     482          gen = XMLGenerator(result)
     483  
     484          gen.startDocument()
     485          gen.processingInstruction("test", "data")
     486          gen.startElement("doc", {})
     487          gen.endElement("doc")
     488          gen.endDocument()
     489  
     490          self.assertEqual(result.getvalue(),
     491              self.xml("<?test data?><doc></doc>"))
     492  
     493      def test_xmlgen_content_escape(self):
     494          result = self.ioclass()
     495          gen = XMLGenerator(result)
     496  
     497          gen.startDocument()
     498          gen.startElement("doc", {})
     499          gen.characters("<huhei&")
     500          gen.endElement("doc")
     501          gen.endDocument()
     502  
     503          self.assertEqual(result.getvalue(),
     504              self.xml("<doc>&lt;huhei&amp;</doc>"))
     505  
     506      def test_xmlgen_attr_escape(self):
     507          result = self.ioclass()
     508          gen = XMLGenerator(result)
     509  
     510          gen.startDocument()
     511          gen.startElement("doc", {"a": '"'})
     512          gen.startElement("e", {"a": "'"})
     513          gen.endElement("e")
     514          gen.startElement("e", {"a": "'\""})
     515          gen.endElement("e")
     516          gen.startElement("e", {"a": "\n\r\t"})
     517          gen.endElement("e")
     518          gen.endElement("doc")
     519          gen.endDocument()
     520  
     521          self.assertEqual(result.getvalue(), self.xml(
     522              "<doc a='\"'><e a=\"'\"></e>"
     523              "<e a=\"'&quot;\"></e>"
     524              "<e a=\"&#10;&#13;&#9;\"></e></doc>"))
     525  
     526      def test_xmlgen_encoding(self):
     527          encodings = ('iso-8859-15', 'utf-8', 'utf-8-sig',
     528                       'utf-16', 'utf-16be', 'utf-16le',
     529                       'utf-32', 'utf-32be', 'utf-32le')
     530          for encoding in encodings:
     531              result = self.ioclass()
     532              gen = XMLGenerator(result, encoding=encoding)
     533  
     534              gen.startDocument()
     535              gen.startElement("doc", {"a": '\u20ac'})
     536              gen.characters("\u20ac")
     537              gen.endElement("doc")
     538              gen.endDocument()
     539  
     540              self.assertEqual(result.getvalue(),
     541                  self.xml('<doc a="\u20ac">\u20ac</doc>', encoding=encoding))
     542  
     543      def test_xmlgen_unencodable(self):
     544          result = self.ioclass()
     545          gen = XMLGenerator(result, encoding='ascii')
     546  
     547          gen.startDocument()
     548          gen.startElement("doc", {"a": '\u20ac'})
     549          gen.characters("\u20ac")
     550          gen.endElement("doc")
     551          gen.endDocument()
     552  
     553          self.assertEqual(result.getvalue(),
     554              self.xml('<doc a="&#8364;">&#8364;</doc>', encoding='ascii'))
     555  
     556      def test_xmlgen_ignorable(self):
     557          result = self.ioclass()
     558          gen = XMLGenerator(result)
     559  
     560          gen.startDocument()
     561          gen.startElement("doc", {})
     562          gen.ignorableWhitespace(" ")
     563          gen.endElement("doc")
     564          gen.endDocument()
     565  
     566          self.assertEqual(result.getvalue(), self.xml("<doc> </doc>"))
     567  
     568      def test_xmlgen_ignorable_empty(self):
     569          result = self.ioclass()
     570          gen = XMLGenerator(result, short_empty_elements=True)
     571  
     572          gen.startDocument()
     573          gen.startElement("doc", {})
     574          gen.ignorableWhitespace(" ")
     575          gen.endElement("doc")
     576          gen.endDocument()
     577  
     578          self.assertEqual(result.getvalue(), self.xml("<doc> </doc>"))
     579  
     580      def test_xmlgen_encoding_bytes(self):
     581          encodings = ('iso-8859-15', 'utf-8', 'utf-8-sig',
     582                       'utf-16', 'utf-16be', 'utf-16le',
     583                       'utf-32', 'utf-32be', 'utf-32le')
     584          for encoding in encodings:
     585              result = self.ioclass()
     586              gen = XMLGenerator(result, encoding=encoding)
     587  
     588              gen.startDocument()
     589              gen.startElement("doc", {"a": '\u20ac'})
     590              gen.characters("\u20ac".encode(encoding))
     591              gen.ignorableWhitespace(" ".encode(encoding))
     592              gen.endElement("doc")
     593              gen.endDocument()
     594  
     595              self.assertEqual(result.getvalue(),
     596                  self.xml('<doc a="\u20ac">\u20ac </doc>', encoding=encoding))
     597  
     598      def test_xmlgen_ns(self):
     599          result = self.ioclass()
     600          gen = XMLGenerator(result)
     601  
     602          gen.startDocument()
     603          gen.startPrefixMapping("ns1", ns_uri)
     604          gen.startElementNS((ns_uri, "doc"), "ns1:doc", {})
     605          # add an unqualified name
     606          gen.startElementNS((None, "udoc"), None, {})
     607          gen.endElementNS((None, "udoc"), None)
     608          gen.endElementNS((ns_uri, "doc"), "ns1:doc")
     609          gen.endPrefixMapping("ns1")
     610          gen.endDocument()
     611  
     612          self.assertEqual(result.getvalue(), self.xml(
     613             '<ns1:doc xmlns:ns1="%s"><udoc></udoc></ns1:doc>' %
     614                                           ns_uri))
     615  
     616      def test_xmlgen_ns_empty(self):
     617          result = self.ioclass()
     618          gen = XMLGenerator(result, short_empty_elements=True)
     619  
     620          gen.startDocument()
     621          gen.startPrefixMapping("ns1", ns_uri)
     622          gen.startElementNS((ns_uri, "doc"), "ns1:doc", {})
     623          # add an unqualified name
     624          gen.startElementNS((None, "udoc"), None, {})
     625          gen.endElementNS((None, "udoc"), None)
     626          gen.endElementNS((ns_uri, "doc"), "ns1:doc")
     627          gen.endPrefixMapping("ns1")
     628          gen.endDocument()
     629  
     630          self.assertEqual(result.getvalue(), self.xml(
     631             '<ns1:doc xmlns:ns1="%s"><udoc/></ns1:doc>' %
     632                                           ns_uri))
     633  
     634      def test_1463026_1(self):
     635          result = self.ioclass()
     636          gen = XMLGenerator(result)
     637  
     638          gen.startDocument()
     639          gen.startElementNS((None, 'a'), 'a', {(None, 'b'):'c'})
     640          gen.endElementNS((None, 'a'), 'a')
     641          gen.endDocument()
     642  
     643          self.assertEqual(result.getvalue(), self.xml('<a b="c"></a>'))
     644  
     645      def test_1463026_1_empty(self):
     646          result = self.ioclass()
     647          gen = XMLGenerator(result, short_empty_elements=True)
     648  
     649          gen.startDocument()
     650          gen.startElementNS((None, 'a'), 'a', {(None, 'b'):'c'})
     651          gen.endElementNS((None, 'a'), 'a')
     652          gen.endDocument()
     653  
     654          self.assertEqual(result.getvalue(), self.xml('<a b="c"/>'))
     655  
     656      def test_1463026_2(self):
     657          result = self.ioclass()
     658          gen = XMLGenerator(result)
     659  
     660          gen.startDocument()
     661          gen.startPrefixMapping(None, 'qux')
     662          gen.startElementNS(('qux', 'a'), 'a', {})
     663          gen.endElementNS(('qux', 'a'), 'a')
     664          gen.endPrefixMapping(None)
     665          gen.endDocument()
     666  
     667          self.assertEqual(result.getvalue(), self.xml('<a xmlns="qux"></a>'))
     668  
     669      def test_1463026_2_empty(self):
     670          result = self.ioclass()
     671          gen = XMLGenerator(result, short_empty_elements=True)
     672  
     673          gen.startDocument()
     674          gen.startPrefixMapping(None, 'qux')
     675          gen.startElementNS(('qux', 'a'), 'a', {})
     676          gen.endElementNS(('qux', 'a'), 'a')
     677          gen.endPrefixMapping(None)
     678          gen.endDocument()
     679  
     680          self.assertEqual(result.getvalue(), self.xml('<a xmlns="qux"/>'))
     681  
     682      def test_1463026_3(self):
     683          result = self.ioclass()
     684          gen = XMLGenerator(result)
     685  
     686          gen.startDocument()
     687          gen.startPrefixMapping('my', 'qux')
     688          gen.startElementNS(('qux', 'a'), 'a', {(None, 'b'):'c'})
     689          gen.endElementNS(('qux', 'a'), 'a')
     690          gen.endPrefixMapping('my')
     691          gen.endDocument()
     692  
     693          self.assertEqual(result.getvalue(),
     694              self.xml('<my:a xmlns:my="qux" b="c"></my:a>'))
     695  
     696      def test_1463026_3_empty(self):
     697          result = self.ioclass()
     698          gen = XMLGenerator(result, short_empty_elements=True)
     699  
     700          gen.startDocument()
     701          gen.startPrefixMapping('my', 'qux')
     702          gen.startElementNS(('qux', 'a'), 'a', {(None, 'b'):'c'})
     703          gen.endElementNS(('qux', 'a'), 'a')
     704          gen.endPrefixMapping('my')
     705          gen.endDocument()
     706  
     707          self.assertEqual(result.getvalue(),
     708              self.xml('<my:a xmlns:my="qux" b="c"/>'))
     709  
     710      def test_5027_1(self):
     711          # The xml prefix (as in xml:lang below) is reserved and bound by
     712          # definition to http://www.w3.org/XML/1998/namespace.  XMLGenerator had
     713          # a bug whereby a KeyError is raised because this namespace is missing
     714          # from a dictionary.
     715          #
     716          # This test demonstrates the bug by parsing a document.
     717          test_xml = StringIO(
     718              '<?xml version="1.0"?>'
     719              '<a:g1 xmlns:a="http://example.com/ns">'
     720               '<a:g2 xml:lang="en">Hello</a:g2>'
     721              '</a:g1>')
     722  
     723          parser = make_parser()
     724          parser.setFeature(feature_namespaces, True)
     725          result = self.ioclass()
     726          gen = XMLGenerator(result)
     727          parser.setContentHandler(gen)
     728          parser.parse(test_xml)
     729  
     730          self.assertEqual(result.getvalue(),
     731                           self.xml(
     732                           '<a:g1 xmlns:a="http://example.com/ns">'
     733                            '<a:g2 xml:lang="en">Hello</a:g2>'
     734                           '</a:g1>'))
     735  
     736      def test_5027_2(self):
     737          # The xml prefix (as in xml:lang below) is reserved and bound by
     738          # definition to http://www.w3.org/XML/1998/namespace.  XMLGenerator had
     739          # a bug whereby a KeyError is raised because this namespace is missing
     740          # from a dictionary.
     741          #
     742          # This test demonstrates the bug by direct manipulation of the
     743          # XMLGenerator.
     744          result = self.ioclass()
     745          gen = XMLGenerator(result)
     746  
     747          gen.startDocument()
     748          gen.startPrefixMapping('a', 'http://example.com/ns')
     749          gen.startElementNS(('http://example.com/ns', 'g1'), 'g1', {})
     750          lang_attr = {('http://www.w3.org/XML/1998/namespace', 'lang'): 'en'}
     751          gen.startElementNS(('http://example.com/ns', 'g2'), 'g2', lang_attr)
     752          gen.characters('Hello')
     753          gen.endElementNS(('http://example.com/ns', 'g2'), 'g2')
     754          gen.endElementNS(('http://example.com/ns', 'g1'), 'g1')
     755          gen.endPrefixMapping('a')
     756          gen.endDocument()
     757  
     758          self.assertEqual(result.getvalue(),
     759                           self.xml(
     760                           '<a:g1 xmlns:a="http://example.com/ns">'
     761                            '<a:g2 xml:lang="en">Hello</a:g2>'
     762                           '</a:g1>'))
     763  
     764      def test_no_close_file(self):
     765          result = self.ioclass()
     766          def func(out):
     767              gen = XMLGenerator(out)
     768              gen.startDocument()
     769              gen.startElement("doc", {})
     770          func(result)
     771          self.assertFalse(result.closed)
     772  
     773      def test_xmlgen_fragment(self):
     774          result = self.ioclass()
     775          gen = XMLGenerator(result)
     776  
     777          # Don't call gen.startDocument()
     778          gen.startElement("foo", {"a": "1.0"})
     779          gen.characters("Hello")
     780          gen.endElement("foo")
     781          gen.startElement("bar", {"b": "2.0"})
     782          gen.endElement("bar")
     783          # Don't call gen.endDocument()
     784  
     785          self.assertEqual(result.getvalue(),
     786              self.xml('<foo a="1.0">Hello</foo><bar b="2.0"></bar>')[len(self.xml('')):])
     787  
     788  class ESC[4;38;5;81mStringXmlgenTest(ESC[4;38;5;149mXmlgenTest, ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     789      ioclass = StringIO
     790  
     791      def xml(self, doc, encoding='iso-8859-1'):
     792          return '<?xml version="1.0" encoding="%s"?>\n%s' % (encoding, doc)
     793  
     794      test_xmlgen_unencodable = None
     795  
     796  class ESC[4;38;5;81mBytesXmlgenTest(ESC[4;38;5;149mXmlgenTest, ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     797      ioclass = BytesIO
     798  
     799      def xml(self, doc, encoding='iso-8859-1'):
     800          return ('<?xml version="1.0" encoding="%s"?>\n%s' %
     801                  (encoding, doc)).encode(encoding, 'xmlcharrefreplace')
     802  
     803  class ESC[4;38;5;81mWriterXmlgenTest(ESC[4;38;5;149mBytesXmlgenTest):
     804      class ESC[4;38;5;81mioclass(ESC[4;38;5;149mlist):
     805          write = list.append
     806          closed = False
     807  
     808          def seekable(self):
     809              return True
     810  
     811          def tell(self):
     812              # return 0 at start and not 0 after start
     813              return len(self)
     814  
     815          def getvalue(self):
     816              return b''.join(self)
     817  
     818  class ESC[4;38;5;81mStreamWriterXmlgenTest(ESC[4;38;5;149mXmlgenTest, ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     819      def ioclass(self):
     820          raw = BytesIO()
     821          writer = codecs.getwriter('ascii')(raw, 'xmlcharrefreplace')
     822          writer.getvalue = raw.getvalue
     823          return writer
     824  
     825      def xml(self, doc, encoding='iso-8859-1'):
     826          return ('<?xml version="1.0" encoding="%s"?>\n%s' %
     827                  (encoding, doc)).encode('ascii', 'xmlcharrefreplace')
     828  
     829  class ESC[4;38;5;81mStreamReaderWriterXmlgenTest(ESC[4;38;5;149mXmlgenTest, ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     830      fname = os_helper.TESTFN + '-codecs'
     831  
     832      def ioclass(self):
     833          writer = codecs.open(self.fname, 'w', encoding='ascii',
     834                               errors='xmlcharrefreplace', buffering=0)
     835          def cleanup():
     836              writer.close()
     837              os_helper.unlink(self.fname)
     838          self.addCleanup(cleanup)
     839          def getvalue():
     840              # Windows will not let use reopen without first closing
     841              writer.close()
     842              with open(writer.name, 'rb') as f:
     843                  return f.read()
     844          writer.getvalue = getvalue
     845          return writer
     846  
     847      def xml(self, doc, encoding='iso-8859-1'):
     848          return ('<?xml version="1.0" encoding="%s"?>\n%s' %
     849                  (encoding, doc)).encode('ascii', 'xmlcharrefreplace')
     850  
# XML declaration that the byte-stream tests below expect at the very start of
# XMLGenerator output; they prepend it to the expected document body.
start = b'<?xml version="1.0" encoding="iso-8859-1"?>\n'
     852  
     853  
     854  class ESC[4;38;5;81mXMLFilterBaseTest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     855      def test_filter_basic(self):
     856          result = BytesIO()
     857          gen = XMLGenerator(result)
     858          filter = XMLFilterBase()
     859          filter.setContentHandler(gen)
     860  
     861          filter.startDocument()
     862          filter.startElement("doc", {})
     863          filter.characters("content")
     864          filter.ignorableWhitespace(" ")
     865          filter.endElement("doc")
     866          filter.endDocument()
     867  
     868          self.assertEqual(result.getvalue(), start + b"<doc>content </doc>")
     869  
     870  # ===========================================================================
     871  #
     872  #   expatreader tests
     873  #
     874  # ===========================================================================
     875  
# Reference bytes that the file-parsing tests below compare against the
# XMLGenerator output produced from TEST_XMLFILE.
with open(TEST_XMLFILE_OUT, 'rb') as f:
    xml_test_out = f.read()
     878  
     879  class ESC[4;38;5;81mExpatReaderTest(ESC[4;38;5;149mXmlTestBase):
     880  
     881      # ===== XMLReader support
     882  
     883      def test_expat_binary_file(self):
     884          parser = create_parser()
     885          result = BytesIO()
     886          xmlgen = XMLGenerator(result)
     887  
     888          parser.setContentHandler(xmlgen)
     889          with open(TEST_XMLFILE, 'rb') as f:
     890              parser.parse(f)
     891  
     892          self.assertEqual(result.getvalue(), xml_test_out)
     893  
     894      def test_expat_text_file(self):
     895          parser = create_parser()
     896          result = BytesIO()
     897          xmlgen = XMLGenerator(result)
     898  
     899          parser.setContentHandler(xmlgen)
     900          with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f:
     901              parser.parse(f)
     902  
     903          self.assertEqual(result.getvalue(), xml_test_out)
     904  
     905      @requires_nonascii_filenames
     906      def test_expat_binary_file_nonascii(self):
     907          fname = os_helper.TESTFN_UNICODE
     908          shutil.copyfile(TEST_XMLFILE, fname)
     909          self.addCleanup(os_helper.unlink, fname)
     910  
     911          parser = create_parser()
     912          result = BytesIO()
     913          xmlgen = XMLGenerator(result)
     914  
     915          parser.setContentHandler(xmlgen)
     916          parser.parse(open(fname, 'rb'))
     917  
     918          self.assertEqual(result.getvalue(), xml_test_out)
     919  
     920      def test_expat_binary_file_bytes_name(self):
     921          fname = os.fsencode(TEST_XMLFILE)
     922          parser = create_parser()
     923          result = BytesIO()
     924          xmlgen = XMLGenerator(result)
     925  
     926          parser.setContentHandler(xmlgen)
     927          with open(fname, 'rb') as f:
     928              parser.parse(f)
     929  
     930          self.assertEqual(result.getvalue(), xml_test_out)
     931  
     932      def test_expat_binary_file_int_name(self):
     933          parser = create_parser()
     934          result = BytesIO()
     935          xmlgen = XMLGenerator(result)
     936  
     937          parser.setContentHandler(xmlgen)
     938          with open(TEST_XMLFILE, 'rb') as f:
     939              with open(f.fileno(), 'rb', closefd=False) as f2:
     940                  parser.parse(f2)
     941  
     942          self.assertEqual(result.getvalue(), xml_test_out)
     943  
     944      # ===== DTDHandler support
     945  
     946      class ESC[4;38;5;81mTestDTDHandler:
     947  
     948          def __init__(self):
     949              self._notations = []
     950              self._entities  = []
     951  
     952          def notationDecl(self, name, publicId, systemId):
     953              self._notations.append((name, publicId, systemId))
     954  
     955          def unparsedEntityDecl(self, name, publicId, systemId, ndata):
     956              self._entities.append((name, publicId, systemId, ndata))
     957  
     958  
     959      class ESC[4;38;5;81mTestEntityRecorder:
     960          def __init__(self):
     961              self.entities = []
     962  
     963          def resolveEntity(self, publicId, systemId):
     964              self.entities.append((publicId, systemId))
     965              source = InputSource()
     966              source.setPublicId(publicId)
     967              source.setSystemId(systemId)
     968              return source
     969  
     970      def test_expat_dtdhandler(self):
     971          parser = create_parser()
     972          handler = self.TestDTDHandler()
     973          parser.setDTDHandler(handler)
     974  
     975          parser.feed('<!DOCTYPE doc [\n')
     976          parser.feed('  <!ENTITY img SYSTEM "expat.gif" NDATA GIF>\n')
     977          parser.feed('  <!NOTATION GIF PUBLIC "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN">\n')
     978          parser.feed(']>\n')
     979          parser.feed('<doc></doc>')
     980          parser.close()
     981  
     982          self.assertEqual(handler._notations,
     983              [("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)])
     984          self.assertEqual(handler._entities, [("img", None, "expat.gif", "GIF")])
     985  
     986      def test_expat_external_dtd_enabled(self):
     987          # clear _opener global variable
     988          self.addCleanup(urllib.request.urlcleanup)
     989  
     990          parser = create_parser()
     991          parser.setFeature(feature_external_ges, True)
     992          resolver = self.TestEntityRecorder()
     993          parser.setEntityResolver(resolver)
     994  
     995          with self.assertRaises(URLError):
     996              parser.feed(
     997                  '<!DOCTYPE external SYSTEM "unsupported://non-existing">\n'
     998              )
     999          self.assertEqual(
    1000              resolver.entities, [(None, 'unsupported://non-existing')]
    1001          )
    1002  
    1003      def test_expat_external_dtd_default(self):
    1004          parser = create_parser()
    1005          resolver = self.TestEntityRecorder()
    1006          parser.setEntityResolver(resolver)
    1007  
    1008          parser.feed(
    1009              '<!DOCTYPE external SYSTEM "unsupported://non-existing">\n'
    1010          )
    1011          parser.feed('<doc />')
    1012          parser.close()
    1013          self.assertEqual(resolver.entities, [])
    1014  
    1015      # ===== EntityResolver support
    1016  
    1017      class ESC[4;38;5;81mTestEntityResolver:
    1018  
    1019          def resolveEntity(self, publicId, systemId):
    1020              inpsrc = InputSource()
    1021              inpsrc.setByteStream(BytesIO(b"<entity/>"))
    1022              return inpsrc
    1023  
    1024      def test_expat_entityresolver_enabled(self):
    1025          parser = create_parser()
    1026          parser.setFeature(feature_external_ges, True)
    1027          parser.setEntityResolver(self.TestEntityResolver())
    1028          result = BytesIO()
    1029          parser.setContentHandler(XMLGenerator(result))
    1030  
    1031          parser.feed('<!DOCTYPE doc [\n')
    1032          parser.feed('  <!ENTITY test SYSTEM "whatever">\n')
    1033          parser.feed(']>\n')
    1034          parser.feed('<doc>&test;</doc>')
    1035          parser.close()
    1036  
    1037          self.assertEqual(result.getvalue(), start +
    1038                           b"<doc><entity></entity></doc>")
    1039  
    1040      def test_expat_entityresolver_default(self):
    1041          parser = create_parser()
    1042          self.assertEqual(parser.getFeature(feature_external_ges), False)
    1043          parser.setEntityResolver(self.TestEntityResolver())
    1044          result = BytesIO()
    1045          parser.setContentHandler(XMLGenerator(result))
    1046  
    1047          parser.feed('<!DOCTYPE doc [\n')
    1048          parser.feed('  <!ENTITY test SYSTEM "whatever">\n')
    1049          parser.feed(']>\n')
    1050          parser.feed('<doc>&test;</doc>')
    1051          parser.close()
    1052  
    1053          self.assertEqual(result.getvalue(), start +
    1054                           b"<doc></doc>")
    1055  
    1056      # ===== Attributes support
    1057  
    1058      class ESC[4;38;5;81mAttrGatherer(ESC[4;38;5;149mContentHandler):
    1059  
    1060          def startElement(self, name, attrs):
    1061              self._attrs = attrs
    1062  
    1063          def startElementNS(self, name, qname, attrs):
    1064              self._attrs = attrs
    1065  
    1066      def test_expat_attrs_empty(self):
    1067          parser = create_parser()
    1068          gather = self.AttrGatherer()
    1069          parser.setContentHandler(gather)
    1070  
    1071          parser.feed("<doc/>")
    1072          parser.close()
    1073  
    1074          self.verify_empty_attrs(gather._attrs)
    1075  
    1076      def test_expat_attrs_wattr(self):
    1077          parser = create_parser()
    1078          gather = self.AttrGatherer()
    1079          parser.setContentHandler(gather)
    1080  
    1081          parser.feed("<doc attr='val'/>")
    1082          parser.close()
    1083  
    1084          self.verify_attrs_wattr(gather._attrs)
    1085  
    1086      def test_expat_nsattrs_empty(self):
    1087          parser = create_parser(1)
    1088          gather = self.AttrGatherer()
    1089          parser.setContentHandler(gather)
    1090  
    1091          parser.feed("<doc/>")
    1092          parser.close()
    1093  
    1094          self.verify_empty_nsattrs(gather._attrs)
    1095  
    1096      def test_expat_nsattrs_wattr(self):
    1097          parser = create_parser(1)
    1098          gather = self.AttrGatherer()
    1099          parser.setContentHandler(gather)
    1100  
    1101          parser.feed("<doc xmlns:ns='%s' ns:attr='val'/>" % ns_uri)
    1102          parser.close()
    1103  
    1104          attrs = gather._attrs
    1105  
    1106          self.assertEqual(attrs.getLength(), 1)
    1107          self.assertEqual(attrs.getNames(), [(ns_uri, "attr")])
    1108          self.assertTrue((attrs.getQNames() == [] or
    1109                           attrs.getQNames() == ["ns:attr"]))
    1110          self.assertEqual(len(attrs), 1)
    1111          self.assertIn((ns_uri, "attr"), attrs)
    1112          self.assertEqual(attrs.get((ns_uri, "attr")), "val")
    1113          self.assertEqual(attrs.get((ns_uri, "attr"), 25), "val")
    1114          self.assertEqual(list(attrs.items()), [((ns_uri, "attr"), "val")])
    1115          self.assertEqual(list(attrs.values()), ["val"])
    1116          self.assertEqual(attrs.getValue((ns_uri, "attr")), "val")
    1117          self.assertEqual(attrs[(ns_uri, "attr")], "val")
    1118  
    1119      # ===== InputSource support
    1120  
    1121      def test_expat_inpsource_filename(self):
    1122          parser = create_parser()
    1123          result = BytesIO()
    1124          xmlgen = XMLGenerator(result)
    1125  
    1126          parser.setContentHandler(xmlgen)
    1127          parser.parse(TEST_XMLFILE)
    1128  
    1129          self.assertEqual(result.getvalue(), xml_test_out)
    1130  
    1131      def test_expat_inpsource_sysid(self):
    1132          parser = create_parser()
    1133          result = BytesIO()
    1134          xmlgen = XMLGenerator(result)
    1135  
    1136          parser.setContentHandler(xmlgen)
    1137          parser.parse(InputSource(TEST_XMLFILE))
    1138  
    1139          self.assertEqual(result.getvalue(), xml_test_out)
    1140  
    1141      @requires_nonascii_filenames
    1142      def test_expat_inpsource_sysid_nonascii(self):
    1143          fname = os_helper.TESTFN_UNICODE
    1144          shutil.copyfile(TEST_XMLFILE, fname)
    1145          self.addCleanup(os_helper.unlink, fname)
    1146  
    1147          parser = create_parser()
    1148          result = BytesIO()
    1149          xmlgen = XMLGenerator(result)
    1150  
    1151          parser.setContentHandler(xmlgen)
    1152          parser.parse(InputSource(fname))
    1153  
    1154          self.assertEqual(result.getvalue(), xml_test_out)
    1155  
    1156      def test_expat_inpsource_byte_stream(self):
    1157          parser = create_parser()
    1158          result = BytesIO()
    1159          xmlgen = XMLGenerator(result)
    1160  
    1161          parser.setContentHandler(xmlgen)
    1162          inpsrc = InputSource()
    1163          with open(TEST_XMLFILE, 'rb') as f:
    1164              inpsrc.setByteStream(f)
    1165              parser.parse(inpsrc)
    1166  
    1167          self.assertEqual(result.getvalue(), xml_test_out)
    1168  
    1169      def test_expat_inpsource_character_stream(self):
    1170          parser = create_parser()
    1171          result = BytesIO()
    1172          xmlgen = XMLGenerator(result)
    1173  
    1174          parser.setContentHandler(xmlgen)
    1175          inpsrc = InputSource()
    1176          with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f:
    1177              inpsrc.setCharacterStream(f)
    1178              parser.parse(inpsrc)
    1179  
    1180          self.assertEqual(result.getvalue(), xml_test_out)
    1181  
    1182      # ===== IncrementalParser support
    1183  
    1184      def test_expat_incremental(self):
    1185          result = BytesIO()
    1186          xmlgen = XMLGenerator(result)
    1187          parser = create_parser()
    1188          parser.setContentHandler(xmlgen)
    1189  
    1190          parser.feed("<doc>")
    1191          parser.feed("</doc>")
    1192          parser.close()
    1193  
    1194          self.assertEqual(result.getvalue(), start + b"<doc></doc>")
    1195  
    1196      def test_expat_incremental_reset(self):
    1197          result = BytesIO()
    1198          xmlgen = XMLGenerator(result)
    1199          parser = create_parser()
    1200          parser.setContentHandler(xmlgen)
    1201  
    1202          parser.feed("<doc>")
    1203          parser.feed("text")
    1204  
    1205          result = BytesIO()
    1206          xmlgen = XMLGenerator(result)
    1207          parser.setContentHandler(xmlgen)
    1208          parser.reset()
    1209  
    1210          parser.feed("<doc>")
    1211          parser.feed("text")
    1212          parser.feed("</doc>")
    1213          parser.close()
    1214  
    1215          self.assertEqual(result.getvalue(), start + b"<doc>text</doc>")
    1216  
    1217      # ===== Locator support
    1218  
    1219      def test_expat_locator_noinfo(self):
    1220          result = BytesIO()
    1221          xmlgen = XMLGenerator(result)
    1222          parser = create_parser()
    1223          parser.setContentHandler(xmlgen)
    1224  
    1225          parser.feed("<doc>")
    1226          parser.feed("</doc>")
    1227          parser.close()
    1228  
    1229          self.assertEqual(parser.getSystemId(), None)
    1230          self.assertEqual(parser.getPublicId(), None)
    1231          self.assertEqual(parser.getLineNumber(), 1)
    1232  
    1233      def test_expat_locator_withinfo(self):
    1234          result = BytesIO()
    1235          xmlgen = XMLGenerator(result)
    1236          parser = create_parser()
    1237          parser.setContentHandler(xmlgen)
    1238          parser.parse(TEST_XMLFILE)
    1239  
    1240          self.assertEqual(parser.getSystemId(), TEST_XMLFILE)
    1241          self.assertEqual(parser.getPublicId(), None)
    1242  
    1243      @requires_nonascii_filenames
    1244      def test_expat_locator_withinfo_nonascii(self):
    1245          fname = os_helper.TESTFN_UNICODE
    1246          shutil.copyfile(TEST_XMLFILE, fname)
    1247          self.addCleanup(os_helper.unlink, fname)
    1248  
    1249          result = BytesIO()
    1250          xmlgen = XMLGenerator(result)
    1251          parser = create_parser()
    1252          parser.setContentHandler(xmlgen)
    1253          parser.parse(fname)
    1254  
    1255          self.assertEqual(parser.getSystemId(), fname)
    1256          self.assertEqual(parser.getPublicId(), None)
    1257  
    1258  
    1259  # ===========================================================================
    1260  #
    1261  #   error reporting
    1262  #
    1263  # ===========================================================================
    1264  
    1265  class ESC[4;38;5;81mErrorReportingTest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
    1266      def test_expat_inpsource_location(self):
    1267          parser = create_parser()
    1268          parser.setContentHandler(ContentHandler()) # do nothing
    1269          source = InputSource()
    1270          source.setByteStream(BytesIO(b"<foo bar foobar>"))   #ill-formed
    1271          name = "a file name"
    1272          source.setSystemId(name)
    1273          try:
    1274              parser.parse(source)
    1275              self.fail()
    1276          except SAXException as e:
    1277              self.assertEqual(e.getSystemId(), name)
    1278  
    1279      def test_expat_incomplete(self):
    1280          parser = create_parser()
    1281          parser.setContentHandler(ContentHandler()) # do nothing
    1282          self.assertRaises(SAXParseException, parser.parse, StringIO("<foo>"))
    1283          self.assertEqual(parser.getColumnNumber(), 5)
    1284          self.assertEqual(parser.getLineNumber(), 1)
    1285  
    1286      def test_sax_parse_exception_str(self):
    1287          # pass various values from a locator to the SAXParseException to
    1288          # make sure that the __str__() doesn't fall apart when None is
    1289          # passed instead of an integer line and column number
    1290          #
    1291          # use "normal" values for the locator:
    1292          str(SAXParseException("message", None,
    1293                                self.DummyLocator(1, 1)))
    1294          # use None for the line number:
    1295          str(SAXParseException("message", None,
    1296                                self.DummyLocator(None, 1)))
    1297          # use None for the column number:
    1298          str(SAXParseException("message", None,
    1299                                self.DummyLocator(1, None)))
    1300          # use None for both:
    1301          str(SAXParseException("message", None,
    1302                                self.DummyLocator(None, None)))
    1303  
    1304      class ESC[4;38;5;81mDummyLocator:
    1305          def __init__(self, lineno, colno):
    1306              self._lineno = lineno
    1307              self._colno = colno
    1308  
    1309          def getPublicId(self):
    1310              return "pubid"
    1311  
    1312          def getSystemId(self):
    1313              return "sysid"
    1314  
    1315          def getLineNumber(self):
    1316              return self._lineno
    1317  
    1318          def getColumnNumber(self):
    1319              return self._colno
    1320  
    1321  # ===========================================================================
    1322  #
    1323  #   xmlreader tests
    1324  #
    1325  # ===========================================================================
    1326  
    1327  class ESC[4;38;5;81mXmlReaderTest(ESC[4;38;5;149mXmlTestBase):
    1328  
    1329      # ===== AttributesImpl
    1330      def test_attrs_empty(self):
    1331          self.verify_empty_attrs(AttributesImpl({}))
    1332  
    1333      def test_attrs_wattr(self):
    1334          self.verify_attrs_wattr(AttributesImpl({"attr" : "val"}))
    1335  
    1336      def test_nsattrs_empty(self):
    1337          self.verify_empty_nsattrs(AttributesNSImpl({}, {}))
    1338  
    1339      def test_nsattrs_wattr(self):
    1340          attrs = AttributesNSImpl({(ns_uri, "attr") : "val"},
    1341                                   {(ns_uri, "attr") : "ns:attr"})
    1342  
    1343          self.assertEqual(attrs.getLength(), 1)
    1344          self.assertEqual(attrs.getNames(), [(ns_uri, "attr")])
    1345          self.assertEqual(attrs.getQNames(), ["ns:attr"])
    1346          self.assertEqual(len(attrs), 1)
    1347          self.assertIn((ns_uri, "attr"), attrs)
    1348          self.assertEqual(list(attrs.keys()), [(ns_uri, "attr")])
    1349          self.assertEqual(attrs.get((ns_uri, "attr")), "val")
    1350          self.assertEqual(attrs.get((ns_uri, "attr"), 25), "val")
    1351          self.assertEqual(list(attrs.items()), [((ns_uri, "attr"), "val")])
    1352          self.assertEqual(list(attrs.values()), ["val"])
    1353          self.assertEqual(attrs.getValue((ns_uri, "attr")), "val")
    1354          self.assertEqual(attrs.getValueByQName("ns:attr"), "val")
    1355          self.assertEqual(attrs.getNameByQName("ns:attr"), (ns_uri, "attr"))
    1356          self.assertEqual(attrs[(ns_uri, "attr")], "val")
    1357          self.assertEqual(attrs.getQNameByName((ns_uri, "attr")), "ns:attr")
    1358  
    1359  
    1360  class ESC[4;38;5;81mLexicalHandlerTest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
    1361      def setUp(self):
    1362          self.parser = None
    1363  
    1364          self.specified_version = '1.0'
    1365          self.specified_encoding = 'UTF-8'
    1366          self.specified_doctype = 'wish'
    1367          self.specified_entity_names = ('nbsp', 'source', 'target')
    1368          self.specified_comment = ('Comment in a DTD',
    1369                                    'Really! You think so?')
    1370          self.test_data = StringIO()
    1371          self.test_data.write('<?xml version="{}" encoding="{}"?>\n'.
    1372                               format(self.specified_version,
    1373                                      self.specified_encoding))
    1374          self.test_data.write('<!DOCTYPE {} [\n'.
    1375                               format(self.specified_doctype))
    1376          self.test_data.write('<!-- {} -->\n'.
    1377                               format(self.specified_comment[0]))
    1378          self.test_data.write('<!ELEMENT {} (to,from,heading,body,footer)>\n'.
    1379                               format(self.specified_doctype))
    1380          self.test_data.write('<!ELEMENT to (#PCDATA)>\n')
    1381          self.test_data.write('<!ELEMENT from (#PCDATA)>\n')
    1382          self.test_data.write('<!ELEMENT heading (#PCDATA)>\n')
    1383          self.test_data.write('<!ELEMENT body (#PCDATA)>\n')
    1384          self.test_data.write('<!ELEMENT footer (#PCDATA)>\n')
    1385          self.test_data.write('<!ENTITY {} "&#xA0;">\n'.
    1386                               format(self.specified_entity_names[0]))
    1387          self.test_data.write('<!ENTITY {} "Written by: Alexander.">\n'.
    1388                               format(self.specified_entity_names[1]))
    1389          self.test_data.write('<!ENTITY {} "Hope it gets to: Aristotle.">\n'.
    1390                               format(self.specified_entity_names[2]))
    1391          self.test_data.write(']>\n')
    1392          self.test_data.write('<{}>'.format(self.specified_doctype))
    1393          self.test_data.write('<to>Aristotle</to>\n')
    1394          self.test_data.write('<from>Alexander</from>\n')
    1395          self.test_data.write('<heading>Supplication</heading>\n')
    1396          self.test_data.write('<body>Teach me patience!</body>\n')
    1397          self.test_data.write('<footer>&{};&{};&{};</footer>\n'.
    1398                               format(self.specified_entity_names[1],
    1399                                      self.specified_entity_names[0],
    1400                                      self.specified_entity_names[2]))
    1401          self.test_data.write('<!-- {} -->\n'.format(self.specified_comment[1]))
    1402          self.test_data.write('</{}>\n'.format(self.specified_doctype))
    1403          self.test_data.seek(0)
    1404  
    1405          # Data received from handlers - to be validated
    1406          self.version = None
    1407          self.encoding = None
    1408          self.standalone = None
    1409          self.doctype = None
    1410          self.publicID = None
    1411          self.systemID = None
    1412          self.end_of_dtd = False
    1413          self.comments = []
    1414  
    1415      def test_handlers(self):
    1416          class ESC[4;38;5;81mTestLexicalHandler(ESC[4;38;5;149mLexicalHandler):
    1417              def __init__(self, test_harness, *args, **kwargs):
    1418                  super().__init__(*args, **kwargs)
    1419                  self.test_harness = test_harness
    1420  
    1421              def startDTD(self, doctype, publicID, systemID):
    1422                  self.test_harness.doctype = doctype
    1423                  self.test_harness.publicID = publicID
    1424                  self.test_harness.systemID = systemID
    1425  
    1426              def endDTD(self):
    1427                  self.test_harness.end_of_dtd = True
    1428  
    1429              def comment(self, text):
    1430                  self.test_harness.comments.append(text)
    1431  
    1432          self.parser = create_parser()
    1433          self.parser.setContentHandler(ContentHandler())
    1434          self.parser.setProperty(
    1435              'http://xml.org/sax/properties/lexical-handler',
    1436              TestLexicalHandler(self))
    1437          source = InputSource()
    1438          source.setCharacterStream(self.test_data)
    1439          self.parser.parse(source)
    1440          self.assertEqual(self.doctype, self.specified_doctype)
    1441          self.assertIsNone(self.publicID)
    1442          self.assertIsNone(self.systemID)
    1443          self.assertTrue(self.end_of_dtd)
    1444          self.assertEqual(len(self.comments),
    1445                           len(self.specified_comment))
    1446          self.assertEqual(f' {self.specified_comment[0]} ', self.comments[0])
    1447  
    1448  
    1449  class ESC[4;38;5;81mCDATAHandlerTest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
    1450      def setUp(self):
    1451          self.parser = None
    1452          self.specified_chars = []
    1453          self.specified_chars.append(('Parseable character data', False))
    1454          self.specified_chars.append(('<> &% - assorted other XML junk.', True))
    1455          self.char_index = 0  # Used to index specified results within handlers
    1456          self.test_data = StringIO()
    1457          self.test_data.write('<root_doc>\n')
    1458          self.test_data.write('<some_pcdata>\n')
    1459          self.test_data.write(f'{self.specified_chars[0][0]}\n')
    1460          self.test_data.write('</some_pcdata>\n')
    1461          self.test_data.write('<some_cdata>\n')
    1462          self.test_data.write(f'<![CDATA[{self.specified_chars[1][0]}]]>\n')
    1463          self.test_data.write('</some_cdata>\n')
    1464          self.test_data.write('</root_doc>\n')
    1465          self.test_data.seek(0)
    1466  
    1467          # Data received from handlers - to be validated
    1468          self.chardata = []
    1469          self.in_cdata = False
    1470  
    1471      def test_handlers(self):
    1472          class ESC[4;38;5;81mTestLexicalHandler(ESC[4;38;5;149mLexicalHandler):
    1473              def __init__(self, test_harness, *args, **kwargs):
    1474                  super().__init__(*args, **kwargs)
    1475                  self.test_harness = test_harness
    1476  
    1477              def startCDATA(self):
    1478                  self.test_harness.in_cdata = True
    1479  
    1480              def endCDATA(self):
    1481                  self.test_harness.in_cdata = False
    1482  
    1483          class ESC[4;38;5;81mTestCharHandler(ESC[4;38;5;149mContentHandler):
    1484              def __init__(self, test_harness, *args, **kwargs):
    1485                  super().__init__(*args, **kwargs)
    1486                  self.test_harness = test_harness
    1487  
    1488              def characters(self, content):
    1489                  if content != '\n':
    1490                      h = self.test_harness
    1491                      t = h.specified_chars[h.char_index]
    1492                      h.assertEqual(t[0], content)
    1493                      h.assertEqual(t[1], h.in_cdata)
    1494                      h.char_index += 1
    1495  
    1496          self.parser = create_parser()
    1497          self.parser.setContentHandler(TestCharHandler(self))
    1498          self.parser.setProperty(
    1499              'http://xml.org/sax/properties/lexical-handler',
    1500              TestLexicalHandler(self))
    1501          source = InputSource()
    1502          source.setCharacterStream(self.test_data)
    1503          self.parser.parse(source)
    1504  
    1505          self.assertFalse(self.in_cdata)
    1506          self.assertEqual(self.char_index, 2)
    1507  
    1508  
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()