(root)/
Python-3.11.7/
Lib/
test/
test_pyexpat.py
       1  # XXX TypeErrors on calling handlers, or on bad return values from a
       2  # handler, are obscure and unhelpful.
       3  
       4  import os
       5  import platform
       6  import sys
       7  import sysconfig
       8  import unittest
       9  import traceback
      10  from io import BytesIO
      11  from test import support
      12  from test.support import os_helper
      13  
      14  from xml.parsers import expat
      15  from xml.parsers.expat import errors
      16  
      17  from test.support import sortdict, is_emscripten, is_wasi
      18  
      19  
      20  class ESC[4;38;5;81mSetAttributeTest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
      21      def setUp(self):
      22          self.parser = expat.ParserCreate(namespace_separator='!')
      23  
      24      def test_buffer_text(self):
      25          self.assertIs(self.parser.buffer_text, False)
      26          for x in 0, 1, 2, 0:
      27              self.parser.buffer_text = x
      28              self.assertIs(self.parser.buffer_text, bool(x))
      29  
      30      def test_namespace_prefixes(self):
      31          self.assertIs(self.parser.namespace_prefixes, False)
      32          for x in 0, 1, 2, 0:
      33              self.parser.namespace_prefixes = x
      34              self.assertIs(self.parser.namespace_prefixes, bool(x))
      35  
      36      def test_ordered_attributes(self):
      37          self.assertIs(self.parser.ordered_attributes, False)
      38          for x in 0, 1, 2, 0:
      39              self.parser.ordered_attributes = x
      40              self.assertIs(self.parser.ordered_attributes, bool(x))
      41  
      42      def test_specified_attributes(self):
      43          self.assertIs(self.parser.specified_attributes, False)
      44          for x in 0, 1, 2, 0:
      45              self.parser.specified_attributes = x
      46              self.assertIs(self.parser.specified_attributes, bool(x))
      47  
      48      def test_invalid_attributes(self):
      49          with self.assertRaises(AttributeError):
      50              self.parser.returns_unicode = 1
      51          with self.assertRaises(AttributeError):
      52              self.parser.returns_unicode
      53  
      54          # Issue #25019
      55          self.assertRaises(TypeError, setattr, self.parser, range(0xF), 0)
      56          self.assertRaises(TypeError, self.parser.__setattr__, range(0xF), 0)
      57          self.assertRaises(TypeError, getattr, self.parser, range(0xF))
      58  
      59  
# Shared XML fixture for the parser tests in this module.  It is declared as
# ISO-8859-1 and deliberately packs many constructs into one document: a
# processing instruction, a comment, an internal DTD subset (element,
# attribute-list, notation and entity declarations plus a parameter-entity
# reference), a namespaced element, a CDATA section, references to an
# external entity and to an entity that is never declared, and the raw
# byte 0xB5.
data = b'''\
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!ATTLIST root attr1 CDATA #REQUIRED attr2 CDATA #IMPLIED>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
     Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
&skipped_entity;
\xb5
</root>
'''
      84  
      85  
      86  # Produce UTF-8 output
      87  class ESC[4;38;5;81mParseTest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
      88      class ESC[4;38;5;81mOutputter:
      89          def __init__(self):
      90              self.out = []
      91  
      92          def StartElementHandler(self, name, attrs):
      93              self.out.append('Start element: ' + repr(name) + ' ' +
      94                              sortdict(attrs))
      95  
      96          def EndElementHandler(self, name):
      97              self.out.append('End element: ' + repr(name))
      98  
      99          def CharacterDataHandler(self, data):
     100              data = data.strip()
     101              if data:
     102                  self.out.append('Character data: ' + repr(data))
     103  
     104          def ProcessingInstructionHandler(self, target, data):
     105              self.out.append('PI: ' + repr(target) + ' ' + repr(data))
     106  
     107          def StartNamespaceDeclHandler(self, prefix, uri):
     108              self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri))
     109  
     110          def EndNamespaceDeclHandler(self, prefix):
     111              self.out.append('End of NS decl: ' + repr(prefix))
     112  
     113          def StartCdataSectionHandler(self):
     114              self.out.append('Start of CDATA section')
     115  
     116          def EndCdataSectionHandler(self):
     117              self.out.append('End of CDATA section')
     118  
     119          def CommentHandler(self, text):
     120              self.out.append('Comment: ' + repr(text))
     121  
     122          def NotationDeclHandler(self, *args):
     123              name, base, sysid, pubid = args
     124              self.out.append('Notation declared: %s' %(args,))
     125  
     126          def UnparsedEntityDeclHandler(self, *args):
     127              entityName, base, systemId, publicId, notationName = args
     128              self.out.append('Unparsed entity decl: %s' %(args,))
     129  
     130          def NotStandaloneHandler(self):
     131              self.out.append('Not standalone')
     132              return 1
     133  
     134          def ExternalEntityRefHandler(self, *args):
     135              context, base, sysId, pubId = args
     136              self.out.append('External entity ref: %s' %(args[1:],))
     137              return 1
     138  
     139          def StartDoctypeDeclHandler(self, *args):
     140              self.out.append(('Start doctype', args))
     141              return 1
     142  
     143          def EndDoctypeDeclHandler(self):
     144              self.out.append("End doctype")
     145              return 1
     146  
     147          def EntityDeclHandler(self, *args):
     148              self.out.append(('Entity declaration', args))
     149              return 1
     150  
     151          def XmlDeclHandler(self, *args):
     152              self.out.append(('XML declaration', args))
     153              return 1
     154  
     155          def ElementDeclHandler(self, *args):
     156              self.out.append(('Element declaration', args))
     157              return 1
     158  
     159          def AttlistDeclHandler(self, *args):
     160              self.out.append(('Attribute list declaration', args))
     161              return 1
     162  
     163          def SkippedEntityHandler(self, *args):
     164              self.out.append(("Skipped entity", args))
     165              return 1
     166  
     167          def DefaultHandler(self, userData):
     168              pass
     169  
     170          def DefaultHandlerExpand(self, userData):
     171              pass
     172  
     173      handler_names = [
     174          'StartElementHandler', 'EndElementHandler', 'CharacterDataHandler',
     175          'ProcessingInstructionHandler', 'UnparsedEntityDeclHandler',
     176          'NotationDeclHandler', 'StartNamespaceDeclHandler',
     177          'EndNamespaceDeclHandler', 'CommentHandler',
     178          'StartCdataSectionHandler', 'EndCdataSectionHandler', 'DefaultHandler',
     179          'DefaultHandlerExpand', 'NotStandaloneHandler',
     180          'ExternalEntityRefHandler', 'StartDoctypeDeclHandler',
     181          'EndDoctypeDeclHandler', 'EntityDeclHandler', 'XmlDeclHandler',
     182          'ElementDeclHandler', 'AttlistDeclHandler', 'SkippedEntityHandler',
     183          ]
     184  
     185      def _hookup_callbacks(self, parser, handler):
     186          """
     187          Set each of the callbacks defined on handler and named in
     188          self.handler_names on the given parser.
     189          """
     190          for name in self.handler_names:
     191              setattr(parser, name, getattr(handler, name))
     192  
     193      def _verify_parse_output(self, operations):
     194          expected_operations = [
     195              ('XML declaration', ('1.0', 'iso-8859-1', 0)),
     196              'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'',
     197              "Comment: ' comment data '",
     198              "Not standalone",
     199              ("Start doctype", ('quotations', 'quotations.dtd', None, 1)),
     200              ('Element declaration', ('root', (2, 0, None, ()))),
     201              ('Attribute list declaration', ('root', 'attr1', 'CDATA', None,
     202                  1)),
     203              ('Attribute list declaration', ('root', 'attr2', 'CDATA', None,
     204                  0)),
     205              "Notation declared: ('notation', None, 'notation.jpeg', None)",
     206              ('Entity declaration', ('acirc', 0, '\xe2', None, None, None, None)),
     207              ('Entity declaration', ('external_entity', 0, None, None,
     208                  'entity.file', None, None)),
     209              "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')",
     210              "Not standalone",
     211              "End doctype",
     212              "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\u1f40'}",
     213              "NS decl: 'myns' 'http://www.python.org/namespace'",
     214              "Start element: 'http://www.python.org/namespace!subelement' {}",
     215              "Character data: 'Contents of subelements'",
     216              "End element: 'http://www.python.org/namespace!subelement'",
     217              "End of NS decl: 'myns'",
     218              "Start element: 'sub2' {}",
     219              'Start of CDATA section',
     220              "Character data: 'contents of CDATA section'",
     221              'End of CDATA section',
     222              "End element: 'sub2'",
     223              "External entity ref: (None, 'entity.file', None)",
     224              ('Skipped entity', ('skipped_entity', 0)),
     225              "Character data: '\xb5'",
     226              "End element: 'root'",
     227          ]
     228          for operation, expected_operation in zip(operations, expected_operations):
     229              self.assertEqual(operation, expected_operation)
     230  
     231      def test_parse_bytes(self):
     232          out = self.Outputter()
     233          parser = expat.ParserCreate(namespace_separator='!')
     234          self._hookup_callbacks(parser, out)
     235  
     236          parser.Parse(data, True)
     237  
     238          operations = out.out
     239          self._verify_parse_output(operations)
     240          # Issue #6697.
     241          self.assertRaises(AttributeError, getattr, parser, '\uD800')
     242  
     243      def test_parse_str(self):
     244          out = self.Outputter()
     245          parser = expat.ParserCreate(namespace_separator='!')
     246          self._hookup_callbacks(parser, out)
     247  
     248          parser.Parse(data.decode('iso-8859-1'), True)
     249  
     250          operations = out.out
     251          self._verify_parse_output(operations)
     252  
     253      def test_parse_file(self):
     254          # Try parsing a file
     255          out = self.Outputter()
     256          parser = expat.ParserCreate(namespace_separator='!')
     257          self._hookup_callbacks(parser, out)
     258          file = BytesIO(data)
     259  
     260          parser.ParseFile(file)
     261  
     262          operations = out.out
     263          self._verify_parse_output(operations)
     264  
     265      def test_parse_again(self):
     266          parser = expat.ParserCreate()
     267          file = BytesIO(data)
     268          parser.ParseFile(file)
     269          # Issue 6676: ensure a meaningful exception is raised when attempting
     270          # to parse more than one XML document per xmlparser instance,
     271          # a limitation of the Expat library.
     272          with self.assertRaises(expat.error) as cm:
     273              parser.ParseFile(file)
     274          self.assertEqual(expat.ErrorString(cm.exception.code),
     275                            expat.errors.XML_ERROR_FINISHED)
     276  
     277  class ESC[4;38;5;81mNamespaceSeparatorTest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     278      def test_legal(self):
     279          # Tests that make sure we get errors when the namespace_separator value
     280          # is illegal, and that we don't for good values:
     281          expat.ParserCreate()
     282          expat.ParserCreate(namespace_separator=None)
     283          expat.ParserCreate(namespace_separator=' ')
     284  
     285      def test_illegal(self):
     286          try:
     287              expat.ParserCreate(namespace_separator=42)
     288              self.fail()
     289          except TypeError as e:
     290              self.assertEqual(str(e),
     291                  "ParserCreate() argument 'namespace_separator' must be str or None, not int")
     292  
     293          try:
     294              expat.ParserCreate(namespace_separator='too long')
     295              self.fail()
     296          except ValueError as e:
     297              self.assertEqual(str(e),
     298                  'namespace_separator must be at most one character, omitted, or None')
     299  
     300      def test_zero_length(self):
     301          # ParserCreate() needs to accept a namespace_separator of zero length
     302          # to satisfy the requirements of RDF applications that are required
     303          # to simply glue together the namespace URI and the localname.  Though
     304          # considered a wart of the RDF specifications, it needs to be supported.
     305          #
     306          # See XML-SIG mailing list thread starting with
     307          # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
     308          #
     309          expat.ParserCreate(namespace_separator='') # too short
     310  
     311  
     312  class ESC[4;38;5;81mInterningTest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     313      def test(self):
     314          # Test the interning machinery.
     315          p = expat.ParserCreate()
     316          L = []
     317          def collector(name, *args):
     318              L.append(name)
     319          p.StartElementHandler = collector
     320          p.EndElementHandler = collector
     321          p.Parse(b"<e> <e/> <e></e> </e>", True)
     322          tag = L[0]
     323          self.assertEqual(len(L), 6)
     324          for entry in L:
     325              # L should have the same string repeated over and over.
     326              self.assertTrue(tag is entry)
     327  
     328      def test_issue9402(self):
     329          # create an ExternalEntityParserCreate with buffer text
     330          class ESC[4;38;5;81mExternalOutputter:
     331              def __init__(self, parser):
     332                  self.parser = parser
     333                  self.parser_result = None
     334  
     335              def ExternalEntityRefHandler(self, context, base, sysId, pubId):
     336                  external_parser = self.parser.ExternalEntityParserCreate("")
     337                  self.parser_result = external_parser.Parse(b"", True)
     338                  return 1
     339  
     340          parser = expat.ParserCreate(namespace_separator='!')
     341          parser.buffer_text = 1
     342          out = ExternalOutputter(parser)
     343          parser.ExternalEntityRefHandler = out.ExternalEntityRefHandler
     344          parser.Parse(data, True)
     345          self.assertEqual(out.parser_result, 1)
     346  
     347  
     348  class ESC[4;38;5;81mBufferTextTest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     349      def setUp(self):
     350          self.stuff = []
     351          self.parser = expat.ParserCreate()
     352          self.parser.buffer_text = 1
     353          self.parser.CharacterDataHandler = self.CharacterDataHandler
     354  
     355      def check(self, expected, label):
     356          self.assertEqual(self.stuff, expected,
     357                  "%s\nstuff    = %r\nexpected = %r"
     358                  % (label, self.stuff, map(str, expected)))
     359  
     360      def CharacterDataHandler(self, text):
     361          self.stuff.append(text)
     362  
     363      def StartElementHandler(self, name, attrs):
     364          self.stuff.append("<%s>" % name)
     365          bt = attrs.get("buffer-text")
     366          if bt == "yes":
     367              self.parser.buffer_text = 1
     368          elif bt == "no":
     369              self.parser.buffer_text = 0
     370  
     371      def EndElementHandler(self, name):
     372          self.stuff.append("</%s>" % name)
     373  
     374      def CommentHandler(self, data):
     375          self.stuff.append("<!--%s-->" % data)
     376  
     377      def setHandlers(self, handlers=[]):
     378          for name in handlers:
     379              setattr(self.parser, name, getattr(self, name))
     380  
     381      def test_default_to_disabled(self):
     382          parser = expat.ParserCreate()
     383          self.assertFalse(parser.buffer_text)
     384  
     385      def test_buffering_enabled(self):
     386          # Make sure buffering is turned on
     387          self.assertTrue(self.parser.buffer_text)
     388          self.parser.Parse(b"<a>1<b/>2<c/>3</a>", True)
     389          self.assertEqual(self.stuff, ['123'],
     390                           "buffered text not properly collapsed")
     391  
     392      def test1(self):
     393          # XXX This test exposes more detail of Expat's text chunking than we
     394          # XXX like, but it tests what we need to concisely.
     395          self.setHandlers(["StartElementHandler"])
     396          self.parser.Parse(b"<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", True)
     397          self.assertEqual(self.stuff,
     398                           ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
     399                           "buffering control not reacting as expected")
     400  
     401      def test2(self):
     402          self.parser.Parse(b"<a>1<b/>&lt;2&gt;<c/>&#32;\n&#x20;3</a>", True)
     403          self.assertEqual(self.stuff, ["1<2> \n 3"],
     404                           "buffered text not properly collapsed")
     405  
     406      def test3(self):
     407          self.setHandlers(["StartElementHandler"])
     408          self.parser.Parse(b"<a>1<b/>2<c/>3</a>", True)
     409          self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"],
     410                           "buffered text not properly split")
     411  
     412      def test4(self):
     413          self.setHandlers(["StartElementHandler", "EndElementHandler"])
     414          self.parser.CharacterDataHandler = None
     415          self.parser.Parse(b"<a>1<b/>2<c/>3</a>", True)
     416          self.assertEqual(self.stuff,
     417                           ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])
     418  
     419      def test5(self):
     420          self.setHandlers(["StartElementHandler", "EndElementHandler"])
     421          self.parser.Parse(b"<a>1<b></b>2<c/>3</a>", True)
     422          self.assertEqual(self.stuff,
     423              ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])
     424  
     425      def test6(self):
     426          self.setHandlers(["CommentHandler", "EndElementHandler",
     427                      "StartElementHandler"])
     428          self.parser.Parse(b"<a>1<b/>2<c></c>345</a> ", True)
     429          self.assertEqual(self.stuff,
     430              ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
     431              "buffered text not properly split")
     432  
     433      def test7(self):
     434          self.setHandlers(["CommentHandler", "EndElementHandler",
     435                      "StartElementHandler"])
     436          self.parser.Parse(b"<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", True)
     437          self.assertEqual(self.stuff,
     438                           ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
     439                            "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
     440                           "buffered text not properly split")
     441  
     442  
     443  # Test handling of exception from callback:
     444  class ESC[4;38;5;81mHandlerExceptionTest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     445      def StartElementHandler(self, name, attrs):
     446          raise RuntimeError(f'StartElementHandler: <{name}>')
     447  
     448      def check_traceback_entry(self, entry, filename, funcname):
     449          self.assertEqual(os.path.basename(entry.filename), filename)
     450          self.assertEqual(entry.name, funcname)
     451  
     452      @support.cpython_only
     453      def test_exception(self):
     454          # gh-66652: test _PyTraceback_Add() used by pyexpat.c to inject frames
     455  
     456          # Change the current directory to the Python source code directory
     457          # if it is available.
     458          src_dir = sysconfig.get_config_var('abs_builddir')
     459          if src_dir:
     460              have_source = os.path.isdir(src_dir)
     461          else:
     462              have_source = False
     463          if have_source:
     464              with os_helper.change_cwd(src_dir):
     465                  self._test_exception(have_source)
     466          else:
     467              self._test_exception(have_source)
     468  
     469      def _test_exception(self, have_source):
     470          # Use path relative to the current directory which should be the Python
     471          # source code directory (if it is available).
     472          PYEXPAT_C = os.path.join('Modules', 'pyexpat.c')
     473  
     474          parser = expat.ParserCreate()
     475          parser.StartElementHandler = self.StartElementHandler
     476          try:
     477              parser.Parse(b"<a><b><c/></b></a>", True)
     478  
     479              self.fail("the parser did not raise RuntimeError")
     480          except RuntimeError as exc:
     481              self.assertEqual(exc.args[0], 'StartElementHandler: <a>', exc)
     482              entries = traceback.extract_tb(exc.__traceback__)
     483  
     484          self.assertEqual(len(entries), 3, entries)
     485          self.check_traceback_entry(entries[0],
     486                                     "test_pyexpat.py", "_test_exception")
     487          self.check_traceback_entry(entries[1],
     488                                     os.path.basename(PYEXPAT_C),
     489                                     "StartElement")
     490          self.check_traceback_entry(entries[2],
     491                                     "test_pyexpat.py", "StartElementHandler")
     492  
     493          # Check that the traceback contains the relevant line in
     494          # Modules/pyexpat.c. Skip the test if Modules/pyexpat.c is not
     495          # available.
     496          if have_source and os.path.exists(PYEXPAT_C):
     497              self.assertIn('call_with_frame("StartElement"',
     498                            entries[1].line)
     499  
     500  
     501  # Test Current* members:
     502  class ESC[4;38;5;81mPositionTest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     503      def StartElementHandler(self, name, attrs):
     504          self.check_pos('s')
     505  
     506      def EndElementHandler(self, name):
     507          self.check_pos('e')
     508  
     509      def check_pos(self, event):
     510          pos = (event,
     511                 self.parser.CurrentByteIndex,
     512                 self.parser.CurrentLineNumber,
     513                 self.parser.CurrentColumnNumber)
     514          self.assertTrue(self.upto < len(self.expected_list),
     515                          'too many parser events')
     516          expected = self.expected_list[self.upto]
     517          self.assertEqual(pos, expected,
     518                  'Expected position %s, got position %s' %(pos, expected))
     519          self.upto += 1
     520  
     521      def test(self):
     522          self.parser = expat.ParserCreate()
     523          self.parser.StartElementHandler = self.StartElementHandler
     524          self.parser.EndElementHandler = self.EndElementHandler
     525          self.upto = 0
     526          self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
     527                                ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]
     528  
     529          xml = b'<a>\n <b>\n  <c/>\n </b>\n</a>'
     530          self.parser.Parse(xml, True)
     531  
     532  
     533  class ESC[4;38;5;81msf1296433Test(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     534      def test_parse_only_xml_data(self):
     535          # http://python.org/sf/1296433
     536          #
     537          xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)
     538          # this one doesn't crash
     539          #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)
     540  
     541          class ESC[4;38;5;81mSpecificException(ESC[4;38;5;149mException):
     542              pass
     543  
     544          def handler(text):
     545              raise SpecificException
     546  
     547          parser = expat.ParserCreate()
     548          parser.CharacterDataHandler = handler
     549  
     550          self.assertRaises(Exception, parser.Parse, xml.encode('iso8859'))
     551  
     552  class ESC[4;38;5;81mChardataBufferTest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     553      """
     554      test setting of chardata buffer size
     555      """
     556  
     557      def test_1025_bytes(self):
     558          self.assertEqual(self.small_buffer_test(1025), 2)
     559  
     560      def test_1000_bytes(self):
     561          self.assertEqual(self.small_buffer_test(1000), 1)
     562  
     563      def test_wrong_size(self):
     564          parser = expat.ParserCreate()
     565          parser.buffer_text = 1
     566          with self.assertRaises(ValueError):
     567              parser.buffer_size = -1
     568          with self.assertRaises(ValueError):
     569              parser.buffer_size = 0
     570          with self.assertRaises((ValueError, OverflowError)):
     571              parser.buffer_size = sys.maxsize + 1
     572          with self.assertRaises(TypeError):
     573              parser.buffer_size = 512.0
     574  
     575      def test_unchanged_size(self):
     576          xml1 = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * 512
     577          xml2 = b'a'*512 + b'</s>'
     578          parser = expat.ParserCreate()
     579          parser.CharacterDataHandler = self.counting_handler
     580          parser.buffer_size = 512
     581          parser.buffer_text = 1
     582  
     583          # Feed 512 bytes of character data: the handler should be called
     584          # once.
     585          self.n = 0
     586          parser.Parse(xml1)
     587          self.assertEqual(self.n, 1)
     588  
     589          # Reassign to buffer_size, but assign the same size.
     590          parser.buffer_size = parser.buffer_size
     591          self.assertEqual(self.n, 1)
     592  
     593          # Try parsing rest of the document
     594          parser.Parse(xml2)
     595          self.assertEqual(self.n, 2)
     596  
     597  
     598      def test_disabling_buffer(self):
     599          xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>" + b'a' * 512
     600          xml2 = b'b' * 1024
     601          xml3 = b'c' * 1024 + b'</a>';
     602          parser = expat.ParserCreate()
     603          parser.CharacterDataHandler = self.counting_handler
     604          parser.buffer_text = 1
     605          parser.buffer_size = 1024
     606          self.assertEqual(parser.buffer_size, 1024)
     607  
     608          # Parse one chunk of XML
     609          self.n = 0
     610          parser.Parse(xml1, False)
     611          self.assertEqual(parser.buffer_size, 1024)
     612          self.assertEqual(self.n, 1)
     613  
     614          # Turn off buffering and parse the next chunk.
     615          parser.buffer_text = 0
     616          self.assertFalse(parser.buffer_text)
     617          self.assertEqual(parser.buffer_size, 1024)
     618          for i in range(10):
     619              parser.Parse(xml2, False)
     620          self.assertEqual(self.n, 11)
     621  
     622          parser.buffer_text = 1
     623          self.assertTrue(parser.buffer_text)
     624          self.assertEqual(parser.buffer_size, 1024)
     625          parser.Parse(xml3, True)
     626          self.assertEqual(self.n, 12)
     627  
     628      def counting_handler(self, text):
     629          self.n += 1
     630  
     631      def small_buffer_test(self, buffer_len):
     632          xml = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * buffer_len + b'</s>'
     633          parser = expat.ParserCreate()
     634          parser.CharacterDataHandler = self.counting_handler
     635          parser.buffer_size = 1024
     636          parser.buffer_text = 1
     637  
     638          self.n = 0
     639          parser.Parse(xml)
     640          return self.n
     641  
     642      def test_change_size_1(self):
     643          xml1 = b"<?xml version='1.0' encoding='iso8859'?><a><s>" + b'a' * 1024
     644          xml2 = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
     645          parser = expat.ParserCreate()
     646          parser.CharacterDataHandler = self.counting_handler
     647          parser.buffer_text = 1
     648          parser.buffer_size = 1024
     649          self.assertEqual(parser.buffer_size, 1024)
     650  
     651          self.n = 0
     652          parser.Parse(xml1, False)
     653          parser.buffer_size *= 2
     654          self.assertEqual(parser.buffer_size, 2048)
     655          parser.Parse(xml2, True)
     656          self.assertEqual(self.n, 2)
     657  
     658      def test_change_size_2(self):
     659          xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>a<s>" + b'a' * 1023
     660          xml2 = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
     661          parser = expat.ParserCreate()
     662          parser.CharacterDataHandler = self.counting_handler
     663          parser.buffer_text = 1
     664          parser.buffer_size = 2048
     665          self.assertEqual(parser.buffer_size, 2048)
     666  
     667          self.n=0
     668          parser.Parse(xml1, False)
     669          parser.buffer_size = parser.buffer_size // 2
     670          self.assertEqual(parser.buffer_size, 1024)
     671          parser.Parse(xml2, True)
     672          self.assertEqual(self.n, 4)
     673  
     674  class ESC[4;38;5;81mMalformedInputTest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     675      def test1(self):
     676          xml = b"\0\r\n"
     677          parser = expat.ParserCreate()
     678          try:
     679              parser.Parse(xml, True)
     680              self.fail()
     681          except expat.ExpatError as e:
     682              self.assertEqual(str(e), 'unclosed token: line 2, column 0')
     683  
     684      def test2(self):
     685          # \xc2\x85 is UTF-8 encoded U+0085 (NEXT LINE)
     686          xml = b"<?xml version\xc2\x85='1.0'?>\r\n"
     687          parser = expat.ParserCreate()
     688          err_pattern = r'XML declaration not well-formed: line 1, column \d+'
     689          with self.assertRaisesRegex(expat.ExpatError, err_pattern):
     690              parser.Parse(xml, True)
     691  
     692  class ESC[4;38;5;81mErrorMessageTest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     693      def test_codes(self):
     694          # verify mapping of errors.codes and errors.messages
     695          self.assertEqual(errors.XML_ERROR_SYNTAX,
     696                           errors.messages[errors.codes[errors.XML_ERROR_SYNTAX]])
     697  
     698      def test_expaterror(self):
     699          xml = b'<'
     700          parser = expat.ParserCreate()
     701          try:
     702              parser.Parse(xml, True)
     703              self.fail()
     704          except expat.ExpatError as e:
     705              self.assertEqual(e.code,
     706                               errors.codes[errors.XML_ERROR_UNCLOSED_TOKEN])
     707  
     708  
     709  class ESC[4;38;5;81mForeignDTDTests(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     710      """
     711      Tests for the UseForeignDTD method of expat parser objects.
     712      """
     713      def test_use_foreign_dtd(self):
     714          """
     715          If UseForeignDTD is passed True and a document without an external
     716          entity reference is parsed, ExternalEntityRefHandler is first called
     717          with None for the public and system ids.
     718          """
     719          handler_call_args = []
     720          def resolve_entity(context, base, system_id, public_id):
     721              handler_call_args.append((public_id, system_id))
     722              return 1
     723  
     724          parser = expat.ParserCreate()
     725          parser.UseForeignDTD(True)
     726          parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
     727          parser.ExternalEntityRefHandler = resolve_entity
     728          parser.Parse(b"<?xml version='1.0'?><element/>")
     729          self.assertEqual(handler_call_args, [(None, None)])
     730  
     731          # test UseForeignDTD() is equal to UseForeignDTD(True)
     732          handler_call_args[:] = []
     733  
     734          parser = expat.ParserCreate()
     735          parser.UseForeignDTD()
     736          parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
     737          parser.ExternalEntityRefHandler = resolve_entity
     738          parser.Parse(b"<?xml version='1.0'?><element/>")
     739          self.assertEqual(handler_call_args, [(None, None)])
     740  
     741      def test_ignore_use_foreign_dtd(self):
     742          """
     743          If UseForeignDTD is passed True and a document with an external
     744          entity reference is parsed, ExternalEntityRefHandler is called with
     745          the public and system ids from the document.
     746          """
     747          handler_call_args = []
     748          def resolve_entity(context, base, system_id, public_id):
     749              handler_call_args.append((public_id, system_id))
     750              return 1
     751  
     752          parser = expat.ParserCreate()
     753          parser.UseForeignDTD(True)
     754          parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
     755          parser.ExternalEntityRefHandler = resolve_entity
     756          parser.Parse(
     757              b"<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")
     758          self.assertEqual(handler_call_args, [("bar", "baz")])
     759  
     760  
# Hand control to unittest's command-line runner when this file is executed
# directly as a script rather than imported.
if __name__ == "__main__":
    unittest.main()