1 # XXX TypeErrors on calling handlers, or on bad return values from a
2 # handler, are obscure and unhelpful.
3
4 import os
5 import platform
6 import sys
7 import sysconfig
8 import unittest
9 import traceback
10 from io import BytesIO
11 from test import support
12 from test.support import os_helper
13
14 from xml.parsers import expat
15 from xml.parsers.expat import errors
16
17 from test.support import sortdict, is_emscripten, is_wasi
18
19
class SetAttributeTest(unittest.TestCase):
    """Exercise the settable boolean attributes of an expat parser."""

    def setUp(self):
        self.parser = expat.ParserCreate(namespace_separator='!')

    def _check_bool_attribute(self, attr_name):
        """Check that *attr_name* starts as False and stores bool(value)."""
        parser = self.parser
        self.assertIs(getattr(parser, attr_name), False)
        for value in (0, 1, 2, 0):
            setattr(parser, attr_name, value)
            self.assertIs(getattr(parser, attr_name), bool(value))

    def test_buffer_text(self):
        self._check_bool_attribute('buffer_text')

    def test_namespace_prefixes(self):
        self._check_bool_attribute('namespace_prefixes')

    def test_ordered_attributes(self):
        self._check_bool_attribute('ordered_attributes')

    def test_specified_attributes(self):
        self._check_bool_attribute('specified_attributes')

    def test_invalid_attributes(self):
        parser = self.parser
        # Both writing and reading an unknown attribute must fail.
        with self.assertRaises(AttributeError):
            parser.returns_unicode = 1
        with self.assertRaises(AttributeError):
            parser.returns_unicode

        # Issue #25019: an attribute name that is not a string is rejected
        # with TypeError on every access path.
        bad_name = range(0xF)
        self.assertRaises(TypeError, setattr, parser, bad_name, 0)
        self.assertRaises(TypeError, parser.__setattr__, bad_name, 0)
        self.assertRaises(TypeError, getattr, parser, bad_name)
58
59
# Sample document shared by the parsing tests.
#
# The two numeric character references (&#226; and &#8000;) must stay spelled
# as ASCII references: a bytes literal may only contain ASCII characters, and
# the tests expect them to be reported as '\xe2' and '\u1f40' respectively.
data = b'''\
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!ATTLIST root attr1 CDATA #REQUIRED attr2 CDATA #IMPLIED>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
&skipped_entity;
\xb5
</root>
'''
84
85
# Produce UTF-8 output
class ParseTest(unittest.TestCase):
    """Parse the shared sample document and check the reported events."""

    class Outputter:
        """Collection of expat handlers that log each callback in ``out``."""

        def __init__(self):
            self.out = []

        def StartElementHandler(self, name, attrs):
            self.out.append('Start element: ' + repr(name) + ' ' +
                            sortdict(attrs))

        def EndElementHandler(self, name):
            self.out.append('End element: ' + repr(name))

        def CharacterDataHandler(self, data):
            # Whitespace-only chunks are not recorded.
            data = data.strip()
            if data:
                self.out.append('Character data: ' + repr(data))

        def ProcessingInstructionHandler(self, target, data):
            self.out.append('PI: ' + repr(target) + ' ' + repr(data))

        def StartNamespaceDeclHandler(self, prefix, uri):
            self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri))

        def EndNamespaceDeclHandler(self, prefix):
            self.out.append('End of NS decl: ' + repr(prefix))

        def StartCdataSectionHandler(self):
            self.out.append('Start of CDATA section')

        def EndCdataSectionHandler(self):
            self.out.append('End of CDATA section')

        def CommentHandler(self, text):
            self.out.append('Comment: ' + repr(text))

        def NotationDeclHandler(self, *args):
            # The unpacking fails if the number of arguments is not four.
            name, base, sysid, pubid = args
            self.out.append('Notation declared: %s' %(args,))

        def UnparsedEntityDeclHandler(self, *args):
            # The unpacking fails if the number of arguments is not five.
            entityName, base, systemId, publicId, notationName = args
            self.out.append('Unparsed entity decl: %s' %(args,))

        def NotStandaloneHandler(self):
            self.out.append('Not standalone')
            return 1

        def ExternalEntityRefHandler(self, *args):
            # The unpacking fails if the number of arguments is not four.
            context, base, sysId, pubId = args
            # The context argument is left out of the recorded event.
            self.out.append('External entity ref: %s' %(args[1:],))
            return 1

        def StartDoctypeDeclHandler(self, *args):
            self.out.append(('Start doctype', args))
            return 1

        def EndDoctypeDeclHandler(self):
            self.out.append("End doctype")
            return 1

        def EntityDeclHandler(self, *args):
            self.out.append(('Entity declaration', args))
            return 1

        def XmlDeclHandler(self, *args):
            self.out.append(('XML declaration', args))
            return 1

        def ElementDeclHandler(self, *args):
            self.out.append(('Element declaration', args))
            return 1

        def AttlistDeclHandler(self, *args):
            self.out.append(('Attribute list declaration', args))
            return 1

        def SkippedEntityHandler(self, *args):
            self.out.append(("Skipped entity", args))
            return 1

        def DefaultHandler(self, userData):
            pass

        def DefaultHandlerExpand(self, userData):
            pass

    handler_names = [
        'StartElementHandler', 'EndElementHandler', 'CharacterDataHandler',
        'ProcessingInstructionHandler', 'UnparsedEntityDeclHandler',
        'NotationDeclHandler', 'StartNamespaceDeclHandler',
        'EndNamespaceDeclHandler', 'CommentHandler',
        'StartCdataSectionHandler', 'EndCdataSectionHandler', 'DefaultHandler',
        'DefaultHandlerExpand', 'NotStandaloneHandler',
        'ExternalEntityRefHandler', 'StartDoctypeDeclHandler',
        'EndDoctypeDeclHandler', 'EntityDeclHandler', 'XmlDeclHandler',
        'ElementDeclHandler', 'AttlistDeclHandler', 'SkippedEntityHandler',
        ]

    def _hookup_callbacks(self, parser, handler):
        """
        Set each of the callbacks defined on handler and named in
        self.handler_names on the given parser.
        """
        for name in self.handler_names:
            setattr(parser, name, getattr(handler, name))

    def _verify_parse_output(self, operations):
        """Check *operations* against the events expected for ``data``.

        Events are compared one by one so that a mismatch names the exact
        event, then the total count is checked as well.
        """
        expected_operations = [
            ('XML declaration', ('1.0', 'iso-8859-1', 0)),
            'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'',
            "Comment: ' comment data '",
            "Not standalone",
            ("Start doctype", ('quotations', 'quotations.dtd', None, 1)),
            ('Element declaration', ('root', (2, 0, None, ()))),
            ('Attribute list declaration', ('root', 'attr1', 'CDATA', None,
                1)),
            ('Attribute list declaration', ('root', 'attr2', 'CDATA', None,
                0)),
            "Notation declared: ('notation', None, 'notation.jpeg', None)",
            ('Entity declaration', ('acirc', 0, '\xe2', None, None, None, None)),
            ('Entity declaration', ('external_entity', 0, None, None,
                'entity.file', None, None)),
            "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')",
            "Not standalone",
            "End doctype",
            "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\u1f40'}",
            "NS decl: 'myns' 'http://www.python.org/namespace'",
            "Start element: 'http://www.python.org/namespace!subelement' {}",
            "Character data: 'Contents of subelements'",
            "End element: 'http://www.python.org/namespace!subelement'",
            "End of NS decl: 'myns'",
            "Start element: 'sub2' {}",
            'Start of CDATA section',
            "Character data: 'contents of CDATA section'",
            'End of CDATA section',
            "End element: 'sub2'",
            "External entity ref: (None, 'entity.file', None)",
            ('Skipped entity', ('skipped_entity', 0)),
            "Character data: '\xb5'",
            "End element: 'root'",
        ]
        for operation, expected_operation in zip(operations, expected_operations):
            self.assertEqual(operation, expected_operation)
        # zip() stops at the shorter sequence, so without this check a parser
        # that reported too few (or no) events would pass silently.
        self.assertEqual(len(operations), len(expected_operations),
                         'unexpected number of parser events')

    def test_parse_bytes(self):
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, out)

        parser.Parse(data, True)

        operations = out.out
        self._verify_parse_output(operations)
        # Issue #6697.
        self.assertRaises(AttributeError, getattr, parser, '\uD800')

    def test_parse_str(self):
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, out)

        parser.Parse(data.decode('iso-8859-1'), True)

        operations = out.out
        self._verify_parse_output(operations)

    def test_parse_file(self):
        # Try parsing a file
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, out)
        file = BytesIO(data)

        parser.ParseFile(file)

        operations = out.out
        self._verify_parse_output(operations)

    def test_parse_again(self):
        parser = expat.ParserCreate()
        file = BytesIO(data)
        parser.ParseFile(file)
        # Issue 6676: ensure a meaningful exception is raised when attempting
        # to parse more than one XML document per xmlparser instance,
        # a limitation of the Expat library.
        with self.assertRaises(expat.error) as cm:
            parser.ParseFile(file)
        self.assertEqual(expat.ErrorString(cm.exception.code),
                          expat.errors.XML_ERROR_FINISHED)
276
class NamespaceSeparatorTest(unittest.TestCase):
    """Check which namespace_separator values ParserCreate() accepts."""

    def test_legal(self):
        # Good values must be accepted without any error; the illegal ones
        # are covered by test_illegal().
        expat.ParserCreate()
        for separator in (None, ' '):
            expat.ParserCreate(namespace_separator=separator)

    def test_illegal(self):
        with self.assertRaises(TypeError) as wrong_type:
            expat.ParserCreate(namespace_separator=42)
        self.assertEqual(
            str(wrong_type.exception),
            "ParserCreate() argument 'namespace_separator' must be str or None, not int")

        with self.assertRaises(ValueError) as too_long:
            expat.ParserCreate(namespace_separator='too long')
        self.assertEqual(
            str(too_long.exception),
            'namespace_separator must be at most one character, omitted, or None')

    def test_zero_length(self):
        # ParserCreate() needs to accept a namespace_separator of zero length
        # to satisfy the requirements of RDF applications that are required
        # to simply glue together the namespace URI and the localname.  Though
        # considered a wart of the RDF specifications, it needs to be supported.
        #
        # See XML-SIG mailing list thread starting with
        # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
        #
        empty_separator = ''  # too short
        expat.ParserCreate(namespace_separator=empty_separator)
310
311
class InterningTest(unittest.TestCase):
    """Tests around name interning and external entity parsers."""

    def test(self):
        # Test the interning machinery.
        seen_names = []

        def collector(name, *args):
            seen_names.append(name)

        parser = expat.ParserCreate()
        parser.StartElementHandler = collector
        parser.EndElementHandler = collector
        parser.Parse(b"<e> <e/> <e></e> </e>", True)

        first_name = seen_names[0]
        self.assertEqual(len(seen_names), 6)
        # Every reported name should be the very same string object.
        for reported in seen_names:
            self.assertIs(first_name, reported)

    def test_issue9402(self):
        # create an ExternalEntityParserCreate with buffer text
        class ExternalOutputter:
            """Handler that parses an empty document with a child parser."""

            def __init__(self, parser):
                self.parser_result = None
                self.parser = parser

            def ExternalEntityRefHandler(self, context, base, sysId, pubId):
                child = self.parser.ExternalEntityParserCreate("")
                self.parser_result = child.Parse(b"", True)
                return 1

        main_parser = expat.ParserCreate(namespace_separator='!')
        main_parser.buffer_text = 1
        recorder = ExternalOutputter(main_parser)
        main_parser.ExternalEntityRefHandler = recorder.ExternalEntityRefHandler
        main_parser.Parse(data, True)
        self.assertEqual(recorder.parser_result, 1)
346
347
class BufferTextTest(unittest.TestCase):
    """Check how character data is grouped when buffer_text is enabled."""

    def setUp(self):
        # Events are recorded in self.stuff by the handlers defined below.
        self.stuff = []
        self.parser = expat.ParserCreate()
        self.parser.buffer_text = 1
        self.parser.CharacterDataHandler = self.CharacterDataHandler

    def check(self, expected, label):
        """Assert that the recorded events equal *expected*.

        *label* is put at the start of the failure message.
        """
        # Materialise the strings: formatting a bare map() object would only
        # show "<map object at ...>" in the failure message.
        self.assertEqual(self.stuff, expected,
                "%s\nstuff    = %r\nexpected = %r"
                % (label, self.stuff, list(map(str, expected))))

    def CharacterDataHandler(self, text):
        self.stuff.append(text)

    def StartElementHandler(self, name, attrs):
        self.stuff.append("<%s>" % name)
        # A "buffer-text" attribute switches buffering while parsing.
        bt = attrs.get("buffer-text")
        if bt == "yes":
            self.parser.buffer_text = 1
        elif bt == "no":
            self.parser.buffer_text = 0

    def EndElementHandler(self, name):
        self.stuff.append("</%s>" % name)

    def CommentHandler(self, data):
        self.stuff.append("<!--%s-->" % data)

    def setHandlers(self, handlers=()):
        """Install the methods of this test case named in *handlers*."""
        for name in handlers:
            setattr(self.parser, name, getattr(self, name))

    def test_default_to_disabled(self):
        parser = expat.ParserCreate()
        self.assertFalse(parser.buffer_text)

    def test_buffering_enabled(self):
        # Make sure buffering is turned on
        self.assertTrue(self.parser.buffer_text)
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", True)
        self.assertEqual(self.stuff, ['123'],
                         "buffered text not properly collapsed")

    def test1(self):
        # XXX This test exposes more detail of Expat's text chunking than we
        # XXX like, but it tests what we need to concisely.
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse(b"<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", True)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
                         "buffering control not reacting as expected")

    def test2(self):
        # The angle brackets in the text must be written as entity
        # references; a literal "<2>" would not be well-formed XML.
        self.parser.Parse(b"<a>1<b/>&lt;2&gt;<c/> \n 3</a>", True)
        self.assertEqual(self.stuff, ["1<2> \n 3"],
                         "buffered text not properly collapsed")

    def test3(self):
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", True)
        self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"],
                         "buffered text not properly split")

    def test4(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.CharacterDataHandler = None
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", True)
        self.assertEqual(self.stuff,
                         ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])

    def test5(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.Parse(b"<a>1<b></b>2<c/>3</a>", True)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])

    def test6(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c></c>345</a> ", True)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
            "buffered text not properly split")

    def test7(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", True)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
                          "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
                         "buffered text not properly split")
441
442
# Test handling of exception from callback:
class HandlerExceptionTest(unittest.TestCase):
    """Check the traceback of an exception raised inside a handler."""

    def StartElementHandler(self, name, attrs):
        raise RuntimeError(f'StartElementHandler: <{name}>')

    def check_traceback_entry(self, entry, filename, funcname):
        """Compare the base file name and function name of *entry*."""
        self.assertEqual(os.path.basename(entry.filename), filename)
        self.assertEqual(entry.name, funcname)

    @support.cpython_only
    def test_exception(self):
        # gh-66652: test _PyTraceback_Add() used by pyexpat.c to inject frames

        # Change the current directory to the Python source code directory
        # if it is available.
        src_dir = sysconfig.get_config_var('abs_builddir')
        have_source = bool(src_dir) and os.path.isdir(src_dir)
        if not have_source:
            self._test_exception(have_source)
            return
        with os_helper.change_cwd(src_dir):
            self._test_exception(have_source)

    def _test_exception(self, have_source):
        # Use path relative to the current directory which should be the Python
        # source code directory (if it is available).
        PYEXPAT_C = os.path.join('Modules', 'pyexpat.c')
        this_file = "test_pyexpat.py"

        parser = expat.ParserCreate()
        parser.StartElementHandler = self.StartElementHandler
        # A plain try/except is used because the traceback attached to the
        # caught exception is inspected below.
        try:
            parser.Parse(b"<a><b><c/></b></a>", True)
        except RuntimeError as exc:
            self.assertEqual(exc.args[0], 'StartElementHandler: <a>', exc)
            entries = traceback.extract_tb(exc.__traceback__)

            self.assertEqual(len(entries), 3, entries)
            caller, injected, handler = entries
            self.check_traceback_entry(caller, this_file, "_test_exception")
            self.check_traceback_entry(injected,
                                       os.path.basename(PYEXPAT_C),
                                       "StartElement")
            self.check_traceback_entry(handler, this_file,
                                       "StartElementHandler")

            # Check that the traceback contains the relevant line in
            # Modules/pyexpat.c. Skip the test if Modules/pyexpat.c is not
            # available.
            if have_source and os.path.exists(PYEXPAT_C):
                self.assertIn('call_with_frame("StartElement"',
                              injected.line)
        else:
            self.fail("the parser did not raise RuntimeError")
499
500
# Test Current* members:
class PositionTest(unittest.TestCase):
    """Check CurrentByteIndex/LineNumber/ColumnNumber inside handlers."""

    def StartElementHandler(self, name, attrs):
        self.check_pos('s')

    def EndElementHandler(self, name):
        self.check_pos('e')

    def check_pos(self, event):
        """Compare the parser's current position with the next expected one.

        *event* is 's' for a start tag and 'e' for an end tag.
        """
        pos = (event,
               self.parser.CurrentByteIndex,
               self.parser.CurrentLineNumber,
               self.parser.CurrentColumnNumber)
        self.assertLess(self.upto, len(self.expected_list),
                        'too many parser events')
        expected = self.expected_list[self.upto]
        # The expected value goes first so the message reads correctly.
        self.assertEqual(pos, expected,
                'Expected position %s, got position %s' %(expected, pos))
        self.upto += 1

    def test(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.StartElementHandler
        self.parser.EndElementHandler = self.EndElementHandler
        self.upto = 0
        self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
                              ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]

        # <c/> is indented by two spaces: the expected byte index 11 and
        # column 2 above depend on it.
        xml = b'<a>\n <b>\n  <c/>\n </b>\n</a>'
        self.parser.Parse(xml, True)
        # check_pos() only guards against extra events; make sure that none
        # of the expected ones is missing either.
        self.assertEqual(self.upto, len(self.expected_list),
                         'too few parser events')
531
532
class sf1296433Test(unittest.TestCase):
    """Regression test for an exception raised in CharacterDataHandler."""

    def test_parse_only_xml_data(self):
        # https://bugs.python.org/issue1296433
        #
        xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)
        # this one doesn't crash
        #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

        class SpecificException(Exception):
            pass

        def handler(text):
            raise SpecificException

        parser = expat.ParserCreate()
        parser.CharacterDataHandler = handler

        # Expect exactly the handler's exception: a bare Exception would
        # also accept an unrelated parser error.
        self.assertRaises(SpecificException, parser.Parse,
                          xml.encode('iso8859'))
551
class ChardataBufferTest(unittest.TestCase):
    """
    Tests for the buffer_size attribute used for character data.
    """

    def counting_handler(self, text):
        # Only the number of calls is of interest, not the text itself.
        self.n += 1

    def small_buffer_test(self, buffer_len):
        """Parse *buffer_len* bytes of text; return the handler call count."""
        document = (b"<?xml version='1.0' encoding='iso8859'?><s>"
                    + b'a' * buffer_len
                    + b'</s>')
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 1024
        parser.buffer_text = 1

        self.n = 0
        parser.Parse(document)
        return self.n

    def test_1025_bytes(self):
        calls = self.small_buffer_test(1025)
        self.assertEqual(calls, 2)

    def test_1000_bytes(self):
        calls = self.small_buffer_test(1000)
        self.assertEqual(calls, 1)

    def test_wrong_size(self):
        parser = expat.ParserCreate()
        parser.buffer_text = 1
        for bad_size in (-1, 0):
            with self.assertRaises(ValueError):
                parser.buffer_size = bad_size
        with self.assertRaises((ValueError, OverflowError)):
            parser.buffer_size = sys.maxsize + 1
        with self.assertRaises(TypeError):
            parser.buffer_size = 512.0

    def test_unchanged_size(self):
        first_half = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * 512
        second_half = b'a' * 512 + b'</s>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 512
        parser.buffer_text = 1

        # Feed 512 bytes of character data: the handler should be called
        # once.
        self.n = 0
        parser.Parse(first_half)
        self.assertEqual(self.n, 1)

        # Reassign to buffer_size, but assign the same size.
        same_size = parser.buffer_size
        parser.buffer_size = same_size
        self.assertEqual(self.n, 1)

        # Try parsing rest of the document
        parser.Parse(second_half)
        self.assertEqual(self.n, 2)

    def test_disabling_buffer(self):
        head = b"<?xml version='1.0' encoding='iso8859'?><a>" + b'a' * 512
        middle = b'b' * 1024
        tail = b'c' * 1024 + b'</a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        # Parse one chunk of XML
        self.n = 0
        parser.Parse(head, False)
        self.assertEqual(parser.buffer_size, 1024)
        self.assertEqual(self.n, 1)

        # Turn off buffering and parse the next chunk.
        parser.buffer_text = 0
        self.assertFalse(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        for _ in range(10):
            parser.Parse(middle, False)
        self.assertEqual(self.n, 11)

        # Turn buffering back on for the final chunk.
        parser.buffer_text = 1
        self.assertTrue(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(tail, True)
        self.assertEqual(self.n, 12)

    def test_change_size_1(self):
        first_chunk = (b"<?xml version='1.0' encoding='iso8859'?><a><s>"
                       + b'a' * 1024)
        last_chunk = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        self.n = 0
        parser.Parse(first_chunk, False)
        # Double the buffer between the two chunks.
        parser.buffer_size = parser.buffer_size * 2
        self.assertEqual(parser.buffer_size, 2048)
        parser.Parse(last_chunk, True)
        self.assertEqual(self.n, 2)

    def test_change_size_2(self):
        first_chunk = (b"<?xml version='1.0' encoding='iso8859'?><a>a<s>"
                       + b'a' * 1023)
        last_chunk = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 2048
        self.assertEqual(parser.buffer_size, 2048)

        self.n = 0
        parser.Parse(first_chunk, False)
        # Halve the buffer between the two chunks.
        parser.buffer_size //= 2
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(last_chunk, True)
        self.assertEqual(self.n, 4)
673
class MalformedInputTest(unittest.TestCase):
    """Check the errors reported for input that is not well-formed."""

    def test1(self):
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as caught:
            parser.Parse(b"\0\r\n", True)
        self.assertEqual(str(caught.exception),
                         'unclosed token: line 2, column 0')

    def test2(self):
        # \xc2\x85 is UTF-8 encoded U+0085 (NEXT LINE)
        document = b"<?xml version\xc2\x85='1.0'?>\r\n"
        expected_message = r'XML declaration not well-formed: line 1, column \d+'
        parser = expat.ParserCreate()
        with self.assertRaisesRegex(expat.ExpatError, expected_message):
            parser.Parse(document, True)
691
class ErrorMessageTest(unittest.TestCase):
    """Check the error code and message tables of xml.parsers.expat."""

    def test_codes(self):
        # verify mapping of errors.codes and errors.messages
        message = errors.XML_ERROR_SYNTAX
        code = errors.codes[message]
        self.assertEqual(message, errors.messages[code])

    def test_expaterror(self):
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as caught:
            parser.Parse(b'<', True)
        self.assertEqual(caught.exception.code,
                         errors.codes[errors.XML_ERROR_UNCLOSED_TOKEN])
707
708
class ForeignDTDTests(unittest.TestCase):
    """
    Tests for the UseForeignDTD method of expat parser objects.
    """

    def _resolved_ids(self, document, *foreign_dtd_args):
        """Parse *document*; return the ids seen by ExternalEntityRefHandler.

        *foreign_dtd_args* are passed on to UseForeignDTD().  The result is
        a list of (public_id, system_id) pairs, one per handler call.
        """
        seen = []

        def resolve_entity(context, base, system_id, public_id):
            seen.append((public_id, system_id))
            return 1

        parser = expat.ParserCreate()
        parser.UseForeignDTD(*foreign_dtd_args)
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = resolve_entity
        parser.Parse(document)
        return seen

    def test_use_foreign_dtd(self):
        """
        With UseForeignDTD(True) and a document that has no external entity
        reference, ExternalEntityRefHandler is called once with None for
        both the public and the system id.
        """
        document = b"<?xml version='1.0'?><element/>"
        self.assertEqual(self._resolved_ids(document, True), [(None, None)])

        # test UseForeignDTD() is equal to UseForeignDTD(True)
        self.assertEqual(self._resolved_ids(document), [(None, None)])

    def test_ignore_use_foreign_dtd(self):
        """
        With UseForeignDTD(True) and a document that does have an external
        entity reference, ExternalEntityRefHandler is called with the
        public and system ids taken from the document.
        """
        document = (
            b"<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")
        self.assertEqual(self._resolved_ids(document, True), [("bar", "baz")])
759
760
# Run every test case of this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()