1 # regression test for SAX 2.0
2 # $Id$
3
4 from xml.sax import make_parser, ContentHandler, \
5 SAXException, SAXReaderNotAvailable, SAXParseException
6 import unittest
7 from unittest import mock
# Skip the whole module when no SAX parser can be instantiated.
try:
    make_parser()
except SAXReaderNotAvailable:
    # don't try to test this module if we cannot create a parser
    raise unittest.SkipTest("no XML parsers available")
13 from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \
14 XMLFilterBase, prepare_input_source
15 from xml.sax.expatreader import create_parser
16 from xml.sax.handler import (feature_namespaces, feature_external_ges,
17 LexicalHandler)
18 from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
19 from io import BytesIO, StringIO
20 import codecs
21 import os.path
22 import shutil
23 import sys
24 from urllib.error import URLError
25 import urllib.request
26 from test.support import os_helper
27 from test.support import findfile
28 from test.support.os_helper import FakePath, TESTFN
29
30
# Paths of the sample document and of the reference output it is compared to.
TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata")
TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata")
try:
    TEST_XMLFILE.encode("utf-8")
    TEST_XMLFILE_OUT.encode("utf-8")
except UnicodeEncodeError:
    raise unittest.SkipTest("filename is not encodable to utf8")

supports_nonascii_filenames = True
if not os.path.supports_unicode_filenames:
    try:
        os_helper.TESTFN_UNICODE.encode(sys.getfilesystemencoding())
    except (UnicodeError, TypeError):
        # Either the file system encoding is None, or the file name
        # cannot be encoded in the file system encoding.
        supports_nonascii_filenames = False
# Decorator that skips a test when non-ASCII file names are unusable.
requires_nonascii_filenames = unittest.skipUnless(
    supports_nonascii_filenames,
    'Requires non-ascii filenames support')

# Namespace URI used by the namespace-aware tests.
ns_uri = "http://www.python.org/xml-ns/saxtest/"
52
class XmlTestBase(unittest.TestCase):
    """Shared assertion helpers for SAX attribute containers."""

    def verify_empty_attrs(self, attrs):
        """Assert that *attrs* behaves as an empty, plain-name attribute set."""
        # Every lookup style must fail with KeyError.
        self.assertRaises(KeyError, attrs.getValue, "attr")
        self.assertRaises(KeyError, attrs.getValueByQName, "attr")
        self.assertRaises(KeyError, attrs.getNameByQName, "attr")
        self.assertRaises(KeyError, attrs.getQNameByName, "attr")
        self.assertRaises(KeyError, attrs.__getitem__, "attr")
        # Both the SAX accessors and the mapping protocol must report empty.
        self.assertEqual(attrs.getLength(), 0)
        self.assertEqual(attrs.getNames(), [])
        self.assertEqual(attrs.getQNames(), [])
        self.assertEqual(len(attrs), 0)
        self.assertNotIn("attr", attrs)
        self.assertEqual(list(attrs.keys()), [])
        self.assertEqual(attrs.get("attrs"), None)
        self.assertEqual(attrs.get("attrs", 25), 25)
        self.assertEqual(list(attrs.items()), [])
        self.assertEqual(list(attrs.values()), [])

    def verify_empty_nsattrs(self, attrs):
        """Assert that *attrs* behaves as an empty, namespace-aware attribute set."""
        # Names are (namespace URI, local name) pairs; qnames are "ns:attr".
        self.assertRaises(KeyError, attrs.getValue, (ns_uri, "attr"))
        self.assertRaises(KeyError, attrs.getValueByQName, "ns:attr")
        self.assertRaises(KeyError, attrs.getNameByQName, "ns:attr")
        self.assertRaises(KeyError, attrs.getQNameByName, (ns_uri, "attr"))
        self.assertRaises(KeyError, attrs.__getitem__, (ns_uri, "attr"))
        self.assertEqual(attrs.getLength(), 0)
        self.assertEqual(attrs.getNames(), [])
        self.assertEqual(attrs.getQNames(), [])
        self.assertEqual(len(attrs), 0)
        self.assertNotIn((ns_uri, "attr"), attrs)
        self.assertEqual(list(attrs.keys()), [])
        self.assertEqual(attrs.get((ns_uri, "attr")), None)
        self.assertEqual(attrs.get((ns_uri, "attr"), 25), 25)
        self.assertEqual(list(attrs.items()), [])
        self.assertEqual(list(attrs.values()), [])

    def verify_attrs_wattr(self, attrs):
        """Assert that *attrs* holds exactly one attribute, ``attr="val"``."""
        self.assertEqual(attrs.getLength(), 1)
        self.assertEqual(attrs.getNames(), ["attr"])
        self.assertEqual(attrs.getQNames(), ["attr"])
        self.assertEqual(len(attrs), 1)
        self.assertIn("attr", attrs)
        self.assertEqual(list(attrs.keys()), ["attr"])
        self.assertEqual(attrs.get("attr"), "val")
        self.assertEqual(attrs.get("attr", 25), "val")
        self.assertEqual(list(attrs.items()), [("attr", "val")])
        self.assertEqual(list(attrs.values()), ["val"])
        self.assertEqual(attrs.getValue("attr"), "val")
        self.assertEqual(attrs.getValueByQName("attr"), "val")
        self.assertEqual(attrs.getNameByQName("attr"), "attr")
        self.assertEqual(attrs["attr"], "val")
        self.assertEqual(attrs.getQNameByName("attr"), "attr")
104
105
def xml_str(doc, encoding=None):
    """Return *doc*, prefixed with an XML declaration when *encoding* is given.

    With ``encoding=None`` the document text is returned unchanged.
    """
    if encoding is None:
        return doc
    return '<?xml version="1.0" encoding="%s"?>\n%s' % (encoding, doc)
110
def xml_bytes(doc, encoding, decl_encoding=...):
    """Return *doc* encoded to bytes with *encoding*.

    *decl_encoding* is the encoding named in the XML declaration; it
    defaults to *encoding*.  Ellipsis is the "not given" sentinel because
    ``None`` is a meaningful value: it suppresses the declaration.
    Characters that *encoding* cannot represent become character references.
    """
    if decl_encoding is ...:
        decl_encoding = encoding
    return xml_str(doc, decl_encoding).encode(encoding, 'xmlcharrefreplace')
115
def make_xml_file(doc, encoding, decl_encoding=...):
    """Write *doc* to the TESTFN file using *encoding*.

    *decl_encoding* is the encoding named in the XML declaration; it
    defaults to *encoding*, and ``None`` suppresses the declaration.
    Characters that *encoding* cannot represent become character references.
    """
    if decl_encoding is ...:
        decl_encoding = encoding
    with open(TESTFN, 'w', encoding=encoding, errors='xmlcharrefreplace') as f:
        f.write(xml_str(doc, decl_encoding))
121
122
class ParseTest(unittest.TestCase):
    """Tests for xml.sax.parse() and parseString() over several input kinds."""

    # Sample document mixing ASCII, Latin-1, BMP and non-BMP characters.
    data = '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>'

    def tearDown(self):
        os_helper.unlink(TESTFN)

    def check_parse(self, f):
        """Parse *f* and compare the regenerated XML with the sample data."""
        from xml.sax import parse
        result = StringIO()
        parse(f, XMLGenerator(result, 'utf-8'))
        self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8'))

    def test_parse_text(self):
        encodings = ('us-ascii', 'iso-8859-1', 'utf-8',
                     'utf-16', 'utf-16le', 'utf-16be')
        for encoding in encodings:
            self.check_parse(StringIO(xml_str(self.data, encoding)))
            make_xml_file(self.data, encoding)
            with open(TESTFN, 'r', encoding=encoding) as f:
                self.check_parse(f)
            self.check_parse(StringIO(self.data))
            make_xml_file(self.data, encoding, None)
            with open(TESTFN, 'r', encoding=encoding) as f:
                self.check_parse(f)

    def test_parse_bytes(self):
        # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
        # UTF-16 is autodetected
        encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
        for encoding in encodings:
            self.check_parse(BytesIO(xml_bytes(self.data, encoding)))
            make_xml_file(self.data, encoding)
            self.check_parse(TESTFN)
            with open(TESTFN, 'rb') as f:
                self.check_parse(f)
            self.check_parse(BytesIO(xml_bytes(self.data, encoding, None)))
            make_xml_file(self.data, encoding, None)
            self.check_parse(TESTFN)
            with open(TESTFN, 'rb') as f:
                self.check_parse(f)
        # accept UTF-8 with BOM
        self.check_parse(BytesIO(xml_bytes(self.data, 'utf-8-sig', 'utf-8')))
        make_xml_file(self.data, 'utf-8-sig', 'utf-8')
        self.check_parse(TESTFN)
        with open(TESTFN, 'rb') as f:
            self.check_parse(f)
        self.check_parse(BytesIO(xml_bytes(self.data, 'utf-8-sig', None)))
        make_xml_file(self.data, 'utf-8-sig', None)
        self.check_parse(TESTFN)
        with open(TESTFN, 'rb') as f:
            self.check_parse(f)
        # accept data with declared encoding
        self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1')))
        make_xml_file(self.data, 'iso-8859-1')
        self.check_parse(TESTFN)
        with open(TESTFN, 'rb') as f:
            self.check_parse(f)
        # fail on non-UTF-8 incompatible data without declared encoding
        with self.assertRaises(SAXException):
            self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1', None)))
        make_xml_file(self.data, 'iso-8859-1', None)
        with self.assertRaises(SAXException):
            self.check_parse(TESTFN)
        with open(TESTFN, 'rb') as f:
            with self.assertRaises(SAXException):
                self.check_parse(f)

    def test_parse_path_object(self):
        make_xml_file(self.data, 'utf-8', None)
        self.check_parse(FakePath(TESTFN))

    def test_parse_InputSource(self):
        # accept data without declared but with explicitly specified encoding
        make_xml_file(self.data, 'iso-8859-1', None)
        with open(TESTFN, 'rb') as f:
            # Named "source" rather than "input" to avoid shadowing the builtin.
            source = InputSource()
            source.setByteStream(f)
            source.setEncoding('iso-8859-1')
            self.check_parse(source)

    def test_parse_close_source(self):
        builtin_open = open
        fileobj = None

        def mock_open(*args):
            # Remember the file object opened by saxutils so that the test
            # can check afterwards that it was closed.
            nonlocal fileobj
            fileobj = builtin_open(*args)
            return fileobj

        with mock.patch('xml.sax.saxutils.open', side_effect=mock_open):
            make_xml_file(self.data, 'iso-8859-1', None)
            with self.assertRaises(SAXException):
                self.check_parse(TESTFN)
            self.assertTrue(fileobj.closed)

    def check_parseString(self, s):
        """Parse the string *s* and compare the regenerated XML with the sample."""
        from xml.sax import parseString
        result = StringIO()
        parseString(s, XMLGenerator(result, 'utf-8'))
        self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8'))

    def test_parseString_text(self):
        encodings = ('us-ascii', 'iso-8859-1', 'utf-8',
                     'utf-16', 'utf-16le', 'utf-16be')
        for encoding in encodings:
            self.check_parseString(xml_str(self.data, encoding))
        self.check_parseString(self.data)

    def test_parseString_bytes(self):
        # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
        # UTF-16 is autodetected
        encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
        for encoding in encodings:
            self.check_parseString(xml_bytes(self.data, encoding))
            self.check_parseString(xml_bytes(self.data, encoding, None))
        # accept UTF-8 with BOM
        self.check_parseString(xml_bytes(self.data, 'utf-8-sig', 'utf-8'))
        self.check_parseString(xml_bytes(self.data, 'utf-8-sig', None))
        # accept data with declared encoding
        self.check_parseString(xml_bytes(self.data, 'iso-8859-1'))
        # fail on non-UTF-8 incompatible data without declared encoding
        with self.assertRaises(SAXException):
            self.check_parseString(xml_bytes(self.data, 'iso-8859-1', None))
246
class MakeParserTest(unittest.TestCase):
    """Tests for the argument handling of xml.sax.make_parser()."""

    def test_make_parser2(self):
        # Creating parsers several times in a row should succeed.
        # Testing this because there have been failures of this kind
        # before.
        for _ in range(6):
            # Re-import on every round, as the original sequence did.
            from xml.sax import make_parser
            p = make_parser()

    def test_make_parser3(self):
        # Testing that make_parser can handle different types of
        # iterables.
        make_parser(['module'])
        make_parser(('module', ))
        make_parser({'module'})
        make_parser(frozenset({'module'}))
        make_parser({'module': None})
        make_parser(iter(['module']))

    def test_make_parser4(self):
        # Testing that make_parser can handle empty iterables.
        make_parser([])
        make_parser(tuple())
        make_parser(set())
        make_parser(frozenset())
        make_parser({})
        make_parser(iter([]))

    def test_make_parser5(self):
        # Testing that make_parser can handle iterables with more than
        # one item.
        make_parser(['module1', 'module2'])
        make_parser(('module1', 'module2'))
        make_parser({'module1', 'module2'})
        make_parser(frozenset({'module1', 'module2'}))
        make_parser({'module1': None, 'module2': None})
        make_parser(iter(['module1', 'module2']))
293
294 # ===========================================================================
295 #
296 # saxutils tests
297 #
298 # ===========================================================================
299
class SaxutilsTest(unittest.TestCase):
    """Tests for the escape(), unescape() and quoteattr() helpers."""

    # ===== escape
    def test_escape_basic(self):
        self.assertEqual(escape("Donald Duck & Co"), "Donald Duck &amp; Co")

    def test_escape_all(self):
        self.assertEqual(escape("<Donald Duck & Co>"),
                         "&lt;Donald Duck &amp; Co&gt;")

    def test_escape_extra(self):
        # The extra mapping replaces a literal character with an entity.
        self.assertEqual(escape("Hei på deg", {"å" : "&aring;"}),
                         "Hei p&aring; deg")

    # ===== unescape
    def test_unescape_basic(self):
        self.assertEqual(unescape("Donald Duck &amp; Co"), "Donald Duck & Co")

    def test_unescape_all(self):
        self.assertEqual(unescape("&lt;Donald Duck &amp; Co&gt;"),
                         "<Donald Duck & Co>")

    def test_unescape_extra(self):
        self.assertEqual(unescape("Hei på deg", {"å" : "&aring;"}),
                         "Hei p&aring; deg")

    def test_unescape_amp_extra(self):
        # "&amp;foo;" must become "&foo;" and must not be expanded a second
        # time through the user-supplied "&foo;" entity.
        self.assertEqual(unescape("&amp;foo;", {"&foo;": "splat"}), "&foo;")

    # ===== quoteattr
    def test_quoteattr_basic(self):
        self.assertEqual(quoteattr("Donald Duck & Co"),
                         '"Donald Duck &amp; Co"')

    def test_single_quoteattr(self):
        self.assertEqual(quoteattr('Includes "double" quotes'),
                         '\'Includes "double" quotes\'')

    def test_double_quoteattr(self):
        self.assertEqual(quoteattr("Includes 'single' quotes"),
                         "\"Includes 'single' quotes\"")

    def test_single_double_quoteattr(self):
        self.assertEqual(quoteattr("Includes 'single' and \"double\" quotes"),
                         "\"Includes 'single' and &quot;double&quot; quotes\"")

    # ===== make_parser
    def test_make_parser(self):
        # Creating a parser should succeed - it should fall back
        # to the expatreader
        p = make_parser(['xml.parsers.no_such_parser'])
350
351
352 class ESC[4;38;5;81mPrepareInputSourceTest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
353
354 def setUp(self):
355 self.file = os_helper.TESTFN
356 with open(self.file, "w") as tmp:
357 tmp.write("This was read from a file.")
358
359 def tearDown(self):
360 os_helper.unlink(self.file)
361
362 def make_byte_stream(self):
363 return BytesIO(b"This is a byte stream.")
364
365 def make_character_stream(self):
366 return StringIO("This is a character stream.")
367
368 def checkContent(self, stream, content):
369 self.assertIsNotNone(stream)
370 self.assertEqual(stream.read(), content)
371 stream.close()
372
373
374 def test_character_stream(self):
375 # If the source is an InputSource with a character stream, use it.
376 src = InputSource(self.file)
377 src.setCharacterStream(self.make_character_stream())
378 prep = prepare_input_source(src)
379 self.assertIsNone(prep.getByteStream())
380 self.checkContent(prep.getCharacterStream(),
381 "This is a character stream.")
382
383 def test_byte_stream(self):
384 # If the source is an InputSource that does not have a character
385 # stream but does have a byte stream, use the byte stream.
386 src = InputSource(self.file)
387 src.setByteStream(self.make_byte_stream())
388 prep = prepare_input_source(src)
389 self.assertIsNone(prep.getCharacterStream())
390 self.checkContent(prep.getByteStream(),
391 b"This is a byte stream.")
392
393 def test_system_id(self):
394 # If the source is an InputSource that has neither a character
395 # stream nor a byte stream, open the system ID.
396 src = InputSource(self.file)
397 prep = prepare_input_source(src)
398 self.assertIsNone(prep.getCharacterStream())
399 self.checkContent(prep.getByteStream(),
400 b"This was read from a file.")
401
402 def test_string(self):
403 # If the source is a string, use it as a system ID and open it.
404 prep = prepare_input_source(self.file)
405 self.assertIsNone(prep.getCharacterStream())
406 self.checkContent(prep.getByteStream(),
407 b"This was read from a file.")
408
409 def test_path_objects(self):
410 # If the source is a Path object, use it as a system ID and open it.
411 prep = prepare_input_source(FakePath(self.file))
412 self.assertIsNone(prep.getCharacterStream())
413 self.checkContent(prep.getByteStream(),
414 b"This was read from a file.")
415
416 def test_binary_file(self):
417 # If the source is a binary file-like object, use it as a byte
418 # stream.
419 prep = prepare_input_source(self.make_byte_stream())
420 self.assertIsNone(prep.getCharacterStream())
421 self.checkContent(prep.getByteStream(),
422 b"This is a byte stream.")
423
424 def test_text_file(self):
425 # If the source is a text file-like object, use it as a character
426 # stream.
427 prep = prepare_input_source(self.make_character_stream())
428 self.assertIsNone(prep.getByteStream())
429 self.checkContent(prep.getCharacterStream(),
430 "This is a character stream.")
431
432
433 # ===== XMLGenerator
434
class XmlgenTest:
    """Mixin with the XMLGenerator tests shared by all output back ends.

    Concrete subclasses provide ``ioclass``, called without arguments to
    create the output object (which must offer ``getvalue()``), and
    ``xml(doc, encoding=...)``, which builds the expected output for *doc*.
    """

    def test_xmlgen_basic(self):
        result = self.ioclass()
        gen = XMLGenerator(result)
        gen.startDocument()
        gen.startElement("doc", {})
        gen.endElement("doc")
        gen.endDocument()

        self.assertEqual(result.getvalue(), self.xml("<doc></doc>"))

    def test_xmlgen_basic_empty(self):
        result = self.ioclass()
        gen = XMLGenerator(result, short_empty_elements=True)
        gen.startDocument()
        gen.startElement("doc", {})
        gen.endElement("doc")
        gen.endDocument()

        self.assertEqual(result.getvalue(), self.xml("<doc/>"))

    def test_xmlgen_content(self):
        result = self.ioclass()
        gen = XMLGenerator(result)

        gen.startDocument()
        gen.startElement("doc", {})
        gen.characters("huhei")
        gen.endElement("doc")
        gen.endDocument()

        self.assertEqual(result.getvalue(), self.xml("<doc>huhei</doc>"))

    def test_xmlgen_content_empty(self):
        result = self.ioclass()
        gen = XMLGenerator(result, short_empty_elements=True)

        gen.startDocument()
        gen.startElement("doc", {})
        gen.characters("huhei")
        gen.endElement("doc")
        gen.endDocument()

        self.assertEqual(result.getvalue(), self.xml("<doc>huhei</doc>"))

    def test_xmlgen_pi(self):
        result = self.ioclass()
        gen = XMLGenerator(result)

        gen.startDocument()
        gen.processingInstruction("test", "data")
        gen.startElement("doc", {})
        gen.endElement("doc")
        gen.endDocument()

        self.assertEqual(result.getvalue(),
            self.xml("<?test data?><doc></doc>"))

    def test_xmlgen_content_escape(self):
        result = self.ioclass()
        gen = XMLGenerator(result)

        gen.startDocument()
        gen.startElement("doc", {})
        gen.characters("<huhei&")
        gen.endElement("doc")
        gen.endDocument()

        # Markup characters in text content must come out as entities.
        self.assertEqual(result.getvalue(),
            self.xml("<doc>&lt;huhei&amp;</doc>"))

    def test_xmlgen_attr_escape(self):
        result = self.ioclass()
        gen = XMLGenerator(result)

        gen.startDocument()
        gen.startElement("doc", {"a": '"'})
        gen.startElement("e", {"a": "'"})
        gen.endElement("e")
        gen.startElement("e", {"a": "'\""})
        gen.endElement("e")
        gen.startElement("e", {"a": "\n\r\t"})
        gen.endElement("e")
        gen.endElement("doc")
        gen.endDocument()

        # Quotes are escaped only when both kinds occur; newline, carriage
        # return and tab are written as character references.
        self.assertEqual(result.getvalue(), self.xml(
            "<doc a='\"'><e a=\"'\"></e>"
            "<e a=\"'&quot;\"></e>"
            "<e a=\"&#10;&#13;&#9;\"></e></doc>"))

    def test_xmlgen_encoding(self):
        encodings = ('iso-8859-15', 'utf-8', 'utf-8-sig',
                     'utf-16', 'utf-16be', 'utf-16le',
                     'utf-32', 'utf-32be', 'utf-32le')
        for encoding in encodings:
            result = self.ioclass()
            gen = XMLGenerator(result, encoding=encoding)

            gen.startDocument()
            gen.startElement("doc", {"a": '\u20ac'})
            gen.characters("\u20ac")
            gen.endElement("doc")
            gen.endDocument()

            self.assertEqual(result.getvalue(),
                self.xml('<doc a="\u20ac">\u20ac</doc>', encoding=encoding))

    def test_xmlgen_unencodable(self):
        result = self.ioclass()
        gen = XMLGenerator(result, encoding='ascii')

        gen.startDocument()
        gen.startElement("doc", {"a": '\u20ac'})
        gen.characters("\u20ac")
        gen.endElement("doc")
        gen.endDocument()

        # The euro sign is not ASCII, so a character reference is expected.
        self.assertEqual(result.getvalue(),
            self.xml('<doc a="&#8364;">&#8364;</doc>', encoding='ascii'))

    def test_xmlgen_ignorable(self):
        result = self.ioclass()
        gen = XMLGenerator(result)

        gen.startDocument()
        gen.startElement("doc", {})
        gen.ignorableWhitespace(" ")
        gen.endElement("doc")
        gen.endDocument()

        self.assertEqual(result.getvalue(), self.xml("<doc> </doc>"))

    def test_xmlgen_ignorable_empty(self):
        result = self.ioclass()
        gen = XMLGenerator(result, short_empty_elements=True)

        gen.startDocument()
        gen.startElement("doc", {})
        gen.ignorableWhitespace(" ")
        gen.endElement("doc")
        gen.endDocument()

        self.assertEqual(result.getvalue(), self.xml("<doc> </doc>"))

    def test_xmlgen_encoding_bytes(self):
        encodings = ('iso-8859-15', 'utf-8', 'utf-8-sig',
                     'utf-16', 'utf-16be', 'utf-16le',
                     'utf-32', 'utf-32be', 'utf-32le')
        for encoding in encodings:
            result = self.ioclass()
            gen = XMLGenerator(result, encoding=encoding)

            gen.startDocument()
            gen.startElement("doc", {"a": '\u20ac'})
            # Content is passed as already-encoded bytes here.
            gen.characters("\u20ac".encode(encoding))
            gen.ignorableWhitespace(" ".encode(encoding))
            gen.endElement("doc")
            gen.endDocument()

            self.assertEqual(result.getvalue(),
                self.xml('<doc a="\u20ac">\u20ac </doc>', encoding=encoding))

    def test_xmlgen_ns(self):
        result = self.ioclass()
        gen = XMLGenerator(result)

        gen.startDocument()
        gen.startPrefixMapping("ns1", ns_uri)
        gen.startElementNS((ns_uri, "doc"), "ns1:doc", {})
        # add an unqualified name
        gen.startElementNS((None, "udoc"), None, {})
        gen.endElementNS((None, "udoc"), None)
        gen.endElementNS((ns_uri, "doc"), "ns1:doc")
        gen.endPrefixMapping("ns1")
        gen.endDocument()

        self.assertEqual(result.getvalue(), self.xml(
           '<ns1:doc xmlns:ns1="%s"><udoc></udoc></ns1:doc>' %
                                         ns_uri))

    def test_xmlgen_ns_empty(self):
        result = self.ioclass()
        gen = XMLGenerator(result, short_empty_elements=True)

        gen.startDocument()
        gen.startPrefixMapping("ns1", ns_uri)
        gen.startElementNS((ns_uri, "doc"), "ns1:doc", {})
        # add an unqualified name
        gen.startElementNS((None, "udoc"), None, {})
        gen.endElementNS((None, "udoc"), None)
        gen.endElementNS((ns_uri, "doc"), "ns1:doc")
        gen.endPrefixMapping("ns1")
        gen.endDocument()

        self.assertEqual(result.getvalue(), self.xml(
           '<ns1:doc xmlns:ns1="%s"><udoc/></ns1:doc>' %
                                         ns_uri))

    def test_1463026_1(self):
        result = self.ioclass()
        gen = XMLGenerator(result)

        gen.startDocument()
        gen.startElementNS((None, 'a'), 'a', {(None, 'b'):'c'})
        gen.endElementNS((None, 'a'), 'a')
        gen.endDocument()

        self.assertEqual(result.getvalue(), self.xml('<a b="c"></a>'))

    def test_1463026_1_empty(self):
        result = self.ioclass()
        gen = XMLGenerator(result, short_empty_elements=True)

        gen.startDocument()
        gen.startElementNS((None, 'a'), 'a', {(None, 'b'):'c'})
        gen.endElementNS((None, 'a'), 'a')
        gen.endDocument()

        self.assertEqual(result.getvalue(), self.xml('<a b="c"/>'))

    def test_1463026_2(self):
        result = self.ioclass()
        gen = XMLGenerator(result)

        gen.startDocument()
        gen.startPrefixMapping(None, 'qux')
        gen.startElementNS(('qux', 'a'), 'a', {})
        gen.endElementNS(('qux', 'a'), 'a')
        gen.endPrefixMapping(None)
        gen.endDocument()

        self.assertEqual(result.getvalue(), self.xml('<a xmlns="qux"></a>'))

    def test_1463026_2_empty(self):
        result = self.ioclass()
        gen = XMLGenerator(result, short_empty_elements=True)

        gen.startDocument()
        gen.startPrefixMapping(None, 'qux')
        gen.startElementNS(('qux', 'a'), 'a', {})
        gen.endElementNS(('qux', 'a'), 'a')
        gen.endPrefixMapping(None)
        gen.endDocument()

        self.assertEqual(result.getvalue(), self.xml('<a xmlns="qux"/>'))

    def test_1463026_3(self):
        result = self.ioclass()
        gen = XMLGenerator(result)

        gen.startDocument()
        gen.startPrefixMapping('my', 'qux')
        gen.startElementNS(('qux', 'a'), 'a', {(None, 'b'):'c'})
        gen.endElementNS(('qux', 'a'), 'a')
        gen.endPrefixMapping('my')
        gen.endDocument()

        self.assertEqual(result.getvalue(),
            self.xml('<my:a xmlns:my="qux" b="c"></my:a>'))

    def test_1463026_3_empty(self):
        result = self.ioclass()
        gen = XMLGenerator(result, short_empty_elements=True)

        gen.startDocument()
        gen.startPrefixMapping('my', 'qux')
        gen.startElementNS(('qux', 'a'), 'a', {(None, 'b'):'c'})
        gen.endElementNS(('qux', 'a'), 'a')
        gen.endPrefixMapping('my')
        gen.endDocument()

        self.assertEqual(result.getvalue(),
            self.xml('<my:a xmlns:my="qux" b="c"/>'))

    def test_5027_1(self):
        # The xml prefix (as in xml:lang below) is reserved and bound by
        # definition to http://www.w3.org/XML/1998/namespace.  XMLGenerator had
        # a bug whereby a KeyError is raised because this namespace is missing
        # from a dictionary.
        #
        # This test demonstrates the bug by parsing a document.
        test_xml = StringIO(
            '<?xml version="1.0"?>'
            '<a:g1 xmlns:a="http://example.com/ns">'
             '<a:g2 xml:lang="en">Hello</a:g2>'
            '</a:g1>')

        parser = make_parser()
        parser.setFeature(feature_namespaces, True)
        result = self.ioclass()
        gen = XMLGenerator(result)
        parser.setContentHandler(gen)
        parser.parse(test_xml)

        self.assertEqual(result.getvalue(),
                         self.xml(
                         '<a:g1 xmlns:a="http://example.com/ns">'
                          '<a:g2 xml:lang="en">Hello</a:g2>'
                         '</a:g1>'))

    def test_5027_2(self):
        # The xml prefix (as in xml:lang below) is reserved and bound by
        # definition to http://www.w3.org/XML/1998/namespace.  XMLGenerator had
        # a bug whereby a KeyError is raised because this namespace is missing
        # from a dictionary.
        #
        # This test demonstrates the bug by direct manipulation of the
        # XMLGenerator.
        result = self.ioclass()
        gen = XMLGenerator(result)

        gen.startDocument()
        gen.startPrefixMapping('a', 'http://example.com/ns')
        gen.startElementNS(('http://example.com/ns', 'g1'), 'g1', {})
        lang_attr = {('http://www.w3.org/XML/1998/namespace', 'lang'): 'en'}
        gen.startElementNS(('http://example.com/ns', 'g2'), 'g2', lang_attr)
        gen.characters('Hello')
        gen.endElementNS(('http://example.com/ns', 'g2'), 'g2')
        gen.endElementNS(('http://example.com/ns', 'g1'), 'g1')
        gen.endPrefixMapping('a')
        gen.endDocument()

        self.assertEqual(result.getvalue(),
                         self.xml(
                         '<a:g1 xmlns:a="http://example.com/ns">'
                          '<a:g2 xml:lang="en">Hello</a:g2>'
                         '</a:g1>'))

    def test_no_close_file(self):
        result = self.ioclass()
        def func(out):
            gen = XMLGenerator(out)
            gen.startDocument()
            gen.startElement("doc", {})
        func(result)
        # The generator has gone out of scope; the output must stay open.
        self.assertFalse(result.closed)

    def test_xmlgen_fragment(self):
        result = self.ioclass()
        gen = XMLGenerator(result)

        # Don't call gen.startDocument()
        gen.startElement("foo", {"a": "1.0"})
        gen.characters("Hello")
        gen.endElement("foo")
        gen.startElement("bar", {"b": "2.0"})
        gen.endElement("bar")
        # Don't call gen.endDocument()

        # Strip the declaration, which is only written by startDocument().
        self.assertEqual(result.getvalue(),
            self.xml('<foo a="1.0">Hello</foo><bar b="2.0"></bar>')[len(self.xml('')):])
787
class StringXmlgenTest(XmlgenTest, unittest.TestCase):
    """Run the XMLGenerator tests against a text (StringIO) output."""

    ioclass = StringIO

    def xml(self, doc, encoding='iso-8859-1'):
        """Return the expected text output for *doc*."""
        return '<?xml version="1.0" encoding="%s"?>\n%s' % (encoding, doc)

    # Setting the inherited test to None disables it for this back end.
    test_xmlgen_unencodable = None
795
class BytesXmlgenTest(XmlgenTest, unittest.TestCase):
    """Run the XMLGenerator tests against a binary (BytesIO) output."""

    ioclass = BytesIO

    def xml(self, doc, encoding='iso-8859-1'):
        """Return the expected bytes for *doc*, encoded with *encoding*."""
        return ('<?xml version="1.0" encoding="%s"?>\n%s' %
                (encoding, doc)).encode(encoding, 'xmlcharrefreplace')
802
class WriterXmlgenTest(BytesXmlgenTest):
    """Run the XMLGenerator tests against a minimal list-backed writer."""

    class ioclass(list):
        # Each write() call appends one chunk to the list.
        write = list.append
        closed = False

        def seekable(self):
            return True

        def tell(self):
            # return 0 at start and not 0 after start
            return len(self)

        def getvalue(self):
            return b''.join(self)
817
class StreamWriterXmlgenTest(XmlgenTest, unittest.TestCase):
    """Run the XMLGenerator tests against a codecs StreamWriter."""

    def ioclass(self):
        """Return an ASCII stream writer over an in-memory buffer."""
        raw = BytesIO()
        writer = codecs.getwriter('ascii')(raw, 'xmlcharrefreplace')
        # Expose the underlying buffer's contents under the name the tests use.
        writer.getvalue = raw.getvalue
        return writer

    def xml(self, doc, encoding='iso-8859-1'):
        """Return the expected bytes; the writer always encodes as ASCII."""
        return ('<?xml version="1.0" encoding="%s"?>\n%s' %
                (encoding, doc)).encode('ascii', 'xmlcharrefreplace')
828
class StreamReaderWriterXmlgenTest(XmlgenTest, unittest.TestCase):
    """Run the XMLGenerator tests against a file opened with codecs.open()."""

    fname = os_helper.TESTFN + '-codecs'

    def ioclass(self):
        """Return an ASCII writer on a temporary file, removed on cleanup."""
        writer = codecs.open(self.fname, 'w', encoding='ascii',
                             errors='xmlcharrefreplace', buffering=0)
        def cleanup():
            writer.close()
            os_helper.unlink(self.fname)
        self.addCleanup(cleanup)
        def getvalue():
            # Windows will not let us reopen without first closing
            writer.close()
            with open(writer.name, 'rb') as f:
                return f.read()
        writer.getvalue = getvalue
        return writer

    def xml(self, doc, encoding='iso-8859-1'):
        """Return the expected bytes; the writer always encodes as ASCII."""
        return ('<?xml version="1.0" encoding="%s"?>\n%s' %
                (encoding, doc)).encode('ascii', 'xmlcharrefreplace')
850
# XML declaration written by an XMLGenerator using its default encoding.
start = b'<?xml version="1.0" encoding="iso-8859-1"?>\n'
852
853
class XMLFilterBaseTest(unittest.TestCase):
    """Check that XMLFilterBase forwards events to its content handler."""

    def test_filter_basic(self):
        result = BytesIO()
        gen = XMLGenerator(result)
        # Named "xml_filter" rather than "filter" to avoid shadowing the builtin.
        xml_filter = XMLFilterBase()
        xml_filter.setContentHandler(gen)

        xml_filter.startDocument()
        xml_filter.startElement("doc", {})
        xml_filter.characters("content")
        xml_filter.ignorableWhitespace(" ")
        xml_filter.endElement("doc")
        xml_filter.endDocument()

        self.assertEqual(result.getvalue(), start + b"<doc>content </doc>")
869
870 # ===========================================================================
871 #
872 # expatreader tests
873 #
874 # ===========================================================================
875
# Reference output that the expat reader tests compare against.
with open(TEST_XMLFILE_OUT, 'rb') as f:
    xml_test_out = f.read()
878
class ExpatReaderTest(XmlTestBase):
    """Tests for the expat-backed SAX reader returned by create_parser().

    The cases are grouped by the reader facet they exercise: XMLReader
    input handling, DTDHandler, EntityResolver, Attributes, InputSource,
    IncrementalParser and Locator support.
    """

    def _generator_parser(self):
        # Build a fresh reader whose events are serialized by an
        # XMLGenerator into an in-memory sink.  Returns (parser, sink).
        parser = create_parser()
        sink = BytesIO()
        parser.setContentHandler(XMLGenerator(sink))
        return parser, sink

    # ===== XMLReader support

    def test_expat_binary_file(self):
        # parse() accepts an already opened binary file object.
        parser, result = self._generator_parser()
        with open(TEST_XMLFILE, 'rb') as f:
            parser.parse(f)

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_text_file(self):
        # parse() accepts a text-mode file object as well.
        parser, result = self._generator_parser()
        with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f:
            parser.parse(f)

        self.assertEqual(result.getvalue(), xml_test_out)

    @requires_nonascii_filenames
    def test_expat_binary_file_nonascii(self):
        fname = os_helper.TESTFN_UNICODE
        shutil.copyfile(TEST_XMLFILE, fname)
        self.addCleanup(os_helper.unlink, fname)

        parser, result = self._generator_parser()
        # Use a with-block like the sibling tests so the handle is closed
        # deterministically before the unlink cleanup runs.
        with open(fname, 'rb') as f:
            parser.parse(f)

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_binary_file_bytes_name(self):
        # Same as the binary-file case, but the file is opened through a
        # bytes path.
        fname = os.fsencode(TEST_XMLFILE)
        parser, result = self._generator_parser()
        with open(fname, 'rb') as f:
            parser.parse(f)

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_binary_file_int_name(self):
        parser, result = self._generator_parser()
        # Re-open the descriptor so the stream handed to the parser was
        # opened from an integer rather than a path; closefd=False leaves
        # ownership of the descriptor with the outer file object.
        with open(TEST_XMLFILE, 'rb') as f:
            with open(f.fileno(), 'rb', closefd=False) as f2:
                parser.parse(f2)

        self.assertEqual(result.getvalue(), xml_test_out)

    # ===== DTDHandler support

    class TestDTDHandler:
        """Record every notation and unparsed-entity declaration reported."""

        def __init__(self):
            self._notations = []
            self._entities = []

        def notationDecl(self, name, publicId, systemId):
            self._notations.append((name, publicId, systemId))

        def unparsedEntityDecl(self, name, publicId, systemId, ndata):
            self._entities.append((name, publicId, systemId, ndata))

    class TestEntityRecorder:
        """Entity resolver that logs each request and echoes the ids back."""

        def __init__(self):
            self.entities = []

        def resolveEntity(self, publicId, systemId):
            self.entities.append((publicId, systemId))
            source = InputSource()
            source.setPublicId(publicId)
            source.setSystemId(systemId)
            return source

    def test_expat_dtdhandler(self):
        parser = create_parser()
        handler = self.TestDTDHandler()
        parser.setDTDHandler(handler)

        parser.feed('<!DOCTYPE doc [\n')
        parser.feed(' <!ENTITY img SYSTEM "expat.gif" NDATA GIF>\n')
        parser.feed(' <!NOTATION GIF PUBLIC "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN">\n')
        parser.feed(']>\n')
        parser.feed('<doc></doc>')
        parser.close()

        self.assertEqual(
            handler._notations,
            [("GIF",
              "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN",
              None)])
        self.assertEqual(handler._entities,
                         [("img", None, "expat.gif", "GIF")])

    def test_expat_external_dtd_enabled(self):
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

        parser = create_parser()
        parser.setFeature(feature_external_ges, True)
        resolver = self.TestEntityRecorder()
        parser.setEntityResolver(resolver)

        # With external general entities enabled the resolver is asked for
        # the DTD, and opening the unsupported URL scheme fails.
        with self.assertRaises(URLError):
            parser.feed(
                '<!DOCTYPE external SYSTEM "unsupported://non-existing">\n'
            )
        self.assertEqual(
            resolver.entities, [(None, 'unsupported://non-existing')]
        )

    def test_expat_external_dtd_default(self):
        # With the feature left at its default the resolver must never be
        # consulted for the external DTD.
        parser = create_parser()
        resolver = self.TestEntityRecorder()
        parser.setEntityResolver(resolver)

        parser.feed(
            '<!DOCTYPE external SYSTEM "unsupported://non-existing">\n'
        )
        parser.feed('<doc />')
        parser.close()
        self.assertEqual(resolver.entities, [])

    # ===== EntityResolver support

    class TestEntityResolver:
        """Resolve every external entity to the fixed document <entity/>."""

        def resolveEntity(self, publicId, systemId):
            inpsrc = InputSource()
            inpsrc.setByteStream(BytesIO(b"<entity/>"))
            return inpsrc

    def test_expat_entityresolver_enabled(self):
        parser, result = self._generator_parser()
        parser.setFeature(feature_external_ges, True)
        parser.setEntityResolver(self.TestEntityResolver())

        parser.feed('<!DOCTYPE doc [\n')
        parser.feed(' <!ENTITY test SYSTEM "whatever">\n')
        parser.feed(']>\n')
        parser.feed('<doc>&test;</doc>')
        parser.close()

        # The resolved entity content is spliced into the output.
        self.assertEqual(result.getvalue(), start +
                         b"<doc><entity></entity></doc>")

    def test_expat_entityresolver_default(self):
        parser, result = self._generator_parser()
        self.assertEqual(parser.getFeature(feature_external_ges), False)
        parser.setEntityResolver(self.TestEntityResolver())

        parser.feed('<!DOCTYPE doc [\n')
        parser.feed(' <!ENTITY test SYSTEM "whatever">\n')
        parser.feed(']>\n')
        parser.feed('<doc>&test;</doc>')
        parser.close()

        # The external entity reference contributes nothing to the output.
        self.assertEqual(result.getvalue(), start +
                         b"<doc></doc>")

    # ===== Attributes support

    class AttrGatherer(ContentHandler):
        """Content handler that keeps the attributes of the last element."""

        def startElement(self, name, attrs):
            self._attrs = attrs

        def startElementNS(self, name, qname, attrs):
            self._attrs = attrs

    def test_expat_attrs_empty(self):
        parser = create_parser()
        gather = self.AttrGatherer()
        parser.setContentHandler(gather)

        parser.feed("<doc/>")
        parser.close()

        self.verify_empty_attrs(gather._attrs)

    def test_expat_attrs_wattr(self):
        parser = create_parser()
        gather = self.AttrGatherer()
        parser.setContentHandler(gather)

        parser.feed("<doc attr='val'/>")
        parser.close()

        self.verify_attrs_wattr(gather._attrs)

    def test_expat_nsattrs_empty(self):
        parser = create_parser(1)
        gather = self.AttrGatherer()
        parser.setContentHandler(gather)

        parser.feed("<doc/>")
        parser.close()

        self.verify_empty_nsattrs(gather._attrs)

    def test_expat_nsattrs_wattr(self):
        parser = create_parser(1)
        gather = self.AttrGatherer()
        parser.setContentHandler(gather)

        parser.feed("<doc xmlns:ns='%s' ns:attr='val'/>" % ns_uri)
        parser.close()

        attrs = gather._attrs
        key = (ns_uri, "attr")

        self.assertEqual(attrs.getLength(), 1)
        self.assertEqual(attrs.getNames(), [key])
        # Either no qnames at all or the prefixed name is acceptable here.
        self.assertIn(attrs.getQNames(), ([], ["ns:attr"]))
        self.assertEqual(len(attrs), 1)
        self.assertIn(key, attrs)
        self.assertEqual(attrs.get(key), "val")
        self.assertEqual(attrs.get(key, 25), "val")
        self.assertEqual(list(attrs.items()), [(key, "val")])
        self.assertEqual(list(attrs.values()), ["val"])
        self.assertEqual(attrs.getValue(key), "val")
        self.assertEqual(attrs[key], "val")

    # ===== InputSource support

    def test_expat_inpsource_filename(self):
        # A plain file name is accepted directly by parse().
        parser, result = self._generator_parser()
        parser.parse(TEST_XMLFILE)

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_inpsource_sysid(self):
        # An InputSource that only carries a system id.
        parser, result = self._generator_parser()
        parser.parse(InputSource(TEST_XMLFILE))

        self.assertEqual(result.getvalue(), xml_test_out)

    @requires_nonascii_filenames
    def test_expat_inpsource_sysid_nonascii(self):
        fname = os_helper.TESTFN_UNICODE
        shutil.copyfile(TEST_XMLFILE, fname)
        self.addCleanup(os_helper.unlink, fname)

        parser, result = self._generator_parser()
        parser.parse(InputSource(fname))

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_inpsource_byte_stream(self):
        parser, result = self._generator_parser()
        inpsrc = InputSource()
        with open(TEST_XMLFILE, 'rb') as f:
            inpsrc.setByteStream(f)
            parser.parse(inpsrc)

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_inpsource_character_stream(self):
        parser, result = self._generator_parser()
        inpsrc = InputSource()
        with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f:
            inpsrc.setCharacterStream(f)
            parser.parse(inpsrc)

        self.assertEqual(result.getvalue(), xml_test_out)

    # ===== IncrementalParser support

    def test_expat_incremental(self):
        parser, result = self._generator_parser()

        parser.feed("<doc>")
        parser.feed("</doc>")
        parser.close()

        self.assertEqual(result.getvalue(), start + b"<doc></doc>")

    def test_expat_incremental_reset(self):
        parser, _ = self._generator_parser()

        # Start a document and abandon it half-way through.
        parser.feed("<doc>")
        parser.feed("text")

        # Swap in a fresh sink before reset() so only the events of the
        # second document are captured.
        result = BytesIO()
        parser.setContentHandler(XMLGenerator(result))
        parser.reset()

        parser.feed("<doc>")
        parser.feed("text")
        parser.feed("</doc>")
        parser.close()

        self.assertEqual(result.getvalue(), start + b"<doc>text</doc>")

    # ===== Locator support

    def test_expat_locator_noinfo(self):
        parser, _ = self._generator_parser()

        parser.feed("<doc>")
        parser.feed("</doc>")
        parser.close()

        # Data fed incrementally carries no system or public id.
        self.assertIsNone(parser.getSystemId())
        self.assertIsNone(parser.getPublicId())
        self.assertEqual(parser.getLineNumber(), 1)

    def test_expat_locator_withinfo(self):
        parser, _ = self._generator_parser()
        parser.parse(TEST_XMLFILE)

        self.assertEqual(parser.getSystemId(), TEST_XMLFILE)
        self.assertIsNone(parser.getPublicId())

    @requires_nonascii_filenames
    def test_expat_locator_withinfo_nonascii(self):
        fname = os_helper.TESTFN_UNICODE
        shutil.copyfile(TEST_XMLFILE, fname)
        self.addCleanup(os_helper.unlink, fname)

        parser, _ = self._generator_parser()
        parser.parse(fname)

        self.assertEqual(parser.getSystemId(), fname)
        self.assertIsNone(parser.getPublicId())
1258
1259 # ===========================================================================
1260 #
1261 # error reporting
1262 #
1263 # ===========================================================================
1264
class ErrorReportingTest(unittest.TestCase):
    """Check how parse errors and their location details are reported."""

    class DummyLocator:
        """Minimal locator stub with fixed ids and configurable position."""

        def __init__(self, lineno, colno):
            self._lineno = lineno
            self._colno = colno

        def getPublicId(self):
            return "pubid"

        def getSystemId(self):
            return "sysid"

        def getLineNumber(self):
            return self._lineno

        def getColumnNumber(self):
            return self._colno

    def test_expat_inpsource_location(self):
        parser = create_parser()
        parser.setContentHandler(ContentHandler()) # do nothing
        source = InputSource()
        source.setByteStream(BytesIO(b"<foo bar foobar>"))   #ill-formed
        name = "a file name"
        source.setSystemId(name)
        # assertRaises reports a descriptive failure if nothing is raised,
        # unlike a bare self.fail() inside a try block.
        with self.assertRaises(SAXException) as caught:
            parser.parse(source)
        self.assertEqual(caught.exception.getSystemId(), name)

    def test_expat_incomplete(self):
        parser = create_parser()
        parser.setContentHandler(ContentHandler()) # do nothing
        self.assertRaises(SAXParseException, parser.parse, StringIO("<foo>"))
        # The parser position stays at the end of the truncated input.
        self.assertEqual(parser.getColumnNumber(), 5)
        self.assertEqual(parser.getLineNumber(), 1)

    def test_sax_parse_exception_str(self):
        # pass various values from a locator to the SAXParseException to
        # make sure that the __str__() doesn't fall apart when None is
        # passed instead of an integer line and column number
        positions = (
            (1, 1),         # "normal" values for the locator
            (None, 1),      # None for the line number
            (1, None),      # None for the column number
            (None, None),   # None for both
        )
        for lineno, colno in positions:
            with self.subTest(lineno=lineno, colno=colno):
                text = str(SAXParseException(
                    "message", None, self.DummyLocator(lineno, colno)))
                self.assertIn("message", text)
1320
1321 # ===========================================================================
1322 #
1323 # xmlreader tests
1324 #
1325 # ===========================================================================
1326
class XmlReaderTest(XmlTestBase):
    """Direct checks of the attribute containers from xml.sax.xmlreader."""

    # ===== AttributesImpl
    def test_attrs_empty(self):
        empty = AttributesImpl({})
        self.verify_empty_attrs(empty)

    def test_attrs_wattr(self):
        single = AttributesImpl({"attr" : "val"})
        self.verify_attrs_wattr(single)

    def test_nsattrs_empty(self):
        empty = AttributesNSImpl({}, {})
        self.verify_empty_nsattrs(empty)

    def test_nsattrs_wattr(self):
        # One namespaced attribute, known both by its (uri, localname)
        # pair and by its qualified name.
        key = (ns_uri, "attr")
        qname = "ns:attr"
        attrs = AttributesNSImpl({key : "val"}, {key : qname})

        self.assertEqual(attrs.getLength(), 1)
        self.assertEqual(attrs.getNames(), [key])
        self.assertEqual(attrs.getQNames(), [qname])
        self.assertEqual(len(attrs), 1)
        self.assertIn(key, attrs)
        self.assertEqual(list(attrs.keys()), [key])
        self.assertEqual(attrs.get(key), "val")
        self.assertEqual(attrs.get(key, 25), "val")
        self.assertEqual(list(attrs.items()), [(key, "val")])
        self.assertEqual(list(attrs.values()), ["val"])
        self.assertEqual(attrs.getValue(key), "val")
        self.assertEqual(attrs.getValueByQName(qname), "val")
        self.assertEqual(attrs.getNameByQName(qname), key)
        self.assertEqual(attrs[key], "val")
        self.assertEqual(attrs.getQNameByName(key), qname)
1358
1359
1360 class ESC[4;38;5;81mLexicalHandlerTest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
1361 def setUp(self):
1362 self.parser = None
1363
1364 self.specified_version = '1.0'
1365 self.specified_encoding = 'UTF-8'
1366 self.specified_doctype = 'wish'
1367 self.specified_entity_names = ('nbsp', 'source', 'target')
1368 self.specified_comment = ('Comment in a DTD',
1369 'Really! You think so?')
1370 self.test_data = StringIO()
1371 self.test_data.write('<?xml version="{}" encoding="{}"?>\n'.
1372 format(self.specified_version,
1373 self.specified_encoding))
1374 self.test_data.write('<!DOCTYPE {} [\n'.
1375 format(self.specified_doctype))
1376 self.test_data.write('<!-- {} -->\n'.
1377 format(self.specified_comment[0]))
1378 self.test_data.write('<!ELEMENT {} (to,from,heading,body,footer)>\n'.
1379 format(self.specified_doctype))
1380 self.test_data.write('<!ELEMENT to (#PCDATA)>\n')
1381 self.test_data.write('<!ELEMENT from (#PCDATA)>\n')
1382 self.test_data.write('<!ELEMENT heading (#PCDATA)>\n')
1383 self.test_data.write('<!ELEMENT body (#PCDATA)>\n')
1384 self.test_data.write('<!ELEMENT footer (#PCDATA)>\n')
1385 self.test_data.write('<!ENTITY {} " ">\n'.
1386 format(self.specified_entity_names[0]))
1387 self.test_data.write('<!ENTITY {} "Written by: Alexander.">\n'.
1388 format(self.specified_entity_names[1]))
1389 self.test_data.write('<!ENTITY {} "Hope it gets to: Aristotle.">\n'.
1390 format(self.specified_entity_names[2]))
1391 self.test_data.write(']>\n')
1392 self.test_data.write('<{}>'.format(self.specified_doctype))
1393 self.test_data.write('<to>Aristotle</to>\n')
1394 self.test_data.write('<from>Alexander</from>\n')
1395 self.test_data.write('<heading>Supplication</heading>\n')
1396 self.test_data.write('<body>Teach me patience!</body>\n')
1397 self.test_data.write('<footer>&{};&{};&{};</footer>\n'.
1398 format(self.specified_entity_names[1],
1399 self.specified_entity_names[0],
1400 self.specified_entity_names[2]))
1401 self.test_data.write('<!-- {} -->\n'.format(self.specified_comment[1]))
1402 self.test_data.write('</{}>\n'.format(self.specified_doctype))
1403 self.test_data.seek(0)
1404
1405 # Data received from handlers - to be validated
1406 self.version = None
1407 self.encoding = None
1408 self.standalone = None
1409 self.doctype = None
1410 self.publicID = None
1411 self.systemID = None
1412 self.end_of_dtd = False
1413 self.comments = []
1414
1415 def test_handlers(self):
1416 class ESC[4;38;5;81mTestLexicalHandler(ESC[4;38;5;149mLexicalHandler):
1417 def __init__(self, test_harness, *args, **kwargs):
1418 super().__init__(*args, **kwargs)
1419 self.test_harness = test_harness
1420
1421 def startDTD(self, doctype, publicID, systemID):
1422 self.test_harness.doctype = doctype
1423 self.test_harness.publicID = publicID
1424 self.test_harness.systemID = systemID
1425
1426 def endDTD(self):
1427 self.test_harness.end_of_dtd = True
1428
1429 def comment(self, text):
1430 self.test_harness.comments.append(text)
1431
1432 self.parser = create_parser()
1433 self.parser.setContentHandler(ContentHandler())
1434 self.parser.setProperty(
1435 'http://xml.org/sax/properties/lexical-handler',
1436 TestLexicalHandler(self))
1437 source = InputSource()
1438 source.setCharacterStream(self.test_data)
1439 self.parser.parse(source)
1440 self.assertEqual(self.doctype, self.specified_doctype)
1441 self.assertIsNone(self.publicID)
1442 self.assertIsNone(self.systemID)
1443 self.assertTrue(self.end_of_dtd)
1444 self.assertEqual(len(self.comments),
1445 len(self.specified_comment))
1446 self.assertEqual(f' {self.specified_comment[0]} ', self.comments[0])
1447
1448
1449 class ESC[4;38;5;81mCDATAHandlerTest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
1450 def setUp(self):
1451 self.parser = None
1452 self.specified_chars = []
1453 self.specified_chars.append(('Parseable character data', False))
1454 self.specified_chars.append(('<> &% - assorted other XML junk.', True))
1455 self.char_index = 0 # Used to index specified results within handlers
1456 self.test_data = StringIO()
1457 self.test_data.write('<root_doc>\n')
1458 self.test_data.write('<some_pcdata>\n')
1459 self.test_data.write(f'{self.specified_chars[0][0]}\n')
1460 self.test_data.write('</some_pcdata>\n')
1461 self.test_data.write('<some_cdata>\n')
1462 self.test_data.write(f'<![CDATA[{self.specified_chars[1][0]}]]>\n')
1463 self.test_data.write('</some_cdata>\n')
1464 self.test_data.write('</root_doc>\n')
1465 self.test_data.seek(0)
1466
1467 # Data received from handlers - to be validated
1468 self.chardata = []
1469 self.in_cdata = False
1470
1471 def test_handlers(self):
1472 class ESC[4;38;5;81mTestLexicalHandler(ESC[4;38;5;149mLexicalHandler):
1473 def __init__(self, test_harness, *args, **kwargs):
1474 super().__init__(*args, **kwargs)
1475 self.test_harness = test_harness
1476
1477 def startCDATA(self):
1478 self.test_harness.in_cdata = True
1479
1480 def endCDATA(self):
1481 self.test_harness.in_cdata = False
1482
1483 class ESC[4;38;5;81mTestCharHandler(ESC[4;38;5;149mContentHandler):
1484 def __init__(self, test_harness, *args, **kwargs):
1485 super().__init__(*args, **kwargs)
1486 self.test_harness = test_harness
1487
1488 def characters(self, content):
1489 if content != '\n':
1490 h = self.test_harness
1491 t = h.specified_chars[h.char_index]
1492 h.assertEqual(t[0], content)
1493 h.assertEqual(t[1], h.in_cdata)
1494 h.char_index += 1
1495
1496 self.parser = create_parser()
1497 self.parser.setContentHandler(TestCharHandler(self))
1498 self.parser.setProperty(
1499 'http://xml.org/sax/properties/lexical-handler',
1500 TestLexicalHandler(self))
1501 source = InputSource()
1502 source.setCharacterStream(self.test_data)
1503 self.parser.parse(source)
1504
1505 self.assertFalse(self.in_cdata)
1506 self.assertEqual(self.char_index, 2)
1507
1508
# Run the test cases through unittest when this file is executed as a
# script rather than imported.
if __name__ == "__main__":
    unittest.main()