1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 #
4 # this tests the DTD validation with the XmlTextReader interface
5 #
6 import sys
7 import glob
8 import os
9 import setup_test
10 import libxml2
# Pick an in-memory text stream class that works on both Python 2 and 3.
# Only a missing module should trigger the fallback; a bare ``except`` would
# also hide unrelated failures such as KeyboardInterrupt.
try:
    # Python 2 ships the class in its own StringIO module.
    import StringIO
    str_io = StringIO.StringIO
except ImportError:
    # Python 3: the StringIO module is gone, the class lives in io.
    import io
    str_io = io.StringIO
17
# Memory debug specific
# The value returned by libxml2.debugMemory(1) is checked again at the end of
# this script, where a non-zero result is reported as a memory leak.
libxml2.debugMemory(1)
20
# Error text collected by the libxml2 error handler for the current document.
err = ""

# The fixtures live two directories above this script, under test/valid.
basedir = os.path.dirname(os.path.realpath(__file__))
dir_prefix = os.path.realpath(
    os.path.join(basedir, os.pardir, os.pardir, "test", "valid")
)

# Expected error output for the fixtures whose ../../test/valid/*.xml.err
# file is not empty.  Each "{0}" placeholder stands for dir_prefix and is
# filled in below, so the file paths in the messages start with dir_prefix.
expect = {
    "766956": """{0}/dtds/766956.dtd:2: parser error : PEReference: expecting ';'
%ä%ent;
^
{0}/dtds/766956.dtd:2: parser error : Content error in the external subset
%ä%ent;
^
Entity: line 1:
value
^
""",
    "781333": """{0}/781333.xml:4: element a: validity error : Element a content does not follow the DTD, expecting ( ..., got
<a/>
^
{0}/781333.xml:5: element a: validity error : Element a content does not follow the DTD, Expecting more children

^
""",
    "cond_sect2": """{0}/dtds/cond_sect2.dtd:15: parser error : All markup of the conditional section is not in the same entity
%ent;
^
Entity: line 1:
]]>
^
{0}/dtds/cond_sect2.dtd:17: parser error : Content error in the external subset

^
""",
    "rss": """{0}/rss.xml:177: element rss: validity error : Element rss does not carry attribute version
</rss>
^
""",
    "t8": """{0}/t8.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration

%defroot; %defmiddle; %deftest;
^
Entity: line 1:
<!ELEMENT root (middle) >
^
""",
    "t8a": """{0}/t8a.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration

%defroot;%defmiddle;%deftest;
^
Entity: line 1:
<!ELEMENT root (middle) >
^
""",
    "xlink": """{0}/xlink.xml:450: element termdef: validity error : ID dt-arc already defined
<p><termdef id="dt-arc" term="Arc">An <ter
^
validity error : attribute def line 199 references an unknown ID "dt-xlg"
""",
}

# Turn every short name into the full path of its .xml fixture and fill the
# directory prefix into the message template.
expect = {
    os.path.join(dir_prefix, name + ".xml"): template.format(dir_prefix)
    for name, template in expect.items()
}
93
def callback(ctx, str):
    """Append one libxml2 error message to the module-level ``err`` log.

    ``ctx`` is the context value supplied at registration time and is not
    used.  ``str`` is the message; it is rendered with ``%s`` before being
    appended.  Inside this function the parameter shadows the builtin
    ``str``.
    """
    global err
    err += "%s" % str
# Route libxml2 error messages through callback(), which appends to err.
libxml2.registerErrorHandler(callback, "")

# Fixtures for which a non-zero final Read() result is accepted silently.
parsing_error_files = ["766956", "cond_sect2", "t8", "t8a"]
expect_parsing_error = [
    os.path.join(dir_prefix, stem + ".xml") for stem in parsing_error_files
]

valid_files = sorted(glob.glob(os.path.join(dir_prefix, "*.x*")))
assert valid_files, "found no valid files in '{}'".format(dir_prefix)

failures = 0
for file in valid_files:
    # Start every document with an empty error log.
    err = ""
    # NOTE: the name "reader" is rebound by the later tests and deleted at
    # the end of the script, so it is kept as is.
    reader = libxml2.newTextReaderFilename(file)
    reader.SetParserProp(libxml2.PARSER_VALIDATE, 1)

    # Keep reading while Read() returns 1; a final 0 is treated as a clean
    # end of document, anything else as a parsing problem.
    while True:
        ret = reader.Read()
        if ret != 1:
            break

    if ret != 0 and file not in expect_parsing_error:
        # Only reported; this alone does not count as a failure.
        print("Error parsing and validating %s" % (file))

    # No messages at all, or exactly the expected ones: nothing to report.
    if not err:
        continue
    if expect.get(file) == err:
        continue

    failures += 1
    print("Error: ", err)
    if file in expect:
        print("Expected: ", expect[file])

if failures:
    print("Failed %d tests" % failures)
    sys.exit(1)
127
128 #
129 # another separate test based on Stephane Bidoul one
130 #
131 s = """
132 <!DOCTYPE test [
133 <!ELEMENT test (x,b)>
134 <!ELEMENT x (c)>
135 <!ELEMENT b (#PCDATA)>
136 <!ELEMENT c (#PCDATA)>
137 <!ENTITY x "<x><c>xxx</c></x>">
138 ]>
139 <test>
140 &x;
141 <b>bbb</b>
142 </test>
143 """
144 expect="""10,test
145 1,test
146 14,#text
147 1,x
148 1,c
149 3,#text
150 15,c
151 15,x
152 14,#text
153 1,b
154 3,#text
155 15,b
156 14,#text
157 15,test
158 """
159 res=""
160 err=""
161
162 input = libxml2.inputBuffer(str_io(s))
163 reader = input.newTextReader("test2")
164 reader.SetParserProp(libxml2.PARSER_LOADDTD,1)
165 reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1)
166 reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1)
167 reader.SetParserProp(libxml2.PARSER_VALIDATE,1)
168 while reader.Read() == 1:
169 res = res + "%s,%s\n" % (reader.NodeType(),reader.Name())
170
171 if res != expect:
172 print("test2 failed: unexpected output")
173 print(res)
174 sys.exit(1)
175 if err != "":
176 print("test2 failed: validation error found")
177 print(err)
178 sys.exit(1)
179
180 #
181 # Another test for external entity parsing and validation
182 #
183
184 s = """<!DOCTYPE test [
185 <!ELEMENT test (x)>
186 <!ELEMENT x (#PCDATA)>
187 <!ENTITY e SYSTEM "tst.ent">
188 ]>
189 <test>
190 &e;
191 </test>
192 """
193 tst_ent = """<x>hello</x>"""
194 expect="""10 test
195 1 test
196 14 #text
197 1 x
198 3 #text
199 15 x
200 14 #text
201 15 test
202 """
203 res=""
204
def myResolver(URL, ID, ctxt):
    """Entity loader that serves the in-memory ``tst.ent`` entity.

    Returns a fresh text stream over ``tst_ent`` when ``URL`` is exactly
    "tst.ent" and None for every other URL.  ``ID`` and ``ctxt`` are
    accepted but not used.
    """
    if URL != "tst.ent":
        return None
    return str_io(tst_ent)
209
# Let libxml2 ask myResolver() for the content of external entities.
libxml2.setEntityLoader(myResolver)

input = libxml2.inputBuffer(str_io(s))
reader = input.newTextReader("test3")
for prop in (
    libxml2.PARSER_LOADDTD,
    libxml2.PARSER_DEFAULTATTRS,
    libxml2.PARSER_SUBST_ENTITIES,
    libxml2.PARSER_VALIDATE,
):
    reader.SetParserProp(prop, 1)

# Record every node the reader reports.
while reader.Read() == 1:
    res += "%s %s\n" % (reader.NodeType(), reader.Name())

if res != expect:
    print("test3 failed: unexpected output")
    print(res)
    sys.exit(1)
if err:
    print("test3 failed: validation error found")
    print(err)
    sys.exit(1)
229
230 #
231 # Another test for recursive entity parsing, validation, and replacement of
232 # entities, making sure the entity ref node doesn't show up in that case
233 #
234
235 s = """<!DOCTYPE test [
236 <!ELEMENT test (x, x)>
237 <!ELEMENT x (y)>
238 <!ELEMENT y (#PCDATA)>
239 <!ENTITY x "<x>&y;</x>">
240 <!ENTITY y "<y>yyy</y>">
241 ]>
242 <test>
243 &x;
244 &x;
245 </test>"""
246 expect="""10 test 0
247 1 test 0
248 14 #text 1
249 1 x 1
250 1 y 2
251 3 #text 3
252 15 y 2
253 15 x 1
254 14 #text 1
255 1 x 1
256 1 y 2
257 3 #text 3
258 15 y 2
259 15 x 1
260 14 #text 1
261 15 test 0
262 """
263 res=""
264 err=""
265
266 input = libxml2.inputBuffer(str_io(s))
267 reader = input.newTextReader("test4")
268 reader.SetParserProp(libxml2.PARSER_LOADDTD,1)
269 reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1)
270 reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1)
271 reader.SetParserProp(libxml2.PARSER_VALIDATE,1)
272 while reader.Read() == 1:
273 res = res + "%s %s %d\n" % (reader.NodeType(),reader.Name(),reader.Depth())
274
275 if res != expect:
276 print("test4 failed: unexpected output")
277 print(res)
278 sys.exit(1)
279 if err != "":
280 print("test4 failed: validation error found")
281 print(err)
282 sys.exit(1)
283
284 #
285 # The same test but without entity substitution this time
286 #
287
288 s = """<!DOCTYPE test [
289 <!ELEMENT test (x, x)>
290 <!ELEMENT x (y)>
291 <!ELEMENT y (#PCDATA)>
292 <!ENTITY x "<x>&y;</x>">
293 <!ENTITY y "<y>yyy</y>">
294 ]>
295 <test>
296 &x;
297 &x;
298 </test>"""
299 expect="""10 test 0
300 1 test 0
301 14 #text 1
302 5 x 1
303 14 #text 1
304 5 x 1
305 14 #text 1
306 15 test 0
307 """
308 res=""
309 err=""
310
311 input = libxml2.inputBuffer(str_io(s))
312 reader = input.newTextReader("test5")
313 reader.SetParserProp(libxml2.PARSER_VALIDATE,1)
314 while reader.Read() == 1:
315 res = res + "%s %s %d\n" % (reader.NodeType(),reader.Name(),reader.Depth())
316
317 if res != expect:
318 print("test5 failed: unexpected output")
319 print(res)
320 sys.exit(1)
321 if err != "":
322 print("test5 failed: validation error found")
323 print(err)
324 sys.exit(1)
325
#
# cleanup
#
# Drop the last input buffer and reader before the leak check below
# (presumably so that their libxml2 allocations are released first).
del input, reader

# Memory debug specific
libxml2.cleanupParser()
if libxml2.debugMemory(1) != 0:
    print("Memory leak %d bytes" % (libxml2.debugMemory(1)))
else:
    print("OK")