(root)/
libxml2-2.12.3/
python/
tests/
reader2.py
       1  #!/usr/bin/env python3
       2  # -*- coding: utf-8 -*-
       3  #
       4  # this tests the DTD validation with the XmlTextReader interface
       5  #
       6  import sys
       7  import glob
       8  import os
       9  import setup_test
      10  import libxml2
      11  try:
      12      import StringIO
      13      str_io = StringIO.StringIO
      14  except:
      15      import io
      16      str_io = io.StringIO
      17  
# Memory debug specific: call debugMemory(1) before any parsing.  The end of
# this script calls it again and reports a non-zero return value as the
# number of leaked bytes.
# NOTE(review): presumably the argument 1 switches libxml2's memory
# accounting on -- confirm against the Python bindings.
libxml2.debugMemory(1)
      20  
      21  err = ""
      22  basedir = os.path.dirname(os.path.realpath(__file__))
      23  dir_prefix = os.path.realpath(os.path.join(basedir, "..", "..", "test", "valid"))
      24  
# This dictionary reflects the contents of the files
# ../../test/valid/*.xml.err that are not empty, except that
# the file paths in the messages are prefixed with dir_prefix
# (the resolved ../../test/valid directory).
      28  
      29  expect = {
      30      '766956':
      31  """{0}/dtds/766956.dtd:2: parser error : PEReference: expecting ';'
      32  %ent;
      33     ^
      34  {0}/dtds/766956.dtd:2: parser error : Content error in the external subset
      35  %ent;
      36          ^
      37  Entity: line 1: 
      38  value
      39  ^
      40  """.format(dir_prefix),
      41      '781333':
      42  """{0}/781333.xml:4: element a: validity error : Element a content does not follow the DTD, expecting ( ..., got 
      43  <a/>
      44      ^
      45  {0}/781333.xml:5: element a: validity error : Element a content does not follow the DTD, Expecting more children
      46  
      47  ^
      48  """.format(dir_prefix),
      49      'cond_sect2':
      50  """{0}/dtds/cond_sect2.dtd:15: parser error : All markup of the conditional section is not in the same entity
      51      %ent;
      52           ^
      53  Entity: line 1: 
      54  ]]>
      55  ^
      56  {0}/dtds/cond_sect2.dtd:17: parser error : Content error in the external subset
      57  
      58  ^
      59  """.format(dir_prefix),
      60      'rss':
      61  """{0}/rss.xml:177: element rss: validity error : Element rss does not carry attribute version
      62  </rss>
      63        ^
      64  """.format(dir_prefix),
      65      't8':
      66  """{0}/t8.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
      67  
      68  %defroot; %defmiddle; %deftest;
      69           ^
      70  Entity: line 1: 
      71  &lt;!ELEMENT root (middle) >
      72  ^
      73  """.format(dir_prefix),
      74      't8a':
      75  """{0}/t8a.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
      76  
      77  %defroot;%defmiddle;%deftest;
      78           ^
      79  Entity: line 1: 
      80  &lt;!ELEMENT root (middle) >
      81  ^
      82  """.format(dir_prefix),
      83      'xlink':
      84  """{0}/xlink.xml:450: element termdef: validity error : ID dt-arc already defined
      85  	<p><termdef id="dt-arc" term="Arc">An <ter
      86  	                                  ^
      87  validity error : attribute def line 199 references an unknown ID "dt-xlg"
      88  """.format(dir_prefix),
      89  }
      90  
      91  # Add prefix_dir and extension to the keys
      92  expect = {os.path.join(dir_prefix, key + ".xml"): val for key, val in expect.items()}
      93  
      94  def callback(ctx, str):
      95      global err
      96      err = err + "%s" % (str)
      97  libxml2.registerErrorHandler(callback, "")
      98  
      99  parsing_error_files = ["766956", "cond_sect2", "t8", "t8a"]
     100  expect_parsing_error = [os.path.join(dir_prefix, f + ".xml") for f in parsing_error_files]
     101  
     102  valid_files = glob.glob(os.path.join(dir_prefix, "*.x*"))
     103  assert valid_files, "found no valid files in '{}'".format(dir_prefix)
     104  valid_files.sort()
     105  failures = 0
     106  for file in valid_files:
     107      err = ""
     108      reader = libxml2.newTextReaderFilename(file)
     109      #print "%s:" % (file)
     110      reader.SetParserProp(libxml2.PARSER_VALIDATE, 1)
     111      ret = reader.Read()
     112      while ret == 1:
     113          ret = reader.Read()
     114      if ret != 0 and file not in expect_parsing_error:
     115          print("Error parsing and validating %s" % (file))
     116          #sys.exit(1)
     117      if (err):
     118          if not(file in expect and err == expect[file]):
     119              failures += 1
     120              print("Error: ", err)
     121              if file in expect:
     122                  print("Expected: ", expect[file])
     123  
     124  if failures:
     125      print("Failed %d tests" % failures)
     126      sys.exit(1)
     127  
     128  #
# Another separate test, based on one by Stephane Bidoul
     130  #
     131  s = """
     132  <!DOCTYPE test [
     133  <!ELEMENT test (x,b)>
     134  <!ELEMENT x (c)>
     135  <!ELEMENT b (#PCDATA)>
     136  <!ELEMENT c (#PCDATA)>
     137  <!ENTITY x "<x><c>xxx</c></x>">
     138  ]>
     139  <test>
     140      &x;
     141      <b>bbb</b>
     142  </test>
     143  """
     144  expect="""10,test
     145  1,test
     146  14,#text
     147  1,x
     148  1,c
     149  3,#text
     150  15,c
     151  15,x
     152  14,#text
     153  1,b
     154  3,#text
     155  15,b
     156  14,#text
     157  15,test
     158  """
     159  res=""
     160  err=""
     161  
     162  input = libxml2.inputBuffer(str_io(s))
     163  reader = input.newTextReader("test2")
     164  reader.SetParserProp(libxml2.PARSER_LOADDTD,1)
     165  reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1)
     166  reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1)
     167  reader.SetParserProp(libxml2.PARSER_VALIDATE,1)
     168  while reader.Read() == 1:
     169      res = res + "%s,%s\n" % (reader.NodeType(),reader.Name())
     170  
     171  if res != expect:
     172      print("test2 failed: unexpected output")
     173      print(res)
     174      sys.exit(1)
     175  if err != "":
     176      print("test2 failed: validation error found")
     177      print(err)
     178      sys.exit(1)
     179  
     180  #
     181  # Another test for external entity parsing and validation
     182  #
     183  
     184  s = """<!DOCTYPE test [
     185  <!ELEMENT test (x)>
     186  <!ELEMENT x (#PCDATA)>
     187  <!ENTITY e SYSTEM "tst.ent">
     188  ]>
     189  <test>
     190    &e;
     191  </test>
     192  """
     193  tst_ent = """<x>hello</x>"""
     194  expect="""10 test
     195  1 test
     196  14 #text
     197  1 x
     198  3 #text
     199  15 x
     200  14 #text
     201  15 test
     202  """
     203  res=""
     204  
     205  def myResolver(URL, ID, ctxt):
     206      if URL == "tst.ent":
     207          return(str_io(tst_ent))
     208      return None
     209  
     210  libxml2.setEntityLoader(myResolver)
     211  
     212  input = libxml2.inputBuffer(str_io(s))
     213  reader = input.newTextReader("test3")
     214  reader.SetParserProp(libxml2.PARSER_LOADDTD,1)
     215  reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1)
     216  reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1)
     217  reader.SetParserProp(libxml2.PARSER_VALIDATE,1)
     218  while reader.Read() == 1:
     219      res = res + "%s %s\n" % (reader.NodeType(),reader.Name())
     220  
     221  if res != expect:
     222      print("test3 failed: unexpected output")
     223      print(res)
     224      sys.exit(1)
     225  if err != "":
     226      print("test3 failed: validation error found")
     227      print(err)
     228      sys.exit(1)
     229  
     230  #
     231  # Another test for recursive entity parsing, validation, and replacement of
     232  # entities, making sure the entity ref node doesn't show up in that case
     233  #
     234  
     235  s = """<!DOCTYPE test [
     236  <!ELEMENT test (x, x)>
     237  <!ELEMENT x (y)>
     238  <!ELEMENT y (#PCDATA)>
     239  <!ENTITY x "<x>&y;</x>">
     240  <!ENTITY y "<y>yyy</y>">
     241  ]>
     242  <test>
     243    &x;
     244    &x;
     245  </test>"""
     246  expect="""10 test 0
     247  1 test 0
     248  14 #text 1
     249  1 x 1
     250  1 y 2
     251  3 #text 3
     252  15 y 2
     253  15 x 1
     254  14 #text 1
     255  1 x 1
     256  1 y 2
     257  3 #text 3
     258  15 y 2
     259  15 x 1
     260  14 #text 1
     261  15 test 0
     262  """
     263  res=""
     264  err=""
     265  
     266  input = libxml2.inputBuffer(str_io(s))
     267  reader = input.newTextReader("test4")
     268  reader.SetParserProp(libxml2.PARSER_LOADDTD,1)
     269  reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1)
     270  reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1)
     271  reader.SetParserProp(libxml2.PARSER_VALIDATE,1)
     272  while reader.Read() == 1:
     273      res = res + "%s %s %d\n" % (reader.NodeType(),reader.Name(),reader.Depth())
     274  
     275  if res != expect:
     276      print("test4 failed: unexpected output")
     277      print(res)
     278      sys.exit(1)
     279  if err != "":
     280      print("test4 failed: validation error found")
     281      print(err)
     282      sys.exit(1)
     283  
     284  #
     285  # The same test but without entity substitution this time
     286  #
     287  
     288  s = """<!DOCTYPE test [
     289  <!ELEMENT test (x, x)>
     290  <!ELEMENT x (y)>
     291  <!ELEMENT y (#PCDATA)>
     292  <!ENTITY x "<x>&y;</x>">
     293  <!ENTITY y "<y>yyy</y>">
     294  ]>
     295  <test>
     296    &x;
     297    &x;
     298  </test>"""
     299  expect="""10 test 0
     300  1 test 0
     301  14 #text 1
     302  5 x 1
     303  14 #text 1
     304  5 x 1
     305  14 #text 1
     306  15 test 0
     307  """
     308  res=""
     309  err=""
     310  
     311  input = libxml2.inputBuffer(str_io(s))
     312  reader = input.newTextReader("test5")
     313  reader.SetParserProp(libxml2.PARSER_VALIDATE,1)
     314  while reader.Read() == 1:
     315      res = res + "%s %s %d\n" % (reader.NodeType(),reader.Name(),reader.Depth())
     316  
     317  if res != expect:
     318      print("test5 failed: unexpected output")
     319      print(res)
     320      sys.exit(1)
     321  if err != "":
     322      print("test5 failed: validation error found")
     323      print(err)
     324      sys.exit(1)
     325  
     326  #
     327  # cleanup
     328  #
     329  del input
     330  del reader
     331  
     332  # Memory debug specific
     333  libxml2.cleanupParser()
     334  if libxml2.debugMemory(1) == 0:
     335      print("OK")
     336  else:
     337      print("Memory leak %d bytes" % (libxml2.debugMemory(1)))