(root)/
Python-3.11.7/
Tools/
scripts/
parseentities.py
       1  #!/usr/bin/env python3
       2  """ Utility for parsing HTML entity definitions available from:
       3  
       4        http://www.w3.org/ as e.g.
       5        http://www.w3.org/TR/REC-html40/HTMLlat1.ent
       6  
       7      Input is read from stdin, output is written to stdout in form of a
       8      Python snippet defining a dictionary "entitydefs" mapping literal
       9      entity name to character or numeric entity.
      10  
      11      Marc-Andre Lemburg, mal@lemburg.com, 1999.
      12      Use as you like. NO WARRANTIES.
      13  
      14  """
      15  import re,sys
      16  
      17  entityRE = re.compile(r'<!ENTITY +(\w+) +CDATA +"([^"]+)" +-- +((?:.|\n)+?) *-->')
      18  
      19  def parse(text,pos=0,endpos=None):
      20  
      21      pos = 0
      22      if endpos is None:
      23          endpos = len(text)
      24      d = {}
      25      while 1:
      26          m = entityRE.search(text,pos,endpos)
      27          if not m:
      28              break
      29          name,charcode,comment = m.groups()
      30          d[name] = charcode,comment
      31          pos = m.end()
      32      return d
      33  
      34  def writefile(f,defs):
      35  
      36      f.write("entitydefs = {\n")
      37      items = sorted(defs.items())
      38      for name, (charcode,comment) in items:
      39          if charcode[:2] == '&#':
      40              code = int(charcode[2:-1])
      41              if code < 256:
      42                  charcode = r"'\%o'" % code
      43              else:
      44                  charcode = repr(charcode)
      45          else:
      46              charcode = repr(charcode)
      47          comment = ' '.join(comment.split())
      48          f.write("    '%s':\t%s,  \t# %s\n" % (name,charcode,comment))
      49      f.write('\n}\n')
      50  
      51  if __name__ == '__main__':
      52      if len(sys.argv) > 1:
      53          with open(sys.argv[1]) as infile:
      54              text = infile.read()
      55      else:
      56          text = sys.stdin.read()
      57  
      58      defs = parse(text)
      59  
      60      if len(sys.argv) > 2:
      61          with open(sys.argv[2],'w') as outfile:
      62              writefile(outfile, defs)
      63      else:
      64          writefile(sys.stdout, defs)