(root)/
Python-3.11.7/
Tools/
scripts/
parse_html5_entities.py
       1  #!/usr/bin/env python3
       2  """
       3  Utility for parsing HTML5 entity definitions available from:
       4  
       5      http://dev.w3.org/html5/spec/entities.json
       6  
       7  Written by Ezio Melotti and Iuliia Proskurnia.
       8  
       9  """
      10  
      11  import os
      12  import sys
      13  import json
      14  from urllib.request import urlopen
      15  from html.entities import html5
      16  
# Location of the JSON file with the HTML5 entity definitions; the
# __main__ section passes it to get_json() to download the data.
# NOTE(review): this is a plain-HTTP URL -- confirm it still resolves and
# whether an HTTPS location should be used instead.
entities_url = 'http://dev.w3.org/html5/spec/entities.json'
      18  
      19  def get_json(url):
      20      """Download the json file from the url and returns a decoded object."""
      21      with urlopen(url) as f:
      22          data = f.read().decode('utf-8')
      23      return json.loads(data)
      24  
      25  def create_dict(entities):
      26      """Create the html5 dict from the decoded json object."""
      27      new_html5 = {}
      28      for name, value in entities.items():
      29          new_html5[name.lstrip('&')] = value['characters']
      30      return new_html5
      31  
      32  def compare_dicts(old, new):
      33      """Compare the old and new dicts and print the differences."""
      34      added = new.keys() - old.keys()
      35      if added:
      36          print('{} entitie(s) have been added:'.format(len(added)))
      37          for name in sorted(added):
      38              print('  {!r}: {!r}'.format(name, new[name]))
      39      removed = old.keys() - new.keys()
      40      if removed:
      41          print('{} entitie(s) have been removed:'.format(len(removed)))
      42          for name in sorted(removed):
      43              print('  {!r}: {!r}'.format(name, old[name]))
      44      changed = set()
      45      for name in (old.keys() & new.keys()):
      46          if old[name] != new[name]:
      47              changed.add((name, old[name], new[name]))
      48      if changed:
      49          print('{} entitie(s) have been modified:'.format(len(changed)))
      50          for item in sorted(changed):
      51              print('  {!r}: {!r} -> {!r}'.format(*item))
      52  
      53  def write_items(entities, file=sys.stdout):
      54      """Write the items of the dictionary in the specified file."""
      55      # The keys in the generated dictionary should be sorted
      56      # in a case-insensitive way, however, when two keys are equal,
      57      # the uppercase version should come first so that the result
      58      # looks like: ['Aacute', 'aacute', 'Aacute;', 'aacute;', ...]
      59      # To do this we first sort in a case-sensitive way (so all the
      60      # uppercase chars come first) and then sort with key=str.lower.
      61      # Since the sorting is stable the uppercase keys will eventually
      62      # be before their equivalent lowercase version.
      63      keys = sorted(entities.keys())
      64      keys = sorted(keys, key=str.lower)
      65      print('html5 = {', file=file)
      66      for name in keys:
      67          print('    {!r}: {!a},'.format(name, entities[name]), file=file)
      68      print('}', file=file)
      69  
      70  
      71  if __name__ == '__main__':
      72      # without args print a diff between html.entities.html5 and new_html5
      73      # with --create print the new html5 dict
      74      # with --patch patch the Lib/html/entities.py file
      75      new_html5 = create_dict(get_json(entities_url))
      76      if '--create' in sys.argv:
      77          print('# map the HTML5 named character references to the '
      78                'equivalent Unicode character(s)')
      79          print('# Generated by {}.  Do not edit manually.'.format(__file__))
      80          write_items(new_html5)
      81      elif '--patch' in sys.argv:
      82          fname = 'Lib/html/entities.py'
      83          temp_fname = fname + '.temp'
      84          with open(fname) as f1, open(temp_fname, 'w') as f2:
      85              skip = False
      86              for line in f1:
      87                  if line.startswith('html5 = {'):
      88                      write_items(new_html5, file=f2)
      89                      skip = True
      90                      continue
      91                  if skip:
      92                      # skip the old items until the }
      93                      if line.startswith('}'):
      94                          skip = False
      95                      continue
      96                  f2.write(line)
      97          os.remove(fname)
      98          os.rename(temp_fname, fname)
      99      else:
     100          if html5 == new_html5:
     101              print('The current dictionary is updated.')
     102          else:
     103              compare_dicts(html5, new_html5)
     104              print('Run "./python {0} --patch" to update Lib/html/entities.html '
     105                    'or "./python {0} --create" to see the generated ' 'dictionary.'.format(__file__))