(root)/
Python-3.11.7/
Tools/
scripts/
generate_global_objects.py
       1  import contextlib
       2  import io
       3  import os.path
       4  import re
       5  
# Make this script's own path absolute so the directories derived from it
# below are absolute too.
__file__ = os.path.abspath(__file__)
# Three directory levels above this file.
ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
# <ROOT>/Include/internal: the directory holding the headers that the
# generate_*() functions in this script read and rewrite.
INTERNAL = os.path.join(ROOT, 'Include', 'internal')
       9  
      10  
# Names that match the _Py_ID(...) pattern in the scanned sources but are
# left out of the collected identifiers (see get_identifiers_and_strings()).
# The trailing comment on each entry names the file(s) it comes from.
# NOTE(review): presumably these are macro parameter names rather than real
# identifiers -- confirm against the listed C files.
IGNORED = {
    'ACTION',  # Python/_warnings.c
    'ATTR',  # Python/_warnings.c and Objects/funcobject.c
    'DUNDER',  # Objects/typeobject.c
    'RDUNDER',  # Objects/typeobject.c
    'SPECIAL',  # Objects/weakrefobject.c
}
# Identifiers that are always included, in addition to the ones found by
# scanning the sources (see get_identifiers_and_strings()).  Each group is
# labelled with the macro and file it was taken from.
IDENTIFIERS = [
    # from ADD() in Python/_warnings.c
    'default',
    'ignore',

    # from GET_WARNINGS_ATTR() in Python/_warnings.c
    'WarningMessage',
    '_showwarnmsg',
    '_warn_unawaited_coroutine',
    'defaultaction',
    'filters',
    'onceregistry',

    # from WRAP_METHOD() in Objects/weakrefobject.c
    '__bytes__',
    '__reversed__',

    # from COPY_ATTR() in Objects/funcobject.c
    '__module__',
    '__name__',
    '__qualname__',
    '__doc__',
    '__annotations__',

    # from SLOT* in Objects/typeobject.c
    '__abs__',
    '__add__',
    '__and__',
    '__divmod__',
    '__float__',
    '__floordiv__',
    '__getitem__',
    '__iadd__',
    '__iand__',
    '__ifloordiv__',
    '__ilshift__',
    '__imatmul__',
    '__imod__',
    '__imul__',
    '__int__',
    '__invert__',
    '__ior__',
    '__irshift__',
    '__isub__',
    '__itruediv__',
    '__ixor__',
    '__lshift__',
    '__matmul__',
    '__mod__',
    '__mul__',
    '__neg__',
    '__or__',
    '__pos__',
    '__pow__',
    '__radd__',
    '__rand__',
    '__rdivmod__',
    '__rfloordiv__',
    '__rlshift__',
    '__rmatmul__',
    '__rmod__',
    '__rmul__',
    '__ror__',
    '__rpow__',
    '__rrshift__',
    '__rshift__',
    '__rsub__',
    '__rtruediv__',
    '__rxor__',
    '__str__',
    '__sub__',
    '__truediv__',
    '__xor__',
]
      92  
      93  
      94  #######################################
      95  # helpers
      96  
      97  def iter_files():
      98      for name in ('Modules', 'Objects', 'Parser', 'PC', 'Programs', 'Python'):
      99          root = os.path.join(ROOT, name)
     100          for dirname, _, files in os.walk(root):
     101              for name in files:
     102                  if not name.endswith(('.c', '.h')):
     103                      continue
     104                  yield os.path.join(dirname, name)
     105  
     106  
     107  def iter_global_strings():
     108      id_regex = re.compile(r'\b_Py_ID\((\w+)\)')
     109      str_regex = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)')
     110      for filename in iter_files():
     111          try:
     112              infile = open(filename, encoding='utf-8')
     113          except FileNotFoundError:
     114              # The file must have been a temporary file.
     115              continue
     116          with infile:
     117              for lno, line in enumerate(infile, 1):
     118                  for m in id_regex.finditer(line):
     119                      identifier, = m.groups()
     120                      yield identifier, None, filename, lno, line
     121                  for m in str_regex.finditer(line):
     122                      varname, string = m.groups()
     123                      yield varname, string, filename, lno, line
     124  
     125  
     126  def iter_to_marker(lines, marker):
     127      for line in lines:
     128          if line.rstrip() == marker:
     129              break
     130          yield line
     131  
     132  
     133  class ESC[4;38;5;81mPrinter:
     134  
     135      def __init__(self, file):
     136          self.level = 0
     137          self.file = file
     138          self.continuation = [False]
     139  
     140      @contextlib.contextmanager
     141      def indent(self):
     142          save_level = self.level
     143          try:
     144              self.level += 1
     145              yield
     146          finally:
     147              self.level = save_level
     148  
     149      def write(self, arg):
     150          eol = '\n'
     151          if self.continuation[-1]:
     152              eol = f' \\{eol}' if arg else f'\\{eol}'
     153          self.file.writelines(("    "*self.level, arg, eol))
     154  
     155      @contextlib.contextmanager
     156      def block(self, prefix, suffix="", *, continuation=None):
     157          if continuation is None:
     158              continuation = self.continuation[-1]
     159          self.continuation.append(continuation)
     160  
     161          self.write(prefix + " {")
     162          with self.indent():
     163              yield
     164          self.continuation.pop()
     165          self.write("}" + suffix)
     166  
     167  
     168  @contextlib.contextmanager
     169  def open_for_changes(filename, orig):
     170      """Like open() but only write to the file if it changed."""
     171      outfile = io.StringIO()
     172      yield outfile
     173      text = outfile.getvalue()
     174      if text != orig:
     175          with open(filename, 'w', encoding='utf-8') as outfile:
     176              outfile.write(text)
     177      else:
     178          print(f'# not changed: {filename}')
     179  
     180  
     181  #######################################
     182  # the global objects
     183  
# Marker lines delimiting the auto-generated region of each header.  The
# generate_*() functions keep the text before START and after END and
# replace everything in between.
START = '/* The following is auto-generated by Tools/scripts/generate_global_objects.py. */'
END = '/* End auto-generated code */'
     186  
     187  
     188  def generate_global_strings(identifiers, strings):
     189      filename = os.path.join(INTERNAL, 'pycore_global_strings.h')
     190  
     191      # Read the non-generated part of the file.
     192      with open(filename) as infile:
     193          orig = infile.read()
     194      lines = iter(orig.rstrip().splitlines())
     195      before = '\n'.join(iter_to_marker(lines, START))
     196      for _ in iter_to_marker(lines, END):
     197          pass
     198      after = '\n'.join(lines)
     199  
     200      # Generate the file.
     201      with open_for_changes(filename, orig) as outfile:
     202          printer = Printer(outfile)
     203          printer.write(before)
     204          printer.write(START)
     205          with printer.block('struct _Py_global_strings', ';'):
     206              with printer.block('struct', ' literals;'):
     207                  for literal, name in sorted(strings.items(), key=lambda x: x[1]):
     208                      printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
     209              outfile.write('\n')
     210              with printer.block('struct', ' identifiers;'):
     211                  for name in sorted(identifiers):
     212                      assert name.isidentifier(), name
     213                      printer.write(f'STRUCT_FOR_ID({name})')
     214              with printer.block('struct', ' ascii[128];'):
     215                  printer.write("PyASCIIObject _ascii;")
     216                  printer.write("uint8_t _data[2];")
     217              with printer.block('struct', ' latin1[128];'):
     218                  printer.write("PyCompactUnicodeObject _latin1;")
     219                  printer.write("uint8_t _data[2];")
     220          printer.write(END)
     221          printer.write(after)
     222  
     223  
     224  def generate_runtime_init(identifiers, strings):
     225      # First get some info from the declarations.
     226      nsmallposints = None
     227      nsmallnegints = None
     228      with open(os.path.join(INTERNAL, 'pycore_global_objects.h')) as infile:
     229          for line in infile:
     230              if line.startswith('#define _PY_NSMALLPOSINTS'):
     231                  nsmallposints = int(line.split()[-1])
     232              elif line.startswith('#define _PY_NSMALLNEGINTS'):
     233                  nsmallnegints = int(line.split()[-1])
     234                  break
     235          else:
     236              raise NotImplementedError
     237      assert nsmallposints and nsmallnegints
     238  
     239      # Then target the runtime initializer.
     240      filename = os.path.join(INTERNAL, 'pycore_runtime_init.h')
     241  
     242      # Read the non-generated part of the file.
     243      with open(filename) as infile:
     244          orig = infile.read()
     245      lines = iter(orig.rstrip().splitlines())
     246      before = '\n'.join(iter_to_marker(lines, START))
     247      for _ in iter_to_marker(lines, END):
     248          pass
     249      after = '\n'.join(lines)
     250  
     251      # Generate the file.
     252      with open_for_changes(filename, orig) as outfile:
     253          printer = Printer(outfile)
     254          printer.write(before)
     255          printer.write(START)
     256          with printer.block('#define _Py_global_objects_INIT', continuation=True):
     257              with printer.block('.singletons =', ','):
     258                  # Global int objects.
     259                  with printer.block('.small_ints =', ','):
     260                      for i in range(-nsmallnegints, nsmallposints):
     261                          printer.write(f'_PyLong_DIGIT_INIT({i}),')
     262                  printer.write('')
     263                  # Global bytes objects.
     264                  printer.write('.bytes_empty = _PyBytes_SIMPLE_INIT(0, 0),')
     265                  with printer.block('.bytes_characters =', ','):
     266                      for i in range(256):
     267                          printer.write(f'_PyBytes_CHAR_INIT({i}),')
     268                  printer.write('')
     269                  # Global strings.
     270                  with printer.block('.strings =', ','):
     271                      with printer.block('.literals =', ','):
     272                          for literal, name in sorted(strings.items(), key=lambda x: x[1]):
     273                              printer.write(f'INIT_STR({name}, "{literal}"),')
     274                      with printer.block('.identifiers =', ','):
     275                          for name in sorted(identifiers):
     276                              assert name.isidentifier(), name
     277                              printer.write(f'INIT_ID({name}),')
     278                      with printer.block('.ascii =', ','):
     279                          for i in range(128):
     280                              printer.write(f'_PyASCIIObject_INIT("\\x{i:02x}"),')
     281                      with printer.block('.latin1 =', ','):
     282                          for i in range(128, 256):
     283                              printer.write(f'_PyUnicode_LATIN1_INIT("\\x{i:02x}"),')
     284                  printer.write('')
     285                  with printer.block('.tuple_empty =', ','):
     286                      printer.write('.ob_base = _PyVarObject_IMMORTAL_INIT(&PyTuple_Type, 0)')
     287          printer.write(END)
     288          printer.write(after)
     289  
     290  
     291  def get_identifiers_and_strings() -> 'tuple[set[str], dict[str, str]]':
     292      identifiers = set(IDENTIFIERS)
     293      strings = {}
     294      for name, string, *_ in iter_global_strings():
     295          if string is None:
     296              if name not in IGNORED:
     297                  identifiers.add(name)
     298          else:
     299              if string not in strings:
     300                  strings[string] = name
     301              elif name != strings[string]:
     302                  raise ValueError(f'string mismatch for {name!r} ({string!r} != {strings[name]!r}')
     303      return identifiers, strings
     304  
     305  
     306  #######################################
     307  # the script
     308  
     309  def main() -> None:
     310      identifiers, strings = get_identifiers_and_strings()
     311  
     312      generate_global_strings(identifiers, strings)
     313      generate_runtime_init(identifiers, strings)
     314  
     315  
# Script entry point.  No command-line options are defined; the parser is
# still run, and its (empty) namespace is forwarded to main() as keyword
# arguments.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    args = parser.parse_args()
    main(**vars(args))
     320      main(**vars(args))