(root)/
Python-3.12.0/
Tools/
build/
deepfreeze.py
       1  """Deep freeze
       2  
       3  The script may be executed by _bootstrap_python interpreter.
       4  Shared library extension modules are not available in that case.
       5  On Windows, and in cross-compilation cases, it is executed
       6  by Python 3.10, and 3.11 features are not available.
       7  """
       8  import argparse
       9  import ast
      10  import builtins
      11  import collections
      12  import contextlib
      13  import os
      14  import re
      15  import time
      16  import types
      17  from typing import Dict, FrozenSet, TextIO, Tuple
      18  
      19  import umarshal
      20  from generate_global_objects import get_identifiers_and_strings
      21  
# Diagnostic output switch; main() overwrites it with the --verbose flag.
verbose = False
# Tables returned by generate_global_objects.get_identifiers_and_strings().
# Printer.generate_unicode() consults them so that matching strings are
# emitted as _Py_ID(...) / _Py_STR(...) references instead of new objects.
identifiers, strings = get_identifiers_and_strings()

# Opcode number that Printer.generate_code() searches for when it computes
# _co_firsttraceable.
# This must be kept in sync with opcode.py
RESUME = 151
      27  
      28  def isprintable(b: bytes) -> bool:
      29      return all(0x20 <= c < 0x7f for c in b)
      30  
      31  
      32  def make_string_literal(b: bytes) -> str:
      33      res = ['"']
      34      if isprintable(b):
      35          res.append(b.decode("ascii").replace("\\", "\\\\").replace("\"", "\\\""))
      36      else:
      37          for i in b:
      38              res.append(f"\\x{i:02x}")
      39      res.append('"')
      40      return "".join(res)
      41  
      42  
      43  CO_FAST_LOCAL = 0x20
      44  CO_FAST_CELL = 0x40
      45  CO_FAST_FREE = 0x80
      46  
      47  next_code_version = 1
      48  
      49  def get_localsplus(code: types.CodeType):
      50      a = collections.defaultdict(int)
      51      for name in code.co_varnames:
      52          a[name] |= CO_FAST_LOCAL
      53      for name in code.co_cellvars:
      54          a[name] |= CO_FAST_CELL
      55      for name in code.co_freevars:
      56          a[name] |= CO_FAST_FREE
      57      return tuple(a.keys()), bytes(a.values())
      58  
      59  
      60  def get_localsplus_counts(code: types.CodeType,
      61                            names: Tuple[str, ...],
      62                            kinds: bytes) -> Tuple[int, int, int, int]:
      63      nlocals = 0
      64      ncellvars = 0
      65      nfreevars = 0
      66      assert len(names) == len(kinds)
      67      for name, kind in zip(names, kinds):
      68          if kind & CO_FAST_LOCAL:
      69              nlocals += 1
      70              if kind & CO_FAST_CELL:
      71                  ncellvars += 1
      72          elif kind & CO_FAST_CELL:
      73              ncellvars += 1
      74          elif kind & CO_FAST_FREE:
      75              nfreevars += 1
      76      assert nlocals == len(code.co_varnames) == code.co_nlocals, \
      77          (nlocals, len(code.co_varnames), code.co_nlocals)
      78      assert ncellvars == len(code.co_cellvars)
      79      assert nfreevars == len(code.co_freevars)
      80      return nlocals, ncellvars, nfreevars
      81  
      82  
      83  PyUnicode_1BYTE_KIND = 1
      84  PyUnicode_2BYTE_KIND = 2
      85  PyUnicode_4BYTE_KIND = 4
      86  
      87  
      88  def analyze_character_width(s: str) -> Tuple[int, bool]:
      89      maxchar = ' '
      90      for c in s:
      91          maxchar = max(maxchar, c)
      92      ascii = False
      93      if maxchar <= '\xFF':
      94          kind = PyUnicode_1BYTE_KIND
      95          ascii = maxchar <= '\x7F'
      96      elif maxchar <= '\uFFFF':
      97          kind = PyUnicode_2BYTE_KIND
      98      else:
      99          kind = PyUnicode_4BYTE_KIND
     100      return kind, ascii
     101  
     102  
     103  def removesuffix(base: str, suffix: str) -> str:
     104      if base.endswith(suffix):
     105          return base[:len(base) - len(suffix)]
     106      return base
     107  
     108  class ESC[4;38;5;81mPrinter:
     109  
     110      def __init__(self, file: TextIO) -> None:
     111          self.level = 0
     112          self.file = file
     113          self.cache: Dict[tuple[type, object, str], str] = {}
     114          self.hits, self.misses = 0, 0
     115          self.finis: list[str] = []
     116          self.inits: list[str] = []
     117          self.write('#include "Python.h"')
     118          self.write('#include "internal/pycore_gc.h"')
     119          self.write('#include "internal/pycore_code.h"')
     120          self.write('#include "internal/pycore_frame.h"')
     121          self.write('#include "internal/pycore_long.h"')
     122          self.write("")
     123  
     124      @contextlib.contextmanager
     125      def indent(self) -> None:
     126          save_level = self.level
     127          try:
     128              self.level += 1
     129              yield
     130          finally:
     131              self.level = save_level
     132  
     133      def write(self, arg: str) -> None:
     134          self.file.writelines(("    "*self.level, arg, "\n"))
     135  
     136      @contextlib.contextmanager
     137      def block(self, prefix: str, suffix: str = "") -> None:
     138          self.write(prefix + " {")
     139          with self.indent():
     140              yield
     141          self.write("}" + suffix)
     142  
     143      def object_head(self, typename: str) -> None:
     144          with self.block(".ob_base =", ","):
     145              self.write(f".ob_refcnt = _Py_IMMORTAL_REFCNT,")
     146              self.write(f".ob_type = &{typename},")
     147  
     148      def object_var_head(self, typename: str, size: int) -> None:
     149          with self.block(".ob_base =", ","):
     150              self.object_head(typename)
     151              self.write(f".ob_size = {size},")
     152  
     153      def field(self, obj: object, name: str) -> None:
     154          self.write(f".{name} = {getattr(obj, name)},")
     155  
     156      def generate_bytes(self, name: str, b: bytes) -> str:
     157          if b == b"":
     158              return "(PyObject *)&_Py_SINGLETON(bytes_empty)"
     159          if len(b) == 1:
     160              return f"(PyObject *)&_Py_SINGLETON(bytes_characters[{b[0]}])"
     161          self.write("static")
     162          with self.indent():
     163              with self.block("struct"):
     164                  self.write("PyObject_VAR_HEAD")
     165                  self.write("Py_hash_t ob_shash;")
     166                  self.write(f"char ob_sval[{len(b) + 1}];")
     167          with self.block(f"{name} =", ";"):
     168              self.object_var_head("PyBytes_Type", len(b))
     169              self.write(".ob_shash = -1,")
     170              self.write(f".ob_sval = {make_string_literal(b)},")
     171          return f"& {name}.ob_base.ob_base"
     172  
     173      def generate_unicode(self, name: str, s: str) -> str:
     174          if s in strings:
     175              return f"&_Py_STR({strings[s]})"
     176          if s in identifiers:
     177              return f"&_Py_ID({s})"
     178          if len(s) == 1:
     179              c = ord(s)
     180              if c < 128:
     181                  return f"(PyObject *)&_Py_SINGLETON(strings).ascii[{c}]"
     182              elif c < 256:
     183                  return f"(PyObject *)&_Py_SINGLETON(strings).latin1[{c - 128}]"
     184          if re.match(r'\A[A-Za-z0-9_]+\Z', s):
     185              name = f"const_str_{s}"
     186          kind, ascii = analyze_character_width(s)
     187          if kind == PyUnicode_1BYTE_KIND:
     188              datatype = "uint8_t"
     189          elif kind == PyUnicode_2BYTE_KIND:
     190              datatype = "uint16_t"
     191          else:
     192              datatype = "uint32_t"
     193          self.write("static")
     194          with self.indent():
     195              with self.block("struct"):
     196                  if ascii:
     197                      self.write("PyASCIIObject _ascii;")
     198                  else:
     199                      self.write("PyCompactUnicodeObject _compact;")
     200                  self.write(f"{datatype} _data[{len(s)+1}];")
     201          with self.block(f"{name} =", ";"):
     202              if ascii:
     203                  with self.block("._ascii =", ","):
     204                      self.object_head("PyUnicode_Type")
     205                      self.write(f".length = {len(s)},")
     206                      self.write(".hash = -1,")
     207                      with self.block(".state =", ","):
     208                          self.write(".kind = 1,")
     209                          self.write(".compact = 1,")
     210                          self.write(".ascii = 1,")
     211                  self.write(f"._data = {make_string_literal(s.encode('ascii'))},")
     212                  return f"& {name}._ascii.ob_base"
     213              else:
     214                  with self.block("._compact =", ","):
     215                      with self.block("._base =", ","):
     216                          self.object_head("PyUnicode_Type")
     217                          self.write(f".length = {len(s)},")
     218                          self.write(".hash = -1,")
     219                          with self.block(".state =", ","):
     220                              self.write(f".kind = {kind},")
     221                              self.write(".compact = 1,")
     222                              self.write(".ascii = 0,")
     223                      utf8 = s.encode('utf-8')
     224                      self.write(f'.utf8 = {make_string_literal(utf8)},')
     225                      self.write(f'.utf8_length = {len(utf8)},')
     226                  with self.block(f"._data =", ","):
     227                      for i in range(0, len(s), 16):
     228                          data = s[i:i+16]
     229                          self.write(", ".join(map(str, map(ord, data))) + ",")
     230                  return f"& {name}._compact._base.ob_base"
     231  
     232  
    def generate_code(self, name: str, code: types.CodeType) -> str:
        """Emit a static code object called *name* and return a C pointer to it.

        The objects the code refers to (constants, names, filename, line
        table, ...) are generated first through self.generate(), so their
        definitions precede the code struct in the output.  Each call also
        consumes one value of the module-level ``next_code_version`` counter
        and appends a ``_PyStaticCode_Init`` / ``_PyStaticCode_Fini`` call to
        ``self.inits`` / ``self.finis``.
        """
        global next_code_version
        # The ordering here matches PyCode_NewWithPosOnlyArgs()
        # (but see below).
        co_consts = self.generate(name + "_consts", code.co_consts)
        co_names = self.generate(name + "_names", code.co_names)
        co_filename = self.generate(name + "_filename", code.co_filename)
        co_name = self.generate(name + "_name", code.co_name)
        co_qualname = self.generate(name + "_qualname", code.co_qualname)
        co_linetable = self.generate(name + "_linetable", code.co_linetable)
        co_exceptiontable = self.generate(name + "_exceptiontable", code.co_exceptiontable)
        # These fields are not directly accessible
        localsplusnames, localspluskinds = get_localsplus(code)
        co_localsplusnames = self.generate(name + "_localsplusnames", localsplusnames)
        co_localspluskinds = self.generate(name + "_localspluskinds", localspluskinds)
        # Derived values
        nlocals, ncellvars, nfreevars = \
            get_localsplus_counts(code, localsplusnames, localspluskinds)
        # The raw bytecode is embedded as a C string literal.
        co_code_adaptive = make_string_literal(code.co_code)
        self.write("static")
        with self.indent():
            self.write(f"struct _PyCode_DEF({len(code.co_code)})")
        with self.block(f"{name} =", ";"):
            # ob_size is half the bytecode length in bytes
            # (presumably the number of 2-byte code units -- TODO confirm).
            self.object_var_head("PyCode_Type", len(code.co_code) // 2)
            # But the ordering here must match that in cpython/code.h
            # (which is a pain because we tend to reorder those for perf)
            # otherwise MSVC doesn't like it.
            self.write(f".co_consts = {co_consts},")
            self.write(f".co_names = {co_names},")
            self.write(f".co_exceptiontable = {co_exceptiontable},")
            self.field(code, "co_flags")
            self.field(code, "co_argcount")
            self.field(code, "co_posonlyargcount")
            self.field(code, "co_kwonlyargcount")
            # The following should remain in sync with _PyFrame_NumSlotsForCodeObject
            self.write(f".co_framesize = {code.co_stacksize + len(localsplusnames)} + FRAME_SPECIALS_SIZE,")
            self.field(code, "co_stacksize")
            self.field(code, "co_firstlineno")
            self.write(f".co_nlocalsplus = {len(localsplusnames)},")
            self.field(code, "co_nlocals")
            self.write(f".co_ncellvars = {ncellvars},")
            self.write(f".co_nfreevars = {nfreevars},")
            # Each emitted code object gets its own version number.
            self.write(f".co_version = {next_code_version},")
            next_code_version += 1
            self.write(f".co_localsplusnames = {co_localsplusnames},")
            self.write(f".co_localspluskinds = {co_localspluskinds},")
            self.write(f".co_filename = {co_filename},")
            self.write(f".co_name = {co_name},")
            self.write(f".co_qualname = {co_qualname},")
            self.write(f".co_linetable = {co_linetable},")
            self.write(f"._co_cached = NULL,")
            self.write(f".co_code_adaptive = {co_code_adaptive},")
            # Every second byte is inspected; the index of the first one equal
            # to RESUME is recorded.  Nothing is written if there is none.
            for i, op in enumerate(code.co_code[::2]):
                if op == RESUME:
                    self.write(f"._co_firsttraceable = {i},")
                    break
        name_as_code = f"(PyCodeObject *)&{name}"
        # Collected here; the module-level generate() writes them into
        # _Py_Deepfreeze_Fini() and _Py_Deepfreeze_Init().
        self.finis.append(f"_PyStaticCode_Fini({name_as_code});")
        self.inits.append(f"_PyStaticCode_Init({name_as_code})")
        return f"& {name}.ob_base.ob_base"
     293  
     294      def generate_tuple(self, name: str, t: Tuple[object, ...]) -> str:
     295          if len(t) == 0:
     296              return f"(PyObject *)& _Py_SINGLETON(tuple_empty)"
     297          items = [self.generate(f"{name}_{i}", it) for i, it in enumerate(t)]
     298          self.write("static")
     299          with self.indent():
     300              with self.block("struct"):
     301                  self.write("PyGC_Head _gc_head;")
     302                  with self.block("struct", "_object;"):
     303                      self.write("PyObject_VAR_HEAD")
     304                      if t:
     305                          self.write(f"PyObject *ob_item[{len(t)}];")
     306          with self.block(f"{name} =", ";"):
     307              with self.block("._object =", ","):
     308                  self.object_var_head("PyTuple_Type", len(t))
     309                  if items:
     310                      with self.block(f".ob_item =", ","):
     311                          for item in items:
     312                              self.write(item + ",")
     313          return f"& {name}._object.ob_base.ob_base"
     314  
     315      def _generate_int_for_bits(self, name: str, i: int, digit: int) -> None:
     316          sign = (i > 0) - (i < 0)
     317          i = abs(i)
     318          digits: list[int] = []
     319          while i:
     320              i, rem = divmod(i, digit)
     321              digits.append(rem)
     322          self.write("static")
     323          with self.indent():
     324              with self.block("struct"):
     325                  self.write("PyObject ob_base;")
     326                  self.write("uintptr_t lv_tag;")
     327                  self.write(f"digit ob_digit[{max(1, len(digits))}];")
     328          with self.block(f"{name} =", ";"):
     329              self.object_head("PyLong_Type")
     330              self.write(f".lv_tag = TAG_FROM_SIGN_AND_SIZE({sign}, {len(digits)}),")
     331              if digits:
     332                  ds = ", ".join(map(str, digits))
     333                  self.write(f".ob_digit = {{ {ds} }},")
     334  
     335      def generate_int(self, name: str, i: int) -> str:
     336          if -5 <= i <= 256:
     337              return f"(PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + {i}]"
     338          if i >= 0:
     339              name = f"const_int_{i}"
     340          else:
     341              name = f"const_int_negative_{abs(i)}"
     342          if abs(i) < 2**15:
     343              self._generate_int_for_bits(name, i, 2**15)
     344          else:
     345              connective = "if"
     346              for bits_in_digit in 15, 30:
     347                  self.write(f"#{connective} PYLONG_BITS_IN_DIGIT == {bits_in_digit}")
     348                  self._generate_int_for_bits(name, i, 2**bits_in_digit)
     349                  connective = "elif"
     350              self.write("#else")
     351              self.write('#error "PYLONG_BITS_IN_DIGIT should be 15 or 30"')
     352              self.write("#endif")
     353              # If neither clause applies, it won't compile
     354          return f"& {name}.ob_base"
     355  
     356      def generate_float(self, name: str, x: float) -> str:
     357          with self.block(f"static PyFloatObject {name} =", ";"):
     358              self.object_head("PyFloat_Type")
     359              self.write(f".ob_fval = {x},")
     360          return f"&{name}.ob_base"
     361  
     362      def generate_complex(self, name: str, z: complex) -> str:
     363          with self.block(f"static PyComplexObject {name} =", ";"):
     364              self.object_head("PyComplex_Type")
     365              self.write(f".cval = {{ {z.real}, {z.imag} }},")
     366          return f"&{name}.ob_base"
     367  
     368      def generate_frozenset(self, name: str, fs: FrozenSet[object]) -> str:
     369          try:
     370              fs = sorted(fs)
     371          except TypeError:
     372              # frozen set with incompatible types, fallback to repr()
     373              fs = sorted(fs, key=repr)
     374          ret = self.generate_tuple(name, tuple(fs))
     375          self.write("// TODO: The above tuple should be a frozenset")
     376          return ret
     377  
     378      def generate_file(self, module: str, code: object)-> None:
     379          module = module.replace(".", "_")
     380          self.generate(f"{module}_toplevel", code)
     381          self.write(EPILOGUE.format(name=module))
     382  
     383      def generate(self, name: str, obj: object) -> str:
     384          # Use repr() in the key to distinguish -0.0 from +0.0
     385          key = (type(obj), obj, repr(obj))
     386          if key in self.cache:
     387              self.hits += 1
     388              # print(f"Cache hit {key!r:.40}: {self.cache[key]!r:.40}")
     389              return self.cache[key]
     390          self.misses += 1
     391          if isinstance(obj, (types.CodeType, umarshal.Code)) :
     392              val = self.generate_code(name, obj)
     393          elif isinstance(obj, tuple):
     394              val = self.generate_tuple(name, obj)
     395          elif isinstance(obj, str):
     396              val = self.generate_unicode(name, obj)
     397          elif isinstance(obj, bytes):
     398              val = self.generate_bytes(name, obj)
     399          elif obj is True:
     400              return "Py_True"
     401          elif obj is False:
     402              return "Py_False"
     403          elif isinstance(obj, int):
     404              val = self.generate_int(name, obj)
     405          elif isinstance(obj, float):
     406              val = self.generate_float(name, obj)
     407          elif isinstance(obj, complex):
     408              val = self.generate_complex(name, obj)
     409          elif isinstance(obj, frozenset):
     410              val = self.generate_frozenset(name, obj)
     411          elif obj is builtins.Ellipsis:
     412              return "Py_Ellipsis"
     413          elif obj is None:
     414              return "Py_None"
     415          else:
     416              raise TypeError(
     417                  f"Cannot generate code for {type(obj).__name__} object")
     418          # print(f"Cache store {key!r:.40}: {val!r:.40}")
     419          self.cache[key] = val
     420          return val
     421  
     422  
# C accessor function written by Printer.generate_file() after a module's
# objects.  The text goes through str.format(name=...), which is why the
# literal braces of the C function body are doubled.
EPILOGUE = """
PyObject *
_Py_get_{name}_toplevel(void)
{{
    return Py_NewRef((PyObject *) &{name}_toplevel);
}}
"""
     430  
     431  FROZEN_COMMENT_C = "/* Auto-generated by Programs/_freeze_module.c */"
     432  FROZEN_COMMENT_PY = "/* Auto-generated by Programs/_freeze_module.py */"
     433  
     434  FROZEN_DATA_LINE = r"\s*(\d+,\s*)+\s*"
     435  
     436  
     437  def is_frozen_header(source: str) -> bool:
     438      return source.startswith((FROZEN_COMMENT_C, FROZEN_COMMENT_PY))
     439  
     440  
     441  def decode_frozen_data(source: str) -> types.CodeType:
     442      lines = source.splitlines()
     443      while lines and re.match(FROZEN_DATA_LINE, lines[0]) is None:
     444          del lines[0]
     445      while lines and re.match(FROZEN_DATA_LINE, lines[-1]) is None:
     446          del lines[-1]
     447      values: Tuple[int, ...] = ast.literal_eval("".join(lines).strip())
     448      data = bytes(values)
     449      return umarshal.loads(data)
     450  
     451  
     452  def generate(args: list[str], output: TextIO) -> None:
     453      printer = Printer(output)
     454      for arg in args:
     455          file, modname = arg.rsplit(':', 1)
     456          with open(file, "r", encoding="utf8") as fd:
     457              source = fd.read()
     458              if is_frozen_header(source):
     459                  code = decode_frozen_data(source)
     460              else:
     461                  code = compile(fd.read(), f"<frozen {modname}>", "exec")
     462              printer.generate_file(modname, code)
     463      with printer.block(f"void\n_Py_Deepfreeze_Fini(void)"):
     464          for p in printer.finis:
     465              printer.write(p)
     466      with printer.block(f"int\n_Py_Deepfreeze_Init(void)"):
     467          for p in printer.inits:
     468              with printer.block(f"if ({p} < 0)"):
     469                  printer.write("return -1;")
     470          printer.write("return 0;")
     471      printer.write(f"\nuint32_t _Py_next_func_version = {next_code_version};\n")
     472      if verbose:
     473          print(f"Cache hits: {printer.hits}, misses: {printer.misses}")
     474  
     475  
# Command-line interface; main() parses it.  Each positional argument names
# one input in "file:modname" form.
parser = argparse.ArgumentParser()
parser.add_argument("-o", "--output", help="Defaults to deepfreeze.c", default="deepfreeze.c")
parser.add_argument("-v", "--verbose", action="store_true", help="Print diagnostics")
parser.add_argument('args', nargs="+", help="Input file and module name (required) in file:modname format")
     480  
     481  @contextlib.contextmanager
     482  def report_time(label: str):
     483      t0 = time.time()
     484      try:
     485          yield
     486      finally:
     487          t1 = time.time()
     488      if verbose:
     489          print(f"{label}: {t1-t0:.3f} sec")
     490  
     491  
     492  def main() -> None:
     493      global verbose
     494      args = parser.parse_args()
     495      verbose = args.verbose
     496      output = args.output
     497      with open(output, "w", encoding="utf-8") as file:
     498          with report_time("generate"):
     499              generate(args.args, file)
     500      if verbose:
     501          print(f"Wrote {os.path.getsize(output)} bytes to {output}")
     502  
     503  
     504  if __name__ == "__main__":
     505      main()