1 """Deep freeze
2
3 The script may be executed by _bootstrap_python interpreter.
4 Shared library extension modules are not available in that case.
5 On Windows, and in cross-compilation cases, it is executed
6 by Python 3.10, and 3.11 features are not available.
7 """
import argparse
import ast
import builtins
import collections
import contextlib
import os
import re
import time
import types
from typing import Dict, FrozenSet, Iterator, TextIO, Tuple

import umarshal
from generate_global_objects import get_identifiers_and_strings
21
# Diagnostics switch; main() overwrites it from the -v/--verbose option.
verbose = False
# Lookup tables consulted by Printer.generate_unicode(): strings found here are
# emitted as _Py_STR()/_Py_ID() references instead of new static objects.
identifiers, strings = get_identifiers_and_strings()

# This must be kept in sync with opcode.py
RESUME = 151
27
def isprintable(b: bytes) -> bool:
    """Return True when every byte of *b* lies in the range 0x20..0x7e.

    An empty byte string counts as printable.
    """
    for byte in b:
        if byte < 0x20 or byte >= 0x7f:
            return False
    return True
30
31
def make_string_literal(b: bytes) -> str:
    """Render *b* as a double-quoted C string literal.

    Printable input is kept readable, with backslashes and double quotes
    escaped.  Any other input is written entirely as ``\\xNN`` escapes, one
    per byte.
    """
    if isprintable(b):
        text = b.decode("ascii")
        # Escape backslashes first so the quote escapes are not doubled.
        body = text.replace("\\", "\\\\").replace('"', '\\"')
    else:
        body = "".join(f"\\x{byte:02x}" for byte in b)
    return f'"{body}"'
41
42
# Kind flags for one "localsplus" entry; get_localsplus() ORs them together
# per variable name and the result is emitted as co_localspluskinds.
CO_FAST_LOCAL = 0x20
CO_FAST_CELL = 0x40
CO_FAST_FREE = 0x80

# Value written to .co_version of the next generated code object;
# Printer.generate_code() increments it after each use.
next_code_version = 1
48
def get_localsplus(code: types.CodeType):
    """Build the combined names/kinds tables for *code*.

    Returns a ``(names, kinds)`` pair: a tuple of variable names in
    first-seen order (co_varnames, then co_cellvars, then co_freevars) and a
    ``bytes`` object of the same length holding the OR-ed CO_FAST_* flags of
    each name.
    """
    kind_by_name = {}
    groups = (
        (code.co_varnames, CO_FAST_LOCAL),
        (code.co_cellvars, CO_FAST_CELL),
        (code.co_freevars, CO_FAST_FREE),
    )
    for group, flag in groups:
        for name in group:
            # Re-assigning an existing key keeps its original position.
            kind_by_name[name] = kind_by_name.get(name, 0) | flag
    return tuple(kind_by_name), bytes(kind_by_name.values())
58
59
def get_localsplus_counts(code: types.CodeType,
                          names: Tuple[str, ...],
                          kinds: bytes) -> Tuple[int, int, int]:
    """Count the locals, cell variables and free variables in *kinds*.

    *names* and *kinds* are the parallel tables produced by
    get_localsplus().  The totals are cross-checked against *code*.

    Returns:
        ``(nlocals, ncellvars, nfreevars)``.

    Raises:
        AssertionError: if the tables differ in length or a total disagrees
            with the corresponding attribute of *code*.
    """
    nlocals = 0
    ncellvars = 0
    nfreevars = 0
    assert len(names) == len(kinds)
    for kind in kinds:
        if kind & CO_FAST_LOCAL:
            nlocals += 1
            # An entry may carry both LOCAL and CELL (get_localsplus() ORs the
            # flags together); it then counts towards both totals.
            if kind & CO_FAST_CELL:
                ncellvars += 1
        elif kind & CO_FAST_CELL:
            ncellvars += 1
        elif kind & CO_FAST_FREE:
            nfreevars += 1
    assert nlocals == len(code.co_varnames) == code.co_nlocals, \
        (nlocals, len(code.co_varnames), code.co_nlocals)
    assert ncellvars == len(code.co_cellvars)
    assert nfreevars == len(code.co_freevars)
    return nlocals, ncellvars, nfreevars
81
82
# Storage kinds returned by analyze_character_width(); Printer.generate_unicode()
# writes the value into the generated object's .kind field.
PyUnicode_1BYTE_KIND = 1
PyUnicode_2BYTE_KIND = 2
PyUnicode_4BYTE_KIND = 4
86
87
def analyze_character_width(s: str) -> Tuple[int, bool]:
    """Classify *s* by its largest code point.

    Returns ``(kind, ascii)`` where *kind* is one of the PyUnicode_*_KIND
    constants and *ascii* is True only when every character is <= U+007F.
    An empty string is classified as 1-byte ASCII.
    """
    widest = max(s, default=' ')
    if widest > '\uFFFF':
        return PyUnicode_4BYTE_KIND, False
    if widest > '\xFF':
        return PyUnicode_2BYTE_KIND, False
    return PyUnicode_1BYTE_KIND, widest <= '\x7F'
101
102
def removesuffix(base: str, suffix: str) -> str:
    """Return *base* without a trailing *suffix*.

    *base* is returned unchanged when it does not end with *suffix* or when
    *suffix* is empty.
    """
    # str.removesuffix() (Python 3.9+) implements exactly this contract.
    return base.removesuffix(suffix)
107
class Printer:
    """Writes C source that statically defines Python constant objects.

    Every ``generate_*`` method returns a C expression that refers to the
    object, writing any needed static definition to *file* first.  Results
    are memoized in ``self.cache`` so equal constants are emitted once.

    Attributes:
        level: current indentation depth used by write().
        file: text stream receiving the generated C code.
        cache: maps ``(type, value, repr)`` keys to C expressions.
        hits, misses: cache statistics.
        finis: C statements collected for the deepfreeze "fini" function.
        inits: C expressions collected for the deepfreeze "init" function.
    """

    def __init__(self, file: TextIO) -> None:
        """Bind the printer to *file* and write the leading #include lines."""
        self.level = 0
        self.file = file
        self.cache: Dict[tuple[type, object, str], str] = {}
        self.hits, self.misses = 0, 0
        self.finis: list[str] = []
        self.inits: list[str] = []
        self.write('#include "Python.h"')
        self.write('#include "internal/pycore_gc.h"')
        self.write('#include "internal/pycore_code.h"')
        self.write('#include "internal/pycore_frame.h"')
        self.write('#include "internal/pycore_long.h"')
        self.write("")

    @contextlib.contextmanager
    def indent(self) -> Iterator[None]:
        """Context manager that indents output one level deeper.

        The previous level is restored on exit, even if the body raises.
        """
        save_level = self.level
        try:
            self.level += 1
            yield
        finally:
            self.level = save_level

    def write(self, arg: str) -> None:
        """Write *arg* as one line, prefixed by the current indentation."""
        self.file.writelines((" "*self.level, arg, "\n"))

    @contextlib.contextmanager
    def block(self, prefix: str, suffix: str = "") -> Iterator[None]:
        """Context manager that wraps the body's output in ``prefix { ... }suffix``.

        The closing brace is written only when the body finishes without
        raising.
        """
        self.write(prefix + " {")
        with self.indent():
            yield
        self.write("}" + suffix)

    def object_head(self, typename: str) -> None:
        """Write the ``.ob_base`` initializer of an object of C type *typename*."""
        with self.block(".ob_base =", ","):
            self.write(".ob_refcnt = _Py_IMMORTAL_REFCNT,")
            self.write(f".ob_type = &{typename},")

    def object_var_head(self, typename: str, size: int) -> None:
        """Write the ``.ob_base`` initializer of a variable-size object."""
        with self.block(".ob_base =", ","):
            self.object_head(typename)
            self.write(f".ob_size = {size},")

    def field(self, obj: object, name: str) -> None:
        """Write ``.name = value,`` using the attribute *name* of *obj*."""
        self.write(f".{name} = {getattr(obj, name)},")

    def generate_bytes(self, name: str, b: bytes) -> str:
        """Return a C expression for the bytes object *b*.

        Empty and one-byte values map to runtime singletons; anything else is
        emitted as a static struct called *name*.
        """
        if b == b"":
            return "(PyObject *)&_Py_SINGLETON(bytes_empty)"
        if len(b) == 1:
            return f"(PyObject *)&_Py_SINGLETON(bytes_characters[{b[0]}])"
        self.write("static")
        with self.indent():
            with self.block("struct"):
                self.write("PyObject_VAR_HEAD")
                self.write("Py_hash_t ob_shash;")
                # One extra char for the terminating NUL.
                self.write(f"char ob_sval[{len(b) + 1}];")
        with self.block(f"{name} =", ";"):
            self.object_var_head("PyBytes_Type", len(b))
            self.write(".ob_shash = -1,")
            self.write(f".ob_sval = {make_string_literal(b)},")
        return f"& {name}.ob_base.ob_base"

    def generate_unicode(self, name: str, s: str) -> str:
        """Return a C expression for the str object *s*.

        Strings present in the module-level ``strings`` / ``identifiers``
        tables and one-character Latin-1 strings are returned as references to
        existing runtime objects.  Everything else is emitted as a static
        struct; identifier-like strings are named ``const_str_<s>`` instead
        of *name*.
        """
        if s in strings:
            return f"&_Py_STR({strings[s]})"
        if s in identifiers:
            return f"&_Py_ID({s})"
        if len(s) == 1:
            c = ord(s)
            if c < 128:
                return f"(PyObject *)&_Py_SINGLETON(strings).ascii[{c}]"
            elif c < 256:
                return f"(PyObject *)&_Py_SINGLETON(strings).latin1[{c - 128}]"
        if re.match(r'\A[A-Za-z0-9_]+\Z', s):
            name = f"const_str_{s}"
        kind, is_ascii = analyze_character_width(s)
        if kind == PyUnicode_1BYTE_KIND:
            datatype = "uint8_t"
        elif kind == PyUnicode_2BYTE_KIND:
            datatype = "uint16_t"
        else:
            datatype = "uint32_t"
        self.write("static")
        with self.indent():
            with self.block("struct"):
                if is_ascii:
                    self.write("PyASCIIObject _ascii;")
                else:
                    self.write("PyCompactUnicodeObject _compact;")
                # One extra element for the terminating zero.
                self.write(f"{datatype} _data[{len(s)+1}];")
        with self.block(f"{name} =", ";"):
            if is_ascii:
                with self.block("._ascii =", ","):
                    self.object_head("PyUnicode_Type")
                    self.write(f".length = {len(s)},")
                    self.write(".hash = -1,")
                    with self.block(".state =", ","):
                        self.write(".kind = 1,")
                        self.write(".compact = 1,")
                        self.write(".ascii = 1,")
                self.write(f"._data = {make_string_literal(s.encode('ascii'))},")
                # Returning inside the block still writes the closing brace.
                return f"& {name}._ascii.ob_base"
            else:
                with self.block("._compact =", ","):
                    with self.block("._base =", ","):
                        self.object_head("PyUnicode_Type")
                        self.write(f".length = {len(s)},")
                        self.write(".hash = -1,")
                        with self.block(".state =", ","):
                            self.write(f".kind = {kind},")
                            self.write(".compact = 1,")
                            self.write(".ascii = 0,")
                    utf8 = s.encode('utf-8')
                    self.write(f'.utf8 = {make_string_literal(utf8)},')
                    self.write(f'.utf8_length = {len(utf8)},')
                with self.block("._data =", ","):
                    # Code points as decimal numbers, 16 per output line.
                    for i in range(0, len(s), 16):
                        data = s[i:i+16]
                        self.write(", ".join(map(str, map(ord, data))) + ",")
                return f"& {name}._compact._base.ob_base"

    def generate_code(self, name: str, code: types.CodeType) -> str:
        """Emit a static code object for *code* and return a C expression for it.

        The constants, names and tables of *code* are generated first, then a
        ``_PyCode_DEF`` struct called *name*.  The matching
        ``_PyStaticCode_Init`` / ``_PyStaticCode_Fini`` calls are appended to
        ``self.inits`` / ``self.finis``, and the module-level
        ``next_code_version`` counter is incremented.
        """
        global next_code_version
        # The ordering here matches PyCode_NewWithPosOnlyArgs()
        # (but see below).
        co_consts = self.generate(name + "_consts", code.co_consts)
        co_names = self.generate(name + "_names", code.co_names)
        co_filename = self.generate(name + "_filename", code.co_filename)
        co_name = self.generate(name + "_name", code.co_name)
        co_qualname = self.generate(name + "_qualname", code.co_qualname)
        co_linetable = self.generate(name + "_linetable", code.co_linetable)
        co_exceptiontable = self.generate(name + "_exceptiontable", code.co_exceptiontable)
        # These fields are not directly accessible
        localsplusnames, localspluskinds = get_localsplus(code)
        co_localsplusnames = self.generate(name + "_localsplusnames", localsplusnames)
        co_localspluskinds = self.generate(name + "_localspluskinds", localspluskinds)
        # Derived values
        nlocals, ncellvars, nfreevars = \
            get_localsplus_counts(code, localsplusnames, localspluskinds)
        co_code_adaptive = make_string_literal(code.co_code)
        self.write("static")
        with self.indent():
            self.write(f"struct _PyCode_DEF({len(code.co_code)})")
        with self.block(f"{name} =", ";"):
            self.object_var_head("PyCode_Type", len(code.co_code) // 2)
            # But the ordering here must match that in cpython/code.h
            # (which is a pain because we tend to reorder those for perf)
            # otherwise MSVC doesn't like it.
            self.write(f".co_consts = {co_consts},")
            self.write(f".co_names = {co_names},")
            self.write(f".co_exceptiontable = {co_exceptiontable},")
            self.field(code, "co_flags")
            self.field(code, "co_argcount")
            self.field(code, "co_posonlyargcount")
            self.field(code, "co_kwonlyargcount")
            # The following should remain in sync with _PyFrame_NumSlotsForCodeObject
            self.write(f".co_framesize = {code.co_stacksize + len(localsplusnames)} + FRAME_SPECIALS_SIZE,")
            self.field(code, "co_stacksize")
            self.field(code, "co_firstlineno")
            self.write(f".co_nlocalsplus = {len(localsplusnames)},")
            self.field(code, "co_nlocals")
            self.write(f".co_ncellvars = {ncellvars},")
            self.write(f".co_nfreevars = {nfreevars},")
            self.write(f".co_version = {next_code_version},")
            next_code_version += 1
            self.write(f".co_localsplusnames = {co_localsplusnames},")
            self.write(f".co_localspluskinds = {co_localspluskinds},")
            self.write(f".co_filename = {co_filename},")
            self.write(f".co_name = {co_name},")
            self.write(f".co_qualname = {co_qualname},")
            self.write(f".co_linetable = {co_linetable},")
            self.write("._co_cached = NULL,")
            self.write(f".co_code_adaptive = {co_code_adaptive},")
            # Opcodes sit at the even offsets of co_code; record the index of
            # the first RESUME instruction, if there is one.
            for i, op in enumerate(code.co_code[::2]):
                if op == RESUME:
                    self.write(f"._co_firsttraceable = {i},")
                    break
        name_as_code = f"(PyCodeObject *)&{name}"
        self.finis.append(f"_PyStaticCode_Fini({name_as_code});")
        self.inits.append(f"_PyStaticCode_Init({name_as_code})")
        return f"& {name}.ob_base.ob_base"

    def generate_tuple(self, name: str, t: Tuple[object, ...]) -> str:
        """Return a C expression for the tuple *t*.

        The empty tuple maps to the runtime singleton.  Otherwise every item
        is generated first (as ``<name>_<index>``) and then a static struct
        called *name* that points at them.
        """
        if len(t) == 0:
            return "(PyObject *)& _Py_SINGLETON(tuple_empty)"
        items = [self.generate(f"{name}_{i}", it) for i, it in enumerate(t)]
        # From here on the tuple is known to be non-empty.
        self.write("static")
        with self.indent():
            with self.block("struct"):
                self.write("PyGC_Head _gc_head;")
                with self.block("struct", "_object;"):
                    self.write("PyObject_VAR_HEAD")
                    self.write(f"PyObject *ob_item[{len(t)}];")
        with self.block(f"{name} =", ";"):
            with self.block("._object =", ","):
                self.object_var_head("PyTuple_Type", len(t))
                with self.block(".ob_item =", ","):
                    for item in items:
                        self.write(item + ",")
        return f"& {name}._object.ob_base.ob_base"

    def _generate_int_for_bits(self, name: str, i: int, digit: int) -> None:
        """Emit a static int struct *name* holding *i* in base-*digit* digits.

        Digits are stored least-significant first; the sign and digit count
        are passed to ``TAG_FROM_SIGN_AND_SIZE``.
        """
        sign = (i > 0) - (i < 0)
        i = abs(i)
        digits: list[int] = []
        while i:
            i, rem = divmod(i, digit)
            digits.append(rem)
        self.write("static")
        with self.indent():
            with self.block("struct"):
                self.write("PyObject ob_base;")
                self.write("uintptr_t lv_tag;")
                # Declare at least one digit so the array is never zero-length.
                self.write(f"digit ob_digit[{max(1, len(digits))}];")
        with self.block(f"{name} =", ";"):
            self.object_head("PyLong_Type")
            self.write(f".lv_tag = TAG_FROM_SIGN_AND_SIZE({sign}, {len(digits)}),")
            if digits:
                ds = ", ".join(map(str, digits))
                self.write(f".ob_digit = {{ {ds} }},")

    def generate_int(self, name: str, i: int) -> str:
        """Return a C expression for the int *i*.

        Values in -5..256 map to the small-int table.  Other values are
        emitted as ``const_int_<i>`` / ``const_int_negative_<abs(i)>`` (the
        *name* argument is not used for them).  Values that do not fit in one
        15-bit digit are emitted twice, under ``#if PYLONG_BITS_IN_DIGIT``
        for 15 and for 30 bits.
        """
        if -5 <= i <= 256:
            return f"(PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + {i}]"
        if i >= 0:
            name = f"const_int_{i}"
        else:
            name = f"const_int_negative_{abs(i)}"
        if abs(i) < 2**15:
            # A single digit has the same layout for both digit sizes.
            self._generate_int_for_bits(name, i, 2**15)
        else:
            connective = "if"
            for bits_in_digit in 15, 30:
                self.write(f"#{connective} PYLONG_BITS_IN_DIGIT == {bits_in_digit}")
                self._generate_int_for_bits(name, i, 2**bits_in_digit)
                connective = "elif"
            self.write("#else")
            self.write('#error "PYLONG_BITS_IN_DIGIT should be 15 or 30"')
            self.write("#endif")
            # If neither clause applies, it won't compile
        return f"& {name}.ob_base"

    def generate_float(self, name: str, x: float) -> str:
        """Emit a static float object *name* and return a C expression for it."""
        with self.block(f"static PyFloatObject {name} =", ";"):
            self.object_head("PyFloat_Type")
            # NOTE(review): a non-finite value would be written as inf/nan,
            # which is presumably not a valid C initializer -- confirm whether
            # such constants can reach this point.
            self.write(f".ob_fval = {x},")
        return f"&{name}.ob_base"

    def generate_complex(self, name: str, z: complex) -> str:
        """Emit a static complex object *name* and return a C expression for it."""
        with self.block(f"static PyComplexObject {name} =", ";"):
            self.object_head("PyComplex_Type")
            self.write(f".cval = {{ {z.real}, {z.imag} }},")
        return f"&{name}.ob_base"

    def generate_frozenset(self, name: str, fs: FrozenSet[object]) -> str:
        """Emit the members of *fs* as a sorted tuple and return its expression.

        Members are sorted so the output is deterministic; when they are not
        mutually comparable they are ordered by repr() instead.
        """
        try:
            members = sorted(fs)
        except TypeError:
            # frozen set with incompatible types, fallback to repr()
            members = sorted(fs, key=repr)
        ret = self.generate_tuple(name, tuple(members))
        self.write("// TODO: The above tuple should be a frozenset")
        return ret

    def generate_file(self, module: str, code: object) -> None:
        """Emit *code* as ``<module>_toplevel`` followed by its C accessor.

        Dots in *module* are replaced by underscores to form the C names.
        """
        module = module.replace(".", "_")
        self.generate(f"{module}_toplevel", code)
        self.write(EPILOGUE.format(name=module))

    def generate(self, name: str, obj: object) -> str:
        """Return a C expression for *obj*, emitting a definition if needed.

        Dispatches on the type of *obj* and memoizes the result.  True,
        False, None and Ellipsis are returned directly without being cached.

        Raises:
            TypeError: if *obj* is of an unsupported type.
        """
        # Use repr() in the key to distinguish -0.0 from +0.0
        key = (type(obj), obj, repr(obj))
        if key in self.cache:
            self.hits += 1
            # print(f"Cache hit {key!r:.40}: {self.cache[key]!r:.40}")
            return self.cache[key]
        self.misses += 1
        if isinstance(obj, (types.CodeType, umarshal.Code)):
            val = self.generate_code(name, obj)
        elif isinstance(obj, tuple):
            val = self.generate_tuple(name, obj)
        elif isinstance(obj, str):
            val = self.generate_unicode(name, obj)
        elif isinstance(obj, bytes):
            val = self.generate_bytes(name, obj)
        # The bool checks must precede the int check: bool is a subclass of int.
        elif obj is True:
            return "Py_True"
        elif obj is False:
            return "Py_False"
        elif isinstance(obj, int):
            val = self.generate_int(name, obj)
        elif isinstance(obj, float):
            val = self.generate_float(name, obj)
        elif isinstance(obj, complex):
            val = self.generate_complex(name, obj)
        elif isinstance(obj, frozenset):
            val = self.generate_frozenset(name, obj)
        elif obj is builtins.Ellipsis:
            return "Py_Ellipsis"
        elif obj is None:
            return "Py_None"
        else:
            raise TypeError(
                f"Cannot generate code for {type(obj).__name__} object")
        # print(f"Cache store {key!r:.40}: {val!r:.40}")
        self.cache[key] = val
        return val
421
422
# C accessor function appended by Printer.generate_file(); {name} is the
# module name with dots replaced by underscores.
EPILOGUE = """
PyObject *
_Py_get_{name}_toplevel(void)
{{
return Py_NewRef((PyObject *) &{name}_toplevel);
}}
"""

# Leading comments that is_frozen_header() accepts as marking frozen data.
FROZEN_COMMENT_C = "/* Auto-generated by Programs/_freeze_module.c */"
FROZEN_COMMENT_PY = "/* Auto-generated by Programs/_freeze_module.py */"

# Matches the start of a line of comma-terminated decimal numbers; used by
# decode_frozen_data() to locate the data lines.
FROZEN_DATA_LINE = r"\s*(\d+,\s*)+\s*"
435
436
def is_frozen_header(source: str) -> bool:
    """Return True if *source* begins with one of the FROZEN_COMMENT_* markers."""
    markers = (FROZEN_COMMENT_C, FROZEN_COMMENT_PY)
    return any(source.startswith(marker) for marker in markers)
439
440
def decode_frozen_data(source: str) -> types.CodeType:
    """Extract the marshalled bytes from frozen-header text and load them.

    Lines before the first and after the last line matching FROZEN_DATA_LINE
    are dropped.  The remaining lines are joined, evaluated as a literal
    sequence of integers, converted to bytes and passed to umarshal.loads().
    """
    rows = source.splitlines()
    first, stop = 0, len(rows)
    # Narrow [first, stop) until both ends sit on a data line.
    while first < stop and re.match(FROZEN_DATA_LINE, rows[first]) is None:
        first += 1
    while stop > first and re.match(FROZEN_DATA_LINE, rows[stop - 1]) is None:
        stop -= 1
    payload = "".join(rows[first:stop]).strip()
    values: Tuple[int, ...] = ast.literal_eval(payload)
    return umarshal.loads(bytes(values))
450
451
def generate(args: list[str], output: TextIO) -> None:
    """Write the deep-frozen C source for every module in *args* to *output*.

    Each element of *args* has the form ``file:modname``.  A file starting
    with a frozen-data marker comment is decoded; any other file is compiled
    as Python source.  After all modules, the ``_Py_Deepfreeze_Fini`` and
    ``_Py_Deepfreeze_Init`` functions and the ``_Py_next_func_version``
    definition are written.
    """
    printer = Printer(output)
    for arg in args:
        file, modname = arg.rsplit(':', 1)
        with open(file, "r", encoding="utf8") as fd:
            source = fd.read()
        if is_frozen_header(source):
            code = decode_frozen_data(source)
        else:
            # Compile the text already read: the file is at EOF by now, so
            # reading it a second time would yield an empty string.
            code = compile(source, f"<frozen {modname}>", "exec")
        printer.generate_file(modname, code)
    with printer.block("void\n_Py_Deepfreeze_Fini(void)"):
        for p in printer.finis:
            printer.write(p)
    with printer.block("int\n_Py_Deepfreeze_Init(void)"):
        for p in printer.inits:
            with printer.block(f"if ({p} < 0)"):
                printer.write("return -1;")
        printer.write("return 0;")
    printer.write(f"\nuint32_t _Py_next_func_version = {next_code_version};\n")
    if verbose:
        print(f"Cache hits: {printer.hits}, misses: {printer.misses}")
474
475
# Command-line interface; main() reads the parsed output, verbose and args values.
parser = argparse.ArgumentParser()
parser.add_argument("-o", "--output", help="Defaults to deepfreeze.c", default="deepfreeze.c")
parser.add_argument("-v", "--verbose", action="store_true", help="Print diagnostics")
parser.add_argument('args', nargs="+", help="Input file and module name (required) in file:modname format")
480
@contextlib.contextmanager
def report_time(label: str):
    """Context manager that times its body.

    When the module-level ``verbose`` flag is set and the body finishes
    without raising, prints ``<label>: <seconds> sec``.
    """
    started = time.time()
    try:
        yield
    finally:
        finished = time.time()
    if not verbose:
        return
    print(f"{label}: {finished-started:.3f} sec")
490
491
def main() -> None:
    """Parse the command line and write the generated C file.

    Sets the module-level ``verbose`` flag from the parsed options.  In
    verbose mode the size of the output file is printed once it is closed.
    """
    global verbose
    options = parser.parse_args()
    verbose = options.verbose
    destination = options.output
    with open(destination, "w", encoding="utf-8") as out, report_time("generate"):
        generate(options.args, out)
    if not verbose:
        return
    # The file is closed here, so the reported size covers all written data.
    print(f"Wrote {os.path.getsize(destination)} bytes to {destination}")
502
503
# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()