1 """Deep freeze
2
3 The script may be executed by _bootstrap_python interpreter.
4 Shared library extension modules are not available in that case.
5 On Windows, and in cross-compilation cases, it is executed
6 by Python 3.10, and 3.11 features are not available.
7 """
8 import argparse
9 import ast
10 import builtins
11 import collections
12 import contextlib
13 import os
14 import re
15 import time
16 import types
17 from typing import Dict, FrozenSet, TextIO, Tuple
18
19 import umarshal
20 from generate_global_objects import get_identifiers_and_strings
21
# Diagnostics switch; main() overwrites it from the -v/--verbose option.
verbose = False
# Generated strings found in these collections are emitted by
# Printer.generate_unicode() as _Py_ID()/_Py_STR() references.
identifiers, strings = get_identifiers_and_strings()

# This must be kept in sync with opcode.py
RESUME = 151
27
def isprintable(b: bytes) -> bool:
    """Return True if every byte of *b* lies in the range 0x20..0x7e."""
    for byte in b:
        if byte < 0x20 or byte >= 0x7f:
            return False
    return True
30
31
def make_string_literal(b: bytes) -> str:
    """Return *b* spelled as a double-quoted C string literal.

    Printable input is written as-is with backslashes and double quotes
    escaped; any other input is written entirely as hex escapes.
    """
    if isprintable(b):
        text = b.decode("ascii")
        body = text.replace("\\", "\\\\").replace("\"", "\\\"")
    else:
        body = "".join(f"\\x{byte:02x}" for byte in b)
    return '"' + body + '"'
41
42
# Bit flags combined per name in the "kinds" bytes built by get_localsplus().
CO_FAST_LOCAL = 0x20
CO_FAST_CELL = 0x40
CO_FAST_FREE = 0x80


def get_localsplus(code: types.CodeType):
    """Merge the variable name tables of *code* into one table.

    Returns ``(names, kinds)``: *names* is a tuple of the distinct names in
    first-seen order (co_varnames, then co_cellvars, then co_freevars) and
    *kinds* is a bytes object holding the OR-ed CO_FAST_* flags of each name.
    """
    flags_by_name = {}
    groups = (
        (CO_FAST_LOCAL, code.co_varnames),
        (CO_FAST_CELL, code.co_cellvars),
        (CO_FAST_FREE, code.co_freevars),
    )
    for flag, group in groups:
        for name in group:
            flags_by_name[name] = flags_by_name.get(name, 0) | flag
    return tuple(flags_by_name), bytes(flags_by_name.values())
57
58
def get_localsplus_counts(code: types.CodeType,
                          names: Tuple[str, ...],
                          kinds: bytes) -> Tuple[int, int, int, int]:
    """Count the entries of a localsplus table by kind.

    Returns ``(nlocals, nplaincellvars, ncellvars, nfreevars)``.  A cell that
    is also a local counts towards ``nlocals`` and ``ncellvars`` but not
    ``nplaincellvars``.  The totals are cross-checked against *code* with
    assertions.
    """
    assert len(names) == len(kinds)
    nlocals = nplaincellvars = ncellvars = nfreevars = 0
    for _name, kind in zip(names, kinds):
        is_local = bool(kind & CO_FAST_LOCAL)
        is_cell = bool(kind & CO_FAST_CELL)
        if is_local:
            nlocals += 1
        if is_cell:
            ncellvars += 1
            if not is_local:
                nplaincellvars += 1
        elif not is_local and kind & CO_FAST_FREE:
            # The free flag only counts when no other flag matched first.
            nfreevars += 1
    assert nlocals == len(code.co_varnames) == code.co_nlocals, \
        (nlocals, len(code.co_varnames), code.co_nlocals)
    assert ncellvars == len(code.co_cellvars)
    assert nfreevars == len(code.co_freevars)
    assert len(names) == nlocals + nplaincellvars + nfreevars
    return nlocals, nplaincellvars, ncellvars, nfreevars
83
84
# Values returned as the "kind" by analyze_character_width().
PyUnicode_1BYTE_KIND = 1
PyUnicode_2BYTE_KIND = 2
PyUnicode_4BYTE_KIND = 4


def analyze_character_width(s: str) -> Tuple[int, bool]:
    """Return ``(kind, ascii)`` for the widest character in *s*.

    *kind* is one of the PyUnicode_*_KIND constants; *ascii* is True only
    when no character is above U+007F.
    """
    # Prepending a space reproduces the ' ' floor and handles the empty string.
    widest = max(" " + s)
    if widest <= '\x7F':
        return PyUnicode_1BYTE_KIND, True
    if widest <= '\xFF':
        return PyUnicode_1BYTE_KIND, False
    if widest <= '\uFFFF':
        return PyUnicode_2BYTE_KIND, False
    return PyUnicode_4BYTE_KIND, False
103
104
def removesuffix(base: str, suffix: str) -> str:
    """Return *base* without a trailing *suffix*; unchanged if it has none."""
    if not base.endswith(suffix):
        return base
    keep = len(base) - len(suffix)
    return base[:keep]
109
class Printer:
    """Write C source that defines Python objects as static C data.

    The ``generate_*`` methods write the C definition of one object to
    ``file`` and return a C expression that refers to it.  C statements that
    are emitted later are collected in ``patchups``, ``deallocs`` and
    ``interns``.
    """

    def __init__(self, file: TextIO) -> None:
        """Bind the printer to *file* and write the leading #include lines."""
        self.level = 0
        self.file = file
        # Maps the key built in generate() to the C expression returned for it.
        self.cache: Dict[tuple[type, object, str], str] = {}
        self.hits, self.misses = 0, 0
        self.patchups: list[str] = []
        self.deallocs: list[str] = []
        self.interns: list[str] = []
        for header in (
            "Python.h",
            "internal/pycore_gc.h",
            "internal/pycore_code.h",
            "internal/pycore_long.h",
        ):
            self.write(f'#include "{header}"')
        self.write("")

    @contextlib.contextmanager
    def indent(self) -> None:
        """Context manager: one extra indentation level inside the body."""
        saved = self.level
        try:
            self.level = saved + 1
            yield
        finally:
            self.level = saved

    def write(self, arg: str) -> None:
        """Write *arg* as one line, prefixed by the current indentation."""
        padding = " " * self.level
        self.file.writelines((padding, arg, "\n"))

    @contextlib.contextmanager
    def block(self, prefix: str, suffix: str = "") -> None:
        """Context manager: wrap the body's output in ``prefix { ... }suffix``."""
        self.write(f"{prefix} {{")
        with self.indent():
            yield
        self.write(f"}}{suffix}")

    def object_head(self, typename: str) -> None:
        """Write the ``.ob_base`` initializer of an object of C type *typename*."""
        with self.block(".ob_base =", ","):
            self.write(".ob_refcnt = 999999999,")
            self.write(f".ob_type = &{typename},")

    def object_var_head(self, typename: str, size: int) -> None:
        """Write the ``.ob_base`` initializer of a variable-size object."""
        with self.block(".ob_base =", ","):
            self.object_head(typename)
            self.write(f".ob_size = {size},")

    def field(self, obj: object, name: str) -> None:
        """Write ``.name = value,`` where value is attribute *name* of *obj*."""
        value = getattr(obj, name)
        self.write(f".{name} = {value},")

    def generate_bytes(self, name: str, b: bytes) -> str:
        """Emit a bytes object; empty and 1-byte values use shared singletons."""
        size = len(b)
        if size == 0:
            return "(PyObject *)&_Py_SINGLETON(bytes_empty)"
        if size == 1:
            return f"(PyObject *)&_Py_SINGLETON(bytes_characters[{b[0]}])"
        self.write("static")
        with self.indent():
            with self.block("struct"):
                self.write("PyObject_VAR_HEAD")
                self.write("Py_hash_t ob_shash;")
                # One extra char for the terminating NUL of the C literal.
                self.write(f"char ob_sval[{size + 1}];")
            with self.block(f"{name} =", ";"):
                self.object_var_head("PyBytes_Type", size)
                self.write(".ob_shash = -1,")
                self.write(f".ob_sval = {make_string_literal(b)},")
        return f"& {name}.ob_base.ob_base"

    def _write_unicode_base(self, length: int, kind: int, is_ascii: bool) -> None:
        """Write the header, length, hash and state fields of a str object."""
        self.object_head("PyUnicode_Type")
        self.write(f".length = {length},")
        self.write(".hash = -1,")
        with self.block(".state =", ","):
            self.write(f".kind = {kind},")
            self.write(".compact = 1,")
            self.write(f".ascii = {int(is_ascii)},")
            self.write(".ready = 1,")

    def generate_unicode(self, name: str, s: str) -> str:
        """Emit a str object, or reference a known global string/identifier."""
        if s in strings:
            return f"&_Py_STR({strings[s]})"
        if s in identifiers:
            return f"&_Py_ID({s})"
        if re.match(r'\A[A-Za-z0-9_]+\Z', s):
            # Strings that are valid C identifier text get a name of their own.
            name = f"const_str_{s}"
        kind, ascii = analyze_character_width(s)
        datatype = {
            PyUnicode_1BYTE_KIND: "uint8_t",
            PyUnicode_2BYTE_KIND: "uint16_t",
        }.get(kind, "uint32_t")
        length = len(s)
        self.write("static")
        with self.indent():
            with self.block("struct"):
                if ascii:
                    self.write("PyASCIIObject _ascii;")
                else:
                    self.write("PyCompactUnicodeObject _compact;")
                self.write(f"{datatype} _data[{length + 1}];")
            self.deallocs.append(f"_PyStaticUnicode_Dealloc((PyObject *)&{name});")
            with self.block(f"{name} =", ";"):
                if ascii:
                    with self.block("._ascii =", ","):
                        self._write_unicode_base(length, kind, True)
                    self.write(f"._data = {make_string_literal(s.encode('ascii'))},")
                    result = f"& {name}._ascii.ob_base"
                else:
                    with self.block("._compact =", ","):
                        with self.block("._base =", ","):
                            self._write_unicode_base(length, kind, False)
                    with self.block("._data =", ","):
                        # Code points are written 16 per line.
                        for start in range(0, length, 16):
                            codes = [str(ord(ch)) for ch in s[start:start + 16]]
                            self.write(", ".join(codes) + ",")
                    if kind in (PyUnicode_2BYTE_KIND, PyUnicode_4BYTE_KIND):
                        # Guarded by a sizeof(wchar_t) test in the generated C.
                        self.patchups.extend((
                            f"if (sizeof(wchar_t) == {kind}) {{",
                            f" {name}._compact._base.wstr = (wchar_t *) {name}._data;",
                            f" {name}._compact.wstr_length = {length};",
                            "}",
                        ))
                    result = f"& {name}._compact._base.ob_base"
        return result

    def generate_code(self, name: str, code: types.CodeType) -> str:
        """Emit a code object together with every object it refers to."""
        # The ordering here matches PyCode_NewWithPosOnlyArgs()
        # (but see below).
        refs = {}
        for suffix in ("consts", "names", "filename", "name",
                       "qualname", "linetable", "exceptiontable"):
            attr = "co_" + suffix
            refs[attr] = self.generate(f"{name}_{suffix}", getattr(code, attr))
        # These fields are not directly accessible
        localsplusnames, localspluskinds = get_localsplus(code)
        co_localsplusnames = self.generate(name + "_localsplusnames", localsplusnames)
        co_localspluskinds = self.generate(name + "_localspluskinds", localspluskinds)
        # Derived values
        nlocals, nplaincellvars, ncellvars, nfreevars = \
            get_localsplus_counts(code, localsplusnames, localspluskinds)
        bytecode = code.co_code
        co_code_adaptive = make_string_literal(bytecode)
        self.write("static")
        with self.indent():
            self.write(f"struct _PyCode_DEF({len(bytecode)})")
            with self.block(f"{name} =", ";"):
                self.object_var_head("PyCode_Type", len(bytecode) // 2)
                # But the ordering here must match that in cpython/code.h
                # (which is a pain because we tend to reorder those for perf)
                # otherwise MSVC doesn't like it.
                for member in ("co_consts", "co_names", "co_exceptiontable"):
                    self.write(f".{member} = {refs[member]},")
                self.field(code, "co_flags")
                self.write(".co_warmup = QUICKENING_INITIAL_WARMUP_VALUE,")
                self.write("._co_linearray_entry_size = 0,")
                for attr in ("co_argcount", "co_posonlyargcount",
                             "co_kwonlyargcount", "co_stacksize",
                             "co_firstlineno"):
                    self.field(code, attr)
                self.write(f".co_nlocalsplus = {len(localsplusnames)},")
                self.field(code, "co_nlocals")
                for member, count in (("co_nplaincellvars", nplaincellvars),
                                      ("co_ncellvars", ncellvars),
                                      ("co_nfreevars", nfreevars)):
                    self.write(f".{member} = {count},")
                self.write(f".co_localsplusnames = {co_localsplusnames},")
                self.write(f".co_localspluskinds = {co_localspluskinds},")
                for member in ("co_filename", "co_name", "co_qualname",
                               "co_linetable"):
                    self.write(f".{member} = {refs[member]},")
                self.write("._co_code = NULL,")
                self.write("._co_linearray = NULL,")
                self.write(f".co_code_adaptive = {co_code_adaptive},")
                # Index (in 2-byte units) of the first RESUME opcode, if any.
                first_resume = next(
                    (i for i, op in enumerate(bytecode[::2]) if op == RESUME),
                    None,
                )
                if first_resume is not None:
                    self.write(f"._co_firsttraceable = {first_resume},")
        name_as_code = f"(PyCodeObject *)&{name}"
        self.deallocs.append(f"_PyStaticCode_Dealloc({name_as_code});")
        self.interns.append(f"_PyStaticCode_InternStrings({name_as_code})")
        return f"& {name}.ob_base.ob_base"

    def generate_tuple(self, name: str, t: Tuple[object, ...]) -> str:
        """Emit a tuple and its items; the empty tuple uses the singleton."""
        if len(t) == 0:
            return "(PyObject *)& _Py_SINGLETON(tuple_empty)"
        # Items are generated first so their definitions precede the tuple.
        items = []
        for index, element in enumerate(t):
            items.append(self.generate(f"{name}_{index}", element))
        self.write("static")
        with self.indent():
            with self.block("struct"):
                self.write("PyGC_Head _gc_head;")
                with self.block("struct", "_object;"):
                    self.write("PyObject_VAR_HEAD")
                    self.write(f"PyObject *ob_item[{len(t)}];")
            with self.block(f"{name} =", ";"):
                with self.block("._object =", ","):
                    self.object_var_head("PyTuple_Type", len(t))
                    with self.block(".ob_item =", ","):
                        for item in items:
                            self.write(item + ",")
        return f"& {name}._object.ob_base.ob_base"

    def _generate_int_for_bits(self, name: str, i: int, digit: int) -> None:
        """Write int *i* as base-*digit* digits, least significant first."""
        sign = (i > 0) - (i < 0)
        remaining = abs(i)
        digits: list[int] = []
        while remaining:
            remaining, rem = divmod(remaining, digit)
            digits.append(rem)
        ndigits = len(digits)
        self.write("static")
        with self.indent():
            with self.block("struct"):
                self.write("PyObject_VAR_HEAD")
                self.write(f"digit ob_digit[{max(1, ndigits)}];")
            with self.block(f"{name} =", ";"):
                # The sign of the value is carried by the sign of ob_size.
                self.object_var_head("PyLong_Type", sign * ndigits)
                if digits:
                    ds = ", ".join(str(d) for d in digits)
                    self.write(f".ob_digit = {{ {ds} }},")

    def generate_int(self, name: str, i: int) -> str:
        """Emit an int; values from -5 to 256 use the small-int table."""
        if -5 <= i <= 256:
            return f"(PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + {i}]"
        magnitude = abs(i)
        name = f"const_int_negative_{magnitude}" if i < 0 else f"const_int_{i}"
        if magnitude < 2**15:
            # A single digit is enough for both 15-bit and 30-bit builds.
            self._generate_int_for_bits(name, i, 2**15)
            return f"& {name}.ob_base.ob_base"
        for directive, bits_in_digit in (("if", 15), ("elif", 30)):
            self.write(f"#{directive} PYLONG_BITS_IN_DIGIT == {bits_in_digit}")
            self._generate_int_for_bits(name, i, 2**bits_in_digit)
        # If neither clause applies, it won't compile
        self.write("#else")
        self.write('#error "PYLONG_BITS_IN_DIGIT should be 15 or 30"')
        self.write("#endif")
        return f"& {name}.ob_base.ob_base"

    def generate_float(self, name: str, x: float) -> str:
        """Emit a float object initialised with the text form of *x*."""
        with self.block(f"static PyFloatObject {name} =", ";"):
            self.object_head("PyFloat_Type")
            self.write(f".ob_fval = {x},")
        return f"&{name}.ob_base"

    def generate_complex(self, name: str, z: complex) -> str:
        """Emit a complex object from the real and imaginary parts of *z*."""
        with self.block(f"static PyComplexObject {name} =", ";"):
            self.object_head("PyComplex_Type")
            self.write(f".cval = {{ {z.real}, {z.imag} }},")
        return f"&{name}.ob_base"

    def generate_frozenset(self, name: str, fs: FrozenSet[object]) -> str:
        """Emit a frozenset as a tuple of its sorted members."""
        try:
            ordered = sorted(fs)
        except TypeError:
            # frozen set with incompatible types, fallback to repr()
            ordered = sorted(fs, key=repr)
        ret = self.generate_tuple(name, tuple(ordered))
        self.write("// TODO: The above tuple should be a frozenset")
        return ret

    def generate_file(self, module: str, code: object) -> None:
        """Emit *code* as the top-level object of *module* plus its epilogue."""
        module = module.replace(".", "_")
        self.generate(f"{module}_toplevel", code)
        with self.block(f"static void {module}_do_patchups(void)"):
            for patchup in self.patchups:
                self.write(patchup)
        # Patchups are per module; start the next module with a clean list.
        self.patchups.clear()
        self.write(EPILOGUE.replace("%%NAME%%", module))

    def generate(self, name: str, obj: object) -> str:
        """Return a C expression for *obj*, emitting its definition if needed.

        Results are cached, so an equal object of the same type and repr is
        emitted once.  Raises TypeError for unsupported object types.
        """
        # Use repr() in the key to distinguish -0.0 from +0.0
        key = (type(obj), obj, repr(obj))
        cached = self.cache.get(key)
        if cached is not None:
            self.hits += 1
            return cached
        self.misses += 1
        # Singletons are referenced by their C names and are never cached.
        for singleton, c_name in ((True, "Py_True"),
                                  (False, "Py_False"),
                                  (builtins.Ellipsis, "Py_Ellipsis"),
                                  (None, "Py_None")):
            if obj is singleton:
                return c_name
        emitters = (
            ((types.CodeType, umarshal.Code), self.generate_code),
            (tuple, self.generate_tuple),
            (str, self.generate_unicode),
            (bytes, self.generate_bytes),
            (int, self.generate_int),
            (float, self.generate_float),
            (complex, self.generate_complex),
            (frozenset, self.generate_frozenset),
        )
        for accepted, emit in emitters:
            if isinstance(obj, accepted):
                val = emit(name, obj)
                break
        else:
            raise TypeError(
                f"Cannot generate code for {type(obj).__name__} object")
        self.cache[key] = val
        return val
428
429
# C text written by Printer.generate_file() after a module's objects; each
# "%%NAME%%" is replaced by the module name with dots turned into underscores.
EPILOGUE = """
PyObject *
_Py_get_%%NAME%%_toplevel(void)
{
%%NAME%%_do_patchups();
return Py_NewRef((PyObject *) &%%NAME%%_toplevel);
}
"""
438
# Leading comments that mark an input file as freeze-module output.
FROZEN_COMMENT_C = "/* Auto-generated by Programs/_freeze_module.c */"
FROZEN_COMMENT_PY = "/* Auto-generated by Programs/_freeze_module.py */"

# Matches a line that consists of comma-terminated decimal numbers.
FROZEN_DATA_LINE = r"\s*(\d+,\s*)+\s*"


def is_frozen_header(source: str) -> bool:
    """Return True if *source* starts with one of the FROZEN_COMMENT_* markers."""
    for marker in (FROZEN_COMMENT_C, FROZEN_COMMENT_PY):
        if source.startswith(marker):
            return True
    return False
447
448
def decode_frozen_data(source: str) -> types.CodeType:
    """Extract the numeric data lines from *source* and unmarshal them.

    Lines before the first and after the last line matching
    FROZEN_DATA_LINE are dropped; the rest is evaluated as a literal of
    integers, converted to bytes and passed to ``umarshal.loads``.
    """
    lines = source.splitlines()
    first, last = 0, len(lines)
    # Skip leading lines that are not data.
    while first < last and re.match(FROZEN_DATA_LINE, lines[first]) is None:
        first += 1
    # Skip trailing lines that are not data.
    while last > first and re.match(FROZEN_DATA_LINE, lines[last - 1]) is None:
        last -= 1
    payload = "".join(lines[first:last]).strip()
    values: Tuple[int, ...] = ast.literal_eval(payload)
    return umarshal.loads(bytes(values))
458
459
def generate(args: list[str], output: TextIO) -> None:
    """Write the deep-frozen C source for every module in *args* to *output*.

    Each entry of *args* has the form ``file:modname``.  A file that starts
    with a freeze-module header comment is decoded as marshalled data; any
    other file is compiled as Python source.  After all modules, the
    ``_Py_Deepfreeze_Fini`` and ``_Py_Deepfreeze_Init`` functions are
    written from the statements the printer collected.

    Raises ValueError if an entry contains no ``:`` separator.
    """
    printer = Printer(output)
    for arg in args:
        file, modname = arg.rsplit(':', 1)
        with open(file, "r", encoding="utf8") as fd:
            source = fd.read()
        if is_frozen_header(source):
            code = decode_frozen_data(source)
        else:
            # Compile the text that was already read.  Reading from fd again
            # would start at end-of-file and yield "", i.e. an empty module.
            code = compile(source, f"<frozen {modname}>", "exec")
        printer.generate_file(modname, code)
    with printer.block(f"void\n_Py_Deepfreeze_Fini(void)"):
        for p in printer.deallocs:
            printer.write(p)
    with printer.block(f"int\n_Py_Deepfreeze_Init(void)"):
        for p in printer.interns:
            with printer.block(f"if ({p} < 0)"):
                printer.write("return -1;")
        printer.write("return 0;")
    if verbose:
        print(f"Cache hits: {printer.hits}, misses: {printer.misses}")
481
482
# Command-line interface: output path, verbosity switch and one or more
# "file:modname" inputs; parsed by main().
parser = argparse.ArgumentParser()
parser.add_argument("-o", "--output", help="Defaults to deepfreeze.c", default="deepfreeze.c")
parser.add_argument("-v", "--verbose", action="store_true", help="Print diagnostics")
parser.add_argument('args', nargs="+", help="Input file and module name (required) in file:modname format")
487
@contextlib.contextmanager
def report_time(label: str):
    """Context manager that times its body and prints it when verbose is set.

    The elapsed time is computed even if the body raises.
    """
    started = time.time()
    try:
        yield
    finally:
        finished = time.time()
        if verbose:
            print(f"{label}: {finished-started:.3f} sec")
497
498
def main() -> None:
    """Parse the command line, write the output file and report its size."""
    global verbose
    options = parser.parse_args()
    verbose = options.verbose
    output = options.output
    # The timer exits before the file is closed, as with nested with-blocks.
    with open(output, "w", encoding="utf-8") as file, report_time("generate"):
        generate(options.args, file)
    if verbose:
        size = os.path.getsize(output)
        print(f"Wrote {size} bytes to {output}")
509
510
# Run main() only when executed as a script, not when imported.
if __name__ == "__main__":
    main()