1 #!/usr/bin/env python
2 """Create a WASM asset bundle directory structure.
3
4 The WASM asset bundles are pre-loaded by the final WASM build. The bundle
5 contains:
6
7 - a stripped down, pyc-only stdlib zip file, e.g. {PREFIX}/lib/python311.zip
8 - os.py as marker module {PREFIX}/lib/python3.11/os.py
9 - empty lib-dynload directory, to make sure it is copied into the bundle:
10 {PREFIX}/lib/python3.11/lib-dynload/.empty
11 """
12
13 import argparse
14 import pathlib
15 import shutil
16 import sys
17 import sysconfig
18 import zipfile
19
# Root of the source checkout: three directory levels above this script.
SRCDIR = pathlib.Path(__file__).parent.parent.parent.absolute()
# Pure-Python standard library sources inside the checkout.
SRCDIR_LIB = SRCDIR / "Lib"


# Bundle layout. All of these paths are relative to $(prefix).
WASM_LIB = pathlib.PurePath("lib")
# Zipped stdlib, e.g. lib/python311.zip
WASM_STDLIB_ZIP = WASM_LIB / "python{0.major}{0.minor}.zip".format(
    sys.version_info
)
# Unzipped stdlib directory, e.g. lib/python3.11
WASM_STDLIB = WASM_LIB / "python{0.major}.{0.minor}".format(sys.version_info)
# Directory for dynamically loaded extension modules.
WASM_DYNLOAD = WASM_STDLIB / "lib-dynload"
34
35
# Files and packages (paths relative to Lib/, a trailing slash marks a
# directory) that are always left out of the bundle: they are large and
# not particularly useful at the moment.
OMIT_FILES = (
    # --- regression tests ---
    "test/",
    # --- package management ---
    "ensurepip/",
    "venv/",
    # --- build system ---
    "lib2to3/",
    # --- deprecated modules ---
    "uu.py",
    "xdrlib.py",
    # --- support code for other platforms ---
    "_aix_support.py",
    "_osx_support.py",
    # --- webbrowser and its users ---
    "antigravity.py",
    "webbrowser.py",
    # --- pure Python implementations of C extensions ---
    "_pydecimal.py",
    "_pyio.py",
    # --- concurrent threading ---
    "concurrent/futures/thread.py",
    # --- miscellaneous unused or large files ---
    "pydoc_data/",
    "msilib/",
)
64
# Networking modules that are additionally omitted (see main() for the
# platform check). Synchronous network I/O and protocols are not
# supported there; for example, socket.create_connection() raises
# "BlockingIOError: [Errno 26] Operation in progress".
OMIT_NETWORKING_FILES = (
    "cgi.py",
    "cgitb.py",
    "email/",
    "ftplib.py",
    "http/",
    "imaplib.py",
    "mailbox.py",
    "mailcap.py",
    "nntplib.py",
    "poplib.py",
    "smtplib.py",
    "socketserver.py",
    "telnetlib.py",
    # urllib.parse is deliberately NOT listed: pydoc needs it.
    "urllib/error.py",
    "urllib/request.py",
    "urllib/response.py",
    "urllib/robotparser.py",
    "wsgiref/",
)
89
# Maps an extension module name to the Lib/ files and packages that are
# omitted from the bundle when that extension module is not available.
OMIT_MODULE_FILES = {
    "_asyncio": ["asyncio/"],
    "audioop": ["aifc.py", "sunau.py", "wave.py"],
    "_crypt": ["crypt.py"],
    "_curses": ["curses/"],
    "_ctypes": ["ctypes/"],
    "_decimal": ["decimal.py"],
    "_dbm": ["dbm/ndbm.py"],
    "_gdbm": ["dbm/gnu.py"],
    "_json": ["json/"],
    "_multiprocessing": [
        "concurrent/futures/process.py",
        "multiprocessing/",
    ],
    "pyexpat": ["xml/", "xmlrpc/"],
    "readline": ["rlcompleter.py"],
    "_sqlite3": ["sqlite3/"],
    "_ssl": ["ssl.py"],
    "_tkinter": ["idlelib/", "tkinter/", "turtle.py", "turtledemo/"],
    "_zoneinfo": ["zoneinfo/"],
}
108
# Accepted sysconfigdata module name prefixes. The tuple is passed to
# str.startswith() in get_sysconfigdata(), so each entry is a prefix and
# must appear only once.
SYSCONFIG_NAMES = (
    "_sysconfigdata__emscripten_wasm32-emscripten",
    "_sysconfigdata__wasi_wasm32-wasi",
    "_sysconfigdata__wasi_wasm64-wasi",
)
115
116
def get_builddir(args: argparse.Namespace) -> pathlib.Path:
    """Get builddir path from pybuilddir.txt

    ``pybuilddir.txt`` is opened relative to the current working
    directory; *args* is currently not consulted. Leading and trailing
    whitespace (for example a trailing newline) is stripped so that it
    does not end up inside the returned path.

    Raises OSError (e.g. FileNotFoundError) if the file cannot be read.
    """
    with open("pybuilddir.txt", encoding="utf-8") as f:
        builddir = f.read().strip()
    return pathlib.Path(builddir)
122
123
def get_sysconfigdata(args: argparse.Namespace) -> pathlib.Path:
    """Return the location of the sysconfigdata module source file.

    The module name comes from sysconfig; it has to start with one of
    the prefixes in SYSCONFIG_NAMES, otherwise ValueError is raised.
    The result is ``args.builddir`` joined with ``<module name>.py``.
    """
    module_name = sysconfig._get_sysconfigdata_name()
    if module_name.startswith(SYSCONFIG_NAMES):
        return args.builddir / f"{module_name}.py"
    raise ValueError(
        f"Invalid sysconfig data name '{module_name}'.", SYSCONFIG_NAMES
    )
133
134
def create_stdlib_zip(
    args: argparse.Namespace,
    *,
    optimize: int = 0,
) -> None:
    """Write the byte-compiled stdlib archive to ``args.wasm_stdlib_zip``.

    The sysconfigdata module (``args.sysconfig_data``) is added first,
    followed by every ``.py`` file and directory found directly in
    ``args.srcdir_lib``, in sorted order. ``__pycache__`` is skipped and
    any path contained in ``args.omit_files_absolute`` is filtered out.
    *optimize* is forwarded to ``zipfile.PyZipFile``.
    """

    def filterfunc(filename: str) -> bool:
        # Keep a path unless its resolved form is on the omit list.
        is_omitted = pathlib.Path(filename).resolve() in args.omit_files_absolute
        return not is_omitted

    archive = zipfile.PyZipFile(
        args.wasm_stdlib_zip,
        mode="w",
        compression=args.compression,
        optimize=optimize,
    )
    with archive as pzf:
        level = args.compresslevel
        if level is not None:
            # The compression level is set as an attribute on the archive.
            pzf.compresslevel = level
        pzf.writepy(args.sysconfig_data)
        for listed in sorted(args.srcdir_lib.iterdir()):
            candidate = listed.resolve()
            if candidate.name == "__pycache__":
                continue
            if not candidate.name.endswith(".py") and not candidate.is_dir():
                continue
            # writepy() writes .pyc files (bytecode).
            pzf.writepy(candidate, filterfunc=filterfunc)
160
161
def detect_extension_modules(args: argparse.Namespace) -> dict[str, bool]:
    """Map extension module names to whether the build provides them.

    Two sources are consulted:

    - ``MODDISABLED_NAMES=`` in ``args.buildroot / "Makefile"``: every
      module listed there is recorded as ``False``.
    - ``build_time_vars`` in the sysconfigdata file
      (``args.sysconfig_data``): each ``MODULE_<NAME>_STATE`` entry is
      recorded as ``True`` only when its value is ``"yes"``. Names already
      recorded from the Makefile are not overwritten.

    Module names from sysconfigdata are lower-cased. Both files are read
    as UTF-8, independent of the locale.

    Raises ValueError for an unknown ``MODULE_*_STATE`` value.
    """
    prefix = "MODULE_"
    suffix = "_STATE"
    known_states = {"yes", "disabled", "missing", "n/a"}
    modules: dict[str, bool] = {}

    # disabled by Modules/Setup.local ?
    with open(args.buildroot / "Makefile", encoding="utf-8") as f:
        for line in f:
            if line.startswith("MODDISABLED_NAMES="):
                # split() without arguments also drops surrounding whitespace.
                for modname in line.split("=", 1)[1].split():
                    modules[modname] = False
                # Only the first matching line is considered.
                break

    # disabled by configure?
    with open(args.sysconfig_data, encoding="utf-8") as f:
        data = f.read()
    loc: dict = {}
    # NOTE: this executes the sysconfigdata file. It is generated by the
    # build itself; never point this at untrusted input.
    exec(data, globals(), loc)

    for key, value in loc["build_time_vars"].items():
        if not key.startswith(prefix) or not key.endswith(suffix):
            continue
        if value not in known_states:
            raise ValueError(f"Unsupported value '{value}' for {key}")

        # Strip "MODULE_" and "_STATE" by position, e.g.
        # MODULE__SSL_STATE -> _ssl
        modname = key[len(prefix):-len(suffix)].lower()
        if modname not in modules:
            modules[modname] = value == "yes"
    return modules
190
191
def path(val: str) -> pathlib.Path:
    """Convert a command line value into an absolute ``pathlib.Path``.

    Used as the ``type=`` converter for the path-valued options.
    """
    given = pathlib.Path(val)
    return given.absolute()
194
195
# Command line interface. Both options go through path(), so values
# given on the command line are always absolute.
parser = argparse.ArgumentParser()
# Where the bundle tree is created; defaults to the current directory.
parser.add_argument(
    "--buildroot",
    type=path,
    default=path("."),
    help="absolute path to build root",
)
# Install prefix; main() re-roots it below the build root.
parser.add_argument(
    "--prefix",
    type=path,
    default=pathlib.Path("/usr/local"),
    help="install prefix",
)
209
210
def main():
    """Create the WASM asset bundle below ``<buildroot>/<prefix>``.

    Parses the command line, derives all output paths, works out which
    stdlib files to leave out, then creates the ``lib-dynload`` marker,
    copies ``os.py`` and writes the stdlib zip. Ends by calling
    ``parser.exit(0, ...)`` with a summary message.

    Raises ValueError if the sysconfigdata file does not exist.
    """
    args = parser.parse_args()

    # Re-root the install prefix below the build root.
    wasm_root = args.buildroot / args.prefix.relative_to(pathlib.Path("/"))
    args.srcdir = SRCDIR
    args.srcdir_lib = SRCDIR_LIB
    args.wasm_root = wasm_root
    args.wasm_stdlib_zip = wasm_root / WASM_STDLIB_ZIP
    args.wasm_stdlib = wasm_root / WASM_STDLIB
    args.wasm_dynload = wasm_root / WASM_DYNLOAD

    # bpo-17004: zipimport supports only zlib compression.
    # Emscripten ZIP_STORED + -sLZ4=1 linker flags results in larger file.
    args.compression = zipfile.ZIP_DEFLATED
    args.compresslevel = 9

    args.builddir = get_builddir(args)
    sysconfig_data = get_sysconfigdata(args)
    args.sysconfig_data = sysconfig_data
    if not sysconfig_data.is_file():
        raise ValueError(f"sysconfigdata file {args.sysconfig_data} missing.")

    extmods = detect_extension_modules(args)

    # Collect everything to leave out: the unconditional list, the
    # networking modules on Emscripten, and the Python-level files of
    # every extension module that is not available.
    omitted = [*OMIT_FILES]
    if sysconfig.get_platform().startswith("emscripten"):
        omitted += OMIT_NETWORKING_FILES
    for modname, modfiles in OMIT_MODULE_FILES.items():
        if extmods.get(modname):
            continue
        omitted += modfiles

    lib_dir = args.srcdir_lib
    args.omit_files_absolute = {(lib_dir / name).resolve() for name in omitted}

    # Empty, unused directory for dynamic libs, but required for site
    # initialization.
    dynload_dir = args.wasm_dynload
    dynload_dir.mkdir(parents=True, exist_ok=True)
    (dynload_dir / ".empty").touch()
    # os.py is a marker for finding the correct lib directory.
    shutil.copy(lib_dir / "os.py", args.wasm_stdlib)
    # The rest of stdlib that's useful in a WASM context.
    create_stdlib_zip(args)

    size_bytes = args.wasm_stdlib_zip.stat().st_size
    size = round(size_bytes / 1024**2, 2)
    parser.exit(0, f"Created {args.wasm_stdlib_zip} ({size} MiB)\n")
254
255
# Build the bundle only when executed as a script, not when imported.
if __name__ == "__main__":
    main()