1 #!/usr/bin/env python
2 """Create a WASM asset bundle directory structure.
3
4 The WASM asset bundles are pre-loaded by the final WASM build. The bundle
5 contains:
6
7 - a stripped down, pyc-only stdlib zip file, e.g. {PREFIX}/lib/python311.zip
8 - os.py as marker module {PREFIX}/lib/python3.11/os.py
- empty lib-dynload directory; the ``.empty`` marker file makes sure the
  directory is copied into the bundle:
  {PREFIX}/lib/python3.11/lib-dynload/.empty
10 """
11
12 import argparse
13 import pathlib
14 import shutil
15 import sys
16 import sysconfig
17 import zipfile
18
# Source tree root: the directory two levels above the one holding this
# script, made absolute.
SRCDIR = pathlib.Path(__file__).parent.parent.parent.absolute()
SRCDIR_LIB = SRCDIR.joinpath("Lib")


# Bundle layout.  All of these are pure paths relative to $(prefix).
WASM_LIB = pathlib.PurePath("lib")
# Zipped stdlib: lib/python<major><minor>.zip
WASM_STDLIB_ZIP = WASM_LIB.joinpath(
    f"python{sys.version_info.major}{sys.version_info.minor}.zip"
)
# Unzipped stdlib directory: lib/python<major>.<minor>
WASM_STDLIB = WASM_LIB.joinpath(
    f"python{sys.version_info.major}.{sys.version_info.minor}"
)
WASM_DYNLOAD = WASM_STDLIB.joinpath("lib-dynload")
33
34
# Large files / packages that are not particularly useful at the moment and
# are therefore not shipped.  The tuple is assembled from one group per
# category; the resulting order is the order of the groups below.
OMIT_FILES = (
    # regression tests
    ("test/",)
    # package management
    + ("ensurepip/", "venv/")
    # build system
    + ("distutils/", "lib2to3/")
    # deprecated
    + ("asyncore.py", "asynchat.py", "uu.py", "xdrlib.py")
    # other platforms
    + ("_aix_support.py", "_bootsubprocess.py", "_osx_support.py")
    # webbrowser
    + ("antigravity.py", "webbrowser.py")
    # Pure Python implementations of C extensions
    + ("_pydecimal.py", "_pyio.py")
    # concurrent threading
    + ("concurrent/futures/thread.py",)
    # Misc unused or large files
    + ("pydoc_data/", "msilib/")
)
67
# Modules that rely on synchronous network I/O and protocols, which are not
# supported.  For example, socket.create_connection() raises
# "BlockingIOError: [Errno 26] Operation in progress".
OMIT_NETWORKING_FILES = (
    (
        "cgi.py",
        "cgitb.py",
        "email/",
        "ftplib.py",
        "http/",
        "imaplib.py",
        "mailbox.py",
        "mailcap.py",
        "nntplib.py",
        "poplib.py",
        "smtpd.py",
        "smtplib.py",
        "socketserver.py",
        "telnetlib.py",
    )
    # Only part of urllib is listed: urllib.parse is kept for pydoc.
    + (
        "urllib/error.py",
        "urllib/request.py",
        "urllib/response.py",
        "urllib/robotparser.py",
    )
    + ("wsgiref/",)
)
93
# Extension module name -> stdlib files that are omitted from the bundle
# when that extension module is not available in the build.
OMIT_MODULE_FILES = {
    "_asyncio": ["asyncio/"],
    "audioop": [
        "aifc.py",
        "sunau.py",
        "wave.py",
    ],
    "_crypt": ["crypt.py"],
    "_curses": ["curses/"],
    "_ctypes": ["ctypes/"],
    "_decimal": ["decimal.py"],
    "_dbm": ["dbm/ndbm.py"],
    "_gdbm": ["dbm/gnu.py"],
    "_json": ["json/"],
    "_multiprocessing": [
        "concurrent/futures/process.py",
        "multiprocessing/",
    ],
    "pyexpat": [
        "xml/",
        "xmlrpc/",
    ],
    "readline": ["rlcompleter.py"],
    "_sqlite3": ["sqlite3/"],
    "_ssl": ["ssl.py"],
    "_tkinter": [
        "idlelib/",
        "tkinter/",
        "turtle.py",
        "turtledemo/",
    ],
    "_zoneinfo": ["zoneinfo/"],
}
112
# Regression test sub directories that live inside other packages.
OMIT_SUBDIRS = ("ctypes/test/", "tkinter/test/", "unittest/test/")
119
# Accepted sysconfigdata module names for the supported WASM targets.  The
# tuple is matched with str.startswith(), so each entry acts as a prefix.
# NOTE(review): WASI lists a wasm32 and a wasm64 name, Emscripten only a
# wasm32 one -- confirm whether an Emscripten wasm64 name is needed.
SYSCONFIG_NAMES = (
    "_sysconfigdata__emscripten_wasm32-emscripten",
    "_sysconfigdata__wasi_wasm32-wasi",
    "_sysconfigdata__wasi_wasm64-wasi",
)
126
127
def get_builddir(args: argparse.Namespace) -> pathlib.Path:
    """Get builddir path from pybuilddir.txt

    The file is opened relative to the current working directory; *args* is
    currently unused.  Surrounding whitespace, such as a trailing newline,
    is stripped so that it does not become part of the returned path.
    """
    with open("pybuilddir.txt", encoding="utf-8") as f:
        builddir = f.read().strip()
    return pathlib.Path(builddir)
133
134
def get_sysconfigdata(args: argparse.Namespace) -> pathlib.Path:
    """Return the sysconfigdata module file located in args.builddir.

    The module name is the one reported by sysconfig for the running
    interpreter.  ValueError is raised when that name does not start with
    one of SYSCONFIG_NAMES.
    """
    data_name = sysconfig._get_sysconfigdata_name()
    if data_name.startswith(SYSCONFIG_NAMES):
        return args.builddir / (data_name + ".py")
    raise ValueError(
        f"Invalid sysconfig data name '{data_name}'.", SYSCONFIG_NAMES
    )
144
145
def create_stdlib_zip(
    args: argparse.Namespace,
    *,
    optimize: int = 0,
) -> None:
    """Write the byte-compiled stdlib archive to args.wasm_stdlib_zip.

    The archive receives the sysconfigdata module first, followed by every
    ``.py`` file and every directory found directly in args.srcdir_lib (in
    sorted order).  Paths contained in args.omit_files_absolute are left
    out.  *optimize* is handed to zipfile.PyZipFile unchanged.
    """

    def is_wanted(filename: str) -> bool:
        # writepy() asks this for each candidate path; a false result
        # leaves the path (and the contents of a directory) out.
        return pathlib.Path(filename).resolve() not in args.omit_files_absolute

    archive = zipfile.PyZipFile(
        args.wasm_stdlib_zip,
        mode="w",
        compression=args.compression,
        optimize=optimize,
    )
    with archive:
        # The compression level is set as an attribute on the open archive.
        if args.compresslevel is not None:
            archive.compresslevel = args.compresslevel
        archive.writepy(args.sysconfig_data)
        for child in sorted(args.srcdir_lib.iterdir()):
            resolved = child.resolve()
            if resolved.name == "__pycache__":
                continue
            if not resolved.name.endswith(".py") and not resolved.is_dir():
                continue
            # writepy() writes .pyc files (bytecode).
            archive.writepy(resolved, filterfunc=is_wanted)
171
172
def detect_extension_modules(args: argparse.Namespace) -> dict:
    """Map extension module names to whether the build provides them.

    Two sources are consulted, in this order:

    - ``MODDISABLED_NAMES`` in ``args.buildroot / "Makefile"``: every module
      listed there is recorded as ``False``.  Only the first such line is
      used.
    - ``MODULE_<NAME>_STATE`` entries of ``build_time_vars`` in the file
      args.sysconfig_data: the lower-cased ``<NAME>`` is recorded as ``True``
      only when the state is "yes".  Names already recorded from the
      Makefile keep their value.

    Raises ValueError when a state is not one of "yes", "disabled",
    "missing" or "n/a".
    """
    modules = {}

    # disabled by Modules/Setup.local ?
    # Explicit encoding keeps the result independent of the locale; bytes
    # that are not valid UTF-8 elsewhere in the Makefile must not abort
    # the scan for this one line.
    with open(
        args.buildroot / "Makefile",
        encoding="utf-8",
        errors="surrogateescape",
    ) as f:
        for line in f:
            if line.startswith("MODDISABLED_NAMES="):
                disabled = line.split("=", 1)[1].strip().split()
                for modname in disabled:
                    modules[modname] = False
                break

    # disabled by configure?
    with open(args.sysconfig_data, encoding="utf-8") as f:
        data = f.read()
    # NOTE: this executes the file's content.  It is expected to be a build
    # product, never external input.  A fresh globals dict is used so the
    # executed code neither sees nor modifies this script's globals.
    loc = {}
    exec(data, {}, loc)

    prefix = "MODULE_"
    suffix = "_STATE"
    for key, value in loc["build_time_vars"].items():
        if not key.startswith(prefix) or not key.endswith(suffix):
            continue
        if value not in {"yes", "disabled", "missing", "n/a"}:
            raise ValueError(f"Unsupported value '{value}' for {key}")

        modname = key[len(prefix):-len(suffix)].lower()
        if modname not in modules:
            modules[modname] = value == "yes"
    return modules
201
202
def path(val: str) -> pathlib.Path:
    """Convert *val* to an absolute pathlib.Path (argparse ``type`` hook)."""
    candidate = pathlib.Path(val)
    return candidate.absolute()
205
206
# Command line interface.  Values given on the command line go through
# path(); the defaults are used as written here.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--buildroot",
    type=path,
    default=pathlib.Path(".").absolute(),
    help="absolute path to build root",
)
parser.add_argument(
    "--prefix",
    type=path,
    default=pathlib.Path("/usr/local"),
    help="install prefix",
)
220
221
def main() -> None:
    """Create the WASM asset bundle below ``<buildroot>/<prefix>``.

    Parses the command line, derives all bundle paths, works out which
    stdlib files to omit, then:

    - creates the lib-dynload directory with its ``.empty`` marker,
    - copies os.py into the stdlib directory,
    - writes the stdlib zip file.

    Finishes through parser.exit() with status 0 and a summary message.
    Raises ValueError when the sysconfigdata file is missing.
    """
    args = parser.parse_args()

    # The bundle is laid out as <buildroot>/<prefix without leading "/">.
    relative_prefix = args.prefix.relative_to(pathlib.Path("/"))
    args.srcdir = SRCDIR
    args.srcdir_lib = SRCDIR_LIB
    args.wasm_root = args.buildroot / relative_prefix
    args.wasm_stdlib_zip = args.wasm_root / WASM_STDLIB_ZIP
    args.wasm_stdlib = args.wasm_root / WASM_STDLIB
    args.wasm_dynload = args.wasm_root / WASM_DYNLOAD

    # bpo-17004: zipimport supports only zlib compression.
    # Emscripten ZIP_STORED + -sLZ4=1 linker flags result in a larger file.
    args.compression = zipfile.ZIP_DEFLATED
    args.compresslevel = 9

    args.builddir = get_builddir(args)
    args.sysconfig_data = get_sysconfigdata(args)
    if not args.sysconfig_data.is_file():
        raise ValueError(f"sysconfigdata file {args.sysconfig_data} missing.")

    extmods = detect_extension_modules(args)
    omit_files = list(OMIT_FILES)
    # Regression test sub directories nested in packages that are shipped.
    # They are matched like any other entry of the omit set.
    omit_files.extend(OMIT_SUBDIRS)
    if sysconfig.get_platform().startswith("emscripten"):
        omit_files.extend(OMIT_NETWORKING_FILES)
    # Drop the files that depend on an extension module the build lacks.
    for modname, modfiles in OMIT_MODULE_FILES.items():
        if not extmods.get(modname):
            omit_files.extend(modfiles)

    # Resolved paths, because the zip filter compares resolved paths.
    args.omit_files_absolute = {
        (args.srcdir_lib / name).resolve() for name in omit_files
    }

    # Empty, unused directory for dynamic libs, but required for site
    # initialization.  Creating it also creates the parent directories that
    # the os.py copy and the zip file below are written into.
    args.wasm_dynload.mkdir(parents=True, exist_ok=True)
    marker = args.wasm_dynload / ".empty"
    marker.touch()
    # os.py is a marker for finding the correct lib directory.
    shutil.copy(args.srcdir_lib / "os.py", args.wasm_stdlib)
    # The rest of stdlib that's useful in a WASM context.
    create_stdlib_zip(args)
    size = round(args.wasm_stdlib_zip.stat().st_size / 1024**2, 2)
    parser.exit(0, f"Created {args.wasm_stdlib_zip} ({size} MiB)\n")
265
266
# Build the bundle only when run as a script, not when imported.
if __name__ == "__main__":
    main()