1 import contextlib
2 import io
3 import os.path
4 import re
5
# Pin this module's path to an absolute one so the derived locations below
# do not depend on the current working directory.
__file__ = os.path.abspath(__file__)
# Three levels above this file (its directory, then two parents).
ROOT = os.path.normpath(
    os.path.join(__file__, os.pardir, os.pardir, os.pardir)
)
# Directory holding the headers that the generate_*() functions rewrite.
INTERNAL = os.path.join(ROOT, 'Include', 'internal')
9
10
# Names that the _Py_ID() scan picks up but that must not be added to the
# set of generated identifiers (presumably macro parameters rather than real
# identifiers -- confirm against the files named below).
IGNORED = {
    # Python/_warnings.c
    'ACTION',
    # Python/_warnings.c and Objects/funcobject.c
    'ATTR',
    # Objects/typeobject.c
    'DUNDER',
    'RDUNDER',
    # Objects/weakrefobject.c
    'SPECIAL',
}
# Identifiers that are always generated, in addition to the ones found by
# scanning the C sources.  Each group notes the macro and file it comes from.
IDENTIFIERS = [
    # from ADD() Python/_warnings.c
    'default', 'ignore',

    # from GET_WARNINGS_ATTR() in Python/_warnings.c
    'WarningMessage', '_showwarnmsg', '_warn_unawaited_coroutine',
    'defaultaction', 'filters', 'onceregistry',

    # from WRAP_METHOD() in Objects/weakrefobject.c
    '__bytes__', '__reversed__',

    # from COPY_ATTR() in Objects/funcobject.c
    '__module__', '__name__', '__qualname__', '__doc__', '__annotations__',

    # from SLOT* in Objects/typeobject.c
    '__abs__', '__add__', '__and__', '__divmod__',
    '__float__', '__floordiv__', '__getitem__',
    '__iadd__', '__iand__', '__ifloordiv__', '__ilshift__',
    '__imatmul__', '__imod__', '__imul__',
    '__int__', '__invert__',
    '__ior__', '__irshift__', '__isub__', '__itruediv__', '__ixor__',
    '__lshift__', '__matmul__', '__mod__', '__mul__', '__neg__',
    '__or__', '__pos__', '__pow__',
    '__radd__', '__rand__', '__rdivmod__', '__rfloordiv__',
    '__rlshift__', '__rmatmul__', '__rmod__', '__rmul__',
    '__ror__', '__rpow__', '__rrshift__',
    '__rshift__',
    '__rsub__', '__rtruediv__', '__rxor__',
    '__str__', '__sub__', '__truediv__', '__xor__',
]
92
93
94 #######################################
95 # helpers
96
def iter_files():
    """Yield the path of every ``.c`` and ``.h`` file in the scanned trees.

    The trees are the listed top-level directories under ROOT; each one is
    walked recursively with os.walk().
    """
    scanned = ('Modules', 'Objects', 'Parser', 'PC', 'Programs', 'Python')
    for topdir in scanned:
        tree = os.path.join(ROOT, topdir)
        for dirpath, _subdirs, filenames in os.walk(tree):
            for basename in filenames:
                if basename.endswith(('.c', '.h')):
                    yield os.path.join(dirpath, basename)
105
106
def iter_global_strings():
    """Scan the files from iter_files() for global string declarations.

    Yields 5-tuples ``(name, string, filename, lineno, line)``:

    * for each ``_Py_ID(name)`` match, ``string`` is None;
    * for each ``_Py_DECLARE_STR(name, "string")`` match, ``string`` is the
      text between the double quotes.

    ``lineno`` is 1-based and ``line`` is the full source line.  Within one
    line, all ``_Py_ID`` matches are yielded before any ``_Py_DECLARE_STR``
    match.
    """
    id_pattern = re.compile(r'\b_Py_ID\((\w+)\)')
    str_pattern = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)')
    for path in iter_files():
        try:
            source = open(path, encoding='utf-8')
        except FileNotFoundError:
            # The file disappeared after it was listed; it must have been
            # a temporary file, so skip it.
            continue
        with source:
            for lineno, text in enumerate(source, start=1):
                for match in id_pattern.finditer(text):
                    yield match.group(1), None, path, lineno, text
                for match in str_pattern.finditer(text):
                    yield match.group(1), match.group(2), path, lineno, text
124
125
def iter_to_marker(lines, marker):
    """Yield items from *lines* until one equals *marker*.

    Trailing whitespace is ignored in the comparison.  The marker line
    itself is consumed but not yielded, so when *lines* is an iterator the
    caller can keep reading from the line that follows the marker.
    """
    for current in lines:
        if current.rstrip() != marker:
            yield current
        else:
            return
131
132
class Printer:
    """Line-oriented writer that tracks indentation and line continuations.

    Attributes:
        level: current indentation depth (number of indent units).
        file: the object lines are written to; it must provide writelines().
        continuation: stack of booleans; while the top entry is true, every
            written line is terminated with a backslash.
    """

    def __init__(self, file):
        self.level = 0
        self.file = file
        self.continuation = [False]

    @contextlib.contextmanager
    def indent(self):
        """Increase the indentation by one level for the ``with`` body."""
        save_level = self.level
        try:
            self.level += 1
            yield
        finally:
            # Restore the level even if the body raised.
            self.level = save_level

    def write(self, arg):
        """Write *arg* as one line at the current indentation.

        When the innermost continuation flag is set, the line ends with a
        backslash, preceded by a space unless *arg* is empty.
        """
        eol = '\n'
        if self.continuation[-1]:
            eol = f' \\{eol}' if arg else f'\\{eol}'
        # NOTE(review): the per-level indent unit is the literal exactly as
        # it appears in the source; confirm the intended width.
        self.file.writelines((" "*self.level, arg, eol))

    @contextlib.contextmanager
    def block(self, prefix, suffix="", *, continuation=None):
        """Write a brace-delimited block around the ``with`` body.

        Writes ``prefix {``, runs the body one level deeper, then writes
        ``}suffix``.

        Args:
            prefix: text placed before the opening brace.
            suffix: text placed after the closing brace.
            continuation: continuation flag for the opening line and the
                body; None inherits the flag currently in effect.  The
                closing line is written with the enclosing flag.
        """
        if continuation is None:
            continuation = self.continuation[-1]
        self.continuation.append(continuation)

        self.write(prefix + " {")
        try:
            with self.indent():
                yield
        finally:
            # Keep the stack balanced even if the body raised, matching
            # how indent() restores the level.
            self.continuation.pop()
        # Only reached when the body completed normally.
        self.write("}" + suffix)
166
167
@contextlib.contextmanager
def open_for_changes(filename, orig):
    """Like open() but only write to the file if it changed.

    Yields an in-memory text buffer.  When the ``with`` body finishes, the
    buffered text is compared with *orig*: if they differ, *filename* is
    overwritten (UTF-8) with the new text; otherwise a "not changed" note
    is printed and the file is left untouched.
    """
    buffer = io.StringIO()
    yield buffer
    generated = buffer.getvalue()
    if generated == orig:
        print(f'# not changed: {filename}')
        return
    with open(filename, 'w', encoding='utf-8') as target:
        target.write(generated)
179
180
181 #######################################
182 # the global objects
183
# Marker lines delimiting the generated region of each header.  Everything
# between them is replaced on each run; the text outside is preserved.
START = (
    '/* The following is auto-generated by '
    'Tools/scripts/generate_global_objects.py. */'
)
END = '/* End auto-generated code */'
186
187
def generate_global_strings(identifiers, strings):
    """Regenerate the marked region of ``pycore_global_strings.h``.

    The text before the START marker and after the END marker is kept; the
    region between them is replaced with a ``struct _Py_global_strings``
    definition.  The file is only rewritten if the result differs from its
    current content (see open_for_changes()).

    Args:
        identifiers: iterable of identifier names; emitted in sorted order,
            one ``STRUCT_FOR_ID()`` entry each.
        strings: mapping of string literal -> variable name; emitted sorted
            by variable name, one ``STRUCT_FOR_STR()`` entry each.
    """
    filename = os.path.join(INTERNAL, 'pycore_global_strings.h')

    # Read the non-generated part of the file.
    # Decode as UTF-8 explicitly: open_for_changes() writes UTF-8, so the
    # read must not depend on the locale's default encoding.
    with open(filename, encoding='utf-8') as infile:
        orig = infile.read()
    lines = iter(orig.rstrip().splitlines())
    before = '\n'.join(iter_to_marker(lines, START))
    # Skip the previously generated region, up to and including END.
    for _ in iter_to_marker(lines, END):
        pass
    after = '\n'.join(lines)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        with printer.block('struct _Py_global_strings', ';'):
            with printer.block('struct', ' literals;'):
                for literal, name in sorted(strings.items(), key=lambda x: x[1]):
                    printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
            # Blank separator line, written directly so it is not indented.
            outfile.write('\n')
            with printer.block('struct', ' identifiers;'):
                for name in sorted(identifiers):
                    assert name.isidentifier(), name
                    printer.write(f'STRUCT_FOR_ID({name})')
            with printer.block('struct', ' ascii[128];'):
                printer.write("PyASCIIObject _ascii;")
                printer.write("uint8_t _data[2];")
            with printer.block('struct', ' latin1[128];'):
                printer.write("PyCompactUnicodeObject _latin1;")
                printer.write("uint8_t _data[2];")
        printer.write(END)
        printer.write(after)
222
223
def generate_runtime_init(identifiers, strings):
    """Regenerate the marked region of ``pycore_runtime_init.h``.

    The small-int range is taken from the ``_PY_NSMALLPOSINTS`` and
    ``_PY_NSMALLNEGINTS`` defines in ``pycore_global_objects.h``.  The text
    before the START marker and after the END marker is kept; the region
    between them is replaced with the ``_Py_global_objects_INIT`` macro.
    The file is only rewritten if the result differs from its current
    content (see open_for_changes()).

    Args:
        identifiers: iterable of identifier names; emitted in sorted order,
            one ``INIT_ID()`` entry each.
        strings: mapping of string literal -> variable name; emitted sorted
            by variable name, one ``INIT_STR()`` entry each.

    Raises:
        NotImplementedError: if ``pycore_global_objects.h`` has no
            ``#define _PY_NSMALLNEGINTS`` line.
    """
    # First get some info from the declarations.
    nsmallposints = None
    nsmallnegints = None
    # Decode as UTF-8 explicitly so the result does not depend on the
    # locale's default encoding.
    header = os.path.join(INTERNAL, 'pycore_global_objects.h')
    with open(header, encoding='utf-8') as infile:
        for line in infile:
            if line.startswith('#define _PY_NSMALLPOSINTS'):
                nsmallposints = int(line.split()[-1])
            elif line.startswith('#define _PY_NSMALLNEGINTS'):
                nsmallnegints = int(line.split()[-1])
                # Scanning stops at this define.
                break
        else:
            # Reached the end of the file without seeing the define.
            raise NotImplementedError
    assert nsmallposints and nsmallnegints

    # Then target the runtime initializer.
    filename = os.path.join(INTERNAL, 'pycore_runtime_init.h')

    # Read the non-generated part of the file.
    # open_for_changes() writes UTF-8, so read with the same encoding.
    with open(filename, encoding='utf-8') as infile:
        orig = infile.read()
    lines = iter(orig.rstrip().splitlines())
    before = '\n'.join(iter_to_marker(lines, START))
    # Skip the previously generated region, up to and including END.
    for _ in iter_to_marker(lines, END):
        pass
    after = '\n'.join(lines)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        # The whole initializer is one macro, so every line inside it is
        # written with a trailing backslash.
        with printer.block('#define _Py_global_objects_INIT', continuation=True):
            with printer.block('.singletons =', ','):
                # Global int objects.
                with printer.block('.small_ints =', ','):
                    for i in range(-nsmallnegints, nsmallposints):
                        printer.write(f'_PyLong_DIGIT_INIT({i}),')
                printer.write('')
                # Global bytes objects.
                printer.write('.bytes_empty = _PyBytes_SIMPLE_INIT(0, 0),')
                with printer.block('.bytes_characters =', ','):
                    for i in range(256):
                        printer.write(f'_PyBytes_CHAR_INIT({i}),')
                printer.write('')
                # Global strings.
                with printer.block('.strings =', ','):
                    with printer.block('.literals =', ','):
                        for literal, name in sorted(strings.items(), key=lambda x: x[1]):
                            printer.write(f'INIT_STR({name}, "{literal}"),')
                    with printer.block('.identifiers =', ','):
                        for name in sorted(identifiers):
                            assert name.isidentifier(), name
                            printer.write(f'INIT_ID({name}),')
                    with printer.block('.ascii =', ','):
                        for i in range(128):
                            printer.write(f'_PyASCIIObject_INIT("\\x{i:02x}"),')
                    with printer.block('.latin1 =', ','):
                        for i in range(128, 256):
                            printer.write(f'_PyUnicode_LATIN1_INIT("\\x{i:02x}"),')
                printer.write('')
                with printer.block('.tuple_empty =', ','):
                    printer.write('.ob_base = _PyVarObject_IMMORTAL_INIT(&PyTuple_Type, 0)')
        printer.write(END)
        printer.write(after)
289
290
def get_identifiers_and_strings() -> 'tuple[set[str], dict[str, str]]':
    """Collect the identifiers and string literals to generate.

    Returns:
        A pair ``(identifiers, strings)``.  ``identifiers`` is the set of
        IDENTIFIERS plus every name reported by iter_global_strings()
        without a literal, except the names in IGNORED.  ``strings`` maps
        each declared literal to the variable name it was declared with.

    Raises:
        ValueError: if the same literal is declared under two different
            variable names.
    """
    identifiers = set(IDENTIFIERS)
    strings = {}
    for name, string, *_ in iter_global_strings():
        if string is None:
            # An identifier reference: there is no literal attached.
            if name not in IGNORED:
                identifiers.add(name)
            continue
        # setdefault() records the first name seen for this literal and
        # returns the recorded name on every later occurrence.
        known = strings.setdefault(string, name)
        if name != known:
            # `strings` is keyed by the literal, so report the two
            # conflicting names for that literal.
            raise ValueError(
                f'name mismatch for string {string!r} '
                f'({name!r} != {known!r})')
    return identifiers, strings
304
305
306 #######################################
307 # the script
308
def main() -> None:
    """Scan the sources, then regenerate both generated headers."""
    collected = get_identifiers_and_strings()

    generate_global_strings(*collected)
    generate_runtime_init(*collected)
314
315
if __name__ == '__main__':
    # No options are defined; parsing still provides --help and rejects
    # unexpected arguments.
    import argparse
    cli_options = argparse.ArgumentParser().parse_args()
    main(**vars(cli_options))