1 import os.path
2 import re
3
4 from c_parser.preprocessor import (
5 get_preprocessor as _get_preprocessor,
6 )
7 from c_parser import (
8 parse_file as _parse_file,
9 parse_files as _parse_files,
10 )
11 from . import REPO_ROOT
12
13
# Glob pattern that clean_lines() substitutes for a leading "* "
# shorthand at the start of a table row (see the INCL_DIRS table).
GLOB_ALL = '**/*'
15
16
def _abs(relfile):
    """Return *relfile* joined onto the repository root (REPO_ROOT)."""
    root = REPO_ROOT
    return os.path.join(root, relfile)
19
20
def clean_lines(text):
    """Clear out comments, blank lines, and leading/trailing whitespace.

    Each surviving line is then post-processed, in this order:

    * a leading "* " shorthand is expanded to GLOB_ALL plus a space;
    * the whole line is joined onto the repo root via _abs().

    A "#" starts a comment only at the beginning of a (stripped) line
    or when it is preceded by whitespace.  A "#" embedded in a token
    (for example the token-pasting operator in a macro value such as
    "sre_ucs2_##F") is data and is preserved.

    Returns a list of the cleaned lines, in their original order.
    """
    glob_all = f'{GLOB_ALL} '
    cleaned = []
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        # Drop a trailing comment.  Requiring whitespace before the "#"
        # keeps values that legitimately contain "#" from being cut off.
        line = re.sub(r'\s#.*', '', line).rstrip()
        line = re.sub(r'^[*] ', glob_all, line)
        cleaned.append(_abs(line))
    return cleaned
31
32
# Not used at runtime: this bare string is never assigned to anything.
# It appears to sketch a command line corresponding to the tables
# defined in this module -- TODO confirm it is still accurate.
'''
@begin=sh@
./python ../c-parser/cpython.py
--exclude '+../c-parser/EXCLUDED'
--macros '+../c-parser/MACROS'
--incldirs '+../c-parser/INCL_DIRS'
--same './Include/cpython/'
Include/*.h
Include/internal/*.h
Modules/**/*.c
Objects/**/*.c
Parser/**/*.c
Python/**/*.c
@end=sh@
'''
48
# XXX Handle these.
# Tab separated:
# One file glob per non-comment row.  clean_lines() strips the comments
# and blank lines and joins each remaining row onto the repo root.
# NOTE(review): EXCLUDED is not referenced by the functions visible in
# this module; presumably it is consumed elsewhere to skip these files.
EXCLUDED = clean_lines('''
# @begin=conf@

# OSX
Modules/_scproxy.c # SystemConfiguration/SystemConfiguration.h

# Windows
Modules/_winapi.c # windows.h
Modules/expat/winconfig.h
Modules/overlapped.c # winsock.h
Python/dynload_win.c # windows.h
Python/thread_nt.h

# other OS-dependent
Python/dynload_aix.c # sys/ldr.h
Python/dynload_dl.c # dl.h
Python/dynload_hpux.c # dl.h
Python/emscripten_signal.c
Python/thread_pthread.h
Python/thread_pthread_stubs.h

# only huge constants (safe but parsing is slow)
Modules/_ssl_data.h
Modules/_ssl_data_31.h
Modules/_ssl_data_300.h
Modules/_ssl_data_111.h
Modules/cjkcodecs/mappings_*.h
Modules/unicodedata_db.h
Modules/unicodename_db.h
Objects/unicodetype_db.h

# generated
Python/deepfreeze/*.c
Python/frozen_modules/*.h
Python/generated_cases.c.h

# not actually source
Python/bytecodes.c

# @end=conf@
''')
92
# XXX Fix the parser.
# Extra EXCLUDED rows, appended to the list built above.  Per the notes
# inside the table, these are files the tool ought to handle but
# currently cannot.
EXCLUDED += clean_lines('''
# The tool should be able to parse these...

# The problem with xmlparse.c is that something
# has gone wrong where # we handle "maybe inline actual"
# in Tools/c-analyzer/c_parser/parser/_global.py.
Modules/expat/xmlparse.c
''')
102
# Per-file include directories, as "glob dirname" rows.
# clean_lines() expands a leading "*" glob to GLOB_ALL and joins each
# row onto the repo root; the trailing [1:] then drops the first cleaned
# row, which is the "glob dirname" header.
# get_preprocessor() passes the result on as file_incldirs.
INCL_DIRS = clean_lines('''
# @begin=tsv@

glob dirname
* .
* ./Include
* ./Include/internal

Modules/_decimal/**/*.c Modules/_decimal/libmpdec
Modules/_elementtree.c Modules/expat
Modules/_hacl/*.c Modules/_hacl/include
Modules/_hacl/*.h Modules/_hacl/include
Modules/md5module.c Modules/_hacl/include
Modules/sha1module.c Modules/_hacl/include
Modules/sha2module.c Modules/_hacl/include
Modules/sha3module.c Modules/_hacl/include
Objects/stringlib/*.h Objects

# possible system-installed headers, just in case
Modules/_tkinter.c /usr/include/tcl8.6
Modules/_uuidmodule.c /usr/include/uuid
Modules/nismodule.c /usr/include/tirpc
Modules/tkappinit.c /usr/include/tcl

# @end=tsv@
''')[1:]
129
# Per-file extra includes, as "glob include" rows.
# clean_lines() joins each row onto the repo root; the trailing [1:]
# drops the first cleaned row, which is the "glob include" header.
# get_preprocessor() passes the result on as file_includes.
INCLUDES = clean_lines('''
# @begin=tsv@

glob include

**/*.h Python.h
Include/**/*.h object.h

# for Py_HAVE_CONDVAR
Include/internal/pycore_gil.h pycore_condvar.h
Python/thread_pthread.h pycore_condvar.h

# other

Objects/stringlib/join.h stringlib/stringdefs.h
Objects/stringlib/ctype.h stringlib/stringdefs.h
Objects/stringlib/transmogrify.h stringlib/stringdefs.h
#Objects/stringlib/fastsearch.h stringlib/stringdefs.h
#Objects/stringlib/count.h stringlib/stringdefs.h
#Objects/stringlib/find.h stringlib/stringdefs.h
#Objects/stringlib/partition.h stringlib/stringdefs.h
#Objects/stringlib/split.h stringlib/stringdefs.h
Objects/stringlib/fastsearch.h stringlib/ucs1lib.h
Objects/stringlib/count.h stringlib/ucs1lib.h
Objects/stringlib/find.h stringlib/ucs1lib.h
Objects/stringlib/partition.h stringlib/ucs1lib.h
Objects/stringlib/split.h stringlib/ucs1lib.h
Objects/stringlib/find_max_char.h Objects/stringlib/ucs1lib.h
Objects/stringlib/count.h Objects/stringlib/fastsearch.h
Objects/stringlib/find.h Objects/stringlib/fastsearch.h
Objects/stringlib/partition.h Objects/stringlib/fastsearch.h
Objects/stringlib/replace.h Objects/stringlib/fastsearch.h
Objects/stringlib/split.h Objects/stringlib/fastsearch.h

# @end=tsv@
''')[1:]
166
# Per-file macro definitions, as "glob name value" rows.
# clean_lines() joins each row onto the repo root; the trailing [1:]
# drops the first cleaned row, which is the "glob name value" header.
# get_preprocessor() passes the result on as file_macros.
MACROS = clean_lines('''
# @begin=tsv@

glob name value

Include/internal/*.h Py_BUILD_CORE 1
Python/**/*.c Py_BUILD_CORE 1
Python/**/*.h Py_BUILD_CORE 1
Parser/**/*.c Py_BUILD_CORE 1
Parser/**/*.h Py_BUILD_CORE 1
Objects/**/*.c Py_BUILD_CORE 1
Objects/**/*.h Py_BUILD_CORE 1

Modules/_asynciomodule.c Py_BUILD_CORE 1
Modules/_codecsmodule.c Py_BUILD_CORE 1
Modules/_collectionsmodule.c Py_BUILD_CORE 1
Modules/_ctypes/_ctypes.c Py_BUILD_CORE 1
Modules/_ctypes/cfield.c Py_BUILD_CORE 1
Modules/_cursesmodule.c Py_BUILD_CORE 1
Modules/_datetimemodule.c Py_BUILD_CORE 1
Modules/_functoolsmodule.c Py_BUILD_CORE 1
Modules/_heapqmodule.c Py_BUILD_CORE 1
Modules/_io/*.c Py_BUILD_CORE 1
Modules/_io/*.h Py_BUILD_CORE 1
Modules/_localemodule.c Py_BUILD_CORE 1
Modules/_operator.c Py_BUILD_CORE 1
Modules/_posixsubprocess.c Py_BUILD_CORE 1
Modules/_sre/sre.c Py_BUILD_CORE 1
Modules/_threadmodule.c Py_BUILD_CORE 1
Modules/_tracemalloc.c Py_BUILD_CORE 1
Modules/_weakref.c Py_BUILD_CORE 1
Modules/_zoneinfo.c Py_BUILD_CORE 1
Modules/atexitmodule.c Py_BUILD_CORE 1
Modules/cmathmodule.c Py_BUILD_CORE 1
Modules/faulthandler.c Py_BUILD_CORE 1
Modules/gcmodule.c Py_BUILD_CORE 1
Modules/getpath.c Py_BUILD_CORE 1
Modules/getpath_noop.c Py_BUILD_CORE 1
Modules/itertoolsmodule.c Py_BUILD_CORE 1
Modules/main.c Py_BUILD_CORE 1
Modules/mathmodule.c Py_BUILD_CORE 1
Modules/posixmodule.c Py_BUILD_CORE 1
Modules/sha256module.c Py_BUILD_CORE 1
Modules/sha512module.c Py_BUILD_CORE 1
Modules/signalmodule.c Py_BUILD_CORE 1
Modules/symtablemodule.c Py_BUILD_CORE 1
Modules/timemodule.c Py_BUILD_CORE 1
Modules/unicodedata.c Py_BUILD_CORE 1

Modules/_json.c Py_BUILD_CORE_BUILTIN 1
Modules/_pickle.c Py_BUILD_CORE_BUILTIN 1
Modules/_testinternalcapi.c Py_BUILD_CORE_BUILTIN 1

Include/cpython/abstract.h Py_CPYTHON_ABSTRACTOBJECT_H 1
Include/cpython/bytearrayobject.h Py_CPYTHON_BYTEARRAYOBJECT_H 1
Include/cpython/bytesobject.h Py_CPYTHON_BYTESOBJECT_H 1
Include/cpython/ceval.h Py_CPYTHON_CEVAL_H 1
Include/cpython/code.h Py_CPYTHON_CODE_H 1
Include/cpython/dictobject.h Py_CPYTHON_DICTOBJECT_H 1
Include/cpython/fileobject.h Py_CPYTHON_FILEOBJECT_H 1
Include/cpython/fileutils.h Py_CPYTHON_FILEUTILS_H 1
Include/cpython/frameobject.h Py_CPYTHON_FRAMEOBJECT_H 1
Include/cpython/import.h Py_CPYTHON_IMPORT_H 1
Include/cpython/interpreteridobject.h Py_CPYTHON_INTERPRETERIDOBJECT_H 1
Include/cpython/listobject.h Py_CPYTHON_LISTOBJECT_H 1
Include/cpython/methodobject.h Py_CPYTHON_METHODOBJECT_H 1
Include/cpython/object.h Py_CPYTHON_OBJECT_H 1
Include/cpython/objimpl.h Py_CPYTHON_OBJIMPL_H 1
Include/cpython/pyerrors.h Py_CPYTHON_ERRORS_H 1
Include/cpython/pylifecycle.h Py_CPYTHON_PYLIFECYCLE_H 1
Include/cpython/pymem.h Py_CPYTHON_PYMEM_H 1
Include/cpython/pystate.h Py_CPYTHON_PYSTATE_H 1
Include/cpython/sysmodule.h Py_CPYTHON_SYSMODULE_H 1
Include/cpython/traceback.h Py_CPYTHON_TRACEBACK_H 1
Include/cpython/tupleobject.h Py_CPYTHON_TUPLEOBJECT_H 1
Include/cpython/unicodeobject.h Py_CPYTHON_UNICODEOBJECT_H 1

# implied include of <unistd.h>
Include/**/*.h _POSIX_THREADS 1
Include/**/*.h HAVE_PTHREAD_H 1

# from pyconfig.h
Include/cpython/pthread_stubs.h HAVE_PTHREAD_STUBS 1
Python/thread_pthread_stubs.h HAVE_PTHREAD_STUBS 1

# from Objects/bytesobject.c
Objects/stringlib/partition.h STRINGLIB_GET_EMPTY() bytes_get_empty()
Objects/stringlib/join.h STRINGLIB_MUTABLE 0
Objects/stringlib/partition.h STRINGLIB_MUTABLE 0
Objects/stringlib/split.h STRINGLIB_MUTABLE 0
Objects/stringlib/transmogrify.h STRINGLIB_MUTABLE 0

# from Makefile
Modules/getpath.c PYTHONPATH 1
Modules/getpath.c PREFIX ...
Modules/getpath.c EXEC_PREFIX ...
Modules/getpath.c VERSION ...
Modules/getpath.c VPATH ...
Modules/getpath.c PLATLIBDIR ...
#Modules/_dbmmodule.c USE_GDBM_COMPAT 1
Modules/_dbmmodule.c USE_NDBM 1
#Modules/_dbmmodule.c USE_BERKDB 1

# See: setup.py
Modules/_decimal/**/*.c CONFIG_64 1
Modules/_decimal/**/*.c ASM 1
Modules/expat/xmlparse.c HAVE_EXPAT_CONFIG_H 1
Modules/expat/xmlparse.c XML_POOR_ENTROPY 1
Modules/_dbmmodule.c HAVE_GDBM_DASH_NDBM_H 1

# others
Modules/_sre/sre_lib.h LOCAL(type) static inline type
Modules/_sre/sre_lib.h SRE(F) sre_ucs2_##F
Objects/stringlib/codecs.h STRINGLIB_IS_UNICODE 1

# @end=tsv@
''')[1:]
284
285 # -pthread
286 # -Wno-unused-result
287 # -Wsign-compare
288 # -g
289 # -Og
290 # -Wall
291 # -std=c99
292 # -Wextra
293 # -Wno-unused-result -Wno-unused-parameter
294 # -Wno-missing-field-initializers
295 # -Werror=implicit-function-declaration
296
# Default file_same mapping: get_preprocessor() copies it, merges any
# caller-supplied entries over the copy, and passes the result on.
# Keys are repo-root-joined globs; values are lists of paths.
# NOTE(review): presumably each value lists locations treated as
# belonging to the key file -- verify against c_parser.preprocessor.
SAME = {
    _abs('Include/*.h'): [_abs('Include/cpython/')],
    # NOTE(review): unlike the entry above, this value is not passed
    # through _abs(), so it stays relative -- confirm that is intended.
    _abs('Python/ceval.c'): ['Python/generated_cases.c.h'],
}
301
# Size limits handed to the parser as file_maxsizes by parse_file() and
# parse_files().  Every glob below is written relative to the repo root
# and made absolute with _abs() while the mapping is built.
#
# GLOB: (MAXTEXT, MAXLINES),
# default: (10_000, 200)
# First match wins, so the order of the rows matters.
MAX_SIZES = {
    _abs(relglob): limits
    for relglob, limits in (
        ('Modules/_ctypes/ctypes.h', (5_000, 500)),
        ('Modules/_datetimemodule.c', (20_000, 300)),
        ('Modules/_hacl/*.c', (200_000, 500)),
        ('Modules/posixmodule.c', (20_000, 500)),
        ('Modules/termios.c', (10_000, 800)),
        ('Modules/_testcapimodule.c', (20_000, 400)),
        ('Modules/expat/expat.h', (10_000, 400)),
        ('Objects/stringlib/unicode_format.h', (10_000, 400)),
        ('Objects/typeobject.c', (35_000, 200)),
        ('Python/compile.c', (20_000, 500)),
        ('Python/pylifecycle.c', (500_000, 5000)),
        ('Python/pystate.c', (500_000, 5000)),

        # Generated files:
        ('Include/internal/pycore_opcode.h', (10_000, 1000)),
        ('Include/internal/pycore_global_strings.h', (5_000, 1000)),
        ('Include/internal/pycore_runtime_init_generated.h', (5_000, 1000)),
        ('Python/deepfreeze/*.c', (20_000, 500)),
        ('Python/frozen_modules/*.h', (20_000, 500)),
        ('Python/opcode_targets.h', (10_000, 500)),
        ('Python/stdlib_module_names.h', (5_000, 500)),

        # These large files are currently ignored (see above).
        ('Modules/_ssl_data.h', (80_000, 10_000)),
        ('Modules/_ssl_data_300.h', (80_000, 10_000)),
        ('Modules/_ssl_data_111.h', (80_000, 10_000)),
        ('Modules/cjkcodecs/mappings_*.h', (160_000, 2_000)),
        ('Modules/unicodedata_db.h', (180_000, 3_000)),
        ('Modules/unicodename_db.h', (1_200_000, 15_000)),
        ('Objects/unicodetype_db.h', (240_000, 3_000)),

        # Catch-alls:
        ('Include/**/*.h', (5_000, 500)),
    )
}
340
341
def get_preprocessor(*,
                     file_macros=None,
                     file_includes=None,
                     file_incldirs=None,
                     file_same=None,
                     **kwargs
                     ):
    """Call c_parser's get_preprocessor() with the CPython tables merged in.

    The module-level MACROS, INCLUDES and INCL_DIRS rows always come
    first; caller-supplied file_macros / file_includes / file_incldirs
    rows are appended after them.  file_same entries are merged over a
    copy of SAME, so a caller key replaces the default for that key.
    All remaining keyword arguments are forwarded unchanged.

    Returns whatever c_parser.preprocessor.get_preprocessor() returns.
    """
    # "x or ()" treats None and empty collections alike: nothing is added.
    all_macros = (*MACROS, *(file_macros or ()))
    all_includes = (*INCLUDES, *(file_includes or ()))
    all_incldirs = (*INCL_DIRS, *(file_incldirs or ()))
    # Work on a copy so the module-level default is never mutated.
    all_same = dict(SAME)
    all_same.update(file_same or ())
    return _get_preprocessor(
        file_macros=all_macros,
        file_includes=all_includes,
        file_incldirs=all_incldirs,
        file_same=all_same,
        **kwargs
    )
368
369
def parse_file(filename, *,
               match_kind=None,
               ignore_exc=None,
               log_err=None,
               ):
    """Yield everything c_parser.parse_file() yields for *filename*.

    A preprocessor is obtained from get_preprocessor() using
    *ignore_exc* and *log_err*, and MAX_SIZES is supplied as the
    per-file size limits.  *match_kind* is forwarded unchanged.
    """
    parser_kwargs = dict(
        match_kind=match_kind,
        get_file_preprocessor=get_preprocessor(
            ignore_exc=ignore_exc,
            log_err=log_err,
        ),
        file_maxsizes=MAX_SIZES,
    )
    yield from _parse_file(filename, **parser_kwargs)
385
386
def parse_files(filenames=None, *,
                match_kind=None,
                ignore_exc=None,
                log_err=None,
                get_file_preprocessor=None,
                **file_kwargs
                ):
    """Yield everything c_parser.parse_files() yields for *filenames*.

    When *get_file_preprocessor* is None, one is obtained from
    get_preprocessor() using *ignore_exc* and *log_err*; when it is
    supplied, those two arguments are not used.  MAX_SIZES is supplied
    as the per-file size limits, and *match_kind* plus any extra
    keyword arguments are forwarded unchanged.
    """
    preprocessor = get_file_preprocessor
    if preprocessor is None:
        preprocessor = get_preprocessor(
            ignore_exc=ignore_exc,
            log_err=log_err,
        )
    yield from _parse_files(
        filenames,
        match_kind=match_kind,
        get_file_preprocessor=preprocessor,
        file_maxsizes=MAX_SIZES,
        **file_kwargs
    )