(root)/
Python-3.12.0/
Tools/
c-analyzer/
cpython/
_parser.py
       1  import os.path
       2  import re
       3  
       4  from c_parser.preprocessor import (
       5      get_preprocessor as _get_preprocessor,
       6  )
       7  from c_parser import (
       8      parse_file as _parse_file,
       9      parse_files as _parse_files,
      10  )
      11  from . import REPO_ROOT
      12  
      13  
      14  GLOB_ALL = '**/*'
      15  
      16  
      17  def _abs(relfile):
      18      return os.path.join(REPO_ROOT, relfile)
      19  
      20  
      21  def clean_lines(text):
      22      """Clear out comments, blank lines, and leading/trailing whitespace."""
      23      lines = (line.strip() for line in text.splitlines())
      24      lines = (line.partition('#')[0].rstrip()
      25               for line in lines
      26               if line and not line.startswith('#'))
      27      glob_all = f'{GLOB_ALL} '
      28      lines = (re.sub(r'^[*] ', glob_all, line) for line in lines)
      29      lines = (_abs(line) for line in lines)
      30      return list(lines)
      31  
      32  
      33  '''
      34  @begin=sh@
      35  ./python ../c-parser/cpython.py
      36      --exclude '+../c-parser/EXCLUDED'
      37      --macros '+../c-parser/MACROS'
      38      --incldirs '+../c-parser/INCL_DIRS'
      39      --same './Include/cpython/'
      40      Include/*.h
      41      Include/internal/*.h
      42      Modules/**/*.c
      43      Objects/**/*.c
      44      Parser/**/*.c
      45      Python/**/*.c
      46  @end=sh@
      47  '''
      48  
# XXX Handle these.
# Tab separated:
# One path or glob per line, written relative to the repo root.
# clean_lines() drops the blank lines and "#" comments below and joins
# each remaining entry onto REPO_ROOT.
# NOTE(review): EXCLUDED is not read anywhere in the visible part of this
# module; presumably it is consumed elsewhere as the list of files to
# skip -- confirm against the callers.
EXCLUDED = clean_lines('''
# @begin=conf@

# OSX
Modules/_scproxy.c                # SystemConfiguration/SystemConfiguration.h

# Windows
Modules/_winapi.c               # windows.h
Modules/expat/winconfig.h
Modules/overlapped.c            # winsock.h
Python/dynload_win.c            # windows.h
Python/thread_nt.h

# other OS-dependent
Python/dynload_aix.c            # sys/ldr.h
Python/dynload_dl.c             # dl.h
Python/dynload_hpux.c           # dl.h
Python/emscripten_signal.c
Python/thread_pthread.h
Python/thread_pthread_stubs.h

# only huge constants (safe but parsing is slow)
Modules/_ssl_data.h
Modules/_ssl_data_31.h
Modules/_ssl_data_300.h
Modules/_ssl_data_111.h
Modules/cjkcodecs/mappings_*.h
Modules/unicodedata_db.h
Modules/unicodename_db.h
Objects/unicodetype_db.h

# generated
Python/deepfreeze/*.c
Python/frozen_modules/*.h
Python/generated_cases.c.h

# not actually source
Python/bytecodes.c

# @end=conf@
''')
      92  
# XXX Fix the parser.
# Appends to the EXCLUDED list built earlier in this module, so this
# statement must run after that assignment.  The entries use the same
# format: one repo-relative path per line, with "#" comments removed by
# clean_lines().
EXCLUDED += clean_lines('''
# The tool should be able to parse these...

# The problem with xmlparse.c is that something
# has gone wrong where # we handle "maybe inline actual"
# in Tools/c-analyzer/c_parser/parser/_global.py.
Modules/expat/xmlparse.c
''')
     102  
# Tab-separated table with the columns "glob" and "dirname".
# clean_lines() joins each row (as a whole line) onto REPO_ROOT, and the
# trailing [1:] drops the first cleaned row, i.e. the "glob<TAB>dirname"
# header.  get_preprocessor() passes these rows on as file_incldirs.
# NOTE(review): the "*" rows appear to be followed by a tab rather than a
# space, in which case the "* " -> GLOB_ALL rewrite in clean_lines() does
# not apply to them -- confirm that this is intended.
INCL_DIRS = clean_lines('''
# @begin=tsv@

glob	dirname
*	.
*	./Include
*	./Include/internal

Modules/_decimal/**/*.c	Modules/_decimal/libmpdec
Modules/_elementtree.c	Modules/expat
Modules/_hacl/*.c	Modules/_hacl/include
Modules/_hacl/*.h	Modules/_hacl/include
Modules/md5module.c	Modules/_hacl/include
Modules/sha1module.c	Modules/_hacl/include
Modules/sha2module.c	Modules/_hacl/include
Modules/sha3module.c	Modules/_hacl/include
Objects/stringlib/*.h	Objects

# possible system-installed headers, just in case
Modules/_tkinter.c	/usr/include/tcl8.6
Modules/_uuidmodule.c	/usr/include/uuid
Modules/nismodule.c	/usr/include/tirpc
Modules/tkappinit.c	/usr/include/tcl

# @end=tsv@
''')[1:]
     129  
# Tab-separated table with the columns "glob" and "include".
# clean_lines() joins each row (as a whole line) onto REPO_ROOT, and the
# trailing [1:] drops the first cleaned row, i.e. the "glob<TAB>include"
# header.  get_preprocessor() passes these rows on as file_includes.
# Rows that start with "#" inside the text are comments and are dropped
# by clean_lines(), so the commented-out stringdefs.h rows are inactive.
INCLUDES = clean_lines('''
# @begin=tsv@

glob	include

**/*.h	Python.h
Include/**/*.h	object.h

# for Py_HAVE_CONDVAR
Include/internal/pycore_gil.h	pycore_condvar.h
Python/thread_pthread.h	pycore_condvar.h

# other

Objects/stringlib/join.h	stringlib/stringdefs.h
Objects/stringlib/ctype.h	stringlib/stringdefs.h
Objects/stringlib/transmogrify.h	stringlib/stringdefs.h
#Objects/stringlib/fastsearch.h	stringlib/stringdefs.h
#Objects/stringlib/count.h	stringlib/stringdefs.h
#Objects/stringlib/find.h	stringlib/stringdefs.h
#Objects/stringlib/partition.h	stringlib/stringdefs.h
#Objects/stringlib/split.h	stringlib/stringdefs.h
Objects/stringlib/fastsearch.h	stringlib/ucs1lib.h
Objects/stringlib/count.h	stringlib/ucs1lib.h
Objects/stringlib/find.h	stringlib/ucs1lib.h
Objects/stringlib/partition.h	stringlib/ucs1lib.h
Objects/stringlib/split.h	stringlib/ucs1lib.h
Objects/stringlib/find_max_char.h	Objects/stringlib/ucs1lib.h
Objects/stringlib/count.h	Objects/stringlib/fastsearch.h
Objects/stringlib/find.h	Objects/stringlib/fastsearch.h
Objects/stringlib/partition.h	Objects/stringlib/fastsearch.h
Objects/stringlib/replace.h	Objects/stringlib/fastsearch.h
Objects/stringlib/split.h	Objects/stringlib/fastsearch.h

# @end=tsv@
''')[1:]
     166  
# Tab-separated table with the columns "glob", "name" and "value".
# clean_lines() joins each row (as a whole line) onto REPO_ROOT, and the
# trailing [1:] drops the first cleaned row, i.e. the header.
# get_preprocessor() passes these rows on as file_macros.
# Rows that start with "#" inside the text are comments and are dropped
# by clean_lines().
MACROS = clean_lines('''
# @begin=tsv@

glob	name	value

Include/internal/*.h	Py_BUILD_CORE	1
Python/**/*.c	Py_BUILD_CORE	1
Python/**/*.h	Py_BUILD_CORE	1
Parser/**/*.c	Py_BUILD_CORE	1
Parser/**/*.h	Py_BUILD_CORE	1
Objects/**/*.c	Py_BUILD_CORE	1
Objects/**/*.h	Py_BUILD_CORE	1

Modules/_asynciomodule.c	Py_BUILD_CORE	1
Modules/_codecsmodule.c	Py_BUILD_CORE	1
Modules/_collectionsmodule.c	Py_BUILD_CORE	1
Modules/_ctypes/_ctypes.c	Py_BUILD_CORE	1
Modules/_ctypes/cfield.c	Py_BUILD_CORE	1
Modules/_cursesmodule.c	Py_BUILD_CORE	1
Modules/_datetimemodule.c	Py_BUILD_CORE	1
Modules/_functoolsmodule.c	Py_BUILD_CORE	1
Modules/_heapqmodule.c	Py_BUILD_CORE	1
Modules/_io/*.c	Py_BUILD_CORE	1
Modules/_io/*.h	Py_BUILD_CORE	1
Modules/_localemodule.c	Py_BUILD_CORE	1
Modules/_operator.c	Py_BUILD_CORE	1
Modules/_posixsubprocess.c	Py_BUILD_CORE	1
Modules/_sre/sre.c	Py_BUILD_CORE	1
Modules/_threadmodule.c	Py_BUILD_CORE	1
Modules/_tracemalloc.c	Py_BUILD_CORE	1
Modules/_weakref.c	Py_BUILD_CORE	1
Modules/_zoneinfo.c	Py_BUILD_CORE	1
Modules/atexitmodule.c	Py_BUILD_CORE	1
Modules/cmathmodule.c	Py_BUILD_CORE	1
Modules/faulthandler.c	Py_BUILD_CORE	1
Modules/gcmodule.c	Py_BUILD_CORE	1
Modules/getpath.c	Py_BUILD_CORE	1
Modules/getpath_noop.c	Py_BUILD_CORE	1
Modules/itertoolsmodule.c	Py_BUILD_CORE	1
Modules/main.c	Py_BUILD_CORE	1
Modules/mathmodule.c	Py_BUILD_CORE	1
Modules/posixmodule.c	Py_BUILD_CORE	1
Modules/sha256module.c	Py_BUILD_CORE	1
Modules/sha512module.c	Py_BUILD_CORE	1
Modules/signalmodule.c	Py_BUILD_CORE	1
Modules/symtablemodule.c	Py_BUILD_CORE	1
Modules/timemodule.c	Py_BUILD_CORE	1
Modules/unicodedata.c	Py_BUILD_CORE	1

Modules/_json.c	Py_BUILD_CORE_BUILTIN	1
Modules/_pickle.c	Py_BUILD_CORE_BUILTIN	1
Modules/_testinternalcapi.c	Py_BUILD_CORE_BUILTIN	1

Include/cpython/abstract.h	Py_CPYTHON_ABSTRACTOBJECT_H	1
Include/cpython/bytearrayobject.h	Py_CPYTHON_BYTEARRAYOBJECT_H	1
Include/cpython/bytesobject.h	Py_CPYTHON_BYTESOBJECT_H	1
Include/cpython/ceval.h	Py_CPYTHON_CEVAL_H	1
Include/cpython/code.h	Py_CPYTHON_CODE_H	1
Include/cpython/dictobject.h	Py_CPYTHON_DICTOBJECT_H	1
Include/cpython/fileobject.h	Py_CPYTHON_FILEOBJECT_H	1
Include/cpython/fileutils.h	Py_CPYTHON_FILEUTILS_H	1
Include/cpython/frameobject.h	Py_CPYTHON_FRAMEOBJECT_H	1
Include/cpython/import.h	Py_CPYTHON_IMPORT_H	1
Include/cpython/interpreteridobject.h	Py_CPYTHON_INTERPRETERIDOBJECT_H	1
Include/cpython/listobject.h	Py_CPYTHON_LISTOBJECT_H	1
Include/cpython/methodobject.h	Py_CPYTHON_METHODOBJECT_H	1
Include/cpython/object.h	Py_CPYTHON_OBJECT_H	1
Include/cpython/objimpl.h	Py_CPYTHON_OBJIMPL_H	1
Include/cpython/pyerrors.h	Py_CPYTHON_ERRORS_H	1
Include/cpython/pylifecycle.h	Py_CPYTHON_PYLIFECYCLE_H	1
Include/cpython/pymem.h	Py_CPYTHON_PYMEM_H	1
Include/cpython/pystate.h	Py_CPYTHON_PYSTATE_H	1
Include/cpython/sysmodule.h	Py_CPYTHON_SYSMODULE_H	1
Include/cpython/traceback.h	Py_CPYTHON_TRACEBACK_H	1
Include/cpython/tupleobject.h	Py_CPYTHON_TUPLEOBJECT_H	1
Include/cpython/unicodeobject.h	Py_CPYTHON_UNICODEOBJECT_H	1

# implied include of <unistd.h>
Include/**/*.h	_POSIX_THREADS	1
Include/**/*.h	HAVE_PTHREAD_H	1

# from pyconfig.h
Include/cpython/pthread_stubs.h	HAVE_PTHREAD_STUBS	1
Python/thread_pthread_stubs.h	HAVE_PTHREAD_STUBS	1

# from Objects/bytesobject.c
Objects/stringlib/partition.h	STRINGLIB_GET_EMPTY()	bytes_get_empty()
Objects/stringlib/join.h	STRINGLIB_MUTABLE	0
Objects/stringlib/partition.h	STRINGLIB_MUTABLE	0
Objects/stringlib/split.h	STRINGLIB_MUTABLE	0
Objects/stringlib/transmogrify.h	STRINGLIB_MUTABLE	0

# from Makefile
Modules/getpath.c	PYTHONPATH	1
Modules/getpath.c	PREFIX	...
Modules/getpath.c	EXEC_PREFIX	...
Modules/getpath.c	VERSION	...
Modules/getpath.c	VPATH	...
Modules/getpath.c	PLATLIBDIR	...
#Modules/_dbmmodule.c	USE_GDBM_COMPAT	1
Modules/_dbmmodule.c	USE_NDBM	1
#Modules/_dbmmodule.c	USE_BERKDB	1

# See: setup.py
Modules/_decimal/**/*.c	CONFIG_64	1
Modules/_decimal/**/*.c	ASM	1
Modules/expat/xmlparse.c	HAVE_EXPAT_CONFIG_H	1
Modules/expat/xmlparse.c	XML_POOR_ENTROPY	1
Modules/_dbmmodule.c	HAVE_GDBM_DASH_NDBM_H	1

# others
Modules/_sre/sre_lib.h	LOCAL(type)	static inline type
Modules/_sre/sre_lib.h	SRE(F)	sre_ucs2_##F
Objects/stringlib/codecs.h	STRINGLIB_IS_UNICODE	1

# @end=tsv@
''')[1:]
     284  
     285  # -pthread
     286  # -Wno-unused-result
     287  # -Wsign-compare
     288  # -g
     289  # -Og
     290  # -Wall
     291  # -std=c99
     292  # -Wextra
     293  # -Wno-unused-result -Wno-unused-parameter
     294  # -Wno-missing-field-initializers
     295  # -Werror=implicit-function-declaration
     296  
# Maps an absolute glob/path to a list of associated paths.
# get_preprocessor() copies this dict, merges any caller-supplied
# file_same into the copy, and passes the result on as file_same.
SAME = {
    _abs('Include/*.h'): [_abs('Include/cpython/')],
    # NOTE(review): this value is repo-relative while the one above is
    # absolute -- confirm that the mix is intentional.
    _abs('Python/ceval.c'): ['Python/generated_cases.c.h'],
}
     301  
# Per-file size limits, keyed by absolute glob.  parse_file() and
# parse_files() pass this mapping on as file_maxsizes.  Entry order is
# significant (see "First match wins" below), which is why the broad
# catch-all glob comes last.
MAX_SIZES = {
    # GLOB: (MAXTEXT, MAXLINES),
    # default: (10_000, 200)
    # First match wins.
    _abs('Modules/_ctypes/ctypes.h'): (5_000, 500),
    _abs('Modules/_datetimemodule.c'): (20_000, 300),
    _abs('Modules/_hacl/*.c'): (200_000, 500),
    _abs('Modules/posixmodule.c'): (20_000, 500),
    _abs('Modules/termios.c'): (10_000, 800),
    _abs('Modules/_testcapimodule.c'): (20_000, 400),
    _abs('Modules/expat/expat.h'): (10_000, 400),
    _abs('Objects/stringlib/unicode_format.h'): (10_000, 400),
    _abs('Objects/typeobject.c'): (35_000, 200),
    _abs('Python/compile.c'): (20_000, 500),
    _abs('Python/pylifecycle.c'): (500_000, 5000),
    _abs('Python/pystate.c'): (500_000, 5000),

    # Generated files:
    _abs('Include/internal/pycore_opcode.h'): (10_000, 1000),
    _abs('Include/internal/pycore_global_strings.h'): (5_000, 1000),
    _abs('Include/internal/pycore_runtime_init_generated.h'): (5_000, 1000),
    _abs('Python/deepfreeze/*.c'): (20_000, 500),
    _abs('Python/frozen_modules/*.h'): (20_000, 500),
    _abs('Python/opcode_targets.h'): (10_000, 500),
    _abs('Python/stdlib_module_names.h'): (5_000, 500),

    # These large files are currently ignored (see above).
    # NOTE(review): Modules/_ssl_data_31.h is listed in EXCLUDED but has
    # no entry here -- confirm whether one is needed.
    _abs('Modules/_ssl_data.h'): (80_000, 10_000),
    _abs('Modules/_ssl_data_300.h'): (80_000, 10_000),
    _abs('Modules/_ssl_data_111.h'): (80_000, 10_000),
    _abs('Modules/cjkcodecs/mappings_*.h'): (160_000, 2_000),
    _abs('Modules/unicodedata_db.h'): (180_000, 3_000),
    _abs('Modules/unicodename_db.h'): (1_200_000, 15_000),
    _abs('Objects/unicodetype_db.h'): (240_000, 3_000),

    # Catch-alls:
    _abs('Include/**/*.h'): (5_000, 500),
}
     340  
     341  
     342  def get_preprocessor(*,
     343                       file_macros=None,
     344                       file_includes=None,
     345                       file_incldirs=None,
     346                       file_same=None,
     347                       **kwargs
     348                       ):
     349      macros = tuple(MACROS)
     350      if file_macros:
     351          macros += tuple(file_macros)
     352      includes = tuple(INCLUDES)
     353      if file_includes:
     354          includes += tuple(file_includes)
     355      incldirs = tuple(INCL_DIRS)
     356      if file_incldirs:
     357          incldirs += tuple(file_incldirs)
     358      samefiles = dict(SAME)
     359      if file_same:
     360          samefiles.update(file_same)
     361      return _get_preprocessor(
     362          file_macros=macros,
     363          file_includes=includes,
     364          file_incldirs=incldirs,
     365          file_same=samefiles,
     366          **kwargs
     367      )
     368  
     369  
     370  def parse_file(filename, *,
     371                 match_kind=None,
     372                 ignore_exc=None,
     373                 log_err=None,
     374                 ):
     375      get_file_preprocessor = get_preprocessor(
     376          ignore_exc=ignore_exc,
     377          log_err=log_err,
     378      )
     379      yield from _parse_file(
     380          filename,
     381          match_kind=match_kind,
     382          get_file_preprocessor=get_file_preprocessor,
     383          file_maxsizes=MAX_SIZES,
     384      )
     385  
     386  
     387  def parse_files(filenames=None, *,
     388                  match_kind=None,
     389                  ignore_exc=None,
     390                  log_err=None,
     391                  get_file_preprocessor=None,
     392                  **file_kwargs
     393                  ):
     394      if get_file_preprocessor is None:
     395          get_file_preprocessor = get_preprocessor(
     396              ignore_exc=ignore_exc,
     397              log_err=log_err,
     398          )
     399      yield from _parse_files(
     400          filenames,
     401          match_kind=match_kind,
     402          get_file_preprocessor=get_file_preprocessor,
     403          file_maxsizes=MAX_SIZES,
     404          **file_kwargs
     405      )