1  import os.path
       2  import re
       3  
       4  from c_parser.preprocessor import (
       5      get_preprocessor as _get_preprocessor,
       6  )
       7  from c_parser import (
       8      parse_file as _parse_file,
       9      parse_files as _parse_files,
      10  )
      11  from . import REPO_ROOT
      12  
      13  
      14  GLOB_ALL = '**/*'
      15  
      16  
      17  def _abs(relfile):
      18      return os.path.join(REPO_ROOT, relfile)
      19  
      20  
      21  def clean_lines(text):
      22      """Clear out comments, blank lines, and leading/trailing whitespace."""
      23      lines = (line.strip() for line in text.splitlines())
      24      lines = (line.partition('#')[0].rstrip()
      25               for line in lines
      26               if line and not line.startswith('#'))
      27      glob_all = f'{GLOB_ALL} '
      28      lines = (re.sub(r'^[*] ', glob_all, line) for line in lines)
      29      lines = (_abs(line) for line in lines)
      30      return list(lines)
      31  
      32  
      33  '''
      34  @begin=sh@
      35  ./python ../c-parser/cpython.py
      36      --exclude '+../c-parser/EXCLUDED'
      37      --macros '+../c-parser/MACROS'
      38      --incldirs '+../c-parser/INCL_DIRS'
      39      --same './Include/cpython/'
      40      Include/*.h
      41      Include/internal/*.h
      42      Modules/**/*.c
      43      Objects/**/*.c
      44      Parser/**/*.c
      45      Python/**/*.c
      46  @end=sh@
      47  '''
      48  
# XXX Handle these.
# One path or glob per line, written relative to the repo root and grouped
# by the reason given in the embedded comments.  clean_lines() discards the
# comments and blank lines and joins every remaining entry onto REPO_ROOT.
# NOTE(review): presumably this is the parser's exclusion list (compare the
# "--exclude" option in the command sketch above) -- confirm at the use site.
EXCLUDED = clean_lines('''
# @begin=conf@

# Rather than fixing for this one, we manually make sure it's okay.
Modules/_sha3/kcp/KeccakP-1600-opt64.c

# OSX
#Modules/_ctypes/darwin/*.c
#Modules/_ctypes/libffi_osx/*.c
Modules/_scproxy.c                # SystemConfiguration/SystemConfiguration.h

# Windows
Modules/_winapi.c               # windows.h
Modules/expat/winconfig.h
Modules/overlapped.c            # winsock.h
Python/dynload_win.c            # windows.h
Python/thread_nt.h

# other OS-dependent
Python/dynload_aix.c            # sys/ldr.h
Python/dynload_dl.c             # dl.h
Python/dynload_hpux.c           # dl.h
Python/thread_pthread.h
Python/emscripten_signal.c

# only huge constants (safe but parsing is slow)
Modules/_blake2/impl/blake2-kat.h
Modules/_ssl_data.h
Modules/_ssl_data_31.h
Modules/_ssl_data_300.h
Modules/_ssl_data_111.h
Modules/cjkcodecs/mappings_*.h
Modules/unicodedata_db.h
Modules/unicodename_db.h
Objects/unicodetype_db.h

# generated
Python/deepfreeze/*.c
Python/frozen_modules/*.h
Python/opcode_targets.h
Python/stdlib_module_names.h

# @end=conf@
''')
      94  
# XXX Fix the parser.
# Further entries appended to EXCLUDED, in the same one-path-per-line format.
# Per the note inside the string, these are files the tool is expected to be
# able to parse; they are listed here all the same.
EXCLUDED += clean_lines('''
# The tool should be able to parse these...

Modules/hashlib.h
Objects/stringlib/codecs.h
Objects/stringlib/count.h
Objects/stringlib/ctype.h
Objects/stringlib/fastsearch.h
Objects/stringlib/find.h
Objects/stringlib/find_max_char.h
Objects/stringlib/partition.h
Objects/stringlib/replace.h
Objects/stringlib/split.h

Modules/_dbmmodule.c
Modules/cjkcodecs/_codecs_*.c
Modules/expat/xmlrole.c
Modules/expat/xmlparse.c
Python/initconfig.c
''')
     116  
# Table of (glob, dirname) rows: a file glob and an include directory.
# clean_lines() removes the comment and blank lines, which leaves the
# "glob dirname" header as the first row; the trailing [1:] drops that
# header so only data rows remain.  These rows are always passed to the
# preprocessor as ``file_incldirs`` by get_preprocessor().
INCL_DIRS = clean_lines('''
# @begin=tsv@

glob	dirname
*	.
*	./Include
*	./Include/internal

Modules/_tkinter.c	/usr/include/tcl8.6
Modules/tkappinit.c	/usr/include/tcl
Modules/_decimal/**/*.c	Modules/_decimal/libmpdec

# @end=tsv@
''')[1:]
     131  
# Table of (glob, name, value) rows: a file glob, a macro name (optionally
# with a parameter list) and the macro's replacement text.
# clean_lines() removes the comment and blank lines, which leaves the
# "glob name value" header as the first row; the trailing [1:] drops that
# header so only data rows remain.  These rows are always passed to the
# preprocessor as ``file_macros`` by get_preprocessor().
MACROS = clean_lines('''
# @begin=tsv@

glob	name	value

Include/internal/*.h	Py_BUILD_CORE	1
Python/**/*.c	Py_BUILD_CORE	1
Parser/**/*.c	Py_BUILD_CORE	1
Objects/**/*.c	Py_BUILD_CORE	1

Modules/_asynciomodule.c	Py_BUILD_CORE	1
Modules/_collectionsmodule.c	Py_BUILD_CORE	1
Modules/_ctypes/_ctypes.c	Py_BUILD_CORE	1
Modules/_ctypes/cfield.c	Py_BUILD_CORE	1
Modules/_cursesmodule.c	Py_BUILD_CORE	1
Modules/_datetimemodule.c	Py_BUILD_CORE	1
Modules/_functoolsmodule.c	Py_BUILD_CORE	1
Modules/_heapqmodule.c	Py_BUILD_CORE	1
Modules/_io/*.c	Py_BUILD_CORE	1
Modules/_localemodule.c	Py_BUILD_CORE	1
Modules/_operator.c	Py_BUILD_CORE	1
Modules/_posixsubprocess.c	Py_BUILD_CORE	1
Modules/_sre/sre.c	Py_BUILD_CORE	1
Modules/_threadmodule.c	Py_BUILD_CORE	1
Modules/_tracemalloc.c	Py_BUILD_CORE	1
Modules/_weakref.c	Py_BUILD_CORE	1
Modules/_zoneinfo.c	Py_BUILD_CORE	1
Modules/atexitmodule.c	Py_BUILD_CORE	1
Modules/cmathmodule.c	Py_BUILD_CORE	1
Modules/faulthandler.c	Py_BUILD_CORE	1
Modules/gcmodule.c	Py_BUILD_CORE	1
Modules/getpath.c	Py_BUILD_CORE	1
Modules/getpath_noop.c	Py_BUILD_CORE	1
Modules/itertoolsmodule.c	Py_BUILD_CORE	1
Modules/main.c	Py_BUILD_CORE	1
Modules/mathmodule.c	Py_BUILD_CORE	1
Modules/posixmodule.c	Py_BUILD_CORE	1
Modules/sha256module.c	Py_BUILD_CORE	1
Modules/sha512module.c	Py_BUILD_CORE	1
Modules/signalmodule.c	Py_BUILD_CORE	1
Modules/symtablemodule.c	Py_BUILD_CORE	1
Modules/timemodule.c	Py_BUILD_CORE	1
Modules/unicodedata.c	Py_BUILD_CORE	1
Objects/stringlib/codecs.h	Py_BUILD_CORE	1
Objects/stringlib/unicode_format.h	Py_BUILD_CORE	1
Parser/string_parser.h	Py_BUILD_CORE	1
Parser/pegen.h	Py_BUILD_CORE	1
Python/ceval_gil.h	Py_BUILD_CORE	1
Python/condvar.h	Py_BUILD_CORE	1

Modules/_json.c	Py_BUILD_CORE_BUILTIN	1
Modules/_pickle.c	Py_BUILD_CORE_BUILTIN	1
Modules/_testinternalcapi.c	Py_BUILD_CORE_BUILTIN	1

Include/cpython/abstract.h	Py_CPYTHON_ABSTRACTOBJECT_H	1
Include/cpython/bytearrayobject.h	Py_CPYTHON_BYTEARRAYOBJECT_H	1
Include/cpython/bytesobject.h	Py_CPYTHON_BYTESOBJECT_H	1
Include/cpython/ceval.h	Py_CPYTHON_CEVAL_H	1
Include/cpython/code.h	Py_CPYTHON_CODE_H	1
Include/cpython/dictobject.h	Py_CPYTHON_DICTOBJECT_H	1
Include/cpython/fileobject.h	Py_CPYTHON_FILEOBJECT_H	1
Include/cpython/fileutils.h	Py_CPYTHON_FILEUTILS_H	1
Include/cpython/frameobject.h	Py_CPYTHON_FRAMEOBJECT_H	1
Include/cpython/import.h	Py_CPYTHON_IMPORT_H	1
Include/cpython/listobject.h	Py_CPYTHON_LISTOBJECT_H	1
Include/cpython/methodobject.h	Py_CPYTHON_METHODOBJECT_H	1
Include/cpython/object.h	Py_CPYTHON_OBJECT_H	1
Include/cpython/objimpl.h	Py_CPYTHON_OBJIMPL_H	1
Include/cpython/pyerrors.h	Py_CPYTHON_ERRORS_H	1
Include/cpython/pylifecycle.h	Py_CPYTHON_PYLIFECYCLE_H	1
Include/cpython/pymem.h	Py_CPYTHON_PYMEM_H	1
Include/cpython/pystate.h	Py_CPYTHON_PYSTATE_H	1
Include/cpython/sysmodule.h	Py_CPYTHON_SYSMODULE_H	1
Include/cpython/traceback.h	Py_CPYTHON_TRACEBACK_H	1
Include/cpython/tupleobject.h	Py_CPYTHON_TUPLEOBJECT_H	1
Include/cpython/unicodeobject.h	Py_CPYTHON_UNICODEOBJECT_H	1
Include/internal/pycore_code.h	SIZEOF_VOID_P	8

# implied include of pyport.h
Include/**/*.h	PyAPI_DATA(RTYPE)	extern RTYPE
Include/**/*.h	PyAPI_FUNC(RTYPE)	RTYPE
Include/**/*.h	Py_DEPRECATED(VER)	/* */
Include/**/*.h	_Py_NO_RETURN	/* */
Include/**/*.h	PYLONG_BITS_IN_DIGIT	30
Modules/**/*.c	PyMODINIT_FUNC	PyObject*
Objects/unicodeobject.c	PyMODINIT_FUNC	PyObject*
Python/marshal.c	PyMODINIT_FUNC	PyObject*
Python/_warnings.c	PyMODINIT_FUNC	PyObject*
Python/Python-ast.c	PyMODINIT_FUNC	PyObject*
Python/import.c	PyMODINIT_FUNC	PyObject*
Modules/_testcapimodule.c	PyAPI_FUNC(RTYPE)	RTYPE
Python/getargs.c	PyAPI_FUNC(RTYPE)	RTYPE
Objects/stringlib/unicode_format.h	Py_LOCAL_INLINE(type)	static inline type
Include/pymath.h	_Py__has_builtin(x)	0

# implied include of pymacro.h
*/clinic/*.c.h	PyDoc_VAR(name)	static const char name[]
*/clinic/*.c.h	PyDoc_STR(str)	str
*/clinic/*.c.h	PyDoc_STRVAR(name,str)	PyDoc_VAR(name) = PyDoc_STR(str)

# implied include of exports.h
#Modules/_io/bytesio.c	Py_EXPORTED_SYMBOL	/* */

# implied include of object.h
Include/**/*.h	PyObject_HEAD	PyObject ob_base;
Include/**/*.h	PyObject_VAR_HEAD	PyVarObject ob_base;

# implied include of pyconfig.h
Include/**/*.h	SIZEOF_WCHAR_T	4

# implied include of <unistd.h>
Include/**/*.h	_POSIX_THREADS	1
Include/**/*.h	HAVE_PTHREAD_H	1

# from Makefile
Modules/getpath.c	PYTHONPATH	1
Modules/getpath.c	PREFIX	...
Modules/getpath.c	EXEC_PREFIX	...
Modules/getpath.c	VERSION	...
Modules/getpath.c	VPATH	...
Modules/getpath.c	PLATLIBDIR	...

# from Modules/_sha3/sha3module.c
Modules/_sha3/kcp/KeccakP-1600-inplace32BI.c	PLATFORM_BYTE_ORDER	4321  # force big-endian
Modules/_sha3/kcp/*.c	KeccakOpt	64
Modules/_sha3/kcp/*.c	KeccakP200_excluded	1
Modules/_sha3/kcp/*.c	KeccakP400_excluded	1
Modules/_sha3/kcp/*.c	KeccakP800_excluded	1

# See: setup.py
Modules/_decimal/**/*.c	CONFIG_64	1
Modules/_decimal/**/*.c	ASM	1
Modules/expat/xmlparse.c	HAVE_EXPAT_CONFIG_H	1
Modules/expat/xmlparse.c	XML_POOR_ENTROPY	1
Modules/_dbmmodule.c	HAVE_GDBM_DASH_NDBM_H	1

# others
Modules/_sre/sre_lib.h	LOCAL(type)	static inline type
Modules/_sre/sre_lib.h	SRE(F)	sre_ucs2_##F
Objects/stringlib/codecs.h	STRINGLIB_IS_UNICODE	1
Include/internal/pycore_bitutils.h	_Py__has_builtin(B)	0

# @end=tsv@
''')[1:]
     276  
     277  # -pthread
     278  # -Wno-unused-result
     279  # -Wsign-compare
     280  # -g
     281  # -Og
     282  # -Wall
     283  # -std=c99
     284  # -Wextra
     285  # -Wno-unused-result -Wno-unused-parameter
     286  # -Wno-missing-field-initializers
     287  # -Werror=implicit-function-declaration
     288  
     289  SAME = [
     290      './Include/cpython/',
     291  ]
     292  
     293  MAX_SIZES = {
     294      _abs('Include/**/*.h'): (5_000, 500),
     295      _abs('Modules/_ctypes/ctypes.h'): (5_000, 500),
     296      _abs('Modules/_datetimemodule.c'): (20_000, 300),
     297      _abs('Modules/posixmodule.c'): (20_000, 500),
     298      _abs('Modules/termios.c'): (10_000, 800),
     299      _abs('Modules/_testcapimodule.c'): (20_000, 400),
     300      _abs('Modules/expat/expat.h'): (10_000, 400),
     301      _abs('Objects/stringlib/unicode_format.h'): (10_000, 400),
     302      _abs('Objects/typeobject.c'): (20_000, 200),
     303      _abs('Python/compile.c'): (20_000, 500),
     304      _abs('Python/pylifecycle.c'): (500_000, 5000),
     305      _abs('Python/pystate.c'): (500_000, 5000),
     306  }
     307  
     308  
     309  def get_preprocessor(*,
     310                       file_macros=None,
     311                       file_incldirs=None,
     312                       file_same=None,
     313                       **kwargs
     314                       ):
     315      macros = tuple(MACROS)
     316      if file_macros:
     317          macros += tuple(file_macros)
     318      incldirs = tuple(INCL_DIRS)
     319      if file_incldirs:
     320          incldirs += tuple(file_incldirs)
     321      return _get_preprocessor(
     322          file_macros=macros,
     323          file_incldirs=incldirs,
     324          file_same=file_same,
     325          **kwargs
     326      )
     327  
     328  
     329  def parse_file(filename, *,
     330                 match_kind=None,
     331                 ignore_exc=None,
     332                 log_err=None,
     333                 ):
     334      get_file_preprocessor = get_preprocessor(
     335          ignore_exc=ignore_exc,
     336          log_err=log_err,
     337      )
     338      yield from _parse_file(
     339          filename,
     340          match_kind=match_kind,
     341          get_file_preprocessor=get_file_preprocessor,
     342          file_maxsizes=MAX_SIZES,
     343      )
     344  
     345  
     346  def parse_files(filenames=None, *,
     347                  match_kind=None,
     348                  ignore_exc=None,
     349                  log_err=None,
     350                  get_file_preprocessor=None,
     351                  **file_kwargs
     352                  ):
     353      if get_file_preprocessor is None:
     354          get_file_preprocessor = get_preprocessor(
     355              ignore_exc=ignore_exc,
     356              log_err=log_err,
     357          )
     358      yield from _parse_files(
     359          filenames,
     360          match_kind=match_kind,
     361          get_file_preprocessor=get_file_preprocessor,
     362          file_maxsizes=MAX_SIZES,
     363          **file_kwargs
     364      )