1  import logging
       2  import sys
       3  import textwrap
       4  
       5  from c_common.scriptutil import (
       6      VERBOSITY,
       7      add_verbosity_cli,
       8      add_traceback_cli,
       9      add_commands_cli,
      10      add_kind_filtering_cli,
      11      add_files_cli,
      12      add_progress_cli,
      13      process_args_by_key,
      14      configure_logger,
      15      get_prog,
      16  )
      17  from c_parser.info import KIND
      18  import c_parser.__main__ as c_parser
      19  import c_analyzer.__main__ as c_analyzer
      20  import c_analyzer as _c_analyzer
      21  from c_analyzer.info import UNKNOWN
      22  from . import _analyzer, _builtin_types, _capi, _files, _parser, REPO_ROOT
      23  
      24  
      25  logger = logging.getLogger(__name__)
      26  
      27  
      28  CHECK_EXPLANATION = textwrap.dedent('''
      29      -------------------------
      30  
      31      Non-constant global variables are generally not supported
      32      in the CPython repo.  We use a tool to analyze the C code
      33      and report if any unsupported globals are found.  The tool
      34      may be run manually with:
      35  
      36        ./python Tools/c-analyzer/check-c-globals.py --format summary [FILE]
      37  
      38      Occasionally the tool is unable to parse updated code.
      39      If this happens then add the file to the "EXCLUDED" list
      40      in Tools/c-analyzer/cpython/_parser.py and create a new
      41      issue for fixing the tool (and CC ericsnowcurrently
      42      on the issue).
      43  
      44      If the tool reports an unsupported global variable and
      45      it is actually const (and thus supported) then first try
      46      fixing the declaration appropriately in the code.  If that
      47      doesn't work then add the variable to the "should be const"
      48      section of Tools/c-analyzer/cpython/ignored.tsv.
      49  
      50      If the tool otherwise reports an unsupported global variable
      51      then first try to make it non-global, possibly adding to
      52      PyInterpreterState (for core code) or module state (for
      53      extension modules).  In an emergency, you can add the
      54      variable to Tools/c-analyzer/cpython/globals-to-fix.tsv
      55      to get CI passing, but doing so should be avoided.  If
      56      this course it taken, be sure to create an issue for
      57      eliminating the global (and CC ericsnowcurrently).
      58  ''')
      59  
      60  
      61  def _resolve_filenames(filenames):
      62      if filenames:
      63          resolved = (_files.resolve_filename(f) for f in filenames)
      64      else:
      65          resolved = _files.iter_filenames()
      66      return resolved
      67  
      68  
      69  #######################################
      70  # the formats
      71  
      72  def fmt_summary(analysis):
      73      # XXX Support sorting and grouping.
      74      supported = []
      75      unsupported = []
      76      for item in analysis:
      77          if item.supported:
      78              supported.append(item)
      79          else:
      80              unsupported.append(item)
      81      total = 0
      82  
      83      def section(name, groupitems):
      84          nonlocal total
      85          items, render = c_analyzer.build_section(name, groupitems,
      86                                                   relroot=REPO_ROOT)
      87          yield from render()
      88          total += len(items)
      89  
      90      yield ''
      91      yield '===================='
      92      yield 'supported'
      93      yield '===================='
      94  
      95      yield from section('types', supported)
      96      yield from section('variables', supported)
      97  
      98      yield ''
      99      yield '===================='
     100      yield 'unsupported'
     101      yield '===================='
     102  
     103      yield from section('types', unsupported)
     104      yield from section('variables', unsupported)
     105  
     106      yield ''
     107      yield f'grand total: {total}'
     108  
     109  
     110  #######################################
     111  # the checks
     112  
     113  CHECKS = dict(c_analyzer.CHECKS, **{
     114      'globals': _analyzer.check_globals,
     115  })
     116  
     117  #######################################
     118  # the commands
     119  
     120  FILES_KWARGS = dict(excluded=_parser.EXCLUDED, nargs='*')
     121  
     122  
     123  def _cli_parse(parser):
     124      process_output = c_parser.add_output_cli(parser)
     125      process_kind = add_kind_filtering_cli(parser)
     126      process_preprocessor = c_parser.add_preprocessor_cli(
     127          parser,
     128          get_preprocessor=_parser.get_preprocessor,
     129      )
     130      process_files = add_files_cli(parser, **FILES_KWARGS)
     131      return [
     132          process_output,
     133          process_kind,
     134          process_preprocessor,
     135          process_files,
     136      ]
     137  
     138  
     139  def cmd_parse(filenames=None, **kwargs):
     140      filenames = _resolve_filenames(filenames)
     141      if 'get_file_preprocessor' not in kwargs:
     142          kwargs['get_file_preprocessor'] = _parser.get_preprocessor()
     143      c_parser.cmd_parse(
     144          filenames,
     145          relroot=REPO_ROOT,
     146          file_maxsizes=_parser.MAX_SIZES,
     147          **kwargs
     148      )
     149  
     150  
     151  def _cli_check(parser, **kwargs):
     152      return c_analyzer._cli_check(parser, CHECKS, **kwargs, **FILES_KWARGS)
     153  
     154  
     155  def cmd_check(filenames=None, **kwargs):
     156      filenames = _resolve_filenames(filenames)
     157      kwargs['get_file_preprocessor'] = _parser.get_preprocessor(log_err=print)
     158      try:
     159          c_analyzer.cmd_check(
     160              filenames,
     161              relroot=REPO_ROOT,
     162              _analyze=_analyzer.analyze,
     163              _CHECKS=CHECKS,
     164              file_maxsizes=_parser.MAX_SIZES,
     165              **kwargs
     166          )
     167      except SystemExit as exc:
     168          num_failed = exc.args[0] if getattr(exc, 'args', None) else None
     169          if isinstance(num_failed, int):
     170              if num_failed > 0:
     171                  sys.stderr.flush()
     172                  print(CHECK_EXPLANATION, flush=True)
     173          raise  # re-raise
     174      except Exception:
     175          sys.stderr.flush()
     176          print(CHECK_EXPLANATION, flush=True)
     177          raise  # re-raise
     178  
     179  
     180  def cmd_analyze(filenames=None, **kwargs):
     181      formats = dict(c_analyzer.FORMATS)
     182      formats['summary'] = fmt_summary
     183      filenames = _resolve_filenames(filenames)
     184      kwargs['get_file_preprocessor'] = _parser.get_preprocessor(log_err=print)
     185      c_analyzer.cmd_analyze(
     186          filenames,
     187          relroot=REPO_ROOT,
     188          _analyze=_analyzer.analyze,
     189          formats=formats,
     190          file_maxsizes=_parser.MAX_SIZES,
     191          **kwargs
     192      )
     193  
     194  
     195  def _cli_data(parser):
     196      filenames = False
     197      known = True
     198      return c_analyzer._cli_data(parser, filenames, known)
     199  
     200  
     201  def cmd_data(datacmd, **kwargs):
     202      formats = dict(c_analyzer.FORMATS)
     203      formats['summary'] = fmt_summary
     204      filenames = (file
     205                   for file in _resolve_filenames(None)
     206                   if file not in _parser.EXCLUDED)
     207      kwargs['get_file_preprocessor'] = _parser.get_preprocessor(log_err=print)
     208      if datacmd == 'show':
     209          types = _analyzer.read_known()
     210          results = []
     211          for decl, info in types.items():
     212              if info is UNKNOWN:
     213                  if decl.kind in (KIND.STRUCT, KIND.UNION):
     214                      extra = {'unsupported': ['type unknown'] * len(decl.members)}
     215                  else:
     216                      extra = {'unsupported': ['type unknown']}
     217                  info = (info, extra)
     218              results.append((decl, info))
     219              if decl.shortkey == 'struct _object':
     220                  tempinfo = info
     221          known = _analyzer.Analysis.from_results(results)
     222          analyze = None
     223      elif datacmd == 'dump':
     224          known = _analyzer.KNOWN_FILE
     225          def analyze(files, **kwargs):
     226              decls = []
     227              for decl in _analyzer.iter_decls(files, **kwargs):
     228                  if not KIND.is_type_decl(decl.kind):
     229                      continue
     230                  if not decl.filename.endswith('.h'):
     231                      if decl.shortkey not in _analyzer.KNOWN_IN_DOT_C:
     232                          continue
     233                  decls.append(decl)
     234              results = _c_analyzer.analyze_decls(
     235                  decls,
     236                  known={},
     237                  analyze_resolved=_analyzer.analyze_resolved,
     238              )
     239              return _analyzer.Analysis.from_results(results)
     240      else:  # check
     241          known = _analyzer.read_known()
     242          def analyze(files, **kwargs):
     243              return _analyzer.iter_decls(files, **kwargs)
     244      extracolumns = None
     245      c_analyzer.cmd_data(
     246          datacmd,
     247          filenames,
     248          known,
     249          _analyze=analyze,
     250          formats=formats,
     251          extracolumns=extracolumns,
     252          relroot=REPO_ROOT,
     253          **kwargs
     254      )
     255  
     256  
     257  def _cli_capi(parser):
     258      parser.add_argument('--levels', action='append', metavar='LEVEL[,...]')
     259      parser.add_argument(f'--public', dest='levels',
     260                          action='append_const', const='public')
     261      parser.add_argument(f'--no-public', dest='levels',
     262                          action='append_const', const='no-public')
     263      for level in _capi.LEVELS:
     264          parser.add_argument(f'--{level}', dest='levels',
     265                              action='append_const', const=level)
     266      def process_levels(args, *, argv=None):
     267          levels = []
     268          for raw in args.levels or ():
     269              for level in raw.replace(',', ' ').strip().split():
     270                  if level == 'public':
     271                      levels.append('stable')
     272                      levels.append('cpython')
     273                  elif level == 'no-public':
     274                      levels.append('private')
     275                      levels.append('internal')
     276                  elif level in _capi.LEVELS:
     277                      levels.append(level)
     278                  else:
     279                      parser.error(f'expected LEVEL to be one of {sorted(_capi.LEVELS)}, got {level!r}')
     280          args.levels = set(levels)
     281  
     282      parser.add_argument('--kinds', action='append', metavar='KIND[,...]')
     283      for kind in _capi.KINDS:
     284          parser.add_argument(f'--{kind}', dest='kinds',
     285                              action='append_const', const=kind)
     286      def process_kinds(args, *, argv=None):
     287          kinds = []
     288          for raw in args.kinds or ():
     289              for kind in raw.replace(',', ' ').strip().split():
     290                  if kind in _capi.KINDS:
     291                      kinds.append(kind)
     292                  else:
     293                      parser.error(f'expected KIND to be one of {sorted(_capi.KINDS)}, got {kind!r}')
     294          args.kinds = set(kinds)
     295  
     296      parser.add_argument('--group-by', dest='groupby',
     297                          choices=['level', 'kind'])
     298  
     299      parser.add_argument('--format', default='table')
     300      parser.add_argument('--summary', dest='format',
     301                          action='store_const', const='summary')
     302      def process_format(args, *, argv=None):
     303          orig = args.format
     304          args.format = _capi.resolve_format(args.format)
     305          if isinstance(args.format, str):
     306              if args.format not in _capi._FORMATS:
     307                  parser.error(f'unsupported format {orig!r}')
     308  
     309      parser.add_argument('--show-empty', dest='showempty', action='store_true')
     310      parser.add_argument('--no-show-empty', dest='showempty', action='store_false')
     311      parser.set_defaults(showempty=None)
     312  
     313      # XXX Add --sort-by, --sort and --no-sort.
     314  
     315      parser.add_argument('--ignore', dest='ignored', action='append')
     316      def process_ignored(args, *, argv=None):
     317          ignored = []
     318          for raw in args.ignored or ():
     319              ignored.extend(raw.replace(',', ' ').strip().split())
     320          args.ignored = ignored or None
     321  
     322      parser.add_argument('filenames', nargs='*', metavar='FILENAME')
     323      process_progress = add_progress_cli(parser)
     324  
     325      return [
     326          process_levels,
     327          process_kinds,
     328          process_format,
     329          process_ignored,
     330          process_progress,
     331      ]
     332  
     333  
     334  def cmd_capi(filenames=None, *,
     335               levels=None,
     336               kinds=None,
     337               groupby='kind',
     338               format='table',
     339               showempty=None,
     340               ignored=None,
     341               track_progress=None,
     342               verbosity=VERBOSITY,
     343               **kwargs
     344               ):
     345      render = _capi.get_renderer(format)
     346  
     347      filenames = _files.iter_header_files(filenames, levels=levels)
     348      #filenames = (file for file, _ in main_for_filenames(filenames))
     349      if track_progress:
     350          filenames = track_progress(filenames)
     351      items = _capi.iter_capi(filenames)
     352      if levels:
     353          items = (item for item in items if item.level in levels)
     354      if kinds:
     355          items = (item for item in items if item.kind in kinds)
     356  
     357      filter = _capi.resolve_filter(ignored)
     358      if filter:
     359          items = (item for item in items if filter(item, log=lambda msg: logger.log(1, msg)))
     360  
     361      lines = render(
     362          items,
     363          groupby=groupby,
     364          showempty=showempty,
     365          verbose=verbosity > VERBOSITY,
     366      )
     367      print()
     368      for line in lines:
     369          print(line)
     370  
     371  
     372  def _cli_builtin_types(parser):
     373      parser.add_argument('--format', dest='fmt', default='table')
     374  #    parser.add_argument('--summary', dest='format',
     375  #                        action='store_const', const='summary')
     376      def process_format(args, *, argv=None):
     377          orig = args.fmt
     378          args.fmt = _builtin_types.resolve_format(args.fmt)
     379          if isinstance(args.fmt, str):
     380              if args.fmt not in _builtin_types._FORMATS:
     381                  parser.error(f'unsupported format {orig!r}')
     382  
     383      parser.add_argument('--include-modules', dest='showmodules',
     384                          action='store_true')
     385      def process_modules(args, *, argv=None):
     386          pass
     387  
     388      return [
     389          process_format,
     390          process_modules,
     391      ]
     392  
     393  
     394  def cmd_builtin_types(fmt, *,
     395                        showmodules=False,
     396                        verbosity=VERBOSITY,
     397                        ):
     398      render = _builtin_types.get_renderer(fmt)
     399      types = _builtin_types.iter_builtin_types()
     400      match = _builtin_types.resolve_matcher(showmodules)
     401      if match:
     402          types = (t for t in types if match(t, log=lambda msg: logger.log(1, msg)))
     403  
     404      lines = render(
     405          types,
     406  #        verbose=verbosity > VERBOSITY,
     407      )
     408      print()
     409      for line in lines:
     410          print(line)
     411  
     412  
     413  # We do not define any other cmd_*() handlers here,
     414  # favoring those defined elsewhere.
     415  
     416  COMMANDS = {
     417      'check': (
     418          'analyze and fail if the CPython source code has any problems',
     419          [_cli_check],
     420          cmd_check,
     421      ),
     422      'analyze': (
     423          'report on the state of the CPython source code',
     424          [(lambda p: c_analyzer._cli_analyze(p, **FILES_KWARGS))],
     425          cmd_analyze,
     426      ),
     427      'parse': (
     428          'parse the CPython source files',
     429          [_cli_parse],
     430          cmd_parse,
     431      ),
     432      'data': (
     433          'check/manage local data (e.g. known types, ignored vars, caches)',
     434          [_cli_data],
     435          cmd_data,
     436      ),
     437      'capi': (
     438          'inspect the C-API',
     439          [_cli_capi],
     440          cmd_capi,
     441      ),
     442      'builtin-types': (
     443          'show the builtin types',
     444          [_cli_builtin_types],
     445          cmd_builtin_types,
     446      ),
     447  }
     448  
     449  
     450  #######################################
     451  # the script
     452  
     453  def parse_args(argv=sys.argv[1:], prog=None, *, subset=None):
     454      import argparse
     455      parser = argparse.ArgumentParser(
     456          prog=prog or get_prog(),
     457      )
     458  
     459  #    if subset == 'check' or subset == ['check']:
     460  #        if checks is not None:
     461  #            commands = dict(COMMANDS)
     462  #            commands['check'] = list(commands['check'])
     463  #            cli = commands['check'][1][0]
     464  #            commands['check'][1][0] = (lambda p: cli(p, checks=checks))
     465      processors = add_commands_cli(
     466          parser,
     467          commands=COMMANDS,
     468          commonspecs=[
     469              add_verbosity_cli,
     470              add_traceback_cli,
     471          ],
     472          subset=subset,
     473      )
     474  
     475      args = parser.parse_args(argv)
     476      ns = vars(args)
     477  
     478      cmd = ns.pop('cmd')
     479  
     480      verbosity, traceback_cm = process_args_by_key(
     481          args,
     482          argv,
     483          processors[cmd],
     484          ['verbosity', 'traceback_cm'],
     485      )
     486      if cmd != 'parse':
     487          # "verbosity" is sent to the commands, so we put it back.
     488          args.verbosity = verbosity
     489  
     490      return cmd, ns, verbosity, traceback_cm
     491  
     492  
     493  def main(cmd, cmd_kwargs):
     494      try:
     495          run_cmd = COMMANDS[cmd][-1]
     496      except KeyError:
     497          raise ValueError(f'unsupported cmd {cmd!r}')
     498      run_cmd(**cmd_kwargs)
     499  
     500  
     501  if __name__ == '__main__':
     502      cmd, cmd_kwargs, verbosity, traceback_cm = parse_args()
     503      configure_logger(verbosity)
     504      with traceback_cm:
     505          main(cmd, cmd_kwargs)