(root)/
Python-3.12.0/
Tools/
c-analyzer/
cpython/
_analyzer.py
       1  import os.path
       2  import re
       3  
       4  from c_common.clsutil import classonly
       5  from c_parser.info import (
       6      KIND,
       7      Declaration,
       8      TypeDeclaration,
       9      Member,
      10      FIXED_TYPE,
      11  )
      12  from c_parser.match import (
      13      is_pots,
      14      is_funcptr,
      15  )
      16  from c_analyzer.match import (
      17      is_system_type,
      18      is_process_global,
      19      is_fixed_type,
      20      is_immutable,
      21  )
      22  import c_analyzer as _c_analyzer
      23  import c_analyzer.info as _info
      24  import c_analyzer.datafiles as _datafiles
      25  from . import _parser, REPO_ROOT
      26  
      27  
# Directory containing this module; the .tsv data files sit next to it.
_DATA_DIR = os.path.dirname(__file__)
KNOWN_FILE = os.path.join(_DATA_DIR, 'known.tsv')
IGNORED_FILE = os.path.join(_DATA_DIR, 'ignored.tsv')
NEED_FIX_FILE = os.path.join(_DATA_DIR, 'globals-to-fix.tsv')
# {type ID => bool}.  The commented-out code further down reads the flag
# as "supported"; the entries marked "???" are unconfirmed.
# NOTE(review): nothing in the visible code reads this dict -- confirm
# whether it is still needed.
KNOWN_IN_DOT_C = {
    'struct _odictobject': False,
    'PyTupleObject': False,
    'struct _typeobject': False,
    'struct _arena': True,  # ???
    'struct _frame': False,
    'struct _ts': True,  # ???
    'struct PyCodeObject': False,
    'struct _is': True,  # ???
    'PyWideStringList': True,  # ???
    # recursive
    'struct _dictkeysobject': False,
}
# These are loaded from the respective .tsv files upon first use
# (_KNOWN by read_known(), _IGNORED by read_ignored()).
_KNOWN = {
    # {(file, ID) | ID => info | bool}
    #'PyWideStringList': True,
}
# An older, disabled way of building _KNOWN.  Note that it refers to
# "_KNOWN_IN_DOT_C", while the dict above is named "KNOWN_IN_DOT_C".
#_KNOWN = {(Struct(None, typeid.partition(' ')[-1], None)
#           if typeid.startswith('struct ')
#           else TypeDef(None, typeid, None)
#           ): ([], {'unsupported': None if supported else True})
#          for typeid, supported in _KNOWN_IN_DOT_C.items()}
# Filled from both IGNORED_FILE and NEED_FIX_FILE.
_IGNORED = {
    # {ID => reason}
}
      58  
# XXX We should be handling these through known.tsv.
# Type spellings that are treated as supported without further analysis.
# _has_other_supported_type() compares against these after dropping a
# leading "struct" and all whitespace from the variable's type text,
# which is why array types are spelled like "PyMethodDef[]".
_OTHER_SUPPORTED_TYPES = {
    # Holds tuple of strings, which we statically initialize:
    '_PyArg_Parser',
    # Uses of these should be const, but we don't worry about it.
    'PyModuleDef',
    'PyModuleDef_Slot[]',
    'PyType_Spec',
    'PyType_Slot[]',
    'PyMethodDef',
    'PyMethodDef[]',
    'PyMemberDef[]',
    'PyGetSetDef[]',
    'PyNumberMethods',
    'PySequenceMethods',
    'PyMappingMethods',
    'PyAsyncMethods',
    'PyBufferProcs',
    'PyStructSequence_Field[]',
    'PyStructSequence_Desc',
}
      80  
# XXX We should normalize all cases to a single name,
# e.g. "kwlist" (currently the most common).
# Each entry is (relpath, variable name).  A relpath of '*' matches the
# name in any file; otherwise _is_kwlist() requires the declaration's
# absolute filename to end with the (normalized) relpath.
_KWLIST_VARIANTS = [
    ('*', 'kwlist'),
    ('*', 'keywords'),
    ('*', 'kwargs'),
    ('Modules/_csv.c', 'dialect_kws'),
    ('Modules/_datetimemodule.c', 'date_kws'),
    ('Modules/_datetimemodule.c', 'datetime_kws'),
    ('Modules/_datetimemodule.c', 'time_kws'),
    ('Modules/_datetimemodule.c', 'timezone_kws'),
    ('Modules/_lzmamodule.c', 'optnames'),
    ('Modules/_lzmamodule.c', 'arg_names'),
    ('Modules/cjkcodecs/multibytecodec.c', 'incnewkwarglist'),
    ('Modules/cjkcodecs/multibytecodec.c', 'streamkwarglist'),
    ('Modules/socketmodule.c', 'kwnames'),
]

# The declaration kinds handled in this module: every type kind plus
# variables.  Functions and statements are deliberately left out.
KINDS = frozenset((*KIND.TYPES, KIND.VARIABLE))
     100  
     101  
     102  def read_known():
     103      if not _KNOWN:
     104          # Cache a copy the first time.
     105          extracols = None  # XXX
     106          #extracols = ['unsupported']
     107          known = _datafiles.read_known(KNOWN_FILE, extracols, REPO_ROOT)
     108          # For now we ignore known.values() (i.e. "extra").
     109          types, _ = _datafiles.analyze_known(
     110              known,
     111              analyze_resolved=analyze_resolved,
     112          )
     113          _KNOWN.update(types)
     114      return _KNOWN.copy()
     115  
     116  
     117  def write_known():
     118      raise NotImplementedError
     119      datafiles.write_known(decls, IGNORED_FILE, ['unsupported'], relroot=REPO_ROOT)
     120  
     121  
     122  def read_ignored():
     123      if not _IGNORED:
     124          _IGNORED.update(_datafiles.read_ignored(IGNORED_FILE, relroot=REPO_ROOT))
     125          _IGNORED.update(_datafiles.read_ignored(NEED_FIX_FILE, relroot=REPO_ROOT))
     126      return dict(_IGNORED)
     127  
     128  
     129  def write_ignored():
     130      raise NotImplementedError
     131      _datafiles.write_ignored(variables, IGNORED_FILE, relroot=REPO_ROOT)
     132  
     133  
     134  def analyze(filenames, *,
     135              skip_objects=False,
     136              **kwargs
     137              ):
     138      if skip_objects:
     139          # XXX Set up a filter.
     140          raise NotImplementedError
     141  
     142      known = read_known()
     143  
     144      decls = iter_decls(filenames)
     145      results = _c_analyzer.analyze_decls(
     146          decls,
     147          known,
     148          analyze_resolved=analyze_resolved,
     149      )
     150      analysis = Analysis.from_results(results)
     151  
     152      return analysis
     153  
     154  
     155  def iter_decls(filenames, **kwargs):
     156      decls = _c_analyzer.iter_decls(
     157          filenames,
     158          # We ignore functions (and statements).
     159          kinds=KINDS,
     160          parse_files=_parser.parse_files,
     161          **kwargs
     162      )
     163      for decl in decls:
     164          if not decl.data:
     165              # Ignore forward declarations.
     166              continue
     167          yield decl
     168  
     169  
     170  def analyze_resolved(resolved, decl, types, knowntypes, extra=None):
     171      if decl.kind not in KINDS:
     172          # Skip it!
     173          return None
     174  
     175      typedeps = resolved
     176      if typedeps is _info.UNKNOWN:
     177          if decl.kind in (KIND.STRUCT, KIND.UNION):
     178              typedeps = [typedeps] * len(decl.members)
     179          else:
     180              typedeps = [typedeps]
     181      #assert isinstance(typedeps, (list, TypeDeclaration)), typedeps
     182  
     183      if extra is None:
     184          extra = {}
     185      elif 'unsupported' in extra:
     186          raise NotImplementedError((decl, extra))
     187  
     188      unsupported = _check_unsupported(decl, typedeps, types, knowntypes)
     189      extra['unsupported'] = unsupported
     190  
     191      return typedeps, extra
     192  
     193  
     194  def _check_unsupported(decl, typedeps, types, knowntypes):
     195      if typedeps is None:
     196          raise NotImplementedError(decl)
     197  
     198      if decl.kind in (KIND.STRUCT, KIND.UNION):
     199          return _check_members(decl, typedeps, types, knowntypes)
     200      elif decl.kind is KIND.ENUM:
     201          if typedeps:
     202              raise NotImplementedError((decl, typedeps))
     203          return None
     204      else:
     205          return _check_typedep(decl, typedeps, types, knowntypes)
     206  
     207  
     208  def _check_members(decl, typedeps, types, knowntypes):
     209      if isinstance(typedeps, TypeDeclaration):
     210          raise NotImplementedError((decl, typedeps))
     211  
     212      #members = decl.members or ()  # A forward decl has no members.
     213      members = decl.members
     214      if not members:
     215          # A forward decl has no members, but that shouldn't surface here..
     216          raise NotImplementedError(decl)
     217      if len(members) != len(typedeps):
     218          raise NotImplementedError((decl, typedeps))
     219  
     220      unsupported = []
     221      for member, typedecl in zip(members, typedeps):
     222          checked = _check_typedep(member, typedecl, types, knowntypes)
     223          unsupported.append(checked)
     224      if any(None if v is FIXED_TYPE else v for v in unsupported):
     225          return unsupported
     226      elif FIXED_TYPE in unsupported:
     227          return FIXED_TYPE
     228      else:
     229          return None
     230  
     231  
     232  def _check_typedep(decl, typedecl, types, knowntypes):
     233      if not isinstance(typedecl, TypeDeclaration):
     234          if hasattr(type(typedecl), '__len__'):
     235              if len(typedecl) == 1:
     236                  typedecl, = typedecl
     237      if typedecl is None:
     238          # XXX Fail?
     239          return 'typespec (missing)'
     240      elif typedecl is _info.UNKNOWN:
     241          if _has_other_supported_type(decl):
     242              return None
     243          # XXX Is this right?
     244          return 'typespec (unknown)'
     245      elif not isinstance(typedecl, TypeDeclaration):
     246          raise NotImplementedError((decl, typedecl))
     247  
     248      if isinstance(decl, Member):
     249          return _check_vartype(decl, typedecl, types, knowntypes)
     250      elif not isinstance(decl, Declaration):
     251          raise NotImplementedError(decl)
     252      elif decl.kind is KIND.TYPEDEF:
     253          return _check_vartype(decl, typedecl, types, knowntypes)
     254      elif decl.kind is KIND.VARIABLE:
     255          if not is_process_global(decl):
     256              return None
     257          if _is_kwlist(decl):
     258              return None
     259          if _has_other_supported_type(decl):
     260              return None
     261          checked = _check_vartype(decl, typedecl, types, knowntypes)
     262          return 'mutable' if checked is FIXED_TYPE else checked
     263      else:
     264          raise NotImplementedError(decl)
     265  
     266  
     267  def _is_kwlist(decl):
     268      # keywords for PyArg_ParseTupleAndKeywords()
     269      # "static char *name[]" -> "static const char * const name[]"
     270      # XXX These should be made const.
     271      for relpath, name in _KWLIST_VARIANTS:
     272          if decl.name == name:
     273              if relpath == '*':
     274                  break
     275              assert os.path.isabs(decl.file.filename)
     276              relpath = os.path.normpath(relpath)
     277              if decl.file.filename.endswith(os.path.sep + relpath):
     278                  break
     279      else:
     280          return False
     281      vartype = ''.join(str(decl.vartype).split())
     282      return vartype == 'char*[]'
     283  
     284  
     285  def _has_other_supported_type(decl):
     286      if hasattr(decl, 'file') and decl.file.filename.endswith('.c.h'):
     287          assert 'clinic' in decl.file.filename, (decl,)
     288          if decl.name == '_kwtuple':
     289              return True
     290      vartype = str(decl.vartype).split()
     291      if vartype[0] == 'struct':
     292          vartype = vartype[1:]
     293      vartype = ''.join(vartype)
     294      return vartype in _OTHER_SUPPORTED_TYPES
     295  
     296  
     297  def _check_vartype(decl, typedecl, types, knowntypes):
     298      """Return failure reason."""
     299      checked = _check_typespec(decl, typedecl, types, knowntypes)
     300      if checked:
     301          return checked
     302      if is_immutable(decl.vartype):
     303          return None
     304      if is_fixed_type(decl.vartype):
     305          return FIXED_TYPE
     306      return 'mutable'
     307  
     308  
     309  def _check_typespec(decl, typedecl, types, knowntypes):
     310      typespec = decl.vartype.typespec
     311      if typedecl is not None:
     312          found = types.get(typedecl)
     313          if found is None:
     314              found = knowntypes.get(typedecl)
     315  
     316          if found is not None:
     317              _, extra = found
     318              if extra is None:
     319                  # XXX Under what circumstances does this happen?
     320                  extra = {}
     321              unsupported = extra.get('unsupported')
     322              if unsupported is FIXED_TYPE:
     323                  unsupported = None
     324              return 'typespec' if unsupported else None
     325      # Fall back to default known types.
     326      if is_pots(typespec):
     327          return None
     328      elif is_system_type(typespec):
     329          return None
     330      elif is_funcptr(decl.vartype):
     331          return None
     332      return 'typespec'
     333  
     334  
     335  class ESC[4;38;5;81mAnalyzed(ESC[4;38;5;149m_infoESC[4;38;5;149m.ESC[4;38;5;149mAnalyzed):
     336  
     337      @classonly
     338      def is_target(cls, raw):
     339          if not super().is_target(raw):
     340              return False
     341          if raw.kind not in KINDS:
     342              return False
     343          return True
     344  
     345      #@classonly
     346      #def _parse_raw_result(cls, result, extra):
     347      #    typedecl, extra = super()._parse_raw_result(result, extra)
     348      #    if typedecl is None:
     349      #        return None, extra
     350      #    raise NotImplementedError
     351  
     352      def __init__(self, item, typedecl=None, *, unsupported=None, **extra):
     353          if 'unsupported' in extra:
     354              raise NotImplementedError((item, typedecl, unsupported, extra))
     355          if not unsupported:
     356              unsupported = None
     357          elif isinstance(unsupported, (str, TypeDeclaration)):
     358              unsupported = (unsupported,)
     359          elif unsupported is not FIXED_TYPE:
     360              unsupported = tuple(unsupported)
     361          self.unsupported = unsupported
     362          extra['unsupported'] = self.unsupported  # ...for __repr__(), etc.
     363          if self.unsupported is None:
     364              #self.supported = None
     365              self.supported = True
     366          elif self.unsupported is FIXED_TYPE:
     367              if item.kind is KIND.VARIABLE:
     368                  raise NotImplementedError(item, typedecl, unsupported)
     369              self.supported = True
     370          else:
     371              self.supported = not self.unsupported
     372          super().__init__(item, typedecl, **extra)
     373  
     374      def render(self, fmt='line', *, itemonly=False):
     375          if fmt == 'raw':
     376              yield repr(self)
     377              return
     378          rendered = super().render(fmt, itemonly=itemonly)
     379          # XXX ???
     380          #if itemonly:
     381          #    yield from rendered
     382          supported = self.supported
     383          if fmt in ('line', 'brief'):
     384              rendered, = rendered
     385              parts = [
     386                  '+' if supported else '-' if supported is False else '',
     387                  rendered,
     388              ]
     389              yield '\t'.join(parts)
     390          elif fmt == 'summary':
     391              raise NotImplementedError(fmt)
     392          elif fmt == 'full':
     393              yield from rendered
     394              if supported:
     395                  yield f'\tsupported:\t{supported}'
     396          else:
     397              raise NotImplementedError(fmt)
     398  
     399  
     400  class ESC[4;38;5;81mAnalysis(ESC[4;38;5;149m_infoESC[4;38;5;149m.ESC[4;38;5;149mAnalysis):
     401      _item_class = Analyzed
     402  
     403      @classonly
     404      def build_item(cls, info, result=None):
     405          if not isinstance(info, Declaration) or info.kind not in KINDS:
     406              raise NotImplementedError((info, result))
     407          return super().build_item(info, result)
     408  
     409  
     410  def check_globals(analysis):
     411      # yield (data, failure)
     412      ignored = read_ignored()
     413      for item in analysis:
     414          if item.kind != KIND.VARIABLE:
     415              continue
     416          if item.supported:
     417              continue
     418          if item.id in ignored:
     419              continue
     420          reason = item.unsupported
     421          if not reason:
     422              reason = '???'
     423          elif not isinstance(reason, str):
     424              if len(reason) == 1:
     425                  reason, = reason
     426          reason = f'({reason})'
     427          yield item, f'not supported {reason:20}\t{item.storage or ""} {item.vartype}'