(root)/
Python-3.12.0/
Lib/
modulefinder.py
       1  """Find modules used by a script, using introspection."""
       2  
       3  import dis
       4  import importlib._bootstrap_external
       5  import importlib.machinery
       6  import marshal
       7  import os
       8  import io
       9  import sys
      10  
# Old imp constants:
#
# Integer "kind" codes.  _find_module() places one of them in the third
# slot of the (suffix, mode, kind) description tuple it returns, and
# ModuleFinder.load_module() dispatches on that value.

_SEARCH_ERROR = 0   # _find_module(): loader type was not recognised
_PY_SOURCE = 1      # source file; load_module() compiles it
_PY_COMPILED = 2    # sourceless bytecode file; load_module() unmarshals it
_C_EXTENSION = 3    # extension module; recorded but never scanned
_PKG_DIRECTORY = 5  # package directory; handled by load_package()
_C_BUILTIN = 6      # built-in module; no file, no code to scan
_PY_FROZEN = 7      # frozen module; no file, no code to scan
      20  
# Modulefinder does a good job at simulating Python's import system, but it
# cannot handle __path__ modifications that packages make at runtime.
# Therefore there is a mechanism whereby you can register extra paths in
# this map for a package, and they will be honored.
      25  
# Note: this is a mapping from package names to lists of paths.
      27  packagePathMap = {}
      28  
      29  # A Public interface
      30  def AddPackagePath(packagename, path):
      31      packagePathMap.setdefault(packagename, []).append(path)
      32  
      33  replacePackageMap = {}
      34  
      35  # This ReplacePackage mechanism allows modulefinder to work around
      36  # situations in which a package injects itself under the name
      37  # of another package into sys.modules at runtime by calling
      38  # ReplacePackage("real_package_name", "faked_package_name")
      39  # before running ModuleFinder.
      40  
      41  def ReplacePackage(oldname, newname):
      42      replacePackageMap[oldname] = newname
      43  
      44  
      45  def _find_module(name, path=None):
      46      """An importlib reimplementation of imp.find_module (for our purposes)."""
      47  
      48      # It's necessary to clear the caches for our Finder first, in case any
      49      # modules are being added/deleted/modified at runtime. In particular,
      50      # test_modulefinder.py changes file tree contents in a cache-breaking way:
      51  
      52      importlib.machinery.PathFinder.invalidate_caches()
      53  
      54      spec = importlib.machinery.PathFinder.find_spec(name, path)
      55  
      56      if spec is None:
      57          raise ImportError("No module named {name!r}".format(name=name), name=name)
      58  
      59      # Some special cases:
      60  
      61      if spec.loader is importlib.machinery.BuiltinImporter:
      62          return None, None, ("", "", _C_BUILTIN)
      63  
      64      if spec.loader is importlib.machinery.FrozenImporter:
      65          return None, None, ("", "", _PY_FROZEN)
      66  
      67      file_path = spec.origin
      68  
      69      if spec.loader.is_package(name):
      70          return None, os.path.dirname(file_path), ("", "", _PKG_DIRECTORY)
      71  
      72      if isinstance(spec.loader, importlib.machinery.SourceFileLoader):
      73          kind = _PY_SOURCE
      74  
      75      elif isinstance(spec.loader, importlib.machinery.ExtensionFileLoader):
      76          kind = _C_EXTENSION
      77  
      78      elif isinstance(spec.loader, importlib.machinery.SourcelessFileLoader):
      79          kind = _PY_COMPILED
      80  
      81      else:  # Should never happen.
      82          return None, None, ("", "", _SEARCH_ERROR)
      83  
      84      file = io.open_code(file_path)
      85      suffix = os.path.splitext(file_path)[-1]
      86  
      87      return file, file_path, (suffix, "rb", kind)
      88  
      89  
      90  class ESC[4;38;5;81mModule:
      91  
      92      def __init__(self, name, file=None, path=None):
      93          self.__name__ = name
      94          self.__file__ = file
      95          self.__path__ = path
      96          self.__code__ = None
      97          # The set of global names that are assigned to in the module.
      98          # This includes those names imported through starimports of
      99          # Python modules.
     100          self.globalnames = {}
     101          # The set of starimports this module did that could not be
     102          # resolved, ie. a starimport from a non-Python module.
     103          self.starimports = {}
     104  
     105      def __repr__(self):
     106          s = "Module(%r" % (self.__name__,)
     107          if self.__file__ is not None:
     108              s = s + ", %r" % (self.__file__,)
     109          if self.__path__ is not None:
     110              s = s + ", %r" % (self.__path__,)
     111          s = s + ")"
     112          return s
     113  
     114  class ESC[4;38;5;81mModuleFinder:
     115  
     116      def __init__(self, path=None, debug=0, excludes=None, replace_paths=None):
     117          if path is None:
     118              path = sys.path
     119          self.path = path
     120          self.modules = {}
     121          self.badmodules = {}
     122          self.debug = debug
     123          self.indent = 0
     124          self.excludes = excludes if excludes is not None else []
     125          self.replace_paths = replace_paths if replace_paths is not None else []
     126          self.processed_paths = []   # Used in debugging only
     127  
     128      def msg(self, level, str, *args):
     129          if level <= self.debug:
     130              for i in range(self.indent):
     131                  print("   ", end=' ')
     132              print(str, end=' ')
     133              for arg in args:
     134                  print(repr(arg), end=' ')
     135              print()
     136  
     137      def msgin(self, *args):
     138          level = args[0]
     139          if level <= self.debug:
     140              self.indent = self.indent + 1
     141              self.msg(*args)
     142  
     143      def msgout(self, *args):
     144          level = args[0]
     145          if level <= self.debug:
     146              self.indent = self.indent - 1
     147              self.msg(*args)
     148  
     149      def run_script(self, pathname):
     150          self.msg(2, "run_script", pathname)
     151          with io.open_code(pathname) as fp:
     152              stuff = ("", "rb", _PY_SOURCE)
     153              self.load_module('__main__', fp, pathname, stuff)
     154  
     155      def load_file(self, pathname):
     156          dir, name = os.path.split(pathname)
     157          name, ext = os.path.splitext(name)
     158          with io.open_code(pathname) as fp:
     159              stuff = (ext, "rb", _PY_SOURCE)
     160              self.load_module(name, fp, pathname, stuff)
     161  
     162      def import_hook(self, name, caller=None, fromlist=None, level=-1):
     163          self.msg(3, "import_hook", name, caller, fromlist, level)
     164          parent = self.determine_parent(caller, level=level)
     165          q, tail = self.find_head_package(parent, name)
     166          m = self.load_tail(q, tail)
     167          if not fromlist:
     168              return q
     169          if m.__path__:
     170              self.ensure_fromlist(m, fromlist)
     171          return None
     172  
     173      def determine_parent(self, caller, level=-1):
     174          self.msgin(4, "determine_parent", caller, level)
     175          if not caller or level == 0:
     176              self.msgout(4, "determine_parent -> None")
     177              return None
     178          pname = caller.__name__
     179          if level >= 1: # relative import
     180              if caller.__path__:
     181                  level -= 1
     182              if level == 0:
     183                  parent = self.modules[pname]
     184                  assert parent is caller
     185                  self.msgout(4, "determine_parent ->", parent)
     186                  return parent
     187              if pname.count(".") < level:
     188                  raise ImportError("relative importpath too deep")
     189              pname = ".".join(pname.split(".")[:-level])
     190              parent = self.modules[pname]
     191              self.msgout(4, "determine_parent ->", parent)
     192              return parent
     193          if caller.__path__:
     194              parent = self.modules[pname]
     195              assert caller is parent
     196              self.msgout(4, "determine_parent ->", parent)
     197              return parent
     198          if '.' in pname:
     199              i = pname.rfind('.')
     200              pname = pname[:i]
     201              parent = self.modules[pname]
     202              assert parent.__name__ == pname
     203              self.msgout(4, "determine_parent ->", parent)
     204              return parent
     205          self.msgout(4, "determine_parent -> None")
     206          return None
     207  
     208      def find_head_package(self, parent, name):
     209          self.msgin(4, "find_head_package", parent, name)
     210          if '.' in name:
     211              i = name.find('.')
     212              head = name[:i]
     213              tail = name[i+1:]
     214          else:
     215              head = name
     216              tail = ""
     217          if parent:
     218              qname = "%s.%s" % (parent.__name__, head)
     219          else:
     220              qname = head
     221          q = self.import_module(head, qname, parent)
     222          if q:
     223              self.msgout(4, "find_head_package ->", (q, tail))
     224              return q, tail
     225          if parent:
     226              qname = head
     227              parent = None
     228              q = self.import_module(head, qname, parent)
     229              if q:
     230                  self.msgout(4, "find_head_package ->", (q, tail))
     231                  return q, tail
     232          self.msgout(4, "raise ImportError: No module named", qname)
     233          raise ImportError("No module named " + qname)
     234  
     235      def load_tail(self, q, tail):
     236          self.msgin(4, "load_tail", q, tail)
     237          m = q
     238          while tail:
     239              i = tail.find('.')
     240              if i < 0: i = len(tail)
     241              head, tail = tail[:i], tail[i+1:]
     242              mname = "%s.%s" % (m.__name__, head)
     243              m = self.import_module(head, mname, m)
     244              if not m:
     245                  self.msgout(4, "raise ImportError: No module named", mname)
     246                  raise ImportError("No module named " + mname)
     247          self.msgout(4, "load_tail ->", m)
     248          return m
     249  
     250      def ensure_fromlist(self, m, fromlist, recursive=0):
     251          self.msg(4, "ensure_fromlist", m, fromlist, recursive)
     252          for sub in fromlist:
     253              if sub == "*":
     254                  if not recursive:
     255                      all = self.find_all_submodules(m)
     256                      if all:
     257                          self.ensure_fromlist(m, all, 1)
     258              elif not hasattr(m, sub):
     259                  subname = "%s.%s" % (m.__name__, sub)
     260                  submod = self.import_module(sub, subname, m)
     261                  if not submod:
     262                      raise ImportError("No module named " + subname)
     263  
     264      def find_all_submodules(self, m):
     265          if not m.__path__:
     266              return
     267          modules = {}
     268          # 'suffixes' used to be a list hardcoded to [".py", ".pyc"].
     269          # But we must also collect Python extension modules - although
     270          # we cannot separate normal dlls from Python extensions.
     271          suffixes = []
     272          suffixes += importlib.machinery.EXTENSION_SUFFIXES[:]
     273          suffixes += importlib.machinery.SOURCE_SUFFIXES[:]
     274          suffixes += importlib.machinery.BYTECODE_SUFFIXES[:]
     275          for dir in m.__path__:
     276              try:
     277                  names = os.listdir(dir)
     278              except OSError:
     279                  self.msg(2, "can't list directory", dir)
     280                  continue
     281              for name in names:
     282                  mod = None
     283                  for suff in suffixes:
     284                      n = len(suff)
     285                      if name[-n:] == suff:
     286                          mod = name[:-n]
     287                          break
     288                  if mod and mod != "__init__":
     289                      modules[mod] = mod
     290          return modules.keys()
     291  
     292      def import_module(self, partname, fqname, parent):
     293          self.msgin(3, "import_module", partname, fqname, parent)
     294          try:
     295              m = self.modules[fqname]
     296          except KeyError:
     297              pass
     298          else:
     299              self.msgout(3, "import_module ->", m)
     300              return m
     301          if fqname in self.badmodules:
     302              self.msgout(3, "import_module -> None")
     303              return None
     304          if parent and parent.__path__ is None:
     305              self.msgout(3, "import_module -> None")
     306              return None
     307          try:
     308              fp, pathname, stuff = self.find_module(partname,
     309                                                     parent and parent.__path__, parent)
     310          except ImportError:
     311              self.msgout(3, "import_module ->", None)
     312              return None
     313  
     314          try:
     315              m = self.load_module(fqname, fp, pathname, stuff)
     316          finally:
     317              if fp:
     318                  fp.close()
     319          if parent:
     320              setattr(parent, partname, m)
     321          self.msgout(3, "import_module ->", m)
     322          return m
     323  
     324      def load_module(self, fqname, fp, pathname, file_info):
     325          suffix, mode, type = file_info
     326          self.msgin(2, "load_module", fqname, fp and "fp", pathname)
     327          if type == _PKG_DIRECTORY:
     328              m = self.load_package(fqname, pathname)
     329              self.msgout(2, "load_module ->", m)
     330              return m
     331          if type == _PY_SOURCE:
     332              co = compile(fp.read(), pathname, 'exec')
     333          elif type == _PY_COMPILED:
     334              try:
     335                  data = fp.read()
     336                  importlib._bootstrap_external._classify_pyc(data, fqname, {})
     337              except ImportError as exc:
     338                  self.msgout(2, "raise ImportError: " + str(exc), pathname)
     339                  raise
     340              co = marshal.loads(memoryview(data)[16:])
     341          else:
     342              co = None
     343          m = self.add_module(fqname)
     344          m.__file__ = pathname
     345          if co:
     346              if self.replace_paths:
     347                  co = self.replace_paths_in_code(co)
     348              m.__code__ = co
     349              self.scan_code(co, m)
     350          self.msgout(2, "load_module ->", m)
     351          return m
     352  
     353      def _add_badmodule(self, name, caller):
     354          if name not in self.badmodules:
     355              self.badmodules[name] = {}
     356          if caller:
     357              self.badmodules[name][caller.__name__] = 1
     358          else:
     359              self.badmodules[name]["-"] = 1
     360  
     361      def _safe_import_hook(self, name, caller, fromlist, level=-1):
     362          # wrapper for self.import_hook() that won't raise ImportError
     363          if name in self.badmodules:
     364              self._add_badmodule(name, caller)
     365              return
     366          try:
     367              self.import_hook(name, caller, level=level)
     368          except ImportError as msg:
     369              self.msg(2, "ImportError:", str(msg))
     370              self._add_badmodule(name, caller)
     371          except SyntaxError as msg:
     372              self.msg(2, "SyntaxError:", str(msg))
     373              self._add_badmodule(name, caller)
     374          else:
     375              if fromlist:
     376                  for sub in fromlist:
     377                      fullname = name + "." + sub
     378                      if fullname in self.badmodules:
     379                          self._add_badmodule(fullname, caller)
     380                          continue
     381                      try:
     382                          self.import_hook(name, caller, [sub], level=level)
     383                      except ImportError as msg:
     384                          self.msg(2, "ImportError:", str(msg))
     385                          self._add_badmodule(fullname, caller)
     386  
     387      def scan_opcodes(self, co):
     388          # Scan the code, and yield 'interesting' opcode combinations
     389          for name in dis._find_store_names(co):
     390              yield "store", (name,)
     391          for name, level, fromlist in dis._find_imports(co):
     392              if level == 0:  # absolute import
     393                  yield "absolute_import", (fromlist, name)
     394              else:  # relative import
     395                  yield "relative_import", (level, fromlist, name)
     396  
     397      def scan_code(self, co, m):
     398          code = co.co_code
     399          scanner = self.scan_opcodes
     400          for what, args in scanner(co):
     401              if what == "store":
     402                  name, = args
     403                  m.globalnames[name] = 1
     404              elif what == "absolute_import":
     405                  fromlist, name = args
     406                  have_star = 0
     407                  if fromlist is not None:
     408                      if "*" in fromlist:
     409                          have_star = 1
     410                      fromlist = [f for f in fromlist if f != "*"]
     411                  self._safe_import_hook(name, m, fromlist, level=0)
     412                  if have_star:
     413                      # We've encountered an "import *". If it is a Python module,
     414                      # the code has already been parsed and we can suck out the
     415                      # global names.
     416                      mm = None
     417                      if m.__path__:
     418                          # At this point we don't know whether 'name' is a
     419                          # submodule of 'm' or a global module. Let's just try
     420                          # the full name first.
     421                          mm = self.modules.get(m.__name__ + "." + name)
     422                      if mm is None:
     423                          mm = self.modules.get(name)
     424                      if mm is not None:
     425                          m.globalnames.update(mm.globalnames)
     426                          m.starimports.update(mm.starimports)
     427                          if mm.__code__ is None:
     428                              m.starimports[name] = 1
     429                      else:
     430                          m.starimports[name] = 1
     431              elif what == "relative_import":
     432                  level, fromlist, name = args
     433                  if name:
     434                      self._safe_import_hook(name, m, fromlist, level=level)
     435                  else:
     436                      parent = self.determine_parent(m, level=level)
     437                      self._safe_import_hook(parent.__name__, None, fromlist, level=0)
     438              else:
     439                  # We don't expect anything else from the generator.
     440                  raise RuntimeError(what)
     441  
     442          for c in co.co_consts:
     443              if isinstance(c, type(co)):
     444                  self.scan_code(c, m)
     445  
     446      def load_package(self, fqname, pathname):
     447          self.msgin(2, "load_package", fqname, pathname)
     448          newname = replacePackageMap.get(fqname)
     449          if newname:
     450              fqname = newname
     451          m = self.add_module(fqname)
     452          m.__file__ = pathname
     453          m.__path__ = [pathname]
     454  
     455          # As per comment at top of file, simulate runtime __path__ additions.
     456          m.__path__ = m.__path__ + packagePathMap.get(fqname, [])
     457  
     458          fp, buf, stuff = self.find_module("__init__", m.__path__)
     459          try:
     460              self.load_module(fqname, fp, buf, stuff)
     461              self.msgout(2, "load_package ->", m)
     462              return m
     463          finally:
     464              if fp:
     465                  fp.close()
     466  
     467      def add_module(self, fqname):
     468          if fqname in self.modules:
     469              return self.modules[fqname]
     470          self.modules[fqname] = m = Module(fqname)
     471          return m
     472  
     473      def find_module(self, name, path, parent=None):
     474          if parent is not None:
     475              # assert path is not None
     476              fullname = parent.__name__+'.'+name
     477          else:
     478              fullname = name
     479          if fullname in self.excludes:
     480              self.msgout(3, "find_module -> Excluded", fullname)
     481              raise ImportError(name)
     482  
     483          if path is None:
     484              if name in sys.builtin_module_names:
     485                  return (None, None, ("", "", _C_BUILTIN))
     486  
     487              path = self.path
     488  
     489          return _find_module(name, path)
     490  
     491      def report(self):
     492          """Print a report to stdout, listing the found modules with their
     493          paths, as well as modules that are missing, or seem to be missing.
     494          """
     495          print()
     496          print("  %-25s %s" % ("Name", "File"))
     497          print("  %-25s %s" % ("----", "----"))
     498          # Print modules found
     499          keys = sorted(self.modules.keys())
     500          for key in keys:
     501              m = self.modules[key]
     502              if m.__path__:
     503                  print("P", end=' ')
     504              else:
     505                  print("m", end=' ')
     506              print("%-25s" % key, m.__file__ or "")
     507  
     508          # Print missing modules
     509          missing, maybe = self.any_missing_maybe()
     510          if missing:
     511              print()
     512              print("Missing modules:")
     513              for name in missing:
     514                  mods = sorted(self.badmodules[name].keys())
     515                  print("?", name, "imported from", ', '.join(mods))
     516          # Print modules that may be missing, but then again, maybe not...
     517          if maybe:
     518              print()
     519              print("Submodules that appear to be missing, but could also be", end=' ')
     520              print("global names in the parent package:")
     521              for name in maybe:
     522                  mods = sorted(self.badmodules[name].keys())
     523                  print("?", name, "imported from", ', '.join(mods))
     524  
     525      def any_missing(self):
     526          """Return a list of modules that appear to be missing. Use
     527          any_missing_maybe() if you want to know which modules are
     528          certain to be missing, and which *may* be missing.
     529          """
     530          missing, maybe = self.any_missing_maybe()
     531          return missing + maybe
     532  
     533      def any_missing_maybe(self):
     534          """Return two lists, one with modules that are certainly missing
     535          and one with modules that *may* be missing. The latter names could
     536          either be submodules *or* just global names in the package.
     537  
     538          The reason it can't always be determined is that it's impossible to
     539          tell which names are imported when "from module import *" is done
     540          with an extension module, short of actually importing it.
     541          """
     542          missing = []
     543          maybe = []
     544          for name in self.badmodules:
     545              if name in self.excludes:
     546                  continue
     547              i = name.rfind(".")
     548              if i < 0:
     549                  missing.append(name)
     550                  continue
     551              subname = name[i+1:]
     552              pkgname = name[:i]
     553              pkg = self.modules.get(pkgname)
     554              if pkg is not None:
     555                  if pkgname in self.badmodules[name]:
     556                      # The package tried to import this module itself and
     557                      # failed. It's definitely missing.
     558                      missing.append(name)
     559                  elif subname in pkg.globalnames:
     560                      # It's a global in the package: definitely not missing.
     561                      pass
     562                  elif pkg.starimports:
     563                      # It could be missing, but the package did an "import *"
     564                      # from a non-Python module, so we simply can't be sure.
     565                      maybe.append(name)
     566                  else:
     567                      # It's not a global in the package, the package didn't
     568                      # do funny star imports, it's very likely to be missing.
     569                      # The symbol could be inserted into the package from the
     570                      # outside, but since that's not good style we simply list
     571                      # it missing.
     572                      missing.append(name)
     573              else:
     574                  missing.append(name)
     575          missing.sort()
     576          maybe.sort()
     577          return missing, maybe
     578  
     579      def replace_paths_in_code(self, co):
     580          new_filename = original_filename = os.path.normpath(co.co_filename)
     581          for f, r in self.replace_paths:
     582              if original_filename.startswith(f):
     583                  new_filename = r + original_filename[len(f):]
     584                  break
     585  
     586          if self.debug and original_filename not in self.processed_paths:
     587              if new_filename != original_filename:
     588                  self.msgout(2, "co_filename %r changed to %r" \
     589                                      % (original_filename,new_filename,))
     590              else:
     591                  self.msgout(2, "co_filename %r remains unchanged" \
     592                                      % (original_filename,))
     593              self.processed_paths.append(original_filename)
     594  
     595          consts = list(co.co_consts)
     596          for i in range(len(consts)):
     597              if isinstance(consts[i], type(co)):
     598                  consts[i] = self.replace_paths_in_code(consts[i])
     599  
     600          return co.replace(co_consts=tuple(consts), co_filename=new_filename)
     601  
     602  
     603  def test():
     604      # Parse command line
     605      import getopt
     606      try:
     607          opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
     608      except getopt.error as msg:
     609          print(msg)
     610          return
     611  
     612      # Process options
     613      debug = 1
     614      domods = 0
     615      addpath = []
     616      exclude = []
     617      for o, a in opts:
     618          if o == '-d':
     619              debug = debug + 1
     620          if o == '-m':
     621              domods = 1
     622          if o == '-p':
     623              addpath = addpath + a.split(os.pathsep)
     624          if o == '-q':
     625              debug = 0
     626          if o == '-x':
     627              exclude.append(a)
     628  
     629      # Provide default arguments
     630      if not args:
     631          script = "hello.py"
     632      else:
     633          script = args[0]
     634  
     635      # Set the path based on sys.path and the script directory
     636      path = sys.path[:]
     637      path[0] = os.path.dirname(script)
     638      path = addpath + path
     639      if debug > 1:
     640          print("path:")
     641          for item in path:
     642              print("   ", repr(item))
     643  
     644      # Create the module finder and turn its crank
     645      mf = ModuleFinder(path, debug, exclude)
     646      for arg in args[1:]:
     647          if arg == '-m':
     648              domods = 1
     649              continue
     650          if domods:
     651              if arg[-2:] == '.*':
     652                  mf.import_hook(arg[:-2], None, ["*"])
     653              else:
     654                  mf.import_hook(arg)
     655          else:
     656              mf.load_file(arg)
     657      mf.run_script(script)
     658      mf.report()
     659      return mf  # for -i debugging
     660  
     661  
# Script entry point: run the command-line driver.
if __name__ == '__main__':
    try:
        # Bind the result at module level; test() returns the finder
        # "for -i debugging", so it stays inspectable after the run.
        mf = test()
    except KeyboardInterrupt:
        # Report an interrupt with a short notice instead of a traceback.
        print("\n[interrupted]")