1  """Utilities to support packages."""
       2  
       3  from collections import namedtuple
       4  from functools import singledispatch as simplegeneric
       5  import importlib
       6  import importlib.util
       7  import importlib.machinery
       8  import os
       9  import os.path
      10  import sys
      11  from types import ModuleType
      12  import warnings
      13  
      14  __all__ = [
      15      'get_importer', 'iter_importers', 'get_loader', 'find_loader',
      16      'walk_packages', 'iter_modules', 'get_data',
      17      'read_code', 'extend_path',
      18      'ModuleInfo',
      19  ]
      20  
      21  
      22  ModuleInfo = namedtuple('ModuleInfo', 'module_finder name ispkg')
      23  ModuleInfo.__doc__ = 'A namedtuple with minimal info about a module.'
      24  
      25  
      26  def read_code(stream):
      27      # This helper is needed in order for the PEP 302 emulation to
      28      # correctly handle compiled files
      29      import marshal
      30  
      31      magic = stream.read(4)
      32      if magic != importlib.util.MAGIC_NUMBER:
      33          return None
      34  
      35      stream.read(12) # Skip rest of the header
      36      return marshal.load(stream)
      37  
      38  
      39  def walk_packages(path=None, prefix='', onerror=None):
      40      """Yields ModuleInfo for all modules recursively
      41      on path, or, if path is None, all accessible modules.
      42  
      43      'path' should be either None or a list of paths to look for
      44      modules in.
      45  
      46      'prefix' is a string to output on the front of every module name
      47      on output.
      48  
      49      Note that this function must import all *packages* (NOT all
      50      modules!) on the given path, in order to access the __path__
      51      attribute to find submodules.
      52  
      53      'onerror' is a function which gets called with one argument (the
      54      name of the package which was being imported) if any exception
      55      occurs while trying to import a package.  If no onerror function is
      56      supplied, ImportErrors are caught and ignored, while all other
      57      exceptions are propagated, terminating the search.
      58  
      59      Examples:
      60  
      61      # list all modules python can access
      62      walk_packages()
      63  
      64      # list all submodules of ctypes
      65      walk_packages(ctypes.__path__, ctypes.__name__+'.')
      66      """
      67  
      68      def seen(p, m={}):
      69          if p in m:
      70              return True
      71          m[p] = True
      72  
      73      for info in iter_modules(path, prefix):
      74          yield info
      75  
      76          if info.ispkg:
      77              try:
      78                  __import__(info.name)
      79              except ImportError:
      80                  if onerror is not None:
      81                      onerror(info.name)
      82              except Exception:
      83                  if onerror is not None:
      84                      onerror(info.name)
      85                  else:
      86                      raise
      87              else:
      88                  path = getattr(sys.modules[info.name], '__path__', None) or []
      89  
      90                  # don't traverse path items we've seen before
      91                  path = [p for p in path if not seen(p)]
      92  
      93                  yield from walk_packages(path, info.name+'.', onerror)
      94  
      95  
      96  def iter_modules(path=None, prefix=''):
      97      """Yields ModuleInfo for all submodules on path,
      98      or, if path is None, all top-level modules on sys.path.
      99  
     100      'path' should be either None or a list of paths to look for
     101      modules in.
     102  
     103      'prefix' is a string to output on the front of every module name
     104      on output.
     105      """
     106      if path is None:
     107          importers = iter_importers()
     108      elif isinstance(path, str):
     109          raise ValueError("path must be None or list of paths to look for "
     110                          "modules in")
     111      else:
     112          importers = map(get_importer, path)
     113  
     114      yielded = {}
     115      for i in importers:
     116          for name, ispkg in iter_importer_modules(i, prefix):
     117              if name not in yielded:
     118                  yielded[name] = 1
     119                  yield ModuleInfo(i, name, ispkg)
     120  
     121  
     122  @simplegeneric
     123  def iter_importer_modules(importer, prefix=''):
     124      if not hasattr(importer, 'iter_modules'):
     125          return []
     126      return importer.iter_modules(prefix)
     127  
     128  
     129  # Implement a file walker for the normal importlib path hook
     130  def _iter_file_finder_modules(importer, prefix=''):
     131      if importer.path is None or not os.path.isdir(importer.path):
     132          return
     133  
     134      yielded = {}
     135      import inspect
     136      try:
     137          filenames = os.listdir(importer.path)
     138      except OSError:
     139          # ignore unreadable directories like import does
     140          filenames = []
     141      filenames.sort()  # handle packages before same-named modules
     142  
     143      for fn in filenames:
     144          modname = inspect.getmodulename(fn)
     145          if modname=='__init__' or modname in yielded:
     146              continue
     147  
     148          path = os.path.join(importer.path, fn)
     149          ispkg = False
     150  
     151          if not modname and os.path.isdir(path) and '.' not in fn:
     152              modname = fn
     153              try:
     154                  dircontents = os.listdir(path)
     155              except OSError:
     156                  # ignore unreadable directories like import does
     157                  dircontents = []
     158              for fn in dircontents:
     159                  subname = inspect.getmodulename(fn)
     160                  if subname=='__init__':
     161                      ispkg = True
     162                      break
     163              else:
     164                  continue    # not a package
     165  
     166          if modname and '.' not in modname:
     167              yielded[modname] = 1
     168              yield prefix + modname, ispkg
     169  
# Route FileFinder instances through the directory-scanning implementation
# whenever iter_importer_modules() dispatches on the importer's type.
iter_importer_modules.register(
    importlib.machinery.FileFinder, _iter_file_finder_modules)
     172  
     173  
     174  try:
     175      import zipimport
     176      from zipimport import zipimporter
     177  
     178      def iter_zipimport_modules(importer, prefix=''):
     179          dirlist = sorted(zipimport._zip_directory_cache[importer.archive])
     180          _prefix = importer.prefix
     181          plen = len(_prefix)
     182          yielded = {}
     183          import inspect
     184          for fn in dirlist:
     185              if not fn.startswith(_prefix):
     186                  continue
     187  
     188              fn = fn[plen:].split(os.sep)
     189  
     190              if len(fn)==2 and fn[1].startswith('__init__.py'):
     191                  if fn[0] not in yielded:
     192                      yielded[fn[0]] = 1
     193                      yield prefix + fn[0], True
     194  
     195              if len(fn)!=1:
     196                  continue
     197  
     198              modname = inspect.getmodulename(fn[0])
     199              if modname=='__init__':
     200                  continue
     201  
     202              if modname and '.' not in modname and modname not in yielded:
     203                  yielded[modname] = 1
     204                  yield prefix + modname, False
     205  
     206      iter_importer_modules.register(zipimporter, iter_zipimport_modules)
     207  
     208  except ImportError:
     209      pass
     210  
     211  
     212  def get_importer(path_item):
     213      """Retrieve a finder for the given path item
     214  
     215      The returned finder is cached in sys.path_importer_cache
     216      if it was newly created by a path hook.
     217  
     218      The cache (or part of it) can be cleared manually if a
     219      rescan of sys.path_hooks is necessary.
     220      """
     221      path_item = os.fsdecode(path_item)
     222      try:
     223          importer = sys.path_importer_cache[path_item]
     224      except KeyError:
     225          for path_hook in sys.path_hooks:
     226              try:
     227                  importer = path_hook(path_item)
     228                  sys.path_importer_cache.setdefault(path_item, importer)
     229                  break
     230              except ImportError:
     231                  pass
     232          else:
     233              importer = None
     234      return importer
     235  
     236  
     237  def iter_importers(fullname=""):
     238      """Yield finders for the given module name
     239  
     240      If fullname contains a '.', the finders will be for the package
     241      containing fullname, otherwise they will be all registered top level
     242      finders (i.e. those on both sys.meta_path and sys.path_hooks).
     243  
     244      If the named module is in a package, that package is imported as a side
     245      effect of invoking this function.
     246  
     247      If no module name is specified, all top level finders are produced.
     248      """
     249      if fullname.startswith('.'):
     250          msg = "Relative module name {!r} not supported".format(fullname)
     251          raise ImportError(msg)
     252      if '.' in fullname:
     253          # Get the containing package's __path__
     254          pkg_name = fullname.rpartition(".")[0]
     255          pkg = importlib.import_module(pkg_name)
     256          path = getattr(pkg, '__path__', None)
     257          if path is None:
     258              return
     259      else:
     260          yield from sys.meta_path
     261          path = sys.path
     262      for item in path:
     263          yield get_importer(item)
     264  
     265  
     266  def get_loader(module_or_name):
     267      """Get a "loader" object for module_or_name
     268  
     269      Returns None if the module cannot be found or imported.
     270      If the named module is not already imported, its containing package
     271      (if any) is imported, in order to establish the package __path__.
     272      """
     273      warnings._deprecated("pkgutil.get_loader",
     274                           f"{warnings._DEPRECATED_MSG}; "
     275                           "use importlib.util.find_spec() instead",
     276                           remove=(3, 14))
     277      if module_or_name in sys.modules:
     278          module_or_name = sys.modules[module_or_name]
     279          if module_or_name is None:
     280              return None
     281      if isinstance(module_or_name, ModuleType):
     282          module = module_or_name
     283          loader = getattr(module, '__loader__', None)
     284          if loader is not None:
     285              return loader
     286          if getattr(module, '__spec__', None) is None:
     287              return None
     288          fullname = module.__name__
     289      else:
     290          fullname = module_or_name
     291      return find_loader(fullname)
     292  
     293  
     294  def find_loader(fullname):
     295      """Find a "loader" object for fullname
     296  
     297      This is a backwards compatibility wrapper around
     298      importlib.util.find_spec that converts most failures to ImportError
     299      and only returns the loader rather than the full spec
     300      """
     301      warnings._deprecated("pkgutil.find_loader",
     302                           f"{warnings._DEPRECATED_MSG}; "
     303                           "use importlib.util.find_spec() instead",
     304                           remove=(3, 14))
     305      if fullname.startswith('.'):
     306          msg = "Relative module name {!r} not supported".format(fullname)
     307          raise ImportError(msg)
     308      try:
     309          spec = importlib.util.find_spec(fullname)
     310      except (ImportError, AttributeError, TypeError, ValueError) as ex:
     311          # This hack fixes an impedance mismatch between pkgutil and
     312          # importlib, where the latter raises other errors for cases where
     313          # pkgutil previously raised ImportError
     314          msg = "Error while finding loader for {!r} ({}: {})"
     315          raise ImportError(msg.format(fullname, type(ex), ex)) from ex
     316      return spec.loader if spec is not None else None
     317  
     318  
     319  def extend_path(path, name):
     320      """Extend a package's path.
     321  
     322      Intended use is to place the following code in a package's __init__.py:
     323  
     324          from pkgutil import extend_path
     325          __path__ = extend_path(__path__, __name__)
     326  
     327      For each directory on sys.path that has a subdirectory that
     328      matches the package name, add the subdirectory to the package's
     329      __path__.  This is useful if one wants to distribute different
     330      parts of a single logical package as multiple directories.
     331  
     332      It also looks for *.pkg files beginning where * matches the name
     333      argument.  This feature is similar to *.pth files (see site.py),
     334      except that it doesn't special-case lines starting with 'import'.
     335      A *.pkg file is trusted at face value: apart from checking for
     336      duplicates, all entries found in a *.pkg file are added to the
     337      path, regardless of whether they are exist the filesystem.  (This
     338      is a feature.)
     339  
     340      If the input path is not a list (as is the case for frozen
     341      packages) it is returned unchanged.  The input path is not
     342      modified; an extended copy is returned.  Items are only appended
     343      to the copy at the end.
     344  
     345      It is assumed that sys.path is a sequence.  Items of sys.path that
     346      are not (unicode or 8-bit) strings referring to existing
     347      directories are ignored.  Unicode items of sys.path that cause
     348      errors when used as filenames may cause this function to raise an
     349      exception (in line with os.path.isdir() behavior).
     350      """
     351  
     352      if not isinstance(path, list):
     353          # This could happen e.g. when this is called from inside a
     354          # frozen package.  Return the path unchanged in that case.
     355          return path
     356  
     357      sname_pkg = name + ".pkg"
     358  
     359      path = path[:] # Start with a copy of the existing path
     360  
     361      parent_package, _, final_name = name.rpartition('.')
     362      if parent_package:
     363          try:
     364              search_path = sys.modules[parent_package].__path__
     365          except (KeyError, AttributeError):
     366              # We can't do anything: find_loader() returns None when
     367              # passed a dotted name.
     368              return path
     369      else:
     370          search_path = sys.path
     371  
     372      for dir in search_path:
     373          if not isinstance(dir, str):
     374              continue
     375  
     376          finder = get_importer(dir)
     377          if finder is not None:
     378              portions = []
     379              if hasattr(finder, 'find_spec'):
     380                  spec = finder.find_spec(final_name)
     381                  if spec is not None:
     382                      portions = spec.submodule_search_locations or []
     383              # Is this finder PEP 420 compliant?
     384              elif hasattr(finder, 'find_loader'):
     385                  _, portions = finder.find_loader(final_name)
     386  
     387              for portion in portions:
     388                  # XXX This may still add duplicate entries to path on
     389                  # case-insensitive filesystems
     390                  if portion not in path:
     391                      path.append(portion)
     392  
     393          # XXX Is this the right thing for subpackages like zope.app?
     394          # It looks for a file named "zope.app.pkg"
     395          pkgfile = os.path.join(dir, sname_pkg)
     396          if os.path.isfile(pkgfile):
     397              try:
     398                  f = open(pkgfile)
     399              except OSError as msg:
     400                  sys.stderr.write("Can't open %s: %s\n" %
     401                                   (pkgfile, msg))
     402              else:
     403                  with f:
     404                      for line in f:
     405                          line = line.rstrip('\n')
     406                          if not line or line.startswith('#'):
     407                              continue
     408                          path.append(line) # Don't check for existence!
     409  
     410      return path
     411  
     412  
     413  def get_data(package, resource):
     414      """Get a resource from a package.
     415  
     416      This is a wrapper round the PEP 302 loader get_data API. The package
     417      argument should be the name of a package, in standard module format
     418      (foo.bar). The resource argument should be in the form of a relative
     419      filename, using '/' as the path separator. The parent directory name '..'
     420      is not allowed, and nor is a rooted name (starting with a '/').
     421  
     422      The function returns a binary string, which is the contents of the
     423      specified resource.
     424  
     425      For packages located in the filesystem, which have already been imported,
     426      this is the rough equivalent of
     427  
     428          d = os.path.dirname(sys.modules[package].__file__)
     429          data = open(os.path.join(d, resource), 'rb').read()
     430  
     431      If the package cannot be located or loaded, or it uses a PEP 302 loader
     432      which does not support get_data(), then None is returned.
     433      """
     434  
     435      spec = importlib.util.find_spec(package)
     436      if spec is None:
     437          return None
     438      loader = spec.loader
     439      if loader is None or not hasattr(loader, 'get_data'):
     440          return None
     441      # XXX needs test
     442      mod = (sys.modules.get(package) or
     443             importlib._bootstrap._load(spec))
     444      if mod is None or not hasattr(mod, '__file__'):
     445          return None
     446  
     447      # Modify the resource name to be compatible with the loader.get_data
     448      # signature - an os.path format "filename" starting with the dirname of
     449      # the package's __file__
     450      parts = resource.split('/')
     451      parts.insert(0, os.path.dirname(mod.__file__))
     452      resource_name = os.path.join(*parts)
     453      return loader.get_data(resource_name)
     454  
     455  
     456  _NAME_PATTERN = None
     457  
     458  def resolve_name(name):
     459      """
     460      Resolve a name to an object.
     461  
     462      It is expected that `name` will be a string in one of the following
     463      formats, where W is shorthand for a valid Python identifier and dot stands
     464      for a literal period in these pseudo-regexes:
     465  
     466      W(.W)*
     467      W(.W)*:(W(.W)*)?
     468  
     469      The first form is intended for backward compatibility only. It assumes that
     470      some part of the dotted name is a package, and the rest is an object
     471      somewhere within that package, possibly nested inside other objects.
     472      Because the place where the package stops and the object hierarchy starts
     473      can't be inferred by inspection, repeated attempts to import must be done
     474      with this form.
     475  
     476      In the second form, the caller makes the division point clear through the
     477      provision of a single colon: the dotted name to the left of the colon is a
     478      package to be imported, and the dotted name to the right is the object
     479      hierarchy within that package. Only one import is needed in this form. If
     480      it ends with the colon, then a module object is returned.
     481  
     482      The function will return an object (which might be a module), or raise one
     483      of the following exceptions:
     484  
     485      ValueError - if `name` isn't in a recognised format
     486      ImportError - if an import failed when it shouldn't have
     487      AttributeError - if a failure occurred when traversing the object hierarchy
     488                       within the imported package to get to the desired object.
     489      """
     490      global _NAME_PATTERN
     491      if _NAME_PATTERN is None:
     492          # Lazy import to speedup Python startup time
     493          import re
     494          dotted_words = r'(?!\d)(\w+)(\.(?!\d)(\w+))*'
     495          _NAME_PATTERN = re.compile(f'^(?P<pkg>{dotted_words})'
     496                                     f'(?P<cln>:(?P<obj>{dotted_words})?)?$',
     497                                     re.UNICODE)
     498  
     499      m = _NAME_PATTERN.match(name)
     500      if not m:
     501          raise ValueError(f'invalid format: {name!r}')
     502      gd = m.groupdict()
     503      if gd.get('cln'):
     504          # there is a colon - a one-step import is all that's needed
     505          mod = importlib.import_module(gd['pkg'])
     506          parts = gd.get('obj')
     507          parts = parts.split('.') if parts else []
     508      else:
     509          # no colon - have to iterate to find the package boundary
     510          parts = name.split('.')
     511          modname = parts.pop(0)
     512          # first part *must* be a module/package.
     513          mod = importlib.import_module(modname)
     514          while parts:
     515              p = parts[0]
     516              s = f'{modname}.{p}'
     517              try:
     518                  mod = importlib.import_module(s)
     519                  parts.pop(0)
     520                  modname = s
     521              except ImportError:
     522                  break
     523      # if we reach this point, mod is the module, already imported, and
     524      # parts is the list of parts in the object hierarchy to be traversed, or
     525      # an empty list if just the module is wanted.
     526      result = mod
     527      for p in parts:
     528          result = getattr(result, p)
     529      return result