1 """Utilities to support packages."""
2
3 from collections import namedtuple
4 from functools import singledispatch as simplegeneric
5 import importlib
6 import importlib.util
7 import importlib.machinery
8 import os
9 import os.path
10 import sys
11 from types import ModuleType
12 import warnings
13
# Public names exported by a star-import of this module.
__all__ = [
    'get_importer',
    'iter_importers',
    'get_loader',
    'find_loader',
    'walk_packages',
    'iter_modules',
    'get_data',
    'ImpImporter',
    'ImpLoader',
    'read_code',
    'extend_path',
    'ModuleInfo',
]
20
21
# Lightweight record with three fields: the finder that located a module,
# the module's name, and a flag telling whether it is a package.
ModuleInfo = namedtuple('ModuleInfo', ('module_finder', 'name', 'ispkg'))
ModuleInfo.__doc__ = 'A namedtuple with minimal info about a module.'
24
25
def _get_spec(finder, name):
    """Return the module spec that *finder* produces for *name*.

    Finders exposing find_spec() are asked directly.  Legacy finders that
    only offer find_module() are supported too: the loader they return is
    wrapped in a spec, and None is returned when they find nothing.
    """
    absent = object()
    modern_lookup = getattr(finder, 'find_spec', absent)
    if modern_lookup is not absent:
        return modern_lookup(name)

    # Legacy finder: only the PEP 302 find_module() API is available.
    legacy_loader = finder.find_module(name)
    if legacy_loader is None:
        return None
    return importlib.util.spec_from_loader(name, legacy_loader)
38
39
def read_code(stream):
    """Load a code object from an open compiled-bytecode stream.

    The first four bytes must equal importlib.util.MAGIC_NUMBER; if they
    do not, None is returned.  Otherwise the remaining 12 header bytes are
    skipped and the rest of the stream is unmarshalled.
    """
    # This helper is needed in order for the PEP 302 emulation to
    # correctly handle compiled files
    import marshal

    if stream.read(4) == importlib.util.MAGIC_NUMBER:
        stream.read(12)  # Skip rest of the header
        return marshal.load(stream)
    return None
51
52
def walk_packages(path=None, prefix='', onerror=None):
    """Recursively yield a ModuleInfo for every module found on path.

    With path=None all accessible modules are visited; otherwise 'path'
    should be a list of paths to look for modules in.

    'prefix' is a string put on the front of every module name that is
    produced.

    Every *package* encountered (NOT every module!) is imported, because
    its __path__ attribute is needed to locate its submodules.

    'onerror', when supplied, is called with one argument (the name of
    the package being imported) if importing that package raises.  When
    it is not supplied, ImportErrors are caught and ignored, while other
    exceptions propagate and terminate the search.

    Examples:

    # list all modules python can access
    walk_packages()

    # list all submodules of ctypes
    walk_packages(ctypes.__path__, ctypes.__name__+'.')
    """
    visited = set()

    def first_visit(entry):
        # True exactly once per path entry during this call.
        if entry in visited:
            return False
        visited.add(entry)
        return True

    for info in iter_modules(path, prefix):
        yield info

        if not info.ispkg:
            continue

        try:
            __import__(info.name)
        except ImportError:
            if onerror is not None:
                onerror(info.name)
            continue
        except Exception:
            if onerror is None:
                raise
            onerror(info.name)
            continue

        subpath = getattr(sys.modules[info.name], '__path__', None) or []

        # don't traverse path items we've seen before
        subpath = [entry for entry in subpath if first_visit(entry)]

        yield from walk_packages(subpath, info.name + '.', onerror)
108
109
def iter_modules(path=None, prefix=''):
    """Yield a ModuleInfo for each submodule found on path.

    With path=None the finders produced by iter_importers() are used;
    otherwise 'path' should be a list of paths to look for modules in.
    Passing a plain string raises ValueError.

    'prefix' is a string put on the front of every module name that is
    produced.  A given name is reported only once, for the first finder
    that lists it.
    """
    if isinstance(path, str):
        raise ValueError("path must be None or list of paths to look for "
                         "modules in")
    finders = iter_importers() if path is None else map(get_importer, path)

    reported = set()
    for finder in finders:
        for name, ispkg in iter_importer_modules(finder, prefix):
            if name in reported:
                continue
            reported.add(name)
            yield ModuleInfo(finder, name, ispkg)
134
135
@simplegeneric
def iter_importer_modules(importer, prefix=''):
    """Generic entry point listing the modules an importer can provide.

    This default implementation defers to importer.iter_modules(prefix)
    when the importer has such a method and returns an empty list when it
    does not.  Type-specific implementations can be attached through
    iter_importer_modules.register().
    """
    if hasattr(importer, 'iter_modules'):
        return importer.iter_modules(prefix)
    return []
141
142
143 # Implement a file walker for the normal importlib path hook
def _iter_file_finder_modules(importer, prefix=''):
    """Yield (prefix + name, ispkg) for modules in importer.path.

    Nothing is produced when importer.path is None or is not a directory.
    Directory entries are processed in sorted order.  A sub-directory
    counts as a package only if it has no '.' in its name and contains an
    '__init__' module.  Each module name is reported at most once.
    """
    root = importer.path
    if root is None or not os.path.isdir(root):
        return

    import inspect

    try:
        entries = sorted(os.listdir(root))  # packages before same-named modules
    except OSError:
        # ignore unreadable directories like import does
        entries = []

    emitted = set()
    for entry in entries:
        modname = inspect.getmodulename(entry)
        if modname == '__init__' or modname in emitted:
            continue

        is_package = False
        candidate = os.path.join(root, entry)

        if not modname and os.path.isdir(candidate) and '.' not in entry:
            try:
                children = os.listdir(candidate)
            except OSError:
                # ignore unreadable directories like import does
                children = []
            if not any(inspect.getmodulename(child) == '__init__'
                       for child in children):
                continue  # not a package
            modname = entry
            is_package = True

        if modname and '.' not in modname:
            emitted.add(modname)
            yield prefix + modname, is_package
183
# Dispatch importlib's FileFinder instances to the directory walker.
iter_importer_modules.register(importlib.machinery.FileFinder)(
    _iter_file_finder_modules)
186
187
def _import_imp():
    """Import the 'imp' module and bind it to the module-level name 'imp'.

    DeprecationWarnings raised while importing it are suppressed; the
    warning filters are restored afterwards.
    """
    global imp
    with warnings.catch_warnings():
        warnings.simplefilter(action='ignore', category=DeprecationWarning)
        loaded = importlib.import_module('imp')
    imp = loaded
193
class ImpImporter:
    """PEP 302 Finder that wraps Python's "classic" import algorithm

    ImpImporter(dirname) produces a PEP 302 finder that searches that
    directory.  ImpImporter(None) produces a PEP 302 finder that searches
    the current sys.path, plus any modules that are frozen or built-in.

    Note that ImpImporter does not currently support being used by placement
    on sys.meta_path.
    """

    def __init__(self, path=None):
        """Create a finder bound to directory *path* (or to None).

        Emits a DeprecationWarning and makes the legacy 'imp' module
        available through the module-level _import_imp() helper.
        """
        # stacklevel=2 attributes the warning to the code that creates the
        # finder, so it is reported against the caller instead of this file.
        warnings.warn("This emulation is deprecated and slated for removal "
                      "in Python 3.12; use 'importlib' instead",
                      DeprecationWarning, stacklevel=2)
        _import_imp()
        self.path = path

    def find_module(self, fullname, path=None):
        """Return an ImpLoader for *fullname*, or None if it is not found.

        Only the last dotted component is looked up.  A dotted name cannot
        be resolved when self.path is None, so None is returned then.
        """
        # Note: we ignore 'path' argument since it is only used via meta_path
        subname = fullname.split(".")[-1]
        if subname != fullname and self.path is None:
            return None
        if self.path is None:
            path = None
        else:
            path = [os.path.realpath(self.path)]
        try:
            file, filename, etc = imp.find_module(subname, path)
        except ImportError:
            return None
        return ImpLoader(fullname, file, filename, etc)

    def iter_modules(self, prefix=''):
        """Yield (prefix + name, ispkg) for modules in self.path.

        Nothing is produced when self.path is None or is not a directory.
        A sub-directory counts as a package only if it has no '.' in its
        name and contains an '__init__' module.
        """
        if self.path is None or not os.path.isdir(self.path):
            return

        yielded = set()
        import inspect
        try:
            filenames = os.listdir(self.path)
        except OSError:
            # ignore unreadable directories like import does
            filenames = []
        filenames.sort()  # handle packages before same-named modules

        for fn in filenames:
            modname = inspect.getmodulename(fn)
            if modname == '__init__' or modname in yielded:
                continue

            path = os.path.join(self.path, fn)
            ispkg = False

            if not modname and os.path.isdir(path) and '.' not in fn:
                modname = fn
                try:
                    dircontents = os.listdir(path)
                except OSError:
                    # ignore unreadable directories like import does
                    dircontents = []
                # A distinct loop variable keeps the outer 'fn' intact.
                for child in dircontents:
                    if inspect.getmodulename(child) == '__init__':
                        ispkg = True
                        break
                else:
                    continue    # not a package

            if modname and '.' not in modname:
                yielded.add(modname)
                yield prefix + modname, ispkg
267
268
class ImpLoader:
    """PEP 302 Loader that wraps Python's "classic" import algorithm
    """
    # Class-level defaults; get_code()/get_source() cache their results on
    # the instance under these names.
    code = source = None

    def __init__(self, fullname, file, filename, etc):
        """Store the values describing the module to load.

        *file*, *filename* and *etc* are the values unpacked from
        imp.find_module(); etc[2] is read as the module type.
        """
        # stacklevel=2 attributes the warning to the code that creates the
        # loader, so it is reported against the caller instead of this file.
        warnings.warn("This emulation is deprecated and slated for removal in "
                      "Python 3.12; use 'importlib' instead",
                      DeprecationWarning, stacklevel=2)
        _import_imp()
        self.file = file
        self.filename = filename
        self.fullname = fullname
        self.etc = etc

    def load_module(self, fullname):
        """Load the module through imp.load_module() and return it.

        The underlying file, if any, is closed afterwards.
        """
        self._reopen()
        try:
            mod = imp.load_module(fullname, self.file, self.filename, self.etc)
        finally:
            if self.file:
                self.file.close()
        # Note: we don't set __loader__ because we want the module to look
        # normal; i.e. this is just a wrapper for standard import machinery
        return mod

    def get_data(self, pathname):
        """Return the raw bytes stored in the file *pathname*."""
        with open(pathname, "rb") as file:
            return file.read()

    def _reopen(self):
        """Re-open self.file if it has been closed.

        Source modules are opened in text mode; compiled modules and C
        extensions in binary mode.
        """
        if self.file and self.file.closed:
            mod_type = self.etc[2]
            if mod_type == imp.PY_SOURCE:
                self.file = open(self.filename, 'r')
            elif mod_type in (imp.PY_COMPILED, imp.C_EXTENSION):
                self.file = open(self.filename, 'rb')

    def _fix_name(self, fullname):
        """Return the module name to operate on.

        None selects this loader's own module; any other name that differs
        from it raises ImportError.
        """
        if fullname is None:
            fullname = self.fullname
        elif fullname != self.fullname:
            raise ImportError("Loader for module %s cannot handle "
                              "module %s" % (self.fullname, fullname))
        return fullname

    def is_package(self, fullname):
        """Return True if the module type is imp.PKG_DIRECTORY."""
        fullname = self._fix_name(fullname)
        return self.etc[2] == imp.PKG_DIRECTORY

    def get_code(self, fullname=None):
        """Return the module's code object, computing it on first use.

        Source is compiled, compiled files are read with read_code(), and
        packages delegate to the loader of their '__init__' module.  For
        any other module type the cached value (None) is returned.
        """
        fullname = self._fix_name(fullname)
        if self.code is None:
            mod_type = self.etc[2]
            if mod_type == imp.PY_SOURCE:
                source = self.get_source(fullname)
                self.code = compile(source, self.filename, 'exec')
            elif mod_type == imp.PY_COMPILED:
                self._reopen()
                try:
                    self.code = read_code(self.file)
                finally:
                    self.file.close()
            elif mod_type == imp.PKG_DIRECTORY:
                self.code = self._get_delegate().get_code()
        return self.code

    def get_source(self, fullname=None):
        """Return the module's source text, reading it on first use.

        For a compiled module the source is looked up under the file name
        with its last character removed, and only if that file exists.
        Packages delegate to the loader of their '__init__' module.
        """
        fullname = self._fix_name(fullname)
        if self.source is None:
            mod_type = self.etc[2]
            if mod_type == imp.PY_SOURCE:
                self._reopen()
                try:
                    self.source = self.file.read()
                finally:
                    self.file.close()
            elif mod_type == imp.PY_COMPILED:
                if os.path.exists(self.filename[:-1]):
                    with open(self.filename[:-1], 'r') as f:
                        self.source = f.read()
            elif mod_type == imp.PKG_DIRECTORY:
                self.source = self._get_delegate().get_source()
        return self.source

    def _get_delegate(self):
        """Return the loader for '__init__' inside this package directory."""
        finder = ImpImporter(self.filename)
        spec = _get_spec(finder, '__init__')
        return spec.loader

    def get_filename(self, fullname=None):
        """Return the file name backing the module, or None.

        Packages report the file name of their '__init__' module.  Source,
        compiled and C-extension modules report self.filename.
        """
        fullname = self._fix_name(fullname)
        mod_type = self.etc[2]
        if mod_type == imp.PKG_DIRECTORY:
            return self._get_delegate().get_filename()
        elif mod_type in (imp.PY_SOURCE, imp.PY_COMPILED, imp.C_EXTENSION):
            return self.filename
        return None
367
368
try:
    import zipimport
    from zipimport import zipimporter

    def iter_zipimport_modules(importer, prefix=''):
        """Yield (prefix + name, ispkg) for modules inside a zip archive.

        The archive listing is taken from zipimport._zip_directory_cache;
        only entries located under importer.prefix are considered.  A
        directory is reported as a package when it directly contains a
        file whose name starts with '__init__.py'.
        """
        archive_entries = sorted(
            zipimport._zip_directory_cache[importer.archive])
        inner_prefix = importer.prefix
        skip = len(inner_prefix)
        emitted = set()
        import inspect
        for entry in archive_entries:
            if not entry.startswith(inner_prefix):
                continue

            parts = entry[skip:].split(os.sep)
            depth = len(parts)

            if depth == 2:
                # <dir>/<file>: the directory is a package if the file is
                # its __init__ module.
                top, leaf = parts
                if leaf.startswith('__init__.py') and top not in emitted:
                    emitted.add(top)
                    yield prefix + top, True
                continue

            if depth != 1:
                continue

            modname = inspect.getmodulename(parts[0])
            if modname == '__init__':
                continue

            if modname and '.' not in modname and modname not in emitted:
                emitted.add(modname)
                yield prefix + modname, False

    iter_importer_modules.register(zipimporter, iter_zipimport_modules)

except ImportError:
    pass
405
406
def get_importer(path_item):
    """Retrieve a finder for the given path item

    A finder already present in sys.path_importer_cache is returned as
    is.  Otherwise each hook in sys.path_hooks is tried in order; the
    first one that does not raise ImportError supplies the finder, which
    is then stored in the cache.  None is returned (and nothing cached)
    when every hook refuses the item.

    The cache (or part of it) can be cleared manually if a
    rescan of sys.path_hooks is necessary.
    """
    path_item = os.fsdecode(path_item)
    if path_item in sys.path_importer_cache:
        return sys.path_importer_cache[path_item]

    for hook in sys.path_hooks:
        try:
            importer = hook(path_item)
        except ImportError:
            continue
        sys.path_importer_cache.setdefault(path_item, importer)
        return importer
    return None
430
431
def iter_importers(fullname=""):
    """Yield finders for the given module name

    For a name without a '.', every entry of sys.meta_path is produced,
    followed by the finder for each item on sys.path.  This is also what
    happens when no module name is given.

    For a dotted name, the containing package is imported (a side effect
    of calling this function) and the finders for the items of its
    __path__ are produced; nothing is produced if it has no __path__.

    Relative names (starting with '.') raise ImportError.
    """
    if fullname.startswith('.'):
        raise ImportError(
            "Relative module name {!r} not supported".format(fullname))

    if '.' not in fullname:
        yield from sys.meta_path
        search = sys.path
    else:
        # Get the containing package's __path__
        parent_name = fullname.rpartition(".")[0]
        parent = importlib.import_module(parent_name)
        search = getattr(parent, '__path__', None)
        if search is None:
            return

    for item in search:
        yield get_importer(item)
459
460
def get_loader(module_or_name):
    """Get a "loader" object for module_or_name

    A name present in sys.modules is first replaced by the module stored
    there; None is returned if that entry is None.  For a module object,
    its __loader__ is returned when set; if it has neither a loader nor a
    __spec__, None is returned.  Every other case is resolved by name
    through find_loader().
    """
    target = module_or_name
    if target in sys.modules:
        target = sys.modules[target]
        if target is None:
            return None

    if not isinstance(target, ModuleType):
        return find_loader(target)

    loader = getattr(target, '__loader__', None)
    if loader is not None:
        return loader
    if getattr(target, '__spec__', None) is None:
        return None
    return find_loader(target.__name__)
483
484
def find_loader(fullname):
    """Find a "loader" object for fullname

    Backwards compatibility wrapper around importlib.util.find_spec: it
    returns only the spec's loader (None when no spec is found) and
    re-raises ImportError, AttributeError, TypeError and ValueError from
    the lookup as ImportError.  Relative names raise ImportError.
    """
    if fullname.startswith('.'):
        raise ImportError(
            "Relative module name {!r} not supported".format(fullname))
    try:
        found = importlib.util.find_spec(fullname)
    except (ImportError, AttributeError, TypeError, ValueError) as ex:
        # This hack fixes an impedance mismatch between pkgutil and
        # importlib, where the latter raises other errors for cases where
        # pkgutil previously raised ImportError
        template = "Error while finding loader for {!r} ({}: {})"
        raise ImportError(template.format(fullname, type(ex), ex)) from ex
    if found is None:
        return None
    return found.loader
504
505
def extend_path(path, name):
    """Extend a package's path.

    Intended use is to place the following code in a package's __init__.py:

        from pkgutil import extend_path
        __path__ = extend_path(__path__, __name__)

    For each directory on sys.path that has a subdirectory that
    matches the package name, add the subdirectory to the package's
    __path__.  This is useful if one wants to distribute different
    parts of a single logical package as multiple directories.

    It also looks for *.pkg files where * matches the name argument.
    This feature is similar to *.pth files (see site.py), except that
    it doesn't special-case lines starting with 'import'.  A *.pkg
    file is trusted at face value: apart from checking for duplicates,
    all entries found in a *.pkg file are added to the path, regardless
    of whether they exist on the filesystem.  (This is a feature.)

    If the input path is not a list (as is the case for frozen
    packages) it is returned unchanged.  The input path is not
    modified; an extended copy is returned.  Items are only appended
    to the copy at the end.

    For a dotted name the parent package's __path__ is searched instead
    of sys.path; if the parent is not in sys.modules or has no __path__,
    the copy is returned without additions.

    It is assumed that sys.path is a sequence.  Items of sys.path that
    are not strings are ignored.
    """

    if not isinstance(path, list):
        # This could happen e.g. when this is called from inside a
        # frozen package.  Return the path unchanged in that case.
        return path

    sname_pkg = name + ".pkg"

    path = path[:]  # Start with a copy of the existing path

    parent_package, _, final_name = name.rpartition('.')
    if parent_package:
        try:
            search_path = sys.modules[parent_package].__path__
        except (KeyError, AttributeError):
            # We can't do anything: find_loader() returns None when
            # passed a dotted name.
            return path
    else:
        search_path = sys.path

    for entry in search_path:
        if not isinstance(entry, str):
            continue

        finder = get_importer(entry)
        if finder is not None:
            portions = []
            if hasattr(finder, 'find_spec'):
                spec = finder.find_spec(final_name)
                if spec is not None:
                    portions = spec.submodule_search_locations or []
            # Is this finder PEP 420 compliant?
            elif hasattr(finder, 'find_loader'):
                _, portions = finder.find_loader(final_name)

            for portion in portions:
                # XXX This may still add duplicate entries to path on
                # case-insensitive filesystems
                if portion not in path:
                    path.append(portion)

        # XXX Is this the right thing for subpackages like zope.app?
        # It looks for a file named "zope.app.pkg"
        pkgfile = os.path.join(entry, sname_pkg)
        if os.path.isfile(pkgfile):
            try:
                f = open(pkgfile)
            except OSError as msg:
                sys.stderr.write("Can't open %s: %s\n" %
                                 (pkgfile, msg))
            else:
                with f:
                    for line in f:
                        line = line.rstrip('\n')
                        if not line or line.startswith('#'):
                            continue
                        # Skip entries already on the path, as the
                        # docstring promises.  Don't check for existence!
                        if line not in path:
                            path.append(line)

    return path
598
599
def get_data(package, resource):
    """Get a resource from a package.

    This is a wrapper round the PEP 302 loader get_data API.  The package
    argument should be the name of a package, in standard module format
    (foo.bar).  The resource argument should be in the form of a relative
    filename, using '/' as the path separator.  The parent directory name
    '..' and rooted names (starting with a '/') are not meant to be used;
    note that this function does not itself reject them.

    The function returns whatever the loader's get_data() returns for the
    resource (a binary string for the standard loaders).

    For packages located in the filesystem, which have already been imported,
    this is the rough equivalent of

        d = os.path.dirname(sys.modules[package].__file__)
        data = open(os.path.join(d, resource), 'rb').read()

    None is returned if the package cannot be located, if its loader is
    missing or does not support get_data(), or if the loaded module has
    no usable __file__ (absent or None).
    """

    spec = importlib.util.find_spec(package)
    if spec is None:
        return None
    loader = spec.loader
    if loader is None or not hasattr(loader, 'get_data'):
        return None
    # XXX needs test
    mod = (sys.modules.get(package) or
           importlib._bootstrap._load(spec))
    # __file__ may exist but be None; there is then no directory to
    # resolve the resource against, so report "not available" rather than
    # failing inside os.path.dirname().
    if mod is None or getattr(mod, '__file__', None) is None:
        return None

    # Modify the resource name to be compatible with the loader.get_data
    # signature - an os.path format "filename" starting with the dirname of
    # the package's __file__
    parts = resource.split('/')
    parts.insert(0, os.path.dirname(mod.__file__))
    resource_name = os.path.join(*parts)
    return loader.get_data(resource_name)
641
642
# Compiled lazily by resolve_name() on its first call.
_NAME_PATTERN = None

def resolve_name(name):
    """
    Resolve a name to an object.

    `name` is expected to be a string in one of the following formats,
    where W is shorthand for a valid Python identifier and dot stands for
    a literal period in these pseudo-regexes:

    W(.W)*
    W(.W)*:(W(.W)*)?

    The first form exists for backward compatibility only.  Some leading
    part of the dotted name is a package/module and the rest is an object
    within it, possibly nested inside other objects.  Since the boundary
    cannot be inferred by inspection, imports are attempted repeatedly,
    one component at a time, until one fails.

    In the second form the colon marks the boundary explicitly: the
    dotted name to its left is imported in a single step, and the dotted
    name to its right is looked up inside the imported module.  If the
    name ends with the colon, the module object itself is returned.

    The function returns an object (which might be a module), or raises
    one of the following exceptions:

    ValueError - if `name` isn't in a recognised format
    ImportError - if an import failed when it shouldn't have
    AttributeError - if a failure occurred when traversing the object hierarchy
                     within the imported package to get to the desired object.
    """
    global _NAME_PATTERN
    if _NAME_PATTERN is None:
        # Lazy import to speedup Python startup time
        import re
        dotted_words = r'(?!\d)(\w+)(\.(?!\d)(\w+))*'
        _NAME_PATTERN = re.compile(f'^(?P<pkg>{dotted_words})'
                                   f'(?P<cln>:(?P<obj>{dotted_words})?)?$',
                                   re.UNICODE)

    matched = _NAME_PATTERN.match(name)
    if matched is None:
        raise ValueError(f'invalid format: {name!r}')

    if matched.group('cln'):
        # there is a colon - a one-step import is all that's needed
        mod = importlib.import_module(matched.group('pkg'))
        attr_path = matched.group('obj')
        remaining = attr_path.split('.') if attr_path else []
    else:
        # no colon - have to iterate to find the package boundary
        remaining = name.split('.')
        modname = remaining.pop(0)
        # first part *must* be a module/package.
        mod = importlib.import_module(modname)
        while remaining:
            candidate = f'{modname}.{remaining[0]}'
            try:
                mod = importlib.import_module(candidate)
            except ImportError:
                break
            remaining.pop(0)
            modname = candidate

    # At this point mod is the imported module and remaining holds the
    # attribute names still to be traversed (empty if only the module was
    # requested).
    target = mod
    for attr in remaining:
        target = getattr(target, attr)
    return target