1 """Utilities to support packages."""
2
3 from collections import namedtuple
4 from functools import singledispatch as simplegeneric
5 import importlib
6 import importlib.util
7 import importlib.machinery
8 import os
9 import os.path
10 import sys
11 from types import ModuleType
12 import warnings
13
# Public names exported by a star-import of this module.
# NOTE(review): resolve_name() is defined in this module but is not listed
# here -- confirm whether that omission is intentional.
__all__ = [
    'get_importer', 'iter_importers', 'get_loader', 'find_loader',
    'walk_packages', 'iter_modules', 'get_data',
    'read_code', 'extend_path',
    'ModuleInfo',
]
20
21
# Record type yielded by iter_modules() and walk_packages(): the finder that
# produced the entry, the (prefixed) module name, and whether it is a package.
ModuleInfo = namedtuple('ModuleInfo', 'module_finder name ispkg')
ModuleInfo.__doc__ = 'A namedtuple with minimal info about a module.'
24
25
def read_code(stream):
    """Return the object marshalled in a compiled-bytecode stream.

    The first four bytes of 'stream' must equal
    importlib.util.MAGIC_NUMBER; if they do not, None is returned.
    Otherwise the next twelve bytes (the rest of the header) are
    discarded and the remainder is loaded with marshal.

    This helper is needed in order for the PEP 302 emulation to
    correctly handle compiled files.
    """
    import marshal

    if stream.read(4) != importlib.util.MAGIC_NUMBER:
        return None

    # Nothing in the rest of the header is needed here; just step over it.
    stream.read(12)
    return marshal.load(stream)
37
38
def walk_packages(path=None, prefix='', onerror=None):
    """Recursively yield ModuleInfo for every module reachable from path.

    'path' is either None (meaning: everything iter_modules() finds by
    default) or a list of paths to look for modules in.

    'prefix' is a string put on the front of every module name that is
    yielded.

    Every *package* that is found (not every module) is imported, since
    its __path__ attribute is what gets searched for submodules.

    'onerror', when given, is called with the name of the package whose
    import raised an exception.  Without it, ImportError is silently
    ignored and any other exception propagates, ending the walk.

    Examples:

    # list all modules python can access
    walk_packages()

    # list all submodules of ctypes
    walk_packages(ctypes.__path__, ctypes.__name__+'.')
    """
    # Path entries already handed to a recursive call made from this
    # invocation; used so that the same entry is not descended into twice.
    visited = set()

    for info in iter_modules(path, prefix):
        yield info

        if not info.ispkg:
            continue

        try:
            __import__(info.name)
        except ImportError:
            if onerror is not None:
                onerror(info.name)
        except Exception:
            if onerror is not None:
                onerror(info.name)
            else:
                raise
        else:
            pkg_path = getattr(sys.modules[info.name], '__path__', None) or []

            # don't traverse path items we've seen before
            unvisited = []
            for entry in pkg_path:
                if entry not in visited:
                    visited.add(entry)
                    unvisited.append(entry)

            yield from walk_packages(unvisited, info.name+'.', onerror)
94
95
def iter_modules(path=None, prefix=''):
    """Yield a ModuleInfo for each distinct module name found on path.

    'path' is either None, in which case the importers produced by
    iter_importers() are consulted, or a list of paths to look for
    modules in.  A plain string is rejected with ValueError (raised
    when iteration starts, since this is a generator).

    'prefix' is a string put on the front of every module name that is
    yielded.
    """
    if isinstance(path, str):
        raise ValueError("path must be None or list of paths to look for "
                         "modules in")
    if path is None:
        importers = iter_importers()
    else:
        importers = map(get_importer, path)

    # Only the first importer to report a given name gets to yield it.
    reported = set()
    for importer in importers:
        for name, ispkg in iter_importer_modules(importer, prefix):
            if name in reported:
                continue
            reported.add(name)
            yield ModuleInfo(importer, name, ispkg)
120
121
@simplegeneric
def iter_importer_modules(importer, prefix=''):
    """Return the (name, ispkg) entries an importer reports.

    This is the generic fallback: an importer that provides an
    iter_modules() method is asked directly (with 'prefix'); any other
    object yields an empty list.  Type-specific implementations are
    added through .register().
    """
    if hasattr(importer, 'iter_modules'):
        return importer.iter_modules(prefix)
    return []
127
128
129 # Implement a file walker for the normal importlib path hook
def _iter_file_finder_modules(importer, prefix=''):
    """Yield (prefix + name, ispkg) for modules in importer.path.

    Nothing is yielded when importer.path is None or is not a directory.
    Directory entries are visited in sorted order; an entry whose module
    name was already yielded is skipped.  A sub-directory without a '.'
    in its name counts as a package only if it contains an __init__
    module; other sub-directories are ignored.
    """
    root = importer.path
    if root is None or not os.path.isdir(root):
        return

    import inspect

    emitted = set()
    try:
        entries = os.listdir(root)
    except OSError:
        # ignore unreadable directories like import does
        entries = []
    entries.sort()  # handle packages before same-named modules

    for entry in entries:
        modname = inspect.getmodulename(entry)
        if modname == '__init__' or modname in emitted:
            continue

        full_path = os.path.join(root, entry)
        ispkg = False

        if not modname and os.path.isdir(full_path) and '.' not in entry:
            modname = entry
            try:
                children = os.listdir(full_path)
            except OSError:
                # ignore unreadable directories like import does
                children = []
            ispkg = any(inspect.getmodulename(child) == '__init__'
                        for child in children)
            if not ispkg:
                continue  # not a package

        if modname and '.' not in modname:
            emitted.add(modname)
            yield prefix + modname, ispkg
169
# Make iter_importer_modules() dispatch to _iter_file_finder_modules whenever
# the importer it is given is an importlib.machinery.FileFinder.
iter_importer_modules.register(
    importlib.machinery.FileFinder, _iter_file_finder_modules)
172
173
try:
    import zipimport
    from zipimport import zipimporter

    def iter_zipimport_modules(importer, prefix=''):
        """Yield (prefix + name, ispkg) for modules under a zip importer.

        The archive's entries are read from zipimport's directory cache,
        visited in sorted order, and restricted to those beginning with
        importer.prefix.  An entry two components deep whose second
        component starts with '__init__.py' reports its first component
        as a package; a single-component entry reports a plain module.
        Each name is yielded at most once.
        """
        entries = sorted(zipimport._zip_directory_cache[importer.archive])
        base = importer.prefix
        base_len = len(base)
        emitted = set()
        import inspect

        for entry in entries:
            if not entry.startswith(base):
                continue

            parts = entry[base_len:].split(os.sep)
            depth = len(parts)

            if depth == 2 and parts[1].startswith('__init__.py'):
                if parts[0] not in emitted:
                    emitted.add(parts[0])
                    yield prefix + parts[0], True

            if depth != 1:
                continue

            modname = inspect.getmodulename(parts[0])
            if modname == '__init__':
                continue

            if modname and '.' not in modname and modname not in emitted:
                emitted.add(modname)
                yield prefix + modname, False

    iter_importer_modules.register(zipimporter, iter_zipimport_modules)

except ImportError:
    pass
210
211
def get_importer(path_item):
    """Return a finder for the given path item, or None.

    sys.path_importer_cache is consulted first.  On a miss, each hook in
    sys.path_hooks is tried in order; the first one that does not raise
    ImportError supplies the finder, which is also stored in the cache
    (unless an entry appeared there in the meantime).  None is returned
    when every hook declines, and nothing is cached in that case.

    The cache (or part of it) can be cleared manually if a
    rescan of sys.path_hooks is necessary.
    """
    path_item = os.fsdecode(path_item)
    try:
        return sys.path_importer_cache[path_item]
    except KeyError:
        for hook in sys.path_hooks:
            try:
                finder = hook(path_item)
            except ImportError:
                continue
            sys.path_importer_cache.setdefault(path_item, finder)
            return finder
        return None
235
236
def iter_importers(fullname=""):
    """Yield finders relevant to the given module name.

    For a dotted name, the containing package is imported (a side effect
    of calling this function) and one finder is yielded per entry of its
    __path__; nothing is yielded if it has no __path__.

    For an undotted or empty name, everything on sys.meta_path is
    yielded first, followed by one finder per entry of sys.path.

    Names starting with '.' are rejected with ImportError.
    """
    if fullname.startswith('.'):
        msg = "Relative module name {!r} not supported".format(fullname)
        raise ImportError(msg)

    parent_name, dot, _ = fullname.rpartition(".")
    if dot:
        # Search the containing package's __path__
        parent = importlib.import_module(parent_name)
        search_path = getattr(parent, '__path__', None)
        if search_path is None:
            return
    else:
        yield from sys.meta_path
        search_path = sys.path

    for entry in search_path:
        yield get_importer(entry)
264
265
def get_loader(module_or_name):
    """Return a "loader" object for a module or module name.

    A deprecation notice is issued on every call.

    If the argument is a key of sys.modules it is replaced by the stored
    module (None is returned if the stored value is None).  For a module
    object, its __loader__ is returned when set; failing that, None is
    returned when the module has no __spec__, and otherwise the lookup
    falls through to find_loader() with the module's __name__.  Anything
    that is not a module object is passed to find_loader() as a name.
    """
    warnings._deprecated("pkgutil.get_loader",
                         f"{warnings._DEPRECATED_MSG}; "
                         "use importlib.util.find_spec() instead",
                         remove=(3, 14))
    target = module_or_name
    if target in sys.modules:
        target = sys.modules[target]
        if target is None:
            return None

    if not isinstance(target, ModuleType):
        return find_loader(target)

    loader = getattr(target, '__loader__', None)
    if loader is not None:
        return loader
    if getattr(target, '__spec__', None) is None:
        return None
    return find_loader(target.__name__)
292
293
def find_loader(fullname):
    """Return the loader for fullname, or None if no spec is found.

    A deprecation notice is issued on every call.

    This is a backwards compatibility wrapper around
    importlib.util.find_spec: ImportError, AttributeError, TypeError and
    ValueError raised by the lookup are re-raised as ImportError (chained
    to the original), and only the spec's loader is returned rather than
    the whole spec.  Names starting with '.' are rejected with
    ImportError.
    """
    warnings._deprecated("pkgutil.find_loader",
                         f"{warnings._DEPRECATED_MSG}; "
                         "use importlib.util.find_spec() instead",
                         remove=(3, 14))
    if fullname.startswith('.'):
        msg = "Relative module name {!r} not supported".format(fullname)
        raise ImportError(msg)

    try:
        spec = importlib.util.find_spec(fullname)
    except (ImportError, AttributeError, TypeError, ValueError) as ex:
        # importlib raises a wider range of errors than pkgutil used to;
        # fold them all into ImportError, as pkgutil previously raised.
        msg = "Error while finding loader for {!r} ({}: {})"
        raise ImportError(msg.format(fullname, type(ex), ex)) from ex

    if spec is None:
        return None
    return spec.loader
317
318
def extend_path(path, name):
    """Return a copy of a package's path, extended with further portions.

    Intended use is to place the following code in a package's __init__.py:

        from pkgutil import extend_path
        __path__ = extend_path(__path__, __name__)

    The directories searched are those on sys.path for a top-level
    package, or those on the parent package's __path__ for a subpackage
    (if the parent is not in sys.modules or has no __path__, the copy is
    returned without additions).  For each searched directory, the
    finder obtained for it is asked about the package's final name
    component, and any locations it reports that are not already present
    are appended.  This is useful if one wants to distribute different
    parts of a single logical package as multiple directories.

    Each searched directory is also checked for a file named
    "<name>.pkg".  This feature is similar to *.pth files (see site.py),
    except that it doesn't special-case lines starting with 'import'.
    Every line of such a file that is non-empty and does not start with
    '#' is appended to the result as-is, without checking whether it
    exists on the filesystem.  (This is a feature.)

    If the input path is not a list (as is the case for frozen
    packages) it is returned unchanged.  The input path is not
    modified; an extended copy is returned.  Items are only appended
    to the copy at the end.

    Search-path items that are not str instances are skipped.
    """
    if not isinstance(path, list):
        # This could happen e.g. when this is called from inside a
        # frozen package.  Return the path unchanged in that case.
        return path

    pkg_filename = name + ".pkg"

    extended = path[:]  # Work on a copy of the existing path

    parent_name, _, leaf_name = name.rpartition('.')
    if not parent_name:
        search_path = sys.path
    else:
        try:
            search_path = sys.modules[parent_name].__path__
        except (KeyError, AttributeError):
            # Without the parent's __path__ there is nowhere to search.
            return extended

    for entry in search_path:
        if not isinstance(entry, str):
            continue

        finder = get_importer(entry)
        if finder is not None:
            portions = []
            if hasattr(finder, 'find_spec'):
                spec = finder.find_spec(leaf_name)
                if spec is not None:
                    portions = spec.submodule_search_locations or []
            # Is this finder PEP 420 compliant?
            elif hasattr(finder, 'find_loader'):
                _, portions = finder.find_loader(leaf_name)

            for portion in portions:
                # XXX This may still add duplicate entries to path on
                # case-insensitive filesystems
                if portion not in extended:
                    extended.append(portion)

        # XXX Is this the right thing for subpackages like zope.app?
        # It looks for a file named "zope.app.pkg"
        pkgfile = os.path.join(entry, pkg_filename)
        if not os.path.isfile(pkgfile):
            continue
        try:
            handle = open(pkgfile)
        except OSError as msg:
            sys.stderr.write("Can't open %s: %s\n" %
                             (pkgfile, msg))
            continue
        with handle:
            for line in handle:
                line = line.rstrip('\n')
                if line and not line.startswith('#'):
                    extended.append(line)  # Don't check for existence!

    return extended
411
412
def get_data(package, resource):
    """Return the contents of a resource that lives inside a package.

    This is a wrapper round the PEP 302 loader get_data API.  'package'
    is the package name in standard module format (foo.bar); 'resource'
    is a relative filename using '/' as the path separator.  The parent
    directory name '..' is not allowed, and nor is a rooted name
    (starting with a '/').

    The return value is whatever the loader's get_data() returns for the
    file located relative to the directory of the package's __file__.

    For packages located in the filesystem, which have already been
    imported, this is the rough equivalent of

        d = os.path.dirname(sys.modules[package].__file__)
        data = open(os.path.join(d, resource), 'rb').read()

    None is returned if no spec is found for the package, if its loader
    is missing or has no get_data(), or if the module has no __file__.
    """
    spec = importlib.util.find_spec(package)
    if spec is None:
        return None

    loader = spec.loader
    if loader is None:
        return None
    if not hasattr(loader, 'get_data'):
        return None

    # XXX needs test
    # Reuse an already-imported module; otherwise load it from the spec.
    mod = sys.modules.get(package)
    if not mod:
        mod = importlib._bootstrap._load(spec)
    if mod is None or not hasattr(mod, '__file__'):
        return None

    # loader.get_data() wants an os.path style filename, so re-join the
    # '/'-separated resource underneath the directory of the package's
    # __file__.
    package_dir = os.path.dirname(mod.__file__)
    return loader.get_data(os.path.join(package_dir, *resource.split('/')))
454
455
# Compiled lazily by resolve_name() on its first call.
_NAME_PATTERN = None

def resolve_name(name):
    """
    Resolve a name to an object.

    `name` is expected to be a string in one of the following formats,
    where W is shorthand for a valid Python identifier and dot stands for
    a literal period in these pseudo-regexes:

    W(.W)*
    W(.W)*:(W(.W)*)?

    The first form exists for backward compatibility only.  Some leading
    part of the dotted name is a package and the rest is an object inside
    it, possibly nested in other objects.  Since the boundary cannot be
    told by inspection, imports are attempted one component at a time
    until one fails; the remaining components are then looked up as
    attributes.

    In the second form the single colon marks the boundary explicitly:
    the dotted name on its left is imported in one step, and the dotted
    name on its right is looked up as attributes of the result.  If the
    name ends with the colon, the module object itself is returned.

    The function returns an object (which might be a module), or raises
    one of the following exceptions:

    ValueError - if `name` isn't in a recognised format
    ImportError - if an import failed when it shouldn't have
    AttributeError - if a failure occurred when traversing the object
                     hierarchy within the imported package to get to the
                     desired object.
    """
    global _NAME_PATTERN
    if _NAME_PATTERN is None:
        # Lazy import to speedup Python startup time
        import re
        dotted_words = r'(?!\d)(\w+)(\.(?!\d)(\w+))*'
        _NAME_PATTERN = re.compile(f'^(?P<pkg>{dotted_words})'
                                   f'(?P<cln>:(?P<obj>{dotted_words})?)?$',
                                   re.UNICODE)

    match = _NAME_PATTERN.match(name)
    if not match:
        raise ValueError(f'invalid format: {name!r}')

    groups = match.groupdict()
    if groups.get('cln'):
        # A colon is present: import the left side in one step and treat
        # the right side (if any) as the attribute chain.
        mod = importlib.import_module(groups['pkg'])
        obj_path = groups.get('obj')
        parts = obj_path.split('.') if obj_path else []
    else:
        # No colon: extend the imported name one component at a time
        # until an import fails, which marks the package boundary.
        modname, *parts = name.split('.')
        # first part *must* be a module/package.
        mod = importlib.import_module(modname)
        while parts:
            candidate = f'{modname}.{parts[0]}'
            try:
                mod = importlib.import_module(candidate)
            except ImportError:
                break
            del parts[0]
            modname = candidate

    # mod is now the imported module and parts holds the attribute names
    # still to be traversed (empty if just the module is wanted).
    result = mod
    for attr in parts:
        result = getattr(result, attr)
    return result