1 """Find modules used by a script, using introspection."""
2
3 import dis
4 import importlib._bootstrap_external
5 import importlib.machinery
6 import marshal
7 import os
8 import io
9 import sys
10
# Old imp constants: the "kind" codes carried as the last element of the
# (suffix, mode, kind) tuples that _find_module() returns and
# ModuleFinder.load_module() unpacks.

_SEARCH_ERROR = 0   # loader type not recognised by _find_module()
_PY_SOURCE = 1      # found via a SourceFileLoader
_PY_COMPILED = 2    # found via a SourcelessFileLoader
_C_EXTENSION = 3    # found via an ExtensionFileLoader
_PKG_DIRECTORY = 5  # the loader reports the name as a package
_C_BUILTIN = 6      # handled by BuiltinImporter
_PY_FROZEN = 7      # handled by FrozenImporter
20
# Modulefinder does a good job at simulating Python's import machinery, but
# it cannot handle __path__ modifications packages make at runtime. Therefore
# there is a mechanism whereby you can register extra paths in this map for a
# package, and it will be honored.
25
# Note that this is a mapping from package names to lists of paths.
packagePathMap = {}

# A Public interface
def AddPackagePath(packagename, path):
    """Register *path* as an additional __path__ entry for *packagename*.

    Entries accumulate in call order; registering the same path twice
    stores it twice.
    """
    extra_paths = packagePathMap.get(packagename)
    if extra_paths is None:
        extra_paths = packagePathMap[packagename] = []
    extra_paths.append(path)
32
replacePackageMap = {}

# The ReplacePackage mechanism lets modulefinder cope with a package that,
# at runtime, injects itself into sys.modules under the name of another
# package.  Call
#     ReplacePackage("real_package_name", "faked_package_name")
# before running ModuleFinder.

def ReplacePackage(oldname, newname):
    """Record that package *oldname* should be registered as *newname*.

    A later call with the same *oldname* overwrites the earlier entry.
    """
    replacePackageMap.update({oldname: newname})
43
44
45 def _find_module(name, path=None):
46 """An importlib reimplementation of imp.find_module (for our purposes)."""
47
48 # It's necessary to clear the caches for our Finder first, in case any
49 # modules are being added/deleted/modified at runtime. In particular,
50 # test_modulefinder.py changes file tree contents in a cache-breaking way:
51
52 importlib.machinery.PathFinder.invalidate_caches()
53
54 spec = importlib.machinery.PathFinder.find_spec(name, path)
55
56 if spec is None:
57 raise ImportError("No module named {name!r}".format(name=name), name=name)
58
59 # Some special cases:
60
61 if spec.loader is importlib.machinery.BuiltinImporter:
62 return None, None, ("", "", _C_BUILTIN)
63
64 if spec.loader is importlib.machinery.FrozenImporter:
65 return None, None, ("", "", _PY_FROZEN)
66
67 file_path = spec.origin
68
69 if spec.loader.is_package(name):
70 return None, os.path.dirname(file_path), ("", "", _PKG_DIRECTORY)
71
72 if isinstance(spec.loader, importlib.machinery.SourceFileLoader):
73 kind = _PY_SOURCE
74
75 elif isinstance(spec.loader, importlib.machinery.ExtensionFileLoader):
76 kind = _C_EXTENSION
77
78 elif isinstance(spec.loader, importlib.machinery.SourcelessFileLoader):
79 kind = _PY_COMPILED
80
81 else: # Should never happen.
82 return None, None, ("", "", _SEARCH_ERROR)
83
84 file = io.open_code(file_path)
85 suffix = os.path.splitext(file_path)[-1]
86
87 return file, file_path, (suffix, "rb", kind)
88
89
class Module:
    """Lightweight record describing one module seen by ModuleFinder."""

    def __init__(self, name, file=None, path=None):
        self.__name__ = name
        self.__file__ = file
        self.__path__ = path
        self.__code__ = None
        # Global names assigned to in the module (dict used as a set),
        # including names pulled in through starimports of Python modules.
        self.globalnames = {}
        # Starimports this module performed that could not be resolved,
        # i.e. a starimport from a non-Python module (dict used as a set).
        self.starimports = {}

    def __repr__(self):
        # The name is always shown; file and path only when they are set.
        shown = [repr(self.__name__)]
        shown.extend(
            repr(value)
            for value in (self.__file__, self.__path__)
            if value is not None
        )
        return "Module(" + ", ".join(shown) + ")"
113
114 class ESC[4;38;5;81mModuleFinder:
115
116 def __init__(self, path=None, debug=0, excludes=None, replace_paths=None):
117 if path is None:
118 path = sys.path
119 self.path = path
120 self.modules = {}
121 self.badmodules = {}
122 self.debug = debug
123 self.indent = 0
124 self.excludes = excludes if excludes is not None else []
125 self.replace_paths = replace_paths if replace_paths is not None else []
126 self.processed_paths = [] # Used in debugging only
127
128 def msg(self, level, str, *args):
129 if level <= self.debug:
130 for i in range(self.indent):
131 print(" ", end=' ')
132 print(str, end=' ')
133 for arg in args:
134 print(repr(arg), end=' ')
135 print()
136
137 def msgin(self, *args):
138 level = args[0]
139 if level <= self.debug:
140 self.indent = self.indent + 1
141 self.msg(*args)
142
143 def msgout(self, *args):
144 level = args[0]
145 if level <= self.debug:
146 self.indent = self.indent - 1
147 self.msg(*args)
148
149 def run_script(self, pathname):
150 self.msg(2, "run_script", pathname)
151 with io.open_code(pathname) as fp:
152 stuff = ("", "rb", _PY_SOURCE)
153 self.load_module('__main__', fp, pathname, stuff)
154
155 def load_file(self, pathname):
156 dir, name = os.path.split(pathname)
157 name, ext = os.path.splitext(name)
158 with io.open_code(pathname) as fp:
159 stuff = (ext, "rb", _PY_SOURCE)
160 self.load_module(name, fp, pathname, stuff)
161
162 def import_hook(self, name, caller=None, fromlist=None, level=-1):
163 self.msg(3, "import_hook", name, caller, fromlist, level)
164 parent = self.determine_parent(caller, level=level)
165 q, tail = self.find_head_package(parent, name)
166 m = self.load_tail(q, tail)
167 if not fromlist:
168 return q
169 if m.__path__:
170 self.ensure_fromlist(m, fromlist)
171 return None
172
173 def determine_parent(self, caller, level=-1):
174 self.msgin(4, "determine_parent", caller, level)
175 if not caller or level == 0:
176 self.msgout(4, "determine_parent -> None")
177 return None
178 pname = caller.__name__
179 if level >= 1: # relative import
180 if caller.__path__:
181 level -= 1
182 if level == 0:
183 parent = self.modules[pname]
184 assert parent is caller
185 self.msgout(4, "determine_parent ->", parent)
186 return parent
187 if pname.count(".") < level:
188 raise ImportError("relative importpath too deep")
189 pname = ".".join(pname.split(".")[:-level])
190 parent = self.modules[pname]
191 self.msgout(4, "determine_parent ->", parent)
192 return parent
193 if caller.__path__:
194 parent = self.modules[pname]
195 assert caller is parent
196 self.msgout(4, "determine_parent ->", parent)
197 return parent
198 if '.' in pname:
199 i = pname.rfind('.')
200 pname = pname[:i]
201 parent = self.modules[pname]
202 assert parent.__name__ == pname
203 self.msgout(4, "determine_parent ->", parent)
204 return parent
205 self.msgout(4, "determine_parent -> None")
206 return None
207
208 def find_head_package(self, parent, name):
209 self.msgin(4, "find_head_package", parent, name)
210 if '.' in name:
211 i = name.find('.')
212 head = name[:i]
213 tail = name[i+1:]
214 else:
215 head = name
216 tail = ""
217 if parent:
218 qname = "%s.%s" % (parent.__name__, head)
219 else:
220 qname = head
221 q = self.import_module(head, qname, parent)
222 if q:
223 self.msgout(4, "find_head_package ->", (q, tail))
224 return q, tail
225 if parent:
226 qname = head
227 parent = None
228 q = self.import_module(head, qname, parent)
229 if q:
230 self.msgout(4, "find_head_package ->", (q, tail))
231 return q, tail
232 self.msgout(4, "raise ImportError: No module named", qname)
233 raise ImportError("No module named " + qname)
234
235 def load_tail(self, q, tail):
236 self.msgin(4, "load_tail", q, tail)
237 m = q
238 while tail:
239 i = tail.find('.')
240 if i < 0: i = len(tail)
241 head, tail = tail[:i], tail[i+1:]
242 mname = "%s.%s" % (m.__name__, head)
243 m = self.import_module(head, mname, m)
244 if not m:
245 self.msgout(4, "raise ImportError: No module named", mname)
246 raise ImportError("No module named " + mname)
247 self.msgout(4, "load_tail ->", m)
248 return m
249
250 def ensure_fromlist(self, m, fromlist, recursive=0):
251 self.msg(4, "ensure_fromlist", m, fromlist, recursive)
252 for sub in fromlist:
253 if sub == "*":
254 if not recursive:
255 all = self.find_all_submodules(m)
256 if all:
257 self.ensure_fromlist(m, all, 1)
258 elif not hasattr(m, sub):
259 subname = "%s.%s" % (m.__name__, sub)
260 submod = self.import_module(sub, subname, m)
261 if not submod:
262 raise ImportError("No module named " + subname)
263
264 def find_all_submodules(self, m):
265 if not m.__path__:
266 return
267 modules = {}
268 # 'suffixes' used to be a list hardcoded to [".py", ".pyc"].
269 # But we must also collect Python extension modules - although
270 # we cannot separate normal dlls from Python extensions.
271 suffixes = []
272 suffixes += importlib.machinery.EXTENSION_SUFFIXES[:]
273 suffixes += importlib.machinery.SOURCE_SUFFIXES[:]
274 suffixes += importlib.machinery.BYTECODE_SUFFIXES[:]
275 for dir in m.__path__:
276 try:
277 names = os.listdir(dir)
278 except OSError:
279 self.msg(2, "can't list directory", dir)
280 continue
281 for name in names:
282 mod = None
283 for suff in suffixes:
284 n = len(suff)
285 if name[-n:] == suff:
286 mod = name[:-n]
287 break
288 if mod and mod != "__init__":
289 modules[mod] = mod
290 return modules.keys()
291
292 def import_module(self, partname, fqname, parent):
293 self.msgin(3, "import_module", partname, fqname, parent)
294 try:
295 m = self.modules[fqname]
296 except KeyError:
297 pass
298 else:
299 self.msgout(3, "import_module ->", m)
300 return m
301 if fqname in self.badmodules:
302 self.msgout(3, "import_module -> None")
303 return None
304 if parent and parent.__path__ is None:
305 self.msgout(3, "import_module -> None")
306 return None
307 try:
308 fp, pathname, stuff = self.find_module(partname,
309 parent and parent.__path__, parent)
310 except ImportError:
311 self.msgout(3, "import_module ->", None)
312 return None
313
314 try:
315 m = self.load_module(fqname, fp, pathname, stuff)
316 finally:
317 if fp:
318 fp.close()
319 if parent:
320 setattr(parent, partname, m)
321 self.msgout(3, "import_module ->", m)
322 return m
323
324 def load_module(self, fqname, fp, pathname, file_info):
325 suffix, mode, type = file_info
326 self.msgin(2, "load_module", fqname, fp and "fp", pathname)
327 if type == _PKG_DIRECTORY:
328 m = self.load_package(fqname, pathname)
329 self.msgout(2, "load_module ->", m)
330 return m
331 if type == _PY_SOURCE:
332 co = compile(fp.read(), pathname, 'exec')
333 elif type == _PY_COMPILED:
334 try:
335 data = fp.read()
336 importlib._bootstrap_external._classify_pyc(data, fqname, {})
337 except ImportError as exc:
338 self.msgout(2, "raise ImportError: " + str(exc), pathname)
339 raise
340 co = marshal.loads(memoryview(data)[16:])
341 else:
342 co = None
343 m = self.add_module(fqname)
344 m.__file__ = pathname
345 if co:
346 if self.replace_paths:
347 co = self.replace_paths_in_code(co)
348 m.__code__ = co
349 self.scan_code(co, m)
350 self.msgout(2, "load_module ->", m)
351 return m
352
353 def _add_badmodule(self, name, caller):
354 if name not in self.badmodules:
355 self.badmodules[name] = {}
356 if caller:
357 self.badmodules[name][caller.__name__] = 1
358 else:
359 self.badmodules[name]["-"] = 1
360
361 def _safe_import_hook(self, name, caller, fromlist, level=-1):
362 # wrapper for self.import_hook() that won't raise ImportError
363 if name in self.badmodules:
364 self._add_badmodule(name, caller)
365 return
366 try:
367 self.import_hook(name, caller, level=level)
368 except ImportError as msg:
369 self.msg(2, "ImportError:", str(msg))
370 self._add_badmodule(name, caller)
371 except SyntaxError as msg:
372 self.msg(2, "SyntaxError:", str(msg))
373 self._add_badmodule(name, caller)
374 else:
375 if fromlist:
376 for sub in fromlist:
377 fullname = name + "." + sub
378 if fullname in self.badmodules:
379 self._add_badmodule(fullname, caller)
380 continue
381 try:
382 self.import_hook(name, caller, [sub], level=level)
383 except ImportError as msg:
384 self.msg(2, "ImportError:", str(msg))
385 self._add_badmodule(fullname, caller)
386
387 def scan_opcodes(self, co):
388 # Scan the code, and yield 'interesting' opcode combinations
389 for name in dis._find_store_names(co):
390 yield "store", (name,)
391 for name, level, fromlist in dis._find_imports(co):
392 if level == 0: # absolute import
393 yield "absolute_import", (fromlist, name)
394 else: # relative import
395 yield "relative_import", (level, fromlist, name)
396
397 def scan_code(self, co, m):
398 code = co.co_code
399 scanner = self.scan_opcodes
400 for what, args in scanner(co):
401 if what == "store":
402 name, = args
403 m.globalnames[name] = 1
404 elif what == "absolute_import":
405 fromlist, name = args
406 have_star = 0
407 if fromlist is not None:
408 if "*" in fromlist:
409 have_star = 1
410 fromlist = [f for f in fromlist if f != "*"]
411 self._safe_import_hook(name, m, fromlist, level=0)
412 if have_star:
413 # We've encountered an "import *". If it is a Python module,
414 # the code has already been parsed and we can suck out the
415 # global names.
416 mm = None
417 if m.__path__:
418 # At this point we don't know whether 'name' is a
419 # submodule of 'm' or a global module. Let's just try
420 # the full name first.
421 mm = self.modules.get(m.__name__ + "." + name)
422 if mm is None:
423 mm = self.modules.get(name)
424 if mm is not None:
425 m.globalnames.update(mm.globalnames)
426 m.starimports.update(mm.starimports)
427 if mm.__code__ is None:
428 m.starimports[name] = 1
429 else:
430 m.starimports[name] = 1
431 elif what == "relative_import":
432 level, fromlist, name = args
433 if name:
434 self._safe_import_hook(name, m, fromlist, level=level)
435 else:
436 parent = self.determine_parent(m, level=level)
437 self._safe_import_hook(parent.__name__, None, fromlist, level=0)
438 else:
439 # We don't expect anything else from the generator.
440 raise RuntimeError(what)
441
442 for c in co.co_consts:
443 if isinstance(c, type(co)):
444 self.scan_code(c, m)
445
446 def load_package(self, fqname, pathname):
447 self.msgin(2, "load_package", fqname, pathname)
448 newname = replacePackageMap.get(fqname)
449 if newname:
450 fqname = newname
451 m = self.add_module(fqname)
452 m.__file__ = pathname
453 m.__path__ = [pathname]
454
455 # As per comment at top of file, simulate runtime __path__ additions.
456 m.__path__ = m.__path__ + packagePathMap.get(fqname, [])
457
458 fp, buf, stuff = self.find_module("__init__", m.__path__)
459 try:
460 self.load_module(fqname, fp, buf, stuff)
461 self.msgout(2, "load_package ->", m)
462 return m
463 finally:
464 if fp:
465 fp.close()
466
467 def add_module(self, fqname):
468 if fqname in self.modules:
469 return self.modules[fqname]
470 self.modules[fqname] = m = Module(fqname)
471 return m
472
473 def find_module(self, name, path, parent=None):
474 if parent is not None:
475 # assert path is not None
476 fullname = parent.__name__+'.'+name
477 else:
478 fullname = name
479 if fullname in self.excludes:
480 self.msgout(3, "find_module -> Excluded", fullname)
481 raise ImportError(name)
482
483 if path is None:
484 if name in sys.builtin_module_names:
485 return (None, None, ("", "", _C_BUILTIN))
486
487 path = self.path
488
489 return _find_module(name, path)
490
491 def report(self):
492 """Print a report to stdout, listing the found modules with their
493 paths, as well as modules that are missing, or seem to be missing.
494 """
495 print()
496 print(" %-25s %s" % ("Name", "File"))
497 print(" %-25s %s" % ("----", "----"))
498 # Print modules found
499 keys = sorted(self.modules.keys())
500 for key in keys:
501 m = self.modules[key]
502 if m.__path__:
503 print("P", end=' ')
504 else:
505 print("m", end=' ')
506 print("%-25s" % key, m.__file__ or "")
507
508 # Print missing modules
509 missing, maybe = self.any_missing_maybe()
510 if missing:
511 print()
512 print("Missing modules:")
513 for name in missing:
514 mods = sorted(self.badmodules[name].keys())
515 print("?", name, "imported from", ', '.join(mods))
516 # Print modules that may be missing, but then again, maybe not...
517 if maybe:
518 print()
519 print("Submodules that appear to be missing, but could also be", end=' ')
520 print("global names in the parent package:")
521 for name in maybe:
522 mods = sorted(self.badmodules[name].keys())
523 print("?", name, "imported from", ', '.join(mods))
524
525 def any_missing(self):
526 """Return a list of modules that appear to be missing. Use
527 any_missing_maybe() if you want to know which modules are
528 certain to be missing, and which *may* be missing.
529 """
530 missing, maybe = self.any_missing_maybe()
531 return missing + maybe
532
533 def any_missing_maybe(self):
534 """Return two lists, one with modules that are certainly missing
535 and one with modules that *may* be missing. The latter names could
536 either be submodules *or* just global names in the package.
537
538 The reason it can't always be determined is that it's impossible to
539 tell which names are imported when "from module import *" is done
540 with an extension module, short of actually importing it.
541 """
542 missing = []
543 maybe = []
544 for name in self.badmodules:
545 if name in self.excludes:
546 continue
547 i = name.rfind(".")
548 if i < 0:
549 missing.append(name)
550 continue
551 subname = name[i+1:]
552 pkgname = name[:i]
553 pkg = self.modules.get(pkgname)
554 if pkg is not None:
555 if pkgname in self.badmodules[name]:
556 # The package tried to import this module itself and
557 # failed. It's definitely missing.
558 missing.append(name)
559 elif subname in pkg.globalnames:
560 # It's a global in the package: definitely not missing.
561 pass
562 elif pkg.starimports:
563 # It could be missing, but the package did an "import *"
564 # from a non-Python module, so we simply can't be sure.
565 maybe.append(name)
566 else:
567 # It's not a global in the package, the package didn't
568 # do funny star imports, it's very likely to be missing.
569 # The symbol could be inserted into the package from the
570 # outside, but since that's not good style we simply list
571 # it missing.
572 missing.append(name)
573 else:
574 missing.append(name)
575 missing.sort()
576 maybe.sort()
577 return missing, maybe
578
579 def replace_paths_in_code(self, co):
580 new_filename = original_filename = os.path.normpath(co.co_filename)
581 for f, r in self.replace_paths:
582 if original_filename.startswith(f):
583 new_filename = r + original_filename[len(f):]
584 break
585
586 if self.debug and original_filename not in self.processed_paths:
587 if new_filename != original_filename:
588 self.msgout(2, "co_filename %r changed to %r" \
589 % (original_filename,new_filename,))
590 else:
591 self.msgout(2, "co_filename %r remains unchanged" \
592 % (original_filename,))
593 self.processed_paths.append(original_filename)
594
595 consts = list(co.co_consts)
596 for i in range(len(consts)):
597 if isinstance(consts[i], type(co)):
598 consts[i] = self.replace_paths_in_code(consts[i])
599
600 return co.replace(co_consts=tuple(consts), co_filename=new_filename)
601
602
def test():
    """Command-line driver: analyse a script and print the module report.

    Options: -d raises the debug level, -q silences debug output, -m makes
    the extra arguments module names instead of file names, -p adds
    os.pathsep-separated directories to the front of the search path, and
    -x excludes a module.  The first positional argument is the script
    (default "hello.py"); the remaining ones are loaded before it.

    Returns the ModuleFinder, or None if the options could not be parsed.
    """
    # Parse command line
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
    except getopt.error as msg:
        print(msg)
        return

    # Process options
    debug = 1
    domods = 0
    addpath = []
    exclude = []
    for flag, value in opts:
        if flag == '-d':
            debug += 1
        elif flag == '-m':
            domods = 1
        elif flag == '-p':
            addpath.extend(value.split(os.pathsep))
        elif flag == '-q':
            debug = 0
        elif flag == '-x':
            exclude.append(value)

    # Provide default arguments
    script = args[0] if args else "hello.py"

    # Set the path based on sys.path and the script directory
    search_path = list(sys.path)
    search_path[0] = os.path.dirname(script)
    search_path = addpath + search_path
    if debug > 1:
        print("path:")
        for entry in search_path:
            print(" ", repr(entry))

    # Create the module finder and turn its crank
    mf = ModuleFinder(search_path, debug, exclude)
    for arg in args[1:]:
        if arg == '-m':
            # From here on the remaining arguments are module names.
            domods = 1
        elif not domods:
            mf.load_file(arg)
        elif arg[-2:] == '.*':
            mf.import_hook(arg[:-2], None, ["*"])
        else:
            mf.import_hook(arg)
    mf.run_script(script)
    mf.report()
    return mf  # for -i debugging
660
661
# Script entry point: run the command-line driver.
if __name__ == '__main__':
    try:
        # Keep the result in a module-level name so it can be inspected
        # afterwards (test() returns the finder "for -i debugging").
        mf = test()
    except KeyboardInterrupt:
        # Report Ctrl-C with a short message rather than a traceback.
        print("\n[interrupted]")