1 """Parse a Python module and describe its classes and functions.
2
3 Parse enough of a Python file to recognize imports and class and
4 function definitions, and to find out the superclasses of a class.
5
The interface consists of readmodule(), which reports classes only, and
the more complete function:
    readmodule_ex(module, path=None)
8 where module is the name of a Python module, and path is an optional
9 list of directories where the module is to be searched. If present,
10 path is prepended to the system search path sys.path. The return value
11 is a dictionary. The keys of the dictionary are the names of the
12 classes and functions defined in the module (including classes that are
13 defined via the from XXX import YYY construct). The values are
14 instances of classes Class and Function. One special key/value pair is
15 present for packages: the key '__path__' has a list as its value which
16 contains the package search path.
17
18 Classes and Functions have a common superclass: _Object. Every instance
19 has the following attributes:
20 module -- name of the module;
21 name -- name of the object;
22 file -- file in which the object is defined;
23 lineno -- line in the file where the object's definition starts;
24 end_lineno -- line in the file where the object's definition ends;
25 parent -- parent of this object, if any;
26 children -- nested objects contained in this object.
27 The 'children' attribute is a dictionary mapping names to objects.
28
29 Instances of Function describe functions with the attributes from _Object,
30 plus the following:
    is_async -- True if the function is defined with an 'async' prefix.
32
33 Instances of Class describe classes with the attributes from _Object,
34 plus the following:
35 super -- list of super classes (Class instances if possible);
36 methods -- mapping of method names to beginning line numbers.
37 If the name of a super class is not recognized, the corresponding
38 entry in the list of super classes is not a class instance but a
39 string giving the name of the super class. Since import statements
40 are recognized and imported modules are scanned as well, this
41 shouldn't happen often.
42 """
43
44 import ast
45 import sys
46 import importlib.util
47
# Names exported by "from pyclbr import *".
__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]

# Cache of the modules analysed so far: maps a fully qualified module name
# to the dictionary describing that module's contents.
_modules = {}
51
52
class _Object:
    """Information about Python class or function.

    Stores where the object is defined (module, file, first and last line)
    together with its position in the nesting hierarchy: 'parent' is the
    enclosing object (or None) and 'children' maps names to nested objects.
    """

    def __init__(self, module, name, file, lineno, end_lineno, parent):
        self.module, self.name, self.file = module, name, file
        self.lineno, self.end_lineno = lineno, end_lineno
        self.parent = parent
        self.children = {}
        if parent is None:
            return
        # Register this object with the object that encloses it.
        parent.children[name] = self
65
66
# The odd signatures of Function and Class are kept for back-compatibility.
class Function(_Object):
    """Information about a Python function, including methods.

    In addition to the _Object attributes, 'is_async' records whether the
    function was defined with the 'async' prefix.
    """

    def __init__(self, module, name, file, lineno,
                 parent=None, is_async=False, *, end_lineno=None):
        super().__init__(module, name, file, lineno, end_lineno, parent)
        self.is_async = is_async
        defined_in_class = isinstance(parent, Class)
        if defined_in_class:
            # A function nested directly in a class is one of its methods;
            # the class keeps a name -> first line mapping of them.
            parent.methods[name] = lineno
76
77
class Class(_Object):
    """Information about a Python class.

    In addition to the _Object attributes:
        super -- list of super classes (an empty list when none are given);
        methods -- mapping of method names to line numbers, initially empty.
    """

    def __init__(self, module, name, super_, file, lineno,
                 parent=None, *, end_lineno=None):
        super().__init__(module, name, file, lineno, end_lineno, parent)
        self.methods = {}
        # Any falsy super_ (None, empty list, ...) becomes a fresh list.
        self.super = super_ if super_ else []
85
86
# The 2 helpers below are used in these tests:
# Lib/test/test_pyclbr, Lib/idlelib/idle_test/test_browser.py
def _nest_function(ob, func_name, lineno, end_lineno, is_async=False):
    """Return a Function after nesting within ob.

    The new Function takes its module and file from ob and has ob as parent.
    """
    nested = Function(ob.module, func_name, ob.file, lineno,
                      parent=ob, is_async=is_async, end_lineno=end_lineno)
    return nested
93
def _nest_class(ob, class_name, lineno, end_lineno, super=None):
    """Return a Class after nesting within ob.

    The new Class takes its module and file from ob and has ob as parent;
    'super' is passed on as its list of super classes.
    """
    nested = Class(ob.module, class_name, super, ob.file, lineno,
                   parent=ob, end_lineno=end_lineno)
    return nested
98
99
def readmodule(module, path=None):
    """Return Class objects for the top-level classes in module.

    This is the original interface, before Functions were added: every
    entry that is not a Class instance is filtered out of the result.
    """
    everything = _readmodule(module, path or [])
    return {name: obj for name, obj in everything.items()
            if isinstance(obj, Class)}
111
def readmodule_ex(module, path=None):
    """Return a dictionary with all functions and classes in module.

    Search for module in PATH + sys.path.
    If possible, include imported superclasses.
    Do this by reading source, without importing (and executing) it.
    """
    search_dirs = path if path else []
    return _readmodule(module, search_dirs)
120
121
def _readmodule(module, path, inpackage=None):
    """Do the hard work for readmodule[_ex].

    If inpackage is given, it must be the dotted name of the package in
    which we are searching for a submodule, and then PATH must be the
    package search path; otherwise, we are searching for a top-level
    module, and path is combined with sys.path.

    Return the dictionary describing the module.  Results are cached in
    _modules under the full dotted module name.  Built-in modules and
    modules whose source is unavailable yield a dictionary without any
    class or function entries.

    Raise ImportError if the parent of a dotted name is not a package, and
    ModuleNotFoundError if no spec can be found for the module.
    """
    # Compute the full module name (prepending inpackage if set).
    if inpackage is not None:
        fullmodule = f"{inpackage}.{module}"
    else:
        fullmodule = module

    # Check in the cache.
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dict for this module's contents.
    tree = {}

    # Check if it is a built-in module; we don't do much for these.
    if module in sys.builtin_module_names and inpackage is None:
        _modules[module] = tree
        return tree

    # Check for a dotted module name: read the parent package first and
    # then look for the last component on the package's own search path.
    package, dot, submodule = module.rpartition('.')
    if dot:
        parent = _readmodule(package, path, inpackage)
        if inpackage is not None:
            package = f"{inpackage}.{package}"
        if '__path__' not in parent:
            raise ImportError(f'No package named {package}')
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module.
    if inpackage is not None:
        search_path = path
    else:
        search_path = path + sys.path
    spec = importlib.util._find_spec_from_path(fullmodule, search_path)
    if spec is None:
        raise ModuleNotFoundError(f"no module named {fullmodule!r}", name=fullmodule)
    # The (still empty) tree is cached before the source is analysed, so a
    # nested request for the same module gets this same dictionary back.
    _modules[fullmodule] = tree
    # Is module a package?
    if spec.submodule_search_locations is not None:
        tree['__path__'] = spec.submodule_search_locations
    try:
        source = spec.loader.get_source(fullmodule)
    except (AttributeError, ImportError):
        # If module is not Python source, we cannot do anything.
        return tree
    if source is None:
        return tree

    fname = spec.loader.get_filename(fullmodule)
    return _create_tree(fullmodule, path, fname, source, tree, inpackage)
184
185
class _ModuleBrowser(ast.NodeVisitor):
    """AST visitor that fills 'tree' with the classes and functions of a module.

    Top-level definitions (and names brought in by top-level
    'from ... import ...' statements) are stored in 'tree'; nested
    definitions are attached to their enclosing object through the
    'children' mapping maintained by the Class and Function constructors.
    """

    def __init__(self, module, path, file, tree, inpackage):
        self.path = path
        self.tree = tree
        self.file = file
        self.module = module
        self.inpackage = inpackage
        # Chain of Class/Function objects enclosing the node being visited.
        self.stack = []

    def _resolve_base(self, name):
        """Return the entry to record for the super class written as 'name'.

        The result is the known object when the name can be resolved, and
        the name itself (a string) otherwise, so no base is ever dropped.
        """
        if name in self.tree:
            # We know this super class.
            return self.tree[name]
        names = name.split(".")
        if len(names) > 1:
            # Super class form is module.class:
            # look in module for class.
            *_, module, class_ = names
            if module in _modules:
                return _modules[module].get(class_, name)
        # Unrecognized super class: keep its name.
        return name

    def visit_ClassDef(self, node):
        """Record a class definition and visit its body as nested content."""
        bases = [self._resolve_base(ast.unparse(base)) for base in node.bases]

        parent = self.stack[-1] if self.stack else None
        class_ = Class(self.module, node.name, bases, self.file, node.lineno,
                       parent=parent, end_lineno=node.end_lineno)
        if parent is None:
            self.tree[node.name] = class_
        self.stack.append(class_)
        self.generic_visit(node)
        self.stack.pop()

    def visit_FunctionDef(self, node, *, is_async=False):
        """Record a function definition and visit its body as nested content."""
        parent = self.stack[-1] if self.stack else None
        function = Function(self.module, node.name, self.file, node.lineno,
                            parent, is_async, end_lineno=node.end_lineno)
        if parent is None:
            self.tree[node.name] = function
        self.stack.append(function)
        self.generic_visit(node)
        self.stack.pop()

    def visit_AsyncFunctionDef(self, node):
        """Record an 'async def' like a plain function, flagged as async."""
        self.visit_FunctionDef(node, is_async=True)

    def visit_Import(self, node):
        """Read the modules named by an 'import' statement starting in column 0."""
        if node.col_offset != 0:
            return

        for module in node.names:
            try:
                try:
                    _readmodule(module.name, self.path, self.inpackage)
                except ImportError:
                    # Retry as a top-level module.
                    _readmodule(module.name, [])
            except (ImportError, SyntaxError):
                # If we can't find or parse the imported module,
                # too bad -- don't die here.
                continue

    def visit_ImportFrom(self, node):
        """Copy names from a 'from ... import ...' statement starting in column 0."""
        if node.col_offset != 0:
            return
        try:
            # A relative import is spelled with one leading dot per level.
            module = "." * node.level
            if node.module:
                module += node.module
            module = _readmodule(module, self.path, self.inpackage)
        except (ImportError, SyntaxError):
            return

        for name in node.names:
            if name.name in module:
                self.tree[name.asname or name.name] = module[name.name]
            elif name.name == "*":
                # Star import: take every name without a leading underscore.
                for import_name, import_value in module.items():
                    if import_name.startswith("_"):
                        continue
                    self.tree[import_name] = import_value
267
268
def _create_tree(fullmodule, path, fname, source, tree, inpackage):
    """Parse source and return tree filled in with what the module defines.

    The source is parsed with ast.parse() and walked by a _ModuleBrowser,
    which records its findings in the tree dictionary it was given.
    """
    browser = _ModuleBrowser(fullmodule, path, fname, tree, inpackage)
    parsed = ast.parse(source)
    browser.visit(parsed)
    return browser.tree
273
274
def _main():
    """Print module output (default this file) for quick visual check.

    The module is named by the first command line argument; it may be a
    module name or the path of an existing file.  Classes and functions
    are printed in line order, nested objects indented under their parent.
    """
    import os
    try:
        mod = sys.argv[1]
    except IndexError:
        # No argument given: describe this file.
        mod = __file__
    if os.path.exists(mod):
        # A file path: search its directory for the bare module name.
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    tree = readmodule_ex(mod, path)

    def lineno_key(a):
        # Values without a line number (the __path__ entry) sort first.
        return getattr(a, 'lineno', 0)

    # objs is used as a stack; sorting in reverse makes pop() return the
    # object with the smallest line number first.
    objs = sorted(tree.values(), key=lineno_key, reverse=True)
    indent_level = 2
    while objs:
        obj = objs.pop()
        if isinstance(obj, list):
            # Value is a __path__ key.
            continue
        if not hasattr(obj, 'indent'):
            obj.indent = 0

        if isinstance(obj, _Object):
            new_objs = sorted(obj.children.values(),
                              key=lineno_key, reverse=True)
            for ob in new_objs:
                ob.indent = obj.indent + indent_level
            objs.extend(new_objs)
        if isinstance(obj, Class):
            print(f"{' ' * obj.indent}class {obj.name} {obj.super} {obj.lineno}")
        elif isinstance(obj, Function):
            print(f"{' ' * obj.indent}def {obj.name} {obj.lineno}")


if __name__ == "__main__":
    _main()