1 """Print a summary of specialization stats for all files in the
2 default stats folders.
3 """
4
5 import collections
6 import os.path
7 import opcode
8 from datetime import date
9 import itertools
10 import argparse
11
# Folder that gather_stats() reads stats files from; Windows uses a
# different location from every other platform.
DEFAULT_DIR = "c:\\temp\\py_stats\\" if os.name == "nt" else "/tmp/py_stats/"
16
17 #Create list of all instruction names
18 specialized = iter(opcode._specialized_instructions)
19 opname = ["<0>"]
20 for name in opcode.opname[1:]:
21 if name.startswith("<"):
22 try:
23 name = next(specialized)
24 except StopIteration:
25 pass
26 opname.append(name)
27
# opcode_name --> opcode, with the entries inserted in alphabetical order
# of the name so that iterating the dict walks the names sorted.
opmap = dict(sorted((name, i) for i, name in enumerate(opname)))
32
# Per-family counters that print_specialization_stats() adds up to get the
# denominator of its "Ratio" column.
TOTAL = (
    "specialization.deferred",
    "specialization.hit",
    "specialization.miss",
    "execution_count",
)
34
def print_specialization_stats(name, family_stats, defines):
    """Print the specialization report for one instruction family.

    name is the instruction name, family_stats maps stat keys to counts for
    that instruction, and defines is the mapping handed on to kind_to_text().

    Nothing is printed unless family_stats has a "specializable" entry and
    the counters listed in TOTAL add up to a non-zero value.  Three tables
    are emitted: the per-kind counts, the specialization attempts, and the
    failure kinds ordered from most to least frequent.
    """
    if "specializable" not in family_stats:
        return
    total = sum(family_stats.get(kind, 0) for kind in TOTAL)
    if total == 0:
        return

    def ratio(part, whole):
        # A family can have hits/misses without any recorded attempt or
        # failure; leave the cell empty rather than dividing by zero.
        return f"{100*part/whole:0.1f}%" if whole else ""

    with Section(name, 3, f"specialization stats for {name} family"):
        rows = []
        for key in sorted(family_stats):
            if key.startswith("specialization.failure_kinds"):
                continue
            if key in ("specialization.hit", "specialization.miss"):
                label = key[len("specialization."):]
            elif key == "execution_count":
                label = "unquickened"
            elif key in ("specialization.success", "specialization.failure", "specializable"):
                continue
            elif key.startswith("pair"):
                continue
            else:
                label = key
            rows.append((f"{label:>12}", f"{family_stats[key]:>12}", ratio(family_stats[key], total)))
        emit_table(("Kind", "Count", "Ratio"), rows)

        print_title("Specialization attempts", 4)
        attempt_keys = ("specialization.success", "specialization.failure")
        total_attempts = sum(family_stats.get(key, 0) for key in attempt_keys)
        rows = []
        for key in attempt_keys:
            label = key[len("specialization."):].capitalize()
            val = family_stats.get(key, 0)
            rows.append((label, val, ratio(val, total_attempts)))
        emit_table(("", "Count:", "Ratio:"), rows)

        total_failures = family_stats.get("specialization.failure", 0)
        # Collect (count, kind) pairs directly instead of filling a
        # fixed-size list, so any failure-kind index is accepted.
        failures = []
        for key, value in family_stats.items():
            if not key.startswith("specialization.failure_kind"):
                continue
            # Keys look like "<prefix>[<index>]": drop the "]" and split.
            _, index = key[:-1].split("[")
            index = int(index)
            if value:
                failures.append((value, index))
        failures.sort(reverse=True)
        rows = [
            (kind_to_text(index, defines, name), value, ratio(value, total_failures))
            for value, index in failures
        ]
        emit_table(("Failure kind", "Count:", "Ratio:"), rows)
85
def gather_stats(stats_dir=None):
    """Sum the "key: value" lines of every file in a stats folder.

    stats_dir is the folder to read; it defaults to DEFAULT_DIR.  Each line
    is expected to hold a key and an integer separated by a single colon.
    Values for the same key are added together across all files.  Blank
    lines are ignored, and lines that cannot be parsed are reported on
    stderr and skipped, so one damaged line does not abort the whole run.

    Returns a collections.Counter mapping each key to its summed value.
    """
    import sys  # local import: only needed for the diagnostic below

    if stats_dir is None:
        stats_dir = DEFAULT_DIR
    stats = collections.Counter()
    for filename in os.listdir(stats_dir):
        with open(os.path.join(stats_dir, filename)) as fd:
            for line in fd:
                if not line.strip():
                    continue
                try:
                    key, value = line.split(":")
                    value = int(value)
                except ValueError:
                    print(f"Unparsable line: '{line.strip()}' in {filename}",
                          file=sys.stderr)
                    continue
                stats[key.strip()] += value
    return stats
96
def extract_opcode_stats(stats):
    """Split the "opcode[N].<field>" entries of stats into per-opcode dicts.

    Returns a list of 256 dicts; entry N maps each <field> (without the
    leading dot) to its value.  Keys not starting with "opcode" are ignored.
    """
    per_opcode = [dict() for _ in range(256)]
    for full_key in stats:
        if full_key.startswith("opcode"):
            # Skip the 7-character "opcode[" prefix, then cut at the "]".
            number, _, field = full_key[7:].partition("]")
            per_opcode[int(number)][field.strip(".")] = stats[full_key]
    return per_opcode
105
def parse_kinds(spec_src):
    """Collect the "#define SPEC_FAIL_<NAME> <value>" lines of spec_src.

    spec_src is an iterable of text lines.  Returns a defaultdict(list)
    mapping each integer value to the list of <NAME>s defined with it, in
    the order they were encountered.
    """
    marker = "#define SPEC_FAIL_"
    defines = collections.defaultdict(list)
    for text in (raw.strip() for raw in spec_src):
        if text.startswith(marker):
            name, val = text[len(marker):].split()
            defines[int(val.strip())].append(name.strip())
    return defines
116
def pretty(defname):
    """Return defname in lower case with underscores turned into spaces."""
    words = defname.split("_")
    return " ".join(words).lower()
119
def kind_to_text(kind, defines, opname):
    """Return a readable description of a specialization failure kind.

    kind is the numeric failure kind, defines maps kind numbers to lists of
    define names (as built by parse_kinds()), and opname is the name of the
    instruction the failure was recorded for.

    Kinds below 7 use the first define registered for that number.  Other
    kinds use the first define whose name starts with the family prefix
    derived from opname, with that prefix removed.  When nothing matches,
    including when no define exists for the kind at all, the result is
    "kind <number>".
    """
    # .get() neither raises for an unknown kind nor inserts a key when
    # defines is a defaultdict.
    names = defines.get(kind, ())
    if kind < 7 and names:
        return pretty(names[0])
    if opname.endswith("ATTR"):
        opname = "ATTR"
    if opname.endswith("SUBSCR"):
        opname = "SUBSCR"
    if opname.startswith("PRECALL"):
        opname = "CALL"
    for name in names:
        if name.startswith(opname):
            # Drop the family prefix and the separator that follows it.
            return pretty(name[len(opname)+1:])
    return "kind " + str(kind)
133
def categorized_counts(opcode_stats):
    """Split the execution counts into basic / not specialized / specialized.

    Entries without an "execution_count" are skipped.  Counts of entries
    marked "specializable" and of ADAPTIVE instructions are treated as not
    specialized.  For the other specialized instructions the misses count as
    not specialized and the remainder as specialized.  Everything else is
    basic.

    Returns the tuple (basic, not_specialized, specialized).
    """
    all_specialized = opcode._specialized_instructions
    adaptive_names = {op for op in all_specialized if "ADAPTIVE" in op}
    specialized_names = {
        op for op in all_specialized
        if "__" not in op and "ADAPTIVE" not in op
    }
    basic = not_specialized = specialized = 0
    for number, entry in enumerate(opcode_stats):
        if "execution_count" not in entry:
            continue
        executed = entry["execution_count"]
        name = opname[number]
        if "specializable" in entry or name in adaptive_names:
            not_specialized += executed
        elif name in specialized_names:
            misses = entry.get("specialization.miss", 0)
            not_specialized += misses
            specialized += executed - misses
        else:
            basic += executed
    return basic, not_specialized, specialized
160
def print_title(name, level=2):
    """Print name as a heading of `level` "#" characters, then a blank line."""
    print("#" * level, name, end="\n\n")
164
class Section:
    """Context manager that wraps the output printed inside it in a section.

    Entering prints the title via print_title() followed by an opening
    <details> element with a <summary> line; leaving prints the closing
    </details> tag.  The summary defaults to the lower-cased title.
    """

    def __init__(self, title, level=2, summary=None):
        self.title = title
        self.level = level
        self.summary = title.lower() if summary is None else summary

    def __enter__(self):
        print_title(self.title, self.level)
        print("<details>")
        print("<summary>", self.summary, "</summary>")
        print()
        return self

    def __exit__(self, *exc_info):
        # Always close the element; returning None lets any exception raised
        # inside the block propagate.
        print()
        print("</details>")
        print()
186
def emit_table(header, rows):
    """Print header and rows as a pipe-delimited table, then a blank line.

    header is a sequence of column titles.  A title ending in ":" is printed
    without the colon and the colon is appended to that column's "---"
    separator cell instead.  Every row must have exactly as many items as
    the header; items are converted with str().

    Raises ValueError, naming the offending row, when a row has the wrong
    number of items.  The header and any earlier rows have already been
    printed by then.
    """
    width = len(header)
    header_line = "|"
    under_line = "|"
    for item in header:
        under = "---"
        if item.endswith(":"):
            item = item[:-1]
            under += ":"
        header_line += item + " | "
        under_line += under + "|"
    print(header_line)
    print(under_line)
    for row in rows:
        if len(row) != width:
            # Report the row that is wrong, not the whole table.
            raise ValueError("Wrong number of elements in row '" + str(row) + "'")
        print("|", " | ".join(str(i) for i in row), "|")
    print()
205
def emit_execution_counts(opcode_stats, total):
    """Print the "Execution counts" section.

    Lists every instruction that has an "execution_count", most executed
    first, with its share of total, the running cumulative share and, where
    a non-zero miss count is available, the miss ratio.
    """
    with Section("Execution counts", summary="execution counts for all instructions"):
        entries = []
        for number, entry in enumerate(opcode_stats):
            if "execution_count" not in entry:
                continue
            # Entries marked "specializable" never show a miss ratio.
            if "specializable" in entry:
                misses = 0
            else:
                misses = entry.get("specialization.miss")
            entries.append((entry["execution_count"], opname[number], misses))
        entries.sort(reverse=True)

        running = 0
        rows = []
        for executed, name, misses in entries:
            running += executed
            miss_ratio = f"{100*misses/executed:0.1f}%" if misses else ""
            own_share = f"{100*executed/total:0.1f}%"
            cumulative_share = f"{100*running/total:0.1f}%"
            rows.append((name, executed, own_share, cumulative_share, miss_ratio))
        emit_table(
            ("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:"),
            rows
        )
231
232
def emit_specialization_stats(opcode_stats):
    """Print the "Specialization stats" section, one report per instruction.

    The failure-kind names are read from Python/specialize.c, located
    relative to this file.
    """
    here = os.path.dirname(__file__)
    with open(os.path.join(here, "../../Python/specialize.c")) as spec_src:
        defines = parse_kinds(spec_src)
    with Section("Specialization stats", summary="specialization stats by family"):
        for number, family_stats in enumerate(opcode_stats):
            print_specialization_stats(opname[number], family_stats, defines)
241
def emit_specialization_overview(opcode_stats, total):
    """Print the "Specialization effectiveness" section.

    Shows the basic, not specialized and specialized execution counts from
    categorized_counts(), each with its share of total.  When total is zero
    the ratio cells are left empty instead of raising ZeroDivisionError.
    """
    basic, not_specialized, specialized = categorized_counts(opcode_stats)

    def ratio(count):
        return f"{count*100/total:0.1f}%" if total else ""

    with Section("Specialization effectiveness"):
        emit_table(("Instructions", "Count:", "Ratio:"), (
            ("Basic", basic, ratio(basic)),
            ("Not specialized", not_specialized, ratio(not_specialized)),
            ("Specialized", specialized, ratio(specialized)),
        ))
250
def emit_call_stats(stats):
    """Print the "Call stats" section.

    Lists every stat whose key contains "Calls to", followed by every stat
    whose key starts with "Frame".  Each ratio is relative to the sum of the
    "Calls to" values; when that sum is zero the ratio cells are left empty
    instead of raising ZeroDivisionError.
    """
    with Section("Call stats", summary="Inlined calls and frame stats"):
        total = sum(value for key, value in stats.items() if "Calls to" in key)

        def ratio(value):
            return f"{100*value/total:0.1f}%" if total else ""

        rows = [(key, value, ratio(value))
                for key, value in stats.items() if "Calls to" in key]
        rows += [(key, value, ratio(value))
                 for key, value in stats.items() if key.startswith("Frame")]
        emit_table(("", "Count:", "Ratio:"), rows)
265
def emit_object_stats(stats):
    """Print the "Object stats" section.

    Lists every stat whose key starts with "Object", labelled with the rest
    of the key (first letter upper-cased).  Keys containing "materialize"
    also get a ratio relative to the "Object new values" stat.  The ratio is
    left empty when that stat is missing or zero.
    """
    with Section("Object stats", summary="allocations, frees and dict materializatons"):
        total = stats.get("Object new values")
        rows = []
        for key, value in stats.items():
            if not key.startswith("Object"):
                continue
            # `total` may be None (stat absent) or 0; neither can be a
            # denominator.
            if "materialize" in key and total:
                materialize = f"{100*value/total:0.1f}%"
            else:
                materialize = ""
            label = key[6:].strip()
            # Slicing keeps this safe when the key is exactly "Object".
            label = label[:1].upper() + label[1:]
            rows.append((label, value, materialize))
        emit_table(("", "Count:", "Ratio:"), rows)
280
def get_total(opcode_stats):
    """Return the sum of "execution_count" over all entries that have one."""
    return sum(
        entry["execution_count"]
        for entry in opcode_stats
        if "execution_count" in entry
    )
287
def emit_pair_counts(opcode_stats, total):
    """Print the "Pair counts" and "Predecessor/Successor Pairs" sections.

    Pairs are taken from the non-zero "pair_count[N]" entries of every
    opcode except opcode 0.  The first section lists the 100 most frequent
    pairs with their share of total; the second lists, for each instruction
    in opmap order, its three most common predecessors and successors.
    """
    pair_counts = []
    for first, entry in enumerate(opcode_stats):
        if first == 0:
            continue
        for key, value in entry.items():
            if not value or not key.startswith("pair_count"):
                continue
            # Skip the 11-character "pair_count[" prefix, then cut at "]".
            second = int(key[11:].partition("]")[0])
            pair_counts.append((value, (first, second)))

    with Section("Pair counts", summary="Pair counts for top 100 pairs"):
        pair_counts.sort(reverse=True)
        running = 0
        rows = []
        for occurrences, (first, second) in pair_counts[:100]:
            running += occurrences
            rows.append((
                opname[first] + " " + opname[second],
                occurrences,
                f"{100*occurrences/total:0.1f}%",
                f"{100*running/total:0.1f}%",
            ))
        emit_table(("Pair", "Count:", "Self:", "Cumulative:"), rows)

    with Section("Predecessor/Successor Pairs", summary="Top 3 predecessors and successors of each opcode"):
        predecessors = collections.defaultdict(collections.Counter)
        successors = collections.defaultdict(collections.Counter)
        total_predecessors = collections.Counter()
        total_successors = collections.Counter()
        for occurrences, (first, second) in pair_counts:
            if not occurrences:
                continue
            predecessors[second][first] = occurrences
            successors[first][second] = occurrences
            total_predecessors[second] += occurrences
            total_successors[first] += occurrences

        for name, number in opmap.items():
            pred_total = total_predecessors[number]
            succ_total = total_successors[number]
            if not (pred_total or succ_total):
                continue
            pred_rows = ()
            succ_rows = ()
            if pred_total:
                pred_rows = [
                    (opname[pred], seen, f"{seen/pred_total:.1%}")
                    for pred, seen in predecessors[number].most_common(3)
                ]
            if succ_total:
                succ_rows = [
                    (opname[succ], seen, f"{seen/succ_total:.1%}")
                    for succ, seen in successors[number].most_common(3)
                ]
            with Section(name, 3, f"Successors and predecessors for {name}"):
                emit_table(("Predecessors", "Count:", "Percentage:"), pred_rows)
                emit_table(("Successors", "Count:", "Percentage:"), succ_rows)
340
def main():
    """Gather the stats and print every report section, then the date."""
    raw_stats = gather_stats()
    per_opcode = extract_opcode_stats(raw_stats)
    executed = get_total(per_opcode)

    emit_execution_counts(per_opcode, executed)
    emit_pair_counts(per_opcode, executed)
    emit_specialization_stats(per_opcode)
    emit_specialization_overview(per_opcode, executed)
    emit_call_stats(raw_stats)
    emit_object_stats(raw_stats)

    print("---")
    print("Stats gathered on:", date.today())
353
# Produce the report only when run as a script, not when imported.
if __name__ == "__main__":
    main()