1  """Print a summary of specialization stats for all files in the
       2  default stats folders.
       3  """
       4  
       5  import collections
       6  import os.path
       7  import opcode
       8  from datetime import date
       9  import itertools
      10  import argparse
      11  
      12  if os.name == "nt":
      13      DEFAULT_DIR = "c:\\temp\\py_stats\\"
      14  else:
      15      DEFAULT_DIR = "/tmp/py_stats/"
      16  
      17  #Create list of all instruction names
      18  specialized = iter(opcode._specialized_instructions)
      19  opname = ["<0>"]
      20  for name in opcode.opname[1:]:
      21      if name.startswith("<"):
      22          try:
      23              name = next(specialized)
      24          except StopIteration:
      25              pass
      26      opname.append(name)
      27  
      28  # opcode_name --> opcode
      29  # Sort alphabetically.
      30  opmap = {name: i for i, name in enumerate(opname)}
      31  opmap = dict(sorted(opmap.items()))
      32  
      33  TOTAL = "specialization.deferred", "specialization.hit", "specialization.miss", "execution_count"
      34  
      35  def print_specialization_stats(name, family_stats, defines):
      36      if "specializable" not in family_stats:
      37          return
      38      total = sum(family_stats.get(kind, 0) for kind in TOTAL)
      39      if total == 0:
      40          return
      41      with Section(name, 3, f"specialization stats for {name} family"):
      42          rows = []
      43          for key in sorted(family_stats):
      44              if key.startswith("specialization.failure_kinds"):
      45                  continue
      46              if key in ("specialization.hit", "specialization.miss"):
      47                  label = key[len("specialization."):]
      48              elif key == "execution_count":
      49                  label = "unquickened"
      50              elif key in ("specialization.success",  "specialization.failure", "specializable"):
      51                  continue
      52              elif key.startswith("pair"):
      53                  continue
      54              else:
      55                  label = key
      56              rows.append((f"{label:>12}", f"{family_stats[key]:>12}", f"{100*family_stats[key]/total:0.1f}%"))
      57          emit_table(("Kind", "Count", "Ratio"), rows)
      58          print_title("Specialization attempts", 4)
      59          total_attempts = 0
      60          for key in ("specialization.success",  "specialization.failure"):
      61              total_attempts += family_stats.get(key, 0)
      62          rows = []
      63          for key in ("specialization.success",  "specialization.failure"):
      64              label = key[len("specialization."):]
      65              label = label[0].upper() + label[1:]
      66              val = family_stats.get(key, 0)
      67              rows.append((label, val, f"{100*val/total_attempts:0.1f}%"))
      68          emit_table(("", "Count:", "Ratio:"), rows)
      69          total_failures = family_stats.get("specialization.failure", 0)
      70          failure_kinds = [ 0 ] * 30
      71          for key in family_stats:
      72              if not key.startswith("specialization.failure_kind"):
      73                  continue
      74              _, index = key[:-1].split("[")
      75              index =  int(index)
      76              failure_kinds[index] = family_stats[key]
      77          failures = [(value, index) for (index, value) in enumerate(failure_kinds)]
      78          failures.sort(reverse=True)
      79          rows = []
      80          for value, index in failures:
      81              if not value:
      82                  continue
      83              rows.append((kind_to_text(index, defines, name), value, f"{100*value/total_failures:0.1f}%"))
      84          emit_table(("Failure kind", "Count:", "Ratio:"), rows)
      85  
      86  def gather_stats():
      87      stats = collections.Counter()
      88      for filename in os.listdir(DEFAULT_DIR):
      89          with open(os.path.join(DEFAULT_DIR, filename)) as fd:
      90              for line in fd:
      91                  key, value = line.split(":")
      92                  key = key.strip()
      93                  value = int(value)
      94                  stats[key] += value
      95      return stats
      96  
      97  def extract_opcode_stats(stats):
      98      opcode_stats = [ {} for _ in range(256) ]
      99      for key, value in stats.items():
     100          if not key.startswith("opcode"):
     101              continue
     102          n, _, rest = key[7:].partition("]")
     103          opcode_stats[int(n)][rest.strip(".")] = value
     104      return opcode_stats
     105  
     106  def parse_kinds(spec_src):
     107      defines = collections.defaultdict(list)
     108      for line in spec_src:
     109          line = line.strip()
     110          if not line.startswith("#define SPEC_FAIL_"):
     111              continue
     112          line = line[len("#define SPEC_FAIL_"):]
     113          name, val = line.split()
     114          defines[int(val.strip())].append(name.strip())
     115      return defines
     116  
     117  def pretty(defname):
     118      return defname.replace("_", " ").lower()
     119  
     120  def kind_to_text(kind, defines, opname):
     121      if kind < 7:
     122          return pretty(defines[kind][0])
     123      if opname.endswith("ATTR"):
     124          opname = "ATTR"
     125      if opname.endswith("SUBSCR"):
     126          opname = "SUBSCR"
     127      if opname.startswith("PRECALL"):
     128          opname = "CALL"
     129      for name in defines[kind]:
     130          if name.startswith(opname):
     131              return pretty(name[len(opname)+1:])
     132      return "kind " + str(kind)
     133  
     134  def categorized_counts(opcode_stats):
     135      basic = 0
     136      specialized = 0
     137      not_specialized = 0
     138      specialized_instructions = {
     139          op for op in opcode._specialized_instructions
     140          if "__" not in op and "ADAPTIVE" not in op}
     141      adaptive_instructions = {
     142          op for op in opcode._specialized_instructions
     143          if "ADAPTIVE" in op}
     144      for i, opcode_stat in enumerate(opcode_stats):
     145          if "execution_count" not in opcode_stat:
     146              continue
     147          count = opcode_stat['execution_count']
     148          name = opname[i]
     149          if "specializable" in opcode_stat:
     150              not_specialized += count
     151          elif name in adaptive_instructions:
     152              not_specialized += count
     153          elif name in specialized_instructions:
     154              miss = opcode_stat.get("specialization.miss", 0)
     155              not_specialized += miss
     156              specialized += count - miss
     157          else:
     158              basic += count
     159      return basic, not_specialized, specialized
     160  
     161  def print_title(name, level=2):
     162      print("#"*level, name)
     163      print()
     164  
     165  class ESC[4;38;5;81mSection:
     166  
     167      def __init__(self, title, level=2, summary=None):
     168          self.title = title
     169          self.level = level
     170          if summary is None:
     171              self.summary = title.lower()
     172          else:
     173              self.summary = summary
     174  
     175      def __enter__(self):
     176          print_title(self.title, self.level)
     177          print("<details>")
     178          print("<summary>", self.summary, "</summary>")
     179          print()
     180          return self
     181  
     182      def __exit__(*args):
     183          print()
     184          print("</details>")
     185          print()
     186  
     187  def emit_table(header, rows):
     188      width = len(header)
     189      header_line = "|"
     190      under_line = "|"
     191      for item in header:
     192          under = "---"
     193          if item.endswith(":"):
     194              item = item[:-1]
     195              under += ":"
     196          header_line += item + " | "
     197          under_line += under + "|"
     198      print(header_line)
     199      print(under_line)
     200      for row in rows:
     201          if width is not None and len(row) != width:
     202              raise ValueError("Wrong number of elements in row '" + str(rows) + "'")
     203          print("|", " | ".join(str(i) for i in row), "|")
     204      print()
     205  
     206  def emit_execution_counts(opcode_stats, total):
     207      with Section("Execution counts", summary="execution counts for all instructions"):
     208          counts = []
     209          for i, opcode_stat in enumerate(opcode_stats):
     210              if "execution_count" in opcode_stat:
     211                  count = opcode_stat['execution_count']
     212                  miss = 0
     213                  if "specializable" not in opcode_stat:
     214                      miss = opcode_stat.get("specialization.miss")
     215                  counts.append((count, opname[i], miss))
     216          counts.sort(reverse=True)
     217          cumulative = 0
     218          rows = []
     219          for (count, name, miss) in counts:
     220              cumulative += count
     221              if miss:
     222                  miss =  f"{100*miss/count:0.1f}%"
     223              else:
     224                  miss = ""
     225              rows.append((name, count, f"{100*count/total:0.1f}%",
     226                          f"{100*cumulative/total:0.1f}%", miss))
     227          emit_table(
     228              ("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:"),
     229              rows
     230          )
     231  
     232  
     233  def emit_specialization_stats(opcode_stats):
     234      spec_path = os.path.join(os.path.dirname(__file__), "../../Python/specialize.c")
     235      with open(spec_path) as spec_src:
     236          defines = parse_kinds(spec_src)
     237      with Section("Specialization stats", summary="specialization stats by family"):
     238          for i, opcode_stat in enumerate(opcode_stats):
     239              name = opname[i]
     240              print_specialization_stats(name, opcode_stat, defines)
     241  
     242  def emit_specialization_overview(opcode_stats, total):
     243      basic, not_specialized, specialized = categorized_counts(opcode_stats)
     244      with Section("Specialization effectiveness"):
     245          emit_table(("Instructions", "Count:", "Ratio:"), (
     246              ("Basic", basic, f"{basic*100/total:0.1f}%"),
     247              ("Not specialized", not_specialized, f"{not_specialized*100/total:0.1f}%"),
     248              ("Specialized", specialized, f"{specialized*100/total:0.1f}%"),
     249          ))
     250  
     251  def emit_call_stats(stats):
     252      with Section("Call stats", summary="Inlined calls and frame stats"):
     253          total = 0
     254          for key, value in stats.items():
     255              if "Calls to" in key:
     256                  total += value
     257          rows = []
     258          for key, value in stats.items():
     259              if "Calls to" in key:
     260                  rows.append((key, value, f"{100*value/total:0.1f}%"))
     261          for key, value in stats.items():
     262              if key.startswith("Frame"):
     263                  rows.append((key, value, f"{100*value/total:0.1f}%"))
     264          emit_table(("", "Count:", "Ratio:"), rows)
     265  
     266  def emit_object_stats(stats):
     267      with Section("Object stats", summary="allocations, frees and dict materializatons"):
     268          total = stats.get("Object new values")
     269          rows = []
     270          for key, value in stats.items():
     271              if key.startswith("Object"):
     272                  if "materialize" in key:
     273                      materialize = f"{100*value/total:0.1f}%"
     274                  else:
     275                      materialize = ""
     276                  label = key[6:].strip()
     277                  label = label[0].upper() + label[1:]
     278                  rows.append((label, value, materialize))
     279          emit_table(("",  "Count:", "Ratio:"), rows)
     280  
     281  def get_total(opcode_stats):
     282      total = 0
     283      for opcode_stat in opcode_stats:
     284          if "execution_count" in opcode_stat:
     285              total += opcode_stat['execution_count']
     286      return total
     287  
     288  def emit_pair_counts(opcode_stats, total):
     289      pair_counts = []
     290      for i, opcode_stat in enumerate(opcode_stats):
     291          if i == 0:
     292              continue
     293          for key, value in opcode_stat.items():
     294              if key.startswith("pair_count"):
     295                  x, _, _ = key[11:].partition("]")
     296                  if value:
     297                      pair_counts.append((value, (i, int(x))))
     298      with Section("Pair counts", summary="Pair counts for top 100 pairs"):
     299          pair_counts.sort(reverse=True)
     300          cumulative = 0
     301          rows = []
     302          for (count, pair) in itertools.islice(pair_counts, 100):
     303              i, j = pair
     304              cumulative += count
     305              rows.append((opname[i] + " " + opname[j], count, f"{100*count/total:0.1f}%",
     306                          f"{100*cumulative/total:0.1f}%"))
     307          emit_table(("Pair", "Count:", "Self:", "Cumulative:"),
     308              rows
     309          )
     310      with Section("Predecessor/Successor Pairs", summary="Top 3 predecessors and successors of each opcode"):
     311          predecessors = collections.defaultdict(collections.Counter)
     312          successors = collections.defaultdict(collections.Counter)
     313          total_predecessors = collections.Counter()
     314          total_successors = collections.Counter()
     315          for count, (first, second) in pair_counts:
     316              if count:
     317                  predecessors[second][first] = count
     318                  successors[first][second] = count
     319                  total_predecessors[second] += count
     320                  total_successors[first] += count
     321          for name, i in opmap.items():
     322              total1 = total_predecessors[i]
     323              total2 = total_successors[i]
     324              if total1 == 0 and total2 == 0:
     325                  continue
     326              pred_rows = succ_rows = ()
     327              if total1:
     328                  pred_rows = [(opname[pred], count, f"{count/total1:.1%}")
     329                               for (pred, count) in predecessors[i].most_common(3)]
     330              if total2:
     331                  succ_rows = [(opname[succ], count, f"{count/total2:.1%}")
     332                               for (succ, count) in successors[i].most_common(3)]
     333              with Section(name, 3, f"Successors and predecessors for {name}"):
     334                  emit_table(("Predecessors", "Count:", "Percentage:"),
     335                      pred_rows
     336                  )
     337                  emit_table(("Successors", "Count:", "Percentage:"),
     338                      succ_rows
     339                  )
     340  
     341  def main():
     342      stats = gather_stats()
     343      opcode_stats = extract_opcode_stats(stats)
     344      total = get_total(opcode_stats)
     345      emit_execution_counts(opcode_stats, total)
     346      emit_pair_counts(opcode_stats, total)
     347      emit_specialization_stats(opcode_stats)
     348      emit_specialization_overview(opcode_stats, total)
     349      emit_call_stats(stats)
     350      emit_object_stats(stats)
     351      print("---")
     352      print("Stats gathered on:", date.today())
     353  
     354  if __name__ == "__main__":
     355      main()