1  """Print a summary of specialization stats for all files in the
       2  default stats folders.
       3  """
       4  
       5  import argparse
       6  import collections
       7  import json
       8  import os.path
       9  import opcode
      10  from datetime import date
      11  import itertools
      12  import sys
      13  
      14  if os.name == "nt":
      15      DEFAULT_DIR = "c:\\temp\\py_stats\\"
      16  else:
      17      DEFAULT_DIR = "/tmp/py_stats/"
      18  
      19  #Create list of all instruction names
      20  specialized = iter(opcode._specialized_instructions)
      21  opname = ["<0>"]
      22  for name in opcode.opname[1:]:
      23      if name.startswith("<"):
      24          try:
      25              name = next(specialized)
      26          except StopIteration:
      27              pass
      28      opname.append(name)
      29  
      30  # opcode_name --> opcode
      31  # Sort alphabetically.
      32  opmap = {name: i for i, name in enumerate(opname)}
      33  opmap = dict(sorted(opmap.items()))
      34  
      35  TOTAL = "specialization.hit", "specialization.miss", "execution_count"
      36  
      37  def format_ratio(num, den):
      38      """
      39      Format a ratio as a percentage. When the denominator is 0, returns the empty
      40      string.
      41      """
      42      if den == 0:
      43          return ""
      44      else:
      45          return f"{num/den:.01%}"
      46  
      47  def join_rows(a_rows, b_rows):
      48      """
      49      Joins two tables together, side-by-side, where the first column in each is a
      50      common key.
      51      """
      52      if len(a_rows) == 0 and len(b_rows) == 0:
      53          return []
      54  
      55      if len(a_rows):
      56          a_ncols = list(set(len(x) for x in a_rows))
      57          if len(a_ncols) != 1:
      58              raise ValueError("Table a is ragged")
      59  
      60      if len(b_rows):
      61          b_ncols = list(set(len(x) for x in b_rows))
      62          if len(b_ncols) != 1:
      63              raise ValueError("Table b is ragged")
      64  
      65      if len(a_rows) and len(b_rows) and a_ncols[0] != b_ncols[0]:
      66          raise ValueError("Tables have different widths")
      67  
      68      if len(a_rows):
      69          ncols = a_ncols[0]
      70      else:
      71          ncols = b_ncols[0]
      72  
      73      default = [""] * (ncols - 1)
      74      a_data = {x[0]: x[1:] for x in a_rows}
      75      b_data = {x[0]: x[1:] for x in b_rows}
      76  
      77      if len(a_data) != len(a_rows) or len(b_data) != len(b_rows):
      78          raise ValueError("Duplicate keys")
      79  
      80      # To preserve ordering, use A's keys as is and then add any in B that aren't
      81      # in A
      82      keys = list(a_data.keys()) + [k for k in b_data.keys() if k not in a_data]
      83      return [(k, *a_data.get(k, default), *b_data.get(k, default)) for k in keys]
      84  
      85  def calculate_specialization_stats(family_stats, total):
      86      rows = []
      87      for key in sorted(family_stats):
      88          if key.startswith("specialization.failure_kinds"):
      89              continue
      90          if key in ("specialization.hit", "specialization.miss"):
      91              label = key[len("specialization."):]
      92          elif key == "execution_count":
      93              continue
      94          elif key in ("specialization.success",  "specialization.failure", "specializable"):
      95              continue
      96          elif key.startswith("pair"):
      97              continue
      98          else:
      99              label = key
     100          rows.append((f"{label:>12}", f"{family_stats[key]:>12}", format_ratio(family_stats[key], total)))
     101      return rows
     102  
     103  def calculate_specialization_success_failure(family_stats):
     104      total_attempts = 0
     105      for key in ("specialization.success",  "specialization.failure"):
     106          total_attempts += family_stats.get(key, 0)
     107      rows = []
     108      if total_attempts:
     109          for key in ("specialization.success",  "specialization.failure"):
     110              label = key[len("specialization."):]
     111              label = label[0].upper() + label[1:]
     112              val = family_stats.get(key, 0)
     113              rows.append((label, val, format_ratio(val, total_attempts)))
     114      return rows
     115  
     116  def calculate_specialization_failure_kinds(name, family_stats, defines):
     117      total_failures = family_stats.get("specialization.failure", 0)
     118      failure_kinds = [ 0 ] * 40
     119      for key in family_stats:
     120          if not key.startswith("specialization.failure_kind"):
     121              continue
     122          _, index = key[:-1].split("[")
     123          index = int(index)
     124          failure_kinds[index] = family_stats[key]
     125      failures = [(value, index) for (index, value) in enumerate(failure_kinds)]
     126      failures.sort(reverse=True)
     127      rows = []
     128      for value, index in failures:
     129          if not value:
     130              continue
     131          rows.append((kind_to_text(index, defines, name), value, format_ratio(value, total_failures)))
     132      return rows
     133  
     134  def print_specialization_stats(name, family_stats, defines):
     135      if "specializable" not in family_stats:
     136          return
     137      total = sum(family_stats.get(kind, 0) for kind in TOTAL)
     138      if total == 0:
     139          return
     140      with Section(name, 3, f"specialization stats for {name} family"):
     141          rows = calculate_specialization_stats(family_stats, total)
     142          emit_table(("Kind", "Count", "Ratio"), rows)
     143          rows = calculate_specialization_success_failure(family_stats)
     144          if rows:
     145              print_title("Specialization attempts", 4)
     146              emit_table(("", "Count:", "Ratio:"), rows)
     147              rows = calculate_specialization_failure_kinds(name, family_stats, defines)
     148              emit_table(("Failure kind", "Count:", "Ratio:"), rows)
     149  
     150  def print_comparative_specialization_stats(name, base_family_stats, head_family_stats, defines):
     151      if "specializable" not in base_family_stats:
     152          return
     153  
     154      base_total = sum(base_family_stats.get(kind, 0) for kind in TOTAL)
     155      head_total = sum(head_family_stats.get(kind, 0) for kind in TOTAL)
     156      if base_total + head_total == 0:
     157          return
     158      with Section(name, 3, f"specialization stats for {name} family"):
     159          base_rows = calculate_specialization_stats(base_family_stats, base_total)
     160          head_rows = calculate_specialization_stats(head_family_stats, head_total)
     161          emit_table(
     162              ("Kind", "Base Count", "Base Ratio", "Head Count", "Head Ratio"),
     163              join_rows(base_rows, head_rows)
     164          )
     165          base_rows = calculate_specialization_success_failure(base_family_stats)
     166          head_rows = calculate_specialization_success_failure(head_family_stats)
     167          rows = join_rows(base_rows, head_rows)
     168          if rows:
     169              print_title("Specialization attempts", 4)
     170              emit_table(("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"), rows)
     171              base_rows = calculate_specialization_failure_kinds(name, base_family_stats, defines)
     172              head_rows = calculate_specialization_failure_kinds(name, head_family_stats, defines)
     173              emit_table(
     174                  ("Failure kind", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"),
     175                  join_rows(base_rows, head_rows)
     176              )
     177  
     178  def gather_stats(input):
     179      # Note the output of this function must be JSON-serializable
     180  
     181      if os.path.isfile(input):
     182          with open(input, "r") as fd:
     183              return json.load(fd)
     184      elif os.path.isdir(input):
     185          stats = collections.Counter()
     186          for filename in os.listdir(input):
     187              with open(os.path.join(input, filename)) as fd:
     188                  for line in fd:
     189                      try:
     190                          key, value = line.split(":")
     191                      except ValueError:
     192                          print(f"Unparsable line: '{line.strip()}' in  {filename}", file=sys.stderr)
     193                          continue
     194                      key = key.strip()
     195                      value = int(value)
     196                      stats[key] += value
     197              stats['__nfiles__'] += 1
     198          return stats
     199      else:
     200          raise ValueError(f"{input:r} is not a file or directory path")
     201  
     202  def extract_opcode_stats(stats):
     203      opcode_stats = [ {} for _ in range(256) ]
     204      for key, value in stats.items():
     205          if not key.startswith("opcode"):
     206              continue
     207          n, _, rest = key[7:].partition("]")
     208          opcode_stats[int(n)][rest.strip(".")] = value
     209      return opcode_stats
     210  
     211  def parse_kinds(spec_src, prefix="SPEC_FAIL"):
     212      defines = collections.defaultdict(list)
     213      start = "#define " + prefix + "_"
     214      for line in spec_src:
     215          line = line.strip()
     216          if not line.startswith(start):
     217              continue
     218          line = line[len(start):]
     219          name, val = line.split()
     220          defines[int(val.strip())].append(name.strip())
     221      return defines
     222  
     223  def pretty(defname):
     224      return defname.replace("_", " ").lower()
     225  
     226  def kind_to_text(kind, defines, opname):
     227      if kind <= 8:
     228          return pretty(defines[kind][0])
     229      if opname == "LOAD_SUPER_ATTR":
     230          opname = "SUPER"
     231      elif opname.endswith("ATTR"):
     232          opname = "ATTR"
     233      elif opname in ("FOR_ITER", "SEND"):
     234          opname = "ITER"
     235      elif opname.endswith("SUBSCR"):
     236          opname = "SUBSCR"
     237      for name in defines[kind]:
     238          if name.startswith(opname):
     239              return pretty(name[len(opname)+1:])
     240      return "kind " + str(kind)
     241  
     242  def categorized_counts(opcode_stats):
     243      basic = 0
     244      specialized = 0
     245      not_specialized = 0
     246      specialized_instructions = {
     247          op for op in opcode._specialized_instructions
     248          if "__" not in op}
     249      for i, opcode_stat in enumerate(opcode_stats):
     250          if "execution_count" not in opcode_stat:
     251              continue
     252          count = opcode_stat['execution_count']
     253          name = opname[i]
     254          if "specializable" in opcode_stat:
     255              not_specialized += count
     256          elif name in specialized_instructions:
     257              miss = opcode_stat.get("specialization.miss", 0)
     258              not_specialized += miss
     259              specialized += count - miss
     260          else:
     261              basic += count
     262      return basic, not_specialized, specialized
     263  
     264  def print_title(name, level=2):
     265      print("#"*level, name)
     266      print()
     267  
     268  class ESC[4;38;5;81mSection:
     269  
     270      def __init__(self, title, level=2, summary=None):
     271          self.title = title
     272          self.level = level
     273          if summary is None:
     274              self.summary = title.lower()
     275          else:
     276              self.summary = summary
     277  
     278      def __enter__(self):
     279          print_title(self.title, self.level)
     280          print("<details>")
     281          print("<summary>", self.summary, "</summary>")
     282          print()
     283          return self
     284  
     285      def __exit__(*args):
     286          print()
     287          print("</details>")
     288          print()
     289  
     290  def to_str(x):
     291      if isinstance(x, int):
     292          return format(x, ",d")
     293      else:
     294          return str(x)
     295  
     296  def emit_table(header, rows):
     297      width = len(header)
     298      header_line = "|"
     299      under_line = "|"
     300      for item in header:
     301          under = "---"
     302          if item.endswith(":"):
     303              item = item[:-1]
     304              under += ":"
     305          header_line += item + " | "
     306          under_line += under + "|"
     307      print(header_line)
     308      print(under_line)
     309      for row in rows:
     310          if width is not None and len(row) != width:
     311              raise ValueError("Wrong number of elements in row '" + str(row) + "'")
     312          print("|", " | ".join(to_str(i) for i in row), "|")
     313      print()
     314  
     315  def calculate_execution_counts(opcode_stats, total):
     316      counts = []
     317      for i, opcode_stat in enumerate(opcode_stats):
     318          if "execution_count" in opcode_stat:
     319              count = opcode_stat['execution_count']
     320              miss = 0
     321              if "specializable" not in opcode_stat:
     322                  miss = opcode_stat.get("specialization.miss")
     323              counts.append((count, opname[i], miss))
     324      counts.sort(reverse=True)
     325      cumulative = 0
     326      rows = []
     327      for (count, name, miss) in counts:
     328          cumulative += count
     329          if miss:
     330              miss = format_ratio(miss, count)
     331          else:
     332              miss = ""
     333          rows.append((name, count, format_ratio(count, total),
     334                       format_ratio(cumulative, total), miss))
     335      return rows
     336  
     337  def emit_execution_counts(opcode_stats, total):
     338      with Section("Execution counts", summary="execution counts for all instructions"):
     339          rows = calculate_execution_counts(opcode_stats, total)
     340          emit_table(
     341              ("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:"),
     342              rows
     343          )
     344  
     345  def emit_comparative_execution_counts(
     346      base_opcode_stats, base_total, head_opcode_stats, head_total
     347  ):
     348      with Section("Execution counts", summary="execution counts for all instructions"):
     349          base_rows = calculate_execution_counts(base_opcode_stats, base_total)
     350          head_rows = calculate_execution_counts(head_opcode_stats, head_total)
     351          base_data = dict((x[0], x[1:]) for x in base_rows)
     352          head_data = dict((x[0], x[1:]) for x in head_rows)
     353          opcodes = set(base_data.keys()) | set(head_data.keys())
     354  
     355          rows = []
     356          default = [0, "0.0%", "0.0%", 0]
     357          for opcode in opcodes:
     358              base_entry = base_data.get(opcode, default)
     359              head_entry = head_data.get(opcode, default)
     360              if base_entry[0] == 0:
     361                  change = 1
     362              else:
     363                  change = (head_entry[0] - base_entry[0]) / base_entry[0]
     364              rows.append(
     365                  (opcode, base_entry[0], head_entry[0],
     366                   f"{100*change:0.1f}%"))
     367  
     368          rows.sort(key=lambda x: -abs(float(x[-1][:-1])))
     369  
     370          emit_table(
     371              ("Name", "Base Count:", "Head Count:", "Change:"),
     372              rows
     373          )
     374  
     375  def get_defines():
     376      spec_path = os.path.join(os.path.dirname(__file__), "../../Python/specialize.c")
     377      with open(spec_path) as spec_src:
     378          defines = parse_kinds(spec_src)
     379      return defines
     380  
     381  def emit_specialization_stats(opcode_stats):
     382      defines = get_defines()
     383      with Section("Specialization stats", summary="specialization stats by family"):
     384          for i, opcode_stat in enumerate(opcode_stats):
     385              name = opname[i]
     386              print_specialization_stats(name, opcode_stat, defines)
     387  
     388  def emit_comparative_specialization_stats(base_opcode_stats, head_opcode_stats):
     389      defines = get_defines()
     390      with Section("Specialization stats", summary="specialization stats by family"):
     391          for i, (base_opcode_stat, head_opcode_stat) in enumerate(zip(base_opcode_stats, head_opcode_stats)):
     392              name = opname[i]
     393              print_comparative_specialization_stats(name, base_opcode_stat, head_opcode_stat, defines)
     394  
     395  def calculate_specialization_effectiveness(opcode_stats, total):
     396      basic, not_specialized, specialized = categorized_counts(opcode_stats)
     397      return [
     398          ("Basic", basic, format_ratio(basic, total)),
     399          ("Not specialized", not_specialized, format_ratio(not_specialized, total)),
     400          ("Specialized", specialized, format_ratio(specialized, total)),
     401      ]
     402  
     403  def emit_specialization_overview(opcode_stats, total):
     404      with Section("Specialization effectiveness"):
     405          rows = calculate_specialization_effectiveness(opcode_stats, total)
     406          emit_table(("Instructions", "Count:", "Ratio:"), rows)
     407          for title, field in (("Deferred", "specialization.deferred"), ("Misses", "specialization.miss")):
     408              total = 0
     409              counts = []
     410              for i, opcode_stat in enumerate(opcode_stats):
     411                  # Avoid double counting misses
     412                  if title == "Misses" and "specializable" in opcode_stat:
     413                      continue
     414                  value = opcode_stat.get(field, 0)
     415                  counts.append((value, opname[i]))
     416                  total += value
     417              counts.sort(reverse=True)
     418              if total:
     419                  with Section(f"{title} by instruction", 3):
     420                      rows = [ (name, count, format_ratio(count, total)) for (count, name) in counts[:10] ]
     421                      emit_table(("Name", "Count:", "Ratio:"), rows)
     422  
     423  def emit_comparative_specialization_overview(base_opcode_stats, base_total, head_opcode_stats, head_total):
     424      with Section("Specialization effectiveness"):
     425          base_rows = calculate_specialization_effectiveness(base_opcode_stats, base_total)
     426          head_rows = calculate_specialization_effectiveness(head_opcode_stats, head_total)
     427          emit_table(
     428              ("Instructions", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"),
     429              join_rows(base_rows, head_rows)
     430          )
     431  
     432  def get_stats_defines():
     433      stats_path = os.path.join(os.path.dirname(__file__), "../../Include/pystats.h")
     434      with open(stats_path) as stats_src:
     435          defines = parse_kinds(stats_src, prefix="EVAL_CALL")
     436      return defines
     437  
     438  def calculate_call_stats(stats):
     439      defines = get_stats_defines()
     440      total = 0
     441      for key, value in stats.items():
     442          if "Calls to" in key:
     443              total += value
     444              rows = []
     445      for key, value in stats.items():
     446          if "Calls to" in key:
     447              rows.append((key, value, format_ratio(value, total)))
     448          elif key.startswith("Calls "):
     449              name, index = key[:-1].split("[")
     450              index =  int(index)
     451              label = name + " (" + pretty(defines[index][0]) + ")"
     452              rows.append((label, value, format_ratio(value, total)))
     453      for key, value in stats.items():
     454          if key.startswith("Frame"):
     455              rows.append((key, value, format_ratio(value, total)))
     456      return rows
     457  
     458  def emit_call_stats(stats):
     459      with Section("Call stats", summary="Inlined calls and frame stats"):
     460          rows = calculate_call_stats(stats)
     461          emit_table(("", "Count:", "Ratio:"), rows)
     462  
     463  def emit_comparative_call_stats(base_stats, head_stats):
     464      with Section("Call stats", summary="Inlined calls and frame stats"):
     465          base_rows = calculate_call_stats(base_stats)
     466          head_rows = calculate_call_stats(head_stats)
     467          rows = join_rows(base_rows, head_rows)
     468          rows.sort(key=lambda x: -float(x[-1][:-1]))
     469          emit_table(
     470              ("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"),
     471              rows
     472          )
     473  
     474  def calculate_object_stats(stats):
     475      total_materializations = stats.get("Object new values")
     476      total_allocations = stats.get("Object allocations") + stats.get("Object allocations from freelist")
     477      total_increfs = stats.get("Object interpreter increfs") + stats.get("Object increfs")
     478      total_decrefs = stats.get("Object interpreter decrefs") + stats.get("Object decrefs")
     479      rows = []
     480      for key, value in stats.items():
     481          if key.startswith("Object"):
     482              if "materialize" in key:
     483                  ratio = format_ratio(value, total_materializations)
     484              elif "allocations" in key:
     485                  ratio = format_ratio(value, total_allocations)
     486              elif "increfs"     in key:
     487                  ratio = format_ratio(value, total_increfs)
     488              elif "decrefs"     in key:
     489                  ratio = format_ratio(value, total_decrefs)
     490              else:
     491                  ratio = ""
     492              label = key[6:].strip()
     493              label = label[0].upper() + label[1:]
     494              rows.append((label, value, ratio))
     495      return rows
     496  
     497  def emit_object_stats(stats):
     498      with Section("Object stats", summary="allocations, frees and dict materializatons"):
     499          rows = calculate_object_stats(stats)
     500          emit_table(("",  "Count:", "Ratio:"), rows)
     501  
     502  def emit_comparative_object_stats(base_stats, head_stats):
     503      with Section("Object stats", summary="allocations, frees and dict materializatons"):
     504          base_rows = calculate_object_stats(base_stats)
     505          head_rows = calculate_object_stats(head_stats)
     506          emit_table(("",  "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"), join_rows(base_rows, head_rows))
     507  
     508  def get_total(opcode_stats):
     509      total = 0
     510      for opcode_stat in opcode_stats:
     511          if "execution_count" in opcode_stat:
     512              total += opcode_stat['execution_count']
     513      return total
     514  
     515  def emit_pair_counts(opcode_stats, total):
     516      pair_counts = []
     517      for i, opcode_stat in enumerate(opcode_stats):
     518          if i == 0:
     519              continue
     520          for key, value in opcode_stat.items():
     521              if key.startswith("pair_count"):
     522                  x, _, _ = key[11:].partition("]")
     523                  if value:
     524                      pair_counts.append((value, (i, int(x))))
     525      with Section("Pair counts", summary="Pair counts for top 100 pairs"):
     526          pair_counts.sort(reverse=True)
     527          cumulative = 0
     528          rows = []
     529          for (count, pair) in itertools.islice(pair_counts, 100):
     530              i, j = pair
     531              cumulative += count
     532              rows.append((opname[i] + " " + opname[j], count, format_ratio(count, total),
     533                           format_ratio(cumulative, total)))
     534          emit_table(("Pair", "Count:", "Self:", "Cumulative:"),
     535              rows
     536          )
     537      with Section("Predecessor/Successor Pairs", summary="Top 5 predecessors and successors of each opcode"):
     538          predecessors = collections.defaultdict(collections.Counter)
     539          successors = collections.defaultdict(collections.Counter)
     540          total_predecessors = collections.Counter()
     541          total_successors = collections.Counter()
     542          for count, (first, second) in pair_counts:
     543              if count:
     544                  predecessors[second][first] = count
     545                  successors[first][second] = count
     546                  total_predecessors[second] += count
     547                  total_successors[first] += count
     548          for name, i in opmap.items():
     549              total1 = total_predecessors[i]
     550              total2 = total_successors[i]
     551              if total1 == 0 and total2 == 0:
     552                  continue
     553              pred_rows = succ_rows = ()
     554              if total1:
     555                  pred_rows = [(opname[pred], count, f"{count/total1:.1%}")
     556                               for (pred, count) in predecessors[i].most_common(5)]
     557              if total2:
     558                  succ_rows = [(opname[succ], count, f"{count/total2:.1%}")
     559                               for (succ, count) in successors[i].most_common(5)]
     560              with Section(name, 3, f"Successors and predecessors for {name}"):
     561                  emit_table(("Predecessors", "Count:", "Percentage:"),
     562                      pred_rows
     563                  )
     564                  emit_table(("Successors", "Count:", "Percentage:"),
     565                      succ_rows
     566                  )
     567  
     568  def output_single_stats(stats):
     569      opcode_stats = extract_opcode_stats(stats)
     570      total = get_total(opcode_stats)
     571      emit_execution_counts(opcode_stats, total)
     572      emit_pair_counts(opcode_stats, total)
     573      emit_specialization_stats(opcode_stats)
     574      emit_specialization_overview(opcode_stats, total)
     575      emit_call_stats(stats)
     576      emit_object_stats(stats)
     577      with Section("Meta stats", summary="Meta statistics"):
     578          emit_table(("", "Count:"), [('Number of data files', stats['__nfiles__'])])
     579  
     580  
     581  def output_comparative_stats(base_stats, head_stats):
     582      base_opcode_stats = extract_opcode_stats(base_stats)
     583      base_total = get_total(base_opcode_stats)
     584  
     585      head_opcode_stats = extract_opcode_stats(head_stats)
     586      head_total = get_total(head_opcode_stats)
     587  
     588      emit_comparative_execution_counts(
     589          base_opcode_stats, base_total, head_opcode_stats, head_total
     590      )
     591      emit_comparative_specialization_stats(
     592          base_opcode_stats, head_opcode_stats
     593      )
     594      emit_comparative_specialization_overview(
     595          base_opcode_stats, base_total, head_opcode_stats, head_total
     596      )
     597      emit_comparative_call_stats(base_stats, head_stats)
     598      emit_comparative_object_stats(base_stats, head_stats)
     599  
     600  def output_stats(inputs, json_output=None):
     601      if len(inputs) == 1:
     602          stats = gather_stats(inputs[0])
     603          if json_output is not None:
     604              json.dump(stats, json_output)
     605          output_single_stats(stats)
     606      elif len(inputs) == 2:
     607          if json_output is not None:
     608              raise ValueError(
     609                  "Can not output to JSON when there are multiple inputs"
     610              )
     611  
     612          base_stats = gather_stats(inputs[0])
     613          head_stats = gather_stats(inputs[1])
     614          output_comparative_stats(base_stats, head_stats)
     615  
     616      print("---")
     617      print("Stats gathered on:", date.today())
     618  
     619  def main():
     620      parser = argparse.ArgumentParser(description="Summarize pystats results")
     621  
     622      parser.add_argument(
     623          "inputs",
     624          nargs="*",
     625          type=str,
     626          default=[DEFAULT_DIR],
     627          help=f"""
     628          Input source(s).
     629          For each entry, if a .json file, the output provided by --json-output from a previous run;
     630          if a directory, a directory containing raw pystats .txt files.
     631          If one source is provided, its stats are printed.
     632          If two sources are provided, comparative stats are printed.
     633          Default is {DEFAULT_DIR}.
     634          """
     635      )
     636  
     637      parser.add_argument(
     638          "--json-output",
     639          nargs="?",
     640          type=argparse.FileType("w"),
     641          help="Output complete raw results to the given JSON file."
     642      )
     643  
     644      args = parser.parse_args()
     645  
     646      if len(args.inputs) > 2:
     647          raise ValueError("0-2 arguments may be provided.")
     648  
     649      output_stats(args.inputs, json_output=args.json_output)
     650  
     651  if __name__ == "__main__":
     652      main()