glibc-2.38/benchtests/scripts/plot_strings.py
#!/usr/bin/python3
# Plot GNU C Library string microbenchmark output.
# Copyright (C) 2019-2023 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
#
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <https://www.gnu.org/licenses/>.
      19  """Plot string microbenchmark results.
      20  
      21  Given a benchmark results file in JSON format and a benchmark schema file,
      22  plot the benchmark timings in one of the available representations.
      23  
      24  Separate figure is generated and saved to a file for each 'results' array
      25  found in the benchmark results file. Output filenames and plot titles
      26  are derived from the metadata found in the benchmark results file.
      27  """
import argparse
from collections import defaultdict
import json
import matplotlib as mpl
import numpy as np
import os
import sys

try:
    import jsonschema as validator
except ImportError:
    print("Could not find jsonschema module.")
    raise

# Use pre-selected markers for plotting lines to improve readability
markers = [".", "x", "^", "+", "*", "v", "1", ">", "s"]

# Benchmark variants for which the x-axis scale should be logarithmic
log_variants = {"powers of 2"}

def gmean(numbers):
    """Compute geometric mean.

    Args:
        numbers: 2-D list of numbers
    Return:
        numpy array with geometric means of numbers along each column
    """
    # Use the built-in complex type; the np.complex alias was deprecated
    # in NumPy 1.20 and removed in NumPy 1.24.
    a = np.array(numbers, dtype=complex)
    means = a.prod(0) ** (1.0 / len(a))
    return np.real(means)
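
# A minimal doctest-style sketch of gmean (illustrative values only):
# each column is reduced independently.
#
#     >>> gmean([[1.0, 4.0], [4.0, 9.0]])
#     array([2., 6.])
#
# i.e. sqrt(1.0 * 4.0) == 2.0 and sqrt(4.0 * 9.0) == 6.0.
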
def relativeDifference(x, x_reference):
    """Compute per-element relative difference between each row of
       a matrix and an array of reference values.

    Args:
        x: numpy matrix of shape (n, m)
        x_reference: numpy array of size m
    Return:
        relative difference between rows of x and x_reference (in %)
    """
    abs_diff = np.subtract(x, x_reference)
    return np.divide(np.multiply(abs_diff, 100.0), x_reference)
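
# For example (illustrative values): with x = [[110.0, 90.0]] and
# x_reference = [100.0, 100.0], the result is [[10.0, -10.0]], i.e. each
# row is expressed as a percentage deviation from the reference values.
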
def plotTime(timings, routine, bench_variant, title, outpath):
    """Plot absolute timing values.

    Args:
        timings: timings to plot
        routine: benchmarked string routine name
        bench_variant: top-level benchmark variant name
        title: figure title (generated so far)
        outpath: output file path (generated so far)
    Return:
        y: y-axis values to plot
        title_final: final figure title
        outpath_final: final output file path
    """
    y = timings
    plt.figure()

    if not args.values:
        plt.gca().yaxis.set_major_formatter(plt.NullFormatter())

    plt.ylabel("timing")
    title_final = "%s %s benchmark timings\n%s" % \
                  (routine, bench_variant, title)
    outpath_final = os.path.join(args.outdir, "%s_%s_%s%s" % \
                    (routine, args.plot, bench_variant, outpath))

    return y, title_final, outpath_final


def plotRelative(timings, all_timings, routine, ifuncs, bench_variant,
                 title, outpath):
    """Plot timing values relative to a chosen baseline ifunc.

    Args:
        timings: timings to plot
        all_timings: all collected timings
        routine: benchmarked string routine name
        ifuncs: names of ifuncs tested
        bench_variant: top-level benchmark variant name
        title: figure title (generated so far)
        outpath: output file path (generated so far)
    Return:
        y: y-axis values to plot
        title_final: final figure title
        outpath_final: final output file path
    """
    # Choose the baseline ifunc
    if args.baseline:
        baseline = args.baseline.replace("__", "")
    else:
        baseline = ifuncs[0]

    baseline_index = ifuncs.index(baseline)

    # Compare timings against the baseline
    y = relativeDifference(timings, all_timings[baseline_index])

    plt.figure()
    plt.axhspan(-args.threshold, args.threshold, color="lightgray", alpha=0.3)
    plt.axhline(0, color="k", linestyle="--", linewidth=0.4)
    plt.ylabel("relative timing (in %)")
    title_final = "Timing comparison against %s\nfor %s benchmark, %s" % \
                  (baseline, bench_variant, title)
    outpath_final = os.path.join(args.outdir, "%s_%s_%s%s" % \
                    (baseline, args.plot, bench_variant, outpath))

    return y, title_final, outpath_final
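
# E.g. (illustrative values): a candidate ifunc timing of 90.0 against a
# baseline timing of 100.0 plots as -10.0, i.e. 10% faster than baseline.
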
def plotMax(timings, routine, bench_variant, title, outpath):
    """Plot results as percentage of the maximum ifunc performance.

    The optimal ifunc is computed on a per-parameter-value basis.
    Performance is computed as 1/timing.

    Args:
        timings: timings to plot
        routine: benchmarked string routine name
        bench_variant: top-level benchmark variant name
        title: figure title (generated so far)
        outpath: output file path (generated so far)
    Return:
        y: y-axis values to plot
        title_final: final figure title
        outpath_final: final output file path
    """
    perf = np.reciprocal(timings)
    max_perf = np.max(perf, axis=0)
    y = np.add(100.0, relativeDifference(perf, max_perf))

    plt.figure()
    plt.axhline(100.0, color="k", linestyle="--", linewidth=0.4)
    plt.ylabel("1/timing relative to max (in %)")
    title_final = "Performance comparison against max for %s\n%s " \
                  "benchmark, %s" % (routine, bench_variant, title)
    outpath_final = os.path.join(args.outdir, "%s_%s_%s%s" % \
                    (routine, args.plot, bench_variant, outpath))

    return y, title_final, outpath_final
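
# A small worked example (illustrative values): timings [[2.0], [4.0]]
# yield perf [[0.5], [0.25]]; the per-column maximum is 0.5, so y becomes
# [[100.0], [50.0]], i.e. the slower ifunc runs at 50% of the best.
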
def plotThroughput(timings, params, routine, bench_variant, title, outpath):
    """Plot throughput.

    Throughput is computed as the varied parameter value over timing.

    Args:
        timings: timings to plot
        params: varied parameter values
        routine: benchmarked string routine name
        bench_variant: top-level benchmark variant name
        title: figure title (generated so far)
        outpath: output file path (generated so far)
    Return:
        y: y-axis values to plot
        title_final: final figure title
        outpath_final: final output file path
    """
    y = np.divide(params, timings)
    plt.figure()

    if not args.values:
        plt.gca().yaxis.set_major_formatter(plt.NullFormatter())

    plt.ylabel("%s / timing" % args.key)
    title_final = "%s %s benchmark throughput results\n%s" % \
                  (routine, bench_variant, title)
    outpath_final = os.path.join(args.outdir, "%s_%s_%s%s" % \
                    (routine, args.plot, bench_variant, outpath))
    return y, title_final, outpath_final
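
# E.g. (illustrative values): for a "length" of 32 and a timing of 8.0,
# the plotted throughput is 32 / 8.0 == 4.0 length units per timing unit.
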
def finishPlot(x, y, title, outpath, x_scale, plotted_ifuncs):
    """Finish generating the current figure.

    Args:
        x: x-axis values
        y: y-axis values
        title: figure title
        outpath: output file path
        x_scale: x-axis scale
        plotted_ifuncs: names of ifuncs to plot
    """
    plt.xlabel(args.key)
    plt.xscale(x_scale)
    plt.title(title)

    plt.grid(color="k", linestyle=args.grid, linewidth=0.5, alpha=0.5)

    # Cycle through the pre-selected markers so that overlapping lines
    # remain distinguishable.
    for i, ifunc in enumerate(plotted_ifuncs):
        plt.plot(x, y[i], marker=markers[i % len(markers)], label=ifunc)

    plt.legend(loc="best", fontsize="small")
    plt.savefig("%s_%s.%s" % (outpath, x_scale, args.extension),
                format=args.extension, dpi=args.resolution)

    if args.display:
        plt.show()

    plt.close()


def plotRecursive(json_iter, routine, ifuncs, bench_variant, title, outpath,
                  x_scale):
    """Plot benchmark timings.

    Args:
        json_iter: reference to json object
        routine: benchmarked string routine name
        ifuncs: names of ifuncs tested
        bench_variant: top-level benchmark variant name
        title: figure title (generated so far)
        outpath: output file path (generated so far)
        x_scale: x-axis scale
    """

    # RECURSIVE CASE: 'variants' array found
    if "variants" in json_iter:
        # Continue recursive search for 'results' array. Record the
        # benchmark variant (configuration) in order to customize
        # the title, filename and X-axis scale for the generated figure.
        for variant in json_iter["variants"]:
            new_title = "%s%s, " % (title, variant["name"])
            new_outpath = "%s_%s" % (outpath, variant["name"].replace(" ", "_"))
            new_x_scale = "log" if variant["name"] in log_variants else x_scale

            plotRecursive(variant, routine, ifuncs, bench_variant, new_title,
                          new_outpath, new_x_scale)
        return

    # BASE CASE: 'results' array found
    domain = []
    timings = defaultdict(list)

    # Collect timings, grouped by the varied parameter value
    for result in json_iter["results"]:
        domain.append(result[args.key])
        timings[result[args.key]].append(result["timings"])

    domain = np.unique(np.array(domain))
    averages = []

    # Reduce multiple runs for the same parameter value to their
    # geometric mean (a single run is returned unchanged).
    for parameter in domain:
        averages.append(gmean(timings[parameter]))

    averages = np.array(averages).transpose()

    # Choose ifuncs to plot
    if isinstance(args.ifuncs, str):
        plotted_ifuncs = ifuncs
    else:
        plotted_ifuncs = [x.replace("__", "") for x in args.ifuncs]

    plotted_indices = [ifuncs.index(x) for x in plotted_ifuncs]
    plotted_vals = averages[plotted_indices, :]

    # Plotting logic specific to each plot type
    if args.plot == "time":
        codomain, title, outpath = plotTime(plotted_vals, routine,
                                   bench_variant, title, outpath)
    elif args.plot == "rel":
        codomain, title, outpath = plotRelative(plotted_vals, averages, routine,
                                   ifuncs, bench_variant, title, outpath)
    elif args.plot == "max":
        codomain, title, outpath = plotMax(plotted_vals, routine,
                                   bench_variant, title, outpath)
    elif args.plot == "thru":
        codomain, title, outpath = plotThroughput(plotted_vals, domain, routine,
                                   bench_variant, title, outpath)

    # Plotting logic shared between plot types
    finishPlot(domain, codomain, title, outpath, x_scale, plotted_ifuncs)
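
# For orientation, plotRecursive walks input shaped roughly like the
# hypothetical, abbreviated fragment below; the authoritative layout is
# defined by the schema file (benchout_strings.schema.json):
#
#     {"bench-variant": "default",
#      "ifuncs": ["generic_memcpy", "__memcpy_avx_unaligned"],
#      "variants": [
#          {"name": "powers of 2",
#           "results": [{"length": 32, "timings": [15.3, 10.8]}]}]}
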
def main(args):
    """Program entry point.

    Args:
        args: command line arguments (excluding program name)
    """

    # Select non-GUI matplotlib backend if interactive display is disabled
    if not args.display:
        mpl.use("Agg")

    global plt
    import matplotlib.pyplot as plt

    with open(args.schema, "r") as f:
        schema = json.load(f)

    for filename in args.bench:
        if filename == '-':
            bench = json.load(sys.stdin)
        else:
            with open(filename, "r") as f:
                bench = json.load(f)

        validator.validate(bench, schema)

        for function in bench["functions"]:
            bench_variant = bench["functions"][function]["bench-variant"]
            ifuncs = bench["functions"][function]["ifuncs"]
            ifuncs = [x.replace("__", "") for x in ifuncs]

            plotRecursive(bench["functions"][function], function, ifuncs,
                          bench_variant, "", "", args.logarithmic)


     352  """ main() """
if __name__ == "__main__":

    parser = argparse.ArgumentParser(description=
            "Plot string microbenchmark results",
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Required parameter
    parser.add_argument("bench", nargs="+",
                        help="benchmark results file(s) in JSON format, " \
                        "and/or '-' to read a results file from stdin")

    # Optional parameters
    parser.add_argument("-b", "--baseline", type=str,
                        help="baseline ifunc for 'rel' plot")
    parser.add_argument("-d", "--display", action="store_true",
                        help="display figures")
    parser.add_argument("-e", "--extension", type=str, default="png",
                        choices=["png", "pdf", "svg"],
                        help="output file(s) extension")
    parser.add_argument("-g", "--grid", action="store_const", default="",
                        const="-", help="show grid lines")
    parser.add_argument("-i", "--ifuncs", nargs="+", default="all",
                        help="ifuncs to plot")
    parser.add_argument("-k", "--key", type=str, default="length",
                        help="key to access the varied parameter")
    parser.add_argument("-l", "--logarithmic", action="store_const",
                        default="linear", const="log",
                        help="use logarithmic x-axis scale")
    parser.add_argument("-o", "--outdir", type=str, default=os.getcwd(),
                        help="output directory")
    parser.add_argument("-p", "--plot", type=str, default="time",
                        choices=["time", "rel", "max", "thru"],
                        help="plot absolute timings, relative timings, " \
                        "performance relative to max, or throughput")
    parser.add_argument("-r", "--resolution", type=int, default=100,
                        help="dpi resolution for the generated figures")
    parser.add_argument("-s", "--schema", type=str,
                        default=os.path.join(os.path.dirname(
                        os.path.realpath(__file__)),
                        "benchout_strings.schema.json"),
                        help="schema file to validate the results file")
    parser.add_argument("-t", "--threshold", type=int, default=5,
                        help="threshold to mark in 'rel' graph (in %%)")
    parser.add_argument("-v", "--values", action="store_true",
                        help="show actual values")

    args = parser.parse_args()
    main(args)
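
# Example invocations (hypothetical file and ifunc names):
#
#     ./plot_strings.py bench-memcpy.out
#     ./plot_strings.py -p rel -b __memcpy_avx_unaligned bench-memcpy.out
#     cat bench-strlen.out | ./plot_strings.py -p thru -l -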