glibc-2.38/benchtests/scripts/compare_bench.py
#!/usr/bin/python
# Copyright (C) 2015-2023 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
#
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <https://www.gnu.org/licenses/>.
"""Compare two benchmark results

Given two benchmark result files and a threshold, this script compares the
benchmark results and flags differences in performance beyond a given
threshold.
"""
import sys
import os
import pylab
import import_bench as bench
import argparse
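
# import_bench is the sibling module in benchtests/scripts that provides
# parse_bench, do_for_all_timings and compress_timings used below; pylab is
# provided by matplotlib.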

def do_compare(func, var, tl1, tl2, par, threshold):
    """Compare one of the aggregate measurements

    Helper function to compare one of the aggregate measurements of a function
    variant.

    Args:
        func: Function name
        var: Function variant name
        tl1: The first timings list
        tl2: The second timings list
        par: The aggregate to measure
        threshold: The threshold for differences, beyond which the script should
        print a warning.
    """
    try:
        v1 = tl1[str(par)]
        v2 = tl2[str(par)]
        d = abs(v2 - v1) * 100 / v1
    except KeyError:
        sys.stderr.write('%s(%s)[%s]: stat does not exist\n' % (func, var, par))
        return
    except ZeroDivisionError:
        return

    if d > threshold:
        if v1 > v2:
            ind = '+++'
        else:
            ind = '---'
        print('%s %s(%s)[%s]: (%.2lf%%) from %g to %g' %
                (ind, func, var, par, d, v1, v2))
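
# For illustration only (function, variant and numbers are made up): with the
# formula above, v1 = 100 and v2 = 112 give d = 12.00, so with the default
# threshold of 10 a regression is printed as
#
#   --- memcpy(default)[mean]: (12.00%) from 100 to 112
#
# while v1 = 125 and v2 = 100 give d = 20.00 and an improvement line
#
#   +++ memcpy(default)[mean]: (20.00%) from 125 to 100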


def compare_runs(pts1, pts2, threshold, stats):
    """Compare two benchmark runs

    Args:
        pts1: Timing data from first machine
        pts2: Timing data from second machine
        threshold: Percentage difference beyond which a change is reported
        stats: Space separated list of aggregates to compare
    """

    # XXX We assume that the two benchmarks have identical functions and
    # variants.  We cannot compare two benchmarks that may have different
    # functions or variants.  Maybe that is something for the future.
    for func in pts1['functions'].keys():
        for var in pts1['functions'][func].keys():
            tl1 = pts1['functions'][func][var]
            tl2 = pts2['functions'][func][var]

            # Compare the consolidated numbers
            # do_compare(func, var, tl1, tl2, 'max', threshold)
            for stat in stats.split():
                do_compare(func, var, tl1, tl2, stat, threshold)

            # Skip over to the next variant or function if there is no detailed
            # timing info for the function variant.
            if 'timings' not in pts1['functions'][func][var].keys() or \
                'timings' not in pts2['functions'][func][var].keys():
                continue

            # If two lists do not have the same length then it is likely that
            # the performance characteristics of the function have changed.
            # XXX: It is also likely that there was some measurement that
            # strayed outside the usual range.  Such outliers should not
            # happen on an idle machine with identical hardware and
            # configuration, but ideal environments are hard to come by.
            if len(tl1['timings']) != len(tl2['timings']):
                print('* %s(%s): Timing characteristics changed' %
                        (func, var))
                print('\tBefore: [%s]' %
                        ', '.join([str(x) for x in tl1['timings']]))
                print('\tAfter: [%s]' %
                        ', '.join([str(x) for x in tl2['timings']]))
                continue

            # Collect numbers whose differences cross the threshold we have
            # set.
            issues = [(x, y) for x, y in zip(tl1['timings'], tl2['timings']) \
                        if abs(y - x) * 100 / x > threshold]

            # Now print them.
            for t1, t2 in issues:
                d = abs(t2 - t1) * 100 / t1
                if t2 > t1:
                    ind = '-'
                else:
                    ind = '+'

                print("%s %s(%s): (%.2lf%%) from %g to %g" %
                        (ind, func, var, d, t1, t2))
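
# Again purely illustrative (names and numbers invented): a per-sample
# regression from 100 to 120 cycles would be reported as
#
#   - memcpy(default): (20.00%) from 100 to 120
#
# and a length mismatch between the two 'timings' lists as
#
#   * memcpy(default): Timing characteristics changed
#
# followed by the Before/After lists printed above.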


def plot_graphs(bench1, bench2):
    """Plot graphs for functions

    Make scatter plots for the functions and their variants.

    Args:
        bench1: Set of points from the first machine
        bench2: Set of points from the second machine.
    """
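    # The first run's timings are plotted in red and the second run's in
    # green; each figure is saved in the current working directory as
    # FUNC-VAR.png, or FUNC.png when the variant name is empty.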
    for func in bench1['functions'].keys():
        for var in bench1['functions'][func].keys():
            # No point trying to print a graph if there are no detailed
            # timings.
            if 'timings' not in bench1['functions'][func][var].keys():
                sys.stderr.write('Skipping graph for %s(%s)\n' % (func, var))
                continue

            pylab.clf()
            pylab.ylabel('Time (cycles)')

            # First set of points
            length = len(bench1['functions'][func][var]['timings'])
            X = [float(x) for x in range(length)]
            lines = pylab.scatter(X, bench1['functions'][func][var]['timings'],
                    1.5 + 100 / length)
            pylab.setp(lines, 'color', 'r')

            # Second set of points
            length = len(bench2['functions'][func][var]['timings'])
            X = [float(x) for x in range(length)]
            lines = pylab.scatter(X, bench2['functions'][func][var]['timings'],
                    1.5 + 100 / length)
            pylab.setp(lines, 'color', 'g')

            if var:
                filename = "%s-%s.png" % (func, var)
            else:
                filename = "%s.png" % func
            sys.stderr.write('Writing out %s\n' % filename)
            pylab.savefig(filename)

def main(bench1, bench2, schema, threshold, stats):
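    """Parse, plot and compare two benchmark result files.

    Args:
        bench1: File name of the first benchmark result
        bench2: File name of the second benchmark result
        schema: JSON schema used to validate both input files
        threshold: Percentage difference beyond which changes are reported
        stats: Space separated list of aggregates to compare
    """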
    bench1 = bench.parse_bench(bench1, schema)
    bench.do_for_all_timings(bench1, lambda b, f, v:
        b['functions'][f][v]['timings'].sort())
    bench2 = bench.parse_bench(bench2, schema)
    bench.do_for_all_timings(bench2, lambda b, f, v:
        b['functions'][f][v]['timings'].sort())

    plot_graphs(bench1, bench2)

    bench.compress_timings(bench1)
    bench.compress_timings(bench2)

    compare_runs(bench1, bench2, threshold, stats)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Take two benchmark results and compare their timings.')

    # Required parameters
    parser.add_argument('bench1', help='First benchmark result to compare')
    parser.add_argument('bench2', help='Second benchmark result to compare')

    # Optional parameters
    parser.add_argument('--schema',
                        default=os.path.join(os.path.dirname(os.path.realpath(__file__)),'benchout.schema.json'),
                        help='JSON file to validate source/dest files (default: %(default)s)')
    parser.add_argument('--threshold', default=10.0, type=float, help='Only print differences greater than this percentage threshold (default: %(default)s)')
    parser.add_argument('--stats', default='min mean', type=str, help='Only consider values from the statistics specified as a space separated list (default: %(default)s)')

    args = parser.parse_args()

    main(args.bench1, args.bench2, args.schema, args.threshold, args.stats)