1  #!/usr/bin/python
       2  # Copyright (C) 2015-2023 Free Software Foundation, Inc.
       3  # This file is part of the GNU C Library.
       4  #
       5  # The GNU C Library is free software; you can redistribute it and/or
       6  # modify it under the terms of the GNU Lesser General Public
       7  # License as published by the Free Software Foundation; either
       8  # version 2.1 of the License, or (at your option) any later version.
       9  #
      10  # The GNU C Library is distributed in the hope that it will be useful,
      11  # but WITHOUT ANY WARRANTY; without even the implied warranty of
      12  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13  # Lesser General Public License for more details.
      14  #
      15  # You should have received a copy of the GNU Lesser General Public
      16  # License along with the GNU C Library; if not, see
      17  # <https://www.gnu.org/licenses/>.
      18  """Functions to import benchmark data and process it"""
      19  
      20  import json
      21  try:
      22      import jsonschema as validator
      23  except ImportError:
      24      print('Could not find jsonschema module.')
      25      raise
      26  
      27  
      28  def mean(lst):
      29      """Compute and return mean of numbers in a list
      30  
      31      The numpy average function has horrible performance, so implement our
      32      own mean function.
      33  
      34      Args:
      35          lst: The list of numbers to average.
      36      Return:
      37          The mean of members in the list.
      38      """
      39      return sum(lst) / len(lst)
      40  
      41  
      42  def split_list(bench, func, var):
      43      """ Split the list into a smaller set of more distinct points
      44  
      45      Group together points such that the difference between the smallest
      46      point and the mean is less than 1/3rd of the mean.  This means that
      47      the mean is at most 1.5x the smallest member of that group.
      48  
      49      mean - xmin < mean / 3
      50      i.e. 2 * mean / 3 < xmin
      51      i.e. mean < 3 * xmin / 2
      52  
      53      For an evenly distributed group, the largest member will be less than
      54      twice the smallest member of the group.
      55      Derivation:
      56  
      57      An evenly distributed series would be xmin, xmin + d, xmin + 2d...
      58  
      59      mean = (2 * n * xmin + n * (n - 1) * d) / 2 * n
      60      and max element is xmin + (n - 1) * d
      61  
      62      Now, mean < 3 * xmin / 2
      63  
      64      3 * xmin > 2 * mean
      65      3 * xmin > (2 * n * xmin + n * (n - 1) * d) / n
      66      3 * n * xmin > 2 * n * xmin + n * (n - 1) * d
      67      n * xmin > n * (n - 1) * d
      68      xmin > (n - 1) * d
      69      2 * xmin > xmin + (n-1) * d
      70      2 * xmin > xmax
      71  
      72      Hence, proved.
      73  
      74      Similarly, it is trivial to prove that for a similar aggregation by using
      75      the maximum element, the maximum element in the group must be at most 4/3
      76      times the mean.
      77  
      78      Args:
      79          bench: The benchmark object
      80          func: The function name
      81          var: The function variant name
      82      """
      83      means = []
      84      lst = bench['functions'][func][var]['timings']
      85      last = len(lst) - 1
      86      while lst:
      87          for i in range(last + 1):
      88              avg = mean(lst[i:])
      89              if avg > 0.75 * lst[last]:
      90                  means.insert(0, avg)
      91                  lst = lst[:i]
      92                  last = i - 1
      93                  break
      94      bench['functions'][func][var]['timings'] = means
      95  
      96  
      97  def do_for_all_timings(bench, callback):
      98      """Call a function for all timing objects for each function and its
      99      variants.
     100  
     101      Args:
     102          bench: The benchmark object
     103          callback: The callback function
     104      """
     105      for func in bench['functions'].keys():
     106          for k in bench['functions'][func].keys():
     107              if 'timings' not in bench['functions'][func][k].keys():
     108                  continue
     109  
     110              callback(bench, func, k)
     111  
     112  
     113  def compress_timings(points):
     114      """Club points with close enough values into a single mean value
     115  
     116      See split_list for details on how the clubbing is done.
     117  
     118      Args:
     119          points: The set of points.
     120      """
     121      do_for_all_timings(points, split_list)
     122  
     123  
     124  def parse_bench(filename, schema_filename):
     125      """Parse the input file
     126  
     127      Parse and validate the json file containing the benchmark outputs.  Return
     128      the resulting object.
     129      Args:
     130          filename: Name of the benchmark output file.
     131      Return:
     132          The bench dictionary.
     133      """
     134      with open(schema_filename, 'r') as schemafile:
     135          schema = json.load(schemafile)
     136          with open(filename, 'r') as benchfile:
     137              bench = json.load(benchfile)
     138              validator.validate(bench, schema)
     139              return bench