1  #! /usr/bin/env python3
       2  #
       3  # Class for profiling python code. rev 1.0  6/2/94
       4  #
       5  # Written by James Roskind
       6  # Based on prior profile module by Sjoerd Mullender...
       7  #   which was hacked somewhat by: Guido van Rossum
       8  
       9  """Class for profiling Python code."""
      10  
      11  # Copyright Disney Enterprises, Inc.  All Rights Reserved.
      12  # Licensed to PSF under a Contributor Agreement
      13  #
      14  # Licensed under the Apache License, Version 2.0 (the "License");
      15  # you may not use this file except in compliance with the License.
      16  # You may obtain a copy of the License at
      17  #
      18  # http://www.apache.org/licenses/LICENSE-2.0
      19  #
      20  # Unless required by applicable law or agreed to in writing, software
      21  # distributed under the License is distributed on an "AS IS" BASIS,
      22  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
      23  # either express or implied.  See the License for the specific language
      24  # governing permissions and limitations under the License.
      25  
      26  
      27  import importlib.machinery
      28  import io
      29  import sys
      30  import time
      31  import marshal
      32  
      33  __all__ = ["run", "runctx", "Profile"]
      34  
      35  # Sample timer for use with
      36  #i_count = 0
      37  #def integer_timer():
      38  #       global i_count
      39  #       i_count = i_count + 1
      40  #       return i_count
      41  #itimes = integer_timer # replace with C coded timer returning integers
      42  
      43  class ESC[4;38;5;81m_Utils:
      44      """Support class for utility functions which are shared by
      45      profile.py and cProfile.py modules.
      46      Not supposed to be used directly.
      47      """
      48  
      49      def __init__(self, profiler):
      50          self.profiler = profiler
      51  
      52      def run(self, statement, filename, sort):
      53          prof = self.profiler()
      54          try:
      55              prof.run(statement)
      56          except SystemExit:
      57              pass
      58          finally:
      59              self._show(prof, filename, sort)
      60  
      61      def runctx(self, statement, globals, locals, filename, sort):
      62          prof = self.profiler()
      63          try:
      64              prof.runctx(statement, globals, locals)
      65          except SystemExit:
      66              pass
      67          finally:
      68              self._show(prof, filename, sort)
      69  
      70      def _show(self, prof, filename, sort):
      71          if filename is not None:
      72              prof.dump_stats(filename)
      73          else:
      74              prof.print_stats(sort)
      75  
      76  
      77  #**************************************************************************
      78  # The following are the static member functions for the profiler class
      79  # Note that an instance of Profile() is *not* needed to call them.
      80  #**************************************************************************
      81  
      82  def run(statement, filename=None, sort=-1):
      83      """Run statement under profiler optionally saving results in filename
      84  
      85      This function takes a single argument that can be passed to the
      86      "exec" statement, and an optional file name.  In all cases this
      87      routine attempts to "exec" its first argument and gather profiling
      88      statistics from the execution. If no file name is present, then this
      89      function automatically prints a simple profiling report, sorted by the
      90      standard name string (file/line/function-name) that is presented in
      91      each line.
      92      """
      93      return _Utils(Profile).run(statement, filename, sort)
      94  
      95  def runctx(statement, globals, locals, filename=None, sort=-1):
      96      """Run statement under profiler, supplying your own globals and locals,
      97      optionally saving results in filename.
      98  
      99      statement and filename have the same semantics as profile.run
     100      """
     101      return _Utils(Profile).runctx(statement, globals, locals, filename, sort)
     102  
     103  
     104  class ESC[4;38;5;81mProfile:
     105      """Profiler class.
     106  
     107      self.cur is always a tuple.  Each such tuple corresponds to a stack
     108      frame that is currently active (self.cur[-2]).  The following are the
     109      definitions of its members.  We use this external "parallel stack" to
     110      avoid contaminating the program that we are profiling. (old profiler
     111      used to write into the frames local dictionary!!) Derived classes
     112      can change the definition of some entries, as long as they leave
     113      [-2:] intact (frame and previous tuple).  In case an internal error is
     114      detected, the -3 element is used as the function name.
     115  
     116      [ 0] = Time that needs to be charged to the parent frame's function.
     117             It is used so that a function call will not have to access the
     118             timing data for the parent frame.
     119      [ 1] = Total time spent in this frame's function, excluding time in
     120             subfunctions (this latter is tallied in cur[2]).
     121      [ 2] = Total time spent in subfunctions, excluding time executing the
     122             frame's function (this latter is tallied in cur[1]).
     123      [-3] = Name of the function that corresponds to this frame.
     124      [-2] = Actual frame that we correspond to (used to sync exception handling).
     125      [-1] = Our parent 6-tuple (corresponds to frame.f_back).
     126  
     127      Timing data for each function is stored as a 5-tuple in the dictionary
     128      self.timings[].  The index is always the name stored in self.cur[-3].
     129      The following are the definitions of the members:
     130  
     131      [0] = The number of times this function was called, not counting direct
     132            or indirect recursion,
     133      [1] = Number of times this function appears on the stack, minus one
     134      [2] = Total time spent internal to this function
     135      [3] = Cumulative time that this function was present on the stack.  In
     136            non-recursive functions, this is the total execution time from start
     137            to finish of each invocation of a function, including time spent in
     138            all subfunctions.
     139      [4] = A dictionary indicating for each function name, the number of times
     140            it was called by us.
     141      """
     142  
     143      bias = 0  # calibration constant
     144  
     145      def __init__(self, timer=None, bias=None):
     146          self.timings = {}
     147          self.cur = None
     148          self.cmd = ""
     149          self.c_func_name = ""
     150  
     151          if bias is None:
     152              bias = self.bias
     153          self.bias = bias     # Materialize in local dict for lookup speed.
     154  
     155          if not timer:
     156              self.timer = self.get_time = time.process_time
     157              self.dispatcher = self.trace_dispatch_i
     158          else:
     159              self.timer = timer
     160              t = self.timer() # test out timer function
     161              try:
     162                  length = len(t)
     163              except TypeError:
     164                  self.get_time = timer
     165                  self.dispatcher = self.trace_dispatch_i
     166              else:
     167                  if length == 2:
     168                      self.dispatcher = self.trace_dispatch
     169                  else:
     170                      self.dispatcher = self.trace_dispatch_l
     171                  # This get_time() implementation needs to be defined
     172                  # here to capture the passed-in timer in the parameter
     173                  # list (for performance).  Note that we can't assume
     174                  # the timer() result contains two values in all
     175                  # cases.
     176                  def get_time_timer(timer=timer, sum=sum):
     177                      return sum(timer())
     178                  self.get_time = get_time_timer
     179          self.t = self.get_time()
     180          self.simulate_call('profiler')
     181  
     182      # Heavily optimized dispatch routine for time.process_time() timer
     183  
     184      def trace_dispatch(self, frame, event, arg):
     185          timer = self.timer
     186          t = timer()
     187          t = t[0] + t[1] - self.t - self.bias
     188  
     189          if event == "c_call":
     190              self.c_func_name = arg.__name__
     191  
     192          if self.dispatch[event](self, frame,t):
     193              t = timer()
     194              self.t = t[0] + t[1]
     195          else:
     196              r = timer()
     197              self.t = r[0] + r[1] - t # put back unrecorded delta
     198  
     199      # Dispatch routine for best timer program (return = scalar, fastest if
     200      # an integer but float works too -- and time.process_time() relies on that).
     201  
     202      def trace_dispatch_i(self, frame, event, arg):
     203          timer = self.timer
     204          t = timer() - self.t - self.bias
     205  
     206          if event == "c_call":
     207              self.c_func_name = arg.__name__
     208  
     209          if self.dispatch[event](self, frame, t):
     210              self.t = timer()
     211          else:
     212              self.t = timer() - t  # put back unrecorded delta
     213  
     214      # Dispatch routine for macintosh (timer returns time in ticks of
     215      # 1/60th second)
     216  
     217      def trace_dispatch_mac(self, frame, event, arg):
     218          timer = self.timer
     219          t = timer()/60.0 - self.t - self.bias
     220  
     221          if event == "c_call":
     222              self.c_func_name = arg.__name__
     223  
     224          if self.dispatch[event](self, frame, t):
     225              self.t = timer()/60.0
     226          else:
     227              self.t = timer()/60.0 - t  # put back unrecorded delta
     228  
     229      # SLOW generic dispatch routine for timer returning lists of numbers
     230  
     231      def trace_dispatch_l(self, frame, event, arg):
     232          get_time = self.get_time
     233          t = get_time() - self.t - self.bias
     234  
     235          if event == "c_call":
     236              self.c_func_name = arg.__name__
     237  
     238          if self.dispatch[event](self, frame, t):
     239              self.t = get_time()
     240          else:
     241              self.t = get_time() - t # put back unrecorded delta
     242  
     243      # In the event handlers, the first 3 elements of self.cur are unpacked
     244      # into vrbls w/ 3-letter names.  The last two characters are meant to be
     245      # mnemonic:
     246      #     _pt  self.cur[0] "parent time"   time to be charged to parent frame
     247      #     _it  self.cur[1] "internal time" time spent directly in the function
     248      #     _et  self.cur[2] "external time" time spent in subfunctions
     249  
     250      def trace_dispatch_exception(self, frame, t):
     251          rpt, rit, ret, rfn, rframe, rcur = self.cur
     252          if (rframe is not frame) and rcur:
     253              return self.trace_dispatch_return(rframe, t)
     254          self.cur = rpt, rit+t, ret, rfn, rframe, rcur
     255          return 1
     256  
     257  
     258      def trace_dispatch_call(self, frame, t):
     259          if self.cur and frame.f_back is not self.cur[-2]:
     260              rpt, rit, ret, rfn, rframe, rcur = self.cur
     261              if not isinstance(rframe, Profile.fake_frame):
     262                  assert rframe.f_back is frame.f_back, ("Bad call", rfn,
     263                                                         rframe, rframe.f_back,
     264                                                         frame, frame.f_back)
     265                  self.trace_dispatch_return(rframe, 0)
     266                  assert (self.cur is None or \
     267                          frame.f_back is self.cur[-2]), ("Bad call",
     268                                                          self.cur[-3])
     269          fcode = frame.f_code
     270          fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
     271          self.cur = (t, 0, 0, fn, frame, self.cur)
     272          timings = self.timings
     273          if fn in timings:
     274              cc, ns, tt, ct, callers = timings[fn]
     275              timings[fn] = cc, ns + 1, tt, ct, callers
     276          else:
     277              timings[fn] = 0, 0, 0, 0, {}
     278          return 1
     279  
     280      def trace_dispatch_c_call (self, frame, t):
     281          fn = ("", 0, self.c_func_name)
     282          self.cur = (t, 0, 0, fn, frame, self.cur)
     283          timings = self.timings
     284          if fn in timings:
     285              cc, ns, tt, ct, callers = timings[fn]
     286              timings[fn] = cc, ns+1, tt, ct, callers
     287          else:
     288              timings[fn] = 0, 0, 0, 0, {}
     289          return 1
     290  
     291      def trace_dispatch_return(self, frame, t):
     292          if frame is not self.cur[-2]:
     293              assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
     294              self.trace_dispatch_return(self.cur[-2], 0)
     295  
     296          # Prefix "r" means part of the Returning or exiting frame.
     297          # Prefix "p" means part of the Previous or Parent or older frame.
     298  
     299          rpt, rit, ret, rfn, frame, rcur = self.cur
     300          rit = rit + t
     301          frame_total = rit + ret
     302  
     303          ppt, pit, pet, pfn, pframe, pcur = rcur
     304          self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur
     305  
     306          timings = self.timings
     307          cc, ns, tt, ct, callers = timings[rfn]
     308          if not ns:
     309              # This is the only occurrence of the function on the stack.
     310              # Else this is a (directly or indirectly) recursive call, and
     311              # its cumulative time will get updated when the topmost call to
     312              # it returns.
     313              ct = ct + frame_total
     314              cc = cc + 1
     315  
     316          if pfn in callers:
     317              callers[pfn] = callers[pfn] + 1  # hack: gather more
     318              # stats such as the amount of time added to ct courtesy
     319              # of this specific call, and the contribution to cc
     320              # courtesy of this call.
     321          else:
     322              callers[pfn] = 1
     323  
     324          timings[rfn] = cc, ns - 1, tt + rit, ct, callers
     325  
     326          return 1
     327  
     328  
     329      dispatch = {
     330          "call": trace_dispatch_call,
     331          "exception": trace_dispatch_exception,
     332          "return": trace_dispatch_return,
     333          "c_call": trace_dispatch_c_call,
     334          "c_exception": trace_dispatch_return,  # the C function returned
     335          "c_return": trace_dispatch_return,
     336          }
     337  
     338  
     339      # The next few functions play with self.cmd. By carefully preloading
     340      # our parallel stack, we can force the profiled result to include
     341      # an arbitrary string as the name of the calling function.
     342      # We use self.cmd as that string, and the resulting stats look
     343      # very nice :-).
     344  
     345      def set_cmd(self, cmd):
     346          if self.cur[-1]: return   # already set
     347          self.cmd = cmd
     348          self.simulate_call(cmd)
     349  
     350      class ESC[4;38;5;81mfake_code:
     351          def __init__(self, filename, line, name):
     352              self.co_filename = filename
     353              self.co_line = line
     354              self.co_name = name
     355              self.co_firstlineno = 0
     356  
     357          def __repr__(self):
     358              return repr((self.co_filename, self.co_line, self.co_name))
     359  
     360      class ESC[4;38;5;81mfake_frame:
     361          def __init__(self, code, prior):
     362              self.f_code = code
     363              self.f_back = prior
     364  
     365      def simulate_call(self, name):
     366          code = self.fake_code('profile', 0, name)
     367          if self.cur:
     368              pframe = self.cur[-2]
     369          else:
     370              pframe = None
     371          frame = self.fake_frame(code, pframe)
     372          self.dispatch['call'](self, frame, 0)
     373  
     374      # collect stats from pending stack, including getting final
     375      # timings for self.cmd frame.
     376  
     377      def simulate_cmd_complete(self):
     378          get_time = self.get_time
     379          t = get_time() - self.t
     380          while self.cur[-1]:
     381              # We *can* cause assertion errors here if
     382              # dispatch_trace_return checks for a frame match!
     383              self.dispatch['return'](self, self.cur[-2], t)
     384              t = 0
     385          self.t = get_time() - t
     386  
     387  
     388      def print_stats(self, sort=-1):
     389          import pstats
     390          pstats.Stats(self).strip_dirs().sort_stats(sort). \
     391                    print_stats()
     392  
     393      def dump_stats(self, file):
     394          with open(file, 'wb') as f:
     395              self.create_stats()
     396              marshal.dump(self.stats, f)
     397  
     398      def create_stats(self):
     399          self.simulate_cmd_complete()
     400          self.snapshot_stats()
     401  
     402      def snapshot_stats(self):
     403          self.stats = {}
     404          for func, (cc, ns, tt, ct, callers) in self.timings.items():
     405              callers = callers.copy()
     406              nc = 0
     407              for callcnt in callers.values():
     408                  nc += callcnt
     409              self.stats[func] = cc, nc, tt, ct, callers
     410  
     411  
     412      # The following two methods can be called by clients to use
     413      # a profiler to profile a statement, given as a string.
     414  
     415      def run(self, cmd):
     416          import __main__
     417          dict = __main__.__dict__
     418          return self.runctx(cmd, dict, dict)
     419  
     420      def runctx(self, cmd, globals, locals):
     421          self.set_cmd(cmd)
     422          sys.setprofile(self.dispatcher)
     423          try:
     424              exec(cmd, globals, locals)
     425          finally:
     426              sys.setprofile(None)
     427          return self
     428  
     429      # This method is more useful to profile a single function call.
     430      def runcall(self, func, /, *args, **kw):
     431          self.set_cmd(repr(func))
     432          sys.setprofile(self.dispatcher)
     433          try:
     434              return func(*args, **kw)
     435          finally:
     436              sys.setprofile(None)
     437  
     438  
     439      #******************************************************************
     440      # The following calculates the overhead for using a profiler.  The
     441      # problem is that it takes a fair amount of time for the profiler
     442      # to stop the stopwatch (from the time it receives an event).
     443      # Similarly, there is a delay from the time that the profiler
     444      # re-starts the stopwatch before the user's code really gets to
     445      # continue.  The following code tries to measure the difference on
     446      # a per-event basis.
     447      #
     448      # Note that this difference is only significant if there are a lot of
     449      # events, and relatively little user code per event.  For example,
     450      # code with small functions will typically benefit from having the
     451      # profiler calibrated for the current platform.  This *could* be
     452      # done on the fly during init() time, but it is not worth the
     453      # effort.  Also note that if too large a value specified, then
     454      # execution time on some functions will actually appear as a
     455      # negative number.  It is *normal* for some functions (with very
     456      # low call counts) to have such negative stats, even if the
     457      # calibration figure is "correct."
     458      #
     459      # One alternative to profile-time calibration adjustments (i.e.,
     460      # adding in the magic little delta during each event) is to track
     461      # more carefully the number of events (and cumulatively, the number
     462      # of events during sub functions) that are seen.  If this were
     463      # done, then the arithmetic could be done after the fact (i.e., at
     464      # display time).  Currently, we track only call/return events.
     465      # These values can be deduced by examining the callees and callers
     466      # vectors for each functions.  Hence we *can* almost correct the
     467      # internal time figure at print time (note that we currently don't
     468      # track exception event processing counts).  Unfortunately, there
     469      # is currently no similar information for cumulative sub-function
     470      # time.  It would not be hard to "get all this info" at profiler
     471      # time.  Specifically, we would have to extend the tuples to keep
     472      # counts of this in each frame, and then extend the defs of timing
     473      # tuples to include the significant two figures. I'm a bit fearful
     474      # that this additional feature will slow the heavily optimized
     475      # event/time ratio (i.e., the profiler would run slower, fur a very
     476      # low "value added" feature.)
     477      #**************************************************************
     478  
     479      def calibrate(self, m, verbose=0):
     480          if self.__class__ is not Profile:
     481              raise TypeError("Subclasses must override .calibrate().")
     482  
     483          saved_bias = self.bias
     484          self.bias = 0
     485          try:
     486              return self._calibrate_inner(m, verbose)
     487          finally:
     488              self.bias = saved_bias
     489  
     490      def _calibrate_inner(self, m, verbose):
     491          get_time = self.get_time
     492  
     493          # Set up a test case to be run with and without profiling.  Include
     494          # lots of calls, because we're trying to quantify stopwatch overhead.
     495          # Do not raise any exceptions, though, because we want to know
     496          # exactly how many profile events are generated (one call event, +
     497          # one return event, per Python-level call).
     498  
     499          def f1(n):
     500              for i in range(n):
     501                  x = 1
     502  
     503          def f(m, f1=f1):
     504              for i in range(m):
     505                  f1(100)
     506  
     507          f(m)    # warm up the cache
     508  
     509          # elapsed_noprofile <- time f(m) takes without profiling.
     510          t0 = get_time()
     511          f(m)
     512          t1 = get_time()
     513          elapsed_noprofile = t1 - t0
     514          if verbose:
     515              print("elapsed time without profiling =", elapsed_noprofile)
     516  
     517          # elapsed_profile <- time f(m) takes with profiling.  The difference
     518          # is profiling overhead, only some of which the profiler subtracts
     519          # out on its own.
     520          p = Profile()
     521          t0 = get_time()
     522          p.runctx('f(m)', globals(), locals())
     523          t1 = get_time()
     524          elapsed_profile = t1 - t0
     525          if verbose:
     526              print("elapsed time with profiling =", elapsed_profile)
     527  
     528          # reported_time <- "CPU seconds" the profiler charged to f and f1.
     529          total_calls = 0.0
     530          reported_time = 0.0
     531          for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
     532                  p.timings.items():
     533              if funcname in ("f", "f1"):
     534                  total_calls += cc
     535                  reported_time += tt
     536  
     537          if verbose:
     538              print("'CPU seconds' profiler reported =", reported_time)
     539              print("total # calls =", total_calls)
     540          if total_calls != m + 1:
     541              raise ValueError("internal error: total calls = %d" % total_calls)
     542  
     543          # reported_time - elapsed_noprofile = overhead the profiler wasn't
     544          # able to measure.  Divide by twice the number of calls (since there
     545          # are two profiler events per call in this test) to get the hidden
     546          # overhead per event.
     547          mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
     548          if verbose:
     549              print("mean stopwatch overhead per profile event =", mean)
     550          return mean
     551  
     552  #****************************************************************************
     553  
     554  def main():
     555      import os
     556      from optparse import OptionParser
     557  
     558      usage = "profile.py [-o output_file_path] [-s sort] [-m module | scriptfile] [arg] ..."
     559      parser = OptionParser(usage=usage)
     560      parser.allow_interspersed_args = False
     561      parser.add_option('-o', '--outfile', dest="outfile",
     562          help="Save stats to <outfile>", default=None)
     563      parser.add_option('-m', dest="module", action="store_true",
     564          help="Profile a library module.", default=False)
     565      parser.add_option('-s', '--sort', dest="sort",
     566          help="Sort order when printing to stdout, based on pstats.Stats class",
     567          default=-1)
     568  
     569      if not sys.argv[1:]:
     570          parser.print_usage()
     571          sys.exit(2)
     572  
     573      (options, args) = parser.parse_args()
     574      sys.argv[:] = args
     575  
     576      # The script that we're profiling may chdir, so capture the absolute path
     577      # to the output file at startup.
     578      if options.outfile is not None:
     579          options.outfile = os.path.abspath(options.outfile)
     580  
     581      if len(args) > 0:
     582          if options.module:
     583              import runpy
     584              code = "run_module(modname, run_name='__main__')"
     585              globs = {
     586                  'run_module': runpy.run_module,
     587                  'modname': args[0]
     588              }
     589          else:
     590              progname = args[0]
     591              sys.path.insert(0, os.path.dirname(progname))
     592              with io.open_code(progname) as fp:
     593                  code = compile(fp.read(), progname, 'exec')
     594              spec = importlib.machinery.ModuleSpec(name='__main__', loader=None,
     595                                                    origin=progname)
     596              globs = {
     597                  '__spec__': spec,
     598                  '__file__': spec.origin,
     599                  '__name__': spec.name,
     600                  '__package__': None,
     601                  '__cached__': None,
     602              }
     603          try:
     604              runctx(code, globs, None, options.outfile, options.sort)
     605          except BrokenPipeError as exc:
     606              # Prevent "Exception ignored" during interpreter shutdown.
     607              sys.stdout = None
     608              sys.exit(exc.errno)
     609      else:
     610          parser.print_usage()
     611      return parser
     612  
     613  # When invoked as main program, invoke the profiler on a script
     614  if __name__ == '__main__':
     615      main()