1  #! /usr/bin/env python3
       2  #
       3  # Class for profiling python code. rev 1.0  6/2/94
       4  #
       5  # Written by James Roskind
       6  # Based on prior profile module by Sjoerd Mullender...
       7  #   which was hacked somewhat by: Guido van Rossum
       8  
       9  """Class for profiling Python code."""
      10  
      11  # Copyright Disney Enterprises, Inc.  All Rights Reserved.
      12  # Licensed to PSF under a Contributor Agreement
      13  #
      14  # Licensed under the Apache License, Version 2.0 (the "License");
      15  # you may not use this file except in compliance with the License.
      16  # You may obtain a copy of the License at
      17  #
      18  # http://www.apache.org/licenses/LICENSE-2.0
      19  #
      20  # Unless required by applicable law or agreed to in writing, software
      21  # distributed under the License is distributed on an "AS IS" BASIS,
      22  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
      23  # either express or implied.  See the License for the specific language
      24  # governing permissions and limitations under the License.
      25  
      26  
      27  import io
      28  import sys
      29  import time
      30  import marshal
      31  
      32  __all__ = ["run", "runctx", "Profile"]
      33  
      34  # Sample timer for use with
      35  #i_count = 0
      36  #def integer_timer():
      37  #       global i_count
      38  #       i_count = i_count + 1
      39  #       return i_count
      40  #itimes = integer_timer # replace with C coded timer returning integers
      41  
      42  class ESC[4;38;5;81m_Utils:
      43      """Support class for utility functions which are shared by
      44      profile.py and cProfile.py modules.
      45      Not supposed to be used directly.
      46      """
      47  
      48      def __init__(self, profiler):
      49          self.profiler = profiler
      50  
      51      def run(self, statement, filename, sort):
      52          prof = self.profiler()
      53          try:
      54              prof.run(statement)
      55          except SystemExit:
      56              pass
      57          finally:
      58              self._show(prof, filename, sort)
      59  
      60      def runctx(self, statement, globals, locals, filename, sort):
      61          prof = self.profiler()
      62          try:
      63              prof.runctx(statement, globals, locals)
      64          except SystemExit:
      65              pass
      66          finally:
      67              self._show(prof, filename, sort)
      68  
      69      def _show(self, prof, filename, sort):
      70          if filename is not None:
      71              prof.dump_stats(filename)
      72          else:
      73              prof.print_stats(sort)
      74  
      75  
      76  #**************************************************************************
      77  # The following are the static member functions for the profiler class
      78  # Note that an instance of Profile() is *not* needed to call them.
      79  #**************************************************************************
      80  
      81  def run(statement, filename=None, sort=-1):
      82      """Run statement under profiler optionally saving results in filename
      83  
      84      This function takes a single argument that can be passed to the
      85      "exec" statement, and an optional file name.  In all cases this
      86      routine attempts to "exec" its first argument and gather profiling
      87      statistics from the execution. If no file name is present, then this
      88      function automatically prints a simple profiling report, sorted by the
      89      standard name string (file/line/function-name) that is presented in
      90      each line.
      91      """
      92      return _Utils(Profile).run(statement, filename, sort)
      93  
      94  def runctx(statement, globals, locals, filename=None, sort=-1):
      95      """Run statement under profiler, supplying your own globals and locals,
      96      optionally saving results in filename.
      97  
      98      statement and filename have the same semantics as profile.run
      99      """
     100      return _Utils(Profile).runctx(statement, globals, locals, filename, sort)
     101  
     102  
     103  class ESC[4;38;5;81mProfile:
     104      """Profiler class.
     105  
     106      self.cur is always a tuple.  Each such tuple corresponds to a stack
     107      frame that is currently active (self.cur[-2]).  The following are the
     108      definitions of its members.  We use this external "parallel stack" to
     109      avoid contaminating the program that we are profiling. (old profiler
     110      used to write into the frames local dictionary!!) Derived classes
     111      can change the definition of some entries, as long as they leave
     112      [-2:] intact (frame and previous tuple).  In case an internal error is
     113      detected, the -3 element is used as the function name.
     114  
     115      [ 0] = Time that needs to be charged to the parent frame's function.
     116             It is used so that a function call will not have to access the
     117             timing data for the parent frame.
     118      [ 1] = Total time spent in this frame's function, excluding time in
     119             subfunctions (this latter is tallied in cur[2]).
     120      [ 2] = Total time spent in subfunctions, excluding time executing the
     121             frame's function (this latter is tallied in cur[1]).
     122      [-3] = Name of the function that corresponds to this frame.
     123      [-2] = Actual frame that we correspond to (used to sync exception handling).
     124      [-1] = Our parent 6-tuple (corresponds to frame.f_back).
     125  
     126      Timing data for each function is stored as a 5-tuple in the dictionary
     127      self.timings[].  The index is always the name stored in self.cur[-3].
     128      The following are the definitions of the members:
     129  
     130      [0] = The number of times this function was called, not counting direct
     131            or indirect recursion,
     132      [1] = Number of times this function appears on the stack, minus one
     133      [2] = Total time spent internal to this function
     134      [3] = Cumulative time that this function was present on the stack.  In
     135            non-recursive functions, this is the total execution time from start
     136            to finish of each invocation of a function, including time spent in
     137            all subfunctions.
     138      [4] = A dictionary indicating for each function name, the number of times
     139            it was called by us.
     140      """
     141  
     142      bias = 0  # calibration constant
     143  
     144      def __init__(self, timer=None, bias=None):
     145          self.timings = {}
     146          self.cur = None
     147          self.cmd = ""
     148          self.c_func_name = ""
     149  
     150          if bias is None:
     151              bias = self.bias
     152          self.bias = bias     # Materialize in local dict for lookup speed.
     153  
     154          if not timer:
     155              self.timer = self.get_time = time.process_time
     156              self.dispatcher = self.trace_dispatch_i
     157          else:
     158              self.timer = timer
     159              t = self.timer() # test out timer function
     160              try:
     161                  length = len(t)
     162              except TypeError:
     163                  self.get_time = timer
     164                  self.dispatcher = self.trace_dispatch_i
     165              else:
     166                  if length == 2:
     167                      self.dispatcher = self.trace_dispatch
     168                  else:
     169                      self.dispatcher = self.trace_dispatch_l
     170                  # This get_time() implementation needs to be defined
     171                  # here to capture the passed-in timer in the parameter
     172                  # list (for performance).  Note that we can't assume
     173                  # the timer() result contains two values in all
     174                  # cases.
     175                  def get_time_timer(timer=timer, sum=sum):
     176                      return sum(timer())
     177                  self.get_time = get_time_timer
     178          self.t = self.get_time()
     179          self.simulate_call('profiler')
     180  
     181      # Heavily optimized dispatch routine for time.process_time() timer
     182  
     183      def trace_dispatch(self, frame, event, arg):
     184          timer = self.timer
     185          t = timer()
     186          t = t[0] + t[1] - self.t - self.bias
     187  
     188          if event == "c_call":
     189              self.c_func_name = arg.__name__
     190  
     191          if self.dispatch[event](self, frame,t):
     192              t = timer()
     193              self.t = t[0] + t[1]
     194          else:
     195              r = timer()
     196              self.t = r[0] + r[1] - t # put back unrecorded delta
     197  
     198      # Dispatch routine for best timer program (return = scalar, fastest if
     199      # an integer but float works too -- and time.process_time() relies on that).
     200  
     201      def trace_dispatch_i(self, frame, event, arg):
     202          timer = self.timer
     203          t = timer() - self.t - self.bias
     204  
     205          if event == "c_call":
     206              self.c_func_name = arg.__name__
     207  
     208          if self.dispatch[event](self, frame, t):
     209              self.t = timer()
     210          else:
     211              self.t = timer() - t  # put back unrecorded delta
     212  
     213      # Dispatch routine for macintosh (timer returns time in ticks of
     214      # 1/60th second)
     215  
     216      def trace_dispatch_mac(self, frame, event, arg):
     217          timer = self.timer
     218          t = timer()/60.0 - self.t - self.bias
     219  
     220          if event == "c_call":
     221              self.c_func_name = arg.__name__
     222  
     223          if self.dispatch[event](self, frame, t):
     224              self.t = timer()/60.0
     225          else:
     226              self.t = timer()/60.0 - t  # put back unrecorded delta
     227  
     228      # SLOW generic dispatch routine for timer returning lists of numbers
     229  
     230      def trace_dispatch_l(self, frame, event, arg):
     231          get_time = self.get_time
     232          t = get_time() - self.t - self.bias
     233  
     234          if event == "c_call":
     235              self.c_func_name = arg.__name__
     236  
     237          if self.dispatch[event](self, frame, t):
     238              self.t = get_time()
     239          else:
     240              self.t = get_time() - t # put back unrecorded delta
     241  
     242      # In the event handlers, the first 3 elements of self.cur are unpacked
     243      # into vrbls w/ 3-letter names.  The last two characters are meant to be
     244      # mnemonic:
     245      #     _pt  self.cur[0] "parent time"   time to be charged to parent frame
     246      #     _it  self.cur[1] "internal time" time spent directly in the function
     247      #     _et  self.cur[2] "external time" time spent in subfunctions
     248  
     249      def trace_dispatch_exception(self, frame, t):
     250          rpt, rit, ret, rfn, rframe, rcur = self.cur
     251          if (rframe is not frame) and rcur:
     252              return self.trace_dispatch_return(rframe, t)
     253          self.cur = rpt, rit+t, ret, rfn, rframe, rcur
     254          return 1
     255  
     256  
     257      def trace_dispatch_call(self, frame, t):
     258          if self.cur and frame.f_back is not self.cur[-2]:
     259              rpt, rit, ret, rfn, rframe, rcur = self.cur
     260              if not isinstance(rframe, Profile.fake_frame):
     261                  assert rframe.f_back is frame.f_back, ("Bad call", rfn,
     262                                                         rframe, rframe.f_back,
     263                                                         frame, frame.f_back)
     264                  self.trace_dispatch_return(rframe, 0)
     265                  assert (self.cur is None or \
     266                          frame.f_back is self.cur[-2]), ("Bad call",
     267                                                          self.cur[-3])
     268          fcode = frame.f_code
     269          fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
     270          self.cur = (t, 0, 0, fn, frame, self.cur)
     271          timings = self.timings
     272          if fn in timings:
     273              cc, ns, tt, ct, callers = timings[fn]
     274              timings[fn] = cc, ns + 1, tt, ct, callers
     275          else:
     276              timings[fn] = 0, 0, 0, 0, {}
     277          return 1
     278  
     279      def trace_dispatch_c_call (self, frame, t):
     280          fn = ("", 0, self.c_func_name)
     281          self.cur = (t, 0, 0, fn, frame, self.cur)
     282          timings = self.timings
     283          if fn in timings:
     284              cc, ns, tt, ct, callers = timings[fn]
     285              timings[fn] = cc, ns+1, tt, ct, callers
     286          else:
     287              timings[fn] = 0, 0, 0, 0, {}
     288          return 1
     289  
     290      def trace_dispatch_return(self, frame, t):
     291          if frame is not self.cur[-2]:
     292              assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
     293              self.trace_dispatch_return(self.cur[-2], 0)
     294  
     295          # Prefix "r" means part of the Returning or exiting frame.
     296          # Prefix "p" means part of the Previous or Parent or older frame.
     297  
     298          rpt, rit, ret, rfn, frame, rcur = self.cur
     299          rit = rit + t
     300          frame_total = rit + ret
     301  
     302          ppt, pit, pet, pfn, pframe, pcur = rcur
     303          self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur
     304  
     305          timings = self.timings
     306          cc, ns, tt, ct, callers = timings[rfn]
     307          if not ns:
     308              # This is the only occurrence of the function on the stack.
     309              # Else this is a (directly or indirectly) recursive call, and
     310              # its cumulative time will get updated when the topmost call to
     311              # it returns.
     312              ct = ct + frame_total
     313              cc = cc + 1
     314  
     315          if pfn in callers:
     316              callers[pfn] = callers[pfn] + 1  # hack: gather more
     317              # stats such as the amount of time added to ct courtesy
     318              # of this specific call, and the contribution to cc
     319              # courtesy of this call.
     320          else:
     321              callers[pfn] = 1
     322  
     323          timings[rfn] = cc, ns - 1, tt + rit, ct, callers
     324  
     325          return 1
     326  
     327  
     328      dispatch = {
     329          "call": trace_dispatch_call,
     330          "exception": trace_dispatch_exception,
     331          "return": trace_dispatch_return,
     332          "c_call": trace_dispatch_c_call,
     333          "c_exception": trace_dispatch_return,  # the C function returned
     334          "c_return": trace_dispatch_return,
     335          }
     336  
     337  
     338      # The next few functions play with self.cmd. By carefully preloading
     339      # our parallel stack, we can force the profiled result to include
     340      # an arbitrary string as the name of the calling function.
     341      # We use self.cmd as that string, and the resulting stats look
     342      # very nice :-).
     343  
     344      def set_cmd(self, cmd):
     345          if self.cur[-1]: return   # already set
     346          self.cmd = cmd
     347          self.simulate_call(cmd)
     348  
     349      class ESC[4;38;5;81mfake_code:
     350          def __init__(self, filename, line, name):
     351              self.co_filename = filename
     352              self.co_line = line
     353              self.co_name = name
     354              self.co_firstlineno = 0
     355  
     356          def __repr__(self):
     357              return repr((self.co_filename, self.co_line, self.co_name))
     358  
     359      class ESC[4;38;5;81mfake_frame:
     360          def __init__(self, code, prior):
     361              self.f_code = code
     362              self.f_back = prior
     363  
     364      def simulate_call(self, name):
     365          code = self.fake_code('profile', 0, name)
     366          if self.cur:
     367              pframe = self.cur[-2]
     368          else:
     369              pframe = None
     370          frame = self.fake_frame(code, pframe)
     371          self.dispatch['call'](self, frame, 0)
     372  
     373      # collect stats from pending stack, including getting final
     374      # timings for self.cmd frame.
     375  
     376      def simulate_cmd_complete(self):
     377          get_time = self.get_time
     378          t = get_time() - self.t
     379          while self.cur[-1]:
     380              # We *can* cause assertion errors here if
     381              # dispatch_trace_return checks for a frame match!
     382              self.dispatch['return'](self, self.cur[-2], t)
     383              t = 0
     384          self.t = get_time() - t
     385  
     386  
     387      def print_stats(self, sort=-1):
     388          import pstats
     389          pstats.Stats(self).strip_dirs().sort_stats(sort). \
     390                    print_stats()
     391  
     392      def dump_stats(self, file):
     393          with open(file, 'wb') as f:
     394              self.create_stats()
     395              marshal.dump(self.stats, f)
     396  
     397      def create_stats(self):
     398          self.simulate_cmd_complete()
     399          self.snapshot_stats()
     400  
     401      def snapshot_stats(self):
     402          self.stats = {}
     403          for func, (cc, ns, tt, ct, callers) in self.timings.items():
     404              callers = callers.copy()
     405              nc = 0
     406              for callcnt in callers.values():
     407                  nc += callcnt
     408              self.stats[func] = cc, nc, tt, ct, callers
     409  
     410  
     411      # The following two methods can be called by clients to use
     412      # a profiler to profile a statement, given as a string.
     413  
     414      def run(self, cmd):
     415          import __main__
     416          dict = __main__.__dict__
     417          return self.runctx(cmd, dict, dict)
     418  
     419      def runctx(self, cmd, globals, locals):
     420          self.set_cmd(cmd)
     421          sys.setprofile(self.dispatcher)
     422          try:
     423              exec(cmd, globals, locals)
     424          finally:
     425              sys.setprofile(None)
     426          return self
     427  
     428      # This method is more useful to profile a single function call.
     429      def runcall(self, func, /, *args, **kw):
     430          self.set_cmd(repr(func))
     431          sys.setprofile(self.dispatcher)
     432          try:
     433              return func(*args, **kw)
     434          finally:
     435              sys.setprofile(None)
     436  
     437  
     438      #******************************************************************
     439      # The following calculates the overhead for using a profiler.  The
     440      # problem is that it takes a fair amount of time for the profiler
     441      # to stop the stopwatch (from the time it receives an event).
     442      # Similarly, there is a delay from the time that the profiler
     443      # re-starts the stopwatch before the user's code really gets to
     444      # continue.  The following code tries to measure the difference on
     445      # a per-event basis.
     446      #
     447      # Note that this difference is only significant if there are a lot of
     448      # events, and relatively little user code per event.  For example,
     449      # code with small functions will typically benefit from having the
     450      # profiler calibrated for the current platform.  This *could* be
     451      # done on the fly during init() time, but it is not worth the
     452      # effort.  Also note that if too large a value specified, then
     453      # execution time on some functions will actually appear as a
     454      # negative number.  It is *normal* for some functions (with very
     455      # low call counts) to have such negative stats, even if the
     456      # calibration figure is "correct."
     457      #
     458      # One alternative to profile-time calibration adjustments (i.e.,
     459      # adding in the magic little delta during each event) is to track
     460      # more carefully the number of events (and cumulatively, the number
     461      # of events during sub functions) that are seen.  If this were
     462      # done, then the arithmetic could be done after the fact (i.e., at
     463      # display time).  Currently, we track only call/return events.
     464      # These values can be deduced by examining the callees and callers
     465      # vectors for each functions.  Hence we *can* almost correct the
     466      # internal time figure at print time (note that we currently don't
     467      # track exception event processing counts).  Unfortunately, there
     468      # is currently no similar information for cumulative sub-function
     469      # time.  It would not be hard to "get all this info" at profiler
     470      # time.  Specifically, we would have to extend the tuples to keep
     471      # counts of this in each frame, and then extend the defs of timing
     472      # tuples to include the significant two figures. I'm a bit fearful
     473      # that this additional feature will slow the heavily optimized
     474      # event/time ratio (i.e., the profiler would run slower, fur a very
     475      # low "value added" feature.)
     476      #**************************************************************
     477  
     478      def calibrate(self, m, verbose=0):
     479          if self.__class__ is not Profile:
     480              raise TypeError("Subclasses must override .calibrate().")
     481  
     482          saved_bias = self.bias
     483          self.bias = 0
     484          try:
     485              return self._calibrate_inner(m, verbose)
     486          finally:
     487              self.bias = saved_bias
     488  
     489      def _calibrate_inner(self, m, verbose):
     490          get_time = self.get_time
     491  
     492          # Set up a test case to be run with and without profiling.  Include
     493          # lots of calls, because we're trying to quantify stopwatch overhead.
     494          # Do not raise any exceptions, though, because we want to know
     495          # exactly how many profile events are generated (one call event, +
     496          # one return event, per Python-level call).
     497  
     498          def f1(n):
     499              for i in range(n):
     500                  x = 1
     501  
     502          def f(m, f1=f1):
     503              for i in range(m):
     504                  f1(100)
     505  
     506          f(m)    # warm up the cache
     507  
     508          # elapsed_noprofile <- time f(m) takes without profiling.
     509          t0 = get_time()
     510          f(m)
     511          t1 = get_time()
     512          elapsed_noprofile = t1 - t0
     513          if verbose:
     514              print("elapsed time without profiling =", elapsed_noprofile)
     515  
     516          # elapsed_profile <- time f(m) takes with profiling.  The difference
     517          # is profiling overhead, only some of which the profiler subtracts
     518          # out on its own.
     519          p = Profile()
     520          t0 = get_time()
     521          p.runctx('f(m)', globals(), locals())
     522          t1 = get_time()
     523          elapsed_profile = t1 - t0
     524          if verbose:
     525              print("elapsed time with profiling =", elapsed_profile)
     526  
     527          # reported_time <- "CPU seconds" the profiler charged to f and f1.
     528          total_calls = 0.0
     529          reported_time = 0.0
     530          for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
     531                  p.timings.items():
     532              if funcname in ("f", "f1"):
     533                  total_calls += cc
     534                  reported_time += tt
     535  
     536          if verbose:
     537              print("'CPU seconds' profiler reported =", reported_time)
     538              print("total # calls =", total_calls)
     539          if total_calls != m + 1:
     540              raise ValueError("internal error: total calls = %d" % total_calls)
     541  
     542          # reported_time - elapsed_noprofile = overhead the profiler wasn't
     543          # able to measure.  Divide by twice the number of calls (since there
     544          # are two profiler events per call in this test) to get the hidden
     545          # overhead per event.
     546          mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
     547          if verbose:
     548              print("mean stopwatch overhead per profile event =", mean)
     549          return mean
     550  
     551  #****************************************************************************
     552  
     553  def main():
     554      import os
     555      from optparse import OptionParser
     556  
     557      usage = "profile.py [-o output_file_path] [-s sort] [-m module | scriptfile] [arg] ..."
     558      parser = OptionParser(usage=usage)
     559      parser.allow_interspersed_args = False
     560      parser.add_option('-o', '--outfile', dest="outfile",
     561          help="Save stats to <outfile>", default=None)
     562      parser.add_option('-m', dest="module", action="store_true",
     563          help="Profile a library module.", default=False)
     564      parser.add_option('-s', '--sort', dest="sort",
     565          help="Sort order when printing to stdout, based on pstats.Stats class",
     566          default=-1)
     567  
     568      if not sys.argv[1:]:
     569          parser.print_usage()
     570          sys.exit(2)
     571  
     572      (options, args) = parser.parse_args()
     573      sys.argv[:] = args
     574  
     575      # The script that we're profiling may chdir, so capture the absolute path
     576      # to the output file at startup.
     577      if options.outfile is not None:
     578          options.outfile = os.path.abspath(options.outfile)
     579  
     580      if len(args) > 0:
     581          if options.module:
     582              import runpy
     583              code = "run_module(modname, run_name='__main__')"
     584              globs = {
     585                  'run_module': runpy.run_module,
     586                  'modname': args[0]
     587              }
     588          else:
     589              progname = args[0]
     590              sys.path.insert(0, os.path.dirname(progname))
     591              with io.open_code(progname) as fp:
     592                  code = compile(fp.read(), progname, 'exec')
     593              globs = {
     594                  '__file__': progname,
     595                  '__name__': '__main__',
     596                  '__package__': None,
     597                  '__cached__': None,
     598              }
     599          try:
     600              runctx(code, globs, None, options.outfile, options.sort)
     601          except BrokenPipeError as exc:
     602              # Prevent "Exception ignored" during interpreter shutdown.
     603              sys.stdout = None
     604              sys.exit(exc.errno)
     605      else:
     606          parser.print_usage()
     607      return parser
     608  
     609  # When invoked as main program, invoke the profiler on a script
     610  if __name__ == '__main__':
     611      main()