1 #! /usr/bin/env python3
2 #
3 # Class for profiling python code. rev 1.0 6/2/94
4 #
5 # Written by James Roskind
6 # Based on prior profile module by Sjoerd Mullender...
7 # which was hacked somewhat by: Guido van Rossum
8
9 """Class for profiling Python code."""
10
11 # Copyright Disney Enterprises, Inc. All Rights Reserved.
12 # Licensed to PSF under a Contributor Agreement
13 #
14 # Licensed under the Apache License, Version 2.0 (the "License");
15 # you may not use this file except in compliance with the License.
16 # You may obtain a copy of the License at
17 #
18 # http://www.apache.org/licenses/LICENSE-2.0
19 #
20 # Unless required by applicable law or agreed to in writing, software
21 # distributed under the License is distributed on an "AS IS" BASIS,
22 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
23 # either express or implied. See the License for the specific language
24 # governing permissions and limitations under the License.
25
26
27 import importlib.machinery
28 import io
29 import sys
30 import time
31 import marshal
32
33 __all__ = ["run", "runctx", "Profile"]
34
35 # Sample timer for use with
36 #i_count = 0
37 #def integer_timer():
38 # global i_count
39 # i_count = i_count + 1
40 # return i_count
41 #itimes = integer_timer # replace with C coded timer returning integers
42
43 class ESC[4;38;5;81m_Utils:
44 """Support class for utility functions which are shared by
45 profile.py and cProfile.py modules.
46 Not supposed to be used directly.
47 """
48
49 def __init__(self, profiler):
50 self.profiler = profiler
51
52 def run(self, statement, filename, sort):
53 prof = self.profiler()
54 try:
55 prof.run(statement)
56 except SystemExit:
57 pass
58 finally:
59 self._show(prof, filename, sort)
60
61 def runctx(self, statement, globals, locals, filename, sort):
62 prof = self.profiler()
63 try:
64 prof.runctx(statement, globals, locals)
65 except SystemExit:
66 pass
67 finally:
68 self._show(prof, filename, sort)
69
70 def _show(self, prof, filename, sort):
71 if filename is not None:
72 prof.dump_stats(filename)
73 else:
74 prof.print_stats(sort)
75
76
77 #**************************************************************************
78 # The following are the static member functions for the profiler class
79 # Note that an instance of Profile() is *not* needed to call them.
80 #**************************************************************************
81
82 def run(statement, filename=None, sort=-1):
83 """Run statement under profiler optionally saving results in filename
84
85 This function takes a single argument that can be passed to the
86 "exec" statement, and an optional file name. In all cases this
87 routine attempts to "exec" its first argument and gather profiling
88 statistics from the execution. If no file name is present, then this
89 function automatically prints a simple profiling report, sorted by the
90 standard name string (file/line/function-name) that is presented in
91 each line.
92 """
93 return _Utils(Profile).run(statement, filename, sort)
94
95 def runctx(statement, globals, locals, filename=None, sort=-1):
96 """Run statement under profiler, supplying your own globals and locals,
97 optionally saving results in filename.
98
99 statement and filename have the same semantics as profile.run
100 """
101 return _Utils(Profile).runctx(statement, globals, locals, filename, sort)
102
103
104 class ESC[4;38;5;81mProfile:
105 """Profiler class.
106
107 self.cur is always a tuple. Each such tuple corresponds to a stack
108 frame that is currently active (self.cur[-2]). The following are the
109 definitions of its members. We use this external "parallel stack" to
110 avoid contaminating the program that we are profiling. (old profiler
111 used to write into the frames local dictionary!!) Derived classes
112 can change the definition of some entries, as long as they leave
113 [-2:] intact (frame and previous tuple). In case an internal error is
114 detected, the -3 element is used as the function name.
115
116 [ 0] = Time that needs to be charged to the parent frame's function.
117 It is used so that a function call will not have to access the
118 timing data for the parent frame.
119 [ 1] = Total time spent in this frame's function, excluding time in
120 subfunctions (this latter is tallied in cur[2]).
121 [ 2] = Total time spent in subfunctions, excluding time executing the
122 frame's function (this latter is tallied in cur[1]).
123 [-3] = Name of the function that corresponds to this frame.
124 [-2] = Actual frame that we correspond to (used to sync exception handling).
125 [-1] = Our parent 6-tuple (corresponds to frame.f_back).
126
127 Timing data for each function is stored as a 5-tuple in the dictionary
128 self.timings[]. The index is always the name stored in self.cur[-3].
129 The following are the definitions of the members:
130
131 [0] = The number of times this function was called, not counting direct
132 or indirect recursion,
133 [1] = Number of times this function appears on the stack, minus one
134 [2] = Total time spent internal to this function
135 [3] = Cumulative time that this function was present on the stack. In
136 non-recursive functions, this is the total execution time from start
137 to finish of each invocation of a function, including time spent in
138 all subfunctions.
139 [4] = A dictionary indicating for each function name, the number of times
140 it was called by us.
141 """
142
143 bias = 0 # calibration constant
144
145 def __init__(self, timer=None, bias=None):
146 self.timings = {}
147 self.cur = None
148 self.cmd = ""
149 self.c_func_name = ""
150
151 if bias is None:
152 bias = self.bias
153 self.bias = bias # Materialize in local dict for lookup speed.
154
155 if not timer:
156 self.timer = self.get_time = time.process_time
157 self.dispatcher = self.trace_dispatch_i
158 else:
159 self.timer = timer
160 t = self.timer() # test out timer function
161 try:
162 length = len(t)
163 except TypeError:
164 self.get_time = timer
165 self.dispatcher = self.trace_dispatch_i
166 else:
167 if length == 2:
168 self.dispatcher = self.trace_dispatch
169 else:
170 self.dispatcher = self.trace_dispatch_l
171 # This get_time() implementation needs to be defined
172 # here to capture the passed-in timer in the parameter
173 # list (for performance). Note that we can't assume
174 # the timer() result contains two values in all
175 # cases.
176 def get_time_timer(timer=timer, sum=sum):
177 return sum(timer())
178 self.get_time = get_time_timer
179 self.t = self.get_time()
180 self.simulate_call('profiler')
181
182 # Heavily optimized dispatch routine for time.process_time() timer
183
184 def trace_dispatch(self, frame, event, arg):
185 timer = self.timer
186 t = timer()
187 t = t[0] + t[1] - self.t - self.bias
188
189 if event == "c_call":
190 self.c_func_name = arg.__name__
191
192 if self.dispatch[event](self, frame,t):
193 t = timer()
194 self.t = t[0] + t[1]
195 else:
196 r = timer()
197 self.t = r[0] + r[1] - t # put back unrecorded delta
198
199 # Dispatch routine for best timer program (return = scalar, fastest if
200 # an integer but float works too -- and time.process_time() relies on that).
201
202 def trace_dispatch_i(self, frame, event, arg):
203 timer = self.timer
204 t = timer() - self.t - self.bias
205
206 if event == "c_call":
207 self.c_func_name = arg.__name__
208
209 if self.dispatch[event](self, frame, t):
210 self.t = timer()
211 else:
212 self.t = timer() - t # put back unrecorded delta
213
214 # Dispatch routine for macintosh (timer returns time in ticks of
215 # 1/60th second)
216
217 def trace_dispatch_mac(self, frame, event, arg):
218 timer = self.timer
219 t = timer()/60.0 - self.t - self.bias
220
221 if event == "c_call":
222 self.c_func_name = arg.__name__
223
224 if self.dispatch[event](self, frame, t):
225 self.t = timer()/60.0
226 else:
227 self.t = timer()/60.0 - t # put back unrecorded delta
228
229 # SLOW generic dispatch routine for timer returning lists of numbers
230
231 def trace_dispatch_l(self, frame, event, arg):
232 get_time = self.get_time
233 t = get_time() - self.t - self.bias
234
235 if event == "c_call":
236 self.c_func_name = arg.__name__
237
238 if self.dispatch[event](self, frame, t):
239 self.t = get_time()
240 else:
241 self.t = get_time() - t # put back unrecorded delta
242
243 # In the event handlers, the first 3 elements of self.cur are unpacked
244 # into vrbls w/ 3-letter names. The last two characters are meant to be
245 # mnemonic:
246 # _pt self.cur[0] "parent time" time to be charged to parent frame
247 # _it self.cur[1] "internal time" time spent directly in the function
248 # _et self.cur[2] "external time" time spent in subfunctions
249
250 def trace_dispatch_exception(self, frame, t):
251 rpt, rit, ret, rfn, rframe, rcur = self.cur
252 if (rframe is not frame) and rcur:
253 return self.trace_dispatch_return(rframe, t)
254 self.cur = rpt, rit+t, ret, rfn, rframe, rcur
255 return 1
256
257
258 def trace_dispatch_call(self, frame, t):
259 if self.cur and frame.f_back is not self.cur[-2]:
260 rpt, rit, ret, rfn, rframe, rcur = self.cur
261 if not isinstance(rframe, Profile.fake_frame):
262 assert rframe.f_back is frame.f_back, ("Bad call", rfn,
263 rframe, rframe.f_back,
264 frame, frame.f_back)
265 self.trace_dispatch_return(rframe, 0)
266 assert (self.cur is None or \
267 frame.f_back is self.cur[-2]), ("Bad call",
268 self.cur[-3])
269 fcode = frame.f_code
270 fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
271 self.cur = (t, 0, 0, fn, frame, self.cur)
272 timings = self.timings
273 if fn in timings:
274 cc, ns, tt, ct, callers = timings[fn]
275 timings[fn] = cc, ns + 1, tt, ct, callers
276 else:
277 timings[fn] = 0, 0, 0, 0, {}
278 return 1
279
280 def trace_dispatch_c_call (self, frame, t):
281 fn = ("", 0, self.c_func_name)
282 self.cur = (t, 0, 0, fn, frame, self.cur)
283 timings = self.timings
284 if fn in timings:
285 cc, ns, tt, ct, callers = timings[fn]
286 timings[fn] = cc, ns+1, tt, ct, callers
287 else:
288 timings[fn] = 0, 0, 0, 0, {}
289 return 1
290
291 def trace_dispatch_return(self, frame, t):
292 if frame is not self.cur[-2]:
293 assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
294 self.trace_dispatch_return(self.cur[-2], 0)
295
296 # Prefix "r" means part of the Returning or exiting frame.
297 # Prefix "p" means part of the Previous or Parent or older frame.
298
299 rpt, rit, ret, rfn, frame, rcur = self.cur
300 rit = rit + t
301 frame_total = rit + ret
302
303 ppt, pit, pet, pfn, pframe, pcur = rcur
304 self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur
305
306 timings = self.timings
307 cc, ns, tt, ct, callers = timings[rfn]
308 if not ns:
309 # This is the only occurrence of the function on the stack.
310 # Else this is a (directly or indirectly) recursive call, and
311 # its cumulative time will get updated when the topmost call to
312 # it returns.
313 ct = ct + frame_total
314 cc = cc + 1
315
316 if pfn in callers:
317 callers[pfn] = callers[pfn] + 1 # hack: gather more
318 # stats such as the amount of time added to ct courtesy
319 # of this specific call, and the contribution to cc
320 # courtesy of this call.
321 else:
322 callers[pfn] = 1
323
324 timings[rfn] = cc, ns - 1, tt + rit, ct, callers
325
326 return 1
327
328
329 dispatch = {
330 "call": trace_dispatch_call,
331 "exception": trace_dispatch_exception,
332 "return": trace_dispatch_return,
333 "c_call": trace_dispatch_c_call,
334 "c_exception": trace_dispatch_return, # the C function returned
335 "c_return": trace_dispatch_return,
336 }
337
338
339 # The next few functions play with self.cmd. By carefully preloading
340 # our parallel stack, we can force the profiled result to include
341 # an arbitrary string as the name of the calling function.
342 # We use self.cmd as that string, and the resulting stats look
343 # very nice :-).
344
345 def set_cmd(self, cmd):
346 if self.cur[-1]: return # already set
347 self.cmd = cmd
348 self.simulate_call(cmd)
349
350 class ESC[4;38;5;81mfake_code:
351 def __init__(self, filename, line, name):
352 self.co_filename = filename
353 self.co_line = line
354 self.co_name = name
355 self.co_firstlineno = 0
356
357 def __repr__(self):
358 return repr((self.co_filename, self.co_line, self.co_name))
359
360 class ESC[4;38;5;81mfake_frame:
361 def __init__(self, code, prior):
362 self.f_code = code
363 self.f_back = prior
364
365 def simulate_call(self, name):
366 code = self.fake_code('profile', 0, name)
367 if self.cur:
368 pframe = self.cur[-2]
369 else:
370 pframe = None
371 frame = self.fake_frame(code, pframe)
372 self.dispatch['call'](self, frame, 0)
373
374 # collect stats from pending stack, including getting final
375 # timings for self.cmd frame.
376
377 def simulate_cmd_complete(self):
378 get_time = self.get_time
379 t = get_time() - self.t
380 while self.cur[-1]:
381 # We *can* cause assertion errors here if
382 # dispatch_trace_return checks for a frame match!
383 self.dispatch['return'](self, self.cur[-2], t)
384 t = 0
385 self.t = get_time() - t
386
387
388 def print_stats(self, sort=-1):
389 import pstats
390 pstats.Stats(self).strip_dirs().sort_stats(sort). \
391 print_stats()
392
393 def dump_stats(self, file):
394 with open(file, 'wb') as f:
395 self.create_stats()
396 marshal.dump(self.stats, f)
397
398 def create_stats(self):
399 self.simulate_cmd_complete()
400 self.snapshot_stats()
401
402 def snapshot_stats(self):
403 self.stats = {}
404 for func, (cc, ns, tt, ct, callers) in self.timings.items():
405 callers = callers.copy()
406 nc = 0
407 for callcnt in callers.values():
408 nc += callcnt
409 self.stats[func] = cc, nc, tt, ct, callers
410
411
412 # The following two methods can be called by clients to use
413 # a profiler to profile a statement, given as a string.
414
415 def run(self, cmd):
416 import __main__
417 dict = __main__.__dict__
418 return self.runctx(cmd, dict, dict)
419
420 def runctx(self, cmd, globals, locals):
421 self.set_cmd(cmd)
422 sys.setprofile(self.dispatcher)
423 try:
424 exec(cmd, globals, locals)
425 finally:
426 sys.setprofile(None)
427 return self
428
429 # This method is more useful to profile a single function call.
430 def runcall(self, func, /, *args, **kw):
431 self.set_cmd(repr(func))
432 sys.setprofile(self.dispatcher)
433 try:
434 return func(*args, **kw)
435 finally:
436 sys.setprofile(None)
437
438
439 #******************************************************************
440 # The following calculates the overhead for using a profiler. The
441 # problem is that it takes a fair amount of time for the profiler
442 # to stop the stopwatch (from the time it receives an event).
443 # Similarly, there is a delay from the time that the profiler
444 # re-starts the stopwatch before the user's code really gets to
445 # continue. The following code tries to measure the difference on
446 # a per-event basis.
447 #
448 # Note that this difference is only significant if there are a lot of
449 # events, and relatively little user code per event. For example,
450 # code with small functions will typically benefit from having the
451 # profiler calibrated for the current platform. This *could* be
452 # done on the fly during init() time, but it is not worth the
453 # effort. Also note that if too large a value specified, then
454 # execution time on some functions will actually appear as a
455 # negative number. It is *normal* for some functions (with very
456 # low call counts) to have such negative stats, even if the
457 # calibration figure is "correct."
458 #
459 # One alternative to profile-time calibration adjustments (i.e.,
460 # adding in the magic little delta during each event) is to track
461 # more carefully the number of events (and cumulatively, the number
462 # of events during sub functions) that are seen. If this were
463 # done, then the arithmetic could be done after the fact (i.e., at
464 # display time). Currently, we track only call/return events.
465 # These values can be deduced by examining the callees and callers
466 # vectors for each functions. Hence we *can* almost correct the
467 # internal time figure at print time (note that we currently don't
468 # track exception event processing counts). Unfortunately, there
469 # is currently no similar information for cumulative sub-function
470 # time. It would not be hard to "get all this info" at profiler
471 # time. Specifically, we would have to extend the tuples to keep
472 # counts of this in each frame, and then extend the defs of timing
473 # tuples to include the significant two figures. I'm a bit fearful
474 # that this additional feature will slow the heavily optimized
475 # event/time ratio (i.e., the profiler would run slower, fur a very
476 # low "value added" feature.)
477 #**************************************************************
478
479 def calibrate(self, m, verbose=0):
480 if self.__class__ is not Profile:
481 raise TypeError("Subclasses must override .calibrate().")
482
483 saved_bias = self.bias
484 self.bias = 0
485 try:
486 return self._calibrate_inner(m, verbose)
487 finally:
488 self.bias = saved_bias
489
490 def _calibrate_inner(self, m, verbose):
491 get_time = self.get_time
492
493 # Set up a test case to be run with and without profiling. Include
494 # lots of calls, because we're trying to quantify stopwatch overhead.
495 # Do not raise any exceptions, though, because we want to know
496 # exactly how many profile events are generated (one call event, +
497 # one return event, per Python-level call).
498
499 def f1(n):
500 for i in range(n):
501 x = 1
502
503 def f(m, f1=f1):
504 for i in range(m):
505 f1(100)
506
507 f(m) # warm up the cache
508
509 # elapsed_noprofile <- time f(m) takes without profiling.
510 t0 = get_time()
511 f(m)
512 t1 = get_time()
513 elapsed_noprofile = t1 - t0
514 if verbose:
515 print("elapsed time without profiling =", elapsed_noprofile)
516
517 # elapsed_profile <- time f(m) takes with profiling. The difference
518 # is profiling overhead, only some of which the profiler subtracts
519 # out on its own.
520 p = Profile()
521 t0 = get_time()
522 p.runctx('f(m)', globals(), locals())
523 t1 = get_time()
524 elapsed_profile = t1 - t0
525 if verbose:
526 print("elapsed time with profiling =", elapsed_profile)
527
528 # reported_time <- "CPU seconds" the profiler charged to f and f1.
529 total_calls = 0.0
530 reported_time = 0.0
531 for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
532 p.timings.items():
533 if funcname in ("f", "f1"):
534 total_calls += cc
535 reported_time += tt
536
537 if verbose:
538 print("'CPU seconds' profiler reported =", reported_time)
539 print("total # calls =", total_calls)
540 if total_calls != m + 1:
541 raise ValueError("internal error: total calls = %d" % total_calls)
542
543 # reported_time - elapsed_noprofile = overhead the profiler wasn't
544 # able to measure. Divide by twice the number of calls (since there
545 # are two profiler events per call in this test) to get the hidden
546 # overhead per event.
547 mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
548 if verbose:
549 print("mean stopwatch overhead per profile event =", mean)
550 return mean
551
552 #****************************************************************************
553
554 def main():
555 import os
556 from optparse import OptionParser
557
558 usage = "profile.py [-o output_file_path] [-s sort] [-m module | scriptfile] [arg] ..."
559 parser = OptionParser(usage=usage)
560 parser.allow_interspersed_args = False
561 parser.add_option('-o', '--outfile', dest="outfile",
562 help="Save stats to <outfile>", default=None)
563 parser.add_option('-m', dest="module", action="store_true",
564 help="Profile a library module.", default=False)
565 parser.add_option('-s', '--sort', dest="sort",
566 help="Sort order when printing to stdout, based on pstats.Stats class",
567 default=-1)
568
569 if not sys.argv[1:]:
570 parser.print_usage()
571 sys.exit(2)
572
573 (options, args) = parser.parse_args()
574 sys.argv[:] = args
575
576 # The script that we're profiling may chdir, so capture the absolute path
577 # to the output file at startup.
578 if options.outfile is not None:
579 options.outfile = os.path.abspath(options.outfile)
580
581 if len(args) > 0:
582 if options.module:
583 import runpy
584 code = "run_module(modname, run_name='__main__')"
585 globs = {
586 'run_module': runpy.run_module,
587 'modname': args[0]
588 }
589 else:
590 progname = args[0]
591 sys.path.insert(0, os.path.dirname(progname))
592 with io.open_code(progname) as fp:
593 code = compile(fp.read(), progname, 'exec')
594 spec = importlib.machinery.ModuleSpec(name='__main__', loader=None,
595 origin=progname)
596 globs = {
597 '__spec__': spec,
598 '__file__': spec.origin,
599 '__name__': spec.name,
600 '__package__': None,
601 '__cached__': None,
602 }
603 try:
604 runctx(code, globs, None, options.outfile, options.sort)
605 except BrokenPipeError as exc:
606 # Prevent "Exception ignored" during interpreter shutdown.
607 sys.stdout = None
608 sys.exit(exc.errno)
609 else:
610 parser.print_usage()
611 return parser
612
613 # When invoked as main program, invoke the profiler on a script
614 if __name__ == '__main__':
615 main()