1 #! /usr/bin/env python3
2 #
3 # Class for profiling python code. rev 1.0 6/2/94
4 #
5 # Written by James Roskind
6 # Based on prior profile module by Sjoerd Mullender...
7 # which was hacked somewhat by: Guido van Rossum
8
9 """Class for profiling Python code."""
10
11 # Copyright Disney Enterprises, Inc. All Rights Reserved.
12 # Licensed to PSF under a Contributor Agreement
13 #
14 # Licensed under the Apache License, Version 2.0 (the "License");
15 # you may not use this file except in compliance with the License.
16 # You may obtain a copy of the License at
17 #
18 # http://www.apache.org/licenses/LICENSE-2.0
19 #
20 # Unless required by applicable law or agreed to in writing, software
21 # distributed under the License is distributed on an "AS IS" BASIS,
22 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
23 # either express or implied. See the License for the specific language
24 # governing permissions and limitations under the License.
25
26
27 import io
28 import sys
29 import time
30 import marshal
31
32 __all__ = ["run", "runctx", "Profile"]
33
34 # Sample timer for use with
35 #i_count = 0
36 #def integer_timer():
37 # global i_count
38 # i_count = i_count + 1
39 # return i_count
40 #itimes = integer_timer # replace with C coded timer returning integers
41
42 class ESC[4;38;5;81m_Utils:
43 """Support class for utility functions which are shared by
44 profile.py and cProfile.py modules.
45 Not supposed to be used directly.
46 """
47
48 def __init__(self, profiler):
49 self.profiler = profiler
50
51 def run(self, statement, filename, sort):
52 prof = self.profiler()
53 try:
54 prof.run(statement)
55 except SystemExit:
56 pass
57 finally:
58 self._show(prof, filename, sort)
59
60 def runctx(self, statement, globals, locals, filename, sort):
61 prof = self.profiler()
62 try:
63 prof.runctx(statement, globals, locals)
64 except SystemExit:
65 pass
66 finally:
67 self._show(prof, filename, sort)
68
69 def _show(self, prof, filename, sort):
70 if filename is not None:
71 prof.dump_stats(filename)
72 else:
73 prof.print_stats(sort)
74
75
76 #**************************************************************************
77 # The following are the static member functions for the profiler class
78 # Note that an instance of Profile() is *not* needed to call them.
79 #**************************************************************************
80
81 def run(statement, filename=None, sort=-1):
82 """Run statement under profiler optionally saving results in filename
83
84 This function takes a single argument that can be passed to the
85 "exec" statement, and an optional file name. In all cases this
86 routine attempts to "exec" its first argument and gather profiling
87 statistics from the execution. If no file name is present, then this
88 function automatically prints a simple profiling report, sorted by the
89 standard name string (file/line/function-name) that is presented in
90 each line.
91 """
92 return _Utils(Profile).run(statement, filename, sort)
93
94 def runctx(statement, globals, locals, filename=None, sort=-1):
95 """Run statement under profiler, supplying your own globals and locals,
96 optionally saving results in filename.
97
98 statement and filename have the same semantics as profile.run
99 """
100 return _Utils(Profile).runctx(statement, globals, locals, filename, sort)
101
102
103 class ESC[4;38;5;81mProfile:
104 """Profiler class.
105
106 self.cur is always a tuple. Each such tuple corresponds to a stack
107 frame that is currently active (self.cur[-2]). The following are the
108 definitions of its members. We use this external "parallel stack" to
109 avoid contaminating the program that we are profiling. (old profiler
110 used to write into the frames local dictionary!!) Derived classes
111 can change the definition of some entries, as long as they leave
112 [-2:] intact (frame and previous tuple). In case an internal error is
113 detected, the -3 element is used as the function name.
114
115 [ 0] = Time that needs to be charged to the parent frame's function.
116 It is used so that a function call will not have to access the
117 timing data for the parent frame.
118 [ 1] = Total time spent in this frame's function, excluding time in
119 subfunctions (this latter is tallied in cur[2]).
120 [ 2] = Total time spent in subfunctions, excluding time executing the
121 frame's function (this latter is tallied in cur[1]).
122 [-3] = Name of the function that corresponds to this frame.
123 [-2] = Actual frame that we correspond to (used to sync exception handling).
124 [-1] = Our parent 6-tuple (corresponds to frame.f_back).
125
126 Timing data for each function is stored as a 5-tuple in the dictionary
127 self.timings[]. The index is always the name stored in self.cur[-3].
128 The following are the definitions of the members:
129
130 [0] = The number of times this function was called, not counting direct
131 or indirect recursion,
132 [1] = Number of times this function appears on the stack, minus one
133 [2] = Total time spent internal to this function
134 [3] = Cumulative time that this function was present on the stack. In
135 non-recursive functions, this is the total execution time from start
136 to finish of each invocation of a function, including time spent in
137 all subfunctions.
138 [4] = A dictionary indicating for each function name, the number of times
139 it was called by us.
140 """
141
142 bias = 0 # calibration constant
143
144 def __init__(self, timer=None, bias=None):
145 self.timings = {}
146 self.cur = None
147 self.cmd = ""
148 self.c_func_name = ""
149
150 if bias is None:
151 bias = self.bias
152 self.bias = bias # Materialize in local dict for lookup speed.
153
154 if not timer:
155 self.timer = self.get_time = time.process_time
156 self.dispatcher = self.trace_dispatch_i
157 else:
158 self.timer = timer
159 t = self.timer() # test out timer function
160 try:
161 length = len(t)
162 except TypeError:
163 self.get_time = timer
164 self.dispatcher = self.trace_dispatch_i
165 else:
166 if length == 2:
167 self.dispatcher = self.trace_dispatch
168 else:
169 self.dispatcher = self.trace_dispatch_l
170 # This get_time() implementation needs to be defined
171 # here to capture the passed-in timer in the parameter
172 # list (for performance). Note that we can't assume
173 # the timer() result contains two values in all
174 # cases.
175 def get_time_timer(timer=timer, sum=sum):
176 return sum(timer())
177 self.get_time = get_time_timer
178 self.t = self.get_time()
179 self.simulate_call('profiler')
180
181 # Heavily optimized dispatch routine for time.process_time() timer
182
183 def trace_dispatch(self, frame, event, arg):
184 timer = self.timer
185 t = timer()
186 t = t[0] + t[1] - self.t - self.bias
187
188 if event == "c_call":
189 self.c_func_name = arg.__name__
190
191 if self.dispatch[event](self, frame,t):
192 t = timer()
193 self.t = t[0] + t[1]
194 else:
195 r = timer()
196 self.t = r[0] + r[1] - t # put back unrecorded delta
197
198 # Dispatch routine for best timer program (return = scalar, fastest if
199 # an integer but float works too -- and time.process_time() relies on that).
200
201 def trace_dispatch_i(self, frame, event, arg):
202 timer = self.timer
203 t = timer() - self.t - self.bias
204
205 if event == "c_call":
206 self.c_func_name = arg.__name__
207
208 if self.dispatch[event](self, frame, t):
209 self.t = timer()
210 else:
211 self.t = timer() - t # put back unrecorded delta
212
213 # Dispatch routine for macintosh (timer returns time in ticks of
214 # 1/60th second)
215
216 def trace_dispatch_mac(self, frame, event, arg):
217 timer = self.timer
218 t = timer()/60.0 - self.t - self.bias
219
220 if event == "c_call":
221 self.c_func_name = arg.__name__
222
223 if self.dispatch[event](self, frame, t):
224 self.t = timer()/60.0
225 else:
226 self.t = timer()/60.0 - t # put back unrecorded delta
227
228 # SLOW generic dispatch routine for timer returning lists of numbers
229
230 def trace_dispatch_l(self, frame, event, arg):
231 get_time = self.get_time
232 t = get_time() - self.t - self.bias
233
234 if event == "c_call":
235 self.c_func_name = arg.__name__
236
237 if self.dispatch[event](self, frame, t):
238 self.t = get_time()
239 else:
240 self.t = get_time() - t # put back unrecorded delta
241
242 # In the event handlers, the first 3 elements of self.cur are unpacked
243 # into vrbls w/ 3-letter names. The last two characters are meant to be
244 # mnemonic:
245 # _pt self.cur[0] "parent time" time to be charged to parent frame
246 # _it self.cur[1] "internal time" time spent directly in the function
247 # _et self.cur[2] "external time" time spent in subfunctions
248
249 def trace_dispatch_exception(self, frame, t):
250 rpt, rit, ret, rfn, rframe, rcur = self.cur
251 if (rframe is not frame) and rcur:
252 return self.trace_dispatch_return(rframe, t)
253 self.cur = rpt, rit+t, ret, rfn, rframe, rcur
254 return 1
255
256
257 def trace_dispatch_call(self, frame, t):
258 if self.cur and frame.f_back is not self.cur[-2]:
259 rpt, rit, ret, rfn, rframe, rcur = self.cur
260 if not isinstance(rframe, Profile.fake_frame):
261 assert rframe.f_back is frame.f_back, ("Bad call", rfn,
262 rframe, rframe.f_back,
263 frame, frame.f_back)
264 self.trace_dispatch_return(rframe, 0)
265 assert (self.cur is None or \
266 frame.f_back is self.cur[-2]), ("Bad call",
267 self.cur[-3])
268 fcode = frame.f_code
269 fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
270 self.cur = (t, 0, 0, fn, frame, self.cur)
271 timings = self.timings
272 if fn in timings:
273 cc, ns, tt, ct, callers = timings[fn]
274 timings[fn] = cc, ns + 1, tt, ct, callers
275 else:
276 timings[fn] = 0, 0, 0, 0, {}
277 return 1
278
279 def trace_dispatch_c_call (self, frame, t):
280 fn = ("", 0, self.c_func_name)
281 self.cur = (t, 0, 0, fn, frame, self.cur)
282 timings = self.timings
283 if fn in timings:
284 cc, ns, tt, ct, callers = timings[fn]
285 timings[fn] = cc, ns+1, tt, ct, callers
286 else:
287 timings[fn] = 0, 0, 0, 0, {}
288 return 1
289
290 def trace_dispatch_return(self, frame, t):
291 if frame is not self.cur[-2]:
292 assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
293 self.trace_dispatch_return(self.cur[-2], 0)
294
295 # Prefix "r" means part of the Returning or exiting frame.
296 # Prefix "p" means part of the Previous or Parent or older frame.
297
298 rpt, rit, ret, rfn, frame, rcur = self.cur
299 rit = rit + t
300 frame_total = rit + ret
301
302 ppt, pit, pet, pfn, pframe, pcur = rcur
303 self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur
304
305 timings = self.timings
306 cc, ns, tt, ct, callers = timings[rfn]
307 if not ns:
308 # This is the only occurrence of the function on the stack.
309 # Else this is a (directly or indirectly) recursive call, and
310 # its cumulative time will get updated when the topmost call to
311 # it returns.
312 ct = ct + frame_total
313 cc = cc + 1
314
315 if pfn in callers:
316 callers[pfn] = callers[pfn] + 1 # hack: gather more
317 # stats such as the amount of time added to ct courtesy
318 # of this specific call, and the contribution to cc
319 # courtesy of this call.
320 else:
321 callers[pfn] = 1
322
323 timings[rfn] = cc, ns - 1, tt + rit, ct, callers
324
325 return 1
326
327
328 dispatch = {
329 "call": trace_dispatch_call,
330 "exception": trace_dispatch_exception,
331 "return": trace_dispatch_return,
332 "c_call": trace_dispatch_c_call,
333 "c_exception": trace_dispatch_return, # the C function returned
334 "c_return": trace_dispatch_return,
335 }
336
337
338 # The next few functions play with self.cmd. By carefully preloading
339 # our parallel stack, we can force the profiled result to include
340 # an arbitrary string as the name of the calling function.
341 # We use self.cmd as that string, and the resulting stats look
342 # very nice :-).
343
344 def set_cmd(self, cmd):
345 if self.cur[-1]: return # already set
346 self.cmd = cmd
347 self.simulate_call(cmd)
348
349 class ESC[4;38;5;81mfake_code:
350 def __init__(self, filename, line, name):
351 self.co_filename = filename
352 self.co_line = line
353 self.co_name = name
354 self.co_firstlineno = 0
355
356 def __repr__(self):
357 return repr((self.co_filename, self.co_line, self.co_name))
358
359 class ESC[4;38;5;81mfake_frame:
360 def __init__(self, code, prior):
361 self.f_code = code
362 self.f_back = prior
363
364 def simulate_call(self, name):
365 code = self.fake_code('profile', 0, name)
366 if self.cur:
367 pframe = self.cur[-2]
368 else:
369 pframe = None
370 frame = self.fake_frame(code, pframe)
371 self.dispatch['call'](self, frame, 0)
372
373 # collect stats from pending stack, including getting final
374 # timings for self.cmd frame.
375
376 def simulate_cmd_complete(self):
377 get_time = self.get_time
378 t = get_time() - self.t
379 while self.cur[-1]:
380 # We *can* cause assertion errors here if
381 # dispatch_trace_return checks for a frame match!
382 self.dispatch['return'](self, self.cur[-2], t)
383 t = 0
384 self.t = get_time() - t
385
386
387 def print_stats(self, sort=-1):
388 import pstats
389 pstats.Stats(self).strip_dirs().sort_stats(sort). \
390 print_stats()
391
392 def dump_stats(self, file):
393 with open(file, 'wb') as f:
394 self.create_stats()
395 marshal.dump(self.stats, f)
396
397 def create_stats(self):
398 self.simulate_cmd_complete()
399 self.snapshot_stats()
400
401 def snapshot_stats(self):
402 self.stats = {}
403 for func, (cc, ns, tt, ct, callers) in self.timings.items():
404 callers = callers.copy()
405 nc = 0
406 for callcnt in callers.values():
407 nc += callcnt
408 self.stats[func] = cc, nc, tt, ct, callers
409
410
411 # The following two methods can be called by clients to use
412 # a profiler to profile a statement, given as a string.
413
414 def run(self, cmd):
415 import __main__
416 dict = __main__.__dict__
417 return self.runctx(cmd, dict, dict)
418
419 def runctx(self, cmd, globals, locals):
420 self.set_cmd(cmd)
421 sys.setprofile(self.dispatcher)
422 try:
423 exec(cmd, globals, locals)
424 finally:
425 sys.setprofile(None)
426 return self
427
428 # This method is more useful to profile a single function call.
429 def runcall(self, func, /, *args, **kw):
430 self.set_cmd(repr(func))
431 sys.setprofile(self.dispatcher)
432 try:
433 return func(*args, **kw)
434 finally:
435 sys.setprofile(None)
436
437
438 #******************************************************************
439 # The following calculates the overhead for using a profiler. The
440 # problem is that it takes a fair amount of time for the profiler
441 # to stop the stopwatch (from the time it receives an event).
442 # Similarly, there is a delay from the time that the profiler
443 # re-starts the stopwatch before the user's code really gets to
444 # continue. The following code tries to measure the difference on
445 # a per-event basis.
446 #
447 # Note that this difference is only significant if there are a lot of
448 # events, and relatively little user code per event. For example,
449 # code with small functions will typically benefit from having the
450 # profiler calibrated for the current platform. This *could* be
451 # done on the fly during init() time, but it is not worth the
452 # effort. Also note that if too large a value specified, then
453 # execution time on some functions will actually appear as a
454 # negative number. It is *normal* for some functions (with very
455 # low call counts) to have such negative stats, even if the
456 # calibration figure is "correct."
457 #
458 # One alternative to profile-time calibration adjustments (i.e.,
459 # adding in the magic little delta during each event) is to track
460 # more carefully the number of events (and cumulatively, the number
461 # of events during sub functions) that are seen. If this were
462 # done, then the arithmetic could be done after the fact (i.e., at
463 # display time). Currently, we track only call/return events.
464 # These values can be deduced by examining the callees and callers
465 # vectors for each functions. Hence we *can* almost correct the
466 # internal time figure at print time (note that we currently don't
467 # track exception event processing counts). Unfortunately, there
468 # is currently no similar information for cumulative sub-function
469 # time. It would not be hard to "get all this info" at profiler
470 # time. Specifically, we would have to extend the tuples to keep
471 # counts of this in each frame, and then extend the defs of timing
472 # tuples to include the significant two figures. I'm a bit fearful
473 # that this additional feature will slow the heavily optimized
474 # event/time ratio (i.e., the profiler would run slower, fur a very
475 # low "value added" feature.)
476 #**************************************************************
477
478 def calibrate(self, m, verbose=0):
479 if self.__class__ is not Profile:
480 raise TypeError("Subclasses must override .calibrate().")
481
482 saved_bias = self.bias
483 self.bias = 0
484 try:
485 return self._calibrate_inner(m, verbose)
486 finally:
487 self.bias = saved_bias
488
489 def _calibrate_inner(self, m, verbose):
490 get_time = self.get_time
491
492 # Set up a test case to be run with and without profiling. Include
493 # lots of calls, because we're trying to quantify stopwatch overhead.
494 # Do not raise any exceptions, though, because we want to know
495 # exactly how many profile events are generated (one call event, +
496 # one return event, per Python-level call).
497
498 def f1(n):
499 for i in range(n):
500 x = 1
501
502 def f(m, f1=f1):
503 for i in range(m):
504 f1(100)
505
506 f(m) # warm up the cache
507
508 # elapsed_noprofile <- time f(m) takes without profiling.
509 t0 = get_time()
510 f(m)
511 t1 = get_time()
512 elapsed_noprofile = t1 - t0
513 if verbose:
514 print("elapsed time without profiling =", elapsed_noprofile)
515
516 # elapsed_profile <- time f(m) takes with profiling. The difference
517 # is profiling overhead, only some of which the profiler subtracts
518 # out on its own.
519 p = Profile()
520 t0 = get_time()
521 p.runctx('f(m)', globals(), locals())
522 t1 = get_time()
523 elapsed_profile = t1 - t0
524 if verbose:
525 print("elapsed time with profiling =", elapsed_profile)
526
527 # reported_time <- "CPU seconds" the profiler charged to f and f1.
528 total_calls = 0.0
529 reported_time = 0.0
530 for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
531 p.timings.items():
532 if funcname in ("f", "f1"):
533 total_calls += cc
534 reported_time += tt
535
536 if verbose:
537 print("'CPU seconds' profiler reported =", reported_time)
538 print("total # calls =", total_calls)
539 if total_calls != m + 1:
540 raise ValueError("internal error: total calls = %d" % total_calls)
541
542 # reported_time - elapsed_noprofile = overhead the profiler wasn't
543 # able to measure. Divide by twice the number of calls (since there
544 # are two profiler events per call in this test) to get the hidden
545 # overhead per event.
546 mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
547 if verbose:
548 print("mean stopwatch overhead per profile event =", mean)
549 return mean
550
551 #****************************************************************************
552
553 def main():
554 import os
555 from optparse import OptionParser
556
557 usage = "profile.py [-o output_file_path] [-s sort] [-m module | scriptfile] [arg] ..."
558 parser = OptionParser(usage=usage)
559 parser.allow_interspersed_args = False
560 parser.add_option('-o', '--outfile', dest="outfile",
561 help="Save stats to <outfile>", default=None)
562 parser.add_option('-m', dest="module", action="store_true",
563 help="Profile a library module.", default=False)
564 parser.add_option('-s', '--sort', dest="sort",
565 help="Sort order when printing to stdout, based on pstats.Stats class",
566 default=-1)
567
568 if not sys.argv[1:]:
569 parser.print_usage()
570 sys.exit(2)
571
572 (options, args) = parser.parse_args()
573 sys.argv[:] = args
574
575 # The script that we're profiling may chdir, so capture the absolute path
576 # to the output file at startup.
577 if options.outfile is not None:
578 options.outfile = os.path.abspath(options.outfile)
579
580 if len(args) > 0:
581 if options.module:
582 import runpy
583 code = "run_module(modname, run_name='__main__')"
584 globs = {
585 'run_module': runpy.run_module,
586 'modname': args[0]
587 }
588 else:
589 progname = args[0]
590 sys.path.insert(0, os.path.dirname(progname))
591 with io.open_code(progname) as fp:
592 code = compile(fp.read(), progname, 'exec')
593 globs = {
594 '__file__': progname,
595 '__name__': '__main__',
596 '__package__': None,
597 '__cached__': None,
598 }
599 try:
600 runctx(code, globs, None, options.outfile, options.sort)
601 except BrokenPipeError as exc:
602 # Prevent "Exception ignored" during interpreter shutdown.
603 sys.stdout = None
604 sys.exit(exc.errno)
605 else:
606 parser.print_usage()
607 return parser
608
609 # When invoked as main program, invoke the profiler on a script
610 if __name__ == '__main__':
611 main()