Contact: fumanchu@aminus.org

Log in as guest/misc to create tickets

root/stopwatch.py

Revision 185 (checked in by fumanchu, 1 year ago)

Much improved stopwatch.py:

  1. New data model for races resulting in error-free click()
  2. Descent into subframes of autowatch
  3. Autowatch reporting methods, including call depth
  4. Autowatch enable/disable; inherits down to instancemethods
  5. Autowatch.is_tracing property.
Line 
1 import datetime
2 import linecache
3 import os
4 import sys
5 import thread
6 import threading
7
8 __version__ = "0.2"
9
10 delta_zero = datetime.timedelta(0)
11
12 # Scale constants
13 microseconds = usec = 1e6
14 milliseconds = msec = 1e3
15 seconds = 1
16 minutes = 1 / 60.0
17 hours = 1 / 3600.0
18
19
20 def seconds(td):
21     """Return the given timedelta as a floating-point number of seconds."""
22     return (td.days * 86400) + td.seconds + (td.microseconds / 1e6)
23
24 class Race(list):
25     """A list of elapsed times."""
26    
27     start = None
28     end = None
29     clicktime = None
30
31
32 class Stopwatch(object):
33     """A thread-safe code timer.
34     
35     Usage:
36         sw = Stopwatch()
37         sw.click('start')
38         foo
39         sw.click('foo done')
40         bar
41         sw.click('bar done')
42         baz
43         sw.click()
44         print sw.format()
45         sw.reset()
46     
47     Clicks and Races
48     ----------------
49     
50     When you call the "click" method, you store the elapsed time since the
51     previous click (or 0 if this is the first click). When you call the "reset"
52     method, that collection of elapsed times (and any labels) is stored as
53     an individual "race" in self.races.
54     
55     Swimlanes
56     ---------
57     
58     By default, calls to the "click" method are threadsafe; that is,
59     you can use the same Stopwatch instance in multiple threads at the
60     same time. Each thread runs its own race.
61     
62     Sometimes, however, you want to collect clicks across threads,
63     or collect several 'races' of clicks in parallel within a single-
64     threaded application. In these cases, you can supply any hashable
65     value, or callable which produces a hashable value, as the swimlane
66     argument to the constructor, and it will be used to isolate and
67     aggregate clicks for that value. You can also pass individual
68     swimlane arguments to the "click", "reset", and "format" methods.
69     For example, you might be tracking various updates to a database
70     row across several web requests; you can pass the row's PK as the
71     swimlane argument to collect those clicks() together into a single
72     race.
73     
74     Note that clicks are stored in RAM, so all clicks within the same race
75     need to at least occur in the same process, if not the same thread.
76     """
77    
78     races = None
79     """An ordered list of Race objects (lists of (index from self.points, elapsed))."""
80    
81     points = None
82     """A dict of (label, index) pairs."""
83    
84     _current = None
85     swimlane = None
86     format_template = u"ST\u00f6PWATCH start: %(start)s clicks: %(points)s \u03a3: %(total)s"
87    
88     def __init__(self, swimlane=thread.get_ident):
89         self.points = {}
90         self.clear()
91         self.swimlane = swimlane
92    
93     def get_current(self, swimlane=None):
94         """Return the current race, creating a new Race if neceessary.
95         
96         If swimlane is None, self.swimlane is used.
97         """
98         if swimlane is None:
99             swimlane = self.swimlane
100         if callable(swimlane):
101             swimlane = swimlane()
102         try:
103             return self._current[swimlane]
104         except KeyError:
105             race = Race()
106             self._current[swimlane] = race
107             return race
108     current = property(get_current, doc="The current race")
109    
110     def click(self, label=None, swimlane=None):
111         """Record the elapsed time, using a label if given.
112         
113         If this is the first click for the current race,
114         datetime.timedelta(0) is recorded as the elapsed time.
115         
116         If swimlane is None, self.swimlane is used.
117         """
118         now = datetime.datetime.now()
119         race = self.get_current(swimlane)
120        
121         if race.clicktime is None:
122             elapsed = delta_zero
123             race.start = now
124         else:
125             elapsed = now - race.clicktime
126         race.clicktime = now
127        
128         line_count = len(self.points)
129         if label is None:
130             label = u'Point %d' % line_count
131        
132         idx = self.points.setdefault(label, line_count)
133         race.append((idx, elapsed))
134    
135     def get_labels(self):
136         """Return a dict of (idx, label) pairs from self.points."""
137         # self.points is in (label, idx) order for max speed while gathering and
138         # writing data. When reading, we want a reverse lookup, but it's
139         # generally OK to provide that all at once with a big calculation:
140         return dict((idx, label) for (label, idx) in self.points.iteritems())
141    
142     def reset(self, swimlane=None, record=True):
143         """Stop and return the current race. If 'record', store it in self.races.
144         
145         If swimlane is None, self.swimlane is used.
146         """
147         if swimlane is None:
148             swimlane = self.swimlane
149         if callable(swimlane):
150             swimlane = swimlane()
151        
152         current = self._current.pop(swimlane, Race())
153         # Use the last clicktime for the end time.
154         current.end = current.clicktime
155        
156         if record:
157             self.races.append(current)
158        
159         return current
160    
161     def clear(self):
162         """Remove all current and recorded races."""
163         self.races = []
164         self._current = {}
165    
166     def format(self, race=None, swimlane=None):
167         """Return the current race in a format suitable for logging.
168         
169         If race is None, the current race (defined either by the given swimlane
170         or self.swimlane) will be used.
171         """
172         if race is None:
173             race = self.get_current(swimlane)
174        
175         labels = self.get_labels()
176         p = {'points': u"".join([u"[%s=%s]" % (labels[idx], td)
177                                  for idx, td in race]),
178              'start': race.start,
179              'total': sum([td for idx, td in race], delta_zero),
180              }
181         return self.format_template % p
182    
183     def _filter_minima(self, minimum):
184         """Return {idx: bool(c.peak/total >= minimum)} for c in self.points."""
185         mask = dict((idx, False) for idx in self.points.itervalues())
186         for race in self.races:
187             tot = sum([td for idx, td in race], delta_zero)
188             if tot:
189                 tot = seconds(tot)
190                 for idx, td in race:
191                     if mask[idx]:
192                         # This index has already proven it meets the minimum.
193                         continue
194                     if (seconds(td) / tot) >= minimum:
195                         mask[idx] = True
196                         if False not in mask.itervalues():
197                             return mask
198         return mask
199    
200     def csv_gen(self, scale=1, minimum=None):
201         """Yield CSV lines for all races recorded by this stopwatch.
202         
203         All numbers yielded will be multiplied by the 'scale' argument, which
204         defaults to 1. Use stopwatch.usec (1e6), for example, to output
205         microseconds instead of seconds with fractions.
206         
207         If 'minimum' is provided, it must be a fraction between 0.0 and 1.0,
208         and will be used to filter out lines which have no peak times
209         above the minimum percentage given of the total time for each race.
210         For example, if minimum is 0.1, then for each column, at least one
211         race must have that column take 10% of the total time for that race;
212         if no such race exists, the column will not be included in the output.
213         However, if any lines are filtered out in this way, an "Extra"
214         column is included with the sum of all removed lines for each race.
215         This allows you to reduce output width but still make proper percentage
216         calculations wih the displayed lines.
217         """
218         if minimum:
219             minmask = self._filter_minima(minimum)
220         else:
221             minmask = {}
222        
223         if False not in minmask.itervalues():
224             # Fast path:
225             labels = self.get_labels()
226             order = self.points.values()
227             order.sort()
228            
229             header = []
230             for idx in order:
231                 label = labels[idx]
232                 if not isinstance(label, basestring):
233                     label = repr(label)
234                 header.append('"%s"' % label.replace('"', '""'))
235             yield u",".join(header) + os.linesep
236            
237             for race in self.races:
238                 cells = [""] * len(order)
239                 for idx, td in race:
240                     cells[order.index(idx)] = "%f" % (seconds(td) * scale)
241                 yield u",".join(cells) + os.linesep
242         else:
243             labels = self.get_labels()
244             order = [idx for idx in minmask if minmask[idx]]
245             order.sort()
246            
247             header = []
248             for idx in order:
249                 label = labels[idx]
250                 if not isinstance(label, basestring):
251                     label = repr(label)
252                 header.append('"%s"' % label.replace('"', '""'))
253             header.append(u'"Extra"')
254             yield u",".join(header) + os.linesep
255            
256             for race in self.races:
257                 cells = [""] * len(order)
258                 extra = 0
259                 for idx, td in race:
260                     td = (seconds(td) * scale)
261                     if minmask[idx]:
262                         cells[order.index(idx)] = "%f" % td
263                     else:
264                         extra += td
265                 cells.append("%f" % extra)
266                 yield u",".join(cells) + os.linesep
267    
268     def write_csv(self, file, mode="ab+", scale=1, minimum=None):
269         """Write CSV lines for all races recorded by this stopwatch to a file.
270         
271         All numbers yielded will be multiplied by the 'scale' argument, which
272         defaults to 1. Use stopwatch.usec (1e6), for example, to output
273         microseconds instead of seconds with fractions.
274         """
275         # TODO: handle file objects, not just filenames
276         open(file, mode).writelines(self.csv_gen(scale=scale, minimum=minimum))
277    
278     def totals(self):
279         """Return the total of all races (as a race itself).
280         
281         For example, given the races:
282         
283             Race([(0, td("00:00")), (1, td("00:02")), (2, td("00:04"))])
284             Race([(0, td("00:00")), (1, td("00:04")), (2, td("00:08"))])
285             Race([(0, td("00:00")), (1, td("00:06")), (2, td("00:18"))])
286         
287         This will return:
288         
289             Race([(0, td("00:00")), (1, td("00:12")), (2, td("00:30"))])
290         """
291         t = dict((idx, delta_zero) for idx in self.points.itervalues())
292         for race in self.races:
293             for idx, elapsed in race:
294                 if elapsed:
295                     t[idx] += elapsed
296         totalrace = Race(sorted(t.items()))
297         if self.races:
298             totalrace.start = self.races[0].start
299             totalrace.end = self.races[-1].end
300         return totalrace
301    
302     def average(self):
303         """Return the average of all races (as a race itself).
304         
305         For example, given the races:
306         
307             Race([(0, td("00:00")), (1, td("00:02")), (2, td("00:04"))])
308             Race([(0, td("00:00")), (1, td("00:04")), (2, td("00:08"))])
309             Race([(0, td("00:00")), (1, td("00:06")), (2, td("00:18"))])
310         
311         This will return:
312         
313             Race([(0, td("00:00")), (1, td("00:04")), (2, td("00:10"))])
314         """
315         t = self.totals()
316        
317         numraces = len(self.races)
318         avgrace = Race([(idx, (td / numraces)) for idx, td in t])
319         if self.races:
320             avgrace.start = self.races[0].start
321             avgrace.end = self.races[-1].end
322         return avgrace
323
324
325 class _GlobalHookLocalStorage(threading.local):
326    
327     def __init__(self):
328         self.settrace_depth = 0
329         self.swimlane = 0
330
331
332 class _GlobalTraceHook(list):
333    
334     local = _GlobalHookLocalStorage()
335    
336     def append(self, autowatch):
337         # Stick the autowatch on the list of calls to watch.
338         list.append(self, autowatch)
339        
340         # Do some simple refcounting so we only hook in once
341         # even if we encounter multiple autowatches.
342         if self.local.settrace_depth == 0:
343             # Because we're using a decorator, this call is always in the same
344             # thread as the lines in our target function. So threading.settrace
345             # is never needed; just call sys.settrace!
346             sys.settrace(self.hook)
347            
348             # Make a new swimlane, which means a new race for our watch.
349             self.local.swimlane = max(autowatch.watch._current.keys() + [0])
350        
351         self.local.settrace_depth += 1
352    
353     def remove(self, autowatch):
354         self.local.settrace_depth -= 1
355         if self.local.settrace_depth == 0:
356             sys.settrace(None)
357             autowatch.watch.reset(swimlane=self.local.swimlane)
358         list.remove(self, autowatch)
359    
360     def hook(self, frame, event, arg):
361         if event == 'call':
362             for autowatch in self[:]:
363                 curframe = frame
364                 for hop in xrange(autowatch.depth):
365                     if curframe.f_code is autowatch.target.func_code:
366                         if (# For non-methods, always allow:
367                             autowatch._instance is None or
368                             # But for methods, we only hook in if the autowatch
369                             # 'self' arg matches the current frame:
370                             autowatch._instance in curframe.f_locals.values()):
371                             return _LocalTraceHook(autowatch, self.local.swimlane, hop)
372                         break
373                    
374                     curframe = curframe.f_back
375                     if curframe is None:
376                         break
377         return None
378 _globaltracehook = _GlobalTraceHook()
379
380
381 class _LocalTraceHook(object):
382    
383     lastline = None
384    
385     def __init__(self, autowatch, swimlane, depth):
386         self.autowatch = autowatch
387         self.swimlane = swimlane
388         self.depth = depth
389    
390     def __call__(self, frame, event, arg):
391         if event in ('line', 'return', 'exception'):
392             filename = frame.f_globals["__file__"]
393             if (filename.endswith(".pyc") or
394                 filename.endswith(".pyo")):
395                 filename = filename[:-1]
396            
397             if self.lastline is None:
398                 # Use the line just above the current one
399                 self.lastline = (filename, frame.f_lineno - 1, self.depth)
400            
401             self.autowatch.watch.click(self.lastline, swimlane=self.swimlane)
402        
403         if event == 'line':
404             # Prep the label for the *next* click
405             self.lastline = (filename, frame.f_lineno, self.depth)
406        
407         return self
408
409
410 class Autowatch(object):
411     """A wrapper to automatically watch significant lines of a single function.
412     
413     Usage:
414     
415         # Wrap the target func
416         obj.func = Autowatch(obj.func, )
417         # Wait for some timing data to accumulate
418         time.passes()
419         # Unwrap our wrapper
420         obj.func = obj.func.target
421     """
422    
423     def __init__(self, target, watch=None, enabled=True, depth=1,
424                  _instance=None, on_finish=None):
425         self.target = target
426         self.watch = watch or Stopwatch()
427         self.enabled = enabled
428         self.depth = depth
429         self.swimlane = id(self)
430         while self.swimlane in self.watch._current:
431             self.swimlane += 1
432         self._instance = _instance
433         self.on_finish = on_finish
434    
435     def trace(self):
436         _globaltracehook.append(self)
437    
438     def untrace(self):
439         _globaltracehook.remove(self)
440    
441     @property
442     def is_tracing(self):
443         return self in _globaltracehook
444    
445     def __call__(self, *args, **kwargs):
446         if not self.enabled:
447             if self._instance is None:
448                 return self.target(*args, **kwargs)
449             else:
450                 return self.target(self._instance, *args, **kwargs)
451        
452         try:
453             self.trace()
454             if self._instance is None:
455                 result = self.target(*args, **kwargs)
456             else:
457                 result = self.target(self._instance, *args, **kwargs)
458         finally:
459             self.untrace()
460        
461         if self.on_finish:
462             self.on_finish(self)
463        
464         return result
465    
466     def __get__(self, instance, owner):
467         if instance is None:
468             return self
469        
470         # Make a new Autowatch instance each time a method gets an
471         # instancemethod proxy, which is pretty much every time it's referenced.
472         return self.__class__(
473             target=self.target, watch=self.watch, enabled=self.enabled,
474             depth=self.depth, _instance=instance, on_finish=self.on_finish)
475    
476     def get_trace(self, race):
477         """Yield (filename, lineno, depth, linetext, elapsed td) for each race."""
478         labels = self.watch.get_labels()
479         for idx, td in race:
480             filename, lineno, depth = labels[idx]
481             yield (filename, lineno, depth,
482                    linecache.getline(filename, lineno).rstrip()[:80], td)
483    
484     def report(self, race):
485         """Yield (filename, lineno, depth, linetext, elapsed td) for each race.
486         
487         The last line yielded will be ("", 0, -1, "", total time).
488         """
489         subtotals = []
490         for filename, lineno, depth, text, td in self.get_trace(race):
491             if depth > len(subtotals) - 1:
492                 subtotals.append(delta_zero)
493             elif depth < len(subtotals) - 1:
494                 td = subtotals.pop()
495             else:
496                 subtotals = [t + td for t in subtotals]
497            
498             yield filename, lineno, depth, text, td
499        
500         if subtotals:
501             yield "", 0, -1, "", subtotals[0]
502         else:
503             yield "", 0, -1, "", delta_zero
504    
505     def format(self, race, filedepth=2, percent_to_mark=10):
506         """Yield a list of formatted lines reporting on the given race."""
507         tds = [(line[-1], i) for i, line in enumerate(self.report(race))]
508         tds.sort()
509         tds = tds[-int(len(tds) * (percent_to_mark / 100.0)):]
510         top_i = [i for td, i in tds]
511         del tds
512        
513         for i, line in enumerate(self.report(race)):
514             filename, lineno, depth, text, td = line
515             yield ('%s%s<%s:%d>%s (%s)' %
516                    ('*' if i in top_i else ' ', '-' * (depth * 4),
517                     '/'.join(filename.split('/')[-filedepth:]),
518                     lineno, text, td))
519
520
521 def autowatch(watch=None, enabled=True, depth=1):
522     """A decorator to automatically watch significant lines of a single function.
523     
524     Usage:
525     
526         @stopwatch.autowatch()
527         def f(a, b, c):
528             ...
529         
530         [f(1, 2, 3) for i in range(100)]
531         f.watch.write_csv('f.csv', scale=stopwatch.usec, minimum=0.1)
532     """
533     def autowatch_wrapper(f):
534         return Autowatch(target=f, watch=watch, enabled=enabled, depth=depth)
535     return autowatch_wrapper
536
Note: See TracBrowser for help on using the browser.