Contact: fumanchu@aminus.org

Log in as guest/dejavu to create tickets

root/branches/crazycache/dejavu/analysis.py

Revision 479 (checked in by fumanchu, 1 year ago)

Moved all content down into the new dejavu top-level folder.

  • Property svn:eol-style set to native
Line 
1 """Analysis tools for dejavu Units."""
2
# Public names exported by this module.
__all__ = [
    'COUNT',
    'CrossTab',
    'SUM',
    'sort',
]
4
def sort(attrs):
    """Return a 'cmp'-style comparison function for Units from attrs.

    Each item in the attrs sequence should be a str, the name of an
    attribute on the target Units. Optionally, each item may end with
    " ASC" or " DESC" to indicate direction. A single str is treated
    as a one-item sequence.

    The returned function takes two objects (x, y) and returns a
    negative, zero or positive int. It can be passed to
    list.sort(cmp=...) on Python 2, or wrapped with
    functools.cmp_to_key where only key functions are accepted.

    For each attribute in turn, the value is read with getattr() and,
    if it is callable, called with no arguments. None orders before any
    other value (after it when the attribute is " DESC"). Two None
    values compare equal, so the next attribute decides the order.
    """
    try:
        string_types = basestring  # Python 2: matches str and unicode
    except NameError:
        string_types = str
    if isinstance(attrs, string_types):
        attrs = [attrs]
    # Pre-parse each spec once into (attribute name, descending flag).
    specs = [(attr.split(" ", 1)[0], attr.endswith(" DESC"))
             for attr in attrs]

    def value_of(obj, name):
        # Attributes may be plain values or zero-argument callables.
        value = getattr(obj, name)
        if callable(value):
            value = value()
        return value

    def sort_func(x, y):
        for name, descending in specs:
            xv = value_of(x, name)
            yv = value_of(y, name)
            if xv is None or yv is None:
                # -1 if only xv is None, 1 if only yv is None, 0 if both
                # are None (keeps the comparator symmetric).
                diff = (yv is None) - (xv is None)
            else:
                # Version-neutral spelling of cmp(xv, yv).
                diff = (xv > yv) - (xv < yv)
            if descending:
                diff = -diff
            if diff != 0:
                return diff
        return 0
    return sort_func
38
39
def _force_function(attr):
    """Return attr itself when callable, otherwise an attribute getter.

    For a non-callable attr, the returned function takes one object and
    returns getattr(obj, attr).
    """
    if not callable(attr):
        return lambda obj: getattr(obj, attr)
    return attr
49
50
def SUM(attribute):
    """Create a summing aggregate function for use with CrossTab.

    'attribute' is either the name of an attribute, read from each
    object with getattr(), or a callable to which each object is passed.

    The returned function takes (obj, current_agg_value) and returns
    the updated aggregate. A None on either side is skipped: if the
    running value is None the object's value is returned, and if the
    object's value is None the running value is returned unchanged.
    """
    if callable(attribute):
        fetch = attribute
    else:
        def fetch(obj):
            return getattr(obj, attribute)

    def aggfunc(obj, current_agg_value):
        addend = fetch(obj)
        if current_agg_value is None:
            return addend
        if addend is None:
            return current_agg_value
        return current_agg_value + addend

    return aggfunc
75
76
def COUNT(obj, current_agg_value):
    """Aggregate function for use with CrossTab that counts objects.

    'obj' is ignored. Returns the running count plus one; a falsy
    running value (such as None, when no value exists yet) yields 1.
    """
    if current_agg_value:
        return current_agg_value + 1
    return 1
80
81
class CrossTab(list):
    """Tool to form crosstabs of Unit property values.

    Example:
        >>> f = ["a", "b", "cc", "addd", "a4", "6"]
        >>> group = lambda x: x.isalpha()
        >>> pivot = lambda x: x.startswith("a")
        >>> ctab = analysis.CrossTab(f, [group], pivot)
        >>> rows, columns = ctab.results()
        >>> rows
        {(True,): {False: 2, True: 2}, (False,): {False: 1, True: 1}}
        >>> columns
        [False, True]
    """

    def __init__(self, source=None, groups=None, pivot=None, aggfunc=COUNT):
        """CrossTab(source, groups, pivot, aggfunc=COUNT)

        source: an iterable of objects. It is copied into a list, so a
            generator may be supplied. Omitted or None means no objects.

        groups: a sequence of attribute names or callables,
            which will form the rows of the result. A single value that
            is not a tuple or list is wrapped in a one-item list.
            Omitted or None means no groups.

        pivot: either an attribute name or a callable, which will
            form the columns of the result.

        aggfunc: a callable taking (obj, current_agg_value) and
            returning the new aggregate value for a cell.
        """
        # Iterate through generator if provided. We do this here rather
        # than results() because we want to allow multiple calls to
        # results() without exhausting the generator.
        self.source = [] if source is None else list(source)

        # A fresh list per instance: a shared default list stored on
        # self.groups could be mutated through one instance and leak
        # into every later one.
        if groups is None:
            groups = []
        elif not isinstance(groups, (tuple, list)):
            groups = [groups]
        self.groups = groups

        self.pivot = pivot
        self.aggfunc = aggfunc

    def results(self):
        """Return (rows, column_keys) computed from self.source.

        rows: a dict mapping each row key (a tuple holding one value
            per group) to a dict of {column key: aggregate value}.

        column_keys: a sorted list of every distinct column key seen.
        """
        # Force all groups to functions. We do it here instead of __init__
        # so consumers can still read self.groups as strings
        # if that's what they supplied.
        groups = [_force_function(group) for group in self.groups]
        pivot = _force_function(self.pivot)
        aggfunc = self.aggfunc

        rows = {}
        column_keys = set()
        for obj in self.source:
            key = tuple([group(obj) for group in groups])
            col_key = pivot(obj)
            column_keys.add(col_key)

            row = rows.setdefault(key, {})
            # row.get() yields None for a cell with no aggregate yet.
            row[col_key] = aggfunc(obj, row.get(col_key))

        # sorted() returns a new list from any iterable, so this does
        # not rely on keys() returning a sortable list.
        return rows, sorted(column_keys)
141
Note: See TracBrowser for help on using the browser.