Contact: fumanchu@aminus.org

Log in as guest/dejavu to create tickets

root/tags/1.4.0/analysis.py

Revision 116 (checked in by fumanchu, 3 years ago)

New __all__ module attributes.

  • Property svn:eol-style set to native
Line 
"""Analysis tools for dejavu Units."""

# Names exported by a star-import of this module; the helper
# _force_function is not listed here.
__all__ = ['COUNT', 'CrossTab', 'SUM', 'sort']
4
def sort(attrs, descending=False):
    """Create a cmp-style comparison function that orders Units by attributes.

    attrs: a single attribute name, or a sequence of attribute names.
        Names are compared in order; a later name is only consulted when
        every earlier one compares equal.

    descending: if True, the sign of each comparison is reversed.

    Returns a function f(x, y) that yields a negative, zero or positive
    integer. An attribute value that is callable is called with no
    arguments to obtain the value to compare. None ranks below any other
    value, and two Nones compare equal.

    The returned function can be passed directly to list.sort() on
    Python 2; on Python 3 wrap it with functools.cmp_to_key first.
    """
    try:
        string_types = (str, unicode)
    except NameError:
        # Python 3 has no separate unicode type.
        string_types = (str,)
    if isinstance(attrs, string_types):
        attrs = (attrs,)

    def value_of(unit, attr):
        # Fetch the attribute, calling it if it turns out to be a method.
        value = getattr(unit, attr)
        if callable(value):
            value = value()
        return value

    def sort_func(x, y):
        for attr in attrs:
            xv = value_of(x, attr)
            yv = value_of(y, attr)
            if xv is None or yv is None:
                # None sorts first. Two Nones must tie, otherwise the
                # comparison is not antisymmetric and the remaining
                # attributes would never get a chance to break the tie.
                diff = (xv is not None) - (yv is not None)
            else:
                # Portable spelling of cmp(xv, yv).
                diff = (xv > yv) - (xv < yv)
            if descending:
                diff = -diff
            if diff != 0:
                return diff
        return 0
    return sort_func
30
31
32 def _force_function(attr):
33     """If attr is callable, return it, else wrap it in a function."""
34     if callable(attr):
35         return attr
36    
37     def g(obj):
38         return getattr(obj, attr)
39    
40     return g
41
42
def SUM(attribute):
    """SUM(attribute) -> create a summing aggregate function for CrossTab.

    'attribute' is either the name of an attribute to read from each
    object, or a callable that is passed each object and returns the
    value to add.

    The returned function takes (obj, current_agg_value) and returns the
    new running total. A None on either side is treated as "no value":
    the other operand is returned unchanged.
    """
    if callable(attribute):
        fetch = attribute
    else:
        def fetch(obj):
            return getattr(obj, attribute)

    def aggfunc(obj, current_agg_value):
        addend = fetch(obj)
        if current_agg_value is None:
            return addend
        if addend is None:
            return current_agg_value
        return current_agg_value + addend

    return aggfunc
67
68
def COUNT(obj, current_agg_value):
    """COUNT -> an aggregate function for CrossTab that tallies objects.

    Returns the running count plus one; a missing (None) or otherwise
    falsy running count is treated as zero. 'obj' itself is not inspected.
    """
    if not current_agg_value:
        return 1
    return current_agg_value + 1
72
73
class CrossTab(list):
    """Tool to form crosstabs of Unit property values.

    Example:
        >>> f = ["a", "b", "cc", "addd", "a4", "6"]
        >>> group = lambda x: x.isalpha()
        >>> pivot = lambda x: x.startswith("a")
        >>> ctab = analysis.CrossTab(f, [group], pivot)
        >>> data, columns = ctab.results()
        >>> data
        {(True,): {False: 2, True: 2}, (False,): {False: 1, True: 1}}
        >>> columns
        [False, True]
    """

    def __init__(self, source=None, groups=None, pivot=None, aggfunc=COUNT):
        """CrossTab(source, groups, pivot, aggfunc=COUNT)

        source: an iterable of objects. It is copied into a list
            immediately, so a generator may be supplied. Omitted or
            None means no objects.

        groups: a sequence of attribute names or callables,
            which will form the rows of the result. A single name or
            callable is wrapped in a one-element list. Omitted or None
            means no grouping.

        pivot: either an attribute name or a callable, which will
            form the columns of the result. With the default of None,
            results() fails on a non-empty source because None is not
            a valid attribute name.

        aggfunc: a function called as aggfunc(obj, current_value) for
            each object, where current_value is the cell's running
            value (None for the first object in a cell); its return
            value becomes the new cell value.
        """
        # Iterate through generator if provided. We do this here rather
        # than results() because we want to allow multiple calls to
        # results() without exhausting the generator.
        if source is None:
            self.source = []
        else:
            self.source = list(source)

        # A fresh list per instance: a mutable default in the signature
        # would be shared by every CrossTab built without 'groups'.
        if groups is None:
            groups = []
        elif not isinstance(groups, (tuple, list)):
            groups = [groups]
        self.groups = groups

        self.pivot = pivot
        self.aggfunc = aggfunc

    def results(self):
        """Compute the crosstab and return (data, columns).

        data: a dict mapping each row key (a tuple with one value per
            group) to a dict of {pivot value: aggregated value}.

        columns: a sorted list of every distinct pivot value seen.
        """
        # Force all groups to functions. We do it here instead of __init__
        # so consumers can still read self.groups as strings
        # if that's what they supplied.
        groups = [_force_function(group) for group in self.groups]
        pivot = _force_function(self.pivot)
        aggfunc = self.aggfunc

        data = {}
        columns = set()
        for obj in self.source:
            key = tuple([group(obj) for group in groups])
            val = pivot(obj)
            columns.add(val)

            row = data.setdefault(key, {})
            row[val] = aggfunc(obj, row.get(val))

        # sorted() returns a new list on both Python 2 and 3, whereas
        # dict.keys() is not sortable in place on Python 3.
        return data, sorted(columns)
133
Note: See TracBrowser for help on using the browser.