| 1 |
"""Analysis tools for dejavu Units.""" |
|---|
| 2 |
|
|---|
| 3 |
# Public names exported by a star-import of this module.
__all__ = ['COUNT', 'CrossTab', 'SUM', 'sort']
|---|
| 4 |
|
|---|
| 5 |
def sort(attrs):
    """Return a 'cmp' function for list.sort() for Units from attrs.

    Each item in the attrs sequence should be a str, the name of an
    attribute on the target Units. Optionally, each item may end with
    " ASC" or " DESC" to indicate direction. A single str may be passed
    instead of a sequence.

    If an attribute value is callable, it is called with no arguments and
    the result is compared instead. None sorts before every other value
    (after, when the attribute is marked DESC). Two None values compare
    equal, so the next attribute in attrs breaks the tie.

    The returned function takes (x, y) and returns a negative number, zero
    or a positive number. On Python 3, wrap it with functools.cmp_to_key()
    before passing it as a sort key.
    """
    try:
        string_types = basestring
    except NameError:
        # Python 3 has no basestring; str is the only text type.
        string_types = str

    if isinstance(attrs, string_types):
        attrs = [attrs]
    # Pre-parse once into (attribute name, descending?) pairs.
    specs = [(attr.split(" ", 1)[0], attr.endswith(" DESC"))
             for attr in attrs]

    def _value(unit, name):
        """Fetch the named attribute, calling it if it is callable."""
        value = getattr(unit, name)
        if callable(value):
            value = value()
        return value

    def sort_func(x, y):
        for name, descending in specs:
            xv = _value(x, name)
            yv = _value(y, name)
            if xv is None and yv is None:
                # Equal on this attribute; returning -1 here (as the old
                # code did) made the comparator inconsistent.
                continue
            if xv is None:
                diff = -1
            elif yv is None:
                diff = 1
            else:
                # Portable spelling of cmp(xv, yv).
                diff = (xv > yv) - (xv < yv)
            if descending:
                diff = -diff
            if diff != 0:
                return diff
        return 0
    return sort_func
|---|
| 38 |
|
|---|
| 39 |
|
|---|
| 40 |
def _force_function(attr): |
|---|
| 41 |
"""If attr is callable, return it, else wrap it in a function.""" |
|---|
| 42 |
if callable(attr): |
|---|
| 43 |
return attr |
|---|
| 44 |
|
|---|
| 45 |
def g(obj): |
|---|
| 46 |
return getattr(obj, attr) |
|---|
| 47 |
|
|---|
| 48 |
return g |
|---|
| 49 |
|
|---|
| 50 |
|
|---|
| 51 |
def SUM(attribute):
    """sum(attribute) -> create an aggregate function for use with crosstab().

    'attribute' is either an attribute name to read from each object, or
    a callable that is passed each object and returns the value to add.

    The returned function takes (obj, current_agg_value) and returns the
    new running total. A None on either side is skipped: the other operand
    is returned unchanged.
    """
    if callable(attribute):
        fetch = attribute
    else:
        def fetch(obj):
            return getattr(obj, attribute)

    def aggfunc(obj, current_agg_value):
        addend = fetch(obj)
        if current_agg_value is None:
            return addend
        if addend is None:
            return current_agg_value
        return current_agg_value + addend
    return aggfunc
|---|
| 75 |
|
|---|
| 76 |
|
|---|
| 77 |
def COUNT(obj, current_agg_value):
    """count -> an aggregate function for use with crosstab().

    Ignores obj and returns the running count plus one. A falsy running
    count (None, 0, ...) restarts the count at 1.
    """
    if not current_agg_value:
        return 1
    return current_agg_value + 1
|---|
| 80 |
|
|---|
| 81 |
|
|---|
| 82 |
class CrossTab(list):
    """Tool to form crosstabs of Unit property values.

    Example:
        >>> f = ["a", "b", "cc", "addd", "a4", "6"]
        >>> group = lambda x: x.isalpha()
        >>> pivot = lambda x: x.startswith("a")
        >>> ctab = analysis.CrossTab(f, [group], pivot)
        >>> rows, columns = ctab.results()
        >>> rows
        {(True,): {False: 2, True: 2}, (False,): {False: 1, True: 1}}
        >>> columns
        [False, True]
    """

    def __init__(self, source=(), groups=None, pivot=None, aggfunc=COUNT):
        """CrossTab(source, groups, pivot, aggfunc=count)

        source: an iterable of objects; it is copied into a new list.

        groups: a sequence of attribute names or callables,
            which will form the rows of the result. A single
            name or callable is wrapped in a one-item list.

        pivot: either an attribute name or a callable, which will
            form the columns of the result.

        aggfunc: a function taking (obj, current_agg_value) and
            returning the new aggregate value for that cell.
        """
        super(CrossTab, self).__init__()

        self.source = list(source)

        if groups is None:
            # Fresh list per instance: a shared default list would leak
            # mutations of self.groups into later instances.
            groups = []
        elif not isinstance(groups, (tuple, list)):
            groups = [groups]
        self.groups = groups

        self.pivot = pivot
        self.aggfunc = aggfunc

    def results(self):
        """Aggregate self.source and return (rows, column_keys).

        rows is a dict keyed by a tuple of group values (one per entry in
        self.groups); each value is a dict mapping a pivot value to the
        aggregate accumulated by self.aggfunc for that cell.

        column_keys is a sorted list of the distinct pivot values seen.
        """
        groups = [_force_function(group) for group in self.groups]
        pivot = _force_function(self.pivot)
        aggfunc = self.aggfunc

        rows = {}
        column_keys = set()
        for obj in self.source:
            key = tuple([group(obj) for group in groups])
            col_key = pivot(obj)
            column_keys.add(col_key)

            row = rows.setdefault(key, {})
            # row.get() yields None for a cell's first object, which the
            # aggregate function receives as its starting value.
            row[col_key] = aggfunc(obj, row.get(col_key))

        # sorted() works on any iterable; dict.keys() has no .sort() on
        # Python 3.
        return rows, sorted(column_keys)
|---|
| 141 |
|
|---|