| 1 |
"""Analysis tools for dejavu Units.""" |
|---|
| 2 |
|
|---|
| 3 |
|
|---|
| 4 |
def sort(attrs, descending=False):
    """sort(attrs, descending=False) -> create a comparison function for Units.

    attrs: a single attribute name, or a sequence of attribute names, that
        are compared in order; later names only break ties left by earlier
        ones. If an attribute value is callable it is called with no
        arguments and the result is compared instead.
    descending: if True, the sign of every comparison is inverted.

    Returns a two-argument function ``sort_func(x, y)`` that returns a
    negative number, zero or a positive number, in the style of the old
    ``cmp`` protocol. On Python 2 pass it straight to ``list.sort()``; on
    Python 3 wrap it with ``functools.cmp_to_key`` first.

    A value of None orders before any other value (after it when
    ``descending`` is true). Two None values compare equal, so the next
    attribute in ``attrs`` decides the order.
    """
    # 'unicode' only exists on Python 2; fall back to plain str elsewhere.
    try:
        string_types = (str, unicode)
    except NameError:
        string_types = (str,)

    if isinstance(attrs, string_types):
        attrs = (attrs, )

    def _value(unit, attr):
        # Resolve the attribute, calling it when it is a method/callable.
        value = getattr(unit, attr)
        if callable(value):
            value = value()
        return value

    def sort_func(x, y):
        for attr in attrs:
            xv = _value(x, attr)
            yv = _value(y, attr)
            if xv is None or yv is None:
                # None sorts first; None vs. None is a tie. Returning -1
                # for both orderings would make the comparator inconsistent.
                diff = int(xv is not None) - int(yv is not None)
            else:
                # Portable spelling of cmp(xv, yv), which Python 3 removed.
                diff = (xv > yv) - (xv < yv)
            if descending:
                diff = -diff
            if diff != 0:
                return diff
        return 0
    return sort_func
|---|
| 29 |
|
|---|
| 30 |
|
|---|
| 31 |
def _force_function(attr): |
|---|
| 32 |
"""If attr is callable, return it, else wrap it in a function.""" |
|---|
| 33 |
if callable(attr): |
|---|
| 34 |
return attr |
|---|
| 35 |
|
|---|
| 36 |
def g(obj): |
|---|
| 37 |
return getattr(obj, attr) |
|---|
| 38 |
|
|---|
| 39 |
return g |
|---|
| 40 |
|
|---|
| 41 |
|
|---|
| 42 |
def SUM(attribute):
    """SUM(attribute) -> create an aggregate function that adds values up.

    'attribute' is either an attribute name, which is read from each
    object with getattr, or a callable, which is called with each object.

    The returned function takes (obj, current_agg_value) and returns the
    new aggregate. None is treated as "no value" on either side: a None
    aggregate yields the object's value, and a None value leaves the
    aggregate unchanged.
    """
    if callable(attribute):
        read = attribute
    else:
        def read(obj):
            return getattr(obj, attribute)

    def aggfunc(obj, current_agg_value):
        value = read(obj)
        if current_agg_value is None:
            return value
        if value is None:
            return current_agg_value
        return current_agg_value + value

    return aggfunc
|---|
| 66 |
|
|---|
| 67 |
|
|---|
| 68 |
def COUNT(obj, current_agg_value):
    """count -> an aggregate function that counts objects.

    obj is ignored. A missing (None) or otherwise falsy aggregate starts
    the count at 1; any other aggregate is incremented by one.
    """
    if not current_agg_value:
        return 1
    return current_agg_value + 1
|---|
| 71 |
|
|---|
| 72 |
|
|---|
| 73 |
class CrossTab(list):
    """Tool to form crosstabs of Unit property values.

    Example:
        >>> f = ["a", "b", "cc", "addd", "a4", "6"]
        >>> group = lambda x: x.isalpha()
        >>> pivot = lambda x: x.startswith("a")
        >>> ctab = analysis.CrossTab(f, [group], pivot)
        >>> data, columns = ctab.results()
        >>> data == {(True,): {False: 2, True: 2},
        ...          (False,): {False: 1, True: 1}}
        True
        >>> columns
        [False, True]
    """

    def __init__(self, source=None, groups=None, pivot=None, aggfunc=COUNT):
        """CrossTab(source, groups, pivot, aggfunc=COUNT)

        source: an iterable of objects; it is copied into a new list.
            Omitting it (or passing None) gives an empty source.

        groups: a sequence of attribute names or callables,
            which will form the rows of the result. A single name or
            callable (anything that is not a tuple or list) is wrapped
            in a one-element list. Omitting it gives no grouping.

        pivot: either an attribute name or a callable, which will
            form the columns of the result.

        aggfunc: a function called as aggfunc(obj, current_value) for
            every object; current_value is None the first time a given
            row/column cell is seen. Its return value becomes the new
            cell value.
        """
        # None defaults instead of [] so no list is shared between instances.
        if source is None:
            source = ()
        self.source = list(source)

        if groups is None:
            groups = []
        elif not isinstance(groups, (tuple, list)):
            groups = [groups]
        self.groups = groups

        self.pivot = pivot
        self.aggfunc = aggfunc

    def results(self):
        """Aggregate self.source and return (data, columns).

        data maps a row key -- the tuple of every group's value for an
        object -- to a dict of {pivot value: aggregated value}.
        columns is the sorted list of every pivot value encountered.

        Note: when self.pivot is still the default None and the source is
        not empty, the pivot lookup goes through getattr(obj, None), which
        raises TypeError; supply a pivot for non-empty sources.
        """
        groups = [_force_function(group) for group in self.groups]
        pivot = _force_function(self.pivot)
        aggfunc = self.aggfunc

        data = {}
        columns = set()
        for obj in self.source:
            key = tuple([group(obj) for group in groups])
            val = pivot(obj)
            columns.add(val)

            row = data.setdefault(key, {})
            row[val] = aggfunc(obj, row.get(val))

        # sorted() returns a real list on both Python 2 and 3, whereas
        # dict.keys() is a view without .sort() on Python 3.
        return data, sorted(columns)
|---|
| 132 |
|
|---|