Contact: fumanchu@aminus.org

Log in as guest/geniusql to create tickets

root/trunk/geniusql/genexp.py

Revision 159 (checked in by fumanchu, 3 years ago)

Work on generator expressions, including a new JoinIterator? class (since the outmost-iterator in f_locals must be an iterator).

  • Property svn:eol-style set to native
Line 
1 from geniusql.astwalk import *
2
3
4 abs_sentinel = object()
5
6
7 class GenexpParser(codewalk.Visitor):
8     """Produce an AST from a supplied generator expression.
9     
10     reduce: if True (the default), free variables are turned into Consts
11         wherever possible.
12     
13     Example: k = lambda x: x.Date == datetime.date(2004, 1, 1)
14              q = LambdaParser(k, reduce=False).ast()
15              r = LambdaParser(k, reduce=True).ast()
16     
17     _____ q _____
18     Lambda(['x'], [], 0,
19       Compare(Getattr(Name('x'), 'Date'),
20               [('==', CallFunc(Getattr(Name('datetime'), 'date'),
21                                [Const(2004), Const(1), Const(1)],
22                                None, None))
23               ]))
24     
25     _____ r _____
26     Lambda(['x'], [], 0,
27       Compare(Getattr(Name('x'), 'Date'),
28               [('==', Const(datetime.date(2004, 1, 1)))])
29           )
30     
31     reduce also pre-computes binary operations *and all other builtin or
32     free functions* where all operands are constants, globals, or freevars.
33     For example:
34         Mul((3, 4))
35         
36     is replaced with:
37         Const(12)
38     
39     However, order is important. lambda x: x * 4 * 5 won't see any
40     optimization, because the order of eval is (x * 4) * 5. Rewritten
41     as lambda x: 4 * 5 * x, the "4 * 5" can be replaced with "20".
42     
43     irreducible: a list of constants (globals, freevars, or attributes)
44         which should not be reduced. For example, datetime.date.today
45         would usually be called and the result stored in the AST (since
46         it takes no arguments, and therefore has no free variables).
47         If you want the today function to be stored directly in the AST
48         (so it can be called later), include it in this "irreducible" list.
49         This does not control the conversion of globals and free variables
50         into constants--that happens regardless. It only controls the
51         reduction of complex expressions into simpler ones.
52     
53     env: a dict of objects which will be used to make Consts out of
54         globals and builtins. This is auto-populated with items in
55         __builtin__ and any globals which were present at the time
56         the function was created, so you usually don't have to add
57         anything. However, it can be a handy way for frameworks to
58         provide globals without forcing every caller to import them.
59     """
60    
61     # ([t1.a, t1.b + t2.a] for t1, t2 in relation if t1.a > 13)
62     ##              0 SETUP_LOOP              62 (to 65)
63     ##              3 LOAD_FAST                0 '[outmost-iterable]'
64     ##              6 FOR_ITER                55 (to 64)
65     # Unpack "for t1, t2 in relation"
66     ##              9 UNPACK_SEQUENCE          2
67     ##             12 STORE_FAST               2 (t1)
68     ##             15 STORE_FAST               1 (t2)
69     # The restriction (e.g. "if t1.a > 13")
70     ##             18 LOAD_FAST                2 (t1)
71     ##             21 LOAD_ATTR                3 (a)
72     ##             24 LOAD_CONST               0 (13)
73     ##             27 COMPARE_OP               4 (>)
74     ##             30 JUMP_IF_FALSE           27 (to 60)
75     ##             33 POP_TOP             
76     # The attributes (the yielded list/tuple)
77     ##             34 LOAD_FAST                2 (t1)
78     ##             37 LOAD_ATTR                3 (a)
79     ##             40 LOAD_FAST                2 (t1)
80     ##             43 LOAD_ATTR                4 (b)
81     ##             46 LOAD_FAST                1 (t2)
82     ##             49 LOAD_ATTR                3 (a)
83     ##             52 BINARY_ADD         
84     ##             53 BUILD_LIST               2
85     ##             56 YIELD_VALUE
86     ##             57 JUMP_ABSOLUTE            6
87     ##             60 POP_TOP
88     # Cruft at the end
89     ##             61 JUMP_ABSOLUTE            6
90     ##             64 POP_BLOCK
91     ##             65 LOAD_CONST               1
92     ##             68 RETURN_VALUE
93    
94     def __init__(self, genexp, env=None, reduce=True, irreducible=None):
95         codewalk.Visitor.__init__(self, genexp)
96        
97         self.reduce = reduce
98        
99         if env is None:
100             self.env = {}
101         else:
102             self.env = env.copy()
103         import __builtin__
104         self.env.update(vars(__builtin__))
105         self.env.update(genexp.gi_frame.f_globals)
106        
107         self.relation = genexp.gi_frame.f_locals['[outmost-iterable]']
108        
109         self.ast = AST()
110         fc = genexp.gi_frame.f_code
111         self.ast.args = list(fc.co_varnames)
112         if fc.co_flags & codewalk.CO_VARKEYWORDS:
113             self.ast.dstar_args = self.ast.args.pop()
114         if fc.co_flags & codewalk.CO_VARARGS:
115             self.ast.star_args = self.ast.args.pop()
116        
117         if irreducible is None:
118             irreducible = []
119         self.irreducible = irreducible
120    
121     def walk(self):
122         """Walk self and set self.ast.root."""
123         self.stack = []
124         self.targets = {}
125         self.stage = 1
126        
127         codewalk.Visitor.walk(self)
128        
129         if self.verbose:
130             self.debug("stack:", self.stack)
131    
132     def _may_reduce(self, *terms):
133         """Return True if all terms are ast.Const and not marked irreducible."""
134         for term in terms:
135             if not isinstance(term, ast.Const):
136                 return False
137             if term.value in self.irreducible:
138                 return False
139             if getattr(term.value, "irreducible", False):
140                 return False
141         return True
142    
143     def visit_instruction(self, op, lo=None, hi=None):
144         # Get the instruction pointer for the current instruction.
145         ip = self.cursor - 3
146         if hi is None:
147             ip += 1
148             if lo is None:
149                 ip += 1
150        
151         # This is where we do folding of logical AND and OR operators.
152         # The Python code just writes "a AND b", but the VM (bytecode)
153         # acts more like assembly, using conditional JUMP instructions to
154         # implement logical operators. The map stored in self.targets is
155         # of the form:
156         #     {JUMP target: [(self.stack[-1], ast.And), ...]}
157         # where "JUMP target" is the instruction number of the bytecode
158         # which is the target of the JUMP, and each item in the value list
159         # is a tuple of (top of the calling stack, operation).
160         # It's a list because a single bytecode may be the target of
161         # multiple JUMP instructions.
162         # See visit_JUMP_IF_FALSE / TRUE.
163         terms = self.targets.get(ip)
164         if terms:
165             clause = self.stack[-1]
166             while terms:
167                 term, oper = terms.pop()
168                 if self.stage == 3:
169                     # We're after the YIELD_VALUE, so the term in this case
170                     # is the entire restriction, and doesn't need to be
171                     # combined with TOS. Instead, just file it away.
172                     self.restriction = term
173                     return
174                 elif self.reduce and self._may_reduce(term, clause):
175                     op = ast_to_op[oper]
176                     clause = ast.Const(op(term.value, clause.value))
177                 else:
178                     clause = oper((term, clause))
179             self.stack[-1] = clause
180             if self.verbose:
181                 self.debug("clause:", clause, "\n")
182            
183             if op == 1:
184                 # Py2.4: The current instruction is POP_TOP, which means
185                 # the previous is probably JUMP_*. If so, we don't want to
186                 # pop the value we just placed on the stack and lose it.
187                 # We need to replace the entry that the JUMP_* made in
188                 # self.targets with our new TOS.
189                 target = self.targets[self.last_target_ip]
190                 target[-1] = ((clause, target[-1][1]))
191                 if self.verbose:
192                     self.debug("newtarget:", self.last_target_ip, target)
193    
194     def visit_FOR_ITER(self, lo, hi):
195         self.stage = 2
196    
197     def visit_YIELD_VALUE(self):
198         # The top of stack will be our 'attributes' AST.
199         # Let the upcoming POP_TOP pop it off; we'll just grab it.
200         self.attributes = self.stack[-1]
201         self.stage = 3
202    
203     def visit_BUILD_LIST(self, lo, hi):
204         numterms = lo + (hi << 8)
205         if numterms:
206             self.stack[-numterms:] = [ast.List(self.stack[-numterms:])]
207    
208     def visit_BUILD_MAP(self, lo, hi):
209         # Add an empty Dict and add to its .items in STORE_SUBSCR
210         self.stack.append(ast.Dict([]))
211    
212     def visit_BUILD_TUPLE(self, lo, hi):
213         numterms = lo + (hi << 8)
214         if numterms:
215             self.stack[-numterms:] = [ast.Tuple(self.stack[-numterms:])]
216    
217     def visit_CALL_FUNCTION(self, lo, hi):
218         kwargs = []
219         for i in xrange(hi):
220             val = self.stack.pop()
221             key = self.stack.pop()
222             kwargs.append(ast.Keyword(key, val))
223        
224         if lo:
225             args = self.stack[-lo:]
226             self.stack[-lo:] = []
227         else:
228             args = []
229        
230         func = self.stack[-1]
231        
232         if self.reduce and self._may_reduce(func):
233             if func.value is getattr and not isinstance(args[0], ast.Const):
234                 self.stack[-1] = ast.Getattr(args[0], args[1].value)
235                 return
236             else:
237                 # If all args/kwargs are also Const,
238                 # reduce to a single Const.
239                 argvals = [a.value for a in args if self._may_reduce(a)]
240                 if len(argvals) == len(args):
241                     kwargvals = dict([(k.name, k.expr.value) for k, v in kwargs
242                                       if self._may_reduce(k.expr)])
243                     if len(kwargvals) == len(kwargs):
244                         retval = func.value(*tuple(argvals), **kwargvals)
245                         self.stack[-1] = ast.Const(retval)
246                         return
247        
248         if kwargs:
249             args += kwargs
250         self.stack[-1] = ast.CallFunc(func, args)
251    
252     def visit_COMPARE_OP(self, lo, hi):
253         term1, term2 = self.stack[-2:]
254         op = cmp_op[lo + (hi << 8)]
255         if self.reduce and self._may_reduce(term1, term2):
256             oper = codewalk.comparisons[op]
257             self.stack[-2:] = [ast.Const(oper(term1.value, term2.value))]
258         else:
259             self.stack[-2:] = [ast.Compare(term1, [(op, term2)])]
260         if self.verbose:
261             self.debug(op)
262    
263     def visit_DUP_TOP(self):
264         self.stack.append(self.stack[-1])
265    
266     def visit_JUMP_IF_FALSE(self, lo, hi):
267         # Note that self.cursor has already advanced to the next instruction.
268         target = self.cursor + (lo + (hi << 8))
269         bucket = self.targets.setdefault(target, [])
270         bucket.append((self.stack[-1], ast.And))
271         if self.verbose:
272             self.debug("target:", target, bucket)
273         # Store target ip for the special code in visit_instruction
274         self.last_target_ip = target
275    
276     def visit_JUMP_IF_TRUE(self, lo, hi):
277         # Note that self.cursor has already advanced to the next instruction.
278         target = self.cursor + (lo + (hi << 8))
279         bucket = self.targets.setdefault(target, [])
280         bucket.append((self.stack[-1], ast.Or))
281         if self.verbose:
282             self.debug("target:", target, bucket)
283         # Store target ip for the special code in visit_instruction
284         self.last_target_ip = target
285    
286     def visit_LOAD_ATTR(self, lo, hi):
287         term = self.co_names[lo + (hi << 8)]
288         obj = self.stack[-1]
289         if self.reduce and self._may_reduce(obj):
290             self.stack[-1] = ast.Const(getattr(obj.value, term))
291         else:
292             self.stack[-1] = ast.Getattr(obj, term)
293         if self.verbose:
294             self.debug(term)
295    
296     def visit_LOAD_CONST(self, lo, hi):
297         val = self.co_consts[lo + (hi << 8)]
298         self.stack.append(ast.Const(val))
299         if self.verbose:
300             self.debug(val)
301    
302     def visit_LOAD_DEREF(self, lo, hi):
303         if self.reduce and hasattr(self, '_func'):
304             value = self._func.func_closure[lo + (hi << 8)]
305             self.stack.append(ast.Const(codewalk.deref_cell(value)))
306             return
307        
308         name = self.co_freevars[lo + (hi << 8)]
309         self.stack.append(ast.Name(name))
310    
311     def visit_LOAD_FAST(self, lo, hi):
312         term = self.co_varnames[lo + (hi << 8)]
313         self.stack.append(ast.Name(term))
314         if self.verbose:
315             self.debug(term)
316    
317     def visit_LOAD_GLOBAL(self, lo, hi):
318         name = self.co_names[lo + (hi << 8)]
319         if self.reduce:
320             if name not in self.env:
321                 raise KeyError("'%s' is not present in supplied globals." % name)
322             self.stack.append(ast.Const(self.env[name]))
323             return
324        
325         self.stack.append(ast.Name(name))
326    
327     def visit_POP_TOP(self):
328         self.stack.pop()
329    
330     def visit_ROT_TWO(self):
331         k, v = self.stack[-2:]
332         self.stack[-2:] = [v, k]
333    
334     def visit_ROT_THREE(self):
335         x, k, v = self.stack[-3:]
336         self.stack[-3:] = [v, x, k]
337    
338     def visit_SLICE_PLUS_0(self):
339         obj = self.stack[-1]
340         if self.reduce and self._may_reduce(obj):
341             self.stack[-1] = ast.Const(obj.value[:])
342         else:
343             self.stack[-1] = ast.Slice(obj, 'OP_APPLY', None, None)
344    
345     def visit_SLICE_PLUS_1(self):
346         obj, arg = self.stack[-2:]
347         if self.reduce and self._may_reduce(obj, arg):
348             self.stack[-2:] = [ast.Const(obj.value[arg.value:])]
349         else:
350             self.stack[-2:] = [ast.Slice(obj, 'OP_APPLY', arg, None)]
351    
352     def visit_SLICE_PLUS_2(self):
353         obj, arg = self.stack[-2:]
354         if self.reduce and self._may_reduce(obj, arg):
355             self.stack[-2:] = [ast.Const(obj.value[:arg.value])]
356         else:
357             self.stack[-2:] = [ast.Slice(obj, 'OP_APPLY', None, arg)]
358    
359     def visit_SLICE_PLUS_3(self):
360         obj, arg1, arg2 = self.stack[-3:]
361         if self.reduce and self._may_reduce(obj, arg1, arg2):
362             self.stack[-3:] = [ast.Const(obj.value[arg1.value:arg2.value])]
363         else:
364             self.stack[-3:] = [ast.Slice(obj, 'OP_APPLY', arg1, arg2)]
365    
366     def visit_STORE_SUBSCR(self):
367         # 'x' should be an ast.Dict
368         v, x, k = self.stack[-3:]
369         self.stack[-3:] = []
370         x.items.append((k, v))
371    
372     def visit_UNARY_INVERT(self):
373         term = self.stack[-1]
374         if self.reduce and self._may_reduce(term):
375             self.stack[-1] = ast.Const(~term.value)
376         else:
377             self.stack[-1] = ast.Invert(term)
378    
379     def visit_UNARY_NEGATIVE(self):
380         term = self.stack[-1]
381         if self.reduce and self._may_reduce(term):
382             self.stack[-1] = ast.Const(-term.value)
383         else:
384             self.stack[-1] = ast.UnarySub(term)
385    
386     def visit_UNARY_NOT(self):
387         term = self.stack[-1]
388         if self.reduce and self._may_reduce(term):
389             self.stack[-1] = ast.Const(not term.value)
390         else:
391             self.stack[-1] = ast.Not(term)
392    
393     def visit_UNARY_POSITIVE(self):
394         term = self.stack[-1]
395         if self.reduce and self._may_reduce(term):
396             self.stack[-1] = ast.Const(+term.value)
397         else:
398             self.stack[-1] = ast.UnaryAdd(term)
399    
400     def visit_BINARY_SUBSCR(self):
401         op1, op2 = self.stack[-2:]
402         if self.reduce and self._may_reduce(op1, op2):
403             self.stack[-2:] = [ast.Const(op1.value[op2.value])]
404         else:
405             self.stack[-2:] = [ast.Subscript(op1, 'OP_APPLY', [op2])]
406    
407     def binary_op(self, op):
408         op1, op2 = self.stack[-2:]
409         if self.reduce and self._may_reduce(op1, op2):
410             self.stack[-2:] = [ast.Const(ast_to_op[op](op1.value, op2.value))]
411         else:
412             # Binary ops like ast.Add take a single tuple as a first arg
413             self.stack[-2:] = [op((op1, op2))]
414    
415     def bit_op(self, op):
416         op1, op2 = self.stack[-2:]
417         if self.reduce and self._may_reduce(op1, op2):
418             self.stack[-2:] = [ast.Const(ast_to_op[op](op1.value, op2.value))]
419         else:
420             self.stack[-2:] = [op(op1, op2)]
421
422
423 # Add visit_BINARY methods to LambdaParser.
424 for k, v in binary_to_ast.iteritems():
425     setattr(GenexpParser, "visit_" + k,
426             lambda self, op=v: self.binary_op(op))
427 for k, v in bit_to_ast.iteritems():
428     setattr(GenexpParser, "visit_" + k,
429             lambda self, op=v: self.bit_op(op))
Note: See TracBrowser for help on using the browser.