Contact: fumanchu@aminus.org

Log in as guest/dejavu to create tickets

I think I've seen this ORM somewhere before...

root/trunk/codewalk.py

Revision 41 (checked in by fumanchu, 8 years ago)

1. codewalk.EarlyBinder? now late-binds any object with a bind_late attribute which evaluates to True. Notice that functions are first-class objects and so may be assigned such an attribute.
2. Changed postgres timedelta from pg interval to float8.

Line 
1 """Bytecode visitors, including rewriters and decompilers.
2
3 This work, including the source code, documentation
4 and related data, is placed into the public domain.
5
6 The orginal author is Robert Brewer, Amor Ministries.
7
8 THIS SOFTWARE IS PROVIDED AS-IS, WITHOUT WARRANTY
9 OF ANY KIND, NOT EVEN THE IMPLIED WARRANTY OF
10 MERCHANTABILITY. THE AUTHOR OF THIS SOFTWARE
11 ASSUMES _NO_ RESPONSIBILITY FOR ANY CONSEQUENCE
12 RESULTING FROM THE USE, MODIFICATION, OR
13 REDISTRIBUTION OF THIS SOFTWARE.
14
15 """
16
17 version = "0.3.2"
18
19 changelog = """
20 0.3.2 (8/8/04):
21     1. After profiling, added _co_code_attrs because dir() was so slow.
22
23 0.3.1 (4/29/04):
24     1. Added KeywordInspector.
25     2. Made LambdaDecompiler use function __name__, not repr() (early
26         binding makes e.g. datetime.date into <type datetime.date>).
27     3. Added safe_tuple.
28     4. Added TaintableStack.maxsize to help stack calculation.
29     5. Forgot to call deref_cell from visit_LOAD_DEREF! D'oh!
30     6. Put visit_ methods in alpha order.
31     7. Added name_index to Rewriter.
32     8. Got rid of extra args to EarlyBinder.
33     9. Added reduction of getattr to EarlyBinder as an option.
34     10. Combined 'ands' and 'ors' into 'targets' in LambdaDecompiler.
35     11. Fixed parenthesis problems in LambdaDecompiler.
36     12. Added BranchTracker class.
37 """
38
39 todo = """
40 1. Make a StackCalculator subclass of Visitor, call it in
41     Rewriter.code_object to get co_stacksize.
42 2. Make named_opcodes(bits), which returns names for the first code in
43     each word, numbers otherwise.
44 3. Rewrite dis as a Visitor.
45 """
46
47 from opcode import cmp_op, opname, opmap, HAVE_ARGUMENT
48 import operator
49 import sets
50 from types import CodeType, FunctionType
51
52
53 def numeric_opcodes(bits):
54     """Change named opcode bits to their numeric equivalents."""
55     bitnums = []
56     for x in bits:
57         if isinstance(x, basestring):
58             x = opmap[x]
59         bitnums.append(x)
60     return bitnums
61
62
63 _deref_bytecode = numeric_opcodes(['LOAD_DEREF', 0, 0, 'RETURN_VALUE'])
64 # CodeType(argcount, nlocals, stacksize, flags, codestring, constants,
65 #          names, varnames, filename, name, firstlineno,
66 #          lnotab[, freevars[, cellvars]])
67 _derefblock = CodeType(0, 0, 1, 3, ''.join(map(chr, _deref_bytecode)),
68                        (None,), ('cell',), (), '', '', 2, '', ('cell',))
69 def deref_cell(cell):
70     # FunctionType(code, globals[, name[, argdefs[, closure]]])
71     return FunctionType(_derefblock, {}, "", (), (cell,))()
72
73 def make_closure(*args):
74     def inner():
75         args
76     return inner.func_closure
77
78
79 binary_operators = {'BINARY_POWER': operator.pow,
80                     'BINARY_MULTIPLY': operator.mul,
81                     'BINARY_DIVIDE': operator.div,
82                     'BINARY_FLOOR_DIVIDE': operator.floordiv,
83                     'BINARY_TRUE_DIVIDE': operator.truediv,
84                     'BINARY_MODULO': operator.mod,
85                     'BINARY_ADD': operator.add,
86                     'BINARY_SUBTRACT': operator.sub,
87                     'BINARY_SUBSCR': operator.getitem,
88                     'BINARY_LSHIFT': operator.lshift,
89                     'BINARY_RSHIFT': operator.rshift,
90                     'BINARY_AND': operator.and_,
91                     'BINARY_XOR': operator.xor,
92                     'BINARY_OR': operator.or_,
93                     }
94 inplace_operators = dict([('INPLACE_' + k.split('_')[1], v)
95                           for k, v in binary_operators.iteritems()
96                           if k not in ('BINARY_SUBSCR',)
97                           ])
98
99 binary_repr = {'BINARY_POWER': '**',
100                'BINARY_MULTIPLY': '*',
101                'BINARY_DIVIDE': '/',
102                'BINARY_FLOOR_DIVIDE': '//',
103                'BINARY_TRUE_DIVIDE': '/',
104                'BINARY_MODULO': '%',
105                'BINARY_ADD': '+',
106                'BINARY_SUBTRACT': '-',
107                'BINARY_LSHIFT': '<<',
108                'BINARY_RSHIFT': '>>',
109                'BINARY_AND': '&',
110                'BINARY_XOR': '^',
111                'BINARY_OR': '|',
112                }
113
114 inplace_repr = dict([('INPLACE_' + k.split('_')[1], v + '=')
115                      for k, v in binary_repr.iteritems()])
116
117 comparisons = {'<': operator.lt,
118                '<=': operator.le,
119                '==': operator.eq,
120                '!=': operator.ne,
121                '>': operator.gt,
122                '>=': operator.gt,
123                'in': operator.contains,
124                'not in': lambda x, y: not x in y,
125                'is': operator.is_,
126                'is not': operator.is_not,
127                }
128
129 _co_code_attrs = ['co_argcount', 'co_cellvars', 'co_code', 'co_consts',
130                   'co_filename', 'co_firstlineno', 'co_flags', 'co_freevars',
131                   'co_lnotab', 'co_name', 'co_names', 'co_nlocals',
132                   'co_stacksize', 'co_varnames']
133
134
135 class Visitor(object):
136     """Visitor(obj=function, code object, string, or list of opcodes)."""
137    
138     def __init__(self, obj):
139         # Distill supplied 'obj' arg to a code block string.
140         if isinstance(obj, FunctionType):
141             obj = obj.func_code
142         try:
143             obj = obj.co_code
144         except AttributeError:
145             pass
146        
147         # Map the code block string to a list of opcode numbers.
148         if isinstance(obj, basestring):
149             obj = map(ord, obj)
150         elif isinstance(obj, list):
151             obj = obj[:]
152        
153         self._bytecode = obj
154    
155     def walk(self):
156         self.cursor = 0
157         b = self._bytecode
158         while self.cursor < len(b):
159             op = b[self.cursor]
160             self.cursor += 1
161             if op >= HAVE_ARGUMENT:
162                 lo = b[self.cursor]
163                 self.cursor += 1
164                 hi = b[self.cursor]
165                 self.cursor += 1
166                 args = (lo, hi)
167             else:
168                 args = ()
169            
170             self.visit_instruction(op, *args)
171            
172             name = 'visit_' + opname[op]
173             name = name.replace('+', '_PLUS_')
174            
175             if hasattr(self, name):
176                 getattr(self, name)(*args)
177    
178     def visit_instruction(self, op, lo=None, hi=None):
179         pass
180
181
182 class JumpCodeAdjuster(Visitor):
183     """JumpCodeAdjuster(obj=[func|co|str|list], start, end, newlength).
184     
185     Adjusts jump codes if their target is affected by bytecode changes.
186     
187     start, end: The range of the original bytecode in question.
188     newlength: Length of the codes which overwrote bytecode[start:end].
189     """
190    
191     def __init__(self, obj, start, end, newlength):
192         Visitor.__init__(self, obj)
193         self.start = start
194         self.end = end
195         self.offset = newlength - (end - start)
196    
197     def bytecode(self):
198         """Walk self and return new bytecode."""
199         self.walk()
200         return self.newcode
201    
202     def walk(self):
203         if self.offset == 0:
204             # Avoid costly walk if no changes will be made.
205             self.newcode = self._bytecode
206         else:
207             self.newcode = []
208             Visitor.walk(self)
209    
210     def visit_instruction(self, op, lo=None, hi=None):
211         append = self.newcode.append
212         append(op)
213         if lo is not None:
214             append(lo)
215         if hi is not None:
216             append(hi)
217    
218     def visit_CONTINUE_LOOP(self, lo, hi):
219         self.visit_JUMP_ABSOLUTE(lo, hi)
220    
221     def visit_JUMP_ABSOLUTE(self, lo, hi):
222         target = lo + (hi << 8)
223         if target > self.start:
224             pos = target + self.offset
225             self.newcode[-2:] = [pos & 0xFF, pos >> 8]
226    
227     def visit_JUMP_FORWARD(self, lo, hi):
228         delta = lo + (hi << 8)
229         target = self.cursor + delta
230         if self.cursor < self.end and target > self.start:
231             pos = (target + self.offset) - self.cursor
232             self.newcode[-2:] = [pos & 0xFF, pos >> 8]
233    
234     def visit_JUMP_IF_FALSE(self, lo, hi):
235         self.visit_JUMP_FORWARD(lo, hi)
236    
237     def visit_JUMP_IF_TRUE(self, lo, hi):
238         self.visit_JUMP_FORWARD(lo, hi)
239
240
241 def safe_tuple(seq):
242     """Force func_code attributes to tuples of strings.
243     
244     Many of the func_code attributes must take tuples, not lists,
245     and *cannot* accept unicode items--they must be coerced to strings
246     or the interpreter will crash.
247     """
248     seq = map(str, seq)
249     return tuple(seq)
250
251
252 class Rewriter(Visitor):
253     """Rewriter(obj=function or code object).
254     
255     Produce a new function or code object by rewriting an existing one.
256     
257     Notice that, unlike the base Visitor class, Rewriter does not accept a
258     string or list of opcodes as an initial argument.
259     """
260    
261     def __init__(self, obj):
262         if isinstance(obj, FunctionType):
263             self._func = obj
264             obj = obj.func_code
265        
266         # Copy code object attributes.
267         for name in _co_code_attrs:
268             value = getattr(obj, name)
269             if isinstance(value, tuple):
270                 value = list(value)
271             setattr(self, name, value)
272        
273         try:
274             obj = obj.co_code
275         except AttributeError:
276             pass
277        
278         # Map the code block to a list of opcode numbers.
279         if isinstance(obj, basestring):
280             bytecode = map(ord, obj)
281         elif isinstance(obj, list):
282             bytecode = obj[:]
283         else:
284             raise TypeError("obj arg of incorrect type '%s'" % type(obj))
285        
286         self._bytecode = bytecode
287    
288     def bytecode(self):
289         """Walk self and return new bytecode."""
290         self.walk()
291         return self.newcode
292    
293     def code_object(self):
294         """Walk self and produce a new code object."""
295         self.walk()
296         codestr = ''.join(map(chr, self.newcode))
297         return CodeType(self.co_argcount, self.co_nlocals, self.co_stacksize,
298                         # Notice co_consts should *not* be safe_tupled.
299                         self.co_flags, codestr, tuple(self.co_consts),
300                         safe_tuple(self.co_names), safe_tuple(self.co_varnames),
301                         self.co_filename, self.co_name, self.co_firstlineno,
302                         self.co_lnotab, safe_tuple(self.co_freevars),
303                         safe_tuple(self.co_cellvars))
304    
305     def function(self, newname=None):
306         """Walk self and produce a new function."""
307         try:
308             f = self._func
309         except AttributeError:
310             if newname is None:
311                 newname = ''
312             co = self.code_object()
313             return FunctionType(co, {}, newname)
314         else:
315             if newname is None:
316                 newname = f.func_name
317             co = self.code_object()
318             return FunctionType(co, f.func_globals, newname,
319                                 f.func_defaults, f.func_closure)
320    
321     def const_index(self, value):
322         """The index of value in co_consts, appending it if not found."""
323         for pos, item in enumerate(self.co_consts):
324             try:
325                 if type(value) == type(item) and value == item:
326                     break
327             except TypeError:
328                 pass
329         else:
330             pos = len(self.co_consts)
331             self.co_consts.append(value)
332         return pos
333    
334     def name_index(self, value):
335         """The index of value in co_names, appending it if not found."""
336         valtype = type(value)
337         for pos, item in enumerate(self.co_names):
338             try:
339                 if valtype == type(item) and value == item:
340                     return pos
341             except TypeError:
342                 pass
343        
344         pos = len(self.co_names)
345         self.co_names.append(value)
346         return pos
347    
348     def walk(self):
349         self.newcode = []
350         Visitor.walk(self)
351    
352     def visit_instruction(self, op, lo=None, hi=None):
353         append = self.newcode.append
354         append(op)
355         if lo is not None:
356             append(lo)
357         if hi is not None:
358             append(hi)
359    
360     def put(self, start, end, *bits):
361         """Overwrite self.newcode with new opcodes (numbers or names).
362         
363         If the new codes are of different quantity than the old,
364         modify any jump codes affected.
365         """
366         bitnums = numeric_opcodes(bits)
367        
368         # Adjust jump codes. Notice this comes before bytecode is modified.
369         jca = JumpCodeAdjuster(self.newcode, start, end, len(bitnums))
370         self.newcode = jca.bytecode()
371        
372         # Rewrite bytecode.
373         self.newcode[start:end] = bitnums
374    
375     def tail(self, length, *bits):
376         """Overwrite self.newcode[-length:] with bits."""
377         end = len(self.newcode)
378         self.put(end - length, end, *bits)
379
380
381 class Localizer(Rewriter):
382     """Localizer(func, builtin_only=False, stoplist=[], verbose=False)
383     
384     If a global or builtin is known at compile time, replace it with a constant.
385     
386     This duplicates (and borrows from) Raymond Hettinger's Cookbook recipe
387     at: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/277940
388     """
389     def __init__(self, func, builtin_only=False, stoplist=[], verbose=False):
390         Rewriter.__init__(self, func)
391        
392         import __builtin__
393         self.env = vars(__builtin__).copy()
394         if not builtin_only:
395             self.env.update(func.func_globals)
396        
397         self.stoplist = stoplist
398         self.verbose = verbose
399    
400     def visit_LOAD_GLOBAL(self, lo, hi):
401         name = self.co_names[lo + (hi << 8)]
402         if name in self.env and name not in self.stoplist:
403             value = self.env[name]
404             pos = self.const_index(value)
405             self.tail(3, 'LOAD_CONST', pos & 0xFF, pos >> 8)
406             if self.verbose:
407                 print name, '-->', value
408
409
410 class TaintableStack(list):
411     def __init__(self, seq=[]):
412         list.__init__(self, seq)
413         self._taintindex = sets.Set()
414         self.maxsize = len(seq)
415    
416     def taint(self, index=-1):
417         if index < 0:
418             index += len(self)
419         self._taintindex.add(index)
420    
421     def tainted(self, index=-1):
422         if index < 0:
423             index = len(self) + index
424         return (index in self._taintindex)
425    
426     def pop(self, index=-1):
427         """pop(index) -> Returns a tuple!! of (value, tainted)."""
428         if index < 0:
429             index += len(self)
430         is_tainted = (index in self._taintindex)
431         if is_tainted:
432             self._taintindex.remove(index)
433         return list.pop(self, index), is_tainted
434    
435     def append(self, obj):
436         list.append(self, obj)
437         l = len(self)
438         if l > self.maxsize:
439             self.maxsize = l
440
441
442 class EarlyBinder(Rewriter):
443     """EarlyBinder(func, reduce_getattr=True, bind_late=[])
444     
445     Deep-evaluate function, replacing free vars with constants.
446     
447     reduce_getattr: If True (the default), getattr(x, y) will be
448         replaced with x.y where possible.
449     
450     bind_late: a list of names (globals, freevars, or attributes)
451         which should not be early-bound. For example, if you want
452         datetime.date.today() to be bound late, include 'today'
453         in the bind_late.
454     
455     Example: k = lambda x: x.Date == datetime.date(2004, 1, 1)
456              r = EarlyBinder(k).function()
457             
458     _____ k _____                _____ r _____
459      0 LOAD_FAST     0 (x)        0 LOAD_FAST   0 (x)
460      3 LOAD_ATTR     1 (Date)     3 LOAD_ATTR   1 (Date)
461      6 LOAD_GLOBAL   2 (datetime) 6 LOAD_CONST  6 (datetime.date(2004, 1, 1))
462      9 LOAD_ATTR     3 (date)
463     12 LOAD_CONST    1 (2004)
464     15 LOAD_CONST    2 (1)
465     18 LOAD_CONST    2 (1)
466     21 CALL_FUNCTION 3
467     24 COMPARE_OP    2 (==)       9 COMPARE_OP  2 (==)
468     27 RETURN_VALUE              12 RETURN_VALUE
469     
470     This also pre-computes binary operations *and all other builtin or free
471     functions* where all operands are constants, globals, or freevars.
472     For example:
473         LOAD_CONST        1 (3)
474         LOAD_CONST        2 (4)
475         BINARY_MULTIPLY
476         
477     is replaced with:
478         LOAD_CONST        5 (12)
479     
480     However, order is important. lambda x: x * 4 * 5 won't see any
481     optimization, because the order of eval is (x * 4) * 5. Rewritten
482     as lambda x: 4 * 5 * x, the "4 * 5" can be replaced with "20".
483     """
484    
485     def __init__(self, func, reduce_getattr=True, bind_late=[]):
486         Rewriter.__init__(self, func)
487         self.reduce_getattr = reduce_getattr
488        
489         import __builtin__
490         self.env = vars(__builtin__).copy()
491         self.env.update(func.func_globals)
492        
493         # Keep a stack like the interpreter would. This does *not*
494         # get overwritten when self.newcode does--it emulates the
495         # original instructions (although tainted values may be dummies).
496         # When a local var is pushed onto this stack, it "taints" itself
497         # and any operations which depend upon it.
498         # This stack is not passed out of this class in any way.
499         self.stack = TaintableStack()
500         self.bind_late = bind_late
501    
502     def code_object(self):
503         """Walk self and produce a new code object."""
504         self.walk()
505         codestr = ''.join(map(chr, self.newcode))
506         # Assert CO_NOFREE, since all free vars should have been made constant.
507         self.co_flags |= 0x0040
508         co = CodeType(self.co_argcount, self.co_nlocals, self.stack.maxsize,
509                       self.co_flags, codestr, tuple(self.co_consts),
510                       safe_tuple(self.co_names), safe_tuple(self.co_varnames),
511                       '', self.co_name, 1,
512                       self.co_lnotab, (), ())
513         return co
514    
515     def function(self, newname=None):
516         """Walk self and produce a new function."""
517         try:
518             f = self._func
519         except AttributeError:
520             if newname is None:
521                 newname = ''
522             co = self.code_object()
523             return FunctionType(co, {}, newname)
524         else:
525             if newname is None:
526                 newname = f.func_name
527             co = self.code_object()
528             # All cells should be dereferenced, so force func_closure to None.
529             return FunctionType(co, f.func_globals, newname, f.func_defaults)
530    
531     def reduce(self, number_of_terms, transform=None, overwrite_length=None):
532         """If no stack args are to be bound late, rewrite previous opcodes.
533         
534         number_of_terms: the number of terms to pop off the stack.
535         
536         transform: a callback, to which we send the popped terms. They are
537             transformed in that function as needed, and returned.
538         
539         overwrite_length: the number of previous opcodes to overwrite. If
540             None, it defaults to (number of terms + 1 for the current
541             instruction) * 3.
542         """
543         if overwrite_length is None:
544             # +1 is for current bytecode. If any overwritten bytecode
545             # is not len 3, pass in a value for overwrite_length.
546             overwrite_length = (number_of_terms + 1) * 3
547        
548         # Pop the requested number of terms off the stack.
549         is_tainted = False
550         terms, taints = [], []
551         for i in xrange(number_of_terms):
552             term, taint = self.stack.pop()
553             taints.append(taint)
554             is_tainted |= taint
555             terms.append(term)
556        
557         # Now that all the stack-popping is done...
558         if is_tainted:
559             # We don't have to handle getattr if no args are
560             # tainted, because CALL_FUNCTION will do it normally.
561             if self.reduce_getattr:
562                 if (len(terms) == 3 and terms[2] == getattr
563                     and taints[1] and not taints[0]):
564                     # Form a new LOAD_ATTR instruction.
565                     pos = self.name_index(terms[0])
566                     # Unlike normal CALL_FUNCTION, we can't assume each arg
567                     # is a constant; therefore, our overwrite_length is
568                     # indeterminate. We'll just cheat and keep track of
569                     # the last LOAD_GLOBAL where we looked up getattr. ;)
570                     start = self.last_getattr
571                     # Grab and reuse opcodes of first (LOAD_FAST) term.
572                     bits = self.newcode[start + 3:-6]
573                     bits += ['LOAD_ATTR', pos & 0xFF, pos >> 8]
574                     bits = tuple(bits)
575                     self.put(start, len(self.newcode), *bits)
576                     self.stack.append(None)
577                     self.stack.taint()
578                     return None
579            
580             # Don't form the new object.
581             # Replace TOS with a dummy and taint it.
582             self.stack.append(None)
583             self.stack.taint()
584             return None
585        
586         # Callback the transform.
587         terms.reverse()
588         if transform:
589             result = transform(terms)
590         else:
591             result = terms
592        
593         # Replace TOS with result.
594         self.stack.append(result)
595        
596         # Overwrite bytecodes with new CONST formed from result.
597         pos = self.const_index(result)
598         self.tail(overwrite_length, 'LOAD_CONST', pos & 0xFF, pos >> 8)
599        
600         return result
601    
602     def visit_BUILD_TUPLE(self, lo, hi):
603         self.reduce(lo + (hi << 8), lambda terms: tuple(terms))
604    
605     def visit_BUILD_LIST(self, lo, hi):
606         self.reduce(lo + (hi << 8))
607    
608     def visit_CALL_FUNCTION(self, lo, hi):
609         def call(terms):
610             func = terms.pop(0)
611             args = tuple(terms[:lo])
612             kwargs = {}
613             for i in range(hi):
614                 key = self.terms.pop(0)
615                 val = self.terms.pop(0)
616                 kwargs[key] = val
617             return func(*args, **kwargs)
618         self.reduce(lo + hi + 1, call)
619    
620     def visit_COMPARE_OP(self, lo, hi):
621         op = cmp_op[lo + (hi << 8)]
622         op = comparisons[op]
623         self.reduce(2, lambda terms: op(*terms))
624    
625     def visit_LOAD_ATTR(self, lo, hi):
626         name = self.co_names[lo + (hi << 8)]
627         result = self.reduce(1, lambda terms: getattr(terms[0], name))
628         if result in self.bind_late or getattr(result, 'bind_late', False):
629             self.stack.taint()
630    
631     def visit_LOAD_CONST(self, lo, hi):
632         self.stack.append(self.co_consts[lo + (hi << 8)])
633    
634     def visit_LOAD_DEREF(self, lo, hi):
635         if hasattr(self, '_func'):
636             name = self.co_freevars[lo + (hi << 8)]
637             value = self._func.func_closure[lo + (hi << 8)]
638             value = deref_cell(value)
639             pos = self.const_index(value)
640             self.tail(3, 'LOAD_CONST', pos & 0xFF, pos >> 8)
641             self.stack.append(value)
642             if value in self.bind_late or getattr(value, 'bind_late', False):
643                 self.stack.taint()
644    
645     def visit_LOAD_FAST(self, lo, hi):
646         self.stack.append(self.co_varnames[lo + (hi << 8)])
647         # LOAD_FAST references our bound variable, which is always bound late.
648         self.stack.taint()
649    
650     def visit_LOAD_GLOBAL(self, lo, hi):
651         name = self.co_names[lo + (hi << 8)]
652         if name == 'getattr':
653             self.last_getattr = (len(self.newcode) - 3)
654         if name in self.env:
655             value = self.env[name]
656             pos = self.const_index(value)
657             self.tail(3, 'LOAD_CONST', pos & 0xFF, pos >> 8)
658             self.stack.append(value)
659             if value in self.bind_late or getattr(value, 'bind_late', False):
660                 self.stack.taint()
661         else:
662             raise KeyError("'%s' is not present in supplied globals." % name)
663    
664     def visit_SLICE_PLUS_0(self):
665         self.reduce(1, lambda terms: terms[0][:], 4)
666    
667     def visit_SLICE_PLUS_1(self):
668         self.reduce(2, lambda terms: terms[0][terms[1]:], 7)
669    
670     def visit_SLICE_PLUS_2(self):
671         self.reduce(2, lambda terms: terms[0][:terms[1]], 7)
672    
673     def visit_SLICE_PLUS_3(self):
674         self.reduce(3, lambda terms: terms[0][terms[1]:terms[2]], 10)
675    
676     def binary_op(self, op):
677         def operate(terms):
678             return op(*terms)
679         self.reduce(2, operate, 7)
680
681 # Add visit_BINARY, visit_INPLACE methods to EarlyBinder.
682 for k, v in binary_operators.iteritems():
683     setattr(EarlyBinder, "visit_" + k,
684             lambda self, opr=v: self.binary_op(opr))
685 for k, v in inplace_operators.iteritems():
686     setattr(EarlyBinder, "visit_" + k,
687             # Yes, we really do call binary_op for inplace methods.
688             lambda self, opr=v: self.binary_op(opr))
689
690
691 class LambdaDecompiler(Visitor):
692     """LambdaDecompiler(obj=lambda function or func_code).
693     
694     Produce decompiled Python code (as a string) from a supplied lambda."""
695    
696     def __init__(self, obj):
697         # Distill supplied 'obj' arg to a code block string.
698         if isinstance(obj, FunctionType):
699             obj = obj.func_code
700        
701         # Copy code object attributes.
702         for name in _co_code_attrs:
703             value = getattr(obj, name)
704             if isinstance(value, tuple):
705                 value = list(value)
706             setattr(self, name, value)
707        
708         try:
709             obj = obj.co_code
710         except AttributeError:
711             pass
712        
713         # Map the code block to a list of opcode numbers.
714         if isinstance(obj, basestring):
715             obj = map(ord, obj)
716         elif isinstance(obj, list):
717             obj = obj[:]
718        
719         self._bytecode = obj
720    
721     def code(self, include_func_header=True):
722         self.walk()
723         product = self.stack[0]
724         if include_func_header:
725             args = list(self.co_varnames)
726             if self.co_flags & 0x08:
727                 args[-1] = "**" + args[-1]
728                 if self.co_flags & 0x04:
729                     args[-2] = "*" + args[-2]
730             elif self.co_flags & 0x04:
731                 args[-1] = "*" + args[-1]
732             args = ", ".join(args)
733            
734             product = "lambda %s: %s" % (args, product)
735         return product
736    
737     def walk(self):
738         self.cursor = 0
739         self.stack = []
740         self.newcode = []
741        
742         # Add a list of 'and' and 'or' mappings {target: [(previous TOS, and/or)...]}
743         self.targets = {}
744         b = self._bytecode
745         while self.cursor < len(b):
746             if self.cursor in self.targets:
747                 self.visit_target(self.targets[self.cursor])
748             op = b[self.cursor]
749             self.cursor += 1
750             if op >= HAVE_ARGUMENT:
751                 lo = b[self.cursor]
752                 self.cursor += 1
753                 hi = b[self.cursor]
754                 self.cursor += 1
755                 args = (lo, hi)
756             else:
757                 args = ()
758            
759             self.visit_instruction(op, *args)
760            
761             name = 'visit_' + opname[op]
762             name = name.replace('+', '_PLUS_')
763            
764             if hasattr(self, name):
765                 getattr(self, name)(*args)
766    
767     def visit_target(self, terms):
768         comp = self.stack.pop()
769         while terms:
770             term, operation = terms.pop()
771             comp = "(%s) %s (%s)" % (term, operation, comp)
772         self.stack.append(comp)
773    
774     def visit_BUILD_TUPLE(self, lo, hi):
775         terms = ", ".join([self.stack.pop() for i in range(lo + hi << 8)])
776         self.stack.append("(%s)" % terms)
777    
778     def visit_BUILD_LIST(self, lo, hi):
779         terms = ", ".join([self.stack.pop() for i in range(lo + hi << 8)])
780         self.stack.append("[%s]" % terms)
781    
782     def visit_CALL_FUNCTION(self, lo, hi):
783         kwargs = {}
784         for i in range(hi):
785             val = self.stack.pop()
786             key = self.stack.pop()
787             kwargs[key] = val
788         kwargs = ", ".join([k + "=" + v for k, v in kwargs.iteritems()])
789        
790         args = []
791         for i in xrange(lo):
792             arg = self.stack.pop()
793             args.append(arg)
794         args.reverse()
795         args = ", ".join(args)
796        
797         if kwargs:
798             args += ", " + kwargs
799        
800         func = self.stack.pop()
801         self.stack.append("%s(%s)" % (func, args))
802    
803     def visit_COMPARE_OP(self, lo, hi):
804         op2, op1 = self.stack.pop(), self.stack.pop()
805         self.stack.append(op1 + " " + cmp_op[lo + (hi << 8)] + " " + op2)
806    
807     def visit_JUMP_IF_FALSE(self, lo, hi):
808         target = self.cursor + (lo + (hi << 8))
809         targetstack = self.targets.setdefault(target, [])
810         targetstack.append((self.stack.pop(), 'and'))
811    
812     def visit_JUMP_IF_TRUE(self, lo, hi):
813         target = self.cursor + (lo + (hi << 8))
814         targetstack = self.targets.setdefault(target, [])
815         targetstack.append((self.stack.pop(), 'or'))
816    
817     def visit_LOAD_ATTR(self, lo, hi):
818         self.stack[-1] += ("." + self.co_names[lo + (hi << 8)])
819    
820     def visit_LOAD_CONST(self, lo, hi):
821         val = self.co_consts[lo + (hi << 8)]
822         if isinstance(val, (FunctionType, type)):
823             # The const in question is a factory function.
824             self.stack.append(val.__module__ + "." + val.__name__)
825         else:
826             r = repr(val)
827             if hasattr(val, "__module__"):
828                 if not r.startswith(val.__module__ + "."):
829                     r = val.__module__ + "." + r
830             self.stack.append(r)
831    
832     def visit_LOAD_FAST(self, lo, hi):
833         self.stack.append(self.co_varnames[lo + (hi << 8)])
834    
835     def visit_LOAD_GLOBAL(self, lo, hi):
836         self.stack.append(self.co_names[lo + (hi << 8)])
837    
838     def visit_SLICE_PLUS_0(self):
839         arg = self.stack.pop()
840         self.stack.append("%s[:]" % arg)
841    
842     def visit_SLICE_PLUS_1(self):
843         args = tuple(self.stack[-2:])
844         del self.stack[-2:]
845         self.stack.append("%s[%s:]" % args)
846    
847     def visit_SLICE_PLUS_2(self):
848         args = tuple(self.stack[-2:])
849         del self.stack[-2:]
850         self.stack.append("%s[:%s]" % args)
851    
852     def visit_SLICE_PLUS_3(self):
853         args = tuple(self.stack[-3:])
854         del self.stack[-3:]
855         self.stack.append("%s[%s:%s]" % args)
856    
857     def visit_UNARY_CONVERT(self):
858         op = self.stack.pop()
859         self.stack.append("`(" + op + ")`")
860    
861     def visit_UNARY_INVERT(self):
862         op = self.stack.pop()
863         self.stack.append("~(" + op + ")")
864    
865     def visit_UNARY_NEGATIVE(self):
866         op = self.stack.pop()
867         self.stack.append("-(" + op + ")")
868    
869     def visit_UNARY_NOT(self):
870         op = self.stack.pop()
871         self.stack.append("not (" + op + ")")
872    
873     def visit_UNARY_POSITIVE(self):
874         op = self.stack.pop()
875         self.stack.append("+(" + op + ")")
876    
877     def binary_op(self, op):
878         op2, op1 = self.stack.pop(), self.stack.pop()
879         self.stack.append(op1 + " " + op + " " + op2)
880    
881     def visit_BINARY_SUBSCR(self):
882         op2, op1 = self.stack.pop(), self.stack.pop()
883         self.stack.append(op1 + "[" + op2 + "]")
884
885 # Add visit_BINARY methods to LambdaDecompiler.
886 for k, v in binary_repr.iteritems():
887     setattr(LambdaDecompiler, "visit_" + k,
888             lambda self, op=v: self.binary_op(op))
889
890
891 class BranchTracker(Visitor):
892     """BranchTracker(obj=[func|co|str|list]).
893     
894     Finds all possible instructions previous to the supplied instruction(s).
895     """
896    
897     def branches(self, instr=None):
898         """Walk self and return all possible instructions previous to instr.
899         
900         If instr is None, the last instruction will be used.
901         """
902         if instr is None:
903             instr = len(self._bytecode) - 1
904         self.watch = {instr: []}
905         self.walk()
906         return self.watch[instr]
907    
908     def visit_instruction(self, op, lo=None, hi=None):
909         if self.cursor in self.watch and op != 113:
910             if lo is None and hi is None:
911                 self.watch[self.cursor].append(self.cursor - 1)
912             else:
913                 self.watch[self.cursor].append(self.cursor - 3)
914    
915     def visit_CONTINUE_LOOP(self, lo, hi):
916         self.visit_JUMP_ABSOLUTE(lo, hi)
917    
918     def visit_JUMP_ABSOLUTE(self, lo, hi):
919         target = lo + (hi << 8)
920         if target in self.watch:
921             self.watch[target].append(self.cursor)
922    
923     def visit_JUMP_FORWARD(self, lo, hi):
924         delta = lo + (hi << 8)
925         target = self.cursor + delta
926         if target in self.watch:
927             self.watch[target].append(self.cursor - 3)
928    
929     def visit_JUMP_IF_FALSE(self, lo, hi):
930         self.visit_JUMP_FORWARD(lo, hi)
931    
932     def visit_JUMP_IF_TRUE(self, lo, hi):
933         self.visit_JUMP_FORWARD(lo, hi)
934
935
936 class KeywordInspector(Rewriter):
937     """Produce a list of all keyword arguments expected."""
938    
939     def __init__(self, obj):
940         """KeywordInspector(obj). List keyword arguments expected."""
941         Rewriter.__init__(self, obj)
942         if not (self.co_flags & 0x0008):
943             raise ValueError("'%s' does not possess **kwargs." % obj)
944         if len(self.co_varnames) <= 1:
945             raise ValueError("'%s' does not possess more than 1 varname." % obj)
946         self._kwargs = []
947         self.flag = None
948    
949     def kwargs(self):
950         """kwargs() -> List of keyword arguments expected."""
951         self.walk()
952         return self._kwargs
953    
954     def visit_instruction(self, op, lo=None, hi=None):
955         if op == 124 and (lo + (hi << 8) == len(self.co_varnames) - 1):
956             self.flag = ''
957         elif op == 100 and self.flag == '':
958             self.flag = self.co_consts[lo + (hi << 8)]
959         elif op == 25 and self.flag:
960             self._kwargs.append(self.flag)
961         else:
962             self.flag = None
963
964 del k, v
965
Note: See TracBrowser for help on using the browser.