Contact: fumanchu@aminus.org

Log in as guest/dejavu to create tickets

I think I've seen this ORM somewhere before...

root/trunk/codewalk.py

Revision 8 (checked in by fumanchu, 9 years ago)

codewalk and logic are now modules in dejavu

Line 
1 """Bytecode visitors, including rewriters and decompilers.
2
3 This work, including the source code, documentation
4 and related data, is placed into the public domain.
5
6 The orginal author is Robert Brewer, Amor Ministries.
7
8 THIS SOFTWARE IS PROVIDED AS-IS, WITHOUT WARRANTY
9 OF ANY KIND, NOT EVEN THE IMPLIED WARRANTY OF
10 MERCHANTABILITY. THE AUTHOR OF THIS SOFTWARE
11 ASSUMES _NO_ RESPONSIBILITY FOR ANY CONSEQUENCE
12 RESULTING FROM THE USE, MODIFICATION, OR
13 REDISTRIBUTION OF THIS SOFTWARE.
14
15 """
16
17 version = "0.3.2"
18
19 changelog = """
20 0.3.2 (8/8/04):
21     1. After profiling, added _co_code_attrs because dir() was so slow.
22
23 0.3.1 (4/29/04):
24     1. Added KeywordInspector.
25     2. Made LambdaDecompiler use function __name__, not repr() (early
26         binding makes e.g. datetime.date into <type datetime.date>).
27     3. Added safe_tuple.
28     4. Added TaintableStack.maxsize to help stack calculation.
29     5. Forgot to call deref_cell from visit_LOAD_DEREF! D'oh!
30     6. Put visit_ methods in alpha order.
31     7. Added name_index to Rewriter.
32     8. Got rid of extra args to EarlyBinder.
33     9. Added reduction of getattr to EarlyBinder as an option.
34     10. Combined 'ands' and 'ors' into 'targets' in LambdaDecompiler.
35     11. Fixed parenthesis problems in LambdaDecompiler.
36     12. Added BranchTracker class.
37 """
38
39 todo = """
40 1. Make a StackCalculator subclass of Visitor, call it in
41     Rewriter.code_object to get co_stacksize.
42 2. Make named_opcodes(bits), which returns names for the first code in
43     each word, numbers otherwise.
44 3. Rewrite dis as a Visitor.
45 """
46
47 from opcode import cmp_op, opname, opmap, HAVE_ARGUMENT
48 import operator
49 import sets
50 from types import CodeType, FunctionType
51
52
53 def numeric_opcodes(bits):
54     """Change named opcode bits to their numeric equivalents."""
55     bitnums = []
56     for x in bits:
57         if isinstance(x, basestring):
58             x = opmap[x]
59         bitnums.append(x)
60     return bitnums
61
62
63 _deref_bytecode = numeric_opcodes(['LOAD_DEREF', 0, 0, 'RETURN_VALUE'])
64 # CodeType(argcount, nlocals, stacksize, flags, codestring, constants,
65 #          names, varnames, filename, name, firstlineno,
66 #          lnotab[, freevars[, cellvars]])
67 _derefblock = CodeType(0, 0, 1, 3, ''.join(map(chr, _deref_bytecode)),
68                        (None,), ('cell',), (), '', '', 2, '', ('cell',))
69 def deref_cell(cell):
70     # FunctionType(code, globals[, name[, argdefs[, closure]]])
71     return FunctionType(_derefblock, {}, "", (), (cell,))()
72
73 def make_closure(*args):
74     def inner():
75         args
76     return inner.func_closure
77
78
79 binary_operators = {'BINARY_POWER': operator.pow,
80                     'BINARY_MULTIPLY': operator.mul,
81                     'BINARY_DIVIDE': operator.div,
82                     'BINARY_FLOOR_DIVIDE': operator.floordiv,
83                     'BINARY_TRUE_DIVIDE': operator.truediv,
84                     'BINARY_MODULO': operator.mod,
85                     'BINARY_ADD': operator.add,
86                     'BINARY_SUBTRACT': operator.sub,
87                     'BINARY_SUBSCR': operator.getitem,
88                     'BINARY_LSHIFT': operator.lshift,
89                     'BINARY_RSHIFT': operator.rshift,
90                     'BINARY_AND': operator.and_,
91                     'BINARY_XOR': operator.xor,
92                     'BINARY_OR': operator.or_,
93                     }
94 inplace_operators = dict([('INPLACE_' + k.split('_')[1], v)
95                           for k, v in binary_operators.iteritems()
96                           if k not in ('BINARY_SUBSCR',)
97                           ])
98 del k, v
99
100 binary_repr = {'BINARY_POWER': '**',
101                'BINARY_MULTIPLY': '*',
102                'BINARY_DIVIDE': '/',
103                'BINARY_FLOOR_DIVIDE': '//',
104                'BINARY_TRUE_DIVIDE': '/',
105                'BINARY_MODULO': '%',
106                'BINARY_ADD': '+',
107                'BINARY_SUBTRACT': '-',
108                'BINARY_LSHIFT': '<<',
109                'BINARY_RSHIFT': '>>',
110                'BINARY_AND': '&',
111                'BINARY_XOR': '^',
112                'BINARY_OR': '|',
113                }
114
115 inplace_repr = dict([('INPLACE_' + k.split('_')[1], v + '=')
116                      for k, v in binary_repr.iteritems()])
117
118 comparisons = {'<': operator.lt,
119                '<=': operator.le,
120                '==': operator.eq,
121                '!=': operator.ne,
122                '>': operator.gt,
123                '>=': operator.gt,
124                'in': operator.contains,
125                'not in': lambda x, y: not x in y,
126                'is': operator.is_,
127                'is not': operator.is_not,
128                }
129
130 _co_code_attrs = ['co_argcount', 'co_cellvars', 'co_code', 'co_consts',
131                   'co_filename', 'co_firstlineno', 'co_flags', 'co_freevars',
132                   'co_lnotab', 'co_name', 'co_names', 'co_nlocals',
133                   'co_stacksize', 'co_varnames']
134
135 class Visitor(object):
136     """Visitor(obj=function, code object, string, or list of opcodes)."""
137    
138     def __init__(self, obj):
139         # Distill supplied 'obj' arg to a code block string.
140         if isinstance(obj, FunctionType):
141             obj = obj.func_code
142         try:
143             obj = obj.co_code
144         except AttributeError:
145             pass
146        
147         # Map the code block string to a list of opcode numbers.
148         if isinstance(obj, basestring):
149             obj = map(ord, obj)
150         elif isinstance(obj, list):
151             obj = obj[:]
152        
153         self._bytecode = obj
154    
155     def walk(self):
156         self.cursor = 0
157         b = self._bytecode
158         while self.cursor < len(b):
159             op = b[self.cursor]
160             self.cursor += 1
161             if op >= HAVE_ARGUMENT:
162                 lo = b[self.cursor]
163                 self.cursor += 1
164                 hi = b[self.cursor]
165                 self.cursor += 1
166                 args = (lo, hi)
167             else:
168                 args = ()
169            
170             self.visit_instruction(op, *args)
171            
172             name = 'visit_' + opname[op]
173             name = name.replace('+', '_PLUS_')
174            
175             if hasattr(self, name):
176                 getattr(self, name)(*args)
177    
178     def visit_instruction(self, op, lo=None, hi=None):
179         pass
180
181
182 class JumpCodeAdjuster(Visitor):
183     """JumpCodeAdjuster(obj=[func|co|str|list], start, end, newlength).
184     
185     Adjusts jump codes if their target is affected by bytecode changes.
186     
187     start, end: The range of the original bytecode in question.
188     newlength: Length of the codes which overwrote bytecode[start:end].
189     """
190    
191     def __init__(self, obj, start, end, newlength):
192         Visitor.__init__(self, obj)
193         self.start = start
194         self.end = end
195         self.offset = newlength - (end - start)
196    
197     def bytecode(self):
198         """Walk self and return new bytecode."""
199         self.walk()
200         return self.newcode
201    
202     def walk(self):
203         if self.offset == 0:
204             # Avoid costly walk if no changes will be made.
205             self.newcode = self._bytecode
206         else:
207             self.newcode = []
208             Visitor.walk(self)
209    
210     def visit_instruction(self, op, lo=None, hi=None):
211         append = self.newcode.append
212         append(op)
213         if lo is not None:
214             append(lo)
215         if hi is not None:
216             append(hi)
217    
218     def visit_CONTINUE_LOOP(self, lo, hi):
219         self.visit_JUMP_ABSOLUTE(lo, hi)
220    
221     def visit_JUMP_ABSOLUTE(self, lo, hi):
222         target = lo + (hi << 8)
223         if target > self.start:
224             pos = target + self.offset
225             self.newcode[-2:] = [pos & 0xFF, pos >> 8]
226    
227     def visit_JUMP_FORWARD(self, lo, hi):
228         delta = lo + (hi << 8)
229         target = self.cursor + delta
230         if self.cursor < self.end and target > self.start:
231             pos = (target + self.offset) - self.cursor
232             self.newcode[-2:] = [pos & 0xFF, pos >> 8]
233    
234     def visit_JUMP_IF_FALSE(self, lo, hi):
235         self.visit_JUMP_FORWARD(lo, hi)
236    
237     def visit_JUMP_IF_TRUE(self, lo, hi):
238         self.visit_JUMP_FORWARD(lo, hi)
239
240
241 def safe_tuple(seq):
242     """Force func_code attributes to tuples of strings.
243     
244     Many of the func_code attributes must take tuples, not lists,
245     and *cannot* accept unicode items--they must be coerced to strings
246     or the interpreter will crash.
247     """
248     seq = map(str, seq)
249     return tuple(seq)
250
251
252 class Rewriter(Visitor):
253     """Rewriter(obj=function or code object).
254     
255     Produce a new function or code object by rewriting an existing one.
256     
257     Notice that, unlike the base Visitor class, Rewriter does not accept a
258     string or list of opcodes as an initial argument.
259     """
260    
261     def __init__(self, obj):
262         if isinstance(obj, FunctionType):
263             self._func = obj
264             obj = obj.func_code
265        
266         # Copy code object attributes.
267         for name in _co_code_attrs:
268             value = getattr(obj, name)
269             if isinstance(value, tuple):
270                 value = list(value)
271             setattr(self, name, value)
272        
273         try:
274             obj = obj.co_code
275         except AttributeError:
276             pass
277        
278         # Map the code block to a list of opcode numbers.
279         if isinstance(obj, basestring):
280             bytecode = map(ord, obj)
281         elif isinstance(obj, list):
282             bytecode = obj[:]
283         else:
284             raise TypeError("obj arg of incorrect type '%s'" % type(obj))
285        
286         self._bytecode = bytecode
287    
288     def bytecode(self):
289         """Walk self and return new bytecode."""
290         self.walk()
291         return self.newcode
292    
293     def code_object(self):
294         """Walk self and produce a new code object."""
295         self.walk()
296         codestr = ''.join(map(chr, self.newcode))
297         return CodeType(self.co_argcount, self.co_nlocals, self.co_stacksize,
298                         # Notice co_consts should *not* be safe_tupled.
299                         self.co_flags, codestr, tuple(self.co_consts),
300                         safe_tuple(self.co_names), safe_tuple(self.co_varnames),
301                         self.co_filename, self.co_name, self.co_firstlineno,
302                         self.co_lnotab, safe_tuple(self.co_freevars),
303                         safe_tuple(self.co_cellvars))
304    
305     def function(self, newname=None):
306         """Walk self and produce a new function."""
307         try:
308             f = self._func
309         except AttributeError:
310             if newname is None:
311                 newname = ''
312             co = self.code_object()
313             return FunctionType(co, {}, newname)
314         else:
315             if newname is None:
316                 newname = f.func_name
317             co = self.code_object()
318             return FunctionType(co, f.func_globals, newname,
319                                 f.func_defaults, f.func_closure)
320    
321     def const_index(self, value):
322         """The index of value in co_consts, appending it if not found."""
323         for pos, item in enumerate(self.co_consts):
324             try:
325                 if type(value) == type(item) and value == item:
326                     break
327             except TypeError:
328                 pass
329         else:
330             pos = len(self.co_consts)
331             self.co_consts.append(value)
332         return pos
333    
334     def name_index(self, value):
335         """The index of value in co_names, appending it if not found."""
336         valtype = type(value)
337         for pos, item in enumerate(self.co_names):
338             try:
339                 if valtype == type(item) and value == item:
340                     return pos
341             except TypeError:
342                 pass
343        
344         pos = len(self.co_names)
345         self.co_names.append(value)
346         return pos
347    
348     def walk(self):
349         self.newcode = []
350         Visitor.walk(self)
351    
352     def visit_instruction(self, op, lo=None, hi=None):
353         append = self.newcode.append
354         append(op)
355         if lo is not None:
356             append(lo)
357         if hi is not None:
358             append(hi)
359    
360     def put(self, start, end, *bits):
361         """Overwrite self.newcode with new opcodes (numbers or names).
362         
363         If the new codes are of different quantity than the old,
364         modify any jump codes affected.
365         """
366         bitnums = numeric_opcodes(bits)
367        
368         # Adjust jump codes. Notice this comes before bytecode is modified.
369         jca = JumpCodeAdjuster(self.newcode, start, end, len(bitnums))
370         self.newcode = jca.bytecode()
371        
372         # Rewrite bytecode.
373         self.newcode[start:end] = bitnums
374    
375     def tail(self, length, *bits):
376         """Overwrite self.newcode[-length:] with bits."""
377         end = len(self.newcode)
378         self.put(end - length, end, *bits)
379
380
381 class Localizer(Rewriter):
382     """Localizer(func, builtin_only=False, stoplist=[], verbose=False)
383     
384     If a global or builtin is known at compile time, replace it with a constant.
385     
386     This duplicates (and borrows from) Raymond Hettinger's Cookbook recipe
387     at: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/277940
388     """
389     def __init__(self, func, builtin_only=False, stoplist=[], verbose=False):
390         Rewriter.__init__(self, func)
391        
392         import __builtin__
393         self.env = vars(__builtin__).copy()
394         if not builtin_only:
395             self.env.update(func.func_globals)
396        
397         self.stoplist = stoplist
398         self.verbose = verbose
399    
400     def visit_LOAD_GLOBAL(self, lo, hi):
401         name = self.co_names[lo + (hi << 8)]
402         if name in self.env and name not in self.stoplist:
403             value = self.env[name]
404             pos = self.const_index(value)
405             self.tail(3, 'LOAD_CONST', pos & 0xFF, pos >> 8)
406             if self.verbose:
407                 print name, '-->', value
408
409
410 class TaintableStack(list):
411     def __init__(self, seq=[]):
412         list.__init__(self, seq)
413         self._taintindex = sets.Set()
414         self.maxsize = len(seq)
415    
416     def taint(self, index=-1):
417         if index < 0:
418             index += len(self)
419         self._taintindex.add(index)
420    
421     def tainted(self, index=-1):
422         if index < 0:
423             index = len(self) + index
424         return (index in self._taintindex)
425    
426     def pop(self, index=-1):
427         """Returns a tuple!! of (value, tainted)."""
428         if index < 0:
429             index += len(self)
430         is_tainted = (index in self._taintindex)
431         if is_tainted:
432             self._taintindex.remove(index)
433         return list.pop(self, index), is_tainted
434    
435     def append(self, obj):
436         list.append(self, obj)
437         l = len(self)
438         if l > self.maxsize:
439             self.maxsize = l
440
441
442 class EarlyBinder(Rewriter):
443     """EarlyBinder(func, reduce_getattr=True, taintlist=[])
444     
445     Deep-evaluate function, replacing free vars with constants.
446     
447     reduce_getattr: If True (the default), getattr(x, y) will be
448         replaced with x.y where possible.
449     
450     taintlist: a list of names (globals, freevars, or attributes)
451         which should not be early-bound. For example, if you want
452         datetime.date.today() to be bound late, include 'today'
453         in the taintlist.
454     
455     Example: k = lambda x: x.Date == datetime.date(2004, 1, 1)
456              r = EarlyBinder(k).function()
457             
458     _____ k _____                _____ r _____
459      0 LOAD_FAST     0 (x)        0 LOAD_FAST   0 (x)
460      3 LOAD_ATTR     1 (Date)     3 LOAD_ATTR   1 (Date)
461      6 LOAD_GLOBAL   2 (datetime) 6 LOAD_CONST  6 (datetime.date(2004, 1, 1))
462      9 LOAD_ATTR     3 (date)
463     12 LOAD_CONST    1 (2004)
464     15 LOAD_CONST    2 (1)
465     18 LOAD_CONST    2 (1)
466     21 CALL_FUNCTION 3
467     24 COMPARE_OP    2 (==)       9 COMPARE_OP  2 (==)
468     27 RETURN_VALUE              12 RETURN_VALUE
469     
470     This also pre-computes binary operations *and all other builtin or free
471     functions* where all operands are constants, globals, or freevars.
472     For example:
473         LOAD_CONST        1 (3)
474         LOAD_CONST        2 (4)
475         BINARY_MULTIPLY
476         
477     is replaced with:
478         LOAD_CONST        5 (12)
479     
480     However, order is important. lambda x: x * 4 * 5 won't see any
481     optimization, because the order of eval is (x * 4) * 5. Rewritten
482     as lambda x: 4 * 5 * x, the "4 * 5" can be replaced with "20".
483     """
484    
485     def __init__(self, func, reduce_getattr=True, taintlist=[]):
486         Rewriter.__init__(self, func)
487         self.reduce_getattr = reduce_getattr
488        
489         import __builtin__
490         self.env = vars(__builtin__).copy()
491         self.env.update(func.func_globals)
492        
493         # Keep a stack like the interpreter would. This does *not*
494         # get overwritten when self.newcode does--it emulates the
495         # original instructions (although tainted values may be dummies).
496         # When a local var is pushed onto this stack, it "taints" itself
497         # and any operations which depend upon it.
498         # This stack is not passed out of this class in any way.
499         self.stack = TaintableStack()
500         self.taintlist = taintlist
501    
502     def code_object(self):
503         """Walk self and produce a new code object."""
504         self.walk()
505         codestr = ''.join(map(chr, self.newcode))
506         # Assert CO_NOFREE, since all free vars should have been made constant.
507         self.co_flags |= 0x0040
508         co = CodeType(self.co_argcount, self.co_nlocals, self.stack.maxsize,
509                       self.co_flags, codestr, tuple(self.co_consts),
510                       safe_tuple(self.co_names), safe_tuple(self.co_varnames),
511                       '', self.co_name, 1,
512                       self.co_lnotab, (), ())
513         return co
514    
515     def function(self, newname=None):
516         """Walk self and produce a new function."""
517         try:
518             f = self._func
519         except AttributeError:
520             if newname is None:
521                 newname = ''
522             co = self.code_object()
523             return FunctionType(co, {}, newname)
524         else:
525             if newname is None:
526                 newname = f.func_name
527             co = self.code_object()
528             # All cells should be dereferenced, so force func_closure to None.
529             return FunctionType(co, f.func_globals, newname, f.func_defaults)
530    
531     def reduce(self, number_of_terms, transform=None, overwrite_length=None):
532         """If no stack args are tainted, rewrite previous opcodes.
533         
534         number_of_terms: the number of terms to pop off the stack.
535         
536         transform: a callback, to which we send the popped terms. They are
537             transformed in that function as needed, and returned.
538         
539         overwrite_length: the number of previous opcodes to overwrite. If
540             None, it defaults to (number of terms + 1 for the current
541             instruction) * 3.
542         """
543         if overwrite_length is None:
544             # +1 is for current bytecode. If any overwritten bytecode
545             # is not len 3, pass in a value for overwrite_length.
546             overwrite_length = (number_of_terms + 1) * 3
547        
548         # Pop the requested number of terms off the stack.
549         is_tainted = False
550         terms, taints = [], []
551         for i in xrange(number_of_terms):
552             term, taint = self.stack.pop()
553             taints.append(taint)
554             is_tainted |= taint
555             terms.append(term)
556        
557         # Now that all the stack-popping is done...
558         if is_tainted:
559             # We don't have to handle getattr if no args are
560             # tainted, because CALL_FUNCTION will do it normally.
561             if self.reduce_getattr:
562                 if (len(terms) == 3
563                     and terms[2] == getattr
564                     and taints[1]
565                     and not taints[0]):
566                         # Form a new LOAD_ATTR instruction.
567                         pos = self.name_index(terms[0])
568                         # Unlike normal CALL_FUNCTION, we can't
569                         # assume each arg is a constant; therefore,
570                         # our overwrite_length is indeterminate.
571                         # We'll just cheat and keep track of the last
572                         # LOAD_GLOBAL where we looked up getattr. ;)
573                         start = self.last_getattr
574                         # Grab and reuse opcodes of first (LOAD_FAST) term.
575                         bits = self.newcode[start + 3:-6]
576                         bits += ['LOAD_ATTR', pos & 0xFF, pos >> 8]
577                         bits = tuple(bits)
578                         self.put(start, len(self.newcode), *bits)
579                         self.stack.append(None)
580                         self.stack.taint()
581                         return True
582            
583             # Don't form the new object.
584             # Replace TOS with a dummy and taint it.
585             self.stack.append(None)
586             self.stack.taint()
587             return False
588        
589         # Callback the transform.
590         terms.reverse()
591         if transform:
592             result = transform(terms)
593         else:
594             result = terms
595        
596         # Replace TOS with result.
597         self.stack.append(result)
598        
599         # Overwrite bytecodes with new CONST formed from result.
600         pos = self.const_index(result)
601         self.tail(overwrite_length, 'LOAD_CONST', pos & 0xFF, pos >> 8)
602         return True
603    
604     def visit_BUILD_TUPLE(self, lo, hi):
605         self.reduce(lo + (hi << 8), lambda terms: tuple(terms))
606    
607     def visit_BUILD_LIST(self, lo, hi):
608         self.reduce(lo + (hi << 8))
609    
610     def visit_CALL_FUNCTION(self, lo, hi):
611         def call(terms):
612             func = terms.pop(0)
613             args = tuple(terms[:lo])
614             kwargs = {}
615             for i in range(hi):
616                 key = self.terms.pop(0)
617                 val = self.terms.pop(0)
618                 kwargs[key] = val
619             return func(*args, **kwargs)
620         self.reduce(lo + hi + 1, call)
621    
622     def visit_COMPARE_OP(self, lo, hi):
623         op = cmp_op[lo + (hi << 8)]
624         op = comparisons[op]
625         self.reduce(2, lambda terms: op(*terms))
626    
627     def visit_LOAD_ATTR(self, lo, hi):
628         name = self.co_names[lo + (hi << 8)]
629         self.reduce(1, lambda terms: getattr(terms[0], name))
630         if name in self.taintlist:
631             self.stack.taint()
632    
633     def visit_LOAD_CONST(self, lo, hi):
634         self.stack.append(self.co_consts[lo + (hi << 8)])
635    
636     def visit_LOAD_DEREF(self, lo, hi):
637         if hasattr(self, '_func'):
638             name = self.co_freevars[lo + (hi << 8)]
639             value = self._func.func_closure[lo + (hi << 8)]
640             value = deref_cell(value)
641             pos = self.const_index(value)
642             self.tail(3, 'LOAD_CONST', pos & 0xFF, pos >> 8)
643             self.stack.append(value)
644             if name in self.taintlist:
645                 self.stack.taint()
646    
647     def visit_LOAD_FAST(self, lo, hi):
648         self.stack.append(self.co_varnames[lo + (hi << 8)])
649         self.stack.taint()
650    
651     def visit_LOAD_GLOBAL(self, lo, hi):
652         name = self.co_names[lo + (hi << 8)]
653         if name == 'getattr':
654             self.last_getattr = (len(self.newcode) - 3)
655         if name in self.env:
656             value = self.env[name]
657             pos = self.const_index(value)
658             self.tail(3, 'LOAD_CONST', pos & 0xFF, pos >> 8)
659             self.stack.append(value)
660             if name in self.taintlist:
661                 self.stack.taint()
662         else:
663             raise KeyError("'%s' is not present in supplied globals." % name)
664    
665     def visit_SLICE_PLUS_0(self):
666         self.reduce(1, lambda terms: terms[0][:], 4)
667    
668     def visit_SLICE_PLUS_1(self):
669         self.reduce(2, lambda terms: terms[0][terms[1]:], 7)
670    
671     def visit_SLICE_PLUS_2(self):
672         self.reduce(2, lambda terms: terms[0][:terms[1]], 7)
673    
674     def visit_SLICE_PLUS_3(self):
675         self.reduce(3, lambda terms: terms[0][terms[1]:terms[2]], 10)
676    
677     def binary_op(self, op):
678         def operate(terms):
679             return op(*terms)
680         self.reduce(2, operate, 7)
681
682 # Add visit_BINARY, visit_INPLACE methods to EarlyBinder.
683 for key, opd in binary_operators.iteritems():
684     setattr(EarlyBinder, "visit_" + key,
685             lambda self, opr=opd: self.binary_op(opr))
686 for key, opd in inplace_operators.iteritems():
687     setattr(EarlyBinder, "visit_" + key,
688             # Yes, we really do call binary_op for inplace methods.
689             lambda self, opr=opd: self.binary_op(opr))
690 del key, opd
691
692
693 class LambdaDecompiler(Visitor):
694     """LambdaDecompiler(obj=lambda function or func_code).
695     
696     Produce decompiled Python code (as a string) from a supplied lambda."""
697    
698     def __init__(self, obj):
699         # Distill supplied 'obj' arg to a code block string.
700         if isinstance(obj, FunctionType):
701             obj = obj.func_code
702        
703         # Copy code object attributes.
704         for name in _co_code_attrs:
705             value = getattr(obj, name)
706             if isinstance(value, tuple):
707                 value = list(value)
708             setattr(self, name, value)
709        
710         try:
711             obj = obj.co_code
712         except AttributeError:
713             pass
714        
715         # Map the code block to a list of opcode numbers.
716         if isinstance(obj, basestring):
717             obj = map(ord, obj)
718         elif isinstance(obj, list):
719             obj = obj[:]
720        
721         self._bytecode = obj
722    
723     def code(self, include_func_header=True):
724         self.walk()
725         product = self.stack[0]
726         if include_func_header:
727             args = list(self.co_varnames)
728             if self.co_flags & 0x08:
729                 args[-1] = "**" + args[-1]
730                 if self.co_flags & 0x04:
731                     args[-2] = "*" + args[-2]
732             elif self.co_flags & 0x04:
733                 args[-1] = "*" + args[-1]
734             args = ", ".join(args)
735            
736             product = "lambda %s: %s" % (args, product)
737         return product
738    
739     def walk(self):
740         self.cursor = 0
741         self.stack = []
742         self.newcode = []
743        
744         # Add a list of 'and' and 'or' mappings {target: [(previous TOS, and_or_or)...]}
745         self.targets = {}
746         b = self._bytecode
747         while self.cursor < len(b):
748             if self.cursor in self.targets:
749                 self.visit_target(self.targets[self.cursor])
750             op = b[self.cursor]
751             self.cursor += 1
752             if op >= HAVE_ARGUMENT:
753                 lo = b[self.cursor]
754                 self.cursor += 1
755                 hi = b[self.cursor]
756                 self.cursor += 1
757                 args = (lo, hi)
758             else:
759                 args = ()
760            
761             self.visit_instruction(op, *args)
762            
763             name = 'visit_' + opname[op]
764             name = name.replace('+', '_PLUS_')
765            
766             if hasattr(self, name):
767                 getattr(self, name)(*args)
768    
769     def visit_target(self, terms):
770         comp = self.stack.pop()
771         while terms:
772             term, operation = terms.pop()
773             comp = "(%s) %s (%s)" % (term, operation, comp)
774         self.stack.append(comp)
775    
776     def visit_BUILD_TUPLE(self, lo, hi):
777         terms = ", ".join([self.stack.pop() for i in range(lo + hi << 8)])
778         self.stack.append("(%s)" % terms)
779    
780     def visit_BUILD_LIST(self, lo, hi):
781         terms = ", ".join([self.stack.pop() for i in range(lo + hi << 8)])
782         self.stack.append("[%s]" % terms)
783    
784     def visit_CALL_FUNCTION(self, lo, hi):
785         kwargs = {}
786         for i in range(hi):
787             val = self.stack.pop()
788             key = self.stack.pop()
789             kwargs[key] = val
790         kwargs = ", ".join([k + "=" + v for k, v in kwargs.iteritems()])
791        
792         args = []
793         for i in xrange(lo):
794             arg = self.stack.pop()
795             args.append(arg)
796         args.reverse()
797         args = ", ".join(args)
798        
799         if kwargs:
800             args += ", " + kwargs
801        
802         func = self.stack.pop()
803         self.stack.append("%s(%s)" % (func, args))
804    
805     def visit_COMPARE_OP(self, lo, hi):
806         op2, op1 = self.stack.pop(), self.stack.pop()
807         self.stack.append(op1 + " " + cmp_op[lo + (hi << 8)] + " " + op2)
808    
809     def visit_JUMP_IF_FALSE(self, lo, hi):
810         target = self.cursor + (lo + (hi << 8))
811         targetstack = self.targets.setdefault(target, [])
812         targetstack.append((self.stack.pop(), 'and'))
813    
814     def visit_JUMP_IF_TRUE(self, lo, hi):
815         target = self.cursor + (lo + (hi << 8))
816         targetstack = self.targets.setdefault(target, [])
817         targetstack.append((self.stack.pop(), 'or'))
818    
819     def visit_LOAD_ATTR(self, lo, hi):
820         self.stack[-1] += ("." + self.co_names[lo + (hi << 8)])
821    
822     def visit_LOAD_CONST(self, lo, hi):
823         val = self.co_consts[lo + (hi << 8)]
824         if isinstance(val, (FunctionType, type)):
825             # The const in question is a factory function.
826             self.stack.append(val.__module__ + "." + val.__name__)
827         else:
828             r = repr(val)
829             if hasattr(val, "__module__"):
830                 if not r.startswith(val.__module__ + "."):
831                     r = val.__module__ + "." + r
832             self.stack.append(r)
833    
834     def visit_LOAD_FAST(self, lo, hi):
835         self.stack.append(self.co_varnames[lo + (hi << 8)])
836    
837     def visit_LOAD_GLOBAL(self, lo, hi):
838         self.stack.append(self.co_names[lo + (hi << 8)])
839    
840     def visit_SLICE_PLUS_0(self):
841         arg = self.stack.pop()
842         self.stack.append("%s[:]" % arg)
843    
844     def visit_SLICE_PLUS_1(self):
845         args = tuple(self.stack[-2:])
846         del self.stack[-2:]
847         self.stack.append("%s[%s:]" % args)
848    
849     def visit_SLICE_PLUS_2(self):
850         args = tuple(self.stack[-2:])
851         del self.stack[-2:]
852         self.stack.append("%s[:%s]" % args)
853    
854     def visit_SLICE_PLUS_3(self):
855         args = tuple(self.stack[-3:])
856         del self.stack[-3:]
857         self.stack.append("%s[%s:%s]" % args)
858    
859     def visit_UNARY_CONVERT(self):
860         op = self.stack.pop()
861         self.stack.append("`(" + op + ")`")
862    
863     def visit_UNARY_INVERT(self):
864         op = self.stack.pop()
865         self.stack.append("~(" + op + ")")
866    
867     def visit_UNARY_NEGATIVE(self):
868         op = self.stack.pop()
869         self.stack.append("-(" + op + ")")
870    
871     def visit_UNARY_NOT(self):
872         op = self.stack.pop()
873         self.stack.append("not (" + op + ")")
874    
875     def visit_UNARY_POSITIVE(self):
876         op = self.stack.pop()
877         self.stack.append("+(" + op + ")")
878    
879     def binary_op(self, op):
880         op2, op1 = self.stack.pop(), self.stack.pop()
881         self.stack.append(op1 + " " + op + " " + op2)
882    
883     def visit_BINARY_SUBSCR(self):
884         op2, op1 = self.stack.pop(), self.stack.pop()
885         self.stack.append(op1 + "[" + op2 + "]")
886
887 # Add visit_BINARY methods to LambdaDecompiler.
888 for key, op in binary_repr.iteritems():
889     setattr(LambdaDecompiler, "visit_" + key,
890             lambda self, op=op: self.binary_op(op))
891
892
893 class BranchTracker(Visitor):
894     """BranchTracker(obj=[func|co|str|list]).
895     
896     Finds all possible instructions previous to the supplied instruction(s).
897     """
898    
899     def branches(self, instr=None):
900         """Walk self and return all possible instructions previous to instr.
901         
902         If instr is None, the last instruction will be used.
903         """
904         if instr is None:
905             instr = len(self._bytecode) - 1
906         self.watch = {instr: []}
907         self.walk()
908         return self.watch[instr]
909    
910     def visit_instruction(self, op, lo=None, hi=None):
911         if self.cursor in self.watch and op != 113:
912             if lo is None and hi is None:
913                 self.watch[self.cursor].append(self.cursor - 1)
914             else:
915                 self.watch[self.cursor].append(self.cursor - 3)
916    
917     def visit_CONTINUE_LOOP(self, lo, hi):
918         self.visit_JUMP_ABSOLUTE(lo, hi)
919    
920     def visit_JUMP_ABSOLUTE(self, lo, hi):
921         target = lo + (hi << 8)
922         if target in self.watch:
923             self.watch[target].append(self.cursor)
924    
925     def visit_JUMP_FORWARD(self, lo, hi):
926         delta = lo + (hi << 8)
927         target = self.cursor + delta
928         if target in self.watch:
929             self.watch[target].append(self.cursor - 3)
930    
931     def visit_JUMP_IF_FALSE(self, lo, hi):
932         self.visit_JUMP_FORWARD(lo, hi)
933    
934     def visit_JUMP_IF_TRUE(self, lo, hi):
935         self.visit_JUMP_FORWARD(lo, hi)
936
937
938 class KeywordInspector(Rewriter):
939     """Produce a list of all keyword arguments expected."""
940     def __init__(self, obj):
941         Rewriter.__init__(self, obj)
942         assert self.co_flags & 0x0008
943         assert len(self.co_varnames) > 1
944         self._kwargs = []
945         self.flag = None
946    
947     def kwargs(self):
948         self.walk()
949         return self._kwargs
950    
951     def visit_instruction(self, op, lo=None, hi=None):
952         if op == 124 and (lo + (hi << 8) == len(self.co_varnames) - 1):
953             self.flag = ''
954         elif op == 100 and self.flag == '':
955             self.flag = self.co_consts[lo + (hi << 8)]
956         elif op == 25 and self.flag:
957             self._kwargs.append(self.flag)
958         else:
959             self.flag = None
960
Note: See TracBrowser for help on using the browser.