Contact: fumanchu@aminus.org

Log in as guest/dejavu to create tickets

I think I've seen this ORM somewhere before...

root/branches/ldap/codewalk.py

Revision 350 (checked in by fumanchu, 6 years ago)

Minor optimizations.

  • Property svn:eol-style set to native
Line 
1 """Bytecode visitors, including rewriters and decompilers.
2
3 This work, including the source code, documentation
4 and related data, is placed into the public domain.
5
6 The orginal author is Robert Brewer, Amor Ministries.
7
8 THIS SOFTWARE IS PROVIDED AS-IS, WITHOUT WARRANTY
9 OF ANY KIND, NOT EVEN THE IMPLIED WARRANTY OF
10 MERCHANTABILITY. THE AUTHOR OF THIS SOFTWARE
11 ASSUMES _NO_ RESPONSIBILITY FOR ANY CONSEQUENCE
12 RESULTING FROM THE USE, MODIFICATION, OR
13 REDISTRIBUTION OF THIS SOFTWARE.
14
15 """
16
17 from opcode import cmp_op, opname, opmap, HAVE_ARGUMENT
18 _visit_map = [name.replace('+', '_PLUS_') for name in opname]
19
20 import operator
21 import sets
22 from types import CodeType, FunctionType, MethodType
23
24 from compiler.consts import *
25 CO_NOFREE = 0x0040
26
27
28 def named_opcodes(bits):
29     """Change initial numeric opcode bits to their named equivalents."""
30     bitnums = []
31     bits = iter(bits)
32     for x in bits:
33         bitnums.append(opname[x])
34         if x >= HAVE_ARGUMENT:
35             try:
36                 bitnums.append(bits.next())
37                 bitnums.append(bits.next())
38             except StopIteration:
39                 break
40     return bitnums
41
42 def numeric_opcodes(bits):
43     """Change named opcode bits to their numeric equivalents."""
44     bitnums = []
45     for x in bits:
46         if isinstance(x, basestring):
47             x = opmap[x]
48         bitnums.append(x)
49     return bitnums
50
51 _deref_bytecode = numeric_opcodes(['LOAD_DEREF', 0, 0, 'RETURN_VALUE'])
52 # CodeType(argcount, nlocals, stacksize, flags, codestring, constants,
53 #          names, varnames, filename, name, firstlineno,
54 #          lnotab[, freevars[, cellvars]])
55 _derefblock = CodeType(0, 0, 1, 3, ''.join(map(chr, _deref_bytecode)),
56                        (None,), ('cell',), (), '', '', 2, '', ('cell',))
57 def deref_cell(cell):
58     """Return the value of 'cell' (an object from a func_closure)."""
59     # FunctionType(code, globals[, name[, argdefs[, closure]]])
60     return FunctionType(_derefblock, {}, "", (), (cell,))()
61
62 def make_closure(*args):
63     def inner():
64         args
65     return inner.func_closure
66
67
68 binary_operators = {'BINARY_POWER': operator.pow,
69                     'BINARY_MULTIPLY': operator.mul,
70                     'BINARY_DIVIDE': operator.div,
71                     'BINARY_FLOOR_DIVIDE': operator.floordiv,
72                     'BINARY_TRUE_DIVIDE': operator.truediv,
73                     'BINARY_MODULO': operator.mod,
74                     'BINARY_ADD': operator.add,
75                     'BINARY_SUBTRACT': operator.sub,
76                     'BINARY_SUBSCR': operator.getitem,
77                     'BINARY_LSHIFT': operator.lshift,
78                     'BINARY_RSHIFT': operator.rshift,
79                     'BINARY_AND': operator.and_,
80                     'BINARY_XOR': operator.xor,
81                     'BINARY_OR': operator.or_,
82                     }
83 inplace_operators = dict([('INPLACE_' + k.split('_')[1], v)
84                           for k, v in binary_operators.iteritems()
85                           if k not in ('BINARY_SUBSCR',)
86                           ])
87
88 binary_repr = {'BINARY_POWER': '**',
89                'BINARY_MULTIPLY': '*',
90                'BINARY_DIVIDE': '/',
91                'BINARY_FLOOR_DIVIDE': '//',
92                'BINARY_TRUE_DIVIDE': '/',
93                'BINARY_MODULO': '%',
94                'BINARY_ADD': '+',
95                'BINARY_SUBTRACT': '-',
96                'BINARY_LSHIFT': '<<',
97                'BINARY_RSHIFT': '>>',
98                'BINARY_AND': '&',
99                'BINARY_XOR': '^',
100                'BINARY_OR': '|',
101                }
102
103 inplace_repr = dict([('INPLACE_' + k.split('_')[1], v + '=')
104                      for k, v in binary_repr.iteritems()])
105
106 comparisons = {'<': operator.lt,
107                '<=': operator.le,
108                '==': operator.eq,
109                '!=': operator.ne,
110                '>': operator.gt,
111                '>=': operator.gt,
112                'in': operator.contains,
113                'not in': lambda x, y: not x in y,
114                'is': operator.is_,
115                'is not': operator.is_not,
116                }
117
118 # Cache the co_* attributes and types
119 _co_code_attrs = {}
120 for name in dir(deref_cell.func_code):
121     if name.startswith("co_"):
122         _co_code_attrs[name] = type(getattr(deref_cell.func_code, name))
123
124
125 class Visitor(object):
126     """Visitor(obj=function, code object, string, or list of opcodes)."""
127    
128     def __init__(self, obj):
129         self.verbose = False
130        
131         # Distill supplied 'obj' arg to a code block string.
132         if isinstance(obj, MethodType):
133             obj = obj.im_func
134         if isinstance(obj, FunctionType):
135             obj = obj.func_code
136         try:
137             obj = obj.co_code
138         except AttributeError:
139             pass
140        
141         # Map the code block string to a list of opcode numbers.
142         if isinstance(obj, basestring):
143             obj = map(ord, obj)
144         elif isinstance(obj, list):
145             obj = obj[:]
146        
147         self._bytecode = obj
148    
149     def debug(self, *messages):
150         for term in messages:
151             print term,
152    
153     def walk(self):
154         verbose = self.verbose
155        
156         self.cursor = 0
157         b = self._bytecode
158         if verbose:
159             self.debug("\nWALKING: ", b)
160         b_len = len(b)      # Speed hack
161         while self.cursor < b_len:
162             if verbose:
163                 self.debug("\n", self.cursor)
164            
165             op = b[self.cursor]
166             self.cursor += 1
167             if op >= HAVE_ARGUMENT:
168                 lo = b[self.cursor]
169                 self.cursor += 1
170                 hi = b[self.cursor]
171                 self.cursor += 1
172                 args = (lo, hi)
173             else:
174                 args = ()
175            
176             if verbose:
177                 self.debug("visit (%s, %s)" % (op, repr(args)))
178             self.visit_instruction(op, *args)
179            
180             handler = getattr(self, 'visit_' + _visit_map[op], None)
181             if handler:
182                 if verbose:
183                     self.debug("=> %s%s" % (_visit_map[op], repr(args)))
184                 handler(*args)
185    
186     def visit_instruction(self, op, lo=None, hi=None):
187         pass
188
189
190 class JumpCodeAdjuster(Visitor):
191     """JumpCodeAdjuster(obj=[func|co|str|list], start, end, newlength).
192     
193     Adjusts jump codes if their target is affected by bytecode changes.
194     
195     start, end: The range of the original bytecode in question.
196     newlength: Length of the codes which overwrote bytecode[start:end].
197     """
198    
199     def __init__(self, obj, start, end, newlength):
200         Visitor.__init__(self, obj)
201         self.start = start
202         self.end = end
203         self.offset = newlength - (end - start)
204    
205     def bytecode(self):
206         """Walk self and return new bytecode."""
207         self.walk()
208         return self.newcode
209    
210     def walk(self):
211         if self.offset == 0:
212             # Avoid costly walk if no changes will be made.
213             self.newcode = self._bytecode
214         else:
215             self.newcode = []
216             Visitor.walk(self)
217    
218     def visit_instruction(self, op, lo=None, hi=None):
219         append = self.newcode.append
220         append(op)
221         if lo is not None:
222             append(lo)
223         if hi is not None:
224             append(hi)
225    
226     def visit_CONTINUE_LOOP(self, lo, hi):
227         self.visit_JUMP_ABSOLUTE(lo, hi)
228    
229     def visit_JUMP_ABSOLUTE(self, lo, hi):
230         target = lo + (hi << 8)
231         if target > self.start:
232             pos = target + self.offset
233             self.newcode[-2:] = [pos & 0xFF, pos >> 8]
234    
235     def visit_JUMP_FORWARD(self, lo, hi):
236         delta = lo + (hi << 8)
237         target = self.cursor + delta
238         if self.cursor < self.end and target > self.start:
239             pos = (target + self.offset) - self.cursor
240             self.newcode[-2:] = [pos & 0xFF, pos >> 8]
241    
242     def visit_JUMP_IF_FALSE(self, lo, hi):
243         self.visit_JUMP_FORWARD(lo, hi)
244    
245     def visit_JUMP_IF_TRUE(self, lo, hi):
246         self.visit_JUMP_FORWARD(lo, hi)
247
248
249 def safe_tuple(seq):
250     """Force func_code attributes to tuples of strings.
251     
252     Many of the func_code attributes must take tuples, not lists,
253     and *cannot* accept unicode items--they must be cast to strings
254     or the interpreter will crash.
255     """
256     seq = map(str, seq)
257     return tuple(seq)
258
259
260 class Rewriter(Visitor):
261     """Rewriter(obj=function or code object).
262     
263     Produce a new function or code object by rewriting an existing one.
264     
265     Notice that, unlike the base Visitor class, Rewriter does not accept a
266     string or list of opcodes as an initial argument.
267     """
268    
269     def __init__(self, obj):
270         self.verbose = False
271        
272         if isinstance(obj, MethodType):
273             obj = obj.im_func
274         if isinstance(obj, FunctionType):
275             self._func = obj
276             obj = obj.func_code
277        
278         # Copy code object attributes.
279         selfdict = self.__dict__
280         for name, _type in _co_code_attrs.iteritems():
281             value = getattr(obj, name)
282             if _type is tuple:
283                 value = list(value)
284             selfdict[name] = value
285        
286         try:
287             obj = obj.co_code
288         except AttributeError:
289             pass
290        
291         # Map the code block to a list of opcode numbers.
292         if isinstance(obj, basestring):
293             bytecode = map(ord, obj)
294         elif isinstance(obj, list):
295             bytecode = obj[:]
296         else:
297             raise TypeError("obj arg of incorrect type '%s'" % type(obj))
298        
299         self._bytecode = bytecode
300    
301     def bytecode(self):
302         """Walk self and return new bytecode."""
303         self.walk()
304         return self.newcode
305    
306     def code_object(self):
307         """Walk self and produce a new code object."""
308         self.walk()
309         codestr = ''.join(map(chr, self.newcode))
310         return CodeType(self.co_argcount, self.co_nlocals, self.co_stacksize,
311                         # Notice co_consts should *not* be safe_tupled.
312                         self.co_flags, codestr, tuple(self.co_consts),
313                         safe_tuple(self.co_names), safe_tuple(self.co_varnames),
314                         self.co_filename, self.co_name, self.co_firstlineno,
315                         self.co_lnotab, safe_tuple(self.co_freevars),
316                         safe_tuple(self.co_cellvars))
317    
318     def function(self, newname=None):
319         """Walk self and produce a new function."""
320         try:
321             f = self._func
322         except AttributeError:
323             if newname is None:
324                 newname = ''
325             co = self.code_object()
326             return FunctionType(co, {}, newname)
327         else:
328             if newname is None:
329                 newname = f.func_name
330             co = self.code_object()
331             return FunctionType(co, f.func_globals, newname,
332                                 f.func_defaults, f.func_closure)
333    
334     def const_index(self, value):
335         """The index of value in co_consts, appending it if not found."""
336         for pos, item in enumerate(self.co_consts):
337             try:
338                 if type(value) == type(item) and value == item:
339                     break
340             except TypeError:
341                 pass
342         else:
343             pos = len(self.co_consts)
344             self.co_consts.append(value)
345         return pos
346    
347     def name_index(self, value):
348         """The index of value in co_names, appending it if not found."""
349         valtype = type(value)
350         for pos, item in enumerate(self.co_names):
351             try:
352                 if valtype == type(item) and value == item:
353                     return pos
354             except TypeError:
355                 pass
356        
357         pos = len(self.co_names)
358         self.co_names.append(value)
359         return pos
360    
361     def walk(self):
362         self.newcode = []
363         Visitor.walk(self)
364    
365     def visit_instruction(self, op, lo=None, hi=None):
366         append = self.newcode.append
367         append(op)
368         if lo is not None:
369             append(lo)
370         if hi is not None:
371             append(hi)
372    
373     def put(self, start, end, *bits):
374         """Overwrite self.newcode with new opcodes (numbers or names).
375         
376         If the new codes are of different quantity than the old,
377         modify any jump codes affected.
378         """
379         bitnums = numeric_opcodes(bits)
380        
381         # Adjust jump codes. Notice this comes before bytecode is modified.
382         jca = JumpCodeAdjuster(self.newcode, start, end, len(bitnums))
383         self.newcode = jca.bytecode()
384        
385         # Rewrite bytecode.
386         self.newcode[start:end] = bitnums
387    
388     def tail(self, length, *bits):
389         """Overwrite self.newcode[-length:] with bits."""
390         end = len(self.newcode)
391         self.put(end - length, end, *bits)
392
393
394 class Localizer(Rewriter):
395     """Localizer(func, builtin_only=False, stoplist=[], verbose=False)
396     
397     If a global or builtin is known at compile time, replace it with a constant.
398     
399     This duplicates (and borrows from) Raymond Hettinger's Cookbook recipe
400     at: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/277940
401     """
402     def __init__(self, func, builtin_only=False, stoplist=[], verbose=False):
403         Rewriter.__init__(self, func)
404        
405         import __builtin__
406         self.env = vars(__builtin__).copy()
407         if not builtin_only:
408             self.env.update(func.func_globals)
409        
410         self.stoplist = stoplist
411         self.verbose = verbose
412    
413     def visit_LOAD_GLOBAL(self, lo, hi):
414         name = self.co_names[lo + (hi << 8)]
415         if name in self.env and name not in self.stoplist:
416             value = self.env[name]
417             pos = self.const_index(value)
418             self.tail(3, 'LOAD_CONST', pos & 0xFF, pos >> 8)
419             if self.verbose:
420                 self.debug(name, ' --> ', value)
421
422
423 class TaintableStack(list):
424     def __init__(self, seq=[]):
425         list.__init__(self, seq)
426         self._taintindex = sets.Set()
427         self.maxsize = len(seq)
428    
429     def taint(self, index=-1):
430         if index < 0:
431             index += len(self)
432         self._taintindex.add(index)
433    
434     def tainted(self, index=-1):
435         if index < 0:
436             index = len(self) + index
437         return (index in self._taintindex)
438    
439     def pop(self, index=-1):
440         """pop(index) -> Returns a tuple!! of (value, tainted)."""
441         if index < 0:
442             index += len(self)
443         is_tainted = (index in self._taintindex)
444         if is_tainted:
445             self._taintindex.remove(index)
446         return list.pop(self, index), is_tainted
447    
448     def append(self, obj):
449         list.append(self, obj)
450         l = len(self)
451         if l > self.maxsize:
452             self.maxsize = l
453
454
455 class EarlyBinder(Rewriter):
456     """EarlyBinder(func, reduce_getattr=True, bind_late=[])
457     
458     Deep-evaluate function, replacing free vars with constants.
459     
460     reduce_getattr: If True (the default), getattr(x, y) will be
461         replaced with x.y where possible.
462     
463     bind_late: a list of names (globals, freevars, or attributes)
464         which should not be early-bound. For example, if you want
465         datetime.date.today() to be bound late, include 'today'
466         in the bind_late.
467     
468     Example: k = lambda x: x.Date == datetime.date(2004, 1, 1)
469              r = EarlyBinder(k).function()
470             
471     _____ k _____                _____ r _____
472      0 LOAD_FAST     0 (x)        0 LOAD_FAST   0 (x)
473      3 LOAD_ATTR     1 (Date)     3 LOAD_ATTR   1 (Date)
474      6 LOAD_GLOBAL   2 (datetime) 6 LOAD_CONST  6 (datetime.date(2004, 1, 1))
475      9 LOAD_ATTR     3 (date)
476     12 LOAD_CONST    1 (2004)
477     15 LOAD_CONST    2 (1)
478     18 LOAD_CONST    2 (1)
479     21 CALL_FUNCTION 3
480     24 COMPARE_OP    2 (==)       9 COMPARE_OP  2 (==)
481     27 RETURN_VALUE              12 RETURN_VALUE
482     
483     This also pre-computes binary operations *and all other builtin or free
484     functions* where all operands are constants, globals, or freevars.
485     For example:
486         LOAD_CONST        1 (3)
487         LOAD_CONST        2 (4)
488         BINARY_MULTIPLY
489         
490     is replaced with:
491         LOAD_CONST        5 (12)
492     
493     However, order is important. lambda x: x * 4 * 5 won't see any
494     optimization, because the order of eval is (x * 4) * 5. Rewritten
495     as lambda x: 4 * 5 * x, the "4 * 5" can be replaced with "20".
496     """
497    
498     def __init__(self, func, reduce_getattr=True, bind_late=[]):
499         Rewriter.__init__(self, func)
500         self.reduce_getattr = reduce_getattr
501        
502         import __builtin__
503         self.env = vars(__builtin__).copy()
504         self.env.update(func.func_globals)
505        
506         # Keep a stack like the interpreter would. This does *not*
507         # get overwritten when self.newcode does--it emulates the
508         # original instructions (although tainted values may be dummies).
509         # When a local var is pushed onto this stack, it "taints" itself
510         # and any operations which depend upon it.
511         # This stack is not passed out of this class in any way.
512         self.stack = TaintableStack()
513         self.bind_late = bind_late
514    
515     def code_object(self):
516         """Walk self and produce a new code object."""
517         self.walk()
518         codestr = ''.join(map(chr, self.newcode))
519         # Assert CO_NOFREE, since all free vars should have been made constant.
520         self.co_flags |= CO_NOFREE
521         co = CodeType(self.co_argcount, self.co_nlocals, self.stack.maxsize,
522                       self.co_flags, codestr, tuple(self.co_consts),
523                       safe_tuple(self.co_names), safe_tuple(self.co_varnames),
524                       '', self.co_name, 1,
525                       self.co_lnotab, (), ())
526         return co
527    
528     def function(self, newname=None):
529         """Walk self and produce a new function."""
530         try:
531             f = self._func
532         except AttributeError:
533             if newname is None:
534                 newname = ''
535             co = self.code_object()
536             return FunctionType(co, {}, newname)
537         else:
538             if newname is None:
539                 newname = f.func_name
540             co = self.code_object()
541             # All cells should be dereferenced, so force func_closure to None.
542             return FunctionType(co, f.func_globals, newname, f.func_defaults)
543    
544     def reduce(self, number_of_terms, transform=None, overwrite_length=None):
545         """If no stack args are to be bound late, rewrite previous opcodes.
546         
547         number_of_terms: the number of terms to pop off the stack.
548         
549         transform: a callback, to which we send the popped terms. They are
550             transformed in that function as needed, and returned.
551         
552         overwrite_length: the number of previous opcodes to overwrite. If
553             None, it defaults to (number of terms + 1 for the current
554             instruction) * 3.
555         """
556         if overwrite_length is None:
557             # +1 is for current bytecode. If any overwritten bytecode
558             # is not len 3, pass in a value for overwrite_length.
559             overwrite_length = (number_of_terms + 1) * 3
560        
561         # Pop the requested number of terms off the stack.
562         is_tainted = False
563         terms, taints = [], []
564         for i in xrange(number_of_terms):
565             term, taint = self.stack.pop()
566             taints.append(taint)
567             is_tainted |= taint
568             terms.append(term)
569        
570         # Now that all the stack-popping is done...
571         if is_tainted:
572             # We don't have to handle getattr if no args are
573             # tainted, because CALL_FUNCTION will do it normally.
574             if self.reduce_getattr:
575                 if (len(terms) == 3 and terms[2] == getattr
576                     and taints[1] and not taints[0]):
577                     # Form a new LOAD_ATTR instruction.
578                     pos = self.name_index(terms[0])
579                     # Unlike normal CALL_FUNCTION, we can't assume each arg
580                     # is a constant; therefore, our overwrite_length is
581                     # indeterminate. We'll just cheat and keep track of
582                     # the last LOAD_GLOBAL where we looked up getattr. ;)
583                     start = self.last_getattr
584                     # Grab and reuse opcodes of first (LOAD_FAST) term.
585                     bits = self.newcode[start + 3:-6]
586                     bits += ['LOAD_ATTR', pos & 0xFF, pos >> 8]
587                     bits = tuple(bits)
588                     self.put(start, len(self.newcode), *bits)
589                     self.stack.append(None)
590                     self.stack.taint()
591                     return None
592            
593             # Don't form the new object.
594             # Replace TOS with a dummy and taint it.
595             self.stack.append(None)
596             self.stack.taint()
597             return None
598        
599         # Callback the transform.
600         terms.reverse()
601         if transform:
602             result = transform(terms)
603         else:
604             result = terms
605        
606         # Replace TOS with result.
607         self.stack.append(result)
608        
609         # Overwrite bytecodes with new CONST formed from result.
610         pos = self.const_index(result)
611         self.tail(overwrite_length, 'LOAD_CONST', pos & 0xFF, pos >> 8)
612        
613         return result
614    
615     def visit_BUILD_TUPLE(self, lo, hi):
616         self.reduce(lo + (hi << 8), lambda terms: tuple(terms))
617    
618     def visit_BUILD_LIST(self, lo, hi):
619         self.reduce(lo + (hi << 8))
620    
621     def visit_CALL_FUNCTION(self, lo, hi):
622         def call(terms):
623             func = terms.pop(0)
624             args = tuple(terms[:lo])
625             kwargs = {}
626             for i in range(hi):
627                 key = self.terms.pop(0)
628                 val = self.terms.pop(0)
629                 kwargs[key] = val
630             return func(*args, **kwargs)
631         self.reduce(lo + hi + 1, call)
632    
633     def visit_COMPARE_OP(self, lo, hi):
634         op = cmp_op[lo + (hi << 8)]
635         op = comparisons[op]
636         self.reduce(2, lambda terms: op(*terms))
637    
638     def visit_LOAD_ATTR(self, lo, hi):
639         name = self.co_names[lo + (hi << 8)]
640         result = self.reduce(1, lambda terms: getattr(terms[0], name))
641         if result in self.bind_late or getattr(result, 'bind_late', False):
642             self.stack.taint()
643    
644     def visit_LOAD_CONST(self, lo, hi):
645         self.stack.append(self.co_consts[lo + (hi << 8)])
646    
647     def visit_LOAD_DEREF(self, lo, hi):
648         if hasattr(self, '_func'):
649             name = self.co_freevars[lo + (hi << 8)]
650             value = self._func.func_closure[lo + (hi << 8)]
651             value = deref_cell(value)
652             pos = self.const_index(value)
653             self.tail(3, 'LOAD_CONST', pos & 0xFF, pos >> 8)
654             self.stack.append(value)
655             if value in self.bind_late or getattr(value, 'bind_late', False):
656                 self.stack.taint()
657    
658     def visit_LOAD_FAST(self, lo, hi):
659         self.stack.append(self.co_varnames[lo + (hi << 8)])
660         # LOAD_FAST references our bound variable, which is always bound late.
661         self.stack.taint()
662    
663     def visit_LOAD_GLOBAL(self, lo, hi):
664         name = self.co_names[lo + (hi << 8)]
665         if name == 'getattr':
666             self.last_getattr = (len(self.newcode) - 3)
667         if name in self.env:
668             value = self.env[name]
669             pos = self.const_index(value)
670             self.tail(3, 'LOAD_CONST', pos & 0xFF, pos >> 8)
671             self.stack.append(value)
672             if value in self.bind_late or getattr(value, 'bind_late', False):
673                 self.stack.taint()
674         else:
675             raise KeyError("'%s' is not present in supplied globals." % name)
676    
677     def visit_SLICE_PLUS_0(self):
678         self.reduce(1, lambda terms: terms[0][:], 4)
679    
680     def visit_SLICE_PLUS_1(self):
681         self.reduce(2, lambda terms: terms[0][terms[1]:], 7)
682    
683     def visit_SLICE_PLUS_2(self):
684         self.reduce(2, lambda terms: terms[0][:terms[1]], 7)
685    
686     def visit_SLICE_PLUS_3(self):
687         self.reduce(3, lambda terms: terms[0][terms[1]:terms[2]], 10)
688    
689     def binary_op(self, op):
690         def operate(terms):
691             return op(*terms)
692         self.reduce(2, operate, 7)
693
694 # Add visit_BINARY, visit_INPLACE methods to EarlyBinder.
695 for k, v in binary_operators.iteritems():
696     setattr(EarlyBinder, "visit_" + k,
697             lambda self, opr=v: self.binary_op(opr))
698 for k, v in inplace_operators.iteritems():
699     setattr(EarlyBinder, "visit_" + k,
700             # Yes, we really do call binary_op for inplace methods.
701             lambda self, opr=v: self.binary_op(opr))
702
703
704 class MapStackObject(dict):
705    
706     def __add__(self, other):
707         if isinstance(other, basestring):
708             return repr(self) + other
709         return dict.__add__(self, other)
710    
711     def __repr__(self):
712         return "{%s}" % ", ".join([": ".join((k, v))
713                                    for k, v in self.iteritems()])
714
715
716 class LambdaDecompiler(Visitor):
717     """LambdaDecompiler(obj=lambda function or func_code).
718     
719     Produce decompiled Python code (as a string) from a supplied lambda."""
720    
721     def __init__(self, obj):
722         self.verbose = False
723        
724         # Distill supplied 'obj' arg to a code block string.
725         if isinstance(obj, MethodType):
726             obj = obj.im_func
727         if isinstance(obj, FunctionType):
728             obj = obj.func_code
729        
730         # Copy code object attributes.
731         selfdict = self.__dict__
732         for name, _type in _co_code_attrs.iteritems():
733             value = getattr(obj, name)
734             if _type is tuple:
735                 value = list(value)
736             selfdict[name] = value
737        
738         try:
739             obj = obj.co_code
740         except AttributeError:
741             pass
742        
743         # Map the code block to a list of opcode numbers.
744         if isinstance(obj, basestring):
745             obj = map(ord, obj)
746         elif isinstance(obj, list):
747             obj = obj[:]
748        
749         self._bytecode = obj
750    
751     def code(self, include_func_header=True):
752         self.walk()
753         product = self.stack[0]
754         if include_func_header:
755             args = list(self.co_varnames)
756             if self.co_flags & CO_VARKEYWORDS:
757                 args[-1] = "**" + args[-1]
758                 if self.co_flags & CO_VARARGS:
759                     args[-2] = "*" + args[-2]
760             elif self.co_flags & CO_VARARGS:
761                 args[-1] = "*" + args[-1]
762             args = ", ".join(args)
763            
764             product = "lambda %s: %s" % (args, product)
765         return product
766    
767     def walk(self):
768         self.stack = []
769         self.newcode = []
770         self.targets = {}
771        
772         Visitor.walk(self)
773        
774         if self.verbose:
775             self.debug("stack:", self.stack)
776    
777     def visit_instruction(self, op, lo=None, hi=None):
778         # Get the instruction pointer for the current instruction.
779         ip = self.cursor - 3
780         if hi is None:
781             ip += 1
782             if lo is None:
783                 ip += 1
784        
785         terms = self.targets.get(ip)
786         if terms:
787             clause = self.stack[-1]
788             while terms:
789                 term, oper = terms.pop()
790                 clause = "(%s) %s (%s)" % (term, oper, clause)
791             # Replace TOS with the new clause, so that further
792             # combinations have access to it.
793             self.stack[-1] = clause
794             if self.verbose:
795                 self.debug("clause:", clause, "\n")
796            
797             if op == 1:
798                 # Py2.4: The current instruction is POP_TOP, which means
799                 # the previous is probably JUMP_*. If so, we don't want to
800                 # pop the value we just placed on the stack and lose it.
801                 # We need to replace the entry that the JUMP_* made in
802                 # self.targets with our new TOS.
803                 target = self.targets[self.last_target_ip]
804                 target[-1] = ((clause, target[-1][1]))
805                 if self.verbose:
806                     self.debug("newtarget:", self.last_target_ip, target)
807    
808     def visit_BUILD_LIST(self, lo, hi):
809         terms = ", ".join([self.stack.pop() for i in range(lo + hi << 8)])
810         self.stack.append("[%s]" % terms)
811    
812     def visit_BUILD_MAP(self, lo, hi):
813         # We're actually going to put a non-string object on the stack here,
814         # with the expectation that the next bytecodes will populate it.
815         self.stack.append(MapStackObject())
816    
817     def visit_BUILD_TUPLE(self, lo, hi):
818         terms = ", ".join([self.stack.pop() for i in range(lo + hi << 8)])
819         self.stack.append("(%s)" % terms)
820    
821     def visit_CALL_FUNCTION(self, lo, hi):
822         kwargs = {}
823         for i in range(hi):
824             val = self.stack.pop()
825             key = self.stack.pop()
826             kwargs[key] = val
827         kwargs = ", ".join([k + "=" + v for k, v in kwargs.iteritems()])
828        
829         args = []
830         for i in xrange(lo):
831             arg = self.stack.pop()
832             args.append(arg)
833         args.reverse()
834         args = ", ".join(args)
835        
836         if kwargs:
837             args += ", " + kwargs
838        
839         func = self.stack.pop()
840         self.stack.append("%s(%s)" % (func, args))
841    
842     def visit_COMPARE_OP(self, lo, hi):
843         term2, term1 = self.stack.pop(), self.stack.pop()
844         op = cmp_op[lo + (hi << 8)]
845         self.stack.append(term1 + " " + op + " " + term2)
846         if self.verbose:
847             self.debug(op)
848    
849     def visit_DUP_TOP(self):
850         self.stack.append(self.stack[-1])
851    
852     def visit_JUMP_IF_FALSE(self, lo, hi):
853         # Note that self.cursor has already advanced to the next instruction.
854         target = self.cursor + (lo + (hi << 8))
855         bucket = self.targets.setdefault(target, [])
856         bucket.append((self.stack[-1], 'and'))
857         if self.verbose:
858             self.debug("target:", target, bucket)
859         # Store target ip for the special code in visit_instruction
860         self.last_target_ip = target
861    
862     def visit_JUMP_IF_TRUE(self, lo, hi):
863         # Note that self.cursor has already advanced to the next instruction.
864         target = self.cursor + (lo + (hi << 8))
865         bucket = self.targets.setdefault(target, [])
866         bucket.append((self.stack[-1], 'or'))
867         if self.verbose:
868             self.debug("target:", target, bucket)
869         # Store target ip for the special code in visit_instruction
870         self.last_target_ip = target
871    
872     def visit_LOAD_ATTR(self, lo, hi):
873         term = self.co_names[lo + (hi << 8)]
874         self.stack[-1] += ("." + term)
875         if self.verbose:
876             self.debug(term)
877    
878     def visit_LOAD_CONST(self, lo, hi):
879         val = self.co_consts[lo + (hi << 8)]
880         if isinstance(val, (FunctionType, type)):
881             # The const in question is a factory function.
882             self.stack.append(val.__module__ + "." + val.__name__)
883         else:
884             term = repr(val)
885             if hasattr(val, "__module__"):
886                 if not term.startswith(val.__module__ + "."):
887                     term = val.__module__ + "." + term
888             self.stack.append(term)
889             if self.verbose:
890                 self.debug(term)
891    
892     def visit_LOAD_FAST(self, lo, hi):
893         term = self.co_varnames[lo + (hi << 8)]
894         self.stack.append(term)
895         if self.verbose:
896             self.debug(term)
897    
898     def visit_LOAD_GLOBAL(self, lo, hi):
899         self.stack.append(self.co_names[lo + (hi << 8)])
900    
901     def visit_POP_TOP(self):
902         self.stack.pop()
903    
904     def visit_ROT_TWO(self):
905         v = self.stack.pop()
906         k = self.stack.pop()
907         self.stack.extend([v, k])
908    
909     def visit_ROT_THREE(self):
910         v = self.stack.pop()
911         k = self.stack.pop()
912         x = self.stack.pop()
913         self.stack.extend([v, x, k])
914    
915     def visit_SLICE_PLUS_0(self):
916         arg = self.stack.pop()
917         self.stack.append("%s[:]" % arg)
918    
919     def visit_SLICE_PLUS_1(self):
920         args = tuple(self.stack[-2:])
921         del self.stack[-2:]
922         self.stack.append("%s[%s:]" % args)
923    
924     def visit_SLICE_PLUS_2(self):
925         args = tuple(self.stack[-2:])
926         del self.stack[-2:]
927         self.stack.append("%s[:%s]" % args)
928    
929     def visit_SLICE_PLUS_3(self):
930         args = tuple(self.stack[-3:])
931         del self.stack[-3:]
932         self.stack.append("%s[%s:%s]" % args)
933    
934     def visit_STORE_SUBSCR(self):
935         k = self.stack.pop()
936         x = self.stack.pop()
937         v = self.stack.pop()
938         x[k] = v
939    
940     def visit_UNARY_CONVERT(self):
941         term = self.stack.pop()
942         self.stack.append("`(" + term + ")`")
943    
944     def visit_UNARY_INVERT(self):
945         term = self.stack.pop()
946         self.stack.append("~(" + term + ")")
947    
948     def visit_UNARY_NEGATIVE(self):
949         term = self.stack.pop()
950         self.stack.append("-(" + term + ")")
951    
952     def visit_UNARY_NOT(self):
953         term = self.stack.pop()
954         self.stack.append("not (" + term + ")")
955    
956     def visit_UNARY_POSITIVE(self):
957         term = self.stack.pop()
958         self.stack.append("+(" + term + ")")
959    
960     def binary_op(self, op):
961         op2, op1 = self.stack.pop(), self.stack.pop()
962         self.stack.append(op1 + " " + op + " " + op2)
963    
964     def visit_BINARY_SUBSCR(self):
965         op2, op1 = self.stack.pop(), self.stack.pop()
966         self.stack.append(op1 + "[" + op2 + "]")
967
968 # Add visit_BINARY methods to LambdaDecompiler.
969 for k, v in binary_repr.iteritems():
970     setattr(LambdaDecompiler, "visit_" + k,
971             lambda self, op=v: self.binary_op(op))
972
973
974 class BranchTracker(Visitor):
975     """BranchTracker(obj=[func|co|str|list]).
976     
977     Finds all possible instructions previous to the supplied instruction(s).
978     """
979    
980     def branches(self, instr=None):
981         """Walk self and return all possible instructions previous to instr.
982         
983         If instr is None, the last instruction will be used.
984         """
985         if instr is None:
986             instr = len(self._bytecode) - 1
987         self.watch = {instr: []}
988         self.walk()
989         return self.watch[instr]
990    
991     def visit_instruction(self, op, lo=None, hi=None):
992         if self.cursor in self.watch and op != 113:
993             if lo is None and hi is None:
994                 self.watch[self.cursor].append(self.cursor - 1)
995             else:
996                 self.watch[self.cursor].append(self.cursor - 3)
997    
998     def visit_CONTINUE_LOOP(self, lo, hi):
999         self.visit_JUMP_ABSOLUTE(lo, hi)
1000    
1001     def visit_JUMP_ABSOLUTE(self, lo, hi):
1002         target = lo + (hi << 8)
1003         if target in self.watch:
1004             self.watch[target].append(self.cursor)
1005    
1006     def visit_JUMP_FORWARD(self, lo, hi):
1007         delta = lo + (hi << 8)
1008         target = self.cursor + delta
1009         if target in self.watch:
1010             self.watch[target].append(self.cursor - 3)
1011    
1012     def visit_JUMP_IF_FALSE(self, lo, hi):
1013         self.visit_JUMP_FORWARD(lo, hi)
1014    
1015     def visit_JUMP_IF_TRUE(self, lo, hi):
1016         self.visit_JUMP_FORWARD(lo, hi)
1017
1018
1019 class KeywordInspector(Rewriter):
1020     """Produce a list of all keyword arguments expected."""
1021    
1022     def __init__(self, obj):
1023         """KeywordInspector(obj). List keyword arguments expected."""
1024         Rewriter.__init__(self, obj)
1025         if not (self.co_flags & CO_VARKEYWORDS):
1026             raise ValueError("'%s' does not possess **kwargs." % obj)
1027         if len(self.co_varnames) <= 1:
1028             raise ValueError("'%s' does not possess more than 1 varname." % obj)
1029         self._kwargs = []
1030         self.flag = None
1031    
1032     def kwargs(self):
1033         """kwargs() -> List of keyword arguments expected."""
1034         self.walk()
1035         return self._kwargs
1036    
1037     def visit_instruction(self, op, lo=None, hi=None):
1038         if op == 124 and (lo + (hi << 8) == len(self.co_varnames) - 1):
1039             self.flag = ''
1040         elif op == 100 and self.flag == '':
1041             self.flag = self.co_consts[lo + (hi << 8)]
1042         elif op == 25 and self.flag:
1043             self._kwargs.append(self.flag)
1044         else:
1045             self.flag = None
1046
1047 del k, v
1048
Note: See TracBrowser for help on using the browser.