3939
4040from xdis import iscode , instruction_size , Instruction
4141from xdis .bytecode import _get_const_info
42- from xdis .codetype import UnicodeForPython3
4342
4443from uncompyle6 .scanners .tok import Token
4544from uncompyle6 .scanner import parse_fn_counts_30_35
45+ from uncompyle6 .util import get_code_name
4646import xdis
4747
4848# Get all the opcodes into globals
@@ -209,11 +209,18 @@ def __init__(self, version, show_asm=None, is_pypy=False):
209209 return
210210
211211 def bound_collection_from_inst (
212- self , insts : list , next_tokens : list , inst : Instruction , t : Token , i : int , collection_type : str
212+ self ,
213+ insts : list ,
214+ next_tokens : list ,
215+ inst : Instruction ,
216+ t : Token ,
217+ i : int ,
218+ collection_type : str ,
213219 ) -> Optional [list ]:
214220 """
215- Try to a replace sequence of instruction that ends with a BUILD_xxx with a sequence that can
216- be parsed much faster, but inserting the token boundary at the beginning of the sequence.
221+ Try to replace a sequence of instructions that ends with a
222+ BUILD_xxx with a sequence that can be parsed much faster, by
223+ inserting a token boundary at the beginning of the sequence.
217224 """
218225 count = t .attr
219226 assert isinstance (count , int )
@@ -291,10 +298,12 @@ def bound_collection_from_inst(
291298 return new_tokens
292299
293300 def bound_map_from_inst (
294- self , insts : list , next_tokens : list , inst : Instruction , t : Token , i : int ) -> Optional [list ]:
301+ self , insts : list , next_tokens : list , inst : Instruction , t : Token , i : int
302+ ) -> Optional [list ]:
295303 """
296- Try to a sequence of instruction that ends with a BUILD_MAP into a sequence that can
297- be parsed much faster, but inserting the token boundary at the beginning of the sequence.
304+ Try to turn a sequence of instructions that ends with a BUILD_MAP into
305+ a sequence that can be parsed much faster, by inserting a
306+ token boundary at the beginning of the sequence.
298307 """
299308 count = t .attr
300309 assert isinstance (count , int )
@@ -309,21 +318,18 @@ def bound_map_from_inst(
309318 assert (count * 2 ) <= i
310319
311320 for j in range (collection_start , i , 2 ):
312- if insts [j ].opname not in (
313- "LOAD_CONST" ,
314- ):
321+ if insts [j ].opname not in ("LOAD_CONST" ,):
315322 return None
316- if insts [j + 1 ].opname not in (
317- "LOAD_CONST" ,
318- ):
323+ if insts [j + 1 ].opname not in ("LOAD_CONST" ,):
319324 return None
320325
321326 collection_start = i - (2 * count )
322327 collection_enum = CONST_COLLECTIONS .index ("CONST_MAP" )
323328
324- # If we get here, all instructions before tokens[i] are LOAD_CONST and we can replace
325- # add a boundary marker and change LOAD_CONST to something else
326- new_tokens = next_tokens [:- (2 * count )]
329+ # If we get here, all instructions before tokens[i] are LOAD_CONST and
330+ # we can add a boundary marker and change each LOAD_CONST to
331+ # something else.
332+ new_tokens = next_tokens [: - (2 * count )]
327333 start_offset = insts [collection_start ].offset
328334 new_tokens .append (
329335 Token (
@@ -353,10 +359,10 @@ def bound_map_from_inst(
353359 new_tokens .append (
354360 Token (
355361 opname = "ADD_VALUE" ,
356- attr = insts [j + 1 ].argval ,
357- pattr = insts [j + 1 ].argrepr ,
358- offset = insts [j + 1 ].offset ,
359- linestart = insts [j + 1 ].starts_line ,
362+ attr = insts [j + 1 ].argval ,
363+ pattr = insts [j + 1 ].argrepr ,
364+ offset = insts [j + 1 ].offset ,
365+ linestart = insts [j + 1 ].starts_line ,
360366 has_arg = True ,
361367 has_extended_arg = False ,
362368 opc = self .opc ,
@@ -376,8 +382,9 @@ def bound_map_from_inst(
376382 )
377383 return new_tokens
378384
379- def ingest (self , co , classname = None , code_objects = {}, show_asm = None
380- ) -> Tuple [list , dict ]:
385+ def ingest (
386+ self , co , classname = None , code_objects = {}, show_asm = None
387+ ) -> Tuple [list , dict ]:
381388 """
382389 Create "tokens" from the bytecode of a Python code object. Largely these
383390 are the opcode name, but in some cases that has been modified to make parsing
@@ -387,14 +394,17 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
387394 Some transformations are made to assist the deparsing grammar:
388395 - various types of LOAD_CONST's are categorized in terms of what they load
389396 - COME_FROM instructions are added to assist parsing control structures
390- - operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
397+ - operands with stack argument counts or flag masks are appended to the
398+ opcode name, e.g.:
391399 * BUILD_LIST, BUILD_SET
392- * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
400+ * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional
401+ arguments
393402 - EXTENDED_ARGS instructions are removed
394403
395- Also, when we encounter certain tokens, we add them to a set which will cause custom
396- grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
397- cause specific rules for the specific number of arguments they take.
404+ Also, when we encounter certain tokens, we add them to a set
405+ which will cause custom grammar rules. Specifically, variable
406+ arg tokens like MAKE_FUNCTION or BUILD_LIST cause specific rules
407+ for the specific number of arguments they take.
398408 """
399409
400410 if not show_asm :
@@ -420,7 +430,6 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
420430
421431 n = len (self .insts )
422432 for i , inst in enumerate (self .insts ):
423-
424433 opname = inst .opname
425434 # We need to detect the difference between:
426435 # raise AssertionError
@@ -437,12 +446,12 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
437446 prev_inst = self .insts [i - 1 ]
438447 assert_can_follow = (
439448 prev_inst .opname in ("JUMP_IF_TRUE" , "JUMP_IF_FALSE" )
440- and i + 1 < n )
449+ and i + 1 < n
450+ )
441451 jump_if_inst = prev_inst
442452 else :
443453 assert_can_follow = (
444- opname in ("POP_JUMP_IF_TRUE" , "POP_JUMP_IF_FALSE" )
445- and i + 1 < n
454+ opname in ("POP_JUMP_IF_TRUE" , "POP_JUMP_IF_FALSE" ) and i + 1 < n
446455 )
447456 jump_if_inst = inst
448457 if assert_can_follow :
@@ -452,7 +461,9 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
452461 and next_inst .argval == "AssertionError"
453462 and jump_if_inst .argval
454463 ):
455- raise_idx = self .offset2inst_index [self .prev_op [jump_if_inst .argval ]]
464+ raise_idx = self .offset2inst_index [
465+ self .prev_op [jump_if_inst .argval ]
466+ ]
456467 raise_inst = self .insts [raise_idx ]
457468 if raise_inst .opname .startswith ("RAISE_VARARGS" ):
458469 self .load_asserts .add (next_inst .offset )
@@ -468,22 +479,21 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
468479 new_tokens = []
469480
470481 for i , inst in enumerate (self .insts ):
471-
472482 opname = inst .opname
473483 argval = inst .argval
474484 pattr = inst .argrepr
475485
476486 t = Token (
477- opname = opname ,
478- attr = argval ,
479- pattr = pattr ,
480- offset = inst .offset ,
481- linestart = inst .starts_line ,
482- op = inst .opcode ,
483- has_arg = inst .has_arg ,
484- has_extended_arg = inst .has_extended_arg ,
485- opc = self .opc ,
486- )
487+ opname = opname ,
488+ attr = argval ,
489+ pattr = pattr ,
490+ offset = inst .offset ,
491+ linestart = inst .starts_line ,
492+ op = inst .opcode ,
493+ has_arg = inst .has_arg ,
494+ has_extended_arg = inst .has_extended_arg ,
495+ opc = self .opc ,
496+ )
487497
488498 # things that smash new_tokens like BUILD_LIST have to come first.
489499 if opname in (
@@ -502,11 +512,13 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
502512 if try_tokens is not None :
503513 new_tokens = try_tokens
504514 continue
505- elif opname in (
506- "BUILD_MAP" ,
507- ):
515+ elif opname in ("BUILD_MAP" ,):
508516 try_tokens = self .bound_map_from_inst (
509- self .insts , new_tokens , inst , t , i ,
517+ self .insts ,
518+ new_tokens ,
519+ inst ,
520+ t ,
521+ i ,
510522 )
511523 if try_tokens is not None :
512524 new_tokens = try_tokens
@@ -573,9 +585,7 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
573585 if op in self .opc .CONST_OPS :
574586 const = argval
575587 if iscode (const ):
576- co_name = const .co_name
577- if isinstance (const .co_name , UnicodeForPython3 ):
578- co_name = const .co_name .value .decode ("utf-8" )
588+ co_name = get_code_name (const )
579589 if co_name == "<lambda>" :
580590 assert opname == "LOAD_CONST"
581591 opname = "LOAD_LAMBDA"
@@ -629,7 +639,7 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
629639 else :
630640 pos_args , name_pair_args , annotate_args = parse_fn_counts_30_35 (
631641 inst .argval
632- )
642+ )
633643
634644 pattr = f"{ pos_args } positional, { name_pair_args } keyword only, { annotate_args } annotated"
635645
@@ -715,11 +725,13 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
715725 and self .insts [i + 1 ].opname == "JUMP_FORWARD"
716726 )
717727
718- if (self .version [:2 ] == (3 , 0 ) and self .insts [i + 1 ].opname == "JUMP_FORWARD"
719- and not is_continue ):
728+ if (
729+ self .version [:2 ] == (3 , 0 )
730+ and self .insts [i + 1 ].opname == "JUMP_FORWARD"
731+ and not is_continue
732+ ):
720733 target_prev = self .offset2inst_index [self .prev_op [target ]]
721- is_continue = (
722- self .insts [target_prev ].opname == "SETUP_LOOP" )
734+ is_continue = self .insts [target_prev ].opname == "SETUP_LOOP"
723735
724736 if is_continue or (
725737 inst .offset in self .stmts
@@ -736,7 +748,10 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
736748 # the "continue" is not on a new line.
737749 # There are other situations where we don't catch
738750 # CONTINUE as well.
739- if new_tokens [- 1 ].kind == "JUMP_BACK" and new_tokens [- 1 ].attr <= argval :
751+ if (
752+ new_tokens [- 1 ].kind == "JUMP_BACK"
753+ and new_tokens [- 1 ].attr <= argval
754+ ):
740755 if new_tokens [- 2 ].kind == "BREAK_LOOP" :
741756 del new_tokens [- 1 ]
742757 else :
@@ -809,7 +824,10 @@ def find_jump_targets(self, debug):
809824 if inst .has_arg :
810825 label = self .fixed_jumps .get (offset )
811826 oparg = inst .arg
812- if self .version >= (3 , 6 ) and self .code [offset ] == self .opc .EXTENDED_ARG :
827+ if (
828+ self .version >= (3 , 6 )
829+ and self .code [offset ] == self .opc .EXTENDED_ARG
830+ ):
813831 j = xdis .next_offset (op , self .opc , offset )
814832 next_offset = xdis .next_offset (op , self .opc , j )
815833 else :
@@ -1082,7 +1100,6 @@ def detect_control_flow(self, offset, targets, inst_index):
10821100 and (target > offset )
10831101 and pretarget .offset != offset
10841102 ):
1085-
10861103 # FIXME: hack upon hack...
10871104 # In some cases the pretarget can be a jump to the next instruction
10881105 # and these aren't and/or's either. We limit to 3.5+ since we experienced there
@@ -1104,7 +1121,6 @@ def detect_control_flow(self, offset, targets, inst_index):
11041121
11051122 # Is it an "and" inside an "if" or "while" block
11061123 if op == self .opc .POP_JUMP_IF_FALSE :
1107-
11081124 # Search for another POP_JUMP_IF_FALSE targeting the same op,
11091125 # in current statement, starting from current offset, and filter
11101126 # everything inside inner 'or' jumps and midline ifs
@@ -1357,7 +1373,6 @@ def detect_control_flow(self, offset, targets, inst_index):
13571373 self .fixed_jumps [offset ] = rtarget
13581374 self .not_continue .add (pre_rtarget )
13591375 else :
1360-
13611376 # FIXME: this is very convoluted and based on rather hacky
13621377 # empirical evidence. It should go away when
13631378 # we have better control-flow analysis
0 commit comments