
Commit d660871

committed
correct fn name on older 3.x cross decompile...
Also black, lint, and isort some
1 parent dc286b9 commit d660871


7 files changed: +267 -204 lines changed


uncompyle6/scanners/scanner3.py

Lines changed: 75 additions & 60 deletions
@@ -39,10 +39,10 @@

 from xdis import iscode, instruction_size, Instruction
 from xdis.bytecode import _get_const_info
-from xdis.codetype import UnicodeForPython3

 from uncompyle6.scanners.tok import Token
 from uncompyle6.scanner import parse_fn_counts_30_35
+from uncompyle6.util import get_code_name
 import xdis

 # Get all the opcodes into globals
@@ -209,11 +209,18 @@ def __init__(self, version, show_asm=None, is_pypy=False):
         return

     def bound_collection_from_inst(
-        self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int, collection_type: str
+        self,
+        insts: list,
+        next_tokens: list,
+        inst: Instruction,
+        t: Token,
+        i: int,
+        collection_type: str,
     ) -> Optional[list]:
         """
-        Try to a replace sequence of instruction that ends with a BUILD_xxx with a sequence that can
-        be parsed much faster, but inserting the token boundary at the beginning of the sequence.
+        Try to a replace sequence of instruction that ends with a
+        BUILD_xxx with a sequence that can be parsed much faster, but
+        inserting the token boundary at the beginning of the sequence.
         """
         count = t.attr
         assert isinstance(count, int)
@@ -291,10 +298,12 @@ def bound_collection_from_inst
         return new_tokens

     def bound_map_from_inst(
-        self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int) -> Optional[list]:
+        self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int
+    ) -> Optional[list]:
         """
-        Try to a sequence of instruction that ends with a BUILD_MAP into a sequence that can
-        be parsed much faster, but inserting the token boundary at the beginning of the sequence.
+        Try to a sequence of instruction that ends with a BUILD_MAP into
+        a sequence that can be parsed much faster, but inserting the
+        token boundary at the beginning of the sequence.
         """
         count = t.attr
         assert isinstance(count, int)
@@ -309,21 +318,18 @@ def bound_map_from_inst
         assert (count * 2) <= i

         for j in range(collection_start, i, 2):
-            if insts[j].opname not in (
-                "LOAD_CONST",
-            ):
+            if insts[j].opname not in ("LOAD_CONST",):
                 return None
-            if insts[j+1].opname not in (
-                "LOAD_CONST",
-            ):
+            if insts[j + 1].opname not in ("LOAD_CONST",):
                 return None

         collection_start = i - (2 * count)
         collection_enum = CONST_COLLECTIONS.index("CONST_MAP")

-        # If we get here, all instructions before tokens[i] are LOAD_CONST and we can replace
-        # add a boundary marker and change LOAD_CONST to something else
-        new_tokens = next_tokens[:-(2*count)]
+        # If we get here, all instructions before tokens[i] are LOAD_CONST and
+        # we can replace add a boundary marker and change LOAD_CONST to
+        # something else.
+        new_tokens = next_tokens[: -(2 * count)]
         start_offset = insts[collection_start].offset
         new_tokens.append(
             Token(
@@ -353,10 +359,10 @@ def bound_map_from_inst
             new_tokens.append(
                 Token(
                     opname="ADD_VALUE",
-                    attr=insts[j+1].argval,
-                    pattr=insts[j+1].argrepr,
-                    offset=insts[j+1].offset,
-                    linestart=insts[j+1].starts_line,
+                    attr=insts[j + 1].argval,
+                    pattr=insts[j + 1].argrepr,
+                    offset=insts[j + 1].offset,
+                    linestart=insts[j + 1].starts_line,
                     has_arg=True,
                     has_extended_arg=False,
                     opc=self.opc,
@@ -376,8 +382,9 @@ def bound_map_from_inst
         )
         return new_tokens

-    def ingest(self, co, classname=None, code_objects={}, show_asm=None
-               ) -> Tuple[list, dict]:
+    def ingest(
+        self, co, classname=None, code_objects={}, show_asm=None
+    ) -> Tuple[list, dict]:
         """
         Create "tokens" the bytecode of an Python code object. Largely these
         are the opcode name, but in some cases that has been modified to make parsing
@@ -387,14 +394,17 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
         Some transformations are made to assist the deparsing grammar:
         - various types of LOAD_CONST's are categorized in terms of what they load
         - COME_FROM instructions are added to assist parsing control structures
-        - operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
+        - operands with stack argument counts or flag masks are appended to the
+          opcode name, e.g.:
           * BUILD_LIST, BUILD_SET
-          * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
+          * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional
+            arguments
         - EXTENDED_ARGS instructions are removed

-        Also, when we encounter certain tokens, we add them to a set which will cause custom
-        grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
-        cause specific rules for the specific number of arguments they take.
+        Also, when we encounter certain tokens, we add them to a set
+        which will cause custom grammar rules. Specifically, variable
+        arg tokens like MAKE_FUNCTION or BUILD_LIST cause specific rules
+        for the specific number of arguments they take.
         """

         if not show_asm:
@@ -420,7 +430,6 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None

         n = len(self.insts)
         for i, inst in enumerate(self.insts):
-
             opname = inst.opname
             # We need to detect the difference between:
             # raise AssertionError
@@ -437,12 +446,12 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
                     prev_inst = self.insts[i - 1]
                     assert_can_follow = (
                         prev_inst.opname in ("JUMP_IF_TRUE", "JUMP_IF_FALSE")
-                        and i + 1 < n )
+                        and i + 1 < n
+                    )
                     jump_if_inst = prev_inst
             else:
                 assert_can_follow = (
-                    opname in ("POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE")
-                    and i + 1 < n
+                    opname in ("POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE") and i + 1 < n
                 )
                 jump_if_inst = inst
             if assert_can_follow:
@@ -452,7 +461,9 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
                     and next_inst.argval == "AssertionError"
                     and jump_if_inst.argval
                 ):
-                    raise_idx = self.offset2inst_index[self.prev_op[jump_if_inst.argval]]
+                    raise_idx = self.offset2inst_index[
+                        self.prev_op[jump_if_inst.argval]
+                    ]
                     raise_inst = self.insts[raise_idx]
                     if raise_inst.opname.startswith("RAISE_VARARGS"):
                         self.load_asserts.add(next_inst.offset)
@@ -468,22 +479,21 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
         new_tokens = []

         for i, inst in enumerate(self.insts):
-
             opname = inst.opname
             argval = inst.argval
             pattr = inst.argrepr

             t = Token(
-                    opname=opname,
-                    attr=argval,
-                    pattr=pattr,
-                    offset=inst.offset,
-                    linestart=inst.starts_line,
-                    op=inst.opcode,
-                    has_arg=inst.has_arg,
-                    has_extended_arg=inst.has_extended_arg,
-                    opc=self.opc,
-                    )
+                opname=opname,
+                attr=argval,
+                pattr=pattr,
+                offset=inst.offset,
+                linestart=inst.starts_line,
+                op=inst.opcode,
+                has_arg=inst.has_arg,
+                has_extended_arg=inst.has_extended_arg,
+                opc=self.opc,
+            )

             # things that smash new_tokens like BUILD_LIST have to come first.
             if opname in (
@@ -502,11 +512,13 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
                 if try_tokens is not None:
                     new_tokens = try_tokens
                     continue
-            elif opname in (
-                "BUILD_MAP",
-            ):
+            elif opname in ("BUILD_MAP",):
                 try_tokens = self.bound_map_from_inst(
-                    self.insts, new_tokens, inst, t, i,
+                    self.insts,
+                    new_tokens,
+                    inst,
+                    t,
+                    i,
                 )
                 if try_tokens is not None:
                     new_tokens = try_tokens
@@ -573,9 +585,7 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
             if op in self.opc.CONST_OPS:
                 const = argval
                 if iscode(const):
-                    co_name = const.co_name
-                    if isinstance(const.co_name, UnicodeForPython3):
-                        co_name = const.co_name.value.decode("utf-8")
+                    co_name = get_code_name(const)
                     if co_name == "<lambda>":
                         assert opname == "LOAD_CONST"
                         opname = "LOAD_LAMBDA"
@@ -629,7 +639,7 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
                 else:
                     pos_args, name_pair_args, annotate_args = parse_fn_counts_30_35(
                         inst.argval
-                        )
+                    )

                 pattr = f"{pos_args} positional, {name_pair_args} keyword only, {annotate_args} annotated"

@@ -715,11 +725,13 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
                     and self.insts[i + 1].opname == "JUMP_FORWARD"
                 )

-                if (self.version[:2] == (3, 0) and self.insts[i + 1].opname == "JUMP_FORWARD"
-                    and not is_continue):
+                if (
+                    self.version[:2] == (3, 0)
+                    and self.insts[i + 1].opname == "JUMP_FORWARD"
+                    and not is_continue
+                ):
                     target_prev = self.offset2inst_index[self.prev_op[target]]
-                    is_continue = (
-                        self.insts[target_prev].opname == "SETUP_LOOP")
+                    is_continue = self.insts[target_prev].opname == "SETUP_LOOP"

                 if is_continue or (
                     inst.offset in self.stmts
@@ -736,7 +748,10 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
                     # the "continue" is not on a new line.
                     # There are other situations where we don't catch
                     # CONTINUE as well.
-                    if new_tokens[-1].kind == "JUMP_BACK" and new_tokens[-1].attr <= argval:
+                    if (
+                        new_tokens[-1].kind == "JUMP_BACK"
+                        and new_tokens[-1].attr <= argval
+                    ):
                         if new_tokens[-2].kind == "BREAK_LOOP":
                             del new_tokens[-1]
                         else:
@@ -809,7 +824,10 @@ def find_jump_targets(self, debug):
             if inst.has_arg:
                 label = self.fixed_jumps.get(offset)
                 oparg = inst.arg
-                if self.version >= (3, 6) and self.code[offset] == self.opc.EXTENDED_ARG:
+                if (
+                    self.version >= (3, 6)
+                    and self.code[offset] == self.opc.EXTENDED_ARG
+                ):
                     j = xdis.next_offset(op, self.opc, offset)
                     next_offset = xdis.next_offset(op, self.opc, j)
                 else:
@@ -1082,7 +1100,6 @@ def detect_control_flow(self, offset, targets, inst_index):
                 and (target > offset)
                 and pretarget.offset != offset
             ):
-
                 # FIXME: hack upon hack...
                 # In some cases the pretarget can be a jump to the next instruction
                 # and these aren't and/or's either. We limit to 3.5+ since we experienced there
@@ -1104,7 +1121,6 @@ def detect_control_flow(self, offset, targets, inst_index):

             # Is it an "and" inside an "if" or "while" block
             if op == self.opc.POP_JUMP_IF_FALSE:
-
                 # Search for another POP_JUMP_IF_FALSE targetting the same op,
                 # in current statement, starting from current offset, and filter
                 # everything inside inner 'or' jumps and midline ifs
@@ -1357,7 +1373,6 @@ def detect_control_flow(self, offset, targets, inst_index):
                 self.fixed_jumps[offset] = rtarget
                 self.not_continue.add(pre_rtarget)
             else:
-
                 # FIXME: this is very convoluted and based on rather hacky
                 # empirical evidence. It should go a way when
                 # we have better control-flow analysis
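Beyond the black/isort/lint cleanup, the functional change in this commit is the swap from inline UnicodeForPython3 handling to the get_code_name() helper imported from uncompyle6.util. The helper's body is not part of this diff; what follows is only a rough sketch of what it plausibly does, reconstructed from the three lines removed above. The duck-typed hasattr check and the exact signature are assumptions, not the actual implementation.

    # Hypothetical sketch -- not the actual uncompyle6.util.get_code_name.
    # It mirrors the removed lines: when cross-decompiling older 3.x bytecode,
    # xdis may hand back co_name as a UnicodeForPython3 wrapper whose .value
    # holds the UTF-8 bytes of the real function name.
    def get_code_name(code) -> str:
        co_name = code.co_name
        if hasattr(co_name, "value"):  # assumed check for the UnicodeForPython3 wrapper
            return co_name.value.decode("utf-8")
        return str(co_name)

Centralizing that conversion in one helper is presumably what lets the scanner report the correct function name when decompiling older 3.x bytecode from a newer interpreter, which appears to be what the commit title refers to.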

0 commit comments