
Commit d660871

committed
correct fn name on older 3.x cross decompile...
Also black, lint, and isort some
1 parent dc286b9 commit d660871


7 files changed: +267 -204 lines changed


uncompyle6/scanners/scanner3.py

Lines changed: 75 additions & 60 deletions
@@ -39,10 +39,10 @@

 from xdis import iscode, instruction_size, Instruction
 from xdis.bytecode import _get_const_info
-from xdis.codetype import UnicodeForPython3

 from uncompyle6.scanners.tok import Token
 from uncompyle6.scanner import parse_fn_counts_30_35
+from uncompyle6.util import get_code_name
 import xdis

 # Get all the opcodes into globals
@@ -209,11 +209,18 @@ def __init__(self, version, show_asm=None, is_pypy=False):
         return

     def bound_collection_from_inst(
-        self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int, collection_type: str
+        self,
+        insts: list,
+        next_tokens: list,
+        inst: Instruction,
+        t: Token,
+        i: int,
+        collection_type: str,
     ) -> Optional[list]:
         """
-        Try to a replace sequence of instruction that ends with a BUILD_xxx with a sequence that can
-        be parsed much faster, but inserting the token boundary at the beginning of the sequence.
+        Try to a replace sequence of instruction that ends with a
+        BUILD_xxx with a sequence that can be parsed much faster, but
+        inserting the token boundary at the beginning of the sequence.
         """
         count = t.attr
         assert isinstance(count, int)
@@ -291,10 +298,12 @@ def bound_collection_from_inst
         return new_tokens

     def bound_map_from_inst(
-        self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int) -> Optional[list]:
+        self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int
+    ) -> Optional[list]:
         """
-        Try to a sequence of instruction that ends with a BUILD_MAP into a sequence that can
-        be parsed much faster, but inserting the token boundary at the beginning of the sequence.
+        Try to a sequence of instruction that ends with a BUILD_MAP into
+        a sequence that can be parsed much faster, but inserting the
+        token boundary at the beginning of the sequence.
         """
         count = t.attr
         assert isinstance(count, int)
@@ -309,21 +318,18 @@ def bound_map_from_inst
         assert (count * 2) <= i

         for j in range(collection_start, i, 2):
-            if insts[j].opname not in (
-                "LOAD_CONST",
-            ):
+            if insts[j].opname not in ("LOAD_CONST",):
                 return None
-            if insts[j+1].opname not in (
-                "LOAD_CONST",
-            ):
+            if insts[j + 1].opname not in ("LOAD_CONST",):
                 return None

         collection_start = i - (2 * count)
         collection_enum = CONST_COLLECTIONS.index("CONST_MAP")

-        # If we get here, all instructions before tokens[i] are LOAD_CONST and we can replace
-        # add a boundary marker and change LOAD_CONST to something else
-        new_tokens = next_tokens[:-(2*count)]
+        # If we get here, all instructions before tokens[i] are LOAD_CONST and
+        # we can replace add a boundary marker and change LOAD_CONST to
+        # something else.
+        new_tokens = next_tokens[: -(2 * count)]
         start_offset = insts[collection_start].offset
         new_tokens.append(
             Token(
@@ -353,10 +359,10 @@ def bound_map_from_inst
             new_tokens.append(
                 Token(
                     opname="ADD_VALUE",
-                    attr=insts[j+1].argval,
-                    pattr=insts[j+1].argrepr,
-                    offset=insts[j+1].offset,
-                    linestart=insts[j+1].starts_line,
+                    attr=insts[j + 1].argval,
+                    pattr=insts[j + 1].argrepr,
+                    offset=insts[j + 1].offset,
+                    linestart=insts[j + 1].starts_line,
                     has_arg=True,
                     has_extended_arg=False,
                     opc=self.opc,
@@ -376,8 +382,9 @@ def bound_map_from_inst
         )
         return new_tokens

-    def ingest(self, co, classname=None, code_objects={}, show_asm=None
-               ) -> Tuple[list, dict]:
+    def ingest(
+        self, co, classname=None, code_objects={}, show_asm=None
+    ) -> Tuple[list, dict]:
         """
         Create "tokens" the bytecode of an Python code object. Largely these
         are the opcode name, but in some cases that has been modified to make parsing
@@ -387,14 +394,17 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
         Some transformations are made to assist the deparsing grammar:
         - various types of LOAD_CONST's are categorized in terms of what they load
         - COME_FROM instructions are added to assist parsing control structures
-        - operands with stack argument counts or flag masks are appended to the opcode name, e.g.:
+        - operands with stack argument counts or flag masks are appended to the
+          opcode name, e.g.:
           * BUILD_LIST, BUILD_SET
-          * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
+          * MAKE_FUNCTION and FUNCTION_CALLS append the number of positional
+            arguments
         - EXTENDED_ARGS instructions are removed

-        Also, when we encounter certain tokens, we add them to a set which will cause custom
-        grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
-        cause specific rules for the specific number of arguments they take.
+        Also, when we encounter certain tokens, we add them to a set
+        which will cause custom grammar rules. Specifically, variable
+        arg tokens like MAKE_FUNCTION or BUILD_LIST cause specific rules
+        for the specific number of arguments they take.
         """

         if not show_asm:
@@ -420,7 +430,6 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None

         n = len(self.insts)
         for i, inst in enumerate(self.insts):
-
             opname = inst.opname
             # We need to detect the difference between:
             # raise AssertionError
@@ -437,12 +446,12 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
                     prev_inst = self.insts[i - 1]
                     assert_can_follow = (
                         prev_inst.opname in ("JUMP_IF_TRUE", "JUMP_IF_FALSE")
-                        and i + 1 < n )
+                        and i + 1 < n
+                    )
                     jump_if_inst = prev_inst
             else:
                 assert_can_follow = (
-                    opname in ("POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE")
-                    and i + 1 < n
+                    opname in ("POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE") and i + 1 < n
                 )
                 jump_if_inst = inst
             if assert_can_follow:
@@ -452,7 +461,9 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
                     and next_inst.argval == "AssertionError"
                     and jump_if_inst.argval
                 ):
-                    raise_idx = self.offset2inst_index[self.prev_op[jump_if_inst.argval]]
+                    raise_idx = self.offset2inst_index[
+                        self.prev_op[jump_if_inst.argval]
+                    ]
                     raise_inst = self.insts[raise_idx]
                     if raise_inst.opname.startswith("RAISE_VARARGS"):
                         self.load_asserts.add(next_inst.offset)
@@ -468,22 +479,21 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
         new_tokens = []

         for i, inst in enumerate(self.insts):
-
             opname = inst.opname
             argval = inst.argval
             pattr = inst.argrepr

             t = Token(
-                    opname=opname,
-                    attr=argval,
-                    pattr=pattr,
-                    offset=inst.offset,
-                    linestart=inst.starts_line,
-                    op=inst.opcode,
-                    has_arg=inst.has_arg,
-                    has_extended_arg=inst.has_extended_arg,
-                    opc=self.opc,
-                    )
+                opname=opname,
+                attr=argval,
+                pattr=pattr,
+                offset=inst.offset,
+                linestart=inst.starts_line,
+                op=inst.opcode,
+                has_arg=inst.has_arg,
+                has_extended_arg=inst.has_extended_arg,
+                opc=self.opc,
+            )

             # things that smash new_tokens like BUILD_LIST have to come first.
             if opname in (
@@ -502,11 +512,13 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
                 if try_tokens is not None:
                     new_tokens = try_tokens
                     continue
-            elif opname in (
-                "BUILD_MAP",
-            ):
+            elif opname in ("BUILD_MAP",):
                 try_tokens = self.bound_map_from_inst(
-                    self.insts, new_tokens, inst, t, i,
+                    self.insts,
+                    new_tokens,
+                    inst,
+                    t,
+                    i,
                 )
                 if try_tokens is not None:
                     new_tokens = try_tokens
@@ -573,9 +585,7 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
             if op in self.opc.CONST_OPS:
                 const = argval
                 if iscode(const):
-                    co_name = const.co_name
-                    if isinstance(const.co_name, UnicodeForPython3):
-                        co_name = const.co_name.value.decode("utf-8")
+                    co_name = get_code_name(const)
                     if co_name == "<lambda>":
                         assert opname == "LOAD_CONST"
                         opname = "LOAD_LAMBDA"
@@ -629,7 +639,7 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
                 else:
                     pos_args, name_pair_args, annotate_args = parse_fn_counts_30_35(
                         inst.argval
-                        )
+                    )

                 pattr = f"{pos_args} positional, {name_pair_args} keyword only, {annotate_args} annotated"

@@ -715,11 +725,13 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
                     and self.insts[i + 1].opname == "JUMP_FORWARD"
                 )

-                if (self.version[:2] == (3, 0) and self.insts[i + 1].opname == "JUMP_FORWARD"
-                    and not is_continue):
+                if (
+                    self.version[:2] == (3, 0)
+                    and self.insts[i + 1].opname == "JUMP_FORWARD"
+                    and not is_continue
+                ):
                     target_prev = self.offset2inst_index[self.prev_op[target]]
-                    is_continue = (
-                        self.insts[target_prev].opname == "SETUP_LOOP")
+                    is_continue = self.insts[target_prev].opname == "SETUP_LOOP"

                 if is_continue or (
                     inst.offset in self.stmts
@@ -736,7 +748,10 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None
                     # the "continue" is not on a new line.
                     # There are other situations where we don't catch
                     # CONTINUE as well.
-                    if new_tokens[-1].kind == "JUMP_BACK" and new_tokens[-1].attr <= argval:
+                    if (
+                        new_tokens[-1].kind == "JUMP_BACK"
+                        and new_tokens[-1].attr <= argval
+                    ):
                         if new_tokens[-2].kind == "BREAK_LOOP":
                             del new_tokens[-1]
                         else:
@@ -809,7 +824,10 @@ def find_jump_targets(self, debug):
             if inst.has_arg:
                 label = self.fixed_jumps.get(offset)
                 oparg = inst.arg
-                if self.version >= (3, 6) and self.code[offset] == self.opc.EXTENDED_ARG:
+                if (
+                    self.version >= (3, 6)
+                    and self.code[offset] == self.opc.EXTENDED_ARG
+                ):
                     j = xdis.next_offset(op, self.opc, offset)
                     next_offset = xdis.next_offset(op, self.opc, j)
                 else:
@@ -1082,7 +1100,6 @@ def detect_control_flow(self, offset, targets, inst_index):
                 and (target > offset)
                 and pretarget.offset != offset
             ):
-
                 # FIXME: hack upon hack...
                 # In some cases the pretarget can be a jump to the next instruction
                 # and these aren't and/or's either. We limit to 3.5+ since we experienced there
@@ -1104,7 +1121,6 @@ def detect_control_flow(self, offset, targets, inst_index):

             # Is it an "and" inside an "if" or "while" block
             if op == self.opc.POP_JUMP_IF_FALSE:
-
                 # Search for another POP_JUMP_IF_FALSE targetting the same op,
                 # in current statement, starting from current offset, and filter
                 # everything inside inner 'or' jumps and midline ifs
@@ -1357,7 +1373,6 @@ def detect_control_flow(self, offset, targets, inst_index):
                 self.fixed_jumps[offset] = rtarget
                 self.not_continue.add(pre_rtarget)
             else:
-
                 # FIXME: this is very convoluted and based on rather hacky
                 # empirical evidence. It should go a way when
                 # we have better control-flow analysis
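Beyond the black/isort/lint cleanup, the functional change in this commit is the swap from inline UnicodeForPython3 handling to the get_code_name() helper imported from uncompyle6.util. The helper's body is not part of this diff; what follows is only a rough sketch of what it plausibly does, reconstructed from the three lines removed above. The duck-typed hasattr check and the exact signature are assumptions, not the actual implementation.

    # Hypothetical sketch -- not the actual uncompyle6.util.get_code_name.
    # It mirrors the removed lines: when cross-decompiling older 3.x bytecode,
    # xdis may hand back co_name as a UnicodeForPython3 wrapper whose .value
    # holds the UTF-8 bytes of the real function name.
    def get_code_name(code) -> str:
        co_name = code.co_name
        if hasattr(co_name, "value"):  # assumed check for the UnicodeForPython3 wrapper
            return co_name.value.decode("utf-8")
        return str(co_name)

Centralizing that conversion in one helper is presumably what lets the scanner report the correct function name when decompiling older 3.x bytecode from a newer interpreter, which appears to be what the commit title refers to.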

0 commit comments