Skip to content

Commit ce01644

Browse files
committed
POC: Configurable autogen instruction
1 parent 9e82fcb commit ce01644

File tree

3 files changed

+445
-288
lines changed

3 files changed

+445
-288
lines changed

crates/compiler-core/generate.py

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
#!/usr/bin/env python
2+
from __future__ import annotations
3+
4+
import enum
5+
import functools
6+
import io
7+
import pathlib
8+
import subprocess
9+
import sys
10+
import typing
11+
12+
import tomllib
13+
14+
CPYTHON_VERSION = "v3.13.9"
15+
16+
17+
CRATE_ROOT = pathlib.Path(__file__).parent
18+
CONF_FILE = CRATE_ROOT / "instructions.toml"
19+
OUT_FILE = CRATE_ROOT / "src" / "bytecode" / "instruction.rs"
20+
21+
ROOT = CRATE_ROOT.parents[1]
22+
SUBMODULES = ROOT / "submodules"
23+
CPYTHON_DIR = SUBMODULES / f"cpython-{CPYTHON_VERSION}"
24+
CPYTHON_TOOLS_DIR = CPYTHON_DIR / "Tools" / "cases_generator"
25+
DIS_DOC = CPYTHON_DIR / "Doc" / "library" / "dis.rst"
26+
27+
sys.path.append(CPYTHON_TOOLS_DIR.as_posix())
28+
29+
import analyzer
30+
from generators_common import DEFAULT_INPUT
31+
32+
33+
class Inst:
    """One CPython instruction, rendered as a variant of the generated Rust enum.

    Attributes:
        name: Rust variant name (override ``name``, else the PascalCase form
            of the CPython opcode name).
        id: Numeric opcode taken from ``analysis.opmap``.
        has_oparg: Whether the variant carries an oparg field.
        oparg_typ: ``Oparg`` member describing the field type, or ``None``
            when no type was configured and the instruction has no oparg.
        oparg_name: Field name of the oparg, or ``None`` when no name was
            configured and the instruction has no oparg.
    """

    def __init__(
        self, cpython_name: str, override: dict, analysis: analyzer.Analysis
    ) -> None:
        """Build the instruction description.

        Args:
            cpython_name: Opcode name as known to CPython (e.g. ``LOAD_NAME``).
            override: Per-opcode table from the config file; may be empty.
                Recognised keys: ``name``, ``has_oparg``, ``oparg_typ``,
                ``oparg_name``.
            analysis: Result of the CPython cases-generator analysis.

        Raises:
            ValueError: If ``override["oparg_typ"]`` does not name an
                ``Oparg`` member.
        """
        inst = analysis.instructions[cpython_name]
        properties = inst.properties

        self.name = override.get("name", snake_case_to_pascal_case(cpython_name))
        self.id = analysis.opmap[cpython_name]
        self.has_oparg = override.get("has_oparg", properties.oparg)

        # Always define both attributes so that an instruction without an
        # oparg can still be inspected without raising AttributeError.
        self.oparg_typ = None
        self.oparg_name = None

        if (oparg_typ := override.get("oparg_typ")) is not None:
            # Look the member up by name. `getattr(Oparg, ...)` would also
            # accept non-member attributes such as `field_name`.
            try:
                self.oparg_typ = Oparg[oparg_typ]
            except KeyError:
                valid = ", ".join(member.name for member in Oparg)
                raise ValueError(
                    f"{cpython_name}: unknown oparg_typ {oparg_typ!r} "
                    f"(expected one of: {valid})"
                ) from None
        elif self.has_oparg:
            self.oparg_typ = Oparg.from_properties(properties)

        if (oparg_name := override.get("oparg_name")) is not None:
            self.oparg_name = oparg_name
        elif self.has_oparg:
            # Prefer the name documented in dis.rst; otherwise fall back to
            # the default field name of the oparg type.
            oparg_map = build_oparg_name_map()
            self.oparg_name = oparg_map.get(cpython_name, self.oparg_typ.field_name)

    @property
    def variant(self) -> str:
        """Rust source for this enum variant, e.g. ``Foo { idx: Arg<u32> } = 1``.

        The text is not pretty-printed; the caller is expected to run the
        generated code through a formatter.
        """
        if self.has_oparg:
            fields = f"{{ {self.oparg_name}: Arg<{self.oparg_typ.name}> }}"
        else:
            fields = ""

        return f"{self.name} {fields} = {self.id}"

    def __lt__(self, other) -> bool:
        """Order instructions alphabetically by variant name (used by ``sorted``)."""
        if not isinstance(other, Inst):
            return NotImplemented
        return self.name < other.name
66+
67+
68+
@enum.unique
69+
class Oparg(enum.StrEnum):
70+
IntrinsicFunction1 = enum.auto()
71+
IntrinsicFunction2 = enum.auto()
72+
ResumeKind = enum.auto()
73+
Label = enum.auto()
74+
NameIdx = enum.auto()
75+
u32 = enum.auto() # TODO: Remove this; Everything needs to be a newtype
76+
77+
@property
78+
def field_name(self) -> str:
79+
match self:
80+
case self.Label:
81+
return "target"
82+
case self.NameIdx:
83+
return "namei"
84+
case _:
85+
return "idx" # Fallback to `idx`
86+
87+
@classmethod
88+
def from_properties(cls, properties: analyzer.Properties) -> typing.Self:
89+
if properties.uses_co_names:
90+
return cls.NameIdx
91+
elif properties.jumps:
92+
return cls.Label
93+
elif properties.uses_co_consts:
94+
return cls.u32 # TODO: Needs to be `ConstIdx`
95+
elif properties.uses_locals:
96+
return cls.u32 # TODO: Needs to be `ConstIdx`
97+
else:
98+
# TODO: Raise here.
99+
return cls.u32 # Fallback to something generic
100+
101+
102+
@functools.cache
def build_oparg_name_map() -> dict[str, str]:
    """Map CPython opcode names to the oparg names documented in ``DIS_DOC``.

    Only ``.. opcode::`` directives that carry a single parenthesised
    argument name are recorded; opcodes documented without one are absent
    from the result.

    The result is cached, so the file is parsed once per process and every
    caller receives the same dict object -- treat it as read-only.
    """
    # Decode explicitly instead of relying on the locale's default encoding
    # (reST sources are expected to be UTF-8).
    doc = DIS_DOC.read_text(encoding="utf-8")

    out = {}
    for line in doc.splitlines():
        if not line.startswith(".. opcode:: "):
            continue

        # At this point `line` would look something like:
        #
        # `.. opcode:: OPCODE_NAME`
        # or
        # `.. opcode:: OPCODE_NAME (oparg_name)`
        #
        # We only care about the latter.

        parts = line.split()
        if len(parts) != 4:
            continue

        _, _, cpython_name, oparg = parts
        out[cpython_name] = oparg.removeprefix("(").removesuffix(")")

    return out
127+
128+
129+
def snake_case_to_pascal_case(name: str) -> str:
    """Convert an underscore-separated name (any case) to PascalCase.

    ``CALL_INTRINSIC_1`` becomes ``CallIntrinsic1``.
    """
    titled = name.title()
    # Dropping the separators is all that is left after title-casing.
    return "".join(titled.split("_"))
131+
132+
133+
def rustfmt(code: str) -> str:
    """Format Rust source by piping it through the ``rustfmt`` executable.

    Args:
        code: Rust source text, sent to ``rustfmt`` on stdin.

    Returns:
        The formatted source that ``rustfmt`` printed to stdout.

    Raises:
        subprocess.CalledProcessError: If ``rustfmt`` exits with a non-zero
            status.
    """
    command = ["rustfmt", "--emit=stdout"]
    completed = subprocess.run(
        command,
        input=code,
        stdout=subprocess.PIPE,
        text=True,
        check=True,
    )
    return completed.stdout
135+
136+
137+
def get_analysis() -> analyzer.Analysis:
    """Run the CPython cases-generator analysis over its default input.

    Returns:
        The analysis, with the pseudo instructions merged into
        ``analysis.instructions``.
    """
    analysis = analyzer.analyze_files([DEFAULT_INPUT])

    # We don't differentiate between real and pseudos yet
    analysis.instructions |= analysis.pseudos
    return analysis
143+
144+
145+
def write_enum(outfile: typing.IO, instructions: list[Inst]) -> None:
    """Write the Rust ``Instruction`` enum definition to ``outfile``.

    Args:
        outfile: Text stream that receives the (unformatted) Rust source.
        instructions: Instructions to emit, one variant each, in the order
            given.
    """
    body = ",\n".join(inst.variant for inst in instructions)
    # The leading and trailing empty strings reproduce the blank line before
    # the doc comment and the newline after the closing brace.
    pieces = [
        "",
        "/// A Single bytecode instruction.",
        "#[repr(u8)]",
        "#[derive(Clone, Copy, Debug, Eq, PartialEq)]",
        "pub enum Instruction {",
        body,
        "}",
        "",
    ]
    outfile.write("\n".join(pieces))
157+
158+
159+
def main() -> None:
    """Generate ``OUT_FILE`` from the CPython analysis and the overrides in ``CONF_FILE``.

    Steps: load the per-opcode overrides, build one ``Inst`` per enabled
    instruction (sorted by variant name), render the Rust enum, format it
    with ``rustfmt``, then print the result and write it to ``OUT_FILE``.
    """
    # TOML is UTF-8 by specification; open in binary mode and let tomllib
    # decode it instead of relying on the locale's default encoding.
    with CONF_FILE.open("rb") as conf_fp:
        conf = tomllib.load(conf_fp)

    # A config without any overrides is the stated ideal, so the `[opcodes]`
    # table may legitimately be missing.
    opcodes = conf.get("opcodes", {})
    analysis = get_analysis()
    instructions = sorted(
        Inst(name, opcodes.get(name, {}), analysis)
        for name in analysis.instructions
        if opcodes.get(name, {}).get("enabled", True)
    )

    outfile = io.StringIO()

    write_enum(outfile, instructions)

    # Resolve both sides so symlinked checkouts don't break `relative_to`.
    script_path = (
        pathlib.Path(__file__).resolve().relative_to(ROOT.resolve()).as_posix()
    )

    generated = outfile.getvalue()
    output = rustfmt(
        f"""
// This file is generated by {script_path}
// Do not edit!

use crate::bytecode::{{Arg, Label, NameIdx}};

{generated}
"""
    )
    print(output)
    OUT_FILE.write_text(output, encoding="utf-8")
188+
189+
190+
if __name__ == "__main__":
191+
main()
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# This file is used by `generate.py` to autogenerate CPython opcodes.
2+
#
3+
# The script will try to detect the properties for each opcode, but it can be useful to override some properties. Ideally,
4+
# we want to have this file empty because that means that we are fully aligned with CPython's opcodes.
5+
#
6+
# enabled : bool
7+
# When false, the opcode will not appear as a variant.
8+
# name : str
9+
# Sets the variant name.
10+
# has_oparg : bool
11+
# Whether or not this opcode has an oparg.
12+
# oparg_name : str
13+
# Field name for the variant's oparg (`Foo { name: ... }`).
14+
# oparg_typ : str
15+
# Type of the variant's oparg (`Foo { i: typ }`).
16+
17+
[opcodes]
18+
CALL_INTRINSIC_1 = { oparg_name = "func", oparg_typ = "IntrinsicFunction1" }
19+
CALL_INTRINSIC_2 = { oparg_name = "func", oparg_typ = "IntrinsicFunction2" }
20+
GET_AWAITABLE = { oparg_name = "r#where" } # `where` is a rust keyword
21+
RAISE_VARARGS = { name = "RaiseVarArgs" }
22+
RESUME = { oparg_typ = "ResumeKind" }
23+
YIELD_VALUE = { has_oparg = false } # 3.13 changed this to have an oparg
24+
25+
# Disabled (not implemented)
26+
27+
BINARY_OP_ADD_FLOAT = { enabled = false }
28+
BINARY_OP_ADD_INT = { enabled = false }
29+
BINARY_OP_ADD_UNICODE = { enabled = false }
30+
BINARY_OP_MULTIPLY_FLOAT = { enabled = false }
31+
BINARY_OP_MULTIPLY_INT = { enabled = false }
32+
BINARY_OP_SUBTRACT_FLOAT = { enabled = false }
33+
BINARY_OP_SUBTRACT_INT = { enabled = false }
34+
BINARY_SUBSCR_DICT = { enabled = false }
35+
BINARY_SUBSCR_GETITEM = { enabled = false }
36+
BINARY_SUBSCR_LIST_INT = { enabled = false }
37+
BINARY_SUBSCR_STR_INT = { enabled = false }
38+
BINARY_SUBSCR_TUPLE_INT = { enabled = false }
39+
CACHE = { enabled = false }
40+
INSTRUMENTED_CALL = { enabled = false }
41+
INSTRUMENTED_CALL_FUNCTION_EX = { enabled = false }
42+
INSTRUMENTED_CALL_KW = { enabled = false }
43+
INSTRUMENTED_END_FOR = { enabled = false }
44+
INSTRUMENTED_END_SEND = { enabled = false }
45+
INSTRUMENTED_FOR_ITER = { enabled = false }
46+
INSTRUMENTED_INSTRUCTION = { enabled = false }
47+
INSTRUMENTED_JUMP_BACKWARD = { enabled = false }
48+
INSTRUMENTED_JUMP_FORWARD = { enabled = false }
49+
INSTRUMENTED_LINE = { enabled = false }
50+
INSTRUMENTED_LOAD_SUPER_ATTR = { enabled = false }
51+
INSTRUMENTED_POP_JUMP_IF_FALSE = { enabled = false }
52+
INSTRUMENTED_POP_JUMP_IF_NONE = { enabled = false }
53+
INSTRUMENTED_POP_JUMP_IF_NOT_NONE = { enabled = false }
54+
INSTRUMENTED_POP_JUMP_IF_TRUE = { enabled = false }
55+
INSTRUMENTED_RESUME = { enabled = false }
56+
INSTRUMENTED_RETURN_CONST = { enabled = false }
57+
INSTRUMENTED_RETURN_VALUE = { enabled = false }
58+
INSTRUMENTED_YIELD_VALUE = { enabled = false }
59+
RERAISE = { enabled = false }
60+
SEND_GEN = { enabled = false }
61+
UNPACK_SEQUENCE_TWO_TUPLE = { enabled = false }

0 commit comments

Comments
 (0)