Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .cspell.dict/python-more.txt
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ nbytes
ncallbacks
ndigits
ndim
needsfree
nldecoder
nlocals
NOARGS
Expand All @@ -168,6 +169,7 @@ pycache
pycodecs
pycs
pyexpat
PYTHONAPI
PYTHONBREAKPOINT
PYTHONDEBUG
PYTHONDONTWRITEBYTECODE
Expand Down
5 changes: 3 additions & 2 deletions .cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,9 @@
"makeunicodedata",
"miri",
"notrace",
"oparg",
"openat",
"pyarg",
"pyarg",
"pyargs",
"pyast",
"PyAttr",
Expand Down Expand Up @@ -107,6 +107,7 @@
"pystruct",
"pystructseq",
"pytrace",
"pytype",
"reducelib",
"richcompare",
"RustPython",
Expand All @@ -116,7 +117,6 @@
"sysmodule",
"tracebacks",
"typealiases",
"unconstructible",
"unhashable",
"uninit",
"unraisable",
Expand All @@ -131,6 +131,7 @@
"getrusage",
"nanosleep",
"sigaction",
"sighandler",
"WRLCK",
// win32
"birthtime",
Expand Down
2 changes: 1 addition & 1 deletion crates/stdlib/src/pystruct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ pub(crate) mod _struct {
// CPython turns str to bytes but we do reversed way here
// The only performance difference is this transition cost
let fmt = match_class!(match obj {
s @ PyStr => s.is_ascii().then_some(s),
s @ PyStr => s.isascii().then_some(s),
b @ PyBytes => ascii::AsciiStr::from_ascii(&b)
.ok()
.map(|s| vm.ctx.new_str(s)),
Expand Down
50 changes: 50 additions & 0 deletions crates/vm/src/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,56 @@ impl FormatCode {
return Err("embedded null character".to_owned());
}

// PEP3118: Handle extended format specifiers
// T{...} - struct, X{} - function pointer, (...) - array shape, :name: - field name
if c == b'T' || c == b'X' {
// Skip struct/function pointer: consume until matching '}'
if chars.peek() == Some(&b'{') {
chars.next(); // consume '{'
let mut depth = 1;
while depth > 0 {
match chars.next() {
Some(b'{') => depth += 1,
Some(b'}') => depth -= 1,
None => return Err("unmatched '{' in format".to_owned()),
_ => {}
}
}
continue;
}
}

if c == b'(' {
// Skip array shape: consume until matching ')'
let mut depth = 1;
while depth > 0 {
match chars.next() {
Some(b'(') => depth += 1,
Some(b')') => depth -= 1,
None => return Err("unmatched '(' in format".to_owned()),
_ => {}
}
}
continue;
}

if c == b':' {
// Skip field name: consume until next ':'
loop {
match chars.next() {
Some(b':') => break,
None => return Err("unmatched ':' in format".to_owned()),
_ => {}
}
}
continue;
}

if c == b'{' || c == b'}' {
// Skip standalone braces (pointer targets, etc.)
continue;
}

let code = FormatType::try_from(c)
.ok()
.filter(|c| match c {
Expand Down
4 changes: 2 additions & 2 deletions crates/vm/src/builtins/builtin_func.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ impl PyNativeFunction {
zelf.0.value.doc
}

#[pygetset(name = "__self__")]
#[pygetset]
fn __self__(_zelf: PyObjectRef, vm: &VirtualMachine) -> PyObjectRef {
vm.ctx.none()
}
Expand Down Expand Up @@ -181,7 +181,7 @@ impl PyNativeMethod {
Ok((getattr, (target, name)))
}

#[pygetset(name = "__self__")]
#[pygetset]
fn __self__(zelf: PyRef<Self>, _vm: &VirtualMachine) -> Option<PyObjectRef> {
zelf.func.zelf.clone()
}
Expand Down
5 changes: 2 additions & 3 deletions crates/vm/src/builtins/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -629,12 +629,11 @@ impl GetDescriptor for PyFunction {
vm: &VirtualMachine,
) -> PyResult {
let (_zelf, obj) = Self::_unwrap(&zelf, obj, vm)?;
let obj = if vm.is_none(&obj) && !Self::_cls_is(&cls, obj.class()) {
Ok(if vm.is_none(&obj) && !Self::_cls_is(&cls, obj.class()) {
zelf
} else {
PyBoundMethod::new(obj, zelf).into_ref(&vm.ctx).into()
};
Ok(obj)
})
}
}

Expand Down
4 changes: 2 additions & 2 deletions crates/vm/src/builtins/object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -450,8 +450,8 @@ impl PyBaseObject {
Ok(())
}

#[pygetset(name = "__class__")]
fn get_class(obj: PyObjectRef) -> PyTypeRef {
#[pygetset]
fn __class__(obj: PyObjectRef) -> PyTypeRef {
obj.class().to_owned()
}

Expand Down
6 changes: 3 additions & 3 deletions crates/vm/src/builtins/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -625,9 +625,9 @@ impl PyStr {
self.data.char_len()
}

#[pymethod(name = "isascii")]
#[pymethod]
#[inline(always)]
pub const fn is_ascii(&self) -> bool {
pub const fn isascii(&self) -> bool {
matches!(self.kind(), StrKind::Ascii)
}

Expand Down Expand Up @@ -960,7 +960,7 @@ impl PyStr {
format_map(&format_string, &mapping, vm)
}

#[pymethod(name = "__format__")]
#[pymethod]
fn __format__(
zelf: PyRef<PyStr>,
spec: PyStrRef,
Expand Down
4 changes: 2 additions & 2 deletions crates/vm/src/builtins/type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1445,8 +1445,8 @@ impl GetAttr for PyType {

#[pyclass]
impl Py<PyType> {
#[pygetset(name = "__mro__")]
fn get_mro(&self) -> PyTuple {
#[pygetset]
fn __mro__(&self) -> PyTuple {
let elements: Vec<PyObjectRef> = self.mro_map_collect(|x| x.as_object().to_owned());
PyTuple::new_unchecked(elements.into_boxed_slice())
}
Expand Down
11 changes: 6 additions & 5 deletions crates/vm/src/exceptions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -624,8 +624,8 @@ impl PyBaseException {
*self.context.write() = context;
}

#[pygetset(name = "__suppress_context__")]
pub(super) fn get_suppress_context(&self) -> bool {
#[pygetset]
pub(super) fn __suppress_context__(&self) -> bool {
self.suppress_context.load()
}

Expand Down Expand Up @@ -1112,7 +1112,7 @@ impl serde::Serialize for SerializeException<'_, '_> {
.__context__()
.map(|exc| SerializeExceptionOwned { vm: self.vm, exc }),
)?;
struc.serialize_field("suppress_context", &self.exc.get_suppress_context())?;
struc.serialize_field("suppress_context", &self.exc.__suppress_context__())?;

let args = {
struct Args<'vm>(&'vm VirtualMachine, PyTupleRef);
Expand Down Expand Up @@ -1550,6 +1550,7 @@ pub(super) mod types {
pub struct PyUnboundLocalError(PyNameError);

#[pyexception(name, base = PyException, ctx = "os_error")]
#[repr(C)]
pub struct PyOSError {
base: PyException,
errno: PyAtomicRef<Option<PyObject>>,
Expand Down Expand Up @@ -1857,8 +1858,8 @@ pub(super) mod types {
self.errno.swap_to_temporary_refs(value, vm);
}

#[pygetset(name = "strerror")]
fn get_strerror(&self) -> Option<PyObjectRef> {
#[pygetset]
fn strerror(&self) -> Option<PyObjectRef> {
self.strerror.to_owned()
}

Expand Down
22 changes: 22 additions & 0 deletions crates/vm/src/object/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1102,6 +1102,28 @@ where
}
}

impl<T: crate::class::PySubclass> Py<T> {
/// Converts `&Py<T>` to `&Py<T::Base>`.
#[inline]
pub fn to_base(&self) -> &Py<T::Base> {
debug_assert!(self.as_object().downcast_ref::<T::Base>().is_some());
// SAFETY: T is #[repr(transparent)] over T::Base,
// so Py<T> and Py<T::Base> have the same layout.
unsafe { &*(self as *const Py<T> as *const Py<T::Base>) }
}

/// Converts `&Py<T>` to `&Py<U>` where U is an ancestor type.
#[inline]
pub fn upcast_ref<U: PyPayload + StaticType>(&self) -> &Py<U>
where
T: StaticType,
{
debug_assert!(T::static_type().is_subtype(U::static_type()));
// SAFETY: T is a subtype of U, so Py<T> can be viewed as Py<U>.
unsafe { &*(self as *const Py<T> as *const Py<U>) }
}
}
Comment on lines +1105 to +1125
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Search for PySubclass trait definition
echo "=== Searching for PySubclass trait definition ==="
rg -n "trait PySubclass" --type=rust | head -20

echo ""
echo "=== Searching for impl PySubclass ==="
rg -n "impl.*PySubclass" --type=rust | head -20

echo ""
echo "=== Searching for Py<T> type definition ==="
rg -n "^pub struct Py<" --type=rust | head -10

echo ""
echo "=== Checking file structure around lines 900-1130 in core.rs ==="
wc -l crates/vm/src/object/core.rs

Repository: RustPython/RustPython

Length of output: 900


🏁 Script executed:

#!/bin/bash
# Find and display the relevant trait and type definitions
echo "=== Finding PySubclass trait ==="
ast-grep --pattern 'trait PySubclass { $$$ }' 2>/dev/null || rg -A 10 "trait PySubclass" --type=rust

echo ""
echo "=== Finding Py struct definition ==="
rg -B 2 -A 8 "pub struct Py<T>" --type=rust

echo ""
echo "=== Finding PyInner struct definition ==="
rg -B 2 -A 8 "pub struct PyInner<T>" --type=rust

echo ""
echo "=== Checking PySubclass context ==="
rg -B 5 -A 15 "pub fn to_base\|pub fn upcast_ref" crates/vm/src/object/core.rs

Repository: RustPython/RustPython

Length of output: 776


🏁 Script executed:

#!/bin/bash
# Get PySubclass trait definition
echo "=== PySubclass trait definition ==="
sed -n '193,220p' crates/vm/src/class.rs

echo ""
echo "=== PyInner struct definition ==="
rg -B 2 -A 15 "pub struct PyInner<T>" --type=rust crates/vm/src/object/core.rs

echo ""
echo "=== Full context of to_base and upcast_ref methods ==="
sed -n '1105,1125p' crates/vm/src/object/core.rs

echo ""
echo "=== Check PySubclass::Base associated type ==="
rg -A 5 "type Base" crates/vm/src/class.rs | head -15

Repository: RustPython/RustPython

Length of output: 1475


🏁 Script executed:

#!/bin/bash
# Look at PyInner definition
echo "=== PyInner struct definition ==="
rg -B 5 -A 20 "^pub struct PyInner" crates/vm/src/object/core.rs

echo ""
echo "=== Check existing PySubclass implementations ==="
# Get a few real implementations
rg -B 2 -A 10 "impl.*PySubclass" crates/vm/src/exceptions.rs | head -30

echo ""
echo "=== Look for repr attributes on PySubclass implementors ==="
rg -B 5 "impl.*PySubclass" crates/vm/src/class.rs

echo ""
echo "=== Check if PyInner has repr attributes ==="
rg -B 5 "struct PyInner" crates/vm/src/object/core.rs | grep -A 5 "repr"

Repository: RustPython/RustPython

Length of output: 663


🏁 Script executed:

#!/bin/bash
# Get full PyInner definition
echo "=== Full PyInner struct ==="
sed -n '104,130p' crates/vm/src/object/core.rs

echo ""
echo "=== Look at the actual PyOSError structure ==="
rg -B 10 "impl.*PySubclass for PyOSError" crates/vm/src/exceptions.rs | grep -E "struct|pub"

echo ""
echo "=== Search for PyOSError struct definition ==="
rg -B 2 -A 6 "struct PyOSError" crates/vm/src/exceptions.rs

echo ""
echo "=== Check how PyPayload relates to memory layout ==="
rg -A 10 "pub trait PyPayload" crates/vm/src/object/mod.rs

Repository: RustPython/RustPython

Length of output: 1551


🏁 Script executed:

#!/bin/bash
# Check PyException structure
echo "=== PyException definition ==="
rg -B 5 -A 15 "^pub struct PyException" crates/vm/src/exceptions.rs

echo ""
echo "=== Check if PyPayload trait has repr requirements ==="
rg -B 2 -A 20 "pub trait PyPayload" crates/vm/src/

echo ""
echo "=== Check comparisons with PyRef methods mentioned in review ==="
sed -n '1076,1103p' crates/vm/src/object/core.rs

echo ""
echo "=== Look for any repr attribute documentation on PySubclass implementors ==="
rg -B 10 "pub struct PyOSError" crates/vm/src/exceptions.rs | head -20

Repository: RustPython/RustPython

Length of output: 3014


The PySubclass trait doesn't enforce required layout guarantees for unsafe pointer casts.

The to_base() and upcast_ref() methods use unsafe pointer casts based on layout assumptions that are not enforced by the trait definition. Specifically:

  • to_base() assumes "T is #[repr(transparent)] over T::Base", but PySubclass only requires that T::Base is a PyPayload with an as_base() implementation. For example, PyOSError implements PySubclass yet has no #[repr(transparent)] attribute—it's a regular struct with a base field.

  • upcast_ref() assumes Python subtyping implies layout compatibility, which is not guaranteed by Rust type layout rules.

  • debug_assert!() only validates type relationships at runtime, not memory layout, so casts could unsafely reinterpret different layouts.

By contrast, PyRef<T>::into_base() and PyRef<T>::upcast() (lines 1076–1102) safely handle similar conversions by going through PyObjectRef and using type-safe downcasting.

Either enforce #[repr(transparent)] on all PySubclass implementors, use safe downcasting like PyRef, or provide explicit layout guarantees in the trait documentation.

🤖 Prompt for AI Agents
In crates/vm/src/object/core.rs around lines 1105 to 1125, the unsafe pointer
casts in to_base() and upcast_ref() rely on unguaranteed layout assumptions from
PySubclass and must be fixed: either enforce/layout-safety in the trait
(require/document #[repr(transparent)] on all PySubclass implementors) or remove
the unsafe casts and implement these methods using the safe object-level
conversions already used by PyRef (go through self.as_object() and use
downcast_ref/downcast into the appropriate Py<U> or convert via PyObjectRef as
PyRef::into_base()/upcast() do). Update the code to use the safe downcast path
(or add the explicit repr requirement to the trait and tests) and remove the
unsafe transmute-style pointer casts.


impl<T> Borrow<PyObject> for PyRef<T>
where
T: PyPayload,
Expand Down
18 changes: 11 additions & 7 deletions crates/vm/src/protocol/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,14 +202,18 @@ impl BufferDescriptor {
#[cfg(debug_assertions)]
pub fn validate(self) -> Self {
assert!(self.itemsize != 0);
assert!(self.ndim() != 0);
let mut shape_product = 1;
for (shape, stride, suboffset) in self.dim_desc.iter().cloned() {
shape_product *= shape;
assert!(suboffset >= 0);
assert!(stride != 0);
// ndim=0 is valid for scalar types (e.g., ctypes Structure)
if self.ndim() == 0 {
assert!(self.itemsize == self.len);
} else {
let mut shape_product = 1;
for (shape, stride, suboffset) in self.dim_desc.iter().cloned() {
shape_product *= shape;
assert!(suboffset >= 0);
assert!(stride != 0);
}
assert!(shape_product * self.itemsize == self.len);
}
assert!(shape_product * self.itemsize == self.len);
self
}

Expand Down
4 changes: 2 additions & 2 deletions crates/vm/src/stdlib/ast/python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ pub(crate) mod _ast {
Ok(())
}

#[pyattr(name = "_fields")]
fn fields(ctx: &Context) -> PyTupleRef {
#[pyattr]
fn _fields(ctx: &Context) -> PyTupleRef {
ctx.empty_tuple.clone()
}
}
Expand Down
4 changes: 2 additions & 2 deletions crates/vm/src/stdlib/codecs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ mod _codecs {

#[pyfunction]
fn latin_1_encode(args: EncodeArgs, vm: &VirtualMachine) -> EncodeResult {
if args.s.is_ascii() {
if args.s.isascii() {
return Ok((args.s.as_bytes().to_vec(), args.s.byte_len()));
}
do_codec!(latin_1::encode, args, vm)
Expand All @@ -189,7 +189,7 @@ mod _codecs {

#[pyfunction]
fn ascii_encode(args: EncodeArgs, vm: &VirtualMachine) -> EncodeResult {
if args.s.is_ascii() {
if args.s.isascii() {
return Ok((args.s.as_bytes().to_vec(), args.s.byte_len()));
}
do_codec!(ascii::encode, args, vm)
Expand Down
Loading
Loading