Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
"kwonly",
"lossily",
"makeunicodedata",
"mcache",
"microbenchmark",
"microbenchmarks",
"miri",
Expand Down
276 changes: 252 additions & 24 deletions crates/vm/src/builtins/type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ use core::{
ops::Deref,
pin::Pin,
ptr::NonNull,
sync::atomic::{AtomicU32, Ordering},
sync::atomic::{AtomicBool, AtomicPtr, AtomicU32, Ordering},
};
use indexmap::{IndexMap, map::Entry};
use itertools::Itertools;
Expand All @@ -61,6 +61,112 @@ pub struct PyType {
/// generic path).
static NEXT_TYPE_VERSION: AtomicU32 = AtomicU32::new(1);

// Method cache (type_cache / MCACHE): direct-mapped cache keyed by
// (tp_version_tag, interned_name_ptr).
//
// Uses a lock-free SeqLock pattern:
// - version acts as both cache key AND sequence counter
// - Read: load version (Acquire), read value ptr, re-check version
// - Write: set version=0 (invalidate), store value, store version (Release)
// No mutex needed on the hot path (cache hit).
//
// NOTE(review): because `version` doubles as the sequence counter, a slot
// that is invalidated and re-published under the SAME version value looks
// unchanged to a concurrent reader (ABA window) — confirm whether a
// dedicated monotonic per-slot sequence counter is warranted.

/// log2 of the number of cache slots.
const TYPE_CACHE_SIZE_EXP: u32 = 12;
/// Number of direct-mapped slots (4096).
const TYPE_CACHE_SIZE: usize = 1 << TYPE_CACHE_SIZE_EXP;
/// Mask applied to a hash value to obtain a slot index.
const TYPE_CACHE_MASK: usize = TYPE_CACHE_SIZE - 1;

/// One direct-mapped slot of the global method cache.
struct TypeCacheEntry {
    /// tp_version_tag at cache time. 0 = empty/invalid.
    version: AtomicU32,
    /// Interned attribute name pointer (pointer equality check).
    name: AtomicPtr<PyStrInterned>,
    /// Cached lookup result as raw pointer. null = empty.
    /// The cache holds a strong reference (refcount incremented).
    value: AtomicPtr<PyObject>,
}

// SAFETY: TypeCacheEntry is thread-safe:
// - All fields use atomic operations
// - Value pointer is valid as long as version matches (SeqLock pattern)
// - PyObjectRef uses atomic reference counting
// NOTE(review): every field is an atomic, and atomics are already
// Send + Sync, so these manual impls are likely redundant auto-trait
// restatements — confirm and consider removing.
unsafe impl Send for TypeCacheEntry {}
unsafe impl Sync for TypeCacheEntry {}

impl TypeCacheEntry {
    /// Create an empty (invalid) slot: version 0, null name, null value.
    fn new() -> Self {
        Self {
            version: AtomicU32::new(0),
            name: AtomicPtr::new(core::ptr::null_mut()),
            value: AtomicPtr::new(core::ptr::null_mut()),
        }
    }

    /// Take the value out of this entry, returning the owned PyObjectRef.
    /// Caller must ensure no concurrent reads can observe this entry
    /// (version should be set to 0 first).
    fn take_value(&self) -> Option<PyObjectRef> {
        // Relaxed suffices here: the caller has already invalidated the
        // slot (version = 0) so no reader will validate against this value.
        let ptr = self.value.swap(core::ptr::null_mut(), Ordering::Relaxed);
        // SAFETY: non-null ptr was stored via PyObjectRef::into_raw
        NonNull::new(ptr).map(|nn| unsafe { PyObjectRef::from_raw(nn) })
    }
}

// std::sync::LazyLock is used here (not crate::common::lock::LazyLock)
// because TYPE_CACHE is a global shared across test threads. The common
// LazyLock delegates to LazyCell in non-threading mode, which is !Sync.
static TYPE_CACHE: std::sync::LazyLock<Box<[TypeCacheEntry]>> =
    std::sync::LazyLock::new(|| {
        // Allocate TYPE_CACHE_SIZE empty slots up front; the cache is
        // fixed-size and direct-mapped, so a boxed slice is sufficient.
        std::iter::repeat_with(TypeCacheEntry::new)
            .take(TYPE_CACHE_SIZE)
            .collect()
    });

/// When true, find_name_in_mro skips populating the cache.
/// Set during GC's type_cache_clear to prevent re-population from drops.
static TYPE_CACHE_CLEARING: AtomicBool = AtomicBool::new(false);

/// MCACHE_HASH: mix the version tag with the (shifted) interned-name
/// address via XOR, then mask the result down to a valid slot index.
#[inline]
fn type_cache_hash(version: u32, name: &'static PyStrInterned) -> usize {
    // Drop the low alignment bits of the pointer before mixing; they
    // carry no entropy for heap-allocated interned strings.
    let addr = name as *const PyStrInterned as usize;
    let name_hash = (addr >> 3) as u32;
    ((version ^ name_hash) as usize) & TYPE_CACHE_MASK
}

/// Invalidate cache entries for a specific version tag and release values.
/// Called from modified() when a type is changed.
fn type_cache_clear_version(version: u32) {
    let mut released = Vec::new();
    for entry in TYPE_CACHE.iter() {
        if entry.version.load(Ordering::Relaxed) != version {
            continue;
        }
        // Invalidate before taking the value so concurrent readers fail
        // their SeqLock re-check instead of observing a half-cleared slot.
        entry.version.store(0, Ordering::Release);
        if let Some(value) = entry.take_value() {
            released.push(value);
        }
    }
    // Drop the collected strong references after the scan completes.
    drop(released);
}

/// Clear all method cache entries (_PyType_ClearCache).
/// Called during GC collection to release strong references that might
/// prevent cycle collection.
///
/// Sets TYPE_CACHE_CLEARING to suppress cache re-population during the
/// entire operation, preventing concurrent lookups from repopulating
/// entries while we're clearing them.
pub fn type_cache_clear() {
    TYPE_CACHE_CLEARING.store(true, Ordering::Release);
    // Invalidate every slot first, then collect its value (if any);
    // `extend` on an Option pushes zero or one element.
    let mut released = Vec::new();
    for entry in TYPE_CACHE.iter() {
        entry.version.store(0, Ordering::Release);
        released.extend(entry.take_value());
    }
    // Release all strong references held by the cache.
    drop(released);
    TYPE_CACHE_CLEARING.store(false, Ordering::Release);
}

unsafe impl crate::object::Traverse for PyType {
fn traverse(&self, tracer_fn: &mut crate::object::TraverseFn<'_>) {
self.base.traverse(tracer_fn);
Expand Down Expand Up @@ -206,6 +312,18 @@ impl PyType {
/// Assign a fresh version tag. Returns 0 if the version counter has been
/// exhausted, in which case no new cache entries can be created.
pub fn assign_version_tag(&self) -> u32 {
let v = self.tp_version_tag.load(Ordering::Acquire);
if v != 0 {
return v;
}

// Assign versions to all direct bases first (MRO invariant).
for base in self.bases.read().iter() {
if base.assign_version_tag() == 0 {
return 0;
}
}

loop {
let current = NEXT_TYPE_VERSION.load(Ordering::Relaxed);
let Some(next) = current.checked_add(1) else {
Expand All @@ -223,7 +341,17 @@ impl PyType {

/// Invalidate this type's version tag and cascade to all subclasses.
pub fn modified(&self) {
// If already invalidated, all subclasses must also be invalidated
// (guaranteed by the MRO invariant in assign_version_tag).
let old_version = self.tp_version_tag.load(Ordering::Acquire);
if old_version == 0 {
return;
}
self.tp_version_tag.store(0, Ordering::Release);
// Release strong references held by cache entries for this version.
// We hold owned refs that would prevent GC of class attributes after
// type deletion.
type_cache_clear_version(old_version);
let subclasses = self.subclasses.read();
for weak_ref in subclasses.iter() {
if let Some(sub) = weak_ref.upgrade() {
Expand Down Expand Up @@ -574,24 +702,104 @@ impl PyType {

/// Insert (or overwrite) a class attribute, then invalidate the method
/// cache for this type (and, via modified(), its subclasses).
pub fn set_attr(&self, attr_name: &'static PyStrInterned, value: PyObjectRef) {
    {
        // Scope the write lock so it is dropped before modified() runs.
        let mut attributes = self.attributes.write();
        attributes.insert(attr_name, value);
    }
    self.modified();
}

/// Internal get_attr implementation for fast lookup on a class.
/// Searches the full MRO (including self) with method cache acceleration.
//
// NOTE: the scraped diff showed the removed pre-cache body
// (get_direct_attr / get_super_attr) concatenated with the new one;
// this is the merged post-change version.
pub fn get_attr(&self, attr_name: &'static PyStrInterned) -> Option<PyObjectRef> {
    flame_guard!(format!("class_get_attr({:?})", attr_name));

    self.find_name_in_mro(attr_name)
}
Comment on lines +708 to 712
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Cache invalidation is incomplete for direct attribute mutations.

Now that Line 713 and Line 827 use the cached MRO path, direct self.attributes.write().insert/remove call sites that do not call self.modified() can leave stale cache entries visible (e.g., special setters outside this hunk). Please centralize attribute mutation + invalidation behind one helper and route all runtime mutations through it.

Proposed direction
+    fn mutate_attributes<R>(&self, f: impl FnOnce(&mut PyAttributes) -> R) -> R {
+        let mut attrs = self.attributes.write();
+        let out = f(&mut attrs);
+        drop(attrs);
+        self.modified();
+        out
+    }
+
     pub fn set_attr(&self, attr_name: &'static PyStrInterned, value: PyObjectRef) {
-        self.attributes.write().insert(attr_name, value);
-        self.modified();
+        self.mutate_attributes(|attrs| {
+            attrs.insert(attr_name, value);
+        });
     }

Then migrate other runtime self.attributes.write().insert/swap_remove/shift_remove paths to this helper.

Also applies to: 825-856

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@crates/vm/src/builtins/type.rs` around lines 710 - 714, The get_attr path
uses find_name_in_mro and relies on an MRO cache, but direct mutations via
self.attributes.write().insert/remove/swap_remove/shift_remove bypass
self.modified() and leave stale cache entries; add a single helper on the class
(e.g., a private method like modify_attribute / set_attribute_and_invalidate)
that performs attribute mutations (insert/remove/swap_remove/shift_remove) under
the write lock and calls self.modified() / invalidates the MRO/type cache, then
replace all direct calls to self.attributes.write().* across the file (including
places currently doing insert, remove, swap_remove, shift_remove) to use this
helper so all runtime mutations consistently update caches and avoid stale
get_attr results that rely on find_name_in_mro.


/// Return this class's own attribute (no MRO walk), cloned.
pub fn get_direct_attr(&self, attr_name: &'static PyStrInterned) -> Option<PyObjectRef> {
    let attributes = self.attributes.read();
    attributes.get(attr_name).map(|value| value.clone())
}

/// Equivalent to CPython's find_name_in_mro
/// Look in tp_dict of types in MRO - bypasses descriptors and other attribute access machinery
/// find_name_in_mro with method cache (MCACHE).
/// Looks in tp_dict of types in MRO, bypasses descriptors.
///
/// Uses a lock-free SeqLock pattern keyed by version:
/// Read: load version → check name → load value → clone → re-check version
/// Write: version=0 → swap value → set name → version=assigned
fn find_name_in_mro(&self, name: &'static PyStrInterned) -> Option<PyObjectRef> {
Comment on lines +721 to 724
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Verify the current protocol shape (single version field reused, no independent seq counter)
rg -n "struct TypeCacheEntry|version: AtomicU32|fn find_name_in_mro|fn has_name_in_mro|entry.version.store\(0|entry.version.store\(assigned|let v2 = entry.version.load" crates/vm/src/builtins/type.rs -C3

Repository: RustPython/RustPython

Length of output: 4022


🏁 Script executed:

# Verify the memory ordering details and drop timing
rg -n "entry.value.load\(Ordering::" crates/vm/src/builtins/type.rs -A1 -B1
rg -n "entry.value.swap\(|drop\(PyObjectRef::from_raw\(" crates/vm/src/builtins/type.rs -B1 -A1
rg -n "fn set_in_cache" crates/vm/src/builtins/type.rs -A 40 | head -50

Repository: RustPython/RustPython

Length of output: 620


🏁 Script executed:

# Check the full write sequence and synchronization context
rg -n "fn set_in_cache" crates/vm/src/builtins/type.rs -A 35 | head -60

Repository: RustPython/RustPython

Length of output: 47


🏁 Script executed:

# Get the full set_in_cache function including line numbers
sed -n '768,802p' crates/vm/src/builtins/type.rs

Repository: RustPython/RustPython

Length of output: 1498


MCACHE read/write protocol has unsynchronized memory ordering and ABA issues that can cause use-after-free.

The reader loads value with Acquire (line 738), but the writer swaps in new value with Relaxed (line 786), which don't synchronize. The reader's Acquire load only orders with Release stores of the same atomic; it doesn't guarantee visibility of the Relaxed swap between the version invalidation and restoration.

Combined with the ABA window—version cycles assigned → 0 → assigned if the type version unchanged—a reader can:

  1. Load v1 = assigned before writer invalidates
  2. Load old pointer before seeing the new value (no ordering between Acquire and Relaxed swap)
  3. Observe v2 == v1 (same version restored)
  4. Clone and return the stale pointer that is immediately dropped (lines 795)

Use Ordering::Release on the value.swap() call (line 786), or replace the single version field with a dedicated monotonic sequence counter to ensure safe publication and reclamation.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@crates/vm/src/builtins/type.rs` around lines 723 - 726, The MCACHE SeqLock in
find_name_in_mro has an unsynchronized writer: the writer uses a Relaxed swap
for the shared `value` which can race with the reader's Acquire load and allow
an ABA/use-after-free; change the writer's `value.swap(...)` to use
Ordering::Release so the reader's Acquire load pairs with it, or alternatively
replace the single `version` field with a monotonic sequence counter (a
dedicated seq counter) to avoid the assigned→0→assigned ABA window—update the
writer side where `value.swap` is called and ensure the reader still uses
Acquire and rechecks the version/seq after cloning.

// mro[0] is self, so we just iterate through the entire MRO
let version = self.tp_version_tag.load(Ordering::Acquire);
if version != 0 {
let idx = type_cache_hash(version, name);
let entry = &TYPE_CACHE[idx];
let v1 = entry.version.load(Ordering::Acquire);
if v1 == version
&& core::ptr::eq(
entry.name.load(Ordering::Relaxed),
name as *const _ as *mut _,
)
{
let ptr = entry.value.load(Ordering::Acquire);
if !ptr.is_null() {
// SAFETY: The value pointer was stored via PyObjectRef::into_raw
// and is valid as long as the version hasn't changed. We create
// a temporary reference (ManuallyDrop prevents decrement), clone
// it to get our own strong reference, then re-check the version
// to confirm the entry wasn't invalidated during our read.
let cloned = unsafe {
let tmp = core::mem::ManuallyDrop::new(PyObjectRef::from_raw(
NonNull::new_unchecked(ptr),
));
(*tmp).clone()
};
// SeqLock validation: if version changed, discard our clone
let v2 = entry.version.load(Ordering::Acquire);
if v2 == v1 {
return Some(cloned);
}
drop(cloned);
}
}
}

// Assign version BEFORE the MRO walk so that any concurrent
// modified() call during the walk invalidates this version.
let assigned = if version == 0 {
self.assign_version_tag()
} else {
version
};

// MRO walk
let result = self.find_name_in_mro_uncached(name);

// Only cache positive results. Negative results are not cached to
// avoid stale entries from transient MRO walk failures during
// concurrent type modifications.
if let Some(ref found) = result
Comment on lines +770 to +773
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Negative caching is not implemented in the current lookup path.

The code explicitly skips caching misses, so repeated not-found lookups still do full MRO walks and this diverges from the stated MCACHE behavior.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@crates/vm/src/builtins/type.rs` around lines 771 - 774, The lookup currently
only caches hits inside the block that checks "if let Some(ref found) = result",
so misses (result == None) are not recorded; modify the lookup's cache update
logic to record negative entries when result is None by adding an explicit
branch that inserts a sentinel/negative marker into the same MCACHE/attribute
cache using the same cache key (instead of skipping it), and ensure invalidation
paths that already clear the positive cache (e.g., type mutation / MRO change
handlers) also remove or bump versioning for negative entries so they don't
become stale; concretely, replace the "if let Some(ref found) = result" pattern
with a match or if/else that writes either the found value or a negative-cache
marker into the cache (and use the existing cache APIs used for positive entries
so keys and eviction behave consistently).

&& assigned != 0
&& !TYPE_CACHE_CLEARING.load(Ordering::Acquire)
&& self.tp_version_tag.load(Ordering::Acquire) == assigned
{
let idx = type_cache_hash(assigned, name);
let entry = &TYPE_CACHE[idx];
// Invalidate first to prevent readers from seeing partial state
entry.version.store(0, Ordering::Release);
// Swap in new value (refcount held by cache)
let new_ptr = found.clone().into_raw().as_ptr();
let old_ptr = entry.value.swap(new_ptr, Ordering::Relaxed);
entry
.name
.store(name as *const _ as *mut _, Ordering::Relaxed);
// Activate entry — Release ensures value/name writes are visible
entry.version.store(assigned, Ordering::Release);
// Drop previous occupant (its version was already invalidated)
if !old_ptr.is_null() {
unsafe {
drop(PyObjectRef::from_raw(NonNull::new_unchecked(old_ptr)));
}
}
}

result
}

/// Raw MRO walk without cache.
fn find_name_in_mro_uncached(&self, name: &'static PyStrInterned) -> Option<PyObjectRef> {
for cls in self.mro.read().iter() {
if let Some(value) = cls.attributes.read().get(name) {
return Some(value.clone());
Expand All @@ -600,14 +808,9 @@ impl PyType {
None
}

/// _PyType_LookupRef: look up a name through the MRO without setting an exception.
///
/// Bypasses descriptors and the rest of the attribute access machinery.
/// Returns None if the name cannot be interned or is absent from the MRO.
pub fn lookup_ref(&self, name: &Py<PyStr>, vm: &VirtualMachine) -> Option<PyObjectRef> {
    // Only interned names can key the method cache; `?` maps a failed
    // interning straight to None (presumably a never-interned string
    // cannot name any class attribute — confirm against interner rules).
    let interned_name = vm.ctx.interned_str(name)?;
    self.find_name_in_mro(interned_name)
}

Expand All @@ -617,12 +820,37 @@ impl PyType {
.find_map(|class| class.attributes.read().get(attr_name).cloned())
}

/// Fast lookup for attribute existence on a class.
///
/// Delegates to has_name_in_mro, which probes the method cache before
/// falling back to a full MRO walk.
//
// NOTE: the scraped diff showed the removed pre-cache body (direct
// contains_key + mro[1..] scan) concatenated with the new one; this is
// the merged post-change version.
pub fn has_attr(&self, attr_name: &'static PyStrInterned) -> bool {
    self.has_name_in_mro(attr_name)
}

/// Check if attribute exists in MRO, using method cache for fast check.
/// Unlike find_name_in_mro, avoids cloning the value on cache hit.
fn has_name_in_mro(&self, name: &'static PyStrInterned) -> bool {
    // Probe the cache exactly like find_name_in_mro's read path, but stop
    // at "a non-null value exists" — no clone, no refcount traffic.
    let version = self.tp_version_tag.load(Ordering::Acquire);
    if version != 0 {
        let idx = type_cache_hash(version, name);
        let entry = &TYPE_CACHE[idx];
        // SeqLock read: slot must carry our type's version and exactly
        // this interned-name pointer (pointer equality, not string compare).
        let v1 = entry.version.load(Ordering::Acquire);
        if v1 == version
            && core::ptr::eq(
                entry.name.load(Ordering::Relaxed),
                name as *const _ as *mut _,
            )
        {
            let ptr = entry.value.load(Ordering::Acquire);
            if !ptr.is_null() {
                // Re-check the version; if it is unchanged, the non-null
                // pointer we observed was a valid hit. The pointer is never
                // dereferenced here, so no strong reference is needed.
                let v2 = entry.version.load(Ordering::Acquire);
                if v2 == v1 {
                    return true;
                }
            }
        }
    }

    // Cache miss — use find_name_in_mro which populates cache
    self.find_name_in_mro(name).is_some()
}

pub fn get_attributes(&self) -> PyAttributes {
Expand Down Expand Up @@ -1270,8 +1498,8 @@ impl PyType {
PySetterValue::Assign(ref val) => {
let key = identifier!(vm, __type_params__);
self.check_set_special_type_attr(key, vm)?;
let mut attrs = self.attributes.write();
attrs.insert(key, val.clone().into());
self.attributes.write().insert(key, val.clone().into());
self.modified();
}
PySetterValue::Delete => {
// For delete, we still need to check if the type is immutable
Expand All @@ -1281,9 +1509,9 @@ impl PyType {
self.slot_name()
)));
}
let mut attrs = self.attributes.write();
let key = identifier!(vm, __type_params__);
attrs.shift_remove(&key);
self.attributes.write().shift_remove(&key);
self.modified();
}
}
Ok(())
Expand Down
4 changes: 4 additions & 0 deletions crates/vm/src/gc_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,10 @@ impl GcState {
let generation = generation.min(2);
let debug = self.get_debug();

// Clear the method cache to release strong references that
// might prevent cycle collection (_PyType_ClearCache).
crate::builtins::type_::type_cache_clear();

// Step 1: Gather objects from generations 0..=generation
// Hold read locks for the entire collection to prevent other threads
// from untracking objects while we're iterating.
Expand Down
Loading