
Commit 0dcc975

Add GC infrastructure: object tracking, tp_clear, and helper methods (#6994)
1 parent 5662fa0 commit 0dcc975

3 files changed: 205 additions & 36 deletions

.cspell.dict/cpython.txt

Lines changed: 1 addition & 0 deletions
@@ -55,6 +55,7 @@ fielddesc
 fieldlist
 fileutils
 finalbody
+finalizers
 flowgraph
 formatfloat
 freevar

crates/vm/src/gc_state.rs

Lines changed: 21 additions & 1 deletion
@@ -366,8 +366,22 @@ impl GcState {
 
     /// Check if automatic GC should run and run it if needed.
     /// Called after object allocation.
-    /// Currently a stub — returns false.
+    /// Returns true if GC was run, false otherwise.
     pub fn maybe_collect(&self) -> bool {
+        if !self.is_enabled() {
+            return false;
+        }
+
+        // _PyObject_GC_Alloc checks thresholds
+
+        // Check gen0 threshold
+        let count0 = self.generations[0].count.load(Ordering::SeqCst) as u32;
+        let threshold0 = self.generations[0].threshold();
+        if threshold0 > 0 && count0 >= threshold0 {
+            self.collect(0);
+            return true;
+        }
+
         false
     }
 
@@ -377,12 +391,18 @@ impl GcState {
     /// Currently a stub — the actual collection algorithm requires EBR
     /// and will be added in a follow-up.
     pub fn collect(&self, _generation: usize) -> (usize, usize) {
+        // gc_collect_main
+        // Reset gen0 count even though we're not actually collecting
+        self.generations[0].count.store(0, Ordering::SeqCst);
         (0, 0)
     }
 
     /// Force collection even if GC is disabled (for manual gc.collect() calls).
+    /// gc.collect() always runs regardless of gc.isenabled()
     /// Currently a stub.
     pub fn collect_force(&self, _generation: usize) -> (usize, usize) {
+        // Reset gen0 count even though we're not actually collecting
+        self.generations[0].count.store(0, Ordering::SeqCst);
         (0, 0)
     }
 

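For orientation: the new maybe_collect() follows the CPython generation-0 policy, where every tracked allocation bumps a per-generation counter and a collection of the youngest generation is attempted once that counter reaches its threshold. Below is a minimal, self-contained sketch of that policy using toy types; the names (ToyGc, Generation, track_new_object) and the threshold value are illustrative and are not part of this crate's GcState API.

use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};

/// Toy model of a GC generation: an allocation counter plus a threshold.
struct Generation {
    count: AtomicUsize,
    threshold: usize,
}

struct ToyGc {
    enabled: AtomicBool,
    gen0: Generation,
}

impl ToyGc {
    /// Called after each tracked allocation; runs a collection when the
    /// generation-0 counter reaches its threshold (a threshold of 0 disables it).
    fn maybe_collect(&self) -> bool {
        if !self.enabled.load(Ordering::SeqCst) {
            return false;
        }
        let count0 = self.gen0.count.load(Ordering::SeqCst);
        if self.gen0.threshold > 0 && count0 >= self.gen0.threshold {
            self.collect();
            return true;
        }
        false
    }

    /// Stand-in for the real collector: here it only resets the counter,
    /// matching the stub behaviour of collect() in this commit.
    fn collect(&self) {
        self.gen0.count.store(0, Ordering::SeqCst);
    }

    /// Stand-in for tracking a freshly allocated container object.
    fn track_new_object(&self) {
        self.gen0.count.fetch_add(1, Ordering::SeqCst);
        self.maybe_collect();
    }
}

fn main() {
    let gc = ToyGc {
        enabled: AtomicBool::new(true),
        gen0: Generation { count: AtomicUsize::new(0), threshold: 700 },
    };
    for _ in 0..700 {
        gc.track_new_object();
    }
    // The 700th allocation crossed the threshold, so the counter was reset.
    assert_eq!(gc.gen0.count.load(Ordering::SeqCst), 0);
}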
crates/vm/src/object/core.rs

Lines changed: 183 additions & 35 deletions
@@ -82,7 +82,7 @@ use core::{
 pub(super) struct Erased;
 
 /// Default dealloc: handles __del__, weakref clearing, tp_clear, and memory free.
-/// Equivalent to subtype_dealloc in CPython.
+/// Equivalent to subtype_dealloc.
 pub(super) unsafe fn default_dealloc<T: PyPayload>(obj: *mut PyObject) {
     let obj_ref = unsafe { &*(obj as *const PyObject) };
     if let Err(()) = obj_ref.drop_slow_inner() {
@@ -383,58 +383,101 @@ impl WeakRefList {
         weak
     }
 
-    /// PyObject_ClearWeakRefs: clear all weakrefs when the referent dies.
+    /// Clear all weakrefs and call their callbacks.
+    /// Called when the owner object is being dropped.
+    // PyObject_ClearWeakRefs
     fn clear(&self, obj: &PyObject) {
         let obj_addr = obj as *const PyObject as usize;
-        let mut to_callback: Vec<(PyRef<PyWeak>, PyObjectRef)> = Vec::new();
+        let _lock = weakref_lock::lock(obj_addr);
 
-        {
-            let _lock = weakref_lock::lock(obj_addr);
+        // Clear generic cache
+        self.generic.store(ptr::null_mut(), Ordering::Relaxed);
 
-            // Walk the list, collecting weakrefs with callbacks
-            let mut current = NonNull::new(self.head.load(Ordering::Relaxed));
-            while let Some(node) = current {
-                let next = unsafe { WeakLink::pointers(node).as_ref().get_next() };
+        // Walk the list, collecting weakrefs with callbacks
+        let mut callbacks: Vec<(PyRef<PyWeak>, PyObjectRef)> = Vec::new();
+        let mut current = NonNull::new(self.head.load(Ordering::Relaxed));
+        while let Some(node) = current {
+            let next = unsafe { WeakLink::pointers(node).as_ref().get_next() };
 
-                let wr = unsafe { node.as_ref() };
+            let wr = unsafe { node.as_ref() };
 
-                // Set wr_object to null (marks weakref as dead)
-                wr.0.payload
-                    .wr_object
-                    .store(ptr::null_mut(), Ordering::Relaxed);
+            // Mark weakref as dead
+            wr.0.payload
+                .wr_object
+                .store(ptr::null_mut(), Ordering::Relaxed);
 
-                // Unlink from list
-                unsafe {
-                    let mut ptrs = WeakLink::pointers(node);
-                    ptrs.as_mut().set_prev(None);
-                    ptrs.as_mut().set_next(None);
-                }
+            // Unlink from list
+            unsafe {
+                let mut ptrs = WeakLink::pointers(node);
+                ptrs.as_mut().set_prev(None);
+                ptrs.as_mut().set_next(None);
+            }
 
-                // Collect callback if weakref is still alive (strong_count > 0)
-                if wr.0.ref_count.get() > 0 {
-                    let cb = unsafe { wr.0.payload.callback.get().replace(None) };
-                    if let Some(cb) = cb {
-                        to_callback.push((wr.to_owned(), cb));
-                    }
+            // Collect callback if present and weakref is still alive
+            if wr.0.ref_count.get() > 0 {
+                let cb = unsafe { wr.0.payload.callback.get().replace(None) };
+                if let Some(cb) = cb {
+                    callbacks.push((wr.to_owned(), cb));
                 }
-
-                current = next;
             }
 
-            self.head.store(ptr::null_mut(), Ordering::Relaxed);
-            self.generic.store(ptr::null_mut(), Ordering::Relaxed);
+            current = next;
         }
+        self.head.store(ptr::null_mut(), Ordering::Relaxed);
 
-        // Call callbacks without holding the lock
-        for (wr, cb) in to_callback {
+        // Invoke callbacks outside the lock
+        drop(_lock);
+        for (wr, cb) in callbacks {
             crate::vm::thread::with_vm(&cb, |vm| {
-                // TODO: handle unraisable exception
-                let wr_obj: PyObjectRef = wr.clone().into();
-                let _ = cb.call((wr_obj,), vm);
+                let _ = cb.call((wr.clone(),), vm);
             });
         }
     }
 
+    /// Clear all weakrefs but DON'T call callbacks. Instead, return them for later invocation.
+    /// Used by GC to ensure ALL weakrefs are cleared BEFORE any callbacks are invoked.
+    /// handle_weakrefs() clears all weakrefs first, then invokes callbacks.
+    fn clear_for_gc_collect_callbacks(&self, obj: &PyObject) -> Vec<(PyRef<PyWeak>, PyObjectRef)> {
+        let obj_addr = obj as *const PyObject as usize;
+        let _lock = weakref_lock::lock(obj_addr);
+
+        // Clear generic cache
+        self.generic.store(ptr::null_mut(), Ordering::Relaxed);
+
+        let mut callbacks = Vec::new();
+        let mut current = NonNull::new(self.head.load(Ordering::Relaxed));
+        while let Some(node) = current {
+            let next = unsafe { WeakLink::pointers(node).as_ref().get_next() };
+
+            let wr = unsafe { node.as_ref() };
+
+            // Mark weakref as dead
+            wr.0.payload
+                .wr_object
+                .store(ptr::null_mut(), Ordering::Relaxed);
+
+            // Unlink from list
+            unsafe {
+                let mut ptrs = WeakLink::pointers(node);
+                ptrs.as_mut().set_prev(None);
+                ptrs.as_mut().set_next(None);
+            }
+
+            // Collect callback without invoking
+            if wr.0.ref_count.get() > 0 {
+                let cb = unsafe { wr.0.payload.callback.get().replace(None) };
+                if let Some(cb) = cb {
+                    callbacks.push((wr.to_owned(), cb));
+                }
+            }
+
+            current = next;
+        }
+        self.head.store(ptr::null_mut(), Ordering::Relaxed);
+
+        callbacks
+    }
+
     fn count(&self, obj: &PyObject) -> usize {
         let _lock = weakref_lock::lock(obj as *const PyObject as usize);
         let mut count = 0usize;
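A note on why the weakref path is split in two: when a whole set of objects dies at once, CPython's handle_weakrefs() invalidates every weakref into the doomed set before running any callback, so no callback can observe a half-torn-down object through a still-live weakref. clear_for_gc_collect_callbacks() provides the first half of that contract. A small self-contained model of the two-phase idea follows; the toy types are illustrative and are not this crate's PyObject or PyWeak.

use std::cell::RefCell;
use std::rc::Rc;

/// Toy stand-in for an object carrying a weakref callback.
struct ToyObject {
    name: &'static str,
    callback: RefCell<Option<Box<dyn Fn()>>>,
}

/// Two-phase teardown in the spirit of handle_weakrefs():
/// phase 1 clears weakrefs on ALL doomed objects and only collects the
/// callbacks; phase 2 invokes them once nothing in the doomed set can be
/// reached through a still-live weakref.
fn collect_doomed(doomed: &[Rc<ToyObject>]) {
    // Phase 1: "clear the weakref" by taking the callback without running it.
    let mut callbacks = Vec::new();
    for obj in doomed {
        if let Some(cb) = obj.callback.borrow_mut().take() {
            callbacks.push((obj.name, cb));
        }
    }
    // Phase 2: run the collected callbacks after every weakref is already dead.
    for (name, cb) in callbacks {
        println!("running weakref callback registered on {name}");
        cb();
    }
}

fn main() {
    let cb: Option<Box<dyn Fn()>> = Some(Box::new(|| println!("  callback body runs last")));
    let a = Rc::new(ToyObject { name: "a", callback: RefCell::new(cb) });
    let b = Rc::new(ToyObject { name: "b", callback: RefCell::new(None) });
    collect_doomed(&[a, b]);
}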
@@ -1044,13 +1087,20 @@ impl PyObject {
         }
 
         // __del__ should only be called once (like _PyGC_FINALIZED check in GIL_DISABLED)
+        // We call __del__ BEFORE clearing weakrefs to allow the finalizer to access
+        // the object's weak references if needed.
         let del = self.class().slots.del.load();
         if let Some(slot_del) = del
             && !self.gc_finalized()
         {
             self.set_gc_finalized();
             call_slot_del(self, slot_del)?;
         }
+
+        // Clear weak refs AFTER __del__.
+        // Note: This differs from GC behavior which clears weakrefs before finalizers,
+        // but for direct deallocation (drop_slow_inner), we need to allow the finalizer
+        // to run without triggering use-after-free from WeakRefList operations.
         if let Some(wrl) = self.weak_ref_list() {
             wrl.clear(self);
         }
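The ordering comments above boil down to two invariants on the direct-deallocation path: __del__ runs at most once (guarded by the finalized flag), and it runs while the object's weakref list is still intact. A tiny self-contained sketch of just those two invariants, with toy types standing in for the real object:

use std::cell::Cell;

/// Toy stand-in for a refcounted object with a finalizer and a weakref list.
struct ToyObject {
    has_del: bool,
    finalized: Cell<bool>,
    weakrefs_cleared: Cell<bool>,
}

impl ToyObject {
    /// Direct-deallocation order sketched in this hunk: run __del__ at most
    /// once, then clear weakrefs; the caller would release storage afterwards.
    fn drop_slow(&self) {
        if self.has_del && !self.finalized.get() {
            self.finalized.set(true);
            println!("calling __del__ while the weakref list is still intact");
        }
        self.weakrefs_cleared.set(true);
    }
}

fn main() {
    let obj = ToyObject {
        has_del: true,
        finalized: Cell::new(false),
        weakrefs_cleared: Cell::new(false),
    };
    obj.drop_slow();
    // A second deallocation attempt must not run __del__ again.
    obj.drop_slow();
    assert!(obj.finalized.get() && obj.weakrefs_cleared.get());
}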
@@ -1097,6 +1147,104 @@ impl PyObject {
         });
         result
     }
+
+    /// Call __del__ if present, without triggering object deallocation.
+    /// Used by GC to call finalizers before breaking cycles.
+    /// This allows proper resurrection detection.
+    /// CPython: PyObject_CallFinalizerFromDealloc in Objects/object.c
+    pub fn try_call_finalizer(&self) {
+        let del = self.class().slots.del.load();
+        if let Some(slot_del) = del
+            && !self.gc_finalized()
+        {
+            // Mark as finalized BEFORE calling __del__ to prevent double-call
+            // This ensures drop_slow_inner() won't call __del__ again
+            self.set_gc_finalized();
+            let result = crate::vm::thread::with_vm(self, |vm| {
+                if let Err(e) = slot_del(self, vm)
+                    && let Some(del_method) = self.get_class_attr(identifier!(vm, __del__))
+                {
+                    vm.run_unraisable(e, None, del_method);
+                }
+            });
+            let _ = result;
+        }
+    }
+
+    /// Clear weakrefs but collect callbacks instead of calling them.
+    /// This is used by GC to ensure ALL weakrefs are cleared BEFORE any callbacks run.
+    /// Returns collected callbacks as (PyRef<PyWeak>, callback) pairs.
+    // = handle_weakrefs
+    pub fn gc_clear_weakrefs_collect_callbacks(&self) -> Vec<(PyRef<PyWeak>, PyObjectRef)> {
+        if let Some(wrl) = self.weak_ref_list() {
+            wrl.clear_for_gc_collect_callbacks(self)
+        } else {
+            vec![]
+        }
+    }
+
+    /// Get raw pointers to referents without incrementing reference counts.
+    /// This is used during GC to avoid reference count manipulation.
+    /// tp_traverse visits objects without incref
+    ///
+    /// # Safety
+    /// The returned pointers are only valid as long as the object is alive
+    /// and its contents haven't been modified.
+    pub unsafe fn gc_get_referent_ptrs(&self) -> Vec<NonNull<PyObject>> {
+        let mut result = Vec::new();
+        // Traverse the entire object including dict and slots
+        self.0.traverse(&mut |child: &PyObject| {
+            result.push(NonNull::from(child));
+        });
+        result
+    }
+
+    /// Pop edges from this object for cycle breaking.
+    /// Returns extracted child references that were removed from this object (tp_clear).
+    /// This is used during garbage collection to break circular references.
+    ///
+    /// # Safety
+    /// - ptr must be a valid pointer to a PyObject
+    /// - The caller must have exclusive access (no other references exist)
+    /// - This is only safe during GC when the object is unreachable
+    pub unsafe fn gc_clear_raw(ptr: *mut PyObject) -> Vec<PyObjectRef> {
+        let mut result = Vec::new();
+        let obj = unsafe { &*ptr };
+
+        // 1. Clear payload-specific references (vtable.clear / tp_clear)
+        if let Some(clear_fn) = obj.0.vtable.clear {
+            unsafe { clear_fn(ptr, &mut result) };
+        }
+
+        // 2. Clear member slots (subtype_clear)
+        for slot in obj.0.slots.iter() {
+            if let Some(val) = slot.write().take() {
+                result.push(val);
+            }
+        }
+
+        result
+    }
+
+    /// Clear this object for cycle breaking (tp_clear).
+    /// This version takes &self but should only be called during GC
+    /// when exclusive access is guaranteed.
+    ///
+    /// # Safety
+    /// - The caller must guarantee exclusive access (no other references exist)
+    /// - This is only safe during GC when the object is unreachable
+    pub unsafe fn gc_clear(&self) -> Vec<PyObjectRef> {
+        // SAFETY: During GC collection, this object is unreachable (gc_refs == 0),
+        // meaning no other code has a reference to it. The only references are
+        // internal cycle references which we're about to break.
+        unsafe { Self::gc_clear_raw(self as *const _ as *mut PyObject) }
+    }
+
+    /// Check if this object has clear capability (tp_clear)
+    // Py_TPFLAGS_HAVE_GC types have tp_clear
+    pub fn gc_has_clear(&self) -> bool {
+        self.0.vtable.clear.is_some() || self.0.dict.is_some() || !self.0.slots.is_empty()
+    }
 }
 
 impl Borrow<PyObject> for PyObjectRef {

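Taken together, these helpers are the building blocks a cycle collector needs: gc_get_referent_ptrs() for the traverse phase, try_call_finalizer() for finalization, gc_clear_weakrefs_collect_callbacks() for weakref handling, and gc_clear()/gc_clear_raw() for breaking cycles. The collection algorithm itself is deferred to the EBR follow-up; for orientation, here is a self-contained toy of the classic CPython-style unreachable-set computation that these helpers are shaped to support. ToyHeap and its fields are illustrative, not code from this commit.

use std::collections::HashMap;

/// Toy heap for illustration: objects are ids, `refcount` counts all strong
/// references (external roots plus internal edges), and `edges` lists the
/// referents a tp_traverse-style visit would report.
struct ToyHeap {
    refcount: HashMap<usize, usize>,
    edges: HashMap<usize, Vec<usize>>,
}

impl ToyHeap {
    /// CPython-style unreachable detection: copy refcounts, subtract one for
    /// every internal edge, then anything still positive is reachable from
    /// outside the tracked set, and everything it reaches survives.
    fn find_unreachable(&self) -> Vec<usize> {
        let mut gc_refs = self.refcount.clone();
        for targets in self.edges.values() {
            for t in targets {
                if let Some(r) = gc_refs.get_mut(t) {
                    *r = r.saturating_sub(1);
                }
            }
        }
        // Roots: objects still referenced from outside the tracked set.
        let mut stack: Vec<usize> =
            gc_refs.iter().filter(|(_, r)| **r > 0).map(|(id, _)| *id).collect();
        let mut reachable: Vec<usize> = stack.clone();
        while let Some(id) = stack.pop() {
            for t in self.edges.get(&id).into_iter().flatten() {
                if !reachable.contains(t) {
                    reachable.push(*t);
                    stack.push(*t);
                }
            }
        }
        self.refcount
            .keys()
            .filter(|id| !reachable.contains(*id))
            .copied()
            .collect()
    }
}

fn main() {
    // Objects 1 and 2 form a cycle with no external references; 3 is held externally.
    let heap = ToyHeap {
        refcount: HashMap::from([(1, 1), (2, 1), (3, 2)]),
        edges: HashMap::from([(1, vec![2]), (2, vec![1]), (3, vec![])]),
    };
    let mut unreachable = heap.find_unreachable();
    unreachable.sort();
    assert_eq!(unreachable, vec![1, 2]);
    // A real collector would now, in order: call finalizers (try_call_finalizer),
    // clear weakrefs and collect their callbacks (gc_clear_weakrefs_collect_callbacks),
    // break the cycle (gc_clear), and finally run the collected weakref callbacks.
}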