Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/capi/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ crate-type = ["cdylib", "rlib"]

[dependencies]
bitflags = { workspace = true }
itertools = { workspace = true }
num-complex = { workspace = true }
rustpython-vm = { workspace = true, features = ["threading", "compiler"] }
rustpython-stdlib = {workspace = true, features = ["threading"] }
Expand Down
1 change: 1 addition & 0 deletions crates/capi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ pub mod pyerrors;
pub mod pylifecycle;
pub mod pystate;
pub mod refcount;
pub mod setobject;
pub mod traceback;
pub mod tupleobject;
pub mod unicodeobject;
Expand Down
162 changes: 162 additions & 0 deletions crates/capi/src/setobject.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
use crate::PyObject;
use crate::object::define_py_check;
use crate::pystate::with_vm;
use core::ffi::c_int;
use itertools::process_results;
use rustpython_vm::AsObject;
use rustpython_vm::PyPayload;
use rustpython_vm::TryFromObject;
use rustpython_vm::builtins::{PyFrozenSet, PySet};
use rustpython_vm::function::ArgIterable;

define_py_check!(fn PySet_Check, types.set_type);
define_py_check!(fn PyFrozenSet_Check, types.frozenset_type);

#[unsafe(no_mangle)]
pub unsafe extern "C" fn PySet_New(iterable: *mut PyObject) -> *mut PyObject {
with_vm(|vm| {
if iterable.is_null() {
return Ok(PySet::default().into_ref(&vm.ctx));
}

let iterable = ArgIterable::try_from_object(vm, unsafe { &*iterable }.to_owned())?;
let set = PySet::default().into_ref(&vm.ctx);
for item in iterable.iter(vm)? {
set.add(item?, vm)?;
}
Ok(set)
})
}

#[unsafe(no_mangle)]
pub unsafe extern "C" fn PyFrozenSet_New(iterable: *mut PyObject) -> *mut PyObject {
with_vm(|vm| {
if iterable.is_null() {
return Ok(vm.ctx.empty_frozenset.to_owned());
}

let iterable = ArgIterable::try_from_object(vm, unsafe { &*iterable }.to_owned())?;
let set = process_results(iterable.iter(vm)?, |it| PyFrozenSet::from_iter(vm, it))??;
Ok(set.into_ref(&vm.ctx))
})
}

#[unsafe(no_mangle)]
pub unsafe extern "C" fn PySet_Add(set: *mut PyObject, key: *mut PyObject) -> c_int {
with_vm(|vm| {
let set = unsafe { &*set }.try_downcast_ref::<PySet>(vm)?;
let key = unsafe { &*key }.to_owned();
set.add(key, vm)
})
Comment thread
bschoenmaeckers marked this conversation as resolved.
}

#[unsafe(no_mangle)]
pub unsafe extern "C" fn PySet_Clear(set: *mut PyObject) -> c_int {
with_vm(|vm| {
let set = unsafe { &*set }.try_downcast_ref::<PySet>(vm)?;
set.clear();
Ok(())
})
}

#[unsafe(no_mangle)]
pub unsafe extern "C" fn PySet_Contains(anyset: *mut PyObject, key: *mut PyObject) -> c_int {
with_vm(|vm| {
let anyset = unsafe { &*anyset };
let key = unsafe { &*key };

if let Some(set) = anyset.downcast_ref::<PySet>() {
set.__contains__(key, vm)
} else if let Some(frozenset) = anyset.downcast_ref::<PyFrozenSet>() {
frozenset.__contains__(key, vm)
} else {
Err(vm.new_type_error(format!(
"expected set or frozenset, got '{}'",
anyset.class().name()
)))
}
})
}

#[unsafe(no_mangle)]
pub unsafe extern "C" fn PySet_Discard(set: *mut PyObject, key: *mut PyObject) -> c_int {
with_vm(|vm| {
let set = unsafe { &*set }.try_downcast_ref::<PySet>(vm)?;
let key = unsafe { &*key };
let had_item = set.__contains__(key, vm)?;
if had_item {
set.discard(key.to_owned(), vm)?;
}
Ok(had_item)
Comment on lines +82 to +90

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

🧩 Analysis chain

🌐 Web query:

CPython C API documentation for PySet_Discard: what are its return-value semantics, and is it defined as a single discard operation rather than a separate contains check followed by discard?

💡 Result:

PySet_Discard returns 1 if the element was found and removed, 0 if the element was not found (no action taken), and -1 if an error is encountered [1][2]. It does not raise a KeyError if the element is missing [1][2]. Regarding your second question, PySet_Discard is implemented as a single, atomic-like lookup-and-discard operation rather than separate "contains" and "discard" checks [3][4][5]. The underlying C implementation uses a function (typically named set_discard_entry in the CPython source) that performs a hash table lookup using the provided key [3][5]. If the key is found, it performs the removal in the same pass by updating the entry (replacing the key with a dummy tombstone, updating the set's size, and decrementing the reference count) [3][4][5]. This single-pass approach is more efficient and avoids the redundancy of performing two distinct lookups [5].

Citations:


🏁 Script executed:

#!/bin/bash
set -euo pipefail

# 1) Show the exact current implementation around the referenced lines
sed -n '60,120p' crates/capi/src/setobject.rs | nl -ba

# 2) Find the definition and any related helpers
rg -n "PySet_Discard|__contains__|fn discard|discard\(" crates/capi/src/setobject.rs crates/capi/src -S || true

# 3) Search for a bool-returning discard helper in the broader crate
rg -n "discard.*->.*bool|-> *Result<.*bool|discard.*bool|contains.*discard" crates/capi/src -S || true

# 4) Run clippy as requested by guidelines (may be slow; best-effort)
cargo clippy -q || true

Repository: RustPython/RustPython

Length of output: 108


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "== setobject.rs around PySet_Discard =="
awk 'NR>=60 && NR<=120 {printf "%5d %s\n", NR, $0}' crates/capi/src/setobject.rs

echo
echo "== occurrences in crates/capi/src =="
rg -n "PySet_Discard|__contains__|\\bPySetInner\\b|\\bPySet\\b|fn +discard\\b|discard\\(" crates/capi/src -S

echo
echo "== look for discard helpers returning bool (best-effort) =="
rg -n "discard.*->.*bool|Result<.*bool>|fn +discard\\b" crates/capi/src -S

echo
echo "== best-effort cargo clippy (per guidelines) =="
cargo clippy -q || echo "cargo clippy failed (toolchain/build error in sandbox)"

Repository: RustPython/RustPython

Length of output: 4212


🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Find PySet impl block / method definitions
rg -n "impl +PySet|struct +PySet|pub +fn +discard\\b|fn +discard\\b|__contains__\\b" crates -S | head -n 200

echo "----"
# Narrow to files likely defining PySet methods: search for "__contains__" method on PySet
rg -n "PySet.*__contains__|__contains__\\(.*vm\\)" crates -S | head -n 200

echo "----"
# Search for discard method returning Result/Option/boolean
rg -n "PySet.*discard\\b|fn +discard\\b.*->|discard\\b.*->" crates -S | head -n 200

echo "----"
echo "== best-effort cargo clippy =="
cargo clippy -q

Repository: RustPython/RustPython

Length of output: 11361


🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Find PySet impl block / method definitions
rg -n "impl +PySet|struct +PySet|pub +fn +discard\\b|fn +discard\\b|__contains__\\b" crates -S | head -n 200

echo "----"
# Narrow to files likely defining PySet methods: search for "__contains__" method on PySet
rg -n "PySet.*__contains__|__contains__\\(.*vm\\)" crates -S | head -n 200

echo "----"
# Search for discard method returning Result/Option/boolean
rg -n "PySet.*discard\\b|fn +discard\\b.*->|discard\\b.*->" crates -S | head -n 200

echo "----"
echo "== best-effort cargo clippy =="
cargo clippy -q

Repository: RustPython/RustPython

Length of output: 11633


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "== locate PySet methods / impls =="
rg -n "impl +PySet|struct +PySet|__contains__\\b|\\bdiscard\\b" crates -S | head -n 200

echo
echo "== locate __contains__ specifically =="
rg -n "PySet.*__contains__|__contains__\\(.*vm\\)" crates -S | head -n 200

echo
echo "== locate discard signatures =="
rg -n "PySet.*discard\\b|fn +discard\\b.*->|\\bdiscard\\b.*->" crates -S | head -n 200

echo
echo "== cargo clippy (best-effort) =="
cargo clippy -q || echo "cargo clippy failed (toolchain/build error in sandbox)"

Repository: RustPython/RustPython

Length of output: 13841


🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Print relevant sections of PySet implementation in RustPython
echo "== crates/vm/src/builtins/set.rs: PySetInner::discard (around 320-360) =="
sed -n '300,380p' crates/vm/src/builtins/set.rs | nl -ba || true

echo
echo "== crates/vm/src/builtins/set.rs: PySet::discard (around 640-710) =="
sed -n '640,720p' crates/vm/src/builtins/set.rs | nl -ba || true

echo
echo "== crates/vm/src/builtins/set.rs: PySet::__contains__ (search + print nearby) =="
rg -n "pub fn __contains__\\b|fn __contains__\\b" crates/vm/src/builtins/set.rs
# Print a small window around each match (best-effort)
for ln in $(rg -n "pub fn __contains__\\b|fn __contains__\\b" crates/vm/src/builtins/set.rs | cut -d: -f1 | head -n 20); do
  echo
  echo "--- around line $ln ---"
  sed -n "$((ln-30)),$((ln+30))p" crates/vm/src/builtins/set.rs | head -n 120
done

echo
echo "== search for any public discard returning bool =="
rg -n "pub +fn +discard\\b.*bool|pub\\(crate\\)? +fn +discard\\b.*bool" crates/vm/src/builtins/set.rs

Repository: RustPython/RustPython

Length of output: 3946


🏁 Script executed:

#!/bin/bash
set -euo pipefail

FILE="crates/vm/src/builtins/set.rs"

echo "== PySetInner::discard around ~320-370 =="
sed -n '300,380p' "$FILE"

echo
echo "== PySet::discard around ~650-710 =="
sed -n '650,720p' "$FILE"

echo
echo "== PySetInner::contains around wherever it is (search) =="
rg -n "impl PySetInner|fn contains\\b|PySetInner.*contains\\b" "$FILE" | head -n 80

echo
echo "== show body around first PySetInner::contains match (best-effort) =="
ln="$(rg -n "fn contains\\b" "$FILE" | head -n 1 | cut -d: -f1)"
if [ -n "$ln" ]; then
  start=$((ln-40))
  end=$((ln+60))
  sed -n "${start},${end}p" "$FILE"
fi

Repository: RustPython/RustPython

Length of output: 7814


Avoid the extra membership probe in PySet_Discard.

Current PySet_Discard does __contains__ and then (conditionally) calls discard, repeating the set lookup and any __hash__/__eq__ side effects; CPython’s PySet_Discard performs lookup+removal in one operation and returns 1/0 accordingly. Add a bool-returning discard helper on PySet (wrapping the existing PySetInner::discard(...)->PyResult<bool>) and call it once here.

pub unsafe extern "C" fn PySet_Discard(set: *mut PyObject, key: *mut PyObject) -> c_int {
    with_vm(|vm| {
        let set = unsafe { &*set }.try_downcast_ref::<PySet>(vm)?;
        let key = unsafe { &*key };
        let had_item = set.__contains__(key, vm)?;
        if had_item {
            set.discard(key.to_owned(), vm)?;
        }
        Ok(had_item)
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@crates/capi/src/setobject.rs` around lines 82 - 90, Replace the current
two-step membership + removal in PySet_Discard with a single discard call that
returns whether an item was removed: add a bool-returning helper method on PySet
(e.g., PySet::discard_returning_bool) that wraps the existing
PySetInner::discard(...)->PyResult<bool>, then update PySet_Discard to call that
helper once and return its boolean result as c_int; keep using the existing
try_downcast_ref::<PySet> and the same key handling but remove the prior
__contains__ probe so lookup+removal happen in one operation.

})
}

#[unsafe(no_mangle)]
pub unsafe extern "C" fn PySet_Pop(set: *mut PyObject) -> *mut PyObject {
with_vm(|vm| {
let set = unsafe { &*set }.try_downcast_ref::<PySet>(vm)?;
set.pop(vm)
})
}

#[unsafe(no_mangle)]
pub unsafe extern "C" fn PySet_Size(anyset: *mut PyObject) -> isize {
with_vm(|vm| {
let anyset = unsafe { &*anyset };
if let Some(set) = anyset.downcast_ref::<PySet>() {
set.as_object().length(vm)
} else if let Some(frozenset) = anyset.downcast_ref::<PyFrozenSet>() {
frozenset.as_object().length(vm)
} else {
Err(vm.new_type_error(format!(
"expected set or frozenset, got '{}'",
anyset.class().name()
)))
}
})
}

#[cfg(false)]
mod tests {
use pyo3::prelude::*;
use pyo3::types::{PyFrozenSet, PyInt, PySet};

#[test]
fn new_and_size() {
Python::attach(|py| {
let set = PySet::empty(py).unwrap();
assert!(set.is_instance_of::<PySet>());
assert_eq!(set.len(), 0);

let frozen = PyFrozenSet::empty(py).unwrap();
assert!(frozen.is_instance_of::<PyFrozenSet>());
assert_eq!(frozen.len(), 0);
})
}

#[test]
fn add_contains_discard() {
Python::attach(|py| {
let set = PySet::empty(py).unwrap();
let item = PyInt::new(py, 42);

set.add(&item).unwrap();
assert!(set.contains(&item).unwrap());
set.discard(&item).unwrap();
assert!(!set.contains(&item).unwrap());
})
}

#[test]
fn pop_reduces_size() {
Python::attach(|py| {
let set = PySet::empty(py).unwrap();
set.add(7).unwrap();
assert_eq!(set.len(), 1);

let popped = set.pop().unwrap();
assert_eq!(popped.extract::<i32>().unwrap(), 7);
assert_eq!(set.len(), 0);
})
}
}
10 changes: 5 additions & 5 deletions crates/vm/src/builtins/set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ impl PySet {
self.inner.len()
}

fn __contains__(&self, needle: &PyObject, vm: &VirtualMachine) -> PyResult<bool> {
pub fn __contains__(&self, needle: &PyObject, vm: &VirtualMachine) -> PyResult<bool> {
self.inner.contains(needle, vm)
}

Expand Down Expand Up @@ -679,17 +679,17 @@ impl PySet {
}

#[pymethod]
fn discard(&self, item: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> {
pub fn discard(&self, item: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> {
self.inner.discard(&item, vm).map(|_| ())
}

#[pymethod]
fn clear(&self) {
pub fn clear(&self) {
self.inner.clear()
}

#[pymethod]
fn pop(&self, vm: &VirtualMachine) -> PyResult {
pub fn pop(&self, vm: &VirtualMachine) -> PyResult {
self.inner.pop(vm)
}

Expand Down Expand Up @@ -995,7 +995,7 @@ impl PyFrozenSet {
self.inner.len()
}

fn __contains__(&self, needle: &PyObject, vm: &VirtualMachine) -> PyResult<bool> {
pub fn __contains__(&self, needle: &PyObject, vm: &VirtualMachine) -> PyResult<bool> {
self.inner.contains(needle, vm)
}

Expand Down
Loading