Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
27207a1
ENH: New-style ArrayMethod object sorting with NaN handling
MaanasArora May 14, 2026
6989dbc
BUG: Add sentinel guard for out-of-bound argsorts
MaanasArora May 14, 2026
cd542e2
BUG: Correct return value on rich compare error
MaanasArora May 14, 2026
ae05df2
REF: Rewrite object tag comparisons
MaanasArora May 14, 2026
650dcd6
REF: New `_cmp` function to reuse in object `less`, `greater`, -1 for…
MaanasArora May 15, 2026
87bb30a
ENH: Error handling and early exit in sorts using NPY_CMP macro
MaanasArora May 15, 2026
9255027
Revert "ENH: Error handling and early exit in sorts using NPY_CMP macro"
MaanasArora May 15, 2026
90d0857
ENH: Handle errors from `cmp` in sort functions
MaanasArora May 16, 2026
72aed9d
BUG: Switch NULLs to None in object comparison null handling
MaanasArora May 18, 2026
4865d3a
REF: Simplify object comparison handling in quicksort and aquicksort …
MaanasArora May 18, 2026
ff962da
BENCH: Add object dtype to sort benchmarks
MaanasArora May 25, 2026
2ddcb7d
REF: Fix indentation
MaanasArora May 25, 2026
c81cb08
BUG: Fix swapped parametrizations
MaanasArora Jun 5, 2026
707143a
ENH: Add `greater_equal` to simple dtypes and use in timsort to avoid…
MaanasArora Jun 5, 2026
d8a7577
ENH: Fix use of `Py_LE/GE` and instead invert op for object sort comp…
MaanasArora Jun 8, 2026
a84f7b6
ENH: Optimize `less_equal` and `greater_equal` for `npy_half` type
MaanasArora Jun 8, 2026
6d83c91
DOC: Add release note
MaanasArora Jun 8, 2026
2c2ac85
Revert "ENH: Optimize `less_equal` and `greater_equal` for `npy_half`…
MaanasArora Jun 8, 2026
a99ba57
Revert "ENH: Fix use of `Py_LE/GE` and instead invert op for object s…
MaanasArora Jun 8, 2026
f2133e2
Revert "ENH: Add `greater_equal` to simple dtypes and use in timsort …
MaanasArora Jun 8, 2026
0fc1796
DOC: Clarify release note
MaanasArora Jun 8, 2026
792194f
TST: Fix casting of object arrays in sorting nan test
MaanasArora Jun 8, 2026
efe1704
BUG: Fix comparison logic in heapsort and aheapsort implementations
MaanasArora Jun 8, 2026
cea79da
STYLE: Remove unnecessary blank line
MaanasArora Jun 8, 2026
c279e21
DOC: Update release notes to clarify object array sorting behavior wi…
MaanasArora Jun 8, 2026
3a4f554
Remove presumably unnecessary with errstate and small tweak
seberg Jun 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions benchmarks/benchmarks/bench_function_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ class Sort(Benchmark):
'uint8',
'int8',
'bool',
'object',
],
[
('random',),
Expand Down Expand Up @@ -274,14 +275,24 @@ def time_sort(self, stable, descending, dtype, array_type):
# This is important because the data is prepared once per benchmark, but
# used across multiple runs.
if descending:
np.sort(self.arr, stable=stable, descending=True)
try:
np.sort(self.arr, stable=stable, descending=True)
except TypeError:
raise SkipNotImplemented(
f"Descending sort is not supported for {dtype}"
)
else:
# for backward compatibility to NumPy 2.0
np.sort(self.arr, stable=stable)

def time_argsort(self, stable, descending, dtype, array_type):
if descending:
np.argsort(self.arr, stable=stable, descending=True)
try:
np.argsort(self.arr, stable=stable, descending=True)
except TypeError:
raise SkipNotImplemented(
f"Descending argsort is not supported for {dtype}"
)
else:
# for backward compatibility to NumPy 2.0
np.argsort(self.arr, stable=stable)
Expand Down
7 changes: 7 additions & 0 deletions doc/release/upcoming_changes/31431.improvement.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Object array sorting supports ``descending=True`` and consistently sorts NaN-like objects
-----------------------------------------------------------------------------------------
`np.sort` and `np.argsort` with arrays of dtype ``object``
now support passing ``descending=True`` to sort in descending order. Objects that
compare as not equal to themselves (``obj != obj``), such as NaN-like objects,
are considered unordered and are sorted to the end of the array, regardless of
the value of ``descending``.
68 changes: 68 additions & 0 deletions numpy/_core/src/common/numpy_tag.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,74 @@ struct string_like_type {
}
};

// This tag is used to register object sorts, which replaces the old generic sort
// that did not handle NaNs at all. It supposes that any object such that
// obj != obj is NaN-like and should be sorted to the end as in other dtypes.
struct object_tag {
using type = PyObject *;
static constexpr NPY_TYPES type_value = NPY_OBJECT;

static int isnan(PyObject *a) {
/* PyObject_RichCompareBool is not used here because it takes a shortcut
* for identical objects, hence will return false for NaN != NaN. */
PyObject *result = PyObject_RichCompare(a, a, Py_NE);
if (result == NULL) {
return -1;
}
int ret = PyObject_IsTrue(result);
Py_DECREF(result);
return ret;
}

static int _cmp(PyObject *a, PyObject *b, int op)
{
if (a == NULL) {
a = Py_None;
}
if (b == NULL) {
b = Py_None;
}
Comment thread
seberg marked this conversation as resolved.

int ret = PyObject_RichCompareBool(a, b, op);
if (ret < 0) {
return -1;
}
if (ret) {
return 1;
}

ret = isnan(a);
if (ret < 0) {
return -1;
}
if (ret) {
return 0;
}
Comment thread
seberg marked this conversation as resolved.

ret = isnan(b);
if (ret < 0) {
return -1;
}
if (ret) {
return 1;
}

return 0;
}

static int less(PyObject *a, PyObject *b) {
return _cmp(a, b, Py_LT);
}

static int less_equal(PyObject *a, PyObject *b) {
return !less(b, a);
}

static int greater(PyObject *a, PyObject *b) {
return _cmp(a, b, Py_GT);
}
};

// Concrete tags consumed by callers.
using bool_tag = integral_type<npy_bool, NPY_BOOL>;
using byte_tag = integral_type<npy_byte, NPY_BYTE>;
Expand Down
46 changes: 34 additions & 12 deletions numpy/_core/src/npysort/npysort_heapsort.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,23 @@ int heapsort_(type *start, npy_intp n)
{
type tmp, *a;
npy_intp i, j, l;
int ret;

/* The array needs to be offset by one for heapsort indexing */
a = start - 1;

for (l = n >> 1; l > 0; --l) {
tmp = a[l];
for (i = l, j = l << 1; j <= n;) {
if (j < n && npy::cmp<Tag, reverse>(a[j], a[j + 1])) {
j += 1;
if (j < n) {
ret = npy::cmp<Tag, reverse>(a[j], a[j + 1]);
if (ret < 0) return ret;
if (ret) { j += 1; }
}
if (npy::cmp<Tag, reverse>(tmp, a[j])) {

ret = npy::cmp<Tag, reverse>(tmp, a[j]);
if (ret < 0) return ret;
if (ret) {
a[i] = a[j];
i = j;
j += j;
Expand All @@ -64,10 +70,15 @@ int heapsort_(type *start, npy_intp n)
a[n] = a[1];
n -= 1;
for (i = 1, j = 2; j <= n;) {
if (j < n && npy::cmp<Tag, reverse>(a[j], a[j + 1])) {
j++;
if (j < n) {
ret = npy::cmp<Tag, reverse>(a[j], a[j + 1]);
if (ret < 0) return ret;
if (ret) { j++; }
}
if (npy::cmp<Tag, reverse>(tmp, a[j])) {

ret = npy::cmp<Tag, reverse>(tmp, a[j]);
if (ret < 0) return ret;
if (ret) {
a[i] = a[j];
i = j;
j += j;
Expand Down Expand Up @@ -96,16 +107,22 @@ int aheapsort_(type *vv, npy_intp *tosort, npy_intp n)
{
type *v = vv;
npy_intp *a, i, j, l, tmp;
int ret;
/* The arrays need to be offset by one for heapsort indexing */
a = tosort - 1;

for (l = n >> 1; l > 0; --l) {
tmp = a[l];
for (i = l, j = l << 1; j <= n;) {
if (j < n && npy::cmp<Tag, reverse>(v[a[j]], v[a[j + 1]])) {
j += 1;
if (j < n) {
ret = npy::cmp<Tag, reverse>(v[a[j]], v[a[j + 1]]);
if (ret < 0) return ret;
if (ret) { j += 1; }
}
if (npy::cmp<Tag, reverse>(v[tmp], v[a[j]])) {

ret = npy::cmp<Tag, reverse>(v[tmp], v[a[j]]);
if (ret < 0) return ret;
if (ret) {
a[i] = a[j];
i = j;
j += j;
Expand All @@ -122,10 +139,15 @@ int aheapsort_(type *vv, npy_intp *tosort, npy_intp n)
a[n] = a[1];
n -= 1;
for (i = 1, j = 2; j <= n;) {
if (j < n && npy::cmp<Tag, reverse>(v[a[j]], v[a[j + 1]])) {
j++;
if (j < n) {
ret = npy::cmp<Tag, reverse>(v[a[j]], v[a[j + 1]]);
if (ret < 0) return ret;
if (ret) { j++; }
}
if (npy::cmp<Tag, reverse>(v[tmp], v[a[j]])) {

ret = npy::cmp<Tag, reverse>(v[tmp], v[a[j]]);
if (ret < 0) return ret;
if (ret) {
a[i] = a[j];
i = j;
j += j;
Expand Down
11 changes: 9 additions & 2 deletions numpy/_core/src/npysort/npysort_methods.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,12 @@ make_sorts_(PyArray_DTypeMeta *dtypemeta, const char *name)
NPY_DT_SLOTS(dtypemeta)->f.argsort[2] = atimsort_impl<Tag, type>;
}

NPY_ARRAYMETHOD_FLAGS meth_flags = NPY_METH_NO_FLOATINGPOINT_ERRORS;
if constexpr (std::is_same_v<Tag, npy::object_tag>) {
// lock the GIL for object sorts
meth_flags = (NPY_ARRAYMETHOD_FLAGS)(meth_flags | NPY_METH_REQUIRES_PYAPI);
}

std::string sort_name = std::string(name) + "_sort";
PyArray_DTypeMeta *sort_dtypes[2] = {dtypemeta, dtypemeta};
PyType_Slot sort_slots[3] = {
Expand All @@ -239,7 +245,7 @@ make_sorts_(PyArray_DTypeMeta *dtypemeta, const char *name)
1,
1,
NPY_NO_CASTING,
NPY_METH_NO_FLOATINGPOINT_ERRORS,
meth_flags,
sort_dtypes,
sort_slots,
};
Expand All @@ -263,7 +269,7 @@ make_sorts_(PyArray_DTypeMeta *dtypemeta, const char *name)
1,
1,
NPY_NO_CASTING,
NPY_METH_NO_FLOATINGPOINT_ERRORS,
meth_flags,
argsort_dtypes,
argsort_slots,
};
Expand Down Expand Up @@ -364,6 +370,7 @@ int register_all_sorts() {
r += make_string_sorts_<npy::string_tag>(&PyArray_BytesDType, "string");
r += make_string_sorts_<npy::unicode_tag>(&PyArray_UnicodeDType, "unicode");
r += make_sorts_<npy::half_tag>(&PyArray_HalfDType, "half");
r += make_sorts_<npy::object_tag>(&PyArray_ObjectDType, "object");

return r;
}
74 changes: 60 additions & 14 deletions numpy/_core/src/npysort/quicksort.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,22 +161,33 @@ quicksort_(type *start, npy_intp num)
int depth[PYA_QS_STACK];
int *psdepth = depth;
int cdepth = npy_get_msb(num) * 2;
int ret;
constexpr bool is_object = std::is_same_v<Tag, npy::object_tag>;

for (;;) {
if (NPY_UNLIKELY(cdepth < 0)) {
heapsort_<Tag, type, reverse>(pl, pr - pl + 1);
ret = heapsort_<Tag, type, reverse>(pl, pr - pl + 1);
if (NPY_UNLIKELY(ret < 0)) {
return ret;
}
goto stack_pop;
}
while ((pr - pl) > SMALL_QUICKSORT) {
/* quicksort partition */
pm = pl + ((pr - pl) >> 1);
if (npy::cmp<Tag, reverse>(*pm, *pl)) {
ret = npy::cmp<Tag, reverse>(*pm, *pl);
if (ret < 0) return ret;
if (ret) {
std::swap(*pm, *pl);
}
if (npy::cmp<Tag, reverse>(*pr, *pm)) {
ret = npy::cmp<Tag, reverse>(*pr, *pm);
if (ret < 0) return ret;
if (ret) {
std::swap(*pr, *pm);
}
if (npy::cmp<Tag, reverse>(*pm, *pl)) {
ret = npy::cmp<Tag, reverse>(*pm, *pl);
if (ret < 0) return ret;
if (ret) {
std::swap(*pm, *pl);
}
vp = *pm;
Expand All @@ -186,10 +197,16 @@ quicksort_(type *start, npy_intp num)
for (;;) {
do {
++pi;
} while (npy::cmp<Tag, reverse>(*pi, vp));

ret = npy::cmp<Tag, reverse>(*pi, vp);
if (ret < 0) return ret;
} while ((!is_object || pi < pj) && ret);
do {
--pj;
} while (npy::cmp<Tag, reverse>(vp, *pj));

ret = npy::cmp<Tag, reverse>(vp, *pj);
if (ret < 0) return ret;
} while ((!is_object || pi < pj) && ret);
if (pi >= pj) {
break;
}
Expand All @@ -216,8 +233,16 @@ quicksort_(type *start, npy_intp num)
vp = *pi;
pj = pi;
pk = pi - 1;
while (pj > pl && npy::cmp<Tag, reverse>(vp, *pk)) {

ret = npy::cmp<Tag, reverse>(vp, *pk);
if (ret < 0) return ret;
while (pj > pl && ret) {
*pj-- = *pk--;

if (pj > pl) {
ret = npy::cmp<Tag, reverse>(vp, *pk);
if (ret < 0) return ret;
}
}
*pj = vp;
}
Expand Down Expand Up @@ -259,22 +284,31 @@ aquicksort_(type *vv, npy_intp *tosort, npy_intp num)
int depth[PYA_QS_STACK];
int *psdepth = depth;
int cdepth = npy_get_msb(num) * 2;
int ret;
constexpr bool is_object = std::is_same_v<Tag, npy::object_tag>;

for (;;) {
if (NPY_UNLIKELY(cdepth < 0)) {
aheapsort_<Tag, type, reverse>(vv, pl, pr - pl + 1);
ret = aheapsort_<Tag, type, reverse>(vv, pl, pr - pl + 1);
if (ret < 0) return ret;
goto stack_pop;
}
while ((pr - pl) > SMALL_QUICKSORT) {
/* quicksort partition */
pm = pl + ((pr - pl) >> 1);
if (npy::cmp<Tag, reverse>(v[*pm], v[*pl])) {
ret = npy::cmp<Tag, reverse>(v[*pm], v[*pl]);
if (ret < 0) return ret;
if (ret) {
std::swap(*pm, *pl);
}
if (npy::cmp<Tag, reverse>(v[*pr], v[*pm])) {
ret = npy::cmp<Tag, reverse>(v[*pr], v[*pm]);
if (ret < 0) return ret;
if (ret) {
std::swap(*pr, *pm);
}
if (npy::cmp<Tag, reverse>(v[*pm], v[*pl])) {
ret = npy::cmp<Tag, reverse>(v[*pm], v[*pl]);
if (ret < 0) return ret;
if (ret) {
std::swap(*pm, *pl);
}
vp = v[*pm];
Expand All @@ -284,10 +318,14 @@ aquicksort_(type *vv, npy_intp *tosort, npy_intp num)
for (;;) {
do {
++pi;
} while (npy::cmp<Tag, reverse>(v[*pi], vp));
ret = npy::cmp<Tag, reverse>(v[*pi], vp);
if (ret < 0) return ret;
} while ((!is_object || pi < pj) && ret);
do {
--pj;
} while (npy::cmp<Tag, reverse>(vp, v[*pj]));
ret = npy::cmp<Tag, reverse>(vp, v[*pj]);
if (ret < 0) return ret;
} while ((!is_object || pi < pj) && ret);
if (pi >= pj) {
break;
}
Expand Down Expand Up @@ -315,8 +353,16 @@ aquicksort_(type *vv, npy_intp *tosort, npy_intp num)
vp = v[vi];
pj = pi;
pk = pi - 1;
while (pj > pl && npy::cmp<Tag, reverse>(vp, v[*pk])) {

ret = npy::cmp<Tag, reverse>(vp, v[*pk]);
if (ret < 0) return ret;
while (pj > pl && ret) {
*pj-- = *pk--;

if (pj > pl) {
ret = npy::cmp<Tag, reverse>(vp, v[*pk]);
if (ret < 0) return ret;
}
}
*pj = vi;
}
Expand Down
Loading
Loading