Skip to content

Commit 7752521

Browse files
authored
Merge branch 'main' into suffix_prefix_axis
2 parents c710605 + cf88cc1 commit 7752521

File tree

9 files changed

+39
-20
lines changed

9 files changed

+39
-20
lines changed

doc/source/whatsnew/v1.5.0.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -897,6 +897,7 @@ Performance improvements
897897
- Performance improvement in :meth:`.GroupBy.transform` for user-defined functions when only a single group exists (:issue:`44977`)
898898
- Performance improvement in :meth:`.GroupBy.apply` when grouping on a non-unique unsorted index (:issue:`46527`)
899899
- Performance improvement in :meth:`DataFrame.loc` and :meth:`Series.loc` for tuple-based indexing of a :class:`MultiIndex` (:issue:`45681`, :issue:`46040`, :issue:`46330`)
900+
- Performance improvement in :meth:`.GroupBy.var` with ``ddof`` other than one (:issue:`48152`)
900901
- Performance improvement in :meth:`DataFrame.to_records` when the index is a :class:`MultiIndex` (:issue:`47263`)
901902
- Performance improvement in :attr:`MultiIndex.values` when the MultiIndex contains levels of type DatetimeIndex, TimedeltaIndex or ExtensionDtypes (:issue:`46288`)
902903
- Performance improvement in :func:`merge` when left and/or right are empty (:issue:`45838`)

pandas/core/groupby/groupby.py

Lines changed: 14 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1744,6 +1744,7 @@ def _cython_agg_general(
17441744
numeric_only: bool | lib.NoDefault,
17451745
min_count: int = -1,
17461746
ignore_failures: bool = True,
1747+
**kwargs,
17471748
):
17481749
# Note: we never get here with how="ohlc" for DataFrameGroupBy;
17491750
# that goes through SeriesGroupBy
@@ -1768,7 +1769,12 @@ def _cython_agg_general(
17681769
def array_func(values: ArrayLike) -> ArrayLike:
17691770
try:
17701771
result = self.grouper._cython_operation(
1771-
"aggregate", values, how, axis=data.ndim - 1, min_count=min_count
1772+
"aggregate",
1773+
values,
1774+
how,
1775+
axis=data.ndim - 1,
1776+
min_count=min_count,
1777+
**kwargs,
17721778
)
17731779
except NotImplementedError:
17741780
# generally if we have numeric_only=False
@@ -2311,20 +2317,13 @@ def var(
23112317

23122318
return self._numba_agg_general(sliding_var, engine_kwargs, ddof)
23132319
else:
2314-
numeric_only_bool = self._resolve_numeric_only("var", numeric_only, axis=0)
2315-
if ddof == 1:
2316-
return self._cython_agg_general(
2317-
"var",
2318-
alt=lambda x: Series(x).var(ddof=ddof),
2319-
numeric_only=numeric_only,
2320-
ignore_failures=numeric_only is lib.no_default,
2321-
)
2322-
else:
2323-
func = lambda x: x.var(ddof=ddof)
2324-
with self._group_selection_context():
2325-
return self._python_agg_general(
2326-
func, raise_on_typeerror=not numeric_only_bool
2327-
)
2320+
return self._cython_agg_general(
2321+
"var",
2322+
alt=lambda x: Series(x).var(ddof=ddof),
2323+
numeric_only=numeric_only,
2324+
ignore_failures=numeric_only is lib.no_default,
2325+
ddof=ddof,
2326+
)
23282327

23292328
@final
23302329
@Substitution(name="groupby")

pandas/core/groupby/ops.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -600,7 +600,7 @@ def _call_cython_op(
600600
elif self.how == "ohlc":
601601
func(result, counts, values, comp_ids, min_count, mask, result_mask)
602602
else:
603-
func(result, counts, values, comp_ids, min_count)
603+
func(result, counts, values, comp_ids, min_count, **kwargs)
604604
else:
605605
# TODO: min_count
606606
if self.uses_mask():

pandas/tests/extension/test_string.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -175,6 +175,7 @@ def test_argsort(self, data_for_sorting):
175175
PerformanceWarning,
176176
pa_version_under7p0
177177
and getattr(data_for_sorting.dtype, "storage", "") == "pyarrow",
178+
check_stacklevel=False,
178179
):
179180
super().test_argsort(data_for_sorting)
180181

@@ -183,6 +184,7 @@ def test_argsort_missing(self, data_missing_for_sorting):
183184
PerformanceWarning,
184185
pa_version_under7p0
185186
and getattr(data_missing_for_sorting.dtype, "storage", "") == "pyarrow",
187+
check_stacklevel=False,
186188
):
187189
super().test_argsort_missing(data_missing_for_sorting)
188190

@@ -262,6 +264,7 @@ def test_argsort_missing_array(self, data_missing_for_sorting):
262264
PerformanceWarning,
263265
pa_version_under7p0
264266
and getattr(data_missing_for_sorting.dtype, "storage", "") == "pyarrow",
267+
check_stacklevel=False,
265268
):
266269
super().test_argsort_missing(data_missing_for_sorting)
267270

@@ -278,6 +281,7 @@ def test_nargsort(self, data_missing_for_sorting, na_position, expected):
278281
PerformanceWarning,
279282
pa_version_under7p0
280283
and getattr(data_missing_for_sorting.dtype, "storage", "") == "pyarrow",
284+
check_stacklevel=False,
281285
):
282286
super().test_nargsort(data_missing_for_sorting, na_position, expected)
283287

@@ -287,6 +291,7 @@ def test_sort_values(self, data_for_sorting, ascending, sort_by_key):
287291
PerformanceWarning,
288292
pa_version_under7p0
289293
and getattr(data_for_sorting.dtype, "storage", "") == "pyarrow",
294+
check_stacklevel=False,
290295
):
291296
super().test_sort_values(data_for_sorting, ascending, sort_by_key)
292297

@@ -298,6 +303,7 @@ def test_sort_values_missing(
298303
PerformanceWarning,
299304
pa_version_under7p0
300305
and getattr(data_missing_for_sorting.dtype, "storage", "") == "pyarrow",
306+
check_stacklevel=False,
301307
):
302308
super().test_sort_values_missing(
303309
data_missing_for_sorting, ascending, sort_by_key
@@ -309,6 +315,7 @@ def test_sort_values_frame(self, data_for_sorting, ascending):
309315
PerformanceWarning,
310316
pa_version_under7p0
311317
and getattr(data_for_sorting.dtype, "storage", "") == "pyarrow",
318+
check_stacklevel=False,
312319
):
313320
super().test_sort_values_frame(data_for_sorting, ascending)
314321

@@ -367,6 +374,7 @@ def test_groupby_extension_transform(self, data_for_grouping):
367374
PerformanceWarning,
368375
pa_version_under7p0
369376
and getattr(data_for_grouping.dtype, "storage", "") == "pyarrow",
377+
check_stacklevel=False,
370378
):
371379
super().test_groupby_extension_transform(data_for_grouping)
372380

pandas/tests/groupby/test_function.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1590,6 +1590,7 @@ def test_corrwith_with_1_axis():
15901590
tm.assert_series_equal(result, expected)
15911591

15921592

1593+
@pytest.mark.filterwarnings("ignore:The 'mad' method.*:FutureWarning")
15931594
def test_multiindex_group_all_columns_when_empty(groupby_func):
15941595
# GH 32464
15951596
df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"])

pandas/tests/groupby/test_groupby.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2349,6 +2349,7 @@ def test_groupby_duplicate_index():
23492349
tm.assert_series_equal(result, expected)
23502350

23512351

2352+
@pytest.mark.filterwarnings("ignore:.*is deprecated.*:FutureWarning")
23522353
def test_group_on_empty_multiindex(transformation_func, request):
23532354
# GH 47787
23542355
# With one row, those are transforms so the schema should be the same

pandas/tests/groupby/test_groupby_dropna.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -439,8 +439,15 @@ def test_no_sort_keep_na(values, dtype, test_series):
439439
gb = df.groupby("key", dropna=False, sort=False)
440440
if test_series:
441441
gb = gb["a"]
442-
result = gb.sum()
443-
expected = pd.DataFrame({"a": [5, 2, 3]}, index=key[:-1].rename("key"))
442+
443+
warn = None
444+
if isinstance(values, pd.arrays.SparseArray):
445+
warn = FutureWarning
446+
msg = "passing a SparseArray to pd.Index will store that array directly"
447+
with tm.assert_produces_warning(warn, match=msg):
448+
result = gb.sum()
449+
expected = pd.DataFrame({"a": [5, 2, 3]}, index=key[:-1].rename("key"))
450+
444451
if test_series:
445452
expected = expected["a"]
446453
if expected.index.is_categorical():

pandas/tests/indexes/multi/test_setops.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -572,4 +572,5 @@ def test_intersection_lexsort_depth(levels1, levels2, codes1, codes2, names):
572572
mi2 = MultiIndex(levels=levels2, codes=codes2, names=names)
573573
mi_int = mi1.intersection(mi2)
574574

575-
assert mi_int.lexsort_depth == 0
575+
with tm.assert_produces_warning(FutureWarning, match="MultiIndex.lexsort_depth"):
576+
assert mi_int.lexsort_depth == 0

pandas/tests/io/test_stata.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -817,7 +817,8 @@ def test_big_dates(self, datapath):
817817
# {c : c[-2:] for c in columns}
818818
with tm.ensure_clean() as path:
819819
expected.index.name = "index"
820-
expected.to_stata(path, date_conversion)
820+
with tm.assert_produces_warning(FutureWarning, match="keyword-only"):
821+
expected.to_stata(path, date_conversion)
821822
written_and_read_again = self.read_dta(path)
822823
tm.assert_frame_equal(
823824
written_and_read_again.set_index("index"),

0 commit comments

Comments
 (0)