Skip to content
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.6.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ Indexing
^^^^^^^^
- Bug in :meth:`DataFrame.reindex` filling with wrong values when indexing columns and index for ``uint`` dtypes (:issue:`48184`)
- Bug in :meth:`DataFrame.reindex` casting dtype to ``object`` when :class:`DataFrame` has single extension array column when re-indexing ``columns`` and ``index`` (:issue:`48190`)
-
- Bug in :func:`~DataFrame.describe` where percentiles formatted in the resulting index showed more decimals than needed (:issue:`46362`)

Missing
^^^^^^^
Expand Down
7 changes: 4 additions & 3 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1720,11 +1720,12 @@ def format_percentiles(
raise ValueError("percentiles should all be in the interval [0,1]")

percentiles = 100 * percentiles
percentiles_round_type = percentiles.round().astype(int)

int_idx = np.isclose(percentiles.astype(int), percentiles)
int_idx = np.isclose(percentiles_round_type, percentiles)

if np.all(int_idx):
out = percentiles.astype(int).astype(str)
out = percentiles_round_type.astype(str)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does the example in the original issue hit this path or the one below? Just want to make sure we have 2 tests that didn't work before and hit this change and the one below to work correctly now

Copy link
Contributor Author

@weikhor weikhor Sep 2, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does the example in the original issue hit this path or the one below? Just want to make sure we have 2 tests that didn't work before and hit this change and the one below to work correctly now

a) The example from the issue will hit this path.

pd.DataFrame(data=[1, 2, 3], columns=['value']).describe(percentiles=[0.29, 0.57, 0.58])
if np.all(int_idx):
        out = percentiles_round_type.astype(str)
        return [i + "%" for i in out]

b) The example below, which I created myself, will hit the path below.

pd.DataFrame(data=[1, 2, 3], columns=['value']).describe(percentiles=[0.291, 0.57, 0.58])
out[int_idx] = percentiles[int_idx].round().astype(int).astype(str)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add the pd.DataFrame(data=[1, 2, 3], columns=['value']).describe(percentiles=[0.29, 0.57, 0.58]) case as a unit test?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have added the test.

return [i + "%" for i in out]

unique_pcts = np.unique(percentiles)
Expand All @@ -1737,7 +1738,7 @@ def format_percentiles(
).astype(int)
prec = max(1, prec)
out = np.empty_like(percentiles, dtype=object)
out[int_idx] = percentiles[int_idx].astype(int).astype(str)
out[int_idx] = percentiles[int_idx].round().astype(int).astype(str)

out[~int_idx] = percentiles[~int_idx].round(prec).astype(str)
return [i + "%" for i in out]
Expand Down
36 changes: 23 additions & 13 deletions pandas/tests/io/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -3306,24 +3306,34 @@ def test_nat_representations(self):
assert f(NaT) == "NaT"


def test_format_percentiles():
result = fmt.format_percentiles([0.01999, 0.02001, 0.5, 0.666666, 0.9999])
expected = ["1.999%", "2.001%", "50%", "66.667%", "99.99%"]
@pytest.mark.parametrize(
"percentiles, expected",
[
(
[0.01999, 0.02001, 0.5, 0.666666, 0.9999],
["1.999%", "2.001%", "50%", "66.667%", "99.99%"],
),
(
[0, 0.5, 0.02001, 0.5, 0.666666, 0.9999],
["0%", "50%", "2.0%", "50%", "66.67%", "99.99%"],
),
([0.281, 0.29, 0.57, 0.58], ["28.1%", "29%", "57%", "58%"]),
([0.28, 0.29, 0.57, 0.58], ["28%", "29%", "57%", "58%"]),
],
)
def test_format_percentiles(percentiles, expected):
result = fmt.format_percentiles(percentiles)
assert result == expected

result = fmt.format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999])
expected = ["0%", "50%", "2.0%", "50%", "66.67%", "99.99%"]
assert result == expected

@pytest.mark.parametrize(
"percentiles",
[([0.1, np.nan, 0.5]), ([-0.001, 0.1, 0.5]), ([2, 0.1, 0.5]), ([0.1, 0.5, "a"])],
)
def test_error_format_percentiles(percentiles):
msg = r"percentiles should all be in the interval \[0,1\]"
with pytest.raises(ValueError, match=msg):
fmt.format_percentiles([0.1, np.nan, 0.5])
with pytest.raises(ValueError, match=msg):
fmt.format_percentiles([-0.001, 0.1, 0.5])
with pytest.raises(ValueError, match=msg):
fmt.format_percentiles([2, 0.1, 0.5])
with pytest.raises(ValueError, match=msg):
fmt.format_percentiles([0.1, 0.5, "a"])
fmt.format_percentiles(percentiles)


def test_format_percentiles_integer_idx():
Expand Down