Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/api/next_api_changes/behavior/violinplot_empty.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Axes.violinplot and cbook.violin_stats ignore non-finite values
---------------------------------------------------------------

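`~matplotlib.axes.Axes.violinplot` and `matplotlib.cbook.violin_stats` now ignore masked and non-finite (NaN and inf) values. A dataset that is empty after this filtering no longer raises; it yields empty ``vals``, ``coords`` and ``quantiles`` arrays and NaN for the mean, median, min and max.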
5 changes: 4 additions & 1 deletion lib/matplotlib/axes/_axes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8893,6 +8893,8 @@ def violinplot(self, dataset, positions=None, vert=None,
- sequence of 1D arrays: A violin is drawn for each array in the sequence.
- 2D array: A violin is drawn for each column in the array.

Non-finite and masked values are ignored.

positions : array-like, default: [1, 2, ..., n]
The positions of the violins; i.e. coordinates on the x-axis for
vertical violins (or y-axis for horizontal violins).
Expand Down Expand Up @@ -9264,7 +9266,8 @@ def cycle_color(color, alpha=None):
for stats, pos, width, facecolor in bodies_zip:
# The 0.5 factor reflects the fact that we plot from v-p to v+p.
vals = np.array(stats['vals'])
vals = 0.5 * width * vals / vals.max()
if len(vals) > 0:
vals = 0.5 * width * vals / vals.max()
bodies += [fill(stats['coords'],
-vals + pos if side in ['both', 'low'] else pos,
vals + pos if side in ['both', 'high'] else pos,
Expand Down
52 changes: 29 additions & 23 deletions lib/matplotlib/cbook.py
Original file line number Diff line number Diff line change
Expand Up @@ -1499,7 +1499,8 @@ def violin_stats(X, method=("GaussianKDE", "scott"), points=100, quantiles=None)
----------
X : 1D array or sequence of 1D arrays or 2D array
Sample data that will be used to produce the gaussian kernel density
estimates. Possible values:
estimates. Non-finite and masked values are ignored.
Possible values:

- 1D array: Statistics are computed for that array.
- sequence of 1D arrays: Statistics are computed for each array in the sequence.
Expand Down Expand Up @@ -1586,29 +1587,34 @@ def _kde_method(x, coords):
" must have the same length")

# Zip x and quantiles
for (x, q) in zip(X, quantiles):
# Dictionary of results for this distribution
stats = {}

# Calculate basic stats for the distribution
min_val = np.min(x)
max_val = np.max(x)
quantile_val = np.percentile(x, 100 * q)
for (x, quantile) in zip(X, quantiles):
x = np.asarray(x)
x, = delete_masked_points(x)

# Evaluate the kernel density estimate
coords = np.linspace(min_val, max_val, points)
stats['vals'] = method(x, coords)
stats['coords'] = coords

# Store additional statistics for this distribution
stats['mean'] = np.mean(x)
stats['median'] = np.median(x)
stats['min'] = min_val
stats['max'] = max_val
stats['quantiles'] = np.atleast_1d(quantile_val)

# Append to output
vpstats.append(stats)
if len(x) == 0:
vpstats.append({
'vals': np.array([]),
'coords': np.array([]),
'mean': np.nan,
'median': np.nan,
'min': np.nan,
'max': np.nan,
'quantiles': np.array([]),
})
else:
min_val = np.min(x)
max_val = np.max(x)
coords = np.linspace(min_val, max_val, points)

vpstats.append({
'vals': method(x, coords),
'coords': coords,
'mean': np.mean(x),
'median': np.median(x),
'min': min_val,
'max': max_val,
'quantiles': np.atleast_1d(np.percentile(x, 100 * quantile))
})

return vpstats

Expand Down
7 changes: 7 additions & 0 deletions lib/matplotlib/tests/test_axes.py
Original file line number Diff line number Diff line change
Expand Up @@ -10360,3 +10360,10 @@ def test_errorbar_uses_rcparams():
assert_allclose([cap.get_markeredgewidth() for cap in caplines], 2.5)
for barcol in barlinecols:
assert_allclose(barcol.get_linewidths(), 1.75)


def test_violinplot_empty_dataset():
    """Check that violinplot accepts empty and all-NaN datasets."""
    _, axes = plt.subplots()
    datasets = [
        np.random.randn(100),  # ordinary sample
        [],                    # no data at all
        [np.nan, np.nan],      # only non-finite values
    ]
    # The call itself is the main check: it must not raise.
    violins = axes.violinplot(datasets)
    # One body is still expected per dataset, including the empty ones.
    assert len(violins["bodies"]) == 3
Loading