-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstatistics.hpp
More file actions
185 lines (164 loc) · 5.7 KB
/
statistics.hpp
File metadata and controls
185 lines (164 loc) · 5.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
#pragma once
#include <span>
#include <string>
#include <vector>
namespace gnuplotpp {
/**
* @brief Evaluate a Gaussian kernel density estimate (KDE) on a caller-supplied grid.
* @param samples Input sample values the density is estimated from.
* @param x_grid Query points at which the density is evaluated.
* @param bandwidth Kernel bandwidth; any value <= 0 (including the default of
*        -1.0) selects the bandwidth with Silverman's rule instead.
* @return Density values aligned with x_grid.
* @note Behavior for an empty `samples` span is not visible from this
*       declaration — confirm against the implementation.
*/
std::vector<double> gaussian_kde(std::span<const double> samples,
std::span<const double> x_grid,
double bandwidth = -1.0);
/**
* @brief Compute the points of the empirical CDF of a sample set.
* @param samples Input values.
* @param[out] x_sorted Receives the x values in sorted order.
* @param[out] p Receives the cumulative probability values, each in [0,1].
* @note Presumably p[i] is the cumulative probability at x_sorted[i] and any
*       previous contents of both outputs are replaced — confirm against the
*       implementation.
*/
void ecdf(std::span<const double> samples,
std::vector<double>& x_sorted,
std::vector<double>& p);
/**
* @brief Compute a single percentile band across an ensemble matrix.
* @param ensemble Row-major ensemble: each inner vector is one series, and all
*        series must have equal length.
* @param p_low Lower percentile, expressed as a fraction in [0,1] (not 0-100).
* @param p_high Upper percentile, expressed as a fraction in [0,1] (not 0-100).
* @param[out] low Receives the lower band.
* @param[out] high Receives the upper band.
* @note Presumably the percentiles are taken across series at each index, so
*       both outputs have the common series length — confirm against the
*       implementation.
*/
void percentile_band(const std::vector<std::vector<double>>& ensemble,
double p_low,
double p_high,
std::vector<double>& low,
std::vector<double>& high);
/**
* @brief Build multiple percentile ribbons for fan-chart visualization.
* @param ensemble Row-major ensemble: each inner vector is one series, and all
*        series must have equal length.
* @param quantiles Ascending quantiles in the open interval (0,1),
*        e.g. {0.1,0.25,0.75,0.9}.
* @param[out] lows Receives the low band of each ribbon.
* @param[out] highs Receives the high band of each ribbon.
* @note lows[i] and highs[i] describe the same ribbon. How quantiles are paired
*       into ribbons (presumably outermost-in, e.g. 0.1 with 0.9) is not
*       visible from this declaration — confirm against the implementation.
*/
void fan_chart_bands(const std::vector<std::vector<double>>& ensemble,
const std::vector<double>& quantiles,
std::vector<std::vector<double>>& lows,
std::vector<std::vector<double>>& highs);
/**
* @brief Approximate the density profile used to draw a violin plot.
* @param samples Input values.
* @param[out] y_grid Receives the y positions at which the profile is sampled.
* @param[out] half_width Receives the half-width density at each y position,
*        normalized to [0,1].
* @param points Number of y-grid points (defaults to 120).
* @note Presumably half_width[i] belongs to y_grid[i] and both outputs end up
*       with `points` entries — confirm against the implementation.
*/
void violin_profile(std::span<const double> samples,
std::vector<double>& y_grid,
std::vector<double>& half_width,
std::size_t points = 120);
/**
* @brief Smooth a signal with a simple (unweighted) moving average.
* @param y Input signal.
* @param window Window length, in samples.
* @return Smoothed signal.
* @note Window alignment (centered vs. trailing), edge handling and the
*       treatment of window == 0 are not visible from this declaration —
*       confirm against the implementation.
*/
std::vector<double> moving_average(std::span<const double> y, std::size_t window);
/**
* @brief Uniformly downsample a signal by keeping every k-th point.
* @param y Input signal.
* @param k Stride between kept points; must be >= 1.
* @return Downsampled signal.
* @note What happens for k == 0 is not visible from this declaration —
*       confirm against the implementation.
*/
std::vector<double> downsample_uniform(std::span<const double> y, std::size_t k);
/**
* @brief Compute the auto-correlation of a signal for lags in [0, max_lag].
* @param y Input signal.
* @param max_lag Maximum lag to evaluate.
* @return Correlation values for the requested lags (presumably max_lag + 1
*         entries, indexed by lag — confirm against the implementation).
* @note Normalization and the handling of max_lag >= y.size() are not visible
*       from this declaration.
*/
std::vector<double> autocorrelation(std::span<const double> y, std::size_t max_lag);
/**
* @brief Compute the points of a Q-Q plot of the samples against a normal
*        distribution.
* @param samples Input samples.
* @param[out] theo Receives the quantile positions of the reference normal
*        (the theoretical axis of the plot).
* @param[out] samp Receives the sorted sample quantiles (the sample axis of
*        the plot).
* @note Presumably theo[i] pairs with samp[i]; whether the reference normal is
*       standard or fitted to the samples is not visible from this
*       declaration — confirm against the implementation.
*/
void qq_plot_normal(std::span<const double> samples,
std::vector<double>& theo,
std::vector<double>& samp);
/**
* @brief Quartiles and whisker ends used for boxplot-style rendering.
*
* Every field is zero until it is filled in.
*/
struct BoxSummary {
  double q1{0.0};            ///< First (lower) quartile.
  double median{0.0};        ///< Median.
  double q3{0.0};            ///< Third (upper) quartile.
  double whisker_low{0.0};   ///< Lower whisker end.
  double whisker_high{0.0};  ///< Upper whisker end.
};
/** @brief Result of a linear least-squares fit of the form y = slope*x + intercept. */
struct LinearFitResult {
  double slope{0.0};      ///< Coefficient of x.
  double intercept{0.0};  ///< Constant term.
  double r2{0.0};         ///< Fit-quality score (presumably R^2, the coefficient of determination).
};
/** @brief Result of a polynomial least-squares fit. */
struct PolynomialFitResult {
  /// Coefficients in ascending power order: c0 + c1*x + c2*x^2 + ...
  std::vector<double> coeffs;
  /// Fit-quality score (presumably R^2, the coefficient of determination).
  double r2{0.0};
};
/**
* @brief Compute a boxplot summary with whiskers placed by Tukey's 1.5*IQR rule.
* @param samples Input data.
* @return Summary statistics (quartiles, median and whisker ends).
* @note Behavior for an empty `samples` span and the quantile interpolation
*       method are not visible from this declaration — confirm against the
*       implementation.
*/
BoxSummary box_summary(std::span<const double> samples);
/**
* @brief Generate the outline points of a 2D confidence ellipse for paired
*        x/y samples.
* @param x Samples for x.
* @param y Samples for y.
* @param nsigma Sigma scale of the ellipse (e.g., 1, 2, 3).
* @param[out] x_ellipse Receives the x coordinates of the ellipse points.
* @param[out] y_ellipse Receives the y coordinates of the ellipse points.
* @param points Number of points on the ellipse (defaults to 200).
* @note Presumably x and y must have equal length and the ellipse is derived
*       from their sample covariance — confirm against the implementation.
*/
void confidence_ellipse(std::span<const double> x,
std::span<const double> y,
double nsigma,
std::vector<double>& x_ellipse,
std::vector<double>& y_ellipse,
std::size_t points = 200);
/**
* @brief Fit the line y = slope*x + intercept to paired samples.
* @param x X samples.
* @param y Y samples.
* @return Fit summary (slope, intercept and r2).
* @note Presumably x and y must have equal length; handling of mismatched or
*       degenerate input is not visible from this declaration — confirm
*       against the implementation.
*/
LinearFitResult linear_fit(std::span<const double> x, std::span<const double> y);
/**
* @brief Evaluate a linear fit at the given x values.
* @param fit Fit coefficients (slope and intercept) to evaluate.
* @param x X values at which the fitted line is evaluated.
* @return Fitted y values (presumably one per entry of x, in the same order).
*/
std::vector<double> linear_fit_line(const LinearFitResult& fit, std::span<const double> x);
/**
* @brief Fit the polynomial y = c0 + c1*x + ... + cd*x^d to paired samples.
* @param x X samples.
* @param y Y samples.
* @param degree Polynomial degree d.
* @return Polynomial fit summary (coefficients and r2).
* @note Presumably x and y must have equal length and the result holds
*       degree + 1 coefficients; handling of under-determined input is not
*       visible from this declaration — confirm against the implementation.
*/
PolynomialFitResult polynomial_fit(std::span<const double> x,
std::span<const double> y,
std::size_t degree);
/**
* @brief Evaluate a polynomial fit at the given x values.
* @param fit Polynomial coefficients to evaluate (c0 + c1*x + c2*x^2 + ...).
* @param x X values at which the fitted polynomial is evaluated.
* @return Fitted y values (presumably one per entry of x, in the same order).
*/
std::vector<double> polynomial_fit_line(const PolynomialFitResult& fit,
std::span<const double> x);
} // namespace gnuplotpp