polyfit/test.rs
1//! A test-suite designed to bridge the gap between statistical rigour and engineering pragmatism.
2//!
3//! # Features
4//!
5//! ## General Purpose Macros
6//!
7//! ### [`crate::function!`]
8//!
9//! DSL for generating monomial polynomials. Great for generating synthetic data sets!
10//! ```rust
11//! polyfit::function!(const f(x) = 5 x^4 - 4 x^3 + 2.5);
12//! let data = f.solve_range(0.0..=100.0, 1.0);
13//! ```
14//!
15//! ### [`crate::basis_select!`]
16//! Automatically fits a dataset against every supported polynomial base and reports the best fits.
17//! - The best 3 models are printed to the console, and if the `plotting` feature is enabled, they are plotted too!
18//! ```rust
19//! # use polyfit::statistics::DegreeBound;
20//! # use polyfit::score::Aic;
21//! # use polyfit::transforms::{ApplyNoise, Strength};
22//! # use polyfit::{function, basis_select};
23//! function!(test(x) = 2.0 x^3 + 3.0 x^2 - 4.0 x + 5.0);
24//! let data = test
25//! .solve_range(0.0..=100.0, 1.0)
26//! .apply_normal_noise(Strength::Relative(0.1), None);
27//! basis_select!(&data, DegreeBound::Relaxed, &Aic);
28//! ```
29//!
30//! ### [`crate::plot!`]
31//! Macro to plot a polynomial fit or function if the `plotting` feature is enabled.
32//! - Supports custom titles, and axis ranges.
33//! - Can plot multiple functions on the same graph.
34//! - Includes residuals, confidence intervals, and source data for fits
35//! ```rust
36//! # let data = vec![(0.0, 1.0), (1.0, 2.0), (2.0, 0.5), (3.0, 4.0)];
37//! # use polyfit::{MonomialFit};
38//! # use polyfit::statistics::{DegreeBound};
39//! # use polyfit::score::Aic;
40//! let fit = MonomialFit::new_auto(&data, DegreeBound::Relaxed, &Aic).unwrap();
41//! # #[cfg(feature = "plotting")]
42//! # {
43//! polyfit::plot!(fit, { title: "My Fit".to_string(), x_range: Some(0.0..3.0), y_range: Some(0.0..5.0) });
44//! # }
45//! ```
46//!
47//! ## Fit quality assertions
48//! These are designed to be used in unit tests to validate fit quality.
49//! - If `plotting` feature is enabled, the fit is plotted on failure to help diagnose issues.
50//!
51//! ### [`crate::assert_close`]
52//! Asserts that two floating-point values are approximately equal within a small tolerance (epsilon).
53//! This is useful for comparing computed values where exact equality is not expected due to rounding errors.
54//! - Uses the machine epsilon for the floating-point type as the tolerance.
55//! - `assert_eq!` equivalent for floats.
56//!
57//! ### [`crate::assert_all_close`]
58//! Asserts that two slices of floating-point values are approximately equal element-wise within a small tolerance (epsilon).
59//! This is useful for comparing arrays of computed values where exact equality is not expected due to rounding errors.
60//! - Uses the machine epsilon for the floating-point type as the tolerance.
61//! - Element-wise [`crate::assert_close`].
62//!
63//! ### [`crate::assert_y`]
64//! Asserts that a fit produces an expected 'y' value at a given 'x' input.
65//! Useful for spot-checking specific predictions of the model.
66//! - Compares the fit's output against an expected value within a small tolerance (epsilon).
67//!
68//! ### [`crate::assert_fits`]
69//! Asserts that a fit matches a known polynomial function.
70//! Compares the fit's r² value against a threshold to ensure it closely follows the expected curve.
71//! Useful if you know the underlying function but want to validate the fitting process.
72//! See [`crate::CurveFit::r_squared`] for more details.
73//!
74//! ### [`crate::assert_r_squared`]
75//! General case of [`crate::assert_fits`] that does not require a known function.
76//! Asserts that the fit's r² value relative to the source data is above a certain threshold.
77//! This is a measure of how well the curve explains how wiggly the data is.
78//! See [`crate::CurveFit::r_squared`] for more details.
79//!
80//! ### [`crate::assert_monotone`]
81//! Asserts that a fit is monotonic (either strictly increasing or strictly decreasing) over the range of the source data.
82//! This is useful for validating models where the output should consistently rise or fall with the input.
83//! See [`crate::CurveFit::monotonicity_violations`] for more details.
84//!
85//! ### [`crate::assert_residuals_normal`]
86//! Asserts that the residuals (the differences between the observed and predicted values) of a fit are normally distributed.
87//! Think of it like making sure the errors are random, not based on some undiscovered pattern.
88//! See [`crate::statistics::residual_normality`] for more details.
89//! - Results will be between 0.0 and 1.0, with values closer to 1.0 indicating a better fit.
90//!
91//! ### [`crate::assert_max_residual`]
92//! Asserts that all residuals (the differences between the observed and predicted values) of a fit are below a certain threshold.
93//! This helps to ensure that the fit is not only accurate, but also consistent.
94//! - This is an absolute measure, unlike [`crate::assert_residuals_normal`] which is a relative measure.
95
96#[macro_use]
97#[cfg(test)]
98pub mod basis_assertions;
99
100mod assertions;
101
102/// Macro to generate a monomial polynomial function.
103///
104/// This is good for using as a data source for testing
105/// - Terms can be listed in any order
106/// - Same-power terms are summed
107/// - Missing terms are 0
108///
109/// The only major limitation is that it needs a space between the coefficient and the variable:
110/// - `20.0 x^3` is valid, but `20.0x^3` is not.
111///
112/// Syntax:
113/// ```text
114/// function!(
115/// [const | static]?
116/// [<name>(<x>) = ]?
117/// [ [+]? <coef> [ x [ ^ <deg> ]? ]? ]+
118/// )
119/// ```
120///
121/// # Example
122/// ```
123/// # use polyfit::function;
124/// function!(test(x) = 20.0 x^3 + 3.0 x^2 - 2.0 x + 4.0); // Normal let-binding
125/// function!(const test2(x) = 20.0 x^3 + 3.0 x^2 - 2.0 x + 4.0); // const! can live outside functions
126/// function!(static test3(x) = 20.0 x^3 + 3.0 x^2 - 2.0 x + 4.0); // I added static for some reason
127/// let test4 = function!(20.0 x^3 + 3.0 x^2 - 2.0); // No auto bindings
128///
129/// // Evaluate at x = 5
130/// println!("{}", test.y(5.0));
131///
132/// // You could visualize it too:
133/// // polyfit::plot_function!(test, x_range = 0.0..1000.0);
134/// ```
135#[macro_export]
136macro_rules! function {
137 ($( $(+)? $c0:literal $(x $( ^ $d0:literal )?)? )+) => { {
138 const LEN: usize = {
139 let mut degree = 0; $(
140 let d2 = 1 $(+ 1 $(* $d0 as usize)?)?;
141 if d2 > degree { degree = d2; }
142 )+
143 degree
144 };
145
146 const COEFS: [f64; LEN] = {
147 let mut coefs = [0.0; LEN];
148 // coef alone is degree 0, 1 if just x, or the power if specified
149 $( coefs[ 0 $(+ 1 $(* $d0 as usize)?)? ] += $c0 as f64; )+
150 coefs
151 };
152
153 $crate::MonomialPolynomial::borrowed(&COEFS)
154 }};
155
156 ($name:ident (x) = $($rest:tt)+ ) => {
157 let $name: $crate::MonomialPolynomial = $crate::function!($($rest)+);
158 };
159
160 (const $name:ident (x) = $($rest:tt)+ ) => {
161 const $name: $crate::MonomialPolynomial<'static> = $crate::function!($($rest)+);
162 };
163
164 (static $name:ident (x) = $($rest:tt)+ ) => {
165 static $name: $crate::MonomialPolynomial<'static> = $crate::function!($($rest)+);
166 };
167}
168
169/// Automatically fits a dataset against multiple polynomial bases and reports the best fits.
170///
171/// # Syntax
172/// ```text
173/// basis_select!(data, options = [Basis1<T>, Basis2<T>, …]);
174/// basis_select!(data); // Uses default [MonomialBasis<f64>, ChebyshevBasis<f64>]
175/// ```
176///
177/// # Behavior
178/// - Tries to construct a `CurveFit<Basis>` for each basis in the provided list.
179/// - Uses `CurveFit::new_auto` with for each basis with the provided `DegreeBound` and scoring method ([`crate::score`]).
180/// - For each successful fit, computes:
181/// - r² value against the source data
182/// - p-value for residual normality test
183/// - A combined rating (0.75 * r² + 0.25 * p-value) for overall quality.
184/// - A star rating out of 5 based on the combined rating.
185/// - An equation string representation.
186///
187/// - Displays a summary table of the top 3 fits
188/// - For each of the top 3 fits, prints:
189/// - A plot if the `plotting` feature is enabled
190/// - The equation string
191///
192/// # Parameters
193/// - `$data`: A slice of `(x, y)` points or any type accepted by `CurveFit`.
194/// - `$degree_bound`: The degree bound to use for fitting (see [`crate::statistics::DegreeBound`]).
195/// - `$method`: The scoring method to use for fitting (see [`crate::score`]).
196/// - `options`: Optional. List of basis types to compare. Default is all supported bases.
197///
198/// # Example
199/// ```rust
200/// # use polyfit::statistics::DegreeBound;
201/// # use polyfit::transforms::{ApplyNoise, Strength};
202/// # use polyfit::score::Aic;
203/// # use polyfit::{function, basis_select};
204/// function!(test(x) = 2.0 x^3 + 3.0 x^2 - 4.0 x + 5.0);
205/// let data = test
206/// .solve_range(0.0..=100.0, 1.0)
207/// .apply_normal_noise(Strength::Relative(0.1), None);
208/// basis_select!(&data, DegreeBound::Relaxed, &Aic);
209/// ```
210///
211/// The example above will output something like:
212/// ```text
213/// [ Evaluating 100 data points against 3 basis options ]
214///
215/// Basis | R² | Residuals Normality | Rating
216/// --------------- | ---------- | -------------------- | ----------
217/// ChebyshevBasis | 99.10% | 62.07% | 90% ☆☆★★★
218/// MonomialBasis | 99.10% | 62.07% | 90% ☆☆★★★
219/// FourierBasis | 82.41% | 0.00% | 62% ☆☆☆☆★
220///
221/// ChebyshevBasis: xₛ = 2(x - a) / (b - a) - 1, a=0.00e0, b=99.00, y(x) = 5.38e4T₃(xₛ) + 3.65e5T₂(xₛ) + 9.09e5T₁(xₛ) + 6.12e5
222/// Wrote plot to target\plot_output\chebyshevbasis_src_test.rs_line_307_1757796134.png
223///
224/// MonomialBasis: y(x) = 1.77x³ + 34.28x² - 1.33e3x + 1.45e4
225/// Wrote plot to target\plot_output\monomialbasis_src_test.rs_line_307_1757796135.png
226///
227/// FourierBasis: xₛ = 2(x - a) / (b - a) - 1, a=0.00e0, b=99.00, y(x) = 1.72e4cos(2π4xₛ) - 1.42e5sin(2π4xₛ) + 4.27e4cos(2π3xₛ) - 1.98e5sin(2π3xₛ) + 6.79e4cos(2π2xₛ) - 2.94e5sin(2π2xₛ) + 2.98e5cos(2πxₛ) - 5.30e5sin(2πxₛ) + 4.92e5
228/// Wrote plot to target\plot_output\fourierbasis_src_test.rs_line_307_1757796135.png
229/// ```
230#[macro_export]
231macro_rules! basis_select {
232 ($data:expr, $degree_bound:expr, $method:expr) => {{
233 use $crate::basis::*;
234 $crate::basis_select!($data, $degree_bound, $method, options = [
235 ChebyshevBasis<f64> = "Chebyshev",
236 FourierBasis<f64> = "Fourier",
237 LegendreBasis<f64> = "Legendre",
238 PhysicistsHermiteBasis<f64> = "Physicists' Hermite",
239 ProbabilistsHermiteBasis<f64> = "Probabilists' Hermite",
240 LaguerreBasis<f64> = "Laguerre",
241 LogarithmicBasis<f64> = "Logarithmic",
242 ])
243 }};
244
245 ($data:expr, $degree_bound:expr, $method:expr, options = [ $( $basis:path $( = $name:literal)? ),+ $(,)? ]) => {{
246 use $crate::value::CoordExt;
247
248 #[cfg(feature = "plotting")]
249 use $crate::plotting::AsPlottingElement;
250
251 struct FitProps {
252 model_score: f64,
253 rating: f64,
254
255 #[cfg(feature = "plotting")]
256 plot_e: $crate::plotting::PlottingElement<f64>,
257
258 name: &'static str,
259 r2: f64,
260 max_r2: f64,
261 p_value: f64,
262 stars: f64,
263 equation: String,
264 parameters: usize,
265 }
266
267 let num_basis = 0 $( + { let _ = stringify!($basis); 1 } )+;
268 let count = $data.len();
269
270 println!("[ Evaluating {count} data points against {num_basis} basis options ]\n");
271 if count < 100 {
272 println!("[ WARNING - SMALL DATASET ]");
273 println!("[ Results may be misleading for small datasets (<100 points) ]\n");
274 }
275
276 let mut all_normals_zero = true;
277 let mut options = vec![];
278 let mut min_params = usize::MAX;
279 $(
280 if let Ok(fit) = $crate::CurveFit::<$basis>::new_auto($data, $degree_bound, $method) {
281 #[allow(unused_mut, unused_assignments)] let mut name = stringify!($basis); $( name = $name; )?
282 let equation = fit.equation();
283
284 let model_score = fit.model_score($method);
285 let residuals = fit.filtered_residuals().y();
286 let r2 = fit.r_squared(None);
287 let robust_r2 = fit.robust_r_squared(None);
288 let max_r2 = $crate::value::Value::max(r2, robust_r2);
289 let p_value = $crate::statistics::residual_normality(&residuals);
290
291 let rating = (0.75 * max_r2 + 0.25 * p_value).clamp(0.0, 1.0);
292 let parameters = fit.coefficients().len();
293
294 if parameters < min_params {
295 min_params = parameters;
296 }
297
298 if p_value > f64::EPSILON {
299 all_normals_zero = false;
300 }
301
302 #[cfg(feature = "plotting")]
303 let plot_e = fit.as_plotting_element(&[], $crate::statistics::Confidence::P95, None);
304
305 options.push(FitProps {
306 model_score,
307 rating,
308
309 #[cfg(feature = "plotting")]
310 plot_e,
311
312 name,
313 r2,
314 max_r2,
315 p_value,
316 stars: 0.0, // to be filled later
317 equation,
318 parameters,
319 });
320 }
321 )+
322
323 // Give a small penalty to models with more parameters - we divide the score by `params/min_params`
324 let normalizer = $crate::statistics::DomainNormalizer::new((0.3, 1.0), (0.0, 5.0));
325 for o in options.iter_mut() {
326 let param_penalty = o.parameters as f64 / min_params as f64;
327 o.rating /= param_penalty;
328
329 //
330 // Get a star rating out of 5 based on rating
331 o.stars = normalizer.normalize(o.rating).clamp(0.0, 5.0);
332 }
333
334 // Sort by f.model_score, descending
335 options.sort_by(|p1, p2| p1.model_score.total_cmp(&p2.model_score));
336
337 //
338 // Subtract the best model score from all model scores to get relative scores
339 // Then we will use that to calculate a probability-like value for correctness of the model vs all others
340 let best_score = options.first().map_or(0.0, |p| p.model_score);
341 let likelihoods: Vec<_> = options.iter().map(|o| (-0.5 * (o.model_score - best_score)).exp()).collect();
342 let sum_likelihoods: f64 = likelihoods.iter().sum();
343 for (o, l) in options.iter_mut().zip(likelihoods) {
344 o.model_score = l / sum_likelihoods;
345 }
346
347 // let best_3: Vec<_> = options.iter().take(3).collect();
348
349 //
350 // Small table first
351 let (basis, parameters, score, r2, norm, rating) = ("Basis", "Params", "Score Weight", "Fit Quality", "Normality", "Rating");
352 let sep = || { println!("--|-{:-^30}-|-{:-^6}-|-{:-^12}-|-{:-^11}-|-{:-^9}-|-{:-^10}", "", "", "", "", "", ""); };
353 println!("# | {basis:^30} | {parameters:^6} | {score:^12} | {r2:^11} | {norm:^9} | {rating}");
354 sep();
355 for (i, props) in options.iter().enumerate() {
356 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
357 let whole_stars = props.stars.round() as usize;
358
359 let name = props.name;
360 let parameters = props.parameters;
361 let score = props.model_score * 100.0;
362 let r2 = props.max_r2 * 100.0;
363 let norm = props.p_value * 100.0;
364 let rating = props.rating * 100.0;
365
366 let vis_stars = "☆".repeat(5 - whole_stars) + &"★".repeat(whole_stars);
367
368 let score = format!("{score:.2}%");
369 let r2 = format!("{r2:.2}%");
370 let rating = format!("{rating:.0}%");
371
372 let norm = if all_normals_zero {
373 "-----".to_string()
374 } else {
375 format!("{norm:.2}%")
376 };
377
378 let n = i + 1;
379 println!(
380 "{n} | {name:>30} | {parameters:>6} | {score:>12} | {r2:>11} | {norm:>9} | {rating} {vis_stars}",
381 );
382
383 // Separator after best 3
384 if i == 2 {
385 sep();
386 }
387 }
388
389 //
390 // Instructions
391 println!();
392 println!("[ How to interpret the results ]");
393 println!("[ Results may be misleading for small datasets (<100 points) ]");
394 println!(" - Params: Number of parameters (coefficients) in the fitted model. Less means simpler model, less risk of overfitting.");
395 println!(" - Score Weight: Relative likelihood of being the best model among the options tested, based on the scoring method used.");
396 println!(" - Fit Quality: Proportion of variance in the data explained by the model (uses huber loss weighted r2).");
397 println!(" - Normality: How closely the residuals follow a normal distribution (useless for small datasets).");
398 println!(" - Rating: Combined score (0.75 * Fit Quality + 0.25 * Normality) to give an overall quality measure.");
399 println!(" - Stars: A simple star rating out of 5 based on the Rating score. Not scientific.");
400 println!(" - The best 3 models are shown below with their equations and plots (if enabled).");
401
402 for props in &options {
403 println!();
404 println!("{}: {}", props.name, props.equation);
405 println!("Fit R²: {:.4}, Residuals Normality p-value: {:.4}", props.r2, props.p_value);
406
407 #[cfg(feature = "plotting")]
408 {
409 let prefix = props.name.to_lowercase().replace([' ', '\'', '"', '<', '>', ':', ';', ',', '.'], "_");
410 $crate::plot!(props.plot_e, {
411 title: props.name.to_string()
412 }, prefix = prefix);
413 }
414 }
415 }};
416}
417
418#[cfg(test)]
419#[cfg(feature = "transforms")]
420mod tests {
421 #[test]
422 fn test_bselect() {
423 use crate::score::Aic;
424 use crate::statistics::DegreeBound;
425 use crate::transforms::{ApplyNoise, Strength};
426
427 function!(test(x) = 2.0 x^3 + 3.0 x^2 - 4.0 x + 5.0);
428 let data = test
429 .solve_range(0.0..=1000.0, 1.0)
430 .apply_normal_noise(Strength::Relative(0.3), None);
431 basis_select!(&data, DegreeBound::Relaxed, &Aic);
432 }
433}