polyfit/
test.rs

1//! A test-suite designed to bridge the gap between statistical rigour and engineering pragmatism.
2//!
3//! # Features
4//!
5//! ## General Purpose Macros
6//!
7//! ### [`crate::function!`]
8//!
9//! DSL for generating monomial polynomials. Great for generating synthetic data sets!
10//! ```rust
11//! polyfit::function!(const f(x) = 5 x^4 - 4 x^3 + 2.5);
12//! let data = f.solve_range(0.0..=100.0, 1.0);
13//! ```
14//!
15//! ### [`crate::basis_select!`]
16//! Automatically fits a dataset against every supported polynomial base and reports the best fits.
17//! - The best 3 models are printed to the console, and if the `plotting` feature is enabled, they are plotted too!
18//! ```rust
19//! # use polyfit::statistics::DegreeBound;
20//! # use polyfit::score::Aic;
21//! # use polyfit::transforms::{ApplyNoise, Strength};
22//! # use polyfit::{function, basis_select};
23//! function!(test(x) = 2.0 x^3 + 3.0 x^2 - 4.0 x + 5.0);
24//! let data = test
25//!     .solve_range(0.0..=100.0, 1.0)
26//!     .apply_normal_noise(Strength::Relative(0.1), None);
27//! basis_select!(&data, DegreeBound::Relaxed, &Aic);
28//! ```
29//!
30//! ### [`crate::plot!`]
31//! Macro to plot a polynomial fit or function if the `plotting` feature is enabled.
32//! - Supports custom titles, and axis ranges.
33//! - Can plot multiple functions on the same graph.
34//! - Includes residuals, confidence intervals, and source data for fits
35//! ```rust
36//! # let data = vec![(0.0, 1.0), (1.0, 2.0), (2.0, 0.5), (3.0, 4.0)];
37//! # use polyfit::{MonomialFit};
38//! # use polyfit::statistics::{DegreeBound};
39//! # use polyfit::score::Aic;
40//! let fit = MonomialFit::new_auto(&data, DegreeBound::Relaxed, &Aic).unwrap();
41//! # #[cfg(feature = "plotting")]
42//! # {
43//! polyfit::plot!(fit, { title: "My Fit".to_string(), x_range: Some(0.0..3.0), y_range: Some(0.0..5.0) });
44//! # }
45//! ```
46//!
47//! ## Fit quality assertions
48//! These are designed to be used in unit tests to validate fit quality.
49//! - If `plotting` feature is enabled, the fit is plotted on failure to help diagnose issues.
50//!
51//! ### [`crate::assert_close`]
52//! Asserts that two floating-point values are approximately equal within a small tolerance (epsilon).
53//! This is useful for comparing computed values where exact equality is not expected due to rounding errors.
54//! - Uses the machine epsilon for the floating-point type as the tolerance.
55//! - `assert_eq!` equivalent for floats.
56//!
57//! ### [`crate::assert_all_close`]
58//! Asserts that two slices of floating-point values are approximately equal element-wise within a small tolerance (epsilon).
59//! This is useful for comparing arrays of computed values where exact equality is not expected due to rounding errors.
60//! - Uses the machine epsilon for the floating-point type as the tolerance.
61//! - Element-wise [`crate::assert_close`].
62//!
63//! ### [`crate::assert_y`]
64//! Asserts that a fit produces an expected 'y' value at a given 'x' input.
65//! Useful for spot-checking specific predictions of the model.
66//! - Compares the fit's output against an expected value within a small tolerance (epsilon).
67//!
68//! ### [`crate::assert_fits`]
69//! Asserts that a fit matches a known polynomial function.
70//! Compares the fit's r² value against a threshold to ensure it closely follows the expected curve.
71//! Useful if you know the underlying function but want to validate the fitting process.
72//! See [`crate::CurveFit::r_squared`] for more details.
73//!
74//! ### [`crate::assert_r_squared`]
75//! General case of [`crate::assert_fits`] that does not require a known function.
76//! Asserts that the fit's r² value relative to the source data is above a certain threshold.
77//! This is a measure of how well the curve explains how wiggly the data is.
78//! See [`crate::CurveFit::r_squared`] for more details.
79//!
80//! ### [`crate::assert_monotone`]
81//! Asserts that a fit is monotonic (either strictly increasing or strictly decreasing) over the range of the source data.
82//! This is useful for validating models where the output should consistently rise or fall with the input.
83//! See [`crate::CurveFit::monotonicity_violations`] for more details.
84//!
85//! ### [`crate::assert_residuals_normal`]
86//! Asserts that the residuals (the differences between the observed and predicted values) of a fit are normally distributed.
87//! Think of it like making sure the errors are random, not based on some undiscovered pattern.
88//! See [`crate::statistics::residual_normality`] for more details.
89//! - Results will be between 0.0 and 1.0, with values closer to 1.0 indicating a better fit.
90//!
91//! ### [`crate::assert_max_residual`]
92//! Asserts that all residuals (the differences between the observed and predicted values) of a fit are below a certain threshold.
93//! This helps to ensure that the fit is not only accurate, but also consistent.
94//! - This is an absolute measure, unlike [`crate::assert_residuals_normal`] which is a relative measure.
95
96#[macro_use]
97#[cfg(test)]
98pub mod basis_assertions;
99
100mod assertions;
101
102/// Macro to generate a monomial polynomial function.
103///
104/// This is good for using as a data source for testing
105/// - Terms can be listed in any order
106/// - Same-power terms are summed
107/// - Missing terms are 0
108///
109/// The only major limitation is that it needs a space between the coefficient and the variable:
110/// - `20.0 x^3` is valid, but `20.0x^3` is not.
111///
112/// Syntax:
113/// ```text
114/// function!(
115///     [const | static]?
116///     [<name>(<x>) = ]?
117///     [ [+]? <coef> [ x [ ^ <deg> ]? ]? ]+
118/// )
119/// ```
120///
121/// # Example
122/// ```
123/// # use polyfit::function;
124/// function!(test(x) = 20.0 x^3 + 3.0 x^2 - 2.0 x + 4.0); // Normal let-binding
125/// function!(const test2(x) = 20.0 x^3 + 3.0 x^2 - 2.0 x + 4.0); // const! can live outside functions
126/// function!(static test3(x) = 20.0 x^3 + 3.0 x^2 - 2.0 x + 4.0); // I added static for some reason
127/// let test4 = function!(20.0 x^3 + 3.0 x^2 - 2.0); // No auto bindings
128///
129/// // Evaluate at x = 5
130/// println!("{}", test.y(5.0));
131///
132/// // You could visualize it too:
133/// // polyfit::plot_function!(test, x_range = 0.0..1000.0);
134/// ```
135#[macro_export]
136macro_rules! function {
137    ($( $(+)? $c0:literal $(x $( ^ $d0:literal )?)? )+) => { {
138        const LEN: usize = {
139            let mut degree = 0; $(
140                let d2 = 1 $(+ 1 $(* $d0 as usize)?)?;
141                if d2 > degree { degree = d2; }
142            )+
143            degree
144        };
145
146        const COEFS: [f64; LEN] = {
147            let mut coefs = [0.0; LEN];
148            // coef alone is degree 0, 1 if just x, or the power if specified
149            $( coefs[ 0 $(+ 1 $(* $d0 as usize)?)? ] += $c0 as f64; )+
150            coefs
151        };
152
153        $crate::MonomialPolynomial::borrowed(&COEFS)
154    }};
155
156    ($name:ident (x) = $($rest:tt)+ ) => {
157        let $name: $crate::MonomialPolynomial = $crate::function!($($rest)+);
158    };
159
160    (const $name:ident (x) = $($rest:tt)+ ) => {
161        const $name: $crate::MonomialPolynomial<'static> = $crate::function!($($rest)+);
162    };
163
164    (static $name:ident (x) = $($rest:tt)+ ) => {
165        static $name: $crate::MonomialPolynomial<'static> = $crate::function!($($rest)+);
166    };
167}
168
169/// Automatically fits a dataset against multiple polynomial bases and reports the best fits.
170///
171/// # Syntax
172/// ```text
173/// basis_select!(data, options = [Basis1<T>, Basis2<T>, …]);
174/// basis_select!(data); // Uses default [MonomialBasis<f64>, ChebyshevBasis<f64>]
175/// ```
176///
177/// # Behavior
178/// - Tries to construct a `CurveFit<Basis>` for each basis in the provided list.
179/// - Uses `CurveFit::new_auto` with for each basis with the provided `DegreeBound` and scoring method ([`crate::score`]).
180/// - For each successful fit, computes:
181///   - r² value against the source data
182///   - p-value for residual normality test
183///   - A combined rating (0.75 * r² + 0.25 * p-value) for overall quality.
184///   - A star rating out of 5 based on the combined rating.
185///   - An equation string representation.
186///
187/// - Displays a summary table of the top 3 fits
188/// - For each of the top 3 fits, prints:
189///   - A plot if the `plotting` feature is enabled
190///   - The equation string
191///
192/// # Parameters
193/// - `$data`: A slice of `(x, y)` points or any type accepted by `CurveFit`.
194/// - `$degree_bound`: The degree bound to use for fitting (see [`crate::statistics::DegreeBound`]).
195/// - `$method`: The scoring method to use for fitting (see [`crate::score`]).
196/// - `options`: Optional. List of basis types to compare. Default is all supported bases.
197///
198/// # Example
199/// ```rust
200/// # use polyfit::statistics::DegreeBound;
201/// # use polyfit::transforms::{ApplyNoise, Strength};
202/// # use polyfit::score::Aic;
203/// # use polyfit::{function, basis_select};
204/// function!(test(x) = 2.0 x^3 + 3.0 x^2 - 4.0 x + 5.0);
205/// let data = test
206///     .solve_range(0.0..=100.0, 1.0)
207///     .apply_normal_noise(Strength::Relative(0.1), None);
208/// basis_select!(&data, DegreeBound::Relaxed, &Aic);
209/// ```
210///
211/// The example above will output something like:
212/// ```text
213/// [ Evaluating 100 data points against 3 basis options ]
214///
215///      Basis      |     R²     | Residuals Normality  | Rating
216/// --------------- | ---------- | -------------------- | ----------
217/// ChebyshevBasis  | 99.10%     | 62.07%               | 90% ☆☆★★★
218///  MonomialBasis  | 99.10%     | 62.07%               | 90% ☆☆★★★
219///  FourierBasis   | 82.41%     | 0.00%                | 62% ☆☆☆☆★
220///
221/// ChebyshevBasis: xₛ = 2(x - a) / (b - a) - 1, a=0.00e0, b=99.00, y(x) = 5.38e4T₃(xₛ) + 3.65e5T₂(xₛ) + 9.09e5T₁(xₛ) + 6.12e5
222/// Wrote plot to target\plot_output\chebyshevbasis_src_test.rs_line_307_1757796134.png
223///
224/// MonomialBasis: y(x) = 1.77x³ + 34.28x² - 1.33e3x + 1.45e4
225/// Wrote plot to target\plot_output\monomialbasis_src_test.rs_line_307_1757796135.png
226///
227/// FourierBasis: xₛ = 2(x - a) / (b - a) - 1, a=0.00e0, b=99.00, y(x) = 1.72e4cos(2π4xₛ) - 1.42e5sin(2π4xₛ) + 4.27e4cos(2π3xₛ) - 1.98e5sin(2π3xₛ) + 6.79e4cos(2π2xₛ) - 2.94e5sin(2π2xₛ) + 2.98e5cos(2πxₛ) - 5.30e5sin(2πxₛ) + 4.92e5
228/// Wrote plot to target\plot_output\fourierbasis_src_test.rs_line_307_1757796135.png
229/// ```
230#[macro_export]
231macro_rules! basis_select {
232    ($data:expr, $degree_bound:expr, $method:expr) => {{
233        use $crate::basis::*;
234        $crate::basis_select!($data, $degree_bound, $method, options = [
235            ChebyshevBasis<f64> = "Chebyshev",
236            FourierBasis<f64> = "Fourier",
237            LegendreBasis<f64> = "Legendre",
238            PhysicistsHermiteBasis<f64> = "Physicists' Hermite",
239            ProbabilistsHermiteBasis<f64> = "Probabilists' Hermite",
240            LaguerreBasis<f64> = "Laguerre",
241            LogarithmicBasis<f64> = "Logarithmic",
242        ])
243    }};
244
245    ($data:expr, $degree_bound:expr, $method:expr, options = [ $( $basis:path $( = $name:literal)? ),+ $(,)? ]) => {{
246        use $crate::value::CoordExt;
247
248        #[cfg(feature = "plotting")]
249        use $crate::plotting::AsPlottingElement;
250
251        struct FitProps {
252            model_score: f64,
253            rating: f64,
254
255            #[cfg(feature = "plotting")]
256            plot_e: $crate::plotting::PlottingElement<f64>,
257
258            name: &'static str,
259            r2: f64,
260            max_r2: f64,
261            p_value: f64,
262            stars: f64,
263            equation: String,
264            parameters: usize,
265        }
266
267        let num_basis = 0 $( + { let _ = stringify!($basis); 1 } )+;
268        let count = $data.len();
269
270        println!("[ Evaluating {count} data points against {num_basis} basis options ]\n");
271        if count < 100 {
272            println!("[ WARNING - SMALL DATASET ]");
273            println!("[ Results may be misleading for small datasets (<100 points) ]\n");
274        }
275
276        let mut all_normals_zero = true;
277        let mut options = vec![];
278        let mut min_params = usize::MAX;
279        $(
280            if let Ok(fit) = $crate::CurveFit::<$basis>::new_auto($data, $degree_bound, $method) {
281                #[allow(unused_mut, unused_assignments)] let mut name = stringify!($basis); $( name = $name; )?
282                let equation = fit.equation();
283
284                let model_score = fit.model_score($method);
285                let residuals = fit.filtered_residuals().y();
286                let r2 = fit.r_squared(None);
287                let robust_r2 = fit.robust_r_squared(None);
288                let max_r2 = $crate::value::Value::max(r2, robust_r2);
289                let p_value = $crate::statistics::residual_normality(&residuals);
290
291                let rating = (0.75 * max_r2 + 0.25 * p_value).clamp(0.0, 1.0);
292                let parameters = fit.coefficients().len();
293
294                if parameters < min_params {
295                    min_params = parameters;
296                }
297
298                if p_value > f64::EPSILON {
299                    all_normals_zero = false;
300                }
301
302                #[cfg(feature = "plotting")]
303                let plot_e = fit.as_plotting_element(&[], $crate::statistics::Confidence::P95, None);
304
305                options.push(FitProps {
306                    model_score,
307                    rating,
308
309                    #[cfg(feature = "plotting")]
310                    plot_e,
311
312                    name,
313                    r2,
314                    max_r2,
315                    p_value,
316                    stars: 0.0, // to be filled later
317                    equation,
318                    parameters,
319                });
320            }
321        )+
322
323        // Give a small penalty to models with more parameters - we divide the score by `params/min_params`
324        let normalizer = $crate::statistics::DomainNormalizer::new((0.3, 1.0), (0.0, 5.0));
325        for o in options.iter_mut() {
326            let param_penalty = o.parameters as f64 / min_params as f64;
327            o.rating /= param_penalty;
328
329            //
330            // Get a star rating out of 5 based on rating
331            o.stars = normalizer.normalize(o.rating).clamp(0.0, 5.0);
332        }
333
334        // Sort by f.model_score, descending
335        options.sort_by(|p1, p2| p1.model_score.total_cmp(&p2.model_score));
336
337        //
338        // Subtract the best model score from all model scores to get relative scores
339        // Then we will use that to calculate a probability-like value for correctness of the model vs all others
340        let best_score = options.first().map_or(0.0, |p| p.model_score);
341        let likelihoods: Vec<_> = options.iter().map(|o| (-0.5 * (o.model_score - best_score)).exp()).collect();
342        let sum_likelihoods: f64 = likelihoods.iter().sum();
343        for (o, l) in options.iter_mut().zip(likelihoods) {
344            o.model_score = l / sum_likelihoods;
345        }
346
347        // let best_3: Vec<_> = options.iter().take(3).collect();
348
349        //
350        // Small table first
351        let (basis, parameters, score, r2, norm, rating) = ("Basis", "Params", "Score Weight", "Fit Quality", "Normality", "Rating");
352        let sep = || { println!("--|-{:-^30}-|-{:-^6}-|-{:-^12}-|-{:-^11}-|-{:-^9}-|-{:-^10}", "", "", "", "", "", ""); };
353        println!("# | {basis:^30} | {parameters:^6} | {score:^12} | {r2:^11} | {norm:^9} | {rating}");
354        sep();
355        for (i, props) in options.iter().enumerate() {
356            #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
357            let whole_stars = props.stars.round() as usize;
358
359            let name = props.name;
360            let parameters = props.parameters;
361            let score = props.model_score * 100.0;
362            let r2 = props.max_r2 * 100.0;
363            let norm = props.p_value * 100.0;
364            let rating = props.rating * 100.0;
365
366            let vis_stars = "☆".repeat(5 - whole_stars) + &"★".repeat(whole_stars);
367
368            let score = format!("{score:.2}%");
369            let r2 = format!("{r2:.2}%");
370            let rating = format!("{rating:.0}%");
371
372            let norm = if all_normals_zero {
373                "-----".to_string()
374            } else {
375                format!("{norm:.2}%")
376            };
377
378            let n = i + 1;
379            println!(
380                "{n} | {name:>30} | {parameters:>6} | {score:>12} | {r2:>11} | {norm:>9} | {rating} {vis_stars}",
381            );
382
383            // Separator after best 3
384            if i == 2 {
385                sep();
386            }
387        }
388
389        //
390        // Instructions
391        println!();
392        println!("[ How to interpret the results ]");
393        println!("[ Results may be misleading for small datasets (<100 points) ]");
394        println!(" - Params: Number of parameters (coefficients) in the fitted model. Less means simpler model, less risk of overfitting.");
395        println!(" - Score Weight: Relative likelihood of being the best model among the options tested, based on the scoring method used.");
396        println!(" - Fit Quality: Proportion of variance in the data explained by the model (uses huber loss weighted r2).");
397        println!(" - Normality: How closely the residuals follow a normal distribution (useless for small datasets).");
398        println!(" - Rating: Combined score (0.75 * Fit Quality + 0.25 * Normality) to give an overall quality measure.");
399        println!(" - Stars: A simple star rating out of 5 based on the Rating score. Not scientific.");
400        println!(" - The best 3 models are shown below with their equations and plots (if enabled).");
401
402        for props in &options {
403            println!();
404            println!("{}: {}", props.name, props.equation);
405            println!("Fit R²: {:.4}, Residuals Normality p-value: {:.4}", props.r2, props.p_value);
406
407            #[cfg(feature = "plotting")]
408            {
409                let prefix = props.name.to_lowercase().replace([' ', '\'', '"', '<', '>', ':', ';', ',', '.'], "_");
410                $crate::plot!(props.plot_e, {
411                    title: props.name.to_string()
412                }, prefix = prefix);
413            }
414        }
415    }};
416}
417
418#[cfg(test)]
419#[cfg(feature = "transforms")]
420mod tests {
421    #[test]
422    fn test_bselect() {
423        use crate::score::Aic;
424        use crate::statistics::DegreeBound;
425        use crate::transforms::{ApplyNoise, Strength};
426
427        function!(test(x) = 2.0 x^3 + 3.0 x^2 - 4.0 x + 5.0);
428        let data = test
429            .solve_range(0.0..=1000.0, 1.0)
430            .apply_normal_noise(Strength::Relative(0.3), None);
431        basis_select!(&data, DegreeBound::Relaxed, &Aic);
432    }
433}