polyfit 0.11.0

Because you don't need to be able to build a powerdrill to use one safely
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
//! A test-suite designed to bridge the gap between statistical rigour and engineering pragmatism.
//!
//! # Features
//!
//! ## General Purpose Macros
//!
//! ### [`crate::function!`]
//!
//! DSL for generating monomial polynomials. Great for generating synthetic data sets!
//! ```rust
//! polyfit::function!(const f(x) = 5 x^4 - 4 x^3 + 2.5);
//! let data = f.solve_range(0.0..=100.0, 1.0);
//! ```
//!
//! ### [`crate::basis_select!`]
//! Automatically fits a dataset against every supported polynomial base and reports the best fits.
//! - The best 3 models are printed to the console, and if the `plotting` feature is enabled, they are plotted too!
//! ```rust
//! # use polyfit::statistics::DegreeBound;
//! # use polyfit::score::Aic;
//! # use polyfit::transforms::{ApplyNoise, Strength};
//! # use polyfit::{function, basis_select};
//! function!(test(x) = 2.0 x^3 + 3.0 x^2 - 4.0 x + 5.0);
//! let data = test
//!     .solve_range(0.0..=100.0, 1.0)
//!     .apply_normal_noise(Strength::Relative(0.1), None);
//! basis_select!(&data, DegreeBound::Relaxed, &Aic);
//! ```
//!
//! ### [`crate::plot!`]
//! Macro to plot a polynomial fit or function if the `plotting` feature is enabled.
//! - Supports custom titles, and axis ranges.
//! - Can plot multiple functions on the same graph.
//! - Includes residuals, confidence intervals, and source data for fits
//! ```rust
//! # let data = vec![(0.0, 1.0), (1.0, 2.0), (2.0, 0.5), (3.0, 4.0)];
//! # use polyfit::{MonomialFit};
//! # use polyfit::statistics::{DegreeBound};
//! # use polyfit::score::Aic;
//! let fit = MonomialFit::new_auto(&data, DegreeBound::Relaxed, &Aic).unwrap();
//! # #[cfg(feature = "plotting")]
//! # {
//! polyfit::plot!(fit, { title: "My Fit".to_string(), x_range: Some(0.0..3.0), y_range: Some(0.0..5.0) });
//! # }
//! ```
//!
//! ## Fit quality assertions
//! These are designed to be used in unit tests to validate fit quality.
//! - If `plotting` feature is enabled, the fit is plotted on failure to help diagnose issues.
//!
//! ### [`crate::assert_close`]
//! Asserts that two floating-point values are approximately equal within a small tolerance (epsilon).
//! This is useful for comparing computed values where exact equality is not expected due to rounding errors.
//! - Uses the machine epsilon for the floating-point type as the tolerance.
//! - `assert_eq!` equivalent for floats.
//!
//! ### [`crate::assert_all_close`]
//! Asserts that two slices of floating-point values are approximately equal element-wise within a small tolerance (epsilon).
//! This is useful for comparing arrays of computed values where exact equality is not expected due to rounding errors.
//! - Uses the machine epsilon for the floating-point type as the tolerance.
//! - Element-wise [`crate::assert_close`].
//!
//! ### [`crate::assert_y`]
//! Asserts that a fit produces an expected 'y' value at a given 'x' input.
//! Useful for spot-checking specific predictions of the model.
//! - Compares the fit's output against an expected value within a small tolerance (epsilon).
//!
//! ### [`crate::assert_fits`]
//! Asserts that a fit matches a known polynomial function.
//! Compares the fit's r² value against a threshold to ensure it closely follows the expected curve.
//! Useful if you know the underlying function but want to validate the fitting process.
//! See [`crate::CurveFit::r_squared`] for more details.
//!
//! ### [`crate::assert_r_squared`]
//! General case of [`crate::assert_fits`] that does not require a known function.
//! Asserts that the fit's r² value relative to the source data is above a certain threshold.
//! This is a measure of how well the curve explains how wiggly the data is.
//! See [`crate::CurveFit::r_squared`] for more details.
//!
//! ### [`crate::assert_monotone`]
//! Asserts that a fit is monotonic (either strictly increasing or strictly decreasing) over the range of the source data.
//! This is useful for validating models where the output should consistently rise or fall with the input.
//! See [`crate::CurveFit::monotonicity_violations`] for more details.
//!
//! ### [`crate::assert_residuals_normal`]
//! Asserts that the residuals (the differences between the observed and predicted values) of a fit are normally distributed.
//! Think of it like making sure the errors are random, not based on some undiscovered pattern.
//! See [`crate::statistics::residual_normality`] for more details.
//! - Results will be between 0.0 and 1.0, with values closer to 1.0 indicating a better fit.
//!
//! ### [`crate::assert_max_residual`]
//! Asserts that all residuals (the differences between the observed and predicted values) of a fit are below a certain threshold.
//! This helps to ensure that the fit is not only accurate, but also consistent.
//! - This is an absolute measure, unlike [`crate::assert_residuals_normal`] which is a relative measure.

#[macro_use]
#[cfg(test)]
pub mod basis_assertions;

#[doc(hidden)]
pub mod assertions;

/// Macro to generate a monomial polynomial function.
///
/// This is good for using as a data source for testing
/// - Terms can be listed in any order
/// - Same-power terms are summed
/// - Missing terms are 0
///
/// The only major limitation is that it needs a space between the coefficient and the variable:
/// - `20.0 x^3` is valid, but `20.0x^3` is not.
///
/// Syntax:
/// ```text
/// function!(
///     [const | static]?
///     [<name>(<x>) = ]?
///     [ [+]? <coef> [ x [ ^ <deg> ]? ]? ]+
/// )
/// ```
///
/// # Example
/// ```
/// # use polyfit::function;
/// function!(test(x) = 20.0 x^3 + 3.0 x^2 - 2.0 x + 4.0); // Normal let-binding
/// function!(const test2(x) = 20.0 x^3 + 3.0 x^2 - 2.0 x + 4.0); // const! can live outside functions
/// function!(static test3(x) = 20.0 x^3 + 3.0 x^2 - 2.0 x + 4.0); // I added static for some reason
/// let test4 = function!(20.0 x^3 + 3.0 x^2 - 2.0); // No auto bindings
///
/// // Evaluate at x = 5
/// println!("{}", test.y(5.0));
///
/// // You could visualize it too:
/// // polyfit::plot_function!(test, x_range = 0.0..1000.0);
/// ```
#[macro_export]
macro_rules! function {
    ($( $(+)? $c0:literal $($x:ident $( ^ $d0:literal )?)? )+) => { {
        const LEN: usize = {
            let mut degree = 0; $(
                let d2 = 1 $(+ 1 $(* $d0 as usize)?)?;
                if d2 > degree { degree = d2; }
            )+
            degree
        };

        const COEFS: [f64; LEN] = {
            let mut coefs = [0.0; LEN];
            // coef alone is degree 0, 1 if just x, or the power if specified
            $( coefs[ 0 $(+ 1 $(* $d0 as usize)?)? ] += $c0 as f64; )+
            coefs
        };

        $crate::MonomialPolynomial::borrowed(&COEFS)
    }};

    ($name:ident ($x:ident) = $($rest:tt)+ ) => {
        let $name: $crate::MonomialPolynomial = $crate::function!($($rest)+);
    };

    (const $name:ident ($x:ident) = $($rest:tt)+ ) => {
        const $name: $crate::MonomialPolynomial<'static> = $crate::function!($($rest)+);
    };

    (static $name:ident ($x:ident) = $($rest:tt)+ ) => {
        static $name: $crate::MonomialPolynomial<'static> = $crate::function!($($rest)+);
    };
}

#[doc(hidden)]
pub struct FitProps<T: crate::value::Value> {
    pub model_score: T,
    pub rating: T,

    #[cfg(feature = "plotting")]
    pub plot_e: crate::plotting::PlottingElement<T>,

    pub name: &'static str,
    pub r2: T,
    pub max_r2: T,
    pub p_value: T,
    pub stars: T,
    pub equation: String,
    pub parameters: usize,
}
impl<T: crate::value::Value> FitProps<T> {
    pub fn from_fit<B>(
        fit: &crate::CurveFit<B, T>,
        basis_name: &'static str,
        method: &impl crate::score::ModelScoreProvider<B, T>,
    ) -> FitProps<T>
    where
        B: crate::basis::Basis<T> + crate::display::PolynomialDisplay<T>,
    {
        use crate::value::{CoordExt, FloatClampedCast};

        #[cfg(feature = "plotting")]
        use crate::plotting::AsPlottingElement;

        const R2_WEIGHT: f64 = 0.75;
        const P_VALUE_WEIGHT: f64 = 0.25;

        let equation = fit.equation();

        let model_score = fit.model_score(method);
        let residuals = fit.filtered_residuals().y();
        let r2 = fit.r_squared(None);
        let robust_r2 = fit.robust_r_squared(None);
        let max_r2 = crate::value::Value::max(r2, robust_r2);
        let p_value = crate::statistics::residual_normality(&residuals);

        let r2_weight = R2_WEIGHT.clamped_cast::<T>();
        let p_weight = P_VALUE_WEIGHT.clamped_cast::<T>();
        let rating = r2_weight * max_r2 + p_weight * p_value;
        let rating = crate::value::Value::clamp(rating, T::zero(), T::one());
        let parameters = fit.coefficients().len();

        #[cfg(feature = "plotting")]
        let plot_e = fit.as_plotting_element(&[], crate::statistics::Confidence::P95, None);

        Self {
            model_score,
            rating,

            #[cfg(feature = "plotting")]
            plot_e,

            name: basis_name,
            r2,
            max_r2,
            p_value,
            stars: T::zero(), // to be filled later
            equation,
            parameters,
        }
    }
}

/// Automatically fits a dataset against multiple polynomial bases and reports the best fits.
///
/// # Syntax
/// ```text
/// basis_select!(data, options = [Basis1<T>, Basis2<T>, …]);
/// basis_select!(data); // Uses default [MonomialBasis<f64>, ChebyshevBasis<f64>]
/// ```
///
/// # Behavior
/// - Tries to construct a `CurveFit<Basis>` for each basis in the provided list.
/// - Uses `CurveFit::new_auto` with for each basis with the provided `DegreeBound` and scoring method ([`crate::score`]).
/// - For each successful fit, computes:
///   - r² value against the source data
///   - p-value for residual normality test
///   - A combined rating (0.75 * r² + 0.25 * p-value) for overall quality.
///   - A star rating out of 5 based on the combined rating.
///   - An equation string representation.
///
/// - Displays a summary table of the top 3 fits
/// - For each of the top 3 fits, prints:
///   - A plot if the `plotting` feature is enabled
///   - The equation string
///
/// # Parameters
/// - `$data`: A slice of `(x, y)` points or any type accepted by `CurveFit`.
/// - `$degree_bound`: The degree bound to use for fitting (see [`crate::statistics::DegreeBound`]).
/// - `$method`: The scoring method to use for fitting (see [`crate::score`]).
/// - `options`: Optional. List of basis types to compare. Default is all supported bases.
///
/// # Example
/// ```rust
/// # use polyfit::statistics::DegreeBound;
/// # use polyfit::transforms::{ApplyNoise, Strength};
/// # use polyfit::score::Aic;
/// # use polyfit::{function, basis_select};
/// function!(test(x) = 2.0 x^3 + 3.0 x^2 - 4.0 x + 5.0);
/// let data = test
///     .solve_range(0.0..=100.0, 1.0)
///     .apply_normal_noise(Strength::Relative(0.1), None);
/// basis_select!(&data, DegreeBound::Relaxed, &Aic);
/// ```
///
/// The example above will output something like:
/// ```text
/// [ Evaluating 100 data points against 3 basis options ]
///
///      Basis      |     R²     | Residuals Normality  | Rating
/// --------------- | ---------- | -------------------- | ----------
/// ChebyshevBasis  | 99.10%     | 62.07%               | 90% ☆☆★★★
///  MonomialBasis  | 99.10%     | 62.07%               | 90% ☆☆★★★
///  FourierBasis   | 82.41%     | 0.00%                | 62% ☆☆☆☆★
///
/// ChebyshevBasis: xₛ = 2(x - a) / (b - a) - 1, a=0.00e0, b=99.00, y(x) = 5.38e4T₃(xₛ) + 3.65e5T₂(xₛ) + 9.09e5T₁(xₛ) + 6.12e5
/// Wrote plot to target\plot_output\chebyshevbasis_src_test.rs_line_307_1757796134.png
///
/// MonomialBasis: y(x) = 1.77x³ + 34.28x² - 1.33e3x + 1.45e4
/// Wrote plot to target\plot_output\monomialbasis_src_test.rs_line_307_1757796135.png
///
/// FourierBasis: xₛ = 2(x - a) / (b - a) - 1, a=0.00e0, b=99.00, y(x) = 1.72e4cos(2π4xₛ) - 1.42e5sin(2π4xₛ) + 4.27e4cos(2π3xₛ) - 1.98e5sin(2π3xₛ) + 6.79e4cos(2π2xₛ) - 2.94e5sin(2π2xₛ) + 2.98e5cos(2πxₛ) - 5.30e5sin(2πxₛ) + 4.92e5
/// Wrote plot to target\plot_output\fourierbasis_src_test.rs_line_307_1757796135.png
/// ```
#[macro_export]
macro_rules! basis_select {
    ($data:expr, $degree_bound:expr, $method:expr) => {{
        use $crate::basis::*;
        $crate::basis_select!($data, $degree_bound, $method, options = [
            ChebyshevBasis<f64> = "Chebyshev",
            FourierBasis<f64> = "Fourier",
            LegendreBasis<f64> = "Legendre",
            PhysicistsHermiteBasis<f64> = "Physicists' Hermite",
            ProbabilistsHermiteBasis<f64> = "Probabilists' Hermite",
            LaguerreBasis<f64> = "Laguerre",
            LogarithmicBasis<f64> = "Logarithmic",
        ])
    }};

    ($data:expr, $degree_bound:expr, $method:expr, options = [ $( $basis:path $( = $name:literal)? ),+ $(,)? ]) => {{
        let num_basis = 0 $( + { let _ = stringify!($basis); 1 } )+;
        let count = $data.len();

        println!("[ Evaluating {count} data points against {num_basis} basis options ]\n");
        if count < 100 {
            println!("[ WARNING - SMALL DATASET ]");
            println!("[ Results may be misleading for small datasets (<100 points) ]\n");
        }

        let mut all_normals_zero = true;
        let mut options = vec![];
        let mut min_params = usize::MAX;
        $(
            if let Ok(fit) = $crate::CurveFit::<$basis>::new_auto($data, $degree_bound, $method) {
                #[allow(unused_mut, unused_assignments)] let mut name = stringify!($basis); $( name = $name; )?
                let props = $crate::test::FitProps::from_fit(&fit, name, $method);

                if props.parameters < min_params {
                    min_params = props.parameters;
                }

                if props.p_value > f64::EPSILON {
                    all_normals_zero = false;
                }

                options.push(props);
            }
        )+

        // Give a small penalty to models with more parameters - we divide the score by `params/min_params`
        let normalizer = $crate::statistics::DomainNormalizer::new((0.3, 1.0), (0.0, 5.0));
        for o in options.iter_mut() {
            let param_penalty = o.parameters as f64 / min_params as f64;
            o.rating /= param_penalty;

            //
            // Get a star rating out of 5 based on rating
            o.stars = normalizer.normalize(o.rating).clamp(0.0, 5.0);
        }

        // Sort by f.model_score, descending
        options.sort_by(|p1, p2| p1.model_score.total_cmp(&p2.model_score));

        //
        // Subtract the best model score from all model scores to get relative scores
        // Then we will use that to calculate a probability-like value for correctness of the model vs all others
        let best_score = options.first().map_or(0.0, |p| p.model_score);
        let likelihoods: Vec<_> = options.iter().map(|o| (-0.5 * (o.model_score - best_score)).exp()).collect();
        let sum_likelihoods: f64 = likelihoods.iter().sum();
        for (o, l) in options.iter_mut().zip(likelihoods) {
            o.model_score = l / sum_likelihoods;
        }

        // let best_3: Vec<_> = options.iter().take(3).collect();

        //
        // Small table first
        let (basis, parameters, score, r2, norm, rating) = ("Basis", "Params", "Score Weight", "Fit Quality", "Normality", "Rating");
        let sep = || { println!("--|-{:-^30}-|-{:-^6}-|-{:-^12}-|-{:-^11}-|-{:-^9}-|-{:-^10}", "", "", "", "", "", ""); };
        println!("# | {basis:^30} | {parameters:^6} | {score:^12} | {r2:^11} | {norm:^9} | {rating}");
        sep();
        for (i, props) in options.iter().enumerate() {
            #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
            let whole_stars = props.stars.round() as usize;

            let name = props.name;
            let parameters = props.parameters;
            let score = props.model_score * 100.0;
            let r2 = props.max_r2 * 100.0;
            let norm = props.p_value * 100.0;
            let rating = props.rating * 100.0;

            let vis_stars = "".repeat(5 - whole_stars) + &"".repeat(whole_stars);

            let score = format!("{score:.2}%");
            let r2 = format!("{r2:.2}%");
            let rating = format!("{rating:.0}%");

            let norm = if all_normals_zero {
                "-----".to_string()
            } else {
                format!("{norm:.2}%")
            };

            let n = i + 1;
            println!(
                "{n} | {name:>30} | {parameters:>6} | {score:>12} | {r2:>11} | {norm:>9} | {rating} {vis_stars}",
            );

            // Separator after best 3
            if i == 2 {
                sep();
            }
        }

        //
        // Instructions
        println!();
        println!("[ How to interpret the results ]");
        println!("[ Results may be misleading for small datasets (<100 points) ]");
        println!(" - Params: Number of parameters (coefficients) in the fitted model. Less means simpler model, less risk of overfitting.");
        println!(" - Score Weight: Relative likelihood of being the best model among the options tested, based on the scoring method used.");
        println!(" - Fit Quality: Proportion of variance in the data explained by the model (uses huber loss weighted r2).");
        println!(" - Normality: How closely the residuals follow a normal distribution (useless for small datasets).");
        println!(" - Rating: Combined score (0.75 * Fit Quality + 0.25 * Normality) to give an overall quality measure.");
        println!(" - Stars: A simple star rating out of 5 based on the Rating score. Not scientific.");
        println!(" - The best 3 models are shown below with their equations and plots (if enabled).");

        for props in &options {
            println!();
            println!("{}: {}", props.name, props.equation);
            println!("Fit R²: {:.4}, Residuals Normality p-value: {:.4}", props.r2, props.p_value);

            let prefix = props.name.to_lowercase().replace([' ', '\'', '"', '<', '>', ':', ';', ',', '.'], "_");
            $crate::plot!(props.plot_e, {
                title: props.name.to_string()
            }, prefix = prefix);
        }
    }};
}

#[cfg(test)]
#[cfg(feature = "transforms")]
mod tests {
    #[test]
    fn test_bselect() {
        use crate::score::Aic;
        use crate::statistics::DegreeBound;
        use crate::transforms::{ApplyNoise, Strength};

        function!(test(x) = 2.0 x^3 + 3.0 x^2 - 4.0 x + 5.0);
        let data = test
            .solve_range(0.0..=1000.0, 1.0)
            .apply_normal_noise(Strength::Relative(0.3), None);
        basis_select!(&data, DegreeBound::Relaxed, &Aic);
    }
}