polyfit/fit.rs
use std::{borrow::Cow, ops::RangeInclusive};

use nalgebra::{DMatrix, DVector, SVD};

use crate::{
    basis::{
        Basis, CriticalPoint, DifferentialBasis, IntegralBasis, IntoMonomialBasis, OrthogonalBasis,
    },
    display::PolynomialDisplay,
    error::{Error, Result},
    score::ModelScoreProvider,
    statistics::{self, Confidence, ConfidenceBand, DegreeBound, Tolerance},
    value::{CoordExt, SteppedValues, Value},
    MonomialPolynomial, Polynomial,
};

/// Logarithmic series curve fit
///
/// Uses logarithmic basis functions, which are particularly useful for modeling data that exhibits logarithmic growth or decay.
/// The basis functions include terms like 1, ln(x), (ln(x))^2, ..., (ln(x))^n.
pub type LogarithmicFit<'data, T = f64> = CurveFit<'data, crate::basis::LogarithmicBasis<T>, T>;

/// Laguerre series curve fit
///
/// Uses Laguerre polynomials, which are orthogonal polynomials defined on the interval \[0, ∞).
/// These polynomials are particularly useful in quantum mechanics and numerical analysis.
pub type LaguerreFit<'data, T = f64> = CurveFit<'data, crate::basis::LaguerreBasis<T>, T>;

/// Physicists' Hermite series curve fit
///
/// Uses Physicists' Hermite polynomials, which are orthogonal polynomials defined on the interval (-∞, ∞).
/// These polynomials are particularly useful in probability, combinatorics, and physics, especially in quantum mechanics.
pub type PhysicistsHermiteFit<'data, T = f64> =
    CurveFit<'data, crate::basis::PhysicistsHermiteBasis<T>, T>;

/// Probabilists' Hermite series curve fit
///
/// Uses Probabilists' Hermite polynomials, which are orthogonal polynomials defined on the interval (-∞, ∞).
/// These polynomials are particularly useful in probability theory and statistics, especially in the context of Gaussian distributions.
pub type ProbabilistsHermiteFit<'data, T = f64> =
    CurveFit<'data, crate::basis::ProbabilistsHermiteBasis<T>, T>;

/// Legendre series curve fit
///
/// Uses Legendre polynomials, which are orthogonal polynomials defined on the interval \[-1, 1\].
/// These polynomials are particularly useful for minimizing oscillation in polynomial interpolation.
pub type LegendreFit<'data, T = f64> = CurveFit<'data, crate::basis::LegendreBasis<T>, T>;

/// Fourier series curve fit
///
/// Uses a Fourier series basis, which is particularly well-suited for modeling periodic functions.
/// The basis functions include sine and cosine terms, allowing for effective representation of oscillatory behavior.
pub type FourierFit<'data, T = f64> = CurveFit<'data, crate::basis::FourierBasis<T>, T>;

/// Normalized Chebyshev polynomial curve fit
///
/// Uses the Chebyshev polynomials, which are orthogonal polynomials defined on the interval \[-1, 1\].
/// These polynomials are particularly useful for minimizing Runge's phenomenon in polynomial interpolation.
pub type ChebyshevFit<'data, T = f64> = CurveFit<'data, crate::basis::ChebyshevBasis<T>, T>;

/// Non-normalized monomial polynomial curve fit
///
/// Uses the standard monomial functions: 1, x, x^2, ..., x^n
///
/// This is the most basic polynomial basis and is not normalized,
/// which can lead to numerical instability for high-degree polynomials.
pub type MonomialFit<'data, T = f64> = CurveFit<'data, crate::basis::MonomialBasis<T>, T>;

/// Represents the covariance matrix and derived statistics for a curve fit.
///
/// Provides tools to evaluate the uncertainty of coefficients and predictions
/// of a fitted polynomial or other basis function model.
///
/// # Type Parameters
/// - `'a`: Lifetime of the reference to the original curve fit.
/// - `'data`: Lifetime of the data borrowed by the curve fit.
/// - `B`: Basis type used by the curve fit (implements `Basis<T>`).
/// - `T`: Numeric type (defaults to `f64`) implementing `Value`.
pub struct CurveFitCovariance<'a, 'data, B, T: Value = f64>
where
    B: Basis<T>,
    B: PolynomialDisplay<T>,
{
    fit: &'a CurveFit<'data, B, T>,
    covariance: DMatrix<T>,
}
impl<'a, 'data, B, T: Value> CurveFitCovariance<'a, 'data, B, T>
where
    B: Basis<T>,
    B: PolynomialDisplay<T>,
{
    /// Creates a new covariance matrix for a curve fit.
    ///
    /// See [`CurveFit::covariance`]
    ///
    /// # Errors
    /// Returns an error if the covariance matrix cannot be computed.
    pub fn new(fit: &'a CurveFit<'data, B, T>) -> Result<Self> {
        let n = fit.data.len();
        let k = fit.basis().k(fit.degree());

        let mut x_matrix = DMatrix::zeros(n, k);
        for (i, (x, _)) in fit.data.iter().enumerate() {
            let x = fit.basis().normalize_x(*x);
            for j in 0..k {
                x_matrix[(i, j)] = fit.basis().solve_function(j, x);
            }
        }

        // Compute (X^T X)^-1
        let xtx = &x_matrix.transpose() * &x_matrix;
        let xtx_reg = &xtx + DMatrix::<T>::identity(k, k) * T::epsilon();
        let svd = xtx_reg.svd(true, true);
        let xtx_inv = svd.pseudo_inverse(T::epsilon()).map_err(Error::Algebra)?;

        let res_var = fit.residual_variance();
        let covariance = xtx_inv * res_var;
        Ok(Self { fit, covariance })
    }

    /// Computes the standard error of the coefficient at index `j`.
    ///
    /// Returns `None` if the coefficient does not exist.
    ///
    /// This is the estimated standard deviation of the coefficient, providing
    /// a measure of its uncertainty.
    #[must_use]
    pub fn coefficient_standard_error(&self, j: usize) -> Option<T> {
        let cell = self.covariance.get((j, j))?;
        Some(cell.sqrt())
    }

    /// Computes the standard error of each coefficient.
    ///
    /// This is the estimated standard deviation of the coefficients, providing
    /// a measure of their uncertainty.
    #[must_use]
    pub fn coefficient_standard_errors(&self) -> Vec<T> {
        let cov = &self.covariance;
        (0..cov.ncols())
            .filter_map(|i| self.coefficient_standard_error(i))
            .collect()
    }

    /// Computes the variance of the predicted y value at `x`.
    ///
    /// This quantifies the uncertainty in the prediction at a specific point.
    ///
    /// The square root of the variance gives the standard deviation.
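    ///
    /// # Example
    /// A minimal sketch, assuming `f64` data:
    /// ```
    /// # use polyfit::MonomialFit;
    /// let fit = MonomialFit::new(&[(0.0, 0.0), (1.0, 1.1), (2.0, 1.9)], 1).unwrap();
    /// let cov = fit.covariance().unwrap();
    /// let var = cov.prediction_variance(1.0);
    /// assert!(var >= 0.0);
    /// ```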
    pub fn prediction_variance(&self, x: T) -> T {
        let k = self.fit.basis().k(self.fit.degree());
        let x = self.fit.basis().normalize_x(x);
        let phi_x =
            DVector::from_iterator(k, (0..k).map(|j| self.fit.basis().solve_function(j, x)));
        (phi_x.transpose() * &self.covariance * phi_x)[(0, 0)]
    }

    /// Computes the confidence band for an x value.
    ///
    /// Returns a confidence band representing the uncertainty in the predicted y value.
    ///
    /// # Parameters
    /// - `x`: The x value to compute the confidence band for.
    /// - `confidence_level`: Desired confidence level (e.g., P95).
    /// - `noise_tolerance`: Optional additional variance to add to the prediction variance
    ///   (e.g., to account for measurement noise).
    ///
    /// This estimates the uncertainty in the predicted y value at a specific x
    /// location, providing a range within which the true value is likely to fall.
    ///
    /// # Errors
    /// Returns an error if the confidence level cannot be cast to the required type.
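    ///
    /// # Example
    /// A minimal sketch, assuming `f64` data:
    /// ```
    /// # use polyfit::{MonomialFit, statistics::Confidence};
    /// let fit = MonomialFit::new(&[(0.0, 0.1), (1.0, 0.9), (2.0, 2.1)], 1).unwrap();
    /// let cov = fit.covariance().unwrap();
    /// let band = cov.confidence_band(1.0, Confidence::P95, None).unwrap();
    /// assert!(band.min() <= band.max());
    /// ```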
    pub fn confidence_band(
        &self,
        x: T,
        confidence_level: Confidence,
        noise_tolerance: Option<Tolerance<T>>,
    ) -> Result<ConfidenceBand<T>> {
        let mut y_var = self.prediction_variance(x);
        let value = self.fit.y(x)?;

        let abs_tol = match noise_tolerance {
            None => T::zero(),
            Some(Tolerance::Absolute(tol)) => tol,
            Some(Tolerance::Variance(pct)) => {
                let (data_sdev, _) = statistics::stddev_and_mean(self.fit.data().y_iter());
                let noise_tolerance = data_sdev * pct;
                y_var += noise_tolerance * noise_tolerance;
                T::zero()
            }
            Some(Tolerance::Measurement(pct)) => Value::abs(value) * pct,
        };

        let y_se = y_var.sqrt();

        let z = confidence_level.try_cast::<T>()?;
        let lower = value - z * y_se - abs_tol;
        let upper = value + z * y_se + abs_tol;
        Ok(ConfidenceBand {
            value,
            lower,
            upper,
            level: confidence_level,
            tolerance: noise_tolerance,
        })
    }

    /// Computes the confidence intervals for all data points in the original dataset.
    ///
    /// This evaluates the fitted model at each `x` from the original data and returns
    /// a `ConfidenceBand` for each point, quantifying the uncertainty of predictions.
    ///
    /// # Parameters
    /// - `confidence_level`: Desired confidence level (e.g., P95).
    /// - `noise_tolerance`: Optional additional variance to add to the prediction variance
    ///   (e.g., to account for measurement noise).
    ///
    /// # Returns
    /// - `Ok(Vec<(T, ConfidenceBand<T>)>)` containing one confidence band per data point.
    /// - `Err` if any prediction or type conversion fails.
    ///
    /// # Errors
    /// Returns an error if the confidence level cannot be cast to the required type.
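    ///
    /// # Example
    /// A minimal sketch, assuming `f64` data:
    /// ```
    /// # use polyfit::{MonomialFit, statistics::Confidence};
    /// let fit = MonomialFit::new(&[(0.0, 0.0), (1.0, 1.0), (2.0, 4.0)], 1).unwrap();
    /// let cov = fit.covariance().unwrap();
    /// let bands = cov.solution_confidence(Confidence::P95, None).unwrap();
    /// assert_eq!(bands.len(), 3); // one band per data point
    /// ```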
    pub fn solution_confidence(
        &self,
        confidence_level: Confidence,
        noise_tolerance: Option<Tolerance<T>>,
    ) -> Result<Vec<(T, ConfidenceBand<T>)>> {
        let x = self.fit.data().iter().map(|(x, _)| *x);
        x.map(|x| {
            Ok((
                x,
                self.confidence_band(x, confidence_level, noise_tolerance)?,
            ))
        })
        .collect()
    }

    /// Identifies outliers in the original dataset based on the confidence intervals.
    ///
    /// An outlier is defined as a data point where the actual `y` value falls outside
    /// the computed confidence band for its corresponding `x`.
    ///
    /// The confidence level determines the width of the confidence band. Higher confidence levels have wider bands,
    /// making it less likely for points to be classified as outliers.
    ///
    /// # Parameters
    /// - `confidence_level`: Confidence level used to determine the bounds (e.g., P95).
    /// - `noise_tolerance`: Optional additional variance to add to the prediction variance
    ///   (e.g., to account for measurement noise).
    ///
    /// # Returns
    /// - `Ok(Vec<(T, T, ConfidenceBand<T>)>)` containing `(x, y, confidence_band)` for each outlier.
    /// - `Err` if confidence intervals cannot be computed.
    ///
    /// # Errors
    /// Returns an error if the confidence level cannot be cast to the required type.
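    ///
    /// # Example
    /// A minimal sketch, assuming `f64` data with one point well off the trend:
    /// ```
    /// # use polyfit::{MonomialFit, statistics::Confidence};
    /// let data = &[(0.0, 0.0), (1.0, 1.0), (2.0, 2.0), (3.0, 9.0)];
    /// let fit = MonomialFit::new(data, 1).unwrap();
    /// let cov = fit.covariance().unwrap();
    /// let outliers = cov.outliers(Confidence::P95, None).unwrap();
    /// println!("{} outlier(s) flagged", outliers.len());
    /// ```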
    pub fn outliers(
        &self,
        confidence_level: Confidence,
        noise_tolerance: Option<Tolerance<T>>,
    ) -> Result<Vec<(T, T, ConfidenceBand<T>)>> {
        let bands = self.solution_confidence(confidence_level, noise_tolerance)?;
        let mut outliers = Vec::new();

        for ((x, y), (_, band)) in self.fit.data().iter().zip(bands) {
            if *y < band.lower || *y > band.upper {
                outliers.push((*x, *y, band));
            }
        }

        Ok(outliers)
    }
}

/// Represents a polynomial curve fit for a set of data points.
///
/// `CurveFit` computes a polynomial that best fits a given dataset using a
/// specified polynomial basis (e.g., monomial, Chebyshev). It stores both the
/// original data and the resulting coefficients.
///
/// # For beginners
/// Most users do **not** need to construct this directly. Use one of the
/// specialized type aliases for common bases:
/// - [`crate::MonomialFit`] — uses the standard monomial basis (`1, x, x², …`).
/// - [`crate::ChebyshevFit`] — uses Chebyshev polynomials to reduce oscillation.
///
/// # How it works
/// - Builds a **basis matrix** with shape `[rows, k]` where `rows`
///   is the number of data points and `k` is the number of basis functions.
/// - Forms a **column vector** `b` from the `y` values of the dataset.
/// - Solves the linear system `A * x = b` using the **SVD** of the basis matrix.
///   The solution `x` is the vector of polynomial coefficients.
///
/// # Type parameters
/// - `B`: The basis type, implementing [`Basis<T>`].
/// - `T`: Numeric type (default `f64`) implementing [`Value`].
///
/// # Example
/// ```
/// # use polyfit::MonomialFit;
/// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
/// let fit = MonomialFit::new(data, 2).unwrap();
/// println!("Coefficients: {:?}", fit.coefficients());
/// ```
#[derive(Debug, Clone, PartialEq)]
pub struct CurveFit<'data, B, T: Value = f64>
where
    B: Basis<T>,
    B: PolynomialDisplay<T>,
{
    data: Cow<'data, [(T, T)]>,
    x_range: RangeInclusive<T>,
    function: Polynomial<'static, B, T>,
    k: T,
}
impl<'data, T: Value, B> CurveFit<'data, B, T>
where
    B: Basis<T>,
    B: PolynomialDisplay<T>,
{
    #[cfg(feature = "parallel")]
    const MIN_ROWS_TO_PARALLEL: usize = 1_000_000; // We won't bother parallelizing until n > 1 million

    #[cfg(feature = "parallel")]
    const MAX_STABLE_DIGITS: usize = 8; // Given digits = -log10(epsilon) for T, 10^(digits - MAX_STABLE_DIGITS) gives a threshold for the condition number

    fn create_bigx(data: &[(T, T)], basis: &B, k: usize) -> DMatrix<T> {
        let mut bigx = DMatrix::zeros(data.len(), k);
        for (row, (x, _)) in bigx.row_iter_mut().zip(data.iter()) {
            let x = basis.normalize_x(*x);
            basis.fill_matrix_row(0, x, row);
        }
        bigx
    }

    /// Turns a dataset portion into a basis matrix and y-values vector.
    fn create_matrix(data: &[(T, T)], basis: &B, k: usize) -> (DMatrix<T>, DVector<T>) {
        let bigx = Self::create_bigx(data, basis, k);
        let b = DVector::from_iterator(data.len(), data.iter().map(|&(_, y)| y));
        (bigx, b)
    }

    /// If appropriate, creates the basis matrix in parallel, using the normal equations in chunks.
    /// Otherwise, falls back to the normal `create_matrix`.
    ///
    /// The bool indicates whether parallel processing was used - needed for `new_auto` to know how to slice the matrix properly.
    fn create_parallel_matrix(
        data: &[(T, T)],
        basis: &B,
        k: usize,
    ) -> (DMatrix<T>, DVector<T>, bool) {
        #[cfg(not(feature = "parallel"))]
        {
            let (m, b) = Self::create_matrix(data, basis, k);
            (m, b, false)
        }

        #[cfg(feature = "parallel")]
        {
            use rayon::prelude::*;

            // If the data is small, or not stable, don't parallelize
            let tikhonov = Self::is_data_stable(data, basis, k);
            if tikhonov.is_none() {
                let (m, b) = Self::create_matrix(data, basis, k);

                return (m, b, false);
            }

            // Each thread builds the (xtx, xtb) pair for its chunk, reducing an NxK to a KxK problem
            let threads = rayon::current_num_threads();
            let chunk_size = (data.len() / threads).max(1);
            let thread_data: Vec<&[(T, T)]> = data.chunks(chunk_size).collect();
            let mut partial_results: Vec<(DMatrix<T>, DVector<T>)> = thread_data
                .into_par_iter()
                .map(|chunk| {
                    let (m, b) = Self::create_matrix(chunk, basis, k);
                    Self::invert_matrix(&m, &b)
                })
                .collect();

            // Now accumulate the partial results to get the full (xtx, xtb)
            let (mut xtx, mut xtb) = partial_results.pop().unwrap_or_else(|| {
                (
                    DMatrix::<T>::zeros(k, k), // No data, zero matrix
                    DVector::<T>::zeros(k),    // No data, zero vector
                )
            });

            // We use Kahan summation here to reduce numerical error
            let mut xtx_c = DMatrix::<T>::zeros(k, k);
            let mut xtb_c = DVector::<T>::zeros(k);
            for (part_xtx, part_xtb) in partial_results {
                for i in 0..k {
                    let y = part_xtb[i] - xtb_c[i];
                    let t = xtb[i] + y;
                    xtb_c[i] = (t - xtb[i]) - y;
                    xtb[i] = t;

                    for j in 0..k {
                        let y = part_xtx[(i, j)] - xtx_c[(i, j)];
                        let t = xtx[(i, j)] + y;
                        xtx_c[(i, j)] = (t - xtx[(i, j)]) - y;
                        xtx[(i, j)] = t;
                    }
                }
            }

            // We also use Tikhonov regularization to improve stability
            let tikhonov = tikhonov.unwrap_or(T::zero());
            let identity = DMatrix::<T>::identity(k, k) * tikhonov;
            xtx += identity;

            (xtx, xtb, true)
        }
    }

    /// Checks whether the data is stable enough to fit in normal-equation mode with the given basis and degree.
    /// If so, it also returns a Tikhonov regularization parameter derived from the eigenvalues.
    ///
    /// Returns `Some(tikhonov)` if stable, `None` if not stable.
    #[cfg(feature = "parallel")]
    fn is_data_stable(data: &[(T, T)], basis: &B, k: usize) -> Option<T> {
        if data.len() < Self::MIN_ROWS_TO_PARALLEL {
            return None; // Not enough data to bother
        }

        // First we sample ~1% of the data
        let stride = (data.len() / 100).max(1);
        let sample: Vec<_> = data.iter().step_by(stride).map(|(x, y)| (*x, *y)).collect();

        // Build the basis matrix for the sample
        let bigx = Self::create_bigx(&sample, basis, k);
        let xtx = &bigx.transpose() * bigx;

        // Eigenvalues
        let eigen = xtx.complex_eigenvalues().map(nalgebra::ComplexField::real);
        let max_eigen = eigen.max();
        let min_eigen = eigen.min();

        // Get the condition number, and our threshold for stability
        let condition_number = (max_eigen / min_eigen).sqrt();
        let digits_to_keep = T::try_cast(Self::MAX_STABLE_DIGITS).ok()?;
        let ten = T::try_cast(10).ok()?;
        let threshold = T::one() / T::epsilon() * ten.powf(-T::one() * digits_to_keep);

        if condition_number >= threshold {
            return None; // Not stable
        }

        // We use the mean diagonal trace as the base for the Tikhonov parameter
        let mean_trace = xtx.trace() / T::try_cast(k).ok()?;
        Some(mean_trace * T::epsilon())
    }

    /// Reduces the n×k basis matrix and n×1 y-vector into the k×k and k×1 normal-equation system
    /// (`XᵀX`, `Xᵀb`).
    #[cfg(feature = "parallel")]
    fn invert_matrix(matrix: &DMatrix<T>, b: &DVector<T>) -> (DMatrix<T>, DVector<T>) {
        let xtx = matrix.transpose() * matrix;
        let xtb = matrix.transpose() * b;
        (xtx, xtb)
    }

    /// Solves the linear system using SVD.
    fn solve_matrix(xtx: DMatrix<T>, xtb: &DVector<T>) -> Result<Vec<T>> {
        const SVD_ITER_LIMIT: (usize, usize) = (1000, 10_000);

        let size = xtx.shape();

        // This looks arbitrary, but it is what nalgebra uses internally for its default epsilon.
        // It isn't documented, but I assume it's based on empirical testing.
        // It also matches nalgebra's default behavior, which is a plus here.
        let svd_eps = T::RealField::default_epsilon() * nalgebra::convert(5.0);

        // Decide how many iterations to allow; we can't bound by time, only by iteration count.
        // A good rule of thumb for ill-conditioned problems is max(rows, cols)^2:
        // if it hasn't converged by ~10k iterations, it probably won't.
        // This mostly can't happen for orthogonal bases, so it only matters for the monomial or logarithmic basis.
        let max_dim = size.0.max(size.1);
        let iters = max_dim
            .saturating_mul(max_dim)
            .clamp(SVD_ITER_LIMIT.0, SVD_ITER_LIMIT.1);

        // Calculate the singular value decomposition of the matrix
        let decomp =
            SVD::try_new_unordered(xtx, true, true, svd_eps, iters).ok_or(Error::DidNotConverge)?;
        // Calculate epsilon value
        // ~= machine_epsilon * max(size) * max_singular
        let machine_epsilon = T::epsilon();
        let max_size = size.0.max(size.1);
        let sigma_max = decomp.singular_values.max();
        let epsilon = machine_epsilon * T::try_cast(max_size)? * sigma_max;

        // Solve for X in `SVD * X = b`
        let big_x = decomp.solve(xtb, epsilon).map_err(Error::Algebra)?;
        let coefficients: Vec<_> = big_x.data.into();

        // Make sure the coefficients are valid
        if coefficients.iter().any(|c| c.is_nan()) {
            return Err(Error::Algebra("NaN in coefficients"));
        }

        Ok(coefficients)
    }

    /// Creates a new polynomial curve fit from raw components.
    fn from_raw(
        data: Cow<'data, [(T, T)]>,
        x_range: RangeInclusive<T>,
        basis: B,
        coefs: Vec<T>,
        degree: usize,
    ) -> Result<Self> {
        let k = T::try_cast(coefs.len())?;
        let function = unsafe { Polynomial::from_raw(basis, coefs.into(), degree) }; // Safety: The coefs were generated by the basis

        Ok(Self {
            data,
            x_range,
            function,
            k,
        })
    }

    /// Returns an owned version of this curve fit, with a full copy of the data.
    #[must_use]
    pub fn to_owned(&self) -> CurveFit<'static, B, T> {
        CurveFit {
            data: Cow::Owned(self.data.to_vec()),
            x_range: self.x_range.clone(),
            function: self.function.clone(),
            k: self.k,
        }
    }

    /// Creates a new polynomial curve fit for the given data and degree.
    ///
    /// You can also use [`CurveFit::new_auto`] to automatically select the best degree.
    ///
    /// This constructor fits a polynomial to the provided `(x, y)` points using
    /// the chosen basis type `B`. For most users, `B` will be a Chebyshev or
    /// monomial basis.
    ///
    /// # Parameters
    /// - `data`: Slice of `(x, y)` points to fit.
    /// - `degree`: Desired polynomial degree.
    ///
    /// # Returns
    /// Returns `Ok(Self)` if the fit succeeds.
    ///
    /// # Errors
    /// Returns an [`Error`] in the following cases:
    /// - `Error::NoData`: `data` is empty.
    /// - `Error::DegreeTooHigh`: `degree >= data.len()`.
    /// - `Error::Algebra`: the linear system could not be solved.
    /// - `Error::CastFailed`: a numeric value could not be cast to the target type.
    ///
    /// # Behavior
    /// - Builds the basis matrix internally and fills each row using
    ///   [`Basis::fill_matrix_row`].
    /// - Computes `x_range` as the inclusive range of `x` values in the data.
    /// - Solves the linear system `A * x = b` to determine polynomial coefficients.
    /// - Uses SVD to solve the system for numerical stability.
    ///   - Limits iterations to between 1,000 and 10,000 based on problem size, preventing infinite loops.
    ///   - If the solve has not converged by 10,000 iterations, it probably never will;
    ///     if you hit that, use an orthogonal basis like Chebyshev or Legendre for better stability.
    ///
    /// # Warning
    /// For datasets with >1,000,000 points, the basis matrix may be constructed in parallel.
    /// The normal equations are used to reduce the system size only if the data is stable (see below).
    ///
    /// <div class="warning">
    /// For a given problem of NxK, where N is the number of data points and K is the number of basis functions,
    /// the normal equations are used when N > 1e6 and the condition number of (X^T X) is below 10^(digits - 8),
    /// where `digits` is the number of significant digits for type `T`.
    ///
    /// We use Kahan summation to reduce numerical error when accumulating the partial results, as well as
    /// Tikhonov regularization to improve stability.
    /// </div>
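    ///
    /// A minimal sketch of one Kahan accumulation step like the one used internally
    /// (illustrative only; the library applies this per matrix cell when merging chunks):
    /// ```
    /// let (mut sum, mut c) = (0.0f64, 0.0f64);
    /// for &v in [0.1f64; 10].iter() {
    ///     let y = v - c; // correct the input by the running compensation
    ///     let t = sum + y; // add; low-order bits of `y` may be lost here
    ///     c = (t - sum) - y; // recover the lost bits into the compensation
    ///     sum = t;
    /// }
    /// assert!((sum - 1.0).abs() < 1e-15);
    /// ```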
    ///
    /// # Example
    /// ```
    /// # use polyfit::ChebyshevFit;
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// println!("Coefficients: {:?}", fit.coefficients());
    /// ```
    pub fn new(data: impl Into<Cow<'data, [(T, T)]>>, degree: usize) -> Result<Self> {
        let data: Cow<_> = data.into();

        // Cannot fit if there is no data, or if the degree requires more points than we have.
        if data.is_empty() {
            return Err(Error::NoData);
        } else if degree >= data.len() {
            return Err(Error::DegreeTooHigh(degree));
        }

        let x_range = data.x_range().ok_or(Error::NoData)?;
        let basis = B::from_range(x_range.clone());
        let k = basis.k(degree);

        let (m, b, _) = Self::create_parallel_matrix(&data, &basis, k);
        let coefs = Self::solve_matrix(m, &b)?;
        Self::from_raw(data, x_range, basis, coefs, degree)
    }

    /// Creates a new weighted polynomial curve fit using Gauss nodes.
    ///
    /// This constructor fits a polynomial to the provided sampling function
    /// evaluated at Gauss nodes, using weights associated with those nodes.
    ///
    /// # Parameters
    /// - `sample_fn`: Function that takes an `x` value and returns the corresponding `y` value.
    /// - `x_range`: Inclusive range of `x` values to consider for fitting.
    /// - `degree`: Desired polynomial degree.
    ///
    /// # Returns
    /// Returns `Ok(Self)` if the fit succeeds.
    ///
    /// # Errors
    /// Returns an [`Error`] in the following cases:
    /// - `Error::DegreeTooHigh`: `degree` is invalid for the chosen basis.
    /// - `Error::Algebra`: the linear system could not be solved.
    /// - `Error::CastFailed`: a numeric value could not be cast to the target type.
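    ///
    /// # Example
    /// A minimal sketch sampling `f(x) = x²`, assuming the Chebyshev basis satisfies
    /// the [`OrthogonalBasis`] bound:
    /// ```
    /// # use polyfit::ChebyshevFit;
    /// let fit = ChebyshevFit::new_weighted(|x| x * x, -1.0..=1.0, 2).unwrap();
    /// println!("Coefficients: {:?}", fit.coefficients());
    /// ```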
    pub fn new_weighted<F>(sample_fn: F, x_range: RangeInclusive<T>, degree: usize) -> Result<Self>
    where
        B: OrthogonalBasis<T>,
        F: Fn(T) -> T,
    {
        let basis = B::from_range(x_range.clone());
        let k = basis.k(degree);
        let mut data = Vec::with_capacity(k);
        let nodes = basis.gauss_nodes(k);

        for (x, _) in &nodes {
            // Gauss nodes are in function-space (normalized), so we denormalize to data-space
            let x = basis.denormalize_x(*x);
            let y = sample_fn(x);
            data.push((x, y));
        }

        let mut bigx = Self::create_bigx(&data, &basis, k);
        let mut b = DVector::from_iterator(data.len(), data.iter().map(|&(_, y)| y));
        for (i, (mut row, (_, w))) in bigx.row_iter_mut().zip(nodes).enumerate() {
            let w_sqrt = w.sqrt();
            for j in 0..k {
                row[j] *= w_sqrt; // scale basis row
            }
            b[i] *= w_sqrt; // scale target
        }

        let coefs = Self::solve_matrix(bigx, &b)?;
        Self::from_raw(Cow::Owned(data), x_range, basis, coefs, degree)
    }

    /// Automatically selects the best polynomial degree and creates a curve fit.
    ///
    /// This function fits polynomials of increasing degree to the provided dataset
    /// and selects the “best” degree according to the specified scoring method.
    ///
    /// # Parameters
    /// - `data`: Slice of `(x, y)` points to fit.
    /// - `max_degree`: A [`DegreeBound`] limiting the highest degree to consider.
    /// - `method`: A [`ModelScoreProvider`] (see [`crate::score`]) to evaluate model quality.
    ///   - `AIC`: Akaike Information Criterion (uses `AICc` if `n/k < 4`)
    ///   - `BIC`: Bayesian Information Criterion
    ///
    /// # Choosing a scoring method
    /// - Consider the size of your dataset: if you have a small dataset, prefer `AIC` as it penalizes complexity more gently.
    /// - If your dataset is large, `BIC` may be more appropriate as it imposes a harsher penalty on complexity.
    ///
    /// # Returns
    /// Returns `Ok(Self)` with the fit at the optimal degree.
    ///
    /// # Errors
    /// Returns [`Error`] if:
    /// - `data` is empty (`Error::NoData`)
    /// - A numeric value cannot be represented in the target type (`Error::CastFailed`)
    ///
    /// # Behavior
    /// - Starts with degree 0 and iteratively fits higher degrees up to `data.len() - 1`.
    /// - Evaluates each fit using `model_score(method)`.
    /// - Selects the lowest-degree model whose score is within 2 of the minimum score.
    ///   - Per Burnham and Anderson, models within Δ ≤ 2 are statistically indistinguishable from the best model.
    /// - Uses SVD to solve the system for numerical stability.
    ///   - Limits iterations to between 1,000 and 10,000 based on problem size, preventing infinite loops.
    ///   - If the solve has not converged by 10,000 iterations, it probably never will;
    ///     if you hit that, use an orthogonal basis like Chebyshev or Legendre for better stability.
    ///
    /// # Warning
    /// For datasets with >1,000,000 points, the basis matrix may be constructed in parallel.
    /// The normal equations are used to reduce the system size only if the data is stable (see below).
    ///
    /// <div class="warning">
    /// For a given problem of NxK, where N is the number of data points and K is the number of basis functions,
    /// the normal equations are used when N > 1e6 and the condition number of (X^T X) is below 10^(digits - 8),
    /// where `digits` is the number of significant digits for type `T`.
    ///
    /// We use Kahan summation to reduce numerical error when accumulating the partial results, as well as
    /// Tikhonov regularization to improve stability.
    /// </div>
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, statistics::DegreeBound, score::Aic};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new_auto(data, DegreeBound::Relaxed, &Aic).unwrap();
    /// println!("Selected degree: {}", fit.degree());
    /// ```
    pub fn new_auto(
        data: impl Into<Cow<'data, [(T, T)]>>,
        max_degree: impl Into<DegreeBound>,
        method: &impl ModelScoreProvider,
    ) -> Result<Self> {
        let data: Cow<_> = data.into();
        let max_degree = max_degree.into().max_degree(data.len());
        if data.is_empty() {
            return Err(Error::NoData);
        }

        // Step 1 - Create the basis and matrix once
        let x_range = data.x_range().ok_or(Error::NoData)?;
        let basis = B::from_range(x_range.clone());
        let max_k = basis.k(max_degree);
        let (xtx, xtb, normal_eq) = Self::create_parallel_matrix(&data, &basis, max_k);

        #[cfg(not(feature = "parallel"))]
        let (min_score, model_scores) = {
            // Step 2 - Build models using increasingly narrow slices of the matrix
            let mut min_score = T::infinity();
            let mut model_scores: Vec<(Self, T)> = Vec::with_capacity(max_degree + 1);
            for degree in 0..=max_degree {
                let k = basis.k(degree);

                let height = if normal_eq { k } else { xtx.nrows() };
                let m = xtx.view((0, 0), (height, k)).into_owned();
                let Ok(coefs) = Self::solve_matrix(m, &xtb) else {
                    continue;
                };

                let Ok(model) =
                    Self::from_raw(data.clone(), x_range.clone(), basis.clone(), coefs, degree)
                else {
                    continue;
                };

                let score = model.model_score(method);
                model_scores.push((model, score));
                if score < min_score {
                    min_score = score;
                }
            }

            (min_score, model_scores)
        };

        #[cfg(feature = "parallel")]
        let (min_score, model_scores) = {
            use rayon::prelude::*;

            let mut model_scores: Vec<(Self, T)> = (0..=max_degree)
                .into_par_iter()
                .filter_map(|degree| {
                    let k = basis.k(degree);

                    let height = if normal_eq { k } else { xtx.nrows() };
                    let m = xtx.view((0, 0), (height, k)).into_owned();
                    let coefs = Self::solve_matrix(m, &xtb).ok()?;

                    let model =
                        Self::from_raw(data.clone(), x_range.clone(), basis.clone(), coefs, degree)
                            .ok()?;

                    let score = model.model_score(method);
                    Some((model, score))
                })
                .collect();

            // Sort by degree ascending
            model_scores.sort_by_key(|(m, _)| m.degree());

            let min_score = model_scores
                .iter()
                .map(|(_, score)| *score)
                .fold(T::infinity(), nalgebra::RealField::min);

            (min_score, model_scores)
        };

        // Step 3 - get delta_score
        // Per Burnham and Anderson, use the first delta <= 2 (P = 0.37):
        // statistically indistinguishable from the top model
        for (model, score) in model_scores {
            let delta = score - min_score;
            if delta <= T::two() {
                return Ok(model);
            }
        }

        Err(Error::NoModel)
    }

    /// Creates a new polynomial curve fit using K-fold cross-validation to select the best degree.
    ///
    /// This function splits the dataset into `folds` subsets, using each subset as a validation set while training on the remaining data.
    /// It evaluates polynomial fits of increasing degree and selects the best degree based on the specified scoring method.
    ///
    /// This method helps prevent overfitting by ensuring that the selected model generalizes well to unseen data, and is particularly useful for small datasets
    /// or those with very extreme outliers.
    ///
    /// # Parameters
    /// - `data`: Slice of `(x, y)` points to fit.
    /// - `strategy`: Cross-validation strategy defining the number of folds.
    /// - `max_degree`: Maximum polynomial degree to consider.
    /// - `method`: [`ModelScoreProvider`] to evaluate model quality.
    ///
    /// # Choosing a scoring method
    /// - Consider the size of your dataset: if you have a small dataset, prefer `AIC` as it penalizes complexity more gently.
    /// - If your dataset is large, `BIC` may be more appropriate as it imposes a harsher penalty on complexity.
    ///
    /// # Choosing the number of folds (strategy)
    /// The number of folds (k) determines how many subsets the data is split into for cross-validation. The choice of k impacts the bias-variance trade-off:
    /// - Bias is how much your model's predictions differ from the actual values on average.
    /// - Variance is how much your model's predictions change when you use different training data.
    ///
    /// - Choose [`CvStrategy::MinimizeBias`] (e.g., k=5) with larger datasets to reduce bias. Prevents a model from being too simple to capture data patterns.
    /// - Choose [`CvStrategy::MinimizeVariance`] (e.g., k=10) with smaller datasets to reduce variance. Helps ensure the model generalizes well to unseen data.
    /// - Choose [`CvStrategy::Balanced`] (e.g., k=7) for a compromise between bias and variance.
    /// - Choose [`CvStrategy::LeaveOneOut`] (k=n) for very small datasets, but be aware of the high computational cost.
    /// - Choose [`CvStrategy::Custom(k)`] to specify a custom number of folds, ensuring 2 <= k <= n.
    ///
    /// # Returns
    /// Returns `Ok(Self)` with the fit at the optimal degree.
    ///
    /// # Errors
    /// Returns [`Error`] if:
    /// - `data` is empty or `folds < 2` (`Error::NoData`)
    /// - A numeric value cannot be represented in the target type (`Error::CastFailed`)
    /// - No valid model could be fitted (`Error::NoModel`)
    ///
    /// # Warning
    /// For datasets with >1,000,000 points, the basis matrix may be constructed in parallel.
    /// The normal equations are used to reduce the system size only if the data is stable (see below).
    ///
    /// <div class="warning">
    /// For a given problem of NxK, where N is the number of data points and K is the number of basis functions,
    /// the normal equations are used when N > 1e6 and the condition number of (X^T X) is below 10^(digits - 8),
    /// where `digits` is the number of significant digits for type `T`.
    ///
    /// We use Kahan summation to reduce numerical error when accumulating the partial results, as well as
    /// Tikhonov regularization to improve stability.
    /// </div>
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, statistics::{CvStrategy, DegreeBound}, score::Aic};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new_kfold_cross_validated(data, CvStrategy::LeaveOneOut, DegreeBound::Relaxed, &Aic).unwrap();
    /// println!("Selected degree: {}", fit.degree());
    /// ```
    #[expect(
        clippy::many_single_char_names,
        reason = "It's math, what do you want from me"
    )]
    pub fn new_kfold_cross_validated(
        data: impl Into<Cow<'data, [(T, T)]>>,
        strategy: statistics::CvStrategy,
        max_degree: impl Into<DegreeBound>,
        method: &impl ModelScoreProvider,
    ) -> Result<Self> {
        let data: Cow<_> = data.into();
        let folds = strategy.k(data.len());
        let max_degree = max_degree.into().max_degree(data.len());
        if data.is_empty() || folds < 2 || data.len() < folds {
            return Err(Error::NoData);
        }
        // Computed after validation so `folds` is known to be non-zero
        let fold_size = data.len() / folds;

        // Step 1 - Create the basis and matrix once
        let x_range = data.x_range().ok_or(Error::NoData)?;
        let basis = B::from_range(x_range.clone());
        let k = basis.k(max_degree);
        let (m, b) = Self::create_matrix(data.as_ref(), &basis, k);

        // Step 2 - Precalculate fold boundaries
        let mut fold_ranges = Vec::with_capacity(folds);
        for i in 0..folds {
            let start = i * fold_size;
            let end = if i == folds - 1 {
                data.len()
            } else {
                (i + 1) * fold_size
            };
            fold_ranges.push(start..end);
        }

        // Step 3 - Use `folds` views into m and b for training and test sets
        // We do this for each degree candidate, and each fold
        let mut min_score = T::infinity();
        let mut candidates = Vec::with_capacity(max_degree + 1);
        for degree in 0..=max_degree {
            let k = basis.k(degree);
            let m = m.view((0, 0), (m.nrows(), k)).into_owned();

            // Evaluate this degree with K-fold cross-validation
            let mut mean_score = T::zero();
            let mut n = T::zero();
            for i in 0..folds {
                let test_range = &fold_ranges[i];
                let test_data = &data[test_range.clone()];

                let mut fold_m = DMatrix::zeros(data.len() - test_range.len(), k);
                let fold_b = DVector::from_iterator(
                    data.len() - test_data.len(),
                    b.iter().enumerate().filter_map(|(j, &y)| {
                        if test_range.contains(&j) {
                            None
                        } else {
                            Some(y)
                        }
                    }),
                );

                // Copy relevant rows from m into fold_m
                for (i, src) in m.row_iter().enumerate() {
                    if !test_range.contains(&i) {
                        let dst_index = if i < test_range.start {
                            i
                        } else {
                            i - test_range.len()
                        };
                        let mut dst = fold_m.row_mut(dst_index);
                        dst.copy_from(&src);
                    }
                }

                let Ok(coefs) = Self::solve_matrix(fold_m, &fold_b) else {
                    continue;
                };

                let Ok(model) =
                    Self::from_raw(data.clone(), x_range.clone(), basis.clone(), coefs, degree)
                else {
                    continue;
                };

                let y = test_data.y_iter();
                let predicted = model.as_polynomial().solve(test_data.x_iter());
                let y_fit = predicted.y_iter();
                mean_score += method.score(y, y_fit, model.k);
                n += T::one();
            }

            mean_score /= n;
            candidates.push((degree, mean_score));
            if mean_score < min_score {
                min_score = mean_score;
            }
        }

        // Step 4 - Select the best model within 2 AIC units of the minimum (Burnham and Anderson 2002)
        for (degree, score) in candidates {
            let delta = score - min_score;
            if delta <= T::two() {
                return Self::new(data, degree);
            }
        }

        Err(Error::NoModel)
    }

    /// Prunes coefficients that are statistically insignificant based on a t-test.
    ///
    /// # Parameters
    /// - `confidence`: Confidence level for determining significance (e.g., P95, P99)
    ///
    /// # Errors
    /// Returns an error if the covariance matrix cannot be computed.
    ///
    /// # Returns
    /// A vector of `(index, coefficient)` for all pruned coefficients.
    ///
    /// # Notes
    /// - Modifies `self` in-place, zeroing out insignificant coefficients.
    /// - Uses the standard errors derived from the covariance matrix.
    /// - Ignores coefficients whose absolute value is smaller than `T::epsilon()`.
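    ///
    /// # Example
    /// A minimal sketch, assuming `f64` data that is nearly linear:
    /// ```
    /// # use polyfit::{MonomialFit, statistics::Confidence};
    /// let data = &[(0.0, 0.1), (1.0, 1.1), (2.0, 1.9), (3.0, 3.2), (4.0, 3.9)];
    /// let mut fit = MonomialFit::new(data, 2).unwrap();
    /// let pruned = fit.prune_insignificant(Confidence::P95).unwrap();
    /// println!("Pruned {} coefficient(s)", pruned.len());
    /// ```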
    pub fn prune_insignificant(&mut self, confidence: Confidence) -> Result<Vec<(usize, T)>> {
        let covariance = self.covariance()?;
        let se = covariance.coefficient_standard_errors();
        let coeffs = self.coefficients();

        let df = self.data().len().saturating_sub(coeffs.len());
        let t_crit = confidence.t_score(df);
        let t_crit = T::try_cast(t_crit)?;

        let mut insignificant = Vec::new();
        for (i, (&c, s)) in coeffs.iter().zip(se).enumerate() {
            let t = Value::abs(c) / s;
            // Compare the magnitude, so negative coefficients are treated the same as positive ones
            if t < t_crit && Value::abs(c) > T::epsilon() {
                insignificant.push((i, c));
            }
        }

        let coefs_mut = self.function.coefficients_mut();
        for (i, _) in &insignificant {
            coefs_mut[*i] = T::zero();
        }

        Ok(insignificant)
    }

    /// Returns a reference to the basis function.
    pub(crate) fn basis(&self) -> &B {
        self.function.basis()
    }

    /// Computes the covariance matrix and related statistics for this curve fit.
    ///
    /// The returned [`CurveFitCovariance`] provides:
    /// - Covariance matrix of the fitted coefficients.
    /// - Standard errors of the coefficients.
    /// - Prediction variance at a specific `x`.
    /// - Confidence intervals for predictions.
    ///
    /// # Returns
    /// - `Ok(CurveFitCovariance<'_, '_, B, T>)` on success.
    ///
    /// # Errors
    /// Returns `Err(Error::Algebra)` if `(Xᵀ X)` is singular or nearly singular,
    /// i.e., the pseudo-inverse cannot be computed. Causes include too few data points
    /// relative to parameters or collinear/linearly dependent basis functions.
    ///
    /// # Example
    /// ```rust
    /// # use polyfit::statistics::Confidence;
    /// # use polyfit::MonomialFit;
    /// # let model = MonomialFit::new(&[(0.0, 0.0), (1.0, 1.0)], 1).unwrap();
    /// let cov = model.covariance().unwrap();
    /// let se = cov.coefficient_standard_errors();
    /// let band = cov.confidence_band(1.0, Confidence::P95, None).unwrap();
    /// println!("Predicted CI at x=1: {} - {}", band.min(), band.max());
    /// ```
    pub fn covariance(&self) -> Result<CurveFitCovariance<'_, '_, B, T>> {
        CurveFitCovariance::new(self)
    }

    /// Finds the critical points (where the derivative is zero) of a polynomial in this basis.
    ///
    /// These correspond to the polynomial's local minima and maxima (the `x` values where the slope is zero).
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// The critical points are found by solving the equation `f'(x) = 0`, where `f'(x)` is the derivative of the polynomial.
    ///
    /// This is done by finding the eigenvalues of the companion matrix of the derivative polynomial.
    /// </div>
    ///
    /// # Returns
    /// A vector of `x` values where the critical points occur.
    ///
    /// # Requirements
    /// - The polynomial's basis `B` must implement [`DifferentialBasis`].
    ///
    /// # Errors
    /// Returns an error if the critical points cannot be found.
    ///
    /// # Example
    /// ```rust
    /// # use polyfit::MonomialPolynomial;
    /// # use polyfit::statistics::Confidence;
    /// # use polyfit::MonomialFit;
    /// # let model = MonomialFit::new(&[(0.0, 0.0), (1.0, 1.0)], 1).unwrap();
    /// let critical_points = model.critical_points().unwrap();
    /// ```
    pub fn critical_points(&self) -> Result<Vec<CriticalPoint<T>>>
    where
        B: DifferentialBasis<T>,
        B::B2: DifferentialBasis<T>,
    {
        let points = self.function.critical_points(self.x_range.clone())?;
        Ok(points)
    }

    /// Computes the definite integral (area under the curve) of the fitted polynomial
    /// between `x_min` and `x_max`.
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// The area under the curve is computed using the definite integral of the polynomial
    /// between the specified bounds:
    /// ```math
    /// Area = ∫[x_min to x_max] f(x) dx = F(x_max) - F(x_min)
    /// ```
    /// </div>
    ///
    /// # Parameters
    /// - `x_min`: Lower bound of integration.
    /// - `x_max`: Upper bound of integration.
    /// - `constant`: Constant of integration (value at x = 0) for the indefinite integral.
    ///
    /// # Requirements
    /// - The polynomial's basis `B` must implement [`IntegralBasis`].
    ///
    /// # Returns
    /// - `Ok(T)`: The computed area under the curve between `x_min` and `x_max`.
    /// - `Err`: If computing the integral fails (e.g., basis cannot compute integral coefficients).
    ///
    /// # Errors
    /// If the basis cannot compute the integral coefficients, an error is returned.
    ///
    /// # Example
    /// ```rust
    /// # use polyfit::statistics::Confidence;
    /// # use polyfit::MonomialFit;
    /// # let model = MonomialFit::new(&[(0.0, 0.0), (1.0, 1.0)], 1).unwrap();
    /// let area = model.area_under_curve(0.0, 3.0, None).unwrap();
    /// println!("Area under curve: {}", area);
    /// ```
    pub fn area_under_curve(&self, x_min: T, x_max: T, constant: Option<T>) -> Result<T>
    where
        B: IntegralBasis<T>,
    {
        self.function.area_under_curve(x_min, x_max, constant)
    }

    /// Returns the X-values where the function is not monotone (i.e., where the derivative changes sign).
    ///
    /// # Errors
    /// Returns an error if the derivative cannot be computed.
    ///
    /// # Example
    /// ```rust
    /// # use polyfit::MonomialFit;
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = MonomialFit::new(data, 2).unwrap();
    /// let violations = fit.monotonicity_violations().unwrap();
    /// ```
    pub fn monotonicity_violations(&self) -> Result<Vec<T>>
    where
        B: DifferentialBasis<T>,
    {
        self.function.monotonicity_violations(self.x_range.clone())
    }

    /// Computes the quality score of the polynomial fit using the specified method.
    ///
    /// This evaluates how well the fitted polynomial represents the data, taking
    /// into account both the fit error and model complexity.
    ///
    /// # Parameters
    /// - `method`: [`ModelScoreProvider`] to use for scoring.
    ///   - `AIC`: Akaike Information Criterion (uses `AICc` if `n/k < 4`)
    ///   - `BIC`: Bayesian Information Criterion
    ///
    /// # Returns
    /// The score as a numeric value (`T`). Lower scores indicate better models.
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, score::Aic};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let score = fit.model_score(&Aic);
    /// println!("Model score: {}", score);
    /// ```
    pub fn model_score(&self, method: &impl ModelScoreProvider) -> T {
        let y = self.data.y_iter();
        let y_fit = self.solution().into_iter().map(|(_, y)| y);
        method.score(y, y_fit, self.k)
    }

    /// Computes the residuals of the fit.
    ///
    /// Residuals are the differences between the observed `y` values and the predicted `y` values from the fitted polynomial.
    /// They provide insight into the fit quality and can be used for diagnostic purposes.
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// ```math
    /// residual_i = y_i - f(x_i)
    /// where
    ///   y_i = observed value, f(x_i) = predicted value from the polynomial at x_i
    /// ```
    /// </div>
    ///
    /// # Returns
    /// A vector of residuals, where each element corresponds to a data point.
    ///
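    /// # Example
    /// A quick way to inspect the per-point error:
    /// ```
    /// # use polyfit::MonomialFit;
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = MonomialFit::new(data, 1).unwrap();
    /// for (x, r) in fit.residuals() {
    ///     println!("residual at x = {}: {}", x, r);
    /// }
    /// ```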
    pub fn residuals(&self) -> Vec<(T, T)> {
        let y = self.data.y_iter();
        y.zip(self.solution())
            .map(|(y, (x, y_fit))| (x, y - y_fit))
            .collect()
    }

    /// Computes the residuals of the fit, filtering out small residuals likely due to floating point noise.
    ///
    /// This form can help minimize the impact of floating point precision and rounding.
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// ```math
    /// max_val = max(|y_i|, |f(x_i)|, 1)
    /// epsilon = machine_epsilon * sqrt(n) * max_val
    /// residual_i = y_i - f(x_i) if |y_i - f(x_i)| >= epsilon else dropped
    /// where
    ///   y_i = observed value, f(x_i) = predicted value from the polynomial at x_i, n = number of data points
    /// ```
    /// </div>
    ///
    /// # Returns
    /// A vector of filtered residuals, where each element corresponds to a retained data point.
    pub fn filtered_residuals(&self) -> Vec<(T, T)> {
        // Get max(|y|, |y_fit|, 1)
        let max_val = self
            .data
            .iter()
            .chain(self.solution().iter())
            .map(|(_, y)| y.abs())
            .fold(T::zero(), nalgebra::RealField::max);
        let max_val = nalgebra::RealField::max(max_val, T::one());

        // Residual epsilon
        let root_n = T::from_positive_int(self.data.len()).sqrt();
        let epsilon = T::epsilon() * root_n * max_val;

        let y = self.data.y_iter();
        y.zip(self.solution())
            .filter_map(|(y, (x, y_fit))| {
                let r = y - y_fit;
                if nalgebra::ComplexField::abs(r) < epsilon {
                    None
                } else {
                    Some((x, r))
                }
            })
            .collect()
    }

    /// Computes the residual variance of the model's predictions.
    ///
    /// See [`statistics::residual_variance`].
    ///
    /// Residual variance is the unbiased estimate of the variance of the
    /// errors (σ²) after fitting a model. It's used for confidence intervals
    /// and covariance estimates of the fitted parameters.
    pub fn residual_variance(&self) -> T {
        let y = self.data.y_iter();
        let y_fit = self.solution().into_iter().map(|(_, y)| y);
        statistics::residual_variance(y, y_fit, self.k)
    }

    /// Computes the mean squared error (MSE) of this fit against its source data.
    ///
    /// See [`statistics::mean_squared_error`].
    ///
    /// MSE measures the average squared difference between the observed and predicted values.
    /// Lower values indicate a better fit.
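    ///
    /// # Example
    /// A minimal sketch; a degree-2 fit through three points is exact, so the MSE is at (or near) zero:
    /// ```
    /// # use polyfit::MonomialFit;
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = MonomialFit::new(data, 2).unwrap();
    /// let mse = fit.mean_squared_error();
    /// assert!(mse >= 0.0);
    /// ```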
    pub fn mean_squared_error(&self) -> T {
        let y = self.data.y_iter();
        let y_fit = self.solution().into_iter().map(|(_, y)| y);
        statistics::mean_squared_error(y, y_fit)
    }

    /// Computes the root mean squared error (RMSE) of this fit against its source data.
    ///
    /// See [`statistics::root_mean_squared_error`].
    ///
    /// RMSE is the square root of the MSE, giving error in the same units as the original data.
    /// Lower values indicate a closer fit.
    pub fn root_mean_squared_error(&self) -> T {
        let y = self.data.y_iter();
        let y_fit = self.solution().into_iter().map(|(_, y)| y);
        statistics::root_mean_squared_error(y, y_fit)
    }

    /// Computes the mean absolute error (MAE) of this fit against its source data.
    ///
    /// See [`statistics::mean_absolute_error`].
    ///
    /// MAE measures the average absolute difference between observed and predicted values.
    /// Lower values indicate a better fit.
    pub fn mean_absolute_error(&self) -> T {
        let y = self.data.y_iter();
        let y_fit = self.solution().into_iter().map(|(_, y)| y);
        statistics::mean_absolute_error(y, y_fit)
    }

    /// Calculates the R-squared value for the model compared to the provided data.
    ///
    /// If `data` is not provided, uses the original data used to create the fit;
    /// that way you can test the fit against a different dataset if desired.
    ///
    /// R-squared is a statistical measure of how well the polynomial explains
    /// the variance in the data. Values closer to 1 indicate a better fit.
    ///
    /// # Parameters
    /// - `data`: A slice of `(x, y)` pairs to compare against the polynomial fit.
    ///
    /// See [`statistics::r_squared`] for more details.
    ///
    /// # Returns
    /// The R-squared value as type `T`.
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, CurveFit};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let r2 = fit.r_squared(None);
    /// println!("R² = {}", r2);
    /// ```
    pub fn r_squared(&self, data: Option<&[(T, T)]>) -> T {
        let data = data.unwrap_or(self.data());

        let y = data.iter().map(|&(_, y)| y);
        let y_fit = self.solution().into_iter().map(|(_, y)| y);

        statistics::r_squared(y, y_fit)
    }

    /// Uses Huber loss to compute a robust R-squared value.
    /// - More robust to outliers than traditional R².
    /// - Values closer to 1 indicate a better fit.
    ///
    /// If `data` is not provided, uses the original data used to create the fit;
    /// that way you can test the fit against a different dataset if desired.
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// ```math
    /// R²_robust = 1 - (Σ huber_loss(y_i - y_fit_i, delta)) / (Σ (y_i - y_fit_i)²)
    /// where
    ///   huber_loss(r, delta) = { 0.5 * r²                     if |r| ≤ delta
    ///                          { delta * (|r| - 0.5 * delta)  if |r| > delta
    ///   delta = 1.345 * MAD
    ///   MAD = median( |y_i - y_fit_i| )
    /// where
    ///   y_i = observed values, y_fit_i = predicted values
    /// ```
    /// </div>
    ///
    /// # Type Parameters
    /// - `T`: A numeric type implementing the `Value` trait.
    ///
    /// # Parameters
    /// - `data`: A slice of `(x, y)` pairs to compare against the polynomial fit.
    ///
    /// # Returns
    /// The robust R² as a `T`.
    ///
    /// # Example
    /// ```rust
    /// # use polyfit::{ChebyshevFit, CurveFit};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let r2 = fit.robust_r_squared(None);
    /// println!("R² = {}", r2);
    /// ```
    pub fn robust_r_squared(&self, data: Option<&[(T, T)]>) -> T {
        let data = data.unwrap_or(self.data());

        let y = data.iter().map(|&(_, y)| y);
        let y_fit = self.solution().into_iter().map(|(_, y)| y);

        statistics::robust_r_squared(y, y_fit)
    }

    /// Computes the adjusted R-squared value.
    ///
    /// Adjusted R² accounts for the number of predictors in a model, penalizing
    /// overly complex models. Use it to compare models of different degrees.
    ///
    /// If `data` is not provided, uses the original data used to create the fit;
    /// that way you can test the fit against a different dataset if desired.
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// ```math
    /// R²_adj = R² - (1 - R²) * (k / (n - k))
    /// where
    ///   n = number of observations, k = number of model parameters
    /// ```
    /// [`r_squared`] is used to compute R²
    /// </div>
    ///
    /// # Type Parameters
    /// - `T`: A numeric type implementing the `Value` trait.
    ///
    /// # Parameters
    /// - `data`: A slice of `(x, y)` pairs to compare against the polynomial fit.
    ///
    /// # Returns
    /// The adjusted R² as a `T`.
    ///
    /// # Example
    /// ```rust
    /// # use polyfit::{ChebyshevFit, CurveFit};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let r2 = fit.adjusted_r_squared(None);
    /// println!("R² = {}", r2);
    /// ```
    pub fn adjusted_r_squared(&self, data: Option<&[(T, T)]>) -> T {
        let data = data.unwrap_or(self.data());

        let y = data.iter().map(|&(_, y)| y);
        let y_fit = self.solution().into_iter().map(|(_, y)| y);

        statistics::adjusted_r_squared(y, y_fit, self.k)
    }

    /// Calculates the R-squared value for the model compared to a provided function.
    ///
    /// R-squared is a statistical measure of how well the polynomial explains
    /// the variance in the data. Values closer to 1 indicate a better fit.
    ///
    /// # Parameters
    /// - `function`: The polynomial to compare against; it is evaluated at this fit's x-values.
    ///
    /// See [`statistics::r_squared`] for more details.
    ///
    /// # Returns
    /// The R-squared value as type `T`.
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, MonomialPolynomial};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let target = MonomialPolynomial::borrowed(&[1.0, 2.0, 1.0]);
    /// let r2 = fit.r_squared_against(&target);
    /// println!("R² vs target polynomial = {}", r2);
    /// ```
    pub fn r_squared_against<C>(&self, function: &Polynomial<C, T>) -> T
    where
        C: Basis<T>,
        C: PolynomialDisplay<T>,
    {
        let data: Vec<_> = self
            .data()
            .iter()
            .map(|&(x, _)| (x, function.y(x)))
            .collect();
        self.r_squared(Some(&data))
    }

    /// Computes the folded root mean squared error (RMSE) with uncertainty estimation.
    ///
    /// This is a measure of how far the predicted values are from the actual values, on average.
    ///
    /// Doing it over K-fold cross-validation (splitting the data into K subsets, and leaving one out for testing each time)
    /// tells us how the model performance changes as the data varies.
    ///
    /// When to use each strategy:
    /// - `MinimizeBias`: When the dataset is small and you want to avoid underfitting. Prevents a model from being too simple to capture data patterns.
    /// - `MinimizeVariance`: When the dataset is large and you want to avoid overfitting. Helps ensure the model generalizes well to unseen data.
    /// - `Balanced`: When you want a good trade-off between bias and variance, suitable for moderately sized datasets or when unsure.
    /// - `Custom`: Specify your own number of folds (k) based on domain knowledge or specific requirements. Use with caution.
    ///
    /// The returned [`statistics::UncertainValue<T>`] includes both the mean RMSE and its uncertainty (standard deviation across folds).
    ///
    /// You can use [`statistics::UncertainValue::confidence_band`] to get confidence intervals for the RMSE.
    ///
    /// # Parameters
    /// - `strategy`: The cross-validation strategy to use (e.g., K-Fold with K=5).
    ///
    /// # Returns
    /// An [`statistics::UncertainValue<T>`] representing the folded RMSE with uncertainty.
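    ///
    /// # Example
    /// A minimal sketch, assuming `f64` data:
    /// ```
    /// # use polyfit::{MonomialFit, statistics::CvStrategy};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0), (3.0, 13.0), (4.0, 21.0)];
    /// let fit = MonomialFit::new(data, 2).unwrap();
    /// let rmse = fit.folded_rmse(CvStrategy::LeaveOneOut);
    /// // `rmse` carries both the mean RMSE and its uncertainty across folds.
    /// ```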
    pub fn folded_rmse(&self, strategy: statistics::CvStrategy) -> statistics::UncertainValue<T> {
        let y = self.data.y_iter();
        let y_fit = self.solution().into_iter().map(|(_, y)| y);
        statistics::folded_rmse(y, y_fit, strategy)
    }

    /// Returns the degree of the polynomial.
    ///
    /// The number of components (basis functions) used to express a given degree is
    /// defined by the basis.
    ///
    /// That number is called `k`. For most basis choices, `k = degree + 1`. See the example below.
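    ///
    /// # Example
    /// For a fixed-degree constructor, the requested degree is reported back:
    /// ```
    /// # use polyfit::MonomialFit;
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = MonomialFit::new(data, 2).unwrap();
    /// assert_eq!(fit.degree(), 2);
    /// ```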
    pub fn degree(&self) -> usize {
        self.function.degree()
    }

    /// Returns a reference to the polynomial’s coefficients.
    ///
    /// The index of each coefficient corresponds to the jth basis function.
    ///
    /// For example, the monomial expression `y(x) = 2x^2 - 3x + 1` has
    /// coefficients `[1.0, -3.0, 2.0]`.
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// Formally, for each coefficient *j*, and the jth basis function *`B_j(x)`*, the relationship is:
    /// ```math
    /// y(x) = Σ (c_j * B_j(x))
    /// ```
    /// </div>
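    ///
    /// # Example
    /// A quick check of the coefficient layout:
    /// ```
    /// # use polyfit::MonomialFit;
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = MonomialFit::new(data, 2).unwrap();
    /// // One coefficient per basis function: k = degree + 1 = 3 here.
    /// assert_eq!(fit.coefficients().len(), 3);
    /// ```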
    pub fn coefficients(&self) -> &[T] {
        self.function.coefficients()
    }

    /// Returns a reference to the data points used for fitting.
    ///
    /// Each element is an `(x, y)` tuple representing a data point.
    pub fn data(&self) -> &[(T, T)] {
        &self.data
    }

    /// Returns the inclusive range of x-values in the dataset.
    pub fn x_range(&self) -> RangeInclusive<T> {
        self.x_range.clone()
    }

    /// Returns the inclusive range of y-values in the dataset.
    ///
    /// This is computed dynamically from the stored data points, so use it sparingly
    /// in performance-sensitive code.
    pub fn y_range(&self) -> RangeInclusive<T> {
        let min_y = self
            .data
            .iter()
            .map(|&(_, y)| y)
            .fold(T::infinity(), <T as nalgebra::RealField>::min);
        let max_y = self
            .data
            .iter()
            .map(|&(_, y)| y)
            .fold(T::neg_infinity(), <T as nalgebra::RealField>::max);
        min_y..=max_y
    }

    /// Evaluates the polynomial at a given x-value.
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// Given [`Basis::k`] coefficients and basis functions, for each pair of coefficient *`c_j`* and basis function *`B_j(x)`*, this function returns:
    /// ```math
    /// y(x) = Σ (c_j * B_j(x))
    /// ```
    /// </div>
    ///
    /// # Parameters
    /// - `x`: The point at which to evaluate the polynomial.
    ///
    /// # Returns
    /// The corresponding y-value as `T` if `x` is within the valid range.
    ///
    /// # Errors
    /// Returns [`Error::DataRange`] if `x` is outside the original data bounds.
    ///
    /// # Notes
    /// - Polynomial fits are generally only stable within the x-range used for fitting.
    /// - To evaluate outside the original bounds, use [`CurveFit::as_polynomial`] to get
    ///   a pure polynomial function that ignores the original x-range.
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, CurveFit};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let y = fit.y(1.0).unwrap();
    /// println!("y(1.0) = {}", y);
    /// ```
    pub fn y(&self, x: T) -> Result<T> {
        if !self.x_range.contains(&x) {
            return Err(Error::DataRange(
                format!("{}", self.x_range.start()),
                format!("{}", self.x_range.end()),
            ));
        }

        Ok(self.function.y(x))
    }

    /// Returns the fitted y-values corresponding to the original x-values.
    ///
    /// This produces a vector of `(x, y)` pairs for the same x-values used in
    /// the source data. It is guaranteed to succeed because all x-values are
    /// within the curve's valid range.
    ///
    /// # Notes
    /// - Useful for quickly plotting or analyzing the fitted curve against the
    ///   original data points.
    /// - This is equivalent to calling [`CurveFit::y`] at each original x-value,
    ///   but infallible because those values are always within the valid range.
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, CurveFit};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let points = fit.solution();
    /// for (x, y) in points {
    ///     println!("x = {}, y = {}", x, y);
    /// }
    /// ```
    pub fn solution(&self) -> Vec<(T, T)> {
        self.data()
            .iter()
            .map(|&(x, _)| (x, self.function.y(x)))
            .collect()
    }

    /// Evaluates the curve at multiple x-values.
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// Given [`Basis::k`] coefficients and basis functions, for each pair of coefficient *`c_j`* and basis function *`B_j(x)`*, this function returns:
    /// ```math
    /// y(x) = Σ (c_j * B_j(x))
    /// ```
    /// </div>
    ///
    /// # Parameters
    /// - `x`: An iterator of x-values to evaluate.
    ///
    /// # Returns
    /// A vector of `(x, y)` pairs corresponding to each input x-value.
    ///
    /// # Errors
    /// Returns [`Error::DataRange`] if any x-value is outside the original data range.
    ///
    /// # Notes
    /// - Curve fits are generally only stable within the x-range used for fitting.
    /// - To evaluate outside the original bounds, use [`CurveFit::as_polynomial`]
    ///   to get a pure polynomial function that ignores the original x-range.
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, CurveFit};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let points = fit.solve([0.0, 0.5, 1.0]).unwrap();
    /// for (x, y) in points {
    ///     println!("x = {}, y = {}", x, y);
    /// }
    /// ```
    pub fn solve(&self, x: impl IntoIterator<Item = T>) -> Result<Vec<(T, T)>> {
        x.into_iter().map(|x| Ok((x, self.y(x)?))).collect()
    }

    /// Evaluates the curve at evenly spaced points over a range.
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// Given [`Basis::k`] coefficients and basis functions, for each pair of coefficient *`c_j`* and basis function *`B_j(x)`*, this function returns:
    /// ```math
    /// y(x) = Σ (c_j * B_j(x))
    /// ```
    /// </div>
    ///
    /// # Parameters
    /// - `range`: The start and end x-values to evaluate.
    /// - `step`: The increment between points.
    ///
    /// # Returns
    /// A vector of `(x, y)` pairs corresponding to each x-value in the range.
    ///
    /// # Errors
    /// Returns [`Error::DataRange`] if any x-value is outside the original data range.
    ///
    /// # Notes
    /// - Curve fits are only stable within the x-range used for fitting.
    /// - To evaluate outside the original bounds, use [`CurveFit::as_polynomial`]
    ///   to get a pure polynomial function.
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, CurveFit};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let points = fit.solve_range(0.0..=2.0, 0.5).unwrap();
    /// for (x, y) in points {
    ///     println!("x = {}, y = {}", x, y);
    /// }
    /// ```
    pub fn solve_range(&self, range: RangeInclusive<T>, step: T) -> Result<Vec<(T, T)>> {
        self.solve(SteppedValues::new(range, step))
    }

    /// Returns a pure polynomial representation of the curve fit.
    ///
    /// This allows evaluation of the polynomial at **any x-value**, without
    /// restriction to the original data range. Unlike [`CurveFit::y`] or
    /// [`CurveFit::solve`], this does not perform range checks, so use with
    /// caution outside the fit’s stable region.
    ///
    /// The [`Polynomial`] form is considered a canonical function, not a fit estimate.
    ///
    /// # Returns
    /// A reference to the [`Polynomial`] that this fit uses internally.
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, CurveFit};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let poly = fit.as_polynomial();
    /// let y = poly.y(10.0); // can evaluate outside original x-range
    /// ```
    pub fn as_polynomial(&self) -> &Polynomial<'_, B, T> {
        &self.function
    }

    /// Consumes the curve fit and returns its pure polynomial representation.
    ///
    /// This is primarily a memory optimization: you become responsible for maintaining
    /// stability around the x-bounds, but the copy of the original data and the
    /// Vandermonde matrix are dropped.
    ///
    /// This allows evaluation of the polynomial at **any x-value**, without
    /// restriction to the original data range. Unlike [`CurveFit::y`] or
    /// [`CurveFit::solve`], this does not perform range checks, so use with
    /// caution outside the fit’s stable region.
    ///
    /// The [`Polynomial`] form is considered a canonical function, not a fit estimate.
    ///
    /// # Returns
    /// The [`Polynomial`] that this fit uses internally.
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, CurveFit};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let poly = fit.into_polynomial();
    /// let y = poly.y(10.0); // can evaluate outside original x-range
    /// ```
    pub fn into_polynomial(self) -> Polynomial<'static, B, T> {
        self.function
    }

    /// Converts the curve fit into a monomial polynomial.
    ///
    /// This produces a [`MonomialPolynomial`] representation of the curve,
    /// which uses the standard monomial basis `1, x, x^2, …`.
    ///
    /// # Returns
    /// A monomial polynomial with owned coefficients.
    ///
    /// # Errors
    /// Returns an error if the current basis cannot be converted to monomial form.
    /// This requires that the basis implements [`IntoMonomialBasis`].
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, CurveFit, MonomialPolynomial};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let mono_poly = fit.as_monomial().unwrap();
    /// let y = mono_poly.y(1.5);
    /// ```
    pub fn as_monomial(&self) -> Result<MonomialPolynomial<'static, T>>
    where
        B: IntoMonomialBasis<T>,
    {
        let mut coefficients = self.coefficients().to_vec();
        self.basis().as_monomial(&mut coefficients)?;
        Ok(MonomialPolynomial::owned(coefficients))
    }

    /// Computes the energy contribution of each coefficient in an orthogonal basis.
    ///
    /// This is a measure of how much each basis function contributes to the resulting polynomial.
    ///
    /// It can be useful for understanding the significance of each term.
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// For an orthogonal basis, the energy contribution of each coefficient is calculated as:
    /// ```math
    /// E_j = c_j^2 * N_j
    /// ```
    /// where:
    /// - `E_j` is the energy contribution of the jth coefficient.
    /// - `c_j` is the jth coefficient.
    /// - `N_j` is the normalization factor for the jth basis function, provided by the basis.
    ///
    /// </div>
    ///
    /// # Returns
    /// A vector of energy contributions for each coefficient.
    ///
    /// # Errors
    /// Returns an error if the series is not orthogonal, like with integrated Fourier series.
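    ///
    /// # Example
    /// A sketch using the Chebyshev basis, which this crate documents as orthogonal:
    /// ```
    /// # use polyfit::ChebyshevFit;
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0), (3.0, 13.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// // One energy value per coefficient.
    /// let energies = fit.coefficient_energies().unwrap();
    /// assert_eq!(energies.len(), fit.coefficients().len());
    /// ```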
    pub fn coefficient_energies(&self) -> Result<Vec<T>>
    where
        B: OrthogonalBasis<T>,
    {
        self.function.coefficient_energies()
    }

    /// Computes a smoothness metric for the polynomial.
    ///
    /// This metric quantifies how "smooth" the polynomial is, with lower values indicating smoother curves.
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// The smoothness is calculated as a weighted average of the coefficient energies, where higher-degree coefficients are penalized more heavily.
    /// The formula used is:
    /// ```math
    /// Smoothness = (Σ (k^2 * E_k)) / (Σ E_k)
    /// ```
    /// where:
    /// - `k` is the degree of the basis function.
    /// - `E_k` is the energy contribution of the k-th coefficient.
    /// </div>
    ///
    /// # Returns
    /// A smoothness value, where lower values indicate a smoother polynomial.
    ///
    /// # Errors
    /// Returns an error if the series is not orthogonal, like with integrated Fourier series.
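    ///
    /// # Example
    /// A sketch, again assuming an orthogonal (Chebyshev) basis; since the energies
    /// are non-negative, the weighted average should be too:
    /// ```
    /// # use polyfit::ChebyshevFit;
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0), (3.0, 13.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let smoothness = fit.smoothness().unwrap();
    /// assert!(smoothness >= 0.0);
    /// ```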
    pub fn smoothness(&self) -> Result<T>
    where
        B: OrthogonalBasis<T>,
    {
        self.function.smoothness()
    }

    /// Applies a spectral energy filter to the series.
    ///
    /// This uses the properties of an orthogonal basis to de-noise the polynomial by removing higher-degree terms that contribute little to the overall energy.
    /// Terms are split into "signal" and "noise" based on their energy contributions, and the polynomial is truncated to only include the signal components.
    ///
    /// Remaining terms are smoothly attenuated to prevent ringing artifacts from a hard cutoff.
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// The energy of each coefficient is calculated using the formula:
    ///
    /// ```math
    /// E_j = c_j^2 * N_j
    /// ```
    /// where:
    /// - `E_j` is the energy contribution of the jth coefficient.
    /// - `c_j` is the jth coefficient.
    /// - `N_j` is the normalization factor for the jth basis function, provided by the basis.
    ///
    /// Generalized Cross-Validation (GCV) is used to determine the optimal cutoff degree `K` that minimizes the prediction error using:
    /// `GCV(K) = (suffix[0] - suffix[K]) / K^2`, where `suffix` is the suffix sum of energies.
    ///
    /// A Lanczos sigma filter with p=1 is applied to smoothly attenuate coefficients up to the cutoff degree, reducing Gibbs ringing artifacts.
    /// </div>
    ///
    /// # Notes
    /// - This method modifies the polynomial in place.
    ///
    /// # Errors
    /// Returns an error if the basis is not orthogonal. This can be checked with [`Polynomial::is_orthogonal`].
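    ///
    /// # Example
    /// A minimal in-place sketch; on an already-clean low-degree fit the filter
    /// should be close to a no-op:
    /// ```
    /// # use polyfit::ChebyshevFit;
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0), (3.0, 13.0)];
    /// let mut fit = ChebyshevFit::new(data, 3).unwrap();
    /// fit.spectral_energy_filter().unwrap();
    /// ```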
    pub fn spectral_energy_filter(&mut self) -> Result<()>
    where
        B: OrthogonalBasis<T>,
    {
        self.function.spectral_energy_filter()
    }

    /// Returns a human-readable string of the polynomial equation.
    ///
    /// The output shows the polynomial in standard mathematical notation, for example:
    /// ```text
    /// y = 1.0x^3 + 2.0x^2 + 3.0x + 4.0
    /// ```
    ///
    /// # Notes
    /// - Requires the basis to implement [`PolynomialDisplay`] for formatting.
    /// - This operation is infallible and guaranteed to succeed, hence no error return.
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, CurveFit};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// println!("{}", fit.equation());
    /// ```
    #[expect(clippy::missing_panics_doc, reason = "Infallible operation")]
    pub fn equation(&self) -> String {
        let mut output = String::new();
        self.basis()
            .format_polynomial(&mut output, self.coefficients())
            .expect("String should be infallible");
        output
    }

    /// Returns the properties of the curve fit.
    ///
    /// This is a comprehensive summary of the fit's characteristics.
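    ///
    /// # Example
    /// A quick check of the summary against the source data:
    /// ```
    /// # use polyfit::ChebyshevFit;
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let props = fit.properties();
    /// assert_eq!(props.data_points, 3);
    /// ```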
    pub fn properties(&self) -> FitProperties<T> {
        FitProperties {
            degree: self.degree(),
            data_points: self.data().len(),
            coefficients: self.coefficients().to_vec(),
            coefficient_errors: self
                .covariance()
                .map(|cov| cov.coefficient_standard_errors())
                .ok(),
            mse: self.mean_squared_error(),
            r_squared: self.r_squared(None),
        }
    }
}

impl<B, T: Value> AsRef<Polynomial<'_, B, T>> for CurveFit<'_, B, T>
where
    B: Basis<T>,
    B: PolynomialDisplay<T>,
{
    fn as_ref(&self) -> &Polynomial<'static, B, T> {
        &self.function
    }
}

impl<T: Value, B> std::fmt::Display for CurveFit<'_, B, T>
where
    B: Basis<T>,
    B: PolynomialDisplay<T>,
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.equation())
    }
}

/// A set of diagnostic properties for a curve fit.
///
/// Can be serialized to JSON or other formats.
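///
/// # Example
/// A sketch of JSON round-tripping via serde, where `fit` is a previously constructed
/// `CurveFit`; this assumes a `serde_json` dependency in the consuming crate, so the
/// snippet is marked `ignore`:
/// ```ignore
/// let props = fit.properties();
/// let json = serde_json::to_string(&props).unwrap();
/// let back: FitProperties<f64> = serde_json::from_str(&json).unwrap();
/// assert_eq!(props, back);
/// ```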
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq)]
pub struct FitProperties<T: Value> {
    /// The degree of the fitted polynomial.
    pub degree: usize,

    /// The number of data points used in the fit.
    pub data_points: usize,

    /// The coefficients of the fitted polynomial.
    pub coefficients: Vec<T>,

    /// The standard errors of the fitted coefficients, if the covariance matrix
    /// could be computed.
    pub coefficient_errors: Option<Vec<T>>,

    /// The mean squared error of the fit.
    pub mse: T,

    /// The R² value of the fit.
    pub r_squared: T,
}

#[cfg(test)]
#[cfg(feature = "transforms")]
mod tests {
    use crate::{
        assert_close, assert_fits,
        score::Aic,
        statistics::CvStrategy,
        transforms::{ApplyNoise, Strength},
        MonomialFit,
    };

    use super::*;

    #[test]
    fn test_curvefit_new_and_coefficients() {
        let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
        let fit = MonomialFit::new(data, 2).unwrap();
        let coefs = fit.coefficients();
        assert_eq!(coefs.len(), 3);
    }

    #[test]
    fn test_big() {
        function!(poly(x) = 1.0 + 2.0 x^1 + 3.0 x^2 + 4.0 x^3 + 5.0 x^4 + 6.0 x^5);
        let data = poly.solve_range(0.0..=10_000_000.0, 1.0);
        let fit = ChebyshevFit::new(&data, 5).unwrap();
        assert_fits!(poly, fit, 0.999);
    }

    #[test]
    fn test_curvefit_y_in_range() {
        let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
        let fit = MonomialFit::new(data, 2).unwrap();
        let y: f64 = fit.y(1.0).unwrap();
        assert!(y.is_finite());
    }

    #[test]
    fn test_curvefit_y_out_of_range() {
        let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
        let fit = MonomialFit::new(data, 2).unwrap();
        let y = fit.y(-1.0);
        assert!(y.is_err());
    }

    #[test]
    fn test_curvefit_solution_matches_data_len() {
        let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
        let fit = MonomialFit::new(data, 2).unwrap();
        let solution = fit.solution();
        assert_eq!(solution.len(), data.len());
    }

    #[test]
    fn test_curvefit_covariance_and_standard_errors() {
        let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
        let fit = MonomialFit::new(data, 2).unwrap();
        let cov = fit.covariance().unwrap();
        let errors = cov.coefficient_standard_errors();
        assert_eq!(errors.len(), 3);
        for err in errors {
            assert!(err >= 0.0);
        }
    }

    #[test]
    fn test_curvefit_confidence_band() {
        let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
        let fit = MonomialFit::new(data, 2).unwrap();
        let cov = fit.covariance().unwrap();
        let band = cov.confidence_band(1.0, Confidence::P95, None).unwrap();
        assert!(band.lower <= band.upper);
        assert!(band.value >= band.lower && band.value <= band.upper);
    }

    #[test]
    fn test_curvefit_model_score_and_r_squared() {
        let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
        let fit = MonomialFit::new(data, 2).unwrap();
        let score: f64 = fit.model_score(&Aic);
        let r2 = fit.r_squared(None);
        assert!(score.is_finite());
        assert_close!(r2, 1.0);

        function!(mono(x) = 1.0 + 2.0 x^1); // strictly increasing
        let data = mono
            .solve_range(0.0..=1000.0, 1.0)
            .apply_normal_noise(Strength::Relative(0.3), None);
        let fit = MonomialFit::new_auto(&data, DegreeBound::Relaxed, &Aic).unwrap();
        assert!(fit.r_squared(None) < 1.0);
        assert!(fit.model_score(&Aic).is_finite());
    }

    #[test]
    fn test_curvefit_as_polynomial_and_into_polynomial() {
        let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
        let fit = MonomialFit::new(data, 2).unwrap();
        let poly_ref = fit.as_polynomial();
        let poly_owned = fit.clone().into_polynomial();
        assert_eq!(poly_ref.coefficients(), poly_owned.coefficients());
    }

    #[test]
    fn test_curvefit_properties() {
        let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
        let fit = MonomialFit::new(data, 2).unwrap();
        let props = fit.properties();
        assert_eq!(props.degree, 2);
        assert_eq!(props.data_points, 3);
        assert_eq!(props.coefficients.len(), 3);
        assert!(props.mse >= 0.0);
        assert!(props.r_squared <= 1.0 && props.r_squared >= 0.0);
    }

    #[test]
    fn test_curvefit_new_auto_selects_best_degree() {
        let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0), (3.0, 13.0)];
        let fit = MonomialFit::new_auto(data, DegreeBound::Relaxed, &Aic).unwrap();
        assert!(fit.degree() < data.len());
    }

    #[test]
    fn test_curvefit_solve_and_solve_range() {
        let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
        let fit = MonomialFit::new(data, 2).unwrap();
        let xs = vec![0.0, 1.0, 2.0];
        let points = fit.solve(xs.clone()).unwrap();
        assert_eq!(points.len(), xs.len());
        let range_points = fit.solve_range(0.0..=2.0, 1.0).unwrap();
        assert_eq!(range_points.len(), 3);
    }

    #[test]
    fn test_kfold() {
        function!(mono(x) = 5 x^5 - 3 x^3 + 2 x^2 + 1.0);
        let data = mono
            .solve_range(0.0..=1000.0, 1.0)
            .apply_salt_pepper_noise(
                0.01,
                Strength::Absolute(-1000.0),
                Strength::Absolute(1000.0),
                None,
            )
            .apply_poisson_noise(Strength::Relative(2.0), None);
        crate::plot!([data, mono]);
        let fit = MonomialFit::new_kfold_cross_validated(
            &data,
            CvStrategy::MinimizeBias,
            DegreeBound::Relaxed,
            &Aic,
        )
        .unwrap();
        assert_fits!(mono, fit, 0.7);
    }
}
2129}