polyfit/fit.rs
use std::{borrow::Cow, ops::RangeInclusive};

use nalgebra::{DMatrix, DVector, SVD};

use crate::{
    basis::{
        Basis, CriticalPoint, DifferentialBasis, IntegralBasis, IntoMonomialBasis, OrthogonalBasis,
    },
    display::PolynomialDisplay,
    error::{Error, Result},
    score::ModelScoreProvider,
    statistics::{self, Confidence, ConfidenceBand, DegreeBound, Tolerance},
    value::{CoordExt, SteppedValues, Value},
    MonomialPolynomial, Polynomial,
};

/// Logarithmic series curve fit
///
/// Uses logarithmic basis functions, which are particularly useful for modeling data that exhibits logarithmic growth or decay.
/// The basis functions include terms like 1, ln(x), (ln(x))^2, ..., (ln(x))^n.
pub type LogarithmicFit<'data, T = f64> = CurveFit<'data, crate::basis::LogarithmicBasis<T>, T>;

/// Laguerre series curve fit
///
/// Uses Laguerre polynomials, which are orthogonal polynomials defined on the interval \[0, ∞).
/// These polynomials are particularly useful in quantum mechanics and numerical analysis.
pub type LaguerreFit<'data, T = f64> = CurveFit<'data, crate::basis::LaguerreBasis<T>, T>;

/// Physicists' Hermite series curve fit
///
/// Uses Physicists' Hermite polynomials, which are orthogonal polynomials defined on the interval (-∞, ∞).
/// These polynomials are particularly useful in probability, combinatorics, and physics, especially in quantum mechanics.
pub type PhysicistsHermiteFit<'data, T = f64> =
    CurveFit<'data, crate::basis::PhysicistsHermiteBasis<T>, T>;

/// Probabilists' Hermite series curve fit
///
/// Uses Probabilists' Hermite polynomials, which are orthogonal polynomials defined on the interval (-∞, ∞).
/// These polynomials are particularly useful in probability theory and statistics, especially in the context of Gaussian distributions.
pub type ProbabilistsHermiteFit<'data, T = f64> =
    CurveFit<'data, crate::basis::ProbabilistsHermiteBasis<T>, T>;

/// Legendre series curve fit
///
/// Uses Legendre polynomials, which are orthogonal polynomials defined on the interval \[-1, 1\].
/// These polynomials are particularly useful for minimizing oscillation in polynomial interpolation.
pub type LegendreFit<'data, T = f64> = CurveFit<'data, crate::basis::LegendreBasis<T>, T>;

/// Fourier series curve fit
///
/// Uses a Fourier series basis, which is particularly well-suited for modeling periodic functions.
/// The basis functions include sine and cosine terms, allowing for effective representation of oscillatory behavior.
pub type FourierFit<'data, T = f64> = CurveFit<'data, crate::basis::FourierBasis<T>, T>;

/// Normalized Chebyshev polynomial curve fit
///
/// Uses the Chebyshev polynomials, which are orthogonal polynomials defined on the interval \[-1, 1\].
/// These polynomials are particularly useful for minimizing Runge's phenomenon in polynomial interpolation.
pub type ChebyshevFit<'data, T = f64> = CurveFit<'data, crate::basis::ChebyshevBasis<T>, T>;

/// Non-normalized monomial polynomial curve fit
///
/// Uses the standard monomial functions: 1, x, x^2, ..., x^n
///
/// This is the most basic polynomial basis and is not normalized,
/// which can lead to numerical instability for high-degree polynomials.
pub type MonomialFit<'data, T = f64> = CurveFit<'data, crate::basis::MonomialBasis<T>, T>;

/// Represents the covariance matrix and derived statistics for a curve fit.
///
/// Provides tools to evaluate the uncertainty of coefficients and predictions
/// of a fitted polynomial or other basis function model.
///
/// # Type Parameters
/// - `'a`: Lifetime of the reference to the original curve fit.
/// - `'data`: Lifetime of the data borrowed by the curve fit.
/// - `B`: Basis type used by the curve fit (implements `Basis<T>`).
/// - `T`: Numeric type (defaults to `f64`) implementing `Value`.
pub struct CurveFitCovariance<'a, 'data, B, T: Value = f64>
where
    B: Basis<T>,
    B: PolynomialDisplay<T>,
{
    fit: &'a CurveFit<'data, B, T>,
    covariance: DMatrix<T>,
}
impl<'a, 'data, B, T: Value> CurveFitCovariance<'a, 'data, B, T>
where
    B: Basis<T>,
    B: PolynomialDisplay<T>,
{
    /// Creates a new covariance matrix for a curve fit.
    ///
    /// See [`CurveFit::covariance`]
    ///
    /// # Errors
    /// Returns an error if the covariance matrix cannot be computed.
    pub fn new(fit: &'a CurveFit<'data, B, T>) -> Result<Self> {
        let n = fit.data.len();
        let k = fit.basis().k(fit.degree());

        let mut x_matrix = DMatrix::zeros(n, k);
        for (i, (x, _)) in fit.data.iter().enumerate() {
            let x = fit.basis().normalize_x(*x);
            for j in 0..k {
                x_matrix[(i, j)] = fit.basis().solve_function(j, x);
            }
        }

        // Compute (X^T X)^-1
        let xtx = &x_matrix.transpose() * &x_matrix;
        let xtx_reg = &xtx + DMatrix::<T>::identity(k, k) * T::epsilon();
        let svd = xtx_reg.svd(true, true);
        let xtx_inv = svd.pseudo_inverse(T::epsilon()).map_err(Error::Algebra)?;

        let res_var = fit.residual_variance();
        let covariance = xtx_inv * res_var;
        Ok(Self { fit, covariance })
    }

    /// Computes the standard error of the coefficient at index `j`.
    ///
    /// Returns `None` if the coefficient does not exist.
    ///
    /// This is the estimated standard deviation of the coefficient, providing
    /// a measure of its uncertainty.
    #[must_use]
    pub fn coefficient_standard_error(&self, j: usize) -> Option<T> {
        let cell = self.covariance.get((j, j))?;
        Some(cell.sqrt())
    }

    /// Computes the standard error of each coefficient.
    ///
    /// This is the estimated standard deviation of the coefficients, providing
    /// a measure of their uncertainty.
    #[must_use]
    pub fn coefficient_standard_errors(&self) -> Vec<T> {
        let cov = &self.covariance;
        (0..cov.ncols())
            .filter_map(|i| self.coefficient_standard_error(i))
            .collect()
    }

    /// Computes the variance of the predicted y value at `x`.
    ///
    /// This quantifies the uncertainty in the prediction at a specific point.
    ///
    /// The square root of the variance gives the standard deviation.
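    ///
    /// # Example
    /// A minimal sketch, assuming `f64` data:
    /// ```
    /// # use polyfit::MonomialFit;
    /// let fit = MonomialFit::new(&[(0.0, 0.0), (1.0, 1.1), (2.0, 1.9)], 1).unwrap();
    /// let cov = fit.covariance().unwrap();
    /// let var = cov.prediction_variance(1.0);
    /// assert!(var >= 0.0);
    /// ```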
    pub fn prediction_variance(&self, x: T) -> T {
        let k = self.fit.basis().k(self.fit.degree());
        let x = self.fit.basis().normalize_x(x);
        let phi_x =
            DVector::from_iterator(k, (0..k).map(|j| self.fit.basis().solve_function(j, x)));
        (phi_x.transpose() * &self.covariance * phi_x)[(0, 0)]
    }

    /// Computes the confidence band for an x value.
    ///
    /// Returns a confidence band representing the uncertainty in the predicted y value.
    ///
    /// # Parameters
    /// - `x`: The x value to compute the confidence band for.
    /// - `confidence_level`: Desired confidence level (e.g., P95).
    /// - `noise_tolerance`: Optional additional variance to add to the prediction variance
    ///   (e.g., to account for measurement noise).
    ///
    /// This estimates the uncertainty in the predicted y value at a specific x
    /// location, providing a range within which the true value is likely to fall.
    ///
    /// # Errors
    /// Returns an error if the confidence level cannot be cast to the required type.
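    ///
    /// # Example
    /// A minimal sketch, assuming `f64` data:
    /// ```
    /// # use polyfit::{MonomialFit, statistics::Confidence};
    /// let fit = MonomialFit::new(&[(0.0, 0.1), (1.0, 0.9), (2.0, 2.1)], 1).unwrap();
    /// let cov = fit.covariance().unwrap();
    /// let band = cov.confidence_band(1.0, Confidence::P95, None).unwrap();
    /// assert!(band.min() <= band.max());
    /// ```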
    pub fn confidence_band(
        &self,
        x: T,
        confidence_level: Confidence,
        noise_tolerance: Option<Tolerance<T>>,
    ) -> Result<ConfidenceBand<T>> {
        let mut y_var = self.prediction_variance(x);
        let value = self.fit.y(x)?;

        let abs_tol = match noise_tolerance {
            None => T::zero(),
            Some(Tolerance::Absolute(tol)) => tol,
            Some(Tolerance::Variance(pct)) => {
                let (data_sdev, _) = statistics::stddev_and_mean(self.fit.data().y_iter());
                let noise_tolerance = data_sdev * pct;
                y_var += noise_tolerance * noise_tolerance;
                T::zero()
            }
            Some(Tolerance::Measurement(pct)) => Value::abs(value) * pct,
        };

        let y_se = y_var.sqrt();

        let z = confidence_level.try_cast::<T>()?;
        let lower = value - z * y_se - abs_tol;
        let upper = value + z * y_se + abs_tol;
        Ok(ConfidenceBand {
            value,
            lower,
            upper,
            level: confidence_level,
            tolerance: noise_tolerance,
        })
    }

    /// Computes the confidence intervals for all data points in the original dataset.
    ///
    /// This evaluates the fitted model at each `x` from the original data and returns
    /// a `ConfidenceBand` for each point, quantifying the uncertainty of predictions.
    ///
    /// # Parameters
    /// - `confidence_level`: Desired confidence level (e.g., P95).
    /// - `noise_tolerance`: Optional additional variance to add to the prediction variance
    ///   (e.g., to account for measurement noise).
    ///
    /// # Returns
    /// - `Ok(Vec<(T, ConfidenceBand<T>)>)` containing one confidence band per data point.
    /// - `Err` if any prediction or type conversion fails.
    ///
    /// # Errors
    /// Returns an error if the confidence level cannot be cast to the required type.
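    ///
    /// # Example
    /// A minimal sketch, assuming `f64` data:
    /// ```
    /// # use polyfit::{MonomialFit, statistics::Confidence};
    /// let fit = MonomialFit::new(&[(0.0, 0.0), (1.0, 1.0), (2.0, 4.0)], 1).unwrap();
    /// let cov = fit.covariance().unwrap();
    /// let bands = cov.solution_confidence(Confidence::P95, None).unwrap();
    /// assert_eq!(bands.len(), 3); // one band per data point
    /// ```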
    pub fn solution_confidence(
        &self,
        confidence_level: Confidence,
        noise_tolerance: Option<Tolerance<T>>,
    ) -> Result<Vec<(T, ConfidenceBand<T>)>> {
        let x = self.fit.data().iter().map(|(x, _)| *x);
        x.map(|x| {
            Ok((
                x,
                self.confidence_band(x, confidence_level, noise_tolerance)?,
            ))
        })
        .collect()
    }

    /// Identifies outliers in the original dataset based on the confidence intervals.
    ///
    /// An outlier is defined as a data point where the actual `y` value falls outside
    /// the computed confidence band for its corresponding `x`.
    ///
    /// The confidence level determines the width of the confidence band. Higher confidence levels have wider bands,
    /// making it less likely for points to be classified as outliers.
    ///
    /// # Parameters
    /// - `confidence_level`: Confidence level used to determine the bounds (e.g., P95).
    /// - `noise_tolerance`: Optional additional variance to add to the prediction variance
    ///   (e.g., to account for measurement noise).
    ///
    /// # Returns
    /// - `Ok(Vec<(T, T, ConfidenceBand<T>)>)` containing `(x, y, confidence_band)` for each outlier.
    /// - `Err` if confidence intervals cannot be computed.
    ///
    /// # Errors
    /// Returns an error if the confidence level cannot be cast to the required type.
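    ///
    /// # Example
    /// A minimal sketch, assuming `f64` data with one point well off the trend:
    /// ```
    /// # use polyfit::{MonomialFit, statistics::Confidence};
    /// let data = &[(0.0, 0.0), (1.0, 1.0), (2.0, 2.0), (3.0, 9.0)];
    /// let fit = MonomialFit::new(data, 1).unwrap();
    /// let cov = fit.covariance().unwrap();
    /// let outliers = cov.outliers(Confidence::P95, None).unwrap();
    /// println!("{} outlier(s) flagged", outliers.len());
    /// ```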
    pub fn outliers(
        &self,
        confidence_level: Confidence,
        noise_tolerance: Option<Tolerance<T>>,
    ) -> Result<Vec<(T, T, ConfidenceBand<T>)>> {
        let bands = self.solution_confidence(confidence_level, noise_tolerance)?;
        let mut outliers = Vec::new();

        for ((x, y), (_, band)) in self.fit.data().iter().zip(bands) {
            if *y < band.lower || *y > band.upper {
                outliers.push((*x, *y, band));
            }
        }

        Ok(outliers)
    }
}

/// Represents a polynomial curve fit for a set of data points.
///
/// `CurveFit` computes a polynomial that best fits a given dataset using a
/// specified polynomial basis (e.g., monomial, Chebyshev). It stores both the
/// original data and the resulting coefficients.
///
/// # For beginners
/// Most users do **not** need to construct this directly. Use one of the
/// specialized type aliases for common bases:
/// - [`crate::MonomialFit`] — uses the standard monomial basis (`1, x, x², …`).
/// - [`crate::ChebyshevFit`] — uses Chebyshev polynomials to reduce oscillation.
///
/// # How it works
/// - Builds a **basis matrix** with shape `[rows, k]` where `rows`
///   is the number of data points and `k` is the number of basis functions.
/// - Forms a **column vector** `b` from the `y` values of the dataset.
/// - Solves the linear system `A * x = b` using the **SVD** of the basis matrix.
///   The solution `x` is the vector of polynomial coefficients.
///
/// # Type parameters
/// - `B`: The basis type, implementing [`Basis<T>`].
/// - `T`: Numeric type (default `f64`) implementing [`Value`].
///
/// # Example
/// ```
/// # use polyfit::MonomialFit;
/// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
/// let fit = MonomialFit::new(data, 2).unwrap();
/// println!("Coefficients: {:?}", fit.coefficients());
/// ```
#[derive(Debug, Clone, PartialEq)]
pub struct CurveFit<'data, B, T: Value = f64>
where
    B: Basis<T>,
    B: PolynomialDisplay<T>,
{
    data: Cow<'data, [(T, T)]>,
    x_range: RangeInclusive<T>,
    function: Polynomial<'static, B, T>,
    k: T,
}
impl<'data, T: Value, B> CurveFit<'data, B, T>
where
    B: Basis<T>,
    B: PolynomialDisplay<T>,
{
    #[cfg(feature = "parallel")]
    const MIN_ROWS_TO_PARALLEL: usize = 1_000_000; // We won't bother parallelizing until n > 1 million

    #[cfg(feature = "parallel")]
    const MAX_STABLE_DIGITS: usize = 8; // Given digits = -log10(epsilon) for T, 10^(digits - MAX_STABLE_DIGITS) gives a threshold for the condition number

    fn create_bigx(data: &[(T, T)], basis: &B, k: usize) -> DMatrix<T> {
        let mut bigx = DMatrix::zeros(data.len(), k);
        for (row, (x, _)) in bigx.row_iter_mut().zip(data.iter()) {
            let x = basis.normalize_x(*x);
            basis.fill_matrix_row(0, x, row);
        }
        bigx
    }

    /// Turns a dataset portion into a basis matrix and y-values vector.
    fn create_matrix(data: &[(T, T)], basis: &B, k: usize) -> (DMatrix<T>, DVector<T>) {
        let bigx = Self::create_bigx(data, basis, k);
        let b = DVector::from_iterator(data.len(), data.iter().map(|&(_, y)| y));
        (bigx, b)
    }

    /// If appropriate, creates the basis matrix in parallel, using the normal equations in chunks.
    /// Otherwise, falls back to the normal `create_matrix`.
    ///
    /// The bool indicates whether parallel processing was used - needed for `new_auto` to know how to slice the matrix properly.
    fn create_parallel_matrix(
        data: &[(T, T)],
        basis: &B,
        k: usize,
    ) -> (DMatrix<T>, DVector<T>, bool) {
        #[cfg(not(feature = "parallel"))]
        {
            let (m, b) = Self::create_matrix(data, basis, k);
            (m, b, false)
        }

        #[cfg(feature = "parallel")]
        {
            use rayon::prelude::*;

            // If the data is small, or not stable, don't parallelize
            let tikhonov = Self::is_data_stable(data, basis, k);
            if tikhonov.is_none() {
                let (m, b) = Self::create_matrix(data, basis, k);

                return (m, b, false);
            }

            // Each thread builds the (xtx, xtb) pair for its chunk, reducing an NxK to a KxK problem
            let threads = rayon::current_num_threads();
            let chunk_size = (data.len() / threads).max(1);
            let thread_data: Vec<&[(T, T)]> = data.chunks(chunk_size).collect();
            let mut partial_results: Vec<(DMatrix<T>, DVector<T>)> = thread_data
                .into_par_iter()
                .map(|chunk| {
                    let (m, b) = Self::create_matrix(chunk, basis, k);
                    Self::invert_matrix(&m, &b)
                })
                .collect();

            // Now accumulate the partial results to get the full (xtx, xtb)
            let (mut xtx, mut xtb) = partial_results.pop().unwrap_or_else(|| {
                (
                    DMatrix::<T>::zeros(k, k), // No data, zero matrix
                    DVector::<T>::zeros(k),    // No data, zero vector
                )
            });

            // We use Kahan summation here to reduce numerical error
            let mut xtx_c = DMatrix::<T>::zeros(k, k);
            let mut xtb_c = DVector::<T>::zeros(k);
            for (part_xtx, part_xtb) in partial_results {
                for i in 0..k {
                    let y = part_xtb[i] - xtb_c[i];
                    let t = xtb[i] + y;
                    xtb_c[i] = (t - xtb[i]) - y;
                    xtb[i] = t;

                    for j in 0..k {
                        let y = part_xtx[(i, j)] - xtx_c[(i, j)];
                        let t = xtx[(i, j)] + y;
                        xtx_c[(i, j)] = (t - xtx[(i, j)]) - y;
                        xtx[(i, j)] = t;
                    }
                }
            }

            // We also use Tikhonov regularization to improve stability
            let tikhonov = tikhonov.unwrap_or(T::zero());
            let identity = DMatrix::<T>::identity(k, k) * tikhonov;
            xtx += identity;

            (xtx, xtb, true)
        }
    }

    /// Checks whether the data is stable enough to fit in normal-equation mode with the given basis and degree.
    /// If so, it also returns a Tikhonov regularization parameter derived from the eigenvalues.
    ///
    /// Returns `Some(tikhonov)` if stable, `None` if not stable.
    #[cfg(feature = "parallel")]
    fn is_data_stable(data: &[(T, T)], basis: &B, k: usize) -> Option<T> {
        if data.len() < Self::MIN_ROWS_TO_PARALLEL {
            return None; // Not enough data to bother
        }

        // First we sample ~1% of the data
        let stride = (data.len() / 100).max(1);
        let sample: Vec<_> = data.iter().step_by(stride).map(|(x, y)| (*x, *y)).collect();

        // Build the basis matrix for the sample
        let bigx = Self::create_bigx(&sample, basis, k);
        let xtx = &bigx.transpose() * bigx;

        // Eigenvalues
        let eigen = xtx.complex_eigenvalues().map(nalgebra::ComplexField::real);
        let max_eigen = eigen.max();
        let min_eigen = eigen.min();

        // Get the condition number, and our threshold for stability
        let condition_number = (max_eigen / min_eigen).sqrt();
        let digits_to_keep = T::try_cast(Self::MAX_STABLE_DIGITS).ok()?;
        let ten = T::try_cast(10).ok()?;
        let threshold = T::one() / T::epsilon() * ten.powf(-T::one() * digits_to_keep);

        if condition_number >= threshold {
            return None; // Not stable
        }

        // We use the mean diagonal trace as the base for the Tikhonov parameter
        let mean_trace = xtx.trace() / T::try_cast(k).ok()?;
        Some(mean_trace * T::epsilon())
    }

    /// Reduces the n×k basis matrix and n×1 y-vector into the k×k and k×1 normal-equation system
    /// (`XᵀX`, `Xᵀb`).
    #[cfg(feature = "parallel")]
    fn invert_matrix(matrix: &DMatrix<T>, b: &DVector<T>) -> (DMatrix<T>, DVector<T>) {
        let xtx = matrix.transpose() * matrix;
        let xtb = matrix.transpose() * b;
        (xtx, xtb)
    }

    /// Solves the linear system using SVD.
    fn solve_matrix(xtx: DMatrix<T>, xtb: &DVector<T>) -> Result<Vec<T>> {
        const SVD_ITER_LIMIT: (usize, usize) = (1000, 10_000);

        let size = xtx.shape();

        // This looks arbitrary, but it is what nalgebra uses internally for its default epsilon.
        // It isn't documented, but I assume it's based on empirical testing.
        // It also matches nalgebra's default behavior, which is a plus here.
        let svd_eps = T::RealField::default_epsilon() * nalgebra::convert(5.0);

        // Decide how many iterations to allow; we can't bound by time, only by iteration count.
        // A good rule of thumb for ill-conditioned problems is max(rows, cols)^2:
        // if it hasn't converged by ~10k iterations, it probably won't.
        // This mostly can't happen for orthogonal bases, so it only matters for the monomial or logarithmic basis.
        let max_dim = size.0.max(size.1);
        let iters = max_dim
            .saturating_mul(max_dim)
            .clamp(SVD_ITER_LIMIT.0, SVD_ITER_LIMIT.1);

        // Calculate the singular value decomposition of the matrix
        let decomp =
            SVD::try_new_unordered(xtx, true, true, svd_eps, iters).ok_or(Error::DidNotConverge)?;
        // Calculate epsilon value
        // ~= machine_epsilon * max(size) * max_singular
        let machine_epsilon = T::epsilon();
        let max_size = size.0.max(size.1);
        let sigma_max = decomp.singular_values.max();
        let epsilon = machine_epsilon * T::try_cast(max_size)? * sigma_max;

        // Solve for X in `SVD * X = b`
        let big_x = decomp.solve(xtb, epsilon).map_err(Error::Algebra)?;
        let coefficients: Vec<_> = big_x.data.into();

        // Make sure the coefficients are valid
        if coefficients.iter().any(|c| c.is_nan()) {
            return Err(Error::Algebra("NaN in coefficients"));
        }

        Ok(coefficients)
    }

    /// Creates a new polynomial curve fit from raw components.
    fn from_raw(
        data: Cow<'data, [(T, T)]>,
        x_range: RangeInclusive<T>,
        basis: B,
        coefs: Vec<T>,
        degree: usize,
    ) -> Result<Self> {
        let k = T::try_cast(coefs.len())?;
        let function = unsafe { Polynomial::from_raw(basis, coefs.into(), degree) }; // Safety: The coefs were generated by the basis

        Ok(Self {
            data,
            x_range,
            function,
            k,
        })
    }

    /// Returns an owned version of this curve fit, with a full copy of the data.
    #[must_use]
    pub fn to_owned(&self) -> CurveFit<'static, B, T> {
        CurveFit {
            data: Cow::Owned(self.data.to_vec()),
            x_range: self.x_range.clone(),
            function: self.function.clone(),
            k: self.k,
        }
    }

    /// Creates a new polynomial curve fit for the given data and degree.
    ///
    /// You can also use [`CurveFit::new_auto`] to automatically select the best degree.
    ///
    /// This constructor fits a polynomial to the provided `(x, y)` points using
    /// the chosen basis type `B`. For most users, `B` will be a Chebyshev or
    /// monomial basis.
    ///
    /// # Parameters
    /// - `data`: Slice of `(x, y)` points to fit.
    /// - `degree`: Desired polynomial degree.
    ///
    /// # Returns
    /// Returns `Ok(Self)` if the fit succeeds.
    ///
    /// # Errors
    /// Returns an [`Error`] in the following cases:
    /// - `Error::NoData`: `data` is empty.
    /// - `Error::DegreeTooHigh`: `degree >= data.len()`.
    /// - `Error::Algebra`: the linear system could not be solved.
    /// - `Error::CastFailed`: a numeric value could not be cast to the target type.
    ///
    /// # Behavior
    /// - Builds the basis matrix internally and fills each row using
    ///   [`Basis::fill_matrix_row`].
    /// - Computes `x_range` as the inclusive range of `x` values in the data.
    /// - Solves the linear system `A * x = b` to determine polynomial coefficients.
    /// - Uses SVD to solve the system for numerical stability.
    ///   - Limits iterations to between 1,000 and 10,000 based on problem size, preventing infinite loops.
    ///   - If the solve has not converged by 10,000 iterations, it probably never will;
    ///     if you hit that, use an orthogonal basis like Chebyshev or Legendre for better stability.
    ///
    /// # Warning
    /// For datasets with >1,000,000 points, the basis matrix may be constructed in parallel.
    /// The normal equations are used to reduce the system size only if the data is stable (see below).
    ///
    /// <div class="warning">
    /// For a given problem of NxK, where N is the number of data points and K is the number of basis functions,
    /// the normal equations are used when N > 1e6 and the condition number of (X^T X) is below 10^(digits - 8),
    /// where `digits` is the number of significant digits for type `T`.
    ///
    /// We use Kahan summation to reduce numerical error when accumulating the partial results, as well as
    /// Tikhonov regularization to improve stability.
    /// </div>
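    ///
    /// A minimal sketch of one Kahan accumulation step like the one used internally
    /// (illustrative only; the library applies this per matrix cell when merging chunks):
    /// ```
    /// let (mut sum, mut c) = (0.0f64, 0.0f64);
    /// for &v in [0.1f64; 10].iter() {
    ///     let y = v - c; // correct the input by the running compensation
    ///     let t = sum + y; // add; low-order bits of `y` may be lost here
    ///     c = (t - sum) - y; // recover the lost bits into the compensation
    ///     sum = t;
    /// }
    /// assert!((sum - 1.0).abs() < 1e-15);
    /// ```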
    ///
    /// # Example
    /// ```
    /// # use polyfit::ChebyshevFit;
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// println!("Coefficients: {:?}", fit.coefficients());
    /// ```
    pub fn new(data: impl Into<Cow<'data, [(T, T)]>>, degree: usize) -> Result<Self> {
        let data: Cow<_> = data.into();

        // Cannot fit if there is no data, or if the degree requires more points than we have.
        if data.is_empty() {
            return Err(Error::NoData);
        } else if degree >= data.len() {
            return Err(Error::DegreeTooHigh(degree));
        }

        let x_range = data.x_range().ok_or(Error::NoData)?;
        let basis = B::from_range(x_range.clone());
        let k = basis.k(degree);

        let (m, b, _) = Self::create_parallel_matrix(&data, &basis, k);
        let coefs = Self::solve_matrix(m, &b)?;
        Self::from_raw(data, x_range, basis, coefs, degree)
    }

    /// Creates a new weighted polynomial curve fit using Gauss nodes.
    ///
    /// This constructor fits a polynomial to the provided sampling function
    /// evaluated at Gauss nodes, using weights associated with those nodes.
    ///
    /// # Parameters
    /// - `sample_fn`: Function that takes an `x` value and returns the corresponding `y` value.
    /// - `x_range`: Inclusive range of `x` values to consider for fitting.
    /// - `degree`: Desired polynomial degree.
    ///
    /// # Returns
    /// Returns `Ok(Self)` if the fit succeeds.
    ///
    /// # Errors
    /// Returns an [`Error`] in the following cases:
    /// - `Error::DegreeTooHigh`: `degree` is invalid for the chosen basis.
    /// - `Error::Algebra`: the linear system could not be solved.
    /// - `Error::CastFailed`: a numeric value could not be cast to the target type.
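    ///
    /// # Example
    /// A minimal sketch sampling `f(x) = x²`, assuming the Chebyshev basis satisfies
    /// the [`OrthogonalBasis`] bound:
    /// ```
    /// # use polyfit::ChebyshevFit;
    /// let fit = ChebyshevFit::new_weighted(|x| x * x, -1.0..=1.0, 2).unwrap();
    /// println!("Coefficients: {:?}", fit.coefficients());
    /// ```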
    pub fn new_weighted<F>(sample_fn: F, x_range: RangeInclusive<T>, degree: usize) -> Result<Self>
    where
        B: OrthogonalBasis<T>,
        F: Fn(T) -> T,
    {
        let basis = B::from_range(x_range.clone());
        let k = basis.k(degree);
        let mut data = Vec::with_capacity(k);
        let nodes = basis.gauss_nodes(k);

        for (x, _) in &nodes {
            // Gauss nodes are in function-space (normalized), so we denormalize to data-space
            let x = basis.denormalize_x(*x);
            let y = sample_fn(x);
            data.push((x, y));
        }

        let mut bigx = Self::create_bigx(&data, &basis, k);
        let mut b = DVector::from_iterator(data.len(), data.iter().map(|&(_, y)| y));
        for (i, (mut row, (_, w))) in bigx.row_iter_mut().zip(nodes).enumerate() {
            let w_sqrt = w.sqrt();
            for j in 0..k {
                row[j] *= w_sqrt; // scale basis row
            }
            b[i] *= w_sqrt; // scale target
        }

        let coefs = Self::solve_matrix(bigx, &b)?;
        Self::from_raw(Cow::Owned(data), x_range, basis, coefs, degree)
    }

    /// Automatically selects the best polynomial degree and creates a curve fit.
    ///
    /// This function fits polynomials of increasing degree to the provided dataset
    /// and selects the “best” degree according to the specified scoring method.
    ///
    /// # Parameters
    /// - `data`: Slice of `(x, y)` points to fit.
    /// - `max_degree`: A [`DegreeBound`] limiting the highest degree to consider.
    /// - `method`: A [`ModelScoreProvider`] (see [`crate::score`]) to evaluate model quality.
    ///   - `AIC`: Akaike Information Criterion (uses `AICc` if `n/k < 4`)
    ///   - `BIC`: Bayesian Information Criterion
    ///
    /// # Choosing a scoring method
    /// - Consider the size of your dataset: if you have a small dataset, prefer `AIC` as it penalizes complexity more gently.
    /// - If your dataset is large, `BIC` may be more appropriate as it imposes a harsher penalty on complexity.
    ///
    /// # Returns
    /// Returns `Ok(Self)` with the fit at the optimal degree.
    ///
    /// # Errors
    /// Returns [`Error`] if:
    /// - `data` is empty (`Error::NoData`)
    /// - A numeric value cannot be represented in the target type (`Error::CastFailed`)
    ///
    /// # Behavior
    /// - Starts with degree 0 and iteratively fits higher degrees up to `data.len() - 1`.
    /// - Evaluates each fit using `model_score(method)`.
    /// - Selects the lowest-degree model whose score is within 2 of the minimum score.
    ///   - Per Burnham and Anderson, models within Δ ≤ 2 are statistically indistinguishable from the best model.
    /// - Uses SVD to solve the system for numerical stability.
    ///   - Limits iterations to between 1,000 and 10,000 based on problem size, preventing infinite loops.
    ///   - If the solve has not converged by 10,000 iterations, it probably never will;
    ///     if you hit that, use an orthogonal basis like Chebyshev or Legendre for better stability.
    ///
    /// # Warning
    /// For datasets with >1,000,000 points, the basis matrix may be constructed in parallel.
    /// The normal equations are used to reduce the system size only if the data is stable (see below).
    ///
    /// <div class="warning">
    /// For a given problem of NxK, where N is the number of data points and K is the number of basis functions,
    /// the normal equations are used when N > 1e6 and the condition number of (X^T X) is below 10^(digits - 8),
    /// where `digits` is the number of significant digits for type `T`.
    ///
    /// We use Kahan summation to reduce numerical error when accumulating the partial results, as well as
    /// Tikhonov regularization to improve stability.
    /// </div>
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, statistics::DegreeBound, score::Aic};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new_auto(data, DegreeBound::Relaxed, &Aic).unwrap();
    /// println!("Selected degree: {}", fit.degree());
    /// ```
    pub fn new_auto(
        data: impl Into<Cow<'data, [(T, T)]>>,
        max_degree: impl Into<DegreeBound>,
        method: &impl ModelScoreProvider,
    ) -> Result<Self> {
        let data: Cow<_> = data.into();
        let max_degree = max_degree.into().max_degree(data.len());
        if data.is_empty() {
            return Err(Error::NoData);
        }

        // Step 1 - Create the basis and matrix once
        let x_range = data.x_range().ok_or(Error::NoData)?;
        let basis = B::from_range(x_range.clone());
        let max_k = basis.k(max_degree);
        let (xtx, xtb, normal_eq) = Self::create_parallel_matrix(&data, &basis, max_k);

        #[cfg(not(feature = "parallel"))]
        let (min_score, model_scores) = {
            // Step 2 - Build models using increasingly narrow slices of the matrix
            let mut min_score = T::infinity();
            let mut model_scores: Vec<(Self, T)> = Vec::with_capacity(max_degree + 1);
            for degree in 0..=max_degree {
                let k = basis.k(degree);

                let height = if normal_eq { k } else { xtx.nrows() };
                let m = xtx.view((0, 0), (height, k)).into_owned();
                let Ok(coefs) = Self::solve_matrix(m, &xtb) else {
                    continue;
                };

                let Ok(model) =
                    Self::from_raw(data.clone(), x_range.clone(), basis.clone(), coefs, degree)
                else {
                    continue;
                };

                let score = model.model_score(method);
                model_scores.push((model, score));
                if score < min_score {
                    min_score = score;
                }
            }

            (min_score, model_scores)
        };

        #[cfg(feature = "parallel")]
        let (min_score, model_scores) = {
            use rayon::prelude::*;

            let mut model_scores: Vec<(Self, T)> = (0..=max_degree)
                .into_par_iter()
                .filter_map(|degree| {
                    let k = basis.k(degree);

                    let height = if normal_eq { k } else { xtx.nrows() };
                    let m = xtx.view((0, 0), (height, k)).into_owned();
                    let coefs = Self::solve_matrix(m, &xtb).ok()?;

                    let model =
                        Self::from_raw(data.clone(), x_range.clone(), basis.clone(), coefs, degree)
                            .ok()?;

                    let score = model.model_score(method);
                    Some((model, score))
                })
                .collect();

            // Sort by degree ascending
            model_scores.sort_by_key(|(m, _)| m.degree());

            let min_score = model_scores
                .iter()
                .map(|(_, score)| *score)
                .fold(T::infinity(), nalgebra::RealField::min);

            (min_score, model_scores)
        };

        // Step 3 - get delta_score
        // Per Burnham and Anderson, use the first delta <= 2 (P = 0.37):
        // statistically indistinguishable from the top model
        for (model, score) in model_scores {
            let delta = score - min_score;
            if delta <= T::two() {
                return Ok(model);
            }
        }

        Err(Error::NoModel)
    }

    /// Creates a new polynomial curve fit using K-fold cross-validation to select the best degree.
    ///
    /// This function splits the dataset into `folds` subsets, using each subset as a validation set while training on the remaining data.
    /// It evaluates polynomial fits of increasing degree and selects the best degree based on the specified scoring method.
    ///
    /// This method helps prevent overfitting by ensuring that the selected model generalizes well to unseen data, and is particularly useful for small datasets
    /// or those with very extreme outliers.
    ///
    /// # Parameters
    /// - `data`: Slice of `(x, y)` points to fit.
    /// - `strategy`: Cross-validation strategy defining the number of folds.
    /// - `max_degree`: Maximum polynomial degree to consider.
    /// - `method`: [`ModelScoreProvider`] to evaluate model quality.
    ///
    /// # Choosing a scoring method
    /// - Consider the size of your dataset: if you have a small dataset, prefer `AIC` as it penalizes complexity more gently.
    /// - If your dataset is large, `BIC` may be more appropriate as it imposes a harsher penalty on complexity.
    ///
    /// # Choosing the number of folds (strategy)
    /// The number of folds (k) determines how many subsets the data is split into for cross-validation. The choice of k impacts the bias-variance trade-off:
    /// - Bias is how much your model's predictions differ from the actual values on average.
    /// - Variance is how much your model's predictions change when you use different training data.
    ///
    /// - Choose [`CvStrategy::MinimizeBias`] (e.g., k=5) with larger datasets to reduce bias. Prevents a model from being too simple to capture data patterns.
    /// - Choose [`CvStrategy::MinimizeVariance`] (e.g., k=10) with smaller datasets to reduce variance. Helps ensure the model generalizes well to unseen data.
    /// - Choose [`CvStrategy::Balanced`] (e.g., k=7) for a compromise between bias and variance.
    /// - Choose [`CvStrategy::LeaveOneOut`] (k=n) for very small datasets, but be aware of the high computational cost.
    /// - Choose [`CvStrategy::Custom(k)`] to specify a custom number of folds, ensuring 2 <= k <= n.
    ///
    /// # Returns
    /// Returns `Ok(Self)` with the fit at the optimal degree.
    ///
    /// # Errors
    /// Returns [`Error`] if:
    /// - `data` is empty or `folds < 2` (`Error::NoData`)
    /// - A numeric value cannot be represented in the target type (`Error::CastFailed`)
    /// - No valid model could be fitted (`Error::NoModel`)
    ///
    /// # Warning
    /// For datasets with >1,000,000 points, the basis matrix may be constructed in parallel.
    /// The normal equations are used to reduce the system size only if the data is stable (see below).
    ///
    /// <div class="warning">
    /// For a given problem of NxK, where N is the number of data points and K is the number of basis functions,
    /// the normal equations are used when N > 1e6 and the condition number of (X^T X) is below 10^(digits - 8),
    /// where `digits` is the number of significant digits for type `T`.
    ///
    /// We use Kahan summation to reduce numerical error when accumulating the partial results, as well as
    /// Tikhonov regularization to improve stability.
    /// </div>
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, statistics::{CvStrategy, DegreeBound}, score::Aic};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new_kfold_cross_validated(data, CvStrategy::LeaveOneOut, DegreeBound::Relaxed, &Aic).unwrap();
    /// println!("Selected degree: {}", fit.degree());
    /// ```
    #[expect(
        clippy::many_single_char_names,
        reason = "It's math, what do you want from me"
    )]
    pub fn new_kfold_cross_validated(
        data: impl Into<Cow<'data, [(T, T)]>>,
        strategy: statistics::CvStrategy,
        max_degree: impl Into<DegreeBound>,
        method: &impl ModelScoreProvider,
    ) -> Result<Self> {
        let data: Cow<_> = data.into();
        let folds = strategy.k(data.len());
        let max_degree = max_degree.into().max_degree(data.len());
        if data.is_empty() || folds < 2 || data.len() < folds {
            return Err(Error::NoData);
        }
        // Computed after validation so `folds` is known to be non-zero
        let fold_size = data.len() / folds;

        // Step 1 - Create the basis and matrix once
        let x_range = data.x_range().ok_or(Error::NoData)?;
        let basis = B::from_range(x_range.clone());
        let k = basis.k(max_degree);
        let (m, b) = Self::create_matrix(data.as_ref(), &basis, k);

        // Step 2 - Precalculate fold boundaries
        let mut fold_ranges = Vec::with_capacity(folds);
        for i in 0..folds {
            let start = i * fold_size;
            let end = if i == folds - 1 {
                data.len()
            } else {
                (i + 1) * fold_size
            };
            fold_ranges.push(start..end);
        }

        // Step 3 - Use `folds` views into m and b for training and test sets
        // We do this for each degree candidate, and each fold
        let mut min_score = T::infinity();
        let mut candidates = Vec::with_capacity(max_degree + 1);
        for degree in 0..=max_degree {
            let k = basis.k(degree);
            let m = m.view((0, 0), (m.nrows(), k)).into_owned();

            // Evaluate this degree with K-fold cross-validation
            let mut mean_score = T::zero();
            let mut n = T::zero();
            for i in 0..folds {
                let test_range = &fold_ranges[i];
                let test_data = &data[test_range.clone()];

                let mut fold_m = DMatrix::zeros(data.len() - test_range.len(), k);
                let fold_b = DVector::from_iterator(
                    data.len() - test_data.len(),
                    b.iter().enumerate().filter_map(|(j, &y)| {
                        if test_range.contains(&j) {
                            None
                        } else {
                            Some(y)
                        }
                    }),
                );

                // Copy relevant rows from m into fold_m
                for (i, src) in m.row_iter().enumerate() {
                    if !test_range.contains(&i) {
                        let dst_index = if i < test_range.start {
                            i
                        } else {
                            i - test_range.len()
                        };
                        let mut dst = fold_m.row_mut(dst_index);
                        dst.copy_from(&src);
                    }
                }

                let Ok(coefs) = Self::solve_matrix(fold_m, &fold_b) else {
                    continue;
                };

                let Ok(model) =
                    Self::from_raw(data.clone(), x_range.clone(), basis.clone(), coefs, degree)
                else {
                    continue;
                };

                let y = test_data.y_iter();
                let predicted = model.as_polynomial().solve(test_data.x_iter());
                let y_fit = predicted.y_iter();
                mean_score += method.score(y, y_fit, model.k);
                n += T::one();
            }

            mean_score /= n;
            candidates.push((degree, mean_score));
            if mean_score < min_score {
                min_score = mean_score;
            }
        }

        // Step 4 - Select the best model within 2 AIC units of the minimum (Burnham and Anderson 2002)
        for (degree, score) in candidates {
            let delta = score - min_score;
            if delta <= T::two() {
                return Self::new(data, degree);
            }
        }

        Err(Error::NoModel)
    }

    /// Prunes coefficients that are statistically insignificant based on a t-test.
    ///
    /// # Parameters
    /// - `confidence`: Confidence level for determining significance (e.g., P95, P99)
    ///
    /// # Errors
    /// Returns an error if the covariance matrix cannot be computed.
    ///
    /// # Returns
    /// A vector of `(index, coefficient)` for all pruned coefficients.
    ///
    /// # Notes
    /// - Modifies `self` in-place, zeroing out insignificant coefficients.
    /// - Uses the standard errors derived from the covariance matrix.
    /// - Ignores coefficients whose absolute value is smaller than `T::epsilon()`.
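    ///
    /// # Example
    /// A minimal sketch, assuming `f64` data that is nearly linear:
    /// ```
    /// # use polyfit::{MonomialFit, statistics::Confidence};
    /// let data = &[(0.0, 0.1), (1.0, 1.1), (2.0, 1.9), (3.0, 3.2), (4.0, 3.9)];
    /// let mut fit = MonomialFit::new(data, 2).unwrap();
    /// let pruned = fit.prune_insignificant(Confidence::P95).unwrap();
    /// println!("Pruned {} coefficient(s)", pruned.len());
    /// ```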
    pub fn prune_insignificant(&mut self, confidence: Confidence) -> Result<Vec<(usize, T)>> {
        let covariance = self.covariance()?;
        let se = covariance.coefficient_standard_errors();
        let coeffs = self.coefficients();

        let df = self.data().len().saturating_sub(coeffs.len());
        let t_crit = confidence.t_score(df);
        let t_crit = T::try_cast(t_crit)?;

        let mut insignificant = Vec::new();
        for (i, (&c, s)) in coeffs.iter().zip(se).enumerate() {
            let t = Value::abs(c) / s;
            // Compare the magnitude, so negative coefficients are treated the same as positive ones
            if t < t_crit && Value::abs(c) > T::epsilon() {
                insignificant.push((i, c));
            }
        }

        let coefs_mut = self.function.coefficients_mut();
        for (i, _) in &insignificant {
            coefs_mut[*i] = T::zero();
        }

        Ok(insignificant)
    }

    /// Returns a reference to the basis function.
    pub(crate) fn basis(&self) -> &B {
        self.function.basis()
    }

    /// Computes the covariance matrix and related statistics for this curve fit.
    ///
    /// The returned [`CurveFitCovariance`] provides:
    /// - Covariance matrix of the fitted coefficients.
    /// - Standard errors of the coefficients.
    /// - Prediction variance at a specific `x`.
    /// - Confidence intervals for predictions.
    ///
    /// # Returns
    /// - `Ok(CurveFitCovariance<'_, '_, B, T>)` on success.
    ///
    /// # Errors
    /// Returns `Err(Error::Algebra)` if `(Xᵀ X)` is singular or nearly singular,
    /// i.e., the pseudo-inverse cannot be computed. Causes include too few data points
    /// relative to parameters or collinear/linearly dependent basis functions.
    ///
    /// # Example
    /// ```rust
    /// # use polyfit::statistics::Confidence;
    /// # use polyfit::MonomialFit;
    /// # let model = MonomialFit::new(&[(0.0, 0.0), (1.0, 1.0)], 1).unwrap();
    /// let cov = model.covariance().unwrap();
    /// let se = cov.coefficient_standard_errors();
    /// let band = cov.confidence_band(1.0, Confidence::P95, None).unwrap();
    /// println!("Predicted CI at x=1: {} - {}", band.min(), band.max());
    /// ```
    pub fn covariance(&self) -> Result<CurveFitCovariance<'_, '_, B, T>> {
        CurveFitCovariance::new(self)
    }

    /// Finds the critical points (where the derivative is zero) of a polynomial in this basis.
    ///
    /// These correspond to the polynomial's local minima and maxima (the `x` values where the slope is zero).
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// The critical points are found by solving the equation `f'(x) = 0`, where `f'(x)` is the derivative of the polynomial.
    ///
    /// This is done by finding the eigenvalues of the companion matrix of the derivative polynomial.
    /// </div>
    ///
    /// # Returns
    /// A vector of `x` values where the critical points occur.
    ///
    /// # Requirements
    /// - The polynomial's basis `B` must implement [`DifferentialBasis`].
    ///
    /// # Errors
    /// Returns an error if the critical points cannot be found.
    ///
    /// # Example
    /// ```rust
    /// # use polyfit::MonomialPolynomial;
    /// # use polyfit::statistics::Confidence;
    /// # use polyfit::MonomialFit;
    /// # let model = MonomialFit::new(&[(0.0, 0.0), (1.0, 1.0)], 1).unwrap();
    /// let critical_points = model.critical_points().unwrap();
    /// ```
    pub fn critical_points(&self) -> Result<Vec<CriticalPoint<T>>>
    where
        B: DifferentialBasis<T>,
        B::B2: DifferentialBasis<T>,
    {
        let points = self.function.critical_points(self.x_range.clone())?;
        Ok(points)
    }

    /// Computes the definite integral (area under the curve) of the fitted polynomial
    /// between `x_min` and `x_max`.
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// The area under the curve is computed using the definite integral of the polynomial
    /// between the specified bounds:
    /// ```math
    /// Area = ∫[x_min to x_max] f(x) dx = F(x_max) - F(x_min)
    /// ```
    /// </div>
    ///
    /// # Parameters
    /// - `x_min`: Lower bound of integration.
    /// - `x_max`: Upper bound of integration.
    /// - `constant`: Constant of integration (value at x = 0) for the indefinite integral.
    ///
    /// # Requirements
    /// - The polynomial's basis `B` must implement [`IntegralBasis`].
    ///
    /// # Returns
    /// - `Ok(T)`: The computed area under the curve between `x_min` and `x_max`.
    /// - `Err`: If computing the integral fails (e.g., basis cannot compute integral coefficients).
    ///
    /// # Errors
    /// If the basis cannot compute the integral coefficients, an error is returned.
    ///
    /// # Example
    /// ```rust
    /// # use polyfit::statistics::Confidence;
    /// # use polyfit::MonomialFit;
    /// # let model = MonomialFit::new(&[(0.0, 0.0), (1.0, 1.0)], 1).unwrap();
    /// let area = model.area_under_curve(0.0, 3.0, None).unwrap();
    /// println!("Area under curve: {}", area);
    /// ```
    pub fn area_under_curve(&self, x_min: T, x_max: T, constant: Option<T>) -> Result<T>
    where
        B: IntegralBasis<T>,
    {
        self.function.area_under_curve(x_min, x_max, constant)
    }

    /// Returns the X-values where the function is not monotone (i.e., where the derivative changes sign).
    ///
    /// # Errors
    /// Returns an error if the derivative cannot be computed.
    ///
    /// # Example
    /// ```rust
    /// # use polyfit::MonomialFit;
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = MonomialFit::new(data, 2).unwrap();
    /// let violations = fit.monotonicity_violations().unwrap();
    /// ```
    pub fn monotonicity_violations(&self) -> Result<Vec<T>>
    where
        B: DifferentialBasis<T>,
    {
        self.function.monotonicity_violations(self.x_range.clone())
    }

    /// Computes the quality score of the polynomial fit using the specified method.
    ///
    /// This evaluates how well the fitted polynomial represents the data, taking
    /// into account both the fit error and model complexity.
    ///
    /// # Parameters
    /// - `method`: [`ModelScoreProvider`] to use for scoring.
    ///   - `AIC`: Akaike Information Criterion (uses `AICc` if `n/k < 4`)
    ///   - `BIC`: Bayesian Information Criterion
    ///
    /// # Returns
    /// The score as a numeric value (`T`). Lower scores indicate better models.
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, score::Aic};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let score = fit.model_score(&Aic);
    /// println!("Model score: {}", score);
    /// ```
    pub fn model_score(&self, method: &impl ModelScoreProvider) -> T {
        let y = self.data.y_iter();
        let y_fit = self.solution().into_iter().map(|(_, y)| y);
        method.score(y, y_fit, self.k)
    }

    /// Computes the residuals of the fit.
    ///
    /// Residuals are the differences between the observed `y` values and the predicted `y` values from the fitted polynomial.
    /// They provide insight into the fit quality and can be used for diagnostic purposes.
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// ```math
    /// residual_i = y_i - f(x_i)
    /// where
    ///   y_i = observed value, f(x_i) = predicted value from the polynomial at x_i
    /// ```
    /// </div>
    ///
    /// # Returns
    /// A vector of residuals, where each element corresponds to a data point.
    ///
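    /// # Example
    /// A quick way to inspect the per-point error:
    /// ```
    /// # use polyfit::MonomialFit;
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = MonomialFit::new(data, 1).unwrap();
    /// for (x, r) in fit.residuals() {
    ///     println!("residual at x = {}: {}", x, r);
    /// }
    /// ```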
    pub fn residuals(&self) -> Vec<(T, T)> {
        let y = self.data.y_iter();
        y.zip(self.solution())
            .map(|(y, (x, y_fit))| (x, y - y_fit))
            .collect()
    }

    /// Computes the residuals of the fit, filtering out small residuals likely due to floating point noise.
    ///
    /// This form can help minimize the impact of floating point precision and rounding.
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// ```math
    /// max_val = max(|y_i|, |f(x_i)|, 1)
    /// epsilon = machine_epsilon * sqrt(n) * max_val
    /// residual_i = y_i - f(x_i) if |y_i - f(x_i)| >= epsilon else dropped
    /// where
    ///   y_i = observed value, f(x_i) = predicted value from the polynomial at x_i, n = number of data points
    /// ```
    /// </div>
    ///
    /// # Returns
    /// A vector of filtered residuals, where each element corresponds to a retained data point.
    pub fn filtered_residuals(&self) -> Vec<(T, T)> {
        // Get max(|y|, |y_fit|, 1)
        let max_val = self
            .data
            .iter()
            .chain(self.solution().iter())
            .map(|(_, y)| y.abs())
            .fold(T::zero(), nalgebra::RealField::max);
        let max_val = nalgebra::RealField::max(max_val, T::one());

        // Residual epsilon
        let root_n = T::from_positive_int(self.data.len()).sqrt();
        let epsilon = T::epsilon() * root_n * max_val;

        let y = self.data.y_iter();
        y.zip(self.solution())
            .filter_map(|(y, (x, y_fit))| {
                let r = y - y_fit;
                if nalgebra::ComplexField::abs(r) < epsilon {
                    None
                } else {
                    Some((x, r))
                }
            })
            .collect()
    }

    /// Computes the residual variance of the model's predictions.
    ///
    /// See [`statistics::residual_variance`].
    ///
    /// Residual variance is the unbiased estimate of the variance of the
    /// errors (σ²) after fitting a model. It's used for confidence intervals
    /// and covariance estimates of the fitted parameters.
    pub fn residual_variance(&self) -> T {
        let y = self.data.y_iter();
        let y_fit = self.solution().into_iter().map(|(_, y)| y);
        statistics::residual_variance(y, y_fit, self.k)
    }

    /// Computes the mean squared error (MSE) of this fit against its source data.
    ///
    /// See [`statistics::mean_squared_error`].
    ///
    /// MSE measures the average squared difference between the observed and predicted values.
    /// Lower values indicate a better fit.
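    ///
    /// # Example
    /// A minimal sketch; a degree-2 fit through three points is exact, so the MSE is at (or near) zero:
    /// ```
    /// # use polyfit::MonomialFit;
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = MonomialFit::new(data, 2).unwrap();
    /// let mse = fit.mean_squared_error();
    /// assert!(mse >= 0.0);
    /// ```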
    pub fn mean_squared_error(&self) -> T {
        let y = self.data.y_iter();
        let y_fit = self.solution().into_iter().map(|(_, y)| y);
        statistics::mean_squared_error(y, y_fit)
    }

    /// Computes the root mean squared error (RMSE) of this fit against its source data.
    ///
    /// See [`statistics::root_mean_squared_error`].
    ///
    /// RMSE is the square root of the MSE, giving error in the same units as the original data.
    /// Lower values indicate a closer fit.
    pub fn root_mean_squared_error(&self) -> T {
        let y = self.data.y_iter();
        let y_fit = self.solution().into_iter().map(|(_, y)| y);
        statistics::root_mean_squared_error(y, y_fit)
    }

    /// Computes the mean absolute error (MAE) of this fit against its source data.
    ///
    /// See [`statistics::mean_absolute_error`].
    ///
    /// MAE measures the average absolute difference between observed and predicted values.
    /// Lower values indicate a better fit.
    pub fn mean_absolute_error(&self) -> T {
        let y = self.data.y_iter();
        let y_fit = self.solution().into_iter().map(|(_, y)| y);
        statistics::mean_absolute_error(y, y_fit)
    }

    /// Calculates the R-squared value for the model compared to the provided data.
    ///
    /// If `data` is not provided, uses the original data used to create the fit;
    /// that way you can test the fit against a different dataset if desired.
    ///
    /// R-squared is a statistical measure of how well the polynomial explains
    /// the variance in the data. Values closer to 1 indicate a better fit.
    ///
    /// # Parameters
    /// - `data`: A slice of `(x, y)` pairs to compare against the polynomial fit.
    ///
    /// See [`statistics::r_squared`] for more details.
    ///
    /// # Returns
    /// The R-squared value as type `T`.
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, CurveFit};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let r2 = fit.r_squared(None);
    /// println!("R² = {}", r2);
    /// ```
    pub fn r_squared(&self, data: Option<&[(T, T)]>) -> T {
        let data = data.unwrap_or(self.data());

        let y = data.iter().map(|&(_, y)| y);
        let y_fit = self.solution().into_iter().map(|(_, y)| y);

        statistics::r_squared(y, y_fit)
    }

    /// Uses Huber loss to compute a robust R-squared value.
    /// - More robust to outliers than traditional R².
    /// - Values closer to 1 indicate a better fit.
    ///
    /// If `data` is not provided, uses the original data used to create the fit;
    /// that way you can test the fit against a different dataset if desired.
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// ```math
    /// R²_robust = 1 - (Σ huber_loss(y_i - y_fit_i, delta)) / (Σ (y_i - y_fit_i)²)
    /// where
    ///   huber_loss(r, delta) = { 0.5 * r²                     if |r| ≤ delta
    ///                          { delta * (|r| - 0.5 * delta)  if |r| > delta
    ///   delta = 1.345 * MAD
    ///   MAD = median( |y_i - y_fit_i| )
    /// where
    ///   y_i = observed values, y_fit_i = predicted values
    /// ```
    /// </div>
    ///
    /// # Type Parameters
    /// - `T`: A numeric type implementing the `Value` trait.
    ///
    /// # Parameters
    /// - `data`: A slice of `(x, y)` pairs to compare against the polynomial fit.
    ///
    /// # Returns
    /// The robust R² as a `T`.
    ///
    /// # Example
    /// ```rust
    /// # use polyfit::{ChebyshevFit, CurveFit};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let r2 = fit.robust_r_squared(None);
    /// println!("R² = {}", r2);
    /// ```
    pub fn robust_r_squared(&self, data: Option<&[(T, T)]>) -> T {
        let data = data.unwrap_or(self.data());

        let y = data.iter().map(|&(_, y)| y);
        let y_fit = self.solution().into_iter().map(|(_, y)| y);

        statistics::robust_r_squared(y, y_fit)
    }

    /// Computes the adjusted R-squared value.
    ///
    /// Adjusted R² accounts for the number of predictors in a model, penalizing
    /// overly complex models. Use it to compare models of different degrees.
    ///
    /// If `data` is not provided, uses the original data used to create the fit;
    /// that way you can test the fit against a different dataset if desired.
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// ```math
    /// R²_adj = R² - (1 - R²) * (k / (n - k))
    /// where
    ///   n = number of observations, k = number of model parameters
    /// ```
    /// [`r_squared`] is used to compute R²
    /// </div>
    ///
    /// # Type Parameters
    /// - `T`: A numeric type implementing the `Value` trait.
    ///
    /// # Parameters
    /// - `data`: A slice of `(x, y)` pairs to compare against the polynomial fit.
    ///
    /// # Returns
    /// The adjusted R² as a `T`.
    ///
    /// # Example
    /// ```rust
    /// # use polyfit::{ChebyshevFit, CurveFit};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let r2 = fit.adjusted_r_squared(None);
    /// println!("R² = {}", r2);
    /// ```
    pub fn adjusted_r_squared(&self, data: Option<&[(T, T)]>) -> T {
        let data = data.unwrap_or(self.data());

        let y = data.iter().map(|&(_, y)| y);
        let y_fit = self.solution().into_iter().map(|(_, y)| y);

        statistics::adjusted_r_squared(y, y_fit, self.k)
    }

    /// Calculates the R-squared value for the model compared to a provided function.
    ///
    /// R-squared is a statistical measure of how well the polynomial explains
    /// the variance in the data. Values closer to 1 indicate a better fit.
    ///
    /// # Parameters
    /// - `function`: The polynomial to compare against; it is evaluated at this fit's x-values.
    ///
    /// See [`statistics::r_squared`] for more details.
    ///
    /// # Returns
    /// The R-squared value as type `T`.
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, MonomialPolynomial};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let target = MonomialPolynomial::borrowed(&[1.0, 2.0, 1.0]);
    /// let r2 = fit.r_squared_against(&target);
    /// println!("R² vs target polynomial = {}", r2);
    /// ```
    pub fn r_squared_against<C>(&self, function: &Polynomial<C, T>) -> T
    where
        C: Basis<T>,
        C: PolynomialDisplay<T>,
    {
        let data: Vec<_> = self
            .data()
            .iter()
            .map(|&(x, _)| (x, function.y(x)))
            .collect();
        self.r_squared(Some(&data))
    }

    /// Computes the folded root mean squared error (RMSE) with uncertainty estimation.
    ///
    /// This is a measure of how far the predicted values are from the actual values, on average.
    ///
    /// Doing it over K-fold cross-validation (splitting the data into K subsets, and leaving one out for testing each time)
    /// tells us how the model performance changes as the data varies.
    ///
    /// When to use each strategy:
    /// - `MinimizeBias`: When the dataset is small and you want to avoid underfitting. Prevents a model from being too simple to capture data patterns.
    /// - `MinimizeVariance`: When the dataset is large and you want to avoid overfitting. Helps ensure the model generalizes well to unseen data.
    /// - `Balanced`: When you want a good trade-off between bias and variance, suitable for moderately sized datasets or when unsure.
    /// - `Custom`: Specify your own number of folds (k) based on domain knowledge or specific requirements. Use with caution.
    ///
    /// The returned [`statistics::UncertainValue<T>`] includes both the mean RMSE and its uncertainty (standard deviation across folds).
    ///
    /// You can use [`statistics::UncertainValue::confidence_band`] to get confidence intervals for the RMSE.
    ///
    /// # Parameters
    /// - `strategy`: The cross-validation strategy to use (e.g., K-Fold with K=5).
    ///
    /// # Returns
    /// An [`statistics::UncertainValue<T>`] representing the folded RMSE with uncertainty.
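    ///
    /// # Example
    /// A minimal sketch, assuming `f64` data:
    /// ```
    /// # use polyfit::{MonomialFit, statistics::CvStrategy};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0), (3.0, 13.0), (4.0, 21.0)];
    /// let fit = MonomialFit::new(data, 2).unwrap();
    /// let rmse = fit.folded_rmse(CvStrategy::LeaveOneOut);
    /// // `rmse` carries both the mean RMSE and its uncertainty across folds.
    /// ```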
    pub fn folded_rmse(&self, strategy: statistics::CvStrategy) -> statistics::UncertainValue<T> {
        let y = self.data.y_iter();
        let y_fit = self.solution().into_iter().map(|(_, y)| y);
        statistics::folded_rmse(y, y_fit, strategy)
    }

    /// Returns the degree of the polynomial.
    ///
    /// The number of components (basis functions) used to express a given degree is
    /// defined by the basis.
    ///
    /// That number is called `k`. For most basis choices, `k = degree + 1`. See the example below.
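    ///
    /// # Example
    /// For a fixed-degree constructor, the requested degree is reported back:
    /// ```
    /// # use polyfit::MonomialFit;
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = MonomialFit::new(data, 2).unwrap();
    /// assert_eq!(fit.degree(), 2);
    /// ```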
    pub fn degree(&self) -> usize {
        self.function.degree()
    }

    /// Returns a reference to the polynomial’s coefficients.
    ///
    /// The index of each coefficient corresponds to the jth basis function.
    ///
    /// For example, the monomial expression `y(x) = 2x^2 - 3x + 1` has
    /// coefficients `[1.0, -3.0, 2.0]`.
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// Formally, for each coefficient *j*, and the jth basis function *`B_j(x)`*, the relationship is:
    /// ```math
    /// y(x) = Σ (c_j * B_j(x))
    /// ```
    /// </div>
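    ///
    /// # Example
    /// A quick check of the coefficient layout:
    /// ```
    /// # use polyfit::MonomialFit;
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = MonomialFit::new(data, 2).unwrap();
    /// // One coefficient per basis function: k = degree + 1 = 3 here.
    /// assert_eq!(fit.coefficients().len(), 3);
    /// ```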
    pub fn coefficients(&self) -> &[T] {
        self.function.coefficients()
    }

    /// Returns a reference to the data points used for fitting.
    ///
    /// Each element is an `(x, y)` tuple representing a data point.
    pub fn data(&self) -> &[(T, T)] {
        &self.data
    }

    /// Returns the inclusive range of x-values in the dataset.
    pub fn x_range(&self) -> RangeInclusive<T> {
        self.x_range.clone()
    }

    /// Returns the inclusive range of y-values in the dataset.
    ///
    /// This is computed dynamically from the stored data points, so use it sparingly
    /// in performance-sensitive code.
    pub fn y_range(&self) -> RangeInclusive<T> {
        let min_y = self
            .data
            .iter()
            .map(|&(_, y)| y)
            .fold(T::infinity(), <T as nalgebra::RealField>::min);
        let max_y = self
            .data
            .iter()
            .map(|&(_, y)| y)
            .fold(T::neg_infinity(), <T as nalgebra::RealField>::max);
        min_y..=max_y
    }

    /// Evaluates the polynomial at a given x-value.
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// Given [`Basis::k`] coefficients and basis functions, for each pair of coefficient *`c_j`* and basis function *`B_j(x)`*, this function returns:
    /// ```math
    /// y(x) = Σ (c_j * B_j(x))
    /// ```
    /// </div>
    ///
    /// # Parameters
    /// - `x`: The point at which to evaluate the polynomial.
    ///
    /// # Returns
    /// The corresponding y-value as `T` if `x` is within the valid range.
    ///
    /// # Errors
    /// Returns [`Error::DataRange`] if `x` is outside the original data bounds.
    ///
    /// # Notes
    /// - Polynomial fits are generally only stable within the x-range used for fitting.
    /// - To evaluate outside the original bounds, use [`CurveFit::as_polynomial`] to get
    ///   a pure polynomial function that ignores the original x-range.
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, CurveFit};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let y = fit.y(1.0).unwrap();
    /// println!("y(1.0) = {}", y);
    /// ```
    pub fn y(&self, x: T) -> Result<T> {
        if !self.x_range.contains(&x) {
            return Err(Error::DataRange(
                format!("{}", self.x_range.start()),
                format!("{}", self.x_range.end()),
            ));
        }

        Ok(self.function.y(x))
    }

    /// Returns the fitted y-values corresponding to the original x-values.
    ///
    /// This produces a vector of `(x, y)` pairs for the same x-values used in
    /// the source data. It is guaranteed to succeed because all x-values are
    /// within the curve's valid range.
    ///
    /// # Notes
    /// - Useful for quickly plotting or analyzing the fitted curve against the
    ///   original data points.
    /// - This is equivalent to calling [`CurveFit::y`] at each original x-value,
    ///   but infallible because those values are always within the valid range.
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, CurveFit};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let points = fit.solution();
    /// for (x, y) in points {
    ///     println!("x = {}, y = {}", x, y);
    /// }
    /// ```
    pub fn solution(&self) -> Vec<(T, T)> {
        self.data()
            .iter()
            .map(|&(x, _)| (x, self.function.y(x)))
            .collect()
    }

    /// Evaluates the curve at multiple x-values.
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// Given [`Basis::k`] coefficients and basis functions, for each pair of coefficient *`c_j`* and basis function *`B_j(x)`*, this function returns:
    /// ```math
    /// y(x) = Σ (c_j * B_j(x))
    /// ```
    /// </div>
    ///
    /// # Parameters
    /// - `x`: An iterator of x-values to evaluate.
    ///
    /// # Returns
    /// A vector of `(x, y)` pairs corresponding to each input x-value.
    ///
    /// # Errors
    /// Returns [`Error::DataRange`] if any x-value is outside the original data range.
    ///
    /// # Notes
    /// - Curve fits are generally only stable within the x-range used for fitting.
    /// - To evaluate outside the original bounds, use [`CurveFit::as_polynomial`]
    ///   to get a pure polynomial function that ignores the original x-range.
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, CurveFit};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let points = fit.solve([0.0, 0.5, 1.0]).unwrap();
    /// for (x, y) in points {
    ///     println!("x = {}, y = {}", x, y);
    /// }
    /// ```
    pub fn solve(&self, x: impl IntoIterator<Item = T>) -> Result<Vec<(T, T)>> {
        x.into_iter().map(|x| Ok((x, self.y(x)?))).collect()
    }

    /// Evaluates the curve at evenly spaced points over a range.
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// Given [`Basis::k`] coefficients and basis functions, for each pair of coefficient *`c_j`* and basis function *`B_j(x)`*, this function returns:
    /// ```math
    /// y(x) = Σ (c_j * B_j(x))
    /// ```
    /// </div>
    ///
    /// # Parameters
    /// - `range`: The start and end x-values to evaluate.
    /// - `step`: The increment between points.
    ///
    /// # Returns
    /// A vector of `(x, y)` pairs corresponding to each x-value in the range.
    ///
    /// # Errors
    /// Returns [`Error::DataRange`] if any x-value is outside the original data range.
    ///
    /// # Notes
    /// - Curve fits are only stable within the x-range used for fitting.
    /// - To evaluate outside the original bounds, use [`CurveFit::as_polynomial`]
    ///   to get a pure polynomial function.
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, CurveFit};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let points = fit.solve_range(0.0..=2.0, 0.5).unwrap();
    /// for (x, y) in points {
    ///     println!("x = {}, y = {}", x, y);
    /// }
    /// ```
    pub fn solve_range(&self, range: RangeInclusive<T>, step: T) -> Result<Vec<(T, T)>> {
        self.solve(SteppedValues::new(range, step))
    }

    /// Returns a pure polynomial representation of the curve fit.
    ///
    /// This allows evaluation of the polynomial at **any x-value**, without
    /// restriction to the original data range. Unlike [`CurveFit::y`] or
    /// [`CurveFit::solve`], this does not perform range checks, so use with
    /// caution outside the fit’s stable region.
    ///
    /// The [`Polynomial`] form is considered a canonical function, not a fit estimate.
    ///
    /// # Returns
    /// A reference to the [`Polynomial`] that this fit uses internally.
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, CurveFit};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let poly = fit.as_polynomial();
    /// let y = poly.y(10.0); // can evaluate outside original x-range
    /// ```
    pub fn as_polynomial(&self) -> &Polynomial<'_, B, T> {
        &self.function
    }

    /// Consumes the curve fit and returns its pure polynomial representation.
    ///
    /// This is primarily a memory optimization: you become responsible for maintaining
    /// stability around the x-bounds, but the copy of the original data and the
    /// Vandermonde matrix are dropped.
    ///
    /// This allows evaluation of the polynomial at **any x-value**, without
    /// restriction to the original data range. Unlike [`CurveFit::y`] or
    /// [`CurveFit::solve`], this does not perform range checks, so use with
    /// caution outside the fit’s stable region.
    ///
    /// The [`Polynomial`] form is considered a canonical function, not a fit estimate.
    ///
    /// # Returns
    /// The [`Polynomial`] that this fit uses internally.
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, CurveFit};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let poly = fit.into_polynomial();
    /// let y = poly.y(10.0); // can evaluate outside original x-range
    /// ```
    pub fn into_polynomial(self) -> Polynomial<'static, B, T> {
        self.function
    }

    /// Converts the curve fit into a monomial polynomial.
    ///
    /// This produces a [`MonomialPolynomial`] representation of the curve,
    /// which uses the standard monomial basis `1, x, x^2, …`.
    ///
    /// # Returns
    /// A monomial polynomial with owned coefficients.
    ///
    /// # Errors
    /// Returns an error if the current basis cannot be converted to monomial form.
    /// This requires that the basis implements [`IntoMonomialBasis`].
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, CurveFit, MonomialPolynomial};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let mono_poly = fit.as_monomial().unwrap();
    /// let y = mono_poly.y(1.5);
    /// ```
    pub fn as_monomial(&self) -> Result<MonomialPolynomial<'static, T>>
    where
        B: IntoMonomialBasis<T>,
    {
        let mut coefficients = self.coefficients().to_vec();
        self.basis().as_monomial(&mut coefficients)?;
        Ok(MonomialPolynomial::owned(coefficients))
    }

    /// Computes the energy contribution of each coefficient in an orthogonal basis.
    ///
    /// This is a measure of how much each basis function contributes to the resulting polynomial.
    ///
    /// It can be useful for understanding the significance of each term.
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// For an orthogonal basis, the energy contribution of each coefficient is calculated as:
    /// ```math
    /// E_j = c_j^2 * N_j
    /// ```
    /// where:
    /// - `E_j` is the energy contribution of the jth coefficient.
    /// - `c_j` is the jth coefficient.
    /// - `N_j` is the normalization factor for the jth basis function, provided by the basis.
    ///
    /// </div>
    ///
    /// # Returns
    /// A vector of energy contributions for each coefficient.
    ///
    /// # Errors
    /// Returns an error if the series is not orthogonal, like with integrated Fourier series.
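    ///
    /// # Example
    /// A sketch using the Chebyshev basis, which this crate documents as orthogonal:
    /// ```
    /// # use polyfit::ChebyshevFit;
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0), (3.0, 13.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// // One energy value per coefficient.
    /// let energies = fit.coefficient_energies().unwrap();
    /// assert_eq!(energies.len(), fit.coefficients().len());
    /// ```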
    pub fn coefficient_energies(&self) -> Result<Vec<T>>
    where
        B: OrthogonalBasis<T>,
    {
        self.function.coefficient_energies()
    }

    /// Computes a smoothness metric for the polynomial.
    ///
    /// This metric quantifies how "smooth" the polynomial is, with lower values indicating smoother curves.
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// The smoothness is calculated as a weighted average of the coefficient energies, where higher-degree coefficients are penalized more heavily.
    /// The formula used is:
    /// ```math
    /// Smoothness = (Σ (k^2 * E_k)) / (Σ E_k)
    /// ```
    /// where:
    /// - `k` is the degree of the basis function.
    /// - `E_k` is the energy contribution of the k-th coefficient.
    /// </div>
    ///
    /// # Returns
    /// A smoothness value, where lower values indicate a smoother polynomial.
    ///
    /// # Errors
    /// Returns an error if the series is not orthogonal, like with integrated Fourier series.
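    ///
    /// # Example
    /// A sketch, again assuming an orthogonal (Chebyshev) basis; since the energies
    /// are non-negative, the weighted average should be too:
    /// ```
    /// # use polyfit::ChebyshevFit;
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0), (3.0, 13.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let smoothness = fit.smoothness().unwrap();
    /// assert!(smoothness >= 0.0);
    /// ```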
    pub fn smoothness(&self) -> Result<T>
    where
        B: OrthogonalBasis<T>,
    {
        self.function.smoothness()
    }

    /// Applies a spectral energy filter to the series.
    ///
    /// This uses the properties of an orthogonal basis to de-noise the polynomial by removing higher-degree terms that contribute little to the overall energy.
    /// Terms are split into "signal" and "noise" based on their energy contributions, and the polynomial is truncated to only include the signal components.
    ///
    /// Remaining terms are smoothly attenuated to prevent ringing artifacts from a hard cutoff.
    ///
    /// <div class="warning">
    ///
    /// **Technical Details**
    ///
    /// The energy of each coefficient is calculated using the formula:
    ///
    /// ```math
    /// E_j = c_j^2 * N_j
    /// ```
    /// where:
    /// - `E_j` is the energy contribution of the jth coefficient.
    /// - `c_j` is the jth coefficient.
    /// - `N_j` is the normalization factor for the jth basis function, provided by the basis.
    ///
    /// Generalized Cross-Validation (GCV) is used to determine the optimal cutoff degree `K` that minimizes the prediction error using:
    /// `GCV(K) = (suffix[0] - suffix[K]) / K^2`, where `suffix` is the suffix sum of energies.
    ///
    /// A Lanczos sigma filter with p=1 is applied to smoothly attenuate coefficients up to the cutoff degree, reducing Gibbs ringing artifacts.
    /// </div>
    ///
    /// # Notes
    /// - This method modifies the polynomial in place.
    ///
    /// # Errors
    /// Returns an error if the basis is not orthogonal. This can be checked with [`Polynomial::is_orthogonal`].
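    ///
    /// # Example
    /// A minimal in-place sketch; on an already-clean low-degree fit the filter
    /// should be close to a no-op:
    /// ```
    /// # use polyfit::ChebyshevFit;
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0), (3.0, 13.0)];
    /// let mut fit = ChebyshevFit::new(data, 3).unwrap();
    /// fit.spectral_energy_filter().unwrap();
    /// ```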
    pub fn spectral_energy_filter(&mut self) -> Result<()>
    where
        B: OrthogonalBasis<T>,
    {
        self.function.spectral_energy_filter()
    }

    /// Returns a human-readable string of the polynomial equation.
    ///
    /// The output shows the polynomial in standard mathematical notation, for example:
    /// ```text
    /// y = 1.0x^3 + 2.0x^2 + 3.0x + 4.0
    /// ```
    ///
    /// # Notes
    /// - Requires the basis to implement [`PolynomialDisplay`] for formatting.
    /// - This operation is infallible and guaranteed to succeed, hence no error return.
    ///
    /// # Example
    /// ```
    /// # use polyfit::{ChebyshevFit, CurveFit};
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// println!("{}", fit.equation());
    /// ```
    #[expect(clippy::missing_panics_doc, reason = "Infallible operation")]
    pub fn equation(&self) -> String {
        let mut output = String::new();
        self.basis()
            .format_polynomial(&mut output, self.coefficients())
            .expect("String should be infallible");
        output
    }

    /// Returns the properties of the curve fit.
    ///
    /// This is a comprehensive summary of the fit's characteristics.
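    ///
    /// # Example
    /// A quick check of the summary against the source data:
    /// ```
    /// # use polyfit::ChebyshevFit;
    /// let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
    /// let fit = ChebyshevFit::new(data, 2).unwrap();
    /// let props = fit.properties();
    /// assert_eq!(props.data_points, 3);
    /// ```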
    pub fn properties(&self) -> FitProperties<T> {
        FitProperties {
            degree: self.degree(),
            data_points: self.data().len(),
            coefficients: self.coefficients().to_vec(),
            coefficient_errors: self
                .covariance()
                .map(|cov| cov.coefficient_standard_errors())
                .ok(),
            mse: self.mean_squared_error(),
            r_squared: self.r_squared(None),
        }
    }
}

impl<B, T: Value> AsRef<Polynomial<'_, B, T>> for CurveFit<'_, B, T>
where
    B: Basis<T>,
    B: PolynomialDisplay<T>,
{
    fn as_ref(&self) -> &Polynomial<'static, B, T> {
        &self.function
    }
}

impl<T: Value, B> std::fmt::Display for CurveFit<'_, B, T>
where
    B: Basis<T>,
    B: PolynomialDisplay<T>,
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.equation())
    }
}

/// A set of diagnostic properties for a curve fit.
///
/// Can be serialized to JSON or other formats.
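///
/// # Example
/// A sketch of JSON round-tripping via serde, where `fit` is a previously constructed
/// `CurveFit`; this assumes a `serde_json` dependency in the consuming crate, so the
/// snippet is marked `ignore`:
/// ```ignore
/// let props = fit.properties();
/// let json = serde_json::to_string(&props).unwrap();
/// let back: FitProperties<f64> = serde_json::from_str(&json).unwrap();
/// assert_eq!(props, back);
/// ```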
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq)]
pub struct FitProperties<T: Value> {
    /// The degree of the fitted polynomial.
    pub degree: usize,

    /// The number of data points used in the fit.
    pub data_points: usize,

    /// The coefficients of the fitted polynomial.
    pub coefficients: Vec<T>,

    /// The standard errors of the fitted coefficients, if the covariance matrix
    /// could be computed.
    pub coefficient_errors: Option<Vec<T>>,

    /// The mean squared error of the fit.
    pub mse: T,

    /// The R² value of the fit.
    pub r_squared: T,
}

#[cfg(test)]
#[cfg(feature = "transforms")]
mod tests {
    use crate::{
        assert_close, assert_fits,
        score::Aic,
        statistics::CvStrategy,
        transforms::{ApplyNoise, Strength},
        MonomialFit,
    };

    use super::*;

    #[test]
    fn test_curvefit_new_and_coefficients() {
        let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
        let fit = MonomialFit::new(data, 2).unwrap();
        let coefs = fit.coefficients();
        assert_eq!(coefs.len(), 3);
    }

    #[test]
    fn test_big() {
        function!(poly(x) = 1.0 + 2.0 x^1 + 3.0 x^2 + 4.0 x^3 + 5.0 x^4 + 6.0 x^5);
        let data = poly.solve_range(0.0..=10_000_000.0, 1.0);
        let fit = ChebyshevFit::new(&data, 5).unwrap();
        assert_fits!(poly, fit, 0.999);
    }

    #[test]
    fn test_curvefit_y_in_range() {
        let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
        let fit = MonomialFit::new(data, 2).unwrap();
        let y: f64 = fit.y(1.0).unwrap();
        assert!(y.is_finite());
    }

    #[test]
    fn test_curvefit_y_out_of_range() {
        let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
        let fit = MonomialFit::new(data, 2).unwrap();
        let y = fit.y(-1.0);
        assert!(y.is_err());
    }

    #[test]
    fn test_curvefit_solution_matches_data_len() {
        let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
        let fit = MonomialFit::new(data, 2).unwrap();
        let solution = fit.solution();
        assert_eq!(solution.len(), data.len());
    }

    #[test]
    fn test_curvefit_covariance_and_standard_errors() {
        let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
        let fit = MonomialFit::new(data, 2).unwrap();
        let cov = fit.covariance().unwrap();
        let errors = cov.coefficient_standard_errors();
        assert_eq!(errors.len(), 3);
        for err in errors {
            assert!(err >= 0.0);
        }
    }

    #[test]
    fn test_curvefit_confidence_band() {
        let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
        let fit = MonomialFit::new(data, 2).unwrap();
        let cov = fit.covariance().unwrap();
        let band = cov.confidence_band(1.0, Confidence::P95, None).unwrap();
        assert!(band.lower <= band.upper);
        assert!(band.value >= band.lower && band.value <= band.upper);
    }

    #[test]
    fn test_curvefit_model_score_and_r_squared() {
        let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
        let fit = MonomialFit::new(data, 2).unwrap();
        let score: f64 = fit.model_score(&Aic);
        let r2 = fit.r_squared(None);
        assert!(score.is_finite());
        assert_close!(r2, 1.0);

        function!(mono(x) = 1.0 + 2.0 x^1); // strictly increasing
        let data = mono
            .solve_range(0.0..=1000.0, 1.0)
            .apply_normal_noise(Strength::Relative(0.3), None);
        let fit = MonomialFit::new_auto(&data, DegreeBound::Relaxed, &Aic).unwrap();
        assert!(fit.r_squared(None) < 1.0);
        assert!(fit.model_score(&Aic).is_finite());
    }

    #[test]
    fn test_curvefit_as_polynomial_and_into_polynomial() {
        let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
        let fit = MonomialFit::new(data, 2).unwrap();
        let poly_ref = fit.as_polynomial();
        let poly_owned = fit.clone().into_polynomial();
        assert_eq!(poly_ref.coefficients(), poly_owned.coefficients());
    }

    #[test]
    fn test_curvefit_properties() {
        let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
        let fit = MonomialFit::new(data, 2).unwrap();
        let props = fit.properties();
        assert_eq!(props.degree, 2);
        assert_eq!(props.data_points, 3);
        assert_eq!(props.coefficients.len(), 3);
        assert!(props.mse >= 0.0);
        assert!(props.r_squared <= 1.0 && props.r_squared >= 0.0);
    }

    #[test]
    fn test_curvefit_new_auto_selects_best_degree() {
        let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0), (3.0, 13.0)];
        let fit = MonomialFit::new_auto(data, DegreeBound::Relaxed, &Aic).unwrap();
        assert!(fit.degree() < data.len());
    }

    #[test]
    fn test_curvefit_solve_and_solve_range() {
        let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
        let fit = MonomialFit::new(data, 2).unwrap();
        let xs = vec![0.0, 1.0, 2.0];
        let points = fit.solve(xs.clone()).unwrap();
        assert_eq!(points.len(), xs.len());
        let range_points = fit.solve_range(0.0..=2.0, 1.0).unwrap();
        assert_eq!(range_points.len(), 3);
    }

    #[test]
    fn test_kfold() {
        function!(mono(x) = 5 x^5 - 3 x^3 + 2 x^2 + 1.0);
        let data = mono
            .solve_range(0.0..=1000.0, 1.0)
            .apply_salt_pepper_noise(
                0.01,
                Strength::Absolute(-1000.0),
                Strength::Absolute(1000.0),
                None,
            )
            .apply_poisson_noise(Strength::Relative(2.0), None);
        crate::plot!([data, mono]);
        let fit = MonomialFit::new_kfold_cross_validated(
            &data,
            CvStrategy::MinimizeBias,
            DegreeBound::Relaxed,
            &Aic,
        )
        .unwrap();
        assert_fits!(mono, fit, 0.7);
    }
}
2129}