use std::{borrow::Cow, ops::RangeInclusive};
use nalgebra::{DMatrix, DVector, SVD};
use crate::{
basis::{
Basis, CriticalPoint, DifferentialBasis, IntegralBasis, IntoMonomialBasis, OrthogonalBasis,
RootFindingBasis,
},
display::PolynomialDisplay,
error::{Error, Result},
score::ModelScoreProvider,
statistics::{self, Confidence, ConfidenceBand, CvStrategy, DegreeBound, Tolerance},
value::{CoordExt, SteppedValues, Value},
MonomialPolynomial, Polynomial,
};
pub type LogarithmicFit<'data, T = f64> = CurveFit<'data, crate::basis::LogarithmicBasis<T>, T>;
pub type LaguerreFit<'data, T = f64> = CurveFit<'data, crate::basis::LaguerreBasis<T>, T>;
pub type PhysicistsHermiteFit<'data, T = f64> =
CurveFit<'data, crate::basis::PhysicistsHermiteBasis<T>, T>;
pub type ProbabilistsHermiteFit<'data, T = f64> =
CurveFit<'data, crate::basis::ProbabilistsHermiteBasis<T>, T>;
pub type LegendreFit<'data, T = f64> = CurveFit<'data, crate::basis::LegendreBasis<T>, T>;
pub type FourierFit<'data, T = f64> = CurveFit<'data, crate::basis::FourierBasis<T>, T>;
pub type LinearAugmentedFourierFit<'data, T = f64> =
CurveFit<'data, crate::basis::LinearAugmentedFourierBasis<T>, T>;
pub type ChebyshevFit<'data, T = f64> = CurveFit<'data, crate::basis::ChebyshevBasis<T>, T>;
pub type MonomialFit<'data, T = f64> = CurveFit<'data, crate::basis::MonomialBasis<T>, T>;
pub struct CurveFitCovariance<'a, 'data, B, T: Value = f64>
where
B: Basis<T>,
B: PolynomialDisplay<T>,
{
fit: &'a CurveFit<'data, B, T>,
covariance: DMatrix<T>,
}
impl<'a, 'data, B, T: Value> CurveFitCovariance<'a, 'data, B, T>
where
B: Basis<T>,
B: PolynomialDisplay<T>,
{
pub fn new(fit: &'a CurveFit<'data, B, T>) -> Result<Self> {
let n = fit.data.len();
let k = fit.basis().k(fit.degree());
let mut x_matrix = DMatrix::zeros(n, k);
for (i, (x, _)) in fit.data.iter().enumerate() {
let x = fit.basis().normalize_x(*x);
for j in 0..k {
x_matrix[(i, j)] = fit.basis().solve_function(j, x);
}
}
let xtx = &x_matrix.transpose() * &x_matrix;
let xtx_reg = &xtx + DMatrix::<T>::identity(k, k) * T::epsilon();
let svd = xtx_reg.svd(true, true);
let xtx_inv = svd.pseudo_inverse(T::epsilon()).map_err(Error::Algebra)?;
let res_var = fit.residual_variance();
let covariance = xtx_inv * res_var;
Ok(Self { fit, covariance })
}
#[must_use]
pub fn coefficient_standard_error(&self, j: usize) -> Option<T> {
let cell = self.covariance.get((j, j))?;
Some(cell.sqrt())
}
#[must_use]
pub fn coefficient_standard_errors(&self) -> Vec<T> {
let cov = &self.covariance;
(0..cov.ncols())
.filter_map(|i| self.coefficient_standard_error(i))
.collect()
}
pub fn prediction_variance(&self, x: T) -> T {
let k = self.fit.basis().k(self.fit.degree());
let x = self.fit.basis().normalize_x(x);
let phi_x =
DVector::from_iterator(k, (0..k).map(|j| self.fit.basis().solve_function(j, x)));
(phi_x.transpose() * &self.covariance * phi_x)[(0, 0)]
}
pub fn confidence_band(
&self,
x: T,
confidence_level: Confidence,
noise_tolerance: Option<Tolerance<T>>,
) -> Result<ConfidenceBand<T>> {
let mut y_var = self.prediction_variance(x);
let value = self.fit.y(x)?;
let abs_tol = match noise_tolerance {
None => T::zero(),
Some(Tolerance::Absolute(tol)) => tol,
Some(Tolerance::Variance(pct)) => {
let (data_sdev, _) = statistics::stddev_and_mean(self.fit.data().y_iter());
let noise_tolerance = data_sdev * pct;
y_var += noise_tolerance * noise_tolerance;
T::zero()
}
Some(Tolerance::Measurement(pct)) => Value::abs(value) * pct,
};
let y_se = y_var.sqrt();
let z = confidence_level.try_cast::<T>()?;
let lower = value - z * y_se - abs_tol;
let upper = value + z * y_se + abs_tol;
Ok(ConfidenceBand {
value,
lower,
upper,
level: confidence_level,
tolerance: noise_tolerance,
})
}
pub fn solution_confidence(
&self,
confidence_level: Confidence,
noise_tolerance: Option<Tolerance<T>>,
) -> Result<Vec<(T, ConfidenceBand<T>)>> {
let x = self.fit.data().iter().map(|(x, _)| *x);
x.map(|x| {
Ok((
x,
self.confidence_band(x, confidence_level, noise_tolerance)?,
))
})
.collect()
}
pub fn outliers(
&self,
confidence_level: Confidence,
noise_tolerance: Option<Tolerance<T>>,
) -> Result<Vec<(T, T, ConfidenceBand<T>)>> {
let bands = self.solution_confidence(confidence_level, noise_tolerance)?;
let mut outliers = Vec::new();
for ((x, y), (_, band)) in self.fit.data().iter().zip(bands) {
if *y < band.lower || *y > band.upper {
outliers.push((*x, *y, band));
}
}
Ok(outliers)
}
}
#[derive(Debug, Clone, PartialEq)]
pub struct CurveFit<'data, B, T: Value = f64>
where
B: Basis<T>,
B: PolynomialDisplay<T>,
{
data: Cow<'data, [(T, T)]>,
x_range: RangeInclusive<T>,
function: Polynomial<'static, B, T>,
k: T,
}
impl<'data, T: Value, B> CurveFit<'data, B, T>
where
B: Basis<T>,
B: PolynomialDisplay<T>,
{
#[cfg(feature = "parallel")]
const MIN_ROWS_TO_PARALLEL: usize = 1_000_000;
#[cfg(feature = "parallel")]
const MAX_STABLE_DIGITS: usize = 8;
fn create_bigx(data: &[(T, T)], basis: &B, k: usize) -> DMatrix<T> {
let mut bigx = DMatrix::zeros(data.len(), k);
for (row, (x, _)) in bigx.row_iter_mut().zip(data.iter()) {
let x = basis.normalize_x(*x);
basis.fill_matrix_row(0, x, row);
}
bigx
}
fn create_matrix(data: &[(T, T)], basis: &B, k: usize) -> (DMatrix<T>, DVector<T>) {
let bigx = Self::create_bigx(data, basis, k);
let b = DVector::from_iterator(data.len(), data.iter().map(|&(_, y)| y));
(bigx, b)
}
fn create_parallel_matrix(
data: &[(T, T)],
basis: &B,
k: usize,
) -> (DMatrix<T>, DVector<T>, bool) {
#[cfg(not(feature = "parallel"))]
{
let (m, b) = Self::create_matrix(data, basis, k);
(m, b, false)
}
#[cfg(feature = "parallel")]
{
use rayon::prelude::*;
let tikhonov = Self::is_data_stable(data, basis, k);
if tikhonov.is_none() {
let (m, b) = Self::create_matrix(data, basis, k);
return (m, b, false);
}
let threads = rayon::current_num_threads();
let chunk_size = (data.len() / threads).max(1);
let thread_data: Vec<&[(T, T)]> = data.chunks(chunk_size).collect();
let mut partial_results: Vec<(DMatrix<T>, DVector<T>)> = thread_data
.into_par_iter()
.map(|chunk| {
let (m, b) = Self::create_matrix(chunk, basis, k);
Self::invert_matrix(&m, &b)
})
.collect();
let (mut xtx, mut xtb) = partial_results.pop().unwrap_or_else(|| {
(
DMatrix::<T>::zeros(k, k), DVector::<T>::zeros(k), )
});
let mut xtx_c = DMatrix::<T>::zeros(k, k);
let mut xtb_c = DVector::<T>::zeros(k);
for (part_xtx, part_xtb) in partial_results {
for i in 0..k {
let y = part_xtb[i] - xtb_c[i];
let t = xtb[i] + y;
xtb_c[i] = (t - xtb[i]) - y;
xtb[i] = t;
for j in 0..k {
let y = part_xtx[(i, j)] - xtx_c[(i, j)];
let t = xtx[(i, j)] + y;
xtx_c[(i, j)] = (t - xtx[(i, j)]) - y;
xtx[(i, j)] = t;
}
}
}
let tikhonov = tikhonov.unwrap_or(T::zero());
let identity = DMatrix::<T>::identity(k, k) * tikhonov;
xtx += identity;
(xtx, xtb, true)
}
}
#[cfg(feature = "parallel")]
fn is_data_stable(data: &[(T, T)], basis: &B, k: usize) -> Option<T> {
if data.len() < Self::MIN_ROWS_TO_PARALLEL {
return None; }
let stride = (data.len() / 100).max(1);
let sample: Vec<_> = data.iter().step_by(stride).map(|(x, y)| (*x, *y)).collect();
let bigx = Self::create_bigx(&sample, basis, k);
let xtx = &bigx.transpose() * bigx;
let eigen = xtx.complex_eigenvalues().map(nalgebra::ComplexField::real);
let max_eigen = eigen.max();
let min_eigen = eigen.min();
let condition_number = (max_eigen / min_eigen).sqrt();
let digits_to_keep = T::try_cast(Self::MAX_STABLE_DIGITS).ok()?;
let ten = T::try_cast(10).ok()?;
let threshold = T::one() / T::epsilon() * ten.powf(-T::one() * digits_to_keep);
if condition_number >= threshold {
return None; }
let mean_trace = xtx.trace() / T::try_cast(k).ok()?;
Some(mean_trace * T::epsilon())
}
#[cfg(feature = "parallel")]
fn invert_matrix(matrix: &DMatrix<T>, b: &DVector<T>) -> (DMatrix<T>, DVector<T>) {
let xtx = matrix.transpose() * matrix;
let xtb = matrix.transpose() * b;
(xtx, xtb)
}
fn solve_matrix(xtx: DMatrix<T>, xtb: &DVector<T>) -> Result<Vec<T>> {
const SVD_ITER_LIMIT: (usize, usize) = (1000, 10_000);
let size = xtx.shape();
let svd_eps = T::RealField::default_epsilon() * nalgebra::convert(5.0);
let max_dim = size.0.max(size.1);
let iters = max_dim
.saturating_mul(max_dim)
.clamp(SVD_ITER_LIMIT.0, SVD_ITER_LIMIT.1);
let decomp =
SVD::try_new_unordered(xtx, true, true, svd_eps, iters).ok_or(Error::DidNotConverge)?;
let machine_epsilon = T::epsilon();
let max_size = size.0.max(size.1);
let sigma_max = decomp.singular_values.max();
let epsilon = machine_epsilon * T::try_cast(max_size)? * sigma_max;
let big_x = decomp.solve(xtb, epsilon).map_err(Error::Algebra)?;
let coefficients: Vec<_> = big_x.data.into();
if coefficients.iter().any(|c| c.is_nan()) {
return Err(Error::Algebra("NaN in coefficients"));
}
Ok(coefficients)
}
fn from_raw(
data: Cow<'data, [(T, T)]>,
x_range: RangeInclusive<T>,
basis: B,
coefs: Vec<T>,
degree: usize,
) -> Result<Self> {
let k = T::try_cast(coefs.len())?;
let function = unsafe { Polynomial::from_raw(basis, coefs.into(), degree) };
Ok(Self {
data,
x_range,
function,
k,
})
}
#[must_use]
pub fn to_owned(&self) -> CurveFit<'static, B, T> {
CurveFit {
data: Cow::Owned(self.data.to_vec()),
x_range: self.x_range.clone(),
function: self.function.clone(),
k: self.k,
}
}
pub fn new(data: impl Into<Cow<'data, [(T, T)]>>, degree: usize) -> Result<Self> {
let data: Cow<_> = data.into();
if data.is_empty() {
return Err(Error::NoData);
} else if degree >= data.len() {
return Err(Error::DegreeTooHigh(degree));
}
let x_range = data.x_range().ok_or(Error::NoData)?;
let basis = B::from_range(x_range.clone());
let k = basis.k(degree);
let (m, b, _) = Self::create_parallel_matrix(&data, &basis, k);
let coefs = Self::solve_matrix(m, &b)?;
Self::from_raw(data, x_range, basis, coefs, degree)
}
pub fn new_weighted<F>(sample_fn: F, x_range: RangeInclusive<T>, degree: usize) -> Result<Self>
where
B: OrthogonalBasis<T>,
F: Fn(T) -> T,
{
let basis = B::from_range(x_range.clone());
let k = basis.k(degree);
let mut data = Vec::with_capacity(k);
let nodes = basis.gauss_nodes(k);
for (x, _) in &nodes {
let x = basis.denormalize_x(*x);
let y = sample_fn(x);
data.push((x, y));
}
let mut bigx = Self::create_bigx(&data, &basis, k);
let mut b = DVector::from_iterator(data.len(), data.iter().map(|&(_, y)| y));
for (i, (mut row, (_, w))) in bigx.row_iter_mut().zip(nodes).enumerate() {
let w_sqrt = w.sqrt();
for j in 0..k {
row[j] *= w_sqrt; }
b[i] *= w_sqrt; }
let coefs = Self::solve_matrix(bigx, &b)?;
Self::from_raw(Cow::Owned(data), x_range, basis, coefs, degree)
}
pub fn new_auto(
data: impl Into<Cow<'data, [(T, T)]>>,
max_degree: impl Into<DegreeBound>,
method: &impl ModelScoreProvider<B, T>,
) -> Result<Self> {
let data: Cow<_> = data.into();
let max_degree = max_degree.into().max_degree(data.len());
if data.is_empty() {
return Err(Error::NoData);
}
let x_range = data.x_range().ok_or(Error::NoData)?;
let basis = B::from_range(x_range.clone());
let max_k = basis.k(max_degree);
let (xtx, xtb, normal_eq) = Self::create_parallel_matrix(&data, &basis, max_k);
#[cfg(not(feature = "parallel"))]
let (min_score, model_scores) = {
let mut min_score = T::infinity();
let mut model_scores: Vec<(Self, T)> = Vec::with_capacity(max_degree + 1);
for degree in 0..=max_degree {
let k = basis.k(degree);
let height = if normal_eq { k } else { xtx.nrows() };
let m = xtx.view((0, 0), (height, k)).into_owned();
let Ok(coefs) = Self::solve_matrix(m, &xtb) else {
continue;
};
let Ok(model) =
Self::from_raw(data.clone(), x_range.clone(), basis.clone(), coefs, degree)
else {
continue;
};
let score = model.model_score(method);
model_scores.push((model, score));
if score < min_score {
min_score = score;
}
}
(min_score, model_scores)
};
#[cfg(feature = "parallel")]
let (min_score, model_scores) = {
use rayon::prelude::*;
let mut model_scores: Vec<(Self, T)> = (0..=max_degree)
.into_par_iter()
.filter_map(|degree| {
let k = basis.k(degree);
let height = if normal_eq { k } else { xtx.nrows() };
let m = xtx.view((0, 0), (height, k)).into_owned();
let coefs = Self::solve_matrix(m, &xtb).ok()?;
let model =
Self::from_raw(data.clone(), x_range.clone(), basis.clone(), coefs, degree)
.ok()?;
let score = model.model_score(method);
Some((model, score))
})
.collect();
model_scores.sort_by_key(|(m, _)| m.degree());
let min_score = model_scores
.iter()
.map(|(_, score)| *score)
.fold(T::infinity(), nalgebra::RealField::min);
(min_score, model_scores)
};
let min_dist = method
.minimum_significant_distance()
.map_or(T::zero(), T::from_positive_int);
for (model, score) in model_scores {
let delta = score - min_score;
if delta <= min_dist {
return Ok(model);
}
}
Err(Error::NoModel)
}
#[expect(
clippy::many_single_char_names,
reason = "It's math what do you want from me"
)]
pub fn new_kfold_cross_validated(
data: impl Into<Cow<'data, [(T, T)]>>,
strategy: impl Into<CvStrategy>,
max_degree: impl Into<DegreeBound>,
method: &impl ModelScoreProvider<B, T>,
) -> Result<Self> {
let data: Cow<_> = data.into();
let folds = strategy.into().k(data.len());
let fold_size = data.len() / folds;
let max_degree = max_degree.into().max_degree(data.len());
if data.is_empty() || folds < 2 || data.len() < folds {
return Err(Error::NoData);
}
let x_range = data.x_range().ok_or(Error::NoData)?;
let basis = B::from_range(x_range.clone());
let k = basis.k(max_degree);
let (m, b) = Self::create_matrix(data.as_ref(), &basis, k);
let mut fold_ranges = Vec::with_capacity(folds);
for i in 0..folds {
let start = i * fold_size;
let end = if i == folds - 1 {
data.len()
} else {
(i + 1) * fold_size
};
fold_ranges.push(start..end);
}
let mut min_score = T::infinity();
let mut candidates = Vec::with_capacity(max_degree + 1);
for degree in 0..=max_degree {
let k = basis.k(degree);
let m = m.view((0, 0), (m.nrows(), k)).into_owned();
let mut mean_score = T::zero();
let mut n = T::zero();
for i in 0..folds {
let test_range = &fold_ranges[i];
let test_data = &data[test_range.clone()];
let mut fold_m = DMatrix::zeros(data.len() - test_range.len(), k);
let fold_b = DVector::from_iterator(
data.len() - test_data.len(),
b.iter().enumerate().filter_map(|(j, &y)| {
if test_range.contains(&j) {
None
} else {
Some(y)
}
}),
);
for (i, src) in m.row_iter().enumerate() {
if !test_range.contains(&i) {
let dst_index = if i < test_range.start {
i
} else {
i - test_range.len()
};
let mut dst = fold_m.row_mut(dst_index);
dst.copy_from(&src);
}
}
let Ok(coefs) = Self::solve_matrix(fold_m, &fold_b) else {
continue;
};
let Ok(model) =
Self::from_raw(data.clone(), x_range.clone(), basis.clone(), coefs, degree)
else {
continue;
};
let y = test_data.y_iter();
let predicted = model.as_polynomial().solve(test_data.x_iter());
let y_fit = predicted.y_iter();
let score = method.score(&model, y, y_fit, model.k);
mean_score += score;
n += T::one();
}
mean_score /= n;
candidates.push((degree, mean_score));
if mean_score < min_score {
min_score = mean_score;
}
}
let min_dist = method
.minimum_significant_distance()
.map_or(T::zero(), T::from_positive_int);
for (degree, score) in candidates {
let delta = score - min_score;
if delta <= min_dist {
return Self::new(data, degree);
}
}
Err(Error::NoModel)
}
pub fn prune_insignificant(&mut self, confidence: Confidence) -> Result<Vec<(usize, T)>> {
let covariance = self.covariance()?;
let se = covariance.coefficient_standard_errors();
let coeffs = self.coefficients();
let df = self.data().len().saturating_sub(coeffs.len());
let t_crit = confidence.t_score(df);
let t_crit = T::try_cast(t_crit)?;
let mut insignificant = Vec::new();
for (i, (&c, s)) in coeffs.iter().zip(se).enumerate() {
let t = Value::abs(c) / s;
if t < t_crit && c > T::epsilon() {
insignificant.push((i, c));
}
}
let coefs_mut = self.function.coefficients_mut();
for (i, _) in &insignificant {
coefs_mut[*i] = T::zero();
}
Ok(insignificant)
}
pub(crate) fn basis(&self) -> &B {
self.function.basis()
}
pub fn covariance(&self) -> Result<CurveFitCovariance<'_, '_, B, T>> {
CurveFitCovariance::new(self)
}
pub fn critical_points(&self) -> Result<Vec<CriticalPoint<T>>>
where
B: DifferentialBasis<T>,
B::B2: DifferentialBasis<T>,
B::B2: RootFindingBasis<T>,
{
let points = self.function.critical_points(self.x_range.clone())?;
Ok(points)
}
pub fn area_under_curve(&self, x_min: T, x_max: T, constant: Option<T>) -> Result<T>
where
B: IntegralBasis<T>,
{
self.function.area_under_curve(x_min, x_max, constant)
}
pub fn monotonicity_violations(&self) -> Result<Vec<T>>
where
B: DifferentialBasis<T>,
B::B2: RootFindingBasis<T>,
{
self.function.monotonicity_violations(self.x_range.clone())
}
pub fn model_score(&self, method: &impl ModelScoreProvider<B, T>) -> T {
let y = self.data.y_iter();
let y_fit = self.solution().into_iter().map(|(_, y)| y);
method.score(self, y, y_fit, self.k)
}
pub fn residuals(&self) -> Vec<(T, T)> {
let y = self.data.y_iter();
y.zip(self.solution())
.map(|(y, (x, y_fit))| (x, y - y_fit))
.collect()
}
pub fn filtered_residuals(&self) -> Vec<(T, T)> {
let max_val = self
.data
.iter()
.chain(self.solution().iter())
.map(|(_, y)| y.abs())
.fold(T::zero(), nalgebra::RealField::max);
let max_val = nalgebra::RealField::max(max_val, T::one());
let root_n = T::from_positive_int(self.data.len()).sqrt();
let epsilon = T::epsilon() * root_n * max_val;
let y = self.data.y_iter();
y.zip(self.solution())
.filter_map(|(y, (x, y_fit))| {
let r = y - y_fit;
let r = if nalgebra::ComplexField::abs(r) < epsilon {
None?
} else {
r
};
Some((x, r))
})
.collect()
}
pub fn residual_variance(&self) -> T {
let y = self.data.y_iter();
let y_fit = self.solution().into_iter().map(|(_, y)| y);
statistics::residual_variance(y, y_fit, self.k)
}
pub fn mean_squared_error(&self) -> T {
let y = self.data.y_iter();
let y_fit = self.solution().into_iter().map(|(_, y)| y);
statistics::mean_squared_error(y, y_fit)
}
pub fn root_mean_squared_error(&self) -> T {
let y = self.data.y_iter();
let y_fit = self.solution().into_iter().map(|(_, y)| y);
statistics::root_mean_squared_error(y, y_fit)
}
pub fn mean_absolute_error(&self) -> T {
let y = self.data.y_iter();
let y_fit = self.solution().into_iter().map(|(_, y)| y);
statistics::mean_absolute_error(y, y_fit)
}
pub fn r_squared(&self, data: Option<&[(T, T)]>) -> T {
let data = data.unwrap_or(self.data());
let y = data.iter().map(|&(_, y)| y);
let y_fit = self.solution().into_iter().map(|(_, y)| y);
statistics::r_squared(y, y_fit)
}
pub fn robust_r_squared(&self, data: Option<&[(T, T)]>) -> T {
let data = data.unwrap_or(self.data());
let y = data.iter().map(|&(_, y)| y);
let y_fit = self.solution().into_iter().map(|(_, y)| y);
statistics::robust_r_squared(y, y_fit)
}
pub fn adjusted_r_squared(&self, data: Option<&[(T, T)]>) -> T {
let data = data.unwrap_or(self.data());
let y = data.iter().map(|&(_, y)| y);
let y_fit = self.solution().into_iter().map(|(_, y)| y);
statistics::adjusted_r_squared(y, y_fit, self.k)
}
pub fn r_squared_against<C>(&self, function: &Polynomial<C, T>) -> T
where
C: Basis<T>,
C: PolynomialDisplay<T>,
{
let data: Vec<_> = self
.data()
.iter()
.map(|&(x, _)| (x, function.y(x)))
.collect();
self.r_squared(Some(&data))
}
pub fn folded_rmse(&self, strategy: statistics::CvStrategy) -> statistics::UncertainValue<T> {
let y = self.data.y_iter();
let y_fit = self.solution().into_iter().map(|(_, y)| y);
statistics::folded_rmse(y, y_fit, strategy)
}
pub fn degree(&self) -> usize {
self.function.degree()
}
pub fn coefficients(&self) -> &[T] {
self.function.coefficients()
}
pub fn data(&self) -> &[(T, T)] {
&self.data
}
pub fn x_range(&self) -> RangeInclusive<T> {
self.x_range.clone()
}
pub fn y_range(&self) -> RangeInclusive<T> {
let min_y = self
.data
.iter()
.map(|&(_, y)| y)
.fold(T::infinity(), <T as nalgebra::RealField>::min);
let max_y = self
.data
.iter()
.map(|&(_, y)| y)
.fold(T::neg_infinity(), <T as nalgebra::RealField>::max);
min_y..=max_y
}
pub fn y(&self, x: T) -> Result<T> {
if !self.x_range.contains(&x) {
return Err(Error::DataRange(
format!("{}", self.x_range.start()),
format!("{}", self.x_range.end()),
));
}
Ok(self.function.y(x))
}
pub fn solution(&self) -> Vec<(T, T)> {
self.data()
.iter()
.map(|&(x, _)| (x, self.function.y(x)))
.collect()
}
pub fn solve(&self, x: impl IntoIterator<Item = T>) -> Result<Vec<(T, T)>> {
x.into_iter().map(|x| Ok((x, self.y(x)?))).collect()
}
pub fn solve_range(&self, range: RangeInclusive<T>, step: T) -> Result<Vec<(T, T)>> {
self.solve(SteppedValues::new(range, step))
}
pub fn as_polynomial(&self) -> &Polynomial<'_, B, T> {
&self.function
}
pub fn into_polynomial(self) -> Polynomial<'static, B, T> {
self.function
}
pub fn as_monomial(&self) -> Result<MonomialPolynomial<'static, T>>
where
B: IntoMonomialBasis<T>,
{
let mut coefficients = self.coefficients().to_vec();
self.basis().as_monomial(&mut coefficients)?;
Ok(MonomialPolynomial::owned(coefficients))
}
pub fn coefficient_energies(&self) -> Vec<T>
where
B: OrthogonalBasis<T>,
{
self.function.coefficient_energies()
}
pub fn smoothness(&self) -> T
where
B: OrthogonalBasis<T>,
{
self.function.smoothness()
}
pub fn spectral_energy_filter(&mut self)
where
B: OrthogonalBasis<T>,
{
self.function.spectral_energy_filter();
}
#[expect(clippy::missing_panics_doc, reason = "Infallible operation")]
pub fn equation(&self) -> String {
let mut output = String::new();
self.basis()
.format_polynomial(&mut output, self.coefficients())
.expect("String should be infallible");
output
}
pub fn properties(&self) -> FitProperties<T> {
FitProperties {
degree: self.degree(),
data_points: self.data().len(),
coefficients: self.coefficients().to_vec(),
coefficient_errors: self
.covariance()
.map(|cov| cov.coefficient_standard_errors())
.ok(),
mse: self.mean_squared_error(),
r_squared: self.r_squared(None),
}
}
}
impl<B, T: Value> AsRef<Polynomial<'_, B, T>> for CurveFit<'_, B, T>
where
B: Basis<T>,
B: PolynomialDisplay<T>,
{
fn as_ref(&self) -> &Polynomial<'static, B, T> {
&self.function
}
}
impl<T: Value, B> std::fmt::Display for CurveFit<'_, B, T>
where
B: Basis<T>,
B: PolynomialDisplay<T>,
{
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.equation())
}
}
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq)]
pub struct FitProperties<T: Value> {
pub degree: usize,
pub data_points: usize,
pub coefficients: Vec<T>,
pub coefficient_errors: Option<Vec<T>>,
pub mse: T,
pub r_squared: T,
}
#[cfg(test)]
#[cfg(feature = "transforms")]
mod tests {
use crate::{
assert_close, assert_fits, assert_true,
score::Aic,
statistics::CvStrategy,
transforms::{ApplyNoise, Strength},
MonomialFit,
};
use super::*;
#[test]
fn test_curvefit_new_and_coefficients() {
let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
let fit = MonomialFit::new(data, 2).unwrap();
let coefs = fit.coefficients();
assert_eq!(coefs.len(), 3);
}
#[test]
fn test_big() {
function!(poly(x) = 1.0 + 2.0 x^1 + 3.0 x^2 + 4.0 x^3 + 5.0 x^4 + 6.0 x^5);
let data = poly.solve_range(0.0..=10_000_000.0, 1.0);
let fit = ChebyshevFit::new(&data, 5).unwrap();
assert_fits!(poly, fit, 0.999);
}
#[test]
fn test_curvefit_y_in_range() {
let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
let fit = MonomialFit::new(data, 2).unwrap();
let y: f64 = fit.y(1.0).unwrap();
assert!(y.is_finite());
}
#[test]
fn test_curvefit_y_out_of_range() {
let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
let fit = MonomialFit::new(data, 2).unwrap();
let y = fit.y(-1.0);
assert!(y.is_err());
}
#[test]
fn test_curvefit_solution_matches_data_len() {
let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
let fit = MonomialFit::new(data, 2).unwrap();
let solution = fit.solution();
assert_eq!(solution.len(), data.len());
}
#[test]
fn test_curvefit_covariance_and_standard_errors() {
let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
let fit = MonomialFit::new(data, 2).unwrap();
let cov = fit.covariance().unwrap();
let errors = cov.coefficient_standard_errors();
assert_eq!(errors.len(), 3);
for err in errors {
assert!(err >= 0.0);
}
}
#[test]
fn test_curvefit_confidence_band() {
let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
let fit = MonomialFit::new(data, 2).unwrap();
let cov = fit.covariance().unwrap();
let band = cov.confidence_band(1.0, Confidence::P95, None).unwrap();
assert!(band.lower <= band.upper);
assert!(band.value >= band.lower && band.value <= band.upper);
}
#[test]
fn test_curvefit_model_score_and_r_squared() {
let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
let fit = MonomialFit::new(data, 2).unwrap();
let score: f64 = fit.model_score(&Aic);
let r2 = fit.r_squared(None);
assert!(score.is_finite());
assert_close!(r2, 1.0);
function!(mono(x) = 1.0 + 2.0 x^1); let data = mono
.solve_range(0.0..=1000.0, 1.0)
.apply_normal_noise(Strength::Relative(0.3), None);
let fit = MonomialFit::new_auto(&data, DegreeBound::Relaxed, &Aic).unwrap();
assert!(fit.r_squared(None) < 1.0);
assert!(fit.model_score(&Aic).is_finite());
}
#[test]
fn test_curvefit_as_polynomial_and_into_polynomial() {
let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
let fit = MonomialFit::new(data, 2).unwrap();
let poly_ref = fit.as_polynomial();
let poly_owned = fit.clone().into_polynomial();
assert_eq!(poly_ref.coefficients(), poly_owned.coefficients());
}
#[test]
fn test_curvefit_properties() {
let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
let fit = MonomialFit::new(data, 2).unwrap();
let props = fit.properties();
assert_eq!(props.degree, 2);
assert_eq!(props.data_points, 3);
assert_eq!(props.coefficients.len(), 3);
assert!(props.mse >= 0.0);
assert!(props.r_squared <= 1.0 && props.r_squared >= 0.0);
}
#[test]
fn test_curvefit_new_auto_selects_best_degree() {
let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0), (3.0, 13.0)];
let fit = MonomialFit::new_auto(data, DegreeBound::Relaxed, &Aic).unwrap();
assert!(fit.degree() < data.len());
}
#[test]
fn test_curvefit_solve_and_solve_range() {
let data = &[(0.0, 1.0), (1.0, 3.0), (2.0, 7.0)];
let fit = MonomialFit::new(data, 2).unwrap();
let xs = vec![0.0, 1.0, 2.0];
let points = fit.solve(xs.clone()).unwrap();
assert_eq!(points.len(), xs.len());
let range_points = fit.solve_range(0.0..=2.0, 1.0).unwrap();
assert_eq!(range_points.len(), 3);
}
#[test]
fn test_kfold() {
function!(mono(x) = 5 x^5 - 3 x^3 + 2 x^2 + 1.0);
let data = mono
.solve_range(0.0..=1000.0, 1.0)
.apply_salt_pepper_noise(
0.01,
Strength::Absolute(-1000.0),
Strength::Absolute(1000.0),
None,
)
.apply_poisson_noise(Strength::Relative(2.0), None);
crate::plot!([data, mono]);
let fit = MonomialFit::new_kfold_cross_validated(
&data,
CvStrategy::MinimizeBias,
DegreeBound::Relaxed,
&Aic,
)
.unwrap();
let k_r = fit.r_squared(None);
let fit2 = MonomialFit::new_auto(&data, DegreeBound::Relaxed, &Aic).unwrap();
let n_r = fit2.r_squared(None);
let k_r = (k_r * 10.0).round() / 10.0;
let n_r = (n_r * 10.0).round() / 10.0;
assert_true!(
k_r >= n_r,
"K-Fold R² ({k_r}) should be better than normal R² ({n_r})"
);
}
#[test]
fn test_kfold_degenerate() {
let mut src = crate::transforms::SeedSource::from_seeds([
0xf060_c3f8_a373_ed60,
0x6e58_31a0_b26d_1c6d,
]);
function!(mono(x) = 5 x^5 - 3 x^3 + 2 x^2 + 1.0);
let data = mono
.solve_range(0.0..=1000.0, 1.0)
.apply_salt_pepper_noise(
0.01,
Strength::Absolute(-1000.0),
Strength::Absolute(1000.0),
Some(src.seed()),
)
.apply_poisson_noise(Strength::Relative(2.0), Some(src.seed()));
crate::plot!([data, mono]);
let fit = MonomialFit::new_kfold_cross_validated(
&data,
CvStrategy::MinimizeBias,
DegreeBound::Relaxed,
&Aic,
)
.unwrap();
let k_r = fit.r_squared(None);
let fit2 = MonomialFit::new_auto(&data, DegreeBound::Relaxed, &Aic).unwrap();
let n_r = fit2.r_squared(None);
assert_true!(
k_r >= n_r,
"K-Fold R² ({k_r}) should be better than normal R² ({n_r})"
);
}
}