use std::ops::RangeInclusive;
use crate::{
basis::Basis,
display::PolynomialDisplay,
error::Error,
score::{Bic, ModelScoreProvider},
value::{CoordExt, FloatClampedCast, IntClampedCast, SteppedValues, Value},
Polynomial,
};
/// Residual variance of a fit: SS_res / (n - k), where `k` counts the model
/// parameters consumed as degrees of freedom.
///
/// Returns zero when `n == k` (no residual degrees of freedom) instead of
/// dividing by zero.
pub fn residual_variance<T: Value>(
    y: impl Iterator<Item = T>,
    y_fit: impl Iterator<Item = T>,
    k: T,
) -> T {
    // Accumulate the squared residuals and the sample count in one pass.
    let (ss_res, n) = y
        .zip(y_fit)
        .fold((T::zero(), T::zero()), |(ss, count), (obs, fit)| {
            (ss + Value::powi(obs - fit, 2), count + T::one())
        });
    if n == k {
        T::zero()
    } else {
        ss_res / (n - k)
    }
}
/// Coefficient of determination R² = 1 - SS_res / SS_tot.
pub fn r_squared<T: Value>(y: impl Iterator<Item = T>, y_fit: impl Iterator<Item = T>) -> T {
    r_squared_with_n(y, y_fit).0
}
/// Arithmetic mean of the values produced by `data`.
///
/// NOTE(review): an empty iterator yields 0/0 (NaN for floats) — callers
/// appear to rely on that sentinel behavior, so it is preserved.
pub fn mean<T: Value>(data: impl Iterator<Item = T>) -> T {
    let (total, count) = data.fold((T::zero(), T::zero()), |(sum, n), v| {
        (sum + v, n + T::one())
    });
    total / count
}
/// Median of `data` (average of the two middle values for even lengths).
///
/// Returns zero for an empty slice. Non-comparable pairs (e.g. NaN) are
/// treated as equal during sorting, matching the original behavior.
#[must_use]
pub fn median<T: Value>(data: &[T]) -> T {
    if data.is_empty() {
        return T::zero();
    }
    let mut sorted = data.to_vec();
    // Stable sort: elements comparing Equal (the NaN fallback) keep their
    // relative order, so results match the original exactly.
    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let mid = sorted.len() / 2;
    if sorted.len() % 2 == 1 {
        sorted[mid]
    } else {
        (sorted[mid - 1] + sorted[mid]) / T::two()
    }
}
/// Population standard deviation and mean of `data`, returned as
/// `(std_dev, mean)`.
pub fn stddev_and_mean<T: Value>(data: impl Iterator<Item = T>) -> (T, T) {
    // Two passes are needed (mean first), so buffer the values.
    let samples: Vec<T> = data.collect();
    let mu = mean(samples.iter().copied());
    let mut sum_sq = T::zero();
    let mut count = T::zero();
    for &v in &samples {
        sum_sq += Value::powi(v - mu, 2);
        count += T::one();
    }
    ((sum_sq / count).sqrt(), mu)
}
/// Skewness and excess kurtosis of `residuals`, computed from standardized
/// values. Kurtosis has 3 subtracted so a normal distribution scores 0.
///
/// Returns `(0, 0)` when the standard deviation is zero (all values equal).
pub fn skewness_and_kurtosis<T: Value>(residuals: &[T]) -> (T, T) {
    let (sigma, mu) = stddev_and_mean(residuals.iter().copied());
    if sigma == T::zero() {
        return (T::zero(), T::zero());
    }
    let mut third_moment = T::zero();
    let mut fourth_moment = T::zero();
    let mut count = T::zero();
    for &value in residuals {
        let z = (value - mu) / sigma;
        third_moment += Value::powi(z, 3);
        fourth_moment += Value::powi(z, 4);
        count += T::one();
    }
    // 3 built from trait constants; subtracting it yields *excess* kurtosis.
    let three = T::two() + T::one();
    (third_moment / count, fourth_moment / count - three)
}
/// Pseudo p-value for residual normality based on a K²-style statistic:
/// skewness and kurtosis z-scores are combined and mapped through
/// `exp(-K²/2)`. Returns 1 for an empty slice.
pub fn residual_normality<T: Value>(residuals: &[T]) -> T {
    // Cast constants up front so a failed cast panics regardless of input,
    // matching the original behavior.
    let six = T::try_cast(6.0).unwrap();
    let twentyfour = T::try_cast(24.0).unwrap();
    // Count in T-arithmetic so the size participates in the generic math.
    let n = residuals.iter().fold(T::zero(), |count, _| count + T::one());
    if n == T::zero() {
        return T::one();
    }
    let (skew, kurt) = skewness_and_kurtosis(residuals);
    // Standard errors: sqrt(6/n) for skewness, sqrt(24/n) for kurtosis.
    let z_skew = skew / (six / n).sqrt();
    let z_kurt = kurt / (twentyfour / n).sqrt();
    let k_squared = z_skew * z_skew + z_kurt * z_kurt;
    (-k_squared / T::two()).exp()
}
/// Range (max - min) of the values produced by `data`.
///
/// NOTE(review): an empty iterator yields `-∞ - ∞` = negative infinity;
/// callers appear to always pass non-empty data — confirm before relying
/// on the empty case.
pub fn spread<T: Value>(data: impl Iterator<Item = T>) -> T {
    let (lo, hi) = data.fold(
        (T::infinity(), T::neg_infinity()),
        |(lo, hi), v| {
            let lo = if v < lo { v } else { lo };
            let hi = if v > hi { v } else { hi };
            (lo, hi)
        },
    );
    hi - lo
}
/// Robust R² variant: the numerator uses the Huber loss (MAD-scaled) instead
/// of plain squared error, reducing the influence of outliers.
pub fn robust_r_squared<T: Value>(y: impl Iterator<Item = T>, y_fit: impl Iterator<Item = T>) -> T {
    let observed: Vec<T> = y.collect();
    let fitted: Vec<T> = y_fit.collect();
    let mad = median_absolute_deviation(observed.iter().copied(), fitted.iter().copied());
    let c = huber_const();
    // Single pass over the pairs; each accumulator sums in the same order as
    // before, so floating-point results are unchanged.
    let mut robust_sse = T::zero();
    let mut sse = T::zero();
    for (&obs, &fit) in observed.iter().zip(fitted.iter()) {
        let r = obs - fit;
        robust_sse += huber_loss(r, c, mad);
        sse += Value::powi(r, 2);
    }
    T::one() - (robust_sse / sse)
}
/// R² adjusted for model complexity: penalizes `k` fitted parameters
/// relative to the sample size `n`.
pub fn adjusted_r_squared<T: Value>(
    y: impl Iterator<Item = T>,
    y_fit: impl Iterator<Item = T>,
    k: T,
) -> T {
    let (r2, n) = r_squared_with_n(y, y_fit);
    let penalty = (T::one() - r2) * k / (n - k);
    r2 - penalty
}
/// R² together with the sample count `n` (shared by `r_squared` and
/// `adjusted_r_squared`).
fn r_squared_with_n<T: Value>(
    y: impl Iterator<Item = T>,
    y_fit: impl Iterator<Item = T>,
) -> (T, T) {
    let observed: Vec<T> = y.collect();
    let fitted: Vec<T> = y_fit.collect();
    // First pass: mean of the observations (and the count, in T arithmetic).
    let mut sum = T::zero();
    let mut n = T::zero();
    for &v in &observed {
        sum += v;
        n += T::one();
    }
    let y_bar = sum / n;
    // Second pass: total and residual sums of squares.
    let mut ss_total = T::zero();
    let mut ss_residual = T::zero();
    for (obs, fit) in observed.into_iter().zip(fitted) {
        ss_total += Value::powi(obs - y_bar, 2);
        ss_residual += Value::powi(obs - fit, 2);
    }
    (T::one() - (ss_residual / ss_total), n)
}
/// Mean absolute error between observations and fitted values.
pub fn mean_absolute_error<T: Value>(
    y: impl Iterator<Item = T>,
    y_fit: impl Iterator<Item = T>,
) -> T {
    let (abs_sum, n) = y
        .zip(y_fit)
        .fold((T::zero(), T::zero()), |(acc, count), (obs, fit)| {
            (acc + Value::abs(obs - fit), count + T::one())
        });
    abs_sum / n
}
/// Root mean squared error: sqrt of [`mean_squared_error`].
pub fn root_mean_squared_error<T: Value>(
    y: impl Iterator<Item = T>,
    y_fit: impl Iterator<Item = T>,
) -> T {
    mse_with_n(y, y_fit).0.sqrt()
}
/// Mean squared error between observations and fitted values.
pub fn mean_squared_error<T: Value>(
    y: impl Iterator<Item = T>,
    y_fit: impl Iterator<Item = T>,
) -> T {
    mse_with_n(y, y_fit).0
}
/// Mean Huber loss of the residuals, shared with the robust-MSE scorer.
///
/// NOTE(review): despite the name, this returns the mean Huber loss rather
/// than a log-likelihood — confirm against the scoring code that consumes it.
pub fn huber_log_likelihood<T: Value>(
    y: impl Iterator<Item = T>,
    y_fit: impl Iterator<Item = T>,
) -> T {
    robust_mse_with_n(y, y_fit).0
}
/// Mean squared error together with the sample count `n`.
fn mse_with_n<T: Value>(y: impl Iterator<Item = T>, y_fit: impl Iterator<Item = T>) -> (T, T) {
    let (sum_sq, n) = y
        .zip(y_fit)
        .fold((T::zero(), T::zero()), |(acc, count), (obs, fit)| {
            (acc + Value::powi(obs - fit, 2), count + T::one())
        });
    (sum_sq / n, n)
}
/// Mean Huber loss (MAD-scaled) together with the sample count `n`.
pub(crate) fn robust_mse_with_n<T: Value>(
    y: impl Iterator<Item = T>,
    y_fit: impl Iterator<Item = T>,
) -> (T, T) {
    let observed: Vec<T> = y.collect();
    let fitted: Vec<T> = y_fit.collect();
    // Scale parameter for the Huber loss comes from the residual MAD.
    let mad = median_absolute_deviation(observed.iter().copied(), fitted.iter().copied());
    let c = huber_const();
    let mut loss = T::zero();
    let mut n = T::zero();
    for (obs, fit) in observed.into_iter().zip(fitted) {
        loss += huber_loss(obs - fit, c, mad);
        n += T::one();
    }
    (loss / n, n)
}
/// Huber loss of residual `r` with threshold
/// `delta = huber_const * median_absolute_deviation`: quadratic within the
/// threshold, linear beyond it.
pub fn huber_loss<T: Value>(r: T, huber_const: T, median_absolute_deviation: T) -> T {
    let threshold = huber_const * median_absolute_deviation;
    let magnitude = Value::abs(r);
    let half = T::one() / T::two();
    if magnitude <= threshold {
        half * magnitude * magnitude
    } else {
        threshold * (magnitude - half * threshold)
    }
}
/// Standard Huber tuning constant (1.345, ~95% efficiency under normal
/// errors); falls back to 1 if the cast into `T` fails.
#[must_use]
pub fn huber_const<T: Value>() -> T {
    T::try_cast(1.345).unwrap_or_else(T::one)
}
/// Median absolute deviation of the residuals: the median of
/// `| |r| - median(|r|) |`. Returns zero for empty input.
pub fn median_absolute_deviation<T: Value>(
    y: impl Iterator<Item = T>,
    y_fit: impl Iterator<Item = T>,
) -> T {
    let abs_residuals: Vec<T> = y.zip(y_fit).map(|(obs, fit)| Value::abs(obs - fit)).collect();
    if abs_residuals.is_empty() {
        return T::zero();
    }
    let center = median(&abs_residuals);
    let deviations: Vec<T> = abs_residuals
        .into_iter()
        .map(|r| Value::abs(r - center))
        .collect();
    median(&deviations)
}
/// Median squared deviation: the median of `(r² - median(r²))²` where `r` are
/// the residuals. Returns zero for empty input.
pub fn median_squared_deviation<T: Value>(
    y: impl Iterator<Item = T>,
    y_fit: impl Iterator<Item = T>,
) -> T {
    let sq_residuals: Vec<T> = y
        .zip(y_fit)
        .map(|(obs, fit)| Value::powi(obs - fit, 2))
        .collect();
    if sq_residuals.is_empty() {
        return T::zero();
    }
    let center = median(&sq_residuals);
    let deviations: Vec<T> = sq_residuals
        .into_iter()
        .map(|r| Value::powi(r - center, 2))
        .collect();
    median(&deviations)
}
/// Details of a failed finite-difference derivative check, produced by
/// [`is_derivative`].
pub struct DerivationError<T: Value> {
    /// Point at which the check failed.
    pub x: T,
    /// Central (or forward, at the domain start) finite-difference estimate.
    pub finite_diff: T,
    /// Value reported by the candidate derivative polynomial at `x`.
    pub derivative: T,
    /// Absolute difference between the two estimates.
    pub diff: T,
    /// Relative tolerance the difference was compared against.
    pub rel_tol: T,
}
impl<T: Value> std::fmt::Display for DerivationError<T> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "Derivative check failed at x = {}: finite difference = {}, derivative = {}, |diff| = {}, rel_tol = {}",
            self.x, self.finite_diff, self.derivative, self.diff, self.rel_tol
        )
    }
}
/// Bayes factor comparing two fitted models on the same data, via the BIC
/// approximation `exp((BIC₂ - BIC₁) / 2)`.
///
/// Each model is re-evaluated at `data`'s x-values and scored with [`Bic`],
/// using its coefficient count as the complexity term `k`.
///
/// # Errors
/// Propagates any error from solving either model over the data's x-values.
pub fn bayes_factor<T: Value, B1, B2>(
    m1: &crate::CurveFit<B1, T>,
    m2: &crate::CurveFit<B2, T>,
    data: &[(T, T)],
) -> Result<T, Error>
where
    B1: Basis<T> + PolynomialDisplay<T>,
    B2: Basis<T> + PolynomialDisplay<T>,
{
    // Predicted y-values of each model at the observed x-values.
    let y1 = m1.solve(data.x_iter())?.y();
    let y2 = m2.solve(data.x_iter())?.y();
    // k = number of fitted coefficients (model complexity for BIC).
    let k1 = T::from_positive_int(m1.coefficients().len());
    let bic1 = Bic.score(m1, y1.into_iter(), data.y_iter(), k1);
    let k2 = T::from_positive_int(m2.coefficients().len());
    let bic2 = Bic.score(m2, y2.into_iter(), data.y_iter(), k2);
    Ok(((bic2 - bic1) / T::two()).exp())
}
/// Verifies numerically that `f_prime` is the derivative of `f` over `domain`.
///
/// Samples the domain (roughly 100 points per unit of width, clamped to
/// [100, 10_000] total) and compares a central finite difference of `f`
/// against `f_prime` at each point, plus one forward difference at the domain
/// start. Step sizes follow the usual sqrt(machine-epsilon) heuristic.
///
/// # Errors
/// Returns the first [`DerivationError`] whose mismatch exceeds the relative
/// tolerance.
pub fn is_derivative<T: Value, B, B2>(
    f: &Polynomial<B, T>,
    f_prime: &Polynomial<B2, T>,
    domain: &RangeInclusive<T>,
) -> Result<(), DerivationError<T>>
where
    B: Basis<T> + PolynomialDisplay<T>,
    B2: Basis<T> + PolynomialDisplay<T>,
{
    let range = *domain.end() - *domain.start();
    // Sample count scales with the domain width, clamped to [100, 10_000].
    let one_hundred = 100.0.clamped_cast::<T>();
    let steps = Value::clamp(
        one_hundred * (range),
        one_hundred,
        10_000.0.clamped_cast::<T>(),
    );
    let step = (range) / steps;
    let tol = T::epsilon().sqrt();
    for x in SteppedValues::new(domain.clone(), step) {
        // Central-difference half-step: sqrt(eps), scaled by |x| for
        // conditioning away from the origin.
        let h = T::epsilon().sqrt() * Value::max(T::one(), Value::abs(x));
        let xhp = x + h;
        let xhm = x - h;
        // Skip points whose difference stencil would leave the domain.
        if xhp > *domain.end() || xhm < *domain.start() {
            continue;
        }
        let finite_diff = (f.y(xhp) - f.y(xhm)) / (T::two() * h);
        let derivative = f_prime.y(x);
        // Tolerance scales with the derivative's magnitude (floor of 1).
        let rel_tol = tol.sqrt() * Value::max(Value::abs(derivative), T::one());
        let diff = Value::abs(finite_diff - derivative);
        if diff > rel_tol {
            return Err(DerivationError {
                x,
                finite_diff,
                derivative,
                diff,
                rel_tol,
            });
        }
    }
    // The loop above skips the left endpoint (its central stencil exits the
    // domain), so check it once with a forward difference.
    let dx = 1e-6.clamped_cast::<T>();
    let x1 = *domain.start();
    let x2 = *domain.start() + dx;
    if x2 <= *domain.end() {
        let delta_y = f.y(x2) - f.y(x1);
        let derivative_y = f_prime.y(x1);
        let rel_tol = tol.sqrt() * Value::max(Value::abs(derivative_y), T::one());
        let diff = Value::abs(delta_y / dx - derivative_y);
        if diff > rel_tol {
            return Err(DerivationError {
                x: x1,
                finite_diff: delta_y / dx,
                derivative: derivative_y,
                diff,
                rel_tol,
            });
        }
    }
    Ok(())
}
/// Strategy for capping the polynomial degree used when fitting `n` points.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DegreeBound {
    /// Low degree cap, favoring smooth fits (hard cap 8, 15 points/param).
    Conservative,
    /// Medium degree cap (hard cap 15, 8 points/param).
    Relaxed,
    /// Highest degree cap allowed by the data (8 points/param).
    Aggressive,
    /// Explicit user-chosen degree (still capped at `n - 1`).
    Custom(usize),
}
impl From<usize> for DegreeBound {
    fn from(value: usize) -> Self {
        DegreeBound::Custom(value)
    }
}
impl DegreeBound {
    /// Maximum polynomial degree to try for a dataset of `n` points.
    ///
    /// Always at most `n - 1` (the interpolation limit). The preset variants
    /// additionally apply a smoothness-based limit `n^(1/(2s+1))` and an
    /// observations-per-parameter limit.
    #[must_use]
    #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
    pub fn max_degree(self, n: usize) -> usize {
        let theoretical_max = n.saturating_sub(1);
        // A single match (with an early return for Custom) replaces the
        // previous nested match that needed an unreachable!() arm.
        let (hard_cap, max_n_per_k, est_smoothness) = match self {
            DegreeBound::Custom(d) => return d.min(theoretical_max),
            DegreeBound::Conservative => (8, 15, 2),
            DegreeBound::Relaxed => (15, 8, 1),
            DegreeBound::Aggressive => (theoretical_max, 8, 0),
        };
        // Nonparametric-style limit: n^(1/(2s+1)) for estimated smoothness s.
        let smooth_lim = (n as f64)
            .powf(1.0 / (2.0 * f64::from(est_smoothness) + 1.0))
            .floor() as usize;
        // Require `max_n_per_k` observations per fitted parameter.
        let obs_lim = (n / max_n_per_k).saturating_sub(1);
        smooth_lim.min(obs_lim).min(hard_cap).min(theoretical_max)
    }
}
/// Confidence level for intervals and bands.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Confidence {
    /// 80% confidence.
    P80,
    /// 90% confidence.
    P90,
    /// 95% confidence.
    P95,
    /// 98% confidence.
    P98,
    /// 99% confidence.
    P99,
    /// 99.9% confidence.
    P999,
    /// Custom level; the payload is used directly as both the coverage
    /// fraction (`percentage`) and the z-score.
    Custom(f64),
}
impl Confidence {
    /// Two-tailed Student's t critical values; row = df (1..=30), column =
    /// confidence level in the order of the non-`Custom` variants.
    #[allow(clippy::approx_constant)]
    #[rustfmt::skip]
    const T_TABLE: &[&[f64]] = &[
        &[3.078, 6.314, 12.71, 31.82, 63.66, 636.62],
        &[1.886, 2.92, 4.303, 6.965, 9.925, 31.599],
        &[1.638, 2.353, 3.182, 4.541, 5.841, 12.924],
        &[1.533, 2.132, 2.776, 3.747, 4.604, 8.610],
        &[1.476, 2.015, 2.571, 3.365, 4.032, 6.869],
        &[1.440, 1.943, 2.447, 3.143, 3.707, 5.959],
        &[1.415, 1.895, 2.365, 2.998, 3.499, 5.408],
        &[1.397, 1.86, 2.306, 2.896, 3.355, 5.041],
        &[1.383, 1.833, 2.262, 2.821, 3.25, 4.781],
        &[1.372, 1.812, 2.228, 2.764, 3.169, 4.587],
        &[1.363, 1.796, 2.201, 2.718, 3.106, 4.437],
        &[1.356, 1.782, 2.179, 2.681, 3.055, 4.318],
        &[1.350, 1.771, 2.16, 2.65, 3.012, 4.221],
        &[1.345, 1.761, 2.145, 2.624, 2.977, 4.140],
        &[1.341, 1.753, 2.131, 2.602, 2.947, 4.073],
        &[1.337, 1.746, 2.12, 2.583, 2.921, 4.015],
        &[1.333, 1.74, 2.11, 2.567, 2.898, 3.965],
        &[1.330, 1.734, 2.101, 2.552, 2.878, 3.922],
        &[1.328, 1.729, 2.093, 2.539, 2.861, 3.883],
        &[1.325, 1.725, 2.086, 2.528, 2.845, 3.850],
        &[1.323, 1.721, 2.08, 2.518, 2.831, 3.819],
        &[1.321, 1.717, 2.074, 2.508, 2.819, 3.792],
        &[1.319, 1.714, 2.069, 2.5, 2.807, 3.768],
        &[1.318, 1.711, 2.064, 2.492, 2.797, 3.745],
        &[1.316, 1.708, 2.06, 2.485, 2.787, 3.725],
        &[1.315, 1.706, 2.056, 2.479, 2.779, 3.707],
        &[1.314, 1.703, 2.052, 2.473, 2.771, 3.690],
        &[1.313, 1.701, 2.048, 2.467, 2.763, 3.674],
        &[1.311, 1.699, 2.045, 2.462, 2.756, 3.659],
        &[1.310, 1.697, 2.042, 2.457, 2.75, 3.646],
    ];
    /// Coverage fraction, e.g. 0.95 for [`Confidence::P95`].
    #[must_use]
    pub fn percentage(&self) -> f64 {
        match self {
            Confidence::P80 => 0.8,
            Confidence::P90 => 0.9,
            Confidence::P95 => 0.95,
            Confidence::P98 => 0.98,
            Confidence::P99 => 0.99,
            Confidence::P999 => 0.999,
            Confidence::Custom(z) => *z,
        }
    }
    /// Significance level `1 - percentage`.
    #[must_use]
    pub fn alpha(&self) -> f64 {
        1.0 - self.percentage()
    }
    /// Two-tailed Student's t critical value for `df` degrees of freedom.
    ///
    /// Returns infinity for `df == 0`; beyond the tabulated rows (or for
    /// `Custom`) it falls back to the normal approximation [`Self::z_score`].
    #[must_use]
    pub fn t_score(self, df: usize) -> f64 {
        match df {
            0 => f64::INFINITY,
            // BUGFIX: was `i >= Self::T_TABLE.len()`, which made the last
            // table row (df = 30) unreachable and used the z-score instead.
            i if i > Self::T_TABLE.len() => self.z_score(),
            i => {
                let col = match self {
                    Confidence::P80 => 0,
                    Confidence::P90 => 1,
                    Confidence::P95 => 2,
                    Confidence::P98 => 3,
                    Confidence::P99 => 4,
                    Confidence::P999 => 5,
                    Confidence::Custom(_) => return self.z_score(),
                };
                Self::T_TABLE[i - 1][col]
            }
        }
    }
    /// Two-tailed standard-normal critical value for this level.
    #[must_use]
    pub fn z_score(self) -> f64 {
        match self {
            Confidence::P80 => 1.281,
            Confidence::P90 => 1.644,
            Confidence::P95 => 1.959,
            Confidence::P98 => 2.326,
            Confidence::P99 => 2.575,
            Confidence::P999 => 3.290,
            Confidence::Custom(z) => z,
        }
    }
    /// Casts the z-score into `T`.
    ///
    /// # Errors
    /// Returns an error when the cast fails.
    pub fn try_cast<T: Value>(self) -> crate::error::Result<T> {
        T::try_cast(self.z_score())
    }
}
impl std::fmt::Display for Confidence {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Confidence::P80 => write!(f, "80%"),
            Confidence::P90 => write!(f, "90%"),
            Confidence::P95 => write!(f, "95%"),
            Confidence::P98 => write!(f, "98%"),
            Confidence::P99 => write!(f, "99%"),
            Confidence::P999 => write!(f, "99.9%"),
            Confidence::Custom(z) => write!(f, "{z}σ"),
        }
    }
}
/// Tolerance specification attached to a [`ConfidenceBand`].
///
/// NOTE(review): the variants are only stored here; their interpretation
/// (absolute width vs. variance vs. measurement error) lives in the code
/// that constructs the band — confirm semantics there.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Tolerance<T: Value> {
    Absolute(T),
    Variance(T),
    Measurement(T),
}
/// A fitted value together with its confidence interval at a given level.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ConfidenceBand<T: Value> {
    // Confidence level the band was computed for.
    pub(crate) level: Confidence,
    // Optional tolerance used when the band was constructed.
    pub(crate) tolerance: Option<Tolerance<T>>,
    // Central (fitted) value.
    pub(crate) value: T,
    // Lower bound of the interval.
    pub(crate) lower: T,
    // Upper bound of the interval.
    pub(crate) upper: T,
}
impl<T: Value> ConfidenceBand<T> {
    /// Tolerance the band was constructed with, if any.
    pub fn tolerance(&self) -> Option<Tolerance<T>> {
        self.tolerance
    }
    /// Confidence level of the band.
    pub fn confidence(&self) -> Confidence {
        self.level
    }
    /// Central (fitted) value.
    pub fn value(&self) -> T {
        self.value
    }
    /// Lower bound of the interval.
    pub fn min(&self) -> T {
        self.lower
    }
    /// Upper bound of the interval.
    pub fn max(&self) -> T {
        self.upper
    }
    /// Midpoint of the interval (may differ from [`Self::value`]).
    pub fn center(&self) -> T {
        (self.lower + self.upper) / T::two()
    }
    /// Width of the interval.
    pub fn width(&self) -> T {
        self.upper - self.lower
    }
}
impl<T: Value> std::fmt::Display for ConfidenceBand<T> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Displays the interval midpoint, not `self.value`.
        let (min, y, max) = (self.min(), self.center(), self.max());
        let confidence = self.level;
        write!(f, "{y} ({min}, {max}) [confidence = {confidence}]")
    }
}
/// Affine map `x ↦ scale * x + shift` between a source interval and a
/// destination interval.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DomainNormalizer<T: Value> {
    // Source interval endpoints.
    src_range: (T, T),
    // Destination interval endpoints; may be infinite (see `new`).
    dst_range: (T, T),
    // Additive component of the map.
    shift: T,
    // Multiplicative component of the map.
    scale: T,
}
impl<T: Value> Default for DomainNormalizer<T> {
    /// Identity map over the unit interval `[0, 1]`.
    fn default() -> Self {
        DomainNormalizer {
            src_range: (T::zero(), T::one()),
            dst_range: (T::zero(), T::one()),
            shift: T::zero(),
            scale: T::one(),
        }
    }
}
impl<T: Value> DomainNormalizer<T> {
    /// Builds the affine map taking `src_range` onto `dst_range`.
    ///
    /// Infinite destination endpoints degrade gracefully:
    /// - `(-∞, ∞)`: identity map,
    /// - `(-∞, max)`: pure shift aligning the source max with `max`,
    /// - `(min, ∞)`: pure shift aligning the source min with `min`.
    pub fn new(src_range: (T, T), dst_range: (T, T)) -> Self {
        let (src_min, src_max) = src_range;
        let (dst_min, dst_max) = dst_range;
        if dst_min == T::neg_infinity() && dst_max == T::infinity() {
            return DomainNormalizer {
                src_range,
                dst_range,
                shift: T::zero(),
                scale: T::one(),
            };
        }
        if dst_min == T::neg_infinity() {
            return DomainNormalizer {
                src_range,
                dst_range,
                shift: -src_max + dst_max,
                scale: T::one(),
            };
        }
        if dst_max == T::infinity() {
            return DomainNormalizer {
                src_range,
                dst_range,
                shift: -src_min + dst_min,
                scale: T::one(),
            };
        }
        let scale = (dst_max - dst_min) / (src_max - src_min);
        let shift = dst_min - scale * src_min;
        DomainNormalizer {
            src_range,
            dst_range,
            shift,
            scale,
        }
    }
    /// Like [`Self::new`], taking the source as an inclusive range.
    pub fn from_range(src_range: RangeInclusive<T>, dst_range: (T, T)) -> Self {
        let (min, max) = src_range.into_inner();
        Self::new((min, max), dst_range)
    }
    /// Builds a normalizer whose source interval is the min/max of `src`.
    /// Returns `None` when the iterator is empty.
    pub fn from_data(src: impl Iterator<Item = T>, dst_range: (T, T)) -> Option<Self> {
        let range = src.fold(None, |acc: Option<(T, T)>, x| {
            Some(match acc {
                None => (x, x),
                Some((min, max)) => (
                    nalgebra::RealField::min(min, x),
                    nalgebra::RealField::max(max, x),
                ),
            })
        })?;
        Some(Self::new(range, dst_range))
    }
    /// Additive component of the map.
    pub fn shift(&self) -> T {
        self.shift
    }
    /// Multiplicative component of the map.
    pub fn scale(&self) -> T {
        self.scale
    }
    /// Source interval endpoints.
    pub fn src_range(&self) -> (T, T) {
        self.src_range
    }
    /// Destination interval endpoints.
    pub fn dst_range(&self) -> (T, T) {
        self.dst_range
    }
    /// Maps `x` from the source domain into the destination domain.
    #[inline(always)]
    pub fn normalize(&self, x: T) -> T {
        self.scale * x + self.shift
    }
    /// Inverse of [`Self::normalize`].
    pub fn denormalize(&self, x: T) -> T {
        (x - self.shift) / self.scale
    }
    /// Maps a complex value (e.g. a polynomial root found in the normalized
    /// domain) back to the source domain.
    pub fn denormalize_complex(&self, z: nalgebra::Complex<T>) -> nalgebra::Complex<T> {
        let s = self.scale();
        let sh = self.shift();
        // Apply the same inverse as `denormalize` to the real part; the
        // imaginary part only rescales (a pure shift does not move it).
        // BUGFIX: previously computed `z.re / s + sh`, which is not the
        // inverse of `normalize` (`scale * x + shift`) whenever shift != 0.
        nalgebra::Complex::new((z.re - sh) / s, z.im / s)
    }
    /// Expands monomial coefficients fitted in the normalized variable into
    /// coefficients over the original variable, via the binomial expansion of
    /// `(alpha * x + beta)^i`.
    #[must_use]
    pub fn denormalize_coefs(&self, coefs: &[T]) -> Vec<T> {
        let (x_min, x_max) = self.src_range();
        let (d_min, d_max) = self.dst_range();
        // Recompute the affine parameters from the stored ranges, mirroring
        // the special cases for infinite endpoints in `new`.
        let (alpha, beta) = if d_min == T::neg_infinity() && d_max == T::infinity() {
            (T::one(), T::zero())
        } else if d_min == T::neg_infinity() {
            (T::one(), d_max - x_max)
        } else if d_max == T::infinity() {
            (T::one(), d_min - x_min)
        } else {
            let alpha = (d_max - d_min) / (x_max - x_min);
            (alpha, d_min - alpha * x_min)
        };
        let mut unnorm = vec![T::zero(); coefs.len()];
        for (i, &c) in coefs.iter().enumerate() {
            for j in 0..=i {
                // Binomial coefficient C(i, j) via factorials.
                let binom = T::factorial(i) / (T::factorial(j) * T::factorial(i - j));
                unnorm[j] += c
                    * binom
                    * Value::powi(alpha, j.clamped_cast())
                    * Value::powi(beta, (i - j).clamped_cast());
            }
        }
        unnorm
    }
}
impl<T: Value> std::fmt::Display for DomainNormalizer<T> {
    /// Renders the mapping as `T[ src -> dst ]`, pretty-printing infinite and
    /// π-valued destination endpoints; an identity map shows only the
    /// destination range.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (src_min, src_max) = self.src_range;
        let (dst_min, dst_max) = self.dst_range;
        let dst_min = if dst_min == T::neg_infinity() {
            "-∞".to_string()
        } else {
            dst_min.to_string()
        };
        // BUGFIX: use |a - b| for the π comparisons. `Value::abs_sub` was
        // used before; in the num-traits convention `abs_sub(a, b)` is
        // `max(a - b, 0)`, which would label *any* endpoint below π as "π".
        let dst_max = if dst_max == T::infinity() {
            "∞".to_string()
        } else if Value::abs(dst_max - T::pi()) < T::epsilon() {
            "π".to_string()
        } else if Value::abs(dst_max - T::two_pi()) < T::epsilon() {
            "2π".to_string()
        } else {
            dst_max.to_string()
        };
        if self.shift == T::zero() && self.scale == T::one() {
            return write!(f, "T[ {dst_min}..{dst_max} ]");
        }
        write!(f, "T[ {src_min}..{src_max} -> {dst_min}..{dst_max} ]")
    }
}
/// Fold-count strategies for cross-validation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CvStrategy {
    /// 5 folds.
    MinimizeVariance,
    /// 10 folds.
    MinimizeBias,
    /// One fold per data point.
    LeaveOneOut,
    /// 7 folds.
    Balanced,
    #[allow(missing_docs)]
    Custom { k: usize },
}
impl CvStrategy {
    /// Number of folds for a dataset of `n` points.
    #[must_use]
    pub fn k(self, n: usize) -> usize {
        match self {
            Self::MinimizeVariance => 5,
            Self::MinimizeBias => 10,
            Self::LeaveOneOut => n,
            Self::Balanced => 7,
            Self::Custom { k } => k,
        }
    }
}
impl From<usize> for CvStrategy {
    fn from(k: usize) -> Self {
        Self::Custom { k }
    }
}
/// Splits `data` into `k` contiguous folds according to `strategy`; the last
/// fold absorbs any remainder.
#[must_use]
pub fn cross_validation_split<I: Clone>(data: &[I], strategy: CvStrategy) -> Vec<Vec<I>> {
    let n = data.len();
    // BUGFIX: clamp to at least one fold so `Custom { k: 0 }` cannot cause a
    // divide-by-zero panic below (it now yields a single fold with all data).
    let k = strategy.k(n).max(1);
    let fold_size = n / k;
    let mut folds: Vec<Vec<I>> = Vec::with_capacity(k);
    for i in 0..k {
        let start = i * fold_size;
        let end = if i == k - 1 { n } else { start + fold_size };
        folds.push(data[start..end].to_vec());
    }
    folds
}
/// RMSE estimated over k-fold splits of the (y, y_fit) pairs, summarized as
/// an [`UncertainValue`] (mean ± std-dev of the per-split RMSEs).
///
/// NOTE(review): each iteration scores the *training* portion (all folds but
/// one) rather than the held-out fold — with precomputed `y_fit` there is no
/// refitting, so this behaves as a stability estimate rather than classic
/// cross-validation; confirm intent.
pub fn folded_rmse<T: Value>(
    y: impl Iterator<Item = T>,
    y_fit: impl Iterator<Item = T>,
    strategy: CvStrategy,
) -> UncertainValue<T> {
    let data: Vec<(T, T)> = y.zip(y_fit).collect();
    let folds = cross_validation_split(&data, strategy);
    let mut rmses = Vec::with_capacity(folds.len());
    for i in 0..folds.len() {
        // All pairs except those in fold `i`.
        let training_set: Vec<(T, T)> = folds
            .iter()
            .enumerate()
            .filter_map(|(j, fold)| if j == i { None } else { Some(fold.clone()) })
            .flatten()
            .collect();
        let (train_y, train_y_fit): (Vec<T>, Vec<T>) = training_set.into_iter().unzip();
        let rmse = root_mean_squared_error::<T>(train_y.into_iter(), train_y_fit.into_iter());
        rmses.push(rmse);
    }
    UncertainValue::new_from_values(rmses.into_iter())
}
/// A value with an associated spread: mean ± standard deviation.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct UncertainValue<T: Value> {
    /// Central estimate.
    pub mean: T,
    /// Standard deviation of the estimate.
    pub std_dev: T,
}
impl<T: Value> UncertainValue<T> {
    /// Wraps a known mean and standard deviation.
    pub fn new(mean: T, std_dev: T) -> Self {
        Self { mean, std_dev }
    }
    /// Summarizes a sample of values by its mean and standard deviation.
    pub fn new_from_values(values: impl Iterator<Item = T>) -> Self {
        let (std_dev, mean) = stddev_and_mean(values);
        Self { mean, std_dev }
    }
    /// Symmetric interval `mean ± z·std_dev` for the requested confidence.
    pub fn range(&self, confidence: Confidence) -> (T, T) {
        let z = T::from_f64(confidence.z_score()).unwrap_or(T::one());
        let half_width = z * self.std_dev;
        (self.mean - half_width, self.mean + half_width)
    }
    /// Packages [`Self::range`] as a [`ConfidenceBand`] centered on the mean.
    pub fn confidence_band(&self, confidence: Confidence) -> ConfidenceBand<T> {
        let (lower, upper) = self.range(confidence);
        ConfidenceBand {
            level: confidence,
            tolerance: None,
            value: self.mean,
            lower,
            upper,
        }
    }
}
/// Unit tests for the statistics helpers above.
#[cfg(test)]
#[allow(clippy::float_cmp)]
mod tests {
    use super::*;
    // --- residual_variance: SS_res / (n - k), zero when n == k ---
    #[test]
    fn residual_variance_zero_error() {
        let y = vec![1.0, 2.0, 3.0];
        let y_fit = vec![1.0, 2.0, 3.0];
        let var = residual_variance::<f64>(y.into_iter(), y_fit.into_iter(), 1.0);
        assert_eq!(var, 0.0);
    }
    #[test]
    fn residual_variance_simple_case() {
        let y = vec![1.0, 2.0];
        let y_fit = vec![0.0, 0.0];
        // SS_res = 1 + 4 = 5, n - k = 1.
        let var = residual_variance::<f64>(y.into_iter(), y_fit.into_iter(), 1.0);
        assert_eq!(var, 5.0);
    }
    #[test]
    fn residual_variance_mse_equivalence() {
        let y = vec![1.0, 2.0, 3.0];
        let y_fit = vec![0.0, 0.0, 0.0];
        // With k = 0 the residual variance equals the MSE.
        let var = residual_variance::<f64>(y.into_iter(), y_fit.into_iter(), 0.0);
        assert!((var - 14.0 / 3.0).abs() < 1e-12);
    }
    #[test]
    fn residual_variance_invalid_degrees_of_freedom() {
        let y = vec![1.0, 2.0];
        let y_fit = vec![1.0, 2.0];
        // n == k: guarded to return zero instead of dividing by zero.
        let var = residual_variance::<f64>(y.into_iter(), y_fit.into_iter(), 2.0);
        assert_eq!(var, 0.0);
    }
    // --- r_squared ---
    #[test]
    fn r_squared_perfect_fit() {
        let y = vec![1.0, 2.0, 3.0];
        let y_fit = vec![1.0, 2.0, 3.0];
        let r2 = r_squared::<f64>(y.into_iter(), y_fit.into_iter());
        assert_eq!(r2, 1.0);
    }
    #[test]
    fn r_squared_bad_fit() {
        let y = vec![1.0, 2.0, 3.0];
        // Fitting the mean gives exactly R² = 0.
        let y_fit = vec![2.0, 2.0, 2.0];
        let r2 = r_squared::<f64>(y.into_iter(), y_fit.into_iter());
        assert_eq!(r2, 0.0);
    }
    #[test]
    fn r_squared_partial_fit() {
        let y = vec![1.0, 2.0, 3.0];
        // SS_res equals SS_tot here, so R² is still 0.
        let y_fit = vec![0.0, 2.0, 4.0];
        let r2 = r_squared::<f64>(y.into_iter(), y_fit.into_iter());
        assert_eq!(r2, 0.0);
    }
    #[test]
    fn r_squared_negative_case() {
        let y = vec![1.0, 2.0, 3.0];
        // A fit worse than the mean makes R² negative.
        let y_fit = vec![10.0, 10.0, 10.0];
        let r2 = r_squared::<f64>(y.into_iter(), y_fit.into_iter());
        assert_eq!(r2, -96.0);
    }
    #[test]
    fn r_squared_constant_y() {
        // SS_tot = 0 produces 0/0 = NaN by design.
        let y = vec![2.0, 2.0, 2.0];
        let y_fit = vec![2.0, 2.0, 2.0];
        let r2 = r_squared::<f64>(y.into_iter(), y_fit.into_iter());
        assert!(r2.is_nan()); }
    // --- adjusted_r_squared ---
    #[test]
    fn adjusted_r_squared_perfect_fit() {
        let y = vec![1.0, 2.0, 3.0, 4.0];
        let y_fit = y.clone(); let r2_adj = adjusted_r_squared::<f64>(y.into_iter(), y_fit.into_iter(), 2.0);
        assert_eq!(r2_adj, 1.0);
    }
    #[test]
    fn adjusted_r_squared_equals_r2_when_k1() {
        let y = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0];
        let y_fit = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0];
        let r2 = r_squared(y.clone().into_iter(), y_fit.clone().into_iter());
        let r2_adj = adjusted_r_squared::<f64>(y.into_iter(), y_fit.into_iter(), 1.0);
        assert!((r2 - r2_adj).abs() < 1e-12);
    }
    #[test]
    fn adjusted_r_squared_penalizes_complexity() {
        let y = vec![1.0, 2.0, 3.0, 4.0];
        let y_fit = vec![1.1, 1.9, 3.2, 3.8]; let r2 = r_squared(y.clone().into_iter(), y_fit.clone().into_iter());
        let r2_adj = adjusted_r_squared::<f64>(y.into_iter(), y_fit.into_iter(), 3.0);
        assert!(r2_adj < r2);
    }
    #[test]
    fn adjusted_r_squared_negative_case() {
        let y = vec![1.0, 2.0, 3.0, 4.0];
        let y_fit = vec![10.0, 10.0, 10.0, 10.0]; let r2_adj = adjusted_r_squared::<f64>(y.into_iter(), y_fit.into_iter(), 2.0);
        assert!(r2_adj < 0.0);
    }
    #[test]
    fn adjusted_r_squared_invalid_degrees_of_freedom() {
        // n == k makes the penalty term divide by zero → NaN by design.
        let y = vec![1.0, 2.0];
        let y_fit = vec![1.0, 2.0];
        let r2_adj = adjusted_r_squared::<f64>(y.into_iter(), y_fit.into_iter(), 2.0);
        assert!(r2_adj.is_nan());
    }
    // --- mean_absolute_error ---
    #[test]
    fn mae_zero_error() {
        let y = vec![1.0, 2.0, 3.0];
        let y_fit = vec![1.0, 2.0, 3.0];
        let mae = mean_absolute_error::<f64>(y.into_iter(), y_fit.into_iter());
        assert_eq!(mae, 0.0);
    }
    #[test]
    fn mae_simple_case() {
        let y = vec![1.0, 2.0, 3.0];
        let y_fit = vec![2.0, 2.0, 2.0];
        let mae = mean_absolute_error::<f64>(y.into_iter(), y_fit.into_iter());
        assert!((mae - 2.0 / 3.0).abs() < 1e-12);
    }
    #[test]
    fn mae_symmetric() {
        let y = vec![1.0, 2.0, 3.0];
        let y_fit = vec![2.0, 2.0, 2.0];
        let mae1 = mean_absolute_error::<f64>(y.clone().into_iter(), y_fit.clone().into_iter());
        let mae2 = mean_absolute_error::<f64>(y_fit.into_iter(), y.into_iter());
        assert_eq!(mae1, mae2);
    }
    #[test]
    fn mae_with_negatives() {
        let y = vec![-1.0, -2.0];
        let y_fit = vec![1.0, 2.0];
        let mae = mean_absolute_error::<f64>(y.into_iter(), y_fit.into_iter());
        assert_eq!(mae, 3.0);
    }
    #[test]
    fn mae_empty_input() {
        // Empty input yields 0/0 = NaN by design.
        let y: Vec<f64> = vec![];
        let y_fit: Vec<f64> = vec![];
        let mae = mean_absolute_error::<f64>(y.into_iter(), y_fit.into_iter());
        assert!(mae.is_nan());
    }
    // --- mean_squared_error ---
    #[test]
    fn mse_zero_error() {
        let y = vec![1.0, 2.0, 3.0];
        let y_fit = vec![1.0, 2.0, 3.0];
        let mse = mean_squared_error::<f64>(y.into_iter(), y_fit.into_iter());
        assert_eq!(mse, 0.0);
    }
    #[test]
    fn mse_simple_case() {
        let y = vec![1.0, 2.0, 3.0];
        let y_fit = vec![2.0, 2.0, 2.0];
        let mse = mean_squared_error::<f64>(y.into_iter(), y_fit.into_iter());
        assert!((mse - 2.0 / 3.0).abs() < 1e-12);
    }
    #[test]
    fn mse_with_negatives() {
        let y = vec![-1.0, -2.0];
        let y_fit = vec![1.0, 2.0];
        let mse = mean_squared_error::<f64>(y.into_iter(), y_fit.into_iter());
        assert_eq!(mse, 10.0);
    }
    #[test]
    fn mse_symmetric() {
        let y = vec![1.0, 2.0, 3.0];
        let y_fit = vec![2.0, 2.0, 2.0];
        let mse1 = mean_squared_error::<f64>(y.clone().into_iter(), y_fit.clone().into_iter());
        let mse2 = mean_squared_error::<f64>(y_fit.into_iter(), y.into_iter());
        assert_eq!(mse1, mse2);
    }
    #[test]
    fn mse_empty_input_returns_nan() {
        let y: Vec<f64> = vec![];
        let y_fit: Vec<f64> = vec![];
        let mse = mean_squared_error::<f64>(y.into_iter(), y_fit.into_iter());
        assert!(mse.is_nan());
    }
}