use crate::{
core::{f_p_value, t_critical_quantile},
distributions::student_t_cdf,
error::{Error, Result},
linalg::Matrix,
serialization::types::ModelType,
impl_serialization,
};
use serde::{Deserialize, Serialize};
/// Fitted weighted least squares model: per-coefficient inference
/// (standard errors, t statistics, p-values, confidence bounds) plus
/// overall goodness-of-fit diagnostics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WlsFit {
/// Intercept first, then one slope per predictor (length `k + 1`).
pub coefficients: Vec<f64>,
/// Standard error of each coefficient (same order as `coefficients`).
pub standard_errors: Vec<f64>,
/// t statistic per coefficient: `coefficient / standard_error`.
pub t_statistics: Vec<f64>,
/// Two-sided p-value per coefficient against a t distribution with
/// `df_residuals` degrees of freedom.
pub p_values: Vec<f64>,
/// Lower bound of the 95% confidence interval per coefficient.
pub conf_int_lower: Vec<f64>,
/// Upper bound of the 95% confidence interval per coefficient.
pub conf_int_upper: Vec<f64>,
/// Coefficient of determination, `1 - RSS / SS_tot`.
pub r_squared: f64,
/// R² adjusted for the number of predictors.
pub adj_r_squared: f64,
/// Overall regression F statistic (model vs. intercept-only).
pub f_statistic: f64,
/// p-value of the overall F test.
pub f_p_value: f64,
/// Residual standard error, `sqrt(mse)`.
pub residual_std_error: f64,
/// Residual degrees of freedom, `n - k - 1`.
pub df_residuals: isize,
/// Model degrees of freedom, `k`.
pub df_model: isize,
/// Predicted response for each observation.
pub fitted_values: Vec<f64>,
/// `y[i] - fitted_values[i]` for each observation.
pub residuals: Vec<f64>,
/// Residual mean square, `RSS / df_residuals`.
pub mse: f64,
/// `sqrt(mse)` (duplicates `residual_std_error` for convenience).
pub rmse: f64,
/// Mean absolute residual.
pub mae: f64,
/// Number of observations.
pub n: usize,
/// Number of predictors (excluding the intercept).
pub k: usize,
}
/// Fit a weighted least squares regression of `y` on `x_vars` with an intercept.
///
/// `weights` are interpreted as precision weights (inverse relative variances),
/// as in R's `lm(..., weights = w)` and statsmodels' `WLS`: observations with
/// larger weights pull the fit harder. Weights must be non-negative and have a
/// positive sum.
///
/// All inferential statistics (standard errors, t/p-values, R², F) are computed
/// from the *weighted* residual and total sums of squares, which is what pairs
/// with the weighted covariance `cov(beta) = sigma^2 (X'WX)^{-1}`. For unit
/// weights this reduces exactly to ordinary least squares.
///
/// # Errors
/// * `Error::InsufficientData` when `n <= k + 1`.
/// * `Error::InvalidInput` for length mismatches, a negative weight, or a
///   non-positive weight sum.
pub fn wls_regression(
    y: &[f64],
    x_vars: &[Vec<f64>],
    weights: &[f64],
) -> Result<WlsFit> {
    let n = y.len();
    let k = x_vars.len();
    // Need k slopes + intercept + at least one residual degree of freedom.
    if n <= k + 1 {
        return Err(Error::InsufficientData {
            required: k + 2,
            available: n,
        });
    }
    for (i, x_var) in x_vars.iter().enumerate() {
        if x_var.len() != n {
            return Err(Error::InvalidInput(format!(
                "x[{}] has {} elements, expected {}",
                i,
                x_var.len(),
                n
            )));
        }
    }
    if weights.len() != n {
        return Err(Error::InvalidInput(format!(
            "weights has {} elements, expected {}",
            weights.len(),
            n
        )));
    }
    for (i, &w) in weights.iter().enumerate() {
        if w < 0.0 {
            return Err(Error::InvalidInput(format!(
                "weights[{}] is negative ({}), weights must be non-negative",
                i, w
            )));
        }
    }
    let weight_sum: f64 = weights.iter().sum();
    if weight_sum <= 0.0 {
        return Err(Error::InvalidInput(
            "Sum of weights is zero or negative".to_string()
        ));
    }
    // Row-major design matrix [1 | x_1 | ... | x_k].
    let mut x_data = Vec::with_capacity(n * (k + 1));
    for i in 0..n {
        x_data.push(1.0);
        for j in 0..k {
            x_data.push(x_vars[j][i]);
        }
    }
    let x = Matrix::new(n, k + 1, x_data);
    let decomp = crate::loess::wls::weighted_least_squares_with_decomposition(&x, y, weights)?;
    let coefficients = decomp.coefficients;
    // Predictions and residuals are on the original (unweighted) scale.
    let fitted_values: Vec<f64> = (0..n)
        .map(|i| {
            let mut y_hat = coefficients[0];
            for j in 0..k {
                y_hat += coefficients[j + 1] * x_vars[j][i];
            }
            y_hat
        })
        .collect();
    let residuals: Vec<f64> = y.iter().zip(fitted_values.iter())
        .map(|(yi, y_hat)| yi - y_hat)
        .collect();
    let p = k + 1;
    let df_residuals = n as isize - p as isize;
    let df_model = k as isize;
    // Defensive guard; unreachable given the `n <= k + 1` check above.
    if df_residuals <= 0 {
        return Err(Error::InsufficientData {
            required: p + 1,
            available: n,
        });
    }
    // WLS residual variance uses the *weighted* RSS: sigma^2 = sum(w_i r_i^2)/df.
    // This matches R's summary.lm for weighted fits and statsmodels' WLS `scale`,
    // and is the estimate that pairs with cov(beta) = sigma^2 (X'WX)^{-1} below.
    let rss: f64 = residuals
        .iter()
        .zip(weights)
        .map(|(r, &w)| w * r * r)
        .sum();
    let mse = rss / df_residuals as f64;
    let residual_std_error = mse.sqrt();
    // Weighted total sum of squares around the weighted mean of y.
    let ss_tot: f64 = {
        let y_wmean = y.iter().zip(weights).map(|(yi, &w)| w * yi).sum::<f64>() / weight_sum;
        y.iter()
            .zip(weights)
            .map(|(yi, &w)| w * (yi - y_wmean).powi(2))
            .sum()
    };
    let r_squared = if ss_tot > 0.0 {
        1.0 - (rss / ss_tot)
    } else {
        0.0
    };
    let adj_r_squared = if df_residuals > 1 {
        1.0 - ((1.0 - r_squared) * (n - 1) as f64 / df_residuals as f64)
    } else {
        r_squared
    };
    // Coefficient covariance from whichever decomposition the solver produced.
    let cov = if let Some(ref r_inv) = decomp.r_inv {
        compute_covariance_from_qr(r_inv, &decomp.column_scales, mse, p)
    } else if let Some((ref v, ref singular_values)) = decomp.svd_components {
        compute_covariance_from_svd(v, singular_values, &decomp.column_scales, mse, p)
    } else {
        return Err(Error::SingularMatrix);
    };
    let mut standard_errors = Vec::with_capacity(p);
    for i in 0..p {
        let se = cov.get(i, i).sqrt();
        standard_errors.push(se);
    }
    let mut t_statistics = Vec::with_capacity(p);
    let mut p_values = Vec::with_capacity(p);
    for i in 0..p {
        let t = coefficients[i] / standard_errors[i];
        t_statistics.push(t);
        // Two-sided p-value from the t distribution with df_residuals df.
        let p_val = 2.0 * (1.0 - student_t_cdf(t.abs(), df_residuals as f64));
        p_values.push(p_val);
    }
    // 95% confidence intervals: beta +/- t_{alpha/2, df} * se.
    let alpha = 0.05;
    let t_critical = t_critical_quantile(df_residuals as f64, alpha);
    let conf_int_lower: Vec<f64> = coefficients
        .iter()
        .zip(&standard_errors)
        .map(|(&b, &se)| b - t_critical * se)
        .collect();
    let conf_int_upper: Vec<f64> = coefficients
        .iter()
        .zip(&standard_errors)
        .map(|(&b, &se)| b + t_critical * se)
        .collect();
    // Overall F test of the model against the intercept-only fit.
    let f_statistic = if ss_tot > rss && k > 0 {
        ((ss_tot - rss) / k as f64) / (rss / df_residuals as f64)
    } else {
        0.0
    };
    let f_p_value = if f_statistic > 0.0 {
        f_p_value(f_statistic, k as f64, df_residuals as f64)
    } else {
        1.0
    };
    let rmse = mse.sqrt();
    // MAE is reported on the raw residuals (a prediction-quality metric, not
    // part of the weighted inference).
    let mae = residuals.iter().map(|r| r.abs()).sum::<f64>() / n as f64;
    Ok(WlsFit {
        coefficients,
        standard_errors,
        t_statistics,
        p_values,
        conf_int_lower,
        conf_int_upper,
        r_squared,
        adj_r_squared,
        f_statistic,
        f_p_value,
        residual_std_error,
        df_residuals,
        df_model,
        fitted_values,
        residuals,
        mse,
        rmse,
        mae,
        n,
        k,
    })
}
/// Coefficient covariance from the QR path: `cov = mse * R^{-1} R^{-T}`,
/// rescaled by the per-column scaling that was applied to the design matrix
/// before the decomposition.
fn compute_covariance_from_qr(
    r_inv: &Matrix,
    column_scales: &[f64],
    mse: f64,
    p: usize,
) -> Matrix {
    let mut cov = Matrix::zeros(p, p);
    for row in 0..p {
        for col in 0..p {
            // (R^{-1} R^{-T})[row, col] = sum_l R^{-1}[row, l] * R^{-1}[col, l]
            let dot: f64 = (0..p)
                .map(|l| r_inv.get(row, l) * r_inv.get(col, l))
                .sum();
            cov.set(row, col, mse * dot / (column_scales[row] * column_scales[col]));
        }
    }
    cov
}
/// Coefficient covariance from the SVD fallback:
/// `cov = mse * V diag(1/sigma^2) V'`, rescaled by the per-column scaling
/// applied to the design matrix before the decomposition. Directions whose
/// singular value falls below the rank tolerance are treated as null space
/// and skipped (pseudo-inverse behaviour).
fn compute_covariance_from_svd(
    v: &Matrix,
    singular_values: &[f64],
    column_scales: &[f64],
    mse: f64,
    p: usize,
) -> Matrix {
    // Take the true maximum rather than `first()`: do not rely on the
    // decomposition returning singular values in descending order.
    let max_sigma = singular_values.iter().copied().fold(0.0_f64, f64::max);
    let tol = if max_sigma > 0.0 {
        max_sigma * 100.0 * f64::EPSILON
    } else {
        f64::EPSILON
    };
    let mut cov = Matrix::zeros(p, p);
    let rank_limit = singular_values.len().min(p);
    for i in 0..p {
        for j in 0..p {
            let mut sum = 0.0;
            for l in 0..rank_limit {
                if singular_values[l] > tol {
                    let inv_sigma_sq = 1.0 / (singular_values[l] * singular_values[l]);
                    sum += v.get(i, l) * v.get(j, l) * inv_sigma_sq;
                }
            }
            cov.set(i, j, mse * sum / (column_scales[i] * column_scales[j]));
        }
    }
    cov
}
// Wire WlsFit into the crate's shared save/load machinery under ModelType::WLS.
impl_serialization!(WlsFit, ModelType::WLS, "WLS");
#[cfg(test)]
mod tests {
    use super::*;

    /// Unit weights must reproduce the ordinary least squares solution
    /// for a perfect-fit line y = x.
    #[test]
    fn test_wls_equal_weights_matches_ols() {
        let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let x = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let weights = vec![1.0; 5];

        let fit = wls_regression(&y, &[x], &weights).unwrap();

        assert!((fit.coefficients[0] - 0.0).abs() < 1e-10);
        assert!((fit.coefficients[1] - 1.0).abs() < 1e-10);
        assert_eq!(fit.k, 1);
        assert_eq!(fit.n, 5);
        assert!(fit.standard_errors.len() == 2);
        assert!(fit.t_statistics.len() == 2);
        assert!(fit.p_values.len() == 2);
        assert!(fit.f_statistic > 0.0);
        assert!(fit.f_p_value < 0.05);
    }

    /// Down-weighting the outlier (y = 100) should drag the slope down
    /// relative to up-weighting it.
    #[test]
    fn test_wls_with_weighted_data() {
        let y = vec![2.0, 4.0, 6.0, 8.0, 100.0];
        let x = vec![1.0, 2.0, 3.0, 4.0, 5.0];

        let weights_low = vec![1.0, 1.0, 1.0, 1.0, 0.01];
        let fit_low = wls_regression(&y, &[x.clone()], &weights_low).unwrap();

        let weights_high = vec![1.0, 1.0, 1.0, 1.0, 10.0];
        let fit_high = wls_regression(&y, &[x], &weights_high).unwrap();

        assert!(fit_low.coefficients[1] < fit_high.coefficients[1]);
    }

    /// Any negative weight is rejected up front.
    #[test]
    fn test_wls_negative_weight_error() {
        let y = vec![1.0, 2.0, 3.0];
        let x = vec![1.0, 2.0, 3.0];
        let weights = vec![1.0, -1.0, 1.0];

        let result = wls_regression(&y, &[x], &weights);
        assert!(result.is_err());
    }

    /// Two predictors: coefficient/statistic vectors are sized k + 1 = 3.
    #[test]
    fn test_wls_multiple_predictors() {
        let y = vec![2.0, 4.0, 6.0, 8.0, 10.0];
        let x1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let x2 = vec![1.0, 4.0, 2.0, 5.0, 3.0];
        let weights = vec![1.0; 5];

        let fit = wls_regression(&y, &[x1, x2], &weights).unwrap();

        assert_eq!(fit.k, 2);
        assert_eq!(fit.coefficients.len(), 3);
        assert_eq!(fit.fitted_values.len(), 5);
        assert_eq!(fit.standard_errors.len(), 3);
        assert_eq!(fit.t_statistics.len(), 3);
        assert_eq!(fit.p_values.len(), 3);
    }

    /// n = 2 observations cannot support two predictors plus an intercept.
    #[test]
    fn test_wls_insufficient_data() {
        let y = vec![1.0, 2.0];
        let x1 = vec![1.0, 2.0];
        let x2 = vec![0.5, 1.0];
        let weights = vec![1.0, 1.0];

        let result = wls_regression(&y, &[x1, x2], &weights);
        assert!(result.is_err());
    }

    /// Every field of WlsFit is populated with a sane value.
    #[test]
    fn test_wls_statistics_completeness() {
        let y = vec![2.0, 4.0, 6.0, 8.0, 10.0];
        let x = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let weights = vec![1.0; 5];

        let fit = wls_regression(&y, &[x], &weights).unwrap();

        assert_eq!(fit.coefficients.len(), 2);
        assert_eq!(fit.standard_errors.len(), 2);
        assert_eq!(fit.t_statistics.len(), 2);
        assert_eq!(fit.p_values.len(), 2);
        assert!(fit.r_squared >= 0.0 && fit.r_squared <= 1.0);
        assert!(fit.adj_r_squared >= 0.0 && fit.adj_r_squared <= 1.0);
        assert!(fit.f_statistic >= 0.0);
        assert!(fit.f_p_value >= 0.0 && fit.f_p_value <= 1.0);
        assert!(fit.residual_std_error >= 0.0);
        assert_eq!(fit.df_residuals, 3);
        assert_eq!(fit.df_model, 1);
        assert_eq!(fit.fitted_values.len(), 5);
        assert_eq!(fit.residuals.len(), 5);
        assert!(fit.mse >= 0.0);
        assert!(fit.rmse >= 0.0);
        assert!(fit.mae >= 0.0);
        assert_eq!(fit.n, 5);
        assert_eq!(fit.k, 1);
    }

    /// All-zero weights sum to zero and must be rejected.
    #[test]
    fn test_wls_zero_sum_weights_error() {
        let y = vec![1.0, 2.0, 3.0];
        let x = vec![1.0, 2.0, 3.0];
        let weights = vec![0.0, 0.0, 0.0];

        let result = wls_regression(&y, &[x], &weights);
        assert!(result.is_err());
    }

    /// x2 = 2 * x1 makes the design rank-deficient; if the solver falls back
    /// to SVD and returns a fit, its standard errors must still be finite.
    #[test]
    fn test_wls_svd_fallback_computes_standard_errors() {
        let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let x1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let x2 = vec![2.0, 4.0, 6.0, 8.0, 10.0];
        let weights = vec![1.0; 5];

        let result = wls_regression(&y, &[x1, x2], &weights);
        match result {
            Ok(fit) => {
                for se in &fit.standard_errors {
                    assert!(se.is_finite(), "Standard error should be finite, got {}", se);
                }
            }
            Err(_) => {
                // Rejecting the singular design outright is also acceptable.
            }
        }
    }
}