use crate::OaxacaError;
use nalgebra::{DMatrix, DVector};
#[derive(Debug)]
#[allow(dead_code)]
pub struct OlsResult {
pub coefficients: DVector<f64>,
pub vcov: DMatrix<f64>,
pub residuals: DVector<f64>,
}
pub fn ols(
y: &DVector<f64>,
x: &DMatrix<f64>,
weights: Option<&DVector<f64>>,
) -> Result<OlsResult, OaxacaError> {
let (xtx, xty, n_obs) = if let Some(w) = weights {
let w_sqrt = w.map(|v| v.sqrt());
let mut x_w = x.clone();
for j in 0..x.ncols() {
let mut col = x_w.column_mut(j);
col.component_mul_assign(&w_sqrt);
}
let y_w = y.component_mul(&w_sqrt);
let xtx = x_w.transpose() * &x_w;
let xty = x_w.transpose() * &y_w;
let n = w.sum();
(xtx, xty, n)
} else {
let xtx = x.transpose() * x;
let xty = x.transpose() * y;
let n = x.nrows() as f64;
(xtx, xty, n)
};
let cholesky = xtx.cholesky().ok_or_else(|| {
OaxacaError::NalgebraError(
"Failed to perform Cholesky decomposition. Matrix may be singular or not positive definite due to multicollinearity.".to_string(),
)
})?;
let coefficients = cholesky.solve(&xty);
let y_hat = x * &coefficients;
let residuals = y - y_hat;
let k = x.ncols() as f64;
let sse = if let Some(w) = weights {
let weighted_residuals = residuals.component_mul(w); residuals.dot(&weighted_residuals) } else {
residuals.norm_squared()
};
let sigma_squared = sse / (n_obs - k);
let xtx_inv = cholesky.inverse();
let vcov = xtx_inv * sigma_squared;
Ok(OlsResult {
coefficients,
vcov,
residuals,
})
}
#[cfg(test)]
mod tests {
use super::*;
use nalgebra::{DMatrix, DVector};
#[test]
fn test_ols_simple_regression() {
let x = DMatrix::from_vec(
5,
2,
vec![
1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 4.0,
],
);
let y = DVector::from_vec(vec![1.0, 3.0, 5.0, 7.0, 9.0]);
let result = ols(&y, &x, None).expect("OLS calculation failed on valid data");
let coeffs = result.coefficients;
assert_eq!(coeffs.len(), 2);
assert!((coeffs[0] - 1.0).abs() < 1e-9, "Intercept is incorrect");
assert!((coeffs[1] - 2.0).abs() < 1e-9, "Slope is incorrect");
}
#[test]
fn test_ols_handles_singular_matrix() {
let x = DMatrix::from_vec(
3,
2,
vec![
1.0, 1.0, 1.0, 2.0, 2.0, 2.0,
],
);
let y = DVector::from_vec(vec![1.0, 2.0, 3.0]);
let result = ols(&y, &x, None);
assert!(result.is_err());
match result {
Err(OaxacaError::NalgebraError(msg)) => {
assert!(msg.contains("Failed to perform Cholesky decomposition"));
}
_ => panic!("Expected a NalgebraError for a singular matrix, but got something else."),
}
}
}