use crate::error::GreenersError;
use crate::CovarianceType;
use ndarray::{Array1, Array2, Axis};
use ndarray_linalg::Inverse;
use statrs::distribution::{ContinuousCDF, FisherSnedecor, StudentsT};
use std::fmt;

/// Fitted OLS results: coefficient estimates, inference statistics, and
/// goodness-of-fit measures.
#[derive(Debug)]
pub struct OlsResult {
    /// Estimated coefficients, one per column of `X`.
    pub params: Array1<f64>,
    /// Standard errors of the coefficients under `cov_type`.
    pub std_errors: Array1<f64>,
    /// t-statistics: `params / std_errors`.
    pub t_values: Array1<f64>,
    /// Two-sided p-values from Student's t with `df_resid` degrees of freedom.
    pub p_values: Array1<f64>,
    /// Bounds of the 95% confidence intervals.
    pub conf_lower: Array1<f64>,
    pub conf_upper: Array1<f64>,
    pub r_squared: f64,
    pub adj_r_squared: f64,
    pub f_statistic: f64,
    pub prob_f: f64,
    pub log_likelihood: f64,
    pub aic: f64,
    pub bic: f64,
    /// Number of observations (rows of `X`).
    pub n_obs: usize,
    /// Residual degrees of freedom: `n_obs - k`.
    pub df_resid: usize,
    /// Model degrees of freedom: `k - 1`.
    pub df_model: usize,
    /// Residual standard error: `sqrt(SSR / df_resid)`.
    pub sigma: f64,
    /// Covariance estimator used for the standard errors.
    pub cov_type: CovarianceType,
}

impl fmt::Display for OlsResult {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let cov_str = match self.cov_type {
CovarianceType::NonRobust => "Non-Robust".to_string(),
CovarianceType::HC1 => "Robust (HC1)".to_string(),
CovarianceType::NeweyWest(lags) => format!("HAC (Newey-West, L={})", lags),
};
writeln!(f, "\n{:=^78}", " OLS Regression Results ")?;
writeln!(
f,
"{:<20} {:>15} || {:<20} {:>15.4}",
"Dep. Variable:", "y", "R-squared:", self.r_squared
)?;
writeln!(
f,
"{:<20} {:>15} || {:<20} {:>15.4}",
"Model:", "OLS", "Adj. R-squared:", self.adj_r_squared
)?;
writeln!(
f,
"{:<20} {:>15} || {:<20} {:>15.4}",
"Covariance Type:", cov_str, "F-statistic:", self.f_statistic
)?;
writeln!(
f,
"{:<20} {:>15} || {:<20} {:>15.4e}",
"No. Observations:", self.n_obs, "Prob (F-statistic):", self.prob_f
)?;
writeln!(
f,
"{:<20} {:>15} || {:<20} {:>15.4}",
"Df Residuals:", self.df_resid, "Log-Likelihood:", self.log_likelihood
)?;
writeln!(
f,
"{:<20} {:>15.4} || {:<20} {:>15.4}",
"AIC:", self.aic, "BIC:", self.bic
)?;
writeln!(f, "\n{:-^78}", "")?;
writeln!(
f,
"{:<10} | {:>10} | {:>10} | {:>8} | {:>8} | {:>18}",
"Variable", "coef", "std err", "t", "P>|t|", "[0.025 0.975]"
)?;
writeln!(f, "{:-^78}", "")?;
for i in 0..self.params.len() {
writeln!(
f,
"x{:<9} | {:>10.4} | {:>10.4} | {:>8.3} | {:>8.3} | {:>8.4} {:>8.4}",
i,
self.params[i],
self.std_errors[i],
self.t_values[i],
self.p_values[i],
self.conf_lower[i],
self.conf_upper[i]
)?;
}
writeln!(f, "{:=^78}", "")
}
}
/// Ordinary least squares estimator.
pub struct OLS;

impl OLS {
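    /// Fits `y = X beta + u` by ordinary least squares and computes
    /// inference statistics under the requested covariance estimator.
    /// `x` must already contain an intercept column if one is desired.
    ///
    /// # Example
    ///
    /// A minimal usage sketch, not taken from the original source: the
    /// `greeners` import path is an assumption about the crate layout, so
    /// adjust it to wherever `OLS` and `CovarianceType` are re-exported.
    ///
    /// ```ignore
    /// use greeners::{CovarianceType, OLS};
    /// use ndarray::array;
    ///
    /// // Intercept column plus a single regressor.
    /// let x = array![[1.0, 0.0], [1.0, 1.0], [1.0, 2.0], [1.0, 3.0], [1.0, 4.0]];
    /// let y = array![1.1, 2.9, 5.2, 6.8, 9.1];
    ///
    /// let result = OLS::fit(&y, &x, CovarianceType::HC1).unwrap();
    /// println!("{}", result); // statsmodels-style summary table
    /// ```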
pub fn fit(
y: &Array1<f64>,
x: &Array2<f64>,
cov_type: CovarianceType,
) -> Result<OlsResult, GreenersError> {
        let n = x.nrows(); // observations
        let k = x.ncols(); // regressors, including any intercept column
        if y.len() != n {
            return Err(GreenersError::ShapeMismatch(format!(
                "y has {} observations but X has {} rows",
                y.len(),
                n
            )));
        }
        if n <= k {
            return Err(GreenersError::ShapeMismatch(
                "Degrees of freedom <= 0: need more observations than regressors".into(),
            ));
        }
        // Solve the normal equations: beta = (X'X)^{-1} X'y. (A QR or
        // least-squares solve would be more numerically stable, but the
        // explicit inverse is reused below as the covariance "bread".)
        let x_t = x.t();
        let xt_x = x_t.dot(x);
        let xt_x_inv = xt_x.inv()?;
        let xt_y = x_t.dot(y);
        let beta = xt_x_inv.dot(&xt_y);
        let predicted = x.dot(&beta);
        let residuals = y - &predicted;
        // Residual sum of squares and the unbiased error-variance estimate.
        let ssr = residuals.dot(&residuals);
        let df_resid = n - k;
        let df_model = k - 1;
        let sigma2 = ssr / (df_resid as f64);
        let sigma = sigma2.sqrt();
        let cov_matrix = match cov_type {
            // Classical OLS covariance: sigma^2 * (X'X)^{-1}.
            CovarianceType::NonRobust => &xt_x_inv * sigma2,
            // White's heteroskedasticity-robust sandwich with the HC1
            // small-sample correction n / (n - k).
            CovarianceType::HC1 => {
                let u_squared = residuals.mapv(|r| r.powi(2));
                // Meat: X' diag(u^2) X, built by scaling row i of X by u_i^2.
                let mut x_weighted = x.clone();
                for (i, mut row) in x_weighted.axis_iter_mut(Axis(0)).enumerate() {
                    row *= u_squared[i];
                }
                let meat = x_t.dot(&x_weighted);
                let bread = &xt_x_inv;
                let sandwich = bread.dot(&meat).dot(bread);
                let correction = (n as f64) / (df_resid as f64);
                sandwich * correction
            }
            // Newey-West HAC: Bartlett-kernel-weighted autocovariances of
            // x_t * u_t, wrapped in the same (X'X)^{-1} sandwich.
            CovarianceType::NeweyWest(lags) => {
                let u_squared = residuals.mapv(|r| r.powi(2));
                // Lag-0 term, identical to the White meat: X' diag(u^2) X.
                let mut x_weighted = x.clone();
                for (i, mut row) in x_weighted.axis_iter_mut(Axis(0)).enumerate() {
                    row *= u_squared[i];
                }
                let mut meat = x_t.dot(&x_weighted);
                for l in 1..=lags {
                    // Bartlett weight: w_l = 1 - l / (L + 1).
                    let weight = 1.0 - (l as f64) / ((lags + 1) as f64);
                    // Omega_l = sum_{t=l}^{n-1} u_t u_{t-l} x_t x_{t-l}'.
                    let mut omega_l = Array2::<f64>::zeros((k, k));
                    for t in l..n {
                        let scale = residuals[t] * residuals[t - l];
                        let x_row_t = x.row(t);
                        let x_row_prev = x.row(t - l);
                        for i in 0..k {
                            for j in 0..k {
                                omega_l[[i, j]] += scale * x_row_t[i] * x_row_prev[j];
                            }
                        }
                    }
                    // Add the symmetrized, weighted term: w_l * (Omega_l + Omega_l').
                    let omega_l_t = omega_l.t();
                    let term = &omega_l + &omega_l_t;
                    meat = meat + (&term * weight);
                }
                let bread = &xt_x_inv;
                let sandwich = bread.dot(&meat).dot(bread);
                let correction = (n as f64) / (df_resid as f64);
                sandwich * correction
            }
        };
        // Standard errors from the covariance diagonal, then t-statistics
        // and two-sided p-values against Student's t with df_resid dof.
        let std_errors = cov_matrix.diag().mapv(f64::sqrt);
        let t_values = &beta / &std_errors;
        let t_dist = StudentsT::new(0.0, 1.0, df_resid as f64)
            .map_err(|_| GreenersError::OptimizationFailed)?;
        let p_values = t_values.mapv(|t| 2.0 * (1.0 - t_dist.cdf(t.abs())));
        // 95% confidence intervals: beta +/- t_crit * se.
        let t_crit = t_dist.inverse_cdf(0.975);
        let margin_error = &std_errors * t_crit;
        let conf_lower = &beta - &margin_error;
        let conf_upper = &beta + &margin_error;
        // R^2 = 1 - SSR/SST, with a guard for a constant dependent variable.
        let y_mean = y.mean().unwrap_or(0.0);
        let sst = y.mapv(|val| (val - y_mean).powi(2)).sum();
        let r_squared = if sst.abs() < 1e-12 {
            0.0
        } else {
            1.0 - (ssr / sst)
        };
        let adj_r_squared = 1.0 - (1.0 - r_squared) * ((n as f64 - 1.0) / (df_resid as f64));
        // Overall F-test of the regression: all slopes jointly zero.
        let msm = (sst - ssr) / (df_model as f64);
        let f_statistic = if sigma2 < 1e-12 { 0.0 } else { msm / sigma2 };
        let prob_f = if df_model > 0 {
            let f_dist = FisherSnedecor::new(df_model as f64, df_resid as f64)
                .map_err(|_| GreenersError::OptimizationFailed)?;
            1.0 - f_dist.cdf(f_statistic)
        } else {
            f64::NAN
        };
        // Gaussian log-likelihood with sigma^2 concentrated out:
        // ll = -(n/2) * (ln(2*pi) + ln(SSR/n) + 1).
        let n_f64 = n as f64;
        let log_likelihood =
            -n_f64 / 2.0 * ((2.0 * std::f64::consts::PI).ln() + (ssr / n_f64).ln() + 1.0);
        // Information criteria counting the k regression coefficients.
        let aic = 2.0 * (k as f64) - 2.0 * log_likelihood;
        let bic = (k as f64) * n_f64.ln() - 2.0 * log_likelihood;
Ok(OlsResult {
params: beta,
std_errors,
t_values,
p_values,
conf_lower,
conf_upper,
r_squared,
adj_r_squared,
f_statistic,
prob_f,
log_likelihood,
aic,
bic,
n_obs: n,
df_resid,
df_model,
sigma,
cov_type,
})
}
}
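
// A minimal smoke test, not part of the original source: it checks that
// `fit` recovers the slope of a near-linear relationship. The bounds are
// loose sanity checks, not reference values from another package, and
// `unwrap` assumes the usual `Debug` impl on `GreenersError`.
#[cfg(test)]
mod tests {
    use super::*;
    use ndarray::array;

    #[test]
    fn recovers_slope_of_a_simple_line() {
        // Intercept column plus x = 0..4; y is roughly 1 + 2x.
        let x = array![[1.0, 0.0], [1.0, 1.0], [1.0, 2.0], [1.0, 3.0], [1.0, 4.0]];
        let y = array![1.1, 2.9, 5.2, 6.8, 9.1];
        let result = OLS::fit(&y, &x, CovarianceType::NonRobust).unwrap();
        assert!((result.params[0] - 1.0).abs() < 0.5); // intercept near 1
        assert!((result.params[1] - 2.0).abs() < 0.2); // slope near 2
        assert!(result.r_squared > 0.99);
    }
}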