use crate::core::{
IntervalType, LambdaScaling, PredictionResult, RegressionOptions, RegressionOptionsBuilder,
RegressionResult, SolverType,
};
use crate::inference::{compute_prediction_intervals, CoefficientInference};
use crate::solvers::ols::OlsRegressor;
use crate::solvers::traits::{FittedRegressor, RegressionError, Regressor};
use crate::utils::{center_columns, center_vector, detect_constant_columns};
use faer::prelude::Solve;
use faer::{Col, Mat};
use statrs::distribution::{ContinuousCDF, FisherSnedecor, StudentsT};
#[derive(Debug, Clone)]
pub struct RidgeRegressor {
options: RegressionOptions,
}
impl RidgeRegressor {
pub fn new(options: RegressionOptions) -> Self {
Self { options }
}
pub fn builder() -> RidgeRegressorBuilder {
RidgeRegressorBuilder::default()
}
}
impl Regressor for RidgeRegressor {
type Fitted = FittedRidge;
fn fit(&self, x: &Mat<f64>, y: &Col<f64>) -> Result<Self::Fitted, RegressionError> {
if self.options.lambda == 0.0 {
let ols = OlsRegressor::new(self.options.clone());
let ols_fitted = ols.fit(x, y)?;
let xtx_inverse = crate::inference::compute_xtx_inverse_augmented(x).ok();
return Ok(FittedRidge {
options: self.options.clone(),
result: ols_fitted.result().clone(),
xtx_reg_inverse: xtx_inverse,
});
}
let n_samples = x.nrows();
let n_features = x.ncols();
if x.nrows() != y.nrows() {
return Err(RegressionError::DimensionMismatch {
x_rows: x.nrows(),
y_len: y.nrows(),
});
}
if n_samples < 2 {
return Err(RegressionError::InsufficientObservations {
needed: 2,
got: n_samples,
});
}
let _constant_cols = detect_constant_columns(x, self.options.rank_tolerance);
if self.options.with_intercept {
let (x_centered, x_means) = center_columns(x);
let (y_centered, y_mean) = center_vector(y);
let coefficients = match self.options.solver {
SolverType::Qr => self.solve_ridge_qr(&x_centered, &y_centered)?,
SolverType::Svd => self.solve_ridge_svd(&x_centered, &y_centered)?,
SolverType::Cholesky => self.solve_ridge_cholesky(&x_centered, &y_centered)?,
};
let mut intercept = y_mean;
for j in 0..n_features {
intercept -= x_means[j] * coefficients[j];
}
let mut fitted_values = Col::zeros(n_samples);
let mut residuals = Col::zeros(n_samples);
for i in 0..n_samples {
let mut pred = intercept;
for j in 0..n_features {
pred += x[(i, j)] * coefficients[j];
}
fitted_values[i] = pred;
residuals[i] = y[i] - pred;
}
let aliased = vec![false; n_features]; let rank = n_features; let n_params = n_features + 1;
let result = self.compute_statistics(
x,
y,
&coefficients,
Some(intercept),
&residuals,
&fitted_values,
&aliased,
rank,
n_params,
)?;
let xtx_reg_inverse = self.compute_xtx_reg_inverse_augmented(x);
Ok(FittedRidge {
options: self.options.clone(),
result,
xtx_reg_inverse,
})
} else {
let coefficients = match self.options.solver {
SolverType::Qr => self.solve_ridge_qr(x, y)?,
SolverType::Svd => self.solve_ridge_svd(x, y)?,
SolverType::Cholesky => self.solve_ridge_cholesky(x, y)?,
};
let mut fitted_values = Col::zeros(n_samples);
let mut residuals = Col::zeros(n_samples);
for i in 0..n_samples {
let mut pred = 0.0;
for j in 0..n_features {
pred += x[(i, j)] * coefficients[j];
}
fitted_values[i] = pred;
residuals[i] = y[i] - pred;
}
let aliased = vec![false; n_features];
let rank = n_features;
let n_params = n_features;
let result = self.compute_statistics(
x,
y,
&coefficients,
None,
&residuals,
&fitted_values,
&aliased,
rank,
n_params,
)?;
let xtx_reg_inverse = self.compute_xtx_reg_inverse(x);
Ok(FittedRidge {
options: self.options.clone(),
result,
xtx_reg_inverse,
})
}
}
}
impl RidgeRegressor {
fn compute_xtx_reg_inverse_augmented(&self, x: &Mat<f64>) -> Option<Mat<f64>> {
let n_samples = x.nrows();
let n_features = x.ncols();
let aug_size = n_features + 1;
let lambda = self.effective_lambda(n_samples);
let mut xtx_aug: Mat<f64> = Mat::zeros(aug_size, aug_size);
for i in 0..n_samples {
xtx_aug[(0, 0)] += 1.0;
for j in 0..n_features {
xtx_aug[(0, j + 1)] += x[(i, j)];
xtx_aug[(j + 1, 0)] += x[(i, j)];
for k in 0..n_features {
xtx_aug[(j + 1, k + 1)] += x[(i, j)] * x[(i, k)];
}
}
}
for j in 1..aug_size {
xtx_aug[(j, j)] += lambda;
}
let qr: faer::linalg::solvers::Qr<f64> = xtx_aug.qr();
let q = qr.compute_Q();
let r = qr.R();
for i in 0..aug_size {
if r[(i, i)].abs() < 1e-14 {
return None;
}
}
let mut inv = Mat::zeros(aug_size, aug_size);
let qt = q.transpose();
for col in 0..aug_size {
for i in (0..aug_size).rev() {
let mut sum = qt[(i, col)];
for j in (i + 1)..aug_size {
sum -= r[(i, j)] * inv[(j, col)];
}
inv[(i, col)] = sum / r[(i, i)];
}
}
Some(inv)
}
fn compute_xtx_reg_inverse(&self, x: &Mat<f64>) -> Option<Mat<f64>> {
let n_samples = x.nrows();
let n_features = x.ncols();
let lambda = self.effective_lambda(n_samples);
let xtx = x.transpose() * x;
let mut xtx_reg = xtx.clone();
for i in 0..n_features {
xtx_reg[(i, i)] += lambda;
}
let qr: faer::linalg::solvers::Qr<f64> = xtx_reg.qr();
let q = qr.compute_Q();
let r = qr.R();
for i in 0..n_features {
if r[(i, i)].abs() < 1e-14 {
return None;
}
}
let mut inv = Mat::zeros(n_features, n_features);
let qt = q.transpose();
for col in 0..n_features {
for i in (0..n_features).rev() {
let mut sum = qt[(i, col)];
for j in (i + 1)..n_features {
sum -= r[(i, j)] * inv[(j, col)];
}
inv[(i, col)] = sum / r[(i, i)];
}
}
Some(inv)
}
fn effective_lambda(&self, n_samples: usize) -> f64 {
match self.options.lambda_scaling {
LambdaScaling::Raw => self.options.lambda,
LambdaScaling::Glmnet => self.options.lambda * n_samples as f64,
}
}
fn solve_ridge_qr(&self, x: &Mat<f64>, y: &Col<f64>) -> Result<Col<f64>, RegressionError> {
let n_samples = x.nrows();
let n_features = x.ncols();
let lambda = self.effective_lambda(n_samples);
let xtx = x.transpose() * x;
let mut xtx_reg = xtx.clone();
for i in 0..n_features {
xtx_reg[(i, i)] += lambda;
}
let xty = x.transpose() * y;
let qr = xtx_reg.qr();
let q = qr.compute_Q();
let r = qr.R();
for i in 0..n_features {
if r[(i, i)].abs() < 1e-14 {
return Err(RegressionError::SingularMatrix);
}
}
let qty = q.transpose() * &xty;
let mut coefficients = Col::zeros(n_features);
for i in (0..n_features).rev() {
let mut sum = qty[i];
for j in (i + 1)..n_features {
sum -= r[(i, j)] * coefficients[j];
}
coefficients[i] = sum / r[(i, i)];
}
Ok(coefficients)
}
fn solve_ridge_svd(&self, x: &Mat<f64>, y: &Col<f64>) -> Result<Col<f64>, RegressionError> {
let n_samples = x.nrows();
let lambda = self.effective_lambda(n_samples);
let svd = x.svd().map_err(|_| RegressionError::SingularMatrix)?;
let u = svd.U();
let s = svd.S();
let s_col = s.column_vector();
let v = svd.V();
let n_sv = s_col.nrows();
let uty = u.transpose() * y;
let mut d = Col::zeros(n_sv);
for i in 0..n_sv {
let si = s_col[i];
d[i] = si / (si * si + lambda) * uty[i];
}
let coefficients = v * &d;
let mut coeff = Col::zeros(coefficients.nrows());
for i in 0..coefficients.nrows() {
coeff[i] = coefficients[i];
}
Ok(coeff)
}
fn solve_ridge_cholesky(
&self,
x: &Mat<f64>,
y: &Col<f64>,
) -> Result<Col<f64>, RegressionError> {
let n_samples = x.nrows();
let n_features = x.ncols();
let lambda = self.effective_lambda(n_samples);
let xtx = x.transpose() * x;
let mut xtx_reg = xtx.clone();
for i in 0..n_features {
xtx_reg[(i, i)] += lambda;
}
let xty = x.transpose() * y;
let llt = xtx_reg
.llt(faer::Side::Lower)
.map_err(|_| RegressionError::SingularMatrix)?;
let coefficients = llt.solve(&xty);
let mut coeff = Col::zeros(coefficients.nrows());
for i in 0..coefficients.nrows() {
coeff[i] = coefficients[i];
}
Ok(coeff)
}
#[allow(clippy::too_many_arguments)]
fn compute_statistics(
&self,
x: &Mat<f64>,
y: &Col<f64>,
coefficients: &Col<f64>,
intercept: Option<f64>,
residuals: &Col<f64>,
fitted_values: &Col<f64>,
aliased: &[bool],
rank: usize,
n_params: usize,
) -> Result<RegressionResult, RegressionError> {
let n = y.nrows();
let n_features = x.ncols();
let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
let tss: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
let rss: f64 = residuals.iter().map(|&r| r.powi(2)).sum();
let r_squared = if tss > 0.0 {
(1.0 - rss / tss).clamp(0.0, 1.0)
} else if rss < 1e-10 {
1.0
} else {
0.0
};
let df_total = (n - 1) as f64;
let df_resid = (n - n_params) as f64;
let adj_r_squared = if df_resid > 0.0 && df_total > 0.0 {
1.0 - (1.0 - r_squared) * df_total / df_resid
} else {
f64::NAN
};
let mse = if df_resid > 0.0 {
rss / df_resid
} else {
f64::NAN
};
let rmse = mse.sqrt();
let ess = tss - rss;
let df_model = (n_params - if intercept.is_some() { 1 } else { 0 }) as f64;
let f_statistic = if df_model > 0.0 && df_resid > 0.0 && mse > 0.0 {
(ess / df_model) / mse
} else {
f64::NAN
};
let f_pvalue = if f_statistic.is_finite() && df_model > 0.0 && df_resid > 0.0 {
let f_dist = FisherSnedecor::new(df_model, df_resid).ok();
f_dist.map_or(f64::NAN, |d| 1.0 - d.cdf(f_statistic))
} else {
f64::NAN
};
let log_likelihood = if mse > 0.0 {
-0.5 * n as f64 * (1.0 + (2.0 * std::f64::consts::PI).ln() + mse.ln())
} else {
f64::NAN
};
let k = n_params as f64;
let aic = if log_likelihood.is_finite() {
2.0 * k - 2.0 * log_likelihood
} else {
f64::NAN
};
let aicc = if log_likelihood.is_finite() && (n as f64 - k - 1.0) > 0.0 {
aic + 2.0 * k * (k + 1.0) / (n as f64 - k - 1.0)
} else {
f64::NAN
};
let bic = if log_likelihood.is_finite() {
k * (n as f64).ln() - 2.0 * log_likelihood
} else {
f64::NAN
};
let mut result = RegressionResult::empty(n_features, n);
result.coefficients = coefficients.clone();
result.intercept = intercept;
result.residuals = residuals.clone();
result.fitted_values = fitted_values.clone();
result.rank = rank;
result.n_parameters = n_params;
result.n_observations = n;
result.aliased = aliased.to_vec();
result.rank_tolerance = self.options.rank_tolerance;
result.r_squared = r_squared;
result.adj_r_squared = adj_r_squared;
result.mse = mse;
result.rmse = rmse;
result.f_statistic = f_statistic;
result.f_pvalue = f_pvalue;
result.aic = aic;
result.aicc = aicc;
result.bic = bic;
result.log_likelihood = log_likelihood;
result.confidence_level = self.options.confidence_level;
if self.options.compute_inference {
self.compute_inference(x, &mut result)?;
}
Ok(result)
}
fn compute_inference(
&self,
x: &Mat<f64>,
result: &mut RegressionResult,
) -> Result<(), RegressionError> {
let n_samples = x.nrows();
let n_features = x.ncols();
let df = result.residual_df() as f64;
let lambda = self.effective_lambda(n_samples);
if df <= 0.0 || !result.mse.is_finite() {
return Ok(());
}
let xtx = x.transpose() * x;
let mut xtx_reg = xtx.clone();
for i in 0..n_features {
xtx_reg[(i, i)] += lambda;
}
let qr = xtx_reg.qr();
let q = qr.compute_Q();
let r = qr.R();
let mut xtx_inv = Mat::zeros(n_features, n_features);
let qt = q.transpose();
for col in 0..n_features {
for i in (0..n_features).rev() {
let mut sum = qt[(i, col)];
for j in (i + 1)..n_features {
sum -= r[(i, j)] * xtx_inv[(j, col)];
}
xtx_inv[(i, col)] = sum / r[(i, i)];
}
}
let mut std_errors = Col::zeros(n_features);
for j in 0..n_features {
let var = result.mse * xtx_inv[(j, j)];
std_errors[j] = if var >= 0.0 { var.sqrt() } else { f64::NAN };
}
let t_stats = CoefficientInference::t_statistics(&result.coefficients, &std_errors);
let p_vals = CoefficientInference::p_values(&t_stats, df);
let (ci_lower, ci_upper) = CoefficientInference::confidence_intervals(
&result.coefficients,
&std_errors,
df,
self.options.confidence_level,
);
result.std_errors = Some(std_errors);
result.t_statistics = Some(t_stats);
result.p_values = Some(p_vals);
result.conf_interval_lower = Some(ci_lower);
result.conf_interval_upper = Some(ci_upper);
if let Some(intercept) = result.intercept {
let se_int = (result.mse / result.n_observations as f64).sqrt();
let t_int = intercept / se_int;
let t_dist = StudentsT::new(0.0, 1.0, df).ok();
let p_int = t_dist.map_or(f64::NAN, |d| 2.0 * (1.0 - d.cdf(t_int.abs())));
let t_crit = t_dist.map_or(f64::NAN, |d| {
d.inverse_cdf(1.0 - (1.0 - self.options.confidence_level) / 2.0)
});
let ci_int = (intercept - t_crit * se_int, intercept + t_crit * se_int);
result.intercept_std_error = Some(se_int);
result.intercept_t_statistic = Some(t_int);
result.intercept_p_value = Some(p_int);
result.intercept_conf_interval = Some(ci_int);
}
Ok(())
}
}
#[derive(Debug, Clone)]
pub struct FittedRidge {
options: RegressionOptions,
result: RegressionResult,
xtx_reg_inverse: Option<Mat<f64>>,
}
impl FittedRidge {
pub fn options(&self) -> &RegressionOptions {
&self.options
}
pub fn lambda(&self) -> f64 {
self.options.lambda
}
}
impl FittedRegressor for FittedRidge {
fn predict(&self, x: &Mat<f64>) -> Col<f64> {
let n_samples = x.nrows();
let n_features = x.ncols();
let mut predictions = Col::zeros(n_samples);
let intercept = self.result.intercept.unwrap_or(0.0);
for i in 0..n_samples {
let mut pred = intercept;
for j in 0..n_features {
pred += x[(i, j)] * self.result.coefficients[j];
}
predictions[i] = pred;
}
predictions
}
fn result(&self) -> &RegressionResult {
&self.result
}
fn predict_with_interval(
&self,
x: &Mat<f64>,
interval: Option<IntervalType>,
level: f64,
) -> PredictionResult {
let predictions = self.predict(x);
match interval {
None => PredictionResult::point_only(predictions),
Some(interval_type) => match &self.xtx_reg_inverse {
Some(xtx_inv) => {
let df = self.result.residual_df() as f64;
let has_intercept = self.result.intercept.is_some();
compute_prediction_intervals(
x,
xtx_inv,
&predictions,
self.result.mse,
df,
level,
interval_type,
has_intercept,
)
}
None => {
let n = x.nrows();
let mut lower = Col::zeros(n);
let mut upper = Col::zeros(n);
let mut se = Col::zeros(n);
for i in 0..n {
lower[i] = f64::NAN;
upper[i] = f64::NAN;
se[i] = f64::NAN;
}
PredictionResult::with_intervals(predictions, lower, upper, se)
}
},
}
}
}
#[derive(Debug, Clone, Default)]
pub struct RidgeRegressorBuilder {
builder: RegressionOptionsBuilder,
}
impl RidgeRegressorBuilder {
pub fn new() -> Self {
Self::default()
}
pub fn with_intercept(mut self, include: bool) -> Self {
self.builder = self.builder.with_intercept(include);
self
}
pub fn lambda(mut self, lambda: f64) -> Self {
self.builder = self.builder.lambda(lambda);
self
}
pub fn lambda_scaling(mut self, scaling: LambdaScaling) -> Self {
self.builder = self.builder.lambda_scaling(scaling);
self
}
pub fn compute_inference(mut self, compute: bool) -> Self {
self.builder = self.builder.compute_inference(compute);
self
}
pub fn confidence_level(mut self, level: f64) -> Self {
self.builder = self.builder.confidence_level(level);
self
}
pub fn solve_method(mut self, solver: SolverType) -> Self {
self.builder = self.builder.solver(solver);
self
}
pub fn build(self) -> RidgeRegressor {
RidgeRegressor::new(self.builder.build_unchecked())
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_ridge_basic() {
let x = Mat::from_fn(10, 1, |i, _| i as f64);
let y = Col::from_fn(10, |i| 2.0 + 3.0 * i as f64);
let model = RidgeRegressor::builder()
.with_intercept(true)
.lambda(0.01)
.build();
let fitted = model.fit(&x, &y).expect("model should fit");
assert!(fitted.r_squared() > 0.99);
}
}