use crate::core::{
IntervalType, LambdaScaling, PredictionResult, RegressionOptions, RegressionOptionsBuilder,
RegressionResult,
};
use crate::inference::{
compute_prediction_intervals, compute_xtx_inverse_augmented_reduced,
compute_xtx_inverse_reduced,
};
use crate::solvers::ridge::RidgeRegressor;
use crate::solvers::traits::{FittedRegressor, RegressionError, Regressor};
use crate::utils::{center_columns, center_vector, detect_constant_columns};
use argmin::core::{CostFunction, Executor, Gradient, State};
use argmin::solver::linesearch::MoreThuenteLineSearch;
use argmin::solver::quasinewton::LBFGS;
use faer::{Col, Mat};
use statrs::distribution::{ContinuousCDF, FisherSnedecor};
#[derive(Debug, Clone)]
pub struct ElasticNetRegressor {
options: RegressionOptions,
}
impl ElasticNetRegressor {
pub fn new(options: RegressionOptions) -> Self {
Self { options }
}
pub fn builder() -> ElasticNetRegressorBuilder {
ElasticNetRegressorBuilder::default()
}
fn soft_threshold(z: f64, gamma: f64) -> f64 {
if z > gamma {
z - gamma
} else if z < -gamma {
z + gamma
} else {
0.0
}
}
}
#[derive(Clone)]
struct ElasticNetCost<'a> {
x: &'a Mat<f64>,
y: &'a Col<f64>,
lambda: f64,
alpha: f64,
}
impl ElasticNetCost<'_> {
fn compute_residuals(&self, beta: &[f64]) -> Vec<f64> {
let n = self.x.nrows();
let p = self.x.ncols();
(0..n)
.map(|i| {
let pred: f64 = (0..p).map(|j| self.x[(i, j)] * beta[j]).sum();
self.y[i] - pred
})
.collect()
}
}
impl CostFunction for ElasticNetCost<'_> {
type Param = Vec<f64>;
type Output = f64;
fn cost(&self, beta: &Self::Param) -> Result<Self::Output, argmin::core::Error> {
let residuals = self.compute_residuals(beta);
let rss: f64 = residuals.iter().map(|r| r * r).sum();
let l2_penalty: f64 = beta.iter().map(|b| b * b).sum();
Ok(rss + self.lambda * (1.0 - self.alpha) * l2_penalty)
}
}
impl Gradient for ElasticNetCost<'_> {
type Param = Vec<f64>;
type Gradient = Vec<f64>;
fn gradient(&self, beta: &Self::Param) -> Result<Self::Gradient, argmin::core::Error> {
let residuals = self.compute_residuals(beta);
let p = self.x.ncols();
let n = self.x.nrows();
let grad: Vec<f64> = (0..p)
.map(|j| {
let xtresid: f64 = (0..n).map(|i| self.x[(i, j)] * residuals[i]).sum();
-2.0 * xtresid + 2.0 * self.lambda * (1.0 - self.alpha) * beta[j]
})
.collect();
Ok(grad)
}
}
impl Regressor for ElasticNetRegressor {
type Fitted = FittedElasticNet;
fn fit(&self, x: &Mat<f64>, y: &Col<f64>) -> Result<Self::Fitted, RegressionError> {
if self.options.alpha == 0.0 {
let ridge = RidgeRegressor::new(self.options.clone());
let ridge_fitted = ridge.fit(x, y)?;
let aliased = ridge_fitted.result().aliased.clone();
let xtx_inverse = compute_xtx_inverse_augmented_reduced(x, &aliased).ok();
return Ok(FittedElasticNet {
options: self.options.clone(),
result: ridge_fitted.result().clone(),
xtx_inverse,
aliased,
});
}
let n_samples = x.nrows();
let n_features = x.ncols();
if x.nrows() != y.nrows() {
return Err(RegressionError::DimensionMismatch {
x_rows: x.nrows(),
y_len: y.nrows(),
});
}
if n_samples < 2 {
return Err(RegressionError::InsufficientObservations {
needed: 2,
got: n_samples,
});
}
if self.options.with_intercept {
let constant_cols = detect_constant_columns(x, self.options.rank_tolerance);
let (x_centered, x_means) = center_columns(x);
let (y_centered, y_mean) = center_vector(y);
let coefficients = self.fit_lbfgs(&x_centered, &y_centered)?;
let mut intercept = y_mean;
for j in 0..n_features {
if !constant_cols[j] {
intercept -= x_means[j] * coefficients[j];
}
}
let mut aliased = constant_cols.clone();
for j in 0..n_features {
if coefficients[j].abs() < 1e-10 {
aliased[j] = true;
}
}
let mut fitted_values = Col::zeros(n_samples);
let mut residuals = Col::zeros(n_samples);
for i in 0..n_samples {
let mut pred = intercept;
for j in 0..n_features {
if !constant_cols[j] {
pred += x[(i, j)] * coefficients[j];
}
}
fitted_values[i] = pred;
residuals[i] = y[i] - pred;
}
let n_nonzero = coefficients
.iter()
.enumerate()
.filter(|(j, &c)| !constant_cols[*j] && c.abs() > 1e-10)
.count();
let n_params = n_nonzero + 1;
let result = self.compute_statistics(
x,
y,
&coefficients,
Some(intercept),
&residuals,
&fitted_values,
&constant_cols,
n_nonzero,
n_params,
)?;
let xtx_inverse = compute_xtx_inverse_augmented_reduced(x, &constant_cols).ok();
Ok(FittedElasticNet {
options: self.options.clone(),
result,
xtx_inverse,
aliased: constant_cols,
})
} else {
let mut aliased = vec![false; n_features];
for j in 0..n_features {
let mut col_sq = 0.0;
for i in 0..n_samples {
col_sq += x[(i, j)] * x[(i, j)];
}
if col_sq < self.options.rank_tolerance {
aliased[j] = true;
}
}
let coefficients = self.fit_lbfgs(x, y)?;
for j in 0..n_features {
if coefficients[j].abs() < 1e-10 {
aliased[j] = true;
}
}
let mut fitted_values = Col::zeros(n_samples);
let mut residuals = Col::zeros(n_samples);
for i in 0..n_samples {
let mut pred = 0.0;
for j in 0..n_features {
pred += x[(i, j)] * coefficients[j];
}
fitted_values[i] = pred;
residuals[i] = y[i] - pred;
}
let n_nonzero = coefficients
.iter()
.enumerate()
.filter(|(j, &c)| !aliased[*j] && c.abs() > 1e-10)
.count();
let n_params = n_nonzero;
let result = self.compute_statistics(
x,
y,
&coefficients,
None,
&residuals,
&fitted_values,
&aliased,
n_nonzero,
n_params,
)?;
let xtx_inverse = compute_xtx_inverse_reduced(x, &aliased).ok();
Ok(FittedElasticNet {
options: self.options.clone(),
result,
xtx_inverse,
aliased,
})
}
}
}
impl ElasticNetRegressor {
fn fit_lbfgs(&self, x: &Mat<f64>, y: &Col<f64>) -> Result<Col<f64>, RegressionError> {
let n_samples = x.nrows();
let n_features = x.ncols();
let alpha = self.options.alpha;
let lambda = match self.options.lambda_scaling {
LambdaScaling::Raw => self.options.lambda,
LambdaScaling::Glmnet => self.options.lambda * n_samples as f64,
};
let cost = ElasticNetCost {
x,
y,
lambda,
alpha,
};
let linesearch = MoreThuenteLineSearch::new();
let solver = LBFGS::new(linesearch, 7);
let init = vec![0.0; n_features];
let result = Executor::new(cost, solver)
.configure(|state| {
state
.param(init)
.max_iters(self.options.max_iterations as u64)
})
.run()
.map_err(|e| RegressionError::NumericalError(format!("L-BFGS failed: {}", e)))?;
let beta_smooth = result
.state()
.get_best_param()
.ok_or_else(|| {
RegressionError::NumericalError("L-BFGS did not return parameters".to_string())
})?
.clone();
let l1_threshold = lambda * alpha;
let coefficients = Col::from_fn(n_features, |j| {
Self::soft_threshold(beta_smooth[j], l1_threshold)
});
Ok(coefficients)
}
#[allow(clippy::too_many_arguments)]
fn compute_statistics(
&self,
x: &Mat<f64>,
y: &Col<f64>,
coefficients: &Col<f64>,
intercept: Option<f64>,
residuals: &Col<f64>,
fitted_values: &Col<f64>,
aliased: &[bool],
rank: usize,
n_params: usize,
) -> Result<RegressionResult, RegressionError> {
let n = y.nrows();
let n_features = x.ncols();
let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
let tss: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
let rss: f64 = residuals.iter().map(|&r| r.powi(2)).sum();
let r_squared = if tss > 0.0 {
(1.0 - rss / tss).clamp(0.0, 1.0)
} else if rss < 1e-10 {
1.0
} else {
0.0
};
let df_total = (n - 1) as f64;
let df_resid = (n.saturating_sub(n_params.max(1))) as f64;
let adj_r_squared = if df_resid > 0.0 && df_total > 0.0 {
1.0 - (1.0 - r_squared) * df_total / df_resid
} else {
f64::NAN
};
let mse = if df_resid > 0.0 {
rss / df_resid
} else {
rss / n as f64
};
let rmse = mse.sqrt();
let ess = tss - rss;
let df_model = (n_params.saturating_sub(if intercept.is_some() { 1 } else { 0 })) as f64;
let f_statistic = if df_model > 0.0 && df_resid > 0.0 && mse > 0.0 {
(ess / df_model.max(1.0)) / mse
} else {
f64::NAN
};
let f_pvalue = if f_statistic.is_finite() && df_model > 0.0 && df_resid > 0.0 {
let f_dist = FisherSnedecor::new(df_model.max(1.0), df_resid).ok();
f_dist.map_or(f64::NAN, |d| 1.0 - d.cdf(f_statistic))
} else {
f64::NAN
};
let log_likelihood = if mse > 0.0 {
-0.5 * n as f64 * (1.0 + (2.0 * std::f64::consts::PI).ln() + mse.ln())
} else {
f64::NAN
};
let k = n_params as f64;
let aic = if log_likelihood.is_finite() {
2.0 * k - 2.0 * log_likelihood
} else {
f64::NAN
};
let aicc = if log_likelihood.is_finite() && (n as f64 - k - 1.0) > 0.0 {
aic + 2.0 * k * (k + 1.0) / (n as f64 - k - 1.0)
} else {
f64::NAN
};
let bic = if log_likelihood.is_finite() {
k * (n as f64).ln() - 2.0 * log_likelihood
} else {
f64::NAN
};
let mut result = RegressionResult::empty(n_features, n);
result.coefficients = coefficients.clone();
result.intercept = intercept;
result.residuals = residuals.clone();
result.fitted_values = fitted_values.clone();
result.rank = rank;
result.n_parameters = n_params;
result.n_observations = n;
result.aliased = aliased.to_vec();
result.rank_tolerance = self.options.rank_tolerance;
result.r_squared = r_squared;
result.adj_r_squared = adj_r_squared;
result.mse = mse;
result.rmse = rmse;
result.f_statistic = f_statistic;
result.f_pvalue = f_pvalue;
result.aic = aic;
result.aicc = aicc;
result.bic = bic;
result.log_likelihood = log_likelihood;
result.confidence_level = self.options.confidence_level;
Ok(result)
}
}
#[derive(Debug, Clone)]
pub struct FittedElasticNet {
options: RegressionOptions,
result: RegressionResult,
xtx_inverse: Option<Mat<f64>>,
aliased: Vec<bool>,
}
impl FittedElasticNet {
pub fn options(&self) -> &RegressionOptions {
&self.options
}
pub fn lambda(&self) -> f64 {
self.options.lambda
}
pub fn alpha(&self) -> f64 {
self.options.alpha
}
pub fn n_nonzero(&self) -> usize {
self.result
.coefficients
.iter()
.filter(|&&c| c.abs() > 1e-10)
.count()
}
}
impl FittedRegressor for FittedElasticNet {
fn predict(&self, x: &Mat<f64>) -> Col<f64> {
let n_samples = x.nrows();
let n_features = x.ncols();
let mut predictions = Col::zeros(n_samples);
let intercept = self.result.intercept.unwrap_or(0.0);
for i in 0..n_samples {
let mut pred = intercept;
for j in 0..n_features {
pred += x[(i, j)] * self.result.coefficients[j];
}
predictions[i] = pred;
}
predictions
}
fn result(&self) -> &RegressionResult {
&self.result
}
fn predict_with_interval(
&self,
x: &Mat<f64>,
interval: Option<IntervalType>,
level: f64,
) -> PredictionResult {
let predictions = self.predict(x);
match interval {
None => PredictionResult::point_only(predictions),
Some(interval_type) => {
match &self.xtx_inverse {
Some(xtx_inv) => {
let df = self.result.residual_df() as f64;
let has_intercept = self.result.intercept.is_some();
let x_reduced = self.reduce_to_non_aliased(x);
compute_prediction_intervals(
&x_reduced,
xtx_inv,
&predictions,
self.result.mse,
df,
level,
interval_type,
has_intercept,
)
}
None => {
let n = x.nrows();
let mut lower = Col::zeros(n);
let mut upper = Col::zeros(n);
let mut se = Col::zeros(n);
for i in 0..n {
lower[i] = f64::NAN;
upper[i] = f64::NAN;
se[i] = f64::NAN;
}
PredictionResult::with_intervals(predictions, lower, upper, se)
}
}
}
}
}
}
impl FittedElasticNet {
fn reduce_to_non_aliased(&self, x: &Mat<f64>) -> Mat<f64> {
let n_samples = x.nrows();
let non_aliased_cols: Vec<usize> = self
.aliased
.iter()
.enumerate()
.filter(|(_, &is_aliased)| !is_aliased)
.map(|(j, _)| j)
.collect();
let n_reduced = non_aliased_cols.len();
let mut x_reduced = Mat::zeros(n_samples, n_reduced);
for i in 0..n_samples {
for (k, &j) in non_aliased_cols.iter().enumerate() {
x_reduced[(i, k)] = x[(i, j)];
}
}
x_reduced
}
}
#[derive(Debug, Clone, Default)]
pub struct ElasticNetRegressorBuilder {
builder: RegressionOptionsBuilder,
}
impl ElasticNetRegressorBuilder {
pub fn new() -> Self {
Self::default()
}
pub fn with_intercept(mut self, include: bool) -> Self {
self.builder = self.builder.with_intercept(include);
self
}
pub fn lambda(mut self, lambda: f64) -> Self {
self.builder = self.builder.lambda(lambda);
self
}
pub fn alpha(mut self, alpha: f64) -> Self {
self.builder = self.builder.alpha(alpha);
self
}
pub fn lambda_scaling(mut self, scaling: LambdaScaling) -> Self {
self.builder = self.builder.lambda_scaling(scaling);
self
}
pub fn max_iterations(mut self, max_iter: usize) -> Self {
self.builder = self.builder.max_iterations(max_iter);
self
}
pub fn tolerance(mut self, tol: f64) -> Self {
self.builder = self.builder.tolerance(tol);
self
}
pub fn build(self) -> ElasticNetRegressor {
ElasticNetRegressor::new(self.builder.build_unchecked())
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_elastic_net_basic() {
let x = Mat::from_fn(20, 2, |i, j| ((i + j) as f64) * 0.1);
let mut y = Col::zeros(20);
for i in 0..20 {
y[i] = 1.0 + 2.0 * x[(i, 0)] + 3.0 * x[(i, 1)];
}
let model = ElasticNetRegressor::builder()
.with_intercept(true)
.lambda(0.01)
.alpha(0.5)
.build();
let fitted = model.fit(&x, &y).expect("model should fit");
assert!(fitted.r_squared() > 0.9);
}
#[test]
fn test_lasso_sparsity() {
let x = Mat::from_fn(50, 5, |i, j| ((i + j) as f64) * 0.1);
let mut y = Col::zeros(50);
for i in 0..50 {
y[i] = 1.0 + 2.0 * x[(i, 0)] + 3.0 * x[(i, 1)];
}
let model = ElasticNetRegressor::builder()
.with_intercept(true)
.lambda(1.0)
.alpha(1.0) .build();
let fitted = model.fit(&x, &y).expect("model should fit");
let n_zero = fitted
.coefficients()
.iter()
.filter(|&&c| c.abs() < 1e-10)
.count();
assert!(
n_zero >= 1,
"Expected some zero coefficients, got {}",
n_zero
);
}
}