linreg_core/regularized/ridge.rs
//! Ridge regression (L2-regularized linear regression).
//!
//! This module provides a ridge regression implementation using the augmented QR
//! approach, which is numerically stable and avoids forming X^T X explicitly.
//!
//! # Ridge Regression Objective
//!
//! Ridge regression solves:
//!
//! ```text
//! minimize over (β₀, β):
//!
//!     (1/2) * Σᵢ (yᵢ - β₀ - xᵢᵀβ)² + (λ/2) * ||β||₂²
//! ```
//!
//! The intercept `β₀` is **not penalized**.
//!
//! # Solution Method
//!
//! We use the augmented least-squares approach:
//!
//! ```text
//! minimize || [y; 0] - [X; √λ*I] * β ||²
//! ```
//!
//! This transforms the ridge problem into a standard least-squares problem
//! that can be solved with QR decomposition.
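//!
//! As a purely illustrative sketch (not taken verbatim from the implementation),
//! with two penalized predictors and λ = 4 the augmented design gains two extra
//! rows with √λ = 2 on the diagonal of the penalty block:
//!
//! ```text
//! X_aug = [ x₁₁  x₁₂ ]     y_aug = [ y₁ ]
//!         [ x₂₁  x₂₂ ]             [ y₂ ]
//!         [  2    0  ]             [ 0  ]
//!         [  0    2  ]             [ 0  ]
//! ```
//!
//! Solving the ordinary least-squares problem on `(X_aug, y_aug)` then yields the
//! ridge coefficients.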

use crate::error::{Error, Result};
use crate::linalg::Matrix;
use crate::regularized::preprocess::{
    predict, standardize_xy, unstandardize_coefficients, StandardizeOptions,
};

#[cfg(feature = "wasm")]
use serde::Serialize;

/// Options for ridge regression fitting.
///
/// # Fields
///
/// * `lambda` - Regularization strength (single value)
/// * `intercept` - Whether to include an intercept term (default: true)
/// * `standardize` - Whether to standardize predictors (default: true)
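///
/// # Example
///
/// A minimal sketch of overriding just the regularization strength while keeping
/// the defaults (`intercept = true`, `standardize = true`):
///
/// ```rust,no_run
/// use linreg_core::regularized::ridge::RidgeFitOptions;
///
/// let options = RidgeFitOptions {
///     lambda: 0.5,
///     ..Default::default()
/// };
/// assert!(options.intercept && options.standardize);
/// ```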
#[derive(Clone, Debug)]
pub struct RidgeFitOptions {
    /// Regularization strength (must be >= 0)
    pub lambda: f64,
    /// Whether to include an intercept
    pub intercept: bool,
    /// Whether to standardize predictors
    pub standardize: bool,
}

impl Default for RidgeFitOptions {
    fn default() -> Self {
        RidgeFitOptions {
            lambda: 1.0,
            intercept: true,
            standardize: true,
        }
    }
}

/// Result of a ridge regression fit.
///
/// # Fields
///
/// * `lambda` - The lambda value used for fitting
/// * `intercept` - Intercept coefficient (on original scale)
/// * `coefficients` - Slope coefficients (on original scale)
/// * `fitted_values` - In-sample predictions
/// * `residuals` - Residuals (y - fitted_values)
/// * `df` - Effective degrees of freedom, approximating trace of H = X(X'X + λI)^(-1)X'
/// * `r_squared` - R² (coefficient of determination)
/// * `adj_r_squared` - Adjusted R² (using effective df)
/// * `mse` - Mean squared error
/// * `rmse` - Root mean squared error
/// * `mae` - Mean absolute error
#[derive(Clone, Debug)]
#[cfg_attr(feature = "wasm", derive(Serialize))]
pub struct RidgeFit {
    /// Lambda value used for fitting
    pub lambda: f64,
    /// Intercept on original scale
    pub intercept: f64,
    /// Slope coefficients on original scale
    pub coefficients: Vec<f64>,
    /// Fitted values
    pub fitted_values: Vec<f64>,
    /// Residuals
    pub residuals: Vec<f64>,
    /// Effective degrees of freedom
    pub df: f64,
    /// R² (coefficient of determination)
    pub r_squared: f64,
    /// Adjusted R² (penalized for effective df)
    pub adj_r_squared: f64,
    /// Mean squared error
    pub mse: f64,
    /// Root mean squared error
    pub rmse: f64,
    /// Mean absolute error
    pub mae: f64,
}

/// Fits ridge regression for a single lambda value.
///
/// # Arguments
///
/// * `x` - Design matrix (n × p). Must include a leading column of ones when `intercept = true`.
/// * `y` - Response vector (n elements)
/// * `options` - Ridge fitting options
///
/// # Returns
///
/// A [`RidgeFit`] containing the fit results.
///
/// # Errors
///
/// Returns an error if:
/// - `lambda < 0`
/// - Dimensions don't match
/// - Matrix is numerically singular
///
/// # Algorithm
///
/// Uses the augmented QR approach:
/// 1. Standardize X and center y (if requested)
/// 2. Build augmented system:
///    ```text
///    X_aug = [X_std; sqrt(lambda) * I_p]
///    y_aug = [y_centered; 0_p]
///    ```
/// 3. Solve using QR decomposition
/// 4. Unstandardize coefficients
///
/// # Example
///
/// ```rust,no_run
/// use linreg_core::linalg::Matrix;
/// use linreg_core::regularized::ridge::{ridge_fit, RidgeFitOptions};
///
/// let x = Matrix::new(3, 2, vec![
///     1.0, 2.0,
///     1.0, 3.0,
///     1.0, 4.0,
/// ]);
/// let y = vec![3.0, 5.0, 7.0];
///
/// let options = RidgeFitOptions {
///     lambda: 1.0,
///     intercept: true,
///     standardize: true,
/// };
///
/// let fit = ridge_fit(&x, &y, &options).unwrap();
/// println!("Intercept: {}", fit.intercept);
/// println!("Coefficients: {:?}", fit.coefficients);
/// ```
pub fn ridge_fit(x: &Matrix, y: &[f64], options: &RidgeFitOptions) -> Result<RidgeFit> {
    if options.lambda < 0.0 {
        return Err(Error::InvalidInput(
            "Lambda must be non-negative for ridge regression".to_string(),
        ));
    }

    let n = x.rows;
    let p = x.cols;

    if y.len() != n {
        return Err(Error::DimensionMismatch(format!(
            "Length of y ({}) must match number of rows in X ({})",
            y.len(),
            n
        )));
    }

    // Handle zero lambda: just do OLS
    if options.lambda == 0.0 {
        return ridge_ols_fit(x, y, options);
    }

    // Standardize X and center y
    let std_options = StandardizeOptions {
        intercept: options.intercept,
        standardize_x: options.standardize,
        standardize_y: false, // Don't standardize y for ridge
    };

    let (x_std, y_centered, std_info) = standardize_xy(x, y, &std_options);

    // Build augmented system: [X; sqrt(lambda)*I] * beta = [y; 0]
    // For the intercept column (if present), we don't add penalty
    let sqrt_lambda = options.lambda.sqrt();
    let intercept_col = if options.intercept { 1 } else { 0 };

    // Number of penalized coefficients (excluding intercept)
    let p_pen = p - intercept_col;

    // Augmented matrix dimensions
    let aug_n = n + p_pen;
    let aug_p = p;

    // Build augmented matrix
    let mut x_aug_data = vec![0.0; aug_n * aug_p];

    // Copy X_std to top portion
    for i in 0..n {
        for j in 0..p {
            x_aug_data[i * aug_p + j] = x_std.get(i, j);
        }
    }

    // Add sqrt(lambda) * I for penalized coefficients
    for i in 0..p_pen {
        let row = n + i;
        let col = intercept_col + i;
        x_aug_data[row * aug_p + col] = sqrt_lambda;
    }

    let x_aug = Matrix::new(aug_n, aug_p, x_aug_data);

    // Build augmented y vector
    let mut y_aug = vec![0.0; aug_n];
    y_aug[..n].copy_from_slice(&y_centered[..n]);
    // Remaining entries are already 0

    // Solve using QR decomposition
    let (q, r) = x_aug.qr();
    let beta_std = solve_upper_triangular_with_augmented_y(&r, &q, &y_aug, aug_n)?;

    // Unstandardize coefficients
    let (intercept, beta_orig) = unstandardize_coefficients(&beta_std, &std_info);

    // Compute fitted values and residuals on original scale
    let fitted = predict(x, intercept, &beta_orig);
    let residuals: Vec<f64> = y
        .iter()
        .zip(fitted.iter())
        .map(|(yi, yh)| yi - yh)
        .collect();

    // Effective degrees of freedom.
    // For ridge: df = trace(X(X'X + lambda*I)^(-1)X'), i.e. the sum of
    // e_j / (e_j + lambda) over the eigenvalues e_j of X'X.
    // compute_ridge_df returns an inexpensive approximation to this quantity.
    let df = compute_ridge_df(&x_std, options.lambda, intercept_col);

    // Compute model fit statistics
    let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
    let ss_tot: f64 = y.iter().map(|yi| (yi - y_mean).powi(2)).sum();
    let ss_res: f64 = residuals.iter().map(|r| r.powi(2)).sum();
    let r_squared = if ss_tot > 1e-10 {
        1.0 - ss_res / ss_tot
    } else {
        1.0
    };

    // Adjusted R² using effective degrees of freedom
    let adj_r_squared = if ss_tot > 1e-10 && (n as f64) > df {
        1.0 - (1.0 - r_squared) * ((n - 1) as f64 / (n as f64 - df))
    } else {
        r_squared
    };

    let mse = ss_res / (n as f64 - 1.0); // Use n - 1 in the denominator (rather than n - df)
    let rmse = mse.sqrt();
    let mae: f64 = residuals.iter().map(|r| r.abs()).sum::<f64>() / n as f64;

    Ok(RidgeFit {
        lambda: options.lambda,
        intercept,
        coefficients: beta_orig,
        fitted_values: fitted,
        residuals,
        df,
        r_squared,
        adj_r_squared,
        mse,
        rmse,
        mae,
    })
}

/// Computes an approximation to the effective degrees of freedom for ridge regression.
///
/// The exact quantity is df = trace(H) where H = X(X'X + λI)^(-1)X'. Evaluating it
/// exactly requires the singular values of the standardized design, so this function
/// returns a cheap proxy instead: unpenalized columns count as one full degree of
/// freedom each, and every penalized column is discounted by a factor of λ / (1 + λ).
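///
/// For reference, the exact quantity being approximated can be written in terms of
/// the singular values dᵢ of the penalized block of the design (a standard identity,
/// stated here only for context):
///
/// ```text
/// df(λ) = Σᵢ dᵢ² / (dᵢ² + λ)
/// ```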
fn compute_ridge_df(x_std: &Matrix, lambda: f64, intercept_col: usize) -> f64 {
    let p = x_std.cols;

    // Cheap proxy for trace(X(X'X + lambda*I)^(-1)X'): count each unpenalized
    // column (e.g. the intercept) as a full degree of freedom and shrink each
    // penalized column's contribution by a factor of lambda / (1 + lambda).
    let p_pen = p - intercept_col;
    let df_penalty = if lambda > 0.0 {
        (p_pen as f64) * lambda / (1.0 + lambda)
    } else {
        0.0
    };

    (p as f64) - df_penalty
}

/// OLS fit for lambda = 0 (special case of ridge).
fn ridge_ols_fit(x: &Matrix, y: &[f64], options: &RidgeFitOptions) -> Result<RidgeFit> {
    let n = x.rows;
    let p = x.cols;

    // Use QR decomposition for OLS on original (non-standardized) data
    let (q, r) = x.qr();
    let beta = solve_upper_triangular_with_augmented_y(&r, &q, y, n)?;

    // Extract intercept and slope coefficients directly (no unstandardization needed)
    // OLS on original data gives coefficients on original scale
    let (intercept, beta_orig) = if options.intercept {
        // beta[0] is intercept, beta[1..] are slopes
        let slopes: Vec<f64> = beta[1..].to_vec();
        (beta[0], slopes)
    } else {
        // No intercept, all coefficients are slopes
        (0.0, beta)
    };

    // Compute fitted values and residuals
    let fitted = predict(x, intercept, &beta_orig);
    let residuals: Vec<f64> = y
        .iter()
        .zip(fitted.iter())
        .map(|(yi, yh)| yi - yh)
        .collect();

    // For OLS, the effective degrees of freedom equal the number of coefficients, p
    let df = p as f64;

    // Compute model fit statistics
    let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
    let ss_tot: f64 = y.iter().map(|yi| (yi - y_mean).powi(2)).sum();
    let ss_res: f64 = residuals.iter().map(|r| r.powi(2)).sum();
    let r_squared = if ss_tot > 1e-10 {
        1.0 - ss_res / ss_tot
    } else {
        1.0
    };

    // Adjusted R² using effective degrees of freedom
    let adj_r_squared = if ss_tot > 1e-10 && n > p {
        1.0 - (1.0 - r_squared) * ((n - 1) as f64 / (n - p) as f64)
    } else {
        r_squared
    };

    let mse = ss_res / (n as f64 - p as f64);
    let rmse = mse.sqrt();
    let mae: f64 = residuals.iter().map(|r| r.abs()).sum::<f64>() / n as f64;

    Ok(RidgeFit {
        lambda: 0.0,
        intercept,
        coefficients: beta_orig,
        fitted_values: fitted,
        residuals,
        df,
        r_squared,
        adj_r_squared,
        mse,
        rmse,
        mae,
    })
}

/// Solves R * beta = Q^T * y_aug for beta.
///
/// This is a helper for the augmented QR approach.
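///
/// As a brief sketch of the back-substitution step (standard linear algebra, not a
/// detail specific to this crate), once `qty = Qᵀ y_aug` is available the solution is
/// recovered row by row from the bottom up:
///
/// ```text
/// βᵢ = ( qtyᵢ - Σ_{j>i} R[i,j] * βⱼ ) / R[i,i]
/// ```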
#[allow(clippy::needless_range_loop)]
fn solve_upper_triangular_with_augmented_y(
    r: &Matrix,
    q: &Matrix,
    y_aug: &[f64],
    aug_n: usize,
) -> Result<Vec<f64>> {
    let p = r.cols;

    // Compute Q^T * y_aug, but only the first p entries: R has no nonzero rows
    // below row p, so only the first p columns of Q contribute to the solution.
    let mut qty = vec![0.0; p];

    // qty[i] = (i-th column of Q) · y_aug
    for i in 0..p {
        let mut sum = 0.0;
        for k in 0..aug_n {
            sum += q.get(k, i) * y_aug[k];
        }
        qty[i] = sum;
    }

    // Back substitution: solve R * beta = qty
    let mut beta = vec![0.0; p];

    for i in (0..p).rev() {
        let mut sum = qty[i];
        for j in (i + 1)..p {
            sum -= r.get(i, j) * beta[j];
        }

        let diag = r.get(i, i);
        if diag.abs() < 1e-14 {
            return Err(Error::ComputationFailed(
                "Matrix is singular to working precision".to_string(),
            ));
        }

        beta[i] = sum / diag;
    }

    Ok(beta)
}

/// Makes predictions using a ridge regression fit.
///
/// # Arguments
///
/// * `fit` - The ridge regression fit result
/// * `x_new` - New data matrix (n_new × p)
///
/// # Returns
///
/// Predictions for each row in x_new.
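///
/// # Example
///
/// A minimal sketch (mirroring the `ridge_fit` example above); it assumes `x_new`
/// uses the same column layout as the matrix used for fitting, including the
/// intercept column of ones:
///
/// ```rust,no_run
/// use linreg_core::linalg::Matrix;
/// use linreg_core::regularized::ridge::{predict_ridge, ridge_fit, RidgeFitOptions};
///
/// let x = Matrix::new(3, 2, vec![
///     1.0, 2.0,
///     1.0, 3.0,
///     1.0, 4.0,
/// ]);
/// let y = vec![3.0, 5.0, 7.0];
/// let fit = ridge_fit(&x, &y, &RidgeFitOptions::default()).unwrap();
///
/// let x_new = Matrix::new(1, 2, vec![1.0, 5.0]);
/// let preds = predict_ridge(&fit, &x_new);
/// println!("Prediction: {}", preds[0]);
/// ```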
pub fn predict_ridge(fit: &RidgeFit, x_new: &Matrix) -> Vec<f64> {
    predict(x_new, fit.intercept, &fit.coefficients)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_ridge_fit_simple() {
        // Simple test: perfect linear relationship
        let x_data = vec![1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 1.0, 4.0];
        let x = Matrix::new(4, 2, x_data);
        let y = vec![2.0, 4.0, 6.0, 8.0]; // y = 2 * x (with intercept 0)

        let options = RidgeFitOptions {
            lambda: 0.1,
            intercept: true,
            standardize: false,
        };

        let fit = ridge_fit(&x, &y, &options).unwrap();

        // With small lambda, should be close to OLS solution
        // OLS solution: intercept ≈ 0, slope ≈ 2
        // coefficients[0] is the first (and only) slope coefficient
        // Note: Ridge regularization introduces some bias, so tolerances are slightly looser
        assert!((fit.coefficients[0] - 2.0).abs() < 0.2);
        assert!(fit.intercept.abs() < 0.5);
    }

    #[test]
    fn test_ridge_fit_with_standardization() {
        let x_data = vec![1.0, 100.0, 1.0, 200.0, 1.0, 300.0, 1.0, 400.0];
        let x = Matrix::new(4, 2, x_data);
        let y = vec![2.0, 4.0, 6.0, 8.0];

        let options = RidgeFitOptions {
            lambda: 1.0,
            intercept: true,
            standardize: true,
        };

        let fit = ridge_fit(&x, &y, &options).unwrap();

        // Predictions should be reasonable
        for i in 0..4 {
            assert!((fit.fitted_values[i] - y[i]).abs() < 2.0);
        }
    }

    #[test]
    fn test_ridge_zero_lambda_is_ols() {
        let x_data = vec![1.0, 1.0, 1.0, 2.0, 1.0, 3.0];
        let x = Matrix::new(3, 2, x_data);
        let y = vec![2.0, 4.0, 6.0];

        let options = RidgeFitOptions {
            lambda: 0.0,
            intercept: true,
            standardize: false,
        };

        let fit = ridge_fit(&x, &y, &options).unwrap();

        // Should be close to perfect fit for this data
        assert!((fit.fitted_values[0] - 2.0).abs() < 1e-6);
        assert!((fit.fitted_values[1] - 4.0).abs() < 1e-6);
        assert!((fit.fitted_values[2] - 6.0).abs() < 1e-6);
    }

    #[test]
    fn test_ridge_negative_lambda_error() {
        let x_data = vec![1.0, 1.0, 1.0, 2.0, 1.0, 3.0];
        let x = Matrix::new(3, 2, x_data);
        let y = vec![2.0, 4.0, 6.0];

        let options = RidgeFitOptions {
            lambda: -1.0,
            ..Default::default()
        };

        let result = ridge_fit(&x, &y, &options);
        assert!(result.is_err());
    }

    #[test]
    fn test_predict_ridge() {
        let x_data = vec![1.0, 1.0, 1.0, 2.0, 1.0, 3.0];
        let x = Matrix::new(3, 2, x_data);
        let y = vec![2.0, 4.0, 6.0];

        let options = RidgeFitOptions {
            lambda: 0.1,
            intercept: true,
            standardize: false,
        };

        let fit = ridge_fit(&x, &y, &options).unwrap();
        let preds = predict_ridge(&fit, &x);

        // Predictions on training data should equal fitted values
        for i in 0..3 {
            assert!((preds[i] - fit.fitted_values[i]).abs() < 1e-10);
        }
    }
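
    #[test]
    fn test_ridge_shrinks_slope_as_lambda_grows() {
        // Added as an illustrative sanity check (a sketch, not an exhaustive test):
        // with the intercept unpenalized, the penalized slope should not grow in
        // magnitude when lambda is increased on the same data.
        let x_data = vec![1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 1.0, 4.0];
        let x = Matrix::new(4, 2, x_data);
        let y = vec![2.0, 4.0, 6.0, 8.0];

        let small = RidgeFitOptions {
            lambda: 0.1,
            intercept: true,
            standardize: false,
        };
        let large = RidgeFitOptions {
            lambda: 100.0,
            intercept: true,
            standardize: false,
        };

        let fit_small = ridge_fit(&x, &y, &small).unwrap();
        let fit_large = ridge_fit(&x, &y, &large).unwrap();

        // Heavier regularization should give a (weakly) smaller slope magnitude.
        assert!(fit_large.coefficients[0].abs() <= fit_small.coefficients[0].abs() + 1e-8);
    }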
}