linreg_core/regularized/path.rs
1//! Lambda path generation for regularized regression.
2//!
3//! This module provides utilities for generating a sequence of lambda values
4//! for regularization paths, matching the R glmnet implementation's output/approach.
5//!
6//! # Lambda Path Construction
7//!
8//! The lambda path follows this geometric decay pattern (following the glmnet R implementation):
9//!
10//! ```text
11//! lambda[0] = infinity // effectively infinite (all coefficients zero)
12//! lambda[1] = lambda_decay_factor * lambda_max // First "real" lambda
13//! lambda[k] = lambda[k-1] * lambda_decay_factor // Geometric decay
14//! ```
15//!
16//! Where (mapping implementation variables to the Friedman et al. paper):
17//! - `LAMBDA_EFFECTIVE_INFINITY = infinity`: Sentinel value (effectively infinite lambda)
18//! - `lambda_max`: Theoretical maximum lambda (paper: $\lambda_{max}$)
19//! - `lambda_decay_factor`: Geometric decay factor (paper: $(\epsilon)^{1/(K-1)}$)
20//! - `lambda_min_ratio`: Minimum lambda ratio (paper: $\epsilon$)
21//! - `eps = 1.0e-6`: Implementation constant for stability
22//!
23//! # Note on `LAMBDA_EFFECTIVE_INFINITY`
24//!
25//! The first lambda value is set to infinity, which produces all-zero coefficients.
26//! This matches glmnet's behavior and provides a complete view of the regularization
27//! path, showing the exact point where each coefficient enters the model as lambda
28//! decreases. Users who don't need this can simply ignore the first element.
29
30use crate::linalg::Matrix;
31
32/// Represents effectively infinite lambda.
33///
34/// At this lambda value, all penalized coefficients are zero.
35const LAMBDA_EFFECTIVE_INFINITY: f64 = f64::INFINITY;
36
37/// epsilon constant - minimum lambda_min_ratio value.
38const LAMBDA_MIN_RATIO_MINIMUM: f64 = 1.0e-6;
39
40/// Options for generating a lambda path.
41///
42/// # Fields
43///
44/// * `nlambda` - Number of lambda values (default: 100)
45/// * `lambda_min_ratio` - Lambda minimum ratio (default: None, which auto-selects based on n vs p)
46/// * `alpha` - Elastic net mixing parameter (0 = ridge, 1 = lasso)
47/// * `eps_for_ridge` - Small alpha to use for computing lambda_max when alpha=0 (default: 0.001)
48///
49/// # Example
50///
51/// ```
52/// # use linreg_core::regularized::path::LambdaPathOptions;
53/// let opts = LambdaPathOptions {
54/// nlambda: 50,
55/// lambda_min_ratio: Some(0.01),
56/// alpha: 0.5,
57/// eps_for_ridge: 0.001,
58/// };
59///
60/// assert_eq!(opts.nlambda, 50);
61/// assert_eq!(opts.alpha, 0.5);
62/// ```
63#[derive(Clone, Debug)]
64pub struct LambdaPathOptions {
65 /// Number of lambda values to generate
66 pub nlambda: usize,
67 /// Lambda minimum ratio.
68 /// If None, auto-selects: 0.0001 if n >= p, else 0.01
69 pub lambda_min_ratio: Option<f64>,
70 /// Elastic net mixing parameter (0 = ridge, 1 = lasso)
71 pub alpha: f64,
72 /// Small value to use for ridge regression max lambda calculation
73 /// to avoid numerical issues (default: 1e-3)
74 pub eps_for_ridge: f64,
75}
76
77impl Default for LambdaPathOptions {
78 fn default() -> Self {
79 LambdaPathOptions {
80 nlambda: 100,
81 lambda_min_ratio: None,
82 alpha: 1.0,
83 eps_for_ridge: 1e-3,
84 }
85 }
86}
87
88/// Computes `lambda_max`: the smallest lambda for which all
89/// penalized coefficients are zero.
90///
91/// Formula:
92/// ```text
93/// lambda_max = max(|X_j^T y|/vp(j)) / max(alpha, 1e-3)
94/// ```
95/// where `g(j) = |X_j^T y|` is the absolute correlation.
96///
97/// Key points:
98/// - y is assumed to be STANDARDIZED to unit norm (||y|| = 1)
99/// - X columns are assumed to be STANDARDIZED to unit norm
100/// - The `1e-3` minimum prevents division issues for pure ridge
101///
102/// # Arguments
103///
104/// * `x` - Standardized design matrix (n × p), first column is intercept if present
105/// * `y` - Standardized response vector (||y|| = 1)
106/// * `alpha` - Elastic net mixing parameter
107/// * `penalty_factor` - Per-feature penalty factors (optional, defaults to all 1.0)
108/// * `intercept_col` - Index of intercept column (typically 0, or None if no intercept)
109///
110/// # Returns
111///
112/// The `lambda_max` value used by glmnet for lambda path construction.
113#[allow(clippy::needless_range_loop)]
114pub fn compute_lambda_max(
115 x: &Matrix,
116 y: &[f64],
117 alpha: f64,
118 penalty_factor: Option<&[f64]>,
119 intercept_col: Option<usize>,
120) -> f64 {
121 // Use max(alpha, 1e-3) to avoid division issues
122 let alpha_clamped = alpha.max(1e-3);
123
124 let p = x.cols;
125 let mut max_corr: f64 = 0.0;
126
127 for j in 0..p {
128 // Skip intercept column
129 if let Some(ic) = intercept_col {
130 if j == ic {
131 continue;
132 }
133 }
134
135 // Compute absolute correlation: |x_j^T y|
136 // Matrix is row-major, so we iterate through rows for each column
137 let mut corr = 0.0;
138 for i in 0..x.rows {
139 corr += x.get(i, j) * y[i];
140 }
141 let corr = corr.abs();
142
143 // Apply penalty factor if provided
144 let effective_corr = if let Some(penalty_factors) = penalty_factor {
145 if j < penalty_factors.len() && penalty_factors[j] > 0.0 {
146 corr / penalty_factors[j]
147 } else {
148 corr
149 }
150 } else {
151 corr
152 };
153
154 max_corr = max_corr.max(effective_corr);
155 }
156
157 // Formula: lambda_max = max(|X^T y|) / max(alpha, 1e-3)
158 max_corr / alpha_clamped
159}
160
161/// Generates a lambda path.
162///
163/// ```text
164/// lambda[0] = inf // Large value (all coefficients zero)
165/// lambda[1] = lambda_decay_factor * lambda_max // First real lambda
166/// lambda[k] = lambda[k-1] * lambda_decay_factor // Geometric decay
167/// ```
168///
169/// Where:
170/// - `lambda_max = max(|X^T y|) / max(alpha, 1e-3)`: Theoretical lambda_max
171/// - `lambda_decay_factor = max(lambda_min_ratio, eps)^(1/(nlambda-1))`: Geometric decay factor
172/// - `eps = 1.0e-6`: Minimum lambda_min_ratio value
173///
174/// # Arguments
175///
176/// * `x` - Standardized design matrix (n × p), columns are unit norm
177/// * `y` - Standardized response vector (||y|| = 1)
178/// * `options` - Lambda path generation options
179/// * `penalty_factor` - Optional per-feature penalty factors
180/// * `intercept_col` - Index of intercept column (typically 0)
181///
182/// # Returns
183///
184/// A vector of lambda values in **decreasing** order.
185///
186/// # First Lambda (LAMBDA_EFFECTIVE_INFINITY)
187///
188/// The first lambda value is set to `infinity`, which effectively produces
189/// all-zero coefficients. This matches R's cursed behavior.
190/// PROOF OF CONCEPT/R EQUIVALENCE: We may want to make this optional in future versions.
191///
192/// # Default lambda_min_ratio
193///
194/// Following glmnet:
195/// - If `n >= p`: `lambda_min_ratio = 0.0001`
196/// - If `n < p`: `lambda_min_ratio = 0.01`
197pub fn make_lambda_path(
198 x: &Matrix,
199 y: &[f64],
200 options: &LambdaPathOptions,
201 penalty_factor: Option<&[f64]>,
202 intercept_col: Option<usize>,
203) -> Vec<f64> {
204 let n = x.rows;
205 let p = x.cols;
206
207 // Determine default lambda_min_ratio
208 let default_lambda_min_ratio = if n >= p { 0.0001 } else { 0.01 };
209 let lambda_min_ratio = options.lambda_min_ratio.unwrap_or(default_lambda_min_ratio);
210
211 // Compute geometric decay factor: lambda_decay_factor = max(lambda_min_ratio, eps)^(1/(nlambda-1))
212 let lambda_min_ratio_clamped = lambda_min_ratio.max(LAMBDA_MIN_RATIO_MINIMUM);
213 let lambda_decay_factor = lambda_min_ratio_clamped.powf(1.0 / (options.nlambda - 1) as f64);
214
215 // Compute lambda_max = max(|X^T y|) / max(alpha, 1e-3)
216 let lambda_max = compute_lambda_max(x, y, options.alpha, penalty_factor, intercept_col);
217
218 // Build lambda path following glmnet's algorithm:
219 // lambda[0] = INF
220 // lambda[1] = lambda_decay_factor * lambda_max
221 // lambda[k] = lambda[k-1] * lambda_decay_factor
222 let mut lambdas = Vec::with_capacity(options.nlambda);
223
224 for k in 0..options.nlambda {
225 if k == 0 {
226 // First lambda: effectively infinite
227 lambdas.push(LAMBDA_EFFECTIVE_INFINITY);
228 } else if k == 1 {
229 // Second lambda: lambda_decay_factor * lambda_max
230 lambdas.push(lambda_decay_factor * lambda_max);
231 } else {
232 // Remaining lambdas: geometric decay
233 lambdas.push(lambdas[k - 1] * lambda_decay_factor);
234 }
235 }
236
237 lambdas
238}
239
240/// Extracts a specific set of lambdas from a path.
241///
242/// This is useful when you want to evaluate at specific lambda values
243/// rather than using the full path.
244///
245/// # Arguments
246///
247/// * `full_path` - The complete lambda path (must be in decreasing order)
248/// * `indices` - Indices of lambdas to extract
249///
250/// # Returns
251///
252/// A new vector containing the specified lambda values.
253///
254/// # Example
255///
256/// ```
257/// # use linreg_core::regularized::path::extract_lambdas;
258/// let path = vec![1.0, 0.5, 0.25, 0.125, 0.0625];
259/// let indices = vec![0, 2, 4];
260/// let extracted = extract_lambdas(&path, &indices);
261/// assert_eq!(extracted, vec![1.0, 0.25, 0.0625]);
262/// ```
263pub fn extract_lambdas(full_path: &[f64], indices: &[usize]) -> Vec<f64> {
264 indices.iter().map(|&i| full_path[i]).collect()
265}
266
267#[cfg(test)]
268mod tests {
269 use super::*;
270
271 #[test]
272 fn test_compute_lambda_max() {
273 // Simple test: X = [1, x], y standardized to unit norm
274 let x_data = vec![1.0, -1.0, 1.0, 0.0, 1.0, 1.0];
275 let x = Matrix::new(3, 2, x_data);
276
277 // Standardize y to unit norm (||y|| = 1)
278 let y = vec![-1.0, 0.0, 1.0];
279 let y_norm: f64 = y.iter().map(|&yi| yi * yi).sum::<f64>().sqrt();
280 let y_standardized: Vec<f64> = y.iter().map(|&yi| yi / y_norm).collect();
281
282 let lambda_max = compute_lambda_max(&x, &y_standardized, 1.0, None, Some(0));
283
284 // Column 1 of X: [-1, 0, 1]
285 // y_standardized = [-1/sqrt(2), 0, 1/sqrt(2)]
286 // dot = (-1)*(-1/sqrt(2)) + 0*0 + 1*(1/sqrt(2)) = 2/sqrt(2) = sqrt(2)
287 // lambda_max = sqrt(2) / max(1.0, 1e-3) = sqrt(2)
288 assert!((lambda_max - 2.0_f64.sqrt()).abs() < 1e-10);
289 }
290
291 #[test]
292 fn test_make_lambda_path_glmnet_style() {
293 let x_data = vec![1.0, -1.0, 1.0, 0.0, 1.0, 1.0];
294 let x = Matrix::new(3, 2, x_data);
295 let y = vec![-1.0, 0.0, 1.0];
296
297 // Standardize y to unit norm
298 let y_norm: f64 = y.iter().map(|&yi| yi * yi).sum::<f64>().sqrt();
299 let y_standardized: Vec<f64> = y.iter().map(|&yi| yi / y_norm).collect();
300
301 let options = LambdaPathOptions {
302 nlambda: 10,
303 lambda_min_ratio: Some(0.01),
304 alpha: 1.0,
305 eps_for_ridge: 0.001,
306 };
307
308 let path = make_lambda_path(&x, &y_standardized, &options, None, Some(0));
309
310 assert_eq!(path.len(), 10);
311
312 // First lambda should be LAMBDA_EFFECTIVE_INFINITY (effectively infinite)
313 assert_eq!(path[0], LAMBDA_EFFECTIVE_INFINITY);
314
315 // Second lambda should be: lambda_decay_factor * lambda_max
316 // where lambda_max = sqrt(2) and lambda_decay_factor = 0.01^(1/9)
317 let lambda_max = 2.0_f64.sqrt();
318 let lambda_decay_factor = 0.01_f64.powf(1.0 / 9.0);
319 assert!((path[1] - lambda_decay_factor * lambda_max).abs() < 1e-10);
320
321 // Path should be decreasing (each lambda < previous)
322 for i in 1..path.len() {
323 assert!(path[i] < path[i - 1]);
324 }
325 }
326
327 #[test]
328 fn test_lambda_max_with_small_alpha() {
329 let x_data = vec![1.0, -1.0, 1.0, 0.0, 1.0, 1.0];
330 let x = Matrix::new(3, 2, x_data);
331 let y = vec![-1.0, 0.0, 1.0];
332
333 let y_norm: f64 = y.iter().map(|&yi| yi * yi).sum::<f64>().sqrt();
334 let y_standardized: Vec<f64> = y.iter().map(|&yi| yi / y_norm).collect();
335
336 // For very small alpha, should use max(alpha, 1e-3) = 1e-3
337 let lambda_max = compute_lambda_max(&x, &y_standardized, 0.0001, None, Some(0));
338
339 // lambda_max = sqrt(2) / max(0.0001, 1e-3) = sqrt(2) / 1e-3
340 let expected = 2.0_f64.sqrt() / 1e-3;
341 assert!((lambda_max - expected).abs() < 1e-10);
342 }
343
344 #[test]
345 fn test_extract_lambdas() {
346 let path = vec![10.0, 5.0, 2.5, 1.25, 0.625];
347 let indices = vec![0, 2, 4];
348 let extracted = extract_lambdas(&path, &indices);
349
350 assert_eq!(extracted, vec![10.0, 2.5, 0.625]);
351 }
352}