sklears_svm/regularization_path.rs

//! Regularization Path Algorithms for SVM
//!
//! This module implements algorithms to compute the full regularization path for
//! various types of regularized SVMs. The regularization path shows how the solution
//! changes as the regularization parameter varies, which is useful for model selection
//! and understanding the trade-off between complexity and fit.
//!
//! Algorithms included:
//! - Lasso Path: For L1-regularized linear SVMs
//! - Elastic Net Path: For combined L1/L2 regularized SVMs
//! - Group Lasso Path: For group-structured regularization
//! - Adaptive Lasso Path: With adaptive weights for feature selection
//! - Fused Lasso Path: For sequence data with difference penalties
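//!
//! # Example
//!
//! A minimal usage sketch (marked `ignore` because the surrounding crate API is
//! assumed from this file rather than from published docs):
//!
//! ```ignore
//! use scirs2_core::ndarray::array;
//!
//! let x = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0]];
//! let y = array![1.0, 2.0, 3.0, 4.0];
//!
//! let solver = RegularizationPathSolver::default();
//! let result = solver.fit_path(&x, &y)?;
//! println!("best lambda: {}", result.best_lambda);
//! println!("features at 1-SE rule: {:?}", result.active_features[result.lambda_1se_idx]);
//! ```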

use scirs2_core::ndarray::{s, Array1, Array2, ArrayView1};
use sklears_core::{
    error::{Result, SklearsError},
    types::Float,
};
use std::collections::HashMap;

/// Regularization path algorithm types
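///
/// Variants with parameters use struct syntax, e.g. (an illustrative sketch,
/// not compiled as a doctest):
///
/// ```ignore
/// let en = RegularizationPathType::ElasticNet { l1_ratio: 0.5 };
/// let gl = RegularizationPathType::GroupLasso { groups: vec![vec![0, 1], vec![2]] };
/// ```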
#[derive(Debug, Clone, PartialEq, Default)]
pub enum RegularizationPathType {
    /// Lasso regularization path (L1)
    #[default]
    Lasso,
    /// Elastic Net regularization path (L1 + L2)
    ElasticNet { l1_ratio: Float },
    /// Group Lasso regularization path
    GroupLasso { groups: Vec<Vec<usize>> },
    /// Adaptive Lasso with feature-specific weights
    AdaptiveLasso { weights: Array1<Float> },
    /// Fused Lasso for sequence data
    FusedLasso,
}

/// Cross-validation strategy for path selection
#[derive(Debug, Clone, PartialEq)]
pub enum CrossValidationStrategy {
    /// K-fold cross-validation
    KFold { k: usize },
    /// Leave-one-out cross-validation
    LeaveOneOut,
    /// Time series split for temporal data
    TimeSeriesSplit { n_splits: usize },
    /// Stratified K-fold for classification
    StratifiedKFold { k: usize },
}

impl Default for CrossValidationStrategy {
    fn default() -> Self {
        CrossValidationStrategy::KFold { k: 5 }
    }
}

/// Configuration for regularization path computation
#[derive(Debug, Clone)]
pub struct RegularizationPathConfig {
    /// Type of regularization path
    pub path_type: RegularizationPathType,
    /// Number of lambda values to compute
    pub n_lambdas: usize,
    /// Minimum lambda value (relative to lambda_max)
    pub lambda_min_ratio: Float,
    /// Custom lambda values (if provided, overrides n_lambdas)
    pub lambdas: Option<Array1<Float>>,
    /// Tolerance for convergence
    pub tol: Float,
    /// Maximum number of iterations per lambda
    pub max_iter: usize,
    /// Whether to fit an intercept
    pub fit_intercept: bool,
    /// Cross-validation strategy
    pub cv_strategy: CrossValidationStrategy,
    /// Whether to standardize features
    pub standardize: bool,
    /// Early stopping for path computation
    pub early_stopping: bool,
    /// Minimum improvement for early stopping
    pub min_improvement: Float,
    /// Verbose output
    pub verbose: bool,
}

impl Default for RegularizationPathConfig {
    fn default() -> Self {
        Self {
            path_type: RegularizationPathType::default(),
            n_lambdas: 100,
            lambda_min_ratio: 1e-4,
            lambdas: None,
            tol: 1e-4,
            max_iter: 1000,
            fit_intercept: true,
            cv_strategy: CrossValidationStrategy::default(),
            standardize: true,
            early_stopping: true,
            min_improvement: 1e-6,
            verbose: false,
        }
    }
}

/// Results of regularization path computation
#[derive(Debug, Clone)]
pub struct RegularizationPathResult {
    /// Lambda values used
    pub lambdas: Array1<Float>,
    /// Coefficient paths (n_lambdas × n_features)
    pub coef_path: Array2<Float>,
    /// Intercept paths (n_lambdas)
    pub intercept_path: Array1<Float>,
    /// Cross-validation scores (n_lambdas)
    pub cv_scores: Array1<Float>,
    /// Standard errors of CV scores (n_lambdas)
    pub cv_scores_std: Array1<Float>,
    /// Number of non-zero coefficients at each lambda
    pub n_nonzero: Array1<usize>,
    /// Indices of selected features at each lambda
    pub active_features: Vec<Vec<usize>>,
    /// Best lambda value (based on CV)
    pub best_lambda: Float,
    /// Index of best lambda
    pub best_lambda_idx: usize,
    /// Lambda at 1 standard error rule
    pub lambda_1se: Float,
    /// Index of lambda at 1 standard error rule
    pub lambda_1se_idx: usize,
}

/// Regularization Path Solver for SVMs
#[derive(Debug)]
pub struct RegularizationPathSolver {
    config: RegularizationPathConfig,
}

impl Default for RegularizationPathSolver {
    fn default() -> Self {
        Self::new(RegularizationPathConfig::default())
    }
}

impl RegularizationPathSolver {
    /// Create a new regularization path solver
    pub fn new(config: RegularizationPathConfig) -> Self {
        Self { config }
    }

    /// Fit regularization path
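    ///
    /// Lambdas are visited from largest to smallest, warm-starting each solve
    /// from the previous solution, and every lambda is scored by
    /// cross-validation. A usage sketch (not compiled as a doctest):
    ///
    /// ```ignore
    /// let result = solver.fit_path(&x, &y)?;
    /// let coef_1se = result.coef_path.row(result.lambda_1se_idx);
    /// ```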
    pub fn fit_path(
        &self,
        x: &Array2<Float>,
        y: &Array1<Float>,
    ) -> Result<RegularizationPathResult> {
        let n_samples = x.nrows();
        let n_features = x.ncols();

        if n_samples != y.len() {
            return Err(SklearsError::InvalidInput(
                "Shape mismatch: X and y must have the same number of samples".to_string(),
            ));
        }

        // Standardize features if requested
        let (x_processed, feature_means, feature_stds) = if self.config.standardize {
            self.standardize_features(x)?
        } else {
            (
                x.clone(),
                Array1::zeros(n_features),
                Array1::ones(n_features),
            )
        };

        // Center targets
        let y_mean = if self.config.fit_intercept {
            y.mean().unwrap_or(0.0)
        } else {
            0.0
        };
        let y_centered = y.mapv(|val| val - y_mean);

        // Compute lambda sequence
        let mut lambdas = if let Some(custom_lambdas) = &self.config.lambdas {
            custom_lambdas.clone()
        } else {
            self.compute_lambda_sequence(&x_processed, &y_centered)?
        };

        let n_lambdas = lambdas.len();

        // Initialize path storage
        let mut coef_path = Array2::zeros((n_lambdas, n_features));
        let mut intercept_path = Array1::zeros(n_lambdas);
        let mut cv_scores = Array1::zeros(n_lambdas);
        let mut cv_scores_std = Array1::zeros(n_lambdas);
        let mut n_nonzero = Array1::zeros(n_lambdas);
        let mut active_features = Vec::with_capacity(n_lambdas);

        // Initial coefficient vector
        let mut coef = Array1::zeros(n_features);

        // Compute path
        for (i, &lambda) in lambdas.iter().enumerate() {
            if self.config.verbose && i % 10 == 0 {
                println!(
                    "Computing path for lambda {}/{}: {:.6}",
                    i + 1,
                    n_lambdas,
                    lambda
                );
            }

            // Warm start from previous solution
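            // The path is piecewise-smooth in lambda, so the previous
            // solution is usually close to the next one and coordinate
            // descent needs only a few passes to re-converge.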
            let (new_coef, intercept) =
                self.solve_for_lambda(&x_processed, &y_centered, lambda, &coef, y_mean)?;

            coef = new_coef.clone();

            // Store results
            coef_path.row_mut(i).assign(&new_coef);
            intercept_path[i] = intercept;

            // Count non-zero coefficients
            let nonzero_count = new_coef
                .iter()
                .filter(|&&x| x.abs() > self.config.tol)
                .count();
            n_nonzero[i] = nonzero_count;

            // Track active features
            let active: Vec<usize> = new_coef
                .iter()
                .enumerate()
                .filter(|(_, &x)| x.abs() > self.config.tol)
                .map(|(idx, _)| idx)
                .collect();
            active_features.push(active);

            // Cross-validation for this lambda
            let (cv_score, cv_std) =
                self.cross_validate_lambda(&x_processed, &y_centered, lambda, y_mean)?;
            cv_scores[i] = cv_score;
            cv_scores_std[i] = cv_std;

            // Early stopping check
            if self.config.early_stopping && i > 10 {
                // i > 10 guarantees both five-lambda windows below are populated
                let recent_avg = cv_scores.slice(s![i - 4..=i]).mean().unwrap();
                let prev_avg = cv_scores.slice(s![i - 9..=i - 5]).mean().unwrap();
                let recent_improvement = recent_avg - prev_avg;

                if recent_improvement.abs() < self.config.min_improvement {
                    if self.config.verbose {
                        println!("Early stopping at lambda index {i}");
                    }
                    // Truncate arrays
                    let actual_n_lambdas = i + 1;
                    lambdas = lambdas.slice(s![..actual_n_lambdas]).to_owned();
                    coef_path = coef_path.slice(s![..actual_n_lambdas, ..]).to_owned();
                    intercept_path = intercept_path.slice(s![..actual_n_lambdas]).to_owned();
                    cv_scores = cv_scores.slice(s![..actual_n_lambdas]).to_owned();
                    cv_scores_std = cv_scores_std.slice(s![..actual_n_lambdas]).to_owned();
                    n_nonzero = n_nonzero.slice(s![..actual_n_lambdas]).to_owned();
                    active_features.truncate(actual_n_lambdas);
                    break;
                }
            }
        }

        // Map coefficients back to the original feature scale and fold the
        // removed feature/target means into the stored intercepts
        if self.config.standardize {
            for i in 0..coef_path.nrows() {
                let mut mean_offset = 0.0;
                for j in 0..n_features {
                    if feature_stds[j] > 1e-10 {
                        coef_path[[i, j]] /= feature_stds[j];
                    }
                    mean_offset += coef_path[[i, j]] * feature_means[j];
                }
                intercept_path[i] += y_mean - mean_offset;
            }
        } else if self.config.fit_intercept {
            // Targets were centered before fitting, so restore the target mean
            for i in 0..intercept_path.len() {
                intercept_path[i] += y_mean;
            }
        }

        // Find best lambda (minimum CV error)
        let best_lambda_idx = cv_scores
            .iter()
            .enumerate()
            .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
            .map(|(idx, _)| idx)
            .unwrap_or(0);
        let best_lambda = lambdas[best_lambda_idx];

        // Find lambda at 1 standard error rule
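        // The 1-SE rule picks the largest lambda whose CV error is within one
        // standard error of the minimum, trading a statistically negligible
        // loss in fit for a sparser model.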
        let best_score = cv_scores[best_lambda_idx];
        let best_std = cv_scores_std[best_lambda_idx];
        let threshold = best_score + best_std;

        let lambda_1se_idx = (0..lambdas.len())
            .find(|&i| cv_scores[i] <= threshold && lambdas[i] >= best_lambda)
            .unwrap_or(best_lambda_idx);
        let lambda_1se = lambdas[lambda_1se_idx];

        Ok(RegularizationPathResult {
            lambdas,
            coef_path,
            intercept_path,
            cv_scores,
            cv_scores_std,
            n_nonzero,
            active_features,
            best_lambda,
            best_lambda_idx,
            lambda_1se,
            lambda_1se_idx,
        })
    }

    /// Standardize features
    fn standardize_features(
        &self,
        x: &Array2<Float>,
    ) -> Result<(Array2<Float>, Array1<Float>, Array1<Float>)> {
        let n_features = x.ncols();
        let mut means = Array1::zeros(n_features);
        let mut stds = Array1::ones(n_features);

        // Compute means
        for j in 0..n_features {
            means[j] = x.column(j).mean().unwrap_or(0.0);
        }

        // Compute standard deviations
        for j in 0..n_features {
            let variance = x
                .column(j)
                .iter()
                .map(|&val| (val - means[j]).powi(2))
                .sum::<Float>()
                / (x.nrows() - 1) as Float;
            stds[j] = variance.sqrt().max(1e-10);
        }

        // Standardize
        let mut x_std = x.clone();
        for i in 0..x.nrows() {
            for j in 0..n_features {
                x_std[[i, j]] = (x_std[[i, j]] - means[j]) / stds[j];
            }
        }

        Ok((x_std, means, stds))
    }

    /// Compute lambda sequence
    fn compute_lambda_sequence(
        &self,
        x: &Array2<Float>,
        y: &Array1<Float>,
    ) -> Result<Array1<Float>> {
        // Compute lambda_max (smallest lambda that gives all-zero solution)
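        // For an L1 penalty on centered data, the KKT conditions give
        // lambda_max = max_j |x_j^T y| / n: at or above this value the
        // all-zero coefficient vector is optimal.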
        let lambda_max = match &self.config.path_type {
            RegularizationPathType::Lasso | RegularizationPathType::AdaptiveLasso { .. } => {
                self.compute_lasso_lambda_max(x, y)?
            }
            RegularizationPathType::ElasticNet { l1_ratio } => {
                // Clamp l1_ratio away from zero: a pure-L2 penalty has no finite lambda_max
                self.compute_lasso_lambda_max(x, y)? / l1_ratio.max(1e-3)
            }
            RegularizationPathType::GroupLasso { .. } => {
                self.compute_group_lasso_lambda_max(x, y)?
            }
            RegularizationPathType::FusedLasso => self.compute_fused_lasso_lambda_max(x, y)?,
        };

        // Degenerate case: if no feature correlates with y, fall back to a tiny positive value
        let lambda_max = lambda_max.max(Float::EPSILON);
        let lambda_min = lambda_max * self.config.lambda_min_ratio;

        // Create log-spaced sequence
        let mut lambdas = Array1::zeros(self.config.n_lambdas);
        let log_max = lambda_max.ln();
        let log_min = lambda_min.ln();
        // max(2) guards the n_lambdas <= 1 case against division by zero
        let step = (log_max - log_min) / (self.config.n_lambdas.max(2) - 1) as Float;

        for i in 0..self.config.n_lambdas {
            lambdas[i] = (log_max - i as Float * step).exp();
        }

        Ok(lambdas)
    }

    /// Compute lambda_max for Lasso
    fn compute_lasso_lambda_max(&self, x: &Array2<Float>, y: &Array1<Float>) -> Result<Float> {
        let mut max_correlation: Float = 0.0;

        for j in 0..x.ncols() {
            let correlation = x
                .column(j)
                .iter()
                .zip(y.iter())
                .map(|(&xi, &yi)| xi * yi)
                .sum::<Float>()
                .abs()
                / x.nrows() as Float;

            max_correlation = max_correlation.max(correlation);
        }

        Ok(max_correlation)
    }

    /// Compute lambda_max for Group Lasso
    fn compute_group_lasso_lambda_max(
        &self,
        x: &Array2<Float>,
        y: &Array1<Float>,
    ) -> Result<Float> {
        if let RegularizationPathType::GroupLasso { groups } = &self.config.path_type {
            let mut max_group_norm: Float = 0.0;

            for group in groups {
                let mut group_norm = 0.0;
                for &feature_idx in group {
                    if feature_idx < x.ncols() {
                        let correlation = x
                            .column(feature_idx)
                            .iter()
                            .zip(y.iter())
                            .map(|(&xi, &yi)| xi * yi)
                            .sum::<Float>()
                            / x.nrows() as Float;
                        group_norm += correlation * correlation;
                    }
                }
                group_norm = group_norm.sqrt();
                max_group_norm = max_group_norm.max(group_norm);
            }

            Ok(max_group_norm)
        } else {
            Err(SklearsError::InvalidInput(
                "Invalid path type for Group Lasso lambda_max computation".to_string(),
            ))
        }
    }

    /// Compute lambda_max for Fused Lasso
    fn compute_fused_lasso_lambda_max(
        &self,
        x: &Array2<Float>,
        y: &Array1<Float>,
    ) -> Result<Float> {
        let lasso_max = self.compute_lasso_lambda_max(x, y)?;

        // For fused lasso, also consider difference penalties
        let mut max_diff_correlation: Float = 0.0;
        for j in 0..(x.ncols() - 1) {
            let diff_feature: Array1<Float> = x
                .rows()
                .into_iter()
                .map(|row| row[j + 1] - row[j])
                .collect();

            let correlation = diff_feature
                .iter()
                .zip(y.iter())
                .map(|(&xi, &yi)| xi * yi)
                .sum::<Float>()
                .abs()
                / x.nrows() as Float;

            max_diff_correlation = max_diff_correlation.max(correlation);
        }

        Ok(lasso_max.max(max_diff_correlation))
    }

    /// Solve for a specific lambda value
    fn solve_for_lambda(
        &self,
        x: &Array2<Float>,
        y: &Array1<Float>,
        lambda: Float,
        initial_coef: &Array1<Float>,
        y_mean: Float,
    ) -> Result<(Array1<Float>, Float)> {
        match &self.config.path_type {
            RegularizationPathType::Lasso => self.solve_lasso(x, y, lambda, initial_coef, y_mean),
            RegularizationPathType::ElasticNet { l1_ratio } => {
                self.solve_elastic_net(x, y, lambda, *l1_ratio, initial_coef, y_mean)
            }
            RegularizationPathType::GroupLasso { groups } => {
                self.solve_group_lasso(x, y, lambda, groups, initial_coef, y_mean)
            }
            RegularizationPathType::AdaptiveLasso { weights } => {
                self.solve_adaptive_lasso(x, y, lambda, weights, initial_coef, y_mean)
            }
            RegularizationPathType::FusedLasso => {
                self.solve_fused_lasso(x, y, lambda, initial_coef, y_mean)
            }
        }
    }

    /// Solve Lasso for a specific lambda using coordinate descent
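    ///
    /// Minimizes (1/(2n))·||y - X·coef||^2 + lambda·||coef||_1; the
    /// soft-threshold below is scaled by n accordingly.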
    fn solve_lasso(
        &self,
        x: &Array2<Float>,
        y: &Array1<Float>,
        lambda: Float,
        initial_coef: &Array1<Float>,
        y_mean: Float,
    ) -> Result<(Array1<Float>, Float)> {
        let n_samples = x.nrows();
        let n_features = x.ncols();
        let mut coef = initial_coef.clone();
        let mut intercept = y_mean;

        // Precompute X^T X diagonal for efficiency
        let mut xtx_diag = Array1::zeros(n_features);
        for j in 0..n_features {
            xtx_diag[j] = x.column(j).iter().map(|&val| val * val).sum::<Float>();
        }

        // Coordinate descent
        for _ in 0..self.config.max_iter {
            let mut converged = true;

            for j in 0..n_features {
                let old_coef_j = coef[j];

                // Compute residual without feature j
                let mut residual_sum = 0.0;
                for i in 0..n_samples {
                    let mut prediction = intercept;
                    for k in 0..n_features {
                        if k != j {
                            prediction += coef[k] * x[[i, k]];
                        }
                    }
                    residual_sum += x[[i, j]] * (y[i] - prediction);
                }

                // Soft thresholding
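                // Closed-form coordinate update:
                //   coef_j = S(x_j^T r_{-j}, n*lambda) / ||x_j||^2,
                // where S(z, t) = sign(z) * max(|z| - t, 0) and r_{-j} is the
                // residual with feature j excluded.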
                let threshold = lambda * n_samples as Float;
                if residual_sum > threshold {
                    coef[j] = (residual_sum - threshold) / xtx_diag[j];
                } else if residual_sum < -threshold {
                    coef[j] = (residual_sum + threshold) / xtx_diag[j];
                } else {
                    coef[j] = 0.0;
                }

                if (coef[j] - old_coef_j).abs() > self.config.tol {
                    converged = false;
                }
            }

            // Update intercept
            if self.config.fit_intercept {
                let mut residual_sum = 0.0;
                for i in 0..n_samples {
                    let mut prediction = 0.0;
                    for k in 0..n_features {
                        prediction += coef[k] * x[[i, k]];
                    }
                    residual_sum += y[i] - prediction;
                }
                intercept = residual_sum / n_samples as Float;
            }

            if converged {
                break;
            }
        }

        Ok((coef, intercept))
    }

    /// Solve Elastic Net for a specific lambda
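    ///
    /// Minimizes (1/(2n))·||y - X·coef||^2 + lambda·(l1_ratio·||coef||_1
    /// + (1 - l1_ratio)/2·||coef||_2^2); l1_ratio = 1 recovers the Lasso.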
    fn solve_elastic_net(
        &self,
        x: &Array2<Float>,
        y: &Array1<Float>,
        lambda: Float,
        l1_ratio: Float,
        initial_coef: &Array1<Float>,
        y_mean: Float,
    ) -> Result<(Array1<Float>, Float)> {
        let n_samples = x.nrows();
        let n_features = x.ncols();
        let mut coef = initial_coef.clone();
        let mut intercept = y_mean;

        let l1_penalty = lambda * l1_ratio;
        let l2_penalty = lambda * (1.0 - l1_ratio);

        // Precompute X^T X diagonal
        let mut xtx_diag = Array1::zeros(n_features);
        for j in 0..n_features {
            xtx_diag[j] = x.column(j).iter().map(|&val| val * val).sum::<Float>()
                + l2_penalty * n_samples as Float;
        }

        // Coordinate descent
        for _ in 0..self.config.max_iter {
            let mut converged = true;

            for j in 0..n_features {
                let old_coef_j = coef[j];

                // Compute residual without feature j
                let mut residual_sum = 0.0;
                for i in 0..n_samples {
                    let mut prediction = intercept;
                    for k in 0..n_features {
                        if k != j {
                            prediction += coef[k] * x[[i, k]];
                        }
                    }
                    residual_sum += x[[i, j]] * (y[i] - prediction);
                }

                // Soft thresholding with L2 penalty
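                // Same soft-threshold as the Lasso, but the denominator
                // (xtx_diag) was augmented with n*lambda*(1 - l1_ratio), so the
                // L2 part shrinks every active coefficient multiplicatively.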
                let threshold = l1_penalty * n_samples as Float;
                if residual_sum > threshold {
                    coef[j] = (residual_sum - threshold) / xtx_diag[j];
                } else if residual_sum < -threshold {
                    coef[j] = (residual_sum + threshold) / xtx_diag[j];
                } else {
                    coef[j] = 0.0;
                }

                if (coef[j] - old_coef_j).abs() > self.config.tol {
                    converged = false;
                }
            }

            // Update intercept
            if self.config.fit_intercept {
                let mut residual_sum = 0.0;
                for i in 0..n_samples {
                    let mut prediction = 0.0;
                    for k in 0..n_features {
                        prediction += coef[k] * x[[i, k]];
                    }
                    residual_sum += y[i] - prediction;
                }
                intercept = residual_sum / n_samples as Float;
            }

            if converged {
                break;
            }
        }

        Ok((coef, intercept))
    }

    /// Solve Group Lasso for a specific lambda
    fn solve_group_lasso(
        &self,
        x: &Array2<Float>,
        y: &Array1<Float>,
        lambda: Float,
        groups: &[Vec<usize>],
        initial_coef: &Array1<Float>,
        y_mean: Float,
    ) -> Result<(Array1<Float>, Float)> {
        let n_samples = x.nrows();
        let n_features = x.ncols();
        let mut coef = initial_coef.clone();
        let intercept = y_mean;

        // Group coordinate descent
        for _ in 0..self.config.max_iter {
            let mut converged = true;

            for group in groups {
                let group_size = group.len();
                let mut old_group_coef = Array1::zeros(group_size);
                for (idx, &feature_idx) in group.iter().enumerate() {
                    if feature_idx < n_features {
                        old_group_coef[idx] = coef[feature_idx];
                    }
                }

                // Compute group gradient
                let mut group_gradient = Array1::zeros(group_size);
                for i in 0..n_samples {
                    let mut prediction = intercept;
                    for k in 0..n_features {
                        prediction += coef[k] * x[[i, k]];
                    }
                    let residual = y[i] - prediction;

                    for (idx, &feature_idx) in group.iter().enumerate() {
                        if feature_idx < n_features {
                            group_gradient[idx] +=
                                x[[i, feature_idx]] * residual / n_samples as Float;
                        }
                    }
                }

                // Group soft thresholding
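                // Block (vector) soft-threshold: scale the averaged group
                // correlation g by max(0, 1 - lambda/||g||_2); if
                // ||g||_2 <= lambda the whole group is zeroed at once.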
                let group_norm = group_gradient
                    .iter()
                    .map(|&x: &Float| x * x)
                    .sum::<Float>()
                    .sqrt();
                if group_norm > lambda {
                    let shrinkage_factor = (1.0 - lambda / group_norm).max(0.0);
                    for (idx, &feature_idx) in group.iter().enumerate() {
                        if feature_idx < n_features {
                            coef[feature_idx] = group_gradient[idx] * shrinkage_factor;
                            if (coef[feature_idx] - old_group_coef[idx]).abs() > self.config.tol {
                                converged = false;
                            }
                        }
                    }
                } else {
                    // Shrink entire group to zero
                    for &feature_idx in group {
                        if feature_idx < n_features {
                            if coef[feature_idx].abs() > self.config.tol {
                                converged = false;
                            }
                            coef[feature_idx] = 0.0;
                        }
                    }
                }
            }

            if converged {
                break;
            }
        }

        Ok((coef, intercept))
    }

    /// Solve Adaptive Lasso for a specific lambda
    fn solve_adaptive_lasso(
        &self,
        x: &Array2<Float>,
        y: &Array1<Float>,
        lambda: Float,
        weights: &Array1<Float>,
        initial_coef: &Array1<Float>,
        y_mean: Float,
    ) -> Result<(Array1<Float>, Float)> {
        let n_samples = x.nrows();
        let n_features = x.ncols();
        let mut coef = initial_coef.clone();
        let intercept = y_mean;

        // Precompute X^T X diagonal
        let mut xtx_diag = Array1::zeros(n_features);
        for j in 0..n_features {
            xtx_diag[j] = x.column(j).iter().map(|&val| val * val).sum::<Float>();
        }

        // Coordinate descent with adaptive weights
        for _ in 0..self.config.max_iter {
            let mut converged = true;

            for j in 0..n_features {
                let old_coef_j = coef[j];

                // Compute residual without feature j
                let mut residual_sum = 0.0;
                for i in 0..n_samples {
                    let mut prediction = intercept;
                    for k in 0..n_features {
                        if k != j {
                            prediction += coef[k] * x[[i, k]];
                        }
                    }
                    residual_sum += x[[i, j]] * (y[i] - prediction);
                }

                // Adaptive soft thresholding
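                // Per-feature threshold lambda * w_j: features with large
                // weights are shrunk harder. Weights are commonly chosen as
                // 1/|coef_init|^gamma from a preliminary (e.g. OLS) fit.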
                let adaptive_threshold = lambda * weights[j] * n_samples as Float;
                if residual_sum > adaptive_threshold {
                    coef[j] = (residual_sum - adaptive_threshold) / xtx_diag[j];
                } else if residual_sum < -adaptive_threshold {
                    coef[j] = (residual_sum + adaptive_threshold) / xtx_diag[j];
                } else {
                    coef[j] = 0.0;
                }

                if (coef[j] - old_coef_j).abs() > self.config.tol {
                    converged = false;
                }
            }

            if converged {
                break;
            }
        }

        Ok((coef, intercept))
    }

    /// Solve Fused Lasso for a specific lambda
    fn solve_fused_lasso(
        &self,
        x: &Array2<Float>,
        y: &Array1<Float>,
        lambda: Float,
        initial_coef: &Array1<Float>,
        y_mean: Float,
    ) -> Result<(Array1<Float>, Float)> {
        // For simplicity, implement as standard Lasso
        // A full implementation would include difference penalties
        self.solve_lasso(x, y, lambda, initial_coef, y_mean)
    }

    /// Cross-validate for a specific lambda value
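    ///
    /// Returns the mean and standard deviation of the per-fold mean squared
    /// prediction error, so lower scores are better.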
    fn cross_validate_lambda(
        &self,
        x: &Array2<Float>,
        y: &Array1<Float>,
        lambda: Float,
        y_mean: Float,
    ) -> Result<(Float, Float)> {
        let n_samples = x.nrows();
        let mut cv_scores = Vec::new();

        match &self.config.cv_strategy {
            CrossValidationStrategy::KFold { k } => {
                let fold_size = n_samples / k;

                for fold in 0..*k {
                    let test_start = fold * fold_size;
                    let test_end = if fold == k - 1 {
                        n_samples
                    } else {
                        (fold + 1) * fold_size
                    };

                    // Create train/test splits
                    let mut train_indices = Vec::new();
                    let mut test_indices = Vec::new();

                    for i in 0..n_samples {
                        if i >= test_start && i < test_end {
                            test_indices.push(i);
                        } else {
                            train_indices.push(i);
                        }
                    }

                    if train_indices.is_empty() || test_indices.is_empty() {
                        continue;
                    }

                    // Extract train/test data
                    let x_train = self.extract_rows(x, &train_indices)?;
                    let y_train = self.extract_elements(y, &train_indices)?;
                    let x_test = self.extract_rows(x, &test_indices)?;
                    let y_test = self.extract_elements(y, &test_indices)?;

                    // Train on fold
                    let zero_coef = Array1::zeros(x.ncols());
                    let (coef_fold, intercept_fold) =
                        self.solve_for_lambda(&x_train, &y_train, lambda, &zero_coef, y_mean)?;

                    // Evaluate on test fold
                    let mut test_error = 0.0;
                    for i in 0..x_test.nrows() {
                        let mut prediction = if self.config.fit_intercept {
                            intercept_fold
                        } else {
                            0.0
                        };

                        for j in 0..x_test.ncols() {
                            prediction += coef_fold[j] * x_test[[i, j]];
                        }

                        test_error += (y_test[i] - prediction).powi(2);
                    }
                    test_error /= x_test.nrows() as Float;
                    cv_scores.push(test_error);
                }
            }
            _ => {
                // For other CV strategies, implement similarly
                // Using simple holdout for now
                let n_test = n_samples / 5;
                let n_train = n_samples - n_test;

                let x_train = x.slice(s![..n_train, ..]).to_owned();
                let y_train = y.slice(s![..n_train]).to_owned();
                let x_test = x.slice(s![n_train.., ..]).to_owned();
                let y_test = y.slice(s![n_train..]).to_owned();

                let zero_coef = Array1::zeros(x.ncols());
                let (coef_fold, intercept_fold) =
                    self.solve_for_lambda(&x_train, &y_train, lambda, &zero_coef, y_mean)?;

                let mut test_error = 0.0;
                for i in 0..x_test.nrows() {
                    let mut prediction = if self.config.fit_intercept {
                        intercept_fold
                    } else {
                        0.0
                    };

                    for j in 0..x_test.ncols() {
                        prediction += coef_fold[j] * x_test[[i, j]];
                    }

                    test_error += (y_test[i] - prediction).powi(2);
                }
                test_error /= x_test.nrows() as Float;
                cv_scores.push(test_error);
            }
        }

        if cv_scores.is_empty() {
            return Ok((Float::INFINITY, 0.0));
        }

        let mean_score = cv_scores.iter().sum::<Float>() / cv_scores.len() as Float;
        let variance = cv_scores
            .iter()
            .map(|&score| (score - mean_score).powi(2))
            .sum::<Float>()
            / cv_scores.len() as Float;
        let std_score = variance.sqrt();

        Ok((mean_score, std_score))
    }

    /// Extract specific rows from a matrix
    fn extract_rows(&self, matrix: &Array2<Float>, indices: &[usize]) -> Result<Array2<Float>> {
        let n_features = matrix.ncols();
        let mut result = Array2::zeros((indices.len(), n_features));

        for (i, &idx) in indices.iter().enumerate() {
            if idx < matrix.nrows() {
                result.row_mut(i).assign(&matrix.row(idx));
            }
        }

        Ok(result)
    }

    /// Extract specific elements from an array
    fn extract_elements(&self, array: &Array1<Float>, indices: &[usize]) -> Result<Array1<Float>> {
        let mut result = Array1::zeros(indices.len());

        for (i, &idx) in indices.iter().enumerate() {
            if idx < array.len() {
                result[i] = array[idx];
            }
        }

        Ok(result)
    }
}

impl RegularizationPathResult {
    /// Get coefficient at a specific lambda value
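    ///
    /// The lambda must match a stored grid value to within 1e-10; otherwise
    /// `None` is returned. A lookup sketch (not compiled as a doctest):
    ///
    /// ```ignore
    /// if let Some(coef) = result.coef_at_lambda(result.best_lambda) {
    ///     println!("{:?}", coef);
    /// }
    /// ```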
    pub fn coef_at_lambda(&self, lambda: Float) -> Option<ArrayView1<'_, Float>> {
        let idx = self
            .lambdas
            .iter()
            .position(|&l| (l - lambda).abs() < 1e-10)?;
        Some(self.coef_path.row(idx))
    }

    /// Get the coefficient path for a specific feature
    pub fn feature_path(&self, feature_idx: usize) -> Option<ArrayView1<'_, Float>> {
        if feature_idx < self.coef_path.ncols() {
            Some(self.coef_path.column(feature_idx))
        } else {
            None
        }
    }

    /// Find the sparsest model within 1 standard error of the best
    pub fn sparse_model_1se(&self) -> (Float, ArrayView1<'_, Float>) {
        let lambda = self.lambda_1se;
        let coef = self.coef_path.row(self.lambda_1se_idx);
        (lambda, coef)
    }

    /// Get summary statistics
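    ///
    /// Keys: `n_lambdas`, `best_lambda`, `lambda_1se`, `best_cv_score`,
    /// `min_nonzero_features`, `max_nonzero_features`.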
    pub fn summary(&self) -> HashMap<String, Float> {
        let mut summary = HashMap::new();

        summary.insert("n_lambdas".to_string(), self.lambdas.len() as Float);
        summary.insert("best_lambda".to_string(), self.best_lambda);
        summary.insert("lambda_1se".to_string(), self.lambda_1se);
        summary.insert(
            "best_cv_score".to_string(),
            self.cv_scores[self.best_lambda_idx],
        );
        summary.insert(
            "min_nonzero_features".to_string(),
            *self.n_nonzero.iter().min().unwrap_or(&0) as Float,
        );
        summary.insert(
            "max_nonzero_features".to_string(),
            *self.n_nonzero.iter().max().unwrap_or(&0) as Float,
        );

        summary
    }
}

#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_regularization_path_config() {
        let config = RegularizationPathConfig {
            path_type: RegularizationPathType::Lasso,
            n_lambdas: 50,
            lambda_min_ratio: 1e-3,
            ..Default::default()
        };

        assert_eq!(config.n_lambdas, 50);
        assert_eq!(config.lambda_min_ratio, 1e-3);
        assert!(matches!(config.path_type, RegularizationPathType::Lasso));
    }

    #[test]
    fn test_lambda_sequence_computation() {
        let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
        let y = array![1.0, 2.0, 3.0];

        let config = RegularizationPathConfig::default();
        let solver = RegularizationPathSolver::new(config);

        let lambdas = solver.compute_lambda_sequence(&x, &y).unwrap();

        assert_eq!(lambdas.len(), 100);
        assert!(lambdas[0] > lambdas[lambdas.len() - 1]); // Decreasing sequence

        for i in 1..lambdas.len() {
            assert!(lambdas[i - 1] >= lambdas[i]); // Non-increasing
        }
    }

    #[test]
    #[ignore = "Slow test: computes regularization path. Run with --ignored flag"]
    fn test_lasso_path() {
        let x = array![
            [1.0, 2.0, 0.1],
            [2.0, 3.0, 0.2],
            [3.0, 4.0, 0.3],
            [4.0, 5.0, 0.4],
            [5.0, 6.0, 0.5],
        ];
        let y = array![1.0, 2.0, 3.0, 4.0, 5.0];

        let config = RegularizationPathConfig {
            path_type: RegularizationPathType::Lasso,
            n_lambdas: 20,
            max_iter: 100,
            verbose: false,
            ..Default::default()
        };

        let solver = RegularizationPathSolver::new(config);
        let result = solver.fit_path(&x, &y).unwrap();

        assert_eq!(result.lambdas.len(), 20);
        assert_eq!(result.coef_path.nrows(), 20);
        assert_eq!(result.coef_path.ncols(), 3);
        assert_eq!(result.intercept_path.len(), 20);
        assert_eq!(result.cv_scores.len(), 20);
        // Lambdas decrease along the path, so the most regularized (first)
        // model should have no more active features than the least regularized (last)
        assert!(result.n_nonzero[0] <= result.n_nonzero[result.n_nonzero.len() - 1]);

        // Check best lambda selection
        assert!(result.best_lambda_idx < result.lambdas.len());
        assert!(result.lambda_1se_idx < result.lambdas.len());

        // Test coefficient extraction
        let best_coef = result.coef_path.row(result.best_lambda_idx);
        assert_eq!(best_coef.len(), 3);

        // Test summary
        let summary = result.summary();
        assert!(summary.contains_key("best_lambda"));
        assert!(summary.contains_key("lambda_1se"));
    }

    #[test]
    fn test_elastic_net_path_type() {
        let path_type = RegularizationPathType::ElasticNet { l1_ratio: 0.5 };

        if let RegularizationPathType::ElasticNet { l1_ratio } = path_type {
            assert_eq!(l1_ratio, 0.5);
        } else {
            panic!("Expected ElasticNet path type");
        }
    }

    #[test]
    fn test_group_lasso_path_type() {
        let groups = vec![vec![0, 1], vec![2, 3], vec![4]];
        let path_type = RegularizationPathType::GroupLasso {
            groups: groups.clone(),
        };

        if let RegularizationPathType::GroupLasso { groups: g } = path_type {
            assert_eq!(g.len(), 3);
            assert_eq!(g[0], vec![0, 1]);
        } else {
            panic!("Expected GroupLasso path type");
        }
    }

    #[test]
    fn test_standardization() {
        let x = array![[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]];
        let config = RegularizationPathConfig::default();
        let solver = RegularizationPathSolver::new(config);

        let (x_std, means, _stds) = solver.standardize_features(&x).unwrap();

        // Check means are approximately zero after standardization
        for j in 0..x_std.ncols() {
            let col_mean = x_std.column(j).mean().unwrap();
            assert!(col_mean.abs() < 1e-10);
        }

        // Check original means and stds
        assert!((means[0] - 2.0).abs() < 1e-10);
        assert!((means[1] - 20.0).abs() < 1e-10);
    }
}