use scirs2_core::ndarray::{s, Array1, Array2, ArrayView1};
use sklears_core::{
    error::{Result, SklearsError},
    types::Float,
};
use std::collections::HashMap;

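/// Regularization penalty traced along the lambda path.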
#[derive(Debug, Clone, PartialEq, Default)]
pub enum RegularizationPathType {
    #[default]
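    /// Standard L1-penalized least squares.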
    Lasso,
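    /// Mixed L1/L2 penalty; `l1_ratio` gives the L1 share of the total penalty.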
    ElasticNet { l1_ratio: Float },
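    /// Group-wise penalty over the given groups of feature indices.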
    GroupLasso { groups: Vec<Vec<usize>> },
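    /// L1 penalty with per-feature adaptive weights.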
    AdaptiveLasso { weights: Array1<Float> },
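    /// L1 penalty on coefficients and on successive coefficient differences.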
    FusedLasso,
}

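/// Cross-validation strategy used to score each lambda on the path.
///
/// Note: only `KFold` is fully implemented; the other variants currently fall
/// back to a single 80/20 holdout split in `cross_validate_lambda`.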
#[derive(Debug, Clone, PartialEq)]
pub enum CrossValidationStrategy {
    KFold { k: usize },
    LeaveOneOut,
    TimeSeriesSplit { n_splits: usize },
    StratifiedKFold { k: usize },
}

impl Default for CrossValidationStrategy {
    fn default() -> Self {
        CrossValidationStrategy::KFold { k: 5 }
    }
}

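/// Configuration for regularization path computation.
///
/// A sketch of a non-default setup (field values are illustrative):
///
/// ```ignore
/// let config = RegularizationPathConfig {
///     path_type: RegularizationPathType::ElasticNet { l1_ratio: 0.5 },
///     n_lambdas: 50,
///     ..Default::default()
/// };
/// ```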
#[derive(Debug, Clone)]
pub struct RegularizationPathConfig {
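    /// Penalty type to trace along the path.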
    pub path_type: RegularizationPathType,
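    /// Number of lambda values on the path.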
    pub n_lambdas: usize,
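    /// Ratio of the smallest to the largest lambda on the log-spaced grid.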
    pub lambda_min_ratio: Float,
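    /// Optional user-supplied lambda sequence; overrides the generated grid.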
    pub lambdas: Option<Array1<Float>>,
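    /// Convergence tolerance for coordinate updates.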
    pub tol: Float,
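    /// Maximum number of coordinate-descent sweeps per lambda.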
    pub max_iter: usize,
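    /// Whether to fit an intercept term.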
    pub fit_intercept: bool,
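    /// Cross-validation strategy for scoring each lambda.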
    pub cv_strategy: CrossValidationStrategy,
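    /// Whether to standardize features to zero mean and unit variance.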
    pub standardize: bool,
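    /// Stop extending the path once CV scores stop improving.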
    pub early_stopping: bool,
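    /// Minimum CV-score improvement required to keep extending the path.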
    pub min_improvement: Float,
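    /// Print progress information while fitting.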
    pub verbose: bool,
}

impl Default for RegularizationPathConfig {
    fn default() -> Self {
        Self {
            path_type: RegularizationPathType::default(),
            n_lambdas: 100,
            lambda_min_ratio: 1e-4,
            lambdas: None,
            tol: 1e-4,
            max_iter: 1000,
            fit_intercept: true,
            cv_strategy: CrossValidationStrategy::default(),
            standardize: true,
            early_stopping: true,
            min_improvement: 1e-6,
            verbose: false,
        }
    }
}

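/// Result of a regularization path fit: per-lambda coefficients, CV scores,
/// and the selected lambdas.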
#[derive(Debug, Clone)]
pub struct RegularizationPathResult {
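    /// Lambda values actually used, in decreasing order.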
    pub lambdas: Array1<Float>,
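    /// Coefficients for each lambda, shape `(n_lambdas, n_features)`.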
    pub coef_path: Array2<Float>,
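    /// Intercept for each lambda.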
    pub intercept_path: Array1<Float>,
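    /// Mean cross-validation error (MSE) for each lambda.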
    pub cv_scores: Array1<Float>,
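    /// Standard deviation of the CV error for each lambda.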
    pub cv_scores_std: Array1<Float>,
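    /// Number of nonzero coefficients at each lambda.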
    pub n_nonzero: Array1<usize>,
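    /// Indices of the active (nonzero) features at each lambda.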
    pub active_features: Vec<Vec<usize>>,
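    /// Lambda with the lowest mean CV error.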
    pub best_lambda: Float,
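    /// Index of `best_lambda` in `lambdas`.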
    pub best_lambda_idx: usize,
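    /// Largest lambda whose CV error is within one standard error of the best.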
    pub lambda_1se: Float,
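    /// Index of `lambda_1se` in `lambdas`.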
    pub lambda_1se_idx: usize,
}

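/// Computes a full regularization path with cross-validated lambda selection.
///
/// A minimal usage sketch (assuming `Float = f64` and that the `array` macro is
/// available from `scirs2_core::ndarray`):
///
/// ```ignore
/// use scirs2_core::ndarray::array;
///
/// let x = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0], [5.0, 6.0]];
/// let y = array![1.0, 2.0, 3.0, 4.0, 5.0];
///
/// let solver = RegularizationPathSolver::default();
/// let path = solver.fit_path(&x, &y)?;
/// println!("best lambda = {:.4}", path.best_lambda);
/// ```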
#[derive(Debug)]
pub struct RegularizationPathSolver {
    config: RegularizationPathConfig,
}

impl Default for RegularizationPathSolver {
    fn default() -> Self {
        Self::new(RegularizationPathConfig::default())
    }
}

impl RegularizationPathSolver {
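    /// Create a solver from the given configuration.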
    pub fn new(config: RegularizationPathConfig) -> Self {
        Self { config }
    }

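    /// Fit the full regularization path for `x` and `y`.
    ///
    /// Builds a decreasing lambda grid, solves each lambda with warm starts,
    /// cross-validates every solution, and selects both the CV-optimal lambda
    /// and the one-standard-error lambda.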
    pub fn fit_path(
        &self,
        x: &Array2<Float>,
        y: &Array1<Float>,
    ) -> Result<RegularizationPathResult> {
        let n_samples = x.nrows();
        let n_features = x.ncols();

        if n_samples != y.len() {
            return Err(SklearsError::InvalidInput(
                "Shape mismatch: X and y must have the same number of samples".to_string(),
            ));
        }

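        // Standardize features if requested; keep the stds to unscale coefficients later.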
        let (x_processed, _feature_means, feature_stds) = if self.config.standardize {
            self.standardize_features(x)?
        } else {
            (
                x.clone(),
                Array1::zeros(n_features),
                Array1::ones(n_features),
            )
        };

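        // Center the target when fitting an intercept.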
        let y_mean = if self.config.fit_intercept {
            y.mean().unwrap_or(0.0)
        } else {
            0.0
        };
        let y_centered = y.mapv(|val| val - y_mean);

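        // Lambda grid: user-supplied values or a log-spaced, decreasing sequence.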
        let mut lambdas = if let Some(custom_lambdas) = &self.config.lambdas {
            custom_lambdas.clone()
        } else {
            self.compute_lambda_sequence(&x_processed, &y_centered)?
        };

        let n_lambdas = lambdas.len();

        let mut coef_path = Array2::zeros((n_lambdas, n_features));
        let mut intercept_path = Array1::zeros(n_lambdas);
        let mut cv_scores = Array1::zeros(n_lambdas);
        let mut cv_scores_std = Array1::zeros(n_lambdas);
        let mut n_nonzero = Array1::zeros(n_lambdas);
        let mut active_features = Vec::with_capacity(n_lambdas);

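        // Warm start: carry coefficients from one lambda to the next.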
        let mut coef = Array1::zeros(n_features);

        for (i, &lambda) in lambdas.iter().enumerate() {
            if self.config.verbose && i % 10 == 0 {
                println!(
                    "Computing path for lambda {}/{}: {:.6}",
                    i + 1,
                    n_lambdas,
                    lambda
                );
            }

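            // Solve at this lambda, warm-started from the previous solution.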
            let (new_coef, intercept) =
                self.solve_for_lambda(&x_processed, &y_centered, lambda, &coef, y_mean)?;

            coef = new_coef.clone();

            coef_path.row_mut(i).assign(&new_coef);
            intercept_path[i] = intercept;

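            // Record sparsity and the active feature set at this lambda.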
            let nonzero_count = new_coef
                .iter()
                .filter(|&&x| x.abs() > self.config.tol)
                .count();
            n_nonzero[i] = nonzero_count;

            let active: Vec<usize> = new_coef
                .iter()
                .enumerate()
                .filter(|(_, &x)| x.abs() > self.config.tol)
                .map(|(idx, _)| idx)
                .collect();
            active_features.push(active);

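            // Score this lambda by cross-validation.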
            let (cv_score, cv_std) =
                self.cross_validate_lambda(&x_processed, &y_centered, lambda, y_mean)?;
            cv_scores[i] = cv_score;
            cv_scores_std[i] = cv_std;

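            // Early stopping: compare the mean CV score of the last five
            // lambdas with the five before them.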
            if self.config.early_stopping && i > 10 {
                // `i > 10` guarantees both five-lambda windows below are in
                // range, so the improvement can be computed unconditionally.
                let recent_avg = cv_scores.slice(s![i - 4..=i]).mean().unwrap();
                let prev_avg = cv_scores.slice(s![i - 9..=i - 5]).mean().unwrap();
                let recent_improvement = recent_avg - prev_avg;

                if recent_improvement.abs() < self.config.min_improvement {
                    if self.config.verbose {
                        println!("Early stopping at lambda index {i}");
                    }
                    let actual_n_lambdas = i + 1;
                    lambdas = lambdas.slice(s![..actual_n_lambdas]).to_owned();
                    coef_path = coef_path.slice(s![..actual_n_lambdas, ..]).to_owned();
                    intercept_path = intercept_path.slice(s![..actual_n_lambdas]).to_owned();
                    cv_scores = cv_scores.slice(s![..actual_n_lambdas]).to_owned();
                    cv_scores_std = cv_scores_std.slice(s![..actual_n_lambdas]).to_owned();
                    n_nonzero = n_nonzero.slice(s![..actual_n_lambdas]).to_owned();
                    active_features.truncate(actual_n_lambdas);
                    break;
                }
            }
        }

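        // Map coefficients back to the original feature scale.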
        if self.config.standardize {
            for i in 0..coef_path.nrows() {
                for j in 0..n_features {
                    if feature_stds[j] > 1e-10 {
                        coef_path[[i, j]] /= feature_stds[j];
                    }
                }
            }
        }

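        // Select the lambda with the lowest mean CV error.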
        let best_lambda_idx = cv_scores
            .iter()
            .enumerate()
            .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
            .map(|(idx, _)| idx)
            .unwrap_or(0);
        let best_lambda = lambdas[best_lambda_idx];

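        // One-standard-error rule: pick the largest lambda whose CV error is
        // within one standard error of the best score.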
        let best_score = cv_scores[best_lambda_idx];
        let best_std = cv_scores_std[best_lambda_idx];
        let threshold = best_score + best_std;

        let lambda_1se_idx = (0..lambdas.len())
            .find(|&i| cv_scores[i] <= threshold && lambdas[i] >= best_lambda)
            .unwrap_or(best_lambda_idx);
        let lambda_1se = lambdas[lambda_1se_idx];

        Ok(RegularizationPathResult {
            lambdas,
            coef_path,
            intercept_path,
            cv_scores,
            cv_scores_std,
            n_nonzero,
            active_features,
            best_lambda,
            best_lambda_idx,
            lambda_1se,
            lambda_1se_idx,
        })
    }

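    /// Standardize each feature to zero mean and unit variance, returning the
    /// standardized matrix along with per-feature means and standard deviations.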
    fn standardize_features(
        &self,
        x: &Array2<Float>,
    ) -> Result<(Array2<Float>, Array1<Float>, Array1<Float>)> {
        let n_features = x.ncols();
        let mut means = Array1::zeros(n_features);
        let mut stds = Array1::ones(n_features);

        for j in 0..n_features {
            means[j] = x.column(j).mean().unwrap_or(0.0);
        }

        for j in 0..n_features {
            let variance = x
                .column(j)
                .iter()
                .map(|&val| (val - means[j]).powi(2))
                .sum::<Float>()
                / (x.nrows() - 1) as Float;
            stds[j] = variance.sqrt().max(1e-10);
        }

        let mut x_std = x.clone();
        for i in 0..x.nrows() {
            for j in 0..n_features {
                x_std[[i, j]] = (x_std[[i, j]] - means[j]) / stds[j];
            }
        }

        Ok((x_std, means, stds))
    }

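    /// Generate a log-spaced, decreasing lambda sequence from the data-driven
    /// `lambda_max` down to `lambda_max * lambda_min_ratio`.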
    fn compute_lambda_sequence(
        &self,
        x: &Array2<Float>,
        y: &Array1<Float>,
    ) -> Result<Array1<Float>> {
        let lambda_max = match &self.config.path_type {
            RegularizationPathType::Lasso | RegularizationPathType::AdaptiveLasso { .. } => {
                self.compute_lasso_lambda_max(x, y)?
            }
            RegularizationPathType::ElasticNet { l1_ratio } => {
                self.compute_lasso_lambda_max(x, y)? / l1_ratio
            }
            RegularizationPathType::GroupLasso { .. } => {
                self.compute_group_lasso_lambda_max(x, y)?
            }
            RegularizationPathType::FusedLasso => self.compute_fused_lasso_lambda_max(x, y)?,
        };

        let lambda_min = lambda_max * self.config.lambda_min_ratio;

        // Guard against division by zero in the step computation below.
        if self.config.n_lambdas < 2 {
            return Ok(Array1::from_elem(1, lambda_max));
        }

        let mut lambdas = Array1::zeros(self.config.n_lambdas);
        let log_max = lambda_max.ln();
        let log_min = lambda_min.ln();
        let step = (log_max - log_min) / (self.config.n_lambdas - 1) as Float;

        for i in 0..self.config.n_lambdas {
            lambdas[i] = (log_max - i as Float * step).exp();
        }

        Ok(lambdas)
    }

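    /// Lasso `lambda_max`, the smallest lambda at which all coefficients are
    /// zero: `max_j |x_j^T y| / n`.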
    fn compute_lasso_lambda_max(&self, x: &Array2<Float>, y: &Array1<Float>) -> Result<Float> {
        let mut max_correlation: Float = 0.0;

        for j in 0..x.ncols() {
            let correlation = x
                .column(j)
                .iter()
                .zip(y.iter())
                .map(|(&xi, &yi)| xi * yi)
                .sum::<Float>()
                .abs()
                / x.nrows() as Float;

            max_correlation = max_correlation.max(correlation);
        }

        Ok(max_correlation)
    }

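    /// Group Lasso analogue of `lambda_max`: the largest Euclidean norm of the
    /// per-group correlation vectors `X_g^T y / n`.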
    fn compute_group_lasso_lambda_max(
        &self,
        x: &Array2<Float>,
        y: &Array1<Float>,
    ) -> Result<Float> {
        if let RegularizationPathType::GroupLasso { groups } = &self.config.path_type {
            let mut max_group_norm: Float = 0.0;

            for group in groups {
                let mut group_norm = 0.0;
                for &feature_idx in group {
                    if feature_idx < x.ncols() {
                        let correlation = x
                            .column(feature_idx)
                            .iter()
                            .zip(y.iter())
                            .map(|(&xi, &yi)| xi * yi)
                            .sum::<Float>()
                            / x.nrows() as Float;
                        group_norm += correlation * correlation;
                    }
                }
                group_norm = group_norm.sqrt();
                max_group_norm = max_group_norm.max(group_norm);
            }

            Ok(max_group_norm)
        } else {
            Err(SklearsError::InvalidInput(
                "Invalid path type for Group Lasso lambda_max computation".to_string(),
            ))
        }
    }

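    /// Fused Lasso `lambda_max`: the larger of the Lasso bound and the maximum
    /// correlation of adjacent-feature differences with `y`.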
    fn compute_fused_lasso_lambda_max(
        &self,
        x: &Array2<Float>,
        y: &Array1<Float>,
    ) -> Result<Float> {
        let lasso_max = self.compute_lasso_lambda_max(x, y)?;

        let mut max_diff_correlation: Float = 0.0;
        for j in 0..(x.ncols() - 1) {
            let diff_feature: Array1<Float> = x
                .rows()
                .into_iter()
                .map(|row| row[j + 1] - row[j])
                .collect();

            let correlation = diff_feature
                .iter()
                .zip(y.iter())
                .map(|(&xi, &yi)| xi * yi)
                .sum::<Float>()
                .abs()
                / x.nrows() as Float;

            max_diff_correlation = max_diff_correlation.max(correlation);
        }

        Ok(lasso_max.max(max_diff_correlation))
    }

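    /// Dispatch to the solver matching the configured penalty type.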
    fn solve_for_lambda(
        &self,
        x: &Array2<Float>,
        y: &Array1<Float>,
        lambda: Float,
        initial_coef: &Array1<Float>,
        y_mean: Float,
    ) -> Result<(Array1<Float>, Float)> {
        match &self.config.path_type {
            RegularizationPathType::Lasso => self.solve_lasso(x, y, lambda, initial_coef, y_mean),
            RegularizationPathType::ElasticNet { l1_ratio } => {
                self.solve_elastic_net(x, y, lambda, *l1_ratio, initial_coef, y_mean)
            }
            RegularizationPathType::GroupLasso { groups } => {
                self.solve_group_lasso(x, y, lambda, groups, initial_coef, y_mean)
            }
            RegularizationPathType::AdaptiveLasso { weights } => {
                self.solve_adaptive_lasso(x, y, lambda, weights, initial_coef, y_mean)
            }
            RegularizationPathType::FusedLasso => {
                self.solve_fused_lasso(x, y, lambda, initial_coef, y_mean)
            }
        }
    }

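    /// Lasso via cyclic coordinate descent with soft-thresholding updates.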
    fn solve_lasso(
        &self,
        x: &Array2<Float>,
        y: &Array1<Float>,
        lambda: Float,
        initial_coef: &Array1<Float>,
        y_mean: Float,
    ) -> Result<(Array1<Float>, Float)> {
        let n_samples = x.nrows();
        let n_features = x.ncols();
        let mut coef = initial_coef.clone();
        let mut intercept = y_mean;

        let mut xtx_diag = Array1::zeros(n_features);
        for j in 0..n_features {
            // Clamp to avoid dividing by zero for all-zero columns.
            xtx_diag[j] = x
                .column(j)
                .iter()
                .map(|&val| val * val)
                .sum::<Float>()
                .max(1e-12);
        }

        for _ in 0..self.config.max_iter {
            let mut converged = true;

            for j in 0..n_features {
                let old_coef_j = coef[j];

                let mut residual_sum = 0.0;
                for i in 0..n_samples {
                    let mut prediction = intercept;
                    for k in 0..n_features {
                        if k != j {
                            prediction += coef[k] * x[[i, k]];
                        }
                    }
                    residual_sum += x[[i, j]] * (y[i] - prediction);
                }

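                // Soft-thresholding update: shrink the partial correlation by
                // the scaled penalty; exact zeros give sparsity.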
                let threshold = lambda * n_samples as Float;
                if residual_sum > threshold {
                    coef[j] = (residual_sum - threshold) / xtx_diag[j];
                } else if residual_sum < -threshold {
                    coef[j] = (residual_sum + threshold) / xtx_diag[j];
                } else {
                    coef[j] = 0.0;
                }

                if (coef[j] - old_coef_j).abs() > self.config.tol {
                    converged = false;
                }
            }

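            // Re-estimate the intercept as the mean residual.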
            if self.config.fit_intercept {
                let mut residual_sum = 0.0;
                for i in 0..n_samples {
                    let mut prediction = 0.0;
                    for k in 0..n_features {
                        prediction += coef[k] * x[[i, k]];
                    }
                    residual_sum += y[i] - prediction;
                }
                intercept = residual_sum / n_samples as Float;
            }

            if converged {
                break;
            }
        }

        Ok((coef, intercept))
    }

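    /// Elastic Net via coordinate descent: the L2 term augments the coordinate
    /// curvature, and the L1 term is applied by soft-thresholding.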
    fn solve_elastic_net(
        &self,
        x: &Array2<Float>,
        y: &Array1<Float>,
        lambda: Float,
        l1_ratio: Float,
        initial_coef: &Array1<Float>,
        y_mean: Float,
    ) -> Result<(Array1<Float>, Float)> {
        let n_samples = x.nrows();
        let n_features = x.ncols();
        let mut coef = initial_coef.clone();
        let mut intercept = y_mean;

        let l1_penalty = lambda * l1_ratio;
        let l2_penalty = lambda * (1.0 - l1_ratio);

        let mut xtx_diag = Array1::zeros(n_features);
        for j in 0..n_features {
            // Clamp to avoid dividing by zero when `l1_ratio == 1` and a
            // column is all zeros.
            xtx_diag[j] = (x.column(j).iter().map(|&val| val * val).sum::<Float>()
                + l2_penalty * n_samples as Float)
                .max(1e-12);
        }

        for _ in 0..self.config.max_iter {
            let mut converged = true;

            for j in 0..n_features {
                let old_coef_j = coef[j];

                let mut residual_sum = 0.0;
                for i in 0..n_samples {
                    let mut prediction = intercept;
                    for k in 0..n_features {
                        if k != j {
                            prediction += coef[k] * x[[i, k]];
                        }
                    }
                    residual_sum += x[[i, j]] * (y[i] - prediction);
                }

                let threshold = l1_penalty * n_samples as Float;
                if residual_sum > threshold {
                    coef[j] = (residual_sum - threshold) / xtx_diag[j];
                } else if residual_sum < -threshold {
                    coef[j] = (residual_sum + threshold) / xtx_diag[j];
                } else {
                    coef[j] = 0.0;
                }

                if (coef[j] - old_coef_j).abs() > self.config.tol {
                    converged = false;
                }
            }

            if self.config.fit_intercept {
                let mut residual_sum = 0.0;
                for i in 0..n_samples {
                    let mut prediction = 0.0;
                    for k in 0..n_features {
                        prediction += coef[k] * x[[i, k]];
                    }
                    residual_sum += y[i] - prediction;
                }
                intercept = residual_sum / n_samples as Float;
            }

            if converged {
                break;
            }
        }

        Ok((coef, intercept))
    }

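    /// Group Lasso via a simplified block update: each group's coefficients
    /// are set from the group's residual correlation, shrunk toward zero by
    /// the group-wise soft-thresholding factor `(1 - lambda / ||g||)_+`.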
    fn solve_group_lasso(
        &self,
        x: &Array2<Float>,
        y: &Array1<Float>,
        lambda: Float,
        groups: &[Vec<usize>],
        initial_coef: &Array1<Float>,
        y_mean: Float,
    ) -> Result<(Array1<Float>, Float)> {
        let n_samples = x.nrows();
        let n_features = x.ncols();
        let mut coef = initial_coef.clone();
        let intercept = y_mean;

        for _ in 0..self.config.max_iter {
            let mut converged = true;

            for group in groups {
                let group_size = group.len();
                let mut old_group_coef = Array1::zeros(group_size);
                for (idx, &feature_idx) in group.iter().enumerate() {
                    if feature_idx < n_features {
                        old_group_coef[idx] = coef[feature_idx];
                    }
                }

                let mut group_gradient = Array1::zeros(group_size);
                for i in 0..n_samples {
                    let mut prediction = intercept;
                    for k in 0..n_features {
                        prediction += coef[k] * x[[i, k]];
                    }
                    let residual = y[i] - prediction;

                    for (idx, &feature_idx) in group.iter().enumerate() {
                        if feature_idx < n_features {
                            group_gradient[idx] +=
                                x[[i, feature_idx]] * residual / n_samples as Float;
                        }
                    }
                }

                let group_norm = group_gradient
                    .iter()
                    .map(|&x: &Float| x * x)
                    .sum::<Float>()
                    .sqrt();
                if group_norm > lambda {
                    let shrinkage_factor = (1.0 - lambda / group_norm).max(0.0);
                    for (idx, &feature_idx) in group.iter().enumerate() {
                        if feature_idx < n_features {
                            coef[feature_idx] = group_gradient[idx] * shrinkage_factor;
                            if (coef[feature_idx] - old_group_coef[idx]).abs() > self.config.tol {
                                converged = false;
                            }
                        }
                    }
                } else {
                    for &feature_idx in group {
                        if feature_idx < n_features {
                            if coef[feature_idx].abs() > self.config.tol {
                                converged = false;
                            }
                            coef[feature_idx] = 0.0;
                        }
                    }
                }
            }

            if converged {
                break;
            }
        }

        Ok((coef, intercept))
    }

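    /// Adaptive Lasso: coordinate descent where each feature's L1 threshold is
    /// scaled by its adaptive weight.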
    fn solve_adaptive_lasso(
        &self,
        x: &Array2<Float>,
        y: &Array1<Float>,
        lambda: Float,
        weights: &Array1<Float>,
        initial_coef: &Array1<Float>,
        y_mean: Float,
    ) -> Result<(Array1<Float>, Float)> {
        let n_samples = x.nrows();
        let n_features = x.ncols();
        let mut coef = initial_coef.clone();
        let intercept = y_mean;

        let mut xtx_diag = Array1::zeros(n_features);
        for j in 0..n_features {
            // Clamp to avoid dividing by zero for all-zero columns.
            xtx_diag[j] = x
                .column(j)
                .iter()
                .map(|&val| val * val)
                .sum::<Float>()
                .max(1e-12);
        }

        for _ in 0..self.config.max_iter {
            let mut converged = true;

            for j in 0..n_features {
                let old_coef_j = coef[j];

                let mut residual_sum = 0.0;
                for i in 0..n_samples {
                    let mut prediction = intercept;
                    for k in 0..n_features {
                        if k != j {
                            prediction += coef[k] * x[[i, k]];
                        }
                    }
                    residual_sum += x[[i, j]] * (y[i] - prediction);
                }

                let adaptive_threshold = lambda * weights[j] * n_samples as Float;
                if residual_sum > adaptive_threshold {
                    coef[j] = (residual_sum - adaptive_threshold) / xtx_diag[j];
                } else if residual_sum < -adaptive_threshold {
                    coef[j] = (residual_sum + adaptive_threshold) / xtx_diag[j];
                } else {
                    coef[j] = 0.0;
                }

                if (coef[j] - old_coef_j).abs() > self.config.tol {
                    converged = false;
                }
            }

            if converged {
                break;
            }
        }

        Ok((coef, intercept))
    }

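    /// Simplified Fused Lasso: currently falls back to plain Lasso and does
    /// not apply the fusion penalty on successive coefficient differences.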
    fn solve_fused_lasso(
        &self,
        x: &Array2<Float>,
        y: &Array1<Float>,
        lambda: Float,
        initial_coef: &Array1<Float>,
        y_mean: Float,
    ) -> Result<(Array1<Float>, Float)> {
        self.solve_lasso(x, y, lambda, initial_coef, y_mean)
    }

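    /// Estimate the out-of-sample error (MSE) for a single lambda, returning
    /// the mean and standard deviation across folds.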
    fn cross_validate_lambda(
        &self,
        x: &Array2<Float>,
        y: &Array1<Float>,
        lambda: Float,
        y_mean: Float,
    ) -> Result<(Float, Float)> {
        let n_samples = x.nrows();
        let mut cv_scores = Vec::new();

        match &self.config.cv_strategy {
            CrossValidationStrategy::KFold { k } => {
                let fold_size = n_samples / k;

                for fold in 0..*k {
                    let test_start = fold * fold_size;
                    let test_end = if fold == k - 1 {
                        n_samples
                    } else {
                        (fold + 1) * fold_size
                    };

                    let mut train_indices = Vec::new();
                    let mut test_indices = Vec::new();

                    for i in 0..n_samples {
                        if i >= test_start && i < test_end {
                            test_indices.push(i);
                        } else {
                            train_indices.push(i);
                        }
                    }

                    if train_indices.is_empty() || test_indices.is_empty() {
                        continue;
                    }

                    let x_train = self.extract_rows(x, &train_indices)?;
                    let y_train = self.extract_elements(y, &train_indices)?;
                    let x_test = self.extract_rows(x, &test_indices)?;
                    let y_test = self.extract_elements(y, &test_indices)?;

                    let zero_coef = Array1::zeros(x.ncols());
                    let (coef_fold, intercept_fold) =
                        self.solve_for_lambda(&x_train, &y_train, lambda, &zero_coef, y_mean)?;

                    let mut test_error = 0.0;
                    for i in 0..x_test.nrows() {
                        let mut prediction = if self.config.fit_intercept {
                            intercept_fold
                        } else {
                            0.0
                        };

                        for j in 0..x_test.ncols() {
                            prediction += coef_fold[j] * x_test[[i, j]];
                        }

                        test_error += (y_test[i] - prediction).powi(2);
                    }
                    test_error /= x_test.nrows() as Float;
                    cv_scores.push(test_error);
                }
            }
            _ => {
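                // Simplified fallback for the remaining strategies: a single
                // 80/20 train/test holdout split (no shuffling).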
                let n_test = n_samples / 5;
                let n_train = n_samples - n_test;

                let x_train = x.slice(s![..n_train, ..]).to_owned();
                let y_train = y.slice(s![..n_train]).to_owned();
                let x_test = x.slice(s![n_train.., ..]).to_owned();
                let y_test = y.slice(s![n_train..]).to_owned();

                let zero_coef = Array1::zeros(x.ncols());
                let (coef_fold, intercept_fold) =
                    self.solve_for_lambda(&x_train, &y_train, lambda, &zero_coef, y_mean)?;

                let mut test_error = 0.0;
                for i in 0..x_test.nrows() {
                    let mut prediction = if self.config.fit_intercept {
                        intercept_fold
                    } else {
                        0.0
                    };

                    for j in 0..x_test.ncols() {
                        prediction += coef_fold[j] * x_test[[i, j]];
                    }

                    test_error += (y_test[i] - prediction).powi(2);
                }
                test_error /= x_test.nrows() as Float;
                cv_scores.push(test_error);
            }
        }

        if cv_scores.is_empty() {
            return Ok((Float::INFINITY, 0.0));
        }

        let mean_score = cv_scores.iter().sum::<Float>() / cv_scores.len() as Float;
        let variance = cv_scores
            .iter()
            .map(|&score| (score - mean_score).powi(2))
            .sum::<Float>()
            / cv_scores.len() as Float;
        let std_score = variance.sqrt();

        Ok((mean_score, std_score))
    }

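    /// Copy the given rows of `matrix` into a new array.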
    fn extract_rows(&self, matrix: &Array2<Float>, indices: &[usize]) -> Result<Array2<Float>> {
        let n_features = matrix.ncols();
        let mut result = Array2::zeros((indices.len(), n_features));

        for (i, &idx) in indices.iter().enumerate() {
            if idx < matrix.nrows() {
                result.row_mut(i).assign(&matrix.row(idx));
            }
        }

        Ok(result)
    }

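    /// Copy the given elements of `array` into a new array.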
    fn extract_elements(&self, array: &Array1<Float>, indices: &[usize]) -> Result<Array1<Float>> {
        let mut result = Array1::zeros(indices.len());

        for (i, &idx) in indices.iter().enumerate() {
            if idx < array.len() {
                result[i] = array[idx];
            }
        }

        Ok(result)
    }
}

impl RegularizationPathResult {
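    /// Coefficients at an exact lambda value from the path, if present.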
    pub fn coef_at_lambda(&self, lambda: Float) -> Option<ArrayView1<'_, Float>> {
        let idx = self
            .lambdas
            .iter()
            .position(|&l| (l - lambda).abs() < 1e-10)?;
        Some(self.coef_path.row(idx))
    }

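    /// The coefficient of one feature across all lambdas.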
    pub fn feature_path(&self, feature_idx: usize) -> Option<ArrayView1<'_, Float>> {
        if feature_idx < self.coef_path.ncols() {
            Some(self.coef_path.column(feature_idx))
        } else {
            None
        }
    }

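    /// The one-standard-error model: its lambda and coefficients.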
    pub fn sparse_model_1se(&self) -> (Float, ArrayView1<'_, Float>) {
        let lambda = self.lambda_1se;
        let coef = self.coef_path.row(self.lambda_1se_idx);
        (lambda, coef)
    }

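    /// Summary statistics of the fitted path as a name-to-value map.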
    pub fn summary(&self) -> HashMap<String, Float> {
        let mut summary = HashMap::new();

        summary.insert("n_lambdas".to_string(), self.lambdas.len() as Float);
        summary.insert("best_lambda".to_string(), self.best_lambda);
        summary.insert("lambda_1se".to_string(), self.lambda_1se);
        summary.insert(
            "best_cv_score".to_string(),
            self.cv_scores[self.best_lambda_idx],
        );
        summary.insert(
            "min_nonzero_features".to_string(),
            *self.n_nonzero.iter().min().unwrap_or(&0) as Float,
        );
        summary.insert(
            "max_nonzero_features".to_string(),
            *self.n_nonzero.iter().max().unwrap_or(&0) as Float,
        );

        summary
    }
}

#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_regularization_path_config() {
        let config = RegularizationPathConfig {
            path_type: RegularizationPathType::Lasso,
            n_lambdas: 50,
            lambda_min_ratio: 1e-3,
            ..Default::default()
        };

        assert_eq!(config.n_lambdas, 50);
        assert_eq!(config.lambda_min_ratio, 1e-3);
        assert!(matches!(config.path_type, RegularizationPathType::Lasso));
    }

    #[test]
    fn test_lambda_sequence_computation() {
        let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
        let y = array![1.0, 2.0, 3.0];

        let config = RegularizationPathConfig::default();
        let solver = RegularizationPathSolver::new(config);

        let lambdas = solver.compute_lambda_sequence(&x, &y).unwrap();

        assert_eq!(lambdas.len(), 100);
        assert!(lambdas[0] > lambdas[lambdas.len() - 1]);
        for i in 1..lambdas.len() {
            assert!(lambdas[i - 1] >= lambdas[i]);
        }
    }

    #[test]
    #[ignore = "Slow test: computes regularization path. Run with --ignored flag"]
    fn test_lasso_path() {
        let x = array![
            [1.0, 2.0, 0.1],
            [2.0, 3.0, 0.2],
            [3.0, 4.0, 0.3],
            [4.0, 5.0, 0.4],
            [5.0, 6.0, 0.5],
        ];
        let y = array![1.0, 2.0, 3.0, 4.0, 5.0];

        let config = RegularizationPathConfig {
            path_type: RegularizationPathType::Lasso,
            n_lambdas: 20,
            max_iter: 100,
            verbose: false,
            ..Default::default()
        };

        let solver = RegularizationPathSolver::new(config);
        let result = solver.fit_path(&x, &y).unwrap();

        assert_eq!(result.lambdas.len(), 20);
        assert_eq!(result.coef_path.nrows(), 20);
        assert_eq!(result.coef_path.ncols(), 3);
        assert_eq!(result.intercept_path.len(), 20);
        assert_eq!(result.cv_scores.len(), 20);

        assert!(result.n_nonzero[0] >= result.n_nonzero[result.n_nonzero.len() - 1]);

        assert!(result.best_lambda_idx < result.lambdas.len());
        assert!(result.lambda_1se_idx < result.lambdas.len());

        let best_coef = result.coef_path.row(result.best_lambda_idx);
        assert_eq!(best_coef.len(), 3);

        let summary = result.summary();
        assert!(summary.contains_key("best_lambda"));
        assert!(summary.contains_key("lambda_1se"));
    }

    #[test]
    fn test_elastic_net_path_type() {
        let path_type = RegularizationPathType::ElasticNet { l1_ratio: 0.5 };

        if let RegularizationPathType::ElasticNet { l1_ratio } = path_type {
            assert_eq!(l1_ratio, 0.5);
        } else {
            panic!("Expected ElasticNet path type");
        }
    }

    #[test]
    fn test_group_lasso_path_type() {
        let groups = vec![vec![0, 1], vec![2, 3], vec![4]];
        let path_type = RegularizationPathType::GroupLasso {
            groups: groups.clone(),
        };

        if let RegularizationPathType::GroupLasso { groups: g } = path_type {
            assert_eq!(g.len(), 3);
            assert_eq!(g[0], vec![0, 1]);
        } else {
            panic!("Expected GroupLasso path type");
        }
    }

    #[test]
    fn test_standardization() {
        let x = array![[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]];
        let config = RegularizationPathConfig::default();
        let solver = RegularizationPathSolver::new(config);

        let (x_std, means, _stds) = solver.standardize_features(&x).unwrap();

        for j in 0..x_std.ncols() {
            let col_mean = x_std.column(j).mean().unwrap();
            assert!(col_mean.abs() < 1e-10);
        }

        assert!((means[0] - 2.0).abs() < 1e-10);
        assert!((means[1] - 20.0).abs() < 1e-10);
    }
}