1use crate::base::SelectorMixin;
4use scirs2_core::ndarray::{Array1, Array2};
5use sklears_core::{
8 error::{validate, Result as SklResult, SklearsError},
9 traits::{Estimator, Fit, Trained, Transform, Untrained},
10 types::Float,
11};
12use std::marker::PhantomData;
13
/// Feature selector based on LASSO (L1-regularized) linear regression.
///
/// Fits a LASSO model by coordinate descent and keeps the features whose
/// absolute coefficients exceed a threshold. The `State` type parameter
/// implements the typestate pattern: `Untrained` before `fit`, `Trained` after,
/// so trained-only accessors are unavailable on an unfitted selector.
#[derive(Debug, Clone)]
pub struct LassoSelector<State = Untrained> {
    // L1 regularization strength (non-negative; enforced by the builder).
    alpha: f64,
    // Maximum number of coordinate-descent sweeps.
    max_iter: usize,
    // Convergence tolerance on the L1 norm of the per-sweep coefficient change.
    tolerance: f64,
    // Selection cutoff on |coefficient|; `None` means "use mean |coefficient|".
    threshold: Option<f64>,
    // Zero-sized marker carrying the typestate.
    state: PhantomData<State>,
    // Fitted coefficients (populated only in the `Trained` state).
    coefficients_: Option<Array1<Float>>,
    // Indices of the selected features (populated only in the `Trained` state).
    selected_features_: Option<Vec<usize>>,
    // Number of features seen during `fit` (populated only in the `Trained` state).
    n_features_: Option<usize>,
}
28
29impl LassoSelector<Untrained> {
30 pub fn new() -> Self {
32 Self {
33 alpha: 1.0,
34 max_iter: 1000,
35 tolerance: 1e-4,
36 threshold: None,
37 state: PhantomData,
38 coefficients_: None,
39 selected_features_: None,
40 n_features_: None,
41 }
42 }
43
44 pub fn alpha(mut self, alpha: f64) -> Self {
46 if alpha < 0.0 {
47 panic!("alpha must be non-negative");
48 }
49 self.alpha = alpha;
50 self
51 }
52
53 pub fn max_iter(mut self, max_iter: usize) -> Self {
55 self.max_iter = max_iter;
56 self
57 }
58
59 pub fn tolerance(mut self, tolerance: f64) -> Self {
61 self.tolerance = tolerance;
62 self
63 }
64
65 pub fn threshold(mut self, threshold: Option<f64>) -> Self {
67 self.threshold = threshold;
68 self
69 }
70}
71
72impl Default for LassoSelector<Untrained> {
73 fn default() -> Self {
74 Self::new()
75 }
76}
77
impl Estimator for LassoSelector<Untrained> {
    // No tunable configuration is exposed through the `Estimator` trait;
    // hyperparameters are set through the builder methods instead.
    type Config = ();
    type Error = SklearsError;
    type Float = f64;

    fn config(&self) -> &Self::Config {
        // `&()` is a reference to a zero-sized constant; const promotion makes
        // it `'static`, so returning it is sound.
        &()
    }
}
87
88impl Fit<Array2<Float>, Array1<Float>> for LassoSelector<Untrained> {
89 type Fitted = LassoSelector<Trained>;
90
91 fn fit(self, x: &Array2<Float>, y: &Array1<Float>) -> SklResult<Self::Fitted> {
92 validate::check_consistent_length(x, y)?;
93
94 let (n_samples, n_features) = x.dim();
95 if n_samples < 2 {
96 return Err(SklearsError::InvalidInput(
97 "LASSO requires at least 2 samples".to_string(),
98 ));
99 }
100
101 let (x_normalized, _x_mean, x_std) = normalize_and_center(x)?;
103 let y_mean = y.mean().unwrap_or(0.0);
104 let y_centered: Array1<Float> = y - y_mean;
105
106 let mut coefficients = Array1::zeros(n_features);
108
109 for _iter in 0..self.max_iter {
110 let old_coefficients = coefficients.clone();
111
112 for j in 0..n_features {
113 if x_std[j] <= Float::EPSILON {
114 continue; }
116
117 let mut residual = y_centered.clone();
119 for k in 0..n_features {
120 if k != j {
121 let x_k = x_normalized.column(k);
122 for i in 0..n_samples {
123 residual[i] -= coefficients[k] * x_k[i];
124 }
125 }
126 }
127
128 let x_j = x_normalized.column(j);
130 let rho = x_j.dot(&residual) / n_samples as Float;
131
132 coefficients[j] = soft_threshold(rho, self.alpha);
134 }
135
136 let diff = (&coefficients - &old_coefficients).mapv(|x| x.abs()).sum();
138 if diff < self.tolerance {
139 break;
140 }
141 }
142
143 for j in 0..n_features {
145 if x_std[j] > Float::EPSILON {
146 coefficients[j] /= x_std[j];
147 }
148 }
149
150 let threshold = self.threshold.unwrap_or_else(|| {
152 let abs_coeffs: Array1<Float> = coefficients.mapv(|x| x.abs());
153 abs_coeffs.mean().unwrap_or(0.0)
154 });
155
156 let selected_features: Vec<usize> = coefficients
157 .iter()
158 .enumerate()
159 .filter(|(_, &coeff)| coeff.abs() > threshold)
160 .map(|(idx, _)| idx)
161 .collect();
162
163 if selected_features.is_empty() {
164 return Err(SklearsError::InvalidInput(
165 "No features selected by LASSO. Try reducing alpha or threshold.".to_string(),
166 ));
167 }
168
169 Ok(LassoSelector {
170 alpha: self.alpha,
171 max_iter: self.max_iter,
172 tolerance: self.tolerance,
173 threshold: self.threshold,
174 state: PhantomData,
175 coefficients_: Some(coefficients),
176 selected_features_: Some(selected_features),
177 n_features_: Some(n_features),
178 })
179 }
180}
181
182impl Transform<Array2<Float>> for LassoSelector<Trained> {
183 fn transform(&self, x: &Array2<Float>) -> SklResult<Array2<Float>> {
184 validate::check_n_features(x, self.n_features_.unwrap())?;
185
186 let selected_features = self.selected_features_.as_ref().unwrap();
187 let n_samples = x.nrows();
188 let k = selected_features.len();
189 let mut x_new = Array2::zeros((n_samples, k));
190
191 for (new_idx, &old_idx) in selected_features.iter().enumerate() {
192 x_new.column_mut(new_idx).assign(&x.column(old_idx));
193 }
194
195 Ok(x_new)
196 }
197}
198
199impl SelectorMixin for LassoSelector<Trained> {
200 fn get_support(&self) -> SklResult<Array1<bool>> {
201 let n_features = self.n_features_.unwrap();
202 let selected_features = self.selected_features_.as_ref().unwrap();
203 let mut support = Array1::from_elem(n_features, false);
204
205 for &idx in selected_features {
206 support[idx] = true;
207 }
208
209 Ok(support)
210 }
211
212 fn transform_features(&self, indices: &[usize]) -> SklResult<Vec<usize>> {
213 let selected_features = self.selected_features_.as_ref().unwrap();
214 Ok(indices
215 .iter()
216 .filter_map(|&idx| selected_features.iter().position(|&f| f == idx))
217 .collect())
218 }
219}
220
221impl LassoSelector<Trained> {
222 pub fn coefficients(&self) -> &Array1<Float> {
224 self.coefficients_.as_ref().unwrap()
225 }
226
227 pub fn selected_features(&self) -> &[usize] {
229 self.selected_features_.as_ref().unwrap()
230 }
231}
232
/// Feature selector based on Elastic Net (mixed L1/L2) regression.
///
/// Fits an Elastic Net model by coordinate descent and keeps the features
/// whose absolute coefficients exceed a threshold. The `State` type parameter
/// implements the typestate pattern: `Untrained` before `fit`, `Trained` after.
#[derive(Debug, Clone)]
pub struct ElasticNetSelector<State = Untrained> {
    // Overall regularization strength (non-negative; enforced by the builder).
    alpha: f64,
    // Mix between penalties: 1.0 is pure L1 (LASSO), 0.0 is pure L2 (ridge).
    l1_ratio: f64,
    // Maximum number of coordinate-descent sweeps.
    max_iter: usize,
    // Convergence tolerance on the L1 norm of the per-sweep coefficient change.
    tolerance: f64,
    // Selection cutoff on |coefficient|; `None` means "use mean |coefficient|".
    threshold: Option<f64>,
    // Zero-sized marker carrying the typestate.
    state: PhantomData<State>,
    // Fitted coefficients (populated only in the `Trained` state).
    coefficients_: Option<Array1<Float>>,
    // Indices of the selected features (populated only in the `Trained` state).
    selected_features_: Option<Vec<usize>>,
    // Number of features seen during `fit` (populated only in the `Trained` state).
    n_features_: Option<usize>,
}
248
249impl ElasticNetSelector<Untrained> {
250 pub fn new() -> Self {
252 Self {
253 alpha: 1.0,
254 l1_ratio: 0.5,
255 max_iter: 1000,
256 tolerance: 1e-4,
257 threshold: None,
258 state: PhantomData,
259 coefficients_: None,
260 selected_features_: None,
261 n_features_: None,
262 }
263 }
264
265 pub fn alpha(mut self, alpha: f64) -> Self {
267 if alpha < 0.0 {
268 panic!("alpha must be non-negative");
269 }
270 self.alpha = alpha;
271 self
272 }
273
274 pub fn l1_ratio(mut self, l1_ratio: f64) -> Self {
276 if !(0.0..=1.0).contains(&l1_ratio) {
277 panic!("l1_ratio must be between 0 and 1");
278 }
279 self.l1_ratio = l1_ratio;
280 self
281 }
282
283 pub fn max_iter(mut self, max_iter: usize) -> Self {
285 self.max_iter = max_iter;
286 self
287 }
288
289 pub fn tolerance(mut self, tolerance: f64) -> Self {
291 self.tolerance = tolerance;
292 self
293 }
294
295 pub fn threshold(mut self, threshold: Option<f64>) -> Self {
297 self.threshold = threshold;
298 self
299 }
300}
301
302impl Default for ElasticNetSelector<Untrained> {
303 fn default() -> Self {
304 Self::new()
305 }
306}
307
impl Estimator for ElasticNetSelector<Untrained> {
    // No tunable configuration is exposed through the `Estimator` trait;
    // hyperparameters are set through the builder methods instead.
    type Config = ();
    type Error = SklearsError;
    type Float = f64;

    fn config(&self) -> &Self::Config {
        // `&()` is a reference to a zero-sized constant; const promotion makes
        // it `'static`, so returning it is sound.
        &()
    }
}
317
318impl Fit<Array2<Float>, Array1<Float>> for ElasticNetSelector<Untrained> {
319 type Fitted = ElasticNetSelector<Trained>;
320
321 fn fit(self, x: &Array2<Float>, y: &Array1<Float>) -> SklResult<Self::Fitted> {
322 validate::check_consistent_length(x, y)?;
323
324 let (n_samples, n_features) = x.dim();
325 if n_samples < 2 {
326 return Err(SklearsError::InvalidInput(
327 "Elastic Net requires at least 2 samples".to_string(),
328 ));
329 }
330
331 let (x_normalized, _x_mean, x_std) = normalize_and_center(x)?;
333 let y_mean = y.mean().unwrap_or(0.0);
334 let y_centered: Array1<Float> = y - y_mean;
335
336 let mut coefficients = Array1::zeros(n_features);
338 let alpha_l1 = self.alpha * self.l1_ratio;
339 let alpha_l2 = self.alpha * (1.0 - self.l1_ratio);
340
341 for _iter in 0..self.max_iter {
342 let old_coefficients = coefficients.clone();
343
344 for j in 0..n_features {
345 if x_std[j] <= Float::EPSILON {
346 continue;
347 }
348
349 let mut residual = y_centered.clone();
351 for k in 0..n_features {
352 if k != j {
353 let x_k = x_normalized.column(k);
354 for i in 0..n_samples {
355 residual[i] -= coefficients[k] * x_k[i];
356 }
357 }
358 }
359
360 let x_j = x_normalized.column(j);
362 let rho = x_j.dot(&residual) / n_samples as Float;
363
364 let denominator = 1.0 + alpha_l2;
366 coefficients[j] = soft_threshold(rho, alpha_l1) / denominator;
367 }
368
369 let diff = (&coefficients - &old_coefficients).mapv(|x| x.abs()).sum();
371 if diff < self.tolerance {
372 break;
373 }
374 }
375
376 for j in 0..n_features {
378 if x_std[j] > Float::EPSILON {
379 coefficients[j] /= x_std[j];
380 }
381 }
382
383 let threshold = self.threshold.unwrap_or_else(|| {
385 let abs_coeffs: Array1<Float> = coefficients.mapv(|x| x.abs());
386 abs_coeffs.mean().unwrap_or(0.0)
387 });
388
389 let selected_features: Vec<usize> = coefficients
390 .iter()
391 .enumerate()
392 .filter(|(_, &coeff)| coeff.abs() > threshold)
393 .map(|(idx, _)| idx)
394 .collect();
395
396 if selected_features.is_empty() {
397 return Err(SklearsError::InvalidInput(
398 "No features selected by Elastic Net. Try reducing alpha or threshold.".to_string(),
399 ));
400 }
401
402 Ok(ElasticNetSelector {
403 alpha: self.alpha,
404 l1_ratio: self.l1_ratio,
405 max_iter: self.max_iter,
406 tolerance: self.tolerance,
407 threshold: self.threshold,
408 state: PhantomData,
409 coefficients_: Some(coefficients),
410 selected_features_: Some(selected_features),
411 n_features_: Some(n_features),
412 })
413 }
414}
415
416impl Transform<Array2<Float>> for ElasticNetSelector<Trained> {
417 fn transform(&self, x: &Array2<Float>) -> SklResult<Array2<Float>> {
418 validate::check_n_features(x, self.n_features_.unwrap())?;
419
420 let selected_features = self.selected_features_.as_ref().unwrap();
421 let n_samples = x.nrows();
422 let k = selected_features.len();
423 let mut x_new = Array2::zeros((n_samples, k));
424
425 for (new_idx, &old_idx) in selected_features.iter().enumerate() {
426 x_new.column_mut(new_idx).assign(&x.column(old_idx));
427 }
428
429 Ok(x_new)
430 }
431}
432
433impl SelectorMixin for ElasticNetSelector<Trained> {
434 fn get_support(&self) -> SklResult<Array1<bool>> {
435 let n_features = self.n_features_.unwrap();
436 let selected_features = self.selected_features_.as_ref().unwrap();
437 let mut support = Array1::from_elem(n_features, false);
438
439 for &idx in selected_features {
440 support[idx] = true;
441 }
442
443 Ok(support)
444 }
445
446 fn transform_features(&self, indices: &[usize]) -> SklResult<Vec<usize>> {
447 let selected_features = self.selected_features_.as_ref().unwrap();
448 Ok(indices
449 .iter()
450 .filter_map(|&idx| selected_features.iter().position(|&f| f == idx))
451 .collect())
452 }
453}
454
455impl ElasticNetSelector<Trained> {
456 pub fn coefficients(&self) -> &Array1<Float> {
458 self.coefficients_.as_ref().unwrap()
459 }
460
461 pub fn selected_features(&self) -> &[usize] {
463 self.selected_features_.as_ref().unwrap()
464 }
465}
466
467fn soft_threshold(x: Float, threshold: Float) -> Float {
471 if x > threshold {
472 x - threshold
473 } else if x < -threshold {
474 x + threshold
475 } else {
476 0.0
477 }
478}
479
480fn normalize_and_center(
482 x: &Array2<Float>,
483) -> SklResult<(Array2<Float>, Array1<Float>, Array1<Float>)> {
484 let (n_samples, n_features) = x.dim();
485 let mut x_normalized = Array2::zeros((n_samples, n_features));
486 let mut x_mean = Array1::zeros(n_features);
487 let mut x_std = Array1::zeros(n_features);
488
489 for j in 0..n_features {
491 x_mean[j] = x.column(j).mean().unwrap_or(0.0);
492 }
493
494 for j in 0..n_features {
496 for i in 0..n_samples {
497 x_normalized[[i, j]] = x[[i, j]] - x_mean[j];
498 }
499 }
500
501 for j in 0..n_features {
503 let variance = x_normalized.column(j).mapv(|x| x * x).mean().unwrap_or(0.0);
504 x_std[j] = variance.sqrt();
505
506 if x_std[j] > Float::EPSILON {
508 for i in 0..n_samples {
509 x_normalized[[i, j]] /= x_std[j];
510 }
511 }
512 }
513
514 Ok((x_normalized, x_mean, x_std))
515}
516
/// Feature selector based on ridge (L2-regularized) linear regression.
///
/// Solves the ridge normal equations and keeps the features whose absolute
/// coefficients exceed a threshold, optionally capped at `max_features`.
/// The `State` type parameter implements the typestate pattern:
/// `Untrained` before `fit`, `Trained` after.
#[derive(Debug, Clone)]
pub struct RidgeSelector<State = Untrained> {
    // L2 regularization strength (non-negative; enforced by the builder).
    alpha: f64,
    // Selection cutoff on |coefficient|; `None` means "use mean |coefficient|".
    threshold: Option<f64>,
    // Optional cap on how many features may be selected.
    max_features: Option<usize>,
    // Zero-sized marker carrying the typestate.
    state: PhantomData<State>,
    // Fitted coefficients (populated only in the `Trained` state).
    coefficients_: Option<Array1<Float>>,
    // Indices of the selected features (populated only in the `Trained` state).
    selected_features_: Option<Vec<usize>>,
    // Number of features seen during `fit` (populated only in the `Trained` state).
    n_features_: Option<usize>,
}
530
531impl RidgeSelector<Untrained> {
532 pub fn new() -> Self {
534 Self {
535 alpha: 1.0,
536 threshold: None,
537 max_features: None,
538 state: PhantomData,
539 coefficients_: None,
540 selected_features_: None,
541 n_features_: None,
542 }
543 }
544
545 pub fn alpha(mut self, alpha: f64) -> Self {
547 if alpha < 0.0 {
548 panic!("alpha must be non-negative");
549 }
550 self.alpha = alpha;
551 self
552 }
553
554 pub fn threshold(mut self, threshold: Option<f64>) -> Self {
556 self.threshold = threshold;
557 self
558 }
559
560 pub fn max_features(mut self, max_features: Option<usize>) -> Self {
562 self.max_features = max_features;
563 self
564 }
565
566 fn simple_solve(&self, a: &Array2<f64>, b: &Array1<f64>) -> SklResult<Array1<f64>> {
568 let n = a.nrows();
569 if n != a.ncols() {
570 return Err(SklearsError::FitError("Matrix must be square".to_string()));
571 }
572 if n != b.len() {
573 return Err(SklearsError::FitError("Dimension mismatch".to_string()));
574 }
575
576 let mut x = Array1::zeros(n);
579 let max_iter = 1000;
580 let tolerance = 1e-6;
581
582 for _ in 0..max_iter {
583 let mut x_new = Array1::zeros(n);
584 for i in 0..n {
585 let mut sum = 0.0;
586 for j in 0..n {
587 if i != j {
588 sum += a[[i, j]] * x[j];
589 }
590 }
591 x_new[i] = (b[i] - sum) / a[[i, i]];
592 }
593
594 let diff: f64 = x_new
596 .iter()
597 .zip(x.iter())
598 .map(|(a, b): (&f64, &f64)| (a - b).abs())
599 .sum();
600 if diff < tolerance {
601 return Ok(x_new);
602 }
603 x = x_new;
604 }
605
606 Ok(x)
608 }
609}
610
611impl Default for RidgeSelector<Untrained> {
612 fn default() -> Self {
613 Self::new()
614 }
615}
616
impl Estimator for RidgeSelector<Untrained> {
    // No tunable configuration is exposed through the `Estimator` trait;
    // hyperparameters are set through the builder methods instead.
    type Config = ();
    type Error = SklearsError;
    type Float = f64;

    fn config(&self) -> &Self::Config {
        // `&()` is a reference to a zero-sized constant; const promotion makes
        // it `'static`, so returning it is sound.
        &()
    }
}
626
627impl Fit<Array2<Float>, Array1<Float>> for RidgeSelector<Untrained> {
628 type Fitted = RidgeSelector<Trained>;
629
630 fn fit(self, x: &Array2<Float>, y: &Array1<Float>) -> SklResult<Self::Fitted> {
631 validate::check_consistent_length(x, y)?;
632
633 let (n_samples, n_features) = x.dim();
634 if n_samples < 2 {
635 return Err(SklearsError::InvalidInput(
636 "Ridge requires at least 2 samples".to_string(),
637 ));
638 }
639
640 let xtx = x.t().dot(x);
642 let xty = x.t().dot(y);
643
644 let mut xtx_ridge = xtx;
646 for i in 0..n_features {
647 xtx_ridge[[i, i]] += self.alpha;
648 }
649
650 let coefficients = self.simple_solve(&xtx_ridge, &xty)?;
653
654 let threshold = self.threshold.unwrap_or_else(|| {
656 let abs_coeffs: Array1<Float> = coefficients.mapv(|x| x.abs());
657 abs_coeffs.mean().unwrap_or(0.0)
658 });
659
660 let mut selected_features: Vec<usize> = coefficients
661 .iter()
662 .enumerate()
663 .filter(|(_, &coeff)| coeff.abs() > threshold)
664 .map(|(idx, _)| idx)
665 .collect();
666
667 if let Some(max_feat) = self.max_features {
669 if selected_features.len() > max_feat {
670 selected_features.sort_by(|&a, &b| {
672 coefficients[b]
673 .abs()
674 .partial_cmp(&coefficients[a].abs())
675 .unwrap()
676 });
677 selected_features.truncate(max_feat);
678 selected_features.sort(); }
680 }
681
682 if selected_features.is_empty() {
683 return Err(SklearsError::InvalidInput(
684 "No features selected by Ridge. Try reducing alpha or threshold.".to_string(),
685 ));
686 }
687
688 Ok(RidgeSelector {
689 alpha: self.alpha,
690 threshold: self.threshold,
691 max_features: self.max_features,
692 state: PhantomData,
693 coefficients_: Some(coefficients),
694 selected_features_: Some(selected_features),
695 n_features_: Some(n_features),
696 })
697 }
698}
699
700impl Transform<Array2<Float>> for RidgeSelector<Trained> {
701 fn transform(&self, x: &Array2<Float>) -> SklResult<Array2<Float>> {
702 validate::check_n_features(x, self.n_features_.unwrap())?;
703
704 let selected_features = self.selected_features_.as_ref().unwrap();
705 let n_samples = x.nrows();
706 let k = selected_features.len();
707 let mut x_new = Array2::zeros((n_samples, k));
708
709 for (new_idx, &old_idx) in selected_features.iter().enumerate() {
710 x_new.column_mut(new_idx).assign(&x.column(old_idx));
711 }
712
713 Ok(x_new)
714 }
715}
716
717impl SelectorMixin for RidgeSelector<Trained> {
718 fn get_support(&self) -> SklResult<Array1<bool>> {
719 let n_features = self.n_features_.unwrap();
720 let selected_features = self.selected_features_.as_ref().unwrap();
721 let mut support = Array1::from_elem(n_features, false);
722
723 for &idx in selected_features {
724 support[idx] = true;
725 }
726
727 Ok(support)
728 }
729
730 fn transform_features(&self, indices: &[usize]) -> SklResult<Vec<usize>> {
731 let selected_features = self.selected_features_.as_ref().unwrap();
732 Ok(indices
733 .iter()
734 .filter_map(|&idx| selected_features.iter().position(|&f| f == idx))
735 .collect())
736 }
737}
738
739impl RidgeSelector<Trained> {
740 pub fn coefficients(&self) -> &Array1<Float> {
742 self.coefficients_.as_ref().unwrap()
743 }
744
745 pub fn selected_features(&self) -> &[usize] {
747 self.selected_features_.as_ref().unwrap()
748 }
749}