// sklears_feature_selection/regularization_selectors.rs

1//! Regularization-based feature selection methods
2
3use crate::base::SelectorMixin;
4use scirs2_core::ndarray::{Array1, Array2};
5// For now, let's use a simple approach without external linalg
6// We'll implement a basic solve using Gaussian elimination or use the pseudoinverse
7use sklears_core::{
8    error::{validate, Result as SklResult, SklearsError},
9    traits::{Estimator, Fit, Trained, Transform, Untrained},
10    types::Float,
11};
12use std::marker::PhantomData;
13
/// LASSO-based feature selection.
/// Uses L1 regularization to automatically select features by setting coefficients to zero.
#[derive(Debug, Clone)]
pub struct LassoSelector<State = Untrained> {
    // L1 regularization strength; larger values drive more coefficients to zero.
    alpha: f64,
    // Maximum number of coordinate-descent sweeps during fitting.
    max_iter: usize,
    // Convergence tolerance on the L1 change of the coefficient vector per sweep.
    tolerance: f64,
    // Selection cutoff on |coefficient|; None => mean absolute coefficient is used.
    threshold: Option<f64>,
    // Zero-sized marker encoding the Untrained/Trained typestate.
    state: PhantomData<State>,
    // Trained state (None while Untrained, populated by fit):
    // fitted coefficients, rescaled back to the original feature scale.
    coefficients_: Option<Array1<Float>>,
    // Indices of the selected features, in ascending order.
    selected_features_: Option<Vec<usize>>,
    // Number of input features seen during fit.
    n_features_: Option<usize>,
}
28
29impl LassoSelector<Untrained> {
30    /// Create a new LASSO selector
31    pub fn new() -> Self {
32        Self {
33            alpha: 1.0,
34            max_iter: 1000,
35            tolerance: 1e-4,
36            threshold: None,
37            state: PhantomData,
38            coefficients_: None,
39            selected_features_: None,
40            n_features_: None,
41        }
42    }
43
44    /// Set the regularization parameter alpha
45    pub fn alpha(mut self, alpha: f64) -> Self {
46        if alpha < 0.0 {
47            panic!("alpha must be non-negative");
48        }
49        self.alpha = alpha;
50        self
51    }
52
53    /// Set maximum number of iterations
54    pub fn max_iter(mut self, max_iter: usize) -> Self {
55        self.max_iter = max_iter;
56        self
57    }
58
59    /// Set convergence tolerance
60    pub fn tolerance(mut self, tolerance: f64) -> Self {
61        self.tolerance = tolerance;
62        self
63    }
64
65    /// Set threshold for feature selection (if None, uses mean of absolute coefficients)
66    pub fn threshold(mut self, threshold: Option<f64>) -> Self {
67        self.threshold = threshold;
68        self
69    }
70}
71
72impl Default for LassoSelector<Untrained> {
73    fn default() -> Self {
74        Self::new()
75    }
76}
77
impl Estimator for LassoSelector<Untrained> {
    // Unit config: this selector is configured through its builder methods instead.
    type Config = ();
    type Error = SklearsError;
    type Float = f64;

    /// Return the (unit) configuration object.
    fn config(&self) -> &Self::Config {
        &()
    }
}
87
88impl Fit<Array2<Float>, Array1<Float>> for LassoSelector<Untrained> {
89    type Fitted = LassoSelector<Trained>;
90
91    fn fit(self, x: &Array2<Float>, y: &Array1<Float>) -> SklResult<Self::Fitted> {
92        validate::check_consistent_length(x, y)?;
93
94        let (n_samples, n_features) = x.dim();
95        if n_samples < 2 {
96            return Err(SklearsError::InvalidInput(
97                "LASSO requires at least 2 samples".to_string(),
98            ));
99        }
100
101        // Normalize features for LASSO
102        let (x_normalized, _x_mean, x_std) = normalize_and_center(x)?;
103        let y_mean = y.mean().unwrap_or(0.0);
104        let y_centered: Array1<Float> = y - y_mean;
105
106        // Fit LASSO using coordinate descent
107        let mut coefficients = Array1::zeros(n_features);
108
109        for _iter in 0..self.max_iter {
110            let old_coefficients = coefficients.clone();
111
112            for j in 0..n_features {
113                if x_std[j] <= Float::EPSILON {
114                    continue; // Skip zero-variance features
115                }
116
117                // Compute residual without feature j
118                let mut residual = y_centered.clone();
119                for k in 0..n_features {
120                    if k != j {
121                        let x_k = x_normalized.column(k);
122                        for i in 0..n_samples {
123                            residual[i] -= coefficients[k] * x_k[i];
124                        }
125                    }
126                }
127
128                // Compute correlation with residual
129                let x_j = x_normalized.column(j);
130                let rho = x_j.dot(&residual) / n_samples as Float;
131
132                // Soft thresholding (LASSO update)
133                coefficients[j] = soft_threshold(rho, self.alpha);
134            }
135
136            // Check convergence
137            let diff = (&coefficients - &old_coefficients).mapv(|x| x.abs()).sum();
138            if diff < self.tolerance {
139                break;
140            }
141        }
142
143        // Rescale coefficients back to original scale
144        for j in 0..n_features {
145            if x_std[j] > Float::EPSILON {
146                coefficients[j] /= x_std[j];
147            }
148        }
149
150        // Select features based on threshold
151        let threshold = self.threshold.unwrap_or_else(|| {
152            let abs_coeffs: Array1<Float> = coefficients.mapv(|x| x.abs());
153            abs_coeffs.mean().unwrap_or(0.0)
154        });
155
156        let selected_features: Vec<usize> = coefficients
157            .iter()
158            .enumerate()
159            .filter(|(_, &coeff)| coeff.abs() > threshold)
160            .map(|(idx, _)| idx)
161            .collect();
162
163        if selected_features.is_empty() {
164            return Err(SklearsError::InvalidInput(
165                "No features selected by LASSO. Try reducing alpha or threshold.".to_string(),
166            ));
167        }
168
169        Ok(LassoSelector {
170            alpha: self.alpha,
171            max_iter: self.max_iter,
172            tolerance: self.tolerance,
173            threshold: self.threshold,
174            state: PhantomData,
175            coefficients_: Some(coefficients),
176            selected_features_: Some(selected_features),
177            n_features_: Some(n_features),
178        })
179    }
180}
181
182impl Transform<Array2<Float>> for LassoSelector<Trained> {
183    fn transform(&self, x: &Array2<Float>) -> SklResult<Array2<Float>> {
184        validate::check_n_features(x, self.n_features_.unwrap())?;
185
186        let selected_features = self.selected_features_.as_ref().unwrap();
187        let n_samples = x.nrows();
188        let k = selected_features.len();
189        let mut x_new = Array2::zeros((n_samples, k));
190
191        for (new_idx, &old_idx) in selected_features.iter().enumerate() {
192            x_new.column_mut(new_idx).assign(&x.column(old_idx));
193        }
194
195        Ok(x_new)
196    }
197}
198
199impl SelectorMixin for LassoSelector<Trained> {
200    fn get_support(&self) -> SklResult<Array1<bool>> {
201        let n_features = self.n_features_.unwrap();
202        let selected_features = self.selected_features_.as_ref().unwrap();
203        let mut support = Array1::from_elem(n_features, false);
204
205        for &idx in selected_features {
206            support[idx] = true;
207        }
208
209        Ok(support)
210    }
211
212    fn transform_features(&self, indices: &[usize]) -> SklResult<Vec<usize>> {
213        let selected_features = self.selected_features_.as_ref().unwrap();
214        Ok(indices
215            .iter()
216            .filter_map(|&idx| selected_features.iter().position(|&f| f == idx))
217            .collect())
218    }
219}
220
221impl LassoSelector<Trained> {
222    /// Get LASSO coefficients
223    pub fn coefficients(&self) -> &Array1<Float> {
224        self.coefficients_.as_ref().unwrap()
225    }
226
227    /// Get selected features
228    pub fn selected_features(&self) -> &[usize] {
229        self.selected_features_.as_ref().unwrap()
230    }
231}
232
/// Elastic Net feature selection.
/// Combines L1 (LASSO) and L2 (Ridge) regularization.
#[derive(Debug, Clone)]
pub struct ElasticNetSelector<State = Untrained> {
    // Overall regularization strength.
    alpha: f64,
    // Mixing parameter in [0, 1]: 0 = pure Ridge penalty, 1 = pure LASSO penalty.
    l1_ratio: f64,
    // Maximum number of coordinate-descent sweeps during fitting.
    max_iter: usize,
    // Convergence tolerance on the L1 change of the coefficient vector per sweep.
    tolerance: f64,
    // Selection cutoff on |coefficient|; None => mean absolute coefficient is used.
    threshold: Option<f64>,
    // Zero-sized marker encoding the Untrained/Trained typestate.
    state: PhantomData<State>,
    // Trained state (None while Untrained, populated by fit):
    // fitted coefficients, rescaled back to the original feature scale.
    coefficients_: Option<Array1<Float>>,
    // Indices of the selected features, in ascending order.
    selected_features_: Option<Vec<usize>>,
    // Number of input features seen during fit.
    n_features_: Option<usize>,
}
248
249impl ElasticNetSelector<Untrained> {
250    /// Create a new Elastic Net selector
251    pub fn new() -> Self {
252        Self {
253            alpha: 1.0,
254            l1_ratio: 0.5,
255            max_iter: 1000,
256            tolerance: 1e-4,
257            threshold: None,
258            state: PhantomData,
259            coefficients_: None,
260            selected_features_: None,
261            n_features_: None,
262        }
263    }
264
265    /// Set the regularization parameter alpha
266    pub fn alpha(mut self, alpha: f64) -> Self {
267        if alpha < 0.0 {
268            panic!("alpha must be non-negative");
269        }
270        self.alpha = alpha;
271        self
272    }
273
274    /// Set the L1 ratio (0 = Ridge, 1 = LASSO)
275    pub fn l1_ratio(mut self, l1_ratio: f64) -> Self {
276        if !(0.0..=1.0).contains(&l1_ratio) {
277            panic!("l1_ratio must be between 0 and 1");
278        }
279        self.l1_ratio = l1_ratio;
280        self
281    }
282
283    /// Set maximum number of iterations
284    pub fn max_iter(mut self, max_iter: usize) -> Self {
285        self.max_iter = max_iter;
286        self
287    }
288
289    /// Set convergence tolerance
290    pub fn tolerance(mut self, tolerance: f64) -> Self {
291        self.tolerance = tolerance;
292        self
293    }
294
295    /// Set threshold for feature selection
296    pub fn threshold(mut self, threshold: Option<f64>) -> Self {
297        self.threshold = threshold;
298        self
299    }
300}
301
302impl Default for ElasticNetSelector<Untrained> {
303    fn default() -> Self {
304        Self::new()
305    }
306}
307
impl Estimator for ElasticNetSelector<Untrained> {
    // Unit config: this selector is configured through its builder methods instead.
    type Config = ();
    type Error = SklearsError;
    type Float = f64;

    /// Return the (unit) configuration object.
    fn config(&self) -> &Self::Config {
        &()
    }
}
317
318impl Fit<Array2<Float>, Array1<Float>> for ElasticNetSelector<Untrained> {
319    type Fitted = ElasticNetSelector<Trained>;
320
321    fn fit(self, x: &Array2<Float>, y: &Array1<Float>) -> SklResult<Self::Fitted> {
322        validate::check_consistent_length(x, y)?;
323
324        let (n_samples, n_features) = x.dim();
325        if n_samples < 2 {
326            return Err(SklearsError::InvalidInput(
327                "Elastic Net requires at least 2 samples".to_string(),
328            ));
329        }
330
331        // Normalize features
332        let (x_normalized, _x_mean, x_std) = normalize_and_center(x)?;
333        let y_mean = y.mean().unwrap_or(0.0);
334        let y_centered: Array1<Float> = y - y_mean;
335
336        // Fit Elastic Net using coordinate descent
337        let mut coefficients = Array1::zeros(n_features);
338        let alpha_l1 = self.alpha * self.l1_ratio;
339        let alpha_l2 = self.alpha * (1.0 - self.l1_ratio);
340
341        for _iter in 0..self.max_iter {
342            let old_coefficients = coefficients.clone();
343
344            for j in 0..n_features {
345                if x_std[j] <= Float::EPSILON {
346                    continue;
347                }
348
349                // Compute residual without feature j
350                let mut residual = y_centered.clone();
351                for k in 0..n_features {
352                    if k != j {
353                        let x_k = x_normalized.column(k);
354                        for i in 0..n_samples {
355                            residual[i] -= coefficients[k] * x_k[i];
356                        }
357                    }
358                }
359
360                // Compute correlation with residual
361                let x_j = x_normalized.column(j);
362                let rho = x_j.dot(&residual) / n_samples as Float;
363
364                // Elastic Net update (soft thresholding with L2 penalty)
365                let denominator = 1.0 + alpha_l2;
366                coefficients[j] = soft_threshold(rho, alpha_l1) / denominator;
367            }
368
369            // Check convergence
370            let diff = (&coefficients - &old_coefficients).mapv(|x| x.abs()).sum();
371            if diff < self.tolerance {
372                break;
373            }
374        }
375
376        // Rescale coefficients back to original scale
377        for j in 0..n_features {
378            if x_std[j] > Float::EPSILON {
379                coefficients[j] /= x_std[j];
380            }
381        }
382
383        // Select features based on threshold
384        let threshold = self.threshold.unwrap_or_else(|| {
385            let abs_coeffs: Array1<Float> = coefficients.mapv(|x| x.abs());
386            abs_coeffs.mean().unwrap_or(0.0)
387        });
388
389        let selected_features: Vec<usize> = coefficients
390            .iter()
391            .enumerate()
392            .filter(|(_, &coeff)| coeff.abs() > threshold)
393            .map(|(idx, _)| idx)
394            .collect();
395
396        if selected_features.is_empty() {
397            return Err(SklearsError::InvalidInput(
398                "No features selected by Elastic Net. Try reducing alpha or threshold.".to_string(),
399            ));
400        }
401
402        Ok(ElasticNetSelector {
403            alpha: self.alpha,
404            l1_ratio: self.l1_ratio,
405            max_iter: self.max_iter,
406            tolerance: self.tolerance,
407            threshold: self.threshold,
408            state: PhantomData,
409            coefficients_: Some(coefficients),
410            selected_features_: Some(selected_features),
411            n_features_: Some(n_features),
412        })
413    }
414}
415
416impl Transform<Array2<Float>> for ElasticNetSelector<Trained> {
417    fn transform(&self, x: &Array2<Float>) -> SklResult<Array2<Float>> {
418        validate::check_n_features(x, self.n_features_.unwrap())?;
419
420        let selected_features = self.selected_features_.as_ref().unwrap();
421        let n_samples = x.nrows();
422        let k = selected_features.len();
423        let mut x_new = Array2::zeros((n_samples, k));
424
425        for (new_idx, &old_idx) in selected_features.iter().enumerate() {
426            x_new.column_mut(new_idx).assign(&x.column(old_idx));
427        }
428
429        Ok(x_new)
430    }
431}
432
433impl SelectorMixin for ElasticNetSelector<Trained> {
434    fn get_support(&self) -> SklResult<Array1<bool>> {
435        let n_features = self.n_features_.unwrap();
436        let selected_features = self.selected_features_.as_ref().unwrap();
437        let mut support = Array1::from_elem(n_features, false);
438
439        for &idx in selected_features {
440            support[idx] = true;
441        }
442
443        Ok(support)
444    }
445
446    fn transform_features(&self, indices: &[usize]) -> SklResult<Vec<usize>> {
447        let selected_features = self.selected_features_.as_ref().unwrap();
448        Ok(indices
449            .iter()
450            .filter_map(|&idx| selected_features.iter().position(|&f| f == idx))
451            .collect())
452    }
453}
454
455impl ElasticNetSelector<Trained> {
456    /// Get Elastic Net coefficients
457    pub fn coefficients(&self) -> &Array1<Float> {
458        self.coefficients_.as_ref().unwrap()
459    }
460
461    /// Get selected features
462    pub fn selected_features(&self) -> &[usize] {
463        self.selected_features_.as_ref().unwrap()
464    }
465}
466
467// Helper functions
468
469/// Soft thresholding function for LASSO
470fn soft_threshold(x: Float, threshold: Float) -> Float {
471    if x > threshold {
472        x - threshold
473    } else if x < -threshold {
474        x + threshold
475    } else {
476        0.0
477    }
478}
479
480/// Normalize and center features
481fn normalize_and_center(
482    x: &Array2<Float>,
483) -> SklResult<(Array2<Float>, Array1<Float>, Array1<Float>)> {
484    let (n_samples, n_features) = x.dim();
485    let mut x_normalized = Array2::zeros((n_samples, n_features));
486    let mut x_mean = Array1::zeros(n_features);
487    let mut x_std = Array1::zeros(n_features);
488
489    // Compute means
490    for j in 0..n_features {
491        x_mean[j] = x.column(j).mean().unwrap_or(0.0);
492    }
493
494    // Center the data
495    for j in 0..n_features {
496        for i in 0..n_samples {
497            x_normalized[[i, j]] = x[[i, j]] - x_mean[j];
498        }
499    }
500
501    // Compute standard deviations
502    for j in 0..n_features {
503        let variance = x_normalized.column(j).mapv(|x| x * x).mean().unwrap_or(0.0);
504        x_std[j] = variance.sqrt();
505
506        // Normalize if std > 0
507        if x_std[j] > Float::EPSILON {
508            for i in 0..n_samples {
509                x_normalized[[i, j]] /= x_std[j];
510            }
511        }
512    }
513
514    Ok((x_normalized, x_mean, x_std))
515}
516
/// Ridge Regression feature selection.
/// Uses Ridge regression coefficients to select features.
#[derive(Debug, Clone)]
pub struct RidgeSelector<State = Untrained> {
    // L2 regularization strength added to the diagonal of X^T X.
    alpha: f64,
    // Selection cutoff on |coefficient|; None => mean absolute coefficient is used.
    threshold: Option<f64>,
    // Optional cap on the number of selected features (keeps largest |coefficients|).
    max_features: Option<usize>,
    // Zero-sized marker encoding the Untrained/Trained typestate.
    state: PhantomData<State>,
    // Trained state (None while Untrained, populated by fit):
    // fitted Ridge coefficients.
    coefficients_: Option<Array1<Float>>,
    // Indices of the selected features, in ascending order.
    selected_features_: Option<Vec<usize>>,
    // Number of input features seen during fit.
    n_features_: Option<usize>,
}
530
531impl RidgeSelector<Untrained> {
532    /// Create a new Ridge selector
533    pub fn new() -> Self {
534        Self {
535            alpha: 1.0,
536            threshold: None,
537            max_features: None,
538            state: PhantomData,
539            coefficients_: None,
540            selected_features_: None,
541            n_features_: None,
542        }
543    }
544
545    /// Set the regularization parameter alpha
546    pub fn alpha(mut self, alpha: f64) -> Self {
547        if alpha < 0.0 {
548            panic!("alpha must be non-negative");
549        }
550        self.alpha = alpha;
551        self
552    }
553
554    /// Set threshold for feature selection (if None, uses mean of absolute coefficients)
555    pub fn threshold(mut self, threshold: Option<f64>) -> Self {
556        self.threshold = threshold;
557        self
558    }
559
560    /// Set maximum number of features to select
561    pub fn max_features(mut self, max_features: Option<usize>) -> Self {
562        self.max_features = max_features;
563        self
564    }
565
566    /// Simple solve for linear system Ax = b using Gaussian elimination
567    fn simple_solve(&self, a: &Array2<f64>, b: &Array1<f64>) -> SklResult<Array1<f64>> {
568        let n = a.nrows();
569        if n != a.ncols() {
570            return Err(SklearsError::FitError("Matrix must be square".to_string()));
571        }
572        if n != b.len() {
573            return Err(SklearsError::FitError("Dimension mismatch".to_string()));
574        }
575
576        // Simple approach: use iterative method (Jacobi iteration)
577        // For a well-conditioned ridge system, this should converge
578        let mut x = Array1::zeros(n);
579        let max_iter = 1000;
580        let tolerance = 1e-6;
581
582        for _ in 0..max_iter {
583            let mut x_new = Array1::zeros(n);
584            for i in 0..n {
585                let mut sum = 0.0;
586                for j in 0..n {
587                    if i != j {
588                        sum += a[[i, j]] * x[j];
589                    }
590                }
591                x_new[i] = (b[i] - sum) / a[[i, i]];
592            }
593
594            // Check convergence
595            let diff: f64 = x_new
596                .iter()
597                .zip(x.iter())
598                .map(|(a, b): (&f64, &f64)| (a - b).abs())
599                .sum();
600            if diff < tolerance {
601                return Ok(x_new);
602            }
603            x = x_new;
604        }
605
606        // If we reach here, convergence failed - return best attempt
607        Ok(x)
608    }
609}
610
611impl Default for RidgeSelector<Untrained> {
612    fn default() -> Self {
613        Self::new()
614    }
615}
616
impl Estimator for RidgeSelector<Untrained> {
    // Unit config: this selector is configured through its builder methods instead.
    type Config = ();
    type Error = SklearsError;
    type Float = f64;

    /// Return the (unit) configuration object.
    fn config(&self) -> &Self::Config {
        &()
    }
}
626
627impl Fit<Array2<Float>, Array1<Float>> for RidgeSelector<Untrained> {
628    type Fitted = RidgeSelector<Trained>;
629
630    fn fit(self, x: &Array2<Float>, y: &Array1<Float>) -> SklResult<Self::Fitted> {
631        validate::check_consistent_length(x, y)?;
632
633        let (n_samples, n_features) = x.dim();
634        if n_samples < 2 {
635            return Err(SklearsError::InvalidInput(
636                "Ridge requires at least 2 samples".to_string(),
637            ));
638        }
639
640        // Solve Ridge regression: (X^T X + alpha * I) beta = X^T y
641        let xtx = x.t().dot(x);
642        let xty = x.t().dot(y);
643
644        // Add ridge penalty to diagonal
645        let mut xtx_ridge = xtx;
646        for i in 0..n_features {
647            xtx_ridge[[i, i]] += self.alpha;
648        }
649
650        // Solve for coefficients using simple approach
651        // For now, use a simple pseudoinverse approach or iterative method
652        let coefficients = self.simple_solve(&xtx_ridge, &xty)?;
653
654        // Select features based on threshold
655        let threshold = self.threshold.unwrap_or_else(|| {
656            let abs_coeffs: Array1<Float> = coefficients.mapv(|x| x.abs());
657            abs_coeffs.mean().unwrap_or(0.0)
658        });
659
660        let mut selected_features: Vec<usize> = coefficients
661            .iter()
662            .enumerate()
663            .filter(|(_, &coeff)| coeff.abs() > threshold)
664            .map(|(idx, _)| idx)
665            .collect();
666
667        // Apply max_features limit if specified
668        if let Some(max_feat) = self.max_features {
669            if selected_features.len() > max_feat {
670                // Sort by absolute coefficient value and take top max_feat
671                selected_features.sort_by(|&a, &b| {
672                    coefficients[b]
673                        .abs()
674                        .partial_cmp(&coefficients[a].abs())
675                        .unwrap()
676                });
677                selected_features.truncate(max_feat);
678                selected_features.sort(); // Restore original order
679            }
680        }
681
682        if selected_features.is_empty() {
683            return Err(SklearsError::InvalidInput(
684                "No features selected by Ridge. Try reducing alpha or threshold.".to_string(),
685            ));
686        }
687
688        Ok(RidgeSelector {
689            alpha: self.alpha,
690            threshold: self.threshold,
691            max_features: self.max_features,
692            state: PhantomData,
693            coefficients_: Some(coefficients),
694            selected_features_: Some(selected_features),
695            n_features_: Some(n_features),
696        })
697    }
698}
699
700impl Transform<Array2<Float>> for RidgeSelector<Trained> {
701    fn transform(&self, x: &Array2<Float>) -> SklResult<Array2<Float>> {
702        validate::check_n_features(x, self.n_features_.unwrap())?;
703
704        let selected_features = self.selected_features_.as_ref().unwrap();
705        let n_samples = x.nrows();
706        let k = selected_features.len();
707        let mut x_new = Array2::zeros((n_samples, k));
708
709        for (new_idx, &old_idx) in selected_features.iter().enumerate() {
710            x_new.column_mut(new_idx).assign(&x.column(old_idx));
711        }
712
713        Ok(x_new)
714    }
715}
716
717impl SelectorMixin for RidgeSelector<Trained> {
718    fn get_support(&self) -> SklResult<Array1<bool>> {
719        let n_features = self.n_features_.unwrap();
720        let selected_features = self.selected_features_.as_ref().unwrap();
721        let mut support = Array1::from_elem(n_features, false);
722
723        for &idx in selected_features {
724            support[idx] = true;
725        }
726
727        Ok(support)
728    }
729
730    fn transform_features(&self, indices: &[usize]) -> SklResult<Vec<usize>> {
731        let selected_features = self.selected_features_.as_ref().unwrap();
732        Ok(indices
733            .iter()
734            .filter_map(|&idx| selected_features.iter().position(|&f| f == idx))
735            .collect())
736    }
737}
738
739impl RidgeSelector<Trained> {
740    /// Get Ridge coefficients
741    pub fn coefficients(&self) -> &Array1<Float> {
742        self.coefficients_.as_ref().unwrap()
743    }
744
745    /// Get selected features
746    pub fn selected_features(&self) -> &[usize] {
747        self.selected_features_.as_ref().unwrap()
748    }
749}