//! Embedded feature selection methods
//!
//! Source file: `scirs2_series/feature_selection/embedded.rs`

use std::collections::HashMap;

use scirs2_core::ndarray::{Array1, Array2};

use super::wrapper::WrapperMethods;
use super::FeatureSelectionResult;
use crate::error::{Result, TimeSeriesError};
9
/// Embedded feature selection methods.
///
/// Stateless namespace type: all functionality is exposed through associated
/// functions (`lasso_selection`, `ridge_selection`, `tree_based_selection`).
#[derive(Debug, Clone, Copy, Default)]
pub struct EmbeddedMethods;

13impl EmbeddedMethods {
14    /// LASSO-based feature selection
15    ///
16    /// Uses LASSO regularization to select features.
17    ///
18    /// # Arguments
19    ///
20    /// * `features` - Feature matrix (samples x features)
21    /// * `target` - Target variable
22    /// * `alpha` - Regularization parameter
23    /// * `max_iterations` - Maximum iterations for optimization
24    ///
25    /// # Returns
26    ///
27    /// * Feature selection result
28    ///
29    /// # Example
30    ///
31    /// ```
32    /// use scirs2_core::ndarray::{Array1, Array2};
33    /// use scirs2_series::feature_selection::EmbeddedMethods;
34    ///
35    /// let features = Array2::from_shape_vec((100, 10), (0..1000).map(|x| x as f64).collect()).expect("Operation failed");
36    /// let target = Array1::from_vec((0..100).map(|x| x as f64).collect());
37    ///
38    /// let result = EmbeddedMethods::lasso_selection(&features, &target, 1.0, 1000).expect("Operation failed");
39    /// println!("Selected {} features", result.selected_features.len());
40    /// ```
41    pub fn lasso_selection(
42        features: &Array2<f64>,
43        target: &Array1<f64>,
44        alpha: f64,
45        max_iterations: usize,
46    ) -> Result<FeatureSelectionResult> {
47        let (n_samples, n_features) = features.dim();
48
49        if n_samples != target.len() {
50            return Err(TimeSeriesError::DimensionMismatch {
51                expected: n_samples,
52                actual: target.len(),
53            });
54        }
55
56        // Normalize features
57        let (normalized_features, _feature_means, _feature_stds) =
58            Self::normalize_features(features);
59        let target_mean = target.sum() / n_samples as f64;
60        let normalized_target = target.mapv(|x| x - target_mean);
61
62        // Initialize coefficients
63        let mut coefficients = Array1::zeros(n_features);
64        let _learning_rate = 0.01;
65
66        // Coordinate descent for LASSO
67        for _iteration in 0..max_iterations {
68            let mut max_change = 0.0f64;
69
70            for j in 0..n_features {
71                let old_coef = coefficients[j];
72
73                // Calculate residual without feature j
74                let mut residual = normalized_target.clone();
75                for k in 0..n_features {
76                    if k != j {
77                        for i in 0..n_samples {
78                            residual[i] -= coefficients[k] * normalized_features[[i, k]];
79                        }
80                    }
81                }
82
83                // Calculate correlation with residual
84                let correlation = normalized_features.column(j).dot(&residual) / n_samples as f64;
85
86                // Soft thresholding
87                let new_coef = Self::soft_threshold(correlation, alpha);
88                coefficients[j] = new_coef;
89
90                max_change = max_change.max((new_coef - old_coef).abs());
91            }
92
93            // Check convergence
94            if max_change < 1e-6 {
95                break;
96            }
97        }
98
99        // Select features with non-zero coefficients
100        let mut selected_features = Vec::new();
101        let mut feature_scores = Array1::zeros(n_features);
102
103        for i in 0..n_features {
104            let abs_coef = coefficients[i].abs();
105            feature_scores[i] = abs_coef;
106
107            if abs_coef > 1e-6 {
108                selected_features.push(i);
109            }
110        }
111
112        let mut metadata = HashMap::new();
113        metadata.insert("alpha".to_string(), alpha);
114        metadata.insert("n_selected".to_string(), selected_features.len() as f64);
115
116        Ok(FeatureSelectionResult {
117            selected_features,
118            feature_scores,
119            method: "LASSO".to_string(),
120            metadata,
121        })
122    }
123
124    /// Ridge regression-based feature ranking
125    ///
126    /// Uses Ridge regularization to rank features by importance.
127    ///
128    /// # Arguments
129    ///
130    /// * `features` - Feature matrix (samples x features)
131    /// * `target` - Target variable
132    /// * `alpha` - Regularization parameter
133    /// * `n_features` - Number of top features to select
134    ///
135    /// # Returns
136    ///
137    /// * Feature selection result
138    pub fn ridge_selection(
139        features: &Array2<f64>,
140        target: &Array1<f64>,
141        alpha: f64,
142        n_features: Option<usize>,
143    ) -> Result<FeatureSelectionResult> {
144        let (n_samples, n_feat) = features.dim();
145
146        if n_samples != target.len() {
147            return Err(TimeSeriesError::DimensionMismatch {
148                expected: n_samples,
149                actual: target.len(),
150            });
151        }
152
153        // Normalize features
154        let (normalized_features, _, _) = Self::normalize_features(features);
155        let target_mean = target.sum() / n_samples as f64;
156        let normalized_target = target.mapv(|x| x - target_mean);
157
158        // Ridge regression: (X^T X + αI)^(-1) X^T y
159        let xt = normalized_features.t();
160        let mut xtx = xt.dot(&normalized_features);
161
162        // Add regularization
163        for i in 0..n_feat {
164            xtx[[i, i]] += alpha;
165        }
166
167        let xty = xt.dot(&normalized_target);
168        let coefficients = WrapperMethods::solve_linear_system(&xtx, &xty)?;
169
170        // Rank _features by absolute coefficient values
171        let mut feature_scores = Array1::zeros(n_feat);
172        let mut indexed_scores: Vec<(usize, f64)> = Vec::new();
173
174        for i in 0..n_feat {
175            let abs_coef = coefficients[i].abs();
176            feature_scores[i] = abs_coef;
177            indexed_scores.push((i, abs_coef));
178        }
179
180        indexed_scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
181
182        // Select top _features
183        let n_to_select = n_features.unwrap_or(n_feat / 2).min(n_feat);
184        let selected_features: Vec<usize> = indexed_scores
185            .into_iter()
186            .take(n_to_select)
187            .map(|(idx_, _)| idx_)
188            .collect();
189
190        let mut metadata = HashMap::new();
191        metadata.insert("alpha".to_string(), alpha);
192        metadata.insert("n_selected".to_string(), selected_features.len() as f64);
193
194        Ok(FeatureSelectionResult {
195            selected_features,
196            feature_scores,
197            method: "Ridge".to_string(),
198            metadata,
199        })
200    }
201
202    /// Tree-based feature importance (simplified decision tree)
203    ///
204    /// Estimates feature importance using a simplified decision tree approach.
205    ///
206    /// # Arguments
207    ///
208    /// * `features` - Feature matrix (samples x features)
209    /// * `target` - Target variable
210    /// * `n_features` - Number of top features to select
211    ///
212    /// # Returns
213    ///
214    /// * Feature selection result
215    pub fn tree_based_selection(
216        features: &Array2<f64>,
217        target: &Array1<f64>,
218        n_features: Option<usize>,
219    ) -> Result<FeatureSelectionResult> {
220        let (n_samples, n_feat) = features.dim();
221
222        if n_samples != target.len() {
223            return Err(TimeSeriesError::DimensionMismatch {
224                expected: n_samples,
225                actual: target.len(),
226            });
227        }
228
229        let mut feature_scores = Array1::zeros(n_feat);
230
231        // Calculate feature importance based on variance reduction
232        for i in 0..n_feat {
233            let importance = Self::calculate_feature_importance_tree(&features.column(i), target)?;
234            feature_scores[i] = importance;
235        }
236
237        // Select top _features
238        let n_to_select = n_features.unwrap_or(n_feat / 2).min(n_feat);
239        let mut indexed_scores: Vec<(usize, f64)> = feature_scores
240            .iter()
241            .enumerate()
242            .map(|(i, &score)| (i, score))
243            .collect();
244
245        indexed_scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
246
247        let selected_features: Vec<usize> = indexed_scores
248            .into_iter()
249            .take(n_to_select)
250            .map(|(idx_, _)| idx_)
251            .collect();
252
253        let mut metadata = HashMap::new();
254        metadata.insert("n_selected".to_string(), selected_features.len() as f64);
255
256        Ok(FeatureSelectionResult {
257            selected_features,
258            feature_scores,
259            method: "TreeBased".to_string(),
260            metadata,
261        })
262    }
263
264    // Helper methods
265
266    fn normalize_features(features: &Array2<f64>) -> (Array2<f64>, Array1<f64>, Array1<f64>) {
267        let (n_samples, n_features) = features.dim();
268        let mut normalized = Array2::zeros((n_samples, n_features));
269        let mut means = Array1::zeros(n_features);
270        let mut stds = Array1::zeros(n_features);
271
272        for j in 0..n_features {
273            let col = features.column(j);
274            let mean = col.sum() / n_samples as f64;
275            let variance = col.mapv(|x| (x - mean).powi(2)).sum() / n_samples as f64;
276            let std = variance.sqrt().max(1e-8); // Avoid division by zero
277
278            means[j] = mean;
279            stds[j] = std;
280
281            for i in 0..n_samples {
282                normalized[[i, j]] = (features[[i, j]] - mean) / std;
283            }
284        }
285
286        (normalized, means, stds)
287    }
288
289    fn soft_threshold(x: f64, threshold: f64) -> f64 {
290        if x > threshold {
291            x - threshold
292        } else if x < -threshold {
293            x + threshold
294        } else {
295            0.0
296        }
297    }
298
299    fn calculate_feature_importance_tree(
300        feature: &scirs2_core::ndarray::ArrayView1<f64>,
301        target: &Array1<f64>,
302    ) -> Result<f64> {
303        let n = feature.len();
304
305        if n < 4 {
306            return Ok(0.0);
307        }
308
309        // Calculate initial variance
310        let target_mean = target.sum() / n as f64;
311        let initial_variance = target.mapv(|y| (y - target_mean).powi(2)).sum() / n as f64;
312
313        if initial_variance == 0.0 {
314            return Ok(0.0);
315        }
316
317        // Try different split points
318        let mut feature_values: Vec<f64> = feature.iter().cloned().collect();
319        feature_values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
320
321        let mut best_gain = 0.0f64;
322
323        for i in 1..n {
324            if feature_values[i] != feature_values[i - 1] {
325                let threshold = (feature_values[i] + feature_values[i - 1]) / 2.0;
326
327                let mut left_targets = Vec::new();
328                let mut right_targets = Vec::new();
329
330                for j in 0..n {
331                    if feature[j] <= threshold {
332                        left_targets.push(target[j]);
333                    } else {
334                        right_targets.push(target[j]);
335                    }
336                }
337
338                if left_targets.is_empty() || right_targets.is_empty() {
339                    continue;
340                }
341
342                let left_mean = left_targets.iter().sum::<f64>() / left_targets.len() as f64;
343                let right_mean = right_targets.iter().sum::<f64>() / right_targets.len() as f64;
344
345                let left_variance = left_targets
346                    .iter()
347                    .map(|&y| (y - left_mean).powi(2))
348                    .sum::<f64>()
349                    / left_targets.len() as f64;
350
351                let right_variance = right_targets
352                    .iter()
353                    .map(|&y| (y - right_mean).powi(2))
354                    .sum::<f64>()
355                    / right_targets.len() as f64;
356
357                let weighted_variance = (left_targets.len() as f64 * left_variance
358                    + right_targets.len() as f64 * right_variance)
359                    / n as f64;
360
361                let gain = initial_variance - weighted_variance;
362                best_gain = best_gain.max(gain);
363            }
364        }
365
366        Ok(best_gain / initial_variance)
367    }
368}