scirs2_series/feature_selection/mod.rs
1//! Time series feature selection methods
2//!
3//! This module provides comprehensive feature selection methods specifically designed for time series data.
4//! It includes filter methods, wrapper methods, embedded methods, and time series specific approaches.
5
6use scirs2_core::ndarray::Array1;
7use std::collections::HashMap;
8
// Public submodules
10pub mod embedded;
11pub mod filter;
12pub mod selector;
13pub mod time_series;
14pub mod wrapper;
15
16#[cfg(test)]
17mod tests;
18
19// Re-export main types and structs
20pub use embedded::EmbeddedMethods;
21pub use filter::FilterMethods;
22pub use selector::FeatureSelector;
23pub use time_series::TimeSeriesMethods;
24pub use wrapper::WrapperMethods;
25
26/// Feature selection result containing selected features and their scores
27#[derive(Debug, Clone)]
28pub struct FeatureSelectionResult {
29 /// Indices of selected features
30 pub selected_features: Vec<usize>,
31 /// Feature scores (higher is better)
32 pub feature_scores: Array1<f64>,
33 /// Selection method used
34 pub method: String,
35 /// Additional metadata
36 pub metadata: HashMap<String, f64>,
37}
38
39/// Configuration for feature selection methods
40#[derive(Debug, Clone)]
41pub struct FeatureSelectionConfig {
42 /// Number of features to select (None = automatic)
43 pub n_features: Option<usize>,
44 /// Scoring method for wrapper methods
45 pub scoring_method: ScoringMethod,
46 /// Cross-validation folds for wrapper methods
47 pub cv_folds: usize,
48 /// Significance level for statistical tests
49 pub alpha: f64,
50 /// Minimum correlation threshold for filter methods
51 pub correlation_threshold: f64,
52 /// Minimum variance threshold for variance-based filtering
53 pub variance_threshold: f64,
54 /// Maximum number of iterations for wrapper methods
55 pub max_iterations: usize,
56 /// Random seed for reproducibility
57 pub random_seed: Option<u64>,
58 /// Regularization parameter for embedded methods
59 pub regularization_alpha: f64,
60 /// Maximum lag for time series specific methods
61 pub max_lag: usize,
62 /// Seasonal period for seasonal feature selection
63 pub seasonal_period: Option<usize>,
64}
65
66impl Default for FeatureSelectionConfig {
67 fn default() -> Self {
68 Self {
69 n_features: None,
70 scoring_method: ScoringMethod::MeanSquaredError,
71 cv_folds: 5,
72 alpha: 0.05,
73 correlation_threshold: 0.1,
74 variance_threshold: 0.01,
75 max_iterations: 100,
76 random_seed: None,
77 regularization_alpha: 1.0,
78 max_lag: 10,
79 seasonal_period: None,
80 }
81 }
82}
83
/// Scoring methods available for feature selection.
///
/// The enum carries no data, so it also derives `PartialEq`, `Eq`, and `Hash`:
/// variants can be compared with `==` and used as keys in hashed collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ScoringMethod {
    /// Mean squared error.
    MeanSquaredError,
    /// Mean absolute error.
    MeanAbsoluteError,
    /// R-squared.
    RSquared,
    /// Akaike Information Criterion.
    AIC,
    /// Bayesian Information Criterion.
    BIC,
    /// Cross-validation score.
    CrossValidation,
}