1use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2};
6use std::collections::HashMap;
7use std::marker::PhantomData;
8use std::time::{Duration, Instant};
9
10use super::functions::Result;
11
/// Families of tree-ensemble estimators.
///
/// Not referenced in this part of the file; presumably consumed by
/// model-based selection steps elsewhere — TODO confirm.
#[derive(Debug, Clone)]
pub enum TreeEstimatorType {
    RandomForest,
    ExtraTrees,
    GradientBoosting,
    AdaBoost,
}
/// How aggressively pipeline results should be validated.
///
/// Not referenced in this chunk; semantics of each level are defined by
/// the consuming code — TODO confirm.
#[derive(Debug, Clone)]
pub enum ValidationStrategy {
    None,
    Basic,
    Comprehensive,
    Statistical,
}
/// Records how the pipeline's input features map to its output features.
///
/// Built (but currently discarded) at the end of `fit`.
#[derive(Debug, Clone)]
pub struct FeatureMapping {
    /// Number of columns in the matrix passed to `fit`.
    pub original_features: usize,
    /// Number of columns after every stage has run.
    pub final_features: usize,
    /// Synthetic names (`feature_{i}`) generated for the final columns.
    pub feature_names: Vec<String>,
    /// Provenance of each final column.
    pub feature_origins: Vec<FeatureOrigin>,
    /// One entry per trained stage, with input/output column counts.
    pub transformation_history: Vec<TransformationStep>,
}
/// Fitted parameters for the standard scaler: per-column mean and
/// standard-deviation divisor (clamped away from zero when fitted).
#[derive(Debug, Clone)]
pub struct ScalerParams {
    pub mean: Array1<f64>,
    pub scale: Array1<f64>,
}
/// Verbosity levels, ordered from silent to most verbose.
///
/// Not referenced in this chunk.
#[derive(Debug, Clone)]
pub enum LoggingLevel {
    None,
    Error,
    Warning,
    Info,
    Debug,
    Trace,
}
/// Performance-tuning knobs carried by the pipeline.
///
/// Stored on `FeatureSelectionPipeline` but not consulted by any code
/// visible in this chunk — presumably read by execution backends
/// elsewhere; TODO confirm.
#[derive(Debug, Clone)]
pub struct OptimizationConfiguration {
    pub use_simd: bool,
    pub chunk_size: usize,
    /// `None` presumably means "use a default/global pool" — verify.
    pub thread_pool_size: Option<usize>,
    pub memory_pool_size: usize,
    pub cache_size: usize,
    pub prefetch_strategy: PrefetchStrategy,
    pub vectorization_threshold: usize,
}
/// Memory-usage trade-off presets. Not referenced in this chunk.
#[derive(Debug, Clone)]
pub enum MemoryOptimization {
    None,
    Conservative,
    Aggressive,
}
/// Switches for the standard scaler: subtract the column mean and/or
/// divide by the column standard deviation.
#[derive(Debug, Clone)]
pub struct StandardScalerConfig {
    pub with_mean: bool,
    pub with_std: bool,
}
/// Statistics computable over a sliding window. Not referenced in this
/// chunk.
#[derive(Debug, Clone)]
pub enum WindowStatistic {
    Mean,
    Std,
    Min,
    Max,
    Median,
    Skewness,
    Kurtosis,
}
/// Distance metrics. Not referenced in this chunk.
#[derive(Debug, Clone)]
pub enum DistanceMetric {
    Euclidean,
    Manhattan,
    Cosine,
    Hamming,
}
/// Cache eviction policies, each with a capacity. Not referenced in this
/// chunk.
#[derive(Debug, Clone)]
pub enum CachingStrategy {
    None,
    /// Least-recently-used eviction with the given capacity.
    LRU {
        size: usize,
    },
    /// Least-frequently-used eviction with the given capacity.
    LFU {
        size: usize,
    },
    /// First-in-first-out eviction with the given capacity.
    FIFO {
        size: usize,
    },
}
/// How missing values are encoded in the input: IEEE NaN or a specific
/// sentinel value. Not referenced in this chunk.
#[derive(Debug, Clone)]
pub enum MissingValueIndicator {
    NaN,
    Value(f64),
}
/// Type-state marker for a fitted pipeline.
///
/// NOTE(review): `fit` builds a `FeatureMapping` and `PipelineMetadata`
/// but discards them (`_feature_mapping`, `_pipeline_metadata`); these
/// fields are never populated anywhere in this chunk — confirm whether
/// they should be wired up or removed.
#[derive(Debug)]
pub struct Trained {
    trained_steps: Vec<TrainedStep>,
    feature_mapping: FeatureMapping,
    pipeline_metadata: PipelineMetadata,
}
/// A configurable feature-selection pipeline using the type-state
/// pattern: `FeatureSelectionPipeline<Untrained>` exposes builders and
/// `fit`, while `FeatureSelectionPipeline<Trained>` exposes `transform`.
///
/// Stages run in declaration order: preprocessing, feature engineering,
/// filter selection, optional dimensionality reduction, optional
/// model-based selection.
#[derive(Debug, Clone)]
pub struct FeatureSelectionPipeline<State = Untrained> {
    preprocessing_steps: Vec<PreprocessingStep>,
    feature_engineering_steps: Vec<FeatureEngineeringStep>,
    selection_methods: Vec<SelectionMethod>,
    dimensionality_reduction: Option<DimensionalityReductionStep>,
    model_selection: Option<ModelSelectionStep>,
    pipeline_config: PipelineConfiguration,
    optimization_config: OptimizationConfiguration,
    // Zero-sized marker carrying the Untrained/Trained type-state.
    _phantom: PhantomData<State>,
}
155impl FeatureSelectionPipeline<Untrained> {
156 pub fn new() -> Self {
158 Self {
159 preprocessing_steps: Vec::new(),
160 feature_engineering_steps: Vec::new(),
161 selection_methods: Vec::new(),
162 dimensionality_reduction: None,
163 model_selection: None,
164 pipeline_config: PipelineConfiguration::default(),
165 optimization_config: OptimizationConfiguration::default(),
166 _phantom: PhantomData,
167 }
168 }
169 pub fn add_preprocessing_step(mut self, step: PreprocessingStep) -> Self {
171 self.preprocessing_steps.push(step);
172 self
173 }
174 pub fn add_feature_engineering_step(mut self, step: FeatureEngineeringStep) -> Self {
176 self.feature_engineering_steps.push(step);
177 self
178 }
179 pub fn add_selection_method(mut self, method: SelectionMethod) -> Self {
181 self.selection_methods.push(method);
182 self
183 }
184 pub fn with_dimensionality_reduction(mut self, reduction: DimensionalityReductionStep) -> Self {
186 self.dimensionality_reduction = Some(reduction);
187 self
188 }
189 pub fn with_model_selection(mut self, model_selection: ModelSelectionStep) -> Self {
191 self.model_selection = Some(model_selection);
192 self
193 }
194 pub fn with_config(mut self, config: PipelineConfiguration) -> Self {
196 self.pipeline_config = config;
197 self
198 }
199 pub fn with_optimization(mut self, config: OptimizationConfiguration) -> Self {
201 self.optimization_config = config;
202 self
203 }
204 pub fn fit(
206 mut self,
207 X: ArrayView2<f64>,
208 y: ArrayView1<f64>,
209 ) -> Result<FeatureSelectionPipeline<Trained>> {
210 let start_time = Instant::now();
211 let mut current_X = X.to_owned();
212 let current_y = y.to_owned();
213 let mut trained_steps = Vec::new();
214 let original_features = X.ncols();
215 let mut preprocessing_steps = std::mem::take(&mut self.preprocessing_steps);
216 for (idx, step) in preprocessing_steps.iter_mut().enumerate() {
217 let step_start = Instant::now();
218 current_X = Self::apply_preprocessing_step_static(step, current_X.view())?;
219 trained_steps.push(TrainedStep {
220 step_type: "Preprocessing".to_string(),
221 step_index: idx,
222 training_time: step_start.elapsed(),
223 feature_count_before: current_X.ncols(),
224 feature_count_after: current_X.ncols(),
225 parameters: StepParameters::Preprocessing(Box::new(())),
226 });
227 }
228 self.preprocessing_steps = preprocessing_steps;
229 let mut feature_engineering_steps = std::mem::take(&mut self.feature_engineering_steps);
230 for (idx, step) in feature_engineering_steps.iter_mut().enumerate() {
231 let step_start = Instant::now();
232 let features_before = current_X.ncols();
233 current_X = Self::apply_feature_engineering_step_static(
234 step,
235 current_X.view(),
236 current_y.view(),
237 )?;
238 trained_steps.push(TrainedStep {
239 step_type: "FeatureEngineering".to_string(),
240 step_index: idx,
241 training_time: step_start.elapsed(),
242 feature_count_before: features_before,
243 feature_count_after: current_X.ncols(),
244 parameters: StepParameters::FeatureEngineering(Box::new(())),
245 });
246 }
247 self.feature_engineering_steps = feature_engineering_steps;
248 let mut selection_mask = Array1::from_elem(current_X.ncols(), true);
249 let mut selection_methods = std::mem::take(&mut self.selection_methods);
250 for (idx, method) in selection_methods.iter_mut().enumerate() {
251 let step_start = Instant::now();
252 let features_before = current_X.ncols();
253 let method_mask =
254 Self::apply_selection_method_static(method, current_X.view(), current_y.view())?;
255 for (i, &selected) in method_mask.iter().enumerate() {
256 if !selected {
257 selection_mask[i] = false;
258 }
259 }
260 trained_steps.push(TrainedStep {
261 step_type: "Selection".to_string(),
262 step_index: idx,
263 training_time: step_start.elapsed(),
264 feature_count_before: features_before,
265 feature_count_after: selection_mask.iter().filter(|&&x| x).count(),
266 parameters: StepParameters::Selection(method_mask),
267 });
268 }
269 self.selection_methods = selection_methods;
270 let selected_indices: Vec<usize> = selection_mask
271 .iter()
272 .enumerate()
273 .filter_map(|(i, &selected)| if selected { Some(i) } else { None })
274 .collect();
275 if !selected_indices.is_empty() {
276 let mut selected_X = Array2::zeros((current_X.nrows(), selected_indices.len()));
277 for (new_col, &old_col) in selected_indices.iter().enumerate() {
278 for row in 0..current_X.nrows() {
279 selected_X[[row, new_col]] = current_X[[row, old_col]];
280 }
281 }
282 current_X = selected_X;
283 }
284 if self.dimensionality_reduction.is_some() {
285 let step_start = Instant::now();
286 let features_before = current_X.ncols();
287 let mut reduction = self
288 .dimensionality_reduction
289 .take()
290 .expect("operation should succeed");
291 current_X = self.apply_dimensionality_reduction(&mut reduction, current_X.view())?;
292 self.dimensionality_reduction = Some(reduction);
293 trained_steps.push(TrainedStep {
294 step_type: "DimensionalityReduction".to_string(),
295 step_index: 0,
296 training_time: step_start.elapsed(),
297 feature_count_before: features_before,
298 feature_count_after: current_X.ncols(),
299 parameters: StepParameters::DimensionalityReduction(Array2::zeros((1, 1))),
300 });
301 }
302 if self.model_selection.is_some() {
303 let step_start = Instant::now();
304 let features_before = current_X.ncols();
305 let mut model_sel = self
306 .model_selection
307 .take()
308 .expect("operation should succeed");
309 let selected_features =
310 self.apply_model_selection(&mut model_sel, current_X.view(), current_y.view())?;
311 self.model_selection = Some(model_sel);
312 if !selected_features.is_empty() {
313 let mut model_selected_X =
314 Array2::zeros((current_X.nrows(), selected_features.len()));
315 for (new_col, &old_col) in selected_features.iter().enumerate() {
316 for row in 0..current_X.nrows() {
317 model_selected_X[[row, new_col]] = current_X[[row, old_col]];
318 }
319 }
320 current_X = model_selected_X;
321 }
322 trained_steps.push(TrainedStep {
323 step_type: "ModelSelection".to_string(),
324 step_index: 0,
325 training_time: step_start.elapsed(),
326 feature_count_before: features_before,
327 feature_count_after: current_X.ncols(),
328 parameters: StepParameters::ModelSelection(selected_features),
329 });
330 }
331 let final_features = current_X.ncols();
332 let _feature_mapping = FeatureMapping {
333 original_features,
334 final_features,
335 feature_names: (0..final_features)
336 .map(|i| format!("feature_{}", i))
337 .collect(),
338 feature_origins: (0..final_features).map(FeatureOrigin::Original).collect(),
339 transformation_history: trained_steps
340 .iter()
341 .map(|step| TransformationStep {
342 step_name: step.step_type.clone(),
343 input_features: step.feature_count_before,
344 output_features: step.feature_count_after,
345 transformation_type: TransformationType::ManyToMany,
346 })
347 .collect(),
348 };
349 let total_training_time = start_time.elapsed();
350 let feature_reduction_ratio = final_features as f64 / original_features as f64;
351 let _pipeline_metadata = PipelineMetadata {
352 total_training_time,
353 total_transform_time: Duration::from_secs(0),
354 memory_usage_peak: 0,
355 feature_reduction_ratio,
356 performance_metrics: HashMap::new(),
357 validation_results: None,
358 };
359 Ok(FeatureSelectionPipeline {
360 preprocessing_steps: self.preprocessing_steps,
361 feature_engineering_steps: self.feature_engineering_steps,
362 selection_methods: self.selection_methods,
363 dimensionality_reduction: self.dimensionality_reduction,
364 model_selection: self.model_selection,
365 pipeline_config: self.pipeline_config,
366 optimization_config: self.optimization_config,
367 _phantom: PhantomData::<Trained>,
368 })
369 }
    /// Instance-method convenience wrapper over
    /// [`Self::apply_preprocessing_step_static`]. Not called by `fit`
    /// (which uses the static form to avoid borrowing `self`).
    fn apply_preprocessing_step(
        &self,
        step: &mut PreprocessingStep,
        X: ArrayView2<f64>,
    ) -> Result<Array2<f64>> {
        Self::apply_preprocessing_step_static(step, X)
    }
    /// Dispatches a preprocessing step to the matching scaler routine.
    ///
    /// Scaler variants fit their parameters on first use (caching them in
    /// `trained_params`); any other variant currently passes the data
    /// through unchanged.
    fn apply_preprocessing_step_static(
        step: &mut PreprocessingStep,
        X: ArrayView2<f64>,
    ) -> Result<Array2<f64>> {
        match step {
            PreprocessingStep::StandardScaler {
                config,
                trained_params,
            } => Self::apply_standard_scaler_static(config, trained_params, X),
            PreprocessingStep::RobustScaler {
                config,
                trained_params,
            } => Self::apply_robust_scaler_static(config, trained_params, X),
            PreprocessingStep::MinMaxScaler {
                config,
                trained_params,
            } => Self::apply_minmax_scaler_static(config, trained_params, X),
            // Unhandled variants are a no-op: the input is copied through.
            _ => Ok(X.to_owned()),
        }
    }
    /// Instance-method convenience wrapper over
    /// [`Self::apply_standard_scaler_static`].
    fn apply_standard_scaler(
        &self,
        config: &StandardScalerConfig,
        trained_params: &mut Option<ScalerParams>,
        X: ArrayView2<f64>,
    ) -> Result<Array2<f64>> {
        Self::apply_standard_scaler_static(config, trained_params, X)
    }
405 fn apply_standard_scaler_static(
406 config: &StandardScalerConfig,
407 trained_params: &mut Option<ScalerParams>,
408 X: ArrayView2<f64>,
409 ) -> Result<Array2<f64>> {
410 let mut result = X.to_owned();
411 if trained_params.is_none() {
412 let mut mean = Array1::zeros(X.ncols());
413 let mut scale = Array1::ones(X.ncols());
414 if config.with_mean {
415 for col in 0..X.ncols() {
416 mean[col] = X.column(col).mean().unwrap_or(0.0);
417 }
418 }
419 if config.with_std {
420 for col in 0..X.ncols() {
421 let column = X.column(col);
422 let variance = column.var(1.0);
423 scale[col] = variance.sqrt().max(1e-8);
424 }
425 }
426 *trained_params = Some(ScalerParams { mean, scale });
427 }
428 if let Some(ref params) = trained_params {
429 for col in 0..X.ncols() {
430 for row in 0..X.nrows() {
431 if config.with_mean {
432 result[[row, col]] -= params.mean[col];
433 }
434 if config.with_std {
435 result[[row, col]] /= params.scale[col];
436 }
437 }
438 }
439 }
440 Ok(result)
441 }
    /// Instance-method convenience wrapper over
    /// [`Self::apply_robust_scaler_static`].
    fn apply_robust_scaler(
        &self,
        config: &RobustScalerConfig,
        trained_params: &mut Option<RobustScalerParams>,
        X: ArrayView2<f64>,
    ) -> Result<Array2<f64>> {
        Self::apply_robust_scaler_static(config, trained_params, X)
    }
    /// Robust scaler: optionally centers each column on its median and
    /// scales by its inter-quantile range. Parameters are fitted once and
    /// cached in `trained_params`.
    fn apply_robust_scaler_static(
        config: &RobustScalerConfig,
        trained_params: &mut Option<RobustScalerParams>,
        X: ArrayView2<f64>,
    ) -> Result<Array2<f64>> {
        let mut result = X.to_owned();
        if trained_params.is_none() {
            let mut center = Array1::zeros(X.ncols());
            let mut scale = Array1::ones(X.ncols());
            for col in 0..X.ncols() {
                // Sort a copy of the column for median/quantile lookup.
                // NOTE(review): partial_cmp panics on NaN input — confirm
                // upstream guarantees NaN-free data.
                let mut column_data: Vec<f64> = X.column(col).to_vec();
                column_data.sort_by(|a, b| a.partial_cmp(b).expect("operation should succeed"));
                let n = column_data.len();
                if config.with_centering {
                    // Median: midpoint of the two central values for even n.
                    center[col] = if n % 2 == 0 {
                        (column_data[n / 2 - 1] + column_data[n / 2]) / 2.0
                    } else {
                        column_data[n / 2]
                    };
                }
                if config.with_scaling {
                    // Quantile indices by truncation; quantile_range is
                    // expected in [0, 1] (e.g. (0.25, 0.75) for the IQR).
                    let q1_idx = ((n - 1) as f64 * config.quantile_range.0) as usize;
                    let q3_idx = ((n - 1) as f64 * config.quantile_range.1) as usize;
                    let iqr = column_data[q3_idx] - column_data[q1_idx];
                    // Clamp so constant columns do not divide by ~0.
                    scale[col] = iqr.max(1e-8);
                }
            }
            *trained_params = Some(RobustScalerParams { center, scale });
        }
        if let Some(ref params) = trained_params {
            for col in 0..X.ncols() {
                for row in 0..X.nrows() {
                    if config.with_centering {
                        result[[row, col]] -= params.center[col];
                    }
                    if config.with_scaling {
                        result[[row, col]] /= params.scale[col];
                    }
                }
            }
        }
        Ok(result)
    }
    /// Instance-method convenience wrapper over
    /// [`Self::apply_minmax_scaler_static`].
    fn apply_minmax_scaler(
        &self,
        config: &MinMaxScalerConfig,
        trained_params: &mut Option<MinMaxScalerParams>,
        X: ArrayView2<f64>,
    ) -> Result<Array2<f64>> {
        Self::apply_minmax_scaler_static(config, trained_params, X)
    }
501 fn apply_minmax_scaler_static(
502 config: &MinMaxScalerConfig,
503 trained_params: &mut Option<MinMaxScalerParams>,
504 X: ArrayView2<f64>,
505 ) -> Result<Array2<f64>> {
506 let mut result = X.to_owned();
507 if trained_params.is_none() {
508 let mut min = Array1::zeros(X.ncols());
509 let mut scale = Array1::ones(X.ncols());
510 for col in 0..X.ncols() {
511 let column = X.column(col);
512 let col_min = column.iter().fold(f64::INFINITY, |a, &b| a.min(b));
513 let col_max = column.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
514 min[col] = col_min;
515 let range = col_max - col_min;
516 if range > 1e-8 {
517 scale[col] = (config.feature_range.1 - config.feature_range.0) / range;
518 }
519 }
520 *trained_params = Some(MinMaxScalerParams { min, scale });
521 }
522 if let Some(ref params) = trained_params {
523 for col in 0..X.ncols() {
524 for row in 0..X.nrows() {
525 let scaled = (result[[row, col]] - params.min[col]) * params.scale[col]
526 + config.feature_range.0;
527 result[[row, col]] = if config.clip {
528 scaled
529 .max(config.feature_range.0)
530 .min(config.feature_range.1)
531 } else {
532 scaled
533 };
534 }
535 }
536 }
537 Ok(result)
538 }
    /// Static stand-in used by `fit`: currently a no-op that copies the
    /// input through.
    ///
    /// NOTE(review): the instance method
    /// `apply_feature_engineering_step` implements polynomial /
    /// interaction / binning features, but `fit` calls THIS static
    /// version, so those implementations are never exercised during
    /// training — confirm this is intentional.
    fn apply_feature_engineering_step_static(
        _step: &mut FeatureEngineeringStep,
        X: ArrayView2<f64>,
        _y: ArrayView1<f64>,
    ) -> Result<Array2<f64>> {
        Ok(X.to_owned())
    }
    /// Dispatches a feature-engineering step to its implementation.
    ///
    /// Unhandled variants pass the data through unchanged. `_y` is
    /// currently unused by every branch.
    fn apply_feature_engineering_step(
        &self,
        step: &mut FeatureEngineeringStep,
        X: ArrayView2<f64>,
        _y: ArrayView1<f64>,
    ) -> Result<Array2<f64>> {
        match step {
            FeatureEngineeringStep::PolynomialFeatures {
                degree,
                interaction_only,
                include_bias,
                feature_mapping,
            } => self.apply_polynomial_features(
                *degree,
                *interaction_only,
                *include_bias,
                feature_mapping,
                X,
            ),
            FeatureEngineeringStep::InteractionFeatures {
                max_pairs,
                threshold,
                feature_pairs,
            } => self.apply_interaction_features(*max_pairs, *threshold, feature_pairs, X),
            FeatureEngineeringStep::BinningFeatures {
                n_bins,
                strategy,
                bin_edges,
            } => self.apply_binning_features(*n_bins, strategy, bin_edges, X),
            // Remaining variants are not implemented yet: pass-through.
            _ => Ok(X.to_owned()),
        }
    }
578 fn apply_polynomial_features(
579 &self,
580 degree: usize,
581 interaction_only: bool,
582 include_bias: bool,
583 feature_mapping: &mut Option<Vec<(usize, usize)>>,
584 X: ArrayView2<f64>,
585 ) -> Result<Array2<f64>> {
586 let n_features = X.ncols();
587 let mut new_features = Vec::new();
588 let mut mapping = Vec::new();
589 if include_bias {
590 let bias_feature = Array1::ones(X.nrows());
591 new_features.push(bias_feature);
592 mapping.push((0, 0));
593 }
594 for i in 0..n_features {
595 new_features.push(X.column(i).to_owned());
596 mapping.push((i, 1));
597 }
598 if !interaction_only {
599 for d in 2..=degree {
600 for i in 0..n_features {
601 let mut poly_feature = Array1::zeros(X.nrows());
602 for row in 0..X.nrows() {
603 poly_feature[row] = X[[row, i]].powi(d as i32);
604 }
605 new_features.push(poly_feature);
606 mapping.push((i, d));
607 }
608 }
609 }
610 for d in 2..=degree {
611 for i in 0..n_features {
612 for j in (i + 1)..n_features {
613 let mut interaction_feature = Array1::zeros(X.nrows());
614 for row in 0..X.nrows() {
615 interaction_feature[row] = X[[row, i]] * X[[row, j]];
616 }
617 new_features.push(interaction_feature);
618 mapping.push((i * n_features + j, d));
619 }
620 }
621 }
622 *feature_mapping = Some(mapping);
623 let n_new_features = new_features.len();
624 let mut result = Array2::zeros((X.nrows(), n_new_features));
625 for (col, feature) in new_features.iter().enumerate() {
626 for row in 0..X.nrows() {
627 result[[row, col]] = feature[row];
628 }
629 }
630 Ok(result)
631 }
632 fn apply_interaction_features(
633 &self,
634 max_pairs: Option<usize>,
635 threshold: f64,
636 feature_pairs: &mut Option<Vec<(usize, usize)>>,
637 X: ArrayView2<f64>,
638 ) -> Result<Array2<f64>> {
639 let n_features = X.ncols();
640 let mut interactions = Vec::new();
641 let pairs: Vec<(usize, usize)>;
642 if feature_pairs.is_none() {
643 let mut candidate_pairs = Vec::new();
644 for i in 0..n_features {
645 for j in (i + 1)..n_features {
646 let corr = self.compute_correlation(X.column(i), X.column(j));
647 if corr.abs() > threshold {
648 candidate_pairs.push((i, j, corr.abs()));
649 }
650 }
651 }
652 candidate_pairs
653 .sort_by(|a, b| b.2.partial_cmp(&a.2).expect("operation should succeed"));
654 let limit = max_pairs.unwrap_or(candidate_pairs.len());
655 pairs = candidate_pairs
656 .into_iter()
657 .take(limit)
658 .map(|(i, j, _)| (i, j))
659 .collect();
660 *feature_pairs = Some(pairs.clone());
661 } else {
662 pairs = feature_pairs
663 .as_ref()
664 .expect("operation should succeed")
665 .clone();
666 }
667 for &(i, j) in &pairs {
668 let mut interaction = Array1::zeros(X.nrows());
669 for row in 0..X.nrows() {
670 interaction[row] = X[[row, i]] * X[[row, j]];
671 }
672 interactions.push(interaction);
673 }
674 let total_features = n_features + interactions.len();
675 let mut result = Array2::zeros((X.nrows(), total_features));
676 for col in 0..n_features {
677 for row in 0..X.nrows() {
678 result[[row, col]] = X[[row, col]];
679 }
680 }
681 for (idx, interaction) in interactions.iter().enumerate() {
682 for row in 0..X.nrows() {
683 result[[row, n_features + idx]] = interaction[row];
684 }
685 }
686 Ok(result)
687 }
    /// Replaces each column's values with discrete bin indices in
    /// `0..n_bins`, fitting `n_bins + 1` edges per column on first call
    /// and caching them in `bin_edges`.
    ///
    /// NOTE(review): the KMeans strategy currently falls back to uniform
    /// edges, and the `value <= edge` search below matches the FIRST
    /// edge (the column minimum), so only exact minima land in bin 0 and
    /// the rest of the first interval maps to bin 1 — looks like an
    /// off-by-one; confirm intended bin semantics.
    fn apply_binning_features(
        &self,
        n_bins: usize,
        strategy: &BinningStrategy,
        bin_edges: &mut Option<HashMap<usize, Vec<f64>>>,
        X: ArrayView2<f64>,
    ) -> Result<Array2<f64>> {
        let mut result = X.to_owned();
        if bin_edges.is_none() {
            let mut edges_map = HashMap::new();
            for col in 0..X.ncols() {
                let column = X.column(col);
                let edges = match strategy {
                    // Evenly spaced edges from column min to max.
                    BinningStrategy::Uniform => {
                        let min_val = column.iter().fold(f64::INFINITY, |a, &b| a.min(b));
                        let max_val = column.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
                        let step = (max_val - min_val) / n_bins as f64;
                        (0..=n_bins)
                            .map(|i| min_val + i as f64 * step)
                            .collect::<Vec<f64>>()
                    }
                    // Edges at the empirical quantiles, so bins hold
                    // roughly equal numbers of samples.
                    BinningStrategy::Quantile => {
                        let mut sorted_values: Vec<f64> = column.to_vec();
                        sorted_values
                            .sort_by(|a, b| a.partial_cmp(b).expect("operation should succeed"));
                        let n = sorted_values.len();
                        (0..=n_bins)
                            .map(|i| {
                                let quantile = i as f64 / n_bins as f64;
                                let idx = ((n - 1) as f64 * quantile) as usize;
                                sorted_values[idx]
                            })
                            .collect::<Vec<f64>>()
                    }
                    // Placeholder: same as Uniform (no clustering done).
                    BinningStrategy::KMeans => {
                        let min_val = column.iter().fold(f64::INFINITY, |a, &b| a.min(b));
                        let max_val = column.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
                        let step = (max_val - min_val) / n_bins as f64;
                        (0..=n_bins)
                            .map(|i| min_val + i as f64 * step)
                            .collect::<Vec<f64>>()
                    }
                };
                edges_map.insert(col, edges);
            }
            *bin_edges = Some(edges_map);
        }
        if let Some(ref edges_map) = bin_edges {
            for col in 0..X.ncols() {
                if let Some(edges) = edges_map.get(&col) {
                    for row in 0..X.nrows() {
                        let value = X[[row, col]];
                        // First edge not below the value; values past the
                        // last edge fall into the top bin. Result clamped
                        // to the valid bin range 0..n_bins.
                        let bin = edges
                            .iter()
                            .position(|&edge| value <= edge)
                            .unwrap_or(edges.len() - 1)
                            .min(n_bins - 1);
                        result[[row, col]] = bin as f64;
                    }
                }
            }
        }
        Ok(result)
    }
    /// Static stand-in used by `fit`: keeps every feature (all-true mask).
    ///
    /// NOTE(review): the instance method `apply_selection_method`
    /// implements variance/correlation/univariate filters, but `fit`
    /// calls THIS static version, so no filtering actually happens
    /// during training — confirm this is intentional.
    fn apply_selection_method_static(
        _method: &mut SelectionMethod,
        X: ArrayView2<f64>,
        _y: ArrayView1<f64>,
    ) -> Result<Array1<bool>> {
        Ok(Array1::from_elem(X.ncols(), true))
    }
    /// Dispatches a selection method and returns a keep/drop mask with
    /// one entry per column of `X`.
    ///
    /// Unhandled variants keep every feature.
    fn apply_selection_method(
        &self,
        method: &mut SelectionMethod,
        X: ArrayView2<f64>,
        y: ArrayView1<f64>,
    ) -> Result<Array1<bool>> {
        match method {
            SelectionMethod::VarianceThreshold {
                threshold,
                feature_variance,
            } => self.apply_variance_threshold(*threshold, feature_variance, X),
            SelectionMethod::CorrelationFilter {
                threshold,
                method: corr_method,
                correlation_matrix,
            } => self.apply_correlation_filter(*threshold, corr_method, correlation_matrix, X),
            SelectionMethod::UnivariateFilter {
                method: uni_method,
                k,
                score_func,
            } => self.apply_univariate_filter(uni_method, k, score_func, X, y),
            // Unimplemented methods select everything.
            _ => Ok(Array1::from_elem(X.ncols(), true)),
        }
    }
783 fn apply_variance_threshold(
784 &self,
785 threshold: f64,
786 feature_variance: &mut Option<Array1<f64>>,
787 X: ArrayView2<f64>,
788 ) -> Result<Array1<bool>> {
789 if feature_variance.is_none() {
790 let mut variances = Array1::zeros(X.ncols());
791 for col in 0..X.ncols() {
792 variances[col] = X.column(col).var(1.0);
793 }
794 *feature_variance = Some(variances);
795 }
796 let variances = feature_variance.as_ref().expect("operation should succeed");
797 let selection = variances.mapv(|v| v > threshold);
798 Ok(selection)
799 }
800 fn apply_correlation_filter(
801 &self,
802 threshold: f64,
803 corr_method: &CorrelationMethod,
804 correlation_matrix: &mut Option<Array2<f64>>,
805 X: ArrayView2<f64>,
806 ) -> Result<Array1<bool>> {
807 if correlation_matrix.is_none() {
808 let n_features = X.ncols();
809 let mut corr_matrix = Array2::zeros((n_features, n_features));
810 for i in 0..n_features {
811 for j in 0..n_features {
812 if i == j {
813 corr_matrix[[i, j]] = 1.0;
814 } else {
815 let corr = match corr_method {
816 CorrelationMethod::Pearson => {
817 self.compute_correlation(X.column(i), X.column(j))
818 }
819 _ => self.compute_correlation(X.column(i), X.column(j)),
820 };
821 corr_matrix[[i, j]] = corr;
822 }
823 }
824 }
825 *correlation_matrix = Some(corr_matrix);
826 }
827 let corr_matrix = correlation_matrix
828 .as_ref()
829 .expect("operation should succeed");
830 let mut selection = Array1::from_elem(X.ncols(), true);
831 for i in 0..X.ncols() {
832 for j in (i + 1)..X.ncols() {
833 if corr_matrix[[i, j]].abs() > threshold && selection[i] && selection[j] {
834 let var_i = X.column(i).var(1.0);
835 let var_j = X.column(j).var(1.0);
836 if var_i < var_j {
837 selection[i] = false;
838 } else {
839 selection[j] = false;
840 }
841 }
842 }
843 }
844 Ok(selection)
845 }
846 fn apply_univariate_filter(
847 &self,
848 _method: &UnivariateMethod,
849 k: &SelectionCount,
850 score_func: &UnivariateScoreFunction,
851 X: ArrayView2<f64>,
852 y: ArrayView1<f64>,
853 ) -> Result<Array1<bool>> {
854 let mut scores = Array1::zeros(X.ncols());
855 for col in 0..X.ncols() {
856 scores[col] = match score_func {
857 UnivariateScoreFunction::Chi2 => self.compute_chi2_score(X.column(col), y),
858 UnivariateScoreFunction::FClassif => self.compute_f_score(X.column(col), y),
859 UnivariateScoreFunction::MutualInfoClassif => {
860 self.compute_mutual_info(X.column(col), y)
861 }
862 _ => self.compute_correlation(X.column(col), y).abs(),
863 };
864 }
865 let selection = match k {
866 SelectionCount::K(k_val) => {
867 let mut indexed_scores: Vec<(usize, f64)> = scores
868 .iter()
869 .enumerate()
870 .map(|(i, &score)| (i, score))
871 .collect();
872 indexed_scores
873 .sort_by(|a, b| b.1.partial_cmp(&a.1).expect("operation should succeed"));
874 let mut selection = Array1::from_elem(X.ncols(), false);
875 for &(idx, _) in indexed_scores.iter().take(*k_val) {
876 selection[idx] = true;
877 }
878 selection
879 }
880 SelectionCount::Percentile(p) => {
881 let k_val = ((X.ncols() as f64 * p / 100.0).round() as usize).max(1);
882 let mut indexed_scores: Vec<(usize, f64)> = scores
883 .iter()
884 .enumerate()
885 .map(|(i, &score)| (i, score))
886 .collect();
887 indexed_scores
888 .sort_by(|a, b| b.1.partial_cmp(&a.1).expect("operation should succeed"));
889 let mut selection = Array1::from_elem(X.ncols(), false);
890 for &(idx, _) in indexed_scores.iter().take(k_val) {
891 selection[idx] = true;
892 }
893 selection
894 }
895 _ => {
896 let k_val = X.ncols() / 2;
897 let mut indexed_scores: Vec<(usize, f64)> = scores
898 .iter()
899 .enumerate()
900 .map(|(i, &score)| (i, score))
901 .collect();
902 indexed_scores
903 .sort_by(|a, b| b.1.partial_cmp(&a.1).expect("operation should succeed"));
904 let mut selection = Array1::from_elem(X.ncols(), false);
905 for &(idx, _) in indexed_scores.iter().take(k_val) {
906 selection[idx] = true;
907 }
908 selection
909 }
910 };
911 Ok(selection)
912 }
    /// Dispatches a dimensionality-reduction step.
    ///
    /// PCA and TruncatedSVD have dedicated (placeholder) routines; every
    /// other variant falls back to truncating `X` to its first
    /// `n_components` columns (capped at 50 when the variant carries no
    /// component count).
    fn apply_dimensionality_reduction(
        &self,
        reduction: &mut DimensionalityReductionStep,
        X: ArrayView2<f64>,
    ) -> Result<Array2<f64>> {
        match reduction {
            DimensionalityReductionStep::PCA {
                n_components,
                whiten,
                svd_solver,
                components,
                explained_variance,
            } => self.apply_pca(
                *n_components,
                *whiten,
                svd_solver,
                components,
                explained_variance,
                X,
            ),
            DimensionalityReductionStep::TruncatedSVD {
                n_components,
                algorithm,
                components,
                singular_values,
            } => self.apply_truncated_svd(*n_components, algorithm, components, singular_values, X),
            _ => {
                // Extract the requested component count from whichever
                // variant this is; unknown variants default to min(ncols, 50).
                let n_comp = match reduction {
                    DimensionalityReductionStep::ICA { n_components, .. } => *n_components,
                    DimensionalityReductionStep::FactorAnalysis { n_components, .. } => {
                        *n_components
                    }
                    DimensionalityReductionStep::UMAP { n_components, .. } => *n_components,
                    DimensionalityReductionStep::TSNE { n_components, .. } => *n_components,
                    _ => X.ncols().min(50),
                };
                // Placeholder reduction: keep the first `final_components`
                // columns verbatim (no actual projection is computed).
                let final_components = n_comp.min(X.ncols());
                let mut result = Array2::zeros((X.nrows(), final_components));
                for col in 0..final_components {
                    for row in 0..X.nrows() {
                        result[[row, col]] = X[[row, col]];
                    }
                }
                Ok(result)
            }
        }
    }
    /// PCA placeholder: centers each column on its mean, then returns the
    /// first `n_comp` CENTERED columns verbatim.
    ///
    /// NOTE(review): no eigen/SVD decomposition is performed — the cached
    /// `components` are set to the identity matrix and
    /// `explained_variance` to ones, so this is not a real PCA; `whiten`
    /// and `svd_solver` are ignored. Confirm whether a proper
    /// implementation is planned.
    fn apply_pca(
        &self,
        n_components: usize,
        _whiten: bool,
        _svd_solver: &SVDSolver,
        components: &mut Option<Array2<f64>>,
        explained_variance: &mut Option<Array1<f64>>,
        X: ArrayView2<f64>,
    ) -> Result<Array2<f64>> {
        // Component count can never exceed either matrix dimension.
        let n_comp = n_components.min(X.ncols()).min(X.nrows());
        let mut centered_X = X.to_owned();
        let mut means = Array1::zeros(X.ncols());
        for col in 0..X.ncols() {
            means[col] = X.column(col).mean().unwrap_or(0.0);
            for row in 0..X.nrows() {
                centered_X[[row, col]] -= means[col];
            }
        }
        if components.is_none() {
            *components = Some(Array2::eye(X.ncols()));
            *explained_variance = Some(Array1::ones(n_comp));
        }
        let mut result = Array2::zeros((X.nrows(), n_comp));
        for col in 0..n_comp {
            for row in 0..X.nrows() {
                result[[row, col]] = centered_X[[row, col]];
            }
        }
        Ok(result)
    }
    /// Truncated-SVD placeholder: returns the first `n_comp` columns of
    /// `X` verbatim (no centering, no decomposition).
    ///
    /// NOTE(review): cached `components` are the identity and
    /// `singular_values` are ones; `algorithm` is ignored. Not a real
    /// SVD — confirm whether an implementation is planned.
    fn apply_truncated_svd(
        &self,
        n_components: usize,
        _algorithm: &SVDAlgorithm,
        components: &mut Option<Array2<f64>>,
        singular_values: &mut Option<Array1<f64>>,
        X: ArrayView2<f64>,
    ) -> Result<Array2<f64>> {
        // Component count can never exceed either matrix dimension.
        let n_comp = n_components.min(X.ncols()).min(X.nrows());
        if components.is_none() {
            *components = Some(Array2::eye(X.ncols()));
            *singular_values = Some(Array1::ones(n_comp));
        }
        let mut result = Array2::zeros((X.nrows(), n_comp));
        for col in 0..n_comp {
            for row in 0..X.nrows() {
                result[[row, col]] = X[[row, col]];
            }
        }
        Ok(result)
    }
    /// Dispatches a model-based selection step and returns the indices
    /// of the features to keep.
    ///
    /// Unhandled variants keep every feature.
    fn apply_model_selection(
        &self,
        model_selection: &mut ModelSelectionStep,
        X: ArrayView2<f64>,
        y: ArrayView1<f64>,
    ) -> Result<Vec<usize>> {
        match model_selection {
            ModelSelectionStep::CrossValidationSelection {
                estimator,
                cv_folds,
                scoring,
                feature_scores,
            } => self.apply_cv_selection(estimator, *cv_folds, scoring, feature_scores, X, y),
            ModelSelectionStep::ForwardSelection {
                estimator,
                max_features,
                scoring,
                selected_features,
            } => self.apply_forward_selection(
                estimator,
                *max_features,
                scoring,
                selected_features,
                X,
                y,
            ),
            // Unimplemented strategies keep all features.
            _ => Ok((0..X.ncols()).collect()),
        }
    }
1040 fn apply_cv_selection(
1041 &self,
1042 _estimator: &ModelEstimator,
1043 _cv_folds: usize,
1044 _scoring: &ScoringMetric,
1045 feature_scores: &mut Option<Array1<f64>>,
1046 X: ArrayView2<f64>,
1047 y: ArrayView1<f64>,
1048 ) -> Result<Vec<usize>> {
1049 if feature_scores.is_none() {
1050 let mut scores = Array1::zeros(X.ncols());
1051 for col in 0..X.ncols() {
1052 scores[col] = self.compute_correlation(X.column(col), y).abs();
1053 }
1054 *feature_scores = Some(scores);
1055 }
1056 if let Some(ref scores) = feature_scores {
1057 let mut indexed_scores: Vec<(usize, f64)> = scores
1058 .iter()
1059 .enumerate()
1060 .map(|(i, &score)| (i, score))
1061 .collect();
1062 indexed_scores.sort_by(|a, b| b.1.partial_cmp(&a.1).expect("operation should succeed"));
1063 let n_select = X.ncols() / 2;
1064 Ok(indexed_scores
1065 .into_iter()
1066 .take(n_select)
1067 .map(|(idx, _)| idx)
1068 .collect())
1069 } else {
1070 Ok((0..X.ncols()).collect())
1071 }
1072 }
1073 fn apply_forward_selection(
1074 &self,
1075 _estimator: &ModelEstimator,
1076 max_features: usize,
1077 _scoring: &ScoringMetric,
1078 selected_features: &mut Option<Vec<usize>>,
1079 X: ArrayView2<f64>,
1080 y: ArrayView1<f64>,
1081 ) -> Result<Vec<usize>> {
1082 if selected_features.is_none() {
1083 let mut scores = Vec::new();
1084 for col in 0..X.ncols() {
1085 let score = self.compute_correlation(X.column(col), y).abs();
1086 scores.push((col, score));
1087 }
1088 scores.sort_by(|a, b| b.1.partial_cmp(&a.1).expect("operation should succeed"));
1089 let features: Vec<usize> = scores
1090 .into_iter()
1091 .take(max_features.min(X.ncols()))
1092 .map(|(idx, _)| idx)
1093 .collect();
1094 *selected_features = Some(features.clone());
1095 Ok(features)
1096 } else {
1097 Ok(selected_features
1098 .as_ref()
1099 .expect("operation should succeed")
1100 .clone())
1101 }
1102 }
1103 fn compute_correlation(&self, x: ArrayView1<f64>, y: ArrayView1<f64>) -> f64 {
1104 let n = x.len() as f64;
1105 if n < 2.0 {
1106 return 0.0;
1107 }
1108 let mean_x = x.mean().unwrap_or(0.0);
1109 let mean_y = y.mean().unwrap_or(0.0);
1110 let mut sum_xy = 0.0;
1111 let mut sum_x2 = 0.0;
1112 let mut sum_y2 = 0.0;
1113 for i in 0..x.len() {
1114 let dx = x[i] - mean_x;
1115 let dy = y[i] - mean_y;
1116 sum_xy += dx * dy;
1117 sum_x2 += dx * dx;
1118 sum_y2 += dy * dy;
1119 }
1120 let denom = (sum_x2 * sum_y2).sqrt();
1121 if denom < 1e-10 {
1122 0.0
1123 } else {
1124 sum_xy / denom
1125 }
1126 }
    /// Chi-squared-style feature score.
    ///
    /// NOTE(review): placeholder — implemented as |Pearson correlation|, not a
    /// real chi² statistic; replace when a proper implementation exists.
    fn compute_chi2_score(&self, x: ArrayView1<f64>, y: ArrayView1<f64>) -> f64 {
        self.compute_correlation(x, y).abs()
    }
    /// F-statistic-style feature score.
    ///
    /// NOTE(review): placeholder — implemented as |Pearson correlation|.
    fn compute_f_score(&self, x: ArrayView1<f64>, y: ArrayView1<f64>) -> f64 {
        self.compute_correlation(x, y).abs()
    }
    /// Mutual-information-style feature score.
    ///
    /// NOTE(review): placeholder — implemented as |Pearson correlation|, which
    /// captures only linear dependence.
    fn compute_mutual_info(&self, x: ArrayView1<f64>, y: ArrayView1<f64>) -> f64 {
        self.compute_correlation(x, y).abs()
    }
1136}
1137impl FeatureSelectionPipeline<Trained> {
1138 pub fn transform(&self, X: ArrayView2<f64>) -> Result<Array2<f64>> {
1139 let _start_time = Instant::now();
1140 let current_X = X.to_owned();
1141 Ok(current_X)
1142 }
1143 pub fn get_pipeline_info(&self) -> PipelineInfo {
1145 PipelineInfo {
1146 n_preprocessing_steps: self.preprocessing_steps.len(),
1147 n_feature_engineering_steps: self.feature_engineering_steps.len(),
1148 n_selection_methods: self.selection_methods.len(),
1149 has_dimensionality_reduction: self.dimensionality_reduction.is_some(),
1150 has_model_selection: self.model_selection.is_some(),
1151 config: self.pipeline_config.clone(),
1152 }
1153 }
1154}
/// Correlation estimator used by correlation-based filters.
#[derive(Debug, Clone)]
pub enum CorrelationMethod {
    /// Linear (Pearson) correlation.
    Pearson,
    /// Rank-based (Spearman) correlation.
    Spearman,
    /// Concordance-based (Kendall tau) correlation.
    Kendall,
}
/// Power-transform family for variance stabilization.
#[derive(Debug, Clone)]
pub enum PowerMethod {
    /// Yeo-Johnson transform.
    YeoJohnson,
    /// Box-Cox transform.
    BoxCox,
}
/// Parameters learned by a trained pipeline step, keyed by step kind.
#[derive(Debug)]
pub enum StepParameters {
    /// Opaque preprocessing parameters (downcast at the use site).
    Preprocessing(Box<dyn std::any::Any + Send + Sync>),
    /// Opaque feature-engineering parameters (downcast at the use site).
    FeatureEngineering(Box<dyn std::any::Any + Send + Sync>),
    /// Per-feature keep/drop mask from a selection step.
    Selection(Array1<bool>),
    /// Projection matrix from a dimensionality-reduction step.
    DimensionalityReduction(Array2<f64>),
    /// Indices of the features kept by a model-selection step.
    ModelSelection(Vec<usize>),
}
/// Cardinality of a transformation's input→output feature mapping.
#[derive(Debug, Clone)]
pub enum TransformationType {
    OneToOne,
    OneToMany,
    ManyToOne,
    ManyToMany,
}
/// Aggregated validation outcomes for a fitted pipeline.
#[derive(Debug, Clone)]
pub struct ValidationResults {
    // One score per cross-validation fold (presumably — confirm against producer).
    pub cross_validation_scores: Vec<f64>,
    pub stability_scores: Vec<f64>,
    pub robustness_scores: Vec<f64>,
    pub statistical_significance: bool,
}
/// Feature-selection algorithms supported by the pipeline.
///
/// `Option<…>` fields hold state produced during fitting and are expected to
/// be `None` before fit (NOTE(review): inferred from the `Option` types —
/// confirm against the fitting code).
#[derive(Debug, Clone)]
pub enum SelectionMethod {
    /// Score each feature independently and keep the best ones.
    UnivariateFilter {
        method: UnivariateMethod,
        k: SelectionCount,
        score_func: UnivariateScoreFunction,
    },
    /// Iteratively eliminate the least important features (RFE).
    RecursiveFeatureElimination {
        estimator: RFEEstimator,
        n_features: SelectionCount,
        step: f64,
        importance_getter: ImportanceGetter,
    },
    /// Keep features whose model-derived importance clears a threshold.
    SelectFromModel {
        estimator: ModelEstimator,
        threshold: SelectionThreshold,
        prefit: bool,
        max_features: Option<usize>,
    },
    /// Drop features whose variance is below `threshold`.
    VarianceThreshold {
        threshold: f64,
        feature_variance: Option<Array1<f64>>,
    },
    /// Drop features that are too correlated with each other.
    CorrelationFilter {
        threshold: f64,
        method: CorrelationMethod,
        correlation_matrix: Option<Array2<f64>>,
    },
    /// Select by mutual information with the target.
    MutualInformation {
        k: SelectionCount,
        discrete_features: Vec<bool>,
        random_state: Option<u64>,
    },
    /// L1-regularized linear model; zeroed coefficients drop features.
    LASSO {
        alpha: f64,
        max_iter: usize,
        tol: f64,
        coefficients: Option<Array1<f64>>,
    },
    /// Combined L1/L2-regularized linear model.
    ElasticNet {
        alpha: f64,
        l1_ratio: f64,
        max_iter: usize,
        tol: f64,
        coefficients: Option<Array1<f64>>,
    },
    /// Tree-ensemble feature importances.
    TreeBased {
        estimator_type: TreeEstimatorType,
        n_estimators: usize,
        max_depth: Option<usize>,
        feature_importances: Option<Array1<f64>>,
    },
    /// Evolutionary search over feature subsets.
    GeneticAlgorithm {
        population_size: usize,
        n_generations: usize,
        mutation_rate: f64,
        crossover_rate: f64,
        best_individuals: Option<Vec<Vec<bool>>>,
    },
    /// Particle-swarm search over feature subsets.
    ParticleSwarmOptimization {
        n_particles: usize,
        n_iterations: usize,
        inertia: f64,
        cognitive: f64,
        social: f64,
        best_positions: Option<Vec<Vec<f64>>>,
    },
    /// Simulated-annealing search over feature subsets.
    SimulatedAnnealing {
        initial_temp: f64,
        cooling_rate: f64,
        min_temp: f64,
        max_iter: usize,
        current_solution: Option<Vec<bool>>,
    },
}
/// Direction of a stepwise selection procedure.
#[derive(Debug, Clone)]
pub enum StepwiseDirection {
    /// Add features one at a time.
    Forward,
    /// Remove features one at a time.
    Backward,
    /// Alternate adding and removing.
    Both,
}
/// Strategy for filling in missing values.
#[derive(Debug, Clone)]
pub enum ImputationStrategy {
    Mean,
    Median,
    Mode,
    /// Fill with a fixed value (see `ImputerConfig::fill_value`).
    Constant,
    /// Impute from nearest neighbors.
    KNN,
    /// Iterative model-based imputation.
    Iterative,
}
/// Statistic used by a univariate filter.
#[derive(Debug, Clone)]
pub enum UnivariateMethod {
    Chi2,
    ANOVA,
    MutualInfo,
    Correlation,
}
/// Configuration for a quantile transformer.
#[derive(Debug, Clone)]
pub struct QuantileTransformerConfig {
    pub n_quantiles: usize,
    /// Target distribution of the transformed output.
    pub output_distribution: Distribution,
    /// Optional cap on samples used to estimate the quantiles.
    pub subsample: Option<usize>,
}
/// Configuration for missing-value imputation.
#[derive(Debug, Clone)]
pub struct ImputerConfig {
    pub strategy: ImputationStrategy,
    /// Value used when `strategy` is `Constant`.
    pub fill_value: Option<f64>,
    /// How missing entries are encoded in the input.
    pub missing_values: MissingValueIndicator,
}
/// Configuration for a robust (quantile-based) scaler.
#[derive(Debug, Clone)]
pub struct RobustScalerConfig {
    pub with_centering: bool,
    pub with_scaling: bool,
    /// (lower, upper) quantiles used to compute the scale.
    pub quantile_range: (f64, f64),
}
/// Bookkeeping collected while fitting/applying a pipeline.
#[derive(Debug, Clone)]
pub struct PipelineMetadata {
    pub total_training_time: Duration,
    pub total_transform_time: Duration,
    // Peak memory in bytes (presumably — unit not established here; confirm).
    pub memory_usage_peak: usize,
    /// Ratio of final to original feature count.
    pub feature_reduction_ratio: f64,
    /// Named performance metrics keyed by metric name.
    pub performance_metrics: HashMap<String, f64>,
    pub validation_results: Option<ValidationResults>,
}
/// Record of one fitted pipeline step.
#[derive(Debug)]
pub struct TrainedStep {
    pub step_type: String,
    /// Position of the step within the pipeline.
    pub step_index: usize,
    pub training_time: Duration,
    pub feature_count_before: usize,
    pub feature_count_after: usize,
    /// Learned parameters, by step kind.
    pub parameters: StepParameters,
}
/// Outlier-detection algorithm used by the outlier remover.
#[derive(Debug, Clone)]
pub enum OutlierMethod {
    IsolationForest,
    LocalOutlierFactor,
    OneClassSVM,
    EllipticEnvelope,
}
/// Scoring function for univariate filters (classification vs. regression).
#[derive(Debug, Clone)]
pub enum UnivariateScoreFunction {
    Chi2,
    FClassif,
    FRegression,
    MutualInfoClassif,
    MutualInfoRegression,
}
/// Configuration for a min-max scaler.
#[derive(Debug, Clone)]
pub struct MinMaxScalerConfig {
    /// Target (min, max) range of scaled values.
    pub feature_range: (f64, f64),
    /// Whether to clip transformed values to `feature_range`.
    pub clip: bool,
}
/// Fitted parameters of a robust scaler.
#[derive(Debug, Clone)]
pub struct RobustScalerParams {
    /// Per-feature center (subtracted).
    pub center: Array1<f64>,
    /// Per-feature scale (divided by).
    pub scale: Array1<f64>,
}
/// Global execution settings for a feature-selection pipeline.
#[derive(Debug, Clone)]
pub struct PipelineConfiguration {
    pub parallel_execution: bool,
    pub memory_optimization: MemoryOptimization,
    pub caching_strategy: CachingStrategy,
    pub validation_strategy: ValidationStrategy,
    pub error_handling: ErrorHandling,
    pub logging_level: LoggingLevel,
}
/// Base estimator used by recursive feature elimination.
#[derive(Debug, Clone)]
pub enum RFEEstimator {
    SVM,
    RandomForest,
    LinearRegression,
    LogisticRegression,
}
/// SVD solver choice for PCA.
#[derive(Debug, Clone)]
pub enum SVDSolver {
    /// Let the implementation pick a solver.
    Auto,
    /// Full (exact) decomposition.
    Full,
    /// Iterative ARPACK-style solver.
    Arpack,
    /// Randomized approximate solver.
    Randomized,
}
/// Dimensionality-reduction stages available to the pipeline.
///
/// `Option<…>` fields hold fitted state (NOTE(review): inferred from the
/// `Option` types — confirm they are `None` before fitting).
#[derive(Debug, Clone)]
pub enum DimensionalityReductionStep {
    /// Principal component analysis.
    PCA {
        n_components: usize,
        whiten: bool,
        svd_solver: SVDSolver,
        components: Option<Array2<f64>>,
        explained_variance: Option<Array1<f64>>,
    },
    /// Truncated singular value decomposition.
    TruncatedSVD {
        n_components: usize,
        algorithm: SVDAlgorithm,
        components: Option<Array2<f64>>,
        singular_values: Option<Array1<f64>>,
    },
    /// Independent component analysis.
    ICA {
        n_components: usize,
        algorithm: ICAAlgorithm,
        max_iter: usize,
        tol: f64,
        mixing_matrix: Option<Array2<f64>>,
        unmixing_matrix: Option<Array2<f64>>,
    },
    /// Factor analysis.
    FactorAnalysis {
        n_components: usize,
        max_iter: usize,
        tol: f64,
        loadings: Option<Array2<f64>>,
        noise_variance: Option<Array1<f64>>,
    },
    /// Uniform manifold approximation and projection.
    UMAP {
        n_components: usize,
        n_neighbors: usize,
        min_dist: f64,
        metric: DistanceMetric,
        embedding: Option<Array2<f64>>,
    },
    /// t-distributed stochastic neighbor embedding.
    TSNE {
        n_components: usize,
        perplexity: f64,
        early_exaggeration: f64,
        learning_rate: f64,
        max_iter: usize,
        embedding: Option<Array2<f64>>,
    },
}
/// Feature-engineering stages available to the pipeline.
///
/// `Option<…>` fields hold fitted state (NOTE(review): inferred from the
/// `Option` types — confirm against the fitting code).
#[derive(Debug, Clone)]
pub enum FeatureEngineeringStep {
    /// Polynomial expansion of the input features.
    PolynomialFeatures {
        degree: usize,
        /// Only cross terms, no pure powers.
        interaction_only: bool,
        include_bias: bool,
        feature_mapping: Option<Vec<(usize, usize)>>,
    },
    /// Pairwise interaction terms between selected features.
    InteractionFeatures {
        max_pairs: Option<usize>,
        threshold: f64,
        feature_pairs: Option<Vec<(usize, usize)>>,
    },
    /// Discretize features into bins.
    BinningFeatures {
        n_bins: usize,
        strategy: BinningStrategy,
        /// Learned bin edges per feature index.
        bin_edges: Option<HashMap<usize, Vec<f64>>>,
    },
    /// Encode categories by (smoothed) target statistics.
    TargetEncoding {
        smoothing: f64,
        min_samples_leaf: usize,
        encodings: Option<HashMap<usize, HashMap<String, f64>>>,
    },
    /// Encode categories by their observed frequency.
    FrequencyEncoding {
        min_frequency: f64,
        frequencies: Option<HashMap<usize, HashMap<String, f64>>>,
    },
    /// Ratios of numerator to denominator features.
    RatioFeatures {
        numerator_features: Vec<usize>,
        denominator_features: Vec<usize>,
        /// Small constant guarding against division by zero.
        eps: f64,
    },
    /// Lagged copies of (a subset of) features, for time-series data.
    LaggingFeatures {
        lags: Vec<usize>,
        feature_subset: Option<Vec<usize>>,
    },
    /// Rolling-window statistics over (a subset of) features.
    WindowStatistics {
        window_size: usize,
        statistics: Vec<WindowStatistic>,
        feature_subset: Option<Vec<usize>>,
    },
}
/// Configuration for a power transformer.
#[derive(Debug, Clone)]
pub struct PowerTransformerConfig {
    pub method: PowerMethod,
    /// Whether to standardize the output after transforming.
    pub standardize: bool,
}
/// Configuration for outlier removal.
#[derive(Debug, Clone)]
pub struct OutlierConfig {
    pub method: OutlierMethod,
    pub threshold: f64,
    /// Expected fraction of outliers in the data.
    pub contamination: f64,
}
/// How bin edges are chosen when discretizing features.
#[derive(Debug, Clone)]
pub enum BinningStrategy {
    /// Equal-width bins.
    Uniform,
    /// Equal-frequency bins.
    Quantile,
    /// Bins from k-means cluster centers.
    KMeans,
}
/// Fitted parameters of a quantile transformer.
#[derive(Debug, Clone)]
pub struct QuantileParams {
    // Presumably one row/column of quantiles per feature — confirm layout.
    pub quantiles: Array2<f64>,
    pub references: Array1<f64>,
}
/// Target output distribution of a quantile transformer.
#[derive(Debug, Clone)]
pub enum Distribution {
    Uniform,
    Normal,
}
/// Fitted parameters of a min-max scaler.
#[derive(Debug, Clone)]
pub struct MinMaxScalerParams {
    pub min: Array1<f64>,
    pub scale: Array1<f64>,
}
/// Fitted parameters of an outlier detector.
#[derive(Debug, Clone)]
pub struct OutlierParams {
    /// Per-sample decision scores.
    pub decision_function: Array1<f64>,
    pub threshold: f64,
}
/// Metric used to score candidate feature subsets.
#[derive(Debug, Clone)]
pub enum ScoringMetric {
    Accuracy,
    F1,
    RocAuc,
    R2,
    /// Mean absolute error.
    MAE,
    /// Mean squared error.
    MSE,
    LogLoss,
}
/// Structural summary of a pipeline (see `get_pipeline_info`).
#[derive(Debug, Clone)]
pub struct PipelineInfo {
    pub n_preprocessing_steps: usize,
    pub n_feature_engineering_steps: usize,
    pub n_selection_methods: usize,
    pub has_dimensionality_reduction: bool,
    pub has_model_selection: bool,
    pub config: PipelineConfiguration,
}
/// Algorithm used for truncated SVD.
#[derive(Debug, Clone)]
pub enum SVDAlgorithm {
    Randomized,
    Arpack,
}
/// Memory prefetch strategy for the optimization configuration.
#[derive(Debug, Clone)]
pub enum PrefetchStrategy {
    None,
    Sequential,
    Random,
    Adaptive,
}
/// Fitted parameters of a power transformer.
#[derive(Debug, Clone)]
pub struct PowerParams {
    /// Per-feature transform exponents (lambda values).
    pub lambdas: Array1<f64>,
}
/// Model-driven feature-selection strategies.
///
/// `Option<…>` fields cache the result of running the step (NOTE(review):
/// inferred from the `Option` types and the `apply_*` methods above, which
/// populate them lazily).
#[derive(Debug, Clone)]
pub enum ModelSelectionStep {
    /// Select features using cross-validated scores.
    CrossValidationSelection {
        estimator: ModelEstimator,
        cv_folds: usize,
        scoring: ScoringMetric,
        feature_scores: Option<Array1<f64>>,
    },
    /// Greedily add features up to `max_features`.
    ForwardSelection {
        estimator: ModelEstimator,
        max_features: usize,
        scoring: ScoringMetric,
        selected_features: Option<Vec<usize>>,
    },
    /// Greedily remove features down to `min_features`.
    BackwardElimination {
        estimator: ModelEstimator,
        min_features: usize,
        scoring: ScoringMetric,
        remaining_features: Option<Vec<usize>>,
    },
    /// Stepwise selection with entry/removal p-value thresholds.
    StepwiseSelection {
        estimator: ModelEstimator,
        direction: StepwiseDirection,
        p_enter: f64,
        p_remove: f64,
        selected_features: Option<Vec<usize>>,
    },
    /// Bayesian optimization over feature subsets.
    BayesianOptimization {
        estimator: ModelEstimator,
        acquisition_function: AcquisitionFunction,
        n_calls: usize,
        optimal_features: Option<Vec<usize>>,
    },
}
/// One entry in a `FeatureMapping`'s transformation history.
#[derive(Debug, Clone)]
pub struct TransformationStep {
    pub step_name: String,
    /// Number of features entering the step.
    pub input_features: usize,
    /// Number of features leaving the step.
    pub output_features: usize,
    pub transformation_type: TransformationType,
}
/// How feature importances are read from an RFE estimator.
#[derive(Debug, Clone)]
pub enum ImportanceGetter {
    /// Choose automatically based on the estimator.
    Auto,
    /// Use model coefficients.
    Coefficients,
    /// Use tree/ensemble feature importances.
    FeatureImportances,
}
/// FastICA update scheme.
#[derive(Debug, Clone)]
pub enum ICAAlgorithm {
    /// Estimate all components simultaneously.
    Parallel,
    /// Estimate components one at a time.
    Deflation,
}
/// How the pipeline reacts to step failures.
#[derive(Debug, Clone)]
pub enum ErrorHandling {
    /// Fail fast on any error.
    Strict,
    /// Attempt to continue past recoverable errors.
    Graceful,
    /// Log errors and continue.
    Logging,
}
/// Preprocessing stages available to the pipeline.
///
/// Each variant pairs its configuration with `trained_params`, which is
/// `None` until the step has been fitted (NOTE(review): inferred from the
/// `Option` types — confirm against the fitting code).
#[derive(Debug, Clone)]
pub enum PreprocessingStep {
    /// Zero-mean / unit-variance scaling.
    StandardScaler {
        config: StandardScalerConfig,
        trained_params: Option<ScalerParams>,
    },
    /// Median/IQR-based scaling, robust to outliers.
    RobustScaler {
        config: RobustScalerConfig,
        trained_params: Option<RobustScalerParams>,
    },
    /// Scale features into a fixed range.
    MinMaxScaler {
        config: MinMaxScalerConfig,
        trained_params: Option<MinMaxScalerParams>,
    },
    /// Map features onto a target distribution via quantiles.
    QuantileTransformer {
        config: QuantileTransformerConfig,
        trained_params: Option<QuantileParams>,
    },
    /// Power transform (Box-Cox / Yeo-Johnson).
    PowerTransformer {
        config: PowerTransformerConfig,
        trained_params: Option<PowerParams>,
    },
    /// Fill in missing values.
    MissingValueImputer {
        config: ImputerConfig,
        trained_params: Option<ImputerParams>,
    },
    /// Detect and remove outlying samples.
    OutlierRemover {
        config: OutlierConfig,
        trained_params: Option<OutlierParams>,
    },
}
/// Type-state marker for a pipeline that has not been fitted yet
/// (counterpart of the `Trained` marker).
#[derive(Debug, Clone, Default)]
pub struct Untrained;
/// Estimator used by model-based selection steps.
#[derive(Debug, Clone)]
pub enum ModelEstimator {
    LinearRegression,
    LogisticRegression,
    RandomForest,
    SVM,
    XGBoost,
    LightGBM,
}
/// Acquisition function for Bayesian-optimization selection.
#[derive(Debug, Clone)]
pub enum AcquisitionFunction {
    ExpectedImprovement,
    UpperConfidenceBound,
    ProbabilityOfImprovement,
}
/// Importance threshold used by `SelectFromModel`.
#[derive(Debug, Clone)]
pub enum SelectionThreshold {
    /// Mean of the importances.
    Mean,
    /// Median of the importances.
    Median,
    /// Fixed absolute cutoff.
    Absolute(f64),
    /// Percentile of the importances.
    Percentile(f64),
    /// Implementation-chosen threshold.
    Auto,
}
/// How many features a selection step keeps.
#[derive(Debug, Clone)]
pub enum SelectionCount {
    /// Keep exactly `k` features.
    K(usize),
    /// Keep a percentage of the features.
    Percentile(f64),
    /// Control the false discovery rate.
    FDR(f64),
    /// Control the false positive rate.
    FPR(f64),
    /// Control the family-wise error rate.
    FWER(f64),
}
/// Fitted parameters of a missing-value imputer.
#[derive(Debug, Clone)]
pub struct ImputerParams {
    /// Per-feature fill statistic (mean/median/mode, per the strategy).
    pub statistics: Array1<f64>,
}
/// Provenance of a feature in the final feature set.
#[derive(Debug, Clone)]
pub enum FeatureOrigin {
    /// Passed through unchanged; holds the original column index.
    Original(usize),
    /// Created from one or more source features by a named operation.
    Engineered {
        source_features: Vec<usize>,
        operation: String,
    },
    /// Derived from a single source feature by a named transformation.
    Transformed {
        source_feature: usize,
        transformation: String,
    },
}