// sklears_model_selection/automl_feature_engineering.rs
1//! Automated Feature Engineering for AutoML
2//!
3//! This module provides comprehensive automated feature engineering capabilities including
4//! feature generation, selection, transformation, and optimization. It automatically creates
5//! and selects the best features for different machine learning tasks.
6
7use crate::{automl_algorithm_selection::DatasetCharacteristics, scoring::TaskType};
8use scirs2_core::ndarray::{concatenate, s, Array1, Array2, ArrayView1, Axis};
9use scirs2_core::SliceRandomExt;
10use sklears_core::error::Result;
11use std::collections::HashMap;
12use std::fmt;
13// use serde::{Deserialize, Serialize};
14// use scirs2_core::rand_prelude::SliceRandom;
15use scirs2_core::random::rngs::StdRng;
16use scirs2_core::random::{RngExt, SeedableRng};
17
/// Types of feature transformations the engineer can apply to produce new
/// candidate columns from an input matrix.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum FeatureTransformationType {
    /// Polynomial features (degree 2, 3, etc.)
    Polynomial { degree: usize },
    /// Logarithmic transformation
    Logarithmic,
    /// Square root transformation
    SquareRoot,
    /// Exponential transformation
    Exponential,
    /// Reciprocal transformation (1/x)
    Reciprocal,
    /// Sine transformation
    Sine,
    /// Cosine transformation
    Cosine,
    /// Absolute value
    Absolute,
    /// Sign function (-1, 0, 1)
    Sign,
    /// Binning/Discretization into `n_bins` buckets
    Binning { n_bins: usize },
    /// Feature interactions (products)
    Interaction,
    /// Feature ratios
    Ratio,
    /// Feature differences
    Difference,
    /// Rolling statistics over a sliding `window`
    RollingStatistics { window: usize },
    /// Lag features shifted by `lag` samples
    Lag { lag: usize },
}
52
53impl fmt::Display for FeatureTransformationType {
54    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
55        match self {
56            FeatureTransformationType::Polynomial { degree } => write!(f, "Polynomial({})", degree),
57            FeatureTransformationType::Logarithmic => write!(f, "Logarithmic"),
58            FeatureTransformationType::SquareRoot => write!(f, "SquareRoot"),
59            FeatureTransformationType::Exponential => write!(f, "Exponential"),
60            FeatureTransformationType::Reciprocal => write!(f, "Reciprocal"),
61            FeatureTransformationType::Sine => write!(f, "Sine"),
62            FeatureTransformationType::Cosine => write!(f, "Cosine"),
63            FeatureTransformationType::Absolute => write!(f, "Absolute"),
64            FeatureTransformationType::Sign => write!(f, "Sign"),
65            FeatureTransformationType::Binning { n_bins } => write!(f, "Binning({})", n_bins),
66            FeatureTransformationType::Interaction => write!(f, "Interaction"),
67            FeatureTransformationType::Ratio => write!(f, "Ratio"),
68            FeatureTransformationType::Difference => write!(f, "Difference"),
69            FeatureTransformationType::RollingStatistics { window } => {
70                write!(f, "RollingStats({})", window)
71            }
72            FeatureTransformationType::Lag { lag } => write!(f, "Lag({})", lag),
73        }
74    }
75}
76
/// Feature engineering strategies controlling how many candidate
/// transformations are generated.
#[derive(Debug, Clone, PartialEq)]
pub enum FeatureEngineeringStrategy {
    /// Conservative: Only basic transformations
    Conservative,
    /// Balanced: Moderate feature generation
    Balanced,
    /// Aggressive: Extensive feature generation
    Aggressive,
    /// Custom: User-defined list of transformations, applied as given
    Custom(Vec<FeatureTransformationType>),
}
89
/// Feature selection methods used to pick the final feature subset.
#[derive(Debug, Clone, PartialEq)]
pub enum FeatureSelectionMethod {
    /// Select K best features using univariate statistics
    SelectKBest { k: usize },
    /// Select percentile of best features
    SelectPercentile { percentile: f64 },
    /// Recursive feature elimination
    RecursiveFeatureElimination { step: usize },
    /// L1-based feature selection
    L1Selection { alpha: f64 },
    /// Variance threshold
    VarianceThreshold { threshold: f64 },
    /// Correlation threshold
    CorrelationThreshold { threshold: f64 },
    /// Mutual information
    MutualInformation { k: usize },
    /// Feature importance from tree models
    TreeImportance { threshold: f64 },
}
110
/// Metadata describing one generated (or original) feature column.
#[derive(Debug, Clone)]
pub struct GeneratedFeature {
    /// Feature name (e.g. "original_feature_3" or "Logarithmic_0")
    pub name: String,
    /// Transformation type used to produce this feature
    pub transformation: FeatureTransformationType,
    /// Source feature indices this column was derived from
    pub source_features: Vec<usize>,
    /// Feature importance score (higher is better)
    pub importance_score: f64,
    /// Whether the feature survived selection
    pub is_selected: bool,
    /// Statistical properties of the feature's values
    pub statistics: FeatureStatistics,
}
127
/// Statistical properties of a single feature column.
#[derive(Debug, Clone)]
pub struct FeatureStatistics {
    /// Mean value
    pub mean: f64,
    /// Standard deviation
    pub std: f64,
    /// Minimum value
    pub min: f64,
    /// Maximum value
    pub max: f64,
    /// Number of unique values
    pub n_unique: usize,
    /// Missing (NaN) value ratio in [0, 1]
    pub missing_ratio: f64,
    /// Skewness
    pub skewness: f64,
    /// Kurtosis
    pub kurtosis: f64,
}
148
/// Configuration for automated feature engineering.
#[derive(Debug, Clone)]
pub struct AutoFeatureEngineering {
    /// Feature engineering strategy
    pub strategy: FeatureEngineeringStrategy,
    /// Feature selection method
    pub selection_method: FeatureSelectionMethod,
    /// Maximum number of features to generate (hard cap on matrix width)
    pub max_features: usize,
    /// Maximum number of features to select
    pub max_selected_features: usize,
    /// Cross-validation folds for feature selection
    pub cv_folds: usize,
    /// Task type (classification or regression)
    pub task_type: TaskType,
    /// Random seed for reproducibility; `None` seeds from the thread RNG
    pub random_seed: Option<u64>,
    /// Enable polynomial features
    pub enable_polynomial: bool,
    /// Enable mathematical transformations (log, sqrt, trig, ...)
    pub enable_math_transforms: bool,
    /// Enable feature interactions
    pub enable_interactions: bool,
    /// Enable time series features (for sequential data)
    pub enable_time_series: bool,
    /// Minimum correlation threshold for feature selection
    pub min_correlation_threshold: f64,
    /// Maximum correlation threshold for redundancy removal
    pub max_correlation_threshold: f64,
}
179
180impl Default for AutoFeatureEngineering {
181    fn default() -> Self {
182        Self {
183            strategy: FeatureEngineeringStrategy::Balanced,
184            selection_method: FeatureSelectionMethod::SelectKBest { k: 100 },
185            max_features: 1000,
186            max_selected_features: 100,
187            cv_folds: 5,
188            task_type: TaskType::Classification,
189            random_seed: None,
190            enable_polynomial: true,
191            enable_math_transforms: true,
192            enable_interactions: true,
193            enable_time_series: false,
194            min_correlation_threshold: 0.05,
195            max_correlation_threshold: 0.95,
196        }
197    }
198}
199
/// Result of a completed feature engineering run.
#[derive(Debug, Clone)]
pub struct FeatureEngineeringResult {
    /// Original feature count
    pub original_feature_count: usize,
    /// Generated feature count (width of the enhanced matrix)
    pub generated_feature_count: usize,
    /// Selected feature count
    pub selected_feature_count: usize,
    /// Metadata for all features (originals and generated)
    pub generated_features: Vec<GeneratedFeature>,
    /// Selected feature indices into the enhanced matrix
    pub selected_indices: Vec<usize>,
    /// Feature importance scores for the selected features
    pub feature_importances: Vec<f64>,
    /// Everything needed to transform new data the same way
    pub transformation_info: TransformationInfo,
    /// Performance improvement over the original feature set
    pub performance_improvement: f64,
    /// Processing time in seconds
    pub processing_time: f64,
}
222
/// Information needed to transform new data the same way as training data.
#[derive(Debug, Clone)]
pub struct TransformationInfo {
    /// Transformation types to apply, each paired with the source column
    /// indices it should read
    pub transformations: Vec<(FeatureTransformationType, Vec<usize>)>,
    /// Selected feature indices after transformation
    pub selected_indices: Vec<usize>,
    /// Scaling parameters
    pub scaling_params: HashMap<usize, (f64, f64)>, // (mean, std) for each feature
    /// Binning boundaries per feature index
    pub binning_boundaries: HashMap<usize, Vec<f64>>,
}
235
236impl fmt::Display for FeatureEngineeringResult {
237    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
238        writeln!(f, "Automated Feature Engineering Results")?;
239        writeln!(f, "=====================================")?;
240        writeln!(f, "Original features: {}", self.original_feature_count)?;
241        writeln!(f, "Generated features: {}", self.generated_feature_count)?;
242        writeln!(f, "Selected features: {}", self.selected_feature_count)?;
243        writeln!(
244            f,
245            "Performance improvement: {:.4}",
246            self.performance_improvement
247        )?;
248        writeln!(f, "Processing time: {:.2}s", self.processing_time)?;
249        writeln!(f)?;
250        writeln!(f, "Top 10 Generated Features:")?;
251
252        let mut top_features: Vec<_> = self
253            .generated_features
254            .iter()
255            .filter(|f| f.is_selected)
256            .collect();
257        top_features.sort_by(|a, b| {
258            b.importance_score
259                .partial_cmp(&a.importance_score)
260                .expect("operation should succeed")
261        });
262
263        for (i, feature) in top_features.iter().take(10).enumerate() {
264            writeln!(
265                f,
266                "{}. {} ({}): {:.4}",
267                i + 1,
268                feature.name,
269                feature.transformation,
270                feature.importance_score
271            )?;
272        }
273        Ok(())
274    }
275}
276
/// Automated feature engineering engine.
pub struct AutoFeatureEngineer {
    /// Engineering and selection configuration.
    config: AutoFeatureEngineering,
    /// RNG used to shuffle candidate transformations; seeded from
    /// `config.random_seed` when set, for reproducibility.
    rng: StdRng,
}
282
283impl Default for AutoFeatureEngineer {
284    fn default() -> Self {
285        Self::new(AutoFeatureEngineering::default())
286    }
287}
288
289impl AutoFeatureEngineer {
290    /// Create a new automated feature engineer
291    pub fn new(config: AutoFeatureEngineering) -> Self {
292        let rng = match config.random_seed {
293            Some(seed) => StdRng::seed_from_u64(seed),
294            None => StdRng::from_rng(&mut scirs2_core::random::thread_rng()),
295        };
296
297        Self { config, rng }
298    }
299
300    /// Perform automated feature engineering
301    pub fn engineer_features(
302        &mut self,
303        X: &Array2<f64>,
304        y: &Array1<f64>,
305    ) -> Result<FeatureEngineeringResult> {
306        let start_time = std::time::Instant::now();
307        let original_feature_count = X.ncols();
308
309        // Step 1: Analyze dataset characteristics
310        let dataset_chars = self.analyze_dataset_for_features(X, y);
311
312        // Step 2: Generate candidate transformations
313        let transformations = self.generate_transformations(&dataset_chars);
314
315        // Step 3: Apply transformations and generate features
316        let (enhanced_X, generated_features) = self.apply_transformations(X, &transformations)?;
317
318        // Step 4: Calculate feature statistics
319        let features_with_stats =
320            self.calculate_feature_statistics(generated_features, &enhanced_X, y);
321
322        // Step 5: Select best features
323        let (selected_features, selected_indices) =
324            self.select_features(&enhanced_X, y, features_with_stats)?;
325
326        // Step 6: Calculate performance improvement
327        let performance_improvement =
328            self.estimate_performance_improvement(X, &enhanced_X, y, &selected_indices)?;
329
330        // Step 7: Create transformation info for future use
331        let transformation_info =
332            self.create_transformation_info(&transformations, &selected_indices, &enhanced_X);
333
334        let processing_time = start_time.elapsed().as_secs_f64();
335
336        Ok(FeatureEngineeringResult {
337            original_feature_count,
338            generated_feature_count: enhanced_X.ncols(),
339            selected_feature_count: selected_indices.len(),
340            generated_features: selected_features,
341            selected_indices: selected_indices.clone(),
342            feature_importances: vec![0.0; selected_indices.len()], // Will be filled by actual importance calculation
343            transformation_info,
344            performance_improvement,
345            processing_time,
346        })
347    }
348
    /// Transform new data using learned transformations.
    ///
    /// Replays the transformations recorded in `transformation_info`, applies
    /// the stored per-column (mean, std) scaling, then selects the learned
    /// feature subset.
    ///
    /// # Errors
    /// Returns an error if a transformation fails or if none of the stored
    /// selected indices fit within the transformed matrix.
    ///
    /// NOTE(review): `create_transformation_info` currently stores EMPTY
    /// source-index lists, so the replay here may not reproduce the exact
    /// training-time columns — confirm before relying on this path.
    pub fn transform(
        &self,
        X: &Array2<f64>,
        transformation_info: &TransformationInfo,
    ) -> Result<Array2<f64>> {
        // Apply the same transformations that were learned during training
        let mut transformed_X = X.clone();

        // Apply transformations; each batch of new columns is appended on the
        // right, mirroring the training-time layout.
        for (transformation, source_indices) in &transformation_info.transformations {
            let new_features =
                self.apply_single_transformation(&transformed_X, transformation, source_indices)?;
            // Concatenate new features
            transformed_X = concatenate(Axis(1), &[transformed_X.view(), new_features.view()])
                .expect("operation should succeed");
        }

        // Apply scaling: standardize each recorded column with its training
        // (mean, std). A zero std would yield inf/NaN here — assumed
        // non-degenerate; TODO confirm upstream.
        for (feature_idx, (mean, std)) in &transformation_info.scaling_params {
            if *feature_idx < transformed_X.ncols() {
                let mut column = transformed_X.column_mut(*feature_idx);
                for value in column.iter_mut() {
                    *value = (*value - mean) / std;
                }
            }
        }

        // Select final features with bounds checking
        let valid_indices: Vec<usize> = transformation_info
            .selected_indices
            .iter()
            .filter(|&&idx| idx < transformed_X.ncols())
            .copied()
            .collect();

        if valid_indices.is_empty() {
            return Err("No valid feature indices to select".into());
        }

        let selected_X = transformed_X.select(Axis(1), &valid_indices);
        Ok(selected_X)
    }
392
    /// Analyze dataset characteristics for feature engineering.
    ///
    /// Builds a `DatasetCharacteristics` summary from `X`/`y`. Several fields
    /// are placeholders in the current implementation (missing ratio,
    /// categorical ratio, noise level, skewness/kurtosis/outliers) — see the
    /// inline comments.
    fn analyze_dataset_for_features(
        &self,
        X: &Array2<f64>,
        y: &Array1<f64>,
    ) -> DatasetCharacteristics {
        let n_samples = X.nrows();
        let n_features = X.ncols();

        // Calculate basic statistics
        let sparsity = self.calculate_sparsity(X);
        let correlation_structure = self.analyze_correlation_structure(X);
        let linearity_score = self.estimate_linearity(X, y);

        // Task-specific analysis: class info for classification, target
        // moments for regression (never both).
        let (n_classes, class_distribution, target_stats) = match self.config.task_type {
            TaskType::Classification => {
                let classes = self.get_unique_classes(y);
                let class_dist = self.calculate_class_distribution(y, &classes);
                (Some(classes.len()), Some(class_dist), None)
            }
            TaskType::Regression => {
                let stats = crate::automl_algorithm_selection::TargetStatistics {
                    // `mean()` is None for an empty target, so this panics on
                    // empty `y` — assumed non-empty; TODO confirm at call sites.
                    mean: y.mean().expect("operation should succeed"),
                    std: y.std(0.0),
                    skewness: 0.0, // Would calculate actual skewness
                    kurtosis: 0.0, // Would calculate actual kurtosis
                    n_outliers: 0, // Would detect outliers
                };
                (None, None, Some(stats))
            }
        };

        crate::automl_algorithm_selection::DatasetCharacteristics {
            n_samples,
            n_features,
            n_classes,
            class_distribution,
            target_stats,
            missing_ratio: 0.0,     // Would calculate actual missing ratio
            categorical_ratio: 0.0, // Would detect categorical features
            correlation_condition_number: correlation_structure,
            sparsity,
            // Heuristic: assume 80% of the features are effective dimensions.
            effective_dimensionality: Some((n_features as f64 * 0.8) as usize),
            noise_level: 0.1, // Would estimate actual noise
            linearity_score,
        }
    }
441
442    /// Generate appropriate transformations based on dataset characteristics
443    fn generate_transformations(
444        &mut self,
445        dataset_chars: &DatasetCharacteristics,
446    ) -> Vec<FeatureTransformationType> {
447        let mut transformations = Vec::new();
448
449        match &self.config.strategy {
450            FeatureEngineeringStrategy::Conservative => {
451                if self.config.enable_polynomial {
452                    transformations.push(FeatureTransformationType::Polynomial { degree: 2 });
453                }
454                if self.config.enable_math_transforms {
455                    transformations.push(FeatureTransformationType::Logarithmic);
456                    transformations.push(FeatureTransformationType::SquareRoot);
457                }
458            }
459
460            FeatureEngineeringStrategy::Balanced => {
461                if self.config.enable_polynomial {
462                    transformations.push(FeatureTransformationType::Polynomial { degree: 2 });
463                    if dataset_chars.n_features < 20 {
464                        transformations.push(FeatureTransformationType::Polynomial { degree: 3 });
465                    }
466                }
467
468                if self.config.enable_math_transforms {
469                    transformations.extend(vec![
470                        FeatureTransformationType::Logarithmic,
471                        FeatureTransformationType::SquareRoot,
472                        FeatureTransformationType::Absolute,
473                        FeatureTransformationType::Reciprocal,
474                    ]);
475                }
476
477                if self.config.enable_interactions && dataset_chars.n_features < 50 {
478                    transformations.push(FeatureTransformationType::Interaction);
479                    transformations.push(FeatureTransformationType::Ratio);
480                }
481
482                transformations.push(FeatureTransformationType::Binning { n_bins: 10 });
483            }
484
485            FeatureEngineeringStrategy::Aggressive => {
486                if self.config.enable_polynomial {
487                    transformations.push(FeatureTransformationType::Polynomial { degree: 2 });
488                    if dataset_chars.n_features < 15 {
489                        transformations.push(FeatureTransformationType::Polynomial { degree: 3 });
490                    }
491                }
492
493                if self.config.enable_math_transforms {
494                    transformations.extend(vec![
495                        FeatureTransformationType::Logarithmic,
496                        FeatureTransformationType::SquareRoot,
497                        FeatureTransformationType::Exponential,
498                        FeatureTransformationType::Reciprocal,
499                        FeatureTransformationType::Sine,
500                        FeatureTransformationType::Cosine,
501                        FeatureTransformationType::Absolute,
502                        FeatureTransformationType::Sign,
503                    ]);
504                }
505
506                if self.config.enable_interactions {
507                    transformations.push(FeatureTransformationType::Interaction);
508                    transformations.push(FeatureTransformationType::Ratio);
509                    transformations.push(FeatureTransformationType::Difference);
510                }
511
512                transformations.extend(vec![
513                    FeatureTransformationType::Binning { n_bins: 5 },
514                    FeatureTransformationType::Binning { n_bins: 10 },
515                    FeatureTransformationType::Binning { n_bins: 20 },
516                ]);
517
518                if self.config.enable_time_series {
519                    transformations.extend(vec![
520                        FeatureTransformationType::RollingStatistics { window: 3 },
521                        FeatureTransformationType::RollingStatistics { window: 5 },
522                        FeatureTransformationType::Lag { lag: 1 },
523                        FeatureTransformationType::Lag { lag: 2 },
524                    ]);
525                }
526            }
527
528            FeatureEngineeringStrategy::Custom(custom_transforms) => {
529                transformations.extend(custom_transforms.clone());
530            }
531        }
532
533        // Randomly shuffle transformations for diversity
534        transformations.shuffle(&mut self.rng);
535
536        // Limit to max_features constraint
537        let max_transforms = (self.config.max_features / dataset_chars.n_features).max(1);
538        transformations.truncate(max_transforms);
539
540        transformations
541    }
542
    /// Apply transformations to generate new features.
    ///
    /// Starts from a copy of `X`, appends the columns produced by each
    /// transformation, and stops once `max_features` is reached. Returns the
    /// enhanced matrix together with one metadata record per column
    /// (original columns first, tagged with a placeholder transformation).
    fn apply_transformations(
        &mut self,
        X: &Array2<f64>,
        transformations: &[FeatureTransformationType],
    ) -> Result<(Array2<f64>, Vec<GeneratedFeature>)> {
        let mut enhanced_X = X.clone();
        let mut generated_features = Vec::new();

        // Start with original features
        for i in 0..X.ncols() {
            generated_features.push(GeneratedFeature {
                name: format!("original_feature_{}", i),
                transformation: FeatureTransformationType::Absolute, // Placeholder
                source_features: vec![i],
                importance_score: 0.0,
                is_selected: false,
                // Zeroed stats; populated later by `calculate_feature_statistics`.
                statistics: FeatureStatistics {
                    mean: 0.0,
                    std: 0.0,
                    min: 0.0,
                    max: 0.0,
                    n_unique: 0,
                    missing_ratio: 0.0,
                    skewness: 0.0,
                    kurtosis: 0.0,
                },
            });
        }

        for transformation in transformations {
            // Pairwise transformations pick feature pairs; everything else
            // reads every original column. Note: indices are always drawn
            // from the ORIGINAL column range even though `enhanced_X` grows.
            let source_indices: Vec<usize> = match transformation {
                FeatureTransformationType::Interaction
                | FeatureTransformationType::Ratio
                | FeatureTransformationType::Difference => {
                    // Select pairs of features
                    self.select_feature_pairs(X.ncols())
                }
                _ => {
                    // Use all original features
                    (0..X.ncols()).collect()
                }
            };

            let new_features =
                self.apply_single_transformation(&enhanced_X, transformation, &source_indices)?;

            // Add metadata for generated features
            for (i, _) in new_features.columns().into_iter().enumerate() {
                generated_features.push(GeneratedFeature {
                    name: format!("{}_{}", transformation, i),
                    transformation: transformation.clone(),
                    source_features: source_indices.clone(),
                    importance_score: 0.0,
                    is_selected: false,
                    statistics: FeatureStatistics {
                        mean: 0.0,
                        std: 0.0,
                        min: 0.0,
                        max: 0.0,
                        n_unique: 0,
                        missing_ratio: 0.0,
                        skewness: 0.0,
                        kurtosis: 0.0,
                    },
                });
            }

            // Concatenate new features
            enhanced_X = concatenate(Axis(1), &[enhanced_X.view(), new_features.view()])
                .expect("operation should succeed");

            // Check if we've reached the maximum number of features
            if enhanced_X.ncols() >= self.config.max_features {
                break;
            }
        }

        Ok((enhanced_X, generated_features))
    }
623
624    /// Apply a single transformation
625    fn apply_single_transformation(
626        &self,
627        X: &Array2<f64>,
628        transformation: &FeatureTransformationType,
629        source_indices: &[usize],
630    ) -> Result<Array2<f64>> {
631        match transformation {
632            FeatureTransformationType::Polynomial { degree } => {
633                self.apply_polynomial_features(X, source_indices, *degree)
634            }
635
636            FeatureTransformationType::Logarithmic => {
637                self.apply_logarithmic_transform(X, source_indices)
638            }
639
640            FeatureTransformationType::SquareRoot => self.apply_sqrt_transform(X, source_indices),
641
642            FeatureTransformationType::Exponential => self.apply_exp_transform(X, source_indices),
643
644            FeatureTransformationType::Reciprocal => {
645                self.apply_reciprocal_transform(X, source_indices)
646            }
647
648            FeatureTransformationType::Sine => self.apply_sine_transform(X, source_indices),
649
650            FeatureTransformationType::Cosine => self.apply_cosine_transform(X, source_indices),
651
652            FeatureTransformationType::Absolute => self.apply_absolute_transform(X, source_indices),
653
654            FeatureTransformationType::Sign => self.apply_sign_transform(X, source_indices),
655
656            FeatureTransformationType::Binning { n_bins } => {
657                self.apply_binning_transform(X, source_indices, *n_bins)
658            }
659
660            FeatureTransformationType::Interaction => {
661                self.apply_interaction_features(X, source_indices)
662            }
663
664            FeatureTransformationType::Ratio => self.apply_ratio_features(X, source_indices),
665
666            FeatureTransformationType::Difference => {
667                self.apply_difference_features(X, source_indices)
668            }
669
670            FeatureTransformationType::RollingStatistics { window } => {
671                self.apply_rolling_statistics(X, source_indices, *window)
672            }
673
674            FeatureTransformationType::Lag { lag } => {
675                self.apply_lag_features(X, source_indices, *lag)
676            }
677        }
678    }
679
680    /// Calculate feature statistics
681    fn calculate_feature_statistics(
682        &self,
683        mut generated_features: Vec<GeneratedFeature>,
684        X: &Array2<f64>,
685        y: &Array1<f64>,
686    ) -> Vec<GeneratedFeature> {
687        for (i, feature) in generated_features.iter_mut().enumerate() {
688            if i < X.ncols() {
689                let column = X.column(i);
690
691                feature.statistics = FeatureStatistics {
692                    mean: column.mean().unwrap_or(0.0),
693                    std: column.std(0.0),
694                    min: column.iter().fold(f64::INFINITY, |a, &b| a.min(b)),
695                    max: column.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b)),
696                    n_unique: self.count_unique_values(&column),
697                    missing_ratio: column.iter().filter(|&&x| x.is_nan()).count() as f64
698                        / column.len() as f64,
699                    skewness: 0.0, // Would calculate actual skewness
700                    kurtosis: 0.0, // Would calculate actual kurtosis
701                };
702
703                // Calculate importance score (mock implementation)
704                feature.importance_score = self.calculate_feature_importance(&column, y);
705            }
706        }
707
708        generated_features
709    }
710
711    /// Select best features using the configured method
712    fn select_features(
713        &self,
714        X: &Array2<f64>,
715        _y: &Array1<f64>,
716        mut generated_features: Vec<GeneratedFeature>,
717    ) -> Result<(Vec<GeneratedFeature>, Vec<usize>)> {
718        // Create a vector of (index, feature) pairs to maintain original indices
719        let mut indexed_features: Vec<(usize, &mut GeneratedFeature)> =
720            generated_features.iter_mut().enumerate().collect();
721
722        // Sort by importance (descending)
723        indexed_features.sort_by(|a, b| {
724            b.1.importance_score
725                .partial_cmp(&a.1.importance_score)
726                .expect("operation should succeed")
727        });
728
729        let n_features_to_select = match &self.config.selection_method {
730            FeatureSelectionMethod::SelectKBest { k } => (*k).min(X.ncols()),
731            FeatureSelectionMethod::SelectPercentile { percentile } => {
732                ((X.ncols() as f64 * percentile / 100.0) as usize).max(1)
733            }
734            _ => self.config.max_selected_features.min(X.ncols()),
735        };
736
737        // Apply selection method
738        let selected_indices = match &self.config.selection_method {
739            FeatureSelectionMethod::SelectKBest { k: _ }
740            | FeatureSelectionMethod::SelectPercentile { percentile: _ } => {
741                // Select top k features by importance, using their original indices
742                indexed_features
743                    .iter()
744                    .take(n_features_to_select)
745                    .map(|(idx, _)| *idx)
746                    .collect()
747            }
748
749            FeatureSelectionMethod::VarianceThreshold { threshold } => {
750                self.select_by_variance_threshold(X, *threshold)
751            }
752
753            FeatureSelectionMethod::CorrelationThreshold { threshold } => {
754                self.select_by_correlation_threshold(X, *threshold)
755            }
756
757            _ => {
758                // Default to top k features using their original indices
759                indexed_features
760                    .iter()
761                    .take(n_features_to_select)
762                    .map(|(idx, _)| *idx)
763                    .collect()
764            }
765        };
766
767        // Mark selected features
768        for (i, feature) in generated_features.iter_mut().enumerate() {
769            feature.is_selected = selected_indices.contains(&i);
770        }
771
772        Ok((generated_features, selected_indices))
773    }
774
775    /// Estimate performance improvement from feature engineering
776    fn estimate_performance_improvement(
777        &self,
778        _original_X: &Array2<f64>,
779        _enhanced_X: &Array2<f64>,
780        _y: &Array1<f64>,
781        _selected_indices: &[usize],
782    ) -> Result<f64> {
783        // Mock implementation - would use actual cross-validation
784        let original_score = 0.7; // Mock baseline score
785        let enhanced_score = 0.8; // Mock enhanced score
786        Ok(enhanced_score - original_score)
787    }
788
789    /// Create transformation info for future data transformation
790    fn create_transformation_info(
791        &self,
792        transformations: &[FeatureTransformationType],
793        selected_indices: &[usize],
794        enhanced_X: &Array2<f64>,
795    ) -> TransformationInfo {
796        let mut scaling_params = HashMap::new();
797        let binning_boundaries = HashMap::new();
798
799        // Calculate scaling parameters for selected features
800        for &idx in selected_indices {
801            if idx < enhanced_X.ncols() {
802                let column = enhanced_X.column(idx);
803                let mean = column.mean().unwrap_or(0.0);
804                let std = column.std(0.0);
805                scaling_params.insert(idx, (mean, std));
806            }
807        }
808
809        TransformationInfo {
810            transformations: transformations
811                .iter()
812                .map(|t| (t.clone(), vec![]))
813                .collect(),
814            selected_indices: selected_indices.to_vec(),
815            scaling_params,
816            binning_boundaries,
817        }
818    }
819
820    // Helper methods for specific transformations
821    fn apply_polynomial_features(
822        &self,
823        X: &Array2<f64>,
824        source_indices: &[usize],
825        degree: usize,
826    ) -> Result<Array2<f64>> {
827        let n_samples = X.nrows();
828        let selected_X = X.select(Axis(1), source_indices);
829        let n_features = selected_X.ncols();
830
831        if degree == 2 {
832            // Quadratic features: x^2 and x_i * x_j
833            let mut poly_features = Vec::new();
834
835            // Squared terms
836            for i in 0..n_features {
837                let col = selected_X.column(i);
838                let squared: Vec<f64> = col.iter().map(|&x| x * x).collect();
839                poly_features.push(squared);
840            }
841
842            // Interaction terms (only if not too many features)
843            if n_features < 20 {
844                for i in 0..n_features {
845                    for j in (i + 1)..n_features {
846                        let col_i = selected_X.column(i);
847                        let col_j = selected_X.column(j);
848                        let interaction: Vec<f64> = col_i
849                            .iter()
850                            .zip(col_j.iter())
851                            .map(|(&xi, &xj)| xi * xj)
852                            .collect();
853                        poly_features.push(interaction);
854                    }
855                }
856            }
857
858            // Convert to Array2
859            let n_poly_features = poly_features.len();
860            let mut result = Array2::zeros((n_samples, n_poly_features));
861            for (j, feature) in poly_features.iter().enumerate() {
862                for (i, &value) in feature.iter().enumerate() {
863                    result[[i, j]] = value;
864                }
865            }
866            Ok(result)
867        } else {
868            // For higher degrees, just use power transforms
869            let mut result = Array2::zeros((n_samples, n_features));
870            for (j, i) in source_indices.iter().enumerate() {
871                let col = X.column(*i);
872                for (row, &value) in col.iter().enumerate() {
873                    result[[row, j]] = value.powi(degree as i32);
874                }
875            }
876            Ok(result)
877        }
878    }
879
880    fn apply_logarithmic_transform(
881        &self,
882        X: &Array2<f64>,
883        source_indices: &[usize],
884    ) -> Result<Array2<f64>> {
885        let n_samples = X.nrows();
886        let n_features = source_indices.len();
887        let mut result = Array2::zeros((n_samples, n_features));
888
889        for (j, &i) in source_indices.iter().enumerate() {
890            let col = X.column(i);
891            for (row, &value) in col.iter().enumerate() {
892                // Handle negative values and zeros
893                let log_value = if value > 0.0 {
894                    value.ln()
895                } else if value == 0.0 {
896                    0.0
897                } else {
898                    -(value.abs() + 1e-8).ln()
899                };
900                result[[row, j]] = log_value;
901            }
902        }
903        Ok(result)
904    }
905
906    fn apply_sqrt_transform(
907        &self,
908        X: &Array2<f64>,
909        source_indices: &[usize],
910    ) -> Result<Array2<f64>> {
911        let n_samples = X.nrows();
912        let n_features = source_indices.len();
913        let mut result = Array2::zeros((n_samples, n_features));
914
915        for (j, &i) in source_indices.iter().enumerate() {
916            let col = X.column(i);
917            for (row, &value) in col.iter().enumerate() {
918                let sqrt_value = if value >= 0.0 {
919                    value.sqrt()
920                } else {
921                    -(value.abs().sqrt())
922                };
923                result[[row, j]] = sqrt_value;
924            }
925        }
926        Ok(result)
927    }
928
929    fn apply_exp_transform(
930        &self,
931        X: &Array2<f64>,
932        source_indices: &[usize],
933    ) -> Result<Array2<f64>> {
934        let n_samples = X.nrows();
935        let n_features = source_indices.len();
936        let mut result = Array2::zeros((n_samples, n_features));
937
938        for (j, &i) in source_indices.iter().enumerate() {
939            let col = X.column(i);
940            for (row, &value) in col.iter().enumerate() {
941                // Clip values to prevent overflow
942                let clipped_value = value.clamp(-10.0, 10.0);
943                result[[row, j]] = clipped_value.exp();
944            }
945        }
946        Ok(result)
947    }
948
949    fn apply_reciprocal_transform(
950        &self,
951        X: &Array2<f64>,
952        source_indices: &[usize],
953    ) -> Result<Array2<f64>> {
954        let n_samples = X.nrows();
955        let n_features = source_indices.len();
956        let mut result = Array2::zeros((n_samples, n_features));
957
958        for (j, &i) in source_indices.iter().enumerate() {
959            let col = X.column(i);
960            for (row, &value) in col.iter().enumerate() {
961                let reciprocal = if value.abs() > 1e-8 { 1.0 / value } else { 0.0 };
962                result[[row, j]] = reciprocal;
963            }
964        }
965        Ok(result)
966    }
967
968    fn apply_sine_transform(
969        &self,
970        X: &Array2<f64>,
971        source_indices: &[usize],
972    ) -> Result<Array2<f64>> {
973        let n_samples = X.nrows();
974        let n_features = source_indices.len();
975        let mut result = Array2::zeros((n_samples, n_features));
976
977        for (j, &i) in source_indices.iter().enumerate() {
978            let col = X.column(i);
979            for (row, &value) in col.iter().enumerate() {
980                result[[row, j]] = value.sin();
981            }
982        }
983        Ok(result)
984    }
985
986    fn apply_cosine_transform(
987        &self,
988        X: &Array2<f64>,
989        source_indices: &[usize],
990    ) -> Result<Array2<f64>> {
991        let n_samples = X.nrows();
992        let n_features = source_indices.len();
993        let mut result = Array2::zeros((n_samples, n_features));
994
995        for (j, &i) in source_indices.iter().enumerate() {
996            let col = X.column(i);
997            for (row, &value) in col.iter().enumerate() {
998                result[[row, j]] = value.cos();
999            }
1000        }
1001        Ok(result)
1002    }
1003
1004    fn apply_absolute_transform(
1005        &self,
1006        X: &Array2<f64>,
1007        source_indices: &[usize],
1008    ) -> Result<Array2<f64>> {
1009        let n_samples = X.nrows();
1010        let n_features = source_indices.len();
1011        let mut result = Array2::zeros((n_samples, n_features));
1012
1013        for (j, &i) in source_indices.iter().enumerate() {
1014            let col = X.column(i);
1015            for (row, &value) in col.iter().enumerate() {
1016                result[[row, j]] = value.abs();
1017            }
1018        }
1019        Ok(result)
1020    }
1021
1022    fn apply_sign_transform(
1023        &self,
1024        X: &Array2<f64>,
1025        source_indices: &[usize],
1026    ) -> Result<Array2<f64>> {
1027        let n_samples = X.nrows();
1028        let n_features = source_indices.len();
1029        let mut result = Array2::zeros((n_samples, n_features));
1030
1031        for (j, &i) in source_indices.iter().enumerate() {
1032            let col = X.column(i);
1033            for (row, &value) in col.iter().enumerate() {
1034                let sign = if value > 0.0 {
1035                    1.0
1036                } else if value < 0.0 {
1037                    -1.0
1038                } else {
1039                    0.0
1040                };
1041                result[[row, j]] = sign;
1042            }
1043        }
1044        Ok(result)
1045    }
1046
1047    fn apply_binning_transform(
1048        &self,
1049        X: &Array2<f64>,
1050        source_indices: &[usize],
1051        n_bins: usize,
1052    ) -> Result<Array2<f64>> {
1053        let n_samples = X.nrows();
1054        let n_features = source_indices.len();
1055        let mut result = Array2::zeros((n_samples, n_features));
1056
1057        for (j, &i) in source_indices.iter().enumerate() {
1058            let col = X.column(i);
1059            let min_val = col.iter().fold(f64::INFINITY, |a, &b| a.min(b));
1060            let max_val = col.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
1061            let bin_width = (max_val - min_val) / (n_bins as f64);
1062
1063            for (row, &value) in col.iter().enumerate() {
1064                let bin = if bin_width > 0.0 {
1065                    ((value - min_val) / bin_width)
1066                        .floor()
1067                        .min((n_bins - 1) as f64)
1068                } else {
1069                    0.0
1070                };
1071                result[[row, j]] = bin;
1072            }
1073        }
1074        Ok(result)
1075    }
1076
1077    fn apply_interaction_features(
1078        &self,
1079        X: &Array2<f64>,
1080        source_indices: &[usize],
1081    ) -> Result<Array2<f64>> {
1082        let n_samples = X.nrows();
1083        let selected_X = X.select(Axis(1), source_indices);
1084        let n_features = selected_X.ncols();
1085
1086        // Generate all pairwise interactions
1087        let mut interactions = Vec::new();
1088        for i in 0..n_features {
1089            for j in (i + 1)..n_features {
1090                let col_i = selected_X.column(i);
1091                let col_j = selected_X.column(j);
1092                let interaction: Vec<f64> = col_i
1093                    .iter()
1094                    .zip(col_j.iter())
1095                    .map(|(&xi, &xj)| xi * xj)
1096                    .collect();
1097                interactions.push(interaction);
1098            }
1099        }
1100
1101        if interactions.is_empty() {
1102            return Ok(Array2::zeros((n_samples, 1)));
1103        }
1104
1105        // Convert to Array2
1106        let n_interactions = interactions.len();
1107        let mut result = Array2::zeros((n_samples, n_interactions));
1108        for (j, interaction) in interactions.iter().enumerate() {
1109            for (i, &value) in interaction.iter().enumerate() {
1110                result[[i, j]] = value;
1111            }
1112        }
1113        Ok(result)
1114    }
1115
1116    fn apply_ratio_features(
1117        &self,
1118        X: &Array2<f64>,
1119        source_indices: &[usize],
1120    ) -> Result<Array2<f64>> {
1121        let n_samples = X.nrows();
1122        let selected_X = X.select(Axis(1), source_indices);
1123        let n_features = selected_X.ncols();
1124
1125        // Generate ratios between pairs of features
1126        let mut ratios = Vec::new();
1127        for i in 0..n_features {
1128            for j in 0..n_features {
1129                if i != j {
1130                    let col_i = selected_X.column(i);
1131                    let col_j = selected_X.column(j);
1132                    let ratio: Vec<f64> = col_i
1133                        .iter()
1134                        .zip(col_j.iter())
1135                        .map(|(&xi, &xj)| if xj.abs() > 1e-8 { xi / xj } else { 0.0 })
1136                        .collect();
1137                    ratios.push(ratio);
1138                }
1139            }
1140        }
1141
1142        if ratios.is_empty() {
1143            return Ok(Array2::zeros((n_samples, 1)));
1144        }
1145
1146        // Limit number of ratio features
1147        ratios.truncate(20);
1148
1149        // Convert to Array2
1150        let n_ratios = ratios.len();
1151        let mut result = Array2::zeros((n_samples, n_ratios));
1152        for (j, ratio) in ratios.iter().enumerate() {
1153            for (i, &value) in ratio.iter().enumerate() {
1154                result[[i, j]] = value;
1155            }
1156        }
1157        Ok(result)
1158    }
1159
1160    fn apply_difference_features(
1161        &self,
1162        X: &Array2<f64>,
1163        source_indices: &[usize],
1164    ) -> Result<Array2<f64>> {
1165        let n_samples = X.nrows();
1166        let selected_X = X.select(Axis(1), source_indices);
1167        let n_features = selected_X.ncols();
1168
1169        // Generate differences between pairs of features
1170        let mut differences = Vec::new();
1171        for i in 0..n_features {
1172            for j in (i + 1)..n_features {
1173                let col_i = selected_X.column(i);
1174                let col_j = selected_X.column(j);
1175                let diff: Vec<f64> = col_i
1176                    .iter()
1177                    .zip(col_j.iter())
1178                    .map(|(&xi, &xj)| xi - xj)
1179                    .collect();
1180                differences.push(diff);
1181            }
1182        }
1183
1184        if differences.is_empty() {
1185            return Ok(Array2::zeros((n_samples, 1)));
1186        }
1187
1188        // Convert to Array2
1189        let n_differences = differences.len();
1190        let mut result = Array2::zeros((n_samples, n_differences));
1191        for (j, diff) in differences.iter().enumerate() {
1192            for (i, &value) in diff.iter().enumerate() {
1193                result[[i, j]] = value;
1194            }
1195        }
1196        Ok(result)
1197    }
1198
1199    fn apply_rolling_statistics(
1200        &self,
1201        X: &Array2<f64>,
1202        source_indices: &[usize],
1203        window: usize,
1204    ) -> Result<Array2<f64>> {
1205        let n_samples = X.nrows();
1206        let n_features = source_indices.len();
1207        let mut result = Array2::zeros((n_samples, n_features * 2)); // Mean and std
1208
1209        for (j, &i) in source_indices.iter().enumerate() {
1210            let col = X.column(i);
1211
1212            for row in 0..n_samples {
1213                let start = row.saturating_sub(window - 1);
1214                let end = (row + 1).min(n_samples);
1215                let window_data: Vec<f64> = col.slice(s![start..end]).to_vec();
1216
1217                let mean = window_data.iter().sum::<f64>() / window_data.len() as f64;
1218                let variance = window_data.iter().map(|x| (x - mean).powi(2)).sum::<f64>()
1219                    / window_data.len() as f64;
1220                let std = variance.sqrt();
1221
1222                result[[row, j * 2]] = mean;
1223                result[[row, j * 2 + 1]] = std;
1224            }
1225        }
1226        Ok(result)
1227    }
1228
1229    fn apply_lag_features(
1230        &self,
1231        X: &Array2<f64>,
1232        source_indices: &[usize],
1233        lag: usize,
1234    ) -> Result<Array2<f64>> {
1235        let n_samples = X.nrows();
1236        let n_features = source_indices.len();
1237        let mut result = Array2::zeros((n_samples, n_features));
1238
1239        for (j, &i) in source_indices.iter().enumerate() {
1240            let col = X.column(i);
1241
1242            for row in 0..n_samples {
1243                let lag_row = row.saturating_sub(lag);
1244                result[[row, j]] = col[lag_row];
1245            }
1246        }
1247        Ok(result)
1248    }
1249
1250    // Helper methods
1251    fn select_feature_pairs(&mut self, n_features: usize) -> Vec<usize> {
1252        // Select random pairs of features for interaction/ratio/difference features
1253        let max_pairs = 10.min(n_features);
1254        let mut indices = Vec::new();
1255
1256        for _ in 0..max_pairs {
1257            let i = self.rng.random_range(0..n_features);
1258            let j = self.rng.random_range(0..n_features);
1259            if i != j {
1260                indices.extend(vec![i, j]);
1261            }
1262        }
1263
1264        indices.sort_unstable();
1265        indices.dedup();
1266        indices
1267    }
1268
1269    fn calculate_sparsity(&self, X: &Array2<f64>) -> f64 {
1270        let total_values = X.len() as f64;
1271        let zero_count = X.iter().filter(|&&x| x == 0.0).count() as f64;
1272        zero_count / total_values
1273    }
1274
1275    fn analyze_correlation_structure(&self, _X: &Array2<f64>) -> f64 {
1276        // Mock implementation - would calculate actual correlation matrix condition number
1277        //         use scirs2_core::random::Rng;
1278        let mut rng = scirs2_core::random::thread_rng();
1279        rng.gen_range(1.0..100.0)
1280    }
1281
1282    fn estimate_linearity(&self, _X: &Array2<f64>, _y: &Array1<f64>) -> f64 {
1283        // Mock implementation - would perform actual linearity test
1284        //         use scirs2_core::random::Rng;
1285        let mut rng = scirs2_core::random::thread_rng();
1286        rng.gen_range(0.0..1.0)
1287    }
1288
1289    fn get_unique_classes(&self, y: &Array1<f64>) -> Vec<i32> {
1290        let mut classes: Vec<i32> = y.iter().map(|&x| x as i32).collect();
1291        classes.sort_unstable();
1292        classes.dedup();
1293        classes
1294    }
1295
1296    fn calculate_class_distribution(&self, y: &Array1<f64>, classes: &[i32]) -> Vec<f64> {
1297        let total = y.len() as f64;
1298        classes
1299            .iter()
1300            .map(|&class| {
1301                let count = y.iter().filter(|&&yi| yi as i32 == class).count() as f64;
1302                count / total
1303            })
1304            .collect()
1305    }
1306
1307    fn count_unique_values(&self, column: &ArrayView1<f64>) -> usize {
1308        let mut values: Vec<i64> = column.iter().map(|&x| (x * 1000.0) as i64).collect();
1309        values.sort_unstable();
1310        values.dedup();
1311        values.len()
1312    }
1313
1314    fn calculate_feature_importance(&self, _column: &ArrayView1<f64>, _y: &Array1<f64>) -> f64 {
1315        // Mock implementation - would calculate actual feature importance
1316        //         use scirs2_core::random::Rng;
1317        let mut rng = scirs2_core::random::thread_rng();
1318        rng.gen_range(0.0..1.0)
1319    }
1320
1321    fn select_by_variance_threshold(&self, X: &Array2<f64>, threshold: f64) -> Vec<usize> {
1322        (0..X.ncols())
1323            .filter(|&i| {
1324                let col = X.column(i);
1325                col.std(0.0) > threshold
1326            })
1327            .collect()
1328    }
1329
1330    fn select_by_correlation_threshold(&self, X: &Array2<f64>, _threshold: f64) -> Vec<usize> {
1331        // Mock implementation - would calculate actual correlations
1332        (0..X.ncols()).collect()
1333    }
1334}
1335
1336/// Convenience function for quick feature engineering
1337pub fn engineer_features(
1338    X: &Array2<f64>,
1339    y: &Array1<f64>,
1340    task_type: TaskType,
1341) -> Result<FeatureEngineeringResult> {
1342    let config = AutoFeatureEngineering {
1343        task_type,
1344        ..Default::default()
1345    };
1346
1347    let mut engineer = AutoFeatureEngineer::new(config);
1348    engineer.engineer_features(X, y)
1349}
1350
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::{Array1, Array2};

    /// Build a deterministic 100x4 feature matrix (values 0..400 in row-major
    /// order) paired with a 3-class cyclic label vector.
    #[allow(non_snake_case)]
    fn create_test_data() -> (Array2<f64>, Array1<f64>) {
        let features: Vec<f64> = (0..400).map(f64::from).collect();
        let X = Array2::from_shape_vec((100, 4), features).expect("operation should succeed");
        let labels: Vec<f64> = (0..100).map(|i| f64::from(i % 3)).collect();
        (X, Array1::from_vec(labels))
    }

    #[test]
    fn test_feature_engineering() {
        let (X, y) = create_test_data();
        let outcome =
            engineer_features(&X, &y, TaskType::Classification).expect("operation should succeed");
        // Engineering must add features, and selection must not invent any.
        assert!(outcome.generated_feature_count > outcome.original_feature_count);
        assert!(outcome.selected_feature_count <= outcome.generated_feature_count);
    }

    #[test]
    fn test_polynomial_features() {
        let (X, _y) = create_test_data();
        let engineer = AutoFeatureEngineer::default();
        let poly = engineer
            .apply_polynomial_features(&X, &[0, 1], 2)
            .expect("operation should succeed");
        assert!(poly.ncols() > 0);
    }

    #[test]
    fn test_mathematical_transforms() {
        let (X, _y) = create_test_data();
        let engineer = AutoFeatureEngineer::default();
        assert!(engineer.apply_logarithmic_transform(&X, &[0, 1]).is_ok());
        assert!(engineer.apply_sqrt_transform(&X, &[0, 1]).is_ok());
    }

    #[test]
    fn test_interaction_features() {
        let (X, _y) = create_test_data();
        let engineer = AutoFeatureEngineer::default();
        let interactions = engineer
            .apply_interaction_features(&X, &[0, 1, 2])
            .expect("operation should succeed");
        assert!(interactions.ncols() > 0);
    }

    #[test]
    fn test_custom_strategy() {
        let (X, y) = create_test_data();
        let config = AutoFeatureEngineering {
            strategy: FeatureEngineeringStrategy::Custom(vec![
                FeatureTransformationType::Polynomial { degree: 2 },
                FeatureTransformationType::Logarithmic,
            ]),
            max_features: 50,
            ..Default::default()
        };
        let mut engineer = AutoFeatureEngineer::new(config);
        assert!(engineer.engineer_features(&X, &y).is_ok());
    }

    #[test]
    fn test_feature_selection_methods() {
        let (X, y) = create_test_data();
        let config = AutoFeatureEngineering {
            selection_method: FeatureSelectionMethod::SelectPercentile { percentile: 50.0 },
            ..Default::default()
        };
        let mut engineer = AutoFeatureEngineer::new(config);
        let outcome = engineer
            .engineer_features(&X, &y)
            .expect("operation should succeed");
        assert!(outcome.selected_feature_count > 0);
    }
}