sklears_feature_selection/automl/
hyperparameter_optimizer.rs

1//! Hyperparameter Optimization Module for AutoML Feature Selection
2//!
3//! Optimizes hyperparameters for different feature selection methods based on data characteristics.
4//! All implementations follow the SciRS2 policy using scirs2-core for numerical computations.
5
6use scirs2_core::ndarray::{ArrayView1, ArrayView2};
7
8use super::automl_core::{AutoMLMethod, DataCharacteristics, TargetType};
9use sklears_core::error::Result as SklResult;
10
11type Result<T> = SklResult<T>;
12
/// Summary statistics of a dataset, used to adapt method hyperparameters.
#[derive(Debug, Clone)]
struct DatasetMetrics {
    // Mean absolute value across all feature-matrix entries (0.0 when empty).
    avg_feature_magnitude: f64,
    // Unbiased sample variance of the target (0.0 with fewer than two samples).
    target_variance: f64,
    // Fraction of targets at or above the target mean (0.5 for an empty target).
    class_balance: f64,
    // Number of rows in the feature matrix.
    sample_count: usize,
    // Number of columns in the feature matrix.
    feature_count: usize,
}
21
22impl DatasetMetrics {
23    fn from_data(X: &ArrayView2<f64>, y: &ArrayView1<f64>) -> Self {
24        let (sample_count, feature_count) = X.dim();
25        let total_entries = sample_count * feature_count;
26        let avg_feature_magnitude = if total_entries > 0 {
27            X.iter().map(|value| value.abs()).sum::<f64>() / total_entries as f64
28        } else {
29            0.0
30        };
31
32        let target_len = y.len();
33        let (target_variance, class_balance) = if target_len > 0 {
34            let target_mean = y.iter().copied().sum::<f64>() / target_len as f64;
35            let variance = if target_len > 1 {
36                y.iter()
37                    .map(|value| (value - target_mean).powi(2))
38                    .sum::<f64>()
39                    / (target_len - 1) as f64
40            } else {
41                0.0
42            };
43
44            let positives = y.iter().filter(|value| **value >= target_mean).count();
45            let balance = (positives as f64 / target_len as f64).clamp(0.0, 1.0);
46            (variance, balance)
47        } else {
48            (0.0, 0.5)
49        };
50
51        Self {
52            avg_feature_magnitude,
53            target_variance,
54            class_balance,
55            sample_count,
56            feature_count,
57        }
58    }
59}
60
/// Hyperparameter optimizer for feature selection methods
#[derive(Debug, Clone)]
pub struct HyperparameterOptimizer {
    // Upper bound on optimization iterations (default 20).
    // NOTE(review): not consumed anywhere in this file — confirm callers use it.
    pub max_iterations: usize,
}
66
67impl HyperparameterOptimizer {
    /// Create an optimizer with a default budget of 20 iterations.
    pub fn new() -> Self {
        Self { max_iterations: 20 }
    }
71
72    pub fn optimize_method(
73        &self,
74        method: &AutoMLMethod,
75        X: ArrayView2<f64>,
76        y: ArrayView1<f64>,
77        characteristics: &DataCharacteristics,
78    ) -> Result<OptimizedMethod> {
79        let metrics = DatasetMetrics::from_data(&X, &y);
80
81        let mut config = match method {
82            AutoMLMethod::UnivariateFiltering => self.optimize_univariate(characteristics)?,
83            AutoMLMethod::CorrelationBased => self.optimize_correlation(characteristics)?,
84            AutoMLMethod::TreeBased => self.optimize_tree(characteristics)?,
85            AutoMLMethod::LassoBased => self.optimize_lasso(characteristics)?,
86            AutoMLMethod::WrapperBased => self.optimize_wrapper(characteristics)?,
87            AutoMLMethod::EnsembleBased => self.optimize_ensemble(characteristics)?,
88            AutoMLMethod::Hybrid => self.optimize_hybrid(characteristics)?,
89            AutoMLMethod::NeuralArchitectureSearch => self.optimize_nas(characteristics)?,
90            AutoMLMethod::TransferLearning => self.optimize_transfer_learning(characteristics)?,
91            AutoMLMethod::MetaLearningEnsemble => self.optimize_meta_learning(characteristics)?,
92        };
93
94        self.adjust_config_for_data(&mut config, characteristics, &metrics);
95
96        let estimated_cost = self.estimate_computational_cost(method, characteristics, &metrics);
97
98        Ok(OptimizedMethod {
99            method_type: method.clone(),
100            config,
101            estimated_cost,
102        })
103    }
104
105    fn optimize_univariate(&self, characteristics: &DataCharacteristics) -> Result<MethodConfig> {
106        let k = if characteristics.n_features > 1000 {
107            (characteristics.n_features / 10).min(100)
108        } else {
109            (characteristics.n_features / 2).min(50)
110        };
111
112        Ok(MethodConfig::Univariate { k })
113    }
114
115    fn optimize_correlation(&self, characteristics: &DataCharacteristics) -> Result<MethodConfig> {
116        let threshold = if characteristics.correlation_structure.average_correlation > 0.5 {
117            0.8
118        } else {
119            0.7
120        };
121
122        Ok(MethodConfig::Correlation { threshold })
123    }
124
125    fn optimize_tree(&self, characteristics: &DataCharacteristics) -> Result<MethodConfig> {
126        let n_estimators = if characteristics.n_samples > 10000 {
127            100
128        } else {
129            50
130        };
131        let max_depth = if characteristics.n_features > 100 {
132            10
133        } else {
134            6
135        };
136
137        Ok(MethodConfig::Tree {
138            n_estimators,
139            max_depth,
140        })
141    }
142
143    fn optimize_lasso(&self, characteristics: &DataCharacteristics) -> Result<MethodConfig> {
144        let alpha = if characteristics.feature_to_sample_ratio > 1.0 {
145            0.1
146        } else {
147            0.01
148        };
149
150        Ok(MethodConfig::Lasso { alpha })
151    }
152
153    fn optimize_wrapper(&self, _characteristics: &DataCharacteristics) -> Result<MethodConfig> {
154        Ok(MethodConfig::Wrapper {
155            cv_folds: 5,
156            scoring: "accuracy".to_string(),
157        })
158    }
159
160    fn optimize_ensemble(&self, _characteristics: &DataCharacteristics) -> Result<MethodConfig> {
161        Ok(MethodConfig::Ensemble {
162            n_methods: 3,
163            aggregation: "voting".to_string(),
164        })
165    }
166
167    fn optimize_hybrid(&self, characteristics: &DataCharacteristics) -> Result<MethodConfig> {
168        let stage1_method = if characteristics.n_features > 1000 {
169            "univariate"
170        } else {
171            "correlation"
172        };
173
174        Ok(MethodConfig::Hybrid {
175            stage1_method: stage1_method.to_string(),
176            stage2_method: "lasso".to_string(),
177            stage1_features: characteristics.n_features / 3,
178        })
179    }
180
181    fn optimize_nas(&self, characteristics: &DataCharacteristics) -> Result<MethodConfig> {
182        let max_epochs = if characteristics.n_features > 1000 {
183            100
184        } else {
185            50
186        };
187
188        let population_size = if characteristics.computational_budget.allow_complex_methods {
189            20
190        } else {
191            10
192        };
193
194        Ok(MethodConfig::NeuralArchitectureSearch {
195            max_epochs,
196            population_size,
197            mutation_rate: 0.1,
198            early_stopping_patience: 10,
199        })
200    }
201
202    fn optimize_transfer_learning(
203        &self,
204        characteristics: &DataCharacteristics,
205    ) -> Result<MethodConfig> {
206        let source_domain = match characteristics.target_type {
207            TargetType::BinaryClassification => "binary_classification",
208            TargetType::MultiClassification => "multi_classification",
209            TargetType::Regression => "regression",
210            _ => "general",
211        }
212        .to_string();
213
214        let fine_tuning_epochs = if characteristics.n_samples > 1000 {
215            30
216        } else {
217            10
218        };
219
220        Ok(MethodConfig::TransferLearning {
221            source_domain,
222            adaptation_method: "fine_tuning".to_string(),
223            fine_tuning_epochs,
224            transfer_ratio: 0.7,
225        })
226    }
227
228    fn optimize_meta_learning(
229        &self,
230        characteristics: &DataCharacteristics,
231    ) -> Result<MethodConfig> {
232        let base_methods = vec![
233            "univariate".to_string(),
234            "correlation".to_string(),
235            "lasso".to_string(),
236        ];
237
238        let ensemble_size = if characteristics.computational_budget.allow_complex_methods {
239            5
240        } else {
241            3
242        };
243
244        Ok(MethodConfig::MetaLearningEnsemble {
245            base_methods,
246            meta_learner: "gradient_boosting".to_string(),
247            adaptation_strategy: "online_learning".to_string(),
248            ensemble_size,
249        })
250    }
251
    /// Refine a base configuration in place using observed dataset statistics.
    ///
    /// Each arm tweaks the hyperparameters of one `MethodConfig` variant;
    /// `characteristics` supplies the declared target type while `metrics`
    /// supplies measured statistics of the actual data.
    fn adjust_config_for_data(
        &self,
        config: &mut MethodConfig,
        characteristics: &DataCharacteristics,
        metrics: &DatasetMetrics,
    ) {
        match config {
            MethodConfig::Univariate { k } => {
                // Never let k exceed the (non-zero) feature count.
                let feature_cap = std::cmp::max(metrics.feature_count, 1);
                if metrics.target_variance < 1e-3 {
                    // Near-constant target: be conservative, keep at most a fifth.
                    let conservative_cap = std::cmp::max(feature_cap / 5, 1);
                    *k = (*k).min(conservative_cap);
                } else if metrics.target_variance > 1.0 {
                    // High-variance target: allow up to 5% extra features per
                    // unit of variance (variance contribution capped at 4.0).
                    let bonus =
                        ((metrics.target_variance.min(4.0)) * feature_cap as f64 * 0.05) as usize;
                    *k = (*k + bonus).min(feature_cap);
                } else {
                    *k = (*k).min(feature_cap);
                }
                // Always select at least one feature.
                *k = (*k).max(1);
            }
            MethodConfig::Correlation { threshold } => {
                // Nudge the threshold by how far feature magnitudes sit from 1.0
                // (capped at 0.15): loosen for flat targets, tighten otherwise.
                let fluctuation = (metrics.avg_feature_magnitude - 1.0).abs().min(0.15);
                if metrics.target_variance < 0.3 {
                    *threshold = (*threshold - fluctuation).clamp(0.3, 0.95);
                } else {
                    *threshold = (*threshold + fluctuation).clamp(0.3, 0.95);
                }
            }
            MethodConfig::Tree {
                n_estimators,
                max_depth,
            } => {
                // Larger datasets warrant at least 100 trees.
                if metrics.sample_count > 5_000 {
                    *n_estimators = (*n_estimators).max(100);
                }
                if metrics.target_variance > 1.2 {
                    // Complex target: allow deeper trees, capped at 20.
                    *max_depth = (*max_depth + 2).min(20);
                } else if metrics.target_variance < 0.2 {
                    *max_depth = (*max_depth).max(4);
                }
            }
            MethodConfig::Lasso { alpha } => {
                // Scale the penalty with feature magnitude, floored at 1e-4.
                let scale_adjustment = metrics.avg_feature_magnitude.clamp(0.5, 2.0);
                *alpha = (*alpha * scale_adjustment).max(1e-4);
                if matches!(characteristics.target_type, TargetType::Regression)
                    && metrics.target_variance > 2.0
                {
                    // High-variance regression: relax the penalty slightly.
                    *alpha *= 0.9;
                }
            }
            MethodConfig::Wrapper { scoring, cv_folds } => {
                // Pick a scoring metric suited to the task and class balance.
                let imbalance = (metrics.class_balance - 0.5).abs();
                if matches!(characteristics.target_type, TargetType::Regression) {
                    *scoring = "r2".to_string();
                } else if imbalance > 0.2 {
                    *scoring = "roc_auc".to_string();
                } else {
                    *scoring = "accuracy".to_string();
                }

                // Fewer folds for small samples, more for large ones.
                *cv_folds = if metrics.sample_count < 200 {
                    3
                } else if metrics.sample_count > 5_000 {
                    7
                } else {
                    5
                };
            }
            MethodConfig::Ensemble { n_methods, .. } => {
                if metrics.feature_count > 500 {
                    *n_methods = (*n_methods).max(4);
                }
                // Imbalanced targets get a larger, more diverse ensemble.
                if metrics.class_balance < 0.35 || metrics.class_balance > 0.65 {
                    *n_methods = (*n_methods).max(5);
                }
            }
            MethodConfig::Hybrid {
                stage1_features, ..
            } => {
                // Forward a third of the features by default; a fifth for flat
                // targets, half for high-variance targets.
                let feature_cap = std::cmp::max(metrics.feature_count, 1);
                let mut desired = feature_cap / 3;
                if metrics.target_variance < 0.2 {
                    desired = std::cmp::max(feature_cap / 5, 1);
                } else if metrics.target_variance > 1.0 {
                    desired = std::cmp::max(feature_cap / 2, 1);
                }
                *stage1_features = desired.min(feature_cap);
            }
            MethodConfig::NeuralArchitectureSearch {
                max_epochs,
                population_size,
                early_stopping_patience,
                ..
            } => {
                if metrics.sample_count > 2_000 {
                    *population_size = (*population_size).max(25);
                }
                if metrics.target_variance < 0.4 {
                    // Weak target signal: train longer and be more patient.
                    *max_epochs = (*max_epochs).max(80);
                    *early_stopping_patience = (*early_stopping_patience).max(15);
                } else {
                    *max_epochs = (*max_epochs).min(150);
                }
            }
            MethodConfig::TransferLearning {
                transfer_ratio,
                fine_tuning_epochs,
                ..
            } => {
                // Transfer less for regression, more for high-variance targets.
                if matches!(characteristics.target_type, TargetType::Regression) {
                    *transfer_ratio = 0.6;
                } else if metrics.target_variance > 1.0 {
                    *transfer_ratio = 0.8;
                } else {
                    *transfer_ratio = 0.7;
                }

                if metrics.sample_count > 2_500 {
                    *fine_tuning_epochs = (*fine_tuning_epochs).max(25);
                }
            }
            MethodConfig::MetaLearningEnsemble { ensemble_size, .. } => {
                // Grow the ensemble for wide data; shrink it for small samples.
                if metrics.feature_count > 1_000 {
                    *ensemble_size = (*ensemble_size).max(6);
                }
                if metrics.sample_count < 500 {
                    *ensemble_size = (*ensemble_size).min(4);
                }
            }
        }
    }
384
385    fn estimate_computational_cost(
386        &self,
387        method: &AutoMLMethod,
388        characteristics: &DataCharacteristics,
389        metrics: &DatasetMetrics,
390    ) -> f64 {
391        let base_cost =
392            characteristics.n_samples as f64 * characteristics.n_features as f64 / 1_000_000.0;
393
394        let scale_penalty = 1.0 + (metrics.avg_feature_magnitude - 1.0).abs().min(3.0) * 0.05;
395        let variance_discount = if metrics.target_variance < 1e-6 {
396            0.85
397        } else {
398            1.0
399        };
400        let imbalance_penalty = 1.0 + (metrics.class_balance - 0.5).abs() * 0.5;
401
402        let method_multiplier = match method {
403            AutoMLMethod::UnivariateFiltering => 0.1,
404            AutoMLMethod::CorrelationBased => 0.5,
405            AutoMLMethod::TreeBased => 2.0,
406            AutoMLMethod::LassoBased => 1.5,
407            AutoMLMethod::WrapperBased => 10.0,
408            AutoMLMethod::EnsembleBased => 5.0,
409            AutoMLMethod::Hybrid => 3.0,
410            AutoMLMethod::NeuralArchitectureSearch => 15.0,
411            AutoMLMethod::TransferLearning => 8.0,
412            AutoMLMethod::MetaLearningEnsemble => 12.0,
413        };
414
415        base_cost * method_multiplier * scale_penalty * variance_discount * imbalance_penalty
416    }
417}
418
impl Default for HyperparameterOptimizer {
    /// Equivalent to [`HyperparameterOptimizer::new`].
    fn default() -> Self {
        Self::new()
    }
}
424
/// Optimized method with hyperparameters
#[derive(Debug, Clone)]
pub struct OptimizedMethod {
    // Selection strategy this configuration applies to.
    pub method_type: AutoMLMethod,
    // Tuned hyperparameters for the strategy.
    pub config: MethodConfig,
    // Relative cost estimate from `estimate_computational_cost`.
    pub estimated_cost: f64,
}
432
/// Method configuration with optimized hyperparameters
#[derive(Debug, Clone)]
pub enum MethodConfig {
    /// Univariate statistical filtering.
    Univariate {
        // Number of top-ranked features to keep.
        k: usize,
    },
    /// Correlation-based filtering.
    Correlation {
        // Correlation cutoff, clamped to [0.3, 0.95] by the tuner.
        threshold: f64,
    },
    /// Tree-ensemble importance selection.
    Tree {
        // Number of trees in the ensemble.
        n_estimators: usize,

        // Maximum depth of each tree.
        max_depth: usize,
    },
    /// L1-regularized (Lasso) selection.
    Lasso {
        // Regularization strength.
        alpha: f64,
    },
    /// Wrapper-based search around an estimator.
    Wrapper {
        // Cross-validation fold count.
        cv_folds: usize,
        // Scoring metric name ("accuracy", "r2", or "roc_auc").
        scoring: String,
    },
    /// Ensemble of multiple selection methods.
    Ensemble {
        // Number of base methods to combine.
        n_methods: usize,
        // Aggregation strategy name (e.g. "voting").
        aggregation: String,
    },
    /// Two-stage hybrid selection pipeline.
    Hybrid {
        // First-stage (pre-filter) method name.
        stage1_method: String,
        // Second-stage (refinement) method name.
        stage2_method: String,
        // Number of features forwarded from stage 1 to stage 2.
        stage1_features: usize,
    },
    /// Evolutionary neural architecture search.
    NeuralArchitectureSearch {
        // Maximum training epochs.
        max_epochs: usize,
        // Candidate population size.
        population_size: usize,
        // Mutation rate for the search (set to 0.1 by the tuner).
        mutation_rate: f64,
        // Epochs without improvement before stopping.
        early_stopping_patience: usize,
    },
    /// Transfer learning from a related source domain.
    TransferLearning {
        // Name of the source domain to transfer from.
        source_domain: String,
        // Adaptation technique name (e.g. "fine_tuning").
        adaptation_method: String,
        // Epochs of fine-tuning on the target data.
        fine_tuning_epochs: usize,
        // Transfer ratio hyperparameter (set to 0.6–0.8 by the tuner).
        transfer_ratio: f64,
    },
    /// Meta-learning ensemble over base selectors.
    MetaLearningEnsemble {
        // Names of the base selection methods.
        base_methods: Vec<String>,
        // Meta-learner name (e.g. "gradient_boosting").
        meta_learner: String,
        // Adaptation strategy name (e.g. "online_learning").
        adaptation_strategy: String,
        // Number of ensemble members.
        ensemble_size: usize,
    },
}
485
486impl OptimizedMethod {
487    /// Fit the method to training data (stub implementation)
488    pub fn fit(self, X: ArrayView2<f64>, y: ArrayView1<f64>) -> Result<TrainedMethod> {
489        // Simplified feature selection based on method type
490        let mut selected_features: Vec<usize> = match &self.method_type {
491            AutoMLMethod::UnivariateFiltering => {
492                if let MethodConfig::Univariate { k } = &self.config {
493                    (0..*k.min(&X.ncols())).collect()
494                } else {
495                    (0..X.ncols().min(10)).collect()
496                }
497            }
498            AutoMLMethod::CorrelationBased => {
499                // Select features with correlation above threshold
500                (0..X.ncols().min(20)).collect()
501            }
502            AutoMLMethod::TreeBased => {
503                // Select features based on tree importance (simplified)
504                (0..X.ncols().min(30)).collect()
505            }
506            AutoMLMethod::LassoBased => {
507                // Select features with non-zero Lasso coefficients (simplified)
508                (0..X.ncols().min(15)).collect()
509            }
510            AutoMLMethod::WrapperBased => {
511                // Select features using wrapper method (simplified)
512                (0..X.ncols().min(25)).collect()
513            }
514            AutoMLMethod::EnsembleBased => {
515                // Select features from ensemble (simplified)
516                (0..X.ncols().min(35)).collect()
517            }
518            AutoMLMethod::Hybrid => {
519                // Multi-stage feature selection (simplified)
520                (0..X.ncols().min(20)).collect()
521            }
522            AutoMLMethod::NeuralArchitectureSearch => {
523                // Features selected by NAS (simplified)
524                (0..X.ncols().min(40)).collect()
525            }
526            AutoMLMethod::TransferLearning => {
527                // Features from transfer learning (simplified)
528                (0..X.ncols().min(30)).collect()
529            }
530            AutoMLMethod::MetaLearningEnsemble => {
531                // Features from meta-learning ensemble (simplified)
532                (0..X.ncols().min(50)).collect()
533            }
534        };
535
536        let metrics = DatasetMetrics::from_data(&X, &y);
537
538        if metrics.target_variance < 1e-6 && selected_features.len() > 10 {
539            selected_features.truncate(10);
540        }
541
542        let denom = std::cmp::max(selected_features.len(), 1) as f64;
543        let importance_scale = 1.0 + metrics.target_variance.sqrt().min(2.0);
544        let balance_adjustment = 1.0 + (0.5 - metrics.class_balance).abs() * 0.5;
545        let magnitude_adjustment = metrics.avg_feature_magnitude.max(0.1);
546
547        let feature_importances: Vec<f64> = selected_features
548            .iter()
549            .enumerate()
550            .map(|(index, _)| {
551                let rank = (denom - index as f64) / denom;
552                (rank * importance_scale * balance_adjustment * magnitude_adjustment).max(0.05)
553            })
554            .collect();
555
556        Ok(TrainedMethod {
557            method_type: self.method_type,
558            config: self.config,
559            selected_features: selected_features.clone(),
560            feature_importances,
561        })
562    }
563}
564
/// Trained method with selected features
#[derive(Debug, Clone)]
pub struct TrainedMethod {
    // Strategy that produced this selection.
    pub method_type: AutoMLMethod,
    // Hyperparameters used during fitting.
    pub config: MethodConfig,
    // Column indices of the selected features.
    pub selected_features: Vec<usize>,
    // Importance score per selected feature (parallel to `selected_features`).
    pub feature_importances: Vec<f64>,
}
573
574impl TrainedMethod {
575    pub fn transform_indices(&self) -> Result<Vec<usize>> {
576        Ok(self.selected_features.clone())
577    }
578}