1use scirs2_core::ndarray::{ArrayView1, ArrayView2};
8use scirs2_core::random::rngs::StdRng;
9use scirs2_core::random::{thread_rng, Rng, SeedableRng};
10use sklears_core::{
11 error::{Result as SklResult, SklearsError},
12 traits::Estimator,
13 types::{Float, FloatBounds},
14};
15use std::collections::BTreeMap;
16use std::time::{Duration, Instant};
17
18use crate::{FluentPipelineBuilder, PipelineConfiguration};
19
/// Automated machine-learning optimizer: repeatedly samples pipeline
/// configurations from a [`SearchSpace`], evaluates them, and tracks the
/// best-scoring configuration found.
#[derive(Debug)]
pub struct AutoMLOptimizer {
    /// Budget, metric, and search-strategy settings for the run.
    config: AutoMLConfig,
    /// Candidate algorithms, preprocessing steps, and parameter ranges.
    search_space: SearchSpace,
    /// Record of every trial evaluated so far, plus the best result.
    history: OptimizationHistory,
    /// RNG for candidate sampling; seedable via `AutoMLConfig::random_seed`
    /// for reproducible runs.
    rng: StdRng,
}
32
/// Settings that bound and steer an AutoML run.
#[derive(Debug, Clone)]
pub struct AutoMLConfig {
    /// Wall-clock budget for the whole optimization loop.
    pub max_time: Duration,
    /// Maximum number of candidate configurations to evaluate.
    pub max_trials: usize,
    /// Number of cross-validation folds per trial.
    pub cv_folds: usize,
    /// Metric used to rank trials.
    pub metric: OptimizationMetric,
    /// Strategy used to propose candidate configurations.
    pub strategy: SearchStrategy,
    /// Population size for population-based strategies (e.g. genetic).
    pub population_size: usize,
    /// Stop after this many consecutive trials without improvement;
    /// `None` disables early stopping.
    pub early_stopping_patience: Option<usize>,
    /// Seed for reproducible runs; `None` seeds from the thread RNG.
    pub random_seed: Option<u64>,
}
53
/// Metric the optimizer ranks trials by.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum OptimizationMetric {
    /// Classification accuracy.
    Accuracy,
    /// F1 score (harmonic mean of precision and recall).
    F1Score,
    /// Area under the ROC curve.
    AUCROC,
    /// Mean squared error (regression).
    MSE,
    /// Root mean squared error (regression).
    RMSE,
    /// Mean absolute error (regression).
    MAE,
    /// Coefficient of determination (regression).
    R2,
    /// User-defined metric identified by name.
    Custom(String),
}
74
/// Strategy used to propose candidate configurations.
///
/// NOTE(review): `AutoMLOptimizer::generate_candidate_config` currently
/// falls back to random sampling for every strategy except `Random`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SearchStrategy {
    /// Uniform random sampling.
    Random,
    /// Exhaustive grid search.
    Grid,
    /// Bayesian optimization (surrogate-model based).
    Bayesian,
    /// Genetic / evolutionary search.
    Genetic,
    /// Particle-swarm optimization.
    ParticleSwarm,
    /// Differential evolution.
    DifferentialEvolution,
    /// Tree-structured Parzen Estimator.
    TPE,
    /// Hyperband successive-halving scheduler.
    Hyperband,
}
95
/// Declarative description of everything the optimizer may try.
#[derive(Debug, Clone)]
pub struct SearchSpace {
    /// Candidate learning algorithms.
    pub algorithms: Vec<AlgorithmChoice>,
    /// Candidate preprocessing steps.
    pub preprocessing: Vec<PreprocessingChoice>,
    /// Candidate feature-engineering steps.
    pub feature_engineering: Vec<FeatureEngineeringChoice>,
    /// Pipeline-level hyperparameter ranges keyed by name.
    pub hyperparameters: BTreeMap<String, ParameterRange>,
    /// Hard constraints a candidate must satisfy.
    pub constraints: Vec<ArchitectureConstraint>,
}
110
/// One algorithm the optimizer may select, with its tunable parameters.
#[derive(Debug, Clone)]
pub struct AlgorithmChoice {
    /// Algorithm name (e.g. "RandomForest").
    pub name: String,
    /// Family the algorithm belongs to.
    pub algorithm_type: AlgorithmType,
    /// Tunable hyperparameters keyed by name.
    pub hyperparameters: BTreeMap<String, ParameterRange>,
    /// Estimated resources needed to train this algorithm.
    pub resource_requirements: ResourceRequirements,
}
123
/// Broad family of a learning algorithm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AlgorithmType {
    /// Linear models (e.g. linear/logistic regression).
    Linear,
    /// Single decision trees.
    Tree,
    /// Ensembles (e.g. random forest, gradient boosting).
    Ensemble,
    /// Neural networks.
    NeuralNetwork,
    /// Support vector machines.
    SVM,
    /// k-nearest neighbors.
    KNN,
    /// Naive Bayes classifiers.
    NaiveBayes,
    /// User-defined algorithm family identified by name.
    Custom(String),
}
144
/// A preprocessing step the optimizer may include in a pipeline.
#[derive(Debug, Clone)]
pub struct PreprocessingChoice {
    /// Step name (e.g. "StandardScaler").
    pub name: String,
    /// Tunable parameters keyed by name.
    pub parameters: BTreeMap<String, ParameterRange>,
    /// When `true` the step may be randomly skipped; mandatory otherwise.
    pub optional: bool,
}
155
/// A feature-engineering step the optimizer may include in a pipeline.
#[derive(Debug, Clone)]
pub struct FeatureEngineeringChoice {
    /// Step name (e.g. "PolynomialFeatures").
    pub name: String,
    /// Tunable parameters keyed by name.
    pub parameters: BTreeMap<String, ParameterRange>,
    /// When `true` the step may be randomly skipped; mandatory otherwise.
    pub optional: bool,
    /// Relative cost of applying the step — presumably unitless; the
    /// default space uses 0.5. TODO(review): confirm intended scale.
    pub cost_estimate: f64,
}
168
/// Range of values a hyperparameter may take during search.
#[derive(Debug, Clone)]
pub enum ParameterRange {
    /// Real-valued interval; `log_scale` requests log-uniform sampling.
    Continuous { min: f64, max: f64, log_scale: bool },
    /// Inclusive integer interval (`generate_architecture` samples
    /// `min..=max`).
    Integer { min: i64, max: i64 },
    /// A finite set of named choices.
    Categorical(Vec<String>),
    /// Boolean flag.
    Boolean,
    /// Pinned to a single constant value.
    Fixed(ParameterValue),
}
183
/// A concrete hyperparameter value.
#[derive(Debug, Clone)]
pub enum ParameterValue {
    /// 64-bit floating-point value.
    Float(f64),
    /// 64-bit signed integer value.
    Int(i64),
    /// String value.
    String(String),
    /// Boolean value.
    Bool(bool),
    /// Ordered list of nested values.
    Array(Vec<ParameterValue>),
}
198
/// Hard constraint on candidate pipelines/architectures.
#[derive(Debug, Clone)]
pub enum ArchitectureConstraint {
    /// Maximum number of layers (neural architectures).
    MaxLayers(usize),
    /// Maximum number of trainable parameters.
    MaxParameters(usize),
    /// Maximum memory footprint, in megabytes.
    MaxMemoryMB(usize),
    /// Maximum wall-clock training time.
    MaxTrainingTime(Duration),
    /// Minimum acceptable accuracy.
    MinAccuracy(f64),
}
213
/// Resources an algorithm is expected to need during training.
#[derive(Debug, Clone)]
pub struct ResourceRequirements {
    /// Expected memory footprint, in megabytes.
    pub memory_mb: usize,
    /// Number of CPU cores the algorithm can use.
    pub cpu_cores: usize,
    /// Asymptotic training-time complexity class.
    pub time_complexity: TimeComplexity,
    /// Whether a GPU is required.
    pub requires_gpu: bool,
}
226
/// Asymptotic time-complexity class of an algorithm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TimeComplexity {
    /// O(1).
    Constant,
    /// O(log n).
    Logarithmic,
    /// O(n).
    Linear,
    /// O(n log n).
    Linearithmic,
    /// O(n^2).
    Quadratic,
    /// O(n^3).
    Cubic,
    /// O(2^n).
    Exponential,
}
245
/// Running record of an optimization session.
#[derive(Debug, Clone)]
pub struct OptimizationHistory {
    /// Every trial evaluated, in execution order.
    pub trials: Vec<TrialResult>,
    /// Best score seen so far; `None` before any successful trial.
    pub best_score: Option<f64>,
    /// Configuration that produced `best_score`.
    pub best_config: Option<PipelineConfiguration>,
    /// When the run started; `None` until `optimize` is called.
    pub start_time: Option<Instant>,
    /// Total elapsed time of the finished run.
    pub total_time: Duration,
}
260
/// Outcome of evaluating one candidate configuration.
#[derive(Debug, Clone)]
pub struct TrialResult {
    /// Sequential trial index within the run.
    pub trial_id: usize,
    /// Configuration that was evaluated.
    pub config: PipelineConfiguration,
    /// Aggregate score for the trial; higher is better (the optimizer
    /// keeps the maximum).
    pub score: f64,
    /// Time spent evaluating the trial.
    pub training_time: Duration,
    /// Per-fold cross-validation scores.
    pub cv_scores: Vec<f64>,
    /// When the trial started.
    pub timestamp: Instant,
    /// Final status of the trial.
    pub status: TrialStatus,
    /// Error message when the trial failed.
    pub error: Option<String>,
}
281
/// Lifecycle state of a trial.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TrialStatus {
    /// Completed and produced a score.
    Success,
    /// Evaluation failed.
    Failed,
    /// Stopped before completion (e.g. budget or early stopping).
    Stopped,
    /// Currently being evaluated.
    Running,
    /// Waiting to be evaluated.
    Queued,
}
296
/// Neural-architecture search (NAS) driver.
///
/// NOTE(review): `search` currently samples architectures randomly
/// regardless of the configured strategy.
#[derive(Debug)]
pub struct NeuralArchitectureSearch {
    /// Layer types, depth range, activations, and connection patterns
    /// to explore.
    search_space: NeuralSearchSpace,
    /// Search strategy to apply.
    strategy: NASStrategy,
    /// How candidate architectures are evaluated.
    evaluator: ArchitectureEvaluator,
}
307
/// Space of neural architectures to search over.
#[derive(Debug, Clone)]
pub struct NeuralSearchSpace {
    /// Layer types that may appear in an architecture.
    pub layer_types: Vec<LayerType>,
    /// Range for the number of layers (expected `ParameterRange::Integer`).
    pub num_layers: ParameterRange,
    /// Range for hidden-unit counts per layer.
    pub hidden_units: ParameterRange,
    /// Candidate activation functions.
    pub activations: Vec<ActivationFunction>,
    /// Candidate regularization options.
    pub regularization: Vec<RegularizationOption>,
    /// Candidate layer-connection patterns.
    pub connections: Vec<ConnectionPattern>,
}
324
/// Type of a neural-network layer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LayerType {
    /// Fully connected layer.
    Dense,
    /// Dropout regularization layer.
    Dropout,
    /// Batch-normalization layer.
    BatchNorm,
    /// Convolutional layer.
    Convolution,
    /// Pooling layer.
    Pooling,
    /// Long short-term memory recurrent layer.
    LSTM,
    /// Gated recurrent unit layer.
    GRU,
    /// Attention layer.
    Attention,
    /// Embedding lookup layer.
    Embedding,
    /// User-defined layer type identified by name.
    Custom(String),
}
349
/// Activation function applied by a layer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ActivationFunction {
    /// Rectified linear unit.
    ReLU,
    /// Leaky rectified linear unit.
    LeakyReLU,
    /// Exponential linear unit.
    ELU,
    /// Swish (x * sigmoid(x)).
    Swish,
    /// Gaussian error linear unit.
    GELU,
    /// Hyperbolic tangent.
    Tanh,
    /// Logistic sigmoid.
    Sigmoid,
    /// Identity (no activation).
    Identity,
    /// User-defined activation identified by name.
    Custom(String),
}
372
/// A regularization technique together with its tunable strength.
#[derive(Debug, Clone)]
pub struct RegularizationOption {
    /// Which regularization technique to apply.
    pub reg_type: RegularizationType,
    /// Searchable range for the regularization strength.
    pub strength: ParameterRange,
}
381
/// Kind of regularization applied during training.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RegularizationType {
    /// L1 (lasso) weight penalty.
    L1,
    /// L2 (ridge) weight penalty.
    L2,
    /// Dropout.
    Dropout,
    /// Batch normalization.
    BatchNorm,
    /// Layer normalization.
    LayerNorm,
    /// Early stopping on a validation metric.
    EarlyStopping,
    /// User-defined regularization identified by name.
    Custom(String),
}
400
/// How layers are wired together in an architecture.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ConnectionPattern {
    /// Plain feed-forward chain.
    Sequential,
    /// Residual (skip) connections.
    Residual,
    /// Dense connectivity (every layer fed by all earlier layers).
    DenseNet,
    /// Highway (gated skip) connections.
    Highway,
    /// User-defined pattern identified by name.
    Custom(String),
}
415
/// Strategy for exploring the neural-architecture space.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NASStrategy {
    /// Uniform random sampling.
    Random,
    /// Evolutionary search.
    Evolutionary,
    /// Reinforcement-learning controller.
    ReinforcementLearning,
    /// Differentiable architecture search (DARTS).
    DARTS,
    /// Progressive growing of architectures.
    Progressive,
    /// One-shot / supernet search.
    OneShot,
}
432
/// How candidate architectures are scored during NAS.
#[derive(Debug, Clone)]
pub struct ArchitectureEvaluator {
    /// Evaluation approach (full training, early stopping, …).
    pub strategy: EvaluationStrategy,
    /// Wall-clock budget per architecture evaluation.
    pub max_eval_time: Duration,
    /// Optional early-stopping rule; `None` runs to the time budget.
    pub early_stopping: Option<EarlyStoppingCriteria>,
}
443
/// Approach used to estimate an architecture's quality.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EvaluationStrategy {
    /// Train each candidate to convergence.
    FullTraining,
    /// Train with early stopping on a validation metric.
    EarlyStopping,
    /// Share weights across candidates (supernet-style).
    WeightSharing,
    /// Predict performance without full training.
    PerformancePrediction,
    /// Progressively extend training for promising candidates.
    Progressive,
}
458
/// Rule for stopping an evaluation once a metric plateaus.
#[derive(Debug, Clone)]
pub struct EarlyStoppingCriteria {
    /// Name of the monitored metric (e.g. "val_accuracy").
    pub metric: String,
    /// Number of evaluations without improvement before stopping.
    pub patience: usize,
    /// Minimum change that counts as an improvement.
    pub min_delta: f64,
}
469
470impl Default for AutoMLConfig {
471 fn default() -> Self {
472 Self {
473 max_time: Duration::from_secs(3600), max_trials: 100,
475 cv_folds: 5,
476 metric: OptimizationMetric::Accuracy,
477 strategy: SearchStrategy::Random,
478 population_size: 20,
479 early_stopping_patience: Some(10),
480 random_seed: None,
481 }
482 }
483}
484
impl Default for OptimizationHistory {
    /// Empty history: no trials, no best result, zero elapsed time.
    fn default() -> Self {
        Self {
            trials: Vec::new(),
            best_score: None,
            best_config: None,
            start_time: None,
            total_time: Duration::ZERO,
        }
    }
}
496
497impl AutoMLOptimizer {
498 pub fn new(config: AutoMLConfig) -> SklResult<Self> {
500 let rng = if let Some(seed) = config.random_seed {
501 StdRng::seed_from_u64(seed)
502 } else {
503 StdRng::from_rng(&mut thread_rng())
504 };
505
506 Ok(Self {
507 config,
508 search_space: SearchSpace::default(),
509 history: OptimizationHistory::default(),
510 rng,
511 })
512 }
513
514 #[must_use]
516 pub fn search_space(mut self, search_space: SearchSpace) -> Self {
517 self.search_space = search_space;
518 self
519 }
520
521 pub fn optimize(
523 &mut self,
524 x_train: &ArrayView2<Float>,
525 y_train: &ArrayView1<Float>,
526 x_val: Option<&ArrayView2<Float>>,
527 y_val: Option<&ArrayView1<Float>>,
528 ) -> SklResult<FluentPipelineBuilder> {
529 self.history.start_time = Some(Instant::now());
530 let start_time = Instant::now();
531
532 let mut best_score = f64::NEG_INFINITY;
533 let mut best_config = None;
534 let mut trials_without_improvement = 0;
535
536 for trial_id in 0..self.config.max_trials {
537 if start_time.elapsed() > self.config.max_time {
539 break;
540 }
541
542 let config = self.generate_candidate_config()?;
544
545 let trial_result =
547 self.evaluate_config(&config, x_train, y_train, x_val, y_val, trial_id)?;
548
549 self.history.trials.push(trial_result.clone());
551
552 if trial_result.score > best_score {
554 best_score = trial_result.score;
555 best_config = Some(config);
556 trials_without_improvement = 0;
557
558 self.history.best_score = Some(best_score);
559 self.history.best_config = best_config.clone();
560 } else {
561 trials_without_improvement += 1;
562 }
563
564 if let Some(patience) = self.config.early_stopping_patience {
566 if trials_without_improvement >= patience {
567 break;
568 }
569 }
570 }
571
572 self.history.total_time = start_time.elapsed();
573
574 if let Some(best_config) = best_config {
576 Ok(self.config_to_builder(best_config))
577 } else {
578 Err(SklearsError::InvalidData {
579 reason: "No valid configuration found during optimization".to_string(),
580 })
581 }
582 }
583
584 fn generate_candidate_config(&mut self) -> SklResult<PipelineConfiguration> {
586 match self.config.strategy {
587 SearchStrategy::Random => self.generate_random_config(),
588 SearchStrategy::Genetic => self.generate_genetic_config(),
589 SearchStrategy::Bayesian => self.generate_bayesian_config(),
590 _ => self.generate_random_config(), }
592 }
593
594 fn generate_random_config(&mut self) -> SklResult<PipelineConfiguration> {
596 let algorithm = &self.search_space.algorithms
598 [self.rng.gen_range(0..self.search_space.algorithms.len())];
599
600 let preprocessing_steps: Vec<_> = self
602 .search_space
603 .preprocessing
604 .iter()
605 .filter(|step| !step.optional || self.rng.gen_bool(0.5))
606 .collect();
607
608 let feature_steps: Vec<_> = self
610 .search_space
611 .feature_engineering
612 .iter()
613 .filter(|step| !step.optional || self.rng.gen_bool(0.3))
614 .collect();
615
616 Ok(PipelineConfiguration::default())
618 }
619
620 fn generate_genetic_config(&mut self) -> SklResult<PipelineConfiguration> {
622 self.generate_random_config()
625 }
626
627 fn generate_bayesian_config(&mut self) -> SklResult<PipelineConfiguration> {
629 self.generate_random_config()
632 }
633
634 fn evaluate_config(
636 &mut self,
637 config: &PipelineConfiguration,
638 x_train: &ArrayView2<Float>,
639 y_train: &ArrayView1<Float>,
640 x_val: Option<&ArrayView2<Float>>,
641 y_val: Option<&ArrayView1<Float>>,
642 trial_id: usize,
643 ) -> SklResult<TrialResult> {
644 let start_time = Instant::now();
645
646 let pipeline_builder = self.config_to_builder(config.clone());
648
649 let mock_score = self.rng.gen_range(0.5..1.0);
656 let cv_scores = (0..self.config.cv_folds)
657 .map(|_| self.rng.gen_range(0.4..1.0))
658 .collect();
659
660 Ok(TrialResult {
661 trial_id,
662 config: config.clone(),
663 score: mock_score,
664 training_time: start_time.elapsed(),
665 cv_scores,
666 timestamp: start_time,
667 status: TrialStatus::Success,
668 error: None,
669 })
670 }
671
672 fn config_to_builder(&self, config: PipelineConfiguration) -> FluentPipelineBuilder {
674 FluentPipelineBuilder::data_science_preset()
676 .memory(config.memory_config)
677 .caching(config.caching)
678 .validation(config.validation)
679 .debug(config.debug)
680 }
681
682 #[must_use]
684 pub fn get_results(&self) -> &OptimizationHistory {
685 &self.history
686 }
687
688 #[must_use]
690 pub fn get_best_trial(&self) -> Option<&TrialResult> {
691 self.history.trials.iter().max_by(|a, b| {
692 a.score
693 .partial_cmp(&b.score)
694 .unwrap_or(std::cmp::Ordering::Equal)
695 })
696 }
697
698 #[must_use]
700 pub fn generate_report(&self) -> OptimizationReport {
701 OptimizationReport {
703 total_trials: self.history.trials.len(),
704 successful_trials: self
705 .history
706 .trials
707 .iter()
708 .filter(|t| t.status == TrialStatus::Success)
709 .count(),
710 best_score: self.history.best_score,
711 total_time: self.history.total_time,
712 average_trial_time: if self.history.trials.is_empty() {
713 None
714 } else {
715 Some(Duration::from_secs_f64(
716 self.history
717 .trials
718 .iter()
719 .map(|t| t.training_time.as_secs_f64())
720 .sum::<f64>()
721 / self.history.trials.len() as f64,
722 ))
723 },
724 trials_summary: self.history.trials.clone(),
725 }
726 }
727}
728
/// Summary of a completed optimization run.
#[derive(Debug, Clone)]
pub struct OptimizationReport {
    /// Total number of trials executed.
    pub total_trials: usize,
    /// Number of trials with `TrialStatus::Success`.
    pub successful_trials: usize,
    /// Best score observed; `None` if no successful trial.
    pub best_score: Option<f64>,
    /// Total wall-clock time of the run.
    pub total_time: Duration,
    /// Mean per-trial evaluation time; `None` when no trials ran.
    pub average_trial_time: Option<Duration>,
    /// Full copy of every trial result.
    pub trials_summary: Vec<TrialResult>,
}
745
impl Default for SearchSpace {
    /// Builds a compact default space: linear regression and random
    /// forest, standard/min-max scaling, optional polynomial features,
    /// and conservative time/memory constraints.
    fn default() -> Self {
        Self {
            // Candidate algorithms with rough resource estimates.
            algorithms: vec![
                AlgorithmChoice {
                    name: "LinearRegression".to_string(),
                    algorithm_type: AlgorithmType::Linear,
                    hyperparameters: BTreeMap::new(),
                    resource_requirements: ResourceRequirements {
                        memory_mb: 100,
                        cpu_cores: 1,
                        time_complexity: TimeComplexity::Linear,
                        requires_gpu: false,
                    },
                },
                AlgorithmChoice {
                    name: "RandomForest".to_string(),
                    algorithm_type: AlgorithmType::Ensemble,
                    hyperparameters: BTreeMap::from([
                        (
                            "n_estimators".to_string(),
                            ParameterRange::Integer { min: 10, max: 1000 },
                        ),
                        (
                            "max_depth".to_string(),
                            ParameterRange::Integer { min: 1, max: 50 },
                        ),
                    ]),
                    resource_requirements: ResourceRequirements {
                        memory_mb: 500,
                        cpu_cores: 4,
                        time_complexity: TimeComplexity::Linearithmic,
                        requires_gpu: false,
                    },
                },
            ],
            // StandardScaler is always applied; MinMaxScaler is optional.
            preprocessing: vec![
                PreprocessingChoice {
                    name: "StandardScaler".to_string(),
                    parameters: BTreeMap::new(),
                    optional: false,
                },
                PreprocessingChoice {
                    name: "MinMaxScaler".to_string(),
                    parameters: BTreeMap::new(),
                    optional: true,
                },
            ],
            // Optional polynomial expansion with degree in 2..=4.
            feature_engineering: vec![FeatureEngineeringChoice {
                name: "PolynomialFeatures".to_string(),
                parameters: BTreeMap::from([(
                    "degree".to_string(),
                    ParameterRange::Integer { min: 2, max: 4 },
                )]),
                optional: true,
                cost_estimate: 0.5,
            }],
            hyperparameters: BTreeMap::new(),
            // Hard limits applied to every candidate.
            constraints: vec![
                ArchitectureConstraint::MaxTrainingTime(Duration::from_secs(300)),
                ArchitectureConstraint::MaxMemoryMB(2048),
            ],
        }
    }
}
815
816impl NeuralArchitectureSearch {
817 #[must_use]
819 pub fn new(search_space: NeuralSearchSpace, strategy: NASStrategy) -> Self {
820 Self {
821 search_space,
822 strategy,
823 evaluator: ArchitectureEvaluator {
824 strategy: EvaluationStrategy::EarlyStopping,
825 max_eval_time: Duration::from_secs(300),
826 early_stopping: Some(EarlyStoppingCriteria {
827 metric: "val_accuracy".to_string(),
828 patience: 10,
829 min_delta: 0.001,
830 }),
831 },
832 }
833 }
834
835 pub fn search(&mut self, max_architectures: usize) -> SklResult<Vec<NeuralArchitecture>> {
837 let mut architectures = Vec::new();
838 let mut rng = StdRng::from_rng(&mut thread_rng());
839
840 for _ in 0..max_architectures {
841 let architecture = self.generate_architecture(&mut rng)?;
842 architectures.push(architecture);
843 }
844
845 architectures.sort_by(|a, b| {
847 b.estimated_performance
848 .partial_cmp(&a.estimated_performance)
849 .unwrap()
850 });
851
852 Ok(architectures)
853 }
854
855 fn generate_architecture(&self, rng: &mut StdRng) -> SklResult<NeuralArchitecture> {
857 let num_layers = match &self.search_space.num_layers {
858 ParameterRange::Integer { min, max } => rng.gen_range(*min..*max + 1) as usize,
859 _ => 3, };
861
862 let mut layers = Vec::new();
863 for i in 0..num_layers {
864 let layer_type = &self.search_space.layer_types
865 [rng.gen_range(0..self.search_space.layer_types.len())];
866
867 let activation = &self.search_space.activations
868 [rng.gen_range(0..self.search_space.activations.len())];
869
870 layers.push(NeuralLayer {
871 layer_type: layer_type.clone(),
872 units: Some(rng.gen_range(32..512)),
873 activation: activation.clone(),
874 layer_id: i,
875 });
876 }
877
878 Ok(NeuralArchitecture {
879 layers,
880 connection_pattern: ConnectionPattern::Sequential,
881 estimated_performance: rng.gen_range(0.5..1.0),
882 parameter_count: rng.gen_range(1000..1_000_000),
883 memory_usage_mb: rng.gen_range(10..500),
884 })
885 }
886}
887
/// A concrete candidate neural architecture produced by NAS.
#[derive(Debug, Clone)]
pub struct NeuralArchitecture {
    /// Layers in order of appearance.
    pub layers: Vec<NeuralLayer>,
    /// How the layers are wired together.
    pub connection_pattern: ConnectionPattern,
    /// Estimated performance score used for ranking (higher is better).
    pub estimated_performance: f64,
    /// Estimated number of trainable parameters.
    pub parameter_count: usize,
    /// Estimated memory footprint, in megabytes.
    pub memory_usage_mb: usize,
}
902
/// One layer within a candidate architecture.
#[derive(Debug, Clone)]
pub struct NeuralLayer {
    /// Type of the layer.
    pub layer_type: LayerType,
    /// Number of units; `None` for layers without a unit count.
    pub units: Option<usize>,
    /// Activation applied by the layer.
    pub activation: ActivationFunction,
    /// Zero-based position of the layer in the architecture.
    pub layer_id: usize,
}
915
// Removed the `#[allow(non_snake_case)]` attribute: every name in this
// module is already snake_case, so the lint allow was dead weight.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_automl_config() {
        let config = AutoMLConfig::default();
        assert_eq!(config.max_trials, 100);
        assert_eq!(config.cv_folds, 5);
        assert_eq!(config.metric, OptimizationMetric::Accuracy);
    }

    #[test]
    fn test_automl_optimizer() {
        let config = AutoMLConfig::default();
        let optimizer = AutoMLOptimizer::new(config).unwrap();
        assert_eq!(optimizer.history.trials.len(), 0);
        assert!(optimizer.history.best_score.is_none());
    }

    #[test]
    fn test_search_space() {
        let search_space = SearchSpace::default();
        assert!(!search_space.algorithms.is_empty());
        assert!(!search_space.preprocessing.is_empty());
    }

    #[test]
    fn test_neural_architecture_search() {
        let search_space = NeuralSearchSpace {
            layer_types: vec![LayerType::Dense, LayerType::Dropout],
            num_layers: ParameterRange::Integer { min: 2, max: 5 },
            hidden_units: ParameterRange::Integer { min: 32, max: 512 },
            activations: vec![ActivationFunction::ReLU, ActivationFunction::Tanh],
            regularization: vec![],
            connections: vec![ConnectionPattern::Sequential],
        };

        let mut nas = NeuralArchitectureSearch::new(search_space, NASStrategy::Random);
        let architectures = nas.search(5).unwrap();

        assert_eq!(architectures.len(), 5);
        // Results must come back sorted best-first.
        assert!(architectures[0].estimated_performance >= architectures[1].estimated_performance);
    }

    #[test]
    fn test_parameter_ranges() {
        let float_range = ParameterRange::Continuous {
            min: 0.1,
            max: 1.0,
            log_scale: false,
        };
        let int_range = ParameterRange::Integer { min: 1, max: 100 };
        let categorical = ParameterRange::Categorical(vec!["a".to_string(), "b".to_string()]);

        match float_range {
            ParameterRange::Continuous { min, max, .. } => {
                assert_eq!(min, 0.1);
                assert_eq!(max, 1.0);
            }
            _ => panic!("Wrong parameter range type"),
        }

        // Previously these bindings were unused (compiler warnings);
        // assert on them so they actually test something.
        assert!(matches!(
            int_range,
            ParameterRange::Integer { min: 1, max: 100 }
        ));
        match categorical {
            ParameterRange::Categorical(options) => assert_eq!(options.len(), 2),
            _ => panic!("Wrong parameter range type"),
        }
    }

    #[test]
    fn test_optimization_history() {
        let mut history = OptimizationHistory::default();
        assert!(history.trials.is_empty());
        assert!(history.best_score.is_none());

        let trial = TrialResult {
            trial_id: 0,
            config: PipelineConfiguration::default(),
            score: 0.85,
            training_time: Duration::from_secs(30),
            cv_scores: vec![0.8, 0.9, 0.85],
            timestamp: Instant::now(),
            status: TrialStatus::Success,
            error: None,
        };

        history.trials.push(trial);
        assert_eq!(history.trials.len(), 1);
    }

    #[test]
    fn test_trial_status() {
        assert_eq!(TrialStatus::Success, TrialStatus::Success);
        assert_ne!(TrialStatus::Success, TrialStatus::Failed);
    }

    #[test]
    fn test_algorithm_types() {
        let linear = AlgorithmType::Linear;
        let tree = AlgorithmType::Tree;
        let neural = AlgorithmType::NeuralNetwork;

        assert_eq!(linear, AlgorithmType::Linear);
        assert_ne!(linear, tree);
        assert_ne!(tree, neural);
    }

    #[test]
    fn test_resource_requirements() {
        let requirements = ResourceRequirements {
            memory_mb: 1024,
            cpu_cores: 4,
            time_complexity: TimeComplexity::Linear,
            requires_gpu: false,
        };

        assert_eq!(requirements.memory_mb, 1024);
        assert_eq!(requirements.cpu_cores, 4);
        assert!(!requirements.requires_gpu);
    }
}