1pub mod advanced_optimizer;
8pub mod automl_core;
9pub mod benchmark_framework;
10pub mod data_analyzer;
11pub mod hyperparameter_optimizer;
12pub mod method_selector;
13pub mod pipeline_optimizer;
14pub mod preprocessing_integration;
15
16pub use automl_core::{
18 AutoMLError, AutoMLMethod, AutoMLResults, AutoMLSummary, AutomatedFeatureSelectionPipeline,
19 ComputationalBudget, CorrelationStructure, DataCharacteristics, TargetType,
20};
21
22pub use data_analyzer::DataAnalyzer;
23
24pub use method_selector::MethodSelector;
25
26pub use hyperparameter_optimizer::{
27 HyperparameterOptimizer, MethodConfig, OptimizedMethod, TrainedMethod,
28};
29
30pub use pipeline_optimizer::{
31 MethodInfo, MethodPerformance, OptimalPipeline, PipelineConfig, PipelineConfigResult,
32 PipelineOptimizer, TrainedOptimalPipeline, ValidationStrategy,
33};
34
35pub use preprocessing_integration::{
36 DimensionalityReduction, FeatureEngineering, MissingValueStrategy, OutlierHandling,
37 PreprocessingIntegration, ScalerType,
38};
39
40pub use advanced_optimizer::{
41 AdvancedHyperparameterOptimizer, EarlyStoppingConfig, OptimizationStrategy,
42};
43
44pub use benchmark_framework::{
45 AutoMLBenchmark, BenchmarkDataset, BenchmarkMetric, BenchmarkResults, DatasetType,
46 DetailedBenchmarkResults, DifficultyLevel, ErrorAnalysis, ImprovementRatios, MethodComparison,
47 OptimizationDetails, PerformanceMetrics,
48};
49
50use scirs2_core::ndarray::{ArrayView1, ArrayView2};
52use sklears_core::error::Result as SklResult;
53
54type Result<T> = SklResult<T>;
55
/// Factory that builds preconfigured AutoML feature-selection pipelines and
/// benchmark suites.
///
/// The factory only stores an [`AutoMLFactoryConfig`]; its methods read that
/// configuration each time they are called.
#[derive(Debug, Clone)]
pub struct AutoMLFactory {
    /// Settings consulted by the factory's builder and creation methods.
    config: AutoMLFactoryConfig,
}
61
/// Settings that control what an `AutoMLFactory` builds.
///
/// All fields are plain values, so the type also implements `PartialEq`/`Eq`
/// to let callers and tests compare configurations directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AutoMLFactoryConfig {
    /// Attach an advanced hyperparameter optimizer to basic pipelines.
    pub enable_advanced_optimization: bool,
    /// Enable the preprocessing stage of basic pipelines.
    pub enable_preprocessing: bool,
    /// Allow benchmark suites to be created.
    pub enable_benchmarking: bool,
    /// Worker count handed to the advanced optimizer.
    pub parallel_workers: usize,
    /// Optimization time budget, in whole seconds.
    pub time_budget_seconds: u64,
}

impl Default for AutoMLFactoryConfig {
    /// Returns the default settings: preprocessing on, advanced optimization
    /// and benchmarking off, one worker, and a 300-second time budget.
    fn default() -> Self {
        Self {
            enable_advanced_optimization: false,
            enable_preprocessing: true,
            enable_benchmarking: false,
            parallel_workers: 1,
            time_budget_seconds: 300,
        }
    }
}
83
84impl AutoMLFactory {
85 pub fn new() -> Self {
87 Self {
88 config: AutoMLFactoryConfig::default(),
89 }
90 }
91
92 pub fn with_config(config: AutoMLFactoryConfig) -> Self {
94 Self { config }
95 }
96
97 pub fn with_advanced_optimization(mut self) -> Self {
99 self.config.enable_advanced_optimization = true;
100 self
101 }
102
103 pub fn with_preprocessing(mut self) -> Self {
105 self.config.enable_preprocessing = true;
106 self
107 }
108
109 pub fn with_benchmarking(mut self) -> Self {
111 self.config.enable_benchmarking = true;
112 self
113 }
114
115 pub fn with_time_budget(mut self, seconds: u64) -> Self {
117 self.config.time_budget_seconds = seconds;
118 self
119 }
120
121 pub fn with_parallel_workers(mut self, workers: usize) -> Self {
123 self.config.parallel_workers = workers;
124 self
125 }
126
127 pub fn create_basic_pipeline(&self) -> AutomatedFeatureSelectionPipeline {
129 let mut pipeline = AutomatedFeatureSelectionPipeline::new();
130
131 if self.config.enable_preprocessing {
132 pipeline = pipeline.with_preprocessing();
133 }
134
135 if self.config.enable_advanced_optimization {
136 let advanced_optimizer = AdvancedHyperparameterOptimizer::new()
137 .with_time_budget(std::time::Duration::from_secs(
138 self.config.time_budget_seconds,
139 ))
140 .with_parallel_workers(self.config.parallel_workers);
141 pipeline = pipeline.with_advanced_optimizer(advanced_optimizer);
142 }
143
144 pipeline
145 }
146
147 pub fn create_advanced_pipeline(&self) -> AutomatedFeatureSelectionPipeline {
149 let preprocessing = PreprocessingIntegration::new()
150 .with_scaler(ScalerType::StandardScaler)
151 .with_missing_value_strategy(MissingValueStrategy::KNN { k: 5 })
152 .with_outlier_handling(OutlierHandling::IQR { multiplier: 1.5 })
153 .with_feature_engineering(FeatureEngineering::Polynomial { degree: 2 });
154
155 let advanced_optimizer = AdvancedHyperparameterOptimizer::new()
156 .with_strategy(OptimizationStrategy::BayesianOptimization)
157 .with_time_budget(std::time::Duration::from_secs(
158 self.config.time_budget_seconds,
159 ))
160 .with_parallel_workers(self.config.parallel_workers)
161 .with_early_stopping(EarlyStoppingConfig {
162 patience: 10,
163 min_improvement: 0.001,
164 restore_best: true,
165 });
166
167 AutomatedFeatureSelectionPipeline::new()
168 .with_custom_preprocessing(preprocessing)
169 .with_advanced_optimizer(advanced_optimizer)
170 }
171
172 pub fn create_speed_optimized_pipeline(&self) -> AutomatedFeatureSelectionPipeline {
174 let preprocessing = PreprocessingIntegration::new()
175 .with_scaler(ScalerType::MinMaxScaler)
176 .with_missing_value_strategy(MissingValueStrategy::Mean);
177
178 AutomatedFeatureSelectionPipeline::new().with_custom_preprocessing(preprocessing)
179 }
180
181 pub fn create_benchmark_suite(&self) -> Result<AutoMLBenchmark> {
183 if !self.config.enable_benchmarking {
184 return Err(AutoMLError::InvalidConfiguration.into());
185 }
186
187 let mut benchmark = AutoMLBenchmark::new()
188 .with_methods(vec![
189 AutoMLMethod::UnivariateFiltering,
190 AutoMLMethod::CorrelationBased,
191 AutoMLMethod::TreeBased,
192 AutoMLMethod::LassoBased,
193 AutoMLMethod::WrapperBased,
194 AutoMLMethod::EnsembleBased,
195 AutoMLMethod::Hybrid,
196 AutoMLMethod::NeuralArchitectureSearch,
197 AutoMLMethod::TransferLearning,
198 AutoMLMethod::MetaLearningEnsemble,
199 ])
200 .with_metrics(vec![
201 BenchmarkMetric::Accuracy,
202 BenchmarkMetric::F1Score,
203 BenchmarkMetric::FeatureReduction,
204 BenchmarkMetric::ComputationalTime,
205 BenchmarkMetric::FeatureStability,
206 ]);
207
208 benchmark.generate_synthetic_datasets(10)?;
210
211 Ok(benchmark)
212 }
213
214 pub fn quick_feature_selection(
216 &self,
217 X: ArrayView2<f64>,
218 y: ArrayView1<f64>,
219 target_features: Option<usize>,
220 ) -> Result<AutoMLResults> {
221 let pipeline = self.create_basic_pipeline();
222 pipeline.auto_select_features(X, y, target_features)
223 }
224
225 pub fn comprehensive_feature_selection(
227 &self,
228 X: ArrayView2<f64>,
229 y: ArrayView1<f64>,
230 target_features: Option<usize>,
231 ) -> Result<AutoMLResults> {
232 let pipeline = self.create_advanced_pipeline();
233 pipeline.auto_select_features(X, y, target_features)
234 }
235
236 pub fn analyze_data_characteristics(
238 &self,
239 X: ArrayView2<f64>,
240 y: ArrayView1<f64>,
241 ) -> Result<DataCharacteristics> {
242 let analyzer = DataAnalyzer::new();
243 analyzer.analyze_data(X, y)
244 }
245
246 pub fn recommend_methods(
248 &self,
249 characteristics: &DataCharacteristics,
250 ) -> Result<Vec<AutoMLMethod>> {
251 let selector = MethodSelector::new();
252 selector.select_methods(characteristics)
253 }
254
255 pub fn auto_configure_preprocessing(
257 &self,
258 characteristics: &DataCharacteristics,
259 ) -> PreprocessingIntegration {
260 PreprocessingIntegration::auto_configure(characteristics)
261 }
262
263 pub fn run_benchmark_evaluation(&self) -> Result<BenchmarkResults> {
265 let benchmark = self.create_benchmark_suite()?;
266 benchmark.run_benchmark()
267 }
268
269 pub fn generate_capability_report(&self) -> String {
271 let mut report = String::new();
272
273 report.push_str(
274 "╔══════════════════════════════════════════════════════════════════════════════╗\n",
275 );
276 report.push_str(
277 "║ AutoML Factory Capabilities ║\n",
278 );
279 report.push_str(
280 "╚══════════════════════════════════════════════════════════════════════════════╝\n\n",
281 );
282
283 report.push_str("=== Configuration ===\n");
285 report.push_str(&format!(
286 "Advanced Optimization: {}\n",
287 self.config.enable_advanced_optimization
288 ));
289 report.push_str(&format!(
290 "Preprocessing: {}\n",
291 self.config.enable_preprocessing
292 ));
293 report.push_str(&format!(
294 "Benchmarking: {}\n",
295 self.config.enable_benchmarking
296 ));
297 report.push_str(&format!(
298 "Parallel Workers: {}\n",
299 self.config.parallel_workers
300 ));
301 report.push_str(&format!(
302 "Time Budget: {} seconds\n",
303 self.config.time_budget_seconds
304 ));
305
306 report.push_str("\n=== Available Methods ===\n");
308 let methods = vec![
309 "• Univariate Filtering - Fast statistical feature selection",
310 "• Correlation-Based - Remove redundant features",
311 "• Tree-Based - Feature importance from tree models",
312 "• Lasso-Based - L1 regularization feature selection",
313 "• Wrapper-Based - Model-based selection with CV",
314 "• Ensemble-Based - Combine multiple selection methods",
315 "• Hybrid - Multi-stage selection pipeline",
316 "• Neural Architecture Search - Deep learning optimization",
317 "• Transfer Learning - Leverage pre-trained models",
318 "• Meta-Learning Ensemble - Adaptive method combination",
319 ];
320 for method in methods {
321 report.push_str(&format!("{}\n", method));
322 }
323
324 if self.config.enable_advanced_optimization {
326 report.push_str("\n=== Optimization Strategies ===\n");
327 let strategies = vec![
328 "• Bayesian Optimization - Gaussian process guided search",
329 "• Genetic Algorithm - Evolutionary optimization",
330 "• Random Search - Efficient random exploration",
331 "• Grid Search - Exhaustive parameter exploration",
332 "• Particle Swarm Optimization - Swarm intelligence",
333 "• Simulated Annealing - Temperature-based optimization",
334 "• HyperBand - Multi-fidelity optimization",
335 ];
336 for strategy in strategies {
337 report.push_str(&format!("{}\n", strategy));
338 }
339 }
340
341 if self.config.enable_preprocessing {
343 report.push_str("\n=== Preprocessing Features ===\n");
344 let preprocessing = vec![
345 "• Scaling: Standard, MinMax, Robust, Quantile",
346 "• Missing Values: Mean, Median, KNN, Interpolation",
347 "• Outlier Handling: IQR, Z-Score, Isolation Forest",
348 "• Feature Engineering: Polynomial, Interaction terms",
349 "• Dimensionality Reduction: PCA, ICA, SVD",
350 ];
351 for feature in preprocessing {
352 report.push_str(&format!("{}\n", feature));
353 }
354 }
355
356 if self.config.enable_benchmarking {
358 report.push_str("\n=== Benchmarking Features ===\n");
359 let benchmarking = vec![
360 "• Synthetic Dataset Generation",
361 "• Multi-metric Evaluation (Accuracy, F1, Time, Stability)",
362 "• Statistical Significance Testing",
363 "• Performance Comparison and Ranking",
364 "• Error Analysis and Diagnostics",
365 "• Improvement Ratio Calculations",
366 ];
367 for feature in benchmarking {
368 report.push_str(&format!("{}\n", feature));
369 }
370 }
371
372 report.push_str("\n💡 Use AutoMLFactory::quick_feature_selection() for fast results\n");
373 report
374 .push_str("💡 Use AutoMLFactory::comprehensive_feature_selection() for best quality\n");
375
376 report
377 }
378}
379
380impl Default for AutoMLFactory {
381 fn default() -> Self {
382 Self::new()
383 }
384}
385
386pub fn quick_automl(
389 X: ArrayView2<f64>,
390 y: ArrayView1<f64>,
391 target_features: Option<usize>,
392) -> Result<AutoMLResults> {
393 let factory = AutoMLFactory::new();
394 factory.quick_feature_selection(X, y, target_features)
395}
396
397pub fn comprehensive_automl(
399 X: ArrayView2<f64>,
400 y: ArrayView1<f64>,
401 target_features: Option<usize>,
402) -> Result<AutoMLResults> {
403 let factory = AutoMLFactory::new()
404 .with_advanced_optimization()
405 .with_preprocessing()
406 .with_time_budget(600); factory.comprehensive_feature_selection(X, y, target_features)
408}
409
410pub fn analyze_and_recommend(
412 X: ArrayView2<f64>,
413 y: ArrayView1<f64>,
414) -> Result<(DataCharacteristics, Vec<AutoMLMethod>)> {
415 let factory = AutoMLFactory::new();
416 let characteristics = factory.analyze_data_characteristics(X, y)?;
417 let methods = factory.recommend_methods(&characteristics)?;
418 Ok((characteristics, methods))
419}