1pub mod analysis;
11pub mod config;
12pub mod pipeline;
13pub mod resource;
14pub mod search;
15
16pub use config::*;
17
18use crate::anomaly_detection::QuantumAnomalyDetector;
19use crate::classification::Classifier;
20use crate::clustering::QuantumClusterer;
21use crate::dimensionality_reduction::QuantumDimensionalityReducer;
22use crate::error::{MLError, Result};
23use crate::optimization::OptimizationMethod;
24use crate::qnn::{QNNLayerType, QuantumNeuralNetwork};
25use crate::quantum_nas::{ArchitectureCandidate, QuantumNAS, SearchStrategy};
26use crate::time_series::QuantumTimeSeriesForecaster;
27use fastrand;
28use scirs2_core::ndarray::{s, Array1, Array2, Array3, Axis};
29use std::collections::{HashMap, VecDeque};
30use std::f64::consts::PI;
31
32use analysis::{AutoMLResults, PerformanceTracker};
33use pipeline::{AutomatedPipelineConstructor, QuantumMLPipeline};
34use resource::QuantumResourceOptimizer;
35use search::{QuantumHyperparameterOptimizer, QuantumModelSelector, SearchHistory};
36
37#[derive(Debug, Clone)]
39pub struct QuantumAutoML {
40 config: QuantumAutoMLConfig,
42
43 pipeline_constructor: AutomatedPipelineConstructor,
45
46 hyperparameter_optimizer: QuantumHyperparameterOptimizer,
48
49 model_selector: QuantumModelSelector,
51
52 ensemble_manager: QuantumEnsembleManager,
54
55 performance_tracker: PerformanceTracker,
57
58 resource_optimizer: QuantumResourceOptimizer,
60
61 search_history: SearchHistory,
63
64 best_pipeline: Option<QuantumMLPipeline>,
66
67 experiment_results: AutoMLResults,
69}
70
71#[derive(Debug, Clone)]
73pub struct QuantumEnsembleManager {
74 max_ensemble_size: usize,
76
77 ensemble_members: Vec<QuantumMLPipeline>,
79
80 diversity_strategies: Vec<EnsembleDiversityStrategy>,
82
83 combination_methods: Vec<EnsembleCombinationMethod>,
85
86 performance_weights: Vec<f64>,
88}
89
90impl QuantumAutoML {
91 pub fn new(config: QuantumAutoMLConfig) -> Self {
93 Self {
94 pipeline_constructor: AutomatedPipelineConstructor::new(&config),
95 hyperparameter_optimizer: QuantumHyperparameterOptimizer::new(
96 &config.search_space.hyperparameters,
97 ),
98 model_selector: QuantumModelSelector::new(&config.search_space.algorithms),
99 ensemble_manager: QuantumEnsembleManager::new(&config.search_space.ensembles),
100 performance_tracker: PerformanceTracker::new(&config.evaluation_config),
101 resource_optimizer: QuantumResourceOptimizer::new(&config.quantum_constraints),
102 search_history: SearchHistory::new(),
103 best_pipeline: None,
104 experiment_results: AutoMLResults::new(),
105 config,
106 }
107 }
108
109 pub fn basic() -> Self {
111 Self::new(QuantumAutoMLConfig::basic())
112 }
113
114 pub fn comprehensive() -> Self {
116 Self::new(QuantumAutoMLConfig::comprehensive())
117 }
118
119 pub fn production() -> Self {
121 Self::new(QuantumAutoMLConfig::production())
122 }
123
124 pub fn fit(&mut self, X: &Array2<f64>, y: &Array1<f64>) -> Result<()> {
126 if self.config.task_type.is_none() {
128 self.config.task_type = Some(self.detect_task_type(X, y)?);
129 }
130
131 self.search_history.start_search();
133
134 for trial_id in 0..self.config.search_budget.max_trials {
136 if self.should_stop_search(trial_id)? {
137 break;
138 }
139
140 let pipeline = self
142 .pipeline_constructor
143 .construct_pipeline(X, y, &self.config)?;
144
145 let optimized_pipeline = self.hyperparameter_optimizer.optimize(pipeline, X, y)?;
147
148 let performance = self.evaluate_pipeline(&optimized_pipeline, X, y)?;
150
151 self.search_history
153 .record_trial(trial_id, &optimized_pipeline, performance);
154
155 if self.is_better_pipeline(&optimized_pipeline, performance)? {
157 self.best_pipeline = Some(optimized_pipeline.clone());
158 self.performance_tracker
159 .update_best_performance(performance);
160 }
161
162 if self.config.search_space.ensembles.enabled {
164 self.ensemble_manager
165 .consider_pipeline(optimized_pipeline, performance)?;
166 }
167 }
168
169 self.finalize_search()?;
171
172 Ok(())
173 }
174
175 pub fn predict(&self, X: &Array2<f64>) -> Result<Array1<f64>> {
177 match &self.best_pipeline {
178 Some(pipeline) => pipeline.predict(X),
179 None => Err(MLError::ModelNotTrained(
180 "AutoML has not been fitted yet".to_string(),
181 )),
182 }
183 }
184
185 pub fn best_pipeline(&self) -> Option<&QuantumMLPipeline> {
187 self.best_pipeline.as_ref()
188 }
189
190 pub fn get_results(&self) -> &AutoMLResults {
192 &self.experiment_results
193 }
194
195 pub fn get_search_history(&self) -> &SearchHistory {
197 &self.search_history
198 }
199
200 pub fn get_performance_tracker(&self) -> &PerformanceTracker {
202 &self.performance_tracker
203 }
204
205 fn detect_task_type(&self, X: &Array2<f64>, y: &Array1<f64>) -> Result<MLTaskType> {
208 let unique_values = {
210 let mut values: Vec<f64> = y.iter().cloned().collect();
211 values.sort_by(|a, b| a.partial_cmp(b).unwrap());
212 values.dedup();
213 values
214 };
215
216 let all_integers = unique_values.iter().all(|&v| (v.fract()).abs() < 1e-10);
218
219 if all_integers && unique_values.len() <= 2 {
220 Ok(MLTaskType::BinaryClassification)
221 } else if all_integers && unique_values.len() <= 20 {
222 Ok(MLTaskType::MultiClassification {
223 num_classes: unique_values.len(),
224 })
225 } else {
226 Ok(MLTaskType::Regression)
227 }
228 }
229
230 fn should_stop_search(&self, trial_id: usize) -> Result<bool> {
231 if let Some(elapsed) = self.search_history.elapsed_time() {
233 if elapsed > self.config.search_budget.max_time_seconds {
234 return Ok(true);
235 }
236 }
237
238 if self.config.search_budget.early_stopping.enabled {
240 if let Some(best_performance) = self.performance_tracker.best_performance() {
241 let trials_without_improvement = self.search_history.trials_without_improvement();
242 if trials_without_improvement >= self.config.search_budget.early_stopping.patience {
243 return Ok(true);
244 }
245 }
246 }
247
248 Ok(false)
249 }
250
251 fn evaluate_pipeline(
252 &self,
253 pipeline: &QuantumMLPipeline,
254 X: &Array2<f64>,
255 y: &Array1<f64>,
256 ) -> Result<f64> {
257 match &self.config.evaluation_config.cv_strategy {
259 CrossValidationStrategy::KFold { k } => self.evaluate_k_fold(pipeline, X, y, *k),
260 CrossValidationStrategy::HoldOut { test_size } => {
261 self.evaluate_holdout(pipeline, X, y, *test_size)
262 }
263 _ => {
264 self.evaluate_holdout(pipeline, X, y, self.config.evaluation_config.test_size)
266 }
267 }
268 }
269
270 fn evaluate_k_fold(
271 &self,
272 pipeline: &QuantumMLPipeline,
273 X: &Array2<f64>,
274 y: &Array1<f64>,
275 k: usize,
276 ) -> Result<f64> {
277 let n_samples = X.nrows();
278 let fold_size = n_samples / k;
279 let mut scores = Vec::new();
280
281 for fold in 0..k {
282 let start_idx = fold * fold_size;
283 let end_idx = if fold == k - 1 {
284 n_samples
285 } else {
286 (fold + 1) * fold_size
287 };
288
289 let mut train_indices = Vec::new();
291 let mut test_indices = Vec::new();
292
293 for i in 0..n_samples {
294 if i >= start_idx && i < end_idx {
295 test_indices.push(i);
296 } else {
297 train_indices.push(i);
298 }
299 }
300
301 let X_train = X.select(Axis(0), &train_indices);
303 let y_train = y.select(Axis(0), &train_indices);
304 let X_test = X.select(Axis(0), &test_indices);
305 let y_test = y.select(Axis(0), &test_indices);
306
307 let mut pipeline_copy = pipeline.clone();
309 pipeline_copy.fit(&X_train, &y_train)?;
310 let predictions = pipeline_copy.predict(&X_test)?;
311
312 let score = self.calculate_score(&predictions, &y_test)?;
314 scores.push(score);
315 }
316
317 Ok(scores.iter().sum::<f64>() / scores.len() as f64)
319 }
320
321 fn evaluate_holdout(
322 &self,
323 pipeline: &QuantumMLPipeline,
324 X: &Array2<f64>,
325 y: &Array1<f64>,
326 test_size: f64,
327 ) -> Result<f64> {
328 let n_samples = X.nrows();
329 let n_test = (n_samples as f64 * test_size) as usize;
330 let n_train = n_samples - n_test;
331
332 let X_train = X.slice(s![0..n_train, ..]).to_owned();
334 let y_train = y.slice(s![0..n_train]).to_owned();
335 let X_test = X.slice(s![n_train.., ..]).to_owned();
336 let y_test = y.slice(s![n_train..]).to_owned();
337
338 let mut pipeline_copy = pipeline.clone();
340 pipeline_copy.fit(&X_train, &y_train)?;
341 let predictions = pipeline_copy.predict(&X_test)?;
342
343 self.calculate_score(&predictions, &y_test)
344 }
345
346 fn calculate_score(&self, predictions: &Array1<f64>, y_true: &Array1<f64>) -> Result<f64> {
347 match &self.config.task_type {
348 Some(MLTaskType::BinaryClassification)
349 | Some(MLTaskType::MultiClassification { .. }) => {
350 let correct = predictions
352 .iter()
353 .zip(y_true.iter())
354 .map(|(pred, true_val)| {
355 if (pred.round() - true_val).abs() < 1e-10 {
356 1.0
357 } else {
358 0.0
359 }
360 })
361 .sum::<f64>();
362 Ok(correct / predictions.len() as f64)
363 }
364 Some(MLTaskType::Regression) => {
365 let mean_true = y_true.mean().unwrap();
367 let ss_tot = y_true.iter().map(|&y| (y - mean_true).powi(2)).sum::<f64>();
368 let ss_res = predictions
369 .iter()
370 .zip(y_true.iter())
371 .map(|(pred, true_val)| (true_val - pred).powi(2))
372 .sum::<f64>();
373 Ok(1.0 - (ss_res / ss_tot))
374 }
375 _ => {
376 let mse = predictions
378 .iter()
379 .zip(y_true.iter())
380 .map(|(pred, true_val)| (pred - true_val).powi(2))
381 .sum::<f64>()
382 / predictions.len() as f64;
383 Ok(-mse) }
385 }
386 }
387
388 fn is_better_pipeline(&self, pipeline: &QuantumMLPipeline, performance: f64) -> Result<bool> {
389 match self.performance_tracker.best_performance() {
390 Some(best_perf) => {
391 Ok(performance
392 > best_perf + self.config.search_budget.early_stopping.min_improvement)
393 }
394 None => Ok(true), }
396 }
397
398 fn finalize_search(&mut self) -> Result<()> {
399 self.experiment_results = self.generate_final_results()?;
401
402 if self.config.search_space.ensembles.enabled {
404 self.ensemble_manager.finalize_ensemble()?;
405 }
406
407 Ok(())
408 }
409
410 fn generate_final_results(&self) -> Result<AutoMLResults> {
411 let mut results = AutoMLResults::new();
412
413 if let Some(pipeline) = &self.best_pipeline {
415 results.set_best_pipeline_info(pipeline);
416 }
417
418 results.set_search_statistics(&self.search_history);
420
421 results.set_performance_analysis(&self.performance_tracker);
423
424 Ok(results)
425 }
426}
427
428impl QuantumEnsembleManager {
429 fn new(ensemble_config: &EnsembleSearchSpace) -> Self {
430 Self {
431 max_ensemble_size: ensemble_config.max_ensemble_size,
432 ensemble_members: Vec::new(),
433 diversity_strategies: ensemble_config.diversity_strategies.clone(),
434 combination_methods: ensemble_config.combination_methods.clone(),
435 performance_weights: Vec::new(),
436 }
437 }
438
439 fn consider_pipeline(&mut self, pipeline: QuantumMLPipeline, performance: f64) -> Result<()> {
440 if self.ensemble_members.len() < self.max_ensemble_size {
442 self.ensemble_members.push(pipeline);
443 self.performance_weights.push(performance);
444 } else {
445 if let Some((worst_idx, _)) = self
447 .performance_weights
448 .iter()
449 .enumerate()
450 .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
451 {
452 if performance > self.performance_weights[worst_idx] {
453 self.ensemble_members[worst_idx] = pipeline;
454 self.performance_weights[worst_idx] = performance;
455 }
456 }
457 }
458
459 Ok(())
460 }
461
462 fn finalize_ensemble(&mut self) -> Result<()> {
463 let total_weight: f64 = self.performance_weights.iter().sum();
465 if total_weight > 0.0 {
466 for weight in &mut self.performance_weights {
467 *weight /= total_weight;
468 }
469 }
470
471 Ok(())
472 }
473}
474
475impl Default for QuantumAutoML {
476 fn default() -> Self {
477 Self::basic()
478 }
479}
480
481pub fn create_default_automl_config() -> QuantumAutoMLConfig {
485 QuantumAutoMLConfig::basic()
486}
487
488pub fn create_comprehensive_automl_config() -> QuantumAutoMLConfig {
490 QuantumAutoMLConfig::comprehensive()
491}