quantrs2_ml/automl/
mod.rs

1//! Quantum Automated Machine Learning (AutoML) Framework
2//!
3//! This module provides comprehensive automated machine learning capabilities for quantum
4//! computing, including automated model selection, hyperparameter optimization, pipeline
5//! construction, and quantum-specific optimizations.
6//!
7//! The framework has been refactored from a single 3,471-line file into a modular architecture
8//! to address configuration explosion and mixed responsibilities.
9
10pub mod analysis;
11pub mod config;
12pub mod pipeline;
13pub mod resource;
14pub mod search;
15
16pub use config::*;
17
18use crate::anomaly_detection::QuantumAnomalyDetector;
19use crate::classification::Classifier;
20use crate::clustering::QuantumClusterer;
21use crate::dimensionality_reduction::QuantumDimensionalityReducer;
22use crate::error::{MLError, Result};
23use crate::optimization::OptimizationMethod;
24use crate::qnn::{QNNLayerType, QuantumNeuralNetwork};
25use crate::quantum_nas::{ArchitectureCandidate, QuantumNAS, SearchStrategy};
26use crate::time_series::QuantumTimeSeriesForecaster;
27use fastrand;
28use scirs2_core::ndarray::{s, Array1, Array2, Array3, Axis};
29use std::collections::{HashMap, VecDeque};
30use std::f64::consts::PI;
31
32use analysis::{AutoMLResults, PerformanceTracker};
33use pipeline::{AutomatedPipelineConstructor, QuantumMLPipeline};
34use resource::QuantumResourceOptimizer;
35use search::{QuantumHyperparameterOptimizer, QuantumModelSelector, SearchHistory};
36
/// Main Quantum AutoML framework.
///
/// Owns the configuration plus every component used by the search loop in
/// [`QuantumAutoML::fit`], and keeps the outcome of the most recent search
/// (best pipeline, history, and summarized results).
#[derive(Debug, Clone)]
pub struct QuantumAutoML {
    /// AutoML configuration. `task_type` is filled in by `fit` when it is `None`.
    config: QuantumAutoMLConfig,

    /// Builds one candidate pipeline per trial from the data and the configuration.
    pipeline_constructor: AutomatedPipelineConstructor,

    /// Tunes each candidate pipeline before it is scored.
    hyperparameter_optimizer: QuantumHyperparameterOptimizer,

    /// Model selector, built from `config.search_space.algorithms`.
    /// Not consulted by the methods defined in this file.
    model_selector: QuantumModelSelector,

    /// Collects scored pipelines when `config.search_space.ensembles.enabled` is set.
    ensemble_manager: QuantumEnsembleManager,

    /// Tracks the best score seen so far; drives improvement checks and early stopping.
    performance_tracker: PerformanceTracker,

    /// Resource optimizer, built from `config.quantum_constraints`.
    /// Not consulted by the methods defined in this file.
    resource_optimizer: QuantumResourceOptimizer,

    /// Per-trial record; also supplies elapsed time and the no-improvement streak
    /// used by the stop criteria.
    search_history: SearchHistory,

    /// Best pipeline found so far; `None` until a trial has been accepted.
    best_pipeline: Option<QuantumMLPipeline>,

    /// Summary of the search, regenerated at the end of every `fit` call.
    experiment_results: AutoMLResults,
}
70
/// Quantum ensemble manager.
///
/// Keeps a bounded set of pipelines together with one score per pipeline.
/// `ensemble_members` and `performance_weights` are parallel vectors: entry `i`
/// of one belongs to entry `i` of the other.
#[derive(Debug, Clone)]
pub struct QuantumEnsembleManager {
    /// Maximum number of pipelines retained in the ensemble.
    max_ensemble_size: usize,

    /// Pipelines currently retained.
    ensemble_members: Vec<QuantumMLPipeline>,

    /// Diversity strategies copied from the ensemble search space.
    /// Not consulted by the methods defined in this file.
    diversity_strategies: Vec<EnsembleDiversityStrategy>,

    /// Combination methods copied from the ensemble search space.
    /// Not consulted by the methods defined in this file.
    combination_methods: Vec<EnsembleCombinationMethod>,

    /// One entry per member: the raw evaluation score while pipelines are being
    /// collected, rewritten in place by `finalize_ensemble`.
    performance_weights: Vec<f64>,
}
89
90impl QuantumAutoML {
91    /// Create a new Quantum AutoML instance
92    pub fn new(config: QuantumAutoMLConfig) -> Self {
93        Self {
94            pipeline_constructor: AutomatedPipelineConstructor::new(&config),
95            hyperparameter_optimizer: QuantumHyperparameterOptimizer::new(
96                &config.search_space.hyperparameters,
97            ),
98            model_selector: QuantumModelSelector::new(&config.search_space.algorithms),
99            ensemble_manager: QuantumEnsembleManager::new(&config.search_space.ensembles),
100            performance_tracker: PerformanceTracker::new(&config.evaluation_config),
101            resource_optimizer: QuantumResourceOptimizer::new(&config.quantum_constraints),
102            search_history: SearchHistory::new(),
103            best_pipeline: None,
104            experiment_results: AutoMLResults::new(),
105            config,
106        }
107    }
108
109    /// Create AutoML with basic configuration
110    pub fn basic() -> Self {
111        Self::new(QuantumAutoMLConfig::basic())
112    }
113
114    /// Create AutoML with comprehensive configuration
115    pub fn comprehensive() -> Self {
116        Self::new(QuantumAutoMLConfig::comprehensive())
117    }
118
119    /// Create AutoML with production configuration
120    pub fn production() -> Self {
121        Self::new(QuantumAutoMLConfig::production())
122    }
123
124    /// Fit the AutoML system to training data
125    pub fn fit(&mut self, X: &Array2<f64>, y: &Array1<f64>) -> Result<()> {
126        // Task detection if not specified
127        if self.config.task_type.is_none() {
128            self.config.task_type = Some(self.detect_task_type(X, y)?);
129        }
130
131        // Initialize search process
132        self.search_history.start_search();
133
134        // Main AutoML search loop
135        for trial_id in 0..self.config.search_budget.max_trials {
136            if self.should_stop_search(trial_id)? {
137                break;
138            }
139
140            // Construct candidate pipeline
141            let pipeline = self
142                .pipeline_constructor
143                .construct_pipeline(X, y, &self.config)?;
144
145            // Optimize hyperparameters
146            let optimized_pipeline = self.hyperparameter_optimizer.optimize(pipeline, X, y)?;
147
148            // Evaluate pipeline
149            let performance = self.evaluate_pipeline(&optimized_pipeline, X, y)?;
150
151            // Update search history
152            self.search_history
153                .record_trial(trial_id, &optimized_pipeline, performance);
154
155            // Update best pipeline if better
156            if self.is_better_pipeline(&optimized_pipeline, performance)? {
157                self.best_pipeline = Some(optimized_pipeline.clone());
158                self.performance_tracker
159                    .update_best_performance(performance);
160            }
161
162            // Update ensemble if enabled
163            if self.config.search_space.ensembles.enabled {
164                self.ensemble_manager
165                    .consider_pipeline(optimized_pipeline, performance)?;
166            }
167        }
168
169        // Finalize results
170        self.finalize_search()?;
171
172        Ok(())
173    }
174
175    /// Predict using the best found pipeline
176    pub fn predict(&self, X: &Array2<f64>) -> Result<Array1<f64>> {
177        match &self.best_pipeline {
178            Some(pipeline) => pipeline.predict(X),
179            None => Err(MLError::ModelNotTrained(
180                "AutoML has not been fitted yet".to_string(),
181            )),
182        }
183    }
184
185    /// Get the best pipeline found
186    pub fn best_pipeline(&self) -> Option<&QuantumMLPipeline> {
187        self.best_pipeline.as_ref()
188    }
189
190    /// Get search results and analysis
191    pub fn get_results(&self) -> &AutoMLResults {
192        &self.experiment_results
193    }
194
195    /// Get search history
196    pub fn get_search_history(&self) -> &SearchHistory {
197        &self.search_history
198    }
199
200    /// Get performance tracker
201    pub fn get_performance_tracker(&self) -> &PerformanceTracker {
202        &self.performance_tracker
203    }
204
205    // Private methods
206
207    fn detect_task_type(&self, X: &Array2<f64>, y: &Array1<f64>) -> Result<MLTaskType> {
208        // Simple task type detection based on target values
209        let unique_values = {
210            let mut values: Vec<f64> = y.iter().cloned().collect();
211            values.sort_by(|a, b| a.partial_cmp(b).unwrap());
212            values.dedup();
213            values
214        };
215
216        // Check if all values are integers (classification)
217        let all_integers = unique_values.iter().all(|&v| (v.fract()).abs() < 1e-10);
218
219        if all_integers && unique_values.len() <= 2 {
220            Ok(MLTaskType::BinaryClassification)
221        } else if all_integers && unique_values.len() <= 20 {
222            Ok(MLTaskType::MultiClassification {
223                num_classes: unique_values.len(),
224            })
225        } else {
226            Ok(MLTaskType::Regression)
227        }
228    }
229
230    fn should_stop_search(&self, trial_id: usize) -> Result<bool> {
231        // Check time budget
232        if let Some(elapsed) = self.search_history.elapsed_time() {
233            if elapsed > self.config.search_budget.max_time_seconds {
234                return Ok(true);
235            }
236        }
237
238        // Check early stopping
239        if self.config.search_budget.early_stopping.enabled {
240            if let Some(best_performance) = self.performance_tracker.best_performance() {
241                let trials_without_improvement = self.search_history.trials_without_improvement();
242                if trials_without_improvement >= self.config.search_budget.early_stopping.patience {
243                    return Ok(true);
244                }
245            }
246        }
247
248        Ok(false)
249    }
250
251    fn evaluate_pipeline(
252        &self,
253        pipeline: &QuantumMLPipeline,
254        X: &Array2<f64>,
255        y: &Array1<f64>,
256    ) -> Result<f64> {
257        // Perform cross-validation evaluation
258        match &self.config.evaluation_config.cv_strategy {
259            CrossValidationStrategy::KFold { k } => self.evaluate_k_fold(pipeline, X, y, *k),
260            CrossValidationStrategy::HoldOut { test_size } => {
261                self.evaluate_holdout(pipeline, X, y, *test_size)
262            }
263            _ => {
264                // For other strategies, use simple holdout for now
265                self.evaluate_holdout(pipeline, X, y, self.config.evaluation_config.test_size)
266            }
267        }
268    }
269
270    fn evaluate_k_fold(
271        &self,
272        pipeline: &QuantumMLPipeline,
273        X: &Array2<f64>,
274        y: &Array1<f64>,
275        k: usize,
276    ) -> Result<f64> {
277        let n_samples = X.nrows();
278        let fold_size = n_samples / k;
279        let mut scores = Vec::new();
280
281        for fold in 0..k {
282            let start_idx = fold * fold_size;
283            let end_idx = if fold == k - 1 {
284                n_samples
285            } else {
286                (fold + 1) * fold_size
287            };
288
289            // Create train/test split
290            let mut train_indices = Vec::new();
291            let mut test_indices = Vec::new();
292
293            for i in 0..n_samples {
294                if i >= start_idx && i < end_idx {
295                    test_indices.push(i);
296                } else {
297                    train_indices.push(i);
298                }
299            }
300
301            // Extract train/test data
302            let X_train = X.select(Axis(0), &train_indices);
303            let y_train = y.select(Axis(0), &train_indices);
304            let X_test = X.select(Axis(0), &test_indices);
305            let y_test = y.select(Axis(0), &test_indices);
306
307            // Train and evaluate
308            let mut pipeline_copy = pipeline.clone();
309            pipeline_copy.fit(&X_train, &y_train)?;
310            let predictions = pipeline_copy.predict(&X_test)?;
311
312            // Calculate score based on task type
313            let score = self.calculate_score(&predictions, &y_test)?;
314            scores.push(score);
315        }
316
317        // Return mean score
318        Ok(scores.iter().sum::<f64>() / scores.len() as f64)
319    }
320
321    fn evaluate_holdout(
322        &self,
323        pipeline: &QuantumMLPipeline,
324        X: &Array2<f64>,
325        y: &Array1<f64>,
326        test_size: f64,
327    ) -> Result<f64> {
328        let n_samples = X.nrows();
329        let n_test = (n_samples as f64 * test_size) as usize;
330        let n_train = n_samples - n_test;
331
332        // Simple train/test split
333        let X_train = X.slice(s![0..n_train, ..]).to_owned();
334        let y_train = y.slice(s![0..n_train]).to_owned();
335        let X_test = X.slice(s![n_train.., ..]).to_owned();
336        let y_test = y.slice(s![n_train..]).to_owned();
337
338        // Train and evaluate
339        let mut pipeline_copy = pipeline.clone();
340        pipeline_copy.fit(&X_train, &y_train)?;
341        let predictions = pipeline_copy.predict(&X_test)?;
342
343        self.calculate_score(&predictions, &y_test)
344    }
345
346    fn calculate_score(&self, predictions: &Array1<f64>, y_true: &Array1<f64>) -> Result<f64> {
347        match &self.config.task_type {
348            Some(MLTaskType::BinaryClassification)
349            | Some(MLTaskType::MultiClassification { .. }) => {
350                // Calculate accuracy
351                let correct = predictions
352                    .iter()
353                    .zip(y_true.iter())
354                    .map(|(pred, true_val)| {
355                        if (pred.round() - true_val).abs() < 1e-10 {
356                            1.0
357                        } else {
358                            0.0
359                        }
360                    })
361                    .sum::<f64>();
362                Ok(correct / predictions.len() as f64)
363            }
364            Some(MLTaskType::Regression) => {
365                // Calculate R2 score
366                let mean_true = y_true.mean().unwrap();
367                let ss_tot = y_true.iter().map(|&y| (y - mean_true).powi(2)).sum::<f64>();
368                let ss_res = predictions
369                    .iter()
370                    .zip(y_true.iter())
371                    .map(|(pred, true_val)| (true_val - pred).powi(2))
372                    .sum::<f64>();
373                Ok(1.0 - (ss_res / ss_tot))
374            }
375            _ => {
376                // Default to MSE for unknown tasks
377                let mse = predictions
378                    .iter()
379                    .zip(y_true.iter())
380                    .map(|(pred, true_val)| (pred - true_val).powi(2))
381                    .sum::<f64>()
382                    / predictions.len() as f64;
383                Ok(-mse) // Negative because we want to maximize
384            }
385        }
386    }
387
388    fn is_better_pipeline(&self, pipeline: &QuantumMLPipeline, performance: f64) -> Result<bool> {
389        match self.performance_tracker.best_performance() {
390            Some(best_perf) => {
391                Ok(performance
392                    > best_perf + self.config.search_budget.early_stopping.min_improvement)
393            }
394            None => Ok(true), // First pipeline is always better
395        }
396    }
397
398    fn finalize_search(&mut self) -> Result<()> {
399        // Generate final results
400        self.experiment_results = self.generate_final_results()?;
401
402        // Update ensemble if enabled
403        if self.config.search_space.ensembles.enabled {
404            self.ensemble_manager.finalize_ensemble()?;
405        }
406
407        Ok(())
408    }
409
410    fn generate_final_results(&self) -> Result<AutoMLResults> {
411        let mut results = AutoMLResults::new();
412
413        // Set best pipeline info
414        if let Some(pipeline) = &self.best_pipeline {
415            results.set_best_pipeline_info(pipeline);
416        }
417
418        // Set search statistics
419        results.set_search_statistics(&self.search_history);
420
421        // Set performance analysis
422        results.set_performance_analysis(&self.performance_tracker);
423
424        Ok(results)
425    }
426}
427
428impl QuantumEnsembleManager {
429    fn new(ensemble_config: &EnsembleSearchSpace) -> Self {
430        Self {
431            max_ensemble_size: ensemble_config.max_ensemble_size,
432            ensemble_members: Vec::new(),
433            diversity_strategies: ensemble_config.diversity_strategies.clone(),
434            combination_methods: ensemble_config.combination_methods.clone(),
435            performance_weights: Vec::new(),
436        }
437    }
438
439    fn consider_pipeline(&mut self, pipeline: QuantumMLPipeline, performance: f64) -> Result<()> {
440        // Simple strategy: keep top N performing pipelines
441        if self.ensemble_members.len() < self.max_ensemble_size {
442            self.ensemble_members.push(pipeline);
443            self.performance_weights.push(performance);
444        } else {
445            // Replace worst performer if this is better
446            if let Some((worst_idx, _)) = self
447                .performance_weights
448                .iter()
449                .enumerate()
450                .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
451            {
452                if performance > self.performance_weights[worst_idx] {
453                    self.ensemble_members[worst_idx] = pipeline;
454                    self.performance_weights[worst_idx] = performance;
455                }
456            }
457        }
458
459        Ok(())
460    }
461
462    fn finalize_ensemble(&mut self) -> Result<()> {
463        // Normalize performance weights
464        let total_weight: f64 = self.performance_weights.iter().sum();
465        if total_weight > 0.0 {
466            for weight in &mut self.performance_weights {
467                *weight /= total_weight;
468            }
469        }
470
471        Ok(())
472    }
473}
474
475impl Default for QuantumAutoML {
476    fn default() -> Self {
477        Self::basic()
478    }
479}
480
481// Helper functions for creating configurations
482
483/// Create a default AutoML configuration
484pub fn create_default_automl_config() -> QuantumAutoMLConfig {
485    QuantumAutoMLConfig::basic()
486}
487
488/// Create a comprehensive AutoML configuration
489pub fn create_comprehensive_automl_config() -> QuantumAutoMLConfig {
490    QuantumAutoMLConfig::comprehensive()
491}