use anyhow::{anyhow, Result};
use scirs2_core::ndarray_ext::{Array1, Array2};
use scirs2_core::random::{Random, RngExt};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::{Mutex, RwLock};
use tracing::info;
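/// The learning task the AutoML search is configured for; it determines
/// which candidate algorithms are tried and which metrics are computed.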
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum TaskType {
Classification,
Regression,
TimeSeries,
AnomalyDetection,
Clustering,
}
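/// Candidate learning algorithms the search can draw from. Which subset is
/// eligible for a given task is decided by [`Algorithm::for_task`].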
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Algorithm {
LinearRegression,
LogisticRegression,
DecisionTree,
RandomForest,
GradientBoosting,
NeuralNetwork,
KNN,
SVM,
NaiveBayes,
OnlineSGD,
ARIMA,
IsolationForest,
KMeans,
}
impl Algorithm {
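/// Returns the candidate algorithms considered for `task`. A minimal usage
/// sketch (marked `ignore` since the crate path is not assumed here):
///
/// ```ignore
/// let candidates = Algorithm::for_task(TaskType::Regression);
/// assert!(candidates.contains(&Algorithm::LinearRegression));
/// ```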
pub fn for_task(task: TaskType) -> Vec<Algorithm> {
match task {
TaskType::Classification => vec![
Algorithm::LogisticRegression,
Algorithm::DecisionTree,
Algorithm::RandomForest,
Algorithm::GradientBoosting,
Algorithm::NeuralNetwork,
Algorithm::KNN,
Algorithm::NaiveBayes,
],
TaskType::Regression => vec![
Algorithm::LinearRegression,
Algorithm::DecisionTree,
Algorithm::RandomForest,
Algorithm::GradientBoosting,
Algorithm::NeuralNetwork,
Algorithm::KNN,
Algorithm::SVM,
],
TaskType::TimeSeries => vec![
Algorithm::ARIMA,
Algorithm::LinearRegression,
Algorithm::NeuralNetwork,
Algorithm::GradientBoosting,
],
TaskType::AnomalyDetection => vec![
Algorithm::IsolationForest,
Algorithm::OnlineSGD,
Algorithm::NeuralNetwork,
],
TaskType::Clustering => vec![Algorithm::KMeans],
}
}
}
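/// A single flat hyperparameter bundle shared by all algorithms. Fields that
/// a given algorithm has no use for (e.g. `n_neighbors` for linear models)
/// are sampled anyway and carried along unused.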
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HyperParameters {
pub learning_rate: f64,
pub n_estimators: usize,
pub max_depth: Option<usize>,
pub regularization: f64,
pub n_neighbors: usize,
pub batch_size: usize,
pub random_seed: u64,
}
impl Default for HyperParameters {
fn default() -> Self {
Self {
learning_rate: 0.01,
n_estimators: 100,
max_depth: Some(5),
regularization: 0.1,
n_neighbors: 5,
batch_size: 32,
random_seed: 42,
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelPerformance {
pub algorithm: Algorithm,
pub hyperparameters: HyperParameters,
pub accuracy: Option<f64>,
pub precision: Option<f64>,
pub recall: Option<f64>,
pub f1_score: Option<f64>,
pub mse: Option<f64>,
pub r_squared: Option<f64>,
pub training_time_secs: f64,
pub inference_time_ms: f64,
pub complexity_score: f64,
pub cv_score: f64,
}
impl ModelPerformance {
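/// Combines cross-validation quality with cost penalties:
/// `score = cv_score * exp(-training_time_secs / 60) * exp(-complexity_score / 100)`.
/// For example, `cv_score = 0.85` with 10 s of training and complexity 20
/// yields roughly `0.85 * 0.846 * 0.819 ≈ 0.59`.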
pub fn overall_score(&self) -> f64 {
let perf_score = self.cv_score;
let time_penalty = (-self.training_time_secs / 60.0).exp();
let complexity_penalty = (-self.complexity_score / 100.0).exp();
perf_score * time_penalty * complexity_penalty
}
}
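/// Search-budget and strategy knobs for a run of [`AutoML::fit`]. The
/// defaults allow 50 trials within a 600 s budget, 5-fold cross-validation,
/// and an ensemble of up to 5 models.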
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutoMLConfig {
pub task_type: TaskType,
pub max_training_time_secs: u64,
pub n_trials: usize,
pub cv_folds: usize,
pub enable_ensemble: bool,
pub enable_meta_learning: bool,
pub early_stopping_patience: usize,
pub optimization_metric: String,
pub auto_feature_engineering: bool,
pub max_ensemble_size: usize,
}
impl Default for AutoMLConfig {
fn default() -> Self {
Self {
task_type: TaskType::Classification,
max_training_time_secs: 600,
n_trials: 50,
cv_folds: 5,
enable_ensemble: true,
enable_meta_learning: false,
early_stopping_patience: 10,
optimization_metric: "cv_score".to_string(),
auto_feature_engineering: true,
max_ensemble_size: 5,
}
}
}
#[derive(Debug, Clone)]
pub struct TrainedModel {
pub algorithm: Algorithm,
pub hyperparameters: HyperParameters,
pub parameters: ModelParameters,
pub performance: ModelPerformance,
}
#[derive(Debug, Clone)]
pub struct ModelParameters {
pub weights: Vec<f64>,
pub bias: f64,
pub extra: HashMap<String, Vec<f64>>,
}
impl Default for ModelParameters {
fn default() -> Self {
Self {
weights: Vec::new(),
bias: 0.0,
extra: HashMap::new(),
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutoMLStats {
pub total_trials: u64,
pub best_score: f64,
pub total_training_time_secs: f64,
pub ensemble_size: usize,
pub best_algorithm: Option<Algorithm>,
pub predictions_count: u64,
pub avg_prediction_time_ms: f64,
}
impl Default for AutoMLStats {
fn default() -> Self {
Self {
total_trials: 0,
best_score: 0.0,
total_training_time_secs: 0.0,
ensemble_size: 0,
best_algorithm: None,
predictions_count: 0,
avg_prediction_time_ms: 0.0,
}
}
}
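/// Random-search AutoML driver: samples (algorithm, hyperparameter) pairs,
/// scores them by cross-validation, and keeps the best model plus an optional
/// score-weighted ensemble. A minimal usage sketch, assuming a Tokio runtime
/// and pre-built `features`/`labels` arrays (marked `ignore`; not verified
/// against this crate's public path):
///
/// ```ignore
/// let config = AutoMLConfig { task_type: TaskType::Regression, ..Default::default() };
/// let mut automl = AutoML::new(config)?;
/// automl.fit(&features, &labels).await?; // features: Array2<f64>, labels: Array1<f64>
/// let y = automl.predict(&row).await?;   // row: Array1<f64>
/// ```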
pub struct AutoML {
config: AutoMLConfig,
best_model: Arc<RwLock<Option<TrainedModel>>>,
ensemble: Arc<RwLock<Vec<TrainedModel>>>,
trial_history: Arc<RwLock<Vec<ModelPerformance>>>,
stats: Arc<RwLock<AutoMLStats>>,
#[allow(clippy::arc_with_non_send_sync)]
rng: Arc<Mutex<Random>>,
}
impl AutoML {
#[allow(clippy::arc_with_non_send_sync)]
pub fn new(config: AutoMLConfig) -> Result<Self> {
Ok(Self {
config,
best_model: Arc::new(RwLock::new(None)),
ensemble: Arc::new(RwLock::new(Vec::new())),
trial_history: Arc::new(RwLock::new(Vec::new())),
stats: Arc::new(RwLock::new(AutoMLStats::default())),
rng: Arc::new(Mutex::new(Random::default())),
})
}
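/// Runs the search loop: each trial picks a random candidate algorithm for
/// the configured task, samples hyperparameters, and cross-validates. The
/// loop stops when `n_trials` is reached, the `max_training_time_secs`
/// budget is exhausted, or `early_stopping_patience` consecutive trials pass
/// without improving the best overall score.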
pub async fn fit(&mut self, features: &Array2<f64>, labels: &Array1<f64>) -> Result<()> {
info!(
"Starting AutoML training with task {:?}, {} samples, {} features",
self.config.task_type,
features.shape()[0],
features.shape()[1]
);
let start_time = std::time::Instant::now();
let candidate_algorithms = Algorithm::for_task(self.config.task_type);
let mut best_overall_score = f64::NEG_INFINITY;
let mut trials_without_improvement = 0;
for trial in 0..self.config.n_trials {
if start_time.elapsed().as_secs() >= self.config.max_training_time_secs {
info!("Time budget exhausted, stopping AutoML");
break;
}
let algorithm = {
let mut rng = self.rng.lock().await;
let idx = rng.random_range(0..candidate_algorithms.len());
candidate_algorithms[idx]
};
let hyperparams = self.generate_hyperparameters(algorithm).await?;
let performance = self
.train_and_evaluate(algorithm, &hyperparams, features, labels)
.await?;
self.trial_history.write().await.push(performance.clone());
let overall_score = performance.overall_score();
info!(
"Trial {}: {:?} - CV score: {:.4}, Overall score: {:.4}",
trial, algorithm, performance.cv_score, overall_score
);
if overall_score > best_overall_score {
best_overall_score = overall_score;
trials_without_improvement = 0;
let model = TrainedModel {
algorithm,
hyperparameters: hyperparams.clone(),
parameters: self
.train_final_model(algorithm, &hyperparams, features, labels)
.await?,
performance: performance.clone(),
};
*self.best_model.write().await = Some(model.clone());
if self.config.enable_ensemble {
self.update_ensemble(model).await?;
}
let mut stats = self.stats.write().await;
stats.best_score = best_overall_score;
stats.best_algorithm = Some(algorithm);
} else {
trials_without_improvement += 1;
}
if trials_without_improvement >= self.config.early_stopping_patience {
info!(
"Early stopping triggered after {} trials without improvement",
trials_without_improvement
);
break;
}
let mut stats = self.stats.write().await;
stats.total_trials = trial as u64 + 1;
}
let mut stats = self.stats.write().await;
stats.total_training_time_secs = start_time.elapsed().as_secs_f64();
stats.ensemble_size = self.ensemble.read().await.len();
info!(
"AutoML training complete: {} trials, best score: {:.4}, algorithm: {:?}",
stats.total_trials, stats.best_score, stats.best_algorithm
);
Ok(())
}
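/// Samples one hyperparameter bundle uniformly from fixed ranges (e.g.
/// learning rate in `0.0001..0.1`, 10..500 estimators). Note that the
/// meta-learning initialization is fetched but not yet folded into the
/// sampled values; `_base` is deliberately unused for now.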
async fn generate_hyperparameters(&self, algorithm: Algorithm) -> Result<HyperParameters> {
let mut rng = self.rng.lock().await;
let _base = if self.config.enable_meta_learning {
self.get_meta_learning_initialization(algorithm).await
} else {
HyperParameters::default()
};
Ok(HyperParameters {
learning_rate: rng.random_range(0.0001..0.1),
n_estimators: rng.random_range(10..500),
max_depth: Some(rng.random_range(3..20)),
regularization: rng.random_range(0.0..1.0),
n_neighbors: rng.random_range(3..20),
batch_size: rng.random_range(16..256),
random_seed: rng.random::<u64>(),
})
}
async fn get_meta_learning_initialization(&self, _algorithm: Algorithm) -> HyperParameters {
HyperParameters::default()
}
async fn train_and_evaluate(
&self,
algorithm: Algorithm,
hyperparams: &HyperParameters,
features: &Array2<f64>,
labels: &Array1<f64>,
) -> Result<ModelPerformance> {
let start_time = std::time::Instant::now();
let cv_scores = self
.cross_validate(algorithm, hyperparams, features, labels)
.await?;
let cv_score = cv_scores.iter().sum::<f64>() / cv_scores.len() as f64;
let (accuracy, precision, recall, f1, mse, r_squared) = self
.compute_metrics(algorithm, hyperparams, features, labels)
.await?;
let training_time = start_time.elapsed().as_secs_f64();
let complexity_score = match algorithm {
Algorithm::LinearRegression | Algorithm::LogisticRegression => 10.0,
Algorithm::DecisionTree => 30.0,
Algorithm::RandomForest | Algorithm::GradientBoosting => 60.0,
Algorithm::NeuralNetwork => 80.0,
_ => 40.0,
};
Ok(ModelPerformance {
algorithm,
hyperparameters: hyperparams.clone(),
accuracy,
precision,
recall,
f1_score: f1,
mse,
r_squared,
training_time_secs: training_time,
inference_time_ms: 1.0, // fixed placeholder; per-prediction latency is not measured here
complexity_score,
cv_score,
})
}
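/// K-fold split over row indices: fold `k` validates on
/// `[k * fold_size, (k + 1) * fold_size)` with
/// `fold_size = n_samples / cv_folds`. Any remainder rows beyond
/// `cv_folds * fold_size` never appear in a validation slice, and the
/// per-fold evaluation below is currently a stub.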
async fn cross_validate(
&self,
algorithm: Algorithm,
hyperparams: &HyperParameters,
features: &Array2<f64>,
labels: &Array1<f64>,
) -> Result<Vec<f64>> {
let n_samples = features.shape()[0];
let fold_size = n_samples / self.config.cv_folds;
let mut scores = Vec::new();
for fold in 0..self.config.cv_folds {
let val_start = fold * fold_size;
let val_end = ((fold + 1) * fold_size).min(n_samples);
let score = self
.evaluate_fold(algorithm, hyperparams, features, labels, val_start, val_end)
.await?;
scores.push(score);
}
Ok(scores)
}
async fn evaluate_fold(
&self,
_algorithm: Algorithm,
_hyperparams: &HyperParameters,
_features: &Array2<f64>,
_labels: &Array1<f64>,
_val_start: usize,
_val_end: usize,
) -> Result<f64> {
let mut rng = self.rng.lock().await;
// Stub: no real fold training happens, so return a simulated score in [0.7, 1.0).
Ok(0.7 + rng.random::<f64>() * 0.3)
}
async fn compute_metrics(
&self,
_algorithm: Algorithm,
_hyperparams: &HyperParameters,
_features: &Array2<f64>,
_labels: &Array1<f64>,
) -> Result<(
Option<f64>,
Option<f64>,
Option<f64>,
Option<f64>,
Option<f64>,
Option<f64>,
)> {
let mut rng = self.rng.lock().await;
match self.config.task_type {
TaskType::Classification => {
let accuracy = Some(0.7 + rng.random::<f64>() * 0.3);
let precision = Some(0.7 + rng.random::<f64>() * 0.3);
let recall = Some(0.7 + rng.random::<f64>() * 0.3);
let f1 = Some(0.7 + rng.random::<f64>() * 0.3);
Ok((accuracy, precision, recall, f1, None, None))
}
TaskType::Regression | TaskType::TimeSeries => {
let mse = Some(0.1 + rng.random::<f64>() * 0.9);
let r_squared = Some(0.5 + rng.random::<f64>() * 0.5);
Ok((None, None, None, None, mse, r_squared))
}
_ => Ok((None, None, None, None, None, None)),
}
}
async fn train_final_model(
&self,
_algorithm: Algorithm,
_hyperparams: &HyperParameters,
features: &Array2<f64>,
_labels: &Array1<f64>,
) -> Result<ModelParameters> {
let n_features = features.shape()[1];
let mut rng = self.rng.lock().await;
let weights: Vec<f64> = (0..n_features).map(|_| rng.random::<f64>() - 0.5).collect();
let bias = rng.random::<f64>() - 0.5;
Ok(ModelParameters {
weights,
bias,
extra: HashMap::new(),
})
}
async fn update_ensemble(&self, model: TrainedModel) -> Result<()> {
let mut ensemble = self.ensemble.write().await;
ensemble.push(model);
if ensemble.len() > self.config.max_ensemble_size {
ensemble.sort_by(|a, b| {
b.performance
.overall_score()
.partial_cmp(&a.performance.overall_score())
.unwrap_or(std::cmp::Ordering::Equal)
});
ensemble.truncate(self.config.max_ensemble_size);
}
Ok(())
}
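/// Predicts with the ensemble when enabled, otherwise with the single best
/// model, and maintains a running mean of prediction latency:
/// `avg' = (avg * (n - 1) + elapsed_ms) / n` after incrementing `n`.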
pub async fn predict(&self, features: &Array1<f64>) -> Result<f64> {
let start_time = std::time::Instant::now();
let prediction = if self.config.enable_ensemble {
self.ensemble_predict(features).await?
} else {
self.single_model_predict(features).await?
};
let mut stats = self.stats.write().await;
stats.predictions_count += 1;
let elapsed_ms = start_time.elapsed().as_secs_f64() * 1000.0;
stats.avg_prediction_time_ms =
(stats.avg_prediction_time_ms * (stats.predictions_count - 1) as f64 + elapsed_ms)
/ stats.predictions_count as f64;
Ok(prediction)
}
async fn single_model_predict(&self, features: &Array1<f64>) -> Result<f64> {
let model = self.best_model.read().await;
match &*model {
Some(m) => {
let mut pred = m.parameters.bias;
for (i, &weight) in m.parameters.weights.iter().enumerate() {
if i < features.len() {
pred += weight * features[i];
}
}
if matches!(self.config.task_type, TaskType::Classification) {
pred = 1.0 / (1.0 + (-pred).exp()); // sigmoid squashes the linear output into (0, 1)
}
Ok(pred)
}
None => Err(anyhow!("No trained model available")),
}
}
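/// Score-weighted ensemble average: with per-model predictions `p_i` and
/// weights `w_i = overall_score()`, returns `sum(w_i * p_i) / sum(w_i)`.
/// Falls back to the single best model when the ensemble is empty.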
async fn ensemble_predict(&self, features: &Array1<f64>) -> Result<f64> {
let ensemble = self.ensemble.read().await;
if ensemble.is_empty() {
return self.single_model_predict(features).await;
}
let mut predictions = Vec::new();
let mut weights = Vec::new();
for model in ensemble.iter() {
let mut pred = model.parameters.bias;
for (i, &weight) in model.parameters.weights.iter().enumerate() {
if i < features.len() {
pred += weight * features[i];
}
}
if matches!(self.config.task_type, TaskType::Classification) {
pred = 1.0 / (1.0 + (-pred).exp());
}
predictions.push(pred);
weights.push(model.performance.overall_score());
}
let total_weight: f64 = weights.iter().sum();
let weighted_pred = predictions
.iter()
.zip(&weights)
.map(|(p, w)| p * w)
.sum::<f64>()
/ total_weight;
Ok(weighted_pred)
}
pub async fn get_stats(&self) -> AutoMLStats {
self.stats.read().await.clone()
}
pub async fn get_trial_history(&self) -> Vec<ModelPerformance> {
self.trial_history.read().await.clone()
}
pub async fn get_best_model_info(
&self,
) -> Option<(Algorithm, HyperParameters, ModelPerformance)> {
let model = self.best_model.read().await;
model.as_ref().map(|m| {
(
m.algorithm,
m.hyperparameters.clone(),
m.performance.clone(),
)
})
}
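/// Serializes the best model to pretty-printed JSON with top-level keys
/// `algorithm`, `hyperparameters`, `parameters` (weights and bias), and
/// `performance`; errors if no model has been trained yet.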
pub async fn export_model(&self) -> Result<String> {
let model = self.best_model.read().await;
match &*model {
Some(m) => {
let export = serde_json::json!({
"algorithm": format!("{:?}", m.algorithm),
"hyperparameters": m.hyperparameters,
"parameters": {
"weights": m.parameters.weights,
"bias": m.parameters.bias,
},
"performance": m.performance,
});
Ok(serde_json::to_string_pretty(&export)?)
}
None => Err(anyhow!("No model to export")),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_algorithm_for_task() {
let classifiers = Algorithm::for_task(TaskType::Classification);
assert!(!classifiers.is_empty());
assert!(classifiers.contains(&Algorithm::LogisticRegression));
let regressors = Algorithm::for_task(TaskType::Regression);
assert!(regressors.contains(&Algorithm::LinearRegression));
let ts_algorithms = Algorithm::for_task(TaskType::TimeSeries);
assert!(ts_algorithms.contains(&Algorithm::ARIMA));
}
#[test]
fn test_hyperparameters_default() {
let params = HyperParameters::default();
assert_eq!(params.learning_rate, 0.01);
assert_eq!(params.n_estimators, 100);
assert_eq!(params.max_depth, Some(5));
}
#[test]
fn test_model_performance_overall_score() {
let perf = ModelPerformance {
algorithm: Algorithm::LinearRegression,
hyperparameters: HyperParameters::default(),
accuracy: None,
precision: None,
recall: None,
f1_score: None,
mse: Some(0.5),
r_squared: Some(0.9),
training_time_secs: 10.0,
inference_time_ms: 1.0,
complexity_score: 20.0,
cv_score: 0.85,
};
let score = perf.overall_score();
assert!(score > 0.0);
assert!(score <= 1.0);
}
#[tokio::test]
async fn test_automl_creation() {
let config = AutoMLConfig::default();
let automl = AutoML::new(config);
assert!(automl.is_ok());
}
#[tokio::test]
async fn test_automl_generate_hyperparameters() {
let config = AutoMLConfig::default();
let automl = AutoML::new(config).unwrap();
let params = automl
.generate_hyperparameters(Algorithm::LinearRegression)
.await;
assert!(params.is_ok());
let p = params.unwrap();
assert!(p.learning_rate > 0.0);
assert!(p.n_estimators > 0);
}
#[tokio::test]
async fn test_automl_fit_small_dataset() {
let config = AutoMLConfig {
task_type: TaskType::Regression,
max_training_time_secs: 5,
n_trials: 3,
cv_folds: 2,
enable_ensemble: false,
..Default::default()
};
let mut automl = AutoML::new(config).unwrap();
let features = Array2::from_shape_vec(
(10, 2),
vec![
1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0,
9.0, 10.0, 10.0, 11.0,
],
)
.unwrap();
let labels = Array1::from_vec(vec![3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0]);
let result = automl.fit(&features, &labels).await;
assert!(result.is_ok());
let stats = automl.get_stats().await;
assert!(stats.total_trials > 0);
assert!(stats.total_trials <= 3);
}
#[tokio::test]
async fn test_automl_prediction() {
let config = AutoMLConfig {
task_type: TaskType::Regression,
max_training_time_secs: 5,
n_trials: 2,
..Default::default()
};
let mut automl = AutoML::new(config).unwrap();
let features = Array2::from_shape_vec(
(10, 2),
vec![
1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0, 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0,
9.0, 10.0, 10.0, 11.0,
],
)
.unwrap();
let labels = Array1::from_vec(vec![3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0]);
automl.fit(&features, &labels).await.unwrap();
let test_features = Array1::from_vec(vec![5.5, 6.5]);
let prediction = automl.predict(&test_features).await;
assert!(prediction.is_ok());
}
#[tokio::test]
async fn test_ensemble_prediction() {
let config = AutoMLConfig {
task_type: TaskType::Classification,
enable_ensemble: true,
max_ensemble_size: 3,
n_trials: 5,
max_training_time_secs: 10,
..Default::default()
};
let mut automl = AutoML::new(config).unwrap();
let features =
Array2::from_shape_vec((20, 2), (0..40).map(|x| x as f64).collect()).unwrap();
let labels = Array1::from_vec((0..20).map(|x| (x % 2) as f64).collect());
automl.fit(&features, &labels).await.unwrap();
let test_features = Array1::from_vec(vec![5.0, 10.0]);
let prediction = automl.predict(&test_features).await;
assert!(prediction.is_ok());
let pred = prediction.unwrap();
assert!((0.0..=1.0).contains(&pred));
}
#[tokio::test]
async fn test_get_best_model_info() {
let config = AutoMLConfig {
n_trials: 2,
max_training_time_secs: 5,
..Default::default()
};
let mut automl = AutoML::new(config).unwrap();
let features =
Array2::from_shape_vec((10, 2), (0..20).map(|x| x as f64).collect()).unwrap();
let labels = Array1::from_vec((0..10).map(|x| x as f64).collect());
automl.fit(&features, &labels).await.unwrap();
let best_info = automl.get_best_model_info().await;
assert!(best_info.is_some());
let (_algorithm, _hyperparams, performance) = best_info.unwrap();
assert!(performance.cv_score >= 0.0);
}
#[tokio::test]
async fn test_export_model() {
let config = AutoMLConfig {
n_trials: 1,
max_training_time_secs: 5,
..Default::default()
};
let mut automl = AutoML::new(config).unwrap();
let features =
Array2::from_shape_vec((10, 2), (0..20).map(|x| x as f64).collect()).unwrap();
let labels = Array1::from_vec((0..10).map(|x| x as f64).collect());
automl.fit(&features, &labels).await.unwrap();
let export = automl.export_model().await;
assert!(export.is_ok());
let json_str = export.unwrap();
assert!(json_str.contains("algorithm"));
assert!(json_str.contains("hyperparameters"));
}
#[tokio::test]
async fn test_trial_history() {
let config = AutoMLConfig {
n_trials: 3,
max_training_time_secs: 5,
..Default::default()
};
let mut automl = AutoML::new(config).unwrap();
let features =
Array2::from_shape_vec((10, 2), (0..20).map(|x| x as f64).collect()).unwrap();
let labels = Array1::from_vec((0..10).map(|x| x as f64).collect());
automl.fit(&features, &labels).await.unwrap();
let history = automl.get_trial_history().await;
assert!(!history.is_empty());
assert!(history.len() <= 3);
}
#[tokio::test]
async fn test_early_stopping() {
let config = AutoMLConfig {
n_trials: 100,
max_training_time_secs: 60,
early_stopping_patience: 3,
..Default::default()
};
let mut automl = AutoML::new(config).unwrap();
let features =
Array2::from_shape_vec((10, 2), (0..20).map(|x| x as f64).collect()).unwrap();
let labels = Array1::from_vec((0..10).map(|x| x as f64).collect());
automl.fit(&features, &labels).await.unwrap();
let stats = automl.get_stats().await;
assert!(stats.total_trials < 100);
}
}