scirs2_optimize/learned_optimizers/learned_hyperparameter_tuner.rs

//! Learned Hyperparameter Tuner
//!
//! Machine learning-based hyperparameter tuning that learns effective
//! configurations across optimization problems by reusing past evaluations
//! through a performance database, a Gaussian-process surrogate with Bayesian
//! acquisition, and multi-fidelity evaluation.
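//!
//! A minimal usage sketch (illustrative only; it relies on the convenience
//! function defined at the bottom of this module, and the objective and
//! starting point are assumptions for demonstration):
//!
//! ```ignore
//! use scirs2_core::ndarray::{Array1, ArrayView1};
//!
//! // Simple quadratic objective to be minimized.
//! let objective = |x: &ArrayView1<f64>| x.iter().map(|&xi| xi * xi).sum::<f64>();
//! let initial = Array1::from(vec![2.0, -1.5]);
//!
//! // Tune hyperparameters, then run a final optimization with the best configuration.
//! let result = hyperparameter_tuning_optimize(objective, &initial.view(), None).unwrap();
//! println!("best value found: {}", result.fun);
//! ```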
5
6use super::{
7    LearnedOptimizationConfig, LearnedOptimizer, MetaOptimizerState, OptimizationProblem,
8    TrainingTask,
9};
10use crate::error::OptimizeResult;
11use crate::result::OptimizeResults;
12use scirs2_core::ndarray::{Array1, Array2, ArrayView1};
13use scirs2_core::random::Rng;
14use statrs::statistics::Statistics;
15use std::collections::{HashMap, VecDeque};
16
17/// Learned hyperparameter tuner with adaptive configuration
18#[derive(Debug, Clone)]
19pub struct LearnedHyperparameterTuner {
20    /// Configuration
21    config: LearnedOptimizationConfig,
22    /// Hyperparameter space
23    hyperparameter_space: HyperparameterSpace,
24    /// Performance database
25    performance_database: PerformanceDatabase,
26    /// Bayesian optimizer for hyperparameter search
27    bayesian_optimizer: BayesianOptimizer,
28    /// Multi-fidelity evaluator
29    multi_fidelity_evaluator: MultiFidelityEvaluator,
30    /// Meta-optimizer state
31    meta_state: MetaOptimizerState,
32    /// Tuning statistics
33    tuning_stats: HyperparameterTuningStats,
34}
35
36/// Hyperparameter space definition
37#[derive(Debug, Clone)]
38pub struct HyperparameterSpace {
39    /// Continuous hyperparameters
40    continuous_params: Vec<ContinuousHyperparameter>,
41    /// Discrete hyperparameters
42    discrete_params: Vec<DiscreteHyperparameter>,
43    /// Categorical hyperparameters
44    categorical_params: Vec<CategoricalHyperparameter>,
45    /// Conditional dependencies
46    conditional_dependencies: Vec<ConditionalDependency>,
47    /// Parameter bounds
48    parameter_bounds: HashMap<String, (f64, f64)>,
49}
50
51/// Continuous hyperparameter
52#[derive(Debug, Clone)]
53pub struct ContinuousHyperparameter {
54    /// Parameter name
55    name: String,
56    /// Lower bound
57    lower_bound: f64,
58    /// Upper bound
59    upper_bound: f64,
60    /// Scale (linear, log, etc.)
61    scale: ParameterScale,
62    /// Default value
63    default_value: f64,
64    /// Importance score
65    importance_score: f64,
66}
67
68/// Discrete hyperparameter
69#[derive(Debug, Clone)]
70pub struct DiscreteHyperparameter {
71    /// Parameter name
72    name: String,
73    /// Possible values
74    values: Vec<i64>,
75    /// Default value
76    default_value: i64,
77    /// Importance score
78    importance_score: f64,
79}
80
81/// Categorical hyperparameter
82#[derive(Debug, Clone)]
83pub struct CategoricalHyperparameter {
84    /// Parameter name
85    name: String,
86    /// Possible categories
87    categories: Vec<String>,
88    /// Default category
89    default_category: String,
90    /// Category embeddings
91    category_embeddings: HashMap<String, Array1<f64>>,
92    /// Importance score
93    importance_score: f64,
94}
95
96/// Parameter scale types
97#[derive(Debug, Clone)]
98pub enum ParameterScale {
99    Linear,
100    Logarithmic,
101    Exponential,
102    Sigmoid,
103}
104
105/// Conditional dependency between parameters
106#[derive(Debug, Clone)]
107pub struct ConditionalDependency {
108    /// Parent parameter
109    parent_param: String,
110    /// Child parameter
111    child_param: String,
112    /// Condition
113    condition: DependencyCondition,
114}
115
116/// Dependency condition types
117#[derive(Debug, Clone)]
118pub enum DependencyCondition {
119    Equals(String),
120    GreaterThan(f64),
121    LessThan(f64),
122    InRange(f64, f64),
123    OneOf(Vec<String>),
124}
125
126/// Performance database for storing evaluation results
127#[derive(Debug, Clone)]
128pub struct PerformanceDatabase {
129    /// Evaluation records
130    records: Vec<EvaluationRecord>,
131    /// Indexing for fast retrieval
132    index: HashMap<String, Vec<usize>>,
133    /// Performance trends
134    performance_trends: HashMap<String, PerformanceTrend>,
135    /// Correlation matrix
136    correlation_matrix: Array2<f64>,
137}
138
139/// Evaluation record
140#[derive(Debug, Clone)]
141pub struct EvaluationRecord {
142    /// Hyperparameter configuration
143    config: HyperparameterConfig,
144    /// Performance metric
145    performance: f64,
146    /// Evaluation cost
147    cost: f64,
148    /// Timestamp
149    timestamp: u64,
150    /// Problem characteristics
151    problem_features: Array1<f64>,
152    /// Fidelity level
153    fidelity: f64,
154    /// Additional metrics
155    additional_metrics: HashMap<String, f64>,
156}
157
158/// Hyperparameter configuration
159#[derive(Debug, Clone)]
160pub struct HyperparameterConfig {
161    /// Parameter values
162    parameters: HashMap<String, ParameterValue>,
163    /// Configuration hash
164    config_hash: u64,
165    /// Configuration embedding
166    embedding: Array1<f64>,
167}
168
169/// Parameter value types
170#[derive(Debug, Clone)]
171pub enum ParameterValue {
172    Continuous(f64),
173    Discrete(i64),
174    Categorical(String),
175}
176
177/// Performance trend analysis
178#[derive(Debug, Clone)]
179pub struct PerformanceTrend {
180    /// Trend direction
181    trend_direction: f64,
182    /// Trend strength
183    trend_strength: f64,
184    /// Seasonal patterns
185    seasonal_patterns: Array1<f64>,
186    /// Volatility measure
187    volatility: f64,
188}
189
190/// Bayesian optimizer for hyperparameter search
191#[derive(Debug, Clone)]
192pub struct BayesianOptimizer {
193    /// Gaussian process surrogate model
194    gaussian_process: GaussianProcess,
195    /// Acquisition function
196    acquisition_function: AcquisitionFunction,
197    /// Optimization strategy
198    optimization_strategy: OptimizationStrategy,
199    /// Exploration-exploitation balance
200    exploration_factor: f64,
201}
202
203/// Gaussian process surrogate model
204#[derive(Debug, Clone)]
205pub struct GaussianProcess {
206    /// Training inputs
207    training_inputs: Array2<f64>,
208    /// Training outputs
209    training_outputs: Array1<f64>,
210    /// Kernel function
211    kernel: KernelFunction,
212    /// Kernel hyperparameters
213    kernel_params: Array1<f64>,
214    /// Noise variance
215    noise_variance: f64,
216    /// Mean function
217    mean_function: MeanFunction,
218}
219
220/// Kernel function types
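///
/// The RBF (squared-exponential) kernel used as the default surrogate kernel
/// is k(x, x') = variance * exp(-||x - x'||^2 / (2 * length_scale^2)); larger
/// length scales produce smoother surrogate models.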
221#[derive(Debug, Clone)]
222pub enum KernelFunction {
223    RBF {
224        length_scale: f64,
225        variance: f64,
226    },
227    Matern {
228        nu: f64,
229        length_scale: f64,
230        variance: f64,
231    },
232    Polynomial {
233        degree: i32,
234        variance: f64,
235    },
236    Composite {
237        kernels: Vec<KernelFunction>,
238        weights: Array1<f64>,
239    },
240}
241
242/// Mean function for GP
243#[derive(Debug, Clone)]
244pub enum MeanFunction {
245    Zero,
246    Constant(f64),
247    Linear(Array1<f64>),
248    Quadratic(Array2<f64>),
249}
250
251/// Acquisition function types
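///
/// For a minimized objective with posterior mean mu(x), standard deviation
/// sigma(x), and incumbent best value f_best, Expected Improvement is
/// EI(x) = (f_best - mu(x) - xi) * Phi(z) + sigma(x) * phi(z) with
/// z = (f_best - mu(x) - xi) / sigma(x); the confidence-bound acquisition
/// trades off the predicted mean against beta * sigma(x).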
252#[derive(Debug, Clone)]
253pub enum AcquisitionFunction {
254    ExpectedImprovement { xi: f64 },
255    ProbabilityOfImprovement { xi: f64 },
256    UpperConfidenceBound { beta: f64 },
257    EntropySearch { num_samples: usize },
258    MultiFidelity { alpha: f64, beta: f64 },
259}
260
261/// Optimization strategy for acquisition function
262#[derive(Debug, Clone)]
263pub enum OptimizationStrategy {
264    RandomSearch { num_candidates: usize },
265    GridSearch { grid_resolution: usize },
266    GradientBased { num_restarts: usize },
267    EvolutionarySearch { population_size: usize },
268    DIRECT { max_nit: usize },
269}
270
271/// Multi-fidelity evaluator
272#[derive(Debug, Clone)]
273pub struct MultiFidelityEvaluator {
274    /// Available fidelity levels
275    fidelity_levels: Vec<FidelityLevel>,
276    /// Cost model
277    cost_model: CostModel,
278    /// Fidelity selection strategy
279    selection_strategy: FidelitySelectionStrategy,
280    /// Correlation estimator
281    correlation_estimator: FidelityCorrelationEstimator,
282}
283
284/// Fidelity level definition
285#[derive(Debug, Clone)]
286pub struct FidelityLevel {
287    /// Fidelity value (0.0 to 1.0)
288    fidelity: f64,
289    /// Cost multiplier
290    cost_multiplier: f64,
291    /// Accuracy estimate
292    accuracy: f64,
293    /// Resource requirements
294    resource_requirements: ResourceRequirements,
295}
296
297/// Resource requirements for evaluation
298#[derive(Debug, Clone)]
299pub struct ResourceRequirements {
300    /// Computational time
301    computation_time: f64,
302    /// Memory usage
303    memory_usage: f64,
304    /// CPU cores
305    cpu_cores: usize,
306    /// GPU requirements
307    gpu_required: bool,
308}
309
310/// Cost model for evaluations
311#[derive(Debug, Clone)]
312pub struct CostModel {
313    /// Cost prediction network
314    cost_network: Array2<f64>,
315    /// Base cost parameters
316    base_cost: f64,
317    /// Scaling factors
318    scaling_factors: Array1<f64>,
319    /// Historical cost data
320    cost_history: VecDeque<(f64, f64)>, // (fidelity, cost)
321}
322
323/// Fidelity selection strategy
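///
/// The adaptive variant scales fidelity with the remaining budget, roughly
/// fidelity = initial_fidelity * clamp(remaining_budget / total_cost_so_far, 0.1, 1.0),
/// as implemented in `select_fidelity_level`.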
324#[derive(Debug, Clone)]
325pub enum FidelitySelectionStrategy {
326    Static(f64),
327    Adaptive {
328        initial_fidelity: f64,
329        adaptation_rate: f64,
330    },
331    BanditBased {
332        epsilon: f64,
333    },
334    Predictive {
335        prediction_horizon: usize,
336    },
337}
338
339/// Correlation estimator between fidelities
340#[derive(Debug, Clone)]
341pub struct FidelityCorrelationEstimator {
342    /// Correlation matrix
343    correlation_matrix: Array2<f64>,
344    /// Estimation method
345    estimation_method: CorrelationMethod,
346    /// Confidence intervals
347    confidence_intervals: Array2<f64>,
348}
349
350/// Correlation estimation methods
351#[derive(Debug, Clone)]
352pub enum CorrelationMethod {
353    Pearson,
354    Spearman,
355    Kendall,
356    MutualInformation,
357}
358
359/// Hyperparameter tuning statistics
360#[derive(Debug, Clone)]
361pub struct HyperparameterTuningStats {
362    /// Total evaluations performed
363    total_evaluations: usize,
364    /// Best performance found
365    best_performance: f64,
366    /// Total cost spent
367    total_cost: f64,
368    /// Convergence rate
369    convergence_rate: f64,
370    /// Exploration efficiency
371    exploration_efficiency: f64,
372    /// Multi-fidelity savings
373    multi_fidelity_savings: f64,
374}
375
376impl LearnedHyperparameterTuner {
377    /// Create new learned hyperparameter tuner
378    pub fn new(config: LearnedOptimizationConfig) -> Self {
379        let hyperparameter_space = HyperparameterSpace::create_default_space();
380        let performance_database = PerformanceDatabase::new();
381        let bayesian_optimizer = BayesianOptimizer::new();
382        let multi_fidelity_evaluator = MultiFidelityEvaluator::new();
383        let hidden_size = config.hidden_size;
384
385        Self {
386            config,
387            hyperparameter_space,
388            performance_database,
389            bayesian_optimizer,
390            multi_fidelity_evaluator,
391            meta_state: MetaOptimizerState {
392                meta_params: Array1::zeros(hidden_size),
393                network_weights: Array2::zeros((hidden_size, hidden_size)),
394                performance_history: Vec::new(),
395                adaptation_stats: super::AdaptationStatistics::default(),
396                episode: 0,
397            },
398            tuning_stats: HyperparameterTuningStats::default(),
399        }
400    }
401
402    /// Tune hyperparameters for optimization problem
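    ///
    /// A usage sketch (the problem fields and budget below are illustrative
    /// assumptions; `budget` is measured in the same cost units that
    /// evaluations consume):
    ///
    /// ```ignore
    /// let mut tuner = LearnedHyperparameterTuner::new(LearnedOptimizationConfig::default());
    /// let problem = OptimizationProblem {
    ///     name: "demo".to_string(),
    ///     dimension: 2,
    ///     problem_class: "quadratic".to_string(),
    ///     metadata: HashMap::new(),
    ///     max_evaluations: 1000,
    ///     target_accuracy: 1e-6,
    /// };
    /// let objective = |x: &ArrayView1<f64>| x.iter().map(|&xi| xi * xi).sum::<f64>();
    /// let x0 = Array1::from(vec![1.0, -2.0]);
    /// let best_config = tuner.tune_hyperparameters(objective, &x0.view(), &problem, 10.0).unwrap();
    /// ```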
403    pub fn tune_hyperparameters<F>(
404        &mut self,
405        objective: F,
406        initial_params: &ArrayView1<f64>,
407        problem: &OptimizationProblem,
408        budget: f64,
409    ) -> OptimizeResult<HyperparameterConfig>
410    where
411        F: Fn(&ArrayView1<f64>) -> f64,
412    {
413        let mut remaining_budget = budget;
414        let mut best_config = self.get_default_config()?;
415        let mut best_performance = f64::INFINITY;
416
417        // Extract problem features
418        let problem_features =
419            self.extract_problem_features(&objective, initial_params, problem)?;
420
421        // Initialize with promising configurations from database
422        let promising_configs = self.get_promising_configurations(&problem_features)?;
423
424        // Evaluate promising configurations
425        for config in promising_configs {
426            if remaining_budget <= 0.0 {
427                break;
428            }
429
430            let (performance, cost) =
431                self.evaluate_configuration(&objective, initial_params, &config)?;
432            remaining_budget -= cost;
433
434            // Update database
435            self.add_evaluation_record(config.clone(), performance, cost, &problem_features)?;
436
437            if performance < best_performance {
438                best_performance = performance;
439                best_config = config;
440            }
441        }
442
443        // Bayesian optimization loop
444        while remaining_budget > 0.0 {
445            // Update Gaussian process
446            self.update_gaussian_process()?;
447
448            // Select next configuration to evaluate
449            let next_config = self.select_next_configuration(&problem_features)?;
450
451            // Select fidelity level
452            let fidelity = self.select_fidelity_level(&next_config, remaining_budget)?;
453
454            // Evaluate configuration
455            let (performance, cost) = self.evaluate_configuration_with_fidelity(
456                &objective,
457                initial_params,
458                &next_config,
459                fidelity,
460            )?;
461
462            remaining_budget -= cost;
463
464            // Update database
465            self.add_evaluation_record(next_config.clone(), performance, cost, &problem_features)?;
466
467            // Update best configuration
468            if performance < best_performance {
469                best_performance = performance;
470                best_config = next_config;
471            }
472
473            // Update statistics
474            self.update_tuning_stats(performance, cost)?;
475
476            // Check convergence
477            if self.check_convergence() {
478                break;
479            }
480        }
481
482        Ok(best_config)
483    }
484
485    /// Extract problem features for configuration selection
486    fn extract_problem_features<F>(
487        &self,
488        objective: &F,
489        initial_params: &ArrayView1<f64>,
490        problem: &OptimizationProblem,
491    ) -> OptimizeResult<Array1<f64>>
492    where
493        F: Fn(&ArrayView1<f64>) -> f64,
494    {
495        let mut features = Array1::zeros(20);
496
497        // Problem dimension
498        features[0] = (problem.dimension as f64).ln();
499
500        // Objective landscape features
501        let f0 = objective(initial_params);
        features[1] = (f0.abs() + 1e-12).ln(); // guard against ln(0)
503
504        // Gradient features
505        let h = 1e-6;
506        let mut gradient_norm = 0.0;
507        for i in 0..initial_params.len().min(10) {
508            let mut params_plus = initial_params.to_owned();
509            params_plus[i] += h;
510            let f_plus = objective(&params_plus.view());
511            let grad_i = (f_plus - f0) / h;
512            gradient_norm += grad_i * grad_i;
513        }
514        gradient_norm = gradient_norm.sqrt();
        features[2] = (gradient_norm + 1e-12).ln(); // guard against ln(0)
516
517        // Parameter statistics
518        features[3] = initial_params.view().mean();
519        features[4] = initial_params.variance().sqrt();
520        features[5] = initial_params.fold(-f64::INFINITY, |a, &b| a.max(b));
521        features[6] = initial_params.fold(f64::INFINITY, |a, &b| a.min(b));
522
523        // Problem class encoding
524        match problem.problem_class.as_str() {
525            "quadratic" => features[7] = 1.0,
526            "neural_network" => features[8] = 1.0,
527            "sparse" => features[9] = 1.0,
528            _ => features[10] = 1.0,
529        }
530
531        // Budget and accuracy requirements
532        features[11] = (problem.max_evaluations as f64).ln();
533        features[12] = problem.target_accuracy.ln().abs();
534
535        // Add metadata features
536        for (i, (_, &value)) in problem.metadata.iter().enumerate() {
537            if 13 + i < features.len() {
538                features[13 + i] = value.tanh();
539            }
540        }
541
542        Ok(features)
543    }
544
545    /// Get promising configurations from database
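    /// Candidates are ranked by `similarity / (1 + performance)` so that
    /// similar, well-performing records come first; the top five are reused and
    /// a few random configurations are added for exploration.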
546    fn get_promising_configurations(
547        &self,
548        problem_features: &Array1<f64>,
549    ) -> OptimizeResult<Vec<HyperparameterConfig>> {
550        let mut configs = Vec::new();
551        let mut similarities = Vec::new();
552
553        // Find similar problems in database
554        for record in &self.performance_database.records {
555            let similarity =
556                self.compute_problem_similarity(problem_features, &record.problem_features)?;
557            similarities.push((record, similarity));
558        }
559
560        // Sort by similarity and performance
561        similarities.sort_by(|a, b| {
562            let combined_score_a = a.1 * (1.0 / (1.0 + a.0.performance));
563            let combined_score_b = b.1 * (1.0 / (1.0 + b.0.performance));
564            combined_score_b
565                .partial_cmp(&combined_score_a)
566                .unwrap_or(std::cmp::Ordering::Equal)
567        });
568
569        // Select top configurations
        for (record, _) in similarities.into_iter().take(5) {
571            configs.push(record.config.clone());
572        }
573
574        // Add some random configurations for exploration
575        for _ in 0..3 {
576            configs.push(self.sample_random_configuration()?);
577        }
578
579        Ok(configs)
580    }
581
582    /// Compute similarity between problem features
583    fn compute_problem_similarity(
584        &self,
585        features1: &Array1<f64>,
586        features2: &Array1<f64>,
587    ) -> OptimizeResult<f64> {
588        // Cosine similarity
589        let dot_product = features1
590            .iter()
591            .zip(features2.iter())
592            .map(|(&a, &b)| a * b)
593            .sum::<f64>();
594
595        let norm1 = (features1.iter().map(|&x| x * x).sum::<f64>()).sqrt();
596        let norm2 = (features2.iter().map(|&x| x * x).sum::<f64>()).sqrt();
597
598        if norm1 > 0.0 && norm2 > 0.0 {
599            Ok(dot_product / (norm1 * norm2))
600        } else {
601            Ok(0.0)
602        }
603    }
604
605    /// Sample random configuration from hyperparameter space
606    fn sample_random_configuration(&self) -> OptimizeResult<HyperparameterConfig> {
607        let mut parameters = HashMap::new();
608
609        // Sample continuous parameters
610        for param in &self.hyperparameter_space.continuous_params {
611            let value = match param.scale {
612                ParameterScale::Linear => {
613                    param.lower_bound
614                        + scirs2_core::random::rng().random::<f64>()
615                            * (param.upper_bound - param.lower_bound)
616                }
617                ParameterScale::Logarithmic => {
618                    let log_lower = param.lower_bound.ln();
619                    let log_upper = param.upper_bound.ln();
620                    (log_lower
621                        + scirs2_core::random::rng().random::<f64>() * (log_upper - log_lower))
622                        .exp()
623                }
624                _ => param.default_value,
625            };
626
627            parameters.insert(param.name.clone(), ParameterValue::Continuous(value));
628        }
629
630        // Sample discrete parameters
631        for param in &self.hyperparameter_space.discrete_params {
632            let idx = scirs2_core::random::rng().random_range(0..param.values.len());
633            let value = param.values[idx];
634            parameters.insert(param.name.clone(), ParameterValue::Discrete(value));
635        }
636
637        // Sample categorical parameters
638        for param in &self.hyperparameter_space.categorical_params {
639            let idx = scirs2_core::random::rng().random_range(0..param.categories.len());
640            let value = param.categories[idx].clone();
641            parameters.insert(param.name.clone(), ParameterValue::Categorical(value));
642        }
643
644        Ok(HyperparameterConfig::new(parameters))
645    }
646
647    /// Get default configuration
648    fn get_default_config(&self) -> OptimizeResult<HyperparameterConfig> {
649        let mut parameters = HashMap::new();
650
651        for param in &self.hyperparameter_space.continuous_params {
652            parameters.insert(
653                param.name.clone(),
654                ParameterValue::Continuous(param.default_value),
655            );
656        }
657
658        for param in &self.hyperparameter_space.discrete_params {
659            parameters.insert(
660                param.name.clone(),
661                ParameterValue::Discrete(param.default_value),
662            );
663        }
664
665        for param in &self.hyperparameter_space.categorical_params {
666            parameters.insert(
667                param.name.clone(),
668                ParameterValue::Categorical(param.default_category.clone()),
669            );
670        }
671
672        Ok(HyperparameterConfig::new(parameters))
673    }
674
675    /// Evaluate configuration
676    fn evaluate_configuration<F>(
677        &self,
678        objective: &F,
679        initial_params: &ArrayView1<f64>,
680        config: &HyperparameterConfig,
681    ) -> OptimizeResult<(f64, f64)>
682    where
683        F: Fn(&ArrayView1<f64>) -> f64,
684    {
685        self.evaluate_configuration_with_fidelity(objective, initial_params, config, 1.0)
686    }
687
688    /// Evaluate configuration with specified fidelity
689    fn evaluate_configuration_with_fidelity<F>(
690        &self,
691        objective: &F,
692        initial_params: &ArrayView1<f64>,
693        config: &HyperparameterConfig,
694        fidelity: f64,
695    ) -> OptimizeResult<(f64, f64)>
696    where
697        F: Fn(&ArrayView1<f64>) -> f64,
698    {
699        // Create optimizer with specified configuration
700        let optimizer_result =
701            self.create_optimizer_from_config(config, objective, initial_params, fidelity)?;
702
        // Cost scales with the cost model's base cost and the fidelity level
        let cost = self.multi_fidelity_evaluator.cost_model.base_cost * fidelity;
706
707        Ok((optimizer_result.fun, cost))
708    }
709
710    /// Create optimizer from configuration
711    fn create_optimizer_from_config<F>(
712        &self,
713        config: &HyperparameterConfig,
714        objective: &F,
715        initial_params: &ArrayView1<f64>,
716        fidelity: f64,
717    ) -> OptimizeResult<OptimizeResults<f64>>
718    where
719        F: Fn(&ArrayView1<f64>) -> f64,
720    {
721        // Extract optimization parameters from config
722        let learning_rate = match config.parameters.get("learning_rate") {
723            Some(ParameterValue::Continuous(lr)) => *lr,
724            _ => 0.01,
725        };
726
727        let max_nit = match config.parameters.get("max_nit") {
728            Some(ParameterValue::Discrete(iters)) => (*iters as f64 * fidelity) as usize,
729            _ => (100.0 * fidelity) as usize,
730        };
731
732        // Simple optimization with extracted parameters
733        let mut current_params = initial_params.to_owned();
734        let mut best_value = objective(initial_params);
735
736        for iter in 0..max_nit {
737            // Compute gradient
738            let h = 1e-6;
739            let f0 = objective(&current_params.view());
740            let mut gradient = Array1::zeros(current_params.len());
741
742            for i in 0..current_params.len() {
743                let mut params_plus = current_params.clone();
744                params_plus[i] += h;
745                let f_plus = objective(&params_plus.view());
746                gradient[i] = (f_plus - f0) / h;
747            }
748
749            // Update parameters
750            for i in 0..current_params.len() {
751                current_params[i] -= learning_rate * gradient[i];
752            }
753
754            let current_value = objective(&current_params.view());
755            if current_value < best_value {
756                best_value = current_value;
757            }
758
759            // Early stopping for low fidelity
760            if fidelity < 1.0 && iter > (max_nit / 2) {
761                break;
762            }
763        }
764
765        Ok(OptimizeResults::<f64> {
766            x: current_params,
767            fun: best_value,
768            success: true,
769            nit: max_nit,
770            message: "Hyperparameter evaluation completed".to_string(),
771            jac: None,
772            hess: None,
773            constr: None,
774            nfev: max_nit,
775            njev: 0,
776            nhev: 0,
777            maxcv: 0,
778            status: 0,
779        })
780    }
781
782    /// Add evaluation record to database
783    fn add_evaluation_record(
784        &mut self,
785        config: HyperparameterConfig,
786        performance: f64,
787        cost: f64,
788        problem_features: &Array1<f64>,
789    ) -> OptimizeResult<()> {
790        let record = EvaluationRecord {
791            config,
792            performance,
793            cost,
794            timestamp: std::time::SystemTime::now()
795                .duration_since(std::time::UNIX_EPOCH)
796                .unwrap_or_default()
797                .as_secs(),
798            problem_features: problem_features.clone(),
799            fidelity: 1.0,
800            additional_metrics: HashMap::new(),
801        };
802
803        self.performance_database.add_record(record);
804        Ok(())
805    }
806
807    /// Update Gaussian process with new data
808    fn update_gaussian_process(&mut self) -> OptimizeResult<()> {
809        // Extract training data from database
810        let (inputs, outputs) = self.extract_training_data()?;
811
812        // Update GP
813        self.bayesian_optimizer
814            .gaussian_process
815            .update_training_data(inputs, outputs)?;
816
817        // Optimize hyperparameters
818        self.bayesian_optimizer
819            .gaussian_process
820            .optimize_hyperparameters()?;
821
822        Ok(())
823    }
824
825    /// Extract training data from database
826    fn extract_training_data(&self) -> OptimizeResult<(Array2<f64>, Array1<f64>)> {
827        let num_records = self.performance_database.records.len();
828        if num_records == 0 {
829            return Ok((Array2::zeros((0, 10)), Array1::zeros(0)));
830        }
831
832        let input_dim = self.performance_database.records[0].config.embedding.len();
833        let mut inputs = Array2::zeros((num_records, input_dim));
834        let mut outputs = Array1::zeros(num_records);
835
836        for (i, record) in self.performance_database.records.iter().enumerate() {
837            for j in 0..input_dim.min(record.config.embedding.len()) {
838                inputs[[i, j]] = record.config.embedding[j];
839            }
840            outputs[i] = record.performance;
841        }
842
843        Ok((inputs, outputs))
844    }
845
846    /// Select next configuration to evaluate
847    fn select_next_configuration(
848        &self,
849        _problem_features: &Array1<f64>,
850    ) -> OptimizeResult<HyperparameterConfig> {
851        // Use acquisition function to select next point
852        let candidate_configs = self.generate_candidate_configurations(100)?;
853        let mut best_config = candidate_configs[0].clone();
854        let mut best_acquisition = f64::NEG_INFINITY;
855
856        for config in candidate_configs {
857            let acquisition_value = self.evaluate_acquisition_function(&config)?;
858            if acquisition_value > best_acquisition {
859                best_acquisition = acquisition_value;
860                best_config = config;
861            }
862        }
863
864        Ok(best_config)
865    }
866
867    /// Generate candidate configurations
868    fn generate_candidate_configurations(
869        &self,
870        num_candidates: usize,
871    ) -> OptimizeResult<Vec<HyperparameterConfig>> {
872        let mut candidates = Vec::new();
873
874        for _ in 0..num_candidates {
875            candidates.push(self.sample_random_configuration()?);
876        }
877
878        Ok(candidates)
879    }
880
881    /// Evaluate acquisition function
882    fn evaluate_acquisition_function(&self, config: &HyperparameterConfig) -> OptimizeResult<f64> {
883        // Predict mean and variance using GP
884        let (mean, variance) = self
885            .bayesian_optimizer
886            .gaussian_process
887            .predict(&config.embedding)?;
888
889        // Compute acquisition function value
        let acquisition_value = match &self.bayesian_optimizer.acquisition_function {
            AcquisitionFunction::ExpectedImprovement { xi } => {
                // Expected improvement for minimization:
                // EI(x) = (f_best - mu - xi) * Phi(z) + sigma * phi(z),
                // with z = (f_best - mu - xi) / sigma.
                let best_value = self.get_best_performance();
                let std_dev = variance.sqrt();
                let improvement = best_value - mean - xi;

                if std_dev > 1e-8 {
                    let z = improvement / std_dev;
                    improvement * self.normal_cdf(z) + std_dev * self.normal_pdf(z)
                } else {
                    improvement.max(0.0)
                }
            }
            AcquisitionFunction::UpperConfidenceBound { beta } => {
                // Since performance is minimized, maximize the negated lower
                // confidence bound: beta * sigma - mu.
                beta * variance.sqrt() - mean
            }
            _ => variance.sqrt() - mean, // Default: negated lower confidence bound with beta = 1
        };
906
907        Ok(acquisition_value)
908    }
909
    /// Standard normal CDF approximation
    fn normal_cdf(&self, x: f64) -> f64 {
        // Tanh approximation of the standard normal CDF (as used in the GELU
        // approximation): Phi(x) ≈ 0.5 * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
        let coeff = (2.0 / std::f64::consts::PI).sqrt();
        0.5 * (1.0 + (coeff * (x + 0.044715 * x.powi(3))).tanh())
    }
917
918    /// Normal PDF
919    fn normal_pdf(&self, x: f64) -> f64 {
920        (1.0 / (2.0 * std::f64::consts::PI).sqrt()) * (-0.5 * x * x).exp()
921    }
922
923    /// Get best performance from database
924    fn get_best_performance(&self) -> f64 {
925        self.performance_database
926            .records
927            .iter()
928            .map(|r| r.performance)
929            .fold(f64::INFINITY, |a, b| a.min(b))
930    }
931
932    /// Select fidelity level for evaluation
933    fn select_fidelity_level(
934        &self,
935        _config: &HyperparameterConfig,
936        remaining_budget: f64,
937    ) -> OptimizeResult<f64> {
938        match &self.multi_fidelity_evaluator.selection_strategy {
939            FidelitySelectionStrategy::Static(fidelity) => Ok(*fidelity),
940            FidelitySelectionStrategy::Adaptive {
941                initial_fidelity,
942                adaptation_rate: _,
943            } => {
                // Simple adaptive strategy based on the remaining budget
                let budget_ratio = remaining_budget / self.tuning_stats.total_cost.max(1.0);
                Ok(initial_fidelity * budget_ratio.clamp(0.1, 1.0))
947            }
948            _ => Ok(0.5), // Default medium fidelity
949        }
950    }
951
    /// Update tuning statistics
    fn update_tuning_stats(&mut self, performance: f64, cost: f64) -> OptimizeResult<()> {
        self.tuning_stats.total_evaluations += 1;
        self.tuning_stats.total_cost += cost;

        // Record the improvement against the previous best *before* updating
        // it; otherwise the convergence rate is always zero.
        let previous_best = self.tuning_stats.best_performance;
        if performance < previous_best {
            self.tuning_stats.best_performance = performance;
        }

        // Update convergence rate (simplified): improvement per evaluation
        if self.tuning_stats.total_evaluations > 1 {
            let improvement_rate = (previous_best - performance).max(0.0)
                / self.tuning_stats.total_evaluations as f64;
            self.tuning_stats.convergence_rate = improvement_rate;
        }

        Ok(())
    }
970
971    /// Check convergence criteria
972    fn check_convergence(&self) -> bool {
973        // Simple convergence check
974        self.tuning_stats.total_evaluations > 50 && self.tuning_stats.convergence_rate < 1e-6
975    }
976
977    /// Get tuning statistics
978    pub fn get_tuning_stats(&self) -> &HyperparameterTuningStats {
979        &self.tuning_stats
980    }
981}
982
983impl HyperparameterSpace {
984    /// Create default hyperparameter space for optimization
985    pub fn create_default_space() -> Self {
986        let continuous_params = vec![
987            ContinuousHyperparameter {
988                name: "learning_rate".to_string(),
989                lower_bound: 1e-5,
990                upper_bound: 1.0,
991                scale: ParameterScale::Logarithmic,
992                default_value: 0.01,
993                importance_score: 1.0,
994            },
995            ContinuousHyperparameter {
996                name: "momentum".to_string(),
997                lower_bound: 0.0,
998                upper_bound: 0.99,
999                scale: ParameterScale::Linear,
1000                default_value: 0.9,
1001                importance_score: 0.8,
1002            },
1003            ContinuousHyperparameter {
1004                name: "weight_decay".to_string(),
1005                lower_bound: 1e-8,
1006                upper_bound: 1e-2,
1007                scale: ParameterScale::Logarithmic,
1008                default_value: 1e-4,
1009                importance_score: 0.6,
1010            },
1011        ];
1012
1013        let discrete_params = vec![
1014            DiscreteHyperparameter {
1015                name: "max_nit".to_string(),
1016                values: vec![10, 50, 100, 500, 1000],
1017                default_value: 100,
1018                importance_score: 0.9,
1019            },
1020            DiscreteHyperparameter {
1021                name: "batch_size".to_string(),
1022                values: vec![1, 8, 16, 32, 64, 128],
1023                default_value: 32,
1024                importance_score: 0.7,
1025            },
1026        ];
1027
1028        let categorical_params = vec![CategoricalHyperparameter {
1029            name: "optimizer_type".to_string(),
1030            categories: vec!["sgd".to_string(), "adam".to_string(), "lbfgs".to_string()],
1031            default_category: "adam".to_string(),
1032            category_embeddings: HashMap::new(),
1033            importance_score: 1.0,
1034        }];
1035
1036        Self {
1037            continuous_params,
1038            discrete_params,
1039            categorical_params,
1040            conditional_dependencies: Vec::new(),
1041            parameter_bounds: HashMap::new(),
1042        }
1043    }
1044}
1045
1046impl HyperparameterConfig {
1047    /// Create new hyperparameter configuration
1048    pub fn new(parameters: HashMap<String, ParameterValue>) -> Self {
1049        let config_hash = Self::compute_hash(&parameters);
1050        let embedding = Self::compute_embedding(&parameters);
1051
1052        Self {
1053            parameters,
1054            config_hash,
1055            embedding,
1056        }
1057    }
1058
1059    /// Compute hash for configuration
1060    fn compute_hash(parameters: &HashMap<String, ParameterValue>) -> u64 {
1061        // Simplified hash computation
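        // XOR-combining the per-entry hashes keeps the result independent of
        // HashMap iteration order, so identical configurations hash identically.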
1062        let mut hash = 0u64;
1063        for (key, value) in parameters {
1064            hash ^= Self::hash_string(key);
1065            hash ^= Self::hash_parameter_value(value);
1066        }
1067        hash
1068    }
1069
1070    /// Hash string
1071    fn hash_string(s: &str) -> u64 {
1072        // Simple string hash
1073        s.bytes().fold(0u64, |hash, byte| {
1074            hash.wrapping_mul(31).wrapping_add(byte as u64)
1075        })
1076    }
1077
1078    /// Hash parameter value
1079    fn hash_parameter_value(value: &ParameterValue) -> u64 {
1080        match value {
1081            ParameterValue::Continuous(v) => v.to_bits(),
1082            ParameterValue::Discrete(v) => *v as u64,
1083            ParameterValue::Categorical(s) => Self::hash_string(s),
1084        }
1085    }
1086
    /// Compute embedding for configuration
    fn compute_embedding(parameters: &HashMap<String, ParameterValue>) -> Array1<f64> {
        let mut embedding = Array1::zeros(32); // Fixed embedding size

        // Iterate parameters in sorted-name order so that identical
        // configurations always produce the same embedding (HashMap iteration
        // order is not deterministic).
        let mut names: Vec<&String> = parameters.keys().collect();
        names.sort();

        let mut idx = 0;
        for name in names {
            if idx >= embedding.len() {
                break;
            }

            match &parameters[name] {
                ParameterValue::Continuous(v) => {
                    embedding[idx] = v.tanh();
                }
                ParameterValue::Discrete(v) => {
                    embedding[idx] = (*v as f64 / 100.0).tanh();
                }
                ParameterValue::Categorical(s) => {
                    // Simple categorical encoding via a normalized string hash
                    let hash_val = Self::hash_string(s) as f64 / u64::MAX as f64;
                    embedding[idx] = (hash_val * 2.0 - 1.0).tanh();
                }
            }
            idx += 1;
        }

        embedding
    }
1117}
1118
1119impl Default for PerformanceDatabase {
1120    fn default() -> Self {
1121        Self::new()
1122    }
1123}
1124
1125impl PerformanceDatabase {
1126    /// Create new performance database
1127    pub fn new() -> Self {
1128        Self {
1129            records: Vec::new(),
1130            index: HashMap::new(),
1131            performance_trends: HashMap::new(),
1132            correlation_matrix: Array2::zeros((0, 0)),
1133        }
1134    }
1135
1136    /// Add evaluation record
1137    pub fn add_record(&mut self, record: EvaluationRecord) {
1138        self.records.push(record);
1139
1140        // Update index (simplified)
1141        let record_idx = self.records.len() - 1;
1142        self.index
1143            .entry("all".to_string())
1144            .or_default()
1145            .push(record_idx);
1146    }
1147}
1148
1149impl Default for BayesianOptimizer {
1150    fn default() -> Self {
1151        Self::new()
1152    }
1153}
1154
1155impl BayesianOptimizer {
1156    /// Create new Bayesian optimizer
1157    pub fn new() -> Self {
1158        Self {
1159            gaussian_process: GaussianProcess::new(),
1160            acquisition_function: AcquisitionFunction::ExpectedImprovement { xi: 0.01 },
1161            optimization_strategy: OptimizationStrategy::RandomSearch {
1162                num_candidates: 100,
1163            },
1164            exploration_factor: 0.1,
1165        }
1166    }
1167}
1168
1169impl Default for GaussianProcess {
1170    fn default() -> Self {
1171        Self::new()
1172    }
1173}
1174
1175impl GaussianProcess {
1176    /// Create new Gaussian process
1177    pub fn new() -> Self {
1178        Self {
1179            training_inputs: Array2::zeros((0, 0)),
1180            training_outputs: Array1::zeros(0),
1181            kernel: KernelFunction::RBF {
1182                length_scale: 1.0,
1183                variance: 1.0,
1184            },
1185            kernel_params: Array1::from(vec![1.0, 1.0]),
1186            noise_variance: 0.1,
1187            mean_function: MeanFunction::Zero,
1188        }
1189    }
1190
1191    /// Update training data
1192    pub fn update_training_data(
1193        &mut self,
1194        inputs: Array2<f64>,
1195        outputs: Array1<f64>,
1196    ) -> OptimizeResult<()> {
1197        self.training_inputs = inputs;
1198        self.training_outputs = outputs;
1199        Ok(())
1200    }
1201
1202    /// Optimize hyperparameters
1203    pub fn optimize_hyperparameters(&mut self) -> OptimizeResult<()> {
1204        // Simplified hyperparameter optimization
1205        // In practice, would use marginal likelihood optimization
1206        Ok(())
1207    }
1208
    /// Predict mean and variance (simplified surrogate prediction)
    pub fn predict(&self, _input: &Array1<f64>) -> OptimizeResult<(f64, f64)> {
        if self.training_inputs.is_empty() {
            return Ok((0.0, 1.0));
        }

        // Simplified prediction: use the empirical mean of the training
        // outputs and the prior variance plus observation noise. A full GP
        // posterior would condition on the input through the kernel matrix.
        let n = self.training_outputs.len() as f64;
        let mean = self.training_outputs.sum() / n.max(1.0);
        let variance = 1.0 + self.noise_variance;

        Ok((mean, variance))
    }
1222
1223impl Default for MultiFidelityEvaluator {
1224    fn default() -> Self {
1225        Self::new()
1226    }
1227}
1228
1229impl MultiFidelityEvaluator {
1230    /// Create new multi-fidelity evaluator
1231    pub fn new() -> Self {
1232        let fidelity_levels = vec![
1233            FidelityLevel {
1234                fidelity: 0.1,
1235                cost_multiplier: 0.1,
1236                accuracy: 0.7,
1237                resource_requirements: ResourceRequirements {
1238                    computation_time: 1.0,
1239                    memory_usage: 0.5,
1240                    cpu_cores: 1,
1241                    gpu_required: false,
1242                },
1243            },
1244            FidelityLevel {
1245                fidelity: 0.5,
1246                cost_multiplier: 0.5,
1247                accuracy: 0.9,
1248                resource_requirements: ResourceRequirements {
1249                    computation_time: 5.0,
1250                    memory_usage: 1.0,
1251                    cpu_cores: 2,
1252                    gpu_required: false,
1253                },
1254            },
1255            FidelityLevel {
1256                fidelity: 1.0,
1257                cost_multiplier: 1.0,
1258                accuracy: 1.0,
1259                resource_requirements: ResourceRequirements {
1260                    computation_time: 10.0,
1261                    memory_usage: 2.0,
1262                    cpu_cores: 4,
1263                    gpu_required: true,
1264                },
1265            },
1266        ];
1267
1268        Self {
1269            fidelity_levels,
1270            cost_model: CostModel::new(),
1271            selection_strategy: FidelitySelectionStrategy::Adaptive {
1272                initial_fidelity: 0.5,
1273                adaptation_rate: 0.1,
1274            },
1275            correlation_estimator: FidelityCorrelationEstimator::new(),
1276        }
1277    }
1278}
1279
1280impl Default for CostModel {
1281    fn default() -> Self {
1282        Self::new()
1283    }
1284}
1285
1286impl CostModel {
1287    /// Create new cost model
1288    pub fn new() -> Self {
1289        Self {
1290            cost_network: Array2::from_shape_fn((1, 10), |_| {
1291                (scirs2_core::random::rng().random::<f64>() - 0.5) * 0.1
1292            }),
1293            base_cost: 1.0,
1294            scaling_factors: Array1::ones(5),
1295            cost_history: VecDeque::with_capacity(1000),
1296        }
1297    }
1298}
1299
1300impl Default for FidelityCorrelationEstimator {
1301    fn default() -> Self {
1302        Self::new()
1303    }
1304}
1305
1306impl FidelityCorrelationEstimator {
1307    /// Create new correlation estimator
1308    pub fn new() -> Self {
1309        Self {
1310            correlation_matrix: Array2::eye(3),
1311            estimation_method: CorrelationMethod::Pearson,
1312            confidence_intervals: Array2::zeros((3, 2)),
1313        }
1314    }
1315}
1316
1317impl Default for HyperparameterTuningStats {
1318    fn default() -> Self {
1319        Self {
1320            total_evaluations: 0,
1321            best_performance: f64::INFINITY,
1322            total_cost: 0.0,
1323            convergence_rate: 0.0,
1324            exploration_efficiency: 0.0,
1325            multi_fidelity_savings: 0.0,
1326        }
1327    }
1328}
1329
1330impl LearnedOptimizer for LearnedHyperparameterTuner {
1331    fn meta_train(&mut self, training_tasks: &[TrainingTask]) -> OptimizeResult<()> {
1332        for task in training_tasks {
1333            // Create simple objective for training
1334            let training_objective = |x: &ArrayView1<f64>| x.iter().map(|&xi| xi * xi).sum::<f64>();
1335
1336            let initial_params = Array1::zeros(task.problem.dimension);
1337
1338            // Tune hyperparameters for this task
1339            let _best_config = self.tune_hyperparameters(
1340                training_objective,
1341                &initial_params.view(),
1342                &task.problem,
1343                10.0,
1344            )?;
1345        }
1346
1347        Ok(())
1348    }
1349
1350    fn adapt_to_problem(
1351        &mut self,
1352        problem: &OptimizationProblem,
1353        initial_params: &ArrayView1<f64>,
1354    ) -> OptimizeResult<()> {
1355        // Extract problem features for future configuration selection
1356        let simple_objective = |_x: &ArrayView1<f64>| 0.0;
1357        let _problem_features =
1358            self.extract_problem_features(&simple_objective, initial_params, problem)?;
1359
1360        Ok(())
1361    }
1362
1363    fn optimize<F>(
1364        &mut self,
1365        objective: F,
1366        initial_params: &ArrayView1<f64>,
1367    ) -> OptimizeResult<OptimizeResults<f64>>
1368    where
1369        F: Fn(&ArrayView1<f64>) -> f64,
1370    {
1371        // Create default problem for hyperparameter tuning
1372        let default_problem = OptimizationProblem {
1373            name: "hyperparameter_tuning".to_string(),
1374            dimension: initial_params.len(),
1375            problem_class: "general".to_string(),
1376            metadata: HashMap::new(),
1377            max_evaluations: 1000,
1378            target_accuracy: 1e-6,
1379        };
1380
1381        // Tune hyperparameters
1382        let best_config =
1383            self.tune_hyperparameters(&objective, initial_params, &default_problem, 20.0)?;
1384
1385        // Use best configuration for final optimization
1386        self.create_optimizer_from_config(&best_config, &objective, initial_params, 1.0)
1387    }
1388
1389    fn get_state(&self) -> &MetaOptimizerState {
1390        &self.meta_state
1391    }
1392
1393    fn reset(&mut self) {
1394        self.performance_database = PerformanceDatabase::new();
1395        self.tuning_stats = HyperparameterTuningStats::default();
1396    }
1397}
1398
1399/// Convenience function for learned hyperparameter tuning
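///
/// A minimal invocation sketch (the quadratic objective and starting point are
/// illustrative assumptions):
///
/// ```ignore
/// let objective = |x: &ArrayView1<f64>| x[0].powi(2) + x[1].powi(2);
/// let x0 = Array1::from(vec![2.0, 2.0]);
/// let result = hyperparameter_tuning_optimize(objective, &x0.view(), None).unwrap();
/// assert!(result.fun.is_finite());
/// ```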
1400#[allow(dead_code)]
1401pub fn hyperparameter_tuning_optimize<F>(
1402    objective: F,
1403    initial_params: &ArrayView1<f64>,
1404    config: Option<LearnedOptimizationConfig>,
1405) -> super::OptimizeResult<OptimizeResults<f64>>
1406where
1407    F: Fn(&ArrayView1<f64>) -> f64,
1408{
1409    let config = config.unwrap_or_default();
1410    let mut tuner = LearnedHyperparameterTuner::new(config);
1411    tuner.optimize(objective, initial_params)
1412}
1413
1414#[cfg(test)]
1415mod tests {
1416    use super::*;
1417
1418    #[test]
1419    fn test_hyperparameter_tuner_creation() {
1420        let config = LearnedOptimizationConfig::default();
1421        let tuner = LearnedHyperparameterTuner::new(config);
1422
1423        assert_eq!(tuner.tuning_stats.total_evaluations, 0);
1424        assert!(!tuner.hyperparameter_space.continuous_params.is_empty());
1425    }
1426
1427    #[test]
1428    fn test_hyperparameter_space() {
1429        let space = HyperparameterSpace::create_default_space();
1430
1431        assert!(!space.continuous_params.is_empty());
1432        assert!(!space.discrete_params.is_empty());
1433        assert!(!space.categorical_params.is_empty());
1434    }
1435
1436    #[test]
1437    fn test_hyperparameter_config() {
1438        let mut parameters = HashMap::new();
1439        parameters.insert(
1440            "learning_rate".to_string(),
1441            ParameterValue::Continuous(0.01),
1442        );
1443        parameters.insert("max_nit".to_string(), ParameterValue::Discrete(100));
1444        parameters.insert(
1445            "optimizer_type".to_string(),
1446            ParameterValue::Categorical("adam".to_string()),
1447        );
1448
1449        let config = HyperparameterConfig::new(parameters);
1450
1451        assert!(config.config_hash != 0);
1452        assert_eq!(config.embedding.len(), 32);
1453        assert!(config.embedding.iter().all(|&x| x.is_finite()));
1454    }
1455
1456    #[test]
1457    fn test_problem_similarity() {
1458        let config = LearnedOptimizationConfig::default();
1459        let tuner = LearnedHyperparameterTuner::new(config);
1460
1461        let features1 = Array1::from(vec![1.0, 0.0, 0.0]);
1462        let features2 = Array1::from(vec![0.0, 1.0, 0.0]);
1463        let features3 = Array1::from(vec![1.0, 0.1, 0.1]);
1464
1465        let sim1 = tuner
1466            .compute_problem_similarity(&features1, &features2)
1467            .unwrap();
1468        let sim2 = tuner
1469            .compute_problem_similarity(&features1, &features3)
1470            .unwrap();
1471
1472        assert!(sim2 > sim1); // features3 should be more similar to features1
1473    }
1474
1475    #[test]
1476    fn test_gaussian_process() {
1477        let mut gp = GaussianProcess::new();
1478
1479        let inputs = Array2::from_shape_fn((3, 2), |_| scirs2_core::random::rng().random::<f64>());
1480        let outputs = Array1::from(vec![1.0, 2.0, 3.0]);
1481
1482        gp.update_training_data(inputs, outputs).unwrap();
1483
1484        let test_input = Array1::from(vec![0.5, 0.5]);
1485        let (mean, variance) = gp.predict(&test_input).unwrap();
1486
1487        assert!(mean.is_finite());
1488        assert!(variance >= 0.0);
1489    }
1490
1491    #[test]
1492    fn test_hyperparameter_tuning_optimization() {
1493        let objective = |x: &ArrayView1<f64>| x[0].powi(2) + x[1].powi(2);
1494        let initial = Array1::from(vec![2.0, 2.0]);
1495
1496        let config = LearnedOptimizationConfig {
1497            hidden_size: 32,
1498            ..Default::default()
1499        };
1500
1501        let result =
1502            hyperparameter_tuning_optimize(objective, &initial.view(), Some(config)).unwrap();
1503
1504        assert!(result.fun >= 0.0);
1505        assert_eq!(result.x.len(), 2);
1506        assert!(result.success);
1507    }
1508}
1509
1510#[allow(dead_code)]
1511pub fn placeholder() {
1512    // Placeholder function to prevent unused module warnings
1513}