scirs2_optimize/learned_optimizers/learned_hyperparameter_tuner.rs

//! Learned Hyperparameter Tuner
//!
//! Implementation of machine learning-based hyperparameter tuning that learns
//! optimal hyperparameter configurations across different optimization problems.
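//!
//! A minimal usage sketch of the convenience entry point defined at the bottom of
//! this file. The `use` path below is an assumption about how the crate re-exports
//! this module; adjust it to the actual crate layout.
//!
//! ```ignore
//! use ndarray::{array, ArrayView1};
//! // Assumed re-export path; the function itself is `hyperparameter_tuning_optimize` below.
//! use scirs2_optimize::learned_optimizers::learned_hyperparameter_tuner::hyperparameter_tuning_optimize;
//!
//! // Simple quadratic objective; any `Fn(&ArrayView1<f64>) -> f64` works.
//! let objective = |x: &ArrayView1<f64>| x.iter().map(|v| v * v).sum::<f64>();
//! let x0 = array![1.0, -2.0];
//!
//! // `None` falls back to `LearnedOptimizationConfig::default()`.
//! let result = hyperparameter_tuning_optimize(objective, &x0.view(), None).unwrap();
//! assert!(result.fun >= 0.0);
//! ```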

use super::{
    LearnedOptimizationConfig, LearnedOptimizer, MetaOptimizerState, OptimizationProblem,
    TrainingTask,
};
use crate::error::OptimizeResult;
use crate::result::OptimizeResults;
use ndarray::{Array1, Array2, ArrayView1};
use rand::Rng;
use statrs::statistics::Statistics;
use std::collections::{HashMap, VecDeque};

/// Learned hyperparameter tuner with adaptive configuration
#[derive(Debug, Clone)]
pub struct LearnedHyperparameterTuner {
    /// Configuration
    config: LearnedOptimizationConfig,
    /// Hyperparameter space
    hyperparameter_space: HyperparameterSpace,
    /// Performance database
    performance_database: PerformanceDatabase,
    /// Bayesian optimizer for hyperparameter search
    bayesian_optimizer: BayesianOptimizer,
    /// Multi-fidelity evaluator
    multi_fidelity_evaluator: MultiFidelityEvaluator,
    /// Meta-optimizer state
    meta_state: MetaOptimizerState,
    /// Tuning statistics
    tuning_stats: HyperparameterTuningStats,
}

/// Hyperparameter space definition
#[derive(Debug, Clone)]
pub struct HyperparameterSpace {
    /// Continuous hyperparameters
    continuous_params: Vec<ContinuousHyperparameter>,
    /// Discrete hyperparameters
    discrete_params: Vec<DiscreteHyperparameter>,
    /// Categorical hyperparameters
    categorical_params: Vec<CategoricalHyperparameter>,
    /// Conditional dependencies
    conditional_dependencies: Vec<ConditionalDependency>,
    /// Parameter bounds
    parameter_bounds: HashMap<String, (f64, f64)>,
}

/// Continuous hyperparameter
#[derive(Debug, Clone)]
pub struct ContinuousHyperparameter {
    /// Parameter name
    name: String,
    /// Lower bound
    lower_bound: f64,
    /// Upper bound
    upper_bound: f64,
    /// Scale (linear, log, etc.)
    scale: ParameterScale,
    /// Default value
    default_value: f64,
    /// Importance score
    importance_score: f64,
}

/// Discrete hyperparameter
#[derive(Debug, Clone)]
pub struct DiscreteHyperparameter {
    /// Parameter name
    name: String,
    /// Possible values
    values: Vec<i64>,
    /// Default value
    default_value: i64,
    /// Importance score
    importance_score: f64,
}

/// Categorical hyperparameter
#[derive(Debug, Clone)]
pub struct CategoricalHyperparameter {
    /// Parameter name
    name: String,
    /// Possible categories
    categories: Vec<String>,
    /// Default category
    default_category: String,
    /// Category embeddings
    category_embeddings: HashMap<String, Array1<f64>>,
    /// Importance score
    importance_score: f64,
}

/// Parameter scale types
#[derive(Debug, Clone)]
pub enum ParameterScale {
    Linear,
    Logarithmic,
    Exponential,
    Sigmoid,
}

/// Conditional dependency between parameters
#[derive(Debug, Clone)]
pub struct ConditionalDependency {
    /// Parent parameter
    parent_param: String,
    /// Child parameter
    child_param: String,
    /// Condition
    condition: DependencyCondition,
}

/// Dependency condition types
#[derive(Debug, Clone)]
pub enum DependencyCondition {
    Equals(String),
    GreaterThan(f64),
    LessThan(f64),
    InRange(f64, f64),
    OneOf(Vec<String>),
}

/// Performance database for storing evaluation results
#[derive(Debug, Clone)]
pub struct PerformanceDatabase {
    /// Evaluation records
    records: Vec<EvaluationRecord>,
    /// Indexing for fast retrieval
    index: HashMap<String, Vec<usize>>,
    /// Performance trends
    performance_trends: HashMap<String, PerformanceTrend>,
    /// Correlation matrix
    correlation_matrix: Array2<f64>,
}

/// Evaluation record
#[derive(Debug, Clone)]
pub struct EvaluationRecord {
    /// Hyperparameter configuration
    config: HyperparameterConfig,
    /// Performance metric
    performance: f64,
    /// Evaluation cost
    cost: f64,
    /// Timestamp
    timestamp: u64,
    /// Problem characteristics
    problem_features: Array1<f64>,
    /// Fidelity level
    fidelity: f64,
    /// Additional metrics
    additional_metrics: HashMap<String, f64>,
}

/// Hyperparameter configuration
#[derive(Debug, Clone)]
pub struct HyperparameterConfig {
    /// Parameter values
    parameters: HashMap<String, ParameterValue>,
    /// Configuration hash
    config_hash: u64,
    /// Configuration embedding
    embedding: Array1<f64>,
}

/// Parameter value types
#[derive(Debug, Clone)]
pub enum ParameterValue {
    Continuous(f64),
    Discrete(i64),
    Categorical(String),
}

/// Performance trend analysis
#[derive(Debug, Clone)]
pub struct PerformanceTrend {
    /// Trend direction
    trend_direction: f64,
    /// Trend strength
    trend_strength: f64,
    /// Seasonal patterns
    seasonal_patterns: Array1<f64>,
    /// Volatility measure
    volatility: f64,
}

/// Bayesian optimizer for hyperparameter search
#[derive(Debug, Clone)]
pub struct BayesianOptimizer {
    /// Gaussian process surrogate model
    gaussian_process: GaussianProcess,
    /// Acquisition function
    acquisition_function: AcquisitionFunction,
    /// Optimization strategy
    optimization_strategy: OptimizationStrategy,
    /// Exploration-exploitation balance
    exploration_factor: f64,
}

/// Gaussian process surrogate model
#[derive(Debug, Clone)]
pub struct GaussianProcess {
    /// Training inputs
    training_inputs: Array2<f64>,
    /// Training outputs
    training_outputs: Array1<f64>,
    /// Kernel function
    kernel: KernelFunction,
    /// Kernel hyperparameters
    kernel_params: Array1<f64>,
    /// Noise variance
    noise_variance: f64,
    /// Mean function
    mean_function: MeanFunction,
}

/// Kernel function types
#[derive(Debug, Clone)]
pub enum KernelFunction {
    RBF {
        length_scale: f64,
        variance: f64,
    },
    Matern {
        nu: f64,
        length_scale: f64,
        variance: f64,
    },
    Polynomial {
        degree: i32,
        variance: f64,
    },
    Composite {
        kernels: Vec<KernelFunction>,
        weights: Array1<f64>,
    },
}

/// Mean function for GP
#[derive(Debug, Clone)]
pub enum MeanFunction {
    Zero,
    Constant(f64),
    Linear(Array1<f64>),
    Quadratic(Array2<f64>),
250
251/// Acquisition function types
252#[derive(Debug, Clone)]
253pub enum AcquisitionFunction {
254    ExpectedImprovement { xi: f64 },
255    ProbabilityOfImprovement { xi: f64 },
256    UpperConfidenceBound { beta: f64 },
257    EntropySearch { num_samples: usize },
258    MultiFidelity { alpha: f64, beta: f64 },
259}
260
261/// Optimization strategy for acquisition function
262#[derive(Debug, Clone)]
263pub enum OptimizationStrategy {
264    RandomSearch { num_candidates: usize },
265    GridSearch { grid_resolution: usize },
266    GradientBased { num_restarts: usize },
267    EvolutionarySearch { population_size: usize },
268    DIRECT { max_nit: usize },
269}
270
271/// Multi-fidelity evaluator
272#[derive(Debug, Clone)]
273pub struct MultiFidelityEvaluator {
274    /// Available fidelity levels
275    fidelity_levels: Vec<FidelityLevel>,
276    /// Cost model
277    cost_model: CostModel,
278    /// Fidelity selection strategy
279    selection_strategy: FidelitySelectionStrategy,
280    /// Correlation estimator
281    correlation_estimator: FidelityCorrelationEstimator,
282}
283
284/// Fidelity level definition
285#[derive(Debug, Clone)]
286pub struct FidelityLevel {
287    /// Fidelity value (0.0 to 1.0)
288    fidelity: f64,
289    /// Cost multiplier
290    cost_multiplier: f64,
291    /// Accuracy estimate
292    accuracy: f64,
293    /// Resource requirements
294    resource_requirements: ResourceRequirements,
295}
296
297/// Resource requirements for evaluation
298#[derive(Debug, Clone)]
299pub struct ResourceRequirements {
300    /// Computational time
301    computation_time: f64,
302    /// Memory usage
303    memory_usage: f64,
304    /// CPU cores
305    cpu_cores: usize,
306    /// GPU requirements
307    gpu_required: bool,
308}
309
310/// Cost model for evaluations
311#[derive(Debug, Clone)]
312pub struct CostModel {
313    /// Cost prediction network
314    cost_network: Array2<f64>,
315    /// Base cost parameters
316    base_cost: f64,
317    /// Scaling factors
318    scaling_factors: Array1<f64>,
319    /// Historical cost data
320    cost_history: VecDeque<(f64, f64)>, // (fidelity, cost)
321}
322
323/// Fidelity selection strategy
324#[derive(Debug, Clone)]
325pub enum FidelitySelectionStrategy {
326    Static(f64),
327    Adaptive {
328        initial_fidelity: f64,
329        adaptation_rate: f64,
330    },
331    BanditBased {
332        epsilon: f64,
333    },
334    Predictive {
335        prediction_horizon: usize,
336    },
337}
338
339/// Correlation estimator between fidelities
340#[derive(Debug, Clone)]
341pub struct FidelityCorrelationEstimator {
342    /// Correlation matrix
343    correlation_matrix: Array2<f64>,
344    /// Estimation method
345    estimation_method: CorrelationMethod,
346    /// Confidence intervals
347    confidence_intervals: Array2<f64>,
348}
349
350/// Correlation estimation methods
351#[derive(Debug, Clone)]
352pub enum CorrelationMethod {
353    Pearson,
354    Spearman,
355    Kendall,
356    MutualInformation,
357}
358
359/// Hyperparameter tuning statistics
360#[derive(Debug, Clone)]
361pub struct HyperparameterTuningStats {
362    /// Total evaluations performed
363    total_evaluations: usize,
364    /// Best performance found
365    best_performance: f64,
366    /// Total cost spent
367    total_cost: f64,
368    /// Convergence rate
369    convergence_rate: f64,
370    /// Exploration efficiency
371    exploration_efficiency: f64,
372    /// Multi-fidelity savings
373    multi_fidelity_savings: f64,
374}
375
376impl LearnedHyperparameterTuner {
377    /// Create new learned hyperparameter tuner
378    pub fn new(config: LearnedOptimizationConfig) -> Self {
379        let hyperparameter_space = HyperparameterSpace::create_default_space();
380        let performance_database = PerformanceDatabase::new();
381        let bayesian_optimizer = BayesianOptimizer::new();
382        let multi_fidelity_evaluator = MultiFidelityEvaluator::new();
383        let hidden_size = config.hidden_size;
384
385        Self {
386            config,
387            hyperparameter_space,
388            performance_database,
389            bayesian_optimizer,
390            multi_fidelity_evaluator,
391            meta_state: MetaOptimizerState {
392                meta_params: Array1::zeros(hidden_size),
393                network_weights: Array2::zeros((hidden_size, hidden_size)),
394                performance_history: Vec::new(),
395                adaptation_stats: super::AdaptationStatistics::default(),
396                episode: 0,
397            },
398            tuning_stats: HyperparameterTuningStats::default(),
399        }
400    }
401
402    /// Tune hyperparameters for optimization problem
403    pub fn tune_hyperparameters<F>(
404        &mut self,
405        objective: F,
406        initial_params: &ArrayView1<f64>,
407        problem: &OptimizationProblem,
408        budget: f64,
409    ) -> OptimizeResult<HyperparameterConfig>
410    where
411        F: Fn(&ArrayView1<f64>) -> f64,
412    {
413        let mut remaining_budget = budget;
414        let mut best_config = self.get_default_config()?;
415        let mut best_performance = f64::INFINITY;
416
417        // Extract problem features
418        let problem_features =
419            self.extract_problem_features(&objective, initial_params, problem)?;
420
421        // Initialize with promising configurations from database
422        let promising_configs = self.get_promising_configurations(&problem_features)?;
423
424        // Evaluate promising configurations
425        for config in promising_configs {
426            if remaining_budget <= 0.0 {
427                break;
428            }
429
430            let (performance, cost) =
431                self.evaluate_configuration(&objective, initial_params, &config)?;
432            remaining_budget -= cost;
433
434            // Update database
435            self.add_evaluation_record(config.clone(), performance, cost, &problem_features)?;
436
437            if performance < best_performance {
438                best_performance = performance;
439                best_config = config;
440            }
441        }
442
443        // Bayesian optimization loop
444        while remaining_budget > 0.0 {
445            // Update Gaussian process
446            self.update_gaussian_process()?;
447
448            // Select next configuration to evaluate
449            let next_config = self.select_next_configuration(&problem_features)?;
450
451            // Select fidelity level
452            let fidelity = self.select_fidelity_level(&next_config, remaining_budget)?;
453
454            // Evaluate configuration
455            let (performance, cost) = self.evaluate_configuration_with_fidelity(
456                &objective,
457                initial_params,
458                &next_config,
459                fidelity,
460            )?;
461
462            remaining_budget -= cost;
463
464            // Update database
465            self.add_evaluation_record(next_config.clone(), performance, cost, &problem_features)?;
466
467            // Update best configuration
468            if performance < best_performance {
469                best_performance = performance;
470                best_config = next_config;
471            }
472
473            // Update statistics
474            self.update_tuning_stats(performance, cost)?;
475
476            // Check convergence
477            if self.check_convergence() {
478                break;
479            }
480        }
481
482        Ok(best_config)
483    }

    /// Extract problem features for configuration selection
    fn extract_problem_features<F>(
        &self,
        objective: &F,
        initial_params: &ArrayView1<f64>,
        problem: &OptimizationProblem,
    ) -> OptimizeResult<Array1<f64>>
    where
        F: Fn(&ArrayView1<f64>) -> f64,
    {
        let mut features = Array1::zeros(20);

        // Problem dimension
        features[0] = (problem.dimension as f64).ln();

        // Objective landscape features
        let f0 = objective(initial_params);
        features[1] = f0.abs().ln();

        // Gradient features
        let h = 1e-6;
        let mut gradient_norm = 0.0;
        for i in 0..initial_params.len().min(10) {
            let mut params_plus = initial_params.to_owned();
            params_plus[i] += h;
            let f_plus = objective(&params_plus.view());
            let grad_i = (f_plus - f0) / h;
            gradient_norm += grad_i * grad_i;
        }
        gradient_norm = gradient_norm.sqrt();
        features[2] = gradient_norm.ln();

        // Parameter statistics
        features[3] = initial_params.view().mean();
        features[4] = initial_params.variance().sqrt();
        features[5] = initial_params.fold(-f64::INFINITY, |a, &b| a.max(b));
        features[6] = initial_params.fold(f64::INFINITY, |a, &b| a.min(b));

        // Problem class encoding
        match problem.problem_class.as_str() {
            "quadratic" => features[7] = 1.0,
            "neural_network" => features[8] = 1.0,
            "sparse" => features[9] = 1.0,
            _ => features[10] = 1.0,
        }

        // Budget and accuracy requirements
        features[11] = (problem.max_evaluations as f64).ln();
        features[12] = problem.target_accuracy.ln().abs();

        // Add metadata features
        for (i, (_, &value)) in problem.metadata.iter().enumerate() {
            if 13 + i < features.len() {
                features[13 + i] = value.tanh();
            }
        }

        Ok(features)
    }

    /// Get promising configurations from database
    fn get_promising_configurations(
        &self,
        problem_features: &Array1<f64>,
    ) -> OptimizeResult<Vec<HyperparameterConfig>> {
        let mut configs = Vec::new();
        let mut similarities = Vec::new();

        // Find similar problems in database
        for record in &self.performance_database.records {
            let similarity =
                self.compute_problem_similarity(problem_features, &record.problem_features)?;
            similarities.push((record, similarity));
        }

        // Sort by similarity and performance
        similarities.sort_by(|a, b| {
            let combined_score_a = a.1 * (1.0 / (1.0 + a.0.performance));
            let combined_score_b = b.1 * (1.0 / (1.0 + b.0.performance));
            combined_score_b
                .partial_cmp(&combined_score_a)
                .unwrap_or(std::cmp::Ordering::Equal)
        });

        // Select top configurations
        for (record, _similarity) in similarities.into_iter().take(5) {
            configs.push(record.config.clone());
        }

        // Add some random configurations for exploration
        for _ in 0..3 {
            configs.push(self.sample_random_configuration()?);
        }

        Ok(configs)
    }

    /// Compute similarity between problem features
    fn compute_problem_similarity(
        &self,
        features1: &Array1<f64>,
        features2: &Array1<f64>,
    ) -> OptimizeResult<f64> {
        // Cosine similarity
        let dot_product = features1
            .iter()
            .zip(features2.iter())
            .map(|(&a, &b)| a * b)
            .sum::<f64>();

        let norm1 = (features1.iter().map(|&x| x * x).sum::<f64>()).sqrt();
        let norm2 = (features2.iter().map(|&x| x * x).sum::<f64>()).sqrt();

        if norm1 > 0.0 && norm2 > 0.0 {
            Ok(dot_product / (norm1 * norm2))
        } else {
            Ok(0.0)
        }
    }

    /// Sample random configuration from hyperparameter space
    fn sample_random_configuration(&self) -> OptimizeResult<HyperparameterConfig> {
        let mut parameters = HashMap::new();

        // Sample continuous parameters
        for param in &self.hyperparameter_space.continuous_params {
            let value = match param.scale {
                ParameterScale::Linear => {
                    param.lower_bound
                        + rand::rng().random::<f64>() * (param.upper_bound - param.lower_bound)
                }
                ParameterScale::Logarithmic => {
                    let log_lower = param.lower_bound.ln();
                    let log_upper = param.upper_bound.ln();
                    (log_lower + rand::rng().random::<f64>() * (log_upper - log_lower)).exp()
                }
                _ => param.default_value,
            };

            parameters.insert(param.name.clone(), ParameterValue::Continuous(value));
        }

        // Sample discrete parameters
        for param in &self.hyperparameter_space.discrete_params {
            let idx = rand::rng().random_range(0..param.values.len());
            let value = param.values[idx];
            parameters.insert(param.name.clone(), ParameterValue::Discrete(value));
        }

        // Sample categorical parameters
        for param in &self.hyperparameter_space.categorical_params {
            let idx = rand::rng().random_range(0..param.categories.len());
            let value = param.categories[idx].clone();
            parameters.insert(param.name.clone(), ParameterValue::Categorical(value));
        }

        Ok(HyperparameterConfig::new(parameters))
    }

    /// Get default configuration
    fn get_default_config(&self) -> OptimizeResult<HyperparameterConfig> {
        let mut parameters = HashMap::new();

        for param in &self.hyperparameter_space.continuous_params {
            parameters.insert(
                param.name.clone(),
                ParameterValue::Continuous(param.default_value),
            );
        }

        for param in &self.hyperparameter_space.discrete_params {
            parameters.insert(
                param.name.clone(),
                ParameterValue::Discrete(param.default_value),
            );
        }

        for param in &self.hyperparameter_space.categorical_params {
            parameters.insert(
                param.name.clone(),
                ParameterValue::Categorical(param.default_category.clone()),
            );
        }

        Ok(HyperparameterConfig::new(parameters))
    }

    /// Evaluate configuration
    fn evaluate_configuration<F>(
        &self,
        objective: &F,
        initial_params: &ArrayView1<f64>,
        config: &HyperparameterConfig,
    ) -> OptimizeResult<(f64, f64)>
    where
        F: Fn(&ArrayView1<f64>) -> f64,
    {
        self.evaluate_configuration_with_fidelity(objective, initial_params, config, 1.0)
    }

    /// Evaluate configuration with specified fidelity
    fn evaluate_configuration_with_fidelity<F>(
        &self,
        objective: &F,
        initial_params: &ArrayView1<f64>,
        config: &HyperparameterConfig,
        fidelity: f64,
    ) -> OptimizeResult<(f64, f64)>
    where
        F: Fn(&ArrayView1<f64>) -> f64,
    {
        // Create optimizer with specified configuration
        let optimizer_result =
            self.create_optimizer_from_config(config, objective, initial_params, fidelity)?;

        // Compute cost based on fidelity
        let base_cost = 1.0;
        let cost = base_cost * self.multi_fidelity_evaluator.cost_model.base_cost * fidelity;

        Ok((optimizer_result.fun, cost))
    }

    /// Create optimizer from configuration
    fn create_optimizer_from_config<F>(
        &self,
        config: &HyperparameterConfig,
        objective: &F,
        initial_params: &ArrayView1<f64>,
        fidelity: f64,
    ) -> OptimizeResult<OptimizeResults<f64>>
    where
        F: Fn(&ArrayView1<f64>) -> f64,
    {
        // Extract optimization parameters from config
        let learning_rate = match config.parameters.get("learning_rate") {
            Some(ParameterValue::Continuous(lr)) => *lr,
            _ => 0.01,
        };

        let max_nit = match config.parameters.get("max_nit") {
            Some(ParameterValue::Discrete(iters)) => (*iters as f64 * fidelity) as usize,
            _ => (100.0 * fidelity) as usize,
        };

        // Simple optimization with extracted parameters
        let mut current_params = initial_params.to_owned();
        let mut best_value = objective(initial_params);

        for iter in 0..max_nit {
            // Compute gradient
            let h = 1e-6;
            let f0 = objective(&current_params.view());
            let mut gradient = Array1::zeros(current_params.len());

            for i in 0..current_params.len() {
                let mut params_plus = current_params.clone();
                params_plus[i] += h;
                let f_plus = objective(&params_plus.view());
                gradient[i] = (f_plus - f0) / h;
            }

            // Update parameters
            for i in 0..current_params.len() {
                current_params[i] -= learning_rate * gradient[i];
            }

            let current_value = objective(&current_params.view());
            if current_value < best_value {
                best_value = current_value;
            }

            // Early stopping for low fidelity
            if fidelity < 1.0 && iter > (max_nit / 2) {
                break;
            }
        }

        Ok(OptimizeResults::<f64> {
            x: current_params,
            fun: best_value,
            success: true,
            nit: max_nit,
            message: "Hyperparameter evaluation completed".to_string(),
            jac: None,
            hess: None,
            constr: None,
            nfev: max_nit,
            njev: 0,
            nhev: 0,
            maxcv: 0,
            status: 0,
        })
    }

    /// Add evaluation record to database
    fn add_evaluation_record(
        &mut self,
        config: HyperparameterConfig,
        performance: f64,
        cost: f64,
        problem_features: &Array1<f64>,
    ) -> OptimizeResult<()> {
        let record = EvaluationRecord {
            config,
            performance,
            cost,
            timestamp: std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .unwrap_or_default()
                .as_secs(),
            problem_features: problem_features.clone(),
            fidelity: 1.0,
            additional_metrics: HashMap::new(),
        };

        self.performance_database.add_record(record);
        Ok(())
    }

    /// Update Gaussian process with new data
    fn update_gaussian_process(&mut self) -> OptimizeResult<()> {
        // Extract training data from database
        let (inputs, outputs) = self.extract_training_data()?;

        // Update GP
        self.bayesian_optimizer
            .gaussian_process
            .update_training_data(inputs, outputs)?;

        // Optimize hyperparameters
        self.bayesian_optimizer
            .gaussian_process
            .optimize_hyperparameters()?;

        Ok(())
    }

    /// Extract training data from database
    fn extract_training_data(&self) -> OptimizeResult<(Array2<f64>, Array1<f64>)> {
        let num_records = self.performance_database.records.len();
        if num_records == 0 {
            return Ok((Array2::zeros((0, 10)), Array1::zeros(0)));
        }

        let input_dim = self.performance_database.records[0].config.embedding.len();
        let mut inputs = Array2::zeros((num_records, input_dim));
        let mut outputs = Array1::zeros(num_records);

        for (i, record) in self.performance_database.records.iter().enumerate() {
            for j in 0..input_dim.min(record.config.embedding.len()) {
                inputs[[i, j]] = record.config.embedding[j];
            }
            outputs[i] = record.performance;
        }

        Ok((inputs, outputs))
    }

    /// Select next configuration to evaluate
    fn select_next_configuration(
        &self,
        _problem_features: &Array1<f64>,
    ) -> OptimizeResult<HyperparameterConfig> {
        // Use acquisition function to select next point
        let candidate_configs = self.generate_candidate_configurations(100)?;
        let mut best_config = candidate_configs[0].clone();
        let mut best_acquisition = f64::NEG_INFINITY;

        for config in candidate_configs {
            let acquisition_value = self.evaluate_acquisition_function(&config)?;
            if acquisition_value > best_acquisition {
                best_acquisition = acquisition_value;
                best_config = config;
            }
        }

        Ok(best_config)
    }

    /// Generate candidate configurations
    fn generate_candidate_configurations(
        &self,
        num_candidates: usize,
    ) -> OptimizeResult<Vec<HyperparameterConfig>> {
        let mut candidates = Vec::new();

        for _ in 0..num_candidates {
            candidates.push(self.sample_random_configuration()?);
        }

        Ok(candidates)
    }

    /// Evaluate acquisition function
    fn evaluate_acquisition_function(&self, config: &HyperparameterConfig) -> OptimizeResult<f64> {
        // Predict mean and variance using GP
        let (mean, variance) = self
            .bayesian_optimizer
            .gaussian_process
            .predict(&config.embedding)?;

        // Compute acquisition function value
        let acquisition_value = match &self.bayesian_optimizer.acquisition_function {
            AcquisitionFunction::ExpectedImprovement { xi } => {
                let best_value = self.get_best_performance();
                let improvement = best_value - mean;
                let std_dev = variance.sqrt();

                if std_dev > 1e-8 {
                    let z = (improvement + xi) / std_dev;
                    improvement * self.normal_cdf(z) + std_dev * self.normal_pdf(z)
                } else {
                    0.0
                }
            }
            AcquisitionFunction::UpperConfidenceBound { beta } => mean + beta * variance.sqrt(),
            _ => mean + variance.sqrt(), // Default UCB
        };

        Ok(acquisition_value)
    }

    /// Normal CDF approximation
    fn normal_cdf(&self, x: f64) -> f64 {
        // Approximation of error function for Gaussian CDF
        // Using tanh approximation: erf(x) ≈ tanh(√(π/2) * x)
        let sqrt_pi_over_2 = (std::f64::consts::PI / 2.0).sqrt();
        0.5 * (1.0 + (sqrt_pi_over_2 * x / 2.0_f64.sqrt()).tanh())
    }

    /// Normal PDF
    fn normal_pdf(&self, x: f64) -> f64 {
        (1.0 / (2.0 * std::f64::consts::PI).sqrt()) * (-0.5 * x * x).exp()
    }

    /// Get best performance from database
    fn get_best_performance(&self) -> f64 {
        self.performance_database
            .records
            .iter()
            .map(|r| r.performance)
            .fold(f64::INFINITY, |a, b| a.min(b))
    }

    /// Select fidelity level for evaluation
    fn select_fidelity_level(
        &self,
        _config: &HyperparameterConfig,
        remaining_budget: f64,
    ) -> OptimizeResult<f64> {
        match &self.multi_fidelity_evaluator.selection_strategy {
            FidelitySelectionStrategy::Static(fidelity) => Ok(*fidelity),
            FidelitySelectionStrategy::Adaptive {
                initial_fidelity,
                adaptation_rate: _,
            } => {
                // Simple adaptive strategy based on the remaining budget
                let budget_ratio = remaining_budget / self.tuning_stats.total_cost.max(1.0);
                Ok(initial_fidelity * budget_ratio.max(0.1).min(1.0))
            }
            _ => Ok(0.5), // Default medium fidelity
        }
    }

    /// Update tuning statistics
    fn update_tuning_stats(&mut self, performance: f64, cost: f64) -> OptimizeResult<()> {
        self.tuning_stats.total_evaluations += 1;
        self.tuning_stats.total_cost += cost;

        let previous_best = self.tuning_stats.best_performance;
        if performance < self.tuning_stats.best_performance {
            self.tuning_stats.best_performance = performance;
        }

        // Update convergence rate (simplified): improvement of the best performance,
        // averaged over the evaluations performed so far
        if self.tuning_stats.total_evaluations > 1 && previous_best.is_finite() {
            let improvement_rate = (previous_best - self.tuning_stats.best_performance)
                / self.tuning_stats.total_evaluations as f64;
            self.tuning_stats.convergence_rate = improvement_rate.max(0.0);
        }

        Ok(())
    }

    /// Check convergence criteria
    fn check_convergence(&self) -> bool {
        // Simple convergence check
        self.tuning_stats.total_evaluations > 50 && self.tuning_stats.convergence_rate < 1e-6
    }

    /// Get tuning statistics
    pub fn get_tuning_stats(&self) -> &HyperparameterTuningStats {
        &self.tuning_stats
    }
}

impl HyperparameterSpace {
    /// Create default hyperparameter space for optimization
    pub fn create_default_space() -> Self {
        let continuous_params = vec![
            ContinuousHyperparameter {
                name: "learning_rate".to_string(),
                lower_bound: 1e-5,
                upper_bound: 1.0,
                scale: ParameterScale::Logarithmic,
                default_value: 0.01,
                importance_score: 1.0,
            },
            ContinuousHyperparameter {
                name: "momentum".to_string(),
                lower_bound: 0.0,
                upper_bound: 0.99,
                scale: ParameterScale::Linear,
                default_value: 0.9,
                importance_score: 0.8,
            },
            ContinuousHyperparameter {
                name: "weight_decay".to_string(),
                lower_bound: 1e-8,
                upper_bound: 1e-2,
                scale: ParameterScale::Logarithmic,
                default_value: 1e-4,
                importance_score: 0.6,
            },
        ];

        let discrete_params = vec![
            DiscreteHyperparameter {
                name: "max_nit".to_string(),
                values: vec![10, 50, 100, 500, 1000],
                default_value: 100,
                importance_score: 0.9,
            },
            DiscreteHyperparameter {
                name: "batch_size".to_string(),
                values: vec![1, 8, 16, 32, 64, 128],
                default_value: 32,
                importance_score: 0.7,
            },
        ];

        let categorical_params = vec![CategoricalHyperparameter {
            name: "optimizer_type".to_string(),
            categories: vec!["sgd".to_string(), "adam".to_string(), "lbfgs".to_string()],
            default_category: "adam".to_string(),
            category_embeddings: HashMap::new(),
            importance_score: 1.0,
        }];

        Self {
            continuous_params,
            discrete_params,
            categorical_params,
            conditional_dependencies: Vec::new(),
            parameter_bounds: HashMap::new(),
        }
    }
}

impl HyperparameterConfig {
    /// Create new hyperparameter configuration
    pub fn new(parameters: HashMap<String, ParameterValue>) -> Self {
        let config_hash = Self::compute_hash(&parameters);
        let embedding = Self::compute_embedding(&parameters);

        Self {
            parameters,
            config_hash,
            embedding,
        }
    }

    /// Compute hash for configuration
    fn compute_hash(parameters: &HashMap<String, ParameterValue>) -> u64 {
        // Simplified hash computation
        let mut hash = 0u64;
        for (key, value) in parameters {
            hash ^= Self::hash_string(key);
            hash ^= Self::hash_parameter_value(value);
        }
        hash
    }

    /// Hash string
    fn hash_string(s: &str) -> u64 {
        // Simple string hash
        s.bytes().fold(0u64, |hash, byte| {
            hash.wrapping_mul(31).wrapping_add(byte as u64)
        })
    }

    /// Hash parameter value
    fn hash_parameter_value(value: &ParameterValue) -> u64 {
        match value {
            ParameterValue::Continuous(v) => v.to_bits(),
            ParameterValue::Discrete(v) => *v as u64,
            ParameterValue::Categorical(s) => Self::hash_string(s),
        }
    }

    /// Compute embedding for configuration
    fn compute_embedding(parameters: &HashMap<String, ParameterValue>) -> Array1<f64> {
        let mut embedding = Array1::zeros(32); // Fixed embedding size

        let mut idx = 0;
        for (_, value) in parameters {
            if idx >= embedding.len() {
                break;
            }

            match value {
                ParameterValue::Continuous(v) => {
                    embedding[idx] = v.tanh();
                    idx += 1;
                }
                ParameterValue::Discrete(v) => {
                    embedding[idx] = (*v as f64 / 100.0).tanh();
                    idx += 1;
                }
                ParameterValue::Categorical(s) => {
                    // Simple categorical encoding
                    let hash_val = Self::hash_string(s) as f64 / u64::MAX as f64;
                    embedding[idx] = (hash_val * 2.0 - 1.0).tanh();
                    idx += 1;
                }
            }
        }

        embedding
    }
}

impl PerformanceDatabase {
    /// Create new performance database
    pub fn new() -> Self {
        Self {
            records: Vec::new(),
            index: HashMap::new(),
            performance_trends: HashMap::new(),
            correlation_matrix: Array2::zeros((0, 0)),
        }
    }

    /// Add evaluation record
    pub fn add_record(&mut self, record: EvaluationRecord) {
        self.records.push(record);

        // Update index (simplified)
        let record_idx = self.records.len() - 1;
        self.index
            .entry("all".to_string())
            .or_insert_with(Vec::new)
            .push(record_idx);
    }
}

impl BayesianOptimizer {
    /// Create new Bayesian optimizer
    pub fn new() -> Self {
        Self {
            gaussian_process: GaussianProcess::new(),
            acquisition_function: AcquisitionFunction::ExpectedImprovement { xi: 0.01 },
            optimization_strategy: OptimizationStrategy::RandomSearch {
                num_candidates: 100,
            },
            exploration_factor: 0.1,
        }
    }
}

impl GaussianProcess {
    /// Create new Gaussian process
    pub fn new() -> Self {
        Self {
            training_inputs: Array2::zeros((0, 0)),
            training_outputs: Array1::zeros(0),
            kernel: KernelFunction::RBF {
                length_scale: 1.0,
                variance: 1.0,
            },
            kernel_params: Array1::from(vec![1.0, 1.0]),
            noise_variance: 0.1,
            mean_function: MeanFunction::Zero,
        }
    }

    /// Update training data
    pub fn update_training_data(
        &mut self,
        inputs: Array2<f64>,
        outputs: Array1<f64>,
    ) -> OptimizeResult<()> {
        self.training_inputs = inputs;
        self.training_outputs = outputs;
        Ok(())
    }

    /// Optimize hyperparameters
    pub fn optimize_hyperparameters(&mut self) -> OptimizeResult<()> {
        // Simplified hyperparameter optimization
        // In practice, would use marginal likelihood optimization
        Ok(())
    }

    /// Predict mean and variance
    pub fn predict(&self, input: &Array1<f64>) -> OptimizeResult<(f64, f64)> {
        if self.training_inputs.is_empty() {
            return Ok((0.0, 1.0));
        }

        // Simplified GP prediction
        let mean = 0.0; // Would compute proper posterior mean
        let variance = 1.0; // Would compute proper posterior variance

        Ok((mean, variance))
    }
}

impl MultiFidelityEvaluator {
    /// Create new multi-fidelity evaluator
    pub fn new() -> Self {
        let fidelity_levels = vec![
            FidelityLevel {
                fidelity: 0.1,
                cost_multiplier: 0.1,
                accuracy: 0.7,
                resource_requirements: ResourceRequirements {
                    computation_time: 1.0,
                    memory_usage: 0.5,
                    cpu_cores: 1,
                    gpu_required: false,
                },
            },
            FidelityLevel {
                fidelity: 0.5,
                cost_multiplier: 0.5,
                accuracy: 0.9,
                resource_requirements: ResourceRequirements {
                    computation_time: 5.0,
                    memory_usage: 1.0,
                    cpu_cores: 2,
                    gpu_required: false,
                },
            },
            FidelityLevel {
                fidelity: 1.0,
                cost_multiplier: 1.0,
                accuracy: 1.0,
                resource_requirements: ResourceRequirements {
                    computation_time: 10.0,
                    memory_usage: 2.0,
                    cpu_cores: 4,
                    gpu_required: true,
                },
            },
        ];

        Self {
            fidelity_levels,
            cost_model: CostModel::new(),
            selection_strategy: FidelitySelectionStrategy::Adaptive {
                initial_fidelity: 0.5,
                adaptation_rate: 0.1,
            },
            correlation_estimator: FidelityCorrelationEstimator::new(),
        }
    }
}

impl CostModel {
    /// Create new cost model
    pub fn new() -> Self {
        Self {
            cost_network: Array2::from_shape_fn((1, 10), |_| {
                (rand::rng().random::<f64>() - 0.5) * 0.1
            }),
            base_cost: 1.0,
            scaling_factors: Array1::ones(5),
            cost_history: VecDeque::with_capacity(1000),
        }
    }
}

impl FidelityCorrelationEstimator {
    /// Create new correlation estimator
    pub fn new() -> Self {
        Self {
            correlation_matrix: Array2::eye(3),
            estimation_method: CorrelationMethod::Pearson,
            confidence_intervals: Array2::zeros((3, 2)),
        }
    }
}

impl Default for HyperparameterTuningStats {
    fn default() -> Self {
        Self {
            total_evaluations: 0,
            best_performance: f64::INFINITY,
            total_cost: 0.0,
            convergence_rate: 0.0,
            exploration_efficiency: 0.0,
            multi_fidelity_savings: 0.0,
        }
    }
}

impl LearnedOptimizer for LearnedHyperparameterTuner {
    fn meta_train(&mut self, training_tasks: &[TrainingTask]) -> OptimizeResult<()> {
        for task in training_tasks {
            // Create simple objective for training
            let training_objective = |x: &ArrayView1<f64>| x.iter().map(|&xi| xi * xi).sum::<f64>();

            let initial_params = Array1::zeros(task.problem.dimension);

            // Tune hyperparameters for this task
            let _best_config = self.tune_hyperparameters(
                training_objective,
                &initial_params.view(),
                &task.problem,
                10.0,
            )?;
        }

        Ok(())
    }

    fn adapt_to_problem(
        &mut self,
        problem: &OptimizationProblem,
        initial_params: &ArrayView1<f64>,
    ) -> OptimizeResult<()> {
        // Extract problem features for future configuration selection
        let simple_objective = |_x: &ArrayView1<f64>| 0.0;
        let _problem_features =
            self.extract_problem_features(&simple_objective, initial_params, problem)?;

        Ok(())
    }

    fn optimize<F>(
        &mut self,
        objective: F,
        initial_params: &ArrayView1<f64>,
    ) -> OptimizeResult<OptimizeResults<f64>>
    where
        F: Fn(&ArrayView1<f64>) -> f64,
    {
        // Create default problem for hyperparameter tuning
        let default_problem = OptimizationProblem {
            name: "hyperparameter_tuning".to_string(),
            dimension: initial_params.len(),
            problem_class: "general".to_string(),
            metadata: HashMap::new(),
            max_evaluations: 1000,
            target_accuracy: 1e-6,
        };

        // Tune hyperparameters
        let best_config =
            self.tune_hyperparameters(&objective, initial_params, &default_problem, 20.0)?;

        // Use best configuration for final optimization
        self.create_optimizer_from_config(&best_config, &objective, initial_params, 1.0)
    }

    fn get_state(&self) -> &MetaOptimizerState {
        &self.meta_state
    }

    fn reset(&mut self) {
        self.performance_database = PerformanceDatabase::new();
        self.tuning_stats = HyperparameterTuningStats::default();
    }
}

/// Convenience function for learned hyperparameter tuning
#[allow(dead_code)]
pub fn hyperparameter_tuning_optimize<F>(
    objective: F,
    initial_params: &ArrayView1<f64>,
    config: Option<LearnedOptimizationConfig>,
) -> super::OptimizeResult<OptimizeResults<f64>>
where
    F: Fn(&ArrayView1<f64>) -> f64,
{
    let config = config.unwrap_or_default();
    let mut tuner = LearnedHyperparameterTuner::new(config);
    tuner.optimize(objective, initial_params)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_hyperparameter_tuner_creation() {
        let config = LearnedOptimizationConfig::default();
        let tuner = LearnedHyperparameterTuner::new(config);

        assert_eq!(tuner.tuning_stats.total_evaluations, 0);
        assert!(!tuner.hyperparameter_space.continuous_params.is_empty());
    }

    #[test]
    fn test_hyperparameter_space() {
        let space = HyperparameterSpace::create_default_space();

        assert!(!space.continuous_params.is_empty());
        assert!(!space.discrete_params.is_empty());
        assert!(!space.categorical_params.is_empty());
    }

    #[test]
    fn test_hyperparameter_config() {
        let mut parameters = HashMap::new();
        parameters.insert(
            "learning_rate".to_string(),
            ParameterValue::Continuous(0.01),
        );
        parameters.insert("max_nit".to_string(), ParameterValue::Discrete(100));
        parameters.insert(
            "optimizer_type".to_string(),
            ParameterValue::Categorical("adam".to_string()),
        );

        let config = HyperparameterConfig::new(parameters);

        assert!(config.config_hash != 0);
        assert_eq!(config.embedding.len(), 32);
        assert!(config.embedding.iter().all(|&x| x.is_finite()));
    }
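
    // A hedged sanity check added for illustration (not part of the original test
    // suite): randomly sampled configurations should respect the bounds declared in
    // the default space. It only uses items defined earlier in this file.
    #[test]
    fn test_random_configuration_within_bounds() {
        let config = LearnedOptimizationConfig::default();
        let tuner = LearnedHyperparameterTuner::new(config);

        let sampled = tuner.sample_random_configuration().unwrap();
        for param in &tuner.hyperparameter_space.continuous_params {
            if let Some(ParameterValue::Continuous(v)) = sampled.parameters.get(&param.name) {
                assert!(*v >= param.lower_bound && *v <= param.upper_bound);
            }
        }
    }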

    #[test]
    fn test_problem_similarity() {
        let config = LearnedOptimizationConfig::default();
        let tuner = LearnedHyperparameterTuner::new(config);

        let features1 = Array1::from(vec![1.0, 0.0, 0.0]);
        let features2 = Array1::from(vec![0.0, 1.0, 0.0]);
        let features3 = Array1::from(vec![1.0, 0.1, 0.1]);

        let sim1 = tuner
            .compute_problem_similarity(&features1, &features2)
            .unwrap();
        let sim2 = tuner
            .compute_problem_similarity(&features1, &features3)
            .unwrap();

        assert!(sim2 > sim1); // features3 should be more similar to features1
    }
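
    // Another small sketch added for illustration: identical parameter maps should
    // produce identical hashes and embeddings, since both are derived
    // deterministically from the parameter values.
    #[test]
    fn test_config_hash_and_embedding_are_deterministic() {
        let mut params_a = HashMap::new();
        params_a.insert(
            "learning_rate".to_string(),
            ParameterValue::Continuous(0.1),
        );
        let mut params_b = HashMap::new();
        params_b.insert(
            "learning_rate".to_string(),
            ParameterValue::Continuous(0.1),
        );

        let config_a = HyperparameterConfig::new(params_a);
        let config_b = HyperparameterConfig::new(params_b);

        assert_eq!(config_a.config_hash, config_b.config_hash);
        assert_eq!(config_a.embedding, config_b.embedding);
    }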

    #[test]
    fn test_gaussian_process() {
        let mut gp = GaussianProcess::new();

        let inputs = Array2::from_shape_fn((3, 2), |_| rand::rng().random::<f64>());
        let outputs = Array1::from(vec![1.0, 2.0, 3.0]);

        gp.update_training_data(inputs, outputs).unwrap();

        let test_input = Array1::from(vec![0.5, 0.5]);
        let (mean, variance) = gp.predict(&test_input).unwrap();

        assert!(mean.is_finite());
        assert!(variance >= 0.0);
    }
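
    // A small added sketch of the acquisition helpers: the tanh-based CDF
    // approximation should equal 0.5 at zero and be monotone, and the PDF should
    // peak at 1/sqrt(2*pi). Only methods defined in this file are exercised.
    #[test]
    fn test_normal_cdf_and_pdf_helpers() {
        let config = LearnedOptimizationConfig::default();
        let tuner = LearnedHyperparameterTuner::new(config);

        assert!((tuner.normal_cdf(0.0) - 0.5).abs() < 1e-12);
        assert!(tuner.normal_cdf(1.0) > tuner.normal_cdf(-1.0));
        assert!((tuner.normal_pdf(0.0) - 1.0 / (2.0 * std::f64::consts::PI).sqrt()).abs() < 1e-12);
    }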

    #[test]
    fn test_hyperparameter_tuning_optimization() {
        let objective = |x: &ArrayView1<f64>| x[0].powi(2) + x[1].powi(2);
        let initial = Array1::from(vec![2.0, 2.0]);

        let config = LearnedOptimizationConfig {
            hidden_size: 32,
            ..Default::default()
        };

        let result =
            hyperparameter_tuning_optimize(objective, &initial.view(), Some(config)).unwrap();

        assert!(result.fun >= 0.0);
        assert_eq!(result.x.len(), 2);
        assert!(result.success);
    }
}

#[allow(dead_code)]
pub fn placeholder() {
    // Placeholder function to prevent unused module warnings
}