// torsh_optim/bayesian_optimization.rs
//! Bayesian Optimization for Hyperparameter Tuning
//!
//! This module implements Bayesian optimization algorithms for efficient optimization
//! of expensive black-box functions. It is particularly useful for hyperparameter
//! optimization where function evaluations are costly.
//!
//! Key features:
//! - Gaussian Process surrogate models
//! - Various acquisition functions (EI, PI, UCB, etc.)
//! - Sequential model-based optimization
//! - Multi-objective optimization support
//! - Constraint handling
//! - Parallel evaluation support

15use crate::{OptimizerError, OptimizerResult};
16use scirs2_core::RngExt;
17use std::collections::HashMap;
18use std::sync::Arc;
19use torsh_core::device::CpuDevice;
20
/// Trait for objective functions in Bayesian optimization.
///
/// Implementors define a black-box function to be *minimized*, its
/// dimensionality, and the box constraints of the search space.
pub trait ObjectiveFunction: Send + Sync {
    /// Evaluate the objective function at `parameters` (to be minimized).
    fn evaluate(&self, parameters: &[f32]) -> OptimizerResult<f32>;

    /// Get the dimension of the parameter space.
    fn dimension(&self) -> usize;

    /// Get parameter bounds (required for Bayesian optimization).
    ///
    /// Returns `(lower, upper)`; each vector should have length `dimension()`
    /// (the optimizer validates this before running).
    fn bounds(&self) -> (Vec<f32>, Vec<f32>);

    /// Get the name of the objective function (for reporting only).
    fn name(&self) -> &str {
        "Unknown"
    }

    /// Whether the function is noisy (affects GP model).
    fn is_noisy(&self) -> bool {
        false
    }
}
42
/// Configuration for Bayesian optimization.
#[derive(Debug, Clone)]
pub struct BayesianOptConfig {
    /// Maximum number of function evaluations (hard budget, including the
    /// initial random evaluations).
    pub max_evaluations: usize,
    /// Number of initial random evaluations used to seed the GP model.
    pub initial_random_evaluations: usize,
    /// Acquisition function to use
    pub acquisition_function: AcquisitionFunction,
    /// Number of optimization restarts for acquisition function
    pub acquisition_restarts: usize,
    /// Gaussian Process hyperparameters
    pub gp_config: GaussianProcessConfig,
    /// Random seed for reproducibility (falls back to seed 0 when `None`).
    pub seed: Option<u64>,
    /// Device for computations
    pub device: Arc<CpuDevice>,
    /// Verbose output (prints each evaluation to stdout)
    pub verbose: bool,
}
63
64impl Default for BayesianOptConfig {
65    fn default() -> Self {
66        Self {
67            max_evaluations: 100,
68            initial_random_evaluations: 10,
69            acquisition_function: AcquisitionFunction::ExpectedImprovement { xi: 0.01 },
70            acquisition_restarts: 10,
71            gp_config: GaussianProcessConfig::default(),
72            seed: None,
73            device: Arc::new(CpuDevice::new()),
74            verbose: false,
75        }
76    }
77}
78
/// Available acquisition functions (all are *maximized* by the optimizer
/// while the objective itself is minimized).
#[derive(Debug, Clone)]
pub enum AcquisitionFunction {
    /// Expected Improvement; `xi` is the exploration margin subtracted from
    /// the incumbent best before computing the improvement.
    ExpectedImprovement { xi: f32 },
    /// Probability of Improvement; `xi` as above.
    ProbabilityOfImprovement { xi: f32 },
    /// Upper Confidence Bound; `kappa` scales the predictive std term.
    UpperConfidenceBound { kappa: f32 },
    /// Thompson Sampling (currently falls back to a UCB-like score in
    /// `compute_acquisition`).
    ThompsonSampling,
    /// Entropy Search (currently falls back to a UCB-like score in
    /// `compute_acquisition`).
    EntropySearch,
}
93
/// Gaussian Process configuration.
#[derive(Debug, Clone)]
pub struct GaussianProcessConfig {
    /// Kernel type
    pub kernel: KernelType,
    /// Noise level added to the Gram matrix diagonal (jitter / observation noise).
    pub noise_level: f32,
    /// Length scale bounds for optimization
    pub length_scale_bounds: (f32, f32),
    /// Signal variance bounds
    pub signal_variance_bounds: (f32, f32),
    /// Whether to optimize hyperparameters
    /// NOTE(review): hyperparameter optimization is not implemented in the
    /// simplified `GaussianProcess` below; this flag is currently unused there.
    pub optimize_hyperparameters: bool,
}
108
109impl Default for GaussianProcessConfig {
110    fn default() -> Self {
111        Self {
112            kernel: KernelType::RBF { length_scale: 1.0 },
113            noise_level: 1e-6,
114            length_scale_bounds: (1e-3, 1e3),
115            signal_variance_bounds: (1e-3, 1e3),
116            optimize_hyperparameters: true,
117        }
118    }
119}
120
/// Available kernel types (covariance functions); `d` below denotes the
/// Euclidean distance between the two inputs.
#[derive(Debug, Clone)]
pub enum KernelType {
    /// Radial Basis Function (RBF) / Squared Exponential:
    /// k = exp(-d^2 / (2 l^2))
    RBF { length_scale: f32 },
    /// Matern kernel with nu = 1.5:
    /// k = (1 + sqrt(3) d / l) * exp(-sqrt(3) d / l)
    Matern15 { length_scale: f32 },
    /// Matern kernel with nu = 2.5:
    /// k = (1 + sqrt(5) d / l + 5 d^2 / (3 l^2)) * exp(-sqrt(5) d / l)
    Matern25 { length_scale: f32 },
    /// Linear kernel: k = variance * <x1, x2>
    Linear { variance: f32 },
    /// Rational Quadratic kernel:
    /// k = (1 + d^2 / (2 alpha l^2))^(-alpha)
    RationalQuadratic { length_scale: f32, alpha: f32 },
}
135
/// Data point in the optimization history.
#[derive(Debug, Clone)]
pub struct DataPoint {
    /// Parameter values at which the objective was evaluated.
    pub parameters: Vec<f32>,
    /// Objective function value
    pub value: f32,
    /// Wall-clock time the evaluation took (if measured).
    pub evaluation_time: Option<std::time::Duration>,
}
146
/// Result from Bayesian optimization.
#[derive(Debug, Clone)]
pub struct BayesianOptResult {
    /// Best (lowest-value) point found.
    pub best_point: DataPoint,
    /// All evaluated points, in evaluation order.
    pub history: Vec<DataPoint>,
    /// Number of function evaluations
    pub evaluations: usize,
    /// Whether optimization converged
    pub converged: bool,
    /// Convergence reason
    pub convergence_reason: String,
    /// Acquisition function values of the selected point at each model-based
    /// iteration (initial random evaluations have no entry here).
    pub acquisition_history: Vec<f32>,
}
163
/// Simplified Gaussian Process implementation.
///
/// Targets are standardized internally; `fit` builds a dense Gram matrix,
/// factors it with Cholesky, and precomputes `alpha` for O(n) mean prediction.
#[derive(Debug)]
pub struct GaussianProcess {
    config: GaussianProcessConfig,
    /// Training inputs (X)
    x_train: Vec<Vec<f32>>,
    /// Training outputs (y), in original (unnormalized) scale
    y_train: Vec<f32>,
    /// Gram matrix (K + noise*I)
    gram_matrix: Vec<Vec<f32>>,
    /// Lower-triangular Cholesky factor L with gram_matrix = L * L^T
    chol_gram: Vec<Vec<f32>>,
    /// Alpha vector for predictions: K^{-1} y (with y standardized)
    alpha: Vec<f32>,
    /// Mean of training data (used to standardize targets)
    y_mean: f32,
    /// Standard deviation of training data (floored at 1e-6 in `fit`)
    y_std: f32,
    /// Whether the model has been fitted
    fitted: bool,
}
185
186impl GaussianProcess {
187    pub fn new(config: GaussianProcessConfig) -> Self {
188        Self {
189            config,
190            x_train: Vec::new(),
191            y_train: Vec::new(),
192            gram_matrix: Vec::new(),
193            chol_gram: Vec::new(),
194            alpha: Vec::new(),
195            y_mean: 0.0,
196            y_std: 1.0,
197            fitted: false,
198        }
199    }
200
201    pub fn fit(&mut self, x_train: Vec<Vec<f32>>, y_train: Vec<f32>) -> OptimizerResult<()> {
202        if x_train.is_empty() || y_train.is_empty() || x_train.len() != y_train.len() {
203            return Err(OptimizerError::InvalidInput(
204                "Invalid training data".to_string(),
205            ));
206        }
207
208        self.x_train = x_train;
209        self.y_train = y_train;
210
211        // Normalize targets
212        self.y_mean = self.y_train.iter().sum::<f32>() / self.y_train.len() as f32;
213        let var = self
214            .y_train
215            .iter()
216            .map(|&y| (y - self.y_mean).powi(2))
217            .sum::<f32>()
218            / self.y_train.len() as f32;
219        self.y_std = var.sqrt().max(1e-6);
220
221        let y_normalized: Vec<f32> = self
222            .y_train
223            .iter()
224            .map(|&y| (y - self.y_mean) / self.y_std)
225            .collect();
226
227        // Build gram matrix
228        let n = self.x_train.len();
229        self.gram_matrix = vec![vec![0.0; n]; n];
230
231        for i in 0..n {
232            for j in 0..n {
233                self.gram_matrix[i][j] = self.kernel(&self.x_train[i], &self.x_train[j]);
234                if i == j {
235                    self.gram_matrix[i][j] += self.config.noise_level;
236                }
237            }
238        }
239
240        // Cholesky decomposition (simplified)
241        self.chol_gram = self.cholesky_decomposition(&self.gram_matrix)?;
242
243        // Solve for alpha
244        self.alpha = self.solve_triangular(&self.chol_gram, &y_normalized)?;
245
246        self.fitted = true;
247        Ok(())
248    }
249
250    pub fn predict(&self, x_test: &[f32]) -> OptimizerResult<(f32, f32)> {
251        if !self.fitted {
252            return Err(OptimizerError::InvalidInput("Model not fitted".to_string()));
253        }
254
255        // Compute kernel vector
256        let k_star: Vec<f32> = self
257            .x_train
258            .iter()
259            .map(|x_train| self.kernel(x_test, x_train))
260            .collect();
261
262        // Predictive mean
263        let mut mean = 0.0;
264        for i in 0..self.alpha.len() {
265            mean += self.alpha[i] * k_star[i];
266        }
267
268        // Denormalize
269        mean = mean * self.y_std + self.y_mean;
270
271        // Predictive variance (simplified)
272        let k_star_star = self.kernel(x_test, x_test);
273        let v = self.solve_triangular(&self.chol_gram, &k_star)?;
274        let var = k_star_star - v.iter().map(|x| x * x).sum::<f32>();
275        let std = (var.max(0.0).sqrt() * self.y_std).max(1e-6);
276
277        Ok((mean, std))
278    }
279
280    fn kernel(&self, x1: &[f32], x2: &[f32]) -> f32 {
281        match &self.config.kernel {
282            KernelType::RBF { length_scale } => {
283                let dist_sq = x1
284                    .iter()
285                    .zip(x2.iter())
286                    .map(|(a, b)| (a - b).powi(2))
287                    .sum::<f32>();
288                (-0.5 * dist_sq / length_scale.powi(2)).exp()
289            }
290            KernelType::Matern15 { length_scale } => {
291                let dist = x1
292                    .iter()
293                    .zip(x2.iter())
294                    .map(|(a, b)| (a - b).powi(2))
295                    .sum::<f32>()
296                    .sqrt();
297                let scaled_dist = dist * 3.0_f32.sqrt() / length_scale;
298                (1.0 + scaled_dist) * (-scaled_dist).exp()
299            }
300            KernelType::Matern25 { length_scale } => {
301                let dist = x1
302                    .iter()
303                    .zip(x2.iter())
304                    .map(|(a, b)| (a - b).powi(2))
305                    .sum::<f32>()
306                    .sqrt();
307                let scaled_dist = dist * 5.0_f32.sqrt() / length_scale;
308                (1.0 + scaled_dist + scaled_dist.powi(2) / 3.0) * (-scaled_dist).exp()
309            }
310            KernelType::Linear { variance } => {
311                variance * x1.iter().zip(x2.iter()).map(|(a, b)| a * b).sum::<f32>()
312            }
313            KernelType::RationalQuadratic {
314                length_scale,
315                alpha,
316            } => {
317                let dist_sq = x1
318                    .iter()
319                    .zip(x2.iter())
320                    .map(|(a, b)| (a - b).powi(2))
321                    .sum::<f32>();
322                (1.0 + dist_sq / (2.0 * alpha * length_scale.powi(2))).powf(-alpha)
323            }
324        }
325    }
326
327    fn cholesky_decomposition(&self, matrix: &[Vec<f32>]) -> OptimizerResult<Vec<Vec<f32>>> {
328        let n = matrix.len();
329        let mut l = vec![vec![0.0; n]; n];
330
331        for i in 0..n {
332            for j in 0..=i {
333                if i == j {
334                    let sum: f32 = (0..j).map(|k| (l[j][k] as f32).powi(2)).sum();
335                    let val = matrix[j][j] - sum;
336                    if val <= 0.0 {
337                        return Err(OptimizerError::NumericalError(
338                            "Matrix not positive definite".to_string(),
339                        ));
340                    }
341                    l[j][j] = val.sqrt();
342                } else {
343                    let sum: f32 = (0..j).map(|k| l[i][k] * l[j][k]).sum();
344                    l[i][j] = (matrix[i][j] - sum) / l[j][j];
345                }
346            }
347        }
348
349        Ok(l)
350    }
351
    /// Solve the full symmetric system (L * L^T) x = b given the Cholesky factor L.
    ///
    /// NOTE(review): despite the name, this performs BOTH a forward substitution
    /// (L z = b) and a backward substitution (L^T x = z), so the result is
    /// x = (L L^T)^{-1} b = K^{-1} b — not a single triangular solve. Callers
    /// (`fit` for alpha, `predict` for the variance quadratic form) rely on this.
    fn solve_triangular(&self, l: &[Vec<f32>], b: &[f32]) -> OptimizerResult<Vec<f32>> {
        let n = l.len();
        let mut x = vec![0.0; n];

        // Forward substitution: solve L z = b (z is stored in x).
        for i in 0..n {
            let sum: f32 = (0..i).map(|j| l[i][j] * x[j]).sum();
            x[i] = (b[i] - sum) / l[i][i];
        }

        // Backward substitution: solve L^T x = z in place.
        for i in (0..n).rev() {
            let sum: f32 = ((i + 1)..n).map(|j| l[j][i] * x[j]).sum();
            x[i] = (x[i] - sum) / l[i][i];
        }

        Ok(x)
    }
370}
371
/// Bayesian optimization algorithm: sequential model-based optimization with
/// a Gaussian Process surrogate and an acquisition function.
#[derive(Debug)]
pub struct BayesianOptimizer {
    config: BayesianOptConfig,
    /// Surrogate model, refitted before each model-based iteration.
    gp: GaussianProcess,
    /// All evaluated points so far (persists across `optimize` calls).
    history: Vec<DataPoint>,
    /// Incumbent best (lowest-value) point, if any observation exists.
    best_point: Option<DataPoint>,
}
380
381impl BayesianOptimizer {
382    pub fn new(config: BayesianOptConfig) -> Self {
383        let gp = GaussianProcess::new(config.gp_config.clone());
384
385        Self {
386            config,
387            gp,
388            history: Vec::new(),
389            best_point: None,
390        }
391    }
392
393    pub fn optimize<F: ObjectiveFunction>(
394        &mut self,
395        objective: &F,
396    ) -> OptimizerResult<BayesianOptResult> {
397        let dimension = objective.dimension();
398        let (lower_bounds, upper_bounds) = objective.bounds();
399
400        if lower_bounds.len() != dimension || upper_bounds.len() != dimension {
401            return Err(OptimizerError::InvalidInput(
402                "Bounds dimension mismatch".to_string(),
403            ));
404        }
405
406        // ✅ SciRS2 Policy Compliant - Using scirs2_core::random instead of direct rand
407        use scirs2_core::random::{Random, Rng};
408
409        let mut rng = if let Some(seed) = self.config.seed {
410            Random::seed(seed)
411        } else {
412            Random::seed(0)
413        };
414
415        let mut acquisition_history = Vec::new();
416
417        // Initial random evaluations
418        for i in 0..self
419            .config
420            .initial_random_evaluations
421            .min(self.config.max_evaluations)
422        {
423            let params = self.sample_random_point(&mut rng, &lower_bounds, &upper_bounds);
424            let start_time = std::time::Instant::now();
425            let value = objective.evaluate(&params)?;
426            let evaluation_time = start_time.elapsed();
427
428            let point = DataPoint {
429                parameters: params,
430                value,
431                evaluation_time: Some(evaluation_time),
432            };
433
434            self.add_observation(point.clone());
435
436            if self.config.verbose {
437                println!(
438                    "Initial evaluation {}: f({:?}) = {:.6e}",
439                    i + 1,
440                    point.parameters,
441                    point.value
442                );
443            }
444        }
445
446        // Main optimization loop
447        let mut evaluations = self.config.initial_random_evaluations;
448
449        while evaluations < self.config.max_evaluations {
450            // Fit GP model
451            let x_train: Vec<Vec<f32>> =
452                self.history.iter().map(|p| p.parameters.clone()).collect();
453            let y_train: Vec<f32> = self.history.iter().map(|p| p.value).collect();
454
455            self.gp.fit(x_train, y_train)?;
456
457            // Optimize acquisition function
458            let next_point = self.optimize_acquisition(&mut rng, &lower_bounds, &upper_bounds)?;
459
460            // Evaluate objective at next point
461            let start_time = std::time::Instant::now();
462            let value = objective.evaluate(&next_point)?;
463            let evaluation_time = start_time.elapsed();
464
465            let point = DataPoint {
466                parameters: next_point,
467                value,
468                evaluation_time: Some(evaluation_time),
469            };
470
471            // Compute acquisition value for history
472            let acq_value = self.compute_acquisition(&point.parameters)?;
473            acquisition_history.push(acq_value);
474
475            self.add_observation(point.clone());
476            evaluations += 1;
477
478            if self.config.verbose {
479                println!(
480                    "Iteration {}: f({:?}) = {:.6e}, Best = {:.6e}",
481                    evaluations,
482                    point.parameters,
483                    point.value,
484                    self.best_point
485                        .as_ref()
486                        .expect("best_point should exist after add_observation")
487                        .value
488                );
489            }
490        }
491
492        Ok(BayesianOptResult {
493            best_point: self
494                .best_point
495                .clone()
496                .expect("best_point should exist after optimization"),
497            history: self.history.clone(),
498            evaluations,
499            converged: false, // Could implement convergence criteria
500            convergence_reason: "Maximum evaluations reached".to_string(),
501            acquisition_history,
502        })
503    }
504
505    fn add_observation(&mut self, point: DataPoint) {
506        if self.best_point.is_none()
507            || point.value
508                < self
509                    .best_point
510                    .as_ref()
511                    .expect("best_point should exist after is_none check")
512                    .value
513        {
514            self.best_point = Some(point.clone());
515        }
516        self.history.push(point);
517    }
518
519    fn sample_random_point<R: scirs2_core::random::Rng>(
520        &self,
521        rng: &mut R,
522        lower: &[f32],
523        upper: &[f32],
524    ) -> Vec<f32> {
525        (0..lower.len())
526            .map(|i| rng.random::<f32>() * (upper[i] - lower[i]) + lower[i])
527            .collect()
528    }
529
530    fn optimize_acquisition<R: scirs2_core::random::Rng>(
531        &self,
532        rng: &mut R,
533        lower: &[f32],
534        upper: &[f32],
535    ) -> OptimizerResult<Vec<f32>> {
536        let mut best_point = Vec::new();
537        let mut best_acquisition = f32::NEG_INFINITY;
538
539        // Multi-restart optimization of acquisition function
540        for _ in 0..self.config.acquisition_restarts {
541            let start_point = self.sample_random_point(rng, lower, upper);
542            let optimized_point = self.local_optimize_acquisition(start_point, lower, upper)?;
543            let acq_value = self.compute_acquisition(&optimized_point)?;
544
545            if acq_value > best_acquisition {
546                best_acquisition = acq_value;
547                best_point = optimized_point;
548            }
549        }
550
551        if best_point.is_empty() {
552            // Fallback to random sampling
553            best_point = self.sample_random_point(rng, lower, upper);
554        }
555
556        Ok(best_point)
557    }
558
559    fn local_optimize_acquisition(
560        &self,
561        start_point: Vec<f32>,
562        lower: &[f32],
563        upper: &[f32],
564    ) -> OptimizerResult<Vec<f32>> {
565        // Simple gradient-free optimization using random search
566        let mut best_point = start_point;
567        let mut best_value = self.compute_acquisition(&best_point)?;
568
569        // ✅ SciRS2 Policy Compliant - Using scirs2_core::random instead of direct rand
570        use scirs2_core::random::{Random, Rng};
571        let mut rng = Random::seed(0);
572        let step_size = 0.1;
573
574        for _ in 0..100 {
575            let mut candidate = best_point.clone();
576
577            // Random perturbation
578            for i in 0..candidate.len() {
579                let perturbation =
580                    (rng.random::<f32>() * 2.0 - 1.0) * step_size * (upper[i] - lower[i]);
581                candidate[i] = (candidate[i] + perturbation).max(lower[i]).min(upper[i]);
582            }
583
584            let value = self.compute_acquisition(&candidate)?;
585            if value > best_value {
586                best_value = value;
587                best_point = candidate;
588            }
589        }
590
591        Ok(best_point)
592    }
593
594    fn compute_acquisition(&self, point: &[f32]) -> OptimizerResult<f32> {
595        let (mean, std) = self.gp.predict(point)?;
596        let best_value = self
597            .best_point
598            .as_ref()
599            .expect("best_point should exist for acquisition computation")
600            .value;
601
602        match &self.config.acquisition_function {
603            AcquisitionFunction::ExpectedImprovement { xi } => {
604                let improvement = best_value - mean - xi;
605                if std <= 0.0 {
606                    return Ok(0.0);
607                }
608                let z = improvement / std;
609                Ok(improvement * self.normal_cdf(z) + std * self.normal_pdf(z))
610            }
611            AcquisitionFunction::ProbabilityOfImprovement { xi } => {
612                let improvement = best_value - mean - xi;
613                if std <= 0.0 {
614                    return Ok(0.0);
615                }
616                let z = improvement / std;
617                Ok(self.normal_cdf(z))
618            }
619            AcquisitionFunction::UpperConfidenceBound { kappa } => {
620                Ok(-(mean - kappa * std)) // Negative because we're maximizing acquisition but minimizing objective
621            }
622            _ => {
623                // Simplified implementations for other acquisition functions
624                Ok(-(mean - std)) // Default to UCB-like behavior
625            }
626        }
627    }
628
629    fn normal_cdf(&self, x: f32) -> f32 {
630        // Approximation of the standard normal CDF
631        0.5 * (1.0 + self.erf(x / 2.0_f32.sqrt()))
632    }
633
634    fn normal_pdf(&self, x: f32) -> f32 {
635        // Standard normal PDF
636        (2.0 * std::f32::consts::PI).sqrt().recip() * (-0.5 * x * x).exp()
637    }
638
    /// Error function approximation (Abramowitz & Stegun, formula 7.1.26).
    ///
    /// Uses the fitted fifth-order polynomial in t = 1/(1 + p|x|); the odd
    /// symmetry erf(-x) = -erf(x) is applied explicitly via `sign`.
    /// The coefficient values and Horner evaluation order are part of the fit —
    /// do not "simplify" them.
    fn erf(&self, x: f32) -> f32 {
        // Fitted polynomial coefficients from A&S 7.1.26.
        let a1 = 0.254829592;
        let a2 = -0.284496736;
        let a3 = 1.421413741;
        let a4 = -1.453152027;
        let a5 = 1.061405429;
        let p = 0.3275911;

        // Work on |x|, restore the sign at the end (erf is odd).
        let sign = if x < 0.0 { -1.0 } else { 1.0 };
        let x = x.abs();

        let t = 1.0 / (1.0 + p * x);
        let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x).exp();

        sign * y
    }
656}
657
658/// Example objective functions for testing
659pub mod test_functions {
660    use super::*;
661
662    /// 1D quadratic function: f(x) = (x - 0.5)^2
663    pub struct Quadratic1D;
664
665    impl ObjectiveFunction for Quadratic1D {
666        fn evaluate(&self, parameters: &[f32]) -> OptimizerResult<f32> {
667            if parameters.len() != 1 {
668                return Err(OptimizerError::InvalidInput(
669                    "Expected 1D input".to_string(),
670                ));
671            }
672            Ok((parameters[0] - 0.5).powi(2))
673        }
674
675        fn dimension(&self) -> usize {
676            1
677        }
678
679        fn bounds(&self) -> (Vec<f32>, Vec<f32>) {
680            (vec![0.0], vec![1.0])
681        }
682
683        fn name(&self) -> &str {
684            "Quadratic 1D"
685        }
686    }
687
688    /// 2D Branin function
689    pub struct Branin;
690
691    impl ObjectiveFunction for Branin {
692        fn evaluate(&self, parameters: &[f32]) -> OptimizerResult<f32> {
693            if parameters.len() != 2 {
694                return Err(OptimizerError::InvalidInput(
695                    "Expected 2D input".to_string(),
696                ));
697            }
698
699            let x1 = parameters[0];
700            let x2 = parameters[1];
701
702            let a = 1.0;
703            let b = 5.1 / (4.0 * std::f32::consts::PI.powi(2));
704            let c = 5.0 / std::f32::consts::PI;
705            let r = 6.0;
706            let s = 10.0;
707            let t = 1.0 / (8.0 * std::f32::consts::PI);
708
709            let term1 = a * (x2 - b * x1.powi(2) + c * x1 - r).powi(2);
710            let term2 = s * (1.0 - t) * x1.cos();
711            let term3 = s;
712
713            Ok(term1 + term2 + term3)
714        }
715
716        fn dimension(&self) -> usize {
717            2
718        }
719
720        fn bounds(&self) -> (Vec<f32>, Vec<f32>) {
721            (vec![-5.0, 0.0], vec![10.0, 15.0])
722        }
723
724        fn name(&self) -> &str {
725            "Branin"
726        }
727    }
728}
729
#[cfg(test)]
mod tests {
    use super::test_functions::*;
    use super::*;

    /// The GP should interpolate sensibly between three samples of y = x^2.
    #[test]
    fn test_gaussian_process_fit_predict() {
        let mut model = GaussianProcess::new(GaussianProcessConfig::default());

        model
            .fit(vec![vec![0.0], vec![0.5], vec![1.0]], vec![0.0, 0.25, 1.0])
            .unwrap();

        let (mean, std) = model.predict(&[0.75]).unwrap();
        assert!(mean >= 0.0 && mean <= 1.0);
        assert!(std > 0.0);
    }

    /// EI-driven optimization should localize the minimum of (x - 0.5)^2.
    #[test]
    fn test_bayesian_optimization_quadratic() {
        let config = BayesianOptConfig {
            seed: Some(42),
            max_evaluations: 20,
            initial_random_evaluations: 5,
            acquisition_function: AcquisitionFunction::ExpectedImprovement { xi: 0.01 },
            verbose: false,
            ..Default::default()
        };

        let mut opt = BayesianOptimizer::new(config);
        let result = opt.optimize(&Quadratic1D).unwrap();

        // Should find minimum near x = 0.5
        assert!(result.best_point.parameters[0] > 0.3 && result.best_point.parameters[0] < 0.7);
        assert!(result.best_point.value < 0.1);
        assert_eq!(result.evaluations, 20);
    }

    /// UCB-driven optimization should reach a decent Branin value
    /// (global minimum ≈ 0.398).
    #[test]
    fn test_bayesian_optimization_branin() {
        let config = BayesianOptConfig {
            seed: Some(42),
            max_evaluations: 30,
            initial_random_evaluations: 10,
            acquisition_function: AcquisitionFunction::UpperConfidenceBound { kappa: 2.576 },
            verbose: false,
            ..Default::default()
        };

        let mut opt = BayesianOptimizer::new(config);
        let result = opt.optimize(&Branin).unwrap();

        assert!(result.best_point.value < 5.0); // Should find a reasonably good solution
        assert_eq!(result.evaluations, 30);
    }
}