Skip to main content

numrs2/optimize/
bayesian_opt.rs

1//! Bayesian Optimization with Gaussian Process Surrogate
2//!
3//! Implements Bayesian Optimization for black-box function minimization using a
4//! GP surrogate model with RBF or Matern 5/2 kernels. Supports EI, PI, UCB, and
5//! Thompson Sampling acquisition functions with hyperparameter optimization via
6//! marginal log-likelihood.
7//!
8//! # Example
9//!
10//! ```no_run
11//! use numrs2::optimize::bayesian_opt::*;
12//!
13//! let f = |x: &[f64]| -> f64 { x.iter().map(|xi| (xi - 0.5).powi(2)).sum() };
14//! let config = BayesOptConfig {
15//!     bounds: vec![(0.0, 1.0), (0.0, 1.0)],
16//!     n_initial: 5, n_iterations: 20,
17//!     ..Default::default()
18//! };
19//! let result = bayesian_optimize(f, config).expect("optimization should succeed");
20//! assert!(result.f_best < 0.1);
21//! ```
22
23use crate::error::{NumRs2Error, Result};
24use scirs2_core::random::{thread_rng, Distribution, Normal, Rng, Uniform};
25use std::f64::consts::PI;
26
27/// Kernel function type for the GP surrogate model.
28#[derive(Debug, Clone, Default)]
29pub enum KernelType {
30    /// Squared Exponential (RBF): k(x,x') = sigma^2 exp(-||x-x'||^2 / (2l^2))
31    SquaredExponential,
32    /// Matern 5/2: k(x,x') = sigma^2 (1+sqrt(5)r/l+5r^2/(3l^2)) exp(-sqrt(5)r/l)
33    #[default]
34    Matern52,
35}
36
37/// Internal kernel representation with hyperparameters.
38#[derive(Debug, Clone)]
39struct Kernel {
40    /// The kernel type
41    kernel_type: KernelType,
42    /// Length scale parameter (l)
43    length_scale: f64,
44    /// Signal variance parameter (sigma^2)
45    signal_variance: f64,
46}
47
48impl Kernel {
49    /// Create a new kernel with the given type and default hyperparameters.
50    fn new(kernel_type: KernelType) -> Self {
51        Self {
52            kernel_type,
53            length_scale: 1.0,
54            signal_variance: 1.0,
55        }
56    }
57
58    /// Compute the kernel value between two points.
59    fn compute(&self, x1: &[f64], x2: &[f64]) -> f64 {
60        let sq_dist = squared_euclidean_distance(x1, x2);
61
62        match self.kernel_type {
63            KernelType::SquaredExponential => {
64                let l2 = self.length_scale * self.length_scale;
65                self.signal_variance * (-sq_dist / (2.0 * l2)).exp()
66            }
67            KernelType::Matern52 => {
68                let r = sq_dist.sqrt();
69                let sqrt5_r_l = 5.0_f64.sqrt() * r / self.length_scale;
70                let sq_term = 5.0 * sq_dist / (3.0 * self.length_scale * self.length_scale);
71                self.signal_variance * (1.0 + sqrt5_r_l + sq_term) * (-sqrt5_r_l).exp()
72            }
73        }
74    }
75
76    /// Compute the full kernel matrix K for a set of training points.
77    fn compute_matrix(&self, x: &[Vec<f64>]) -> Vec<Vec<f64>> {
78        let n = x.len();
79        let mut k = vec![vec![0.0; n]; n];
80        for i in 0..n {
81            k[i][i] = self.signal_variance;
82            for j in (i + 1)..n {
83                let val = self.compute(&x[i], &x[j]);
84                k[i][j] = val;
85                k[j][i] = val;
86            }
87        }
88        k
89    }
90
91    /// Compute the kernel vector k* between a new point and training points.
92    fn compute_vector(&self, x_new: &[f64], x_train: &[Vec<f64>]) -> Vec<f64> {
93        x_train.iter().map(|xi| self.compute(x_new, xi)).collect()
94    }
95
96    /// Create a copy with updated hyperparameters.
97    fn with_params(&self, length_scale: f64, signal_variance: f64) -> Self {
98        Self {
99            kernel_type: self.kernel_type.clone(),
100            length_scale,
101            signal_variance,
102        }
103    }
104}
105
106/// Compute squared Euclidean distance between two vectors.
107fn squared_euclidean_distance(a: &[f64], b: &[f64]) -> f64 {
108    a.iter()
109        .zip(b.iter())
110        .map(|(&ai, &bi)| {
111            let d = ai - bi;
112            d * d
113        })
114        .sum()
115}
116
117/// Gaussian Process (GP) surrogate model for function approximation.
118///
119/// Provides posterior mean and variance predictions at query points using
120/// Cholesky-based computations. Supports hyperparameter optimization via NLML.
121#[derive(Debug, Clone)]
122pub struct GaussianProcess {
123    /// Kernel function with hyperparameters
124    kernel: Kernel,
125    /// Training inputs (each inner Vec is one data point)
126    x_train: Vec<Vec<f64>>,
127    /// Training outputs (normalized)
128    y_train: Vec<f64>,
129    /// Noise variance for observations
130    noise_variance: f64,
131    /// Cholesky factor L of (K + sigma_n^2 I)
132    cholesky_l: Vec<Vec<f64>>,
133    /// Pre-computed alpha = L^T \ (L \ y) for predictions
134    alpha: Vec<f64>,
135    /// Mean of raw training outputs (for de-normalization)
136    y_mean: f64,
137    /// Standard deviation of raw training outputs (for de-normalization)
138    y_std: f64,
139}
140
141impl GaussianProcess {
142    /// Create a new unfitted Gaussian Process with the given kernel and noise variance.
143    pub fn new(kernel_type: KernelType, noise_variance: f64) -> Self {
144        Self {
145            kernel: Kernel::new(kernel_type),
146            x_train: Vec::new(),
147            y_train: Vec::new(),
148            noise_variance,
149            cholesky_l: Vec::new(),
150            alpha: Vec::new(),
151            y_mean: 0.0,
152            y_std: 1.0,
153        }
154    }
155
156    /// Fit the GP model to training data (normalizes, computes kernel, Cholesky, alpha).
157    pub fn fit(&mut self, x: &[Vec<f64>], y: &[f64]) -> Result<()> {
158        if x.is_empty() || y.is_empty() {
159            return Err(NumRs2Error::InvalidInput(
160                "Training data cannot be empty".to_string(),
161            ));
162        }
163        if x.len() != y.len() {
164            return Err(NumRs2Error::InvalidInput(format!(
165                "x and y must have same length: {} vs {}",
166                x.len(),
167                y.len()
168            )));
169        }
170
171        self.x_train = x.to_vec();
172
173        // Normalize y for numerical stability
174        self.y_mean = y.iter().sum::<f64>() / y.len() as f64;
175        let y_var = y.iter().map(|&yi| (yi - self.y_mean).powi(2)).sum::<f64>() / y.len() as f64;
176        self.y_std = if y_var > 1e-12 { y_var.sqrt() } else { 1.0 };
177
178        self.y_train = y
179            .iter()
180            .map(|&yi| (yi - self.y_mean) / self.y_std)
181            .collect();
182
183        // Compute kernel matrix K
184        let mut k = self.kernel.compute_matrix(&self.x_train);
185        let n = k.len();
186
187        // Add noise to diagonal: K + (sigma_n^2 / y_std^2) * I + jitter
188        let noise = self.noise_variance / (self.y_std * self.y_std);
189        for i in 0..n {
190            k[i][i] += noise + 1e-8;
191        }
192
193        // Cholesky decomposition: K = L * L^T
194        self.cholesky_l = cholesky_decomposition(&k)?;
195
196        // Solve for alpha = L^T \ (L \ y_normalized)
197        let z = forward_substitution(&self.cholesky_l, &self.y_train)?;
198        self.alpha = backward_substitution_transpose(&self.cholesky_l, &z)?;
199
200        Ok(())
201    }
202
203    /// Predict the posterior mean and variance at a new point (de-normalized).
204    pub fn predict(&self, x_new: &[f64]) -> Result<(f64, f64)> {
205        if self.x_train.is_empty() {
206            return Err(NumRs2Error::InvalidOperation(
207                "GP model has not been fitted yet".to_string(),
208            ));
209        }
210
211        let k_star = self.kernel.compute_vector(x_new, &self.x_train);
212
213        // Mean: mu = k*^T alpha (then de-normalize)
214        let mu_norm: f64 = k_star
215            .iter()
216            .zip(self.alpha.iter())
217            .map(|(&ki, &ai)| ki * ai)
218            .sum();
219        let mu = mu_norm * self.y_std + self.y_mean;
220
221        // Variance: var = k(x,x) - v^T v where v = L \ k*
222        let k_self = self.kernel.signal_variance;
223        let v = forward_substitution(&self.cholesky_l, &k_star)?;
224        let v_sq: f64 = v.iter().map(|&vi| vi * vi).sum();
225        let var_norm = (k_self - v_sq).max(1e-12);
226        let var = var_norm * self.y_std * self.y_std;
227
228        Ok((mu, var))
229    }
230
231    /// Predict mean and variance for multiple points.
232    pub fn predict_batch(&self, x_new: &[Vec<f64>]) -> Result<(Vec<f64>, Vec<f64>)> {
233        let mut means = Vec::with_capacity(x_new.len());
234        let mut variances = Vec::with_capacity(x_new.len());
235        for x in x_new {
236            let (mu, var) = self.predict(x)?;
237            means.push(mu);
238            variances.push(var);
239        }
240        Ok((means, variances))
241    }
242
243    /// Compute the negative log marginal likelihood (for hyperparameter optimization).
244    pub fn negative_log_marginal_likelihood(&self) -> Result<f64> {
245        if self.x_train.is_empty() {
246            return Err(NumRs2Error::InvalidOperation(
247                "GP model has not been fitted yet".to_string(),
248            ));
249        }
250
251        let n = self.y_train.len() as f64;
252
253        // Data fit term: 0.5 * y^T alpha
254        let data_fit: f64 = self
255            .y_train
256            .iter()
257            .zip(self.alpha.iter())
258            .map(|(&yi, &ai)| yi * ai)
259            .sum();
260
261        // Complexity penalty: sum of log of diagonal of L
262        let log_det: f64 = self
263            .cholesky_l
264            .iter()
265            .enumerate()
266            .map(|(i, row)| row[i].ln())
267            .sum();
268
269        // Constant term
270        let constant = 0.5 * n * (2.0 * PI).ln();
271
272        Ok(0.5 * data_fit + log_det + constant)
273    }
274
275    /// Optimize kernel hyperparameters via multi-start coordinate descent on NLML.
276    pub fn optimize_hyperparameters(
277        &mut self,
278        x: &[Vec<f64>],
279        y: &[f64],
280        n_restarts: usize,
281    ) -> Result<()> {
282        let mut rng = thread_rng();
283        let log_dist = Uniform::new(-2.0_f64, 2.0_f64).map_err(|e| {
284            NumRs2Error::ComputationError(format!("Uniform creation failed: {}", e))
285        })?;
286
287        let mut best_nll = f64::INFINITY;
288        let mut best_ls = self.kernel.length_scale;
289        let mut best_sv = self.kernel.signal_variance;
290
291        for restart in 0..n_restarts {
292            let (init_ls, init_sv) = if restart == 0 {
293                (self.kernel.length_scale, self.kernel.signal_variance)
294            } else {
295                let log_ls: f64 = log_dist.sample(&mut rng);
296                let log_sv: f64 = log_dist.sample(&mut rng);
297                (log_ls.exp(), log_sv.exp())
298            };
299
300            let refinement = self.refine_hyperparameters(x, y, init_ls, init_sv)?;
301
302            if refinement.0 < best_nll {
303                best_nll = refinement.0;
304                best_ls = refinement.1;
305                best_sv = refinement.2;
306            }
307        }
308
309        self.kernel = self.kernel.with_params(best_ls, best_sv);
310        self.fit(x, y)?;
311
312        Ok(())
313    }
314
315    /// Refine hyperparameters via coordinate descent with multiplicative steps.
316    fn refine_hyperparameters(
317        &mut self,
318        x: &[Vec<f64>],
319        y: &[f64],
320        init_ls: f64,
321        init_sv: f64,
322    ) -> Result<(f64, f64, f64)> {
323        let mut current_ls = init_ls;
324        let mut current_sv = init_sv;
325
326        self.kernel = self.kernel.with_params(current_ls, current_sv);
327        self.fit(x, y)?;
328        let mut current_nll = self.negative_log_marginal_likelihood()?;
329
330        let factors = [0.5, 0.8, 1.0, 1.25, 2.0];
331
332        for _ in 0..5 {
333            // Optimize length scale
334            for &factor in &factors {
335                let trial_ls = (current_ls * factor).clamp(1e-4, 100.0);
336                self.kernel = self.kernel.with_params(trial_ls, current_sv);
337                if self.fit(x, y).is_ok() {
338                    if let Ok(nll) = self.negative_log_marginal_likelihood() {
339                        if nll < current_nll {
340                            current_nll = nll;
341                            current_ls = trial_ls;
342                        }
343                    }
344                }
345            }
346
347            // Optimize signal variance
348            for &factor in &factors {
349                let trial_sv = (current_sv * factor).clamp(1e-4, 100.0);
350                self.kernel = self.kernel.with_params(current_ls, trial_sv);
351                if self.fit(x, y).is_ok() {
352                    if let Ok(nll) = self.negative_log_marginal_likelihood() {
353                        if nll < current_nll {
354                            current_nll = nll;
355                            current_sv = trial_sv;
356                        }
357                    }
358                }
359            }
360        }
361
362        Ok((current_nll, current_ls, current_sv))
363    }
364
365    /// Sample from the GP posterior at the given points (for Thompson Sampling).
366    pub fn sample_posterior(&self, x_points: &[Vec<f64>], rng: &mut impl Rng) -> Result<Vec<f64>> {
367        let (means, variances) = self.predict_batch(x_points)?;
368
369        let normal = Normal::new(0.0, 1.0).map_err(|e| {
370            NumRs2Error::ComputationError(format!("Failed to create Normal distribution: {}", e))
371        })?;
372
373        let samples: Vec<f64> = means
374            .iter()
375            .zip(variances.iter())
376            .map(|(&mu, &var)| {
377                let z: f64 = normal.sample(rng);
378                mu + z * var.sqrt()
379            })
380            .collect();
381
382        Ok(samples)
383    }
384
385    /// Get the current kernel type.
386    pub fn kernel_type(&self) -> &KernelType {
387        &self.kernel.kernel_type
388    }
389
390    /// Get the current length scale.
391    pub fn length_scale(&self) -> f64 {
392        self.kernel.length_scale
393    }
394
395    /// Get the current signal variance.
396    pub fn signal_variance(&self) -> f64 {
397        self.kernel.signal_variance
398    }
399
400    /// Get the number of training points.
401    pub fn n_train(&self) -> usize {
402        self.x_train.len()
403    }
404}
405
406/// Cholesky decomposition: A = L * L^T. Returns lower-triangular L.
407fn cholesky_decomposition(a: &[Vec<f64>]) -> Result<Vec<Vec<f64>>> {
408    let n = a.len();
409    if n == 0 {
410        return Err(NumRs2Error::InvalidInput(
411            "Matrix cannot be empty".to_string(),
412        ));
413    }
414
415    let mut l = vec![vec![0.0; n]; n];
416
417    for j in 0..n {
418        let mut sum = 0.0;
419        for k in 0..j {
420            sum += l[j][k] * l[j][k];
421        }
422        let diag = a[j][j] - sum;
423        if diag <= 0.0 {
424            return Err(NumRs2Error::NumericalError(format!(
425                "Matrix is not positive definite at index {} (diag = {:.6e})",
426                j, diag
427            )));
428        }
429        l[j][j] = diag.sqrt();
430
431        for i in (j + 1)..n {
432            let mut s = 0.0;
433            for k in 0..j {
434                s += l[i][k] * l[j][k];
435            }
436            l[i][j] = (a[i][j] - s) / l[j][j];
437        }
438    }
439
440    Ok(l)
441}
442
443/// Forward substitution: solve L x = b where L is lower triangular.
444fn forward_substitution(l: &[Vec<f64>], b: &[f64]) -> Result<Vec<f64>> {
445    let n = l.len();
446    let mut x = vec![0.0; n];
447
448    for i in 0..n {
449        let mut sum = 0.0;
450        for j in 0..i {
451            sum += l[i][j] * x[j];
452        }
453        if l[i][i].abs() < 1e-15 {
454            return Err(NumRs2Error::NumericalError(
455                "Singular matrix in forward substitution".to_string(),
456            ));
457        }
458        x[i] = (b[i] - sum) / l[i][i];
459    }
460
461    Ok(x)
462}
463
464/// Backward substitution: solve L^T x = b where L is lower triangular.
465fn backward_substitution_transpose(l: &[Vec<f64>], b: &[f64]) -> Result<Vec<f64>> {
466    let n = l.len();
467    let mut x = vec![0.0; n];
468
469    for i in (0..n).rev() {
470        let mut sum = 0.0;
471        for j in (i + 1)..n {
472            sum += l[j][i] * x[j];
473        }
474        if l[i][i].abs() < 1e-15 {
475            return Err(NumRs2Error::NumericalError(
476                "Singular matrix in backward substitution".to_string(),
477            ));
478        }
479        x[i] = (b[i] - sum) / l[i][i];
480    }
481
482    Ok(x)
483}
484
485/// Acquisition function type for guiding optimization.
486#[derive(Debug, Clone, Default)]
487pub enum AcquisitionType {
488    /// Expected Improvement: EI(x) = E[max(f_best - f(x), 0)] (uses xi param)
489    #[default]
490    ExpectedImprovement,
491    /// Probability of Improvement: PI(x) = P(f(x) < f_best - xi) (uses xi param)
492    ProbabilityOfImprovement,
493    /// Upper Confidence Bound: UCB(x) = -(mu(x) - kappa * sigma(x)) (uses kappa param)
494    UpperConfidenceBound,
495    /// Thompson Sampling: sample from posterior and minimize the sample
496    ThompsonSampling,
497}
498
499/// Evaluate the acquisition function at a point given GP predictions.
500///
501/// Returns higher values for more promising points (to be maximized).
502fn compute_acquisition(
503    acq: &AcquisitionType,
504    mu: f64,
505    sigma: f64,
506    best_y: f64,
507    xi: f64,
508    kappa: f64,
509) -> f64 {
510    if sigma < 1e-12 {
511        return 0.0;
512    }
513
514    match acq {
515        AcquisitionType::ExpectedImprovement => {
516            let improvement = best_y - mu - xi;
517            let z = improvement / sigma;
518            improvement * standard_normal_cdf(z) + sigma * standard_normal_pdf(z)
519        }
520        AcquisitionType::ProbabilityOfImprovement => {
521            let z = (best_y - mu - xi) / sigma;
522            standard_normal_cdf(z)
523        }
524        AcquisitionType::UpperConfidenceBound => {
525            // For minimization: lower mu is better; higher sigma = more exploration
526            // Return negative LCB so higher = more promising
527            -(mu - kappa * sigma)
528        }
529        AcquisitionType::ThompsonSampling => {
530            // Handled separately via posterior sampling
531            0.0
532        }
533    }
534}
535
536/// Standard normal PDF: phi(z) = exp(-z^2/2) / sqrt(2*pi).
537fn standard_normal_pdf(z: f64) -> f64 {
538    (-0.5 * z * z).exp() / (2.0 * PI).sqrt()
539}
540
541/// Standard normal CDF: Phi(z) = 0.5 * (1 + erf(z / sqrt(2))).
542fn standard_normal_cdf(z: f64) -> f64 {
543    0.5 * (1.0 + erf_approx(z / std::f64::consts::SQRT_2))
544}
545
546/// Error function approximation (Abramowitz & Stegun 7.1.26, max error 1.5e-7).
547fn erf_approx(x: f64) -> f64 {
548    let sign = if x >= 0.0 { 1.0 } else { -1.0 };
549    let x = x.abs();
550
551    let p = 0.3275911;
552    let a1 = 0.254829592;
553    let a2 = -0.284496736;
554    let a3 = 1.421413741;
555    let a4 = -1.453152027;
556    let a5 = 1.061405429;
557
558    let t = 1.0 / (1.0 + p * x);
559    let t2 = t * t;
560    let t3 = t2 * t;
561    let t4 = t3 * t;
562    let t5 = t4 * t;
563
564    let inner = a1 * t + a2 * t2 + a3 * t3 + a4 * t4 + a5 * t5;
565    sign * (1.0 - inner * (-x * x).exp())
566}
567
568/// Configuration for Bayesian Optimization.
569#[derive(Debug, Clone)]
570pub struct BayesOptConfig {
571    /// Number of initial random samples (Latin Hypercube design).
572    pub n_initial: usize,
573
574    /// Number of optimization iterations after the initial phase.
575    pub n_iterations: usize,
576
577    /// Search space bounds for each dimension: Vec<(lower, upper)>.
578    pub bounds: Vec<(f64, f64)>,
579
580    /// Acquisition function type.
581    pub acquisition: AcquisitionType,
582
583    /// Kernel type for the GP surrogate.
584    pub kernel: KernelType,
585
586    /// Optional RNG seed for reproducibility (currently uses thread_rng).
587    pub seed: Option<u64>,
588
589    /// Exploration-exploitation tradeoff for EI and PI (default: 0.01).
590    pub xi: f64,
591    /// UCB exploration parameter (default: 2.0).
592    pub kappa: f64,
593
594    /// Noise variance assumed for observations.
595    pub noise_variance: f64,
596
597    /// Number of random candidates for acquisition optimization.
598    pub n_candidates: usize,
599
600    /// Number of restarts for hyperparameter optimization.
601    pub n_hp_restarts: usize,
602
603    /// Whether to optimize GP hyperparameters during the run.
604    pub optimize_hyperparams: bool,
605
606    /// Frequency of hyperparameter optimization (every N iterations).
607    pub hp_optimize_interval: usize,
608}
609
610impl Default for BayesOptConfig {
611    fn default() -> Self {
612        Self {
613            n_initial: 5,
614            n_iterations: 25,
615            bounds: vec![(0.0, 1.0)],
616            acquisition: AcquisitionType::default(),
617            kernel: KernelType::default(),
618            seed: None,
619            xi: 0.01,
620            kappa: 2.0,
621            noise_variance: 1e-6,
622            n_candidates: 1000,
623            n_hp_restarts: 3,
624            optimize_hyperparams: true,
625            hp_optimize_interval: 5,
626        }
627    }
628}
629
630/// Result of Bayesian Optimization.
631#[derive(Debug, Clone)]
632pub struct BayesOptResult {
633    /// Best found solution (parameter vector).
634    pub x_best: Vec<f64>,
635
636    /// Best objective value found.
637    pub f_best: f64,
638
639    /// History of all evaluated points.
640    pub x_history: Vec<Vec<f64>>,
641
642    /// History of all objective values.
643    pub f_history: Vec<f64>,
644
645    /// Total number of function evaluations.
646    pub n_evaluations: usize,
647
648    /// Whether optimization improved over the initial samples.
649    pub success: bool,
650    /// Final GP model for inspection.
651    pub model: GaussianProcess,
652}
653
654/// Generate a Latin Hypercube Sampling (LHS) design with space-filling properties.
655fn latin_hypercube_sampling(
656    n_samples: usize,
657    bounds: &[(f64, f64)],
658    rng: &mut impl Rng,
659) -> Result<Vec<Vec<f64>>> {
660    let n_dims = bounds.len();
661    if n_dims == 0 {
662        return Err(NumRs2Error::InvalidInput(
663            "Bounds cannot be empty".to_string(),
664        ));
665    }
666    if n_samples == 0 {
667        return Err(NumRs2Error::InvalidInput(
668            "Number of samples must be positive".to_string(),
669        ));
670    }
671
672    let uniform = Uniform::new(0.0_f64, 1.0_f64)
673        .map_err(|e| NumRs2Error::ComputationError(format!("Uniform creation failed: {}", e)))?;
674
675    let mut samples = vec![vec![0.0; n_dims]; n_samples];
676
677    for dim in 0..n_dims {
678        // Fisher-Yates shuffle to create a random permutation
679        let mut perm: Vec<usize> = (0..n_samples).collect();
680        for i in (1..n_samples).rev() {
681            let j_f64: f64 = uniform.sample(rng);
682            let j = ((j_f64 * (i + 1) as f64) as usize).min(i);
683            perm.swap(i, j);
684        }
685
686        let (low, high) = bounds[dim];
687        let step = (high - low) / n_samples as f64;
688
689        for (i, &p) in perm.iter().enumerate() {
690            let u: f64 = uniform.sample(rng);
691            samples[i][dim] = low + (p as f64 + u) * step;
692        }
693    }
694
695    Ok(samples)
696}
697
698/// Generate uniform random samples within bounds.
699fn random_sampling(
700    n_samples: usize,
701    bounds: &[(f64, f64)],
702    rng: &mut impl Rng,
703) -> Result<Vec<Vec<f64>>> {
704    let n_dims = bounds.len();
705    let mut samples = Vec::with_capacity(n_samples);
706
707    for _ in 0..n_samples {
708        let mut point = Vec::with_capacity(n_dims);
709        for &(low, high) in bounds {
710            let dist = Uniform::new(low, high).map_err(|e| {
711                NumRs2Error::ComputationError(format!("Uniform creation failed: {}", e))
712            })?;
713            let val: f64 = dist.sample(rng);
714            point.push(val);
715        }
716        samples.push(point);
717    }
718
719    Ok(samples)
720}
721
722/// Optimize the acquisition function via random search + local refinement.
723fn optimize_acquisition(
724    gp: &GaussianProcess,
725    acq: &AcquisitionType,
726    bounds: &[(f64, f64)],
727    best_y: f64,
728    xi: f64,
729    kappa: f64,
730    n_candidates: usize,
731    rng: &mut impl Rng,
732) -> Result<Vec<f64>> {
733    match acq {
734        AcquisitionType::ThompsonSampling => {
735            let candidates = random_sampling(n_candidates, bounds, rng)?;
736            let posterior_samples = gp.sample_posterior(&candidates, rng)?;
737
738            let mut best_idx = 0;
739            let mut best_val = f64::INFINITY;
740            for (i, &val) in posterior_samples.iter().enumerate() {
741                if val < best_val {
742                    best_val = val;
743                    best_idx = i;
744                }
745            }
746            Ok(candidates[best_idx].clone())
747        }
748        _ => {
749            // Phase 1: Random search
750            let candidates = random_sampling(n_candidates, bounds, rng)?;
751            let mut best_acq = f64::NEG_INFINITY;
752            let mut best_point = candidates[0].clone();
753
754            for candidate in &candidates {
755                let (mu, var) = gp.predict(candidate)?;
756                let sigma = var.sqrt();
757                let acq_val = compute_acquisition(acq, mu, sigma, best_y, xi, kappa);
758                if acq_val > best_acq {
759                    best_acq = acq_val;
760                    best_point = candidate.clone();
761                }
762            }
763
764            // Phase 2: Local refinement around best candidate
765            let n_refine = 50;
766            for _ in 0..n_refine {
767                let mut perturbed = best_point.clone();
768                for (d, &(low, high)) in bounds.iter().enumerate() {
769                    let range = (high - low) * 0.05;
770                    let pert_dist = Uniform::new(-range, range).map_err(|e| {
771                        NumRs2Error::ComputationError(format!("Uniform creation failed: {}", e))
772                    })?;
773                    let delta: f64 = pert_dist.sample(rng);
774                    perturbed[d] = (perturbed[d] + delta).max(low).min(high);
775                }
776
777                let (mu, var) = gp.predict(&perturbed)?;
778                let sigma = var.sqrt();
779                let acq_val = compute_acquisition(acq, mu, sigma, best_y, xi, kappa);
780                if acq_val > best_acq {
781                    best_acq = acq_val;
782                    best_point = perturbed;
783                }
784            }
785
786            Ok(best_point)
787        }
788    }
789}
790
791/// Run Bayesian Optimization to minimize the given objective function.
792///
793/// Phase 1: Evaluates `n_initial` points via LHS. Phase 2: Iteratively fits GP,
794/// optimizes acquisition function, evaluates objective at suggested point.
795pub fn bayesian_optimize<F>(objective: F, config: BayesOptConfig) -> Result<BayesOptResult>
796where
797    F: Fn(&[f64]) -> f64,
798{
799    let n_dims = config.bounds.len();
800    if n_dims == 0 {
801        return Err(NumRs2Error::InvalidInput(
802            "Bounds cannot be empty".to_string(),
803        ));
804    }
805
806    // Validate bounds
807    for (i, &(low, high)) in config.bounds.iter().enumerate() {
808        if low >= high {
809            return Err(NumRs2Error::InvalidInput(format!(
810                "Invalid bounds for dimension {}: lower ({}) must be less than upper ({})",
811                i, low, high
812            )));
813        }
814    }
815
816    // Note: scirs2_core thread_rng does not support seeding directly;
817    // we use thread_rng regardless. The seed field is reserved for future use.
818    let mut rng = thread_rng();
819
820    // Initialize GP model
821    let mut gp = GaussianProcess::new(config.kernel.clone(), config.noise_variance);
822
823    // Phase 1: Initial sampling via LHS
824    let initial_points = latin_hypercube_sampling(config.n_initial, &config.bounds, &mut rng)?;
825
826    let mut x_history: Vec<Vec<f64>> = Vec::new();
827    let mut f_history: Vec<f64> = Vec::new();
828    let mut f_best = f64::INFINITY;
829    let mut x_best = initial_points[0].clone();
830
831    for point in &initial_points {
832        let value = objective(point);
833        x_history.push(point.clone());
834        f_history.push(value);
835
836        if value < f_best {
837            f_best = value;
838            x_best = point.clone();
839        }
840    }
841
842    let initial_best = f_best;
843
844    // Phase 2: Bayesian Optimization loop
845    for iteration in 0..config.n_iterations {
846        // Fit GP model to all data collected so far
847        gp.fit(&x_history, &f_history)?;
848
849        // Optionally optimize GP hyperparameters
850        if config.optimize_hyperparams
851            && config.hp_optimize_interval > 0
852            && iteration % config.hp_optimize_interval == 0
853        {
854            let _ = gp.optimize_hyperparameters(&x_history, &f_history, config.n_hp_restarts);
855        }
856
857        // Find the next point by maximizing the acquisition function
858        let next_point = optimize_acquisition(
859            &gp,
860            &config.acquisition,
861            &config.bounds,
862            f_best,
863            config.xi,
864            config.kappa,
865            config.n_candidates,
866            &mut rng,
867        )?;
868
869        // Evaluate objective at the next point
870        let value = objective(&next_point);
871        x_history.push(next_point.clone());
872        f_history.push(value);
873
874        if value < f_best {
875            f_best = value;
876            x_best = next_point;
877        }
878    }
879
880    // Final fit for the returned model
881    let _ = gp.fit(&x_history, &f_history);
882
883    let n_evaluations = x_history.len();
884    let success = f_best < initial_best;
885
886    Ok(BayesOptResult {
887        x_best,
888        f_best,
889        x_history,
890        f_history,
891        n_evaluations,
892        success,
893        model: gp,
894    })
895}
896
897#[cfg(test)]
898mod tests {
899    use super::*;
900    use approx::assert_relative_eq;
901
902    #[test]
903    fn test_rbf_kernel_properties() {
904        let kernel = Kernel {
905            kernel_type: KernelType::SquaredExponential,
906            length_scale: 1.0,
907            signal_variance: 1.0,
908        };
909
910        // Same point => signal_variance
911        let k_same = kernel.compute(&[0.0, 0.0], &[0.0, 0.0]);
912        assert_relative_eq!(k_same, 1.0, epsilon = 1e-10);
913
914        // Far points => near zero
915        let k_far = kernel.compute(&[0.0, 0.0], &[100.0, 100.0]);
916        assert!(
917            k_far < 1e-10,
918            "Far points should have near-zero kernel: {}",
919            k_far
920        );
921
922        // Symmetry
923        let k12 = kernel.compute(&[1.0, 2.0], &[3.0, 4.0]);
924        let k21 = kernel.compute(&[3.0, 4.0], &[1.0, 2.0]);
925        assert_relative_eq!(k12, k21, epsilon = 1e-15);
926
927        // Monotone decay with distance
928        let k_near = kernel.compute(&[0.0], &[0.5]);
929        let k_mid = kernel.compute(&[0.0], &[2.0]);
930        assert!(k_near > k_mid, "Kernel should decrease with distance");
931    }
932
933    #[test]
934    fn test_matern52_kernel_properties() {
935        let kernel = Kernel {
936            kernel_type: KernelType::Matern52,
937            length_scale: 1.0,
938            signal_variance: 1.0,
939        };
940
941        let k_same = kernel.compute(&[0.0], &[0.0]);
942        assert_relative_eq!(k_same, 1.0, epsilon = 1e-10);
943
944        let k_near = kernel.compute(&[0.0], &[0.5]);
945        let k_far = kernel.compute(&[0.0], &[2.0]);
946        assert!(k_near > k_far, "Kernel should decrease with distance");
947
948        // Symmetry in multi-dimensions
949        let k12 = kernel.compute(&[1.0, 2.0, 3.0], &[4.0, 5.0, 6.0]);
950        let k21 = kernel.compute(&[4.0, 5.0, 6.0], &[1.0, 2.0, 3.0]);
951        assert_relative_eq!(k12, k21, epsilon = 1e-15);
952    }
953
954    #[test]
955    fn test_gp_prediction_at_training_points() {
956        let mut gp = GaussianProcess::new(KernelType::SquaredExponential, 1e-6);
957
958        let x_train: Vec<Vec<f64>> = vec![
959            vec![0.0],
960            vec![1.0],
961            vec![2.0],
962            vec![3.0],
963            vec![4.0],
964            vec![5.0],
965        ];
966        let y_train: Vec<f64> = x_train.iter().map(|x| x[0].sin()).collect();
967
968        gp.fit(&x_train, &y_train).expect("GP fit should succeed");
969
970        for (x, &y_true) in x_train.iter().zip(y_train.iter()) {
971            let (mu, var) = gp.predict(x).expect("GP predict should succeed");
972            assert!(
973                (mu - y_true).abs() < 0.1,
974                "Prediction at training point x={:?}: mu={}, y_true={}",
975                x,
976                mu,
977                y_true
978            );
979            assert!(var >= 0.0, "Variance should be non-negative");
980        }
981    }
982
983    #[test]
984    fn test_gp_variance_increases_away_from_data() {
985        let mut gp = GaussianProcess::new(KernelType::SquaredExponential, 1e-6);
986
987        let x_train = vec![vec![0.0], vec![2.0], vec![4.0]];
988        let y_train = vec![0.0, 1.0, 0.0];
989
990        gp.fit(&x_train, &y_train).expect("GP fit should succeed");
991
992        let (_, var_at_train) = gp.predict(&[0.0]).expect("predict should succeed");
993        let (_, var_far) = gp.predict(&[10.0]).expect("predict should succeed");
994
995        assert!(
996            var_far > var_at_train,
997            "Variance far from data ({}) should exceed variance at data ({})",
998            var_far,
999            var_at_train
1000        );
1001    }
1002
1003    #[test]
1004    fn test_1d_quadratic_optimization() {
1005        let objective = |x: &[f64]| -> f64 { (x[0] - 0.3).powi(2) };
1006
1007        let config = BayesOptConfig {
1008            bounds: vec![(0.0, 1.0)],
1009            n_initial: 5,
1010            n_iterations: 15,
1011            noise_variance: 1e-6,
1012            n_candidates: 500,
1013            optimize_hyperparams: true,
1014            hp_optimize_interval: 5,
1015            ..Default::default()
1016        };
1017
1018        let result = bayesian_optimize(objective, config)
1019            .expect("Bayesian optimization should succeed for 1D quadratic");
1020
1021        assert!(
1022            result.f_best < 0.01,
1023            "Best value should be close to 0, got {}",
1024            result.f_best
1025        );
1026        assert!(
1027            (result.x_best[0] - 0.3).abs() < 0.15,
1028            "Best x should be close to 0.3, got {}",
1029            result.x_best[0]
1030        );
1031        assert_eq!(result.n_evaluations, 20); // 5 initial + 15 iterations
1032    }
1033
1034    #[test]
1035    fn test_2d_branin_optimization() {
1036        let branin = |x: &[f64]| -> f64 {
1037            let x1 = x[0] * 15.0 - 5.0;
1038            let x2 = x[1] * 15.0;
1039
1040            let a = 1.0;
1041            let b = 5.1 / (4.0 * PI * PI);
1042            let c = 5.0 / PI;
1043            let r = 6.0;
1044            let s = 10.0;
1045            let t = 1.0 / (8.0 * PI);
1046
1047            a * (x2 - b * x1 * x1 + c * x1 - r).powi(2) + s * (1.0 - t) * x1.cos() + s
1048        };
1049
1050        let config = BayesOptConfig {
1051            bounds: vec![(0.0, 1.0), (0.0, 1.0)],
1052            n_initial: 10,
1053            n_iterations: 30,
1054            noise_variance: 1e-6,
1055            n_candidates: 1000,
1056            optimize_hyperparams: true,
1057            hp_optimize_interval: 5,
1058            kernel: KernelType::Matern52,
1059            ..Default::default()
1060        };
1061
1062        let result = bayesian_optimize(branin, config)
1063            .expect("Bayesian optimization should succeed for Branin");
1064
1065        // Branin global minimum ~ 0.397887; with limited budget, get reasonably close
1066        assert!(
1067            result.f_best < 2.0,
1068            "Branin should find a reasonably good minimum, got {}",
1069            result.f_best
1070        );
1071        assert_eq!(result.x_best.len(), 2);
1072    }
1073
1074    #[test]
1075    fn test_rosenbrock_optimization() {
1076        let rosenbrock = |x: &[f64]| -> f64 {
1077            let x0 = x[0];
1078            let x1 = x[1];
1079            (1.0 - x0).powi(2) + 100.0 * (x1 - x0 * x0).powi(2)
1080        };
1081
1082        let config = BayesOptConfig {
1083            bounds: vec![(0.0, 2.0), (0.0, 2.0)],
1084            n_initial: 10,
1085            n_iterations: 40,
1086            noise_variance: 1e-6,
1087            n_candidates: 1000,
1088            optimize_hyperparams: true,
1089            hp_optimize_interval: 5,
1090            kernel: KernelType::Matern52,
1091            ..Default::default()
1092        };
1093
1094        let result = bayesian_optimize(rosenbrock, config)
1095            .expect("Bayesian optimization should succeed for Rosenbrock");
1096
1097        // Rosenbrock is very hard for BO with limited budget
1098        assert!(
1099            result.f_best < 10.0,
1100            "Rosenbrock best value should be < 10 with 50 evals, got {}",
1101            result.f_best
1102        );
1103    }
1104
1105    #[test]
1106    fn test_different_acquisition_functions() {
1107        let objective = |x: &[f64]| -> f64 { (x[0] - 0.7).powi(2) };
1108
1109        let acqs: Vec<AcquisitionType> = vec![
1110            AcquisitionType::ExpectedImprovement,
1111            AcquisitionType::ProbabilityOfImprovement,
1112            AcquisitionType::UpperConfidenceBound,
1113            AcquisitionType::ThompsonSampling,
1114        ];
1115
1116        for acq in acqs {
1117            let acq_name = format!("{:?}", acq);
1118            let config = BayesOptConfig {
1119                bounds: vec![(0.0, 1.0)],
1120                n_initial: 5,
1121                n_iterations: 10,
1122                noise_variance: 1e-6,
1123                n_candidates: 200,
1124                optimize_hyperparams: false,
1125                acquisition: acq,
1126                ..Default::default()
1127            };
1128
1129            let result = bayesian_optimize(objective, config);
1130            assert!(
1131                result.is_ok(),
1132                "Optimization with {} should succeed",
1133                acq_name
1134            );
1135
1136            let res = result.expect("should not fail");
1137            assert!(
1138                res.f_best < 0.5,
1139                "{} should find reasonable solution, got f_best={}",
1140                acq_name,
1141                res.f_best
1142            );
1143        }
1144    }
1145
1146    #[test]
1147    fn test_different_kernels() {
1148        let objective = |x: &[f64]| -> f64 { x[0].sin() + 0.1 * x[0] };
1149
1150        let kernels = vec![KernelType::SquaredExponential, KernelType::Matern52];
1151
1152        for kernel in kernels {
1153            let kernel_name = format!("{:?}", kernel);
1154            let config = BayesOptConfig {
1155                bounds: vec![(0.0, 6.0)],
1156                n_initial: 5,
1157                n_iterations: 10,
1158                noise_variance: 1e-6,
1159                n_candidates: 200,
1160                optimize_hyperparams: false,
1161                kernel,
1162                ..Default::default()
1163            };
1164
1165            let result = bayesian_optimize(objective, config);
1166            assert!(
1167                result.is_ok(),
1168                "Optimization with kernel {} should succeed",
1169                kernel_name
1170            );
1171        }
1172    }
1173
1174    #[test]
1175    fn test_bounded_optimization() {
1176        // Minimum of x^2 is at x=0, but bounds force x >= 2
1177        let objective = |x: &[f64]| -> f64 { x[0] * x[0] };
1178
1179        let config = BayesOptConfig {
1180            bounds: vec![(2.0, 5.0)],
1181            n_initial: 3,
1182            n_iterations: 10,
1183            noise_variance: 1e-6,
1184            n_candidates: 300,
1185            optimize_hyperparams: false,
1186            ..Default::default()
1187        };
1188
1189        let result =
1190            bayesian_optimize(objective, config).expect("Bounded optimization should succeed");
1191
1192        // Best should be near x=2 (lower bound)
1193        assert!(
1194            (result.x_best[0] - 2.0).abs() < 0.5,
1195            "Best x should be near lower bound 2.0, got {}",
1196            result.x_best[0]
1197        );
1198        assert!(
1199            result.f_best < 6.5,
1200            "Best value should be near 4.0, got {}",
1201            result.f_best
1202        );
1203
1204        // All evaluated points should be within bounds
1205        for point in &result.x_history {
1206            assert!(
1207                point[0] >= 2.0 - 1e-10 && point[0] <= 5.0 + 1e-10,
1208                "Point {} should be within bounds [2, 5]",
1209                point[0]
1210            );
1211        }
1212    }
1213
1214    #[test]
1215    fn test_ei_acquisition_values() {
1216        // EI positive when mu < best_y
1217        let ei = compute_acquisition(
1218            &AcquisitionType::ExpectedImprovement,
1219            0.0,
1220            1.0,
1221            1.0,
1222            0.0,
1223            2.0,
1224        );
1225        assert!(
1226            ei > 0.0,
1227            "EI should be positive when improvement is possible"
1228        );
1229
1230        // EI zero when sigma is 0
1231        let ei_zero = compute_acquisition(
1232            &AcquisitionType::ExpectedImprovement,
1233            0.0,
1234            0.0,
1235            1.0,
1236            0.0,
1237            2.0,
1238        );
1239        assert_relative_eq!(ei_zero, 0.0, epsilon = 1e-10);
1240
1241        // Higher sigma => higher EI
1242        let ei_low = compute_acquisition(
1243            &AcquisitionType::ExpectedImprovement,
1244            0.5,
1245            0.1,
1246            1.0,
1247            0.0,
1248            2.0,
1249        );
1250        let ei_high = compute_acquisition(
1251            &AcquisitionType::ExpectedImprovement,
1252            0.5,
1253            2.0,
1254            1.0,
1255            0.0,
1256            2.0,
1257        );
1258        assert!(ei_high > ei_low, "Higher sigma should give higher EI");
1259    }
1260
1261    #[test]
1262    fn test_pi_acquisition_values() {
1263        // mu << best_y => PI ~ 1
1264        let pi_good = compute_acquisition(
1265            &AcquisitionType::ProbabilityOfImprovement,
1266            -10.0,
1267            1.0,
1268            0.0,
1269            0.0,
1270            2.0,
1271        );
1272        assert!(
1273            pi_good > 0.9,
1274            "PI should be high when mu << best_y, got {}",
1275            pi_good
1276        );
1277
1278        // mu >> best_y => PI ~ 0
1279        let pi_bad = compute_acquisition(
1280            &AcquisitionType::ProbabilityOfImprovement,
1281            10.0,
1282            1.0,
1283            0.0,
1284            0.0,
1285            2.0,
1286        );
1287        assert!(
1288            pi_bad < 0.1,
1289            "PI should be low when mu >> best_y, got {}",
1290            pi_bad
1291        );
1292    }
1293
1294    #[test]
1295    fn test_ucb_kappa_effect() {
1296        let ucb_low_kappa = compute_acquisition(
1297            &AcquisitionType::UpperConfidenceBound,
1298            1.0,
1299            1.0,
1300            0.0,
1301            0.0,
1302            0.1,
1303        );
1304        let ucb_high_kappa = compute_acquisition(
1305            &AcquisitionType::UpperConfidenceBound,
1306            1.0,
1307            1.0,
1308            0.0,
1309            0.0,
1310            5.0,
1311        );
1312        assert!(
1313            ucb_high_kappa > ucb_low_kappa,
1314            "Higher kappa should favor exploration: low={}, high={}",
1315            ucb_low_kappa,
1316            ucb_high_kappa
1317        );
1318    }
1319
1320    #[test]
1321    fn test_optimization_progress_is_monotonic() {
1322        let objective = |x: &[f64]| -> f64 { x.iter().map(|xi| (xi - 0.5).powi(2)).sum() };
1323
1324        let config = BayesOptConfig {
1325            bounds: vec![(0.0, 1.0), (0.0, 1.0)],
1326            n_initial: 5,
1327            n_iterations: 15,
1328            noise_variance: 1e-6,
1329            n_candidates: 300,
1330            optimize_hyperparams: false,
1331            ..Default::default()
1332        };
1333
1334        let result = bayesian_optimize(objective, config).expect("Optimization should succeed");
1335
1336        // Verify running best is monotonically non-increasing
1337        let mut running_best = f64::INFINITY;
1338        for &val in &result.f_history {
1339            if val < running_best {
1340                running_best = val;
1341            }
1342        }
1343        assert_relative_eq!(running_best, result.f_best, epsilon = 1e-15);
1344    }
1345
1346    #[test]
1347    fn test_hyperparameter_optimization() {
1348        let mut gp = GaussianProcess::new(KernelType::SquaredExponential, 1e-6);
1349        // Start with bad hyperparameters
1350        gp.kernel = gp.kernel.with_params(0.1, 10.0);
1351
1352        let x_train: Vec<Vec<f64>> = (0..10).map(|i| vec![i as f64 * 0.5]).collect();
1353        let y_train: Vec<f64> = x_train.iter().map(|x| x[0].sin()).collect();
1354
1355        gp.optimize_hyperparameters(&x_train, &y_train, 3)
1356            .expect("Hyperparameter optimization should succeed");
1357
1358        let (mu, _var) = gp.predict(&[0.25]).expect("Prediction should succeed");
1359        let y_true = 0.25_f64.sin();
1360        assert!(
1361            (mu - y_true).abs() < 0.5,
1362            "After HP opt, prediction should be reasonable: mu={}, y_true={}",
1363            mu,
1364            y_true
1365        );
1366    }
1367
1368    #[test]
1369    fn test_invalid_inputs() {
1370        let objective = |x: &[f64]| -> f64 { x[0] };
1371
1372        // Empty bounds
1373        let config = BayesOptConfig {
1374            bounds: vec![],
1375            ..Default::default()
1376        };
1377        assert!(bayesian_optimize(objective, config).is_err());
1378
1379        // Inverted bounds
1380        let config = BayesOptConfig {
1381            bounds: vec![(1.0, 0.0)],
1382            ..Default::default()
1383        };
1384        assert!(bayesian_optimize(objective, config).is_err());
1385
1386        // GP operations on unfitted model
1387        let gp = GaussianProcess::new(KernelType::SquaredExponential, 1e-6);
1388        assert!(gp.predict(&[0.0]).is_err());
1389        assert!(gp.negative_log_marginal_likelihood().is_err());
1390
1391        // Mismatched training data
1392        let mut gp2 = GaussianProcess::new(KernelType::SquaredExponential, 1e-6);
1393        assert!(gp2.fit(&[], &[]).is_err());
1394        assert!(gp2.fit(&[vec![1.0]], &[1.0, 2.0]).is_err());
1395    }
1396
1397    #[test]
1398    fn test_cholesky_correctness() {
1399        let a = vec![vec![4.0, 2.0], vec![2.0, 3.0]];
1400
1401        let l = cholesky_decomposition(&a).expect("Cholesky should succeed for PD matrix");
1402
1403        // Verify L * L^T = A
1404        let n = a.len();
1405        for i in 0..n {
1406            for j in 0..n {
1407                let mut sum = 0.0;
1408                for k in 0..n {
1409                    sum += l[i][k] * l[j][k];
1410                }
1411                assert_relative_eq!(sum, a[i][j], epsilon = 1e-10);
1412            }
1413        }
1414
1415        // Non-PD should fail
1416        let bad = vec![vec![1.0, 2.0], vec![2.0, 1.0]];
1417        assert!(cholesky_decomposition(&bad).is_err());
1418    }
1419
1420    #[test]
1421    fn test_erf_and_normal_cdf() {
1422        assert_relative_eq!(erf_approx(0.0), 0.0, epsilon = 1e-6);
1423        assert_relative_eq!(erf_approx(1.0), 0.842700793, epsilon = 1e-5);
1424        assert_relative_eq!(erf_approx(-1.0), -0.842700793, epsilon = 1e-5);
1425        assert_relative_eq!(erf_approx(2.0), 0.995322265, epsilon = 1e-5);
1426
1427        assert_relative_eq!(standard_normal_cdf(0.0), 0.5, epsilon = 1e-6);
1428        assert!(standard_normal_cdf(5.0) > 0.999);
1429        assert!(standard_normal_cdf(-5.0) < 0.001);
1430    }
1431
1432    #[test]
1433    fn test_result_struct_completeness() {
1434        let objective = |x: &[f64]| -> f64 { x[0] * x[0] };
1435
1436        let config = BayesOptConfig {
1437            bounds: vec![(0.0, 1.0)],
1438            n_initial: 3,
1439            n_iterations: 5,
1440            noise_variance: 1e-6,
1441            n_candidates: 100,
1442            optimize_hyperparams: false,
1443            ..Default::default()
1444        };
1445
1446        let result = bayesian_optimize(objective, config).expect("Should succeed");
1447
1448        // Verify all result fields
1449        assert_eq!(result.x_best.len(), 1);
1450        assert!(result.f_best.is_finite());
1451        assert_eq!(result.x_history.len(), 8); // 3 initial + 5 iterations
1452        assert_eq!(result.f_history.len(), 8);
1453        assert_eq!(result.n_evaluations, 8);
1454        // Model should be usable
1455        let (mu, var) = result.model.predict(&[0.5]).expect("Model should predict");
1456        assert!(mu.is_finite());
1457        assert!(var >= 0.0);
1458    }
1459
1460    #[test]
1461    fn test_latin_hypercube_sampling_coverage() {
1462        let mut rng = thread_rng();
1463        let bounds = vec![(0.0, 1.0), (0.0, 1.0)];
1464        let n_samples = 20;
1465
1466        let samples =
1467            latin_hypercube_sampling(n_samples, &bounds, &mut rng).expect("LHS should succeed");
1468
1469        assert_eq!(samples.len(), n_samples);
1470
1471        // All samples must be within bounds
1472        for sample in &samples {
1473            assert_eq!(sample.len(), 2);
1474            for (d, &val) in sample.iter().enumerate() {
1475                assert!(
1476                    val >= bounds[d].0 && val <= bounds[d].1,
1477                    "Sample dim {} value {} out of bounds [{}, {}]",
1478                    d,
1479                    val,
1480                    bounds[d].0,
1481                    bounds[d].1
1482                );
1483            }
1484        }
1485
1486        // Check LHS strata coverage
1487        for dim in 0..2 {
1488            let mut strata = vec![false; n_samples];
1489            for sample in &samples {
1490                let stratum = (sample[dim] * n_samples as f64) as usize;
1491                let stratum = stratum.min(n_samples - 1);
1492                strata[stratum] = true;
1493            }
1494            let covered = strata.iter().filter(|&&s| s).count();
1495            assert!(
1496                covered >= n_samples / 2,
1497                "LHS should cover most strata in dim {}: only {}/{}",
1498                dim,
1499                covered,
1500                n_samples
1501            );
1502        }
1503    }
1504
1505    #[test]
1506    fn test_xi_parameter_effect() {
1507        let ei_low_xi = compute_acquisition(
1508            &AcquisitionType::ExpectedImprovement,
1509            0.8,
1510            0.5,
1511            1.0,
1512            0.0,
1513            2.0,
1514        );
1515        let ei_high_xi = compute_acquisition(
1516            &AcquisitionType::ExpectedImprovement,
1517            0.8,
1518            0.5,
1519            1.0,
1520            0.5,
1521            2.0,
1522        );
1523        // Higher xi subtracts more from improvement => lower EI at same point
1524        assert!(
1525            ei_low_xi >= ei_high_xi,
1526            "Higher xi should reduce EI at the same point: low_xi={}, high_xi={}",
1527            ei_low_xi,
1528            ei_high_xi
1529        );
1530    }
1531}