1use crate::engine::rng::SimRng;
34use crate::error::{SimError, SimResult};
35use serde::{Deserialize, Serialize};
36
/// Strategy used to score candidate points when choosing the next
/// evaluation in `BayesianOptimizer::suggest` (higher score = better).
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize, Default)]
pub enum AcquisitionFunction {
    /// Expected improvement over the best observed value (the default).
    #[default]
    ExpectedImprovement,
    /// Confidence-bound score; `kappa` weights predictive uncertainty
    /// (larger `kappa` means more exploration).
    UCB { kappa: f64 },
    /// Probability that a candidate improves on the best observed value.
    ProbabilityOfImprovement,
}
48
/// Configuration for `BayesianOptimizer`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizerConfig {
    /// Per-dimension `(min, max)` search bounds; the length of this vector
    /// sets the problem dimensionality.
    pub bounds: Vec<(f64, f64)>,
    /// Acquisition function used to rank candidate points.
    pub acquisition: AcquisitionFunction,
    /// RBF kernel length scale of the GP surrogate.
    pub length_scale: f64,
    /// RBF kernel signal variance (prior variance at any point).
    pub signal_variance: f64,
    /// Jitter added to the kernel diagonal for numerical stability.
    pub noise_variance: f64,
    /// Number of random candidates scored per `suggest` call.
    pub n_candidates: usize,
    /// Seed for the internal RNG, making runs reproducible.
    pub seed: u64,
}
67
68impl Default for OptimizerConfig {
69 fn default() -> Self {
70 Self {
71 bounds: vec![(-1.0, 1.0)],
72 acquisition: AcquisitionFunction::default(),
73 length_scale: 1.0,
74 signal_variance: 1.0,
75 noise_variance: 1e-6,
76 n_candidates: 1000,
77 seed: 42,
78 }
79 }
80}
81
/// Minimal Gaussian-process regressor with an RBF kernel.
///
/// Training data is accumulated via `add_observation`; `fit` factorizes
/// the kernel matrix and caches the quantities needed by `predict`.
#[derive(Debug, Clone)]
pub struct GaussianProcess {
    // Training inputs, one point (Vec<f64>) per observation.
    x_train: Vec<Vec<f64>>,
    // Training targets, parallel to `x_train`.
    y_train: Vec<f64>,
    // RBF kernel length scale.
    length_scale: f64,
    // RBF kernel signal variance.
    signal_variance: f64,
    // Jitter added to the kernel diagonal.
    noise_variance: f64,
    // Cached K^{-1} y from the last successful `fit`; cleared on new data.
    k_inv_y: Option<Vec<f64>>,
    // Cached lower Cholesky factor L of K; cleared on new data.
    l_matrix: Option<Vec<Vec<f64>>>,
}
102
103impl GaussianProcess {
104 #[must_use]
106 pub fn new(length_scale: f64, signal_variance: f64, noise_variance: f64) -> Self {
107 Self {
108 x_train: Vec::new(),
109 y_train: Vec::new(),
110 length_scale,
111 signal_variance,
112 noise_variance,
113 k_inv_y: None,
114 l_matrix: None,
115 }
116 }
117
118 pub fn add_observation(&mut self, x: Vec<f64>, y: f64) {
120 self.x_train.push(x);
121 self.y_train.push(y);
122 self.k_inv_y = None;
124 self.l_matrix = None;
125 }
126
127 pub fn fit(&mut self) -> SimResult<()> {
133 if self.x_train.is_empty() {
134 return Ok(());
135 }
136
137 let n = self.x_train.len();
138
139 let mut k_matrix = vec![vec![0.0; n]; n];
141 for i in 0..n {
142 for j in 0..n {
143 k_matrix[i][j] = self.rbf_kernel(&self.x_train[i], &self.x_train[j]);
144 if i == j {
145 k_matrix[i][j] += self.noise_variance;
146 }
147 }
148 }
149
150 let l = Self::cholesky(&k_matrix)?;
152
153 let alpha = Self::solve_triangular(&l, &self.y_train, false);
155 let k_inv_y = Self::solve_triangular(&l, &alpha, true);
156
157 self.l_matrix = Some(l);
158 self.k_inv_y = Some(k_inv_y);
159
160 Ok(())
161 }
162
163 #[must_use]
165 pub fn predict(&self, x: &[f64]) -> (f64, f64) {
166 if self.x_train.is_empty() {
167 return (0.0, self.signal_variance);
168 }
169
170 let Some(k_inv_y) = &self.k_inv_y else {
171 return (0.0, self.signal_variance);
172 };
173
174 let Some(l) = &self.l_matrix else {
175 return (0.0, self.signal_variance);
176 };
177
178 let k_star: Vec<f64> = self
180 .x_train
181 .iter()
182 .map(|xi| self.rbf_kernel(xi, x))
183 .collect();
184
185 let mu: f64 = k_star.iter().zip(k_inv_y.iter()).map(|(k, a)| k * a).sum();
187
188 let k_xx = self.rbf_kernel(x, x);
190
191 let v = Self::solve_triangular(l, &k_star, false);
193 let variance = k_xx - v.iter().map(|vi| vi * vi).sum::<f64>();
194
195 (mu, variance.max(1e-10))
196 }
197
198 fn rbf_kernel(&self, x1: &[f64], x2: &[f64]) -> f64 {
200 let sq_dist: f64 = x1.iter().zip(x2.iter()).map(|(a, b)| (a - b).powi(2)).sum();
201
202 self.signal_variance * (-sq_dist / (2.0 * self.length_scale.powi(2))).exp()
203 }
204
    /// Cholesky decomposition: returns the lower-triangular `L` with
    /// `L * L^T = matrix`.
    ///
    /// # Errors
    ///
    /// Fails when the matrix is not positive definite (a non-positive
    /// pivot appears on the diagonal).
    fn cholesky(matrix: &[Vec<f64>]) -> SimResult<Vec<Vec<f64>>> {
        let n = matrix.len();
        let mut l = vec![vec![0.0; n]; n];

        // Row-by-row (Cholesky-Banachiewicz) elimination.
        for i in 0..n {
            for j in 0..=i {
                // Dot product of the already-computed prefixes of rows i and j.
                let mut sum = 0.0;
                for k in 0..j {
                    sum += l[i][k] * l[j][k];
                }

                if i == j {
                    // Diagonal pivot; must be strictly positive for PD matrices.
                    let val = matrix[i][i] - sum;
                    if val <= 0.0 {
                        return Err(SimError::optimization(
                            "Cholesky decomposition failed: matrix not positive definite",
                        ));
                    }
                    l[i][j] = val.sqrt();
                } else {
                    l[i][j] = (matrix[i][j] - sum) / l[j][j];
                }
            }
        }

        Ok(l)
    }
233
234 fn solve_triangular(l: &[Vec<f64>], b: &[f64], transpose: bool) -> Vec<f64> {
236 let n = b.len();
237 let mut x = vec![0.0; n];
238
239 if transpose {
240 for i in (0..n).rev() {
242 let mut sum = b[i];
243 for j in (i + 1)..n {
244 sum -= l[j][i] * x[j];
245 }
246 x[i] = sum / l[i][i];
247 }
248 } else {
249 for i in 0..n {
251 let mut sum = b[i];
252 for j in 0..i {
253 sum -= l[i][j] * x[j];
254 }
255 x[i] = sum / l[i][i];
256 }
257 }
258
259 x
260 }
261
    /// Number of training observations added so far.
    #[must_use]
    pub fn n_observations(&self) -> usize {
        self.x_train.len()
    }
267}
268
/// Bayesian optimizer that minimizes a black-box function over a box
/// domain using a Gaussian-process surrogate and random candidate search.
#[derive(Debug)]
pub struct BayesianOptimizer {
    // Bounds, kernel hyperparameters, acquisition choice, candidate count, seed.
    config: OptimizerConfig,
    // Surrogate model, refit after every observation.
    gp: GaussianProcess,
    // Deterministic RNG used to draw candidate points.
    rng: SimRng,
    // Lowest objective value observed so far (this optimizer minimizes).
    best_y: Option<f64>,
    // Input that produced `best_y`.
    best_x: Option<Vec<f64>>,
}
283
284impl BayesianOptimizer {
285 #[must_use]
287 pub fn new(config: OptimizerConfig) -> Self {
288 let gp = GaussianProcess::new(
289 config.length_scale,
290 config.signal_variance,
291 config.noise_variance,
292 );
293 let rng = SimRng::new(config.seed);
294
295 Self {
296 config,
297 gp,
298 rng,
299 best_y: None,
300 best_x: None,
301 }
302 }
303
304 pub fn observe(&mut self, x: Vec<f64>, y: f64) -> SimResult<()> {
310 self.gp.add_observation(x.clone(), y);
311
312 if self.best_y.is_none() || y < self.best_y.unwrap_or(f64::INFINITY) {
314 self.best_y = Some(y);
315 self.best_x = Some(x);
316 }
317
318 self.gp.fit()
320 }
321
322 #[must_use]
326 pub fn suggest(&mut self) -> Vec<f64> {
327 if self.gp.n_observations() == 0 {
328 return self.random_point();
330 }
331
332 let mut best_acq = f64::NEG_INFINITY;
334 let mut best_candidate = self.random_point();
335
336 for _ in 0..self.config.n_candidates {
337 let candidate = self.random_point();
338 let acq_value = self.evaluate_acquisition(&candidate);
339
340 if acq_value > best_acq {
341 best_acq = acq_value;
342 best_candidate = candidate;
343 }
344 }
345
346 best_candidate
347 }
348
349 fn evaluate_acquisition(&self, x: &[f64]) -> f64 {
351 let (mu, variance) = self.gp.predict(x);
352 let sigma = variance.sqrt();
353
354 match self.config.acquisition {
355 AcquisitionFunction::ExpectedImprovement => self.expected_improvement(mu, sigma),
356 AcquisitionFunction::UCB { kappa } => Self::upper_confidence_bound(mu, sigma, kappa),
357 AcquisitionFunction::ProbabilityOfImprovement => {
358 self.probability_of_improvement(mu, sigma)
359 }
360 }
361 }
362
363 fn expected_improvement(&self, mu: f64, sigma: f64) -> f64 {
365 let best = self.best_y.unwrap_or(0.0);
366
367 if sigma < 1e-10 {
368 return 0.0;
369 }
370
371 let z = (best - mu) / sigma;
373 let pdf = Self::normal_pdf(z);
374 let cdf = Self::normal_cdf(z);
375
376 (sigma * (z * cdf + pdf)).max(0.0)
378 }
379
380 fn upper_confidence_bound(mu: f64, sigma: f64, kappa: f64) -> f64 {
382 -mu + kappa * sigma
384 }
385
386 fn probability_of_improvement(&self, mu: f64, sigma: f64) -> f64 {
388 let best = self.best_y.unwrap_or(0.0);
389
390 if sigma < 1e-10 {
391 return if mu < best { 1.0 } else { 0.0 };
392 }
393
394 let z = (best - mu) / sigma;
395 Self::normal_cdf(z)
396 }
397
398 fn normal_pdf(z: f64) -> f64 {
400 const INV_SQRT_2PI: f64 = 0.398_942_280_401_432_7; INV_SQRT_2PI * (-0.5 * z * z).exp()
402 }
403
404 fn normal_cdf(z: f64) -> f64 {
406 const A1: f64 = 0.254_829_592;
408 const A2: f64 = -0.284_496_736;
409 const A3: f64 = 1.421_413_741;
410 const A4: f64 = -1.453_152_027;
411 const A5: f64 = 1.061_405_429;
412 const P: f64 = 0.327_591_1;
413
414 let sign = if z < 0.0 { -1.0 } else { 1.0 };
415 let z_abs = z.abs();
416 let t = 1.0 / (1.0 + P * z_abs);
417 let y = 1.0
418 - (((((A5 * t + A4) * t) + A3) * t + A2) * t + A1) * t * (-z_abs * z_abs / 2.0).exp();
419
420 0.5 * (1.0 + sign * y)
421 }
422
423 fn random_point(&mut self) -> Vec<f64> {
425 self.config
426 .bounds
427 .iter()
428 .map(|(min, max)| self.rng.gen_range_f64(*min, *max))
429 .collect()
430 }
431
432 #[must_use]
434 pub fn best(&self) -> Option<(&[f64], f64)> {
435 match (&self.best_x, self.best_y) {
436 (Some(x), Some(y)) => Some((x.as_slice(), y)),
437 _ => None,
438 }
439 }
440
    /// Number of observations recorded so far (delegates to the GP).
    #[must_use]
    pub fn n_observations(&self) -> usize {
        self.gp.n_observations()
    }
446
    /// Read-only access to the optimizer's configuration.
    #[must_use]
    pub const fn config(&self) -> &OptimizerConfig {
        &self.config
    }
452}
453
/// Summary of a completed optimization run.
///
/// NOTE(review): not constructed anywhere in this file — presumably
/// produced by a driver elsewhere in the crate; confirm against callers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizationResult {
    /// Input that achieved the best (lowest) objective value.
    pub best_x: Vec<f64>,
    /// Best objective value found.
    pub best_y: f64,
    /// Total number of objective evaluations performed.
    pub n_evaluations: usize,
    /// Evaluated `(x, y)` pairs.
    pub history: Vec<(Vec<f64>, f64)>,
}
466
// Unit tests: exact tolerances and expected values below are the
// behavioral contract of the GP and optimizer; keep them in sync with
// any numerical change above.
#[cfg(test)]
mod tests {
    use super::*;

    // A fresh GP starts with no training data.
    #[test]
    fn test_gp_create() {
        let gp = GaussianProcess::new(1.0, 1.0, 1e-6);
        assert_eq!(gp.n_observations(), 0);
    }

    // Observations accumulate one at a time.
    #[test]
    fn test_gp_add_observation() {
        let mut gp = GaussianProcess::new(1.0, 1.0, 1e-6);
        gp.add_observation(vec![0.0], 1.0);
        gp.add_observation(vec![1.0], 2.0);
        assert_eq!(gp.n_observations(), 2);
    }

    // Fitting distinct points must succeed (kernel matrix positive definite).
    #[test]
    fn test_gp_fit() {
        let mut gp = GaussianProcess::new(1.0, 1.0, 1e-6);
        gp.add_observation(vec![0.0], 0.0);
        gp.add_observation(vec![1.0], 1.0);
        gp.add_observation(vec![2.0], 4.0);

        assert!(gp.fit().is_ok());
    }

    // With no data, predict returns the prior: mean 0, variance = signal_variance.
    #[test]
    fn test_gp_predict_empty() {
        let gp = GaussianProcess::new(1.0, 1.0, 1e-6);
        let (mu, var) = gp.predict(&[0.5]);

        assert!((mu - 0.0).abs() < f64::EPSILON);
        assert!((var - 1.0).abs() < f64::EPSILON); }

    // The posterior mean should (nearly) interpolate the training targets.
    #[test]
    fn test_gp_predict_interpolation() {
        let mut gp = GaussianProcess::new(1.0, 1.0, 1e-6);
        gp.add_observation(vec![0.0], 0.0);
        gp.add_observation(vec![1.0], 1.0);
        gp.fit().ok();

        let (mu0, _) = gp.predict(&[0.0]);
        let (mu1, _) = gp.predict(&[1.0]);

        assert!((mu0 - 0.0).abs() < 0.1);
        assert!((mu1 - 1.0).abs() < 0.1);
    }

    // Predictive uncertainty shrinks close to observed inputs.
    #[test]
    fn test_gp_variance_decreases_near_data() {
        let mut gp = GaussianProcess::new(1.0, 1.0, 1e-6);
        gp.add_observation(vec![0.0], 0.0);
        gp.fit().ok();

        let (_, var_near) = gp.predict(&[0.0]);
        let (_, var_far) = gp.predict(&[5.0]);

        assert!(
            var_near < var_far,
            "Variance should be lower near observations"
        );
    }

    // A new optimizer has no observations.
    #[test]
    fn test_optimizer_create() {
        let config = OptimizerConfig::default();
        let optimizer = BayesianOptimizer::new(config);
        assert_eq!(optimizer.n_observations(), 0);
    }

    // observe() tracks the incumbent minimum (lower y wins).
    #[test]
    fn test_optimizer_observe() {
        let config = OptimizerConfig {
            bounds: vec![(-5.0, 5.0)],
            ..Default::default()
        };
        let mut optimizer = BayesianOptimizer::new(config);

        optimizer.observe(vec![0.0], 1.0).ok();
        optimizer.observe(vec![1.0], 0.5).ok();

        assert_eq!(optimizer.n_observations(), 2);

        let (best_x, best_y) = optimizer.best().expect("Should have best");
        assert!((best_y - 0.5).abs() < f64::EPSILON);
        assert!((best_x[0] - 1.0).abs() < f64::EPSILON);
    }

    // With no data, suggest() falls back to a random in-bounds point.
    #[test]
    fn test_optimizer_suggest_empty() {
        let config = OptimizerConfig {
            bounds: vec![(-5.0, 5.0)],
            ..Default::default()
        };
        let mut optimizer = BayesianOptimizer::new(config);

        let suggestion = optimizer.suggest();
        assert_eq!(suggestion.len(), 1);
        assert!(suggestion[0] >= -5.0 && suggestion[0] <= 5.0);
    }

    // With data, suggestions stay in bounds and keep the right dimension.
    #[test]
    fn test_optimizer_suggest_with_observations() {
        let config = OptimizerConfig {
            bounds: vec![(-5.0, 5.0)],
            n_candidates: 100,
            ..Default::default()
        };
        let mut optimizer = BayesianOptimizer::new(config);

        optimizer.observe(vec![0.0], 1.0).ok();
        optimizer.observe(vec![1.0], 0.5).ok();
        optimizer.observe(vec![-1.0], 1.5).ok();

        let suggestion = optimizer.suggest();
        assert_eq!(suggestion.len(), 1);
        assert!(suggestion[0] >= -5.0 && suggestion[0] <= 5.0);
    }

    // Dimensionality follows the number of bound pairs.
    #[test]
    fn test_optimizer_multidimensional() {
        let config = OptimizerConfig {
            bounds: vec![(-5.0, 5.0), (-5.0, 5.0)],
            n_candidates: 100,
            ..Default::default()
        };
        let mut optimizer = BayesianOptimizer::new(config);

        optimizer.observe(vec![0.0, 0.0], 1.0).ok();
        optimizer.observe(vec![1.0, 1.0], 0.5).ok();

        let suggestion = optimizer.suggest();
        assert_eq!(suggestion.len(), 2);
    }

    // EI is positive with uncertainty and exactly zero when sigma is zero.
    #[test]
    fn test_acquisition_ei() {
        let config = OptimizerConfig {
            bounds: vec![(-5.0, 5.0)],
            acquisition: AcquisitionFunction::ExpectedImprovement,
            ..Default::default()
        };
        let optimizer = BayesianOptimizer::new(config);

        let ei = optimizer.expected_improvement(0.0, 1.0);
        assert!(ei > 0.0);

        let ei_zero_var = optimizer.expected_improvement(0.0, 0.0);
        assert!((ei_zero_var - 0.0).abs() < f64::EPSILON);
    }

    // UCB score is -mu + kappa * sigma (minimization form).
    #[test]
    fn test_acquisition_ucb() {
        let ucb = BayesianOptimizer::upper_confidence_bound(1.0, 0.5, 2.0);
        let expected = -1.0 + 2.0 * 0.5;
        assert!((ucb - expected).abs() < f64::EPSILON);
    }

    // PI is ~0.5 at the incumbent, near 1 well below it, near 0 well above.
    #[test]
    fn test_acquisition_pi() {
        let config = OptimizerConfig {
            bounds: vec![(-5.0, 5.0)],
            acquisition: AcquisitionFunction::ProbabilityOfImprovement,
            ..Default::default()
        };
        let mut optimizer = BayesianOptimizer::new(config);
        optimizer.best_y = Some(1.0);

        let pi = optimizer.probability_of_improvement(1.0, 1.0);
        assert!((pi - 0.5).abs() < 0.01);

        let pi_good = optimizer.probability_of_improvement(-1.0, 1.0);
        assert!(pi_good > 0.9);

        let pi_bad = optimizer.probability_of_improvement(3.0, 1.0);
        assert!(pi_bad < 0.1);
    }

    // Density peak at zero is 1/sqrt(2*pi) ~= 0.3989.
    #[test]
    fn test_normal_pdf() {
        let pdf_0 = BayesianOptimizer::normal_pdf(0.0);
        assert!((pdf_0 - 0.3989).abs() < 0.01);
    }

    // CDF hits 0.5 at zero and saturates in the +/-3 sigma tails.
    #[test]
    fn test_normal_cdf() {
        let cdf_0 = BayesianOptimizer::normal_cdf(0.0);
        assert!((cdf_0 - 0.5).abs() < 0.01);

        let cdf_neg3 = BayesianOptimizer::normal_cdf(-3.0);
        assert!(cdf_neg3 < 0.01);

        let cdf_pos3 = BayesianOptimizer::normal_cdf(3.0);
        assert!(cdf_pos3 > 0.99);
    }

    // L * L^T must reconstruct the original SPD matrix.
    #[test]
    fn test_cholesky() {
        let matrix = vec![vec![4.0, 2.0], vec![2.0, 5.0]];

        let l = GaussianProcess::cholesky(&matrix).expect("Should succeed");

        let reconstructed_00 = l[0][0] * l[0][0];
        let reconstructed_01 = l[1][0] * l[0][0];
        let reconstructed_11 = l[1][0] * l[1][0] + l[1][1] * l[1][1];

        assert!((reconstructed_00 - 4.0).abs() < 1e-10);
        assert!((reconstructed_01 - 2.0).abs() < 1e-10);
        assert!((reconstructed_11 - 5.0).abs() < 1e-10);
    }

    // End-to-end: minimize (x - 2)^2 and expect convergence near x = 2.
    #[test]
    fn test_optimizer_finds_minimum() {
        let config = OptimizerConfig {
            bounds: vec![(-5.0, 5.0)],
            n_candidates: 500,
            seed: 42,
            ..Default::default()
        };
        let mut optimizer = BayesianOptimizer::new(config);

        // Seed the surrogate with a coarse grid of evaluations.
        for x in [-4.0_f64, -2.0, 0.0, 2.0, 4.0] {
            let y = (x - 2.0).powi(2);
            optimizer.observe(vec![x], y).ok();
        }

        // Then run the suggest/observe loop.
        for _ in 0..20 {
            let suggestion = optimizer.suggest();
            let y = (suggestion[0] - 2.0).powi(2);
            optimizer.observe(suggestion, y).ok();
        }

        let (best_x, best_y) = optimizer.best().expect("Should have best");
        assert!(best_y < 0.1, "Should find near-minimum, got y={}", best_y);
        assert!(
            (best_x[0] - 2.0).abs() < 0.5,
            "Should find x near 2, got x={}",
            best_x[0]
        );
    }
}
728
// Property-based tests (proptest): invariants that must hold for randomly
// generated inputs rather than hand-picked cases.
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        // Posterior variance is clamped to a positive floor in `predict`,
        // so it must never come back negative.
        #[test]
        fn prop_gp_variance_nonnegative(
            x in -10.0f64..10.0,
            obs_x in prop::collection::vec(-10.0f64..10.0, 1..5),
            obs_y in prop::collection::vec(-10.0f64..10.0, 1..5),
        ) {
            // Lengths are drawn independently; skip mismatched draws.
            if obs_x.len() != obs_y.len() {
                return Ok(());
            }

            let mut gp = GaussianProcess::new(1.0, 1.0, 1e-4);
            for (xi, yi) in obs_x.iter().zip(obs_y.iter()) {
                gp.add_observation(vec![*xi], *yi);
            }
            gp.fit().ok();

            let (_, var) = gp.predict(&[x]);
            prop_assert!(var >= 0.0, "Variance must be non-negative");
        }

        // Every suggestion must respect the configured box bounds,
        // whatever the seed.
        #[test]
        fn prop_suggest_within_bounds(
            min in -100.0f64..0.0,
            max in 0.0f64..100.0,
            seed in 0u64..10000,
        ) {
            let config = OptimizerConfig {
                bounds: vec![(min, max)],
                seed,
                n_candidates: 10,
                ..Default::default()
            };
            let mut optimizer = BayesianOptimizer::new(config);

            optimizer.observe(vec![(min + max) / 2.0], 1.0).ok();

            let suggestion = optimizer.suggest();
            prop_assert!(suggestion[0] >= min && suggestion[0] <= max);
        }

        // The CDF approximation must be (numerically) non-decreasing.
        #[test]
        fn prop_normal_cdf_monotonic(z1 in -5.0f64..5.0, z2 in -5.0f64..5.0) {
            let cdf1 = BayesianOptimizer::normal_cdf(z1);
            let cdf2 = BayesianOptimizer::normal_cdf(z2);

            if z1 < z2 {
                prop_assert!(cdf1 <= cdf2 + 1e-10, "CDF should be monotonic");
            }
        }

        // Expected improvement is clamped at zero by construction.
        #[test]
        fn prop_ei_nonnegative(mu in -10.0f64..10.0, sigma in 0.01f64..10.0) {
            let config = OptimizerConfig::default();
            let mut optimizer = BayesianOptimizer::new(config);
            optimizer.best_y = Some(0.0);

            let ei = optimizer.expected_improvement(mu, sigma);
            prop_assert!(ei >= 0.0, "EI must be non-negative");
        }
    }
}