use crate::engine::rng::SimRng;
use crate::error::{SimError, SimResult};
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize, Default)]
pub enum AcquisitionFunction {
#[default]
ExpectedImprovement,
UCB { kappa: f64 },
ProbabilityOfImprovement,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizerConfig {
pub bounds: Vec<(f64, f64)>,
pub acquisition: AcquisitionFunction,
pub length_scale: f64,
pub signal_variance: f64,
pub noise_variance: f64,
pub n_candidates: usize,
pub seed: u64,
}
impl Default for OptimizerConfig {
fn default() -> Self {
Self {
bounds: vec![(-1.0, 1.0)],
acquisition: AcquisitionFunction::default(),
length_scale: 1.0,
signal_variance: 1.0,
noise_variance: 1e-6,
n_candidates: 1000,
seed: 42,
}
}
}
#[derive(Debug, Clone)]
pub struct GaussianProcess {
x_train: Vec<Vec<f64>>,
y_train: Vec<f64>,
length_scale: f64,
signal_variance: f64,
noise_variance: f64,
k_inv_y: Option<Vec<f64>>,
l_matrix: Option<Vec<Vec<f64>>>,
}
impl GaussianProcess {
#[must_use]
pub fn new(length_scale: f64, signal_variance: f64, noise_variance: f64) -> Self {
Self {
x_train: Vec::new(),
y_train: Vec::new(),
length_scale,
signal_variance,
noise_variance,
k_inv_y: None,
l_matrix: None,
}
}
pub fn add_observation(&mut self, x: Vec<f64>, y: f64) {
self.x_train.push(x);
self.y_train.push(y);
self.k_inv_y = None;
self.l_matrix = None;
}
pub fn fit(&mut self) -> SimResult<()> {
if self.x_train.is_empty() {
return Ok(());
}
let n = self.x_train.len();
let mut k_matrix = vec![vec![0.0; n]; n];
for i in 0..n {
for j in 0..n {
k_matrix[i][j] = self.rbf_kernel(&self.x_train[i], &self.x_train[j]);
if i == j {
k_matrix[i][j] += self.noise_variance;
}
}
}
let l = Self::cholesky(&k_matrix)?;
let alpha = Self::solve_triangular(&l, &self.y_train, false);
let k_inv_y = Self::solve_triangular(&l, &alpha, true);
self.l_matrix = Some(l);
self.k_inv_y = Some(k_inv_y);
Ok(())
}
#[must_use]
pub fn predict(&self, x: &[f64]) -> (f64, f64) {
if self.x_train.is_empty() {
return (0.0, self.signal_variance);
}
let Some(k_inv_y) = &self.k_inv_y else {
return (0.0, self.signal_variance);
};
let Some(l) = &self.l_matrix else {
return (0.0, self.signal_variance);
};
let k_star: Vec<f64> = self
.x_train
.iter()
.map(|xi| self.rbf_kernel(xi, x))
.collect();
let mu: f64 = k_star.iter().zip(k_inv_y.iter()).map(|(k, a)| k * a).sum();
let k_xx = self.rbf_kernel(x, x);
let v = Self::solve_triangular(l, &k_star, false);
let variance = k_xx - v.iter().map(|vi| vi * vi).sum::<f64>();
(mu, variance.max(1e-10))
}
fn rbf_kernel(&self, x1: &[f64], x2: &[f64]) -> f64 {
let sq_dist: f64 = x1.iter().zip(x2.iter()).map(|(a, b)| (a - b).powi(2)).sum();
self.signal_variance * (-sq_dist / (2.0 * self.length_scale.powi(2))).exp()
}
fn cholesky(matrix: &[Vec<f64>]) -> SimResult<Vec<Vec<f64>>> {
let n = matrix.len();
let mut l = vec![vec![0.0; n]; n];
for i in 0..n {
for j in 0..=i {
let mut sum = 0.0;
for k in 0..j {
sum += l[i][k] * l[j][k];
}
if i == j {
let val = matrix[i][i] - sum;
if val <= 0.0 {
return Err(SimError::optimization(
"Cholesky decomposition failed: matrix not positive definite",
));
}
l[i][j] = val.sqrt();
} else {
l[i][j] = (matrix[i][j] - sum) / l[j][j];
}
}
}
Ok(l)
}
fn solve_triangular(l: &[Vec<f64>], b: &[f64], transpose: bool) -> Vec<f64> {
let n = b.len();
let mut x = vec![0.0; n];
if transpose {
for i in (0..n).rev() {
let mut sum = b[i];
for j in (i + 1)..n {
sum -= l[j][i] * x[j];
}
x[i] = sum / l[i][i];
}
} else {
for i in 0..n {
let mut sum = b[i];
for j in 0..i {
sum -= l[i][j] * x[j];
}
x[i] = sum / l[i][i];
}
}
x
}
#[must_use]
pub fn n_observations(&self) -> usize {
self.x_train.len()
}
}
#[derive(Debug)]
pub struct BayesianOptimizer {
config: OptimizerConfig,
gp: GaussianProcess,
rng: SimRng,
best_y: Option<f64>,
best_x: Option<Vec<f64>>,
}
impl BayesianOptimizer {
#[must_use]
pub fn new(config: OptimizerConfig) -> Self {
let gp = GaussianProcess::new(
config.length_scale,
config.signal_variance,
config.noise_variance,
);
let rng = SimRng::new(config.seed);
Self {
config,
gp,
rng,
best_y: None,
best_x: None,
}
}
pub fn observe(&mut self, x: Vec<f64>, y: f64) -> SimResult<()> {
self.gp.add_observation(x.clone(), y);
if self.best_y.is_none() || y < self.best_y.unwrap_or(f64::INFINITY) {
self.best_y = Some(y);
self.best_x = Some(x);
}
self.gp.fit()
}
#[must_use]
pub fn suggest(&mut self) -> Vec<f64> {
if self.gp.n_observations() == 0 {
return self.random_point();
}
let mut best_acq = f64::NEG_INFINITY;
let mut best_candidate = self.random_point();
for _ in 0..self.config.n_candidates {
let candidate = self.random_point();
let acq_value = self.evaluate_acquisition(&candidate);
if acq_value > best_acq {
best_acq = acq_value;
best_candidate = candidate;
}
}
best_candidate
}
fn evaluate_acquisition(&self, x: &[f64]) -> f64 {
let (mu, variance) = self.gp.predict(x);
let sigma = variance.sqrt();
match self.config.acquisition {
AcquisitionFunction::ExpectedImprovement => self.expected_improvement(mu, sigma),
AcquisitionFunction::UCB { kappa } => Self::upper_confidence_bound(mu, sigma, kappa),
AcquisitionFunction::ProbabilityOfImprovement => {
self.probability_of_improvement(mu, sigma)
}
}
}
fn expected_improvement(&self, mu: f64, sigma: f64) -> f64 {
let best = self.best_y.unwrap_or(0.0);
if sigma < 1e-10 {
return 0.0;
}
let z = (best - mu) / sigma;
let pdf = Self::normal_pdf(z);
let cdf = Self::normal_cdf(z);
(sigma * (z * cdf + pdf)).max(0.0)
}
fn upper_confidence_bound(mu: f64, sigma: f64, kappa: f64) -> f64 {
-mu + kappa * sigma
}
fn probability_of_improvement(&self, mu: f64, sigma: f64) -> f64 {
let best = self.best_y.unwrap_or(0.0);
if sigma < 1e-10 {
return if mu < best { 1.0 } else { 0.0 };
}
let z = (best - mu) / sigma;
Self::normal_cdf(z)
}
fn normal_pdf(z: f64) -> f64 {
const INV_SQRT_2PI: f64 = 0.398_942_280_401_432_7; INV_SQRT_2PI * (-0.5 * z * z).exp()
}
fn normal_cdf(z: f64) -> f64 {
const A1: f64 = 0.254_829_592;
const A2: f64 = -0.284_496_736;
const A3: f64 = 1.421_413_741;
const A4: f64 = -1.453_152_027;
const A5: f64 = 1.061_405_429;
const P: f64 = 0.327_591_1;
let sign = if z < 0.0 { -1.0 } else { 1.0 };
let z_abs = z.abs();
let t = 1.0 / (1.0 + P * z_abs);
let y = 1.0
- (((((A5 * t + A4) * t) + A3) * t + A2) * t + A1) * t * (-z_abs * z_abs / 2.0).exp();
0.5 * (1.0 + sign * y)
}
fn random_point(&mut self) -> Vec<f64> {
self.config
.bounds
.iter()
.map(|(min, max)| self.rng.gen_range_f64(*min, *max))
.collect()
}
#[must_use]
pub fn best(&self) -> Option<(&[f64], f64)> {
match (&self.best_x, self.best_y) {
(Some(x), Some(y)) => Some((x.as_slice(), y)),
_ => None,
}
}
#[must_use]
pub fn n_observations(&self) -> usize {
self.gp.n_observations()
}
#[must_use]
pub const fn config(&self) -> &OptimizerConfig {
&self.config
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizationResult {
pub best_x: Vec<f64>,
pub best_y: f64,
pub n_evaluations: usize,
pub history: Vec<(Vec<f64>, f64)>,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_gp_create() {
let gp = GaussianProcess::new(1.0, 1.0, 1e-6);
assert_eq!(gp.n_observations(), 0);
}
#[test]
fn test_gp_add_observation() {
let mut gp = GaussianProcess::new(1.0, 1.0, 1e-6);
gp.add_observation(vec![0.0], 1.0);
gp.add_observation(vec![1.0], 2.0);
assert_eq!(gp.n_observations(), 2);
}
#[test]
fn test_gp_fit() {
let mut gp = GaussianProcess::new(1.0, 1.0, 1e-6);
gp.add_observation(vec![0.0], 0.0);
gp.add_observation(vec![1.0], 1.0);
gp.add_observation(vec![2.0], 4.0);
assert!(gp.fit().is_ok());
}
#[test]
fn test_gp_predict_empty() {
let gp = GaussianProcess::new(1.0, 1.0, 1e-6);
let (mu, var) = gp.predict(&[0.5]);
assert!((mu - 0.0).abs() < f64::EPSILON);
assert!((var - 1.0).abs() < f64::EPSILON); }
#[test]
fn test_gp_predict_interpolation() {
let mut gp = GaussianProcess::new(1.0, 1.0, 1e-6);
gp.add_observation(vec![0.0], 0.0);
gp.add_observation(vec![1.0], 1.0);
gp.fit().ok();
let (mu0, _) = gp.predict(&[0.0]);
let (mu1, _) = gp.predict(&[1.0]);
assert!((mu0 - 0.0).abs() < 0.1);
assert!((mu1 - 1.0).abs() < 0.1);
}
#[test]
fn test_gp_variance_decreases_near_data() {
let mut gp = GaussianProcess::new(1.0, 1.0, 1e-6);
gp.add_observation(vec![0.0], 0.0);
gp.fit().ok();
let (_, var_near) = gp.predict(&[0.0]);
let (_, var_far) = gp.predict(&[5.0]);
assert!(
var_near < var_far,
"Variance should be lower near observations"
);
}
#[test]
fn test_optimizer_create() {
let config = OptimizerConfig::default();
let optimizer = BayesianOptimizer::new(config);
assert_eq!(optimizer.n_observations(), 0);
}
#[test]
fn test_optimizer_observe() {
let config = OptimizerConfig {
bounds: vec![(-5.0, 5.0)],
..Default::default()
};
let mut optimizer = BayesianOptimizer::new(config);
optimizer.observe(vec![0.0], 1.0).ok();
optimizer.observe(vec![1.0], 0.5).ok();
assert_eq!(optimizer.n_observations(), 2);
let (best_x, best_y) = optimizer.best().expect("Should have best");
assert!((best_y - 0.5).abs() < f64::EPSILON);
assert!((best_x[0] - 1.0).abs() < f64::EPSILON);
}
#[test]
fn test_optimizer_suggest_empty() {
let config = OptimizerConfig {
bounds: vec![(-5.0, 5.0)],
..Default::default()
};
let mut optimizer = BayesianOptimizer::new(config);
let suggestion = optimizer.suggest();
assert_eq!(suggestion.len(), 1);
assert!(suggestion[0] >= -5.0 && suggestion[0] <= 5.0);
}
#[test]
fn test_optimizer_suggest_with_observations() {
let config = OptimizerConfig {
bounds: vec![(-5.0, 5.0)],
n_candidates: 100,
..Default::default()
};
let mut optimizer = BayesianOptimizer::new(config);
optimizer.observe(vec![0.0], 1.0).ok();
optimizer.observe(vec![1.0], 0.5).ok();
optimizer.observe(vec![-1.0], 1.5).ok();
let suggestion = optimizer.suggest();
assert_eq!(suggestion.len(), 1);
assert!(suggestion[0] >= -5.0 && suggestion[0] <= 5.0);
}
#[test]
fn test_optimizer_multidimensional() {
let config = OptimizerConfig {
bounds: vec![(-5.0, 5.0), (-5.0, 5.0)],
n_candidates: 100,
..Default::default()
};
let mut optimizer = BayesianOptimizer::new(config);
optimizer.observe(vec![0.0, 0.0], 1.0).ok();
optimizer.observe(vec![1.0, 1.0], 0.5).ok();
let suggestion = optimizer.suggest();
assert_eq!(suggestion.len(), 2);
}
#[test]
fn test_acquisition_ei() {
let config = OptimizerConfig {
bounds: vec![(-5.0, 5.0)],
acquisition: AcquisitionFunction::ExpectedImprovement,
..Default::default()
};
let optimizer = BayesianOptimizer::new(config);
let ei = optimizer.expected_improvement(0.0, 1.0);
assert!(ei > 0.0);
let ei_zero_var = optimizer.expected_improvement(0.0, 0.0);
assert!((ei_zero_var - 0.0).abs() < f64::EPSILON);
}
#[test]
fn test_acquisition_ucb() {
let ucb = BayesianOptimizer::upper_confidence_bound(1.0, 0.5, 2.0);
let expected = -1.0 + 2.0 * 0.5;
assert!((ucb - expected).abs() < f64::EPSILON);
}
#[test]
fn test_acquisition_pi() {
let config = OptimizerConfig {
bounds: vec![(-5.0, 5.0)],
acquisition: AcquisitionFunction::ProbabilityOfImprovement,
..Default::default()
};
let mut optimizer = BayesianOptimizer::new(config);
optimizer.best_y = Some(1.0);
let pi = optimizer.probability_of_improvement(1.0, 1.0);
assert!((pi - 0.5).abs() < 0.01);
let pi_good = optimizer.probability_of_improvement(-1.0, 1.0);
assert!(pi_good > 0.9);
let pi_bad = optimizer.probability_of_improvement(3.0, 1.0);
assert!(pi_bad < 0.1);
}
#[test]
fn test_normal_pdf() {
let pdf_0 = BayesianOptimizer::normal_pdf(0.0);
assert!((pdf_0 - 0.3989).abs() < 0.01);
}
#[test]
fn test_normal_cdf() {
let cdf_0 = BayesianOptimizer::normal_cdf(0.0);
assert!((cdf_0 - 0.5).abs() < 0.01);
let cdf_neg3 = BayesianOptimizer::normal_cdf(-3.0);
assert!(cdf_neg3 < 0.01);
let cdf_pos3 = BayesianOptimizer::normal_cdf(3.0);
assert!(cdf_pos3 > 0.99);
}
#[test]
fn test_cholesky() {
let matrix = vec![vec![4.0, 2.0], vec![2.0, 5.0]];
let l = GaussianProcess::cholesky(&matrix).expect("Should succeed");
let reconstructed_00 = l[0][0] * l[0][0];
let reconstructed_01 = l[1][0] * l[0][0];
let reconstructed_11 = l[1][0] * l[1][0] + l[1][1] * l[1][1];
assert!((reconstructed_00 - 4.0).abs() < 1e-10);
assert!((reconstructed_01 - 2.0).abs() < 1e-10);
assert!((reconstructed_11 - 5.0).abs() < 1e-10);
}
#[test]
fn test_optimizer_finds_minimum() {
let config = OptimizerConfig {
bounds: vec![(-5.0, 5.0)],
n_candidates: 500,
seed: 42,
..Default::default()
};
let mut optimizer = BayesianOptimizer::new(config);
for x in [-4.0_f64, -2.0, 0.0, 2.0, 4.0] {
let y = (x - 2.0).powi(2);
optimizer.observe(vec![x], y).ok();
}
for _ in 0..20 {
let suggestion = optimizer.suggest();
let y = (suggestion[0] - 2.0).powi(2);
optimizer.observe(suggestion, y).ok();
}
let (best_x, best_y) = optimizer.best().expect("Should have best");
assert!(best_y < 0.1, "Should find near-minimum, got y={}", best_y);
assert!(
(best_x[0] - 2.0).abs() < 0.5,
"Should find x near 2, got x={}",
best_x[0]
);
}
}
#[cfg(test)]
mod proptests {
use super::*;
use proptest::prelude::*;
proptest! {
#[test]
fn prop_gp_variance_nonnegative(
x in -10.0f64..10.0,
obs_x in prop::collection::vec(-10.0f64..10.0, 1..5),
obs_y in prop::collection::vec(-10.0f64..10.0, 1..5),
) {
if obs_x.len() != obs_y.len() {
return Ok(());
}
let mut gp = GaussianProcess::new(1.0, 1.0, 1e-4);
for (xi, yi) in obs_x.iter().zip(obs_y.iter()) {
gp.add_observation(vec![*xi], *yi);
}
gp.fit().ok();
let (_, var) = gp.predict(&[x]);
prop_assert!(var >= 0.0, "Variance must be non-negative");
}
#[test]
fn prop_suggest_within_bounds(
min in -100.0f64..0.0,
max in 0.0f64..100.0,
seed in 0u64..10000,
) {
let config = OptimizerConfig {
bounds: vec![(min, max)],
seed,
n_candidates: 10,
..Default::default()
};
let mut optimizer = BayesianOptimizer::new(config);
optimizer.observe(vec![(min + max) / 2.0], 1.0).ok();
let suggestion = optimizer.suggest();
prop_assert!(suggestion[0] >= min && suggestion[0] <= max);
}
#[test]
fn prop_normal_cdf_monotonic(z1 in -5.0f64..5.0, z2 in -5.0f64..5.0) {
let cdf1 = BayesianOptimizer::normal_cdf(z1);
let cdf2 = BayesianOptimizer::normal_cdf(z2);
if z1 < z2 {
prop_assert!(cdf1 <= cdf2 + 1e-10, "CDF should be monotonic");
}
}
#[test]
fn prop_ei_nonnegative(mu in -10.0f64..10.0, sigma in 0.01f64..10.0) {
let config = OptimizerConfig::default();
let mut optimizer = BayesianOptimizer::new(config);
optimizer.best_y = Some(0.0);
let ei = optimizer.expected_improvement(mu, sigma);
prop_assert!(ei >= 0.0, "EI must be non-negative");
}
}
}