use rand::Rng as _;
use crate::core::candidate::Candidate;
use crate::core::evaluation::Evaluation;
use crate::core::objective::Direction;
use crate::core::population::Population;
use crate::core::problem::Problem;
use crate::core::result::OptimizationResult;
use crate::core::rng::{Rng, rng_from_seed};
use crate::internal::cholesky::{cholesky, solve};
use crate::operators::real::RealBounds;
use crate::traits::Optimizer;
#[derive(Debug, Clone)]
pub struct BayesianOptConfig {
pub initial_samples: usize,
pub iterations: usize,
pub length_scales: Option<Vec<f64>>,
pub signal_variance: f64,
pub noise_variance: f64,
pub acquisition_samples: usize,
pub seed: u64,
}
impl Default for BayesianOptConfig {
fn default() -> Self {
Self {
initial_samples: 10,
iterations: 40,
length_scales: None,
signal_variance: 1.0,
noise_variance: 1e-6,
acquisition_samples: 1_000,
seed: 42,
}
}
}
#[derive(Debug, Clone)]
pub struct BayesianOpt {
pub config: BayesianOptConfig,
pub bounds: RealBounds,
}
impl BayesianOpt {
pub fn new(config: BayesianOptConfig, bounds: RealBounds) -> Self {
Self { config, bounds }
}
}
impl<P> Optimizer<P> for BayesianOpt
where
P: Problem<Decision = Vec<f64>> + Sync,
{
fn run(&mut self, problem: &P) -> OptimizationResult<P::Decision> {
assert!(
self.config.initial_samples >= 2,
"BayesianOpt initial_samples must be >= 2",
);
assert!(
self.config.signal_variance > 0.0,
"BayesianOpt signal_variance must be > 0"
);
assert!(
self.config.noise_variance > 0.0,
"BayesianOpt noise_variance must be > 0"
);
assert!(
self.config.acquisition_samples >= 1,
"BayesianOpt acquisition_samples must be >= 1",
);
let objectives = problem.objectives();
assert!(
objectives.is_single_objective(),
"BayesianOpt requires exactly one objective",
);
let direction = objectives.objectives[0].direction;
let dim = self.bounds.bounds.len();
if let Some(ls) = &self.config.length_scales {
assert_eq!(
ls.len(),
dim,
"BayesianOpt length_scales.len() must equal dim"
);
}
let length_scales: Vec<f64> = self.config.length_scales.clone().unwrap_or_else(|| {
self.bounds
.bounds
.iter()
.map(|&(lo, hi)| 0.2 * (hi - lo).max(1e-9))
.collect()
});
let mut rng = rng_from_seed(self.config.seed);
let mut decisions: Vec<Vec<f64>> =
Vec::with_capacity(self.config.initial_samples + self.config.iterations);
let mut targets: Vec<f64> = Vec::with_capacity(decisions.capacity());
let mut evaluations = Vec::with_capacity(decisions.capacity());
for _ in 0..self.config.initial_samples {
let x = sample_uniform_in_bounds(&self.bounds, &mut rng);
let e = problem.evaluate(&x);
let t = oriented_target(&e, direction);
decisions.push(x);
targets.push(t);
evaluations.push(e);
}
for _ in 0..self.config.iterations {
let posterior = match GpPosterior::fit(
&decisions,
&targets,
&length_scales,
self.config.signal_variance,
self.config.noise_variance,
) {
Ok(p) => p,
Err(_) => {
let x = sample_uniform_in_bounds(&self.bounds, &mut rng);
let e = problem.evaluate(&x);
targets.push(oriented_target(&e, direction));
decisions.push(x);
evaluations.push(e);
continue;
}
};
let best_target = targets.iter().cloned().fold(f64::INFINITY, f64::min);
let mut best_x = sample_uniform_in_bounds(&self.bounds, &mut rng);
let mut best_ei = -f64::INFINITY;
let mut cand: Vec<f64> = Vec::with_capacity(dim);
let mut k_star_buf: Vec<f64> = Vec::new();
let mut v_temp_buf: Vec<f64> = Vec::new();
for _ in 0..self.config.acquisition_samples {
sample_uniform_in_bounds_into(&self.bounds, &mut rng, &mut cand);
let (mu, sigma) = posterior.predict_into(&cand, &mut k_star_buf, &mut v_temp_buf);
let ei = expected_improvement(mu, sigma, best_target);
if ei > best_ei {
best_ei = ei;
best_x.clear();
best_x.extend_from_slice(&cand);
}
}
let e = problem.evaluate(&best_x);
targets.push(oriented_target(&e, direction));
decisions.push(best_x);
evaluations.push(e);
}
let final_pop: Vec<Candidate<Vec<f64>>> = decisions
.into_iter()
.zip(evaluations)
.map(|(d, e)| Candidate::new(d, e))
.collect();
let mut best_idx = 0;
for i in 1..final_pop.len() {
if better(
&final_pop[i].evaluation,
&final_pop[best_idx].evaluation,
direction,
) {
best_idx = i;
}
}
let total_evaluations = final_pop.len();
let best = final_pop[best_idx].clone();
let front = vec![best.clone()];
OptimizationResult::new(
Population::new(final_pop),
front,
Some(best),
total_evaluations,
self.config.iterations + self.config.initial_samples,
)
}
}
fn oriented_target(e: &Evaluation, direction: Direction) -> f64 {
let base = match direction {
Direction::Minimize => e.objectives[0],
Direction::Maximize => -e.objectives[0],
};
if e.is_feasible() {
base
} else {
base + 1e6 * e.constraint_violation
}
}
fn better(a: &Evaluation, b: &Evaluation, direction: Direction) -> bool {
match (a.is_feasible(), b.is_feasible()) {
(true, false) => true,
(false, true) => false,
(false, false) => a.constraint_violation < b.constraint_violation,
(true, true) => match direction {
Direction::Minimize => a.objectives[0] < b.objectives[0],
Direction::Maximize => a.objectives[0] > b.objectives[0],
},
}
}
fn sample_uniform_in_bounds_into(bounds: &RealBounds, rng: &mut Rng, out: &mut Vec<f64>) {
out.clear();
for &(lo, hi) in &bounds.bounds {
let v = if lo == hi {
lo
} else {
lo + (hi - lo) * rng.random::<f64>()
};
out.push(v);
}
}
fn sample_uniform_in_bounds(bounds: &RealBounds, rng: &mut Rng) -> Vec<f64> {
let mut out = Vec::new();
sample_uniform_in_bounds_into(bounds, rng, &mut out);
out
}
fn rbf_kernel(x: &[f64], y: &[f64], length_scales: &[f64], signal_variance: f64) -> f64 {
let mut sum = 0.0;
for ((a, b), l) in x.iter().zip(y.iter()).zip(length_scales.iter()) {
let d = (a - b) / l.max(1e-12);
sum += d * d;
}
signal_variance * (-0.5 * sum).exp()
}
struct GpPosterior {
decisions: Vec<Vec<f64>>,
length_scales: Vec<f64>,
signal_variance: f64,
alpha: Vec<f64>,
chol_l: Vec<Vec<f64>>,
}
impl GpPosterior {
fn fit(
decisions: &[Vec<f64>],
targets: &[f64],
length_scales: &[f64],
signal_variance: f64,
noise_variance: f64,
) -> Result<Self, &'static str> {
let n = decisions.len();
let mut k = vec![vec![0.0_f64; n]; n];
for i in 0..n {
for j in 0..=i {
let v = rbf_kernel(&decisions[i], &decisions[j], length_scales, signal_variance);
k[i][j] = v;
k[j][i] = v;
}
k[i][i] += noise_variance;
}
let chol_l = cholesky(&k)?;
let alpha = solve(&chol_l, targets);
Ok(Self {
decisions: decisions.to_vec(),
length_scales: length_scales.to_vec(),
signal_variance,
alpha,
chol_l,
})
}
fn predict_into(&self, x: &[f64], k_star: &mut Vec<f64>, v_temp: &mut Vec<f64>) -> (f64, f64) {
let n = self.decisions.len();
k_star.clear();
k_star.reserve(n);
for d in &self.decisions {
k_star.push(rbf_kernel(x, d, &self.length_scales, self.signal_variance));
}
let mu: f64 = k_star
.iter()
.zip(self.alpha.iter())
.map(|(a, b)| a * b)
.sum();
crate::internal::cholesky::solve_lower_into(&self.chol_l, k_star, v_temp);
let v: f64 = v_temp.iter().map(|x| x * x).sum();
let var = (self.signal_variance - v).max(0.0);
(mu, var.sqrt())
}
}
fn expected_improvement(mu: f64, sigma: f64, f_best: f64) -> f64 {
if sigma < 1e-12 {
return 0.0;
}
let improvement = f_best - mu;
let z = improvement / sigma;
improvement * normal_cdf(z) + sigma * normal_pdf(z)
}
fn normal_pdf(z: f64) -> f64 {
(-0.5 * z * z).exp() / (2.0 * std::f64::consts::PI).sqrt()
}
fn normal_cdf(z: f64) -> f64 {
0.5 * (1.0 + erf(z / std::f64::consts::SQRT_2))
}
fn erf(x: f64) -> f64 {
let a1 = 0.254_829_592;
let a2 = -0.284_496_736;
let a3 = 1.421_413_741;
let a4 = -1.453_152_027;
let a5 = 1.061_405_429;
let p = 0.327_591_1;
let sign = if x < 0.0 { -1.0 } else { 1.0 };
let x = x.abs();
let t = 1.0 / (1.0 + p * x);
let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x).exp();
sign * y
}
#[cfg(feature = "async")]
impl BayesianOpt {
pub async fn run_async<P>(
&mut self,
problem: &P,
concurrency: usize,
) -> OptimizationResult<Vec<f64>>
where
P: crate::core::async_problem::AsyncProblem<Decision = Vec<f64>>,
{
use crate::algorithms::parallel_eval_async::evaluate_batch_async;
assert!(
self.config.initial_samples >= 2,
"BayesianOpt initial_samples must be >= 2",
);
assert!(
self.config.signal_variance > 0.0,
"BayesianOpt signal_variance must be > 0"
);
assert!(
self.config.noise_variance > 0.0,
"BayesianOpt noise_variance must be > 0"
);
assert!(
self.config.acquisition_samples >= 1,
"BayesianOpt acquisition_samples must be >= 1",
);
let objectives = problem.objectives();
assert!(
objectives.is_single_objective(),
"BayesianOpt requires exactly one objective",
);
let direction = objectives.objectives[0].direction;
let dim = self.bounds.bounds.len();
if let Some(ls) = &self.config.length_scales {
assert_eq!(
ls.len(),
dim,
"BayesianOpt length_scales.len() must equal dim"
);
}
let length_scales: Vec<f64> = self.config.length_scales.clone().unwrap_or_else(|| {
self.bounds
.bounds
.iter()
.map(|&(lo, hi)| 0.2 * (hi - lo).max(1e-9))
.collect()
});
let mut rng = rng_from_seed(self.config.seed);
let mut decisions: Vec<Vec<f64>> =
Vec::with_capacity(self.config.initial_samples + self.config.iterations);
let mut targets: Vec<f64> = Vec::with_capacity(decisions.capacity());
let mut evaluations: Vec<Evaluation> = Vec::with_capacity(decisions.capacity());
let initial_decisions: Vec<Vec<f64>> = (0..self.config.initial_samples)
.map(|_| sample_uniform_in_bounds(&self.bounds, &mut rng))
.collect();
let initial_cands = evaluate_batch_async(problem, initial_decisions, concurrency).await;
for c in initial_cands {
let t = oriented_target(&c.evaluation, direction);
decisions.push(c.decision);
targets.push(t);
evaluations.push(c.evaluation);
}
for _ in 0..self.config.iterations {
let posterior = match GpPosterior::fit(
&decisions,
&targets,
&length_scales,
self.config.signal_variance,
self.config.noise_variance,
) {
Ok(p) => p,
Err(_) => {
let x = sample_uniform_in_bounds(&self.bounds, &mut rng);
let e = problem.evaluate_async(&x).await;
targets.push(oriented_target(&e, direction));
decisions.push(x);
evaluations.push(e);
continue;
}
};
let best_target = targets.iter().cloned().fold(f64::INFINITY, f64::min);
let mut best_x = sample_uniform_in_bounds(&self.bounds, &mut rng);
let mut best_ei = -f64::INFINITY;
let mut cand: Vec<f64> = Vec::with_capacity(dim);
let mut k_star_buf: Vec<f64> = Vec::new();
let mut v_temp_buf: Vec<f64> = Vec::new();
for _ in 0..self.config.acquisition_samples {
sample_uniform_in_bounds_into(&self.bounds, &mut rng, &mut cand);
let (mu, sigma) = posterior.predict_into(&cand, &mut k_star_buf, &mut v_temp_buf);
let ei = expected_improvement(mu, sigma, best_target);
if ei > best_ei {
best_ei = ei;
best_x.clear();
best_x.extend_from_slice(&cand);
}
}
let e = problem.evaluate_async(&best_x).await;
targets.push(oriented_target(&e, direction));
decisions.push(best_x);
evaluations.push(e);
}
let final_pop: Vec<Candidate<Vec<f64>>> = decisions
.into_iter()
.zip(evaluations)
.map(|(d, e)| Candidate::new(d, e))
.collect();
let mut best_idx = 0;
for i in 1..final_pop.len() {
if better(
&final_pop[i].evaluation,
&final_pop[best_idx].evaluation,
direction,
) {
best_idx = i;
}
}
let total_evaluations = final_pop.len();
let best = final_pop[best_idx].clone();
let front = vec![best.clone()];
OptimizationResult::new(
Population::new(final_pop),
front,
Some(best),
total_evaluations,
self.config.iterations + self.config.initial_samples,
)
}
}
impl crate::traits::AlgorithmInfo for BayesianOpt {
fn name(&self) -> &'static str {
"Bayesian Optimization"
}
fn full_name(&self) -> &'static str {
"Gaussian Process Bayesian Optimization with Expected Improvement"
}
fn seed(&self) -> Option<u64> {
Some(self.config.seed)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::tests_support::{SchafferN1, Sphere1D};
fn make_optimizer(seed: u64) -> BayesianOpt {
BayesianOpt::new(
BayesianOptConfig {
initial_samples: 5,
iterations: 25,
length_scales: None,
signal_variance: 1.0,
noise_variance: 1e-6,
acquisition_samples: 500,
seed,
},
RealBounds::new(vec![(-5.0, 5.0)]),
)
}
#[test]
fn finds_minimum_of_sphere_quickly() {
let mut opt = make_optimizer(1);
let r = opt.run(&Sphere1D);
let best = r.best.unwrap();
assert!(
best.evaluation.objectives[0] < 1e-3,
"BO should converge fast on 1-D sphere; got f = {}",
best.evaluation.objectives[0],
);
assert!(r.evaluations <= 30 + 1);
}
#[test]
fn deterministic_with_same_seed() {
let mut a = make_optimizer(99);
let mut b = make_optimizer(99);
let ra = a.run(&Sphere1D);
let rb = b.run(&Sphere1D);
assert_eq!(
ra.best.unwrap().evaluation.objectives,
rb.best.unwrap().evaluation.objectives,
);
}
#[test]
#[should_panic(expected = "exactly one objective")]
fn multi_objective_panics() {
let mut opt = make_optimizer(0);
let _ = opt.run(&SchafferN1);
}
#[test]
#[should_panic(expected = "length_scales.len() must equal dim")]
fn length_scales_dim_mismatch_panics() {
let mut opt = BayesianOpt::new(
BayesianOptConfig {
initial_samples: 5,
iterations: 5,
length_scales: Some(vec![1.0, 1.0]),
signal_variance: 1.0,
noise_variance: 1e-6,
acquisition_samples: 100,
seed: 0,
},
RealBounds::new(vec![(-1.0, 1.0)]),
);
let _ = opt.run(&Sphere1D);
}
#[test]
fn rbf_kernel_x_equals_y_is_signal_variance() {
let x = vec![0.5_f64, -1.0, 2.0];
let lengths = vec![1.0_f64; 3];
assert!((rbf_kernel(&x, &x, &lengths, 1.5) - 1.5).abs() < 1e-12);
assert!((rbf_kernel(&x, &x, &lengths, 4.0) - 4.0).abs() < 1e-12);
}
#[test]
fn rbf_kernel_unit_distance_unit_length() {
let got = rbf_kernel(&[0.0], &[1.0], &[1.0], 1.0);
let expected = (-0.5_f64).exp();
assert!(
(got - expected).abs() < 1e-12,
"got {got}, expected {expected}"
);
}
#[test]
fn rbf_kernel_far_points_approach_zero() {
let got = rbf_kernel(&[0.0], &[100.0], &[1.0], 1.0);
assert!((0.0..1e-12).contains(&got), "got {got}");
}
#[test]
fn rbf_kernel_length_scale_widens_kernel() {
let small_l = rbf_kernel(&[0.0], &[1.0], &[1.0], 1.0);
let large_l = rbf_kernel(&[0.0], &[1.0], &[10.0], 1.0);
assert!(large_l > small_l, "small_l={small_l} large_l={large_l}");
}
#[test]
fn normal_pdf_at_zero_is_inverse_sqrt_2pi() {
let got = normal_pdf(0.0);
let expected = 1.0 / (2.0 * std::f64::consts::PI).sqrt();
assert!((got - expected).abs() < 1e-12);
}
#[test]
fn normal_pdf_symmetric_about_zero() {
for z in [0.5_f64, 1.0, 2.5] {
assert!((normal_pdf(z) - normal_pdf(-z)).abs() < 1e-12);
}
}
#[test]
fn normal_cdf_at_zero_is_one_half() {
assert!((normal_cdf(0.0) - 0.5).abs() < 1e-9);
}
#[test]
fn normal_cdf_sums_to_one_at_symmetric_points() {
for z in [0.5_f64, 1.0, 2.5] {
let s = normal_cdf(z) + normal_cdf(-z);
assert!((s - 1.0).abs() < 1e-9, "z={z} sum={s}");
}
}
#[test]
fn erf_zero_is_zero() {
assert!(erf(0.0).abs() < 1e-6);
}
#[test]
fn erf_odd_function() {
for x in [0.1_f64, 0.5, 1.0, 2.0] {
assert!((erf(x) + erf(-x)).abs() < 1e-9, "x={x}");
}
}
#[test]
fn expected_improvement_zero_sigma_is_zero() {
assert_eq!(expected_improvement(0.0, 0.0, 1.0), 0.0);
assert_eq!(expected_improvement(-5.0, 1e-13, 1.0), 0.0);
}
#[test]
fn expected_improvement_grows_with_sigma() {
let lo = expected_improvement(1.0, 0.1, 1.0);
let hi = expected_improvement(1.0, 1.0, 1.0);
assert!(hi > lo, "lo={lo} hi={hi}");
}
#[test]
fn expected_improvement_positive_when_mu_below_fbest() {
let ei = expected_improvement(0.5, 0.5, 1.0);
assert!(ei > 0.0, "ei = {ei}");
}
#[test]
fn oriented_target_flips_sign_under_maximize() {
let e = Evaluation::new(vec![3.0]);
assert!((oriented_target(&e, Direction::Minimize) - 3.0).abs() < 1e-12);
assert!((oriented_target(&e, Direction::Maximize) - (-3.0)).abs() < 1e-12);
}
#[test]
fn oriented_target_penalizes_infeasible() {
let mut e = Evaluation::new(vec![1.0]);
e.constraint_violation = 0.5;
let got = oriented_target(&e, Direction::Minimize);
assert!((got - 500_001.0).abs() < 1e-9);
}
#[test]
fn better_helper_feasibility_first() {
let mut a = Evaluation::new(vec![10.0]);
a.constraint_violation = 0.0;
let mut b = Evaluation::new(vec![1.0]);
b.constraint_violation = 1.0;
assert!(better(&a, &b, Direction::Minimize));
assert!(!better(&b, &a, Direction::Minimize));
}
#[test]
fn better_helper_two_feasible_under_min_and_max() {
let a = Evaluation::new(vec![1.0]);
let b = Evaluation::new(vec![2.0]);
assert!(better(&a, &b, Direction::Minimize));
assert!(!better(&b, &a, Direction::Minimize));
assert!(better(&b, &a, Direction::Maximize));
assert!(!better(&a, &b, Direction::Maximize));
}
}