use crate::error::{NumRs2Error, Result};
use scirs2_core::random::{thread_rng, Distribution, Normal, Rng, Uniform};
use std::f64::consts::PI;
#[derive(Debug, Clone, Default)]
pub enum KernelType {
SquaredExponential,
#[default]
Matern52,
}
#[derive(Debug, Clone)]
struct Kernel {
kernel_type: KernelType,
length_scale: f64,
signal_variance: f64,
}
impl Kernel {
fn new(kernel_type: KernelType) -> Self {
Self {
kernel_type,
length_scale: 1.0,
signal_variance: 1.0,
}
}
fn compute(&self, x1: &[f64], x2: &[f64]) -> f64 {
let sq_dist = squared_euclidean_distance(x1, x2);
match self.kernel_type {
KernelType::SquaredExponential => {
let l2 = self.length_scale * self.length_scale;
self.signal_variance * (-sq_dist / (2.0 * l2)).exp()
}
KernelType::Matern52 => {
let r = sq_dist.sqrt();
let sqrt5_r_l = 5.0_f64.sqrt() * r / self.length_scale;
let sq_term = 5.0 * sq_dist / (3.0 * self.length_scale * self.length_scale);
self.signal_variance * (1.0 + sqrt5_r_l + sq_term) * (-sqrt5_r_l).exp()
}
}
}
fn compute_matrix(&self, x: &[Vec<f64>]) -> Vec<Vec<f64>> {
let n = x.len();
let mut k = vec![vec![0.0; n]; n];
for i in 0..n {
k[i][i] = self.signal_variance;
for j in (i + 1)..n {
let val = self.compute(&x[i], &x[j]);
k[i][j] = val;
k[j][i] = val;
}
}
k
}
fn compute_vector(&self, x_new: &[f64], x_train: &[Vec<f64>]) -> Vec<f64> {
x_train.iter().map(|xi| self.compute(x_new, xi)).collect()
}
fn with_params(&self, length_scale: f64, signal_variance: f64) -> Self {
Self {
kernel_type: self.kernel_type.clone(),
length_scale,
signal_variance,
}
}
}
fn squared_euclidean_distance(a: &[f64], b: &[f64]) -> f64 {
a.iter()
.zip(b.iter())
.map(|(&ai, &bi)| {
let d = ai - bi;
d * d
})
.sum()
}
#[derive(Debug, Clone)]
pub struct GaussianProcess {
kernel: Kernel,
x_train: Vec<Vec<f64>>,
y_train: Vec<f64>,
noise_variance: f64,
cholesky_l: Vec<Vec<f64>>,
alpha: Vec<f64>,
y_mean: f64,
y_std: f64,
}
impl GaussianProcess {
pub fn new(kernel_type: KernelType, noise_variance: f64) -> Self {
Self {
kernel: Kernel::new(kernel_type),
x_train: Vec::new(),
y_train: Vec::new(),
noise_variance,
cholesky_l: Vec::new(),
alpha: Vec::new(),
y_mean: 0.0,
y_std: 1.0,
}
}
pub fn fit(&mut self, x: &[Vec<f64>], y: &[f64]) -> Result<()> {
if x.is_empty() || y.is_empty() {
return Err(NumRs2Error::InvalidInput(
"Training data cannot be empty".to_string(),
));
}
if x.len() != y.len() {
return Err(NumRs2Error::InvalidInput(format!(
"x and y must have same length: {} vs {}",
x.len(),
y.len()
)));
}
self.x_train = x.to_vec();
self.y_mean = y.iter().sum::<f64>() / y.len() as f64;
let y_var = y.iter().map(|&yi| (yi - self.y_mean).powi(2)).sum::<f64>() / y.len() as f64;
self.y_std = if y_var > 1e-12 { y_var.sqrt() } else { 1.0 };
self.y_train = y
.iter()
.map(|&yi| (yi - self.y_mean) / self.y_std)
.collect();
let mut k = self.kernel.compute_matrix(&self.x_train);
let n = k.len();
let noise = self.noise_variance / (self.y_std * self.y_std);
for i in 0..n {
k[i][i] += noise + 1e-8;
}
self.cholesky_l = cholesky_decomposition(&k)?;
let z = forward_substitution(&self.cholesky_l, &self.y_train)?;
self.alpha = backward_substitution_transpose(&self.cholesky_l, &z)?;
Ok(())
}
pub fn predict(&self, x_new: &[f64]) -> Result<(f64, f64)> {
if self.x_train.is_empty() {
return Err(NumRs2Error::InvalidOperation(
"GP model has not been fitted yet".to_string(),
));
}
let k_star = self.kernel.compute_vector(x_new, &self.x_train);
let mu_norm: f64 = k_star
.iter()
.zip(self.alpha.iter())
.map(|(&ki, &ai)| ki * ai)
.sum();
let mu = mu_norm * self.y_std + self.y_mean;
let k_self = self.kernel.signal_variance;
let v = forward_substitution(&self.cholesky_l, &k_star)?;
let v_sq: f64 = v.iter().map(|&vi| vi * vi).sum();
let var_norm = (k_self - v_sq).max(1e-12);
let var = var_norm * self.y_std * self.y_std;
Ok((mu, var))
}
pub fn predict_batch(&self, x_new: &[Vec<f64>]) -> Result<(Vec<f64>, Vec<f64>)> {
let mut means = Vec::with_capacity(x_new.len());
let mut variances = Vec::with_capacity(x_new.len());
for x in x_new {
let (mu, var) = self.predict(x)?;
means.push(mu);
variances.push(var);
}
Ok((means, variances))
}
pub fn negative_log_marginal_likelihood(&self) -> Result<f64> {
if self.x_train.is_empty() {
return Err(NumRs2Error::InvalidOperation(
"GP model has not been fitted yet".to_string(),
));
}
let n = self.y_train.len() as f64;
let data_fit: f64 = self
.y_train
.iter()
.zip(self.alpha.iter())
.map(|(&yi, &ai)| yi * ai)
.sum();
let log_det: f64 = self
.cholesky_l
.iter()
.enumerate()
.map(|(i, row)| row[i].ln())
.sum();
let constant = 0.5 * n * (2.0 * PI).ln();
Ok(0.5 * data_fit + log_det + constant)
}
pub fn optimize_hyperparameters(
&mut self,
x: &[Vec<f64>],
y: &[f64],
n_restarts: usize,
) -> Result<()> {
let mut rng = thread_rng();
let log_dist = Uniform::new(-2.0_f64, 2.0_f64).map_err(|e| {
NumRs2Error::ComputationError(format!("Uniform creation failed: {}", e))
})?;
let mut best_nll = f64::INFINITY;
let mut best_ls = self.kernel.length_scale;
let mut best_sv = self.kernel.signal_variance;
for restart in 0..n_restarts {
let (init_ls, init_sv) = if restart == 0 {
(self.kernel.length_scale, self.kernel.signal_variance)
} else {
let log_ls: f64 = log_dist.sample(&mut rng);
let log_sv: f64 = log_dist.sample(&mut rng);
(log_ls.exp(), log_sv.exp())
};
let refinement = self.refine_hyperparameters(x, y, init_ls, init_sv)?;
if refinement.0 < best_nll {
best_nll = refinement.0;
best_ls = refinement.1;
best_sv = refinement.2;
}
}
self.kernel = self.kernel.with_params(best_ls, best_sv);
self.fit(x, y)?;
Ok(())
}
fn refine_hyperparameters(
&mut self,
x: &[Vec<f64>],
y: &[f64],
init_ls: f64,
init_sv: f64,
) -> Result<(f64, f64, f64)> {
let mut current_ls = init_ls;
let mut current_sv = init_sv;
self.kernel = self.kernel.with_params(current_ls, current_sv);
self.fit(x, y)?;
let mut current_nll = self.negative_log_marginal_likelihood()?;
let factors = [0.5, 0.8, 1.0, 1.25, 2.0];
for _ in 0..5 {
for &factor in &factors {
let trial_ls = (current_ls * factor).clamp(1e-4, 100.0);
self.kernel = self.kernel.with_params(trial_ls, current_sv);
if self.fit(x, y).is_ok() {
if let Ok(nll) = self.negative_log_marginal_likelihood() {
if nll < current_nll {
current_nll = nll;
current_ls = trial_ls;
}
}
}
}
for &factor in &factors {
let trial_sv = (current_sv * factor).clamp(1e-4, 100.0);
self.kernel = self.kernel.with_params(current_ls, trial_sv);
if self.fit(x, y).is_ok() {
if let Ok(nll) = self.negative_log_marginal_likelihood() {
if nll < current_nll {
current_nll = nll;
current_sv = trial_sv;
}
}
}
}
}
Ok((current_nll, current_ls, current_sv))
}
pub fn sample_posterior(&self, x_points: &[Vec<f64>], rng: &mut impl Rng) -> Result<Vec<f64>> {
let (means, variances) = self.predict_batch(x_points)?;
let normal = Normal::new(0.0, 1.0).map_err(|e| {
NumRs2Error::ComputationError(format!("Failed to create Normal distribution: {}", e))
})?;
let samples: Vec<f64> = means
.iter()
.zip(variances.iter())
.map(|(&mu, &var)| {
let z: f64 = normal.sample(rng);
mu + z * var.sqrt()
})
.collect();
Ok(samples)
}
pub fn kernel_type(&self) -> &KernelType {
&self.kernel.kernel_type
}
pub fn length_scale(&self) -> f64 {
self.kernel.length_scale
}
pub fn signal_variance(&self) -> f64 {
self.kernel.signal_variance
}
pub fn n_train(&self) -> usize {
self.x_train.len()
}
}
fn cholesky_decomposition(a: &[Vec<f64>]) -> Result<Vec<Vec<f64>>> {
let n = a.len();
if n == 0 {
return Err(NumRs2Error::InvalidInput(
"Matrix cannot be empty".to_string(),
));
}
let mut l = vec![vec![0.0; n]; n];
for j in 0..n {
let mut sum = 0.0;
for k in 0..j {
sum += l[j][k] * l[j][k];
}
let diag = a[j][j] - sum;
if diag <= 0.0 {
return Err(NumRs2Error::NumericalError(format!(
"Matrix is not positive definite at index {} (diag = {:.6e})",
j, diag
)));
}
l[j][j] = diag.sqrt();
for i in (j + 1)..n {
let mut s = 0.0;
for k in 0..j {
s += l[i][k] * l[j][k];
}
l[i][j] = (a[i][j] - s) / l[j][j];
}
}
Ok(l)
}
fn forward_substitution(l: &[Vec<f64>], b: &[f64]) -> Result<Vec<f64>> {
let n = l.len();
let mut x = vec![0.0; n];
for i in 0..n {
let mut sum = 0.0;
for j in 0..i {
sum += l[i][j] * x[j];
}
if l[i][i].abs() < 1e-15 {
return Err(NumRs2Error::NumericalError(
"Singular matrix in forward substitution".to_string(),
));
}
x[i] = (b[i] - sum) / l[i][i];
}
Ok(x)
}
fn backward_substitution_transpose(l: &[Vec<f64>], b: &[f64]) -> Result<Vec<f64>> {
let n = l.len();
let mut x = vec![0.0; n];
for i in (0..n).rev() {
let mut sum = 0.0;
for j in (i + 1)..n {
sum += l[j][i] * x[j];
}
if l[i][i].abs() < 1e-15 {
return Err(NumRs2Error::NumericalError(
"Singular matrix in backward substitution".to_string(),
));
}
x[i] = (b[i] - sum) / l[i][i];
}
Ok(x)
}
#[derive(Debug, Clone, Default)]
pub enum AcquisitionType {
#[default]
ExpectedImprovement,
ProbabilityOfImprovement,
UpperConfidenceBound,
ThompsonSampling,
}
fn compute_acquisition(
acq: &AcquisitionType,
mu: f64,
sigma: f64,
best_y: f64,
xi: f64,
kappa: f64,
) -> f64 {
if sigma < 1e-12 {
return 0.0;
}
match acq {
AcquisitionType::ExpectedImprovement => {
let improvement = best_y - mu - xi;
let z = improvement / sigma;
improvement * standard_normal_cdf(z) + sigma * standard_normal_pdf(z)
}
AcquisitionType::ProbabilityOfImprovement => {
let z = (best_y - mu - xi) / sigma;
standard_normal_cdf(z)
}
AcquisitionType::UpperConfidenceBound => {
-(mu - kappa * sigma)
}
AcquisitionType::ThompsonSampling => {
0.0
}
}
}
fn standard_normal_pdf(z: f64) -> f64 {
(-0.5 * z * z).exp() / (2.0 * PI).sqrt()
}
fn standard_normal_cdf(z: f64) -> f64 {
0.5 * (1.0 + erf_approx(z / std::f64::consts::SQRT_2))
}
fn erf_approx(x: f64) -> f64 {
let sign = if x >= 0.0 { 1.0 } else { -1.0 };
let x = x.abs();
let p = 0.3275911;
let a1 = 0.254829592;
let a2 = -0.284496736;
let a3 = 1.421413741;
let a4 = -1.453152027;
let a5 = 1.061405429;
let t = 1.0 / (1.0 + p * x);
let t2 = t * t;
let t3 = t2 * t;
let t4 = t3 * t;
let t5 = t4 * t;
let inner = a1 * t + a2 * t2 + a3 * t3 + a4 * t4 + a5 * t5;
sign * (1.0 - inner * (-x * x).exp())
}
#[derive(Debug, Clone)]
pub struct BayesOptConfig {
pub n_initial: usize,
pub n_iterations: usize,
pub bounds: Vec<(f64, f64)>,
pub acquisition: AcquisitionType,
pub kernel: KernelType,
pub seed: Option<u64>,
pub xi: f64,
pub kappa: f64,
pub noise_variance: f64,
pub n_candidates: usize,
pub n_hp_restarts: usize,
pub optimize_hyperparams: bool,
pub hp_optimize_interval: usize,
}
impl Default for BayesOptConfig {
fn default() -> Self {
Self {
n_initial: 5,
n_iterations: 25,
bounds: vec![(0.0, 1.0)],
acquisition: AcquisitionType::default(),
kernel: KernelType::default(),
seed: None,
xi: 0.01,
kappa: 2.0,
noise_variance: 1e-6,
n_candidates: 1000,
n_hp_restarts: 3,
optimize_hyperparams: true,
hp_optimize_interval: 5,
}
}
}
#[derive(Debug, Clone)]
pub struct BayesOptResult {
pub x_best: Vec<f64>,
pub f_best: f64,
pub x_history: Vec<Vec<f64>>,
pub f_history: Vec<f64>,
pub n_evaluations: usize,
pub success: bool,
pub model: GaussianProcess,
}
fn latin_hypercube_sampling(
n_samples: usize,
bounds: &[(f64, f64)],
rng: &mut impl Rng,
) -> Result<Vec<Vec<f64>>> {
let n_dims = bounds.len();
if n_dims == 0 {
return Err(NumRs2Error::InvalidInput(
"Bounds cannot be empty".to_string(),
));
}
if n_samples == 0 {
return Err(NumRs2Error::InvalidInput(
"Number of samples must be positive".to_string(),
));
}
let uniform = Uniform::new(0.0_f64, 1.0_f64)
.map_err(|e| NumRs2Error::ComputationError(format!("Uniform creation failed: {}", e)))?;
let mut samples = vec![vec![0.0; n_dims]; n_samples];
for dim in 0..n_dims {
let mut perm: Vec<usize> = (0..n_samples).collect();
for i in (1..n_samples).rev() {
let j_f64: f64 = uniform.sample(rng);
let j = ((j_f64 * (i + 1) as f64) as usize).min(i);
perm.swap(i, j);
}
let (low, high) = bounds[dim];
let step = (high - low) / n_samples as f64;
for (i, &p) in perm.iter().enumerate() {
let u: f64 = uniform.sample(rng);
samples[i][dim] = low + (p as f64 + u) * step;
}
}
Ok(samples)
}
fn random_sampling(
n_samples: usize,
bounds: &[(f64, f64)],
rng: &mut impl Rng,
) -> Result<Vec<Vec<f64>>> {
let n_dims = bounds.len();
let mut samples = Vec::with_capacity(n_samples);
for _ in 0..n_samples {
let mut point = Vec::with_capacity(n_dims);
for &(low, high) in bounds {
let dist = Uniform::new(low, high).map_err(|e| {
NumRs2Error::ComputationError(format!("Uniform creation failed: {}", e))
})?;
let val: f64 = dist.sample(rng);
point.push(val);
}
samples.push(point);
}
Ok(samples)
}
fn optimize_acquisition(
gp: &GaussianProcess,
acq: &AcquisitionType,
bounds: &[(f64, f64)],
best_y: f64,
xi: f64,
kappa: f64,
n_candidates: usize,
rng: &mut impl Rng,
) -> Result<Vec<f64>> {
match acq {
AcquisitionType::ThompsonSampling => {
let candidates = random_sampling(n_candidates, bounds, rng)?;
let posterior_samples = gp.sample_posterior(&candidates, rng)?;
let mut best_idx = 0;
let mut best_val = f64::INFINITY;
for (i, &val) in posterior_samples.iter().enumerate() {
if val < best_val {
best_val = val;
best_idx = i;
}
}
Ok(candidates[best_idx].clone())
}
_ => {
let candidates = random_sampling(n_candidates, bounds, rng)?;
let mut best_acq = f64::NEG_INFINITY;
let mut best_point = candidates[0].clone();
for candidate in &candidates {
let (mu, var) = gp.predict(candidate)?;
let sigma = var.sqrt();
let acq_val = compute_acquisition(acq, mu, sigma, best_y, xi, kappa);
if acq_val > best_acq {
best_acq = acq_val;
best_point = candidate.clone();
}
}
let n_refine = 50;
for _ in 0..n_refine {
let mut perturbed = best_point.clone();
for (d, &(low, high)) in bounds.iter().enumerate() {
let range = (high - low) * 0.05;
let pert_dist = Uniform::new(-range, range).map_err(|e| {
NumRs2Error::ComputationError(format!("Uniform creation failed: {}", e))
})?;
let delta: f64 = pert_dist.sample(rng);
perturbed[d] = (perturbed[d] + delta).max(low).min(high);
}
let (mu, var) = gp.predict(&perturbed)?;
let sigma = var.sqrt();
let acq_val = compute_acquisition(acq, mu, sigma, best_y, xi, kappa);
if acq_val > best_acq {
best_acq = acq_val;
best_point = perturbed;
}
}
Ok(best_point)
}
}
}
pub fn bayesian_optimize<F>(objective: F, config: BayesOptConfig) -> Result<BayesOptResult>
where
F: Fn(&[f64]) -> f64,
{
let n_dims = config.bounds.len();
if n_dims == 0 {
return Err(NumRs2Error::InvalidInput(
"Bounds cannot be empty".to_string(),
));
}
for (i, &(low, high)) in config.bounds.iter().enumerate() {
if low >= high {
return Err(NumRs2Error::InvalidInput(format!(
"Invalid bounds for dimension {}: lower ({}) must be less than upper ({})",
i, low, high
)));
}
}
let mut rng = thread_rng();
let mut gp = GaussianProcess::new(config.kernel.clone(), config.noise_variance);
let initial_points = latin_hypercube_sampling(config.n_initial, &config.bounds, &mut rng)?;
let mut x_history: Vec<Vec<f64>> = Vec::new();
let mut f_history: Vec<f64> = Vec::new();
let mut f_best = f64::INFINITY;
let mut x_best = initial_points[0].clone();
for point in &initial_points {
let value = objective(point);
x_history.push(point.clone());
f_history.push(value);
if value < f_best {
f_best = value;
x_best = point.clone();
}
}
let initial_best = f_best;
for iteration in 0..config.n_iterations {
gp.fit(&x_history, &f_history)?;
if config.optimize_hyperparams
&& config.hp_optimize_interval > 0
&& iteration % config.hp_optimize_interval == 0
{
let _ = gp.optimize_hyperparameters(&x_history, &f_history, config.n_hp_restarts);
}
let next_point = optimize_acquisition(
&gp,
&config.acquisition,
&config.bounds,
f_best,
config.xi,
config.kappa,
config.n_candidates,
&mut rng,
)?;
let value = objective(&next_point);
x_history.push(next_point.clone());
f_history.push(value);
if value < f_best {
f_best = value;
x_best = next_point;
}
}
let _ = gp.fit(&x_history, &f_history);
let n_evaluations = x_history.len();
let success = f_best < initial_best;
Ok(BayesOptResult {
x_best,
f_best,
x_history,
f_history,
n_evaluations,
success,
model: gp,
})
}
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_relative_eq;
#[test]
fn test_rbf_kernel_properties() {
let kernel = Kernel {
kernel_type: KernelType::SquaredExponential,
length_scale: 1.0,
signal_variance: 1.0,
};
let k_same = kernel.compute(&[0.0, 0.0], &[0.0, 0.0]);
assert_relative_eq!(k_same, 1.0, epsilon = 1e-10);
let k_far = kernel.compute(&[0.0, 0.0], &[100.0, 100.0]);
assert!(
k_far < 1e-10,
"Far points should have near-zero kernel: {}",
k_far
);
let k12 = kernel.compute(&[1.0, 2.0], &[3.0, 4.0]);
let k21 = kernel.compute(&[3.0, 4.0], &[1.0, 2.0]);
assert_relative_eq!(k12, k21, epsilon = 1e-15);
let k_near = kernel.compute(&[0.0], &[0.5]);
let k_mid = kernel.compute(&[0.0], &[2.0]);
assert!(k_near > k_mid, "Kernel should decrease with distance");
}
#[test]
fn test_matern52_kernel_properties() {
let kernel = Kernel {
kernel_type: KernelType::Matern52,
length_scale: 1.0,
signal_variance: 1.0,
};
let k_same = kernel.compute(&[0.0], &[0.0]);
assert_relative_eq!(k_same, 1.0, epsilon = 1e-10);
let k_near = kernel.compute(&[0.0], &[0.5]);
let k_far = kernel.compute(&[0.0], &[2.0]);
assert!(k_near > k_far, "Kernel should decrease with distance");
let k12 = kernel.compute(&[1.0, 2.0, 3.0], &[4.0, 5.0, 6.0]);
let k21 = kernel.compute(&[4.0, 5.0, 6.0], &[1.0, 2.0, 3.0]);
assert_relative_eq!(k12, k21, epsilon = 1e-15);
}
#[test]
fn test_gp_prediction_at_training_points() {
let mut gp = GaussianProcess::new(KernelType::SquaredExponential, 1e-6);
let x_train: Vec<Vec<f64>> = vec![
vec![0.0],
vec![1.0],
vec![2.0],
vec![3.0],
vec![4.0],
vec![5.0],
];
let y_train: Vec<f64> = x_train.iter().map(|x| x[0].sin()).collect();
gp.fit(&x_train, &y_train).expect("GP fit should succeed");
for (x, &y_true) in x_train.iter().zip(y_train.iter()) {
let (mu, var) = gp.predict(x).expect("GP predict should succeed");
assert!(
(mu - y_true).abs() < 0.1,
"Prediction at training point x={:?}: mu={}, y_true={}",
x,
mu,
y_true
);
assert!(var >= 0.0, "Variance should be non-negative");
}
}
#[test]
fn test_gp_variance_increases_away_from_data() {
let mut gp = GaussianProcess::new(KernelType::SquaredExponential, 1e-6);
let x_train = vec![vec![0.0], vec![2.0], vec![4.0]];
let y_train = vec![0.0, 1.0, 0.0];
gp.fit(&x_train, &y_train).expect("GP fit should succeed");
let (_, var_at_train) = gp.predict(&[0.0]).expect("predict should succeed");
let (_, var_far) = gp.predict(&[10.0]).expect("predict should succeed");
assert!(
var_far > var_at_train,
"Variance far from data ({}) should exceed variance at data ({})",
var_far,
var_at_train
);
}
#[test]
fn test_1d_quadratic_optimization() {
let objective = |x: &[f64]| -> f64 { (x[0] - 0.3).powi(2) };
let config = BayesOptConfig {
bounds: vec![(0.0, 1.0)],
n_initial: 5,
n_iterations: 15,
noise_variance: 1e-6,
n_candidates: 500,
optimize_hyperparams: true,
hp_optimize_interval: 5,
..Default::default()
};
let result = bayesian_optimize(objective, config)
.expect("Bayesian optimization should succeed for 1D quadratic");
assert!(
result.f_best < 0.01,
"Best value should be close to 0, got {}",
result.f_best
);
assert!(
(result.x_best[0] - 0.3).abs() < 0.15,
"Best x should be close to 0.3, got {}",
result.x_best[0]
);
assert_eq!(result.n_evaluations, 20); }
#[test]
fn test_2d_branin_optimization() {
let branin = |x: &[f64]| -> f64 {
let x1 = x[0] * 15.0 - 5.0;
let x2 = x[1] * 15.0;
let a = 1.0;
let b = 5.1 / (4.0 * PI * PI);
let c = 5.0 / PI;
let r = 6.0;
let s = 10.0;
let t = 1.0 / (8.0 * PI);
a * (x2 - b * x1 * x1 + c * x1 - r).powi(2) + s * (1.0 - t) * x1.cos() + s
};
let config = BayesOptConfig {
bounds: vec![(0.0, 1.0), (0.0, 1.0)],
n_initial: 10,
n_iterations: 30,
noise_variance: 1e-6,
n_candidates: 1000,
optimize_hyperparams: true,
hp_optimize_interval: 5,
kernel: KernelType::Matern52,
..Default::default()
};
let result = bayesian_optimize(branin, config)
.expect("Bayesian optimization should succeed for Branin");
assert!(
result.f_best < 2.0,
"Branin should find a reasonably good minimum, got {}",
result.f_best
);
assert_eq!(result.x_best.len(), 2);
}
#[test]
fn test_rosenbrock_optimization() {
let rosenbrock = |x: &[f64]| -> f64 {
let x0 = x[0];
let x1 = x[1];
(1.0 - x0).powi(2) + 100.0 * (x1 - x0 * x0).powi(2)
};
let config = BayesOptConfig {
bounds: vec![(0.0, 2.0), (0.0, 2.0)],
n_initial: 10,
n_iterations: 40,
noise_variance: 1e-6,
n_candidates: 1000,
optimize_hyperparams: true,
hp_optimize_interval: 5,
kernel: KernelType::Matern52,
..Default::default()
};
let result = bayesian_optimize(rosenbrock, config)
.expect("Bayesian optimization should succeed for Rosenbrock");
assert!(
result.f_best < 10.0,
"Rosenbrock best value should be < 10 with 50 evals, got {}",
result.f_best
);
}
#[test]
fn test_different_acquisition_functions() {
let objective = |x: &[f64]| -> f64 { (x[0] - 0.7).powi(2) };
let acqs: Vec<AcquisitionType> = vec![
AcquisitionType::ExpectedImprovement,
AcquisitionType::ProbabilityOfImprovement,
AcquisitionType::UpperConfidenceBound,
AcquisitionType::ThompsonSampling,
];
for acq in acqs {
let acq_name = format!("{:?}", acq);
let config = BayesOptConfig {
bounds: vec![(0.0, 1.0)],
n_initial: 5,
n_iterations: 10,
noise_variance: 1e-6,
n_candidates: 200,
optimize_hyperparams: false,
acquisition: acq,
..Default::default()
};
let result = bayesian_optimize(objective, config);
assert!(
result.is_ok(),
"Optimization with {} should succeed",
acq_name
);
let res = result.expect("should not fail");
assert!(
res.f_best < 0.5,
"{} should find reasonable solution, got f_best={}",
acq_name,
res.f_best
);
}
}
#[test]
fn test_different_kernels() {
let objective = |x: &[f64]| -> f64 { x[0].sin() + 0.1 * x[0] };
let kernels = vec![KernelType::SquaredExponential, KernelType::Matern52];
for kernel in kernels {
let kernel_name = format!("{:?}", kernel);
let config = BayesOptConfig {
bounds: vec![(0.0, 6.0)],
n_initial: 5,
n_iterations: 10,
noise_variance: 1e-6,
n_candidates: 200,
optimize_hyperparams: false,
kernel,
..Default::default()
};
let result = bayesian_optimize(objective, config);
assert!(
result.is_ok(),
"Optimization with kernel {} should succeed",
kernel_name
);
}
}
#[test]
fn test_bounded_optimization() {
let objective = |x: &[f64]| -> f64 { x[0] * x[0] };
let config = BayesOptConfig {
bounds: vec![(2.0, 5.0)],
n_initial: 3,
n_iterations: 10,
noise_variance: 1e-6,
n_candidates: 300,
optimize_hyperparams: false,
..Default::default()
};
let result =
bayesian_optimize(objective, config).expect("Bounded optimization should succeed");
assert!(
(result.x_best[0] - 2.0).abs() < 0.5,
"Best x should be near lower bound 2.0, got {}",
result.x_best[0]
);
assert!(
result.f_best < 6.5,
"Best value should be near 4.0, got {}",
result.f_best
);
for point in &result.x_history {
assert!(
point[0] >= 2.0 - 1e-10 && point[0] <= 5.0 + 1e-10,
"Point {} should be within bounds [2, 5]",
point[0]
);
}
}
#[test]
fn test_ei_acquisition_values() {
let ei = compute_acquisition(
&AcquisitionType::ExpectedImprovement,
0.0,
1.0,
1.0,
0.0,
2.0,
);
assert!(
ei > 0.0,
"EI should be positive when improvement is possible"
);
let ei_zero = compute_acquisition(
&AcquisitionType::ExpectedImprovement,
0.0,
0.0,
1.0,
0.0,
2.0,
);
assert_relative_eq!(ei_zero, 0.0, epsilon = 1e-10);
let ei_low = compute_acquisition(
&AcquisitionType::ExpectedImprovement,
0.5,
0.1,
1.0,
0.0,
2.0,
);
let ei_high = compute_acquisition(
&AcquisitionType::ExpectedImprovement,
0.5,
2.0,
1.0,
0.0,
2.0,
);
assert!(ei_high > ei_low, "Higher sigma should give higher EI");
}
#[test]
fn test_pi_acquisition_values() {
let pi_good = compute_acquisition(
&AcquisitionType::ProbabilityOfImprovement,
-10.0,
1.0,
0.0,
0.0,
2.0,
);
assert!(
pi_good > 0.9,
"PI should be high when mu << best_y, got {}",
pi_good
);
let pi_bad = compute_acquisition(
&AcquisitionType::ProbabilityOfImprovement,
10.0,
1.0,
0.0,
0.0,
2.0,
);
assert!(
pi_bad < 0.1,
"PI should be low when mu >> best_y, got {}",
pi_bad
);
}
#[test]
fn test_ucb_kappa_effect() {
let ucb_low_kappa = compute_acquisition(
&AcquisitionType::UpperConfidenceBound,
1.0,
1.0,
0.0,
0.0,
0.1,
);
let ucb_high_kappa = compute_acquisition(
&AcquisitionType::UpperConfidenceBound,
1.0,
1.0,
0.0,
0.0,
5.0,
);
assert!(
ucb_high_kappa > ucb_low_kappa,
"Higher kappa should favor exploration: low={}, high={}",
ucb_low_kappa,
ucb_high_kappa
);
}
#[test]
fn test_optimization_progress_is_monotonic() {
let objective = |x: &[f64]| -> f64 { x.iter().map(|xi| (xi - 0.5).powi(2)).sum() };
let config = BayesOptConfig {
bounds: vec![(0.0, 1.0), (0.0, 1.0)],
n_initial: 5,
n_iterations: 15,
noise_variance: 1e-6,
n_candidates: 300,
optimize_hyperparams: false,
..Default::default()
};
let result = bayesian_optimize(objective, config).expect("Optimization should succeed");
let mut running_best = f64::INFINITY;
for &val in &result.f_history {
if val < running_best {
running_best = val;
}
}
assert_relative_eq!(running_best, result.f_best, epsilon = 1e-15);
}
#[test]
fn test_hyperparameter_optimization() {
let mut gp = GaussianProcess::new(KernelType::SquaredExponential, 1e-6);
gp.kernel = gp.kernel.with_params(0.1, 10.0);
let x_train: Vec<Vec<f64>> = (0..10).map(|i| vec![i as f64 * 0.5]).collect();
let y_train: Vec<f64> = x_train.iter().map(|x| x[0].sin()).collect();
gp.optimize_hyperparameters(&x_train, &y_train, 3)
.expect("Hyperparameter optimization should succeed");
let (mu, _var) = gp.predict(&[0.25]).expect("Prediction should succeed");
let y_true = 0.25_f64.sin();
assert!(
(mu - y_true).abs() < 0.5,
"After HP opt, prediction should be reasonable: mu={}, y_true={}",
mu,
y_true
);
}
#[test]
fn test_invalid_inputs() {
let objective = |x: &[f64]| -> f64 { x[0] };
let config = BayesOptConfig {
bounds: vec![],
..Default::default()
};
assert!(bayesian_optimize(objective, config).is_err());
let config = BayesOptConfig {
bounds: vec![(1.0, 0.0)],
..Default::default()
};
assert!(bayesian_optimize(objective, config).is_err());
let gp = GaussianProcess::new(KernelType::SquaredExponential, 1e-6);
assert!(gp.predict(&[0.0]).is_err());
assert!(gp.negative_log_marginal_likelihood().is_err());
let mut gp2 = GaussianProcess::new(KernelType::SquaredExponential, 1e-6);
assert!(gp2.fit(&[], &[]).is_err());
assert!(gp2.fit(&[vec![1.0]], &[1.0, 2.0]).is_err());
}
#[test]
fn test_cholesky_correctness() {
let a = vec![vec![4.0, 2.0], vec![2.0, 3.0]];
let l = cholesky_decomposition(&a).expect("Cholesky should succeed for PD matrix");
let n = a.len();
for i in 0..n {
for j in 0..n {
let mut sum = 0.0;
for k in 0..n {
sum += l[i][k] * l[j][k];
}
assert_relative_eq!(sum, a[i][j], epsilon = 1e-10);
}
}
let bad = vec![vec![1.0, 2.0], vec![2.0, 1.0]];
assert!(cholesky_decomposition(&bad).is_err());
}
#[test]
fn test_erf_and_normal_cdf() {
assert_relative_eq!(erf_approx(0.0), 0.0, epsilon = 1e-6);
assert_relative_eq!(erf_approx(1.0), 0.842700793, epsilon = 1e-5);
assert_relative_eq!(erf_approx(-1.0), -0.842700793, epsilon = 1e-5);
assert_relative_eq!(erf_approx(2.0), 0.995322265, epsilon = 1e-5);
assert_relative_eq!(standard_normal_cdf(0.0), 0.5, epsilon = 1e-6);
assert!(standard_normal_cdf(5.0) > 0.999);
assert!(standard_normal_cdf(-5.0) < 0.001);
}
#[test]
fn test_result_struct_completeness() {
let objective = |x: &[f64]| -> f64 { x[0] * x[0] };
let config = BayesOptConfig {
bounds: vec![(0.0, 1.0)],
n_initial: 3,
n_iterations: 5,
noise_variance: 1e-6,
n_candidates: 100,
optimize_hyperparams: false,
..Default::default()
};
let result = bayesian_optimize(objective, config).expect("Should succeed");
assert_eq!(result.x_best.len(), 1);
assert!(result.f_best.is_finite());
assert_eq!(result.x_history.len(), 8); assert_eq!(result.f_history.len(), 8);
assert_eq!(result.n_evaluations, 8);
let (mu, var) = result.model.predict(&[0.5]).expect("Model should predict");
assert!(mu.is_finite());
assert!(var >= 0.0);
}
#[test]
fn test_latin_hypercube_sampling_coverage() {
let mut rng = thread_rng();
let bounds = vec![(0.0, 1.0), (0.0, 1.0)];
let n_samples = 20;
let samples =
latin_hypercube_sampling(n_samples, &bounds, &mut rng).expect("LHS should succeed");
assert_eq!(samples.len(), n_samples);
for sample in &samples {
assert_eq!(sample.len(), 2);
for (d, &val) in sample.iter().enumerate() {
assert!(
val >= bounds[d].0 && val <= bounds[d].1,
"Sample dim {} value {} out of bounds [{}, {}]",
d,
val,
bounds[d].0,
bounds[d].1
);
}
}
for dim in 0..2 {
let mut strata = vec![false; n_samples];
for sample in &samples {
let stratum = (sample[dim] * n_samples as f64) as usize;
let stratum = stratum.min(n_samples - 1);
strata[stratum] = true;
}
let covered = strata.iter().filter(|&&s| s).count();
assert!(
covered >= n_samples / 2,
"LHS should cover most strata in dim {}: only {}/{}",
dim,
covered,
n_samples
);
}
}
#[test]
fn test_xi_parameter_effect() {
let ei_low_xi = compute_acquisition(
&AcquisitionType::ExpectedImprovement,
0.8,
0.5,
1.0,
0.0,
2.0,
);
let ei_high_xi = compute_acquisition(
&AcquisitionType::ExpectedImprovement,
0.8,
0.5,
1.0,
0.5,
2.0,
);
assert!(
ei_low_xi >= ei_high_xi,
"Higher xi should reduce EI at the same point: low_xi={}, high_xi={}",
ei_low_xi,
ei_high_xi
);
}
}