use std::fmt;
use scirs2_core::ndarray::{Array1, Array2, ArrayView1};
use scirs2_core::random::rngs::StdRng;
use scirs2_core::random::SeedableRng;
use scirs2_core::random::{Rng, RngExt};
use scirs2_stats::gaussian_process::{GaussianProcessRegressor, SquaredExponential};
use crate::error::OptimizeError;
use crate::parallel::ParallelOptions;
use crate::unconstrained::{minimize, Method, OptimizeResult, Options};
/// Thin wrapper around `GaussianProcessRegressor` that remembers the input
/// dimensionality so query points can be reshaped into `(1, n_features)` rows.
struct GaussianProcess {
regressor: GaussianProcessRegressor<SquaredExponential>,
n_features: usize,
}
impl GaussianProcess {
fn new(x_data: Vec<Vec<f64>>, y_data: Vec<f64>) -> Self {
let n_samples = x_data.len();
let n_features = if n_samples > 0 { x_data[0].len() } else { 0 };
let mut x_flat = Vec::with_capacity(n_samples * n_features);
for row in &x_data {
x_flat.extend_from_slice(row);
}
let x_array = Array2::from_shape_vec((n_samples, n_features), x_flat)
.expect("training inputs must form an (n_samples, n_features) matrix");
let y_array = Array1::from_vec(y_data);
let kernel = SquaredExponential::default();
let mut regressor = GaussianProcessRegressor::new(kernel);
regressor
.fit(&x_array, &y_array)
.expect("GP fit failed on training data");
Self {
regressor,
n_features,
}
}
fn predict(&self, x: &[f64]) -> f64 {
let x_array = Array2::from_shape_vec((1, self.n_features), x.to_vec())
.expect("query point must have exactly n_features entries");
let predictions = self.regressor.predict(&x_array).expect("GP predict failed");
predictions[0]
}
fn predict_variance(&self, x: &[f64]) -> f64 {
let x_array = Array2::from_shape_vec((1, self.n_features), x.to_vec())
.expect("query point must have exactly n_features entries");
let (_mean, std) = self
.regressor
.predict_with_std(&x_array)
.expect("GP predict_with_std failed");
// The regressor reports a standard deviation; square it for the variance.
std[0] * std[0]
}
}
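/// One dimension of the search space: a continuous range, an inclusive
/// integer range, or a finite set of categories (one-hot encoded when
/// passed to the surrogate model).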
#[derive(Debug, Clone)]
pub enum Parameter {
Real(f64, f64),
Integer(i64, i64),
Categorical(Vec<String>),
}
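/// A search space of named parameters over which the optimizer operates.
///
/// A minimal construction sketch (parameter names are illustrative):
///
/// ```ignore
/// let space = Space::new()
///     .add("learning_rate", Parameter::Real(1e-4, 1e-1))
///     .add("n_layers", Parameter::Integer(1, 8))
///     .add("activation", Parameter::Categorical(vec!["relu".into(), "tanh".into()]));
/// assert_eq!(space.n_dims(), 3);             // one slot per parameter
/// assert_eq!(space.transformed_n_dims(), 4); // categorical expands to one-hot
/// ```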
#[derive(Debug, Clone)]
pub struct Space {
parameters: Vec<(String, Parameter)>,
transformed_n_dims: usize,
bounds: Vec<(f64, f64)>,
}
impl Space {
pub fn new() -> Self {
Self {
parameters: Vec::new(),
transformed_n_dims: 0,
bounds: Vec::new(),
}
}
pub fn add<S: Into<String>>(mut self, name: S, parameter: Parameter) -> Self {
let name = name.into();
self.transformed_n_dims += match &parameter {
Parameter::Real(_, _) => 1,
Parameter::Integer(_, _) => 1,
Parameter::Categorical(values) => values.len(),
};
let bounds = match &parameter {
Parameter::Real(lower, upper) => (*lower, *upper),
Parameter::Integer(lower, upper) => (*lower as f64, *upper as f64),
Parameter::Categorical(_) => (0.0, 1.0),
};
self.bounds.push(bounds);
self.parameters.push((name, parameter));
self
}
pub fn n_dims(&self) -> usize {
self.parameters.len()
}
pub fn transformed_n_dims(&self) -> usize {
self.transformed_n_dims
}
pub fn dimension_types(&self) -> Vec<crate::bayesian::optimizer::DimensionType> {
use crate::bayesian::optimizer::DimensionType;
self.parameters
.iter()
.map(|(_, param)| match param {
Parameter::Integer(_, _) => DimensionType::Integer,
_ => DimensionType::Continuous,
})
.collect()
}
/// Draws `n_samples` points uniformly at random in the original
/// (untransformed) parameter space.
pub fn sample(&self, n_samples: usize, rng: &mut StdRng) -> Vec<Array1<f64>> {
let n_dims = self.n_dims();
let mut samples = Vec::with_capacity(n_samples);
for _ in 0..n_samples {
let mut sample = Array1::zeros(n_dims);
for (i, (_, param)) in self.parameters.iter().enumerate() {
match param {
Parameter::Real(lower, upper) => {
sample[i] = rng.random_range(*lower..*upper);
}
Parameter::Integer(lower, upper) => {
let range = rng.random_range(*lower..=*upper);
sample[i] = range as f64;
}
Parameter::Categorical(values) => {
let index = rng.random_range(0..values.len());
sample[i] = index as f64;
}
}
}
samples.push(sample);
}
samples
}
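/// Scales a point from the original space into the model's representation:
/// `Real`/`Integer` slots map affinely to `[0, 1]` and each `Categorical`
/// slot expands into a one-hot block.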
pub fn transform(&self, x: &ArrayView1<f64>) -> Array1<f64> {
let mut transformed = Array1::zeros(self.transformed_n_dims);
let mut idx = 0;
for (i, (_, param)) in self.parameters.iter().enumerate() {
match param {
Parameter::Real(lower, upper) => {
transformed[idx] = (x[i] - lower) / (upper - lower);
idx += 1;
}
Parameter::Integer(lower, upper) => {
transformed[idx] = (x[i] - *lower as f64) / (*upper as f64 - *lower as f64);
idx += 1;
}
Parameter::Categorical(values) => {
let index = x[i] as usize;
for j in 0..values.len() {
transformed[idx + j] = if j == index { 1.0 } else { 0.0 };
}
idx += values.len();
}
}
}
transformed
}
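/// Undoes `transform`: continuous slots are rescaled back to their bounds
/// (integers rounded) and each one-hot block is decoded by argmax into a
/// category index.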
pub fn inverse_transform(&self, x: &ArrayView1<f64>) -> Array1<f64> {
let mut inverse = Array1::zeros(self.n_dims());
let mut idx = 0;
for (i, (_, param)) in self.parameters.iter().enumerate() {
match param {
Parameter::Real(lower, upper) => {
inverse[i] = lower + x[idx] * (upper - lower);
idx += 1;
}
Parameter::Integer(lower, upper) => {
let value = lower + (x[idx] * (*upper - *lower) as f64).round() as i64;
inverse[i] = value as f64;
idx += 1;
}
Parameter::Categorical(values) => {
let mut max_idx = 0;
let mut max_val = x[idx];
for j in 1..values.len() {
if x[idx + j] > max_val {
max_val = x[idx + j];
max_idx = j;
}
}
inverse[i] = max_idx as f64;
idx += values.len();
}
}
}
inverse
}
pub fn bounds_to_vec(&self) -> Vec<(f64, f64)> {
self.parameters
.iter()
.map(|(_, param)| match param {
Parameter::Real(lower, upper) => (*lower, *upper),
Parameter::Integer(lower, upper) => (*lower as f64, *upper as f64),
Parameter::Categorical(_) => (0.0, 1.0),
})
.collect()
}
}
impl Default for Space {
fn default() -> Self {
Self::new()
}
}
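/// Scores candidate points by how promising they are to evaluate next; the
/// optimizer maximizes this score by minimizing its negation. Implementations
/// may supply an analytic gradient; the default reports none, so the
/// acquisition optimizer falls back to gradient-free behavior.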
pub trait AcquisitionFunction: Send + Sync {
fn evaluate(&self, x: &ArrayView1<f64>) -> f64;
fn gradient(&self, _x: &ArrayView1<f64>) -> Option<Array1<f64>> {
None
}
}
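/// Expected improvement for minimization: with
/// `z = (y_best - mu(x) - xi) / sigma(x)`,
/// `EI(x) = (y_best - mu(x) - xi) * Phi(z) + sigma(x) * phi(z)`,
/// where `Phi`/`phi` are the standard normal CDF/PDF and `xi >= 0` inflates
/// the improvement threshold to encourage exploration.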
pub struct ExpectedImprovement {
model: GaussianProcess,
y_best: f64,
xi: f64,
}
impl ExpectedImprovement {
pub fn new(model: GaussianProcess, y_best: f64, xi: f64) -> Self {
Self { model, y_best, xi }
}
}
impl AcquisitionFunction for ExpectedImprovement {
fn evaluate(&self, x: &ArrayView1<f64>) -> f64 {
let mean = self.model.predict(&x.to_vec());
let std = (self.model.predict_variance(&x.to_vec())).sqrt();
if std <= 0.0 {
return 0.0;
}
let z = (self.y_best - mean - self.xi) / std;
// Standard normal CDF: Phi(z) = 0.5 * (1 + erf(z / sqrt(2))).
let norm_cdf = 0.5 * (1.0 + approx_erf(z / std::f64::consts::SQRT_2));
// Standard normal PDF: phi(z) = exp(-z^2 / 2) / sqrt(2 * pi).
let norm_pdf = (-0.5 * z.powi(2)).exp() / (2.0 * std::f64::consts::PI).sqrt();
let ei = (self.y_best - mean - self.xi) * norm_cdf + std * norm_pdf;
ei.max(0.0)
}
}
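/// Lower confidence bound for minimization:
/// `LCB(x) = mu(x) - kappa * sigma(x)`; larger `kappa` weights the predictive
/// uncertainty more heavily and explores more aggressively.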
pub struct LowerConfidenceBound {
model: GaussianProcess,
kappa: f64,
}
impl LowerConfidenceBound {
pub fn new(model: GaussianProcess, kappa: f64) -> Self {
Self { model, kappa }
}
}
impl AcquisitionFunction for LowerConfidenceBound {
fn evaluate(&self, x: &ArrayView1<f64>) -> f64 {
let mean = self.model.predict(&x.to_vec());
let std = (self.model.predict_variance(&x.to_vec())).sqrt();
mean - self.kappa * std
}
}
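/// Probability of improvement for minimization:
/// `PI(x) = Phi((y_best - mu(x) - xi) / sigma(x))`, the probability that a
/// new evaluation beats the incumbent by at least `xi`.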
pub struct ProbabilityOfImprovement {
model: GaussianProcess,
y_best: f64,
xi: f64,
}
impl ProbabilityOfImprovement {
pub fn new(model: GaussianProcess, y_best: f64, xi: f64) -> Self {
Self { model, y_best, xi }
}
}
impl AcquisitionFunction for ProbabilityOfImprovement {
fn evaluate(&self, x: &ArrayView1<f64>) -> f64 {
let mean = self.model.predict(&x.to_vec());
let std = (self.model.predict_variance(&x.to_vec())).sqrt();
if std <= 0.0 {
return 0.0;
}
let z = (self.y_best - mean - self.xi) / std;
// Phi(z): probability the objective improves on y_best by at least xi.
0.5 * (1.0 + approx_erf(z / std::f64::consts::SQRT_2))
}
}
/// Polynomial approximation of the error function
/// (Abramowitz & Stegun formula 7.1.26; maximum absolute error about 1.5e-7).
fn approx_erf(x: f64) -> f64 {
let a1 = 0.254829592;
let a2 = -0.284496736;
let a3 = 1.421413741;
let a4 = -1.453152027;
let a5 = 1.061405429;
let p = 0.3275911;
let sign = if x < 0.0 { -1.0 } else { 1.0 };
let x = x.abs();
let t = 1.0 / (1.0 + p * x);
let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x).exp();
sign * y
}
#[derive(Debug, Clone, Copy, Default)]
pub enum AcquisitionFunctionType {
#[default]
ExpectedImprovement,
LowerConfidenceBound,
ProbabilityOfImprovement,
}
impl fmt::Display for AcquisitionFunctionType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
AcquisitionFunctionType::ExpectedImprovement => write!(f, "EI"),
AcquisitionFunctionType::LowerConfidenceBound => write!(f, "LCB"),
AcquisitionFunctionType::ProbabilityOfImprovement => write!(f, "PI"),
}
}
}
#[derive(Debug, Clone, Copy, Default)]
pub enum KernelType {
#[default]
SquaredExponential,
Matern52,
Matern32,
}
impl fmt::Display for KernelType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
KernelType::SquaredExponential => write!(f, "SquaredExponential"),
KernelType::Matern52 => write!(f, "Matern52"),
KernelType::Matern32 => write!(f, "Matern32"),
}
}
}
#[derive(Debug, Clone, Copy, Default)]
pub enum InitialPointGenerator {
#[default]
Random,
LatinHypercube,
Sobol,
Halton,
}
impl fmt::Display for InitialPointGenerator {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
InitialPointGenerator::Random => write!(f, "Random"),
InitialPointGenerator::LatinHypercube => write!(f, "LatinHypercube"),
InitialPointGenerator::Sobol => write!(f, "Sobol"),
InitialPointGenerator::Halton => write!(f, "Halton"),
}
}
}
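/// Configuration for `BayesianOptimizer`: warm-up budget and generator,
/// acquisition function with its `kappa` (LCB) and `xi` (EI, PI) parameters,
/// GP kernel, RNG seed, optional parallelism, and the number of random
/// restarts used when maximizing the acquisition function.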
#[derive(Clone, Debug)]
pub struct BayesianOptimizationOptions {
pub n_initial_points: usize,
pub initial_point_generator: InitialPointGenerator,
pub acq_func: AcquisitionFunctionType,
pub kernel: KernelType,
pub kappa: f64,
pub xi: f64,
pub seed: Option<u64>,
pub parallel: Option<ParallelOptions>,
pub n_restarts: usize,
}
impl Default for BayesianOptimizationOptions {
fn default() -> Self {
Self {
n_initial_points: 10,
initial_point_generator: InitialPointGenerator::default(),
acq_func: AcquisitionFunctionType::default(),
kernel: KernelType::default(),
kappa: 1.96,
xi: 0.01,
seed: None,
parallel: None,
n_restarts: 5,
}
}
}
#[derive(Debug, Clone)]
struct Observation {
x: Array1<f64>,
y: f64,
}
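/// Sequential model-based (Bayesian) optimizer over a `Space`, driven either
/// through `optimize` or through the ask/tell interface.
///
/// A minimal ask/tell sketch (objective and budget are illustrative):
///
/// ```ignore
/// let space = Space::new().add("x", Parameter::Real(-2.0, 2.0));
/// let mut opt = BayesianOptimizer::new(space, None);
/// for _ in 0..20 {
///     let x = opt.ask();            // propose the next point
///     let y = (x[0] - 1.0).powi(2); // evaluate the objective externally
///     opt.tell(x, y);               // report the result to the surrogate
/// }
/// ```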
pub struct BayesianOptimizer {
space: Space,
options: BayesianOptimizationOptions,
observations: Vec<Observation>,
best_observation: Option<Observation>,
rng: StdRng,
iteration: usize,
}
impl BayesianOptimizer {
pub fn new(space: Space, options: Option<BayesianOptimizationOptions>) -> Self {
let options = options.unwrap_or_default();
let seed = options
.seed
.unwrap_or_else(|| scirs2_core::random::rng().random());
let rng = StdRng::seed_from_u64(seed);
Self {
space,
options,
observations: Vec::new(),
best_observation: None,
rng,
iteration: 0,
}
}
/// Rounds integer dimensions to the nearest value and clamps them to bounds.
fn enforce_integer_dims(&self, x: &mut Array1<f64>) {
for (i, (_, param)) in self.space.parameters.iter().enumerate() {
if let Parameter::Integer(lo, hi) = param {
x[i] = x[i].round().clamp(*lo as f64, *hi as f64);
}
}
}
/// Proposes the next point to evaluate: quasi-random draws during the
/// warm-up phase, then the maximizer of the acquisition function.
pub fn ask(&mut self) -> Array1<f64> {
if self.observations.len() < self.options.n_initial_points {
let samples = match self.options.initial_point_generator {
InitialPointGenerator::Random => self.space.sample(1, &mut self.rng),
InitialPointGenerator::LatinHypercube => {
// Generating one point per call means the single stratum spans the
// whole range, so this degenerates to uniform sampling; a true Latin
// hypercube would stratify across all warm-up points jointly.
let dim = self.space.bounds.len();
let mut sample = Array1::zeros(dim);
for (i, (low, high)) in self.space.bounds.iter().enumerate() {
sample[i] = low + self.rng.random_range(0.0..1.0) * (high - low);
}
vec![sample]
}
InitialPointGenerator::Sobol => {
// Not a true Sobol sequence: a base-2 radical inverse (van der
// Corput) with a fixed per-dimension shift, which still spreads
// points more evenly than uniform sampling for small budgets.
let dim = self.space.bounds.len();
let mut sample = Array1::zeros(dim);
let seed = self.iteration as u32 + 1;
for (i, (low, high)) in self.space.bounds.iter().enumerate() {
let mut n = seed;
let mut denom = 1.0;
let mut result = 0.0;
let base = 2u32;
while n > 0 {
denom *= base as f64;
result += (n % base) as f64 / denom;
n /= base;
}
let offset = ((i + 1) as f64 * 0.5).fract();
let value = (result + offset).fract();
sample[i] = low + value * (high - low);
}
vec![sample]
}
InitialPointGenerator::Halton => {
let dim = self.space.bounds.len();
let mut sample = Array1::zeros(dim);
let primes = [2u32, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47];
let seed = self.iteration as u32 + 1;
for (i, (low, high)) in self.space.bounds.iter().enumerate() {
// Cycle through the prime table; dimensions beyond it reuse bases
// and therefore become correlated with earlier ones.
let base = primes[i % primes.len()];
let mut n = seed;
let mut denom = 1.0;
let mut result = 0.0;
while n > 0 {
denom *= base as f64;
result += (n % base) as f64 / denom;
n /= base;
}
sample[i] = low + result * (high - low);
}
vec![sample]
}
};
self.iteration += 1;
let mut point = samples[0].clone();
self.enforce_integer_dims(&mut point);
return point;
}
self.iteration += 1;
let mut point = self.optimize_acquisition_function();
self.enforce_integer_dims(&mut point);
point
}
/// Records an evaluated point and updates the incumbent best (minimization).
pub fn tell(&mut self, x: Array1<f64>, y: f64) {
let observation = Observation { x, y };
if let Some(best) = &self.best_observation {
if y < best.y {
self.best_observation = Some(observation.clone());
}
} else {
self.best_observation = Some(observation.clone());
}
self.observations.push(observation);
}
/// Fits a fresh GP surrogate to all observations in transformed coordinates.
fn build_model(&self) -> GaussianProcess {
let mut x_data = Vec::with_capacity(self.observations.len());
let mut y_data = Vec::with_capacity(self.observations.len());
for obs in &self.observations {
let x_transformed = self.space.transform(&obs.x.view()).to_vec();
x_data.push(x_transformed);
y_data.push(obs.y);
}
GaussianProcess::new(x_data, y_data)
}
fn create_acquisition_function(&self) -> Box<dyn AcquisitionFunction> {
let model = self.build_model();
let y_best = self
.best_observation
.as_ref()
.expect("acquisition requires at least one observation")
.y;
match self.options.acq_func {
AcquisitionFunctionType::ExpectedImprovement => {
Box::new(ExpectedImprovement::new(model, y_best, self.options.xi))
}
AcquisitionFunctionType::LowerConfidenceBound => {
Box::new(LowerConfidenceBound::new(model, self.options.kappa))
}
AcquisitionFunctionType::ProbabilityOfImprovement => Box::new(
ProbabilityOfImprovement::new(model, y_best, self.options.xi),
),
}
}
/// Maximizes the acquisition function by minimizing its negation with
/// multi-start L-BFGS, seeding one restart at the incumbent best point.
fn optimize_acquisition_function(&mut self) -> Array1<f64> {
let acq_func = self.create_acquisition_function();
let bounds = self.space.bounds_to_vec();
let n_restarts = self.options.n_restarts;
let f = |x: &ArrayView1<f64>| -acq_func.evaluate(x);
let mut x_starts = self.space.sample(n_restarts, &mut self.rng);
if let Some(best) = &self.best_observation {
if !x_starts.is_empty() {
x_starts[0] = best.x.clone();
} else {
x_starts.push(best.x.clone());
}
}
let mut best_x = None;
let mut best_value = f64::INFINITY;
for x_start in x_starts {
let result = minimize(
f,
&x_start.to_vec(),
Method::LBFGS,
Some(Options {
bounds: Some(
crate::unconstrained::Bounds::from_vecs(
bounds.iter().map(|b| Some(b.0)).collect(),
bounds.iter().map(|b| Some(b.1)).collect(),
)
.expect("Operation failed"),
),
..Default::default()
}),
);
if let Ok(res) = result {
if res.fun < best_value {
best_value = res.fun;
best_x = Some(res.x);
}
}
}
// Fall back to a random point if every local restart failed.
let mut result = best_x.unwrap_or_else(|| self.space.sample(1, &mut self.rng)[0].clone());
self.enforce_integer_dims(&mut result);
result
}
/// Runs the full loop: warm-up evaluations followed by model-guided
/// proposals, for a total budget of `n_calls` function evaluations.
pub fn optimize<F>(&mut self, func: F, n_calls: usize) -> OptimizeResult<f64>
where
F: Fn(&ArrayView1<f64>) -> f64,
{
let mut n_calls_remaining = n_calls;
// Route warm-up evaluations through `ask()` so the configured
// initial-point generator and integer rounding are honored, instead of
// always sampling uniformly at random.
let n_initial = self.options.n_initial_points.min(n_calls);
for _ in 0..n_initial {
let point = self.ask();
let value = func(&point.view());
self.tell(point, value);
n_calls_remaining -= 1;
}
let mut iterations = 0;
while n_calls_remaining > 0 {
let next_point = self.ask();
let value = func(&next_point.view());
self.tell(next_point, value);
n_calls_remaining -= 1;
iterations += 1;
}
let best = self
.best_observation
.as_ref()
.expect("optimize requires at least one function evaluation");
OptimizeResult {
x: best.x.clone(),
fun: best.y,
nfev: self.observations.len(),
func_evals: self.observations.len(),
nit: iterations,
success: true,
message: "Optimization terminated successfully".to_string(),
..Default::default()
}
}
}
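/// Convenience wrapper: builds a `Space` of `Real` parameters named
/// `x0`, `x1`, ... from `bounds` and runs `BayesianOptimizer::optimize`.
///
/// A usage sketch (objective and budget are illustrative):
///
/// ```ignore
/// let result = bayesian_optimization(
///     |x| x[0].powi(2) + x[1].powi(2), // objective to minimize
///     vec![(-5.0, 5.0), (-5.0, 5.0)],  // box bounds per dimension
///     25,                              // total evaluation budget
///     None,                            // default options
/// )?;
/// println!("best f = {:.3} at {:?}", result.fun, result.x);
/// ```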
#[allow(dead_code)]
pub fn bayesian_optimization<F>(
func: F,
bounds: Vec<(f64, f64)>,
n_calls: usize,
options: Option<BayesianOptimizationOptions>,
) -> Result<OptimizeResult<f64>, OptimizeError>
where
F: Fn(&ArrayView1<f64>) -> f64,
{
let space = bounds
.into_iter()
.enumerate()
.fold(Space::new(), |space, (i, (lower, upper))| {
space.add(format!("x{}", i), Parameter::Real(lower, upper))
});
let mut optimizer = BayesianOptimizer::new(space, options);
let result = optimizer.optimize(func, n_calls);
Ok(result)
}
#[cfg(test)]
mod tests {
use super::*;
use scirs2_core::ndarray::array;
#[test]
fn test_space_creation() {
let space = Space::new()
.add("x1", Parameter::Real(-5.0, 5.0))
.add("x2", Parameter::Integer(0, 10))
.add(
"x3",
Parameter::Categorical(vec!["a".into(), "b".into(), "c".into()]),
);
assert_eq!(space.n_dims(), 3);
assert_eq!(space.transformed_n_dims(), 5);
}
#[test]
fn test_space_transform() {
let space = Space::new()
.add("x1", Parameter::Real(-5.0, 5.0))
.add("x2", Parameter::Integer(0, 10));
let x = array![0.0, 5.0];
let transformed = space.transform(&x.view());
assert_eq!(transformed.len(), 2);
assert!((transformed[0] - 0.5).abs() < 1e-6);
assert!((transformed[1] - 0.5).abs() < 1e-6);
}
#[test]
fn test_bayesian_optimization() {
let f = |x: &ArrayView1<f64>| x[0].powi(2) + x[1].powi(2);
let bounds = vec![(-5.0, 5.0), (-5.0, 5.0)];
let options = BayesianOptimizationOptions {
n_initial_points: 5,
seed: Some(42),
..Default::default()
};
let result = bayesian_optimization(f, bounds, 15, Some(options)).expect("Operation failed");
assert!(result.fun < 0.5);
assert!(result.x[0].abs() < 0.5);
assert!(result.x[1].abs() < 0.5);
}
#[test]
fn test_integer_dimension_enforcement() {
let space = Space::new().add("rating", Parameter::Integer(0, 3));
let options = BayesianOptimizationOptions {
n_initial_points: 2,
seed: Some(42),
..Default::default()
};
let mut opt = BayesianOptimizer::new(space, Some(options));
let result = opt.optimize(
|x| {
let v = x[0];
(v - 2.0).abs()
},
6,
);
for obs in &opt.observations {
let v = obs.x[0];
assert!(v >= 0.0 && v <= 3.0, "Out of bounds: {v}");
assert!((v - v.round()).abs() < 1e-12, "Not integer: {v}");
}
assert!(
(result.x[0] - 2.0).abs() < 1e-12,
"Best x should be 2, got {}",
result.x[0]
);
}
}