1use std::fmt;
35
36use friedrich::gaussian_process::GaussianProcess;
37use friedrich::kernel::SquaredExp;
38use friedrich::prior::ConstantPrior;
39use ndarray::{Array1, ArrayView1};
40use rand::rngs::StdRng;
41use rand::Rng;
42use rand::SeedableRng;
43
44use crate::error::OptimizeError;
45use crate::parallel::ParallelOptions;
46use crate::unconstrained::{minimize, Method, OptimizeResult, Options};
47
48#[derive(Debug, Clone)]
50pub enum Parameter {
51 Real(f64, f64),
53 Integer(i64, i64),
55 Categorical(Vec<String>),
57}
58
59#[derive(Debug, Clone)]
61pub struct Space {
62 parameters: Vec<(String, Parameter)>,
64 transformed_n_dims: usize,
66}
67
68impl Space {
69 pub fn new() -> Self {
71 Self {
72 parameters: Vec::new(),
73 transformed_n_dims: 0,
74 }
75 }
76
77 pub fn add<S: Into<String>>(mut self, name: S, parameter: Parameter) -> Self {
79 let name = name.into();
80
81 self.transformed_n_dims += match ¶meter {
83 Parameter::Real(_, _) => 1,
84 Parameter::Integer(_, _) => 1,
85 Parameter::Categorical(values) => values.len(),
86 };
87
88 self.parameters.push((name, parameter));
89 self
90 }
91
92 pub fn n_dims(&self) -> usize {
94 self.parameters.len()
95 }
96
97 pub fn transformed_n_dims(&self) -> usize {
99 self.transformed_n_dims
100 }
101
102 pub fn sample(&self, n_samples: usize, rng: &mut StdRng) -> Vec<Array1<f64>> {
104 let n_dims = self.n_dims();
105 let mut samples = Vec::with_capacity(n_samples);
106
107 for _ in 0..n_samples {
108 let mut sample = Array1::zeros(n_dims);
109
110 for (i, (_, param)) in self.parameters.iter().enumerate() {
111 match param {
112 Parameter::Real(lower, upper) => {
113 sample[i] = rng.random_range(*lower..*upper);
115 }
116 Parameter::Integer(lower, upper) => {
117 let range = rng.random_range(*lower..=*upper);
118 sample[i] = range as f64;
119 }
120 Parameter::Categorical(values) => {
121 let index = rng.random_range(0..values.len());
122 sample[i] = index as f64;
123 }
124 }
125 }
126
127 samples.push(sample);
128 }
129
130 samples
131 }
132
133 pub fn transform(&self, x: &ArrayView1<f64>) -> Array1<f64> {
135 let mut transformed = Array1::zeros(self.transformed_n_dims);
136 let mut idx = 0;
137
138 for (i, (_, param)) in self.parameters.iter().enumerate() {
139 match param {
140 Parameter::Real(lower, upper) => {
141 transformed[idx] = (x[i] - lower) / (upper - lower);
143 idx += 1;
144 }
145 Parameter::Integer(lower, upper) => {
146 transformed[idx] = (x[i] - *lower as f64) / (*upper as f64 - *lower as f64);
148 idx += 1;
149 }
150 Parameter::Categorical(values) => {
151 let index = x[i] as usize;
153 for j in 0..values.len() {
154 transformed[idx + j] = if j == index { 1.0 } else { 0.0 };
155 }
156 idx += values.len();
157 }
158 }
159 }
160
161 transformed
162 }
163
164 pub fn inverse_transform(&self, x: &ArrayView1<f64>) -> Array1<f64> {
166 let mut inverse = Array1::zeros(self.n_dims());
167 let mut idx = 0;
168
169 for (i, (_, param)) in self.parameters.iter().enumerate() {
170 match param {
171 Parameter::Real(lower, upper) => {
172 inverse[i] = lower + x[idx] * (upper - lower);
174 idx += 1;
175 }
176 Parameter::Integer(lower, upper) => {
177 let value = lower + (x[idx] * (*upper - *lower) as f64).round() as i64;
179 inverse[i] = value as f64;
180 idx += 1;
181 }
182 Parameter::Categorical(values) => {
183 let mut max_idx = 0;
185 let mut max_val = x[idx];
186 for j in 1..values.len() {
187 if x[idx + j] > max_val {
188 max_val = x[idx + j];
189 max_idx = j;
190 }
191 }
192 inverse[i] = max_idx as f64;
193 idx += values.len();
194 }
195 }
196 }
197
198 inverse
199 }
200
201 pub fn bounds_to_vec(&self) -> Vec<(f64, f64)> {
203 self.parameters
204 .iter()
205 .map(|(_, param)| match param {
206 Parameter::Real(lower, upper) => (*lower, *upper),
207 Parameter::Integer(lower, upper) => (*lower as f64, *upper as f64),
208 Parameter::Categorical(_) => (0.0, 1.0), })
210 .collect()
211 }
212}
213
214impl Default for Space {
215 fn default() -> Self {
216 Self::new()
217 }
218}
219
220pub trait AcquisitionFunction: Send + Sync {
222 fn evaluate(&self, x: &ArrayView1<f64>) -> f64;
224
225 fn gradient(&self, _x: &ArrayView1<f64>) -> Option<Array1<f64>> {
227 None
228 }
229}
230
231pub struct ExpectedImprovement {
233 model: GaussianProcess<SquaredExp, ConstantPrior>,
234 y_best: f64,
235 xi: f64,
236}
237
238impl ExpectedImprovement {
239 pub fn new(model: GaussianProcess<SquaredExp, ConstantPrior>, y_best: f64, xi: f64) -> Self {
241 Self { model, y_best, xi }
242 }
243}
244
245impl AcquisitionFunction for ExpectedImprovement {
246 fn evaluate(&self, x: &ArrayView1<f64>) -> f64 {
247 let mean = self.model.predict(&x.to_vec());
248 let std = (self.model.predict_variance(&x.to_vec())).sqrt();
249
250 if std <= 0.0 {
251 return 0.0;
252 }
253
254 let z = (self.y_best - mean - self.xi) / std;
255 let norm_cdf = 0.5 * (1.0 + approx_erf(z * std::f64::consts::SQRT_2 / 2.0));
257 let norm_pdf = (-0.5 * z.powi(2)).exp() / (2.0 * std::f64::consts::PI).sqrt();
258
259 let ei = (self.y_best - mean - self.xi) * norm_cdf + std * norm_pdf;
260
261 if ei < 0.0 {
262 0.0
263 } else {
264 ei
265 }
266 }
267
268 fn gradient(&self, _x: &ArrayView1<f64>) -> Option<Array1<f64>> {
269 None
271 }
272}
273
274pub struct LowerConfidenceBound {
276 model: GaussianProcess<SquaredExp, ConstantPrior>,
277 kappa: f64,
278}
279
280impl LowerConfidenceBound {
281 pub fn new(model: GaussianProcess<SquaredExp, ConstantPrior>, kappa: f64) -> Self {
283 Self { model, kappa }
284 }
285}
286
287impl AcquisitionFunction for LowerConfidenceBound {
288 fn evaluate(&self, x: &ArrayView1<f64>) -> f64 {
289 let mean = self.model.predict(&x.to_vec());
290 let std = (self.model.predict_variance(&x.to_vec())).sqrt();
291
292 mean - self.kappa * std
293 }
294
295 fn gradient(&self, _x: &ArrayView1<f64>) -> Option<Array1<f64>> {
296 None
298 }
299}
300
301pub struct ProbabilityOfImprovement {
303 model: GaussianProcess<SquaredExp, ConstantPrior>,
304 y_best: f64,
305 xi: f64,
306}
307
308impl ProbabilityOfImprovement {
309 pub fn new(model: GaussianProcess<SquaredExp, ConstantPrior>, y_best: f64, xi: f64) -> Self {
311 Self { model, y_best, xi }
312 }
313}
314
315impl AcquisitionFunction for ProbabilityOfImprovement {
316 fn evaluate(&self, x: &ArrayView1<f64>) -> f64 {
317 let mean = self.model.predict(&x.to_vec());
318 let std = (self.model.predict_variance(&x.to_vec())).sqrt();
319
320 if std <= 0.0 {
321 return 0.0;
322 }
323
324 let z = (self.y_best - mean - self.xi) / std;
325 0.5 * (1.0 + approx_erf(z * std::f64::consts::SQRT_2 / 2.0))
327 }
328
329 fn gradient(&self, _x: &ArrayView1<f64>) -> Option<Array1<f64>> {
330 None
332 }
333}
334
335fn approx_erf(x: f64) -> f64 {
338 let a1 = 0.254829592;
340 let a2 = -0.284496736;
341 let a3 = 1.421413741;
342 let a4 = -1.453152027;
343 let a5 = 1.061405429;
344 let p = 0.3275911;
345
346 let sign = if x < 0.0 { -1.0 } else { 1.0 };
348 let x = x.abs();
349
350 let t = 1.0 / (1.0 + p * x);
352 let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x).exp();
353
354 sign * y
355}
356
357#[derive(Debug, Clone, Copy, Default)]
359pub enum AcquisitionFunctionType {
360 #[default]
362 ExpectedImprovement,
363 LowerConfidenceBound,
365 ProbabilityOfImprovement,
367}
368
369impl fmt::Display for AcquisitionFunctionType {
370 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
371 match self {
372 AcquisitionFunctionType::ExpectedImprovement => write!(f, "EI"),
373 AcquisitionFunctionType::LowerConfidenceBound => write!(f, "LCB"),
374 AcquisitionFunctionType::ProbabilityOfImprovement => write!(f, "PI"),
375 }
376 }
377}
378
379#[derive(Debug, Clone, Copy, Default)]
381pub enum KernelType {
382 #[default]
384 SquaredExponential,
385 Matern52,
387 Matern32,
389}
390
391impl fmt::Display for KernelType {
392 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
393 match self {
394 KernelType::SquaredExponential => write!(f, "SquaredExponential"),
395 KernelType::Matern52 => write!(f, "Matern52"),
396 KernelType::Matern32 => write!(f, "Matern32"),
397 }
398 }
399}
400
401#[derive(Debug, Clone, Copy, Default)]
403pub enum InitialPointGenerator {
404 #[default]
406 Random,
407 LatinHypercube,
409 Sobol,
411 Halton,
413}
414
415impl fmt::Display for InitialPointGenerator {
416 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
417 match self {
418 InitialPointGenerator::Random => write!(f, "Random"),
419 InitialPointGenerator::LatinHypercube => write!(f, "LatinHypercube"),
420 InitialPointGenerator::Sobol => write!(f, "Sobol"),
421 InitialPointGenerator::Halton => write!(f, "Halton"),
422 }
423 }
424}
425
426#[derive(Clone, Debug)]
428pub struct BayesianOptimizationOptions {
429 pub n_initial_points: usize,
431 pub initial_point_generator: InitialPointGenerator,
433 pub acq_func: AcquisitionFunctionType,
435 pub kernel: KernelType,
437 pub kappa: f64,
439 pub xi: f64,
441 pub seed: Option<u64>,
443 pub parallel: Option<ParallelOptions>,
445 pub n_restarts: usize,
447}
448
449impl Default for BayesianOptimizationOptions {
450 fn default() -> Self {
451 Self {
452 n_initial_points: 10,
453 initial_point_generator: InitialPointGenerator::default(),
454 acq_func: AcquisitionFunctionType::default(),
455 kernel: KernelType::default(),
456 kappa: 1.96,
457 xi: 0.01,
458 seed: None,
459 parallel: None,
460 n_restarts: 5,
461 }
462 }
463}
464
465#[derive(Debug, Clone)]
467struct Observation {
468 x: Array1<f64>,
470 y: f64,
472}
473
474pub struct BayesianOptimizer {
476 space: Space,
478 options: BayesianOptimizationOptions,
480 observations: Vec<Observation>,
482 best_observation: Option<Observation>,
484 rng: StdRng,
486}
487
488impl BayesianOptimizer {
489 pub fn new(space: Space, options: Option<BayesianOptimizationOptions>) -> Self {
491 let options = options.unwrap_or_default();
492 let seed = options.seed.unwrap_or_else(rand::random);
493 let rng = StdRng::seed_from_u64(seed);
494
495 Self {
496 space,
497 options,
498 observations: Vec::new(),
499 best_observation: None,
500 rng,
501 }
502 }
503
504 pub fn ask(&mut self) -> Array1<f64> {
506 if self.observations.len() < self.options.n_initial_points {
508 let samples = match self.options.initial_point_generator {
509 InitialPointGenerator::Random => {
510 self.space.sample(1, &mut self.rng)
512 }
513 InitialPointGenerator::LatinHypercube => {
514 self.space.sample(1, &mut self.rng)
517 }
518 InitialPointGenerator::Sobol => {
519 self.space.sample(1, &mut self.rng)
522 }
523 InitialPointGenerator::Halton => {
524 self.space.sample(1, &mut self.rng)
527 }
528 };
529
530 return samples[0].clone();
531 }
532
533 self.optimize_acquisition_function()
535 }
536
537 pub fn tell(&mut self, x: Array1<f64>, y: f64) {
539 let observation = Observation { x, y };
540
541 if let Some(best) = &self.best_observation {
543 if y < best.y {
544 self.best_observation = Some(observation.clone());
545 }
546 } else {
547 self.best_observation = Some(observation.clone());
548 }
549
550 self.observations.push(observation);
552 }
553
554 fn build_model(&self) -> GaussianProcess<SquaredExp, ConstantPrior> {
556 let mut x_data = Vec::with_capacity(self.observations.len());
558 let mut y_data = Vec::with_capacity(self.observations.len());
559
560 for obs in &self.observations {
561 let x_transformed = self.space.transform(&obs.x.view()).to_vec();
562 x_data.push(x_transformed);
563 y_data.push(obs.y);
564 }
565
566 GaussianProcess::default(x_data, y_data)
569 }
570
571 fn create_acquisition_function(&self) -> Box<dyn AcquisitionFunction> {
573 let model = self.build_model();
574 let y_best = self.best_observation.as_ref().unwrap().y;
575
576 match self.options.acq_func {
577 AcquisitionFunctionType::ExpectedImprovement => {
578 Box::new(ExpectedImprovement::new(model, y_best, self.options.xi))
579 }
580 AcquisitionFunctionType::LowerConfidenceBound => {
581 Box::new(LowerConfidenceBound::new(model, self.options.kappa))
582 }
583 AcquisitionFunctionType::ProbabilityOfImprovement => Box::new(
584 ProbabilityOfImprovement::new(model, y_best, self.options.xi),
585 ),
586 }
587 }
588
589 fn optimize_acquisition_function(&mut self) -> Array1<f64> {
591 let acq_func = self.create_acquisition_function();
592 let bounds = self.space.bounds_to_vec();
593 let n_restarts = self.options.n_restarts;
594
595 let f = |x: &ArrayView1<f64>| -acq_func.evaluate(x);
597
598 let mut x_starts = self.space.sample(n_restarts, &mut self.rng);
600
601 if let Some(best) = &self.best_observation {
603 if !x_starts.is_empty() {
604 x_starts[0] = best.x.clone();
605 } else {
606 x_starts.push(best.x.clone());
607 }
608 }
609
610 let mut best_x = None;
612 let mut best_value = f64::INFINITY;
613
614 for x_start in x_starts {
615 let result = minimize(
616 f,
617 &x_start.to_vec(),
618 Method::LBFGS,
619 Some(Options {
620 bounds: Some(
621 crate::unconstrained::Bounds::from_vecs(
622 bounds.iter().map(|b| Some(b.0)).collect(),
623 bounds.iter().map(|b| Some(b.1)).collect(),
624 )
625 .unwrap(),
626 ),
627 ..Default::default()
628 }),
629 );
630
631 if let Ok(res) = result {
632 if res.fun < best_value {
633 best_value = res.fun;
634 best_x = Some(res.x);
635 }
636 }
637 }
638
639 best_x.unwrap_or_else(|| {
641 self.space.sample(1, &mut self.rng)[0].clone()
643 })
644 }
645
646 pub fn optimize<F>(&mut self, func: F, n_calls: usize) -> OptimizeResult<f64>
648 where
649 F: Fn(&ArrayView1<f64>) -> f64,
650 {
651 let mut n_calls_remaining = n_calls;
652
653 let n_initial = self.options.n_initial_points.min(n_calls);
655 let initial_points = self.space.sample(n_initial, &mut self.rng);
656
657 for point in initial_points {
658 let value = func(&point.view());
659 self.tell(point, value);
660 n_calls_remaining -= 1;
661
662 if n_calls_remaining == 0 {
663 break;
664 }
665 }
666
667 let mut iterations = 0;
669
670 while n_calls_remaining > 0 {
671 let next_point = self.ask();
673
674 let value = func(&next_point.view());
676
677 self.tell(next_point, value);
679
680 n_calls_remaining -= 1;
682 iterations += 1;
683 }
684
685 let best = self.best_observation.as_ref().unwrap();
687 OptimizeResult {
688 x: best.x.clone(),
689 fun: best.y,
690 nfev: self.observations.len(),
691 func_evals: self.observations.len(),
692 nit: iterations,
693 iterations,
694 success: true,
695 message: "Optimization terminated successfully".to_string(),
696 ..Default::default()
697 }
698 }
699}
700
701pub fn bayesian_optimization<F>(
703 func: F,
704 bounds: Vec<(f64, f64)>,
705 n_calls: usize,
706 options: Option<BayesianOptimizationOptions>,
707) -> Result<OptimizeResult<f64>, OptimizeError>
708where
709 F: Fn(&ArrayView1<f64>) -> f64,
710{
711 let space = bounds
713 .into_iter()
714 .enumerate()
715 .fold(Space::new(), |space, (i, (lower, upper))| {
716 space.add(format!("x{}", i), Parameter::Real(lower, upper))
717 });
718
719 let mut optimizer = BayesianOptimizer::new(space, options);
721
722 let result = optimizer.optimize(func, n_calls);
724
725 Ok(result)
726}
727
728#[cfg(test)]
729mod tests {
730 use super::*;
731 use ndarray::array;
732
733 #[test]
734 fn test_space_creation() {
735 let space = Space::new()
736 .add("x1", Parameter::Real(-5.0, 5.0))
737 .add("x2", Parameter::Integer(0, 10))
738 .add(
739 "x3",
740 Parameter::Categorical(vec!["a".into(), "b".into(), "c".into()]),
741 );
742
743 assert_eq!(space.n_dims(), 3);
744 assert_eq!(space.transformed_n_dims(), 5); }
746
747 #[test]
748 fn test_space_transform() {
749 let space = Space::new()
750 .add("x1", Parameter::Real(-5.0, 5.0))
751 .add("x2", Parameter::Integer(0, 10));
752
753 let x = array![0.0, 5.0];
754 let transformed = space.transform(&x.view());
755
756 assert_eq!(transformed.len(), 2);
757 assert!((transformed[0] - 0.5).abs() < 1e-6); assert!((transformed[1] - 0.5).abs() < 1e-6); }
760
761 #[test]
762 fn test_bayesian_optimization() {
763 let f = |x: &ArrayView1<f64>| x[0].powi(2) + x[1].powi(2);
765 let bounds = vec![(-5.0, 5.0), (-5.0, 5.0)];
766
767 let options = BayesianOptimizationOptions {
768 n_initial_points: 5,
769 seed: Some(42),
770 ..Default::default()
771 };
772
773 let result = bayesian_optimization(f, bounds, 15, Some(options)).unwrap();
774
775 assert!(result.fun < 0.5);
777 assert!(result.x[0].abs() < 0.5);
778 assert!(result.x[1].abs() < 0.5);
779 }
780}