1use std::fmt;
35
36use friedrich::gaussian_process::GaussianProcess;
37use friedrich::kernel::SquaredExp;
38use friedrich::prior::ConstantPrior;
39use ndarray::{Array1, ArrayView1};
40use rand::rngs::StdRng;
41use rand::Rng;
42use rand::SeedableRng;
43
44use crate::error::OptimizeError;
45use crate::parallel::ParallelOptions;
46use crate::unconstrained::{minimize, Method, OptimizeResult, Options};
47
48#[derive(Debug, Clone)]
50pub enum Parameter {
51 Real(f64, f64),
53 Integer(i64, i64),
55 Categorical(Vec<String>),
57}
58
59#[derive(Debug, Clone)]
61pub struct Space {
62 parameters: Vec<(String, Parameter)>,
64 transformed_n_dims: usize,
66 bounds: Vec<(f64, f64)>,
68}
69
70impl Space {
71 pub fn new() -> Self {
73 Self {
74 parameters: Vec::new(),
75 transformed_n_dims: 0,
76 bounds: Vec::new(),
77 }
78 }
79
80 pub fn add<S: Into<String>>(mut self, name: S, parameter: Parameter) -> Self {
82 let name = name.into();
83
84 self.transformed_n_dims += match ¶meter {
86 Parameter::Real(_, _) => 1,
87 Parameter::Integer(_, _) => 1,
88 Parameter::Categorical(values) => values.len(),
89 };
90
91 let bounds = match ¶meter {
93 Parameter::Real(lower, upper) => (*lower, *upper),
94 Parameter::Integer(lower, upper) => (*lower as f64, *upper as f64),
95 Parameter::Categorical(_) => (0.0, 1.0),
96 };
97 self.bounds.push(bounds);
98
99 self.parameters.push((name, parameter));
100 self
101 }
102
103 pub fn n_dims(&self) -> usize {
105 self.parameters.len()
106 }
107
108 pub fn transformed_n_dims(&self) -> usize {
110 self.transformed_n_dims
111 }
112
113 pub fn sample(&self, n_samples: usize, rng: &mut StdRng) -> Vec<Array1<f64>> {
115 let n_dims = self.n_dims();
116 let mut samples = Vec::with_capacity(n_samples);
117
118 for _ in 0..n_samples {
119 let mut sample = Array1::zeros(n_dims);
120
121 for (i, (_, param)) in self.parameters.iter().enumerate() {
122 match param {
123 Parameter::Real(lower, upper) => {
124 sample[i] = rng.gen_range(*lower..*upper);
126 }
127 Parameter::Integer(lower, upper) => {
128 let range = rng.gen_range(*lower..=*upper);
129 sample[i] = range as f64;
130 }
131 Parameter::Categorical(values) => {
132 let index = rng.gen_range(0..values.len());
133 sample[i] = index as f64;
134 }
135 }
136 }
137
138 samples.push(sample);
139 }
140
141 samples
142 }
143
144 pub fn transform(&self, x: &ArrayView1<f64>) -> Array1<f64> {
146 let mut transformed = Array1::zeros(self.transformed_n_dims);
147 let mut idx = 0;
148
149 for (i, (_, param)) in self.parameters.iter().enumerate() {
150 match param {
151 Parameter::Real(lower, upper) => {
152 transformed[idx] = (x[i] - lower) / (upper - lower);
154 idx += 1;
155 }
156 Parameter::Integer(lower, upper) => {
157 transformed[idx] = (x[i] - *lower as f64) / (*upper as f64 - *lower as f64);
159 idx += 1;
160 }
161 Parameter::Categorical(values) => {
162 let index = x[i] as usize;
164 for j in 0..values.len() {
165 transformed[idx + j] = if j == index { 1.0 } else { 0.0 };
166 }
167 idx += values.len();
168 }
169 }
170 }
171
172 transformed
173 }
174
175 pub fn inverse_transform(&self, x: &ArrayView1<f64>) -> Array1<f64> {
177 let mut inverse = Array1::zeros(self.n_dims());
178 let mut idx = 0;
179
180 for (i, (_, param)) in self.parameters.iter().enumerate() {
181 match param {
182 Parameter::Real(lower, upper) => {
183 inverse[i] = lower + x[idx] * (upper - lower);
185 idx += 1;
186 }
187 Parameter::Integer(lower, upper) => {
188 let value = lower + (x[idx] * (*upper - *lower) as f64).round() as i64;
190 inverse[i] = value as f64;
191 idx += 1;
192 }
193 Parameter::Categorical(values) => {
194 let mut max_idx = 0;
196 let mut max_val = x[idx];
197 for j in 1..values.len() {
198 if x[idx + j] > max_val {
199 max_val = x[idx + j];
200 max_idx = j;
201 }
202 }
203 inverse[i] = max_idx as f64;
204 idx += values.len();
205 }
206 }
207 }
208
209 inverse
210 }
211
212 pub fn bounds_to_vec(&self) -> Vec<(f64, f64)> {
214 self.parameters
215 .iter()
216 .map(|(_, param)| match param {
217 Parameter::Real(lower, upper) => (*lower, *upper),
218 Parameter::Integer(lower, upper) => (*lower as f64, *upper as f64),
219 Parameter::Categorical(_) => (0.0, 1.0), })
221 .collect()
222 }
223}
224
225impl Default for Space {
226 fn default() -> Self {
227 Self::new()
228 }
229}
230
231pub trait AcquisitionFunction: Send + Sync {
233 fn evaluate(&self, x: &ArrayView1<f64>) -> f64;
235
236 fn gradient(&self, x: &ArrayView1<f64>) -> Option<Array1<f64>> {
238 None
239 }
240}
241
242pub struct ExpectedImprovement {
244 model: GaussianProcess<SquaredExp, ConstantPrior>,
245 y_best: f64,
246 xi: f64,
247}
248
249impl ExpectedImprovement {
250 pub fn new(model: GaussianProcess<SquaredExp, ConstantPrior>, y_best: f64, xi: f64) -> Self {
252 Self { model, y_best, xi }
253 }
254}
255
256impl AcquisitionFunction for ExpectedImprovement {
257 fn evaluate(&self, x: &ArrayView1<f64>) -> f64 {
258 let mean = self.model.predict(&x.to_vec());
259 let std = (self.model.predict_variance(&x.to_vec())).sqrt();
260
261 if std <= 0.0 {
262 return 0.0;
263 }
264
265 let z = (self.y_best - mean - self.xi) / std;
266 let norm_cdf = 0.5 * (1.0 + approx_erf(z * std::f64::consts::SQRT_2 / 2.0));
268 let norm_pdf = (-0.5 * z.powi(2)).exp() / (2.0 * std::f64::consts::PI).sqrt();
269
270 let ei = (self.y_best - mean - self.xi) * norm_cdf + std * norm_pdf;
271
272 if ei < 0.0 {
273 0.0
274 } else {
275 ei
276 }
277 }
278
279 fn gradient(&self, x: &ArrayView1<f64>) -> Option<Array1<f64>> {
280 None
282 }
283}
284
285pub struct LowerConfidenceBound {
287 model: GaussianProcess<SquaredExp, ConstantPrior>,
288 kappa: f64,
289}
290
291impl LowerConfidenceBound {
292 pub fn new(model: GaussianProcess<SquaredExp, ConstantPrior>, kappa: f64) -> Self {
294 Self { model, kappa }
295 }
296}
297
298impl AcquisitionFunction for LowerConfidenceBound {
299 fn evaluate(&self, x: &ArrayView1<f64>) -> f64 {
300 let mean = self.model.predict(&x.to_vec());
301 let std = (self.model.predict_variance(&x.to_vec())).sqrt();
302
303 mean - self.kappa * std
304 }
305
306 fn gradient(&self, x: &ArrayView1<f64>) -> Option<Array1<f64>> {
307 None
309 }
310}
311
312pub struct ProbabilityOfImprovement {
314 model: GaussianProcess<SquaredExp, ConstantPrior>,
315 y_best: f64,
316 xi: f64,
317}
318
319impl ProbabilityOfImprovement {
320 pub fn new(model: GaussianProcess<SquaredExp, ConstantPrior>, y_best: f64, xi: f64) -> Self {
322 Self { model, y_best, xi }
323 }
324}
325
326impl AcquisitionFunction for ProbabilityOfImprovement {
327 fn evaluate(&self, x: &ArrayView1<f64>) -> f64 {
328 let mean = self.model.predict(&x.to_vec());
329 let std = (self.model.predict_variance(&x.to_vec())).sqrt();
330
331 if std <= 0.0 {
332 return 0.0;
333 }
334
335 let z = (self.y_best - mean - self.xi) / std;
336 0.5 * (1.0 + approx_erf(z * std::f64::consts::SQRT_2 / 2.0))
338 }
339
340 fn gradient(&self, x: &ArrayView1<f64>) -> Option<Array1<f64>> {
341 None
343 }
344}
345
346#[allow(dead_code)]
349fn approx_erf(x: f64) -> f64 {
350 let a1 = 0.254829592;
352 let a2 = -0.284496736;
353 let a3 = 1.421413741;
354 let a4 = -1.453152027;
355 let a5 = 1.061405429;
356 let p = 0.3275911;
357
358 let sign = if x < 0.0 { -1.0 } else { 1.0 };
360 let x = x.abs();
361
362 let t = 1.0 / (1.0 + p * x);
364 let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x).exp();
365
366 sign * y
367}
368
369#[derive(Debug, Clone, Copy, Default)]
371pub enum AcquisitionFunctionType {
372 #[default]
374 ExpectedImprovement,
375 LowerConfidenceBound,
377 ProbabilityOfImprovement,
379}
380
381impl fmt::Display for AcquisitionFunctionType {
382 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
383 match self {
384 AcquisitionFunctionType::ExpectedImprovement => write!(f, "EI"),
385 AcquisitionFunctionType::LowerConfidenceBound => write!(f, "LCB"),
386 AcquisitionFunctionType::ProbabilityOfImprovement => write!(f, "PI"),
387 }
388 }
389}
390
391#[derive(Debug, Clone, Copy, Default)]
393pub enum KernelType {
394 #[default]
396 SquaredExponential,
397 Matern52,
399 Matern32,
401}
402
403impl fmt::Display for KernelType {
404 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
405 match self {
406 KernelType::SquaredExponential => write!(f, "SquaredExponential"),
407 KernelType::Matern52 => write!(f, "Matern52"),
408 KernelType::Matern32 => write!(f, "Matern32"),
409 }
410 }
411}
412
413#[derive(Debug, Clone, Copy, Default)]
415pub enum InitialPointGenerator {
416 #[default]
418 Random,
419 LatinHypercube,
421 Sobol,
423 Halton,
425}
426
427impl fmt::Display for InitialPointGenerator {
428 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
429 match self {
430 InitialPointGenerator::Random => write!(f, "Random"),
431 InitialPointGenerator::LatinHypercube => write!(f, "LatinHypercube"),
432 InitialPointGenerator::Sobol => write!(f, "Sobol"),
433 InitialPointGenerator::Halton => write!(f, "Halton"),
434 }
435 }
436}
437
438#[derive(Clone, Debug)]
440pub struct BayesianOptimizationOptions {
441 pub n_initial_points: usize,
443 pub initial_point_generator: InitialPointGenerator,
445 pub acq_func: AcquisitionFunctionType,
447 pub kernel: KernelType,
449 pub kappa: f64,
451 pub xi: f64,
453 pub seed: Option<u64>,
455 pub parallel: Option<ParallelOptions>,
457 pub n_restarts: usize,
459}
460
461impl Default for BayesianOptimizationOptions {
462 fn default() -> Self {
463 Self {
464 n_initial_points: 10,
465 initial_point_generator: InitialPointGenerator::default(),
466 acq_func: AcquisitionFunctionType::default(),
467 kernel: KernelType::default(),
468 kappa: 1.96,
469 xi: 0.01,
470 seed: None,
471 parallel: None,
472 n_restarts: 5,
473 }
474 }
475}
476
477#[derive(Debug, Clone)]
479struct Observation {
480 x: Array1<f64>,
482 y: f64,
484}
485
486pub struct BayesianOptimizer {
488 space: Space,
490 options: BayesianOptimizationOptions,
492 observations: Vec<Observation>,
494 best_observation: Option<Observation>,
496 rng: StdRng,
498 iteration: usize,
500}
501
502impl BayesianOptimizer {
503 pub fn new(space: Space, options: Option<BayesianOptimizationOptions>) -> Self {
505 let options = options.unwrap_or_default();
506 let seed = options.seed.unwrap_or_else(|| rand::rng().random());
507 let rng = StdRng::seed_from_u64(seed);
508
509 Self {
510 space,
511 options,
512 observations: Vec::new(),
513 best_observation: None,
514 rng,
515 iteration: 0,
516 }
517 }
518
519 pub fn ask(&mut self) -> Array1<f64> {
521 if self.observations.len() < self.options.n_initial_points {
523 let samples = match self.options.initial_point_generator {
524 InitialPointGenerator::Random => {
525 self.space.sample(1, &mut self.rng)
527 }
528 InitialPointGenerator::LatinHypercube => {
529 let dim = self.space.bounds.len();
531 let n_samples = 1;
532
533 let mut sample = Array1::zeros(dim);
535
536 for (i, (low, high)) in self.space.bounds.iter().enumerate() {
537 let interval_size = (high - low) / n_samples as f64;
539 let offset = self.rng.gen_range(0.0..1.0) * interval_size;
540 sample[i] = low + offset;
541 }
542
543 vec![sample]
544 }
545 InitialPointGenerator::Sobol => {
546 let dim = self.space.bounds.len();
548 let mut sample = Array1::zeros(dim);
549
550 let seed = self.iteration as u32 + 1;
552
553 for (i, (low, high)) in self.space.bounds.iter().enumerate() {
554 let mut n = seed;
556 let mut denom = 1.0;
557 let mut result = 0.0;
558 let base = 2u32;
559
560 while n > 0 {
561 denom *= base as f64;
562 result += (n % base) as f64 / denom;
563 n /= base;
564 }
565
566 let offset = ((i + 1) as f64 * 0.5).fract();
568 let value = (result + offset).fract();
569 sample[i] = low + value * (high - low);
570 }
571
572 vec![sample]
573 }
574 InitialPointGenerator::Halton => {
575 let dim = self.space.bounds.len();
577 let mut sample = Array1::zeros(dim);
578
579 let primes = [2u32, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47];
581 let seed = self.iteration as u32 + 1;
582
583 for (i, (low, high)) in self.space.bounds.iter().enumerate() {
584 let base = if i < primes.len() {
586 primes[i]
587 } else {
588 primes[i % primes.len()]
589 };
590
591 let mut n = seed;
593 let mut denom = 1.0;
594 let mut result = 0.0;
595
596 while n > 0 {
597 denom *= base as f64;
598 result += (n % base) as f64 / denom;
599 n /= base;
600 }
601
602 sample[i] = low + result * (high - low);
604 }
605
606 vec![sample]
607 }
608 };
609
610 self.iteration += 1;
611 return samples[0].clone();
612 }
613
614 self.iteration += 1;
616 self.optimize_acquisition_function()
617 }
618
619 pub fn tell(&mut self, x: Array1<f64>, y: f64) {
621 let observation = Observation { x, y };
622
623 if let Some(best) = &self.best_observation {
625 if y < best.y {
626 self.best_observation = Some(observation.clone());
627 }
628 } else {
629 self.best_observation = Some(observation.clone());
630 }
631
632 self.observations.push(observation);
634 }
635
636 fn build_model(&self) -> GaussianProcess<SquaredExp, ConstantPrior> {
638 let mut x_data = Vec::with_capacity(self.observations.len());
640 let mut y_data = Vec::with_capacity(self.observations.len());
641
642 for obs in &self.observations {
643 let x_transformed = self.space.transform(&obs.x.view()).to_vec();
644 x_data.push(x_transformed);
645 y_data.push(obs.y);
646 }
647
648 GaussianProcess::default(x_data, y_data)
651 }
652
653 fn create_acquisition_function(&self) -> Box<dyn AcquisitionFunction> {
655 let model = self.build_model();
656 let y_best = self.best_observation.as_ref().unwrap().y;
657
658 match self.options.acq_func {
659 AcquisitionFunctionType::ExpectedImprovement => {
660 Box::new(ExpectedImprovement::new(model, y_best, self.options.xi))
661 }
662 AcquisitionFunctionType::LowerConfidenceBound => {
663 Box::new(LowerConfidenceBound::new(model, self.options.kappa))
664 }
665 AcquisitionFunctionType::ProbabilityOfImprovement => Box::new(
666 ProbabilityOfImprovement::new(model, y_best, self.options.xi),
667 ),
668 }
669 }
670
671 fn optimize_acquisition_function(&mut self) -> Array1<f64> {
673 let acq_func = self.create_acquisition_function();
674 let bounds = self.space.bounds_to_vec();
675 let n_restarts = self.options.n_restarts;
676
677 let f = |x: &ArrayView1<f64>| -acq_func.evaluate(x);
679
680 let mut x_starts = self.space.sample(n_restarts, &mut self.rng);
682
683 if let Some(best) = &self.best_observation {
685 if !x_starts.is_empty() {
686 x_starts[0] = best.x.clone();
687 } else {
688 x_starts.push(best.x.clone());
689 }
690 }
691
692 let mut best_x = None;
694 let mut best_value = f64::INFINITY;
695
696 for x_start in x_starts {
697 let result = minimize(
698 f,
699 &x_start.to_vec(),
700 Method::LBFGS,
701 Some(Options {
702 bounds: Some(
703 crate::unconstrained::Bounds::from_vecs(
704 bounds.iter().map(|b| Some(b.0)).collect(),
705 bounds.iter().map(|b| Some(b.1)).collect(),
706 )
707 .unwrap(),
708 ),
709 ..Default::default()
710 }),
711 );
712
713 if let Ok(res) = result {
714 if res.fun < best_value {
715 best_value = res.fun;
716 best_x = Some(res.x);
717 }
718 }
719 }
720
721 best_x.unwrap_or_else(|| {
723 self.space.sample(1, &mut self.rng)[0].clone()
725 })
726 }
727
728 pub fn optimize<F>(&mut self, func: F, n_calls: usize) -> OptimizeResult<f64>
730 where
731 F: Fn(&ArrayView1<f64>) -> f64,
732 {
733 let mut n_calls_remaining = n_calls;
734
735 let n_initial = self.options.n_initial_points.min(n_calls);
737 let initial_points = self.space.sample(n_initial, &mut self.rng);
738
739 for point in initial_points {
740 let value = func(&point.view());
741 self.tell(point, value);
742 n_calls_remaining -= 1;
743
744 if n_calls_remaining == 0 {
745 break;
746 }
747 }
748
749 let mut iterations = 0;
751
752 while n_calls_remaining > 0 {
753 let next_point = self.ask();
755
756 let value = func(&next_point.view());
758
759 self.tell(next_point, value);
761
762 n_calls_remaining -= 1;
764 iterations += 1;
765 }
766
767 let best = self.best_observation.as_ref().unwrap();
769 OptimizeResult {
770 x: best.x.clone(),
771 fun: best.y,
772 nfev: self.observations.len(),
773 func_evals: self.observations.len(),
774 nit: iterations,
775 success: true,
776 message: "Optimization terminated successfully".to_string(),
777 ..Default::default()
778 }
779 }
780}
781
782#[allow(dead_code)]
784pub fn bayesian_optimization<F>(
785 func: F,
786 bounds: Vec<(f64, f64)>,
787 n_calls: usize,
788 options: Option<BayesianOptimizationOptions>,
789) -> Result<OptimizeResult<f64>, OptimizeError>
790where
791 F: Fn(&ArrayView1<f64>) -> f64,
792{
793 let space = bounds
795 .into_iter()
796 .enumerate()
797 .fold(Space::new(), |space, (i, (lower, upper))| {
798 space.add(format!("x{}", i), Parameter::Real(lower, upper))
799 });
800
801 let mut optimizer = BayesianOptimizer::new(space, options);
803
804 let result = optimizer.optimize(func, n_calls);
806
807 Ok(result)
808}
809
810#[cfg(test)]
811mod tests {
812 use super::*;
813 use ndarray::array;
814
815 #[test]
816 fn test_space_creation() {
817 let space = Space::new()
818 .add("x1", Parameter::Real(-5.0, 5.0))
819 .add("x2", Parameter::Integer(0, 10))
820 .add(
821 "x3",
822 Parameter::Categorical(vec!["a".into(), "b".into(), "c".into()]),
823 );
824
825 assert_eq!(space.n_dims(), 3);
826 assert_eq!(space.transformed_n_dims(), 5); }
828
829 #[test]
830 fn test_space_transform() {
831 let space = Space::new()
832 .add("x1", Parameter::Real(-5.0, 5.0))
833 .add("x2", Parameter::Integer(0, 10));
834
835 let x = array![0.0, 5.0];
836 let transformed = space.transform(&x.view());
837
838 assert_eq!(transformed.len(), 2);
839 assert!((transformed[0] - 0.5).abs() < 1e-6); assert!((transformed[1] - 0.5).abs() < 1e-6); }
842
843 #[test]
844 fn test_bayesian_optimization() {
845 let f = |x: &ArrayView1<f64>| x[0].powi(2) + x[1].powi(2);
847 let bounds = vec![(-5.0, 5.0), (-5.0, 5.0)];
848
849 let options = BayesianOptimizationOptions {
850 n_initial_points: 5,
851 seed: Some(42),
852 ..Default::default()
853 };
854
855 let result = bayesian_optimization(f, bounds, 15, Some(options)).unwrap();
856
857 assert!(result.fun < 0.5);
859 assert!(result.x[0].abs() < 0.5);
860 assert!(result.x[1].abs() < 0.5);
861 }
862}