use std::fmt;

use friedrich::gaussian_process::GaussianProcess;
use friedrich::kernel::SquaredExp;
use friedrich::prior::ConstantPrior;
use scirs2_core::ndarray::{Array1, ArrayView1};
use scirs2_core::random::rngs::StdRng;
use scirs2_core::random::Rng;
use scirs2_core::random::SeedableRng;

use crate::error::OptimizeError;
use crate::parallel::ParallelOptions;
use crate::unconstrained::{minimize, Method, OptimizeResult, Options};

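/// A single dimension of the search space: a continuous interval, an
/// integer range, or a finite set of categories. Categorical values are
/// one-hot encoded in the transformed space used by the surrogate model.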
#[derive(Debug, Clone)]
pub enum Parameter {
    /// Continuous parameter with `(lower, upper)` bounds.
    Real(f64, f64),
    /// Integer parameter with inclusive `(lower, upper)` bounds.
    Integer(i64, i64),
    /// Categorical parameter over the given values.
    Categorical(Vec<String>),
}

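/// A named collection of [`Parameter`]s defining the search space.
///
/// A minimal construction sketch (paths assumed, hence `ignore`):
///
/// ```ignore
/// let space = Space::new()
///     .add("learning_rate", Parameter::Real(1e-4, 1e-1))
///     .add("depth", Parameter::Integer(1, 8));
/// assert_eq!(space.n_dims(), 2);
/// ```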
#[derive(Debug, Clone)]
pub struct Space {
    /// Parameters in insertion order, keyed by name.
    parameters: Vec<(String, Parameter)>,
    /// Dimensionality after one-hot encoding categorical parameters.
    transformed_n_dims: usize,
    /// Box bounds per parameter in the original (untransformed) space.
    bounds: Vec<(f64, f64)>,
}

impl Space {
    /// Creates an empty search space.
    pub fn new() -> Self {
        Self {
            parameters: Vec::new(),
            transformed_n_dims: 0,
            bounds: Vec::new(),
        }
    }

    /// Adds a named parameter, returning the space for chaining.
    pub fn add<S: Into<String>>(mut self, name: S, parameter: Parameter) -> Self {
        let name = name.into();

        self.transformed_n_dims += match &parameter {
            Parameter::Real(_, _) => 1,
            Parameter::Integer(_, _) => 1,
            Parameter::Categorical(values) => values.len(),
        };

        let bounds = match &parameter {
            Parameter::Real(lower, upper) => (*lower, *upper),
            Parameter::Integer(lower, upper) => (*lower as f64, *upper as f64),
            Parameter::Categorical(_) => (0.0, 1.0),
        };
        self.bounds.push(bounds);

        self.parameters.push((name, parameter));
        self
    }

    /// Number of parameters in the original space.
    pub fn n_dims(&self) -> usize {
        self.parameters.len()
    }

    /// Number of dimensions after one-hot encoding categoricals.
    pub fn transformed_n_dims(&self) -> usize {
        self.transformed_n_dims
    }

    /// Draws `n_samples` uniform random points in the original space.
    /// Categorical parameters are represented by the index of the chosen value.
    pub fn sample(&self, n_samples: usize, rng: &mut StdRng) -> Vec<Array1<f64>> {
        let n_dims = self.n_dims();
        let mut samples = Vec::with_capacity(n_samples);

        for _ in 0..n_samples {
            let mut sample = Array1::zeros(n_dims);

            for (i, (_, param)) in self.parameters.iter().enumerate() {
                match param {
                    Parameter::Real(lower, upper) => {
                        sample[i] = rng.gen_range(*lower..*upper);
                    }
                    Parameter::Integer(lower, upper) => {
                        let value = rng.gen_range(*lower..=*upper);
                        sample[i] = value as f64;
                    }
                    Parameter::Categorical(values) => {
                        let index = rng.gen_range(0..values.len());
                        sample[i] = index as f64;
                    }
                }
            }

            samples.push(sample);
        }

        samples
    }

    /// Maps a point from the original space to the transformed space:
    /// reals and integers are rescaled to `[0, 1]`, categoricals are one-hot.
    pub fn transform(&self, x: &ArrayView1<f64>) -> Array1<f64> {
        let mut transformed = Array1::zeros(self.transformed_n_dims);
        let mut idx = 0;

        for (i, (_, param)) in self.parameters.iter().enumerate() {
            match param {
                Parameter::Real(lower, upper) => {
                    transformed[idx] = (x[i] - lower) / (upper - lower);
                    idx += 1;
                }
                Parameter::Integer(lower, upper) => {
                    transformed[idx] = (x[i] - *lower as f64) / (*upper as f64 - *lower as f64);
                    idx += 1;
                }
                Parameter::Categorical(values) => {
                    let index = x[i] as usize;
                    for j in 0..values.len() {
                        transformed[idx + j] = if j == index { 1.0 } else { 0.0 };
                    }
                    idx += values.len();
                }
            }
        }

        transformed
    }

    /// Maps a point from the transformed space back to the original space.
    /// Integers are rounded to the nearest value; categoricals take the
    /// arg-max over their one-hot block.
    pub fn inverse_transform(&self, x: &ArrayView1<f64>) -> Array1<f64> {
        let mut inverse = Array1::zeros(self.n_dims());
        let mut idx = 0;

        for (i, (_, param)) in self.parameters.iter().enumerate() {
            match param {
                Parameter::Real(lower, upper) => {
                    inverse[i] = lower + x[idx] * (upper - lower);
                    idx += 1;
                }
                Parameter::Integer(lower, upper) => {
                    let value = lower + (x[idx] * (*upper - *lower) as f64).round() as i64;
                    inverse[i] = value as f64;
                    idx += 1;
                }
                Parameter::Categorical(values) => {
                    let mut max_idx = 0;
                    let mut max_val = x[idx];
                    for j in 1..values.len() {
                        if x[idx + j] > max_val {
                            max_val = x[idx + j];
                            max_idx = j;
                        }
                    }
                    inverse[i] = max_idx as f64;
                    idx += values.len();
                }
            }
        }

        inverse
    }

    /// Returns per-parameter `(lower, upper)` bounds in the original space.
    pub fn bounds_to_vec(&self) -> Vec<(f64, f64)> {
        self.parameters
            .iter()
            .map(|(_, param)| match param {
                Parameter::Real(lower, upper) => (*lower, *upper),
                Parameter::Integer(lower, upper) => (*lower as f64, *upper as f64),
                Parameter::Categorical(_) => (0.0, 1.0),
            })
            .collect()
    }
}

impl Default for Space {
    fn default() -> Self {
        Self::new()
    }
}

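/// A surrogate-based acquisition function. By convention larger values are
/// better: the optimizer maximizes the acquisition by minimizing its
/// negation, and evaluates it on coordinates in the transformed space the
/// surrogate was trained on.
///
/// A custom strategy only needs `evaluate`; e.g., a hypothetical
/// pure-exploitation rule over the posterior mean (illustrative sketch,
/// hence `ignore`):
///
/// ```ignore
/// struct NegPosteriorMean {
///     model: GaussianProcess<SquaredExp, ConstantPrior>,
/// }
///
/// impl AcquisitionFunction for NegPosteriorMean {
///     fn evaluate(&self, x: &ArrayView1<f64>) -> f64 {
///         // Negated: low predicted objective values score highest.
///         -self.model.predict(&x.to_vec())
///     }
/// }
/// ```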
pub trait AcquisitionFunction: Send + Sync {
    /// Scores a candidate point; larger is better.
    fn evaluate(&self, x: &ArrayView1<f64>) -> f64;

    /// Analytic gradient of the acquisition at `x`, if available.
    fn gradient(&self, _x: &ArrayView1<f64>) -> Option<Array1<f64>> {
        None
    }
}

/// Expected improvement for a minimization objective:
/// `EI(x) = (y_best - mu - xi) * Phi(z) + sigma * phi(z)` with
/// `z = (y_best - mu - xi) / sigma`.
pub struct ExpectedImprovement {
    model: GaussianProcess<SquaredExp, ConstantPrior>,
    y_best: f64,
    xi: f64,
}

impl ExpectedImprovement {
    pub fn new(model: GaussianProcess<SquaredExp, ConstantPrior>, y_best: f64, xi: f64) -> Self {
        Self { model, y_best, xi }
    }
}

impl AcquisitionFunction for ExpectedImprovement {
    fn evaluate(&self, x: &ArrayView1<f64>) -> f64 {
        let mean = self.model.predict(&x.to_vec());
        let std = self.model.predict_variance(&x.to_vec()).sqrt();

        // Zero predicted uncertainty means no expected improvement.
        if std <= 0.0 {
            return 0.0;
        }

        let z = (self.y_best - mean - self.xi) / std;
        // Standard normal CDF via erf: Phi(z) = (1 + erf(z / sqrt(2))) / 2.
        let norm_cdf = 0.5 * (1.0 + approx_erf(z * std::f64::consts::FRAC_1_SQRT_2));
        let norm_pdf = (-0.5 * z.powi(2)).exp() / (2.0 * std::f64::consts::PI).sqrt();

        let ei = (self.y_best - mean - self.xi) * norm_cdf + std * norm_pdf;

        // Clamp tiny negative values produced by the erf approximation.
        if ei < 0.0 {
            0.0
        } else {
            ei
        }
    }

    fn gradient(&self, _x: &ArrayView1<f64>) -> Option<Array1<f64>> {
        // Analytic EI gradient not implemented.
        None
    }
}

/// Confidence-bound acquisition for minimization, `LCB(x) = mu - kappa * sigma`.
///
/// `evaluate` returns the *negated* bound so that this type follows the same
/// larger-is-better convention as the other acquisition functions.
pub struct LowerConfidenceBound {
    model: GaussianProcess<SquaredExp, ConstantPrior>,
    kappa: f64,
}

impl LowerConfidenceBound {
    pub fn new(model: GaussianProcess<SquaredExp, ConstantPrior>, kappa: f64) -> Self {
        Self { model, kappa }
    }
}

impl AcquisitionFunction for LowerConfidenceBound {
    fn evaluate(&self, x: &ArrayView1<f64>) -> f64 {
        let mean = self.model.predict(&x.to_vec());
        let std = self.model.predict_variance(&x.to_vec()).sqrt();

        // Maximizing -(mu - kappa * sigma) minimizes the lower confidence
        // bound, favoring low predicted values and high uncertainty.
        -(mean - self.kappa * std)
    }

    fn gradient(&self, _x: &ArrayView1<f64>) -> Option<Array1<f64>> {
        // Analytic LCB gradient not implemented.
        None
    }
}

/// Probability of improvement: `PI(x) = Phi((y_best - mu - xi) / sigma)`.
pub struct ProbabilityOfImprovement {
    model: GaussianProcess<SquaredExp, ConstantPrior>,
    y_best: f64,
    xi: f64,
}

impl ProbabilityOfImprovement {
    pub fn new(model: GaussianProcess<SquaredExp, ConstantPrior>, y_best: f64, xi: f64) -> Self {
        Self { model, y_best, xi }
    }
}

impl AcquisitionFunction for ProbabilityOfImprovement {
    fn evaluate(&self, x: &ArrayView1<f64>) -> f64 {
        let mean = self.model.predict(&x.to_vec());
        let std = self.model.predict_variance(&x.to_vec()).sqrt();

        if std <= 0.0 {
            return 0.0;
        }

        let z = (self.y_best - mean - self.xi) / std;
        0.5 * (1.0 + approx_erf(z * std::f64::consts::FRAC_1_SQRT_2))
    }

    fn gradient(&self, _x: &ArrayView1<f64>) -> Option<Array1<f64>> {
        // Analytic PI gradient not implemented.
        None
    }
}

/// Polynomial approximation of the error function (Abramowitz & Stegun
/// formula 7.1.26); maximum absolute error is about 1.5e-7.
fn approx_erf(x: f64) -> f64 {
    let a1 = 0.254829592;
    let a2 = -0.284496736;
    let a3 = 1.421413741;
    let a4 = -1.453152027;
    let a5 = 1.061405429;
    let p = 0.3275911;

    // erf is odd: erf(-x) = -erf(x).
    let sign = if x < 0.0 { -1.0 } else { 1.0 };
    let x = x.abs();

    let t = 1.0 / (1.0 + p * x);
    let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x).exp();

    sign * y
}

/// Strategy used to select the next evaluation point.
#[derive(Debug, Clone, Copy, Default)]
pub enum AcquisitionFunctionType {
    /// Expected improvement (EI); the default.
    #[default]
    ExpectedImprovement,
    /// Lower confidence bound (LCB).
    LowerConfidenceBound,
    /// Probability of improvement (PI).
    ProbabilityOfImprovement,
}

impl fmt::Display for AcquisitionFunctionType {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            AcquisitionFunctionType::ExpectedImprovement => write!(f, "EI"),
            AcquisitionFunctionType::LowerConfidenceBound => write!(f, "LCB"),
            AcquisitionFunctionType::ProbabilityOfImprovement => write!(f, "PI"),
        }
    }
}

/// Covariance kernel for the Gaussian-process surrogate. Note that
/// `build_model` currently always fits a squared-exponential kernel; the
/// Matern variants are accepted but not yet wired in.
#[derive(Debug, Clone, Copy, Default)]
pub enum KernelType {
    #[default]
    SquaredExponential,
    Matern52,
    Matern32,
}

impl fmt::Display for KernelType {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            KernelType::SquaredExponential => write!(f, "SquaredExponential"),
            KernelType::Matern52 => write!(f, "Matern52"),
            KernelType::Matern32 => write!(f, "Matern32"),
        }
    }
}

/// Scheme for generating the initial design points.
#[derive(Debug, Clone, Copy, Default)]
pub enum InitialPointGenerator {
    #[default]
    Random,
    LatinHypercube,
    Sobol,
    Halton,
}

impl fmt::Display for InitialPointGenerator {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            InitialPointGenerator::Random => write!(f, "Random"),
            InitialPointGenerator::LatinHypercube => write!(f, "LatinHypercube"),
            InitialPointGenerator::Sobol => write!(f, "Sobol"),
            InitialPointGenerator::Halton => write!(f, "Halton"),
        }
    }
}

/// Configuration for [`BayesianOptimizer`].
#[derive(Clone, Debug)]
pub struct BayesianOptimizationOptions {
    /// Number of initial design points evaluated before fitting the surrogate.
    pub n_initial_points: usize,
    /// Scheme used to generate the initial design points.
    pub initial_point_generator: InitialPointGenerator,
    /// Acquisition function used after the initial phase.
    pub acq_func: AcquisitionFunctionType,
    /// Surrogate kernel (see [`KernelType`] for current limitations).
    pub kernel: KernelType,
    /// Exploration weight for the lower confidence bound.
    pub kappa: f64,
    /// Improvement margin for EI and PI.
    pub xi: f64,
    /// RNG seed; a random seed is drawn when `None`.
    pub seed: Option<u64>,
    /// Optional parallel-evaluation settings.
    pub parallel: Option<ParallelOptions>,
    /// Number of restarts when optimizing the acquisition function.
    pub n_restarts: usize,
}

impl Default for BayesianOptimizationOptions {
    fn default() -> Self {
        Self {
            n_initial_points: 10,
            initial_point_generator: InitialPointGenerator::default(),
            acq_func: AcquisitionFunctionType::default(),
            kernel: KernelType::default(),
            kappa: 1.96,
            xi: 0.01,
            seed: None,
            parallel: None,
            n_restarts: 5,
        }
    }
}

/// A single evaluated point.
#[derive(Debug, Clone)]
struct Observation {
    x: Array1<f64>,
    y: f64,
}

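/// Sequential Bayesian optimizer with an ask/tell interface.
///
/// A minimal ask/tell loop, assuming a `space` and an objective `f`
/// (illustrative sketch, hence `ignore`):
///
/// ```ignore
/// let mut opt = BayesianOptimizer::new(space, None);
/// for _ in 0..25 {
///     let x = opt.ask();
///     let y = f(&x.view());
///     opt.tell(x, y);
/// }
/// ```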
pub struct BayesianOptimizer {
    /// Search space being optimized over.
    space: Space,
    /// Optimizer configuration.
    options: BayesianOptimizationOptions,
    /// All evaluated points, in evaluation order.
    observations: Vec<Observation>,
    /// Observation with the lowest objective value seen so far.
    best_observation: Option<Observation>,
    /// Seeded RNG used for sampling and restarts.
    rng: StdRng,
    /// Number of `ask` calls issued so far.
    iteration: usize,
}

impl BayesianOptimizer {
    /// Creates an optimizer over `space` with the given (or default) options.
    pub fn new(space: Space, options: Option<BayesianOptimizationOptions>) -> Self {
        let options = options.unwrap_or_default();
        let seed = options
            .seed
            .unwrap_or_else(|| scirs2_core::random::rng().random());
        let rng = StdRng::seed_from_u64(seed);

        Self {
            space,
            options,
            observations: Vec::new(),
            best_observation: None,
            rng,
            iteration: 0,
        }
    }

    /// Proposes the next point to evaluate. The first `n_initial_points`
    /// proposals come from the configured initial-point generator; afterwards
    /// the acquisition function is optimized over the surrogate model.
    pub fn ask(&mut self) -> Array1<f64> {
        if self.observations.len() < self.options.n_initial_points {
            let samples = match self.options.initial_point_generator {
                InitialPointGenerator::Random => self.space.sample(1, &mut self.rng),
                InitialPointGenerator::LatinHypercube => {
                    // Degenerate Latin hypercube: with one sample per call
                    // there is a single stratum per dimension, so this
                    // reduces to uniform sampling within the bounds.
                    let dim = self.space.bounds.len();
                    let n_samples = 1;

                    let mut sample = Array1::zeros(dim);

                    for (i, (low, high)) in self.space.bounds.iter().enumerate() {
                        let interval_size = (high - low) / n_samples as f64;
                        let offset = self.rng.gen_range(0.0..1.0) * interval_size;
                        sample[i] = low + offset;
                    }

                    vec![sample]
                }
                InitialPointGenerator::Sobol => {
                    // Quasi-random point via a base-2 radical inverse (van der
                    // Corput) with a per-dimension shift; an approximation of
                    // a Sobol sequence, not a true one.
                    let dim = self.space.bounds.len();
                    let mut sample = Array1::zeros(dim);

                    let seed = self.iteration as u32 + 1;

                    for (i, (low, high)) in self.space.bounds.iter().enumerate() {
                        let mut n = seed;
                        let mut denom = 1.0;
                        let mut result = 0.0;
                        let base = 2u32;

                        while n > 0 {
                            denom *= base as f64;
                            result += (n % base) as f64 / denom;
                            n /= base;
                        }

                        let offset = ((i + 1) as f64 * 0.5).fract();
                        let value = (result + offset).fract();
                        sample[i] = low + value * (high - low);
                    }

                    vec![sample]
                }
                InitialPointGenerator::Halton => {
                    // Halton sequence: radical inverse with a distinct prime
                    // base per dimension; bases repeat past 15 dimensions.
                    let dim = self.space.bounds.len();
                    let mut sample = Array1::zeros(dim);

                    let primes = [2u32, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47];
                    let seed = self.iteration as u32 + 1;

                    for (i, (low, high)) in self.space.bounds.iter().enumerate() {
                        let base = primes[i % primes.len()];

                        let mut n = seed;
                        let mut denom = 1.0;
                        let mut result = 0.0;

                        while n > 0 {
                            denom *= base as f64;
                            result += (n % base) as f64 / denom;
                            n /= base;
                        }

                        sample[i] = low + result * (high - low);
                    }

                    vec![sample]
                }
            };

            self.iteration += 1;
            return samples[0].clone();
        }

        self.iteration += 1;
        self.optimize_acquisition_function()
    }

    /// Records an observation `(x, y)` and updates the incumbent best.
    pub fn tell(&mut self, x: Array1<f64>, y: f64) {
        let observation = Observation { x, y };

        if let Some(best) = &self.best_observation {
            if y < best.y {
                self.best_observation = Some(observation.clone());
            }
        } else {
            self.best_observation = Some(observation.clone());
        }

        self.observations.push(observation);
    }

    /// Fits a Gaussian-process surrogate to all observations, with inputs
    /// mapped to the transformed ([0, 1] / one-hot) space.
    fn build_model(&self) -> GaussianProcess<SquaredExp, ConstantPrior> {
        let mut x_data = Vec::with_capacity(self.observations.len());
        let mut y_data = Vec::with_capacity(self.observations.len());

        for obs in &self.observations {
            let x_transformed = self.space.transform(&obs.x.view()).to_vec();
            x_data.push(x_transformed);
            y_data.push(obs.y);
        }

        GaussianProcess::default(x_data, y_data)
    }

    /// Builds the configured acquisition function around a freshly fitted
    /// surrogate. Panics if called before any observation has been recorded.
    fn create_acquisition_function(&self) -> Box<dyn AcquisitionFunction> {
        let model = self.build_model();
        let y_best = self.best_observation.as_ref().unwrap().y;

        match self.options.acq_func {
            AcquisitionFunctionType::ExpectedImprovement => {
                Box::new(ExpectedImprovement::new(model, y_best, self.options.xi))
            }
            AcquisitionFunctionType::LowerConfidenceBound => {
                Box::new(LowerConfidenceBound::new(model, self.options.kappa))
            }
            AcquisitionFunctionType::ProbabilityOfImprovement => Box::new(
                ProbabilityOfImprovement::new(model, y_best, self.options.xi),
            ),
        }
    }

    /// Maximizes the acquisition function with multi-start local
    /// optimization, seeding one restart at the incumbent best point.
    fn optimize_acquisition_function(&mut self) -> Array1<f64> {
        let acq_func = self.create_acquisition_function();
        let bounds = self.space.bounds_to_vec();
        let n_restarts = self.options.n_restarts;

        // Minimize the negated acquisition. Candidates live in the original
        // space; they are mapped into the transformed space the surrogate
        // was trained on before scoring.
        let space = &self.space;
        let f = |x: &ArrayView1<f64>| {
            let x_transformed = space.transform(x);
            -acq_func.evaluate(&x_transformed.view())
        };

        let mut x_starts = self.space.sample(n_restarts, &mut self.rng);

        if let Some(best) = &self.best_observation {
            if !x_starts.is_empty() {
                x_starts[0] = best.x.clone();
            } else {
                x_starts.push(best.x.clone());
            }
        }

        let mut best_x = None;
        let mut best_value = f64::INFINITY;

        for x_start in x_starts {
            let result = minimize(
                f,
                &x_start.to_vec(),
                Method::LBFGS,
                Some(Options {
                    bounds: Some(
                        crate::unconstrained::Bounds::from_vecs(
                            bounds.iter().map(|b| Some(b.0)).collect(),
                            bounds.iter().map(|b| Some(b.1)).collect(),
                        )
                        .unwrap(),
                    ),
                    ..Default::default()
                }),
            );

            if let Ok(res) = result {
                if res.fun < best_value {
                    best_value = res.fun;
                    best_x = Some(res.x);
                }
            }
        }

        // Fall back to a random sample if every restart failed.
        best_x.unwrap_or_else(|| self.space.sample(1, &mut self.rng)[0].clone())
    }

    /// Runs the full optimization loop for `n_calls` objective evaluations:
    /// an initial random design followed by acquisition-driven proposals.
    /// Note that the initial phase here samples uniformly at random
    /// regardless of `initial_point_generator`.
    pub fn optimize<F>(&mut self, func: F, n_calls: usize) -> OptimizeResult<f64>
    where
        F: Fn(&ArrayView1<f64>) -> f64,
    {
        let mut n_calls_remaining = n_calls;

        // Initial design: never more points than the total evaluation budget.
        let n_initial = self.options.n_initial_points.min(n_calls);
        let initial_points = self.space.sample(n_initial, &mut self.rng);

        for point in initial_points {
            let value = func(&point.view());
            self.tell(point, value);
            n_calls_remaining -= 1;

            if n_calls_remaining == 0 {
                break;
            }
        }

        let mut iterations = 0;

        // Model-based phase: propose, evaluate, and record until the budget
        // is exhausted.
        while n_calls_remaining > 0 {
            let next_point = self.ask();
            let value = func(&next_point.view());
            self.tell(next_point, value);

            n_calls_remaining -= 1;
            iterations += 1;
        }

        let best = self.best_observation.as_ref().unwrap();
        OptimizeResult {
            x: best.x.clone(),
            fun: best.y,
            nfev: self.observations.len(),
            func_evals: self.observations.len(),
            nit: iterations,
            success: true,
            message: "Optimization terminated successfully".to_string(),
            ..Default::default()
        }
    }
}

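/// Convenience wrapper: Bayesian optimization of `func` over box `bounds`
/// using `n_calls` objective evaluations.
///
/// A typical call, shown as a sketch (`ignore`d because the crate path is
/// assumed):
///
/// ```ignore
/// let f = |x: &ArrayView1<f64>| (x[0] - 1.0).powi(2) + x[1].powi(2);
/// let result = bayesian_optimization(f, vec![(-5.0, 5.0), (-5.0, 5.0)], 30, None)?;
/// println!("best value {} at {:?}", result.fun, result.x);
/// ```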
#[allow(dead_code)]
pub fn bayesian_optimization<F>(
    func: F,
    bounds: Vec<(f64, f64)>,
    n_calls: usize,
    options: Option<BayesianOptimizationOptions>,
) -> Result<OptimizeResult<f64>, OptimizeError>
where
    F: Fn(&ArrayView1<f64>) -> f64,
{
    // Build a space of real parameters named x0, x1, ... from the bounds.
    let space = bounds
        .into_iter()
        .enumerate()
        .fold(Space::new(), |space, (i, (lower, upper))| {
            space.add(format!("x{}", i), Parameter::Real(lower, upper))
        });

    let mut optimizer = BayesianOptimizer::new(space, options);
    let result = optimizer.optimize(func, n_calls);

    Ok(result)
}

#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_space_creation() {
        let space = Space::new()
            .add("x1", Parameter::Real(-5.0, 5.0))
            .add("x2", Parameter::Integer(0, 10))
            .add(
                "x3",
                Parameter::Categorical(vec!["a".into(), "b".into(), "c".into()]),
            );

        assert_eq!(space.n_dims(), 3);
        // 1 (real) + 1 (integer) + 3 (one-hot categorical) = 5.
        assert_eq!(space.transformed_n_dims(), 5);
    }
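
    // Sanity check: `Space::sample` should keep every draw inside its
    // parameter bounds, with integers landing on whole numbers and
    // categorical draws on valid indices.
    #[test]
    fn test_space_sample_within_bounds() {
        let space = Space::new()
            .add("x1", Parameter::Real(-5.0, 5.0))
            .add("x2", Parameter::Integer(0, 10))
            .add("x3", Parameter::Categorical(vec!["a".into(), "b".into()]));

        let mut rng = StdRng::seed_from_u64(0);
        for sample in space.sample(100, &mut rng) {
            assert!(sample[0] >= -5.0 && sample[0] < 5.0);
            assert!(sample[1] >= 0.0 && sample[1] <= 10.0);
            assert_eq!(sample[1], sample[1].round());
            assert!(sample[2] == 0.0 || sample[2] == 1.0);
        }
    }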

    #[test]
    fn test_space_transform() {
        let space = Space::new()
            .add("x1", Parameter::Real(-5.0, 5.0))
            .add("x2", Parameter::Integer(0, 10));

        let x = array![0.0, 5.0];
        let transformed = space.transform(&x.view());

        assert_eq!(transformed.len(), 2);
        // Both dimensions sit at the midpoint of their ranges.
        assert!((transformed[0] - 0.5).abs() < 1e-6);
        assert!((transformed[1] - 0.5).abs() < 1e-6);
    }
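
    // Sanity check: a categorical value is one-hot encoded by `transform`
    // and recovered via arg-max by `inverse_transform`.
    #[test]
    fn test_categorical_roundtrip() {
        let space = Space::new().add(
            "x1",
            Parameter::Categorical(vec!["a".into(), "b".into(), "c".into()]),
        );

        let x = array![2.0];
        let transformed = space.transform(&x.view());
        assert_eq!(transformed.len(), 3);
        assert_eq!(transformed.to_vec(), vec![0.0, 0.0, 1.0]);

        let recovered = space.inverse_transform(&transformed.view());
        assert_eq!(recovered[0], 2.0);
    }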

    #[test]
    fn test_bayesian_optimization() {
        // Minimize the 2-D sphere function; the optimum is 0 at the origin.
        let f = |x: &ArrayView1<f64>| x[0].powi(2) + x[1].powi(2);
        let bounds = vec![(-5.0, 5.0), (-5.0, 5.0)];

        let options = BayesianOptimizationOptions {
            n_initial_points: 5,
            seed: Some(42),
            ..Default::default()
        };

        let result = bayesian_optimization(f, bounds, 15, Some(options)).unwrap();

        assert!(result.fun < 0.5);
        assert!(result.x[0].abs() < 0.5);
        assert!(result.x[1].abs() < 0.5);
    }
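
    // Spot-check `approx_erf` against reference values; the A&S 7.1.26
    // approximation should be accurate to well under 1e-6 here.
    #[test]
    fn test_approx_erf() {
        assert!(approx_erf(0.0).abs() < 1e-7);
        assert!((approx_erf(1.0) - 0.842_700_79).abs() < 1e-6);
        assert!((approx_erf(-1.0) + 0.842_700_79).abs() < 1e-6);
        assert!(approx_erf(5.0) > 0.999_999);
    }

    // Exercise the ask/tell interface directly: proposals should stay within
    // bounds and the incumbent best must match the smallest told value.
    #[test]
    fn test_ask_tell_loop() {
        let space = Space::new()
            .add("x1", Parameter::Real(-2.0, 2.0))
            .add("x2", Parameter::Real(-2.0, 2.0));

        let options = BayesianOptimizationOptions {
            n_initial_points: 3,
            seed: Some(7),
            ..Default::default()
        };
        let mut opt = BayesianOptimizer::new(space, Some(options));

        let mut best = f64::INFINITY;
        for _ in 0..5 {
            let x = opt.ask();
            assert!(x[0] >= -2.0 && x[0] <= 2.0);
            assert!(x[1] >= -2.0 && x[1] <= 2.0);
            let y = x[0].powi(2) + x[1].powi(2);
            best = best.min(y);
            opt.tell(x, y);
        }

        assert_eq!(opt.best_observation.as_ref().unwrap().y, best);
    }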
}