1use std::fmt;
35
36use scirs2_core::ndarray::{Array1, Array2, ArrayView1};
37use scirs2_core::random::rngs::StdRng;
38use scirs2_core::random::Rng;
39use scirs2_core::random::SeedableRng;
40use scirs2_stats::gaussian_process::{GaussianProcessRegressor, SquaredExponential};
41
42use crate::error::OptimizeError;
43use crate::parallel::ParallelOptions;
44use crate::unconstrained::{minimize, Method, OptimizeResult, Options};
45
46struct GaussianProcess {
48 regressor: GaussianProcessRegressor<SquaredExponential>,
49 n_features: usize,
50}
51
52impl GaussianProcess {
53 fn default(x_data: Vec<Vec<f64>>, y_data: Vec<f64>) -> Self {
55 let n_samples = x_data.len();
56 let n_features = if n_samples > 0 { x_data[0].len() } else { 0 };
57
58 let mut x_flat = Vec::with_capacity(n_samples * n_features);
60 for row in &x_data {
61 x_flat.extend_from_slice(row);
62 }
63 let x_array = Array2::from_shape_vec((n_samples, n_features), x_flat).unwrap();
64 let y_array = Array1::from_vec(y_data);
65
66 let kernel = SquaredExponential::default();
68 let mut regressor = GaussianProcessRegressor::new(kernel);
69 regressor.fit(&x_array, &y_array).unwrap();
70
71 Self {
72 regressor,
73 n_features,
74 }
75 }
76
77 fn predict(&self, x: &Vec<f64>) -> f64 {
79 let x_array = Array2::from_shape_vec((1, self.n_features), x.clone()).unwrap();
80 let predictions = self.regressor.predict(&x_array).unwrap();
81 predictions[0]
82 }
83
84 fn predict_variance(&self, x: &Vec<f64>) -> f64 {
86 let x_array = Array2::from_shape_vec((1, self.n_features), x.clone()).unwrap();
87 let (_mean, std) = self.regressor.predict_with_std(&x_array).unwrap();
88 std[0] * std[0] }
90}
91
92#[derive(Debug, Clone)]
94pub enum Parameter {
95 Real(f64, f64),
97 Integer(i64, i64),
99 Categorical(Vec<String>),
101}
102
103#[derive(Debug, Clone)]
105pub struct Space {
106 parameters: Vec<(String, Parameter)>,
108 transformed_n_dims: usize,
110 bounds: Vec<(f64, f64)>,
112}
113
114impl Space {
115 pub fn new() -> Self {
117 Self {
118 parameters: Vec::new(),
119 transformed_n_dims: 0,
120 bounds: Vec::new(),
121 }
122 }
123
124 pub fn add<S: Into<String>>(mut self, name: S, parameter: Parameter) -> Self {
126 let name = name.into();
127
128 self.transformed_n_dims += match ¶meter {
130 Parameter::Real(_, _) => 1,
131 Parameter::Integer(_, _) => 1,
132 Parameter::Categorical(values) => values.len(),
133 };
134
135 let bounds = match ¶meter {
137 Parameter::Real(lower, upper) => (*lower, *upper),
138 Parameter::Integer(lower, upper) => (*lower as f64, *upper as f64),
139 Parameter::Categorical(_) => (0.0, 1.0),
140 };
141 self.bounds.push(bounds);
142
143 self.parameters.push((name, parameter));
144 self
145 }
146
147 pub fn n_dims(&self) -> usize {
149 self.parameters.len()
150 }
151
152 pub fn transformed_n_dims(&self) -> usize {
154 self.transformed_n_dims
155 }
156
157 pub fn sample(&self, n_samples: usize, rng: &mut StdRng) -> Vec<Array1<f64>> {
159 let n_dims = self.n_dims();
160 let mut samples = Vec::with_capacity(n_samples);
161
162 for _ in 0..n_samples {
163 let mut sample = Array1::zeros(n_dims);
164
165 for (i, (_, param)) in self.parameters.iter().enumerate() {
166 match param {
167 Parameter::Real(lower, upper) => {
168 sample[i] = rng.gen_range(*lower..*upper);
170 }
171 Parameter::Integer(lower, upper) => {
172 let range = rng.gen_range(*lower..=*upper);
173 sample[i] = range as f64;
174 }
175 Parameter::Categorical(values) => {
176 let index = rng.gen_range(0..values.len());
177 sample[i] = index as f64;
178 }
179 }
180 }
181
182 samples.push(sample);
183 }
184
185 samples
186 }
187
188 pub fn transform(&self, x: &ArrayView1<f64>) -> Array1<f64> {
190 let mut transformed = Array1::zeros(self.transformed_n_dims);
191 let mut idx = 0;
192
193 for (i, (_, param)) in self.parameters.iter().enumerate() {
194 match param {
195 Parameter::Real(lower, upper) => {
196 transformed[idx] = (x[i] - lower) / (upper - lower);
198 idx += 1;
199 }
200 Parameter::Integer(lower, upper) => {
201 transformed[idx] = (x[i] - *lower as f64) / (*upper as f64 - *lower as f64);
203 idx += 1;
204 }
205 Parameter::Categorical(values) => {
206 let index = x[i] as usize;
208 for j in 0..values.len() {
209 transformed[idx + j] = if j == index { 1.0 } else { 0.0 };
210 }
211 idx += values.len();
212 }
213 }
214 }
215
216 transformed
217 }
218
219 pub fn inverse_transform(&self, x: &ArrayView1<f64>) -> Array1<f64> {
221 let mut inverse = Array1::zeros(self.n_dims());
222 let mut idx = 0;
223
224 for (i, (_, param)) in self.parameters.iter().enumerate() {
225 match param {
226 Parameter::Real(lower, upper) => {
227 inverse[i] = lower + x[idx] * (upper - lower);
229 idx += 1;
230 }
231 Parameter::Integer(lower, upper) => {
232 let value = lower + (x[idx] * (*upper - *lower) as f64).round() as i64;
234 inverse[i] = value as f64;
235 idx += 1;
236 }
237 Parameter::Categorical(values) => {
238 let mut max_idx = 0;
240 let mut max_val = x[idx];
241 for j in 1..values.len() {
242 if x[idx + j] > max_val {
243 max_val = x[idx + j];
244 max_idx = j;
245 }
246 }
247 inverse[i] = max_idx as f64;
248 idx += values.len();
249 }
250 }
251 }
252
253 inverse
254 }
255
256 pub fn bounds_to_vec(&self) -> Vec<(f64, f64)> {
258 self.parameters
259 .iter()
260 .map(|(_, param)| match param {
261 Parameter::Real(lower, upper) => (*lower, *upper),
262 Parameter::Integer(lower, upper) => (*lower as f64, *upper as f64),
263 Parameter::Categorical(_) => (0.0, 1.0), })
265 .collect()
266 }
267}
268
269impl Default for Space {
270 fn default() -> Self {
271 Self::new()
272 }
273}
274
275pub trait AcquisitionFunction: Send + Sync {
277 fn evaluate(&self, x: &ArrayView1<f64>) -> f64;
279
280 fn gradient(&self, x: &ArrayView1<f64>) -> Option<Array1<f64>> {
282 None
283 }
284}
285
286pub struct ExpectedImprovement {
288 model: GaussianProcess,
289 y_best: f64,
290 xi: f64,
291}
292
293impl ExpectedImprovement {
294 pub fn new(model: GaussianProcess, y_best: f64, xi: f64) -> Self {
296 Self { model, y_best, xi }
297 }
298}
299
300impl AcquisitionFunction for ExpectedImprovement {
301 fn evaluate(&self, x: &ArrayView1<f64>) -> f64 {
302 let mean = self.model.predict(&x.to_vec());
303 let std = (self.model.predict_variance(&x.to_vec())).sqrt();
304
305 if std <= 0.0 {
306 return 0.0;
307 }
308
309 let z = (self.y_best - mean - self.xi) / std;
310 let norm_cdf = 0.5 * (1.0 + approx_erf(z * std::f64::consts::SQRT_2 / 2.0));
312 let norm_pdf = (-0.5 * z.powi(2)).exp() / (2.0 * std::f64::consts::PI).sqrt();
313
314 let ei = (self.y_best - mean - self.xi) * norm_cdf + std * norm_pdf;
315
316 if ei < 0.0 {
317 0.0
318 } else {
319 ei
320 }
321 }
322
323 fn gradient(&self, x: &ArrayView1<f64>) -> Option<Array1<f64>> {
324 None
326 }
327}
328
329pub struct LowerConfidenceBound {
331 model: GaussianProcess,
332 kappa: f64,
333}
334
335impl LowerConfidenceBound {
336 pub fn new(model: GaussianProcess, kappa: f64) -> Self {
338 Self { model, kappa }
339 }
340}
341
342impl AcquisitionFunction for LowerConfidenceBound {
343 fn evaluate(&self, x: &ArrayView1<f64>) -> f64 {
344 let mean = self.model.predict(&x.to_vec());
345 let std = (self.model.predict_variance(&x.to_vec())).sqrt();
346
347 mean - self.kappa * std
348 }
349
350 fn gradient(&self, x: &ArrayView1<f64>) -> Option<Array1<f64>> {
351 None
353 }
354}
355
356pub struct ProbabilityOfImprovement {
358 model: GaussianProcess,
359 y_best: f64,
360 xi: f64,
361}
362
363impl ProbabilityOfImprovement {
364 pub fn new(model: GaussianProcess, y_best: f64, xi: f64) -> Self {
366 Self { model, y_best, xi }
367 }
368}
369
370impl AcquisitionFunction for ProbabilityOfImprovement {
371 fn evaluate(&self, x: &ArrayView1<f64>) -> f64 {
372 let mean = self.model.predict(&x.to_vec());
373 let std = (self.model.predict_variance(&x.to_vec())).sqrt();
374
375 if std <= 0.0 {
376 return 0.0;
377 }
378
379 let z = (self.y_best - mean - self.xi) / std;
380 0.5 * (1.0 + approx_erf(z * std::f64::consts::SQRT_2 / 2.0))
382 }
383
384 fn gradient(&self, x: &ArrayView1<f64>) -> Option<Array1<f64>> {
385 None
387 }
388}
389
390#[allow(dead_code)]
393fn approx_erf(x: f64) -> f64 {
394 let a1 = 0.254829592;
396 let a2 = -0.284496736;
397 let a3 = 1.421413741;
398 let a4 = -1.453152027;
399 let a5 = 1.061405429;
400 let p = 0.3275911;
401
402 let sign = if x < 0.0 { -1.0 } else { 1.0 };
404 let x = x.abs();
405
406 let t = 1.0 / (1.0 + p * x);
408 let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x).exp();
409
410 sign * y
411}
412
413#[derive(Debug, Clone, Copy, Default)]
415pub enum AcquisitionFunctionType {
416 #[default]
418 ExpectedImprovement,
419 LowerConfidenceBound,
421 ProbabilityOfImprovement,
423}
424
425impl fmt::Display for AcquisitionFunctionType {
426 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
427 match self {
428 AcquisitionFunctionType::ExpectedImprovement => write!(f, "EI"),
429 AcquisitionFunctionType::LowerConfidenceBound => write!(f, "LCB"),
430 AcquisitionFunctionType::ProbabilityOfImprovement => write!(f, "PI"),
431 }
432 }
433}
434
435#[derive(Debug, Clone, Copy, Default)]
437pub enum KernelType {
438 #[default]
440 SquaredExponential,
441 Matern52,
443 Matern32,
445}
446
447impl fmt::Display for KernelType {
448 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
449 match self {
450 KernelType::SquaredExponential => write!(f, "SquaredExponential"),
451 KernelType::Matern52 => write!(f, "Matern52"),
452 KernelType::Matern32 => write!(f, "Matern32"),
453 }
454 }
455}
456
457#[derive(Debug, Clone, Copy, Default)]
459pub enum InitialPointGenerator {
460 #[default]
462 Random,
463 LatinHypercube,
465 Sobol,
467 Halton,
469}
470
471impl fmt::Display for InitialPointGenerator {
472 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
473 match self {
474 InitialPointGenerator::Random => write!(f, "Random"),
475 InitialPointGenerator::LatinHypercube => write!(f, "LatinHypercube"),
476 InitialPointGenerator::Sobol => write!(f, "Sobol"),
477 InitialPointGenerator::Halton => write!(f, "Halton"),
478 }
479 }
480}
481
482#[derive(Clone, Debug)]
484pub struct BayesianOptimizationOptions {
485 pub n_initial_points: usize,
487 pub initial_point_generator: InitialPointGenerator,
489 pub acq_func: AcquisitionFunctionType,
491 pub kernel: KernelType,
493 pub kappa: f64,
495 pub xi: f64,
497 pub seed: Option<u64>,
499 pub parallel: Option<ParallelOptions>,
501 pub n_restarts: usize,
503}
504
505impl Default for BayesianOptimizationOptions {
506 fn default() -> Self {
507 Self {
508 n_initial_points: 10,
509 initial_point_generator: InitialPointGenerator::default(),
510 acq_func: AcquisitionFunctionType::default(),
511 kernel: KernelType::default(),
512 kappa: 1.96,
513 xi: 0.01,
514 seed: None,
515 parallel: None,
516 n_restarts: 5,
517 }
518 }
519}
520
521#[derive(Debug, Clone)]
523struct Observation {
524 x: Array1<f64>,
526 y: f64,
528}
529
530pub struct BayesianOptimizer {
532 space: Space,
534 options: BayesianOptimizationOptions,
536 observations: Vec<Observation>,
538 best_observation: Option<Observation>,
540 rng: StdRng,
542 iteration: usize,
544}
545
546impl BayesianOptimizer {
547 pub fn new(space: Space, options: Option<BayesianOptimizationOptions>) -> Self {
549 let options = options.unwrap_or_default();
550 let seed = options
551 .seed
552 .unwrap_or_else(|| scirs2_core::random::rng().random());
553 let rng = StdRng::seed_from_u64(seed);
554
555 Self {
556 space,
557 options,
558 observations: Vec::new(),
559 best_observation: None,
560 rng,
561 iteration: 0,
562 }
563 }
564
565 pub fn ask(&mut self) -> Array1<f64> {
567 if self.observations.len() < self.options.n_initial_points {
569 let samples = match self.options.initial_point_generator {
570 InitialPointGenerator::Random => {
571 self.space.sample(1, &mut self.rng)
573 }
574 InitialPointGenerator::LatinHypercube => {
575 let dim = self.space.bounds.len();
577 let n_samples = 1;
578
579 let mut sample = Array1::zeros(dim);
581
582 for (i, (low, high)) in self.space.bounds.iter().enumerate() {
583 let interval_size = (high - low) / n_samples as f64;
585 let offset = self.rng.gen_range(0.0..1.0) * interval_size;
586 sample[i] = low + offset;
587 }
588
589 vec![sample]
590 }
591 InitialPointGenerator::Sobol => {
592 let dim = self.space.bounds.len();
594 let mut sample = Array1::zeros(dim);
595
596 let seed = self.iteration as u32 + 1;
598
599 for (i, (low, high)) in self.space.bounds.iter().enumerate() {
600 let mut n = seed;
602 let mut denom = 1.0;
603 let mut result = 0.0;
604 let base = 2u32;
605
606 while n > 0 {
607 denom *= base as f64;
608 result += (n % base) as f64 / denom;
609 n /= base;
610 }
611
612 let offset = ((i + 1) as f64 * 0.5).fract();
614 let value = (result + offset).fract();
615 sample[i] = low + value * (high - low);
616 }
617
618 vec![sample]
619 }
620 InitialPointGenerator::Halton => {
621 let dim = self.space.bounds.len();
623 let mut sample = Array1::zeros(dim);
624
625 let primes = [2u32, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47];
627 let seed = self.iteration as u32 + 1;
628
629 for (i, (low, high)) in self.space.bounds.iter().enumerate() {
630 let base = if i < primes.len() {
632 primes[i]
633 } else {
634 primes[i % primes.len()]
635 };
636
637 let mut n = seed;
639 let mut denom = 1.0;
640 let mut result = 0.0;
641
642 while n > 0 {
643 denom *= base as f64;
644 result += (n % base) as f64 / denom;
645 n /= base;
646 }
647
648 sample[i] = low + result * (high - low);
650 }
651
652 vec![sample]
653 }
654 };
655
656 self.iteration += 1;
657 return samples[0].clone();
658 }
659
660 self.iteration += 1;
662 self.optimize_acquisition_function()
663 }
664
665 pub fn tell(&mut self, x: Array1<f64>, y: f64) {
667 let observation = Observation { x, y };
668
669 if let Some(best) = &self.best_observation {
671 if y < best.y {
672 self.best_observation = Some(observation.clone());
673 }
674 } else {
675 self.best_observation = Some(observation.clone());
676 }
677
678 self.observations.push(observation);
680 }
681
682 fn build_model(&self) -> GaussianProcess {
684 let mut x_data = Vec::with_capacity(self.observations.len());
686 let mut y_data = Vec::with_capacity(self.observations.len());
687
688 for obs in &self.observations {
689 let x_transformed = self.space.transform(&obs.x.view()).to_vec();
690 x_data.push(x_transformed);
691 y_data.push(obs.y);
692 }
693
694 GaussianProcess::default(x_data, y_data)
697 }
698
699 fn create_acquisition_function(&self) -> Box<dyn AcquisitionFunction> {
701 let model = self.build_model();
702 let y_best = self.best_observation.as_ref().unwrap().y;
703
704 match self.options.acq_func {
705 AcquisitionFunctionType::ExpectedImprovement => {
706 Box::new(ExpectedImprovement::new(model, y_best, self.options.xi))
707 }
708 AcquisitionFunctionType::LowerConfidenceBound => {
709 Box::new(LowerConfidenceBound::new(model, self.options.kappa))
710 }
711 AcquisitionFunctionType::ProbabilityOfImprovement => Box::new(
712 ProbabilityOfImprovement::new(model, y_best, self.options.xi),
713 ),
714 }
715 }
716
717 fn optimize_acquisition_function(&mut self) -> Array1<f64> {
719 let acq_func = self.create_acquisition_function();
720 let bounds = self.space.bounds_to_vec();
721 let n_restarts = self.options.n_restarts;
722
723 let f = |x: &ArrayView1<f64>| -acq_func.evaluate(x);
725
726 let mut x_starts = self.space.sample(n_restarts, &mut self.rng);
728
729 if let Some(best) = &self.best_observation {
731 if !x_starts.is_empty() {
732 x_starts[0] = best.x.clone();
733 } else {
734 x_starts.push(best.x.clone());
735 }
736 }
737
738 let mut best_x = None;
740 let mut best_value = f64::INFINITY;
741
742 for x_start in x_starts {
743 let result = minimize(
744 f,
745 &x_start.to_vec(),
746 Method::LBFGS,
747 Some(Options {
748 bounds: Some(
749 crate::unconstrained::Bounds::from_vecs(
750 bounds.iter().map(|b| Some(b.0)).collect(),
751 bounds.iter().map(|b| Some(b.1)).collect(),
752 )
753 .unwrap(),
754 ),
755 ..Default::default()
756 }),
757 );
758
759 if let Ok(res) = result {
760 if res.fun < best_value {
761 best_value = res.fun;
762 best_x = Some(res.x);
763 }
764 }
765 }
766
767 best_x.unwrap_or_else(|| {
769 self.space.sample(1, &mut self.rng)[0].clone()
771 })
772 }
773
774 pub fn optimize<F>(&mut self, func: F, n_calls: usize) -> OptimizeResult<f64>
776 where
777 F: Fn(&ArrayView1<f64>) -> f64,
778 {
779 let mut n_calls_remaining = n_calls;
780
781 let n_initial = self.options.n_initial_points.min(n_calls);
783 let initial_points = self.space.sample(n_initial, &mut self.rng);
784
785 for point in initial_points {
786 let value = func(&point.view());
787 self.tell(point, value);
788 n_calls_remaining -= 1;
789
790 if n_calls_remaining == 0 {
791 break;
792 }
793 }
794
795 let mut iterations = 0;
797
798 while n_calls_remaining > 0 {
799 let next_point = self.ask();
801
802 let value = func(&next_point.view());
804
805 self.tell(next_point, value);
807
808 n_calls_remaining -= 1;
810 iterations += 1;
811 }
812
813 let best = self.best_observation.as_ref().unwrap();
815 OptimizeResult {
816 x: best.x.clone(),
817 fun: best.y,
818 nfev: self.observations.len(),
819 func_evals: self.observations.len(),
820 nit: iterations,
821 success: true,
822 message: "Optimization terminated successfully".to_string(),
823 ..Default::default()
824 }
825 }
826}
827
828#[allow(dead_code)]
830pub fn bayesian_optimization<F>(
831 func: F,
832 bounds: Vec<(f64, f64)>,
833 n_calls: usize,
834 options: Option<BayesianOptimizationOptions>,
835) -> Result<OptimizeResult<f64>, OptimizeError>
836where
837 F: Fn(&ArrayView1<f64>) -> f64,
838{
839 let space = bounds
841 .into_iter()
842 .enumerate()
843 .fold(Space::new(), |space, (i, (lower, upper))| {
844 space.add(format!("x{}", i), Parameter::Real(lower, upper))
845 });
846
847 let mut optimizer = BayesianOptimizer::new(space, options);
849
850 let result = optimizer.optimize(func, n_calls);
852
853 Ok(result)
854}
855
856#[cfg(test)]
857mod tests {
858 use super::*;
859 use scirs2_core::ndarray::array;
860
861 #[test]
862 fn test_space_creation() {
863 let space = Space::new()
864 .add("x1", Parameter::Real(-5.0, 5.0))
865 .add("x2", Parameter::Integer(0, 10))
866 .add(
867 "x3",
868 Parameter::Categorical(vec!["a".into(), "b".into(), "c".into()]),
869 );
870
871 assert_eq!(space.n_dims(), 3);
872 assert_eq!(space.transformed_n_dims(), 5); }
874
875 #[test]
876 fn test_space_transform() {
877 let space = Space::new()
878 .add("x1", Parameter::Real(-5.0, 5.0))
879 .add("x2", Parameter::Integer(0, 10));
880
881 let x = array![0.0, 5.0];
882 let transformed = space.transform(&x.view());
883
884 assert_eq!(transformed.len(), 2);
885 assert!((transformed[0] - 0.5).abs() < 1e-6); assert!((transformed[1] - 0.5).abs() < 1e-6); }
888
889 #[test]
890 fn test_bayesian_optimization() {
891 let f = |x: &ArrayView1<f64>| x[0].powi(2) + x[1].powi(2);
893 let bounds = vec![(-5.0, 5.0), (-5.0, 5.0)];
894
895 let options = BayesianOptimizationOptions {
896 n_initial_points: 5,
897 seed: Some(42),
898 ..Default::default()
899 };
900
901 let result = bayesian_optimization(f, bounds, 15, Some(options)).unwrap();
902
903 assert!(result.fun < 0.5);
905 assert!(result.x[0].abs() < 0.5);
906 assert!(result.x[1].abs() < 0.5);
907 }
908}