1use std::fmt;
35
36use scirs2_core::ndarray::{Array1, Array2, ArrayView1};
37use scirs2_core::random::rngs::StdRng;
38use scirs2_core::random::SeedableRng;
39use scirs2_core::random::{Rng, RngExt};
40use scirs2_stats::gaussian_process::{GaussianProcessRegressor, SquaredExponential};
41
42use crate::error::OptimizeError;
43use crate::parallel::ParallelOptions;
44use crate::unconstrained::{minimize, Method, OptimizeResult, Options};
45
/// Thin wrapper around a fitted squared-exponential Gaussian-process
/// regressor, remembering the input dimensionality so single-point
/// predictions can be reshaped into the (1, n_features) matrix the
/// regressor expects.
struct GaussianProcess {
    // Fitted surrogate model (constructed and trained in `GaussianProcess::default`).
    regressor: GaussianProcessRegressor<SquaredExponential>,
    // Number of input features each prediction point must supply.
    n_features: usize,
}
51
52impl GaussianProcess {
53 fn default(x_data: Vec<Vec<f64>>, y_data: Vec<f64>) -> Self {
55 let n_samples = x_data.len();
56 let n_features = if n_samples > 0 { x_data[0].len() } else { 0 };
57
58 let mut x_flat = Vec::with_capacity(n_samples * n_features);
60 for row in &x_data {
61 x_flat.extend_from_slice(row);
62 }
63 let x_array =
64 Array2::from_shape_vec((n_samples, n_features), x_flat).expect("Operation failed");
65 let y_array = Array1::from_vec(y_data);
66
67 let kernel = SquaredExponential::default();
69 let mut regressor = GaussianProcessRegressor::new(kernel);
70 regressor.fit(&x_array, &y_array).expect("Operation failed");
71
72 Self {
73 regressor,
74 n_features,
75 }
76 }
77
78 fn predict(&self, x: &Vec<f64>) -> f64 {
80 let x_array =
81 Array2::from_shape_vec((1, self.n_features), x.clone()).expect("Operation failed");
82 let predictions = self.regressor.predict(&x_array).expect("Operation failed");
83 predictions[0]
84 }
85
86 fn predict_variance(&self, x: &Vec<f64>) -> f64 {
88 let x_array =
89 Array2::from_shape_vec((1, self.n_features), x.clone()).expect("Operation failed");
90 let (_mean, std) = self
91 .regressor
92 .predict_with_std(&x_array)
93 .expect("Operation failed");
94 std[0] * std[0] }
96}
97
/// A single dimension of the search space.
#[derive(Debug, Clone)]
pub enum Parameter {
    /// Continuous value in `[lower, upper]`.
    Real(f64, f64),
    /// Integer value in `[lower, upper]` (inclusive).
    Integer(i64, i64),
    /// One of a fixed set of string choices; represented internally by its
    /// index into the vector.
    Categorical(Vec<String>),
}
108
/// Search space: an ordered collection of named parameters plus cached
/// metadata used for sampling and (inverse-)transformation.
#[derive(Debug, Clone)]
pub struct Space {
    // (name, parameter) pairs in insertion order.
    parameters: Vec<(String, Parameter)>,
    // Total width after one-hot encoding categoricals (see `transform`).
    transformed_n_dims: usize,
    // Per-parameter bounds in the original (untransformed) representation.
    bounds: Vec<(f64, f64)>,
}
119
120impl Space {
121 pub fn new() -> Self {
123 Self {
124 parameters: Vec::new(),
125 transformed_n_dims: 0,
126 bounds: Vec::new(),
127 }
128 }
129
130 pub fn add<S: Into<String>>(mut self, name: S, parameter: Parameter) -> Self {
132 let name = name.into();
133
134 self.transformed_n_dims += match ¶meter {
136 Parameter::Real(_, _) => 1,
137 Parameter::Integer(_, _) => 1,
138 Parameter::Categorical(values) => values.len(),
139 };
140
141 let bounds = match ¶meter {
143 Parameter::Real(lower, upper) => (*lower, *upper),
144 Parameter::Integer(lower, upper) => (*lower as f64, *upper as f64),
145 Parameter::Categorical(_) => (0.0, 1.0),
146 };
147 self.bounds.push(bounds);
148
149 self.parameters.push((name, parameter));
150 self
151 }
152
153 pub fn n_dims(&self) -> usize {
155 self.parameters.len()
156 }
157
158 pub fn transformed_n_dims(&self) -> usize {
160 self.transformed_n_dims
161 }
162
163 pub fn dimension_types(&self) -> Vec<crate::bayesian::optimizer::DimensionType> {
165 use crate::bayesian::optimizer::DimensionType;
166 self.parameters
167 .iter()
168 .map(|(_, param)| match param {
169 Parameter::Integer(_, _) => DimensionType::Integer,
170 _ => DimensionType::Continuous,
171 })
172 .collect()
173 }
174
175 pub fn sample(&self, n_samples: usize, rng: &mut StdRng) -> Vec<Array1<f64>> {
177 let n_dims = self.n_dims();
178 let mut samples = Vec::with_capacity(n_samples);
179
180 for _ in 0..n_samples {
181 let mut sample = Array1::zeros(n_dims);
182
183 for (i, (_, param)) in self.parameters.iter().enumerate() {
184 match param {
185 Parameter::Real(lower, upper) => {
186 sample[i] = rng.random_range(*lower..*upper);
188 }
189 Parameter::Integer(lower, upper) => {
190 let range = rng.random_range(*lower..=*upper);
191 sample[i] = range as f64;
192 }
193 Parameter::Categorical(values) => {
194 let index = rng.random_range(0..values.len());
195 sample[i] = index as f64;
196 }
197 }
198 }
199
200 samples.push(sample);
201 }
202
203 samples
204 }
205
206 pub fn transform(&self, x: &ArrayView1<f64>) -> Array1<f64> {
208 let mut transformed = Array1::zeros(self.transformed_n_dims);
209 let mut idx = 0;
210
211 for (i, (_, param)) in self.parameters.iter().enumerate() {
212 match param {
213 Parameter::Real(lower, upper) => {
214 transformed[idx] = (x[i] - lower) / (upper - lower);
216 idx += 1;
217 }
218 Parameter::Integer(lower, upper) => {
219 transformed[idx] = (x[i] - *lower as f64) / (*upper as f64 - *lower as f64);
221 idx += 1;
222 }
223 Parameter::Categorical(values) => {
224 let index = x[i] as usize;
226 for j in 0..values.len() {
227 transformed[idx + j] = if j == index { 1.0 } else { 0.0 };
228 }
229 idx += values.len();
230 }
231 }
232 }
233
234 transformed
235 }
236
237 pub fn inverse_transform(&self, x: &ArrayView1<f64>) -> Array1<f64> {
239 let mut inverse = Array1::zeros(self.n_dims());
240 let mut idx = 0;
241
242 for (i, (_, param)) in self.parameters.iter().enumerate() {
243 match param {
244 Parameter::Real(lower, upper) => {
245 inverse[i] = lower + x[idx] * (upper - lower);
247 idx += 1;
248 }
249 Parameter::Integer(lower, upper) => {
250 let value = lower + (x[idx] * (*upper - *lower) as f64).round() as i64;
252 inverse[i] = value as f64;
253 idx += 1;
254 }
255 Parameter::Categorical(values) => {
256 let mut max_idx = 0;
258 let mut max_val = x[idx];
259 for j in 1..values.len() {
260 if x[idx + j] > max_val {
261 max_val = x[idx + j];
262 max_idx = j;
263 }
264 }
265 inverse[i] = max_idx as f64;
266 idx += values.len();
267 }
268 }
269 }
270
271 inverse
272 }
273
274 pub fn bounds_to_vec(&self) -> Vec<(f64, f64)> {
276 self.parameters
277 .iter()
278 .map(|(_, param)| match param {
279 Parameter::Real(lower, upper) => (*lower, *upper),
280 Parameter::Integer(lower, upper) => (*lower as f64, *upper as f64),
281 Parameter::Categorical(_) => (0.0, 1.0), })
283 .collect()
284 }
285}
286
287impl Default for Space {
288 fn default() -> Self {
289 Self::new()
290 }
291}
292
293pub trait AcquisitionFunction: Send + Sync {
295 fn evaluate(&self, x: &ArrayView1<f64>) -> f64;
297
298 fn gradient(&self, x: &ArrayView1<f64>) -> Option<Array1<f64>> {
300 None
301 }
302}
303
/// Expected Improvement acquisition (minimization form).
pub struct ExpectedImprovement {
    // GP surrogate of the objective.
    model: GaussianProcess,
    // Best (lowest) observed objective value so far.
    y_best: f64,
    // Exploration margin subtracted from the predicted improvement.
    xi: f64,
}

impl ExpectedImprovement {
    /// Creates an EI acquisition from a fitted model, the incumbent value
    /// `y_best`, and exploration margin `xi`.
    pub fn new(model: GaussianProcess, y_best: f64, xi: f64) -> Self {
        Self { model, y_best, xi }
    }
}
317
318impl AcquisitionFunction for ExpectedImprovement {
319 fn evaluate(&self, x: &ArrayView1<f64>) -> f64 {
320 let mean = self.model.predict(&x.to_vec());
321 let std = (self.model.predict_variance(&x.to_vec())).sqrt();
322
323 if std <= 0.0 {
324 return 0.0;
325 }
326
327 let z = (self.y_best - mean - self.xi) / std;
328 let norm_cdf = 0.5 * (1.0 + approx_erf(z * std::f64::consts::SQRT_2 / 2.0));
330 let norm_pdf = (-0.5 * z.powi(2)).exp() / (2.0 * std::f64::consts::PI).sqrt();
331
332 let ei = (self.y_best - mean - self.xi) * norm_cdf + std * norm_pdf;
333
334 if ei < 0.0 {
335 0.0
336 } else {
337 ei
338 }
339 }
340
341 fn gradient(&self, x: &ArrayView1<f64>) -> Option<Array1<f64>> {
342 None
344 }
345}
346
/// Confidence-bound acquisition built from mu(x) and sigma(x).
pub struct LowerConfidenceBound {
    // GP surrogate of the objective.
    model: GaussianProcess,
    // Exploration weight multiplying the predictive standard deviation.
    kappa: f64,
}

impl LowerConfidenceBound {
    /// Creates an LCB acquisition from a fitted model and exploration
    /// weight `kappa`.
    pub fn new(model: GaussianProcess, kappa: f64) -> Self {
        Self { model, kappa }
    }
}
359
360impl AcquisitionFunction for LowerConfidenceBound {
361 fn evaluate(&self, x: &ArrayView1<f64>) -> f64 {
362 let mean = self.model.predict(&x.to_vec());
363 let std = (self.model.predict_variance(&x.to_vec())).sqrt();
364
365 mean - self.kappa * std
366 }
367
368 fn gradient(&self, x: &ArrayView1<f64>) -> Option<Array1<f64>> {
369 None
371 }
372}
373
/// Probability-of-improvement acquisition (minimization form).
pub struct ProbabilityOfImprovement {
    // GP surrogate of the objective.
    model: GaussianProcess,
    // Best (lowest) observed objective value so far.
    y_best: f64,
    // Required improvement margin before a point counts as "better".
    xi: f64,
}

impl ProbabilityOfImprovement {
    /// Creates a PI acquisition from a fitted model, the incumbent value
    /// `y_best`, and margin `xi`.
    pub fn new(model: GaussianProcess, y_best: f64, xi: f64) -> Self {
        Self { model, y_best, xi }
    }
}
387
388impl AcquisitionFunction for ProbabilityOfImprovement {
389 fn evaluate(&self, x: &ArrayView1<f64>) -> f64 {
390 let mean = self.model.predict(&x.to_vec());
391 let std = (self.model.predict_variance(&x.to_vec())).sqrt();
392
393 if std <= 0.0 {
394 return 0.0;
395 }
396
397 let z = (self.y_best - mean - self.xi) / std;
398 0.5 * (1.0 + approx_erf(z * std::f64::consts::SQRT_2 / 2.0))
400 }
401
402 fn gradient(&self, x: &ArrayView1<f64>) -> Option<Array1<f64>> {
403 None
405 }
406}
407
/// Polynomial approximation of the error function erf(x).
///
/// Uses the Abramowitz & Stegun 7.1.26 rational approximation (absolute
/// error below ~1.5e-7), exploiting erf's odd symmetry for negative inputs.
#[allow(dead_code)]
fn approx_erf(x: f64) -> f64 {
    // Horner coefficients (lowest order first) of the A&S 7.1.26 polynomial in t.
    const A: [f64; 5] = [
        0.254829592,
        -0.284496736,
        1.421413741,
        -1.453152027,
        1.061405429,
    ];
    const P: f64 = 0.3275911;

    // erf is odd: evaluate on |x| and restore the sign at the end.
    let negative = x < 0.0;
    let x = x.abs();

    let t = 1.0 / (1.0 + P * x);
    // Horner evaluation from the highest-order coefficient down.
    let poly = A.iter().rev().fold(0.0, |acc, &a| acc * t + a);
    let y = 1.0 - poly * t * (-x * x).exp();

    if negative {
        -y
    } else {
        y
    }
}
430
/// Selects which acquisition function drives point selection.
#[derive(Debug, Clone, Copy, Default)]
pub enum AcquisitionFunctionType {
    /// Expected improvement over the incumbent ("EI"); the default.
    #[default]
    ExpectedImprovement,
    /// Confidence-bound exploration/exploitation trade-off ("LCB").
    LowerConfidenceBound,
    /// Probability of improving on the incumbent ("PI").
    ProbabilityOfImprovement,
}
442
443impl fmt::Display for AcquisitionFunctionType {
444 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
445 match self {
446 AcquisitionFunctionType::ExpectedImprovement => write!(f, "EI"),
447 AcquisitionFunctionType::LowerConfidenceBound => write!(f, "LCB"),
448 AcquisitionFunctionType::ProbabilityOfImprovement => write!(f, "PI"),
449 }
450 }
451}
452
/// GP kernel family selector.
///
/// NOTE(review): `build_model` currently always constructs a
/// `SquaredExponential` kernel regardless of this setting — confirm whether
/// the Matérn variants are honored elsewhere.
#[derive(Debug, Clone, Copy, Default)]
pub enum KernelType {
    /// Squared-exponential (RBF) kernel; the default.
    #[default]
    SquaredExponential,
    /// Matérn kernel with smoothness ν = 5/2.
    Matern52,
    /// Matérn kernel with smoothness ν = 3/2.
    Matern32,
}
464
465impl fmt::Display for KernelType {
466 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
467 match self {
468 KernelType::SquaredExponential => write!(f, "SquaredExponential"),
469 KernelType::Matern52 => write!(f, "Matern52"),
470 KernelType::Matern32 => write!(f, "Matern32"),
471 }
472 }
473}
474
/// Strategy for generating the initial (pre-model) sample points.
#[derive(Debug, Clone, Copy, Default)]
pub enum InitialPointGenerator {
    /// Uniform random sampling; the default.
    #[default]
    Random,
    /// Latin-hypercube-style jittered sampling.
    LatinHypercube,
    /// Sobol-like low-discrepancy sequence (base-2 radical inverse).
    Sobol,
    /// Halton sequence with per-dimension prime bases.
    Halton,
}
488
489impl fmt::Display for InitialPointGenerator {
490 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
491 match self {
492 InitialPointGenerator::Random => write!(f, "Random"),
493 InitialPointGenerator::LatinHypercube => write!(f, "LatinHypercube"),
494 InitialPointGenerator::Sobol => write!(f, "Sobol"),
495 InitialPointGenerator::Halton => write!(f, "Halton"),
496 }
497 }
498}
499
/// Configuration for `BayesianOptimizer`.
#[derive(Clone, Debug)]
pub struct BayesianOptimizationOptions {
    /// Number of space-filling points evaluated before the surrogate is used.
    pub n_initial_points: usize,
    /// Strategy for generating the initial points.
    pub initial_point_generator: InitialPointGenerator,
    /// Acquisition function used to pick subsequent points.
    pub acq_func: AcquisitionFunctionType,
    /// GP kernel family (see `KernelType`).
    pub kernel: KernelType,
    /// Exploration weight for the confidence-bound acquisition.
    pub kappa: f64,
    /// Improvement margin for EI / PI.
    pub xi: f64,
    /// RNG seed; a random seed is drawn when `None`.
    pub seed: Option<u64>,
    /// Optional parallel-evaluation settings.
    pub parallel: Option<ParallelOptions>,
    /// Restart count for the inner acquisition optimization.
    pub n_restarts: usize,
}
522
impl Default for BayesianOptimizationOptions {
    /// Defaults: 10 initial points, random initial design, EI acquisition,
    /// kappa = 1.96 (≈95% confidence interval), xi = 0.01, 5 restarts.
    fn default() -> Self {
        Self {
            n_initial_points: 10,
            initial_point_generator: InitialPointGenerator::default(),
            acq_func: AcquisitionFunctionType::default(),
            kernel: KernelType::default(),
            kappa: 1.96,
            xi: 0.01,
            seed: None,
            parallel: None,
            n_restarts: 5,
        }
    }
}
538
/// A single evaluated point: location `x` (original representation) and its
/// objective value `y`.
#[derive(Debug, Clone)]
struct Observation {
    // Parameter vector in the original (untransformed) space.
    x: Array1<f64>,
    // Observed objective value at `x`.
    y: f64,
}
547
/// Sequential model-based (Bayesian) optimizer with an ask/tell interface.
pub struct BayesianOptimizer {
    // Search space definition.
    space: Space,
    // Run configuration.
    options: BayesianOptimizationOptions,
    // All evaluated (x, y) pairs in evaluation order.
    observations: Vec<Observation>,
    // Observation with the lowest y seen so far, if any.
    best_observation: Option<Observation>,
    // Seeded RNG used for sampling and acquisition restarts.
    rng: StdRng,
    // Number of `ask` calls made; also indexes the quasi-random sequences.
    iteration: usize,
}
563
impl BayesianOptimizer {
    /// Creates an optimizer over `space`. When `options.seed` is `None`, a
    /// fresh random seed is drawn; either way the internal `StdRng` is
    /// deterministically derived from that seed.
    pub fn new(space: Space, options: Option<BayesianOptimizationOptions>) -> Self {
        let options = options.unwrap_or_default();
        let seed = options
            .seed
            .unwrap_or_else(|| scirs2_core::random::rng().random());
        let rng = StdRng::seed_from_u64(seed);

        Self {
            space,
            options,
            observations: Vec::new(),
            best_observation: None,
            rng,
            iteration: 0,
        }
    }

    /// Suggests the next point to evaluate (ask/tell interface).
    ///
    /// While fewer than `n_initial_points` observations exist, points come
    /// from the configured initial-point generator; afterwards the point
    /// found by optimizing the acquisition function is returned.
    pub fn ask(&mut self) -> Array1<f64> {
        if self.observations.len() < self.options.n_initial_points {
            let samples = match self.options.initial_point_generator {
                InitialPointGenerator::Random => {
                    // Uniform sample over the original parameter space.
                    self.space.sample(1, &mut self.rng)
                }
                InitialPointGenerator::LatinHypercube => {
                    let dim = self.space.bounds.len();
                    let n_samples = 1;

                    let mut sample = Array1::zeros(dim);

                    // NOTE(review): with n_samples = 1 the interval is the
                    // whole range, so this degenerates to uniform sampling
                    // per call rather than a true Latin hypercube design.
                    for (i, (low, high)) in self.space.bounds.iter().enumerate() {
                        let interval_size = (high - low) / n_samples as f64;
                        let offset = self.rng.random_range(0.0..1.0) * interval_size;
                        sample[i] = low + offset;
                    }

                    vec![sample]
                }
                InitialPointGenerator::Sobol => {
                    let dim = self.space.bounds.len();
                    let mut sample = Array1::zeros(dim);

                    // Sequence index: 1-based so the first point is not all zeros.
                    let seed = self.iteration as u32 + 1;

                    for (i, (low, high)) in self.space.bounds.iter().enumerate() {
                        // Radical inverse of `seed` in base 2 (van der Corput):
                        // a low-discrepancy coordinate in [0, 1).
                        let mut n = seed;
                        let mut denom = 1.0;
                        let mut result = 0.0;
                        let base = 2u32;

                        while n > 0 {
                            denom *= base as f64;
                            result += (n % base) as f64 / denom;
                            n /= base;
                        }

                        // Per-dimension rotation so dimensions differ despite
                        // sharing the same base-2 sequence.
                        let offset = ((i + 1) as f64 * 0.5).fract();
                        let value = (result + offset).fract();
                        sample[i] = low + value * (high - low);
                    }

                    vec![sample]
                }
                InitialPointGenerator::Halton => {
                    let dim = self.space.bounds.len();
                    let mut sample = Array1::zeros(dim);

                    // Halton uses a distinct prime base per dimension.
                    let primes = [2u32, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47];
                    let seed = self.iteration as u32 + 1;

                    for (i, (low, high)) in self.space.bounds.iter().enumerate() {
                        // Cycles through the precomputed primes when there are
                        // more dimensions than primes (bases then repeat).
                        let base = if i < primes.len() {
                            primes[i]
                        } else {
                            primes[i % primes.len()]
                        };

                        // Radical inverse of `seed` in the chosen base.
                        let mut n = seed;
                        let mut denom = 1.0;
                        let mut result = 0.0;

                        while n > 0 {
                            denom *= base as f64;
                            result += (n % base) as f64 / denom;
                            n /= base;
                        }

                        sample[i] = low + result * (high - low);
                    }

                    vec![sample]
                }
            };

            self.iteration += 1;
            return samples[0].clone();
        }

        self.iteration += 1;
        self.optimize_acquisition_function()
    }

    /// Records an evaluated point, updating the incumbent best (lowest `y`).
    pub fn tell(&mut self, x: Array1<f64>, y: f64) {
        let observation = Observation { x, y };

        if let Some(best) = &self.best_observation {
            if y < best.y {
                self.best_observation = Some(observation.clone());
            }
        } else {
            // First observation is automatically the incumbent.
            self.best_observation = Some(observation.clone());
        }

        self.observations.push(observation);
    }

    /// Fits a GP surrogate to all observations in the transformed
    /// (unit-cube / one-hot) representation.
    ///
    /// NOTE(review): `options.kernel` is not consulted here — the surrogate
    /// always uses the squared-exponential kernel. Confirm whether other
    /// kernel types should be wired up.
    fn build_model(&self) -> GaussianProcess {
        let mut x_data = Vec::with_capacity(self.observations.len());
        let mut y_data = Vec::with_capacity(self.observations.len());

        for obs in &self.observations {
            let x_transformed = self.space.transform(&obs.x.view()).to_vec();
            x_data.push(x_transformed);
            y_data.push(obs.y);
        }

        GaussianProcess::default(x_data, y_data)
    }

    /// Builds the configured acquisition function around a freshly fitted
    /// surrogate model.
    ///
    /// Panics if no observation has been recorded yet (`best_observation` is
    /// `None`) — reachable only when `n_initial_points` is 0 and `ask` is
    /// called before any `tell`.
    fn create_acquisition_function(&self) -> Box<dyn AcquisitionFunction> {
        let model = self.build_model();
        let y_best = self.best_observation.as_ref().expect("Operation failed").y;

        match self.options.acq_func {
            AcquisitionFunctionType::ExpectedImprovement => {
                Box::new(ExpectedImprovement::new(model, y_best, self.options.xi))
            }
            AcquisitionFunctionType::LowerConfidenceBound => {
                Box::new(LowerConfidenceBound::new(model, self.options.kappa))
            }
            AcquisitionFunctionType::ProbabilityOfImprovement => Box::new(
                ProbabilityOfImprovement::new(model, y_best, self.options.xi),
            ),
        }
    }

    /// Maximizes the acquisition function — by minimizing its negation with
    /// L-BFGS under box bounds — from `n_restarts` random starts, the first
    /// of which is replaced by the incumbent best point.
    fn optimize_acquisition_function(&mut self) -> Array1<f64> {
        let acq_func = self.create_acquisition_function();
        let bounds = self.space.bounds_to_vec();
        let n_restarts = self.options.n_restarts;

        // Negate: the inner optimizer minimizes, we want to maximize.
        let f = |x: &ArrayView1<f64>| -acq_func.evaluate(x);

        let mut x_starts = self.space.sample(n_restarts, &mut self.rng);

        // Seed one restart with the incumbent (exploitation bias); note this
        // overwrites one of the random restarts when n_restarts > 0.
        if let Some(best) = &self.best_observation {
            if !x_starts.is_empty() {
                x_starts[0] = best.x.clone();
            } else {
                x_starts.push(best.x.clone());
            }
        }

        let mut best_x = None;
        let mut best_value = f64::INFINITY;

        for x_start in x_starts {
            let result = minimize(
                f,
                &x_start.to_vec(),
                Method::LBFGS,
                Some(Options {
                    bounds: Some(
                        crate::unconstrained::Bounds::from_vecs(
                            bounds.iter().map(|b| Some(b.0)).collect(),
                            bounds.iter().map(|b| Some(b.1)).collect(),
                        )
                        .expect("Operation failed"),
                    ),
                    ..Default::default()
                }),
            );

            // Failed restarts are skipped; keep the best successful result.
            if let Ok(res) = result {
                if res.fun < best_value {
                    best_value = res.fun;
                    best_x = Some(res.x);
                }
            }
        }

        // If every restart failed, fall back to a fresh random sample.
        best_x.unwrap_or_else(|| {
            self.space.sample(1, &mut self.rng)[0].clone()
        })
    }

    /// Runs the full loop: evaluates `func` on the initial design, then
    /// alternates ask/tell until `n_calls` evaluations are spent. Returns the
    /// best observation found.
    ///
    /// Panics if `n_calls` is 0 (no observation ever recorded).
    pub fn optimize<F>(&mut self, func: F, n_calls: usize) -> OptimizeResult<f64>
    where
        F: Fn(&ArrayView1<f64>) -> f64,
    {
        let mut n_calls_remaining = n_calls;

        // Initial space-filling design, capped by the total budget.
        // NOTE(review): this always draws uniform samples, bypassing
        // `options.initial_point_generator` (which `ask` would honor) —
        // confirm whether that is intentional.
        let n_initial = self.options.n_initial_points.min(n_calls);
        let initial_points = self.space.sample(n_initial, &mut self.rng);

        for point in initial_points {
            let value = func(&point.view());
            self.tell(point, value);
            n_calls_remaining -= 1;

            if n_calls_remaining == 0 {
                break;
            }
        }

        let mut iterations = 0;

        // Model-guided phase: suggest, evaluate, record.
        while n_calls_remaining > 0 {
            let next_point = self.ask();

            let value = func(&next_point.view());

            self.tell(next_point, value);

            n_calls_remaining -= 1;
            iterations += 1;
        }

        let best = self.best_observation.as_ref().expect("Operation failed");
        OptimizeResult {
            x: best.x.clone(),
            fun: best.y,
            nfev: self.observations.len(),
            func_evals: self.observations.len(),
            nit: iterations,
            success: true,
            message: "Optimization terminated successfully".to_string(),
            ..Default::default()
        }
    }
}
845
846#[allow(dead_code)]
848pub fn bayesian_optimization<F>(
849 func: F,
850 bounds: Vec<(f64, f64)>,
851 n_calls: usize,
852 options: Option<BayesianOptimizationOptions>,
853) -> Result<OptimizeResult<f64>, OptimizeError>
854where
855 F: Fn(&ArrayView1<f64>) -> f64,
856{
857 let space = bounds
859 .into_iter()
860 .enumerate()
861 .fold(Space::new(), |space, (i, (lower, upper))| {
862 space.add(format!("x{}", i), Parameter::Real(lower, upper))
863 });
864
865 let mut optimizer = BayesianOptimizer::new(space, options);
867
868 let result = optimizer.optimize(func, n_calls);
870
871 Ok(result)
872}
873
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    // Mixed-type space: 1 real + 1 integer + 1 categorical (3 choices)
    // -> 3 raw dims and 1 + 1 + 3 = 5 transformed (one-hot) dims.
    #[test]
    fn test_space_creation() {
        let space = Space::new()
            .add("x1", Parameter::Real(-5.0, 5.0))
            .add("x2", Parameter::Integer(0, 10))
            .add(
                "x3",
                Parameter::Categorical(vec!["a".into(), "b".into(), "c".into()]),
            );

        assert_eq!(space.n_dims(), 3);
        assert_eq!(space.transformed_n_dims(), 5);
    }

    // The midpoint of each range should map to 0.5 in the unit cube.
    #[test]
    fn test_space_transform() {
        let space = Space::new()
            .add("x1", Parameter::Real(-5.0, 5.0))
            .add("x2", Parameter::Integer(0, 10));

        let x = array![0.0, 5.0];
        let transformed = space.transform(&x.view());

        assert_eq!(transformed.len(), 2);
        assert!((transformed[0] - 0.5).abs() < 1e-6);
        assert!((transformed[1] - 0.5).abs() < 1e-6);
    }

    // Seeded end-to-end run on the sphere function should land near the origin.
    #[test]
    fn test_bayesian_optimization() {
        let f = |x: &ArrayView1<f64>| x[0].powi(2) + x[1].powi(2);
        let bounds = vec![(-5.0, 5.0), (-5.0, 5.0)];

        let options = BayesianOptimizationOptions {
            n_initial_points: 5,
            seed: Some(42),
            ..Default::default()
        };

        let result = bayesian_optimization(f, bounds, 15, Some(options)).expect("Operation failed");

        assert!(result.fun < 0.5);
        assert!(result.x[0].abs() < 0.5);
        assert!(result.x[1].abs() < 0.5);
    }
}