1use std::fmt;
35
36use scirs2_core::ndarray::{Array1, Array2, ArrayView1};
37use scirs2_core::random::rngs::StdRng;
38use scirs2_core::random::Rng;
39use scirs2_core::random::SeedableRng;
40use scirs2_stats::gaussian_process::{GaussianProcessRegressor, SquaredExponential};
41
42use crate::error::OptimizeError;
43use crate::parallel::ParallelOptions;
44use crate::unconstrained::{minimize, Method, OptimizeResult, Options};
45
46struct GaussianProcess {
48 regressor: GaussianProcessRegressor<SquaredExponential>,
49 n_features: usize,
50}
51
52impl GaussianProcess {
53 fn default(x_data: Vec<Vec<f64>>, y_data: Vec<f64>) -> Self {
55 let n_samples = x_data.len();
56 let n_features = if n_samples > 0 { x_data[0].len() } else { 0 };
57
58 let mut x_flat = Vec::with_capacity(n_samples * n_features);
60 for row in &x_data {
61 x_flat.extend_from_slice(row);
62 }
63 let x_array =
64 Array2::from_shape_vec((n_samples, n_features), x_flat).expect("Operation failed");
65 let y_array = Array1::from_vec(y_data);
66
67 let kernel = SquaredExponential::default();
69 let mut regressor = GaussianProcessRegressor::new(kernel);
70 regressor.fit(&x_array, &y_array).expect("Operation failed");
71
72 Self {
73 regressor,
74 n_features,
75 }
76 }
77
78 fn predict(&self, x: &Vec<f64>) -> f64 {
80 let x_array =
81 Array2::from_shape_vec((1, self.n_features), x.clone()).expect("Operation failed");
82 let predictions = self.regressor.predict(&x_array).expect("Operation failed");
83 predictions[0]
84 }
85
86 fn predict_variance(&self, x: &Vec<f64>) -> f64 {
88 let x_array =
89 Array2::from_shape_vec((1, self.n_features), x.clone()).expect("Operation failed");
90 let (_mean, std) = self
91 .regressor
92 .predict_with_std(&x_array)
93 .expect("Operation failed");
94 std[0] * std[0] }
96}
97
98#[derive(Debug, Clone)]
100pub enum Parameter {
101 Real(f64, f64),
103 Integer(i64, i64),
105 Categorical(Vec<String>),
107}
108
109#[derive(Debug, Clone)]
111pub struct Space {
112 parameters: Vec<(String, Parameter)>,
114 transformed_n_dims: usize,
116 bounds: Vec<(f64, f64)>,
118}
119
120impl Space {
121 pub fn new() -> Self {
123 Self {
124 parameters: Vec::new(),
125 transformed_n_dims: 0,
126 bounds: Vec::new(),
127 }
128 }
129
130 pub fn add<S: Into<String>>(mut self, name: S, parameter: Parameter) -> Self {
132 let name = name.into();
133
134 self.transformed_n_dims += match ¶meter {
136 Parameter::Real(_, _) => 1,
137 Parameter::Integer(_, _) => 1,
138 Parameter::Categorical(values) => values.len(),
139 };
140
141 let bounds = match ¶meter {
143 Parameter::Real(lower, upper) => (*lower, *upper),
144 Parameter::Integer(lower, upper) => (*lower as f64, *upper as f64),
145 Parameter::Categorical(_) => (0.0, 1.0),
146 };
147 self.bounds.push(bounds);
148
149 self.parameters.push((name, parameter));
150 self
151 }
152
153 pub fn n_dims(&self) -> usize {
155 self.parameters.len()
156 }
157
158 pub fn transformed_n_dims(&self) -> usize {
160 self.transformed_n_dims
161 }
162
163 pub fn sample(&self, n_samples: usize, rng: &mut StdRng) -> Vec<Array1<f64>> {
165 let n_dims = self.n_dims();
166 let mut samples = Vec::with_capacity(n_samples);
167
168 for _ in 0..n_samples {
169 let mut sample = Array1::zeros(n_dims);
170
171 for (i, (_, param)) in self.parameters.iter().enumerate() {
172 match param {
173 Parameter::Real(lower, upper) => {
174 sample[i] = rng.random_range(*lower..*upper);
176 }
177 Parameter::Integer(lower, upper) => {
178 let range = rng.random_range(*lower..=*upper);
179 sample[i] = range as f64;
180 }
181 Parameter::Categorical(values) => {
182 let index = rng.random_range(0..values.len());
183 sample[i] = index as f64;
184 }
185 }
186 }
187
188 samples.push(sample);
189 }
190
191 samples
192 }
193
194 pub fn transform(&self, x: &ArrayView1<f64>) -> Array1<f64> {
196 let mut transformed = Array1::zeros(self.transformed_n_dims);
197 let mut idx = 0;
198
199 for (i, (_, param)) in self.parameters.iter().enumerate() {
200 match param {
201 Parameter::Real(lower, upper) => {
202 transformed[idx] = (x[i] - lower) / (upper - lower);
204 idx += 1;
205 }
206 Parameter::Integer(lower, upper) => {
207 transformed[idx] = (x[i] - *lower as f64) / (*upper as f64 - *lower as f64);
209 idx += 1;
210 }
211 Parameter::Categorical(values) => {
212 let index = x[i] as usize;
214 for j in 0..values.len() {
215 transformed[idx + j] = if j == index { 1.0 } else { 0.0 };
216 }
217 idx += values.len();
218 }
219 }
220 }
221
222 transformed
223 }
224
225 pub fn inverse_transform(&self, x: &ArrayView1<f64>) -> Array1<f64> {
227 let mut inverse = Array1::zeros(self.n_dims());
228 let mut idx = 0;
229
230 for (i, (_, param)) in self.parameters.iter().enumerate() {
231 match param {
232 Parameter::Real(lower, upper) => {
233 inverse[i] = lower + x[idx] * (upper - lower);
235 idx += 1;
236 }
237 Parameter::Integer(lower, upper) => {
238 let value = lower + (x[idx] * (*upper - *lower) as f64).round() as i64;
240 inverse[i] = value as f64;
241 idx += 1;
242 }
243 Parameter::Categorical(values) => {
244 let mut max_idx = 0;
246 let mut max_val = x[idx];
247 for j in 1..values.len() {
248 if x[idx + j] > max_val {
249 max_val = x[idx + j];
250 max_idx = j;
251 }
252 }
253 inverse[i] = max_idx as f64;
254 idx += values.len();
255 }
256 }
257 }
258
259 inverse
260 }
261
262 pub fn bounds_to_vec(&self) -> Vec<(f64, f64)> {
264 self.parameters
265 .iter()
266 .map(|(_, param)| match param {
267 Parameter::Real(lower, upper) => (*lower, *upper),
268 Parameter::Integer(lower, upper) => (*lower as f64, *upper as f64),
269 Parameter::Categorical(_) => (0.0, 1.0), })
271 .collect()
272 }
273}
274
275impl Default for Space {
276 fn default() -> Self {
277 Self::new()
278 }
279}
280
281pub trait AcquisitionFunction: Send + Sync {
283 fn evaluate(&self, x: &ArrayView1<f64>) -> f64;
285
286 fn gradient(&self, x: &ArrayView1<f64>) -> Option<Array1<f64>> {
288 None
289 }
290}
291
292pub struct ExpectedImprovement {
294 model: GaussianProcess,
295 y_best: f64,
296 xi: f64,
297}
298
299impl ExpectedImprovement {
300 pub fn new(model: GaussianProcess, y_best: f64, xi: f64) -> Self {
302 Self { model, y_best, xi }
303 }
304}
305
306impl AcquisitionFunction for ExpectedImprovement {
307 fn evaluate(&self, x: &ArrayView1<f64>) -> f64 {
308 let mean = self.model.predict(&x.to_vec());
309 let std = (self.model.predict_variance(&x.to_vec())).sqrt();
310
311 if std <= 0.0 {
312 return 0.0;
313 }
314
315 let z = (self.y_best - mean - self.xi) / std;
316 let norm_cdf = 0.5 * (1.0 + approx_erf(z * std::f64::consts::SQRT_2 / 2.0));
318 let norm_pdf = (-0.5 * z.powi(2)).exp() / (2.0 * std::f64::consts::PI).sqrt();
319
320 let ei = (self.y_best - mean - self.xi) * norm_cdf + std * norm_pdf;
321
322 if ei < 0.0 {
323 0.0
324 } else {
325 ei
326 }
327 }
328
329 fn gradient(&self, x: &ArrayView1<f64>) -> Option<Array1<f64>> {
330 None
332 }
333}
334
335pub struct LowerConfidenceBound {
337 model: GaussianProcess,
338 kappa: f64,
339}
340
341impl LowerConfidenceBound {
342 pub fn new(model: GaussianProcess, kappa: f64) -> Self {
344 Self { model, kappa }
345 }
346}
347
348impl AcquisitionFunction for LowerConfidenceBound {
349 fn evaluate(&self, x: &ArrayView1<f64>) -> f64 {
350 let mean = self.model.predict(&x.to_vec());
351 let std = (self.model.predict_variance(&x.to_vec())).sqrt();
352
353 mean - self.kappa * std
354 }
355
356 fn gradient(&self, x: &ArrayView1<f64>) -> Option<Array1<f64>> {
357 None
359 }
360}
361
362pub struct ProbabilityOfImprovement {
364 model: GaussianProcess,
365 y_best: f64,
366 xi: f64,
367}
368
369impl ProbabilityOfImprovement {
370 pub fn new(model: GaussianProcess, y_best: f64, xi: f64) -> Self {
372 Self { model, y_best, xi }
373 }
374}
375
376impl AcquisitionFunction for ProbabilityOfImprovement {
377 fn evaluate(&self, x: &ArrayView1<f64>) -> f64 {
378 let mean = self.model.predict(&x.to_vec());
379 let std = (self.model.predict_variance(&x.to_vec())).sqrt();
380
381 if std <= 0.0 {
382 return 0.0;
383 }
384
385 let z = (self.y_best - mean - self.xi) / std;
386 0.5 * (1.0 + approx_erf(z * std::f64::consts::SQRT_2 / 2.0))
388 }
389
390 fn gradient(&self, x: &ArrayView1<f64>) -> Option<Array1<f64>> {
391 None
393 }
394}
395
396#[allow(dead_code)]
399fn approx_erf(x: f64) -> f64 {
400 let a1 = 0.254829592;
402 let a2 = -0.284496736;
403 let a3 = 1.421413741;
404 let a4 = -1.453152027;
405 let a5 = 1.061405429;
406 let p = 0.3275911;
407
408 let sign = if x < 0.0 { -1.0 } else { 1.0 };
410 let x = x.abs();
411
412 let t = 1.0 / (1.0 + p * x);
414 let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x).exp();
415
416 sign * y
417}
418
419#[derive(Debug, Clone, Copy, Default)]
421pub enum AcquisitionFunctionType {
422 #[default]
424 ExpectedImprovement,
425 LowerConfidenceBound,
427 ProbabilityOfImprovement,
429}
430
431impl fmt::Display for AcquisitionFunctionType {
432 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
433 match self {
434 AcquisitionFunctionType::ExpectedImprovement => write!(f, "EI"),
435 AcquisitionFunctionType::LowerConfidenceBound => write!(f, "LCB"),
436 AcquisitionFunctionType::ProbabilityOfImprovement => write!(f, "PI"),
437 }
438 }
439}
440
441#[derive(Debug, Clone, Copy, Default)]
443pub enum KernelType {
444 #[default]
446 SquaredExponential,
447 Matern52,
449 Matern32,
451}
452
453impl fmt::Display for KernelType {
454 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
455 match self {
456 KernelType::SquaredExponential => write!(f, "SquaredExponential"),
457 KernelType::Matern52 => write!(f, "Matern52"),
458 KernelType::Matern32 => write!(f, "Matern32"),
459 }
460 }
461}
462
463#[derive(Debug, Clone, Copy, Default)]
465pub enum InitialPointGenerator {
466 #[default]
468 Random,
469 LatinHypercube,
471 Sobol,
473 Halton,
475}
476
477impl fmt::Display for InitialPointGenerator {
478 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
479 match self {
480 InitialPointGenerator::Random => write!(f, "Random"),
481 InitialPointGenerator::LatinHypercube => write!(f, "LatinHypercube"),
482 InitialPointGenerator::Sobol => write!(f, "Sobol"),
483 InitialPointGenerator::Halton => write!(f, "Halton"),
484 }
485 }
486}
487
488#[derive(Clone, Debug)]
490pub struct BayesianOptimizationOptions {
491 pub n_initial_points: usize,
493 pub initial_point_generator: InitialPointGenerator,
495 pub acq_func: AcquisitionFunctionType,
497 pub kernel: KernelType,
499 pub kappa: f64,
501 pub xi: f64,
503 pub seed: Option<u64>,
505 pub parallel: Option<ParallelOptions>,
507 pub n_restarts: usize,
509}
510
511impl Default for BayesianOptimizationOptions {
512 fn default() -> Self {
513 Self {
514 n_initial_points: 10,
515 initial_point_generator: InitialPointGenerator::default(),
516 acq_func: AcquisitionFunctionType::default(),
517 kernel: KernelType::default(),
518 kappa: 1.96,
519 xi: 0.01,
520 seed: None,
521 parallel: None,
522 n_restarts: 5,
523 }
524 }
525}
526
527#[derive(Debug, Clone)]
529struct Observation {
530 x: Array1<f64>,
532 y: f64,
534}
535
536pub struct BayesianOptimizer {
538 space: Space,
540 options: BayesianOptimizationOptions,
542 observations: Vec<Observation>,
544 best_observation: Option<Observation>,
546 rng: StdRng,
548 iteration: usize,
550}
551
552impl BayesianOptimizer {
553 pub fn new(space: Space, options: Option<BayesianOptimizationOptions>) -> Self {
555 let options = options.unwrap_or_default();
556 let seed = options
557 .seed
558 .unwrap_or_else(|| scirs2_core::random::rng().random());
559 let rng = StdRng::seed_from_u64(seed);
560
561 Self {
562 space,
563 options,
564 observations: Vec::new(),
565 best_observation: None,
566 rng,
567 iteration: 0,
568 }
569 }
570
571 pub fn ask(&mut self) -> Array1<f64> {
573 if self.observations.len() < self.options.n_initial_points {
575 let samples = match self.options.initial_point_generator {
576 InitialPointGenerator::Random => {
577 self.space.sample(1, &mut self.rng)
579 }
580 InitialPointGenerator::LatinHypercube => {
581 let dim = self.space.bounds.len();
583 let n_samples = 1;
584
585 let mut sample = Array1::zeros(dim);
587
588 for (i, (low, high)) in self.space.bounds.iter().enumerate() {
589 let interval_size = (high - low) / n_samples as f64;
591 let offset = self.rng.random_range(0.0..1.0) * interval_size;
592 sample[i] = low + offset;
593 }
594
595 vec![sample]
596 }
597 InitialPointGenerator::Sobol => {
598 let dim = self.space.bounds.len();
600 let mut sample = Array1::zeros(dim);
601
602 let seed = self.iteration as u32 + 1;
604
605 for (i, (low, high)) in self.space.bounds.iter().enumerate() {
606 let mut n = seed;
608 let mut denom = 1.0;
609 let mut result = 0.0;
610 let base = 2u32;
611
612 while n > 0 {
613 denom *= base as f64;
614 result += (n % base) as f64 / denom;
615 n /= base;
616 }
617
618 let offset = ((i + 1) as f64 * 0.5).fract();
620 let value = (result + offset).fract();
621 sample[i] = low + value * (high - low);
622 }
623
624 vec![sample]
625 }
626 InitialPointGenerator::Halton => {
627 let dim = self.space.bounds.len();
629 let mut sample = Array1::zeros(dim);
630
631 let primes = [2u32, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47];
633 let seed = self.iteration as u32 + 1;
634
635 for (i, (low, high)) in self.space.bounds.iter().enumerate() {
636 let base = if i < primes.len() {
638 primes[i]
639 } else {
640 primes[i % primes.len()]
641 };
642
643 let mut n = seed;
645 let mut denom = 1.0;
646 let mut result = 0.0;
647
648 while n > 0 {
649 denom *= base as f64;
650 result += (n % base) as f64 / denom;
651 n /= base;
652 }
653
654 sample[i] = low + result * (high - low);
656 }
657
658 vec![sample]
659 }
660 };
661
662 self.iteration += 1;
663 return samples[0].clone();
664 }
665
666 self.iteration += 1;
668 self.optimize_acquisition_function()
669 }
670
671 pub fn tell(&mut self, x: Array1<f64>, y: f64) {
673 let observation = Observation { x, y };
674
675 if let Some(best) = &self.best_observation {
677 if y < best.y {
678 self.best_observation = Some(observation.clone());
679 }
680 } else {
681 self.best_observation = Some(observation.clone());
682 }
683
684 self.observations.push(observation);
686 }
687
688 fn build_model(&self) -> GaussianProcess {
690 let mut x_data = Vec::with_capacity(self.observations.len());
692 let mut y_data = Vec::with_capacity(self.observations.len());
693
694 for obs in &self.observations {
695 let x_transformed = self.space.transform(&obs.x.view()).to_vec();
696 x_data.push(x_transformed);
697 y_data.push(obs.y);
698 }
699
700 GaussianProcess::default(x_data, y_data)
703 }
704
705 fn create_acquisition_function(&self) -> Box<dyn AcquisitionFunction> {
707 let model = self.build_model();
708 let y_best = self.best_observation.as_ref().expect("Operation failed").y;
709
710 match self.options.acq_func {
711 AcquisitionFunctionType::ExpectedImprovement => {
712 Box::new(ExpectedImprovement::new(model, y_best, self.options.xi))
713 }
714 AcquisitionFunctionType::LowerConfidenceBound => {
715 Box::new(LowerConfidenceBound::new(model, self.options.kappa))
716 }
717 AcquisitionFunctionType::ProbabilityOfImprovement => Box::new(
718 ProbabilityOfImprovement::new(model, y_best, self.options.xi),
719 ),
720 }
721 }
722
723 fn optimize_acquisition_function(&mut self) -> Array1<f64> {
725 let acq_func = self.create_acquisition_function();
726 let bounds = self.space.bounds_to_vec();
727 let n_restarts = self.options.n_restarts;
728
729 let f = |x: &ArrayView1<f64>| -acq_func.evaluate(x);
731
732 let mut x_starts = self.space.sample(n_restarts, &mut self.rng);
734
735 if let Some(best) = &self.best_observation {
737 if !x_starts.is_empty() {
738 x_starts[0] = best.x.clone();
739 } else {
740 x_starts.push(best.x.clone());
741 }
742 }
743
744 let mut best_x = None;
746 let mut best_value = f64::INFINITY;
747
748 for x_start in x_starts {
749 let result = minimize(
750 f,
751 &x_start.to_vec(),
752 Method::LBFGS,
753 Some(Options {
754 bounds: Some(
755 crate::unconstrained::Bounds::from_vecs(
756 bounds.iter().map(|b| Some(b.0)).collect(),
757 bounds.iter().map(|b| Some(b.1)).collect(),
758 )
759 .expect("Operation failed"),
760 ),
761 ..Default::default()
762 }),
763 );
764
765 if let Ok(res) = result {
766 if res.fun < best_value {
767 best_value = res.fun;
768 best_x = Some(res.x);
769 }
770 }
771 }
772
773 best_x.unwrap_or_else(|| {
775 self.space.sample(1, &mut self.rng)[0].clone()
777 })
778 }
779
780 pub fn optimize<F>(&mut self, func: F, n_calls: usize) -> OptimizeResult<f64>
782 where
783 F: Fn(&ArrayView1<f64>) -> f64,
784 {
785 let mut n_calls_remaining = n_calls;
786
787 let n_initial = self.options.n_initial_points.min(n_calls);
789 let initial_points = self.space.sample(n_initial, &mut self.rng);
790
791 for point in initial_points {
792 let value = func(&point.view());
793 self.tell(point, value);
794 n_calls_remaining -= 1;
795
796 if n_calls_remaining == 0 {
797 break;
798 }
799 }
800
801 let mut iterations = 0;
803
804 while n_calls_remaining > 0 {
805 let next_point = self.ask();
807
808 let value = func(&next_point.view());
810
811 self.tell(next_point, value);
813
814 n_calls_remaining -= 1;
816 iterations += 1;
817 }
818
819 let best = self.best_observation.as_ref().expect("Operation failed");
821 OptimizeResult {
822 x: best.x.clone(),
823 fun: best.y,
824 nfev: self.observations.len(),
825 func_evals: self.observations.len(),
826 nit: iterations,
827 success: true,
828 message: "Optimization terminated successfully".to_string(),
829 ..Default::default()
830 }
831 }
832}
833
834#[allow(dead_code)]
836pub fn bayesian_optimization<F>(
837 func: F,
838 bounds: Vec<(f64, f64)>,
839 n_calls: usize,
840 options: Option<BayesianOptimizationOptions>,
841) -> Result<OptimizeResult<f64>, OptimizeError>
842where
843 F: Fn(&ArrayView1<f64>) -> f64,
844{
845 let space = bounds
847 .into_iter()
848 .enumerate()
849 .fold(Space::new(), |space, (i, (lower, upper))| {
850 space.add(format!("x{}", i), Parameter::Real(lower, upper))
851 });
852
853 let mut optimizer = BayesianOptimizer::new(space, options);
855
856 let result = optimizer.optimize(func, n_calls);
858
859 Ok(result)
860}
861
862#[cfg(test)]
863mod tests {
864 use super::*;
865 use scirs2_core::ndarray::array;
866
867 #[test]
868 fn test_space_creation() {
869 let space = Space::new()
870 .add("x1", Parameter::Real(-5.0, 5.0))
871 .add("x2", Parameter::Integer(0, 10))
872 .add(
873 "x3",
874 Parameter::Categorical(vec!["a".into(), "b".into(), "c".into()]),
875 );
876
877 assert_eq!(space.n_dims(), 3);
878 assert_eq!(space.transformed_n_dims(), 5); }
880
881 #[test]
882 fn test_space_transform() {
883 let space = Space::new()
884 .add("x1", Parameter::Real(-5.0, 5.0))
885 .add("x2", Parameter::Integer(0, 10));
886
887 let x = array![0.0, 5.0];
888 let transformed = space.transform(&x.view());
889
890 assert_eq!(transformed.len(), 2);
891 assert!((transformed[0] - 0.5).abs() < 1e-6); assert!((transformed[1] - 0.5).abs() < 1e-6); }
894
895 #[test]
896 fn test_bayesian_optimization() {
897 let f = |x: &ArrayView1<f64>| x[0].powi(2) + x[1].powi(2);
899 let bounds = vec![(-5.0, 5.0), (-5.0, 5.0)];
900
901 let options = BayesianOptimizationOptions {
902 n_initial_points: 5,
903 seed: Some(42),
904 ..Default::default()
905 };
906
907 let result = bayesian_optimization(f, bounds, 15, Some(options)).expect("Operation failed");
908
909 assert!(result.fun < 0.5);
911 assert!(result.x[0].abs() < 0.5);
912 assert!(result.x[1].abs() < 0.5);
913 }
914}