1use crate::{OptimizerError, OptimizerResult};
16use scirs2_core::RngExt;
17use std::collections::HashMap;
18use std::sync::Arc;
19use torsh_core::device::CpuDevice;
20
/// Objective function interface for Bayesian optimization (minimization).
///
/// Implementors must be `Send + Sync` so the optimizer can be shared
/// across threads.
pub trait ObjectiveFunction: Send + Sync {
    /// Evaluates the objective at the given parameter vector.
    fn evaluate(&self, parameters: &[f32]) -> OptimizerResult<f32>;

    /// Number of parameters (dimensionality of the search space).
    fn dimension(&self) -> usize;

    /// Per-parameter `(lower, upper)` bounds of the search space.
    fn bounds(&self) -> (Vec<f32>, Vec<f32>);

    /// Human-readable name of the objective (defaults to "Unknown").
    fn name(&self) -> &str {
        "Unknown"
    }

    /// Whether evaluations are noisy (defaults to `false`).
    fn is_noisy(&self) -> bool {
        false
    }
}
42
/// Configuration for a Bayesian optimization run.
#[derive(Debug, Clone)]
pub struct BayesianOptConfig {
    /// Total budget of objective evaluations (random + model-guided).
    pub max_evaluations: usize,
    /// Number of random warm-up evaluations before fitting the surrogate.
    pub initial_random_evaluations: usize,
    /// Acquisition function used to select the next evaluation point.
    pub acquisition_function: AcquisitionFunction,
    /// Number of random restarts when maximizing the acquisition function.
    pub acquisition_restarts: usize,
    /// Configuration of the Gaussian-process surrogate model.
    pub gp_config: GaussianProcessConfig,
    /// RNG seed; `None` falls back to a fixed default seed.
    pub seed: Option<u64>,
    /// CPU device handle carried by the configuration.
    pub device: Arc<CpuDevice>,
    /// Print progress after each evaluation.
    pub verbose: bool,
}
63
64impl Default for BayesianOptConfig {
65 fn default() -> Self {
66 Self {
67 max_evaluations: 100,
68 initial_random_evaluations: 10,
69 acquisition_function: AcquisitionFunction::ExpectedImprovement { xi: 0.01 },
70 acquisition_restarts: 10,
71 gp_config: GaussianProcessConfig::default(),
72 seed: None,
73 device: Arc::new(CpuDevice::new()),
74 verbose: false,
75 }
76 }
77}
78
/// Acquisition functions used to choose the next evaluation point.
#[derive(Debug, Clone)]
pub enum AcquisitionFunction {
    /// Expected improvement over the incumbent; `xi` adds an exploration margin.
    ExpectedImprovement { xi: f32 },
    /// Probability of improving on the incumbent by at least `xi`.
    ProbabilityOfImprovement { xi: f32 },
    /// Confidence bound `-(mean - kappa * std)` (negated for minimization).
    UpperConfidenceBound { kappa: f32 },
    /// Thompson sampling — currently handled by a fallback confidence-bound
    /// heuristic in `compute_acquisition`.
    ThompsonSampling,
    /// Entropy search — currently handled by the same fallback heuristic.
    EntropySearch,
}
93
/// Configuration for the Gaussian-process surrogate model.
#[derive(Debug, Clone)]
pub struct GaussianProcessConfig {
    /// Covariance kernel.
    pub kernel: KernelType,
    /// Jitter added to the Gram-matrix diagonal for numerical stability.
    pub noise_level: f32,
    /// (min, max) bounds for the kernel length scale.
    /// NOTE(review): not read anywhere in this file — presumably consumed by
    /// a hyperparameter-optimization routine elsewhere; verify.
    pub length_scale_bounds: (f32, f32),
    /// (min, max) bounds for the signal variance.
    /// NOTE(review): likewise unused in the visible code — verify.
    pub signal_variance_bounds: (f32, f32),
    /// Whether kernel hyperparameters should be optimized when fitting.
    /// NOTE(review): the `fit` in this file does not act on this flag.
    pub optimize_hyperparameters: bool,
}
108
109impl Default for GaussianProcessConfig {
110 fn default() -> Self {
111 Self {
112 kernel: KernelType::RBF { length_scale: 1.0 },
113 noise_level: 1e-6,
114 length_scale_bounds: (1e-3, 1e3),
115 signal_variance_bounds: (1e-3, 1e3),
116 optimize_hyperparameters: true,
117 }
118 }
119}
120
/// Covariance kernels supported by the Gaussian process.
#[derive(Debug, Clone)]
pub enum KernelType {
    /// Squared-exponential (RBF) kernel.
    RBF { length_scale: f32 },
    /// Matérn kernel with smoothness ν = 3/2 (uses the √3 scaling).
    Matern15 { length_scale: f32 },
    /// Matérn kernel with smoothness ν = 5/2 (uses the √5 scaling).
    Matern25 { length_scale: f32 },
    /// Linear (dot-product) kernel scaled by `variance`.
    Linear { variance: f32 },
    /// Rational quadratic kernel; `alpha` controls the scale mixing.
    RationalQuadratic { length_scale: f32, alpha: f32 },
}
135
/// A single evaluated point in the search space.
#[derive(Debug, Clone)]
pub struct DataPoint {
    /// Parameter vector at which the objective was evaluated.
    pub parameters: Vec<f32>,
    /// Objective value observed at `parameters`.
    pub value: f32,
    /// Wall-clock duration of the evaluation, if measured.
    pub evaluation_time: Option<std::time::Duration>,
}
146
/// Outcome of a Bayesian optimization run.
#[derive(Debug, Clone)]
pub struct BayesianOptResult {
    /// Best (lowest-value) point found.
    pub best_point: DataPoint,
    /// Every evaluated point, in evaluation order.
    pub history: Vec<DataPoint>,
    /// Number of objective evaluations counted.
    pub evaluations: usize,
    /// Whether a convergence criterion was met. The current implementation
    /// stops only on the evaluation budget, so this is always `false`.
    pub converged: bool,
    /// Human-readable reason the run stopped.
    pub convergence_reason: String,
    /// Acquisition value of each model-guided point at selection time.
    pub acquisition_history: Vec<f32>,
}
163
/// A minimal Gaussian-process regressor used as the surrogate model.
///
/// Targets are standardized internally during `fit`; predictions are
/// transformed back to the original scale.
#[derive(Debug)]
pub struct GaussianProcess {
    config: GaussianProcessConfig,
    /// Training inputs, one parameter vector per row.
    x_train: Vec<Vec<f32>>,
    /// Training targets on the original scale.
    y_train: Vec<f32>,
    /// Kernel Gram matrix K plus noise jitter on the diagonal.
    gram_matrix: Vec<Vec<f32>>,
    /// Lower-triangular Cholesky factor of the Gram matrix.
    chol_gram: Vec<Vec<f32>>,
    /// Precomputed K⁻¹ y for the standardized targets.
    alpha: Vec<f32>,
    /// Mean of the training targets (standardization offset).
    y_mean: f32,
    /// Std of the training targets, clamped to >= 1e-6.
    y_std: f32,
    /// Whether `fit` has completed successfully.
    fitted: bool,
}
185
186impl GaussianProcess {
187 pub fn new(config: GaussianProcessConfig) -> Self {
188 Self {
189 config,
190 x_train: Vec::new(),
191 y_train: Vec::new(),
192 gram_matrix: Vec::new(),
193 chol_gram: Vec::new(),
194 alpha: Vec::new(),
195 y_mean: 0.0,
196 y_std: 1.0,
197 fitted: false,
198 }
199 }
200
201 pub fn fit(&mut self, x_train: Vec<Vec<f32>>, y_train: Vec<f32>) -> OptimizerResult<()> {
202 if x_train.is_empty() || y_train.is_empty() || x_train.len() != y_train.len() {
203 return Err(OptimizerError::InvalidInput(
204 "Invalid training data".to_string(),
205 ));
206 }
207
208 self.x_train = x_train;
209 self.y_train = y_train;
210
211 self.y_mean = self.y_train.iter().sum::<f32>() / self.y_train.len() as f32;
213 let var = self
214 .y_train
215 .iter()
216 .map(|&y| (y - self.y_mean).powi(2))
217 .sum::<f32>()
218 / self.y_train.len() as f32;
219 self.y_std = var.sqrt().max(1e-6);
220
221 let y_normalized: Vec<f32> = self
222 .y_train
223 .iter()
224 .map(|&y| (y - self.y_mean) / self.y_std)
225 .collect();
226
227 let n = self.x_train.len();
229 self.gram_matrix = vec![vec![0.0; n]; n];
230
231 for i in 0..n {
232 for j in 0..n {
233 self.gram_matrix[i][j] = self.kernel(&self.x_train[i], &self.x_train[j]);
234 if i == j {
235 self.gram_matrix[i][j] += self.config.noise_level;
236 }
237 }
238 }
239
240 self.chol_gram = self.cholesky_decomposition(&self.gram_matrix)?;
242
243 self.alpha = self.solve_triangular(&self.chol_gram, &y_normalized)?;
245
246 self.fitted = true;
247 Ok(())
248 }
249
250 pub fn predict(&self, x_test: &[f32]) -> OptimizerResult<(f32, f32)> {
251 if !self.fitted {
252 return Err(OptimizerError::InvalidInput("Model not fitted".to_string()));
253 }
254
255 let k_star: Vec<f32> = self
257 .x_train
258 .iter()
259 .map(|x_train| self.kernel(x_test, x_train))
260 .collect();
261
262 let mut mean = 0.0;
264 for i in 0..self.alpha.len() {
265 mean += self.alpha[i] * k_star[i];
266 }
267
268 mean = mean * self.y_std + self.y_mean;
270
271 let k_star_star = self.kernel(x_test, x_test);
273 let v = self.solve_triangular(&self.chol_gram, &k_star)?;
274 let var = k_star_star - v.iter().map(|x| x * x).sum::<f32>();
275 let std = (var.max(0.0).sqrt() * self.y_std).max(1e-6);
276
277 Ok((mean, std))
278 }
279
280 fn kernel(&self, x1: &[f32], x2: &[f32]) -> f32 {
281 match &self.config.kernel {
282 KernelType::RBF { length_scale } => {
283 let dist_sq = x1
284 .iter()
285 .zip(x2.iter())
286 .map(|(a, b)| (a - b).powi(2))
287 .sum::<f32>();
288 (-0.5 * dist_sq / length_scale.powi(2)).exp()
289 }
290 KernelType::Matern15 { length_scale } => {
291 let dist = x1
292 .iter()
293 .zip(x2.iter())
294 .map(|(a, b)| (a - b).powi(2))
295 .sum::<f32>()
296 .sqrt();
297 let scaled_dist = dist * 3.0_f32.sqrt() / length_scale;
298 (1.0 + scaled_dist) * (-scaled_dist).exp()
299 }
300 KernelType::Matern25 { length_scale } => {
301 let dist = x1
302 .iter()
303 .zip(x2.iter())
304 .map(|(a, b)| (a - b).powi(2))
305 .sum::<f32>()
306 .sqrt();
307 let scaled_dist = dist * 5.0_f32.sqrt() / length_scale;
308 (1.0 + scaled_dist + scaled_dist.powi(2) / 3.0) * (-scaled_dist).exp()
309 }
310 KernelType::Linear { variance } => {
311 variance * x1.iter().zip(x2.iter()).map(|(a, b)| a * b).sum::<f32>()
312 }
313 KernelType::RationalQuadratic {
314 length_scale,
315 alpha,
316 } => {
317 let dist_sq = x1
318 .iter()
319 .zip(x2.iter())
320 .map(|(a, b)| (a - b).powi(2))
321 .sum::<f32>();
322 (1.0 + dist_sq / (2.0 * alpha * length_scale.powi(2))).powf(-alpha)
323 }
324 }
325 }
326
327 fn cholesky_decomposition(&self, matrix: &[Vec<f32>]) -> OptimizerResult<Vec<Vec<f32>>> {
328 let n = matrix.len();
329 let mut l = vec![vec![0.0; n]; n];
330
331 for i in 0..n {
332 for j in 0..=i {
333 if i == j {
334 let sum: f32 = (0..j).map(|k| (l[j][k] as f32).powi(2)).sum();
335 let val = matrix[j][j] - sum;
336 if val <= 0.0 {
337 return Err(OptimizerError::NumericalError(
338 "Matrix not positive definite".to_string(),
339 ));
340 }
341 l[j][j] = val.sqrt();
342 } else {
343 let sum: f32 = (0..j).map(|k| l[i][k] * l[j][k]).sum();
344 l[i][j] = (matrix[i][j] - sum) / l[j][j];
345 }
346 }
347 }
348
349 Ok(l)
350 }
351
352 fn solve_triangular(&self, l: &[Vec<f32>], b: &[f32]) -> OptimizerResult<Vec<f32>> {
353 let n = l.len();
354 let mut x = vec![0.0; n];
355
356 for i in 0..n {
358 let sum: f32 = (0..i).map(|j| l[i][j] * x[j]).sum();
359 x[i] = (b[i] - sum) / l[i][i];
360 }
361
362 for i in (0..n).rev() {
364 let sum: f32 = ((i + 1)..n).map(|j| l[j][i] * x[j]).sum();
365 x[i] = (x[i] - sum) / l[i][i];
366 }
367
368 Ok(x)
369 }
370}
371
/// Bayesian optimizer: minimizes an `ObjectiveFunction` using a
/// Gaussian-process surrogate and an acquisition function.
#[derive(Debug)]
pub struct BayesianOptimizer {
    config: BayesianOptConfig,
    /// Surrogate model, refitted before every model-guided evaluation.
    gp: GaussianProcess,
    /// All observed points in evaluation order.
    history: Vec<DataPoint>,
    /// Incumbent best (minimum-value) observation, if any.
    best_point: Option<DataPoint>,
}
380
381impl BayesianOptimizer {
382 pub fn new(config: BayesianOptConfig) -> Self {
383 let gp = GaussianProcess::new(config.gp_config.clone());
384
385 Self {
386 config,
387 gp,
388 history: Vec::new(),
389 best_point: None,
390 }
391 }
392
393 pub fn optimize<F: ObjectiveFunction>(
394 &mut self,
395 objective: &F,
396 ) -> OptimizerResult<BayesianOptResult> {
397 let dimension = objective.dimension();
398 let (lower_bounds, upper_bounds) = objective.bounds();
399
400 if lower_bounds.len() != dimension || upper_bounds.len() != dimension {
401 return Err(OptimizerError::InvalidInput(
402 "Bounds dimension mismatch".to_string(),
403 ));
404 }
405
406 use scirs2_core::random::{Random, Rng};
408
409 let mut rng = if let Some(seed) = self.config.seed {
410 Random::seed(seed)
411 } else {
412 Random::seed(0)
413 };
414
415 let mut acquisition_history = Vec::new();
416
417 for i in 0..self
419 .config
420 .initial_random_evaluations
421 .min(self.config.max_evaluations)
422 {
423 let params = self.sample_random_point(&mut rng, &lower_bounds, &upper_bounds);
424 let start_time = std::time::Instant::now();
425 let value = objective.evaluate(¶ms)?;
426 let evaluation_time = start_time.elapsed();
427
428 let point = DataPoint {
429 parameters: params,
430 value,
431 evaluation_time: Some(evaluation_time),
432 };
433
434 self.add_observation(point.clone());
435
436 if self.config.verbose {
437 println!(
438 "Initial evaluation {}: f({:?}) = {:.6e}",
439 i + 1,
440 point.parameters,
441 point.value
442 );
443 }
444 }
445
446 let mut evaluations = self.config.initial_random_evaluations;
448
449 while evaluations < self.config.max_evaluations {
450 let x_train: Vec<Vec<f32>> =
452 self.history.iter().map(|p| p.parameters.clone()).collect();
453 let y_train: Vec<f32> = self.history.iter().map(|p| p.value).collect();
454
455 self.gp.fit(x_train, y_train)?;
456
457 let next_point = self.optimize_acquisition(&mut rng, &lower_bounds, &upper_bounds)?;
459
460 let start_time = std::time::Instant::now();
462 let value = objective.evaluate(&next_point)?;
463 let evaluation_time = start_time.elapsed();
464
465 let point = DataPoint {
466 parameters: next_point,
467 value,
468 evaluation_time: Some(evaluation_time),
469 };
470
471 let acq_value = self.compute_acquisition(&point.parameters)?;
473 acquisition_history.push(acq_value);
474
475 self.add_observation(point.clone());
476 evaluations += 1;
477
478 if self.config.verbose {
479 println!(
480 "Iteration {}: f({:?}) = {:.6e}, Best = {:.6e}",
481 evaluations,
482 point.parameters,
483 point.value,
484 self.best_point
485 .as_ref()
486 .expect("best_point should exist after add_observation")
487 .value
488 );
489 }
490 }
491
492 Ok(BayesianOptResult {
493 best_point: self
494 .best_point
495 .clone()
496 .expect("best_point should exist after optimization"),
497 history: self.history.clone(),
498 evaluations,
499 converged: false, convergence_reason: "Maximum evaluations reached".to_string(),
501 acquisition_history,
502 })
503 }
504
505 fn add_observation(&mut self, point: DataPoint) {
506 if self.best_point.is_none()
507 || point.value
508 < self
509 .best_point
510 .as_ref()
511 .expect("best_point should exist after is_none check")
512 .value
513 {
514 self.best_point = Some(point.clone());
515 }
516 self.history.push(point);
517 }
518
519 fn sample_random_point<R: scirs2_core::random::Rng>(
520 &self,
521 rng: &mut R,
522 lower: &[f32],
523 upper: &[f32],
524 ) -> Vec<f32> {
525 (0..lower.len())
526 .map(|i| rng.random::<f32>() * (upper[i] - lower[i]) + lower[i])
527 .collect()
528 }
529
530 fn optimize_acquisition<R: scirs2_core::random::Rng>(
531 &self,
532 rng: &mut R,
533 lower: &[f32],
534 upper: &[f32],
535 ) -> OptimizerResult<Vec<f32>> {
536 let mut best_point = Vec::new();
537 let mut best_acquisition = f32::NEG_INFINITY;
538
539 for _ in 0..self.config.acquisition_restarts {
541 let start_point = self.sample_random_point(rng, lower, upper);
542 let optimized_point = self.local_optimize_acquisition(start_point, lower, upper)?;
543 let acq_value = self.compute_acquisition(&optimized_point)?;
544
545 if acq_value > best_acquisition {
546 best_acquisition = acq_value;
547 best_point = optimized_point;
548 }
549 }
550
551 if best_point.is_empty() {
552 best_point = self.sample_random_point(rng, lower, upper);
554 }
555
556 Ok(best_point)
557 }
558
559 fn local_optimize_acquisition(
560 &self,
561 start_point: Vec<f32>,
562 lower: &[f32],
563 upper: &[f32],
564 ) -> OptimizerResult<Vec<f32>> {
565 let mut best_point = start_point;
567 let mut best_value = self.compute_acquisition(&best_point)?;
568
569 use scirs2_core::random::{Random, Rng};
571 let mut rng = Random::seed(0);
572 let step_size = 0.1;
573
574 for _ in 0..100 {
575 let mut candidate = best_point.clone();
576
577 for i in 0..candidate.len() {
579 let perturbation =
580 (rng.random::<f32>() * 2.0 - 1.0) * step_size * (upper[i] - lower[i]);
581 candidate[i] = (candidate[i] + perturbation).max(lower[i]).min(upper[i]);
582 }
583
584 let value = self.compute_acquisition(&candidate)?;
585 if value > best_value {
586 best_value = value;
587 best_point = candidate;
588 }
589 }
590
591 Ok(best_point)
592 }
593
594 fn compute_acquisition(&self, point: &[f32]) -> OptimizerResult<f32> {
595 let (mean, std) = self.gp.predict(point)?;
596 let best_value = self
597 .best_point
598 .as_ref()
599 .expect("best_point should exist for acquisition computation")
600 .value;
601
602 match &self.config.acquisition_function {
603 AcquisitionFunction::ExpectedImprovement { xi } => {
604 let improvement = best_value - mean - xi;
605 if std <= 0.0 {
606 return Ok(0.0);
607 }
608 let z = improvement / std;
609 Ok(improvement * self.normal_cdf(z) + std * self.normal_pdf(z))
610 }
611 AcquisitionFunction::ProbabilityOfImprovement { xi } => {
612 let improvement = best_value - mean - xi;
613 if std <= 0.0 {
614 return Ok(0.0);
615 }
616 let z = improvement / std;
617 Ok(self.normal_cdf(z))
618 }
619 AcquisitionFunction::UpperConfidenceBound { kappa } => {
620 Ok(-(mean - kappa * std)) }
622 _ => {
623 Ok(-(mean - std)) }
626 }
627 }
628
629 fn normal_cdf(&self, x: f32) -> f32 {
630 0.5 * (1.0 + self.erf(x / 2.0_f32.sqrt()))
632 }
633
634 fn normal_pdf(&self, x: f32) -> f32 {
635 (2.0 * std::f32::consts::PI).sqrt().recip() * (-0.5 * x * x).exp()
637 }
638
639 fn erf(&self, x: f32) -> f32 {
640 let a1 = 0.254829592;
642 let a2 = -0.284496736;
643 let a3 = 1.421413741;
644 let a4 = -1.453152027;
645 let a5 = 1.061405429;
646 let p = 0.3275911;
647
648 let sign = if x < 0.0 { -1.0 } else { 1.0 };
649 let x = x.abs();
650
651 let t = 1.0 / (1.0 + p * x);
652 let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x).exp();
653
654 sign * y
655 }
656}
657
658pub mod test_functions {
660 use super::*;
661
662 pub struct Quadratic1D;
664
665 impl ObjectiveFunction for Quadratic1D {
666 fn evaluate(&self, parameters: &[f32]) -> OptimizerResult<f32> {
667 if parameters.len() != 1 {
668 return Err(OptimizerError::InvalidInput(
669 "Expected 1D input".to_string(),
670 ));
671 }
672 Ok((parameters[0] - 0.5).powi(2))
673 }
674
675 fn dimension(&self) -> usize {
676 1
677 }
678
679 fn bounds(&self) -> (Vec<f32>, Vec<f32>) {
680 (vec![0.0], vec![1.0])
681 }
682
683 fn name(&self) -> &str {
684 "Quadratic 1D"
685 }
686 }
687
688 pub struct Branin;
690
691 impl ObjectiveFunction for Branin {
692 fn evaluate(&self, parameters: &[f32]) -> OptimizerResult<f32> {
693 if parameters.len() != 2 {
694 return Err(OptimizerError::InvalidInput(
695 "Expected 2D input".to_string(),
696 ));
697 }
698
699 let x1 = parameters[0];
700 let x2 = parameters[1];
701
702 let a = 1.0;
703 let b = 5.1 / (4.0 * std::f32::consts::PI.powi(2));
704 let c = 5.0 / std::f32::consts::PI;
705 let r = 6.0;
706 let s = 10.0;
707 let t = 1.0 / (8.0 * std::f32::consts::PI);
708
709 let term1 = a * (x2 - b * x1.powi(2) + c * x1 - r).powi(2);
710 let term2 = s * (1.0 - t) * x1.cos();
711 let term3 = s;
712
713 Ok(term1 + term2 + term3)
714 }
715
716 fn dimension(&self) -> usize {
717 2
718 }
719
720 fn bounds(&self) -> (Vec<f32>, Vec<f32>) {
721 (vec![-5.0, 0.0], vec![10.0, 15.0])
722 }
723
724 fn name(&self) -> &str {
725 "Branin"
726 }
727 }
728}
729
#[cfg(test)]
mod tests {
    use super::test_functions::*;
    use super::*;

    // The GP should fit three points on y = x^2 and produce a prediction
    // inside the training range with non-zero uncertainty.
    #[test]
    fn test_gaussian_process_fit_predict() {
        let config = GaussianProcessConfig::default();
        let mut gp = GaussianProcess::new(config);

        let x_train = vec![vec![0.0], vec![0.5], vec![1.0]];
        let y_train = vec![0.0, 0.25, 1.0];

        gp.fit(x_train, y_train).unwrap();

        let (mean, std) = gp.predict(&[0.75]).unwrap();
        assert!(mean >= 0.0 && mean <= 1.0);
        assert!(std > 0.0);
    }

    // End-to-end run on the 1-D quadratic (minimum at x = 0.5) using
    // expected improvement with a fixed seed; checks the optimum is
    // approximately located and the budget is fully spent.
    #[test]
    fn test_bayesian_optimization_quadratic() {
        let config = BayesianOptConfig {
            max_evaluations: 20,
            initial_random_evaluations: 5,
            acquisition_function: AcquisitionFunction::ExpectedImprovement { xi: 0.01 },
            seed: Some(42),
            verbose: false,
            ..Default::default()
        };

        let mut optimizer = BayesianOptimizer::new(config);
        let objective = Quadratic1D;

        let result = optimizer.optimize(&objective).unwrap();

        assert!(result.best_point.parameters[0] > 0.3 && result.best_point.parameters[0] < 0.7);
        assert!(result.best_point.value < 0.1);
        assert_eq!(result.evaluations, 20);
    }

    // End-to-end run on the Branin benchmark with an upper-confidence-bound
    // acquisition; only checks a loose bound on the best value found.
    #[test]
    fn test_bayesian_optimization_branin() {
        let config = BayesianOptConfig {
            max_evaluations: 30,
            initial_random_evaluations: 10,
            acquisition_function: AcquisitionFunction::UpperConfidenceBound { kappa: 2.576 },
            seed: Some(42),
            verbose: false,
            ..Default::default()
        };

        let mut optimizer = BayesianOptimizer::new(config);
        let objective = Branin;

        let result = optimizer.optimize(&objective).unwrap();

        assert!(result.best_point.value < 5.0); assert_eq!(result.evaluations, 30);
    }
}