sklears_gaussian_process/
bayesian_optimization.rs

1//! Bayesian Optimization framework for hyperparameter tuning and global optimization
2
3// SciRS2 Policy - Use scirs2-autograd for ndarray types and operations
4use scirs2_core::ndarray::{s, Array1, Array2, ArrayView1, ArrayView2, Axis};
5use sklears_core::{
6    error::{Result as SklResult, SklearsError},
7    traits::Predict,
8};
9
10use crate::gpr::{GaussianProcessRegressor, GprTrained};
11
12///
13/// let kernel = RBF::new(1.0);
14/// let gpr = GaussianProcessRegressor::new().kernel(Box::new(kernel));
15/// let optimizer = BayesianOptimizer::new(gpr)
16///     .acquisition(AcquisitionFunction::ExpectedImprovement)
17///     .xi(0.01);
18///
19/// // Define bounds and initial points
20/// let bounds = array![[0.0, 10.0]]; // 1D optimization between 0 and 10
21/// let X_init = array![[1.0], [5.0], [9.0]];
22/// let y_init = array![1.0, 25.0, 81.0]; // f(x) = x^2
23///
24/// let mut bo = optimizer.fit_initial(&X_init.view(), &y_init.view()).unwrap();
25/// let next_point = bo.suggest_next_point(&bounds.view()).unwrap();
26/// ```
27#[derive(Debug, Clone)]
28pub struct BayesianOptimizer {
29    gp: Option<GaussianProcessRegressor<GprTrained>>,
30    acquisition: AcquisitionFunction,
31    xi: f64,
32    n_restarts: usize,
33    random_state: Option<u64>,
34}
35
/// Available acquisition functions for Bayesian optimization
#[derive(Debug, Clone, Copy)]
pub enum AcquisitionFunction {
    /// Expected Improvement - balances exploration and exploitation
    ExpectedImprovement,
    /// Probability of Improvement - conservative acquisition function
    ProbabilityOfImprovement,
    /// Upper Confidence Bound - optimistic acquisition function
    UpperConfidenceBound {
        /// Exploration weight: the bound is computed as `mu + beta * sigma`,
        /// so larger values favor exploration.
        beta: f64,
    },
    /// Entropy Search - information-theoretic acquisition
    EntropySearch,
}
48
/// Result of Bayesian optimization
///
/// Summary container returned after an optimization run; `all_points` and
/// `all_values` are aligned row-for-row (point i produced value i).
#[derive(Debug, Clone)]
pub struct OptimizationResult {
    /// Best point found during optimization
    pub best_point: Array1<f64>,
    /// Best value found during optimization
    pub best_value: f64,
    /// All evaluated points (one row per evaluation)
    pub all_points: Array2<f64>,
    /// All evaluated values (one entry per evaluation)
    pub all_values: Array1<f64>,
    /// Number of iterations completed
    pub n_iterations: usize,
}
63
64impl BayesianOptimizer {
65    /// Create a new BayesianOptimizer instance
66    pub fn new(_gp: GaussianProcessRegressor) -> Self {
67        Self {
68            gp: None,
69            acquisition: AcquisitionFunction::ExpectedImprovement,
70            xi: 0.01,
71            n_restarts: 10,
72            random_state: None,
73        }
74    }
75
76    /// Set the acquisition function
77    pub fn acquisition(mut self, acquisition: AcquisitionFunction) -> Self {
78        self.acquisition = acquisition;
79        self
80    }
81
82    /// Set the exploration parameter
83    pub fn xi(mut self, xi: f64) -> Self {
84        self.xi = xi;
85        self
86    }
87
88    /// Set the number of random restarts for acquisition optimization
89    pub fn n_restarts(mut self, n_restarts: usize) -> Self {
90        self.n_restarts = n_restarts;
91        self
92    }
93
94    /// Set the random state
95    pub fn random_state(mut self, random_state: Option<u64>) -> Self {
96        self.random_state = random_state;
97        self
98    }
99
100    /// Create a builder for the Bayesian optimizer
101    pub fn builder() -> BayesianOptimizerBuilder {
102        BayesianOptimizerBuilder::new()
103    }
104
105    /// Fit the initial Gaussian Process model
106    pub fn fit_initial(
107        self,
108        X: &ArrayView2<f64>,
109        y: &ArrayView1<f64>,
110    ) -> SklResult<BayesianOptimizerFitted> {
111        if X.nrows() != y.len() {
112            return Err(SklearsError::InvalidInput(
113                "X and y must have the same number of samples".to_string(),
114            ));
115        }
116
117        // Create a default GP if none provided
118        let gp = match self.gp {
119            Some(gp) => gp,
120            None => {
121                use crate::kernels::RBF;
122                use sklears_core::traits::Fit;
123
124                let kernel = RBF::new(1.0);
125                let gpr = GaussianProcessRegressor::new().kernel(Box::new(kernel));
126                gpr.fit(&X.to_owned(), &y.to_owned())?
127            }
128        };
129
130        let current_best = y.fold(f64::NEG_INFINITY, |a, &b| a.max(b));
131
132        Ok(BayesianOptimizerFitted {
133            gp,
134            X_obs: X.to_owned(),
135            y_obs: y.to_owned(),
136            current_best,
137            acquisition: self.acquisition,
138            xi: self.xi,
139            n_restarts: self.n_restarts,
140            random_state: self.random_state,
141        })
142    }
143}
144
/// Fitted Bayesian Optimizer that can suggest new points
#[derive(Debug, Clone)]
pub struct BayesianOptimizerFitted {
    // Trained GP surrogate over the observations collected so far.
    gp: GaussianProcessRegressor<GprTrained>,
    // Observed input points, one row per evaluation.
    X_obs: Array2<f64>,
    // Observed objective values, aligned with the rows of `X_obs`.
    y_obs: Array1<f64>,
    // Largest observed objective value (this optimizer maximizes).
    current_best: f64,
    // Acquisition function used to score candidate points.
    acquisition: AcquisitionFunction,
    // Exploration parameter used by EI and PI.
    xi: f64,
    // Number of pseudo-random restarts for acquisition optimization.
    n_restarts: usize,
    // Optional seed for the deterministic pseudo-random restarts.
    random_state: Option<u64>,
}
157
158impl BayesianOptimizerFitted {
159    /// Suggest the next point to evaluate
160    pub fn suggest_next_point(&self, bounds: &ArrayView2<f64>) -> SklResult<Array1<f64>> {
161        if bounds.nrows() != self.X_obs.ncols() {
162            return Err(SklearsError::InvalidInput(
163                "Bounds dimension must match feature dimension".to_string(),
164            ));
165        }
166
167        let mut best_x = Array1::<f64>::zeros(bounds.nrows());
168        let mut best_acq = f64::NEG_INFINITY;
169
170        // Multi-restart optimization
171        for restart in 0..self.n_restarts {
172            // Random starting point within bounds
173            let mut x_start = Array1::<f64>::zeros(bounds.nrows());
174            let mut rng = self.random_state.unwrap_or(42) + restart as u64 * 1337;
175
176            for i in 0..bounds.nrows() {
177                let min_bound = bounds[[i, 0]];
178                let max_bound = bounds[[i, 1]];
179                // Simple pseudo-random number generation
180                rng = rng.wrapping_mul(1103515245).wrapping_add(12345);
181                let random_val = (rng % 1000000) as f64 / 1000000.0;
182                x_start[i] = min_bound + random_val * (max_bound - min_bound);
183            }
184
185            // Simple gradient-free optimization (grid search for simplicity)
186            let optimized_x = self.optimize_acquisition_internal(&x_start, bounds)?;
187            let acq_val = self.evaluate_acquisition(&optimized_x)?;
188
189            if acq_val > best_acq {
190                best_acq = acq_val;
191                best_x = optimized_x;
192            }
193        }
194
195        Ok(best_x)
196    }
197
198    /// Update the optimizer with a new observation
199    pub fn update(
200        mut self,
201        x_new: &ArrayView1<f64>,
202        y_new: f64,
203    ) -> SklResult<BayesianOptimizerFitted> {
204        // Add new observation
205        let mut X_new = Array2::<f64>::zeros((self.X_obs.nrows() + 1, self.X_obs.ncols()));
206        X_new
207            .slice_mut(s![..self.X_obs.nrows(), ..])
208            .assign(&self.X_obs);
209        X_new.row_mut(self.X_obs.nrows()).assign(x_new);
210
211        let mut y_new_vec = Array1::<f64>::zeros(self.y_obs.len() + 1);
212        y_new_vec
213            .slice_mut(s![..self.y_obs.len()])
214            .assign(&self.y_obs);
215        y_new_vec[self.y_obs.len()] = y_new;
216
217        // Update current best
218        self.current_best = self.current_best.max(y_new);
219
220        // Refit GP
221        use crate::kernels::RBF;
222        use sklears_core::traits::Fit;
223
224        let kernel = RBF::new(1.0);
225        let gpr = GaussianProcessRegressor::new().kernel(Box::new(kernel));
226        let new_gp = gpr.fit(&X_new, &y_new_vec)?;
227
228        Ok(BayesianOptimizerFitted {
229            gp: new_gp,
230            X_obs: X_new,
231            y_obs: y_new_vec,
232            current_best: self.current_best,
233            acquisition: self.acquisition,
234            xi: self.xi,
235            n_restarts: self.n_restarts,
236            random_state: self.random_state,
237        })
238    }
239
240    /// Get the current best observed value
241    pub fn get_best_value(&self) -> f64 {
242        self.current_best
243    }
244
245    /// Get the point that achieved the best value
246    pub fn get_best_point(&self) -> SklResult<Array1<f64>> {
247        let best_idx = self
248            .y_obs
249            .iter()
250            .enumerate()
251            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
252            .map(|(idx, _)| idx)
253            .ok_or_else(|| SklearsError::InvalidInput("No observations available".to_string()))?;
254
255        Ok(self.X_obs.row(best_idx).to_owned())
256    }
257
258    /// Optimize the acquisition function (simplified implementation)
259    fn optimize_acquisition_internal(
260        &self,
261        x_start: &Array1<f64>,
262        bounds: &ArrayView2<f64>,
263    ) -> SklResult<Array1<f64>> {
264        // Simple grid search for acquisition function optimization
265        let n_grid = 20;
266        let mut best_x = x_start.clone();
267        let mut best_acq = self.evaluate_acquisition(&best_x)?;
268
269        // Grid search over each dimension
270        for dim in 0..bounds.nrows() {
271            let min_bound = bounds[[dim, 0]];
272            let max_bound = bounds[[dim, 1]];
273
274            for i in 0..n_grid {
275                let mut x_test = best_x.clone();
276                x_test[dim] =
277                    min_bound + (i as f64 / (n_grid - 1) as f64) * (max_bound - min_bound);
278
279                let acq_val = self.evaluate_acquisition(&x_test)?;
280                if acq_val > best_acq {
281                    best_acq = acq_val;
282                    best_x = x_test;
283                }
284            }
285        }
286
287        Ok(best_x)
288    }
289
290    /// Evaluate the acquisition function at a given point
291    fn evaluate_acquisition(&self, x: &Array1<f64>) -> SklResult<f64> {
292        let X_test = x.view().insert_axis(Axis(0));
293        let (mean, std) = self.gp.predict_with_std(&X_test.to_owned())?;
294        let mu = mean[0];
295        let sigma = std[0];
296
297        let acq_val = match self.acquisition {
298            AcquisitionFunction::ExpectedImprovement => {
299                if sigma == 0.0 {
300                    0.0
301                } else {
302                    let z = (mu - self.current_best - self.xi) / sigma;
303                    let phi = 0.5 * (1.0 + erf(z / std::f64::consts::SQRT_2));
304                    let pdf = (-0.5 * z * z).exp() / (sigma * (2.0 * std::f64::consts::PI).sqrt());
305                    (mu - self.current_best - self.xi) * phi + sigma * pdf
306                }
307            }
308            AcquisitionFunction::ProbabilityOfImprovement => {
309                if sigma == 0.0 {
310                    if mu > self.current_best + self.xi {
311                        1.0
312                    } else {
313                        0.0
314                    }
315                } else {
316                    let z = (mu - self.current_best - self.xi) / sigma;
317                    0.5 * (1.0 + erf(z / std::f64::consts::SQRT_2))
318                }
319            }
320            AcquisitionFunction::UpperConfidenceBound { beta } => mu + beta * sigma,
321            AcquisitionFunction::EntropySearch => {
322                // Simplified entropy search (just use uncertainty)
323                sigma
324            }
325        };
326
327        Ok(acq_val)
328    }
329}
330
331impl BayesianOptimizerFitted {
332    /// Predict at given points
333    pub fn predict(&self, X: &Array2<f64>) -> SklResult<Array1<f64>> {
334        self.gp.predict(X)
335    }
336
337    /// Predict variance at given points
338    pub fn predict_variance(&self, X: &Array2<f64>) -> SklResult<Array1<f64>> {
339        let (_mean, std) = self.gp.predict_with_std(X)?;
340        // Variance is the square of standard deviation
341        Ok(std.mapv(|s| s * s))
342    }
343
344    /// Compute acquisition value at a given point
345    pub fn acquisition_value(&self, x: &Array1<f64>, current_best: f64) -> SklResult<f64> {
346        let X_test = x.view().insert_axis(Axis(0));
347        let (mean, std) = self.gp.predict_with_std(&X_test.to_owned())?;
348        let mu = mean[0];
349        let sigma = std[0];
350
351        let acq_val = match self.acquisition {
352            AcquisitionFunction::ExpectedImprovement => {
353                if sigma == 0.0 {
354                    0.0
355                } else {
356                    let z = (mu - current_best - self.xi) / sigma;
357                    let phi = 0.5 * (1.0 + erf(z / std::f64::consts::SQRT_2));
358                    let pdf = (-0.5 * z * z).exp() / (sigma * (2.0 * std::f64::consts::PI).sqrt());
359                    (mu - current_best - self.xi) * phi + sigma * pdf
360                }
361            }
362            AcquisitionFunction::ProbabilityOfImprovement => {
363                if sigma == 0.0 {
364                    if mu > current_best + self.xi {
365                        1.0
366                    } else {
367                        0.0
368                    }
369                } else {
370                    let z = (mu - current_best - self.xi) / sigma;
371                    0.5 * (1.0 + erf(z / std::f64::consts::SQRT_2))
372                }
373            }
374            AcquisitionFunction::UpperConfidenceBound { beta } => mu + beta * sigma,
375            AcquisitionFunction::EntropySearch => {
376                // Simplified entropy search (just use uncertainty)
377                sigma
378            }
379        };
380
381        Ok(acq_val)
382    }
383
384    /// Optimize acquisition function to find next point
385    pub fn optimize_acquisition(
386        &self,
387        bounds: &Array2<f64>,
388        current_best: f64,
389        n_restarts: usize,
390    ) -> SklResult<Array1<f64>> {
391        let mut best_x = Array1::<f64>::zeros(bounds.nrows());
392        let mut best_acq = f64::NEG_INFINITY;
393
394        // Multi-restart optimization
395        for restart in 0..n_restarts {
396            // Random starting point within bounds
397            let mut x_start = Array1::<f64>::zeros(bounds.nrows());
398            let mut rng = self.random_state.unwrap_or(42) + restart as u64 * 1337;
399
400            for i in 0..bounds.nrows() {
401                let min_bound = bounds[[i, 0]];
402                let max_bound = bounds[[i, 1]];
403                // Simple pseudo-random number generation
404                rng = rng.wrapping_mul(1103515245).wrapping_add(12345);
405                let random_val = (rng % 1000000) as f64 / 1000000.0;
406                x_start[i] = min_bound + random_val * (max_bound - min_bound);
407            }
408
409            // Simple gradient-free optimization (grid search for simplicity)
410            let optimized_x =
411                self.optimize_acquisition_from_start(&x_start, bounds, current_best)?;
412            let acq_val = self.acquisition_value(&optimized_x, current_best)?;
413
414            if acq_val > best_acq {
415                best_acq = acq_val;
416                best_x = optimized_x;
417            }
418        }
419
420        Ok(best_x)
421    }
422
423    /// Optimize from a starting point
424    fn optimize_acquisition_from_start(
425        &self,
426        x_start: &Array1<f64>,
427        bounds: &Array2<f64>,
428        current_best: f64,
429    ) -> SklResult<Array1<f64>> {
430        // Simple grid search for acquisition function optimization
431        let n_grid = 20;
432        let mut best_x = x_start.clone();
433        let mut best_acq = self.acquisition_value(&best_x, current_best)?;
434
435        // Grid search over each dimension
436        for dim in 0..bounds.nrows() {
437            let min_bound = bounds[[dim, 0]];
438            let max_bound = bounds[[dim, 1]];
439
440            for i in 0..n_grid {
441                let mut x_test = best_x.clone();
442                x_test[dim] =
443                    min_bound + (i as f64 / (n_grid - 1) as f64) * (max_bound - min_bound);
444
445                let acq_val = self.acquisition_value(&x_test, current_best)?;
446                if acq_val > best_acq {
447                    best_acq = acq_val;
448                    best_x = x_test;
449                }
450            }
451        }
452
453        Ok(best_x)
454    }
455
456    /// Add observation to the model
457    pub fn add_observation(&mut self, X: &Array2<f64>, y: &Array1<f64>) -> SklResult<()> {
458        // Add new observations
459        let mut X_new = Array2::<f64>::zeros((self.X_obs.nrows() + X.nrows(), self.X_obs.ncols()));
460        X_new
461            .slice_mut(s![..self.X_obs.nrows(), ..])
462            .assign(&self.X_obs);
463        X_new.slice_mut(s![self.X_obs.nrows().., ..]).assign(X);
464
465        let mut y_new = Array1::<f64>::zeros(self.y_obs.len() + y.len());
466        y_new.slice_mut(s![..self.y_obs.len()]).assign(&self.y_obs);
467        y_new.slice_mut(s![self.y_obs.len()..]).assign(y);
468
469        // Update current best
470        for &val in y.iter() {
471            self.current_best = self.current_best.max(val);
472        }
473
474        // Refit GP
475        use crate::kernels::RBF;
476        use sklears_core::traits::Fit;
477
478        let kernel = RBF::new(1.0);
479        let gpr = GaussianProcessRegressor::new().kernel(Box::new(kernel));
480        self.gp = gpr.fit(&X_new, &y_new)?;
481        self.X_obs = X_new;
482        self.y_obs = y_new;
483
484        Ok(())
485    }
486}
487
/// Builder for Bayesian optimizer
#[derive(Debug)]
pub struct BayesianOptimizerBuilder {
    // Acquisition function used to score candidate points.
    acquisition: AcquisitionFunction,
    // Exploration parameter used by EI and PI.
    xi: f64,
    // Number of pseudo-random restarts for acquisition optimization.
    n_restarts: usize,
    // Optional seed for the deterministic pseudo-random restarts.
    random_state: Option<u64>,
}
496
497impl BayesianOptimizerBuilder {
498    pub fn new() -> Self {
499        Self {
500            acquisition: AcquisitionFunction::ExpectedImprovement,
501            xi: 0.01,
502            n_restarts: 10,
503            random_state: None,
504        }
505    }
506
507    pub fn acquisition_function(mut self, acquisition: AcquisitionFunction) -> Self {
508        self.acquisition = acquisition;
509        self
510    }
511
512    pub fn xi(mut self, xi: f64) -> Self {
513        self.xi = xi;
514        self
515    }
516
517    pub fn n_restarts(mut self, n_restarts: usize) -> Self {
518        self.n_restarts = n_restarts;
519        self
520    }
521
522    pub fn random_state(mut self, random_state: u64) -> Self {
523        self.random_state = Some(random_state);
524        self
525    }
526
527    pub fn build(self) -> BayesianOptimizer {
528        BayesianOptimizer {
529            gp: None,
530            acquisition: self.acquisition,
531            xi: self.xi,
532            n_restarts: self.n_restarts,
533            random_state: self.random_state,
534        }
535    }
536}
537
538impl Default for BayesianOptimizerBuilder {
539    fn default() -> Self {
540        Self::new()
541    }
542}
543
/// Polynomial approximation of the Gauss error function `erf`.
///
/// Implements Abramowitz & Stegun formula 7.1.26 (maximum absolute error
/// about 1.5e-7), using the odd symmetry erf(-x) = -erf(x) to handle
/// negative arguments.
fn erf(x: f64) -> f64 {
    // Coefficients a1..a5 of the A&S 7.1.26 rational approximation,
    // ordered lowest degree first.
    const A: [f64; 5] = [
        0.254829592,
        -0.284496736,
        1.421413741,
        -1.453152027,
        1.061405429,
    ];
    const P: f64 = 0.3275911;

    let sign = if x < 0.0 { -1.0 } else { 1.0 };
    let x = x.abs();

    let t = 1.0 / (1.0 + P * x);
    // Horner evaluation of the degree-5 polynomial in t (same operation
    // order as the explicit nested form, so results are bit-identical).
    let poly = A.iter().rev().fold(0.0, |acc, &a| acc * t + a) * t;

    sign * (1.0 - poly * (-x * x).exp())
}