// sklears_svm/hyperparameter_optimization/bayesian_optimization.rs

1//! Bayesian Optimization for hyperparameter tuning
2
3use std::time::Instant;
4
5use scirs2_core::ndarray::{Array1, Array2};
6use scirs2_core::random::Random;
7
8use crate::kernels::KernelType;
9use crate::svc::SVC;
10use sklears_core::error::{Result, SklearsError};
11use sklears_core::traits::{Fit, Predict};
12
13use super::{
14    OptimizationConfig, OptimizationResult, ParameterSet, ParameterSpec, ScoringMetric, SearchSpace,
15};
16
// Type aliases for compatibility: nalgebra-style names mapped onto ndarray.
type DMatrix<T> = Array2<T>; // dense 2-D matrix (rows are samples)
type DVector<T> = Array1<T>; // dense 1-D vector
20
/// Bayesian Optimization hyperparameter optimizer
///
/// Holds the optimization settings, the hyperparameter search space to
/// sample from, and a seeded RNG so runs are reproducible (see `new` for
/// the seeding policy).
pub struct BayesianOptimizationCV {
    // Iteration budget, CV folds, scoring metric, verbosity, etc.
    config: OptimizationConfig,
    // Parameter specs for c, kernel, tol and max_iter.
    search_space: SearchSpace,
    // Drives all random sampling; seeded from `config.random_state` or 42.
    rng: Random<scirs2_core::random::rngs::StdRng>,
}
27
28impl BayesianOptimizationCV {
29    /// Create a new Bayesian optimization optimizer
30    pub fn new(config: OptimizationConfig, search_space: SearchSpace) -> Self {
31        let rng = if let Some(seed) = config.random_state {
32            Random::seed(seed)
33        } else {
34            Random::seed(42) // Default seed for reproducibility
35        };
36
37        Self {
38            config,
39            search_space,
40            rng,
41        }
42    }
43
    /// Run Bayesian optimization over the configured search space.
    ///
    /// Two phases:
    /// 1. Random exploration: `clamp(n_iterations / 5, 5, 20)` uniformly
    ///    sampled parameter sets to seed the surrogate model.
    /// 2. Guided search: each remaining iteration evaluates the candidate
    ///    that maximizes Expected Improvement under the surrogate.
    ///
    /// Scores come from `evaluate_params` (mean cross-validation score);
    /// higher is always better (error metrics are negated in
    /// `calculate_score`).
    ///
    /// # Arguments
    /// * `x` - feature matrix (rows are samples)
    /// * `y` - target vector, one entry per row of `x`
    ///
    /// # Errors
    /// Propagates failures from parameter sampling and cross-validation.
    pub fn fit(&mut self, x: &DMatrix<f64>, y: &DVector<f64>) -> Result<OptimizationResult> {
        let start_time = Instant::now();

        if self.config.verbose {
            println!(
                "Bayesian optimization with {} iterations",
                self.config.n_iterations
            );
        }

        // Initialize with random samples
        // ~20% of the budget goes to exploration, bounded to [5, 20].
        let n_initial = (self.config.n_iterations / 5).clamp(5, 20);
        let mut evaluated_params: Vec<(ParameterSet, f64)> = Vec::new();
        let mut best_score = -f64::INFINITY;
        let mut best_params = ParameterSet::new();
        let mut score_history = Vec::new();
        // NOTE(review): this counter also accumulates during phase 1, but
        // the early-stopping check only runs in the phase-2 loop.
        let mut iterations_without_improvement = 0;

        // Phase 1: Random exploration
        if self.config.verbose {
            println!("Phase 1: Random exploration ({} samples)", n_initial);
        }

        for i in 0..n_initial {
            let params = self.sample_random_params()?;
            let score = self.evaluate_params(&params, x, y)?;

            evaluated_params.push((params.clone(), score));
            score_history.push(score);

            if score > best_score {
                best_score = score;
                best_params = params.clone();
                iterations_without_improvement = 0;
            } else {
                iterations_without_improvement += 1;
            }

            if self.config.verbose {
                println!("Sample {}/{}: Score {:.6}", i + 1, n_initial, score);
            }
        }

        // Phase 2: Bayesian optimization with Expected Improvement
        if self.config.verbose {
            println!("Phase 2: Bayesian optimization");
        }

        for iteration in n_initial..self.config.n_iterations {
            // Build surrogate model (Gaussian Process approximation)
            // Select next point using Expected Improvement acquisition function
            let next_params = self.select_next_point(&evaluated_params)?;

            // Evaluate the selected point
            let score = self.evaluate_params(&next_params, x, y)?;

            evaluated_params.push((next_params.clone(), score));
            score_history.push(score);

            if score > best_score {
                best_score = score;
                best_params = next_params.clone();
                iterations_without_improvement = 0;

                if self.config.verbose {
                    println!(
                        "Iteration {}/{}: NEW BEST Score {:.6}",
                        iteration + 1,
                        self.config.n_iterations,
                        score
                    );
                }
            } else {
                iterations_without_improvement += 1;

                // Throttle non-improving progress logs to every 10th iteration.
                if self.config.verbose && (iteration + 1) % 10 == 0 {
                    println!(
                        "Iteration {}/{}: Score {:.6} (best: {:.6})",
                        iteration + 1,
                        self.config.n_iterations,
                        score,
                        best_score
                    );
                }
            }

            // Early stopping
            // Stop after `patience` consecutive non-improving evaluations.
            if let Some(patience) = self.config.early_stopping_patience {
                if iterations_without_improvement >= patience {
                    if self.config.verbose {
                        println!("Early stopping at iteration {}", iteration + 1);
                    }
                    break;
                }
            }
        }

        if self.config.verbose {
            println!("Best score: {:.6}", best_score);
            println!("Best params: {:?}", best_params);
        }

        // n_iterations reflects evaluations actually performed, which may
        // be fewer than configured when early stopping triggers.
        Ok(OptimizationResult {
            best_params,
            best_score,
            cv_results: evaluated_params,
            n_iterations: score_history.len(),
            optimization_time: start_time.elapsed().as_secs_f64(),
            score_history,
        })
    }
156
157    /// Sample random parameters from search space
158    fn sample_random_params(&mut self) -> Result<ParameterSet> {
159        // Clone search space specs to avoid borrow checker issues
160        let c_spec = self.search_space.c.clone();
161        let kernel_spec = self.search_space.kernel.clone();
162        let tol_spec = self.search_space.tol.clone();
163        let max_iter_spec = self.search_space.max_iter.clone();
164
165        let c = self.sample_value(&c_spec)?;
166
167        let kernel = if let Some(ref spec) = kernel_spec {
168            self.sample_kernel(spec)?
169        } else {
170            KernelType::Rbf { gamma: 1.0 }
171        };
172
173        let tol = if let Some(ref spec) = tol_spec {
174            self.sample_value(spec)?
175        } else {
176            1e-3
177        };
178
179        let max_iter = if let Some(ref spec) = max_iter_spec {
180            self.sample_value(spec)? as usize
181        } else {
182            1000
183        };
184
185        Ok(ParameterSet {
186            c,
187            kernel,
188            tol,
189            max_iter,
190        })
191    }
192
193    /// Select next point to evaluate using Expected Improvement
194    fn select_next_point(&mut self, evaluated: &[(ParameterSet, f64)]) -> Result<ParameterSet> {
195        // Generate candidate points
196        let n_candidates = 100;
197        let mut candidates = Vec::with_capacity(n_candidates);
198
199        for _ in 0..n_candidates {
200            candidates.push(self.sample_random_params()?);
201        }
202
203        // Calculate Expected Improvement for each candidate
204        let best_observed = evaluated
205            .iter()
206            .map(|(_, score)| *score)
207            .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
208            .unwrap_or(0.0);
209
210        let mut best_ei = -f64::INFINITY;
211        let mut best_candidate = candidates[0].clone();
212
213        for candidate in &candidates {
214            let ei = self.expected_improvement(candidate, evaluated, best_observed)?;
215            if ei > best_ei {
216                best_ei = ei;
217                best_candidate = candidate.clone();
218            }
219        }
220
221        Ok(best_candidate)
222    }
223
224    /// Calculate Expected Improvement acquisition function
225    fn expected_improvement(
226        &self,
227        candidate: &ParameterSet,
228        evaluated: &[(ParameterSet, f64)],
229        best_observed: f64,
230    ) -> Result<f64> {
231        // Simplified Gaussian Process prediction using RBF kernel
232        // Mean prediction: weighted average of observed values
233        // Std prediction: based on distance to nearest neighbors
234
235        let (mean, std) = self.gp_predict(candidate, evaluated)?;
236
237        if std < 1e-10 {
238            return Ok(0.0);
239        }
240
241        // Expected Improvement formula
242        let z = (mean - best_observed - 0.01) / std; // 0.01 is exploration parameter (xi)
243        let ei = (mean - best_observed - 0.01) * self.normal_cdf(z) + std * self.normal_pdf(z);
244
245        Ok(ei.max(0.0))
246    }
247
248    /// Simplified Gaussian Process prediction
249    fn gp_predict(
250        &self,
251        candidate: &ParameterSet,
252        evaluated: &[(ParameterSet, f64)],
253    ) -> Result<(f64, f64)> {
254        if evaluated.is_empty() {
255            return Ok((0.0, 1.0));
256        }
257
258        // Calculate distances and weights using RBF kernel
259        let length_scale = 1.0;
260        let mut total_weight = 0.0;
261        let mut weighted_mean = 0.0;
262
263        for (params, score) in evaluated {
264            let dist = self.parameter_distance(candidate, params)?;
265            let weight = (-0.5 * (dist / length_scale).powi(2)).exp();
266            total_weight += weight;
267            weighted_mean += weight * score;
268        }
269
270        let mean = if total_weight > 1e-10 {
271            weighted_mean / total_weight
272        } else {
273            evaluated.iter().map(|(_, score)| score).sum::<f64>() / evaluated.len() as f64
274        };
275
276        // Estimate uncertainty based on distance to nearest neighbor
277        let min_dist = evaluated
278            .iter()
279            .map(|(params, _)| self.parameter_distance(candidate, params).unwrap_or(1.0))
280            .min_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
281            .unwrap_or(1.0);
282
283        let std = (0.1 + 0.9 * (1.0 - (-min_dist.powi(2)).exp())).min(1.0);
284
285        Ok((mean, std))
286    }
287
    /// Calculate distance between two parameter sets.
    ///
    /// Normalized Euclidean distance combining:
    /// - log-scale distances for `c` and `tol`, softened by dividing by 3.0
    /// - linear distance for `max_iter`, normalized by 2500
    /// - a 0/1 term for kernel variant mismatch
    ///
    /// NOTE(review): the kernel term compares enum variants only via
    /// `std::mem::discriminant`, so two kernels of the same variant with
    /// different hyperparameters (e.g. RBF with different gamma) contribute
    /// distance 0 here.
    ///
    /// Assumes `c` and `tol` are strictly positive (ln of 0 or a negative
    /// value yields -inf/NaN) — TODO confirm the search-space specs
    /// guarantee this.
    fn parameter_distance(&self, a: &ParameterSet, b: &ParameterSet) -> Result<f64> {
        // Normalized Euclidean distance in parameter space
        let c_dist = ((a.c.ln() - b.c.ln()) / 3.0).powi(2); // Normalized log distance
        let tol_dist = ((a.tol.ln() - b.tol.ln()) / 3.0).powi(2);
        let max_iter_dist = ((a.max_iter as f64 - b.max_iter as f64) / 2500.0).powi(2);

        // Kernel distance (0 if same, 1 if different)
        let kernel_dist = if std::mem::discriminant(&a.kernel) == std::mem::discriminant(&b.kernel)
        {
            0.0
        } else {
            1.0
        };

        Ok((c_dist + tol_dist + max_iter_dist + kernel_dist).sqrt())
    }
305
306    /// Standard normal CDF (cumulative distribution function)
307    fn normal_cdf(&self, x: f64) -> f64 {
308        0.5 * (1.0 + self.erf(x / std::f64::consts::SQRT_2))
309    }
310
311    /// Standard normal PDF (probability density function)
312    fn normal_pdf(&self, x: f64) -> f64 {
313        (1.0 / (2.0 * std::f64::consts::PI).sqrt()) * (-0.5 * x.powi(2)).exp()
314    }
315
316    /// Error function approximation
317    fn erf(&self, x: f64) -> f64 {
318        // Abramowitz and Stegun approximation
319        let a1 = 0.254829592;
320        let a2 = -0.284496736;
321        let a3 = 1.421413741;
322        let a4 = -1.453152027;
323        let a5 = 1.061405429;
324        let p = 0.3275911;
325
326        let sign = if x < 0.0 { -1.0 } else { 1.0 };
327        let x = x.abs();
328
329        let t = 1.0 / (1.0 + p * x);
330        let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x).exp();
331
332        sign * y
333    }
334
335    /// Sample a single value from parameter specification
336    fn sample_value(&mut self, spec: &ParameterSpec) -> Result<f64> {
337        match spec {
338            ParameterSpec::Fixed(value) => Ok(*value),
339            ParameterSpec::Uniform { min, max } => {
340                use scirs2_core::random::essentials::Uniform;
341                let dist = Uniform::new(*min, *max).map_err(|e| {
342                    SklearsError::InvalidInput(format!(
343                        "Failed to create uniform distribution: {}",
344                        e
345                    ))
346                })?;
347                Ok(self.rng.sample(dist))
348            }
349            ParameterSpec::LogUniform { min, max } => {
350                use scirs2_core::random::essentials::Uniform;
351                let log_min = min.ln();
352                let log_max = max.ln();
353                let dist = Uniform::new(log_min, log_max).map_err(|e| {
354                    SklearsError::InvalidInput(format!(
355                        "Failed to create uniform distribution: {}",
356                        e
357                    ))
358                })?;
359                let log_val = self.rng.sample(dist);
360                Ok(log_val.exp())
361            }
362            ParameterSpec::Choice(choices) => {
363                if choices.is_empty() {
364                    return Err(SklearsError::InvalidInput("Empty choice list".to_string()));
365                }
366                use scirs2_core::random::essentials::Uniform;
367                let dist = Uniform::new(0, choices.len()).map_err(|e| {
368                    SklearsError::InvalidInput(format!(
369                        "Failed to create uniform distribution: {}",
370                        e
371                    ))
372                })?;
373                let idx = self.rng.sample(dist);
374                Ok(choices[idx])
375            }
376            ParameterSpec::KernelChoice(_) => Err(SklearsError::InvalidInput(
377                "Use sample_kernel for kernel specs".to_string(),
378            )),
379        }
380    }
381
382    /// Sample a kernel from kernel specification
383    fn sample_kernel(&mut self, spec: &ParameterSpec) -> Result<KernelType> {
384        match spec {
385            ParameterSpec::KernelChoice(kernels) => {
386                if kernels.is_empty() {
387                    return Err(SklearsError::InvalidInput(
388                        "Empty kernel choice list".to_string(),
389                    ));
390                }
391                use scirs2_core::random::essentials::Uniform;
392                let dist = Uniform::new(0, kernels.len()).map_err(|e| {
393                    SklearsError::InvalidInput(format!(
394                        "Failed to create uniform distribution: {}",
395                        e
396                    ))
397                })?;
398                let idx = self.rng.sample(dist);
399                Ok(kernels[idx].clone())
400            }
401            _ => Err(SklearsError::InvalidInput(
402                "Invalid kernel specification".to_string(),
403            )),
404        }
405    }
406
407    /// Evaluate parameter set using cross-validation
408    fn evaluate_params(
409        &self,
410        params: &ParameterSet,
411        x: &DMatrix<f64>,
412        y: &DVector<f64>,
413    ) -> Result<f64> {
414        let scores = self.cross_validate(params, x, y)?;
415        Ok(scores.iter().sum::<f64>() / scores.len() as f64)
416    }
417
418    /// Perform cross-validation
419    fn cross_validate(
420        &self,
421        params: &ParameterSet,
422        x: &DMatrix<f64>,
423        y: &DVector<f64>,
424    ) -> Result<Vec<f64>> {
425        let n_samples = x.nrows();
426        let fold_size = n_samples / self.config.cv_folds;
427        let mut scores = Vec::new();
428
429        for fold in 0..self.config.cv_folds {
430            let start_idx = fold * fold_size;
431            let end_idx = if fold == self.config.cv_folds - 1 {
432                n_samples
433            } else {
434                (fold + 1) * fold_size
435            };
436
437            // Create train/test splits
438            let mut x_train_data = Vec::new();
439            let mut y_train_vals = Vec::new();
440            let mut x_test_data = Vec::new();
441            let mut y_test_vals = Vec::new();
442
443            for i in 0..n_samples {
444                if i >= start_idx && i < end_idx {
445                    // Test set
446                    for j in 0..x.ncols() {
447                        x_test_data.push(x[[i, j]]);
448                    }
449                    y_test_vals.push(y[i]);
450                } else {
451                    // Training set
452                    for j in 0..x.ncols() {
453                        x_train_data.push(x[[i, j]]);
454                    }
455                    y_train_vals.push(y[i]);
456                }
457            }
458
459            let n_train = y_train_vals.len();
460            let n_test = y_test_vals.len();
461            let n_features = x.ncols();
462
463            let x_train = Array2::from_shape_vec((n_train, n_features), x_train_data)?;
464            let y_train = Array1::from_vec(y_train_vals);
465            let x_test = Array2::from_shape_vec((n_test, n_features), x_test_data)?;
466            let y_test = Array1::from_vec(y_test_vals);
467
468            // Train and evaluate model
469            let svm = SVC::new()
470                .c(params.c)
471                .kernel(params.kernel.clone())
472                .tol(params.tol)
473                .max_iter(params.max_iter);
474
475            let fitted_svm = svm.fit(&x_train, &y_train)?;
476            let y_pred = fitted_svm.predict(&x_test)?;
477
478            let score = self.calculate_score(&y_test, &y_pred)?;
479            scores.push(score);
480        }
481
482        Ok(scores)
483    }
484
485    /// Calculate score based on scoring metric
486    fn calculate_score(&self, y_true: &DVector<f64>, y_pred: &DVector<f64>) -> Result<f64> {
487        match self.config.scoring {
488            ScoringMetric::Accuracy => {
489                let correct = y_true
490                    .iter()
491                    .zip(y_pred.iter())
492                    .map(|(&t, &p)| if (t - p).abs() < 0.5 { 1.0 } else { 0.0 })
493                    .sum::<f64>();
494                Ok(correct / y_true.len() as f64)
495            }
496            ScoringMetric::MeanSquaredError => {
497                let mse = y_true
498                    .iter()
499                    .zip(y_pred.iter())
500                    .map(|(&t, &p)| (t - p).powi(2))
501                    .sum::<f64>()
502                    / y_true.len() as f64;
503                Ok(-mse) // Negative because we want to maximize
504            }
505            ScoringMetric::MeanAbsoluteError => {
506                let mae = y_true
507                    .iter()
508                    .zip(y_pred.iter())
509                    .map(|(&t, &p)| (t - p).abs())
510                    .sum::<f64>()
511                    / y_true.len() as f64;
512                Ok(-mae) // Negative because we want to maximize
513            }
514            _ => {
515                // For now, default to accuracy for other metrics
516                let correct = y_true
517                    .iter()
518                    .zip(y_pred.iter())
519                    .map(|(&t, &p)| if (t - p).abs() < 0.5 { 1.0 } else { 0.0 })
520                    .sum::<f64>();
521                Ok(correct / y_true.len() as f64)
522            }
523        }
524    }
525}