1use crate::cross_validation::CrossValidator;
7use scirs2_core::ndarray::{Array1, Array2};
8use scirs2_core::random::rngs::StdRng;
9use scirs2_core::random::{Rng, SeedableRng};
10use sklears_core::{
11 error::{Result, SklearsError},
12 types::Float,
13};
14use std::collections::HashMap;
15
/// A sampling distribution for a single hyperparameter.
///
/// All variants yield a `Float` so heterogeneous parameter sets can share
/// one parameter vector; integer draws are converted on sampling.
#[derive(Debug, Clone)]
pub enum ParamDistribution {
    /// Uniform over the closed interval `[min, max]`.
    Uniform { min: Float, max: Float },
    /// Log-uniform over `[min, max]`; both bounds must be strictly positive
    /// (sampling takes `ln` of the bounds).
    LogUniform { min: Float, max: Float },
    /// Uniform choice from a discrete, non-empty list of values.
    Choice { values: Vec<Float> },
    /// Uniform over the integers `min..=max`, returned as `Float`.
    IntUniform { min: i32, max: i32 },
}
28
impl ParamDistribution {
    /// Draws one value from this distribution using `rng`.
    ///
    /// NOTE(review): `gen_range` presumably panics on an empty/inverted
    /// range (min > max, or empty `Choice` values), and `LogUniform` with a
    /// non-positive `min` produces NaN via `ln()` — bounds are not
    /// validated here; confirm callers construct distributions sensibly.
    pub fn sample<R: Rng>(&self, rng: &mut R) -> Float {
        match self {
            ParamDistribution::Uniform { min, max } => rng.gen_range(*min..=*max),
            ParamDistribution::LogUniform { min, max } => {
                // Sample uniformly in log space, then map back.
                let log_min = min.ln();
                let log_max = max.ln();
                let log_val = rng.gen_range(log_min..=log_max);
                log_val.exp()
            }
            ParamDistribution::Choice { values } => {
                let idx = rng.gen_range(0..values.len());
                values[idx]
            }
            ParamDistribution::IntUniform { min, max } => rng.gen_range(*min..=*max) as Float,
        }
    }
}
48
/// Configuration for [`BayesSearchCV`].
#[derive(Debug, Clone)]
pub struct BayesSearchConfig {
    /// Number of purely random evaluations before the surrogate model is
    /// consulted.
    pub n_initial_points: usize,
    /// Total evaluation budget (random + model-guided).
    pub n_calls: usize,
    /// Acquisition function used to rank candidate points.
    pub acquisition: AcquisitionFunction,
    /// Seed for the internal RNG; `None` keeps the constructor's default.
    pub random_state: Option<u64>,
    /// Parallelism hint; NOTE(review): not consumed anywhere in this file —
    /// confirm whether it is wired up elsewhere.
    pub n_jobs: Option<i32>,
}
63
64impl Default for BayesSearchConfig {
65 fn default() -> Self {
66 Self {
67 n_initial_points: 10,
68 n_calls: 50,
69 acquisition: AcquisitionFunction::ExpectedImprovement,
70 random_state: None,
71 n_jobs: None,
72 }
73 }
74}
75
/// Acquisition functions that trade off exploration vs. exploitation when
/// choosing the next point to evaluate.
#[derive(Debug, Clone)]
pub enum AcquisitionFunction {
    /// Expected improvement over the best observed score.
    ExpectedImprovement,
    /// `mean + kappa * std`; larger `kappa` explores more.
    UpperConfidenceBound { kappa: Float },
    /// Probability that a candidate beats the best observed score.
    ProbabilityOfImprovement,
    /// TPE-style selection; `gamma` is the good/bad split quantile.
    /// NOTE(review): not implemented in `BayesSearchCV` (falls back to the
    /// surrogate mean) — use `TPEOptimizer` for a real TPE loop.
    TreeStructuredParzenEstimator { gamma: Float },
}
88
89#[derive(Debug, Clone)]
91pub struct EvaluationPoint {
92 params: Vec<Float>,
93 score: Float,
94}
95
/// Lightweight surrogate model used by [`BayesSearchCV`].
///
/// Despite the name, `predict` implements a kernel-weighted smoother
/// rather than an exact GP posterior (no covariance-matrix inversion).
#[derive(Debug)]
struct SimpleGaussianProcess {
    /// Training inputs, one row per evaluated parameter vector.
    x_train: Array2<Float>,
    /// Training targets (scores), aligned with `x_train` rows.
    y_train: Array1<Float>,
    /// Additive observation-noise variance used in predictions.
    noise_level: Float,
    /// RBF kernel length scale; re-estimated by `fit`.
    length_scale: Float,
    /// Kernel amplitude / prior variance; re-estimated by `fit`.
    signal_variance: Float,
}
106
impl SimpleGaussianProcess {
    /// Creates an untrained surrogate with the given observation-noise
    /// level; length scale and signal variance start at 1.0 and are
    /// re-estimated by `fit`.
    fn new(noise_level: Float) -> Self {
        Self {
            x_train: Array2::zeros((0, 0)),
            y_train: Array1::zeros(0),
            noise_level,
            length_scale: 1.0,
            signal_variance: 1.0,
        }
    }

    /// Stores the training data and picks kernel hyperparameters
    /// heuristically.
    ///
    /// With more than one point: the length scale becomes the median
    /// pairwise Euclidean distance (floored at 0.1) and the signal
    /// variance becomes the population variance of `y` (floored at 0.01).
    fn fit(&mut self, x: &Array2<Float>, y: &Array1<Float>) -> Result<()> {
        self.x_train = x.clone();
        self.y_train = y.clone();

        if x.nrows() > 1 {
            // All pairwise Euclidean distances between training rows.
            let mut distances = Vec::new();
            for i in 0..x.nrows() {
                for j in (i + 1)..x.nrows() {
                    let mut dist_sq = 0.0;
                    for k in 0..x.ncols() {
                        let diff = x[[i, k]] - x[[j, k]];
                        dist_sq += diff * diff;
                    }
                    distances.push(dist_sq.sqrt());
                }
            }

            if !distances.is_empty() {
                // Median-distance heuristic; `partial_cmp().unwrap()` is
                // safe here because distances are sqrt of sums of squares
                // of finite inputs (no NaN unless inputs are NaN).
                distances.sort_by(|a, b| a.partial_cmp(b).unwrap());
                self.length_scale = distances[distances.len() / 2].max(0.1);
            }

            // Signal variance = variance of the targets, floored so the
            // kernel amplitude never collapses to zero.
            let y_mean = y.mean().unwrap_or(0.0);
            self.signal_variance =
                y.iter().map(|&yi| (yi - y_mean).powi(2)).sum::<Float>() / y.len() as Float;
            self.signal_variance = self.signal_variance.max(0.01);
        }

        Ok(())
    }

    /// Predicts (mean, std) for each row of `x`.
    ///
    /// Each prediction is an RBF-weighted average of the training targets;
    /// the "variance" is the weighted spread of the targets around that
    /// mean plus the noise level. This is an approximation, not an exact
    /// GP posterior.
    fn predict(&self, x: &Array2<Float>) -> Result<(Array1<Float>, Array1<Float>)> {
        let n_test = x.nrows();
        let mut mean = Array1::zeros(n_test);
        let mut std = Array1::zeros(n_test);

        // Untrained model: zero mean, prior standard deviation everywhere.
        if self.x_train.nrows() == 0 {
            return Ok((mean, Array1::from_elem(n_test, self.signal_variance.sqrt())));
        }

        for i in 0..n_test {
            let mut kernel_values = Array1::zeros(self.x_train.nrows());
            let mut total_weight = 0.0;

            for j in 0..self.x_train.nrows() {
                // Squared Euclidean distance to training row j.
                let mut dist_sq = 0.0;
                for k in 0..x.ncols() {
                    let diff = x[[i, k]] - self.x_train[[j, k]];
                    dist_sq += diff * diff;
                }

                // Squared-exponential (RBF) kernel value.
                let kernel_val = self.signal_variance
                    * (-dist_sq / (2.0 * self.length_scale * self.length_scale)).exp();
                kernel_values[j] = kernel_val;
                total_weight += kernel_val;
            }

            if total_weight > 1e-8 {
                // Normalize so the weights sum to one.
                kernel_values /= total_weight;

                mean[i] = kernel_values.dot(&self.y_train);

                // Weighted variance of the targets around the mean.
                let kernel_var: Float = kernel_values
                    .iter()
                    .zip(self.y_train.iter())
                    .map(|(&k, &y)| k * (y - mean[i]).powi(2))
                    .sum();

                let predictive_var = kernel_var + self.noise_level;
                std[i] = predictive_var.sqrt();
            } else {
                // Query far from all training points: fall back to the
                // global target mean and the prior standard deviation.
                mean[i] = self.y_train.mean().unwrap_or(0.0);
                std[i] = self.signal_variance.sqrt();
            }
        }

        Ok((mean, std))
    }
}
209
/// Bayesian hyperparameter search with a lightweight kernel-smoother
/// surrogate. Trailing-underscore fields are populated during `search`,
/// following scikit-learn's fitted-attribute naming convention.
pub struct BayesSearchCV {
    /// Distribution to sample from, keyed by parameter name.
    param_distributions: HashMap<String, ParamDistribution>,
    /// Search budget and acquisition settings.
    config: BayesSearchConfig,
    /// All completed evaluations, in execution order.
    evaluations_: Vec<EvaluationPoint>,
    /// Best parameter set found so far (by score).
    best_params_: Option<HashMap<String, Float>>,
    /// Score of `best_params_`.
    best_score_: Option<Float>,
    /// Surrogate model refit from the evaluation log each iteration.
    gp_: SimpleGaussianProcess,
    /// RNG for all sampling; reseeded by `random_state`.
    rng: StdRng,
}
221
222impl BayesSearchCV {
223 pub fn new(param_distributions: HashMap<String, ParamDistribution>) -> Self {
225 let rng = StdRng::seed_from_u64(42);
226 Self {
227 param_distributions,
228 config: BayesSearchConfig::default(),
229 evaluations_: Vec::new(),
230 best_params_: None,
231 best_score_: None,
232 gp_: SimpleGaussianProcess::new(0.01),
233 rng,
234 }
235 }
236
237 pub fn n_initial_points(mut self, n_initial_points: usize) -> Self {
239 self.config.n_initial_points = n_initial_points;
240 self
241 }
242
243 pub fn n_calls(mut self, n_calls: usize) -> Self {
245 self.config.n_calls = n_calls;
246 self
247 }
248
249 pub fn acquisition(mut self, acquisition: AcquisitionFunction) -> Self {
251 self.config.acquisition = acquisition;
252 self
253 }
254
255 pub fn random_state(mut self, random_state: u64) -> Self {
257 self.config.random_state = Some(random_state);
258 self.rng = StdRng::seed_from_u64(random_state);
259 self
260 }
261
262 pub fn search<E, CV, F>(
264 &mut self,
265 estimator: E,
266 x: &Array2<Float>,
267 y: &Array1<i32>,
268 cv: CV,
269 scoring: F,
270 ) -> Result<()>
271 where
272 E: Clone,
273 CV: CrossValidator + Clone,
274 F: Fn(&Array1<i32>, &Array1<i32>) -> Float + Clone,
275 {
276 let param_names: Vec<String> = self.param_distributions.keys().cloned().collect();
277 let n_params = param_names.len();
278
279 if n_params == 0 {
280 return Err(SklearsError::InvalidInput(
281 "No parameters to optimize".to_string(),
282 ));
283 }
284
285 for _ in 0..self.config.n_initial_points.min(self.config.n_calls) {
287 let params = self.sample_random_params(¶m_names);
288 let score = self.evaluate_params(
289 params.clone(),
290 estimator.clone(),
291 x,
292 y,
293 cv.clone(),
294 scoring.clone(),
295 )?;
296
297 self.evaluations_.push(EvaluationPoint { params, score });
298 self.update_best()?;
299 }
300
301 let remaining_calls = self
303 .config
304 .n_calls
305 .saturating_sub(self.config.n_initial_points);
306
307 for _ in 0..remaining_calls {
308 self.fit_surrogate_model()?;
310
311 let next_params = self.select_next_point(¶m_names)?;
313 let score = self.evaluate_params(
314 next_params.clone(),
315 estimator.clone(),
316 x,
317 y,
318 cv.clone(),
319 scoring.clone(),
320 )?;
321
322 self.evaluations_.push(EvaluationPoint {
323 params: next_params,
324 score,
325 });
326 self.update_best()?;
327 }
328
329 Ok(())
330 }
331
332 fn sample_random_params(&mut self, param_names: &[String]) -> Vec<Float> {
333 param_names
334 .iter()
335 .map(|name| self.param_distributions[name].sample(&mut self.rng))
336 .collect()
337 }
338
339 fn evaluate_params<E, CV, F>(
340 &self,
341 params: Vec<Float>,
342 _estimator: E,
343 x: &Array2<Float>,
344 y: &Array1<i32>,
345 cv: CV,
346 scoring: F,
347 ) -> Result<Float>
348 where
349 E: Clone,
350 CV: CrossValidator + Clone,
351 F: Fn(&Array1<i32>, &Array1<i32>) -> Float + Clone,
352 {
353 let _param_array = Array1::from_vec(params.clone());
362
363 let scores = cv
366 .split(x.nrows(), Some(y))
367 .into_iter()
368 .map(|(train_indices, test_indices)| {
369 let _y_train: Array1<i32> = train_indices.iter().map(|&i| y[i]).collect();
371 let y_test: Array1<i32> = test_indices.iter().map(|&i| y[i]).collect();
372
373 let y_pred: Array1<i32> = y_test
377 .iter()
378 .map(|_| {
379 if params.iter().sum::<Float>() > params.len() as Float * 0.5 {
381 1
382 } else {
383 0
384 }
385 })
386 .collect();
387
388 scoring(&y_test, &y_pred)
389 })
390 .collect::<Vec<Float>>();
391
392 Ok(scores.iter().sum::<Float>() / scores.len() as Float)
394 }
395
396 fn fit_surrogate_model(&mut self) -> Result<()> {
397 if self.evaluations_.is_empty() {
398 return Ok(());
399 }
400
401 let n_points = self.evaluations_.len();
402 let n_params = self.evaluations_[0].params.len();
403
404 let mut x_train = Array2::zeros((n_points, n_params));
405 let mut y_train = Array1::zeros(n_points);
406
407 for (i, eval_point) in self.evaluations_.iter().enumerate() {
408 for (j, ¶m) in eval_point.params.iter().enumerate() {
409 x_train[[i, j]] = param;
410 }
411 y_train[i] = eval_point.score;
412 }
413
414 self.gp_.fit(&x_train, &y_train)?;
415 Ok(())
416 }
417
418 fn select_next_point(&mut self, param_names: &[String]) -> Result<Vec<Float>> {
419 let n_candidates = 100;
421 let mut best_params = vec![];
422 let mut best_acquisition = Float::NEG_INFINITY;
423
424 for _ in 0..n_candidates {
425 let candidate_params = self.sample_random_params(param_names);
426 let acquisition_value = self.compute_acquisition(&candidate_params)?;
427
428 if acquisition_value > best_acquisition {
429 best_acquisition = acquisition_value;
430 best_params = candidate_params;
431 }
432 }
433
434 if best_params.is_empty() {
435 best_params = self.sample_random_params(param_names);
437 }
438
439 Ok(best_params)
440 }
441
442 fn compute_acquisition(&self, params: &[Float]) -> Result<Float> {
443 if self.evaluations_.is_empty() {
444 return Ok(0.0);
445 }
446
447 let x_test = Array2::from_shape_vec((1, params.len()), params.to_vec())
449 .map_err(|_| SklearsError::InvalidInput("Invalid parameter shape".to_string()))?;
450
451 let (mean, std) = self.gp_.predict(&x_test)?;
452 let mu = mean[0];
453 let sigma = std[0];
454
455 match &self.config.acquisition {
456 AcquisitionFunction::ExpectedImprovement => {
457 let best_score = self.best_score_.unwrap_or(Float::NEG_INFINITY);
458 if sigma <= 1e-8 {
459 return Ok(0.0);
460 }
461
462 let improvement = mu - best_score;
463 let z = improvement / sigma;
464
465 let phi = 0.5 * (1.0 + erf(z / 2.0_f64.sqrt()));
467 let density = (-0.5 * z * z).exp() / (2.0 * std::f64::consts::PI).sqrt();
468
469 Ok(improvement * phi + sigma * density)
470 }
471 AcquisitionFunction::UpperConfidenceBound { kappa } => Ok(mu + kappa * sigma),
472 AcquisitionFunction::ProbabilityOfImprovement => {
473 let best_score = self.best_score_.unwrap_or(Float::NEG_INFINITY);
474 if sigma <= 1e-8 {
475 return Ok(0.0);
476 }
477
478 let z = (mu - best_score) / sigma;
479 let phi = 0.5 * (1.0 + erf(z / 2.0_f64.sqrt()));
480 Ok(phi)
481 }
482 AcquisitionFunction::TreeStructuredParzenEstimator { .. } => {
483 Ok(mu)
486 }
487 }
488 }
489
490 fn update_best(&mut self) -> Result<()> {
491 if let Some(best_eval) = self
492 .evaluations_
493 .iter()
494 .max_by(|a, b| a.score.partial_cmp(&b.score).unwrap())
495 {
496 let param_names: Vec<String> = self.param_distributions.keys().cloned().collect();
497 let mut best_params = HashMap::new();
498
499 for (i, name) in param_names.iter().enumerate() {
500 best_params.insert(name.clone(), best_eval.params[i]);
501 }
502
503 self.best_params_ = Some(best_params);
504 self.best_score_ = Some(best_eval.score);
505 }
506
507 Ok(())
508 }
509
510 pub fn best_params(&self) -> Option<&HashMap<String, Float>> {
512 self.best_params_.as_ref()
513 }
514
515 pub fn best_score(&self) -> Option<Float> {
517 self.best_score_
518 }
519
520 pub fn evaluations(&self) -> &[EvaluationPoint] {
522 &self.evaluations_
523 }
524}
525
/// Tree-structured Parzen Estimator optimizer: splits past evaluations
/// into "good" and "bad" sets and prefers candidates that are likely
/// under the good-set density relative to the bad-set density.
pub struct TPEOptimizer {
    /// Distribution to sample from, keyed by parameter name.
    param_distributions: HashMap<String, ParamDistribution>,
    /// Budget and TPE-specific settings.
    config: TPEConfig,
    /// All completed evaluations, in execution order.
    evaluations_: Vec<EvaluationPoint>,
    /// Best parameter set found so far (by score).
    best_params_: Option<HashMap<String, Float>>,
    /// Score of `best_params_`.
    best_score_: Option<Float>,
    /// RNG for all sampling; reseeded by `random_state`.
    rng: StdRng,
}
535
/// Configuration for [`TPEOptimizer`].
#[derive(Debug, Clone)]
pub struct TPEConfig {
    /// Number of purely random evaluations before TPE selection starts.
    pub n_initial_points: usize,
    /// Total evaluation budget (random + TPE-guided).
    pub n_calls: usize,
    /// Quantile of evaluations treated as "good" (e.g. 0.25 = top 25%).
    pub gamma: Float,
    /// Candidates scored per iteration when maximizing l(x)/g(x).
    pub n_ei_candidates: usize,
    /// Seed for the internal RNG; `None` keeps the constructor's default.
    pub random_state: Option<u64>,
}
550
551impl Default for TPEConfig {
552 fn default() -> Self {
553 Self {
554 n_initial_points: 10,
555 n_calls: 50,
556 gamma: 0.25, n_ei_candidates: 24,
558 random_state: None,
559 }
560 }
561}
562
563impl TPEOptimizer {
564 pub fn new(param_distributions: HashMap<String, ParamDistribution>) -> Self {
566 let rng = StdRng::seed_from_u64(42);
567 Self {
568 param_distributions,
569 config: TPEConfig::default(),
570 evaluations_: Vec::new(),
571 best_params_: None,
572 best_score_: None,
573 rng,
574 }
575 }
576
577 pub fn n_initial_points(mut self, n_initial_points: usize) -> Self {
579 self.config.n_initial_points = n_initial_points;
580 self
581 }
582
583 pub fn n_calls(mut self, n_calls: usize) -> Self {
585 self.config.n_calls = n_calls;
586 self
587 }
588
589 pub fn gamma(mut self, gamma: Float) -> Self {
591 self.config.gamma = gamma;
592 self
593 }
594
595 pub fn random_state(mut self, random_state: u64) -> Self {
597 self.config.random_state = Some(random_state);
598 self.rng = StdRng::seed_from_u64(random_state);
599 self
600 }
601
602 pub fn optimize<E, CV, F>(
604 &mut self,
605 estimator: E,
606 x: &Array2<Float>,
607 y: &Array1<i32>,
608 cv: CV,
609 scoring: F,
610 ) -> Result<()>
611 where
612 E: Clone,
613 CV: CrossValidator + Clone,
614 F: Fn(&Array1<i32>, &Array1<i32>) -> Float + Clone,
615 {
616 let param_names: Vec<String> = self.param_distributions.keys().cloned().collect();
617 let n_params = param_names.len();
618
619 if n_params == 0 {
620 return Err(SklearsError::InvalidInput(
621 "No parameters to optimize".to_string(),
622 ));
623 }
624
625 for _ in 0..self.config.n_initial_points.min(self.config.n_calls) {
627 let params = self.sample_random_params(¶m_names);
628 let score = self.evaluate_params(
629 params.clone(),
630 estimator.clone(),
631 x,
632 y,
633 cv.clone(),
634 scoring.clone(),
635 )?;
636
637 self.evaluations_.push(EvaluationPoint { params, score });
638 self.update_best()?;
639 }
640
641 let remaining_calls = self
643 .config
644 .n_calls
645 .saturating_sub(self.config.n_initial_points);
646
647 for _ in 0..remaining_calls {
648 let next_params = self.select_next_point_tpe(¶m_names)?;
650 let score = self.evaluate_params(
651 next_params.clone(),
652 estimator.clone(),
653 x,
654 y,
655 cv.clone(),
656 scoring.clone(),
657 )?;
658
659 self.evaluations_.push(EvaluationPoint {
660 params: next_params,
661 score,
662 });
663 self.update_best()?;
664 }
665
666 Ok(())
667 }
668
669 fn select_next_point_tpe(&mut self, param_names: &[String]) -> Result<Vec<Float>> {
670 if self.evaluations_.is_empty() {
671 return Ok(self.sample_random_params(param_names));
672 }
673
674 let mut sorted_evals = self.evaluations_.clone();
676 sorted_evals.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
677
678 let n_good =
680 ((self.evaluations_.len() as Float * self.config.gamma).ceil() as usize).max(1);
681 let good_observations = &sorted_evals[..n_good];
682 let bad_observations = &sorted_evals[n_good..];
683
684 if bad_observations.is_empty() {
685 return Ok(self.sample_random_params(param_names));
687 }
688
689 let mut best_ei = Float::NEG_INFINITY;
691 let mut best_params = self.sample_random_params(param_names);
692
693 for _ in 0..self.config.n_ei_candidates {
694 let candidate = self.sample_random_params(param_names);
695 let ei = self.compute_expected_improvement_tpe(
696 &candidate,
697 good_observations,
698 bad_observations,
699 )?;
700
701 if ei > best_ei {
702 best_ei = ei;
703 best_params = candidate;
704 }
705 }
706
707 Ok(best_params)
708 }
709
710 fn compute_expected_improvement_tpe(
711 &self,
712 params: &[Float],
713 good_observations: &[EvaluationPoint],
714 bad_observations: &[EvaluationPoint],
715 ) -> Result<Float> {
716 let l_good = self.compute_density(params, good_observations);
718 let l_bad = self.compute_density(params, bad_observations);
719
720 if l_bad > 1e-10 {
722 Ok(l_good / l_bad)
723 } else {
724 Ok(l_good)
725 }
726 }
727
728 fn compute_density(&self, params: &[Float], observations: &[EvaluationPoint]) -> Float {
729 if observations.is_empty() {
730 return 1.0;
731 }
732
733 let mut density = 0.0;
735 let bandwidth = 0.1; for obs in observations {
738 let mut dist_sq = 0.0;
739 for (i, ¶m) in params.iter().enumerate() {
740 let diff = param - obs.params[i];
741 dist_sq += diff * diff;
742 }
743
744 density += (-dist_sq / (2.0 * bandwidth * bandwidth)).exp();
746 }
747
748 density / observations.len() as Float
749 }
750
751 fn sample_random_params(&mut self, param_names: &[String]) -> Vec<Float> {
752 param_names
753 .iter()
754 .map(|name| self.param_distributions[name].sample(&mut self.rng))
755 .collect()
756 }
757
758 fn evaluate_params<E, CV, F>(
759 &self,
760 params: Vec<Float>,
761 _estimator: E,
762 x: &Array2<Float>,
763 y: &Array1<i32>,
764 cv: CV,
765 scoring: F,
766 ) -> Result<Float>
767 where
768 E: Clone,
769 CV: CrossValidator + Clone,
770 F: Fn(&Array1<i32>, &Array1<i32>) -> Float + Clone,
771 {
772 let scores = cv
774 .split(x.nrows(), Some(y))
775 .into_iter()
776 .map(|(train_indices, test_indices)| {
777 let _y_train: Array1<i32> = train_indices.iter().map(|&i| y[i]).collect();
778 let y_test: Array1<i32> = test_indices.iter().map(|&i| y[i]).collect();
779
780 let y_pred: Array1<i32> = y_test
781 .iter()
782 .map(|_| {
783 if params.iter().sum::<Float>() > params.len() as Float * 0.5 {
784 1
785 } else {
786 0
787 }
788 })
789 .collect();
790
791 scoring(&y_test, &y_pred)
792 })
793 .collect::<Vec<Float>>();
794
795 Ok(scores.iter().sum::<Float>() / scores.len() as Float)
796 }
797
798 fn update_best(&mut self) -> Result<()> {
799 if let Some(best_eval) = self
800 .evaluations_
801 .iter()
802 .max_by(|a, b| a.score.partial_cmp(&b.score).unwrap())
803 {
804 let param_names: Vec<String> = self.param_distributions.keys().cloned().collect();
805 let mut best_params = HashMap::new();
806
807 for (i, name) in param_names.iter().enumerate() {
808 best_params.insert(name.clone(), best_eval.params[i]);
809 }
810
811 self.best_params_ = Some(best_params);
812 self.best_score_ = Some(best_eval.score);
813 }
814
815 Ok(())
816 }
817
818 pub fn best_params(&self) -> Option<&HashMap<String, Float>> {
820 self.best_params_.as_ref()
821 }
822
823 pub fn best_score(&self) -> Option<Float> {
825 self.best_score_
826 }
827
828 pub fn evaluations(&self) -> &[EvaluationPoint] {
830 &self.evaluations_
831 }
832}
833
/// Error function via the Abramowitz & Stegun 7.1.26 rational
/// approximation (maximum absolute error about 1.5e-7).
fn erf(x: f64) -> f64 {
    // Polynomial coefficients a1..a5 and rational parameter p from
    // A&S 7.1.26, ordered low degree first.
    const A: [f64; 5] = [
        0.254829592,
        -0.284496736,
        1.421413741,
        -1.453152027,
        1.061405429,
    ];
    const P: f64 = 0.3275911;

    // erf is odd: evaluate on |x| and restore the sign afterwards.
    let sign = if x < 0.0 { -1.0 } else { 1.0 };
    let z = x.abs();

    let t = 1.0 / (1.0 + P * z);
    // Horner evaluation of the degree-5 polynomial in t, times t.
    let poly = A.iter().rev().fold(0.0, |acc, &c| acc * t + c) * t;

    sign * (1.0 - poly * (-z * z).exp())
}
852
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cross_validation::KFold;

    /// Each distribution variant must sample within its own support.
    #[test]
    fn test_param_distribution_sampling() {
        let mut rng = StdRng::seed_from_u64(42);

        let uniform = ParamDistribution::Uniform { min: 0.0, max: 1.0 };
        let sample = uniform.sample(&mut rng);
        assert!(sample >= 0.0 && sample <= 1.0);

        let log_uniform = ParamDistribution::LogUniform {
            min: 1e-3,
            max: 1e3,
        };
        let sample = log_uniform.sample(&mut rng);
        assert!(sample >= 1e-3 && sample <= 1e3);

        let choice = ParamDistribution::Choice {
            values: vec![1.0, 2.0, 3.0],
        };
        let sample = choice.sample(&mut rng);
        assert!(vec![1.0, 2.0, 3.0].contains(&sample));

        let int_uniform = ParamDistribution::IntUniform { min: 1, max: 10 };
        let sample = int_uniform.sample(&mut rng);
        assert!(sample >= 1.0 && sample <= 10.0);
    }

    /// Builder methods should forward their values into the config.
    #[test]
    fn test_bayes_search_creation() {
        let mut param_distributions = HashMap::new();
        param_distributions.insert(
            "param1".to_string(),
            ParamDistribution::Uniform { min: 0.0, max: 1.0 },
        );
        param_distributions.insert(
            "param2".to_string(),
            ParamDistribution::LogUniform {
                min: 1e-3,
                max: 1e3,
            },
        );

        let search = BayesSearchCV::new(param_distributions)
            .n_initial_points(5)
            .n_calls(20)
            .random_state(42);

        assert_eq!(search.config.n_initial_points, 5);
        assert_eq!(search.config.n_calls, 20);
        assert_eq!(search.config.random_state, Some(42));
    }

    /// Fit + predict should produce finite means and positive stds.
    #[test]
    fn test_simple_gaussian_process() {
        let mut gp = SimpleGaussianProcess::new(0.01);

        let x_train = Array2::from_shape_vec((3, 1), vec![0.0, 1.0, 2.0]).unwrap();
        let y_train = Array1::from_vec(vec![0.0, 1.0, 4.0]);

        gp.fit(&x_train, &y_train).unwrap();

        let x_test = Array2::from_shape_vec((2, 1), vec![0.5, 1.5]).unwrap();
        let (mean, std) = gp.predict(&x_test).unwrap();

        assert_eq!(mean.len(), 2);
        assert_eq!(std.len(), 2);

        // Sanity checks only: exact values depend on the kernel heuristics.
        for &m in mean.iter() {
            assert!(m.is_finite());
        }
        for &s in std.iter() {
            assert!(s.is_finite() && s > 0.0);
        }
    }

    /// Acquisition computation should succeed on a manually seeded history
    /// (evaluations pushed directly, without going through `search`).
    #[test]
    fn test_acquisition_functions() {
        let mut param_distributions = HashMap::new();
        param_distributions.insert(
            "param1".to_string(),
            ParamDistribution::Uniform { min: 0.0, max: 1.0 },
        );

        let mut search = BayesSearchCV::new(param_distributions);

        search.evaluations_.push(EvaluationPoint {
            params: vec![0.3],
            score: 0.8,
        });
        search.evaluations_.push(EvaluationPoint {
            params: vec![0.7],
            score: 0.6,
        });

        search.fit_surrogate_model().unwrap();

        // Only asserts that the call does not error.
        let _acquisition = search.compute_acquisition(&[0.5]).unwrap();
    }

    /// Spot-checks of the polynomial erf approximation against known
    /// values, plus the range bound |erf(x)| <= 1.
    #[test]
    fn test_erf_approximation() {
        assert!((erf(0.0) - 0.0).abs() < 1e-6);
        assert!((erf(1.0) - 0.8427).abs() < 1e-3);
        assert!((erf(-1.0) - (-0.8427)).abs() < 1e-3);

        for x in [-5.0, -2.0, -1.0, 0.0, 1.0, 2.0, 5.0] {
            let result = erf(x);
            assert!(result >= -1.0 && result <= 1.0);
        }
    }

    /// End-to-end TPE run on a tiny dataset: budget respected, best score
    /// in [0, 1] for an accuracy-style metric.
    #[test]
    fn test_tpe_optimizer() {
        let mut param_distributions = HashMap::new();
        param_distributions.insert(
            "param1".to_string(),
            ParamDistribution::Uniform { min: 0.0, max: 1.0 },
        );
        param_distributions.insert(
            "param2".to_string(),
            ParamDistribution::LogUniform {
                min: 1e-3,
                max: 1e3,
            },
        );

        let mut tpe = TPEOptimizer::new(param_distributions)
            .n_initial_points(3)
            .n_calls(10)
            .gamma(0.25)
            .random_state(42);

        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0,
            ],
        )
        .unwrap();
        let y = Array1::from_vec(vec![0, 1, 0, 1, 0, 1, 0, 1]);

        let cv = KFold::new(2);
        // Plain accuracy scorer.
        let scoring = |y_true: &Array1<i32>, y_pred: &Array1<i32>| -> Float {
            let correct = y_true
                .iter()
                .zip(y_pred.iter())
                .filter(|(&t, &p)| t == p)
                .count();
            correct as Float / y_true.len() as Float
        };

        tpe.optimize((), &x, &y, cv, scoring)
            .expect("TPE optimization should work");

        assert_eq!(tpe.evaluations().len(), 10);
        assert!(tpe.best_score().is_some());
        assert!(tpe.best_params().is_some());

        if let Some(best_score) = tpe.best_score() {
            assert!(best_score >= 0.0 && best_score <= 1.0);
        }
    }
}