scirs2_optimize/blackbox/
model_based.rs

1//! Model-Based Black-Box Optimization.
2//!
3//! Provides surrogate-model-driven optimization using ensembles of decision
4//! trees (Random Forest) as the surrogate model.  Unlike GP-based methods,
5//! tree ensembles scale more gracefully to moderate-dimensional problems and
6//! do not require careful kernel design.
7//!
8//! # Contents
9//!
10//! - [`RandomForestSurrogate`] – ensemble of regression trees, exposes `fit`
11//!   and `predict` (mean + uncertainty from tree variance).
12//! - [`ei_random_forest`] – Expected Improvement acquisition function for
13//!   the RF surrogate.
14//! - [`SmacOptimizer`] – Sequential Model-based Algorithm Configuration (SMAC)
15//!   loop, which alternates between local and random search phases.
16//!
17//! # Quick Start
18//!
19//! ```rust
20//! use scirs2_optimize::blackbox::model_based::{
21//!     SmacOptimizer, SmacConfig,
22//! };
23//!
24//! let bounds = vec![(-5.0_f64, 5.0_f64), (-5.0, 5.0)];
25//! let config = SmacConfig { n_initial: 6, n_iterations: 20, seed: Some(42),
26//!     ..Default::default() };
27//! let mut smac = SmacOptimizer::new(bounds, config).expect("build smac");
28//! let result = smac.optimize(|x: &[f64]| x[0].powi(2) + x[1].powi(2), 20)
29//!     .expect("optimize");
30//! println!("Best: {:?} f={:.4}", result.x_best, result.f_best);
31//! ```
32
33use scirs2_core::ndarray::{Array1, Array2};
34use scirs2_core::random::rngs::StdRng;
35use scirs2_core::random::{Rng, SeedableRng};
36use scirs2_core::RngExt;
37
38use crate::error::{OptimizeError, OptimizeResult};
39
40// ---------------------------------------------------------------------------
41// Decision tree node
42// ---------------------------------------------------------------------------
43
44/// A single node in a regression decision tree.
45#[derive(Debug, Clone)]
46enum TreeNode {
47    /// Internal split node.
48    Split {
49        feature: usize,
50        threshold: f64,
51        left: Box<TreeNode>,
52        right: Box<TreeNode>,
53    },
54    /// Leaf node holding the mean response value.
55    Leaf { value: f64 },
56}
57
58impl TreeNode {
59    /// Predict the response for a single sample.
60    fn predict(&self, x: &[f64]) -> f64 {
61        match self {
62            TreeNode::Leaf { value } => *value,
63            TreeNode::Split {
64                feature,
65                threshold,
66                left,
67                right,
68            } => {
69                let v = if *feature < x.len() { x[*feature] } else { 0.0 };
70                if v <= *threshold {
71                    left.predict(x)
72                } else {
73                    right.predict(x)
74                }
75            }
76        }
77    }
78}
79
80// ---------------------------------------------------------------------------
81// Decision tree builder
82// ---------------------------------------------------------------------------
83
84/// Parameters controlling tree growth.
85#[derive(Debug, Clone)]
86struct TreeParams {
87    max_depth: usize,
88    min_samples_split: usize,
89    /// Number of features to consider at each split (0 = use sqrt(n_features)).
90    max_features: usize,
91}
92
93impl Default for TreeParams {
94    fn default() -> Self {
95        Self {
96            max_depth: 10,
97            min_samples_split: 2,
98            max_features: 0,
99        }
100    }
101}
102
103/// Build a regression tree from training data.
104fn build_tree(
105    x: &[Vec<f64>],
106    y: &[f64],
107    params: &TreeParams,
108    depth: usize,
109    rng: &mut StdRng,
110) -> TreeNode {
111    let n = x.len();
112    if n == 0 {
113        return TreeNode::Leaf { value: 0.0 };
114    }
115
116    // Mean value for this node.
117    let mean = y.iter().sum::<f64>() / n as f64;
118
119    // Stopping criteria.
120    if depth >= params.max_depth || n < params.min_samples_split {
121        return TreeNode::Leaf { value: mean };
122    }
123
124    // Variance of target; if essentially zero, no need to split.
125    let var = y.iter().map(|&v| (v - mean) * (v - mean)).sum::<f64>() / n as f64;
126    if var < 1e-12 {
127        return TreeNode::Leaf { value: mean };
128    }
129
130    let n_features = if x.is_empty() { 0 } else { x[0].len() };
131    let n_try = if params.max_features == 0 {
132        ((n_features as f64).sqrt() as usize).max(1)
133    } else {
134        params.max_features.min(n_features)
135    };
136
137    // Sample a subset of features to consider.
138    let mut feature_indices: Vec<usize> = (0..n_features).collect();
139    // Fisher-Yates partial shuffle to pick n_try features.
140    for i in 0..n_try {
141        let j = i + rng.random_range(0..(n_features - i));
142        feature_indices.swap(i, j);
143    }
144    let features_to_try = &feature_indices[..n_try];
145
146    let mut best_gain = 0.0;
147    let mut best_feature = 0;
148    let mut best_threshold = 0.0;
149
150    for &feat in features_to_try {
151        // Collect unique candidate thresholds (midpoints between sorted values).
152        let mut vals: Vec<f64> = x.iter().map(|xi| xi[feat]).collect();
153        vals.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
154        vals.dedup();
155
156        for i in 0..(vals.len().saturating_sub(1)) {
157            let threshold = 0.5 * (vals[i] + vals[i + 1]);
158            let gain = variance_reduction(x, y, feat, threshold, mean, var, n);
159            if gain > best_gain {
160                best_gain = gain;
161                best_feature = feat;
162                best_threshold = threshold;
163            }
164        }
165    }
166
167    if best_gain <= 0.0 {
168        return TreeNode::Leaf { value: mean };
169    }
170
171    // Partition data.
172    let (x_left, y_left, x_right, y_right) = partition(x, y, best_feature, best_threshold);
173
174    if x_left.is_empty() || x_right.is_empty() {
175        return TreeNode::Leaf { value: mean };
176    }
177
178    let left = build_tree(&x_left, &y_left, params, depth + 1, rng);
179    let right = build_tree(&x_right, &y_right, params, depth + 1, rng);
180
181    TreeNode::Split {
182        feature: best_feature,
183        threshold: best_threshold,
184        left: Box::new(left),
185        right: Box::new(right),
186    }
187}
188
189/// Compute variance reduction gain for a feature/threshold split.
190fn variance_reduction(
191    x: &[Vec<f64>],
192    y: &[f64],
193    feature: usize,
194    threshold: f64,
195    parent_mean: f64,
196    parent_var: f64,
197    n: usize,
198) -> f64 {
199    let (n_l, sum_l, sq_l, n_r, sum_r, sq_r) = x.iter().zip(y.iter()).fold(
200        (0usize, 0.0_f64, 0.0_f64, 0usize, 0.0_f64, 0.0_f64),
201        |mut acc, (xi, &yi)| {
202            if xi[feature] <= threshold {
203                acc.0 += 1;
204                acc.1 += yi;
205                acc.2 += yi * yi;
206            } else {
207                acc.3 += 1;
208                acc.4 += yi;
209                acc.5 += yi * yi;
210            }
211            acc
212        },
213    );
214
215    if n_l == 0 || n_r == 0 {
216        return 0.0;
217    }
218
219    let var_l = (sq_l - sum_l * sum_l / n_l as f64) / n_l as f64;
220    let var_r = (sq_r - sum_r * sum_r / n_r as f64) / n_r as f64;
221
222    let weighted_var = (n_l as f64 * var_l + n_r as f64 * var_r) / n as f64;
223
224    let _ = parent_mean; // not needed directly
225    parent_var - weighted_var
226}
227
228/// Split data into left (≤ threshold) and right (> threshold) partitions.
229fn partition(
230    x: &[Vec<f64>],
231    y: &[f64],
232    feature: usize,
233    threshold: f64,
234) -> (Vec<Vec<f64>>, Vec<f64>, Vec<Vec<f64>>, Vec<f64>) {
235    let mut x_l = Vec::new();
236    let mut y_l = Vec::new();
237    let mut x_r = Vec::new();
238    let mut y_r = Vec::new();
239
240    for (xi, &yi) in x.iter().zip(y.iter()) {
241        if xi[feature] <= threshold {
242            x_l.push(xi.clone());
243            y_l.push(yi);
244        } else {
245            x_r.push(xi.clone());
246            y_r.push(yi);
247        }
248    }
249
250    (x_l, y_l, x_r, y_r)
251}
252
253// ---------------------------------------------------------------------------
254// Random Forest Surrogate
255// ---------------------------------------------------------------------------
256
257/// Random Forest surrogate for black-box optimization.
258///
259/// Maintains an ensemble of decision trees trained on bootstrap samples.
260/// The prediction mean and variance are computed as the mean and variance
261/// across the ensemble's individual tree predictions.
262#[derive(Debug, Clone)]
263pub struct RandomForestSurrogate {
264    trees: Vec<TreeNode>,
265    n_estimators: usize,
266    max_depth: usize,
267    min_samples_split: usize,
268    max_features: usize,
269    seed: u64,
270}
271
272impl RandomForestSurrogate {
273    /// Create a new (unfitted) Random Forest surrogate.
274    pub fn new(
275        n_estimators: usize,
276        max_depth: usize,
277        min_samples_split: usize,
278        max_features: usize,
279        seed: u64,
280    ) -> Self {
281        Self {
282            trees: Vec::new(),
283            n_estimators: n_estimators.max(1),
284            max_depth: max_depth.max(1),
285            min_samples_split: min_samples_split.max(2),
286            max_features,
287            seed,
288        }
289    }
290
291    /// Default RF configuration (100 trees, depth 10, sqrt features).
292    pub fn default_config(seed: u64) -> Self {
293        Self::new(100, 10, 2, 0, seed)
294    }
295
296    /// Fit the ensemble to training data.
297    ///
298    /// `x` shape: (n_samples, n_features);  `y` shape: (n_samples,).
299    pub fn fit(&mut self, x: &Array2<f64>, y: &Array1<f64>) -> OptimizeResult<()> {
300        if x.nrows() != y.len() {
301            return Err(OptimizeError::InvalidInput(format!(
302                "x has {} rows but y has {} elements",
303                x.nrows(),
304                y.len()
305            )));
306        }
307        if x.nrows() == 0 {
308            return Err(OptimizeError::InvalidInput(
309                "Cannot fit RF with zero samples".to_string(),
310            ));
311        }
312
313        let n = x.nrows();
314        let n_features = x.ncols();
315
316        // Convert ndarray to Vec<Vec> for the tree builder.
317        let x_vec: Vec<Vec<f64>> = (0..n)
318            .map(|i| (0..n_features).map(|j| x[[i, j]]).collect())
319            .collect();
320        let y_vec: Vec<f64> = y.iter().copied().collect();
321
322        let params = TreeParams {
323            max_depth: self.max_depth,
324            min_samples_split: self.min_samples_split,
325            max_features: self.max_features,
326        };
327
328        let mut rng = StdRng::seed_from_u64(self.seed);
329        self.trees.clear();
330
331        for t in 0..self.n_estimators {
332            // Bootstrap sample.
333            let mut x_boot = Vec::with_capacity(n);
334            let mut y_boot = Vec::with_capacity(n);
335
336            // Use different seed per tree so bootstrap samples differ.
337            let mut tree_rng = StdRng::seed_from_u64(
338                self.seed
339                    .wrapping_add((t as u64).wrapping_mul(6364136223846793005)),
340            );
341
342            for _ in 0..n {
343                let idx = rng.random_range(0..n);
344                x_boot.push(x_vec[idx].clone());
345                y_boot.push(y_vec[idx]);
346            }
347
348            let tree = build_tree(&x_boot, &y_boot, &params, 0, &mut tree_rng);
349            self.trees.push(tree);
350        }
351
352        Ok(())
353    }
354
355    /// Predict mean and variance from tree ensemble at test points.
356    ///
357    /// Returns `(mean, variance)` where `variance` is the inter-tree variance.
358    pub fn predict(&self, x: &Array2<f64>) -> OptimizeResult<(Array1<f64>, Array1<f64>)> {
359        if self.trees.is_empty() {
360            return Err(OptimizeError::ComputationError(
361                "RandomForestSurrogate has not been fitted".to_string(),
362            ));
363        }
364
365        let n = x.nrows();
366        let n_features = x.ncols();
367        let n_trees = self.trees.len() as f64;
368
369        let mut means = Array1::zeros(n);
370        let mut variances = Array1::zeros(n);
371
372        for i in 0..n {
373            let x_row: Vec<f64> = (0..n_features).map(|j| x[[i, j]]).collect();
374
375            // Collect per-tree predictions.
376            let preds: Vec<f64> = self.trees.iter().map(|t| t.predict(&x_row)).collect();
377            let mean = preds.iter().sum::<f64>() / n_trees;
378            let variance = preds.iter().map(|&p| (p - mean) * (p - mean)).sum::<f64>()
379                / n_trees.max(1.0 - 1.0 / n_trees.max(1.0)); // bias-corrected
380
381            means[i] = mean;
382            variances[i] = variance.max(0.0);
383        }
384
385        Ok((means, variances))
386    }
387
388    /// Predict at a single sample; returns (mean, std).
389    pub fn predict_single(&self, x: &[f64]) -> OptimizeResult<(f64, f64)> {
390        if self.trees.is_empty() {
391            return Err(OptimizeError::ComputationError(
392                "RandomForestSurrogate has not been fitted".to_string(),
393            ));
394        }
395        let n_trees = self.trees.len() as f64;
396        let preds: Vec<f64> = self.trees.iter().map(|t| t.predict(x)).collect();
397        let mean = preds.iter().sum::<f64>() / n_trees;
398        let variance =
399            preds.iter().map(|&p| (p - mean) * (p - mean)).sum::<f64>() / n_trees.max(1.0);
400        Ok((mean, variance.max(0.0).sqrt()))
401    }
402
403    /// Number of trees in the ensemble.
404    pub fn n_estimators(&self) -> usize {
405        self.trees.len()
406    }
407
408    /// Whether the model has been fitted.
409    pub fn is_fitted(&self) -> bool {
410        !self.trees.is_empty()
411    }
412}
413
414// ---------------------------------------------------------------------------
415// Acquisition: EI for Random Forest surrogate
416// ---------------------------------------------------------------------------
417
418/// Expected Improvement acquisition for a Random Forest surrogate.
419///
420/// ```text
421///   EI(x) = (best_y - mu(x) - xi) * Phi(z) + sigma(x) * phi(z)
422///   z = (best_y - mu(x) - xi) / sigma(x)
423/// ```
424///
425/// The `sigma` is the square root of the inter-tree variance.
426pub fn ei_random_forest(
427    x: &[f64],
428    surrogate: &RandomForestSurrogate,
429    best_y: f64,
430    xi: f64,
431) -> OptimizeResult<f64> {
432    let (mu, sigma) = surrogate.predict_single(x)?;
433    if sigma < 1e-12 {
434        return Ok(0.0);
435    }
436    let z = (best_y - mu - xi) / sigma;
437    let ei = (best_y - mu - xi) * norm_cdf(z) + sigma * norm_pdf(z);
438    Ok(ei.max(0.0))
439}
440
441// ---------------------------------------------------------------------------
442// SMAC: Sequential Model-based Algorithm Configuration
443// ---------------------------------------------------------------------------
444
445/// Configuration for the SMAC optimizer.
446#[derive(Debug, Clone)]
447pub struct SmacConfig {
448    /// Number of random initial evaluations.
449    pub n_initial: usize,
450    /// Total number of function evaluations (including initial).
451    pub n_iterations: usize,
452    /// Number of random candidates evaluated per BO step.
453    pub n_candidates: usize,
454    /// Number of intensification restarts per BO step.
455    pub n_local_search: usize,
456    /// Exploration bonus xi for EI.
457    pub xi: f64,
458    /// Random Forest number of estimators.
459    pub n_estimators: usize,
460    /// Random Forest max depth.
461    pub rf_max_depth: usize,
462    /// Random seed.
463    pub seed: Option<u64>,
464    /// Verbosity.
465    pub verbose: usize,
466}
467
468impl Default for SmacConfig {
469    fn default() -> Self {
470        Self {
471            n_initial: 10,
472            n_iterations: 50,
473            n_candidates: 300,
474            n_local_search: 5,
475            xi: 0.01,
476            n_estimators: 50,
477            rf_max_depth: 10,
478            seed: None,
479            verbose: 0,
480        }
481    }
482}
483
484/// Result of SMAC optimization.
485#[derive(Debug, Clone)]
486pub struct SmacResult {
487    /// Best input found.
488    pub x_best: Array1<f64>,
489    /// Best objective value.
490    pub f_best: f64,
491    /// Number of function evaluations.
492    pub n_evals: usize,
493    /// History of (iteration, f_value) pairs.
494    pub history: Vec<(usize, f64)>,
495}
496
497/// SMAC: Sequential Model-based Algorithm Configuration.
498///
499/// Uses a Random Forest surrogate and an EI acquisition function to guide
500/// the search, combined with local search around promising incumbents.
501pub struct SmacOptimizer {
502    bounds: Vec<(f64, f64)>,
503    config: SmacConfig,
504}
505
506impl SmacOptimizer {
507    /// Create a new SMAC optimizer.
508    pub fn new(bounds: Vec<(f64, f64)>, config: SmacConfig) -> OptimizeResult<Self> {
509        if bounds.is_empty() {
510            return Err(OptimizeError::InvalidInput(
511                "Search space bounds must not be empty".to_string(),
512            ));
513        }
514        Ok(Self { bounds, config })
515    }
516
517    /// Run SMAC optimization.
518    ///
519    /// `objective(x)` evaluates the function to minimize.
520    /// `n_evaluations` overrides `config.n_iterations` if > 0.
521    pub fn optimize<F>(&mut self, objective: F, n_evaluations: usize) -> OptimizeResult<SmacResult>
522    where
523        F: Fn(&[f64]) -> f64,
524    {
525        let n_iter = if n_evaluations > 0 {
526            n_evaluations
527        } else {
528            self.config.n_iterations
529        };
530
531        if n_iter == 0 {
532            return Err(OptimizeError::InvalidInput(
533                "n_iterations must be > 0".to_string(),
534            ));
535        }
536
537        let n_dims = self.bounds.len();
538        let seed = self.config.seed.unwrap_or(0);
539        let mut rng = StdRng::seed_from_u64(seed);
540
541        let mut x_history: Vec<Vec<f64>> = Vec::new();
542        let mut y_history: Vec<f64> = Vec::new();
543        let mut history: Vec<(usize, f64)> = Vec::new();
544        let mut best_y = f64::INFINITY;
545        let mut best_x: Option<Array1<f64>> = None;
546
547        // -------------------------------------------------------------------
548        // Phase 1: Random initial evaluations (Exploration).
549        // -------------------------------------------------------------------
550        let n_init = self.config.n_initial.min(n_iter).max(3);
551        for i in 0..n_init {
552            let x_rand: Vec<f64> = (0..n_dims)
553                .map(|d| {
554                    let lo = self.bounds[d].0;
555                    let hi = self.bounds[d].1;
556                    lo + rng.random::<f64>() * (hi - lo)
557                })
558                .collect();
559
560            let y = objective(&x_rand);
561            history.push((i + 1, y));
562
563            if y < best_y {
564                best_y = y;
565                let arr = Array1::from_vec(x_rand.clone());
566                best_x = Some(arr);
567            }
568
569            x_history.push(x_rand);
570            y_history.push(y);
571        }
572
573        if self.config.verbose >= 1 {
574            println!(
575                "[SMAC] Initial phase complete: {} evals, best_y={:.6}",
576                n_init, best_y
577            );
578        }
579
580        // -------------------------------------------------------------------
581        // Phase 2: Model-based optimization loop.
582        // -------------------------------------------------------------------
583        let mut surrogate = RandomForestSurrogate::new(
584            self.config.n_estimators,
585            self.config.rf_max_depth,
586            2,
587            0,
588            seed,
589        );
590
591        for iter in n_init..n_iter {
592            // Build training arrays.
593            let n_obs = x_history.len();
594            let mut x_train = Array2::zeros((n_obs, n_dims));
595            let mut y_train = Array1::zeros(n_obs);
596            for (i, (xi, &yi)) in x_history.iter().zip(y_history.iter()).enumerate() {
597                for d in 0..n_dims {
598                    x_train[[i, d]] = xi[d];
599                }
600                y_train[i] = yi;
601            }
602
603            // Fit surrogate.
604            if let Err(e) = surrogate.fit(&x_train, &y_train) {
605                if self.config.verbose >= 1 {
606                    println!("[SMAC] surrogate fit error at iter {}: {}", iter, e);
607                }
608                // Fall back to random evaluation.
609                let x_rand: Vec<f64> = (0..n_dims)
610                    .map(|d| {
611                        let lo = self.bounds[d].0;
612                        let hi = self.bounds[d].1;
613                        lo + rng.random::<f64>() * (hi - lo)
614                    })
615                    .collect();
616                let y = objective(&x_rand);
617                history.push((iter + 1, y));
618                if y < best_y {
619                    best_y = y;
620                    best_x = Some(Array1::from_vec(x_rand.clone()));
621                }
622                x_history.push(x_rand);
623                y_history.push(y);
624                continue;
625            }
626
627            // -----------------------------------------------------------
628            // Acquisition maximization: random candidates + local search.
629            // -----------------------------------------------------------
630            let n_cands = self.config.n_candidates;
631            let mut best_acq = f64::NEG_INFINITY;
632            let mut best_candidate = vec![0.0f64; n_dims];
633
634            // Random candidates.
635            for _ in 0..n_cands {
636                let cand: Vec<f64> = (0..n_dims)
637                    .map(|d| {
638                        let lo = self.bounds[d].0;
639                        let hi = self.bounds[d].1;
640                        lo + rng.random::<f64>() * (hi - lo)
641                    })
642                    .collect();
643
644                let acq =
645                    ei_random_forest(&cand, &surrogate, best_y, self.config.xi).unwrap_or(0.0);
646                if acq > best_acq {
647                    best_acq = acq;
648                    best_candidate = cand;
649                }
650            }
651
652            // Local search around the incumbent best point.
653            if let Some(ref inc) = best_x {
654                let n_local = self.config.n_local_search;
655                let inc_vec: Vec<f64> = inc.iter().copied().collect();
656
657                for step_size in [0.1, 0.05, 0.01] {
658                    for _ in 0..n_local {
659                        let neighbor: Vec<f64> = inc_vec
660                            .iter()
661                            .zip(self.bounds.iter())
662                            .map(|(&xi, &(lo, hi))| {
663                                let range = hi - lo;
664                                let perturb = (rng.random::<f64>() - 0.5) * 2.0 * step_size * range;
665                                (xi + perturb).clamp(lo, hi)
666                            })
667                            .collect();
668
669                        let acq = ei_random_forest(&neighbor, &surrogate, best_y, self.config.xi)
670                            .unwrap_or(0.0);
671                        if acq > best_acq {
672                            best_acq = acq;
673                            best_candidate = neighbor;
674                        }
675                    }
676                }
677            }
678
679            // -----------------------------------------------------------
680            // Evaluate the chosen candidate.
681            // -----------------------------------------------------------
682            let y_next = objective(&best_candidate);
683            history.push((iter + 1, y_next));
684
685            if y_next < best_y {
686                best_y = y_next;
687                best_x = Some(Array1::from_vec(best_candidate.clone()));
688            }
689
690            x_history.push(best_candidate);
691            y_history.push(y_next);
692
693            if self.config.verbose >= 2 {
694                println!(
695                    "[SMAC] iter={} acq={:.4} f={:.6} best={:.6}",
696                    iter + 1,
697                    best_acq,
698                    y_next,
699                    best_y
700                );
701            }
702        }
703
704        if self.config.verbose >= 1 {
705            println!(
706                "[SMAC] Done. n_evals={} best_f={:.6}",
707                history.len(),
708                best_y
709            );
710        }
711
712        let x_best = best_x.unwrap_or_else(|| Array1::zeros(n_dims));
713
714        Ok(SmacResult {
715            x_best,
716            f_best: best_y,
717            n_evals: history.len(),
718            history,
719        })
720    }
721}
722
723// ---------------------------------------------------------------------------
724// Normal distribution helpers
725// ---------------------------------------------------------------------------
726
727fn erf_approx(x: f64) -> f64 {
728    let p = 0.3275911_f64;
729    let (a1, a2, a3, a4, a5) = (
730        0.254829592_f64,
731        -0.284496736_f64,
732        1.421413741_f64,
733        -1.453152027_f64,
734        1.061405429_f64,
735    );
736    let sign = if x < 0.0 { -1.0 } else { 1.0 };
737    let xa = x.abs();
738    let t = 1.0 / (1.0 + p * xa);
739    let poly = (((a5 * t + a4) * t + a3) * t + a2) * t + a1;
740    sign * (1.0 - poly * t * (-xa * xa).exp())
741}
742
743fn norm_cdf(z: f64) -> f64 {
744    0.5 * (1.0 + erf_approx(z / std::f64::consts::SQRT_2))
745}
746
747fn norm_pdf(z: f64) -> f64 {
748    (-0.5 * z * z).exp() / (2.0 * std::f64::consts::PI).sqrt()
749}
750
751// ---------------------------------------------------------------------------
752// Tests
753// ---------------------------------------------------------------------------
754
755#[cfg(test)]
756mod tests {
757    use super::*;
758    use scirs2_core::ndarray::{Array1, Array2};
759
760    fn make_simple_data() -> (Array2<f64>, Array1<f64>) {
761        // f(x) = x^2, 8 training points.
762        let xs: Vec<f64> = vec![0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5];
763        let ys: Vec<f64> = xs.iter().map(|&x| x * x).collect();
764        let x = Array2::from_shape_vec((8, 1), xs).expect("shape");
765        let y = Array1::from_vec(ys);
766        (x, y)
767    }
768
769    #[test]
770    fn test_rf_surrogate_fit_predict() {
771        let (x, y) = make_simple_data();
772        let mut rf = RandomForestSurrogate::new(20, 8, 2, 0, 42);
773        rf.fit(&x, &y).expect("fit");
774
775        assert!(rf.is_fitted());
776        assert_eq!(rf.n_estimators(), 20);
777
778        let x_test = Array2::from_shape_vec((3, 1), vec![0.25, 1.25, 2.75]).expect("shape");
779        let (mean, var) = rf.predict(&x_test).expect("predict");
780
781        // All predictions should be in a reasonable range.
782        for i in 0..3 {
783            assert!(mean[i].is_finite(), "mean[{}] must be finite", i);
784            assert!(var[i] >= 0.0, "var[{}] must be non-negative", i);
785        }
786
787        // Prediction at x=1.25 should be roughly 1.5625.
788        assert!(
789            (mean[1] - 1.5625).abs() < 2.0,
790            "mean[1]={:.4} expected ~1.56",
791            mean[1]
792        );
793    }
794
795    #[test]
796    fn test_rf_predict_monotone_variance() {
797        let (x, y) = make_simple_data();
798        let mut rf = RandomForestSurrogate::new(30, 8, 2, 0, 99);
799        rf.fit(&x, &y).expect("fit");
800
801        // Predict at a training point vs far outside the training range.
802        let x_near = Array2::from_shape_vec((1, 1), vec![1.0]).expect("shape");
803        let x_far = Array2::from_shape_vec((1, 1), vec![10.0]).expect("shape");
804        let (_, var_near) = rf.predict(&x_near).expect("predict near");
805        let (_, var_far) = rf.predict(&x_far).expect("predict far");
806
807        // Variance at an extrapolation point should be >= near in-domain.
808        // (This is a soft check; tree variance behaviour differs from GP.)
809        assert!(var_far[0] >= 0.0);
810        assert!(var_near[0] >= 0.0);
811    }
812
813    #[test]
814    fn test_ei_rf_non_negative() {
815        let (x, y) = make_simple_data();
816        let mut rf = RandomForestSurrogate::new(20, 8, 2, 0, 7);
817        rf.fit(&x, &y).expect("fit");
818
819        let cands = vec![0.3, 1.7, 2.8];
820        for c in cands {
821            let ei = ei_random_forest(&[c], &rf, 2.0, 0.01).expect("ei");
822            assert!(ei >= 0.0, "EI must be non-negative at x={}", c);
823        }
824    }
825
826    #[test]
827    fn test_smac_minimizes_quadratic() {
828        let bounds = vec![(-3.0_f64, 3.0_f64)];
829        let config = SmacConfig {
830            n_initial: 5,
831            n_iterations: 25,
832            n_candidates: 50,
833            n_local_search: 3,
834            n_estimators: 20,
835            seed: Some(42),
836            verbose: 0,
837            ..Default::default()
838        };
839        let mut smac = SmacOptimizer::new(bounds, config).expect("build smac");
840        let result = smac.optimize(|x: &[f64]| x[0].powi(2), 25).expect("opt");
841
842        assert!(result.f_best.is_finite());
843        assert!(
844            result.f_best < 0.5,
845            "Expected f_best < 0.5, got {}",
846            result.f_best
847        );
848        assert_eq!(result.n_evals, 25);
849    }
850
851    #[test]
852    fn test_smac_2d_rosenbrock_trend() {
853        let bounds = vec![(-2.0_f64, 2.0_f64), (-2.0, 2.0)];
854        let config = SmacConfig {
855            n_initial: 8,
856            n_iterations: 30,
857            n_candidates: 80,
858            n_estimators: 25,
859            seed: Some(123),
860            verbose: 0,
861            ..Default::default()
862        };
863        let mut smac = SmacOptimizer::new(bounds, config).expect("build smac");
864        // Rosenbrock (global min at [1,1] = 0).
865        let result = smac
866            .optimize(
867                |x: &[f64]| (1.0 - x[0]).powi(2) + 100.0 * (x[1] - x[0].powi(2)).powi(2),
868                30,
869            )
870            .expect("opt");
871
872        assert!(result.f_best.is_finite());
873        // Only 30 evaluations; just check it makes some progress.
874        assert!(result.f_best < 1000.0, "f_best={}", result.f_best);
875    }
876
877    #[test]
878    fn test_rf_unfitted_predict_error() {
879        let rf = RandomForestSurrogate::new(10, 5, 2, 0, 0);
880        let x = Array2::zeros((2, 1));
881        assert!(rf.predict(&x).is_err());
882    }
883}
scirs2_optimize/blackbox/model_based.rs

scirs2_optimize/blackbox/
model_based.rs