Skip to main content

math_audio_optimisation/
bayesian.rs

1//! Bayesian optimisation for expensive bounded continuous objectives.
2//!
3//! The implementation is intentionally domain-agnostic. It models objective
4//! values with a Gaussian-process surrogate over normalised `[0, 1]^n`
5//! coordinates, uses a Matérn-5/2 kernel with ARD lengthscales, and proposes
6//! one or more candidates per iteration through EI or Monte-Carlo batch
7//! q-EI/qEHVI acquisition.
8
9use nalgebra::{DMatrix, DVector};
10use ndarray::Array1;
11use rand::rngs::StdRng;
12use rand::{Rng, SeedableRng};
13use rayon::prelude::*;
14
15use crate::CallbackAction;
16use crate::error::{DEError, Result};
17use crate::init_sobol::init_halton;
18use crate::parallel_eval::ParallelConfig;
19
20const MIN_LENGTHSCALE: f64 = 1e-3;
21const MAX_LENGTHSCALE: f64 = 10.0;
22const LENGTHSCALE_SEARCH_PASSES: usize = 8;
23const QEI_MC_DRAWS: usize = 128;
24const EHVI_POSTERIOR_DRAWS: usize = 64;
25const EHVI_HV_DRAWS: usize = 512;
26const CHOLESKY_ATTEMPTS: usize = 10;
27
28/// Acquisition strategy used to select candidates from the surrogate.
29#[derive(Debug, Clone, Copy, PartialEq, Eq)]
30pub enum BayesAcquisition {
31    /// Expected Improvement for scalar minimisation.
32    ExpectedImprovement,
33    /// Monte-Carlo batch Expected Improvement under the joint GP posterior.
34    QExpectedImprovement,
35    /// Thompson sampling from independent posterior marginals.
36    Thompson,
37}
38
39/// Per-iteration callback payload for [`bayesian_optimization`].
40#[derive(Debug, Clone)]
41pub struct BayesOptIntermediate {
42    /// Current best parameter vector in original bounded coordinates.
43    pub x: Array1<f64>,
44    /// Current best objective value.
45    pub fun: f64,
46    /// Batch/iteration index.
47    pub iter: usize,
48    /// Objective evaluations consumed.
49    pub nfev: usize,
50    /// Largest posterior standard deviation observed in the candidate pool.
51    pub posterior_std: f64,
52}
53
54/// Callback type used by [`BayesOptConfig`].
55pub type BayesOptCallback = Box<dyn FnMut(&BayesOptIntermediate) -> CallbackAction + Send>;
56
57/// Configuration for [`bayesian_optimization`].
58pub struct BayesOptConfig {
59    /// `(lower, upper)` bounds per parameter.
60    pub bounds: Vec<(f64, f64)>,
61    /// Optional initial point. Values outside bounds are clipped.
62    pub x0: Option<Array1<f64>>,
63    /// Number of Sobol hot-start samples. `0` uses a dimension-dependent default.
64    pub initial_samples: usize,
65    /// Number of candidates evaluated per BO iteration. `0` uses `1`.
66    pub batch_size: usize,
67    /// Maximum objective evaluations.
68    pub maxeval: usize,
69    /// Number of acquisition candidates considered per iteration.
70    pub candidate_pool_size: usize,
71    /// Optional ARD lengthscales in normalised parameter coordinates.
72    pub lengthscales: Option<Vec<f64>>,
73    /// Signal variance for the Matérn-5/2 kernel.
74    pub kernel_variance: f64,
75    /// Diagonal observation-noise/jitter floor.
76    pub noise: f64,
77    /// EI exploration offset. Larger values prefer uncertain candidates.
78    pub exploration: f64,
79    /// Stop once the candidate-pool posterior standard deviation drops below
80    /// this value. `0.0` disables the uncertainty stop.
81    pub posterior_std_threshold: f64,
82    /// Parallel batch-evaluation configuration.
83    pub parallel: ParallelConfig,
84    /// Optional RNG seed for deterministic runs.
85    pub seed: Option<u64>,
86    /// Acquisition strategy.
87    pub acquisition: BayesAcquisition,
88    /// Optional per-iteration callback. Returning [`CallbackAction::Stop`]
89    /// terminates early with the best point seen so far.
90    pub callback: Option<BayesOptCallback>,
91}
92
93impl Default for BayesOptConfig {
94    fn default() -> Self {
95        Self {
96            bounds: Vec::new(),
97            x0: None,
98            initial_samples: 0,
99            batch_size: 1,
100            maxeval: 500,
101            candidate_pool_size: 0,
102            lengthscales: None,
103            kernel_variance: 1.0,
104            noise: 1e-8,
105            exploration: 0.01,
106            posterior_std_threshold: 0.0,
107            parallel: ParallelConfig::default(),
108            seed: None,
109            acquisition: BayesAcquisition::QExpectedImprovement,
110            callback: None,
111        }
112    }
113}
114
115/// Result of a [`bayesian_optimization`] run.
116#[derive(Debug, Clone)]
117pub struct BayesOptReport {
118    /// Best parameter vector found.
119    pub x: Array1<f64>,
120    /// Objective value at [`Self::x`].
121    pub fun: f64,
122    /// Whether the run stopped by callback, target uncertainty, or budget after
123    /// at least one surrogate-guided iteration.
124    pub success: bool,
125    /// Human-readable termination message.
126    pub message: String,
127    /// Objective evaluations consumed.
128    pub nfev: usize,
129    /// BO batches completed after the initial design.
130    pub nit: usize,
131    /// Largest posterior standard deviation seen in the final candidate pool.
132    pub posterior_std: f64,
133}
134
135/// Pareto solution returned by [`BayesOptParetoReport`].
136#[derive(Debug, Clone)]
137pub struct BayesParetoSolution {
138    /// Parameter vector.
139    pub x: Array1<f64>,
140    /// Objective values, all minimised.
141    pub objectives: Vec<f64>,
142}
143
144/// Result of a [`bayesian_multi_objective`] run.
145#[derive(Debug, Clone)]
146pub struct BayesOptParetoReport {
147    /// Non-dominated front from all evaluated samples.
148    pub pareto_front: Vec<BayesParetoSolution>,
149    /// All evaluated samples.
150    pub population: Vec<BayesParetoSolution>,
151    /// Objective evaluations consumed.
152    pub nfev: usize,
153    /// BO batches completed after the initial design.
154    pub nit: usize,
155    /// Whether at least one surrogate-guided iteration completed.
156    pub success: bool,
157    /// Human-readable termination message.
158    pub message: String,
159}
160
161#[derive(Clone)]
162struct GaussianProcess {
163    x: Vec<Array1<f64>>,
164    y_mean: f64,
165    y_scale: f64,
166    alpha: DVector<f64>,
167    chol_l: DMatrix<f64>,
168    lengthscales: Vec<f64>,
169    kernel_variance: f64,
170}
171
172impl GaussianProcess {
173    fn fit(
174        x: &[Array1<f64>],
175        y: &[f64],
176        lengthscales: &[f64],
177        kernel_variance: f64,
178        noise: f64,
179    ) -> Result<Self> {
180        if x.is_empty() || y.is_empty() || x.len() != y.len() {
181            return Err(DEError::InvalidConfig {
182                message: "GP training data must be non-empty and dimensionally aligned".into(),
183            });
184        }
185
186        let n = y.len();
187        let y_mean = y.iter().sum::<f64>() / n as f64;
188        let variance = y.iter().map(|v| (v - y_mean).powi(2)).sum::<f64>() / n as f64;
189        let y_scale = variance.sqrt().max(1e-12);
190        let y_norm = DVector::from_iterator(n, y.iter().map(|v| (v - y_mean) / y_scale));
191
192        let k = kernel_matrix(x, lengthscales, kernel_variance);
193        let base_noise = noise.max(1e-12);
194        let Some((chol_l, _jitter)) = cholesky_with_jitter(&k, base_noise) else {
195            return Err(DEError::InvalidConfig {
196                message: "failed to factor Gaussian-process kernel matrix".into(),
197            });
198        };
199        let alpha = cholesky_solve(&chol_l, &y_norm);
200
201        Ok(Self {
202            x: x.to_vec(),
203            y_mean,
204            y_scale,
205            alpha,
206            chol_l,
207            lengthscales: lengthscales.to_vec(),
208            kernel_variance,
209        })
210    }
211
212    fn predict(&self, x: &Array1<f64>) -> (f64, f64) {
213        let n = self.x.len();
214        let mut k_star = DVector::zeros(n);
215        for i in 0..n {
216            k_star[i] = matern52(x, &self.x[i], &self.lengthscales, self.kernel_variance);
217        }
218        let mean_norm = k_star.dot(&self.alpha);
219        let v = solve_lower(&self.chol_l, &k_star);
220        let var_norm = (self.kernel_variance - v.dot(&v)).max(1e-14);
221        let mean = self.y_mean + self.y_scale * mean_norm;
222        let std = self.y_scale * var_norm.sqrt();
223        (mean, std.max(1e-12))
224    }
225
226    fn predict_joint(&self, xs: &[Array1<f64>]) -> (Vec<f64>, DMatrix<f64>) {
227        let n_train = self.x.len();
228        let q = xs.len();
229        let mut k_train_star = DMatrix::zeros(n_train, q);
230        for i in 0..n_train {
231            for j in 0..q {
232                k_train_star[(i, j)] =
233                    matern52(&self.x[i], &xs[j], &self.lengthscales, self.kernel_variance);
234            }
235        }
236
237        let means = (0..q)
238            .map(|j| {
239                let mean_norm = k_train_star.column(j).dot(&self.alpha);
240                self.y_mean + self.y_scale * mean_norm
241            })
242            .collect::<Vec<_>>();
243
244        let v = solve_lower_matrix(&self.chol_l, &k_train_star);
245        let mut cov = DMatrix::zeros(q, q);
246        for i in 0..q {
247            for j in i..q {
248                let prior = matern52(&xs[i], &xs[j], &self.lengthscales, self.kernel_variance);
249                let reduction = v.column(i).dot(&v.column(j));
250                let value = (prior - reduction) * self.y_scale * self.y_scale;
251                cov[(i, j)] = value;
252                cov[(j, i)] = value;
253            }
254        }
255        for i in 0..q {
256            cov[(i, i)] = cov[(i, i)].max(1e-14);
257        }
258
259        (means, cov)
260    }
261}
262
263/// Minimise `f` with Gaussian-process Bayesian optimisation.
264///
265/// The objective receives parameters in the original coordinate system.
266/// Internally the surrogate is fit on normalised `[0, 1]^n` coordinates.
267pub fn bayesian_optimization<F>(f: &F, mut config: BayesOptConfig) -> Result<BayesOptReport>
268where
269    F: Fn(&Array1<f64>) -> f64 + Sync,
270{
271    validate_config(&config)?;
272    let n = config.bounds.len();
273    let batch_size = config.batch_size.max(1);
274    let initial_samples = derive_initial_samples(n, &config).min(config.maxeval);
275    let candidate_pool_size = derive_candidate_pool_size(n, &config);
276    let lengthscales = derive_lengthscales(n, &config)?;
277    let fixed_lengthscales = config.lengthscales.is_some();
278    let mut rng = make_rng(config.seed);
279
280    let mut samples = initial_design(&config, initial_samples);
281    let mut xs_norm = Vec::with_capacity(config.maxeval);
282    let mut ys = Vec::with_capacity(config.maxeval);
283    let mut best_x = Array1::zeros(n);
284    let mut best_y = f64::INFINITY;
285
286    evaluate_and_store(
287        f,
288        &mut samples,
289        &config,
290        &mut xs_norm,
291        &mut ys,
292        &mut best_x,
293        &mut best_y,
294    );
295
296    let mut nit = 0usize;
297    let mut final_std = f64::INFINITY;
298    let mut message = String::from("evaluation budget reached");
299    let mut success = false;
300
301    while ys.len() < config.maxeval {
302        let gp = fit_gp(&xs_norm, &ys, &lengthscales, fixed_lengthscales, &config)?;
303        let candidates = candidate_pool(&config, candidate_pool_size, &xs_norm, &mut rng);
304        if candidates.is_empty() {
305            message = String::from("candidate pool exhausted");
306            break;
307        }
308        final_std = max_posterior_std(&gp, &candidates, &config.bounds, &config.parallel);
309
310        if config.posterior_std_threshold > 0.0
311            && final_std <= config.posterior_std_threshold
312            && ys.len() >= initial_samples
313        {
314            success = true;
315            message = format!(
316                "posterior std {:.3e} below threshold {:.3e}",
317                final_std, config.posterior_std_threshold
318            );
319            break;
320        }
321
322        let remaining = config.maxeval - ys.len();
323        let q = batch_size.min(remaining);
324        let mut selected = select_batch(&gp, &candidates, best_y, q, &config, &mut rng);
325        evaluate_and_store(
326            f,
327            &mut selected,
328            &config,
329            &mut xs_norm,
330            &mut ys,
331            &mut best_x,
332            &mut best_y,
333        );
334
335        nit += 1;
336        if let Some(callback) = config.callback.as_mut() {
337            let payload = BayesOptIntermediate {
338                x: best_x.clone(),
339                fun: best_y,
340                iter: nit,
341                nfev: ys.len(),
342                posterior_std: final_std,
343            };
344            if matches!(callback(&payload), CallbackAction::Stop) {
345                success = true;
346                message = String::from("stopped by callback");
347                break;
348            }
349        }
350    }
351
352    if nit > 0 && !success {
353        success = true;
354    }
355
356    Ok(BayesOptReport {
357        x: best_x,
358        fun: best_y,
359        success,
360        message,
361        nfev: ys.len(),
362        nit,
363        posterior_std: final_std,
364    })
365}
366
367/// Minimise a vector objective with Monte-Carlo EHVI.
368///
369/// This uses one independent GP per objective, samples the joint posterior
370/// over each proposed batch, and scores candidates by expected hypervolume
371/// improvement. All objectives are minimised.
372pub fn bayesian_multi_objective<F>(f: &F, config: BayesOptConfig) -> Result<BayesOptParetoReport>
373where
374    F: Fn(&Array1<f64>) -> Vec<f64> + Sync,
375{
376    validate_config(&config)?;
377    let n = config.bounds.len();
378    let batch_size = config.batch_size.max(1);
379    let initial_samples = derive_initial_samples(n, &config).min(config.maxeval);
380    let candidate_pool_size = derive_candidate_pool_size(n, &config).clamp(32, 256);
381    let lengthscales = derive_lengthscales(n, &config)?;
382    let fixed_lengthscales = config.lengthscales.is_some();
383    let mut rng = make_rng(config.seed);
384
385    let mut initial = initial_design(&config, initial_samples);
386    let mut xs_norm = Vec::with_capacity(config.maxeval);
387    let mut values = Vec::with_capacity(config.maxeval);
388    evaluate_multi_and_store(f, &mut initial, &config, &mut xs_norm, &mut values);
389
390    let mut nit = 0usize;
391    while values.len() < config.maxeval {
392        let m = values.first().map(|v| v.len()).unwrap_or(0);
393        if m == 0 {
394            return Err(DEError::InvalidConfig {
395                message: "multi-objective function returned an empty objective vector".into(),
396            });
397        }
398
399        let gps = (0..m)
400            .map(|j| {
401                let yj = values.iter().map(|v| v[j]).collect::<Vec<_>>();
402                fit_gp(&xs_norm, &yj, &lengthscales, fixed_lengthscales, &config)
403            })
404            .collect::<Result<Vec<_>>>()?;
405
406        let candidates = candidate_pool(&config, candidate_pool_size, &xs_norm, &mut rng);
407        if candidates.is_empty() {
408            break;
409        }
410        let current_front = pareto_values(&values);
411        let reference = reference_point(&values);
412        let remaining = config.maxeval - values.len();
413        let q = batch_size.min(remaining);
414        let mut selected = select_ehvi_batch(
415            &gps,
416            &candidates,
417            &current_front,
418            &reference,
419            q,
420            &config,
421            &mut rng,
422        );
423        evaluate_multi_and_store(f, &mut selected, &config, &mut xs_norm, &mut values);
424        nit += 1;
425    }
426
427    let population = xs_norm
428        .iter()
429        .zip(values.iter())
430        .map(|(x, objectives)| BayesParetoSolution {
431            x: denormalize(x, &config.bounds),
432            objectives: objectives.clone(),
433        })
434        .collect::<Vec<_>>();
435    let pareto_front = pareto_solutions(&population);
436
437    Ok(BayesOptParetoReport {
438        pareto_front,
439        population,
440        nfev: values.len(),
441        nit,
442        success: nit > 0,
443        message: if nit > 0 {
444            String::from("evaluation budget reached")
445        } else {
446            String::from("initial design consumed evaluation budget")
447        },
448    })
449}
450
451fn validate_config(config: &BayesOptConfig) -> Result<()> {
452    if config.bounds.is_empty() {
453        return Err(DEError::BoundsMismatch {
454            lower_len: 0,
455            upper_len: 0,
456        });
457    }
458    for (i, (lo, hi)) in config.bounds.iter().enumerate() {
459        if lo > hi {
460            return Err(DEError::InvalidBounds {
461                index: i,
462                lower: *lo,
463                upper: *hi,
464            });
465        }
466    }
467    if let Some(ref x0) = config.x0
468        && x0.len() != config.bounds.len()
469    {
470        return Err(DEError::X0DimensionMismatch {
471            expected: config.bounds.len(),
472            got: x0.len(),
473        });
474    }
475    if config.maxeval == 0 {
476        return Err(DEError::InvalidConfig {
477            message: "maxeval must be greater than zero".into(),
478        });
479    }
480    Ok(())
481}
482
483fn derive_initial_samples(n: usize, config: &BayesOptConfig) -> usize {
484    if config.initial_samples == 0 {
485        (2 * n + 1).max(8)
486    } else {
487        config.initial_samples
488    }
489}
490
491fn derive_candidate_pool_size(n: usize, config: &BayesOptConfig) -> usize {
492    if config.candidate_pool_size == 0 {
493        (64 * n).max(512)
494    } else {
495        config.candidate_pool_size.max(16)
496    }
497}
498
499fn derive_lengthscales(n: usize, config: &BayesOptConfig) -> Result<Vec<f64>> {
500    if let Some(ref ls) = config.lengthscales {
501        if ls.len() != n {
502            return Err(DEError::InvalidConfig {
503                message: format!(
504                    "lengthscales dimension mismatch: expected {}, got {}",
505                    n,
506                    ls.len()
507                ),
508            });
509        }
510        Ok(ls
511            .iter()
512            .map(|v| v.clamp(MIN_LENGTHSCALE, MAX_LENGTHSCALE))
513            .collect())
514    } else {
515        Ok(vec![0.25; n])
516    }
517}
518
519fn fit_gp(
520    xs_norm: &[Array1<f64>],
521    ys: &[f64],
522    initial_lengthscales: &[f64],
523    fixed_lengthscales: bool,
524    config: &BayesOptConfig,
525) -> Result<GaussianProcess> {
526    let kernel_variance = config.kernel_variance.max(1e-12);
527    let lengthscales = if fixed_lengthscales {
528        initial_lengthscales.to_vec()
529    } else {
530        learn_lengthscales(
531            xs_norm,
532            ys,
533            initial_lengthscales,
534            kernel_variance,
535            config.noise,
536        )?
537    };
538    GaussianProcess::fit(xs_norm, ys, &lengthscales, kernel_variance, config.noise)
539}
540
541fn learn_lengthscales(
542    x: &[Array1<f64>],
543    y: &[f64],
544    initial: &[f64],
545    kernel_variance: f64,
546    noise: f64,
547) -> Result<Vec<f64>> {
548    if x.len() < 3 || initial.is_empty() {
549        return Ok(initial.to_vec());
550    }
551
552    let log_min = MIN_LENGTHSCALE.ln();
553    let log_max = MAX_LENGTHSCALE.ln();
554    let mut log_ls = initial
555        .iter()
556        .map(|v| v.clamp(MIN_LENGTHSCALE, MAX_LENGTHSCALE).ln())
557        .collect::<Vec<_>>();
558    let mut best =
559        negative_log_marginal_likelihood(x, y, &log_lengthscales(&log_ls), kernel_variance, noise)
560            .unwrap_or(f64::INFINITY);
561
562    let mut step = 0.7_f64;
563    for _ in 0..LENGTHSCALE_SEARCH_PASSES {
564        let mut improved = false;
565        for d in 0..log_ls.len() {
566            let base = log_ls[d];
567            let mut best_for_dim = best;
568            let mut best_log_value = base;
569            for direction in [-1.0, 1.0] {
570                let mut trial = log_ls.clone();
571                trial[d] = (base + direction * step).clamp(log_min, log_max);
572                if (trial[d] - base).abs() < 1e-12 {
573                    continue;
574                }
575                let ls = log_lengthscales(&trial);
576                if let Some(score) =
577                    negative_log_marginal_likelihood(x, y, &ls, kernel_variance, noise)
578                    && score < best_for_dim
579                {
580                    best_for_dim = score;
581                    best_log_value = trial[d];
582                }
583            }
584            if best_for_dim < best - 1e-7 {
585                log_ls[d] = best_log_value;
586                best = best_for_dim;
587                improved = true;
588            }
589        }
590        if !improved {
591            step *= 0.5;
592            if step < 0.05 {
593                break;
594            }
595        }
596    }
597
598    Ok(log_lengthscales(&log_ls))
599}
600
601fn log_lengthscales(log_ls: &[f64]) -> Vec<f64> {
602    log_ls
603        .iter()
604        .map(|v| v.exp().clamp(MIN_LENGTHSCALE, MAX_LENGTHSCALE))
605        .collect()
606}
607
608fn make_rng(seed: Option<u64>) -> StdRng {
609    match seed {
610        Some(s) => StdRng::seed_from_u64(s),
611        None => {
612            let mut thread_rng = rand::rng();
613            StdRng::from_rng(&mut thread_rng)
614        }
615    }
616}
617
618fn initial_design(config: &BayesOptConfig, initial_samples: usize) -> Vec<Array1<f64>> {
619    let mut samples = Vec::with_capacity(initial_samples);
620    if let Some(ref x0) = config.x0 {
621        samples.push(clip_to_bounds(x0, &config.bounds));
622    }
623
624    let needed = initial_samples.saturating_sub(samples.len());
625    for s in init_halton(config.bounds.len(), needed, &config.bounds) {
626        samples.push(Array1::from(s));
627    }
628    samples
629}
630
631fn evaluate_and_store<F>(
632    f: &F,
633    candidates: &mut [Array1<f64>],
634    config: &BayesOptConfig,
635    xs_norm: &mut Vec<Array1<f64>>,
636    ys: &mut Vec<f64>,
637    best_x: &mut Array1<f64>,
638    best_y: &mut f64,
639) where
640    F: Fn(&Array1<f64>) -> f64 + Sync,
641{
642    let values = if config.parallel.enabled && candidates.len() >= 4 {
643        candidates.par_iter().map(f).collect::<Vec<_>>()
644    } else {
645        candidates.iter().map(f).collect::<Vec<_>>()
646    };
647
648    for (x, y) in candidates.iter().zip(values) {
649        xs_norm.push(normalize(x, &config.bounds));
650        ys.push(y);
651        if y < *best_y {
652            *best_y = y;
653            *best_x = x.clone();
654        }
655    }
656}
657
658fn evaluate_multi_and_store<F>(
659    f: &F,
660    candidates: &mut [Array1<f64>],
661    config: &BayesOptConfig,
662    xs_norm: &mut Vec<Array1<f64>>,
663    values: &mut Vec<Vec<f64>>,
664) where
665    F: Fn(&Array1<f64>) -> Vec<f64> + Sync,
666{
667    let ys = if config.parallel.enabled && candidates.len() >= 4 {
668        candidates.par_iter().map(f).collect::<Vec<_>>()
669    } else {
670        candidates.iter().map(f).collect::<Vec<_>>()
671    };
672
673    for (x, y) in candidates.iter().zip(ys) {
674        xs_norm.push(normalize(x, &config.bounds));
675        values.push(y);
676    }
677}
678
679fn candidate_pool(
680    config: &BayesOptConfig,
681    size: usize,
682    existing_norm: &[Array1<f64>],
683    rng: &mut StdRng,
684) -> Vec<Array1<f64>> {
685    let mut candidates = Vec::with_capacity(size);
686    let sobol_count = size / 2;
687    for s in init_halton(config.bounds.len(), sobol_count, &config.bounds) {
688        let x = Array1::from(s);
689        if is_new_point(
690            &normalize(&x, &config.bounds),
691            existing_norm,
692            &candidates,
693            &config.bounds,
694        ) {
695            candidates.push(x);
696        }
697    }
698    let mut attempts = 0usize;
699    while candidates.len() < size && attempts < size.saturating_mul(20).max(1000) {
700        let x = random_point(&config.bounds, rng);
701        if is_new_point(
702            &normalize(&x, &config.bounds),
703            existing_norm,
704            &candidates,
705            &config.bounds,
706        ) {
707            candidates.push(x);
708        }
709        attempts += 1;
710    }
711    candidates
712}
713
714fn is_new_point(
715    x_norm: &Array1<f64>,
716    existing_norm: &[Array1<f64>],
717    pending_original: &[Array1<f64>],
718    bounds: &[(f64, f64)],
719) -> bool {
720    let min_dist2 = 1e-12 / (x_norm.len().max(1) as f64);
721    if existing_norm
722        .iter()
723        .any(|x| squared_distance(x_norm, x) < min_dist2)
724    {
725        return false;
726    }
727    !pending_original
728        .iter()
729        .map(|x| normalize(x, bounds))
730        .any(|x| squared_distance(x_norm, &x) < min_dist2)
731}
732
733fn select_batch(
734    gp: &GaussianProcess,
735    candidates: &[Array1<f64>],
736    best_y: f64,
737    q: usize,
738    config: &BayesOptConfig,
739    rng: &mut StdRng,
740) -> Vec<Array1<f64>> {
741    if matches!(config.acquisition, BayesAcquisition::QExpectedImprovement) {
742        return select_qei_batch(gp, candidates, best_y, q, config, rng);
743    }
744
745    let acquisition = config.acquisition;
746    let exploration = config.exploration;
747    let bounds = &config.bounds;
748    let mut scored = if config.parallel.enabled && candidates.len() >= 4 {
749        candidates
750            .par_iter()
751            .map(|x| score_candidate(gp, x, best_y, acquisition, exploration, bounds, None))
752            .collect::<Vec<_>>()
753    } else {
754        candidates
755            .iter()
756            .map(|x| score_candidate(gp, x, best_y, acquisition, exploration, bounds, None))
757            .collect::<Vec<_>>()
758    };
759
760    if matches!(config.acquisition, BayesAcquisition::Thompson) {
761        for item in &mut scored {
762            item.0 = score_candidate(
763                gp,
764                &item.1,
765                best_y,
766                acquisition,
767                exploration,
768                bounds,
769                Some(rng),
770            )
771            .0;
772        }
773        scored.sort_by(|a, b| a.0.total_cmp(&b.0));
774    } else {
775        scored.sort_by(|a, b| b.0.total_cmp(&a.0));
776    }
777
778    let mut selected = Vec::with_capacity(q);
779    for (_, x) in scored {
780        let x_norm = normalize(&x, &config.bounds);
781        let diverse = selected.iter().all(|s| {
782            let s_norm = normalize(s, &config.bounds);
783            squared_distance(&x_norm, &s_norm) > 1e-8
784        });
785        if diverse {
786            selected.push(x);
787        }
788        if selected.len() == q {
789            break;
790        }
791    }
792    selected
793}
794
795fn score_candidate(
796    gp: &GaussianProcess,
797    x: &Array1<f64>,
798    best_y: f64,
799    acquisition: BayesAcquisition,
800    exploration: f64,
801    bounds: &[(f64, f64)],
802    rng: Option<&mut StdRng>,
803) -> (f64, Array1<f64>) {
804    let x_norm = normalize(x, bounds);
805    let (mean, std) = gp.predict(&x_norm);
806    let score = match (acquisition, rng) {
807        (BayesAcquisition::ExpectedImprovement, _) => {
808            expected_improvement(best_y, mean, std, exploration)
809        }
810        (BayesAcquisition::Thompson, Some(rng)) => mean + std * standard_normal(rng),
811        (BayesAcquisition::QExpectedImprovement | BayesAcquisition::Thompson, None)
812        | (BayesAcquisition::QExpectedImprovement, Some(_)) => mean,
813    };
814    (score, x.clone())
815}
816
817fn select_qei_batch(
818    gp: &GaussianProcess,
819    candidates: &[Array1<f64>],
820    best_y: f64,
821    q: usize,
822    config: &BayesOptConfig,
823    rng: &mut StdRng,
824) -> Vec<Array1<f64>> {
825    let candidate_norms = candidates
826        .iter()
827        .map(|x| normalize(x, &config.bounds))
828        .collect::<Vec<_>>();
829    let mut available = (0..candidates.len()).collect::<Vec<_>>();
830    let mut selected_indices: Vec<usize> = Vec::with_capacity(q);
831
832    while selected_indices.len() < q && !available.is_empty() {
833        let batch_width = selected_indices.len() + 1;
834        let common_normals = normal_draws(QEI_MC_DRAWS, batch_width, rng);
835        let mut best_candidate = available[0];
836        let mut best_score = f64::NEG_INFINITY;
837
838        for &idx in &available {
839            let mut batch = selected_indices
840                .iter()
841                .map(|&i| candidate_norms[i].clone())
842                .collect::<Vec<_>>();
843            batch.push(candidate_norms[idx].clone());
844            let score =
845                q_expected_improvement_mc(gp, &batch, best_y, config.exploration, &common_normals);
846            if score > best_score {
847                best_score = score;
848                best_candidate = idx;
849            }
850        }
851
852        selected_indices.push(best_candidate);
853        available.retain(|&idx| idx != best_candidate);
854    }
855
856    selected_indices
857        .into_iter()
858        .map(|idx| candidates[idx].clone())
859        .collect()
860}
861
862fn q_expected_improvement_mc(
863    gp: &GaussianProcess,
864    batch_norm: &[Array1<f64>],
865    best_y: f64,
866    xi: f64,
867    normals: &[Vec<f64>],
868) -> f64 {
869    if batch_norm.is_empty() || normals.is_empty() {
870        return 0.0;
871    }
872    let (means, cov) = gp.predict_joint(batch_norm);
873    let mut total = 0.0;
874    for z in normals {
875        let sample = sample_multivariate_normal(&means, &cov, z);
876        let batch_min = sample.into_iter().fold(f64::INFINITY, f64::min);
877        total += (best_y - batch_min - xi.max(0.0)).max(0.0);
878    }
879    total / normals.len() as f64
880}
881
882fn max_posterior_std(
883    gp: &GaussianProcess,
884    candidates: &[Array1<f64>],
885    bounds: &[(f64, f64)],
886    parallel: &ParallelConfig,
887) -> f64 {
888    if parallel.enabled && candidates.len() >= 4 {
889        candidates
890            .par_iter()
891            .map(|x| gp.predict(&normalize(x, bounds)).1)
892            .reduce(|| 0.0, f64::max)
893    } else {
894        candidates
895            .iter()
896            .map(|x| gp.predict(&normalize(x, bounds)).1)
897            .fold(0.0, f64::max)
898    }
899}
900
901fn select_ehvi_batch(
902    gps: &[GaussianProcess],
903    candidates: &[Array1<f64>],
904    current_front: &[Vec<f64>],
905    reference: &[f64],
906    q: usize,
907    config: &BayesOptConfig,
908    rng: &mut StdRng,
909) -> Vec<Array1<f64>> {
910    let candidate_norms = candidates
911        .iter()
912        .map(|x| normalize(x, &config.bounds))
913        .collect::<Vec<_>>();
914    let lower = hypervolume_integration_lower(current_front, gps, &candidate_norms, reference);
915    let hv_samples = HypervolumeSamples::new(current_front, &lower, reference, EHVI_HV_DRAWS, rng);
916    if hv_samples.points.is_empty() {
917        return candidates.iter().take(q).cloned().collect();
918    }
919
920    let mut available = (0..candidates.len()).collect::<Vec<_>>();
921    let mut selected_indices: Vec<usize> = Vec::with_capacity(q);
922    while selected_indices.len() < q && !available.is_empty() {
923        let batch_width = selected_indices.len() + 1;
924        let common_normals = normal_tensor(EHVI_POSTERIOR_DRAWS, gps.len(), batch_width, rng);
925        let mut best_candidate = available[0];
926        let mut best_score = f64::NEG_INFINITY;
927
928        for &idx in &available {
929            let mut batch = selected_indices
930                .iter()
931                .map(|&i| candidate_norms[i].clone())
932                .collect::<Vec<_>>();
933            batch.push(candidate_norms[idx].clone());
934            let score = expected_hypervolume_improvement_mc(
935                gps,
936                &batch,
937                reference,
938                &hv_samples,
939                &common_normals,
940            );
941            if score > best_score {
942                best_score = score;
943                best_candidate = idx;
944            }
945        }
946
947        selected_indices.push(best_candidate);
948        available.retain(|&idx| idx != best_candidate);
949    }
950
951    selected_indices
952        .into_iter()
953        .map(|idx| candidates[idx].clone())
954        .collect()
955}
956
957fn matern52(a: &Array1<f64>, b: &Array1<f64>, lengthscales: &[f64], variance: f64) -> f64 {
958    let mut r2 = 0.0;
959    for i in 0..a.len() {
960        let ell = lengthscales[i].max(1e-6);
961        r2 += ((a[i] - b[i]) / ell).powi(2);
962    }
963    let r = r2.sqrt();
964    let sqrt5r = 5.0_f64.sqrt() * r;
965    variance * (1.0 + sqrt5r + 5.0 * r * r / 3.0) * (-sqrt5r).exp()
966}
967
968fn kernel_matrix(x: &[Array1<f64>], lengthscales: &[f64], kernel_variance: f64) -> DMatrix<f64> {
969    let n = x.len();
970    let mut k = DMatrix::zeros(n, n);
971    for i in 0..n {
972        for j in i..n {
973            let kij = matern52(&x[i], &x[j], lengthscales, kernel_variance);
974            k[(i, j)] = kij;
975            k[(j, i)] = kij;
976        }
977    }
978    k
979}
980
981fn negative_log_marginal_likelihood(
982    x: &[Array1<f64>],
983    y: &[f64],
984    lengthscales: &[f64],
985    kernel_variance: f64,
986    noise: f64,
987) -> Option<f64> {
988    if x.is_empty() || x.len() != y.len() {
989        return None;
990    }
991    let n = y.len();
992    let y_mean = y.iter().sum::<f64>() / n as f64;
993    let variance = y.iter().map(|v| (v - y_mean).powi(2)).sum::<f64>() / n as f64;
994    let y_scale = variance.sqrt().max(1e-12);
995    let y_norm = DVector::from_iterator(n, y.iter().map(|v| (v - y_mean) / y_scale));
996    let k = kernel_matrix(x, lengthscales, kernel_variance);
997    let (chol_l, _) = cholesky_with_jitter(&k, noise.max(1e-12))?;
998    let alpha = cholesky_solve(&chol_l, &y_norm);
999    let data_fit = 0.5 * y_norm.dot(&alpha);
1000    let complexity = (0..n).map(|i| chol_l[(i, i)].ln()).sum::<f64>();
1001    let normalizer = 0.5 * n as f64 * (2.0 * std::f64::consts::PI).ln();
1002    let score = data_fit + complexity + normalizer;
1003    score.is_finite().then_some(score)
1004}
1005
1006fn cholesky_with_jitter(matrix: &DMatrix<f64>, base_jitter: f64) -> Option<(DMatrix<f64>, f64)> {
1007    for attempt in 0..CHOLESKY_ATTEMPTS {
1008        let jitter = base_jitter.max(1e-14) * 10f64.powi(attempt as i32);
1009        let mut shifted = matrix.clone();
1010        for i in 0..shifted.nrows() {
1011            shifted[(i, i)] += jitter;
1012        }
1013        if let Some(l) = cholesky_lower(&shifted) {
1014            return Some((l, jitter));
1015        }
1016    }
1017    None
1018}
1019
1020fn cholesky_lower(matrix: &DMatrix<f64>) -> Option<DMatrix<f64>> {
1021    let n = matrix.nrows();
1022    if n != matrix.ncols() {
1023        return None;
1024    }
1025    let mut l = DMatrix::zeros(n, n);
1026    for i in 0..n {
1027        for j in 0..=i {
1028            let mut sum = matrix[(i, j)];
1029            for k in 0..j {
1030                sum -= l[(i, k)] * l[(j, k)];
1031            }
1032            if i == j {
1033                if !sum.is_finite() || sum <= 0.0 {
1034                    return None;
1035                }
1036                l[(i, j)] = sum.sqrt();
1037            } else {
1038                let diag = l[(j, j)];
1039                if diag <= 0.0 {
1040                    return None;
1041                }
1042                l[(i, j)] = sum / diag;
1043            }
1044        }
1045    }
1046    Some(l)
1047}
1048
1049fn cholesky_solve(l: &DMatrix<f64>, b: &DVector<f64>) -> DVector<f64> {
1050    let y = solve_lower(l, b);
1051    solve_upper_from_lower(l, &y)
1052}
1053
1054fn solve_lower(l: &DMatrix<f64>, b: &DVector<f64>) -> DVector<f64> {
1055    let n = l.nrows();
1056    let mut x = DVector::zeros(n);
1057    for i in 0..n {
1058        let mut sum = b[i];
1059        for j in 0..i {
1060            sum -= l[(i, j)] * x[j];
1061        }
1062        x[i] = sum / l[(i, i)];
1063    }
1064    x
1065}
1066
1067fn solve_upper_from_lower(l: &DMatrix<f64>, b: &DVector<f64>) -> DVector<f64> {
1068    let n = l.nrows();
1069    let mut x = DVector::zeros(n);
1070    for i in (0..n).rev() {
1071        let mut sum = b[i];
1072        for j in (i + 1)..n {
1073            sum -= l[(j, i)] * x[j];
1074        }
1075        x[i] = sum / l[(i, i)];
1076    }
1077    x
1078}
1079
1080fn solve_lower_matrix(l: &DMatrix<f64>, b: &DMatrix<f64>) -> DMatrix<f64> {
1081    let mut out = DMatrix::zeros(b.nrows(), b.ncols());
1082    for col in 0..b.ncols() {
1083        let rhs = DVector::from_iterator(b.nrows(), (0..b.nrows()).map(|row| b[(row, col)]));
1084        let solved = solve_lower(l, &rhs);
1085        for row in 0..b.nrows() {
1086            out[(row, col)] = solved[row];
1087        }
1088    }
1089    out
1090}
1091
1092fn expected_improvement(best: f64, mean: f64, std: f64, xi: f64) -> f64 {
1093    if std <= 1e-12 {
1094        return 0.0;
1095    }
1096    let improvement = best - mean - xi.max(0.0);
1097    let z = improvement / std;
1098    improvement * normal_cdf(z) + std * normal_pdf(z)
1099}
1100
1101fn normal_pdf(z: f64) -> f64 {
1102    (-0.5 * z * z).exp() / (2.0 * std::f64::consts::PI).sqrt()
1103}
1104
1105fn normal_cdf(z: f64) -> f64 {
1106    0.5 * (1.0 + erf(z / 2.0_f64.sqrt()))
1107}
1108
1109fn erf(x: f64) -> f64 {
1110    let sign = if x < 0.0 { -1.0 } else { 1.0 };
1111    let x = x.abs();
1112    let t = 1.0 / (1.0 + 0.3275911 * x);
1113    let y = 1.0
1114        - (((((1.061405429 * t - 1.453152027) * t) + 1.421413741) * t - 0.284496736) * t
1115            + 0.254829592)
1116            * t
1117            * (-x * x).exp();
1118    sign * y
1119}
1120
1121fn standard_normal(rng: &mut StdRng) -> f64 {
1122    let u1 = rng.random::<f64>().clamp(1e-12, 1.0);
1123    let u2 = rng.random::<f64>();
1124    (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos()
1125}
1126
1127fn normal_draws(draws: usize, dim: usize, rng: &mut StdRng) -> Vec<Vec<f64>> {
1128    (0..draws.max(1))
1129        .map(|_| (0..dim).map(|_| standard_normal(rng)).collect())
1130        .collect()
1131}
1132
1133fn normal_tensor(
1134    draws: usize,
1135    objectives: usize,
1136    dim: usize,
1137    rng: &mut StdRng,
1138) -> Vec<Vec<Vec<f64>>> {
1139    (0..draws.max(1))
1140        .map(|_| {
1141            (0..objectives)
1142                .map(|_| (0..dim).map(|_| standard_normal(rng)).collect())
1143                .collect()
1144        })
1145        .collect()
1146}
1147
1148fn sample_multivariate_normal(mean: &[f64], cov: &DMatrix<f64>, normals: &[f64]) -> Vec<f64> {
1149    let q = mean.len();
1150    if q == 0 {
1151        return Vec::new();
1152    }
1153    if normals.len() != q || cov.nrows() != q || cov.ncols() != q {
1154        return mean.to_vec();
1155    }
1156
1157    let mut sym = DMatrix::zeros(q, q);
1158    for i in 0..q {
1159        for j in 0..q {
1160            sym[(i, j)] = 0.5 * (cov[(i, j)] + cov[(j, i)]);
1161        }
1162    }
1163    if let Some((l, _)) = cholesky_with_jitter(&sym, 1e-12) {
1164        let z = DVector::from_column_slice(normals);
1165        let correlated = l * z;
1166        return mean
1167            .iter()
1168            .enumerate()
1169            .map(|(i, m)| m + correlated[i])
1170            .collect();
1171    }
1172
1173    mean.iter()
1174        .enumerate()
1175        .map(|(i, m)| m + cov[(i, i)].max(1e-14).sqrt() * normals[i])
1176        .collect()
1177}
1178
1179fn normalize(x: &Array1<f64>, bounds: &[(f64, f64)]) -> Array1<f64> {
1180    if bounds.is_empty() {
1181        return x.clone();
1182    }
1183    Array1::from_iter(x.iter().zip(bounds.iter()).map(|(&v, &(lo, hi))| {
1184        let span = (hi - lo).max(1e-12);
1185        ((v - lo) / span).clamp(0.0, 1.0)
1186    }))
1187}
1188
1189fn denormalize(x: &Array1<f64>, bounds: &[(f64, f64)]) -> Array1<f64> {
1190    Array1::from_iter(
1191        x.iter()
1192            .zip(bounds.iter())
1193            .map(|(&v, &(lo, hi))| lo + v.clamp(0.0, 1.0) * (hi - lo)),
1194    )
1195}
1196
1197fn clip_to_bounds(x: &Array1<f64>, bounds: &[(f64, f64)]) -> Array1<f64> {
1198    Array1::from_iter(
1199        x.iter()
1200            .zip(bounds.iter())
1201            .map(|(&v, &(lo, hi))| v.clamp(lo, hi)),
1202    )
1203}
1204
1205fn random_point(bounds: &[(f64, f64)], rng: &mut StdRng) -> Array1<f64> {
1206    Array1::from_iter(bounds.iter().map(|&(lo, hi)| {
1207        if hi <= lo {
1208            lo
1209        } else {
1210            rng.random_range(lo..=hi)
1211        }
1212    }))
1213}
1214
1215fn squared_distance(a: &Array1<f64>, b: &Array1<f64>) -> f64 {
1216    a.iter().zip(b.iter()).map(|(x, y)| (x - y).powi(2)).sum()
1217}
1218
1219fn pareto_solutions(population: &[BayesParetoSolution]) -> Vec<BayesParetoSolution> {
1220    let mut front = Vec::new();
1221    for (i, solution) in population.iter().enumerate() {
1222        let dominated = population
1223            .iter()
1224            .enumerate()
1225            .any(|(j, other)| i != j && dominates(&other.objectives, &solution.objectives));
1226        if !dominated {
1227            front.push(solution.clone());
1228        }
1229    }
1230    front
1231}
1232
1233fn pareto_values(values: &[Vec<f64>]) -> Vec<Vec<f64>> {
1234    let mut front = Vec::new();
1235    for (i, value) in values.iter().enumerate() {
1236        let dominated = values
1237            .iter()
1238            .enumerate()
1239            .any(|(j, other)| i != j && dominates(other, value));
1240        if !dominated {
1241            front.push(value.clone());
1242        }
1243    }
1244    front
1245}
1246
1247fn dominates(a: &[f64], b: &[f64]) -> bool {
1248    a.iter().zip(b.iter()).all(|(x, y)| x <= y) && a.iter().zip(b.iter()).any(|(x, y)| x < y)
1249}
1250
1251fn reference_point(values: &[Vec<f64>]) -> Vec<f64> {
1252    let m = values.first().map(|v| v.len()).unwrap_or(0);
1253    let mut ideal = vec![f64::INFINITY; m];
1254    let mut nadir = vec![f64::NEG_INFINITY; m];
1255    for v in values {
1256        for j in 0..m {
1257            ideal[j] = ideal[j].min(v[j]);
1258            nadir[j] = nadir[j].max(v[j]);
1259        }
1260    }
1261    (0..m)
1262        .map(|j| {
1263            let span = (nadir[j] - ideal[j]).abs().max(1.0);
1264            nadir[j] + 0.2 * span
1265        })
1266        .collect()
1267}
1268
1269struct HypervolumeSamples {
1270    points: Vec<Vec<f64>>,
1271    front_dominated: Vec<bool>,
1272    volume: f64,
1273}
1274
1275impl HypervolumeSamples {
1276    fn new(
1277        front: &[Vec<f64>],
1278        lower: &[f64],
1279        reference: &[f64],
1280        draws: usize,
1281        rng: &mut StdRng,
1282    ) -> Self {
1283        if lower.len() != reference.len() || lower.iter().zip(reference).any(|(lo, hi)| lo >= hi) {
1284            return Self {
1285                points: Vec::new(),
1286                front_dominated: Vec::new(),
1287                volume: 0.0,
1288            };
1289        }
1290
1291        let volume = lower
1292            .iter()
1293            .zip(reference.iter())
1294            .map(|(lo, hi)| hi - lo)
1295            .product::<f64>();
1296        let mut points = Vec::with_capacity(draws.max(1));
1297        let mut front_dominated = Vec::with_capacity(draws.max(1));
1298        for _ in 0..draws.max(1) {
1299            let point = lower
1300                .iter()
1301                .zip(reference.iter())
1302                .map(|(lo, hi)| lo + rng.random::<f64>() * (hi - lo))
1303                .collect::<Vec<_>>();
1304            front_dominated.push(front.iter().any(|p| dominates_or_equal(p, &point)));
1305            points.push(point);
1306        }
1307
1308        Self {
1309            points,
1310            front_dominated,
1311            volume,
1312        }
1313    }
1314}
1315
1316fn hypervolume_integration_lower(
1317    front: &[Vec<f64>],
1318    gps: &[GaussianProcess],
1319    candidates_norm: &[Array1<f64>],
1320    reference: &[f64],
1321) -> Vec<f64> {
1322    let m = reference.len();
1323    let mut lower = vec![f64::INFINITY; m];
1324    for point in front {
1325        for j in 0..m {
1326            lower[j] = lower[j].min(point[j]);
1327        }
1328    }
1329    for (j, gp) in gps.iter().enumerate().take(m) {
1330        for x in candidates_norm {
1331            let (mean, std) = gp.predict(x);
1332            lower[j] = lower[j].min(mean - 4.0 * std);
1333        }
1334    }
1335    for j in 0..m {
1336        if !lower[j].is_finite() {
1337            lower[j] = reference[j] - 1.0;
1338        }
1339        if lower[j] >= reference[j] {
1340            lower[j] = reference[j] - 1.0;
1341        }
1342    }
1343    lower
1344}
1345
1346fn expected_hypervolume_improvement_mc(
1347    gps: &[GaussianProcess],
1348    batch_norm: &[Array1<f64>],
1349    reference: &[f64],
1350    hv_samples: &HypervolumeSamples,
1351    normals: &[Vec<Vec<f64>>],
1352) -> f64 {
1353    if gps.is_empty() || batch_norm.is_empty() || normals.is_empty() {
1354        return 0.0;
1355    }
1356
1357    let joint = gps
1358        .iter()
1359        .map(|gp| gp.predict_joint(batch_norm))
1360        .collect::<Vec<_>>();
1361    let mut total = 0.0;
1362    for draw in normals {
1363        let mut sampled_points = vec![vec![0.0; gps.len()]; batch_norm.len()];
1364        for (objective, (means, cov)) in joint.iter().enumerate() {
1365            let sample = sample_multivariate_normal(means, cov, &draw[objective]);
1366            for i in 0..batch_norm.len() {
1367                sampled_points[i][objective] = sample[i];
1368            }
1369        }
1370        total += hypervolume_improvement_from_samples(&sampled_points, reference, hv_samples);
1371    }
1372    total / normals.len() as f64
1373}
1374
1375fn hypervolume_improvement_from_samples(
1376    candidates: &[Vec<f64>],
1377    reference: &[f64],
1378    hv_samples: &HypervolumeSamples,
1379) -> f64 {
1380    if candidates
1381        .iter()
1382        .any(|point| point.len() != reference.len())
1383    {
1384        return 0.0;
1385    }
1386
1387    let mut improved = 0usize;
1388    for (point, front_dominated) in hv_samples
1389        .points
1390        .iter()
1391        .zip(hv_samples.front_dominated.iter())
1392    {
1393        if *front_dominated {
1394            continue;
1395        }
1396        if candidates
1397            .iter()
1398            .any(|candidate| dominates_or_equal(candidate, point))
1399        {
1400            improved += 1;
1401        }
1402    }
1403    hv_samples.volume * improved as f64 / hv_samples.points.len().max(1) as f64
1404}
1405
1406fn dominates_or_equal(a: &[f64], b: &[f64]) -> bool {
1407    a.iter().zip(b.iter()).all(|(x, y)| x <= y)
1408}
1409
1410#[cfg(test)]
1411mod tests {
1412    use super::*;
1413
1414    #[test]
1415    fn expected_improvement_prefers_better_mean() {
1416        let better = expected_improvement(1.0, 0.5, 0.1, 0.0);
1417        let worse = expected_improvement(1.0, 1.5, 0.1, 0.0);
1418        assert!(better > worse);
1419    }
1420
1421    #[test]
1422    fn learned_lengthscales_detect_irrelevant_dimension() {
1423        let mut xs = Vec::new();
1424        let mut ys = Vec::new();
1425        for i in 0..5 {
1426            for j in 0..5 {
1427                let x0 = i as f64 / 4.0;
1428                let x1 = j as f64 / 4.0;
1429                xs.push(Array1::from(vec![x0, x1]));
1430                ys.push((x0 - 0.35).powi(2));
1431            }
1432        }
1433        let learned = learn_lengthscales(&xs, &ys, &[0.25, 0.25], 1.0, 1e-8).unwrap();
1434        assert!(
1435            learned[1] > learned[0],
1436            "irrelevant dimension should get longer lengthscale: {learned:?}"
1437        );
1438    }
1439
1440    #[test]
1441    fn gp_prediction_reports_latent_not_observation_variance() {
1442        let xs = vec![Array1::from(vec![0.0]), Array1::from(vec![1.0])];
1443        let ys = vec![0.0, 1.0];
1444        let gp = GaussianProcess::fit(&xs, &ys, &[0.4], 1.0, 1e-2).unwrap();
1445        let (_, std) = gp.predict(&Array1::from(vec![0.0]));
1446        assert!(
1447            std < 0.08,
1448            "latent posterior std should not include observation noise: {std}"
1449        );
1450    }
1451
1452    #[test]
1453    fn qei_batch_value_includes_joint_improvement() {
1454        let xs = vec![Array1::from(vec![0.0]), Array1::from(vec![1.0])];
1455        let ys = vec![0.0, 1.0];
1456        let gp = GaussianProcess::fit(&xs, &ys, &[0.5], 1.0, 1e-8).unwrap();
1457        let batch = vec![Array1::from(vec![0.25]), Array1::from(vec![0.75])];
1458        let normals = vec![vec![0.0, 0.0], vec![-1.0, 1.0], vec![1.0, -1.0]];
1459        let score = q_expected_improvement_mc(&gp, &batch, 0.5, 0.0, &normals);
1460        assert!(score.is_finite() && score > 0.0);
1461    }
1462
1463    #[test]
1464    fn bayes_opt_converges_on_quadratic() {
1465        let f = |x: &Array1<f64>| -> f64 { (x[0] - 0.25).powi(2) + (x[1] + 0.5).powi(2) };
1466        let cfg = BayesOptConfig {
1467            bounds: vec![(-1.0, 1.0), (-1.0, 1.0)],
1468            maxeval: 60,
1469            initial_samples: 8,
1470            batch_size: 2,
1471            candidate_pool_size: 128,
1472            seed: Some(7),
1473            ..Default::default()
1474        };
1475        let report = bayesian_optimization(&f, cfg).unwrap();
1476        assert!(report.fun < 0.05, "fun={}", report.fun);
1477    }
1478
1479    #[test]
1480    fn multi_objective_returns_non_dominated_front() {
1481        let f = |x: &Array1<f64>| -> Vec<f64> { vec![x[0].powi(2), (x[0] - 1.0).powi(2)] };
1482        let cfg = BayesOptConfig {
1483            bounds: vec![(0.0, 1.0)],
1484            maxeval: 20,
1485            initial_samples: 6,
1486            batch_size: 2,
1487            candidate_pool_size: 64,
1488            seed: Some(3),
1489            ..Default::default()
1490        };
1491        let report = bayesian_multi_objective(&f, cfg).unwrap();
1492        assert!(!report.pareto_front.is_empty());
1493    }
1494
1495    #[test]
1496    fn is_new_point_scales_with_dimension() {
1497        // In 1D the old fixed threshold 1e-18 (distance 1e-9) is too lax:
1498        // points 1e-7 apart have squared distance 1e-14 > 1e-18, so they are
1499        // wrongly admitted as distinct. A dimension-scaled threshold catches
1500        // more duplicates in low dimensions.
1501        let bounds_1d = vec![(0.0, 1.0)];
1502        let q1 = Array1::from_vec(vec![0.5]);
1503        let q2 = Array1::from_vec(vec![0.5 + 1e-7]);
1504        assert!(
1505            !is_new_point(
1506                &normalize(&q2, &bounds_1d),
1507                &[normalize(&q1, &bounds_1d)],
1508                &[],
1509                &bounds_1d
1510            ),
1511            "points 1e-7 apart in 1D should be treated as duplicates"
1512        );
1513
1514        // In high dimensions the old threshold is too strict relative to the
1515        // natural scale of the space. With a scaled threshold, points that
1516        // differ by a small but meaningful amount in every coordinate are
1517        // still accepted as distinct.
1518        let bounds_100d = vec![(0.0, 1.0); 100];
1519        let p1 = Array1::from_vec(vec![0.5; 100]);
1520        let mut p2 = p1.clone();
1521        for i in 0..100 {
1522            p2[i] += 1e-8; // small but clear perturbation
1523        }
1524        assert!(
1525            is_new_point(
1526                &normalize(&p2, &bounds_100d),
1527                &[normalize(&p1, &bounds_100d)],
1528                &[],
1529                &bounds_100d
1530            ),
1531            "points differing by 1e-8 in every coordinate should be distinct in 100D"
1532        );
1533
1534        // Exact duplicates must still be rejected regardless of dimension.
1535        let r1 = Array1::from_vec(vec![0.3; 50]);
1536        let r2 = r1.clone();
1537        assert!(
1538            !is_new_point(
1539                &normalize(&r2, &bounds_100d),
1540                &[normalize(&r1, &bounds_100d)],
1541                &[],
1542                &bounds_100d
1543            ),
1544            "exact duplicates should be rejected in 50D"
1545        );
1546    }
1547}