use std::collections::HashMap;
use nalgebra::DMatrix;
use parking_lot::Mutex;
use crate::distribution::Distribution;
use crate::param::ParamValue;
use crate::rng_util;
use crate::sampler::{CompletedTrial, Sampler};
use super::common::{from_internal, internal_bounds, sample_random, to_internal};
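/// A sampler that models the objective with a Gaussian process surrogate
/// (Matern-5/2 kernel) and proposes parameters by maximizing expected
/// improvement over random candidates. Random sampling is used for the
/// first `n_startup_trials` trials and for categorical dimensions.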
pub struct GpSampler {
state: Mutex<GpState>,
}
impl GpSampler {
#[must_use]
pub fn new() -> Self {
Self {
state: Mutex::new(GpState::new(None, None, None, None)),
}
}
#[must_use]
pub fn with_seed(seed: u64) -> Self {
Self {
state: Mutex::new(GpState::new(None, None, None, Some(seed))),
}
}
#[must_use]
pub fn builder() -> GpSamplerBuilder {
GpSamplerBuilder::new()
}
}
impl Default for GpSampler {
fn default() -> Self {
Self::new()
}
}
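/// Builder for [`GpSampler`]. Every setting is optional; unset fields fall
/// back to the module defaults. For example:
/// `GpSampler::builder().n_startup_trials(20).seed(42).build()`.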
#[derive(Debug, Clone, Default)]
pub struct GpSamplerBuilder {
n_startup_trials: Option<usize>,
n_candidates: Option<usize>,
noise_variance: Option<f64>,
seed: Option<u64>,
}
impl GpSamplerBuilder {
#[must_use]
pub fn new() -> Self {
Self::default()
}
#[must_use]
pub fn n_startup_trials(mut self, n: usize) -> Self {
self.n_startup_trials = Some(n);
self
}
#[must_use]
pub fn n_candidates(mut self, n: usize) -> Self {
self.n_candidates = Some(n);
self
}
#[must_use]
pub fn noise_variance(mut self, v: f64) -> Self {
self.noise_variance = Some(v);
self
}
#[must_use]
pub fn seed(mut self, seed: u64) -> Self {
self.seed = Some(seed);
self
}
#[must_use]
pub fn build(self) -> GpSampler {
GpSampler {
state: Mutex::new(GpState::new(
self.n_startup_trials,
self.n_candidates,
self.noise_variance,
self.seed,
)),
}
}
}
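// Defaults: number of purely random startup trials, number of candidate
// points scored per acquisition round, and observation-noise variance.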
const DEFAULT_N_STARTUP: usize = 10;
const DEFAULT_N_CANDIDATES: usize = 1000;
const DEFAULT_NOISE_VAR: f64 = 1e-6;
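/// One dimension of the search space as recorded during discovery.
/// `bounds` holds the internal-representation bounds for continuous dims.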
#[derive(Clone, Debug)]
struct DimensionInfo {
distribution: Distribution,
is_continuous: bool,
bounds: Option<(f64, f64)>,
}
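/// A full candidate computed up front for one trial, replayed one dimension
/// per `sample` call via `next_dim`.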
#[derive(Clone, Debug)]
struct TrialProgress {
values: Vec<ParamValue>,
next_dim: usize,
}
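/// The first trial "discovers" the search space one distribution at a time;
/// every later trial is served from the active, GP-backed path.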
enum GpPhase {
Discovery,
Active,
}
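/// A fitted GP posterior. `alpha = K^-1 y` is cached for mean predictions;
/// `f_best` is the lowest standardized objective seen so far. The `_y_mean`
/// and `_y_std` fields are retained but currently unused (hence the leading
/// underscores).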
struct GpModel {
cholesky: nalgebra::linalg::Cholesky<f64, nalgebra::Dyn>,
alpha: nalgebra::DVector<f64>,
x_train: Vec<Vec<f64>>,
lengthscales: Vec<f64>,
signal_var: f64,
_y_mean: f64,
_y_std: f64,
f_best: f64,
}
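/// All mutable sampler state, guarded by the `Mutex` in [`GpSampler`].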
struct GpState {
rng: fastrand::Rng,
n_startup_trials: usize,
n_candidates: usize,
noise_variance: f64,
phase: GpPhase,
dimensions: Vec<DimensionInfo>,
trial_progress: HashMap<u64, TrialProgress>,
discovery_trial_id: Option<u64>,
}
impl GpState {
fn new(
n_startup: Option<usize>,
n_candidates: Option<usize>,
noise_var: Option<f64>,
seed: Option<u64>,
) -> Self {
let rng = seed.map_or_else(fastrand::Rng::new, fastrand::Rng::with_seed);
Self {
rng,
n_startup_trials: n_startup.unwrap_or(DEFAULT_N_STARTUP),
n_candidates: n_candidates.unwrap_or(DEFAULT_N_CANDIDATES),
noise_variance: noise_var.unwrap_or(DEFAULT_NOISE_VAR),
phase: GpPhase::Discovery,
dimensions: Vec::new(),
trial_progress: HashMap::new(),
discovery_trial_id: None,
}
}
}
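/// Matern-5/2 kernel with ARD lengthscales:
/// `k(x, x') = s^2 * (1 + sqrt(5) r + 5/3 r^2) * exp(-sqrt(5) r)`,
/// where `r` is the lengthscale-scaled Euclidean distance.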
fn matern52(x1: &[f64], x2: &[f64], lengthscales: &[f64], signal_var: f64) -> f64 {
let mut r_sq = 0.0;
for i in 0..x1.len() {
let diff = (x1[i] - x2[i]) / lengthscales[i];
r_sq += diff * diff;
}
let r = r_sq.sqrt();
let sqrt5_r = SQRT_5 * r;
signal_var * (1.0 + sqrt5_r + 5.0 / 3.0 * r_sq) * (-sqrt5_r).exp()
}
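/// Builds the `n x n` training covariance matrix `K + noise_var * I`.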
fn kernel_matrix(
x: &[Vec<f64>],
lengthscales: &[f64],
signal_var: f64,
noise_var: f64,
) -> DMatrix<f64> {
let n = x.len();
DMatrix::from_fn(n, n, |i, j| {
let k = matern52(&x[i], &x[j], lengthscales, signal_var);
if i == j { k + noise_var } else { k }
})
}
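/// Covariances `k(x*, x_i)` between a query point and every training point.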
fn kernel_vector(
x_star: &[f64],
x_train: &[Vec<f64>],
lengthscales: &[f64],
signal_var: f64,
) -> nalgebra::DVector<f64> {
nalgebra::DVector::from_fn(x_train.len(), |i, _| {
matern52(x_star, &x_train[i], lengthscales, signal_var)
})
}
/// `sqrt(5)`, used by the Matern-5/2 kernel above.
const SQRT_5: f64 = 2.236_067_977_499_789_7;
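/// Fits a GP to standardized objective values. Lengthscales are set per
/// dimension from the empirical spread of the inputs (a simple heuristic,
/// not a marginal-likelihood fit). Returns `None` when there is no data or
/// the Cholesky factorization fails.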
#[allow(clippy::cast_precision_loss)]
fn fit_gp(x_train: &[Vec<f64>], y_train: &[f64], noise_var: f64) -> Option<GpModel> {
let n = y_train.len();
if n == 0 {
return None;
}
let y_mean = y_train.iter().sum::<f64>() / n as f64;
let y_var = if n > 1 {
y_train.iter().map(|&y| (y - y_mean).powi(2)).sum::<f64>() / (n - 1) as f64
} else {
1.0
};
let y_std = y_var.sqrt().max(1e-10);
let y_standardized: Vec<f64> = y_train.iter().map(|&y| (y - y_mean) / y_std).collect();
let f_best = y_standardized.iter().copied().fold(f64::INFINITY, f64::min);
let d = if x_train.is_empty() {
0
} else {
x_train[0].len()
};
let lengthscales: Vec<f64> = (0..d)
.map(|j| {
let vals: Vec<f64> = x_train.iter().map(|x| x[j]).collect();
let mean_j = vals.iter().sum::<f64>() / n as f64;
let var_j = vals.iter().map(|&v| (v - mean_j).powi(2)).sum::<f64>() / n as f64;
var_j.sqrt().max(0.01)
})
.collect();
let signal_var = 1.0;
let k = kernel_matrix(x_train, &lengthscales, signal_var, noise_var);
let cholesky = nalgebra::linalg::Cholesky::new(k)?;
let y_vec = nalgebra::DVector::from_column_slice(&y_standardized);
let alpha = cholesky.solve(&y_vec);
Some(GpModel {
cholesky,
alpha,
x_train: x_train.to_vec(),
lengthscales,
signal_var,
_y_mean: y_mean,
_y_std: y_std,
f_best,
})
}
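/// Posterior mean and standard deviation at `x`, in standardized objective
/// space; the variance is clamped at zero before taking the square root.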
fn predict(model: &GpModel, x: &[f64]) -> (f64, f64) {
let k_star = kernel_vector(x, &model.x_train, &model.lengthscales, model.signal_var);
let mean = k_star.dot(&model.alpha);
let k_self = model.signal_var;
let v = model.cholesky.solve(&k_star);
let var = (k_self - k_star.dot(&v)).max(0.0);
(mean, var.sqrt())
}
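/// Standard normal density.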
fn norm_pdf(x: f64) -> f64 {
const INV_SQRT_2PI: f64 = 0.398_942_280_401_432_7;
INV_SQRT_2PI * (-0.5 * x * x).exp()
}
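/// Standard normal CDF via the Abramowitz-Stegun polynomial approximation
/// (formula 26.2.17), accurate to about 7.5e-8.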
fn norm_cdf(x: f64) -> f64 {
if x < -8.0 {
return 0.0;
}
if x > 8.0 {
return 1.0;
}
let abs_x = x.abs();
let t = 1.0 / (1.0 + 0.231_641_9 * abs_x);
let t2 = t * t;
let t3 = t2 * t;
let t4 = t3 * t;
let t5 = t4 * t;
let poly = 0.319_381_530 * t - 0.356_563_782 * t2 + 1.781_477_937 * t3 - 1.821_255_978 * t4
+ 1.330_274_429 * t5;
let pdf = norm_pdf(abs_x);
let cdf = 1.0 - pdf * poly;
if x >= 0.0 { cdf } else { 1.0 - cdf }
}
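/// Expected improvement for minimization:
/// `EI = (f_best - mean) * Phi(z) + std * phi(z)` with
/// `z = (f_best - mean) / std`.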
fn expected_improvement(mean: f64, std: f64, f_best: f64) -> f64 {
if std < 1e-12 {
return (f_best - mean).max(0.0);
}
let z = (f_best - mean) / std;
let improvement = (f_best - mean) * norm_cdf(z) + std * norm_pdf(z);
improvement.max(0.0)
}
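/// Maximizes EI by scoring `n_candidates` uniform random points in the unit
/// hypercube and keeping the best one.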
fn optimize_acquisition(
model: &GpModel,
n_dims: usize,
n_candidates: usize,
rng: &mut fastrand::Rng,
) -> Vec<f64> {
let mut best_ei = f64::NEG_INFINITY;
let mut best_x = vec![0.5; n_dims];
for _ in 0..n_candidates {
let x: Vec<f64> = (0..n_dims)
.map(|_| rng_util::f64_range(rng, 0.0, 1.0))
.collect();
let (mean, std) = predict(model, &x);
let ei = expected_improvement(mean, std, model.f_best);
if ei > best_ei {
best_ei = ei;
best_x = x;
}
}
best_x
}
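/// Maps an internal value into the unit interval; degenerate bounds
/// collapse to `0.5`.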
fn to_normalized(value: f64, lo: f64, hi: f64) -> f64 {
if (hi - lo).abs() < 1e-15 {
0.5
} else {
(value - lo) / (hi - lo)
}
}
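/// Inverse of `to_normalized`.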
fn from_normalized(value: f64, lo: f64, hi: f64) -> f64 {
lo + value * (hi - lo)
}
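/// Cap on GP training points, keeping the `O(n^3)` Cholesky factorization
/// cheap as history grows.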
const MAX_TRAIN_POINTS: usize = 100;
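/// Matches each discovered dimension to a parameter id from a completed
/// trial by distribution equality. Ids are sorted for determinism, each id
/// is consumed at most once, and unmatched dimensions map to `None`.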
fn establish_param_mapping(
trial: &CompletedTrial,
dimensions: &[DimensionInfo],
) -> Vec<Option<crate::parameter::ParamId>> {
use crate::parameter::ParamId;
let mut available: Vec<(ParamId, &Distribution)> =
trial.distributions.iter().map(|(id, d)| (*id, d)).collect();
available.sort_by_key(|(id, _)| *id);
let mut mapping = Vec::with_capacity(dimensions.len());
for dim in dimensions {
let pos = available.iter().position(|(_, d)| **d == dim.distribution);
if let Some(pos) = pos {
mapping.push(Some(available.remove(pos).0));
} else {
mapping.push(None);
}
}
mapping
}
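/// Converts the most recent `MAX_TRAIN_POINTS` completed trials into
/// normalized training rows over the continuous dimensions, skipping trials
/// whose parameters do not line up with the discovered dimensions.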
#[allow(clippy::cast_precision_loss)]
fn build_training_data(
history: &[CompletedTrial],
dimensions: &[DimensionInfo],
) -> (Vec<Vec<f64>>, Vec<f64>) {
if history.is_empty() {
return (Vec::new(), Vec::new());
}
let start = history.len().saturating_sub(MAX_TRAIN_POINTS);
let recent = &history[start..];
let param_mapping = establish_param_mapping(&recent[0], dimensions);
let continuous_indices: Vec<usize> = dimensions
.iter()
.enumerate()
.filter(|(_, d)| d.is_continuous)
.map(|(i, _)| i)
.collect();
let mut x_train = Vec::with_capacity(recent.len());
let mut y_train = Vec::with_capacity(recent.len());
for trial in recent {
let mut x_row = Vec::with_capacity(continuous_indices.len());
let mut valid = true;
for &dim_idx in &continuous_indices {
let dim_info = &dimensions[dim_idx];
if let Some(param_id) = param_mapping[dim_idx] {
                if let Some(param_val) = trial.params.get(&param_id) {
let internal = to_internal(param_val, &dim_info.distribution);
let (lo, hi) = dim_info.bounds.unwrap_or((0.0, 1.0));
x_row.push(to_normalized(internal, lo, hi));
} else {
valid = false;
break;
}
} else {
valid = false;
break;
}
}
if valid && x_row.len() == continuous_indices.len() {
x_train.push(x_row);
y_train.push(trial.value);
}
}
(x_train, y_train)
}
impl Sampler for GpSampler {
#[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)]
fn sample(
&self,
distribution: &Distribution,
trial_id: u64,
history: &[CompletedTrial],
) -> ParamValue {
let mut state = self.state.lock();
match &state.phase {
            GpPhase::Discovery => sample_discovery(&mut state, distribution, trial_id, history),
GpPhase::Active => sample_active(&mut state, distribution, trial_id, history),
}
}
}
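/// First-trial handling: records each requested distribution as a new
/// dimension and answers with a random sample. A request for a different
/// trial id ends discovery and switches to the active phase.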
fn sample_discovery(
    state: &mut GpState,
    distribution: &Distribution,
    trial_id: u64,
    history: &[CompletedTrial],
) -> ParamValue {
    if let Some(prev_id) = state.discovery_trial_id
        && trial_id != prev_id
    {
        finalize_discovery(state);
        // Forward the real history so the first active trial can already use
        // the GP once enough startup trials have completed.
        return sample_active(state, distribution, trial_id, history);
    }
state.discovery_trial_id = Some(trial_id);
let is_continuous = !matches!(distribution, Distribution::Categorical(_));
let bounds = internal_bounds(distribution);
state.dimensions.push(DimensionInfo {
distribution: distribution.clone(),
is_continuous,
bounds,
});
sample_random(&mut state.rng, distribution)
}
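/// Ends discovery: all later calls go through the GP-backed path.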
fn finalize_discovery(state: &mut GpState) {
state.phase = GpPhase::Active;
state.trial_progress.clear();
}
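/// Serves the next dimension of an in-flight trial, or computes a full
/// candidate (GP-guided once enough history exists, random otherwise) on the
/// trial's first request.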
fn sample_active(
state: &mut GpState,
distribution: &Distribution,
trial_id: u64,
history: &[CompletedTrial],
) -> ParamValue {
if let Some(progress) = state.trial_progress.get_mut(&trial_id) {
let dim_idx = progress.next_dim;
progress.next_dim += 1;
if dim_idx < progress.values.len() {
return progress.values[dim_idx].clone();
}
return sample_random(&mut state.rng, distribution);
}
let n_continuous = state.dimensions.iter().filter(|d| d.is_continuous).count();
let use_gp = n_continuous > 0 && history.len() >= state.n_startup_trials;
let values = if use_gp {
compute_gp_candidate(state, history)
} else {
compute_random_candidate(state)
};
let first_value = values
.first()
.cloned()
.unwrap_or_else(|| sample_random(&mut state.rng, distribution));
state.trial_progress.insert(
trial_id,
TrialProgress {
values,
next_dim: 1,
},
);
first_value
}
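/// Fits the GP, optimizes EI over the continuous dimensions, and maps the
/// winner back to parameter values. Categorical dimensions stay random, and
/// a failed fit falls back to a uniform random point.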
fn compute_gp_candidate(state: &mut GpState, history: &[CompletedTrial]) -> Vec<ParamValue> {
let (x_train, y_train) = build_training_data(history, &state.dimensions);
let model = fit_gp(&x_train, &y_train, state.noise_variance);
let n_continuous = state.dimensions.iter().filter(|d| d.is_continuous).count();
let normalized_candidate = if let Some(ref model) = model {
optimize_acquisition(model, n_continuous, state.n_candidates, &mut state.rng)
} else {
(0..n_continuous)
.map(|_| rng_util::f64_range(&mut state.rng, 0.0, 1.0))
.collect()
};
let mut values = Vec::with_capacity(state.dimensions.len());
let mut ci = 0;
for dim in &state.dimensions {
if dim.is_continuous {
let (lo, hi) = dim.bounds.unwrap_or((0.0, 1.0));
let internal_val = from_normalized(normalized_candidate[ci], lo, hi);
values.push(from_internal(internal_val, &dim.distribution));
ci += 1;
} else {
values.push(sample_random(&mut state.rng, &dim.distribution));
}
}
values
}
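/// One random value per discovered dimension (startup fallback).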
fn compute_random_candidate(state: &mut GpState) -> Vec<ParamValue> {
state
.dimensions
.iter()
.map(|dim| sample_random(&mut state.rng, &dim.distribution))
.collect()
}
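// A minimal sanity-check sketch for the pure helpers above (kernel, normal
// CDF approximation, EI, normalization). It touches nothing outside this
// module, so it should compile as-is; the tolerances are illustrative
// choices, not requirements from the original code.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn matern52_equals_signal_var_at_zero_distance() {
        // r = 0 at identical inputs, so k(x, x) reduces to signal_var.
        let x = [0.3, 0.7];
        let k = matern52(&x, &x, &[1.0, 1.0], 2.0);
        assert!((k - 2.0).abs() < 1e-12);
    }

    #[test]
    fn norm_cdf_is_symmetric_and_centered() {
        // Phi(0) = 0.5 up to the ~7.5e-8 error of the approximation, and
        // Phi(x) + Phi(-x) = 1 by construction of the reflection branch.
        assert!((norm_cdf(0.0) - 0.5).abs() < 1e-6);
        assert!((norm_cdf(1.5) + norm_cdf(-1.5) - 1.0).abs() < 1e-12);
    }

    #[test]
    fn expected_improvement_behaves_at_the_incumbent() {
        // Zero std and a mean above f_best leaves no room for improvement...
        assert!(expected_improvement(1.0, 0.0, 0.0).abs() < f64::EPSILON);
        // ...while a mean well below f_best yields EI close to the gap.
        assert!(expected_improvement(-1.0, 0.1, 0.0) > 0.9);
    }

    #[test]
    fn normalization_round_trips() {
        let v = from_normalized(to_normalized(3.0, 1.0, 5.0), 1.0, 5.0);
        assert!((v - 3.0).abs() < 1e-12);
    }
}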