use super::config::{BayesianOptError, BayesianOptResult};
pub type GaussianProcessConfig = GaussianProcessSurrogate;
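/// Kernel, noise, and mean settings for the GP surrogate; the struct doubles
/// as the model configuration via the `GaussianProcessConfig` alias above.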
#[derive(Debug, Clone)]
pub struct GaussianProcessSurrogate {
    /// Covariance function used to compare inputs.
    pub kernel: KernelFunction,
    /// Observation-noise variance added to the kernel diagonal.
    pub noise_variance: f64,
    /// Prior mean of the process.
    pub mean_function: MeanFunction,
}
impl Default for GaussianProcessSurrogate {
fn default() -> Self {
Self {
kernel: KernelFunction::RBF,
noise_variance: 1e-6,
mean_function: MeanFunction::Zero,
}
}
}
impl GaussianProcessSurrogate {
    /// Placeholder prediction that returns the prior: zero mean and unit
    /// variance. Posterior predictions live on `GaussianProcessModel::predict`.
    pub const fn predict(&self, _x: &[f64]) -> BayesianOptResult<(f64, f64)> {
        Ok((0.0, 1.0))
    }
}
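/// Supported covariance functions. `SpectralMixture` is not yet implemented
/// and currently falls back to the RBF kernel.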
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum KernelFunction {
RBF,
Matern,
Linear,
Polynomial,
SpectralMixture,
}
#[derive(Debug, Clone, PartialEq)]
pub enum MeanFunction {
Zero,
Constant(f64),
Linear,
Polynomial { degree: usize },
}
#[derive(Debug, Clone)]
pub struct GPHyperparameters {
pub length_scales: Vec<f64>,
pub signal_variance: f64,
pub noise_variance: f64,
pub mean_parameters: Vec<f64>,
}
impl Default for GPHyperparameters {
fn default() -> Self {
Self {
length_scales: vec![1.0],
signal_variance: 1.0,
noise_variance: 1e-6,
mean_parameters: vec![0.0],
}
}
}
#[derive(Debug, Clone)]
pub struct GaussianProcessModel {
pub x_train: Vec<Vec<f64>>,
pub y_train: Vec<f64>,
pub config: GaussianProcessConfig,
pub hyperparameters: GPHyperparameters,
pub k_inv: Option<Vec<Vec<f64>>>,
}
impl GaussianProcessModel {
pub fn new(
x_train: Vec<Vec<f64>>,
y_train: Vec<f64>,
config: GaussianProcessConfig,
) -> BayesianOptResult<Self> {
if x_train.len() != y_train.len() {
return Err(BayesianOptError::GaussianProcessError(
"Training inputs and outputs must have same length".to_string(),
));
}
if x_train.is_empty() {
return Err(BayesianOptError::GaussianProcessError(
"Training data cannot be empty".to_string(),
));
}
let input_dim = x_train[0].len();
let hyperparameters = GPHyperparameters {
length_scales: vec![1.0; input_dim],
signal_variance: 1.0,
noise_variance: config.noise_variance,
mean_parameters: vec![0.0],
};
let mut model = Self {
x_train,
y_train,
config,
hyperparameters,
k_inv: None,
};
model.fit()?;
Ok(model)
}
pub fn fit(&mut self) -> BayesianOptResult<()> {
self.optimize_hyperparameters()?;
self.precompute_kernel_inverse()?;
Ok(())
}
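    // A moment-matching heuristic rather than true marginal-likelihood
    // optimization: length scales are set to half the per-dimension data
    // range and the signal variance to the empirical output variance.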
fn optimize_hyperparameters(&mut self) -> BayesianOptResult<()> {
let n = self.x_train.len();
if n == 0 {
return Ok(());
}
let input_dim = self.x_train[0].len();
for dim in 0..input_dim {
let values: Vec<f64> = self.x_train.iter().map(|x| x[dim]).collect();
let min_val = values.iter().fold(f64::INFINITY, |a, &b| a.min(b));
let max_val = values.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
let range = (max_val - min_val).max(1e-6);
self.hyperparameters.length_scales[dim] = range / 2.0;
}
let mean_y = self.y_train.iter().sum::<f64>() / n as f64;
let var_y = self
.y_train
.iter()
.map(|&y| (y - mean_y).powi(2))
.sum::<f64>()
/ n as f64;
self.hyperparameters.signal_variance = var_y.max(1e-6);
Ok(())
}
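    // Builds the noisy kernel matrix K + sigma_n^2 * I and caches its inverse
    // for prediction. A Cholesky factorization would be the more numerically
    // stable choice, but Gauss-Jordan keeps this self-contained.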
fn precompute_kernel_inverse(&mut self) -> BayesianOptResult<()> {
let n = self.x_train.len();
let mut k_matrix = vec![vec![0.0; n]; n];
for i in 0..n {
for j in 0..n {
k_matrix[i][j] = self.kernel(&self.x_train[i], &self.x_train[j]);
if i == j {
k_matrix[i][j] += self.hyperparameters.noise_variance;
}
}
}
let k_inv = self.matrix_inverse(k_matrix)?;
self.k_inv = Some(k_inv);
Ok(())
}
    fn matrix_inverse(&self, matrix: Vec<Vec<f64>>) -> BayesianOptResult<Vec<Vec<f64>>> {
let n = matrix.len();
let mut augmented = vec![vec![0.0; 2 * n]; n];
for i in 0..n {
for j in 0..n {
augmented[i][j] = matrix[i][j];
}
augmented[i][i + n] = 1.0;
}
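        // Gauss-Jordan elimination with partial pivoting: reduce the left
        // block to the identity, leaving the inverse in the right block.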
for i in 0..n {
let mut max_row = i;
for k in (i + 1)..n {
if augmented[k][i].abs() > augmented[max_row][i].abs() {
max_row = k;
}
}
if max_row != i {
augmented.swap(i, max_row);
}
if augmented[i][i].abs() < 1e-12 {
return Err(BayesianOptError::GaussianProcessError(
"Singular kernel matrix".to_string(),
));
}
let pivot = augmented[i][i];
for j in 0..(2 * n) {
augmented[i][j] /= pivot;
}
for k in 0..n {
if k != i {
let factor = augmented[k][i];
for j in 0..(2 * n) {
augmented[k][j] -= factor * augmented[i][j];
}
}
}
}
let mut inverse = vec![vec![0.0; n]; n];
for i in 0..n {
for j in 0..n {
inverse[i][j] = augmented[i][j + n];
}
}
Ok(inverse)
}
fn kernel(&self, x1: &[f64], x2: &[f64]) -> f64 {
match self.config.kernel {
KernelFunction::RBF => self.rbf_kernel(x1, x2),
KernelFunction::Matern => self.matern_kernel(x1, x2),
KernelFunction::Linear => self.linear_kernel(x1, x2),
KernelFunction::Polynomial => self.polynomial_kernel(x1, x2),
            // No spectral-mixture kernel is implemented yet; fall back to RBF.
            KernelFunction::SpectralMixture => self.rbf_kernel(x1, x2),
        }
}
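    // ARD squared-exponential (RBF) kernel:
    //   k(x, x') = sigma_f^2 * exp(-0.5 * sum_d ((x_d - x'_d) / l_d)^2)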
fn rbf_kernel(&self, x1: &[f64], x2: &[f64]) -> f64 {
let mut distance_sq = 0.0;
for (i, (&xi, &xj)) in x1.iter().zip(x2.iter()).enumerate() {
let length_scale = self.hyperparameters.length_scales.get(i).unwrap_or(&1.0);
distance_sq += ((xi - xj) / length_scale).powi(2);
}
self.hyperparameters.signal_variance * (-0.5 * distance_sq).exp()
}
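    // Matern kernel with smoothness nu = 3/2 over the ARD-scaled distance r:
    //   k(r) = sigma_f^2 * (1 + sqrt(3) * r) * exp(-sqrt(3) * r)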
fn matern_kernel(&self, x1: &[f64], x2: &[f64]) -> f64 {
        let mut sq_distance = 0.0;
        for (i, (&xi, &xj)) in x1.iter().zip(x2.iter()).enumerate() {
            let length_scale = self.hyperparameters.length_scales.get(i).unwrap_or(&1.0);
            sq_distance += ((xi - xj) / length_scale).powi(2);
        }
        let distance = sq_distance.sqrt();
let sqrt3_r = 3.0_f64.sqrt() * distance;
self.hyperparameters.signal_variance * (1.0 + sqrt3_r) * (-sqrt3_r).exp()
}
fn linear_kernel(&self, x1: &[f64], x2: &[f64]) -> f64 {
let dot_product: f64 = x1.iter().zip(x2.iter()).map(|(&xi, &xj)| xi * xj).sum();
self.hyperparameters.signal_variance * dot_product
}
fn polynomial_kernel(&self, x1: &[f64], x2: &[f64]) -> f64 {
let dot_product: f64 = x1.iter().zip(x2.iter()).map(|(&xi, &xj)| xi * xj).sum();
self.hyperparameters.signal_variance * (1.0 + dot_product).powi(2)
}
pub fn predict(&self, x: &[f64]) -> BayesianOptResult<(f64, f64)> {
let k_inv = self.k_inv.as_ref().ok_or_else(|| {
BayesianOptError::GaussianProcessError("Model not fitted".to_string())
})?;
let k_star: Vec<f64> = self
.x_train
.iter()
.map(|x_train| self.kernel(x, x_train))
.collect();
        // Posterior mean: m(x) + k_*^T K^{-1} (y - m(X)). The prior mean must
        // be subtracted from the training targets, not only added back at x.
        let residuals: Vec<f64> = self
            .x_train
            .iter()
            .zip(self.y_train.iter())
            .map(|(x_i, &y_i)| y_i - self.mean_function_value(x_i))
            .collect();
        let mut mean = self.mean_function_value(x);
        for i in 0..residuals.len() {
            for j in 0..residuals.len() {
                mean += k_star[i] * k_inv[i][j] * residuals[j];
            }
        }
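        // Posterior variance: k(x, x) - k_*^T K^{-1} k_*.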
let k_star_star = self.kernel(x, x);
let mut variance = k_star_star;
for i in 0..k_star.len() {
for j in 0..k_star.len() {
variance -= k_star[i] * k_inv[i][j] * k_star[j];
}
}
variance = variance.max(1e-12);
Ok((mean, variance))
}
fn mean_function_value(&self, x: &[f64]) -> f64 {
match self.config.mean_function {
MeanFunction::Zero => 0.0,
MeanFunction::Constant(c) => c,
            MeanFunction::Linear => {
                x.iter().sum::<f64>()
                    * self.hyperparameters.mean_parameters.first().unwrap_or(&0.0)
            }
            MeanFunction::Polynomial { degree: _ } => {
                // The degree is currently unused, so the polynomial mean
                // reduces to the same form as the linear mean.
                let x_sum = x.iter().sum::<f64>();
                x_sum * self.hyperparameters.mean_parameters.first().unwrap_or(&0.0)
            }
}
}
pub fn log_marginal_likelihood(&self) -> BayesianOptResult<f64> {
let k_inv = self.k_inv.as_ref().ok_or_else(|| {
BayesianOptError::GaussianProcessError("Model not fitted".to_string())
})?;
        let n = self.y_train.len();
        // Work with residuals (y - m(X)) so that non-zero prior means are
        // handled consistently with `predict`.
        let residuals: Vec<f64> = self
            .x_train
            .iter()
            .zip(self.y_train.iter())
            .map(|(x_i, &y_i)| y_i - self.mean_function_value(x_i))
            .collect();
        let mut quad_form = 0.0;
        for i in 0..n {
            for j in 0..n {
                quad_form += residuals[i] * k_inv[i][j] * residuals[j];
            }
        }
        let log_det = self.log_determinant()?;
        // log p(y | X) = -1/2 (y - m)^T K^{-1} (y - m) - 1/2 log|K| - n/2 log(2 pi)
        let log_likelihood = -0.5 * quad_form
            - 0.5 * log_det
            - 0.5 * (n as f64) * (2.0 * std::f64::consts::PI).ln();
        Ok(log_likelihood)
}
    fn log_determinant(&self) -> BayesianOptResult<f64> {
        let n = self.x_train.len();
        let mut k_matrix = vec![vec![0.0; n]; n];
        for i in 0..n {
            for j in 0..n {
                k_matrix[i][j] = self.kernel(&self.x_train[i], &self.x_train[j]);
                if i == j {
                    k_matrix[i][j] += self.hyperparameters.noise_variance;
                }
            }
        }
        // Gaussian elimination without pivoting (adequate for the symmetric
        // positive-definite kernel matrix); log|K| is the sum of the logs of
        // the elimination pivots, not of the raw diagonal entries.
        let mut log_det = 0.0;
        for i in 0..n {
            let pivot = k_matrix[i][i];
            if pivot <= 1e-12 {
                return Err(BayesianOptError::GaussianProcessError(
                    "Singular kernel matrix".to_string(),
                ));
            }
            log_det += pivot.ln();
            for k in (i + 1)..n {
                let factor = k_matrix[k][i] / pivot;
                for j in i..n {
                    k_matrix[k][j] -= factor * k_matrix[i][j];
                }
            }
        }
        Ok(log_det)
    }
}
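#[cfg(test)]
mod tests {
    use super::*;

    // A minimal usage sketch: fit the GP to three 1-D points and query it at
    // a training input. With near-zero observation noise the posterior should
    // reproduce the observed value with very small variance. Assumes
    // `BayesianOptError` implements `Debug` (required by `expect`).
    #[test]
    fn posterior_interpolates_training_point() {
        let model = GaussianProcessModel::new(
            vec![vec![0.0], vec![1.0], vec![2.0]],
            vec![0.0, 1.0, 4.0],
            GaussianProcessConfig::default(),
        )
        .expect("model should fit");
        let (mean, variance) = model.predict(&[1.0]).expect("prediction should succeed");
        assert!((mean - 1.0).abs() < 1e-2);
        assert!(variance < 1e-2);
    }
}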