// quantrs2_anneal/bayesian_hyperopt/gaussian_process.rs

use super::config::{BayesianOptError, BayesianOptResult};

/// The surrogate's settings double as the GP configuration.
pub type GaussianProcessConfig = GaussianProcessSurrogate;

/// Gaussian process surrogate model settings.
#[derive(Debug, Clone)]
pub struct GaussianProcessSurrogate {
    /// Covariance (kernel) function.
    pub kernel: KernelFunction,
    /// Observation noise variance added to the kernel diagonal.
    pub noise_variance: f64,
    /// Prior mean function.
    pub mean_function: MeanFunction,
}

impl Default for GaussianProcessSurrogate {
    fn default() -> Self {
        Self {
            kernel: KernelFunction::RBF,
            noise_variance: 1e-6,
            mean_function: MeanFunction::Zero,
        }
    }
}

impl GaussianProcessSurrogate {
    /// Placeholder prediction: returns the prior mean and unit variance.
    /// Use `GaussianProcessModel::predict` for data-conditioned predictions.
    pub const fn predict(&self, _x: &[f64]) -> BayesianOptResult<(f64, f64)> {
        Ok((0.0, 1.0))
    }
}

/// Supported covariance functions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum KernelFunction {
    /// Squared-exponential (radial basis function) kernel.
    RBF,
    /// Matérn kernel (nu = 3/2 form).
    Matern,
    /// Linear (dot-product) kernel.
    Linear,
    /// Polynomial kernel (degree 2).
    Polynomial,
    /// Spectral mixture kernel (currently falls back to RBF).
    SpectralMixture,
}

/// Prior mean functions for the GP.
#[derive(Debug, Clone, PartialEq)]
pub enum MeanFunction {
    /// Zero mean.
    Zero,
    /// Constant mean.
    Constant(f64),
    /// Linear mean in the inputs.
    Linear,
    /// Polynomial mean of the given degree.
    Polynomial { degree: usize },
}

/// Trainable GP hyperparameters.
#[derive(Debug, Clone)]
pub struct GPHyperparameters {
    /// Per-dimension kernel length scales.
    pub length_scales: Vec<f64>,
    /// Kernel signal variance (output scale).
    pub signal_variance: f64,
    /// Observation noise variance.
    pub noise_variance: f64,
    /// Parameters of the mean function.
    pub mean_parameters: Vec<f64>,
}

impl Default for GPHyperparameters {
    fn default() -> Self {
        Self {
            length_scales: vec![1.0],
            signal_variance: 1.0,
            noise_variance: 1e-6,
            mean_parameters: vec![0.0],
        }
    }
}

/// A Gaussian process regression model fitted to training data.
#[derive(Debug, Clone)]
pub struct GaussianProcessModel {
    /// Training inputs, one row per observation.
    pub x_train: Vec<Vec<f64>>,
    /// Training targets.
    pub y_train: Vec<f64>,
    /// Surrogate configuration (kernel, noise, mean function).
    pub config: GaussianProcessConfig,
    /// Fitted hyperparameters.
    pub hyperparameters: GPHyperparameters,
    /// Cached inverse of the noisy kernel matrix, set by `fit`.
    pub k_inv: Option<Vec<Vec<f64>>>,
}

impl GaussianProcessModel {
    /// Builds a model from training data and fits it.
    pub fn new(
        x_train: Vec<Vec<f64>>,
        y_train: Vec<f64>,
        config: GaussianProcessConfig,
    ) -> BayesianOptResult<Self> {
        if x_train.len() != y_train.len() {
            return Err(BayesianOptError::GaussianProcessError(
                "Training inputs and outputs must have the same length".to_string(),
            ));
        }

        if x_train.is_empty() {
            return Err(BayesianOptError::GaussianProcessError(
                "Training data cannot be empty".to_string(),
            ));
        }

        let input_dim = x_train[0].len();
        let hyperparameters = GPHyperparameters {
            length_scales: vec![1.0; input_dim],
            signal_variance: 1.0,
            noise_variance: config.noise_variance,
            mean_parameters: vec![0.0],
        };

        let mut model = Self {
            x_train,
            y_train,
            config,
            hyperparameters,
            k_inv: None,
        };

        model.fit()?;

        Ok(model)
    }

    /// Fits the model: tunes hyperparameters and caches the kernel inverse.
    pub fn fit(&mut self) -> BayesianOptResult<()> {
        self.optimize_hyperparameters()?;

        self.precompute_kernel_inverse()?;

        Ok(())
    }

    /// Heuristic hyperparameter selection: length scales from the input
    /// ranges, signal variance from the target variance.
    fn optimize_hyperparameters(&mut self) -> BayesianOptResult<()> {
        let n = self.x_train.len();
        if n == 0 {
            return Ok(());
        }

        let input_dim = self.x_train[0].len();

        // Set each length scale to half the observed range of that dimension.
        for dim in 0..input_dim {
            let values: Vec<f64> = self.x_train.iter().map(|x| x[dim]).collect();
            let min_val = values.iter().fold(f64::INFINITY, |a, &b| a.min(b));
            let max_val = values.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
            let range = (max_val - min_val).max(1e-6);

            self.hyperparameters.length_scales[dim] = range / 2.0;
        }

        // Set the signal variance to the empirical variance of the targets.
        let mean_y = self.y_train.iter().sum::<f64>() / n as f64;
        let var_y = self
            .y_train
            .iter()
            .map(|&y| (y - mean_y).powi(2))
            .sum::<f64>()
            / n as f64;

        self.hyperparameters.signal_variance = var_y.max(1e-6);

        Ok(())
    }

    /// Builds the noisy kernel matrix K + sigma_n^2 I and caches its inverse.
    fn precompute_kernel_inverse(&mut self) -> BayesianOptResult<()> {
        let n = self.x_train.len();

        let mut k_matrix = vec![vec![0.0; n]; n];
        for i in 0..n {
            for j in 0..n {
                k_matrix[i][j] = self.kernel(&self.x_train[i], &self.x_train[j]);
                if i == j {
                    k_matrix[i][j] += self.hyperparameters.noise_variance;
                }
            }
        }

        let k_inv = self.matrix_inverse(k_matrix)?;
        self.k_inv = Some(k_inv);

        Ok(())
    }

    /// Inverts a square matrix by Gauss-Jordan elimination with partial pivoting.
    fn matrix_inverse(&self, matrix: Vec<Vec<f64>>) -> BayesianOptResult<Vec<Vec<f64>>> {
        let n = matrix.len();

        // Form the augmented matrix [A | I].
        let mut augmented = vec![vec![0.0; 2 * n]; n];
        for i in 0..n {
            for j in 0..n {
                augmented[i][j] = matrix[i][j];
            }
            augmented[i][i + n] = 1.0;
        }

        for i in 0..n {
            // Partial pivoting: bring the largest remaining pivot into place.
            let mut max_row = i;
            for k in (i + 1)..n {
                if augmented[k][i].abs() > augmented[max_row][i].abs() {
                    max_row = k;
                }
            }

            if max_row != i {
                augmented.swap(i, max_row);
            }

            if augmented[i][i].abs() < 1e-12 {
                return Err(BayesianOptError::GaussianProcessError(
                    "Singular kernel matrix".to_string(),
                ));
            }

            // Normalize the pivot row.
            let pivot = augmented[i][i];
            for j in 0..(2 * n) {
                augmented[i][j] /= pivot;
            }

            // Eliminate the pivot column from all other rows.
            for k in 0..n {
                if k != i {
                    let factor = augmented[k][i];
                    for j in 0..(2 * n) {
                        augmented[k][j] -= factor * augmented[i][j];
                    }
                }
            }
        }

        // The right half of the augmented matrix now holds A^{-1}.
        let mut inverse = vec![vec![0.0; n]; n];
        for i in 0..n {
            for j in 0..n {
                inverse[i][j] = augmented[i][j + n];
            }
        }

        Ok(inverse)
    }

    /// Dispatches to the configured kernel function.
    fn kernel(&self, x1: &[f64], x2: &[f64]) -> f64 {
        match self.config.kernel {
            KernelFunction::RBF => self.rbf_kernel(x1, x2),
            KernelFunction::Matern => self.matern_kernel(x1, x2),
            KernelFunction::Linear => self.linear_kernel(x1, x2),
            KernelFunction::Polynomial => self.polynomial_kernel(x1, x2),
            // Spectral mixture is not implemented yet; fall back to RBF.
            KernelFunction::SpectralMixture => self.rbf_kernel(x1, x2),
        }
    }

    /// Squared-exponential kernel with per-dimension (ARD) length scales:
    /// k(x, x') = sigma_f^2 * exp(-0.5 * sum_d ((x_d - x'_d) / l_d)^2).
    fn rbf_kernel(&self, x1: &[f64], x2: &[f64]) -> f64 {
        let mut distance_sq = 0.0;
        for (i, (&xi, &xj)) in x1.iter().zip(x2.iter()).enumerate() {
            let length_scale = self.hyperparameters.length_scales.get(i).unwrap_or(&1.0);
            distance_sq += ((xi - xj) / length_scale).powi(2);
        }

        self.hyperparameters.signal_variance * (-0.5 * distance_sq).exp()
    }

    /// Matérn kernel with nu = 3/2:
    /// k(x, x') = sigma_f^2 * (1 + sqrt(3) r) * exp(-sqrt(3) r),
    /// where r is the length-scale-weighted Euclidean distance.
    fn matern_kernel(&self, x1: &[f64], x2: &[f64]) -> f64 {
        let mut distance = 0.0;
        for (i, (&xi, &xj)) in x1.iter().zip(x2.iter()).enumerate() {
            let length_scale = self.hyperparameters.length_scales.get(i).unwrap_or(&1.0);
            distance += ((xi - xj) / length_scale).powi(2);
        }
        distance = distance.sqrt();

        let sqrt3_r = 3.0_f64.sqrt() * distance;
        self.hyperparameters.signal_variance * (1.0 + sqrt3_r) * (-sqrt3_r).exp()
    }

    /// Linear kernel: k(x, x') = sigma_f^2 * <x, x'>.
    fn linear_kernel(&self, x1: &[f64], x2: &[f64]) -> f64 {
        let dot_product: f64 = x1.iter().zip(x2.iter()).map(|(&xi, &xj)| xi * xj).sum();
        self.hyperparameters.signal_variance * dot_product
    }

    /// Degree-2 polynomial kernel: k(x, x') = sigma_f^2 * (1 + <x, x'>)^2.
    fn polynomial_kernel(&self, x1: &[f64], x2: &[f64]) -> f64 {
        let dot_product: f64 = x1.iter().zip(x2.iter()).map(|(&xi, &xj)| xi * xj).sum();
        self.hyperparameters.signal_variance * (1.0 + dot_product).powi(2)
    }

    /// Posterior predictive mean and variance at a query point:
    /// mu(x*) = m(x*) + k_*^T K^{-1} (y - m(X)),
    /// var(x*) = k(x*, x*) - k_*^T K^{-1} k_*.
    pub fn predict(&self, x: &[f64]) -> BayesianOptResult<(f64, f64)> {
        let k_inv = self.k_inv.as_ref().ok_or_else(|| {
            BayesianOptError::GaussianProcessError("Model not fitted".to_string())
        })?;

        // Cross-covariances between the query point and the training inputs.
        let k_star: Vec<f64> = self
            .x_train
            .iter()
            .map(|x_train| self.kernel(x, x_train))
            .collect();

        // Condition on the residuals y - m(X) so that a non-zero mean
        // function is handled correctly.
        let residuals: Vec<f64> = self
            .x_train
            .iter()
            .zip(self.y_train.iter())
            .map(|(x_i, &y_i)| y_i - self.mean_function_value(x_i))
            .collect();

        let mut mean = self.mean_function_value(x);
        for i in 0..residuals.len() {
            for j in 0..residuals.len() {
                mean += k_star[i] * k_inv[i][j] * residuals[j];
            }
        }

        // Predictive variance, clamped away from zero for numerical safety.
        let k_star_star = self.kernel(x, x);
        let mut variance = k_star_star;

        for i in 0..k_star.len() {
            for j in 0..k_star.len() {
                variance -= k_star[i] * k_inv[i][j] * k_star[j];
            }
        }

        variance = variance.max(1e-12);

        Ok((mean, variance))
    }

    /// Evaluates the prior mean function at a point.
    fn mean_function_value(&self, x: &[f64]) -> f64 {
        match self.config.mean_function {
            MeanFunction::Zero => 0.0,
            MeanFunction::Constant(c) => c,
            MeanFunction::Linear => {
                x.iter().sum::<f64>() * self.hyperparameters.mean_parameters.first().unwrap_or(&0.0)
            }
            // Simplified placeholder: the degree is ignored and the mean is
            // treated as linear in the coordinate sum.
            MeanFunction::Polynomial { degree: _ } => {
                let x_sum = x.iter().sum::<f64>();
                x_sum * self.hyperparameters.mean_parameters.first().unwrap_or(&0.0)
            }
        }
    }

    /// Log marginal likelihood of the training data:
    /// log p(y) = -1/2 (y - m)^T K^{-1} (y - m) - 1/2 log|K| - (n/2) log(2 pi).
    pub fn log_marginal_likelihood(&self) -> BayesianOptResult<f64> {
        let k_inv = self.k_inv.as_ref().ok_or_else(|| {
            BayesianOptError::GaussianProcessError("Model not fitted".to_string())
        })?;

        let n = self.y_train.len();

        // Quadratic form over the mean-function residuals.
        let residuals: Vec<f64> = self
            .x_train
            .iter()
            .zip(self.y_train.iter())
            .map(|(x_i, &y_i)| y_i - self.mean_function_value(x_i))
            .collect();

        let mut quad_form = 0.0;
        for i in 0..n {
            for j in 0..n {
                quad_form += residuals[i] * k_inv[i][j] * residuals[j];
            }
        }

        let log_det = self.log_determinant()?;

        let log_likelihood = -0.5 * quad_form
            - 0.5 * log_det
            - 0.5 * n as f64 * (2.0 * std::f64::consts::PI).ln();

        Ok(log_likelihood)
    }

    /// log|K + sigma_n^2 I| via an in-place Cholesky factorization. The noisy
    /// kernel matrix is symmetric positive definite, so K = L L^T and
    /// log|K| = 2 * sum_i ln(L_ii). (The product of the raw diagonal entries
    /// is not the determinant; pivots from a factorization are required.)
    fn log_determinant(&self) -> BayesianOptResult<f64> {
        let n = self.x_train.len();

        let mut k_matrix = vec![vec![0.0; n]; n];
        for i in 0..n {
            for j in 0..n {
                k_matrix[i][j] = self.kernel(&self.x_train[i], &self.x_train[j]);
                if i == j {
                    k_matrix[i][j] += self.hyperparameters.noise_variance;
                }
            }
        }

        // In-place Cholesky on the lower triangle; accumulate the log pivots.
        let mut log_det = 0.0;
        for i in 0..n {
            for j in 0..=i {
                let mut sum = k_matrix[i][j];
                for k in 0..j {
                    sum -= k_matrix[i][k] * k_matrix[j][k];
                }
                if i == j {
                    if sum <= 0.0 {
                        return Err(BayesianOptError::GaussianProcessError(
                            "Kernel matrix is not positive definite".to_string(),
                        ));
                    }
                    k_matrix[i][i] = sum.sqrt();
                    log_det += 2.0 * k_matrix[i][i].ln();
                } else {
                    k_matrix[i][j] = sum / k_matrix[j][j];
                }
            }
        }

        Ok(log_det)
    }
}
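
// ---------------------------------------------------------------------------
// Illustrative smoke tests: a minimal sketch of how the model above is meant
// to be used, assuming only the types defined in this module and the error
// types from `super::config`. The toy data, tolerances, and test names are
// examples added here, not part of the original module.
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;

    fn toy_model() -> GaussianProcessModel {
        // Noise-free samples of f(x) = x^2 on a 1-D grid (hypothetical data).
        let x_train = vec![vec![0.0], vec![1.0], vec![2.0], vec![3.0]];
        let y_train = vec![0.0, 1.0, 4.0, 9.0];
        GaussianProcessModel::new(x_train, y_train, GaussianProcessConfig::default())
            .expect("fitting on well-formed toy data should succeed")
    }

    #[test]
    fn predicts_near_training_targets() {
        let model = toy_model();
        // At a training input the posterior mean should be close to the
        // observed target and the variance close to the noise floor.
        let (mean, variance) = model.predict(&[1.0]).unwrap();
        assert!((mean - 1.0).abs() < 0.1);
        assert!(variance < 0.1);
    }

    #[test]
    fn rejects_mismatched_training_data() {
        let result = GaussianProcessModel::new(
            vec![vec![0.0], vec![1.0]],
            vec![0.0],
            GaussianProcessConfig::default(),
        );
        assert!(result.is_err());
    }

    #[test]
    fn log_marginal_likelihood_is_finite() {
        let model = toy_model();
        assert!(model.log_marginal_likelihood().unwrap().is_finite());
    }
}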