use super::linalg::{cholesky_jitter, solve_lower_matrix, solve_upper};
use crate::error::{StatsError, StatsResult};

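/// Configuration for a single layer of a deep Gaussian process.
///
/// Each layer is a sparse variational GP with an RBF kernel: `num_inducing`
/// pseudo-inputs summarize the layer's function values, and `out_dim` hidden
/// units are produced, sharing a common predictive variance per point.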
#[derive(Debug, Clone)]
pub struct DeepGPLayerConfig {
    /// Number of inducing points for the layer.
    pub num_inducing: usize,
    /// Output dimensionality of the layer.
    pub out_dim: usize,
    /// RBF kernel length scale.
    pub length_scale: f64,
    /// RBF kernel signal variance.
    pub variance: f64,
    /// Per-layer noise variance added to predictive variances.
    pub noise: f64,
}

impl Default for DeepGPLayerConfig {
    fn default() -> Self {
        Self {
            num_inducing: 20,
            out_dim: 1,
            length_scale: 1.0,
            variance: 1.0,
            noise: 0.01,
        }
    }
}
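
/// A single sparse variational GP layer.
///
/// `inducing_mean` holds the variational mean of the inducing outputs,
/// `inducing_inputs` the pseudo-input locations, and `chol_s` a Cholesky
/// factor of the variational covariance S.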
#[derive(Debug, Clone)]
struct DeepGPLayer {
    config: DeepGPLayerConfig,
    inducing_mean: Array2<f64>,
    inducing_inputs: Array2<f64>,
    chol_s: Array2<f64>,
}

impl DeepGPLayer {
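    /// Creates a layer with zero-mean inducing outputs and an initial
    /// variational covariance S = noise * I, stored via its Cholesky factor
    /// sqrt(noise) * I.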
    fn new(config: DeepGPLayerConfig, _in_dim: usize, inducing_inputs: Array2<f64>) -> Self {
        let m = inducing_inputs.nrows();
        let chol_s = Array2::<f64>::eye(m) * config.noise.sqrt();
        let inducing_mean = Array2::<f64>::zeros((m, config.out_dim));
        Self { config, inducing_mean, inducing_inputs, chol_s }
    }
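
    /// Squared-exponential (RBF) kernel with an isotropic length scale:
    /// k(x1, x2) = variance * exp(-0.5 * ||x1 - x2||^2 / length_scale^2).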
    fn rbf(&self, x1: &[f64], x2: &[f64]) -> f64 {
        let ls = self.config.length_scale;
        let var_k = self.config.variance;
        let sq: f64 = x1.iter().zip(x2).map(|(&a, &b)| (a - b).powi(2) / (ls * ls)).sum();
        var_k * (-0.5 * sq).exp()
    }
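
    /// Propagates a Gaussian input distribution N(h_mean, diag(h_var))
    /// through the layer using the standard sparse-GP predictive equations:
    ///
    ///   mean = K_fu K_uu^{-1} m
    ///   var  = k_ff - k_fu K_uu^{-1} k_uf
    ///          + k_fu K_uu^{-1} S K_uu^{-1} k_uf + noise
    ///
    /// Input uncertainty is folded in with a crude moment-matching factor
    /// exp(-0.5 * sum_d var_d / ls^2) applied to the cross-covariances,
    /// rather than the exact expected-kernel integral.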
    fn propagate(
        &self,
        h_mean: &Array2<f64>,
        h_var: &Array2<f64>,
    ) -> StatsResult<(Array2<f64>, Array2<f64>)> {
        let n = h_mean.nrows();
        let out_d = self.config.out_dim;
        let m = self.inducing_inputs.nrows();
        let in_d = h_mean.ncols();
        let ls = self.config.length_scale;
        let var_k = self.config.variance;
        // K_uu: kernel between inducing inputs, with jitter on the diagonal.
        let mut k_uu = Array2::<f64>::zeros((m, m));
        for i in 0..m {
            let xi: Vec<f64> = self.inducing_inputs.row(i).iter().copied().collect();
            for j in 0..m {
                let xj: Vec<f64> = self.inducing_inputs.row(j).iter().copied().collect();
                k_uu[[i, j]] = self.rbf(&xi, &xj);
            }
            k_uu[[i, i]] += 1e-6;
        }
        let l_uu = cholesky_jitter(&k_uu)?;
        // K_fu: cross-covariance between the (uncertain) inputs and the
        // inducing inputs, damped by the moment-matching correction.
        let mut k_fu = Array2::<f64>::zeros((n, m));
        for i in 0..n {
            let xi: Vec<f64> = h_mean.row(i).iter().copied().collect();
            let var_i: Vec<f64> = h_var.row(i).iter().copied().collect();
            let correction: f64 = (0..in_d.min(var_i.len()))
                .map(|d| -0.5 * var_i[d] / (ls * ls))
                .sum::<f64>()
                .exp();
            for j in 0..m {
                let zj: Vec<f64> = self.inducing_inputs.row(j).iter().copied().collect();
                let sq: f64 = xi
                    .iter()
                    .zip(zj.iter())
                    .map(|(&a, &b)| (a - b).powi(2) / (ls * ls))
                    .sum();
                k_fu[[i, j]] = var_k * (-0.5 * sq).exp() * correction;
            }
        }
        // K_uu^{-1} m = L^{-T} (L^{-1} m): forward-solve against L, then
        // back-substitute each column against the upper factor L^T. (The
        // second solve must be an upper-triangular solve; forward-solving
        // against L^T would silently read the wrong triangle.)
        let kinv_m = {
            let tmp = solve_lower_matrix(&l_uu, &self.inducing_mean)?;
            let l_t = l_uu.t().to_owned();
            let mut out = Array2::<f64>::zeros(tmp.raw_dim());
            for c in 0..tmp.ncols() {
                let col = solve_upper(&l_t, &tmp.column(c).to_owned())?;
                out.column_mut(c).assign(&col);
            }
            out
        };
        let mean_out = k_fu.dot(&kinv_m);
        // a_i = L^{-1} k_uf_i, reused for both variance terms below.
        let q_fu = solve_lower_matrix(&l_uu, &k_fu.t().to_owned())?;
        let l_inv_chol_s = solve_lower_matrix(&l_uu, &self.chol_s)?;
        let mut var_out = Array2::<f64>::zeros((n, out_d));
        for i in 0..n {
            let var_i: Vec<f64> = h_var.row(i).iter().copied().collect();
            let correction: f64 = (0..in_d.min(var_i.len()))
                .map(|d| -0.5 * var_i[d] / (ls * ls))
                .sum::<f64>()
                .exp();
            let k_self = var_k * correction;
            // Nystrom term k_fu K_uu^{-1} k_uf = ||a_i||^2.
            let q_sq: f64 = q_fu.column(i).iter().map(|&v| v * v).sum();
            // Variational term k_fu K_uu^{-1} S K_uu^{-1} k_uf
            // = ||(L^{-1} L_S)^T a_i||^2.
            let s_sq: f64 = l_inv_chol_s
                .t()
                .dot(&q_fu.column(i).to_owned())
                .iter()
                .map(|&v| v * v)
                .sum();
            // Clamp to keep the variance strictly positive; the same value
            // is shared across all output dimensions of the layer.
            let v = (k_self - q_sq + s_sq + self.config.noise).max(1e-10);
            for od in 0..out_d {
                var_out[[i, od]] = v;
            }
        }
        Ok((mean_out, var_out))
    }
}
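
/// A deep Gaussian process: a stack of sparse variational GP layers, where
/// each layer's predictive mean and variance are fed to the next layer as an
/// uncertain input.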
#[derive(Debug, Clone)]
pub struct DeepGP {
    layers: Vec<DeepGPLayer>,
    input_dim: usize,
}

impl DeepGP {
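    /// Builds a deep GP from per-layer configurations. Each layer's inducing
    /// inputs are seeded with evenly strided rows of `x_init`, truncated to
    /// the layer's input width; zeros are used when the hidden width exceeds
    /// the data width.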
    pub fn new(
        layer_configs: Vec<DeepGPLayerConfig>,
        x_init: &Array2<f64>,
    ) -> StatsResult<Self> {
        if layer_configs.is_empty() {
            return Err(StatsError::InvalidArgument(
                "DeepGP requires at least one layer".into(),
            ));
        }
        let input_dim = x_init.ncols();
        let mut layers = Vec::with_capacity(layer_configs.len());
        let mut cur_in_dim = input_dim;
        for config in layer_configs {
            let m = config.num_inducing.min(x_init.nrows());
            // Seed inducing inputs from the data when the layer's input
            // width fits inside x_init; otherwise fall back to zeros.
            let inducing = if cur_in_dim <= x_init.ncols() {
                let cols = cur_in_dim;
                let mut ind = Array2::<f64>::zeros((m, cols));
                for i in 0..m {
                    let row_idx = i * x_init.nrows() / m;
                    for d in 0..cols {
                        ind[[i, d]] = x_init[[row_idx, d]];
                    }
                }
                ind
            } else {
                Array2::<f64>::zeros((m, cur_in_dim))
            };
            let next_dim = config.out_dim;
            let layer = DeepGPLayer::new(config, cur_in_dim, inducing);
            layers.push(layer);
            cur_in_dim = next_dim;
        }
        Ok(Self { layers, input_dim })
    }
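
    /// Pushes `x_test` through every layer: the observed inputs start with
    /// zero variance, and each layer's predictive mean and variance become
    /// the next layer's input distribution. Returns `(means, variances)`,
    /// each of shape `(n, out_dim of the final layer)`.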
    pub fn predict(
        &self,
        x_test: &Array2<f64>,
    ) -> StatsResult<(Array2<f64>, Array2<f64>)> {
        if self.layers.is_empty() {
            return Err(StatsError::InvalidArgument("DeepGP has no layers".into()));
        }
        // Guard against silently truncated kernels downstream: the RBF zips
        // coordinates and would otherwise ignore a width mismatch.
        if x_test.ncols() != self.input_dim {
            return Err(StatsError::InvalidArgument(
                "x_test column count does not match the model's input dimension".into(),
            ));
        }
        let n = x_test.nrows();
        let mut h_mean = x_test.clone();
        // The observed inputs are deterministic, so start with zero variance.
        let mut h_var = Array2::<f64>::zeros((n, self.input_dim));
        for layer in &self.layers {
            let in_d = h_mean.ncols();
            if h_var.ncols() != in_d {
                h_var = Array2::<f64>::zeros((n, in_d));
            }
            let (new_mean, new_var) = layer.propagate(&h_mean, &h_var)?;
            h_mean = new_mean;
            h_var = new_var;
        }
        Ok((h_mean, h_var))
    }
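
    /// Number of stacked GP layers.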
    pub fn n_layers(&self) -> usize {
        self.layers.len()
    }
}
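
// A minimal smoke test sketching intended usage; the shapes, layer widths,
// and data below are illustrative assumptions, not part of any contract.
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    #[test]
    fn predict_shapes_and_finite_variance() {
        // 8 training points in 2-D, used only to seed inducing inputs.
        let x_init =
            Array2::from_shape_fn((8, 2), |(i, j)| (i as f64) * 0.3 + (j as f64) * 0.1);
        let configs = vec![
            DeepGPLayerConfig { num_inducing: 4, out_dim: 2, ..Default::default() },
            DeepGPLayerConfig { num_inducing: 4, out_dim: 1, ..Default::default() },
        ];
        let dgp = DeepGP::new(configs, &x_init).expect("construction should succeed");
        assert_eq!(dgp.n_layers(), 2);

        let x_test = Array2::from_shape_fn((5, 2), |(i, j)| (i + j) as f64 * 0.2);
        let (mean, var) = dgp.predict(&x_test).expect("prediction should succeed");
        assert_eq!(mean.dim(), (5, 1));
        assert_eq!(var.dim(), (5, 1));
        // Predictive variances stay strictly positive by construction.
        assert!(var.iter().all(|&v| v > 0.0));
    }
}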