mod advi;
mod families;
mod svi;
pub use advi::*;
pub use families::*;
pub use svi::*;
use crate::error::{StatsError, StatsResult as Result};
use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2, Axis};
use scirs2_core::validation::*;
use statrs::statistics::Statistics;
use std::f64::consts::PI;
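/// Bookkeeping for a variational optimizer run: the ELBO trace, gradient
/// norms, and parameter-change norms, plus convergence checks over those
/// traces (absolute ELBO change, last gradient norm, and last parameter
/// change, each against a tolerance).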
#[derive(Debug, Clone)]
pub struct VariationalDiagnostics {
pub elbo_trace: Vec<f64>,
pub gradient_norms: Vec<f64>,
pub param_change_norms: Vec<f64>,
pub converged: bool,
pub n_iterations: usize,
pub final_elbo: f64,
}
impl VariationalDiagnostics {
pub fn new() -> Self {
Self {
elbo_trace: Vec::new(),
gradient_norms: Vec::new(),
param_change_norms: Vec::new(),
converged: false,
n_iterations: 0,
final_elbo: f64::NEG_INFINITY,
}
}
pub fn record_elbo(&mut self, elbo: f64) {
self.elbo_trace.push(elbo);
self.final_elbo = elbo;
self.n_iterations = self.elbo_trace.len();
}
pub fn record_gradient_norm(&mut self, norm: f64) {
self.gradient_norms.push(norm);
}
pub fn record_param_change(&mut self, norm: f64) {
self.param_change_norms.push(norm);
}
pub fn check_elbo_convergence(&self, tol: f64) -> bool {
if self.elbo_trace.len() < 2 {
return false;
}
let n = self.elbo_trace.len();
(self.elbo_trace[n - 1] - self.elbo_trace[n - 2]).abs() < tol
}
pub fn check_gradient_convergence(&self, tol: f64) -> bool {
if let Some(&last_norm) = self.gradient_norms.last() {
last_norm < tol
} else {
false
}
}
pub fn check_param_convergence(&self, tol: f64) -> bool {
if let Some(&last_change) = self.param_change_norms.last() {
last_change < tol
} else {
false
}
}
pub fn relative_elbo_change(&self, window: usize) -> Option<f64> {
let n = self.elbo_trace.len();
if n < window + 1 {
return None;
}
let recent = self.elbo_trace[n - 1];
let earlier = self.elbo_trace[n - 1 - window];
if earlier.abs() < 1e-15 {
return Some(f64::INFINITY);
}
Some((recent - earlier).abs() / earlier.abs())
}
pub fn elbo_summary(&self) -> ElboSummary {
let n = self.elbo_trace.len();
if n == 0 {
return ElboSummary {
min: f64::NAN,
max: f64::NAN,
final_value: f64::NAN,
mean_change: f64::NAN,
monotonic: true,
};
}
let min = self
.elbo_trace
.iter()
.copied()
.fold(f64::INFINITY, f64::min);
let max = self
.elbo_trace
.iter()
.copied()
.fold(f64::NEG_INFINITY, f64::max);
let mut monotonic = true;
let mut total_change = 0.0;
for i in 1..n {
let change = self.elbo_trace[i] - self.elbo_trace[i - 1];
total_change += change.abs();
if change < -1e-10 {
monotonic = false;
}
}
let mean_change = if n > 1 {
total_change / (n - 1) as f64
} else {
0.0
};
ElboSummary {
min,
max,
final_value: self.elbo_trace[n - 1],
mean_change,
monotonic,
}
}
}
impl Default for VariationalDiagnostics {
fn default() -> Self {
Self::new()
}
}
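/// Summary of an ELBO trace: extrema, final value, mean absolute change per
/// iteration, and whether the trace was monotone non-decreasing (within 1e-10).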
#[derive(Debug, Clone)]
pub struct ElboSummary {
pub min: f64,
pub max: f64,
pub final_value: f64,
pub mean_change: f64,
pub monotonic: bool,
}
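/// Fully factorized Gaussian variational family
/// q(z) = prod_i N(z_i | mu_i, sigma_i^2), parameterized by means and log
/// standard deviations so that sigma_i = exp(log_std_i) stays positive for any
/// unconstrained parameter value. Sampling uses the reparameterization trick
/// z = mu + sigma * epsilon with epsilon ~ N(0, I).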
#[derive(Debug, Clone)]
pub struct MeanFieldGaussian {
pub means: Array1<f64>,
pub log_stds: Array1<f64>,
pub dim: usize,
}
impl MeanFieldGaussian {
pub fn new(dim: usize) -> Result<Self> {
check_positive(dim, "dim")?;
Ok(Self {
means: Array1::zeros(dim),
log_stds: Array1::zeros(dim),
dim,
})
}
pub fn from_params(means: Array1<f64>, log_stds: Array1<f64>) -> Result<Self> {
if means.len() != log_stds.len() {
return Err(StatsError::DimensionMismatch(format!(
"means length ({}) must match log_stds length ({})",
means.len(),
log_stds.len()
)));
}
checkarray_finite(&means, "means")?;
checkarray_finite(&log_stds, "log_stds")?;
let dim = means.len();
Ok(Self {
means,
log_stds,
dim,
})
}
pub fn stds(&self) -> Array1<f64> {
self.log_stds.mapv(f64::exp)
}
pub fn variances(&self) -> Array1<f64> {
self.log_stds.mapv(|ls| (2.0 * ls).exp())
}
pub fn sample(&self, epsilon: &Array1<f64>) -> Result<Array1<f64>> {
if epsilon.len() != self.dim {
return Err(StatsError::DimensionMismatch(format!(
"epsilon length ({}) must match dimension ({})",
epsilon.len(),
self.dim
)));
}
let stds = self.stds();
Ok(&self.means + &(&stds * epsilon))
}
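/// Entropy of the factorized Gaussian:
/// H[q] = sum_i [0.5 * (1 + ln(2*pi)) + log_std_i].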
pub fn entropy(&self) -> f64 {
let base = 0.5 * (1.0 + (2.0 * PI).ln());
self.log_stds.iter().map(|&ls| base + ls).sum::<f64>()
}
pub fn log_prob(&self, z: &Array1<f64>) -> Result<f64> {
if z.len() != self.dim {
return Err(StatsError::DimensionMismatch(format!(
"z length ({}) must match dimension ({})",
z.len(),
self.dim
)));
}
let stds = self.stds();
let mut log_prob = 0.0;
for i in 0..self.dim {
let diff = z[i] - self.means[i];
log_prob += -0.5 * (2.0 * PI).ln() - self.log_stds[i] - 0.5 * (diff / stds[i]).powi(2);
}
Ok(log_prob)
}
pub fn n_params(&self) -> usize {
2 * self.dim
}
pub fn get_params(&self) -> Array1<f64> {
let mut params = Array1::zeros(2 * self.dim);
for i in 0..self.dim {
params[i] = self.means[i];
params[self.dim + i] = self.log_stds[i];
}
params
}
pub fn set_params(&mut self, params: &Array1<f64>) -> Result<()> {
if params.len() != 2 * self.dim {
return Err(StatsError::DimensionMismatch(format!(
"params length ({}) must be 2 * dim ({})",
params.len(),
2 * self.dim
)));
}
for i in 0..self.dim {
self.means[i] = params[i];
self.log_stds[i] = params[self.dim + i];
}
Ok(())
}
}
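/// Full-rank Gaussian variational family q(z) = N(mu, L L^T), parameterized by
/// the mean and a lower-triangular Cholesky factor `L`. Sampling uses
/// z = mu + L * epsilon, and ln|det Sigma|^(1/2) = sum_i ln|L_ii|.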
#[derive(Debug, Clone)]
pub struct FullRankGaussian {
pub mean: Array1<f64>,
pub chol_factor: Array2<f64>,
pub dim: usize,
}
impl FullRankGaussian {
pub fn new(dim: usize) -> Result<Self> {
check_positive(dim, "dim")?;
Ok(Self {
mean: Array1::zeros(dim),
chol_factor: Array2::eye(dim),
dim,
})
}
pub fn from_params(mean: Array1<f64>, chol_factor: Array2<f64>) -> Result<Self> {
let dim = mean.len();
if chol_factor.nrows() != dim || chol_factor.ncols() != dim {
return Err(StatsError::DimensionMismatch(format!(
"chol_factor shape ({},{}) must be ({},{})",
chol_factor.nrows(),
chol_factor.ncols(),
dim,
dim
)));
}
checkarray_finite(&mean, "mean")?;
checkarray_finite(&chol_factor, "chol_factor")?;
Ok(Self {
mean,
chol_factor,
dim,
})
}
pub fn covariance(&self) -> Array2<f64> {
self.chol_factor.dot(&self.chol_factor.t())
}
pub fn precision(&self) -> Result<Array2<f64>> {
let cov = self.covariance();
scirs2_linalg::inv(&cov.view(), None).map_err(|e| {
StatsError::ComputationError(format!("Failed to invert covariance: {}", e))
})
}
pub fn sample(&self, epsilon: &Array1<f64>) -> Result<Array1<f64>> {
if epsilon.len() != self.dim {
return Err(StatsError::DimensionMismatch(format!(
"epsilon length ({}) must match dimension ({})",
epsilon.len(),
self.dim
)));
}
Ok(&self.mean + &self.chol_factor.dot(epsilon))
}
pub fn entropy(&self) -> f64 {
let base = 0.5 * self.dim as f64 * (1.0 + (2.0 * PI).ln());
let log_det: f64 = (0..self.dim)
.map(|i| self.chol_factor[[i, i]].abs().ln())
.sum();
base + log_det
}
pub fn log_prob(&self, z: &Array1<f64>) -> Result<f64> {
if z.len() != self.dim {
return Err(StatsError::DimensionMismatch(format!(
"z length ({}) must match dimension ({})",
z.len(),
self.dim
)));
}
let precision = self.precision()?;
let diff = z - &self.mean;
let mahal = diff.dot(&precision.dot(&diff));
let log_det: f64 = (0..self.dim)
.map(|i| self.chol_factor[[i, i]].abs().ln())
.sum();
Ok(-0.5 * self.dim as f64 * (2.0 * PI).ln() - log_det - 0.5 * mahal)
}
pub fn n_params(&self) -> usize {
self.dim + self.dim * (self.dim + 1) / 2
}
pub fn get_params(&self) -> Array1<f64> {
let n_tril = self.dim * (self.dim + 1) / 2;
let mut params = Array1::zeros(self.dim + n_tril);
for i in 0..self.dim {
params[i] = self.mean[i];
}
let mut idx = self.dim;
for i in 0..self.dim {
for j in 0..=i {
params[idx] = self.chol_factor[[i, j]];
idx += 1;
}
}
params
}
pub fn set_params(&mut self, params: &Array1<f64>) -> Result<()> {
let n_tril = self.dim * (self.dim + 1) / 2;
let expected = self.dim + n_tril;
if params.len() != expected {
return Err(StatsError::DimensionMismatch(format!(
"params length ({}) must be {}",
params.len(),
expected
)));
}
for i in 0..self.dim {
self.mean[i] = params[i];
}
let mut idx = self.dim;
self.chol_factor = Array2::zeros((self.dim, self.dim));
for i in 0..self.dim {
for j in 0..=i {
self.chol_factor[[i, j]] = params[idx];
idx += 1;
}
}
Ok(())
}
}
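/// Variational family built by pushing a mean-field Gaussian base distribution
/// through a sequence of invertible flow layers. By the change-of-variables
/// formula, log q_K(z_K) = log q_0(z_0) - sum_k ln|det(d f_k / d z_{k-1})|.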
#[derive(Debug, Clone)]
pub struct NormalizingFlowVI {
pub base: MeanFieldGaussian,
pub flows: Vec<FlowLayer>,
pub dim: usize,
}
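/// A single invertible transform. The planar and radial layers follow Rezende
/// & Mohamed (2015), "Variational Inference with Normalizing Flows". Note that
/// invertibility of a planar flow with a tanh nonlinearity additionally
/// requires w^T u >= -1, which is not enforced here.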
#[derive(Debug, Clone)]
pub enum FlowLayer {
Planar {
u: Array1<f64>,
w: Array1<f64>,
b: f64,
},
Radial {
z0: Array1<f64>,
log_alpha: f64,
beta: f64,
},
}
impl NormalizingFlowVI {
pub fn new(dim: usize, n_flows: usize) -> Result<Self> {
check_positive(dim, "dim")?;
let base = MeanFieldGaussian::new(dim)?;
let mut flows = Vec::with_capacity(n_flows);
for _ in 0..n_flows {
let u = Array1::from_elem(dim, 0.01);
let w = Array1::from_elem(dim, 0.01);
flows.push(FlowLayer::Planar { u, w, b: 0.0 });
}
Ok(Self { base, flows, dim })
}
pub fn add_planar_flow(&mut self, u: Array1<f64>, w: Array1<f64>, b: f64) -> Result<()> {
if u.len() != self.dim || w.len() != self.dim {
return Err(StatsError::DimensionMismatch(format!(
"u ({}) and w ({}) must have dimension {}",
u.len(),
w.len(),
self.dim
)));
}
self.flows.push(FlowLayer::Planar { u, w, b });
Ok(())
}
pub fn add_radial_flow(&mut self, z0: Array1<f64>, log_alpha: f64, beta: f64) -> Result<()> {
if z0.len() != self.dim {
return Err(StatsError::DimensionMismatch(format!(
"z0 ({}) must have dimension {}",
z0.len(),
self.dim
)));
}
self.flows.push(FlowLayer::Radial {
z0,
log_alpha,
beta,
});
Ok(())
}
pub fn transform(&self, z0: &Array1<f64>) -> Result<(Array1<f64>, f64)> {
if z0.len() != self.dim {
return Err(StatsError::DimensionMismatch(format!(
"z0 length ({}) must match dimension ({})",
z0.len(),
self.dim
)));
}
let mut z = z0.clone();
let mut sum_log_det_jac = 0.0;
for flow in &self.flows {
let (z_new, log_det) = apply_flow_layer(flow, &z)?;
z = z_new;
sum_log_det_jac += log_det;
}
Ok((z, sum_log_det_jac))
}
pub fn sample(&self, epsilon: &Array1<f64>) -> Result<(Array1<f64>, f64)> {
let z0 = self.base.sample(epsilon)?;
let (z_k, sum_log_det) = self.transform(&z0)?;
let log_q0 = self.base.log_prob(&z0)?;
let log_q_k = log_q0 - sum_log_det;
Ok((z_k, log_q_k))
}
pub fn n_flow_params(&self) -> usize {
self.flows
.iter()
.map(|f| match f {
FlowLayer::Planar { u, w, .. } => u.len() + w.len() + 1,
FlowLayer::Radial { z0, .. } => z0.len() + 2,
})
.sum()
}
}
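/// Applies a single flow layer and returns `(f(z), ln|det J_f(z)|)`.
///
/// Planar: f(z) = z + u * tanh(w^T z + b), with
/// det J = 1 + u^T psi(z), psi(z) = (1 - tanh^2(w^T z + b)) * w.
/// Radial: f(z) = z + beta * h * (z - z0), with h = 1/(alpha + r),
/// r = ||z - z0||, and det J = (1 + beta*h)^(d-1) * (1 + beta*h + beta*h'*r),
/// where h' = -1/(alpha + r)^2.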
fn apply_flow_layer(flow: &FlowLayer, z: &Array1<f64>) -> Result<(Array1<f64>, f64)> {
match flow {
FlowLayer::Planar { u, w, b } => {
let activation = w.dot(z) + b;
let tanh_val = activation.tanh();
let z_new = z + &(u * tanh_val);
let dtanh = 1.0 - tanh_val * tanh_val;
let psi = w * dtanh;
let det = 1.0 + u.dot(&psi);
let log_det = det.abs().ln();
Ok((z_new, log_det))
}
FlowLayer::Radial {
z0,
log_alpha,
beta,
} => {
let alpha = log_alpha.exp();
let diff = z - z0;
let r = diff.dot(&diff).sqrt();
let h = 1.0 / (alpha + r);
let z_new = z + &(&diff * (*beta * h));
let d = z.len() as f64;
let h_prime = -1.0 / ((alpha + r) * (alpha + r));
let term1 = (1.0 + beta * h).powi(d as i32 - 1);
let term2 = 1.0 + beta * h + beta * h_prime * r;
let det = term1 * term2;
let log_det = det.abs().ln();
Ok((z_new, log_det))
}
}
}
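/// Variational Bayesian linear regression with a conjugate Gaussian prior on
/// the coefficients `beta` and a Gamma prior on the noise precision `tau`:
/// y ~ N(X beta, tau^{-1} I). `fit` runs coordinate-ascent variational
/// inference (CAVI), alternating closed-form updates of q(beta) = N(m, S) and
/// q(tau) = Gamma(a, b) until the ELBO change falls below `tol`.
///
/// A minimal usage sketch (hypothetical `x: Array2<f64>` and `y: Array1<f64>`
/// built elsewhere):
/// ```ignore
/// let mut model = VariationalBayesianRegression::new(x.ncols(), true)?;
/// let result = model.fit(x.view(), y.view(), 200, 1e-6)?;
/// let intervals = result.credible_intervals(0.95)?;
/// ```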
#[derive(Debug, Clone)]
pub struct VariationalBayesianRegression {
pub mean_beta: Array1<f64>,
pub cov_beta: Array2<f64>,
pub shape_tau: f64,
pub rate_tau: f64,
pub prior_mean_beta: Array1<f64>,
pub prior_cov_beta: Array2<f64>,
pub priorshape_tau: f64,
pub prior_rate_tau: f64,
pub n_features: usize,
pub fit_intercept: bool,
}
impl VariationalBayesianRegression {
pub fn new(n_features: usize, fit_intercept: bool) -> Result<Self> {
check_positive(n_features, "n_features")?;
let prior_mean_beta = Array1::zeros(n_features);
let prior_cov_beta = Array2::eye(n_features) * 100.0;
let priorshape_tau = 1e-3;
let prior_rate_tau = 1e-3;
Ok(Self {
mean_beta: prior_mean_beta.clone(),
cov_beta: prior_cov_beta.clone(),
shape_tau: priorshape_tau,
rate_tau: prior_rate_tau,
prior_mean_beta,
prior_cov_beta,
priorshape_tau,
prior_rate_tau,
n_features,
fit_intercept,
})
}
pub fn with_priors(
mut self,
prior_mean_beta: Array1<f64>,
prior_cov_beta: Array2<f64>,
priorshape_tau: f64,
prior_rate_tau: f64,
) -> Result<Self> {
checkarray_finite(&prior_mean_beta, "prior_mean_beta")?;
checkarray_finite(&prior_cov_beta, "prior_cov_beta")?;
check_positive(priorshape_tau, "priorshape_tau")?;
check_positive(prior_rate_tau, "prior_rate_tau")?;
self.prior_mean_beta = prior_mean_beta.clone();
self.prior_cov_beta = prior_cov_beta.clone();
self.priorshape_tau = priorshape_tau;
self.prior_rate_tau = prior_rate_tau;
self.mean_beta = prior_mean_beta;
self.cov_beta = prior_cov_beta;
self.shape_tau = priorshape_tau;
self.rate_tau = prior_rate_tau;
Ok(self)
}
pub fn fit(
&mut self,
x: ArrayView2<f64>,
y: ArrayView1<f64>,
max_iter: usize,
tol: f64,
) -> Result<VariationalRegressionResult> {
checkarray_finite(&x, "x")?;
checkarray_finite(&y, "y")?;
check_positive(max_iter, "max_iter")?;
check_positive(tol, "tol")?;
let (n_samples_, n_features) = x.dim();
if y.len() != n_samples_ {
return Err(StatsError::DimensionMismatch(format!(
"y length ({}) must match x rows ({})",
y.len(),
n_samples_
)));
}
if n_features != self.n_features {
return Err(StatsError::DimensionMismatch(format!(
"x features ({}) must match model features ({})",
n_features, self.n_features
)));
}
let (x_centered, y_centered, x_mean, y_mean) = if self.fit_intercept {
let x_mean = x.mean_axis(Axis(0)).expect("x must have at least one row");
let y_mean = y.mean();
let mut x_centered = x.to_owned();
for mut row in x_centered.rows_mut() {
row -= &x_mean;
}
let y_centered = &y.to_owned() - y_mean;
(x_centered, y_centered, Some(x_mean), Some(y_mean))
} else {
(x.to_owned(), y.to_owned(), None, None)
};
let xtx = x_centered.t().dot(&x_centered);
let xty = x_centered.t().dot(&y_centered);
let yty = y_centered.dot(&y_centered);
let prior_precision =
scirs2_linalg::inv(&self.prior_cov_beta.view(), None).map_err(|e| {
StatsError::ComputationError(format!("Failed to invert prior covariance: {}", e))
})?;
let mut prev_elbo = f64::NEG_INFINITY;
let mut elbo_history = Vec::new();
let mut converged = false;
for iter in 0..max_iter {
self.update_beta_variational(&xtx, &xty, &prior_precision)?;
self.update_tau_variational(n_samples_ as f64, &xtx, &xty, yty)?;
let elbo = self.compute_elbo(n_samples_ as f64, &xtx, &xty, yty, &prior_precision)?;
elbo_history.push(elbo);
if iter > 0 && (elbo - prev_elbo).abs() < tol {
prev_elbo = elbo;
converged = true;
break;
}
prev_elbo = elbo;
}
Ok(VariationalRegressionResult {
mean_beta: self.mean_beta.clone(),
cov_beta: self.cov_beta.clone(),
shape_tau: self.shape_tau,
rate_tau: self.rate_tau,
elbo: prev_elbo,
elbo_history: elbo_history.clone(),
n_samples_,
n_features: self.n_features,
x_mean,
y_mean,
converged,
})
}
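/// CAVI update for q(beta) = N(m, S):
/// S = (Lambda_0 + E[tau] X^T X)^{-1} and
/// m = S (Lambda_0 m_0 + E[tau] X^T y),
/// where Lambda_0 is the prior precision and E[tau] = shape_tau / rate_tau.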
fn update_beta_variational(
&mut self,
xtx: &Array2<f64>,
xty: &Array1<f64>,
prior_precision: &Array2<f64>,
) -> Result<()> {
let expected_tau = self.shape_tau / self.rate_tau;
let precision_beta = prior_precision + &(xtx * expected_tau);
self.cov_beta = scirs2_linalg::inv(&precision_beta.view(), None).map_err(|e| {
StatsError::ComputationError(format!("Failed to invert precision: {}", e))
})?;
let prior_contrib = prior_precision.dot(&self.prior_mean_beta);
let data_contrib = xty * expected_tau;
self.mean_beta = self.cov_beta.dot(&(prior_contrib + data_contrib));
Ok(())
}
fn update_tau_variational(
&mut self,
n_samples_: f64,
xtx: &Array2<f64>,
xty: &Array1<f64>,
yty: f64,
) -> Result<()> {
self.shape_tau = self.priorshape_tau + n_samples_ / 2.0;
// E_q[(y - X beta)^T (y - X beta)]
//   = y^T y - 2 mu^T X^T y + tr(X^T X E[beta beta^T]).
let expected_beta_outer = &self.cov_beta + outer_product(&self.mean_beta);
let trace_term = (xtx * &expected_beta_outer).sum();
let cross_term = 2.0 * self.mean_beta.dot(xty);
self.rate_tau = self.prior_rate_tau + 0.5 * (yty - cross_term + trace_term);
Ok(())
}
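/// Evidence lower bound under the current variational factors:
/// ELBO = E_q[ln p(y | beta, tau)] + E_q[ln p(beta)] + E_q[ln p(tau)]
/// + H[q(beta)] + H[q(tau)], all available in closed form for this model.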
fn compute_elbo(
&self,
n_samples_: f64,
xtx: &Array2<f64>,
xty: &Array1<f64>,
yty: f64,
prior_precision: &Array2<f64>,
) -> Result<f64> {
let expected_tau = self.shape_tau / self.rate_tau;
let expected_log_tau = digamma(self.shape_tau) - self.rate_tau.ln();
let diff =
yty - 2.0 * self.mean_beta.dot(xty) + self.mean_beta.dot(&xtx.dot(&self.mean_beta));
let trace_term = (xtx * &self.cov_beta).sum();
let likelihood_term = 0.5 * n_samples_ * expected_log_tau
- 0.5 * n_samples_ * (2.0_f64 * PI).ln()
- 0.5 * expected_tau * (diff + trace_term);
let beta_diff = &self.mean_beta - &self.prior_mean_beta;
let beta_quad = beta_diff.dot(&prior_precision.dot(&beta_diff));
let beta_trace = (prior_precision * &self.cov_beta).sum();
let prior_det = scirs2_linalg::det(&prior_precision.view(), None).map_err(|e| {
StatsError::ComputationError(format!("Failed to compute determinant: {}", e))
})?;
let beta_prior_term = 0.5 * prior_det.ln()
- 0.5 * self.n_features as f64 * (2.0_f64 * PI).ln()
- 0.5 * (beta_quad + beta_trace);
let tau_prior_term = self.priorshape_tau * self.prior_rate_tau.ln()
- lgamma(self.priorshape_tau)
+ (self.priorshape_tau - 1.0) * expected_log_tau
- self.prior_rate_tau * expected_tau;
let var_det = scirs2_linalg::det(&self.cov_beta.view(), None).map_err(|e| {
StatsError::ComputationError(format!("Failed to compute determinant: {}", e))
})?;
let beta_entropy =
0.5 * self.n_features as f64 * (1.0 + (2.0_f64 * PI).ln()) + 0.5 * var_det.ln();
let tau_entropy = self.shape_tau - self.rate_tau.ln()
+ lgamma(self.shape_tau)
+ (1.0 - self.shape_tau) * digamma(self.shape_tau);
Ok(likelihood_term + beta_prior_term + tau_prior_term + beta_entropy + tau_entropy)
}
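/// Posterior predictive for new inputs: per row, mean `x . mean_beta` and
/// variance `sigma2 + x^T cov_beta x`, where `sigma2 = rate_tau / shape_tau`
/// is a plug-in estimate of the noise variance (the inverse of E[tau]).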
pub fn predict(
&self,
x: ArrayView2<f64>,
result: &VariationalRegressionResult,
) -> Result<VariationalPredictionResult> {
checkarray_finite(&x, "x")?;
let (n_test, n_features) = x.dim();
if n_features != result.n_features {
return Err(StatsError::DimensionMismatch(format!(
"x has {} features, expected {}",
n_features, result.n_features
)));
}
let x_centered = if let Some(ref x_mean) = result.x_mean {
let mut x_c = x.to_owned();
for mut row in x_c.rows_mut() {
row -= x_mean;
}
x_c
} else {
x.to_owned()
};
let y_pred_centered = x_centered.dot(&result.mean_beta);
let y_pred = if let Some(y_mean) = result.y_mean {
&y_pred_centered + y_mean
} else {
y_pred_centered.clone()
};
let expected_noise_variance = result.rate_tau / result.shape_tau;
let mut predictive_variance = Array1::zeros(n_test);
for i in 0..n_test {
let x_row = x_centered.row(i);
let model_variance = x_row.dot(&result.cov_beta.dot(&x_row));
predictive_variance[i] = expected_noise_variance + model_variance;
}
Ok(VariationalPredictionResult {
mean: y_pred,
variance: predictive_variance.clone(),
model_uncertainty: predictive_variance.mapv(|v| (v - expected_noise_variance).max(0.0)),
noise_variance: expected_noise_variance,
})
}
}
#[derive(Debug, Clone)]
pub struct VariationalRegressionResult {
pub mean_beta: Array1<f64>,
pub cov_beta: Array2<f64>,
pub shape_tau: f64,
pub rate_tau: f64,
pub elbo: f64,
pub elbo_history: Vec<f64>,
pub n_samples_: usize,
pub n_features: usize,
pub x_mean: Option<Array1<f64>>,
pub y_mean: Option<f64>,
pub converged: bool,
}
impl VariationalRegressionResult {
pub fn std_beta(&self) -> Array1<f64> {
self.cov_beta.diag().mapv(f64::sqrt)
}
pub fn precision_stats(&self) -> (f64, f64) {
let mean = self.shape_tau / self.rate_tau;
let variance = self.shape_tau / (self.rate_tau * self.rate_tau);
(mean, variance.sqrt())
}
pub fn credible_intervals(&self, confidence: f64) -> Result<Array2<f64>> {
check_probability(confidence, "confidence")?;
let n_features = self.mean_beta.len();
let mut intervals = Array2::zeros((n_features, 2));
let alpha = (1.0 - confidence) / 2.0;
let z_critical = normal_ppf(1.0 - alpha)?;
for i in 0..n_features {
let mean = self.mean_beta[i];
let std = self.cov_beta[[i, i]].sqrt();
intervals[[i, 0]] = mean - z_critical * std;
intervals[[i, 1]] = mean + z_critical * std;
}
Ok(intervals)
}
}
#[derive(Debug, Clone)]
pub struct VariationalPredictionResult {
pub mean: Array1<f64>,
pub variance: Array1<f64>,
pub model_uncertainty: Array1<f64>,
pub noise_variance: f64,
}
impl VariationalPredictionResult {
pub fn std(&self) -> Array1<f64> {
self.variance.mapv(f64::sqrt)
}
pub fn credible_intervals(&self, confidence: f64) -> Result<Array2<f64>> {
check_probability(confidence, "confidence")?;
let n_predictions = self.mean.len();
let mut intervals = Array2::zeros((n_predictions, 2));
let alpha = (1.0 - confidence) / 2.0;
let z_critical = normal_ppf(1.0 - alpha)?;
for i in 0..n_predictions {
let mean = self.mean[i];
let std = self.variance[i].sqrt();
intervals[[i, 0]] = mean - z_critical * std;
intervals[[i, 1]] = mean + z_critical * std;
}
Ok(intervals)
}
}
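/// Variational linear regression with automatic relevance determination (ARD):
/// each coefficient beta_i gets its own precision alpha_i ~ Gamma(a0, b0), so
/// irrelevant features are driven to large E[alpha_i] and their coefficients
/// shrink toward zero. The posterior over beta is factorized with a diagonal
/// covariance, which keeps every update closed-form.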
#[derive(Debug, Clone)]
pub struct VariationalARD {
pub mean_beta: Array1<f64>,
pub var_beta: Array1<f64>,
pub shape_alpha: Array1<f64>,
pub rate_alpha: Array1<f64>,
pub shape_tau: f64,
pub rate_tau: f64,
pub priorshape_alpha: f64,
pub prior_rate_alpha: f64,
pub priorshape_tau: f64,
pub prior_rate_tau: f64,
pub n_features: usize,
pub fit_intercept: bool,
}
impl VariationalARD {
pub fn new(n_features: usize, fit_intercept: bool) -> Result<Self> {
check_positive(n_features, "n_features")?;
let priorshape_alpha = 1e-3;
let prior_rate_alpha = 1e-3;
let priorshape_tau = 1e-3;
let prior_rate_tau = 1e-3;
Ok(Self {
mean_beta: Array1::zeros(n_features),
var_beta: Array1::from_elem(n_features, 1.0),
shape_alpha: Array1::from_elem(n_features, priorshape_alpha),
rate_alpha: Array1::from_elem(n_features, prior_rate_alpha),
shape_tau: priorshape_tau,
rate_tau: prior_rate_tau,
priorshape_alpha,
prior_rate_alpha,
priorshape_tau,
prior_rate_tau,
n_features,
fit_intercept,
})
}
pub fn fit(
&mut self,
x: ArrayView2<f64>,
y: ArrayView1<f64>,
max_iter: usize,
tol: f64,
) -> Result<VariationalARDResult> {
checkarray_finite(&x, "x")?;
checkarray_finite(&y, "y")?;
check_positive(max_iter, "max_iter")?;
check_positive(tol, "tol")?;
let (n_samples_, n_features) = x.dim();
if y.len() != n_samples_ {
return Err(StatsError::DimensionMismatch(format!(
"y length ({}) must match x rows ({})",
y.len(),
n_samples_
)));
}
let (x_centered, y_centered, x_mean, y_mean) = if self.fit_intercept {
let x_mean = x.mean_axis(Axis(0)).expect("x must have at least one row");
let y_mean = y.mean();
let mut x_centered = x.to_owned();
for mut row in x_centered.rows_mut() {
row -= &x_mean;
}
let y_centered = &y.to_owned() - y_mean;
(x_centered, y_centered, Some(x_mean), Some(y_mean))
} else {
(x.to_owned(), y.to_owned(), None, None)
};
let xtx = x_centered.t().dot(&x_centered);
let xty = x_centered.t().dot(&y_centered);
let yty = y_centered.dot(&y_centered);
let mut prev_elbo = f64::NEG_INFINITY;
let mut elbo_history = Vec::new();
let mut converged = false;
for iter in 0..max_iter {
self.update_beta_ard(&xtx, &xty)?;
self.update_alpha_ard()?;
self.update_tau_ard(n_samples_ as f64, &xtx, &xty, yty)?;
let elbo = self.compute_elbo_ard(n_samples_ as f64, &xtx, &xty, yty)?;
elbo_history.push(elbo);
if iter > 0 && (elbo - prev_elbo).abs() < tol {
prev_elbo = elbo;
converged = true;
break;
}
if iter % 10 == 0 {
self.prune_features()?;
}
prev_elbo = elbo;
}
Ok(VariationalARDResult {
mean_beta: self.mean_beta.clone(),
var_beta: self.var_beta.clone(),
shape_alpha: self.shape_alpha.clone(),
rate_alpha: self.rate_alpha.clone(),
shape_tau: self.shape_tau,
rate_tau: self.rate_tau,
elbo: prev_elbo,
elbo_history: elbo_history.clone(),
n_samples_,
n_features: self.n_features,
x_mean,
y_mean,
converged,
})
}
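/// Coordinate-wise CAVI update for the factorized q(beta_i) = N(mu_i, s_i^2):
/// s_i^2 = 1 / (E[alpha_i] + E[tau] (X^T X)_ii) and
/// mu_i = E[tau] s_i^2 ((X^T y)_i - sum_{j != i} (X^T X)_ij mu_j).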
fn update_beta_ard(&mut self, xtx: &Array2<f64>, xty: &Array1<f64>) -> Result<()> {
let expected_tau = self.shape_tau / self.rate_tau;
let expected_alpha = &self.shape_alpha / &self.rate_alpha;
for i in 0..self.n_features {
let precision_i = expected_alpha[i] + expected_tau * xtx[[i, i]];
self.var_beta[i] = 1.0 / precision_i;
}
for i in 0..self.n_features {
let sum_j = (0..self.n_features)
.filter(|&j| j != i)
.map(|j| xtx[[i, j]] * self.mean_beta[j])
.sum::<f64>();
self.mean_beta[i] = expected_tau * self.var_beta[i] * (xty[i] - sum_j);
}
Ok(())
}
fn update_alpha_ard(&mut self) -> Result<()> {
for i in 0..self.n_features {
self.shape_alpha[i] = self.priorshape_alpha + 0.5;
self.rate_alpha[i] =
self.prior_rate_alpha + 0.5 * (self.mean_beta[i].powi(2) + self.var_beta[i]);
}
Ok(())
}
fn update_tau_ard(
&mut self,
n_samples_: f64,
xtx: &Array2<f64>,
xty: &Array1<f64>,
yty: f64,
) -> Result<()> {
self.shape_tau = self.priorshape_tau + n_samples_ / 2.0;
// E_q[beta^T X^T X beta] under the factorized posterior (diagonal covariance).
let mut quadratic_term = 0.0;
for i in 0..self.n_features {
for j in 0..self.n_features {
if i == j {
quadratic_term += xtx[[i, j]] * (self.mean_beta[i].powi(2) + self.var_beta[i]);
} else {
quadratic_term += xtx[[i, j]] * self.mean_beta[i] * self.mean_beta[j];
}
}
}
// E_q[(y - X beta)^T (y - X beta)] = y^T y - 2 mu^T X^T y + E[beta^T X^T X beta],
// matching the quadratic form used in compute_elbo_ard.
self.rate_tau = self.prior_rate_tau
+ 0.5 * (yty - 2.0 * self.mean_beta.dot(xty) + quadratic_term);
Ok(())
}
fn compute_elbo_ard(
&self,
n_samples_: f64,
xtx: &Array2<f64>,
xty: &Array1<f64>,
yty: f64,
) -> Result<f64> {
let expected_tau = self.shape_tau / self.rate_tau;
let expected_log_tau = digamma(self.shape_tau) - self.rate_tau.ln();
let mut quadratic_form = yty - 2.0 * self.mean_beta.dot(xty);
for i in 0..self.n_features {
for j in 0..self.n_features {
if i == j {
quadratic_form += xtx[[i, j]] * (self.mean_beta[i].powi(2) + self.var_beta[i]);
} else {
quadratic_form += xtx[[i, j]] * self.mean_beta[i] * self.mean_beta[j];
}
}
}
let likelihood_term = 0.5 * n_samples_ * expected_log_tau
- 0.5 * n_samples_ * (2.0_f64 * PI).ln()
- 0.5 * expected_tau * quadratic_form;
let mut prior_term = 0.0;
for i in 0..self.n_features {
let expected_alpha_i = self.shape_alpha[i] / self.rate_alpha[i];
let expected_log_alpha_i = digamma(self.shape_alpha[i]) - self.rate_alpha[i].ln();
prior_term += 0.5 * expected_log_alpha_i
- 0.5 * (2.0_f64 * PI).ln()
- 0.5 * expected_alpha_i * (self.mean_beta[i].powi(2) + self.var_beta[i]);
}
let mut entropy_term = 0.0;
for i in 0..self.n_features {
entropy_term += 0.5 * (1.0 + (2.0 * PI * self.var_beta[i]).ln());
}
Ok(likelihood_term + prior_term + entropy_term)
}
fn prune_features(&mut self) -> Result<()> {
let threshold = 1e12;
for i in 0..self.n_features {
let expected_alpha = self.shape_alpha[i] / self.rate_alpha[i];
if expected_alpha > threshold {
self.mean_beta[i] = 0.0;
self.var_beta[i] = 1e-12;
}
}
Ok(())
}
pub fn feature_relevance(&self) -> Array1<f64> {
let expected_alpha = &self.shape_alpha / &self.rate_alpha;
expected_alpha.mapv(|alpha| 1.0 / alpha)
}
}
#[derive(Debug, Clone)]
pub struct VariationalARDResult {
pub mean_beta: Array1<f64>,
pub var_beta: Array1<f64>,
pub shape_alpha: Array1<f64>,
pub rate_alpha: Array1<f64>,
pub shape_tau: f64,
pub rate_tau: f64,
pub elbo: f64,
pub elbo_history: Vec<f64>,
pub n_samples_: usize,
pub n_features: usize,
pub x_mean: Option<Array1<f64>>,
pub y_mean: Option<f64>,
pub converged: bool,
}
impl VariationalARDResult {
pub fn selected_features(&self, threshold: f64) -> Vec<usize> {
let expected_alpha = &self.shape_alpha / &self.rate_alpha;
expected_alpha
.iter()
.enumerate()
.filter(|(_, &alpha)| alpha < threshold)
.map(|(i, _)| i)
.collect()
}
pub fn feature_importance(&self) -> Array1<f64> {
self.mean_beta.mapv(f64::abs)
}
}
pub(crate) fn outer_product(v: &Array1<f64>) -> Array2<f64> {
let n = v.len();
let mut result = Array2::zeros((n, n));
for i in 0..n {
for j in 0..n {
result[[i, j]] = v[i] * v[j];
}
}
result
}
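/// Inverse CDF (quantile function) of the standard normal distribution, using
/// Acklam's rational approximation (absolute error on the order of 1e-9).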
pub(crate) fn normal_ppf(p: f64) -> Result<f64> {
if p <= 0.0 || p >= 1.0 {
return Err(StatsError::InvalidArgument(
"p must be between 0 and 1".to_string(),
));
}
let a = [
-3.969683028665376e+01,
2.209460984245205e+02,
-2.759285104469687e+02,
1.383_577_518_672_69e2,
-3.066479806614716e+01,
2.506628277459239e+00,
];
let b = [
-5.447609879822406e+01,
1.615858368580409e+02,
-1.556989798598866e+02,
6.680131188771972e+01,
-1.328068155288572e+01,
];
let c = [
-7.784894002430293e-03,
-3.223964580411365e-01,
-2.400758277161838e+00,
-2.549732539343734e+00,
4.374664141464968e+00,
2.938163982698783e+00,
];
let d = [
7.784695709041462e-03,
3.224671290700398e-01,
2.445134137142996e+00,
3.754408661907416e+00,
];
let p_low = 0.02425;
let p_high = 1.0 - p_low;
if p < p_low {
let q = (-2.0 * p.ln()).sqrt();
Ok(
(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5])
/ ((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1.0),
)
} else if p <= p_high {
let q = p - 0.5;
let r = q * q;
Ok(
(((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * q
/ (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1.0),
)
} else {
let q = (-2.0 * (1.0 - p).ln()).sqrt();
// Upper tail mirrors the lower tail: the whole numerator polynomial is negated.
Ok(
-(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5])
/ ((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1.0),
)
}
}
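/// Digamma function psi(x) = d/dx ln Gamma(x), via the recurrence
/// psi(x) = psi(x + 1) - 1/x for x < 8 and an asymptotic series for x >= 8.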
pub(crate) fn digamma(x: f64) -> f64 {
if x <= 0.0 {
return f64::NEG_INFINITY;
}
if x < 8.0 {
return digamma(x + 1.0) - 1.0 / x;
}
let inv_x = 1.0 / x;
let inv_x2 = inv_x * inv_x;
x.ln() - 0.5 * inv_x - inv_x2 / 12.0 + inv_x2 * inv_x2 / 120.0
- inv_x2 * inv_x2 * inv_x2 / 252.0
}
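/// Natural log of the gamma function via the Lanczos approximation (g = 7,
/// nine coefficients), with the reflection formula for x < 0.5.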
pub(crate) fn lgamma(x: f64) -> f64 {
if x <= 0.0 {
return f64::NEG_INFINITY;
}
if x < 0.5 {
return (PI / (PI * x).sin()).ln() - lgamma(1.0 - x);
}
const G: f64 = 7.0;
const C: [f64; 9] = [
0.99999999999980993,
676.5203681218851,
-1259.1392167224028,
771.323_428_777_653_1,
-176.615_029_162_140_6,
12.507_343_278_686_905,
-0.138_571_095_265_720_12,
9.984_369_578_019_572e-6,
1.505_632_735_149_311_6e-7,
];
let x = x - 1.0;
let mut a = C[0];
let t = x + G + 0.5;
for (i, &c) in C[1..].iter().enumerate() {
a += c / (x + (i as f64 + 1.0));
}
0.5 * (2.0 * PI).ln() + (x + 0.5) * t.ln() - t + a.ln()
}
pub(crate) fn trigamma(x: f64) -> f64 {
if x <= 0.0 {
return f64::INFINITY;
}
if x < 8.0 {
return trigamma(x + 1.0) + 1.0 / (x * x);
}
let inv_x = 1.0 / x;
let inv_x2 = inv_x * inv_x;
inv_x + 0.5 * inv_x2 + inv_x2 * inv_x / 6.0 - inv_x2 * inv_x2 * inv_x / 30.0
+ inv_x2 * inv_x2 * inv_x2 * inv_x / 42.0
}
#[cfg(test)]
mod tests {
use super::*;
use scirs2_core::ndarray::Array2;
#[test]
fn test_mean_field_gaussian_creation() {
let mf = MeanFieldGaussian::new(5).expect("should create mean-field Gaussian");
assert_eq!(mf.dim, 5);
assert_eq!(mf.means.len(), 5);
assert_eq!(mf.log_stds.len(), 5);
assert_eq!(mf.n_params(), 10);
}
#[test]
fn test_mean_field_gaussian_entropy() {
let mf = MeanFieldGaussian::new(2).expect("should create");
let entropy = mf.entropy();
let expected = 2.0 * 0.5 * (1.0 + (2.0 * PI).ln());
assert!((entropy - expected).abs() < 1e-10);
}
#[test]
fn test_mean_field_gaussian_sample() {
let mf = MeanFieldGaussian::new(3).expect("should create");
let epsilon = Array1::from_vec(vec![0.5, -0.3, 1.0]);
let sample = mf.sample(&epsilon).expect("should sample");
assert_eq!(sample.len(), 3);
for i in 0..3 {
assert!((sample[i] - epsilon[i]).abs() < 1e-10);
}
}
#[test]
fn test_mean_field_gaussian_params_roundtrip() {
let mut mf = MeanFieldGaussian::new(3).expect("should create");
let params = Array1::from_vec(vec![1.0, 2.0, 3.0, 0.5, -0.3, 0.1]);
mf.set_params(&params).expect("should set params");
let retrieved = mf.get_params();
for i in 0..6 {
assert!((retrieved[i] - params[i]).abs() < 1e-10);
}
}
#[test]
fn test_full_rank_gaussian_creation() {
let fr = FullRankGaussian::new(3).expect("should create full-rank Gaussian");
assert_eq!(fr.dim, 3);
assert_eq!(fr.mean.len(), 3);
assert_eq!(fr.n_params(), 9);
}
#[test]
fn test_full_rank_gaussian_entropy() {
let fr = FullRankGaussian::new(2).expect("should create");
let entropy = fr.entropy();
let expected = 2.0 * 0.5 * (1.0 + (2.0 * PI).ln());
assert!((entropy - expected).abs() < 1e-10);
}
#[test]
fn test_full_rank_gaussian_sample() {
let fr = FullRankGaussian::new(2).expect("should create");
let epsilon = Array1::from_vec(vec![1.0, -1.0]);
let sample = fr.sample(&epsilon).expect("should sample");
assert_eq!(sample.len(), 2);
for i in 0..2 {
assert!((sample[i] - epsilon[i]).abs() < 1e-10);
}
}
#[test]
fn test_normalizing_flow_creation() {
let nf = NormalizingFlowVI::new(3, 2).expect("should create");
assert_eq!(nf.dim, 3);
assert_eq!(nf.flows.len(), 2);
}
#[test]
fn test_normalizing_flow_transform() {
let nf = NormalizingFlowVI::new(2, 1).expect("should create");
let z0 = Array1::from_vec(vec![0.5, -0.5]);
let (z_k, log_det) = nf.transform(&z0).expect("should transform");
assert_eq!(z_k.len(), 2);
assert!(log_det.is_finite());
}
#[test]
fn test_diagnostics() {
let mut diag = VariationalDiagnostics::new();
diag.record_elbo(-100.0);
diag.record_elbo(-90.0);
diag.record_elbo(-85.0);
diag.record_gradient_norm(10.0);
diag.record_gradient_norm(5.0);
assert_eq!(diag.n_iterations, 3);
assert!(!diag.check_elbo_convergence(1.0));
assert!(diag.check_elbo_convergence(10.0));
let summary = diag.elbo_summary();
assert!((summary.min - (-100.0)).abs() < 1e-10);
assert!((summary.max - (-85.0)).abs() < 1e-10);
assert!(summary.monotonic);
}
#[test]
fn test_variational_bayesian_regression() {
let n = 50;
let mut x_data = Vec::with_capacity(n);
let mut y_data = Vec::with_capacity(n);
for i in 0..n {
let xi = i as f64 / n as f64;
x_data.push(xi);
y_data.push(2.0 * xi + 1.0 + 0.1 * ((i * 7 % 13) as f64 - 6.0) / 6.0);
}
let x = Array2::from_shape_fn((n, 1), |(i, _)| x_data[i]);
let y = Array1::from_vec(y_data);
let mut model = VariationalBayesianRegression::new(1, true).expect("should create model");
let result = model
.fit(x.view(), y.view(), 100, 1e-6)
.expect("should fit");
assert!(
(result.mean_beta[0] - 2.0).abs() < 0.5,
"beta should be close to 2.0, got {}",
result.mean_beta[0]
);
}
#[test]
fn test_trigamma() {
let expected = PI * PI / 6.0;
let computed = trigamma(1.0);
assert!(
(computed - expected).abs() < 0.01,
"trigamma(1) should be close to pi^2/6, got {}",
computed
);
}
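// Sanity checks for the inverse normal CDF used by `credible_intervals`:
// a standard quantile and the tail symmetry normal_ppf(p) = -normal_ppf(1 - p).
#[test]
fn test_normal_ppf() {
let z = normal_ppf(0.975).expect("should evaluate");
assert!(
(z - 1.959964).abs() < 1e-4,
"0.975 quantile should be close to 1.96, got {}",
z
);
let lo = normal_ppf(0.01).expect("should evaluate");
let hi = normal_ppf(0.99).expect("should evaluate");
assert!(
(lo + hi).abs() < 1e-6,
"tail quantiles should be symmetric, got {} and {}",
lo,
hi
);
}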
}