use scirs2_core::ndarray::{Array1, Array2};
use super::types::LikelihoodFamily;
use crate::error::StatsError;
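/// Output of Newton-Raphson mode finding: the posterior mode, the negative
/// Hessian of the log-posterior evaluated at the mode, and convergence
/// diagnostics.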
#[derive(Debug, Clone)]
pub struct ModeResult {
pub mode: Array1<f64>,
pub neg_hessian: Array2<f64>,
pub iterations: usize,
pub converged: bool,
pub log_posterior_at_mode: f64,
}
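/// Total log-likelihood of `y` given linear predictors `eta`, summed over the
/// conditionally independent observations.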
pub fn log_likelihood(
y: &Array1<f64>,
eta: &Array1<f64>,
likelihood: LikelihoodFamily,
n_trials: Option<&Array1<f64>>,
obs_precision: Option<f64>,
) -> f64 {
let n = y.len();
let mut ll = 0.0;
for i in 0..n {
ll += log_likelihood_single(
y[i],
eta[i],
likelihood,
n_trials.map(|t| t[i]),
obs_precision,
);
}
ll
}
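/// Log-likelihood contribution of a single observation under the given family.
/// `n_trial` is only used by `Binomial`; `obs_precision` only by `Gaussian`.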
fn log_likelihood_single(
y_i: f64,
eta_i: f64,
likelihood: LikelihoodFamily,
n_trial: Option<f64>,
obs_precision: Option<f64>,
) -> f64 {
match likelihood {
LikelihoodFamily::Gaussian => {
let tau = obs_precision.unwrap_or(1.0);
let diff = y_i - eta_i;
-0.5 * tau * diff * diff + 0.5 * tau.ln() - 0.5 * (2.0 * std::f64::consts::PI).ln()
}
        LikelihoodFamily::Poisson => {
            // Cap lambda to avoid overflow for extreme linear predictors.
            let lambda = eta_i.exp().min(1e15);
            y_i * eta_i - lambda - log_factorial_approx(y_i)
        }
LikelihoodFamily::Binomial => {
let n = n_trial.unwrap_or(1.0);
let log1pexp = log1p_exp(eta_i);
y_i * eta_i - n * log1pexp + log_binom_coeff(n, y_i)
}
        LikelihoodFamily::NegativeBinomial => {
            // No dispersion parameter is threaded through this API, so this
            // arm currently reduces to the Poisson kernel.
            let lambda = eta_i.exp().min(1e15);
            y_i * eta_i - lambda - log_factorial_approx(y_i)
        }
        // Unsupported families contribute nothing.
        _ => 0.0,
    }
}
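/// First derivative of the single-observation log-likelihood with respect to
/// `eta_i`.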
pub fn log_likelihood_gradient_single(
y_i: f64,
eta_i: f64,
likelihood: LikelihoodFamily,
n_trial: Option<f64>,
obs_precision: Option<f64>,
) -> f64 {
match likelihood {
LikelihoodFamily::Gaussian => {
let tau = obs_precision.unwrap_or(1.0);
tau * (y_i - eta_i)
}
LikelihoodFamily::Poisson => {
y_i - eta_i.exp().min(1e15)
}
LikelihoodFamily::Binomial => {
let n = n_trial.unwrap_or(1.0);
let p = sigmoid(eta_i);
y_i - n * p
}
        // Mirrors the Poisson gradient (see `log_likelihood_single`).
        LikelihoodFamily::NegativeBinomial => y_i - eta_i.exp().min(1e15),
        _ => 0.0,
}
}
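/// Second derivative of the single-observation log-likelihood with respect to
/// `eta_i`; non-positive for the log-concave families handled here.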
pub fn log_likelihood_hessian_single(
eta_i: f64,
likelihood: LikelihoodFamily,
n_trial: Option<f64>,
obs_precision: Option<f64>,
) -> f64 {
match likelihood {
LikelihoodFamily::Gaussian => {
let tau = obs_precision.unwrap_or(1.0);
-tau
}
LikelihoodFamily::Poisson => {
-eta_i.exp().min(1e15)
}
LikelihoodFamily::Binomial => {
let n = n_trial.unwrap_or(1.0);
let p = sigmoid(eta_i);
-n * p * (1.0 - p)
}
        // Mirrors the Poisson curvature.
        LikelihoodFamily::NegativeBinomial => -eta_i.exp().min(1e15),
        // Default to unit negative curvature so the Newton weights stay positive.
        _ => -1.0,
}
}
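/// Gradient of the log-posterior at `x`: `A' g(eta) - Q x`, where `A` is the
/// design matrix, `g` the per-observation likelihood gradient, and `Q` the
/// prior precision.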
pub fn log_posterior_gradient(
x: &Array1<f64>,
y: &Array1<f64>,
design: &Array2<f64>,
precision: &Array2<f64>,
likelihood: LikelihoodFamily,
n_trials: Option<&Array1<f64>>,
obs_precision: Option<f64>,
) -> Array1<f64> {
let eta = design.dot(x);
let n = y.len();
let mut grad_eta = Array1::zeros(n);
for i in 0..n {
grad_eta[i] = log_likelihood_gradient_single(
y[i],
eta[i],
likelihood,
n_trials.map(|t| t[i]),
obs_precision,
);
}
let at_grad = design.t().dot(&grad_eta);
    // Prior contribution to the gradient: Q x.
    let prior_grad = precision.dot(x);
at_grad - prior_grad
}
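/// Negative Hessian of the log-posterior at `x`: `Q + A' W A`, where `W` is
/// the diagonal matrix of per-observation curvatures (clamped to stay
/// positive).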
pub fn compute_neg_hessian(
x: &Array1<f64>,
design: &Array2<f64>,
precision: &Array2<f64>,
likelihood: LikelihoodFamily,
n_trials: Option<&Array1<f64>>,
obs_precision: Option<f64>,
) -> Array2<f64> {
let eta = design.dot(x);
let n = eta.len();
let p = x.len();
let mut weights = Array1::zeros(n);
for i in 0..n {
let h = log_likelihood_hessian_single(
eta[i],
likelihood,
n_trials.map(|t| t[i]),
obs_precision,
);
        // Clamp so the working weights stay strictly positive.
        weights[i] = (-h).max(1e-12);
    }
let mut neg_hess = precision.clone();
for i in 0..p {
for j in 0..p {
let mut sum = 0.0;
for k in 0..n {
sum += design[[k, i]] * weights[k] * design[[k, j]];
}
neg_hess[[i, j]] += sum;
}
}
neg_hess
}
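/// Finds the posterior mode by damped Newton-Raphson iteration.
///
/// Starting from `x = 0`, each iteration solves `(-H) delta = grad` via a
/// Cholesky factorization and applies `x <- x + damping * delta`, stopping
/// once the Newton step norm drops below `tol` or `max_iter` is exhausted.
///
/// A minimal usage sketch (argument values are illustrative only):
///
/// ```ignore
/// let result = find_mode(
///     &precision, &y, &design,
///     LikelihoodFamily::Poisson,
///     None,       // n_trials: only used by Binomial
///     None,       // obs_precision: only used by Gaussian
///     100,        // max_iter
///     1e-8,       // tol
///     1.0,        // damping (1.0 = full Newton steps)
/// )?;
/// assert!(result.converged);
/// ```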
pub fn find_mode(
precision: &Array2<f64>,
y: &Array1<f64>,
design: &Array2<f64>,
likelihood: LikelihoodFamily,
n_trials: Option<&Array1<f64>>,
obs_precision: Option<f64>,
max_iter: usize,
tol: f64,
damping: f64,
) -> Result<ModeResult, StatsError> {
let p = precision.nrows();
if precision.ncols() != p {
return Err(StatsError::DimensionMismatch(
"Precision matrix must be square".to_string(),
));
}
if design.ncols() != p {
return Err(StatsError::DimensionMismatch(format!(
"Design matrix columns ({}) must match precision matrix size ({})",
design.ncols(),
p
)));
}
if design.nrows() != y.len() {
return Err(StatsError::DimensionMismatch(format!(
"Design matrix rows ({}) must match observation length ({})",
design.nrows(),
y.len()
)));
}
let mut x = Array1::zeros(p);
let mut converged = false;
let mut iterations = 0;
for iter in 0..max_iter {
iterations = iter + 1;
let grad = log_posterior_gradient(
&x,
y,
design,
precision,
likelihood,
n_trials,
obs_precision,
);
let neg_hess =
compute_neg_hessian(&x, design, precision, likelihood, n_trials, obs_precision);
        // Newton direction: solve (-H) delta = grad.
        let delta = solve_symmetric_positive_definite(&neg_hess, &grad)?;
        let update_norm: f64 = delta.iter().map(|d| d * d).sum::<f64>().sqrt();
        x = &x + &(damping * &delta);
        // Converged once the (undamped) Newton step is negligible.
        if update_norm < tol {
            converged = true;
            break;
        }
}
let neg_hess = compute_neg_hessian(&x, design, precision, likelihood, n_trials, obs_precision);
let eta = design.dot(&x);
let ll = log_likelihood(y, &eta, likelihood, n_trials, obs_precision);
    // Unnormalized log-prior; its normalizing constant cancels in the Laplace
    // marginal likelihood below.
    let log_prior = -0.5 * x.dot(&precision.dot(&x));
    let log_posterior = ll + log_prior;
Ok(ModeResult {
mode: x,
neg_hessian: neg_hess,
iterations,
converged,
log_posterior_at_mode: log_posterior,
})
}
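/// Laplace approximation to the log marginal likelihood:
/// `log p(y) ~ log p(y, x*) + 0.5 log|Q| - 0.5 log|-H(x*)|`,
/// where `x*` is the posterior mode.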
pub fn laplace_log_marginal_likelihood(
mode_result: &ModeResult,
precision: &Array2<f64>,
) -> Result<f64, StatsError> {
    let log_det_h = log_determinant(&mode_result.neg_hessian)?;
    let log_det_q = log_determinant(precision)?;
    // The (p/2) ln(2*pi) terms from the prior normalizer and the Laplace
    // correction cancel, leaving only the two log-determinants.
    let result = mode_result.log_posterior_at_mode + 0.5 * log_det_q - 0.5 * log_det_h;
Ok(result)
}
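/// Solves `A x = b` for symmetric positive-definite `A` via Cholesky
/// (`A = L L'`): forward-substitute `L z = b`, then back-substitute
/// `L' x = z`.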
fn solve_symmetric_positive_definite(
a: &Array2<f64>,
b: &Array1<f64>,
) -> Result<Array1<f64>, StatsError> {
let n = a.nrows();
if n == 0 {
return Ok(Array1::zeros(0));
}
let l = cholesky_decompose(a)?;
let mut z = Array1::zeros(n);
for i in 0..n {
let mut sum = b[i];
for j in 0..i {
sum -= l[[i, j]] * z[j];
}
if l[[i, i]].abs() < 1e-15 {
return Err(StatsError::ComputationError(
"Singular matrix in Cholesky solve".to_string(),
));
}
z[i] = sum / l[[i, i]];
}
let mut x = Array1::zeros(n);
for i in (0..n).rev() {
let mut sum = z[i];
for j in (i + 1)..n {
sum -= l[[j, i]] * x[j];
}
x[i] = sum / l[[i, i]];
}
Ok(x)
}
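/// Lower-triangular Cholesky factor of a symmetric positive-definite matrix,
/// computed with the standard O(n^3) inner-product algorithm.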
fn cholesky_decompose(a: &Array2<f64>) -> Result<Array2<f64>, StatsError> {
let n = a.nrows();
let mut l = Array2::zeros((n, n));
for i in 0..n {
for j in 0..=i {
let mut sum = 0.0;
for k in 0..j {
sum += l[[i, k]] * l[[j, k]];
}
if i == j {
let diag = a[[i, i]] - sum;
if diag <= 0.0 {
return Err(StatsError::ComputationError(format!(
"Matrix not positive definite at index {} (value: {:.6e})",
i, diag
)));
}
l[[i, j]] = diag.sqrt();
} else {
if l[[j, j]].abs() < 1e-15 {
return Err(StatsError::ComputationError(
"Zero diagonal in Cholesky decomposition".to_string(),
));
}
l[[i, j]] = (a[[i, j]] - sum) / l[[j, j]];
}
}
}
Ok(l)
}
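/// `log|A|` for symmetric positive-definite `A`, using
/// `log|A| = 2 * sum_i ln(L[i][i])`.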
fn log_determinant(a: &Array2<f64>) -> Result<f64, StatsError> {
let l = cholesky_decompose(a)?;
let n = a.nrows();
let mut log_det = 0.0;
for i in 0..n {
log_det += l[[i, i]].ln();
}
Ok(2.0 * log_det)
}
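/// Diagonal of `A^-1`, obtained by solving `A x = e_i` for each unit vector
/// `e_i` and keeping the i-th entry. When `A` is a posterior precision these
/// are the marginal posterior variances.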
pub fn inverse_diagonal(a: &Array2<f64>) -> Result<Array1<f64>, StatsError> {
let n = a.nrows();
let mut diag = Array1::zeros(n);
for i in 0..n {
let mut e_i = Array1::zeros(n);
e_i[i] = 1.0;
let col = solve_symmetric_positive_definite(a, &e_i)?;
diag[i] = col[i];
}
Ok(diag)
}
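/// Full inverse of a symmetric positive-definite matrix, solved column by
/// column against unit vectors.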
pub fn full_inverse(a: &Array2<f64>) -> Result<Array2<f64>, StatsError> {
let n = a.nrows();
let mut inv = Array2::zeros((n, n));
for i in 0..n {
let mut e_i = Array1::zeros(n);
e_i[i] = 1.0;
let col = solve_symmetric_positive_definite(a, &e_i)?;
for j in 0..n {
inv[[j, i]] = col[j];
}
}
Ok(inv)
}
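/// Numerically stable logistic function; branches on the sign of `x` so the
/// exponential never overflows.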
fn sigmoid(x: f64) -> f64 {
if x >= 0.0 {
let ex = (-x).exp();
1.0 / (1.0 + ex)
} else {
let ex = x.exp();
ex / (1.0 + ex)
}
}
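/// Numerically stable `ln(1 + e^x)` (softplus), with asymptotic branches for
/// large `|x|`.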
fn log1p_exp(x: f64) -> f64 {
    if x > 35.0 {
        // e^(-x) vanishes relative to 1, so ln(1 + e^x) ~ x.
        x
    } else if x < -10.0 {
        // ln(1 + e^x) ~ e^x for very negative x.
        x.exp()
    } else {
        // ln_1p is more accurate than (1.0 + x.exp()).ln() when e^x is small.
        x.exp().ln_1p()
    }
}
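/// `ln(n!)`: exact summation for `n <= 20`, Stirling's approximation
/// `n ln n - n + 0.5 ln(2 pi n)` otherwise.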
fn log_factorial_approx(n: f64) -> f64 {
if n < 0.0 {
return 0.0;
}
let n_int = n as u64;
if n_int <= 20 {
let mut result = 0.0f64;
for i in 2..=n_int {
result += (i as f64).ln();
}
result
} else {
n * n.ln() - n + 0.5 * (2.0 * std::f64::consts::PI * n).ln()
}
}
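/// `ln C(n, k)` via log-factorials; negative infinity outside `0 <= k <= n`.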
fn log_binom_coeff(n: f64, k: f64) -> f64 {
if k < 0.0 || k > n {
return f64::NEG_INFINITY;
}
log_factorial_approx(n) - log_factorial_approx(k) - log_factorial_approx(n - k)
}
#[cfg(test)]
mod tests {
use super::*;
use scirs2_core::ndarray::{array, Array2};
#[test]
fn test_sigmoid() {
assert!((sigmoid(0.0) - 0.5).abs() < 1e-10);
assert!(sigmoid(100.0) > 0.99);
assert!(sigmoid(-100.0) < 0.01);
}
#[test]
fn test_log1p_exp() {
assert!((log1p_exp(50.0) - 50.0).abs() < 1e-10);
assert!((log1p_exp(0.0) - 2.0f64.ln()).abs() < 1e-10);
}
#[test]
fn test_log_factorial() {
assert!((log_factorial_approx(0.0)).abs() < 1e-10);
assert!((log_factorial_approx(1.0)).abs() < 1e-10);
assert!((log_factorial_approx(5.0) - (120.0f64).ln()).abs() < 1e-10);
}
#[test]
fn test_cholesky() {
let a = array![[4.0, 2.0], [2.0, 5.0]];
let l = cholesky_decompose(&a).expect("Cholesky should succeed");
let reconstructed = l.dot(&l.t());
for i in 0..2 {
for j in 0..2 {
assert!(
(reconstructed[[i, j]] - a[[i, j]]).abs() < 1e-10,
"Cholesky reconstruction mismatch at ({}, {})",
i,
j
);
}
}
}
#[test]
fn test_solve_spd() {
let a = array![[4.0, 2.0], [2.0, 5.0]];
let b = array![1.0, 2.0];
let x = solve_symmetric_positive_definite(&a, &b).expect("Solve should succeed");
let ax = a.dot(&x);
for i in 0..2 {
assert!(
(ax[i] - b[i]).abs() < 1e-10,
"Solution mismatch at index {}",
i
);
}
}
#[test]
fn test_log_determinant() {
let a = array![[4.0, 2.0], [2.0, 5.0]];
let log_det = log_determinant(&a).expect("Log determinant should succeed");
assert!((log_det - 16.0f64.ln()).abs() < 1e-10);
}
#[test]
fn test_gaussian_log_likelihood() {
let y = array![1.0, 2.0, 3.0];
        let eta = array![1.0, 2.0, 3.0];
        let ll = log_likelihood(&y, &eta, LikelihoodFamily::Gaussian, None, Some(1.0));
let expected = -1.5 * (2.0 * std::f64::consts::PI).ln();
assert!((ll - expected).abs() < 1e-10);
}
#[test]
fn test_find_mode_gaussian_identity() {
let n = 3;
let y = array![1.0, 2.0, 3.0];
let design = Array2::eye(n);
let precision = Array2::eye(n);
let obs_prec = 10.0;
let result = find_mode(
&precision,
&y,
&design,
LikelihoodFamily::Gaussian,
None,
Some(obs_prec),
100,
1e-10,
1.0,
)
.expect("Mode finding should succeed");
assert!(result.converged, "Newton-Raphson should converge");
for i in 0..n {
let expected = obs_prec * y[i] / (1.0 + obs_prec);
assert!(
(result.mode[i] - expected).abs() < 1e-6,
"Mode mismatch at index {}: got {}, expected {}",
i,
result.mode[i],
expected
);
}
}
#[test]
fn test_find_mode_poisson() {
let n = 3;
let y = array![2.0, 5.0, 1.0];
let design = Array2::eye(n);
let precision = Array2::eye(n);
let result = find_mode(
&precision,
&y,
&design,
LikelihoodFamily::Poisson,
None,
None,
100,
1e-10,
1.0,
)
.expect("Mode finding should succeed");
assert!(
result.converged,
"Newton-Raphson should converge for Poisson"
);
assert!(result.mode[1] > 0.5, "Mode for y=5 should be positive");
}
#[test]
fn test_laplace_log_marginal() {
let n = 2;
let y = array![1.0, 2.0];
let design = Array2::eye(n);
let precision = Array2::eye(n);
let mode_result = find_mode(
&precision,
&y,
&design,
LikelihoodFamily::Gaussian,
None,
Some(1.0),
100,
1e-10,
1.0,
)
.expect("Mode finding should succeed");
let log_ml = laplace_log_marginal_likelihood(&mode_result, &precision)
.expect("Laplace approximation should succeed");
        // Laplace is exact for a Gaussian likelihood with a Gaussian prior:
        // here y ~ N(0, 2I), so log p(y) = -y'y / 4 - ln 2 - ln(2*pi).
        let expected = -0.5 * (y[0] * y[0] + y[1] * y[1]) / 2.0
            - 0.5 * (2.0f64).ln() * 2.0
            - (2.0 * std::f64::consts::PI).ln();
assert!(
(log_ml - expected).abs() < 0.5,
"Laplace log marginal: got {}, expected {}",
log_ml,
expected
);
}
#[test]
fn test_inverse_diagonal() {
let a = array![[2.0, 0.0], [0.0, 4.0]];
let diag = inverse_diagonal(&a).expect("Inverse diagonal should succeed");
assert!((diag[0] - 0.5).abs() < 1e-10);
assert!((diag[1] - 0.25).abs() < 1e-10);
}
#[test]
fn test_dimension_mismatch() {
let y = array![1.0, 2.0];
let design = Array2::eye(2);
let precision = Array2::eye(3);
let result = find_mode(
&precision,
&y,
&design,
LikelihoodFamily::Gaussian,
None,
None,
10,
1e-8,
1.0,
);
assert!(result.is_err());
}
#[test]
fn test_gradient_gaussian() {
let x = array![0.5, 1.0];
let y = array![1.0, 2.0];
let design = Array2::eye(2);
let precision = Array2::eye(2);
let grad = log_posterior_gradient(
&x,
&y,
&design,
&precision,
LikelihoodFamily::Gaussian,
None,
Some(1.0),
);
assert!((grad[0] - (1.0 - 2.0 * 0.5)).abs() < 1e-10);
assert!((grad[1] - (2.0 - 2.0 * 1.0)).abs() < 1e-10);
}
}