use std::error::Error;
use ndarray::Array1;
use ndarray::Array2;
use ndarray::Axis;
use ndarray_linalg::Cholesky;
use ndarray_linalg::Inverse;
use ndarray_linalg::UPLO;
use stochastic_rs_distributions::normal::SimdNormal;
use stochastic_rs_distributions::special::ndtri;
use stochastic_rs_distributions::special::norm_cdf;
use super::CopulaType;
use crate::traits::MultivariateExt;
#[derive(Debug, Clone, Default)]
pub struct GaussianMultivariate {
dim: usize,
corr: Option<Array2<f64>>,
inv_corr: Option<Array2<f64>>,
chol_lower: Option<Array2<f64>>,
log_det_corr: Option<f64>,
}
impl GaussianMultivariate {
pub fn new() -> Self {
Self::default()
}
pub fn new_with_corr(corr: Array2<f64>) -> Result<Self, Box<dyn Error>> {
let dim = corr.nrows();
if dim != corr.ncols() {
return Err("Correlation matrix must be square".into());
}
let mut g = Self::new();
g.set_corr(corr)?;
Ok(g)
}
pub fn correlation(&self) -> Option<&Array2<f64>> {
self.corr.as_ref()
}
fn set_corr(&mut self, corr: Array2<f64>) -> Result<(), Box<dyn Error>> {
let dim = corr.nrows();
self.dim = dim;
let l_arr = corr
.cholesky(UPLO::Lower)
.map_err(|_| -> Box<dyn Error> { "Correlation matrix is not positive definite".into() })?;
let mut log_det = 0.0;
for i in 0..dim {
log_det += l_arr[[i, i]].ln();
}
log_det *= 2.0;
let inv_arr = corr
.inv()
.map_err(|_| -> Box<dyn Error> { "Failed to invert correlation matrix".into() })?;
self.corr = Some(corr);
self.inv_corr = Some(inv_arr);
self.chol_lower = Some(l_arr);
self.log_det_corr = Some(log_det);
Ok(())
}
fn transform_to_normal(&self, u: &Array2<f64>) -> Array2<f64> {
let eps = 1e-12;
let mut z = u.clone();
for mut row in z.axis_iter_mut(Axis(0)) {
for val in row.iter_mut() {
let clamped = val.clamp(eps, 1.0 - eps);
*val = ndtri(clamped);
}
}
z
}
fn estimate_corr_from_normal(&self, z: &Array2<f64>) -> Array2<f64> {
let n = z.nrows() as f64;
let d = z.ncols();
let means = z.mean_axis(Axis(0)).unwrap();
let mut zc = z.clone();
let nrows = zc.nrows();
for j in 0..d {
let m = means[j];
let mut col = zc.column_mut(j);
for i in 0..nrows {
col[i] -= m;
}
}
let mut stds = Array1::<f64>::zeros(d);
for j in 0..d {
let col = zc.column(j);
let var = col.iter().map(|&x| x * x).sum::<f64>() / (n - 1.0);
stds[j] = var.max(1e-18).sqrt();
}
let mut corr = Array2::<f64>::zeros((d, d));
for k in 0..d {
corr[[k, k]] = 1.0;
}
for i in 0..d {
for j in i..d {
if i == j {
continue;
}
let dot = zc
.column(i)
.iter()
.zip(zc.column(j).iter())
.map(|(&a, &b)| a * b)
.sum::<f64>();
let cov = dot / (n - 1.0);
let r = (cov / (stds[i] * stds[j])).clamp(-0.999_999, 0.999_999);
corr[[i, j]] = r;
corr[[j, i]] = r;
}
}
let mut jitter = 0usize;
while !Self::is_spd(&corr) && jitter < 6 {
let eps = 10f64.powi(-6_i32 + jitter as i32);
for k in 0..d {
corr[[k, k]] = 1.0 + eps;
}
jitter += 1;
}
corr
}
fn is_spd(a: &Array2<f64>) -> bool {
let dim = a.nrows();
if dim != a.ncols() {
return false;
}
a.cholesky(UPLO::Lower).is_ok()
}
fn require_fitted(&self) -> Result<(), Box<dyn Error>> {
if self.corr.is_none()
|| self.inv_corr.is_none()
|| self.chol_lower.is_none()
|| self.log_det_corr.is_none()
{
return Err("Fit the copula or provide a correlation matrix first".into());
}
Ok(())
}
}
impl MultivariateExt for GaussianMultivariate {
fn r#type(&self) -> CopulaType {
CopulaType::Gaussian
}
fn sample(&self, n: usize) -> Result<Array2<f64>, Box<dyn Error>> {
self.require_fitted()?;
let d = self.dim;
let l = self.chol_lower.as_ref().unwrap(); let normal = SimdNormal::<f64>::new(0.0, 1.0);
let g = Array2::from_shape_fn((n, d), |_| normal.sample_fast());
let z = g.dot(&l.t());
let mut u = z.clone();
for mut row in u.axis_iter_mut(Axis(0)) {
for val in row.iter_mut() {
*val = norm_cdf(*val);
}
}
Ok(u)
}
fn fit(&mut self, X: Array2<f64>) -> Result<(), Box<dyn Error>> {
if X.nrows() < 2 || X.ncols() < 2 {
return Err("Need at least 2 samples and 2 dimensions".into());
}
if X.iter().any(|&v| !(0.0..=1.0).contains(&v)) {
return Err("Input data must be in [0,1] for Gaussian copula fit".into());
}
self.dim = X.ncols();
let z = self.transform_to_normal(&X);
let corr = self.estimate_corr_from_normal(&z);
self.set_corr(corr)?;
Ok(())
}
fn check_fit(&self, X: &Array2<f64>) -> Result<(), Box<dyn Error>> {
self.require_fitted()?;
if X.ncols() != self.dim {
return Err("Dimension mismatch".into());
}
if X.iter().any(|&v| !(0.0..=1.0).contains(&v)) {
return Err("Input X must be in [0,1] for Gaussian copula".into());
}
Ok(())
}
fn pdf(&self, X: Array2<f64>) -> Result<Array1<f64>, Box<dyn Error>> {
self.check_fit(&X)?;
let z = self.transform_to_normal(&X); let inv = self.inv_corr.as_ref().unwrap();
let log_det = self.log_det_corr.unwrap();
let mut out = Array1::<f64>::zeros(z.nrows());
for (i, row) in z.axis_iter(Axis(0)).enumerate() {
let mut q = inv.dot(&row.to_owned());
for k in 0..q.len() {
q[k] -= row[k];
}
let quad = row.dot(&q);
let log_c = -0.5 * (log_det + quad);
out[i] = log_c.exp();
}
Ok(out)
}
fn cdf(&self, X: Array2<f64>) -> Result<Array1<f64>, Box<dyn Error>> {
self.check_fit(&X)?;
let z = self.transform_to_normal(&X); let l = self.chol_lower.as_ref().unwrap(); let n = z.nrows();
let m_samples = 4000usize; let mut out = Array1::<f64>::zeros(n);
let normal = SimdNormal::<f64>::new(0.0, 1.0);
let g = Array2::from_shape_fn((m_samples, self.dim), |_| normal.sample_fast());
let y = g.dot(&l.t());
for (i, row) in z.axis_iter(Axis(0)).enumerate() {
let mut count = 0usize;
'outer: for r in 0..m_samples {
for c in 0..self.dim {
if y[[r, c]] > row[c] {
continue 'outer;
}
}
count += 1;
}
out[i] = count as f64 / m_samples as f64;
}
Ok(out)
}
}