use rand::prelude::*;
use rand_chacha::ChaCha8Rng;
use serde::{Deserialize, Serialize};
/// Supported bivariate copula families.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum CopulaType {
    /// Elliptical copula with no tail dependence (the default).
    #[default]
    Gaussian,
    /// Archimedean family with lower-tail dependence (theta > 0).
    Clayton,
    /// Archimedean family with upper-tail dependence (theta >= 1).
    Gumbel,
    /// Archimedean family, no tail dependence (theta != 0).
    Frank,
    /// Elliptical copula with symmetric tail dependence governed by the
    /// degrees of freedom.
    StudentT,
}
/// Parameters selecting and shaping a bivariate copula.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CopulaConfig {
    /// Which copula family to use.
    pub copula_type: CopulaType,
    /// Family-specific dependence parameter (correlation for Gaussian and
    /// Student-t; the Archimedean theta otherwise).
    pub theta: f64,
    /// Degrees of freedom; only consumed by the Student-t family.
    #[serde(default = "default_df")]
    pub degrees_of_freedom: f64,
}
/// Serde default for `degrees_of_freedom` (used when the field is absent).
fn default_df() -> f64 {
    4.0
}
impl Default for CopulaConfig {
fn default() -> Self {
Self {
copula_type: CopulaType::Gaussian,
theta: 0.5,
degrees_of_freedom: 4.0,
}
}
}
impl CopulaConfig {
    /// Gaussian copula; `correlation` is clamped to (-0.999, 0.999).
    pub fn gaussian(correlation: f64) -> Self {
        Self {
            copula_type: CopulaType::Gaussian,
            theta: correlation.clamp(-0.999, 0.999),
            degrees_of_freedom: 4.0,
        }
    }

    /// Clayton copula; `theta` is floored at 0.001 to stay in the valid range.
    pub fn clayton(theta: f64) -> Self {
        Self {
            copula_type: CopulaType::Clayton,
            theta: theta.max(0.001),
            degrees_of_freedom: 4.0,
        }
    }

    /// Gumbel copula; `theta` is floored at 1.0 (independence).
    pub fn gumbel(theta: f64) -> Self {
        Self {
            copula_type: CopulaType::Gumbel,
            theta: theta.max(1.0),
            degrees_of_freedom: 4.0,
        }
    }

    /// Frank copula; near-zero `theta` is nudged to 0.001 to avoid the
    /// singular independence point.
    pub fn frank(theta: f64) -> Self {
        Self {
            copula_type: CopulaType::Frank,
            theta: if theta.abs() < 0.001 { 0.001 } else { theta },
            degrees_of_freedom: 4.0,
        }
    }

    /// Student-t copula; correlation clamped to (-0.999, 0.999), degrees of
    /// freedom floored at 2.
    pub fn student_t(correlation: f64, df: f64) -> Self {
        Self {
            copula_type: CopulaType::StudentT,
            theta: correlation.clamp(-0.999, 0.999),
            degrees_of_freedom: df.max(2.0),
        }
    }

    /// Checks `theta` (and, for Student-t, the degrees of freedom) against
    /// the valid parameter range of the selected family.
    pub fn validate(&self) -> Result<(), String> {
        // NaN passes every `<`/`>` comparison below, so reject it explicitly.
        if !self.theta.is_finite() {
            return Err(format!("theta must be finite, got {}", self.theta));
        }
        match self.copula_type {
            CopulaType::Gaussian | CopulaType::StudentT => {
                if self.theta < -1.0 || self.theta > 1.0 {
                    return Err(format!(
                        "Correlation must be in [-1, 1], got {}",
                        self.theta
                    ));
                }
            }
            CopulaType::Clayton => {
                if self.theta <= 0.0 {
                    return Err(format!("Clayton theta must be > 0, got {}", self.theta));
                }
            }
            CopulaType::Gumbel => {
                if self.theta < 1.0 {
                    return Err(format!("Gumbel theta must be >= 1, got {}", self.theta));
                }
            }
            CopulaType::Frank => {
                if self.theta.abs() < 0.0001 {
                    return Err("Frank theta must be non-zero".to_string());
                }
            }
        }
        if self.copula_type == CopulaType::StudentT && self.degrees_of_freedom <= 0.0 {
            return Err("Degrees of freedom must be positive".to_string());
        }
        Ok(())
    }

    /// Kendall's tau implied by the configured parameters.
    pub fn kendalls_tau(&self) -> f64 {
        match self.copula_type {
            CopulaType::Gaussian | CopulaType::StudentT => {
                // tau = (2/pi) asin(rho) for elliptical copulas.
                2.0 * self.theta.asin() / std::f64::consts::PI
            }
            CopulaType::Clayton => self.theta / (self.theta + 2.0),
            CopulaType::Gumbel => 1.0 - 1.0 / self.theta,
            CopulaType::Frank => {
                // tau(theta) = 1 - (4/theta)(1 - D1(theta)) with D1 the first
                // Debye function. tau is odd in theta, so evaluate at |theta|
                // and restore the sign.
                let a = self.theta.abs();
                let tau = if a < 10.0 {
                    1.0 - 4.0 / a * (1.0 - debye_1(a))
                } else {
                    // Large-theta asymptotic: D1(theta) -> pi^2 / (6 theta),
                    // so tau ~ 1 - 4/theta + (2 pi^2 / 3) / theta^2.
                    1.0 - 4.0 / a + 2.0 * std::f64::consts::PI.powi(2) / (3.0 * a * a)
                };
                self.theta.signum() * tau
            }
        }
    }

    /// Coefficient of lower-tail dependence, lambda_L.
    pub fn lower_tail_dependence(&self) -> f64 {
        match self.copula_type {
            // Gaussian and Frank copulas are asymptotically tail independent.
            CopulaType::Gaussian | CopulaType::Frank => 0.0,
            // lambda_L = 2^(-1/theta) for Clayton.
            CopulaType::Clayton => 2.0_f64.powf(-1.0 / self.theta),
            // Gumbel has only upper-tail dependence.
            CopulaType::Gumbel => 0.0,
            CopulaType::StudentT => {
                // lambda = 2 * t_{nu+1}(-sqrt((nu+1)(1-rho)/(1+rho))).
                let nu = self.degrees_of_freedom;
                let rho = self.theta;
                let arg = ((nu + 1.0) * (1.0 - rho) / (1.0 + rho)).sqrt();
                2.0 * student_t_cdf(-arg, nu + 1.0)
            }
        }
    }

    /// Coefficient of upper-tail dependence, lambda_U.
    pub fn upper_tail_dependence(&self) -> f64 {
        match self.copula_type {
            CopulaType::Gaussian | CopulaType::Frank => 0.0,
            // Clayton has only lower-tail dependence.
            CopulaType::Clayton => 0.0,
            // lambda_U = 2 - 2^(1/theta) for Gumbel.
            CopulaType::Gumbel => 2.0 - 2.0_f64.powf(1.0 / self.theta),
            // The Student-t copula is radially symmetric.
            CopulaType::StudentT => self.lower_tail_dependence(),
        }
    }
}
/// Deterministic, seedable sampler producing (u, v) pairs on [0, 1]^2 whose
/// dependence structure follows the configured copula.
pub struct BivariateCopulaSampler {
    // ChaCha8 keeps the stream reproducible across runs for a given seed.
    rng: ChaCha8Rng,
    config: CopulaConfig,
}
impl BivariateCopulaSampler {
    /// Builds a sampler after validating `config`; sampling is fully
    /// deterministic for a given `seed`.
    pub fn new(seed: u64, config: CopulaConfig) -> Result<Self, String> {
        config.validate()?;
        Ok(Self {
            rng: ChaCha8Rng::seed_from_u64(seed),
            config,
        })
    }

    /// Draws one (u, v) pair with Uniform(0, 1) marginals coupled by the
    /// configured copula.
    pub fn sample(&mut self) -> (f64, f64) {
        match self.config.copula_type {
            CopulaType::Gaussian => self.sample_gaussian(),
            CopulaType::Clayton => self.sample_clayton(),
            CopulaType::Gumbel => self.sample_gumbel(),
            CopulaType::Frank => self.sample_frank(),
            CopulaType::StudentT => self.sample_student_t(),
        }
    }

    /// Correlates two standard normals with a 2x2 Cholesky factor, then maps
    /// each through the normal CDF to obtain uniform marginals.
    fn sample_gaussian(&mut self) -> (f64, f64) {
        let rho = self.config.theta;
        let z1 = self.sample_standard_normal();
        let z2 = self.sample_standard_normal();
        let x2 = rho * z1 + (1.0 - rho.powi(2)).sqrt() * z2;
        (standard_normal_cdf(z1), standard_normal_cdf(x2))
    }

    /// Conditional-inversion sampling for the Clayton copula:
    /// v = ((t^(-theta/(theta+1)) - 1) u^(-theta) + 1)^(-1/theta).
    fn sample_clayton(&mut self) -> (f64, f64) {
        let theta = self.config.theta;
        let u: f64 = self.rng.random();
        let t: f64 = self.rng.random();
        let v = (u.powf(-theta) * (t.powf(-theta / (theta + 1.0)) - 1.0) + 1.0).powf(-1.0 / theta);
        (u, v.clamp(0.0, 1.0))
    }

    /// Marshall-Olkin sampling for the Gumbel copula: with S positive
    /// stable(1/theta) and E_i ~ Exp(1), U_i = exp(-(E_i/S)^(1/theta)) have
    /// the Gumbel dependence structure.
    fn sample_gumbel(&mut self) -> (f64, f64) {
        let theta = self.config.theta;
        if theta <= 1.0 + 1e-9 {
            // theta = 1 is the independence copula; the positive-stable
            // frailty degenerates there, so draw independent uniforms.
            return (self.rng.random(), self.rng.random());
        }
        let s = sample_positive_stable(&mut self.rng, 1.0 / theta);
        let e1 = sample_exponential(&mut self.rng, 1.0);
        let e2 = sample_exponential(&mut self.rng, 1.0);
        let u = (-(e1 / s).powf(1.0 / theta)).exp();
        let v = (-(e2 / s).powf(1.0 / theta)).exp();
        // Keep outputs strictly inside (0, 1) for downstream quantile maps.
        (u.clamp(0.0001, 0.9999), v.clamp(0.0001, 0.9999))
    }

    /// Conditional-inversion sampling for the Frank copula:
    /// v = -ln(1 + t(e^(-theta) - 1)/(t + (1-t) e^(-theta u)))/theta.
    /// At the symmetric point u = t = 0.5 this yields v = 0.5, and v -> t in
    /// the independence limit theta -> 0.
    fn sample_frank(&mut self) -> (f64, f64) {
        let theta = self.config.theta;
        let u: f64 = self.rng.random();
        let t: f64 = self.rng.random();
        let num = t * ((-theta).exp() - 1.0);
        let den = t + (1.0 - t) * (-theta * u).exp();
        let v = -(1.0 + num / den).ln() / theta;
        (u, v.clamp(0.0, 1.0))
    }

    /// Student-t copula: scale correlated normals by sqrt(nu / chi2_nu), then
    /// map each coordinate through the t CDF.
    fn sample_student_t(&mut self) -> (f64, f64) {
        let rho = self.config.theta;
        let nu = self.config.degrees_of_freedom;
        let chi2 = sample_chi_squared(&mut self.rng, nu);
        let scale = (nu / chi2).sqrt();
        let z1 = self.sample_standard_normal();
        let z2 = self.sample_standard_normal();
        let x1 = z1 * scale;
        let x2 = (rho * z1 + (1.0 - rho.powi(2)).sqrt() * z2) * scale;
        (student_t_cdf(x1, nu), student_t_cdf(x2, nu))
    }

    /// Box-Muller transform. NOTE(review): `random()` lies in [0, 1), so u1
    /// can be exactly 0 and produce an infinite draw (probability ~2^-53);
    /// the CDF maps that to a boundary value 0 or 1.
    fn sample_standard_normal(&mut self) -> f64 {
        let u1: f64 = self.rng.random();
        let u2: f64 = self.rng.random();
        (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos()
    }

    /// Draws `n` pairs in sequence.
    pub fn sample_n(&mut self, n: usize) -> Vec<(f64, f64)> {
        (0..n).map(|_| self.sample()).collect()
    }

    /// Re-seeds the internal RNG, restarting the deterministic stream.
    pub fn reset(&mut self, seed: u64) {
        self.rng = ChaCha8Rng::seed_from_u64(seed);
    }

    /// Read-only access to the sampler's configuration.
    pub fn config(&self) -> &CopulaConfig {
        &self.config
    }
}
/// Cholesky factorization A = L * L^T (L lower-triangular).
///
/// Small non-positive pivots are jittered by +0.001 to tolerate nearly-PSD
/// correlation matrices. Returns `None` for non-square/ragged input, for a
/// pivot so negative that the matrix is clearly not positive definite (the
/// original code would silently produce NaN via sqrt of a negative), or for a
/// (near-)zero pivot that would divide by zero.
pub fn cholesky_decompose(matrix: &[Vec<f64>]) -> Option<Vec<Vec<f64>>> {
    let n = matrix.len();
    // Reject ragged / non-square input instead of panicking on indexing.
    if matrix.iter().any(|row| row.len() != n) {
        return None;
    }
    let mut l = vec![vec![0.0; n]; n];
    for i in 0..n {
        for j in 0..=i {
            let sum: f64 = (0..j).map(|k| l[i][k] * l[j][k]).sum();
            if i == j {
                let diag = matrix[i][i] - sum;
                if diag <= 0.0 {
                    // Jitter tiny negative pivots; anything beyond the
                    // tolerance means the matrix is not positive definite.
                    let jittered = diag + 0.001;
                    if jittered <= 0.0 {
                        return None;
                    }
                    l[i][j] = jittered.sqrt();
                } else {
                    l[i][j] = diag.sqrt();
                }
            } else {
                if l[j][j].abs() < 1e-10 {
                    return None;
                }
                l[i][j] = (matrix[i][j] - sum) / l[j][j];
            }
        }
    }
    Some(l)
}
/// Standard normal CDF, Phi(x) = (1 + erf(x / sqrt(2))) / 2.
pub fn standard_normal_cdf(x: f64) -> f64 {
    let z = x / std::f64::consts::SQRT_2;
    (1.0 + erf(z)) * 0.5
}
/// Inverse standard normal CDF (probit function).
///
/// The central region uses the Beasley-Springer rational approximation
/// (valid for |p - 0.5| <= 0.42); the tails use the Abramowitz & Stegun
/// 26.2.23 formula (|error| < 4.5e-4). Returns -inf / +inf for p outside
/// (0, 1).
///
/// Fixes two defects in the previous version: both tail branches returned
/// the quantile with the SIGN INVERTED (e.g. +2.33 for p = 0.01), and the
/// central coefficients were applied outside their validity range
/// (threshold 0.02425 instead of 0.08).
pub fn standard_normal_quantile(p: f64) -> f64 {
    if p <= 0.0 {
        return f64::NEG_INFINITY;
    }
    if p >= 1.0 {
        return f64::INFINITY;
    }
    // Beasley-Springer central coefficients are valid for |p - 0.5| <= 0.42.
    let p_low = 0.08;
    let p_high = 1.0 - p_low;
    if p < p_low {
        // Lower tail: the quantile is negative.
        -tail_quantile_magnitude(p)
    } else if p <= p_high {
        let q = p - 0.5;
        let r = q * q;
        let a = [
            2.50662823884,
            -18.61500062529,
            41.39119773534,
            -25.44106049637,
        ];
        let b = [
            -8.47351093090,
            23.08336743743,
            -21.06224101826,
            3.13082909833,
        ];
        q * (a[0] + a[1] * r + a[2] * r.powi(2) + a[3] * r.powi(3))
            / (1.0 + b[0] * r + b[1] * r.powi(2) + b[2] * r.powi(3) + b[3] * r.powi(4))
    } else {
        // Upper tail: symmetric to the lower tail.
        tail_quantile_magnitude(1.0 - p)
    }
}

/// Magnitude of the normal quantile for a tail probability `p` in (0, 0.08):
/// Abramowitz & Stegun 26.2.23 gives x > 0 with P(Z > x) = p.
fn tail_quantile_magnitude(p: f64) -> f64 {
    let q = (-2.0 * p.ln()).sqrt();
    let c = [2.515517, 0.802853, 0.010328];
    let d = [1.432788, 0.189269, 0.001308];
    q - (c[0] + c[1] * q + c[2] * q.powi(2))
        / (1.0 + d[0] * q + d[1] * q.powi(2) + d[2] * q.powi(3))
}
/// Error function via the Abramowitz & Stegun 7.1.26 rational approximation
/// (maximum absolute error about 1.5e-7). Odd in x: erf(-x) = -erf(x).
fn erf(x: f64) -> f64 {
    const COEF: [f64; 5] = [
        0.254829592,
        -0.284496736,
        1.421413741,
        -1.453152027,
        1.061405429,
    ];
    const P: f64 = 0.3275911;
    let sign = if x < 0.0 { -1.0 } else { 1.0 };
    let z = x.abs();
    let t = 1.0 / (1.0 + P * z);
    // Horner evaluation of the degree-5 polynomial in t.
    let poly = COEF.iter().rev().fold(0.0, |acc, &c| acc * t + c);
    sign * (1.0 - poly * t * (-z * z).exp())
}
/// CDF of the Student-t distribution with `df` degrees of freedom, computed
/// from the regularized incomplete beta function; large df falls back to the
/// normal CDF.
fn student_t_cdf(x: f64, df: f64) -> f64 {
    if df > 30.0 {
        // The t distribution is essentially Gaussian at this point.
        standard_normal_cdf(x)
    } else {
        // P(|T| > |x|) / 2 = I_{df/(df + x^2)}(df/2, 1/2) / 2.
        let tail = 0.5 * incomplete_beta(df / 2.0, 0.5, df / (df + x * x));
        if x > 0.0 {
            1.0 - tail
        } else {
            tail
        }
    }
}
/// Regularized incomplete beta function I_x(a, b), evaluated with the
/// modified Lentz continued fraction (Numerical Recipes style), truncated at
/// 100 terms.
fn incomplete_beta(a: f64, b: f64, x: f64) -> f64 {
    if x <= 0.0 {
        return 0.0;
    }
    if x >= 1.0 {
        return 1.0;
    }
    // The continued fraction converges rapidly only for
    // x < (a + 1) / (a + b + 2); otherwise use the symmetry
    // I_x(a, b) = 1 - I_{1-x}(b, a). (At most one level of recursion: the
    // transformed argument always satisfies the opposite inequality.)
    if x > (a + 1.0) / (a + b + 2.0) {
        return 1.0 - incomplete_beta(b, a, 1.0 - x);
    }
    let lbeta = ln_gamma(a) + ln_gamma(b) - ln_gamma(a + b);
    let front = (x.powf(a) * (1.0 - x).powf(b)) / lbeta.exp();
    // Modified Lentz algorithm; .max(1e-30) guards against zero divisors.
    let mut c: f64 = 1.0;
    let mut d: f64 = 1.0 / (1.0 - (a + b) * x / (a + 1.0)).max(1e-30);
    let mut h = d;
    for m in 1..100 {
        let m = m as f64;
        // Even- and odd-step continued-fraction coefficients.
        let d1 = m * (b - m) * x / ((a + 2.0 * m - 1.0) * (a + 2.0 * m));
        let d2 = -(a + m) * (a + b + m) * x / ((a + 2.0 * m) * (a + 2.0 * m + 1.0));
        d = 1.0 / (1.0 + d1 * d).max(1e-30);
        c = 1.0 + d1 / c.max(1e-30);
        h *= c * d;
        d = 1.0 / (1.0 + d2 * d).max(1e-30);
        c = 1.0 + d2 / c.max(1e-30);
        h *= c * d;
        if ((c * d) - 1.0).abs() < 1e-8 {
            break;
        }
    }
    front * h / a
}
/// Natural log of the gamma function for x > 0.
///
/// Uses the Lanczos approximation (g = 7, 9 coefficients), accurate to about
/// 1e-13, with the reflection formula for x < 0.5. This replaces a compact
/// Stirling variant whose error reached ~1% near x = 0.5 (0.5628 vs the true
/// ln Gamma(0.5) = 0.5724), which matters because `incomplete_beta` calls
/// this with b = 0.5.
fn ln_gamma(x: f64) -> f64 {
    if x <= 0.0 {
        return f64::INFINITY;
    }
    const LANCZOS: [f64; 9] = [
        0.99999999999980993,
        676.5203681218851,
        -1259.1392167224028,
        771.32342877765313,
        -176.61502916214059,
        12.507343278686905,
        -0.13857109526572012,
        9.9843695780195716e-6,
        1.5056327351493116e-7,
    ];
    if x < 0.5 {
        // Reflection: Gamma(x) Gamma(1 - x) = pi / sin(pi x); sin(pi x) > 0
        // on (0, 0.5), and the recursive call lands in [0.5, 1).
        return std::f64::consts::PI.ln()
            - (std::f64::consts::PI * x).sin().ln()
            - ln_gamma(1.0 - x);
    }
    let z = x - 1.0;
    let mut acc = LANCZOS[0];
    for (i, &coef) in LANCZOS.iter().enumerate().skip(1) {
        acc += coef / (z + i as f64);
    }
    let t = z + 7.5; // z + g + 0.5
    0.5 * (2.0 * std::f64::consts::PI).ln() + (z + 0.5) * t.ln() - t + acc.ln()
}
/// First Debye function, D1(x) = (1/x) * integral_0^x t/(e^t - 1) dt.
///
/// Uses the Taylor series near zero and a 100-panel composite trapezoid rule
/// elsewhere. Bug fix: the left trapezoid endpoint must be the continuity
/// limit f(0) = lim_{t->0} t/(e^t - 1) = 1; the previous code used f(h)
/// instead, biasing the result by O(h^2).
fn debye_1(x: f64) -> f64 {
    if x.abs() < 0.01 {
        // Taylor series: D1(x) = 1 - x/4 + x^2/36 - ...
        return 1.0 - x / 4.0 + x.powi(2) / 36.0;
    }
    let n = 100;
    let h = x / n as f64;
    let mut sum = 0.0;
    // Interior nodes t = h, 2h, ..., x - h.
    for i in 1..n {
        let t = i as f64 * h;
        sum += t / (t.exp() - 1.0);
    }
    // Endpoints at half weight: f(0) = 1 and f(x) = x/(e^x - 1).
    (sum + 0.5 * (1.0 + x / (x.exp() - 1.0))) * h / x
}
fn sample_exponential(rng: &mut ChaCha8Rng, lambda: f64) -> f64 {
let u: f64 = rng.random();
-u.ln() / lambda
}
/// Chi-squared sample as a sum of squared standard normals (Box-Muller).
///
/// NOTE(review): fractional degrees of freedom are truncated to the integer
/// part, so df = 4.5 is sampled as chi-squared(4) — confirm this
/// approximation is acceptable for callers with non-integer df.
fn sample_chi_squared(rng: &mut ChaCha8Rng, df: f64) -> f64 {
    let k = df.floor() as usize;
    (0..k)
        .map(|_| {
            let u1: f64 = rng.random();
            let u2: f64 = rng.random();
            let z = (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos();
            z * z
        })
        .sum()
}
/// Draws from a positive (totally right-skewed) alpha-stable distribution,
/// used as the frailty/mixing variable for Gumbel-copula sampling.
///
/// NOTE(review): this resembles the Chambers-Mallows-Stuck construction for a
/// one-sided stable law with index `alpha`, but the angle terms lack the
/// usual shift B = atan(tan(pi*alpha/2))/alpha that CMS applies for maximally
/// skewed variates — confirm against the reference before relying on the
/// exact distribution.
fn sample_positive_stable(rng: &mut ChaCha8Rng, alpha: f64) -> f64 {
    // alpha ~ 1 corresponds to a degenerate point mass at 1.
    if (alpha - 1.0).abs() < 0.001 {
        return 1.0;
    }
    // Uniform angle on (-pi/2, pi/2) and an independent Exp(1) variate.
    let u: f64 = rng.random::<f64>() * std::f64::consts::PI - std::f64::consts::PI / 2.0;
    let e = sample_exponential(rng, 1.0);
    let b = (std::f64::consts::PI * alpha / 2.0).tan();
    let s = (1.0 + b * b).powf(1.0 / (2.0 * alpha));
    let term1 = (alpha * u).sin();
    let term2 = (u.cos()).powf(1.0 / alpha);
    let term3 = ((1.0 - alpha) * u).cos() / e;
    s * term1 / term2 * term3.powf((1.0 - alpha) / alpha)
}
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    // Parameter-range validation for each copula family.
    #[test]
    fn test_copula_validation() {
        let gaussian = CopulaConfig::gaussian(0.5);
        assert!(gaussian.validate().is_ok());
        // Correlation outside [-1, 1] must be rejected.
        let invalid_gaussian = CopulaConfig {
            copula_type: CopulaType::Gaussian,
            theta: 1.5, degrees_of_freedom: 4.0,
        };
        assert!(invalid_gaussian.validate().is_err());
        let clayton = CopulaConfig::clayton(2.0);
        assert!(clayton.validate().is_ok());
        // Clayton requires theta > 0.
        let invalid_clayton = CopulaConfig {
            copula_type: CopulaType::Clayton,
            theta: -1.0, degrees_of_freedom: 4.0,
        };
        assert!(invalid_clayton.validate().is_err());
        let gumbel = CopulaConfig::gumbel(2.0);
        assert!(gumbel.validate().is_ok());
        // Gumbel requires theta >= 1.
        let invalid_gumbel = CopulaConfig {
            copula_type: CopulaType::Gumbel,
            theta: 0.5, degrees_of_freedom: 4.0,
        };
        assert!(invalid_gumbel.validate().is_err());
    }

    // Samples must lie in the unit square, and positive correlation should
    // show up as positive sample covariance of (u, v).
    #[test]
    fn test_gaussian_copula_sampling() {
        let config = CopulaConfig::gaussian(0.7);
        let mut sampler = BivariateCopulaSampler::new(42, config).unwrap();
        let samples = sampler.sample_n(1000);
        assert_eq!(samples.len(), 1000);
        assert!(samples
            .iter()
            .all(|(u, v)| *u >= 0.0 && *u <= 1.0 && *v >= 0.0 && *v <= 1.0));
        let mean_u: f64 = samples.iter().map(|(u, _)| u).sum::<f64>() / 1000.0;
        let mean_v: f64 = samples.iter().map(|(_, v)| v).sum::<f64>() / 1000.0;
        let covariance: f64 = samples
            .iter()
            .map(|(u, v)| (u - mean_u) * (v - mean_v))
            .sum::<f64>()
            / 1000.0;
        assert!(covariance > 0.0); }

    // Two samplers with the same seed must produce identical streams.
    #[test]
    fn test_copula_determinism() {
        let config = CopulaConfig::gaussian(0.5);
        let mut sampler1 = BivariateCopulaSampler::new(42, config.clone()).unwrap();
        let mut sampler2 = BivariateCopulaSampler::new(42, config).unwrap();
        for _ in 0..100 {
            assert_eq!(sampler1.sample(), sampler2.sample());
        }
    }

    // Closed-form Kendall's tau values for Gaussian, Clayton, and Gumbel.
    #[test]
    fn test_kendalls_tau() {
        let gaussian = CopulaConfig::gaussian(0.5);
        let tau = gaussian.kendalls_tau();
        // tau = (2/pi) asin(rho) for the Gaussian copula.
        let expected = 2.0 * (0.5_f64).asin() / std::f64::consts::PI;
        assert!((tau - expected).abs() < 0.001);
        // Clayton: tau = theta / (theta + 2) = 0.5 at theta = 2.
        let clayton = CopulaConfig::clayton(2.0);
        let tau = clayton.kendalls_tau();
        assert!((tau - 0.5).abs() < 0.001);
        // Gumbel: tau = 1 - 1/theta = 0.5 at theta = 2.
        let gumbel = CopulaConfig::gumbel(2.0);
        let tau = gumbel.kendalls_tau();
        assert!((tau - 0.5).abs() < 0.001);
    }

    // Gaussian has no tail dependence; Clayton only lower, Gumbel only upper.
    #[test]
    fn test_tail_dependence() {
        let gaussian = CopulaConfig::gaussian(0.7);
        assert_eq!(gaussian.lower_tail_dependence(), 0.0);
        assert_eq!(gaussian.upper_tail_dependence(), 0.0);
        let clayton = CopulaConfig::clayton(2.0);
        assert!(clayton.lower_tail_dependence() > 0.0);
        assert_eq!(clayton.upper_tail_dependence(), 0.0);
        let gumbel = CopulaConfig::gumbel(2.0);
        assert_eq!(gumbel.lower_tail_dependence(), 0.0);
        assert!(gumbel.upper_tail_dependence() > 0.0);
    }

    // L * L^T must reconstruct the input matrix.
    #[test]
    fn test_cholesky_decomposition() {
        let matrix = vec![vec![1.0, 0.5], vec![0.5, 1.0]];
        let l = cholesky_decompose(&matrix).unwrap();
        let reconstructed_00 = l[0][0] * l[0][0];
        let reconstructed_01 = l[0][0] * l[1][0];
        let reconstructed_11 = l[1][0] * l[1][0] + l[1][1] * l[1][1];
        assert!((reconstructed_00 - 1.0).abs() < 0.001);
        assert!((reconstructed_01 - 0.5).abs() < 0.001);
        assert!((reconstructed_11 - 1.0).abs() < 0.001);
    }

    // Spot checks: Phi(0) = 0.5 and the 3-sigma tails.
    #[test]
    fn test_standard_normal_cdf() {
        assert!((standard_normal_cdf(0.0) - 0.5).abs() < 0.001);
        assert!(standard_normal_cdf(-3.0) < 0.01);
        assert!(standard_normal_cdf(3.0) > 0.99);
    }

    // Clayton samples must stay inside the unit square.
    #[test]
    fn test_clayton_copula() {
        let config = CopulaConfig::clayton(2.0);
        let mut sampler = BivariateCopulaSampler::new(42, config).unwrap();
        let samples = sampler.sample_n(1000);
        assert!(samples
            .iter()
            .all(|(u, v)| *u >= 0.0 && *u <= 1.0 && *v >= 0.0 && *v <= 1.0));
    }

    // Frank samples must stay inside the unit square.
    #[test]
    fn test_frank_copula() {
        let config = CopulaConfig::frank(5.0);
        let mut sampler = BivariateCopulaSampler::new(42, config).unwrap();
        let samples = sampler.sample_n(1000);
        assert!(samples
            .iter()
            .all(|(u, v)| *u >= 0.0 && *u <= 1.0 && *v >= 0.0 && *v <= 1.0));
    }
}