use rust_decimal::Decimal;
use thiserror::Error;
pub const MIN_OBSERVATIONS: usize = 30;
#[derive(Debug, Error)]
pub enum CointegrationMathError {
#[error("insufficient data: need at least {minimum} observations, got {actual}")]
InsufficientData { actual: usize, minimum: usize },
#[error("series length mismatch: {len_a} vs {len_b}")]
LengthMismatch { len_a: usize, len_b: usize },
#[error("degenerate data: {0}")]
DegenerateData(String),
}
#[derive(Debug, Clone)]
pub struct EngleGrangerResult {
pub adf_statistic: f64,
pub p_value: f64,
pub beta: f64,
pub alpha: f64,
pub half_life: Option<f64>,
pub correlation: f64,
pub spread_mean: f64,
pub spread_std: f64,
}
pub fn engle_granger(
prices_a: &[f64],
prices_b: &[f64],
) -> Result<EngleGrangerResult, CointegrationMathError> {
let n = prices_a.len();
if n != prices_b.len() {
return Err(CointegrationMathError::LengthMismatch {
len_a: n,
len_b: prices_b.len(),
});
}
if n < MIN_OBSERVATIONS {
return Err(CointegrationMathError::InsufficientData {
actual: n,
minimum: MIN_OBSERVATIONS,
});
}
let (alpha, beta) = ols_regression(prices_a, prices_b)?;
let residuals: Vec<f64> = prices_b
.iter()
.zip(prices_a.iter())
.map(|(y, x)| y - alpha - beta * x)
.collect();
let spread_mean = mean(&residuals);
let spread_std = std_dev(&residuals, spread_mean);
if spread_std < 1e-15 {
return Err(CointegrationMathError::DegenerateData(
"spread has zero variance".to_string(),
));
}
let adf_statistic = adf_test_statistic(&residuals)?;
let p_value = adf_p_value(adf_statistic, n);
let half_life = ornstein_uhlenbeck_half_life(&residuals);
let correlation = pearson_correlation(prices_a, prices_b)?;
Ok(EngleGrangerResult {
adf_statistic,
p_value,
beta,
alpha,
half_life,
correlation,
spread_mean,
spread_std,
})
}
pub fn spread_stats_to_decimal(mean: f64, std: f64) -> (Decimal, Decimal) {
let ratio_mean = Decimal::from_f64_retain(mean).unwrap_or(Decimal::ZERO);
let ratio_std = Decimal::from_f64_retain(std).unwrap_or(Decimal::ZERO);
(ratio_mean, ratio_std)
}
fn ols_regression(x: &[f64], y: &[f64]) -> Result<(f64, f64), CointegrationMathError> {
let n = x.len() as f64;
let mean_x = x.iter().sum::<f64>() / n;
let mean_y = y.iter().sum::<f64>() / n;
let mut sum_dx_dy: f64 = 0.0;
let mut sum_dx2: f64 = 0.0;
for (xi, yi) in x.iter().zip(y.iter()) {
let dx = xi - mean_x;
let dy = yi - mean_y;
sum_dx_dy += dx * dy;
sum_dx2 += dx * dx;
}
if sum_dx2 < 1e-15 {
return Err(CointegrationMathError::DegenerateData(
"x series has zero variance".to_string(),
));
}
let beta = sum_dx_dy / sum_dx2;
let alpha = mean_y - beta * mean_x;
Ok((alpha, beta))
}
fn adf_test_statistic(residuals: &[f64]) -> Result<f64, CointegrationMathError> {
let n = residuals.len();
if n < 3 {
return Err(CointegrationMathError::InsufficientData {
actual: n,
minimum: 3,
});
}
let mut sum_xy: f64 = 0.0;
let mut sum_x2: f64 = 0.0;
let mut sum_e2: f64 = 0.0;
for t in 1..n {
let x = residuals[t - 1];
let y = residuals[t] - residuals[t - 1];
sum_xy += x * y;
sum_x2 += x * x;
}
if sum_x2 < 1e-15 {
return Err(CointegrationMathError::DegenerateData(
"lagged residuals have zero variance".to_string(),
));
}
let theta = sum_xy / sum_x2;
for t in 1..n {
let x = residuals[t - 1];
let y = residuals[t] - residuals[t - 1];
let e = y - theta * x;
sum_e2 += e * e;
}
let m = (n - 1) as f64; let sigma2 = sum_e2 / (m - 1.0); let se_theta = (sigma2 / sum_x2).sqrt();
if se_theta < 1e-15 {
return Err(CointegrationMathError::DegenerateData(
"standard error of theta is zero".to_string(),
));
}
Ok(theta / se_theta)
}
fn adf_p_value(t_stat: f64, _n: usize) -> f64 {
if t_stat <= -3.90 {
0.01
} else if t_stat <= -3.34 {
0.01 + (0.04) * (t_stat - (-3.90)) / ((-3.34) - (-3.90))
} else if t_stat <= -3.04 {
0.05 + (0.05) * (t_stat - (-3.34)) / ((-3.04) - (-3.34))
} else if t_stat <= -2.50 {
0.10 + (0.15) * (t_stat - (-3.04)) / ((-2.50) - (-3.04))
} else {
0.50_f64
.min(0.25 + (0.25) * (t_stat - (-2.50)) / ((-1.50) - (-2.50)))
.max(0.25)
}
}
fn ornstein_uhlenbeck_half_life(residuals: &[f64]) -> Option<f64> {
let n = residuals.len();
if n < 3 {
return None;
}
let mut sum_xy: f64 = 0.0;
let mut sum_x2: f64 = 0.0;
for t in 1..n {
sum_xy += residuals[t] * residuals[t - 1];
sum_x2 += residuals[t - 1] * residuals[t - 1];
}
if sum_x2 < 1e-15 {
return None;
}
let phi = sum_xy / sum_x2;
if phi >= 1.0 || phi <= 0.0 {
return None; }
let half_life = -f64::ln(2.0) / f64::ln(phi);
if half_life.is_finite() && half_life > 0.0 {
Some(half_life)
} else {
None
}
}
pub fn pearson_correlation(x: &[f64], y: &[f64]) -> Result<f64, CointegrationMathError> {
let n = x.len() as f64;
let mean_x = mean(x);
let mean_y = mean(y);
let mut sum_xy: f64 = 0.0;
let mut sum_x2: f64 = 0.0;
let mut sum_y2: f64 = 0.0;
for (xi, yi) in x.iter().zip(y.iter()) {
let dx = xi - mean_x;
let dy = yi - mean_y;
sum_xy += dx * dy;
sum_x2 += dx * dx;
sum_y2 += dy * dy;
}
let denom = (sum_x2 * sum_y2).sqrt();
if denom < 1e-15 * n {
return Ok(0.0);
}
Ok(sum_xy / denom)
}
fn mean(data: &[f64]) -> f64 {
if data.is_empty() {
return 0.0;
}
data.iter().sum::<f64>() / data.len() as f64
}
fn std_dev(data: &[f64], m: f64) -> f64 {
if data.len() < 2 {
return 0.0;
}
let variance = data.iter().map(|x| (x - m).powi(2)).sum::<f64>() / (data.len() - 1) as f64;
variance.sqrt()
}
#[cfg(test)]
mod tests;