use greeners::{CovarianceType, GreenersError, OLS};
use ndarray::{s, Array1, Array2};
use std::collections::HashMap;
#[derive(Debug, Clone)]
pub struct AssetDiagnostics {
pub asset: String,
pub n_obs: usize,
pub durbin_watson: f64,
pub lb_stat: f64,
pub lb_p_value: f64,
pub bp_stat: f64,
pub bp_p_value: f64,
pub white_stat: f64,
pub white_p_value: f64,
pub reset_f: f64,
pub reset_p_value: f64,
pub chow_f: f64,
pub chow_p_value: f64,
pub arch_stat: f64,
pub arch_p_value: f64,
pub jb_stat: f64,
pub jb_p_value: f64,
}
impl AssetDiagnostics {
pub fn has_positive_autocorr(&self) -> bool {
self.durbin_watson < 1.5
}
pub fn has_negative_autocorr(&self) -> bool {
self.durbin_watson > 2.5
}
pub fn has_heteroscedasticity(&self) -> bool {
self.bp_p_value < 0.05
}
pub fn white_rejects(&self) -> bool {
self.white_p_value < 0.05
}
pub fn has_misspecification(&self) -> bool {
self.reset_p_value < 0.05
}
pub fn has_structural_break(&self) -> bool {
self.chow_p_value < 0.05
}
pub fn has_arch_effects(&self) -> bool {
self.arch_p_value < 0.05
}
pub fn non_normal_residuals(&self) -> bool {
self.jb_p_value < 0.05
}
pub fn count_issues(&self) -> usize {
let mut count = 0;
if self.has_positive_autocorr() || self.has_negative_autocorr() {
count += 1;
}
if self.has_heteroscedasticity() {
count += 1;
}
if self.has_misspecification() {
count += 1;
}
if self.has_structural_break() {
count += 1;
}
if self.has_arch_effects() {
count += 1;
}
if self.non_normal_residuals() {
count += 1;
}
count
}
}
#[derive(Debug, Clone)]
pub struct ResidualDiagnostics {
pub diagnostics: HashMap<String, AssetDiagnostics>,
}
impl ResidualDiagnostics {
pub fn fit(
returns_excess: &Array2<f64>,
factors: &Array2<f64>,
cov_type: CovarianceType,
asset_names: Option<Vec<String>>,
) -> Result<Self, GreenersError> {
let (t, n) = returns_excess.dim();
let (t_factors, k) = factors.dim();
if t != t_factors {
return Err(GreenersError::ShapeMismatch(format!(
"Returns has {} observations but factors has {}",
t, t_factors
)));
}
let asset_names = asset_names.unwrap_or_else(|| {
(0..n)
.map(|i| format!("Asset{}", i + 1))
.collect::<Vec<String>>()
});
if asset_names.len() != n {
return Err(GreenersError::ShapeMismatch(format!(
"Expected {} asset names but got {}",
n,
asset_names.len()
)));
}
let mut x_with_intercept = Array2::zeros((t, k + 1));
x_with_intercept.column_mut(0).fill(1.0);
for i in 0..k {
x_with_intercept
.column_mut(i + 1)
.assign(&factors.column(i));
}
let mut diagnostics = HashMap::new();
for (j, asset_name) in asset_names.iter().enumerate() {
let y = returns_excess.column(j).to_owned();
if t < k + 10 {
continue;
}
match OLS::fit(&y, &x_with_intercept, cov_type.clone()) {
Ok(model) => {
let resid = model.residuals(&y, &x_with_intercept);
let dw = durbin_watson(&resid);
let (lb_stat, lb_p) = ljung_box(&resid, 12);
let (bp_stat, bp_p) = breusch_pagan(&resid, &x_with_intercept);
let (white_stat, white_p) = white_test(&resid, &x_with_intercept);
let (reset_f, reset_p) = reset_test(&y, &x_with_intercept, &resid);
let split_idx = t / 2;
let (chow_f, chow_p) = chow_test(&y, &x_with_intercept, split_idx);
let (arch_stat, arch_p) = arch_test(&resid, 12);
let (jb_stat, jb_p) = jarque_bera(&resid);
diagnostics.insert(
asset_name.clone(),
AssetDiagnostics {
asset: asset_name.clone(),
n_obs: t,
durbin_watson: dw,
lb_stat,
lb_p_value: lb_p,
bp_stat,
bp_p_value: bp_p,
white_stat,
white_p_value: white_p,
reset_f,
reset_p_value: reset_p,
chow_f,
chow_p_value: chow_p,
arch_stat,
arch_p_value: arch_p,
jb_stat,
jb_p_value: jb_p,
},
);
}
Err(_) => continue,
}
}
Ok(ResidualDiagnostics { diagnostics })
}
pub fn get(&self, asset: &str) -> Option<&AssetDiagnostics> {
self.diagnostics.get(asset)
}
pub fn assets_with_issues(&self) -> Vec<String> {
self.diagnostics
.values()
.filter(|d| d.count_issues() > 0)
.map(|d| d.asset.clone())
.collect()
}
pub fn to_csv_string(&self) -> String {
let mut csv = String::new();
csv.push_str("Asset,N_Obs,DW,LB_Stat,LB_P,BP_Stat,BP_P,White_Stat,White_P,RESET_F,RESET_P,Chow_F,Chow_P,ARCH_Stat,ARCH_P,JB_Stat,JB_P,Issues\n");
let mut assets: Vec<_> = self.diagnostics.keys().collect();
assets.sort();
for asset in assets {
if let Some(diag) = self.diagnostics.get(asset) {
csv.push_str(&format!(
"{},{},{:.4},{:.4},{:.4},{:.4},{:.4},{:.4},{:.4},{:.4},{:.4},{:.4},{:.4},{:.4},{:.4},{:.4},{:.4},{}\n",
diag.asset,
diag.n_obs,
diag.durbin_watson,
diag.lb_stat,
diag.lb_p_value,
diag.bp_stat,
diag.bp_p_value,
diag.white_stat,
diag.white_p_value,
diag.reset_f,
diag.reset_p_value,
diag.chow_f,
diag.chow_p_value,
diag.arch_stat,
diag.arch_p_value,
diag.jb_stat,
diag.jb_p_value,
diag.count_issues()
));
}
}
csv
}
}
fn durbin_watson(residuals: &Array1<f64>) -> f64 {
let n = residuals.len();
if n < 2 {
return f64::NAN;
}
let mut numerator = 0.0;
let mut denominator = 0.0;
for i in 1..n {
let diff = residuals[i] - residuals[i - 1];
numerator += diff * diff;
}
for i in 0..n {
denominator += residuals[i] * residuals[i];
}
if denominator == 0.0 {
return f64::NAN;
}
numerator / denominator
}
fn ljung_box(residuals: &Array1<f64>, max_lag: usize) -> (f64, f64) {
let n = residuals.len();
if n < max_lag + 2 {
return (f64::NAN, f64::NAN);
}
let mean = residuals.mean().unwrap_or(0.0);
let var = residuals.mapv(|x| (x - mean).powi(2)).sum() / n as f64;
if var == 0.0 {
return (f64::NAN, f64::NAN);
}
let mut q_stat = 0.0;
for lag in 1..=max_lag {
let mut acf = 0.0;
for i in lag..n {
acf += (residuals[i] - mean) * (residuals[i - lag] - mean);
}
acf /= n as f64 * var;
q_stat += acf * acf / (n - lag) as f64;
}
q_stat *= n as f64 * (n + 2) as f64;
let p_value = chi_squared_cdf_complement(q_stat, max_lag);
(q_stat, p_value)
}
fn breusch_pagan(residuals: &Array1<f64>, x: &Array2<f64>) -> (f64, f64) {
let n = residuals.len();
let k = x.ncols();
if n < k + 5 {
return (f64::NAN, f64::NAN);
}
let resid_sq = residuals.mapv(|r| r * r);
let sigma_sq = resid_sq.sum() / n as f64;
if sigma_sq == 0.0 {
return (f64::NAN, f64::NAN);
}
let y_bp = resid_sq.mapv(|r| r / sigma_sq);
match OLS::fit(&y_bp, x, CovarianceType::NonRobust) {
Ok(model) => {
let bp_stat = n as f64 * model.r_squared;
let df = k - 1; let p_value = chi_squared_cdf_complement(bp_stat, df);
(bp_stat, p_value)
}
Err(_) => (f64::NAN, f64::NAN),
}
}
fn white_test(residuals: &Array1<f64>, x: &Array2<f64>) -> (f64, f64) {
let n = residuals.len();
let k = x.ncols();
if n < k * 2 + 5 {
return (f64::NAN, f64::NAN);
}
let resid_sq = residuals.mapv(|r| r * r);
let mut x_white_cols = Vec::new();
for j in 0..k {
x_white_cols.push(x.column(j).to_owned());
}
for j in 1..k {
let col = x.column(j);
x_white_cols.push(col.mapv(|v| v * v));
}
for j in 1..k {
for l in (j + 1)..k {
let col_j = x.column(j);
let col_l = x.column(l);
let cross: Array1<f64> = col_j.iter().zip(col_l.iter()).map(|(a, b)| a * b).collect();
x_white_cols.push(cross);
}
}
let k_white = x_white_cols.len();
let mut x_white = Array2::zeros((n, k_white));
for (j, col) in x_white_cols.iter().enumerate() {
x_white.column_mut(j).assign(col);
}
match OLS::fit(&resid_sq, &x_white, CovarianceType::NonRobust) {
Ok(model) => {
let white_stat = n as f64 * model.r_squared;
let df = k_white - 1; let p_value = chi_squared_cdf_complement(white_stat, df);
(white_stat, p_value)
}
Err(_) => (f64::NAN, f64::NAN),
}
}
fn reset_test(y: &Array1<f64>, x: &Array2<f64>, residuals: &Array1<f64>) -> (f64, f64) {
let n = y.len();
let k = x.ncols();
if n < k + 5 {
return (f64::NAN, f64::NAN);
}
match OLS::fit(y, x, CovarianceType::NonRobust) {
Ok(model) => {
let y_hat = x.dot(&model.params);
let y_hat_sq = y_hat.mapv(|v| v * v);
let mut x_reset = Array2::zeros((n, k + 1));
for j in 0..k {
x_reset.column_mut(j).assign(&x.column(j));
}
x_reset.column_mut(k).assign(&y_hat_sq);
match OLS::fit(y, &x_reset, CovarianceType::NonRobust) {
Ok(model_aug) => {
let rss_restricted = residuals.mapv(|r| r * r).sum();
let resid_aug = model_aug.residuals(y, &x_reset);
let rss_unrestricted = resid_aug.mapv(|r| r * r).sum();
let df1 = 1; let df2 = n - k - 1;
let numerator = (rss_restricted - rss_unrestricted) / df1 as f64;
let denominator = rss_unrestricted / df2 as f64;
if denominator <= 0.0 {
return (f64::NAN, f64::NAN);
}
let f_stat = numerator / denominator;
let p_value = f_cdf_complement(f_stat, df1, df2);
(f_stat, p_value)
}
Err(_) => (f64::NAN, f64::NAN),
}
}
Err(_) => (f64::NAN, f64::NAN),
}
}
fn chow_test(y: &Array1<f64>, x: &Array2<f64>, split_idx: usize) -> (f64, f64) {
let n = y.len();
let k = x.ncols();
if split_idx < k + 2 || n - split_idx < k + 2 {
return (f64::NAN, f64::NAN);
}
let rss_full = match OLS::fit(y, x, CovarianceType::NonRobust) {
Ok(model) => {
let resid = model.residuals(y, x);
resid.mapv(|r| r * r).sum()
}
Err(_) => return (f64::NAN, f64::NAN),
};
let y1 = y.slice(s![..split_idx]).to_owned();
let x1 = x.slice(s![..split_idx, ..]).to_owned();
let rss1 = match OLS::fit(&y1, &x1, CovarianceType::NonRobust) {
Ok(model) => {
let resid = model.residuals(&y1, &x1);
resid.mapv(|r| r * r).sum()
}
Err(_) => return (f64::NAN, f64::NAN),
};
let y2 = y.slice(s![split_idx..]).to_owned();
let x2 = x.slice(s![split_idx.., ..]).to_owned();
let rss2 = match OLS::fit(&y2, &x2, CovarianceType::NonRobust) {
Ok(model) => {
let resid = model.residuals(&y2, &x2);
resid.mapv(|r| r * r).sum()
}
Err(_) => return (f64::NAN, f64::NAN),
};
let rss_split = rss1 + rss2;
let numerator = (rss_full - rss_split) / k as f64;
let denominator = rss_split / (n - 2 * k) as f64;
if denominator <= 0.0 {
return (f64::NAN, f64::NAN);
}
let f_stat = numerator / denominator;
let df1 = k;
let df2 = n - 2 * k;
let p_value = f_cdf_complement(f_stat, df1, df2);
(f_stat, p_value)
}
fn arch_test(residuals: &Array1<f64>, lags: usize) -> (f64, f64) {
let n = residuals.len();
if n < lags + 10 {
return (f64::NAN, f64::NAN);
}
let resid_sq = residuals.mapv(|r| r * r);
let n_eff = n - lags;
let mut x_arch = Array2::zeros((n_eff, lags + 1));
x_arch.column_mut(0).fill(1.0);
for lag in 1..=lags {
for i in 0..n_eff {
x_arch[[i, lag]] = resid_sq[i + lags - lag];
}
}
let y_arch = resid_sq.slice(s![lags..]).to_owned();
match OLS::fit(&y_arch, &x_arch, CovarianceType::NonRobust) {
Ok(model) => {
let arch_stat = n_eff as f64 * model.r_squared;
let p_value = chi_squared_cdf_complement(arch_stat, lags);
(arch_stat, p_value)
}
Err(_) => (f64::NAN, f64::NAN),
}
}
fn jarque_bera(residuals: &Array1<f64>) -> (f64, f64) {
let n = residuals.len();
if n < 4 {
return (f64::NAN, f64::NAN);
}
let mean = residuals.mean().unwrap_or(0.0);
let std = residuals.std(0.0);
if std == 0.0 {
return (f64::NAN, f64::NAN);
}
let std_resid = residuals.mapv(|r| (r - mean) / std);
let skew = std_resid.mapv(|r| r.powi(3)).sum() / n as f64;
let kurt = std_resid.mapv(|r| r.powi(4)).sum() / n as f64;
let jb = (n as f64 / 6.0) * (skew.powi(2) + (kurt - 3.0).powi(2) / 4.0);
let p_value = chi_squared_cdf_complement(jb, 2);
(jb, p_value)
}
fn chi_squared_cdf_complement(x: f64, df: usize) -> f64 {
if x <= 0.0 {
return 1.0;
}
let k = df as f64 / 2.0;
let x_half = x / 2.0;
if df > 100 {
let mean = df as f64;
let variance = 2.0 * df as f64;
let z = (x - mean) / variance.sqrt();
return normal_cdf_complement(z);
}
gamma_p_complement(k, x_half)
}
fn f_cdf_complement(x: f64, df1: usize, df2: usize) -> f64 {
if x <= 0.0 {
return 1.0;
}
let d1 = df1 as f64;
let d2 = df2 as f64;
let y = d1 * x / (d1 * x + d2);
incomplete_beta_complement(d2 / 2.0, d1 / 2.0, y)
}
fn normal_cdf_complement(z: f64) -> f64 {
0.5 * erfc(z / std::f64::consts::SQRT_2)
}
fn erfc(x: f64) -> f64 {
if x >= 0.0 {
let t = 1.0 / (1.0 + 0.5 * x);
t * (-x * x - 1.26551223
+ t * (1.00002368
+ t * (0.37409196
+ t * (0.09678418
+ t * (-0.18628806
+ t * (0.27886807
+ t * (-1.13520398
+ t * (1.48851587 + t * (-0.82215223 + t * 0.17087277)))))))))
.exp()
} else {
2.0 - erfc(-x)
}
}
fn gamma_p_complement(a: f64, x: f64) -> f64 {
if x < 0.0 || a <= 0.0 {
return 1.0;
}
if x < a + 1.0 {
1.0 - gamma_p_series(a, x)
} else {
gamma_q_continued_fraction(a, x)
}
}
fn gamma_p_series(a: f64, x: f64) -> f64 {
let mut sum = 1.0 / a;
let mut term = 1.0 / a;
for n in 1..100 {
term *= x / (a + n as f64);
sum += term;
if term.abs() < sum.abs() * 1e-10 {
break;
}
}
sum * (-x + a * x.ln() - ln_gamma(a)).exp()
}
fn gamma_q_continued_fraction(a: f64, x: f64) -> f64 {
let mut b = x + 1.0 - a;
let mut c = 1.0 / 1e-30;
let mut d = 1.0 / b;
let mut h = d;
for i in 1..100 {
let an = -i as f64 * (i as f64 - a);
b += 2.0;
d = an * d + b;
if d.abs() < 1e-30 {
d = 1e-30;
}
c = b + an / c;
if c.abs() < 1e-30 {
c = 1e-30;
}
d = 1.0 / d;
let delta = d * c;
h *= delta;
if (delta - 1.0).abs() < 1e-10 {
break;
}
}
(-x + a * x.ln() - ln_gamma(a)).exp() * h
}
fn ln_gamma(x: f64) -> f64 {
if x > 10.0 {
return (x - 0.5) * x.ln() - x + 0.5 * (2.0 * std::f64::consts::PI).ln() + 1.0 / (12.0 * x)
- 1.0 / (360.0 * x.powi(3));
}
let coefficients = [
76.18009172947146,
-86.50532032941677,
24.01409824083091,
-1.231739572450155,
0.1208650973866179e-2,
-0.5395239384953e-5,
];
let mut y = x;
let mut tmp = x + 5.5;
tmp -= (x + 0.5) * tmp.ln();
let mut ser = 1.000000000190015;
for &coef in coefficients.iter() {
y += 1.0;
ser += coef / y;
}
-tmp + (2.5066282746310005 * ser / x).ln()
}
fn incomplete_beta_complement(a: f64, b: f64, x: f64) -> f64 {
if x <= 0.0 {
return 1.0;
}
if x >= 1.0 {
return 0.0;
}
1.0 - incomplete_beta(a, b, x)
}
fn incomplete_beta(a: f64, b: f64, x: f64) -> f64 {
if x <= 0.0 {
return 0.0;
}
if x >= 1.0 {
return 1.0;
}
let ln_beta_ab = ln_beta(a, b);
let front = (a * x.ln() + b * (1.0 - x).ln() - ln_beta_ab).exp();
let cf = beta_continued_fraction(a, b, x);
front * cf / a
}
fn ln_beta(a: f64, b: f64) -> f64 {
ln_gamma(a) + ln_gamma(b) - ln_gamma(a + b)
}
fn beta_continued_fraction(a: f64, b: f64, x: f64) -> f64 {
let max_iter = 100;
let epsilon = 1e-10;
let qab = a + b;
let qap = a + 1.0;
let qam = a - 1.0;
let mut c = 1.0;
let mut d = 1.0 - qab * x / qap;
if d.abs() < 1e-30 {
d = 1e-30;
}
d = 1.0 / d;
let mut h = d;
for m in 1..max_iter {
let m_f = m as f64;
let m2 = 2.0 * m_f;
let aa = m_f * (b - m_f) * x / ((qam + m2) * (a + m2));
d = 1.0 + aa * d;
if d.abs() < 1e-30 {
d = 1e-30;
}
c = 1.0 + aa / c;
if c.abs() < 1e-30 {
c = 1e-30;
}
d = 1.0 / d;
h *= d * c;
let aa = -(a + m_f) * (qab + m_f) * x / ((a + m2) * (qap + m2));
d = 1.0 + aa * d;
if d.abs() < 1e-30 {
d = 1e-30;
}
c = 1.0 + aa / c;
if c.abs() < 1e-30 {
c = 1e-30;
}
d = 1.0 / d;
let delta = d * c;
h *= delta;
if (delta - 1.0).abs() < epsilon {
break;
}
}
h
}
#[cfg(test)]
mod tests {
use super::*;
use greeners::CovarianceType;
use ndarray::Array2;
#[test]
fn test_residual_diagnostics_basic() {
let t = 120;
let n = 5;
let k = 2;
let mut rng = 12345u64;
let mut rand = || {
rng = rng.wrapping_mul(1103515245).wrapping_add(12345);
((rng / 65536) % 32768) as f64 / 32768.0 - 0.5
};
let factors = Array2::from_shape_fn((t, k), |_| rand() * 0.02);
let returns = Array2::from_shape_fn((t, n), |_| rand() * 0.03);
let result =
ResidualDiagnostics::fit(&returns, &factors, CovarianceType::HC3, None).unwrap();
assert_eq!(result.diagnostics.len(), n);
for diag in result.diagnostics.values() {
assert!((0.0..=4.0).contains(&diag.durbin_watson));
assert!((0.0..=1.0).contains(&diag.lb_p_value));
assert!((0.0..=1.0).contains(&diag.jb_p_value));
}
}
#[test]
fn test_csv_export() {
let t = 100;
let n = 3;
let k = 2;
let mut rng = 42u64;
let mut rand = || {
rng = rng.wrapping_mul(1103515245).wrapping_add(12345);
((rng / 65536) % 32768) as f64 / 32768.0 - 0.5
};
let factors = Array2::from_shape_fn((t, k), |_| rand() * 0.02);
let returns = Array2::from_shape_fn((t, n), |_| rand() * 0.03);
let result =
ResidualDiagnostics::fit(&returns, &factors, CovarianceType::HC3, None).unwrap();
let csv = result.to_csv_string();
assert!(csv.contains("Asset,N_Obs,DW"));
assert!(csv.contains("Asset1"));
}
#[test]
fn test_durbin_watson() {
let resid = Array1::from_vec(vec![0.1, -0.1, 0.05, -0.05, 0.08, -0.08]);
let dw = durbin_watson(&resid);
assert!((0.0..=4.0).contains(&dw));
let resid_pos = Array1::from_vec(vec![0.1, 0.11, 0.12, 0.11, 0.10, 0.09]);
let dw_pos = durbin_watson(&resid_pos);
assert!(dw_pos < 2.0);
let resid_neg = Array1::from_vec(vec![0.1, -0.1, 0.1, -0.1, 0.1, -0.1]);
let dw_neg = durbin_watson(&resid_neg);
assert!(dw_neg > 2.0); }
#[test]
fn test_jarque_bera() {
let resid = Array1::from_vec(vec![
0.1, -0.1, 0.05, -0.05, 0.08, -0.08, 0.02, -0.02, 0.12, -0.12,
]);
let (jb, p) = jarque_bera(&resid);
assert!(jb >= 0.0);
assert!((0.0..=1.0).contains(&p));
}
}