use scirs2_core::ndarray::Array1;
use crate::error::{Result, TimeSeriesError};
/// Realized variance: the sum of squared log returns along the price path,
/// the canonical nonparametric estimator of integrated variance.
///
/// # Errors
/// - `InsufficientData` when fewer than 2 prices are supplied (no return
///   can be formed).
/// - `InvalidInput` when any price is non-positive (log returns undefined).
pub fn realized_variance(prices: &[f64]) -> Result<f64> {
    if prices.len() < 2 {
        return Err(TimeSeriesError::InsufficientData {
            message: "realized_variance needs at least 2 prices".into(),
            required: 2,
            actual: prices.len(),
        });
    }
    if prices.iter().any(|&p| p <= 0.0) {
        return Err(TimeSeriesError::InvalidInput(
            "realized_variance: all prices must be positive".into(),
        ));
    }
    // Accumulate squared log returns over consecutive price pairs.
    let mut total = 0.0_f64;
    for pair in prices.windows(2) {
        let log_ret = (pair[1] / pair[0]).ln();
        total += log_ret * log_ret;
    }
    Ok(total)
}
/// Bipower variation: (π/2) · Σ |r_t|·|r_{t-1}| over adjacent absolute log
/// returns — a jump-robust estimator of integrated variance (the π/2
/// factor corrects for E|Z| of a standard normal).
///
/// # Errors
/// - `InsufficientData` when fewer than 3 prices are supplied (need at
///   least two returns to form one adjacent product).
/// - `InvalidInput` when any price is non-positive.
pub fn bipower_variation(prices: &[f64]) -> Result<f64> {
    if prices.len() < 3 {
        return Err(TimeSeriesError::InsufficientData {
            message: "bipower_variation needs at least 3 prices".into(),
            required: 3,
            actual: prices.len(),
        });
    }
    if prices.iter().any(|&p| p <= 0.0) {
        return Err(TimeSeriesError::InvalidInput(
            "bipower_variation: all prices must be positive".into(),
        ));
    }
    // Log returns from consecutive prices.
    let mut log_returns = Vec::with_capacity(prices.len() - 1);
    for pair in prices.windows(2) {
        log_returns.push((pair[1] / pair[0]).ln());
    }
    // Sum of products of adjacent absolute returns, then scale by π/2.
    let mut acc = 0.0_f64;
    for adj in log_returns.windows(2) {
        acc += adj[0].abs() * adj[1].abs();
    }
    Ok(acc * (std::f64::consts::PI / 2.0))
}
/// Realized kernel estimator with a Bartlett (triangular) kernel:
/// γ₀ + 2·Σ_{h=1..H} (1 − h/(H+1))·γ_h, where γ_h sums lag-h products of
/// log returns and H is `bandwidth`. The result is floored at zero since
/// the weighted sum can turn slightly negative in finite samples.
///
/// # Errors
/// - `InsufficientData` when `prices.len() < bandwidth + 2` (every lag
///   1..=bandwidth needs at least one product term).
/// - `InvalidInput` when any price is non-positive.
pub fn realized_kernel(prices: &[f64], bandwidth: usize) -> Result<f64> {
    if prices.len() < bandwidth + 2 {
        return Err(TimeSeriesError::InsufficientData {
            message: format!(
                "realized_kernel: need at least bandwidth+2 = {} prices",
                bandwidth + 2
            ),
            required: bandwidth + 2,
            actual: prices.len(),
        });
    }
    if prices.iter().any(|&p| p <= 0.0) {
        return Err(TimeSeriesError::InvalidInput(
            "realized_kernel: all prices must be positive".into(),
        ));
    }
    // Consecutive log returns.
    let mut returns = Vec::with_capacity(prices.len() - 1);
    for pair in prices.windows(2) {
        returns.push((pair[1] / pair[0]).ln());
    }
    let n_ret = returns.len();
    // Start from gamma_0, which is the plain realized variance.
    let mut estimate: f64 = returns.iter().map(|&r| r * r).sum();
    for lag in 1..=bandwidth {
        let mut gamma = 0.0_f64;
        for j in lag..n_ret {
            gamma += returns[j] * returns[j - lag];
        }
        let bartlett = 1.0 - lag as f64 / (bandwidth + 1) as f64;
        estimate += 2.0 * bartlett * gamma;
    }
    Ok(estimate.max(0.0))
}
/// Outcome of the ratio-style jump test built from realized variance (RV)
/// and jump-robust bipower variation (BV).
#[derive(Debug, Clone)]
pub struct ABTJumpTestResult {
    /// Realized variance input.
    pub rv: f64,
    /// Bipower variation input.
    pub bv: f64,
    /// Asymptotically standard-normal test statistic.
    pub statistic: f64,
    /// Two-sided p-value from the standard normal survival function.
    pub p_value: f64,
    /// Estimated jump component, max(RV − BV, 0).
    pub jump_variation: f64,
}
/// Ratio-form jump test comparing realized variance against bipower
/// variation. Under the no-jump null, (RV − BV)/RV · √(n/θ) is
/// asymptotically standard normal, where θ = (π/2)² + π − 5 is the
/// asymptotic variance constant of the relative jump measure.
///
/// # Errors
/// `InvalidInput` when RV is not positive, BV is negative, or fewer than
/// 2 observations underlie the estimators.
pub fn jump_test_abt(rv: f64, bv: f64, n_obs: usize) -> Result<ABTJumpTestResult> {
    if rv <= 0.0 {
        return Err(TimeSeriesError::InvalidInput(
            "ABT jump test: RV must be positive".into(),
        ));
    }
    if bv < 0.0 {
        return Err(TimeSeriesError::InvalidInput(
            "ABT jump test: BV must be non-negative".into(),
        ));
    }
    if n_obs < 2 {
        return Err(TimeSeriesError::InvalidInput(
            "ABT jump test: n_obs must be >= 2".into(),
        ));
    }
    // Asymptotic variance constant θ = (π/2)² + π − 5 ≈ 0.609.
    let half_pi = std::f64::consts::PI / 2.0;
    let theta = half_pi.powi(2) + std::f64::consts::PI - 5.0;
    // Relative jump measure scaled to an asymptotically N(0,1) statistic.
    let relative_jump = (rv - bv) / rv;
    let statistic = relative_jump * (n_obs as f64 / theta).sqrt();
    // Two-sided p-value.
    let p_value = 2.0 * standard_normal_sf(statistic.abs());
    Ok(ABTJumpTestResult {
        rv,
        bv,
        statistic,
        p_value,
        jump_variation: (rv - bv).max(0.0),
    })
}
/// Fitted HAR-RV regression:
/// RV_t = c + φ_d·RV_{t-1} + φ_w·mean(RV_{t-5..t}) + φ_m·mean(RV_{t-22..t}) + ε_t.
#[derive(Debug, Clone)]
pub struct HARModel {
    /// Intercept.
    pub c: f64,
    /// Coefficient on the previous observation's RV (daily lag).
    pub phi_d: f64,
    /// Coefficient on the trailing 5-observation average RV (weekly lag).
    pub phi_w: f64,
    /// Coefficient on the trailing 22-observation average RV (monthly lag).
    pub phi_m: f64,
    /// Residual standard deviation (degrees-of-freedom adjusted).
    pub residual_std: f64,
    /// In-sample coefficient of determination.
    pub r_squared: f64,
    /// Number of regression observations used in the fit.
    pub n_obs: usize,
}
/// Builds the three HAR regressors for each target index t in 22..n:
/// the daily lag RV_{t-1}, the weekly average mean(RV_{t-5..t}), and the
/// monthly average mean(RV_{t-22..t}).
///
/// Returns `(daily, weekly, monthly)`, each of length `n - 22`.
///
/// # Errors
/// `InsufficientData` when fewer than 24 observations are supplied.
pub fn har_regressors(rv_series: &[f64]) -> Result<(Vec<f64>, Vec<f64>, Vec<f64>)> {
    let n = rv_series.len();
    if n < 24 {
        return Err(TimeSeriesError::InsufficientData {
            message: "HAR regressors: need at least 24 RV observations (22 for monthly lag + 2)".into(),
            required: 24,
            actual: n,
        });
    }
    // The first 22 observations are consumed by the monthly lag window.
    let first_t = 22;
    let capacity = n - first_t;
    let mut daily = Vec::with_capacity(capacity);
    let mut weekly = Vec::with_capacity(capacity);
    let mut monthly = Vec::with_capacity(capacity);
    for t in first_t..n {
        daily.push(rv_series[t - 1]);
        weekly.push(rv_series[t - 5..t].iter().sum::<f64>() / 5.0);
        monthly.push(rv_series[t - 22..t].iter().sum::<f64>() / 22.0);
    }
    Ok((daily, weekly, monthly))
}
/// Fits the HAR-RV model by ordinary least squares via the normal
/// equations:
///
/// RV_t = c + φ_d·RV_{t-1} + φ_w·mean(RV_{t-5..t}) + φ_m·mean(RV_{t-22..t}) + ε_t
///
/// # Errors
/// - `InsufficientData` when fewer than 26 observations are supplied:
///   22 are consumed by the monthly lag window and the regression sample
///   must contain at least 4 rows (one per coefficient). With fewer rows
///   X'X has rank < 4 and is singular by construction, which previously
///   surfaced as a misleading "singular matrix" `NumericalError`.
/// - `NumericalError` when the regressors are (multi)collinear.
pub fn fit_har(rv_series: &[f64]) -> Result<HARModel> {
    let t_start = 22; // observations consumed by the monthly lag window
    let k = 4_usize; // coefficients: intercept, daily, weekly, monthly
    let n = rv_series.len();
    // Require at least k regression rows so X'X can be full rank.
    if n < t_start + k {
        return Err(TimeSeriesError::InsufficientData {
            message: "fit_har: need at least 26 RV observations (22 for the monthly lag + 4 regression rows)".into(),
            required: t_start + k,
            actual: n,
        });
    }
    let n_reg = n - t_start;
    let (rv_d, rv_w, rv_m) = har_regressors(rv_series)?;
    let y: Vec<f64> = rv_series[t_start..].to_vec();
    // Accumulate the normal equations X'X β = X'y.
    let mut xtx = vec![0.0_f64; k * k];
    let mut xty = vec![0.0_f64; k];
    for i in 0..n_reg {
        let row = [1.0_f64, rv_d[i], rv_w[i], rv_m[i]];
        for (a, &ra) in row.iter().enumerate() {
            xty[a] += ra * y[i];
            for (b, &rb) in row.iter().enumerate() {
                xtx[a * k + b] += ra * rb;
            }
        }
    }
    let beta = solve_4x4(&xtx, &xty)?;
    let c = beta[0];
    let phi_d = beta[1];
    let phi_w = beta[2];
    let phi_m = beta[3];
    // In-sample fit diagnostics.
    let y_mean = y.iter().sum::<f64>() / n_reg as f64;
    let mut ss_res = 0.0_f64;
    let mut ss_tot = 0.0_f64;
    for i in 0..n_reg {
        let y_hat = c + phi_d * rv_d[i] + phi_w * rv_w[i] + phi_m * rv_m[i];
        let resid = y[i] - y_hat;
        ss_res += resid * resid;
        ss_tot += (y[i] - y_mean).powi(2);
    }
    let r_squared = if ss_tot > 0.0 { 1.0 - ss_res / ss_tot } else { 0.0 };
    // Degrees-of-freedom-adjusted residual standard deviation.
    let residual_std = (ss_res / (n_reg as f64 - 4.0).max(1.0)).sqrt();
    Ok(HARModel {
        c,
        phi_d,
        phi_w,
        phi_m,
        residual_std,
        r_squared,
        n_obs: n_reg,
    })
}
/// Iterative multi-step HAR forecast: each one-step-ahead prediction is
/// appended to the history and fed back to produce the next step.
/// Predictions are floored at zero because variance cannot be negative.
///
/// # Errors
/// - `InvalidInput` when `h == 0`.
/// - `InsufficientData` when fewer than 22 in-sample RV values are
///   available (the monthly lag window cannot be formed).
pub fn har_forecast(model: &HARModel, rv_series: &[f64], h: usize) -> Result<Vec<f64>> {
    if h == 0 {
        return Err(TimeSeriesError::InvalidInput(
            "har_forecast: h must be >= 1".into(),
        ));
    }
    if rv_series.len() < 22 {
        return Err(TimeSeriesError::InsufficientData {
            message: "har_forecast: need at least 22 in-sample RV values".into(),
            required: 22,
            actual: rv_series.len(),
        });
    }
    // Extend a working copy of the history with each new forecast.
    let mut path: Vec<f64> = rv_series.to_vec();
    let mut out = Vec::with_capacity(h);
    for _ in 0..h {
        let len = path.len();
        let daily = path[len - 1];
        let weekly = path[len - 5..len].iter().sum::<f64>() / 5.0;
        let monthly = path[len - 22..len].iter().sum::<f64>() / 22.0;
        let next = (model.c
            + model.phi_d * daily
            + model.phi_w * weekly
            + model.phi_m * monthly)
            .max(0.0);
        out.push(next);
        path.push(next);
    }
    Ok(out)
}
/// Fitted HAR-J regression: the HAR-RV specification augmented with a
/// lagged jump regressor J_{t-1} = max(RV_{t-1} − BV_{t-1}, 0).
#[derive(Debug, Clone)]
pub struct HARJModel {
    /// Intercept.
    pub c: f64,
    /// Coefficient on the previous observation's RV (daily lag).
    pub phi_d: f64,
    /// Coefficient on the trailing 5-observation average RV (weekly lag).
    pub phi_w: f64,
    /// Coefficient on the trailing 22-observation average RV (monthly lag).
    pub phi_m: f64,
    /// Coefficient on the lagged jump component max(RV − BV, 0).
    pub phi_j: f64,
    /// In-sample coefficient of determination.
    pub r_squared: f64,
    /// Number of regression observations used in the fit.
    pub n_obs: usize,
}
/// Fits the HAR-J model by OLS: the HAR-RV regressors plus a lagged jump
/// component J_{t-1} = max(RV_{t-1} − BV_{t-1}, 0).
///
/// # Errors
/// - `InvalidInput` when the RV and BV series differ in length.
/// - `InsufficientData` when fewer than 27 observations are supplied:
///   22 are consumed by the monthly lag window and the regression sample
///   must contain at least 5 rows (one per coefficient). With fewer rows
///   X'X has rank < 5 and is singular by construction, which previously
///   surfaced as a misleading "singular matrix" `NumericalError`.
/// - `NumericalError` when the regressors are (multi)collinear.
pub fn fit_har_j(rv_series: &[f64], bv_series: &[f64]) -> Result<HARJModel> {
    if rv_series.len() != bv_series.len() {
        return Err(TimeSeriesError::InvalidInput(
            "HAR-J: rv_series and bv_series must have the same length".into(),
        ));
    }
    let t_start = 22; // observations consumed by the monthly lag window
    let k = 5_usize; // intercept, daily, weekly, monthly, jump
    let n = rv_series.len();
    // Require at least k regression rows so X'X can be full rank.
    if n < t_start + k {
        return Err(TimeSeriesError::InsufficientData {
            message: "HAR-J: need at least 27 observations (22 for the monthly lag + 5 regression rows)".into(),
            required: t_start + k,
            actual: n,
        });
    }
    let n_reg = n - t_start;
    let (rv_d, rv_w, rv_m) = har_regressors(rv_series)?;
    let y: Vec<f64> = rv_series[t_start..].to_vec();
    // Lagged jump component J_{t-1}, aligned with target observation t.
    let jump: Vec<f64> = (0..n_reg)
        .map(|i| (rv_series[t_start - 1 + i] - bv_series[t_start - 1 + i]).max(0.0))
        .collect();
    // Accumulate the normal equations X'X β = X'y.
    let mut xtx = vec![0.0_f64; k * k];
    let mut xty = vec![0.0_f64; k];
    for i in 0..n_reg {
        let row = [1.0, rv_d[i], rv_w[i], rv_m[i], jump[i]];
        for (a, &ra) in row.iter().enumerate() {
            xty[a] += ra * y[i];
            for (b, &rb) in row.iter().enumerate() {
                xtx[a * k + b] += ra * rb;
            }
        }
    }
    let beta = solve_nxn(&xtx, &xty, k)?;
    // In-sample fit diagnostics.
    let y_mean = y.iter().sum::<f64>() / n_reg as f64;
    let mut ss_res = 0.0_f64;
    let mut ss_tot = 0.0_f64;
    for i in 0..n_reg {
        let y_hat = beta[0]
            + beta[1] * rv_d[i]
            + beta[2] * rv_w[i]
            + beta[3] * rv_m[i]
            + beta[4] * jump[i];
        ss_res += (y[i] - y_hat).powi(2);
        ss_tot += (y[i] - y_mean).powi(2);
    }
    let r_squared = if ss_tot > 0.0 { 1.0 - ss_res / ss_tot } else { 0.0 };
    Ok(HARJModel {
        c: beta[0],
        phi_d: beta[1],
        phi_w: beta[2],
        phi_m: beta[3],
        phi_j: beta[4],
        r_squared,
        n_obs: n_reg,
    })
}
/// Diebold-Mariano test of equal predictive accuracy between two forecast
/// error series. Loss is squared error when `squared` is true, absolute
/// error otherwise. The long-run variance of the loss differential uses a
/// Bartlett-weighted HAC estimator with `max(h, 1)` lags; if that turns
/// non-positive it falls back to the plain sample variance.
///
/// Returns `(dm_statistic, two_sided_p_value)`.
///
/// # Errors
/// - `InvalidInput` when the two error series differ in length.
/// - `InsufficientData` when fewer than 4 observations are supplied.
pub fn diebold_mariano_test(
    e1: &[f64],
    e2: &[f64],
    squared: bool,
    h: usize,
) -> Result<(f64, f64)> {
    if e1.len() != e2.len() {
        return Err(TimeSeriesError::InvalidInput(
            "Diebold-Mariano: e1 and e2 must have the same length".into(),
        ));
    }
    let n = e1.len();
    if n < 4 {
        return Err(TimeSeriesError::InsufficientData {
            message: "Diebold-Mariano: need at least 4 observations".into(),
            required: 4,
            actual: n,
        });
    }
    // Loss differential series d_t = L(e1_t) - L(e2_t).
    let d: Vec<f64> = e1
        .iter()
        .zip(e2.iter())
        .map(|(&a, &b)| {
            if squared {
                a * a - b * b
            } else {
                a.abs() - b.abs()
            }
        })
        .collect();
    let mean_d = d.iter().sum::<f64>() / n as f64;
    let max_lag = h.max(1);
    // HAC long-run variance: lag-0 term plus Bartlett-weighted autocovariances.
    let mut long_run_var = d.iter().map(|&x| (x - mean_d).powi(2)).sum::<f64>() / n as f64;
    for lag in 1..=max_lag {
        let mut autocov = 0.0_f64;
        for t in lag..n {
            autocov += (d[t] - mean_d) * (d[t - lag] - mean_d);
        }
        let autocov = autocov / n as f64;
        let weight = 1.0 - lag as f64 / (max_lag + 1) as f64;
        long_run_var += 2.0 * weight * autocov;
    }
    // The weighted estimator can go non-positive; fall back to the
    // ordinary (n-1)-denominator sample variance.
    let mut lrv = long_run_var;
    if lrv <= 0.0 {
        lrv = d.iter().map(|&x| (x - mean_d).powi(2)).sum::<f64>() / (n - 1) as f64;
    }
    let dm_stat = mean_d / (lrv / n as f64).sqrt().max(1e-15);
    let p_value = 2.0 * standard_normal_sf(dm_stat.abs());
    Ok((dm_stat, p_value))
}
/// Survival function of the standard normal, P(Z > z) = erfc(z/√2) / 2,
/// computed with the Abramowitz & Stegun 7.1.26 rational approximation
/// (absolute error below 1.5e-7) and the reflection erfc(-x) = 2 - erfc(x)
/// for negative arguments.
pub(crate) fn standard_normal_sf(z: f64) -> f64 {
    let x = z / std::f64::consts::SQRT_2;
    // Evaluate the approximation on |x| and reflect for negative inputs.
    let (arg, reflect) = if x < 0.0 { (-x, true) } else { (x, false) };
    let t = 1.0 / (1.0 + 0.3275911 * arg);
    let poly = t * (0.254_829_592
        + t * (-0.284_496_736
            + t * (1.421_413_741 + t * (-1.453_152_027 + t * 1.061_405_429))));
    let erfc_pos = (-arg * arg).exp() * poly;
    let erfc = if reflect { 2.0 - erfc_pos } else { erfc_pos };
    0.5 * erfc
}
/// Complementary error function via the Abramowitz & Stegun 7.1.26
/// rational approximation (absolute error below 1.5e-7 for x >= 0);
/// negative arguments use the reflection erfc(-x) = 2 - erfc(x).
fn erfc_approx(x: f64) -> f64 {
    if x < 0.0 {
        return 2.0 - erfc_approx(-x);
    }
    const P: f64 = 0.3275911;
    const A: [f64; 5] = [
        0.254_829_592,
        -0.284_496_736,
        1.421_413_741,
        -1.453_152_027,
        1.061_405_429,
    ];
    // Horner evaluation of the degree-5 polynomial in t = 1/(1 + P·x).
    let t = 1.0 / (1.0 + P * x);
    let mut poly = 0.0_f64;
    for &coef in A.iter().rev() {
        poly = t * (coef + poly);
    }
    (-x * x).exp() * poly
}
/// Solves the 4x4 linear system `a · x = b` arising from the HAR normal
/// equations; `a` is row-major with 16 entries, `b` has 4 entries.
///
/// # Errors
/// Propagates `NumericalError` from `solve_nxn` when the matrix is
/// singular or nearly so.
fn solve_4x4(a: &[f64], b: &[f64]) -> Result<Vec<f64>> {
    solve_nxn(a, b, 4)
}
/// Solves the dense row-major n×n linear system `a_in · x = b_in` by
/// Gaussian elimination with partial pivoting.
///
/// # Errors
/// `NumericalError` when the best available pivot magnitude is below
/// 1e-14, i.e. the matrix is singular or nearly so (typically caused by
/// multicollinear regressors upstream).
fn solve_nxn(a_in: &[f64], b_in: &[f64], n: usize) -> Result<Vec<f64>> {
    // Work on copies so the caller's matrix and RHS are untouched.
    let mut a = a_in.to_vec();
    let mut b = b_in.to_vec();
    // Forward elimination with partial pivoting.
    for col in 0..n {
        // Row at or below `col` with the largest |entry| in this column;
        // NaN comparisons collapse to Equal (ties keep the later row).
        let pivot_row = (col..n)
            .max_by(|&i, &j| a[i * n + col].abs().partial_cmp(&a[j * n + col].abs()).unwrap_or(std::cmp::Ordering::Equal))
            .unwrap_or(col);
        if a[pivot_row * n + col].abs() < 1e-14 {
            return Err(TimeSeriesError::NumericalError(
                "OLS solve: singular matrix (multicollinearity?)".into(),
            ));
        }
        if pivot_row != col {
            // Swap the pivot row into place (matrix row and RHS entry).
            for k in 0..n {
                a.swap(col * n + k, pivot_row * n + k);
            }
            b.swap(col, pivot_row);
        }
        // Eliminate entries below the pivot.
        for row in (col + 1)..n {
            let factor = a[row * n + col] / a[col * n + col];
            for k in col..n {
                let val = a[col * n + k] * factor;
                a[row * n + k] -= val;
            }
            b[row] -= b[col] * factor;
        }
    }
    // Back substitution on the now upper-triangular system.
    let mut x = vec![0.0_f64; n];
    for i in (0..n).rev() {
        let mut sum = b[i];
        for j in (i + 1)..n {
            sum -= a[i * n + j] * x[j];
        }
        x[i] = sum / a[i * n + i];
    }
    Ok(x)
}
#[cfg(test)]
mod tests {
    use super::*;

    // Deterministic, strictly positive synthetic price path: small
    // sinusoidal returns around a 100.0 starting price.
    fn make_prices(n: usize) -> Vec<f64> {
        let mut prices = vec![100.0_f64];
        for i in 1..n {
            let ret = 0.001 * ((i as f64 * 1.37 + 0.5).sin());
            prices.push(prices[i - 1] * (1.0 + ret));
        }
        prices
    }

    // Deterministic synthetic RV series with volatility clustering:
    // 3 of every 7 observations sit on a 3x-higher variance level.
    fn make_rv_series(n: usize) -> Vec<f64> {
        (0..n)
            .map(|i| {
                let base = 0.0001_f64;
                let cluster = if i % 7 < 3 { 3.0 } else { 1.0 };
                base * cluster * (1.0 + 0.5 * ((i as f64 * 0.31).sin()).abs())
            })
            .collect()
    }

    #[test]
    fn test_realized_variance_basic() {
        let prices = make_prices(20);
        let rv = realized_variance(&prices).expect("Should compute RV");
        assert!(rv >= 0.0, "RV must be non-negative: {rv}");
        assert!(rv.is_finite());
    }

    #[test]
    fn test_realized_variance_too_few() {
        // A single price cannot form a return.
        assert!(realized_variance(&[100.0]).is_err());
    }

    #[test]
    fn test_bipower_variation_basic() {
        let prices = make_prices(20);
        let bv = bipower_variation(&prices).expect("Should compute BV");
        assert!(bv >= 0.0, "BV must be non-negative: {bv}");
        assert!(bv.is_finite());
    }

    #[test]
    fn test_bipower_variation_too_few() {
        // Two prices give only one return; BV needs two.
        assert!(bipower_variation(&[100.0, 101.0]).is_err());
    }

    #[test]
    fn test_rv_ge_zero() {
        let prices = make_prices(50);
        let rv = realized_variance(&prices).expect("RV");
        let bv = bipower_variation(&prices).expect("BV");
        assert!(rv >= 0.0);
        assert!(bv >= 0.0);
    }

    #[test]
    fn test_realized_kernel_basic() {
        let prices = make_prices(50);
        let rk = realized_kernel(&prices, 5).expect("Should compute RK");
        assert!(rk >= 0.0);
        assert!(rk.is_finite());
    }

    #[test]
    fn test_jump_test_abt_basic() {
        let prices = make_prices(78);
        let rv = realized_variance(&prices).expect("RV");
        let bv = bipower_variation(&prices).expect("BV");
        let result = jump_test_abt(rv, bv, 78).expect("ABT test should compute");
        assert!(result.statistic.is_finite());
        assert!((0.0..=1.0).contains(&result.p_value));
        assert!(result.jump_variation >= 0.0);
    }

    #[test]
    fn test_jump_test_abt_invalid() {
        // Negative RV and too-few observations must both be rejected.
        assert!(jump_test_abt(-0.001, 0.0, 78).is_err());
        assert!(jump_test_abt(0.001, 0.0, 1).is_err());
    }

    #[test]
    fn test_har_regressors_shape() {
        let rv = make_rv_series(50);
        let (rv_d, rv_w, rv_m) = har_regressors(&rv).expect("Should build regressors");
        // 22 observations are consumed by the monthly lag window.
        let expected_len = rv.len() - 22;
        assert_eq!(rv_d.len(), expected_len);
        assert_eq!(rv_w.len(), expected_len);
        assert_eq!(rv_m.len(), expected_len);
    }

    #[test]
    fn test_fit_har_basic() {
        let rv = make_rv_series(60);
        let model = fit_har(&rv).expect("HAR should fit");
        assert!(model.c.is_finite());
        assert!(model.phi_d.is_finite());
        assert!(model.phi_w.is_finite());
        assert!(model.phi_m.is_finite());
        assert!((0.0..=1.0).contains(&model.r_squared.max(0.0).min(1.0)));
    }

    #[test]
    fn test_har_forecast_basic() {
        let rv = make_rv_series(60);
        let model = fit_har(&rv).expect("HAR should fit");
        let forecasts = har_forecast(&model, &rv, 5).expect("Should forecast");
        assert_eq!(forecasts.len(), 5);
        for &f in &forecasts {
            assert!(f >= 0.0, "Forecasted RV must be non-negative: {f}");
            assert!(f.is_finite());
        }
    }

    #[test]
    fn test_fit_har_j_basic() {
        let rv = make_rv_series(60);
        // Synthetic BV kept below RV so the jump component is non-trivial.
        let bv: Vec<f64> = rv.iter().enumerate()
            .map(|(i, &v)| v * (0.75 + 0.2 * ((i as f64 * 0.7).sin() * 0.5 + 0.5)))
            .collect();
        let model = fit_har_j(&rv, &bv).expect("HAR-J should fit");
        assert!(model.c.is_finite());
        assert!(model.phi_j.is_finite());
    }

    #[test]
    fn test_diebold_mariano_basic() {
        let e1: Vec<f64> = vec![0.01, -0.02, 0.03, -0.01, 0.02, -0.015, 0.025, -0.005];
        let e2: Vec<f64> = vec![0.015, -0.01, 0.02, -0.015, 0.025, -0.01, 0.015, -0.01];
        let (dm, p) = diebold_mariano_test(&e1, &e2, true, 1)
            .expect("DM test should compute");
        assert!(dm.is_finite());
        assert!((0.0..=1.0).contains(&p));
    }

    #[test]
    fn test_standard_normal_sf() {
        // Sanity anchors: P(Z>0) = 0.5 and the 97.5% quantile at 1.96.
        let p = standard_normal_sf(0.0);
        assert!((p - 0.5).abs() < 1e-6, "P(Z>0) = 0.5, got {p}");
        let p2 = standard_normal_sf(1.96);
        assert!((p2 - 0.025).abs() < 0.001, "P(Z>1.96) ≈ 0.025, got {p2}");
    }
}