use crate::error::{InferustError, Result};
use crate::regression::Ols;
use nalgebra::{DMatrix, DVector};
use statrs::distribution::{ChiSquared, ContinuousCDF};
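/// Autoregressive AR(p) model.
///
/// Fitting regresses `y[t]` on its previous `lags` values plus an intercept via
/// ordinary least squares, reusing the `Ols` estimator from the regression module.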
#[derive(Debug, Clone)]
pub struct AutoRegressive {
lags: usize,
}
#[derive(Debug, Clone)]
pub struct AutoRegressiveResult {
pub intercept: f64,
pub coefficients: Vec<f64>,
pub fitted_values: Vec<f64>,
pub residuals: Vec<f64>,
pub sigma2: f64,
pub n: usize,
}
impl AutoRegressive {
pub fn new(lags: usize) -> Self {
Self { lags }
}
pub fn fit(&self, y: &[f64]) -> Result<AutoRegressiveResult> {
if self.lags == 0 {
return Err(InferustError::InvalidInput(
"AR lags must be at least 1".into(),
));
}
if y.len() <= self.lags + 1 {
return Err(InferustError::InsufficientData {
needed: self.lags + 2,
got: y.len(),
});
}
let mut x = Vec::with_capacity(y.len() - self.lags);
let mut target = Vec::with_capacity(y.len() - self.lags);
for t in self.lags..y.len() {
let row: Vec<f64> = (1..=self.lags).map(|lag| y[t - lag]).collect();
x.push(row);
target.push(y[t]);
}
let ols = Ols::new().fit(&x, &target)?;
Ok(AutoRegressiveResult {
intercept: ols.coefficients[0],
coefficients: ols.coefficients[1..].to_vec(),
fitted_values: ols.fitted_values,
residuals: ols.residuals,
sigma2: ols.mse_resid,
n: target.len(),
})
}
}
impl AutoRegressiveResult {
pub fn forecast(&self, history: &[f64], steps: usize) -> Result<Vec<f64>> {
if history.len() < self.coefficients.len() {
return Err(InferustError::InsufficientData {
needed: self.coefficients.len(),
got: history.len(),
});
}
let mut buf = history.to_vec();
let mut out = Vec::with_capacity(steps);
for _ in 0..steps {
let t = buf.len();
let mut next = self.intercept;
for (i, &c) in self.coefficients.iter().enumerate() {
next += c * buf[t - 1 - i];
}
buf.push(next);
out.push(next);
}
Ok(out)
}
}
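/// ARIMA(p, d, q) model.
///
/// The series is differenced `d` times, then the ARMA(p, q) part is estimated:
/// the pure-AR case falls back to OLS, while models with MA terms are fitted by
/// minimizing the conditional sum of squares (CSS) with an Adam-style optimizer
/// (see `css_optimize`).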
#[derive(Debug, Clone)]
pub struct Arima {
pub p: usize,
pub d: usize,
pub q: usize,
max_iter: usize,
tolerance: f64,
}
#[derive(Debug, Clone)]
pub struct ArimaResult {
pub intercept: f64,
pub ar_coefficients: Vec<f64>,
pub ma_coefficients: Vec<f64>,
pub fitted_values: Vec<f64>,
pub residuals: Vec<f64>,
pub sigma2: f64,
pub log_likelihood: f64,
pub aic: f64,
pub bic: f64,
pub n: usize,
pub p: usize,
pub d: usize,
pub q: usize,
original_tails: Vec<Vec<f64>>,
last_residuals: Vec<f64>,
}
impl Arima {
pub fn new(p: usize, d: usize, q: usize) -> Self {
Self {
p,
d,
q,
max_iter: 2000,
tolerance: 1e-7,
}
}
pub fn max_iter(mut self, max_iter: usize) -> Self {
self.max_iter = max_iter;
self
}
pub fn fit(&self, y: &[f64]) -> Result<ArimaResult> {
let min_len = self.d + self.p.max(self.q) + 2;
if y.len() < min_len {
return Err(InferustError::InsufficientData {
needed: min_len,
got: y.len(),
});
}
let mut original_tails: Vec<Vec<f64>> = Vec::with_capacity(self.d);
let mut series = y.to_vec();
for _ in 0..self.d {
original_tails.push(series.clone());
series = series.windows(2).map(|w| w[1] - w[0]).collect();
}
let k = 1 + self.p + self.q;
if self.q == 0 {
if self.p == 0 {
let mu = series.iter().sum::<f64>() / series.len() as f64;
let resids: Vec<f64> = series.iter().map(|v| v - mu).collect();
let n = resids.len();
let sigma2 = resids.iter().map(|e| e * e).sum::<f64>() / n.max(1) as f64;
let ll = gaussian_log_likelihood(&resids, sigma2);
return Ok(ArimaResult {
intercept: mu,
ar_coefficients: vec![],
ma_coefficients: vec![],
fitted_values: vec![mu; n],
residuals: resids.clone(),
sigma2,
log_likelihood: ll,
aic: -2.0 * ll + 2.0 * k as f64,
bic: -2.0 * ll + k as f64 * (n as f64).ln(),
n,
p: 0,
d: self.d,
q: 0,
original_tails,
last_residuals: resids,
});
}
let ar = AutoRegressive::new(self.p).fit(&series)?;
let n = ar.n;
let sigma2 = ar.sigma2;
let ll = gaussian_log_likelihood(&ar.residuals, sigma2);
let last_residuals = ar.residuals.clone();
return Ok(ArimaResult {
intercept: ar.intercept,
ar_coefficients: ar.coefficients,
ma_coefficients: vec![],
fitted_values: ar.fitted_values,
residuals: ar.residuals,
sigma2,
log_likelihood: ll,
aic: -2.0 * ll + 2.0 * k as f64,
bic: -2.0 * ll + k as f64 * (n as f64).ln(),
n,
p: self.p,
d: self.d,
q: 0,
original_tails,
last_residuals,
});
}
let mut init = vec![0.0_f64; k];
init[0] = series.iter().sum::<f64>() / series.len() as f64;
if self.p > 0 {
if let Ok(ar) = AutoRegressive::new(self.p).fit(&series) {
init[0] = ar.intercept;
for (i, &c) in ar.coefficients.iter().enumerate() {
if 1 + i < k {
init[1 + i] = c;
}
}
}
}
for i in 0..self.q {
init[1 + self.p + i] = 0.01;
}
let params = css_optimize(
&series,
self.p,
self.q,
&init,
self.max_iter,
self.tolerance,
);
        let (fitted, resids) = css_fitted_residuals(&params, &series, self.p, self.q);
let n = fitted.len();
let sigma2 = if n > 1 {
resids.iter().map(|e| e * e).sum::<f64>() / (n - 1) as f64
} else {
resids.iter().map(|e| e * e).sum::<f64>()
};
let ll = gaussian_log_likelihood(&resids, sigma2);
let last_residuals = resids.clone();
Ok(ArimaResult {
intercept: params[0],
ar_coefficients: params[1..=self.p].to_vec(),
ma_coefficients: params[self.p + 1..].to_vec(),
fitted_values: fitted,
residuals: resids,
sigma2,
log_likelihood: ll,
aic: -2.0 * ll + 2.0 * k as f64,
bic: -2.0 * ll + k as f64 * (n as f64).ln(),
n,
p: self.p,
d: self.d,
q: self.q,
original_tails,
last_residuals,
})
}
}
impl ArimaResult {
pub fn print_summary(&self) {
println!();
println!("═══════════════════════════════════════════════════");
println!(" ARIMA({}, {}, {}) Results", self.p, self.d, self.q);
println!("═══════════════════════════════════════════════════");
println!(" n : {} σ² : {:.6}", self.n, self.sigma2);
println!(" Log-lik : {:.4}", self.log_likelihood);
println!(" AIC : {:.4} BIC : {:.4}", self.aic, self.bic);
println!("───────────────────────────────────────────────────");
println!(" const = {:.6}", self.intercept);
for (i, &c) in self.ar_coefficients.iter().enumerate() {
println!(" ar.L{} = {:.6}", i + 1, c);
}
for (i, &c) in self.ma_coefficients.iter().enumerate() {
println!(" ma.L{} = {:.6}", i + 1, c);
}
println!("═══════════════════════════════════════════════════");
println!();
}
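    /// Forecasts `steps` values ahead.
    ///
    /// The history is differenced `d` times, the AR/MA recursion is iterated with
    /// unknown future shocks set to zero (MA terms draw on the stored in-sample
    /// residuals while the horizon still reaches back into the sample), and the
    /// result is integrated back to the original scale using the tails saved at
    /// fit time.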
pub fn forecast(&self, history: &[f64], steps: usize) -> Result<Vec<f64>> {
if steps == 0 {
return Ok(vec![]);
}
let p = self.p;
let q = self.q;
let mut diff_hist = history.to_vec();
for _ in 0..self.d {
if diff_hist.len() < 2 {
return Err(InferustError::InsufficientData {
needed: 2,
got: diff_hist.len(),
});
}
diff_hist = diff_hist.windows(2).map(|w| w[1] - w[0]).collect();
}
let mut buf = diff_hist.clone();
let mut eps_buf = vec![0.0_f64; buf.len()];
let copy_from = buf.len().saturating_sub(self.last_residuals.len());
for (i, &e) in self.last_residuals.iter().enumerate() {
let idx = copy_from + i;
if idx < eps_buf.len() {
eps_buf[idx] = e;
}
}
let mut diff_fcast = Vec::with_capacity(steps);
for step in 0..steps {
let t = buf.len();
let mut pred = self.intercept;
for i in 0..p {
if t > i {
pred += self.ar_coefficients[i] * buf[t - 1 - i];
}
}
for j in 0..q {
if j + 1 > step {
                        let lookback = j - step;
                        let lr_len = self.last_residuals.len();
if lookback < lr_len {
pred +=
self.ma_coefficients[j] * self.last_residuals[lr_len - 1 - lookback];
}
}
}
buf.push(pred);
eps_buf.push(0.0);
diff_fcast.push(pred);
}
let mut fcast = diff_fcast;
for level in (0..self.d).rev() {
let seed = self.original_tails[level].last().copied().unwrap_or(0.0);
let mut prev = seed;
for f in fcast.iter_mut() {
prev += *f;
*f = prev;
}
}
Ok(fcast)
}
}
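/// Conditional-sum-of-squares residual recursion for an ARMA(p, q) model.
///
/// Parameters are laid out as `[intercept, ar_1..ar_p, ma_1..ma_q]`. The first
/// `p` observations serve only as starting values, and pre-sample shocks are
/// treated as zero.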
fn css_residuals(params: &[f64], y: &[f64], p: usize, q: usize) -> Vec<f64> {
let n = y.len();
    let mut eps = vec![0.0_f64; n];
    let start = p;
for t in start..n {
        let mut pred = params[0];
        for i in 0..p {
pred += params[1 + i] * y[t - 1 - i];
}
for j in 0..q {
if t > j {
pred += params[1 + p + j] * eps[t - 1 - j];
}
}
eps[t] = y[t] - pred;
}
eps[start..].to_vec()
}
fn css_objective(params: &[f64], y: &[f64], p: usize, q: usize) -> f64 {
css_residuals(params, y, p, q).iter().map(|e| e * e).sum()
}
fn css_gradient(params: &[f64], y: &[f64], p: usize, q: usize) -> Vec<f64> {
let h = 1e-5;
let f0 = css_objective(params, y, p, q);
let mut grad = vec![0.0_f64; params.len()];
let mut ph = params.to_vec();
for i in 0..params.len() {
ph[i] += h;
grad[i] = (css_objective(&ph, y, p, q) - f0) / h;
ph[i] = params[i];
}
grad
}
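/// Minimizes the CSS objective with Adam-style updates.
///
/// The gradient is a one-sided finite-difference approximation (`css_gradient`);
/// iteration stops early once the gradient norm falls below `tol`.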
fn css_optimize(
y: &[f64],
p: usize,
q: usize,
init: &[f64],
max_iter: usize,
tol: f64,
) -> Vec<f64> {
let mut params = init.to_vec();
let np = params.len();
let (alpha, beta1, beta2, eps) = (0.05_f64, 0.9_f64, 0.999_f64, 1e-8_f64);
let mut m = vec![0.0_f64; np];
let mut v = vec![0.0_f64; np];
for iter in 1..=max_iter {
        let grad = css_gradient(&params, y, p, q);
let gnorm: f64 = grad.iter().map(|g| g * g).sum::<f64>().sqrt();
if gnorm < tol {
break;
}
let t = iter as f64;
for i in 0..np {
m[i] = beta1 * m[i] + (1.0 - beta1) * grad[i];
v[i] = beta2 * v[i] + (1.0 - beta2) * grad[i] * grad[i];
let m_hat = m[i] / (1.0 - beta1.powf(t));
let v_hat = v[i] / (1.0 - beta2.powf(t));
params[i] -= alpha * m_hat / (v_hat.sqrt() + eps);
}
}
params
}
fn css_fitted_residuals(params: &[f64], y: &[f64], p: usize, q: usize) -> (Vec<f64>, Vec<f64>) {
let n = y.len();
let mut eps = vec![0.0_f64; n];
let start = p;
let mut fitted = Vec::with_capacity(n - start);
for t in start..n {
let mut pred = params[0];
for i in 0..p {
pred += params[1 + i] * y[t - 1 - i];
}
for j in 0..q {
if t > j {
pred += params[1 + p + j] * eps[t - 1 - j];
}
}
eps[t] = y[t] - pred;
fitted.push(pred);
}
(fitted, eps[start..].to_vec())
}
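/// Gaussian log-likelihood of residuals with a fixed variance:
/// ℓ = -n/2 · ln(2πσ²) - Σ eₜ² / (2σ²).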
fn gaussian_log_likelihood(residuals: &[f64], sigma2: f64) -> f64 {
let n = residuals.len() as f64;
let s2 = sigma2.max(f64::EPSILON);
-0.5 * n * (2.0 * std::f64::consts::PI * s2).ln()
- 0.5 * residuals.iter().map(|e| e * e).sum::<f64>() / s2
}
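/// Inverts a matrix, falling back to a tiny ridge on the diagonal (scaled by the
/// trace) when the plain inverse fails.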
fn regularized_inverse(matrix: &DMatrix<f64>) -> Result<DMatrix<f64>> {
if let Some(inv) = matrix.clone().try_inverse() {
return Ok(inv);
}
let mut ridge = matrix.clone();
let scale = matrix.trace().abs().max(1.0) * 1e-10;
for i in 0..ridge.nrows().min(ridge.ncols()) {
ridge[(i, i)] += scale;
}
ridge.try_inverse().ok_or(InferustError::SingularMatrix)
}
fn row_dot_matrix(x: &DMatrix<f64>, row: usize, beta: &DVector<f64>) -> f64 {
(0..x.ncols()).map(|col| x[(row, col)] * beta[col]).sum()
}
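/// Vector autoregression VAR(p).
///
/// Each equation is estimated separately by OLS on a shared regressor matrix of
/// a constant plus the lags of every variable; AIC/BIC are computed from the sum
/// of the per-equation Gaussian log-likelihoods.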
#[derive(Debug, Clone)]
pub struct Var {
lags: usize,
}
#[derive(Debug, Clone)]
pub struct VarResult {
pub coefficients: Vec<Vec<f64>>,
pub residuals: Vec<Vec<f64>>,
pub n: usize,
pub k: usize,
pub lags: usize,
pub aic: f64,
pub bic: f64,
}
impl Var {
pub fn new(lags: usize) -> Self {
Self { lags }
}
pub fn fit(&self, series: &[Vec<f64>]) -> Result<VarResult> {
let k = series.len();
if k < 2 {
return Err(InferustError::InvalidInput(
"VAR requires at least 2 variables".into(),
));
}
if self.lags == 0 {
return Err(InferustError::InvalidInput(
"VAR lags must be at least 1".into(),
));
}
let t = series[0].len();
        for s in series.iter() {
            if s.len() != t {
                return Err(InferustError::DimensionMismatch {
                    x_rows: s.len(),
                    y_len: t,
                });
            }
        }
let n = t - self.lags;
if n < 2 {
return Err(InferustError::InsufficientData {
needed: self.lags + 2,
got: t,
});
}
let x_cols = k * self.lags + 1;
let x_mat = DMatrix::from_fn(n, x_cols, |row, col| {
if col == 0 {
1.0
} else {
let t_idx = row + self.lags;
let lag_col = col - 1;
let lag = lag_col / k + 1;
let var = lag_col % k;
series[var][t_idx - lag]
}
});
let xtx_inv = regularized_inverse(&(x_mat.transpose() * &x_mat))?;
let mut coefficients = Vec::with_capacity(k);
let mut residuals_all = Vec::with_capacity(k);
let mut total_ll = 0.0_f64;
let total_params = k * x_cols;
for var in series.iter() {
let y_eq = DVector::from_fn(n, |row, _| var[row + self.lags]);
let beta = &xtx_inv * (x_mat.transpose() * &y_eq);
            let fitted = &x_mat * &beta;
let resids: Vec<f64> = y_eq.iter().zip(fitted.iter()).map(|(a, b)| a - b).collect();
let sigma2 = resids.iter().map(|e| e * e).sum::<f64>() / n.max(1) as f64;
let ll = gaussian_log_likelihood(&resids, sigma2);
total_ll += ll;
coefficients.push(beta.iter().copied().collect());
residuals_all.push(resids);
}
let n_f = n as f64;
let p_f = total_params as f64;
let aic = -2.0 * total_ll + 2.0 * p_f;
let bic = -2.0 * total_ll + p_f * n_f.ln();
Ok(VarResult {
coefficients,
residuals: residuals_all,
n,
k,
lags: self.lags,
aic,
bic,
})
}
}
impl VarResult {
pub fn forecast(&self, history: &[Vec<f64>], steps: usize) -> Result<Vec<Vec<f64>>> {
if steps == 0 {
return Ok(vec![vec![]; self.k]);
}
if history.len() != self.k {
return Err(InferustError::DimensionMismatch {
x_rows: history.len(),
y_len: self.k,
});
}
let mut bufs: Vec<Vec<f64>> = history.to_vec();
let mut out: Vec<Vec<f64>> = vec![Vec::with_capacity(steps); self.k];
for _ in 0..steps {
let t = bufs[0].len();
let mut row = Vec::with_capacity(self.k * self.lags);
for lag in 1..=self.lags {
for buf in bufs.iter() {
if t >= lag {
row.push(buf[t - lag]);
} else {
row.push(0.0);
}
}
}
for (i, coefs) in self.coefficients.iter().enumerate() {
                let mut pred = coefs[0];
                for (j, &c) in coefs[1..].iter().enumerate() {
if j < row.len() {
pred += c * row[j];
}
}
bufs[i].push(pred);
out[i].push(pred);
}
}
Ok(out)
}
}
#[derive(Debug, Clone)]
pub struct LjungBoxResult {
pub lag: usize,
pub statistic: f64,
pub p_value: f64,
}
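/// Sample autocorrelation function up to `max_lag`:
/// ρ̂_k = Σ_{t>k} (y_t - ȳ)(y_{t-k} - ȳ) / Σ_t (y_t - ȳ)², with ρ̂_0 = 1.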
pub fn acf(series: &[f64], max_lag: usize) -> Result<Vec<f64>> {
if series.len() < 2 {
return Err(InferustError::InsufficientData {
needed: 2,
got: series.len(),
});
}
let mean = series.iter().sum::<f64>() / series.len() as f64;
let denom = series.iter().map(|v| (v - mean).powi(2)).sum::<f64>();
Ok((0..=max_lag)
.map(|lag| {
if lag == 0 {
return 1.0;
}
series
.iter()
.skip(lag)
.zip(series.iter())
.map(|(a, b)| (a - mean) * (b - mean))
.sum::<f64>()
/ denom.max(f64::EPSILON)
})
.collect())
}
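/// Partial autocorrelation function, estimated by fitting an AR(k) model for
/// each lag k and taking its last coefficient.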
pub fn pacf(series: &[f64], max_lag: usize) -> Result<Vec<f64>> {
let mut out = vec![1.0_f64];
for lag in 1..=max_lag {
let fit = AutoRegressive::new(lag).fit(series)?;
out.push(*fit.coefficients.last().unwrap_or(&0.0));
}
Ok(out)
}
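/// Ljung-Box test for autocorrelation at lags 1..=max_lag:
/// Q = n(n+2) Σ_{k=1}^{h} ρ̂_k² / (n - k), compared against a χ²(h) distribution.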
pub fn ljung_box(series: &[f64], max_lag: usize) -> Result<Vec<LjungBoxResult>> {
let rhos = acf(series, max_lag)?;
let n = series.len() as f64;
let mut results = Vec::with_capacity(max_lag);
for lag in 1..=max_lag {
let stat = n
* (n + 2.0)
* rhos
.iter()
.enumerate()
.skip(1)
.take(lag)
.map(|(k, rho)| rho.powi(2) / (n - k as f64))
.sum::<f64>();
let chi = ChiSquared::new(lag as f64)
.map_err(|_| InferustError::InvalidInput("invalid χ² df".into()))?;
results.push(LjungBoxResult {
lag,
statistic: stat,
p_value: 1.0 - chi.cdf(stat),
});
}
Ok(results)
}
#[derive(Debug, Clone)]
pub struct AdfResult {
pub statistic: f64,
pub p_value: f64,
pub lags: usize,
pub n: usize,
pub critical_values: [f64; 3],
}
impl AdfResult {
pub fn print(&self) {
println!();
println!("── Augmented Dickey-Fuller Test ───────────────────────────");
println!(" H₀: unit root (reject when stat << critical value)");
println!(
" Lags: {} n: {} stat: {:.4} p ≈ {:.4}",
self.lags, self.n, self.statistic, self.p_value
);
let [cv1, cv5, cv10] = self.critical_values;
println!(" Critical values: 1% {cv1:.3} 5% {cv5:.3} 10% {cv10:.3}");
let sig = if self.statistic < cv1 {
"***"
} else if self.statistic < cv5 {
"**"
} else if self.statistic < cv10 {
"*"
} else {
"(not significant)"
};
println!(" Verdict: {sig}");
println!();
}
}
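/// Augmented Dickey-Fuller unit-root test (constant, no trend).
///
/// Fits Δy_t = α + γ·y_{t-1} + Σ_{i=1}^{lags} β_i·Δy_{t-i} + ε_t by OLS and uses
/// the t-statistic of γ, with critical values and an interpolated p-value from
/// MacKinnon-style response surfaces.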
pub fn adf_test(y: &[f64], lags: usize) -> Result<AdfResult> {
let n = y.len();
let min_len = lags + 3;
if n < min_len {
return Err(InferustError::InsufficientData {
needed: min_len,
got: n,
});
}
let dy: Vec<f64> = y.windows(2).map(|w| w[1] - w[0]).collect();
let t_start = lags + 1;
    let n_obs = n - 1 - t_start;
    if n_obs < 2 {
return Err(InferustError::InsufficientData {
needed: t_start + 3,
got: n,
});
}
let mut x: Vec<Vec<f64>> = Vec::with_capacity(n_obs);
let mut target: Vec<f64> = Vec::with_capacity(n_obs);
for t in t_start..=(n - 2) {
        let mut row = vec![y[t]];
        for i in 1..=lags {
            row.push(dy[t - i]);
        }
        x.push(row);
        target.push(dy[t]);
    }
let feat: Vec<String> = std::iter::once("y_lag1".to_string())
.chain((1..=lags).map(|i| format!("dy_lag{i}")))
.collect();
let ols = Ols::new().with_feature_names(feat).fit(&x, &target)?;
let stat = ols.t_statistics[1];
let n_used = ols.n;
let cv = mackinnon_critical_values_constant(n_used);
let p = mackinnon_pvalue_constant(stat, n_used);
Ok(AdfResult {
statistic: stat,
p_value: p,
lags,
n: n_used,
critical_values: cv,
})
}
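/// Finite-sample critical values (1%, 5%, 10%) for the constant-only ADF
/// regression, via a MacKinnon-style response-surface approximation; the
/// companion p-value function interpolates coarsely between those points.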
fn mackinnon_critical_values_constant(n: usize) -> [f64; 3] {
let n = n as f64;
let cv1 = -3.43035 - 6.5393 / n - 16.786 / (n * n);
let cv5 = -2.86154 - 2.8903 / n - 4.234 / (n * n);
let cv10 = -2.56677 - 1.5384 / n - 2.809 / (n * n);
[cv1, cv5, cv10]
}
fn mackinnon_pvalue_constant(stat: f64, n: usize) -> f64 {
let [cv1, cv5, cv10] = mackinnon_critical_values_constant(n);
if stat <= cv1 {
return 0.01;
}
if stat <= cv5 {
return 0.01 + 0.04 * (stat - cv1) / (cv5 - cv1);
}
if stat <= cv10 {
return 0.05 + 0.05 * (stat - cv5) / (cv10 - cv5);
}
    let z = (stat - cv10) / cv10.abs().max(0.1);
    (0.10 + 0.89 / (1.0 + (-2.0 * z).exp())).min(0.999)
}
#[derive(Debug, Clone)]
pub struct KpssResult {
pub statistic: f64,
pub lags: usize,
pub n: usize,
pub critical_values: [f64; 3],
pub trend: bool,
}
impl KpssResult {
pub fn print(&self) {
let spec = if self.trend { "trend" } else { "constant" };
println!();
println!("── KPSS Stationarity Test ({spec}) ────────────────────────");
println!(" H₀: series is stationary (reject when stat > critical value)");
println!(
" Lags: {} n: {} stat: {:.4}",
self.lags, self.n, self.statistic
);
let [cv10, cv5, cv1] = self.critical_values;
println!(" Critical values: 10% {cv10:.3} 5% {cv5:.3} 1% {cv1:.3}");
let sig = if self.statistic > cv1 {
"reject H₀ at 1% ***"
} else if self.statistic > cv5 {
"reject H₀ at 5% **"
} else if self.statistic > cv10 {
"reject H₀ at 10% *"
} else {
"fail to reject H₀ (evidence of stationarity)"
};
println!(" Verdict: {sig}");
println!();
}
}
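/// KPSS stationarity test (level or trend stationarity).
///
/// Residuals come from demeaning (or detrending when `trend` is true); the
/// statistic is Σ S_t² / (n² · σ̂²_LR), where S_t are cumulative residual sums and
/// σ̂²_LR is a Bartlett-weighted (Newey-West) long-run variance using `lags`
/// autocovariance terms.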
pub fn kpss_test(y: &[f64], lags: usize, trend: bool) -> Result<KpssResult> {
let n = y.len();
if n < 3 {
return Err(InferustError::InsufficientData { needed: 3, got: n });
}
let resids: Vec<f64> = if trend {
let x: Vec<Vec<f64>> = (0..n).map(|t| vec![t as f64]).collect();
Ols::new().fit(&x, y)?.residuals
} else {
let mu = y.iter().sum::<f64>() / n as f64;
y.iter().map(|v| v - mu).collect()
};
let mut s = vec![0.0_f64; n];
let mut cum = 0.0;
for (t, &e) in resids.iter().enumerate() {
cum += e;
s[t] = cum;
}
let gamma0: f64 = resids.iter().map(|e| e * e).sum::<f64>() / n as f64;
let mut lr_var = gamma0;
for l in 1..=lags {
        let w = 1.0 - l as f64 / (lags + 1) as f64;
        let gamma_l: f64 = resids
.iter()
.skip(l)
.zip(resids.iter())
.map(|(a, b)| a * b)
.sum::<f64>()
/ n as f64;
lr_var += 2.0 * w * gamma_l;
}
let lr_var = lr_var.max(f64::EPSILON);
let stat = s.iter().map(|si| si * si).sum::<f64>() / (n as f64 * n as f64 * lr_var);
    let cv = if trend {
        [0.119, 0.146, 0.216]
    } else {
        [0.347, 0.463, 0.739]
    };
Ok(KpssResult {
statistic: stat,
lags,
n,
critical_values: cv,
trend,
})
}
#[cfg(test)]
mod tests {
use super::{acf, adf_test, kpss_test, ljung_box, pacf, Arima, AutoRegressive, Var};
#[test]
fn ar_fits_and_forecasts() {
let y = vec![1.0, 1.8, 2.7, 3.5, 4.6, 5.4, 6.5, 7.4, 8.3];
let fit = AutoRegressive::new(1).fit(&y).unwrap();
assert_eq!(fit.coefficients.len(), 1);
assert_eq!(fit.forecast(&y, 2).unwrap().len(), 2);
}
#[test]
fn arima_p_d_0_matches_ar_ols() {
let y = vec![1.0, 2.0, 4.0, 7.0, 11.0, 16.0, 22.0];
let fit = Arima::new(1, 1, 0).fit(&y).unwrap();
assert_eq!(fit.ar_coefficients.len(), 1);
assert_eq!(fit.ma_coefficients.len(), 0);
}
#[test]
fn arima_1_0_1_produces_valid_ma_coef() {
let y = vec![
0.5, 1.2, 0.8, 1.5, 1.1, 0.9, 1.3, 0.7, 1.4, 1.0, 0.6, 1.6, 0.9, 1.2, 0.8, 1.1, 0.7,
1.3, 0.5, 1.4,
];
let fit = Arima::new(1, 0, 1).fit(&y).unwrap();
assert_eq!(fit.ar_coefficients.len(), 1);
assert_eq!(fit.ma_coefficients.len(), 1);
assert!(fit.ma_coefficients[0].is_finite());
}
#[test]
fn arima_forecast_returns_correct_length() {
let y: Vec<f64> = (0..30).map(|i| i as f64).collect();
let fit = Arima::new(1, 1, 1).fit(&y).unwrap();
let fcast = fit.forecast(&y, 5).unwrap();
assert_eq!(fcast.len(), 5);
}
#[test]
fn acf_pacf_ljung_box_lengths() {
let y = vec![1.0, 1.8, 2.7, 3.5, 4.6, 5.4, 6.5, 7.4, 8.3];
assert_eq!(acf(&y, 3).unwrap().len(), 4);
assert_eq!(pacf(&y, 2).unwrap().len(), 3);
assert_eq!(ljung_box(&y, 2).unwrap().len(), 2);
}
#[test]
fn var_fits_bivariate_series() {
let y1: Vec<f64> = (0..20).map(|i| i as f64).collect();
let y2: Vec<f64> = (0..20).map(|i| (i as f64) * 0.5 + 1.0).collect();
let result = Var::new(1).fit(&[y1.clone(), y2.clone()]).unwrap();
assert_eq!(result.k, 2);
assert_eq!(result.lags, 1);
let fcast = result.forecast(&[y1, y2], 3).unwrap();
assert_eq!(fcast.len(), 2);
assert_eq!(fcast[0].len(), 3);
}
#[test]
fn adf_rejects_stationary_series() {
let y: Vec<f64> = (0..50).map(|i| (i as f64 * 0.3).sin()).collect();
let res = adf_test(&y, 1).unwrap();
assert!(
            res.statistic < res.critical_values[1],
            "ADF stat {:.3} should be below 5% cv {:.3}",
res.statistic,
res.critical_values[1]
);
}
#[test]
fn kpss_fails_to_reject_stationary_series() {
let y: Vec<f64> = (0..40).map(|i| (i as f64 * 0.3).sin()).collect();
let res = kpss_test(&y, 3, false).unwrap();
assert!(
            res.statistic < res.critical_values[1],
            "KPSS stat {:.4} should be below 5% cv {:.3}",
res.statistic,
res.critical_values[1]
);
}
}
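/// Seasonal ARIMA(p, d, q)(P, D, Q)_s model.
///
/// The series is seasonally differenced `ds` times at period `s`, then regularly
/// differenced `d` times, and the multiplicative SARMA terms are fitted by
/// conditional sum of squares with the same Adam-style optimizer used for ARIMA.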
#[derive(Debug, Clone)]
pub struct Sarima {
pub p: usize,
pub d: usize,
pub q: usize,
pub ps: usize,
pub ds: usize,
pub qs: usize,
pub s: usize,
max_iter: usize,
tolerance: f64,
}
#[derive(Debug, Clone)]
pub struct SarimaResult {
pub ar_coefficients: Vec<f64>,
pub ma_coefficients: Vec<f64>,
pub seasonal_ar: Vec<f64>,
pub seasonal_ma: Vec<f64>,
pub intercept: f64,
pub residuals: Vec<f64>,
pub fitted_values: Vec<f64>,
pub sigma2: f64,
pub log_likelihood: f64,
pub aic: f64,
pub bic: f64,
pub n: usize,
    original_tails: Vec<Vec<f64>>,
    seasonal_tails: Vec<Vec<f64>>,
    last_residuals: Vec<f64>,
s: usize,
d: usize,
ds: usize,
}
impl Sarima {
pub fn new(p: usize, d: usize, q: usize, ps: usize, ds: usize, qs: usize, s: usize) -> Self {
Self {
p,
d,
q,
ps,
ds,
qs,
s,
max_iter: 3000,
tolerance: 1e-7,
}
}
pub fn max_iter(mut self, n: usize) -> Self {
self.max_iter = n;
self
}
pub fn fit(&self, y: &[f64]) -> Result<SarimaResult> {
if self.s < 2 {
return Err(InferustError::InvalidInput(
"seasonal period s must be ≥ 2".into(),
));
}
let min_len = self.ds * self.s
+ self.d
+ self
.p
.max(self.q)
.max(self.ps * self.s)
.max(self.qs * self.s)
+ 2;
if y.len() < min_len {
return Err(InferustError::InsufficientData {
needed: min_len,
got: y.len(),
});
}
let mut seasonal_tails: Vec<Vec<f64>> = Vec::new();
let mut series = y.to_vec();
for _ in 0..self.ds {
seasonal_tails.push(series.clone());
let new: Vec<f64> = series[self.s..]
.iter()
.zip(series.iter())
.map(|(a, b)| a - b)
.collect();
series = new;
}
let mut original_tails: Vec<Vec<f64>> = Vec::new();
for _ in 0..self.d {
original_tails.push(series.clone());
series = series.windows(2).map(|w| w[1] - w[0]).collect();
}
let k = 1 + self.p + self.q + self.ps + self.qs;
let mut init = vec![0.0_f64; k];
init[0] = series.iter().sum::<f64>() / series.len().max(1) as f64;
if self.p > 0 {
if let Ok(ar) = AutoRegressive::new(self.p).fit(&series) {
init[0] = ar.intercept;
for (i, &c) in ar.coefficients.iter().enumerate() {
if 1 + i < k {
init[1 + i] = c;
}
}
}
}
        // Start the MA-type parameters at a small nonzero value so the
        // finite-difference gradient of the CSS objective has signal.
        for i in 0..self.q {
            init[1 + self.p + i] = 0.01;
        }
        for i in 0..self.qs {
            init[1 + self.p + self.q + self.ps + i] = 0.01;
        }
let p = self.p;
let q = self.q;
let ps = self.ps;
let qs = self.qs;
let s = self.s;
let params = sarima_css_optimize(
&series,
p,
q,
ps,
qs,
s,
&init,
self.max_iter,
self.tolerance,
);
        let (fitted, resids) = sarima_css_fitted(&params, &series, p, q, ps, qs, s);
let n = fitted.len();
let sigma2 = if n > 1 {
resids.iter().map(|e| e * e).sum::<f64>() / (n - 1) as f64
} else {
resids.iter().map(|e| e * e).sum::<f64>()
};
let ll = gaussian_log_likelihood(&resids, sigma2);
let last_residuals = resids.clone();
Ok(SarimaResult {
intercept: params[0],
ar_coefficients: params[1..=p].to_vec(),
ma_coefficients: params[1 + p..1 + p + q].to_vec(),
seasonal_ar: params[1 + p + q..1 + p + q + ps].to_vec(),
seasonal_ma: params[1 + p + q + ps..].to_vec(),
residuals: resids,
fitted_values: fitted,
sigma2,
log_likelihood: ll,
aic: -2.0 * ll + 2.0 * k as f64,
bic: -2.0 * ll + k as f64 * (n as f64).ln(),
n,
original_tails,
seasonal_tails,
last_residuals,
s,
d: self.d,
ds: self.ds,
})
}
}
impl SarimaResult {
pub fn print_summary(&self) {
println!();
println!("═══════════════════════════════════════════════════════");
println!(" SARIMA Results");
println!("═══════════════════════════════════════════════════════");
println!(" n = {} σ² = {:.6}", self.n, self.sigma2);
println!(
" Log-lik = {:.4} AIC = {:.4} BIC = {:.4}",
self.log_likelihood, self.aic, self.bic
);
println!("─────────────────────────────────────────────────────");
println!(" const = {:.6}", self.intercept);
for (i, &c) in self.ar_coefficients.iter().enumerate() {
println!(" ar.L{} = {:.6}", i + 1, c);
}
for (i, &c) in self.ma_coefficients.iter().enumerate() {
println!(" ma.L{} = {:.6}", i + 1, c);
}
for (i, &c) in self.seasonal_ar.iter().enumerate() {
println!(" ar.S{} = {:.6}", i + 1, c);
}
for (i, &c) in self.seasonal_ma.iter().enumerate() {
println!(" ma.S{} = {:.6}", i + 1, c);
}
println!("═══════════════════════════════════════════════════════");
println!();
}
pub fn forecast(&self, history: &[f64], steps: usize) -> Result<Vec<f64>> {
if steps == 0 {
return Ok(vec![]);
}
let s = self.s;
let mut series = history.to_vec();
for _ in 0..self.ds {
if series.len() <= s {
return Err(InferustError::InsufficientData {
needed: s + 1,
got: series.len(),
});
}
series = series[s..]
.iter()
.zip(series.iter())
.map(|(a, b)| a - b)
.collect();
}
for _ in 0..self.d {
if series.len() < 2 {
return Err(InferustError::InsufficientData {
needed: 2,
got: series.len(),
});
}
series = series.windows(2).map(|w| w[1] - w[0]).collect();
}
let p = self.ar_coefficients.len();
let q = self.ma_coefficients.len();
let ps = self.seasonal_ar.len();
let qs = self.seasonal_ma.len();
let mut buf = series;
let mut eps_buf = vec![0.0_f64; buf.len()];
let copy_from = buf.len().saturating_sub(self.last_residuals.len());
for (i, &e) in self.last_residuals.iter().enumerate() {
let idx = copy_from + i;
if idx < eps_buf.len() {
eps_buf[idx] = e;
}
}
let mut diff_fcast = Vec::with_capacity(steps);
for step in 0..steps {
let t = buf.len();
let mut pred = self.intercept;
for i in 0..p {
if t > i {
pred += self.ar_coefficients[i] * buf[t - 1 - i];
}
}
for j in 0..q {
if j + 1 > step {
let lb = j - step;
let lr = self.last_residuals.len();
if lb < lr {
pred += self.ma_coefficients[j] * self.last_residuals[lr - 1 - lb];
}
}
}
for i in 0..ps {
let lag = (i + 1) * s;
if t >= lag {
pred += self.seasonal_ar[i] * buf[t - lag];
}
}
for j in 0..qs {
let lag = (j + 1) * s;
if lag > step {
let lb = lag - step - 1;
let lr = self.last_residuals.len();
if lb < lr {
pred += self.seasonal_ma[j] * self.last_residuals[lr - 1 - lb];
}
}
}
buf.push(pred);
eps_buf.push(0.0);
diff_fcast.push(pred);
}
let mut fcast = diff_fcast;
for level in (0..self.d).rev() {
let seed = self.original_tails[level].last().copied().unwrap_or(0.0);
let mut prev = seed;
for f in fcast.iter_mut() {
prev += *f;
*f = prev;
}
}
        for level in (0..self.ds).rev() {
            let tail = &self.seasonal_tails[level];
            let tail_len = tail.len();
            for i in 0..fcast.len() {
                // Undo the seasonal difference by adding the value s periods earlier:
                // within the first season that value comes from the stored tail,
                // beyond it from the already re-integrated forecast.
                let prev = if i >= s {
                    fcast[i - s]
                } else {
                    tail[tail_len + i - s]
                };
                fcast[i] += prev;
            }
        }
Ok(fcast)
}
}
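/// CSS residual recursion for the multiplicative SARMA model.
///
/// Parameters are laid out as `[intercept, ar, ma, seasonal ar, seasonal ma]`.
/// The two nested loops at the end subtract the AR×seasonal-AR and
/// MA×seasonal-MA cross terms that arise from expanding the multiplicative
/// lag polynomials.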
fn sarima_css_residuals(
params: &[f64],
y: &[f64],
p: usize,
q: usize,
ps: usize,
qs: usize,
s: usize,
) -> Vec<f64> {
let n = y.len();
let mut eps = vec![0.0_f64; n];
let start = p.max(ps * s);
for t in start..n {
        let mut pred = params[0];
        for i in 0..p {
if t > i {
pred += params[1 + i] * y[t - 1 - i];
}
}
for j in 0..q {
if t > j {
pred += params[1 + p + j] * eps[t - 1 - j];
}
}
for i in 0..ps {
let lag = (i + 1) * s;
if t >= lag {
pred += params[1 + p + q + i] * y[t - lag];
}
}
for j in 0..qs {
let lag = (j + 1) * s;
if t >= lag {
pred += params[1 + p + q + ps + j] * eps[t - lag];
}
}
for i in 0..p {
for ii in 0..ps {
let lag = (i + 1) + (ii + 1) * s;
if t >= lag {
pred -= params[1 + i] * params[1 + p + q + ii] * y[t - lag];
}
}
}
for j in 0..q {
for jj in 0..qs {
let lag = (j + 1) + (jj + 1) * s;
if t >= lag {
pred -= params[1 + p + j] * params[1 + p + q + ps + jj] * eps[t - lag];
}
}
}
eps[t] = y[t] - pred;
}
eps[start..].to_vec()
}
fn sarima_css_objective(
params: &[f64],
y: &[f64],
p: usize,
q: usize,
ps: usize,
qs: usize,
s: usize,
) -> f64 {
sarima_css_residuals(params, y, p, q, ps, qs, s)
.iter()
.map(|e| e * e)
.sum()
}
fn sarima_css_gradient(
params: &[f64],
y: &[f64],
p: usize,
q: usize,
ps: usize,
qs: usize,
s: usize,
) -> Vec<f64> {
let h = 1e-5;
let f0 = sarima_css_objective(params, y, p, q, ps, qs, s);
let mut grad = vec![0.0_f64; params.len()];
let mut ph = params.to_vec();
for i in 0..params.len() {
ph[i] += h;
grad[i] = (sarima_css_objective(&ph, y, p, q, ps, qs, s) - f0) / h;
ph[i] = params[i];
}
grad
}
#[allow(clippy::too_many_arguments)]
fn sarima_css_optimize(
y: &[f64],
p: usize,
q: usize,
ps: usize,
qs: usize,
s: usize,
init: &[f64],
max_iter: usize,
tol: f64,
) -> Vec<f64> {
let mut params = init.to_vec();
let np = params.len();
let (alpha, beta1, beta2, eps) = (0.05_f64, 0.9_f64, 0.999_f64, 1e-8_f64);
let mut m = vec![0.0_f64; np];
let mut v = vec![0.0_f64; np];
for iter in 1..=max_iter {
        let grad = sarima_css_gradient(&params, y, p, q, ps, qs, s);
let gnorm: f64 = grad.iter().map(|g| g * g).sum::<f64>().sqrt();
if gnorm < tol {
break;
}
let t = iter as f64;
for i in 0..np {
m[i] = beta1 * m[i] + (1.0 - beta1) * grad[i];
v[i] = beta2 * v[i] + (1.0 - beta2) * grad[i] * grad[i];
let m_hat = m[i] / (1.0 - beta1.powf(t));
let v_hat = v[i] / (1.0 - beta2.powf(t));
params[i] -= alpha * m_hat / (v_hat.sqrt() + eps);
}
}
params
}
fn sarima_css_fitted(
params: &[f64],
y: &[f64],
p: usize,
q: usize,
ps: usize,
qs: usize,
s: usize,
) -> (Vec<f64>, Vec<f64>) {
let n = y.len();
let start = p.max(ps * s);
let mut eps = vec![0.0_f64; n];
let mut fitted = Vec::with_capacity(n - start);
for t in start..n {
let mut pred = params[0];
for i in 0..p {
if t > i {
pred += params[1 + i] * y[t - 1 - i];
}
}
for j in 0..q {
if t > j {
pred += params[1 + p + j] * eps[t - 1 - j];
}
}
for i in 0..ps {
let lag = (i + 1) * s;
if t >= lag {
pred += params[1 + p + q + i] * y[t - lag];
}
}
for j in 0..qs {
let lag = (j + 1) * s;
if t >= lag {
pred += params[1 + p + q + ps + j] * eps[t - lag];
}
}
for i in 0..p {
for ii in 0..ps {
let lag = (i + 1) + (ii + 1) * s;
if t >= lag {
pred -= params[1 + i] * params[1 + p + q + ii] * y[t - lag];
}
}
}
for j in 0..q {
for jj in 0..qs {
let lag = (j + 1) + (jj + 1) * s;
if t >= lag {
pred -= params[1 + p + j] * params[1 + p + q + ps + jj] * eps[t - lag];
}
}
}
eps[t] = y[t] - pred;
fitted.push(pred);
}
(fitted, eps[start..].to_vec())
}
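/// SARIMAX: SARIMA with exogenous regressors, estimated in two steps.
///
/// The exogenous part is an OLS fit of `y` on a constant plus `X`; the SARIMA
/// model is then fitted to the residual series. This is a regression-with-SARIMA-
/// errors approximation rather than a joint maximum-likelihood estimate.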
#[derive(Debug, Clone)]
pub struct Sarimax {
inner: Sarima,
}
#[derive(Debug, Clone)]
pub struct SarimaxResult {
pub exog_coefficients: Vec<f64>,
pub exog_names: Vec<String>,
pub sarima: SarimaResult,
}
impl Sarimax {
pub fn new(p: usize, d: usize, q: usize, ps: usize, ds: usize, qs: usize, s: usize) -> Self {
Self {
inner: Sarima::new(p, d, q, ps, ds, qs, s),
}
}
pub fn max_iter(mut self, n: usize) -> Self {
self.inner = self.inner.max_iter(n);
self
}
pub fn fit(&self, y: &[f64], x: &[Vec<f64>]) -> Result<SarimaxResult> {
let n = y.len();
if x.len() != n {
return Err(InferustError::DimensionMismatch {
x_rows: x.len(),
y_len: n,
});
}
let k = x[0].len();
let x_mat = DMatrix::from_fn(
n,
k + 1,
|row, col| {
if col == 0 {
1.0
} else {
x[row][col - 1]
}
},
);
let y_vec = DVector::from_column_slice(y);
let xtx = x_mat.transpose() * &x_mat;
let xty = x_mat.transpose() * &y_vec;
let xtx_inv = regularized_inverse(&xtx)?;
let beta_exog = &xtx_inv * &xty;
let exog_coefficients: Vec<f64> = beta_exog.iter().copied().collect();
let fitted_exog: Vec<f64> = (0..n)
.map(|i| row_dot_matrix(&x_mat, i, &beta_exog))
.collect();
let y_adj: Vec<f64> = y
.iter()
.zip(fitted_exog.iter())
.map(|(yi, fi)| yi - fi)
.collect();
let sarima_result = self.inner.fit(&y_adj)?;
let exog_names: Vec<String> = std::iter::once("const".to_string())
.chain((1..=k).map(|i| format!("exog{i}")))
.collect();
Ok(SarimaxResult {
exog_coefficients,
exog_names,
sarima: sarima_result,
})
}
}
impl SarimaxResult {
pub fn print_summary(&self) {
println!("── SARIMAX exogenous coefficients ──────────────────────");
for (name, coef) in self.exog_names.iter().zip(self.exog_coefficients.iter()) {
println!(" {:<18} = {:.6}", name, coef);
}
self.sarima.print_summary();
}
}
#[cfg(test)]
mod sarima_tests {
use super::{Sarima, Sarimax};
#[test]
fn sarima_1_0_1_1_0_0_12_fits() {
let y: Vec<f64> = (0..60)
.map(|i| {
(i as f64) * 0.3 + 2.0 * ((i as f64 * std::f64::consts::PI * 2.0) / 12.0).sin()
})
.collect();
let res = Sarima::new(1, 0, 1, 1, 0, 0, 12).fit(&y).unwrap();
assert_eq!(res.ar_coefficients.len(), 1);
assert_eq!(res.seasonal_ar.len(), 1);
assert!(res.sigma2 > 0.0);
}
#[test]
fn sarima_forecast_length() {
let y: Vec<f64> = (0..48)
.map(|i| i as f64 + ((i as f64 / 12.0) * std::f64::consts::TAU).sin())
.collect();
let res = Sarima::new(1, 1, 0, 0, 1, 0, 12).fit(&y).unwrap();
let fcast = res.forecast(&y, 6).unwrap();
assert_eq!(fcast.len(), 6);
}
#[test]
fn sarimax_fits_with_exog() {
let y: Vec<f64> = (0..48)
.map(|i| i as f64 + ((i as f64 / 12.0) * std::f64::consts::TAU).sin())
.collect();
let x: Vec<Vec<f64>> = (0..48).map(|i| vec![(i % 2) as f64]).collect();
let res = Sarimax::new(1, 0, 0, 0, 1, 0, 12).fit(&y, &x).unwrap();
        assert_eq!(res.exog_coefficients.len(), 2);
    }
}
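/// Vector error-correction model estimated with a Johansen-style procedure.
///
/// Δy_t and y_{t-1} are regressed on the lagged differences to obtain residuals
/// R0 and R1, the moment matrices S00, S11, S01, S10 are formed, and the leading
/// `rank` eigenvectors of the (symmetrized) matrix S11⁻¹·S10·S00⁻¹·S01 give the
/// cointegrating vectors β, with loadings α = S01·β and trace statistics
/// -n · Σ ln(1 - λ).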
#[derive(Debug, Clone)]
pub struct Vecm {
pub lags: usize,
pub rank: usize,
}
#[derive(Debug, Clone)]
pub struct VecmResult {
pub eigenvalues: Vec<f64>,
pub beta: Vec<Vec<f64>>,
pub alpha: Vec<Vec<f64>>,
pub gamma: Vec<Vec<Vec<f64>>>,
pub trace_statistics: Vec<f64>,
pub k: usize,
pub rank: usize,
pub n: usize,
}
impl Vecm {
pub fn new(lags: usize, rank: usize) -> Self {
Self { lags, rank }
}
pub fn fit(&self, series: &[Vec<f64>]) -> Result<VecmResult> {
let k = series.len();
if k < 2 {
return Err(InferustError::InvalidInput(
"VECM requires at least 2 series".into(),
));
}
if self.rank == 0 || self.rank >= k {
return Err(InferustError::InvalidInput(format!(
"rank must satisfy 0 < rank < k (k = {k})"
)));
}
let t = series[0].len();
for s in series.iter() {
if s.len() != t {
return Err(InferustError::DimensionMismatch {
x_rows: s.len(),
y_len: t,
});
}
}
let p = self.lags;
        let n = t - p - 1;
        if n < k + 1 {
return Err(InferustError::InsufficientData {
needed: k + p + 2,
got: t,
});
}
let dy: Vec<Vec<f64>> = (0..t - 1)
.map(|i| (0..k).map(|v| series[v][i + 1] - series[v][i]).collect())
.collect();
let (r0, r1) = johansen_residuals(&dy, series, p, n, k, t);
let s00 = moment_matrix(&r0, n, k);
let s11 = moment_matrix(&r1, n, k);
let s01 = cross_moment(&r0, &r1, n, k);
let s10 = cross_moment(&r1, &r0, n, k);
let s11_mat = DMatrix::from_fn(k, k, |r, c| s11[r][c]);
let s00_mat = DMatrix::from_fn(k, k, |r, c| s00[r][c]);
let s01_mat = DMatrix::from_fn(k, k, |r, c| s01[r][c]);
let s10_mat = DMatrix::from_fn(k, k, |r, c| s10[r][c]);
let s11_inv = regularized_inverse(&s11_mat).map_err(|_| {
InferustError::InvalidInput(
"S11 is singular - possible perfect multicollinearity".into(),
)
})?;
let s00_inv = regularized_inverse(&s00_mat)
.map_err(|_| InferustError::InvalidInput("S00 is singular".into()))?;
let m = &s11_inv * &s10_mat * &s00_inv * &s01_mat;
let m_sym = (&m + m.transpose()) * 0.5;
let eig = nalgebra::SymmetricEigen::new(m_sym);
let mut eig_pairs: Vec<(f64, Vec<f64>)> = eig
.eigenvalues
.iter()
.copied()
.zip(
eig.eigenvectors
.column_iter()
.map(|c| c.iter().copied().collect::<Vec<_>>()),
)
.collect();
eig_pairs.sort_by(|(a, _), (b, _)| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));
let eigenvalues: Vec<f64> = eig_pairs.iter().map(|(e, _)| e.abs()).collect();
let r = self.rank;
let beta: Vec<Vec<f64>> = (0..r).map(|i| eig_pairs[i].1.clone()).collect();
let beta_mat = DMatrix::from_fn(k, r, |row, col| beta[col][row]);
let alpha_mat = &s01_mat * &beta_mat;
let alpha: Vec<Vec<f64>> = (0..k)
.map(|i| (0..r).map(|j| alpha_mat[(i, j)]).collect())
.collect();
let gamma = estimate_gamma(&dy, series, p, n, k, t);
let trace_statistics: Vec<f64> = (0..k)
.map(|r0| {
-(n as f64)
* eigenvalues[r0..]
.iter()
.map(|&lam| (1.0_f64 - lam.clamp(0.0, 0.9999)).ln())
.sum::<f64>()
})
.collect();
Ok(VecmResult {
eigenvalues,
beta,
alpha,
gamma,
trace_statistics,
k,
rank: r,
n,
})
}
}
impl VecmResult {
pub fn print_summary(&self) {
println!();
println!("══════════════════════════════════════════════════════════");
println!(
" VECM (Johansen) k = {} rank = {} n = {}",
self.k, self.rank, self.n
);
println!("══════════════════════════════════════════════════════════");
println!(" Eigenvalues:");
for (i, &lam) in self.eigenvalues.iter().enumerate() {
println!(
" λ_{} = {:.6} trace stat = {:.4}",
i + 1,
lam,
self.trace_statistics[i]
);
}
println!("──────────────────────────────────────────────────────────");
println!(" Cointegrating vectors β (columns):");
for i in 0..self.k {
let row: Vec<String> = (0..self.rank)
.map(|j| format!("{:>10.4}", self.beta[j][i]))
.collect();
println!(" y{} {}", i + 1, row.join(" "));
}
println!("══════════════════════════════════════════════════════════");
println!();
}
}
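/// First stage of the Johansen procedure: partial out the lagged differences from
/// Δy_t (giving R0) and from y_{t-1} (giving R1). With zero lags, or when the lag
/// regression is singular, the raw series are returned instead.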
fn johansen_residuals(
dy: &[Vec<f64>],
series: &[Vec<f64>],
p: usize,
n: usize,
k: usize,
t: usize,
) -> (Vec<Vec<f64>>, Vec<Vec<f64>>) {
let lag_cols = k * p;
if lag_cols == 0 {
let r0: Vec<Vec<f64>> = (p + 1..t).map(|t_idx| dy[t_idx - 1].clone()).collect();
let r1: Vec<Vec<f64>> = (p + 1..t)
.map(|t_idx| (0..k).map(|v| series[v][t_idx - 1]).collect())
.collect();
return (r0, r1);
}
let z_mat = DMatrix::from_fn(n, lag_cols, |row, col| {
        let t_idx = row + p + 1;
        let lag = col / k + 1;
let var = col % k;
dy[t_idx - 1 - lag][var]
});
let ztzt = &z_mat.transpose() * &z_mat;
let ztzt_inv = match regularized_inverse(&ztzt) {
Ok(inv) => inv,
Err(_) => {
let r0: Vec<Vec<f64>> = (p + 1..t).map(|t_idx| dy[t_idx - 1].clone()).collect();
let r1: Vec<Vec<f64>> = (p + 1..t)
.map(|t_idx| (0..k).map(|v| series[v][t_idx - 1]).collect())
.collect();
return (r0, r1);
}
};
let dy0_mat = DMatrix::from_fn(n, k, |row, col| dy[row + p][col]);
let coef0 = &ztzt_inv * z_mat.transpose() * &dy0_mat;
let r0_mat = &dy0_mat - &z_mat * &coef0;
let y1_mat = DMatrix::from_fn(n, k, |row, col| series[col][row + p]);
let coef1 = &ztzt_inv * z_mat.transpose() * &y1_mat;
let r1_mat = &y1_mat - &z_mat * &coef1;
let r0: Vec<Vec<f64>> = (0..n)
.map(|i| (0..k).map(|j| r0_mat[(i, j)]).collect())
.collect();
let r1: Vec<Vec<f64>> = (0..n)
.map(|i| (0..k).map(|j| r1_mat[(i, j)]).collect())
.collect();
(r0, r1)
}
fn moment_matrix(r: &[Vec<f64>], n: usize, k: usize) -> Vec<Vec<f64>> {
let mut s = vec![vec![0.0f64; k]; k];
for row in r.iter() {
for i in 0..k {
for j in 0..k {
s[i][j] += row[i] * row[j];
}
}
}
for row in s.iter_mut().take(k) {
for value in row.iter_mut().take(k) {
*value /= n as f64;
}
}
s
}
fn cross_moment(r0: &[Vec<f64>], r1: &[Vec<f64>], n: usize, k: usize) -> Vec<Vec<f64>> {
let mut s = vec![vec![0.0f64; k]; k];
for (a, b) in r0.iter().zip(r1.iter()) {
for i in 0..k {
for j in 0..k {
s[i][j] += a[i] * b[j];
}
}
}
for row in s.iter_mut().take(k) {
for value in row.iter_mut().take(k) {
*value /= n as f64;
}
}
s
}
fn estimate_gamma(
dy: &[Vec<f64>],
_series: &[Vec<f64>],
p: usize,
n: usize,
k: usize,
_t: usize,
) -> Vec<Vec<Vec<f64>>> {
if p == 0 {
return Vec::new();
}
let x_cols = k * p;
let x_mat = DMatrix::from_fn(n, x_cols, |row, col| {
let t_idx = row + p + 1;
let lag = col / k + 1;
let var = col % k;
dy[t_idx - 1 - lag][var]
});
let xtx = x_mat.transpose() * &x_mat;
let xtx_inv = match regularized_inverse(&xtx) {
Ok(inv) => inv,
Err(_) => return vec![vec![vec![0.0; k]; k]; p],
};
let mut gammas = vec![vec![vec![0.0f64; k]; k]; p];
for eq in 0..k {
let y_eq = DVector::from_fn(n, |row, _| dy[row + p][eq]);
let coef = &xtx_inv * (x_mat.transpose() * &y_eq);
for lag in 0..p {
for var in 0..k {
gammas[lag][eq][var] = coef[lag * k + var];
}
}
}
gammas
}
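/// VAR with exogenous regressors (a VARX model; despite the name, no moving-average
/// terms are estimated). Each equation is fitted by OLS on a constant, the lagged
/// endogenous values, and the contemporaneous exogenous variables.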
#[derive(Debug, Clone)]
pub struct Varmax {
lags: usize,
}
#[derive(Debug, Clone)]
pub struct VarmaxResult {
pub coefficients: Vec<Vec<f64>>,
pub residuals: Vec<Vec<f64>>,
pub k: usize,
pub k_x: usize,
pub lags: usize,
pub n: usize,
pub aic: f64,
pub bic: f64,
}
impl Varmax {
pub fn new(lags: usize) -> Self {
Self { lags }
}
pub fn fit(&self, series: &[Vec<f64>], exog: &[Vec<f64>]) -> Result<VarmaxResult> {
let k = series.len();
if k < 1 {
return Err(InferustError::InvalidInput(
"VARMAX requires at least 1 endogenous variable".into(),
));
}
let t = series[0].len();
for s in series.iter() {
if s.len() != t {
return Err(InferustError::DimensionMismatch {
x_rows: s.len(),
y_len: t,
});
}
}
if exog.len() != t {
return Err(InferustError::DimensionMismatch {
x_rows: exog.len(),
y_len: t,
});
}
let k_x = exog[0].len();
let n = t - self.lags;
if n < 2 {
return Err(InferustError::InsufficientData {
needed: self.lags + 2,
got: t,
});
}
let reg_cols = k * self.lags + k_x;
let x_cols = reg_cols + 1;
let x_mat = DMatrix::from_fn(n, x_cols, |row, col| {
let t_idx = row + self.lags;
if col == 0 {
1.0
} else if col - 1 < k * self.lags {
let lag = (col - 1) / k + 1;
let var = (col - 1) % k;
series[var][t_idx - lag]
} else {
let exog_col = col - 1 - k * self.lags;
exog[t_idx][exog_col]
}
});
let xtx = x_mat.transpose() * &x_mat;
let xtx_inv = regularized_inverse(&xtx)?;
let mut coefficients = Vec::with_capacity(k);
let mut residuals_all = Vec::with_capacity(k);
let mut total_ll = 0.0_f64;
let total_params = k * x_cols;
for var in series.iter() {
let y_eq = DVector::from_fn(n, |row, _| var[row + self.lags]);
let beta = &xtx_inv * (x_mat.transpose() * &y_eq);
            let fitted: DVector<f64> = &x_mat * &beta;
let resids: Vec<f64> = y_eq.iter().zip(fitted.iter()).map(|(a, b)| a - b).collect();
let sigma2 = resids.iter().map(|e| e * e).sum::<f64>() / n.max(1) as f64;
total_ll += gaussian_log_likelihood(&resids, sigma2);
coefficients.push(beta.iter().copied().collect());
residuals_all.push(resids);
}
let aic = -2.0 * total_ll + 2.0 * total_params as f64;
let bic = -2.0 * total_ll + total_params as f64 * (n as f64).ln();
Ok(VarmaxResult {
coefficients,
residuals: residuals_all,
k,
k_x,
lags: self.lags,
n,
aic,
bic,
})
}
}
impl VarmaxResult {
pub fn forecast(
&self,
history: &[Vec<f64>],
exog_future: &[Vec<f64>],
) -> Result<Vec<Vec<f64>>> {
let steps = exog_future.len();
if steps == 0 {
return Ok(vec![vec![]; self.k]);
}
if history.len() != self.k {
return Err(InferustError::DimensionMismatch {
x_rows: history.len(),
y_len: self.k,
});
}
let mut bufs: Vec<Vec<f64>> = history.to_vec();
let mut out: Vec<Vec<f64>> = vec![Vec::with_capacity(steps); self.k];
for exog_row in exog_future.iter().take(steps) {
let t = bufs[0].len();
let mut row = Vec::with_capacity(self.k * self.lags + self.k_x + 1);
row.push(1.0);
for lag in 1..=self.lags {
for buf in bufs.iter() {
row.push(if t >= lag { buf[t - lag] } else { 0.0 });
}
}
for &xval in exog_row.iter() {
row.push(xval);
}
for (i, coefs) in self.coefficients.iter().enumerate() {
let mut pred = 0.0;
for (j, &c) in coefs.iter().enumerate() {
if j < row.len() {
pred += c * row[j];
}
}
bufs[i].push(pred);
out[i].push(pred);
}
}
Ok(out)
}
}
#[cfg(test)]
mod vecm_varmax_tests {
use super::{Varmax, Vecm};
#[test]
fn vecm_fits_cointegrated_series() {
let y1: Vec<f64> = (0..50).map(|i| i as f64 + (i as f64 * 0.1).sin()).collect();
let y2: Vec<f64> = (0..50)
.map(|i| i as f64 * 1.5 + 1.0 + (i as f64 * 0.1).cos())
.collect();
let res = Vecm::new(1, 1).fit(&[y1, y2]).unwrap();
assert_eq!(res.k, 2);
assert_eq!(res.rank, 1);
        assert_eq!(res.beta.len(), 1);
        assert_eq!(res.eigenvalues.len(), 2);
}
#[test]
fn vecm_trace_statistics_non_negative() {
let y1: Vec<f64> = (0..40).map(|i| i as f64).collect();
let y2: Vec<f64> = (0..40).map(|i| 2.0 * i as f64 + 1.0).collect();
let res = Vecm::new(0, 1).fit(&[y1, y2]).unwrap();
for &ts in &res.trace_statistics {
assert!(ts >= 0.0, "trace stat = {ts:.4}");
}
}
#[test]
fn varmax_fits_with_exog() {
let y1: Vec<f64> = (0..25).map(|i| i as f64).collect();
let y2: Vec<f64> = (0..25).map(|i| i as f64 * 0.5 + 1.0).collect();
let x: Vec<Vec<f64>> = (0..25).map(|i| vec![(i % 3) as f64]).collect();
let res = Varmax::new(1).fit(&[y1.clone(), y2.clone()], &x).unwrap();
assert_eq!(res.k, 2);
assert_eq!(res.k_x, 1);
let fcast = res
.forecast(
&[y1, y2],
&(0..3).map(|i| vec![(i % 3) as f64]).collect::<Vec<_>>(),
)
.unwrap();
assert_eq!(fcast.len(), 2);
assert_eq!(fcast[0].len(), 3);
}
}