use scirs2_core::ndarray::ArrayStatCompat;
use scirs2_core::ndarray::{s, Array1, ArrayBase, Data, Ix1, ScalarOperand};
use scirs2_core::numeric::{Float, FromPrimitive};
use std::fmt::{Debug, Display};
use crate::error::{Result, TimeSeriesError};
use crate::optimization::{LBFGSOptimizer, OptimizationOptions};
use crate::tests::{adf_test, ADFRegression};
use crate::utils::{autocorrelation, partial_autocorrelation};
use statrs::statistics::Statistics;
/// An ARIMA(p, d, q) model: orders, estimated parameters, and fit state.
#[derive(Debug, Clone)]
pub struct ArimaModel<F> {
/// Autoregressive (AR) order.
pub p: usize,
/// Degree of non-seasonal differencing.
pub d: usize,
/// Moving-average (MA) order.
pub q: usize,
/// Estimated AR coefficients (length `p`).
pub ar_coeffs: Array1<F>,
/// Estimated MA coefficients (length `q`).
pub ma_coeffs: Array1<F>,
/// Estimated constant term of the differenced series.
pub intercept: F,
/// Estimated innovation variance (mean squared residual).
pub sigma2: F,
/// Gaussian log-likelihood at the fitted parameters.
pub log_likelihood: F,
/// Number of observations after differencing.
pub n_obs: usize,
/// True once `fit` has completed successfully.
pub is_fitted: bool,
}
/// Plain order specification for a (seasonal) ARIMA model.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ArimaConfig {
/// Non-seasonal AR order.
pub p: usize,
/// Non-seasonal differencing order.
pub d: usize,
/// Non-seasonal MA order.
pub q: usize,
/// Seasonal AR order.
pub seasonal_p: usize,
/// Seasonal differencing order.
pub seasonal_d: usize,
/// Seasonal MA order.
pub seasonal_q: usize,
/// Length of the seasonal cycle (e.g. 12 for monthly data with a yearly pattern).
pub seasonal_period: usize,
}
/// Orders chosen by model selection: non-seasonal (p, d, q), seasonal (P, D, Q), and the period.
#[derive(Debug, Clone)]
pub struct SarimaParams {
/// Non-seasonal (p, d, q) orders.
pub pdq: (usize, usize, usize),
/// Seasonal (P, D, Q) orders.
pub seasonal_pdq: (usize, usize, usize),
/// Seasonal period; 1 when no seasonality was requested.
pub seasonal_period: usize,
}
/// Knobs controlling automatic ARIMA order selection (`auto_arima`).
#[derive(Debug, Clone)]
pub struct ArimaSelectionOptions {
/// Largest AR order to consider.
pub max_p: usize,
/// Largest differencing order to consider.
pub max_d: usize,
/// Largest MA order to consider.
pub max_q: usize,
/// Whether to search seasonal orders as well.
pub seasonal: bool,
/// Seasonal period; required for any seasonal handling.
pub seasonal_period: Option<usize>,
/// Largest seasonal AR order to consider.
pub max_seasonal_p: usize,
/// Largest seasonal differencing order to consider.
pub max_seasonal_d: usize,
/// Largest seasonal MA order to consider.
pub max_seasonal_q: usize,
/// Information criterion used to rank candidates.
pub criterion: SelectionCriterion,
/// Use the fast stepwise search instead of a full grid search.
pub stepwise: bool,
/// Choose `d` via ADF stationarity tests instead of assuming 0.
pub test_stationarity: bool,
/// Significance level for statistical tests.
pub alpha: f64,
/// Evaluate candidates in parallel (if supported by the caller).
pub parallel: bool,
/// Include a constant/intercept term in candidate models.
pub include_constant: bool,
/// Maximum optimizer iterations per candidate fit.
pub max_iter: usize,
/// Convergence tolerance for the optimizer.
pub tolerance: f64,
}
impl Default for ArimaSelectionOptions {
// Defaults mirror common auto-ARIMA practice: small search bounds, AIC
// ranking, stepwise search, and ADF-based differencing-order selection.
fn default() -> Self {
Self {
max_p: 5,
max_d: 2,
max_q: 5,
seasonal: false,
seasonal_period: None,
max_seasonal_p: 2,
max_seasonal_d: 1,
max_seasonal_q: 2,
criterion: SelectionCriterion::AIC,
stepwise: true,
test_stationarity: true,
alpha: 0.05,
parallel: false,
include_constant: true,
max_iter: 1000,
tolerance: 1e-8,
}
}
}
/// Information criterion used to rank candidate models (lower is better).
#[derive(Debug, Clone, Copy)]
pub enum SelectionCriterion {
/// Akaike information criterion.
AIC,
/// Small-sample corrected AIC.
AICc,
/// Bayesian (Schwarz) information criterion.
BIC,
/// Hannan-Quinn criterion.
HQC,
}
impl<F> ArimaModel<F>
where
F: Float + FromPrimitive + Debug + Display + ScalarOperand,
{
pub fn new(p: usize, d: usize, q: usize) -> Result<Self> {
crate::validation::validate_arima_orders(p, d, q)?;
Ok(Self {
p,
d,
q,
ar_coeffs: Array1::zeros(p),
ma_coeffs: Array1::zeros(q),
intercept: F::zero(),
sigma2: F::one(),
log_likelihood: F::neg_infinity(),
n_obs: 0,
is_fitted: false,
})
}
pub fn fit<S>(&mut self, data: &ArrayBase<S, Ix1>) -> Result<()>
where
S: Data<Elem = F>,
{
scirs2_core::validation::checkarray_finite(data, "data")?;
let min_required = self.p + self.q + self.d + 1;
crate::validation::check_array_length(data, min_required, "ARIMA model fitting")?;
let diff_data = self.difference(data)?;
self.n_obs = diff_data.len();
self.initialize_parameters(&diff_data)?;
self.optimize_parameters(&diff_data)?;
self.log_likelihood = self.calculate_log_likelihood(&diff_data)?;
self.is_fitted = true;
Ok(())
}
fn difference<S>(&self, data: &ArrayBase<S, Ix1>) -> Result<Array1<F>>
where
S: Data<Elem = F>,
{
let mut result = data.to_owned();
for _ in 0..self.d {
let mut diff = Array1::zeros(result.len() - 1);
for i in 1..result.len() {
diff[i - 1] = result[i] - result[i - 1];
}
result = diff;
}
Ok(result)
}
fn initialize_parameters(&mut self, data: &Array1<F>) -> Result<()> {
if self.p > 0 {
let _acf = autocorrelation(data, Some(self.p))?;
let pacf = partial_autocorrelation(data, Some(self.p))?;
for i in 0..self.p {
self.ar_coeffs[i] = pacf[i + 1];
}
}
for i in 0..self.q {
self.ma_coeffs[i] = F::from(0.1).expect("Failed to convert constant to float");
}
self.intercept = data.mean_or(F::zero());
self.sigma2 = data
.mapv(|x| (x - self.intercept) * (x - self.intercept))
.mean()
.unwrap_or(F::one());
Ok(())
}
fn optimize_parameters(&mut self, data: &Array1<F>) -> Result<()> {
let n_params = self.p + self.q + 1; let mut params = Array1::zeros(n_params);
for i in 0..self.p {
params[i] = self.ar_coeffs[i];
}
for i in 0..self.q {
params[self.p + i] = self.ma_coeffs[i];
}
params[self.p + self.q] = self.intercept;
let mut optimizer = LBFGSOptimizer::new(OptimizationOptions::default());
let p = self.p;
let q = self.q;
let data_clone = data.clone();
let objective = |params: &Array1<F>| -> F {
let mut model = self.clone();
for i in 0..p {
model.ar_coeffs[i] = params[i];
}
for i in 0..q {
model.ma_coeffs[i] = params[p + i];
}
model.intercept = params[p + q];
if let Ok(residuals) = model.calculate_residuals(&data_clone) {
let n = F::from(data_clone.len()).expect("Operation failed");
let sigma2 = residuals.dot(&residuals) / n;
n / F::from(2.0).expect("Failed to convert constant to float")
* (F::one()
+ F::from(2.0 * std::f64::consts::PI)
.expect("Failed to convert to float")
.ln())
+ n / F::from(2.0).expect("Failed to convert constant to float") * sigma2.ln()
+ residuals.dot(&residuals)
/ (F::from(2.0).expect("Failed to convert constant to float") * sigma2)
} else {
F::infinity()
}
};
let gradient = |params: &Array1<F>| -> Array1<F> {
let mut grad = Array1::zeros(n_params);
let mut model = self.clone();
for i in 0..p {
model.ar_coeffs[i] = params[i];
}
for i in 0..q {
model.ma_coeffs[i] = params[p + i];
}
model.intercept = params[p + q];
if let Ok(residuals) = model.calculate_residuals(&data_clone) {
let n = F::from(data_clone.len()).expect("Operation failed");
model.sigma2 = residuals.dot(&residuals) / n;
for i in 0..p {
if let Ok(g) = model.ar_gradient(&data_clone, &residuals, i) {
grad[i] = -g; }
}
for i in 0..q {
if let Ok(g) = model.ma_gradient(&data_clone, &residuals, i) {
grad[p + i] = -g; }
}
grad[p + q] = -residuals.sum() / model.sigma2 / n;
}
grad
};
let result = optimizer.optimize(objective, gradient, ¶ms)?;
for i in 0..self.p {
self.ar_coeffs[i] = result.x[i];
}
for i in 0..self.q {
self.ma_coeffs[i] = result.x[self.p + i];
}
self.intercept = result.x[self.p + self.q];
let residuals = self.calculate_residuals(data)?;
self.sigma2 = residuals.dot(&residuals) / F::from(data.len()).expect("Operation failed");
Ok(())
}
fn calculate_residuals(&self, data: &Array1<F>) -> Result<Array1<F>> {
let n = data.len();
let mut residuals = Array1::zeros(n);
for t in self.p.max(self.q)..n {
let mut pred = self.intercept;
for i in 0..self.p {
if t > i {
pred = pred + self.ar_coeffs[i] * data[t - i - 1];
}
}
for i in 0..self.q {
if t > i {
pred = pred + self.ma_coeffs[i] * residuals[t - i - 1];
}
}
residuals[t] = data[t] - pred;
}
Ok(residuals)
}
fn ar_gradient(&self, data: &Array1<F>, residuals: &Array1<F>, idx: usize) -> Result<F> {
let n = data.len();
let mut grad = F::zero();
for t in self.p.max(self.q)..n {
if t > idx {
grad = grad - residuals[t] * data[t - idx - 1] / self.sigma2;
}
}
Ok(grad / F::from(n).expect("Failed to convert to float"))
}
fn ma_gradient(&self, data: &Array1<F>, residuals: &Array1<F>, idx: usize) -> Result<F> {
let n = residuals.len();
let mut grad = F::zero();
for t in self.p.max(self.q)..n {
if t > idx {
grad = grad - residuals[t] * residuals[t - idx - 1] / self.sigma2;
}
}
Ok(grad / F::from(n).expect("Failed to convert to float"))
}
fn calculate_log_likelihood(&self, data: &Array1<F>) -> Result<F> {
let residuals = self.calculate_residuals(data)?;
let n = F::from(data.len()).expect("Operation failed");
let ll = -n / F::from(2.0).expect("Failed to convert constant to float")
* (F::one()
+ F::from(2.0).expect("Failed to convert constant to float")
* F::from(std::f64::consts::PI).expect("Failed to convert to float"))
.ln()
- n / F::from(2.0).expect("Failed to convert constant to float") * self.sigma2.ln()
- residuals.dot(&residuals)
/ (F::from(2.0).expect("Failed to convert constant to float") * self.sigma2);
Ok(ll)
}
pub fn forecast(&self, steps: usize, data: &Array1<F>) -> Result<Array1<F>> {
if !self.is_fitted {
return Err(TimeSeriesError::InvalidModel(
"Model must be fitted before forecasting".to_string(),
));
}
crate::validation::validate_forecast_horizon(steps, Some(1000))?;
let diff_data = self.difference(data)?;
let mut forecasts = Array1::zeros(steps);
let mut extended_data = diff_data.to_vec();
let mut residuals = self.calculate_residuals(&diff_data)?.to_vec();
for h in 0..steps {
let mut pred = self.intercept;
for i in 0..self.p {
let idx = extended_data.len() - i - 1;
if idx < extended_data.len() {
pred = pred + self.ar_coeffs[i] * extended_data[idx];
}
}
for i in 0..self.q {
let idx = residuals.len() - i - 1;
if idx < residuals.len() && i < h {
pred = pred + self.ma_coeffs[i] * residuals[idx];
}
}
forecasts[h] = pred;
extended_data.push(pred);
residuals.push(F::zero()); }
if self.d > 0 {
self.integrate_forecast(&forecasts, data)
} else {
Ok(forecasts)
}
}
fn integrate_forecast(&self, forecasts: &Array1<F>, original: &Array1<F>) -> Result<Array1<F>> {
let mut result = forecasts.to_owned();
let mut last_values = original.slice(s![original.len() - self.d..]).to_vec();
for _ in 0..self.d {
let mut integrated = Array1::zeros(result.len());
let last_val = last_values[last_values.len() - 1];
integrated[0] = result[0] + last_val;
for i in 1..result.len() {
integrated[i] = result[i] + integrated[i - 1];
}
result = integrated;
last_values.push(result[0]);
}
Ok(result)
}
pub fn aic(&self) -> F {
let k = F::from(self.p + self.q + 1).expect("Failed to convert to float"); F::from(2.0).expect("Failed to convert constant to float") * k
- F::from(2.0).expect("Failed to convert constant to float") * self.log_likelihood
}
pub fn aicc(&self) -> F {
let k = F::from(self.p + self.q + 1).expect("Failed to convert to float");
let n = F::from(self.n_obs).expect("Failed to convert to float");
self.aic()
+ F::from(2.0).expect("Failed to convert constant to float") * k * (k + F::one())
/ (n - k - F::one())
}
pub fn bic(&self) -> F {
let k = F::from(self.p + self.q + 1).expect("Failed to convert to float");
let n = F::from(self.n_obs).expect("Failed to convert to float");
k * n.ln()
- F::from(2.0).expect("Failed to convert constant to float") * self.log_likelihood
}
pub fn hqc(&self) -> F {
let k = F::from(self.p + self.q + 1).expect("Failed to convert to float");
let n = F::from(self.n_obs).expect("Failed to convert to float");
F::from(2.0).expect("Failed to convert constant to float") * k * n.ln().ln()
- F::from(2.0).expect("Failed to convert constant to float") * self.log_likelihood
}
pub fn get_ic(&self, criterion: SelectionCriterion) -> F {
match criterion {
SelectionCriterion::AIC => self.aic(),
SelectionCriterion::AICc => self.aicc(),
SelectionCriterion::BIC => self.bic(),
SelectionCriterion::HQC => self.hqc(),
}
}
}
/// Automatic ARIMA order selection: chooses the differencing orders via
/// unit-root tests, searches (p, q) either stepwise or exhaustively, and
/// finally refits the winning orders on the raw series.
#[allow(dead_code)]
pub fn auto_arima<S, F>(
data: &ArrayBase<S, Ix1>,
options: &ArimaSelectionOptions,
) -> Result<(ArimaModel<F>, SarimaParams)>
where
S: Data<Elem = F>,
F: Float + FromPrimitive + Debug + Display + ScalarOperand,
{
scirs2_core::validation::checkarray_finite(data, "data")?;
// Pick the regular differencing order with ADF tests unless disabled.
let d = if options.test_stationarity {
determine_differencing_order(data, options.max_d)?
} else {
0
};
// Seasonal differencing order is only determined when seasonal search is
// enabled AND a period was supplied.
let seasonal_d = if options.seasonal && options.seasonal_period.is_some() {
determine_seasonal_differencing_order(
data,
options.seasonal_period.expect("Operation failed"),
options.max_seasonal_d,
)?
} else {
0
};
// Running best, seeded with the trivial ARIMA(0, d, 0).
let mut best_model = ArimaModel::new(0, d, 0)?;
let mut best_ic = F::infinity();
let mut best_params = SarimaParams {
pdq: (0, d, 0),
seasonal_pdq: (0, seasonal_d, 0),
seasonal_period: options.seasonal_period.unwrap_or(1),
};
// NOTE(review): candidates below are fitted on this pre-differenced series
// while still carrying order d, so they difference a second time internally;
// the final refit on the raw data (bottom of this function) is what produces
// the returned coefficients — confirm this double differencing is intended.
let diff_data = apply_differencing(data, d, seasonal_d, options.seasonal_period)?;
if options.stepwise {
// Fast Hyndman-Khandakar-style neighbourhood search.
(best_model, best_params) = stepwise_search(&diff_data, d, seasonal_d, options)?;
} else {
// Exhaustive grid over all (p, q) up to the configured bounds.
for p in 0..=options.max_p {
for q in 0..=options.max_q {
if let Ok(mut model) = ArimaModel::new(p, d, q) {
if model.fit(&diff_data).is_ok() {
let ic = model.get_ic(options.criterion);
if ic < best_ic {
best_ic = ic;
best_model = model;
best_params.pdq = (p, d, q);
}
}
}
}
}
// NOTE(review): this loop only overwrites seasonal_pdq without fitting or
// scoring anything, so it always ends at
// (max_seasonal_p, seasonal_d, max_seasonal_q). Seasonal evaluation looks
// like an unimplemented placeholder — verify before trusting seasonal_pdq.
if options.seasonal {
for sp in 0..=options.max_seasonal_p {
for sq in 0..=options.max_seasonal_q {
best_params.seasonal_pdq = (sp, seasonal_d, sq);
}
}
}
}
// Refit the selected orders on the original (undifferenced) data.
best_model.fit(data)?;
Ok((best_model, best_params))
}
/// Stepwise (p, q) search: evaluates a small fan of common starting orders,
/// then explores the +/-1 neighbourhood around the best of those. The
/// differencing orders `d` / `seasonal_d` are fixed by the caller.
#[allow(dead_code)]
fn stepwise_search<F>(
    data: &Array1<F>,
    d: usize,
    seasonal_d: usize,
    options: &ArimaSelectionOptions,
) -> Result<(ArimaModel<F>, SarimaParams)>
where
    F: Float + FromPrimitive + Debug + Display + ScalarOperand,
{
    // Running best as a (model, score, params) triple, seeded with the
    // trivial ARIMA(0, d, 0) and an infinite score.
    let mut best = (
        ArimaModel::new(0, d, 0)?,
        F::infinity(),
        SarimaParams {
            pdq: (0, d, 0),
            seasonal_pdq: (0, seasonal_d, 0),
            seasonal_period: options.seasonal_period.unwrap_or(1),
        },
    );
    // Fits one (p, q) candidate and keeps it when it beats the current best;
    // out-of-bound orders and failed fits are silently skipped.
    let consider = |p: usize, q: usize, best: &mut (ArimaModel<F>, F, SarimaParams)| {
        if p > options.max_p || q > options.max_q {
            return;
        }
        if let Ok(mut candidate) = ArimaModel::new(p, d, q) {
            if candidate.fit(data).is_ok() {
                let score = candidate.get_ic(options.criterion);
                if score < best.1 {
                    best.1 = score;
                    best.0 = candidate;
                    best.2.pdq = (p, d, q);
                }
            }
        }
    };
    // Phase 1: a fixed fan of common low-order starting points.
    for &(p, q) in &[(0, 0), (1, 0), (0, 1), (1, 1), (2, 0), (0, 2)] {
        consider(p, q, &mut best);
    }
    // Phase 2: explore the +/-1 neighbourhood around the phase-1 winner.
    let (anchor_p, _, anchor_q) = best.2.pdq;
    for dp in -1i32..=1 {
        for dq in -1i32..=1 {
            let p = (anchor_p as i32 + dp).max(0) as usize;
            let q = (anchor_q as i32 + dq).max(0) as usize;
            if p != anchor_p || q != anchor_q {
                consider(p, q, &mut best);
            }
        }
    }
    let (model, _, params) = best;
    Ok((model, params))
}
/// Picks the smallest differencing order (up to `max_d`) at which an ADF test
/// judges the series stationary; falls back to `max_d` when none passes.
#[allow(dead_code)]
fn determine_differencing_order<S, F>(data: &ArrayBase<S, Ix1>, max_d: usize) -> Result<usize>
where
    S: Data<Elem = F>,
    F: Float + FromPrimitive + Debug + Display + ScalarOperand,
{
    let significance = F::from(0.05).expect("Failed to convert constant to float");
    for order in 0..=max_d {
        let differenced = apply_single_differencing(data, order)?;
        // Too few observations remain for a meaningful test; back off one order.
        if differenced.len() < 10 {
            return Ok(order.saturating_sub(1));
        }
        // A failed test run is treated the same as "not stationary yet".
        match adf_test(&differenced, None, ADFRegression::ConstantAndTrend, significance) {
            Ok(outcome) if outcome.is_stationary => return Ok(order),
            _ => {}
        }
    }
    Ok(max_d)
}
/// Picks the smallest seasonal differencing order (up to `max_d`) at which an
/// ADF test judges the seasonally-differenced series stationary; falls back
/// to `max_d` when none passes.
#[allow(dead_code)]
fn determine_seasonal_differencing_order<S, F>(
    data: &ArrayBase<S, Ix1>,
    period: usize,
    max_d: usize,
) -> Result<usize>
where
    S: Data<Elem = F>,
    F: Float + FromPrimitive + Debug + Display + ScalarOperand,
{
    let significance = F::from(0.05).expect("Failed to convert constant to float");
    for order in 0..=max_d {
        let differenced = apply_seasonal_differencing(data, period, order)?;
        // Too few observations remain for a meaningful test; back off one order.
        if differenced.len() < 10 {
            return Ok(order.saturating_sub(1));
        }
        // A failed test run is treated the same as "not stationary yet".
        match adf_test(&differenced, None, ADFRegression::ConstantAndTrend, significance) {
            Ok(outcome) if outcome.is_stationary => return Ok(order),
            _ => {}
        }
    }
    Ok(max_d)
}
/// Applies first differencing `d` times; each pass shortens the series by one
/// observation and errors out once fewer than two points remain.
#[allow(dead_code)]
fn apply_single_differencing<S, F>(data: &ArrayBase<S, Ix1>, d: usize) -> Result<Array1<F>>
where
    S: Data<Elem = F>,
    F: Float + FromPrimitive,
{
    let mut series = data.to_owned();
    for _ in 0..d {
        let len = series.len();
        if len <= 1 {
            return Err(TimeSeriesError::InvalidInput(
                "Insufficient data for differencing".to_string(),
            ));
        }
        let mut next = Array1::zeros(len - 1);
        for i in 0..len - 1 {
            next[i] = series[i + 1] - series[i];
        }
        series = next;
    }
    Ok(series)
}
/// Applies lag-`period` (seasonal) differencing `d` times; each pass removes
/// `period` observations and errors out once the series is no longer than
/// one period.
#[allow(dead_code)]
fn apply_seasonal_differencing<S, F>(
    data: &ArrayBase<S, Ix1>,
    period: usize,
    d: usize,
) -> Result<Array1<F>>
where
    S: Data<Elem = F>,
    F: Float + FromPrimitive,
{
    let mut series = data.to_owned();
    for _ in 0..d {
        let len = series.len();
        if len <= period {
            return Err(TimeSeriesError::InvalidInput(
                "Insufficient data for seasonal differencing".to_string(),
            ));
        }
        let mut next = Array1::zeros(len - period);
        for i in 0..len - period {
            next[i] = series[i + period] - series[i];
        }
        series = next;
    }
    Ok(series)
}
/// Applies regular differencing of order `d`, then — only when both a
/// positive seasonal order and a period are supplied — seasonal differencing
/// of order `seasonal_d`.
#[allow(dead_code)]
fn apply_differencing<S, F>(
    data: &ArrayBase<S, Ix1>,
    d: usize,
    seasonal_d: usize,
    period: Option<usize>,
) -> Result<Array1<F>>
where
    S: Data<Elem = F>,
    F: Float + FromPrimitive,
{
    let regular = apply_single_differencing(data, d)?;
    match period {
        Some(p) if seasonal_d > 0 => apply_seasonal_differencing(&regular, p, seasonal_d),
        _ => Ok(regular),
    }
}
#[cfg(test)]
mod tests {
use super::*;
use scirs2_core::ndarray::array;
// Valid orders yield an unfitted model carrying those orders.
#[test]
fn test_arima_creation() {
let model = ArimaModel::<f64>::new(2, 1, 1).expect("Operation failed");
assert_eq!(model.p, 2);
assert_eq!(model.d, 1);
assert_eq!(model.q, 1);
assert!(!model.is_fitted);
}
// Out-of-range orders are rejected by validate_arima_orders.
#[test]
fn test_arima_creation_invalid() {
let result = ArimaModel::<f64>::new(11, 1, 1);
assert!(result.is_err());
let result = ArimaModel::<f64>::new(1, 4, 1);
assert!(result.is_err());
let result = ArimaModel::<f64>::new(1, 1, 11);
assert!(result.is_err());
}
// First differencing of a length-5 series yields the 4 successive deltas.
#[test]
fn test_differencing() {
let data = array![1.0, 2.0, 4.0, 7.0, 11.0];
let model = ArimaModel::<f64>::new(0, 1, 0).expect("Operation failed");
let diff = model.difference(&data).expect("Operation failed");
assert_eq!(diff.len(), 4);
assert_eq!(diff[0], 1.0);
assert_eq!(diff[1], 2.0);
assert_eq!(diff[2], 3.0);
assert_eq!(diff[3], 4.0);
}
// Fitting an ARMA(1, 1) on a simple trend succeeds and marks the model fitted.
#[test]
fn test_arima_fit() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
let mut model = ArimaModel::new(1, 0, 1).expect("Operation failed");
let result = model.fit(&data);
assert!(result.is_ok());
assert!(model.is_fitted);
}
// auto_arima returns a fitted model whose d matches the selected orders.
#[test]
fn test_auto_arima() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
let options = ArimaSelectionOptions::default();
let result = auto_arima(&data, &options);
assert!(result.is_ok());
let (model, params) = result.expect("Operation failed");
assert!(model.is_fitted);
assert_eq!(params.pdq.1, model.d);
}
// The ADF-based search never exceeds the requested maximum order.
#[test]
fn test_determine_differencing() {
let data = array![1.0, 2.0, 4.0, 7.0, 11.0, 16.0, 22.0, 29.0];
let d = determine_differencing_order(&data, 2).expect("Operation failed");
assert!(d <= 2);
}
// A fitted AR(1) model produces the requested number of forecasts.
#[test]
fn test_forecast() {
let data = array![1.0, 2.4, 3.2, 4.1, 5.5, 6.2, 7.8, 8.3, 9.7, 10.1];
let mut model = ArimaModel::new(1, 0, 0).expect("Operation failed");
model.fit(&data).expect("Operation failed");
let forecasts = model.forecast(3, &data).expect("Operation failed");
assert_eq!(forecasts.len(), 3);
}
}