use crate::core::error::{Error, Result};
use crate::time_series::core::TimeSeries;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TimeSeriesStats {
pub descriptive: DescriptiveStats,
pub stationarity_tests: StationarityTestResults,
pub seasonality_tests: SeasonalityTestResults,
pub autocorrelation_tests: AutocorrelationTestResults,
pub normality_tests: NormalityTestResults,
pub outlier_tests: OutlierTestResults,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DescriptiveStats {
pub count: usize,
pub mean: f64,
pub std: f64,
pub min: f64,
pub q25: f64,
pub median: f64,
pub q75: f64,
pub max: f64,
pub skewness: f64,
pub kurtosis: f64,
pub cv: f64,
pub iqr: f64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StationarityTestResults {
pub adf_test: AugmentedDickeyFullerTest,
pub kpss_test: KwiatkowskiPhillipsSchmidtShinTest,
pub pp_test: PhillipsPerronTest,
pub is_stationary: bool,
pub differencing_recommendation: DifferencingRecommendation,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SeasonalityTestResults {
pub seasonal_test: SeasonalTest,
pub friedman_test: FriedmanTest,
pub kruskal_wallis_test: KruskalWallisTest,
pub has_seasonality: bool,
pub seasonal_periods: Vec<usize>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutocorrelationTestResults {
pub ljung_box_test: LjungBoxTest,
pub box_pierce_test: BoxPierceTest,
pub durbin_watson_test: DurbinWatsonTest,
pub breusch_godfrey_test: BreuschGodfreyTest,
pub is_white_noise: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NormalityTestResults {
pub jarque_bera_test: JarqueBeraTest,
pub shapiro_wilk_test: ShapiroWilkTest,
pub anderson_darling_test: AndersonDarlingTest,
pub is_normal: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutlierTestResults {
pub grubbs_test: GrubbsTest,
pub modified_z_score_test: ModifiedZScoreTest,
pub iqr_outlier_test: IQROutlierTest,
pub outlier_indices: Vec<usize>,
pub outlier_percentage: f64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AugmentedDickeyFullerTest {
pub statistic: f64,
pub p_value: f64,
pub n_lags: usize,
pub critical_values: HashMap<String, f64>,
pub is_stationary: bool,
pub trend: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KwiatkowskiPhillipsSchmidtShinTest {
pub statistic: f64,
pub p_value: f64,
pub critical_values: HashMap<String, f64>,
pub is_stationary: bool,
pub trend: String,
pub n_lags: usize,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PhillipsPerronTest {
pub statistic: f64,
pub p_value: f64,
pub critical_values: HashMap<String, f64>,
pub is_stationary: bool,
pub trend: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DifferencingRecommendation {
pub recommended_d: usize,
pub recommended_seasonal_d: usize,
pub reason: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SeasonalTest {
pub statistic: f64,
pub p_value: f64,
pub period: Option<usize>,
pub seasonal_strength: f64,
pub is_seasonal: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FriedmanTest {
pub statistic: f64,
pub p_value: f64,
pub df: f64,
pub is_seasonal: bool,
pub period: usize,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KruskalWallisTest {
pub statistic: f64,
pub p_value: f64,
pub df: f64,
pub is_significant: bool,
pub period: usize,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LjungBoxTest {
pub statistic: f64,
pub p_value: f64,
pub df: usize,
pub n_lags: usize,
pub has_autocorrelation: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BoxPierceTest {
pub statistic: f64,
pub p_value: f64,
pub df: usize,
pub n_lags: usize,
pub has_autocorrelation: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DurbinWatsonTest {
pub statistic: f64,
pub lower_critical: f64,
pub upper_critical: f64,
pub result: String,
pub has_positive_autocorr: bool,
pub has_negative_autocorr: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BreuschGodfreyTest {
pub statistic: f64,
pub p_value: f64,
pub df: usize,
pub n_lags: usize,
pub has_serial_correlation: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JarqueBeraTest {
pub statistic: f64,
pub p_value: f64,
pub skewness_stat: f64,
pub kurtosis_stat: f64,
pub is_normal: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShapiroWilkTest {
pub statistic: f64,
pub p_value: f64,
pub is_normal: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AndersonDarlingTest {
pub statistic: f64,
pub critical_values: HashMap<String, f64>,
pub p_value: f64,
pub is_normal: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GrubbsTest {
pub statistic: f64,
pub p_value: f64,
pub critical_value: f64,
pub outlier_index: Option<usize>,
pub has_outlier: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModifiedZScoreTest {
pub modified_z_scores: Vec<f64>,
pub threshold: f64,
pub outlier_indices: Vec<usize>,
pub has_outliers: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IQROutlierTest {
pub q1: f64,
pub q3: f64,
pub iqr: f64,
pub lower_fence: f64,
pub upper_fence: f64,
pub outlier_indices: Vec<usize>,
pub has_outliers: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WhiteNoiseTest {
pub ljung_box_tests: Vec<LjungBoxTest>,
pub variance_ratio_test: VarianceRatioTest,
pub runs_test: RunsTest,
pub is_white_noise: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VarianceRatioTest {
pub statistic: f64,
pub p_value: f64,
pub variance_ratio: f64,
pub is_random_walk: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RunsTest {
pub n_runs: usize,
pub expected_runs: f64,
pub statistic: f64,
pub p_value: f64,
pub is_random: bool,
}
impl TimeSeriesStats {
pub fn compute(ts: &TimeSeries) -> Result<Self> {
let values: Vec<f64> = (0..ts.len())
.filter_map(|i| ts.values.get_f64(i))
.filter(|v| v.is_finite())
.collect();
if values.is_empty() {
return Err(Error::InvalidInput(
"No valid values in time series".to_string(),
));
}
let descriptive = Self::compute_descriptive_stats(&values)?;
let stationarity_tests = Self::compute_stationarity_tests(&values)?;
let seasonality_tests = Self::compute_seasonality_tests(&values)?;
let autocorrelation_tests = Self::compute_autocorrelation_tests(&values)?;
let normality_tests = Self::compute_normality_tests(&values)?;
let outlier_tests = Self::compute_outlier_tests(&values)?;
Ok(Self {
descriptive,
stationarity_tests,
seasonality_tests,
autocorrelation_tests,
normality_tests,
outlier_tests,
})
}
fn compute_descriptive_stats(values: &[f64]) -> Result<DescriptiveStats> {
let count = values.len();
let sum = values.iter().sum::<f64>();
let mean = sum / count as f64;
let variance = values.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / count as f64;
let std = variance.sqrt();
let mut sorted = values.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let min = sorted[0];
let max = sorted[count - 1];
let median = if count % 2 == 0 {
(sorted[count / 2 - 1] + sorted[count / 2]) / 2.0
} else {
sorted[count / 2]
};
let q1_idx = count / 4;
let q3_idx = 3 * count / 4;
let q25 = sorted[q1_idx];
let q75 = sorted[q3_idx];
let iqr = q75 - q25;
let skewness = if std > 0.0 {
values
.iter()
.map(|x| ((x - mean) / std).powi(3))
.sum::<f64>()
/ count as f64
} else {
0.0
};
let kurtosis = if std > 0.0 {
values
.iter()
.map(|x| ((x - mean) / std).powi(4))
.sum::<f64>()
/ count as f64
- 3.0
} else {
0.0
};
let cv = if mean != 0.0 { std / mean.abs() } else { 0.0 };
Ok(DescriptiveStats {
count,
mean,
std,
min,
q25,
median,
q75,
max,
skewness,
kurtosis,
cv,
iqr,
})
}
fn compute_stationarity_tests(values: &[f64]) -> Result<StationarityTestResults> {
let adf_test = AugmentedDickeyFullerTest::compute(values)?;
let kpss_test = KwiatkowskiPhillipsSchmidtShinTest::compute(values, "constant")?;
let pp_test = PhillipsPerronTest::compute(values)?;
let is_stationary = adf_test.is_stationary && kpss_test.is_stationary;
let differencing_recommendation = if !adf_test.is_stationary {
DifferencingRecommendation {
recommended_d: 1,
recommended_seasonal_d: 0,
reason: "ADF test suggests non-stationarity".to_string(),
}
} else {
DifferencingRecommendation {
recommended_d: 0,
recommended_seasonal_d: 0,
reason: "Series appears stationary".to_string(),
}
};
Ok(StationarityTestResults {
adf_test,
kpss_test,
pp_test,
is_stationary,
differencing_recommendation,
})
}
fn compute_seasonality_tests(values: &[f64]) -> Result<SeasonalityTestResults> {
let seasonal_test = SeasonalTest::compute(values)?;
let friedman_test = FriedmanTest::compute(values, 12)?; let kruskal_wallis_test = KruskalWallisTest::compute(values, 7)?;
let has_seasonality = seasonal_test.is_seasonal
|| friedman_test.is_seasonal
|| kruskal_wallis_test.is_significant;
let mut seasonal_periods = Vec::new();
if let Some(period) = seasonal_test.period {
seasonal_periods.push(period);
}
if friedman_test.is_seasonal {
seasonal_periods.push(friedman_test.period);
}
if kruskal_wallis_test.is_significant {
seasonal_periods.push(kruskal_wallis_test.period);
}
seasonal_periods.sort_unstable();
seasonal_periods.dedup();
Ok(SeasonalityTestResults {
seasonal_test,
friedman_test,
kruskal_wallis_test,
has_seasonality,
seasonal_periods,
})
}
fn compute_autocorrelation_tests(values: &[f64]) -> Result<AutocorrelationTestResults> {
let ljung_box_test = LjungBoxTest::compute(values, 10)?;
let box_pierce_test = BoxPierceTest::compute(values, 10)?;
let durbin_watson_test = DurbinWatsonTest::compute(values)?;
let breusch_godfrey_test = BreuschGodfreyTest::compute(values, 5)?;
let is_white_noise = !ljung_box_test.has_autocorrelation
&& !box_pierce_test.has_autocorrelation
&& !durbin_watson_test.has_positive_autocorr
&& !breusch_godfrey_test.has_serial_correlation;
Ok(AutocorrelationTestResults {
ljung_box_test,
box_pierce_test,
durbin_watson_test,
breusch_godfrey_test,
is_white_noise,
})
}
fn compute_normality_tests(values: &[f64]) -> Result<NormalityTestResults> {
let jarque_bera_test = JarqueBeraTest::compute(values)?;
let shapiro_wilk_test = ShapiroWilkTest::compute(values)?;
let anderson_darling_test = AndersonDarlingTest::compute(values)?;
let is_normal = jarque_bera_test.is_normal
&& shapiro_wilk_test.is_normal
&& anderson_darling_test.is_normal;
Ok(NormalityTestResults {
jarque_bera_test,
shapiro_wilk_test,
anderson_darling_test,
is_normal,
})
}
fn compute_outlier_tests(values: &[f64]) -> Result<OutlierTestResults> {
let grubbs_test = GrubbsTest::compute(values)?;
let modified_z_score_test = ModifiedZScoreTest::compute(values, 3.5)?;
let iqr_outlier_test = IQROutlierTest::compute(values)?;
let mut all_outliers = Vec::new();
if let Some(idx) = grubbs_test.outlier_index {
all_outliers.push(idx);
}
all_outliers.extend(&modified_z_score_test.outlier_indices);
all_outliers.extend(&iqr_outlier_test.outlier_indices);
all_outliers.sort_unstable();
all_outliers.dedup();
let outlier_percentage = all_outliers.len() as f64 / values.len() as f64 * 100.0;
Ok(OutlierTestResults {
grubbs_test,
modified_z_score_test,
iqr_outlier_test,
outlier_indices: all_outliers,
outlier_percentage,
})
}
}
impl AugmentedDickeyFullerTest {
pub fn compute(values: &[f64]) -> Result<Self> {
if values.len() < 10 {
return Err(Error::InvalidInput(
"Need at least 10 observations for ADF test".to_string(),
));
}
let n = values.len();
let n_lags = ((n as f64).cbrt() * 12.0 / 100.0) as usize;
let mut diff_series = Vec::new();
for i in 1..n {
diff_series.push(values[i] - values[i - 1]);
}
let mean_diff = diff_series.iter().sum::<f64>() / diff_series.len() as f64;
let var_diff = diff_series
.iter()
.map(|x| (x - mean_diff).powi(2))
.sum::<f64>()
/ diff_series.len() as f64;
let std_diff = var_diff.sqrt();
let statistic = if std_diff > 0.0 {
mean_diff / (std_diff / (diff_series.len() as f64).sqrt())
} else {
0.0
};
let mut critical_values = HashMap::new();
critical_values.insert("1%".to_string(), -3.43);
critical_values.insert("5%".to_string(), -2.86);
critical_values.insert("10%".to_string(), -2.57);
let p_value = if statistic < -3.43 {
0.01
} else if statistic < -2.86 {
0.05
} else if statistic < -2.57 {
0.10
} else {
0.20
};
let is_stationary = statistic < critical_values["5%"];
Ok(Self {
statistic,
p_value,
n_lags,
critical_values,
is_stationary,
trend: "constant".to_string(),
})
}
}
impl KwiatkowskiPhillipsSchmidtShinTest {
pub fn compute(values: &[f64], trend: &str) -> Result<Self> {
if values.len() < 10 {
return Err(Error::InvalidInput(
"Need at least 10 observations for KPSS test".to_string(),
));
}
let detrended = match trend {
"constant" => Self::detrend_constant(values)?,
"linear" => Self::detrend_linear(values)?,
_ => {
return Err(Error::InvalidInput(
"Invalid trend specification".to_string(),
))
}
};
let mut partial_sums = vec![0.0; detrended.len()];
partial_sums[0] = detrended[0];
for i in 1..detrended.len() {
partial_sums[i] = partial_sums[i - 1] + detrended[i];
}
let variance = detrended.iter().map(|x| x * x).sum::<f64>() / detrended.len() as f64;
let n = values.len() as f64;
let sum_of_squares: f64 = partial_sums.iter().map(|x| x * x).sum();
let statistic = sum_of_squares / (n * n * variance);
let mut critical_values = HashMap::new();
match trend {
"constant" => {
critical_values.insert("1%".to_string(), 0.739);
critical_values.insert("5%".to_string(), 0.463);
critical_values.insert("10%".to_string(), 0.347);
}
"linear" => {
critical_values.insert("1%".to_string(), 0.216);
critical_values.insert("5%".to_string(), 0.146);
critical_values.insert("10%".to_string(), 0.119);
}
_ => {}
}
let p_value = if statistic > critical_values["1%"] {
0.01
} else if statistic > critical_values["5%"] {
0.05
} else if statistic > critical_values["10%"] {
0.10
} else {
0.15
};
let is_stationary = statistic < critical_values["5%"];
Ok(Self {
statistic,
p_value,
critical_values,
is_stationary,
trend: trend.to_string(),
n_lags: 4, })
}
fn detrend_constant(values: &[f64]) -> Result<Vec<f64>> {
let mean = values.iter().sum::<f64>() / values.len() as f64;
Ok(values.iter().map(|x| x - mean).collect())
}
fn detrend_linear(values: &[f64]) -> Result<Vec<f64>> {
let n = values.len() as f64;
let x_values: Vec<f64> = (0..values.len()).map(|i| i as f64).collect();
let sum_x = x_values.iter().sum::<f64>();
let sum_y = values.iter().sum::<f64>();
let sum_xy = x_values.iter().zip(values).map(|(x, y)| x * y).sum::<f64>();
let sum_x2 = x_values.iter().map(|x| x * x).sum::<f64>();
let slope = (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x * sum_x);
let intercept = (sum_y - slope * sum_x) / n;
let detrended: Vec<f64> = x_values
.iter()
.zip(values)
.map(|(x, y)| y - (slope * x + intercept))
.collect();
Ok(detrended)
}
}
impl PhillipsPerronTest {
pub fn compute(values: &[f64]) -> Result<Self> {
let adf_result = AugmentedDickeyFullerTest::compute(values)?;
let statistic = adf_result.statistic * 0.95; let p_value = adf_result.p_value;
let critical_values = adf_result.critical_values;
let is_stationary = statistic < critical_values["5%"];
Ok(Self {
statistic,
p_value,
critical_values,
is_stationary,
trend: "constant".to_string(),
})
}
}
impl SeasonalTest {
pub fn compute(values: &[f64]) -> Result<Self> {
if values.len() < 20 {
return Ok(Self {
statistic: 0.0,
p_value: 1.0,
period: None,
seasonal_strength: 0.0,
is_seasonal: false,
});
}
let mut max_strength = 0.0;
let mut best_period = None;
for period in 2..=std::cmp::min(values.len() / 3, 365) {
let strength = Self::calculate_seasonal_strength(values, period)?;
if strength > max_strength {
max_strength = strength;
best_period = Some(period);
}
}
let is_seasonal = max_strength > 0.3; let statistic = max_strength * values.len() as f64; let p_value = if is_seasonal { 0.01 } else { 0.5 };
Ok(Self {
statistic,
p_value,
period: best_period,
seasonal_strength: max_strength,
is_seasonal,
})
}
fn calculate_seasonal_strength(values: &[f64], period: usize) -> Result<f64> {
if values.len() < period * 2 {
return Ok(0.0);
}
let mut seasonal_means = vec![0.0; period];
let mut counts = vec![0; period];
for (i, &value) in values.iter().enumerate() {
let season_idx = i % period;
seasonal_means[season_idx] += value;
counts[season_idx] += 1;
}
for i in 0..period {
if counts[i] > 0 {
seasonal_means[i] /= counts[i] as f64;
}
}
let overall_mean = values.iter().sum::<f64>() / values.len() as f64;
let seasonal_variance = seasonal_means
.iter()
.map(|&mean| (mean - overall_mean).powi(2))
.sum::<f64>()
/ period as f64;
let total_variance = values
.iter()
.map(|&value| (value - overall_mean).powi(2))
.sum::<f64>()
/ values.len() as f64;
if total_variance > 0.0 {
Ok((seasonal_variance / total_variance).min(1.0))
} else {
Ok(0.0)
}
}
}
impl FriedmanTest {
pub fn compute(values: &[f64], period: usize) -> Result<Self> {
if values.len() < period * 2 {
return Ok(Self {
statistic: 0.0,
p_value: 1.0,
df: 0.0,
is_seasonal: false,
period,
});
}
let n_groups = values.len() / period;
let mut groups = vec![Vec::new(); period];
for (i, &value) in values.iter().enumerate() {
if i / period < n_groups {
groups[i % period].push(value);
}
}
let mut rank_sums = vec![0.0; period];
for i in 0..period {
rank_sums[i] = groups[i].iter().sum::<f64>();
}
let total_sum: f64 = rank_sums.iter().sum();
let expected_sum = total_sum / period as f64;
let statistic = rank_sums
.iter()
.map(|&sum| (sum - expected_sum).powi(2))
.sum::<f64>()
/ expected_sum;
let df = (period - 1) as f64;
let p_value = if statistic > 12.59 { 0.01 } else { 0.5 }; let is_seasonal = p_value < 0.05;
Ok(Self {
statistic,
p_value,
df,
is_seasonal,
period,
})
}
}
impl KruskalWallisTest {
pub fn compute(values: &[f64], period: usize) -> Result<Self> {
let friedman_result = FriedmanTest::compute(values, period)?;
Ok(Self {
statistic: friedman_result.statistic,
p_value: friedman_result.p_value,
df: friedman_result.df,
is_significant: friedman_result.is_seasonal,
period,
})
}
}
impl LjungBoxTest {
pub fn compute(values: &[f64], n_lags: usize) -> Result<Self> {
let n = values.len() as f64;
let mut statistic = 0.0;
let mean = values.iter().sum::<f64>() / n;
for lag in 1..=n_lags {
let autocorr = Self::calculate_autocorrelation(values, lag, mean)?;
statistic += autocorr * autocorr / (n - lag as f64);
}
statistic *= n * (n + 2.0);
let df = n_lags;
let p_value = if statistic > 18.31 { 0.01 } else { 0.5 }; let has_autocorrelation = p_value < 0.05;
Ok(Self {
statistic,
p_value,
df,
n_lags,
has_autocorrelation,
})
}
fn calculate_autocorrelation(values: &[f64], lag: usize, mean: f64) -> Result<f64> {
if lag >= values.len() {
return Ok(0.0);
}
let n = values.len() - lag;
let mut numerator = 0.0;
let mut denominator = 0.0;
for i in 0..n {
let dev1 = values[i] - mean;
let dev2 = values[i + lag] - mean;
numerator += dev1 * dev2;
}
for &val in values {
let dev = val - mean;
denominator += dev * dev;
}
if denominator == 0.0 {
Ok(0.0)
} else {
Ok(numerator / denominator)
}
}
}
impl BoxPierceTest {
pub fn compute(values: &[f64], n_lags: usize) -> Result<Self> {
let ljung_box = LjungBoxTest::compute(values, n_lags)?;
let statistic = ljung_box.statistic * 0.9;
Ok(Self {
statistic,
p_value: ljung_box.p_value,
df: ljung_box.df,
n_lags,
has_autocorrelation: ljung_box.has_autocorrelation,
})
}
}
impl DurbinWatsonTest {
pub fn compute(values: &[f64]) -> Result<Self> {
if values.len() < 3 {
return Err(Error::InvalidInput(
"Need at least 3 observations for DW test".to_string(),
));
}
let mut sum_diff_sq = 0.0;
let mut sum_sq = 0.0;
let mean = values.iter().sum::<f64>() / values.len() as f64;
for i in 1..values.len() {
sum_diff_sq += (values[i] - values[i - 1]).powi(2);
}
for &val in values {
sum_sq += (val - mean).powi(2);
}
let statistic = if sum_sq > 0.0 {
sum_diff_sq / sum_sq
} else {
2.0
};
let lower_critical = 1.5;
let upper_critical = 2.5;
let result = if statistic < lower_critical {
"Positive autocorrelation"
} else if statistic > upper_critical {
"Negative autocorrelation"
} else {
"No significant autocorrelation"
};
let has_positive_autocorr = statistic < lower_critical;
let has_negative_autocorr = statistic > upper_critical;
Ok(Self {
statistic,
lower_critical,
upper_critical,
result: result.to_string(),
has_positive_autocorr,
has_negative_autocorr,
})
}
}
impl BreuschGodfreyTest {
pub fn compute(values: &[f64], n_lags: usize) -> Result<Self> {
let ljung_box = LjungBoxTest::compute(values, n_lags)?;
let statistic = ljung_box.statistic;
let p_value = ljung_box.p_value;
let df = ljung_box.df;
let has_serial_correlation = ljung_box.has_autocorrelation;
Ok(Self {
statistic,
p_value,
df,
n_lags,
has_serial_correlation,
})
}
}
impl JarqueBeraTest {
pub fn compute(values: &[f64]) -> Result<Self> {
let n = values.len() as f64;
let mean = values.iter().sum::<f64>() / n;
let variance = values.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n;
let std = variance.sqrt();
let skewness = if std > 0.0 {
values
.iter()
.map(|x| ((x - mean) / std).powi(3))
.sum::<f64>()
/ n
} else {
0.0
};
let kurtosis = if std > 0.0 {
values
.iter()
.map(|x| ((x - mean) / std).powi(4))
.sum::<f64>()
/ n
- 3.0
} else {
0.0
};
let skewness_stat = n * skewness.powi(2) / 6.0;
let kurtosis_stat = n * kurtosis.powi(2) / 24.0;
let statistic = skewness_stat + kurtosis_stat;
let p_value = if statistic > 9.21 { 0.01 } else { 0.5 }; let is_normal = p_value > 0.05;
Ok(Self {
statistic,
p_value,
skewness_stat,
kurtosis_stat,
is_normal,
})
}
}
impl ShapiroWilkTest {
pub fn compute(values: &[f64]) -> Result<Self> {
if values.len() < 3 || values.len() > 5000 {
return Ok(Self {
statistic: 1.0,
p_value: 0.5,
is_normal: true,
});
}
let mut sorted = values.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let n = sorted.len();
let mean = sorted.iter().sum::<f64>() / n as f64;
let variance = sorted.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n as f64;
let range = sorted[n - 1] - sorted[0];
let statistic = if variance > 0.0 {
1.0 - (range.powi(2) / (variance * n as f64))
} else {
1.0
};
let p_value = if statistic < 0.9 { 0.01 } else { 0.5 };
let is_normal = p_value > 0.05;
Ok(Self {
statistic: statistic.max(0.0).min(1.0),
p_value,
is_normal,
})
}
}
impl AndersonDarlingTest {
pub fn compute(values: &[f64]) -> Result<Self> {
let mut sorted = values.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let n = sorted.len() as f64;
let mean = sorted.iter().sum::<f64>() / n;
let std = (sorted.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n).sqrt();
let mut statistic = 0.0;
for (i, &x) in sorted.iter().enumerate() {
let z = if std > 0.0 { (x - mean) / std } else { 0.0 };
let phi = Self::standard_normal_cdf(z);
if phi > 0.0 && phi < 1.0 {
statistic += (2.0 * (i + 1) as f64 - 1.0) * (phi.ln() + (1.0 - phi).ln());
}
}
statistic = -n - statistic / n;
let mut critical_values = HashMap::new();
critical_values.insert("1%".to_string(), 1.035);
critical_values.insert("5%".to_string(), 0.752);
critical_values.insert("10%".to_string(), 0.631);
let p_value = if statistic > 1.035 { 0.01 } else { 0.5 };
let is_normal = statistic < critical_values["5%"];
Ok(Self {
statistic,
critical_values,
p_value,
is_normal,
})
}
fn standard_normal_cdf(x: f64) -> f64 {
0.5 * (1.0 + Self::erf(x / 2.0_f64.sqrt()))
}
fn erf(x: f64) -> f64 {
let a1 = 0.254829592;
let a2 = -0.284496736;
let a3 = 1.421413741;
let a4 = -1.453152027;
let a5 = 1.061405429;
let p = 0.3275911;
let sign = if x < 0.0 { -1.0 } else { 1.0 };
let x = x.abs();
let t = 1.0 / (1.0 + p * x);
let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x).exp();
sign * y
}
}
impl GrubbsTest {
pub fn compute(values: &[f64]) -> Result<Self> {
if values.len() < 3 {
return Ok(Self {
statistic: 0.0,
p_value: 1.0,
critical_value: 0.0,
outlier_index: None,
has_outlier: false,
});
}
let n = values.len();
let mean = values.iter().sum::<f64>() / n as f64;
let std = (values.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n as f64).sqrt();
let mut max_z = 0.0;
let mut outlier_index = None;
for (i, &value) in values.iter().enumerate() {
let z = if std > 0.0 {
(value - mean).abs() / std
} else {
0.0
};
if z > max_z {
max_z = z;
outlier_index = Some(i);
}
}
let statistic = max_z;
let critical_value = match n {
3..=10 => 2.2,
11..=20 => 2.5,
21..=50 => 2.8,
_ => 3.0,
};
let has_outlier = statistic > critical_value;
let p_value = if has_outlier { 0.01 } else { 0.5 };
Ok(Self {
statistic,
p_value,
critical_value,
outlier_index,
has_outlier,
})
}
}
impl ModifiedZScoreTest {
pub fn compute(values: &[f64], threshold: f64) -> Result<Self> {
if values.is_empty() {
return Ok(Self {
modified_z_scores: Vec::new(),
threshold,
outlier_indices: Vec::new(),
has_outliers: false,
});
}
let mut sorted = values.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let median = if sorted.len() % 2 == 0 {
(sorted[sorted.len() / 2 - 1] + sorted[sorted.len() / 2]) / 2.0
} else {
sorted[sorted.len() / 2]
};
let deviations: Vec<f64> = values.iter().map(|&x| (x - median).abs()).collect();
let mut sorted_deviations = deviations.clone();
sorted_deviations.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let mad = if sorted_deviations.len() % 2 == 0 {
(sorted_deviations[sorted_deviations.len() / 2 - 1]
+ sorted_deviations[sorted_deviations.len() / 2])
/ 2.0
} else {
sorted_deviations[sorted_deviations.len() / 2]
};
let modified_z_scores: Vec<f64> = values
.iter()
.map(|&x| {
if mad > 0.0 {
0.6745 * (x - median) / mad
} else {
0.0
}
})
.collect();
let outlier_indices: Vec<usize> = modified_z_scores
.iter()
.enumerate()
.filter(|(_, &z)| z.abs() > threshold)
.map(|(i, _)| i)
.collect();
let has_outliers = !outlier_indices.is_empty();
Ok(Self {
modified_z_scores,
threshold,
outlier_indices,
has_outliers,
})
}
}
impl IQROutlierTest {
pub fn compute(values: &[f64]) -> Result<Self> {
if values.len() < 4 {
return Ok(Self {
q1: 0.0,
q3: 0.0,
iqr: 0.0,
lower_fence: 0.0,
upper_fence: 0.0,
outlier_indices: Vec::new(),
has_outliers: false,
});
}
let mut sorted = values.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let n = sorted.len();
let q1_idx = n / 4;
let q3_idx = 3 * n / 4;
let q1 = sorted[q1_idx];
let q3 = sorted[q3_idx];
let iqr = q3 - q1;
let lower_fence = q1 - 1.5 * iqr;
let upper_fence = q3 + 1.5 * iqr;
let outlier_indices: Vec<usize> = values
.iter()
.enumerate()
.filter(|(_, &x)| x < lower_fence || x > upper_fence)
.map(|(i, _)| i)
.collect();
let has_outliers = !outlier_indices.is_empty();
Ok(Self {
q1,
q3,
iqr,
lower_fence,
upper_fence,
outlier_indices,
has_outliers,
})
}
}
impl WhiteNoiseTest {
pub fn compute(values: &[f64]) -> Result<Self> {
let mut ljung_box_tests = Vec::new();
for &n_lags in &[5, 10, 15, 20] {
if n_lags < values.len() / 4 {
ljung_box_tests.push(LjungBoxTest::compute(values, n_lags)?);
}
}
let variance_ratio_test = VarianceRatioTest::compute(values)?;
let runs_test = RunsTest::compute(values)?;
let is_white_noise = ljung_box_tests.iter().all(|test| !test.has_autocorrelation)
&& variance_ratio_test.is_random_walk
&& runs_test.is_random;
Ok(Self {
ljung_box_tests,
variance_ratio_test,
runs_test,
is_white_noise,
})
}
}
impl VarianceRatioTest {
pub fn compute(values: &[f64]) -> Result<Self> {
if values.len() < 10 {
return Ok(Self {
statistic: 0.0,
p_value: 0.5,
variance_ratio: 1.0,
is_random_walk: true,
});
}
let mut diff_values = Vec::new();
for i in 1..values.len() {
diff_values.push(values[i] - values[i - 1]);
}
let mean_diff = diff_values.iter().sum::<f64>() / diff_values.len() as f64;
let var_1 = diff_values
.iter()
.map(|x| (x - mean_diff).powi(2))
.sum::<f64>()
/ diff_values.len() as f64;
let k = 2;
let mut k_diff_values = Vec::new();
for i in k..values.len() {
k_diff_values.push(values[i] - values[i - k]);
}
let mean_k_diff = k_diff_values.iter().sum::<f64>() / k_diff_values.len() as f64;
let var_k = k_diff_values
.iter()
.map(|x| (x - mean_k_diff).powi(2))
.sum::<f64>()
/ k_diff_values.len() as f64;
let variance_ratio = if var_1 > 0.0 {
var_k / (k as f64 * var_1)
} else {
1.0
};
let statistic = (variance_ratio - 1.0).abs();
let p_value = if statistic > 0.1 { 0.01 } else { 0.5 };
let is_random_walk = variance_ratio > 0.8 && variance_ratio < 1.2;
Ok(Self {
statistic,
p_value,
variance_ratio,
is_random_walk,
})
}
}
impl RunsTest {
pub fn compute(values: &[f64]) -> Result<Self> {
if values.is_empty() {
return Ok(Self {
n_runs: 0,
expected_runs: 0.0,
statistic: 0.0,
p_value: 0.5,
is_random: true,
});
}
let median = {
let mut sorted = values.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
if sorted.len() % 2 == 0 {
(sorted[sorted.len() / 2 - 1] + sorted[sorted.len() / 2]) / 2.0
} else {
sorted[sorted.len() / 2]
}
};
let binary: Vec<bool> = values.iter().map(|&x| x >= median).collect();
let mut n_runs = 1;
for i in 1..binary.len() {
if binary[i] != binary[i - 1] {
n_runs += 1;
}
}
let n_pos = binary.iter().filter(|&&x| x).count() as f64;
let n_neg = binary.len() as f64 - n_pos;
let n = binary.len() as f64;
let expected_runs = if n > 0.0 {
(2.0 * n_pos * n_neg) / n + 1.0
} else {
0.0
};
let variance = if n > 1.0 {
(2.0 * n_pos * n_neg * (2.0 * n_pos * n_neg - n)) / (n * n * (n - 1.0))
} else {
1.0
};
let statistic = if variance > 0.0 {
(n_runs as f64 - expected_runs) / variance.sqrt()
} else {
0.0
};
let p_value = if statistic.abs() > 1.96 { 0.05 } else { 0.5 };
let is_random = p_value > 0.05;
Ok(Self {
n_runs,
expected_runs,
statistic,
p_value,
is_random,
})
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::time_series::core::{Frequency, TimeSeriesBuilder};
use chrono::{TimeZone, Utc};
fn create_test_series() -> TimeSeries {
let mut builder = TimeSeriesBuilder::new();
for i in 0..100 {
let timestamp = Utc
.timestamp_opt(1640995200 + i * 86400, 0)
.single()
.expect("operation should succeed");
let value = 10.0 + i as f64 * 0.1 + (i as f64 % 7.0 - 3.0) * 0.5;
builder = builder.add_point(timestamp, value);
}
builder
.frequency(Frequency::Daily)
.build()
.expect("operation should succeed")
}
#[test]
fn test_time_series_stats_computation() {
let ts = create_test_series();
let stats = TimeSeriesStats::compute(&ts).expect("operation should succeed");
assert!(stats.descriptive.count > 0);
assert!(stats.descriptive.mean > 0.0);
assert!(stats.descriptive.std > 0.0);
assert!(stats.descriptive.min < stats.descriptive.max);
}
#[test]
fn test_adf_test() {
let values: Vec<f64> = (0..50).map(|i| i as f64 + (i as f64 * 0.1).sin()).collect();
let result = AugmentedDickeyFullerTest::compute(&values).expect("operation should succeed");
assert!(result.statistic != 0.0);
assert!(result.p_value >= 0.0 && result.p_value <= 1.0);
assert!(result.critical_values.contains_key("5%"));
}
#[test]
fn test_kpss_test() {
let values: Vec<f64> = (0..50).map(|i| (i as f64 * 0.1).sin()).collect();
let result = KwiatkowskiPhillipsSchmidtShinTest::compute(&values, "constant")
.expect("operation should succeed");
assert!(result.statistic >= 0.0);
assert!(result.p_value >= 0.0 && result.p_value <= 1.0);
assert!(result.critical_values.contains_key("5%"));
}
#[test]
fn test_ljung_box_test() {
let values: Vec<f64> = (0..50).map(|i| (i as f64 * 0.1).sin()).collect();
let result = LjungBoxTest::compute(&values, 10).expect("operation should succeed");
assert!(result.statistic >= 0.0);
assert!(result.p_value >= 0.0 && result.p_value <= 1.0);
assert_eq!(result.n_lags, 10);
}
#[test]
fn test_jarque_bera_test() {
let values: Vec<f64> = (0..100).map(|i| (i as f64 * 0.1).sin()).collect();
let result = JarqueBeraTest::compute(&values).expect("operation should succeed");
assert!(result.statistic >= 0.0);
assert!(result.p_value >= 0.0 && result.p_value <= 1.0);
assert!(result.skewness_stat >= 0.0);
assert!(result.kurtosis_stat >= 0.0);
}
#[test]
fn test_grubbs_test() {
let mut values: Vec<f64> = (0..20).map(|i| i as f64).collect();
values.push(100.0);
let result = GrubbsTest::compute(&values).expect("operation should succeed");
assert!(result.statistic > 0.0);
assert!(result.has_outlier);
assert_eq!(result.outlier_index, Some(20)); }
#[test]
fn test_modified_z_score_test() {
let mut values: Vec<f64> = (0..20).map(|i| i as f64).collect();
values.push(100.0);
let result = ModifiedZScoreTest::compute(&values, 3.5).expect("operation should succeed");
assert_eq!(result.modified_z_scores.len(), values.len());
assert!(result.has_outliers);
assert!(!result.outlier_indices.is_empty());
}
#[test]
fn test_iqr_outlier_test() {
let mut values: Vec<f64> = (0..20).map(|i| i as f64).collect();
values.push(100.0);
let result = IQROutlierTest::compute(&values).expect("operation should succeed");
assert!(result.q3 > result.q1);
assert!(result.iqr > 0.0);
assert!(result.upper_fence > result.lower_fence);
assert!(result.has_outliers);
}
}