1use ndarray::{Array1, Array2};
8use so_core::error::{Error, Result};
9use std::collections::HashMap;
10
11pub fn acf(series: &Array1<f64>, max_lag: usize) -> Array1<f64> {
13 let n = series.len();
14 let mean = series.mean().unwrap_or(0.0);
15 let variance = series.var(1.0);
16
17 if variance <= 0.0 || n <= 1 {
18 return Array1::zeros(max_lag.min(n - 1));
19 }
20
21 let mut acf_values = Array1::zeros(max_lag.min(n - 1));
22
23 for lag in 1..=max_lag.min(n - 1) {
24 let mut autocov = 0.0;
25 for t in lag..n {
26 autocov += (series[t] - mean) * (series[t - lag] - mean);
27 }
28 acf_values[lag - 1] = autocov / (variance * n as f64);
29 }
30
31 acf_values
32}
33
34pub fn pacf(series: &Array1<f64>, max_lag: usize) -> Result<Array1<f64>> {
36 let n = series.len();
37 if n <= max_lag {
38 return Err(Error::DataError(format!(
39 "Need more observations than max_lag: n={}, max_lag={}",
40 n, max_lag
41 )));
42 }
43
44 let mut pacf_values = Array1::zeros(max_lag);
45
46 let mut phi = Array1::zeros(max_lag + 1);
48 let mut v = Array1::zeros(max_lag + 1);
49
50 phi[0] = 1.0;
52 v[0] = series.var(1.0);
53
54 let r = acf(series, max_lag);
56
57 for k in 1..=max_lag {
58 let mut num = r[k - 1];
60 for j in 1..k {
61 num -= phi[j] * r[(k - j - 1).min(max_lag - 1)];
62 }
63
64 let phi_kk = num / v[k - 1];
65 pacf_values[k - 1] = phi_kk;
66
67 phi[k] = phi_kk;
69 for j in 1..k {
70 phi[j] = phi[j] - phi_kk * phi[k - j];
71 }
72
73 v[k] = v[k - 1] * (1.0 - phi_kk.powi(2));
74 }
75
76 Ok(pacf_values)
77}
78
79pub fn ccf(x: &Array1<f64>, y: &Array1<f64>, max_lag: usize) -> Array1<f64> {
81 let n = x.len();
82 if n != y.len() {
83 return Array1::zeros(max_lag * 2 + 1);
84 }
85
86 let x_mean = x.mean().unwrap_or(0.0);
87 let y_mean = y.mean().unwrap_or(0.0);
88 let x_var = x.var(1.0);
89 let y_var = y.var(1.0);
90
91 if x_var <= 0.0 || y_var <= 0.0 {
92 return Array1::zeros(max_lag * 2 + 1);
93 }
94
95 let mut ccf_values = Array1::zeros(max_lag * 2 + 1);
96
97 for lag in -(max_lag as isize)..=max_lag as isize {
98 let idx = (lag + max_lag as isize) as usize;
99 let mut cross_cov = 0.0;
100
101 if lag >= 0 {
102 for t in lag as usize..n {
103 cross_cov += (x[t] - x_mean) * (y[t - lag as usize] - y_mean);
104 }
105 } else {
106 for t in (-lag) as usize..n {
107 cross_cov += (x[t + lag as usize] - x_mean) * (y[t] - y_mean);
108 }
109 }
110
111 ccf_values[idx] = cross_cov / (x_var.sqrt() * y_var.sqrt() * n as f64);
112 }
113
114 ccf_values
115}
116
117pub fn periodogram(series: &Array1<f64>) -> (Array1<f64>, Array1<f64>) {
119 let n = series.len();
120 let n_freq = n / 2 + 1;
121
122 let mut frequencies = Array1::zeros(n_freq);
123 let mut spectrum = Array1::zeros(n_freq);
124
125 for k in 0..n_freq {
127 frequencies[k] = k as f64 / n as f64;
128
129 let mut real = 0.0;
131 let mut imag = 0.0;
132
133 for t in 0..n {
134 let angle = -2.0 * std::f64::consts::PI * k as f64 * t as f64 / n as f64;
135 real += series[t] * angle.cos();
136 imag += series[t] * angle.sin();
137 }
138
139 spectrum[k] = (real.powi(2) + imag.powi(2)) / n as f64;
141 }
142
143 (frequencies, spectrum)
144}
145
146pub fn spectrum(series: &Array1<f64>, window: &str, bandwidth: f64) -> (Array1<f64>, Array1<f64>) {
148 let (freq, mut periodogram) = periodogram(series);
149 let n_freq = freq.len();
150
151 match window {
153 "bartlett" => {
154 let m = (bandwidth * n_freq as f64).round() as usize;
155 for k in 0..n_freq {
156 let mut smoothed = 0.0;
157 let mut weight_sum = 0.0;
158
159 for j in (k as isize - m as isize)..=(k as isize + m as isize) {
160 if j >= 0 && (j as usize) < n_freq {
161 let weight = 1.0 - (j - k as isize).abs() as f64 / m as f64;
162 smoothed += weight * periodogram[j as usize];
163 weight_sum += weight;
164 }
165 }
166
167 if weight_sum > 0.0 {
168 periodogram[k] = smoothed / weight_sum;
169 }
170 }
171 }
172 "parzen" => {
173 let m = (bandwidth * n_freq as f64).round() as usize;
174 for k in 0..n_freq {
175 let mut smoothed = 0.0;
176 let mut weight_sum = 0.0;
177
178 for j in (k as isize - m as isize)..=(k as isize + m as isize) {
179 if j >= 0 && (j as usize) < n_freq {
180 let u = (j - k as isize).abs() as f64 / m as f64;
181 let weight = if u <= 0.5 {
182 1.0 - 6.0 * u.powi(2) + 6.0 * u.powi(3)
183 } else {
184 2.0 * (1.0 - u).powi(3)
185 };
186
187 smoothed += weight * periodogram[j as usize];
188 weight_sum += weight;
189 }
190 }
191
192 if weight_sum > 0.0 {
193 periodogram[k] = smoothed / weight_sum;
194 }
195 }
196 }
197 _ => {} }
199
200 (freq, periodogram)
201}
202
203pub fn detrend_poly(series: &Array1<f64>, degree: usize) -> Result<Array1<f64>> {
205 let n = series.len();
206 if n <= degree {
207 return Err(Error::DataError(format!(
208 "Need more observations than degree: n={}, degree={}",
209 n, degree
210 )));
211 }
212
213 let mut x = Array2::zeros((n, degree + 1));
215 for i in 0..n {
216 for j in 0..=degree {
217 x[(i, j)] = (i as f64).powi(j as i32);
218 }
219 }
220
221 use so_linalg;
223 let xt = x.t();
224 let xtx = xt.dot(&x);
225 let xty = xt.dot(series);
226
227 let beta = so_linalg::solve(&xtx, &xty)
228 .map_err(|e| Error::DataError(format!("Detrend failed: {}", e)))?;
229
230 let trend = x.dot(&beta);
232 let detrended = series - &trend;
233
234 Ok(detrended)
235}
236
237pub fn box_cox(series: &Array1<f64>, lambda: f64) -> Array1<f64> {
239 if lambda.abs() < 1e-10 {
240 series.mapv(|x| (x + 1e-10).ln())
242 } else {
243 series.mapv(|x| (x.powf(lambda) - 1.0) / lambda)
244 }
245}
246
247pub fn box_cox_lambda(series: &Array1<f64>, lambda_range: (f64, f64), steps: usize) -> f64 {
249 let (min_lambda, max_lambda) = lambda_range;
250 let step = (max_lambda - min_lambda) / steps as f64;
251
252 let mut best_lambda = 0.0;
253 let mut best_log_lik = f64::NEG_INFINITY;
254
255 for i in 0..=steps {
256 let lambda = min_lambda + i as f64 * step;
257 let transformed = box_cox(series, lambda);
258
259 let mean = transformed.mean().unwrap_or(0.0);
261 let variance = transformed.var(1.0);
262 if variance > 0.0 {
263 let log_lik = -0.5 * series.len() as f64 * (2.0 * std::f64::consts::PI * variance).ln()
264 - 0.5 / variance * transformed.mapv(|x| (x - mean).powi(2)).sum()
265 + (lambda - 1.0) * series.mapv(|x| x.ln()).sum();
266
267 if log_lik > best_log_lik {
268 best_log_lik = log_lik;
269 best_lambda = lambda;
270 }
271 }
272 }
273
274 best_lambda
275}
276
277pub fn rolling_statistic(
279 series: &Array1<f64>,
280 window: usize,
281 stat: &str,
282 center: bool,
283) -> Array1<f64> {
284 let n = series.len();
285 let mut result = Array1::zeros(n);
286
287 for i in 0..n {
288 let start = if center {
289 i.saturating_sub(window / 2)
290 } else {
291 i.saturating_sub(window - 1).min(i)
292 };
293 let end = if center {
294 (i + window / 2 + 1).min(n)
295 } else {
296 (i + 1).min(n)
297 };
298
299 let window_size = end - start;
300 if window_size > 0 {
301 let window_data = series.slice(ndarray::s![start..end]);
302
303 result[i] = match stat {
304 "mean" => window_data.mean().unwrap_or(0.0),
305 "median" => {
306 let mut sorted: Vec<f64> = window_data.to_vec();
307 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
308 let mid = window_size / 2;
309 if window_size % 2 == 0 {
310 (sorted[mid - 1] + sorted[mid]) / 2.0
311 } else {
312 sorted[mid]
313 }
314 }
315 "std" => window_data.std(1.0),
316 "var" => window_data.var(1.0),
317 "min" => window_data.iter().fold(f64::INFINITY, |a, &b| a.min(b)),
318 "max" => window_data.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b)),
319 "sum" => window_data.sum(),
320 _ => window_data.mean().unwrap_or(0.0),
321 };
322 }
323 }
324
325 result
326}
327
328pub fn ewma(series: &Array1<f64>, alpha: f64) -> Array1<f64> {
330 let n = series.len();
331 let mut ewma_values = Array1::zeros(n);
332
333 if n > 0 {
334 ewma_values[0] = series[0];
335 for i in 1..n {
336 ewma_values[i] = alpha * series[i] + (1.0 - alpha) * ewma_values[i - 1];
337 }
338 }
339
340 ewma_values
341}
342
343pub fn seasonal_dummies(n: usize, period: usize, include_all: bool) -> Array2<f64> {
345 let n_dummies = if include_all { period } else { period - 1 };
346 let mut dummies = Array2::zeros((n, n_dummies));
347
348 for i in 0..n {
349 let season = i % period;
350 if season < n_dummies {
351 dummies[(i, season)] = 1.0;
352 }
353 }
354
355 dummies
356}
357
358pub fn forecast_errors(actual: &Array1<f64>, forecast: &Array1<f64>) -> HashMap<String, f64> {
360 use std::collections::HashMap;
361
362 let n = actual.len().min(forecast.len());
363 let mut errors = HashMap::new();
364
365 if n == 0 {
366 return errors;
367 }
368
369 let mut mae = 0.0;
370 let mut mse = 0.0;
371 let mut mape = 0.0;
372 let mut mape_count = 0;
373
374 for i in 0..n {
375 let error = actual[i] - forecast[i];
376 mae += error.abs();
377 mse += error.powi(2);
378
379 if actual[i] != 0.0 {
380 mape += (error.abs() / actual[i].abs()) * 100.0;
381 mape_count += 1;
382 }
383 }
384
385 errors.insert("MAE".to_string(), mae / n as f64);
386 errors.insert("MSE".to_string(), mse / n as f64);
387 errors.insert("RMSE".to_string(), (mse / n as f64).sqrt());
388
389 if mape_count > 0 {
390 errors.insert("MAPE".to_string(), mape / mape_count as f64);
391 }
392
393 let naive_forecast: f64 = if n > 1 {
395 let mut naive_error = 0.0;
396 for i in 1..n {
397 naive_error += (actual[i] - actual[i - 1]).powi(2);
398 }
399 (naive_error / (n - 1) as f64).sqrt()
400 } else {
401 0.0
402 };
403
404 let rmse = (mse / n as f64).sqrt();
405 if naive_forecast > 0.0 {
406 errors.insert("TheilU".to_string(), rmse / naive_forecast);
407 }
408
409 errors
410}
411
/// Model-selection information criteria from a fitted log-likelihood.
///
/// Returns a map with:
/// - `"AIC"`  = 2k - 2·LL
/// - `"BIC"`  = k·ln(n) - 2·LL
/// - `"AICc"` = AIC + 2k(k+1)/(n - k - 1), falling back to plain AIC when
///   `n <= k + 1` (where the correction's denominator is non-positive).
pub fn information_criteria(
    log_likelihood: f64,
    n_obs: usize,
    n_params: usize,
) -> HashMap<String, f64> {
    let k = n_params as f64;
    let n = n_obs as f64;

    let mut criteria = HashMap::new();

    let aic = 2.0 * k - 2.0 * log_likelihood;
    criteria.insert("AIC".to_string(), aic);

    criteria.insert("BIC".to_string(), n.ln() * k - 2.0 * log_likelihood);

    // Small-sample correction; undefined when n - k - 1 <= 0.
    let aicc = if n_obs > n_params + 1 {
        aic + 2.0 * k * (k + 1.0) / (n - k - 1.0)
    } else {
        aic
    };
    criteria.insert("AICc".to_string(), aicc);

    criteria
}
441
442pub fn diebold_mariano(
444 errors_a: &Array1<f64>,
445 errors_b: &Array1<f64>,
446 loss_fn: &str,
447) -> (f64, f64) {
448 let n = errors_a.len().min(errors_b.len());
449 if n < 2 {
450 return (0.0, 1.0);
451 }
452
453 let mut d = Array1::zeros(n);
455 for i in 0..n {
456 let loss_a = match loss_fn {
457 "squared" => errors_a[i].powi(2),
458 "absolute" => errors_a[i].abs(),
459 "percentage" => {
460 if errors_a[i] != 0.0 {
461 errors_a[i].abs()
462 } else {
463 0.0
464 }
465 }
466 _ => errors_a[i].powi(2),
467 };
468
469 let loss_b = match loss_fn {
470 "squared" => errors_b[i].powi(2),
471 "absolute" => errors_b[i].abs(),
472 "percentage" => {
473 if errors_b[i] != 0.0 {
474 errors_b[i].abs()
475 } else {
476 0.0
477 }
478 }
479 _ => errors_b[i].powi(2),
480 };
481
482 d[i] = loss_a - loss_b;
483 }
484
485 let mean_d = d.mean().unwrap_or(0.0);
487 let var_d = d.var(1.0);
488
489 if var_d <= 0.0 {
490 return (0.0, 1.0);
491 }
492
493 let dm_stat = mean_d / (var_d / n as f64).sqrt();
494
495 use super::results::chi2_cdf;
497 let p_value = 2.0 * (1.0 - chi2_cdf(1, dm_stat.abs()));
498
499 (dm_stat, p_value)
500}