1use ndarray::Array1;
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6
7use so_core::data::{DataFrame, Series};
8use so_core::error::{Error, Result};
9
10#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
12pub enum Frequency {
13 Yearly,
15 Quarterly,
17 Monthly,
19 Weekly,
21 Daily,
23 Hourly,
25 Minutely,
27 Secondly,
29 Custom(u32),
31 Irregular,
33}
34
35impl Frequency {
36 pub fn periods_per_year(&self) -> Option<f64> {
38 match self {
39 Frequency::Yearly => Some(1.0),
40 Frequency::Quarterly => Some(4.0),
41 Frequency::Monthly => Some(12.0),
42 Frequency::Weekly => Some(52.1775), Frequency::Daily => Some(365.25), Frequency::Hourly => Some(365.25 * 24.0),
45 Frequency::Minutely => Some(365.25 * 24.0 * 60.0),
46 Frequency::Secondly => Some(365.25 * 24.0 * 60.0 * 60.0),
47 Frequency::Custom(n) => Some(*n as f64),
48 Frequency::Irregular => None,
49 }
50 }
51
52 pub fn name(&self) -> &'static str {
54 match self {
55 Frequency::Yearly => "Yearly",
56 Frequency::Quarterly => "Quarterly",
57 Frequency::Monthly => "Monthly",
58 Frequency::Weekly => "Weekly",
59 Frequency::Daily => "Daily",
60 Frequency::Hourly => "Hourly",
61 Frequency::Minutely => "Minutely",
62 Frequency::Secondly => "Secondly",
63 Frequency::Custom(_n) => "Custom",
64 Frequency::Irregular => "Irregular",
65 }
66 }
67}
68
69#[derive(Debug, Clone, Serialize, Deserialize)]
71pub struct TimeSeries {
72 pub name: String,
74 pub timestamps: Vec<i64>,
76 pub values: Array1<f64>,
78 pub frequency: Option<Frequency>,
80 pub metadata: HashMap<String, String>,
82}
83
84impl TimeSeries {
85 pub fn new(
87 name: impl Into<String>,
88 timestamps: Vec<i64>,
89 values: Array1<f64>,
90 frequency: Option<Frequency>,
91 ) -> Result<Self> {
92 if timestamps.len() != values.len() {
93 return Err(Error::DimensionMismatch(format!(
94 "Timestamps length {} != values length {}",
95 timestamps.len(),
96 values.len()
97 )));
98 }
99
100 if values.len() < 2 {
101 return Err(Error::DataError(
102 "Time series must have at least 2 observations".to_string(),
103 ));
104 }
105
106 for i in 1..timestamps.len() {
108 if timestamps[i] <= timestamps[i - 1] {
109 return Err(Error::DataError(
110 "Timestamps must be strictly increasing".to_string(),
111 ));
112 }
113 }
114
115 Ok(Self {
116 name: name.into(),
117 timestamps,
118 values,
119 frequency,
120 metadata: HashMap::new(),
121 })
122 }
123
124 pub fn from_dataframe(df: &DataFrame, value_col: &str, date_col: &str) -> Result<Self> {
126 let value_series = df
127 .column(value_col)
128 .ok_or_else(|| Error::DataError(format!("Value column '{}' not found", value_col)))?;
129
130 let date_series = df
131 .column(date_col)
132 .ok_or_else(|| Error::DataError(format!("Date column '{}' not found", date_col)))?;
133
134 let timestamps: Vec<i64> = date_series.data().iter().map(|&x| x as i64).collect();
137
138 Self::new(
139 value_col,
140 timestamps,
141 value_series.data().to_owned(),
142 None, )
144 }
145
146 pub fn regular(name: impl Into<String>, values: Array1<f64>, frequency: Frequency) -> Self {
148 let n = values.len();
149 let timestamps: Vec<i64> = (0..n).map(|i| i as i64).collect();
150
151 Self {
152 name: name.into(),
153 timestamps,
154 values,
155 frequency: Some(frequency),
156 metadata: HashMap::new(),
157 }
158 }
159
160 pub fn len(&self) -> usize {
162 self.values.len()
163 }
164
165 pub fn is_empty(&self) -> bool {
167 self.values.is_empty()
168 }
169
170 pub fn name(&self) -> &str {
172 &self.name
173 }
174
175 pub fn values(&self) -> &Array1<f64> {
177 &self.values
178 }
179
180 pub fn timestamps(&self) -> &[i64] {
182 &self.timestamps
183 }
184
185 pub fn frequency(&self) -> Option<Frequency> {
187 self.frequency
188 }
189
190 pub fn set_metadata(&mut self, key: impl Into<String>, value: impl Into<String>) {
192 self.metadata.insert(key.into(), value.into());
193 }
194
195 pub fn get_metadata(&self, key: &str) -> Option<&String> {
197 self.metadata.get(key)
198 }
199
200 pub fn start_time(&self) -> Option<i64> {
202 self.timestamps.first().copied()
203 }
204
205 pub fn end_time(&self) -> Option<i64> {
207 self.timestamps.last().copied()
208 }
209
210 pub fn stats(&self) -> TimeSeriesStats {
212 let n = self.len() as f64;
213
214 let mean = self.values.mean().unwrap_or(0.0);
215 let variance = self.values.var(1.0);
216 let std = variance.sqrt();
217
218 let acf1 = if n > 1.0 {
220 let mut sum = 0.0;
221 for i in 1..self.len() {
222 sum += (self.values[i] - mean) * (self.values[i - 1] - mean);
223 }
224 sum / ((n - 1.0) * variance)
225 } else {
226 0.0
227 };
228
229 TimeSeriesStats {
230 n_obs: self.len(),
231 mean,
232 std,
233 variance,
234 min: self.values.iter().fold(f64::INFINITY, |a, &b| a.min(b)),
235 max: self.values.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b)),
236 acf1,
237 }
238 }
239
240 pub fn diff(&self, lag: usize, order: usize) -> Result<TimeSeries> {
242 if lag < 1 {
243 return Err(Error::DataError("Lag must be >= 1".to_string()));
244 }
245
246 if order < 1 {
247 return Ok(self.clone());
248 }
249
250 let mut current = self.values.clone();
251 let mut timestamps = self.timestamps[lag..].to_vec();
252
253 for _ in 0..order {
254 let n = current.len();
255 if n <= lag {
256 return Err(Error::DataError(
257 "Not enough observations for differencing".to_string(),
258 ));
259 }
260
261 let diffed: Array1<f64> = (lag..n).map(|i| current[i] - current[i - lag]).collect();
262
263 current = diffed;
264 timestamps = timestamps[lag..].to_vec();
265 }
266
267 TimeSeries::new(
268 format!("{}_diff{}", self.name, order),
269 timestamps,
270 current,
271 self.frequency,
272 )
273 }
274
275 pub fn log(&self, offset: f64) -> TimeSeries {
277 let values = self.values.mapv(|x| (x + offset).ln());
278
279 TimeSeries {
280 name: format!("log({})", self.name),
281 timestamps: self.timestamps.clone(),
282 values,
283 frequency: self.frequency,
284 metadata: self.metadata.clone(),
285 }
286 }
287
288 pub fn boxcox(&self, lambda: f64) -> TimeSeries {
290 let values = if lambda == 0.0 {
291 self.values.mapv(|x| x.ln())
292 } else {
293 self.values.mapv(|x| (x.powf(lambda) - 1.0) / lambda)
294 };
295
296 TimeSeries {
297 name: format!("boxcox({}, λ={})", self.name, lambda),
298 timestamps: self.timestamps.clone(),
299 values,
300 frequency: self.frequency,
301 metadata: self.metadata.clone(),
302 }
303 }
304
305 pub fn slice(&self, start: Option<i64>, end: Option<i64>) -> Result<TimeSeries> {
307 let start_idx = match start {
308 Some(t) => self.timestamps.iter().position(|&ts| ts >= t).unwrap_or(0),
309 None => 0,
310 };
311
312 let end_idx = match end {
313 Some(t) => self
314 .timestamps
315 .iter()
316 .rposition(|&ts| ts <= t)
317 .map(|pos| pos + 1)
318 .unwrap_or(self.len()),
319 None => self.len(),
320 };
321
322 if start_idx >= end_idx {
323 return Err(Error::DataError("Invalid slice: start >= end".to_string()));
324 }
325
326 TimeSeries::new(
327 self.name.clone(),
328 self.timestamps[start_idx..end_idx].to_vec(),
329 self.values
330 .slice(ndarray::s![start_idx..end_idx])
331 .to_owned(),
332 self.frequency,
333 )
334 }
335
336 pub fn fillna(&self, method: FillMethod) -> TimeSeries {
338 let values = self.values.to_vec();
339 let filled = match method {
340 FillMethod::Mean => {
341 let mean = self.values.mean().unwrap_or(0.0);
342 values
343 .iter()
344 .map(|&x| if x.is_nan() { mean } else { x })
345 .collect()
346 }
347 FillMethod::Median => {
348 let mut sorted: Vec<f64> =
349 values.iter().filter(|&&x| !x.is_nan()).copied().collect();
350 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
351 let median = if sorted.is_empty() {
352 0.0
353 } else if sorted.len() % 2 == 0 {
354 (sorted[sorted.len() / 2 - 1] + sorted[sorted.len() / 2]) / 2.0
355 } else {
356 sorted[sorted.len() / 2]
357 };
358 values
359 .iter()
360 .map(|&x| if x.is_nan() { median } else { x })
361 .collect()
362 }
363 FillMethod::ForwardFill => {
364 let mut last_valid = 0.0;
365 values
366 .iter()
367 .map(|&x| {
368 if !x.is_nan() {
369 last_valid = x;
370 x
371 } else {
372 last_valid
373 }
374 })
375 .collect()
376 }
377 FillMethod::BackwardFill => {
378 let mut filled = values.clone();
379 let mut last_valid = 0.0;
380 for i in (0..filled.len()).rev() {
381 if !filled[i].is_nan() {
382 last_valid = filled[i];
383 } else {
384 filled[i] = last_valid;
385 }
386 }
387 filled
388 }
389 FillMethod::Linear => {
390 let mut filled = values.clone();
391 let mut i = 0;
392 while i < filled.len() {
393 if filled[i].is_nan() {
394 let start = i;
395 while i < filled.len() && filled[i].is_nan() {
396 i += 1;
397 }
398 let end = i;
399
400 if start > 0 && end < filled.len() {
401 let prev_val = filled[start - 1];
402 let next_val = filled[end];
403 let step = (next_val - prev_val) / (end - start + 1) as f64;
404
405 for j in start..end {
406 filled[j] = prev_val + step * (j - start + 1) as f64;
407 }
408 }
409 }
410 i += 1;
411 }
412 filled
413 }
414 };
415
416 TimeSeries {
417 name: format!("{}_filled", self.name),
418 timestamps: self.timestamps.clone(),
419 values: Array1::from_vec(filled),
420 frequency: self.frequency,
421 metadata: self.metadata.clone(),
422 }
423 }
424
425 pub fn detect_outliers(&self, threshold: f64) -> Vec<usize> {
427 let mut sorted: Vec<f64> = self.values.to_vec();
429 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
430
431 let n = sorted.len();
432 let q1_idx = ((n as f64) * 0.25).floor() as usize;
433 let q3_idx = ((n as f64) * 0.75).floor() as usize;
434
435 let q1 = sorted.get(q1_idx).copied().unwrap_or(0.0);
436 let q3 = sorted.get(q3_idx).copied().unwrap_or(0.0);
437 let iqr = q3 - q1;
438
439 let lower_bound = q1 - threshold * iqr;
440 let upper_bound = q3 + threshold * iqr;
441
442 self.values
443 .iter()
444 .enumerate()
445 .filter(|&(_, &x)| x < lower_bound || x > upper_bound)
446 .map(|(i, _)| i)
447 .collect()
448 }
449
450 pub fn to_dataframe(&self) -> DataFrame {
452 let mut columns = HashMap::new();
453
454 let timestamps_series = Series::new(
456 "timestamp",
457 Array1::from_vec(self.timestamps.iter().map(|&t| t as f64).collect()),
458 );
459 columns.insert("timestamp".to_string(), timestamps_series);
460
461 let values_series = Series::new(&self.name, self.values.clone());
463 columns.insert(self.name.clone(), values_series);
464
465 DataFrame::from_series(columns).unwrap_or_default()
466 }
467}
468
/// Summary statistics of a time series, as produced by `TimeSeries::stats`.
#[derive(Debug, Clone)]
pub struct TimeSeriesStats {
    /// Number of observations.
    pub n_obs: usize,
    /// Arithmetic mean of the values.
    pub mean: f64,
    /// Standard deviation (square root of `variance`).
    pub std: f64,
    /// Variance of the values.
    pub variance: f64,
    /// Smallest value.
    pub min: f64,
    /// Largest value.
    pub max: f64,
    /// Lag-1 autocorrelation.
    pub acf1: f64,
}
480
/// Strategy used by `TimeSeries::fillna` to replace NaN values.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum FillMethod {
    /// Replace NaNs with the mean of the series.
    Mean,
    /// Replace NaNs with the median of the non-NaN values.
    Median,
    /// Replace NaNs with the most recent preceding non-NaN value.
    ForwardFill,
    /// Replace NaNs with the next following non-NaN value.
    BackwardFill,
    /// Linearly interpolate between the non-NaN neighbours of each NaN run.
    Linear,
}
490
491impl TimeSeriesStats {
492 pub fn summary(&self) -> String {
494 format!(
495 "Observations: {}\nMean: {:.4}\nStd: {:.4}\nMin: {:.4}\nMax: {:.4}\nACF(1): {:.4}",
496 self.n_obs, self.mean, self.std, self.min, self.max, self.acf1
497 )
498 }
499}