scirs2_transform/
time_series.rs

1//! Time series feature extraction
2//!
3//! This module provides utilities for extracting features from time series data,
4//! including Fourier features, wavelet features, and lag features.
5
6use scirs2_core::ndarray::{Array1, Array2, ArrayBase, Data, Ix1, Ix2};
7use scirs2_core::numeric::Complex;
8use scirs2_core::numeric::{Float, NumCast};
9use scirs2_fft::fft;
10
11use crate::error::{Result, TransformError};
12
13/// Fourier feature extractor for time series
14///
15/// Extracts frequency domain features using Fast Fourier Transform (FFT).
16/// Useful for capturing periodic patterns in time series data.
17#[derive(Debug, Clone)]
18pub struct FourierFeatures {
19    /// Number of Fourier components to extract
20    n_components: usize,
21    /// Whether to include phase information
22    include_phase: bool,
23    /// Whether to normalize by series length
24    normalize: bool,
25    /// Sampling frequency (if known)
26    sampling_freq: Option<f64>,
27}
28
29impl FourierFeatures {
30    /// Create a new FourierFeatures extractor
31    ///
32    /// # Arguments
33    /// * `n_components` - Number of frequency components to extract
34    pub fn new(ncomponents: usize) -> Self {
35        FourierFeatures {
36            n_components: ncomponents,
37            include_phase: false,
38            normalize: true,
39            sampling_freq: None,
40        }
41    }
42
43    /// Include phase information in features
44    pub fn with_phase(mut self) -> Self {
45        self.include_phase = true;
46        self
47    }
48
49    /// Set sampling frequency
50    pub fn with_sampling_freq(mut self, freq: f64) -> Self {
51        self.sampling_freq = Some(freq);
52        self
53    }
54
55    /// Extract Fourier features from a single time series
56    fn extract_features_1d<S>(&self, x: &ArrayBase<S, Ix1>) -> Result<Array1<f64>>
57    where
58        S: Data,
59        S::Elem: Float + NumCast,
60    {
61        let n = x.len();
62        if n == 0 {
63            return Err(TransformError::InvalidInput(
64                "Empty time series".to_string(),
65            ));
66        }
67
68        // Convert to f64 for FFT
69        let real_data: Vec<f64> = x
70            .iter()
71            .map(|&val| NumCast::from(val).unwrap_or(0.0))
72            .collect();
73
74        // Compute FFT
75        let fft_result = fft(&real_data, None)?;
76
77        // Extract features (only positive frequencies due to symmetry)
78        let n_freq = (n / 2).min(self.n_components);
79        let mut features = if self.include_phase {
80            Array1::zeros(n_freq * 2)
81        } else {
82            Array1::zeros(n_freq)
83        };
84
85        let norm_factor = if self.normalize { 1.0 / n as f64 } else { 1.0 };
86
87        for i in 0..n_freq {
88            let magnitude =
89                (fft_result[i].re * fft_result[i].re + fft_result[i].im * fft_result[i].im).sqrt()
90                    * norm_factor;
91
92            features[i] = magnitude;
93
94            if self.include_phase && magnitude > 1e-10 {
95                let phase = fft_result[i].im.atan2(fft_result[i].re);
96                features[n_freq + i] = phase;
97            }
98        }
99
100        Ok(features)
101    }
102
103    /// Transform time series data to Fourier features
104    ///
105    /// # Arguments
106    /// * `x` - Time series data, shape (n_samples, n_timesteps)
107    ///
108    /// # Returns
109    /// * Fourier features, shape (n_samples, n_features)
110    pub fn transform<S>(&self, x: &ArrayBase<S, Ix2>) -> Result<Array2<f64>>
111    where
112        S: Data,
113        S::Elem: Float + NumCast,
114    {
115        let n_samples = x.shape()[0];
116        let n_features = if self.include_phase {
117            self.n_components * 2
118        } else {
119            self.n_components
120        };
121
122        let mut result = Array2::zeros((n_samples, n_features));
123
124        for i in 0..n_samples {
125            let features = self.extract_features_1d(&x.row(i))?;
126            let feat_len = features.len().min(n_features);
127            result
128                .slice_mut(scirs2_core::ndarray::s![i, ..feat_len])
129                .assign(&features.slice(scirs2_core::ndarray::s![..feat_len]));
130        }
131
132        Ok(result)
133    }
134}
135
136/// Lag feature extractor for time series
137///
138/// Creates lagged versions of time series as features. Useful for
139/// autoregressive modeling and capturing temporal dependencies.
140#[derive(Debug, Clone)]
141pub struct LagFeatures {
142    /// List of lags to include
143    lags: Vec<usize>,
144    /// Whether to drop NaN values resulting from lagging
145    drop_na: bool,
146}
147
148impl LagFeatures {
149    /// Create a new LagFeatures extractor
150    ///
151    /// # Arguments
152    /// * `lags` - List of lag values (e.g., vec![1, 2, 3] for lags 1, 2, and 3)
153    pub fn new(lags: Vec<usize>) -> Self {
154        LagFeatures {
155            lags,
156            drop_na: true,
157        }
158    }
159
160    /// Create with a range of lags
161    pub fn with_range(start: usize, end: usize) -> Self {
162        let lags = (start..=end).collect();
163        LagFeatures {
164            lags,
165            drop_na: true,
166        }
167    }
168
169    /// Set whether to drop NaN values
170    pub fn with_drop_na(mut self, dropna: bool) -> Self {
171        self.drop_na = dropna;
172        self
173    }
174
175    /// Transform time series data to lag features
176    ///
177    /// # Arguments
178    /// * `x` - Time series data, shape (n_timesteps,) or (n_samples, n_timesteps)
179    ///
180    /// # Returns
181    /// * Lag features
182    pub fn transform_1d<S>(&self, x: &ArrayBase<S, Ix1>) -> Result<Array2<f64>>
183    where
184        S: Data,
185        S::Elem: Float + NumCast,
186    {
187        let n = x.len();
188        let max_lag = *self.lags.iter().max().unwrap_or(&0);
189
190        if max_lag >= n {
191            return Err(TransformError::InvalidInput(format!(
192                "Maximum lag {max_lag} must be less than series length {n}"
193            )));
194        }
195
196        let start_idx = if self.drop_na { max_lag } else { 0 };
197        let n_samples = n - start_idx;
198        let n_features = self.lags.len() + 1; // Original + lags
199
200        let mut result = Array2::zeros((n_samples, n_features));
201
202        // Original series
203        for i in 0..n_samples {
204            result[[i, 0]] = NumCast::from(x[start_idx + i]).unwrap_or(0.0);
205        }
206
207        // Lagged features
208        for (lag_idx, &lag) in self.lags.iter().enumerate() {
209            for i in 0..n_samples {
210                let idx = start_idx + i;
211                if idx >= lag {
212                    result[[i, lag_idx + 1]] = NumCast::from(x[idx - lag]).unwrap_or(0.0);
213                } else if !self.drop_na {
214                    result[[i, lag_idx + 1]] = f64::NAN;
215                }
216            }
217        }
218
219        Ok(result)
220    }
221
222    /// Transform multiple time series
223    pub fn transform<S>(&self, x: &ArrayBase<S, Ix2>) -> Result<Vec<Array2<f64>>>
224    where
225        S: Data,
226        S::Elem: Float + NumCast,
227    {
228        let n_series = x.shape()[0];
229        let mut results = Vec::new();
230
231        for i in 0..n_series {
232            let series = x.row(i);
233            let lag_features = self.transform_1d(&series)?;
234            results.push(lag_features);
235        }
236
237        Ok(results)
238    }
239}
240
241/// Wavelet feature extractor for time series
242///
243/// Extracts features using wavelet decomposition. Useful for
244/// multi-resolution analysis of time series.
245#[derive(Debug, Clone)]
246pub struct WaveletFeatures {
247    /// Wavelet type: 'db1' (Haar), 'db2', 'db4', 'db8', 'bior2.2', 'bior4.4'
248    wavelet: String,
249    /// Decomposition level
250    level: usize,
251    /// Whether to include approximation coefficients
252    include_approx: bool,
253}
254
255impl WaveletFeatures {
256    /// Create a new WaveletFeatures extractor
257    ///
258    /// # Arguments
259    /// * `wavelet` - Wavelet type (e.g., "db1" for Haar wavelet)
260    /// * `level` - Decomposition level
261    pub fn new(wavelet: &str, level: usize) -> Self {
262        WaveletFeatures {
263            wavelet: wavelet.to_string(),
264            level,
265            include_approx: true,
266        }
267    }
268
269    /// Set whether to include approximation coefficients
270    pub fn with_include_approx(mut self, include: bool) -> Self {
271        self.include_approx = include;
272        self
273    }
274
275    /// Haar wavelet transform (simplified)
276    #[allow(dead_code)]
277    fn haar_transform(&self, x: &[f64]) -> (Vec<f64>, Vec<f64>) {
278        let n = x.len();
279        let mut approx = Vec::with_capacity(n / 2);
280        let mut detail = Vec::with_capacity(n / 2);
281
282        for i in (0..n).step_by(2) {
283            if i + 1 < n {
284                approx.push((x[i] + x[i + 1]) / 2.0_f64.sqrt());
285                detail.push((x[i] - x[i + 1]) / 2.0_f64.sqrt());
286            } else {
287                // Handle odd length
288                approx.push(x[i]);
289            }
290        }
291
292        (approx, detail)
293    }
294
295    /// Get wavelet filter coefficients
296    fn get_wavelet_coeffs(&self) -> Result<(Vec<f64>, Vec<f64>)> {
297        match self.wavelet.as_str() {
298            "db1" | "haar" => {
299                // Haar wavelet
300                let h = vec![1.0 / 2.0_f64.sqrt(), 1.0 / 2.0_f64.sqrt()];
301                let g = vec![1.0 / 2.0_f64.sqrt(), -1.0 / 2.0_f64.sqrt()];
302                Ok((h, g))
303            }
304            "db2" => {
305                // Daubechies-2 wavelet
306                let sqrt3 = 3.0_f64.sqrt();
307                let denom = 4.0 * 2.0_f64.sqrt();
308                let h = vec![
309                    (1.0 + sqrt3) / denom,
310                    (3.0 + sqrt3) / denom,
311                    (3.0 - sqrt3) / denom,
312                    (1.0 - sqrt3) / denom,
313                ];
314                let g = vec![h[3], -h[2], h[1], -h[0]];
315                Ok((h, g))
316            }
317            "db4" => {
318                // Daubechies-4 wavelet
319                let h = vec![
320                    -0.010597401784997,
321                    0.032883011666983,
322                    0.030841381835987,
323                    -0.187034811718881,
324                    -0.027983769416984,
325                    0.630880767929590,
326                    0.714846570552542,
327                    0.230377813308855,
328                ];
329                let mut g = Vec::with_capacity(h.len());
330                for (i, &coeff) in h.iter().enumerate() {
331                    g.push(if i % 2 == 0 { coeff } else { -coeff });
332                }
333                g.reverse();
334                Ok((h, g))
335            }
336            "db8" => {
337                // Daubechies-8 wavelet
338                let h = vec![
339                    -0.00011747678400228,
340                    0.0006754494059985,
341                    -0.0003917403729959,
342                    -0.00487035299301066,
343                    0.008746094047015655,
344                    0.013981027917015516,
345                    -0.04408825393106472,
346                    -0.01736930100202211,
347                    0.128747426620186,
348                    0.00047248457399797254,
349                    -0.2840155429624281,
350                    -0.015829105256023893,
351                    0.5853546836548691,
352                    0.6756307362980128,
353                    0.3182301045617746,
354                    0.05441584224308161,
355                ];
356                let mut g = Vec::with_capacity(h.len());
357                for (i, &coeff) in h.iter().enumerate() {
358                    g.push(if i % 2 == 0 { coeff } else { -coeff });
359                }
360                g.reverse();
361                Ok((h, g))
362            }
363            "bior2.2" => {
364                // Biorthogonal 2.2 wavelet
365                let h = vec![
366                    0.0,
367                    -0.17677669529663687,
368                    0.35355339059327373,
369                    1.0606601717798214,
370                    0.35355339059327373,
371                    -0.17677669529663687,
372                    0.0,
373                ];
374                let g = vec![
375                    0.0,
376                    0.35355339059327373,
377                    -std::f64::consts::FRAC_1_SQRT_2,
378                    0.35355339059327373,
379                    0.0,
380                ];
381                Ok((h, g))
382            }
383            "bior4.4" => {
384                // Biorthogonal 4.4 wavelet
385                let h = vec![
386                    0.0,
387                    0.03314563036811941,
388                    -0.06629126073623884,
389                    -0.17677669529663687,
390                    0.4198446513295126,
391                    0.9943689110435825,
392                    0.4198446513295126,
393                    -0.17677669529663687,
394                    -0.06629126073623884,
395                    0.03314563036811941,
396                    0.0,
397                ];
398                let g = vec![
399                    0.0,
400                    -0.06453888262893856,
401                    0.04068941760955867,
402                    0.41809227322161724,
403                    -0.7884856164056651,
404                    0.41809227322161724,
405                    0.04068941760955867,
406                    -0.06453888262893856,
407                    0.0,
408                ];
409                Ok((h, g))
410            }
411            _ => Err(TransformError::InvalidInput(format!(
412                "Unsupported wavelet type: {}",
413                self.wavelet
414            ))),
415        }
416    }
417
418    /// Generic wavelet transform using filter bank
419    fn wavelet_transform(&self, x: &[f64]) -> Result<(Vec<f64>, Vec<f64>)> {
420        let (h, g) = self.get_wavelet_coeffs()?;
421
422        if x.len() < h.len() {
423            return Err(TransformError::InvalidInput(
424                "Input signal too short for selected wavelet".to_string(),
425            ));
426        }
427
428        let n = x.len();
429        let mut approx = Vec::with_capacity(n / 2);
430        let mut detail = Vec::with_capacity(n / 2);
431
432        // Convolution with downsampling
433        for i in (0..n).step_by(2) {
434            let mut h_sum = 0.0;
435            let mut g_sum = 0.0;
436
437            for (j, (&h_coeff, &g_coeff)) in h.iter().zip(g.iter()).enumerate() {
438                let idx = (i + j) % n; // Periodic boundary condition
439                h_sum += h_coeff * x[idx];
440                g_sum += g_coeff * x[idx];
441            }
442
443            approx.push(h_sum);
444            detail.push(g_sum);
445        }
446
447        Ok((approx, detail))
448    }
449
450    /// Multi-level wavelet decomposition
451    fn wavelet_decompose(&self, x: &[f64]) -> Result<Vec<Vec<f64>>> {
452        let mut coefficients = Vec::new();
453        let mut current = x.to_vec();
454
455        for _ in 0..self.level {
456            let (approx, detail) = self.wavelet_transform(&current)?;
457            coefficients.push(detail);
458            current = approx;
459
460            if current.len() < 2 {
461                break;
462            }
463        }
464
465        if self.include_approx {
466            coefficients.push(current);
467        }
468
469        Ok(coefficients)
470    }
471
472    /// Extract wavelet features from a single time series
473    fn extract_features_1d<S>(&self, x: &ArrayBase<S, Ix1>) -> Result<Array1<f64>>
474    where
475        S: Data,
476        S::Elem: Float + NumCast,
477    {
478        let x_vec: Vec<f64> = x.iter().map(|&v| NumCast::from(v).unwrap_or(0.0)).collect();
479
480        let coefficients = self.wavelet_decompose(&x_vec)?;
481
482        // Calculate total number of features
483        let n_features: usize = coefficients.iter().map(|c| c.len()).sum();
484        let mut features = Array1::zeros(n_features);
485
486        let mut idx = 0;
487        for coeff_level in coefficients {
488            for &coeff in &coeff_level {
489                features[idx] = coeff;
490                idx += 1;
491            }
492        }
493
494        Ok(features)
495    }
496
497    /// Transform time series data to wavelet features
498    ///
499    /// # Arguments
500    /// * `x` - Time series data, shape (n_samples, n_timesteps)
501    ///
502    /// # Returns
503    /// * Wavelet features
504    pub fn transform<S>(&self, x: &ArrayBase<S, Ix2>) -> Result<Vec<Array1<f64>>>
505    where
506        S: Data,
507        S::Elem: Float + NumCast,
508    {
509        let n_samples = x.shape()[0];
510        let mut results = Vec::new();
511
512        for i in 0..n_samples {
513            let features = self.extract_features_1d(&x.row(i))?;
514            results.push(features);
515        }
516
517        Ok(results)
518    }
519}
520
521/// Combined time series feature extractor
522///
523/// Combines multiple feature extraction methods for comprehensive
524/// time series feature engineering.
525#[derive(Debug, Clone)]
526pub struct TimeSeriesFeatures {
527    /// Whether to include Fourier features
528    use_fourier: bool,
529    /// Whether to include lag features
530    use_lags: bool,
531    /// Whether to include wavelet features
532    use_wavelets: bool,
533    /// Fourier feature configuration
534    fourier_config: Option<FourierFeatures>,
535    /// Lag feature configuration
536    lag_config: Option<LagFeatures>,
537    /// Wavelet feature configuration
538    wavelet_config: Option<WaveletFeatures>,
539}
540
541impl TimeSeriesFeatures {
542    /// Create a new combined feature extractor
543    pub fn new() -> Self {
544        TimeSeriesFeatures {
545            use_fourier: true,
546            use_lags: true,
547            use_wavelets: false,
548            fourier_config: Some(FourierFeatures::new(10)),
549            lag_config: Some(LagFeatures::with_range(1, 5)),
550            wavelet_config: None,
551        }
552    }
553
554    /// Configure Fourier features
555    pub fn with_fourier(mut self, n_components: usize, includephase: bool) -> Self {
556        self.use_fourier = true;
557        let mut fourier = FourierFeatures::new(n_components);
558        if includephase {
559            fourier = fourier.with_phase();
560        }
561        self.fourier_config = Some(fourier);
562        self
563    }
564
565    /// Configure lag features
566    pub fn with_lags(mut self, lags: Vec<usize>) -> Self {
567        self.use_lags = true;
568        self.lag_config = Some(LagFeatures::new(lags));
569        self
570    }
571
572    /// Configure wavelet features
573    pub fn with_wavelets(mut self, wavelet: &str, level: usize) -> Self {
574        self.use_wavelets = true;
575        self.wavelet_config = Some(WaveletFeatures::new(wavelet, level));
576        self
577    }
578}
579
580impl Default for TimeSeriesFeatures {
581    fn default() -> Self {
582        Self::new()
583    }
584}
585
586#[cfg(test)]
587mod tests {
588    use super::*;
589    use scirs2_core::ndarray::Array;
590
591    #[test]
592    fn test_fourier_features() {
593        // Create a simple sinusoidal signal
594        let n = 100;
595        let mut signal = Vec::new();
596        for i in 0..n {
597            let t = i as f64 / n as f64 * 4.0 * std::f64::consts::PI;
598            signal.push((t).sin() + 0.5 * (2.0 * t).sin());
599        }
600        let x = Array::from_shape_vec((1, n), signal).unwrap();
601
602        let fourier = FourierFeatures::new(10);
603        let features = fourier.transform(&x).unwrap();
604
605        assert_eq!(features.shape(), &[1, 10]);
606
607        // DC component should be near zero for this signal
608        assert!(features[[0, 0]].abs() < 1e-10);
609
610        // The fundamental frequency components should have significant magnitude
611        // sin(t) component should be at index 1
612        // sin(2t) component should be at index 2
613        assert!(features[[0, 1]] > 0.1); // Fundamental frequency
614        assert!(features[[0, 2]] > 0.04); // Second harmonic (0.5 amplitude)
615    }
616
617    #[test]
618    fn test_lag_features() {
619        let x = Array::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
620
621        let lag_extractor = LagFeatures::new(vec![1, 2]);
622        let features = lag_extractor.transform_1d(&x.view()).unwrap();
623
624        // Should have 4 samples (6 - max_lag(2)) and 3 features (original + 2 lags)
625        assert_eq!(features.shape(), &[4, 3]);
626
627        // Check first row: x[2]=3, x[1]=2, x[0]=1
628        assert_eq!(features[[0, 0]], 3.0);
629        assert_eq!(features[[0, 1]], 2.0);
630        assert_eq!(features[[0, 2]], 1.0);
631    }
632
633    #[test]
634    fn test_wavelet_features() {
635        let x = Array::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
636        let x_2d = x.clone().into_shape_with_order((1, 8)).unwrap();
637
638        let wavelet = WaveletFeatures::new("db1", 2);
639        let features = wavelet.transform(&x_2d).unwrap();
640
641        assert!(!features.is_empty());
642        assert!(features[0].len() > 0);
643    }
644}