scirs2_transform/
time_series.rs

1//! Time series feature extraction
2//!
3//! This module provides utilities for extracting features from time series data,
4//! including Fourier features, wavelet features, and lag features.
5
6use ndarray::{Array1, Array2, ArrayBase, Data, Ix1, Ix2};
7use num_complex::Complex;
8use num_traits::{Float, NumCast};
9use scirs2_fft::fft;
10
11use crate::error::{Result, TransformError};
12
13/// Fourier feature extractor for time series
14///
15/// Extracts frequency domain features using Fast Fourier Transform (FFT).
16/// Useful for capturing periodic patterns in time series data.
17#[derive(Debug, Clone)]
18pub struct FourierFeatures {
19    /// Number of Fourier components to extract
20    n_components: usize,
21    /// Whether to include phase information
22    include_phase: bool,
23    /// Whether to normalize by series length
24    normalize: bool,
25    /// Sampling frequency (if known)
26    sampling_freq: Option<f64>,
27}
28
29impl FourierFeatures {
30    /// Create a new FourierFeatures extractor
31    ///
32    /// # Arguments
33    /// * `n_components` - Number of frequency components to extract
34    pub fn new(ncomponents: usize) -> Self {
35        FourierFeatures {
36            n_components: ncomponents,
37            include_phase: false,
38            normalize: true,
39            sampling_freq: None,
40        }
41    }
42
43    /// Include phase information in features
44    pub fn with_phase(mut self) -> Self {
45        self.include_phase = true;
46        self
47    }
48
49    /// Set sampling frequency
50    pub fn with_sampling_freq(mut self, freq: f64) -> Self {
51        self.sampling_freq = Some(freq);
52        self
53    }
54
55    /// Extract Fourier features from a single time series
56    fn extract_features_1d<S>(&self, x: &ArrayBase<S, Ix1>) -> Result<Array1<f64>>
57    where
58        S: Data,
59        S::Elem: Float + NumCast,
60    {
61        let n = x.len();
62        if n == 0 {
63            return Err(TransformError::InvalidInput(
64                "Empty time series".to_string(),
65            ));
66        }
67
68        // Convert to f64 for FFT
69        let real_data: Vec<f64> = x
70            .iter()
71            .map(|&val| num_traits::cast::<S::Elem, f64>(val).unwrap_or(0.0))
72            .collect();
73
74        // Compute FFT
75        let fft_result = fft(&real_data, None)?;
76
77        // Extract features (only positive frequencies due to symmetry)
78        let n_freq = (n / 2).min(self.n_components);
79        let mut features = if self.include_phase {
80            Array1::zeros(n_freq * 2)
81        } else {
82            Array1::zeros(n_freq)
83        };
84
85        let norm_factor = if self.normalize { 1.0 / n as f64 } else { 1.0 };
86
87        for i in 0..n_freq {
88            let magnitude =
89                (fft_result[i].re * fft_result[i].re + fft_result[i].im * fft_result[i].im).sqrt()
90                    * norm_factor;
91
92            features[i] = magnitude;
93
94            if self.include_phase && magnitude > 1e-10 {
95                let phase = fft_result[i].im.atan2(fft_result[i].re);
96                features[n_freq + i] = phase;
97            }
98        }
99
100        Ok(features)
101    }
102
103    /// Transform time series data to Fourier features
104    ///
105    /// # Arguments
106    /// * `x` - Time series data, shape (n_samples, n_timesteps)
107    ///
108    /// # Returns
109    /// * Fourier features, shape (n_samples, n_features)
110    pub fn transform<S>(&self, x: &ArrayBase<S, Ix2>) -> Result<Array2<f64>>
111    where
112        S: Data,
113        S::Elem: Float + NumCast,
114    {
115        let n_samples = x.shape()[0];
116        let n_features = if self.include_phase {
117            self.n_components * 2
118        } else {
119            self.n_components
120        };
121
122        let mut result = Array2::zeros((n_samples, n_features));
123
124        for i in 0..n_samples {
125            let features = self.extract_features_1d(&x.row(i))?;
126            let feat_len = features.len().min(n_features);
127            result
128                .slice_mut(ndarray::s![i, ..feat_len])
129                .assign(&features.slice(ndarray::s![..feat_len]));
130        }
131
132        Ok(result)
133    }
134}
135
136/// Lag feature extractor for time series
137///
138/// Creates lagged versions of time series as features. Useful for
139/// autoregressive modeling and capturing temporal dependencies.
140#[derive(Debug, Clone)]
141pub struct LagFeatures {
142    /// List of lags to include
143    lags: Vec<usize>,
144    /// Whether to drop NaN values resulting from lagging
145    drop_na: bool,
146}
147
148impl LagFeatures {
149    /// Create a new LagFeatures extractor
150    ///
151    /// # Arguments
152    /// * `lags` - List of lag values (e.g., vec![1, 2, 3] for lags 1, 2, and 3)
153    pub fn new(lags: Vec<usize>) -> Self {
154        LagFeatures {
155            lags,
156            drop_na: true,
157        }
158    }
159
160    /// Create with a range of lags
161    pub fn with_range(start: usize, end: usize) -> Self {
162        let lags = (start..=end).collect();
163        LagFeatures {
164            lags,
165            drop_na: true,
166        }
167    }
168
169    /// Set whether to drop NaN values
170    pub fn with_drop_na(mut self, dropna: bool) -> Self {
171        self.drop_na = dropna;
172        self
173    }
174
175    /// Transform time series data to lag features
176    ///
177    /// # Arguments
178    /// * `x` - Time series data, shape (n_timesteps,) or (n_samples, n_timesteps)
179    ///
180    /// # Returns
181    /// * Lag features
182    pub fn transform_1d<S>(&self, x: &ArrayBase<S, Ix1>) -> Result<Array2<f64>>
183    where
184        S: Data,
185        S::Elem: Float + NumCast,
186    {
187        let n = x.len();
188        let max_lag = *self.lags.iter().max().unwrap_or(&0);
189
190        if max_lag >= n {
191            return Err(TransformError::InvalidInput(format!(
192                "Maximum lag {max_lag} must be less than series length {n}"
193            )));
194        }
195
196        let start_idx = if self.drop_na { max_lag } else { 0 };
197        let n_samples = n - start_idx;
198        let n_features = self.lags.len() + 1; // Original + lags
199
200        let mut result = Array2::zeros((n_samples, n_features));
201
202        // Original series
203        for i in 0..n_samples {
204            result[[i, 0]] = num_traits::cast::<S::Elem, f64>(x[start_idx + i]).unwrap_or(0.0);
205        }
206
207        // Lagged features
208        for (lag_idx, &lag) in self.lags.iter().enumerate() {
209            for i in 0..n_samples {
210                let idx = start_idx + i;
211                if idx >= lag {
212                    result[[i, lag_idx + 1]] =
213                        num_traits::cast::<S::Elem, f64>(x[idx - lag]).unwrap_or(0.0);
214                } else if !self.drop_na {
215                    result[[i, lag_idx + 1]] = f64::NAN;
216                }
217            }
218        }
219
220        Ok(result)
221    }
222
223    /// Transform multiple time series
224    pub fn transform<S>(&self, x: &ArrayBase<S, Ix2>) -> Result<Vec<Array2<f64>>>
225    where
226        S: Data,
227        S::Elem: Float + NumCast,
228    {
229        let n_series = x.shape()[0];
230        let mut results = Vec::new();
231
232        for i in 0..n_series {
233            let series = x.row(i);
234            let lag_features = self.transform_1d(&series)?;
235            results.push(lag_features);
236        }
237
238        Ok(results)
239    }
240}
241
242/// Wavelet feature extractor for time series
243///
244/// Extracts features using wavelet decomposition. Useful for
245/// multi-resolution analysis of time series.
246#[derive(Debug, Clone)]
247pub struct WaveletFeatures {
248    /// Wavelet type: 'db1' (Haar), 'db2', 'db4', 'db8', 'bior2.2', 'bior4.4'
249    wavelet: String,
250    /// Decomposition level
251    level: usize,
252    /// Whether to include approximation coefficients
253    include_approx: bool,
254}
255
256impl WaveletFeatures {
257    /// Create a new WaveletFeatures extractor
258    ///
259    /// # Arguments
260    /// * `wavelet` - Wavelet type (e.g., "db1" for Haar wavelet)
261    /// * `level` - Decomposition level
262    pub fn new(wavelet: &str, level: usize) -> Self {
263        WaveletFeatures {
264            wavelet: wavelet.to_string(),
265            level,
266            include_approx: true,
267        }
268    }
269
270    /// Set whether to include approximation coefficients
271    pub fn with_include_approx(mut self, include: bool) -> Self {
272        self.include_approx = include;
273        self
274    }
275
276    /// Haar wavelet transform (simplified)
277    #[allow(dead_code)]
278    fn haar_transform(&self, x: &[f64]) -> (Vec<f64>, Vec<f64>) {
279        let n = x.len();
280        let mut approx = Vec::with_capacity(n / 2);
281        let mut detail = Vec::with_capacity(n / 2);
282
283        for i in (0..n).step_by(2) {
284            if i + 1 < n {
285                approx.push((x[i] + x[i + 1]) / 2.0_f64.sqrt());
286                detail.push((x[i] - x[i + 1]) / 2.0_f64.sqrt());
287            } else {
288                // Handle odd length
289                approx.push(x[i]);
290            }
291        }
292
293        (approx, detail)
294    }
295
296    /// Get wavelet filter coefficients
297    fn get_wavelet_coeffs(&self) -> Result<(Vec<f64>, Vec<f64>)> {
298        match self.wavelet.as_str() {
299            "db1" | "haar" => {
300                // Haar wavelet
301                let h = vec![1.0 / 2.0_f64.sqrt(), 1.0 / 2.0_f64.sqrt()];
302                let g = vec![1.0 / 2.0_f64.sqrt(), -1.0 / 2.0_f64.sqrt()];
303                Ok((h, g))
304            }
305            "db2" => {
306                // Daubechies-2 wavelet
307                let sqrt3 = 3.0_f64.sqrt();
308                let denom = 4.0 * 2.0_f64.sqrt();
309                let h = vec![
310                    (1.0 + sqrt3) / denom,
311                    (3.0 + sqrt3) / denom,
312                    (3.0 - sqrt3) / denom,
313                    (1.0 - sqrt3) / denom,
314                ];
315                let g = vec![h[3], -h[2], h[1], -h[0]];
316                Ok((h, g))
317            }
318            "db4" => {
319                // Daubechies-4 wavelet
320                let h = vec![
321                    -0.010597401784997,
322                    0.032883011666983,
323                    0.030841381835987,
324                    -0.187034811718881,
325                    -0.027983769416984,
326                    0.630880767929590,
327                    0.714846570552542,
328                    0.230377813308855,
329                ];
330                let mut g = Vec::with_capacity(h.len());
331                for (i, &coeff) in h.iter().enumerate() {
332                    g.push(if i % 2 == 0 { coeff } else { -coeff });
333                }
334                g.reverse();
335                Ok((h, g))
336            }
337            "db8" => {
338                // Daubechies-8 wavelet
339                let h = vec![
340                    -0.00011747678400228,
341                    0.0006754494059985,
342                    -0.0003917403729959,
343                    -0.00487035299301066,
344                    0.008746094047015655,
345                    0.013981027917015516,
346                    -0.04408825393106472,
347                    -0.01736930100202211,
348                    0.128747426620186,
349                    0.00047248457399797254,
350                    -0.2840155429624281,
351                    -0.015829105256023893,
352                    0.5853546836548691,
353                    0.6756307362980128,
354                    0.3182301045617746,
355                    0.05441584224308161,
356                ];
357                let mut g = Vec::with_capacity(h.len());
358                for (i, &coeff) in h.iter().enumerate() {
359                    g.push(if i % 2 == 0 { coeff } else { -coeff });
360                }
361                g.reverse();
362                Ok((h, g))
363            }
364            "bior2.2" => {
365                // Biorthogonal 2.2 wavelet
366                let h = vec![
367                    0.0,
368                    -0.17677669529663687,
369                    0.35355339059327373,
370                    1.0606601717798214,
371                    0.35355339059327373,
372                    -0.17677669529663687,
373                    0.0,
374                ];
375                let g = vec![
376                    0.0,
377                    0.35355339059327373,
378                    -std::f64::consts::FRAC_1_SQRT_2,
379                    0.35355339059327373,
380                    0.0,
381                ];
382                Ok((h, g))
383            }
384            "bior4.4" => {
385                // Biorthogonal 4.4 wavelet
386                let h = vec![
387                    0.0,
388                    0.03314563036811941,
389                    -0.06629126073623884,
390                    -0.17677669529663687,
391                    0.4198446513295126,
392                    0.9943689110435825,
393                    0.4198446513295126,
394                    -0.17677669529663687,
395                    -0.06629126073623884,
396                    0.03314563036811941,
397                    0.0,
398                ];
399                let g = vec![
400                    0.0,
401                    -0.06453888262893856,
402                    0.04068941760955867,
403                    0.41809227322161724,
404                    -0.7884856164056651,
405                    0.41809227322161724,
406                    0.04068941760955867,
407                    -0.06453888262893856,
408                    0.0,
409                ];
410                Ok((h, g))
411            }
412            _ => Err(TransformError::InvalidInput(format!(
413                "Unsupported wavelet type: {}",
414                self.wavelet
415            ))),
416        }
417    }
418
419    /// Generic wavelet transform using filter bank
420    fn wavelet_transform(&self, x: &[f64]) -> Result<(Vec<f64>, Vec<f64>)> {
421        let (h, g) = self.get_wavelet_coeffs()?;
422
423        if x.len() < h.len() {
424            return Err(TransformError::InvalidInput(
425                "Input signal too short for selected wavelet".to_string(),
426            ));
427        }
428
429        let n = x.len();
430        let mut approx = Vec::with_capacity(n / 2);
431        let mut detail = Vec::with_capacity(n / 2);
432
433        // Convolution with downsampling
434        for i in (0..n).step_by(2) {
435            let mut h_sum = 0.0;
436            let mut g_sum = 0.0;
437
438            for (j, (&h_coeff, &g_coeff)) in h.iter().zip(g.iter()).enumerate() {
439                let idx = (i + j) % n; // Periodic boundary condition
440                h_sum += h_coeff * x[idx];
441                g_sum += g_coeff * x[idx];
442            }
443
444            approx.push(h_sum);
445            detail.push(g_sum);
446        }
447
448        Ok((approx, detail))
449    }
450
451    /// Multi-level wavelet decomposition
452    fn wavelet_decompose(&self, x: &[f64]) -> Result<Vec<Vec<f64>>> {
453        let mut coefficients = Vec::new();
454        let mut current = x.to_vec();
455
456        for _ in 0..self.level {
457            let (approx, detail) = self.wavelet_transform(&current)?;
458            coefficients.push(detail);
459            current = approx;
460
461            if current.len() < 2 {
462                break;
463            }
464        }
465
466        if self.include_approx {
467            coefficients.push(current);
468        }
469
470        Ok(coefficients)
471    }
472
473    /// Extract wavelet features from a single time series
474    fn extract_features_1d<S>(&self, x: &ArrayBase<S, Ix1>) -> Result<Array1<f64>>
475    where
476        S: Data,
477        S::Elem: Float + NumCast,
478    {
479        let x_vec: Vec<f64> = x
480            .iter()
481            .map(|&v| num_traits::cast::<S::Elem, f64>(v).unwrap_or(0.0))
482            .collect();
483
484        let coefficients = self.wavelet_decompose(&x_vec)?;
485
486        // Calculate total number of features
487        let n_features: usize = coefficients.iter().map(|c| c.len()).sum();
488        let mut features = Array1::zeros(n_features);
489
490        let mut idx = 0;
491        for coeff_level in coefficients {
492            for &coeff in &coeff_level {
493                features[idx] = coeff;
494                idx += 1;
495            }
496        }
497
498        Ok(features)
499    }
500
501    /// Transform time series data to wavelet features
502    ///
503    /// # Arguments
504    /// * `x` - Time series data, shape (n_samples, n_timesteps)
505    ///
506    /// # Returns
507    /// * Wavelet features
508    pub fn transform<S>(&self, x: &ArrayBase<S, Ix2>) -> Result<Vec<Array1<f64>>>
509    where
510        S: Data,
511        S::Elem: Float + NumCast,
512    {
513        let n_samples = x.shape()[0];
514        let mut results = Vec::new();
515
516        for i in 0..n_samples {
517            let features = self.extract_features_1d(&x.row(i))?;
518            results.push(features);
519        }
520
521        Ok(results)
522    }
523}
524
525/// Combined time series feature extractor
526///
527/// Combines multiple feature extraction methods for comprehensive
528/// time series feature engineering.
529#[derive(Debug, Clone)]
530pub struct TimeSeriesFeatures {
531    /// Whether to include Fourier features
532    use_fourier: bool,
533    /// Whether to include lag features
534    use_lags: bool,
535    /// Whether to include wavelet features
536    use_wavelets: bool,
537    /// Fourier feature configuration
538    fourier_config: Option<FourierFeatures>,
539    /// Lag feature configuration
540    lag_config: Option<LagFeatures>,
541    /// Wavelet feature configuration
542    wavelet_config: Option<WaveletFeatures>,
543}
544
545impl TimeSeriesFeatures {
546    /// Create a new combined feature extractor
547    pub fn new() -> Self {
548        TimeSeriesFeatures {
549            use_fourier: true,
550            use_lags: true,
551            use_wavelets: false,
552            fourier_config: Some(FourierFeatures::new(10)),
553            lag_config: Some(LagFeatures::with_range(1, 5)),
554            wavelet_config: None,
555        }
556    }
557
558    /// Configure Fourier features
559    pub fn with_fourier(mut self, n_components: usize, includephase: bool) -> Self {
560        self.use_fourier = true;
561        let mut fourier = FourierFeatures::new(n_components);
562        if includephase {
563            fourier = fourier.with_phase();
564        }
565        self.fourier_config = Some(fourier);
566        self
567    }
568
569    /// Configure lag features
570    pub fn with_lags(mut self, lags: Vec<usize>) -> Self {
571        self.use_lags = true;
572        self.lag_config = Some(LagFeatures::new(lags));
573        self
574    }
575
576    /// Configure wavelet features
577    pub fn with_wavelets(mut self, wavelet: &str, level: usize) -> Self {
578        self.use_wavelets = true;
579        self.wavelet_config = Some(WaveletFeatures::new(wavelet, level));
580        self
581    }
582}
583
584impl Default for TimeSeriesFeatures {
585    fn default() -> Self {
586        Self::new()
587    }
588}
589
590#[cfg(test)]
591mod tests {
592    use super::*;
593    use ndarray::Array;
594
595    #[test]
596    fn test_fourier_features() {
597        // Create a simple sinusoidal signal
598        let n = 100;
599        let mut signal = Vec::new();
600        for i in 0..n {
601            let t = i as f64 / n as f64 * 4.0 * std::f64::consts::PI;
602            signal.push((t).sin() + 0.5 * (2.0 * t).sin());
603        }
604        let x = Array::from_shape_vec((1, n), signal).unwrap();
605
606        let fourier = FourierFeatures::new(10);
607        let features = fourier.transform(&x).unwrap();
608
609        assert_eq!(features.shape(), &[1, 10]);
610
611        // DC component should be near zero for this signal
612        assert!(features[[0, 0]].abs() < 1e-10);
613
614        // The fundamental frequency components should have significant magnitude
615        // sin(t) component should be at index 1
616        // sin(2t) component should be at index 2
617        assert!(features[[0, 1]] > 0.1); // Fundamental frequency
618        assert!(features[[0, 2]] > 0.04); // Second harmonic (0.5 amplitude)
619    }
620
621    #[test]
622    fn test_lag_features() {
623        let x = Array::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
624
625        let lag_extractor = LagFeatures::new(vec![1, 2]);
626        let features = lag_extractor.transform_1d(&x.view()).unwrap();
627
628        // Should have 4 samples (6 - max_lag(2)) and 3 features (original + 2 lags)
629        assert_eq!(features.shape(), &[4, 3]);
630
631        // Check first row: x[2]=3, x[1]=2, x[0]=1
632        assert_eq!(features[[0, 0]], 3.0);
633        assert_eq!(features[[0, 1]], 2.0);
634        assert_eq!(features[[0, 2]], 1.0);
635    }
636
637    #[test]
638    fn test_wavelet_features() {
639        let x = Array::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
640        let x_2d = x.clone().into_shape_with_order((1, 8)).unwrap();
641
642        let wavelet = WaveletFeatures::new("db1", 2);
643        let features = wavelet.transform(&x_2d).unwrap();
644
645        assert!(!features.is_empty());
646        assert!(features[0].len() > 0);
647    }
648}