avila_telemetry/
features.rs

1//! Feature engineering for time series
2
3use crate::{Result, TelemetryError, TimeSeries};
4
/// Stateless entry point for time-series feature engineering.
///
/// The type carries no data; every operation is an associated function that
/// takes the series to analyse as an argument, so no instance is required.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct FeatureExtractor;
7
8impl FeatureExtractor {
9    /// Create lag features
10    pub fn create_lag_features(ts: &TimeSeries, lags: &[usize]) -> Result<Vec<Vec<f64>>> {
11        if lags.is_empty() {
12            return Err(TelemetryError::InvalidParameter(
13                "Must specify at least one lag".to_string(),
14            ));
15        }
16
17        let max_lag = *lags.iter().max().unwrap();
18        if max_lag >= ts.len() {
19            return Err(TelemetryError::InvalidParameter(
20                "Lag is too large for the time series".to_string(),
21            ));
22        }
23
24        let mut features = Vec::new();
25
26        for &lag in lags {
27            let mut lag_feature = Vec::new();
28            for i in lag..ts.len() {
29                lag_feature.push(ts.values[i - lag]);
30            }
31            features.push(lag_feature);
32        }
33
34        Ok(features)
35    }
36
37    /// Calculate rolling statistics (mean, std, min, max)
38    pub fn rolling_statistics(ts: &TimeSeries, window: usize) -> Result<RollingStats> {
39        if window == 0 {
40            return Err(TelemetryError::InvalidParameter(
41                "Window size must be greater than 0".to_string(),
42            ));
43        }
44
45        if window > ts.len() {
46            return Err(TelemetryError::InsufficientData(
47                "Window size is larger than series length".to_string(),
48            ));
49        }
50
51        let mut means = Vec::new();
52        let mut stds = Vec::new();
53        let mut mins = Vec::new();
54        let mut maxs = Vec::new();
55
56        for i in 0..=(ts.len() - window) {
57            let window_data = &ts.values[i..i + window];
58
59            let mean = window_data.iter().sum::<f64>() / window as f64;
60            means.push(mean);
61
62            let variance =
63                window_data.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / window as f64;
64            stds.push(variance.sqrt());
65
66            mins.push(
67                *window_data
68                    .iter()
69                    .min_by(|a, b| a.partial_cmp(b).unwrap())
70                    .unwrap(),
71            );
72
73            maxs.push(
74                *window_data
75                    .iter()
76                    .max_by(|a, b| a.partial_cmp(b).unwrap())
77                    .unwrap(),
78            );
79        }
80
81        Ok(RollingStats {
82            means,
83            stds,
84            mins,
85            maxs,
86        })
87    }
88
89    /// Extract trend features using linear regression
90    pub fn trend_features(ts: &TimeSeries, window: usize) -> Result<Vec<f64>> {
91        if window < 2 {
92            return Err(TelemetryError::InvalidParameter(
93                "Window size must be at least 2 for trend calculation".to_string(),
94            ));
95        }
96
97        let mut trends = Vec::new();
98
99        for i in 0..=(ts.len() - window) {
100            let window_data = &ts.values[i..i + window];
101            let slope = Self::calculate_slope(window_data);
102            trends.push(slope);
103        }
104
105        Ok(trends)
106    }
107
108    /// Calculate slope using simple linear regression
109    fn calculate_slope(data: &[f64]) -> f64 {
110        let n = data.len() as f64;
111        let x_mean = (n - 1.0) / 2.0;
112        let y_mean = data.iter().sum::<f64>() / n;
113
114        let mut numerator = 0.0;
115        let mut denominator = 0.0;
116
117        for (i, &y) in data.iter().enumerate() {
118            let x = i as f64;
119            numerator += (x - x_mean) * (y - y_mean);
120            denominator += (x - x_mean).powi(2);
121        }
122
123        if denominator == 0.0 {
124            0.0
125        } else {
126            numerator / denominator
127        }
128    }
129
130    /// Extract rate of change features
131    pub fn rate_of_change(ts: &TimeSeries, periods: usize) -> Result<Vec<f64>> {
132        if periods == 0 {
133            return Err(TelemetryError::InvalidParameter(
134                "Periods must be greater than 0".to_string(),
135            ));
136        }
137
138        if periods >= ts.len() {
139            return Err(TelemetryError::InsufficientData(
140                "Periods is too large for the time series".to_string(),
141            ));
142        }
143
144        let mut roc = Vec::new();
145
146        for i in periods..ts.len() {
147            let change = if ts.values[i - periods] == 0.0 {
148                0.0
149            } else {
150                (ts.values[i] - ts.values[i - periods]) / ts.values[i - periods]
151            };
152            roc.push(change);
153        }
154
155        Ok(roc)
156    }
157}
158
/// Per-window summary statistics produced by
/// [`FeatureExtractor::rolling_statistics`].
///
/// All four vectors have the same length, and index `i` in each of them
/// describes the window that starts at sample `i` of the input series.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct RollingStats {
    /// Arithmetic mean of each window.
    pub means: Vec<f64>,
    /// Population standard deviation of each window (the variance is divided
    /// by the window size, not by the window size minus one).
    pub stds: Vec<f64>,
    /// Smallest value in each window.
    pub mins: Vec<f64>,
    /// Largest value in each window.
    pub maxs: Vec<f64>,
}
167
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_lag_features() {
        let ts = TimeSeries::new(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        let features = FeatureExtractor::create_lag_features(&ts, &[1, 2]).unwrap();

        // One vector per lag; each drops the last `lag` samples.
        assert_eq!(features.len(), 2);
        assert_eq!(features[0], vec![1.0, 2.0, 3.0, 4.0]);
        assert_eq!(features[1], vec![1.0, 2.0, 3.0]);
    }

    #[test]
    fn test_lag_features_rejects_bad_lags() {
        let ts = TimeSeries::new(vec![1.0, 2.0, 3.0, 4.0, 5.0]);

        // No lags requested.
        assert!(matches!(
            FeatureExtractor::create_lag_features(&ts, &[]),
            Err(TelemetryError::InvalidParameter(_))
        ));
        // A lag equal to the series length leaves nothing to emit.
        assert!(matches!(
            FeatureExtractor::create_lag_features(&ts, &[1, 5]),
            Err(TelemetryError::InvalidParameter(_))
        ));
    }

    #[test]
    fn test_rolling_statistics() {
        let ts = TimeSeries::new(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        let stats = FeatureExtractor::rolling_statistics(&ts, 3).unwrap();

        assert_eq!(stats.means.len(), 3);
        assert_eq!(stats.means[0], 2.0);
    }

    #[test]
    fn test_rolling_statistics_values() {
        let ts = TimeSeries::new(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        let stats = FeatureExtractor::rolling_statistics(&ts, 3).unwrap();

        assert_eq!(stats.means, vec![2.0, 3.0, 4.0]);
        assert_eq!(stats.mins, vec![1.0, 2.0, 3.0]);
        assert_eq!(stats.maxs, vec![3.0, 4.0, 5.0]);

        // Population standard deviation of any three consecutive integers.
        let expected_std = (2.0_f64 / 3.0).sqrt();
        assert_eq!(stats.stds.len(), 3);
        for std in &stats.stds {
            assert!((std - expected_std).abs() < 1e-12);
        }
    }

    #[test]
    fn test_rolling_statistics_rejects_bad_window() {
        let ts = TimeSeries::new(vec![1.0, 2.0, 3.0]);

        assert!(matches!(
            FeatureExtractor::rolling_statistics(&ts, 0),
            Err(TelemetryError::InvalidParameter(_))
        ));
        assert!(matches!(
            FeatureExtractor::rolling_statistics(&ts, 4),
            Err(TelemetryError::InsufficientData(_))
        ));
    }

    #[test]
    fn test_trend_features() {
        let ts = TimeSeries::new(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        let trends = FeatureExtractor::trend_features(&ts, 2).unwrap();

        // A unit ramp has slope 1 in every window.
        assert_eq!(trends, vec![1.0, 1.0, 1.0, 1.0]);
    }

    #[test]
    fn test_trend_features_rejects_small_window() {
        let ts = TimeSeries::new(vec![1.0, 2.0, 3.0]);

        assert!(matches!(
            FeatureExtractor::trend_features(&ts, 1),
            Err(TelemetryError::InvalidParameter(_))
        ));
    }

    #[test]
    fn test_rate_of_change() {
        let ts = TimeSeries::new(vec![1.0, 2.0, 4.0]);
        let roc = FeatureExtractor::rate_of_change(&ts, 1).unwrap();

        assert_eq!(roc, vec![1.0, 1.0]);
    }

    #[test]
    fn test_rate_of_change_zero_base() {
        // A zero reference value yields 0.0 rather than a division by zero.
        let ts = TimeSeries::new(vec![0.0, 5.0]);
        let roc = FeatureExtractor::rate_of_change(&ts, 1).unwrap();

        assert_eq!(roc, vec![0.0]);
    }

    #[test]
    fn test_rate_of_change_rejects_bad_periods() {
        let ts = TimeSeries::new(vec![1.0, 2.0, 4.0]);

        assert!(matches!(
            FeatureExtractor::rate_of_change(&ts, 0),
            Err(TelemetryError::InvalidParameter(_))
        ));
        assert!(matches!(
            FeatureExtractor::rate_of_change(&ts, 3),
            Err(TelemetryError::InsufficientData(_))
        ));
    }
}