avila_telemetry/
anomaly.rs

1//! Anomaly detection algorithms
2
3use crate::{Result, TelemetryError, TimeSeries};
4
5/// Type of anomaly detected
6#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)]
7pub enum AnomalyType {
8    /// Point anomaly (single outlier)
9    Point,
10    /// Contextual anomaly (unusual in context)
11    Contextual,
12    /// Collective anomaly (unusual pattern)
13    Collective,
14}
15
16/// Represents a detected anomaly
17#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
18pub struct Anomaly {
19    /// Index in the time series
20    pub index: usize,
21    /// Value at the anomaly point
22    pub value: f64,
23    /// Type of anomaly
24    pub anomaly_type: AnomalyType,
25    /// Anomaly score (higher = more anomalous)
26    pub score: f64,
27}
28
/// Anomaly detector using various statistical methods.
///
/// The detector is a small bag of tuning parameters, so it is `Copy` and can
/// be compared for equality.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct AnomalyDetector {
    /// Z-score threshold for detection: a sample is reported when its absolute
    /// Z-score is strictly greater than this value.
    pub z_threshold: f64,
    /// IQR multiplier for detection: the fences sit this many IQRs below Q1
    /// and above Q3.
    pub iqr_multiplier: f64,
}
37
38impl Default for AnomalyDetector {
39    fn default() -> Self {
40        Self {
41            z_threshold: 3.0,
42            iqr_multiplier: 1.5,
43        }
44    }
45}
46
47impl AnomalyDetector {
48    /// Create a new anomaly detector with custom parameters
49    pub fn new(z_threshold: f64, iqr_multiplier: f64) -> Self {
50        Self {
51            z_threshold,
52            iqr_multiplier,
53        }
54    }
55
56    /// Detect anomalies using Z-score method
57    pub fn detect_zscore(&self, ts: &TimeSeries) -> Result<Vec<Anomaly>> {
58        if ts.len() < 3 {
59            return Err(TelemetryError::InsufficientData(
60                "Need at least 3 data points for Z-score detection".to_string(),
61            ));
62        }
63
64        let stats = ts.statistics();
65        let mut anomalies = Vec::new();
66
67        for (i, &value) in ts.values.iter().enumerate() {
68            let z_score = ((value - stats.mean) / stats.std_dev).abs();
69
70            if z_score > self.z_threshold {
71                anomalies.push(Anomaly {
72                    index: i,
73                    value,
74                    anomaly_type: AnomalyType::Point,
75                    score: z_score,
76                });
77            }
78        }
79
80        Ok(anomalies)
81    }
82
83    /// Detect anomalies using IQR (Interquartile Range) method
84    pub fn detect_iqr(&self, ts: &TimeSeries) -> Result<Vec<Anomaly>> {
85        if ts.len() < 4 {
86            return Err(TelemetryError::InsufficientData(
87                "Need at least 4 data points for IQR detection".to_string(),
88            ));
89        }
90
91        let mut sorted = ts.values.clone();
92        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
93
94        let n = sorted.len();
95        let q1 = sorted[n / 4];
96        let q3 = sorted[3 * n / 4];
97        let iqr = q3 - q1;
98
99        let lower_bound = q1 - self.iqr_multiplier * iqr;
100        let upper_bound = q3 + self.iqr_multiplier * iqr;
101
102        let mut anomalies = Vec::new();
103
104        for (i, &value) in ts.values.iter().enumerate() {
105            if value < lower_bound || value > upper_bound {
106                let score = if value < lower_bound {
107                    (lower_bound - value) / iqr
108                } else {
109                    (value - upper_bound) / iqr
110                };
111
112                anomalies.push(Anomaly {
113                    index: i,
114                    value,
115                    anomaly_type: AnomalyType::Point,
116                    score,
117                });
118            }
119        }
120
121        Ok(anomalies)
122    }
123
124    /// Detect anomalies using moving average deviation
125    pub fn detect_moving_average(&self, ts: &TimeSeries, window: usize) -> Result<Vec<Anomaly>> {
126        let ma = ts.moving_average(window)?;
127        let mut anomalies = Vec::new();
128
129        // Calculate deviations from moving average
130        let offset = window / 2;
131        for (i, &ma_value) in ma.iter().enumerate() {
132            let actual_idx = i + offset;
133            if actual_idx >= ts.values.len() {
134                break;
135            }
136
137            let deviation = (ts.values[actual_idx] - ma_value).abs();
138            let relative_dev = deviation / ma_value.abs().max(1e-10);
139
140            if relative_dev > 0.5 {
141                // 50% deviation threshold
142                anomalies.push(Anomaly {
143                    index: actual_idx,
144                    value: ts.values[actual_idx],
145                    anomaly_type: AnomalyType::Contextual,
146                    score: relative_dev,
147                });
148            }
149        }
150
151        Ok(anomalies)
152    }
153
154    /// Detect anomalies using multiple methods and aggregate results
155    pub fn detect_ensemble(&self, ts: &TimeSeries) -> Result<Vec<Anomaly>> {
156        let zscore_anomalies = self.detect_zscore(ts)?;
157        let iqr_anomalies = self.detect_iqr(ts)?;
158
159        // Combine and deduplicate anomalies
160        let mut all_indices: Vec<usize> = zscore_anomalies
161            .iter()
162            .chain(iqr_anomalies.iter())
163            .map(|a| a.index)
164            .collect();
165
166        all_indices.sort_unstable();
167        all_indices.dedup();
168
169        let mut anomalies = Vec::new();
170        for idx in all_indices {
171            let value = ts.values[idx];
172            let zscore_score = zscore_anomalies
173                .iter()
174                .find(|a| a.index == idx)
175                .map(|a| a.score)
176                .unwrap_or(0.0);
177
178            let iqr_score = iqr_anomalies
179                .iter()
180                .find(|a| a.index == idx)
181                .map(|a| a.score)
182                .unwrap_or(0.0);
183
184            // Average the scores
185            let score = (zscore_score + iqr_score) / 2.0;
186
187            anomalies.push(Anomaly {
188                index: idx,
189                value,
190                anomaly_type: AnomalyType::Point,
191                score,
192            });
193        }
194
195        Ok(anomalies)
196    }
197}
198
#[cfg(test)]
mod tests {
    use super::*;

    /// A lone spike among tightly clustered readings must be reported by the
    /// Z-score detector as a point anomaly at the spike's own index.
    #[test]
    fn test_zscore_detection() {
        // Eight ordinary readings, one clear outlier at index 8, three more
        // ordinary readings.
        let data = vec![
            10.0, 12.0, 11.0, 13.0, 12.0, 10.0, 11.0, 12.0, 100.0, 11.0, 12.0, 10.0,
        ];

        let ts = TimeSeries::new(data);
        let detector = AnomalyDetector::default();

        let anomalies = detector.detect_zscore(&ts).unwrap();
        assert!(!anomalies.is_empty(), "Should detect at least one anomaly");

        let spike = anomalies
            .iter()
            .find(|a| a.index == 8)
            .expect("Should detect the outlier value of 100.0");
        assert_eq!(spike.value, 100.0);
        assert_eq!(spike.anomaly_type, AnomalyType::Point);
        assert!(spike.score > detector.z_threshold);
    }

    /// The IQR detector must report exactly the outlier, with the expected
    /// score.
    #[test]
    fn test_iqr_detection() {
        let data = vec![1.0, 2.0, 3.0, 2.0, 1.0, 100.0, 2.0, 1.0];
        let ts = TimeSeries::new(data);
        let detector = AnomalyDetector::default();

        let anomalies = detector.detect_iqr(&ts).unwrap();

        // sorted = [1, 1, 1, 2, 2, 2, 3, 100] -> Q1 = 1, Q3 = 3, IQR = 2,
        // fences = [-2, 6]; only 100.0 (index 5) is outside, by 94 / 2 = 47.
        assert_eq!(anomalies.len(), 1);
        assert_eq!(anomalies[0].index, 5);
        assert_eq!(anomalies[0].value, 100.0);
        assert_eq!(anomalies[0].anomaly_type, AnomalyType::Point);
        assert!((anomalies[0].score - 47.0).abs() < 1e-9);
    }

    /// The ensemble must report an index flagged by either detector exactly
    /// once.
    #[test]
    fn test_ensemble_detection() {
        let data = vec![1.0, 2.0, 3.0, 2.0, 1.0, 100.0, 2.0, 1.0];
        let ts = TimeSeries::new(data);
        let detector = AnomalyDetector::default();

        let anomalies = detector.detect_ensemble(&ts).unwrap();
        let hits: Vec<_> = anomalies.iter().filter(|a| a.index == 5).collect();
        assert_eq!(hits.len(), 1, "Outlier must appear exactly once");

        // The IQR score alone is 47, so the averaged score is at least 23.5
        // whether or not the Z-score detector also fires.
        assert!(hits[0].score >= 23.5);
    }
}
231}