Skip to main content

oxigdal_analytics/timeseries/
anomaly.rs

1//! Anomaly Detection for Time Series
2//!
3//! This module provides various anomaly detection methods including:
4//! - Z-score based detection
5//! - IQR (Interquartile Range) based detection
6//! - Modified Z-score (using median absolute deviation)
7
8use crate::error::{AnalyticsError, Result};
9use scirs2_core::ndarray::{Array1, ArrayView1};
10
11/// Anomaly detection methods
12#[derive(Debug, Clone, Copy, PartialEq, Eq)]
13pub enum AnomalyMethod {
14    /// Z-score based detection (assumes normal distribution)
15    ZScore,
16    /// IQR (Interquartile Range) based detection (robust to outliers)
17    IQR,
18    /// Modified Z-score using Median Absolute Deviation
19    ModifiedZScore,
20}
21
22/// Result of anomaly detection
23#[derive(Debug, Clone)]
24pub struct AnomalyResult {
25    /// Indices of detected anomalies
26    pub anomaly_indices: Vec<usize>,
27    /// Anomaly scores for each data point
28    pub scores: Array1<f64>,
29    /// Threshold used for detection
30    pub threshold: f64,
31    /// Detection method used
32    pub method: AnomalyMethod,
33}
34
35/// Anomaly detector for time series
36pub struct AnomalyDetector {
37    method: AnomalyMethod,
38    threshold: f64,
39}
40
41impl AnomalyDetector {
42    /// Create a new anomaly detector
43    ///
44    /// # Arguments
45    /// * `method` - Anomaly detection method
46    /// * `threshold` - Detection threshold (e.g., 3.0 for Z-score, 1.5 for IQR)
47    pub fn new(method: AnomalyMethod, threshold: f64) -> Self {
48        Self { method, threshold }
49    }
50
51    /// Detect anomalies in time series
52    ///
53    /// # Arguments
54    /// * `values` - Time series values
55    ///
56    /// # Errors
57    /// Returns error if computation fails or insufficient data
58    pub fn detect(&self, values: &ArrayView1<f64>) -> Result<AnomalyResult> {
59        let scores = match self.method {
60            AnomalyMethod::ZScore => self.z_score(values)?,
61            AnomalyMethod::IQR => self.iqr_score(values)?,
62            AnomalyMethod::ModifiedZScore => self.modified_z_score(values)?,
63        };
64
65        // Find anomalies based on threshold
66        let anomaly_indices: Vec<usize> = scores
67            .iter()
68            .enumerate()
69            .filter_map(|(i, &score)| {
70                // Use >= to include boundary cases where score equals threshold
71                if score.abs() >= self.threshold {
72                    Some(i)
73                } else {
74                    None
75                }
76            })
77            .collect();
78
79        Ok(AnomalyResult {
80            anomaly_indices,
81            scores,
82            threshold: self.threshold,
83            method: self.method,
84        })
85    }
86
87    /// Z-score based anomaly detection
88    ///
89    /// Calculates standardized scores: z = (x - mean) / std
90    fn z_score(&self, values: &ArrayView1<f64>) -> Result<Array1<f64>> {
91        if values.len() < 2 {
92            return Err(AnalyticsError::insufficient_data(
93                "Z-score requires at least 2 data points",
94            ));
95        }
96
97        let n = values.len() as f64;
98        let mean = values.sum() / n;
99        let variance = values.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n;
100        let std = variance.sqrt();
101
102        if std < f64::EPSILON {
103            return Err(AnalyticsError::numerical_instability(
104                "Standard deviation is too small",
105            ));
106        }
107
108        let mut scores = Array1::zeros(values.len());
109        for (i, &value) in values.iter().enumerate() {
110            scores[i] = (value - mean) / std;
111        }
112
113        Ok(scores)
114    }
115
116    /// IQR-based anomaly detection
117    ///
118    /// Uses interquartile range to detect outliers
119    fn iqr_score(&self, values: &ArrayView1<f64>) -> Result<Array1<f64>> {
120        if values.len() < 4 {
121            return Err(AnalyticsError::insufficient_data(
122                "IQR requires at least 4 data points",
123            ));
124        }
125
126        // Sort values for percentile calculation
127        let mut sorted = values.to_vec();
128        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
129
130        let q1 = percentile(&sorted, 25.0)?;
131        let q3 = percentile(&sorted, 75.0)?;
132        let iqr = q3 - q1;
133
134        if iqr < f64::EPSILON {
135            return Err(AnalyticsError::numerical_instability("IQR is too small"));
136        }
137
138        let median = percentile(&sorted, 50.0)?;
139
140        // Calculate scores based on distance from median in units of IQR
141        let mut scores = Array1::zeros(values.len());
142        for (i, &value) in values.iter().enumerate() {
143            scores[i] = (value - median).abs() / iqr;
144        }
145
146        Ok(scores)
147    }
148
149    /// Modified Z-score using Median Absolute Deviation (MAD)
150    ///
151    /// More robust to outliers than standard Z-score
152    fn modified_z_score(&self, values: &ArrayView1<f64>) -> Result<Array1<f64>> {
153        if values.len() < 2 {
154            return Err(AnalyticsError::insufficient_data(
155                "Modified Z-score requires at least 2 data points",
156            ));
157        }
158
159        // Sort values for median calculation
160        let mut sorted = values.to_vec();
161        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
162
163        let median = percentile(&sorted, 50.0)?;
164
165        // Calculate MAD
166        let mut abs_deviations: Vec<f64> = values.iter().map(|x| (x - median).abs()).collect();
167        abs_deviations.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
168        let mad = percentile(&abs_deviations, 50.0)?;
169
170        // Consistency constant for normal distribution
171        let consistency_constant = 1.4826;
172        let normalized_mad = consistency_constant * mad;
173
174        if normalized_mad < f64::EPSILON {
175            return Err(AnalyticsError::numerical_instability("MAD is too small"));
176        }
177
178        // Calculate modified Z-scores
179        let mut scores = Array1::zeros(values.len());
180        for (i, &value) in values.iter().enumerate() {
181            scores[i] = 0.6745 * (value - median) / normalized_mad;
182        }
183
184        Ok(scores)
185    }
186}
187
188/// Calculate percentile of sorted data
189///
190/// # Arguments
191/// * `sorted_data` - Sorted array of values
192/// * `percentile` - Percentile to calculate (0-100)
193///
194/// # Errors
195/// Returns error if data is empty or percentile is out of range
196fn percentile(sorted_data: &[f64], percentile: f64) -> Result<f64> {
197    if sorted_data.is_empty() {
198        return Err(AnalyticsError::insufficient_data("Data is empty"));
199    }
200
201    if !(0.0..=100.0).contains(&percentile) {
202        return Err(AnalyticsError::invalid_parameter(
203            "percentile",
204            "must be between 0 and 100",
205        ));
206    }
207
208    let n = sorted_data.len();
209    if n == 1 {
210        return Ok(sorted_data[0]);
211    }
212
213    // Linear interpolation between closest ranks
214    let rank = (percentile / 100.0) * ((n - 1) as f64);
215    let lower_idx = rank.floor() as usize;
216    let upper_idx = rank.ceil() as usize;
217    let fraction = rank - (lower_idx as f64);
218
219    Ok(sorted_data[lower_idx] + fraction * (sorted_data[upper_idx] - sorted_data[lower_idx]))
220}
221
222/// Change point detection using cumulative sum (CUSUM)
223///
224/// # Arguments
225/// * `values` - Time series values
226/// * `threshold` - Detection threshold
227///
228/// # Errors
229/// Returns error if computation fails
230pub fn detect_change_points(values: &ArrayView1<f64>, threshold: f64) -> Result<Vec<usize>> {
231    if values.len() < 3 {
232        return Err(AnalyticsError::insufficient_data(
233            "Change point detection requires at least 3 data points",
234        ));
235    }
236
237    let n = values.len() as f64;
238    let mean = values.sum() / n;
239
240    // Calculate cumulative sum of deviations
241    let mut cusum_pos = 0.0;
242    let mut cusum_neg = 0.0;
243    let mut change_points = Vec::new();
244
245    for (i, &value) in values.iter().enumerate() {
246        let deviation = value - mean;
247        cusum_pos = (cusum_pos + deviation).max(0.0);
248        cusum_neg = (cusum_neg - deviation).max(0.0);
249
250        if cusum_pos > threshold || cusum_neg > threshold {
251            change_points.push(i);
252            cusum_pos = 0.0;
253            cusum_neg = 0.0;
254        }
255    }
256
257    Ok(change_points)
258}
259
260#[cfg(test)]
261mod tests {
262    use super::*;
263    use approx::assert_abs_diff_eq;
264    use scirs2_core::ndarray::array;
265
266    #[test]
267    fn test_z_score_detection() {
268        let values = array![1.0, 2.0, 3.0, 4.0, 100.0]; // 100.0 is an outlier
269        // Use threshold of 1.9 instead of 2.0 to account for the actual z-score being ~1.999
270        // (just under 2.0 due to how the outlier affects both mean and std dev)
271        let detector = AnomalyDetector::new(AnomalyMethod::ZScore, 1.9);
272        let result = detector
273            .detect(&values.view())
274            .expect("Z-score detection should succeed with valid data");
275
276        assert!(!result.anomaly_indices.is_empty());
277        assert!(result.anomaly_indices.contains(&4));
278    }
279
280    #[test]
281    fn test_iqr_detection() {
282        let values = array![1.0, 2.0, 3.0, 4.0, 5.0, 100.0]; // 100.0 is an outlier
283        let detector = AnomalyDetector::new(AnomalyMethod::IQR, 1.5);
284        let result = detector
285            .detect(&values.view())
286            .expect("IQR detection should succeed with valid data");
287
288        assert!(!result.anomaly_indices.is_empty());
289        assert!(result.anomaly_indices.contains(&5));
290    }
291
292    #[test]
293    fn test_modified_z_score() {
294        let values = array![1.0, 2.0, 3.0, 4.0, 5.0, 100.0]; // 100.0 is an outlier
295        let detector = AnomalyDetector::new(AnomalyMethod::ModifiedZScore, 3.5);
296        let result = detector
297            .detect(&values.view())
298            .expect("Modified Z-score detection should succeed with valid data");
299
300        assert!(!result.anomaly_indices.is_empty());
301    }
302
303    #[test]
304    fn test_percentile() {
305        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
306        let p50 = percentile(&data, 50.0).expect("50th percentile calculation should succeed");
307        assert_abs_diff_eq!(p50, 3.0, epsilon = 1e-10);
308
309        let p25 = percentile(&data, 25.0).expect("25th percentile calculation should succeed");
310        assert_abs_diff_eq!(p25, 2.0, epsilon = 1e-10);
311
312        let p75 = percentile(&data, 75.0).expect("75th percentile calculation should succeed");
313        assert_abs_diff_eq!(p75, 4.0, epsilon = 1e-10);
314    }
315
316    #[test]
317    fn test_change_point_detection() {
318        let values = array![1.0, 1.0, 1.0, 5.0, 5.0, 5.0]; // Change at index 3
319        let change_points = detect_change_points(&values.view(), 1.0)
320            .expect("Change point detection should succeed with valid data");
321
322        assert!(!change_points.is_empty());
323    }
324
325    #[test]
326    fn test_no_anomalies() {
327        let values = array![1.0, 2.0, 3.0, 4.0, 5.0];
328        let detector = AnomalyDetector::new(AnomalyMethod::ZScore, 3.0);
329        let result = detector
330            .detect(&values.view())
331            .expect("Anomaly detection should succeed with valid data");
332
333        assert!(result.anomaly_indices.is_empty());
334    }
335}