Skip to main content

oxigdal_temporal/analysis/
anomaly.rs

1//! Anomaly Detection Module
2//!
3//! Implements anomaly detection algorithms for identifying unusual patterns
4//! and outliers in temporal data.
5
6use crate::error::{Result, TemporalError};
7use crate::timeseries::TimeSeriesRaster;
8use scirs2_core::ndarray::Array3;
9use serde::{Deserialize, Serialize};
10use tracing::info;
11
12/// Anomaly detection method
13#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
14pub enum AnomalyMethod {
15    /// Z-score based detection
16    ZScore,
17    /// Interquartile range (IQR) method
18    IQR,
19    /// Modified Z-score using median
20    ModifiedZScore,
21    /// Isolation Forest (simplified)
22    IsolationForest,
23}
24
25/// Anomaly detection result
26#[derive(Debug, Clone)]
27pub struct AnomalyResult {
28    /// Anomaly scores (higher = more anomalous)
29    pub scores: Array3<f64>,
30    /// Binary anomaly mask (1 = anomaly, 0 = normal)
31    pub mask: Array3<u8>,
32    /// Anomaly count per pixel
33    pub count: Array3<usize>,
34    /// Threshold used for detection
35    pub threshold: f64,
36}
37
38impl AnomalyResult {
39    /// Create new anomaly result
40    #[must_use]
41    pub fn new(
42        scores: Array3<f64>,
43        mask: Array3<u8>,
44        count: Array3<usize>,
45        threshold: f64,
46    ) -> Self {
47        Self {
48            scores,
49            mask,
50            count,
51            threshold,
52        }
53    }
54}
55
56/// Anomaly detector
57pub struct AnomalyDetector;
58
59impl AnomalyDetector {
60    /// Detect anomalies in time series
61    ///
62    /// # Errors
63    /// Returns error if detection fails
64    pub fn detect(
65        ts: &TimeSeriesRaster,
66        method: AnomalyMethod,
67        threshold: f64,
68    ) -> Result<AnomalyResult> {
69        match method {
70            AnomalyMethod::ZScore => Self::zscore_detection(ts, threshold),
71            AnomalyMethod::ModifiedZScore => Self::modified_zscore_detection(ts, threshold),
72            AnomalyMethod::IQR => Self::iqr_detection(ts, threshold),
73            AnomalyMethod::IsolationForest => Self::isolation_forest_detection(ts, threshold),
74        }
75    }
76
77    /// Z-score based anomaly detection
78    fn zscore_detection(ts: &TimeSeriesRaster, threshold: f64) -> Result<AnomalyResult> {
79        if ts.len() < 3 {
80            return Err(TemporalError::insufficient_data(
81                "Need at least 3 observations",
82            ));
83        }
84
85        let (height, width, n_bands) = ts
86            .expected_shape()
87            .ok_or_else(|| TemporalError::insufficient_data("No shape information"))?;
88
89        let mut scores = Array3::zeros((height, width, n_bands));
90        let mut mask = Array3::zeros((height, width, n_bands));
91        let mut count = Array3::zeros((height, width, n_bands));
92
93        for i in 0..height {
94            for j in 0..width {
95                for k in 0..n_bands {
96                    let values = ts.extract_pixel_timeseries(i, j, k)?;
97
98                    // Calculate mean and std dev
99                    let mean = values.iter().sum::<f64>() / values.len() as f64;
100                    let variance = values.iter().map(|v| (v - mean).powi(2)).sum::<f64>()
101                        / values.len() as f64;
102                    let std_dev = variance.sqrt();
103
104                    // Calculate max absolute Z-score
105                    let max_zscore = if std_dev > 0.0 {
106                        values
107                            .iter()
108                            .map(|v| ((v - mean) / std_dev).abs())
109                            .fold(0.0_f64, f64::max)
110                    } else {
111                        0.0
112                    };
113
114                    scores[[i, j, k]] = max_zscore;
115
116                    // Count anomalies
117                    if std_dev > 0.0 {
118                        let anomaly_count = values
119                            .iter()
120                            .filter(|v| ((*v - mean) / std_dev).abs() > threshold)
121                            .count();
122                        count[[i, j, k]] = anomaly_count;
123                        mask[[i, j, k]] = if anomaly_count > 0 { 1 } else { 0 };
124                    }
125                }
126            }
127        }
128
129        info!("Completed Z-score anomaly detection");
130        Ok(AnomalyResult::new(scores, mask, count, threshold))
131    }
132
133    /// Modified Z-score using median (robust to outliers)
134    fn modified_zscore_detection(ts: &TimeSeriesRaster, threshold: f64) -> Result<AnomalyResult> {
135        if ts.len() < 3 {
136            return Err(TemporalError::insufficient_data(
137                "Need at least 3 observations",
138            ));
139        }
140
141        let (height, width, n_bands) = ts
142            .expected_shape()
143            .ok_or_else(|| TemporalError::insufficient_data("No shape information"))?;
144
145        let mut scores = Array3::zeros((height, width, n_bands));
146        let mut mask = Array3::zeros((height, width, n_bands));
147        let mut count = Array3::zeros((height, width, n_bands));
148
149        for i in 0..height {
150            for j in 0..width {
151                for k in 0..n_bands {
152                    let mut values = ts.extract_pixel_timeseries(i, j, k)?;
153                    values.sort_by(|a: &f64, b: &f64| {
154                        a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)
155                    });
156
157                    // Calculate median
158                    let median = if values.len() % 2 == 0 {
159                        (values[values.len() / 2 - 1] + values[values.len() / 2]) / 2.0
160                    } else {
161                        values[values.len() / 2]
162                    };
163
164                    // Calculate MAD (Median Absolute Deviation)
165                    let mut deviations: Vec<f64> =
166                        values.iter().map(|v: &f64| (v - median).abs()).collect();
167                    deviations.sort_by(|a: &f64, b: &f64| {
168                        a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)
169                    });
170
171                    let mad = if deviations.len() % 2 == 0 {
172                        (deviations[deviations.len() / 2 - 1] + deviations[deviations.len() / 2])
173                            / 2.0
174                    } else {
175                        deviations[deviations.len() / 2]
176                    };
177
178                    // Modified Z-score: 0.6745 * (x - median) / MAD
179                    let max_modified_zscore = if mad > 0.0 {
180                        values
181                            .iter()
182                            .map(|v: &f64| (0.6745 * (v - median) / mad).abs())
183                            .fold(0.0_f64, f64::max)
184                    } else {
185                        0.0
186                    };
187
188                    scores[[i, j, k]] = max_modified_zscore;
189
190                    let anomaly_count = if mad > 0.0 {
191                        values
192                            .iter()
193                            .filter(|v: &&f64| (0.6745 * (*v - median) / mad).abs() > threshold)
194                            .count()
195                    } else {
196                        0
197                    };
198
199                    count[[i, j, k]] = anomaly_count;
200                    mask[[i, j, k]] = if anomaly_count > 0 { 1 } else { 0 };
201                }
202            }
203        }
204
205        info!("Completed modified Z-score anomaly detection");
206        Ok(AnomalyResult::new(scores, mask, count, threshold))
207    }
208
209    /// IQR-based anomaly detection
210    fn iqr_detection(ts: &TimeSeriesRaster, threshold: f64) -> Result<AnomalyResult> {
211        if ts.len() < 4 {
212            return Err(TemporalError::insufficient_data(
213                "Need at least 4 observations for IQR",
214            ));
215        }
216
217        let (height, width, n_bands) = ts
218            .expected_shape()
219            .ok_or_else(|| TemporalError::insufficient_data("No shape information"))?;
220
221        let mut scores = Array3::zeros((height, width, n_bands));
222        let mut mask = Array3::zeros((height, width, n_bands));
223        let mut count = Array3::zeros((height, width, n_bands));
224
225        for i in 0..height {
226            for j in 0..width {
227                for k in 0..n_bands {
228                    let mut values = ts.extract_pixel_timeseries(i, j, k)?;
229                    values.sort_by(|a: &f64, b: &f64| {
230                        a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)
231                    });
232
233                    // Calculate Q1, Q3, and IQR
234                    let q1_idx = values.len() / 4;
235                    let q3_idx = 3 * values.len() / 4;
236
237                    let q1 = values[q1_idx];
238                    let q3 = values[q3_idx];
239                    let iqr = q3 - q1;
240
241                    // Outlier bounds
242                    let lower_bound = q1 - threshold * iqr;
243                    let upper_bound = q3 + threshold * iqr;
244
245                    // Count outliers
246                    let anomaly_count = values
247                        .iter()
248                        .filter(|&&v| v < lower_bound || v > upper_bound)
249                        .count();
250
251                    // Score as distance from bounds
252                    let max_score = values
253                        .iter()
254                        .map(|&v| {
255                            if v < lower_bound {
256                                (lower_bound - v) / iqr
257                            } else if v > upper_bound {
258                                (v - upper_bound) / iqr
259                            } else {
260                                0.0
261                            }
262                        })
263                        .fold(0.0_f64, f64::max);
264
265                    scores[[i, j, k]] = max_score;
266                    count[[i, j, k]] = anomaly_count;
267                    mask[[i, j, k]] = if anomaly_count > 0 { 1 } else { 0 };
268                }
269            }
270        }
271
272        info!("Completed IQR anomaly detection");
273        Ok(AnomalyResult::new(scores, mask, count, threshold))
274    }
275
276    /// Simplified isolation forest detection
277    fn isolation_forest_detection(ts: &TimeSeriesRaster, threshold: f64) -> Result<AnomalyResult> {
278        // For now, use modified Z-score as approximation
279        // Full isolation forest would require tree-based implementation
280        info!("Using modified Z-score approximation for isolation forest");
281        Self::modified_zscore_detection(ts, threshold)
282    }
283}
284
285#[cfg(test)]
286mod tests {
287    use super::*;
288    use crate::timeseries::{TemporalMetadata, TimeSeriesRaster};
289    use chrono::{DateTime, NaiveDate};
290
291    #[test]
292    fn test_zscore_detection() {
293        let mut ts = TimeSeriesRaster::new();
294
295        // Create data with an outlier
296        for i in 0..10 {
297            let dt = DateTime::from_timestamp(1640995200 + i * 86400, 0).expect("valid");
298            let date = NaiveDate::from_ymd_opt(2022, 1, 1 + i as u32).expect("valid");
299            let metadata = TemporalMetadata::new(dt, date);
300
301            let value = if i == 5 { 100.0 } else { 10.0 }; // Outlier at i=5
302            let data = Array3::from_elem((3, 3, 1), value);
303            ts.add_raster(metadata, data).expect("should add");
304        }
305
306        let result =
307            AnomalyDetector::detect(&ts, AnomalyMethod::ZScore, 2.0).expect("should detect");
308
309        assert!(result.mask[[0, 0, 0]] == 1); // Should detect anomaly
310        assert!(result.count[[0, 0, 0]] > 0);
311    }
312
313    #[test]
314    fn test_iqr_detection() {
315        let mut ts = TimeSeriesRaster::new();
316
317        for i in 0..20 {
318            let dt = DateTime::from_timestamp(1640995200 + i * 86400, 0).expect("valid");
319            let date = NaiveDate::from_ymd_opt(2022, 1, 1 + i as u32).expect("valid");
320            let metadata = TemporalMetadata::new(dt, date);
321
322            let value = if i == 10 { 200.0 } else { 50.0 + (i as f64) };
323            let data = Array3::from_elem((3, 3, 1), value);
324            ts.add_raster(metadata, data).expect("should add");
325        }
326
327        let result = AnomalyDetector::detect(&ts, AnomalyMethod::IQR, 1.5).expect("should detect");
328
329        assert!(result.mask[[0, 0, 0]] == 1);
330    }
331}