oxigdal_temporal/analysis/
anomaly.rs1use crate::error::{Result, TemporalError};
7use crate::timeseries::TimeSeriesRaster;
8use scirs2_core::ndarray::Array3;
9use serde::{Deserialize, Serialize};
10use tracing::info;
11
12#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
14pub enum AnomalyMethod {
15 ZScore,
17 IQR,
19 ModifiedZScore,
21 IsolationForest,
23}
24
25#[derive(Debug, Clone)]
27pub struct AnomalyResult {
28 pub scores: Array3<f64>,
30 pub mask: Array3<u8>,
32 pub count: Array3<usize>,
34 pub threshold: f64,
36}
37
38impl AnomalyResult {
39 #[must_use]
41 pub fn new(
42 scores: Array3<f64>,
43 mask: Array3<u8>,
44 count: Array3<usize>,
45 threshold: f64,
46 ) -> Self {
47 Self {
48 scores,
49 mask,
50 count,
51 threshold,
52 }
53 }
54}
55
56pub struct AnomalyDetector;
58
59impl AnomalyDetector {
60 pub fn detect(
65 ts: &TimeSeriesRaster,
66 method: AnomalyMethod,
67 threshold: f64,
68 ) -> Result<AnomalyResult> {
69 match method {
70 AnomalyMethod::ZScore => Self::zscore_detection(ts, threshold),
71 AnomalyMethod::ModifiedZScore => Self::modified_zscore_detection(ts, threshold),
72 AnomalyMethod::IQR => Self::iqr_detection(ts, threshold),
73 AnomalyMethod::IsolationForest => Self::isolation_forest_detection(ts, threshold),
74 }
75 }
76
77 fn zscore_detection(ts: &TimeSeriesRaster, threshold: f64) -> Result<AnomalyResult> {
79 if ts.len() < 3 {
80 return Err(TemporalError::insufficient_data(
81 "Need at least 3 observations",
82 ));
83 }
84
85 let (height, width, n_bands) = ts
86 .expected_shape()
87 .ok_or_else(|| TemporalError::insufficient_data("No shape information"))?;
88
89 let mut scores = Array3::zeros((height, width, n_bands));
90 let mut mask = Array3::zeros((height, width, n_bands));
91 let mut count = Array3::zeros((height, width, n_bands));
92
93 for i in 0..height {
94 for j in 0..width {
95 for k in 0..n_bands {
96 let values = ts.extract_pixel_timeseries(i, j, k)?;
97
98 let mean = values.iter().sum::<f64>() / values.len() as f64;
100 let variance = values.iter().map(|v| (v - mean).powi(2)).sum::<f64>()
101 / values.len() as f64;
102 let std_dev = variance.sqrt();
103
104 let max_zscore = if std_dev > 0.0 {
106 values
107 .iter()
108 .map(|v| ((v - mean) / std_dev).abs())
109 .fold(0.0_f64, f64::max)
110 } else {
111 0.0
112 };
113
114 scores[[i, j, k]] = max_zscore;
115
116 if std_dev > 0.0 {
118 let anomaly_count = values
119 .iter()
120 .filter(|v| ((*v - mean) / std_dev).abs() > threshold)
121 .count();
122 count[[i, j, k]] = anomaly_count;
123 mask[[i, j, k]] = if anomaly_count > 0 { 1 } else { 0 };
124 }
125 }
126 }
127 }
128
129 info!("Completed Z-score anomaly detection");
130 Ok(AnomalyResult::new(scores, mask, count, threshold))
131 }
132
133 fn modified_zscore_detection(ts: &TimeSeriesRaster, threshold: f64) -> Result<AnomalyResult> {
135 if ts.len() < 3 {
136 return Err(TemporalError::insufficient_data(
137 "Need at least 3 observations",
138 ));
139 }
140
141 let (height, width, n_bands) = ts
142 .expected_shape()
143 .ok_or_else(|| TemporalError::insufficient_data("No shape information"))?;
144
145 let mut scores = Array3::zeros((height, width, n_bands));
146 let mut mask = Array3::zeros((height, width, n_bands));
147 let mut count = Array3::zeros((height, width, n_bands));
148
149 for i in 0..height {
150 for j in 0..width {
151 for k in 0..n_bands {
152 let mut values = ts.extract_pixel_timeseries(i, j, k)?;
153 values.sort_by(|a: &f64, b: &f64| {
154 a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)
155 });
156
157 let median = if values.len() % 2 == 0 {
159 (values[values.len() / 2 - 1] + values[values.len() / 2]) / 2.0
160 } else {
161 values[values.len() / 2]
162 };
163
164 let mut deviations: Vec<f64> =
166 values.iter().map(|v: &f64| (v - median).abs()).collect();
167 deviations.sort_by(|a: &f64, b: &f64| {
168 a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)
169 });
170
171 let mad = if deviations.len() % 2 == 0 {
172 (deviations[deviations.len() / 2 - 1] + deviations[deviations.len() / 2])
173 / 2.0
174 } else {
175 deviations[deviations.len() / 2]
176 };
177
178 let max_modified_zscore = if mad > 0.0 {
180 values
181 .iter()
182 .map(|v: &f64| (0.6745 * (v - median) / mad).abs())
183 .fold(0.0_f64, f64::max)
184 } else {
185 0.0
186 };
187
188 scores[[i, j, k]] = max_modified_zscore;
189
190 let anomaly_count = if mad > 0.0 {
191 values
192 .iter()
193 .filter(|v: &&f64| (0.6745 * (*v - median) / mad).abs() > threshold)
194 .count()
195 } else {
196 0
197 };
198
199 count[[i, j, k]] = anomaly_count;
200 mask[[i, j, k]] = if anomaly_count > 0 { 1 } else { 0 };
201 }
202 }
203 }
204
205 info!("Completed modified Z-score anomaly detection");
206 Ok(AnomalyResult::new(scores, mask, count, threshold))
207 }
208
209 fn iqr_detection(ts: &TimeSeriesRaster, threshold: f64) -> Result<AnomalyResult> {
211 if ts.len() < 4 {
212 return Err(TemporalError::insufficient_data(
213 "Need at least 4 observations for IQR",
214 ));
215 }
216
217 let (height, width, n_bands) = ts
218 .expected_shape()
219 .ok_or_else(|| TemporalError::insufficient_data("No shape information"))?;
220
221 let mut scores = Array3::zeros((height, width, n_bands));
222 let mut mask = Array3::zeros((height, width, n_bands));
223 let mut count = Array3::zeros((height, width, n_bands));
224
225 for i in 0..height {
226 for j in 0..width {
227 for k in 0..n_bands {
228 let mut values = ts.extract_pixel_timeseries(i, j, k)?;
229 values.sort_by(|a: &f64, b: &f64| {
230 a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)
231 });
232
233 let q1_idx = values.len() / 4;
235 let q3_idx = 3 * values.len() / 4;
236
237 let q1 = values[q1_idx];
238 let q3 = values[q3_idx];
239 let iqr = q3 - q1;
240
241 let lower_bound = q1 - threshold * iqr;
243 let upper_bound = q3 + threshold * iqr;
244
245 let anomaly_count = values
247 .iter()
248 .filter(|&&v| v < lower_bound || v > upper_bound)
249 .count();
250
251 let max_score = values
253 .iter()
254 .map(|&v| {
255 if v < lower_bound {
256 (lower_bound - v) / iqr
257 } else if v > upper_bound {
258 (v - upper_bound) / iqr
259 } else {
260 0.0
261 }
262 })
263 .fold(0.0_f64, f64::max);
264
265 scores[[i, j, k]] = max_score;
266 count[[i, j, k]] = anomaly_count;
267 mask[[i, j, k]] = if anomaly_count > 0 { 1 } else { 0 };
268 }
269 }
270 }
271
272 info!("Completed IQR anomaly detection");
273 Ok(AnomalyResult::new(scores, mask, count, threshold))
274 }
275
276 fn isolation_forest_detection(ts: &TimeSeriesRaster, threshold: f64) -> Result<AnomalyResult> {
278 info!("Using modified Z-score approximation for isolation forest");
281 Self::modified_zscore_detection(ts, threshold)
282 }
283}
284
285#[cfg(test)]
286mod tests {
287 use super::*;
288 use crate::timeseries::{TemporalMetadata, TimeSeriesRaster};
289 use chrono::{DateTime, NaiveDate};
290
291 #[test]
292 fn test_zscore_detection() {
293 let mut ts = TimeSeriesRaster::new();
294
295 for i in 0..10 {
297 let dt = DateTime::from_timestamp(1640995200 + i * 86400, 0).expect("valid");
298 let date = NaiveDate::from_ymd_opt(2022, 1, 1 + i as u32).expect("valid");
299 let metadata = TemporalMetadata::new(dt, date);
300
301 let value = if i == 5 { 100.0 } else { 10.0 }; let data = Array3::from_elem((3, 3, 1), value);
303 ts.add_raster(metadata, data).expect("should add");
304 }
305
306 let result =
307 AnomalyDetector::detect(&ts, AnomalyMethod::ZScore, 2.0).expect("should detect");
308
309 assert!(result.mask[[0, 0, 0]] == 1); assert!(result.count[[0, 0, 0]] > 0);
311 }
312
313 #[test]
314 fn test_iqr_detection() {
315 let mut ts = TimeSeriesRaster::new();
316
317 for i in 0..20 {
318 let dt = DateTime::from_timestamp(1640995200 + i * 86400, 0).expect("valid");
319 let date = NaiveDate::from_ymd_opt(2022, 1, 1 + i as u32).expect("valid");
320 let metadata = TemporalMetadata::new(dt, date);
321
322 let value = if i == 10 { 200.0 } else { 50.0 + (i as f64) };
323 let data = Array3::from_elem((3, 3, 1), value);
324 ts.add_raster(metadata, data).expect("should add");
325 }
326
327 let result = AnomalyDetector::detect(&ts, AnomalyMethod::IQR, 1.5).expect("should detect");
328
329 assert!(result.mask[[0, 0, 0]] == 1);
330 }
331}