oxigdal_analytics/timeseries/
anomaly.rs1use crate::error::{AnalyticsError, Result};
9use scirs2_core::ndarray::{Array1, ArrayView1};
10
11#[derive(Debug, Clone, Copy, PartialEq, Eq)]
13pub enum AnomalyMethod {
14 ZScore,
16 IQR,
18 ModifiedZScore,
20}
21
22#[derive(Debug, Clone)]
24pub struct AnomalyResult {
25 pub anomaly_indices: Vec<usize>,
27 pub scores: Array1<f64>,
29 pub threshold: f64,
31 pub method: AnomalyMethod,
33}
34
35pub struct AnomalyDetector {
37 method: AnomalyMethod,
38 threshold: f64,
39}
40
41impl AnomalyDetector {
42 pub fn new(method: AnomalyMethod, threshold: f64) -> Self {
48 Self { method, threshold }
49 }
50
51 pub fn detect(&self, values: &ArrayView1<f64>) -> Result<AnomalyResult> {
59 let scores = match self.method {
60 AnomalyMethod::ZScore => self.z_score(values)?,
61 AnomalyMethod::IQR => self.iqr_score(values)?,
62 AnomalyMethod::ModifiedZScore => self.modified_z_score(values)?,
63 };
64
65 let anomaly_indices: Vec<usize> = scores
67 .iter()
68 .enumerate()
69 .filter_map(|(i, &score)| {
70 if score.abs() >= self.threshold {
72 Some(i)
73 } else {
74 None
75 }
76 })
77 .collect();
78
79 Ok(AnomalyResult {
80 anomaly_indices,
81 scores,
82 threshold: self.threshold,
83 method: self.method,
84 })
85 }
86
87 fn z_score(&self, values: &ArrayView1<f64>) -> Result<Array1<f64>> {
91 if values.len() < 2 {
92 return Err(AnalyticsError::insufficient_data(
93 "Z-score requires at least 2 data points",
94 ));
95 }
96
97 let n = values.len() as f64;
98 let mean = values.sum() / n;
99 let variance = values.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n;
100 let std = variance.sqrt();
101
102 if std < f64::EPSILON {
103 return Err(AnalyticsError::numerical_instability(
104 "Standard deviation is too small",
105 ));
106 }
107
108 let mut scores = Array1::zeros(values.len());
109 for (i, &value) in values.iter().enumerate() {
110 scores[i] = (value - mean) / std;
111 }
112
113 Ok(scores)
114 }
115
116 fn iqr_score(&self, values: &ArrayView1<f64>) -> Result<Array1<f64>> {
120 if values.len() < 4 {
121 return Err(AnalyticsError::insufficient_data(
122 "IQR requires at least 4 data points",
123 ));
124 }
125
126 let mut sorted = values.to_vec();
128 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
129
130 let q1 = percentile(&sorted, 25.0)?;
131 let q3 = percentile(&sorted, 75.0)?;
132 let iqr = q3 - q1;
133
134 if iqr < f64::EPSILON {
135 return Err(AnalyticsError::numerical_instability("IQR is too small"));
136 }
137
138 let median = percentile(&sorted, 50.0)?;
139
140 let mut scores = Array1::zeros(values.len());
142 for (i, &value) in values.iter().enumerate() {
143 scores[i] = (value - median).abs() / iqr;
144 }
145
146 Ok(scores)
147 }
148
149 fn modified_z_score(&self, values: &ArrayView1<f64>) -> Result<Array1<f64>> {
153 if values.len() < 2 {
154 return Err(AnalyticsError::insufficient_data(
155 "Modified Z-score requires at least 2 data points",
156 ));
157 }
158
159 let mut sorted = values.to_vec();
161 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
162
163 let median = percentile(&sorted, 50.0)?;
164
165 let mut abs_deviations: Vec<f64> = values.iter().map(|x| (x - median).abs()).collect();
167 abs_deviations.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
168 let mad = percentile(&abs_deviations, 50.0)?;
169
170 let consistency_constant = 1.4826;
172 let normalized_mad = consistency_constant * mad;
173
174 if normalized_mad < f64::EPSILON {
175 return Err(AnalyticsError::numerical_instability("MAD is too small"));
176 }
177
178 let mut scores = Array1::zeros(values.len());
180 for (i, &value) in values.iter().enumerate() {
181 scores[i] = 0.6745 * (value - median) / normalized_mad;
182 }
183
184 Ok(scores)
185 }
186}
187
188fn percentile(sorted_data: &[f64], percentile: f64) -> Result<f64> {
197 if sorted_data.is_empty() {
198 return Err(AnalyticsError::insufficient_data("Data is empty"));
199 }
200
201 if !(0.0..=100.0).contains(&percentile) {
202 return Err(AnalyticsError::invalid_parameter(
203 "percentile",
204 "must be between 0 and 100",
205 ));
206 }
207
208 let n = sorted_data.len();
209 if n == 1 {
210 return Ok(sorted_data[0]);
211 }
212
213 let rank = (percentile / 100.0) * ((n - 1) as f64);
215 let lower_idx = rank.floor() as usize;
216 let upper_idx = rank.ceil() as usize;
217 let fraction = rank - (lower_idx as f64);
218
219 Ok(sorted_data[lower_idx] + fraction * (sorted_data[upper_idx] - sorted_data[lower_idx]))
220}
221
222pub fn detect_change_points(values: &ArrayView1<f64>, threshold: f64) -> Result<Vec<usize>> {
231 if values.len() < 3 {
232 return Err(AnalyticsError::insufficient_data(
233 "Change point detection requires at least 3 data points",
234 ));
235 }
236
237 let n = values.len() as f64;
238 let mean = values.sum() / n;
239
240 let mut cusum_pos = 0.0;
242 let mut cusum_neg = 0.0;
243 let mut change_points = Vec::new();
244
245 for (i, &value) in values.iter().enumerate() {
246 let deviation = value - mean;
247 cusum_pos = (cusum_pos + deviation).max(0.0);
248 cusum_neg = (cusum_neg - deviation).max(0.0);
249
250 if cusum_pos > threshold || cusum_neg > threshold {
251 change_points.push(i);
252 cusum_pos = 0.0;
253 cusum_neg = 0.0;
254 }
255 }
256
257 Ok(change_points)
258}
259
260#[cfg(test)]
261mod tests {
262 use super::*;
263 use approx::assert_abs_diff_eq;
264 use scirs2_core::ndarray::array;
265
266 #[test]
267 fn test_z_score_detection() {
268 let values = array![1.0, 2.0, 3.0, 4.0, 100.0]; let detector = AnomalyDetector::new(AnomalyMethod::ZScore, 1.9);
272 let result = detector
273 .detect(&values.view())
274 .expect("Z-score detection should succeed with valid data");
275
276 assert!(!result.anomaly_indices.is_empty());
277 assert!(result.anomaly_indices.contains(&4));
278 }
279
280 #[test]
281 fn test_iqr_detection() {
282 let values = array![1.0, 2.0, 3.0, 4.0, 5.0, 100.0]; let detector = AnomalyDetector::new(AnomalyMethod::IQR, 1.5);
284 let result = detector
285 .detect(&values.view())
286 .expect("IQR detection should succeed with valid data");
287
288 assert!(!result.anomaly_indices.is_empty());
289 assert!(result.anomaly_indices.contains(&5));
290 }
291
292 #[test]
293 fn test_modified_z_score() {
294 let values = array![1.0, 2.0, 3.0, 4.0, 5.0, 100.0]; let detector = AnomalyDetector::new(AnomalyMethod::ModifiedZScore, 3.5);
296 let result = detector
297 .detect(&values.view())
298 .expect("Modified Z-score detection should succeed with valid data");
299
300 assert!(!result.anomaly_indices.is_empty());
301 }
302
303 #[test]
304 fn test_percentile() {
305 let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
306 let p50 = percentile(&data, 50.0).expect("50th percentile calculation should succeed");
307 assert_abs_diff_eq!(p50, 3.0, epsilon = 1e-10);
308
309 let p25 = percentile(&data, 25.0).expect("25th percentile calculation should succeed");
310 assert_abs_diff_eq!(p25, 2.0, epsilon = 1e-10);
311
312 let p75 = percentile(&data, 75.0).expect("75th percentile calculation should succeed");
313 assert_abs_diff_eq!(p75, 4.0, epsilon = 1e-10);
314 }
315
316 #[test]
317 fn test_change_point_detection() {
318 let values = array![1.0, 1.0, 1.0, 5.0, 5.0, 5.0]; let change_points = detect_change_points(&values.view(), 1.0)
320 .expect("Change point detection should succeed with valid data");
321
322 assert!(!change_points.is_empty());
323 }
324
325 #[test]
326 fn test_no_anomalies() {
327 let values = array![1.0, 2.0, 3.0, 4.0, 5.0];
328 let detector = AnomalyDetector::new(AnomalyMethod::ZScore, 3.0);
329 let result = detector
330 .detect(&values.view())
331 .expect("Anomaly detection should succeed with valid data");
332
333 assert!(result.anomaly_indices.is_empty());
334 }
335}