Skip to main content

cbtop/anomaly_detection/
detector.rs

1//! Anomaly detector with Z-score, IQR, and change point detection.
2
3use super::{
4    Anomaly, AnomalyReport, AnomalyType, ChangePoint, DEFAULT_IQR_MULTIPLIER,
5    DEFAULT_ZSCORE_THRESHOLD, MIN_SAMPLES_FOR_DETECTION,
6};
7
8/// Anomaly detector
9#[derive(Debug)]
10pub struct AnomalyDetector {
11    /// Data buffer
12    data: Vec<f64>,
13    /// Z-score threshold
14    zscore_threshold: f64,
15    /// IQR multiplier
16    iqr_multiplier: f64,
17    /// Sliding window size for real-time detection
18    window_size: usize,
19    /// Detected anomalies
20    anomalies: Vec<Anomaly>,
21    /// Detected change points
22    change_points: Vec<ChangePoint>,
23}
24
25impl Default for AnomalyDetector {
26    fn default() -> Self {
27        Self {
28            data: Vec::new(),
29            zscore_threshold: DEFAULT_ZSCORE_THRESHOLD,
30            iqr_multiplier: DEFAULT_IQR_MULTIPLIER,
31            window_size: 50,
32            anomalies: Vec::new(),
33            change_points: Vec::new(),
34        }
35    }
36}
37
38impl AnomalyDetector {
39    /// Create new detector
40    pub fn new() -> Self {
41        Self::default()
42    }
43
44    /// Set Z-score threshold
45    pub fn with_zscore_threshold(mut self, threshold: f64) -> Self {
46        self.zscore_threshold = threshold.max(1.0);
47        self
48    }
49
50    /// Set IQR multiplier
51    pub fn with_iqr_multiplier(mut self, multiplier: f64) -> Self {
52        self.iqr_multiplier = multiplier.max(0.5);
53        self
54    }
55
56    /// Set sliding window size
57    pub fn with_window_size(mut self, size: usize) -> Self {
58        self.window_size = size.max(10);
59        self
60    }
61
62    /// Add data point
63    pub fn add(&mut self, value: f64) {
64        self.data.push(value);
65    }
66
67    /// Add multiple data points
68    pub fn add_all(&mut self, values: &[f64]) {
69        self.data.extend_from_slice(values);
70    }
71
72    /// Get data count
73    pub fn data_count(&self) -> usize {
74        self.data.len()
75    }
76
77    /// Check if sufficient data for analysis
78    pub fn has_sufficient_data(&self) -> bool {
79        self.data.len() >= MIN_SAMPLES_FOR_DETECTION
80    }
81
82    /// Calculate mean of slice
83    fn mean(data: &[f64]) -> f64 {
84        if data.is_empty() {
85            return 0.0;
86        }
87        data.iter().sum::<f64>() / data.len() as f64
88    }
89
90    /// Calculate standard deviation
91    fn std_dev(data: &[f64], mean: f64) -> f64 {
92        if data.len() < 2 {
93            return 0.0;
94        }
95        let variance =
96            data.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (data.len() - 1) as f64;
97        variance.sqrt()
98    }
99
100    /// Calculate percentile
101    fn percentile(sorted_data: &[f64], p: f64) -> f64 {
102        if sorted_data.is_empty() {
103            return 0.0;
104        }
105        let idx = (p / 100.0 * (sorted_data.len() - 1) as f64).round() as usize;
106        sorted_data[idx.min(sorted_data.len() - 1)]
107    }
108
109    /// Detect outliers using Z-score method
110    pub fn detect_zscore_outliers(&mut self) -> Vec<Anomaly> {
111        if !self.has_sufficient_data() {
112            return Vec::new();
113        }
114
115        let mean = Self::mean(&self.data);
116        let std_dev = Self::std_dev(&self.data, mean);
117
118        if std_dev < 1e-10 {
119            return Vec::new(); // No variation
120        }
121
122        let mut outliers = Vec::new();
123        for (i, &value) in self.data.iter().enumerate() {
124            let z_score = (value - mean) / std_dev;
125            if z_score.abs() > self.zscore_threshold {
126                let anomaly_type = if z_score > 0.0 {
127                    AnomalyType::Spike
128                } else {
129                    AnomalyType::Drop
130                };
131                outliers.push(Anomaly::new(i, value, mean, z_score, anomaly_type));
132            }
133        }
134
135        self.anomalies.extend(outliers.clone());
136        outliers
137    }
138
139    /// Detect outliers using IQR method (robust to heavy tails)
140    pub fn detect_iqr_outliers(&mut self) -> Vec<Anomaly> {
141        if !self.has_sufficient_data() {
142            return Vec::new();
143        }
144
145        let mut sorted = self.data.clone();
146        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
147
148        let q1 = Self::percentile(&sorted, 25.0);
149        let q3 = Self::percentile(&sorted, 75.0);
150        let iqr = q3 - q1;
151
152        if iqr < 1e-10 {
153            return Vec::new(); // No variation
154        }
155
156        let lower_bound = q1 - self.iqr_multiplier * iqr;
157        let upper_bound = q3 + self.iqr_multiplier * iqr;
158        let median = Self::percentile(&sorted, 50.0);
159
160        let mut outliers = Vec::new();
161        for (i, &value) in self.data.iter().enumerate() {
162            if value < lower_bound || value > upper_bound {
163                let deviation = if value < lower_bound {
164                    (lower_bound - value) / iqr
165                } else {
166                    (value - upper_bound) / iqr
167                };
168                let anomaly_type = if value > median {
169                    AnomalyType::Spike
170                } else {
171                    AnomalyType::Drop
172                };
173                outliers.push(Anomaly::new(i, value, median, deviation, anomaly_type));
174            }
175        }
176
177        self.anomalies.extend(outliers.clone());
178        outliers
179    }
180
181    /// Detect change points using CUSUM-like algorithm
182    pub fn detect_change_points(&mut self) -> Vec<ChangePoint> {
183        if self.data.len() < 20 {
184            return Vec::new();
185        }
186
187        let overall_mean = Self::mean(&self.data);
188        let overall_std = Self::std_dev(&self.data, overall_mean);
189
190        if overall_std < 1e-10 {
191            return Vec::new();
192        }
193
194        let mut change_points = Vec::new();
195        let min_segment = 10;
196
197        // Simple change point detection: find points where mean shifts significantly
198        for i in min_segment..(self.data.len() - min_segment) {
199            let before = &self.data[..i];
200            let after = &self.data[i..];
201
202            let mean_before = Self::mean(before);
203            let mean_after = Self::mean(after);
204
205            let change = ChangePoint::new(i, mean_before, mean_after);
206
207            // Check if change is significant (>1 std dev shift)
208            if change.magnitude > overall_std {
209                change_points.push(change);
210            }
211        }
212
213        // Filter to keep only most significant change points
214        if change_points.len() > 1 {
215            change_points.sort_by(|a, b| {
216                b.magnitude
217                    .partial_cmp(&a.magnitude)
218                    .unwrap_or(std::cmp::Ordering::Equal)
219            });
220            // Keep top change points
221            change_points.truncate(3);
222        }
223
224        self.change_points.extend(change_points.clone());
225        change_points
226    }
227
228    /// Classify anomaly based on context
229    pub fn classify_anomaly(&self, anomaly: &Anomaly) -> AnomalyType {
230        // Check if it's part of a change point
231        for cp in &self.change_points {
232            if (anomaly.index as i64 - cp.index as i64).abs() < 5 {
233                return AnomalyType::ChangePoint;
234            }
235        }
236
237        // Check if correlated with other anomalies (cluster)
238        let nearby_count = self
239            .anomalies
240            .iter()
241            .filter(|a| {
242                (a.index as i64 - anomaly.index as i64).abs() < 3 && a.index != anomaly.index
243            })
244            .count();
245
246        if nearby_count >= 2 {
247            return AnomalyType::Correlated;
248        }
249
250        anomaly.anomaly_type
251    }
252
253    /// Run all detection methods and generate report
254    pub fn analyze(&mut self) -> AnomalyReport {
255        self.anomalies.clear();
256        self.change_points.clear();
257
258        let zscore_outliers = self.detect_zscore_outliers();
259        let _iqr_outliers = self.detect_iqr_outliers();
260        let change_points = self.detect_change_points();
261
262        // Deduplicate anomalies by index
263        let mut seen = std::collections::HashSet::new();
264        self.anomalies.retain(|a| seen.insert(a.index));
265
266        // Sort by severity (critical first)
267        self.anomalies.sort_by(|a, b| b.severity.cmp(&a.severity));
268
269        let mean = Self::mean(&self.data);
270        let std_dev = Self::std_dev(&self.data, mean);
271
272        AnomalyReport {
273            total_points: self.data.len(),
274            anomalies: self.anomalies.clone(),
275            change_points,
276            mean,
277            std_dev,
278            method: if zscore_outliers.is_empty() {
279                "iqr"
280            } else {
281                "zscore"
282            },
283        }
284    }
285
286    /// Real-time anomaly detection on sliding window
287    pub fn detect_realtime(&mut self, new_value: f64) -> Option<Anomaly> {
288        self.data.push(new_value);
289
290        if self.data.len() < self.window_size {
291            return None;
292        }
293
294        // Use sliding window
295        let start = self.data.len().saturating_sub(self.window_size);
296        let window = &self.data[start..self.data.len() - 1]; // Exclude new value
297
298        let mean = Self::mean(window);
299        let std_dev = Self::std_dev(window, mean);
300
301        if std_dev < 1e-10 {
302            return None;
303        }
304
305        let z_score = (new_value - mean) / std_dev;
306        if z_score.abs() > self.zscore_threshold {
307            let anomaly_type = if z_score > 0.0 {
308                AnomalyType::Spike
309            } else {
310                AnomalyType::Drop
311            };
312            let anomaly = Anomaly::new(self.data.len() - 1, new_value, mean, z_score, anomaly_type);
313            self.anomalies.push(anomaly.clone());
314            return Some(anomaly);
315        }
316
317        None
318    }
319
320    /// Get all detected anomalies
321    pub fn get_anomalies(&self) -> &[Anomaly] {
322        &self.anomalies
323    }
324
325    /// Get all detected change points
326    pub fn get_change_points(&self) -> &[ChangePoint] {
327        &self.change_points
328    }
329
330    /// Clear all data and detections
331    pub fn clear(&mut self) {
332        self.data.clear();
333        self.anomalies.clear();
334        self.change_points.clear();
335    }
336}