cbtop/anomaly_detection/
detector.rs1use super::{
4 Anomaly, AnomalyReport, AnomalyType, ChangePoint, DEFAULT_IQR_MULTIPLIER,
5 DEFAULT_ZSCORE_THRESHOLD, MIN_SAMPLES_FOR_DETECTION,
6};
7
8#[derive(Debug)]
10pub struct AnomalyDetector {
11 data: Vec<f64>,
13 zscore_threshold: f64,
15 iqr_multiplier: f64,
17 window_size: usize,
19 anomalies: Vec<Anomaly>,
21 change_points: Vec<ChangePoint>,
23}
24
25impl Default for AnomalyDetector {
26 fn default() -> Self {
27 Self {
28 data: Vec::new(),
29 zscore_threshold: DEFAULT_ZSCORE_THRESHOLD,
30 iqr_multiplier: DEFAULT_IQR_MULTIPLIER,
31 window_size: 50,
32 anomalies: Vec::new(),
33 change_points: Vec::new(),
34 }
35 }
36}
37
38impl AnomalyDetector {
39 pub fn new() -> Self {
41 Self::default()
42 }
43
44 pub fn with_zscore_threshold(mut self, threshold: f64) -> Self {
46 self.zscore_threshold = threshold.max(1.0);
47 self
48 }
49
50 pub fn with_iqr_multiplier(mut self, multiplier: f64) -> Self {
52 self.iqr_multiplier = multiplier.max(0.5);
53 self
54 }
55
56 pub fn with_window_size(mut self, size: usize) -> Self {
58 self.window_size = size.max(10);
59 self
60 }
61
62 pub fn add(&mut self, value: f64) {
64 self.data.push(value);
65 }
66
67 pub fn add_all(&mut self, values: &[f64]) {
69 self.data.extend_from_slice(values);
70 }
71
72 pub fn data_count(&self) -> usize {
74 self.data.len()
75 }
76
77 pub fn has_sufficient_data(&self) -> bool {
79 self.data.len() >= MIN_SAMPLES_FOR_DETECTION
80 }
81
82 fn mean(data: &[f64]) -> f64 {
84 if data.is_empty() {
85 return 0.0;
86 }
87 data.iter().sum::<f64>() / data.len() as f64
88 }
89
90 fn std_dev(data: &[f64], mean: f64) -> f64 {
92 if data.len() < 2 {
93 return 0.0;
94 }
95 let variance =
96 data.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (data.len() - 1) as f64;
97 variance.sqrt()
98 }
99
100 fn percentile(sorted_data: &[f64], p: f64) -> f64 {
102 if sorted_data.is_empty() {
103 return 0.0;
104 }
105 let idx = (p / 100.0 * (sorted_data.len() - 1) as f64).round() as usize;
106 sorted_data[idx.min(sorted_data.len() - 1)]
107 }
108
109 pub fn detect_zscore_outliers(&mut self) -> Vec<Anomaly> {
111 if !self.has_sufficient_data() {
112 return Vec::new();
113 }
114
115 let mean = Self::mean(&self.data);
116 let std_dev = Self::std_dev(&self.data, mean);
117
118 if std_dev < 1e-10 {
119 return Vec::new(); }
121
122 let mut outliers = Vec::new();
123 for (i, &value) in self.data.iter().enumerate() {
124 let z_score = (value - mean) / std_dev;
125 if z_score.abs() > self.zscore_threshold {
126 let anomaly_type = if z_score > 0.0 {
127 AnomalyType::Spike
128 } else {
129 AnomalyType::Drop
130 };
131 outliers.push(Anomaly::new(i, value, mean, z_score, anomaly_type));
132 }
133 }
134
135 self.anomalies.extend(outliers.clone());
136 outliers
137 }
138
139 pub fn detect_iqr_outliers(&mut self) -> Vec<Anomaly> {
141 if !self.has_sufficient_data() {
142 return Vec::new();
143 }
144
145 let mut sorted = self.data.clone();
146 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
147
148 let q1 = Self::percentile(&sorted, 25.0);
149 let q3 = Self::percentile(&sorted, 75.0);
150 let iqr = q3 - q1;
151
152 if iqr < 1e-10 {
153 return Vec::new(); }
155
156 let lower_bound = q1 - self.iqr_multiplier * iqr;
157 let upper_bound = q3 + self.iqr_multiplier * iqr;
158 let median = Self::percentile(&sorted, 50.0);
159
160 let mut outliers = Vec::new();
161 for (i, &value) in self.data.iter().enumerate() {
162 if value < lower_bound || value > upper_bound {
163 let deviation = if value < lower_bound {
164 (lower_bound - value) / iqr
165 } else {
166 (value - upper_bound) / iqr
167 };
168 let anomaly_type = if value > median {
169 AnomalyType::Spike
170 } else {
171 AnomalyType::Drop
172 };
173 outliers.push(Anomaly::new(i, value, median, deviation, anomaly_type));
174 }
175 }
176
177 self.anomalies.extend(outliers.clone());
178 outliers
179 }
180
181 pub fn detect_change_points(&mut self) -> Vec<ChangePoint> {
183 if self.data.len() < 20 {
184 return Vec::new();
185 }
186
187 let overall_mean = Self::mean(&self.data);
188 let overall_std = Self::std_dev(&self.data, overall_mean);
189
190 if overall_std < 1e-10 {
191 return Vec::new();
192 }
193
194 let mut change_points = Vec::new();
195 let min_segment = 10;
196
197 for i in min_segment..(self.data.len() - min_segment) {
199 let before = &self.data[..i];
200 let after = &self.data[i..];
201
202 let mean_before = Self::mean(before);
203 let mean_after = Self::mean(after);
204
205 let change = ChangePoint::new(i, mean_before, mean_after);
206
207 if change.magnitude > overall_std {
209 change_points.push(change);
210 }
211 }
212
213 if change_points.len() > 1 {
215 change_points.sort_by(|a, b| {
216 b.magnitude
217 .partial_cmp(&a.magnitude)
218 .unwrap_or(std::cmp::Ordering::Equal)
219 });
220 change_points.truncate(3);
222 }
223
224 self.change_points.extend(change_points.clone());
225 change_points
226 }
227
228 pub fn classify_anomaly(&self, anomaly: &Anomaly) -> AnomalyType {
230 for cp in &self.change_points {
232 if (anomaly.index as i64 - cp.index as i64).abs() < 5 {
233 return AnomalyType::ChangePoint;
234 }
235 }
236
237 let nearby_count = self
239 .anomalies
240 .iter()
241 .filter(|a| {
242 (a.index as i64 - anomaly.index as i64).abs() < 3 && a.index != anomaly.index
243 })
244 .count();
245
246 if nearby_count >= 2 {
247 return AnomalyType::Correlated;
248 }
249
250 anomaly.anomaly_type
251 }
252
253 pub fn analyze(&mut self) -> AnomalyReport {
255 self.anomalies.clear();
256 self.change_points.clear();
257
258 let zscore_outliers = self.detect_zscore_outliers();
259 let _iqr_outliers = self.detect_iqr_outliers();
260 let change_points = self.detect_change_points();
261
262 let mut seen = std::collections::HashSet::new();
264 self.anomalies.retain(|a| seen.insert(a.index));
265
266 self.anomalies.sort_by(|a, b| b.severity.cmp(&a.severity));
268
269 let mean = Self::mean(&self.data);
270 let std_dev = Self::std_dev(&self.data, mean);
271
272 AnomalyReport {
273 total_points: self.data.len(),
274 anomalies: self.anomalies.clone(),
275 change_points,
276 mean,
277 std_dev,
278 method: if zscore_outliers.is_empty() {
279 "iqr"
280 } else {
281 "zscore"
282 },
283 }
284 }
285
286 pub fn detect_realtime(&mut self, new_value: f64) -> Option<Anomaly> {
288 self.data.push(new_value);
289
290 if self.data.len() < self.window_size {
291 return None;
292 }
293
294 let start = self.data.len().saturating_sub(self.window_size);
296 let window = &self.data[start..self.data.len() - 1]; let mean = Self::mean(window);
299 let std_dev = Self::std_dev(window, mean);
300
301 if std_dev < 1e-10 {
302 return None;
303 }
304
305 let z_score = (new_value - mean) / std_dev;
306 if z_score.abs() > self.zscore_threshold {
307 let anomaly_type = if z_score > 0.0 {
308 AnomalyType::Spike
309 } else {
310 AnomalyType::Drop
311 };
312 let anomaly = Anomaly::new(self.data.len() - 1, new_value, mean, z_score, anomaly_type);
313 self.anomalies.push(anomaly.clone());
314 return Some(anomaly);
315 }
316
317 None
318 }
319
320 pub fn get_anomalies(&self) -> &[Anomaly] {
322 &self.anomalies
323 }
324
325 pub fn get_change_points(&self) -> &[ChangePoint] {
327 &self.change_points
328 }
329
330 pub fn clear(&mut self) {
332 self.data.clear();
333 self.anomalies.clear();
334 self.change_points.clear();
335 }
336}