do_memory_mcp/patterns/predictive/
anomaly.rs1use anyhow::Result;
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use tracing::{debug, info, instrument, warn};
5
6use super::dbscan::{AdaptiveDBSCAN, ClusterLabel, DBSCANConfig};
7use super::forecasting::types::PredictiveConfig;
8
9#[allow(dead_code)]
10pub struct AnomalyDetector {
11 config: PredictiveConfig,
12 dbscan: AdaptiveDBSCAN,
13}
14
15impl AnomalyDetector {
16 pub fn new() -> Result<Self> {
18 Self::with_config(PredictiveConfig::default())
19 }
20
21 pub fn with_config(config: PredictiveConfig) -> Result<Self> {
23 let dbscan_config = DBSCANConfig {
24 density: 0.1 * config.anomaly_sensitivity, min_cluster_size: 3,
26 max_distance: 1.0,
27 window_size: config.reservoir_size.min(1000),
28 };
29 let dbscan = AdaptiveDBSCAN::new(dbscan_config)?;
30 Ok(Self { config, dbscan })
31 }
32
33 #[instrument(skip(self, data))]
35 pub fn detect_anomalies(
36 &mut self,
37 data: &HashMap<String, Vec<f64>>,
38 ) -> Result<Vec<AnomalyResult>> {
39 let mut results = Vec::new();
40
41 info!("Detecting anomalies in {} variables", data.len());
42
43 for (var_name, series) in data {
44 if series.len() < 3 {
45 warn!(
46 "Skipping anomaly detection for {}: insufficient data points (need at least 3)",
47 var_name
48 );
49 continue;
50 }
51
52 let anomaly_result = self.detect_variable_anomalies(var_name, series)?;
53 results.push(anomaly_result);
54 }
55
56 debug!("Detected anomalies in {} variables", results.len());
57 Ok(results)
58 }
59
60 fn detect_variable_anomalies(
62 &mut self,
63 variable: &str,
64 series: &[f64],
65 ) -> Result<AnomalyResult> {
66 let timestamps: Vec<f64> = (0..series.len()).map(|i| i as f64).collect();
68
69 let cluster_labels = self.dbscan.detect_anomalies_dbscan(series, ×tamps);
71
72 let mut anomaly_indices = Vec::new();
74 let mut anomaly_scores = Vec::new();
75
76 for (i, &label) in cluster_labels.iter().enumerate() {
77 match label {
78 ClusterLabel::Noise => {
79 anomaly_indices.push(i);
80 let deviation =
82 (series[i] - series.iter().sum::<f64>() / series.len() as f64).abs();
83 let variance: f64 = series
84 .iter()
85 .map(|&x| {
86 let mean = series.iter().sum::<f64>() / series.len() as f64;
87 (x - mean).powi(2)
88 })
89 .sum::<f64>()
90 / series.len() as f64;
91 let std_dev = variance.sqrt();
92 anomaly_scores.push(if std_dev > 0.0 {
93 deviation / std_dev
94 } else {
95 1.0
96 });
97 }
98 ClusterLabel::Cluster(_) => {
99 anomaly_scores.push(0.0);
101 }
102 }
103 }
104
105 let confidence = if !series.is_empty() {
107 let cluster_count = cluster_labels
108 .iter()
109 .filter(|&label| !matches!(label, ClusterLabel::Noise))
110 .count();
111 let noise_ratio = (series.len() - cluster_count) as f64 / series.len() as f64;
112 (1.0 - noise_ratio).clamp(0.0, 1.0)
114 } else {
115 0.0
116 };
117
118 Ok(AnomalyResult {
119 variable: variable.to_string(),
120 anomaly_indices,
121 anomaly_scores,
122 method: "DBSCAN".to_string(),
123 confidence: confidence.clamp(0.0, 1.0),
124 })
125 }
126}
127
128#[derive(Debug, Clone, Serialize, Deserialize)]
130pub struct AnomalyResult {
131 pub variable: String,
133 pub anomaly_indices: Vec<usize>,
135 pub anomaly_scores: Vec<f64>,
137 pub method: String,
139 pub confidence: f64,
141}