Skip to main content

do_memory_mcp/patterns/predictive/
anomaly.rs

1use anyhow::Result;
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use tracing::{debug, info, instrument, warn};
5
6use super::dbscan::{AdaptiveDBSCAN, ClusterLabel, DBSCANConfig};
7use super::forecasting::types::PredictiveConfig;
8
9#[allow(dead_code)]
10pub struct AnomalyDetector {
11    config: PredictiveConfig,
12    dbscan: AdaptiveDBSCAN,
13}
14
15impl AnomalyDetector {
16    /// Create a new anomaly detector
17    pub fn new() -> Result<Self> {
18        Self::with_config(PredictiveConfig::default())
19    }
20
21    /// Create a new anomaly detector with custom config
22    pub fn with_config(config: PredictiveConfig) -> Result<Self> {
23        let dbscan_config = DBSCANConfig {
24            density: 0.1 * config.anomaly_sensitivity, // Scale density with sensitivity
25            min_cluster_size: 3,
26            max_distance: 1.0,
27            window_size: config.reservoir_size.min(1000),
28        };
29        let dbscan = AdaptiveDBSCAN::new(dbscan_config)?;
30        Ok(Self { config, dbscan })
31    }
32
33    /// Detect anomalies in time series data
34    #[instrument(skip(self, data))]
35    pub fn detect_anomalies(
36        &mut self,
37        data: &HashMap<String, Vec<f64>>,
38    ) -> Result<Vec<AnomalyResult>> {
39        let mut results = Vec::new();
40
41        info!("Detecting anomalies in {} variables", data.len());
42
43        for (var_name, series) in data {
44            if series.len() < 3 {
45                warn!(
46                    "Skipping anomaly detection for {}: insufficient data points (need at least 3)",
47                    var_name
48                );
49                continue;
50            }
51
52            let anomaly_result = self.detect_variable_anomalies(var_name, series)?;
53            results.push(anomaly_result);
54        }
55
56        debug!("Detected anomalies in {} variables", results.len());
57        Ok(results)
58    }
59
60    /// Detect anomalies in a single variable using DBSCAN
61    fn detect_variable_anomalies(
62        &mut self,
63        variable: &str,
64        series: &[f64],
65    ) -> Result<AnomalyResult> {
66        // Create timestamps for temporal context
67        let timestamps: Vec<f64> = (0..series.len()).map(|i| i as f64).collect();
68
69        // Use DBSCAN-based anomaly detection
70        let cluster_labels = self.dbscan.detect_anomalies_dbscan(series, &timestamps);
71
72        // Process results
73        let mut anomaly_indices = Vec::new();
74        let mut anomaly_scores = Vec::new();
75
76        for (i, &label) in cluster_labels.iter().enumerate() {
77            match label {
78                ClusterLabel::Noise => {
79                    anomaly_indices.push(i);
80                    // Higher score for noise points (anomalies)
81                    let deviation =
82                        (series[i] - series.iter().sum::<f64>() / series.len() as f64).abs();
83                    let variance: f64 = series
84                        .iter()
85                        .map(|&x| {
86                            let mean = series.iter().sum::<f64>() / series.len() as f64;
87                            (x - mean).powi(2)
88                        })
89                        .sum::<f64>()
90                        / series.len() as f64;
91                    let std_dev = variance.sqrt();
92                    anomaly_scores.push(if std_dev > 0.0 {
93                        deviation / std_dev
94                    } else {
95                        1.0
96                    });
97                }
98                ClusterLabel::Cluster(_) => {
99                    // Normal point - low anomaly score
100                    anomaly_scores.push(0.0);
101                }
102            }
103        }
104
105        // Calculate confidence based on cluster quality
106        let confidence = if !series.is_empty() {
107            let cluster_count = cluster_labels
108                .iter()
109                .filter(|&label| !matches!(label, ClusterLabel::Noise))
110                .count();
111            let noise_ratio = (series.len() - cluster_count) as f64 / series.len() as f64;
112            // Higher confidence when we have good clustering (low noise ratio)
113            (1.0 - noise_ratio).clamp(0.0, 1.0)
114        } else {
115            0.0
116        };
117
118        Ok(AnomalyResult {
119            variable: variable.to_string(),
120            anomaly_indices,
121            anomaly_scores,
122            method: "DBSCAN".to_string(),
123            confidence: confidence.clamp(0.0, 1.0),
124        })
125    }
126}
127
128/// Anomaly detection results
129#[derive(Debug, Clone, Serialize, Deserialize)]
130pub struct AnomalyResult {
131    /// Variable name
132    pub variable: String,
133    /// Indices of detected anomalies
134    pub anomaly_indices: Vec<usize>,
135    /// Anomaly scores for each point
136    pub anomaly_scores: Vec<f64>,
137    /// Detection method used
138    pub method: String,
139    /// Detection confidence
140    pub confidence: f64,
141}