use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use tracing::{debug, info, instrument, warn};
use super::dbscan::{AdaptiveDBSCAN, ClusterLabel, DBSCANConfig};
use super::forecasting::types::PredictiveConfig;
/// Anomaly detector that pairs a `PredictiveConfig` with an `AdaptiveDBSCAN` instance.
///
/// Instances are built through the constructors in the `impl` block of this type, which
/// derive the DBSCAN settings from the supplied configuration.
// NOTE(review): the lint allowance is presumably here because `config` is only stored,
// never read, in the code visible in this file - confirm before removing it.
#[allow(dead_code)]
pub struct AnomalyDetector {
/// Configuration the detector was constructed with.
config: PredictiveConfig,
/// DBSCAN instance used to label the points of each series.
dbscan: AdaptiveDBSCAN,
}
impl AnomalyDetector {
    /// Creates a detector configured with `PredictiveConfig::default()`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `AnomalyDetector::with_config`.
    pub fn new() -> Result<Self> {
        Self::with_config(PredictiveConfig::default())
    }

    /// Creates a detector whose DBSCAN parameters are derived from `config`.
    ///
    /// The DBSCAN `density` is `0.1 * config.anomaly_sensitivity` and the `window_size` is
    /// `config.reservoir_size` capped at 1000; `min_cluster_size` and `max_distance` are
    /// fixed at `3` and `1.0`.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `AdaptiveDBSCAN::new` if it rejects the derived
    /// configuration.
    pub fn with_config(config: PredictiveConfig) -> Result<Self> {
        let dbscan_config = DBSCANConfig {
            density: 0.1 * config.anomaly_sensitivity,
            min_cluster_size: 3,
            max_distance: 1.0,
            window_size: config.reservoir_size.min(1000),
        };
        let dbscan = AdaptiveDBSCAN::new(dbscan_config)?;
        Ok(Self { config, dbscan })
    }

    /// Runs anomaly detection on every named series in `data`.
    ///
    /// Series with fewer than three points are skipped with a warning and produce no entry
    /// in the output. Results are pushed in the iteration order of `data`, which for a
    /// `HashMap` is unspecified.
    ///
    /// # Errors
    ///
    /// Propagates any error from the per-variable detection step.
    #[instrument(skip(self, data))]
    pub fn detect_anomalies(
        &mut self,
        data: &HashMap<String, Vec<f64>>,
    ) -> Result<Vec<AnomalyResult>> {
        let mut results = Vec::new();
        info!("Detecting anomalies in {} variables", data.len());
        for (var_name, series) in data {
            if series.len() < 3 {
                warn!(
                    "Skipping anomaly detection for {}: insufficient data points (need at least 3)",
                    var_name
                );
                continue;
            }
            let anomaly_result = self.detect_variable_anomalies(var_name, series)?;
            results.push(anomaly_result);
        }
        debug!("Detected anomalies in {} variables", results.len());
        Ok(results)
    }

    /// Labels one series with DBSCAN and summarises the points labelled as noise.
    ///
    /// Timestamps passed to DBSCAN are simply the point indices `0, 1, 2, ...` as `f64`.
    ///
    /// In the returned [`AnomalyResult`]:
    /// - `anomaly_indices` holds the index of every point labelled `ClusterLabel::Noise`;
    /// - `anomaly_scores` holds one entry per labelled point: `|x - mean| / std_dev` for a
    ///   noise point (or `1.0` when the standard deviation is not positive) and `0.0` for a
    ///   clustered point;
    /// - `confidence` is `1 - noise_ratio`, clamped to `[0, 1]`, where every point that is
    ///   not in a cluster counts as noise; it is `0.0` for an empty series.
    fn detect_variable_anomalies(
        &mut self,
        variable: &str,
        series: &[f64],
    ) -> Result<AnomalyResult> {
        let timestamps: Vec<f64> = (0..series.len()).map(|i| i as f64).collect();
        let cluster_labels = self.dbscan.detect_anomalies_dbscan(series, &timestamps);

        // Population mean and standard deviation depend only on the series, so compute
        // them once instead of once per noise point (and once per element inside that).
        let point_count = series.len() as f64;
        let mean = series.iter().sum::<f64>() / point_count;
        let variance = series.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / point_count;
        let std_dev = variance.sqrt();

        let mut anomaly_indices = Vec::new();
        let mut anomaly_scores = Vec::with_capacity(series.len());
        let mut clustered_points = 0usize;

        // Zipping labels with values removes the indexing panic path should the label
        // sequence ever be longer than the series.
        for (i, (&label, &value)) in cluster_labels.iter().zip(series).enumerate() {
            match label {
                ClusterLabel::Noise => {
                    anomaly_indices.push(i);
                    let deviation = (value - mean).abs();
                    anomaly_scores.push(if std_dev > 0.0 {
                        deviation / std_dev
                    } else {
                        // Constant (or non-finite) series: no meaningful z-score exists.
                        1.0
                    });
                }
                ClusterLabel::Cluster(_) => {
                    clustered_points += 1;
                    anomaly_scores.push(0.0);
                }
            }
        }

        let confidence = if series.is_empty() {
            0.0
        } else {
            // `clustered_points` never exceeds `series.len()` because it is counted over
            // the zipped pairs, so this subtraction cannot underflow.
            let noise_ratio = (series.len() - clustered_points) as f64 / point_count;
            (1.0 - noise_ratio).clamp(0.0, 1.0)
        };

        Ok(AnomalyResult {
            variable: variable.to_string(),
            anomaly_indices,
            anomaly_scores,
            method: "DBSCAN".to_string(),
            confidence,
        })
    }
}
/// Outcome of anomaly detection for a single named series.
///
/// The field descriptions below reflect how `AnomalyDetector` populates this type in this
/// file; other producers, if any exist, are not visible here.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnomalyResult {
/// Name of the variable (series) that was analysed.
pub variable: String,
/// Indices into the series of the points labelled as noise by the clustering step.
pub anomaly_indices: Vec<usize>,
/// One score per labelled point: the absolute deviation from the series mean divided by
/// the standard deviation for noise points (`1.0` when the standard deviation is not
/// positive), and `0.0` for clustered points. Note this is not index-aligned with
/// `anomaly_indices`, which lists noise points only.
pub anomaly_scores: Vec<f64>,
/// Name of the detection method; the detector in this file always writes `"DBSCAN"`.
pub method: String,
/// One minus the fraction of points not assigned to a cluster, clamped to `[0, 1]`.
pub confidence: f64,
}