// do_memory_mcp/patterns/predictive/dbscan.rs
use anyhow::Result;
use serde::Serialize;

use super::kdtree::{KDTree, Point};
5
/// A single cluster discovered by DBSCAN: its member points plus the
/// summary statistics used when assigning new points incrementally.
#[derive(Debug, Clone, Serialize)]
pub struct Cluster {
    // Index of this cluster within its owning collection
    // (matches `ClusterLabel::Cluster(id)`).
    pub id: usize,
    // Points currently assigned to this cluster.
    pub points: Vec<Point>,
    // Element-wise mean of the members' feature vectors; recomputed as
    // points are added.
    pub centroid: Vec<f64>,
    // Local density recorded for the seed point when the cluster was
    // created (not updated afterwards in the visible code).
    pub density: f64,
}
13
/// Label assigned to a point by DBSCAN.
///
/// `Eq` and `Hash` are derived in addition to `PartialEq` (the variants
/// carry only a `usize`), so labels support exact comparison and can be
/// used as map/set keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ClusterLabel {
    /// The point's neighborhood is too sparse to belong to any cluster.
    Noise,
    /// The point belongs to the cluster with the given index.
    Cluster(usize),
}
22
/// Tuning parameters shared by the streaming and batch DBSCAN code in
/// this module.
#[derive(Debug, Clone)]
pub struct DBSCANConfig {
    // Minimum local density below which a streamed point is labeled noise.
    pub density: f64,
    // Smallest number of points that may form a cluster.
    // NOTE(review): not read by any code visible in this file — confirm
    // it is consumed elsewhere.
    pub min_cluster_size: usize,
    // Maximum distance (epsilon) for two points to count as neighbors.
    pub max_distance: f64,
    // Number of most-recent points retained by the streaming window.
    pub window_size: usize,
}
35
36impl Default for DBSCANConfig {
37 fn default() -> Self {
38 Self {
39 density: 0.1,
40 min_cluster_size: 3,
41 max_distance: 1.0,
42 window_size: 1000,
43 }
44 }
45}
46
/// Incremental clustering state over a sliding window of recent points.
#[derive(Debug)]
pub struct StreamingClusters {
    // Clusters discovered so far; indices line up with the values inside
    // `ClusterLabel::Cluster`.
    pub clusters: Vec<Cluster>,
    // Spatial index over the current window; rebuilt on every `update`.
    pub kd_tree: KDTree,
    // The most recent points, capped at `config.window_size`.
    pub window: Vec<Point>,
    // Density thresholds and window sizing.
    pub config: DBSCANConfig,
}
55
56impl StreamingClusters {
57 pub fn new(config: DBSCANConfig) -> Self {
58 Self {
59 clusters: Vec::new(),
60 kd_tree: KDTree::new(),
61 window: Vec::with_capacity(config.window_size),
62 config,
63 }
64 }
65
66 pub fn update(&mut self, new_point: Point) -> ClusterLabel {
68 self.window.push(new_point.clone());
70 if self.window.len() > self.config.window_size {
71 self.window.remove(0);
72 }
73
74 self.kd_tree = KDTree::build(&self.window);
76
77 let local_density = self.calculate_local_density(&new_point);
79
80 if local_density < self.config.density {
82 ClusterLabel::Noise
83 } else {
84 self.assign_to_cluster(new_point, local_density)
86 }
87 }
88
89 fn calculate_local_density(&self, point: &Point) -> f64 {
91 let neighbors = self
92 .kd_tree
93 .find_neighbors(&point.features, self.config.max_distance);
94
95 let filtered: Vec<Point> = neighbors.into_iter().filter(|p| p.id != point.id).collect();
97
98 if filtered.is_empty() {
100 0.0
101 } else {
102 let avg_distance: f64 = filtered
103 .iter()
104 .map(|neighbor| calculate_distance(&point.features, &neighbor.features))
105 .sum::<f64>()
106 / filtered.len() as f64;
107
108 filtered.len() as f64 / (1.0 + avg_distance)
110 }
111 }
112
113 fn assign_to_cluster(&mut self, point: Point, density: f64) -> ClusterLabel {
115 let mut nearest_cluster = None;
117 let mut min_distance = f64::INFINITY;
118
119 for (i, cluster) in self.clusters.iter().enumerate() {
120 let distance = calculate_distance(&point.features, &cluster.centroid);
121 if distance < min_distance && distance <= self.config.max_distance {
122 min_distance = distance;
123 nearest_cluster = Some(i);
124 }
125 }
126
127 if let Some(cluster_idx) = nearest_cluster {
128 self.clusters[cluster_idx].points.push(point.clone());
130 self.update_cluster_centroid(cluster_idx);
131 ClusterLabel::Cluster(cluster_idx)
132 } else {
133 let new_cluster = Cluster {
135 id: self.clusters.len(),
136 points: vec![point.clone()],
137 centroid: point.features.clone(),
138 density,
139 };
140 self.clusters.push(new_cluster);
141 ClusterLabel::Cluster(self.clusters.len() - 1)
142 }
143 }
144
145 fn update_cluster_centroid(&mut self, cluster_idx: usize) {
147 if self.clusters[cluster_idx].points.is_empty() {
148 return;
149 }
150
151 let cluster = &self.clusters[cluster_idx];
152 let feature_count = cluster.points[0].features.len();
153 let mut new_centroid = vec![0.0; feature_count];
154
155 for point in &cluster.points {
156 for (i, &feature) in point.features.iter().enumerate() {
157 new_centroid[i] += feature;
158 }
159 }
160
161 for centroid_val in &mut new_centroid {
162 *centroid_val /= cluster.points.len() as f64;
163 }
164
165 self.clusters[cluster_idx].centroid = new_centroid;
166 }
167
168 pub fn calculate_density_maps(&self) -> Vec<f64> {
170 let mut density_map = Vec::with_capacity(self.window.len());
171
172 for point in &self.window {
173 let density = self.calculate_local_density(point);
174 density_map.push(density);
175 }
176
177 density_map
178 }
179}
180
/// DBSCAN variant that derives epsilon and min-samples from the data's
/// spread, and also owns incremental streaming cluster state.
#[derive(Debug)]
// NOTE(review): allow(dead_code) suggests `config` is not read after
// construction in the current build — confirm before removing the field.
#[allow(dead_code)]
pub struct AdaptiveDBSCAN {
    // Parameters used to seed the streaming clusterer in `new`.
    config: DBSCANConfig,
    // Incremental state fed via `update_streaming_clusters`.
    streaming_clusters: StreamingClusters,
}
188
189impl AdaptiveDBSCAN {
190 pub fn new(config: DBSCANConfig) -> Result<Self> {
191 Ok(Self {
192 streaming_clusters: StreamingClusters::new(config.clone()),
193 config,
194 })
195 }
196
197 pub fn detect_anomalies_dbscan(
199 &mut self,
200 values: &[f64],
201 timestamps: &[f64],
202 ) -> Vec<ClusterLabel> {
203 let points: Vec<Point> = values
206 .iter()
207 .enumerate()
208 .map(|(i, &value)| {
209 let timestamp = timestamps.get(i).copied().unwrap_or(i as f64);
210 Point {
211 id: i,
212 values: vec![value],
213 embedding: None,
214 timestamp,
215 features: vec![value],
216 }
217 })
218 .collect();
219
220 self.adaptive_dbscan_clustering(&points)
221 }
222
223 pub fn adaptive_dbscan_clustering(&mut self, points: &[Point]) -> Vec<ClusterLabel> {
225 let adaptive_params = self.calculate_adaptive_parameters(points);
227
228 self.apply_dbscan(points, adaptive_params)
230 }
231
232 fn calculate_adaptive_parameters(&self, points: &[Point]) -> (f64, usize) {
234 if points.len() < 3 {
235 return (0.5, 2);
236 }
237
238 let features: Vec<Vec<f64>> = points.iter().map(|p| p.features.clone()).collect();
240
241 let mut all_values = Vec::new();
243 for feature_vec in &features {
244 all_values.extend(feature_vec);
245 }
246
247 let mean: f64 = all_values.iter().sum::<f64>() / all_values.len() as f64;
248 let variance: f64 =
249 all_values.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / all_values.len() as f64;
250 let std_dev = variance.sqrt();
251
252 let adaptive_epsilon = (std_dev * 2.0).max(1e-6); let adaptive_min_samples = (points.len() as f64 * 0.1).max(3.0) as usize; (
258 adaptive_epsilon,
259 adaptive_min_samples.min(points.len().saturating_sub(1)),
260 )
261 }
262
263 fn apply_dbscan(&self, points: &[Point], params: (f64, usize)) -> Vec<ClusterLabel> {
265 let (epsilon, min_samples) = params;
266
267 if points.is_empty() {
268 return Vec::new();
269 }
270
271 let kd_tree = KDTree::build(points);
273
274 let mut labels = vec![ClusterLabel::Noise; points.len()];
275 let mut cluster_id = 0;
276
277 for (i, point) in points.iter().enumerate() {
278 if !matches!(labels[i], ClusterLabel::Noise) {
279 continue; }
281
282 let neighbors = kd_tree.find_neighbors(&point.features, epsilon);
284
285 if neighbors.len() < min_samples {
286 labels[i] = ClusterLabel::Noise; } else {
288 labels[i] = ClusterLabel::Cluster(cluster_id);
290 let mut cluster_points = vec![i];
291
292 let mut queue = neighbors.iter().map(|n| n.id).collect::<Vec<_>>();
294
295 while let Some(neighbor_id) = queue.pop() {
296 if matches!(labels[neighbor_id], ClusterLabel::Noise) {
297 labels[neighbor_id] = ClusterLabel::Cluster(cluster_id);
298 cluster_points.push(neighbor_id);
299
300 let neighbor_neighbors =
302 kd_tree.find_neighbors(&points[neighbor_id].features, epsilon);
303
304 for neighbor_neighbor in &neighbor_neighbors {
305 if matches!(labels[neighbor_neighbor.id], ClusterLabel::Noise) {
306 queue.push(neighbor_neighbor.id);
307 }
308 }
309 }
310 }
311
312 cluster_id += 1;
313 }
314 }
315
316 labels
317 }
318
319 pub fn update_streaming_clusters(&mut self, new_point: Point) -> ClusterLabel {
321 self.streaming_clusters.update(new_point)
322 }
323
324 pub fn get_density_maps(&self) -> Vec<f64> {
326 self.streaming_clusters.calculate_density_maps()
327 }
328}
329
/// Euclidean distance between `a` and `b`.
///
/// When the slices differ in length the distance is computed over the
/// shared prefix (extra elements are ignored); an empty overlap yields
/// 0.0.
fn calculate_distance(a: &[f64], b: &[f64]) -> f64 {
    a.iter()
        .zip(b.iter())
        .map(|(&x, &y)| {
            let diff = x - y;
            diff * diff
        })
        .sum::<f64>()
        .sqrt()
}