do_memory_mcp/patterns/predictive/
extraction.rs1use anyhow::Result;
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8
9use super::dbscan::{Cluster, ClusterLabel};
10
11#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct ExtractedPattern {
14 pub id: String,
16 pub cluster_id: usize,
18 pub description: String,
20 pub characteristics: ClusterCharacteristics,
22 pub quality_score: f64,
24 pub pattern_type: PatternType,
26 pub variables: Vec<String>,
28 pub temporal_range: (usize, usize),
30}
31
32#[derive(Debug, Clone, Serialize, Deserialize)]
34pub struct ClusterCharacteristics {
35 pub size: usize,
37 pub centroid: Vec<f64>,
39 pub density: f64,
41 pub variance: f64,
43 pub compactness: f64,
45 pub time_span: f64,
47}
48
49#[derive(Debug, Clone, Serialize, Deserialize)]
51pub enum PatternType {
52 Temporal { pattern: String },
54 Anomaly { severity: String },
56 Stable { consistency: f64 },
58 Transition { from: String, to: String },
60 Unknown,
62}
63
/// Thresholds and output options for [`PatternExtractor`].
#[derive(Debug, Clone, PartialEq)]
pub struct ExtractionConfig {
    /// Clusters scoring below this value are skipped.
    pub min_quality: f64,
    /// Clusters with fewer points than this are skipped.
    pub min_cluster_size: usize,
    /// Selects detailed (`true`) or one-line (`false`) pattern descriptions.
    pub verbose: bool,
}

impl Default for ExtractionConfig {
    /// Quality threshold 0.6, minimum cluster size 3, detailed descriptions enabled.
    fn default() -> Self {
        Self {
            min_quality: 0.6,
            min_cluster_size: 3,
            verbose: true,
        }
    }
}
84
85pub struct PatternExtractor {
87 config: ExtractionConfig,
88}
89
90impl PatternExtractor {
91 pub fn new(config: ExtractionConfig) -> Self {
93 Self { config }
94 }
95
96 pub fn default_config() -> Self {
98 Self::new(ExtractionConfig::default())
99 }
100
101 pub fn extract_patterns(
103 &self,
104 clusters: &[Cluster],
105 labels: &[ClusterLabel],
106 variable_names: &[String],
107 ) -> Result<Vec<ExtractedPattern>> {
108 let mut patterns = Vec::new();
109
110 for (cluster_idx, cluster) in clusters.iter().enumerate() {
111 if cluster.points.len() < self.config.min_cluster_size {
113 continue;
114 }
115
116 let characteristics = self.compute_cluster_characteristics(cluster)?;
117
118 let quality_score = self.compute_quality_score(cluster, &characteristics);
119
120 if quality_score < self.config.min_quality {
121 continue;
122 }
123
124 let pattern_type = self.classify_pattern_type(cluster, &characteristics);
125
126 let description = if self.config.verbose {
127 self.generate_detailed_description(cluster, &characteristics, &pattern_type)
128 } else {
129 self.generate_simple_description(cluster, &pattern_type)
130 };
131
132 let temporal_range = self.compute_temporal_range(cluster);
133
134 let pattern = ExtractedPattern {
135 id: format!("pattern_{}", cluster_idx),
136 cluster_id: cluster_idx,
137 description,
138 characteristics,
139 quality_score,
140 pattern_type,
141 variables: variable_names.to_vec(),
142 temporal_range,
143 };
144
145 patterns.push(pattern);
146 }
147
148 let noise_patterns = self.extract_noise_patterns(labels, variable_names)?;
150 patterns.extend(noise_patterns);
151
152 Ok(patterns)
153 }
154
155 fn compute_cluster_characteristics(&self, cluster: &Cluster) -> Result<ClusterCharacteristics> {
157 if cluster.points.is_empty() {
158 anyhow::bail!("Cannot compute characteristics for empty cluster");
159 }
160
161 let size = cluster.points.len();
163
164 let centroid = cluster.centroid.clone();
166
167 let density = cluster.density;
169
170 let variance = if size > 1 {
172 cluster
173 .points
174 .iter()
175 .map(|p| {
176 p.features
177 .iter()
178 .zip(¢roid)
179 .map(|(&x, &c)| (x - c).powi(2))
180 .sum::<f64>()
181 })
182 .sum::<f64>()
183 / size as f64
184 } else {
185 0.0
186 };
187
188 let compactness = if variance > 0.0 {
190 1.0 / (1.0 + variance)
191 } else {
192 1.0
193 };
194
195 let time_span = if size > 1 {
197 let min_time = cluster
198 .points
199 .iter()
200 .map(|p| p.timestamp)
201 .fold(f64::INFINITY, f64::min);
202 let max_time = cluster
203 .points
204 .iter()
205 .map(|p| p.timestamp)
206 .fold(f64::NEG_INFINITY, f64::max);
207 max_time - min_time
208 } else {
209 0.0
210 };
211
212 Ok(ClusterCharacteristics {
213 size,
214 centroid,
215 density,
216 variance,
217 compactness,
218 time_span,
219 })
220 }
221
222 fn compute_quality_score(
224 &self,
225 cluster: &Cluster,
226 characteristics: &ClusterCharacteristics,
227 ) -> f64 {
228 let size_score = (cluster.points.len() as f64).ln() / 10.0;
231 let size_score = size_score.clamp(0.0, 1.0);
232
233 let density_score = characteristics.density.clamp(0.0, 1.0);
235
236 let compactness_score = characteristics.compactness;
238
239 let stability_score = 1.0 / (1.0 + characteristics.variance);
241
242 0.3 * size_score + 0.3 * density_score + 0.2 * compactness_score + 0.2 * stability_score
244 }
245
246 fn classify_pattern_type(
248 &self,
249 cluster: &Cluster,
250 characteristics: &ClusterCharacteristics,
251 ) -> PatternType {
252 if characteristics.time_span > cluster.points.len() as f64 * 0.5 {
254 let first_point = &cluster.points[0];
258 let last_point = &cluster.points[cluster.points.len() - 1];
259
260 if !first_point.features.is_empty() && !last_point.features.is_empty() {
261 let trend = last_point.features[0] - first_point.features[0];
262
263 if trend.abs() > 0.1 {
264 return PatternType::Temporal {
265 pattern: if trend > 0.0 {
266 "increasing_trend".to_string()
267 } else {
268 "decreasing_trend".to_string()
269 },
270 };
271 }
272 }
273
274 return PatternType::Temporal {
275 pattern: "temporal_pattern".to_string(),
276 };
277 }
278
279 if characteristics.compactness > 0.8 && characteristics.variance < 0.5 {
281 return PatternType::Stable {
282 consistency: characteristics.compactness,
283 };
284 }
285
286 PatternType::Unknown
288 }
289
290 fn generate_detailed_description(
292 &self,
293 cluster: &Cluster,
294 characteristics: &ClusterCharacteristics,
295 pattern_type: &PatternType,
296 ) -> String {
297 let mut desc = String::new();
298
299 desc.push_str(&format!(
300 "Cluster {} contains {} points with density {:.2}. ",
301 cluster.id, characteristics.size, characteristics.density
302 ));
303
304 desc.push_str(&format!(
305 "Centroid: {:?}, Variance: {:.2}, Compactness: {:.2}. ",
306 characteristics.centroid, characteristics.variance, characteristics.compactness
307 ));
308
309 match pattern_type {
310 PatternType::Temporal { pattern } => {
311 desc.push_str(&format!("Pattern type: Temporal ({})", pattern));
312 }
313 PatternType::Anomaly { severity } => {
314 desc.push_str(&format!("Pattern type: Anomaly (severity: {})", severity));
315 }
316 PatternType::Stable { consistency } => {
317 desc.push_str(&format!(
318 "Pattern type: Stable (consistency: {:.2})",
319 consistency
320 ));
321 }
322 PatternType::Transition { from, to } => {
323 desc.push_str(&format!("Pattern type: Transition ({} -> {})", from, to));
324 }
325 PatternType::Unknown => {
326 desc.push_str("Pattern type: Unknown");
327 }
328 }
329
330 desc
331 }
332
333 fn generate_simple_description(&self, cluster: &Cluster, pattern_type: &PatternType) -> String {
335 match pattern_type {
336 PatternType::Temporal { pattern } => {
337 format!(
338 "Temporal pattern: {} ({} points)",
339 pattern,
340 cluster.points.len()
341 )
342 }
343 PatternType::Anomaly { severity } => {
344 format!(
345 "Anomaly detected: {} ({} points)",
346 severity,
347 cluster.points.len()
348 )
349 }
350 PatternType::Stable { consistency } => {
351 format!(
352 "Stable pattern: {:.2}% consistency ({} points)",
353 consistency * 100.0,
354 cluster.points.len()
355 )
356 }
357 PatternType::Transition { from, to } => {
358 format!(
359 "Transition: {} -> {} ({} points)",
360 from,
361 to,
362 cluster.points.len()
363 )
364 }
365 PatternType::Unknown => {
366 format!(
367 "Cluster {} with {} points",
368 cluster.id,
369 cluster.points.len()
370 )
371 }
372 }
373 }
374
375 fn compute_temporal_range(&self, cluster: &Cluster) -> (usize, usize) {
377 if cluster.points.is_empty() {
378 return (0, 0);
379 }
380
381 let min_idx = cluster.points.iter().map(|p| p.id).min().unwrap_or(0);
382 let max_idx = cluster.points.iter().map(|p| p.id).max().unwrap_or(0);
383
384 (min_idx, max_idx)
385 }
386
387 fn extract_noise_patterns(
389 &self,
390 labels: &[ClusterLabel],
391 variable_names: &[String],
392 ) -> Result<Vec<ExtractedPattern>> {
393 let mut noise_indices = Vec::new();
394
395 for (i, label) in labels.iter().enumerate() {
396 if matches!(label, ClusterLabel::Noise) {
397 noise_indices.push(i);
398 }
399 }
400
401 if noise_indices.is_empty() {
402 return Ok(Vec::new());
403 }
404
405 let pattern = ExtractedPattern {
407 id: "anomaly_pattern".to_string(),
408 cluster_id: usize::MAX,
409 description: format!(
410 "Detected {} anomaly points across {} variables",
411 noise_indices.len(),
412 variable_names.len()
413 ),
414 characteristics: ClusterCharacteristics {
415 size: noise_indices.len(),
416 centroid: vec![0.0], density: 0.0,
418 variance: f64::INFINITY,
419 compactness: 0.0,
420 time_span: 0.0,
421 },
422 quality_score: 0.5,
423 pattern_type: PatternType::Anomaly {
424 severity: if noise_indices.len() > 5 {
425 "high".to_string()
426 } else if noise_indices.len() > 2 {
427 "medium".to_string()
428 } else {
429 "low".to_string()
430 },
431 },
432 variables: variable_names.to_vec(),
433 temporal_range: (
434 *noise_indices.first().unwrap_or(&0),
435 *noise_indices.last().unwrap_or(&0),
436 ),
437 };
438
439 Ok(vec![pattern])
440 }
441
442 pub fn filter_by_quality(&self, patterns: &[ExtractedPattern]) -> Vec<ExtractedPattern> {
444 patterns
445 .iter()
446 .filter(|p| p.quality_score >= self.config.min_quality)
447 .cloned()
448 .collect()
449 }
450
451 pub fn get_pattern_stats(&self, patterns: &[ExtractedPattern]) -> HashMap<String, f64> {
453 let mut stats = HashMap::new();
454
455 stats.insert("total_patterns".to_string(), patterns.len() as f64);
456
457 let temporal_count = patterns
458 .iter()
459 .filter(|p| matches!(p.pattern_type, PatternType::Temporal { .. }))
460 .count();
461 stats.insert("temporal_patterns".to_string(), temporal_count as f64);
462
463 let anomaly_count = patterns
464 .iter()
465 .filter(|p| matches!(p.pattern_type, PatternType::Anomaly { .. }))
466 .count();
467 stats.insert("anomaly_patterns".to_string(), anomaly_count as f64);
468
469 let stable_count = patterns
470 .iter()
471 .filter(|p| matches!(p.pattern_type, PatternType::Stable { .. }))
472 .count();
473 stats.insert("stable_patterns".to_string(), stable_count as f64);
474
475 let avg_quality = if patterns.is_empty() {
476 0.0
477 } else {
478 patterns.iter().map(|p| p.quality_score).sum::<f64>() / patterns.len() as f64
479 };
480 stats.insert("average_quality".to_string(), avg_quality);
481
482 stats
483 }
484}
485
486#[cfg(test)]
487mod tests;