1use std::collections::HashMap;
4
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Serialize};
7
8use crate::{CoherenceSignal, FrameworkError, Result};
9
10#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct DiscoveryConfig {
13 pub min_signal_strength: f64,
15
16 pub lookback_windows: usize,
18
19 pub emergence_threshold: f64,
21
22 pub split_threshold: f64,
24
25 pub bridge_threshold: f64,
27
28 pub detect_anomalies: bool,
30
31 pub anomaly_sigma: f64,
33}
34
35impl Default for DiscoveryConfig {
36 fn default() -> Self {
37 Self {
38 min_signal_strength: 0.01,
39 lookback_windows: 10,
40 emergence_threshold: 0.2,
41 split_threshold: 0.5,
42 bridge_threshold: 0.3,
43 detect_anomalies: true,
44 anomaly_sigma: 2.5,
45 }
46 }
47}
48
49#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
51pub enum PatternCategory {
52 Emergence,
54
55 Split,
57
58 Merge,
60
61 Bridge,
63
64 Anomaly,
66
67 Consolidation,
69
70 Dissolution,
72
73 Cyclical,
75}
76
77#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Ord, PartialOrd)]
79pub enum PatternStrength {
80 Weak,
82
83 Moderate,
85
86 Strong,
88
89 VeryStrong,
91}
92
93impl PatternStrength {
94 pub fn from_score(score: f64) -> Self {
96 if score < 0.25 {
97 PatternStrength::Weak
98 } else if score < 0.5 {
99 PatternStrength::Moderate
100 } else if score < 0.75 {
101 PatternStrength::Strong
102 } else {
103 PatternStrength::VeryStrong
104 }
105 }
106}
107
108#[derive(Debug, Clone, Serialize, Deserialize)]
110pub struct DiscoveryPattern {
111 pub id: String,
113
114 pub category: PatternCategory,
116
117 pub strength: PatternStrength,
119
120 pub confidence: f64,
122
123 pub detected_at: DateTime<Utc>,
125
126 pub time_range: Option<(DateTime<Utc>, DateTime<Utc>)>,
128
129 pub entities: Vec<String>,
131
132 pub description: String,
134
135 pub evidence: Vec<PatternEvidence>,
137
138 pub metadata: HashMap<String, serde_json::Value>,
140}
141
142#[derive(Debug, Clone, Serialize, Deserialize)]
144pub struct PatternEvidence {
145 pub evidence_type: String,
147
148 pub value: f64,
150
151 pub source_ref: String,
153
154 pub explanation: String,
156}
157
158pub struct DiscoveryEngine {
160 config: DiscoveryConfig,
161 patterns: Vec<DiscoveryPattern>,
162 signal_history: Vec<CoherenceSignal>,
163}
164
165impl DiscoveryEngine {
166 pub fn new(config: DiscoveryConfig) -> Self {
168 Self {
169 config,
170 patterns: Vec::new(),
171 signal_history: Vec::new(),
172 }
173 }
174
175 pub fn detect(&mut self, signals: &[CoherenceSignal]) -> Result<Vec<DiscoveryPattern>> {
177 self.signal_history.extend(signals.iter().cloned());
178
179 let mut patterns = Vec::new();
180
181 if self.signal_history.len() < 2 {
183 return Ok(patterns);
184 }
185
186 patterns.extend(self.detect_emergence()?);
188 patterns.extend(self.detect_splits()?);
189 patterns.extend(self.detect_bridges()?);
190 patterns.extend(self.detect_trends()?);
191
192 if self.config.detect_anomalies {
193 patterns.extend(self.detect_anomalies()?);
194 }
195
196 self.patterns.extend(patterns.clone());
197 Ok(patterns)
198 }
199
200 fn detect_emergence(&self) -> Result<Vec<DiscoveryPattern>> {
202 let mut patterns = Vec::new();
203
204 if self.signal_history.len() < self.config.lookback_windows {
205 return Ok(patterns);
206 }
207
208 let recent = &self.signal_history[self.signal_history.len() - self.config.lookback_windows..];
209
210 let node_growth: Vec<i64> = recent
212 .windows(2)
213 .map(|w| w[1].node_count as i64 - w[0].node_count as i64)
214 .collect();
215
216 let avg_growth = node_growth.iter().sum::<i64>() as f64 / node_growth.len() as f64;
217
218 if avg_growth > self.config.emergence_threshold * recent[0].node_count as f64 {
219 let latest = recent.last().unwrap();
220
221 patterns.push(DiscoveryPattern {
222 id: format!("emergence_{}", self.patterns.len()),
223 category: PatternCategory::Emergence,
224 strength: PatternStrength::from_score(avg_growth / 10.0),
225 confidence: (avg_growth / 10.0).min(1.0),
226 detected_at: Utc::now(),
227 time_range: Some((recent[0].window.start, latest.window.end)),
228 entities: latest.cut_nodes.clone(),
229 description: format!(
230 "Emerging structure detected: {} new nodes over {} windows",
231 (avg_growth * recent.len() as f64) as i64,
232 recent.len()
233 ),
234 evidence: vec![PatternEvidence {
235 evidence_type: "node_growth".to_string(),
236 value: avg_growth,
237 source_ref: latest.id.clone(),
238 explanation: "Sustained node count growth".to_string(),
239 }],
240 metadata: HashMap::new(),
241 });
242 }
243
244 Ok(patterns)
245 }
246
247 fn detect_splits(&self) -> Result<Vec<DiscoveryPattern>> {
249 let mut patterns = Vec::new();
250
251 if self.signal_history.len() < 2 {
252 return Ok(patterns);
253 }
254
255 for i in 1..self.signal_history.len() {
257 let prev = &self.signal_history[i - 1];
258 let curr = &self.signal_history[i];
259
260 if prev.min_cut_value > 0.0 {
261 let drop_ratio = (prev.min_cut_value - curr.min_cut_value) / prev.min_cut_value;
262
263 if drop_ratio > self.config.split_threshold {
264 patterns.push(DiscoveryPattern {
265 id: format!("split_{}", self.patterns.len()),
266 category: PatternCategory::Split,
267 strength: PatternStrength::from_score(drop_ratio),
268 confidence: drop_ratio.min(1.0),
269 detected_at: curr.window.start,
270 time_range: Some((prev.window.start, curr.window.end)),
271 entities: curr.cut_nodes.clone(),
272 description: format!(
273 "Structure split detected: {:.1}% coherence drop",
274 drop_ratio * 100.0
275 ),
276 evidence: vec![PatternEvidence {
277 evidence_type: "mincut_drop".to_string(),
278 value: drop_ratio,
279 source_ref: curr.id.clone(),
280 explanation: format!(
281 "Min-cut dropped from {:.3} to {:.3}",
282 prev.min_cut_value, curr.min_cut_value
283 ),
284 }],
285 metadata: HashMap::new(),
286 });
287 }
288 }
289 }
290
291 Ok(patterns)
292 }
293
294 fn detect_bridges(&self) -> Result<Vec<DiscoveryPattern>> {
296 let mut patterns = Vec::new();
297
298 if self.signal_history.is_empty() {
299 return Ok(patterns);
300 }
301
302 let mut boundary_counts: HashMap<String, usize> = HashMap::new();
304
305 for signal in &self.signal_history {
306 for node in &signal.cut_nodes {
307 *boundary_counts.entry(node.clone()).or_default() += 1;
308 }
309 }
310
311 let threshold = (self.signal_history.len() as f64 * self.config.bridge_threshold) as usize;
312
313 let bridge_nodes: Vec<_> = boundary_counts
314 .iter()
315 .filter(|(_, &count)| count >= threshold)
316 .map(|(node, &count)| (node.clone(), count))
317 .collect();
318
319 if !bridge_nodes.is_empty() {
320 let latest = self.signal_history.last().unwrap();
321
322 patterns.push(DiscoveryPattern {
323 id: format!("bridge_{}", self.patterns.len()),
324 category: PatternCategory::Bridge,
325 strength: PatternStrength::Moderate,
326 confidence: 0.6,
327 detected_at: Utc::now(),
328 time_range: Some((
329 self.signal_history[0].window.start,
330 latest.window.end,
331 )),
332 entities: bridge_nodes.iter().map(|(n, _)| n.clone()).collect(),
333 description: format!(
334 "Bridge nodes detected: {} nodes consistently on boundaries",
335 bridge_nodes.len()
336 ),
337 evidence: bridge_nodes
338 .iter()
339 .map(|(node, count)| PatternEvidence {
340 evidence_type: "boundary_frequency".to_string(),
341 value: *count as f64,
342 source_ref: node.clone(),
343 explanation: format!("{} appeared in {} cut boundaries", node, count),
344 })
345 .collect(),
346 metadata: HashMap::new(),
347 });
348 }
349
350 Ok(patterns)
351 }
352
353 fn detect_trends(&self) -> Result<Vec<DiscoveryPattern>> {
355 let mut patterns = Vec::new();
356
357 if self.signal_history.len() < self.config.lookback_windows {
358 return Ok(patterns);
359 }
360
361 let recent = &self.signal_history[self.signal_history.len() - self.config.lookback_windows..];
362
363 let values: Vec<f64> = recent.iter().map(|s| s.min_cut_value).collect();
365
366 let (slope, _) = self.linear_regression(&values);
367
368 if slope.abs() > 0.1 {
369 let latest = recent.last().unwrap();
370 let category = if slope > 0.0 {
371 PatternCategory::Consolidation
372 } else {
373 PatternCategory::Dissolution
374 };
375
376 patterns.push(DiscoveryPattern {
377 id: format!("trend_{}", self.patterns.len()),
378 category,
379 strength: PatternStrength::from_score(slope.abs()),
380 confidence: slope.abs().min(1.0),
381 detected_at: Utc::now(),
382 time_range: Some((recent[0].window.start, latest.window.end)),
383 entities: vec![],
384 description: format!(
385 "{} trend detected: {:.2}% per window",
386 if slope > 0.0 {
387 "Strengthening"
388 } else {
389 "Weakening"
390 },
391 slope * 100.0
392 ),
393 evidence: vec![PatternEvidence {
394 evidence_type: "trend_slope".to_string(),
395 value: slope,
396 source_ref: latest.id.clone(),
397 explanation: format!(
398 "Linear trend slope: {:.4} over {} windows",
399 slope,
400 recent.len()
401 ),
402 }],
403 metadata: HashMap::new(),
404 });
405 }
406
407 Ok(patterns)
408 }
409
410 fn detect_anomalies(&self) -> Result<Vec<DiscoveryPattern>> {
412 let mut patterns = Vec::new();
413
414 if self.signal_history.len() < 5 {
415 return Ok(patterns);
416 }
417
418 let values: Vec<f64> = self.signal_history.iter().map(|s| s.min_cut_value).collect();
420
421 let mean = values.iter().sum::<f64>() / values.len() as f64;
422 let variance =
423 values.iter().map(|v| (v - mean).powi(2)).sum::<f64>() / values.len() as f64;
424 let std_dev = variance.sqrt();
425
426 for (i, signal) in self.signal_history.iter().enumerate() {
428 let z_score = if std_dev > 0.0 {
429 (signal.min_cut_value - mean) / std_dev
430 } else {
431 0.0
432 };
433
434 if z_score.abs() > self.config.anomaly_sigma {
435 patterns.push(DiscoveryPattern {
436 id: format!("anomaly_{}", i),
437 category: PatternCategory::Anomaly,
438 strength: PatternStrength::from_score(z_score.abs() / 5.0),
439 confidence: (z_score.abs() / 5.0).min(1.0),
440 detected_at: signal.window.start,
441 time_range: Some((signal.window.start, signal.window.end)),
442 entities: signal.cut_nodes.clone(),
443 description: format!(
444 "Anomalous coherence: {:.2}σ from mean",
445 z_score
446 ),
447 evidence: vec![PatternEvidence {
448 evidence_type: "z_score".to_string(),
449 value: z_score,
450 source_ref: signal.id.clone(),
451 explanation: format!(
452 "Value {:.4} vs mean {:.4} (σ={:.4})",
453 signal.min_cut_value, mean, std_dev
454 ),
455 }],
456 metadata: HashMap::new(),
457 });
458 }
459 }
460
461 Ok(patterns)
462 }
463
464 fn linear_regression(&self, values: &[f64]) -> (f64, f64) {
466 let n = values.len() as f64;
467 let x_mean = (n - 1.0) / 2.0;
468 let y_mean = values.iter().sum::<f64>() / n;
469
470 let mut num = 0.0;
471 let mut denom = 0.0;
472
473 for (i, &y) in values.iter().enumerate() {
474 let x = i as f64;
475 num += (x - x_mean) * (y - y_mean);
476 denom += (x - x_mean).powi(2);
477 }
478
479 let slope = if denom > 0.0 { num / denom } else { 0.0 };
480 let intercept = y_mean - slope * x_mean;
481
482 (slope, intercept)
483 }
484
485 pub fn patterns(&self) -> &[DiscoveryPattern] {
487 &self.patterns
488 }
489
490 pub fn patterns_by_category(&self, category: PatternCategory) -> Vec<&DiscoveryPattern> {
492 self.patterns
493 .iter()
494 .filter(|p| p.category == category)
495 .collect()
496 }
497
498 pub fn clear(&mut self) {
500 self.patterns.clear();
501 self.signal_history.clear();
502 }
503}
504
#[cfg(test)]
mod tests {
    use super::*;
    use crate::TemporalWindow;

    /// Builds an exact signal with the given id, min-cut value and node count,
    /// twice as many edges as nodes, a near-even partition and no cut nodes.
    fn make_signal(id: &str, min_cut: f64, nodes: usize) -> CoherenceSignal {
        let smaller_half = nodes / 2;
        let window = TemporalWindow::new(Utc::now(), Utc::now(), 0);

        CoherenceSignal {
            id: id.to_string(),
            window,
            min_cut_value: min_cut,
            node_count: nodes,
            edge_count: nodes * 2,
            partition_sizes: Some((smaller_half, nodes - smaller_half)),
            is_exact: true,
            cut_nodes: Vec::new(),
            delta: None,
        }
    }

    /// A freshly created engine has not reported anything yet.
    #[test]
    fn test_discovery_engine_creation() {
        let engine = DiscoveryEngine::new(DiscoveryConfig::default());
        assert!(engine.patterns().is_empty());
    }

    /// One representative score per strength band.
    #[test]
    fn test_pattern_strength() {
        let cases = [
            (0.1, PatternStrength::Weak),
            (0.3, PatternStrength::Moderate),
            (0.6, PatternStrength::Strong),
            (0.9, PatternStrength::VeryStrong),
        ];

        for &(score, expected) in cases.iter() {
            assert_eq!(PatternStrength::from_score(score), expected);
        }
    }

    /// Detecting on an empty batch succeeds and reports nothing.
    #[test]
    fn test_empty_signals() {
        let mut engine = DiscoveryEngine::new(DiscoveryConfig::default());

        let patterns = engine.detect(&[]).unwrap();
        assert!(patterns.is_empty());
    }

    /// The values 1..=5 at x = 0..=4 lie on the line y = x + 1.
    #[test]
    fn test_linear_regression() {
        let engine = DiscoveryEngine::new(DiscoveryConfig::default());

        let values = [1.0, 2.0, 3.0, 4.0, 5.0];
        let (slope, intercept) = engine.linear_regression(&values);

        assert!((slope - 1.0).abs() < 0.001);
        assert!((intercept - 1.0).abs() < 0.001);
    }
}