1use std::collections::HashMap;
7use chrono::{DateTime, Utc};
8use serde::{Deserialize, Serialize};
9
/// A single embedded item handed to [`NativeDiscoveryEngine::add_vector`].
///
/// The engine stores the value as-is, creates a [`GraphNode`] for it, and
/// compares `embedding` against the embeddings it already holds using cosine
/// similarity.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SemanticVector {
    /// Caller-supplied identifier; copied into `GraphNode::external_id`.
    pub id: String,
    /// Embedding values. Two embeddings of different length (or an empty one)
    /// are treated as having similarity 0.
    pub embedding: Vec<f32>,
    /// Domain of the item; decides whether an automatically created edge is
    /// typed `EdgeType::CrossDomain` or `EdgeType::Similarity`.
    pub domain: Domain,
    /// Timestamp attached to the item. Not read by the engine code visible in
    /// this file.
    pub timestamp: DateTime<Utc>,
    /// String key/value pairs attached to the item. Not read by the engine
    /// code visible in this file.
    pub metadata: HashMap<String, String>,
}
25
/// Subject area that a [`SemanticVector`] / [`GraphNode`] belongs to.
///
/// The type also has hand-written `PartialOrd`/`Ord` impls in this file that
/// compare variants by their `as u8` discriminant, i.e. in declaration order;
/// that ordering is used to normalise `(Domain, Domain)` pairs.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum Domain {
    Climate,
    Finance,
    Research,
    Medical,
    Economic,
    Genomics,
    Physics,
    Seismic,
    Ocean,
    Space,
    Transportation,
    Geospatial,
    Government,
    // NOTE(review): presumably marks items that span several domains; no code
    // visible in this file treats this variant specially — confirm intent.
    CrossDomain,
}
44
/// A vertex of the engine's graph; one is created per vector added through
/// `NativeDiscoveryEngine::add_vector`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphNode {
    /// Engine-assigned id, taken from a counter that starts at 0.
    pub id: u32,
    /// The `id` of the `SemanticVector` this node was created from.
    pub external_id: String,
    /// Domain copied from the originating vector.
    pub domain: Domain,
    /// Index of the originating vector in the engine's vector list, if any.
    /// Nodes without an index are skipped when similarity edges are built.
    pub vector_idx: Option<usize>,
    /// Node weight; `add_vector` sets it to 1.0. Not read by the code visible
    /// in this file.
    pub weight: f64,
    /// Named numeric attributes; empty when the node is created by
    /// `add_vector`.
    pub attributes: HashMap<String, f64>,
}
62
/// A weighted connection between two graph nodes.
///
/// The min-cut computation adds each edge's weight in both directions, so
/// edges are effectively undirected there even though `source` and `target`
/// are stored separately.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphEdge {
    /// Id of the node the edge was created from.
    pub source: u32,
    /// Id of the node the edge points to.
    pub target: u32,
    /// Edge strength: the cosine similarity for automatically created edges,
    /// or the absolute correlation for edges from `add_correlation_edge`.
    pub weight: f64,
    /// What kind of relationship the edge records.
    pub edge_type: EdgeType,
    /// Time at which the edge was created (`Utc::now()` at insertion).
    pub timestamp: DateTime<Utc>,
}
78
/// Kind of relationship a [`GraphEdge`] records.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum EdgeType {
    /// Created by `NativeDiscoveryEngine::add_correlation_edge`.
    Correlation,
    /// Created automatically between two similar vectors of the same domain.
    Similarity,
    /// Not constructed by any code visible in this file.
    Citation,
    /// Not constructed by any code visible in this file.
    Causal,
    /// Created automatically between two similar vectors of different domains.
    CrossDomain,
}
93
/// Tunable parameters for [`NativeDiscoveryEngine`].
///
/// Only `min_edge_weight`, `similarity_threshold`, `mincut_sensitivity` and
/// `cross_domain` are read by the engine code visible in this file; the
/// remaining fields are carried in the config but their consumers are not
/// shown here.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NativeEngineConfig {
    /// Minimum absolute correlation for `add_correlation_edge` to store an edge.
    pub min_edge_weight: f64,
    /// Minimum cosine similarity for two vectors to be linked automatically;
    /// also the average strength a domain pair must exceed to be reported as
    /// a bridge.
    pub similarity_threshold: f64,
    /// Absolute change in min-cut value between two consecutive
    /// `detect_patterns` calls that triggers a coherence-break or
    /// consolidation pattern.
    pub mincut_sensitivity: f64,
    /// Whether `detect_patterns` also runs cross-domain bridge detection.
    pub cross_domain: bool,
    /// NOTE(review): presumably a time window in seconds (default is 30 days'
    /// worth); not read in the visible code — confirm.
    pub window_seconds: i64,
    /// NOTE(review): presumably an HNSW index parameter; not read in the
    /// visible code — confirm.
    pub hnsw_m: usize,
    /// NOTE(review): presumably an HNSW index parameter; not read in the
    /// visible code — confirm.
    pub hnsw_ef_construction: usize,
    /// NOTE(review): presumably an HNSW index parameter; not read in the
    /// visible code — confirm.
    pub hnsw_ef_search: usize,
    /// NOTE(review): presumably the expected embedding length; the visible
    /// code never validates embeddings against it — confirm.
    pub dimension: usize,
    /// Not read in the visible code.
    pub batch_size: usize,
    /// Not read in the visible code.
    pub checkpoint_interval: u64,
    /// Not read in the visible code.
    pub parallel_workers: usize,
}
120
121impl Default for NativeEngineConfig {
122 fn default() -> Self {
123 Self {
124 min_edge_weight: 0.3,
125 similarity_threshold: 0.7,
126 mincut_sensitivity: 0.15,
127 cross_domain: true,
128 window_seconds: 86400 * 30, hnsw_m: 16,
130 hnsw_ef_construction: 200,
131 hnsw_ef_search: 50,
132 dimension: 384,
133 batch_size: 1000,
134 checkpoint_interval: 10_000,
135 parallel_workers: 4,
136 }
137 }
138}
139
/// In-memory graph engine: stores vectors, links similar ones with edges,
/// measures graph coherence with a min-cut, and reports patterns derived from
/// how that measurement changes between calls.
pub struct NativeDiscoveryEngine {
    /// Thresholds and switches used by the methods below.
    config: NativeEngineConfig,

    /// Every vector ever added, in insertion order; `GraphNode::vector_idx`
    /// indexes into this list.
    vectors: Vec<SemanticVector>,

    /// Graph nodes keyed by their engine-assigned id.
    nodes: HashMap<u32, GraphNode>,

    /// All edges, in insertion order.
    edges: Vec<GraphEdge>,

    /// One `(time, min-cut value, snapshot)` entry appended per
    /// `detect_patterns` call.
    coherence_history: Vec<(DateTime<Utc>, f64, CoherenceSnapshot)>,

    /// Id that the next added node will receive.
    next_node_id: u32,

    /// Node ids grouped by domain, in insertion order.
    domain_nodes: HashMap<Domain, Vec<u32>>,
}
167
/// Result of one coherence measurement over the engine's graph.
///
/// When the graph has no nodes or no edges, all numeric fields other than the
/// counts are zero and `boundary_nodes` is empty.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CoherenceSnapshot {
    /// Weight of the lightest cut found by the min-cut computation.
    pub mincut_value: f64,
    /// Number of nodes in the graph at measurement time.
    pub node_count: usize,
    /// Number of edges in the graph at measurement time.
    pub edge_count: usize,
    /// Node counts of the two sides of the lightest cut: first the group that
    /// was split off, then everything else.
    pub partition_sizes: (usize, usize),
    /// Ids of the nodes in the split-off group (the first side of
    /// `partition_sizes`).
    pub boundary_nodes: Vec<u32>,
    /// Arithmetic mean of all edge weights.
    pub avg_edge_weight: f64,
}
184
/// A finding reported by `NativeDiscoveryEngine::detect_patterns`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiscoveredPattern {
    /// Identifier built from the pattern kind and the detection time in whole
    /// seconds (bridge patterns also include the two domain names).
    pub id: String,
    /// Which kind of pattern this is.
    pub pattern_type: PatternType,
    /// Confidence score assigned by the detector that produced the pattern.
    pub confidence: f64,
    /// Ids of the nodes the pattern refers to.
    pub affected_nodes: Vec<u32>,
    /// Time at which the pattern was detected.
    pub detected_at: DateTime<Utc>,
    /// Human-readable summary of the finding.
    pub description: String,
    /// Supporting measurements; may be empty.
    pub evidence: Vec<Evidence>,
    /// Cross-domain connections associated with the pattern; may be empty.
    pub cross_domain_links: Vec<CrossDomainLink>,
}
205
/// Kind of a [`DiscoveredPattern`].
///
/// Only `CoherenceBreak`, `Consolidation`, `EmergingCluster` and
/// `BridgeFormation` are produced by the detection code visible in this file.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum PatternType {
    /// The min-cut value fell by more than the configured sensitivity.
    CoherenceBreak,
    /// The min-cut value rose by more than the configured sensitivity.
    Consolidation,
    /// The imbalance between the two cut partitions grew by more than 0.2.
    EmergingCluster,
    /// Not produced by the visible code.
    DissolvingCluster,
    /// At least three cross-domain edges between one domain pair whose
    /// average weight exceeds the similarity threshold.
    BridgeFormation,
    /// Not produced by the visible code.
    AnomalousNode,
    /// Not produced by the visible code.
    TemporalShift,
    /// Not produced by the visible code.
    Cascade,
}
226
/// One named measurement supporting a [`DiscoveredPattern`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Evidence {
    /// Short machine-oriented label; the visible code uses `"mincut_delta"`,
    /// `"boundary_size"` and `"edge_count"`.
    pub evidence_type: String,
    /// The measured value.
    pub value: f64,
    /// Human-readable explanation of what `value` is.
    pub description: String,
}
234
/// A connection between nodes of two domains, attached to a
/// [`DiscoveredPattern`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossDomainLink {
    /// First domain of the link.
    pub source_domain: Domain,
    /// Second domain of the link.
    pub target_domain: Domain,
    /// Node ids on the source side of the link.
    pub source_nodes: Vec<u32>,
    /// Node ids on the target side of the link.
    pub target_nodes: Vec<u32>,
    /// Edge weight for a single-edge link, or the average edge weight for an
    /// aggregated one.
    pub link_strength: f64,
    /// Label for how the link was found; the visible code uses
    /// `"boundary_crossing"` and `"semantic_bridge"`.
    pub link_type: String,
}
245
246impl NativeDiscoveryEngine {
247 pub fn new(config: NativeEngineConfig) -> Self {
249 Self {
250 config,
251 vectors: Vec::new(),
252 nodes: HashMap::new(),
253 edges: Vec::new(),
254 coherence_history: Vec::new(),
255 next_node_id: 0,
256 domain_nodes: HashMap::new(),
257 }
258 }
259
260 pub fn add_vector(&mut self, vector: SemanticVector) -> u32 {
263 let node_id = self.next_node_id;
264 self.next_node_id += 1;
265
266 let vector_idx = self.vectors.len();
267 self.vectors.push(vector.clone());
268
269 let node = GraphNode {
270 id: node_id,
271 external_id: vector.id.clone(),
272 domain: vector.domain,
273 vector_idx: Some(vector_idx),
274 weight: 1.0,
275 attributes: HashMap::new(),
276 };
277
278 self.nodes.insert(node_id, node);
279 self.domain_nodes.entry(vector.domain).or_default().push(node_id);
280
281 self.connect_similar_vectors(node_id);
283
284 node_id
285 }
286
287 fn connect_similar_vectors(&mut self, node_id: u32) {
290 let node = match self.nodes.get(&node_id) {
291 Some(n) => n.clone(),
292 None => return,
293 };
294
295 let vector_idx = match node.vector_idx {
296 Some(idx) => idx,
297 None => return,
298 };
299
300 let source_vec = &self.vectors[vector_idx].embedding;
301
302 for (other_id, other_node) in &self.nodes {
304 if *other_id == node_id {
305 continue;
306 }
307
308 if let Some(other_idx) = other_node.vector_idx {
309 let other_vec = &self.vectors[other_idx].embedding;
310 let similarity = cosine_similarity(source_vec, other_vec);
311
312 if similarity >= self.config.similarity_threshold as f32 {
313 let edge_type = if node.domain != other_node.domain {
315 EdgeType::CrossDomain
316 } else {
317 EdgeType::Similarity
318 };
319
320 self.edges.push(GraphEdge {
321 source: node_id,
322 target: *other_id,
323 weight: similarity as f64,
324 edge_type,
325 timestamp: Utc::now(),
326 });
327 }
328 }
329 }
330 }
331
332 pub fn add_correlation_edge(&mut self, source: u32, target: u32, correlation: f64) {
334 if correlation.abs() >= self.config.min_edge_weight {
335 self.edges.push(GraphEdge {
336 source,
337 target,
338 weight: correlation.abs(),
339 edge_type: EdgeType::Correlation,
340 timestamp: Utc::now(),
341 });
342 }
343 }
344
345 pub fn compute_coherence(&self) -> CoherenceSnapshot {
350 if self.nodes.is_empty() || self.edges.is_empty() {
351 return CoherenceSnapshot {
352 mincut_value: 0.0,
353 node_count: self.nodes.len(),
354 edge_count: self.edges.len(),
355 partition_sizes: (0, 0),
356 boundary_nodes: vec![],
357 avg_edge_weight: 0.0,
358 };
359 }
360
361 let mincut_result = self.stoer_wagner_mincut();
364
365 let avg_edge_weight = if self.edges.is_empty() {
366 0.0
367 } else {
368 self.edges.iter().map(|e| e.weight).sum::<f64>() / self.edges.len() as f64
369 };
370
371 CoherenceSnapshot {
372 mincut_value: mincut_result.0,
373 node_count: self.nodes.len(),
374 edge_count: self.edges.len(),
375 partition_sizes: mincut_result.1,
376 boundary_nodes: mincut_result.2,
377 avg_edge_weight,
378 }
379 }
380
381 fn stoer_wagner_mincut(&self) -> (f64, (usize, usize), Vec<u32>) {
384 let n = self.nodes.len();
385 if n < 2 {
386 return (0.0, (n, 0), vec![]);
387 }
388
389 let node_ids: Vec<u32> = self.nodes.keys().copied().collect();
391 let id_to_idx: HashMap<u32, usize> = node_ids.iter()
392 .enumerate()
393 .map(|(i, &id)| (id, i))
394 .collect();
395
396 let mut adj = vec![vec![0.0; n]; n];
397 for edge in &self.edges {
398 if let (Some(&i), Some(&j)) = (id_to_idx.get(&edge.source), id_to_idx.get(&edge.target)) {
399 adj[i][j] += edge.weight;
400 adj[j][i] += edge.weight;
401 }
402 }
403
404 let mut best_cut = f64::INFINITY;
406 let mut best_partition = (0, 0);
407 let mut best_boundary = vec![];
408
409 let mut active: Vec<bool> = vec![true; n];
410 let mut merged: Vec<Vec<usize>> = (0..n).map(|i| vec![i]).collect();
411
412 for phase in 0..(n - 1) {
413 let mut in_a = vec![false; n];
415 let mut key = vec![0.0; n];
416
417 let start = (0..n).find(|&i| active[i]).unwrap();
419 in_a[start] = true;
420
421 for j in 0..n {
423 if active[j] && !in_a[j] {
424 key[j] = adj[start][j];
425 }
426 }
427
428 let mut s = start;
429 let mut t = start;
430
431 for _ in 1..=(n - 1 - phase) {
432 let mut max_key = f64::NEG_INFINITY;
434 let mut max_node = 0;
435
436 for j in 0..n {
437 if active[j] && !in_a[j] && key[j] > max_key {
438 max_key = key[j];
439 max_node = j;
440 }
441 }
442
443 s = t;
444 t = max_node;
445 in_a[t] = true;
446
447 for j in 0..n {
449 if active[j] && !in_a[j] {
450 key[j] += adj[t][j];
451 }
452 }
453 }
454
455 let cut_weight = key[t];
457
458 if cut_weight < best_cut {
459 best_cut = cut_weight;
460
461 let partition_a: Vec<usize> = merged[t].clone();
463 let partition_b: Vec<usize> = (0..n)
464 .filter(|&i| active[i] && i != t)
465 .flat_map(|i| merged[i].iter().copied())
466 .collect();
467
468 best_partition = (partition_a.len(), partition_b.len());
469
470 best_boundary = partition_a.iter()
472 .map(|&i| node_ids[i])
473 .collect();
474 }
475
476 active[t] = false;
478 let to_merge: Vec<usize> = merged[t].clone();
479 merged[s].extend(to_merge);
480
481 for i in 0..n {
482 if active[i] && i != s {
483 adj[s][i] += adj[t][i];
484 adj[i][s] += adj[i][t];
485 }
486 }
487 }
488
489 (best_cut, best_partition, best_boundary)
490 }
491
492 pub fn detect_patterns(&mut self) -> Vec<DiscoveredPattern> {
494 let mut patterns = Vec::new();
495
496 let current = self.compute_coherence();
497 let now = Utc::now();
498
499 if let Some((prev_time, prev_mincut, prev_snapshot)) = self.coherence_history.last() {
501 let mincut_delta = current.mincut_value - prev_mincut;
502 let relative_change = if *prev_mincut > 0.0 {
503 mincut_delta.abs() / prev_mincut
504 } else {
505 mincut_delta.abs()
506 };
507
508 if mincut_delta < -self.config.mincut_sensitivity {
510 patterns.push(DiscoveredPattern {
511 id: format!("coherence_break_{}", now.timestamp()),
512 pattern_type: PatternType::CoherenceBreak,
513 confidence: (relative_change.min(1.0) * 0.5 + 0.5),
514 affected_nodes: current.boundary_nodes.clone(),
515 detected_at: now,
516 description: format!(
517 "Network coherence dropped from {:.3} to {:.3} ({:.1}% decrease)",
518 prev_mincut, current.mincut_value, relative_change * 100.0
519 ),
520 evidence: vec![
521 Evidence {
522 evidence_type: "mincut_delta".to_string(),
523 value: mincut_delta,
524 description: "Change in min-cut value".to_string(),
525 },
526 Evidence {
527 evidence_type: "boundary_size".to_string(),
528 value: current.boundary_nodes.len() as f64,
529 description: "Number of nodes on the cut".to_string(),
530 },
531 ],
532 cross_domain_links: self.find_cross_domain_at_boundary(¤t.boundary_nodes),
533 });
534 }
535
536 if mincut_delta > self.config.mincut_sensitivity {
538 patterns.push(DiscoveredPattern {
539 id: format!("consolidation_{}", now.timestamp()),
540 pattern_type: PatternType::Consolidation,
541 confidence: (relative_change.min(1.0) * 0.5 + 0.5),
542 affected_nodes: current.boundary_nodes.clone(),
543 detected_at: now,
544 description: format!(
545 "Network coherence increased from {:.3} to {:.3} ({:.1}% increase)",
546 prev_mincut, current.mincut_value, relative_change * 100.0
547 ),
548 evidence: vec![
549 Evidence {
550 evidence_type: "mincut_delta".to_string(),
551 value: mincut_delta,
552 description: "Change in min-cut value".to_string(),
553 },
554 ],
555 cross_domain_links: vec![],
556 });
557 }
558
559 let (part_a, part_b) = current.partition_sizes;
561 let imbalance = (part_a as f64 - part_b as f64).abs() / (part_a + part_b) as f64;
562 let (prev_a, prev_b) = prev_snapshot.partition_sizes;
563 let prev_imbalance = if prev_a + prev_b > 0 {
564 (prev_a as f64 - prev_b as f64).abs() / (prev_a + prev_b) as f64
565 } else {
566 0.0
567 };
568
569 if imbalance > prev_imbalance + 0.2 {
570 patterns.push(DiscoveredPattern {
571 id: format!("emerging_cluster_{}", now.timestamp()),
572 pattern_type: PatternType::EmergingCluster,
573 confidence: 0.7,
574 affected_nodes: current.boundary_nodes.clone(),
575 detected_at: now,
576 description: format!(
577 "Partition imbalance increased: {} vs {} nodes (was {} vs {})",
578 part_a, part_b, prev_a, prev_b
579 ),
580 evidence: vec![],
581 cross_domain_links: vec![],
582 });
583 }
584 }
585
586 if self.config.cross_domain {
588 patterns.extend(self.detect_cross_domain_patterns());
589 }
590
591 self.coherence_history.push((now, current.mincut_value, current));
593
594 patterns
595 }
596
597 fn find_cross_domain_at_boundary(&self, boundary: &[u32]) -> Vec<CrossDomainLink> {
599 let mut links = Vec::new();
600
601 for edge in &self.edges {
603 if edge.edge_type == EdgeType::CrossDomain {
604 if boundary.contains(&edge.source) || boundary.contains(&edge.target) {
605 if let (Some(src_node), Some(tgt_node)) =
606 (self.nodes.get(&edge.source), self.nodes.get(&edge.target))
607 {
608 links.push(CrossDomainLink {
609 source_domain: src_node.domain,
610 target_domain: tgt_node.domain,
611 source_nodes: vec![edge.source],
612 target_nodes: vec![edge.target],
613 link_strength: edge.weight,
614 link_type: "boundary_crossing".to_string(),
615 });
616 }
617 }
618 }
619 }
620
621 links
622 }
623
624 fn detect_cross_domain_patterns(&self) -> Vec<DiscoveredPattern> {
626 let mut patterns = Vec::new();
627
628 let mut cross_counts: HashMap<(Domain, Domain), Vec<&GraphEdge>> = HashMap::new();
630
631 for edge in &self.edges {
632 if edge.edge_type == EdgeType::CrossDomain {
633 if let (Some(src), Some(tgt)) =
634 (self.nodes.get(&edge.source), self.nodes.get(&edge.target))
635 {
636 let key = if src.domain < tgt.domain {
637 (src.domain, tgt.domain)
638 } else {
639 (tgt.domain, src.domain)
640 };
641 cross_counts.entry(key).or_default().push(edge);
642 }
643 }
644 }
645
646 for ((domain_a, domain_b), edges) in cross_counts {
648 if edges.len() >= 3 {
649 let avg_strength = edges.iter().map(|e| e.weight).sum::<f64>() / edges.len() as f64;
650
651 if avg_strength > self.config.similarity_threshold as f64 {
652 patterns.push(DiscoveredPattern {
653 id: format!("bridge_{:?}_{:?}_{}", domain_a, domain_b, Utc::now().timestamp()),
654 pattern_type: PatternType::BridgeFormation,
655 confidence: avg_strength,
656 affected_nodes: edges.iter()
657 .flat_map(|e| vec![e.source, e.target])
658 .collect(),
659 detected_at: Utc::now(),
660 description: format!(
661 "Cross-domain bridge detected: {:?} ↔ {:?} ({} connections, avg strength {:.3})",
662 domain_a, domain_b, edges.len(), avg_strength
663 ),
664 evidence: vec![
665 Evidence {
666 evidence_type: "edge_count".to_string(),
667 value: edges.len() as f64,
668 description: "Number of cross-domain connections".to_string(),
669 },
670 ],
671 cross_domain_links: vec![CrossDomainLink {
672 source_domain: domain_a,
673 target_domain: domain_b,
674 source_nodes: edges.iter().map(|e| e.source).collect(),
675 target_nodes: edges.iter().map(|e| e.target).collect(),
676 link_strength: avg_strength,
677 link_type: "semantic_bridge".to_string(),
678 }],
679 });
680 }
681 }
682 }
683
684 patterns
685 }
686
687 pub fn domain_coherence(&self, domain: Domain) -> Option<f64> {
689 let domain_node_ids = self.domain_nodes.get(&domain)?;
690
691 if domain_node_ids.len() < 2 {
692 return None;
693 }
694
695 let mut internal_weight = 0.0;
697 let mut edge_count = 0;
698
699 for edge in &self.edges {
700 if domain_node_ids.contains(&edge.source) && domain_node_ids.contains(&edge.target) {
701 internal_weight += edge.weight;
702 edge_count += 1;
703 }
704 }
705
706 if edge_count == 0 {
707 return Some(0.0);
708 }
709
710 Some(internal_weight / edge_count as f64)
711 }
712
713 pub fn stats(&self) -> EngineStats {
715 let mut domain_counts = HashMap::new();
716 for domain in self.domain_nodes.keys() {
717 domain_counts.insert(*domain, self.domain_nodes[domain].len());
718 }
719
720 let mut cross_domain_edges = 0;
721 for edge in &self.edges {
722 if edge.edge_type == EdgeType::CrossDomain {
723 cross_domain_edges += 1;
724 }
725 }
726
727 EngineStats {
728 total_nodes: self.nodes.len(),
729 total_edges: self.edges.len(),
730 total_vectors: self.vectors.len(),
731 domain_counts,
732 cross_domain_edges,
733 history_length: self.coherence_history.len(),
734 }
735 }
736
737 pub fn get_patterns(&self) -> Vec<DiscoveredPattern> {
739 vec![]
742 }
743
744 pub fn export_graph(&self) -> GraphExport {
746 GraphExport {
747 nodes: self.nodes.values().cloned().collect(),
748 edges: self.edges.clone(),
749 domains: self.domain_nodes.clone(),
750 }
751 }
752
753 pub fn get_coherence_history(&self) -> Vec<CoherenceHistoryEntry> {
755 self.coherence_history.iter()
756 .map(|(timestamp, mincut, snapshot)| {
757 CoherenceHistoryEntry {
758 timestamp: *timestamp,
759 mincut_value: *mincut,
760 snapshot: snapshot.clone(),
761 }
762 })
763 .collect()
764 }
765}
766
/// Size summary returned by `NativeDiscoveryEngine::stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EngineStats {
    /// Number of graph nodes.
    pub total_nodes: usize,
    /// Number of graph edges of any type.
    pub total_edges: usize,
    /// Number of stored vectors.
    pub total_vectors: usize,
    /// Number of nodes per domain; domains without nodes are absent.
    pub domain_counts: HashMap<Domain, usize>,
    /// Number of edges typed `EdgeType::CrossDomain`.
    pub cross_domain_edges: usize,
    /// Number of coherence measurements recorded so far.
    pub history_length: usize,
}
777
/// Owned copy of the graph returned by `NativeDiscoveryEngine::export_graph`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphExport {
    /// All nodes, in unspecified order (they come from a hash map).
    pub nodes: Vec<GraphNode>,
    /// All edges, in insertion order.
    pub edges: Vec<GraphEdge>,
    /// Node ids grouped by domain.
    pub domains: HashMap<Domain, Vec<u32>>,
}
785
/// One recorded coherence measurement, as returned by
/// `NativeDiscoveryEngine::get_coherence_history`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CoherenceHistoryEntry {
    /// When the measurement was taken.
    pub timestamp: DateTime<Utc>,
    /// Min-cut value of the measurement (same as `snapshot.mincut_value`).
    pub mincut_value: f64,
    /// The full measurement.
    pub snapshot: CoherenceSnapshot,
}
793
/// Returns the cosine of the angle between `a` and `b`, in `[-1.0, 1.0]`.
///
/// Returns `0.0` when the slices differ in length, are empty, or either has
/// zero magnitude.
///
/// The dot product and both squared norms are accumulated in `f64` in a
/// single pass, so very large or very small `f32` components neither overflow
/// nor underflow, and the result is clamped so rounding cannot push it
/// outside the valid cosine range.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }

    let (dot, sq_norm_a, sq_norm_b) = a.iter().zip(b).fold(
        (0.0_f64, 0.0_f64, 0.0_f64),
        |(dot, sq_a, sq_b), (&x, &y)| {
            let (x, y) = (f64::from(x), f64::from(y));
            (dot + x * y, sq_a + x * x, sq_b + y * y)
        },
    );

    // Take the roots separately so the product of the squared norms never
    // has to be represented.
    let denom = sq_norm_a.sqrt() * sq_norm_b.sqrt();
    if denom == 0.0 {
        return 0.0;
    }

    (dot / denom).clamp(-1.0, 1.0) as f32
}
810
811impl PartialOrd for Domain {
813 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
814 Some(self.cmp(other))
815 }
816}
817
818impl Ord for Domain {
819 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
820 (*self as u8).cmp(&(*other as u8))
821 }
822}
823
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Domain::Climate` vector with the given id and embedding, an
    /// empty metadata map, and the current time as timestamp.
    fn climate_vector(id: &str, embedding: Vec<f32>) -> SemanticVector {
        SemanticVector {
            id: id.to_string(),
            embedding,
            domain: Domain::Climate,
            timestamp: Utc::now(),
            metadata: HashMap::new(),
        }
    }

    /// Identical vectors have similarity 1; orthogonal vectors have 0.
    #[test]
    fn test_cosine_similarity() {
        let x_axis = [1.0_f32, 0.0, 0.0];
        let y_axis = [0.0_f32, 1.0, 0.0];

        let same = cosine_similarity(&x_axis, &x_axis);
        assert!((same - 1.0).abs() < 0.001);

        let orthogonal = cosine_similarity(&x_axis, &y_axis);
        assert!(orthogonal.abs() < 0.001);
    }

    /// Adding two vectors yields two nodes and two stored vectors.
    #[test]
    fn test_engine_basic() {
        let mut engine = NativeDiscoveryEngine::new(NativeEngineConfig::default());

        let first = climate_vector("climate_1", vec![1.0, 0.5, 0.2]);
        let second = climate_vector("climate_2", vec![0.9, 0.6, 0.3]);

        engine.add_vector(first);
        engine.add_vector(second);

        let EngineStats {
            total_nodes,
            total_vectors,
            ..
        } = engine.stats();
        assert_eq!(total_nodes, 2);
        assert_eq!(total_vectors, 2);
    }
}