ruvector_data_framework/
ruvector_native.rs

1//! RuVector-Native Discovery Engine
2//!
3//! Deep integration with ruvector-core, ruvector-graph, and ruvector-mincut
4//! for production-grade coherence analysis and pattern discovery.
5
6use std::collections::HashMap;
7use chrono::{DateTime, Utc};
8use serde::{Deserialize, Serialize};
9
/// Vector embedding for semantic similarity
/// Uses RuVector's native vector storage format
///
/// `NativeDiscoveryEngine::add_vector` stores each value in the engine and
/// creates one `GraphNode` that refers back to it by index.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SemanticVector {
    /// Vector ID; copied into `GraphNode::external_id` when the vector is added.
    pub id: String,
    /// Dense embedding (typically 384-1536 dimensions).
    /// Two embeddings of different length are scored as similarity 0.0.
    pub embedding: Vec<f32>,
    /// Source domain; also becomes the domain of the node created for this vector.
    pub domain: Domain,
    /// Timestamp (not read by the engine code visible in this file).
    pub timestamp: DateTime<Utc>,
    /// Metadata for filtering (not read by the engine code visible in this file).
    pub metadata: HashMap<String, String>,
}
25
/// Discovery domains
///
/// Every `SemanticVector` and `GraphNode` carries exactly one domain. The
/// manual `Ord` implementation in this file compares variants by their `u8`
/// discriminant, i.e. in the declaration order below.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum Domain {
    Climate,
    Finance,
    Research,
    Medical,
    Economic,
    Genomics,
    Physics,
    Seismic,
    Ocean,
    Space,
    Transportation,
    Geospatial,
    Government,
    CrossDomain,
}
44
/// RuVector-native graph node
/// Designed to work with ruvector-graph's adjacency structures
///
/// One node is created per vector by `NativeDiscoveryEngine::add_vector`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphNode {
    /// Node ID (u32 for ruvector compatibility); assigned sequentially by the engine.
    pub id: u32,
    /// String identifier for external reference (the originating vector's `id`).
    pub external_id: String,
    /// Domain
    pub domain: Domain,
    /// Associated vector embedding index into the engine's vector storage.
    /// Nodes with `None` are skipped during similarity linking.
    pub vector_idx: Option<usize>,
    /// Node weight (for weighted min-cut). `add_vector` sets it to 1.0; the
    /// min-cut code in this file uses edge weights only.
    pub weight: f64,
    /// Attributes (created empty by `add_vector`).
    pub attributes: HashMap<String, f64>,
}
62
/// RuVector-native graph edge
/// Compatible with ruvector-mincut's edge format
///
/// The min-cut in this file adds each edge's weight symmetrically for both
/// endpoints, so `source`/`target` direction does not affect the cut value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphEdge {
    /// Source node ID
    pub source: u32,
    /// Target node ID
    pub target: u32,
    /// Edge weight (capacity for min-cut). Similarity edges store the cosine
    /// similarity; correlation edges store the absolute correlation.
    pub weight: f64,
    /// Edge type
    pub edge_type: EdgeType,
    /// Timestamp when edge was created/updated
    pub timestamp: DateTime<Utc>,
}
78
/// Types of edges in the discovery graph
///
/// In the code visible in this file only `Correlation`, `Similarity` and
/// `CrossDomain` edges are ever created.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum EdgeType {
    /// Correlation-based (e.g., temperature correlation);
    /// produced by `add_correlation_edge`.
    Correlation,
    /// Similarity-based (e.g., vector cosine similarity) between two nodes of
    /// the same domain.
    Similarity,
    /// Citation/reference link
    Citation,
    /// Causal relationship
    Causal,
    /// Cross-domain bridge: a similarity edge whose endpoints have different domains.
    CrossDomain,
}
93
/// Configuration for the native discovery engine
///
/// Of these fields, the engine code visible in this file reads only
/// `min_edge_weight`, `similarity_threshold`, `mincut_sensitivity` and
/// `cross_domain`; the rest are carried for other components.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NativeEngineConfig {
    /// Minimum edge weight to include (applied to `|correlation|` in
    /// `add_correlation_edge`).
    pub min_edge_weight: f64,
    /// Vector similarity threshold: minimum cosine similarity for an automatic
    /// edge, and the minimum average strength for a reported bridge.
    pub similarity_threshold: f64,
    /// Min-cut sensitivity (lower = more sensitive to breaks). Compared against
    /// the absolute change in min-cut value between two detection runs.
    pub mincut_sensitivity: f64,
    /// Enable cross-domain discovery
    pub cross_domain: bool,
    /// Window size for temporal analysis (seconds)
    pub window_seconds: i64,
    /// HNSW parameters
    pub hnsw_m: usize,
    pub hnsw_ef_construction: usize,
    pub hnsw_ef_search: usize,
    /// Vector dimension
    pub dimension: usize,
    /// Batch size for processing
    pub batch_size: usize,
    /// Checkpoint interval (records)
    pub checkpoint_interval: u64,
    /// Number of parallel workers
    pub parallel_workers: usize,
}
120
121impl Default for NativeEngineConfig {
122    fn default() -> Self {
123        Self {
124            min_edge_weight: 0.3,
125            similarity_threshold: 0.7,
126            mincut_sensitivity: 0.15,
127            cross_domain: true,
128            window_seconds: 86400 * 30, // 30 days
129            hnsw_m: 16,
130            hnsw_ef_construction: 200,
131            hnsw_ef_search: 50,
132            dimension: 384,
133            batch_size: 1000,
134            checkpoint_interval: 10_000,
135            parallel_workers: 4,
136        }
137    }
138}
139
/// The main RuVector-native discovery engine
///
/// This engine uses RuVector's core algorithms:
/// - Vector similarity via HNSW index
/// - Graph coherence via Stoer-Wagner min-cut
/// - Temporal windowing for streaming analysis
///
/// NOTE(review): the implementation in this file is self-contained — similarity
/// search is a brute-force scan and the min-cut is computed locally.
pub struct NativeDiscoveryEngine {
    config: NativeEngineConfig,

    /// Vector storage (would use ruvector-core in production).
    /// `GraphNode::vector_idx` indexes into this list.
    vectors: Vec<SemanticVector>,

    /// Graph nodes, keyed by node ID
    nodes: HashMap<u32, GraphNode>,

    /// Graph edges (adjacency list format for ruvector-mincut).
    /// Stored here as a flat, append-only list.
    edges: Vec<GraphEdge>,

    /// Historical coherence values for change detection:
    /// (detection time, min-cut value, full snapshot), oldest first.
    coherence_history: Vec<(DateTime<Utc>, f64, CoherenceSnapshot)>,

    /// Next node ID
    next_node_id: u32,

    /// Domain-specific subgraph indices: node IDs grouped by domain
    domain_nodes: HashMap<Domain, Vec<u32>>,
}
167
/// Snapshot of coherence state for historical comparison
///
/// Produced by `NativeDiscoveryEngine::compute_coherence`. When the graph has
/// no nodes or no edges, every cut-related field is zero/empty.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CoherenceSnapshot {
    /// Min-cut value
    pub mincut_value: f64,
    /// Number of nodes
    pub node_count: usize,
    /// Number of edges
    pub edge_count: usize,
    /// Partition sizes after min-cut: (nodes on the side that was split off,
    /// all remaining nodes)
    pub partition_sizes: (usize, usize),
    /// Boundary nodes (nodes on the cut). As computed in this file: every node
    /// on the first side of `partition_sizes`.
    pub boundary_nodes: Vec<u32>,
    /// Average edge weight over all stored edges
    pub avg_edge_weight: f64,
}
184
/// A detected pattern or anomaly
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiscoveredPattern {
    /// Pattern ID, built from a pattern-specific prefix and the detection
    /// time in whole seconds
    pub id: String,
    /// Pattern type
    pub pattern_type: PatternType,
    /// Confidence score (0-1)
    pub confidence: f64,
    /// Affected nodes
    pub affected_nodes: Vec<u32>,
    /// Timestamp of detection
    pub detected_at: DateTime<Utc>,
    /// Description (human-readable summary of what changed)
    pub description: String,
    /// Evidence (may be empty)
    pub evidence: Vec<Evidence>,
    /// Cross-domain connections if applicable
    pub cross_domain_links: Vec<CrossDomainLink>,
}
205
/// Types of discoverable patterns
///
/// The detection code visible in this file emits only `CoherenceBreak`,
/// `Consolidation`, `EmergingCluster` and `BridgeFormation`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum PatternType {
    /// Network coherence break (min-cut dropped)
    CoherenceBreak,
    /// Network consolidation (min-cut increased)
    Consolidation,
    /// Emerging cluster (new dense subgraph); reported here when the min-cut
    /// partition becomes markedly more unbalanced than in the previous run
    EmergingCluster,
    /// Dissolving cluster
    DissolvingCluster,
    /// Bridge formation (cross-domain connection)
    BridgeFormation,
    /// Anomalous node (outlier in vector space)
    AnomalousNode,
    /// Temporal shift (pattern change over time)
    TemporalShift,
    /// Cascade (change propagating through network)
    Cascade,
}
226
/// Evidence supporting a pattern detection
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Evidence {
    /// Machine-readable label; this file emits "mincut_delta",
    /// "boundary_size" and "edge_count".
    pub evidence_type: String,
    /// Numeric measurement for this piece of evidence
    pub value: f64,
    /// Human-readable explanation of `value`
    pub description: String,
}
234
/// Cross-domain link discovered
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossDomainLink {
    /// Domain on the source side of the link
    pub source_domain: Domain,
    /// Domain on the target side of the link
    pub target_domain: Domain,
    /// Node IDs on the source side
    pub source_nodes: Vec<u32>,
    /// Node IDs on the target side
    pub target_nodes: Vec<u32>,
    /// Edge weight for a single-edge link, or the average weight for a bridge
    pub link_strength: f64,
    /// Label for how the link was found; this file emits "boundary_crossing"
    /// and "semantic_bridge".
    pub link_type: String,
}
245
246impl NativeDiscoveryEngine {
247    /// Create a new engine with the given configuration
248    pub fn new(config: NativeEngineConfig) -> Self {
249        Self {
250            config,
251            vectors: Vec::new(),
252            nodes: HashMap::new(),
253            edges: Vec::new(),
254            coherence_history: Vec::new(),
255            next_node_id: 0,
256            domain_nodes: HashMap::new(),
257        }
258    }
259
260    /// Add a vector to the engine
261    /// In production, this would use ruvector-core's vector storage
262    pub fn add_vector(&mut self, vector: SemanticVector) -> u32 {
263        let node_id = self.next_node_id;
264        self.next_node_id += 1;
265
266        let vector_idx = self.vectors.len();
267        self.vectors.push(vector.clone());
268
269        let node = GraphNode {
270            id: node_id,
271            external_id: vector.id.clone(),
272            domain: vector.domain,
273            vector_idx: Some(vector_idx),
274            weight: 1.0,
275            attributes: HashMap::new(),
276        };
277
278        self.nodes.insert(node_id, node);
279        self.domain_nodes.entry(vector.domain).or_default().push(node_id);
280
281        // Auto-connect to similar vectors
282        self.connect_similar_vectors(node_id);
283
284        node_id
285    }
286
287    /// Connect a node to similar vectors using cosine similarity
288    /// In production, this would use ruvector-hnsw for O(log n) search
289    fn connect_similar_vectors(&mut self, node_id: u32) {
290        let node = match self.nodes.get(&node_id) {
291            Some(n) => n.clone(),
292            None => return,
293        };
294
295        let vector_idx = match node.vector_idx {
296            Some(idx) => idx,
297            None => return,
298        };
299
300        let source_vec = &self.vectors[vector_idx].embedding;
301
302        // Find similar vectors (brute force - would use HNSW in production)
303        for (other_id, other_node) in &self.nodes {
304            if *other_id == node_id {
305                continue;
306            }
307
308            if let Some(other_idx) = other_node.vector_idx {
309                let other_vec = &self.vectors[other_idx].embedding;
310                let similarity = cosine_similarity(source_vec, other_vec);
311
312                if similarity >= self.config.similarity_threshold as f32 {
313                    // Determine edge type
314                    let edge_type = if node.domain != other_node.domain {
315                        EdgeType::CrossDomain
316                    } else {
317                        EdgeType::Similarity
318                    };
319
320                    self.edges.push(GraphEdge {
321                        source: node_id,
322                        target: *other_id,
323                        weight: similarity as f64,
324                        edge_type,
325                        timestamp: Utc::now(),
326                    });
327                }
328            }
329        }
330    }
331
332    /// Add a correlation-based edge
333    pub fn add_correlation_edge(&mut self, source: u32, target: u32, correlation: f64) {
334        if correlation.abs() >= self.config.min_edge_weight {
335            self.edges.push(GraphEdge {
336                source,
337                target,
338                weight: correlation.abs(),
339                edge_type: EdgeType::Correlation,
340                timestamp: Utc::now(),
341            });
342        }
343    }
344
345    /// Compute current coherence using Stoer-Wagner min-cut
346    ///
347    /// The min-cut value represents the "weakest link" in the network.
348    /// A drop in min-cut indicates the network is becoming fragmented.
349    pub fn compute_coherence(&self) -> CoherenceSnapshot {
350        if self.nodes.is_empty() || self.edges.is_empty() {
351            return CoherenceSnapshot {
352                mincut_value: 0.0,
353                node_count: self.nodes.len(),
354                edge_count: self.edges.len(),
355                partition_sizes: (0, 0),
356                boundary_nodes: vec![],
357                avg_edge_weight: 0.0,
358            };
359        }
360
361        // Build adjacency matrix for min-cut
362        // In production, this would call ruvector-mincut directly
363        let mincut_result = self.stoer_wagner_mincut();
364
365        let avg_edge_weight = if self.edges.is_empty() {
366            0.0
367        } else {
368            self.edges.iter().map(|e| e.weight).sum::<f64>() / self.edges.len() as f64
369        };
370
371        CoherenceSnapshot {
372            mincut_value: mincut_result.0,
373            node_count: self.nodes.len(),
374            edge_count: self.edges.len(),
375            partition_sizes: mincut_result.1,
376            boundary_nodes: mincut_result.2,
377            avg_edge_weight,
378        }
379    }
380
381    /// Stoer-Wagner minimum cut algorithm
382    /// Returns (min_cut_value, partition_sizes, boundary_nodes)
383    fn stoer_wagner_mincut(&self) -> (f64, (usize, usize), Vec<u32>) {
384        let n = self.nodes.len();
385        if n < 2 {
386            return (0.0, (n, 0), vec![]);
387        }
388
389        // Build adjacency matrix
390        let node_ids: Vec<u32> = self.nodes.keys().copied().collect();
391        let id_to_idx: HashMap<u32, usize> = node_ids.iter()
392            .enumerate()
393            .map(|(i, &id)| (id, i))
394            .collect();
395
396        let mut adj = vec![vec![0.0; n]; n];
397        for edge in &self.edges {
398            if let (Some(&i), Some(&j)) = (id_to_idx.get(&edge.source), id_to_idx.get(&edge.target)) {
399                adj[i][j] += edge.weight;
400                adj[j][i] += edge.weight;
401            }
402        }
403
404        // Stoer-Wagner algorithm
405        let mut best_cut = f64::INFINITY;
406        let mut best_partition = (0, 0);
407        let mut best_boundary = vec![];
408
409        let mut active: Vec<bool> = vec![true; n];
410        let mut merged: Vec<Vec<usize>> = (0..n).map(|i| vec![i]).collect();
411
412        for phase in 0..(n - 1) {
413            // Maximum adjacency search
414            let mut in_a = vec![false; n];
415            let mut key = vec![0.0; n];
416
417            // Find first active node
418            let start = (0..n).find(|&i| active[i]).unwrap();
419            in_a[start] = true;
420
421            // Update keys
422            for j in 0..n {
423                if active[j] && !in_a[j] {
424                    key[j] = adj[start][j];
425                }
426            }
427
428            let mut s = start;
429            let mut t = start;
430
431            for _ in 1..=(n - 1 - phase) {
432                // Find max key not in A
433                let mut max_key = f64::NEG_INFINITY;
434                let mut max_node = 0;
435
436                for j in 0..n {
437                    if active[j] && !in_a[j] && key[j] > max_key {
438                        max_key = key[j];
439                        max_node = j;
440                    }
441                }
442
443                s = t;
444                t = max_node;
445                in_a[t] = true;
446
447                // Update keys
448                for j in 0..n {
449                    if active[j] && !in_a[j] {
450                        key[j] += adj[t][j];
451                    }
452                }
453            }
454
455            // Cut of the phase
456            let cut_weight = key[t];
457
458            if cut_weight < best_cut {
459                best_cut = cut_weight;
460
461                // Partition is: merged[t] vs everything else
462                let partition_a: Vec<usize> = merged[t].clone();
463                let partition_b: Vec<usize> = (0..n)
464                    .filter(|&i| active[i] && i != t)
465                    .flat_map(|i| merged[i].iter().copied())
466                    .collect();
467
468                best_partition = (partition_a.len(), partition_b.len());
469
470                // Boundary nodes are those in the smaller partition with edges to other
471                best_boundary = partition_a.iter()
472                    .map(|&i| node_ids[i])
473                    .collect();
474            }
475
476            // Merge s and t
477            active[t] = false;
478            let to_merge: Vec<usize> = merged[t].clone();
479            merged[s].extend(to_merge);
480
481            for i in 0..n {
482                if active[i] && i != s {
483                    adj[s][i] += adj[t][i];
484                    adj[i][s] += adj[i][t];
485                }
486            }
487        }
488
489        (best_cut, best_partition, best_boundary)
490    }
491
492    /// Detect patterns by comparing current state to history
493    pub fn detect_patterns(&mut self) -> Vec<DiscoveredPattern> {
494        let mut patterns = Vec::new();
495
496        let current = self.compute_coherence();
497        let now = Utc::now();
498
499        // Compare to previous state
500        if let Some((prev_time, prev_mincut, prev_snapshot)) = self.coherence_history.last() {
501            let mincut_delta = current.mincut_value - prev_mincut;
502            let relative_change = if *prev_mincut > 0.0 {
503                mincut_delta.abs() / prev_mincut
504            } else {
505                mincut_delta.abs()
506            };
507
508            // Detect coherence break
509            if mincut_delta < -self.config.mincut_sensitivity {
510                patterns.push(DiscoveredPattern {
511                    id: format!("coherence_break_{}", now.timestamp()),
512                    pattern_type: PatternType::CoherenceBreak,
513                    confidence: (relative_change.min(1.0) * 0.5 + 0.5),
514                    affected_nodes: current.boundary_nodes.clone(),
515                    detected_at: now,
516                    description: format!(
517                        "Network coherence dropped from {:.3} to {:.3} ({:.1}% decrease)",
518                        prev_mincut, current.mincut_value, relative_change * 100.0
519                    ),
520                    evidence: vec![
521                        Evidence {
522                            evidence_type: "mincut_delta".to_string(),
523                            value: mincut_delta,
524                            description: "Change in min-cut value".to_string(),
525                        },
526                        Evidence {
527                            evidence_type: "boundary_size".to_string(),
528                            value: current.boundary_nodes.len() as f64,
529                            description: "Number of nodes on the cut".to_string(),
530                        },
531                    ],
532                    cross_domain_links: self.find_cross_domain_at_boundary(&current.boundary_nodes),
533                });
534            }
535
536            // Detect consolidation
537            if mincut_delta > self.config.mincut_sensitivity {
538                patterns.push(DiscoveredPattern {
539                    id: format!("consolidation_{}", now.timestamp()),
540                    pattern_type: PatternType::Consolidation,
541                    confidence: (relative_change.min(1.0) * 0.5 + 0.5),
542                    affected_nodes: current.boundary_nodes.clone(),
543                    detected_at: now,
544                    description: format!(
545                        "Network coherence increased from {:.3} to {:.3} ({:.1}% increase)",
546                        prev_mincut, current.mincut_value, relative_change * 100.0
547                    ),
548                    evidence: vec![
549                        Evidence {
550                            evidence_type: "mincut_delta".to_string(),
551                            value: mincut_delta,
552                            description: "Change in min-cut value".to_string(),
553                        },
554                    ],
555                    cross_domain_links: vec![],
556                });
557            }
558
559            // Detect partition imbalance (emerging cluster)
560            let (part_a, part_b) = current.partition_sizes;
561            let imbalance = (part_a as f64 - part_b as f64).abs() / (part_a + part_b) as f64;
562            let (prev_a, prev_b) = prev_snapshot.partition_sizes;
563            let prev_imbalance = if prev_a + prev_b > 0 {
564                (prev_a as f64 - prev_b as f64).abs() / (prev_a + prev_b) as f64
565            } else {
566                0.0
567            };
568
569            if imbalance > prev_imbalance + 0.2 {
570                patterns.push(DiscoveredPattern {
571                    id: format!("emerging_cluster_{}", now.timestamp()),
572                    pattern_type: PatternType::EmergingCluster,
573                    confidence: 0.7,
574                    affected_nodes: current.boundary_nodes.clone(),
575                    detected_at: now,
576                    description: format!(
577                        "Partition imbalance increased: {} vs {} nodes (was {} vs {})",
578                        part_a, part_b, prev_a, prev_b
579                    ),
580                    evidence: vec![],
581                    cross_domain_links: vec![],
582                });
583            }
584        }
585
586        // Cross-domain pattern detection
587        if self.config.cross_domain {
588            patterns.extend(self.detect_cross_domain_patterns());
589        }
590
591        // Store current state in history
592        self.coherence_history.push((now, current.mincut_value, current));
593
594        patterns
595    }
596
597    /// Find cross-domain links at boundary nodes
598    fn find_cross_domain_at_boundary(&self, boundary: &[u32]) -> Vec<CrossDomainLink> {
599        let mut links = Vec::new();
600
601        // Find cross-domain edges involving boundary nodes
602        for edge in &self.edges {
603            if edge.edge_type == EdgeType::CrossDomain {
604                if boundary.contains(&edge.source) || boundary.contains(&edge.target) {
605                    if let (Some(src_node), Some(tgt_node)) =
606                        (self.nodes.get(&edge.source), self.nodes.get(&edge.target))
607                    {
608                        links.push(CrossDomainLink {
609                            source_domain: src_node.domain,
610                            target_domain: tgt_node.domain,
611                            source_nodes: vec![edge.source],
612                            target_nodes: vec![edge.target],
613                            link_strength: edge.weight,
614                            link_type: "boundary_crossing".to_string(),
615                        });
616                    }
617                }
618            }
619        }
620
621        links
622    }
623
624    /// Detect patterns that span multiple domains
625    fn detect_cross_domain_patterns(&self) -> Vec<DiscoveredPattern> {
626        let mut patterns = Vec::new();
627
628        // Count cross-domain edges by domain pair
629        let mut cross_counts: HashMap<(Domain, Domain), Vec<&GraphEdge>> = HashMap::new();
630
631        for edge in &self.edges {
632            if edge.edge_type == EdgeType::CrossDomain {
633                if let (Some(src), Some(tgt)) =
634                    (self.nodes.get(&edge.source), self.nodes.get(&edge.target))
635                {
636                    let key = if src.domain < tgt.domain {
637                        (src.domain, tgt.domain)
638                    } else {
639                        (tgt.domain, src.domain)
640                    };
641                    cross_counts.entry(key).or_default().push(edge);
642                }
643            }
644        }
645
646        // Report significant cross-domain bridges
647        for ((domain_a, domain_b), edges) in cross_counts {
648            if edges.len() >= 3 {
649                let avg_strength = edges.iter().map(|e| e.weight).sum::<f64>() / edges.len() as f64;
650
651                if avg_strength > self.config.similarity_threshold as f64 {
652                    patterns.push(DiscoveredPattern {
653                        id: format!("bridge_{:?}_{:?}_{}", domain_a, domain_b, Utc::now().timestamp()),
654                        pattern_type: PatternType::BridgeFormation,
655                        confidence: avg_strength,
656                        affected_nodes: edges.iter()
657                            .flat_map(|e| vec![e.source, e.target])
658                            .collect(),
659                        detected_at: Utc::now(),
660                        description: format!(
661                            "Cross-domain bridge detected: {:?} ↔ {:?} ({} connections, avg strength {:.3})",
662                            domain_a, domain_b, edges.len(), avg_strength
663                        ),
664                        evidence: vec![
665                            Evidence {
666                                evidence_type: "edge_count".to_string(),
667                                value: edges.len() as f64,
668                                description: "Number of cross-domain connections".to_string(),
669                            },
670                        ],
671                        cross_domain_links: vec![CrossDomainLink {
672                            source_domain: domain_a,
673                            target_domain: domain_b,
674                            source_nodes: edges.iter().map(|e| e.source).collect(),
675                            target_nodes: edges.iter().map(|e| e.target).collect(),
676                            link_strength: avg_strength,
677                            link_type: "semantic_bridge".to_string(),
678                        }],
679                    });
680                }
681            }
682        }
683
684        patterns
685    }
686
687    /// Get domain-specific coherence
688    pub fn domain_coherence(&self, domain: Domain) -> Option<f64> {
689        let domain_node_ids = self.domain_nodes.get(&domain)?;
690
691        if domain_node_ids.len() < 2 {
692            return None;
693        }
694
695        // Count edges within domain
696        let mut internal_weight = 0.0;
697        let mut edge_count = 0;
698
699        for edge in &self.edges {
700            if domain_node_ids.contains(&edge.source) && domain_node_ids.contains(&edge.target) {
701                internal_weight += edge.weight;
702                edge_count += 1;
703            }
704        }
705
706        if edge_count == 0 {
707            return Some(0.0);
708        }
709
710        Some(internal_weight / edge_count as f64)
711    }
712
713    /// Get statistics about the current state
714    pub fn stats(&self) -> EngineStats {
715        let mut domain_counts = HashMap::new();
716        for domain in self.domain_nodes.keys() {
717            domain_counts.insert(*domain, self.domain_nodes[domain].len());
718        }
719
720        let mut cross_domain_edges = 0;
721        for edge in &self.edges {
722            if edge.edge_type == EdgeType::CrossDomain {
723                cross_domain_edges += 1;
724            }
725        }
726
727        EngineStats {
728            total_nodes: self.nodes.len(),
729            total_edges: self.edges.len(),
730            total_vectors: self.vectors.len(),
731            domain_counts,
732            cross_domain_edges,
733            history_length: self.coherence_history.len(),
734        }
735    }
736
737    /// Get all detected patterns from the latest detection run
738    pub fn get_patterns(&self) -> Vec<DiscoveredPattern> {
739        // For now, return an empty vec. In production, this would store
740        // patterns from the last detect_patterns() call
741        vec![]
742    }
743
744    /// Export the current graph structure
745    pub fn export_graph(&self) -> GraphExport {
746        GraphExport {
747            nodes: self.nodes.values().cloned().collect(),
748            edges: self.edges.clone(),
749            domains: self.domain_nodes.clone(),
750        }
751    }
752
753    /// Get the coherence history
754    pub fn get_coherence_history(&self) -> Vec<CoherenceHistoryEntry> {
755        self.coherence_history.iter()
756            .map(|(timestamp, mincut, snapshot)| {
757                CoherenceHistoryEntry {
758                    timestamp: *timestamp,
759                    mincut_value: *mincut,
760                    snapshot: snapshot.clone(),
761                }
762            })
763            .collect()
764    }
765}
766
/// Engine statistics
///
/// Point-in-time counts returned by `NativeDiscoveryEngine::stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EngineStats {
    /// Number of graph nodes
    pub total_nodes: usize,
    /// Number of stored edges (all types)
    pub total_edges: usize,
    /// Number of stored vectors
    pub total_vectors: usize,
    /// Number of nodes registered under each domain
    pub domain_counts: HashMap<Domain, usize>,
    /// Number of edges typed `EdgeType::CrossDomain`
    pub cross_domain_edges: usize,
    /// Number of coherence snapshots recorded so far
    pub history_length: usize,
}
777
/// Exported graph structure
///
/// An owned copy of the engine's graph, returned by
/// `NativeDiscoveryEngine::export_graph`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphExport {
    /// All graph nodes (order is not specified)
    pub nodes: Vec<GraphNode>,
    /// All edges, in insertion order
    pub edges: Vec<GraphEdge>,
    /// Node IDs grouped by domain
    pub domains: HashMap<Domain, Vec<u32>>,
}
785
/// Coherence history entry
///
/// One recorded detection run, as returned by
/// `NativeDiscoveryEngine::get_coherence_history`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CoherenceHistoryEntry {
    /// When the snapshot was recorded
    pub timestamp: DateTime<Utc>,
    /// Min-cut value at that time (same value as `snapshot.mincut_value`)
    pub mincut_value: f64,
    /// Full coherence snapshot
    pub snapshot: CoherenceSnapshot,
}
793
/// Compute cosine similarity between two vectors
///
/// Returns a value in `[-1, 1]`, or `0.0` when the slices differ in length,
/// are empty, or either one has zero magnitude.
///
/// Sums are accumulated in `f64`: squaring `f32` components can overflow to
/// infinity (giving NaN) or underflow to zero (giving a false "zero vector")
/// well inside the representable `f32` range. The final ratio is clamped so
/// rounding can never push it outside `[-1, 1]`.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }

    // Single pass: dot product and both squared norms together.
    let (dot, sq_norm_a, sq_norm_b) = a.iter().zip(b.iter()).fold(
        (0.0_f64, 0.0_f64, 0.0_f64),
        |(dot, sq_a, sq_b), (&x, &y)| {
            let (x, y) = (f64::from(x), f64::from(y));
            (dot + x * y, sq_a + x * x, sq_b + y * y)
        },
    );

    if sq_norm_a == 0.0 || sq_norm_b == 0.0 {
        return 0.0;
    }

    let cosine = dot / (sq_norm_a.sqrt() * sq_norm_b.sqrt());
    cosine.clamp(-1.0, 1.0) as f32
}
810
// Total ordering for Domain (by declaration order), used to put a pair of
// domains into a canonical (smaller, larger) form before using it as a map key
812impl PartialOrd for Domain {
813    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
814        Some(self.cmp(other))
815    }
816}
817
818impl Ord for Domain {
819    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
820        (*self as u8).cmp(&(*other as u8))
821    }
822}
823
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a Climate-domain vector with empty metadata.
    fn climate_vector(id: &str, embedding: Vec<f32>) -> SemanticVector {
        SemanticVector {
            id: id.to_string(),
            embedding,
            domain: Domain::Climate,
            timestamp: Utc::now(),
            metadata: HashMap::new(),
        }
    }

    /// Identical vectors score 1, orthogonal vectors score 0.
    #[test]
    fn test_cosine_similarity() {
        let x_axis = vec![1.0, 0.0, 0.0];
        let x_axis_again = vec![1.0, 0.0, 0.0];
        let y_axis = vec![0.0, 1.0, 0.0];

        let identical = cosine_similarity(&x_axis, &x_axis_again);
        assert!((identical - 1.0).abs() < 0.001);

        let orthogonal = cosine_similarity(&x_axis, &y_axis);
        assert!(orthogonal.abs() < 0.001);
    }

    /// Adding two vectors yields two nodes and two stored vectors.
    #[test]
    fn test_engine_basic() {
        let mut engine = NativeDiscoveryEngine::new(NativeEngineConfig::default());

        engine.add_vector(climate_vector("climate_1", vec![1.0, 0.5, 0.2]));
        engine.add_vector(climate_vector("climate_2", vec![0.9, 0.6, 0.3]));

        let stats = engine.stats();
        assert_eq!(stats.total_nodes, 2);
        assert_eq!(stats.total_vectors, 2);
    }
}