ruvector_data_framework/
ruvector_native.rs

1//! RuVector-Native Discovery Engine
2//!
3//! Deep integration with ruvector-core, ruvector-graph, and ruvector-mincut
4//! for production-grade coherence analysis and pattern discovery.
5
6use std::collections::HashMap;
7use chrono::{DateTime, Utc};
8use serde::{Deserialize, Serialize};
9
10use crate::utils::cosine_similarity;
11
12/// Vector embedding for semantic similarity
13/// Uses RuVector's native vector storage format
14#[derive(Debug, Clone, Serialize, Deserialize)]
15pub struct SemanticVector {
16    /// Vector ID
17    pub id: String,
18    /// Dense embedding (typically 384-1536 dimensions)
19    pub embedding: Vec<f32>,
20    /// Source domain
21    pub domain: Domain,
22    /// Timestamp
23    pub timestamp: DateTime<Utc>,
24    /// Metadata for filtering
25    pub metadata: HashMap<String, String>,
26}
27
28/// Discovery domains
29#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
30pub enum Domain {
31    Climate,
32    Finance,
33    Research,
34    Medical,
35    Economic,
36    Genomics,
37    Physics,
38    Seismic,
39    Ocean,
40    Space,
41    Transportation,
42    Geospatial,
43    Government,
44    CrossDomain,
45}
46
47/// RuVector-native graph node
48/// Designed to work with ruvector-graph's adjacency structures
49#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct GraphNode {
51    /// Node ID (u32 for ruvector compatibility)
52    pub id: u32,
53    /// String identifier for external reference
54    pub external_id: String,
55    /// Domain
56    pub domain: Domain,
57    /// Associated vector embedding index
58    pub vector_idx: Option<usize>,
59    /// Node weight (for weighted min-cut)
60    pub weight: f64,
61    /// Attributes
62    pub attributes: HashMap<String, f64>,
63}
64
65/// RuVector-native graph edge
66/// Compatible with ruvector-mincut's edge format
67#[derive(Debug, Clone, Serialize, Deserialize)]
68pub struct GraphEdge {
69    /// Source node ID
70    pub source: u32,
71    /// Target node ID
72    pub target: u32,
73    /// Edge weight (capacity for min-cut)
74    pub weight: f64,
75    /// Edge type
76    pub edge_type: EdgeType,
77    /// Timestamp when edge was created/updated
78    pub timestamp: DateTime<Utc>,
79}
80
81/// Types of edges in the discovery graph
82#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
83pub enum EdgeType {
84    /// Correlation-based (e.g., temperature correlation)
85    Correlation,
86    /// Similarity-based (e.g., vector cosine similarity)
87    Similarity,
88    /// Citation/reference link
89    Citation,
90    /// Causal relationship
91    Causal,
92    /// Cross-domain bridge
93    CrossDomain,
94}
95
96/// Configuration for the native discovery engine
97#[derive(Debug, Clone, Serialize, Deserialize)]
98pub struct NativeEngineConfig {
99    /// Minimum edge weight to include
100    pub min_edge_weight: f64,
101    /// Vector similarity threshold
102    pub similarity_threshold: f64,
103    /// Min-cut sensitivity (lower = more sensitive to breaks)
104    pub mincut_sensitivity: f64,
105    /// Enable cross-domain discovery
106    pub cross_domain: bool,
107    /// Window size for temporal analysis (seconds)
108    pub window_seconds: i64,
109    /// HNSW parameters
110    pub hnsw_m: usize,
111    pub hnsw_ef_construction: usize,
112    pub hnsw_ef_search: usize,
113    /// Vector dimension
114    pub dimension: usize,
115    /// Batch size for processing
116    pub batch_size: usize,
117    /// Checkpoint interval (records)
118    pub checkpoint_interval: u64,
119    /// Number of parallel workers
120    pub parallel_workers: usize,
121}
122
123impl Default for NativeEngineConfig {
124    fn default() -> Self {
125        Self {
126            min_edge_weight: 0.3,
127            similarity_threshold: 0.7,
128            mincut_sensitivity: 0.15,
129            cross_domain: true,
130            window_seconds: 86400 * 30, // 30 days
131            hnsw_m: 16,
132            hnsw_ef_construction: 200,
133            hnsw_ef_search: 50,
134            dimension: 384,
135            batch_size: 1000,
136            checkpoint_interval: 10_000,
137            parallel_workers: 4,
138        }
139    }
140}
141
142/// The main RuVector-native discovery engine
143///
144/// This engine uses RuVector's core algorithms:
145/// - Vector similarity via HNSW index
146/// - Graph coherence via Stoer-Wagner min-cut
147/// - Temporal windowing for streaming analysis
148pub struct NativeDiscoveryEngine {
149    config: NativeEngineConfig,
150
151    /// Vector storage (would use ruvector-core in production)
152    vectors: Vec<SemanticVector>,
153
154    /// Graph nodes
155    nodes: HashMap<u32, GraphNode>,
156
157    /// Graph edges (adjacency list format for ruvector-mincut)
158    edges: Vec<GraphEdge>,
159
160    /// Historical coherence values for change detection
161    coherence_history: Vec<(DateTime<Utc>, f64, CoherenceSnapshot)>,
162
163    /// Next node ID
164    next_node_id: u32,
165
166    /// Domain-specific subgraph indices
167    domain_nodes: HashMap<Domain, Vec<u32>>,
168}
169
170/// Snapshot of coherence state for historical comparison
171#[derive(Debug, Clone, Serialize, Deserialize)]
172pub struct CoherenceSnapshot {
173    /// Min-cut value
174    pub mincut_value: f64,
175    /// Number of nodes
176    pub node_count: usize,
177    /// Number of edges
178    pub edge_count: usize,
179    /// Partition sizes after min-cut
180    pub partition_sizes: (usize, usize),
181    /// Boundary nodes (nodes on the cut)
182    pub boundary_nodes: Vec<u32>,
183    /// Average edge weight
184    pub avg_edge_weight: f64,
185}
186
187/// A detected pattern or anomaly
188#[derive(Debug, Clone, Serialize, Deserialize)]
189pub struct DiscoveredPattern {
190    /// Pattern ID
191    pub id: String,
192    /// Pattern type
193    pub pattern_type: PatternType,
194    /// Confidence score (0-1)
195    pub confidence: f64,
196    /// Affected nodes
197    pub affected_nodes: Vec<u32>,
198    /// Timestamp of detection
199    pub detected_at: DateTime<Utc>,
200    /// Description
201    pub description: String,
202    /// Evidence
203    pub evidence: Vec<Evidence>,
204    /// Cross-domain connections if applicable
205    pub cross_domain_links: Vec<CrossDomainLink>,
206}
207
208/// Types of discoverable patterns
209#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
210pub enum PatternType {
211    /// Network coherence break (min-cut dropped)
212    CoherenceBreak,
213    /// Network consolidation (min-cut increased)
214    Consolidation,
215    /// Emerging cluster (new dense subgraph)
216    EmergingCluster,
217    /// Dissolving cluster
218    DissolvingCluster,
219    /// Bridge formation (cross-domain connection)
220    BridgeFormation,
221    /// Anomalous node (outlier in vector space)
222    AnomalousNode,
223    /// Temporal shift (pattern change over time)
224    TemporalShift,
225    /// Cascade (change propagating through network)
226    Cascade,
227}
228
229/// Evidence supporting a pattern detection
230#[derive(Debug, Clone, Serialize, Deserialize)]
231pub struct Evidence {
232    pub evidence_type: String,
233    pub value: f64,
234    pub description: String,
235}
236
237/// Cross-domain link discovered
238#[derive(Debug, Clone, Serialize, Deserialize)]
239pub struct CrossDomainLink {
240    pub source_domain: Domain,
241    pub target_domain: Domain,
242    pub source_nodes: Vec<u32>,
243    pub target_nodes: Vec<u32>,
244    pub link_strength: f64,
245    pub link_type: String,
246}
247
248impl NativeDiscoveryEngine {
249    /// Create a new engine with the given configuration
250    pub fn new(config: NativeEngineConfig) -> Self {
251        Self {
252            config,
253            vectors: Vec::new(),
254            nodes: HashMap::new(),
255            edges: Vec::new(),
256            coherence_history: Vec::new(),
257            next_node_id: 0,
258            domain_nodes: HashMap::new(),
259        }
260    }
261
262    /// Add a vector to the engine
263    /// In production, this would use ruvector-core's vector storage
264    pub fn add_vector(&mut self, vector: SemanticVector) -> u32 {
265        let node_id = self.next_node_id;
266        self.next_node_id += 1;
267
268        let vector_idx = self.vectors.len();
269        self.vectors.push(vector.clone());
270
271        let node = GraphNode {
272            id: node_id,
273            external_id: vector.id.clone(),
274            domain: vector.domain,
275            vector_idx: Some(vector_idx),
276            weight: 1.0,
277            attributes: HashMap::new(),
278        };
279
280        self.nodes.insert(node_id, node);
281        self.domain_nodes.entry(vector.domain).or_default().push(node_id);
282
283        // Auto-connect to similar vectors
284        self.connect_similar_vectors(node_id);
285
286        node_id
287    }
288
289    /// Connect a node to similar vectors using cosine similarity
290    /// In production, this would use ruvector-hnsw for O(log n) search
291    fn connect_similar_vectors(&mut self, node_id: u32) {
292        let node = match self.nodes.get(&node_id) {
293            Some(n) => n.clone(),
294            None => return,
295        };
296
297        let vector_idx = match node.vector_idx {
298            Some(idx) => idx,
299            None => return,
300        };
301
302        let source_vec = &self.vectors[vector_idx].embedding;
303
304        // Find similar vectors (brute force - would use HNSW in production)
305        for (other_id, other_node) in &self.nodes {
306            if *other_id == node_id {
307                continue;
308            }
309
310            if let Some(other_idx) = other_node.vector_idx {
311                let other_vec = &self.vectors[other_idx].embedding;
312                let similarity = cosine_similarity(source_vec, other_vec);
313
314                if similarity >= self.config.similarity_threshold as f32 {
315                    // Determine edge type
316                    let edge_type = if node.domain != other_node.domain {
317                        EdgeType::CrossDomain
318                    } else {
319                        EdgeType::Similarity
320                    };
321
322                    self.edges.push(GraphEdge {
323                        source: node_id,
324                        target: *other_id,
325                        weight: similarity as f64,
326                        edge_type,
327                        timestamp: Utc::now(),
328                    });
329                }
330            }
331        }
332    }
333
334    /// Add a correlation-based edge
335    pub fn add_correlation_edge(&mut self, source: u32, target: u32, correlation: f64) {
336        if correlation.abs() >= self.config.min_edge_weight {
337            self.edges.push(GraphEdge {
338                source,
339                target,
340                weight: correlation.abs(),
341                edge_type: EdgeType::Correlation,
342                timestamp: Utc::now(),
343            });
344        }
345    }
346
347    /// Compute current coherence using Stoer-Wagner min-cut
348    ///
349    /// The min-cut value represents the "weakest link" in the network.
350    /// A drop in min-cut indicates the network is becoming fragmented.
351    pub fn compute_coherence(&self) -> CoherenceSnapshot {
352        if self.nodes.is_empty() || self.edges.is_empty() {
353            return CoherenceSnapshot {
354                mincut_value: 0.0,
355                node_count: self.nodes.len(),
356                edge_count: self.edges.len(),
357                partition_sizes: (0, 0),
358                boundary_nodes: vec![],
359                avg_edge_weight: 0.0,
360            };
361        }
362
363        // Build adjacency matrix for min-cut
364        // In production, this would call ruvector-mincut directly
365        let mincut_result = self.stoer_wagner_mincut();
366
367        let avg_edge_weight = if self.edges.is_empty() {
368            0.0
369        } else {
370            self.edges.iter().map(|e| e.weight).sum::<f64>() / self.edges.len() as f64
371        };
372
373        CoherenceSnapshot {
374            mincut_value: mincut_result.0,
375            node_count: self.nodes.len(),
376            edge_count: self.edges.len(),
377            partition_sizes: mincut_result.1,
378            boundary_nodes: mincut_result.2,
379            avg_edge_weight,
380        }
381    }
382
383    /// Stoer-Wagner minimum cut algorithm
384    /// Returns (min_cut_value, partition_sizes, boundary_nodes)
385    fn stoer_wagner_mincut(&self) -> (f64, (usize, usize), Vec<u32>) {
386        let n = self.nodes.len();
387        if n < 2 {
388            return (0.0, (n, 0), vec![]);
389        }
390
391        // Build adjacency matrix
392        let node_ids: Vec<u32> = self.nodes.keys().copied().collect();
393        let id_to_idx: HashMap<u32, usize> = node_ids.iter()
394            .enumerate()
395            .map(|(i, &id)| (id, i))
396            .collect();
397
398        let mut adj = vec![vec![0.0; n]; n];
399        for edge in &self.edges {
400            if let (Some(&i), Some(&j)) = (id_to_idx.get(&edge.source), id_to_idx.get(&edge.target)) {
401                adj[i][j] += edge.weight;
402                adj[j][i] += edge.weight;
403            }
404        }
405
406        // Stoer-Wagner algorithm
407        let mut best_cut = f64::INFINITY;
408        let mut best_partition = (0, 0);
409        let mut best_boundary = vec![];
410
411        let mut active: Vec<bool> = vec![true; n];
412        let mut merged: Vec<Vec<usize>> = (0..n).map(|i| vec![i]).collect();
413
414        for phase in 0..(n - 1) {
415            // Maximum adjacency search
416            let mut in_a = vec![false; n];
417            let mut key = vec![0.0; n];
418
419            // Find first active node
420            let start = (0..n).find(|&i| active[i]).unwrap();
421            in_a[start] = true;
422
423            // Update keys
424            for j in 0..n {
425                if active[j] && !in_a[j] {
426                    key[j] = adj[start][j];
427                }
428            }
429
430            let mut s = start;
431            let mut t = start;
432
433            for _ in 1..=(n - 1 - phase) {
434                // Find max key not in A
435                let mut max_key = f64::NEG_INFINITY;
436                let mut max_node = 0;
437
438                for j in 0..n {
439                    if active[j] && !in_a[j] && key[j] > max_key {
440                        max_key = key[j];
441                        max_node = j;
442                    }
443                }
444
445                s = t;
446                t = max_node;
447                in_a[t] = true;
448
449                // Update keys
450                for j in 0..n {
451                    if active[j] && !in_a[j] {
452                        key[j] += adj[t][j];
453                    }
454                }
455            }
456
457            // Cut of the phase
458            let cut_weight = key[t];
459
460            if cut_weight < best_cut {
461                best_cut = cut_weight;
462
463                // Partition is: merged[t] vs everything else
464                let partition_a: Vec<usize> = merged[t].clone();
465                let partition_b: Vec<usize> = (0..n)
466                    .filter(|&i| active[i] && i != t)
467                    .flat_map(|i| merged[i].iter().copied())
468                    .collect();
469
470                best_partition = (partition_a.len(), partition_b.len());
471
472                // Boundary nodes are those in the smaller partition with edges to other
473                best_boundary = partition_a.iter()
474                    .map(|&i| node_ids[i])
475                    .collect();
476            }
477
478            // Merge s and t
479            active[t] = false;
480            let to_merge: Vec<usize> = merged[t].clone();
481            merged[s].extend(to_merge);
482
483            for i in 0..n {
484                if active[i] && i != s {
485                    adj[s][i] += adj[t][i];
486                    adj[i][s] += adj[i][t];
487                }
488            }
489        }
490
491        (best_cut, best_partition, best_boundary)
492    }
493
494    /// Detect patterns by comparing current state to history
495    pub fn detect_patterns(&mut self) -> Vec<DiscoveredPattern> {
496        let mut patterns = Vec::new();
497
498        let current = self.compute_coherence();
499        let now = Utc::now();
500
501        // Compare to previous state
502        if let Some((prev_time, prev_mincut, prev_snapshot)) = self.coherence_history.last() {
503            let mincut_delta = current.mincut_value - prev_mincut;
504            let relative_change = if *prev_mincut > 0.0 {
505                mincut_delta.abs() / prev_mincut
506            } else {
507                mincut_delta.abs()
508            };
509
510            // Detect coherence break
511            if mincut_delta < -self.config.mincut_sensitivity {
512                patterns.push(DiscoveredPattern {
513                    id: format!("coherence_break_{}", now.timestamp()),
514                    pattern_type: PatternType::CoherenceBreak,
515                    confidence: (relative_change.min(1.0) * 0.5 + 0.5),
516                    affected_nodes: current.boundary_nodes.clone(),
517                    detected_at: now,
518                    description: format!(
519                        "Network coherence dropped from {:.3} to {:.3} ({:.1}% decrease)",
520                        prev_mincut, current.mincut_value, relative_change * 100.0
521                    ),
522                    evidence: vec![
523                        Evidence {
524                            evidence_type: "mincut_delta".to_string(),
525                            value: mincut_delta,
526                            description: "Change in min-cut value".to_string(),
527                        },
528                        Evidence {
529                            evidence_type: "boundary_size".to_string(),
530                            value: current.boundary_nodes.len() as f64,
531                            description: "Number of nodes on the cut".to_string(),
532                        },
533                    ],
534                    cross_domain_links: self.find_cross_domain_at_boundary(&current.boundary_nodes),
535                });
536            }
537
538            // Detect consolidation
539            if mincut_delta > self.config.mincut_sensitivity {
540                patterns.push(DiscoveredPattern {
541                    id: format!("consolidation_{}", now.timestamp()),
542                    pattern_type: PatternType::Consolidation,
543                    confidence: (relative_change.min(1.0) * 0.5 + 0.5),
544                    affected_nodes: current.boundary_nodes.clone(),
545                    detected_at: now,
546                    description: format!(
547                        "Network coherence increased from {:.3} to {:.3} ({:.1}% increase)",
548                        prev_mincut, current.mincut_value, relative_change * 100.0
549                    ),
550                    evidence: vec![
551                        Evidence {
552                            evidence_type: "mincut_delta".to_string(),
553                            value: mincut_delta,
554                            description: "Change in min-cut value".to_string(),
555                        },
556                    ],
557                    cross_domain_links: vec![],
558                });
559            }
560
561            // Detect partition imbalance (emerging cluster)
562            let (part_a, part_b) = current.partition_sizes;
563            let imbalance = (part_a as f64 - part_b as f64).abs() / (part_a + part_b) as f64;
564            let (prev_a, prev_b) = prev_snapshot.partition_sizes;
565            let prev_imbalance = if prev_a + prev_b > 0 {
566                (prev_a as f64 - prev_b as f64).abs() / (prev_a + prev_b) as f64
567            } else {
568                0.0
569            };
570
571            if imbalance > prev_imbalance + 0.2 {
572                patterns.push(DiscoveredPattern {
573                    id: format!("emerging_cluster_{}", now.timestamp()),
574                    pattern_type: PatternType::EmergingCluster,
575                    confidence: 0.7,
576                    affected_nodes: current.boundary_nodes.clone(),
577                    detected_at: now,
578                    description: format!(
579                        "Partition imbalance increased: {} vs {} nodes (was {} vs {})",
580                        part_a, part_b, prev_a, prev_b
581                    ),
582                    evidence: vec![],
583                    cross_domain_links: vec![],
584                });
585            }
586        }
587
588        // Cross-domain pattern detection
589        if self.config.cross_domain {
590            patterns.extend(self.detect_cross_domain_patterns());
591        }
592
593        // Store current state in history
594        self.coherence_history.push((now, current.mincut_value, current));
595
596        patterns
597    }
598
599    /// Find cross-domain links at boundary nodes
600    fn find_cross_domain_at_boundary(&self, boundary: &[u32]) -> Vec<CrossDomainLink> {
601        let mut links = Vec::new();
602
603        // Find cross-domain edges involving boundary nodes
604        for edge in &self.edges {
605            if edge.edge_type == EdgeType::CrossDomain {
606                if boundary.contains(&edge.source) || boundary.contains(&edge.target) {
607                    if let (Some(src_node), Some(tgt_node)) =
608                        (self.nodes.get(&edge.source), self.nodes.get(&edge.target))
609                    {
610                        links.push(CrossDomainLink {
611                            source_domain: src_node.domain,
612                            target_domain: tgt_node.domain,
613                            source_nodes: vec![edge.source],
614                            target_nodes: vec![edge.target],
615                            link_strength: edge.weight,
616                            link_type: "boundary_crossing".to_string(),
617                        });
618                    }
619                }
620            }
621        }
622
623        links
624    }
625
626    /// Detect patterns that span multiple domains
627    fn detect_cross_domain_patterns(&self) -> Vec<DiscoveredPattern> {
628        let mut patterns = Vec::new();
629
630        // Count cross-domain edges by domain pair
631        let mut cross_counts: HashMap<(Domain, Domain), Vec<&GraphEdge>> = HashMap::new();
632
633        for edge in &self.edges {
634            if edge.edge_type == EdgeType::CrossDomain {
635                if let (Some(src), Some(tgt)) =
636                    (self.nodes.get(&edge.source), self.nodes.get(&edge.target))
637                {
638                    let key = if src.domain < tgt.domain {
639                        (src.domain, tgt.domain)
640                    } else {
641                        (tgt.domain, src.domain)
642                    };
643                    cross_counts.entry(key).or_default().push(edge);
644                }
645            }
646        }
647
648        // Report significant cross-domain bridges
649        for ((domain_a, domain_b), edges) in cross_counts {
650            if edges.len() >= 3 {
651                let avg_strength = edges.iter().map(|e| e.weight).sum::<f64>() / edges.len() as f64;
652
653                if avg_strength > self.config.similarity_threshold as f64 {
654                    patterns.push(DiscoveredPattern {
655                        id: format!("bridge_{:?}_{:?}_{}", domain_a, domain_b, Utc::now().timestamp()),
656                        pattern_type: PatternType::BridgeFormation,
657                        confidence: avg_strength,
658                        affected_nodes: edges.iter()
659                            .flat_map(|e| vec![e.source, e.target])
660                            .collect(),
661                        detected_at: Utc::now(),
662                        description: format!(
663                            "Cross-domain bridge detected: {:?} ↔ {:?} ({} connections, avg strength {:.3})",
664                            domain_a, domain_b, edges.len(), avg_strength
665                        ),
666                        evidence: vec![
667                            Evidence {
668                                evidence_type: "edge_count".to_string(),
669                                value: edges.len() as f64,
670                                description: "Number of cross-domain connections".to_string(),
671                            },
672                        ],
673                        cross_domain_links: vec![CrossDomainLink {
674                            source_domain: domain_a,
675                            target_domain: domain_b,
676                            source_nodes: edges.iter().map(|e| e.source).collect(),
677                            target_nodes: edges.iter().map(|e| e.target).collect(),
678                            link_strength: avg_strength,
679                            link_type: "semantic_bridge".to_string(),
680                        }],
681                    });
682                }
683            }
684        }
685
686        patterns
687    }
688
689    /// Get domain-specific coherence
690    pub fn domain_coherence(&self, domain: Domain) -> Option<f64> {
691        let domain_node_ids = self.domain_nodes.get(&domain)?;
692
693        if domain_node_ids.len() < 2 {
694            return None;
695        }
696
697        // Count edges within domain
698        let mut internal_weight = 0.0;
699        let mut edge_count = 0;
700
701        for edge in &self.edges {
702            if domain_node_ids.contains(&edge.source) && domain_node_ids.contains(&edge.target) {
703                internal_weight += edge.weight;
704                edge_count += 1;
705            }
706        }
707
708        if edge_count == 0 {
709            return Some(0.0);
710        }
711
712        Some(internal_weight / edge_count as f64)
713    }
714
715    /// Get statistics about the current state
716    pub fn stats(&self) -> EngineStats {
717        let mut domain_counts = HashMap::new();
718        for domain in self.domain_nodes.keys() {
719            domain_counts.insert(*domain, self.domain_nodes[domain].len());
720        }
721
722        let mut cross_domain_edges = 0;
723        for edge in &self.edges {
724            if edge.edge_type == EdgeType::CrossDomain {
725                cross_domain_edges += 1;
726            }
727        }
728
729        EngineStats {
730            total_nodes: self.nodes.len(),
731            total_edges: self.edges.len(),
732            total_vectors: self.vectors.len(),
733            domain_counts,
734            cross_domain_edges,
735            history_length: self.coherence_history.len(),
736        }
737    }
738
739    /// Get all detected patterns from the latest detection run
740    pub fn get_patterns(&self) -> Vec<DiscoveredPattern> {
741        // For now, return an empty vec. In production, this would store
742        // patterns from the last detect_patterns() call
743        vec![]
744    }
745
746    /// Export the current graph structure
747    pub fn export_graph(&self) -> GraphExport {
748        GraphExport {
749            nodes: self.nodes.values().cloned().collect(),
750            edges: self.edges.clone(),
751            domains: self.domain_nodes.clone(),
752        }
753    }
754
755    /// Get the coherence history
756    pub fn get_coherence_history(&self) -> Vec<CoherenceHistoryEntry> {
757        self.coherence_history.iter()
758            .map(|(timestamp, mincut, snapshot)| {
759                CoherenceHistoryEntry {
760                    timestamp: *timestamp,
761                    mincut_value: *mincut,
762                    snapshot: snapshot.clone(),
763                }
764            })
765            .collect()
766    }
767}
768
769/// Engine statistics
770#[derive(Debug, Clone, Serialize, Deserialize)]
771pub struct EngineStats {
772    pub total_nodes: usize,
773    pub total_edges: usize,
774    pub total_vectors: usize,
775    pub domain_counts: HashMap<Domain, usize>,
776    pub cross_domain_edges: usize,
777    pub history_length: usize,
778}
779
780/// Exported graph structure
781#[derive(Debug, Clone, Serialize, Deserialize)]
782pub struct GraphExport {
783    pub nodes: Vec<GraphNode>,
784    pub edges: Vec<GraphEdge>,
785    pub domains: HashMap<Domain, Vec<u32>>,
786}
787
788/// Coherence history entry
789#[derive(Debug, Clone, Serialize, Deserialize)]
790pub struct CoherenceHistoryEntry {
791    pub timestamp: DateTime<Utc>,
792    pub mincut_value: f64,
793    pub snapshot: CoherenceSnapshot,
794}
795
796// Note: cosine_similarity is imported from crate::utils
797
798// Implement ordering for Domain to use in HashMap keys
799impl PartialOrd for Domain {
800    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
801        Some(self.cmp(other))
802    }
803}
804
805impl Ord for Domain {
806    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
807        (*self as u8).cmp(&(*other as u8))
808    }
809}
810
811#[cfg(test)]
812mod tests {
813    use super::*;
814
815    #[test]
816    fn test_cosine_similarity() {
817        let a = vec![1.0, 0.0, 0.0];
818        let b = vec![1.0, 0.0, 0.0];
819        assert!((cosine_similarity(&a, &b) - 1.0).abs() < 0.001);
820
821        let c = vec![0.0, 1.0, 0.0];
822        assert!((cosine_similarity(&a, &c)).abs() < 0.001);
823    }
824
825    #[test]
826    fn test_engine_basic() {
827        let config = NativeEngineConfig::default();
828        let mut engine = NativeDiscoveryEngine::new(config);
829
830        // Add some vectors
831        let v1 = SemanticVector {
832            id: "climate_1".to_string(),
833            embedding: vec![1.0, 0.5, 0.2],
834            domain: Domain::Climate,
835            timestamp: Utc::now(),
836            metadata: HashMap::new(),
837        };
838
839        let v2 = SemanticVector {
840            id: "climate_2".to_string(),
841            embedding: vec![0.9, 0.6, 0.3],
842            domain: Domain::Climate,
843            timestamp: Utc::now(),
844            metadata: HashMap::new(),
845        };
846
847        engine.add_vector(v1);
848        engine.add_vector(v2);
849
850        let stats = engine.stats();
851        assert_eq!(stats.total_nodes, 2);
852        assert_eq!(stats.total_vectors, 2);
853    }
854}