Skip to main content

graphify_build/
lib.rs

//! Graph assembly and deduplication for graphify.
//!
//! Takes [`ExtractionResult`]s from multiple files and assembles them into a
//! single [`KnowledgeGraph`], skipping dangling edges and collapsing repeated
//! node IDs into a single node.
5
6use std::collections::HashSet;
7
8use tracing::debug;
9
10use graphify_core::error::Result;
11use graphify_core::graph::KnowledgeGraph;
12use graphify_core::model::ExtractionResult;
13
14/// Build a [`KnowledgeGraph`] from a single extraction result.
15///
16/// All nodes are added first; edges that reference unknown source/target
17/// nodes are silently skipped (dangling-edge protection).
18pub fn build_from_extraction(extraction: &ExtractionResult) -> Result<KnowledgeGraph> {
19    let mut graph = KnowledgeGraph::new();
20
21    // Add all nodes
22    for node in &extraction.nodes {
23        let _ = graph.add_node(node.clone());
24    }
25
26    // Collect known node IDs for dangling-edge check
27    let node_ids: HashSet<&str> = extraction.nodes.iter().map(|n| n.id.as_str()).collect();
28
29    // Add edges, skipping those that reference unknown nodes
30    let mut skipped = 0usize;
31    for edge in &extraction.edges {
32        if node_ids.contains(edge.source.as_str()) && node_ids.contains(edge.target.as_str()) {
33            let _ = graph.add_edge(edge.clone());
34        } else {
35            skipped += 1;
36        }
37    }
38    if skipped > 0 {
39        debug!("skipped {skipped} dangling edge(s)");
40    }
41
42    // Store hyperedges
43    graph.set_hyperedges(extraction.hyperedges.clone());
44
45    Ok(graph)
46}
47
48/// Merge multiple extraction results into one graph.
49///
50/// Later extractions override earlier ones for same node IDs (first-write-wins
51/// via `add_node` which rejects duplicates, so the first occurrence is kept).
52pub fn build(extractions: &[ExtractionResult]) -> Result<KnowledgeGraph> {
53    let mut combined = ExtractionResult::default();
54    for ext in extractions {
55        combined.nodes.extend(ext.nodes.clone());
56        combined.edges.extend(ext.edges.clone());
57        combined.hyperedges.extend(ext.hyperedges.clone());
58    }
59    build_from_extraction(&combined)
60}
61
62// ---------------------------------------------------------------------------
63// Tests
64// ---------------------------------------------------------------------------
65
#[cfg(test)]
mod tests {
    use super::*;
    use graphify_core::confidence::Confidence;
    use graphify_core::model::{GraphEdge, GraphNode, Hyperedge, NodeType};
    use std::collections::HashMap;

    /// Minimal class node whose label is the same text as its ID.
    fn make_node(id: &str) -> GraphNode {
        GraphNode {
            id: id.into(),
            label: id.into(),
            source_file: "test.rs".into(),
            source_location: None,
            node_type: NodeType::Class,
            community: None,
            extra: HashMap::new(),
        }
    }

    /// Fully-confident `calls` edge from `src` to `tgt`.
    fn make_edge(src: &str, tgt: &str) -> GraphEdge {
        GraphEdge {
            source: src.into(),
            target: tgt.into(),
            relation: "calls".into(),
            confidence: Confidence::Extracted,
            confidence_score: 1.0,
            source_file: "test.rs".into(),
            source_location: None,
            weight: 1.0,
            extra: HashMap::new(),
        }
    }

    /// Fixture: an extraction with the given node IDs and `(source, target)`
    /// edge pairs, and no hyperedges.
    fn extraction_of(node_ids: &[&str], edge_pairs: &[(&str, &str)]) -> ExtractionResult {
        ExtractionResult {
            nodes: node_ids.iter().copied().map(make_node).collect(),
            edges: edge_pairs
                .iter()
                .map(|&(src, tgt)| make_edge(src, tgt))
                .collect(),
            hyperedges: Vec::new(),
        }
    }

    #[test]
    fn build_from_empty() {
        let built = build_from_extraction(&ExtractionResult::default()).unwrap();
        assert_eq!(built.node_count(), 0);
        assert_eq!(built.edge_count(), 0);
    }

    #[test]
    fn build_with_nodes_and_edges() {
        let input = extraction_of(&["a", "b", "c"], &[("a", "b"), ("b", "c")]);
        let built = build_from_extraction(&input).unwrap();
        assert_eq!(built.node_count(), 3);
        assert_eq!(built.edge_count(), 2);
        for id in ["a", "b", "c"] {
            assert!(built.get_node(id).is_some());
        }
    }

    #[test]
    fn dangling_edges_skipped() {
        let input = extraction_of(
            &["a", "b"],
            &[
                ("a", "b"),       // both endpoints exist
                ("a", "missing"), // unknown target
                ("gone", "b"),    // unknown source
            ],
        );
        let built = build_from_extraction(&input).unwrap();
        assert_eq!(built.node_count(), 2);
        // Only a->b survives.
        assert_eq!(built.edge_count(), 1);
    }

    #[test]
    fn build_merges_multiple_extractions() {
        let first = extraction_of(&["a", "b"], &[("a", "b")]);
        // The edge b->c starts at a node that only the first extraction defines.
        let second = extraction_of(&["c"], &[("b", "c")]);
        let built = build(&[first, second]).unwrap();
        assert_eq!(built.node_count(), 3);
        assert_eq!(built.edge_count(), 2);
    }

    #[test]
    fn duplicate_nodes_first_wins() {
        let input = extraction_of(&["a", "a"], &[]);
        let built = build_from_extraction(&input).unwrap();
        // The repeated "a" must not produce a second node.
        assert_eq!(built.node_count(), 1);
    }

    #[test]
    fn hyperedges_stored() {
        let group = Hyperedge {
            nodes: vec!["a".into(), "b".into()],
            relation: "coexist".into(),
            label: "together".into(),
        };
        let input = ExtractionResult {
            nodes: vec![make_node("a"), make_node("b")],
            edges: Vec::new(),
            hyperedges: vec![group],
        };
        let built = build_from_extraction(&input).unwrap();
        assert_eq!(built.hyperedges.len(), 1);
        assert_eq!(built.hyperedges[0].relation, "coexist");
    }
}