Skip to main content

graphify_build/
lib.rs

1//! Graph assembly and deduplication for graphify.
2//!
3//! Takes [`ExtractionResult`]s from multiple files and assembles them into a
4//! single [`KnowledgeGraph`], skipping dangling edges.
5
6use std::collections::HashSet;
7
8use tracing::debug;
9
10use graphify_core::error::Result;
11use graphify_core::graph::KnowledgeGraph;
12use graphify_core::model::ExtractionResult;
13
14/// Build a [`KnowledgeGraph`] from a single extraction result.
15///
16/// All nodes are added first; edges that reference unknown source/target
17/// nodes are silently skipped (dangling-edge protection).
18pub fn build_from_extraction(extraction: &ExtractionResult) -> Result<KnowledgeGraph> {
19    let mut graph = KnowledgeGraph::new();
20
21    for node in &extraction.nodes {
22        let _ = graph.add_node(node.clone());
23    }
24
25    let node_ids: HashSet<&str> = extraction.nodes.iter().map(|n| n.id.as_str()).collect();
26
27    let mut skipped = 0usize;
28    for edge in &extraction.edges {
29        if node_ids.contains(edge.source.as_str()) && node_ids.contains(edge.target.as_str()) {
30            let _ = graph.add_edge(edge.clone());
31        } else {
32            skipped += 1;
33        }
34    }
35    if skipped > 0 {
36        debug!("skipped {skipped} dangling edge(s)");
37    }
38
39    graph.set_hyperedges(extraction.hyperedges.clone());
40
41    Ok(graph)
42}
43
44/// Merge multiple extraction results into one graph.
45///
46/// Later extractions override earlier ones for same node IDs (first-write-wins
47/// via `add_node` which rejects duplicates, so the first occurrence is kept).
48pub fn build(extractions: &[ExtractionResult]) -> Result<KnowledgeGraph> {
49    let mut combined = ExtractionResult::default();
50    for ext in extractions {
51        combined.nodes.extend(ext.nodes.clone());
52        combined.edges.extend(ext.edges.clone());
53        combined.hyperedges.extend(ext.hyperedges.clone());
54    }
55    build_from_extraction(&combined)
56}
57
#[cfg(test)]
mod tests {
    //! Unit tests for graph assembly: node/edge insertion, dangling-edge
    //! filtering, duplicate handling, merging and hyperedge storage.

    use super::*;
    use graphify_core::confidence::Confidence;
    use graphify_core::model::{GraphEdge, GraphNode, Hyperedge, NodeType};
    use std::collections::HashMap;

    /// Minimal node fixture: `label` mirrors `id`; optional fields are empty.
    fn make_node(id: &str) -> GraphNode {
        GraphNode {
            id: id.into(),
            label: id.into(),
            source_file: "test.rs".into(),
            source_location: None,
            node_type: NodeType::Class,
            community: None,
            extra: HashMap::new(),
        }
    }

    /// Minimal `calls` edge fixture from `src` to `tgt`.
    fn make_edge(src: &str, tgt: &str) -> GraphEdge {
        GraphEdge {
            source: src.into(),
            target: tgt.into(),
            relation: "calls".into(),
            confidence: Confidence::Extracted,
            confidence_score: 1.0,
            source_file: "test.rs".into(),
            source_location: None,
            weight: 1.0,
            extra: HashMap::new(),
        }
    }

    #[test]
    fn build_from_empty() {
        let ext = ExtractionResult::default();
        let graph = build_from_extraction(&ext).unwrap();
        assert_eq!(graph.node_count(), 0);
        assert_eq!(graph.edge_count(), 0);
    }

    #[test]
    fn build_from_no_extractions() {
        // An empty slice must behave like a single empty extraction.
        let graph = build(&[]).unwrap();
        assert_eq!(graph.node_count(), 0);
        assert_eq!(graph.edge_count(), 0);
    }

    #[test]
    fn build_with_nodes_and_edges() {
        let ext = ExtractionResult {
            nodes: vec![make_node("a"), make_node("b"), make_node("c")],
            edges: vec![make_edge("a", "b"), make_edge("b", "c")],
            hyperedges: vec![],
        };
        let graph = build_from_extraction(&ext).unwrap();
        assert_eq!(graph.node_count(), 3);
        assert_eq!(graph.edge_count(), 2);
        assert!(graph.get_node("a").is_some());
        assert!(graph.get_node("b").is_some());
        assert!(graph.get_node("c").is_some());
    }

    #[test]
    fn dangling_edges_skipped() {
        let ext = ExtractionResult {
            nodes: vec![make_node("a"), make_node("b")],
            edges: vec![
                make_edge("a", "b"),       // valid
                make_edge("a", "missing"), // dangling
                make_edge("gone", "b"),    // dangling
            ],
            hyperedges: vec![],
        };
        let graph = build_from_extraction(&ext).unwrap();
        assert_eq!(graph.node_count(), 2);
        assert_eq!(graph.edge_count(), 1); // only a->b
    }

    #[test]
    fn build_merges_multiple_extractions() {
        let ext1 = ExtractionResult {
            nodes: vec![make_node("a"), make_node("b")],
            edges: vec![make_edge("a", "b")],
            hyperedges: vec![],
        };
        // The b->c edge lives in a different extraction than node `b`; it is
        // only valid because nodes are merged before edges are checked.
        let ext2 = ExtractionResult {
            nodes: vec![make_node("c")],
            edges: vec![make_edge("b", "c")],
            hyperedges: vec![],
        };
        let graph = build(&[ext1, ext2]).unwrap();
        assert_eq!(graph.node_count(), 3);
        assert_eq!(graph.edge_count(), 2);
    }

    #[test]
    fn duplicate_nodes_first_wins() {
        // Give the second duplicate a distinct label so the assertion can
        // tell which of the two actually survived.
        let mut later = make_node("a");
        later.label = "later".into();

        let ext = ExtractionResult {
            nodes: vec![make_node("a"), later],
            edges: vec![],
            hyperedges: vec![],
        };
        let graph = build_from_extraction(&ext).unwrap();
        assert_eq!(graph.node_count(), 1);

        let kept = graph.get_node("a").expect("node `a` should be present");
        assert_eq!(kept.label, "a");
    }

    #[test]
    fn build_keeps_first_duplicate_across_extractions() {
        let mut later = make_node("a");
        later.label = "later".into();

        let first = ExtractionResult {
            nodes: vec![make_node("a")],
            edges: vec![],
            hyperedges: vec![],
        };
        let second = ExtractionResult {
            nodes: vec![later],
            edges: vec![],
            hyperedges: vec![],
        };
        let graph = build(&[first, second]).unwrap();
        assert_eq!(graph.node_count(), 1);

        let kept = graph.get_node("a").expect("node `a` should be present");
        assert_eq!(kept.label, "a");
    }

    #[test]
    fn hyperedges_stored() {
        let ext = ExtractionResult {
            nodes: vec![make_node("a"), make_node("b")],
            edges: vec![],
            hyperedges: vec![Hyperedge {
                nodes: vec!["a".into(), "b".into()],
                relation: "coexist".into(),
                label: "together".into(),
            }],
        };
        let graph = build_from_extraction(&ext).unwrap();
        assert_eq!(graph.hyperedges.len(), 1);
        assert_eq!(graph.hyperedges[0].relation, "coexist");
    }
}
173}