Skip to main content

graphify_build/
lib.rs

1//! Graph assembly and deduplication for graphify.
2//!
3//! Takes [`ExtractionResult`]s from multiple files and assembles them into a
4//! single [`KnowledgeGraph`], skipping dangling edges.
5
6mod codegraph_merge;
7
8pub use codegraph_merge::merge_codegraph_edges;
9
10use std::collections::HashSet;
11
12use tracing::debug;
13
14use graphify_core::error::Result;
15use graphify_core::graph::KnowledgeGraph;
16use graphify_core::model::ExtractionResult;
17
18/// Build a [`KnowledgeGraph`] from a single extraction result.
19///
20/// All nodes are added first; edges that reference unknown source/target
21/// nodes are silently skipped (dangling-edge protection).
22pub fn build_from_extraction(extraction: &ExtractionResult) -> Result<KnowledgeGraph> {
23    let mut graph = KnowledgeGraph::new();
24
25    for node in &extraction.nodes {
26        let _ = graph.add_node(node.clone());
27    }
28
29    let node_ids: HashSet<&str> = extraction.nodes.iter().map(|n| n.id.as_str()).collect();
30
31    let mut skipped = 0usize;
32    for edge in &extraction.edges {
33        if node_ids.contains(edge.source.as_str()) && node_ids.contains(edge.target.as_str()) {
34            let _ = graph.add_edge(edge.clone());
35        } else {
36            skipped += 1;
37        }
38    }
39    if skipped > 0 {
40        debug!("skipped {skipped} dangling edge(s)");
41    }
42
43    graph.set_hyperedges(extraction.hyperedges.clone());
44
45    Ok(graph)
46}
47
48/// Merge multiple extraction results into one graph.
49///
50/// Later extractions override earlier ones for same node IDs (first-write-wins
51/// via `add_node` which rejects duplicates, so the first occurrence is kept).
52pub fn build(extractions: &[ExtractionResult]) -> Result<KnowledgeGraph> {
53    let mut combined = ExtractionResult::default();
54    for ext in extractions {
55        combined.nodes.extend(ext.nodes.clone());
56        combined.edges.extend(ext.edges.clone());
57        combined.hyperedges.extend(ext.hyperedges.clone());
58    }
59    build_from_extraction(&combined)
60}
61
#[cfg(test)]
mod tests {
    use super::*;
    use graphify_core::confidence::Confidence;
    use graphify_core::model::{GraphEdge, GraphNode, Hyperedge, NodeType};
    use std::collections::HashMap;

    /// Fixture: a `Class` node in `test.rs` whose label mirrors its ID.
    fn make_node(id: &str) -> GraphNode {
        GraphNode {
            id: id.into(),
            label: id.into(),
            source_file: "test.rs".into(),
            source_location: None,
            node_type: NodeType::Class,
            community: None,
            extra: HashMap::new(),
        }
    }

    /// Fixture: an extracted `calls` edge from `src` to `tgt` with full
    /// confidence and unit weight.
    fn make_edge(src: &str, tgt: &str) -> GraphEdge {
        GraphEdge {
            source: src.into(),
            target: tgt.into(),
            relation: "calls".into(),
            confidence: Confidence::Extracted,
            confidence_score: 1.0,
            source_file: "test.rs".into(),
            source_location: None,
            weight: 1.0,
            extra: HashMap::new(),
        }
    }

    /// One fixture node per ID, in the given order.
    fn nodes(ids: &[&str]) -> Vec<GraphNode> {
        ids.iter().copied().map(make_node).collect()
    }

    /// One fixture edge per `(source, target)` pair, in the given order.
    fn edges(pairs: &[(&str, &str)]) -> Vec<GraphEdge> {
        pairs
            .iter()
            .map(|&(src, tgt)| make_edge(src, tgt))
            .collect()
    }

    /// An extraction with the given nodes and edges and no hyperedges.
    fn extraction(node_ids: &[&str], edge_pairs: &[(&str, &str)]) -> ExtractionResult {
        ExtractionResult {
            nodes: nodes(node_ids),
            edges: edges(edge_pairs),
            hyperedges: Vec::new(),
        }
    }

    /// An empty extraction yields an empty graph.
    #[test]
    fn build_from_empty() {
        let graph = build_from_extraction(&ExtractionResult::default()).unwrap();
        assert_eq!(graph.node_count(), 0);
        assert_eq!(graph.edge_count(), 0);
    }

    /// All nodes and all well-formed edges end up in the graph.
    #[test]
    fn build_with_nodes_and_edges() {
        let ext = extraction(&["a", "b", "c"], &[("a", "b"), ("b", "c")]);
        let graph = build_from_extraction(&ext).unwrap();
        assert_eq!(graph.node_count(), 3);
        assert_eq!(graph.edge_count(), 2);
        for id in ["a", "b", "c"] {
            assert!(graph.get_node(id).is_some());
        }
    }

    /// Edges with an unknown source or target are dropped.
    #[test]
    fn dangling_edges_skipped() {
        let ext = extraction(
            &["a", "b"],
            &[
                ("a", "b"),       // valid
                ("a", "missing"), // dangling target
                ("gone", "b"),    // dangling source
            ],
        );
        let graph = build_from_extraction(&ext).unwrap();
        assert_eq!(graph.node_count(), 2);
        assert_eq!(graph.edge_count(), 1); // only a->b survives
    }

    /// `build` combines extractions, so an edge may span two of them.
    #[test]
    fn build_merges_multiple_extractions() {
        let first = extraction(&["a", "b"], &[("a", "b")]);
        let second = extraction(&["c"], &[("b", "c")]);
        let graph = build(&[first, second]).unwrap();
        assert_eq!(graph.node_count(), 3);
        assert_eq!(graph.edge_count(), 2);
    }

    /// A repeated node ID results in a single node.
    #[test]
    fn duplicate_nodes_first_wins() {
        let ext = extraction(&["a", "a"], &[]);
        let graph = build_from_extraction(&ext).unwrap();
        assert_eq!(graph.node_count(), 1);
    }

    /// Hyperedges are carried over to the graph.
    #[test]
    fn hyperedges_stored() {
        let coexist = Hyperedge {
            nodes: vec!["a".into(), "b".into()],
            relation: "coexist".into(),
            label: "together".into(),
        };
        let ext = ExtractionResult {
            nodes: nodes(&["a", "b"]),
            edges: Vec::new(),
            hyperedges: vec![coexist],
        };
        let graph = build_from_extraction(&ext).unwrap();
        assert_eq!(graph.hyperedges.len(), 1);
        assert_eq!(graph.hyperedges[0].relation, "coexist");
    }
}