graphify_extract/
dedup.rs1use std::collections::HashSet;
7
8use graphify_core::model::ExtractionResult;
9
10pub fn dedup_file(result: &mut ExtractionResult) {
15 let mut seen_nodes = HashSet::new();
16 result.nodes.retain(|n| seen_nodes.insert(n.id.clone()));
17
18 let mut seen_edges: HashSet<(String, String, String)> = HashSet::new();
19 result
20 .edges
21 .retain(|e| seen_edges.insert((e.source.clone(), e.target.clone(), e.relation.clone())));
22}
23
24pub fn dedup_results(results: Vec<ExtractionResult>) -> ExtractionResult {
26 let mut combined = ExtractionResult::default();
27 for r in results {
28 combined.nodes.extend(r.nodes);
29 combined.edges.extend(r.edges);
30 combined.hyperedges.extend(r.hyperedges);
31 }
32 dedup_file(&mut combined);
33 combined
34}
35
#[cfg(test)]
mod tests {
    use super::*;
    use graphify_core::confidence::Confidence;
    use graphify_core::model::{GraphEdge, GraphNode, NodeType};
    use std::collections::HashMap;

    /// Builds a function node whose label is the same as its id.
    fn node(id: &str) -> GraphNode {
        let name = String::from(id);
        GraphNode {
            label: name.clone(),
            id: name,
            source_file: String::from("test.rs"),
            source_location: None,
            node_type: NodeType::Function,
            community: None,
            extra: HashMap::new(),
        }
    }

    /// Builds an extracted edge `src -> tgt` labelled with `rel`.
    fn edge(src: &str, tgt: &str, rel: &str) -> GraphEdge {
        GraphEdge {
            source: String::from(src),
            target: String::from(tgt),
            relation: String::from(rel),
            confidence: Confidence::Extracted,
            confidence_score: 1.0,
            source_file: String::from("test.rs"),
            source_location: None,
            weight: 1.0,
            extra: HashMap::new(),
        }
    }

    /// Wraps the given nodes and edges in a result that has no hyperedges.
    fn extraction(nodes: Vec<GraphNode>, edges: Vec<GraphEdge>) -> ExtractionResult {
        ExtractionResult {
            nodes,
            edges,
            hyperedges: Vec::new(),
        }
    }

    #[test]
    fn dedup_removes_duplicate_nodes() {
        let input_ids = ["a", "b", "a", "c", "b"];
        let mut result = extraction(input_ids.iter().copied().map(node).collect(), Vec::new());

        dedup_file(&mut result);

        assert_eq!(result.nodes.len(), 3);
        let remaining: Vec<&str> = result.nodes.iter().map(|n| n.id.as_str()).collect();
        assert_eq!(remaining, ["a", "b", "c"]);
    }

    #[test]
    fn dedup_removes_duplicate_edges() {
        // Two identical edges, one differing only by relation, one unrelated.
        let edges = vec![
            edge("a", "b", "calls"),
            edge("a", "b", "calls"),
            edge("a", "b", "imports"),
            edge("c", "d", "calls"),
        ];
        let mut result = extraction(Vec::new(), edges);

        dedup_file(&mut result);

        assert_eq!(result.edges.len(), 3);
    }

    #[test]
    fn dedup_preserves_first_occurrence() {
        // Same id, different labels: the label tells us which copy survived.
        let earlier = GraphNode {
            label: String::from("first"),
            ..node("x")
        };
        let later = GraphNode {
            label: String::from("second"),
            ..node("x")
        };
        let mut result = extraction(vec![earlier, later], Vec::new());

        dedup_file(&mut result);

        assert_eq!(result.nodes.len(), 1);
        assert_eq!(result.nodes[0].label, "first");
    }

    #[test]
    fn dedup_no_duplicates_is_noop() {
        let mut result = extraction(vec![node("a"), node("b")], vec![edge("a", "b", "calls")]);

        dedup_file(&mut result);

        assert_eq!(result.nodes.len(), 2);
        assert_eq!(result.edges.len(), 1);
    }

    #[test]
    fn dedup_empty_is_noop() {
        let mut result = ExtractionResult::default();

        dedup_file(&mut result);

        assert!(result.nodes.is_empty());
        assert!(result.edges.is_empty());
    }

    #[test]
    fn dedup_results_merges_and_deduplicates() {
        // Node "b" and edge a-calls->b appear in both inputs.
        let first = extraction(vec![node("a"), node("b")], vec![edge("a", "b", "calls")]);
        let second = extraction(
            vec![node("b"), node("c")],
            vec![edge("a", "b", "calls"), edge("b", "c", "imports")],
        );

        let merged = dedup_results(vec![first, second]);

        assert_eq!(merged.nodes.len(), 3);
        assert_eq!(merged.edges.len(), 2);
    }
}