graphify_extract/
dedup.rs1use std::collections::HashSet;
7
8use graphify_core::model::ExtractionResult;
9
10pub fn dedup_file(result: &mut ExtractionResult) {
15 let mut seen_nodes = HashSet::new();
17 result.nodes.retain(|n| seen_nodes.insert(n.id.clone()));
18
19 let mut seen_edges: HashSet<(String, String, String)> = HashSet::new();
21 result
22 .edges
23 .retain(|e| seen_edges.insert((e.source.clone(), e.target.clone(), e.relation.clone())));
24}
25
26pub fn dedup_results(results: Vec<ExtractionResult>) -> ExtractionResult {
28 let mut combined = ExtractionResult::default();
29 for r in results {
30 combined.nodes.extend(r.nodes);
31 combined.edges.extend(r.edges);
32 combined.hyperedges.extend(r.hyperedges);
33 }
34 dedup_file(&mut combined);
35 combined
36}
37
#[cfg(test)]
mod tests {
    use super::*;
    use graphify_core::confidence::Confidence;
    use graphify_core::model::{GraphEdge, GraphNode, NodeType};
    use std::collections::HashMap;

    /// Builds a function node whose label mirrors its id.
    fn node(id: &str) -> GraphNode {
        GraphNode {
            id: id.to_owned(),
            label: id.to_owned(),
            source_file: String::from("test.rs"),
            source_location: None,
            node_type: NodeType::Function,
            community: None,
            extra: HashMap::new(),
        }
    }

    /// Builds a fully-confident edge `src --rel--> tgt`.
    fn edge(src: &str, tgt: &str, rel: &str) -> GraphEdge {
        GraphEdge {
            source: src.to_owned(),
            target: tgt.to_owned(),
            relation: rel.to_owned(),
            confidence: Confidence::Extracted,
            confidence_score: 1.0,
            source_file: String::from("test.rs"),
            source_location: None,
            weight: 1.0,
            extra: HashMap::new(),
        }
    }

    /// Wraps the given nodes and edges in a result with no hyperedges.
    fn extraction(nodes: Vec<GraphNode>, edges: Vec<GraphEdge>) -> ExtractionResult {
        ExtractionResult {
            nodes,
            edges,
            hyperedges: Vec::new(),
        }
    }

    #[test]
    fn dedup_removes_duplicate_nodes() {
        let mut result = extraction(
            vec![node("a"), node("b"), node("a"), node("c"), node("b")],
            Vec::new(),
        );

        dedup_file(&mut result);

        assert_eq!(result.nodes.len(), 3);
        // Survivors keep the order of their first appearance.
        let ids: Vec<&str> = result.nodes.iter().map(|n| n.id.as_str()).collect();
        assert_eq!(ids, vec!["a", "b", "c"]);
    }

    #[test]
    fn dedup_removes_duplicate_edges() {
        let mut result = extraction(
            Vec::new(),
            vec![
                edge("a", "b", "calls"),
                edge("a", "b", "calls"),
                edge("a", "b", "imports"),
                edge("c", "d", "calls"),
            ],
        );

        dedup_file(&mut result);

        // Only the exact (source, target, relation) repeat is dropped.
        assert_eq!(result.edges.len(), 3);
    }

    #[test]
    fn dedup_preserves_first_occurrence() {
        let first = GraphNode {
            label: String::from("first"),
            ..node("x")
        };
        let second = GraphNode {
            label: String::from("second"),
            ..node("x")
        };
        let mut result = extraction(vec![first, second], Vec::new());

        dedup_file(&mut result);

        assert_eq!(result.nodes.len(), 1);
        assert_eq!(result.nodes[0].label, "first");
    }

    #[test]
    fn dedup_no_duplicates_is_noop() {
        let mut result = extraction(
            vec![node("a"), node("b")],
            vec![edge("a", "b", "calls")],
        );

        dedup_file(&mut result);

        assert_eq!(result.nodes.len(), 2);
        assert_eq!(result.edges.len(), 1);
    }

    #[test]
    fn dedup_empty_is_noop() {
        let mut result = ExtractionResult::default();

        dedup_file(&mut result);

        assert!(result.nodes.is_empty());
        assert!(result.edges.is_empty());
    }

    #[test]
    fn dedup_results_merges_and_deduplicates() {
        let left = extraction(
            vec![node("a"), node("b")],
            vec![edge("a", "b", "calls")],
        );
        let right = extraction(
            vec![node("b"), node("c")],
            vec![edge("a", "b", "calls"), edge("b", "c", "imports")],
        );

        let merged = dedup_results(vec![left, right]);

        assert_eq!(merged.nodes.len(), 3);
        assert_eq!(merged.edges.len(), 2);
    }
}