cognee_cognify/graph_integration/
deduplication.rs1use std::collections::HashMap;
7
8use crate::graph_integration::types::{GraphEdgePair, GraphNodePair};
9
10#[derive(Debug, Clone)]
12pub struct DeduplicationResult {
13 pub unique_nodes: Vec<GraphNodePair>,
15
16 pub unique_edges: Vec<GraphEdgePair>,
18}
19
20pub fn deduplicate_nodes_and_edges(
32 nodes: Vec<GraphNodePair>,
33 edges: Vec<GraphEdgePair>,
34) -> DeduplicationResult {
35 let mut node_map = HashMap::new();
37 for node in nodes {
38 let key = node.entity.base.id;
39 node_map.insert(key, node);
41 }
42
43 let mut edge_map = HashMap::new();
45 for edge in edges {
46 let key = edge.dedup_key();
47 edge_map.insert(key, edge);
49 }
50
51 DeduplicationResult {
52 unique_nodes: node_map.into_values().collect(),
53 unique_edges: edge_map.into_values().collect(),
54 }
55}
56
57#[cfg(test)]
58mod tests {
59 use super::*;
60 use cognee_models::{Entity, EntityType};
61 use uuid::Uuid;
62
63 fn create_test_node(name: &str, type_name: &str) -> GraphNodePair {
64 let entity = Entity::new(name, None, format!("{name} description"), None);
65 let entity_type = EntityType::from_node_type(type_name, None);
66 GraphNodePair {
67 entity,
68 entity_type,
69 }
70 }
71
72 fn create_test_edge(source_id: Uuid, target_id: Uuid, relationship: &str) -> GraphEdgePair {
73 GraphEdgePair::new(source_id, target_id, relationship)
74 }
75
76 #[test]
77 fn test_deduplicate_nodes_removes_duplicates() {
78 let node1 = create_test_node("TechCorp", "Organization");
79 let node2 = create_test_node("TechCorp", "Organization");
80
81 let mut node2_clone = node2.clone();
83 node2_clone.entity.base.id = node1.entity.base.id;
84
85 let result = deduplicate_nodes_and_edges(vec![node1.clone(), node2_clone], vec![]);
86
87 assert_eq!(result.unique_nodes.len(), 1);
88 assert_eq!(result.unique_nodes[0].entity.name, "TechCorp");
89 }
90
91 #[test]
92 fn test_deduplicate_edges_removes_duplicates() {
93 let source_id = Uuid::new_v4();
94 let target_id = Uuid::new_v4();
95
96 let edge1 = create_test_edge(source_id, target_id, "works_at");
97 let edge2 = create_test_edge(source_id, target_id, "works_at");
98
99 let result = deduplicate_nodes_and_edges(vec![], vec![edge1, edge2]);
100
101 assert_eq!(result.unique_edges.len(), 1);
102 assert_eq!(result.unique_edges[0].relationship_name, "works_at");
103 }
104
105 #[test]
106 fn test_deduplicate_preserves_unique_nodes() {
107 let node1 = create_test_node("TechCorp", "Organization");
108 let node2 = create_test_node("Alice", "Person");
109 let node3 = create_test_node("London", "Location");
110
111 let result = deduplicate_nodes_and_edges(vec![node1, node2, node3], vec![]);
112
113 assert_eq!(result.unique_nodes.len(), 3);
114
115 let names: Vec<String> = result
117 .unique_nodes
118 .iter()
119 .map(|n| n.entity.name.clone())
120 .collect();
121 assert!(names.contains(&"TechCorp".to_string()));
122 assert!(names.contains(&"Alice".to_string()));
123 assert!(names.contains(&"London".to_string()));
124 }
125
126 #[test]
127 fn test_deduplicate_preserves_unique_edges() {
128 let source_id = Uuid::new_v4();
129 let target_id1 = Uuid::new_v4();
130 let target_id2 = Uuid::new_v4();
131
132 let edge1 = create_test_edge(source_id, target_id1, "works_at");
133 let edge2 = create_test_edge(source_id, target_id2, "located_in");
134
135 let result = deduplicate_nodes_and_edges(vec![], vec![edge1, edge2]);
136
137 assert_eq!(result.unique_edges.len(), 2);
138 }
139
140 #[test]
141 fn test_deduplicate_different_relationships_same_entities() {
142 let source_id = Uuid::new_v4();
143 let target_id = Uuid::new_v4();
144
145 let edge1 = create_test_edge(source_id, target_id, "works_at");
147 let edge2 = create_test_edge(source_id, target_id, "founded");
148
149 let result = deduplicate_nodes_and_edges(vec![], vec![edge1, edge2]);
150
151 assert_eq!(result.unique_edges.len(), 2);
153 }
154
155 #[test]
156 fn test_deduplicate_empty_input() {
157 let result = deduplicate_nodes_and_edges(vec![], vec![]);
158
159 assert_eq!(result.unique_nodes.len(), 0);
160 assert_eq!(result.unique_edges.len(), 0);
161 }
162
163 #[test]
164 fn test_deduplicate_later_entry_overwrites() {
165 let source_id = Uuid::new_v4();
166 let target_id = Uuid::new_v4();
167
168 let mut edge1 = create_test_edge(source_id, target_id, "works_at");
170 edge1.add_property("since", "2020");
171
172 let mut edge2 = create_test_edge(source_id, target_id, "works_at");
173 edge2.add_property("since", "2021");
174
175 let result = deduplicate_nodes_and_edges(vec![], vec![edge1, edge2]);
176
177 assert_eq!(result.unique_edges.len(), 1);
178 assert_eq!(
180 result.unique_edges[0].properties.get("since"),
181 Some(&"2021".to_string())
182 );
183 }
184
185 #[test]
186 fn test_deduplicate_mixed_unique_and_duplicate() {
187 let node1 = create_test_node("TechCorp", "Organization");
188 let node2 = create_test_node("Alice", "Person");
189 let node3_dup = create_test_node("Alice", "Person");
190
191 let mut node3 = node3_dup.clone();
193 node3.entity.base.id = node2.entity.base.id;
194
195 let result = deduplicate_nodes_and_edges(vec![node1, node2.clone(), node3], vec![]);
196
197 assert_eq!(result.unique_nodes.len(), 2);
199 }
200}