use std::collections::HashMap;
use crate::graph_integration::types::{GraphEdgePair, GraphNodePair};
/// Output of [`deduplicate_nodes_and_edges`]: the node and edge lists that
/// remain once entries sharing a deduplication key have been collapsed.
#[derive(Debug, Clone)]
pub struct DeduplicationResult {
/// Surviving nodes, one per distinct `entity.base.id`.
pub unique_nodes: Vec<GraphNodePair>,
/// Surviving edges, one per distinct `dedup_key()` value.
pub unique_edges: Vec<GraphEdgePair>,
}
/// Collapses duplicate nodes and edges, keeping the most recent copy of each.
///
/// Two nodes are duplicates when they share `entity.base.id`; two edges are
/// duplicates when `dedup_key()` returns equal keys. When duplicates exist,
/// the entry that appears **last** in the input replaces the earlier ones.
///
/// The output order is deterministic: every surviving entry sits at the
/// position where its key was *first* seen in the input. (Collecting straight
/// out of a `HashMap` would instead yield an arbitrary order that can differ
/// from run to run.)
///
/// Both inputs are consumed; empty inputs produce empty outputs.
pub fn deduplicate_nodes_and_edges(
    nodes: Vec<GraphNodePair>,
    edges: Vec<GraphEdgePair>,
) -> DeduplicationResult {
    DeduplicationResult {
        unique_nodes: dedup_keep_last(nodes, |node| node.entity.base.id),
        unique_edges: dedup_keep_last(edges, |edge| edge.dedup_key()),
    }
}

/// Deduplicates `items` by the key `key_of` computes for each one.
///
/// The last item seen for a key wins, but it is stored in the slot that the
/// key's first occurrence claimed, so the result order follows the input.
fn dedup_keep_last<T, K, F>(items: Vec<T>, key_of: F) -> Vec<T>
where
    K: Eq + std::hash::Hash,
    F: Fn(&T) -> K,
{
    // Maps each key to the index of its slot in `unique`.
    let mut slot_by_key: HashMap<K, usize> = HashMap::with_capacity(items.len());
    let mut unique: Vec<T> = Vec::with_capacity(items.len());

    for item in items {
        match slot_by_key.entry(key_of(&item)) {
            // Indices stored in the map always come from `unique.len()` taken
            // right before a push, so they are in bounds.
            std::collections::hash_map::Entry::Occupied(slot) => unique[*slot.get()] = item,
            std::collections::hash_map::Entry::Vacant(slot) => {
                slot.insert(unique.len());
                unique.push(item);
            }
        }
    }

    unique
}
#[cfg(test)]
mod tests {
    use super::*;
    use cognee_models::{Entity, EntityType};
    use uuid::Uuid;

    /// Builds a node pair named `name` whose entity type is derived from
    /// `type_name`; the description is `"<name> description"`.
    fn create_test_node(name: &str, type_name: &str) -> GraphNodePair {
        GraphNodePair {
            entity: Entity::new(name, None, format!("{name} description"), None),
            entity_type: EntityType::from_node_type(type_name, None),
        }
    }

    /// Builds a fresh node and gives it the same `entity.base.id` as
    /// `existing`, so the two collide during deduplication.
    fn create_test_node_sharing_id(
        existing: &GraphNodePair,
        name: &str,
        type_name: &str,
    ) -> GraphNodePair {
        let mut node = create_test_node(name, type_name);
        node.entity.base.id = existing.entity.base.id;
        node
    }

    /// Builds an edge between the two ids with the given relationship name.
    fn create_test_edge(source_id: Uuid, target_id: Uuid, relationship: &str) -> GraphEdgePair {
        GraphEdgePair::new(source_id, target_id, relationship)
    }

    #[test]
    fn test_deduplicate_nodes_removes_duplicates() {
        let original = create_test_node("TechCorp", "Organization");
        let duplicate = create_test_node_sharing_id(&original, "TechCorp", "Organization");

        let result = deduplicate_nodes_and_edges(vec![original, duplicate], vec![]);

        assert_eq!(result.unique_nodes.len(), 1);
        assert_eq!(result.unique_nodes[0].entity.name, "TechCorp");
    }

    #[test]
    fn test_deduplicate_edges_removes_duplicates() {
        let source_id = Uuid::new_v4();
        let target_id = Uuid::new_v4();
        let first = create_test_edge(source_id, target_id, "works_at");
        let second = create_test_edge(source_id, target_id, "works_at");

        let result = deduplicate_nodes_and_edges(vec![], vec![first, second]);

        assert_eq!(result.unique_edges.len(), 1);
        assert_eq!(result.unique_edges[0].relationship_name, "works_at");
    }

    #[test]
    fn test_deduplicate_preserves_unique_nodes() {
        let input = vec![
            create_test_node("TechCorp", "Organization"),
            create_test_node("Alice", "Person"),
            create_test_node("London", "Location"),
        ];

        let result = deduplicate_nodes_and_edges(input, vec![]);

        assert_eq!(result.unique_nodes.len(), 3);
        // Membership checks only: this test does not depend on output order.
        let names: Vec<&str> = result
            .unique_nodes
            .iter()
            .map(|node| node.entity.name.as_str())
            .collect();
        for expected in ["TechCorp", "Alice", "London"] {
            assert!(names.contains(&expected), "missing node {expected}");
        }
    }

    #[test]
    fn test_deduplicate_preserves_unique_edges() {
        let source_id = Uuid::new_v4();
        let works_at = create_test_edge(source_id, Uuid::new_v4(), "works_at");
        let located_in = create_test_edge(source_id, Uuid::new_v4(), "located_in");

        let result = deduplicate_nodes_and_edges(vec![], vec![works_at, located_in]);

        assert_eq!(result.unique_edges.len(), 2);
    }

    #[test]
    fn test_deduplicate_different_relationships_same_entities() {
        let source_id = Uuid::new_v4();
        let target_id = Uuid::new_v4();
        let works_at = create_test_edge(source_id, target_id, "works_at");
        let founded = create_test_edge(source_id, target_id, "founded");

        let result = deduplicate_nodes_and_edges(vec![], vec![works_at, founded]);

        assert_eq!(result.unique_edges.len(), 2);
    }

    #[test]
    fn test_deduplicate_empty_input() {
        let result = deduplicate_nodes_and_edges(vec![], vec![]);

        assert!(result.unique_nodes.is_empty());
        assert!(result.unique_edges.is_empty());
    }

    #[test]
    fn test_deduplicate_later_entry_overwrites() {
        let source_id = Uuid::new_v4();
        let target_id = Uuid::new_v4();
        let mut older = create_test_edge(source_id, target_id, "works_at");
        older.add_property("since", "2020");
        let mut newer = create_test_edge(source_id, target_id, "works_at");
        newer.add_property("since", "2021");

        let result = deduplicate_nodes_and_edges(vec![], vec![older, newer]);

        assert_eq!(result.unique_edges.len(), 1);
        assert_eq!(
            result.unique_edges[0].properties.get("since"),
            Some(&"2021".to_string())
        );
    }

    #[test]
    fn test_deduplicate_later_node_overwrites() {
        let older = create_test_node("Alice", "Person");
        let newer = create_test_node_sharing_id(&older, "Alicia", "Person");

        let result = deduplicate_nodes_and_edges(vec![older, newer], vec![]);

        assert_eq!(result.unique_nodes.len(), 1);
        assert_eq!(result.unique_nodes[0].entity.name, "Alicia");
    }

    #[test]
    fn test_deduplicate_mixed_unique_and_duplicate() {
        let tech_corp = create_test_node("TechCorp", "Organization");
        let alice = create_test_node("Alice", "Person");
        let alice_again = create_test_node_sharing_id(&alice, "Alice", "Person");

        let result = deduplicate_nodes_and_edges(vec![tech_corp, alice, alice_again], vec![]);

        assert_eq!(result.unique_nodes.len(), 2);
    }
}