use std::collections::HashSet;
use graphify_core::model::ExtractionResult;
pub fn dedup_file(result: &mut ExtractionResult) {
let mut seen_nodes = HashSet::new();
result.nodes.retain(|n| seen_nodes.insert(n.id.clone()));
let mut seen_edges: HashSet<(String, String, String)> = HashSet::new();
result
.edges
.retain(|e| seen_edges.insert((e.source.clone(), e.target.clone(), e.relation.clone())));
}
pub fn dedup_results(results: Vec<ExtractionResult>) -> ExtractionResult {
let mut combined = ExtractionResult::default();
for r in results {
combined.nodes.extend(r.nodes);
combined.edges.extend(r.edges);
combined.hyperedges.extend(r.hyperedges);
}
dedup_file(&mut combined);
combined
}
#[cfg(test)]
mod tests {
use super::*;
use graphify_core::confidence::Confidence;
use graphify_core::model::{GraphEdge, GraphNode, NodeType};
use std::collections::HashMap;
fn node(id: &str) -> GraphNode {
GraphNode {
id: id.to_string(),
label: id.to_string(),
source_file: "test.rs".to_string(),
source_location: None,
node_type: NodeType::Function,
community: None,
extra: HashMap::new(),
}
}
fn edge(src: &str, tgt: &str, rel: &str) -> GraphEdge {
GraphEdge {
source: src.to_string(),
target: tgt.to_string(),
relation: rel.to_string(),
confidence: Confidence::Extracted,
confidence_score: 1.0,
source_file: "test.rs".to_string(),
source_location: None,
weight: 1.0,
extra: HashMap::new(),
}
}
#[test]
fn dedup_removes_duplicate_nodes() {
let mut result = ExtractionResult {
nodes: vec![node("a"), node("b"), node("a"), node("c"), node("b")],
edges: Vec::new(),
hyperedges: Vec::new(),
};
dedup_file(&mut result);
assert_eq!(result.nodes.len(), 3);
let ids: Vec<&str> = result.nodes.iter().map(|n| n.id.as_str()).collect();
assert_eq!(ids, vec!["a", "b", "c"]);
}
#[test]
fn dedup_removes_duplicate_edges() {
let mut result = ExtractionResult {
nodes: Vec::new(),
edges: vec![
edge("a", "b", "calls"),
edge("a", "b", "calls"), edge("a", "b", "imports"), edge("c", "d", "calls"),
],
hyperedges: Vec::new(),
};
dedup_file(&mut result);
assert_eq!(result.edges.len(), 3);
}
#[test]
fn dedup_preserves_first_occurrence() {
let mut n1 = node("x");
n1.label = "first".to_string();
let mut n2 = node("x");
n2.label = "second".to_string();
let mut result = ExtractionResult {
nodes: vec![n1, n2],
edges: Vec::new(),
hyperedges: Vec::new(),
};
dedup_file(&mut result);
assert_eq!(result.nodes.len(), 1);
assert_eq!(result.nodes[0].label, "first");
}
#[test]
fn dedup_no_duplicates_is_noop() {
let mut result = ExtractionResult {
nodes: vec![node("a"), node("b")],
edges: vec![edge("a", "b", "calls")],
hyperedges: Vec::new(),
};
dedup_file(&mut result);
assert_eq!(result.nodes.len(), 2);
assert_eq!(result.edges.len(), 1);
}
#[test]
fn dedup_empty_is_noop() {
let mut result = ExtractionResult::default();
dedup_file(&mut result);
assert!(result.nodes.is_empty());
assert!(result.edges.is_empty());
}
#[test]
fn dedup_results_merges_and_deduplicates() {
let r1 = ExtractionResult {
nodes: vec![node("a"), node("b")],
edges: vec![edge("a", "b", "calls")],
hyperedges: Vec::new(),
};
let r2 = ExtractionResult {
nodes: vec![node("b"), node("c")],
edges: vec![edge("a", "b", "calls"), edge("b", "c", "imports")],
hyperedges: Vec::new(),
};
let merged = dedup_results(vec![r1, r2]);
assert_eq!(merged.nodes.len(), 3);
assert_eq!(merged.edges.len(), 2);
}
}