use std::cmp::Ordering;
use std::collections::HashMap;
use std::fmt;
use super::super::edge::{DeltaEdge, DeltaOp, EdgeKey, EdgeKind};
use super::super::file::FileId;
use super::super::node::NodeId;
use crate::graph::node::Span;
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MergedEdge {
pub source: NodeId,
pub target: NodeId,
pub kind: EdgeKind,
pub seq: u64,
pub file: FileId,
pub spans: Vec<Span>,
}
impl MergedEdge {
#[must_use]
pub fn new(source: NodeId, target: NodeId, kind: EdgeKind, seq: u64, file: FileId) -> Self {
Self {
source,
target,
kind,
seq,
file,
spans: Vec::new(),
}
}
#[must_use]
pub fn with_spans(
source: NodeId,
target: NodeId,
kind: EdgeKind,
seq: u64,
file: FileId,
spans: Vec<Span>,
) -> Self {
Self {
source,
target,
kind,
seq,
file,
spans,
}
}
#[must_use]
pub fn from_delta(edge: &DeltaEdge) -> Self {
Self {
source: edge.source,
target: edge.target,
kind: edge.kind.clone(),
seq: edge.seq,
file: edge.file,
spans: edge.spans.clone(),
}
}
#[must_use]
pub fn edge_key(&self) -> EdgeKey {
EdgeKey {
source: self.source,
target: self.target,
kind: self.kind.clone(),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct MergeStats {
pub input_count: usize,
pub output_count: usize,
pub deduplicated_count: usize,
pub removed_count: usize,
}
impl MergeStats {
#[must_use]
pub fn compression_ratio(&self) -> f64 {
if self.input_count == 0 {
1.0
} else {
usize_to_f64(self.output_count) / usize_to_f64(self.input_count)
}
}
}
impl fmt::Display for MergeStats {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"merge: {} -> {} edges ({} deduped, {} removed, {:.1}% compression)",
self.input_count,
self.output_count,
self.deduplicated_count,
self.removed_count,
self.compression_ratio() * 100.0
)
}
}
fn process_edge_group(group: &[DeltaEdge], merged: &mut Vec<MergedEdge>) -> bool {
let winner = &group[0];
if winner.op == DeltaOp::Add {
let spans = accumulate_and_deduplicate_spans(group);
merged.push(MergedEdge {
source: winner.source,
target: winner.target,
kind: winner.kind.clone(),
seq: winner.seq,
file: winner.file,
spans,
});
false
} else {
true
}
}
fn accumulate_and_deduplicate_spans(group: &[DeltaEdge]) -> Vec<Span> {
let mut accumulated_spans: Vec<Span> = Vec::new();
for edge in group {
if edge.op == DeltaOp::Add {
accumulated_spans.extend(edge.spans.iter().copied());
}
}
accumulated_spans.sort_by(|a, b| {
(a.start.line, a.start.column, a.end.line, a.end.column).cmp(&(
b.start.line,
b.start.column,
b.end.line,
b.end.column,
))
});
accumulated_spans.dedup();
accumulated_spans
}
#[must_use]
pub fn merge_delta_edges(mut edges: Vec<DeltaEdge>) -> (Vec<MergedEdge>, MergeStats) {
let input_count = edges.len();
if edges.is_empty() {
return (
vec![],
MergeStats {
input_count: 0,
output_count: 0,
deduplicated_count: 0,
removed_count: 0,
},
);
}
edges.sort_by(|a, b| {
let key_cmp = compare_edge_keys(&a.edge_key(), &b.edge_key());
if key_cmp == Ordering::Equal {
b.seq.cmp(&a.seq)
} else {
key_cmp
}
});
let mut merged: Vec<MergedEdge> = Vec::new();
let mut unique_keys = 0usize;
let mut removed_count = 0usize;
let mut i = 0;
while i < edges.len() {
unique_keys += 1;
let key = edges[i].edge_key();
let mut j = i + 1;
while j < edges.len() && edges[j].edge_key() == key {
j += 1;
}
if process_edge_group(&edges[i..j], &mut merged) {
removed_count += 1;
}
i = j;
}
let deduplicated_count = input_count - unique_keys;
let stats = MergeStats {
input_count,
output_count: merged.len(),
deduplicated_count,
removed_count,
};
(merged, stats)
}
#[must_use]
pub fn merge_with_csr(
csr_edges: &[MergedEdge],
delta_edges: Vec<DeltaEdge>,
) -> (Vec<MergedEdge>, MergeStats) {
let csr_as_delta: Vec<DeltaEdge> = csr_edges
.iter()
.map(|e| DeltaEdge {
source: e.source,
target: e.target,
kind: e.kind.clone(),
seq: e.seq,
op: DeltaOp::Add, file: e.file,
spans: e.spans.clone(),
})
.collect();
let mut all_edges = csr_as_delta;
all_edges.extend(delta_edges);
merge_delta_edges(all_edges)
}
#[must_use]
pub fn merge_by_file<S: std::hash::BuildHasher>(
edges_by_file: HashMap<FileId, Vec<DeltaEdge>, S>,
) -> (HashMap<FileId, Vec<MergedEdge>>, MergeStats) {
let input_count: usize = edges_by_file.values().map(std::vec::Vec::len).sum();
let all_edges: Vec<DeltaEdge> = edges_by_file.into_values().flatten().collect();
let (merged, stats) = merge_delta_edges(all_edges);
let mut result: HashMap<FileId, Vec<MergedEdge>> = HashMap::new();
for edge in merged {
result.entry(edge.file).or_default().push(edge);
}
let adjusted_stats = MergeStats {
input_count,
output_count: stats.output_count,
deduplicated_count: stats.deduplicated_count,
removed_count: stats.removed_count,
};
(result, adjusted_stats)
}
fn usize_to_f64(value: usize) -> f64 {
f64::from(u32::try_from(value).unwrap_or(u32::MAX))
}
fn compare_edge_keys(a: &EdgeKey, b: &EdgeKey) -> Ordering {
let src_cmp = compare_node_ids(&a.source, &b.source);
if src_cmp != Ordering::Equal {
return src_cmp;
}
let tgt_cmp = compare_node_ids(&a.target, &b.target);
if tgt_cmp != Ordering::Equal {
return tgt_cmp;
}
compare_edge_kinds(&a.kind, &b.kind)
}
fn compare_node_ids(a: &NodeId, b: &NodeId) -> Ordering {
match a.index().cmp(&b.index()) {
Ordering::Equal => a.generation().cmp(&b.generation()),
other => other,
}
}
fn compare_edge_kinds(a: &EdgeKind, b: &EdgeKind) -> Ordering {
let disc_a = std::mem::discriminant(a);
let disc_b = std::mem::discriminant(b);
if disc_a == disc_b {
format!("{a:?}").cmp(&format!("{b:?}"))
} else {
format!("{disc_a:?}").cmp(&format!("{disc_b:?}"))
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::graph::unified::edge::EdgeKind;
use crate::graph::unified::file::FileId;
use crate::graph::unified::node::NodeId;
fn make_node(index: u32) -> NodeId {
NodeId::new(index, 0)
}
fn make_file(index: u32) -> FileId {
FileId::new(index)
}
fn make_delta(src: u32, tgt: u32, kind: EdgeKind, seq: u64, op: DeltaOp) -> DeltaEdge {
DeltaEdge::new(make_node(src), make_node(tgt), kind, seq, op, make_file(1))
}
fn make_delta_with_file(
src: u32,
tgt: u32,
kind: EdgeKind,
seq: u64,
op: DeltaOp,
file: u32,
) -> DeltaEdge {
DeltaEdge::new(
make_node(src),
make_node(tgt),
kind,
seq,
op,
make_file(file),
)
}
#[test]
fn test_empty_merge() {
let (merged, stats) = merge_delta_edges(vec![]);
assert!(merged.is_empty());
assert_eq!(stats.input_count, 0);
assert_eq!(stats.output_count, 0);
}
#[test]
fn test_single_edge() {
let edges = vec![make_delta(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
1,
DeltaOp::Add,
)];
let (merged, stats) = merge_delta_edges(edges);
assert_eq!(merged.len(), 1);
assert_eq!(merged[0].source, make_node(1));
assert_eq!(merged[0].target, make_node(2));
assert_eq!(
merged[0].kind,
EdgeKind::Calls {
argument_count: 0,
is_async: false
}
);
assert_eq!(merged[0].seq, 1);
assert_eq!(stats.input_count, 1);
assert_eq!(stats.output_count, 1);
assert_eq!(stats.deduplicated_count, 0);
assert_eq!(stats.removed_count, 0);
}
#[test]
fn test_last_writer_wins_add() {
let edges = vec![
make_delta(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
1,
DeltaOp::Add,
),
make_delta(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
5,
DeltaOp::Add,
),
make_delta(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
3,
DeltaOp::Add,
),
];
let (merged, stats) = merge_delta_edges(edges);
assert_eq!(merged.len(), 1);
assert_eq!(merged[0].seq, 5);
assert_eq!(stats.input_count, 3);
assert_eq!(stats.output_count, 1);
assert_eq!(stats.deduplicated_count, 2);
assert_eq!(stats.removed_count, 0);
}
#[test]
fn test_last_writer_wins_remove() {
let edges = vec![
make_delta(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
1,
DeltaOp::Add,
),
make_delta(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
5,
DeltaOp::Remove,
),
];
let (merged, stats) = merge_delta_edges(edges);
assert!(merged.is_empty());
assert_eq!(stats.input_count, 2);
assert_eq!(stats.output_count, 0);
assert_eq!(stats.deduplicated_count, 1);
assert_eq!(stats.removed_count, 1);
}
#[test]
fn test_add_after_remove() {
let edges = vec![
make_delta(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
1,
DeltaOp::Add,
),
make_delta(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
3,
DeltaOp::Remove,
),
make_delta(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
7,
DeltaOp::Add,
),
];
let (merged, stats) = merge_delta_edges(edges);
assert_eq!(merged.len(), 1);
assert_eq!(merged[0].seq, 7);
assert_eq!(stats.input_count, 3);
assert_eq!(stats.output_count, 1);
assert_eq!(stats.deduplicated_count, 2);
assert_eq!(stats.removed_count, 0);
}
#[test]
fn test_different_edge_kinds() {
let edges = vec![
make_delta(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
1,
DeltaOp::Add,
),
make_delta(
1,
2,
EdgeKind::Imports {
alias: None,
is_wildcard: false,
},
2,
DeltaOp::Add,
),
];
let (merged, stats) = merge_delta_edges(edges);
assert_eq!(merged.len(), 2);
assert_eq!(stats.input_count, 2);
assert_eq!(stats.output_count, 2);
assert_eq!(stats.deduplicated_count, 0);
}
#[test]
fn test_different_targets() {
let edges = vec![
make_delta(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
1,
DeltaOp::Add,
),
make_delta(
1,
3,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
2,
DeltaOp::Add,
),
];
let (merged, stats) = merge_delta_edges(edges);
assert_eq!(merged.len(), 2);
assert_eq!(stats.deduplicated_count, 0);
}
#[test]
fn test_different_sources() {
let edges = vec![
make_delta(
1,
3,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
1,
DeltaOp::Add,
),
make_delta(
2,
3,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
2,
DeltaOp::Add,
),
];
let (merged, stats) = merge_delta_edges(edges);
assert_eq!(merged.len(), 2);
assert_eq!(stats.deduplicated_count, 0);
}
#[test]
fn test_complex_merge() {
let edges = vec![
make_delta(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
1,
DeltaOp::Add,
),
make_delta(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
3,
DeltaOp::Remove,
),
make_delta(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
5,
DeltaOp::Add,
),
make_delta(
2,
3,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
2,
DeltaOp::Add,
),
make_delta(
2,
3,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
4,
DeltaOp::Remove,
),
make_delta(
3,
4,
EdgeKind::Imports {
alias: None,
is_wildcard: false,
},
6,
DeltaOp::Add,
),
];
let (merged, stats) = merge_delta_edges(edges);
assert_eq!(merged.len(), 2);
let edge_a = merged.iter().find(|e| e.source == make_node(1)).unwrap();
assert_eq!(edge_a.seq, 5);
let edge_c = merged.iter().find(|e| e.source == make_node(3)).unwrap();
assert_eq!(edge_c.seq, 6);
assert_eq!(stats.input_count, 6);
assert_eq!(stats.output_count, 2);
assert_eq!(stats.deduplicated_count, 3); assert_eq!(stats.removed_count, 1); }
#[test]
fn test_merge_stats_display() {
let stats = MergeStats {
input_count: 100,
output_count: 60,
deduplicated_count: 30,
removed_count: 10,
};
let display = format!("{stats}");
assert!(display.contains("100 -> 60"));
assert!(display.contains("30 deduped"));
assert!(display.contains("10 removed"));
}
#[test]
fn test_compression_ratio() {
let stats = MergeStats {
input_count: 100,
output_count: 50,
deduplicated_count: 40,
removed_count: 10,
};
assert!((stats.compression_ratio() - 0.5).abs() < 0.001);
}
#[test]
fn test_compression_ratio_empty() {
let stats = MergeStats::default();
assert!((stats.compression_ratio() - 1.0).abs() < 0.001);
}
#[test]
fn test_merged_edge_from_delta() {
let delta = make_delta(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
42,
DeltaOp::Add,
);
let merged = MergedEdge::from_delta(&delta);
assert_eq!(merged.source, delta.source);
assert_eq!(merged.target, delta.target);
assert_eq!(merged.kind, delta.kind);
assert_eq!(merged.seq, delta.seq);
assert_eq!(merged.file, delta.file);
}
#[test]
fn test_merge_with_csr() {
let csr_edges = vec![MergedEdge::new(
make_node(1),
make_node(2),
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
1,
make_file(1),
)];
let delta_edges = vec![
make_delta(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
5,
DeltaOp::Add,
),
make_delta(
3,
4,
EdgeKind::Imports {
alias: None,
is_wildcard: false,
},
6,
DeltaOp::Add,
),
];
let (merged, stats) = merge_with_csr(&csr_edges, delta_edges);
let edge_1_2 = merged.iter().find(|e| e.source == make_node(1)).unwrap();
assert_eq!(edge_1_2.seq, 5);
let edge_3_4 = merged.iter().find(|e| e.source == make_node(3)).unwrap();
assert_eq!(edge_3_4.seq, 6);
assert_eq!(merged.len(), 2);
assert_eq!(stats.input_count, 3);
assert_eq!(stats.deduplicated_count, 1);
}
#[test]
fn test_merge_by_file() {
let mut edges_by_file = HashMap::new();
edges_by_file.insert(
make_file(1),
vec![
make_delta_with_file(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
1,
DeltaOp::Add,
1,
),
make_delta_with_file(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
3,
DeltaOp::Add,
1,
),
],
);
edges_by_file.insert(
make_file(2),
vec![make_delta_with_file(
3,
4,
EdgeKind::Imports {
alias: None,
is_wildcard: false,
},
2,
DeltaOp::Add,
2,
)],
);
let (result, stats) = merge_by_file(edges_by_file);
assert_eq!(result.len(), 2);
assert_eq!(result.get(&make_file(1)).unwrap().len(), 1);
assert_eq!(result.get(&make_file(2)).unwrap().len(), 1);
assert_eq!(stats.input_count, 3);
assert_eq!(stats.output_count, 2);
assert_eq!(stats.deduplicated_count, 1);
}
#[test]
fn test_merge_by_file_cross_file_remove() {
let mut edges_by_file = HashMap::new();
edges_by_file.insert(
make_file(1),
vec![make_delta_with_file(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
1,
DeltaOp::Add,
1,
)],
);
edges_by_file.insert(
make_file(2),
vec![make_delta_with_file(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
5,
DeltaOp::Remove,
2,
)],
);
let (result, stats) = merge_by_file(edges_by_file);
assert!(
result.is_empty() || result.values().all(Vec::is_empty),
"Cross-file remove should cancel the add"
);
assert_eq!(stats.input_count, 2);
assert_eq!(stats.output_count, 0);
assert_eq!(stats.removed_count, 1);
}
#[test]
fn test_merge_by_file_cross_file_add_wins() {
let mut edges_by_file = HashMap::new();
edges_by_file.insert(
make_file(1),
vec![make_delta_with_file(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
3,
DeltaOp::Remove,
1,
)],
);
edges_by_file.insert(
make_file(2),
vec![make_delta_with_file(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
7,
DeltaOp::Add,
2,
)],
);
let (result, stats) = merge_by_file(edges_by_file);
assert_eq!(result.len(), 1);
let file2_edges = result.get(&make_file(2)).expect("Edge should be in file2");
assert_eq!(file2_edges.len(), 1);
assert_eq!(file2_edges[0].seq, 7);
assert_eq!(stats.output_count, 1);
}
#[test]
fn test_sequence_stability() {
let edges = vec![
make_delta(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
10,
DeltaOp::Add,
),
make_delta(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
5,
DeltaOp::Remove,
),
make_delta(
1,
2,
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
1,
DeltaOp::Add,
),
];
let (merged, _) = merge_delta_edges(edges);
assert_eq!(merged.len(), 1);
assert_eq!(merged[0].seq, 10);
}
#[test]
fn test_preserves_file_id() {
let delta = DeltaEdge::new(
make_node(1),
make_node(2),
EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
1,
DeltaOp::Add,
make_file(42),
);
let (merged, _) = merge_delta_edges(vec![delta]);
assert_eq!(merged[0].file, make_file(42));
}
#[test]
fn test_node_id_ordering() {
let node_a = NodeId::new(1, 0);
let node_b = NodeId::new(1, 1);
let node_c = NodeId::new(2, 0);
assert_eq!(compare_node_ids(&node_a, &node_b), Ordering::Less);
assert_eq!(compare_node_ids(&node_b, &node_a), Ordering::Greater);
assert_eq!(compare_node_ids(&node_a, &node_c), Ordering::Less);
assert_eq!(compare_node_ids(&node_a, &node_a), Ordering::Equal);
}
}