use foldhash::HashMap;
use foldhash::HashSet;
use mago_atom::empty_atom;
use mago_database::file::FileId;
use crate::diff::CodebaseDiff;
use crate::diff::DeletionRange;
use crate::diff::DiffHunk;
use crate::signature::DefSignatureNode;
use crate::signature::FileSignature;
#[must_use]
pub fn compute_file_diff(
file_id: FileId,
old_signature: Option<&FileSignature>,
new_signature: Option<&FileSignature>,
) -> CodebaseDiff {
match (old_signature, new_signature) {
(None, None) => CodebaseDiff::new(),
(Some(old_sig), None) => mark_all_as_changed(old_sig),
(None, Some(new_sig)) => mark_all_as_changed(new_sig),
(Some(old_sig), Some(new_sig)) => myers_diff(file_id, old_sig, new_sig),
}
}
fn mark_all_as_changed(signature: &FileSignature) -> CodebaseDiff {
let mut changed = HashSet::default();
for node in &signature.ast_nodes {
changed.insert((node.name, empty_atom()));
for child in &node.children {
changed.insert((node.name, child.name));
}
}
CodebaseDiff::new().with_changed(changed)
}
fn myers_diff(file_id: FileId, old_signature: &FileSignature, new_signature: &FileSignature) -> CodebaseDiff {
let mut keep = HashSet::default();
let mut changed = HashSet::default();
let mut file_diffs: Vec<DiffHunk> = vec![];
let mut deletion_ranges: Vec<DeletionRange> = vec![];
let Ok((trace, x, y)) = calculate_trace(&old_signature.ast_nodes, &new_signature.ast_nodes) else {
tracing::warn!("Myers diff algorithm failed to converge for file {file_id:?}, marking all symbols as changed");
return mark_all_as_changed(new_signature);
};
let diff = extract_diff(&trace, x, y, &old_signature.ast_nodes, &new_signature.ast_nodes);
for diff_elem in diff {
match diff_elem {
AstDiffElem::Keep(a, b) => {
let mut has_child_sig_change = false;
let Ok((class_trace, class_x, class_y)) = calculate_trace(&a.children, &b.children) else {
changed.insert((a.name, empty_atom()));
for child in &a.children {
changed.insert((a.name, child.name));
}
for child in &b.children {
changed.insert((b.name, child.name));
}
continue;
};
let class_diff = extract_diff(&class_trace, class_x, class_y, &a.children, &b.children);
for class_diff_elem in class_diff {
match class_diff_elem {
AstDiffElem::Keep(a_child, b_child) => {
if a_child.signature_hash == b_child.signature_hash {
keep.insert((a.name, a_child.name));
} else {
has_child_sig_change = true;
changed.insert((a.name, a_child.name));
}
if b_child.start_offset != a_child.start_offset || b_child.start_line != a_child.start_line
{
file_diffs.push((
a_child.start_offset as usize,
a_child.end_offset as usize,
b_child.start_offset as isize - a_child.start_offset as isize,
b_child.start_line as isize - a_child.start_line as isize,
));
}
}
AstDiffElem::Remove(child_node) => {
has_child_sig_change = true;
changed.insert((a.name, child_node.name));
deletion_ranges.push((child_node.start_offset as usize, child_node.end_offset as usize));
}
AstDiffElem::Add(child_node) => {
has_child_sig_change = true;
changed.insert((a.name, child_node.name));
}
}
}
if has_child_sig_change || a.signature_hash != b.signature_hash {
changed.insert((a.name, empty_atom()));
} else {
keep.insert((a.name, empty_atom()));
if b.start_offset != a.start_offset || b.start_line != a.start_line {
file_diffs.push((
a.start_offset as usize,
a.end_offset as usize,
b.start_offset as isize - a.start_offset as isize,
b.start_line as isize - a.start_line as isize,
));
}
}
}
AstDiffElem::Remove(node) => {
changed.insert((node.name, empty_atom()));
deletion_ranges.push((node.start_offset as usize, node.end_offset as usize));
for child in &node.children {
changed.insert((node.name, child.name));
}
}
AstDiffElem::Add(node) => {
changed.insert((node.name, empty_atom()));
for child in &node.children {
changed.insert((node.name, child.name));
}
}
}
}
let mut diff = CodebaseDiff::new().with_keep(keep).with_changed(changed);
if !file_diffs.is_empty() {
diff.add_diff_map_entry(file_id, file_diffs);
}
if !deletion_ranges.is_empty() {
diff.add_deletion_ranges_entry(file_id, deletion_ranges);
}
diff
}
type DiffTrace = (Vec<HashMap<isize, usize>>, usize, usize);
fn calculate_trace(a_nodes: &[DefSignatureNode], b_nodes: &[DefSignatureNode]) -> Result<DiffTrace, &'static str> {
let n = a_nodes.len();
let m = b_nodes.len();
let max = n + m;
let mut v: HashMap<isize, usize> = HashMap::default();
v.insert(1, 0);
let mut trace = vec![];
for d in 0..=(max as isize) {
trace.push(v.clone());
let mut k = -d;
while k <= d {
let mut x = if k == -d || (k != d && v[&(k - 1)] < v[&(k + 1)]) { v[&(k + 1)] } else { v[&(k - 1)] + 1 };
let mut y = (x as isize - k) as usize;
while x < n && y < m && is_equal(&a_nodes[x], &b_nodes[y]) {
x += 1;
y += 1;
}
v.insert(k, x);
if x >= n && y >= m {
return Ok((trace, x, y));
}
k += 2;
}
}
Err("Myers diff algorithm failed to converge")
}
fn is_equal(a_node: &DefSignatureNode, b_node: &DefSignatureNode) -> bool {
a_node.name == b_node.name && a_node.is_function == b_node.is_function
}
fn extract_diff<'a>(
trace: &[HashMap<isize, usize>],
mut x: usize,
mut y: usize,
a_nodes: &'a [DefSignatureNode],
b_nodes: &'a [DefSignatureNode],
) -> Vec<AstDiffElem<'a>> {
let mut result = vec![];
let mut d = trace.len() as isize - 1;
while d >= 0 {
let v = &trace[d as usize];
let k = (x as isize) - (y as isize);
let prev_k = if k == -d || (k != d && v[&(k - 1)] < v[&(k + 1)]) { k + 1 } else { k - 1 };
let prev_x = v[&prev_k];
let prev_y = prev_x as isize - prev_k;
while x > prev_x && y as isize > prev_y {
result.push(AstDiffElem::Keep(&a_nodes[x - 1], &b_nodes[y - 1]));
x -= 1;
y -= 1;
}
if d == 0 {
break;
}
while x > prev_x {
result.push(AstDiffElem::Remove(&a_nodes[x - 1]));
x -= 1;
}
while y as isize > prev_y {
result.push(AstDiffElem::Add(&b_nodes[y - 1]));
y -= 1;
}
d -= 1;
}
result.reverse();
result
}
#[derive(Debug)]
enum AstDiffElem<'a> {
Keep(&'a DefSignatureNode, &'a DefSignatureNode),
Remove(&'a DefSignatureNode),
Add(&'a DefSignatureNode),
}