use std::borrow::Cow;
use std::convert::TryFrom;
use crate::diff_algorithm::common::{Change, ChangeTag, DiffAlgorithm, DiffOp};
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Edit {
Equal,
Delete,
Insert,
}
#[derive(Debug, Default)]
pub struct MyersDiff;
impl DiffAlgorithm for MyersDiff {
fn ops<'a>(&self, old: &'a str, new: &'a str) -> Vec<DiffOp> {
let old_lines = split_lines_keep_newline(old);
let new_lines = split_lines_keep_newline(new);
group_ops(&myers_diff(&old_lines, &new_lines))
}
fn iter_inline_changes<'a>(&self, old: &'a str, new: &'a str, op: &DiffOp) -> Vec<Change<'a>> {
let old_lines = split_lines_keep_newline(old);
let new_lines = split_lines_keep_newline(new);
inline_changes(&old_lines, &new_lines, op)
}
}
fn to_idx(value: i64) -> usize {
usize::try_from(value).expect("Myers coordinates are always non-negative")
}
fn myers_diff<T: PartialEq>(old: &[T], new: &[T]) -> Vec<Edit> {
let old_len = old.len();
let new_len = new.len();
if old_len == 0 && new_len == 0 {
return Vec::new();
}
let old_i = i64::try_from(old_len).expect("old length fits in i64");
let new_i = i64::try_from(new_len).expect("new length fits in i64");
let offset = old_i + new_i;
let width = to_idx(2 * offset + 1);
let mut frontier: Vec<i64> = vec![-1; width];
frontier[to_idx(offset + 1)] = 0;
let mut trace: Vec<Vec<i64>> = Vec::with_capacity(to_idx(offset + 1));
for dist in 0..=old_len + new_len {
trace.push(frontier.clone());
let dist_i = i64::try_from(dist).expect("edit distance fits in i64");
let mut diag = -dist_i;
while diag <= dist_i {
let idx = to_idx(diag + offset);
let go_down =
diag == -dist_i || (diag != dist_i && frontier[idx - 1] < frontier[idx + 1]);
let mut old_pos = if go_down {
frontier[idx + 1]
} else {
frontier[idx - 1] + 1
};
let mut new_pos = old_pos - diag;
while old_pos >= 0
&& new_pos >= 0
&& to_idx(old_pos) < old_len
&& to_idx(new_pos) < new_len
&& old[to_idx(old_pos)] == new[to_idx(new_pos)]
{
old_pos += 1;
new_pos += 1;
}
frontier[idx] = old_pos;
if old_pos >= old_i && new_pos >= new_i {
return backtrack(&trace, offset, old_i, new_i, dist);
}
diag += 2;
}
}
Vec::new()
}
fn backtrack(
trace: &[Vec<i64>],
offset: i64,
old_i: i64,
new_i: i64,
dist_total: usize,
) -> Vec<Edit> {
let mut edits: Vec<Edit> = Vec::new();
let mut old_pos = old_i;
let mut new_pos = new_i;
for dist in (1..=dist_total).rev() {
let frontier = &trace[dist];
let diag = old_pos - new_pos;
let idx = to_idx(diag + offset);
let dist_i = i64::try_from(dist).expect("edit distance fits in i64");
let go_down = diag == -dist_i || (diag != dist_i && frontier[idx - 1] < frontier[idx + 1]);
let prev_diag = if go_down { diag + 1 } else { diag - 1 };
let prev_old = frontier[to_idx(prev_diag + offset)];
let prev_new = prev_old - prev_diag;
let (step_old, step_new) = if go_down {
(prev_old, prev_new + 1)
} else {
(prev_old + 1, prev_new)
};
while old_pos > step_old && new_pos > step_new {
edits.push(Edit::Equal);
old_pos -= 1;
new_pos -= 1;
}
if go_down {
edits.push(Edit::Insert);
new_pos -= 1;
} else {
edits.push(Edit::Delete);
old_pos -= 1;
}
}
while old_pos > 0 && new_pos > 0 {
edits.push(Edit::Equal);
old_pos -= 1;
new_pos -= 1;
}
edits.reverse();
edits
}
fn group_ops(edits: &[Edit]) -> Vec<DiffOp> {
let mut ops = Vec::new();
let mut old_idx = 0usize;
let mut new_idx = 0usize;
let mut i = 0;
while i < edits.len() {
let edit = edits[i];
let mut len = 1;
while i + len < edits.len() && edits[i + len] == edit {
len += 1;
}
match edit {
Edit::Equal => {
ops.push(DiffOp::new(ChangeTag::Equal, old_idx, len, new_idx, len));
old_idx += len;
new_idx += len;
}
Edit::Delete => {
ops.push(DiffOp::new(ChangeTag::Delete, old_idx, len, new_idx, 0));
old_idx += len;
}
Edit::Insert => {
ops.push(DiffOp::new(ChangeTag::Insert, old_idx, 0, new_idx, len));
new_idx += len;
}
}
i += len;
}
ops
}
fn inline_changes<'a>(
old_lines: &[&'a str],
new_lines: &[&'a str],
op: &DiffOp,
) -> Vec<Change<'a>> {
let mut changes = Vec::new();
match op.tag() {
ChangeTag::Equal => {
for (offset, line) in (op.old_start()..op.old_start() + op.old_len())
.map(|i| old_lines[i])
.enumerate()
{
let mut change = Change::new(ChangeTag::Equal);
change.add_value(false, Cow::Borrowed(line));
change.set_missing_newline(!line.ends_with('\n'));
let _ = offset;
changes.push(change);
}
}
ChangeTag::Delete => {
for line in (op.old_start()..op.old_start() + op.old_len()).map(|i| old_lines[i]) {
let mut change = Change::new(ChangeTag::Delete);
change.add_value(false, Cow::Borrowed(line));
change.set_missing_newline(!line.ends_with('\n'));
changes.push(change);
}
}
ChangeTag::Insert => {
for line in (op.new_start()..op.new_start() + op.new_len()).map(|i| new_lines[i]) {
let mut change = Change::new(ChangeTag::Insert);
change.add_value(false, Cow::Borrowed(line));
change.set_missing_newline(!line.ends_with('\n'));
changes.push(change);
}
}
}
changes
}
fn split_lines_keep_newline(s: &str) -> Vec<&str> {
if s.is_empty() {
return Vec::new();
}
let mut lines = Vec::new();
let mut start = 0;
for (idx, ch) in s.char_indices() {
if ch == '\n' {
lines.push(&s[start..=idx]);
start = idx + ch.len_utf8();
}
}
if start < s.len() {
lines.push(&s[start..]);
}
lines
}
#[cfg(test)]
mod tests {
use super::*;
use crate::diff_algorithm::common::{ChangeTag, DiffOp};
#[test]
fn myers_diff_insert_in_middle() {
let a = ["a", "c"];
let b = ["a", "b", "c"];
assert_eq!(
myers_diff(&a, &b),
vec![Edit::Equal, Edit::Insert, Edit::Equal]
);
}
#[test]
fn myers_diff_delete_in_middle() {
let a = ["a", "b", "c"];
let b = ["a", "c"];
assert_eq!(
myers_diff(&a, &b),
vec![Edit::Equal, Edit::Delete, Edit::Equal]
);
}
#[test]
fn myers_diff_identical() {
let a = ["a", "b"];
let b = ["a", "b"];
assert_eq!(myers_diff(&a, &b), vec![Edit::Equal, Edit::Equal]);
}
#[test]
fn myers_diff_empty_old() {
let b = ["a", "b"];
assert_eq!(myers_diff(&[], &b), vec![Edit::Insert, Edit::Insert]);
}
#[test]
fn myers_diff_empty_new() {
let a = ["a", "b"];
assert_eq!(myers_diff(&a, &[]), vec![Edit::Delete, Edit::Delete]);
}
#[test]
fn myers_diff_both_empty() {
assert!(myers_diff::<&str>(&[], &[]).is_empty());
}
#[test]
fn myers_diff_all_different() {
let a = ["x"];
let b = ["y"];
assert_eq!(myers_diff(&a, &b), vec![Edit::Delete, Edit::Insert]);
}
#[test]
fn myers_diff_interleaved_changes() {
let a = ["a", "b", "c", "d"];
let b = ["a", "x", "c", "y"];
assert_eq!(
myers_diff(&a, &b),
vec![
Edit::Equal,
Edit::Delete,
Edit::Insert,
Edit::Equal,
Edit::Delete,
Edit::Insert,
]
);
}
#[test]
fn myers_diff_common_prefix_and_suffix() {
let a = ["a", "b", "c", "a", "b", "c"];
let b = ["a", "b", "X", "a", "b", "c"];
assert_eq!(
myers_diff(&a, &b),
vec![
Edit::Equal,
Edit::Equal,
Edit::Delete,
Edit::Insert,
Edit::Equal,
Edit::Equal,
Edit::Equal,
]
);
}
#[test]
fn group_ops_collapses_runs() {
let edits = vec![
Edit::Equal,
Edit::Equal,
Edit::Delete,
Edit::Insert,
Edit::Insert,
Edit::Equal,
];
let ops = group_ops(&edits);
assert_eq!(ops.len(), 4);
assert_eq!(ops[0], DiffOp::new(ChangeTag::Equal, 0, 2, 0, 2));
assert_eq!(ops[1], DiffOp::new(ChangeTag::Delete, 2, 1, 2, 0));
assert_eq!(ops[2], DiffOp::new(ChangeTag::Insert, 3, 0, 2, 2));
assert_eq!(ops[3], DiffOp::new(ChangeTag::Equal, 3, 1, 4, 1));
}
#[test]
fn split_lines_keeps_newline_on_internal_lines() {
assert_eq!(split_lines_keep_newline("a\nb\nc"), vec!["a\n", "b\n", "c"]);
assert_eq!(split_lines_keep_newline("a\nb\n"), vec!["a\n", "b\n"]);
assert_eq!(split_lines_keep_newline("abc"), vec!["abc"]);
assert!(split_lines_keep_newline("").is_empty());
}
}