use crate::input_processing::{EditType, Entry};
use crate::neg_idx_vec::NegIdxVec;
use anyhow::Result;
use logging_timer::time;
use serde::Serialize;
use std::fmt::Debug;
use std::iter::FromIterator;
use std::ops::Range;
use thiserror::Error;
fn common_prefix_len<T: PartialEq>(
a: &[T],
a_range: Range<usize>,
b: &[T],
b_range: Range<usize>,
) -> usize {
let mut l = 0;
unsafe {
while a_range.start + l < a_range.end
&& b_range.start + l < b_range.end
&& a.get_unchecked(a_range.start + l) == b.get_unchecked(b_range.start + l)
{
l += 1;
}
}
l
}
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
struct Coordinates<T>
where
T: Debug + PartialEq + Eq + Clone + Copy,
{
pub old: T,
pub new: T,
}
fn convert_range<FromType, IntoType>(range: Range<FromType>) -> Range<IntoType>
where
FromType: TryInto<IntoType>,
<FromType as TryInto<IntoType>>::Error: std::fmt::Debug,
{
range.start.try_into().unwrap()..range.end.try_into().unwrap()
}
fn common_suffix_len<T: PartialEq>(
a: &[T],
a_range: Range<usize>,
b: &[T],
b_range: Range<usize>,
) -> usize {
let mut l: isize = 1;
let a_range: Range<isize> = convert_range(a_range);
let b_range: Range<isize> = convert_range(b_range);
unsafe {
while a_range.end - l >= a_range.start
&& b_range.end - l >= b_range.start
&& a.get_unchecked::<usize>((a_range.end - l).try_into().unwrap())
== b.get_unchecked::<usize>((b_range.end - l).try_into().unwrap())
{
l += 1;
}
}
(l - 1).try_into().unwrap()
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct Line<'a> {
pub line_index: usize,
pub entries: Vec<&'a Entry<'a>>,
}
impl<'a> Line<'a> {
#[must_use]
pub fn new(line_index: usize) -> Self {
Line {
line_index,
entries: Vec::new(),
}
}
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct Hunk<'a>(pub Vec<Line<'a>>);
#[derive(Debug, Error)]
pub enum HunkInsertionError {
#[error(
"Non-adjacent entry (line {incoming_line:?}) added to hunk (last line: {last_line:?})"
)]
NonAdjacentHunk {
incoming_line: usize,
last_line: usize,
},
#[error("Attempted to append an entry with a line index ({incoming_line:?}) less than the first line's index ({last_line:?})")]
PriorLine {
incoming_line: usize,
last_line: usize,
},
#[error("Attempted to append an entry with a column ({incoming_col:?}, line: {incoming_line:?}) less than the first entry's column ({last_col:?}, line: {last_line:?})")]
PriorColumn {
incoming_col: usize,
incoming_line: usize,
last_col: usize,
last_line: usize,
},
}
impl<'a> Hunk<'a> {
#[must_use]
pub fn new() -> Self {
Hunk(Vec::new())
}
#[must_use]
pub fn first_line(&self) -> Option<usize> {
self.0.first().map(|x| x.line_index)
}
#[must_use]
pub fn last_line(&self) -> Option<usize> {
self.0.last().map(|x| x.line_index)
}
pub fn can_push_back(&self, entry: &Entry<'a>) -> Result<(), HunkInsertionError> {
let incoming_line_idx = entry.start_position().row;
if let Some(last_line) = self.0.last() {
let last_line_idx = last_line.line_index;
if incoming_line_idx < last_line_idx {
return Err(HunkInsertionError::PriorLine {
incoming_line: incoming_line_idx,
last_line: last_line_idx,
});
} else if incoming_line_idx - last_line_idx > 1 {
return Err(HunkInsertionError::NonAdjacentHunk {
incoming_line: incoming_line_idx,
last_line: last_line_idx,
});
}
if incoming_line_idx == last_line_idx && !last_line.entries.is_empty() {
let last_entry = last_line.entries.last().unwrap();
let last_col = last_entry.end_position().column;
let last_line = last_entry.end_position().row;
let incoming_col = entry.start_position().column;
let incoming_line = entry.end_position().row;
if incoming_col < last_col {
return Err(HunkInsertionError::PriorColumn {
incoming_col,
last_col,
incoming_line,
last_line,
});
}
}
}
Ok(())
}
pub fn push_back(&mut self, entry: &'a Entry<'a>) -> Result<(), HunkInsertionError> {
self.can_push_back(entry)?;
let incoming_line_idx = entry.start_position().row;
if self.0.is_empty() || incoming_line_idx - self.0.last().unwrap().line_index == 1 {
self.0.push(Line::new(incoming_line_idx));
}
let last_line = self.0.last_mut().unwrap();
last_line.entries.push(entry);
Ok(())
}
}
impl<'a> Default for Hunk<'a> {
fn default() -> Self {
Self::new()
}
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub enum DocumentType<T: Debug + PartialEq + Serialize + Clone> {
Old(T),
New(T),
}
impl<T> AsRef<T> for DocumentType<T>
where
T: Debug + Clone + PartialEq + Serialize,
{
fn as_ref(&self) -> &T {
match self {
Self::Old(x) | Self::New(x) => x,
}
}
}
impl<T> AsMut<T> for DocumentType<T>
where
T: Debug + Clone + PartialEq + Serialize,
{
fn as_mut(&mut self) -> &mut T {
match self {
Self::Old(x) | Self::New(x) => x,
}
}
}
impl<T: Debug + Clone + PartialEq + Serialize> DocumentType<T> {
fn consume(self) -> T {
match self {
Self::Old(x) | Self::New(x) => x,
}
}
}
pub type RichHunk<'a> = DocumentType<Hunk<'a>>;
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Hunks<'a>(pub Vec<Hunk<'a>>);
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct RichHunks<'a>(pub Vec<RichHunk<'a>>);
pub struct RichHunksBuilder<'a> {
hunks: RichHunks<'a>,
last_old: Option<usize>,
last_new: Option<usize>,
}
impl<'a> RichHunksBuilder<'a> {
#[must_use]
pub fn new() -> Self {
Self {
hunks: RichHunks(Vec::new()),
last_old: None,
last_new: None,
}
}
#[must_use]
pub fn build(self) -> RichHunks<'a> {
self.hunks
}
fn get_hunk_for_insertion(
&mut self,
incoming_entry: &DocumentType<&'a Entry<'a>>,
) -> Result<usize, HunkInsertionError> {
let (mut last_idx, new_hunk) = match incoming_entry {
DocumentType::Old(_) => (self.last_old, DocumentType::Old(Hunk::new())),
DocumentType::New(_) => (self.last_new, DocumentType::New(Hunk::new())),
};
match last_idx {
None => {
self.hunks.0.push(new_hunk);
last_idx = Some(self.hunks.0.len() - 1);
}
Some(idx) => {
let last_hunk = self.hunks.0[idx].as_ref();
let last_line = last_hunk.last_line();
if let Some(last_line) = last_line {
let incoming_line = incoming_entry.as_ref().end_position.row;
if incoming_line < last_line {
return Err(HunkInsertionError::PriorLine {
incoming_line,
last_line,
});
}
if incoming_line - last_line > 1 {
self.hunks.0.push(new_hunk);
last_idx = Some(self.hunks.0.len() - 1);
}
}
}
};
match incoming_entry {
DocumentType::Old(_) => self.last_old = last_idx,
DocumentType::New(_) => self.last_new = last_idx,
}
Ok(last_idx.unwrap())
}
pub fn push_back(&mut self, entry_wrapper: DocumentType<&'a Entry<'a>>) -> Result<()> {
let insertion_idx = self.get_hunk_for_insertion(&entry_wrapper)?;
self.hunks.0[insertion_idx]
.as_mut()
.push_back(entry_wrapper.consume())?;
Ok(())
}
}
impl<'a> Default for RichHunksBuilder<'a> {
fn default() -> Self {
Self::new()
}
}
impl<'a> Hunks<'a> {
#[must_use]
pub fn new() -> Self {
Hunks(Vec::new())
}
pub fn push_back(&mut self, entry: &'a Entry<'a>) -> Result<()> {
if let Some(hunk) = self.0.last_mut() {
match hunk.can_push_back(entry) {
Ok(()) => hunk.push_back(entry),
Err(HunkInsertionError::NonAdjacentHunk {
incoming_line: _,
last_line: _,
}) => {
let mut hunk = Hunk::new();
hunk.push_back(entry)?;
self.0.push(hunk);
Ok(())
}
Err(e) => Err(e),
}?;
} else {
self.0.push(Hunk::new());
self.0.last_mut().unwrap().push_back(entry)?;
}
Ok(())
}
}
impl<'a> Default for Hunks<'a> {
fn default() -> Self {
Self::new()
}
}
pub struct HunkAppender<'a>(pub Hunks<'a>);
impl<'a> FromIterator<&'a Entry<'a>> for HunkAppender<'a> {
fn from_iter<T>(iter: T) -> Self
where
T: IntoIterator<Item = &'a Entry<'a>>,
{
let mut hunks = Hunks::new();
for i in iter {
hunks.push_back(i).expect("Invalid iterator");
}
HunkAppender(hunks)
}
}
pub trait Engine<'elem, T>
where
T: Eq + 'elem,
{
type Container;
fn diff(&self, a: &'elem [T], b: &'elem [T]) -> Self::Container;
}
#[derive(Eq, PartialEq, Copy, Clone, Debug, Default)]
pub struct Myers {}
impl<'elem, T> Engine<'elem, T> for Myers
where
T: Eq + 'elem + std::fmt::Debug,
{
type Container = Vec<EditType<&'elem T>>;
fn diff(&self, a: &'elem [T], b: &'elem [T]) -> Self::Container {
let mut res = Vec::with_capacity(a.len() + b.len());
let mut frontiers = MyersFrontiers::new(a.len(), b.len());
Myers::diff_impl(&mut res, a, 0..a.len(), b, 0..b.len(), &mut frontiers);
res
}
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct MidSnakeInfo {
pub a_split: i32,
pub b_split: i32,
pub optimal_len: u32,
}
fn split_range(r: &Range<usize>, idx: usize) -> (Range<usize>, Range<usize>) {
(r.start..idx, idx..r.end)
}
pub struct MyersFrontiers {
pub forward: NegIdxVec<i32>,
pub reverse: NegIdxVec<i32>,
}
impl MyersFrontiers {
fn new(old_len: usize, new_len: usize) -> Self {
let midpoint = ((old_len + new_len) as f32 / 2.00).ceil() as i32 + 1;
let vec_length = (midpoint * 2) as usize;
MyersFrontiers {
forward: vec![0; vec_length].into(),
reverse: vec![0; vec_length].into(),
}
}
}
impl Myers {
fn diff_impl<'elem, T: Eq + Debug + 'elem>(
res: &mut Vec<EditType<&'elem T>>,
old: &'elem [T],
mut old_range: Range<usize>,
new: &'elem [T],
mut new_range: Range<usize>,
frontiers: &mut MyersFrontiers,
) {
let common_pref_len = common_prefix_len(old, old_range.clone(), new, new_range.clone());
old_range.start += common_pref_len;
new_range.start += common_pref_len;
let common_suf_len = common_suffix_len(old, old_range.clone(), new, new_range.clone());
old_range.end = old_range.start.max(old_range.end - common_suf_len);
new_range.end = new_range.start.max(new_range.end - common_suf_len);
if old_range.is_empty() && new_range.is_empty() {
return;
}
if old_range.is_empty() {
for i in new_range {
res.push(EditType::Addition(&new[i]));
}
return;
}
if new_range.is_empty() {
for i in old_range {
res.push(EditType::Deletion(&old[i]));
}
return;
}
let Coordinates {
old: x_start,
new: y_start,
} = Myers::middle_snake(old, old_range.clone(), new, new_range.clone(), frontiers);
let (old_first_half, old_second_half) = split_range(&old_range, x_start);
let (new_first_half, new_second_half) = split_range(&new_range, y_start);
Myers::diff_impl(res, old, old_first_half, new, new_first_half, frontiers);
Myers::diff_impl(res, old, old_second_half, new, new_second_half, frontiers);
}
fn middle_snake<T: Eq>(
old: &[T],
old_range: Range<usize>,
new: &[T],
new_range: Range<usize>,
frontiers: &mut MyersFrontiers,
) -> Coordinates<usize> {
let n = old_range.len() as i32;
let m = new_range.len() as i32;
let delta = n - m;
let is_odd = delta % 2 == 1;
let midpoint = ((m + n) as f32 / 2.00).ceil() as i32 + 1;
let fwd_front = &mut frontiers.forward;
let rev_front = &mut frontiers.reverse;
fwd_front[1] = 0;
rev_front[1] = 0;
for d in 0..=midpoint {
for k in (-d..=d).rev().step_by(2) {
let mut x = if k == -d || (k != d && fwd_front[k + 1] >= fwd_front[k - 1]) {
fwd_front[k + 1]
} else {
fwd_front[k - 1] + 1
};
let y = x - k;
let (x0, y0) = (x, y);
if x < n && y < m {
debug_assert!(x >= 0);
debug_assert!(y >= 0);
let common_pref_len = common_prefix_len(
old,
old_range.start + (x as usize)..old_range.end,
new,
new_range.start + (y as usize)..new_range.end,
);
x += common_pref_len as i32;
}
fwd_front[k] = x;
if is_odd && (k - delta).abs() < d {
let reverse_x = rev_front[-(k - delta)];
let old = (old_range.start as i32) + x0;
let new = (new_range.start as i32) + y0;
debug_assert!(
old >= (old_range.start as i32) && old <= (old_range.end as i32),
"expected old={} in {}..{}",
old,
old_range.start,
old_range.end,
);
debug_assert!(
new >= (new_range.start as i32) && new <= (new_range.end as i32),
"expected new={} in {}..{}",
new,
new_range.start,
new_range.end,
);
if x + reverse_x >= n {
return Coordinates {
old: old as usize,
new: new as usize,
};
}
}
}
for k in (-d..=d).rev().step_by(2) {
let mut x = if k == -d || (k != d && rev_front[k + 1] >= rev_front[k - 1]) {
rev_front[k + 1]
} else {
rev_front[k - 1] + 1
};
let mut y = x - k;
if x < n && y < m {
debug_assert!(x >= 0);
debug_assert!(y >= 0);
debug_assert!(n - x >= 0);
debug_assert!(m - y >= 0);
let common_suf_len = common_suffix_len(
old,
old_range.start..old_range.start + (n as usize) - (x as usize),
new,
new_range.start..new_range.start + (m as usize) - (y as usize),
);
x += common_suf_len as i32;
y += common_suf_len as i32;
}
rev_front[k] = x;
if !is_odd && (k - delta).abs() <= d {
let forward_x = fwd_front[-(k - delta)];
if forward_x + x >= n {
let old = n - x + (old_range.start as i32);
let new = m - y + (new_range.start as i32);
debug_assert!(
old >= (old_range.start as i32) && old <= (old_range.end as i32),
"expected old={} in {}..{}",
old,
old_range.start,
old_range.end,
);
debug_assert!(
new >= (new_range.start as i32) && new <= (new_range.end as i32),
"expected new={} in {}..{}",
new,
new_range.start,
new_range.end,
);
return Coordinates {
old: old as usize,
new: new as usize,
};
}
}
}
}
unreachable!();
}
}
impl<'a> TryFrom<Vec<EditType<&'a Entry<'a>>>> for RichHunks<'a> {
type Error = anyhow::Error;
fn try_from(edits: Vec<EditType<&'a Entry<'a>>>) -> Result<Self, Self::Error> {
let mut builder = RichHunksBuilder::new();
for edit_wrapper in edits {
let edit = match edit_wrapper {
EditType::Addition(edit) => DocumentType::New(edit),
EditType::Deletion(edit) => DocumentType::Old(edit),
};
builder.push_back(edit)?;
}
Ok(builder.build())
}
}
#[time("info", "diff::{}")]
pub fn compute_edit_script<'a>(
old: &'a [Entry<'a>],
new: &'a [Entry<'a>],
) -> Result<RichHunks<'a>> {
let myers = Myers::default();
let edit_script = myers.diff(old, new);
RichHunks::try_from(edit_script)
}
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq as p_assert_eq;
use test_case::test_case;
fn myers_diff<'a, T>(a: &'a [T], b: &'a [T]) -> Vec<EditType<&'a T>>
where
T: 'a + Eq + Debug,
{
let myers = Myers::default();
myers.diff(a, b)
}
#[test]
fn mid_snake_empty_input() {
let input_a = b"";
let input_b = b"";
let mut frontiers = MyersFrontiers::new(input_a.len(), input_b.len());
let mid_snake = Myers::middle_snake(
&input_a[..],
0..input_a.len(),
&input_b[..],
0..input_b.len(),
&mut frontiers,
);
let expected = Coordinates { old: 0, new: 0 };
p_assert_eq!(expected, mid_snake);
}
#[test]
fn mid_snake() {
let input_a = &b"ABCABBA"[..];
let input_b = &b"CBABAC"[..];
let mut frontiers = MyersFrontiers::new(input_a.len(), input_b.len());
let mid_snake = Myers::middle_snake(
input_a,
0..input_a.len(),
input_b,
0..input_b.len(),
&mut frontiers,
);
let expected = Coordinates { old: 4, new: 1 };
p_assert_eq!(expected, mid_snake);
}
#[test]
fn myers_diff_empty_inputs() {
let input_a: Vec<i32> = vec![];
let input_b: Vec<i32> = vec![];
let edit_script = myers_diff(&input_a, &input_b);
assert!(edit_script.is_empty());
}
#[test]
fn myers_diff_no_diff() {
let input_a: Vec<i32> = vec![0; 4];
let input_b: Vec<i32> = vec![0; 4];
let edit_script = myers_diff(&input_a, &input_b);
assert!(edit_script.is_empty());
}
#[test]
fn myers_diff_one_addition() {
let input_a: Vec<i32> = Vec::new();
let input_b: Vec<i32> = vec![0];
let expected = vec![EditType::Addition(&input_b[0])];
let edit_script = myers_diff(&input_a, &input_b);
p_assert_eq!(expected, edit_script);
}
#[test]
fn myers_diff_one_deletion() {
let input_a: Vec<i32> = vec![0];
let input_b: Vec<i32> = Vec::new();
let expected = vec![EditType::Deletion(&input_a[0])];
let edit_script = myers_diff(&input_a, &input_b);
p_assert_eq!(expected, edit_script);
}
#[test]
fn myers_diff_single_substitution() {
let myers = Myers::default();
let input_a = [1];
let input_b = [2];
let edit_script = myers.diff(&input_a[..], &input_b[..]);
let expected = vec![
EditType::Addition(&input_b[0]),
EditType::Deletion(&input_a[0]),
];
p_assert_eq!(expected, edit_script);
}
#[test]
fn myers_diff_single_substitution_with_common_elements() {
let myers = Myers::default();
let input_a = [0, 0, 0];
let input_b = [0, 1, 0];
let edit_script = myers.diff(&input_a[..], &input_b[..]);
let expected = vec![
EditType::Addition(&input_b[1]),
EditType::Deletion(&input_a[1]),
];
p_assert_eq!(expected, edit_script);
}
#[test_case(b"BAAA", b"CAAA" => 0 ; "no common prefix")]
#[test_case(b"AAABA", b"AAACA" => 3 ; "with common prefix")]
fn common_prefix(a: &[u8], b: &[u8]) -> usize {
common_prefix_len(a, 0..a.len(), b, 0..b.len())
}
#[test_case(b"AAAB", b"AAAC" => 0 ; "no common suffix")]
#[test_case(b"ABAAA", b"ACAAA" => 3 ; "with common suffix")]
fn common_suffix(a: &[u8], b: &[u8]) -> usize {
common_suffix_len(a, 0..a.len(), b, 0..b.len())
}
}