use crate::{Change, Diff, DiffConfig, Path};
use serde_value::Value;
/// Strategy used to diff two sequences of values.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum SequenceDiffAlgorithm {
    /// Compare elements pairwise by position; simplest and fastest,
    /// but any shift produces cascading spurious changes.
    #[default]
    IndexBased,
    /// Myers shortest-edit-script diff (O((N+M)D)); detects insertions
    /// and deletions so shifted elements still match up.
    Myers,
    /// Patience diff: anchors on elements unique to both sequences and
    /// runs Myers between anchors; tends to give more readable results.
    Patience,
}
/// Abstraction over sequence-diffing strategies.
pub trait SequenceDiffer {
    /// Produces a [`Diff`] describing how `old` becomes `new`, with every
    /// recorded change rooted at `path`. Implementations consult `config`
    /// for limits (e.g. the collection-size cutoff).
    fn diff_sequences(&self, old: &[Value], new: &[Value], path: Path, config: &DiffConfig)
        -> Diff;
}
pub fn diff_sequences_with_algorithm(
old: &[Value],
new: &[Value],
path: Path,
config: &DiffConfig,
algorithm: SequenceDiffAlgorithm,
) -> Diff {
match algorithm {
SequenceDiffAlgorithm::IndexBased => {
IndexBasedDiffer.diff_sequences(old, new, path, config)
}
SequenceDiffAlgorithm::Myers => MyersDiffer.diff_sequences(old, new, path, config),
SequenceDiffAlgorithm::Patience => PatienceDiffer.diff_sequences(old, new, path, config),
}
}
/// Positional differ: element `i` of `old` is compared with element `i`
/// of `new`, regardless of content.
struct IndexBasedDiffer;

impl SequenceDiffer for IndexBasedDiffer {
    fn diff_sequences(
        &self,
        old: &[Value],
        new: &[Value],
        path: Path,
        config: &DiffConfig,
    ) -> Diff {
        let mut result = Diff::new();
        let limit = config.get_collection_limit();
        // Oversized collections are not diffed element-by-element; record
        // a single elision marker at the collection's own path instead.
        if old.len() > limit || new.len() > limit {
            result.insert(
                path,
                Change::Elided {
                    reason: format!(
                        "collection too large (old: {}, new: {})",
                        old.len(),
                        new.len()
                    ),
                    count: old.len().max(new.len()),
                },
            );
            return result;
        }
        // Recurse into the overlapping prefix pairwise.
        for (i, (old_val, new_val)) in old.iter().zip(new.iter()).enumerate() {
            result.merge(crate::diff::diff_values(old_val, new_val, path.index(i), config));
        }
        // Whatever extends past the shorter sequence is a pure removal
        // (old longer) or a pure addition (new longer) — never both.
        let shared = old.len().min(new.len());
        for (i, old_val) in old.iter().enumerate().skip(shared) {
            result.insert(path.index(i), Change::Removed(old_val.clone()));
        }
        for (i, new_val) in new.iter().enumerate().skip(shared) {
            result.insert(path.index(i), Change::Added(new_val.clone()));
        }
        result
    }
}
/// Differ backed by the Myers shortest-edit-script algorithm.
struct MyersDiffer;

impl SequenceDiffer for MyersDiffer {
    fn diff_sequences(
        &self,
        old: &[Value],
        new: &[Value],
        path: Path,
        config: &DiffConfig,
    ) -> Diff {
        let mut result = Diff::new();
        let limit = config.get_collection_limit();
        // Oversized collections get a single elision marker instead of a
        // full element-level diff.
        if old.len() > limit || new.len() > limit {
            result.insert(
                path,
                Change::Elided {
                    reason: format!(
                        "collection too large (old: {}, new: {})",
                        old.len(),
                        new.len()
                    ),
                    count: old.len().max(new.len()),
                },
            );
            return result;
        }
        // Translate the edit script into path-addressed changes.
        for edit in myers_diff(old, new) {
            match edit {
                Edit::Insert { new_index, value } => {
                    result.insert(path.index(new_index), Change::Added(value.clone()));
                }
                Edit::Delete { old_index, value } => {
                    result.insert(path.index(old_index), Change::Removed(value.clone()));
                }
                Edit::Equal {
                    old_index,
                    new_index,
                    ..
                } => {
                    let left = &old[old_index];
                    let right = &new[new_index];
                    // Defensive: myers_diff only emits Equal for elements
                    // that compared equal, so this recursion is normally
                    // a no-op.
                    if left != right {
                        result.merge(crate::diff::diff_values(
                            left,
                            right,
                            path.index(new_index),
                            config,
                        ));
                    }
                }
            }
        }
        result
    }
}
/// Differ backed by the patience algorithm (unique-element anchoring).
struct PatienceDiffer;

impl SequenceDiffer for PatienceDiffer {
    fn diff_sequences(
        &self,
        old: &[Value],
        new: &[Value],
        path: Path,
        config: &DiffConfig,
    ) -> Diff {
        let mut result = Diff::new();
        let limit = config.get_collection_limit();
        // Oversized collections get a single elision marker instead of a
        // full element-level diff.
        if old.len() > limit || new.len() > limit {
            result.insert(
                path,
                Change::Elided {
                    reason: format!(
                        "collection too large (old: {}, new: {})",
                        old.len(),
                        new.len()
                    ),
                    count: old.len().max(new.len()),
                },
            );
            return result;
        }
        // Translate the edit script into path-addressed changes.
        for edit in patience_diff(old, new) {
            match edit {
                Edit::Insert { new_index, value } => {
                    result.insert(path.index(new_index), Change::Added(value.clone()));
                }
                Edit::Delete { old_index, value } => {
                    result.insert(path.index(old_index), Change::Removed(value.clone()));
                }
                Edit::Equal {
                    old_index,
                    new_index,
                    ..
                } => {
                    let left = &old[old_index];
                    let right = &new[new_index];
                    // Defensive: Equal edits come from elements that
                    // compared equal, so this recursion is normally a no-op.
                    if left != right {
                        result.merge(crate::diff::diff_values(
                            left,
                            right,
                            path.index(new_index),
                            config,
                        ));
                    }
                }
            }
        }
        result
    }
}
/// One operation in an edit script; borrows values from the input slices.
#[derive(Debug, Clone)]
enum Edit<'a> {
    /// `value` exists only in the new sequence, at `new_index`.
    Insert {
        new_index: usize,
        value: &'a Value,
    },
    /// `value` exists only in the old sequence, at `old_index`.
    Delete {
        old_index: usize,
        value: &'a Value,
    },
    /// The element at `old_index` in old matches the one at `new_index`
    /// in new; `value` borrows from the old sequence.
    Equal {
        old_index: usize,
        new_index: usize,
        value: &'a Value,
    },
}
/// Computes a shortest edit script between `old` and `new` using the greedy
/// Myers O((N+M)D) algorithm ("An O(ND) Difference Algorithm and Its
/// Variations", Myers 1986).
fn myers_diff<'a>(old: &'a [Value], new: &'a [Value]) -> Vec<Edit<'a>> {
    let n = old.len();
    let m = new.len();
    if n == 0 && m == 0 {
        return vec![];
    }
    // Degenerate cases: one side empty means pure inserts or pure deletes.
    if n == 0 {
        return new
            .iter()
            .enumerate()
            .map(|(i, v)| Edit::Insert {
                new_index: i,
                value: v,
            })
            .collect();
    }
    if m == 0 {
        return old
            .iter()
            .enumerate()
            .map(|(i, v)| Edit::Delete {
                old_index: i,
                value: v,
            })
            .collect();
    }
    // v[offset + k] holds the furthest x reached on diagonal k (k = x - y).
    // Diagonals range over -max..=max, hence the shift by `offset` to index
    // into a plain Vec.
    let max = n + m;
    let mut v: Vec<isize> = vec![0; 2 * max + 1];
    let offset = max as isize;
    // Snapshot of `v` before each edit distance d; backtrack_myers replays
    // these to reconstruct the path.
    let mut trace: Vec<Vec<isize>> = vec![];
    for d in 0..=max {
        trace.push(v.clone());
        // For distance d, only diagonals of the same parity are reachable.
        for k in (-(d as isize)..=(d as isize)).step_by(2) {
            // Step down (insert) from diagonal k+1, or right (delete) from
            // diagonal k-1 — whichever neighbor reached further. The k == d
            // guard also keeps the k+1 read in bounds via short-circuit.
            let mut x = if k == -(d as isize)
                || (k != d as isize && v[(offset + k - 1) as usize] < v[(offset + k + 1) as usize])
            {
                v[(offset + k + 1) as usize]
            } else {
                v[(offset + k - 1) as usize] + 1
            };
            let mut y = x - k;
            // Follow the "snake": consume matching elements along the diagonal.
            while x < n as isize && y < m as isize && old[x as usize] == new[y as usize] {
                x += 1;
                y += 1;
            }
            v[(offset + k) as usize] = x;
            // Reached the bottom-right corner: d is the edit distance.
            if x >= n as isize && y >= m as isize {
                return backtrack_myers(&trace, old, new, d);
            }
        }
    }
    // Unreachable: the loop always terminates through backtrack_myers
    // because d = n + m edits always suffice.
    vec![]
}
/// Reconstructs the edit script by walking the `trace` snapshots from the
/// final edit distance `d` back to 0, starting at the bottom-right corner
/// of the edit grid. Edits are discovered in reverse and flipped at the end.
fn backtrack_myers<'a>(
    trace: &[Vec<isize>],
    old: &'a [Value],
    new: &'a [Value],
    d: usize,
) -> Vec<Edit<'a>> {
    let mut edits = vec![];
    let n = old.len() as isize;
    let m = new.len() as isize;
    let max = (old.len() + new.len()) as isize;
    let offset = max;
    // Current grid position; begin at (n, m).
    let mut x = n;
    let mut y = m;
    for depth in (0..=d).rev() {
        let v = &trace[depth];
        let k = x - y;
        // Re-make the same neighbor choice the forward pass made at this
        // depth, so we know which diagonal we arrived from.
        let prev_k = if k == -(depth as isize)
            || (k != depth as isize && v[(offset + k - 1) as usize] < v[(offset + k + 1) as usize])
        {
            k + 1
        } else {
            k - 1
        };
        let prev_x = v[(offset + prev_k) as usize];
        let prev_y = prev_x - prev_k;
        // Walk back through the snake: these positions matched.
        while x > prev_x && y > prev_y {
            x -= 1;
            y -= 1;
            edits.push(Edit::Equal {
                old_index: x as usize,
                new_index: y as usize,
                value: &old[x as usize],
            });
        }
        // Record the single edit that moved between depths (none at depth 0).
        if depth > 0 {
            if x == prev_x {
                // No horizontal movement: we stepped down, i.e. inserted
                // an element from `new`.
                y -= 1;
                edits.push(Edit::Insert {
                    new_index: y as usize,
                    value: &new[y as usize],
                });
            } else {
                // Stepped right: deleted an element from `old`.
                x -= 1;
                edits.push(Edit::Delete {
                    old_index: x as usize,
                    value: &old[x as usize],
                });
            }
        }
    }
    // Collected back-to-front; restore forward order.
    edits.reverse();
    edits
}
/// Patience diff: anchors on elements that are unique to both sequences,
/// emits each anchor as an `Equal`, and runs Myers on the gaps between
/// consecutive anchors (and after the last one). Falls back to plain Myers
/// when there are no anchors at all.
fn patience_diff<'a>(old: &'a [Value], new: &'a [Value]) -> Vec<Edit<'a>> {
    /// Re-bases a sub-slice edit onto absolute indices in `old`/`new`.
    fn rebase<'b>(edit: Edit<'b>, old_off: usize, new_off: usize) -> Edit<'b> {
        match edit {
            Edit::Insert { new_index, value } => Edit::Insert {
                new_index: new_off + new_index,
                value,
            },
            Edit::Delete { old_index, value } => Edit::Delete {
                old_index: old_off + old_index,
                value,
            },
            Edit::Equal {
                old_index,
                new_index,
                value,
            } => Edit::Equal {
                old_index: old_off + old_index,
                new_index: new_off + new_index,
                value,
            },
        }
    }

    let unique_common = find_unique_common(old, new);
    if unique_common.is_empty() {
        return myers_diff(old, new);
    }
    let mut edits = vec![];
    // Cursors marking how far into each sequence we have emitted edits.
    let mut old_pos = 0;
    let mut new_pos = 0;
    for (old_idx, new_idx) in unique_common {
        // Diff the gap before this anchor, re-based to absolute indices.
        if old_pos < old_idx || new_pos < new_idx {
            edits.extend(
                myers_diff(&old[old_pos..old_idx], &new[new_pos..new_idx])
                    .into_iter()
                    .map(|e| rebase(e, old_pos, new_pos)),
            );
        }
        // The anchor itself is, by construction, an exact match.
        edits.push(Edit::Equal {
            old_index: old_idx,
            new_index: new_idx,
            value: &old[old_idx],
        });
        old_pos = old_idx + 1;
        new_pos = new_idx + 1;
    }
    // Diff whatever trails the final anchor.
    if old_pos < old.len() || new_pos < new.len() {
        edits.extend(
            myers_diff(&old[old_pos..], &new[new_pos..])
                .into_iter()
                .map(|e| rebase(e, old_pos, new_pos)),
        );
    }
    edits
}
/// Finds the anchor pairs for patience diff: elements that occur exactly
/// once in `old` AND exactly once in `new`, returned as `(old_index,
/// new_index)` pairs forming a longest increasing subsequence (strictly
/// increasing in both coordinates), in order of appearance.
///
/// Uses patience sorting (binary search on tails) for the LIS, rather than
/// a greedy "keep if larger than the last kept" filter — the greedy filter
/// could discard most anchors when an early pair had a large old index.
/// Positions are recorded during the counting pass, avoiding a linear
/// `position()` scan per unique value.
fn find_unique_common(old: &[Value], new: &[Value]) -> Vec<(usize, usize)> {
    use std::collections::HashMap;

    // value -> (occurrence count in old, position of first occurrence).
    let mut old_info: HashMap<&Value, (usize, usize)> = HashMap::new();
    for (i, val) in old.iter().enumerate() {
        old_info.entry(val).or_insert((0, i)).0 += 1;
    }
    let mut new_counts: HashMap<&Value, usize> = HashMap::new();
    for val in new {
        *new_counts.entry(val).or_insert(0) += 1;
    }

    // Candidate anchors, ordered by (strictly increasing) new index.
    let pairs: Vec<(usize, usize)> = new
        .iter()
        .enumerate()
        .filter_map(|(new_idx, val)| {
            match (old_info.get(val), new_counts.get(val)) {
                (Some(&(1, old_idx)), Some(&1)) => Some((old_idx, new_idx)),
                _ => None,
            }
        })
        .collect();

    // Longest strictly-increasing subsequence on old index via patience
    // sorting. `tails[len]` is the index into `pairs` of the smallest tail
    // old index for an increasing run of length len + 1; `prev` links each
    // element to its predecessor so the subsequence can be rebuilt.
    let mut tails: Vec<usize> = vec![];
    let mut prev: Vec<Option<usize>> = vec![None; pairs.len()];
    for (i, &(old_idx, _)) in pairs.iter().enumerate() {
        // Old indices are unique, so strict `<` gives strict increase.
        let pos = tails.partition_point(|&j| pairs[j].0 < old_idx);
        if pos > 0 {
            prev[i] = Some(tails[pos - 1]);
        }
        if pos == tails.len() {
            tails.push(i);
        } else {
            tails[pos] = i;
        }
    }

    // Rebuild the subsequence back-to-front, then restore forward order.
    let mut lis = vec![];
    let mut cursor = tails.last().copied();
    while let Some(i) = cursor {
        lis.push(pairs[i]);
        cursor = prev[i];
    }
    lis.reverse();
    lis
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_value::Value;

    // A trailing element changed in place must produce a non-empty diff
    // from the positional differ.
    #[test]
    fn test_index_based_simple() {
        let old = vec![Value::I64(1), Value::I64(2), Value::I64(3)];
        let new = vec![Value::I64(1), Value::I64(2), Value::I64(4)];
        let config = DiffConfig::default();
        let diff = IndexBasedDiffer.diff_sequences(&old, &new, Path::root(), &config);
        assert!(!diff.is_empty());
    }

    // A single element spliced into the middle should appear as an Insert
    // edit (not a cascade of replacements) in the Myers script.
    #[test]
    fn test_myers_insertion() {
        let old = vec![Value::I64(1), Value::I64(2), Value::I64(3)];
        let new = vec![Value::I64(1), Value::I64(5), Value::I64(2), Value::I64(3)];
        let edits = myers_diff(&old, &new);
        let has_insert = edits.iter().any(|e| matches!(e, Edit::Insert { .. }));
        assert!(has_insert);
    }

    // All of 1, 2, 3 occur exactly once on both sides, so patience must
    // find at least one unique-common anchor pair.
    #[test]
    fn test_patience_unique_elements() {
        let old = vec![Value::I64(1), Value::I64(2), Value::I64(3)];
        let new = vec![Value::I64(2), Value::I64(3), Value::I64(4), Value::I64(1)];
        let unique = find_unique_common(&old, &new);
        assert!(!unique.is_empty());
    }
}