#![allow(
clippy::cast_sign_loss,
clippy::cast_possible_truncation,
clippy::cast_possible_wrap,
clippy::many_single_char_names,
clippy::cast_lossless,
clippy::items_after_statements,
clippy::too_many_lines
)]
use std::{collections::HashSet, hash::Hash};
/// Classification of a contiguous region reported by [`diff`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffTag {
    /// Region is identical in both sequences.
    Equal,
    /// Region of `a` was replaced by the corresponding region of `b`.
    Replace,
    /// Region exists only in `a`.
    Delete,
    /// Region exists only in `b`.
    Insert,
}
/// One opcode of a diff: a tag plus the half-open index ranges
/// `[start_a, end_a)` in `a` and `[start_b, end_b)` in `b` it covers.
/// For `Delete` the `b` range is empty; for `Insert` the `a` range is empty.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DiffChunk {
    pub tag: DiffTag,
    pub start_a: usize,
    pub end_a: usize,
    pub start_b: usize,
    pub end_b: usize,
}
/// Computes the differences between `a` and `b` as a list of opcodes that
/// tile both sequences from start to end.
///
/// Common prefix/suffix are trimmed first, and elements that occur in only
/// one sequence may be discarded (difflib-style heuristic) before running
/// the O(NP)-style core comparison.
#[must_use]
pub fn diff<T: Eq + Hash>(a: &[T], b: &[T]) -> Vec<DiffChunk> {
    if a.is_empty() && b.is_empty() {
        return vec![];
    }
    // Shrink the problem: strip the shared ends before the core run.
    let prefix = common_prefix_len(a, b);
    let suffix = common_suffix_len(&a[prefix..], &b[prefix..]);
    let a_trimmed = &a[prefix..a.len() - suffix];
    let b_trimmed = &b[prefix..b.len() - suffix];
    // `aindex`/`bindex` map filtered positions back to trimmed positions
    // when the discard heuristic was applied.
    let (a_filtered, aindex, b_filtered, bindex, discarded) =
        discard_nonmatching(a_trimmed, b_trimmed);
    let mut arena = Vec::new();
    let lastsnake = if discarded {
        myers_core(&a_filtered, &b_filtered, &mut arena)
    } else {
        myers_core(a_trimmed, b_trimmed, &mut arena)
    };
    let mut blocks = build_matching_blocks(
        lastsnake,
        &arena,
        prefix,
        suffix,
        &aindex,
        &bindex,
        discarded,
        a.len(),
        b.len(),
    );
    // Merge adjacent matching blocks so equal runs come out as one chunk.
    postprocess(&mut blocks, a, b);
    blocks_to_opcodes(&blocks)
}
/// Splits both strings into lines and diffs them line-by-line; chunk
/// indices refer to line numbers (0-based).
#[must_use]
pub fn diff_lines(a: &str, b: &str) -> Vec<DiffChunk> {
    let lines_a: Vec<&str> = a.lines().collect();
    let lines_b: Vec<&str> = b.lines().collect();
    diff(&lines_a, &lines_b)
}
/// A slice of the tokenized input together with its byte offset.
#[derive(Debug)]
pub struct Token<'a> {
    pub text: &'a str,
    pub offset: usize,
}

/// Splits `s` into tokens: maximal runs of word characters (alphanumerics
/// and `_`), maximal runs of whitespace, and single characters for
/// everything else. `offset` is the token's byte offset into `s`.
#[must_use]
pub fn tokenize(s: &str) -> Vec<Token<'_>> {
    // Advances `pos` over every char satisfying `pred`; returns the end.
    fn scan_while(s: &str, mut pos: usize, pred: impl Fn(char) -> bool) -> usize {
        while let Some(c) = s[pos..].chars().next() {
            if !pred(c) {
                break;
            }
            pos += c.len_utf8();
        }
        pos
    }
    let is_word = |c: char| c.is_alphanumeric() || c == '_';
    let mut tokens = Vec::new();
    let mut pos = 0;
    while pos < s.len() {
        let ch = s[pos..].chars().next().unwrap();
        let end = if is_word(ch) {
            scan_while(s, pos, is_word)
        } else if ch.is_whitespace() {
            scan_while(s, pos, char::is_whitespace)
        } else {
            // Punctuation and symbols are emitted one char at a time.
            pos + ch.len_utf8()
        };
        tokens.push(Token {
            text: &s[pos..end],
            offset: pos,
        });
        pos = end;
    }
    tokens
}
/// Tokenizes both inputs and diffs them token-by-token.
///
/// Returns the token lists for `a` and `b` together with the diff chunks;
/// chunk indices refer to positions in those token lists.
#[must_use]
pub fn diff_words<'a>(a: &'a str, b: &'a str) -> (Vec<Token<'a>>, Vec<Token<'a>>, Vec<DiffChunk>) {
    let toks_a = tokenize(a);
    let toks_b = tokenize(b);
    let chunks = {
        let words_a: Vec<&str> = toks_a.iter().map(|t| t.text).collect();
        let words_b: Vec<&str> = toks_b.iter().map(|t| t.text).collect();
        diff(&words_a, &words_b)
    };
    (toks_a, toks_b, chunks)
}
/// A run of `len` equal elements starting at `a` in the first sequence and
/// `b` in the second (difflib-style matching block; `len == 0` is used as
/// a terminating sentinel).
#[derive(Debug, Clone, Copy)]
struct MatchingBlock {
    a: usize,
    b: usize,
    len: usize,
}
/// One "snake" (diagonal run of matches) recorded by `myers_core`, stored
/// in an arena and linked to the previous snake on the same path via
/// `parent` (an arena index).
#[derive(Debug)]
struct SnakeNode {
    parent: Option<usize>,
    // Start position of the run in `a` (x) and `b` (y), and its length.
    x: usize,
    y: usize,
    len: usize,
}
/// Working array of the O(NP) algorithm: per (shifted) diagonal, the
/// furthest `y` reached and the arena index of the last snake on that
/// path. Out-of-range diagonals read as `(-1, None)`; writes to them are
/// silently dropped.
struct Fp {
    data: Vec<(isize, Option<usize>)>,
}
impl Fp {
    fn new(size: usize) -> Self {
        Fp {
            data: vec![(-1, None); size],
        }
    }
    fn get(&self, km: isize) -> (isize, Option<usize>) {
        match usize::try_from(km) {
            Ok(i) if i < self.data.len() => self.data[i],
            _ => (-1, None),
        }
    }
    fn set(&mut self, km: isize, val: (isize, Option<usize>)) {
        if let Ok(i) = usize::try_from(km) {
            if let Some(slot) = self.data.get_mut(i) {
                *slot = val;
            }
        }
    }
}
/// Number of leading elements shared by `a` and `b`.
fn common_prefix_len<T: PartialEq>(a: &[T], b: &[T]) -> usize {
    let limit = a.len().min(b.len());
    let mut count = 0;
    while count < limit && a[count] == b[count] {
        count += 1;
    }
    count
}
/// Number of trailing elements shared by `a` and `b`.
fn common_suffix_len<T: PartialEq>(a: &[T], b: &[T]) -> usize {
    let limit = a.len().min(b.len());
    let mut count = 0;
    while count < limit && a[a.len() - 1 - count] == b[b.len() - 1 - count] {
        count += 1;
    }
    count
}
/// difflib-style heuristic: drops elements of each sequence that never
/// occur in the other, keeping index maps back to the original positions.
///
/// Returns `(a_filtered, aindex, b_filtered, bindex, applied)`. The
/// heuristic is only considered worthwhile — and its results only
/// returned — when more than 10 elements can be dropped from either side;
/// otherwise everything comes back empty with `applied == false`.
fn discard_nonmatching<'a, T: Eq + Hash>(
    a: &'a [T],
    b: &'a [T],
) -> (Vec<&'a T>, Vec<usize>, Vec<&'a T>, Vec<usize>, bool) {
    if a.is_empty() || b.is_empty() {
        return (vec![], vec![], vec![], vec![], false);
    }
    // Keep only elements present in `needle_set`, remembering where each
    // survivor came from.
    fn keep_matching<'a, T: Eq + Hash>(
        needle_set: &HashSet<&T>,
        haystack: &'a [T],
    ) -> (Vec<&'a T>, Vec<usize>) {
        haystack
            .iter()
            .enumerate()
            .filter(|(_, item)| needle_set.contains(item))
            .map(|(i, item)| (item, i))
            .unzip()
    }
    let aset: HashSet<&T> = a.iter().collect();
    let bset: HashSet<&T> = b.iter().collect();
    let (b_filtered, bindex) = keep_matching(&aset, b);
    let (a_filtered, aindex) = keep_matching(&bset, a);
    let worthwhile = b.len() - b_filtered.len() > 10 || a.len() - a_filtered.len() > 10;
    if worthwhile {
        (a_filtered, aindex, b_filtered, bindex, true)
    } else {
        (vec![], vec![], vec![], vec![], false)
    }
}
/// Core O(NP)-style comparison (Wu/Manber/Myers/Miller family).
///
/// Iterates `p` (extra edit budget) upward, sweeping diagonals toward the
/// target diagonal `delta` from both sides, and records every snake (run
/// of matching elements) as a [`SnakeNode`] in `arena`, chained via
/// `parent`. Returns the arena index of the last snake on a finishing
/// path, or `None` when an input is empty or the path contains no matches
/// (complete replacement).
fn myers_core<T: PartialEq>(a: &[T], b: &[T], arena: &mut Vec<SnakeNode>) -> Option<usize> {
    let m = a.len();
    let n = b.len();
    if m == 0 || n == 0 {
        return None;
    }
    // Diagonals k = y - x are stored shifted by `middle` so array indices
    // stay non-negative; `delta` is the shifted diagonal of the
    // bottom-right corner, where every finishing path must end.
    let middle = (m + 1) as isize;
    let delta = n as isize - m as isize + middle;
    let dmin = middle.min(delta);
    let dmax = middle.max(delta);
    let size = n + m + 2;
    let mut fp = Fp::new(size);
    for p in 0isize.. {
        let mut yv: isize = -1;
        let mut node_v: Option<usize> = None;
        // Sweep diagonals below `delta`, left to right.
        {
            let mut km = dmin - p;
            while km < delta {
                let t = fp.get(km + 1);
                if yv < t.0 {
                    // The neighbouring diagonal reached further: adopt it.
                    yv = t.0;
                    node_v = t.1;
                } else {
                    // Otherwise advance one step (an edit) on this one.
                    yv += 1;
                }
                let x = yv - km + middle;
                if x >= 0
                    && (x as usize) < m
                    && yv >= 0
                    && (yv as usize) < n
                    && a[x as usize] == b[yv as usize]
                {
                    // Slide along the snake while elements keep matching.
                    let snake_x = x as usize;
                    let mut xi = x as usize + 1;
                    let mut yi = yv as usize + 1;
                    while xi < m && yi < n && a[xi] == b[yi] {
                        xi += 1;
                        yi += 1;
                    }
                    let snake_len = xi - snake_x;
                    arena.push(SnakeNode {
                        parent: node_v,
                        x: snake_x,
                        y: yv as usize,
                        len: snake_len,
                    });
                    node_v = Some(arena.len() - 1);
                    yv = yi as isize;
                }
                fp.set(km, (yv, node_v));
                km += 1;
            }
        }
        let mut yh: isize = -1;
        let mut node_h: Option<usize> = None;
        // Sweep diagonals above `delta`, right to left.
        {
            let mut km = dmax + p;
            while km > delta {
                let t = fp.get(km - 1);
                if yh <= t.0 {
                    yh = t.0;
                    node_h = t.1;
                    yh += 1;
                }
                let x = yh - km + middle;
                if x >= 0
                    && (x as usize) < m
                    && yh >= 0
                    && (yh as usize) < n
                    && a[x as usize] == b[yh as usize]
                {
                    // Slide along the snake while elements keep matching.
                    let snake_x = x as usize;
                    let mut xi = x as usize + 1;
                    let mut yi = yh as usize + 1;
                    while xi < m && yi < n && a[xi] == b[yi] {
                        xi += 1;
                        yi += 1;
                    }
                    let snake_len = xi - snake_x;
                    arena.push(SnakeNode {
                        parent: node_h,
                        x: snake_x,
                        y: yh as usize,
                        len: snake_len,
                    });
                    node_h = Some(arena.len() - 1);
                    yh = yi as isize;
                }
                fp.set(km, (yh, node_h));
                km -= 1;
            }
        }
        // Finally evaluate the `delta` diagonal itself, taking whichever
        // neighbouring sweep reached further.
        let (mut y, mut node) = if yv < yh {
            fp.get(delta + 1)
        } else {
            let t = fp.get(delta - 1);
            (t.0 + 1, t.1)
        };
        let x = y - delta + middle;
        if x >= 0
            && (x as usize) < m
            && y >= 0
            && (y as usize) < n
            && a[x as usize] == b[y as usize]
        {
            let snake_x = x as usize;
            let mut xi = x as usize + 1;
            let mut yi = y as usize + 1;
            while xi < m && yi < n && a[xi] == b[yi] {
                xi += 1;
                yi += 1;
            }
            let snake_len = xi - snake_x;
            arena.push(SnakeNode {
                parent: node,
                x: snake_x,
                y: y as usize,
                len: snake_len,
            });
            node = Some(arena.len() - 1);
            y = yi as isize;
        }
        fp.set(delta, (y, node));
        // Done once the furthest point on `delta` reaches the end of `b`.
        if y >= n as isize {
            return node;
        }
    }
    // The `for` loop only exits via the return above; the furthest `y` on
    // the delta diagonal grows with every `p`.
    unreachable!()
}
/// Converts the snake chain produced by `myers_core` into a forward list
/// of [`MatchingBlock`]s in original (untrimmed) coordinates.
///
/// When `lines_discarded` is set, snake coordinates refer to the filtered
/// sequences and are mapped back through `aindex`/`bindex`; a snake that
/// is no longer contiguous after mapping is split into several blocks.
/// The common prefix/suffix trimmed before the core run are re-added, and
/// a zero-length sentinel at `(total_a, total_b)` terminates the list.
#[allow(clippy::too_many_arguments)]
fn build_matching_blocks(
    lastsnake: Option<usize>,
    arena: &[SnakeNode],
    common_prefix: usize,
    common_suffix: usize,
    aindex: &[usize],
    bindex: &[usize],
    lines_discarded: bool,
    total_a: usize,
    total_b: usize,
) -> Vec<MatchingBlock> {
    let mut blocks = Vec::new();
    let mut current = lastsnake;
    // Follow parent links from the last snake back to the first.
    while let Some(idx) = current {
        let node = &arena[idx];
        let snake = node.len;
        current = node.parent;
        if lines_discarded {
            // Walk the snake from its last element, translating filtered
            // indices back to trimmed-sequence positions.
            let mut xi = node.x + snake - 1;
            let mut yi = node.y + snake - 1;
            let mut xprev = aindex[xi] + common_prefix;
            let mut yprev = bindex[yi] + common_prefix;
            if snake > 1 {
                let mut newsnake = 1usize;
                for _ in 1..snake {
                    xi -= 1;
                    yi -= 1;
                    let xnext = aindex[xi] + common_prefix;
                    let ynext = bindex[yi] + common_prefix;
                    // A gap in either mapping ends the current block.
                    if xprev - xnext != 1 || yprev - ynext != 1 {
                        blocks.push(MatchingBlock {
                            a: xprev,
                            b: yprev,
                            len: newsnake,
                        });
                        newsnake = 0;
                    }
                    xprev = xnext;
                    yprev = ynext;
                    newsnake += 1;
                }
                blocks.push(MatchingBlock {
                    a: xprev,
                    b: yprev,
                    len: newsnake,
                });
            } else {
                blocks.push(MatchingBlock {
                    a: xprev,
                    b: yprev,
                    len: snake,
                });
            }
        } else {
            // No filtering: just shift by the trimmed common prefix.
            blocks.push(MatchingBlock {
                a: node.x + common_prefix,
                b: node.y + common_prefix,
                len: snake,
            });
        }
    }
    // The chain was walked backwards; restore forward order.
    blocks.reverse();
    if common_prefix > 0 {
        blocks.insert(
            0,
            MatchingBlock {
                a: 0,
                b: 0,
                len: common_prefix,
            },
        );
    }
    if common_suffix > 0 {
        blocks.push(MatchingBlock {
            a: total_a - common_suffix,
            b: total_b - common_suffix,
            len: common_suffix,
        });
    }
    // Zero-length sentinel relied on by `postprocess`/`blocks_to_opcodes`.
    blocks.push(MatchingBlock {
        a: total_a,
        b: total_b,
        len: 0,
    });
    blocks
}
/// Normalizes block boundaries: scanning from the end, a block absorbs
/// its predecessor when the predecessor is adjacent (on the `a` or `b`
/// side) and the `prev.len` elements immediately before the block are
/// equal in both sequences, so shifted matches merge into one block.
///
/// `blocks` must be non-empty; callers always append a zero-length
/// sentinel, which seeds `result` here.
fn postprocess<T: PartialEq>(blocks: &mut Vec<MatchingBlock>, a: &[T], b: &[T]) {
    let mut result = vec![*blocks.last().unwrap()];
    let mut i = blocks.len() as isize - 2;
    while i >= 0 {
        let mut cur = blocks[i as usize];
        i -= 1;
        while i >= 0 {
            let prev = blocks[i as usize];
            // `prev` touches `cur` on at least one side, and `cur` has
            // room to be shifted back by `prev.len`.
            if (prev.b + prev.len == cur.b || prev.a + prev.len == cur.a)
                && cur.a >= prev.len
                && cur.b >= prev.len
            {
                let slice_a = &a[cur.a - prev.len..cur.a];
                let slice_b = &b[cur.b - prev.len..cur.b];
                if slice_a == slice_b {
                    // Absorb `prev` by extending `cur` backwards.
                    cur.a -= prev.len;
                    cur.b -= prev.len;
                    cur.len += prev.len;
                    i -= 1;
                    continue;
                }
            }
            break;
        }
        result.push(cur);
    }
    // Blocks were collected back-to-front.
    result.reverse();
    *blocks = result;
}
/// Converts matching blocks into [`DiffChunk`] opcodes covering both
/// sequences end to end: the gap before each block becomes a
/// replace/delete/insert chunk, and the block itself an equal chunk.
/// The trailing zero-length sentinel block flushes the final gap.
fn blocks_to_opcodes(blocks: &[MatchingBlock]) -> Vec<DiffChunk> {
    let mut opcodes = Vec::new();
    let mut pos_a = 0usize;
    let mut pos_b = 0usize;
    for block in blocks {
        // Classify the gap between the previous block and this one.
        let gap_tag = match (pos_a < block.a, pos_b < block.b) {
            (true, true) => Some(DiffTag::Replace),
            (true, false) => Some(DiffTag::Delete),
            (false, true) => Some(DiffTag::Insert),
            (false, false) => None,
        };
        if let Some(tag) = gap_tag {
            opcodes.push(DiffChunk {
                tag,
                start_a: pos_a,
                end_a: block.a,
                start_b: pos_b,
                end_b: block.b,
            });
        }
        pos_a = block.a + block.len;
        pos_b = block.b + block.len;
        if block.len > 0 {
            opcodes.push(DiffChunk {
                tag: DiffTag::Equal,
                start_a: block.a,
                end_a: pos_a,
                start_b: block.b,
                end_b: pos_b,
            });
        }
    }
    opcodes
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `chunks` tiles both sequences exactly: contiguous,
    /// no gaps, no overlaps, full coverage of `[0, len_a)` / `[0, len_b)`.
    fn verify_coverage(chunks: &[DiffChunk], len_a: usize, len_b: usize) {
        let mut pos_a = 0;
        let mut pos_b = 0;
        for chunk in chunks {
            assert_eq!(chunk.start_a, pos_a, "gap in a coverage at {pos_a}");
            assert_eq!(chunk.start_b, pos_b, "gap in b coverage at {pos_b}");
            pos_a = chunk.end_a;
            pos_b = chunk.end_b;
        }
        assert_eq!(pos_a, len_a, "a not fully covered");
        assert_eq!(pos_b, len_b, "b not fully covered");
    }

    #[test]
    fn test_both_empty() {
        let a: Vec<&str> = vec![];
        let b: Vec<&str> = vec![];
        assert!(diff(&a, &b).is_empty());
    }

    #[test]
    fn test_identical() {
        let a = vec!["a", "b", "c"];
        let chunks = diff(&a, &a);
        assert_eq!(
            chunks,
            vec![DiffChunk {
                tag: DiffTag::Equal,
                start_a: 0,
                end_a: 3,
                start_b: 0,
                end_b: 3,
            }]
        );
    }

    #[test]
    fn test_completely_different() {
        let a = vec!["a", "b"];
        let b = vec!["c", "d"];
        let chunks = diff(&a, &b);
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].tag, DiffTag::Replace);
    }

    #[test]
    fn test_empty_a() {
        let a: Vec<&str> = vec![];
        let b = vec!["a", "b"];
        let chunks = diff(&a, &b);
        assert_eq!(
            chunks,
            vec![DiffChunk {
                tag: DiffTag::Insert,
                start_a: 0,
                end_a: 0,
                start_b: 0,
                end_b: 2,
            }]
        );
    }

    #[test]
    fn test_empty_b() {
        let a = vec!["a", "b"];
        let b: Vec<&str> = vec![];
        let chunks = diff(&a, &b);
        assert_eq!(
            chunks,
            vec![DiffChunk {
                tag: DiffTag::Delete,
                start_a: 0,
                end_a: 2,
                start_b: 0,
                end_b: 0,
            }]
        );
    }

    #[test]
    fn test_insert_at_end() {
        let a = vec!["a", "b"];
        let b = vec!["a", "b", "c"];
        let chunks = diff(&a, &b);
        assert_eq!(
            chunks,
            vec![
                DiffChunk {
                    tag: DiffTag::Equal,
                    start_a: 0,
                    end_a: 2,
                    start_b: 0,
                    end_b: 2
                },
                DiffChunk {
                    tag: DiffTag::Insert,
                    start_a: 2,
                    end_a: 2,
                    start_b: 2,
                    end_b: 3
                },
            ]
        );
    }

    #[test]
    fn test_insert_at_start() {
        let a = vec!["b", "c"];
        let b = vec!["a", "b", "c"];
        let chunks = diff(&a, &b);
        assert_eq!(
            chunks,
            vec![
                DiffChunk {
                    tag: DiffTag::Insert,
                    start_a: 0,
                    end_a: 0,
                    start_b: 0,
                    end_b: 1
                },
                DiffChunk {
                    tag: DiffTag::Equal,
                    start_a: 0,
                    end_a: 2,
                    start_b: 1,
                    end_b: 3
                },
            ]
        );
    }

    #[test]
    fn test_delete_from_middle() {
        let a = vec!["a", "b", "c"];
        let b = vec!["a", "c"];
        let chunks = diff(&a, &b);
        assert_eq!(
            chunks,
            vec![
                DiffChunk {
                    tag: DiffTag::Equal,
                    start_a: 0,
                    end_a: 1,
                    start_b: 0,
                    end_b: 1
                },
                DiffChunk {
                    tag: DiffTag::Delete,
                    start_a: 1,
                    end_a: 2,
                    start_b: 1,
                    end_b: 1
                },
                DiffChunk {
                    tag: DiffTag::Equal,
                    start_a: 2,
                    end_a: 3,
                    start_b: 1,
                    end_b: 2
                },
            ]
        );
    }

    #[test]
    fn test_replace_in_middle() {
        let a = vec!["a", "b", "c"];
        let b = vec!["a", "X", "c"];
        let chunks = diff(&a, &b);
        assert_eq!(chunks.len(), 3);
        assert_eq!(chunks[0].tag, DiffTag::Equal);
        assert_eq!(chunks[1].tag, DiffTag::Replace);
        assert_eq!(chunks[2].tag, DiffTag::Equal);
    }

    #[test]
    fn test_diff_lines() {
        let a = "line1\nline2\nline3";
        let b = "line1\nchanged\nline3";
        let chunks = diff_lines(a, b);
        assert_eq!(chunks.len(), 3);
        assert_eq!(chunks[0].tag, DiffTag::Equal);
        assert_eq!(chunks[1].tag, DiffTag::Replace);
        assert_eq!(chunks[2].tag, DiffTag::Equal);
    }

    #[test]
    fn test_opcodes_cover_full_range() {
        let a = vec!["a", "b", "c", "d", "e"];
        let b = vec!["a", "x", "c", "y", "e"];
        let chunks = diff(&a, &b);
        verify_coverage(&chunks, a.len(), b.len());
    }

    #[test]
    fn test_large_with_discarding() {
        // Enough unmatchable elements to trigger the discard heuristic.
        let mut a: Vec<String> = (0..50).map(|i| format!("unique_a_{i}")).collect();
        let mut b: Vec<String> = (0..50).map(|i| format!("unique_b_{i}")).collect();
        for i in 0..5 {
            let common = format!("common_{i}");
            a.push(common.clone());
            b.push(common);
        }
        let chunks = diff(&a, &b);
        assert!(!chunks.is_empty());
        verify_coverage(&chunks, a.len(), b.len());
    }

    #[test]
    fn test_single_element_sequences() {
        assert_eq!(
            diff(&["a"], &["a"]),
            vec![DiffChunk {
                tag: DiffTag::Equal,
                start_a: 0,
                end_a: 1,
                start_b: 0,
                end_b: 1
            },]
        );
        assert_eq!(
            diff(&["a"], &["b"]),
            vec![DiffChunk {
                tag: DiffTag::Replace,
                start_a: 0,
                end_a: 1,
                start_b: 0,
                end_b: 1
            },]
        );
    }

    #[test]
    fn test_common_prefix_only() {
        let a = vec!["a", "b", "c"];
        let b = vec!["a", "b", "x", "y"];
        let chunks = diff(&a, &b);
        assert_eq!(chunks[0].tag, DiffTag::Equal);
        assert_eq!(chunks[0].end_a, 2);
    }

    #[test]
    fn test_common_suffix_only() {
        let a = vec!["x", "b", "c"];
        let b = vec!["y", "b", "c"];
        let chunks = diff(&a, &b);
        let last = chunks.last().unwrap();
        assert_eq!(last.tag, DiffTag::Equal);
    }

    #[test]
    fn test_tokenize_empty() {
        assert!(tokenize("").is_empty());
    }

    #[test]
    fn test_tokenize_words_and_spaces() {
        let toks = tokenize("hello world");
        let texts: Vec<&str> = toks.iter().map(|t| t.text).collect();
        assert_eq!(texts, vec!["hello", " ", "world"]);
    }

    #[test]
    fn test_tokenize_punctuation() {
        let toks = tokenize("a+b");
        let texts: Vec<&str> = toks.iter().map(|t| t.text).collect();
        assert_eq!(texts, vec!["a", "+", "b"]);
    }

    #[test]
    fn test_tokenize_underscore_in_word() {
        let toks = tokenize("foo_bar baz");
        let texts: Vec<&str> = toks.iter().map(|t| t.text).collect();
        assert_eq!(texts, vec!["foo_bar", " ", "baz"]);
    }

    #[test]
    fn test_tokenize_offsets() {
        let toks = tokenize("ab cd");
        assert_eq!(toks[0].offset, 0);
        assert_eq!(toks[0].text, "ab");
        assert_eq!(toks[1].offset, 2);
        assert_eq!(toks[1].text, " ");
        assert_eq!(toks[2].offset, 3);
        assert_eq!(toks[2].text, "cd");
    }

    #[test]
    fn test_tokenize_multiple_spaces() {
        let toks = tokenize("a b");
        let texts: Vec<&str> = toks.iter().map(|t| t.text).collect();
        assert_eq!(texts, vec!["a", " ", "b"]);
    }

    #[test]
    fn test_tokenize_only_punctuation() {
        let toks = tokenize("+-=");
        assert_eq!(toks.len(), 3);
        assert_eq!(toks[0].text, "+");
        assert_eq!(toks[1].text, "-");
        assert_eq!(toks[2].text, "=");
    }

    #[test]
    fn test_diff_words_identical() {
        let (_, _, chunks) = diff_words("hello world", "hello world");
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].tag, DiffTag::Equal);
    }

    #[test]
    fn test_diff_words_single_word_change() {
        let (_, _, chunks) = diff_words("hello world", "hello earth");
        assert!(chunks.iter().any(|c| c.tag == DiffTag::Replace));
        assert!(chunks.iter().any(|c| c.tag == DiffTag::Equal));
    }

    #[test]
    fn test_diff_words_insertion() {
        let (_, _, chunks) = diff_words("a b", "a x b");
        assert!(chunks.iter().any(|c| c.tag == DiffTag::Insert));
    }

    #[test]
    fn test_diff_words_empty_strings() {
        let (_, _, chunks) = diff_words("", "");
        assert!(chunks.is_empty());
    }

    #[test]
    fn test_diff_words_one_empty() {
        let (_, _, chunks) = diff_words("", "hello");
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].tag, DiffTag::Insert);
    }

    #[test]
    fn test_multiple_insertions() {
        let a = vec!["a", "c", "e"];
        let b = vec!["a", "b", "c", "d", "e"];
        let chunks = diff(&a, &b);
        assert!(chunks.iter().filter(|c| c.tag == DiffTag::Insert).count() == 2);
        verify_coverage(&chunks, a.len(), b.len());
    }

    #[test]
    fn test_multiple_deletions() {
        let a = vec!["a", "b", "c", "d", "e"];
        let b = vec!["a", "c", "e"];
        let chunks = diff(&a, &b);
        assert!(chunks.iter().filter(|c| c.tag == DiffTag::Delete).count() == 2);
        verify_coverage(&chunks, a.len(), b.len());
    }

    #[test]
    fn test_interleaved_changes() {
        let a = vec!["1", "2", "3", "4", "5"];
        let b = vec!["1", "X", "3", "Y", "5"];
        let chunks = diff(&a, &b);
        verify_coverage(&chunks, a.len(), b.len());
        assert_eq!(chunks.len(), 5);
    }

    #[test]
    fn test_complete_replacement_long() {
        let a: Vec<&str> = vec!["a", "b", "c", "d", "e"];
        let b: Vec<&str> = vec!["v", "w", "x", "y", "z"];
        let chunks = diff(&a, &b);
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].tag, DiffTag::Replace);
        verify_coverage(&chunks, a.len(), b.len());
    }

    #[test]
    fn test_repeated_elements() {
        let a = vec!["a", "a", "a", "b", "b"];
        let b = vec!["a", "a", "b", "b", "b"];
        let chunks = diff(&a, &b);
        verify_coverage(&chunks, a.len(), b.len());
    }

    #[test]
    fn test_diff_lines_empty_lines() {
        let a = "line1\n\nline3";
        let b = "line1\ninserted\n\nline3";
        let chunks = diff_lines(a, b);
        verify_coverage(&chunks, 3, 4);
    }

    #[test]
    fn test_diff_lines_trailing_newline() {
        let a = "line1\nline2";
        let b = "line1\nline2\n";
        let chunks = diff_lines(a, b);
        assert!(!chunks.is_empty());
    }

    #[test]
    fn test_long_common_prefix_and_suffix() {
        let mut a: Vec<String> = (0..20).map(|i| format!("prefix_{i}")).collect();
        let mut b = a.clone();
        a.push("middle_a".to_string());
        b.push("middle_b".to_string());
        for i in 0..20 {
            let suffix = format!("suffix_{i}");
            a.push(suffix.clone());
            b.push(suffix);
        }
        let chunks = diff(&a, &b);
        verify_coverage(&chunks, a.len(), b.len());
        assert!(chunks.len() <= 5);
    }

    #[test]
    fn test_tokenize_unicode() {
        // Fixed: the literals were mojibake (double-encoded UTF-8) for
        // "héllo" and "你好"; the stray latin-1 NBSP would have been
        // tokenized as whitespace, contradicting the expected tokens.
        let toks = tokenize("héllo wörld 你好");
        let texts: Vec<&str> = toks.iter().map(|t| t.text).collect();
        assert_eq!(texts, vec!["héllo", " ", "wörld", " ", "你好"]);
    }

    #[test]
    fn test_tokenize_unicode_offsets() {
        // "é" is two bytes, so the tokens after it start two bytes later.
        let s = "a é b";
        let toks = tokenize(s);
        assert_eq!(toks[0].offset, 0);
        assert_eq!(toks[1].offset, 1);
        assert_eq!(toks[2].offset, 2);
        assert_eq!(toks[3].offset, 4);
        assert_eq!(toks[4].offset, 5);
    }

    #[test]
    fn test_tokenize_emoji() {
        let toks = tokenize("hello 👋 world");
        let texts: Vec<&str> = toks.iter().map(|t| t.text).collect();
        assert_eq!(texts, vec!["hello", " ", "👋", " ", "world"]);
    }

    #[test]
    fn test_tokenize_tabs_and_newlines() {
        let toks = tokenize("a\tb\nc");
        let texts: Vec<&str> = toks.iter().map(|t| t.text).collect();
        assert_eq!(texts, vec!["a", "\t", "b", "\n", "c"]);
    }

    #[test]
    fn test_diff_words_unicode() {
        // Fixed: "héllo" was mojibake for "héllo".
        let (_, _, chunks) = diff_words("héllo wörld", "héllo earth");
        assert!(chunks.iter().any(|c| c.tag == DiffTag::Replace));
        assert!(chunks.iter().any(|c| c.tag == DiffTag::Equal));
    }

    #[test]
    fn test_diff_integers() {
        let a = vec![1, 2, 3, 4, 5];
        let b = vec![1, 2, 99, 4, 5];
        let chunks = diff(&a, &b);
        verify_coverage(&chunks, a.len(), b.len());
        assert!(chunks.iter().any(|c| c.tag == DiffTag::Replace));
    }

    #[test]
    fn test_asymmetric_lengths() {
        let a = vec!["x"];
        let b = vec!["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"];
        let chunks = diff(&a, &b);
        verify_coverage(&chunks, a.len(), b.len());
    }

    #[test]
    fn test_asymmetric_lengths_reverse() {
        let a = vec!["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"];
        let b = vec!["x"];
        let chunks = diff(&a, &b);
        verify_coverage(&chunks, a.len(), b.len());
    }
}
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        // Round-trip property: applying the chunks to `a` reproduces `b`
        // (and vice versa), and the chunks tile both inputs exactly.
        #[test]
        fn test_diff_reconstruction(a in any::<Vec<String>>(), b in any::<Vec<String>>()) {
            let chunks = diff(&a, &b);
            // Rebuild `b`: copy equal regions from `a`, take inserted and
            // replacement regions from `b`, skip deletions.
            let mut reconstructed_b = Vec::new();
            for chunk in &chunks {
                match chunk.tag {
                    DiffTag::Equal => {
                        reconstructed_b.extend_from_slice(&a[chunk.start_a..chunk.end_a]);
                    }
                    DiffTag::Insert | DiffTag::Replace => {
                        reconstructed_b.extend_from_slice(&b[chunk.start_b..chunk.end_b]);
                    }
                    DiffTag::Delete => {}
                }
            }
            prop_assert_eq!(&reconstructed_b, &b);
            // Rebuild `a` symmetrically (deletions come from `a`,
            // insertions are skipped).
            let mut reconstructed_a = Vec::new();
            for chunk in &chunks {
                match chunk.tag {
                    DiffTag::Equal => {
                        reconstructed_a.extend_from_slice(&b[chunk.start_b..chunk.end_b]);
                    }
                    DiffTag::Delete | DiffTag::Replace => {
                        reconstructed_a.extend_from_slice(&a[chunk.start_a..chunk.end_a]);
                    }
                    DiffTag::Insert => {}
                }
            }
            prop_assert_eq!(&reconstructed_a, &a);
            // Coverage: chunks are contiguous and span both inputs.
            let mut pos_a = 0;
            let mut pos_b = 0;
            for chunk in &chunks {
                prop_assert_eq!(chunk.start_a, pos_a);
                prop_assert_eq!(chunk.start_b, pos_b);
                pos_a = chunk.end_a;
                pos_b = chunk.end_b;
            }
            prop_assert_eq!(pos_a, a.len());
            prop_assert_eq!(pos_b, b.len());
        }

        // Diffing a sequence against itself yields exactly one Equal
        // chunk (or nothing for the empty sequence).
        #[test]
        fn test_diff_identity(a in any::<Vec<String>>()) {
            let chunks = diff(&a, &a);
            if a.is_empty() {
                prop_assert!(chunks.is_empty());
            } else {
                prop_assert_eq!(chunks.len(), 1);
                prop_assert_eq!(chunks[0].tag, DiffTag::Equal);
                prop_assert_eq!(chunks[0].start_a, 0);
                prop_assert_eq!(chunks[0].end_a, a.len());
            }
        }
    }
}