difference-rs 3.2.0

A Rust text diffing and assertion library.
Documentation
use std::cmp::max;

// strsplit is like `s.split(split)`, except that if `split` is "", it
// trims the leading and trailing empty elements, since the `lcs`
// logic won't handle those properly.
fn strsplit<'a>(s: &'a str, split: &str) -> Vec<&'a str> {
    let mut si = s.split(split);
    if split.is_empty() {
        si.next();
    }
    let mut v: Vec<&str> = si.collect();
    if split.is_empty() {
        v.pop();
    }
    v
}

// finds the longest common subsequences
// outputs the edit distance and a string containing
// all chars both inputs have in common
//
// This algorithm is based on
// https://en.wikipedia.org/wiki/Longest_common_subsequence_problem#Code_for_the_dynamic_programming_solution
#[expect(non_snake_case)]
pub fn lcs(orig: &str, edit: &str, split: &str) -> (i128, String) {
    // make list by custom splits
    let a = strsplit(orig, split);
    let b = strsplit(edit, split);

    let N = a.len();
    let M = b.len();

    let mut idx: Vec<usize> = vec![0; N * M];

    for i in 0..N {
        for j in 0..M {
            if b[j] == a[i] {
                if i == 0 || j == 0 {
                    idx[i * M + j] = 1;
                } else {
                    idx[i * M + j] = idx[(i - 1) * M + j - 1] + 1;
                }
            } else if i == 0 {
                if j == 0 {
                    idx[i * M + j] = 0;
                } else {
                    idx[i * M + j] = idx[i * M + j - 1];
                }
            } else if j == 0 {
                idx[i * M + j] = idx[(i - 1) * M + j];
            } else {
                idx[i * M + j] = max(idx[i * M + j - 1], idx[(i - 1) * M + j]);
            }
        }
    }

    let mut i = (N as i128) - 1;
    let mut j = (M as i128) - 1;
    let mut lcs = Vec::new();

    while i >= 0 && j >= 0 {
        #[expect(clippy::cast_sign_loss)] // Already validated line `let mut i = (N as i128) - 1;`
        let ui = i as usize;
        #[expect(clippy::cast_sign_loss)] // Already validated line `let mut j = (M as i128) - 1;`
        let uj = j as usize;
        if a[ui] == b[uj] {
            lcs.push(a[ui]);
            i -= 1;
            j -= 1;
        } else if j == 0 && i == 0 {
            break;
        } else if i == 0 || idx[ui * M + uj - 1] > idx[(ui - 1) * M + uj] {
            j -= 1;
        } else {
            i -= 1;
        }
    }

    lcs.reverse();
    ((N + M - 2 * lcs.len()) as i128, lcs.join(split))
}

#[test]
fn test_lcs() {
    assert_eq!(lcs("test", "tost", ""), (2, "tst".to_string()));
    assert_eq!(lcs("test", "test", ""), (0, "test".to_string()));

    assert_eq!(lcs("test", "test", " "), (0, "test".to_string()));

    assert_eq!(
        lcs(
            "The quick brown fox jumps over the lazy dog",
            "The quick brown dog leaps over the lazy cat",
            "",
        ),
        (16, "The quick brown o ps over the lazy ".to_string())
    );
    assert_eq!(
        lcs(
            "The quick brown fox jumps over the lazy dog",
            "The quick brown dog leaps over the lazy cat",
            " ",
        ),
        (6, "The quick brown over the lazy".to_string())
    );

    assert_eq!(
        lcs(
            "The quick brown fox jumps over the lazy dog",
            "The quick brown dog leaps over the lazy cat",
            "\n",
        ),
        (2, String::new())
    );
    assert_eq!(
        lcs(
            "The quick brown fox jumps over the lazy dog",
            "The quick brown fox jumps over the lazy dog",
            "\n",
        ),
        (0, "The quick brown fox jumps over the lazy dog".to_string())
    );

    assert_eq!(
        lcs("a b : c", "b a : b : c", " "),
        (2, "a b : c".to_string())
    );

    assert_eq!(lcs("", "a b c", ""), (5, String::new()));

    assert_eq!(lcs("", " a", " "), (1, String::new()));
}