// natural-xml-diff 0.1.0
//
// Natural diffing between XML documents
// Documentation
use triple_accel::levenshtein::levenshtein_simd_k_str;

// Cap on the edit distance handed to the Levenshtein search: once two texts
// are more than this many edits apart we stop comparing them any further.
const MAX_EDITS: u32 = 1_000;

/// Scores how alike two strings are, based on their Levenshtein distance.
///
/// The score is `1 - distance / max(a.len(), b.len())`, where the lengths are
/// the byte lengths of the inputs: `1.0` means the strings are identical and
/// `0.0` means every position had to be edited.
///
/// Two empty strings are identical, so they score `1.0`.
///
/// Returns `None` when `levenshtein_simd_k_str` reports no distance within
/// `MAX_EDITS` edits.
pub(crate) fn text_similarity(a: &str, b: &str) -> Option<f32> {
    let longest = a.len().max(b.len());
    if longest == 0 {
        // Both inputs are empty. Without this guard the ratio below would be
        // 0.0 / 0.0, i.e. NaN, which compares false against every threshold.
        return Some(1.0);
    }
    levenshtein_simd_k_str(a, b, MAX_EDITS)
        .map(|distance| 1f32 - (distance as f32) / (longest as f32))
}

#[cfg(test)]
mod test {
    use super::*;

    // Each expected score below is 1 - edits / length_of_longer_input.

    #[test]
    fn test_complete_similarity() {
        // No edits needed: 1 - 0/5.
        assert_eq!(text_similarity("hello", "hello"), Some(1.0));
    }

    #[test]
    fn test_partial_similarity() {
        // One trailing character removed: 1 - 1/5.
        assert_eq!(text_similarity("hello", "hell"), Some(0.8));
    }

    #[test]
    fn test_complete_difference() {
        // Every one of the five positions differs: 1 - 5/5.
        assert_eq!(text_similarity("hello", "arghu"), Some(0.0));
    }

    #[test]
    fn test_complete_difference_size() {
        // Comparing against the empty string removes all five characters.
        assert_eq!(text_similarity("hello", ""), Some(0.0));
    }

    #[test]
    fn test_some_similarity() {
        // A single substitution in the middle: 1 - 1/5.
        assert_eq!(text_similarity("hello", "helvo"), Some(0.8));
    }

    #[test]
    fn test_some_similarity_size() {
        // Two trailing characters removed: 1 - 2/5.
        assert_eq!(text_similarity("hello", "hel"), Some(0.6));
    }

    #[test]
    fn test_similarity_longer_sentence() {
        // "through space " is 14 inserted bytes in a 44-byte sentence.
        let shorter = "We journey from Lave to Reorte";
        let longer = "We journey through space from Lave to Reorte";
        assert_eq!(text_similarity(shorter, longer), Some(0.6818182));
    }
}