pub fn distance(s1: &str, s2: &str) -> f64 {
if s1 == s2 {
return 0.0;
}
let len1 = s1.len();
let len2 = s2.len();
if len1 == 0 && len2 == 0 {
return 0.0;
}
if len1 == 0 || len2 == 0 {
return 1.0;
}
let max_len = len1.max(len2);
let min_len = len1.min(len2);
if max_len > min_len * 3 {
return 1.0;
}
if s1.is_ascii() && s2.is_ascii() {
return distance_ascii_fast(s1.as_bytes(), s2.as_bytes());
}
distance_unicode(s1, s2)
}
#[inline]
fn distance_ascii_fast(s1: &[u8], s2: &[u8]) -> f64 {
let len1 = s1.len();
let len2 = s2.len();
let max_len = len1.max(len2);
let mut c1 = 0;
let mut c2 = 0;
let mut lcss = 0; let mut local_cs = 0; const OFFSET_THRESHOLD: usize = 5;
while c1 < len1 && c2 < len2 {
if s1[c1] == s2[c2] {
local_cs += 1;
} else {
lcss += local_cs;
local_cs = 0;
if c1 != c2 {
let start1 = c1.saturating_sub(OFFSET_THRESHOLD);
let end1 = (c1 + OFFSET_THRESHOLD + 1).min(len1);
let start2 = c2.saturating_sub(OFFSET_THRESHOLD);
let end2 = (c2 + OFFSET_THRESHOLD + 1).min(len2);
if let Some(pos) = s2[start2..end2].iter().position(|&b| b == s1[c1]) {
c2 = start2 + pos;
} else if let Some(pos) = s1[start1..end1].iter().position(|&b| b == s2[c2]) {
c1 = start1 + pos;
}
}
}
c1 += 1;
c2 += 1;
}
lcss += local_cs;
1.0 - (lcss as f64 / max_len as f64)
}
fn distance_unicode(s1: &str, s2: &str) -> f64 {
let s1_chars: Vec<char> = s1.chars().collect();
let s2_chars: Vec<char> = s2.chars().collect();
let len1 = s1_chars.len();
let len2 = s2_chars.len();
let max_len = len1.max(len2);
let mut c1 = 0;
let mut c2 = 0;
let mut lcss = 0;
let mut local_cs = 0;
const OFFSET_THRESHOLD: usize = 5;
while c1 < len1 && c2 < len2 {
if s1_chars[c1] == s2_chars[c2] {
local_cs += 1;
} else {
lcss += local_cs;
local_cs = 0;
if c1 != c2 {
let start1 = c1.saturating_sub(OFFSET_THRESHOLD);
let end1 = (c1 + OFFSET_THRESHOLD + 1).min(len1);
let start2 = c2.saturating_sub(OFFSET_THRESHOLD);
let end2 = (c2 + OFFSET_THRESHOLD + 1).min(len2);
if let Some(pos) = s2_chars[start2..end2]
.iter()
.position(|&ch| ch == s1_chars[c1])
{
c2 = start2 + pos;
} else if let Some(pos) = s1_chars[start1..end1]
.iter()
.position(|&ch| ch == s2_chars[c2])
{
c1 = start1 + pos;
}
}
}
c1 += 1;
c2 += 1;
}
lcss += local_cs;
1.0 - (lcss as f64 / max_len as f64)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_identical_strings() {
assert_eq!(distance("hello", "hello"), 0.0);
assert_eq!(distance("", ""), 0.0);
}
#[test]
fn test_completely_different_strings() {
let dist = distance("abc", "xyz");
assert!(dist > 0.5); }
#[test]
fn test_empty_strings() {
assert_eq!(distance("", "hello"), 1.0);
assert_eq!(distance("hello", ""), 1.0);
}
#[test]
fn test_similar_strings() {
let dist = distance("gmail", "gmaik");
assert!(dist > 0.0 && dist < 0.5);
let dist = distance("yahoo", "yaho");
assert!(dist > 0.0 && dist < 0.5); }
#[test]
fn test_case_sensitivity() {
let dist = distance("Gmail", "gmail");
assert!(dist > 0.0); }
#[test]
fn test_substring() {
let dist = distance("test", "testing");
assert!(dist > 0.0 && dist < 1.0);
}
}