use crate::ops::string_similarity::validation::{validate_pair, SimilarityError};
/// Validated entry point for the byte-wise Jaro similarity of `a` and `b`.
///
/// The pair is first passed to `validate_pair`; only when that succeeds is
/// the score computed by `jaro_unchecked`.
///
/// # Errors
///
/// Returns the error produced by `validate_pair` when it rejects the inputs.
pub fn jaro(a: &[u8], b: &[u8]) -> Result<f32, SimilarityError> {
    validate_pair(a, b)?;
    let score = jaro_unchecked(a, b);
    Ok(score)
}
/// Same as [`jaro`], but returns the score as its raw `f32` bit pattern
/// (the result of `f32::to_bits`).
///
/// # Errors
///
/// Returns whatever error [`jaro`] returns for the given pair.
pub fn jaro_bits(a: &[u8], b: &[u8]) -> Result<u32, SimilarityError> {
    jaro(a, b).map(f32::to_bits)
}
/// Computes the Jaro similarity of two byte slices without validating them.
///
/// Comparison is byte-wise. Returns `1.0` when the slices are equal
/// (including two empty slices), `0.0` when exactly one is empty or when no
/// byte matches inside the search window, and otherwise the mean of
/// `m / a.len()`, `m / b.len()` and `(m - t / 2) / m`, where `m` is the number
/// of matched bytes and `t` the number of matched positions whose bytes
/// differ when both matched sequences are read in order.
pub(crate) fn jaro_unchecked(a: &[u8], b: &[u8]) -> f32 {
    if a == b {
        return 1.0;
    }
    if a.is_empty() || b.is_empty() {
        return 0.0;
    }

    // A byte of `a` may only pair with a byte of `b` at most `radius`
    // positions away from its own index.
    let radius = (a.len().max(b.len()) / 2).saturating_sub(1);
    let mut used_in_a = vec![false; a.len()];
    let mut used_in_b = vec![false; b.len()];
    let mut common = 0usize;

    for (pos, byte) in a.iter().copied().enumerate() {
        let lo = pos.saturating_sub(radius);
        let hi = b.len().min(pos + radius + 1);
        // Greedy: take the first still-unclaimed equal byte in the window.
        // When `lo >= hi` the range is empty and nothing is matched.
        let hit = (lo..hi).find(|&k| !used_in_b[k] && b[k] == byte);
        if let Some(k) = hit {
            used_in_a[pos] = true;
            used_in_b[k] = true;
            common += 1;
        }
    }

    if common == 0 {
        return 0.0;
    }

    // Both flag vectors mark exactly `common` positions, so the two matched
    // streams have equal length and can be compared pairwise in order.
    let matched_a = a
        .iter()
        .zip(&used_in_a)
        .filter_map(|(&byte, &flag)| flag.then_some(byte));
    let matched_b = b
        .iter()
        .zip(&used_in_b)
        .filter_map(|(&byte, &flag)| flag.then_some(byte));
    let out_of_order = matched_a.zip(matched_b).filter(|(x, y)| x != y).count();

    let m = common as f32;
    let half_swaps = out_of_order as f32 / 2.0;
    let coverage_a = m / a.len() as f32;
    let coverage_b = m / b.len() as f32;
    let ordering = (m - half_swaps) / m;
    (coverage_a + coverage_b + ordering) / 3.0
}