pub fn levenshtein(a: &str, b: &str) -> usize {
let a_chars: Vec<char> = a.chars().collect();
let b_chars: Vec<char> = b.chars().collect();
let m = a_chars.len();
let n = b_chars.len();
if m == 0 {
return n;
}
if n == 0 {
return m;
}
let mut prev: Vec<usize> = (0..=n).collect();
let mut curr: Vec<usize> = vec![0; n + 1];
for i in 1..=m {
curr[0] = i;
for j in 1..=n {
let cost = if a_chars[i - 1] == b_chars[j - 1] {
0
} else {
1
};
curr[j] = (prev[j] + 1) .min(curr[j - 1] + 1) .min(prev[j - 1] + cost); }
std::mem::swap(&mut prev, &mut curr);
}
prev[n]
}
pub fn weighted_levenshtein(a: &str, b: &str, ins: f64, del: f64, sub: f64) -> f64 {
let a_chars: Vec<char> = a.chars().collect();
let b_chars: Vec<char> = b.chars().collect();
let m = a_chars.len();
let n = b_chars.len();
if m == 0 {
return n as f64 * ins;
}
if n == 0 {
return m as f64 * del;
}
let mut prev: Vec<f64> = (0..=n).map(|j| j as f64 * ins).collect();
let mut curr: Vec<f64> = vec![0.0; n + 1];
for i in 1..=m {
curr[0] = i as f64 * del;
for j in 1..=n {
let sub_cost = if a_chars[i - 1] == b_chars[j - 1] {
0.0
} else {
sub
};
curr[j] = (prev[j] + del) .min(curr[j - 1] + ins) .min(prev[j - 1] + sub_cost); }
std::mem::swap(&mut prev, &mut curr);
}
prev[n]
}
pub fn damerau_levenshtein(a: &str, b: &str) -> usize {
let a_chars: Vec<char> = a.chars().collect();
let b_chars: Vec<char> = b.chars().collect();
let m = a_chars.len();
let n = b_chars.len();
if m == 0 {
return n;
}
if n == 0 {
return m;
}
let mut d = vec![vec![0usize; n + 1]; m + 1];
for i in 0..=m {
d[i][0] = i;
}
for j in 0..=n {
d[0][j] = j;
}
for i in 1..=m {
for j in 1..=n {
let cost = if a_chars[i - 1] == b_chars[j - 1] {
0
} else {
1
};
d[i][j] = (d[i - 1][j] + 1) .min(d[i][j - 1] + 1) .min(d[i - 1][j - 1] + cost);
if i > 1
&& j > 1
&& a_chars[i - 1] == b_chars[j - 2]
&& a_chars[i - 2] == b_chars[j - 1]
{
d[i][j] = d[i][j].min(d[i - 2][j - 2] + 1);
}
}
}
d[m][n]
}
pub fn jaro(a: &str, b: &str) -> f64 {
if a.is_empty() && b.is_empty() {
return 1.0;
}
if a.is_empty() || b.is_empty() {
return 0.0;
}
let a_chars: Vec<char> = a.chars().collect();
let b_chars: Vec<char> = b.chars().collect();
let m_len = a_chars.len();
let n_len = b_chars.len();
let match_dist = (m_len.max(n_len) / 2).saturating_sub(1);
let mut a_matched = vec![false; m_len];
let mut b_matched = vec![false; n_len];
let mut matches = 0usize;
for i in 0..m_len {
let lo = i.saturating_sub(match_dist);
let hi = (i + match_dist + 1).min(n_len);
for j in lo..hi {
if !b_matched[j] && a_chars[i] == b_chars[j] {
a_matched[i] = true;
b_matched[j] = true;
matches += 1;
break;
}
}
}
if matches == 0 {
return 0.0;
}
let mut transpositions = 0usize;
let mut k = 0usize;
for i in 0..m_len {
if a_matched[i] {
while !b_matched[k] {
k += 1;
}
if a_chars[i] != b_chars[k] {
transpositions += 1;
}
k += 1;
}
}
let m = matches as f64;
let t = transpositions as f64 / 2.0;
(m / m_len as f64 + m / n_len as f64 + (m - t) / m) / 3.0
}
pub fn jaro_winkler(a: &str, b: &str, prefix_weight: f64) -> f64 {
let j = jaro(a, b);
let a_chars: Vec<char> = a.chars().collect();
let b_chars: Vec<char> = b.chars().collect();
let max_prefix = 4.min(a_chars.len().min(b_chars.len()));
let mut prefix = 0usize;
for i in 0..max_prefix {
if a_chars[i] == b_chars[i] {
prefix += 1;
} else {
break;
}
}
j + prefix as f64 * prefix_weight * (1.0 - j)
}
pub fn normalized_levenshtein(a: &str, b: &str) -> f64 {
let dist = levenshtein(a, b);
let max_len = a.chars().count().max(b.chars().count());
if max_len == 0 {
0.0
} else {
dist as f64 / max_len as f64
}
}
pub fn lcs_length(a: &str, b: &str) -> usize {
let a_chars: Vec<char> = a.chars().collect();
let b_chars: Vec<char> = b.chars().collect();
let m = a_chars.len();
let n = b_chars.len();
let mut dp = vec![vec![0usize; n + 1]; m + 1];
for i in 1..=m {
for j in 1..=n {
if a_chars[i - 1] == b_chars[j - 1] {
dp[i][j] = dp[i - 1][j - 1] + 1;
} else {
dp[i][j] = dp[i - 1][j].max(dp[i][j - 1]);
}
}
}
dp[m][n]
}
pub fn lcs(a: &str, b: &str) -> String {
let a_chars: Vec<char> = a.chars().collect();
let b_chars: Vec<char> = b.chars().collect();
let m = a_chars.len();
let n = b_chars.len();
let mut dp = vec![vec![0usize; n + 1]; m + 1];
for i in 1..=m {
for j in 1..=n {
if a_chars[i - 1] == b_chars[j - 1] {
dp[i][j] = dp[i - 1][j - 1] + 1;
} else {
dp[i][j] = dp[i - 1][j].max(dp[i][j - 1]);
}
}
}
let mut result = Vec::new();
let (mut i, mut j) = (m, n);
while i > 0 && j > 0 {
if a_chars[i - 1] == b_chars[j - 1] {
result.push(a_chars[i - 1]);
i -= 1;
j -= 1;
} else if dp[i - 1][j] >= dp[i][j - 1] {
i -= 1;
} else {
j -= 1;
}
}
result.reverse();
result.into_iter().collect()
}
fn traceback_alignment(
a_chars: &[char],
b_chars: &[char],
dir: &[Vec<u8>],
start_i: usize,
start_j: usize,
stop_zero: bool, scores: &[Vec<i32>],
) -> (String, String) {
let mut aligned_a = Vec::<char>::new();
let mut aligned_b = Vec::<char>::new();
let (mut i, mut j) = (start_i, start_j);
loop {
if i == 0 && j == 0 {
break;
}
if stop_zero && i < scores.len() && j < scores[i].len() && scores[i][j] == 0 {
break;
}
if i == 0 {
aligned_a.push('-');
aligned_b.push(b_chars[j - 1]);
j -= 1;
} else if j == 0 {
aligned_a.push(a_chars[i - 1]);
aligned_b.push('-');
i -= 1;
} else {
match dir[i][j] {
0 => {
aligned_a.push(a_chars[i - 1]);
aligned_b.push(b_chars[j - 1]);
i -= 1;
j -= 1;
}
1 => {
aligned_a.push(a_chars[i - 1]);
aligned_b.push('-');
i -= 1;
}
_ => {
aligned_a.push('-');
aligned_b.push(b_chars[j - 1]);
j -= 1;
}
}
}
}
aligned_a.reverse();
aligned_b.reverse();
(
aligned_a.into_iter().collect(),
aligned_b.into_iter().collect(),
)
}
pub fn smith_waterman(
a: &str,
b: &str,
match_score: i32,
mismatch: i32,
gap: i32,
) -> (i32, String, String) {
let a_chars: Vec<char> = a.chars().collect();
let b_chars: Vec<char> = b.chars().collect();
let m = a_chars.len();
let n = b_chars.len();
let mut score = vec![vec![0i32; n + 1]; m + 1];
let mut dir = vec![vec![0u8; n + 1]; m + 1];
let mut best_score = 0i32;
let mut best_i = 0usize;
let mut best_j = 0usize;
for i in 1..=m {
for j in 1..=n {
let diag_score = if a_chars[i - 1] == b_chars[j - 1] {
match_score
} else {
mismatch
};
let from_diag = score[i - 1][j - 1] + diag_score;
let from_up = score[i - 1][j] + gap;
let from_left = score[i][j - 1] + gap;
let best = from_diag.max(from_up).max(from_left).max(0);
score[i][j] = best;
if best == from_diag {
dir[i][j] = 0;
} else if best == from_up {
dir[i][j] = 1;
} else if best == from_left {
dir[i][j] = 2;
}
if best > best_score {
best_score = best;
best_i = i;
best_j = j;
}
}
}
if best_score == 0 {
return (0, String::new(), String::new());
}
let (aligned_a, aligned_b) =
traceback_alignment(&a_chars, &b_chars, &dir, best_i, best_j, true, &score);
(best_score, aligned_a, aligned_b)
}
pub fn needleman_wunsch(
a: &str,
b: &str,
match_score: i32,
mismatch: i32,
gap: i32,
) -> (i32, String, String) {
let a_chars: Vec<char> = a.chars().collect();
let b_chars: Vec<char> = b.chars().collect();
let m = a_chars.len();
let n = b_chars.len();
let mut score = vec![vec![0i32; n + 1]; m + 1];
let mut dir = vec![vec![0u8; n + 1]; m + 1];
for i in 0..=m {
score[i][0] = i as i32 * gap;
}
for j in 0..=n {
score[0][j] = j as i32 * gap;
if j > 0 {
dir[0][j] = 2; }
}
for i in 1..=m {
dir[i][0] = 1; }
for i in 1..=m {
for j in 1..=n {
let diag_score = if a_chars[i - 1] == b_chars[j - 1] {
match_score
} else {
mismatch
};
let from_diag = score[i - 1][j - 1] + diag_score;
let from_up = score[i - 1][j] + gap;
let from_left = score[i][j - 1] + gap;
let best = from_diag.max(from_up).max(from_left);
score[i][j] = best;
if best == from_diag {
dir[i][j] = 0;
} else if best == from_up {
dir[i][j] = 1;
} else {
dir[i][j] = 2;
}
}
}
let final_score = score[m][n];
let (aligned_a, aligned_b) = traceback_alignment(&a_chars, &b_chars, &dir, m, n, false, &score);
(final_score, aligned_a, aligned_b)
}
pub fn fuzzy_match<'a>(query: &str, candidates: &[&'a str]) -> Option<(&'a str, f64)> {
candidates
.iter()
.map(|&c| (c, jaro_winkler(query, c, 0.1)))
.max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
}
pub fn fuzzy_search<'a>(
query: &str,
candidates: &[&'a str],
threshold: f64,
) -> Vec<(&'a str, f64)> {
let mut results: Vec<(&'a str, f64)> = candidates
.iter()
.map(|&c| (c, jaro_winkler(query, c, 0.1)))
.filter(|(_, s)| *s >= threshold)
.collect();
results.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));
results
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_levenshtein_kitten_sitting() {
assert_eq!(levenshtein("kitten", "sitting"), 3);
}
#[test]
fn test_levenshtein_identical() {
assert_eq!(levenshtein("hello", "hello"), 0);
}
#[test]
fn test_levenshtein_empty() {
assert_eq!(levenshtein("", "abc"), 3);
assert_eq!(levenshtein("abc", ""), 3);
assert_eq!(levenshtein("", ""), 0);
}
#[test]
fn test_weighted_levenshtein_equal_costs() {
let w = weighted_levenshtein("kitten", "sitting", 1.0, 1.0, 1.0);
assert!((w - 3.0).abs() < 1e-9);
}
#[test]
fn test_weighted_levenshtein_custom_costs() {
let w_custom = weighted_levenshtein("ab", "cd", 1.0, 1.0, 2.0);
let w_standard = weighted_levenshtein("ab", "cd", 1.0, 1.0, 1.0);
assert!(w_custom >= w_standard);
}
#[test]
fn test_damerau_levenshtein_transposition() {
assert_eq!(damerau_levenshtein("ab", "ba"), 1);
}
#[test]
fn test_damerau_levenshtein_kitten() {
assert_eq!(damerau_levenshtein("kitten", "sitting"), 3);
}
#[test]
fn test_damerau_levenshtein_empty() {
assert_eq!(damerau_levenshtein("", "abc"), 3);
assert_eq!(damerau_levenshtein("xyz", ""), 3);
}
#[test]
fn test_jaro_identical() {
assert!((jaro("hello", "hello") - 1.0).abs() < 1e-9);
}
#[test]
fn test_jaro_empty() {
assert_eq!(jaro("", ""), 1.0);
assert_eq!(jaro("", "abc"), 0.0);
assert_eq!(jaro("abc", ""), 0.0);
}
#[test]
fn test_jaro_dwayne_duane() {
let s = jaro("DWAYNE", "DUANE");
assert!(s > 0.8 && s < 1.0, "jaro DWAYNE/DUANE = {}", s);
}
#[test]
fn test_jaro_winkler_prefix_boost() {
let j = jaro("PREFIX", "PREFIXY");
let jw = jaro_winkler("PREFIX", "PREFIXY", 0.1);
assert!(jw >= j, "JW should be >= Jaro");
}
#[test]
fn test_jaro_winkler_identical() {
assert!((jaro_winkler("same", "same", 0.1) - 1.0).abs() < 1e-9);
}
#[test]
fn test_normalized_levenshtein_identical() {
assert_eq!(normalized_levenshtein("abc", "abc"), 0.0);
}
#[test]
fn test_normalized_levenshtein_range() {
let d = normalized_levenshtein("kitten", "sitting");
assert!((0.0..=1.0).contains(&d));
}
#[test]
fn test_normalized_levenshtein_empty() {
assert_eq!(normalized_levenshtein("", ""), 0.0);
assert_eq!(normalized_levenshtein("", "abc"), 1.0);
}
#[test]
fn test_lcs_length_basic() {
assert_eq!(lcs_length("ABCBDAB", "BDCABA"), 4);
}
#[test]
fn test_lcs_length_empty() {
assert_eq!(lcs_length("", "abc"), 0);
assert_eq!(lcs_length("abc", ""), 0);
}
#[test]
fn test_lcs_string() {
let result = lcs("ABCBDAB", "BDCABA");
assert_eq!(result.len(), 4);
}
#[test]
fn test_lcs_identical() {
assert_eq!(lcs("hello", "hello"), "hello");
}
#[test]
fn test_smith_waterman_basic() {
let (score, a_aligned, b_aligned) = smith_waterman("ACGT", "ACGT", 2, -1, -1);
assert!(score > 0, "score should be positive");
assert!(!a_aligned.is_empty());
assert!(!b_aligned.is_empty());
}
#[test]
fn test_smith_waterman_no_match() {
let (score, a_aligned, b_aligned) = smith_waterman("AAAA", "TTTT", 2, -5, -5);
assert_eq!(score, 0);
assert!(a_aligned.is_empty());
assert!(b_aligned.is_empty());
}
#[test]
fn test_smith_waterman_partial_match() {
let (score, _, _) = smith_waterman("XACGX", "YACGY", 2, -1, -1);
assert!(score > 0);
}
#[test]
fn test_needleman_wunsch_identical() {
let (score, a_aligned, b_aligned) = needleman_wunsch("ACG", "ACG", 2, -1, -1);
assert!(score > 0);
assert_eq!(a_aligned, b_aligned);
assert_eq!(a_aligned, "ACG");
}
#[test]
fn test_needleman_wunsch_with_gap() {
let (score, a_aligned, b_aligned) = needleman_wunsch("AC", "AGC", 2, -1, -1);
assert_eq!(
a_aligned.len(),
b_aligned.len(),
"aligned lengths must match"
);
let _ = score;
}
#[test]
fn test_needleman_wunsch_empty_strings() {
let (score, a_aligned, b_aligned) = needleman_wunsch("", "", 2, -1, -1);
assert_eq!(score, 0);
assert!(a_aligned.is_empty());
assert!(b_aligned.is_empty());
}
#[test]
fn test_fuzzy_match_basic() {
let candidates = ["apple", "application", "banana", "appetizer"];
let result = fuzzy_match("appel", &candidates);
assert!(result.is_some());
let (best, score) = result.unwrap();
assert!(!best.is_empty());
assert!((0.0..=1.0).contains(&score));
}
#[test]
fn test_fuzzy_match_empty_candidates() {
let result = fuzzy_match("query", &[]);
assert!(result.is_none());
}
#[test]
fn test_fuzzy_search_threshold() {
let candidates = ["hello", "helo", "world", "word"];
let results = fuzzy_search("hello", &candidates, 0.9);
assert!(!results.is_empty());
for w in results.windows(2) {
assert!(w[0].1 >= w[1].1);
}
}
#[test]
fn test_fuzzy_search_no_results() {
let candidates = ["xyz", "abc", "def"];
let results = fuzzy_search("hello", &candidates, 0.99);
assert!(results.is_empty());
}
}