use std::cmp::{max, min};
use crate::util::*;
pub fn levenshtein_naive<T: PartialEq>(source: &[T], target: &[T]) -> usize {
if source.is_empty() || target.is_empty() {
return max(source.len(), target.len());
}
if source.last() == target.last() {
return levenshtein_naive(up_to_last(source), up_to_last(target));
}
let delete = levenshtein_naive(up_to_last(source), target) + 1;
let insert = levenshtein_naive(source, up_to_last(target)) + 1;
let substitute = levenshtein_naive(up_to_last(source), up_to_last(target)) + 1;
min(min(insert, delete), substitute)
}
pub fn levenshtein_tabulation<T: PartialEq>(source: &[T], target: &[T]) -> (usize, DistanceMatrix) {
let m = source.len();
let n = target.len();
let mut distances = get_distance_table(m, n);
for i in 1..distances.len() {
for j in 1..distances[0].len() {
if source[i - 1] == target[j - 1] {
distances[i][j] = distances[i - 1][j - 1];
continue;
}
let delete = distances[i - 1][j] + 1;
let insert = distances[i][j - 1] + 1;
let substitute = distances[i - 1][j - 1] + 1;
distances[i][j] = min(min(delete, insert), substitute);
}
}
(distances[m][n], distances)
}
pub fn levenshtein_memoization<T: PartialEq>(
source: &[T],
target: &[T],
) -> (usize, DistanceMatrix) {
fn levenshtein_memoization_helper<T: PartialEq>(
source: &[T],
target: &[T],
distances: &mut DistanceMatrix,
) -> usize {
if distances[source.len()][target.len()] < usize::MAX {
return distances[source.len()][target.len()];
}
if source.is_empty() || target.is_empty() {
return max(source.len(), target.len());
}
let k = if source.last() == target.last() { 0 } else { 1 };
let delete = levenshtein_memoization_helper(up_to_last(source), target, distances) + 1;
let insert = levenshtein_memoization_helper(source, up_to_last(target), distances) + 1;
let substitute =
levenshtein_memoization_helper(up_to_last(source), up_to_last(target), distances) + k;
let distance = min(min(delete, insert), substitute);
distances[source.len()][target.len()] = distance;
distance
}
let mut distances = get_distance_table(source.len(), target.len());
let distance = levenshtein_memoization_helper(source, target, &mut distances);
(distance, distances)
}
#[cfg(test)]
mod tests {
use crate::distance::*;
#[test]
fn levenshtein_naive_test() {
let s1 = String::from("LAWN");
let s2 = String::from("FFLAWANN");
let expected_leven = 4;
let leven_naive = levenshtein_naive(s1.as_bytes(), s2.as_bytes());
assert_eq!(leven_naive, expected_leven);
}
#[test]
fn levenshtein_memoization_test() {
let s1 = String::from("LAWN");
let s2 = String::from("FFLAWANN");
let expected_leven = 4;
let (leven_memo, _) = levenshtein_memoization(s1.as_bytes(), s2.as_bytes());
assert_eq!(leven_memo, expected_leven);
}
#[test]
fn levenshtein_tabulation_test() {
let s1 = String::from("LAWN");
let s2 = String::from("FFLAWANN");
let expected_leven = 4;
let (leven_tab, _) = levenshtein_tabulation(s1.as_bytes(), s2.as_bytes());
assert_eq!(leven_tab, expected_leven);
}
}