pub fn find_similar_strings(
target: &str,
candidates: &[String],
max_suggestions: usize,
) -> Vec<String> {
let mut distances: Vec<(String, usize)> = candidates
.iter()
.map(|c| (c.clone(), levenshtein_distance(target, c)))
.collect();
distances.sort_by_key(|(_, dist)| *dist);
distances
.into_iter()
.take(max_suggestions)
.filter(|(_, dist)| *dist <= 3) .map(|(s, _)| s)
.collect()
}
fn levenshtein_distance(s1: &str, s2: &str) -> usize {
let len1 = s1.len();
let len2 = s2.len();
if len1 == 0 {
return len2;
}
if len2 == 0 {
return len1;
}
let mut matrix = vec![vec![0; len2 + 1]; len1 + 1];
#[allow(clippy::needless_range_loop)]
for i in 0..=len1 {
matrix[i][0] = i;
}
#[allow(clippy::needless_range_loop)]
for j in 0..=len2 {
matrix[0][j] = j;
}
let s1_chars: Vec<char> = s1.chars().collect();
let s2_chars: Vec<char> = s2.chars().collect();
for (i, &c1) in s1_chars.iter().enumerate() {
for (j, &c2) in s2_chars.iter().enumerate() {
let cost = if c1 == c2 { 0 } else { 1 };
matrix[i + 1][j + 1] = (matrix[i][j + 1] + 1)
.min(matrix[i + 1][j] + 1)
.min(matrix[i][j] + cost);
}
}
matrix[len1][len2]
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_levenshtein_distance_identical() {
assert_eq!(levenshtein_distance("hello", "hello"), 0);
}
#[test]
fn test_levenshtein_distance_empty() {
assert_eq!(levenshtein_distance("", "hello"), 5);
assert_eq!(levenshtein_distance("hello", ""), 5);
assert_eq!(levenshtein_distance("", ""), 0);
}
#[test]
fn test_levenshtein_distance_one_char_diff() {
assert_eq!(levenshtein_distance("hello", "hallo"), 1);
assert_eq!(levenshtein_distance("hello", "hell"), 1);
assert_eq!(levenshtein_distance("hello", "hellow"), 1);
}
#[test]
fn test_levenshtein_distance_classic_examples() {
assert_eq!(levenshtein_distance("kitten", "sitting"), 3);
assert_eq!(levenshtein_distance("saturday", "sunday"), 3);
assert_eq!(levenshtein_distance("simulate", "simulat"), 1);
}
#[test]
fn test_find_similar_strings_exact_match() {
let candidates = vec![
"simulate".to_string(),
"validate".to_string(),
"plot".to_string(),
];
let suggestions = find_similar_strings("simulate", &candidates, 3);
assert_eq!(suggestions.first(), Some(&"simulate".to_string()));
}
#[test]
fn test_find_similar_strings_typo() {
let candidates = vec![
"simulate".to_string(),
"validate".to_string(),
"plot".to_string(),
];
let suggestions = find_similar_strings("simulat", &candidates, 3);
assert!(suggestions.contains(&"simulate".to_string()));
assert!(!suggestions.contains(&"plot".to_string())); }
#[test]
fn test_find_similar_strings_max_suggestions() {
let candidates = vec![
"aaaa".to_string(),
"aaab".to_string(),
"aabb".to_string(),
"abbb".to_string(),
];
let suggestions = find_similar_strings("aaaa", &candidates, 2);
assert!(suggestions.len() <= 2);
assert!(suggestions.contains(&"aaaa".to_string()));
}
#[test]
fn test_find_similar_strings_threshold() {
let candidates = vec!["simulate".to_string(), "wxyz".to_string()];
let suggestions = find_similar_strings("abc", &candidates, 10);
println!("{:?}", suggestions);
assert!(suggestions.is_empty());
}
#[test]
fn test_find_similar_strings_threshold_exactly_3() {
let candidates = vec!["xyz".to_string()];
let suggestions = find_similar_strings("abc", &candidates, 10);
assert_eq!(suggestions.len(), 1);
assert_eq!(suggestions[0], "xyz");
}
#[test]
fn test_find_similar_strings_empty_candidates() {
let candidates: Vec<String> = vec![];
let suggestions = find_similar_strings("test", &candidates, 3);
assert!(suggestions.is_empty());
}
#[test]
fn test_find_similar_strings_case_sensitive() {
let candidates = vec!["Simulate".to_string(), "simulate".to_string()];
let suggestions = find_similar_strings("simulate", &candidates, 2);
assert_eq!(suggestions.first(), Some(&"simulate".to_string()));
}
#[test]
fn test_levenshtein_performance() {
let s1 = "a".repeat(100);
let s2 = "b".repeat(100);
let distance = levenshtein_distance(&s1, &s2);
assert_eq!(distance, 100);
}
}