use vecstore::fuzzy::{
damerau_levenshtein_distance, levenshtein_distance, similarity_score, suggest_corrections,
BKTree, FuzzyMatcher,
};
fn main() {
println!("🔍 Fuzzy Search with Typo Tolerance Example\n");
println!("📏 Distance Calculations:");
println!("─────────────────────────");
let dist1 = levenshtein_distance("kitten", "sitting");
println!(" Levenshtein('kitten', 'sitting') = {}", dist1);
let dist2 = levenshtein_distance("hello", "helo");
println!(" Levenshtein('hello', 'helo') = {}", dist2);
let dist3 = damerau_levenshtein_distance("teh", "the");
println!(
" Damerau-Levenshtein('teh', 'the') = {} (transposition)",
dist3
);
let dist4 = levenshtein_distance("teh", "the");
println!(" Levenshtein('teh', 'the') = {} (no transposition)", dist4);
let sim = similarity_score("hello", "helo");
println!("\n Similarity('hello', 'helo') = {:.2}%", sim * 100.0);
println!("\n\n🎯 Fuzzy Matcher:");
println!("─────────────────");
let matcher = FuzzyMatcher::new(2);
let query = "programming";
let candidates = vec![
"programming".to_string(), "programing".to_string(), "progamming".to_string(), "coding".to_string(), ];
println!(" Query: '{}'", query);
println!(" Max distance: 2 edits\n");
for candidate in &candidates {
let distance = matcher.distance(query, candidate);
let is_match = matcher.is_match(query, candidate);
let status = if is_match {
"✓ MATCH"
} else {
"✗ no match"
};
println!(" {} '{}' (distance: {})", status, candidate, distance);
}
if let Some((idx, dist)) = matcher.find_best_match(query, &candidates) {
println!("\n Best match: '{}' (distance: {})", candidates[idx], dist);
}
println!("\n\n🌳 BK-Tree (Burkhard-Keller Tree):");
println!("──────────────────────────────────");
let mut tree = BKTree::new();
let dictionary = vec![
"apple",
"application",
"apply",
"banana",
"band",
"can",
"candy",
"cat",
"dog",
"door",
"example",
"hello",
"help",
"programming",
"python",
"rust",
"search",
"world",
];
println!(" Building BK-tree with {} words...", dictionary.len());
for word in &dictionary {
tree.insert(word.to_string());
}
println!(" ✓ Built tree with {} nodes\n", tree.len());
let typos = vec![
("aple", 1), ("helo", 1), ("serch", 1), ("progamming", 2), ];
for (typo, max_dist) in typos {
let matches = tree.search(typo, max_dist);
println!(" Typo: '{}' (max distance: {})", typo, max_dist);
if matches.is_empty() {
println!(" No matches found");
} else {
println!(" Matches: {}", matches.join(", "));
}
if let Some((closest, dist)) = tree.find_closest(typo) {
println!(" Closest: '{}' (distance: {})", closest, dist);
}
println!();
}
println!("\n💡 Query Correction / Spell Checking:");
println!("─────────────────────────────────────");
let user_queries = vec![
"progrmming", "pyton", "exmple", "wrld", ];
let dict_strings: Vec<String> = dictionary.iter().map(|s| s.to_string()).collect();
for query in user_queries {
println!(" User query: '{}'", query);
let suggestions = suggest_corrections(query, &dict_strings, 2, 3);
if suggestions.is_empty() {
println!(" No suggestions (too different)");
} else {
println!(" Suggestions: {}", suggestions.join(", "));
for sugg in &suggestions {
let dist = levenshtein_distance(query, sugg);
println!(" - '{}' (distance: {})", sugg, dist);
}
}
println!();
}
println!("\n🔤 Case Sensitivity:");
println!("───────────────────");
let matcher_ci = FuzzyMatcher::new(1).with_case_sensitive(false);
println!(" Case-insensitive matching:");
println!(
" 'Hello' ~ 'hello': {}",
matcher_ci.is_match("Hello", "hello")
);
println!(
" 'RUST' ~ 'rust': {}",
matcher_ci.is_match("RUST", "rust")
);
let matcher_cs = FuzzyMatcher::new(0).with_case_sensitive(true);
println!("\n Case-sensitive matching (exact only):");
println!(
" 'Hello' ~ 'hello': {}",
matcher_cs.is_match("Hello", "hello")
);
println!(
" 'Hello' ~ 'Hello': {}",
matcher_cs.is_match("Hello", "Hello")
);
println!("\n\n🔄 Transposition Handling:");
println!("─────────────────────────");
let common_typos = vec![
("teh", "the"),
("recieve", "receive"),
("occured", "occurred"),
];
println!(" Common transposition typos:\n");
for (typo, correct) in common_typos {
let lev_dist = levenshtein_distance(typo, correct);
let dam_dist = damerau_levenshtein_distance(typo, correct);
println!(" '{}' → '{}'", typo, correct);
println!(" Levenshtein: {} edits", lev_dist);
println!(" Damerau-Levenshtein: {} edit(s)", dam_dist);
if dam_dist < lev_dist {
println!(" ✓ Handles transposition better!");
}
println!();
}
println!("\n⚡ Performance:");
println!("──────────────");
let large_dict: Vec<String> = (0..1000).map(|i| format!("word{:04}", i)).collect();
println!(" Dictionary size: {} words", large_dict.len());
let mut tree_large = BKTree::from_words(large_dict.clone());
let search_query = "word0500";
let start = std::time::Instant::now();
let matches = tree_large.search(search_query, 2);
let elapsed = start.elapsed();
println!(" BK-tree search for '{}' (max distance: 2)", search_query);
println!(" Found {} matches in {:?}", matches.len(), elapsed);
let matcher_perf = FuzzyMatcher::new(2);
let start2 = std::time::Instant::now();
let linear_matches = matcher_perf.find_all_matches(search_query, &large_dict);
let elapsed2 = start2.elapsed();
println!("\n Linear search (for comparison):");
println!(
" Found {} matches in {:?}",
linear_matches.len(),
elapsed2
);
if elapsed2 > elapsed {
let speedup = elapsed2.as_micros() as f64 / elapsed.as_micros() as f64;
println!(" BK-tree is {:.1}x faster!", speedup);
}
println!("\n✅ Fuzzy search example complete!");
println!("\n💡 Use Cases:");
println!(" - Spell checking and autocorrect");
println!(" - Search with typo tolerance");
println!(" - Duplicate detection");
println!(" - Query suggestions");
println!(" - Fuzzy autocomplete");
}