use std::collections::HashSet;
/// Extracts the set of distinct "content" words from `q`.
///
/// The text is lowercased and split on every non-alphanumeric character. A
/// piece is kept only if it is at least three characters long, which discards
/// the empty pieces produced by adjacent separators as well as very short
/// words such as "am" or "I".
///
/// Returns the surviving words as owned, lowercased strings.
fn content_set(q: &str) -> HashSet<String> {
    /// Minimum length, in characters, for a word to count as content.
    const MIN_WORD_CHARS: usize = 3;

    q.to_lowercase()
        .split(|c: char| !c.is_alphanumeric())
        // Count characters rather than UTF-8 bytes so that short non-ASCII
        // words (e.g. "où": two characters, three bytes) are filtered exactly
        // like their ASCII counterparts.
        .filter(|word| word.chars().count() >= MIN_WORD_CHARS)
        .map(str::to_string)
        .collect()
}
/// Jaccard similarity between the content-word sets of `a` and `b`.
///
/// Both inputs are reduced to word sets with `content_set`; the result is the
/// number of shared words divided by the number of distinct words across both
/// sets. The value lies in `0.0..=1.0`, and is `0.0` whenever the two sets
/// have no word in common (including when either set is empty).
pub fn similarity(a: &str, b: &str) -> f32 {
    let words_a = content_set(a);
    let words_b = content_set(b);

    let shared = words_a.intersection(&words_b).count();
    if shared == 0 {
        // Covers empty sets as well as disjoint ones; it also guarantees the
        // denominator below is non-zero.
        return 0.0;
    }

    // |A ∪ B| = |A| + |B| − |A ∩ B|
    let combined = words_a.len() + words_b.len() - shared;
    shared as f32 / combined as f32
}
/// Finds the entry of `existing` that most closely resembles `q`.
///
/// Entries that are string-equal to `q` are skipped, and any candidate whose
/// [`similarity`] to `q` is below `0.5` is rejected. Among the remaining
/// candidates the highest-scoring one is returned; when several share the top
/// score, the one appearing last in `existing` wins. Returns `None` if no
/// candidate qualifies.
pub fn near_match<'a>(q: &str, existing: &'a [String]) -> Option<&'a String> {
    let mut best: Option<(f32, &'a String)> = None;

    for candidate in existing {
        if candidate.as_str() == q {
            continue;
        }

        let score = similarity(q, candidate);
        if score >= 0.5 {
            match best {
                // Keep the current leader only when it is strictly better, so
                // a later candidate with an equal score replaces it.
                Some((top, _)) if top > score => {}
                _ => best = Some((score, candidate)),
            }
        }
    }

    best.map(|(_, entry)| entry)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the owned list of stored questions that `near_match` searches.
    fn questions(items: &[&str]) -> Vec<String> {
        items.iter().copied().map(String::from).collect()
    }

    #[test]
    fn rephrasing_clears_the_bar_unrelated_does_not() {
        let existing = questions(&["What am I avoiding?", "Where does my anger live?"]);

        // A rephrasing of the first question is matched to it.
        let rephrased = near_match("what am i avoiding right now", &existing);
        assert_eq!(rephrased, Some(&existing[0]));

        // A question sharing no content words with any entry matches nothing.
        let unrelated = near_match("how do I rest more deeply", &existing);
        assert_eq!(unrelated, None);
    }

    #[test]
    fn exact_match_is_not_offered() {
        let existing = questions(&["What am I avoiding?"]);

        // An entry identical to the query is excluded from the candidates.
        let result = near_match("What am I avoiding?", &existing);
        assert_eq!(result, None);
    }
}