use unicode_normalization::UnicodeNormalization;
pub fn normalize(s: &str) -> String {
s.nfc().collect::<String>().to_lowercase()
}
/// Removes diacritics from `s` so that accented and unaccented spellings
/// compare equal.
///
/// The input is decomposed with NFD, which splits accented letters into a base
/// character followed by combining marks. Every code point in one of the
/// Unicode combining-mark blocks listed below is then dropped. Finally the
/// stroked letters `đ` / `Đ` (U+0111 / U+0110) are folded to `d` / `D`: they
/// have no canonical decomposition, so the mark filter alone never touches
/// them, which would keep an unaccented Vietnamese query such as "da nang"
/// from matching "Đà Nẵng".
///
/// Case is preserved. The result is not recomposed: anything that is not
/// stripped stays in its decomposed (NFD) form.
pub fn strip_diacritics(s: &str) -> String {
    s.nfd()
        .filter(|&c| {
            !matches!(
                c,
                // Combining Diacritical Marks
                '\u{0300}'..='\u{036F}'
                    // Combining Diacritical Marks Extended
                    | '\u{1AB0}'..='\u{1AFF}'
                    // Combining Diacritical Marks Supplement
                    | '\u{1DC0}'..='\u{1DFF}'
                    // Combining Diacritical Marks for Symbols
                    | '\u{20D0}'..='\u{20FF}'
                    // Combining Half Marks
                    | '\u{FE20}'..='\u{FE2F}'
            )
        })
        .map(|c| match c {
            // Latin d with stroke: not decomposable, so fold it explicitly.
            '\u{0111}' => 'd',
            '\u{0110}' => 'D',
            other => other,
        })
        .collect()
}
/// Reports whether every whitespace-separated term of `query` occurs as a
/// whole word of `text`.
///
/// Both strings are canonicalized with `normalize` before comparison, so the
/// match is case-insensitive. When `accent_insensitive` is `true` the
/// normalized strings are additionally passed through `strip_diacritics`.
///
/// Terms are compared against complete words only (no prefix or substring
/// matching), and their order in `query` is irrelevant. A query with no terms
/// (empty or whitespace only) matches every text.
pub fn matches(query: &str, text: &str, accent_insensitive: bool) -> bool {
    // Bring a raw string into the form in which words are compared.
    let canonical = |raw: &str| -> String {
        let normalized = normalize(raw);
        if accent_insensitive {
            strip_diacritics(&normalized)
        } else {
            normalized
        }
    };
    let needle = canonical(query);
    let haystack = canonical(text);

    let haystack_words: Vec<&str> = haystack.split_whitespace().collect();
    for term in needle.split_whitespace() {
        let found = haystack_words.iter().any(|word| *word == term);
        if !found {
            return false;
        }
    }
    true
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Vietnamese sample text: an unaccented query hits the accented word when
    /// accent-insensitive matching is requested, and plain matching still
    /// ignores case and rejects absent words.
    #[test]
    fn accent_insensitive_vi() {
        let unaccented_hit = matches("viet", "Việt Nam", true);
        assert!(unaccented_hit);

        let lesson = "học Rust";
        assert!(matches("rust", lesson, false));
        assert!(!matches("java", lesson, false));
    }

    /// Every term of a multi-word query has to be present as a word of the text.
    #[test]
    fn multi_term_search() {
        let title = "Rust async programming guide";
        let cases = [("rust async", true), ("rust sync", false)];
        for &(query, expected) in cases.iter() {
            assert_eq!(matches(query, title, false), expected);
        }
    }

    /// A query without any terms matches the text.
    #[test]
    fn empty_query() {
        let hit = matches("", "anything", false);
        assert!(hit);
    }
}