use unicode_normalization::UnicodeNormalization;
/// Returns `input` rewritten in Unicode Normalization Form KC (NFKC).
///
/// The characters produced by the `nfkc()` iterator from the
/// `unicode_normalization` crate are gathered into a freshly allocated
/// `String`; `input` itself is left untouched.
#[must_use]
pub fn normalize_nfkc(input: &str) -> String {
    // The input length is only a starting estimate: normalization may grow
    // or shrink the text, and the buffer resizes itself as needed.
    let mut normalized = String::with_capacity(input.len());
    normalized.extend(input.nfkc());
    normalized
}
/// Returns a copy of `input` with a fixed set of invisible code points removed.
///
/// The characters dropped are:
///
/// - U+200B ZERO WIDTH SPACE
/// - U+200C ZERO WIDTH NON-JOINER
/// - U+200D ZERO WIDTH JOINER
/// - U+FEFF ZERO WIDTH NO-BREAK SPACE (byte order mark)
/// - U+00AD SOFT HYPHEN
/// - U+2060 WORD JOINER
/// - U+180E MONGOLIAN VOWEL SEPARATOR
///
/// Every other character is kept, in its original order.
#[must_use]
pub fn strip_zero_width(input: &str) -> String {
    // The full list of code points this function removes.
    const INVISIBLE: [char; 7] = [
        '\u{200B}', '\u{200C}', '\u{200D}', '\u{FEFF}', '\u{00AD}', '\u{2060}', '\u{180E}',
    ];

    // The result can never be longer than the input, so one allocation suffices.
    let mut visible = String::with_capacity(input.len());
    for ch in input.chars() {
        if !INVISIBLE.contains(&ch) {
            visible.push(ch);
        }
    }
    visible
}
// Unit tests for the normalization helpers. Non-ASCII and invisible
// characters are written as `\u{...}` escapes so the exact code points under
// test are visible in the source.
#[cfg(test)]
mod tests {
    use super::*;

    /// ASCII passes through unchanged, and a compatibility ligature is
    /// expanded to its plain-letter equivalent.
    #[test]
    fn test_nfkc_basic() {
        assert_eq!(normalize_nfkc("find"), "find");
        // U+FB01 LATIN SMALL LIGATURE FI has the compatibility mapping "fi".
        assert_eq!(normalize_nfkc("\u{FB01}nd"), "find");
    }

    /// NFKC composes a base letter and combining mark into the precomposed
    /// character, and leaves already-composed text alone.
    #[test]
    fn test_nfkc_composed() {
        let decomposed = "cafe\u{0301}"; // 'e' followed by COMBINING ACUTE ACCENT
        let precomposed = "caf\u{00E9}"; // LATIN SMALL LETTER E WITH ACUTE
        assert_eq!(normalize_nfkc(decomposed), precomposed);
        assert_eq!(normalize_nfkc(precomposed), precomposed);
    }

    /// A single zero-width space inside a word is removed.
    #[test]
    fn test_strip_zero_width() {
        let input = "find\u{200B}foo";
        assert_eq!(strip_zero_width(input), "findfoo");
    }

    /// Several different invisible characters are all removed in one pass.
    #[test]
    fn test_strip_multiple_invisible() {
        let input = "\u{FEFF}find\u{200D}foo\u{00AD}bar";
        assert_eq!(strip_zero_width(input), "findfoobar");
    }

    /// Covers the stripped code points not exercised by the tests above.
    #[test]
    fn test_strip_remaining_invisible() {
        let input = "a\u{200C}b\u{2060}c\u{180E}d";
        assert_eq!(strip_zero_width(input), "abcd");
    }

    /// Text without invisible characters is returned unchanged.
    #[test]
    fn test_no_change_normal_input() {
        let input = "find authentication";
        assert_eq!(strip_zero_width(input), input);
    }

    /// Empty input and input made only of invisible characters both yield
    /// an empty string.
    #[test]
    fn test_strip_empty_and_all_invisible() {
        assert_eq!(strip_zero_width(""), "");
        assert_eq!(strip_zero_width("\u{200B}\u{FEFF}"), "");
    }
}