indicium/simple/internal/normalize.rs
1#[cfg(feature = "unicode-normalization")]
2use unicode_normalization::UnicodeNormalization;
3
4// -----------------------------------------------------------------------------
5
6impl<K: Ord> crate::simple::SearchIndex<K> {
7 /// Returns a normalized string according to the search index's settings.
8 ///
9 /// Normalization ensures consistent matching by canonicalizing Unicode
10 /// representations and optionally folding case. This allows searches to
11 /// match equivalent characters (like "fi" and "fi") regardless of how
12 /// they were encoded.
13 ///
14 /// When the `unicode-normalization` feature is enabled, NFKC normalization
15 /// is applied to decompose compatibility characters into their canonical
16 /// forms. When case insensitivity is also enabled, the string is
17 /// additionally lowercased.
18 ///
19 /// * If the search index case been set to be case sensitive, the string
20 /// will be returned as-is.
21 ///
22 /// * If the search index case been set to be case insensitive, the string
23 /// will be returned in lower-case form.
24 #[inline]
25 pub(crate) fn normalize<'k>(
26 &self,
27 keyword: &'k str
28 ) -> beef::lean::Cow<'k, str> {
29 if self.case_sensitive {
30 #[cfg(feature = "unicode-normalization")]
31 let normalized = keyword.nfkc().collect::<String>().into();
32
33 #[cfg(feature = "icu_normalizer")]
34 let normalized = self.icu_normalizer.normalize(keyword).into();
35
36 #[cfg(not(any(feature = "unicode-normalization", feature = "icu_normalizer")))]
37 let normalized = keyword.into();
38
39 normalized
40 } else {
41 #[cfg(feature = "unicode-normalization")]
42 let normalized = keyword.nfkc().collect::<String>().to_lowercase().into();
43
44 #[cfg(feature = "icu_normalizer")]
45 let normalized = self.icu_normalizer.normalize(keyword).to_lowercase().into();
46
47 #[cfg(not(any(feature = "unicode-normalization", feature = "icu_normalizer")))]
48 let normalized = keyword.to_lowercase().into();
49
50 normalized
51 } // if
52 } // fn
53} // impl