indicium/simple/internal/
normalize.rs

1#[cfg(feature = "unicode-normalization")]
2use unicode_normalization::UnicodeNormalization;
3
4// -----------------------------------------------------------------------------
5
6impl<K: Ord> crate::simple::SearchIndex<K> {
7    /// Returns a normalized string according to the search index's settings.
8    ///
9    /// Normalization ensures consistent matching by canonicalizing Unicode
10    /// representations and optionally folding case. This allows searches to
11    /// match equivalent characters (like "fi" and "fi") regardless of how
12    /// they were encoded.
13    ///
14    /// When the `unicode-normalization` feature is enabled, NFKC normalization
15    /// is applied to decompose compatibility characters into their canonical
16    /// forms. When case insensitivity is also enabled, the string is
17    /// additionally lowercased.
18    ///
19    /// * If the search index case been set to be case sensitive, the string
20    ///   will be returned as-is.
21    ///
22    /// * If the search index case been set to be case insensitive, the string
23    ///   will be returned in lower-case form.
24    #[inline]
25    pub(crate) fn normalize<'k>(
26        &self,
27        keyword: &'k str
28    ) -> beef::lean::Cow<'k, str> {
29        if self.case_sensitive {
30            #[cfg(feature = "unicode-normalization")]
31            let normalized = keyword.nfkc().collect::<String>().into();
32
33            #[cfg(feature = "icu_normalizer")]
34            let normalized = self.icu_normalizer.normalize(keyword).into();
35
36            #[cfg(not(any(feature = "unicode-normalization", feature = "icu_normalizer")))]
37            let normalized = keyword.into();
38
39            normalized
40        } else {
41            #[cfg(feature = "unicode-normalization")]
42            let normalized = keyword.nfkc().collect::<String>().to_lowercase().into();
43
44            #[cfg(feature = "icu_normalizer")]
45            let normalized = self.icu_normalizer.normalize(keyword).to_lowercase().into();
46
47            #[cfg(not(any(feature = "unicode-normalization", feature = "icu_normalizer")))]
48            let normalized = keyword.to_lowercase().into();
49
50            normalized
51        } // if
52    } // fn
53} // impl