harper_core/
char_string.rs

1use crate::char_ext::CharExt;
2use std::borrow::Cow;
3
4use smallvec::SmallVec;
5
6/// A char sequence that improves cache locality.
7/// Most English words are fewer than 12 characters.
8pub type CharString = SmallVec<[char; 16]>;
9
/// Holds the marker trait used as a supertrait bound by `CharStringExt`.
///
/// The module itself is private, so code outside this file cannot name
/// `Sealed` and therefore cannot add implementations of traits bounded by it.
mod private {
    /// Marker trait with no methods; it exists only to be used as a bound.
    pub trait Sealed {}

    // Character slices are the only type that receives the marker.
    impl Sealed for [char] {}
}
15
16/// Extensions to character sequences that make them easier to wrangle.
17pub trait CharStringExt: private::Sealed {
18    /// Convert all characters to lowercase, returning a new owned vector if any changes were made.
19    fn to_lower(&'_ self) -> Cow<'_, [char]>;
20
21    /// Normalize the character sequence according to the dictionary's standard character set.
22    fn normalized(&'_ self) -> Cow<'_, [char]>;
23
24    /// Convert the character sequence to a String.
25    fn to_string(&self) -> String;
26
27    /// Case-insensitive comparison with a character slice, assuming the right-hand side is lowercase ASCII.
28    /// Only normalizes the left side to lowercase and avoids allocations.
29    fn eq_ignore_ascii_case_chars(&self, other: &[char]) -> bool;
30
31    /// Case-insensitive comparison with a string slice, assuming the right-hand side is lowercase ASCII.
32    /// Only normalizes the left side to lowercase and avoids allocations.
33    fn eq_ignore_ascii_case_str(&self, other: &str) -> bool;
34
35    /// Case-insensitive comparison with any of a list of string slices, assuming the right-hand side is lowercase ASCII.
36    /// Only normalizes the left side to lowercase and avoids allocations.
37    fn eq_any_ignore_ascii_case_str(&self, others: &[&str]) -> bool;
38
39    /// Case-insensitive comparison with any of a list of character slices, assuming the right-hand side is lowercase ASCII.
40    /// Only normalizes the left side to lowercase and avoids allocations.
41    fn eq_any_ignore_ascii_case_chars(&self, others: &[&[char]]) -> bool;
42
43    /// Case-insensitive check if the string starts with the given ASCII prefix.
44    /// The prefix is assumed to be lowercase.
45    fn starts_with_ignore_ascii_case_str(&self, prefix: &str) -> bool;
46
47    /// Case-insensitive check if the string starts with any of the given ASCII prefixes.
48    /// The prefixes are assumed to be lowercase.
49    fn starts_with_any_ignore_ascii_case_str(&self, prefixes: &[&str]) -> bool;
50
51    /// Case-insensitive check if the string ends with the given ASCII suffix.
52    /// The suffix is assumed to be lowercase.
53    fn ends_with_ignore_ascii_case_chars(&self, suffix: &[char]) -> bool;
54
55    /// Case-insensitive check if the string ends with the given ASCII suffix.
56    /// The suffix is assumed to be lowercase.
57    fn ends_with_ignore_ascii_case_str(&self, suffix: &str) -> bool;
58
59    /// Case-insensitive check if the string ends with any of the given ASCII suffixes.
60    /// The suffixes are assumed to be lowercase.
61    fn ends_with_any_ignore_ascii_case_chars(&self, suffixes: &[&[char]]) -> bool;
62
63    /// Check if the string contains any vowels
64    fn contains_vowel(&self) -> bool;
65}
66
67impl CharStringExt for [char] {
68    fn to_lower(&'_ self) -> Cow<'_, [char]> {
69        if self.iter().all(|c| c.is_lowercase()) {
70            return Cow::Borrowed(self);
71        }
72
73        let mut out = CharString::with_capacity(self.len());
74
75        out.extend(self.iter().flat_map(|v| v.to_lowercase()));
76
77        Cow::Owned(out.to_vec())
78    }
79
80    fn to_string(&self) -> String {
81        self.iter().collect()
82    }
83
84    /// Convert a given character sequence to the standard character set
85    /// the dictionary is in.
86    fn normalized(&'_ self) -> Cow<'_, [char]> {
87        if self.as_ref().iter().any(|c| c.normalized() != *c) {
88            Cow::Owned(
89                self.as_ref()
90                    .iter()
91                    .copied()
92                    .map(|c| c.normalized())
93                    .collect(),
94            )
95        } else {
96            Cow::Borrowed(self)
97        }
98    }
99
100    fn eq_ignore_ascii_case_str(&self, other: &str) -> bool {
101        self.len() == other.len()
102            && self
103                .iter()
104                .zip(other.chars())
105                .all(|(a, b)| a.to_ascii_lowercase() == b)
106    }
107
108    fn eq_ignore_ascii_case_chars(&self, other: &[char]) -> bool {
109        self.len() == other.len()
110            && self
111                .iter()
112                .zip(other.iter())
113                .all(|(a, b)| a.to_ascii_lowercase() == *b)
114    }
115
116    fn eq_any_ignore_ascii_case_str(&self, others: &[&str]) -> bool {
117        others.iter().any(|str| self.eq_ignore_ascii_case_str(str))
118    }
119
120    fn eq_any_ignore_ascii_case_chars(&self, others: &[&[char]]) -> bool {
121        others
122            .iter()
123            .any(|chars| self.eq_ignore_ascii_case_chars(chars))
124    }
125
126    fn starts_with_ignore_ascii_case_str(&self, prefix: &str) -> bool {
127        let prefix_len = prefix.len();
128        if self.len() < prefix_len {
129            return false;
130        }
131        self.iter()
132            .take(prefix_len)
133            .zip(prefix.chars())
134            .all(|(a, b)| a.to_ascii_lowercase() == b)
135    }
136
137    fn starts_with_any_ignore_ascii_case_str(&self, prefixes: &[&str]) -> bool {
138        prefixes
139            .iter()
140            .any(|prefix| self.starts_with_ignore_ascii_case_str(prefix))
141    }
142
143    fn ends_with_ignore_ascii_case_str(&self, suffix: &str) -> bool {
144        let suffix_len = suffix.len();
145        if self.len() < suffix_len {
146            return false;
147        }
148        self.iter()
149            .rev()
150            .take(suffix_len)
151            .rev()
152            .zip(suffix.chars())
153            .all(|(a, b)| a.to_ascii_lowercase() == b)
154    }
155
156    fn ends_with_ignore_ascii_case_chars(&self, suffix: &[char]) -> bool {
157        let suffix_len = suffix.len();
158        if self.len() < suffix_len {
159            return false;
160        }
161        self.iter()
162            .rev()
163            .take(suffix_len)
164            .rev()
165            .zip(suffix.iter())
166            .all(|(a, b)| a.to_ascii_lowercase() == *b)
167    }
168
169    fn ends_with_any_ignore_ascii_case_chars(&self, suffixes: &[&[char]]) -> bool {
170        suffixes
171            .iter()
172            .any(|suffix| self.ends_with_ignore_ascii_case_chars(suffix))
173    }
174
175    fn contains_vowel(&self) -> bool {
176        self.iter().any(|c| c.is_vowel())
177    }
178}
179
/// Build a `CharString` from a string literal.
///
/// Expands to an expression that collects the literal's `char`s into the
/// crate's `CharString` type.
macro_rules! char_string {
    ($string:literal) => {
        $string
            .chars()
            .collect::<$crate::char_string::CharString>()
    };
}
187
188pub(crate) use char_string;
189
#[cfg(test)]
mod tests {
    use super::CharStringExt;

    /// Mixed-case word every test compares against.
    const HELLO: [char; 5] = ['H', 'e', 'l', 'l', 'o'];

    #[test]
    fn eq_ignore_ascii_case_chars_matches_lowercase() {
        let lowercase = ['h', 'e', 'l', 'l', 'o'];
        assert!(HELLO.eq_ignore_ascii_case_chars(&lowercase));
    }

    #[test]
    fn eq_ignore_ascii_case_chars_does_not_match_different_word() {
        let other_word = ['w', 'o', 'r', 'l', 'd'];
        assert!(!HELLO.eq_ignore_ascii_case_chars(&other_word));
    }

    #[test]
    fn eq_ignore_ascii_case_str_matches_lowercase() {
        let is_equal = HELLO.eq_ignore_ascii_case_str("hello");
        assert!(is_equal);
    }

    #[test]
    fn eq_ignore_ascii_case_str_does_not_match_different_word() {
        let is_equal = HELLO.eq_ignore_ascii_case_str("world");
        assert!(!is_equal);
    }

    #[test]
    fn ends_with_ignore_ascii_case_chars_matches_suffix() {
        let suffix = ['l', 'o'];
        assert!(HELLO.ends_with_ignore_ascii_case_chars(&suffix));
    }

    #[test]
    fn ends_with_ignore_ascii_case_chars_does_not_match_different_suffix() {
        let suffix = ['w', 'o', 'r', 'l', 'd'];
        assert!(!HELLO.ends_with_ignore_ascii_case_chars(&suffix));
    }

    #[test]
    fn ends_with_ignore_ascii_case_str_matches_suffix() {
        let has_suffix = HELLO.ends_with_ignore_ascii_case_str("lo");
        assert!(has_suffix);
    }

    #[test]
    fn ends_with_ignore_ascii_case_str_does_not_match_different_suffix() {
        let has_suffix = HELLO.ends_with_ignore_ascii_case_str("world");
        assert!(!has_suffix);
    }
}