harper_core/
char_string.rs

1use crate::char_ext::CharExt;
2use std::borrow::Cow;
3
4use smallvec::SmallVec;
5
/// Length of the inline `char` array that backs [`CharString`].
///
// TODO: drop this constant once `SmallVec` exposes its inline capacity in a const context.
pub(crate) const CHAR_STRING_INLINE_SIZE: usize = 16;
8
9/// A char sequence that improves cache locality.
10/// Most English words are fewer than 12 characters.
11pub type CharString = SmallVec<[char; CHAR_STRING_INLINE_SIZE]>;
12
/// Home of the [`Sealed`](private::Sealed) marker used as a supertrait of
/// `CharStringExt`. The module itself is private, so code outside this file
/// cannot name the marker and therefore cannot add implementations of the
/// extension trait.
mod private {
    /// Marker trait: only types that implement it may implement `CharStringExt`.
    pub trait Sealed {}

    // Character slices are the sole implementor.
    impl Sealed for [char] {}
}
18
19/// Extensions to character sequences that make them easier to wrangle.
20pub trait CharStringExt: private::Sealed {
21    /// Convert all characters to lowercase, returning a new owned vector if any changes were made.
22    fn to_lower(&'_ self) -> Cow<'_, [char]>;
23
24    /// Normalize the character sequence according to the dictionary's standard character set.
25    fn normalized(&'_ self) -> Cow<'_, [char]>;
26
27    /// Convert the character sequence to a String.
28    fn to_string(&self) -> String;
29
30    /// Case-insensitive comparison with a character slice, assuming the right-hand side is lowercase ASCII.
31    /// Only normalizes the left side to lowercase and avoids allocations.
32    fn eq_ignore_ascii_case_chars(&self, other: &[char]) -> bool;
33
34    /// Case-insensitive comparison with a string slice, assuming the right-hand side is lowercase ASCII.
35    /// Only normalizes the left side to lowercase and avoids allocations.
36    fn eq_ignore_ascii_case_str(&self, other: &str) -> bool;
37
38    /// Case-insensitive comparison with any of a list of string slices, assuming the right-hand side is lowercase ASCII.
39    /// Only normalizes the left side to lowercase and avoids allocations.
40    fn eq_any_ignore_ascii_case_str(&self, others: &[&str]) -> bool;
41
42    /// Case-insensitive comparison with any of a list of character slices, assuming the right-hand side is lowercase ASCII.
43    /// Only normalizes the left side to lowercase and avoids allocations.
44    fn eq_any_ignore_ascii_case_chars(&self, others: &[&[char]]) -> bool;
45
46    /// Case-insensitive check if the string starts with the given ASCII prefix.
47    /// The prefix is assumed to be lowercase.
48    fn starts_with_ignore_ascii_case_str(&self, prefix: &str) -> bool;
49
50    /// Case-insensitive check if the string starts with any of the given ASCII prefixes.
51    /// The prefixes are assumed to be lowercase.
52    fn starts_with_any_ignore_ascii_case_str(&self, prefixes: &[&str]) -> bool;
53
54    /// Case-insensitive check if the string ends with the given ASCII suffix.
55    /// The suffix is assumed to be lowercase.
56    fn ends_with_ignore_ascii_case_chars(&self, suffix: &[char]) -> bool;
57
58    /// Case-insensitive check if the string ends with the given ASCII suffix.
59    /// The suffix is assumed to be lowercase.
60    fn ends_with_ignore_ascii_case_str(&self, suffix: &str) -> bool;
61
62    /// Case-insensitive check if the string ends with any of the given ASCII suffixes.
63    /// The suffixes are assumed to be lowercase.
64    fn ends_with_any_ignore_ascii_case_chars(&self, suffixes: &[&[char]]) -> bool;
65
66    /// Check if the string contains any vowels
67    fn contains_vowel(&self) -> bool;
68}
69
70impl CharStringExt for [char] {
71    fn to_lower(&'_ self) -> Cow<'_, [char]> {
72        if self.iter().all(|c| c.is_lowercase()) {
73            return Cow::Borrowed(self);
74        }
75
76        let mut out = CharString::with_capacity(self.len());
77
78        out.extend(self.iter().flat_map(|v| v.to_lowercase()));
79
80        Cow::Owned(out.to_vec())
81    }
82
83    fn to_string(&self) -> String {
84        self.iter().collect()
85    }
86
87    /// Convert a given character sequence to the standard character set
88    /// the dictionary is in.
89    fn normalized(&'_ self) -> Cow<'_, [char]> {
90        if self.as_ref().iter().any(|c| c.normalized() != *c) {
91            Cow::Owned(
92                self.as_ref()
93                    .iter()
94                    .copied()
95                    .map(|c| c.normalized())
96                    .collect(),
97            )
98        } else {
99            Cow::Borrowed(self)
100        }
101    }
102
103    fn eq_ignore_ascii_case_str(&self, other: &str) -> bool {
104        let mut chit = self.iter();
105        let mut strit = other.chars();
106
107        loop {
108            let (c, s) = (chit.next(), strit.next());
109            match (c, s) {
110                (Some(c), Some(s)) => {
111                    if c.to_ascii_lowercase() != s {
112                        return false;
113                    }
114                }
115                (None, None) => return true,
116                _ => return false,
117            }
118        }
119    }
120
121    fn eq_ignore_ascii_case_chars(&self, other: &[char]) -> bool {
122        self.len() == other.len()
123            && self
124                .iter()
125                .zip(other.iter())
126                .all(|(a, b)| a.to_ascii_lowercase() == *b)
127    }
128
129    fn eq_any_ignore_ascii_case_str(&self, others: &[&str]) -> bool {
130        others.iter().any(|str| self.eq_ignore_ascii_case_str(str))
131    }
132
133    fn eq_any_ignore_ascii_case_chars(&self, others: &[&[char]]) -> bool {
134        others
135            .iter()
136            .any(|chars| self.eq_ignore_ascii_case_chars(chars))
137    }
138
139    fn starts_with_ignore_ascii_case_str(&self, prefix: &str) -> bool {
140        let prefix_len = prefix.chars().count();
141        if self.len() < prefix_len {
142            return false;
143        }
144        self.iter()
145            .take(prefix_len)
146            .zip(prefix.chars())
147            .all(|(a, b)| a.to_ascii_lowercase() == b)
148    }
149
150    fn starts_with_any_ignore_ascii_case_str(&self, prefixes: &[&str]) -> bool {
151        prefixes
152            .iter()
153            .any(|prefix| self.starts_with_ignore_ascii_case_str(prefix))
154    }
155
156    fn ends_with_ignore_ascii_case_str(&self, suffix: &str) -> bool {
157        let suffix_len = suffix.chars().count();
158        if self.len() < suffix_len {
159            return false;
160        }
161        self.iter()
162            .rev()
163            .take(suffix_len)
164            .rev()
165            .zip(suffix.chars())
166            .all(|(a, b)| a.to_ascii_lowercase() == b)
167    }
168
169    fn ends_with_ignore_ascii_case_chars(&self, suffix: &[char]) -> bool {
170        let suffix_len = suffix.len();
171        if self.len() < suffix_len {
172            return false;
173        }
174        self.iter()
175            .rev()
176            .take(suffix_len)
177            .rev()
178            .zip(suffix.iter())
179            .all(|(a, b)| a.to_ascii_lowercase() == *b)
180    }
181
182    fn ends_with_any_ignore_ascii_case_chars(&self, suffixes: &[&[char]]) -> bool {
183        suffixes
184            .iter()
185            .any(|suffix| self.ends_with_ignore_ascii_case_chars(suffix))
186    }
187
188    fn contains_vowel(&self) -> bool {
189        self.iter().any(|c| c.is_vowel())
190    }
191}
192
/// Builds a `CharString` from a string literal by collecting the literal's
/// `char`s.
macro_rules! char_string {
    ($string:literal) => {
        $string
            .chars()
            .collect::<$crate::char_string::CharString>()
    };
}
200
201pub(crate) use char_string;
202
#[cfg(test)]
mod tests {
    use super::CharStringExt;

    /// Mixed-case word used as the left-hand side of most checks.
    const HELLO: [char; 5] = ['H', 'e', 'l', 'l', 'o'];

    #[test]
    fn eq_ignore_ascii_case_chars_matches_lowercase() {
        let lowercase = ['h', 'e', 'l', 'l', 'o'];
        assert!(HELLO.eq_ignore_ascii_case_chars(&lowercase));
    }

    #[test]
    fn eq_ignore_ascii_case_chars_does_not_match_different_word() {
        let other = ['w', 'o', 'r', 'l', 'd'];
        assert!(!HELLO.eq_ignore_ascii_case_chars(&other));
    }

    #[test]
    fn eq_ignore_ascii_case_str_matches_lowercase() {
        assert!(HELLO.eq_ignore_ascii_case_str("hello"));
    }

    #[test]
    fn eq_ignore_ascii_case_str_does_not_match_different_word() {
        assert!(!HELLO.eq_ignore_ascii_case_str("world"));
    }

    #[test]
    fn ends_with_ignore_ascii_case_chars_matches_suffix() {
        let suffix = ['l', 'o'];
        assert!(HELLO.ends_with_ignore_ascii_case_chars(&suffix));
    }

    #[test]
    fn ends_with_ignore_ascii_case_chars_does_not_match_different_suffix() {
        let suffix = ['w', 'o', 'r', 'l', 'd'];
        assert!(!HELLO.ends_with_ignore_ascii_case_chars(&suffix));
    }

    #[test]
    fn ends_with_ignore_ascii_case_str_matches_suffix() {
        assert!(HELLO.ends_with_ignore_ascii_case_str("lo"));
    }

    #[test]
    fn ends_with_ignore_ascii_case_str_does_not_match_different_suffix() {
        assert!(!HELLO.ends_with_ignore_ascii_case_str("world"));
    }

    // The next two cases share a common prefix and differ only in length.

    #[test]
    fn differs_only_by_length_1() {
        let longer = ['b', 'b'];
        assert!(!longer.eq_ignore_ascii_case_str("b"));
    }

    #[test]
    fn differs_only_by_length_2() {
        let shorter = ['c'];
        assert!(!shorter.eq_ignore_ascii_case_str("cc"));
    }
}