Skip to main content

harper_core/
char_string.rs

1use crate::char_ext::CharExt;
2use std::borrow::Cow;
3use std::iter::Iterator;
4
5use smallvec::SmallVec;
6
/// Length of the inline `char` array that `CharString` is declared with.
///
/// Exposed crate-wide as a named constant so the inline size can be referred to
/// without going through `SmallVec` itself (see the TODO below).
// TODO: remove this when `SmallVec` allows retrieving this value in a const context.
pub(crate) const CHAR_STRING_INLINE_SIZE: usize = 16;
9
/// A char sequence that improves cache locality.
///
/// This is a `SmallVec` over a `[char; CHAR_STRING_INLINE_SIZE]` array, so it
/// dereferences to `[char]` and picks up every `CharStringExt` method.
/// Most English words are fewer than 12 characters.
pub type CharString = SmallVec<[char; CHAR_STRING_INLINE_SIZE]>;
13
// Sealed-trait support: `Sealed` is a supertrait of `CharStringExt`, and this
// module is private, so only the types given a `Sealed` impl in here can
// implement `CharStringExt`.
mod private {
    /// Marker supertrait that restricts who may implement `CharStringExt`.
    pub trait Sealed {}

    // The only implementor: plain `char` slices.
    impl Sealed for [char] {}
}
19
/// Extensions to character sequences that make them easier to wrangle.
///
/// The trait is sealed (it requires `private::Sealed`) and is implemented in this
/// file for `[char]`.
///
/// The comparison helpers (`eq_*`, `starts_with_*`, `ends_with_*`) fold only ASCII
/// letters on the left-hand side (via `char::to_ascii_lowercase`) and compare the
/// right-hand side verbatim, which is why the right-hand side must already be
/// lowercase for a match to be possible.
pub trait CharStringExt: private::Sealed {
    /// Convert all characters to lowercase using Unicode case mapping.
    ///
    /// Returns `Cow::Borrowed` when the implementation determines that no new
    /// buffer is needed, and `Cow::Owned` holding the lowercased characters
    /// otherwise. Because a single character may lowercase to several, the result
    /// can be longer than the input.
    fn to_lower(&'_ self) -> Cow<'_, [char]>;

    /// Normalize the character sequence according to the dictionary's standard character set.
    ///
    /// Each character is mapped through `CharExt::normalized`; the input is
    /// borrowed unchanged when no character is altered by that mapping.
    fn normalized(&'_ self) -> Cow<'_, [char]>;

    /// Convert the character sequence to a String.
    fn to_string(&self) -> String;

    /// ASCII-case-insensitive equality with a character slice, assuming the right-hand side is lowercase ASCII.
    /// Only normalizes the left side to lowercase and avoids allocations.
    ///
    /// # Panics
    ///
    /// In builds with debug assertions enabled, panics if `other` contains an
    /// uppercase ASCII letter.
    fn eq_ch(&self, other: &[char]) -> bool;

    /// ASCII-case-insensitive equality with a string slice, assuming the right-hand side is lowercase ASCII.
    /// Only normalizes the left side to lowercase and avoids allocations.
    ///
    /// # Panics
    ///
    /// In builds with debug assertions enabled, panics if `other` contains an
    /// uppercase ASCII letter.
    fn eq_str(&self, other: &str) -> bool;

    /// ASCII-case-insensitive equality with any of a list of string slices, assuming the right-hand side is lowercase ASCII.
    /// Only normalizes the left side to lowercase and avoids allocations.
    ///
    /// Each candidate is checked with `eq_str`, so its debug-build panic applies
    /// to every candidate that gets examined.
    fn eq_any_ignore_ascii_case_str(&self, others: &[&str]) -> bool;

    /// ASCII-case-insensitive equality with any of a list of character slices, assuming the right-hand side is lowercase ASCII.
    /// Only normalizes the left side to lowercase and avoids allocations.
    ///
    /// Each candidate is checked with `eq_ch`, so its debug-build panic applies
    /// to every candidate that gets examined.
    fn eq_any_ignore_ascii_case_chars(&self, others: &[&[char]]) -> bool;

    /// Case-insensitive check if the string starts with the given ASCII prefix.
    /// The prefix is assumed to be lowercase; this assumption is not checked.
    ///
    /// A sequence shorter than the prefix never matches.
    fn starts_with_ignore_ascii_case_str(&self, prefix: &str) -> bool;

    /// Case-insensitive check if the string starts with any of the given ASCII prefixes.
    /// The prefixes are assumed to be lowercase; this assumption is not checked.
    fn starts_with_any_ignore_ascii_case_str(&self, prefixes: &[&str]) -> bool;

    /// Case-insensitive check if the string ends with the given ASCII suffix,
    /// supplied as a character slice.
    /// The suffix is assumed to be lowercase; this assumption is not checked.
    ///
    /// A sequence shorter than the suffix never matches.
    fn ends_with_ignore_ascii_case_chars(&self, suffix: &[char]) -> bool;

    /// Case-insensitive check if the string ends with the given ASCII suffix,
    /// supplied as a string slice.
    /// The suffix is assumed to be lowercase; this assumption is not checked.
    ///
    /// A sequence shorter than the suffix never matches.
    fn ends_with_ignore_ascii_case_str(&self, suffix: &str) -> bool;

    /// Case-insensitive check if the string ends with any of the given ASCII suffixes.
    /// The suffixes are assumed to be lowercase; this assumption is not checked.
    fn ends_with_any_ignore_ascii_case_chars(&self, suffixes: &[&[char]]) -> bool;

    /// Check if the string contains any vowels, as decided by `CharExt::is_vowel`.
    fn contains_vowel(&self) -> bool;
}
70
71impl CharStringExt for [char] {
72    fn to_lower(&'_ self) -> Cow<'_, [char]> {
73        if self.iter().all(|c| c.is_lowercase()) {
74            return Cow::Borrowed(self);
75        }
76
77        let mut out = CharString::with_capacity(self.len());
78
79        out.extend(self.iter().flat_map(|v| v.to_lowercase()));
80
81        Cow::Owned(out.to_vec())
82    }
83
84    fn to_string(&self) -> String {
85        self.iter().collect()
86    }
87
88    /// Convert a given character sequence to the standard character set
89    /// the dictionary is in.
90    fn normalized(&'_ self) -> Cow<'_, [char]> {
91        if self.as_ref().iter().any(|c| c.normalized() != *c) {
92            Cow::Owned(
93                self.as_ref()
94                    .iter()
95                    .copied()
96                    .map(|c| c.normalized())
97                    .collect(),
98            )
99        } else {
100            Cow::Borrowed(self)
101        }
102    }
103
104    fn eq_str(&self, other: &str) -> bool {
105        // Assert that the right-hand side is all-lowercase as required
106        debug_assert!(
107            other
108                .chars()
109                .all(|c| c.is_ascii_lowercase() || !c.is_ascii_alphabetic()),
110            "eq_str requires right-hand side to be lowercase ASCII, but got: {:?}",
111            other
112        );
113
114        let mut chit = self.iter();
115        let mut strit = other.chars();
116
117        loop {
118            let (c, s) = (chit.next(), strit.next());
119            match (c, s) {
120                (Some(c), Some(s)) => {
121                    if c.to_ascii_lowercase() != s {
122                        return false;
123                    }
124                }
125                (None, None) => return true,
126                _ => return false,
127            }
128        }
129    }
130
131    fn eq_ch(&self, other: &[char]) -> bool {
132        // Assert that the right-hand side is all-lowercase as required
133        debug_assert!(
134            other
135                .iter()
136                .all(|c| c.is_ascii_lowercase() || !c.is_ascii_alphabetic()),
137            "eq_ch requires right-hand side to be lowercase ASCII, but got: {:?}",
138            other
139        );
140
141        self.len() == other.len()
142            && self
143                .iter()
144                .zip(other.iter())
145                .all(|(a, b)| a.to_ascii_lowercase() == *b)
146    }
147
148    fn eq_any_ignore_ascii_case_str(&self, others: &[&str]) -> bool {
149        others.iter().any(|str| self.eq_str(str))
150    }
151
152    fn eq_any_ignore_ascii_case_chars(&self, others: &[&[char]]) -> bool {
153        others.iter().any(|chars| self.eq_ch(chars))
154    }
155
156    fn starts_with_ignore_ascii_case_str(&self, prefix: &str) -> bool {
157        let prefix_len = prefix.chars().count();
158        if self.len() < prefix_len {
159            return false;
160        }
161        self.iter()
162            .take(prefix_len)
163            .zip(prefix.chars())
164            .all(|(a, b)| a.to_ascii_lowercase() == b)
165    }
166
167    fn starts_with_any_ignore_ascii_case_str(&self, prefixes: &[&str]) -> bool {
168        prefixes
169            .iter()
170            .any(|prefix| self.starts_with_ignore_ascii_case_str(prefix))
171    }
172
173    fn ends_with_ignore_ascii_case_str(&self, suffix: &str) -> bool {
174        let suffix_len = suffix.chars().count();
175        if self.len() < suffix_len {
176            return false;
177        }
178        self.iter()
179            .rev()
180            .take(suffix_len)
181            .rev()
182            .zip(suffix.chars())
183            .all(|(a, b)| a.to_ascii_lowercase() == b)
184    }
185
186    fn ends_with_ignore_ascii_case_chars(&self, suffix: &[char]) -> bool {
187        let suffix_len = suffix.len();
188        if self.len() < suffix_len {
189            return false;
190        }
191        self.iter()
192            .rev()
193            .take(suffix_len)
194            .rev()
195            .zip(suffix.iter())
196            .all(|(a, b)| a.to_ascii_lowercase() == *b)
197    }
198
199    fn ends_with_any_ignore_ascii_case_chars(&self, suffixes: &[&[char]]) -> bool {
200        suffixes
201            .iter()
202            .any(|suffix| self.ends_with_ignore_ascii_case_chars(suffix))
203    }
204
205    fn contains_vowel(&self) -> bool {
206        self.iter().any(|c| c.is_vowel())
207    }
208}
209
/// Build a `CharString` from a string literal by collecting its `char`s.
///
/// The target type is named through its full crate path, so call sites do not
/// need `CharString` in scope.
macro_rules! char_string {
    ($string:literal) => {
        $string
            .chars()
            .collect::<crate::char_string::CharString>()
    };
}
217
218pub(crate) use char_string;
219
#[cfg(test)]
mod tests {
    //! Unit tests for the `CharStringExt` helpers on `[char]`.

    use super::CharStringExt;

    #[test]
    fn eq_ignore_ascii_case_chars_matches_lowercase() {
        assert!(['H', 'e', 'l', 'l', 'o'].eq_ch(&['h', 'e', 'l', 'l', 'o']));
    }

    #[test]
    fn eq_ignore_ascii_case_chars_does_not_match_different_word() {
        assert!(!['H', 'e', 'l', 'l', 'o'].eq_ch(&['w', 'o', 'r', 'l', 'd']));
    }

    #[test]
    fn eq_ignore_ascii_case_str_matches_lowercase() {
        assert!(['H', 'e', 'l', 'l', 'o'].eq_str("hello"));
    }

    #[test]
    fn eq_ignore_ascii_case_str_does_not_match_different_word() {
        assert!(!['H', 'e', 'l', 'l', 'o'].eq_str("world"));
    }

    #[test]
    fn starts_with_ignore_ascii_case_str_matches_prefix() {
        assert!(['H', 'e', 'l', 'l', 'o'].starts_with_ignore_ascii_case_str("he"));
    }

    #[test]
    fn starts_with_ignore_ascii_case_str_does_not_match_longer_prefix() {
        assert!(!['H', 'e'].starts_with_ignore_ascii_case_str("hello"));
    }

    #[test]
    fn ends_with_ignore_ascii_case_chars_matches_suffix() {
        assert!(['H', 'e', 'l', 'l', 'o'].ends_with_ignore_ascii_case_chars(&['l', 'o']));
    }

    #[test]
    fn ends_with_ignore_ascii_case_chars_does_not_match_different_suffix() {
        assert!(
            !['H', 'e', 'l', 'l', 'o']
                .ends_with_ignore_ascii_case_chars(&['w', 'o', 'r', 'l', 'd'])
        );
    }

    #[test]
    fn ends_with_ignore_ascii_case_str_matches_suffix() {
        assert!(['H', 'e', 'l', 'l', 'o'].ends_with_ignore_ascii_case_str("lo"));
    }

    #[test]
    fn ends_with_ignore_ascii_case_str_does_not_match_different_suffix() {
        assert!(!['H', 'e', 'l', 'l', 'o'].ends_with_ignore_ascii_case_str("world"));
    }

    #[test]
    fn differs_only_by_length_1() {
        assert!(!['b', 'b'].eq_str("b"));
    }

    #[test]
    fn differs_only_by_length_2() {
        assert!(!['c'].eq_str("cc"));
    }

    // The two tests below check the debug assertion itself. Asserting on the
    // return value would panic even without that assertion (the comparison is
    // simply false), so the result is discarded and the panic message is
    // pinned instead. The assertion only exists when debug assertions are on.
    #[test]
    #[cfg(debug_assertions)]
    #[should_panic(expected = "eq_str requires right-hand side to be lowercase ASCII")]
    fn right_side_must_be_all_lowercase_str() {
        let _ = ['c'].eq_str("C");
    }

    #[test]
    #[cfg(debug_assertions)]
    #[should_panic(expected = "eq_ch requires right-hand side to be lowercase ASCII")]
    fn right_side_must_be_all_lowercase_ch() {
        let _ = ['c'].eq_ch(&['C']);
    }
}