// harper_core/char_string.rs

1use crate::char_ext::CharExt;
2use std::borrow::Cow;
3
4use smallvec::SmallVec;
5
6/// A char sequence that improves cache locality.
7/// Most English words are fewer than 12 characters.
8pub type CharString = SmallVec<[char; 16]>;
9
/// Extensions to character sequences that make them easier to wrangle.
///
/// All of the `*_ignore_ascii_case_*` methods share one contract: only `self`
/// is lowercased (ASCII letters only), while the right-hand side is compared
/// exactly as given. Callers are therefore expected to pass lowercase
/// right-hand sides; an uppercase letter on the right never matches.
pub trait CharStringExt {
    /// Convert all characters to lowercase.
    ///
    /// The result is owned when lowercasing produced a new sequence and may
    /// borrow `self` when no change was needed.
    fn to_lower(&'_ self) -> Cow<'_, [char]>;

    /// Normalize the character sequence according to the dictionary's standard character set.
    ///
    /// Borrows `self` when every character is already in normalized form.
    fn normalized(&'_ self) -> Cow<'_, [char]>;

    /// Convert the character sequence to a `String`.
    fn to_string(&self) -> String;

    /// Case-insensitive comparison with a character slice, assuming `other` is lowercase ASCII.
    ///
    /// Only `self` is lowercased; no allocation is performed.
    fn eq_ignore_ascii_case_chars(&self, other: &[char]) -> bool;

    /// Case-insensitive comparison with a string slice, assuming `other` is lowercase ASCII.
    ///
    /// Only `self` is lowercased; no allocation is performed.
    fn eq_ignore_ascii_case_str(&self, other: &str) -> bool;

    /// Case-insensitive comparison against a list of character slices, each assumed to be
    /// lowercase ASCII. Returns `true` as soon as one of them matches.
    ///
    /// Only `self` is lowercased; no allocation is performed.
    fn eq_any_ignore_ascii_case_chars(&self, others: &[&[char]]) -> bool;

    /// Case-insensitive check that the sequence starts with `prefix`.
    ///
    /// `prefix` is assumed to be lowercase ASCII.
    fn starts_with_ignore_ascii_case_str(&self, prefix: &str) -> bool;

    /// Case-insensitive check that the sequence starts with at least one of `prefixes`.
    ///
    /// Every prefix is assumed to be lowercase ASCII.
    fn starts_with_any_ignore_ascii_case_str(&self, prefixes: &[&str]) -> bool;

    /// Case-insensitive check that the sequence ends with `suffix`, given as characters.
    ///
    /// `suffix` is assumed to be lowercase ASCII.
    fn ends_with_ignore_ascii_case_chars(&self, suffix: &[char]) -> bool;

    /// Case-insensitive check that the sequence ends with `suffix`, given as a string slice.
    ///
    /// `suffix` is assumed to be lowercase ASCII.
    fn ends_with_ignore_ascii_case_str(&self, suffix: &str) -> bool;

    /// Case-insensitive check that the sequence ends with at least one of `suffixes`.
    ///
    /// Every suffix is assumed to be lowercase ASCII.
    fn ends_with_any_ignore_ascii_case_chars(&self, suffixes: &[&[char]]) -> bool;

    /// Check whether the sequence contains at least one vowel.
    fn contains_vowel(&self) -> bool;
}
56
57impl CharStringExt for [char] {
58    fn to_lower(&'_ self) -> Cow<'_, [char]> {
59        if self.iter().all(|c| c.is_lowercase()) {
60            return Cow::Borrowed(self);
61        }
62
63        let mut out = CharString::with_capacity(self.len());
64
65        out.extend(self.iter().flat_map(|v| v.to_lowercase()));
66
67        Cow::Owned(out.to_vec())
68    }
69
70    fn to_string(&self) -> String {
71        self.iter().collect()
72    }
73
74    /// Convert a given character sequence to the standard character set
75    /// the dictionary is in.
76    fn normalized(&'_ self) -> Cow<'_, [char]> {
77        if self.as_ref().iter().any(|c| c.normalized() != *c) {
78            Cow::Owned(
79                self.as_ref()
80                    .iter()
81                    .copied()
82                    .map(|c| c.normalized())
83                    .collect(),
84            )
85        } else {
86            Cow::Borrowed(self)
87        }
88    }
89
90    fn eq_ignore_ascii_case_str(&self, other: &str) -> bool {
91        self.len() == other.len()
92            && self
93                .iter()
94                .zip(other.chars())
95                .all(|(a, b)| a.to_ascii_lowercase() == b)
96    }
97
98    fn eq_ignore_ascii_case_chars(&self, other: &[char]) -> bool {
99        self.len() == other.len()
100            && self
101                .iter()
102                .zip(other.iter())
103                .all(|(a, b)| a.to_ascii_lowercase() == *b)
104    }
105
106    fn eq_any_ignore_ascii_case_chars(&self, others: &[&[char]]) -> bool {
107        others
108            .iter()
109            .any(|chars| self.eq_ignore_ascii_case_chars(chars))
110    }
111
112    fn starts_with_ignore_ascii_case_str(&self, prefix: &str) -> bool {
113        let prefix_len = prefix.len();
114        if self.len() < prefix_len {
115            return false;
116        }
117        self.iter()
118            .take(prefix_len)
119            .zip(prefix.chars())
120            .all(|(a, b)| a.to_ascii_lowercase() == b)
121    }
122
123    fn starts_with_any_ignore_ascii_case_str(&self, prefixes: &[&str]) -> bool {
124        prefixes
125            .iter()
126            .any(|prefix| self.starts_with_ignore_ascii_case_str(prefix))
127    }
128
129    fn ends_with_ignore_ascii_case_str(&self, suffix: &str) -> bool {
130        let suffix_len = suffix.len();
131        if self.len() < suffix_len {
132            return false;
133        }
134        self.iter()
135            .rev()
136            .take(suffix_len)
137            .rev()
138            .zip(suffix.chars())
139            .all(|(a, b)| a.to_ascii_lowercase() == b)
140    }
141
142    fn ends_with_ignore_ascii_case_chars(&self, suffix: &[char]) -> bool {
143        let suffix_len = suffix.len();
144        if self.len() < suffix_len {
145            return false;
146        }
147        self.iter()
148            .rev()
149            .take(suffix_len)
150            .rev()
151            .zip(suffix.iter())
152            .all(|(a, b)| a.to_ascii_lowercase() == *b)
153    }
154
155    fn ends_with_any_ignore_ascii_case_chars(&self, suffixes: &[&[char]]) -> bool {
156        suffixes
157            .iter()
158            .any(|suffix| self.ends_with_ignore_ascii_case_chars(suffix))
159    }
160
161    fn contains_vowel(&self) -> bool {
162        self.iter().any(|c| c.is_vowel())
163    }
164}
165
/// Build a `CharString` from a string literal.
///
/// Expands to an expression that collects the literal's characters. The type
/// is named through `$crate`, so the expansion does not depend on what the
/// call site has imported.
macro_rules! char_string {
    ($string:literal) => {
        $string
            .chars()
            .collect::<$crate::char_string::CharString>()
    };
}

// Re-export the macro under a path so other modules in the crate can `use` it.
pub(crate) use char_string;
175
#[cfg(test)]
mod tests {
    use super::CharStringExt;

    // The receivers below are `[char; N]` arrays; the methods are reached
    // through unsizing to `[char]`.
    const HELLO: [char; 5] = ['H', 'e', 'l', 'l', 'o'];

    #[test]
    fn eq_ignore_ascii_case_chars_matches_lowercase() {
        assert!(HELLO.eq_ignore_ascii_case_chars(&['h', 'e', 'l', 'l', 'o']));
    }

    #[test]
    fn eq_ignore_ascii_case_chars_does_not_match_different_word() {
        assert!(!HELLO.eq_ignore_ascii_case_chars(&['w', 'o', 'r', 'l', 'd']));
    }

    #[test]
    fn eq_ignore_ascii_case_chars_rejects_length_mismatch() {
        assert!(!HELLO.eq_ignore_ascii_case_chars(&['h', 'e', 'l', 'l']));
    }

    #[test]
    fn eq_ignore_ascii_case_str_matches_lowercase() {
        assert!(HELLO.eq_ignore_ascii_case_str("hello"));
    }

    #[test]
    fn eq_ignore_ascii_case_str_does_not_match_different_word() {
        assert!(!HELLO.eq_ignore_ascii_case_str("world"));
    }

    #[test]
    fn eq_ignore_ascii_case_str_rejects_length_mismatch() {
        assert!(!HELLO.eq_ignore_ascii_case_str("hell"));
        assert!(!HELLO.eq_ignore_ascii_case_str("hellos"));
    }

    #[test]
    fn eq_any_ignore_ascii_case_chars_matches_one_of_many() {
        assert!(HELLO.eq_any_ignore_ascii_case_chars(&[
            &['w', 'o', 'r', 'l', 'd'][..],
            &['h', 'e', 'l', 'l', 'o'][..],
        ]));
        assert!(!HELLO.eq_any_ignore_ascii_case_chars(&[&['w', 'o', 'r', 'l', 'd'][..]]));
    }

    #[test]
    fn starts_with_ignore_ascii_case_str_matches_prefix() {
        assert!(HELLO.starts_with_ignore_ascii_case_str("he"));
    }

    #[test]
    fn starts_with_ignore_ascii_case_str_does_not_match_different_prefix() {
        assert!(!HELLO.starts_with_ignore_ascii_case_str("wo"));
    }

    #[test]
    fn starts_with_ignore_ascii_case_str_rejects_prefix_longer_than_word() {
        assert!(!['H', 'i'].starts_with_ignore_ascii_case_str("hello"));
    }

    #[test]
    fn starts_with_any_ignore_ascii_case_str_matches_one_of_many() {
        assert!(HELLO.starts_with_any_ignore_ascii_case_str(&["wo", "he"]));
        assert!(!HELLO.starts_with_any_ignore_ascii_case_str(&["wo", "lo"]));
    }

    #[test]
    fn ends_with_ignore_ascii_case_chars_matches_suffix() {
        assert!(HELLO.ends_with_ignore_ascii_case_chars(&['l', 'o']));
    }

    #[test]
    fn ends_with_ignore_ascii_case_chars_does_not_match_different_suffix() {
        assert!(!HELLO.ends_with_ignore_ascii_case_chars(&['w', 'o', 'r', 'l', 'd']));
    }

    #[test]
    fn ends_with_ignore_ascii_case_str_matches_suffix() {
        assert!(HELLO.ends_with_ignore_ascii_case_str("lo"));
    }

    #[test]
    fn ends_with_ignore_ascii_case_str_does_not_match_different_suffix() {
        assert!(!HELLO.ends_with_ignore_ascii_case_str("world"));
    }

    #[test]
    fn ends_with_any_ignore_ascii_case_chars_matches_one_of_many() {
        assert!(HELLO.ends_with_any_ignore_ascii_case_chars(&[&['h', 'e'][..], &['l', 'o'][..]]));
        assert!(!HELLO.ends_with_any_ignore_ascii_case_chars(&[&['h', 'e'][..]]));
    }

    #[test]
    fn to_lower_lowercases_every_character() {
        assert_eq!(HELLO.to_lower().into_owned(), vec!['h', 'e', 'l', 'l', 'o']);
    }
}