harper_core/
char_string.rs

1use crate::char_ext::CharExt;
2use std::borrow::Cow;
3
4use smallvec::SmallVec;
5
/// A character sequence with inline storage for up to 16 `char`s, which improves cache locality.
/// Most English words are fewer than 12 characters, so they fit in that inline storage.
8pub type CharString = SmallVec<[char; 16]>;
9
/// Helper operations that make sequences of `char` more convenient to work with.
pub trait CharStringExt {
    /// Produce the lowercase form of the sequence.
    ///
    /// The result is a [`Cow`]: an owned vector is only needed when lowercasing
    /// actually changes something.
    fn to_lower(&self) -> Cow<'_, [char]>;

    /// Map the sequence onto the standard character set used by the dictionary.
    fn normalized(&self) -> Cow<'_, [char]>;

    /// Collect the characters into an owned [`String`].
    fn to_string(&self) -> String;

    /// Equality with a character slice, ignoring ASCII case on `self` only.
    ///
    /// `other` is assumed to already be lowercase ASCII and is compared as-is.
    /// No allocation takes place.
    fn eq_ignore_ascii_case_chars(&self, other: &[char]) -> bool;

    /// Equality with a string slice, ignoring ASCII case on `self` only.
    ///
    /// `other` is assumed to already be lowercase ASCII and is compared as-is.
    /// No allocation takes place.
    fn eq_ignore_ascii_case_str(&self, other: &str) -> bool;

    /// Whether `self` equals at least one entry of `others`, ignoring ASCII case on `self` only.
    ///
    /// Every candidate is assumed to already be lowercase ASCII.
    /// No allocation takes place.
    fn eq_any_ignore_ascii_case_chars(&self, others: &[&[char]]) -> bool;

    /// Whether `self` begins with `prefix`, ignoring ASCII case on `self` only.
    ///
    /// `prefix` is assumed to already be lowercase ASCII.
    fn starts_with_ignore_ascii_case_str(&self, prefix: &str) -> bool;

    /// Whether `self` begins with at least one of `prefixes`, ignoring ASCII case on `self` only.
    ///
    /// Every prefix is assumed to already be lowercase ASCII.
    fn starts_with_any_ignore_ascii_case_str(&self, prefixes: &[&str]) -> bool;

    /// Whether `self` finishes with `suffix`, ignoring ASCII case on `self` only.
    ///
    /// `suffix` is assumed to already be lowercase ASCII.
    fn ends_with_ignore_ascii_case_chars(&self, suffix: &[char]) -> bool;

    /// Whether `self` finishes with `suffix`, ignoring ASCII case on `self` only.
    ///
    /// `suffix` is assumed to already be lowercase ASCII.
    fn ends_with_ignore_ascii_case_str(&self, suffix: &str) -> bool;

    /// Whether at least one character of the sequence is a vowel.
    fn contains_vowel(&self) -> bool;
}
52
53impl CharStringExt for [char] {
54    fn to_lower(&'_ self) -> Cow<'_, [char]> {
55        if self.iter().all(|c| c.is_lowercase()) {
56            return Cow::Borrowed(self);
57        }
58
59        let mut out = CharString::with_capacity(self.len());
60
61        out.extend(self.iter().flat_map(|v| v.to_lowercase()));
62
63        Cow::Owned(out.to_vec())
64    }
65
66    fn to_string(&self) -> String {
67        self.iter().collect()
68    }
69
70    /// Convert a given character sequence to the standard character set
71    /// the dictionary is in.
72    fn normalized(&'_ self) -> Cow<'_, [char]> {
73        if self.as_ref().iter().any(|c| c.normalized() != *c) {
74            Cow::Owned(
75                self.as_ref()
76                    .iter()
77                    .copied()
78                    .map(|c| c.normalized())
79                    .collect(),
80            )
81        } else {
82            Cow::Borrowed(self)
83        }
84    }
85
86    fn eq_ignore_ascii_case_str(&self, other: &str) -> bool {
87        self.len() == other.len()
88            && self
89                .iter()
90                .zip(other.chars())
91                .all(|(a, b)| a.to_ascii_lowercase() == b)
92    }
93
94    fn eq_ignore_ascii_case_chars(&self, other: &[char]) -> bool {
95        self.len() == other.len()
96            && self
97                .iter()
98                .zip(other.iter())
99                .all(|(a, b)| a.to_ascii_lowercase() == *b)
100    }
101
102    fn eq_any_ignore_ascii_case_chars(&self, others: &[&[char]]) -> bool {
103        others
104            .iter()
105            .any(|chars| self.eq_ignore_ascii_case_chars(chars))
106    }
107
108    fn starts_with_ignore_ascii_case_str(&self, prefix: &str) -> bool {
109        let prefix_len = prefix.len();
110        if self.len() < prefix_len {
111            return false;
112        }
113        self.iter()
114            .take(prefix_len)
115            .zip(prefix.chars())
116            .all(|(a, b)| a.to_ascii_lowercase() == b)
117    }
118
119    fn starts_with_any_ignore_ascii_case_str(&self, prefixes: &[&str]) -> bool {
120        prefixes
121            .iter()
122            .any(|prefix| self.starts_with_ignore_ascii_case_str(prefix))
123    }
124
125    fn ends_with_ignore_ascii_case_str(&self, suffix: &str) -> bool {
126        let suffix_len = suffix.len();
127        if self.len() < suffix_len {
128            return false;
129        }
130        self.iter()
131            .rev()
132            .take(suffix_len)
133            .rev()
134            .zip(suffix.chars())
135            .all(|(a, b)| a.to_ascii_lowercase() == b)
136    }
137
138    fn ends_with_ignore_ascii_case_chars(&self, suffix: &[char]) -> bool {
139        let suffix_len = suffix.len();
140        if self.len() < suffix_len {
141            return false;
142        }
143        self.iter()
144            .rev()
145            .take(suffix_len)
146            .rev()
147            .zip(suffix.iter())
148            .all(|(a, b)| a.to_ascii_lowercase() == *b)
149    }
150
151    fn contains_vowel(&self) -> bool {
152        self.iter().any(|c| c.is_vowel())
153    }
154}
155
/// Expands a string literal into a `CharString` holding each `char` of the literal, in order.
macro_rules! char_string {
    ($string:literal) => {
        $string
            .chars()
            .collect::<$crate::char_string::CharString>()
    };
}
163
164pub(crate) use char_string;
165
#[cfg(test)]
mod tests {
    use super::CharStringExt;

    /// Mixed-case subject shared by every test in this module.
    const HELLO: [char; 5] = ['H', 'e', 'l', 'l', 'o'];

    #[test]
    fn eq_ignore_ascii_case_chars_matches_lowercase() {
        let lowercase = ['h', 'e', 'l', 'l', 'o'];
        assert!(HELLO.eq_ignore_ascii_case_chars(&lowercase));
    }

    #[test]
    fn eq_ignore_ascii_case_chars_does_not_match_different_word() {
        let unrelated = ['w', 'o', 'r', 'l', 'd'];
        assert!(!HELLO.eq_ignore_ascii_case_chars(&unrelated));
    }

    #[test]
    fn eq_ignore_ascii_case_str_matches_lowercase() {
        let matched = HELLO.eq_ignore_ascii_case_str("hello");
        assert!(matched);
    }

    #[test]
    fn eq_ignore_ascii_case_str_does_not_match_different_word() {
        let matched = HELLO.eq_ignore_ascii_case_str("world");
        assert!(!matched);
    }

    #[test]
    fn ends_with_ignore_ascii_case_chars_matches_suffix() {
        let suffix = ['l', 'o'];
        assert!(HELLO.ends_with_ignore_ascii_case_chars(&suffix));
    }

    #[test]
    fn ends_with_ignore_ascii_case_chars_does_not_match_different_suffix() {
        let suffix = ['w', 'o', 'r', 'l', 'd'];
        assert!(!HELLO.ends_with_ignore_ascii_case_chars(&suffix));
    }

    #[test]
    fn ends_with_ignore_ascii_case_str_matches_suffix() {
        let matched = HELLO.ends_with_ignore_ascii_case_str("lo");
        assert!(matched);
    }

    #[test]
    fn ends_with_ignore_ascii_case_str_does_not_match_different_suffix() {
        let matched = HELLO.ends_with_ignore_ascii_case_str("world");
        assert!(!matched);
    }
}