harper_core/
char_string.rs

1use crate::char_ext::CharExt;
2use std::borrow::Cow;
3
4use smallvec::SmallVec;
5
6/// A sequence of `char`s stored in a `SmallVec` with inline room for 16 elements.
7/// Most English words are fewer than 12 characters, so a typical word fits in the
/// inline buffer, which is what improves cache locality.
8pub type CharString = SmallVec<[char; 16]>;
9
/// Convenience operations for working with sequences of `char`s.
pub trait CharStringExt {
    /// Lowercase every character of the sequence.
    ///
    /// The result is a new owned vector if any changes were made.
    fn to_lower(&self) -> Cow<'_, [char]>;

    /// Map the sequence onto the standard character set used by the dictionary.
    fn normalized(&self) -> Cow<'_, [char]>;

    /// Build a `String` out of the characters in the sequence.
    fn to_string(&self) -> String;

    /// Compare against a character slice while ignoring ASCII case.
    ///
    /// `other` is assumed to be lowercase ASCII already: only the left-hand
    /// side is lowercased, and no allocation takes place.
    fn eq_ignore_ascii_case_chars(&self, other: &[char]) -> bool;

    /// Compare against a string slice while ignoring ASCII case.
    ///
    /// `other` is assumed to be lowercase ASCII already: only the left-hand
    /// side is lowercased, and no allocation takes place.
    fn eq_ignore_ascii_case_str(&self, other: &str) -> bool;

    /// Report whether the sequence matches any one of `others`, ignoring
    /// ASCII case.
    ///
    /// Every entry of `others` is assumed to be lowercase ASCII already: only
    /// the left-hand side is lowercased, and no allocation takes place.
    fn eq_any_ignore_ascii_case_chars(&self, others: &[&[char]]) -> bool;

    /// Report whether the sequence ends with the ASCII `suffix`, ignoring
    /// case.
    ///
    /// `suffix` is assumed to be lowercase.
    fn ends_with_ignore_ascii_case_chars(&self, suffix: &[char]) -> bool;

    /// Report whether the sequence ends with the ASCII `suffix`, ignoring
    /// case.
    ///
    /// `suffix` is assumed to be lowercase.
    fn ends_with_ignore_ascii_case_str(&self, suffix: &str) -> bool;

    /// Report whether at least one character of the sequence is a vowel.
    fn contains_vowel(&self) -> bool;
}
44
45impl CharStringExt for [char] {
46    fn to_lower(&'_ self) -> Cow<'_, [char]> {
47        if self.iter().all(|c| c.is_lowercase()) {
48            return Cow::Borrowed(self);
49        }
50
51        let mut out = CharString::with_capacity(self.len());
52
53        out.extend(self.iter().flat_map(|v| v.to_lowercase()));
54
55        Cow::Owned(out.to_vec())
56    }
57
58    fn to_string(&self) -> String {
59        self.iter().collect()
60    }
61
62    /// Convert a given character sequence to the standard character set
63    /// the dictionary is in.
64    fn normalized(&'_ self) -> Cow<'_, [char]> {
65        if self.as_ref().iter().any(|c| c.normalized() != *c) {
66            Cow::Owned(
67                self.as_ref()
68                    .iter()
69                    .copied()
70                    .map(|c| c.normalized())
71                    .collect(),
72            )
73        } else {
74            Cow::Borrowed(self)
75        }
76    }
77
78    fn eq_ignore_ascii_case_str(&self, other: &str) -> bool {
79        self.len() == other.len()
80            && self
81                .iter()
82                .zip(other.chars())
83                .all(|(a, b)| a.to_ascii_lowercase() == b)
84    }
85
86    fn eq_ignore_ascii_case_chars(&self, other: &[char]) -> bool {
87        self.len() == other.len()
88            && self
89                .iter()
90                .zip(other.iter())
91                .all(|(a, b)| a.to_ascii_lowercase() == *b)
92    }
93
94    fn eq_any_ignore_ascii_case_chars(&self, others: &[&[char]]) -> bool {
95        others
96            .iter()
97            .any(|chars| self.eq_ignore_ascii_case_chars(chars))
98    }
99
100    fn ends_with_ignore_ascii_case_str(&self, suffix: &str) -> bool {
101        let suffix_len = suffix.len();
102        if self.len() < suffix_len {
103            return false;
104        }
105        self.iter()
106            .rev()
107            .take(suffix_len)
108            .rev()
109            .zip(suffix.chars())
110            .all(|(a, b)| a.to_ascii_lowercase() == b)
111    }
112
113    fn ends_with_ignore_ascii_case_chars(&self, suffix: &[char]) -> bool {
114        let suffix_len = suffix.len();
115        if self.len() < suffix_len {
116            return false;
117        }
118        self.iter()
119            .rev()
120            .take(suffix_len)
121            .rev()
122            .zip(suffix.iter())
123            .all(|(a, b)| a.to_ascii_lowercase() == *b)
124    }
125
126    fn contains_vowel(&self) -> bool {
127        self.iter().any(|c| c.is_vowel())
128    }
129}
130
/// Build a [`CharString`](crate::char_string::CharString) from a string
/// literal, with one element per `char` of the literal.
macro_rules! char_string {
    ($text:literal) => {{
        <$crate::char_string::CharString as ::std::iter::FromIterator<char>>::from_iter(
            $text.chars(),
        )
    }};
}
138
139pub(crate) use char_string;
140
#[cfg(test)]
mod tests {
    use super::CharStringExt;

    /// Mixed-case subject used on the left-hand side of every assertion.
    const HELLO: [char; 5] = ['H', 'e', 'l', 'l', 'o'];

    /// A lowercase word of the same length that must never match [`HELLO`].
    const WORLD: [char; 5] = ['w', 'o', 'r', 'l', 'd'];

    #[test]
    fn eq_ignore_ascii_case_chars_matches_lowercase() {
        let lowercase = ['h', 'e', 'l', 'l', 'o'];
        assert!(HELLO.eq_ignore_ascii_case_chars(&lowercase));
    }

    #[test]
    fn eq_ignore_ascii_case_chars_does_not_match_different_word() {
        assert!(!HELLO.eq_ignore_ascii_case_chars(&WORLD));
    }

    #[test]
    fn eq_ignore_ascii_case_str_matches_lowercase() {
        assert!(HELLO.eq_ignore_ascii_case_str("hello"));
    }

    #[test]
    fn eq_ignore_ascii_case_str_does_not_match_different_word() {
        assert!(!HELLO.eq_ignore_ascii_case_str("world"));
    }

    #[test]
    fn ends_with_ignore_ascii_case_chars_matches_suffix() {
        let suffix = ['l', 'o'];
        assert!(HELLO.ends_with_ignore_ascii_case_chars(&suffix));
    }

    #[test]
    fn ends_with_ignore_ascii_case_chars_does_not_match_different_suffix() {
        assert!(!HELLO.ends_with_ignore_ascii_case_chars(&WORLD));
    }

    #[test]
    fn ends_with_ignore_ascii_case_str_matches_suffix() {
        assert!(HELLO.ends_with_ignore_ascii_case_str("lo"));
    }

    #[test]
    fn ends_with_ignore_ascii_case_str_does_not_match_different_suffix() {
        assert!(!HELLO.ends_with_ignore_ascii_case_str("world"));
    }
}