harper_core/
char_string.rs

1use std::borrow::Cow;
2
3use smallvec::SmallVec;
4
/// A char sequence that improves cache locality.
///
/// Backed by a `SmallVec` whose inline buffer holds up to 16 `char`s.
/// Most English words are fewer than 12 characters, so a typical word fits
/// within that inline capacity.
pub type CharString = SmallVec<[char; 16]>;
8
/// Convenience methods that make sequences of `char`s easier to work with.
pub trait CharStringExt {
    /// Lowercase every character in the sequence.
    ///
    /// A new owned vector is returned if any changes were made.
    fn to_lower(&self) -> Cow<'_, [char]>;

    /// Map the sequence onto the standard character set used by the dictionary.
    fn normalized(&self) -> Cow<'_, [char]>;

    /// Collect the characters into a `String`.
    fn to_string(&self) -> String;

    /// Compare with a character slice, ignoring ASCII case.
    ///
    /// `other` is assumed to be lowercase ASCII already: only `self` is
    /// lowercased, and no allocation takes place.
    fn eq_ignore_ascii_case_chars(&self, other: &[char]) -> bool;

    /// Compare with a string slice, ignoring ASCII case.
    ///
    /// `other` is assumed to be lowercase ASCII already: only `self` is
    /// lowercased, and no allocation takes place.
    fn eq_ignore_ascii_case_str(&self, other: &str) -> bool;

    /// Check whether the sequence equals any of the given character slices,
    /// ignoring ASCII case.
    ///
    /// Every candidate is assumed to be lowercase ASCII already: only `self` is
    /// lowercased, and no allocation takes place.
    fn eq_any_ignore_ascii_case_chars(&self, others: &[&[char]]) -> bool;

    /// Check whether the sequence ends with the given ASCII suffix, ignoring
    /// ASCII case.
    ///
    /// `suffix` is assumed to be lowercase.
    fn ends_with_ignore_ascii_case_chars(&self, suffix: &[char]) -> bool;

    /// Check whether the sequence ends with the given ASCII suffix, ignoring
    /// ASCII case.
    ///
    /// `suffix` is assumed to be lowercase.
    fn ends_with_ignore_ascii_case_str(&self, suffix: &str) -> bool;
}
40
41impl CharStringExt for [char] {
42    fn to_lower(&'_ self) -> Cow<'_, [char]> {
43        if self.iter().all(|c| c.is_lowercase()) {
44            return Cow::Borrowed(self);
45        }
46
47        let mut out = CharString::with_capacity(self.len());
48
49        out.extend(self.iter().flat_map(|v| v.to_lowercase()));
50
51        Cow::Owned(out.to_vec())
52    }
53
54    fn to_string(&self) -> String {
55        self.iter().collect()
56    }
57
58    /// Convert a given character sequence to the standard character set
59    /// the dictionary is in.
60    fn normalized(&'_ self) -> Cow<'_, [char]> {
61        if self.as_ref().iter().any(|c| char_to_normalized(*c) != *c) {
62            Cow::Owned(
63                self.as_ref()
64                    .iter()
65                    .copied()
66                    .map(char_to_normalized)
67                    .collect(),
68            )
69        } else {
70            Cow::Borrowed(self)
71        }
72    }
73
74    fn eq_ignore_ascii_case_str(&self, other: &str) -> bool {
75        self.len() == other.len()
76            && self
77                .iter()
78                .zip(other.chars())
79                .all(|(a, b)| a.to_ascii_lowercase() == b)
80    }
81
82    fn eq_ignore_ascii_case_chars(&self, other: &[char]) -> bool {
83        self.len() == other.len()
84            && self
85                .iter()
86                .zip(other.iter())
87                .all(|(a, b)| a.to_ascii_lowercase() == *b)
88    }
89
90    fn eq_any_ignore_ascii_case_chars(&self, others: &[&[char]]) -> bool {
91        others
92            .iter()
93            .any(|chars| self.eq_ignore_ascii_case_chars(chars))
94    }
95
96    fn ends_with_ignore_ascii_case_str(&self, suffix: &str) -> bool {
97        let suffix_len = suffix.len();
98        if self.len() < suffix_len {
99            return false;
100        }
101        self.iter()
102            .rev()
103            .take(suffix_len)
104            .rev()
105            .zip(suffix.chars())
106            .all(|(a, b)| a.to_ascii_lowercase() == b)
107    }
108
109    fn ends_with_ignore_ascii_case_chars(&self, suffix: &[char]) -> bool {
110        let suffix_len = suffix.len();
111        if self.len() < suffix_len {
112            return false;
113        }
114        self.iter()
115            .rev()
116            .take(suffix_len)
117            .rev()
118            .zip(suffix.iter())
119            .all(|(a, b)| a.to_ascii_lowercase() == *b)
120    }
121}
122
/// Map typographic apostrophe variants onto the plain ASCII apostrophe.
///
/// Every other character is returned unchanged.
fn char_to_normalized(c: char) -> char {
    match c {
        // U+2019 RIGHT SINGLE QUOTATION MARK, U+2018 LEFT SINGLE QUOTATION MARK
        // and U+FF07 FULLWIDTH APOSTROPHE. Written as escapes so the variants
        // cannot be silently flattened to ASCII by an editor or encoder.
        '\u{2019}' | '\u{2018}' | '\u{FF07}' => '\'',
        _ => c,
    }
}
131
/// Build a `CharString` from a string literal by collecting its characters.
macro_rules! char_string {
    ($string:literal) => {
        $string
            .chars()
            .collect::<$crate::char_string::CharString>()
    };
}
139
140pub(crate) use char_string;
141
#[cfg(test)]
mod tests {
    use super::CharStringExt;

    /// Mixed-case subject shared by every test in this module.
    const HELLO: [char; 5] = ['H', 'e', 'l', 'l', 'o'];
    /// Lowercase word of the same length as `HELLO` that must never match it.
    const WORLD: [char; 5] = ['w', 'o', 'r', 'l', 'd'];

    #[test]
    fn eq_ignore_ascii_case_chars_matches_lowercase() {
        let lowercase = ['h', 'e', 'l', 'l', 'o'];
        assert!(HELLO.eq_ignore_ascii_case_chars(&lowercase));
    }

    #[test]
    fn eq_ignore_ascii_case_chars_does_not_match_different_word() {
        let matched = HELLO.eq_ignore_ascii_case_chars(&WORLD);
        assert!(!matched);
    }

    #[test]
    fn eq_ignore_ascii_case_str_matches_lowercase() {
        assert!(HELLO.eq_ignore_ascii_case_str("hello"));
    }

    #[test]
    fn eq_ignore_ascii_case_str_does_not_match_different_word() {
        let matched = HELLO.eq_ignore_ascii_case_str("world");
        assert!(!matched);
    }

    #[test]
    fn ends_with_ignore_ascii_case_chars_matches_suffix() {
        let suffix = ['l', 'o'];
        assert!(HELLO.ends_with_ignore_ascii_case_chars(&suffix));
    }

    #[test]
    fn ends_with_ignore_ascii_case_chars_does_not_match_different_suffix() {
        let matched = HELLO.ends_with_ignore_ascii_case_chars(&WORLD);
        assert!(!matched);
    }

    #[test]
    fn ends_with_ignore_ascii_case_str_matches_suffix() {
        assert!(HELLO.ends_with_ignore_ascii_case_str("lo"));
    }

    #[test]
    fn ends_with_ignore_ascii_case_str_does_not_match_different_suffix() {
        let matched = HELLO.ends_with_ignore_ascii_case_str("world");
        assert!(!matched);
    }
}