harper_core/
char_string.rs

1use std::borrow::Cow;
2
3use smallvec::SmallVec;
4
/// A char sequence that improves cache locality.
///
/// Up to 16 characters are stored inline in the `SmallVec`; most English words
/// are fewer than 12 characters, so they fit without a separate heap allocation.
pub type CharString = SmallVec<[char; 16]>;
8
/// Extensions to character sequences that make them easier to wrangle.
///
/// The methods returning [`Cow`] borrow from `self` when no change is needed, so the
/// borrowed lifetime is spelled out as `'_` in their signatures.
pub trait CharStringExt {
    /// Convert all characters to lowercase, returning a new owned vector if any changes were made.
    fn to_lower(&self) -> Cow<'_, [char]>;

    /// Normalize the character sequence according to the dictionary's standard character set.
    ///
    /// Returns a new owned vector only if at least one character had to be replaced.
    fn normalized(&self) -> Cow<'_, [char]>;

    /// Convert the character sequence to a `String`.
    fn to_string(&self) -> String;

    /// Case-insensitive comparison with a character slice.
    ///
    /// `other` is assumed to already be lowercase ASCII: only `self` is lowercased
    /// (ASCII letters only), and no allocation is performed.
    fn eq_ignore_ascii_case_chars(&self, other: &[char]) -> bool;

    /// Case-insensitive comparison with a string slice.
    ///
    /// `other` is assumed to already be lowercase ASCII: only `self` is lowercased
    /// (ASCII letters only), and no allocation is performed.
    fn eq_ignore_ascii_case_str(&self, other: &str) -> bool;

    /// Case-insensitive check if the sequence ends with the given ASCII suffix.
    ///
    /// The suffix is assumed to be lowercase.
    fn ends_with_ignore_ascii_case_chars(&self, suffix: &[char]) -> bool;

    /// Case-insensitive check if the sequence ends with the given ASCII suffix.
    ///
    /// The suffix is assumed to be lowercase.
    fn ends_with_ignore_ascii_case_str(&self, suffix: &str) -> bool;
}
36
37impl CharStringExt for [char] {
38    fn to_lower(&self) -> Cow<[char]> {
39        if self.iter().all(|c| c.is_lowercase()) {
40            return Cow::Borrowed(self);
41        }
42
43        let mut out = CharString::with_capacity(self.len());
44
45        out.extend(self.iter().flat_map(|v| v.to_lowercase()));
46
47        Cow::Owned(out.to_vec())
48    }
49
50    fn to_string(&self) -> String {
51        self.iter().collect()
52    }
53
54    /// Convert a given character sequence to the standard character set
55    /// the dictionary is in.
56    fn normalized(&self) -> Cow<[char]> {
57        if self.as_ref().iter().any(|c| char_to_normalized(*c) != *c) {
58            Cow::Owned(
59                self.as_ref()
60                    .iter()
61                    .copied()
62                    .map(char_to_normalized)
63                    .collect(),
64            )
65        } else {
66            Cow::Borrowed(self)
67        }
68    }
69
70    fn eq_ignore_ascii_case_str(&self, other: &str) -> bool {
71        self.len() == other.len()
72            && self
73                .iter()
74                .zip(other.chars())
75                .all(|(a, b)| a.to_ascii_lowercase() == b)
76    }
77
78    fn eq_ignore_ascii_case_chars(&self, other: &[char]) -> bool {
79        self.len() == other.len()
80            && self
81                .iter()
82                .zip(other.iter())
83                .all(|(a, b)| a.to_ascii_lowercase() == *b)
84    }
85
86    fn ends_with_ignore_ascii_case_str(&self, suffix: &str) -> bool {
87        let suffix_len = suffix.len();
88        if self.len() < suffix_len {
89            return false;
90        }
91        self.iter()
92            .rev()
93            .take(suffix_len)
94            .rev()
95            .zip(suffix.chars())
96            .all(|(a, b)| a.to_ascii_lowercase() == b)
97    }
98
99    fn ends_with_ignore_ascii_case_chars(&self, suffix: &[char]) -> bool {
100        let suffix_len = suffix.len();
101        if self.len() < suffix_len {
102            return false;
103        }
104        self.iter()
105            .rev()
106            .take(suffix_len)
107            .rev()
108            .zip(suffix.iter())
109            .all(|(a, b)| a.to_ascii_lowercase() == *b)
110    }
111}
112
/// Map typographic apostrophe variants onto the plain ASCII apostrophe (`'`).
///
/// Every other character is returned unchanged.
fn char_to_normalized(c: char) -> char {
    match c {
        // Written as escapes so the look-alike glyphs survive re-encoding:
        //   U+2019 RIGHT SINGLE QUOTATION MARK
        //   U+2018 LEFT SINGLE QUOTATION MARK
        //   U+FF07 FULLWIDTH APOSTROPHE
        // NOTE(review): the third arm had been garbled into an unparseable `'''`
        // literal; U+FF07 is the assumed original character — confirm against upstream.
        '\u{2019}' | '\u{2018}' | '\u{FF07}' => '\'',
        _ => c,
    }
}
121
/// Build a `CharString` from a string literal by collecting its characters.
macro_rules! char_string {
    ($string:literal) => {
        $string
            .chars()
            .collect::<$crate::char_string::CharString>()
    };
}
129
130pub(crate) use char_string;
131
#[cfg(test)]
mod tests {
    use super::CharStringExt;

    /// Mixed-case word shared by every test so the ASCII case folding is exercised.
    const HELLO: [char; 5] = ['H', 'e', 'l', 'l', 'o'];

    #[test]
    fn eq_ignore_ascii_case_chars_matches_lowercase() {
        let lowercase = ['h', 'e', 'l', 'l', 'o'];
        let matched = HELLO.eq_ignore_ascii_case_chars(&lowercase);
        assert!(matched);
    }

    #[test]
    fn eq_ignore_ascii_case_chars_does_not_match_different_word() {
        let other_word = ['w', 'o', 'r', 'l', 'd'];
        let matched = HELLO.eq_ignore_ascii_case_chars(&other_word);
        assert!(!matched);
    }

    #[test]
    fn eq_ignore_ascii_case_str_matches_lowercase() {
        let matched = HELLO.eq_ignore_ascii_case_str("hello");
        assert!(matched);
    }

    #[test]
    fn eq_ignore_ascii_case_str_does_not_match_different_word() {
        let matched = HELLO.eq_ignore_ascii_case_str("world");
        assert!(!matched);
    }

    #[test]
    fn ends_with_ignore_ascii_case_chars_matches_suffix() {
        let suffix = ['l', 'o'];
        let matched = HELLO.ends_with_ignore_ascii_case_chars(&suffix);
        assert!(matched);
    }

    #[test]
    fn ends_with_ignore_ascii_case_chars_does_not_match_different_suffix() {
        let suffix = ['w', 'o', 'r', 'l', 'd'];
        let matched = HELLO.ends_with_ignore_ascii_case_chars(&suffix);
        assert!(!matched);
    }

    #[test]
    fn ends_with_ignore_ascii_case_str_matches_suffix() {
        let matched = HELLO.ends_with_ignore_ascii_case_str("lo");
        assert!(matched);
    }

    #[test]
    fn ends_with_ignore_ascii_case_str_does_not_match_different_suffix() {
        let matched = HELLO.ends_with_ignore_ascii_case_str("world");
        assert!(!matched);
    }
}