harper_core/
char_string.rs

1use std::borrow::Cow;
2
3use smallvec::SmallVec;
4
5/// A char sequence that improves cache locality.
6/// Most English words are fewer than 12 characters.
7pub type CharString = SmallVec<[char; 16]>;
8
/// Extensions to character sequences that make them easier to wrangle.
///
/// The transforming methods return a [`Cow`] so that an implementation can
/// hand back a borrow of the original characters when no change is needed and
/// only allocate when the output actually differs.
pub trait CharStringExt {
    /// Returns the lowercase form of the sequence.
    fn to_lower(&self) -> Cow<'_, [char]>;

    /// Returns the sequence converted to the standard character set the
    /// dictionary is in.
    fn normalized(&self) -> Cow<'_, [char]>;

    /// Collects the characters into an owned [`String`].
    fn to_string(&self) -> String;
}
15
16impl CharStringExt for [char] {
17    fn to_lower(&self) -> Cow<[char]> {
18        if self.iter().all(|c| c.is_lowercase()) {
19            return Cow::Borrowed(self);
20        }
21
22        let mut out = CharString::with_capacity(self.len());
23
24        out.extend(self.iter().flat_map(|v| v.to_lowercase()));
25
26        Cow::Owned(out.to_vec())
27    }
28
29    fn to_string(&self) -> String {
30        self.iter().collect()
31    }
32
33    /// Convert a given character sequence to the standard character set
34    /// the dictionary is in.
35    fn normalized(&self) -> Cow<[char]> {
36        if self.as_ref().iter().any(|c| char_to_normalized(*c) != *c) {
37            Cow::Owned(
38                self.as_ref()
39                    .iter()
40                    .copied()
41                    .map(char_to_normalized)
42                    .collect(),
43            )
44        } else {
45            Cow::Borrowed(self)
46        }
47    }
48}
49
/// Maps typographic apostrophe variants to the ASCII apostrophe `'`.
///
/// Every other character is returned unchanged.
fn char_to_normalized(c: char) -> char {
    match c {
        // U+2019 RIGHT SINGLE QUOTATION MARK
        '\u{2019}' => '\'',
        // U+2018 LEFT SINGLE QUOTATION MARK
        '\u{2018}' => '\'',
        // U+FF07 FULLWIDTH APOSTROPHE.
        // NOTE(review): this arm was garbled into an invalid `'''` literal;
        // U+FF07 is the presumed original character. Confirm against upstream.
        '\u{FF07}' => '\'',
        _ => c,
    }
}
58
/// Builds a `CharString` from a string literal by collecting its `char`s.
///
/// The target type is named through `$crate` so the expansion does not rely
/// on a `use` statement or on what is in scope at the call site.
macro_rules! char_string {
    ($string:literal) => {
        $string
            .chars()
            .collect::<$crate::char_string::CharString>()
    };
}
66
67pub(crate) use char_string;