harper_core/patterns/
word_set.rs1use super::SingleTokenPattern;
2use smallvec::SmallVec;
3
4use crate::{CharString, Token, char_ext::CharExt};
5
6#[derive(Debug, Default, Clone)]
11pub struct WordSet {
12 words: SmallVec<[CharString; 4]>,
13}
14
15impl WordSet {
16 pub fn add(&mut self, word: &str) {
17 let chars = word.chars().collect();
18
19 if !self.words.contains(&chars) {
20 self.words.push(chars);
21 }
22 }
23
24 pub fn add_chars(&mut self, chars: &[char]) {
25 if !self.words.iter().any(|i| i.as_ref() == chars) {
26 self.words.push(chars.into());
27 }
28 }
29
30 pub fn contains(&self, word: &str) -> bool {
31 self.words.contains(&word.chars().collect())
32 }
33
34 pub fn new(words: &[&'static str]) -> Self {
36 let mut set = Self::default();
37
38 for str in words {
39 set.add(str);
40 }
41
42 set
43 }
44}
45
46impl SingleTokenPattern for WordSet {
47 fn matches_token(&self, token: &Token, source: &[char]) -> bool {
48 if !token.kind.is_word() {
49 return false;
50 }
51
52 let tok_chars = token.span.get_content(source);
53
54 for word in &self.words {
55 if tok_chars.len() != word.len() {
56 continue;
57 }
58
59 let partial_match = tok_chars
60 .iter()
61 .map(CharExt::normalized)
62 .zip(word.iter().map(CharExt::normalized))
63 .all(|(a, b)| a.eq_ignore_ascii_case(&b));
64
65 if partial_match {
66 return true;
67 }
68 }
69
70 false
71 }
72}
73
#[cfg(test)]
mod tests {
    use crate::{Document, Span, patterns::DocPattern};

    use super::WordSet;

    /// Builds the fruit word set shared by the fruit tests.
    fn fruit_set() -> WordSet {
        WordSet::new(&["banana", "apple", "orange"])
    }

    /// Lowercase fruit names in a plain sentence are found.
    #[test]
    fn fruit() {
        let doc = Document::new_markdown_default_curated("I ate a banana and an apple today.");

        assert_eq!(
            fruit_set().find_all_matches_in_doc(&doc),
            vec![Span::new(6, 7), Span::new(12, 13)]
        );
    }

    /// Mixed capitalization in the document does not prevent a match.
    #[test]
    fn fruit_whack_capitalization() {
        let doc = Document::new_markdown_default_curated("I Ate A bAnaNa And aN apPlE today.");

        assert_eq!(
            fruit_set().find_all_matches_in_doc(&doc),
            vec![Span::new(6, 7), Span::new(12, 13)]
        );
    }

    /// A word stored with a straight apostrophe matches text written with a
    /// typographic one.
    #[test]
    fn supports_typographic_apostrophes() {
        let contraction = WordSet::new(&["They're"]);
        let doc = Document::new_markdown_default_curated("They’re");

        assert_eq!(
            contraction.find_all_matches_in_doc(&doc),
            vec![Span::new(0, 1)]
        );
    }
}