harper_core/patterns/
word_set.rs1use super::Pattern;
2use smallvec::SmallVec;
3
4use crate::{CharString, Token};
5
6#[derive(Debug, Default, Clone)]
11pub struct WordSet {
12 words: SmallVec<[CharString; 4]>,
13}
14
15impl WordSet {
16 pub fn add(&mut self, word: &str) {
17 let chars = word.chars().collect();
18
19 if !self.words.contains(&chars) {
20 self.words.push(chars);
21 }
22 }
23
24 pub fn contains(&self, word: &str) -> bool {
25 self.words.contains(&word.chars().collect())
26 }
27
28 pub fn new(words: &[&'static str]) -> Self {
30 let mut set = Self::default();
31
32 for str in words {
33 set.add(str);
34 }
35
36 set
37 }
38}
39
40impl Pattern for WordSet {
41 fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize> {
42 let tok = tokens.first()?;
43 if !tok.kind.is_word() {
44 return None;
45 }
46
47 let tok_chars = tok.span.get_content(source);
48
49 for word in &self.words {
50 if tok_chars.len() != word.len() {
51 continue;
52 }
53
54 let partial_match = tok_chars
55 .iter()
56 .zip(word)
57 .all(|(a, b)| a.eq_ignore_ascii_case(b));
58
59 if partial_match {
60 return Some(1);
61 }
62 }
63
64 None
65 }
66}
67
#[cfg(test)]
mod tests {
    use super::WordSet;
    use crate::{Document, Span, patterns::DocPattern};

    /// Builds the fruit word set shared by the tests in this module.
    fn fruit_set() -> WordSet {
        WordSet::new(&["banana", "apple", "orange"])
    }

    /// Lowercase fruit words in a sentence are found.
    #[test]
    fn fruit() {
        let doc = Document::new_markdown_default_curated("I ate a banana and an apple today.");
        let found = fruit_set().find_all_matches_in_doc(&doc);

        assert_eq!(found, vec![Span::new(6, 7), Span::new(12, 13)]);
    }

    /// Irregular capitalization yields the same spans as the lowercase sentence.
    #[test]
    fn fruit_whack_capitalization() {
        let doc = Document::new_markdown_default_curated("I Ate A bAnaNa And aN apPlE today.");
        let found = fruit_set().find_all_matches_in_doc(&doc);

        assert_eq!(found, vec![Span::new(6, 7), Span::new(12, 13)]);
    }
}