// harper_core/patterns/word_set.rs

1use super::Pattern;
2use smallvec::SmallVec;
3
4use crate::{CharString, Token};
5
6/// A [`Pattern`] that matches against any of a set of provided words.
7/// For small sets of short words, it doesn't allocate.
8///
9/// Note that any capitalization of the contained words will result in a match.
10#[derive(Debug, Default, Clone)]
11pub struct WordSet {
12    words: SmallVec<[CharString; 4]>,
13}
14
15impl WordSet {
16    pub fn add(&mut self, word: &str) {
17        let chars = word.chars().collect();
18
19        if !self.words.contains(&chars) {
20            self.words.push(chars);
21        }
22    }
23
24    pub fn contains(&self, word: &str) -> bool {
25        self.words.contains(&word.chars().collect())
26    }
27
28    /// Create a new word set that matches against any word in the provided list.
29    pub fn new(words: &[&'static str]) -> Self {
30        let mut set = Self::default();
31
32        for str in words {
33            set.add(str);
34        }
35
36        set
37    }
38}
39
40impl Pattern for WordSet {
41    fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize> {
42        let tok = tokens.first()?;
43        if !tok.kind.is_word() {
44            return None;
45        }
46
47        let tok_chars = tok.span.get_content(source);
48
49        for word in &self.words {
50            if tok_chars.len() != word.len() {
51                continue;
52            }
53
54            let partial_match = tok_chars
55                .iter()
56                .zip(word)
57                .all(|(a, b)| a.eq_ignore_ascii_case(b));
58
59            if partial_match {
60                return Some(1);
61            }
62        }
63
64        None
65    }
66}
67
#[cfg(test)]
mod tests {
    use crate::{Document, Span, patterns::DocPattern};

    use super::WordSet;

    /// Runs the fruit word set over `text` and asserts that exactly the two
    /// expected spans are reported (they appear to be token-index ranges
    /// covering "banana" and "apple").
    ///
    /// `#[track_caller]` makes a failure point at the calling test.
    #[track_caller]
    fn assert_finds_banana_and_apple(text: &str) {
        let fruits = WordSet::new(&["banana", "apple", "orange"]);
        let document = Document::new_markdown_default_curated(text);

        assert_eq!(
            fruits.find_all_matches_in_doc(&document),
            vec![Span::new(6, 7), Span::new(12, 13)]
        );
    }

    /// Lowercase words in the text are matched.
    #[test]
    fn fruit() {
        assert_finds_banana_and_apple("I ate a banana and an apple today.");
    }

    /// Irregular capitalization in the text yields the same matches.
    #[test]
    fn fruit_whack_capitalization() {
        assert_finds_banana_and_apple("I Ate A bAnaNa And aN apPlE today.");
    }
}