harper_core/patterns/
sequence_pattern.rs1use paste::paste;
2
3use super::whitespace_pattern::WhitespacePattern;
4use super::{AnyPattern, IndefiniteArticle, Pattern, RepeatingPattern, Word};
5use crate::{Token, TokenKind};
6
7#[derive(Default)]
29pub struct SequencePattern {
30 token_patterns: Vec<Box<dyn Pattern>>,
31}
32
/// Generates three builder methods from a `tok.kind.is_<quality>()` predicate.
///
/// Intended to be invoked inside `impl SequencePattern`. For a given `$quality`
/// it emits:
///
/// - `then_<quality>`
/// - `then_one_or_more_<quality>s`
/// - `then_anything_but_<quality>`
macro_rules! gen_then_from_is {
    ($quality:ident) => {
        paste! {
            /// Appends a step that accepts a token when `tok.kind.is_<quality>()` is true.
            pub fn [< then_$quality >](self) -> Self {
                self.then(|tok: &Token, _source: &[char]| tok.kind.[< is_$quality >]())
            }

            /// Appends a `then_one_or_more` step built on the `tok.kind.is_<quality>()`
            /// predicate.
            pub fn [< then_one_or_more_$quality s >](self) -> Self {
                // The closure is a `Pattern` by itself; `then_one_or_more` does the
                // boxing, so wrapping it in another `Box` here would only add a
                // second level of indirection.
                self.then_one_or_more(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                })
            }

            /// Appends a step that accepts a token when `tok.kind.is_<quality>()` is false.
            pub fn [< then_anything_but_$quality >](self) -> Self {
                self.then(|tok: &Token, _source: &[char]| !tok.kind.[< is_$quality >]())
            }
        }
    };
}
65
66impl SequencePattern {
67 gen_then_from_is!(nominal);
68 gen_then_from_is!(noun);
69 gen_then_from_is!(possessive_nominal);
70 gen_then_from_is!(plural_nominal);
71 gen_then_from_is!(verb);
72 gen_then_from_is!(auxiliary_verb);
73 gen_then_from_is!(linking_verb);
74 gen_then_from_is!(pronoun);
75 gen_then_from_is!(punctuation);
76 gen_then_from_is!(conjunction);
77 gen_then_from_is!(comma);
78 gen_then_from_is!(period);
79 gen_then_from_is!(number);
80 gen_then_from_is!(case_separator);
81 gen_then_from_is!(adverb);
82 gen_then_from_is!(adjective);
83 gen_then_from_is!(apostrophe);
84 gen_then_from_is!(hyphen);
85 gen_then_from_is!(determiner);
86 gen_then_from_is!(proper_noun);
87 gen_then_from_is!(preposition);
88 gen_then_from_is!(not_plural_nominal);
89
90 pub fn then_indefinite_article(self) -> Self {
91 self.then(IndefiniteArticle::default())
92 }
93
94 pub fn then_exact_word(mut self, word: &'static str) -> Self {
95 self.token_patterns.push(Box::new(Word::new_exact(word)));
96 self
97 }
98
99 pub fn aco(word: &'static str) -> Self {
101 Self::any_capitalization_of(word)
102 }
103
104 pub fn any_capitalization_of(word: &'static str) -> Self {
105 Self::default().then_any_capitalization_of(word)
106 }
107
108 pub fn t_aco(self, word: &'static str) -> Self {
110 self.then_any_capitalization_of(word)
111 }
112
113 pub fn then_any_capitalization_of(mut self, word: &'static str) -> Self {
115 self.token_patterns.push(Box::new(Word::new(word)));
116 self
117 }
118
119 pub fn then_any_word(mut self) -> Self {
121 self.token_patterns
122 .push(Box::new(|tok: &Token, _source: &[char]| tok.kind.is_word()));
123 self
124 }
125
126 pub fn then_strict(mut self, kind: TokenKind) -> Self {
128 self.token_patterns
129 .push(Box::new(move |tok: &Token, _source: &[char]| {
130 tok.kind == kind
131 }));
132 self
133 }
134
135 pub fn t_ws(self) -> Self {
137 self.then_whitespace()
138 }
139
140 pub fn then_whitespace(mut self) -> Self {
142 self.token_patterns.push(Box::new(WhitespacePattern));
143 self
144 }
145
146 pub fn then_one_or_more(mut self, pat: impl Pattern + 'static) -> Self {
147 self.token_patterns
148 .push(Box::new(RepeatingPattern::new(Box::new(pat), 1)));
149 self
150 }
151
152 pub fn then_anything(mut self) -> Self {
155 self.token_patterns.push(Box::new(AnyPattern));
156 self
157 }
158
159 pub fn then(mut self, pat: impl Pattern + 'static) -> Self {
160 self.token_patterns.push(Box::new(pat));
161 self
162 }
163}
164
165impl Pattern for SequencePattern {
166 fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize> {
167 let mut tok_cursor = 0;
168
169 for pat in self.token_patterns.iter() {
170 let match_length = pat.matches(&tokens[tok_cursor..], source)?;
171 tok_cursor += match_length;
172 }
173
174 Some(tok_cursor)
175 }
176}
177
#[cfg(test)]
mod tests {
    use super::SequencePattern;
    use crate::Document;
    use crate::patterns::Pattern;

    /// Parses `text` and asserts that `pattern`, applied from the first token,
    /// consumes every token of the resulting document.
    fn assert_consumes_whole_document(pattern: SequencePattern, text: &str) {
        let doc = Document::new_plain_english_curated(text);
        let expected = Some(doc.get_tokens().len());
        let actual = pattern.matches(doc.get_tokens(), doc.get_source());

        assert_eq!(actual, expected);
    }

    /// A single whitespace step spans the whole `"\n \n"` gap between two words.
    #[test]
    fn matches_n_whitespace_tokens() {
        assert_consumes_whole_document(
            SequencePattern::default()
                .then_any_word()
                .then_whitespace()
                .then_any_word(),
            "word\n \nword",
        );
    }

    /// Exact-word steps match their literal words.
    #[test]
    fn matches_specific_words() {
        assert_consumes_whole_document(
            SequencePattern::default()
                .then_exact_word("she")
                .then_whitespace()
                .then_exact_word("her"),
            "she her",
        );
    }

    /// The `aco` / `t_ws` / `t_aco` shorthands build a working sequence, and the
    /// lowercase `"bar"` step matches the uppercase `BAR` in the text.
    #[test]
    fn match_t_aco_and_t_ws() {
        assert_consumes_whole_document(
            SequencePattern::aco("foo").t_ws().t_aco("bar"),
            "foo\nBAR",
        );
    }
}