harper_core/patterns/
sequence_pattern.rs1use paste::paste;
2
3use super::whitespace_pattern::WhitespacePattern;
4use super::{AnyPattern, IndefiniteArticle, Pattern, RepeatingPattern, Word};
5use crate::{Token, TokenKind};
6
7#[derive(Default)]
29pub struct SequencePattern {
30 token_patterns: Vec<Box<dyn Pattern>>,
31}
32
/// Generates three builder methods on `SequencePattern` from a token-kind
/// predicate named `is_<quality>`.
///
/// `gen_then_from_is!(noun)`, for example, expands to:
/// - `then_noun`: appends a predicate that accepts a token when
///   `tok.kind.is_noun()` returns `true`.
/// - `then_one_or_more_nouns`: hands the same predicate to
///   `then_one_or_more`.
/// - `then_anything_but_noun`: appends the negated predicate.
///
/// The plural method name is formed by appending a literal `s` to `quality`.
macro_rules! gen_then_from_is {
    ($quality:ident) => {
        paste! {
            /// Appends a predicate that accepts a token whose kind has this quality.
            pub fn [< then_$quality >](mut self) -> Self {
                self.token_patterns.push(Box::new(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                }));

                self
            }

            /// Appends a repeating pattern built from the same predicate via
            /// `then_one_or_more`.
            pub fn [< then_one_or_more_$quality s >](self) -> Self {
                // The closure is itself a `Pattern`, and `then_one_or_more`
                // boxes whatever it receives, so pre-boxing it here would only
                // add a second allocation and another layer of indirection.
                self.then_one_or_more(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                })
            }

            /// Appends a predicate that accepts a token whose kind does NOT have
            /// this quality.
            pub fn [< then_anything_but_$quality >](mut self) -> Self {
                self.token_patterns.push(Box::new(|tok: &Token, _source: &[char]| {
                    !tok.kind.[< is_$quality >]()
                }));

                self
            }
        }
    };
}
65
66impl SequencePattern {
67 gen_then_from_is!(nominal);
68 gen_then_from_is!(noun);
69 gen_then_from_is!(possessive_nominal);
70 gen_then_from_is!(plural_nominal);
71 gen_then_from_is!(verb);
72 gen_then_from_is!(auxiliary_verb);
73 gen_then_from_is!(linking_verb);
74 gen_then_from_is!(pronoun);
75 gen_then_from_is!(punctuation);
76 gen_then_from_is!(conjunction);
77 gen_then_from_is!(comma);
78 gen_then_from_is!(period);
79 gen_then_from_is!(number);
80 gen_then_from_is!(case_separator);
81 gen_then_from_is!(adverb);
82 gen_then_from_is!(adjective);
83 gen_then_from_is!(apostrophe);
84 gen_then_from_is!(hyphen);
85 gen_then_from_is!(determiner);
86 gen_then_from_is!(proper_noun);
87 gen_then_from_is!(preposition);
88 gen_then_from_is!(not_plural_nominal);
89
90 pub fn then_indefinite_article(self) -> Self {
91 self.then(IndefiniteArticle::default())
92 }
93
94 pub fn then_exact_word(mut self, word: &'static str) -> Self {
96 self.token_patterns.push(Box::new(Word::new_exact(word)));
97 self
98 }
99
100 pub fn aco(word: &'static str) -> Self {
102 Self::any_capitalization_of(word)
103 }
104
105 pub fn any_capitalization_of(word: &'static str) -> Self {
106 Self::default().then_any_capitalization_of(word)
107 }
108
109 pub fn t_aco(self, word: &'static str) -> Self {
111 self.then_any_capitalization_of(word)
112 }
113
114 pub fn then_any_capitalization_of(mut self, word: &'static str) -> Self {
116 self.token_patterns.push(Box::new(Word::new(word)));
117 self
118 }
119
120 pub fn then_any_word(mut self) -> Self {
122 self.token_patterns
123 .push(Box::new(|tok: &Token, _source: &[char]| tok.kind.is_word()));
124 self
125 }
126
127 pub fn then_strict(mut self, kind: TokenKind) -> Self {
129 self.token_patterns
130 .push(Box::new(move |tok: &Token, _source: &[char]| {
131 tok.kind == kind
132 }));
133 self
134 }
135
136 pub fn t_ws(self) -> Self {
138 self.then_whitespace()
139 }
140
141 pub fn then_whitespace(mut self) -> Self {
143 self.token_patterns.push(Box::new(WhitespacePattern));
144 self
145 }
146
147 pub fn then_one_or_more(mut self, pat: impl Pattern + 'static) -> Self {
148 self.token_patterns
149 .push(Box::new(RepeatingPattern::new(Box::new(pat), 1)));
150 self
151 }
152
153 pub fn t_any(self) -> Self {
155 self.then_anything()
156 }
157
158 pub fn then_anything(mut self) -> Self {
161 self.token_patterns.push(Box::new(AnyPattern));
162 self
163 }
164
165 pub fn then(mut self, pat: impl Pattern + 'static) -> Self {
166 self.token_patterns.push(Box::new(pat));
167 self
168 }
169}
170
171impl Pattern for SequencePattern {
172 fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize> {
173 let mut tok_cursor = 0;
174
175 for pat in self.token_patterns.iter() {
176 let match_length = pat.matches(&tokens[tok_cursor..], source)?;
177 tok_cursor += match_length;
178 }
179
180 Some(tok_cursor)
181 }
182}
183
#[cfg(test)]
mod tests {
    use super::SequencePattern;
    use crate::Document;
    use crate::patterns::{DocPattern, Pattern};

    /// Asserts that `pat`, matched from the first token of `text`, consumes
    /// every token of the resulting document.
    fn assert_spans_whole_document(pat: &SequencePattern, text: &str) {
        let doc = Document::new_plain_english_curated(text);
        let consumed = pat.matches(doc.get_tokens(), doc.get_source());

        assert_eq!(consumed, Some(doc.get_tokens().len()));
    }

    /// Returns how many matches `pat` finds anywhere in `text`.
    fn count_matches(pat: &SequencePattern, text: &str) -> usize {
        let doc = Document::new_plain_english_curated(text);
        pat.find_all_matches_in_doc(&doc).len()
    }

    #[test]
    fn matches_n_whitespace_tokens() {
        let pat = SequencePattern::default()
            .then_any_word()
            .then_whitespace()
            .then_any_word();

        assert_spans_whole_document(&pat, "word\n \nword");
    }

    #[test]
    fn matches_specific_words() {
        let pat = SequencePattern::default()
            .then_exact_word("she")
            .then_whitespace()
            .then_exact_word("her");

        assert_spans_whole_document(&pat, "she her");
    }

    #[test]
    fn match_t_aco_and_t_ws() {
        let pat = SequencePattern::aco("foo").t_ws().t_aco("bar");

        assert_spans_whole_document(&pat, "foo\nBAR");
    }

    #[test]
    fn exact_word_matches_title_case() {
        let pat = SequencePattern::default().then_exact_word("Foo");

        assert_spans_whole_document(&pat, "Foo");
    }

    #[test]
    fn exact_means_case_sensitive() {
        let pat = SequencePattern::default().then_exact_word("Foo");

        // Only the identically-capitalized "Foo" should be found.
        assert_eq!(count_matches(&pat, "foo Foo FOO"), 1);
    }

    #[test]
    fn any_capitalization_of_matches_different_cases() {
        let pat = SequencePattern::aco("foo");

        // All three capitalizations should be found.
        assert_eq!(count_matches(&pat, "foo Foo FOO"), 3);
    }
}