harper_core/patterns/
sequence_pattern.rs1use paste::paste;
2
3use super::whitespace_pattern::WhitespacePattern;
4use super::{AnyCapitalization, AnyPattern, IndefiniteArticle, Pattern, RepeatingPattern};
5use crate::{Token, TokenKind};
6
7#[derive(Default)]
29pub struct SequencePattern {
30 token_patterns: Vec<Box<dyn Pattern>>,
31}
32
// Generates three builder methods for a `tok.kind.is_<quality>()` predicate.
//
// For an invocation `gen_then_from_is!(noun)` the expansion defines:
//   * `then_noun`               - one token for which `is_noun()` is true,
//   * `then_one_or_more_nouns`  - the same test, routed through `then_one_or_more`,
//   * `then_anything_but_noun`  - one token for which `is_noun()` is false.
//
// The macro must be invoked inside `impl SequencePattern`, because the generated
// methods push onto `self.token_patterns` and call `self.then_one_or_more`.
macro_rules! gen_then_from_is {
    ($quality:ident) => {
        paste! {
            /// Appends a step that accepts a token whose kind passes the predicate.
            pub fn [< then_$quality >] (mut self) -> Self {
                self.token_patterns.push(Box::new(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                }));

                self
            }

            /// Appends a repeating step built from the same predicate via `then_one_or_more`.
            pub fn [< then_one_or_more_$quality s >] (self) -> Self {
                self.then_one_or_more(Box::new(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                }))
            }

            /// Appends a step that accepts a token whose kind fails the predicate.
            pub fn [< then_anything_but_$quality >] (mut self) -> Self {
                self.token_patterns.push(Box::new(|tok: &Token, _source: &[char]| {
                    !tok.kind.[< is_$quality >]()
                }));

                self
            }
        }
    };
}
65
66impl SequencePattern {
67 gen_then_from_is!(nominal);
68 gen_then_from_is!(noun);
69 gen_then_from_is!(possessive_nominal);
70 gen_then_from_is!(plural_nominal);
71 gen_then_from_is!(verb);
72 gen_then_from_is!(linking_verb);
73 gen_then_from_is!(pronoun);
74 gen_then_from_is!(punctuation);
75 gen_then_from_is!(conjunction);
76 gen_then_from_is!(comma);
77 gen_then_from_is!(period);
78 gen_then_from_is!(number);
79 gen_then_from_is!(case_separator);
80 gen_then_from_is!(adverb);
81 gen_then_from_is!(adjective);
82 gen_then_from_is!(apostrophe);
83 gen_then_from_is!(hyphen);
84 gen_then_from_is!(determiner);
85 gen_then_from_is!(proper_noun);
86 gen_then_from_is!(preposition);
87 gen_then_from_is!(not_plural_nominal);
88
89 pub fn then_indefinite_article(self) -> Self {
90 self.then(IndefiniteArticle::default())
91 }
92
93 pub fn then_exact_word(mut self, word: &'static str) -> Self {
94 self.token_patterns
95 .push(Box::new(|tok: &Token, source: &[char]| {
96 if !tok.kind.is_word() {
97 return false;
98 }
99
100 let tok_chars = tok.span.get_content(source);
101
102 let mut w_char_count = 0;
103 for (i, w_char) in word.chars().enumerate() {
104 w_char_count += 1;
105
106 if tok_chars.get(i).cloned() != Some(w_char) {
107 return false;
108 }
109 }
110
111 w_char_count == tok_chars.len()
112 }));
113 self
114 }
115
116 pub fn aco(word: &'static str) -> Self {
118 Self::any_capitalization_of(word)
119 }
120
121 pub fn any_capitalization_of(word: &'static str) -> Self {
122 Self::default().then_any_capitalization_of(word)
123 }
124
125 pub fn t_aco(self, word: &'static str) -> Self {
127 self.then_any_capitalization_of(word)
128 }
129
130 pub fn then_any_capitalization_of(mut self, word: &'static str) -> Self {
132 self.token_patterns
133 .push(Box::new(AnyCapitalization::of(word)));
134 self
135 }
136
137 pub fn then_any_word(mut self) -> Self {
139 self.token_patterns
140 .push(Box::new(|tok: &Token, _source: &[char]| tok.kind.is_word()));
141 self
142 }
143
144 pub fn then_strict(mut self, kind: TokenKind) -> Self {
146 self.token_patterns
147 .push(Box::new(move |tok: &Token, _source: &[char]| {
148 tok.kind == kind
149 }));
150 self
151 }
152
153 pub fn then_whitespace(mut self) -> Self {
155 self.token_patterns.push(Box::new(WhitespacePattern));
156 self
157 }
158
159 pub fn then_one_or_more(mut self, pat: impl Pattern + 'static) -> Self {
160 self.token_patterns
161 .push(Box::new(RepeatingPattern::new(Box::new(pat), 0)));
162 self
163 }
164
165 pub fn then_anything(mut self) -> Self {
168 self.token_patterns.push(Box::new(AnyPattern));
169 self
170 }
171
172 pub fn then(mut self, pat: impl Pattern + 'static) -> Self {
173 self.token_patterns.push(Box::new(pat));
174 self
175 }
176}
177
178impl Pattern for SequencePattern {
179 fn matches(&self, tokens: &[Token], source: &[char]) -> usize {
180 let mut tok_cursor = 0;
181
182 for pat in self.token_patterns.iter() {
183 let match_length = pat.matches(&tokens[tok_cursor..], source);
184
185 if match_length == 0 {
186 return 0;
187 }
188
189 tok_cursor += match_length;
190 }
191
192 tok_cursor
193 }
194}
195
#[cfg(test)]
mod tests {
    use super::SequencePattern;
    use crate::Document;
    use crate::patterns::Pattern;

    /// Word, whitespace (including newlines), word: the sequence should consume every token.
    #[test]
    fn matches_n_whitespace_tokens() {
        let doc = Document::new_plain_english_curated("word\n \nword");
        let sequence = SequencePattern::default()
            .then_any_word()
            .then_whitespace()
            .then_any_word();

        let matched = sequence.matches(doc.get_tokens(), doc.get_source());
        let expected = doc.get_tokens().len();

        assert_eq!(matched, expected);
    }

    /// Two exact words separated by whitespace should consume every token.
    #[test]
    fn matches_specific_words() {
        let doc = Document::new_plain_english_curated("she her");
        let sequence = SequencePattern::default()
            .then_exact_word("she")
            .then_whitespace()
            .then_exact_word("her");

        let matched = sequence.matches(doc.get_tokens(), doc.get_source());
        let expected = doc.get_tokens().len();

        assert_eq!(matched, expected);
    }
}