harper_core/patterns/
sequence_pattern.rs1use paste::paste;
2
3use super::whitespace_pattern::WhitespacePattern;
4use super::{
5 AnyCapitalization, AnyPattern, IndefiniteArticle, Pattern, RepeatingPattern, SingularSubject,
6};
7use crate::{Token, TokenKind};
8
9#[derive(Default)]
31pub struct SequencePattern {
32 token_patterns: Vec<Box<dyn Pattern>>,
33}
34
/// Generates three builder methods on `SequencePattern` from one token-kind predicate.
///
/// For an identifier such as `noun`, the expansion defines `then_noun`,
/// `then_one_or_more_nouns` and `then_anything_but_noun`. Each of them tests a token with
/// `tok.kind.is_noun()`; the identifier is spliced into the names with `paste!`.
macro_rules! gen_then_from_is {
    ($quality:ident) => {
        paste! {
            /// Appends a step that accepts a token for which the matching
            /// `is_*` check on its kind returns `true`.
            pub fn [< then_$quality >] (mut self) -> Self {
                self.token_patterns.push(Box::new(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                }));

                self
            }

            /// Appends the same check as the singular method, routed through
            /// `then_one_or_more` so that it may repeat.
            pub fn [< then_one_or_more_$quality s >] (self) -> Self {
                self.then_one_or_more(Box::new(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                }))
            }

            /// Appends a step that accepts a token for which the matching
            /// `is_*` check on its kind returns `false`.
            pub fn [< then_anything_but_$quality >] (mut self) -> Self {
                self.token_patterns.push(Box::new(|tok: &Token, _source: &[char]| {
                    !tok.kind.[< is_$quality >]()
                }));

                self
            }
        }
    };
}
67
68impl SequencePattern {
69 gen_then_from_is!(nominal);
70 gen_then_from_is!(noun);
71 gen_then_from_is!(possessive_nominal);
72 gen_then_from_is!(plural_nominal);
73 gen_then_from_is!(verb);
74 gen_then_from_is!(linking_verb);
75 gen_then_from_is!(pronoun);
76 gen_then_from_is!(punctuation);
77 gen_then_from_is!(conjunction);
78 gen_then_from_is!(comma);
79 gen_then_from_is!(period);
80 gen_then_from_is!(number);
81 gen_then_from_is!(case_separator);
82 gen_then_from_is!(adverb);
83 gen_then_from_is!(adjective);
84 gen_then_from_is!(apostrophe);
85 gen_then_from_is!(hyphen);
86 gen_then_from_is!(determiner);
87 gen_then_from_is!(proper_noun);
88 gen_then_from_is!(preposition);
89
90 pub fn then_indefinite_article(self) -> Self {
91 self.then(IndefiniteArticle::default())
92 }
93
94 pub fn then_exact_word(mut self, word: &'static str) -> Self {
95 self.token_patterns
96 .push(Box::new(|tok: &Token, source: &[char]| {
97 if !tok.kind.is_word() {
98 return false;
99 }
100
101 let tok_chars = tok.span.get_content(source);
102
103 let mut w_char_count = 0;
104 for (i, w_char) in word.chars().enumerate() {
105 w_char_count += 1;
106
107 if tok_chars.get(i).cloned() != Some(w_char) {
108 return false;
109 }
110 }
111
112 w_char_count == tok_chars.len()
113 }));
114 self
115 }
116
117 pub fn then_singular_subject(self) -> Self {
118 self.then(SingularSubject::default())
119 }
120
121 pub fn aco(word: &'static str) -> Self {
123 Self::any_capitalization_of(word)
124 }
125
126 pub fn any_capitalization_of(word: &'static str) -> Self {
127 Self::default().then_any_capitalization_of(word)
128 }
129
130 pub fn t_aco(self, word: &'static str) -> Self {
132 self.then_any_capitalization_of(word)
133 }
134
135 pub fn then_any_capitalization_of(mut self, word: &'static str) -> Self {
137 self.token_patterns
138 .push(Box::new(AnyCapitalization::of(word)));
139 self
140 }
141
142 pub fn then_any_word(mut self) -> Self {
144 self.token_patterns
145 .push(Box::new(|tok: &Token, _source: &[char]| tok.kind.is_word()));
146 self
147 }
148
149 pub fn then_strict(mut self, kind: TokenKind) -> Self {
151 self.token_patterns
152 .push(Box::new(move |tok: &Token, _source: &[char]| {
153 tok.kind == kind
154 }));
155 self
156 }
157
158 pub fn then_whitespace(mut self) -> Self {
160 self.token_patterns.push(Box::new(WhitespacePattern));
161 self
162 }
163
164 pub fn then_one_or_more(mut self, pat: impl Pattern + 'static) -> Self {
165 self.token_patterns
166 .push(Box::new(RepeatingPattern::new(Box::new(pat), 0)));
167 self
168 }
169
170 pub fn then_anything(mut self) -> Self {
173 self.token_patterns.push(Box::new(AnyPattern));
174 self
175 }
176
177 pub fn then(mut self, pat: impl Pattern + 'static) -> Self {
178 self.token_patterns.push(Box::new(pat));
179 self
180 }
181}
182
183impl Pattern for SequencePattern {
184 fn matches(&self, tokens: &[Token], source: &[char]) -> usize {
185 let mut tok_cursor = 0;
186
187 for pat in self.token_patterns.iter() {
188 let match_length = pat.matches(&tokens[tok_cursor..], source);
189
190 if match_length == 0 {
191 return 0;
192 }
193
194 tok_cursor += match_length;
195 }
196
197 tok_cursor
198 }
199}
200
#[cfg(test)]
mod tests {
    use super::SequencePattern;
    use crate::Document;
    use crate::patterns::Pattern;

    /// A word / whitespace / word sequence must consume every token of a document whose
    /// two words are separated by a newline, a space and another newline.
    #[test]
    fn matches_n_whitespace_tokens() {
        let document = Document::new_plain_english_curated("word\n \nword");
        let pattern = SequencePattern::default()
            .then_any_word()
            .then_whitespace()
            .then_any_word();

        let tokens = document.get_tokens();
        assert_eq!(pattern.matches(tokens, document.get_source()), tokens.len());
    }

    /// Two exact-word steps separated by whitespace must consume every token of
    /// `"she her"`.
    #[test]
    fn matches_specific_words() {
        let document = Document::new_plain_english_curated("she her");
        let pattern = SequencePattern::default()
            .then_exact_word("she")
            .then_whitespace()
            .then_exact_word("her");

        let tokens = document.get_tokens();
        assert_eq!(pattern.matches(tokens, document.get_source()), tokens.len());
    }
}