harper_core/patterns/
sequence_pattern.rs1use paste::paste;
2
3use super::whitespace_pattern::WhitespacePattern;
4use super::{
5 AnyCapitalization, AnyPattern, IndefiniteArticle, Pattern, RepeatingPattern, SingularSubject,
6};
7use crate::{Token, TokenKind};
8
9#[derive(Default)]
31pub struct SequencePattern {
32 token_patterns: Vec<Box<dyn Pattern>>,
33}
34
/// Generates three builder methods for a token "quality" `$quality`.
///
/// Every generated method decides on a single token by calling
/// `tok.kind.is_$quality()`; the source text is ignored. For a quality `q`
/// the expansion contains:
///
/// - `then_q`: append a step that accepts a token for which `is_q()` is true.
/// - `then_one_or_more_qs`: append that same predicate through
///   `then_one_or_more`.
/// - `then_anything_but_q`: append a step that accepts a token for which
///   `is_q()` is false.
///
/// The macro is meant to be invoked inside `impl SequencePattern`, because the
/// generated methods touch `self.token_patterns` and `self.then_one_or_more`.
macro_rules! gen_then_from_is {
    ($quality:ident) => {
        paste! {
            /// Append a step that accepts one token with this quality.
            pub fn [< then_$quality >](mut self) -> Self {
                self.token_patterns.push(Box::new(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                }));

                self
            }

            /// Append a repeated step built from this quality's predicate.
            pub fn [< then_one_or_more_$quality s >](self) -> Self {
                // `then_one_or_more` boxes its argument itself, so the closure
                // is handed over unboxed.
                self.then_one_or_more(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                })
            }

            /// Append a step that accepts one token *without* this quality.
            pub fn [< then_anything_but_$quality >](mut self) -> Self {
                self.token_patterns.push(Box::new(|tok: &Token, _source: &[char]| {
                    !tok.kind.[< is_$quality >]()
                }));

                self
            }
        }
    };
}
67
68impl SequencePattern {
69 gen_then_from_is!(noun);
70 gen_then_from_is!(possessive_noun);
71 gen_then_from_is!(plural_noun);
72 gen_then_from_is!(verb);
73 gen_then_from_is!(linking_verb);
74 gen_then_from_is!(pronoun);
75 gen_then_from_is!(punctuation);
76 gen_then_from_is!(conjunction);
77 gen_then_from_is!(comma);
78 gen_then_from_is!(period);
79 gen_then_from_is!(number);
80 gen_then_from_is!(case_separator);
81 gen_then_from_is!(adverb);
82 gen_then_from_is!(adjective);
83 gen_then_from_is!(apostrophe);
84 gen_then_from_is!(hyphen);
85 gen_then_from_is!(article);
86 gen_then_from_is!(proper_noun);
87 gen_then_from_is!(preposition);
88
89 pub fn then_indefinite_article(self) -> Self {
90 self.then(IndefiniteArticle::default())
91 }
92
93 pub fn then_exact_word(mut self, word: &'static str) -> Self {
94 self.token_patterns
95 .push(Box::new(|tok: &Token, source: &[char]| {
96 if !tok.kind.is_word() {
97 return false;
98 }
99
100 let tok_chars = tok.span.get_content(source);
101
102 let mut w_char_count = 0;
103 for (i, w_char) in word.chars().enumerate() {
104 w_char_count += 1;
105
106 if tok_chars.get(i).cloned() != Some(w_char) {
107 return false;
108 }
109 }
110
111 w_char_count == tok_chars.len()
112 }));
113 self
114 }
115
116 pub fn then_singular_subject(self) -> Self {
117 self.then(SingularSubject::default())
118 }
119
120 pub fn aco(word: &'static str) -> Self {
122 Self::any_capitalization_of(word)
123 }
124
125 pub fn any_capitalization_of(word: &'static str) -> Self {
126 Self::default().then_any_capitalization_of(word)
127 }
128
129 pub fn t_aco(self, word: &'static str) -> Self {
131 self.then_any_capitalization_of(word)
132 }
133
134 pub fn then_any_capitalization_of(mut self, word: &'static str) -> Self {
136 self.token_patterns
137 .push(Box::new(AnyCapitalization::of(word)));
138 self
139 }
140
141 pub fn then_any_word(mut self) -> Self {
143 self.token_patterns
144 .push(Box::new(|tok: &Token, _source: &[char]| tok.kind.is_word()));
145 self
146 }
147
148 pub fn then_strict(mut self, kind: TokenKind) -> Self {
150 self.token_patterns
151 .push(Box::new(move |tok: &Token, _source: &[char]| {
152 tok.kind == kind
153 }));
154 self
155 }
156
157 pub fn then_whitespace(mut self) -> Self {
159 self.token_patterns.push(Box::new(WhitespacePattern));
160 self
161 }
162
163 pub fn then_one_or_more(mut self, pat: impl Pattern + 'static) -> Self {
164 self.token_patterns
165 .push(Box::new(RepeatingPattern::new(Box::new(pat), 0)));
166 self
167 }
168
169 pub fn then_anything(mut self) -> Self {
172 self.token_patterns.push(Box::new(AnyPattern));
173 self
174 }
175
176 pub fn then(mut self, pat: impl Pattern + 'static) -> Self {
177 self.token_patterns.push(Box::new(pat));
178 self
179 }
180}
181
182impl Pattern for SequencePattern {
183 fn matches(&self, tokens: &[Token], source: &[char]) -> usize {
184 let mut tok_cursor = 0;
185
186 for pat in self.token_patterns.iter() {
187 let match_length = pat.matches(&tokens[tok_cursor..], source);
188
189 if match_length == 0 {
190 return 0;
191 }
192
193 tok_cursor += match_length;
194 }
195
196 tok_cursor
197 }
198}
199
#[cfg(test)]
mod tests {
    use super::SequencePattern;
    use crate::Document;
    use crate::patterns::Pattern;

    /// word / whitespace / word must cover the whole document, even though
    /// the gap between the two words is made of newlines and a space.
    #[test]
    fn matches_n_whitespace_tokens() {
        let doc = Document::new_plain_english_curated("word\n \nword");
        let pat = SequencePattern::default()
            .then_any_word()
            .then_whitespace()
            .then_any_word();

        let tokens = doc.get_tokens();
        let matched = pat.matches(tokens, doc.get_source());

        assert_eq!(matched, tokens.len());
    }

    /// Two exact words separated by whitespace must cover the whole document.
    #[test]
    fn matches_specific_words() {
        let doc = Document::new_plain_english_curated("she her");
        let pat = SequencePattern::default()
            .then_exact_word("she")
            .then_whitespace()
            .then_exact_word("her");

        let tokens = doc.get_tokens();
        let matched = pat.matches(tokens, doc.get_source());

        assert_eq!(matched, tokens.len());
    }
}