1use paste::paste;
2
3use crate::{
4 CharStringExt, Span, Token, TokenKind,
5 expr::{FirstMatchOf, FixedPhrase, LongestMatchOf},
6 patterns::{AnyPattern, IndefiniteArticle, WhitespacePattern, Word, WordSet},
7};
8
9use super::{Expr, Optional, Repeating, Step, UnlessStep};
10
11#[derive(Default)]
12pub struct SequenceExpr {
13 exprs: Vec<Box<dyn Expr>>,
14}
15
/// Generates a family of builder methods for one `TokenKind::is_*` predicate.
///
/// Invoked as `gen_then_from_is!(quality)` inside an `impl` block, this expands to four
/// methods, each of which consumes `self` and returns the extended sequence:
///
/// - `then_<quality>`: a step that accepts a token when `tok.kind.is_<quality>()` is true.
/// - `then_optional_<quality>`: the same predicate, added through `then_optional`.
/// - `then_one_or_more_<quality>s`: the same predicate, added through `then_one_or_more`.
/// - `then_anything_but_<quality>`: a step that accepts a token when the predicate is false.
///
/// The method names are assembled with the `paste!` macro.
macro_rules! gen_then_from_is {
    ($quality:ident) => {
        paste! {
            #[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_$quality >] (self) -> Self {
                self.then(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                })
            }

            #[doc = concat!("Adds an optional step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_optional_$quality >] (self) -> Self {
                self.then_optional(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                })
            }

            #[doc = concat!("Adds a step matching one or more consecutive tokens where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_one_or_more_$quality s >] (self) -> Self {
                // `then_one_or_more` boxes its argument itself, so the closure is passed
                // directly rather than being wrapped in a second `Box`.
                self.then_one_or_more(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                })
            }

            #[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns false.")]
            pub fn [< then_anything_but_$quality >] (self) -> Self {
                self.then(|tok: &Token, _source: &[char]| {
                    !tok.kind.[< is_$quality >]()
                })
            }
        }
    };
}
54
55impl Expr for SequenceExpr {
56 fn run(&self, mut cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span<Token>> {
60 let mut window = Span::new_with_len(cursor, 0);
61
62 for cur_expr in &self.exprs {
63 let out = cur_expr.run(cursor, tokens, source)?;
64
65 if out.end > out.start {
67 window.expand_to_include(out.start);
68 window.expand_to_include(out.end.checked_sub(1).unwrap_or(out.start));
69 }
70
71 if out.end > cursor {
73 cursor = out.end;
74 } else if out.start < cursor {
75 cursor = out.start;
76 }
77 }
79
80 Some(window)
81 }
82}
83
84impl SequenceExpr {
85 pub fn any_capitalization_of(word: &'static str) -> Self {
89 Self::default().then_any_capitalization_of(word)
90 }
91
92 pub fn aco(word: &'static str) -> Self {
94 Self::any_capitalization_of(word)
95 }
96
97 pub fn any_of(exprs: Vec<Box<dyn Expr>>) -> Self {
99 Self::default().then_any_of(exprs)
100 }
101
102 pub fn word_set(words: &'static [&'static str]) -> Self {
104 Self::default().then_word_set(words)
105 }
106
107 pub fn then(mut self, expr: impl Expr + 'static) -> Self {
111 self.exprs.push(Box::new(expr));
112 self
113 }
114
115 pub fn then_optional(mut self, expr: impl Expr + 'static) -> Self {
117 self.exprs.push(Box::new(Optional::new(expr)));
118 self
119 }
120
121 pub fn then_any_of(mut self, exprs: Vec<Box<dyn Expr>>) -> Self {
127 self.exprs.push(Box::new(FirstMatchOf::new(exprs)));
128 self
129 }
130
131 pub fn then_longest_of(mut self, exprs: Vec<Box<dyn Expr>>) -> Self {
136 self.exprs.push(Box::new(LongestMatchOf::new(exprs)));
137 self
138 }
139
140 pub fn then_seq(mut self, mut other: Self) -> Self {
143 self.exprs.append(&mut other.exprs);
144 self
145 }
146
147 pub fn then_word_set(self, words: &'static [&'static str]) -> Self {
149 self.then(WordSet::new(words))
150 }
151
152 pub fn then_strict(self, kind: TokenKind) -> Self {
154 self.then(move |tok: &Token, _source: &[char]| tok.kind == kind)
155 }
156
157 pub fn then_whitespace(self) -> Self {
159 self.then(WhitespacePattern)
160 }
161
162 pub fn t_ws(self) -> Self {
164 self.then_whitespace()
165 }
166
167 pub fn then_one_or_more(self, expr: impl Expr + 'static) -> Self {
168 self.then(Repeating::new(Box::new(expr), 1))
169 }
170
171 pub fn then_unless(self, condition: impl Expr + 'static) -> Self {
178 self.then(UnlessStep::new(condition, |_tok: &Token, _src: &[char]| {
179 true
180 }))
181 }
182
183 pub fn then_anything(self) -> Self {
187 self.then(AnyPattern)
188 }
189
190 pub fn t_any(self) -> Self {
194 self.then_anything()
195 }
196
197 pub fn then_any_word(self) -> Self {
201 self.then(|tok: &Token, _source: &[char]| tok.kind.is_word())
202 }
203
204 pub fn then_any_capitalization_of(self, word: &'static str) -> Self {
206 self.then(Word::new(word))
207 }
208
209 pub fn t_aco(self, word: &'static str) -> Self {
211 self.then_any_capitalization_of(word)
212 }
213
214 pub fn then_exact_word(self, word: &'static str) -> Self {
216 self.then(Word::new_exact(word))
217 }
218
219 pub fn then_fixed_phrase(self, phrase: &'static str) -> Self {
221 self.then(FixedPhrase::from_phrase(phrase))
222 }
223
224 pub fn then_word_except(self, words: &'static [&'static str]) -> Self {
226 self.then(move |tok: &Token, src: &[char]| {
227 !tok.kind.is_word()
228 || !words
229 .iter()
230 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
231 })
232 }
233
234 pub fn then_kind_except<F>(self, pred: F, words: &'static [&'static str]) -> Self
238 where
239 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
240 {
241 self.then(move |tok: &Token, src: &[char]| {
242 pred(&tok.kind)
243 && !words
244 .iter()
245 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
246 })
247 }
248
249 pub fn then_kind_both<F1, F2>(self, pred1: F1, pred2: F2) -> Self
251 where
252 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
253 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
254 {
255 self.then(move |tok: &Token, _source: &[char]| pred1(&tok.kind) && pred2(&tok.kind))
256 }
257
258 pub fn then_kind_either<F1, F2>(self, pred1: F1, pred2: F2) -> Self
260 where
261 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
262 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
263 {
264 self.then(move |tok: &Token, _source: &[char]| pred1(&tok.kind) || pred2(&tok.kind))
265 }
266
267 pub fn then_kind_any<F>(self, preds: &'static [F]) -> Self
269 where
270 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
271 {
272 self.then(move |tok: &Token, _source: &[char]| preds.iter().any(|pred| pred(&tok.kind)))
273 }
274
275 pub fn then_kind_any_or_words<F>(
276 self,
277 preds: &'static [F],
278 words: &'static [&'static str],
279 ) -> Self
280 where
281 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
282 {
283 self.then(move |tok: &Token, src: &[char]| {
284 preds.iter().any(|pred| pred(&tok.kind))
285 || words
287 .iter()
288 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
289 })
290 }
291
292 pub fn then_kind_is_but_is_not<F1, F2>(self, pred1: F1, pred2: F2) -> Self
294 where
295 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
296 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
297 {
298 self.then(move |tok: &Token, _source: &[char]| pred1(&tok.kind) && !pred2(&tok.kind))
299 }
300
301 pub fn then_kind_is_but_is_not_except<F1, F2>(
304 self,
305 pred1: F1,
306 pred2: F2,
307 words: &'static [&'static str],
308 ) -> Self
309 where
310 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
311 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
312 {
313 self.then(move |tok: &Token, src: &[char]| {
314 pred1(&tok.kind)
315 && !pred2(&tok.kind)
316 && !words
317 .iter()
318 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
319 })
320 }
321
322 gen_then_from_is!(oov);
326 gen_then_from_is!(swear);
327
328 gen_then_from_is!(nominal);
333 gen_then_from_is!(plural_nominal);
334 gen_then_from_is!(non_plural_nominal);
335 gen_then_from_is!(possessive_nominal);
336
337 gen_then_from_is!(noun);
340 gen_then_from_is!(proper_noun);
341 gen_then_from_is!(mass_noun_only);
342
343 gen_then_from_is!(pronoun);
346 gen_then_from_is!(personal_pronoun);
347 gen_then_from_is!(first_person_singular_pronoun);
348 gen_then_from_is!(first_person_plural_pronoun);
349 gen_then_from_is!(second_person_pronoun);
350 gen_then_from_is!(third_person_pronoun);
351 gen_then_from_is!(third_person_singular_pronoun);
352 gen_then_from_is!(third_person_plural_pronoun);
353 gen_then_from_is!(object_pronoun);
354
355 gen_then_from_is!(verb);
358 gen_then_from_is!(auxiliary_verb);
359 gen_then_from_is!(linking_verb);
360
361 gen_then_from_is!(adjective);
364 gen_then_from_is!(positive_adjective);
365 gen_then_from_is!(comparative_adjective);
366 gen_then_from_is!(superlative_adjective);
367
368 gen_then_from_is!(adverb);
371
372 gen_then_from_is!(determiner);
375 gen_then_from_is!(demonstrative_determiner);
376 gen_then_from_is!(quantifier);
377 gen_then_from_is!(non_quantifier_determiner);
378
379 pub fn then_indefinite_article(self) -> Self {
381 self.then(IndefiniteArticle::default())
382 }
383
384 gen_then_from_is!(conjunction);
387 gen_then_from_is!(preposition);
388
389 gen_then_from_is!(punctuation);
392 gen_then_from_is!(apostrophe);
393 gen_then_from_is!(comma);
394 gen_then_from_is!(hyphen);
395 gen_then_from_is!(period);
396 gen_then_from_is!(semicolon);
397
398 gen_then_from_is!(number);
401 gen_then_from_is!(case_separator);
402 gen_then_from_is!(likely_homograph);
403}
404
405impl<S> From<S> for SequenceExpr
406where
407 S: Step + 'static,
408{
409 fn from(step: S) -> Self {
410 Self {
411 exprs: vec![Box::new(step)],
412 }
413 }
414}
415
#[cfg(test)]
mod tests {
    use crate::{
        Document, TokenKind,
        expr::{ExprExt, SequenceExpr},
        linting::tests::SpanVecExt,
    };

    /// Sentence shared by every test in this module.
    const SAMPLE: &str = "Use a good example.";

    /// Tokens that are both nouns and verbs are matched by `then_kind_both`.
    #[test]
    fn test_kind_both() {
        let doc = Document::new_plain_english_curated(SAMPLE);
        let expr = SequenceExpr::default().then_kind_both(TokenKind::is_noun, TokenKind::is_verb);

        let found: Vec<_> = expr.iter_matches_in_doc(&doc).collect();

        assert_eq!(found.to_strings(&doc), vec!["Use", "good", "example"]);
    }

    /// Tokens that are adjectives or determiners are matched by `then_kind_either`.
    #[test]
    fn test_adjective_or_determiner() {
        let doc = Document::new_plain_english_curated(SAMPLE);
        let expr = SequenceExpr::default()
            .then_kind_either(TokenKind::is_adjective, TokenKind::is_determiner);

        let found: Vec<_> = expr.iter_matches_in_doc(&doc).collect();

        assert_eq!(found.to_strings(&doc), vec!["a", "good"]);
    }

    /// Tokens that are nouns but not adjectives are matched by `then_kind_is_but_is_not`.
    #[test]
    fn test_noun_but_not_adjective() {
        let doc = Document::new_plain_english_curated(SAMPLE);
        let expr = SequenceExpr::default()
            .then_kind_is_but_is_not(TokenKind::is_noun, TokenKind::is_adjective);

        let found: Vec<_> = expr.iter_matches_in_doc(&doc).collect();

        assert_eq!(found.to_strings(&doc), vec!["Use", "example"]);
    }
}