1use paste::paste;
2
3use crate::{
4 CharStringExt, Lrc, Span, Token, TokenKind,
5 expr::{FirstMatchOf, FixedPhrase, LongestMatchOf},
6 patterns::{AnyPattern, IndefiniteArticle, WhitespacePattern, Word, WordSet},
7};
8
9use super::{Expr, Optional, OwnedExprExt, Repeating, Step, UnlessStep};
10
11#[derive(Default)]
12pub struct SequenceExpr {
13 exprs: Vec<Box<dyn Expr>>,
14}
15
/// Generates a family of builder methods for one `TokenKind::is_*` predicate.
///
/// Invoked inside an `impl` block as `gen_then_from_is!(noun)`, it expands to four methods
/// that each consume and return `Self`:
///
/// - `then_noun`: a step matching a token whose kind satisfies `is_noun()`.
/// - `then_optional_noun`: the same check, appended through `then_optional`.
/// - `then_one_or_more_nouns`: the same check, appended through `then_one_or_more`
///   (note the `s` appended to the method name).
/// - `then_anything_but_noun`: a step matching a token whose kind does *not* satisfy
///   `is_noun()`.
///
/// The expansion relies on `then_kind_where`, `then_optional` and `then_one_or_more` being
/// available on `Self`.
macro_rules! gen_then_from_is {
    ($quality:ident) => {
        paste! {
            #[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_$quality >] (self) -> Self {
                self.then_kind_where(|kind| kind.[< is_$quality >]())
            }

            #[doc = concat!("Adds an optional step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_optional_$quality >] (self) -> Self {
                self.then_optional(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                })
            }

            #[doc = concat!("Adds a step matching one or more consecutive tokens where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_one_or_more_$quality s >] (self) -> Self {
                // `then_one_or_more` boxes its argument itself, so the closure is passed
                // directly (as `then_optional_*` does) instead of being boxed twice.
                self.then_one_or_more(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                })
            }

            #[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns false.")]
            pub fn [< then_anything_but_$quality >] (self) -> Self {
                self.then_kind_where(|kind| !kind.[< is_$quality >]())
            }
        }
    };
}
50
51impl Expr for SequenceExpr {
52 fn run(&self, mut cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span<Token>> {
56 let mut window = Span::empty(cursor);
57
58 for cur_expr in &self.exprs {
59 let out = cur_expr.run(cursor, tokens, source)?;
60
61 let is_zero_width = out.end == out.start;
64
65 if !is_zero_width {
66 if out.end > out.start {
68 window.expand_to_include(out.start);
69 window.expand_to_include(out.end.checked_sub(1).unwrap_or(out.start));
70 }
71
72 if out.end > cursor {
74 cursor = out.end;
75 } else if out.start < cursor {
76 cursor = out.start;
77 }
78 }
79 }
81
82 Some(window)
83 }
84}
85
86impl SequenceExpr {
87 pub fn with(expr: impl Expr + 'static) -> Self {
91 Self::default().then(expr)
92 }
93
94 pub fn anything() -> Self {
98 Self::default().then_anything()
99 }
100
101 pub fn any_capitalization_of(word: &'static str) -> Self {
105 Self::default().then_any_capitalization_of(word)
106 }
107
108 pub fn aco(word: &'static str) -> Self {
110 Self::any_capitalization_of(word)
111 }
112
113 pub fn word_set(words: &'static [&'static str]) -> Self {
115 Self::default().then_word_set(words)
116 }
117
118 pub fn any_word() -> Self {
120 Self::default().then_any_word()
121 }
122
123 pub fn optional(expr: impl Expr + 'static) -> Self {
127 Self::default().then_optional(expr)
128 }
129
130 pub fn fixed_phrase(phrase: &'static str) -> Self {
132 Self::default().then_fixed_phrase(phrase)
133 }
134
135 pub fn any_of(exprs: Vec<Box<dyn Expr>>) -> Self {
139 Self::default().then_any_of(exprs)
140 }
141
142 pub fn longest_of(exprs: Vec<Box<dyn Expr>>) -> Self {
144 Self::default().then_longest_of(exprs)
145 }
146
147 pub fn whitespace() -> Self {
148 Self::default().then_whitespace()
149 }
150
151 pub fn unless(condition: impl Expr + 'static) -> Self {
153 Self::default().then_unless(condition)
154 }
155
156 pub fn then(mut self, expr: impl Expr + 'static) -> Self {
160 self.exprs.push(Box::new(expr));
161 self
162 }
163
164 pub fn then_boxed(mut self, expr: Box<dyn Expr>) -> Self {
166 self.exprs.push(expr);
167 self
168 }
169
170 pub fn then_optional(mut self, expr: impl Expr + 'static) -> Self {
172 self.exprs.push(Box::new(Optional::new(expr)));
173 self
174 }
175
176 pub fn then_any_of(mut self, exprs: Vec<Box<dyn Expr>>) -> Self {
182 self.exprs.push(Box::new(FirstMatchOf::new(exprs)));
183 self
184 }
185
186 pub fn then_longest_of(mut self, exprs: Vec<Box<dyn Expr>>) -> Self {
191 self.exprs.push(Box::new(LongestMatchOf::new(exprs)));
192 self
193 }
194
195 pub fn then_seq(mut self, mut other: Self) -> Self {
198 self.exprs.append(&mut other.exprs);
199 self
200 }
201
202 pub fn then_word_set(self, words: &'static [&'static str]) -> Self {
204 self.then(WordSet::new(words))
205 }
206
207 pub fn t_set(self, words: &'static [&'static str]) -> Self {
209 self.then_word_set(words)
210 }
211
212 pub fn then_whitespace(self) -> Self {
214 self.then(WhitespacePattern)
215 }
216
217 pub fn t_ws(self) -> Self {
219 self.then_whitespace()
220 }
221
222 pub fn then_whitespace_or_hyphen(self) -> Self {
224 self.then(WhitespacePattern.or(|tok: &Token, _: &[char]| tok.kind.is_hyphen()))
225 }
226
227 pub fn t_ws_h(self) -> Self {
229 self.then_whitespace_or_hyphen()
230 }
231
232 pub fn then_zero_or_more(self, expr: impl Expr + 'static) -> Self {
234 self.then(Repeating::new(Box::new(expr), 0))
235 }
236
237 pub fn then_one_or_more(self, expr: impl Expr + 'static) -> Self {
239 self.then(Repeating::new(Box::new(expr), 1))
240 }
241
242 pub fn then_zero_or_more_spaced(self, expr: impl Expr + 'static) -> Self {
244 let expr = Lrc::new(expr);
245 self.then(SequenceExpr::with(expr.clone()).then(Repeating::new(
246 Box::new(SequenceExpr::default().t_ws().then(expr)),
247 0,
248 )))
249 }
250
251 pub fn then_unless(self, condition: impl Expr + 'static) -> Self {
258 self.then(UnlessStep::new(condition, |_tok: &Token, _src: &[char]| {
259 true
260 }))
261 }
262
263 pub fn then_anything(self) -> Self {
267 self.then(AnyPattern)
268 }
269
270 pub fn t_any(self) -> Self {
274 self.then_anything()
275 }
276
277 pub fn then_any_word(self) -> Self {
281 self.then_kind_where(|kind| kind.is_word())
282 }
283
284 pub fn then_any_capitalization_of(self, word: &'static str) -> Self {
286 self.then(Word::new(word))
287 }
288
289 pub fn t_aco(self, word: &'static str) -> Self {
291 self.then_any_capitalization_of(word)
292 }
293
294 pub fn then_exact_word(self, word: &'static str) -> Self {
296 self.then(Word::new_exact(word))
297 }
298
299 pub fn then_fixed_phrase(self, phrase: &'static str) -> Self {
301 self.then(FixedPhrase::from_phrase(phrase))
302 }
303
304 pub fn then_word_except(self, words: &'static [&'static str]) -> Self {
306 self.then(move |tok: &Token, src: &[char]| {
307 !tok.kind.is_word() || !words.iter().any(|&word| tok.get_ch(src).eq_str(word))
308 })
309 }
310
311 pub fn then_kind(self, kind: TokenKind) -> Self {
317 self.then_kind_where(move |k| kind == *k)
318 }
319
320 pub fn then_kind_where<F>(mut self, predicate: F) -> Self
322 where
323 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
324 {
325 self.exprs
326 .push(Box::new(move |tok: &Token, _source: &[char]| {
327 predicate(&tok.kind)
328 }));
329 self
330 }
331
332 pub fn then_kind_except<F>(self, pred_is: F, ex: &'static [&'static str]) -> Self
334 where
335 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
336 {
337 self.then(move |tok: &Token, src: &[char]| {
338 pred_is(&tok.kind) && !ex.iter().any(|&word| tok.get_ch(src).eq_str(word))
339 })
340 }
341
342 pub fn then_kind_both<F1, F2>(self, pred_is_1: F1, pred_is_2: F2) -> Self
347 where
348 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
349 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
350 {
351 self.then_kind_where(move |k| pred_is_1(k) && pred_is_2(k))
352 }
353
354 pub fn then_kind_either<F1, F2>(self, pred_is_1: F1, pred_is_2: F2) -> Self
357 where
358 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
359 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
360 {
361 self.then_kind_where(move |k| pred_is_1(k) || pred_is_2(k))
362 }
363
364 pub fn then_kind_neither<F1, F2>(self, pred_isnt_1: F1, pred_isnt_2: F2) -> Self
367 where
368 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
369 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
370 {
371 self.then_kind_where(move |k| !pred_isnt_1(k) && !pred_isnt_2(k))
372 }
373
374 pub fn then_kind_is_but_is_not<F1, F2>(self, pred_is: F1, pred_not: F2) -> Self
377 where
378 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
379 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
380 {
381 self.then_kind_where(move |k| pred_is(k) && !pred_not(k))
382 }
383
384 pub fn then_kind_is_but_is_not_except<F1, F2>(
387 self,
388 pred_is: F1,
389 pred_not: F2,
390 ex: &'static [&'static str],
391 ) -> Self
392 where
393 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
394 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
395 {
396 self.then(move |tok: &Token, src: &[char]| {
397 pred_is(&tok.kind)
398 && !pred_not(&tok.kind)
399 && !ex.iter().any(|&word| tok.get_ch(src).eq_str(word))
400 })
401 }
402
403 pub fn then_kind_is_but_isnt_any_of<F1, F2>(
406 self,
407 pred_is: F1,
408 preds_isnt: &'static [F2],
409 ) -> Self
410 where
411 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
412 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
413 {
414 self.then_kind_where(move |k| pred_is(k) && !preds_isnt.iter().any(|pred| pred(k)))
415 }
416
417 pub fn then_kind_is_but_isnt_any_of_except<F1, F2>(
421 self,
422 pred_is: F1,
423 preds_isnt: &'static [F2],
424 ex: &'static [&'static str],
425 ) -> Self
426 where
427 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
428 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
429 {
430 self.then(move |tok: &Token, src: &[char]| {
431 pred_is(&tok.kind)
432 && !preds_isnt.iter().any(|pred| pred(&tok.kind))
433 && !ex.iter().any(|&word| tok.get_ch(src).eq_str(word))
434 })
435 }
436
437 pub fn then_kind_both_but_not<F1, F2, F3>(
443 self,
444 (pred_is_1, pred_is_2): (F1, F2),
445 pred_not: F3,
446 ) -> Self
447 where
448 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
449 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
450 F3: Fn(&TokenKind) -> bool + Send + Sync + 'static,
451 {
452 self.then_kind_where(move |k| pred_is_1(k) && pred_is_2(k) && !pred_not(k))
453 }
454
455 pub fn then_kind_any<F>(self, preds_is: &'static [F]) -> Self
458 where
459 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
460 {
461 self.then_kind_where(move |k| preds_is.iter().any(|pred| pred(k)))
462 }
463
464 pub fn then_kind_none_of<F>(self, preds_isnt: &'static [F]) -> Self
467 where
468 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
469 {
470 self.then_kind_where(move |k| preds_isnt.iter().all(|pred| !pred(k)))
471 }
472
473 pub fn then_kind_any_except<F>(
476 self,
477 preds_is: &'static [F],
478 ex: &'static [&'static str],
479 ) -> Self
480 where
481 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
482 {
483 self.then(move |tok: &Token, src: &[char]| {
484 preds_is.iter().any(|pred| pred(&tok.kind))
485 && !ex.iter().any(|&word| tok.get_ch(src).eq_str(word))
486 })
487 }
488
489 pub fn then_kind_any_or_words<F>(
492 self,
493 preds: &'static [F],
494 words: &'static [&'static str],
495 ) -> Self
496 where
497 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
498 {
499 self.then(move |tok: &Token, src: &[char]| {
500 preds.iter().any(|pred| pred(&tok.kind))
501 || words.iter().any(|&word| tok.get_ch(src).eq_str(word))
502 })
503 }
504
505 pub fn then_kind_any_but_not<F1, F2>(self, preds_is: &'static [F1], pred_not: F2) -> Self
508 where
509 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
510 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
511 {
512 self.then(move |tok: &Token, _src: &[char]| {
513 preds_is.iter().any(|pred| pred(&tok.kind)) && !pred_not(&tok.kind)
514 })
515 }
516
517 pub fn then_kind_any_but_not_except<F1, F2>(
520 self,
521 preds_is: &'static [F1],
522 pred_not: F2,
523 ex: &'static [&'static str],
524 ) -> Self
525 where
526 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
527 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
528 {
529 self.then(move |tok: &Token, src: &[char]| {
530 preds_is.iter().any(|pred| pred(&tok.kind))
531 && !pred_not(&tok.kind)
532 && !ex.iter().any(|&word| tok.get_ch(src).eq_str(word))
533 })
534 }
535
536 gen_then_from_is!(oov);
540 gen_then_from_is!(swear);
541
542 gen_then_from_is!(nominal);
547 gen_then_from_is!(plural_nominal);
548 gen_then_from_is!(non_plural_nominal);
549 gen_then_from_is!(possessive_nominal);
550
551 gen_then_from_is!(noun);
554 gen_then_from_is!(proper_noun);
555 gen_then_from_is!(plural_noun);
556 gen_then_from_is!(singular_noun);
557 gen_then_from_is!(mass_noun_only);
558
559 gen_then_from_is!(pronoun);
562 gen_then_from_is!(personal_pronoun);
563 gen_then_from_is!(first_person_singular_pronoun);
564 gen_then_from_is!(first_person_plural_pronoun);
565 gen_then_from_is!(second_person_pronoun);
566 gen_then_from_is!(third_person_pronoun);
567 gen_then_from_is!(third_person_singular_pronoun);
568 gen_then_from_is!(third_person_plural_pronoun);
569 gen_then_from_is!(subject_pronoun);
570 gen_then_from_is!(object_pronoun);
571
572 gen_then_from_is!(verb);
575 gen_then_from_is!(auxiliary_verb);
576 gen_then_from_is!(linking_verb);
577 gen_then_from_is!(verb_lemma);
578 gen_then_from_is!(verb_simple_past_form);
579 gen_then_from_is!(verb_past_participle_form);
580 gen_then_from_is!(verb_progressive_form);
581 gen_then_from_is!(verb_third_person_singular_present_form);
582
583 gen_then_from_is!(adjective);
586 gen_then_from_is!(positive_adjective);
587 gen_then_from_is!(comparative_adjective);
588 gen_then_from_is!(superlative_adjective);
589
590 gen_then_from_is!(adverb);
593 gen_then_from_is!(frequency_adverb);
594 gen_then_from_is!(degree_adverb);
595
596 gen_then_from_is!(determiner);
599 gen_then_from_is!(demonstrative_determiner);
600 gen_then_from_is!(possessive_determiner);
601 gen_then_from_is!(quantifier);
602 gen_then_from_is!(non_quantifier_determiner);
603 gen_then_from_is!(non_demonstrative_determiner);
604
605 pub fn then_indefinite_article(self) -> Self {
607 self.then(IndefiniteArticle::default())
608 }
609
610 gen_then_from_is!(conjunction);
613 gen_then_from_is!(preposition);
614
615 gen_then_from_is!(number);
618 gen_then_from_is!(cardinal_number);
619 gen_then_from_is!(ordinal_number);
620
621 gen_then_from_is!(punctuation);
624 gen_then_from_is!(apostrophe);
625 gen_then_from_is!(comma);
626 gen_then_from_is!(hyphen);
627 gen_then_from_is!(period);
628 gen_then_from_is!(semicolon);
629 gen_then_from_is!(acute);
630 gen_then_from_is!(quote);
631 gen_then_from_is!(backslash);
632 gen_then_from_is!(slash);
633 gen_then_from_is!(percent);
634 gen_then_from_is!(backtick);
635
636 gen_then_from_is!(case_separator);
639 gen_then_from_is!(likely_homograph);
640 gen_then_from_is!(sentence_terminator);
641}
642
643impl<S> From<S> for SequenceExpr
644where
645 S: Step + 'static,
646{
647 fn from(step: S) -> Self {
648 Self {
649 exprs: vec![Box::new(step)],
650 }
651 }
652}
653
#[cfg(test)]
mod tests {
    use crate::{
        Document, TokenKind,
        expr::{AnchorEnd, ExprExt, SequenceExpr},
        linting::tests::SpanVecExt,
    };

    /// Builds the expression shared by the `flag_foo_followed_by_bar_or_at_end_*` tests:
    /// "foo", then either (whitespace, "bar", [`AnchorEnd`]) or [`AnchorEnd`] directly.
    fn foo_then_bar_or_end() -> SequenceExpr {
        SequenceExpr::aco("foo").then_any_of(vec![
            Box::new(SequenceExpr::whitespace().t_aco("bar").then(AnchorEnd)),
            Box::new(AnchorEnd),
        ])
    }

    /// `then_kind_both` keeps only tokens that are both a noun and a verb.
    #[test]
    fn test_kind_both() {
        let doc = Document::new_plain_english_curated("Use a good example.");
        let noun_and_verb =
            SequenceExpr::default().then_kind_both(TokenKind::is_noun, TokenKind::is_verb);

        let matches: Vec<_> = noun_and_verb.iter_matches_in_doc(&doc).collect();

        assert_eq!(matches.to_strings(&doc), vec!["Use", "good", "example"]);
    }

    /// `then_kind_either` keeps tokens that are an adjective or a determiner.
    #[test]
    fn test_adjective_or_determiner() {
        let doc = Document::new_plain_english_curated("Use a good example.");
        let expr = SequenceExpr::default()
            .then_kind_either(TokenKind::is_adjective, TokenKind::is_determiner);

        let matches: Vec<_> = expr.iter_matches_in_doc(&doc).collect();

        assert_eq!(matches.to_strings(&doc), vec!["a", "good"]);
    }

    /// `then_kind_is_but_is_not` keeps nouns that are not also adjectives.
    #[test]
    fn test_noun_but_not_adjective() {
        let doc = Document::new_plain_english_curated("Use a good example.");
        let expr = SequenceExpr::default()
            .then_kind_is_but_is_not(TokenKind::is_noun, TokenKind::is_adjective);

        let matches: Vec<_> = expr.iter_matches_in_doc(&doc).collect();

        assert_eq!(matches.to_strings(&doc), vec!["Use", "example"]);
    }

    /// "foo bar" at the end of the input is matched as one three-token span.
    #[test]
    fn flag_foo_followed_by_bar_or_at_end_1() {
        let expr = foo_then_bar_or_end();
        let doc_with_bar = Document::new_plain_english_curated("foo bar");

        let matches_with_bar: Vec<_> = expr.iter_matches_in_doc(&doc_with_bar).collect();
        eprintln!("matches_with_bar: {:#?}", matches_with_bar);

        assert_eq!(matches_with_bar.len(), 1);
        assert_eq!(matches_with_bar[0].start, 0);
        assert_eq!(matches_with_bar[0].end, 3);
        assert_eq!(matches_with_bar.to_strings(&doc_with_bar), vec!["foo bar"]);
    }

    /// A lone "foo" is matched through the bare `AnchorEnd` alternative.
    #[test]
    fn flag_foo_followed_by_bar_or_at_end_2() {
        let expr = foo_then_bar_or_end();
        let doc_with_end = Document::new_plain_english_curated("foo");

        let matches_with_end: Vec<_> = expr.iter_matches_in_doc(&doc_with_end).collect();
        eprintln!("matches_with_end: {:#?}", matches_with_end);

        assert_eq!(matches_with_end.len(), 1);
        assert_eq!(matches_with_end[0].start, 0);
        assert_eq!(matches_with_end[0].end, 1);
        assert_eq!(matches_with_end.to_strings(&doc_with_end), vec!["foo"]);
    }

    /// "foo bar baz" yields no match because neither alternative reaches the end.
    #[test]
    fn flag_foo_followed_by_bar_or_at_end_3() {
        let expr = foo_then_bar_or_end();
        let doc_with_foo_bar_baz = Document::new_plain_english_curated("foo bar baz");

        let matches_with_foo_bar_baz: Vec<_> = expr
            .iter_matches_in_doc(&doc_with_foo_bar_baz)
            .collect();
        eprintln!("matches_with_foo_bar_baz: {:#?}", matches_with_foo_bar_baz);

        assert_eq!(matches_with_foo_bar_baz.len(), 0);
        assert_eq!(
            matches_with_foo_bar_baz.to_strings(&doc_with_foo_bar_baz),
            Vec::<String>::new()
        );
    }
}