1use paste::paste;
2
3use crate::{
4 CharStringExt, Lrc, Span, Token, TokenKind,
5 expr::{FirstMatchOf, FixedPhrase, LongestMatchOf},
6 patterns::{AnyPattern, IndefiniteArticle, WhitespacePattern, Word, WordSet},
7};
8
9use super::{Expr, Optional, OwnedExprExt, Repeating, Step, UnlessStep};
10
11#[derive(Default)]
12pub struct SequenceExpr {
13 exprs: Vec<Box<dyn Expr>>,
14}
15
/// Generates a family of builder methods from one `TokenKind::is_<quality>()` predicate.
///
/// For a quality `foo`, the expansion defines four methods that each consume
/// and return the builder: `then_foo`, `then_optional_foo`,
/// `then_one_or_more_foos` and `then_anything_but_foo`.
/// It must be invoked inside an `impl` block that provides `then_kind_where`,
/// `then_optional` and `then_one_or_more`.
macro_rules! gen_then_from_is {
    ($quality:ident) => {
        paste! {
            #[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_$quality >](self) -> Self {
                self.then_kind_where(|token_kind| token_kind.[< is_$quality >]())
            }

            #[doc = concat!("Adds an optional step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_optional_$quality >](self) -> Self {
                self.then_optional(|token: &Token, _: &[char]| token.kind.[< is_$quality >]())
            }

            #[doc = concat!("Adds a step matching one or more consecutive tokens where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_one_or_more_$quality s >](self) -> Self {
                let is_quality = |token: &Token, _: &[char]| token.kind.[< is_$quality >]();
                self.then_one_or_more(Box::new(is_quality))
            }

            #[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns false.")]
            pub fn [< then_anything_but_$quality >](self) -> Self {
                self.then_kind_where(|token_kind| !token_kind.[< is_$quality >]())
            }
        }
    };
}
50
51impl Expr for SequenceExpr {
52 fn run(&self, mut cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span<Token>> {
56 let mut window = Span::empty(cursor);
57
58 for cur_expr in &self.exprs {
59 let out = cur_expr.run(cursor, tokens, source)?;
60
61 if out.end > out.start {
63 window.expand_to_include(out.start);
64 window.expand_to_include(out.end.checked_sub(1).unwrap_or(out.start));
65 }
66
67 if out.end > cursor {
69 cursor = out.end;
70 } else if out.start < cursor {
71 cursor = out.start;
72 }
73 }
75
76 Some(window)
77 }
78}
79
80impl SequenceExpr {
81 pub fn with(expr: impl Expr + 'static) -> Self {
85 Self::default().then(expr)
86 }
87
88 pub fn anything() -> Self {
92 Self::default().then_anything()
93 }
94
95 pub fn any_capitalization_of(word: &'static str) -> Self {
99 Self::default().then_any_capitalization_of(word)
100 }
101
102 pub fn aco(word: &'static str) -> Self {
104 Self::any_capitalization_of(word)
105 }
106
107 pub fn word_set(words: &'static [&'static str]) -> Self {
109 Self::default().then_word_set(words)
110 }
111
112 pub fn any_word() -> Self {
114 Self::default().then_any_word()
115 }
116
117 pub fn optional(expr: impl Expr + 'static) -> Self {
121 Self::default().then_optional(expr)
122 }
123
124 pub fn fixed_phrase(phrase: &'static str) -> Self {
126 Self::default().then_fixed_phrase(phrase)
127 }
128
129 pub fn any_of(exprs: Vec<Box<dyn Expr>>) -> Self {
133 Self::default().then_any_of(exprs)
134 }
135
136 pub fn longest_of(exprs: Vec<Box<dyn Expr>>) -> Self {
138 Self::default().then_longest_of(exprs)
139 }
140
141 pub fn whitespace() -> Self {
142 Self::default().then_whitespace()
143 }
144
145 pub fn unless(condition: impl Expr + 'static) -> Self {
147 Self::default().then_unless(condition)
148 }
149
150 pub fn then(mut self, expr: impl Expr + 'static) -> Self {
154 self.exprs.push(Box::new(expr));
155 self
156 }
157
158 pub fn then_boxed(mut self, expr: Box<dyn Expr>) -> Self {
160 self.exprs.push(expr);
161 self
162 }
163
164 pub fn then_optional(mut self, expr: impl Expr + 'static) -> Self {
166 self.exprs.push(Box::new(Optional::new(expr)));
167 self
168 }
169
170 pub fn then_any_of(mut self, exprs: Vec<Box<dyn Expr>>) -> Self {
176 self.exprs.push(Box::new(FirstMatchOf::new(exprs)));
177 self
178 }
179
180 pub fn then_longest_of(mut self, exprs: Vec<Box<dyn Expr>>) -> Self {
185 self.exprs.push(Box::new(LongestMatchOf::new(exprs)));
186 self
187 }
188
189 pub fn then_seq(mut self, mut other: Self) -> Self {
192 self.exprs.append(&mut other.exprs);
193 self
194 }
195
196 pub fn then_word_set(self, words: &'static [&'static str]) -> Self {
198 self.then(WordSet::new(words))
199 }
200
201 pub fn t_set(self, words: &'static [&'static str]) -> Self {
203 self.then_word_set(words)
204 }
205
206 pub fn then_whitespace(self) -> Self {
208 self.then(WhitespacePattern)
209 }
210
211 pub fn t_ws(self) -> Self {
213 self.then_whitespace()
214 }
215
216 pub fn then_whitespace_or_hyphen(self) -> Self {
218 self.then(WhitespacePattern.or(|tok: &Token, _: &[char]| tok.kind.is_hyphen()))
219 }
220
221 pub fn t_ws_h(self) -> Self {
223 self.then_whitespace_or_hyphen()
224 }
225
226 pub fn then_zero_or_more(self, expr: impl Expr + 'static) -> Self {
228 self.then(Repeating::new(Box::new(expr), 0))
229 }
230
231 pub fn then_one_or_more(self, expr: impl Expr + 'static) -> Self {
233 self.then(Repeating::new(Box::new(expr), 1))
234 }
235
236 pub fn then_zero_or_more_spaced(self, expr: impl Expr + 'static) -> Self {
238 let expr = Lrc::new(expr);
239 self.then(SequenceExpr::with(expr.clone()).then(Repeating::new(
240 Box::new(SequenceExpr::default().t_ws().then(expr)),
241 0,
242 )))
243 }
244
245 pub fn then_unless(self, condition: impl Expr + 'static) -> Self {
252 self.then(UnlessStep::new(condition, |_tok: &Token, _src: &[char]| {
253 true
254 }))
255 }
256
257 pub fn then_anything(self) -> Self {
261 self.then(AnyPattern)
262 }
263
264 pub fn t_any(self) -> Self {
268 self.then_anything()
269 }
270
271 pub fn then_any_word(self) -> Self {
275 self.then_kind_where(|kind| kind.is_word())
276 }
277
278 pub fn then_any_capitalization_of(self, word: &'static str) -> Self {
280 self.then(Word::new(word))
281 }
282
283 pub fn t_aco(self, word: &'static str) -> Self {
285 self.then_any_capitalization_of(word)
286 }
287
288 pub fn then_exact_word(self, word: &'static str) -> Self {
290 self.then(Word::new_exact(word))
291 }
292
293 pub fn then_fixed_phrase(self, phrase: &'static str) -> Self {
295 self.then(FixedPhrase::from_phrase(phrase))
296 }
297
298 pub fn then_word_except(self, words: &'static [&'static str]) -> Self {
300 self.then(move |tok: &Token, src: &[char]| {
301 !tok.kind.is_word()
302 || !words
303 .iter()
304 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
305 })
306 }
307
308 pub fn then_kind(self, kind: TokenKind) -> Self {
314 self.then_kind_where(move |k| kind == *k)
315 }
316
317 pub fn then_kind_where<F>(mut self, predicate: F) -> Self
319 where
320 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
321 {
322 self.exprs
323 .push(Box::new(move |tok: &Token, _source: &[char]| {
324 predicate(&tok.kind)
325 }));
326 self
327 }
328
329 pub fn then_kind_except<F>(self, pred_is: F, ex: &'static [&'static str]) -> Self
331 where
332 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
333 {
334 self.then(move |tok: &Token, src: &[char]| {
335 pred_is(&tok.kind)
336 && !ex
337 .iter()
338 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
339 })
340 }
341
342 pub fn then_kind_both<F1, F2>(self, pred_is_1: F1, pred_is_2: F2) -> Self
347 where
348 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
349 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
350 {
351 self.then_kind_where(move |k| pred_is_1(k) && pred_is_2(k))
352 }
353
354 pub fn then_kind_either<F1, F2>(self, pred_is_1: F1, pred_is_2: F2) -> Self
357 where
358 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
359 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
360 {
361 self.then_kind_where(move |k| pred_is_1(k) || pred_is_2(k))
362 }
363
364 pub fn then_kind_neither<F1, F2>(self, pred_isnt_1: F1, pred_isnt_2: F2) -> Self
367 where
368 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
369 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
370 {
371 self.then_kind_where(move |k| !pred_isnt_1(k) && !pred_isnt_2(k))
372 }
373
374 pub fn then_kind_is_but_is_not<F1, F2>(self, pred_is: F1, pred_not: F2) -> Self
377 where
378 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
379 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
380 {
381 self.then_kind_where(move |k| pred_is(k) && !pred_not(k))
382 }
383
384 pub fn then_kind_is_but_is_not_except<F1, F2>(
387 self,
388 pred_is: F1,
389 pred_not: F2,
390 ex: &'static [&'static str],
391 ) -> Self
392 where
393 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
394 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
395 {
396 self.then(move |tok: &Token, src: &[char]| {
397 pred_is(&tok.kind)
398 && !pred_not(&tok.kind)
399 && !ex
400 .iter()
401 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
402 })
403 }
404
405 pub fn then_kind_is_but_isnt_any_of<F1, F2>(
408 self,
409 pred_is: F1,
410 preds_isnt: &'static [F2],
411 ) -> Self
412 where
413 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
414 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
415 {
416 self.then_kind_where(move |k| pred_is(k) && !preds_isnt.iter().any(|pred| pred(k)))
417 }
418
419 pub fn then_kind_is_but_isnt_any_of_except<F1, F2>(
423 self,
424 pred_is: F1,
425 preds_isnt: &'static [F2],
426 ex: &'static [&'static str],
427 ) -> Self
428 where
429 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
430 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
431 {
432 self.then(move |tok: &Token, src: &[char]| {
433 pred_is(&tok.kind)
434 && !preds_isnt.iter().any(|pred| pred(&tok.kind))
435 && !ex
436 .iter()
437 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
438 })
439 }
440
441 pub fn then_kind_both_but_not<F1, F2, F3>(
447 self,
448 (pred_is_1, pred_is_2): (F1, F2),
449 pred_not: F3,
450 ) -> Self
451 where
452 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
453 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
454 F3: Fn(&TokenKind) -> bool + Send + Sync + 'static,
455 {
456 self.then_kind_where(move |k| pred_is_1(k) && pred_is_2(k) && !pred_not(k))
457 }
458
459 pub fn then_kind_any<F>(self, preds_is: &'static [F]) -> Self
462 where
463 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
464 {
465 self.then_kind_where(move |k| preds_is.iter().any(|pred| pred(k)))
466 }
467
468 pub fn then_kind_none_of<F>(self, preds_isnt: &'static [F]) -> Self
471 where
472 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
473 {
474 self.then_kind_where(move |k| preds_isnt.iter().all(|pred| !pred(k)))
475 }
476
477 pub fn then_kind_any_except<F>(
480 self,
481 preds_is: &'static [F],
482 ex: &'static [&'static str],
483 ) -> Self
484 where
485 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
486 {
487 self.then(move |tok: &Token, src: &[char]| {
488 preds_is.iter().any(|pred| pred(&tok.kind))
489 && !ex
490 .iter()
491 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
492 })
493 }
494
495 pub fn then_kind_any_or_words<F>(
498 self,
499 preds: &'static [F],
500 words: &'static [&'static str],
501 ) -> Self
502 where
503 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
504 {
505 self.then(move |tok: &Token, src: &[char]| {
506 preds.iter().any(|pred| pred(&tok.kind))
507 || words
508 .iter()
509 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
510 })
511 }
512
513 pub fn then_kind_any_but_not_except<F1, F2>(
516 self,
517 preds_is: &'static [F1],
518 pred_not: F2,
519 ex: &'static [&'static str],
520 ) -> Self
521 where
522 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
523 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
524 {
525 self.then(move |tok: &Token, src: &[char]| {
526 preds_is.iter().any(|pred| pred(&tok.kind))
527 && !pred_not(&tok.kind)
528 && !ex
529 .iter()
530 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
531 })
532 }
533
534 gen_then_from_is!(oov);
538 gen_then_from_is!(swear);
539
540 gen_then_from_is!(nominal);
545 gen_then_from_is!(plural_nominal);
546 gen_then_from_is!(non_plural_nominal);
547 gen_then_from_is!(possessive_nominal);
548
549 gen_then_from_is!(noun);
552 gen_then_from_is!(proper_noun);
553 gen_then_from_is!(plural_noun);
554 gen_then_from_is!(singular_noun);
555 gen_then_from_is!(mass_noun_only);
556
557 gen_then_from_is!(pronoun);
560 gen_then_from_is!(personal_pronoun);
561 gen_then_from_is!(first_person_singular_pronoun);
562 gen_then_from_is!(first_person_plural_pronoun);
563 gen_then_from_is!(second_person_pronoun);
564 gen_then_from_is!(third_person_pronoun);
565 gen_then_from_is!(third_person_singular_pronoun);
566 gen_then_from_is!(third_person_plural_pronoun);
567 gen_then_from_is!(subject_pronoun);
568 gen_then_from_is!(object_pronoun);
569
570 gen_then_from_is!(verb);
573 gen_then_from_is!(auxiliary_verb);
574 gen_then_from_is!(linking_verb);
575 gen_then_from_is!(verb_lemma);
576 gen_then_from_is!(verb_simple_past_form);
577 gen_then_from_is!(verb_past_participle_form);
578 gen_then_from_is!(verb_progressive_form);
579 gen_then_from_is!(verb_third_person_singular_present_form);
580
581 gen_then_from_is!(adjective);
584 gen_then_from_is!(positive_adjective);
585 gen_then_from_is!(comparative_adjective);
586 gen_then_from_is!(superlative_adjective);
587
588 gen_then_from_is!(adverb);
591 gen_then_from_is!(frequency_adverb);
592 gen_then_from_is!(degree_adverb);
593
594 gen_then_from_is!(determiner);
597 gen_then_from_is!(demonstrative_determiner);
598 gen_then_from_is!(possessive_determiner);
599 gen_then_from_is!(quantifier);
600 gen_then_from_is!(non_quantifier_determiner);
601 gen_then_from_is!(non_demonstrative_determiner);
602
603 pub fn then_indefinite_article(self) -> Self {
605 self.then(IndefiniteArticle::default())
606 }
607
608 gen_then_from_is!(conjunction);
611 gen_then_from_is!(preposition);
612
613 gen_then_from_is!(number);
616 gen_then_from_is!(cardinal_number);
617 gen_then_from_is!(ordinal_number);
618
619 gen_then_from_is!(punctuation);
622 gen_then_from_is!(apostrophe);
623 gen_then_from_is!(comma);
624 gen_then_from_is!(hyphen);
625 gen_then_from_is!(period);
626 gen_then_from_is!(semicolon);
627 gen_then_from_is!(quote);
628 gen_then_from_is!(backslash);
629 gen_then_from_is!(slash);
630 gen_then_from_is!(percent);
631
632 gen_then_from_is!(case_separator);
635 gen_then_from_is!(likely_homograph);
636 gen_then_from_is!(sentence_terminator);
637}
638
639impl<S> From<S> for SequenceExpr
640where
641 S: Step + 'static,
642{
643 fn from(step: S) -> Self {
644 Self {
645 exprs: vec![Box::new(step)],
646 }
647 }
648}
649
#[cfg(test)]
mod tests {
    use crate::{
        Document, TokenKind,
        expr::{ExprExt, SequenceExpr},
        linting::tests::SpanVecExt,
    };

    /// Sentence that every test in this module matches against.
    const SAMPLE: &str = "Use a good example.";

    /// `then_kind_both` keeps only tokens that are both nouns and verbs.
    #[test]
    fn test_kind_both() {
        let doc = Document::new_plain_english_curated(SAMPLE);
        let expr = SequenceExpr::default().then_kind_both(TokenKind::is_noun, TokenKind::is_verb);

        let found: Vec<_> = expr.iter_matches_in_doc(&doc).collect();

        assert_eq!(found.to_strings(&doc), vec!["Use", "good", "example"]);
    }

    /// `then_kind_either` keeps tokens that are adjectives or determiners.
    #[test]
    fn test_adjective_or_determiner() {
        let doc = Document::new_plain_english_curated(SAMPLE);
        let expr = SequenceExpr::default()
            .then_kind_either(TokenKind::is_adjective, TokenKind::is_determiner);

        let found: Vec<_> = expr.iter_matches_in_doc(&doc).collect();

        assert_eq!(found.to_strings(&doc), vec!["a", "good"]);
    }

    /// `then_kind_is_but_is_not` keeps nouns that are not also adjectives.
    #[test]
    fn test_noun_but_not_adjective() {
        let doc = Document::new_plain_english_curated(SAMPLE);
        let expr = SequenceExpr::default()
            .then_kind_is_but_is_not(TokenKind::is_noun, TokenKind::is_adjective);

        let found: Vec<_> = expr.iter_matches_in_doc(&doc).collect();

        assert_eq!(found.to_strings(&doc), vec!["Use", "example"]);
    }
}