1use paste::paste;
2
3use crate::{
4 CharStringExt, Lrc, Span, Token, TokenKind,
5 expr::{FirstMatchOf, FixedPhrase, LongestMatchOf},
6 patterns::{AnyPattern, IndefiniteArticle, WhitespacePattern, Word, WordSet},
7};
8
9use super::{Expr, Optional, OwnedExprExt, Repeating, Step, UnlessStep};
10
/// An expression made of an ordered list of sub-expressions.
///
/// The derived `Default` produces a sequence with no sub-expressions; the
/// builder methods on this type append steps one at a time.
#[derive(Default)]
pub struct SequenceExpr {
    /// The sub-expressions, stored in the order in which they were added.
    exprs: Vec<Box<dyn Expr>>,
}
15
/// Generates four builder methods for a `TokenKind::is_<quality>()` predicate.
///
/// Given `$quality`, the expansion (which is meant to be placed inside an
/// `impl SequenceExpr` block) defines:
///
/// - `then_<quality>`: a step whose token kind satisfies `is_<quality>()`.
/// - `then_optional_<quality>`: the same check passed to `then_optional`.
/// - `then_one_or_more_<quality>s`: the same check passed to `then_one_or_more`.
/// - `then_anything_but_<quality>`: a step whose token kind does *not* satisfy
///   `is_<quality>()`.
macro_rules! gen_then_from_is {
    ($quality:ident) => {
        paste! {
            #[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_$quality >] (self) -> Self {
                self.then_kind_where(|kind| kind.[< is_$quality >]())
            }

            #[doc = concat!("Adds an optional step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_optional_$quality >] (self) -> Self {
                self.then_optional(|tok: &Token, _source: &[char]| tok.kind.[< is_$quality >]())
            }

            #[doc = concat!("Adds a step matching one or more consecutive tokens where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_one_or_more_$quality s >] (self) -> Self {
                // `then_one_or_more` boxes its argument itself, so the closure is
                // passed directly rather than being boxed a second time here.
                self.then_one_or_more(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                })
            }

            #[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns false.")]
            pub fn [< then_anything_but_$quality >] (self) -> Self {
                self.then_kind_where(|kind| !kind.[< is_$quality >]())
            }
        }
    };
}
50
51impl Expr for SequenceExpr {
52 fn run(&self, mut cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span<Token>> {
56 let mut window = Span::new_with_len(cursor, 0);
57
58 for cur_expr in &self.exprs {
59 let out = cur_expr.run(cursor, tokens, source)?;
60
61 if out.end > out.start {
63 window.expand_to_include(out.start);
64 window.expand_to_include(out.end.checked_sub(1).unwrap_or(out.start));
65 }
66
67 if out.end > cursor {
69 cursor = out.end;
70 } else if out.start < cursor {
71 cursor = out.start;
72 }
73 }
75
76 Some(window)
77 }
78}
79
80impl SequenceExpr {
81 pub fn with(expr: impl Expr + 'static) -> Self {
85 Self::default().then(expr)
86 }
87
88 pub fn anything() -> Self {
92 Self::default().then_anything()
93 }
94
95 pub fn any_capitalization_of(word: &'static str) -> Self {
99 Self::default().then_any_capitalization_of(word)
100 }
101
102 pub fn aco(word: &'static str) -> Self {
104 Self::any_capitalization_of(word)
105 }
106
107 pub fn word_set(words: &'static [&'static str]) -> Self {
109 Self::default().then_word_set(words)
110 }
111
112 pub fn any_word() -> Self {
114 Self::default().then_any_word()
115 }
116
117 pub fn optional(expr: impl Expr + 'static) -> Self {
121 Self::default().then_optional(expr)
122 }
123
124 pub fn fixed_phrase(phrase: &'static str) -> Self {
126 Self::default().then_fixed_phrase(phrase)
127 }
128
129 pub fn any_of(exprs: Vec<Box<dyn Expr>>) -> Self {
133 Self::default().then_any_of(exprs)
134 }
135
136 pub fn unless(condition: impl Expr + 'static) -> Self {
138 Self::default().then_unless(condition)
139 }
140
141 pub fn then(mut self, expr: impl Expr + 'static) -> Self {
145 self.exprs.push(Box::new(expr));
146 self
147 }
148
149 pub fn then_boxed(mut self, expr: Box<dyn Expr>) -> Self {
151 self.exprs.push(expr);
152 self
153 }
154
155 pub fn then_optional(mut self, expr: impl Expr + 'static) -> Self {
157 self.exprs.push(Box::new(Optional::new(expr)));
158 self
159 }
160
161 pub fn then_any_of(mut self, exprs: Vec<Box<dyn Expr>>) -> Self {
167 self.exprs.push(Box::new(FirstMatchOf::new(exprs)));
168 self
169 }
170
171 pub fn then_longest_of(mut self, exprs: Vec<Box<dyn Expr>>) -> Self {
176 self.exprs.push(Box::new(LongestMatchOf::new(exprs)));
177 self
178 }
179
180 pub fn then_seq(mut self, mut other: Self) -> Self {
183 self.exprs.append(&mut other.exprs);
184 self
185 }
186
187 pub fn then_word_set(self, words: &'static [&'static str]) -> Self {
189 self.then(WordSet::new(words))
190 }
191
192 pub fn t_set(self, words: &'static [&'static str]) -> Self {
194 self.then_word_set(words)
195 }
196
197 pub fn then_whitespace(self) -> Self {
199 self.then(WhitespacePattern)
200 }
201
202 pub fn t_ws(self) -> Self {
204 self.then_whitespace()
205 }
206
207 pub fn then_whitespace_or_hyphen(self) -> Self {
209 self.then(WhitespacePattern.or(|tok: &Token, _: &[char]| tok.kind.is_hyphen()))
210 }
211
212 pub fn t_ws_h(self) -> Self {
214 self.then_whitespace_or_hyphen()
215 }
216
217 pub fn then_one_or_more(self, expr: impl Expr + 'static) -> Self {
218 self.then(Repeating::new(Box::new(expr), 1))
219 }
220
221 pub fn then_one_or_more_spaced(self, expr: impl Expr + 'static) -> Self {
222 let expr = Lrc::new(expr);
223 self.then(
224 SequenceExpr::default()
225 .then(expr.clone())
226 .then(Repeating::new(
227 Box::new(SequenceExpr::default().t_ws().then(expr)),
228 0,
229 )),
230 )
231 }
232
233 pub fn then_unless(self, condition: impl Expr + 'static) -> Self {
240 self.then(UnlessStep::new(condition, |_tok: &Token, _src: &[char]| {
241 true
242 }))
243 }
244
245 pub fn then_anything(self) -> Self {
249 self.then(AnyPattern)
250 }
251
252 pub fn t_any(self) -> Self {
256 self.then_anything()
257 }
258
259 pub fn then_any_word(self) -> Self {
263 self.then_kind_where(|kind| kind.is_word())
264 }
265
266 pub fn then_any_capitalization_of(self, word: &'static str) -> Self {
268 self.then(Word::new(word))
269 }
270
271 pub fn t_aco(self, word: &'static str) -> Self {
273 self.then_any_capitalization_of(word)
274 }
275
276 pub fn then_exact_word(self, word: &'static str) -> Self {
278 self.then(Word::new_exact(word))
279 }
280
281 pub fn then_fixed_phrase(self, phrase: &'static str) -> Self {
283 self.then(FixedPhrase::from_phrase(phrase))
284 }
285
286 pub fn then_word_except(self, words: &'static [&'static str]) -> Self {
288 self.then(move |tok: &Token, src: &[char]| {
289 !tok.kind.is_word()
290 || !words
291 .iter()
292 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
293 })
294 }
295
296 pub fn then_kind(self, kind: TokenKind) -> Self {
302 self.then_kind_where(move |k| kind == *k)
303 }
304
305 pub fn then_kind_where<F>(mut self, predicate: F) -> Self
307 where
308 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
309 {
310 self.exprs
311 .push(Box::new(move |tok: &Token, _source: &[char]| {
312 predicate(&tok.kind)
313 }));
314 self
315 }
316
317 pub fn then_kind_except<F>(self, pred_is: F, ex: &'static [&'static str]) -> Self
319 where
320 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
321 {
322 self.then(move |tok: &Token, src: &[char]| {
323 pred_is(&tok.kind)
324 && !ex
325 .iter()
326 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
327 })
328 }
329
330 pub fn then_kind_both<F1, F2>(self, pred_is_1: F1, pred_is_2: F2) -> Self
335 where
336 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
337 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
338 {
339 self.then_kind_where(move |k| pred_is_1(k) && pred_is_2(k))
340 }
341
342 pub fn then_kind_either<F1, F2>(self, pred_is_1: F1, pred_is_2: F2) -> Self
345 where
346 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
347 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
348 {
349 self.then_kind_where(move |k| pred_is_1(k) || pred_is_2(k))
350 }
351
352 pub fn then_kind_neither<F1, F2>(self, pred_isnt_1: F1, pred_isnt_2: F2) -> Self
355 where
356 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
357 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
358 {
359 self.then_kind_where(move |k| !pred_isnt_1(k) && !pred_isnt_2(k))
360 }
361
362 pub fn then_kind_is_but_is_not<F1, F2>(self, pred_is: F1, pred_not: F2) -> Self
365 where
366 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
367 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
368 {
369 self.then_kind_where(move |k| pred_is(k) && !pred_not(k))
370 }
371
372 pub fn then_kind_is_but_is_not_except<F1, F2>(
375 self,
376 pred_is: F1,
377 pred_not: F2,
378 ex: &'static [&'static str],
379 ) -> Self
380 where
381 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
382 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
383 {
384 self.then(move |tok: &Token, src: &[char]| {
385 pred_is(&tok.kind)
386 && !pred_not(&tok.kind)
387 && !ex
388 .iter()
389 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
390 })
391 }
392
393 pub fn then_kind_is_but_isnt_any_of<F1, F2>(
396 self,
397 pred_is: F1,
398 preds_isnt: &'static [F2],
399 ) -> Self
400 where
401 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
402 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
403 {
404 self.then_kind_where(move |k| pred_is(k) && !preds_isnt.iter().any(|pred| pred(k)))
405 }
406
407 pub fn then_kind_is_but_isnt_any_of_except<F1, F2>(
411 self,
412 pred_is: F1,
413 preds_isnt: &'static [F2],
414 ex: &'static [&'static str],
415 ) -> Self
416 where
417 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
418 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
419 {
420 self.then(move |tok: &Token, src: &[char]| {
421 pred_is(&tok.kind)
422 && !preds_isnt.iter().any(|pred| pred(&tok.kind))
423 && !ex
424 .iter()
425 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
426 })
427 }
428
429 pub fn then_kind_both_but_not<F1, F2, F3>(
435 self,
436 (pred_is_1, pred_is_2): (F1, F2),
437 pred_not: F3,
438 ) -> Self
439 where
440 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
441 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
442 F3: Fn(&TokenKind) -> bool + Send + Sync + 'static,
443 {
444 self.then_kind_where(move |k| pred_is_1(k) && pred_is_2(k) && !pred_not(k))
445 }
446
447 pub fn then_kind_any<F>(self, preds_is: &'static [F]) -> Self
450 where
451 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
452 {
453 self.then_kind_where(move |k| preds_is.iter().any(|pred| pred(k)))
454 }
455
456 pub fn then_kind_none_of<F>(self, preds_isnt: &'static [F]) -> Self
459 where
460 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
461 {
462 self.then_kind_where(move |k| preds_isnt.iter().all(|pred| !pred(k)))
463 }
464
465 pub fn then_kind_any_except<F>(
468 self,
469 preds_is: &'static [F],
470 ex: &'static [&'static str],
471 ) -> Self
472 where
473 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
474 {
475 self.then(move |tok: &Token, src: &[char]| {
476 preds_is.iter().any(|pred| pred(&tok.kind))
477 && !ex
478 .iter()
479 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
480 })
481 }
482
483 pub fn then_kind_any_or_words<F>(
486 self,
487 preds: &'static [F],
488 words: &'static [&'static str],
489 ) -> Self
490 where
491 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
492 {
493 self.then(move |tok: &Token, src: &[char]| {
494 preds.iter().any(|pred| pred(&tok.kind))
495 || words
496 .iter()
497 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
498 })
499 }
500
501 pub fn then_kind_any_but_not_except<F1, F2>(
504 self,
505 preds_is: &'static [F1],
506 pred_not: F2,
507 ex: &'static [&'static str],
508 ) -> Self
509 where
510 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
511 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
512 {
513 self.then(move |tok: &Token, src: &[char]| {
514 preds_is.iter().any(|pred| pred(&tok.kind))
515 && !pred_not(&tok.kind)
516 && !ex
517 .iter()
518 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
519 })
520 }
521
522 gen_then_from_is!(oov);
526 gen_then_from_is!(swear);
527
528 gen_then_from_is!(nominal);
533 gen_then_from_is!(plural_nominal);
534 gen_then_from_is!(non_plural_nominal);
535 gen_then_from_is!(possessive_nominal);
536
537 gen_then_from_is!(noun);
540 gen_then_from_is!(proper_noun);
541 gen_then_from_is!(plural_noun);
542 gen_then_from_is!(singular_noun);
543 gen_then_from_is!(mass_noun_only);
544
545 gen_then_from_is!(pronoun);
548 gen_then_from_is!(personal_pronoun);
549 gen_then_from_is!(first_person_singular_pronoun);
550 gen_then_from_is!(first_person_plural_pronoun);
551 gen_then_from_is!(second_person_pronoun);
552 gen_then_from_is!(third_person_pronoun);
553 gen_then_from_is!(third_person_singular_pronoun);
554 gen_then_from_is!(third_person_plural_pronoun);
555 gen_then_from_is!(subject_pronoun);
556 gen_then_from_is!(object_pronoun);
557
558 gen_then_from_is!(verb);
561 gen_then_from_is!(auxiliary_verb);
562 gen_then_from_is!(linking_verb);
563 gen_then_from_is!(verb_lemma);
564 gen_then_from_is!(verb_simple_past_form);
565 gen_then_from_is!(verb_past_participle_form);
566 gen_then_from_is!(verb_progressive_form);
567 gen_then_from_is!(verb_third_person_singular_present_form);
568
569 gen_then_from_is!(adjective);
572 gen_then_from_is!(positive_adjective);
573 gen_then_from_is!(comparative_adjective);
574 gen_then_from_is!(superlative_adjective);
575
576 gen_then_from_is!(adverb);
579 gen_then_from_is!(frequency_adverb);
580 gen_then_from_is!(degree_adverb);
581
582 gen_then_from_is!(determiner);
585 gen_then_from_is!(demonstrative_determiner);
586 gen_then_from_is!(possessive_determiner);
587 gen_then_from_is!(quantifier);
588 gen_then_from_is!(non_quantifier_determiner);
589 gen_then_from_is!(non_demonstrative_determiner);
590
591 pub fn then_indefinite_article(self) -> Self {
593 self.then(IndefiniteArticle::default())
594 }
595
596 gen_then_from_is!(conjunction);
599 gen_then_from_is!(preposition);
600
601 gen_then_from_is!(number);
604 gen_then_from_is!(cardinal_number);
605 gen_then_from_is!(ordinal_number);
606
607 gen_then_from_is!(punctuation);
610 gen_then_from_is!(apostrophe);
611 gen_then_from_is!(comma);
612 gen_then_from_is!(hyphen);
613 gen_then_from_is!(period);
614 gen_then_from_is!(semicolon);
615 gen_then_from_is!(quote);
616
617 gen_then_from_is!(case_separator);
620 gen_then_from_is!(likely_homograph);
621 gen_then_from_is!(sentence_terminator);
622}
623
624impl<S> From<S> for SequenceExpr
625where
626 S: Step + 'static,
627{
628 fn from(step: S) -> Self {
629 Self {
630 exprs: vec![Box::new(step)],
631 }
632 }
633}
634
#[cfg(test)]
mod tests {
    use crate::{
        Document, TokenKind,
        expr::{ExprExt, SequenceExpr},
        linting::tests::SpanVecExt,
    };

    /// Sentence shared by every test in this module.
    const SAMPLE: &str = "Use a good example.";

    /// Tokens that are both a noun and a verb.
    #[test]
    fn test_kind_both() {
        let expr = SequenceExpr::default().then_kind_both(TokenKind::is_noun, TokenKind::is_verb);
        let doc = Document::new_plain_english_curated(SAMPLE);
        let spans: Vec<_> = expr.iter_matches_in_doc(&doc).collect();
        assert_eq!(spans.to_strings(&doc), vec!["Use", "good", "example"]);
    }

    /// Tokens that are an adjective or a determiner.
    #[test]
    fn test_adjective_or_determiner() {
        let expr = SequenceExpr::default()
            .then_kind_either(TokenKind::is_adjective, TokenKind::is_determiner);
        let doc = Document::new_plain_english_curated(SAMPLE);
        let spans: Vec<_> = expr.iter_matches_in_doc(&doc).collect();
        assert_eq!(spans.to_strings(&doc), vec!["a", "good"]);
    }

    /// Tokens that are a noun but not an adjective.
    #[test]
    fn test_noun_but_not_adjective() {
        let expr = SequenceExpr::default()
            .then_kind_is_but_is_not(TokenKind::is_noun, TokenKind::is_adjective);
        let doc = Document::new_plain_english_curated(SAMPLE);
        let spans: Vec<_> = expr.iter_matches_in_doc(&doc).collect();
        assert_eq!(spans.to_strings(&doc), vec!["Use", "example"]);
    }
}