1use paste::paste;
2
3use crate::{
4 CharStringExt, Span, Token, TokenKind,
5 expr::{FirstMatchOf, FixedPhrase, LongestMatchOf},
6 patterns::{AnyPattern, IndefiniteArticle, WhitespacePattern, Word, WordSet},
7};
8
9use super::{Expr, Optional, OwnedExprExt, Repeating, Step, UnlessStep};
10
11#[derive(Default)]
12pub struct SequenceExpr {
13 exprs: Vec<Box<dyn Expr>>,
14}
15
/// Generates four builder methods for one `TokenKind::is_*` predicate.
///
/// For example, `gen_then_from_is!(noun)` expands to `then_noun`,
/// `then_optional_noun`, `then_one_or_more_nouns` and `then_anything_but_noun`,
/// all of which are driven by `TokenKind::is_noun()`.
///
/// The expansion calls `then_kind_where`, `then_optional` and `then_one_or_more`
/// on `self`, so the macro has to be invoked inside an `impl` block of a type
/// that provides those methods.
macro_rules! gen_then_from_is {
    ($quality:ident) => {
        paste! {
            #[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_$quality >](self) -> Self {
                self.then_kind_where(|kind| kind.[< is_$quality >]())
            }

            #[doc = concat!("Adds an optional step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_optional_$quality >](self) -> Self {
                self.then_optional(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                })
            }

            #[doc = concat!("Adds a step matching one or more consecutive tokens where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_one_or_more_$quality s >](self) -> Self {
                // `then_one_or_more` boxes its argument itself, so the closure is
                // handed over directly instead of being boxed a second time here.
                self.then_one_or_more(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                })
            }

            #[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns false.")]
            pub fn [< then_anything_but_$quality >](self) -> Self {
                self.then_kind_where(|kind| !kind.[< is_$quality >]())
            }
        }
    };
}
50
51impl Expr for SequenceExpr {
52 fn run(&self, mut cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span<Token>> {
56 let mut window = Span::new_with_len(cursor, 0);
57
58 for cur_expr in &self.exprs {
59 let out = cur_expr.run(cursor, tokens, source)?;
60
61 if out.end > out.start {
63 window.expand_to_include(out.start);
64 window.expand_to_include(out.end.checked_sub(1).unwrap_or(out.start));
65 }
66
67 if out.end > cursor {
69 cursor = out.end;
70 } else if out.start < cursor {
71 cursor = out.start;
72 }
73 }
75
76 Some(window)
77 }
78}
79
80impl SequenceExpr {
81 pub fn anything() -> Self {
87 Self::default().then_anything()
88 }
89
90 pub fn any_capitalization_of(word: &'static str) -> Self {
94 Self::default().then_any_capitalization_of(word)
95 }
96
97 pub fn aco(word: &'static str) -> Self {
99 Self::any_capitalization_of(word)
100 }
101
102 pub fn word_set(words: &'static [&'static str]) -> Self {
104 Self::default().then_word_set(words)
105 }
106
107 pub fn any_word() -> Self {
109 Self::default().then_any_word()
110 }
111
112 pub fn fixed_phrase(phrase: &'static str) -> Self {
116 Self::default().then_fixed_phrase(phrase)
117 }
118
119 pub fn any_of(exprs: Vec<Box<dyn Expr>>) -> Self {
123 Self::default().then_any_of(exprs)
124 }
125
126 pub fn unless(condition: impl Expr + 'static) -> Self {
128 Self::default().then_unless(condition)
129 }
130
131 pub fn then(mut self, expr: impl Expr + 'static) -> Self {
135 self.exprs.push(Box::new(expr));
136 self
137 }
138
139 pub fn then_optional(mut self, expr: impl Expr + 'static) -> Self {
141 self.exprs.push(Box::new(Optional::new(expr)));
142 self
143 }
144
145 pub fn then_any_of(mut self, exprs: Vec<Box<dyn Expr>>) -> Self {
151 self.exprs.push(Box::new(FirstMatchOf::new(exprs)));
152 self
153 }
154
155 pub fn then_longest_of(mut self, exprs: Vec<Box<dyn Expr>>) -> Self {
160 self.exprs.push(Box::new(LongestMatchOf::new(exprs)));
161 self
162 }
163
164 pub fn then_seq(mut self, mut other: Self) -> Self {
167 self.exprs.append(&mut other.exprs);
168 self
169 }
170
171 pub fn then_word_set(self, words: &'static [&'static str]) -> Self {
173 self.then(WordSet::new(words))
174 }
175
176 pub fn t_set(self, words: &'static [&'static str]) -> Self {
178 self.then_word_set(words)
179 }
180
181 pub fn then_whitespace(self) -> Self {
183 self.then(WhitespacePattern)
184 }
185
186 pub fn t_ws(self) -> Self {
188 self.then_whitespace()
189 }
190
191 pub fn then_whitespace_or_hyphen(self) -> Self {
193 self.then(WhitespacePattern.or(|tok: &Token, _: &[char]| tok.kind.is_hyphen()))
194 }
195
196 pub fn t_ws_h(self) -> Self {
198 self.then_whitespace_or_hyphen()
199 }
200
201 pub fn then_one_or_more(self, expr: impl Expr + 'static) -> Self {
202 self.then(Repeating::new(Box::new(expr), 1))
203 }
204
205 pub fn then_unless(self, condition: impl Expr + 'static) -> Self {
212 self.then(UnlessStep::new(condition, |_tok: &Token, _src: &[char]| {
213 true
214 }))
215 }
216
217 pub fn then_anything(self) -> Self {
221 self.then(AnyPattern)
222 }
223
224 pub fn t_any(self) -> Self {
228 self.then_anything()
229 }
230
231 pub fn then_any_word(self) -> Self {
235 self.then_kind_where(|kind| kind.is_word())
236 }
237
238 pub fn then_any_capitalization_of(self, word: &'static str) -> Self {
240 self.then(Word::new(word))
241 }
242
243 pub fn t_aco(self, word: &'static str) -> Self {
245 self.then_any_capitalization_of(word)
246 }
247
248 pub fn then_exact_word(self, word: &'static str) -> Self {
250 self.then(Word::new_exact(word))
251 }
252
253 pub fn then_fixed_phrase(self, phrase: &'static str) -> Self {
255 self.then(FixedPhrase::from_phrase(phrase))
256 }
257
258 pub fn then_word_except(self, words: &'static [&'static str]) -> Self {
260 self.then(move |tok: &Token, src: &[char]| {
261 !tok.kind.is_word()
262 || !words
263 .iter()
264 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
265 })
266 }
267
268 pub fn then_kind(self, kind: TokenKind) -> Self {
274 self.then_kind_where(move |k| kind == *k)
275 }
276
277 pub fn then_kind_where<F>(mut self, predicate: F) -> Self
279 where
280 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
281 {
282 self.exprs
283 .push(Box::new(move |tok: &Token, _source: &[char]| {
284 predicate(&tok.kind)
285 }));
286 self
287 }
288
289 pub fn then_kind_except<F>(self, pred_is: F, ex: &'static [&'static str]) -> Self
291 where
292 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
293 {
294 self.then(move |tok: &Token, src: &[char]| {
295 pred_is(&tok.kind)
296 && !ex
297 .iter()
298 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
299 })
300 }
301
302 pub fn then_kind_both<F1, F2>(self, pred_is_1: F1, pred_is_2: F2) -> Self
307 where
308 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
309 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
310 {
311 self.then_kind_where(move |k| pred_is_1(k) && pred_is_2(k))
312 }
313
314 pub fn then_kind_either<F1, F2>(self, pred_is_1: F1, pred_is_2: F2) -> Self
317 where
318 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
319 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
320 {
321 self.then_kind_where(move |k| pred_is_1(k) || pred_is_2(k))
322 }
323
324 pub fn then_kind_neither<F1, F2>(self, pred_isnt_1: F1, pred_isnt_2: F2) -> Self
327 where
328 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
329 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
330 {
331 self.then_kind_where(move |k| !pred_isnt_1(k) && !pred_isnt_2(k))
332 }
333
334 pub fn then_kind_is_but_is_not<F1, F2>(self, pred_is: F1, pred_not: F2) -> Self
337 where
338 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
339 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
340 {
341 self.then_kind_where(move |k| pred_is(k) && !pred_not(k))
342 }
343
344 pub fn then_kind_is_but_is_not_except<F1, F2>(
347 self,
348 pred_is: F1,
349 pred_not: F2,
350 ex: &'static [&'static str],
351 ) -> Self
352 where
353 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
354 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
355 {
356 self.then(move |tok: &Token, src: &[char]| {
357 pred_is(&tok.kind)
358 && !pred_not(&tok.kind)
359 && !ex
360 .iter()
361 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
362 })
363 }
364
365 pub fn then_kind_is_but_isnt_any_of<F1, F2>(
368 self,
369 pred_is: F1,
370 preds_isnt: &'static [F2],
371 ) -> Self
372 where
373 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
374 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
375 {
376 self.then_kind_where(move |k| pred_is(k) && !preds_isnt.iter().any(|pred| pred(k)))
377 }
378
379 pub fn then_kind_is_but_isnt_any_of_except<F1, F2>(
383 self,
384 pred_is: F1,
385 preds_isnt: &'static [F2],
386 ex: &'static [&'static str],
387 ) -> Self
388 where
389 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
390 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
391 {
392 self.then(move |tok: &Token, src: &[char]| {
393 pred_is(&tok.kind)
394 && !preds_isnt.iter().any(|pred| pred(&tok.kind))
395 && !ex
396 .iter()
397 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
398 })
399 }
400
401 gen_then_from_is!(sentence_terminator);
402 pub fn then_kind_any<F>(self, preds_is: &'static [F]) -> Self
407 where
408 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
409 {
410 self.then_kind_where(move |k| preds_is.iter().any(|pred| pred(k)))
411 }
412
413 pub fn then_kind_none_of<F>(self, preds_isnt: &'static [F]) -> Self
416 where
417 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
418 {
419 self.then_kind_where(move |k| preds_isnt.iter().all(|pred| !pred(k)))
420 }
421
422 pub fn then_kind_any_except<F>(
425 self,
426 preds_is: &'static [F],
427 ex: &'static [&'static str],
428 ) -> Self
429 where
430 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
431 {
432 self.then(move |tok: &Token, src: &[char]| {
433 preds_is.iter().any(|pred| pred(&tok.kind))
434 && !ex
435 .iter()
436 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
437 })
438 }
439
440 pub fn then_kind_any_or_words<F>(
443 self,
444 preds: &'static [F],
445 words: &'static [&'static str],
446 ) -> Self
447 where
448 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
449 {
450 self.then(move |tok: &Token, src: &[char]| {
451 preds.iter().any(|pred| pred(&tok.kind))
452 || words
453 .iter()
454 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
455 })
456 }
457
458 pub fn then_kind_any_but_not_except<F1, F2>(
461 self,
462 preds_is: &'static [F1],
463 pred_not: F2,
464 ex: &'static [&'static str],
465 ) -> Self
466 where
467 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
468 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
469 {
470 self.then(move |tok: &Token, src: &[char]| {
471 preds_is.iter().any(|pred| pred(&tok.kind))
472 && !pred_not(&tok.kind)
473 && !ex
474 .iter()
475 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
476 })
477 }
478
479 gen_then_from_is!(oov);
483 gen_then_from_is!(swear);
484
485 gen_then_from_is!(nominal);
490 gen_then_from_is!(plural_nominal);
491 gen_then_from_is!(non_plural_nominal);
492 gen_then_from_is!(possessive_nominal);
493
494 gen_then_from_is!(noun);
497 gen_then_from_is!(proper_noun);
498 gen_then_from_is!(plural_noun);
499 gen_then_from_is!(mass_noun_only);
500
501 gen_then_from_is!(pronoun);
504 gen_then_from_is!(personal_pronoun);
505 gen_then_from_is!(first_person_singular_pronoun);
506 gen_then_from_is!(first_person_plural_pronoun);
507 gen_then_from_is!(second_person_pronoun);
508 gen_then_from_is!(third_person_pronoun);
509 gen_then_from_is!(third_person_singular_pronoun);
510 gen_then_from_is!(third_person_plural_pronoun);
511 gen_then_from_is!(subject_pronoun);
512 gen_then_from_is!(object_pronoun);
513
514 gen_then_from_is!(verb);
517 gen_then_from_is!(auxiliary_verb);
518 gen_then_from_is!(linking_verb);
519 gen_then_from_is!(verb_lemma);
520 gen_then_from_is!(verb_simple_past_form);
521 gen_then_from_is!(verb_past_participle_form);
522 gen_then_from_is!(verb_progressive_form);
523
524 gen_then_from_is!(adjective);
527 gen_then_from_is!(positive_adjective);
528 gen_then_from_is!(comparative_adjective);
529 gen_then_from_is!(superlative_adjective);
530
531 gen_then_from_is!(adverb);
534 gen_then_from_is!(frequency_adverb);
535
536 gen_then_from_is!(determiner);
539 gen_then_from_is!(demonstrative_determiner);
540 gen_then_from_is!(possessive_determiner);
541 gen_then_from_is!(quantifier);
542 gen_then_from_is!(non_quantifier_determiner);
543 gen_then_from_is!(non_demonstrative_determiner);
544
545 pub fn then_indefinite_article(self) -> Self {
547 self.then(IndefiniteArticle::default())
548 }
549
550 gen_then_from_is!(conjunction);
553 gen_then_from_is!(preposition);
554
555 gen_then_from_is!(number);
558 gen_then_from_is!(cardinal_number);
559 gen_then_from_is!(ordinal_number);
560
561 gen_then_from_is!(punctuation);
564 gen_then_from_is!(apostrophe);
565 gen_then_from_is!(comma);
566 gen_then_from_is!(hyphen);
567 gen_then_from_is!(period);
568 gen_then_from_is!(semicolon);
569 gen_then_from_is!(quote);
570
571 gen_then_from_is!(case_separator);
574 gen_then_from_is!(likely_homograph);
575}
576
577impl<S> From<S> for SequenceExpr
578where
579 S: Step + 'static,
580{
581 fn from(step: S) -> Self {
582 Self {
583 exprs: vec![Box::new(step)],
584 }
585 }
586}
587
#[cfg(test)]
mod tests {
    use crate::{
        Document, TokenKind,
        expr::{ExprExt, SequenceExpr},
        linting::tests::SpanVecExt,
    };

    /// Sentence that every test in this module is run against.
    const SAMPLE: &str = "Use a good example.";

    /// `then_kind_both` keeps only tokens that satisfy both predicates.
    #[test]
    fn test_kind_both() {
        let expr = SequenceExpr::default().then_kind_both(TokenKind::is_noun, TokenKind::is_verb);
        let doc = Document::new_plain_english_curated(SAMPLE);
        let found: Vec<_> = expr.iter_matches_in_doc(&doc).collect();
        assert_eq!(found.to_strings(&doc), vec!["Use", "good", "example"]);
    }

    /// `then_kind_either` keeps tokens that satisfy at least one predicate.
    #[test]
    fn test_adjective_or_determiner() {
        let expr = SequenceExpr::default()
            .then_kind_either(TokenKind::is_adjective, TokenKind::is_determiner);
        let doc = Document::new_plain_english_curated(SAMPLE);
        let found: Vec<_> = expr.iter_matches_in_doc(&doc).collect();
        assert_eq!(found.to_strings(&doc), vec!["a", "good"]);
    }

    /// `then_kind_is_but_is_not` keeps tokens that satisfy the first predicate
    /// but not the second.
    #[test]
    fn test_noun_but_not_adjective() {
        let expr = SequenceExpr::default()
            .then_kind_is_but_is_not(TokenKind::is_noun, TokenKind::is_adjective);
        let doc = Document::new_plain_english_curated(SAMPLE);
        let found: Vec<_> = expr.iter_matches_in_doc(&doc).collect();
        assert_eq!(found.to_strings(&doc), vec!["Use", "example"]);
    }
}