1use paste::paste;
2
3use crate::{
4 CharStringExt, Span, Token, TokenKind,
5 expr::{FirstMatchOf, FixedPhrase, LongestMatchOf},
6 patterns::{AnyPattern, IndefiniteArticle, WhitespacePattern, Word, WordSet},
7};
8
9use super::{Expr, Optional, OwnedExprExt, Repeating, Step, UnlessStep};
10
/// An expression made of an ordered list of sub-expressions ("steps").
///
/// A default-constructed value has no steps. Steps are appended with the
/// `then*` builder methods, and the [`Expr`] implementation runs them one
/// after another in insertion order.
#[derive(Default)]
pub struct SequenceExpr {
    /// The steps of the sequence, in the order they are run.
    exprs: Vec<Box<dyn Expr>>,
}
15
/// Generates a family of builder methods from one `TokenKind::is_*` predicate.
///
/// Must be invoked inside an `impl SequenceExpr` block. For a `$quality` such
/// as `noun`, which refers to `TokenKind::is_noun`, it expands to:
///
/// * `then_noun` – a step accepting a token whose kind satisfies the predicate.
/// * `then_optional_noun` – the same step, wrapped by `then_optional`.
/// * `then_one_or_more_nouns` – the same step, wrapped by `then_one_or_more`
///   (note the trailing `s` appended to the generated name).
/// * `then_anything_but_noun` – a step accepting a token whose kind does
///   *not* satisfy the predicate.
macro_rules! gen_then_from_is {
    ($quality:ident) => {
        paste! {
            #[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_$quality >] (self) -> Self {
                self.then_kind_where(|kind| {
                    kind.[< is_$quality >]()
                })
            }

            #[doc = concat!("Adds an optional step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_optional_$quality >] (self) -> Self {
                self.then_optional(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                })
            }

            #[doc = concat!("Adds a step matching one or more consecutive tokens where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_one_or_more_$quality s >] (self) -> Self {
                // The closure is passed unboxed: `then_one_or_more` already
                // boxes its argument, so boxing here would only add a second
                // allocation and an extra layer of indirection.
                self.then_one_or_more(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                })
            }

            #[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns false.")]
            pub fn [< then_anything_but_$quality >] (self) -> Self {
                self.then_kind_where(|kind| {
                    !kind.[< is_$quality >]()
                })
            }
        }
    };
}
50
51impl Expr for SequenceExpr {
52 fn run(&self, mut cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span<Token>> {
56 let mut window = Span::new_with_len(cursor, 0);
57
58 for cur_expr in &self.exprs {
59 let out = cur_expr.run(cursor, tokens, source)?;
60
61 if out.end > out.start {
63 window.expand_to_include(out.start);
64 window.expand_to_include(out.end.checked_sub(1).unwrap_or(out.start));
65 }
66
67 if out.end > cursor {
69 cursor = out.end;
70 } else if out.start < cursor {
71 cursor = out.start;
72 }
73 }
75
76 Some(window)
77 }
78}
79
80impl SequenceExpr {
81 pub fn any_capitalization_of(word: &'static str) -> Self {
87 Self::default().then_any_capitalization_of(word)
88 }
89
90 pub fn aco(word: &'static str) -> Self {
92 Self::any_capitalization_of(word)
93 }
94
95 pub fn word_set(words: &'static [&'static str]) -> Self {
97 Self::default().then_word_set(words)
98 }
99
100 pub fn any_word() -> Self {
102 Self::default().then_any_word()
103 }
104
105 pub fn fixed_phrase(phrase: &'static str) -> Self {
109 Self::default().then_fixed_phrase(phrase)
110 }
111
112 pub fn any_of(exprs: Vec<Box<dyn Expr>>) -> Self {
116 Self::default().then_any_of(exprs)
117 }
118
119 pub fn unless(condition: impl Expr + 'static) -> Self {
121 Self::default().then_unless(condition)
122 }
123
124 pub fn then(mut self, expr: impl Expr + 'static) -> Self {
128 self.exprs.push(Box::new(expr));
129 self
130 }
131
132 pub fn then_optional(mut self, expr: impl Expr + 'static) -> Self {
134 self.exprs.push(Box::new(Optional::new(expr)));
135 self
136 }
137
138 pub fn then_any_of(mut self, exprs: Vec<Box<dyn Expr>>) -> Self {
144 self.exprs.push(Box::new(FirstMatchOf::new(exprs)));
145 self
146 }
147
148 pub fn then_longest_of(mut self, exprs: Vec<Box<dyn Expr>>) -> Self {
153 self.exprs.push(Box::new(LongestMatchOf::new(exprs)));
154 self
155 }
156
157 pub fn then_seq(mut self, mut other: Self) -> Self {
160 self.exprs.append(&mut other.exprs);
161 self
162 }
163
164 pub fn then_word_set(self, words: &'static [&'static str]) -> Self {
166 self.then(WordSet::new(words))
167 }
168
169 pub fn then_whitespace(self) -> Self {
171 self.then(WhitespacePattern)
172 }
173
174 pub fn then_whitespace_or_hyphen(self) -> Self {
176 self.then(WhitespacePattern.or(|tok: &Token, _: &[char]| tok.kind.is_hyphen()))
177 }
178
179 pub fn t_ws_h(self) -> Self {
181 self.then_whitespace_or_hyphen()
182 }
183
184 pub fn t_ws(self) -> Self {
186 self.then_whitespace()
187 }
188
189 pub fn then_one_or_more(self, expr: impl Expr + 'static) -> Self {
190 self.then(Repeating::new(Box::new(expr), 1))
191 }
192
193 pub fn then_unless(self, condition: impl Expr + 'static) -> Self {
200 self.then(UnlessStep::new(condition, |_tok: &Token, _src: &[char]| {
201 true
202 }))
203 }
204
205 pub fn then_anything(self) -> Self {
209 self.then(AnyPattern)
210 }
211
212 pub fn t_any(self) -> Self {
216 self.then_anything()
217 }
218
219 pub fn then_any_word(self) -> Self {
223 self.then_kind_where(|kind| kind.is_word())
224 }
225
226 pub fn then_any_capitalization_of(self, word: &'static str) -> Self {
228 self.then(Word::new(word))
229 }
230
231 pub fn t_aco(self, word: &'static str) -> Self {
233 self.then_any_capitalization_of(word)
234 }
235
236 pub fn then_exact_word(self, word: &'static str) -> Self {
238 self.then(Word::new_exact(word))
239 }
240
241 pub fn then_fixed_phrase(self, phrase: &'static str) -> Self {
243 self.then(FixedPhrase::from_phrase(phrase))
244 }
245
246 pub fn then_word_except(self, words: &'static [&'static str]) -> Self {
248 self.then(move |tok: &Token, src: &[char]| {
249 !tok.kind.is_word()
250 || !words
251 .iter()
252 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
253 })
254 }
255
256 pub fn then_kind(self, kind: TokenKind) -> Self {
262 self.then_kind_where(move |k| kind == *k)
263 }
264
265 pub fn then_kind_where<F>(mut self, predicate: F) -> Self
267 where
268 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
269 {
270 self.exprs
271 .push(Box::new(move |tok: &Token, _source: &[char]| {
272 predicate(&tok.kind)
273 }));
274 self
275 }
276
277 pub fn then_kind_except<F>(self, pred_is: F, ex: &'static [&'static str]) -> Self
279 where
280 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
281 {
282 self.then(move |tok: &Token, src: &[char]| {
283 pred_is(&tok.kind)
284 && !ex
285 .iter()
286 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
287 })
288 }
289
290 pub fn then_kind_both<F1, F2>(self, pred_is_1: F1, pred_is_2: F2) -> Self
295 where
296 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
297 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
298 {
299 self.then_kind_where(move |k| pred_is_1(k) && pred_is_2(k))
300 }
301
302 pub fn then_kind_either<F1, F2>(self, pred_is_1: F1, pred_is_2: F2) -> Self
305 where
306 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
307 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
308 {
309 self.then_kind_where(move |k| pred_is_1(k) || pred_is_2(k))
310 }
311
312 pub fn then_kind_neither<F1, F2>(self, pred_isnt_1: F1, pred_isnt_2: F2) -> Self
315 where
316 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
317 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
318 {
319 self.then_kind_where(move |k| !pred_isnt_1(k) && !pred_isnt_2(k))
320 }
321
322 pub fn then_kind_is_but_is_not<F1, F2>(self, pred_is: F1, pred_not: F2) -> Self
325 where
326 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
327 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
328 {
329 self.then_kind_where(move |k| pred_is(k) && !pred_not(k))
330 }
331
332 pub fn then_kind_is_but_isnt_any_of<F1, F2>(
335 self,
336 pred_is: F1,
337 preds_isnt: &'static [F2],
338 ) -> Self
339 where
340 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
341 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
342 {
343 self.then_kind_where(move |k| pred_is(k) && !preds_isnt.iter().any(|pred| pred(k)))
344 }
345
346 pub fn then_kind_is_but_is_not_except<F1, F2>(
349 self,
350 pred_is: F1,
351 pred_not: F2,
352 ex: &'static [&'static str],
353 ) -> Self
354 where
355 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
356 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
357 {
358 self.then(move |tok: &Token, src: &[char]| {
359 pred_is(&tok.kind)
360 && !pred_not(&tok.kind)
361 && !ex
362 .iter()
363 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
364 })
365 }
366
367 gen_then_from_is!(sentence_terminator);
368 pub fn then_kind_any<F>(self, preds_is: &'static [F]) -> Self
373 where
374 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
375 {
376 self.then_kind_where(move |k| preds_is.iter().any(|pred| pred(k)))
377 }
378
379 pub fn then_kind_none_of<F>(self, preds_isnt: &'static [F]) -> Self
382 where
383 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
384 {
385 self.then_kind_where(move |k| preds_isnt.iter().all(|pred| !pred(k)))
386 }
387
388 pub fn then_kind_any_except<F>(
391 self,
392 preds_is: &'static [F],
393 ex: &'static [&'static str],
394 ) -> Self
395 where
396 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
397 {
398 self.then(move |tok: &Token, src: &[char]| {
399 preds_is.iter().any(|pred| pred(&tok.kind))
400 && !ex
401 .iter()
402 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
403 })
404 }
405
406 pub fn then_kind_any_or_words<F>(
409 self,
410 preds: &'static [F],
411 words: &'static [&'static str],
412 ) -> Self
413 where
414 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
415 {
416 self.then(move |tok: &Token, src: &[char]| {
417 preds.iter().any(|pred| pred(&tok.kind))
418 || words
419 .iter()
420 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
421 })
422 }
423
424 pub fn then_kind_any_but_not_except<F1, F2>(
427 self,
428 preds_is: &'static [F1],
429 pred_not: F2,
430 ex: &'static [&'static str],
431 ) -> Self
432 where
433 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
434 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
435 {
436 self.then(move |tok: &Token, src: &[char]| {
437 preds_is.iter().any(|pred| pred(&tok.kind))
438 && !pred_not(&tok.kind)
439 && !ex
440 .iter()
441 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
442 })
443 }
444
445 gen_then_from_is!(oov);
449 gen_then_from_is!(swear);
450
451 gen_then_from_is!(nominal);
456 gen_then_from_is!(plural_nominal);
457 gen_then_from_is!(non_plural_nominal);
458 gen_then_from_is!(possessive_nominal);
459
460 gen_then_from_is!(noun);
463 gen_then_from_is!(proper_noun);
464 gen_then_from_is!(plural_noun);
465 gen_then_from_is!(mass_noun_only);
466
467 gen_then_from_is!(pronoun);
470 gen_then_from_is!(personal_pronoun);
471 gen_then_from_is!(first_person_singular_pronoun);
472 gen_then_from_is!(first_person_plural_pronoun);
473 gen_then_from_is!(second_person_pronoun);
474 gen_then_from_is!(third_person_pronoun);
475 gen_then_from_is!(third_person_singular_pronoun);
476 gen_then_from_is!(third_person_plural_pronoun);
477 gen_then_from_is!(subject_pronoun);
478 gen_then_from_is!(object_pronoun);
479
480 gen_then_from_is!(verb);
483 gen_then_from_is!(auxiliary_verb);
484 gen_then_from_is!(linking_verb);
485 gen_then_from_is!(verb_lemma);
486 gen_then_from_is!(verb_simple_past_form);
487 gen_then_from_is!(verb_past_participle_form);
488 gen_then_from_is!(verb_progressive_form);
489
490 gen_then_from_is!(adjective);
493 gen_then_from_is!(positive_adjective);
494 gen_then_from_is!(comparative_adjective);
495 gen_then_from_is!(superlative_adjective);
496
497 gen_then_from_is!(adverb);
500
501 gen_then_from_is!(determiner);
504 gen_then_from_is!(demonstrative_determiner);
505 gen_then_from_is!(possessive_determiner);
506 gen_then_from_is!(quantifier);
507 gen_then_from_is!(non_quantifier_determiner);
508 gen_then_from_is!(non_demonstrative_determiner);
509
510 pub fn then_indefinite_article(self) -> Self {
512 self.then(IndefiniteArticle::default())
513 }
514
515 gen_then_from_is!(conjunction);
518 gen_then_from_is!(preposition);
519
520 gen_then_from_is!(number);
523 gen_then_from_is!(cardinal_number);
524 gen_then_from_is!(ordinal_number);
525
526 gen_then_from_is!(punctuation);
529 gen_then_from_is!(apostrophe);
530 gen_then_from_is!(comma);
531 gen_then_from_is!(hyphen);
532 gen_then_from_is!(period);
533 gen_then_from_is!(semicolon);
534 gen_then_from_is!(quote);
535
536 gen_then_from_is!(case_separator);
539 gen_then_from_is!(likely_homograph);
540}
541
542impl<S> From<S> for SequenceExpr
543where
544 S: Step + 'static,
545{
546 fn from(step: S) -> Self {
547 Self {
548 exprs: vec![Box::new(step)],
549 }
550 }
551}
552
#[cfg(test)]
mod tests {
    use crate::{
        Document, TokenKind,
        expr::{ExprExt, SequenceExpr},
        linting::tests::SpanVecExt,
    };

    /// Sentence every test in this module runs against.
    const SAMPLE: &str = "Use a good example.";

    /// Runs `expr` over [`SAMPLE`] and asserts that the matches, rendered as
    /// strings, equal `expected` (order included).
    #[track_caller]
    fn assert_matches(expr: SequenceExpr, expected: Vec<&str>) {
        let doc = Document::new_plain_english_curated(SAMPLE);
        let found = expr.iter_matches_in_doc(&doc).collect::<Vec<_>>();
        assert_eq!(found.to_strings(&doc), expected);
    }

    /// `then_kind_both` keeps only tokens that are both a noun and a verb.
    #[test]
    fn test_kind_both() {
        assert_matches(
            SequenceExpr::default().then_kind_both(TokenKind::is_noun, TokenKind::is_verb),
            vec!["Use", "good", "example"],
        );
    }

    /// `then_kind_either` keeps tokens that are an adjective or a determiner.
    #[test]
    fn test_adjective_or_determiner() {
        assert_matches(
            SequenceExpr::default()
                .then_kind_either(TokenKind::is_adjective, TokenKind::is_determiner),
            vec!["a", "good"],
        );
    }

    /// `then_kind_is_but_is_not` keeps nouns that are not also adjectives.
    #[test]
    fn test_noun_but_not_adjective() {
        assert_matches(
            SequenceExpr::default()
                .then_kind_is_but_is_not(TokenKind::is_noun, TokenKind::is_adjective),
            vec!["Use", "example"],
        );
    }
}