1use paste::paste;
2
3use crate::{
4 CharStringExt, Span, Token, TokenKind,
5 expr::{FirstMatchOf, FixedPhrase, LongestMatchOf},
6 patterns::{AnyPattern, IndefiniteArticle, WhitespacePattern, Word, WordSet},
7};
8
9use super::{Expr, Optional, OwnedExprExt, Repeating, Step, UnlessStep};
10
/// An ordered list of boxed [`Expr`] steps that is itself usable as a single [`Expr`].
///
/// Start from [`SequenceExpr::default`] (an empty sequence) and chain the `then_*`
/// builder methods, or use one of the associated constructor functions.
#[derive(Default)]
pub struct SequenceExpr {
    /// The child expressions, stored in the order in which they are run.
    exprs: Vec<Box<dyn Expr>>,
}
15
/// Generates a family of builder methods from a single `TokenKind::is_*` predicate.
///
/// Invoked as `gen_then_from_is!(foo)` inside an `impl` block, this expands to four
/// methods, each of which tests a token with `tok.kind.is_foo()`:
///
/// - `then_foo`: one token for which the predicate is true.
/// - `then_optional_foo`: the same step, passed through `then_optional`.
/// - `then_one_or_more_foos`: the same step, passed through `then_one_or_more`.
/// - `then_anything_but_foo`: one token for which the predicate is false.
macro_rules! gen_then_from_is {
    ($quality:ident) => {
        paste! {
            #[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_$quality >] (self) -> Self {
                self.then(|tok: &Token, _source: &[char]| tok.kind.[< is_$quality >]())
            }

            #[doc = concat!("Adds an optional step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_optional_$quality >] (self) -> Self {
                self.then_optional(|tok: &Token, _source: &[char]| tok.kind.[< is_$quality >]())
            }

            #[doc = concat!("Adds a step matching one or more consecutive tokens where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_one_or_more_$quality s >] (self) -> Self {
                // `then_one_or_more` boxes its argument itself, so the closure is
                // handed over directly instead of being boxed a second time here.
                self.then_one_or_more(|tok: &Token, _source: &[char]| tok.kind.[< is_$quality >]())
            }

            #[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns false.")]
            pub fn [< then_anything_but_$quality >] (self) -> Self {
                self.then(|tok: &Token, _source: &[char]| !tok.kind.[< is_$quality >]())
            }
        }
    };
}
54
55impl Expr for SequenceExpr {
56 fn run(&self, mut cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span<Token>> {
60 let mut window = Span::new_with_len(cursor, 0);
61
62 for cur_expr in &self.exprs {
63 let out = cur_expr.run(cursor, tokens, source)?;
64
65 if out.end > out.start {
67 window.expand_to_include(out.start);
68 window.expand_to_include(out.end.checked_sub(1).unwrap_or(out.start));
69 }
70
71 if out.end > cursor {
73 cursor = out.end;
74 } else if out.start < cursor {
75 cursor = out.start;
76 }
77 }
79
80 Some(window)
81 }
82}
83
84impl SequenceExpr {
85 pub fn any_capitalization_of(word: &'static str) -> Self {
91 Self::default().then_any_capitalization_of(word)
92 }
93
94 pub fn aco(word: &'static str) -> Self {
96 Self::any_capitalization_of(word)
97 }
98
99 pub fn word_set(words: &'static [&'static str]) -> Self {
101 Self::default().then_word_set(words)
102 }
103
104 pub fn any_word() -> Self {
106 Self::default().then_any_word()
107 }
108
109 pub fn fixed_phrase(phrase: &'static str) -> Self {
113 Self::default().then_fixed_phrase(phrase)
114 }
115
116 pub fn any_of(exprs: Vec<Box<dyn Expr>>) -> Self {
120 Self::default().then_any_of(exprs)
121 }
122
123 pub fn unless(condition: impl Expr + 'static) -> Self {
125 Self::default().then_unless(condition)
126 }
127
128 pub fn then(mut self, expr: impl Expr + 'static) -> Self {
132 self.exprs.push(Box::new(expr));
133 self
134 }
135
136 pub fn then_optional(mut self, expr: impl Expr + 'static) -> Self {
138 self.exprs.push(Box::new(Optional::new(expr)));
139 self
140 }
141
142 pub fn then_any_of(mut self, exprs: Vec<Box<dyn Expr>>) -> Self {
148 self.exprs.push(Box::new(FirstMatchOf::new(exprs)));
149 self
150 }
151
152 pub fn then_longest_of(mut self, exprs: Vec<Box<dyn Expr>>) -> Self {
157 self.exprs.push(Box::new(LongestMatchOf::new(exprs)));
158 self
159 }
160
161 pub fn then_seq(mut self, mut other: Self) -> Self {
164 self.exprs.append(&mut other.exprs);
165 self
166 }
167
168 pub fn then_word_set(self, words: &'static [&'static str]) -> Self {
170 self.then(WordSet::new(words))
171 }
172
173 pub fn then_strict(self, kind: TokenKind) -> Self {
175 self.then(move |tok: &Token, _source: &[char]| tok.kind == kind)
176 }
177
178 pub fn then_whitespace(self) -> Self {
180 self.then(WhitespacePattern)
181 }
182
183 pub fn then_whitespace_or_hyphen(self) -> Self {
185 self.then(WhitespacePattern.or(|tok: &Token, _: &[char]| tok.kind.is_hyphen()))
186 }
187
188 pub fn t_ws_h(self) -> Self {
190 self.then_whitespace_or_hyphen()
191 }
192
193 pub fn t_ws(self) -> Self {
195 self.then_whitespace()
196 }
197
198 pub fn then_one_or_more(self, expr: impl Expr + 'static) -> Self {
199 self.then(Repeating::new(Box::new(expr), 1))
200 }
201
202 pub fn then_unless(self, condition: impl Expr + 'static) -> Self {
209 self.then(UnlessStep::new(condition, |_tok: &Token, _src: &[char]| {
210 true
211 }))
212 }
213
214 pub fn then_anything(self) -> Self {
218 self.then(AnyPattern)
219 }
220
221 pub fn t_any(self) -> Self {
225 self.then_anything()
226 }
227
228 pub fn then_any_word(self) -> Self {
232 self.then(|tok: &Token, _source: &[char]| tok.kind.is_word())
233 }
234
235 pub fn then_any_capitalization_of(self, word: &'static str) -> Self {
237 self.then(Word::new(word))
238 }
239
240 pub fn t_aco(self, word: &'static str) -> Self {
242 self.then_any_capitalization_of(word)
243 }
244
245 pub fn then_exact_word(self, word: &'static str) -> Self {
247 self.then(Word::new_exact(word))
248 }
249
250 pub fn then_fixed_phrase(self, phrase: &'static str) -> Self {
252 self.then(FixedPhrase::from_phrase(phrase))
253 }
254
255 pub fn then_word_except(self, words: &'static [&'static str]) -> Self {
257 self.then(move |tok: &Token, src: &[char]| {
258 !tok.kind.is_word()
259 || !words
260 .iter()
261 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
262 })
263 }
264
265 pub fn then_kind_except<F>(self, pred_is: F, ex: &'static [&'static str]) -> Self
271 where
272 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
273 {
274 self.then(move |tok: &Token, src: &[char]| {
275 pred_is(&tok.kind)
276 && !ex
277 .iter()
278 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
279 })
280 }
281
282 pub fn then_kind_both<F1, F2>(self, pred_is_1: F1, pred_is_2: F2) -> Self
287 where
288 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
289 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
290 {
291 self.then(move |tok: &Token, _source: &[char]| pred_is_1(&tok.kind) && pred_is_2(&tok.kind))
292 }
293
294 pub fn then_kind_either<F1, F2>(self, pred_is_1: F1, pred_is_2: F2) -> Self
297 where
298 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
299 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
300 {
301 self.then(move |tok: &Token, _source: &[char]| pred_is_1(&tok.kind) || pred_is_2(&tok.kind))
302 }
303
304 pub fn then_kind_is_but_is_not<F1, F2>(self, pred_is: F1, pred_not: F2) -> Self
307 where
308 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
309 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
310 {
311 self.then(move |tok: &Token, _source: &[char]| pred_is(&tok.kind) && !pred_not(&tok.kind))
312 }
313
314 pub fn then_kind_is_but_is_not_except<F1, F2>(
317 self,
318 pred_is: F1,
319 pred_not: F2,
320 ex: &'static [&'static str],
321 ) -> Self
322 where
323 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
324 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
325 {
326 self.then(move |tok: &Token, src: &[char]| {
327 pred_is(&tok.kind)
328 && !pred_not(&tok.kind)
329 && !ex
330 .iter()
331 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
332 })
333 }
334
335 gen_then_from_is!(sentence_terminator);
336 pub fn then_kind_any<F>(self, preds_is: &'static [F]) -> Self
341 where
342 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
343 {
344 self.then(move |tok: &Token, _source: &[char]| preds_is.iter().any(|pred| pred(&tok.kind)))
345 }
346
347 pub fn then_kind_any_except<F>(
350 self,
351 preds_is: &'static [F],
352 ex: &'static [&'static str],
353 ) -> Self
354 where
355 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
356 {
357 self.then(move |tok: &Token, src: &[char]| {
358 preds_is.iter().any(|pred| pred(&tok.kind))
359 && !ex
360 .iter()
361 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
362 })
363 }
364
365 pub fn then_kind_any_or_words<F>(
368 self,
369 preds: &'static [F],
370 words: &'static [&'static str],
371 ) -> Self
372 where
373 F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
374 {
375 self.then(move |tok: &Token, src: &[char]| {
376 preds.iter().any(|pred| pred(&tok.kind))
377 || words
378 .iter()
379 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
380 })
381 }
382
383 pub fn then_kind_any_but_not_except<F1, F2>(
386 self,
387 preds_is: &'static [F1],
388 pred_not: F2,
389 ex: &'static [&'static str],
390 ) -> Self
391 where
392 F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
393 F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
394 {
395 self.then(move |tok: &Token, src: &[char]| {
396 preds_is.iter().any(|pred| pred(&tok.kind))
397 && !pred_not(&tok.kind)
398 && !ex
399 .iter()
400 .any(|&word| tok.span.get_content(src).eq_ignore_ascii_case_str(word))
401 })
402 }
403
404 gen_then_from_is!(oov);
408 gen_then_from_is!(swear);
409
410 gen_then_from_is!(nominal);
415 gen_then_from_is!(plural_nominal);
416 gen_then_from_is!(non_plural_nominal);
417 gen_then_from_is!(possessive_nominal);
418
419 gen_then_from_is!(noun);
422 gen_then_from_is!(proper_noun);
423 gen_then_from_is!(mass_noun_only);
424
425 gen_then_from_is!(pronoun);
428 gen_then_from_is!(personal_pronoun);
429 gen_then_from_is!(first_person_singular_pronoun);
430 gen_then_from_is!(first_person_plural_pronoun);
431 gen_then_from_is!(second_person_pronoun);
432 gen_then_from_is!(third_person_pronoun);
433 gen_then_from_is!(third_person_singular_pronoun);
434 gen_then_from_is!(third_person_plural_pronoun);
435 gen_then_from_is!(subject_pronoun);
436 gen_then_from_is!(object_pronoun);
437
438 gen_then_from_is!(verb);
441 gen_then_from_is!(auxiliary_verb);
442 gen_then_from_is!(linking_verb);
443 gen_then_from_is!(verb_lemma);
444 gen_then_from_is!(verb_simple_past_form);
445 gen_then_from_is!(verb_past_participle_form);
446
447 gen_then_from_is!(adjective);
450 gen_then_from_is!(positive_adjective);
451 gen_then_from_is!(comparative_adjective);
452 gen_then_from_is!(superlative_adjective);
453
454 gen_then_from_is!(adverb);
457
458 gen_then_from_is!(determiner);
461 gen_then_from_is!(demonstrative_determiner);
462 gen_then_from_is!(possessive_determiner);
463 gen_then_from_is!(quantifier);
464 gen_then_from_is!(non_quantifier_determiner);
465 gen_then_from_is!(non_demonstrative_determiner);
466
467 pub fn then_indefinite_article(self) -> Self {
469 self.then(IndefiniteArticle::default())
470 }
471
472 gen_then_from_is!(conjunction);
475 gen_then_from_is!(preposition);
476
477 gen_then_from_is!(punctuation);
480 gen_then_from_is!(apostrophe);
481 gen_then_from_is!(comma);
482 gen_then_from_is!(hyphen);
483 gen_then_from_is!(period);
484 gen_then_from_is!(semicolon);
485 gen_then_from_is!(quote);
486
487 gen_then_from_is!(number);
490 gen_then_from_is!(case_separator);
491 gen_then_from_is!(likely_homograph);
492}
493
494impl<S> From<S> for SequenceExpr
495where
496 S: Step + 'static,
497{
498 fn from(step: S) -> Self {
499 Self {
500 exprs: vec![Box::new(step)],
501 }
502 }
503}
504
#[cfg(test)]
mod tests {
    use crate::{
        Document, TokenKind,
        expr::{ExprExt, SequenceExpr},
        linting::tests::SpanVecExt,
    };

    /// `then_kind_both` should only match tokens that satisfy both predicates.
    #[test]
    fn test_kind_both() {
        let document = Document::new_plain_english_curated("Use a good example.");
        let pattern =
            SequenceExpr::default().then_kind_both(TokenKind::is_noun, TokenKind::is_verb);

        let found: Vec<_> = pattern.iter_matches_in_doc(&document).collect();

        assert_eq!(found.to_strings(&document), vec!["Use", "good", "example"]);
    }

    /// `then_kind_either` should match tokens that satisfy at least one predicate.
    #[test]
    fn test_adjective_or_determiner() {
        let document = Document::new_plain_english_curated("Use a good example.");
        let pattern = SequenceExpr::default()
            .then_kind_either(TokenKind::is_adjective, TokenKind::is_determiner);

        let found: Vec<_> = pattern.iter_matches_in_doc(&document).collect();

        assert_eq!(found.to_strings(&document), vec!["a", "good"]);
    }

    /// `then_kind_is_but_is_not` should drop tokens that satisfy the excluded predicate.
    #[test]
    fn test_noun_but_not_adjective() {
        let document = Document::new_plain_english_curated("Use a good example.");
        let pattern = SequenceExpr::default()
            .then_kind_is_but_is_not(TokenKind::is_noun, TokenKind::is_adjective);

        let found: Vec<_> = pattern.iter_matches_in_doc(&document).collect();

        assert_eq!(found.to_strings(&document), vec!["Use", "example"]);
    }
}