Skip to main content

harper_core/expr/
sequence_expr.rs

1use paste::paste;
2
3use crate::{
4    CharStringExt, Lrc, Span, Token, TokenKind,
5    expr::{FirstMatchOf, FixedPhrase, LongestMatchOf},
6    patterns::{AnyPattern, IndefiniteArticle, WhitespacePattern, Word, WordSet},
7};
8
9use super::{Expr, Optional, OwnedExprExt, Repeating, Step, UnlessStep};
10
/// An [`Expr`] built from an ordered list of child expressions.
///
/// Child expressions are appended with the `then_*` builder methods and are run one after
/// another; if any of them fails to match, the whole sequence fails to match.
#[derive(Default)]
pub struct SequenceExpr {
    /// The child expressions, in the order in which they are run.
    exprs: Vec<Box<dyn Expr>>,
}
15
/// Generate a family of `then_*` methods from an available `is_*` function on [`TokenKind`].
///
/// For a quality `foo` (backed by `TokenKind::is_foo`), the macro expands to four builder
/// methods:
///
/// - `then_foo`: a step matching a token for which `is_foo()` is true.
/// - `then_optional_foo`: the same step, wrapped so that it is optional.
/// - `then_one_or_more_foos`: one or more consecutive tokens for which `is_foo()` is true.
/// - `then_anything_but_foo`: a step matching a token for which `is_foo()` is false.
macro_rules! gen_then_from_is {
    ($quality:ident) => {
        paste! {
            #[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_$quality >](self) -> Self {
                self.then_kind_where(|kind| kind.[< is_$quality >]())
            }

            #[doc = concat!("Adds an optional step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_optional_$quality >](self) -> Self {
                self.then_optional(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                })
            }

            #[doc = concat!("Adds a step matching one or more consecutive tokens where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_one_or_more_$quality s >](self) -> Self {
                // Pass the closure directly: `then_one_or_more` already boxes its argument,
                // so boxing it here as well would only add a second allocation and an extra
                // pointer hop on every call.
                self.then_one_or_more(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                })
            }

            #[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns false.")]
            pub fn [< then_anything_but_$quality >](self) -> Self {
                self.then_kind_where(|kind| !kind.[< is_$quality >]())
            }
        }
    };
}
50
51impl Expr for SequenceExpr {
52    /// Run the expression starting at an index, returning the total matched window.
53    ///
54    /// If any step returns `None`, the entire expression does as well.
55    fn run(&self, mut cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span<Token>> {
56        let mut window = Span::empty(cursor);
57
58        for cur_expr in &self.exprs {
59            let out = cur_expr.run(cursor, tokens, source)?;
60
61            // Zero-width assertions (like AnchorEnd) validate position without consuming tokens
62            // They should not expand the window or advance the cursor
63            let is_zero_width = out.end == out.start;
64
65            if !is_zero_width {
66                // Only expand the window if the match actually covers some tokens
67                if out.end > out.start {
68                    window.expand_to_include(out.start);
69                    window.expand_to_include(out.end.checked_sub(1).unwrap_or(out.start));
70                }
71
72                // Only advance cursor if we actually matched something
73                if out.end > cursor {
74                    cursor = out.end;
75                } else if out.start < cursor {
76                    cursor = out.start;
77                }
78            }
79            // If zero-width, don't expand window or advance cursor - just validate position
80        }
81
82        Some(window)
83    }
84}
85
86impl SequenceExpr {
87    // Constructor methods
88
89    // Match an [expression](Expr).
90    pub fn with(expr: impl Expr + 'static) -> Self {
91        Self::default().then(expr)
92    }
93
94    // Single token methods
95
96    /// Construct a new sequence with an [`AnyPattern`] at the beginning of the operation list.
97    pub fn anything() -> Self {
98        Self::default().then_anything()
99    }
100
101    // Single word token methods
102
103    /// Construct a new sequence with a [`Word`] at the beginning of the operation list.
104    pub fn any_capitalization_of(word: &'static str) -> Self {
105        Self::default().then_any_capitalization_of(word)
106    }
107
108    /// Shorthand for [`Self::any_capitalization_of`].
109    pub fn aco(word: &'static str) -> Self {
110        Self::any_capitalization_of(word)
111    }
112
113    /// Match any word from the given set of words, case-insensitive.
114    pub fn word_set(words: &'static [&'static str]) -> Self {
115        Self::default().then_word_set(words)
116    }
117
118    /// Match any word.
119    pub fn any_word() -> Self {
120        Self::default().then_any_word()
121    }
122
123    // Expressions of more than one token
124
125    /// Optionally match an expression.
126    pub fn optional(expr: impl Expr + 'static) -> Self {
127        Self::default().then_optional(expr)
128    }
129
130    /// Match a fixed phrase.
131    pub fn fixed_phrase(phrase: &'static str) -> Self {
132        Self::default().then_fixed_phrase(phrase)
133    }
134
135    // Multiple expressions
136
137    /// Match the first of multiple expressions.
138    pub fn any_of(exprs: Vec<Box<dyn Expr>>) -> Self {
139        Self::default().then_any_of(exprs)
140    }
141
142    /// Match the longest of multiple expressions.
143    pub fn longest_of(exprs: Vec<Box<dyn Expr>>) -> Self {
144        Self::default().then_longest_of(exprs)
145    }
146
147    pub fn whitespace() -> Self {
148        Self::default().then_whitespace()
149    }
150
151    /// Will be accepted unless the condition matches.
152    pub fn unless(condition: impl Expr + 'static) -> Self {
153        Self::default().then_unless(condition)
154    }
155
156    // Builder methods
157
158    /// Push an [expression](Expr) to the operation list.
159    pub fn then(mut self, expr: impl Expr + 'static) -> Self {
160        self.exprs.push(Box::new(expr));
161        self
162    }
163
164    /// Push an already-boxed [expression](Expr) to the operation list.
165    pub fn then_boxed(mut self, expr: Box<dyn Expr>) -> Self {
166        self.exprs.push(expr);
167        self
168    }
169
170    /// Pushes an expression that could move the cursor to the sequence, but does not require it.
171    pub fn then_optional(mut self, expr: impl Expr + 'static) -> Self {
172        self.exprs.push(Box::new(Optional::new(expr)));
173        self
174    }
175
176    /// Pushes an expression that will match any of the provided expressions.
177    ///
178    /// If more than one of the provided expressions match, this function provides no guarantee
179    /// as to which match will end up being used. If you need to get the longest of multiple
180    /// matches, use [`Self::then_longest_of()`] instead.
181    pub fn then_any_of(mut self, exprs: Vec<Box<dyn Expr>>) -> Self {
182        self.exprs.push(Box::new(FirstMatchOf::new(exprs)));
183        self
184    }
185
186    /// Pushes an expression that will match the longest of the provided expressions.
187    ///
188    /// If you don't need the longest match, prefer using the short-circuiting
189    /// [`Self::then_any_of()`] instead.
190    pub fn then_longest_of(mut self, exprs: Vec<Box<dyn Expr>>) -> Self {
191        self.exprs.push(Box::new(LongestMatchOf::new(exprs)));
192        self
193    }
194
195    /// Appends the steps in `other` onto the end of `self`.
196    /// This is more efficient than [`Self::then`] because it avoids pointer redirection.
197    pub fn then_seq(mut self, mut other: Self) -> Self {
198        self.exprs.append(&mut other.exprs);
199        self
200    }
201
202    /// Pushes an expression that will match any word from the given set of words, case-insensitive.
203    pub fn then_word_set(self, words: &'static [&'static str]) -> Self {
204        self.then(WordSet::new(words))
205    }
206
207    /// Shorthand for [`Self::then_word_set`].
208    pub fn t_set(self, words: &'static [&'static str]) -> Self {
209        self.then_word_set(words)
210    }
211
212    /// Match against one or more whitespace tokens.
213    pub fn then_whitespace(self) -> Self {
214        self.then(WhitespacePattern)
215    }
216
217    /// Shorthand for [`Self::then_whitespace`].
218    pub fn t_ws(self) -> Self {
219        self.then_whitespace()
220    }
221
222    /// Match against one or more whitespace tokens.
223    pub fn then_whitespace_or_hyphen(self) -> Self {
224        self.then(WhitespacePattern.or(|tok: &Token, _: &[char]| tok.kind.is_hyphen()))
225    }
226
227    /// Shorthand for [`Self::then_whitespace_or_hyphen`].
228    pub fn t_ws_h(self) -> Self {
229        self.then_whitespace_or_hyphen()
230    }
231
232    /// Match against zero or more occurrences of the given expression. Like `*` in regex.
233    pub fn then_zero_or_more(self, expr: impl Expr + 'static) -> Self {
234        self.then(Repeating::new(Box::new(expr), 0))
235    }
236
237    /// Match against one or more occurrences of the given expression. Like `+` in regex.
238    pub fn then_one_or_more(self, expr: impl Expr + 'static) -> Self {
239        self.then(Repeating::new(Box::new(expr), 1))
240    }
241
242    /// Match against zero or more whitespace-separated occurrences of the given expression.
243    pub fn then_zero_or_more_spaced(self, expr: impl Expr + 'static) -> Self {
244        let expr = Lrc::new(expr);
245        self.then(SequenceExpr::with(expr.clone()).then(Repeating::new(
246            Box::new(SequenceExpr::default().t_ws().then(expr)),
247            0,
248        )))
249    }
250
251    /// Create a new condition that will step one token forward if met.
252    /// If the condition is _not_ met, the whole expression returns `None`.
253    ///
254    /// This can be used to build out exceptions to other rules.
255    ///
256    /// See [`UnlessStep`] for more info.
257    pub fn then_unless(self, condition: impl Expr + 'static) -> Self {
258        self.then(UnlessStep::new(condition, |_tok: &Token, _src: &[char]| {
259            true
260        }))
261    }
262
263    /// Match any single token.
264    ///
265    /// See [`AnyPattern`] for more info.
266    pub fn then_anything(self) -> Self {
267        self.then(AnyPattern)
268    }
269
270    /// Match any single token.
271    ///
272    /// Shorthand for [`Self::then_anything`].
273    pub fn t_any(self) -> Self {
274        self.then_anything()
275    }
276
277    // Word matching methods
278
279    /// Matches any word.
280    pub fn then_any_word(self) -> Self {
281        self.then_kind_where(|kind| kind.is_word())
282    }
283
284    /// Match examples of `word` that have any capitalization.
285    pub fn then_any_capitalization_of(self, word: &'static str) -> Self {
286        self.then(Word::new(word))
287    }
288
289    /// Shorthand for [`Self::then_any_capitalization_of`].
290    pub fn t_aco(self, word: &'static str) -> Self {
291        self.then_any_capitalization_of(word)
292    }
293
294    /// Match examples of `word` case-sensitively.
295    pub fn then_exact_word(self, word: &'static str) -> Self {
296        self.then(Word::new_exact(word))
297    }
298
299    /// Match a fixed phrase.
300    pub fn then_fixed_phrase(self, phrase: &'static str) -> Self {
301        self.then(FixedPhrase::from_phrase(phrase))
302    }
303
304    /// Match any word except the ones in `words`.
305    pub fn then_word_except(self, words: &'static [&'static str]) -> Self {
306        self.then(move |tok: &Token, src: &[char]| {
307            !tok.kind.is_word() || !words.iter().any(|&word| tok.get_ch(src).eq_str(word))
308        })
309    }
310
311    // Token kind/predicate matching methods
312
313    // One kind
314
315    /// Matches any token whose `Kind` exactly matches.
316    pub fn then_kind(self, kind: TokenKind) -> Self {
317        self.then_kind_where(move |k| kind == *k)
318    }
319
320    /// Matches a token where the provided closure returns true for the token's kind.
321    pub fn then_kind_where<F>(mut self, predicate: F) -> Self
322    where
323        F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
324    {
325        self.exprs
326            .push(Box::new(move |tok: &Token, _source: &[char]| {
327                predicate(&tok.kind)
328            }));
329        self
330    }
331
332    /// Match a token of a given kind which is not in the list of words.
333    pub fn then_kind_except<F>(self, pred_is: F, ex: &'static [&'static str]) -> Self
334    where
335        F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
336    {
337        self.then(move |tok: &Token, src: &[char]| {
338            pred_is(&tok.kind) && !ex.iter().any(|&word| tok.get_ch(src).eq_str(word))
339        })
340    }
341
342    // Two kinds
343
344    /// Match a token where both token kind predicates return true.
345    /// For instance, a word that can be both noun and verb.
346    pub fn then_kind_both<F1, F2>(self, pred_is_1: F1, pred_is_2: F2) -> Self
347    where
348        F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
349        F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
350    {
351        self.then_kind_where(move |k| pred_is_1(k) && pred_is_2(k))
352    }
353
354    /// Match a token where either of the two token kind predicates returns true.
355    /// For instance, an adjective or an adverb.
356    pub fn then_kind_either<F1, F2>(self, pred_is_1: F1, pred_is_2: F2) -> Self
357    where
358        F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
359        F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
360    {
361        self.then_kind_where(move |k| pred_is_1(k) || pred_is_2(k))
362    }
363
364    /// Match a token where neither of the two token kind predicates returns true.
365    /// For instance, a word that can't be a verb or a noun.
366    pub fn then_kind_neither<F1, F2>(self, pred_isnt_1: F1, pred_isnt_2: F2) -> Self
367    where
368        F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
369        F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
370    {
371        self.then_kind_where(move |k| !pred_isnt_1(k) && !pred_isnt_2(k))
372    }
373
374    /// Match a token where the first token kind predicate returns true and the second returns false.
375    /// For instance, a word that can be a noun but cannot be a verb.
376    pub fn then_kind_is_but_is_not<F1, F2>(self, pred_is: F1, pred_not: F2) -> Self
377    where
378        F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
379        F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
380    {
381        self.then_kind_where(move |k| pred_is(k) && !pred_not(k))
382    }
383
384    /// Match a token where the first token kind predicate returns true and the second returns false,
385    /// and the token is not in the list of exceptions.
386    pub fn then_kind_is_but_is_not_except<F1, F2>(
387        self,
388        pred_is: F1,
389        pred_not: F2,
390        ex: &'static [&'static str],
391    ) -> Self
392    where
393        F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
394        F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
395    {
396        self.then(move |tok: &Token, src: &[char]| {
397            pred_is(&tok.kind)
398                && !pred_not(&tok.kind)
399                && !ex.iter().any(|&word| tok.get_ch(src).eq_str(word))
400        })
401    }
402
403    /// Match a token where the first token kind predicate returns true and all of the second return false.
404    /// For instance, a word that can be a verb but not a noun or an adjective.
405    pub fn then_kind_is_but_isnt_any_of<F1, F2>(
406        self,
407        pred_is: F1,
408        preds_isnt: &'static [F2],
409    ) -> Self
410    where
411        F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
412        F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
413    {
414        self.then_kind_where(move |k| pred_is(k) && !preds_isnt.iter().any(|pred| pred(k)))
415    }
416
417    /// Match a token where the first token kind predicate returns true and all of the second return false,
418    /// and the token is not in the list of exceptions.
419    /// For instance, an adjective that isn't also a verb or adverb or the word "likely".
420    pub fn then_kind_is_but_isnt_any_of_except<F1, F2>(
421        self,
422        pred_is: F1,
423        preds_isnt: &'static [F2],
424        ex: &'static [&'static str],
425    ) -> Self
426    where
427        F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
428        F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
429    {
430        self.then(move |tok: &Token, src: &[char]| {
431            pred_is(&tok.kind)
432                && !preds_isnt.iter().any(|pred| pred(&tok.kind))
433                && !ex.iter().any(|&word| tok.get_ch(src).eq_str(word))
434        })
435    }
436
437    // More than two kinds
438
439    /// Match a token where both of the first two token kind predicates return true,
440    /// and the third returns false.
441    /// For instance, a word that must be both noun and verb, but not adjective.
442    pub fn then_kind_both_but_not<F1, F2, F3>(
443        self,
444        (pred_is_1, pred_is_2): (F1, F2),
445        pred_not: F3,
446    ) -> Self
447    where
448        F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
449        F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
450        F3: Fn(&TokenKind) -> bool + Send + Sync + 'static,
451    {
452        self.then_kind_where(move |k| pred_is_1(k) && pred_is_2(k) && !pred_not(k))
453    }
454
455    /// Match a token where any of the token kind predicates returns true.
456    /// Like `then_kind_either` but for more than two predicates.
457    pub fn then_kind_any<F>(self, preds_is: &'static [F]) -> Self
458    where
459        F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
460    {
461        self.then_kind_where(move |k| preds_is.iter().any(|pred| pred(k)))
462    }
463
464    /// Match a token where none of the token kind predicates returns true.
465    /// Like `then_kind_neither` but for more than two predicates.
466    pub fn then_kind_none_of<F>(self, preds_isnt: &'static [F]) -> Self
467    where
468        F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
469    {
470        self.then_kind_where(move |k| preds_isnt.iter().all(|pred| !pred(k)))
471    }
472
473    /// Match a token where any of the token kind predicates returns true,
474    /// and the word is not in the list of exceptions.
475    pub fn then_kind_any_except<F>(
476        self,
477        preds_is: &'static [F],
478        ex: &'static [&'static str],
479    ) -> Self
480    where
481        F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
482    {
483        self.then(move |tok: &Token, src: &[char]| {
484            preds_is.iter().any(|pred| pred(&tok.kind))
485                && !ex.iter().any(|&word| tok.get_ch(src).eq_str(word))
486        })
487    }
488
489    /// Match a token where any of the token kind predicates returns true,
490    /// or the token is in the list of words.
491    pub fn then_kind_any_or_words<F>(
492        self,
493        preds: &'static [F],
494        words: &'static [&'static str],
495    ) -> Self
496    where
497        F: Fn(&TokenKind) -> bool + Send + Sync + 'static,
498    {
499        self.then(move |tok: &Token, src: &[char]| {
500            preds.iter().any(|pred| pred(&tok.kind))
501                || words.iter().any(|&word| tok.get_ch(src).eq_str(word))
502        })
503    }
504
505    /// Match a token where any of the first token kind predicates returns true
506    /// and the second returns false.    
507    pub fn then_kind_any_but_not<F1, F2>(self, preds_is: &'static [F1], pred_not: F2) -> Self
508    where
509        F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
510        F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
511    {
512        self.then(move |tok: &Token, _src: &[char]| {
513            preds_is.iter().any(|pred| pred(&tok.kind)) && !pred_not(&tok.kind)
514        })
515    }
516
517    /// Match a token where any of the first token kind predicates returns true,
518    /// the second returns false, and the token is not in the list of exceptions.    
519    pub fn then_kind_any_but_not_except<F1, F2>(
520        self,
521        preds_is: &'static [F1],
522        pred_not: F2,
523        ex: &'static [&'static str],
524    ) -> Self
525    where
526        F1: Fn(&TokenKind) -> bool + Send + Sync + 'static,
527        F2: Fn(&TokenKind) -> bool + Send + Sync + 'static,
528    {
529        self.then(move |tok: &Token, src: &[char]| {
530            preds_is.iter().any(|pred| pred(&tok.kind))
531                && !pred_not(&tok.kind)
532                && !ex.iter().any(|&word| tok.get_ch(src).eq_str(word))
533        })
534    }
535
536    // Word property matching methods
537
538    // Out-of-vocabulary word. (Words not in the dictionary)
539    gen_then_from_is!(oov);
540    gen_then_from_is!(swear);
541
542    // Part-of-speech matching methods
543
544    // Nominals (nouns and pronouns)
545
546    gen_then_from_is!(nominal);
547    gen_then_from_is!(plural_nominal);
548    gen_then_from_is!(non_plural_nominal);
549    gen_then_from_is!(possessive_nominal);
550
551    // Nouns
552
553    gen_then_from_is!(noun);
554    gen_then_from_is!(proper_noun);
555    gen_then_from_is!(plural_noun);
556    gen_then_from_is!(singular_noun);
557    gen_then_from_is!(mass_noun_only);
558
559    // Pronouns
560
561    gen_then_from_is!(pronoun);
562    gen_then_from_is!(personal_pronoun);
563    gen_then_from_is!(first_person_singular_pronoun);
564    gen_then_from_is!(first_person_plural_pronoun);
565    gen_then_from_is!(second_person_pronoun);
566    gen_then_from_is!(third_person_pronoun);
567    gen_then_from_is!(third_person_singular_pronoun);
568    gen_then_from_is!(third_person_plural_pronoun);
569    gen_then_from_is!(subject_pronoun);
570    gen_then_from_is!(object_pronoun);
571
572    // Verbs
573
574    gen_then_from_is!(verb);
575    gen_then_from_is!(auxiliary_verb);
576    gen_then_from_is!(linking_verb);
577    gen_then_from_is!(verb_lemma);
578    gen_then_from_is!(verb_simple_past_form);
579    gen_then_from_is!(verb_past_participle_form);
580    gen_then_from_is!(verb_progressive_form);
581    gen_then_from_is!(verb_third_person_singular_present_form);
582
583    // Adjectives
584
585    gen_then_from_is!(adjective);
586    gen_then_from_is!(positive_adjective);
587    gen_then_from_is!(comparative_adjective);
588    gen_then_from_is!(superlative_adjective);
589
590    // Adverbs
591
592    gen_then_from_is!(adverb);
593    gen_then_from_is!(frequency_adverb);
594    gen_then_from_is!(degree_adverb);
595
596    // Determiners
597
598    gen_then_from_is!(determiner);
599    gen_then_from_is!(demonstrative_determiner);
600    gen_then_from_is!(possessive_determiner);
601    gen_then_from_is!(quantifier);
602    gen_then_from_is!(non_quantifier_determiner);
603    gen_then_from_is!(non_demonstrative_determiner);
604
605    /// Push an [`IndefiniteArticle`] to the end of the operation list.
606    pub fn then_indefinite_article(self) -> Self {
607        self.then(IndefiniteArticle::default())
608    }
609
610    // Other parts of speech
611
612    gen_then_from_is!(conjunction);
613    gen_then_from_is!(preposition);
614
615    // Numbers
616
617    gen_then_from_is!(number);
618    gen_then_from_is!(cardinal_number);
619    gen_then_from_is!(ordinal_number);
620
621    // Punctuation
622
623    gen_then_from_is!(punctuation);
624    gen_then_from_is!(apostrophe);
625    gen_then_from_is!(comma);
626    gen_then_from_is!(hyphen);
627    gen_then_from_is!(period);
628    gen_then_from_is!(semicolon);
629    gen_then_from_is!(acute);
630    gen_then_from_is!(quote);
631    gen_then_from_is!(backslash);
632    gen_then_from_is!(slash);
633    gen_then_from_is!(percent);
634    gen_then_from_is!(backtick);
635
636    // Other
637
638    gen_then_from_is!(case_separator);
639    gen_then_from_is!(likely_homograph);
640    gen_then_from_is!(sentence_terminator);
641}
642
643impl<S> From<S> for SequenceExpr
644where
645    S: Step + 'static,
646{
647    fn from(step: S) -> Self {
648        Self {
649            exprs: vec![Box::new(step)],
650        }
651    }
652}
653
#[cfg(test)]
mod tests {
    use crate::{
        Document, TokenKind,
        expr::{AnchorEnd, ExprExt, SequenceExpr},
        linting::tests::SpanVecExt,
    };

    /// Sentence shared by the token-kind predicate tests.
    const SAMPLE_SENTENCE: &str = "Use a good example.";

    /// Build an expression matching `foo` followed either by ` bar` at the end of the
    /// document, or directly by the end of the document.
    fn foo_then_bar_or_end() -> SequenceExpr {
        let bar_at_end = SequenceExpr::whitespace().t_aco("bar").then(AnchorEnd);

        SequenceExpr::aco("foo").then_any_of(vec![Box::new(bar_at_end), Box::new(AnchorEnd)])
    }

    #[test]
    fn test_kind_both() {
        let doc = Document::new_plain_english_curated(SAMPLE_SENTENCE);
        let noun_and_verb =
            SequenceExpr::default().then_kind_both(TokenKind::is_noun, TokenKind::is_verb);

        let found: Vec<_> = noun_and_verb.iter_matches_in_doc(&doc).collect();

        assert_eq!(found.to_strings(&doc), vec!["Use", "good", "example"]);
    }

    #[test]
    fn test_adjective_or_determiner() {
        let doc = Document::new_plain_english_curated(SAMPLE_SENTENCE);
        let adjective_or_determiner = SequenceExpr::default()
            .then_kind_either(TokenKind::is_adjective, TokenKind::is_determiner);

        let found: Vec<_> = adjective_or_determiner.iter_matches_in_doc(&doc).collect();

        assert_eq!(found.to_strings(&doc), vec!["a", "good"]);
    }

    #[test]
    fn test_noun_but_not_adjective() {
        let doc = Document::new_plain_english_curated(SAMPLE_SENTENCE);
        let noun_not_adjective = SequenceExpr::default()
            .then_kind_is_but_is_not(TokenKind::is_noun, TokenKind::is_adjective);

        let found: Vec<_> = noun_not_adjective.iter_matches_in_doc(&doc).collect();

        assert_eq!(found.to_strings(&doc), vec!["Use", "example"]);
    }

    #[test]
    fn flag_foo_followed_by_bar_or_at_end_1() {
        let doc = Document::new_plain_english_curated("foo bar");
        let found: Vec<_> = foo_then_bar_or_end().iter_matches_in_doc(&doc).collect();

        eprintln!("matches_with_bar: {:#?}", found);

        // "foo bar" matches with a span covering both words and the whitespace between them.
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].start, 0);
        assert_eq!(found[0].end, 3);
        assert_eq!(found.to_strings(&doc), vec!["foo bar"]);
    }

    #[test]
    fn flag_foo_followed_by_bar_or_at_end_2() {
        let doc = Document::new_plain_english_curated("foo");
        let found: Vec<_> = foo_then_bar_or_end().iter_matches_in_doc(&doc).collect();

        eprintln!("matches_with_end: {:#?}", found);

        // A lone "foo" at the end matches with a span covering just "foo".
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].start, 0);
        assert_eq!(found[0].end, 1);
        assert_eq!(found.to_strings(&doc), vec!["foo"]);
    }

    #[test]
    fn flag_foo_followed_by_bar_or_at_end_3() {
        let doc = Document::new_plain_english_curated("foo bar baz");
        let found: Vec<_> = foo_then_bar_or_end().iter_matches_in_doc(&doc).collect();

        eprintln!("matches_with_foo_bar_baz: {:#?}", found);

        // "foo bar baz" must NOT match, because "bar" is not at the end of the document.
        assert_eq!(found.len(), 0);
        assert_eq!(found.to_strings(&doc), Vec::<String>::new());
    }
}
751}