harper_core/expr/
sequence_expr.rs

1use paste::paste;
2
3use crate::{
4    Span, Token, TokenKind,
5    expr::{FirstMatchOf, LongestMatchOf},
6    patterns::{AnyPattern, IndefiniteArticle, WhitespacePattern, Word, WordSet},
7};
8
9use super::{Expr, Optional, Repeating, Step, UnlessStep};
10
/// An ordered list of [`Expr`]s that is assembled through the chainable `then_*` builder
/// methods and run one expression after another.
///
/// The `Default` value is an empty sequence with no steps.
#[derive(Default)]
pub struct SequenceExpr {
    /// The expressions of the sequence, stored in the order they were pushed.
    exprs: Vec<Box<dyn Expr>>,
}
15
/// Generate a family of `then_*` methods from an available `is_*` function on [`TokenKind`].
///
/// For a quality `foo` (backed by `TokenKind::is_foo()`), the expansion defines four
/// builder methods:
///
/// - `then_foo`: a step matching a token for which `is_foo()` is true.
/// - `then_optional_foo`: the same step, but optional.
/// - `then_one_or_more_foos`: one or more consecutive tokens for which `is_foo()` is true.
///   Note that the plural is formed by simply appending `s` to the quality name.
/// - `then_anything_but_foo`: a step matching a token for which `is_foo()` is false.
macro_rules! gen_then_from_is {
    ($quality:ident) => {
        paste! {
            #[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_$quality >] (self) -> Self {
                self.then(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                })
            }

            #[doc = concat!("Adds an optional step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_optional_$quality >] (self) -> Self {
                self.then_optional(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                })
            }

            #[doc = concat!("Adds a step matching one or more consecutive tokens where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_one_or_more_$quality s >] (self) -> Self {
                // `then_one_or_more` boxes its argument itself, so the closure is passed
                // directly instead of being boxed a second time here.
                self.then_one_or_more(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                })
            }

            #[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns false.")]
            pub fn [< then_anything_but_$quality >] (self) -> Self {
                self.then(|tok: &Token, _source: &[char]| {
                    !tok.kind.[< is_$quality >]()
                })
            }
        }
    };
}
54
55impl Expr for SequenceExpr {
56    /// Run the expression starting at an index, returning the total matched window.
57    ///
58    /// If any step returns `None`, the entire expression does as well.
59    fn run(&self, mut cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span> {
60        let mut window = Span::new_with_len(cursor, 0);
61
62        for cur_expr in &self.exprs {
63            let out = cur_expr.run(cursor, tokens, source)?;
64
65            // Only expand the window if the match actually covers some tokens
66            if out.end > out.start {
67                window.expand_to_include(out.start);
68                window.expand_to_include(out.end.checked_sub(1).unwrap_or(out.start));
69            }
70
71            // Only advance cursor if we actually matched something
72            if out.end > cursor {
73                cursor = out.end;
74            } else if out.start < cursor {
75                cursor = out.start;
76            }
77            // If both start and end are equal to cursor, don't move the cursor
78        }
79
80        Some(window)
81    }
82}
83
84impl SequenceExpr {
85    // Constructor methods
86
87    /// Construct a new sequence with a [`Word`] at the beginning of the operation list.
88    pub fn any_capitalization_of(word: &'static str) -> Self {
89        Self::default().then_any_capitalization_of(word)
90    }
91
92    /// Shorthand for [`Self::any_capitalization_of`].
93    pub fn aco(word: &'static str) -> Self {
94        Self::any_capitalization_of(word)
95    }
96
97    /// Match any word from the given set of words, case-insensitive.
98    pub fn word_set(words: &'static [&'static str]) -> Self {
99        Self::default().then_word_set(words)
100    }
101
102    // General builder methods
103
104    /// Push an [expression](Expr) to the operation list.
105    pub fn then(mut self, expr: impl Expr + 'static) -> Self {
106        self.exprs.push(Box::new(expr));
107        self
108    }
109
110    /// Pushes an expression that could move the cursor to the sequence, but does not require it.
111    pub fn then_optional(mut self, expr: impl Expr + 'static) -> Self {
112        self.exprs.push(Box::new(Optional::new(expr)));
113        self
114    }
115
116    /// Pushes an expression that will match any of the provided expressions.
117    ///
118    /// If more than one of the provided expressions match, this function provides no guarantee
119    /// as to which match will end up being used. If you need to get the longest of multiple
120    /// matches, use [`Self::then_longest_of()`] instead.
121    pub fn then_any_of(mut self, exprs: Vec<Box<dyn Expr>>) -> Self {
122        self.exprs.push(Box::new(FirstMatchOf::new(exprs)));
123        self
124    }
125
126    /// Pushes an expression that will match the longest of the provided expressions.
127    ///
128    /// If you don't need the longest match, prefer using the short-circuiting
129    /// [`Self::then_any_of()`] instead.
130    pub fn then_longest_of(mut self, exprs: Vec<Box<dyn Expr>>) -> Self {
131        self.exprs.push(Box::new(LongestMatchOf::new(exprs)));
132        self
133    }
134
135    /// Appends the steps in `other` onto the end of `self`.
136    /// This is more efficient than [`Self::then`] because it avoids pointer redirection.
137    pub fn then_seq(mut self, mut other: Self) -> Self {
138        self.exprs.append(&mut other.exprs);
139        self
140    }
141
142    pub fn then_word_set(self, words: &'static [&'static str]) -> Self {
143        self.then(WordSet::new(words))
144    }
145
146    /// Matches any token whose `Kind` exactly matches.
147    pub fn then_strict(self, kind: TokenKind) -> Self {
148        self.then(move |tok: &Token, _source: &[char]| tok.kind == kind)
149    }
150
151    /// Match against one or more whitespace tokens.
152    pub fn then_whitespace(self) -> Self {
153        self.then(WhitespacePattern)
154    }
155
156    /// Shorthand for [`Self::then_whitespace`].
157    pub fn t_ws(self) -> Self {
158        self.then_whitespace()
159    }
160
161    pub fn then_one_or_more(self, expr: impl Expr + 'static) -> Self {
162        self.then(Repeating::new(Box::new(expr), 1))
163    }
164
165    /// Create a new condition that will step one token forward if met.
166    /// If the condition is _not_ met, the whole expression returns `None`.
167    ///
168    /// This can be used to build out exceptions to other rules.
169    ///
170    /// See [`UnlessStep`] for more info.
171    pub fn then_unless(self, condition: impl Expr + 'static) -> Self {
172        self.then(UnlessStep::new(condition, |_tok: &Token, _src: &[char]| {
173            true
174        }))
175    }
176
177    /// Match any single token.
178    ///
179    /// See [`AnyPattern`] for more info.
180    pub fn then_anything(self) -> Self {
181        self.then(AnyPattern)
182    }
183
184    /// Match any single token.
185    ///
186    /// Shorthand for [`Self::then_anything`].
187    pub fn t_any(self) -> Self {
188        self.then_anything()
189    }
190
191    // Word matching methods
192
193    /// Matches any word.
194    pub fn then_any_word(self) -> Self {
195        self.then(|tok: &Token, _source: &[char]| tok.kind.is_word())
196    }
197
198    /// Match examples of `word` that have any capitalization.
199    pub fn then_any_capitalization_of(self, word: &'static str) -> Self {
200        self.then(Word::new(word))
201    }
202
203    /// Shorthand for [`Self::then_any_capitalization_of`].
204    pub fn t_aco(self, word: &'static str) -> Self {
205        self.then_any_capitalization_of(word)
206    }
207
208    /// Match examples of `word` case-sensitively.
209    pub fn then_exact_word(self, word: &'static str) -> Self {
210        self.then(Word::new_exact(word))
211    }
212
213    // Part-of-speech matching methods
214
215    // Nominals (nouns and pronouns)
216
217    gen_then_from_is!(nominal);
218    gen_then_from_is!(plural_nominal);
219    gen_then_from_is!(non_plural_nominal);
220    gen_then_from_is!(possessive_nominal);
221
222    // Nouns
223
224    gen_then_from_is!(noun);
225    gen_then_from_is!(proper_noun);
226    gen_then_from_is!(mass_noun_only);
227
228    // Pronouns
229
230    gen_then_from_is!(pronoun);
231    gen_then_from_is!(first_person_singular_pronoun);
232    gen_then_from_is!(first_person_plural_pronoun);
233    gen_then_from_is!(second_person_pronoun);
234    gen_then_from_is!(third_person_pronoun);
235    gen_then_from_is!(third_person_singular_pronoun);
236    gen_then_from_is!(third_person_plural_pronoun);
237
238    // Verbs
239
240    // POS - Verbs
241    gen_then_from_is!(verb);
242    gen_then_from_is!(auxiliary_verb);
243    gen_then_from_is!(linking_verb);
244
245    // Adjectives and adverbs
246
247    gen_then_from_is!(adjective);
248    gen_then_from_is!(adverb);
249
250    // Determiners
251
252    gen_then_from_is!(determiner);
253
254    /// Push an [`IndefiniteArticle`] to the end of the operation list.
255    pub fn then_indefinite_article(self) -> Self {
256        self.then(IndefiniteArticle::default())
257    }
258
259    // Other parts of speech
260
261    gen_then_from_is!(conjunction);
262    gen_then_from_is!(preposition);
263
264    // Punctuation
265
266    gen_then_from_is!(punctuation);
267    gen_then_from_is!(apostrophe);
268    gen_then_from_is!(comma);
269    gen_then_from_is!(hyphen);
270    gen_then_from_is!(period);
271    gen_then_from_is!(semicolon);
272
273    // Other
274
275    gen_then_from_is!(number);
276    gen_then_from_is!(case_separator);
277}
278
279impl<S> From<S> for SequenceExpr
280where
281    S: Step + 'static,
282{
283    fn from(step: S) -> Self {
284        Self {
285            exprs: vec![Box::new(step)],
286        }
287    }
288}