harper_core/expr/
sequence_expr.rs

1use paste::paste;
2
3use crate::{
4    Span, Token, TokenKind,
5    patterns::{AnyPattern, IndefiniteArticle, WhitespacePattern, Word},
6};
7
8use super::{Expr, Optional, Repeating, Step, UnlessStep};
9
/// An [`Expr`] made up of a list of child expressions that are run one after another.
///
/// The derived [`Default`] produces an empty sequence; child expressions are appended
/// through the builder methods such as [`SequenceExpr::then`].
#[derive(Default)]
pub struct SequenceExpr {
    /// The child expressions, stored in the order in which they are run.
    exprs: Vec<Box<dyn Expr>>,
}
14
/// Generate a family of `then_*` builder methods from an available `is_*` function on
/// [`TokenKind`].
///
/// Invoked as `gen_then_from_is!(quality)` inside an `impl` block, this expands to four
/// methods, each of which consumes `self` and returns the extended sequence:
///
/// - `then_<quality>`: one token for which `is_<quality>()` returns true.
/// - `then_optional_<quality>`: the same check, added through `then_optional`.
/// - `then_one_or_more_<quality>s`: the same check, added through `then_one_or_more`.
/// - `then_anything_but_<quality>`: one token for which `is_<quality>()` returns false.
macro_rules! gen_then_from_is {
    ($quality:ident) => {
        paste! {
            #[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_$quality >](self) -> Self {
                self.then(|tok: &Token, _source: &[char]| tok.kind.[< is_$quality >]())
            }

            #[doc = concat!("Adds an optional step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_optional_$quality >](self) -> Self {
                self.then_optional(|tok: &Token, _source: &[char]| tok.kind.[< is_$quality >]())
            }

            #[doc = concat!("Adds a step matching one or more consecutive tokens where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_one_or_more_$quality s >](self) -> Self {
                // `then_one_or_more` boxes its argument itself, so the predicate is passed
                // directly rather than being wrapped in a second `Box`.
                self.then_one_or_more(|tok: &Token, _source: &[char]| tok.kind.[< is_$quality >]())
            }

            #[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns false.")]
            pub fn [< then_anything_but_$quality >](self) -> Self {
                self.then(|tok: &Token, _source: &[char]| !tok.kind.[< is_$quality >]())
            }
        }
    };
}
53
54impl Expr for SequenceExpr {
55    /// Run the expression starting at an index, returning the total matched window.
56    ///
57    /// If any step returns `None`, the entire expression does as well.
58    fn run(&self, mut cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span> {
59        let mut window = Span::new_with_len(cursor, 0);
60
61        for cur_expr in &self.exprs {
62            let out = cur_expr.run(cursor, tokens, source)?;
63
64            // Only expand the window if the match actually covers some tokens
65            if out.end > out.start {
66                window.expand_to_include(out.start);
67                window.expand_to_include(out.end.checked_sub(1).unwrap_or(out.start));
68            }
69
70            // Only advance cursor if we actually matched something
71            if out.end > cursor {
72                cursor = out.end;
73            } else if out.start < cursor {
74                cursor = out.start;
75            }
76            // If both start and end are equal to cursor, don't move the cursor
77        }
78
79        Some(window)
80    }
81}
82
83impl SequenceExpr {
84    // Constructor methods
85
86    /// Construct a new sequence with a [`Word`] at the beginning of the operation list.
87    pub fn any_capitalization_of(word: &'static str) -> Self {
88        Self::default().then_any_capitalization_of(word)
89    }
90
91    /// Shorthand for [`Self::any_capitalization_of`].
92    pub fn aco(word: &'static str) -> Self {
93        Self::any_capitalization_of(word)
94    }
95
96    // General builder methods
97
98    /// Push an [expression](Expr) to the operation list.
99    pub fn then(mut self, expr: impl Expr + 'static) -> Self {
100        self.exprs.push(Box::new(expr));
101        self
102    }
103
104    /// Pushes an expression that could move the cursor to the sequence, but does not require it.
105    pub fn then_optional(mut self, expr: impl Expr + 'static) -> Self {
106        self.exprs.push(Box::new(Optional::new(expr)));
107        self
108    }
109
110    /// Appends the steps in `other` onto the end of `self`.
111    /// This is more efficient than [`Self::then`] because it avoids pointer redirection.
112    pub fn then_seq(mut self, mut other: Self) -> Self {
113        self.exprs.append(&mut other.exprs);
114        self
115    }
116
117    /// Matches any token whose `Kind` exactly matches.
118    pub fn then_strict(self, kind: TokenKind) -> Self {
119        self.then(move |tok: &Token, _source: &[char]| tok.kind == kind)
120    }
121
122    /// Match against one or more whitespace tokens.
123    pub fn then_whitespace(self) -> Self {
124        self.then(WhitespacePattern)
125    }
126
127    /// Shorthand for [`Self::then_whitespace`].
128    pub fn t_ws(self) -> Self {
129        self.then_whitespace()
130    }
131
132    pub fn then_one_or_more(self, expr: impl Expr + 'static) -> Self {
133        self.then(Repeating::new(Box::new(expr), 1))
134    }
135
136    /// Create a new condition that will step one token forward if met.
137    /// If the condition is _not_ met, the whole expression returns `None`.
138    ///
139    /// This can be used to build out exceptions to other rules.
140    ///
141    /// See [`UnlessStep`] for more info.
142    pub fn then_unless(self, condition: impl Expr + 'static) -> Self {
143        self.then(UnlessStep::new(condition, |_tok: &Token, _src: &[char]| {
144            true
145        }))
146    }
147
148    /// Match any single token.
149    ///
150    /// See [`AnyPattern`] for more info.
151    pub fn then_anything(self) -> Self {
152        self.then(AnyPattern)
153    }
154
155    /// Match any single token.
156    ///
157    /// Shorthand for [`Self::then_anything`].
158    pub fn t_any(self) -> Self {
159        self.then_anything()
160    }
161
162    // Word matching methods
163
164    /// Matches any word.
165    pub fn then_any_word(self) -> Self {
166        self.then(|tok: &Token, _source: &[char]| tok.kind.is_word())
167    }
168
169    /// Match examples of `word` that have any capitalization.
170    pub fn then_any_capitalization_of(self, word: &'static str) -> Self {
171        self.then(Word::new(word))
172    }
173
174    /// Shorthand for [`Self::then_any_capitalization_of`].
175    pub fn t_aco(self, word: &'static str) -> Self {
176        self.then_any_capitalization_of(word)
177    }
178
179    /// Match examples of `word` case-sensitively.
180    pub fn then_exact_word(self, word: &'static str) -> Self {
181        self.then(Word::new_exact(word))
182    }
183
184    // Part-of-speech matching methods
185
186    // Nominals (nouns and pronouns)
187
188    gen_then_from_is!(nominal);
189    gen_then_from_is!(plural_nominal);
190    gen_then_from_is!(non_plural_nominal);
191    gen_then_from_is!(possessive_nominal);
192
193    // Nouns
194
195    gen_then_from_is!(noun);
196    gen_then_from_is!(proper_noun);
197    gen_then_from_is!(mass_noun_only);
198
199    // Pronouns
200
201    gen_then_from_is!(pronoun);
202    gen_then_from_is!(first_person_singular_pronoun);
203    gen_then_from_is!(first_person_plural_pronoun);
204    gen_then_from_is!(second_person_pronoun);
205    gen_then_from_is!(third_person_pronoun);
206    gen_then_from_is!(third_person_singular_pronoun);
207    gen_then_from_is!(third_person_plural_pronoun);
208
209    // Verbs
210
211    // POS - Verbs
212    gen_then_from_is!(verb);
213    gen_then_from_is!(auxiliary_verb);
214    gen_then_from_is!(linking_verb);
215
216    // Adjectives and adverbs
217
218    gen_then_from_is!(adjective);
219    gen_then_from_is!(adverb);
220
221    // Determiners
222
223    gen_then_from_is!(determiner);
224
225    /// Push an [`IndefiniteArticle`] to the end of the operation list.
226    pub fn then_indefinite_article(self) -> Self {
227        self.then(IndefiniteArticle::default())
228    }
229
230    // Other parts of speech
231
232    gen_then_from_is!(conjunction);
233    gen_then_from_is!(preposition);
234
235    // Punctuation
236
237    gen_then_from_is!(punctuation);
238    gen_then_from_is!(apostrophe);
239    gen_then_from_is!(comma);
240    gen_then_from_is!(hyphen);
241    gen_then_from_is!(period);
242    gen_then_from_is!(semicolon);
243
244    // Other
245
246    gen_then_from_is!(number);
247    gen_then_from_is!(case_separator);
248}
249
250impl<S> From<S> for SequenceExpr
251where
252    S: Step + 'static,
253{
254    fn from(step: S) -> Self {
255        Self {
256            exprs: vec![Box::new(step)],
257        }
258    }
259}