harper_core/expr/
sequence_expr.rs

1use paste::paste;
2
3use crate::{
4    Span, Token, TokenKind,
5    patterns::{AnyPattern, IndefiniteArticle, WhitespacePattern, Word},
6};
7
8use super::{Expr, Optional, Repeating, Step, UnlessStep};
9
10#[derive(Default)]
11pub struct SequenceExpr {
12    exprs: Vec<Box<dyn Expr>>,
13}
14
/// Generate `then_*` builder methods from an available `is_*` function on [`TokenKind`].
///
/// For a given `$quality`, the expansion defines three methods:
///
/// - `then_$quality`: one token for which `is_$quality()` is true.
/// - `then_one_or_more_$quality` + `s`: a run of such tokens.
/// - `then_anything_but_$quality`: one token for which `is_$quality()` is false.
///
/// The generated methods call `self.then` and `self.then_one_or_more`, so the macro must be
/// invoked inside an `impl` block that provides both.
macro_rules! gen_then_from_is {
    ($quality:ident) => {
        paste! {
            #[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_$quality >](self) -> Self {
                self.then(|tok: &Token, _source: &[char]| tok.kind.[< is_$quality >]())
            }

            #[doc = concat!("Adds a step matching one or more consecutive tokens where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
            pub fn [< then_one_or_more_$quality s >](self) -> Self {
                // `then_one_or_more` accepts any `impl Expr` and boxes it itself, so the
                // closure is passed directly rather than being boxed a second time.
                self.then_one_or_more(|tok: &Token, _source: &[char]| {
                    tok.kind.[< is_$quality >]()
                })
            }

            #[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns false.")]
            pub fn [< then_anything_but_$quality >](self) -> Self {
                self.then(|tok: &Token, _source: &[char]| !tok.kind.[< is_$quality >]())
            }
        }
    };
}
46
47impl Expr for SequenceExpr {
48    /// Run the expression starting at an index, returning the total matched window.
49    ///
50    /// If any step returns `None`, the entire expression does as well.
51    fn run(&self, mut cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span> {
52        let mut window = Span::new_with_len(cursor, 0);
53
54        for cur_expr in &self.exprs {
55            let out = cur_expr.run(cursor, tokens, source)?;
56
57            // Only expand the window if the match actually covers some tokens
58            if out.end > out.start {
59                window.expand_to_include(out.start);
60                window.expand_to_include(out.end.checked_sub(1).unwrap_or(out.start));
61            }
62
63            // Only advance cursor if we actually matched something
64            if out.end > cursor {
65                cursor = out.end;
66            } else if out.start < cursor {
67                cursor = out.start;
68            }
69            // If both start and end are equal to cursor, don't move the cursor
70        }
71
72        Some(window)
73    }
74}
75
76impl SequenceExpr {
77    /// Push an [expression](Expr) to the operation list.
78    pub fn then(mut self, expr: impl Expr + 'static) -> Self {
79        self.exprs.push(Box::new(expr));
80        self
81    }
82
83    /// Pushes an expression that could move the cursor to the sequence, but does not require it.
84    pub fn then_optional(mut self, expr: impl Expr + 'static) -> Self {
85        self.exprs.push(Box::new(Optional::new(expr)));
86        self
87    }
88
89    /// Appends the steps in `other` onto the end of `self`.
90    /// This is more efficient than [`Self::then`] because it avoids pointer redirection.
91    pub fn then_seq(mut self, mut other: Self) -> Self {
92        self.exprs.append(&mut other.exprs);
93        self
94    }
95
96    /// Push an [`IndefiniteArticle`] to the end of the operation list.
97    pub fn then_indefinite_article(self) -> Self {
98        self.then(IndefiniteArticle::default())
99    }
100
101    /// Match examples of `word` case-sensitively.
102    pub fn then_exact_word(self, word: &'static str) -> Self {
103        self.then(Word::new_exact(word))
104    }
105
106    /// Shorthand for [`Self::any_capitalization_of`].
107    pub fn aco(word: &'static str) -> Self {
108        Self::any_capitalization_of(word)
109    }
110
111    /// Construct a new sequence with a [`Word`] at the beginning of the operation list.
112    pub fn any_capitalization_of(word: &'static str) -> Self {
113        Self::default().then_any_capitalization_of(word)
114    }
115
116    /// Shorthand for [`Self::then_any_capitalization_of`].
117    pub fn t_aco(self, word: &'static str) -> Self {
118        self.then_any_capitalization_of(word)
119    }
120
121    /// Match examples of `word` that have any capitalization.
122    pub fn then_any_capitalization_of(self, word: &'static str) -> Self {
123        self.then(Word::new(word))
124    }
125
126    /// Matches any word.
127    pub fn then_any_word(self) -> Self {
128        self.then(|tok: &Token, _source: &[char]| tok.kind.is_word())
129    }
130
131    /// Matches any token whose `Kind` exactly matches.
132    pub fn then_strict(self, kind: TokenKind) -> Self {
133        self.then(move |tok: &Token, _source: &[char]| tok.kind == kind)
134    }
135
136    /// Shorthand for [`Self::then_whitespace`].
137    pub fn t_ws(self) -> Self {
138        self.then_whitespace()
139    }
140
141    /// Match against one or more whitespace tokens.
142    pub fn then_whitespace(self) -> Self {
143        self.then(WhitespacePattern)
144    }
145
146    pub fn then_one_or_more(self, expr: impl Expr + 'static) -> Self {
147        self.then(Repeating::new(Box::new(expr), 1))
148    }
149
150    /// Create a new condition that will step one token forward if met.
151    /// If the condition is _not_ met, the whole expression returns `None`.
152    ///
153    /// This can be used to build out exceptions to other rules.
154    ///
155    /// See [`UnlessStep`] for more info.
156    pub fn then_unless(self, condition: impl Expr + 'static) -> Self {
157        self.then(UnlessStep::new(condition, |_tok: &Token, _src: &[char]| {
158            true
159        }))
160    }
161
162    /// Match any single token.
163    ///
164    /// Shorthand for [`Self::then_anything`].
165    pub fn t_any(self) -> Self {
166        self.then_anything()
167    }
168
169    /// Match any single token.
170    ///
171    /// See [`AnyPattern`] for more info.
172    pub fn then_anything(self) -> Self {
173        self.then(AnyPattern)
174    }
175
176    gen_then_from_is!(nominal);
177    gen_then_from_is!(noun);
178    gen_then_from_is!(possessive_nominal);
179    gen_then_from_is!(plural_nominal);
180    gen_then_from_is!(verb);
181    gen_then_from_is!(auxiliary_verb);
182    gen_then_from_is!(linking_verb);
183    gen_then_from_is!(pronoun);
184    gen_then_from_is!(punctuation);
185    gen_then_from_is!(conjunction);
186    gen_then_from_is!(comma);
187    gen_then_from_is!(period);
188    gen_then_from_is!(number);
189    gen_then_from_is!(case_separator);
190    gen_then_from_is!(adverb);
191    gen_then_from_is!(adjective);
192    gen_then_from_is!(apostrophe);
193    gen_then_from_is!(hyphen);
194    gen_then_from_is!(determiner);
195    gen_then_from_is!(proper_noun);
196    gen_then_from_is!(preposition);
197    gen_then_from_is!(third_person_pronoun);
198    gen_then_from_is!(third_person_singular_pronoun);
199    gen_then_from_is!(third_person_plural_pronoun);
200    gen_then_from_is!(first_person_singular_pronoun);
201    gen_then_from_is!(first_person_plural_pronoun);
202    gen_then_from_is!(second_person_pronoun);
203    gen_then_from_is!(non_plural_nominal);
204}
205
206impl<S> From<S> for SequenceExpr
207where
208    S: Step + 'static,
209{
210    fn from(step: S) -> Self {
211        Self {
212            exprs: vec![Box::new(step)],
213        }
214    }
215}