Skip to main content

harper_core/expr/
mod.rs

//! An `Expr` is a declarative way to express whether a certain set of tokens fulfills some criteria.
//!
//! For example, if we want to look for the word "that" followed by an adjective, we could build an
//! expression to do so.
//!
//! The actual searching is done by another system (usually a part of the [lint framework](crate::linting::ExprLinter)).
//! It iterates through a document, checking if each index matches the criteria.
//!
//! When supplied a specific position in a token stream, the technical job of an `Expr` is to determine the window of tokens (including the cursor itself) that fulfills whatever criteria the author desires.
//!
//! The goal of the `Expr` initiative is to make rules easier to _read_ as well as to write.
//! Gone are the days of trying to manually parse the logic of another man's Rust code.
//!
//! See also: [`SequenceExpr`].
15
16mod all;
17mod anchor_end;
18mod anchor_start;
19mod duration_expr;
20mod expr_map;
21mod filter;
22mod first_match_of;
23mod fixed_phrase;
24mod longest_match_of;
25mod mergeable_words;
26mod optional;
27mod pronoun_be;
28mod reflexive_pronoun;
29mod repeating;
30mod sequence_expr;
31mod similar_to_phrase;
32mod space_or_hyphen;
33mod spelled_number_expr;
34mod step;
35mod time_unit_expr;
36mod unless_step;
37mod word_expr_group;
38
39#[cfg(not(feature = "concurrent"))]
40use std::rc::Rc;
41use std::sync::Arc;
42
43pub use all::All;
44pub use anchor_end::AnchorEnd;
45pub use anchor_start::AnchorStart;
46pub use duration_expr::DurationExpr;
47pub use expr_map::ExprMap;
48pub use filter::Filter;
49pub use first_match_of::FirstMatchOf;
50pub use fixed_phrase::FixedPhrase;
51pub use longest_match_of::LongestMatchOf;
52pub use mergeable_words::MergeableWords;
53pub use optional::Optional;
54pub use pronoun_be::PronounBe;
55pub use reflexive_pronoun::ReflexivePronoun;
56pub use repeating::Repeating;
57pub use sequence_expr::SequenceExpr;
58pub use similar_to_phrase::SimilarToPhrase;
59pub use space_or_hyphen::SpaceOrHyphen;
60pub use spelled_number_expr::SpelledNumberExpr;
61pub use step::Step;
62pub use time_unit_expr::TimeUnitExpr;
63pub use unless_step::UnlessStep;
64pub use word_expr_group::WordExprGroup;
65
66use crate::{Document, LSend, Span, Token};
67
/// A declarative matcher over a token stream.
///
/// Given a cursor position, an implementor decides whether a window of tokens around that
/// position satisfies its criteria and, if so, reports that window.
pub trait Expr: LSend {
    /// Attempts a match with the cursor placed at index `cursor` of `tokens`.
    ///
    /// `source` is the character slice supplied alongside the tokens (the document source when
    /// called through `ExprExt::iter_matches_in_doc`).
    ///
    /// Returns `Some(span)`, a span over token indices, when the expression matches at this
    /// position, or `None` when it does not.
    fn run(&self, cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span<Token>>;
}
71
72impl<S> Expr for S
73where
74    S: Step + ?Sized,
75{
76    fn run(&self, cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span<Token>> {
77        self.step(tokens, cursor, source).map(|s| {
78            if s >= 0 {
79                Span::new_with_len(cursor, s as usize)
80            } else {
81                Span::new(add(cursor, s).unwrap(), cursor)
82            }
83        })
84    }
85}
86
87impl<E> Expr for Arc<E>
88where
89    E: Expr,
90{
91    fn run(&self, cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span<Token>> {
92        self.as_ref().run(cursor, tokens, source)
93    }
94}
95
96impl Expr for Box<dyn Expr> {
97    fn run(&self, cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span<Token>> {
98        self.as_ref().run(cursor, tokens, source)
99    }
100}
101
102#[cfg(not(feature = "concurrent"))]
103impl<E> Expr for Rc<E>
104where
105    E: Expr,
106{
107    fn run(&self, cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span<Token>> {
108        self.as_ref().run(cursor, tokens, source)
109    }
110}
111
/// Offsets the index `u` by the signed amount `i`.
///
/// Returns `None` when the result would fall below zero or exceed `usize::MAX`.
fn add(u: usize, i: isize) -> Option<usize> {
    if i.is_negative() {
        // `unsigned_abs` gives the full-width magnitude, including for `isize::MIN`.
        // Narrowing through `u32` would silently drop the high bits on 64-bit targets.
        u.checked_sub(i.unsigned_abs())
    } else {
        u.checked_add(i as usize)
    }
}
119
/// Convenience methods for scanning a token stream with an [`Expr`].
pub trait ExprExt {
    /// Iterate over all matches of this expression in the document, automatically filtering out
    /// overlapping matches, preferring the first.
    ///
    /// `tokens` is the token stream to scan; `source` is the character slice handed to the
    /// expression on every attempt.
    fn iter_matches<'a>(
        &'a self,
        tokens: &'a [Token],
        source: &'a [char],
    ) -> Box<dyn Iterator<Item = Span<Token>> + 'a>;

    /// Like [`Self::iter_matches`], but takes the tokens and source from `doc`.
    fn iter_matches_in_doc<'a>(
        &'a self,
        doc: &'a Document,
    ) -> Box<dyn Iterator<Item = Span<Token>> + 'a>;
}
134
135impl<E: ?Sized> ExprExt for E
136where
137    E: Expr,
138{
139    fn iter_matches<'a>(
140        &'a self,
141        tokens: &'a [Token],
142        source: &'a [char],
143    ) -> Box<dyn Iterator<Item = Span<Token>> + 'a> {
144        let mut last_end = 0usize;
145
146        Box::new((0..tokens.len()).filter_map(move |i| {
147            let span = self.run(i, tokens, source)?;
148            if span.start >= last_end {
149                last_end = span.end;
150                Some(span)
151            } else {
152                None
153            }
154        }))
155    }
156
157    fn iter_matches_in_doc<'a>(
158        &'a self,
159        doc: &'a Document,
160    ) -> Box<dyn Iterator<Item = Span<Token>> + 'a> {
161        Box::new(self.iter_matches(doc.get_tokens(), doc.get_source()))
162    }
163}
164
/// Combinators that consume an [`Expr`] and pair it with a second expression to build a
/// composite expression.
pub trait OwnedExprExt {
    /// Returns an expression that matches either the current one or the expression contained in `other`.
    fn or(self, other: impl Expr + 'static) -> FirstMatchOf;
    /// Returns an expression that matches only if both the current one and the expression contained in `other` do.
    fn and(self, other: impl Expr + 'static) -> All;
    /// Returns an expression that matches only if the current one matches and the expression contained in `other` does not.
    fn and_not(self, other: impl Expr + 'static) -> All;
    /// Returns an expression that matches the longest of the current one or the expression contained in `other`.
    ///
    /// If you don't need the longest match, prefer using the short-circuiting [`Self::or()`] instead.
    fn or_longest(self, other: impl Expr + 'static) -> LongestMatchOf;
}
171
172impl<E> OwnedExprExt for E
173where
174    E: Expr + 'static,
175{
176    /// Returns an expression that matches either the current one or the expression contained in `other`.
177    fn or(self, other: impl Expr + 'static) -> FirstMatchOf {
178        FirstMatchOf::new(vec![Box::new(self), Box::new(other)])
179    }
180
181    /// Returns an expression that matches only if both the current one and the expression contained in `other` do.
182    fn and(self, other: impl Expr + 'static) -> All {
183        All::new(vec![Box::new(self), Box::new(other)])
184    }
185
186    /// Returns an expression that matches only if the current one matches and the expression contained in `other` does not.
187    fn and_not(self, other: impl Expr + 'static) -> All {
188        self.and(UnlessStep::new(other, |_tok: &Token, _src: &[char]| true))
189    }
190
191    /// Returns an expression that matches the longest of the current one or the expression contained in `other`.
192    ///
193    /// If you don't need the longest match, prefer using the short-circuiting [`Self::or()`] instead.
194    fn or_longest(self, other: impl Expr + 'static) -> LongestMatchOf {
195        LongestMatchOf::new(vec![Box::new(self), Box::new(other)])
196    }
197}