harper_core/patterns/
mod.rs

1//! [`Pattern`]s are one of the more powerful ways to query text inside Harper, especially for beginners. They are a simplified abstraction over [`Expr`](crate::expr::Expr).
2//!
3//! Through the [`ExprLinter`](crate::linting::ExprLinter) trait, they make it much easier to
4//! build Harper [rules](crate::linting::Linter).
5//!
6//! See the page about [`SequenceExpr`](crate::expr::SequenceExpr) for a concrete example of their use.
7
8use crate::{Document, LSend, Span, Token};
9
10mod any_pattern;
11mod implies_quantity;
12mod indefinite_article;
13mod inflection_of_be;
14mod invert;
15mod modal_verb;
16mod nominal_phrase;
17mod upos_set;
18mod whitespace_pattern;
19mod within_edit_distance;
20mod word;
21mod word_set;
22
23pub use any_pattern::AnyPattern;
24pub use implies_quantity::ImpliesQuantity;
25pub use indefinite_article::IndefiniteArticle;
26pub use inflection_of_be::InflectionOfBe;
27pub use invert::Invert;
28pub use modal_verb::ModalVerb;
29pub use nominal_phrase::NominalPhrase;
30pub use upos_set::UPOSSet;
31pub use whitespace_pattern::WhitespacePattern;
32pub use within_edit_distance::WithinEditDistance;
33pub use word::Word;
34pub use word_set::WordSet;
35
36pub trait Pattern: LSend {
37    /// Check if the pattern matches at the start of the given token slice.
38    ///
39    /// Returns the length of the match if successful, or `None` if not.
40    fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize>;
41}
42
43pub trait PatternExt {
44    fn iter_matches(&self, tokens: &[Token], source: &[char]) -> impl Iterator<Item = Span<Token>>;
45
46    /// Search through all tokens to locate all non-overlapping pattern matches.
47    fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span<Token>> {
48        self.iter_matches(tokens, source).collect()
49    }
50}
51
52impl<P> PatternExt for P
53where
54    P: Pattern + ?Sized,
55{
56    fn iter_matches(&self, tokens: &[Token], source: &[char]) -> impl Iterator<Item = Span<Token>> {
57        MatchIter::new(self, tokens, source)
58    }
59}
60
61struct MatchIter<'a, 'b, 'c, P: ?Sized> {
62    pattern: &'a P,
63    tokens: &'b [Token],
64    source: &'c [char],
65    index: usize,
66}
67impl<'a, 'b, 'c, P> MatchIter<'a, 'b, 'c, P>
68where
69    P: Pattern + ?Sized,
70{
71    fn new(pattern: &'a P, tokens: &'b [Token], source: &'c [char]) -> Self {
72        Self {
73            pattern,
74            tokens,
75            source,
76            index: 0,
77        }
78    }
79}
80impl<P> Iterator for MatchIter<'_, '_, '_, P>
81where
82    P: Pattern + ?Sized,
83{
84    type Item = Span<Token>;
85
86    fn next(&mut self) -> Option<Self::Item> {
87        while self.index < self.tokens.len() {
88            if let Some(len) = self
89                .pattern
90                .matches(&self.tokens[self.index..], self.source)
91            {
92                let span = Span::new_with_len(self.index, len);
93                self.index += len.max(1);
94                return Some(span);
95            } else {
96                self.index += 1;
97            }
98        }
99
100        None
101    }
102}
103
104/// A simpler version of the [`Pattern`] trait that only matches a single
105/// token.
106pub trait SingleTokenPattern: LSend {
107    fn matches_token(&self, token: &Token, source: &[char]) -> bool;
108}
109
110impl<S: SingleTokenPattern> Pattern for S {
111    fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize> {
112        if self.matches_token(tokens.first()?, source) {
113            Some(1)
114        } else {
115            None
116        }
117    }
118}
119
120impl<F: LSend + Fn(&Token, &[char]) -> bool> SingleTokenPattern for F {
121    fn matches_token(&self, token: &Token, source: &[char]) -> bool {
122        self(token, source)
123    }
124}
125
126pub trait DocPattern {
127    fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span<Token>>;
128}
129
130impl<P: PatternExt> DocPattern for P {
131    fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span<Token>> {
132        self.find_all_matches(document.get_tokens(), document.get_source())
133    }
134}