harper_core/patterns/
mod.rs

1//! [`Pattern`]s are one of the more powerful ways to query text inside Harper, especially for beginners. They are a simplified abstraction over [`Expr`](crate::expr::Expr).
2//!
3//! Through the [`ExprLinter`](crate::linting::ExprLinter) trait, they make it much easier to
4//! build Harper [rules](crate::linting::Linter).
5//!
6//! See the page about [`SequenceExpr`](crate::expr::SequenceExpr) for a concrete example of their use.
7
8use crate::{Document, LSend, Span, Token};
9
10mod any_pattern;
11mod derived_from;
12mod implies_quantity;
13mod indefinite_article;
14mod inflection_of_be;
15mod invert;
16mod modal_verb;
17mod nominal_phrase;
18mod prepositional_preceder;
19mod upos_set;
20mod whitespace_pattern;
21mod within_edit_distance;
22mod word;
23mod word_set;
24
25pub use any_pattern::AnyPattern;
26pub use derived_from::DerivedFrom;
27pub use implies_quantity::ImpliesQuantity;
28pub use indefinite_article::IndefiniteArticle;
29pub use inflection_of_be::InflectionOfBe;
30pub use invert::Invert;
31pub use modal_verb::ModalVerb;
32pub use nominal_phrase::NominalPhrase;
33pub use prepositional_preceder::{PrepositionalPrecederPattern, prepositional_preceder};
34pub use upos_set::UPOSSet;
35pub use whitespace_pattern::WhitespacePattern;
36pub use within_edit_distance::WithinEditDistance;
37pub use word::Word;
38pub use word_set::WordSet;
39
40pub trait Pattern: LSend {
41    /// Check if the pattern matches at the start of the given token slice.
42    ///
43    /// Returns the length of the match if successful, or `None` if not.
44    fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize>;
45}
46
47pub trait PatternExt {
48    fn iter_matches(&self, tokens: &[Token], source: &[char]) -> impl Iterator<Item = Span<Token>>;
49
50    /// Search through all tokens to locate all non-overlapping pattern matches.
51    fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span<Token>> {
52        self.iter_matches(tokens, source).collect()
53    }
54}
55
56impl<P> PatternExt for P
57where
58    P: Pattern + ?Sized,
59{
60    fn iter_matches(&self, tokens: &[Token], source: &[char]) -> impl Iterator<Item = Span<Token>> {
61        MatchIter::new(self, tokens, source)
62    }
63}
64
65struct MatchIter<'a, 'b, 'c, P: ?Sized> {
66    pattern: &'a P,
67    tokens: &'b [Token],
68    source: &'c [char],
69    index: usize,
70}
71impl<'a, 'b, 'c, P> MatchIter<'a, 'b, 'c, P>
72where
73    P: Pattern + ?Sized,
74{
75    fn new(pattern: &'a P, tokens: &'b [Token], source: &'c [char]) -> Self {
76        Self {
77            pattern,
78            tokens,
79            source,
80            index: 0,
81        }
82    }
83}
84impl<P> Iterator for MatchIter<'_, '_, '_, P>
85where
86    P: Pattern + ?Sized,
87{
88    type Item = Span<Token>;
89
90    fn next(&mut self) -> Option<Self::Item> {
91        while self.index < self.tokens.len() {
92            if let Some(len) = self
93                .pattern
94                .matches(&self.tokens[self.index..], self.source)
95            {
96                let span = Span::new_with_len(self.index, len);
97                self.index += len.max(1);
98                return Some(span);
99            } else {
100                self.index += 1;
101            }
102        }
103
104        None
105    }
106}
107
108/// A simpler version of the [`Pattern`] trait that only matches a single
109/// token.
110pub trait SingleTokenPattern: LSend {
111    fn matches_token(&self, token: &Token, source: &[char]) -> bool;
112}
113
114impl<S: SingleTokenPattern> Pattern for S {
115    fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize> {
116        if self.matches_token(tokens.first()?, source) {
117            Some(1)
118        } else {
119            None
120        }
121    }
122}
123
124impl<F: LSend + Fn(&Token, &[char]) -> bool> SingleTokenPattern for F {
125    fn matches_token(&self, token: &Token, source: &[char]) -> bool {
126        self(token, source)
127    }
128}
129
130pub trait DocPattern {
131    fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span<Token>>;
132}
133
134impl<P: PatternExt> DocPattern for P {
135    fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span<Token>> {
136        self.find_all_matches(document.get_tokens(), document.get_source())
137    }
138}