harper_core/patterns/
mod.rs

//! [`Pattern`]s are one of the more powerful ways to query text inside Harper, especially for beginners.
//!
//! Through the [`PatternLinter`](crate::linting::PatternLinter) trait, they make it much easier to
//! build Harper [rules](crate::linting::Linter).
//!
//! See the page about [`SequencePattern`] for a concrete example of their use.
7
8use crate::{Document, LSend, Span, Token};
9
10mod all;
11mod any_pattern;
12mod either_pattern;
13mod fixed_phrase;
14mod implies_quantity;
15mod indefinite_article;
16mod inflection_of_be;
17mod invert;
18mod mergeable_words;
19mod naive_pattern_group;
20mod nominal_phrase;
21mod pattern_map;
22mod repeating_pattern;
23mod sequence_pattern;
24mod similar_to_phrase;
25mod spelled_number_pattern;
26mod whitespace_pattern;
27mod within_edit_distance;
28mod word;
29mod word_pattern_group;
30mod word_set;
31
32pub use all::All;
33pub use any_pattern::AnyPattern;
34use blanket::blanket;
35pub use either_pattern::EitherPattern;
36pub use fixed_phrase::FixedPhrase;
37pub use implies_quantity::ImpliesQuantity;
38pub use indefinite_article::IndefiniteArticle;
39pub use inflection_of_be::InflectionOfBe;
40pub use invert::Invert;
41pub use mergeable_words::MergeableWords;
42pub use naive_pattern_group::NaivePatternGroup;
43pub use nominal_phrase::NominalPhrase;
44pub use pattern_map::PatternMap;
45pub use repeating_pattern::RepeatingPattern;
46pub use sequence_pattern::SequencePattern;
47pub use similar_to_phrase::SimilarToPhrase;
48pub use spelled_number_pattern::SpelledNumberPattern;
49pub use whitespace_pattern::WhitespacePattern;
50pub use word::Word;
51pub use word_pattern_group::WordPatternGroup;
52pub use word_set::WordSet;
53
54#[cfg_attr(feature = "concurrent", blanket(derive(Arc)))]
55#[cfg_attr(not(feature = "concurrent"), blanket(derive(Rc, Arc)))]
56pub trait Pattern: LSend {
57    /// Check if the pattern matches at the start of the given token slice.
58    ///
59    /// Returns the length of the match if successful, or `None` if not.
60    fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize>;
61}
62
63pub trait PatternExt {
64    fn iter_matches(&self, tokens: &[Token], source: &[char]) -> impl Iterator<Item = Span>;
65
66    /// Search through all tokens to locate all non-overlapping pattern matches.
67    fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span> {
68        self.iter_matches(tokens, source).collect()
69    }
70}
71
72impl<P> PatternExt for P
73where
74    P: Pattern + ?Sized,
75{
76    fn iter_matches(&self, tokens: &[Token], source: &[char]) -> impl Iterator<Item = Span> {
77        MatchIter::new(self, tokens, source)
78    }
79}
80
81struct MatchIter<'a, 'b, 'c, P: ?Sized> {
82    pattern: &'a P,
83    tokens: &'b [Token],
84    source: &'c [char],
85    index: usize,
86}
87impl<'a, 'b, 'c, P> MatchIter<'a, 'b, 'c, P>
88where
89    P: Pattern + ?Sized,
90{
91    fn new(pattern: &'a P, tokens: &'b [Token], source: &'c [char]) -> Self {
92        Self {
93            pattern,
94            tokens,
95            source,
96            index: 0,
97        }
98    }
99}
100impl<P> Iterator for MatchIter<'_, '_, '_, P>
101where
102    P: Pattern + ?Sized,
103{
104    type Item = Span;
105
106    fn next(&mut self) -> Option<Self::Item> {
107        while self.index < self.tokens.len() {
108            if let Some(len) = self
109                .pattern
110                .matches(&self.tokens[self.index..], self.source)
111            {
112                let span = Span::new_with_len(self.index, len);
113                self.index += len.max(1);
114                return Some(span);
115            } else {
116                self.index += 1;
117            }
118        }
119
120        None
121    }
122}
123
124pub trait OwnedPatternExt {
125    fn or(self, other: impl Pattern + 'static) -> EitherPattern;
126}
127
128impl<P> OwnedPatternExt for P
129where
130    P: Pattern + 'static,
131{
132    fn or(self, other: impl Pattern + 'static) -> EitherPattern {
133        EitherPattern::new(vec![Box::new(self), Box::new(other)])
134    }
135}
136
137/// A simpler version of the [`Pattern`] trait that only matches a single
138/// token.
139pub trait SingleTokenPattern: LSend {
140    fn matches_token(&self, token: &Token, source: &[char]) -> bool;
141}
142
143impl<S: SingleTokenPattern> Pattern for S {
144    fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize> {
145        if self.matches_token(tokens.first()?, source) {
146            Some(1)
147        } else {
148            None
149        }
150    }
151}
152
153impl<F: LSend + Fn(&Token, &[char]) -> bool> SingleTokenPattern for F {
154    fn matches_token(&self, token: &Token, source: &[char]) -> bool {
155        self(token, source)
156    }
157}
158
159pub trait DocPattern {
160    fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span>;
161}
162
163impl<P: PatternExt> DocPattern for P {
164    fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span> {
165        self.find_all_matches(document.get_tokens(), document.get_source())
166    }
167}