harper_core/patterns/
mod.rs

1//! [`Pattern`]s are one of the more powerful ways to query text inside Harper, especially for beginners.
2//!
3//! Through the [`PatternLinter`](crate::linting::PatternLinter) trait, they make it much easier to
4//! build Harper [rules](crate::linting::Linter).
5//!
6//! See the page about [`SequencePattern`] for a concrete example of their use.
7
8use crate::{Document, LSend, Span, Token};
9
10mod all;
11mod any_pattern;
12mod first_match_of;
13mod fixed_phrase;
14mod implies_quantity;
15mod indefinite_article;
16mod inflection_of_be;
17mod invert;
18mod longest_match_of;
19mod mergeable_words;
20mod nominal_phrase;
21mod pattern_map;
22mod repeating_pattern;
23mod sequence_pattern;
24mod similar_to_phrase;
25mod spelled_number_pattern;
26mod time_unit_pattern;
27mod whitespace_pattern;
28mod within_edit_distance;
29mod word;
30mod word_pattern_group;
31mod word_set;
32
33pub use all::All;
34pub use any_pattern::AnyPattern;
35use blanket::blanket;
36pub use first_match_of::FirstMatchOf;
37pub use fixed_phrase::FixedPhrase;
38pub use implies_quantity::ImpliesQuantity;
39pub use indefinite_article::IndefiniteArticle;
40pub use inflection_of_be::InflectionOfBe;
41pub use invert::Invert;
42pub use longest_match_of::LongestMatchOf;
43pub use mergeable_words::MergeableWords;
44pub use nominal_phrase::NominalPhrase;
45pub use pattern_map::PatternMap;
46pub use repeating_pattern::RepeatingPattern;
47pub use sequence_pattern::SequencePattern;
48pub use similar_to_phrase::SimilarToPhrase;
49pub use spelled_number_pattern::SpelledNumberPattern;
50pub use time_unit_pattern::TimeUnitPattern;
51pub use whitespace_pattern::WhitespacePattern;
52pub use word::Word;
53pub use word_pattern_group::WordPatternGroup;
54pub use word_set::WordSet;
55
56#[cfg_attr(feature = "concurrent", blanket(derive(Arc)))]
57#[cfg_attr(not(feature = "concurrent"), blanket(derive(Rc, Arc)))]
58pub trait Pattern: LSend {
59    /// Check if the pattern matches at the start of the given token slice.
60    ///
61    /// Returns the length of the match if successful, or `None` if not.
62    fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize>;
63}
64
65pub trait PatternExt {
66    fn iter_matches(&self, tokens: &[Token], source: &[char]) -> impl Iterator<Item = Span>;
67
68    /// Search through all tokens to locate all non-overlapping pattern matches.
69    fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span> {
70        self.iter_matches(tokens, source).collect()
71    }
72}
73
74impl<P> PatternExt for P
75where
76    P: Pattern + ?Sized,
77{
78    fn iter_matches(&self, tokens: &[Token], source: &[char]) -> impl Iterator<Item = Span> {
79        MatchIter::new(self, tokens, source)
80    }
81}
82
83struct MatchIter<'a, 'b, 'c, P: ?Sized> {
84    pattern: &'a P,
85    tokens: &'b [Token],
86    source: &'c [char],
87    index: usize,
88}
89impl<'a, 'b, 'c, P> MatchIter<'a, 'b, 'c, P>
90where
91    P: Pattern + ?Sized,
92{
93    fn new(pattern: &'a P, tokens: &'b [Token], source: &'c [char]) -> Self {
94        Self {
95            pattern,
96            tokens,
97            source,
98            index: 0,
99        }
100    }
101}
102impl<P> Iterator for MatchIter<'_, '_, '_, P>
103where
104    P: Pattern + ?Sized,
105{
106    type Item = Span;
107
108    fn next(&mut self) -> Option<Self::Item> {
109        while self.index < self.tokens.len() {
110            if let Some(len) = self
111                .pattern
112                .matches(&self.tokens[self.index..], self.source)
113            {
114                let span = Span::new_with_len(self.index, len);
115                self.index += len.max(1);
116                return Some(span);
117            } else {
118                self.index += 1;
119            }
120        }
121
122        None
123    }
124}
125
126pub trait OwnedPatternExt {
127    fn or(self, other: impl Pattern + 'static) -> LongestMatchOf;
128}
129
130impl<P> OwnedPatternExt for P
131where
132    P: Pattern + 'static,
133{
134    fn or(self, other: impl Pattern + 'static) -> LongestMatchOf {
135        LongestMatchOf::new(vec![Box::new(self), Box::new(other)])
136    }
137}
138
139/// A simpler version of the [`Pattern`] trait that only matches a single
140/// token.
141pub trait SingleTokenPattern: LSend {
142    fn matches_token(&self, token: &Token, source: &[char]) -> bool;
143}
144
145impl<S: SingleTokenPattern> Pattern for S {
146    fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize> {
147        if self.matches_token(tokens.first()?, source) {
148            Some(1)
149        } else {
150            None
151        }
152    }
153}
154
155impl<F: LSend + Fn(&Token, &[char]) -> bool> SingleTokenPattern for F {
156    fn matches_token(&self, token: &Token, source: &[char]) -> bool {
157        self(token, source)
158    }
159}
160
161pub trait DocPattern {
162    fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span>;
163}
164
165impl<P: PatternExt> DocPattern for P {
166    fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span> {
167        self.find_all_matches(document.get_tokens(), document.get_source())
168    }
169}