harper_core/patterns/
mod.rs

//! [`Pattern`]s are one of the more powerful ways to query text inside Harper, especially for beginners.
//!
//! Through the [`PatternLinter`](crate::linting::PatternLinter) trait, they make it much easier to
//! build Harper [rules](crate::linting::Linter).
//!
//! See the page about [`SequencePattern`] for a concrete example of their use.
7
8use crate::{Document, LSend, Span, Token};
9
10mod any_pattern;
11mod implies_quantity;
12mod indefinite_article;
13mod inflection_of_be;
14mod invert;
15mod nominal_phrase;
16mod upos_set;
17mod whitespace_pattern;
18mod within_edit_distance;
19mod word;
20mod word_set;
21
22pub use any_pattern::AnyPattern;
23pub use implies_quantity::ImpliesQuantity;
24pub use indefinite_article::IndefiniteArticle;
25pub use inflection_of_be::InflectionOfBe;
26pub use invert::Invert;
27pub use nominal_phrase::NominalPhrase;
28pub use upos_set::UPOSSet;
29pub use whitespace_pattern::WhitespacePattern;
30pub use within_edit_distance::WithinEditDistance;
31pub use word::Word;
32pub use word_set::WordSet;
33
34pub trait Pattern: LSend {
35    /// Check if the pattern matches at the start of the given token slice.
36    ///
37    /// Returns the length of the match if successful, or `None` if not.
38    fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize>;
39}
40
41pub trait PatternExt {
42    fn iter_matches(&self, tokens: &[Token], source: &[char]) -> impl Iterator<Item = Span>;
43
44    /// Search through all tokens to locate all non-overlapping pattern matches.
45    fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span> {
46        self.iter_matches(tokens, source).collect()
47    }
48}
49
50impl<P> PatternExt for P
51where
52    P: Pattern + ?Sized,
53{
54    fn iter_matches(&self, tokens: &[Token], source: &[char]) -> impl Iterator<Item = Span> {
55        MatchIter::new(self, tokens, source)
56    }
57}
58
59struct MatchIter<'a, 'b, 'c, P: ?Sized> {
60    pattern: &'a P,
61    tokens: &'b [Token],
62    source: &'c [char],
63    index: usize,
64}
65impl<'a, 'b, 'c, P> MatchIter<'a, 'b, 'c, P>
66where
67    P: Pattern + ?Sized,
68{
69    fn new(pattern: &'a P, tokens: &'b [Token], source: &'c [char]) -> Self {
70        Self {
71            pattern,
72            tokens,
73            source,
74            index: 0,
75        }
76    }
77}
78impl<P> Iterator for MatchIter<'_, '_, '_, P>
79where
80    P: Pattern + ?Sized,
81{
82    type Item = Span;
83
84    fn next(&mut self) -> Option<Self::Item> {
85        while self.index < self.tokens.len() {
86            if let Some(len) = self
87                .pattern
88                .matches(&self.tokens[self.index..], self.source)
89            {
90                let span = Span::new_with_len(self.index, len);
91                self.index += len.max(1);
92                return Some(span);
93            } else {
94                self.index += 1;
95            }
96        }
97
98        None
99    }
100}
101
102/// A simpler version of the [`Pattern`] trait that only matches a single
103/// token.
104pub trait SingleTokenPattern: LSend {
105    fn matches_token(&self, token: &Token, source: &[char]) -> bool;
106}
107
108impl<S: SingleTokenPattern> Pattern for S {
109    fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize> {
110        if self.matches_token(tokens.first()?, source) {
111            Some(1)
112        } else {
113            None
114        }
115    }
116}
117
118impl<F: LSend + Fn(&Token, &[char]) -> bool> SingleTokenPattern for F {
119    fn matches_token(&self, token: &Token, source: &[char]) -> bool {
120        self(token, source)
121    }
122}
123
124pub trait DocPattern {
125    fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span>;
126}
127
128impl<P: PatternExt> DocPattern for P {
129    fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span> {
130        self.find_all_matches(document.get_tokens(), document.get_source())
131    }
132}