harper_core/patterns/
mod.rs

1//! [`Pattern`]s are one of the more powerful ways to query text inside Harper, especially for beginners. They are a simplified abstraction over [`Expr`](crate::expr::Expr).
2//!
3//! Through the [`ExprLinter`](crate::linting::ExprLinter) trait, they make it much easier to
4//! build Harper [rules](crate::linting::Linter).
5//!
6//! See the page about [`SequenceExpr`](crate::expr::SequenceExpr) for a concrete example of their use.
7
8use crate::{Document, LSend, Span, Token};
9
10mod any_pattern;
11mod implies_quantity;
12mod indefinite_article;
13mod inflection_of_be;
14mod invert;
15mod modal_verb;
16mod nominal_phrase;
17mod prepositional_preceder;
18mod upos_set;
19mod whitespace_pattern;
20mod within_edit_distance;
21mod word;
22mod word_set;
23
24pub use any_pattern::AnyPattern;
25pub use implies_quantity::ImpliesQuantity;
26pub use indefinite_article::IndefiniteArticle;
27pub use inflection_of_be::InflectionOfBe;
28pub use invert::Invert;
29pub use modal_verb::ModalVerb;
30pub use nominal_phrase::NominalPhrase;
31pub use prepositional_preceder::{PrepositionalPrecederPattern, prepositional_preceder};
32pub use upos_set::UPOSSet;
33pub use whitespace_pattern::WhitespacePattern;
34pub use within_edit_distance::WithinEditDistance;
35pub use word::Word;
36pub use word_set::WordSet;
37
38pub trait Pattern: LSend {
39    /// Check if the pattern matches at the start of the given token slice.
40    ///
41    /// Returns the length of the match if successful, or `None` if not.
42    fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize>;
43}
44
45pub trait PatternExt {
46    fn iter_matches(&self, tokens: &[Token], source: &[char]) -> impl Iterator<Item = Span<Token>>;
47
48    /// Search through all tokens to locate all non-overlapping pattern matches.
49    fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span<Token>> {
50        self.iter_matches(tokens, source).collect()
51    }
52}
53
54impl<P> PatternExt for P
55where
56    P: Pattern + ?Sized,
57{
58    fn iter_matches(&self, tokens: &[Token], source: &[char]) -> impl Iterator<Item = Span<Token>> {
59        MatchIter::new(self, tokens, source)
60    }
61}
62
63struct MatchIter<'a, 'b, 'c, P: ?Sized> {
64    pattern: &'a P,
65    tokens: &'b [Token],
66    source: &'c [char],
67    index: usize,
68}
69impl<'a, 'b, 'c, P> MatchIter<'a, 'b, 'c, P>
70where
71    P: Pattern + ?Sized,
72{
73    fn new(pattern: &'a P, tokens: &'b [Token], source: &'c [char]) -> Self {
74        Self {
75            pattern,
76            tokens,
77            source,
78            index: 0,
79        }
80    }
81}
82impl<P> Iterator for MatchIter<'_, '_, '_, P>
83where
84    P: Pattern + ?Sized,
85{
86    type Item = Span<Token>;
87
88    fn next(&mut self) -> Option<Self::Item> {
89        while self.index < self.tokens.len() {
90            if let Some(len) = self
91                .pattern
92                .matches(&self.tokens[self.index..], self.source)
93            {
94                let span = Span::new_with_len(self.index, len);
95                self.index += len.max(1);
96                return Some(span);
97            } else {
98                self.index += 1;
99            }
100        }
101
102        None
103    }
104}
105
106/// A simpler version of the [`Pattern`] trait that only matches a single
107/// token.
108pub trait SingleTokenPattern: LSend {
109    fn matches_token(&self, token: &Token, source: &[char]) -> bool;
110}
111
112impl<S: SingleTokenPattern> Pattern for S {
113    fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize> {
114        if self.matches_token(tokens.first()?, source) {
115            Some(1)
116        } else {
117            None
118        }
119    }
120}
121
122impl<F: LSend + Fn(&Token, &[char]) -> bool> SingleTokenPattern for F {
123    fn matches_token(&self, token: &Token, source: &[char]) -> bool {
124        self(token, source)
125    }
126}
127
128pub trait DocPattern {
129    fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span<Token>>;
130}
131
132impl<P: PatternExt> DocPattern for P {
133    fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span<Token>> {
134        self.find_all_matches(document.get_tokens(), document.get_source())
135    }
136}