harper_core/patterns/
mod.rs

1//! [`Pattern`]s are one of the more powerful ways to query text inside Harper, especially for beginners.
2//!
3//! Through the [`PatternLinter`](crate::linting::PatternLinter) trait, they make it much easier to
4//! build Harper [rules](crate::linting::Linter).
5//!
6//! See the page about [`SequencePattern`] for a concrete example of their use.
7
8use std::collections::VecDeque;
9
10use crate::{Document, Span, Token, VecExt};
11
12mod all;
13mod any_capitalization;
14mod any_pattern;
15mod consumes_remaining_pattern;
16mod either_pattern;
17mod exact_phrase;
18mod implies_quantity;
19mod indefinite_article;
20mod invert;
21mod is_not_title_case;
22mod naive_pattern_group;
23mod nominal_phrase;
24mod pattern_map;
25mod repeating_pattern;
26mod sequence_pattern;
27mod similar_to_phrase;
28mod split_compound_word;
29mod token_kind_pattern_group;
30mod whitespace_pattern;
31mod within_edit_distance;
32mod word_pattern_group;
33mod word_set;
34
35pub use all::All;
36pub use any_capitalization::AnyCapitalization;
37pub use any_pattern::AnyPattern;
38use blanket::blanket;
39pub use consumes_remaining_pattern::ConsumesRemainingPattern;
40pub use either_pattern::EitherPattern;
41pub use exact_phrase::ExactPhrase;
42pub use implies_quantity::ImpliesQuantity;
43pub use indefinite_article::IndefiniteArticle;
44pub use invert::Invert;
45pub use is_not_title_case::IsNotTitleCase;
46pub use naive_pattern_group::NaivePatternGroup;
47pub use nominal_phrase::NominalPhrase;
48pub use pattern_map::PatternMap;
49pub use repeating_pattern::RepeatingPattern;
50pub use sequence_pattern::SequencePattern;
51pub use similar_to_phrase::SimilarToPhrase;
52pub use split_compound_word::SplitCompoundWord;
53pub use token_kind_pattern_group::TokenKindPatternGroup;
54pub use whitespace_pattern::WhitespacePattern;
55pub use word_pattern_group::WordPatternGroup;
56pub use word_set::WordSet;
57
58#[cfg(not(feature = "concurrent"))]
59#[blanket(derive(Rc, Arc))]
60pub trait Pattern {
61    fn matches(&self, tokens: &[Token], source: &[char]) -> usize;
62}
63
/// A matcher that is tested against the *front* of a token slice.
///
/// This is the flavour compiled when the `concurrent` feature is enabled;
/// implementors must additionally be `Send + Sync`.
#[cfg(feature = "concurrent")]
#[blanket(derive(Arc))]
pub trait Pattern: Send + Sync {
    /// Test the pattern against the beginning of `tokens`.
    ///
    /// `source` is the character buffer handed through alongside the tokens
    /// (callers in this module pass the document's source).
    ///
    /// Returns the number of leading tokens covered by the match. A return
    /// value of `0` is treated by callers in this module as "no match".
    fn matches(&self, tokens: &[Token], source: &[char]) -> usize;
}
69
70pub trait PatternExt {
71    /// Search through all tokens to locate all non-overlapping pattern matches.
72    fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span>;
73}
74
75impl<P> PatternExt for P
76where
77    P: Pattern,
78{
79    fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span> {
80        let mut found = Vec::new();
81
82        for i in 0..tokens.len() {
83            let len = self.matches(&tokens[i..], source);
84
85            if len > 0 {
86                found.push(Span::new_with_len(i, len));
87            }
88        }
89
90        if found.len() < 2 {
91            return found;
92        }
93
94        let mut remove_indices = VecDeque::new();
95
96        for i in 0..found.len() - 1 {
97            let cur = &found[i];
98            let next = &found[i + 1];
99
100            if cur.overlaps_with(*next) {
101                remove_indices.push_back(i + 1);
102            }
103        }
104
105        found.remove_indices(remove_indices);
106
107        found
108    }
109}
110
111pub trait OwnedPatternExt {
112    fn or(self, other: Box<dyn Pattern>) -> EitherPattern;
113}
114
115impl<P> OwnedPatternExt for P
116where
117    P: Pattern + 'static,
118{
119    fn or(self, other: Box<dyn Pattern>) -> EitherPattern {
120        EitherPattern::new(vec![Box::new(self), other])
121    }
122}
123
/// Lets any thread-safe predicate over a single token act as a [`Pattern`]
/// (variant compiled when the `concurrent` feature is enabled).
#[cfg(feature = "concurrent")]
impl<F> Pattern for F
where
    F: Fn(&Token, &[char]) -> bool,
    F: Send + Sync,
{
    /// Apply the predicate to the first token only.
    ///
    /// Returns `1` when there is a first token and the predicate accepts it,
    /// and `0` otherwise (including when `tokens` is empty, in which case the
    /// predicate is never called).
    fn matches(&self, tokens: &[Token], source: &[char]) -> usize {
        match tokens.first() {
            Some(head) if self(head, source) => 1,
            _ => 0,
        }
    }
}
140
141#[cfg(not(feature = "concurrent"))]
142impl<F> Pattern for F
143where
144    F: Fn(&Token, &[char]) -> bool,
145{
146    fn matches(&self, tokens: &[Token], source: &[char]) -> usize {
147        if tokens.is_empty() {
148            return 0;
149        }
150
151        let tok = &tokens[0];
152
153        if self(tok, source) { 1 } else { 0 }
154    }
155}
156
157pub trait DocPattern {
158    fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span>;
159}
160
161impl<P: PatternExt> DocPattern for P {
162    fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span> {
163        self.find_all_matches(document.get_tokens(), document.get_source())
164    }
165}