harper_core/patterns/
mod.rs

1//! [`Pattern`]s are one of the more powerful ways to query text inside Harper, especially for beginners.
2//!
3//! Through the [`PatternLinter`](crate::linting::PatternLinter) trait, they make it much easier to
4//! build Harper [rules](crate::linting::Linter).
5//!
6//! See the page about [`SequencePattern`] for a concrete example of their use.
7
8use std::collections::VecDeque;
9
10use crate::{Document, Span, Token, VecExt};
11
12mod all;
13mod any_capitalization;
14mod any_pattern;
15mod consumes_remaining_pattern;
16mod either_pattern;
17mod exact_phrase;
18mod implies_quantity;
19mod indefinite_article;
20mod invert;
21mod is_not_title_case;
22mod naive_pattern_group;
23mod nominal_phrase;
24mod pattern_map;
25mod repeating_pattern;
26mod sequence_pattern;
27mod similar_to_phrase;
28mod singular_subject;
29mod split_compound_word;
30mod token_kind_pattern_group;
31mod whitespace_pattern;
32mod within_edit_distance;
33mod word_pattern_group;
34mod word_set;
35
36pub use all::All;
37pub use any_capitalization::AnyCapitalization;
38pub use any_pattern::AnyPattern;
39use blanket::blanket;
40pub use consumes_remaining_pattern::ConsumesRemainingPattern;
41pub use either_pattern::EitherPattern;
42pub use exact_phrase::ExactPhrase;
43pub use implies_quantity::ImpliesQuantity;
44pub use indefinite_article::IndefiniteArticle;
45pub use invert::Invert;
46pub use is_not_title_case::IsNotTitleCase;
47pub use naive_pattern_group::NaivePatternGroup;
48pub use nominal_phrase::NominalPhrase;
49pub use pattern_map::PatternMap;
50pub use repeating_pattern::RepeatingPattern;
51pub use sequence_pattern::SequencePattern;
52pub use similar_to_phrase::SimilarToPhrase;
53pub use singular_subject::SingularSubject;
54pub use split_compound_word::SplitCompoundWord;
55pub use token_kind_pattern_group::TokenKindPatternGroup;
56pub use whitespace_pattern::WhitespacePattern;
57pub use word_pattern_group::WordPatternGroup;
58pub use word_set::WordSet;
59
60#[cfg(not(feature = "concurrent"))]
61#[blanket(derive(Rc, Arc))]
62pub trait Pattern {
63    fn matches(&self, tokens: &[Token], source: &[char]) -> usize;
64}
65
/// A matcher that is tested against the front of a slice of tokens.
///
/// This is the definition used when the `concurrent` feature is enabled, so
/// every implementor must additionally be `Send + Sync`. The `blanket`
/// attribute requests a derived implementation for `Arc` wrappers around a
/// pattern.
#[cfg(feature = "concurrent")]
#[blanket(derive(Arc))]
pub trait Pattern: Send + Sync {
    /// Test the pattern against the beginning of `tokens`.
    ///
    /// Returns the number of tokens, counted from the start of the slice,
    /// that the pattern covers. A return value of `0` is treated as
    /// "no match" by [`PatternExt::find_all_matches`].
    ///
    /// `source` is the character slice handed in alongside the tokens
    /// (presumably the text the tokens were produced from).
    fn matches(&self, tokens: &[Token], source: &[char]) -> usize;
}
71
72pub trait PatternExt {
73    /// Search through all tokens to locate all non-overlapping pattern matches.
74    fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span>;
75}
76
77impl<P> PatternExt for P
78where
79    P: Pattern,
80{
81    fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span> {
82        let mut found = Vec::new();
83
84        for i in 0..tokens.len() {
85            let len = self.matches(&tokens[i..], source);
86
87            if len > 0 {
88                found.push(Span::new_with_len(i, len));
89            }
90        }
91
92        if found.len() < 2 {
93            return found;
94        }
95
96        let mut remove_indices = VecDeque::new();
97
98        for i in 0..found.len() - 1 {
99            let cur = &found[i];
100            let next = &found[i + 1];
101
102            if cur.overlaps_with(*next) {
103                remove_indices.push_back(i + 1);
104            }
105        }
106
107        found.remove_indices(remove_indices);
108
109        found
110    }
111}
112
113pub trait OwnedPatternExt {
114    fn or(self, other: Box<dyn Pattern>) -> EitherPattern;
115}
116
117impl<P> OwnedPatternExt for P
118where
119    P: Pattern + 'static,
120{
121    fn or(self, other: Box<dyn Pattern>) -> EitherPattern {
122        EitherPattern::new(vec![Box::new(self), other])
123    }
124}
125
/// Lets a thread-safe predicate over a single token act as a [`Pattern`]
/// (build with the `concurrent` feature enabled).
#[cfg(feature = "concurrent")]
impl<F> Pattern for F
where
    F: Send + Sync + Fn(&Token, &[char]) -> bool,
{
    /// Apply the predicate to the first token only.
    ///
    /// Returns `1` when a first token exists and the predicate accepts it,
    /// and `0` otherwise (including for an empty slice).
    fn matches(&self, tokens: &[Token], source: &[char]) -> usize {
        match tokens.first() {
            Some(head) if self(head, source) => 1,
            _ => 0,
        }
    }
}
142
143#[cfg(not(feature = "concurrent"))]
144impl<F> Pattern for F
145where
146    F: Fn(&Token, &[char]) -> bool,
147{
148    fn matches(&self, tokens: &[Token], source: &[char]) -> usize {
149        if tokens.is_empty() {
150            return 0;
151        }
152
153        let tok = &tokens[0];
154
155        if self(tok, source) { 1 } else { 0 }
156    }
157}
158
159pub trait DocPattern {
160    fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span>;
161}
162
163impl<P: PatternExt> DocPattern for P {
164    fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span> {
165        self.find_all_matches(document.get_tokens(), document.get_source())
166    }
167}