harper_core/patterns/
mod.rs

1//! [`Pattern`]s are one of the more powerful ways to query text inside Harper, especially for beginners.
2//!
3//! Through the [`PatternLinter`](crate::linting::PatternLinter) trait, they make it much easier to
4//! build Harper [rules](crate::linting::Linter).
5//!
6//! See the page about [`SequencePattern`] for a concrete example of their use.
7
8use std::collections::VecDeque;
9
10use crate::{Document, Span, Token, VecExt};
11
12mod all;
13mod any_capitalization;
14mod any_pattern;
15mod consumes_remaining_pattern;
16mod either_pattern;
17mod exact_phrase;
18mod indefinite_article;
19mod invert;
20mod is_not_title_case;
21mod naive_pattern_group;
22mod noun_phrase;
23mod repeating_pattern;
24mod sequence_pattern;
25mod similar_to_phrase;
26mod singular_subject;
27mod split_compound_word;
28mod token_kind_pattern_group;
29mod whitespace_pattern;
30mod within_edit_distance;
31mod word_pattern_group;
32mod word_set;
33
34pub use all::All;
35pub use any_capitalization::AnyCapitalization;
36pub use any_pattern::AnyPattern;
37use blanket::blanket;
38pub use consumes_remaining_pattern::ConsumesRemainingPattern;
39pub use either_pattern::EitherPattern;
40pub use exact_phrase::ExactPhrase;
41pub use indefinite_article::IndefiniteArticle;
42pub use invert::Invert;
43pub use is_not_title_case::IsNotTitleCase;
44pub use naive_pattern_group::NaivePatternGroup;
45pub use noun_phrase::NounPhrase;
46pub use repeating_pattern::RepeatingPattern;
47pub use sequence_pattern::SequencePattern;
48pub use similar_to_phrase::SimilarToPhrase;
49pub use singular_subject::SingularSubject;
50pub use split_compound_word::SplitCompoundWord;
51pub use token_kind_pattern_group::TokenKindPatternGroup;
52pub use whitespace_pattern::WhitespacePattern;
53pub use word_pattern_group::WordPatternGroup;
54pub use word_set::WordSet;
55
56#[cfg(not(feature = "concurrent"))]
57#[blanket(derive(Rc, Arc))]
58pub trait Pattern {
59    fn matches(&self, tokens: &[Token], source: &[char]) -> usize;
60}
61
/// A query that can be tested against the head of a token slice.
///
/// This is the thread-safe flavour of the trait: it is compiled when the
/// `concurrent` feature is enabled, and every implementor must be
/// `Send + Sync`. The `blanket` attribute derives an implementation for `Arc`
/// wrappers only (no `Rc`, unlike the non-concurrent declaration).
#[cfg(feature = "concurrent")]
#[blanket(derive(Arc))]
pub trait Pattern: Send + Sync {
    /// Test the pattern against the beginning of `tokens`.
    ///
    /// `source` is the character buffer handed in alongside the tokens.
    ///
    /// Returns the number of tokens consumed by the match, counted from
    /// `tokens[0]`. Callers in this module treat `0` as "no match".
    fn matches(&self, tokens: &[Token], source: &[char]) -> usize;
}
67
68pub trait PatternExt {
69    /// Search through all tokens to locate all non-overlapping pattern matches.
70    fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span>;
71}
72
73impl<P> PatternExt for P
74where
75    P: Pattern,
76{
77    fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span> {
78        let mut found = Vec::new();
79
80        for i in 0..tokens.len() {
81            let len = self.matches(&tokens[i..], source);
82
83            if len > 0 {
84                found.push(Span::new_with_len(i, len));
85            }
86        }
87
88        if found.len() < 2 {
89            return found;
90        }
91
92        let mut remove_indices = VecDeque::new();
93
94        for i in 0..found.len() - 1 {
95            let cur = &found[i];
96            let next = &found[i + 1];
97
98            if cur.overlaps_with(*next) {
99                remove_indices.push_back(i + 1);
100            }
101        }
102
103        found.remove_indices(remove_indices);
104
105        found
106    }
107}
108
109pub trait OwnedPatternExt {
110    fn or(self, other: Box<dyn Pattern>) -> EitherPattern;
111}
112
113impl<P> OwnedPatternExt for P
114where
115    P: Pattern + 'static,
116{
117    fn or(self, other: Box<dyn Pattern>) -> EitherPattern {
118        EitherPattern::new(vec![Box::new(self), other])
119    }
120}
121
/// Lets any thread-safe predicate over a single token act as a [`Pattern`]
/// (only compiled with the `concurrent` feature).
#[cfg(feature = "concurrent")]
impl<F> Pattern for F
where
    F: Fn(&Token, &[char]) -> bool,
    F: Send + Sync,
{
    /// Apply the predicate to the first token only.
    ///
    /// Returns `1` when the predicate accepts the first token, and `0` when
    /// it rejects it or when `tokens` is empty.
    fn matches(&self, tokens: &[Token], source: &[char]) -> usize {
        match tokens.first() {
            Some(head) if self(head, source) => 1,
            _ => 0,
        }
    }
}
142
143#[cfg(not(feature = "concurrent"))]
144impl<F> Pattern for F
145where
146    F: Fn(&Token, &[char]) -> bool,
147{
148    fn matches(&self, tokens: &[Token], source: &[char]) -> usize {
149        if tokens.is_empty() {
150            return 0;
151        }
152
153        let tok = &tokens[0];
154
155        if self(tok, source) {
156            1
157        } else {
158            0
159        }
160    }
161}
162
163pub trait DocPattern {
164    fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span>;
165}
166
167impl<P: PatternExt> DocPattern for P {
168    fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span> {
169        self.find_all_matches(document.get_tokens(), document.get_source())
170    }
171}