harper_core/patterns/
mod.rs1use std::collections::VecDeque;
9
10use crate::{Document, Span, Token, VecExt};
11
12mod all;
13mod any_capitalization;
14mod any_pattern;
15mod consumes_remaining_pattern;
16mod either_pattern;
17mod exact_phrase;
18mod implies_quantity;
19mod indefinite_article;
20mod invert;
21mod is_not_title_case;
22mod naive_pattern_group;
23mod nominal_phrase;
24mod pattern_map;
25mod repeating_pattern;
26mod sequence_pattern;
27mod similar_to_phrase;
28mod split_compound_word;
29mod token_kind_pattern_group;
30mod whitespace_pattern;
31mod within_edit_distance;
32mod word_pattern_group;
33mod word_set;
34
35pub use all::All;
36pub use any_capitalization::AnyCapitalization;
37pub use any_pattern::AnyPattern;
38use blanket::blanket;
39pub use consumes_remaining_pattern::ConsumesRemainingPattern;
40pub use either_pattern::EitherPattern;
41pub use exact_phrase::ExactPhrase;
42pub use implies_quantity::ImpliesQuantity;
43pub use indefinite_article::IndefiniteArticle;
44pub use invert::Invert;
45pub use is_not_title_case::IsNotTitleCase;
46pub use naive_pattern_group::NaivePatternGroup;
47pub use nominal_phrase::NominalPhrase;
48pub use pattern_map::PatternMap;
49pub use repeating_pattern::RepeatingPattern;
50pub use sequence_pattern::SequencePattern;
51pub use similar_to_phrase::SimilarToPhrase;
52pub use split_compound_word::SplitCompoundWord;
53pub use token_kind_pattern_group::TokenKindPatternGroup;
54pub use whitespace_pattern::WhitespacePattern;
55pub use word_pattern_group::WordPatternGroup;
56pub use word_set::WordSet;
57
58#[cfg(not(feature = "concurrent"))]
59#[blanket(derive(Rc, Arc))]
60pub trait Pattern {
61 fn matches(&self, tokens: &[Token], source: &[char]) -> usize;
62}
63
/// A matcher that is tried against the front of a token slice.
///
/// This is the definition used when the `concurrent` feature is enabled:
/// every pattern must be `Send + Sync`, and the `blanket` attribute derives
/// an implementation for `Arc` wrappers around a pattern.
#[cfg(feature = "concurrent")]
#[blanket(derive(Arc))]
pub trait Pattern: Send + Sync {
    /// Tests the pattern against the beginning of `tokens`.
    ///
    /// Returns the number of tokens the match covers, counted from
    /// `tokens[0]`. A return value of `0` means the pattern did not match.
    /// `source` is the character buffer that accompanies the tokens.
    fn matches(&self, tokens: &[Token], source: &[char]) -> usize;
}
69
70pub trait PatternExt {
71 fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span>;
73}
74
75impl<P> PatternExt for P
76where
77 P: Pattern,
78{
79 fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span> {
80 let mut found = Vec::new();
81
82 for i in 0..tokens.len() {
83 let len = self.matches(&tokens[i..], source);
84
85 if len > 0 {
86 found.push(Span::new_with_len(i, len));
87 }
88 }
89
90 if found.len() < 2 {
91 return found;
92 }
93
94 let mut remove_indices = VecDeque::new();
95
96 for i in 0..found.len() - 1 {
97 let cur = &found[i];
98 let next = &found[i + 1];
99
100 if cur.overlaps_with(*next) {
101 remove_indices.push_back(i + 1);
102 }
103 }
104
105 found.remove_indices(remove_indices);
106
107 found
108 }
109}
110
111pub trait OwnedPatternExt {
112 fn or(self, other: Box<dyn Pattern>) -> EitherPattern;
113}
114
115impl<P> OwnedPatternExt for P
116where
117 P: Pattern + 'static,
118{
119 fn or(self, other: Box<dyn Pattern>) -> EitherPattern {
120 EitherPattern::new(vec![Box::new(self), other])
121 }
122}
123
/// Lets a thread-safe predicate over a single token act as a [`Pattern`]
/// (used when the `concurrent` feature is enabled).
#[cfg(feature = "concurrent")]
impl<F> Pattern for F
where
    F: Fn(&Token, &[char]) -> bool + Send + Sync,
{
    /// Applies the predicate to the first token only.
    ///
    /// Returns `1` when there is a first token and the predicate accepts it,
    /// and `0` otherwise (including when `tokens` is empty).
    fn matches(&self, tokens: &[Token], source: &[char]) -> usize {
        match tokens.first() {
            Some(head) if self(head, source) => 1,
            _ => 0,
        }
    }
}
140
141#[cfg(not(feature = "concurrent"))]
142impl<F> Pattern for F
143where
144 F: Fn(&Token, &[char]) -> bool,
145{
146 fn matches(&self, tokens: &[Token], source: &[char]) -> usize {
147 if tokens.is_empty() {
148 return 0;
149 }
150
151 let tok = &tokens[0];
152
153 if self(tok, source) { 1 } else { 0 }
154 }
155}
156
157pub trait DocPattern {
158 fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span>;
159}
160
161impl<P: PatternExt> DocPattern for P {
162 fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span> {
163 self.find_all_matches(document.get_tokens(), document.get_source())
164 }
165}