harper_core/patterns/
mod.rs1use std::collections::VecDeque;
9
10use crate::{Document, Span, Token, VecExt};
11
12mod all;
13mod any_capitalization;
14mod any_pattern;
15mod consumes_remaining_pattern;
16mod either_pattern;
17mod exact_phrase;
18mod indefinite_article;
19mod invert;
20mod is_not_title_case;
21mod naive_pattern_group;
22mod noun_phrase;
23mod repeating_pattern;
24mod sequence_pattern;
25mod similar_to_phrase;
26mod singular_subject;
27mod split_compound_word;
28mod token_kind_pattern_group;
29mod whitespace_pattern;
30mod within_edit_distance;
31mod word_pattern_group;
32mod word_set;
33
34pub use all::All;
35pub use any_capitalization::AnyCapitalization;
36pub use any_pattern::AnyPattern;
37use blanket::blanket;
38pub use consumes_remaining_pattern::ConsumesRemainingPattern;
39pub use either_pattern::EitherPattern;
40pub use exact_phrase::ExactPhrase;
41pub use indefinite_article::IndefiniteArticle;
42pub use invert::Invert;
43pub use is_not_title_case::IsNotTitleCase;
44pub use naive_pattern_group::NaivePatternGroup;
45pub use noun_phrase::NounPhrase;
46pub use repeating_pattern::RepeatingPattern;
47pub use sequence_pattern::SequencePattern;
48pub use similar_to_phrase::SimilarToPhrase;
49pub use singular_subject::SingularSubject;
50pub use split_compound_word::SplitCompoundWord;
51pub use token_kind_pattern_group::TokenKindPatternGroup;
52pub use whitespace_pattern::WhitespacePattern;
53pub use word_pattern_group::WordPatternGroup;
54pub use word_set::WordSet;
55
56#[cfg(not(feature = "concurrent"))]
57#[blanket(derive(Rc, Arc))]
58pub trait Pattern {
59 fn matches(&self, tokens: &[Token], source: &[char]) -> usize;
60}
61
#[cfg(feature = "concurrent")]
#[blanket(derive(Arc))]
/// A matcher that is tested against the front of a slice of [`Token`]s.
///
/// This is the definition compiled when the `concurrent` feature is enabled,
/// so implementors must additionally be `Send + Sync`. The `blanket` attribute
/// requests a derived implementation for `Arc`.
pub trait Pattern: Send + Sync {
    /// Tests the pattern against the beginning of `tokens`.
    ///
    /// `source` is the character slice handed to the pattern alongside the tokens.
    ///
    /// The result is a length counted in tokens. Within this module a result of
    /// `0` is treated as "no match", and any positive value as the number of
    /// leading tokens covered by the match.
    fn matches(&self, tokens: &[Token], source: &[char]) -> usize;
}
67
68pub trait PatternExt {
69 fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span>;
71}
72
73impl<P> PatternExt for P
74where
75 P: Pattern,
76{
77 fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span> {
78 let mut found = Vec::new();
79
80 for i in 0..tokens.len() {
81 let len = self.matches(&tokens[i..], source);
82
83 if len > 0 {
84 found.push(Span::new_with_len(i, len));
85 }
86 }
87
88 if found.len() < 2 {
89 return found;
90 }
91
92 let mut remove_indices = VecDeque::new();
93
94 for i in 0..found.len() - 1 {
95 let cur = &found[i];
96 let next = &found[i + 1];
97
98 if cur.overlaps_with(*next) {
99 remove_indices.push_back(i + 1);
100 }
101 }
102
103 found.remove_indices(remove_indices);
104
105 found
106 }
107}
108
109pub trait OwnedPatternExt {
110 fn or(self, other: Box<dyn Pattern>) -> EitherPattern;
111}
112
113impl<P> OwnedPatternExt for P
114where
115 P: Pattern + 'static,
116{
117 fn or(self, other: Box<dyn Pattern>) -> EitherPattern {
118 EitherPattern::new(vec![Box::new(self), other])
119 }
120}
121
/// Lets any thread-safe predicate over a single token act as a [`Pattern`]
/// (build with the `concurrent` feature enabled).
#[cfg(feature = "concurrent")]
impl<F> Pattern for F
where
    F: Fn(&Token, &[char]) -> bool,
    F: Send + Sync,
{
    /// Applies the predicate to the first token only.
    ///
    /// Returns `1` when the predicate accepts that token, and `0` when it
    /// rejects it or when `tokens` is empty.
    fn matches(&self, tokens: &[Token], source: &[char]) -> usize {
        match tokens.first() {
            Some(head) if self(head, source) => 1,
            _ => 0,
        }
    }
}
142
143#[cfg(not(feature = "concurrent"))]
144impl<F> Pattern for F
145where
146 F: Fn(&Token, &[char]) -> bool,
147{
148 fn matches(&self, tokens: &[Token], source: &[char]) -> usize {
149 if tokens.is_empty() {
150 return 0;
151 }
152
153 let tok = &tokens[0];
154
155 if self(tok, source) {
156 1
157 } else {
158 0
159 }
160 }
161}
162
163pub trait DocPattern {
164 fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span>;
165}
166
167impl<P: PatternExt> DocPattern for P {
168 fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span> {
169 self.find_all_matches(document.get_tokens(), document.get_source())
170 }
171}