harper_core/patterns/
mod.rs1use std::collections::VecDeque;
9
10use crate::{Document, Span, Token, VecExt};
11
12mod all;
13mod any_capitalization;
14mod any_pattern;
15mod consumes_remaining_pattern;
16mod either_pattern;
17mod exact_phrase;
18mod implies_quantity;
19mod indefinite_article;
20mod invert;
21mod is_not_title_case;
22mod naive_pattern_group;
23mod nominal_phrase;
24mod pattern_map;
25mod repeating_pattern;
26mod sequence_pattern;
27mod similar_to_phrase;
28mod singular_subject;
29mod split_compound_word;
30mod token_kind_pattern_group;
31mod whitespace_pattern;
32mod within_edit_distance;
33mod word_pattern_group;
34mod word_set;
35
36pub use all::All;
37pub use any_capitalization::AnyCapitalization;
38pub use any_pattern::AnyPattern;
39use blanket::blanket;
40pub use consumes_remaining_pattern::ConsumesRemainingPattern;
41pub use either_pattern::EitherPattern;
42pub use exact_phrase::ExactPhrase;
43pub use implies_quantity::ImpliesQuantity;
44pub use indefinite_article::IndefiniteArticle;
45pub use invert::Invert;
46pub use is_not_title_case::IsNotTitleCase;
47pub use naive_pattern_group::NaivePatternGroup;
48pub use nominal_phrase::NominalPhrase;
49pub use pattern_map::PatternMap;
50pub use repeating_pattern::RepeatingPattern;
51pub use sequence_pattern::SequencePattern;
52pub use similar_to_phrase::SimilarToPhrase;
53pub use singular_subject::SingularSubject;
54pub use split_compound_word::SplitCompoundWord;
55pub use token_kind_pattern_group::TokenKindPatternGroup;
56pub use whitespace_pattern::WhitespacePattern;
57pub use word_pattern_group::WordPatternGroup;
58pub use word_set::WordSet;
59
60#[cfg(not(feature = "concurrent"))]
61#[blanket(derive(Rc, Arc))]
62pub trait Pattern {
63 fn matches(&self, tokens: &[Token], source: &[char]) -> usize;
64}
65
/// A matcher over a run of tokens (thread-safe build).
///
/// This definition is compiled when the `concurrent` feature is enabled, and
/// additionally requires implementors to be `Send + Sync`.
#[cfg(feature = "concurrent")]
#[blanket(derive(Arc))]
pub trait Pattern: Send + Sync {
    /// Tests whether the pattern matches at the very start of `tokens`.
    ///
    /// `source` is the character buffer handed along with the tokens.
    ///
    /// Returns the number of tokens covered by the match. Callers in this
    /// module treat a return value of `0` as "no match".
    fn matches(&self, tokens: &[Token], source: &[char]) -> usize;
}
71
72pub trait PatternExt {
73 fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span>;
75}
76
77impl<P> PatternExt for P
78where
79 P: Pattern,
80{
81 fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span> {
82 let mut found = Vec::new();
83
84 for i in 0..tokens.len() {
85 let len = self.matches(&tokens[i..], source);
86
87 if len > 0 {
88 found.push(Span::new_with_len(i, len));
89 }
90 }
91
92 if found.len() < 2 {
93 return found;
94 }
95
96 let mut remove_indices = VecDeque::new();
97
98 for i in 0..found.len() - 1 {
99 let cur = &found[i];
100 let next = &found[i + 1];
101
102 if cur.overlaps_with(*next) {
103 remove_indices.push_back(i + 1);
104 }
105 }
106
107 found.remove_indices(remove_indices);
108
109 found
110 }
111}
112
113pub trait OwnedPatternExt {
114 fn or(self, other: Box<dyn Pattern>) -> EitherPattern;
115}
116
117impl<P> OwnedPatternExt for P
118where
119 P: Pattern + 'static,
120{
121 fn or(self, other: Box<dyn Pattern>) -> EitherPattern {
122 EitherPattern::new(vec![Box::new(self), other])
123 }
124}
125
/// Lets any thread-safe `Fn(&Token, &[char]) -> bool` closure act as a
/// single-token [`Pattern`] (build with the `concurrent` feature).
#[cfg(feature = "concurrent")]
impl<F> Pattern for F
where
    F: Fn(&Token, &[char]) -> bool + Send + Sync,
{
    /// Applies the predicate to the first token only.
    ///
    /// Returns `1` when the predicate accepts that token, and `0` when it
    /// rejects it or when `tokens` is empty.
    fn matches(&self, tokens: &[Token], source: &[char]) -> usize {
        match tokens.first() {
            Some(head) if self(head, source) => 1,
            _ => 0,
        }
    }
}
142
143#[cfg(not(feature = "concurrent"))]
144impl<F> Pattern for F
145where
146 F: Fn(&Token, &[char]) -> bool,
147{
148 fn matches(&self, tokens: &[Token], source: &[char]) -> usize {
149 if tokens.is_empty() {
150 return 0;
151 }
152
153 let tok = &tokens[0];
154
155 if self(tok, source) { 1 } else { 0 }
156 }
157}
158
159pub trait DocPattern {
160 fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span>;
161}
162
163impl<P: PatternExt> DocPattern for P {
164 fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span> {
165 self.find_all_matches(document.get_tokens(), document.get_source())
166 }
167}