harper_core/patterns/
mod.rs1use crate::{Document, Span, Token};
9
10mod all;
11mod any_pattern;
12mod either_pattern;
13mod exact_phrase;
14mod implies_quantity;
15mod indefinite_article;
16mod inflection_of_be;
17mod invert;
18mod naive_pattern_group;
19mod nominal_phrase;
20mod pattern_map;
21mod repeating_pattern;
22mod sequence_pattern;
23mod similar_to_phrase;
24mod split_compound_word;
25mod whitespace_pattern;
26mod within_edit_distance;
27mod word;
28mod word_pattern_group;
29mod word_set;
30
31pub use all::All;
32pub use any_pattern::AnyPattern;
33use blanket::blanket;
34pub use either_pattern::EitherPattern;
35pub use exact_phrase::ExactPhrase;
36pub use implies_quantity::ImpliesQuantity;
37pub use indefinite_article::IndefiniteArticle;
38pub use inflection_of_be::InflectionOfBe;
39pub use invert::Invert;
40pub use naive_pattern_group::NaivePatternGroup;
41pub use nominal_phrase::NominalPhrase;
42pub use pattern_map::PatternMap;
43pub use repeating_pattern::RepeatingPattern;
44pub use sequence_pattern::SequencePattern;
45pub use similar_to_phrase::SimilarToPhrase;
46pub use split_compound_word::SplitCompoundWord;
47pub use whitespace_pattern::WhitespacePattern;
48pub use word::Word;
49pub use word_pattern_group::WordPatternGroup;
50pub use word_set::WordSet;
51
52#[cfg(not(feature = "concurrent"))]
53#[blanket(derive(Rc, Arc))]
54pub trait Pattern {
55 fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize>;
59}
/// A matcher over a sequence of [`Token`]s (thread-safe flavour).
///
/// This definition is compiled only when the `concurrent` feature is
/// enabled. Every implementor must be `Send + Sync`, and the `blanket`
/// attribute requests a derived implementation for the `Arc` wrapper only.
#[cfg(feature = "concurrent")]
#[blanket(derive(Arc))]
pub trait Pattern: Send + Sync {
    /// Tests the pattern against the front of `tokens`.
    ///
    /// `source` is the character buffer handed over together with the tokens
    /// (presumably the text the tokens were produced from; confirm against
    /// callers).
    ///
    /// Returns `Some(len)` with the length of the match, counted in tokens,
    /// or `None` when the pattern does not match at this position.
    fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize>;
}
68
69pub trait PatternExt {
70 fn iter_matches(&self, tokens: &[Token], source: &[char]) -> impl Iterator<Item = Span>;
71
72 fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span> {
74 self.iter_matches(tokens, source).collect()
75 }
76}
77
78impl<P> PatternExt for P
79where
80 P: Pattern + ?Sized,
81{
82 fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span> {
83 self.iter_matches(tokens, source).collect()
84 }
85 fn iter_matches(&self, tokens: &[Token], source: &[char]) -> impl Iterator<Item = Span> {
86 MatchIter::new(self, tokens, source)
87 }
88}
89
90struct MatchIter<'a, 'b, 'c, P: ?Sized> {
91 pattern: &'a P,
92 tokens: &'b [Token],
93 source: &'c [char],
94 index: usize,
95}
96impl<'a, 'b, 'c, P> MatchIter<'a, 'b, 'c, P>
97where
98 P: Pattern + ?Sized,
99{
100 fn new(pattern: &'a P, tokens: &'b [Token], source: &'c [char]) -> Self {
101 Self {
102 pattern,
103 tokens,
104 source,
105 index: 0,
106 }
107 }
108}
109impl<P> Iterator for MatchIter<'_, '_, '_, P>
110where
111 P: Pattern + ?Sized,
112{
113 type Item = Span;
114
115 fn next(&mut self) -> Option<Self::Item> {
116 while self.index < self.tokens.len() {
117 if let Some(len) = self
118 .pattern
119 .matches(&self.tokens[self.index..], self.source)
120 {
121 let span = Span::new_with_len(self.index, len);
122 self.index += len.max(1);
123 return Some(span);
124 } else {
125 self.index += 1;
126 }
127 }
128
129 None
130 }
131}
132
133pub trait OwnedPatternExt {
134 fn or(self, other: Box<dyn Pattern>) -> EitherPattern;
135}
136
137impl<P> OwnedPatternExt for P
138where
139 P: Pattern + 'static,
140{
141 fn or(self, other: Box<dyn Pattern>) -> EitherPattern {
142 EitherPattern::new(vec![Box::new(self), other])
143 }
144}
145
/// Lets a thread-safe single-token predicate closure act as a [`Pattern`]
/// (build with the `concurrent` feature).
#[cfg(feature = "concurrent")]
impl<F> Pattern for F
where
    F: Fn(&Token, &[char]) -> bool,
    F: Send + Sync,
{
    /// Calls the predicate on the first token of `tokens`.
    ///
    /// Returns `Some(1)` when the predicate accepts that token, and `None`
    /// when it rejects it or when `tokens` is empty.
    fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize> {
        let head = tokens.first()?;
        self(head, source).then_some(1)
    }
}
160
161#[cfg(not(feature = "concurrent"))]
162impl<F> Pattern for F
163where
164 F: Fn(&Token, &[char]) -> bool,
165{
166 fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize> {
167 if self(tokens.first()?, source) {
168 Some(1)
169 } else {
170 None
171 }
172 }
173}
174
175pub trait DocPattern {
176 fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span>;
177}
178
179impl<P: PatternExt> DocPattern for P {
180 fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span> {
181 self.find_all_matches(document.get_tokens(), document.get_source())
182 }
183}