harper_core/patterns/
mod.rs1use crate::{Document, LSend, Span, Token};
9
10mod all;
11mod any_pattern;
12mod either_pattern;
13mod exact_phrase;
14mod implies_quantity;
15mod indefinite_article;
16mod inflection_of_be;
17mod invert;
18mod naive_pattern_group;
19mod nominal_phrase;
20mod pattern_map;
21mod repeating_pattern;
22mod sequence_pattern;
23mod similar_to_phrase;
24mod split_compound_word;
25mod whitespace_pattern;
26mod within_edit_distance;
27mod word;
28mod word_pattern_group;
29mod word_set;
30
31pub use all::All;
32pub use any_pattern::AnyPattern;
33use blanket::blanket;
34pub use either_pattern::EitherPattern;
35pub use exact_phrase::ExactPhrase;
36pub use implies_quantity::ImpliesQuantity;
37pub use indefinite_article::IndefiniteArticle;
38pub use inflection_of_be::InflectionOfBe;
39pub use invert::Invert;
40pub use naive_pattern_group::NaivePatternGroup;
41pub use nominal_phrase::NominalPhrase;
42pub use pattern_map::PatternMap;
43pub use repeating_pattern::RepeatingPattern;
44pub use sequence_pattern::SequencePattern;
45pub use similar_to_phrase::SimilarToPhrase;
46pub use split_compound_word::SplitCompoundWord;
47pub use whitespace_pattern::WhitespacePattern;
48pub use word::Word;
49pub use word_pattern_group::WordPatternGroup;
50pub use word_set::WordSet;
51
52#[cfg_attr(feature = "concurrent", blanket(derive(Arc)))]
53#[cfg_attr(not(feature = "concurrent"), blanket(derive(Rc, Arc)))]
54pub trait Pattern: LSend {
55 fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize>;
59}
60
61pub trait PatternExt {
62 fn iter_matches(&self, tokens: &[Token], source: &[char]) -> impl Iterator<Item = Span>;
63
64 fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span> {
66 self.iter_matches(tokens, source).collect()
67 }
68}
69
70impl<P> PatternExt for P
71where
72 P: Pattern + ?Sized,
73{
74 fn iter_matches(&self, tokens: &[Token], source: &[char]) -> impl Iterator<Item = Span> {
75 MatchIter::new(self, tokens, source)
76 }
77}
78
79struct MatchIter<'a, 'b, 'c, P: ?Sized> {
80 pattern: &'a P,
81 tokens: &'b [Token],
82 source: &'c [char],
83 index: usize,
84}
85impl<'a, 'b, 'c, P> MatchIter<'a, 'b, 'c, P>
86where
87 P: Pattern + ?Sized,
88{
89 fn new(pattern: &'a P, tokens: &'b [Token], source: &'c [char]) -> Self {
90 Self {
91 pattern,
92 tokens,
93 source,
94 index: 0,
95 }
96 }
97}
98impl<P> Iterator for MatchIter<'_, '_, '_, P>
99where
100 P: Pattern + ?Sized,
101{
102 type Item = Span;
103
104 fn next(&mut self) -> Option<Self::Item> {
105 while self.index < self.tokens.len() {
106 if let Some(len) = self
107 .pattern
108 .matches(&self.tokens[self.index..], self.source)
109 {
110 let span = Span::new_with_len(self.index, len);
111 self.index += len.max(1);
112 return Some(span);
113 } else {
114 self.index += 1;
115 }
116 }
117
118 None
119 }
120}
121
122pub trait OwnedPatternExt {
123 fn or(self, other: impl Pattern + 'static) -> EitherPattern;
124}
125
126impl<P> OwnedPatternExt for P
127where
128 P: Pattern + 'static,
129{
130 fn or(self, other: impl Pattern + 'static) -> EitherPattern {
131 EitherPattern::new(vec![Box::new(self), Box::new(other)])
132 }
133}
134
135pub trait SingleTokenPattern: LSend {
138 fn matches_token(&self, token: &Token, source: &[char]) -> bool;
139}
140
141impl<S: SingleTokenPattern> Pattern for S {
142 fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize> {
143 if self.matches_token(tokens.first()?, source) {
144 Some(1)
145 } else {
146 None
147 }
148 }
149}
150
151impl<F: LSend + Fn(&Token, &[char]) -> bool> SingleTokenPattern for F {
152 fn matches_token(&self, token: &Token, source: &[char]) -> bool {
153 self(token, source)
154 }
155}
156
157pub trait DocPattern {
158 fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span>;
159}
160
161impl<P: PatternExt> DocPattern for P {
162 fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span> {
163 self.find_all_matches(document.get_tokens(), document.get_source())
164 }
165}