harper_core/patterns/
mod.rs1use crate::{Document, LSend, Span, Token};
9
10mod all;
11mod any_pattern;
12mod first_match_of;
13mod fixed_phrase;
14mod implies_quantity;
15mod indefinite_article;
16mod inflection_of_be;
17mod invert;
18mod longest_match_of;
19mod mergeable_words;
20mod nominal_phrase;
21mod pattern_map;
22mod repeating_pattern;
23mod sequence_pattern;
24mod similar_to_phrase;
25mod spelled_number_pattern;
26mod time_unit_pattern;
27mod whitespace_pattern;
28mod within_edit_distance;
29mod word;
30mod word_pattern_group;
31mod word_set;
32
33pub use all::All;
34pub use any_pattern::AnyPattern;
35use blanket::blanket;
36pub use first_match_of::FirstMatchOf;
37pub use fixed_phrase::FixedPhrase;
38pub use implies_quantity::ImpliesQuantity;
39pub use indefinite_article::IndefiniteArticle;
40pub use inflection_of_be::InflectionOfBe;
41pub use invert::Invert;
42pub use longest_match_of::LongestMatchOf;
43pub use mergeable_words::MergeableWords;
44pub use nominal_phrase::NominalPhrase;
45pub use pattern_map::PatternMap;
46pub use repeating_pattern::RepeatingPattern;
47pub use sequence_pattern::SequencePattern;
48pub use similar_to_phrase::SimilarToPhrase;
49pub use spelled_number_pattern::SpelledNumberPattern;
50pub use time_unit_pattern::TimeUnitPattern;
51pub use whitespace_pattern::WhitespacePattern;
52pub use word::Word;
53pub use word_pattern_group::WordPatternGroup;
54pub use word_set::WordSet;
55
56#[cfg_attr(feature = "concurrent", blanket(derive(Arc)))]
57#[cfg_attr(not(feature = "concurrent"), blanket(derive(Rc, Arc)))]
58pub trait Pattern: LSend {
59 fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize>;
63}
64
65pub trait PatternExt {
66 fn iter_matches(&self, tokens: &[Token], source: &[char]) -> impl Iterator<Item = Span>;
67
68 fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span> {
70 self.iter_matches(tokens, source).collect()
71 }
72}
73
74impl<P> PatternExt for P
75where
76 P: Pattern + ?Sized,
77{
78 fn iter_matches(&self, tokens: &[Token], source: &[char]) -> impl Iterator<Item = Span> {
79 MatchIter::new(self, tokens, source)
80 }
81}
82
83struct MatchIter<'a, 'b, 'c, P: ?Sized> {
84 pattern: &'a P,
85 tokens: &'b [Token],
86 source: &'c [char],
87 index: usize,
88}
89impl<'a, 'b, 'c, P> MatchIter<'a, 'b, 'c, P>
90where
91 P: Pattern + ?Sized,
92{
93 fn new(pattern: &'a P, tokens: &'b [Token], source: &'c [char]) -> Self {
94 Self {
95 pattern,
96 tokens,
97 source,
98 index: 0,
99 }
100 }
101}
102impl<P> Iterator for MatchIter<'_, '_, '_, P>
103where
104 P: Pattern + ?Sized,
105{
106 type Item = Span;
107
108 fn next(&mut self) -> Option<Self::Item> {
109 while self.index < self.tokens.len() {
110 if let Some(len) = self
111 .pattern
112 .matches(&self.tokens[self.index..], self.source)
113 {
114 let span = Span::new_with_len(self.index, len);
115 self.index += len.max(1);
116 return Some(span);
117 } else {
118 self.index += 1;
119 }
120 }
121
122 None
123 }
124}
125
126pub trait OwnedPatternExt {
127 fn or(self, other: impl Pattern + 'static) -> LongestMatchOf;
128}
129
130impl<P> OwnedPatternExt for P
131where
132 P: Pattern + 'static,
133{
134 fn or(self, other: impl Pattern + 'static) -> LongestMatchOf {
135 LongestMatchOf::new(vec![Box::new(self), Box::new(other)])
136 }
137}
138
139pub trait SingleTokenPattern: LSend {
142 fn matches_token(&self, token: &Token, source: &[char]) -> bool;
143}
144
145impl<S: SingleTokenPattern> Pattern for S {
146 fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize> {
147 if self.matches_token(tokens.first()?, source) {
148 Some(1)
149 } else {
150 None
151 }
152 }
153}
154
155impl<F: LSend + Fn(&Token, &[char]) -> bool> SingleTokenPattern for F {
156 fn matches_token(&self, token: &Token, source: &[char]) -> bool {
157 self(token, source)
158 }
159}
160
161pub trait DocPattern {
162 fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span>;
163}
164
165impl<P: PatternExt> DocPattern for P {
166 fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span> {
167 self.find_all_matches(document.get_tokens(), document.get_source())
168 }
169}