harper_core/patterns/
mod.rs1use crate::{Document, LSend, Span, Token};
9
10mod any_pattern;
11mod implies_quantity;
12mod indefinite_article;
13mod inflection_of_be;
14mod invert;
15mod nominal_phrase;
16mod upos_set;
17mod whitespace_pattern;
18mod within_edit_distance;
19mod word;
20mod word_set;
21
22pub use any_pattern::AnyPattern;
23pub use implies_quantity::ImpliesQuantity;
24pub use indefinite_article::IndefiniteArticle;
25pub use inflection_of_be::InflectionOfBe;
26pub use invert::Invert;
27pub use nominal_phrase::NominalPhrase;
28pub use upos_set::UPOSSet;
29pub use whitespace_pattern::WhitespacePattern;
30pub use within_edit_distance::WithinEditDistance;
31pub use word::Word;
32pub use word_set::WordSet;
33
34pub trait Pattern: LSend {
35 fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize>;
39}
40
41pub trait PatternExt {
42 fn iter_matches(&self, tokens: &[Token], source: &[char]) -> impl Iterator<Item = Span>;
43
44 fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span> {
46 self.iter_matches(tokens, source).collect()
47 }
48}
49
50impl<P> PatternExt for P
51where
52 P: Pattern + ?Sized,
53{
54 fn iter_matches(&self, tokens: &[Token], source: &[char]) -> impl Iterator<Item = Span> {
55 MatchIter::new(self, tokens, source)
56 }
57}
58
59struct MatchIter<'a, 'b, 'c, P: ?Sized> {
60 pattern: &'a P,
61 tokens: &'b [Token],
62 source: &'c [char],
63 index: usize,
64}
65impl<'a, 'b, 'c, P> MatchIter<'a, 'b, 'c, P>
66where
67 P: Pattern + ?Sized,
68{
69 fn new(pattern: &'a P, tokens: &'b [Token], source: &'c [char]) -> Self {
70 Self {
71 pattern,
72 tokens,
73 source,
74 index: 0,
75 }
76 }
77}
78impl<P> Iterator for MatchIter<'_, '_, '_, P>
79where
80 P: Pattern + ?Sized,
81{
82 type Item = Span;
83
84 fn next(&mut self) -> Option<Self::Item> {
85 while self.index < self.tokens.len() {
86 if let Some(len) = self
87 .pattern
88 .matches(&self.tokens[self.index..], self.source)
89 {
90 let span = Span::new_with_len(self.index, len);
91 self.index += len.max(1);
92 return Some(span);
93 } else {
94 self.index += 1;
95 }
96 }
97
98 None
99 }
100}
101
102pub trait SingleTokenPattern: LSend {
105 fn matches_token(&self, token: &Token, source: &[char]) -> bool;
106}
107
108impl<S: SingleTokenPattern> Pattern for S {
109 fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize> {
110 if self.matches_token(tokens.first()?, source) {
111 Some(1)
112 } else {
113 None
114 }
115 }
116}
117
118impl<F: LSend + Fn(&Token, &[char]) -> bool> SingleTokenPattern for F {
119 fn matches_token(&self, token: &Token, source: &[char]) -> bool {
120 self(token, source)
121 }
122}
123
124pub trait DocPattern {
125 fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span>;
126}
127
128impl<P: PatternExt> DocPattern for P {
129 fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span> {
130 self.find_all_matches(document.get_tokens(), document.get_source())
131 }
132}