harper_core/
token_string_ext.rs1use crate::{Span, Token, TokenKind};
2use itertools::Itertools;
3use paste::paste;
4
/// Declares the per-kind lookup methods of `TokenStringExt`.
///
/// Given a kind name such as `word`, this expands (through `paste!`) to the
/// signatures `first_word`, `last_word`, `last_word_index`,
/// `iter_word_indices` and `iter_words`. The matching bodies are generated by
/// `create_fns_for!`.
macro_rules! create_decl_for {
    ($kind:ident) => {
        paste! {
            /// Returns the first token of this kind, if there is one.
            fn [<first_ $kind>](&self) -> Option<Token>;

            /// Returns the last token of this kind, if there is one.
            fn [<last_ $kind>](&self) -> Option<Token>;

            /// Returns the index of the last token of this kind, if there is one.
            fn [<last_ $kind _index>](&self) -> Option<usize>;

            /// Iterates over the indices of every token of this kind.
            fn [<iter_ $kind _indices>](&self) -> impl Iterator<Item = usize> + '_;

            /// Iterates over every token of this kind.
            fn [<iter_ $kind s>](&self) -> impl Iterator<Item = Token> + '_;
        }
    };
}
20
/// Generates the bodies of the per-kind lookup methods declared by
/// `create_decl_for!`.
///
/// Every generated method decides membership by calling `is_<kind>()` on a
/// token's `kind` field, so that predicate must exist for each kind this
/// macro is invoked with.
macro_rules! create_fns_for {
    ($kind:ident) => {
        paste! {
            fn [<first_ $kind>](&self) -> Option<Token> {
                self.iter().copied().find(|tok| tok.kind.[<is_ $kind>]())
            }

            fn [<last_ $kind>](&self) -> Option<Token> {
                // Search from the back so the match closest to the end wins.
                self.iter().rfind(|tok| tok.kind.[<is_ $kind>]()).copied()
            }

            fn [<last_ $kind _index>](&self) -> Option<usize> {
                // `rposition` scans from the back but reports the index as
                // counted from the front.
                self.iter().rposition(|tok| tok.kind.[<is_ $kind>]())
            }

            fn [<iter_ $kind _indices>](&self) -> impl Iterator<Item = usize> + '_ {
                self.iter()
                    .enumerate()
                    .filter_map(|(idx, tok)| tok.kind.[<is_ $kind>]().then_some(idx))
            }

            fn [<iter_ $kind s>](&self) -> impl Iterator<Item = Token> + '_ {
                self.iter().copied().filter(|tok| tok.kind.[<is_ $kind>]())
            }
        }
    };
}
49
50pub trait TokenStringExt {
52 fn first_sentence_word(&self) -> Option<Token>;
53 fn first_non_whitespace(&self) -> Option<Token>;
54 fn span(&self) -> Option<Span>;
57
58 create_decl_for!(word);
59 create_decl_for!(word_like);
60 create_decl_for!(conjunction);
61 create_decl_for!(space);
62 create_decl_for!(apostrophe);
63 create_decl_for!(pipe);
64 create_decl_for!(quote);
65 create_decl_for!(number);
66 create_decl_for!(at);
67 create_decl_for!(ellipsis);
68 create_decl_for!(unlintable);
69 create_decl_for!(sentence_terminator);
70 create_decl_for!(paragraph_break);
71 create_decl_for!(chunk_terminator);
72 create_decl_for!(punctuation);
73 create_decl_for!(currency);
74 create_decl_for!(likely_homograph);
75 create_decl_for!(comma);
76
77 fn iter_linking_verb_indices(&self) -> impl Iterator<Item = usize> + '_;
78 fn iter_linking_verbs(&self) -> impl Iterator<Item = Token> + '_;
79
80 fn iter_chunks(&self) -> impl Iterator<Item = &'_ [Token]> + '_;
89
90 fn iter_paragraphs(&self) -> impl Iterator<Item = &'_ [Token]> + '_;
93
94 fn iter_sentences(&self) -> impl Iterator<Item = &'_ [Token]> + '_;
97}
98
99impl TokenStringExt for [Token] {
100 create_fns_for!(word);
101 create_fns_for!(word_like);
102 create_fns_for!(conjunction);
103 create_fns_for!(space);
104 create_fns_for!(apostrophe);
105 create_fns_for!(pipe);
106 create_fns_for!(quote);
107 create_fns_for!(number);
108 create_fns_for!(at);
109 create_fns_for!(punctuation);
110 create_fns_for!(ellipsis);
111 create_fns_for!(unlintable);
112 create_fns_for!(sentence_terminator);
113 create_fns_for!(paragraph_break);
114 create_fns_for!(chunk_terminator);
115 create_fns_for!(currency);
116 create_fns_for!(likely_homograph);
117 create_fns_for!(comma);
118
119 fn first_non_whitespace(&self) -> Option<Token> {
120 self.iter().find(|t| !t.kind.is_whitespace()).copied()
121 }
122
123 fn first_sentence_word(&self) -> Option<Token> {
124 let (w_idx, word) = self.iter().find_position(|v| v.kind.is_word())?;
125
126 let Some(u_idx) = self.iter().position(|v| v.kind.is_unlintable()) else {
127 return Some(*word);
128 };
129
130 if w_idx < u_idx {
131 Some(*word)
132 } else {
133 None
134 }
135 }
136
137 fn span(&self) -> Option<Span> {
138 let min_max = self
139 .iter()
140 .flat_map(|v| [v.span.start, v.span.end].into_iter())
141 .minmax();
142
143 match min_max {
144 itertools::MinMaxResult::NoElements => None,
145 itertools::MinMaxResult::OneElement(min) => Some(Span::new(min, min)),
146 itertools::MinMaxResult::MinMax(min, max) => Some(Span::new(min, max)),
147 }
148 }
149
150 fn iter_linking_verb_indices(&self) -> impl Iterator<Item = usize> + '_ {
151 self.iter_word_indices().filter(|idx| {
152 let word = self[*idx];
153 let TokenKind::Word(word) = word.kind else {
154 panic!("Should be unreachable.");
155 };
156
157 word.is_linking_verb()
158 })
159 }
160
161 fn iter_linking_verbs(&self) -> impl Iterator<Item = Token> + '_ {
162 self.iter_linking_verb_indices().map(|idx| self[idx])
163 }
164
165 fn iter_chunks(&self) -> impl Iterator<Item = &'_ [Token]> + '_ {
166 let first_chunk = self
167 .iter_chunk_terminator_indices()
168 .next()
169 .map(|first_term| &self[0..=first_term]);
170
171 let rest = self
172 .iter_chunk_terminator_indices()
173 .tuple_windows()
174 .map(move |(a, b)| &self[a + 1..=b]);
175
176 let last = if let Some(last_i) = self.last_chunk_terminator_index() {
177 if last_i + 1 < self.len() {
178 Some(&self[last_i + 1..])
179 } else {
180 None
181 }
182 } else {
183 Some(self)
184 };
185
186 first_chunk.into_iter().chain(rest).chain(last)
187 }
188
189 fn iter_paragraphs(&self) -> impl Iterator<Item = &'_ [Token]> + '_ {
190 let first_pg = self
191 .iter_paragraph_break_indices()
192 .next()
193 .map(|first_term| &self[0..=first_term]);
194
195 let rest = self
196 .iter_paragraph_break_indices()
197 .tuple_windows()
198 .map(move |(a, b)| &self[a + 1..=b]);
199
200 let last_pg = if let Some(last_i) = self.last_paragraph_break_index() {
201 if last_i + 1 < self.len() {
202 Some(&self[last_i + 1..])
203 } else {
204 None
205 }
206 } else {
207 Some(self)
208 };
209
210 first_pg.into_iter().chain(rest).chain(last_pg)
211 }
212
213 fn iter_sentences(&self) -> impl Iterator<Item = &'_ [Token]> + '_ {
214 let first_sentence = self
215 .iter_sentence_terminator_indices()
216 .next()
217 .map(|first_term| &self[0..=first_term]);
218
219 let rest = self
220 .iter_sentence_terminator_indices()
221 .tuple_windows()
222 .map(move |(a, b)| &self[a + 1..=b]);
223
224 let last_sentence = if let Some(last_i) = self.last_sentence_terminator_index() {
225 if last_i + 1 < self.len() {
226 Some(&self[last_i + 1..])
227 } else {
228 None
229 }
230 } else {
231 Some(self)
232 };
233
234 first_sentence.into_iter().chain(rest).chain(last_sentence)
235 }
236}