harper_core/
token_string_ext.rs1use crate::{Span, Token};
2use itertools::Itertools;
3use paste::paste;
4
/// Declares the per-kind accessor signatures inside the `TokenStringExt` trait.
///
/// For a kind name such as `word`, the expansion declares `first_word`, `last_word`,
/// `last_word_index`, `iter_word_indices` and `iter_words`. The bodies for `[Token]` are
/// generated by `create_fns_for!`.
macro_rules! create_decl_for {
    ($kind:ident) => {
        paste! {
            // First token of this kind, if any.
            fn [<first_ $kind>](&self) -> Option<&Token>;

            // Last token of this kind, if any.
            fn [<last_ $kind>](&self) -> Option<&Token>;

            // Index of the last token of this kind, if any.
            fn [<last_ $kind _index>](&self) -> Option<usize>;

            // Indices of every token of this kind; can be walked from either end.
            fn [<iter_ $kind _indices>](&self) -> impl DoubleEndedIterator<Item = usize> + '_;

            // Every token of this kind.
            fn [<iter_ $kind s>](&self) -> impl Iterator<Item = &Token> + '_;
        }
    };
}
20
/// Generates the per-kind accessor bodies for a slice-like `self`.
///
/// For a kind name such as `word`, every generated method selects tokens with the
/// `kind.is_word()` predicate. The method names match those declared by `create_decl_for!`.
macro_rules! create_fns_for {
    ($kind:ident) => {
        paste! {
            fn [<first_ $kind>](&self) -> Option<&Token> {
                self.iter().find(|tok| tok.kind.[<is_ $kind>]())
            }

            fn [<last_ $kind>](&self) -> Option<&Token> {
                // Search from the back so the first hit is the last match.
                self.iter().rfind(|tok| tok.kind.[<is_ $kind>]())
            }

            fn [<last_ $kind _index>](&self) -> Option<usize> {
                // `rposition` searches from the back but reports a front-based index.
                self.iter().rposition(|tok| tok.kind.[<is_ $kind>]())
            }

            fn [<iter_ $kind _indices>](&self) -> impl DoubleEndedIterator<Item = usize> + '_ {
                self.iter()
                    .enumerate()
                    .filter_map(|(idx, tok)| tok.kind.[<is_ $kind>]().then_some(idx))
            }

            fn [<iter_ $kind s>](&self) -> impl Iterator<Item = &Token> + '_ {
                self.iter().filter(|tok| tok.kind.[<is_ $kind>]())
            }
        }
    };
}
49
50mod private {
51 use crate::{Document, Token};
52
53 pub trait Sealed {}
54
55 impl Sealed for [Token] {}
56
57 impl Sealed for Document {}
58}
59
60pub trait TokenStringExt: private::Sealed {
62 fn first_sentence_word(&self) -> Option<&Token>;
63 fn first_non_whitespace(&self) -> Option<&Token>;
64 fn span(&self) -> Option<Span<char>>;
67
68 create_decl_for!(adjective);
69 create_decl_for!(apostrophe);
70 create_decl_for!(at);
71 create_decl_for!(comma);
72 create_decl_for!(conjunction);
73 create_decl_for!(chunk_terminator);
74 create_decl_for!(currency);
75 create_decl_for!(ellipsis);
76 create_decl_for!(hostname);
77 create_decl_for!(likely_homograph);
78 create_decl_for!(number);
79 create_decl_for!(noun);
80 create_decl_for!(paragraph_break);
81 create_decl_for!(pipe);
82 create_decl_for!(preposition);
83 create_decl_for!(punctuation);
84 create_decl_for!(quote);
85 create_decl_for!(sentence_terminator);
86 create_decl_for!(space);
87 create_decl_for!(unlintable);
88 create_decl_for!(verb);
89 create_decl_for!(word);
90 create_decl_for!(word_like);
91 create_decl_for!(heading_start);
92
93 fn get_rel(&self, index: isize) -> Option<&Token>
113 where
114 Self: AsRef<[Token]>,
115 {
116 let slice = self.as_ref();
117 let len = slice.len() as isize;
118
119 if index >= len || -index > len {
120 return None;
121 }
122
123 let idx = if index >= 0 { index } else { len + index } as usize;
124
125 slice.get(idx)
126 }
127
128 fn get_rel_slice(&self, rel_start: isize, inclusive_end: isize) -> Option<&[Token]>
141 where
142 Self: AsRef<[Token]>,
143 {
144 let slice = self.as_ref();
145 let len = slice.len() as isize;
146
147 let start_idx = if rel_start >= 0 {
149 rel_start
150 } else {
151 len + rel_start
152 } as usize;
153
154 let end_idx_plus_one = if inclusive_end >= 0 {
155 inclusive_end + 1 } else {
157 len + inclusive_end + 1
158 } as usize;
159
160 if start_idx >= slice.len()
162 || end_idx_plus_one > slice.len()
163 || start_idx >= end_idx_plus_one
164 {
165 return None;
166 }
167
168 Some(&slice[start_idx..end_idx_plus_one])
169 }
170
171 fn iter_linking_verb_indices(&self) -> impl Iterator<Item = usize> + '_;
172 fn iter_linking_verbs(&self) -> impl Iterator<Item = &Token> + '_;
173
174 fn iter_chunks(&self) -> impl Iterator<Item = &'_ [Token]> + '_;
183
184 fn iter_paragraphs(&self) -> impl Iterator<Item = &'_ [Token]> + '_;
187
188 fn iter_headings(&self) -> impl Iterator<Item = &'_ [Token]> + '_;
193
194 fn iter_sentences(&self) -> impl Iterator<Item = &'_ [Token]> + '_;
197
198 fn iter_sentences_mut(&mut self) -> impl Iterator<Item = &'_ mut [Token]> + '_;
201}
202
203impl TokenStringExt for [Token] {
204 create_fns_for!(adjective);
205 create_fns_for!(apostrophe);
206 create_fns_for!(at);
207 create_fns_for!(chunk_terminator);
208 create_fns_for!(comma);
209 create_fns_for!(conjunction);
210 create_fns_for!(currency);
211 create_fns_for!(ellipsis);
212 create_fns_for!(hostname);
213 create_fns_for!(likely_homograph);
214 create_fns_for!(noun);
215 create_fns_for!(number);
216 create_fns_for!(paragraph_break);
217 create_fns_for!(pipe);
218 create_fns_for!(preposition);
219 create_fns_for!(punctuation);
220 create_fns_for!(quote);
221 create_fns_for!(sentence_terminator);
222 create_fns_for!(space);
223 create_fns_for!(unlintable);
224 create_fns_for!(verb);
225 create_fns_for!(word_like);
226 create_fns_for!(word);
227 create_fns_for!(heading_start);
228
229 fn first_non_whitespace(&self) -> Option<&Token> {
230 self.iter().find(|t| !t.kind.is_whitespace())
231 }
232
233 fn first_sentence_word(&self) -> Option<&Token> {
234 let (w_idx, word) = self.iter().find_position(|v| v.kind.is_word())?;
235
236 let Some(u_idx) = self.iter().position(|v| v.kind.is_unlintable()) else {
237 return Some(word);
238 };
239
240 if w_idx < u_idx { Some(word) } else { None }
241 }
242
243 fn span(&self) -> Option<Span<char>> {
244 let min_max = self
245 .iter()
246 .flat_map(|v| [v.span.start, v.span.end].into_iter())
247 .minmax();
248
249 match min_max {
250 itertools::MinMaxResult::NoElements => None,
251 itertools::MinMaxResult::OneElement(min) => Some(Span::new(min, min)),
252 itertools::MinMaxResult::MinMax(min, max) => Some(Span::new(min, max)),
253 }
254 }
255
256 fn iter_linking_verb_indices(&self) -> impl Iterator<Item = usize> + '_ {
257 self.iter_word_indices().filter(|idx| {
258 let word = &self[*idx];
259 let Some(Some(meta)) = word.kind.as_word() else {
260 return false;
261 };
262
263 meta.is_linking_verb()
264 })
265 }
266
267 fn iter_linking_verbs(&self) -> impl Iterator<Item = &Token> + '_ {
268 self.iter_linking_verb_indices().map(|idx| &self[idx])
269 }
270
271 fn iter_chunks(&self) -> impl Iterator<Item = &'_ [Token]> + '_ {
272 self.split_inclusive(|tok| tok.kind.is_chunk_terminator())
273 }
274
275 fn iter_paragraphs(&self) -> impl Iterator<Item = &'_ [Token]> + '_ {
276 self.split_inclusive(|tok| tok.kind.is_paragraph_break())
277 }
278
279 fn iter_headings(&self) -> impl Iterator<Item = &'_ [Token]> + '_ {
280 self.iter_heading_start_indices().map(|start| {
281 let end = self[start..]
282 .iter()
283 .position(|t| t.kind.is_paragraph_break())
284 .unwrap_or(self[start..].len() - 1);
285
286 &self[start..=start + end]
287 })
288 }
289
290 fn iter_sentences(&self) -> impl Iterator<Item = &'_ [Token]> + '_ {
291 self.split_inclusive(|token| token.kind.is_sentence_terminator())
292 }
293
294 fn iter_sentences_mut(&mut self) -> impl Iterator<Item = &mut [Token]> + '_ {
295 struct SentIter<'a> {
296 rem: &'a mut [Token],
297 }
298
299 impl<'a> Iterator for SentIter<'a> {
300 type Item = &'a mut [Token];
301
302 fn next(&mut self) -> Option<Self::Item> {
303 if self.rem.is_empty() {
304 return None;
305 }
306 let split = self
307 .rem
308 .iter()
309 .position(|t| t.kind.is_sentence_terminator())
310 .map(|i| i + 1)
311 .unwrap_or(self.rem.len());
312 let tmp = core::mem::take(&mut self.rem);
313 let (sent, rest) = tmp.split_at_mut(split);
314 self.rem = rest;
315 Some(sent)
316 }
317 }
318
319 SentIter { rem: self }
320 }
321}