harper_core/
token_string_ext.rs1use crate::{Span, Token};
2use itertools::Itertools;
3use paste::paste;
4
/// Declares, inside a trait body, the family of per-kind lookup methods for
/// the token kind named by `$thing`:
/// `first_<thing>`, `last_<thing>`, `last_<thing>_index`,
/// `iter_<thing>_indices` and `iter_<thing>s`.
///
/// The matching bodies are produced by `create_fns_for!`.
macro_rules! create_decl_for {
    ($thing:ident) => {
        paste! {
            /// Returns the first token of this kind, if there is one.
            fn [<first_ $thing>](&self) -> Option<&Token>;

            /// Returns the last token of this kind, if there is one.
            fn [<last_ $thing>](&self) -> Option<&Token>;

            /// Returns the index of the last token of this kind, if there is one.
            fn [<last_ $thing _index>](&self) -> Option<usize>;

            /// Iterates over the indices of every token of this kind.
            fn [<iter_ $thing _indices>](&self) -> impl DoubleEndedIterator<Item = usize> + '_;

            /// Iterates over every token of this kind.
            fn [<iter_ $thing s>](&self) -> impl Iterator<Item = &Token> + '_;
        }
    };
}
20
/// Expands to the method bodies matching the declarations emitted by
/// `create_decl_for!` for the token kind named by `$thing`.
///
/// Every generated method tests tokens with `kind.is_<thing>()` and expects
/// `self` to offer slice-style `iter()`.
macro_rules! create_fns_for {
    ($thing:ident) => {
        paste! {
            fn [<first_ $thing>](&self) -> Option<&Token> {
                self.iter().find(|tok| tok.kind.[<is_ $thing>]())
            }

            fn [<last_ $thing>](&self) -> Option<&Token> {
                // Search from the back so the match closest to the end wins.
                self.iter().rfind(|tok| tok.kind.[<is_ $thing>]())
            }

            fn [<last_ $thing _index>](&self) -> Option<usize> {
                // `rposition` searches from the back but reports the index
                // counted from the front.
                self.iter().rposition(|tok| tok.kind.[<is_ $thing>]())
            }

            fn [<iter_ $thing _indices>](&self) -> impl DoubleEndedIterator<Item = usize> + '_ {
                self.iter()
                    .enumerate()
                    .filter_map(|(idx, tok)| tok.kind.[<is_ $thing>]().then_some(idx))
            }

            fn [<iter_ $thing s>](&self) -> impl Iterator<Item = &Token> + '_ {
                self.iter().filter(|tok| tok.kind.[<is_ $thing>]())
            }
        }
    };
}
49
50pub trait TokenStringExt {
52 fn first_sentence_word(&self) -> Option<&Token>;
53 fn first_non_whitespace(&self) -> Option<&Token>;
54 fn span(&self) -> Option<Span<char>>;
57
58 create_decl_for!(adjective);
59 create_decl_for!(apostrophe);
60 create_decl_for!(at);
61 create_decl_for!(comma);
62 create_decl_for!(conjunction);
63 create_decl_for!(chunk_terminator);
64 create_decl_for!(currency);
65 create_decl_for!(ellipsis);
66 create_decl_for!(hostname);
67 create_decl_for!(likely_homograph);
68 create_decl_for!(number);
69 create_decl_for!(noun);
70 create_decl_for!(paragraph_break);
71 create_decl_for!(pipe);
72 create_decl_for!(preposition);
73 create_decl_for!(punctuation);
74 create_decl_for!(quote);
75 create_decl_for!(sentence_terminator);
76 create_decl_for!(space);
77 create_decl_for!(unlintable);
78 create_decl_for!(verb);
79 create_decl_for!(word);
80 create_decl_for!(word_like);
81
82 fn iter_linking_verb_indices(&self) -> impl Iterator<Item = usize> + '_;
83 fn iter_linking_verbs(&self) -> impl Iterator<Item = &Token> + '_;
84
85 fn iter_chunks(&self) -> impl Iterator<Item = &'_ [Token]> + '_;
94
95 fn iter_paragraphs(&self) -> impl Iterator<Item = &'_ [Token]> + '_;
98
99 fn iter_sentences(&self) -> impl Iterator<Item = &'_ [Token]> + '_;
102
103 fn iter_sentences_mut(&mut self) -> impl Iterator<Item = &'_ mut [Token]> + '_;
106}
107
108impl TokenStringExt for [Token] {
109 create_fns_for!(adjective);
110 create_fns_for!(apostrophe);
111 create_fns_for!(at);
112 create_fns_for!(chunk_terminator);
113 create_fns_for!(comma);
114 create_fns_for!(conjunction);
115 create_fns_for!(currency);
116 create_fns_for!(ellipsis);
117 create_fns_for!(hostname);
118 create_fns_for!(likely_homograph);
119 create_fns_for!(noun);
120 create_fns_for!(number);
121 create_fns_for!(paragraph_break);
122 create_fns_for!(pipe);
123 create_fns_for!(preposition);
124 create_fns_for!(punctuation);
125 create_fns_for!(quote);
126 create_fns_for!(sentence_terminator);
127 create_fns_for!(space);
128 create_fns_for!(unlintable);
129 create_fns_for!(verb);
130 create_fns_for!(word_like);
131 create_fns_for!(word);
132
133 fn first_non_whitespace(&self) -> Option<&Token> {
134 self.iter().find(|t| !t.kind.is_whitespace())
135 }
136
137 fn first_sentence_word(&self) -> Option<&Token> {
138 let (w_idx, word) = self.iter().find_position(|v| v.kind.is_word())?;
139
140 let Some(u_idx) = self.iter().position(|v| v.kind.is_unlintable()) else {
141 return Some(word);
142 };
143
144 if w_idx < u_idx { Some(word) } else { None }
145 }
146
147 fn span(&self) -> Option<Span<char>> {
148 let min_max = self
149 .iter()
150 .flat_map(|v| [v.span.start, v.span.end].into_iter())
151 .minmax();
152
153 match min_max {
154 itertools::MinMaxResult::NoElements => None,
155 itertools::MinMaxResult::OneElement(min) => Some(Span::new(min, min)),
156 itertools::MinMaxResult::MinMax(min, max) => Some(Span::new(min, max)),
157 }
158 }
159
160 fn iter_linking_verb_indices(&self) -> impl Iterator<Item = usize> + '_ {
161 self.iter_word_indices().filter(|idx| {
162 let word = &self[*idx];
163 let Some(Some(meta)) = word.kind.as_word() else {
164 return false;
165 };
166
167 meta.is_linking_verb()
168 })
169 }
170
171 fn iter_linking_verbs(&self) -> impl Iterator<Item = &Token> + '_ {
172 self.iter_linking_verb_indices().map(|idx| &self[idx])
173 }
174
175 fn iter_chunks(&self) -> impl Iterator<Item = &'_ [Token]> + '_ {
176 let first_chunk = self
177 .iter_chunk_terminator_indices()
178 .next()
179 .map(|first_term| &self[0..=first_term]);
180
181 let rest = self
182 .iter_chunk_terminator_indices()
183 .tuple_windows()
184 .map(move |(a, b)| &self[a + 1..=b]);
185
186 let last = if let Some(last_i) = self.last_chunk_terminator_index() {
187 if last_i + 1 < self.len() {
188 Some(&self[last_i + 1..])
189 } else {
190 None
191 }
192 } else {
193 Some(self)
194 };
195
196 first_chunk.into_iter().chain(rest).chain(last)
197 }
198
199 fn iter_paragraphs(&self) -> impl Iterator<Item = &'_ [Token]> + '_ {
200 let first_pg = self
201 .iter_paragraph_break_indices()
202 .next()
203 .map(|first_term| &self[0..=first_term]);
204
205 let rest = self
206 .iter_paragraph_break_indices()
207 .tuple_windows()
208 .map(move |(a, b)| &self[a + 1..=b]);
209
210 let last_pg = if let Some(last_i) = self.last_paragraph_break_index() {
211 if last_i + 1 < self.len() {
212 Some(&self[last_i + 1..])
213 } else {
214 None
215 }
216 } else {
217 Some(self)
218 };
219
220 first_pg.into_iter().chain(rest).chain(last_pg)
221 }
222
223 fn iter_sentences(&self) -> impl Iterator<Item = &'_ [Token]> + '_ {
224 let first_sentence = self
225 .iter_sentence_terminator_indices()
226 .next()
227 .map(|first_term| &self[0..=first_term]);
228
229 let rest = self
230 .iter_sentence_terminator_indices()
231 .tuple_windows()
232 .map(move |(a, b)| &self[a + 1..=b]);
233
234 let last_sentence = if let Some(last_i) = self.last_sentence_terminator_index() {
235 if last_i + 1 < self.len() {
236 Some(&self[last_i + 1..])
237 } else {
238 None
239 }
240 } else {
241 Some(self)
242 };
243
244 first_sentence.into_iter().chain(rest).chain(last_sentence)
245 }
246
247 fn iter_sentences_mut(&mut self) -> impl Iterator<Item = &mut [Token]> + '_ {
248 struct SentIter<'a> {
249 rem: &'a mut [Token],
250 }
251
252 impl<'a> Iterator for SentIter<'a> {
253 type Item = &'a mut [Token];
254
255 fn next(&mut self) -> Option<Self::Item> {
256 if self.rem.is_empty() {
257 return None;
258 }
259 let split = self
260 .rem
261 .iter()
262 .position(|t| t.kind.is_sentence_terminator())
263 .map(|i| i + 1)
264 .unwrap_or(self.rem.len());
265 let tmp = core::mem::take(&mut self.rem);
266 let (sent, rest) = tmp.split_at_mut(split);
267 self.rem = rest;
268 Some(sent)
269 }
270 }
271
272 SentIter { rem: self }
273 }
274}