1use is_macro::Is;
2use serde::{Deserialize, Serialize};
3
4use crate::{
5 ConjunctionData, NounData, Number, PronounData, Punctuation, Quote, Tense, VerbData,
6 WordMetadata,
7};
8
/// The category of a token, together with whatever payload that category carries.
///
/// Serde stores this enum in adjacently tagged form: the variant name goes under the
/// `"kind"` key and the payload (if any) under the `"value"` key.
///
/// Variant order is significant: the derived `PartialOrd` compares variants by their
/// declaration order, so reordering them changes comparison results.
///
/// NOTE(review): accessors such as `as_punctuation` / `as_mut_punctuation` used further down
/// in this file are not written by hand here; presumably they are generated by the `Is`
/// derive — confirm against the `is_macro` documentation.
#[derive(Debug, Is, Clone, Serialize, Deserialize, Default, PartialOrd, Hash, Eq, PartialEq)]
#[serde(tag = "kind", content = "value")]
pub enum TokenKind {
    /// A word. The payload is `None` when no [`WordMetadata`] is attached to it.
    Word(Option<WordMetadata>),
    /// A punctuation mark; the wrapped [`Punctuation`] says which one.
    Punctuation(Punctuation),
    /// NOTE(review): presumably a decade expression (e.g. "1990s") — confirm with the lexer.
    Decade,
    /// A numeric token carrying its parsed [`Number`].
    Number(Number),
    /// Horizontal whitespace.
    /// NOTE(review): the `usize` is presumably the count of consecutive spaces — confirm.
    Space(usize),
    /// Line breaks.
    /// NOTE(review): the `usize` is presumably the count of consecutive newlines — confirm.
    Newline(usize),
    /// An email address token.
    EmailAddress,
    /// A URL token.
    Url,
    /// A hostname token.
    Hostname,
    /// The kind produced by `TokenKind::default()`.
    /// NOTE(review): presumably marks content that should not be linted — confirm.
    #[default]
    Unlintable,
    /// A break between paragraphs. Treated as a sentence terminator by
    /// `is_sentence_terminator`.
    ParagraphBreak,
    /// NOTE(review): presumably text that resembles a regular expression — confirm.
    Regexish,
}
31
32impl TokenKind {
33 pub fn is_open_square(&self) -> bool {
34 matches!(self, TokenKind::Punctuation(Punctuation::OpenSquare))
35 }
36
37 pub fn is_close_square(&self) -> bool {
38 matches!(self, TokenKind::Punctuation(Punctuation::CloseSquare))
39 }
40
41 pub fn is_pipe(&self) -> bool {
42 matches!(self, TokenKind::Punctuation(Punctuation::Pipe))
43 }
44
45 pub fn is_word_like(&self) -> bool {
48 matches!(
49 self,
50 TokenKind::Word(..)
51 | TokenKind::EmailAddress
52 | TokenKind::Hostname
53 | TokenKind::Decade
54 | TokenKind::Number(..)
55 )
56 }
57
58 pub fn is_possessive_nominal(&self) -> bool {
59 matches!(
60 self,
61 TokenKind::Word(Some(WordMetadata {
62 noun: Some(NounData {
63 is_possessive: Some(true),
64 ..
65 }),
66 ..
67 })) | TokenKind::Word(Some(WordMetadata {
68 pronoun: Some(PronounData {
69 is_possessive: Some(true),
70 ..
71 }),
72 ..
73 }))
74 )
75 }
76
77 pub fn is_possessive_noun(&self) -> bool {
78 matches!(
79 self,
80 TokenKind::Word(Some(WordMetadata {
81 noun: Some(NounData {
82 is_possessive: Some(true),
83 ..
84 }),
85 ..
86 }))
87 )
88 }
89
90 pub fn is_possessive_pronoun(&self) -> bool {
91 matches!(
92 self,
93 TokenKind::Word(Some(WordMetadata {
94 pronoun: Some(PronounData {
95 is_possessive: Some(true),
96 ..
97 }),
98 ..
99 }))
100 )
101 }
102
103 pub fn is_proper_noun(&self) -> bool {
104 matches!(
105 self,
106 TokenKind::Word(Some(WordMetadata {
107 noun: Some(NounData {
108 is_proper: Some(true),
109 ..
110 }),
111 ..
112 }))
113 )
114 }
115
116 pub fn is_conjunction(&self) -> bool {
117 matches!(
118 self,
119 TokenKind::Word(Some(WordMetadata {
120 conjunction: Some(ConjunctionData {}),
121 ..
122 }))
123 )
124 }
125
126 pub(crate) fn is_chunk_terminator(&self) -> bool {
127 if self.is_sentence_terminator() {
128 return true;
129 }
130
131 match self {
132 TokenKind::Punctuation(punct) => {
133 matches!(
134 punct,
135 Punctuation::Comma | Punctuation::Quote { .. } | Punctuation::Colon
136 )
137 }
138 _ => false,
139 }
140 }
141
142 pub(crate) fn is_sentence_terminator(&self) -> bool {
143 match self {
144 TokenKind::Punctuation(punct) => [
145 Punctuation::Period,
146 Punctuation::Bang,
147 Punctuation::Question,
148 ]
149 .contains(punct),
150 TokenKind::ParagraphBreak => true,
151 _ => false,
152 }
153 }
154
155 pub fn is_currency(&self) -> bool {
156 matches!(self, TokenKind::Punctuation(Punctuation::Currency(..)))
157 }
158
159 pub fn is_preposition(&self) -> bool {
160 matches!(
161 self,
162 TokenKind::Word(Some(WordMetadata {
163 preposition: true,
164 ..
165 }))
166 )
167 }
168
169 pub fn is_determiner(&self) -> bool {
170 matches!(
171 self,
172 TokenKind::Word(Some(WordMetadata {
173 determiner: true,
174 ..
175 }))
176 )
177 }
178
179 pub fn is_ellipsis(&self) -> bool {
180 matches!(self, TokenKind::Punctuation(Punctuation::Ellipsis))
181 }
182
183 pub fn is_hyphen(&self) -> bool {
184 matches!(self, TokenKind::Punctuation(Punctuation::Hyphen))
185 }
186
187 pub fn is_adjective(&self) -> bool {
188 matches!(
189 self,
190 TokenKind::Word(Some(WordMetadata {
191 adjective: Some(_),
192 ..
193 }))
194 )
195 }
196
197 pub fn is_present_tense_verb(&self) -> bool {
198 matches!(
199 self,
200 TokenKind::Word(Some(WordMetadata {
201 verb: Some(VerbData {
202 tense: Some(Tense::Present),
203 ..
204 }),
205 ..
206 }))
207 )
208 }
209
210 pub fn is_adverb(&self) -> bool {
211 matches!(
212 self,
213 TokenKind::Word(Some(WordMetadata {
214 adverb: Some(_),
215 ..
216 }))
217 )
218 }
219
220 pub fn is_swear(&self) -> bool {
221 matches!(
222 self,
223 TokenKind::Word(Some(WordMetadata {
224 swear: Some(true),
225 ..
226 }))
227 )
228 }
229
230 pub fn matches_variant_of(&self, other: &Self) -> bool {
233 self.with_default_data() == other.with_default_data()
234 }
235
236 pub fn with_default_data(&self) -> Self {
240 match self {
241 TokenKind::Word(_) => TokenKind::Word(Default::default()),
242 TokenKind::Punctuation(_) => TokenKind::Punctuation(Default::default()),
243 TokenKind::Number(..) => TokenKind::Number(Default::default()),
244 TokenKind::Space(_) => TokenKind::Space(Default::default()),
245 TokenKind::Newline(_) => TokenKind::Newline(Default::default()),
246 _ => self.clone(),
247 }
248 }
249}
250
251impl TokenKind {
252 pub fn blank_word() -> Self {
254 Self::Word(None)
255 }
256}
257
258impl TokenKind {
259 pub fn as_mut_quote(&mut self) -> Option<&mut Quote> {
260 self.as_mut_punctuation()?.as_mut_quote()
261 }
262
263 pub fn as_quote(&self) -> Option<&Quote> {
264 self.as_punctuation()?.as_quote()
265 }
266
267 pub fn is_quote(&self) -> bool {
268 matches!(self, TokenKind::Punctuation(Punctuation::Quote(_)))
269 }
270
271 pub fn is_apostrophe(&self) -> bool {
272 matches!(self, TokenKind::Punctuation(Punctuation::Apostrophe))
273 }
274
275 pub fn is_period(&self) -> bool {
276 matches!(self, TokenKind::Punctuation(Punctuation::Period))
277 }
278
279 pub fn is_at(&self) -> bool {
280 matches!(self, TokenKind::Punctuation(Punctuation::At))
281 }
282
283 pub fn is_case_separator(&self) -> bool {
287 matches!(self, TokenKind::Punctuation(Punctuation::Underscore))
288 || matches!(self, TokenKind::Punctuation(Punctuation::Hyphen))
289 }
290
291 pub fn is_verb(&self) -> bool {
292 let TokenKind::Word(Some(metadata)) = self else {
293 return false;
294 };
295
296 metadata.is_verb()
297 }
298
299 pub fn is_auxiliary_verb(&self) -> bool {
300 let TokenKind::Word(Some(metadata)) = self else {
301 return false;
302 };
303
304 metadata.is_auxiliary_verb()
305 }
306
307 pub fn is_linking_verb(&self) -> bool {
308 let TokenKind::Word(Some(metadata)) = self else {
309 return false;
310 };
311
312 metadata.is_linking_verb()
313 }
314
315 pub fn is_not_plural_nominal(&self) -> bool {
316 let TokenKind::Word(Some(metadata)) = self else {
317 return true;
318 };
319
320 metadata.is_not_plural_noun() || metadata.is_not_plural_pronoun()
321 }
322
323 pub fn is_not_plural_noun(&self) -> bool {
324 let TokenKind::Word(Some(metadata)) = self else {
325 return true;
326 };
327
328 metadata.is_not_plural_noun()
329 }
330
331 pub fn is_not_plural_pronoun(&self) -> bool {
332 let TokenKind::Word(Some(metadata)) = self else {
333 return true;
334 };
335
336 metadata.is_not_plural_pronoun()
337 }
338
339 pub fn is_common_word(&self) -> bool {
340 let TokenKind::Word(Some(metadata)) = self else {
341 return true;
342 };
343
344 metadata.common
345 }
346
347 pub fn is_plural_nominal(&self) -> bool {
348 let TokenKind::Word(Some(metadata)) = self else {
349 return false;
350 };
351
352 metadata.is_plural_noun() || metadata.is_plural_pronoun()
353 }
354
355 pub fn is_plural_pronoun(&self) -> bool {
356 let TokenKind::Word(Some(metadata)) = self else {
357 return false;
358 };
359
360 metadata.is_plural_pronoun()
361 }
362
363 pub fn is_plural_noun(&self) -> bool {
364 let TokenKind::Word(Some(metadata)) = self else {
365 return false;
366 };
367
368 metadata.is_plural_noun()
369 }
370
371 pub fn is_nominal(&self) -> bool {
372 let TokenKind::Word(Some(metadata)) = self else {
373 return false;
374 };
375
376 metadata.is_noun() || metadata.is_pronoun()
377 }
378
379 pub fn is_noun(&self) -> bool {
380 let TokenKind::Word(Some(metadata)) = self else {
381 return false;
382 };
383
384 metadata.is_noun()
385 }
386
387 pub fn is_pronoun(&self) -> bool {
388 let TokenKind::Word(Some(metadata)) = self else {
389 return false;
390 };
391
392 metadata.is_pronoun()
393 }
394
395 pub fn is_likely_homograph(&self) -> bool {
396 let TokenKind::Word(Some(metadata)) = self else {
397 return false;
398 };
399
400 metadata.is_likely_homograph()
401 }
402
403 pub fn is_comma(&self) -> bool {
404 matches!(self, TokenKind::Punctuation(Punctuation::Comma))
405 }
406
407 pub fn is_whitespace(&self) -> bool {
409 matches!(self, TokenKind::Space(_) | TokenKind::Newline(_))
410 }
411}