1use harper_brill::UPOS;
2use is_macro::Is;
3use serde::{Deserialize, Serialize};
4
5use crate::{
6 ConjunctionData, NounData, Number, PronounData, Punctuation, Quote, VerbData, VerbForm,
7 WordMetadata,
8};
9
10#[derive(Debug, Is, Clone, Serialize, Deserialize, Default, PartialOrd, Hash, Eq, PartialEq)]
11#[serde(tag = "kind", content = "value")]
12pub enum TokenKind {
13 Word(Option<WordMetadata>),
15 Punctuation(Punctuation),
16 Decade,
17 Number(Number),
18 Space(usize),
20 Newline(usize),
22 EmailAddress,
23 Url,
24 Hostname,
25 #[default]
28 Unlintable,
29 ParagraphBreak,
30 Regexish,
31}
32
33impl TokenKind {
34 pub fn is_open_square(&self) -> bool {
35 matches!(self, TokenKind::Punctuation(Punctuation::OpenSquare))
36 }
37
38 pub fn is_close_square(&self) -> bool {
39 matches!(self, TokenKind::Punctuation(Punctuation::CloseSquare))
40 }
41
42 pub fn is_pipe(&self) -> bool {
43 matches!(self, TokenKind::Punctuation(Punctuation::Pipe))
44 }
45
46 pub fn is_word_like(&self) -> bool {
49 matches!(
50 self,
51 TokenKind::Word(..)
52 | TokenKind::EmailAddress
53 | TokenKind::Hostname
54 | TokenKind::Decade
55 | TokenKind::Number(..)
56 )
57 }
58
59 pub fn is_possessive_nominal(&self) -> bool {
60 matches!(
61 self,
62 TokenKind::Word(Some(WordMetadata {
63 noun: Some(NounData {
64 is_possessive: Some(true),
65 ..
66 }),
67 ..
68 })) | TokenKind::Word(Some(WordMetadata {
69 pronoun: Some(PronounData {
70 is_possessive: Some(true),
71 ..
72 }),
73 ..
74 }))
75 )
76 }
77
78 pub fn is_possessive_noun(&self) -> bool {
79 matches!(
80 self,
81 TokenKind::Word(Some(WordMetadata {
82 noun: Some(NounData {
83 is_possessive: Some(true),
84 ..
85 }),
86 ..
87 }))
88 )
89 }
90
91 pub fn is_possessive_pronoun(&self) -> bool {
92 matches!(
93 self,
94 TokenKind::Word(Some(WordMetadata {
95 pronoun: Some(PronounData {
96 is_possessive: Some(true),
97 ..
98 }),
99 ..
100 }))
101 )
102 }
103
104 pub fn is_proper_noun(&self) -> bool {
105 matches!(
106 self,
107 TokenKind::Word(Some(WordMetadata {
108 noun: Some(NounData {
109 is_proper: Some(true),
110 ..
111 }),
112 ..
113 }))
114 )
115 }
116
117 pub fn is_conjunction(&self) -> bool {
118 matches!(
119 self,
120 TokenKind::Word(Some(WordMetadata {
121 conjunction: Some(ConjunctionData {}),
122 ..
123 }))
124 )
125 }
126
127 pub(crate) fn is_chunk_terminator(&self) -> bool {
128 if self.is_sentence_terminator() {
129 return true;
130 }
131
132 match self {
133 TokenKind::Punctuation(punct) => {
134 matches!(
135 punct,
136 Punctuation::Comma | Punctuation::Quote { .. } | Punctuation::Colon
137 )
138 }
139 _ => false,
140 }
141 }
142
143 pub(crate) fn is_sentence_terminator(&self) -> bool {
144 match self {
145 TokenKind::Punctuation(punct) => [
146 Punctuation::Period,
147 Punctuation::Bang,
148 Punctuation::Question,
149 ]
150 .contains(punct),
151 TokenKind::ParagraphBreak => true,
152 _ => false,
153 }
154 }
155
156 pub fn is_currency(&self) -> bool {
157 matches!(self, TokenKind::Punctuation(Punctuation::Currency(..)))
158 }
159
160 pub fn is_preposition(&self) -> bool {
161 matches!(
162 self,
163 TokenKind::Word(Some(WordMetadata {
164 preposition: true,
165 ..
166 }))
167 )
168 }
169
170 pub fn is_ellipsis(&self) -> bool {
171 matches!(self, TokenKind::Punctuation(Punctuation::Ellipsis))
172 }
173
174 pub fn is_hyphen(&self) -> bool {
175 matches!(self, TokenKind::Punctuation(Punctuation::Hyphen))
176 }
177
178 pub fn is_adjective(&self) -> bool {
179 matches!(
180 self,
181 TokenKind::Word(Some(WordMetadata {
182 adjective: Some(_),
183 ..
184 }))
185 )
186 }
187
188 pub fn is_verb_lemma(&self) -> bool {
189 matches!(
190 self,
191 TokenKind::Word(Some(WordMetadata {
192 verb: Some(VerbData {
193 verb_form: Some(VerbForm::LemmaForm),
194 ..
195 }),
196 ..
197 }))
198 )
199 }
200
201 pub fn is_verb_past_form(&self) -> bool {
202 matches!(
203 self,
204 TokenKind::Word(Some(WordMetadata {
205 verb: Some(VerbData {
206 verb_form: Some(VerbForm::PastForm),
207 ..
208 }),
209 ..
210 }))
211 )
212 }
213
214 pub fn is_verb_progressive_form(&self) -> bool {
215 matches!(
216 self,
217 TokenKind::Word(Some(WordMetadata {
218 verb: Some(VerbData {
219 verb_form: Some(VerbForm::ProgressiveForm),
220 ..
221 }),
222 ..
223 }))
224 )
225 }
226
227 pub fn is_verb_third_person_singular_present_form(&self) -> bool {
228 matches!(
229 self,
230 TokenKind::Word(Some(WordMetadata {
231 verb: Some(VerbData {
232 verb_form: Some(VerbForm::ThirdPersonSingularPresentForm),
233 ..
234 }),
235 ..
236 }))
237 )
238 }
239
240 pub fn is_adverb(&self) -> bool {
241 matches!(
242 self,
243 TokenKind::Word(Some(WordMetadata {
244 adverb: Some(_),
245 ..
246 }))
247 )
248 }
249
250 pub fn is_swear(&self) -> bool {
251 matches!(
252 self,
253 TokenKind::Word(Some(WordMetadata {
254 swear: Some(true),
255 ..
256 }))
257 )
258 }
259
260 pub fn matches_variant_of(&self, other: &Self) -> bool {
263 self.with_default_data() == other.with_default_data()
264 }
265
266 pub fn with_default_data(&self) -> Self {
270 match self {
271 TokenKind::Word(_) => TokenKind::Word(Default::default()),
272 TokenKind::Punctuation(_) => TokenKind::Punctuation(Default::default()),
273 TokenKind::Number(..) => TokenKind::Number(Default::default()),
274 TokenKind::Space(_) => TokenKind::Space(Default::default()),
275 TokenKind::Newline(_) => TokenKind::Newline(Default::default()),
276 _ => self.clone(),
277 }
278 }
279}
280
281impl TokenKind {
282 pub fn blank_word() -> Self {
284 Self::Word(None)
285 }
286}
287
288impl TokenKind {
289 pub fn as_mut_quote(&mut self) -> Option<&mut Quote> {
290 self.as_mut_punctuation()?.as_mut_quote()
291 }
292
293 pub fn as_quote(&self) -> Option<&Quote> {
294 self.as_punctuation()?.as_quote()
295 }
296
297 pub fn is_quote(&self) -> bool {
298 matches!(self, TokenKind::Punctuation(Punctuation::Quote(_)))
299 }
300
301 pub fn is_apostrophe(&self) -> bool {
302 matches!(self, TokenKind::Punctuation(Punctuation::Apostrophe))
303 }
304
305 pub fn is_period(&self) -> bool {
306 matches!(self, TokenKind::Punctuation(Punctuation::Period))
307 }
308
309 pub fn is_at(&self) -> bool {
310 matches!(self, TokenKind::Punctuation(Punctuation::At))
311 }
312
313 pub fn is_case_separator(&self) -> bool {
317 matches!(self, TokenKind::Punctuation(Punctuation::Underscore))
318 || matches!(self, TokenKind::Punctuation(Punctuation::Hyphen))
319 }
320
321 pub fn is_verb(&self) -> bool {
322 let TokenKind::Word(Some(metadata)) = self else {
323 return false;
324 };
325
326 metadata.is_verb()
327 }
328
329 pub fn is_auxiliary_verb(&self) -> bool {
330 let TokenKind::Word(Some(metadata)) = self else {
331 return false;
332 };
333
334 metadata.is_auxiliary_verb()
335 }
336
337 pub fn is_linking_verb(&self) -> bool {
338 let TokenKind::Word(Some(metadata)) = self else {
339 return false;
340 };
341
342 metadata.is_linking_verb()
343 }
344
345 pub fn is_non_plural_nominal(&self) -> bool {
346 let TokenKind::Word(Some(metadata)) = self else {
347 return true;
348 };
349
350 metadata.is_non_plural_noun() || metadata.is_non_plural_pronoun()
351 }
352
353 pub fn is_non_plural_noun(&self) -> bool {
354 let TokenKind::Word(Some(metadata)) = self else {
355 return true;
356 };
357
358 metadata.is_non_plural_noun()
359 }
360
361 pub fn is_non_plural_pronoun(&self) -> bool {
362 let TokenKind::Word(Some(metadata)) = self else {
363 return true;
364 };
365
366 metadata.is_non_plural_pronoun()
367 }
368
369 pub fn is_second_person_pronoun(&self) -> bool {
370 let TokenKind::Word(Some(metadata)) = self else {
371 return true;
372 };
373
374 metadata.is_second_person_pronoun()
375 }
376
377 pub fn is_third_person_pronoun(&self) -> bool {
378 let TokenKind::Word(Some(metadata)) = self else {
379 return true;
380 };
381
382 metadata.is_third_person_pronoun()
383 }
384
385 pub fn is_first_person_singular_pronoun(&self) -> bool {
386 let TokenKind::Word(Some(metadata)) = self else {
387 return true;
388 };
389
390 metadata.is_first_person_singular_pronoun()
391 }
392
393 pub fn is_first_person_plural_pronoun(&self) -> bool {
394 let TokenKind::Word(Some(metadata)) = self else {
395 return true;
396 };
397
398 metadata.is_first_person_plural_pronoun()
399 }
400
401 pub fn is_third_person_singular_pronoun(&self) -> bool {
402 let TokenKind::Word(Some(metadata)) = self else {
403 return true;
404 };
405
406 metadata.is_third_person_singular_pronoun()
407 }
408
409 pub fn is_third_person_plural_pronoun(&self) -> bool {
410 let TokenKind::Word(Some(metadata)) = self else {
411 return true;
412 };
413
414 metadata.is_third_person_plural_pronoun()
415 }
416
417 pub fn is_object_pronoun(&self) -> bool {
418 let TokenKind::Word(Some(metadata)) = self else {
419 return true;
420 };
421
422 metadata.is_object_pronoun()
423 }
424
425 pub fn is_common_word(&self) -> bool {
426 let TokenKind::Word(Some(metadata)) = self else {
427 return true;
428 };
429
430 metadata.common
431 }
432
433 pub fn is_plural_nominal(&self) -> bool {
434 let TokenKind::Word(Some(metadata)) = self else {
435 return false;
436 };
437
438 metadata.is_plural_noun() || metadata.is_plural_pronoun()
439 }
440
441 pub fn is_plural_pronoun(&self) -> bool {
442 let TokenKind::Word(Some(metadata)) = self else {
443 return false;
444 };
445
446 metadata.is_plural_pronoun()
447 }
448
449 pub fn is_plural_noun(&self) -> bool {
450 let TokenKind::Word(Some(metadata)) = self else {
451 return false;
452 };
453
454 metadata.is_plural_noun()
455 }
456
457 pub fn is_nominal(&self) -> bool {
458 let TokenKind::Word(Some(metadata)) = self else {
459 return false;
460 };
461
462 metadata.is_noun() || metadata.is_pronoun()
463 }
464
465 pub fn is_noun(&self) -> bool {
466 let TokenKind::Word(Some(metadata)) = self else {
467 return false;
468 };
469
470 metadata.is_noun()
471 }
472
473 pub fn is_pronoun(&self) -> bool {
474 let TokenKind::Word(Some(metadata)) = self else {
475 return false;
476 };
477
478 metadata.is_pronoun()
479 }
480
481 pub fn is_reflexive_pronoun(&self) -> bool {
482 let TokenKind::Word(Some(metadata)) = self else {
483 return false;
484 };
485
486 metadata.is_reflexive_pronoun()
487 }
488
489 pub fn is_determiner(&self) -> bool {
490 let TokenKind::Word(Some(metadata)) = self else {
491 return false;
492 };
493
494 metadata.is_determiner()
495 }
496
497 pub fn is_demonstrative_determiner(&self) -> bool {
498 let TokenKind::Word(Some(metadata)) = self else {
499 return false;
500 };
501
502 metadata.is_demonstrative_determiner()
503 }
504
505 pub fn is_possessive_determiner(&self) -> bool {
506 let TokenKind::Word(Some(metadata)) = self else {
507 return false;
508 };
509
510 metadata.is_possessive_determiner()
511 }
512
513 pub fn is_likely_homograph(&self) -> bool {
514 let TokenKind::Word(Some(metadata)) = self else {
515 return false;
516 };
517
518 metadata.is_likely_homograph()
519 }
520
521 pub fn is_comma(&self) -> bool {
522 matches!(self, TokenKind::Punctuation(Punctuation::Comma))
523 }
524
525 pub fn is_whitespace(&self) -> bool {
527 matches!(self, TokenKind::Space(_) | TokenKind::Newline(_))
528 }
529
530 pub fn is_upos(&self, upos: UPOS) -> bool {
531 let Some(Some(meta)) = self.as_word() else {
532 return false;
533 };
534
535 meta.pos_tag == Some(upos)
536 }
537}