harper_core/
token_kind.rs

1use harper_brill::UPOS;
2use is_macro::Is;
3use serde::{Deserialize, Serialize};
4
5use crate::{
6    ConjunctionData, NounData, Number, PronounData, Punctuation, Quote, VerbData, VerbForm,
7    WordMetadata,
8};
9
10#[derive(Debug, Is, Clone, Serialize, Deserialize, Default, PartialOrd, Hash, Eq, PartialEq)]
11#[serde(tag = "kind", content = "value")]
12pub enum TokenKind {
13    /// `None` if the word does not exist in the dictionary.
14    Word(Option<WordMetadata>),
15    Punctuation(Punctuation),
16    Decade,
17    Number(Number),
18    /// A sequence of " " spaces.
19    Space(usize),
20    /// A sequence of "\n" newlines
21    Newline(usize),
22    EmailAddress,
23    Url,
24    Hostname,
25    /// A special token used for things like inline code blocks that should be
26    /// ignored by all linters.
27    #[default]
28    Unlintable,
29    ParagraphBreak,
30    Regexish,
31}
32
33impl TokenKind {
34    pub fn is_open_square(&self) -> bool {
35        matches!(self, TokenKind::Punctuation(Punctuation::OpenSquare))
36    }
37
38    pub fn is_close_square(&self) -> bool {
39        matches!(self, TokenKind::Punctuation(Punctuation::CloseSquare))
40    }
41
42    pub fn is_pipe(&self) -> bool {
43        matches!(self, TokenKind::Punctuation(Punctuation::Pipe))
44    }
45
46    /// Checks whether a token is word-like--meaning it is more complex than punctuation and can
47    /// hold semantic meaning in the way a word does.
48    pub fn is_word_like(&self) -> bool {
49        matches!(
50            self,
51            TokenKind::Word(..)
52                | TokenKind::EmailAddress
53                | TokenKind::Hostname
54                | TokenKind::Decade
55                | TokenKind::Number(..)
56        )
57    }
58
59    pub fn is_possessive_nominal(&self) -> bool {
60        matches!(
61            self,
62            TokenKind::Word(Some(WordMetadata {
63                noun: Some(NounData {
64                    is_possessive: Some(true),
65                    ..
66                }),
67                ..
68            })) | TokenKind::Word(Some(WordMetadata {
69                pronoun: Some(PronounData {
70                    is_possessive: Some(true),
71                    ..
72                }),
73                ..
74            }))
75        )
76    }
77
78    pub fn is_possessive_noun(&self) -> bool {
79        matches!(
80            self,
81            TokenKind::Word(Some(WordMetadata {
82                noun: Some(NounData {
83                    is_possessive: Some(true),
84                    ..
85                }),
86                ..
87            }))
88        )
89    }
90
91    pub fn is_possessive_pronoun(&self) -> bool {
92        matches!(
93            self,
94            TokenKind::Word(Some(WordMetadata {
95                pronoun: Some(PronounData {
96                    is_possessive: Some(true),
97                    ..
98                }),
99                ..
100            }))
101        )
102    }
103
104    pub fn is_proper_noun(&self) -> bool {
105        matches!(
106            self,
107            TokenKind::Word(Some(WordMetadata {
108                noun: Some(NounData {
109                    is_proper: Some(true),
110                    ..
111                }),
112                ..
113            }))
114        )
115    }
116
117    pub fn is_conjunction(&self) -> bool {
118        matches!(
119            self,
120            TokenKind::Word(Some(WordMetadata {
121                conjunction: Some(ConjunctionData {}),
122                ..
123            }))
124        )
125    }
126
127    pub(crate) fn is_chunk_terminator(&self) -> bool {
128        if self.is_sentence_terminator() {
129            return true;
130        }
131
132        match self {
133            TokenKind::Punctuation(punct) => {
134                matches!(
135                    punct,
136                    Punctuation::Comma | Punctuation::Quote { .. } | Punctuation::Colon
137                )
138            }
139            _ => false,
140        }
141    }
142
143    pub(crate) fn is_sentence_terminator(&self) -> bool {
144        match self {
145            TokenKind::Punctuation(punct) => [
146                Punctuation::Period,
147                Punctuation::Bang,
148                Punctuation::Question,
149            ]
150            .contains(punct),
151            TokenKind::ParagraphBreak => true,
152            _ => false,
153        }
154    }
155
156    pub fn is_currency(&self) -> bool {
157        matches!(self, TokenKind::Punctuation(Punctuation::Currency(..)))
158    }
159
160    pub fn is_preposition(&self) -> bool {
161        matches!(
162            self,
163            TokenKind::Word(Some(WordMetadata {
164                preposition: true,
165                ..
166            }))
167        )
168    }
169
170    pub fn is_ellipsis(&self) -> bool {
171        matches!(self, TokenKind::Punctuation(Punctuation::Ellipsis))
172    }
173
174    pub fn is_hyphen(&self) -> bool {
175        matches!(self, TokenKind::Punctuation(Punctuation::Hyphen))
176    }
177
178    pub fn is_adjective(&self) -> bool {
179        matches!(
180            self,
181            TokenKind::Word(Some(WordMetadata {
182                adjective: Some(_),
183                ..
184            }))
185        )
186    }
187
188    pub fn is_verb_lemma(&self) -> bool {
189        matches!(
190            self,
191            TokenKind::Word(Some(WordMetadata {
192                verb: Some(VerbData {
193                    verb_form: Some(VerbForm::LemmaForm),
194                    ..
195                }),
196                ..
197            }))
198        )
199    }
200
201    pub fn is_verb_past_form(&self) -> bool {
202        matches!(
203            self,
204            TokenKind::Word(Some(WordMetadata {
205                verb: Some(VerbData {
206                    verb_form: Some(VerbForm::PastForm),
207                    ..
208                }),
209                ..
210            }))
211        )
212    }
213
214    pub fn is_verb_progressive_form(&self) -> bool {
215        matches!(
216            self,
217            TokenKind::Word(Some(WordMetadata {
218                verb: Some(VerbData {
219                    verb_form: Some(VerbForm::ProgressiveForm),
220                    ..
221                }),
222                ..
223            }))
224        )
225    }
226
227    pub fn is_verb_third_person_singular_present_form(&self) -> bool {
228        matches!(
229            self,
230            TokenKind::Word(Some(WordMetadata {
231                verb: Some(VerbData {
232                    verb_form: Some(VerbForm::ThirdPersonSingularPresentForm),
233                    ..
234                }),
235                ..
236            }))
237        )
238    }
239
240    pub fn is_adverb(&self) -> bool {
241        matches!(
242            self,
243            TokenKind::Word(Some(WordMetadata {
244                adverb: Some(_),
245                ..
246            }))
247        )
248    }
249
250    pub fn is_swear(&self) -> bool {
251        matches!(
252            self,
253            TokenKind::Word(Some(WordMetadata {
254                swear: Some(true),
255                ..
256            }))
257        )
258    }
259
260    /// Checks that `self` is the same enum variant as `other`, regardless of
261    /// whether the inner metadata is also equal.
262    pub fn matches_variant_of(&self, other: &Self) -> bool {
263        self.with_default_data() == other.with_default_data()
264    }
265
266    /// Produces a copy of `self` with any inner data replaced with its default
267    /// value. Useful for making comparisons on just the variant of the
268    /// enum.
269    pub fn with_default_data(&self) -> Self {
270        match self {
271            TokenKind::Word(_) => TokenKind::Word(Default::default()),
272            TokenKind::Punctuation(_) => TokenKind::Punctuation(Default::default()),
273            TokenKind::Number(..) => TokenKind::Number(Default::default()),
274            TokenKind::Space(_) => TokenKind::Space(Default::default()),
275            TokenKind::Newline(_) => TokenKind::Newline(Default::default()),
276            _ => self.clone(),
277        }
278    }
279}
280
281impl TokenKind {
282    /// Construct a [`TokenKind::Word`] with no metadata.
283    pub fn blank_word() -> Self {
284        Self::Word(None)
285    }
286}
287
288impl TokenKind {
289    pub fn as_mut_quote(&mut self) -> Option<&mut Quote> {
290        self.as_mut_punctuation()?.as_mut_quote()
291    }
292
293    pub fn as_quote(&self) -> Option<&Quote> {
294        self.as_punctuation()?.as_quote()
295    }
296
297    pub fn is_quote(&self) -> bool {
298        matches!(self, TokenKind::Punctuation(Punctuation::Quote(_)))
299    }
300
301    pub fn is_apostrophe(&self) -> bool {
302        matches!(self, TokenKind::Punctuation(Punctuation::Apostrophe))
303    }
304
305    pub fn is_period(&self) -> bool {
306        matches!(self, TokenKind::Punctuation(Punctuation::Period))
307    }
308
309    pub fn is_at(&self) -> bool {
310        matches!(self, TokenKind::Punctuation(Punctuation::At))
311    }
312
313    /// Used by `crate::parsers::CollapseIdentifiers`
314    /// TODO: Separate this into two functions and add OR functionality to
315    /// pattern matching
316    pub fn is_case_separator(&self) -> bool {
317        matches!(self, TokenKind::Punctuation(Punctuation::Underscore))
318            || matches!(self, TokenKind::Punctuation(Punctuation::Hyphen))
319    }
320
321    pub fn is_verb(&self) -> bool {
322        let TokenKind::Word(Some(metadata)) = self else {
323            return false;
324        };
325
326        metadata.is_verb()
327    }
328
329    pub fn is_auxiliary_verb(&self) -> bool {
330        let TokenKind::Word(Some(metadata)) = self else {
331            return false;
332        };
333
334        metadata.is_auxiliary_verb()
335    }
336
337    pub fn is_linking_verb(&self) -> bool {
338        let TokenKind::Word(Some(metadata)) = self else {
339            return false;
340        };
341
342        metadata.is_linking_verb()
343    }
344
345    pub fn is_non_plural_nominal(&self) -> bool {
346        let TokenKind::Word(Some(metadata)) = self else {
347            return true;
348        };
349
350        metadata.is_non_plural_noun() || metadata.is_non_plural_pronoun()
351    }
352
353    pub fn is_non_plural_noun(&self) -> bool {
354        let TokenKind::Word(Some(metadata)) = self else {
355            return true;
356        };
357
358        metadata.is_non_plural_noun()
359    }
360
361    pub fn is_non_plural_pronoun(&self) -> bool {
362        let TokenKind::Word(Some(metadata)) = self else {
363            return true;
364        };
365
366        metadata.is_non_plural_pronoun()
367    }
368
369    pub fn is_second_person_pronoun(&self) -> bool {
370        let TokenKind::Word(Some(metadata)) = self else {
371            return true;
372        };
373
374        metadata.is_second_person_pronoun()
375    }
376
377    pub fn is_third_person_pronoun(&self) -> bool {
378        let TokenKind::Word(Some(metadata)) = self else {
379            return true;
380        };
381
382        metadata.is_third_person_pronoun()
383    }
384
385    pub fn is_first_person_singular_pronoun(&self) -> bool {
386        let TokenKind::Word(Some(metadata)) = self else {
387            return true;
388        };
389
390        metadata.is_first_person_singular_pronoun()
391    }
392
393    pub fn is_first_person_plural_pronoun(&self) -> bool {
394        let TokenKind::Word(Some(metadata)) = self else {
395            return true;
396        };
397
398        metadata.is_first_person_plural_pronoun()
399    }
400
401    pub fn is_third_person_singular_pronoun(&self) -> bool {
402        let TokenKind::Word(Some(metadata)) = self else {
403            return true;
404        };
405
406        metadata.is_third_person_singular_pronoun()
407    }
408
409    pub fn is_third_person_plural_pronoun(&self) -> bool {
410        let TokenKind::Word(Some(metadata)) = self else {
411            return true;
412        };
413
414        metadata.is_third_person_plural_pronoun()
415    }
416
417    pub fn is_object_pronoun(&self) -> bool {
418        let TokenKind::Word(Some(metadata)) = self else {
419            return true;
420        };
421
422        metadata.is_object_pronoun()
423    }
424
425    pub fn is_common_word(&self) -> bool {
426        let TokenKind::Word(Some(metadata)) = self else {
427            return true;
428        };
429
430        metadata.common
431    }
432
433    pub fn is_plural_nominal(&self) -> bool {
434        let TokenKind::Word(Some(metadata)) = self else {
435            return false;
436        };
437
438        metadata.is_plural_noun() || metadata.is_plural_pronoun()
439    }
440
441    pub fn is_plural_pronoun(&self) -> bool {
442        let TokenKind::Word(Some(metadata)) = self else {
443            return false;
444        };
445
446        metadata.is_plural_pronoun()
447    }
448
449    pub fn is_plural_noun(&self) -> bool {
450        let TokenKind::Word(Some(metadata)) = self else {
451            return false;
452        };
453
454        metadata.is_plural_noun()
455    }
456
457    pub fn is_nominal(&self) -> bool {
458        let TokenKind::Word(Some(metadata)) = self else {
459            return false;
460        };
461
462        metadata.is_noun() || metadata.is_pronoun()
463    }
464
465    pub fn is_noun(&self) -> bool {
466        let TokenKind::Word(Some(metadata)) = self else {
467            return false;
468        };
469
470        metadata.is_noun()
471    }
472
473    pub fn is_pronoun(&self) -> bool {
474        let TokenKind::Word(Some(metadata)) = self else {
475            return false;
476        };
477
478        metadata.is_pronoun()
479    }
480
481    pub fn is_reflexive_pronoun(&self) -> bool {
482        let TokenKind::Word(Some(metadata)) = self else {
483            return false;
484        };
485
486        metadata.is_reflexive_pronoun()
487    }
488
489    pub fn is_determiner(&self) -> bool {
490        let TokenKind::Word(Some(metadata)) = self else {
491            return false;
492        };
493
494        metadata.is_determiner()
495    }
496
497    pub fn is_demonstrative_determiner(&self) -> bool {
498        let TokenKind::Word(Some(metadata)) = self else {
499            return false;
500        };
501
502        metadata.is_demonstrative_determiner()
503    }
504
505    pub fn is_possessive_determiner(&self) -> bool {
506        let TokenKind::Word(Some(metadata)) = self else {
507            return false;
508        };
509
510        metadata.is_possessive_determiner()
511    }
512
513    pub fn is_likely_homograph(&self) -> bool {
514        let TokenKind::Word(Some(metadata)) = self else {
515            return false;
516        };
517
518        metadata.is_likely_homograph()
519    }
520
521    pub fn is_comma(&self) -> bool {
522        matches!(self, TokenKind::Punctuation(Punctuation::Comma))
523    }
524
525    /// Checks whether the token is whitespace.
526    pub fn is_whitespace(&self) -> bool {
527        matches!(self, TokenKind::Space(_) | TokenKind::Newline(_))
528    }
529
530    pub fn is_upos(&self, upos: UPOS) -> bool {
531        let Some(Some(meta)) = self.as_word() else {
532            return false;
533        };
534
535        meta.pos_tag == Some(upos)
536    }
537}