harper_core/
token_kind.rs

1use is_macro::Is;
2use serde::{Deserialize, Serialize};
3
4use crate::{
5    ConjunctionData, NounData, Number, PronounData, Punctuation, Quote, Tense, VerbData,
6    WordMetadata,
7};
8
9#[derive(Debug, Is, Clone, Serialize, Deserialize, Default, PartialOrd, Hash, Eq, PartialEq)]
10#[serde(tag = "kind", content = "value")]
11pub enum TokenKind {
12    /// `None` if the word does not exist in the dictionary.
13    Word(Option<WordMetadata>),
14    Punctuation(Punctuation),
15    Decade,
16    Number(Number),
17    /// A sequence of " " spaces.
18    Space(usize),
19    /// A sequence of "\n" newlines
20    Newline(usize),
21    EmailAddress,
22    Url,
23    Hostname,
24    /// A special token used for things like inline code blocks that should be
25    /// ignored by all linters.
26    #[default]
27    Unlintable,
28    ParagraphBreak,
29    Regexish,
30}
31
32impl TokenKind {
33    pub fn is_open_square(&self) -> bool {
34        matches!(self, TokenKind::Punctuation(Punctuation::OpenSquare))
35    }
36
37    pub fn is_close_square(&self) -> bool {
38        matches!(self, TokenKind::Punctuation(Punctuation::CloseSquare))
39    }
40
41    pub fn is_pipe(&self) -> bool {
42        matches!(self, TokenKind::Punctuation(Punctuation::Pipe))
43    }
44
45    /// Checks whether a token is word-like--meaning it is more complex than punctuation and can
46    /// hold semantic meaning in the way a word does.
47    pub fn is_word_like(&self) -> bool {
48        matches!(
49            self,
50            TokenKind::Word(..)
51                | TokenKind::EmailAddress
52                | TokenKind::Hostname
53                | TokenKind::Decade
54                | TokenKind::Number(..)
55        )
56    }
57
58    pub fn is_possessive_nominal(&self) -> bool {
59        matches!(
60            self,
61            TokenKind::Word(Some(WordMetadata {
62                noun: Some(NounData {
63                    is_possessive: Some(true),
64                    ..
65                }),
66                ..
67            })) | TokenKind::Word(Some(WordMetadata {
68                pronoun: Some(PronounData {
69                    is_possessive: Some(true),
70                    ..
71                }),
72                ..
73            }))
74        )
75    }
76
77    pub fn is_possessive_noun(&self) -> bool {
78        matches!(
79            self,
80            TokenKind::Word(Some(WordMetadata {
81                noun: Some(NounData {
82                    is_possessive: Some(true),
83                    ..
84                }),
85                ..
86            }))
87        )
88    }
89
90    pub fn is_possessive_pronoun(&self) -> bool {
91        matches!(
92            self,
93            TokenKind::Word(Some(WordMetadata {
94                pronoun: Some(PronounData {
95                    is_possessive: Some(true),
96                    ..
97                }),
98                ..
99            }))
100        )
101    }
102
103    pub fn is_proper_noun(&self) -> bool {
104        matches!(
105            self,
106            TokenKind::Word(Some(WordMetadata {
107                noun: Some(NounData {
108                    is_proper: Some(true),
109                    ..
110                }),
111                ..
112            }))
113        )
114    }
115
116    pub fn is_conjunction(&self) -> bool {
117        matches!(
118            self,
119            TokenKind::Word(Some(WordMetadata {
120                conjunction: Some(ConjunctionData {}),
121                ..
122            }))
123        )
124    }
125
126    pub(crate) fn is_chunk_terminator(&self) -> bool {
127        if self.is_sentence_terminator() {
128            return true;
129        }
130
131        match self {
132            TokenKind::Punctuation(punct) => {
133                matches!(
134                    punct,
135                    Punctuation::Comma | Punctuation::Quote { .. } | Punctuation::Colon
136                )
137            }
138            _ => false,
139        }
140    }
141
142    pub(crate) fn is_sentence_terminator(&self) -> bool {
143        match self {
144            TokenKind::Punctuation(punct) => [
145                Punctuation::Period,
146                Punctuation::Bang,
147                Punctuation::Question,
148            ]
149            .contains(punct),
150            TokenKind::ParagraphBreak => true,
151            _ => false,
152        }
153    }
154
155    pub fn is_currency(&self) -> bool {
156        matches!(self, TokenKind::Punctuation(Punctuation::Currency(..)))
157    }
158
159    pub fn is_preposition(&self) -> bool {
160        matches!(
161            self,
162            TokenKind::Word(Some(WordMetadata {
163                preposition: true,
164                ..
165            }))
166        )
167    }
168
169    pub fn is_determiner(&self) -> bool {
170        matches!(
171            self,
172            TokenKind::Word(Some(WordMetadata {
173                determiner: true,
174                ..
175            }))
176        )
177    }
178
179    pub fn is_ellipsis(&self) -> bool {
180        matches!(self, TokenKind::Punctuation(Punctuation::Ellipsis))
181    }
182
183    pub fn is_hyphen(&self) -> bool {
184        matches!(self, TokenKind::Punctuation(Punctuation::Hyphen))
185    }
186
187    pub fn is_adjective(&self) -> bool {
188        matches!(
189            self,
190            TokenKind::Word(Some(WordMetadata {
191                adjective: Some(_),
192                ..
193            }))
194        )
195    }
196
197    pub fn is_present_tense_verb(&self) -> bool {
198        matches!(
199            self,
200            TokenKind::Word(Some(WordMetadata {
201                verb: Some(VerbData {
202                    tense: Some(Tense::Present),
203                    ..
204                }),
205                ..
206            }))
207        )
208    }
209
210    pub fn is_adverb(&self) -> bool {
211        matches!(
212            self,
213            TokenKind::Word(Some(WordMetadata {
214                adverb: Some(_),
215                ..
216            }))
217        )
218    }
219
220    pub fn is_swear(&self) -> bool {
221        matches!(
222            self,
223            TokenKind::Word(Some(WordMetadata {
224                swear: Some(true),
225                ..
226            }))
227        )
228    }
229
230    /// Checks that `self` is the same enum variant as `other`, regardless of
231    /// whether the inner metadata is also equal.
232    pub fn matches_variant_of(&self, other: &Self) -> bool {
233        self.with_default_data() == other.with_default_data()
234    }
235
236    /// Produces a copy of `self` with any inner data replaced with its default
237    /// value. Useful for making comparisons on just the variant of the
238    /// enum.
239    pub fn with_default_data(&self) -> Self {
240        match self {
241            TokenKind::Word(_) => TokenKind::Word(Default::default()),
242            TokenKind::Punctuation(_) => TokenKind::Punctuation(Default::default()),
243            TokenKind::Number(..) => TokenKind::Number(Default::default()),
244            TokenKind::Space(_) => TokenKind::Space(Default::default()),
245            TokenKind::Newline(_) => TokenKind::Newline(Default::default()),
246            _ => self.clone(),
247        }
248    }
249}
250
251impl TokenKind {
252    /// Construct a [`TokenKind::Word`] with no metadata.
253    pub fn blank_word() -> Self {
254        Self::Word(None)
255    }
256}
257
258impl TokenKind {
259    pub fn as_mut_quote(&mut self) -> Option<&mut Quote> {
260        self.as_mut_punctuation()?.as_mut_quote()
261    }
262
263    pub fn as_quote(&self) -> Option<&Quote> {
264        self.as_punctuation()?.as_quote()
265    }
266
267    pub fn is_quote(&self) -> bool {
268        matches!(self, TokenKind::Punctuation(Punctuation::Quote(_)))
269    }
270
271    pub fn is_apostrophe(&self) -> bool {
272        matches!(self, TokenKind::Punctuation(Punctuation::Apostrophe))
273    }
274
275    pub fn is_period(&self) -> bool {
276        matches!(self, TokenKind::Punctuation(Punctuation::Period))
277    }
278
279    pub fn is_at(&self) -> bool {
280        matches!(self, TokenKind::Punctuation(Punctuation::At))
281    }
282
283    /// Used by `crate::parsers::CollapseIdentifiers`
284    /// TODO: Separate this into two functions and add OR functionality to
285    /// pattern matching
286    pub fn is_case_separator(&self) -> bool {
287        matches!(self, TokenKind::Punctuation(Punctuation::Underscore))
288            || matches!(self, TokenKind::Punctuation(Punctuation::Hyphen))
289    }
290
291    pub fn is_verb(&self) -> bool {
292        let TokenKind::Word(Some(metadata)) = self else {
293            return false;
294        };
295
296        metadata.is_verb()
297    }
298
299    pub fn is_auxiliary_verb(&self) -> bool {
300        let TokenKind::Word(Some(metadata)) = self else {
301            return false;
302        };
303
304        metadata.is_auxiliary_verb()
305    }
306
307    pub fn is_linking_verb(&self) -> bool {
308        let TokenKind::Word(Some(metadata)) = self else {
309            return false;
310        };
311
312        metadata.is_linking_verb()
313    }
314
315    pub fn is_not_plural_nominal(&self) -> bool {
316        let TokenKind::Word(Some(metadata)) = self else {
317            return true;
318        };
319
320        metadata.is_not_plural_noun() || metadata.is_not_plural_pronoun()
321    }
322
323    pub fn is_not_plural_noun(&self) -> bool {
324        let TokenKind::Word(Some(metadata)) = self else {
325            return true;
326        };
327
328        metadata.is_not_plural_noun()
329    }
330
331    pub fn is_not_plural_pronoun(&self) -> bool {
332        let TokenKind::Word(Some(metadata)) = self else {
333            return true;
334        };
335
336        metadata.is_not_plural_pronoun()
337    }
338
339    pub fn is_common_word(&self) -> bool {
340        let TokenKind::Word(Some(metadata)) = self else {
341            return true;
342        };
343
344        metadata.common
345    }
346
347    pub fn is_plural_nominal(&self) -> bool {
348        let TokenKind::Word(Some(metadata)) = self else {
349            return false;
350        };
351
352        metadata.is_plural_noun() || metadata.is_plural_pronoun()
353    }
354
355    pub fn is_plural_pronoun(&self) -> bool {
356        let TokenKind::Word(Some(metadata)) = self else {
357            return false;
358        };
359
360        metadata.is_plural_pronoun()
361    }
362
363    pub fn is_plural_noun(&self) -> bool {
364        let TokenKind::Word(Some(metadata)) = self else {
365            return false;
366        };
367
368        metadata.is_plural_noun()
369    }
370
371    pub fn is_nominal(&self) -> bool {
372        let TokenKind::Word(Some(metadata)) = self else {
373            return false;
374        };
375
376        metadata.is_noun() || metadata.is_pronoun()
377    }
378
379    pub fn is_noun(&self) -> bool {
380        let TokenKind::Word(Some(metadata)) = self else {
381            return false;
382        };
383
384        metadata.is_noun()
385    }
386
387    pub fn is_pronoun(&self) -> bool {
388        let TokenKind::Word(Some(metadata)) = self else {
389            return false;
390        };
391
392        metadata.is_pronoun()
393    }
394
395    pub fn is_likely_homograph(&self) -> bool {
396        let TokenKind::Word(Some(metadata)) = self else {
397            return false;
398        };
399
400        metadata.is_likely_homograph()
401    }
402
403    pub fn is_comma(&self) -> bool {
404        matches!(self, TokenKind::Punctuation(Punctuation::Comma))
405    }
406
407    /// Checks whether the token is whitespace.
408    pub fn is_whitespace(&self) -> bool {
409        matches!(self, TokenKind::Space(_) | TokenKind::Newline(_))
410    }
411}