harper_core/
token_kind.rs

1use is_macro::Is;
2use serde::{Deserialize, Serialize};
3
4use crate::{ConjunctionData, NounData, Number, PronounData, Punctuation, Quote, WordMetadata};
5
6#[derive(Debug, Is, Clone, Serialize, Deserialize, Default, PartialOrd, Hash, Eq, PartialEq)]
7#[serde(tag = "kind", content = "value")]
8pub enum TokenKind {
9    /// `None` if the word does not exist in the dictionary.
10    Word(Option<WordMetadata>),
11    Punctuation(Punctuation),
12    Decade,
13    Number(Number),
14    /// A sequence of " " spaces.
15    Space(usize),
16    /// A sequence of "\n" newlines
17    Newline(usize),
18    EmailAddress,
19    Url,
20    Hostname,
21    /// A special token used for things like inline code blocks that should be
22    /// ignored by all linters.
23    #[default]
24    Unlintable,
25    ParagraphBreak,
26    Regexish,
27}
28
29impl TokenKind {
30    pub fn is_open_square(&self) -> bool {
31        matches!(self, TokenKind::Punctuation(Punctuation::OpenSquare))
32    }
33
34    pub fn is_close_square(&self) -> bool {
35        matches!(self, TokenKind::Punctuation(Punctuation::CloseSquare))
36    }
37
38    pub fn is_pipe(&self) -> bool {
39        matches!(self, TokenKind::Punctuation(Punctuation::Pipe))
40    }
41
42    /// Checks whether a token is word-like--meaning it is more complex than punctuation and can
43    /// hold semantic meaning in the way a word does.
44    pub fn is_word_like(&self) -> bool {
45        matches!(
46            self,
47            TokenKind::Word(..)
48                | TokenKind::EmailAddress
49                | TokenKind::Hostname
50                | TokenKind::Decade
51                | TokenKind::Number(..)
52        )
53    }
54
55    pub fn is_possessive_nominal(&self) -> bool {
56        matches!(
57            self,
58            TokenKind::Word(Some(WordMetadata {
59                noun: Some(NounData {
60                    is_possessive: Some(true),
61                    ..
62                }),
63                ..
64            })) | TokenKind::Word(Some(WordMetadata {
65                pronoun: Some(PronounData {
66                    is_possessive: Some(true),
67                    ..
68                }),
69                ..
70            }))
71        )
72    }
73
74    pub fn is_possessive_noun(&self) -> bool {
75        matches!(
76            self,
77            TokenKind::Word(Some(WordMetadata {
78                noun: Some(NounData {
79                    is_possessive: Some(true),
80                    ..
81                }),
82                ..
83            }))
84        )
85    }
86
87    pub fn is_possessive_pronoun(&self) -> bool {
88        matches!(
89            self,
90            TokenKind::Word(Some(WordMetadata {
91                pronoun: Some(PronounData {
92                    is_possessive: Some(true),
93                    ..
94                }),
95                ..
96            }))
97        )
98    }
99
100    pub fn is_proper_noun(&self) -> bool {
101        matches!(
102            self,
103            TokenKind::Word(Some(WordMetadata {
104                noun: Some(NounData {
105                    is_proper: Some(true),
106                    ..
107                }),
108                ..
109            }))
110        )
111    }
112
113    pub fn is_conjunction(&self) -> bool {
114        matches!(
115            self,
116            TokenKind::Word(Some(WordMetadata {
117                conjunction: Some(ConjunctionData {}),
118                ..
119            }))
120        )
121    }
122
123    pub(crate) fn is_chunk_terminator(&self) -> bool {
124        if self.is_sentence_terminator() {
125            return true;
126        }
127
128        match self {
129            TokenKind::Punctuation(punct) => {
130                matches!(
131                    punct,
132                    Punctuation::Comma | Punctuation::Quote { .. } | Punctuation::Colon
133                )
134            }
135            _ => false,
136        }
137    }
138
139    pub(crate) fn is_sentence_terminator(&self) -> bool {
140        match self {
141            TokenKind::Punctuation(punct) => [
142                Punctuation::Period,
143                Punctuation::Bang,
144                Punctuation::Question,
145            ]
146            .contains(punct),
147            TokenKind::ParagraphBreak => true,
148            _ => false,
149        }
150    }
151
152    pub fn is_currency(&self) -> bool {
153        matches!(self, TokenKind::Punctuation(Punctuation::Currency(..)))
154    }
155
156    pub fn is_preposition(&self) -> bool {
157        matches!(
158            self,
159            TokenKind::Word(Some(WordMetadata {
160                preposition: true,
161                ..
162            }))
163        )
164    }
165
166    pub fn is_determiner(&self) -> bool {
167        matches!(
168            self,
169            TokenKind::Word(Some(WordMetadata {
170                determiner: true,
171                ..
172            }))
173        )
174    }
175
176    pub fn is_ellipsis(&self) -> bool {
177        matches!(self, TokenKind::Punctuation(Punctuation::Ellipsis))
178    }
179
180    pub fn is_hyphen(&self) -> bool {
181        matches!(self, TokenKind::Punctuation(Punctuation::Hyphen))
182    }
183
184    pub fn is_adjective(&self) -> bool {
185        matches!(
186            self,
187            TokenKind::Word(Some(WordMetadata {
188                adjective: Some(_),
189                ..
190            }))
191        )
192    }
193
194    pub fn is_adverb(&self) -> bool {
195        matches!(
196            self,
197            TokenKind::Word(Some(WordMetadata {
198                adverb: Some(_),
199                ..
200            }))
201        )
202    }
203
204    pub fn is_swear(&self) -> bool {
205        matches!(
206            self,
207            TokenKind::Word(Some(WordMetadata {
208                swear: Some(true),
209                ..
210            }))
211        )
212    }
213
214    /// Checks that `self` is the same enum variant as `other`, regardless of
215    /// whether the inner metadata is also equal.
216    pub fn matches_variant_of(&self, other: &Self) -> bool {
217        self.with_default_data() == other.with_default_data()
218    }
219
220    /// Produces a copy of `self` with any inner data replaced with its default
221    /// value. Useful for making comparisons on just the variant of the
222    /// enum.
223    pub fn with_default_data(&self) -> Self {
224        match self {
225            TokenKind::Word(_) => TokenKind::Word(Default::default()),
226            TokenKind::Punctuation(_) => TokenKind::Punctuation(Default::default()),
227            TokenKind::Number(..) => TokenKind::Number(Default::default()),
228            TokenKind::Space(_) => TokenKind::Space(Default::default()),
229            TokenKind::Newline(_) => TokenKind::Newline(Default::default()),
230            _ => self.clone(),
231        }
232    }
233}
234
235impl TokenKind {
236    /// Construct a [`TokenKind::Word`] with no metadata.
237    pub fn blank_word() -> Self {
238        Self::Word(None)
239    }
240}
241
242impl TokenKind {
243    pub fn as_mut_quote(&mut self) -> Option<&mut Quote> {
244        self.as_mut_punctuation()?.as_mut_quote()
245    }
246
247    pub fn as_quote(&self) -> Option<&Quote> {
248        self.as_punctuation()?.as_quote()
249    }
250
251    pub fn is_quote(&self) -> bool {
252        matches!(self, TokenKind::Punctuation(Punctuation::Quote(_)))
253    }
254
255    pub fn is_apostrophe(&self) -> bool {
256        matches!(self, TokenKind::Punctuation(Punctuation::Apostrophe))
257    }
258
259    pub fn is_period(&self) -> bool {
260        matches!(self, TokenKind::Punctuation(Punctuation::Period))
261    }
262
263    pub fn is_at(&self) -> bool {
264        matches!(self, TokenKind::Punctuation(Punctuation::At))
265    }
266
267    /// Used by `crate::parsers::CollapseIdentifiers`
268    /// TODO: Separate this into two functions and add OR functionality to
269    /// pattern matching
270    pub fn is_case_separator(&self) -> bool {
271        matches!(self, TokenKind::Punctuation(Punctuation::Underscore))
272            || matches!(self, TokenKind::Punctuation(Punctuation::Hyphen))
273    }
274
275    pub fn is_verb(&self) -> bool {
276        let TokenKind::Word(Some(metadata)) = self else {
277            return false;
278        };
279
280        metadata.is_verb()
281    }
282
283    pub fn is_auxiliary_verb(&self) -> bool {
284        let TokenKind::Word(Some(metadata)) = self else {
285            return false;
286        };
287
288        metadata.is_auxiliary_verb()
289    }
290
291    pub fn is_linking_verb(&self) -> bool {
292        let TokenKind::Word(Some(metadata)) = self else {
293            return false;
294        };
295
296        metadata.is_linking_verb()
297    }
298
299    pub fn is_not_plural_nominal(&self) -> bool {
300        let TokenKind::Word(Some(metadata)) = self else {
301            return true;
302        };
303
304        metadata.is_not_plural_noun() || metadata.is_not_plural_pronoun()
305    }
306
307    pub fn is_not_plural_noun(&self) -> bool {
308        let TokenKind::Word(Some(metadata)) = self else {
309            return true;
310        };
311
312        metadata.is_not_plural_noun()
313    }
314
315    pub fn is_not_plural_pronoun(&self) -> bool {
316        let TokenKind::Word(Some(metadata)) = self else {
317            return true;
318        };
319
320        metadata.is_not_plural_pronoun()
321    }
322
323    pub fn is_common_word(&self) -> bool {
324        let TokenKind::Word(Some(metadata)) = self else {
325            return true;
326        };
327
328        metadata.common
329    }
330
331    pub fn is_plural_nominal(&self) -> bool {
332        let TokenKind::Word(Some(metadata)) = self else {
333            return false;
334        };
335
336        metadata.is_plural_noun() || metadata.is_plural_pronoun()
337    }
338
339    pub fn is_plural_pronoun(&self) -> bool {
340        let TokenKind::Word(Some(metadata)) = self else {
341            return false;
342        };
343
344        metadata.is_plural_pronoun()
345    }
346
347    pub fn is_plural_noun(&self) -> bool {
348        let TokenKind::Word(Some(metadata)) = self else {
349            return false;
350        };
351
352        metadata.is_plural_noun()
353    }
354
355    pub fn is_nominal(&self) -> bool {
356        let TokenKind::Word(Some(metadata)) = self else {
357            return false;
358        };
359
360        metadata.is_noun() || metadata.is_pronoun()
361    }
362
363    pub fn is_noun(&self) -> bool {
364        let TokenKind::Word(Some(metadata)) = self else {
365            return false;
366        };
367
368        metadata.is_noun()
369    }
370
371    pub fn is_pronoun(&self) -> bool {
372        let TokenKind::Word(Some(metadata)) = self else {
373            return false;
374        };
375
376        metadata.is_pronoun()
377    }
378
379    pub fn is_likely_homograph(&self) -> bool {
380        let TokenKind::Word(Some(metadata)) = self else {
381            return false;
382        };
383
384        metadata.is_likely_homograph()
385    }
386
387    pub fn is_comma(&self) -> bool {
388        matches!(self, TokenKind::Punctuation(Punctuation::Comma))
389    }
390
391    /// Checks whether the token is whitespace.
392    pub fn is_whitespace(&self) -> bool {
393        matches!(self, TokenKind::Space(_) | TokenKind::Newline(_))
394    }
395}