harper_core/
token_kind.rs1use is_macro::Is;
2use serde::{Deserialize, Serialize};
3
4use crate::{ConjunctionData, NounData, Number, PronounData, Punctuation, Quote, WordMetadata};
5
6#[derive(Debug, Is, Clone, Serialize, Deserialize, Default, PartialOrd, Hash, Eq, PartialEq)]
7#[serde(tag = "kind", content = "value")]
8pub enum TokenKind {
9 Word(Option<WordMetadata>),
11 Punctuation(Punctuation),
12 Decade,
13 Number(Number),
14 Space(usize),
16 Newline(usize),
18 EmailAddress,
19 Url,
20 Hostname,
21 #[default]
24 Unlintable,
25 ParagraphBreak,
26 Regexish,
27}
28
29impl TokenKind {
30 pub fn is_open_square(&self) -> bool {
31 matches!(self, TokenKind::Punctuation(Punctuation::OpenSquare))
32 }
33
34 pub fn is_close_square(&self) -> bool {
35 matches!(self, TokenKind::Punctuation(Punctuation::CloseSquare))
36 }
37
38 pub fn is_pipe(&self) -> bool {
39 matches!(self, TokenKind::Punctuation(Punctuation::Pipe))
40 }
41
42 pub fn is_word_like(&self) -> bool {
45 matches!(
46 self,
47 TokenKind::Word(..)
48 | TokenKind::EmailAddress
49 | TokenKind::Hostname
50 | TokenKind::Decade
51 | TokenKind::Number(..)
52 )
53 }
54
55 pub fn is_possessive_nominal(&self) -> bool {
56 matches!(
57 self,
58 TokenKind::Word(Some(WordMetadata {
59 noun: Some(NounData {
60 is_possessive: Some(true),
61 ..
62 }),
63 ..
64 })) | TokenKind::Word(Some(WordMetadata {
65 pronoun: Some(PronounData {
66 is_possessive: Some(true),
67 ..
68 }),
69 ..
70 }))
71 )
72 }
73
74 pub fn is_possessive_noun(&self) -> bool {
75 matches!(
76 self,
77 TokenKind::Word(Some(WordMetadata {
78 noun: Some(NounData {
79 is_possessive: Some(true),
80 ..
81 }),
82 ..
83 }))
84 )
85 }
86
87 pub fn is_possessive_pronoun(&self) -> bool {
88 matches!(
89 self,
90 TokenKind::Word(Some(WordMetadata {
91 pronoun: Some(PronounData {
92 is_possessive: Some(true),
93 ..
94 }),
95 ..
96 }))
97 )
98 }
99
100 pub fn is_proper_noun(&self) -> bool {
101 matches!(
102 self,
103 TokenKind::Word(Some(WordMetadata {
104 noun: Some(NounData {
105 is_proper: Some(true),
106 ..
107 }),
108 ..
109 }))
110 )
111 }
112
113 pub fn is_conjunction(&self) -> bool {
114 matches!(
115 self,
116 TokenKind::Word(Some(WordMetadata {
117 conjunction: Some(ConjunctionData {}),
118 ..
119 }))
120 )
121 }
122
123 pub(crate) fn is_chunk_terminator(&self) -> bool {
124 if self.is_sentence_terminator() {
125 return true;
126 }
127
128 match self {
129 TokenKind::Punctuation(punct) => {
130 matches!(
131 punct,
132 Punctuation::Comma | Punctuation::Quote { .. } | Punctuation::Colon
133 )
134 }
135 _ => false,
136 }
137 }
138
139 pub(crate) fn is_sentence_terminator(&self) -> bool {
140 match self {
141 TokenKind::Punctuation(punct) => [
142 Punctuation::Period,
143 Punctuation::Bang,
144 Punctuation::Question,
145 ]
146 .contains(punct),
147 TokenKind::ParagraphBreak => true,
148 _ => false,
149 }
150 }
151
152 pub fn is_currency(&self) -> bool {
153 matches!(self, TokenKind::Punctuation(Punctuation::Currency(..)))
154 }
155
156 pub fn is_preposition(&self) -> bool {
157 matches!(
158 self,
159 TokenKind::Word(Some(WordMetadata {
160 preposition: true,
161 ..
162 }))
163 )
164 }
165
166 pub fn is_determiner(&self) -> bool {
167 matches!(
168 self,
169 TokenKind::Word(Some(WordMetadata {
170 determiner: true,
171 ..
172 }))
173 )
174 }
175
176 pub fn is_ellipsis(&self) -> bool {
177 matches!(self, TokenKind::Punctuation(Punctuation::Ellipsis))
178 }
179
180 pub fn is_hyphen(&self) -> bool {
181 matches!(self, TokenKind::Punctuation(Punctuation::Hyphen))
182 }
183
184 pub fn is_adjective(&self) -> bool {
185 matches!(
186 self,
187 TokenKind::Word(Some(WordMetadata {
188 adjective: Some(_),
189 ..
190 }))
191 )
192 }
193
194 pub fn is_adverb(&self) -> bool {
195 matches!(
196 self,
197 TokenKind::Word(Some(WordMetadata {
198 adverb: Some(_),
199 ..
200 }))
201 )
202 }
203
204 pub fn is_swear(&self) -> bool {
205 matches!(
206 self,
207 TokenKind::Word(Some(WordMetadata {
208 swear: Some(true),
209 ..
210 }))
211 )
212 }
213
214 pub fn matches_variant_of(&self, other: &Self) -> bool {
217 self.with_default_data() == other.with_default_data()
218 }
219
220 pub fn with_default_data(&self) -> Self {
224 match self {
225 TokenKind::Word(_) => TokenKind::Word(Default::default()),
226 TokenKind::Punctuation(_) => TokenKind::Punctuation(Default::default()),
227 TokenKind::Number(..) => TokenKind::Number(Default::default()),
228 TokenKind::Space(_) => TokenKind::Space(Default::default()),
229 TokenKind::Newline(_) => TokenKind::Newline(Default::default()),
230 _ => self.clone(),
231 }
232 }
233}
234
235impl TokenKind {
236 pub fn blank_word() -> Self {
238 Self::Word(None)
239 }
240}
241
242impl TokenKind {
243 pub fn as_mut_quote(&mut self) -> Option<&mut Quote> {
244 self.as_mut_punctuation()?.as_mut_quote()
245 }
246
247 pub fn as_quote(&self) -> Option<&Quote> {
248 self.as_punctuation()?.as_quote()
249 }
250
251 pub fn is_quote(&self) -> bool {
252 matches!(self, TokenKind::Punctuation(Punctuation::Quote(_)))
253 }
254
255 pub fn is_apostrophe(&self) -> bool {
256 matches!(self, TokenKind::Punctuation(Punctuation::Apostrophe))
257 }
258
259 pub fn is_period(&self) -> bool {
260 matches!(self, TokenKind::Punctuation(Punctuation::Period))
261 }
262
263 pub fn is_at(&self) -> bool {
264 matches!(self, TokenKind::Punctuation(Punctuation::At))
265 }
266
267 pub fn is_case_separator(&self) -> bool {
271 matches!(self, TokenKind::Punctuation(Punctuation::Underscore))
272 || matches!(self, TokenKind::Punctuation(Punctuation::Hyphen))
273 }
274
275 pub fn is_verb(&self) -> bool {
276 let TokenKind::Word(Some(metadata)) = self else {
277 return false;
278 };
279
280 metadata.is_verb()
281 }
282
283 pub fn is_auxiliary_verb(&self) -> bool {
284 let TokenKind::Word(Some(metadata)) = self else {
285 return false;
286 };
287
288 metadata.is_auxiliary_verb()
289 }
290
291 pub fn is_linking_verb(&self) -> bool {
292 let TokenKind::Word(Some(metadata)) = self else {
293 return false;
294 };
295
296 metadata.is_linking_verb()
297 }
298
299 pub fn is_not_plural_nominal(&self) -> bool {
300 let TokenKind::Word(Some(metadata)) = self else {
301 return true;
302 };
303
304 metadata.is_not_plural_noun() || metadata.is_not_plural_pronoun()
305 }
306
307 pub fn is_not_plural_noun(&self) -> bool {
308 let TokenKind::Word(Some(metadata)) = self else {
309 return true;
310 };
311
312 metadata.is_not_plural_noun()
313 }
314
315 pub fn is_not_plural_pronoun(&self) -> bool {
316 let TokenKind::Word(Some(metadata)) = self else {
317 return true;
318 };
319
320 metadata.is_not_plural_pronoun()
321 }
322
323 pub fn is_common_word(&self) -> bool {
324 let TokenKind::Word(Some(metadata)) = self else {
325 return true;
326 };
327
328 metadata.common
329 }
330
331 pub fn is_plural_nominal(&self) -> bool {
332 let TokenKind::Word(Some(metadata)) = self else {
333 return false;
334 };
335
336 metadata.is_plural_noun() || metadata.is_plural_pronoun()
337 }
338
339 pub fn is_plural_pronoun(&self) -> bool {
340 let TokenKind::Word(Some(metadata)) = self else {
341 return false;
342 };
343
344 metadata.is_plural_pronoun()
345 }
346
347 pub fn is_plural_noun(&self) -> bool {
348 let TokenKind::Word(Some(metadata)) = self else {
349 return false;
350 };
351
352 metadata.is_plural_noun()
353 }
354
355 pub fn is_nominal(&self) -> bool {
356 let TokenKind::Word(Some(metadata)) = self else {
357 return false;
358 };
359
360 metadata.is_noun() || metadata.is_pronoun()
361 }
362
363 pub fn is_noun(&self) -> bool {
364 let TokenKind::Word(Some(metadata)) = self else {
365 return false;
366 };
367
368 metadata.is_noun()
369 }
370
371 pub fn is_pronoun(&self) -> bool {
372 let TokenKind::Word(Some(metadata)) = self else {
373 return false;
374 };
375
376 metadata.is_pronoun()
377 }
378
379 pub fn is_likely_homograph(&self) -> bool {
380 let TokenKind::Word(Some(metadata)) = self else {
381 return false;
382 };
383
384 metadata.is_likely_homograph()
385 }
386
387 pub fn is_comma(&self) -> bool {
388 matches!(self, TokenKind::Punctuation(Punctuation::Comma))
389 }
390
391 pub fn is_whitespace(&self) -> bool {
393 matches!(self, TokenKind::Space(_) | TokenKind::Newline(_))
394 }
395}