1use harper_brill::UPOS;
2use is_macro::Is;
3use paste::paste;
4use serde::{Deserialize, Serialize};
5use strum_macros::{Display, EnumString};
6
7use std::convert::TryFrom;
8
9use crate::WordId;
10
/// Everything recorded about a single word: the parts of speech it can act
/// as, plus a few flags that do not depend on part of speech.
///
/// Each `Option<…Data>` field is `Some` when the word can act as that part of
/// speech (this is exactly what the generated `is_<category>()` queries
/// test) and `None` otherwise.
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Hash)]
pub struct WordMetadata {
    /// Noun attributes; `Some` when the word can be a noun.
    pub noun: Option<NounData>,
    /// Pronoun attributes; `Some` when the word can be a pronoun.
    pub pronoun: Option<PronounData>,
    /// Verb attributes; `Some` when the word can be a verb.
    pub verb: Option<VerbData>,
    /// Adjective attributes; `Some` when the word can be an adjective.
    pub adjective: Option<AdjectiveData>,
    /// Adverb attributes; `Some` when the word can be an adverb.
    pub adverb: Option<AdverbData>,
    /// Conjunction attributes; `Some` when the word can be a conjunction.
    pub conjunction: Option<ConjunctionData>,
    /// Tri-state swear flag; only `Some(true)` makes `is_swear()` return `true`.
    pub swear: Option<bool>,
    /// Dialects attached to the word. Falls back to `DialectFlags::default()`
    /// (the empty set) when the field is missing from serialized input.
    #[serde(default = "default_default")]
    pub dialects: DialectFlags,
    /// Determiner attributes; `Some` when the word can be a determiner.
    pub determiner: Option<DeterminerData>,
    /// Whether the word can act as a preposition. Falls back to `false` when
    /// the field is missing from serialized input.
    #[serde(default = "default_false")]
    pub preposition: bool,
    /// NOTE(review): presumably marks frequently used words — confirm against
    /// whatever populates this field. Falls back to `false` when missing from
    /// serialized input.
    #[serde(default = "default_false")]
    pub common: bool,
    /// NOTE(review): presumably the word this entry was derived from —
    /// confirm. Falls back to `None` when missing from serialized input.
    #[serde(default = "default_none")]
    pub derived_from: Option<WordId>,
    /// NOTE(review): presumably whether the word belongs to a noun phrase —
    /// confirm with the producer of this field.
    pub np_member: Option<bool>,
    /// `UPOS` tag attached to the word, if one has been assigned.
    pub pos_tag: Option<UPOS>,
}
40
/// Serde fallback for boolean fields that are missing from the input.
///
/// Always returns `false`.
fn default_false() -> bool {
    // `bool::default()` is `false`.
    bool::default()
}
45
/// Serde fallback for optional fields that are missing from the input.
///
/// Always returns `None`, whatever `T` is.
fn default_none<T>() -> Option<T> {
    Option::None
}
50
/// Serde fallback that yields the `Default` value of the field's type.
///
/// Exists so that `#[serde(default = "...")]` can name a plain function.
fn default_default<T: Default>() -> T {
    Default::default()
}
55
/// Generates the part-of-speech query methods of `WordMetadata`.
///
/// Invocation grammar: `category has sub, sub. category has. …` — categories
/// are separated by `.`. Each category names an `Option<…Data>` field of
/// `WordMetadata`; each `sub` names an `is_<sub>: Option<bool>` field of the
/// matching `<Category>Data` struct.
///
/// Generated methods:
/// - `is_likely_homograph()` (once),
/// - `is_<category>()` for every category,
/// - `is_<sub>_<category>()` and `is_not_<sub>_<category>()` for every
///   sub-property.
macro_rules! generate_metadata_queries {
    ($($category:ident has $($sub:ident),*).*) => {
        paste! {
            /// Checks if the word can act as more than one part of speech.
            pub fn is_likely_homograph(&self) -> bool {
                // Prepositions are stored as a plain `bool`, so they are listed
                // by hand. Every other part of speech — determiners included —
                // is contributed exactly once by the category repetition below;
                // naming `is_determiner()` here as well would count it twice
                // and flag every pure determiner as a homograph.
                [self.preposition, $(
                    self.[< is_ $category >](),
                )*].iter().filter(|&&is_set| is_set).count() > 1
            }

            $(
                #[doc = concat!("Checks if the word is definitely a ", stringify!($category), ".")]
                pub fn [< is_ $category >](&self) -> bool {
                    self.$category.is_some()
                }

                $(
                    #[doc = concat!("Checks if the word is definitely a ", stringify!($category), " and more specifically is labeled as (a) ", stringify!($sub), ".")]
                    pub fn [< is_ $sub _ $category >](&self) -> bool {
                        // True only for an explicit `Some(true)`; an unset
                        // (`None`) sub-property does not count.
                        matches!(
                            self.$category,
                            Some([< $category:camel Data >]{
                                [< is_ $sub >]: Some(true),
                                ..
                            })
                        )
                    }

                    #[doc = concat!("Checks if the word is definitely a ", stringify!($category), " and more specifically is labeled as __not__ (a) ", stringify!($sub), ".")]
                    pub fn [< is_not_ $sub _ $category >](&self) -> bool {
                        // True only for an explicit `Some(false)`; this is not
                        // the negation of the query above.
                        matches!(
                            self.$category,
                            Some([< $category:camel Data >]{
                                [< is_ $sub >]: Some(false),
                                ..
                            })
                        )
                    }
                )*
            )*
        }
    };
}
99
100impl WordMetadata {
101 pub fn or(&self, other: &Self) -> Self {
103 macro_rules! merge {
104 ($a:expr, $b:expr) => {
105 match ($a, $b) {
106 (Some(a), Some(b)) => Some(a.or(&b)),
107 (Some(a), None) => Some(a),
108 (None, Some(b)) => Some(b),
109 (None, None) => None,
110 }
111 };
112 }
113
114 Self {
115 noun: merge!(self.noun, other.noun),
116 pronoun: merge!(self.pronoun, other.pronoun),
117 verb: merge!(self.verb, other.verb),
118 adjective: merge!(self.adjective, other.adjective),
119 adverb: merge!(self.adverb, other.adverb),
120 conjunction: merge!(self.conjunction, other.conjunction),
121 dialects: self.dialects | other.dialects,
122 swear: self.swear.or(other.swear),
123 determiner: merge!(self.determiner, other.determiner),
124 preposition: self.preposition || other.preposition,
125 common: self.common || other.common,
126 derived_from: self.derived_from.or(other.derived_from),
127 pos_tag: self.pos_tag.or(other.pos_tag),
128 np_member: self.np_member.or(other.np_member),
129 }
130 }
131
132 pub fn enforce_pos_exclusivity(&mut self, pos: &UPOS) {
140 use UPOS::*;
141 match pos {
142 NOUN => {
143 if let Some(noun) = self.noun {
144 self.noun = Some(NounData {
145 is_proper: Some(false),
146 ..noun
147 })
148 } else {
149 self.noun = Some(NounData {
150 is_proper: Some(false),
151 is_plural: None,
152 is_possessive: None,
153 })
154 }
155
156 self.pronoun = None;
157 self.verb = None;
158 self.adjective = None;
159 self.adverb = None;
160 self.conjunction = None;
161 self.determiner = None;
162 self.preposition = false;
163 }
164 PROPN => {
165 if let Some(noun) = self.noun {
166 self.noun = Some(NounData {
167 is_proper: Some(true),
168 ..noun
169 })
170 } else {
171 self.noun = Some(NounData {
172 is_proper: Some(true),
173 is_plural: None,
174 is_possessive: None,
175 })
176 }
177
178 self.pronoun = None;
179 self.verb = None;
180 self.adjective = None;
181 self.adverb = None;
182 self.conjunction = None;
183 self.determiner = None;
184 self.preposition = false;
185 }
186 PRON => {
187 if self.pronoun.is_none() {
188 self.pronoun = Some(PronounData::default())
189 }
190
191 self.noun = None;
192 self.verb = None;
193 self.adjective = None;
194 self.adverb = None;
195 self.conjunction = None;
196 self.determiner = None;
197 self.preposition = false;
198 }
199 VERB => {
200 if let Some(verb) = self.verb {
201 self.verb = Some(VerbData {
202 is_auxiliary: Some(false),
203 ..verb
204 })
205 } else {
206 self.verb = Some(VerbData {
207 is_auxiliary: Some(false),
208 ..Default::default()
209 })
210 }
211
212 self.noun = None;
213 self.pronoun = None;
214 self.adjective = None;
215 self.adverb = None;
216 self.conjunction = None;
217 self.determiner = None;
218 self.preposition = false;
219 }
220 AUX => {
221 if let Some(verb) = self.verb {
222 self.verb = Some(VerbData {
223 is_auxiliary: Some(true),
224 ..verb
225 })
226 } else {
227 self.verb = Some(VerbData {
228 is_auxiliary: Some(true),
229 ..Default::default()
230 })
231 }
232
233 self.noun = None;
234 self.pronoun = None;
235 self.adjective = None;
236 self.adverb = None;
237 self.conjunction = None;
238 self.determiner = None;
239 self.preposition = false;
240 }
241 ADJ => {
242 if self.adjective.is_none() {
243 self.adjective = Some(AdjectiveData::default())
244 }
245
246 self.noun = None;
247 self.pronoun = None;
248 self.verb = None;
249 self.adverb = None;
250 self.conjunction = None;
251 self.determiner = None;
252 self.preposition = false;
253 }
254 ADV => {
255 if self.adverb.is_none() {
256 self.adverb = Some(AdverbData::default())
257 }
258
259 self.noun = None;
260 self.pronoun = None;
261 self.verb = None;
262 self.adjective = None;
263 self.conjunction = None;
264 self.determiner = None;
265 self.preposition = false;
266 }
267 ADP => {
268 self.noun = None;
269 self.pronoun = None;
270 self.verb = None;
271 self.adjective = None;
272 self.adverb = None;
273 self.conjunction = None;
274 self.determiner = None;
275 self.preposition = true;
276 }
277 DET => {
278 self.noun = None;
279 self.pronoun = None;
280 self.verb = None;
281 self.adjective = None;
282 self.adverb = None;
283 self.conjunction = None;
284 self.preposition = false;
285 self.determiner = Some(DeterminerData::default());
286 }
287 CCONJ | SCONJ => {
288 if self.conjunction.is_none() {
289 self.conjunction = Some(ConjunctionData::default())
290 }
291
292 self.noun = None;
293 self.pronoun = None;
294 self.verb = None;
295 self.adjective = None;
296 self.adverb = None;
297 self.determiner = None;
298 self.preposition = false;
299 }
300 _ => {}
301 }
302 }
303
304 generate_metadata_queries!(
305 noun has proper, plural, possessive.
306 pronoun has plural, possessive, reflexive.
307 determiner has demonstrative, possessive.
308 verb has linking, auxiliary.
309 conjunction has.
310 adjective has.
311 adverb has
312 );
313
314 pub fn is_verb_lemma(&self) -> bool {
315 matches!(
316 self.verb,
317 Some(VerbData {
318 verb_form: Some(VerbForm::LemmaForm),
319 ..
320 })
321 )
322 }
323
324 pub fn is_verb_past_form(&self) -> bool {
325 matches!(
326 self.verb,
327 Some(VerbData {
328 verb_form: Some(VerbForm::PastForm),
329 ..
330 })
331 )
332 }
333
334 pub fn is_verb_progressive_form(&self) -> bool {
335 matches!(
336 self.verb,
337 Some(VerbData {
338 verb_form: Some(VerbForm::ProgressiveForm),
339 ..
340 })
341 )
342 }
343
344 pub fn is_verb_third_person_singular_present_form(&self) -> bool {
345 matches!(
346 self.verb,
347 Some(VerbData {
348 verb_form: Some(VerbForm::ThirdPersonSingularPresentForm),
349 ..
350 })
351 )
352 }
353
354 pub fn is_nominal(&self) -> bool {
356 self.noun.is_some() || self.pronoun.is_some()
357 }
358
359 pub fn is_plural_nominal(&self) -> bool {
361 matches!(
362 self.noun,
363 Some(NounData {
364 is_plural: Some(true),
365 ..
366 })
367 ) || matches!(
368 self.pronoun,
369 Some(PronounData {
370 is_plural: Some(true),
371 ..
372 })
373 )
374 }
375
376 pub fn is_possessive_nominal(&self) -> bool {
378 matches!(
379 self.noun,
380 Some(NounData {
381 is_possessive: Some(true),
382 ..
383 })
384 ) || matches!(
385 self.pronoun,
386 Some(PronounData {
387 is_possessive: Some(true),
388 ..
389 })
390 )
391 }
392
393 pub fn is_not_plural_nominal(&self) -> bool {
395 matches!(
396 self.noun,
397 Some(NounData {
398 is_plural: Some(false),
399 ..
400 })
401 ) || matches!(
402 self.pronoun,
403 Some(PronounData {
404 is_plural: Some(false),
405 ..
406 })
407 )
408 }
409
410 pub fn is_not_possessive_nominal(&self) -> bool {
412 matches!(
413 self.noun,
414 Some(NounData {
415 is_possessive: Some(false),
416 ..
417 })
418 ) && matches!(
419 self.pronoun,
420 Some(PronounData {
421 is_possessive: Some(false),
422 ..
423 })
424 )
425 }
426
427 pub fn is_swear(&self) -> bool {
429 matches!(self.swear, Some(true))
430 }
431
432 pub fn append(&mut self, other: &Self) -> &mut Self {
434 *self = self.or(other);
435 self
436 }
437}
438
/// The verb inflections that `VerbData::verb_form` can record.
///
/// `WordMetadata` offers one query per variant: `is_verb_lemma`,
/// `is_verb_past_form`, `is_verb_progressive_form` and
/// `is_verb_third_person_singular_present_form`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
pub enum VerbForm {
    /// Matched by `WordMetadata::is_verb_lemma`.
    LemmaForm,
    /// Matched by `WordMetadata::is_verb_past_form`.
    PastForm,
    /// Matched by `WordMetadata::is_verb_progressive_form`.
    ProgressiveForm,
    /// Matched by `WordMetadata::is_verb_third_person_singular_present_form`.
    ThirdPersonSingularPresentForm,
}
461
/// Verb-specific attributes of a word.
///
/// Every field is optional; `None` means the property has not been specified.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct VerbData {
    /// `Some(true)` / `Some(false)` when the verb is labeled as linking / not
    /// linking.
    pub is_linking: Option<bool>,
    /// `Some(true)` / `Some(false)` when the verb is labeled as auxiliary /
    /// not auxiliary.
    pub is_auxiliary: Option<bool>,
    /// Which inflected form this verb is, when known.
    pub verb_form: Option<VerbForm>,
}
468
469impl VerbData {
470 pub fn or(&self, other: &Self) -> Self {
472 Self {
473 is_linking: self.is_linking.or(other.is_linking),
474 is_auxiliary: self.is_auxiliary.or(other.is_auxiliary),
475 verb_form: self.verb_form.or(other.verb_form),
476 }
477 }
478}
479
/// Noun-specific attributes of a word.
///
/// Every field is optional; `None` means the property has not been specified.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct NounData {
    /// `Some(true)` / `Some(false)` when the noun is labeled as proper / not
    /// proper.
    pub is_proper: Option<bool>,
    /// `Some(true)` / `Some(false)` when the noun is labeled as plural / not
    /// plural.
    pub is_plural: Option<bool>,
    /// `Some(true)` / `Some(false)` when the noun is labeled as possessive /
    /// not possessive.
    pub is_possessive: Option<bool>,
}
488
489impl NounData {
490 pub fn or(&self, other: &Self) -> Self {
492 Self {
493 is_proper: self.is_proper.or(other.is_proper),
494 is_plural: self.is_plural.or(other.is_plural),
495 is_possessive: self.is_possessive.or(other.is_possessive),
496 }
497 }
498}
499
/// Grammatical person, as recorded in `PronounData::person`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
pub enum Person {
    First,
    Second,
    Third,
}
507
/// Grammatical case, as recorded in `PronounData::case`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
pub enum Case {
    Subject,
    Object,
}
514
/// Pronoun-specific attributes of a word.
///
/// Every field is optional; `None` means the property has not been specified.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct PronounData {
    /// `Some(true)` / `Some(false)` when the pronoun is labeled as plural /
    /// not plural.
    pub is_plural: Option<bool>,
    /// `Some(true)` / `Some(false)` when the pronoun is labeled as possessive
    /// / not possessive.
    pub is_possessive: Option<bool>,
    /// `Some(true)` / `Some(false)` when the pronoun is labeled as reflexive /
    /// not reflexive.
    pub is_reflexive: Option<bool>,
    /// Grammatical person of the pronoun, when known.
    pub person: Option<Person>,
    /// Grammatical case of the pronoun, when known.
    pub case: Option<Case>,
}
524
525impl PronounData {
526 pub fn or(&self, other: &Self) -> Self {
528 Self {
529 is_plural: self.is_plural.or(other.is_plural),
530 is_possessive: self.is_possessive.or(other.is_possessive),
531 is_reflexive: self.is_reflexive.or(other.is_reflexive),
532 person: self.person.or(other.person),
533 case: self.case.or(other.case),
534 }
535 }
536}
537
/// Determiner-specific attributes of a word.
///
/// Every field is optional; `None` means the property has not been specified.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct DeterminerData {
    /// `Some(true)` / `Some(false)` when the determiner is labeled as
    /// demonstrative / not demonstrative.
    pub is_demonstrative: Option<bool>,
    /// `Some(true)` / `Some(false)` when the determiner is labeled as
    /// possessive / not possessive.
    pub is_possessive: Option<bool>,
}
543
544impl DeterminerData {
545 pub fn or(&self, other: &Self) -> Self {
547 Self {
548 is_demonstrative: self.is_demonstrative.or(other.is_demonstrative),
549 is_possessive: self.is_possessive.or(other.is_possessive),
550 }
551 }
552}
553
/// Degree of comparison, as recorded in `AdjectiveData::degree`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
pub enum Degree {
    Positive,
    Comparative,
    Superlative,
}
563
/// Adjective-specific attributes of a word.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct AdjectiveData {
    /// Degree of comparison of the adjective; `None` when unspecified.
    pub degree: Option<Degree>,
}
571
572impl AdjectiveData {
573 pub fn or(&self, other: &Self) -> Self {
575 Self {
576 degree: self.degree.or(other.degree),
577 }
578 }
579}
580
/// Adverb marker. It currently has no fields: its presence in
/// `WordMetadata::adverb` is the only information it carries.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct AdverbData {}
586
587impl AdverbData {
588 pub fn or(&self, _other: &Self) -> Self {
590 Self {}
591 }
592}
593
/// Conjunction marker. It currently has no fields: its presence in
/// `WordMetadata::conjunction` is the only information it carries.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct ConjunctionData {}
596
597impl ConjunctionData {
598 pub fn or(&self, _other: &Self) -> Self {
600 Self {}
601 }
602}
603
/// A single English dialect.
///
/// Each discriminant is a distinct single bit; `DialectFlags` reuses these
/// values verbatim for its constants, so a `Dialect` can be cast straight to
/// the matching flag.
#[derive(
    Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, EnumString, Display,
)]
pub enum Dialect {
    American = 1 << 0,
    Canadian = 1 << 1,
    Australian = 1 << 2,
    British = 1 << 3,
}
617impl TryFrom<DialectFlags> for Dialect {
618 type Error = ();
619
620 fn try_from(dialect_flags: DialectFlags) -> Result<Self, Self::Error> {
627 if dialect_flags.bits().count_ones() == 1 {
629 match dialect_flags {
630 df if df.is_dialect_enabled_strict(Dialect::American) => Ok(Dialect::American),
631 df if df.is_dialect_enabled_strict(Dialect::Canadian) => Ok(Dialect::Canadian),
632 df if df.is_dialect_enabled_strict(Dialect::Australian) => Ok(Dialect::Australian),
633 df if df.is_dialect_enabled_strict(Dialect::British) => Ok(Dialect::British),
634 _ => Err(()),
635 }
636 } else {
637 Err(())
639 }
640 }
641}
642
/// Integer type that backs `DialectFlags`; every `Dialect` discriminant is
/// cast to this type when the flag constants are defined.
type DialectFlagsUnderlyingType = u8;
647
bitflags::bitflags! {
    /// A set of dialects, stored as one bit per `Dialect`.
    ///
    /// Each constant takes its value from the discriminant of the matching
    /// `Dialect` variant, so the two definitions cannot drift apart.
    #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash)]
    #[serde(transparent)]
    pub struct DialectFlags: DialectFlagsUnderlyingType {
        const AMERICAN = Dialect::American as DialectFlagsUnderlyingType;
        const CANADIAN = Dialect::Canadian as DialectFlagsUnderlyingType;
        const AUSTRALIAN = Dialect::Australian as DialectFlagsUnderlyingType;
        const BRITISH = Dialect::British as DialectFlagsUnderlyingType;
    }
}
661impl DialectFlags {
662 #[must_use]
665 pub fn is_dialect_enabled(self, dialect: Dialect) -> bool {
666 self.is_empty() || self.intersects(Self::from_dialect(dialect))
667 }
668
669 #[must_use]
674 pub fn is_dialect_enabled_strict(self, dialect: Dialect) -> bool {
675 self.intersects(Self::from_dialect(dialect))
676 }
677
678 #[must_use]
686 pub fn from_dialect(dialect: Dialect) -> Self {
687 let Some(out) = Self::from_bits(dialect as DialectFlagsUnderlyingType) else {
688 panic!("The '{dialect}' dialect isn't defined in DialectFlags!");
689 };
690 out
691 }
692}
693impl Default for DialectFlags {
694 fn default() -> Self {
697 Self::empty()
698 }
699}