Skip to main content

harper_core/
dict_word_metadata.rs

1use harper_brill::UPOS;
2use is_macro::Is;
3use itertools::Itertools;
4use paste::paste;
5use serde::{Deserialize, Serialize};
6use smallvec::SmallVec;
7use strum::{EnumCount as _, VariantArray as _};
8use strum_macros::{Display, EnumCount, EnumIter, EnumString, VariantArray};
9
10use std::convert::TryFrom;
11
12use crate::dict_word_metadata_orthography::OrthFlags;
13use crate::spell::WordId;
14use crate::{Document, TokenKind, TokenStringExt};
15
/// This represents a "lexeme" or "headword" which is case-folded but affix-expanded.
/// So not only lemmata but also inflected forms are stored here, with "horn" and "horns" each
/// having their own lexeme, but "Ivy" and "ivy" sharing the same lexeme.
///
/// Each `Option<...Data>` field is `Some` when the word can act as that part of speech, so a
/// single entry may carry several parts of speech at once.
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Hash)]
pub struct DictWordMetadata {
    // The main parts of speech which have extra data.
    /// Noun data; `Some` when the word can be a noun.
    pub noun: Option<NounData>,
    /// Pronoun data; `Some` when the word can be a pronoun.
    pub pronoun: Option<PronounData>,
    /// Verb data; `Some` when the word can be a verb.
    pub verb: Option<VerbData>,
    /// Adjective data; `Some` when the word can be an adjective.
    pub adjective: Option<AdjectiveData>,
    /// Adverb data; `Some` when the word can be an adverb.
    pub adverb: Option<AdverbData>,
    /// Conjunction data; `Some` when the word can be a conjunction.
    pub conjunction: Option<ConjunctionData>,
    /// Determiner data; `Some` when the word can be a determiner.
    pub determiner: Option<DeterminerData>,
    /// Affix data; `Some` when the entry is an affix.
    pub affix: Option<AffixData>,
    // Parts of speech which don't have extra data.
    /// Whether the word is a [preposition](https://www.merriam-webster.com/dictionary/preposition).
    #[serde(default = "default_false")]
    pub preposition: bool,
    /// Whether the word is an offensive word.
    /// `None` means the dictionary does not say either way.
    pub swear: Option<bool>,
    /// The dialects this word belongs to.
    /// If no dialects are defined, it can be assumed that the word is
    /// valid in all dialects of English.
    #[serde(default = "default_default")]
    pub dialects: DialectFlags,
    /// Orthographic information: letter case, spaces, hyphens, etc.
    #[serde(default = "OrthFlags::empty")]
    pub orth_info: OrthFlags,
    /// Whether the word is considered especially common.
    #[serde(default = "default_false")]
    pub common: bool,
    /// Presumably the ID of the word this entry was derived from.
    /// NOTE(review): confirm against the code that populates this field.
    #[serde(default = "default_none")]
    pub derived_from: Option<WordId>,
    /// Generated by a chunker. Declares whether the word is a member of a nominal phrase. Using
    /// this should be preferred over the similarly named `Pattern`.
    ///
    /// For more details, see [the announcement blog post](https://elijahpotter.dev/articles/training_a_chunker_with_burn).
    pub np_member: Option<bool>,
    /// Generated by a POS tagger. Declares what it inferred the word's part of speech to be.
    pub pos_tag: Option<UPOS>,
}
57
/// Supplies `false` for boolean fields that are missing from serialized data.
///
/// Referenced by name from `#[serde(default = "...")]` attributes.
fn default_false() -> bool {
    bool::default()
}
62
/// Supplies `None` for optional fields that are missing from serialized data.
///
/// Referenced by name from `#[serde(default = "...")]` attributes.
fn default_none<T>() -> Option<T> {
    Option::default()
}
67
/// Supplies `T`'s own default value for fields that are missing from serialized data.
///
/// Referenced by name from `#[serde(default = "...")]` attributes.
fn default_default<T: Default>() -> T {
    Default::default()
}
72
// Generates the part-of-speech query methods of `DictWordMetadata`.
//
// The input is a `.`-separated list of `category has sub, sub, ...` entries, where `category`
// names an `Option<...Data>` field and each `sub` names an `is_<sub>` field of that data type.
// For every entry it emits `is_<category>()`, and for every `sub` it emits both
// `is_<sub>_<category>()` and `is_non_<sub>_<category>()`. It also emits the aggregate queries
// `is_likely_homograph()` and `difference()`.
macro_rules! generate_metadata_queries {
    ($($category:ident has $($sub:ident),*).*) => {
        paste! {
            /// Checks whether the word can act as more than one part of speech.
            ///
            /// Counts the `preposition` flag plus every listed category that is present, and
            /// returns `true` when more than one of them applies.
            pub fn is_likely_homograph(&self) -> bool {
                // `preposition` is a plain flag rather than a generated category, so it is the
                // only entry listed by hand. Every category passed to the macro (`determiner`
                // included) is counted exactly once by the repetition below; naming one here as
                // well would count it twice and report a word with that single part of speech
                // as a homograph.
                [self.preposition, $(
                    self.[< is_ $category >](),
                )*].iter().filter(|&&is_pos| is_pos).count() > 1
            }

            /// How different is this word from another?
            ///
            /// Returns the number of generated queries whose answer differs between `self`
            /// and `other`.
            pub fn difference(&self, other: &Self) -> u32 {
                [
                    $(
                        Self::[< is_ $category >],
                        $(
                            Self::[< is_ $sub _ $category >],
                            Self::[< is_non_ $sub _ $category >],
                        )*
                    )*
                ]
                .iter()
                .fold(0, |acc, func| acc + (func(self) ^ func(other)) as u32)
            }

            $(
                #[doc = concat!("Checks if the word is definitely a ", stringify!($category), ".")]
                pub fn [< is_ $category >](&self) -> bool {
                    self.$category.is_some()
                }

                $(
                    #[doc = concat!("Checks if the word is definitely a ", stringify!($category), " and more specifically is labeled as (a) ", stringify!($sub), ".")]
                    pub fn [< is_ $sub _ $category >](&self) -> bool {
                        matches!(
                            self.$category,
                            Some([< $category:camel Data >]{
                                [< is_ $sub >]: Some(true),
                                ..
                            })
                        )
                    }

                    #[doc = concat!("Checks if the word is definitely a ", stringify!($category), " and is __not__ labeled as (a) ", stringify!($sub), " (the label is either unset or `false`).")]
                    pub fn [< is_non_ $sub _ $category >](&self) -> bool {
                        matches!(
                            self.$category,
                            Some([< $category:camel Data >]{
                                [< is_ $sub >]: None | Some(false),
                                ..
                            })
                        )
                    }
                )*
            )*
        }
    };
}
129
130impl DictWordMetadata {
131    /// If there is only one possible interpretation of the metadata, infer its UPOS tag.
132    pub fn infer_pos_tag(&self) -> Option<UPOS> {
133        // If an explicit POS tag exists, return it immediately.
134        if let Some(pos) = self.pos_tag {
135            return Some(pos);
136        }
137
138        // Collect all possible POS tags from metadata
139        let mut candidates = SmallVec::<[UPOS; 14]>::with_capacity(14);
140
141        if self.is_proper_noun() {
142            candidates.push(UPOS::PROPN);
143        }
144
145        if self.is_pronoun() {
146            candidates.push(UPOS::PRON);
147        }
148        if self.is_noun() {
149            candidates.push(UPOS::NOUN);
150        }
151        if self.is_verb() {
152            // Distinguish auxiliary verbs
153            if let Some(data) = &self.verb {
154                if data.is_auxiliary == Some(true) {
155                    candidates.push(UPOS::AUX);
156                } else {
157                    candidates.push(UPOS::VERB);
158                }
159            } else {
160                candidates.push(UPOS::VERB);
161            }
162        }
163        if self.is_adjective() {
164            candidates.push(UPOS::ADJ);
165        }
166        if self.is_adverb() {
167            candidates.push(UPOS::ADV);
168        }
169        if self.is_conjunction() {
170            candidates.push(UPOS::CCONJ);
171        }
172        if self.is_determiner() {
173            candidates.push(UPOS::DET);
174        }
175        if self.preposition {
176            candidates.push(UPOS::ADP);
177        }
178
179        // Remove duplicates
180        candidates.sort();
181        candidates.dedup();
182
183        candidates.into_iter().exactly_one().ok()
184    }
185
186    /// Produce a copy of `self` with the known properties of `other` set.
187    pub fn or(&self, other: &Self) -> Self {
188        let mut clone = self.clone();
189        clone.merge(other);
190        clone
191    }
192
193    /// Given a UPOS tag, discard any metadata that would disagree with the given POS tag.
194    /// For example, if the metadata suggests a word could either be a noun or an adjective, and we
195    /// provide a [`UPOS::NOUN`], this function will remove the adjective data.
196    ///
197    /// Additionally, if the metadata does not currently declare the potential of the word to be
198    /// the specific POS, it becomes so. That means if we provide a [`UPOS::ADJ`] to the function
199    /// for a metadata whose `Self::adjective = None`, it will become `Some`.
200    pub fn enforce_pos_exclusivity(&mut self, pos: &UPOS) {
201        use UPOS::*;
202        match pos {
203            NOUN => {
204                if let Some(noun) = self.noun {
205                    self.noun = Some(NounData {
206                        is_proper: Some(false),
207                        ..noun
208                    })
209                } else {
210                    self.noun = Some(NounData {
211                        is_proper: Some(false),
212                        is_singular: None,
213                        is_plural: None,
214                        is_countable: None,
215                        is_mass: None,
216                        is_possessive: None,
217                    })
218                }
219
220                self.pronoun = None;
221                self.verb = None;
222                self.adjective = None;
223                self.adverb = None;
224                self.conjunction = None;
225                self.determiner = None;
226                self.affix = None;
227                self.preposition = false;
228            }
229            PROPN => {
230                if let Some(noun) = self.noun {
231                    self.noun = Some(NounData {
232                        is_proper: Some(true),
233                        ..noun
234                    })
235                } else {
236                    self.noun = Some(NounData {
237                        is_proper: Some(true),
238                        is_singular: None,
239                        is_plural: None,
240                        is_countable: None,
241                        is_mass: None,
242                        is_possessive: None,
243                    })
244                }
245
246                self.pronoun = None;
247                self.verb = None;
248                self.adjective = None;
249                self.adverb = None;
250                self.conjunction = None;
251                self.determiner = None;
252                self.affix = None;
253                self.preposition = false;
254            }
255            PRON => {
256                if self.pronoun.is_none() {
257                    self.pronoun = Some(PronounData::default())
258                }
259
260                self.noun = None;
261                self.verb = None;
262                self.adjective = None;
263                self.adverb = None;
264                self.conjunction = None;
265                self.determiner = None;
266                self.affix = None;
267                self.preposition = false;
268            }
269            VERB => {
270                if let Some(verb) = self.verb {
271                    self.verb = Some(VerbData {
272                        is_auxiliary: Some(false),
273                        ..verb
274                    })
275                } else {
276                    self.verb = Some(VerbData {
277                        is_auxiliary: Some(false),
278                        ..Default::default()
279                    })
280                }
281
282                self.noun = None;
283                self.pronoun = None;
284                self.adjective = None;
285                self.adverb = None;
286                self.conjunction = None;
287                self.determiner = None;
288                self.affix = None;
289                self.preposition = false;
290            }
291            AUX => {
292                if let Some(verb) = self.verb {
293                    self.verb = Some(VerbData {
294                        is_auxiliary: Some(true),
295                        ..verb
296                    })
297                } else {
298                    self.verb = Some(VerbData {
299                        is_auxiliary: Some(true),
300                        ..Default::default()
301                    })
302                }
303
304                self.noun = None;
305                self.pronoun = None;
306                self.adjective = None;
307                self.adverb = None;
308                self.conjunction = None;
309                self.determiner = None;
310                self.affix = None;
311                self.preposition = false;
312            }
313            ADJ => {
314                if self.adjective.is_none() {
315                    self.adjective = Some(AdjectiveData::default())
316                }
317
318                self.noun = None;
319                self.pronoun = None;
320                self.verb = None;
321                self.adverb = None;
322                self.conjunction = None;
323                self.determiner = None;
324                self.affix = None;
325                self.preposition = false;
326            }
327            ADV => {
328                if self.adverb.is_none() {
329                    self.adverb = Some(AdverbData::default())
330                }
331
332                self.noun = None;
333                self.pronoun = None;
334                self.verb = None;
335                self.adjective = None;
336                self.conjunction = None;
337                self.determiner = None;
338                self.affix = None;
339                self.preposition = false;
340            }
341            ADP => {
342                self.noun = None;
343                self.pronoun = None;
344                self.verb = None;
345                self.adjective = None;
346                self.adverb = None;
347                self.conjunction = None;
348                self.determiner = None;
349                self.affix = None;
350                self.preposition = true;
351            }
352            DET => {
353                self.noun = None;
354                self.pronoun = None;
355                self.verb = None;
356                self.adjective = None;
357                self.adverb = None;
358                self.conjunction = None;
359                self.affix = None;
360                self.preposition = false;
361                self.determiner = Some(DeterminerData::default());
362            }
363            CCONJ | SCONJ => {
364                if self.conjunction.is_none() {
365                    self.conjunction = Some(ConjunctionData::default())
366                }
367
368                self.noun = None;
369                self.pronoun = None;
370                self.verb = None;
371                self.adjective = None;
372                self.adverb = None;
373                self.determiner = None;
374                self.affix = None;
375                self.preposition = false;
376            }
377            _ => {}
378        }
379    }
380
    // Generates `is_<category>()` for each category listed below, and
    // `is_<sub>_<category>()` / `is_non_<sub>_<category>()` for each of its sub-properties,
    // together with `is_likely_homograph()` and `difference()`.
    generate_metadata_queries!(
        // Singular and countable default to true, so their metadata queries are not generated.
        // They are written by hand further down (`is_singular_noun`, `is_countable_noun`, ...).
        noun has proper, plural, mass, possessive.
        pronoun has personal, singular, plural, possessive, reflexive, subject, object.
        determiner has demonstrative, possessive, quantifier.
        verb has linking, auxiliary.
        conjunction has.
        adjective has.
        adverb has manner, frequency, degree
    );
391
392    // Manual metadata queries
393
394    // Pronoun metadata queries
395
396    pub fn get_person(&self) -> Option<Person> {
397        self.pronoun.as_ref().and_then(|p| p.person)
398    }
399
400    pub fn is_first_person_plural_pronoun(&self) -> bool {
401        matches!(
402            self.pronoun,
403            Some(PronounData {
404                person: Some(Person::First),
405                is_plural: Some(true),
406                ..
407            })
408        )
409    }
410
411    pub fn is_first_person_singular_pronoun(&self) -> bool {
412        matches!(
413            self.pronoun,
414            Some(PronounData {
415                person: Some(Person::First),
416                is_singular: Some(true),
417                ..
418            })
419        )
420    }
421
422    pub fn is_third_person_plural_pronoun(&self) -> bool {
423        matches!(
424            self.pronoun,
425            Some(PronounData {
426                person: Some(Person::Third),
427                is_plural: Some(true),
428                ..
429            })
430        )
431    }
432
433    pub fn is_third_person_singular_pronoun(&self) -> bool {
434        matches!(
435            self.pronoun,
436            Some(PronounData {
437                person: Some(Person::Third),
438                is_singular: Some(true),
439                ..
440            })
441        )
442    }
443
444    pub fn is_third_person_pronoun(&self) -> bool {
445        matches!(
446            self.pronoun,
447            Some(PronounData {
448                person: Some(Person::Third),
449                ..
450            })
451        )
452    }
453
454    pub fn is_second_person_pronoun(&self) -> bool {
455        matches!(
456            self.pronoun,
457            Some(PronounData {
458                person: Some(Person::Second),
459                ..
460            })
461        )
462    }
463
464    // Lemma is default if no verb form is specified in the dictionary
465    pub fn is_verb_lemma(&self) -> bool {
466        if let Some(verb) = self.verb {
467            if let Some(forms) = verb.verb_forms {
468                return forms.is_empty() || forms.contains(VerbFormFlags::LEMMA);
469            } else {
470                return true;
471            }
472        }
473        false
474    }
475
476    pub fn is_verb_past_form(&self) -> bool {
477        self.verb.is_some_and(|v| {
478            v.verb_forms
479                .is_some_and(|vf| vf.contains(VerbFormFlags::PAST))
480        })
481    }
482
483    pub fn is_verb_simple_past_form(&self) -> bool {
484        self.verb.is_some_and(|v| {
485            v.verb_forms
486                .is_some_and(|vf| vf.contains(VerbFormFlags::PRETERITE))
487        })
488    }
489
490    pub fn is_verb_past_participle_form(&self) -> bool {
491        self.verb.is_some_and(|v| {
492            v.verb_forms
493                .is_some_and(|vf| vf.contains(VerbFormFlags::PAST_PARTICIPLE))
494        })
495    }
496
497    pub fn is_verb_progressive_form(&self) -> bool {
498        self.verb.is_some_and(|v| {
499            v.verb_forms
500                .is_some_and(|vf| vf.contains(VerbFormFlags::PROGRESSIVE))
501        })
502    }
503
504    pub fn is_verb_third_person_singular_present_form(&self) -> bool {
505        self.verb.is_some_and(|v| {
506            v.verb_forms
507                .is_some_and(|vf| vf.contains(VerbFormFlags::THIRD_PERSON_SINGULAR))
508        })
509    }
510
511    // Noun metadata queries
512
513    // Singular is default if number is not marked in the dictionary.
514    pub fn is_singular_noun(&self) -> bool {
515        if let Some(noun) = self.noun {
516            matches!(
517                (noun.is_singular, noun.is_plural),
518                (Some(true), _) | (None | Some(false), None | Some(false))
519            )
520        } else {
521            false
522        }
523    }
524    pub fn is_non_singular_noun(&self) -> bool {
525        if let Some(noun) = self.noun {
526            !matches!(
527                (noun.is_singular, noun.is_plural),
528                (Some(true), _) | (None | Some(false), None | Some(false))
529            )
530        } else {
531            false
532        }
533    }
534
535    // Countable is default if countability is not marked in the dictionary.
536    pub fn is_countable_noun(&self) -> bool {
537        if let Some(noun) = self.noun {
538            matches!(
539                (noun.is_countable, noun.is_mass),
540                (Some(true), _) | (None | Some(false), None | Some(false))
541            )
542        } else {
543            false
544        }
545    }
546    pub fn is_non_countable_noun(&self) -> bool {
547        if let Some(noun) = self.noun {
548            !matches!(
549                (noun.is_countable, noun.is_mass),
550                (Some(true), _) | (None | Some(false), None | Some(false))
551            )
552        } else {
553            false
554        }
555    }
556
557    // Most mass nouns also have countable senses. Match those that are only mass nouns.
558    pub fn is_mass_noun_only(&self) -> bool {
559        if let Some(noun) = self.noun {
560            matches!(
561                (noun.is_countable, noun.is_mass),
562                (None | Some(false), Some(true))
563            )
564        } else {
565            false
566        }
567    }
568
569    // Nominal metadata queries (noun + pronoun)
570
571    /// Checks if the word is definitely nominal.
572    pub fn is_nominal(&self) -> bool {
573        self.is_noun() || self.is_pronoun()
574    }
575
576    /// Checks if the word is definitely a nominal and more specifically is labeled as (a) singular.
577    pub fn is_singular_nominal(&self) -> bool {
578        self.is_singular_noun() || self.is_singular_pronoun()
579    }
580
581    /// Checks if the word is definitely a nominal and more specifically is labeled as (a) plural.
582    pub fn is_plural_nominal(&self) -> bool {
583        self.is_plural_noun() || self.is_plural_pronoun()
584    }
585
586    /// Checks if the word is definitely a nominal and more specifically is labeled as (a) possessive.
587    /// NOTE: `possessive pronoun`s are not qualifiers, but words like `mine`, `yours`, etc.
588    /// The terminology of `possessive noun`, `possessive pronoun` and `possessive determiner` only
589    /// tends to reinforce this confusion.
590    pub fn is_possessive_nominal(&self) -> bool {
591        self.is_possessive_noun() || self.is_possessive_determiner()
592    }
593
594    /// Checks if the word is definitely a nominal and more specifically is labeled as __not__ (a) singular.
595    pub fn is_non_singular_nominal(&self) -> bool {
596        self.is_non_singular_noun() || self.is_non_singular_pronoun()
597    }
598
599    /// Checks if the word is definitely a nominal and more specifically is labeled as __not__ (a) plural.
600    pub fn is_non_plural_nominal(&self) -> bool {
601        self.is_non_plural_noun() || self.is_non_plural_pronoun()
602    }
603
604    // Adjective metadata queries
605
606    pub fn get_degree(&self) -> Option<Degree> {
607        self.adjective.as_ref().and_then(|a| a.degree)
608    }
609
610    pub fn is_comparative_adjective(&self) -> bool {
611        matches!(
612            self.adjective,
613            Some(AdjectiveData {
614                degree: Some(Degree::Comparative)
615            })
616        )
617    }
618
619    pub fn is_superlative_adjective(&self) -> bool {
620        matches!(
621            self.adjective,
622            Some(AdjectiveData {
623                degree: Some(Degree::Superlative)
624            })
625        )
626    }
627
628    // Degree::Positive is the default if degree is not marked in the dictionary.
629    pub fn is_positive_adjective(&self) -> bool {
630        match self.adjective {
631            Some(AdjectiveData {
632                degree: Some(Degree::Positive),
633            }) => true,
634            Some(AdjectiveData { degree: None }) => true,
635            Some(AdjectiveData {
636                degree: Some(degree),
637            }) => !matches!(degree, Degree::Comparative | Degree::Superlative),
638            _ => false,
639        }
640    }
641
642    // Determiner metadata queries
643
    /// Checks if the word is definitely a determiner and more specifically is labeled as (a) quantifier.
    ///
    /// Shorthand for `is_quantifier_determiner`.
    pub fn is_quantifier(&self) -> bool {
        self.is_quantifier_determiner()
    }
648
649    // Non-POS queries
650
651    /// Checks whether a word is _definitely_ a swear.
652    pub fn is_swear(&self) -> bool {
653        matches!(self.swear, Some(true))
654    }
655
656    // Orthographic queries
657
658    /// Does the metadata for this word cover an all-lowercase variant? (e.g., "hello")
659    ///
660    /// This returns true if all letters in the word are lowercase. Words containing
661    /// non-letter characters (like numbers or symbols) are only considered if all
662    /// letter characters are lowercase.
663    pub fn is_lowercase(&self) -> bool {
664        self.orth_info.contains(OrthFlags::LOWERCASE)
665    }
666    /// Does the metadata for this word cover a titlecase variant? (e.g., "Hello")
667    ///
668    /// This returns true if the word is in titlecase form, which means:
669    /// - The first letter is uppercase
670    /// - All other letters are lowercase
671    /// - The word is at least 2 characters long
672    ///
673    /// Examples: "Hello", "World"
674    ///
675    /// Note: Words with internal capital letters (like "McDonald") or apostrophes (like "O'Reilly")
676    /// are not considered titlecase - they are classified as UPPER_CAMEL instead.
677    pub fn is_titlecase(&self) -> bool {
678        self.orth_info.contains(OrthFlags::TITLECASE)
679    }
680    /// Does the metadata for this word cover an all-uppercase variant? (e.g., "HELLO")
681    ///
682    /// This returns true if all letters in the word are uppercase. Words containing
683    /// non-letter characters (like numbers or symbols) are only considered if all
684    /// letter characters are uppercase.
685    ///
686    /// Examples: "HELLO", "NASA", "I"
687    pub fn is_allcaps(&self) -> bool {
688        self.orth_info.contains(OrthFlags::ALLCAPS)
689    }
690    /// Does the metadata for this word cover a lower camel case variant? (e.g., "helloWorld")
691    ///
692    /// This returns true if the word is in lower camel case, which means:
693    /// - The first letter is lowercase
694    /// - There is at least one uppercase letter after the first character
695    /// - The word must be at least 2 characters long
696    ///
697    /// Examples: "helloWorld", "getHTTPResponse", "eBay"
698    ///
699    /// Note: Single words that are all lowercase will return false.
700    /// Words starting with an uppercase letter will return false (those would be UpperCamel).
701    pub fn is_lower_camel(&self) -> bool {
702        self.orth_info.contains(OrthFlags::LOWER_CAMEL)
703    }
704    /// Does the metadata for this word cover an upper camel case / pascal case variant? (e.g., "HelloWorld")
705    ///
706    /// This returns true if the word is in upper camel case (also known as Pascal case), which means:
707    /// - The first letter is uppercase
708    /// - There is at least one other uppercase letter after the first character
709    /// - There is at least one lowercase letter after the first uppercase letter
710    /// - The word must be at least 3 characters long
711    ///
712    /// Examples:
713    /// - "HelloWorld" (standard Pascal case)
714    /// - "McDonald" (name with internal caps)
715    /// - "O'Reilly" (name with apostrophe and internal caps)
716    /// - "HttpRequest" (initialism followed by word)
717    ///
718    /// Note: Single words that are titlecase (like "Hello") will return false.
719    /// Words that are all uppercase (like "NASA") will also return false.
720    pub fn is_upper_camel(&self) -> bool {
721        self.orth_info.contains(OrthFlags::UPPER_CAMEL)
722    }
723
724    /// Does the metadata for this word cover an apostrophized variant? (e.g., "doesn't")
725    pub fn is_apostrophized(&self) -> bool {
726        self.orth_info.contains(OrthFlags::APOSTROPHE)
727    }
728
729    pub fn is_roman_numerals(&self) -> bool {
730        self.orth_info.contains(OrthFlags::ROMAN_NUMERALS)
731    }
732
733    /// Same thing as [`Self::or`], except in-place rather than a clone.
734    pub fn merge(&mut self, other: &Self) -> &mut Self {
735        macro_rules! merge {
736            ($a:expr, $b:expr) => {
737                match ($a, $b) {
738                    (Some(a), Some(b)) => Some(a.or(&b)),
739                    (Some(a), None) => Some(a),
740                    (None, Some(b)) => Some(b),
741                    (None, None) => None,
742                }
743            };
744        }
745
746        self.noun = merge!(self.noun, other.noun);
747        self.pronoun = merge!(self.pronoun, other.pronoun);
748        self.verb = merge!(self.verb, other.verb);
749        self.adjective = merge!(self.adjective, other.adjective);
750        self.adverb = merge!(self.adverb, other.adverb);
751        self.conjunction = merge!(self.conjunction, other.conjunction);
752        self.determiner = merge!(self.determiner, other.determiner);
753        self.affix = merge!(self.affix, other.affix);
754        self.preposition |= other.preposition;
755        self.dialects |= other.dialects;
756        self.orth_info |= other.orth_info;
757        self.swear = self.swear.or(other.swear);
758        self.common |= other.common;
759        self.derived_from = self.derived_from.or(other.derived_from);
760        self.pos_tag = self.pos_tag.or(other.pos_tag);
761        self.np_member = self.np_member.or(other.np_member);
762
763        self
764    }
765}
766
/// The morphological forms a verb can take.
///
/// Each variant's discriminant is a distinct single bit, so the variants can be cast to `u32`
/// and combined into a set of flags.
///
/// These verb forms are morphological variations, distinct from TAM (Tense-Aspect-Mood).
/// Each form can be used in various TAM combinations:
/// - Lemma form (infinitive, citation form, dictionary form)
///   Used in infinitives (e.g., "to sleep"), imperatives (e.g., "sleep!"), and with modals (e.g., "will sleep")
/// - Past form (past participle and simple past)
///   Used as verbs (e.g., "slept") or adjectives (e.g., "closed door")
/// - Progressive form (present participle and gerund)
///   Used as verbs (e.g., "sleeping"), nouns (e.g., "sleeping is important"), or adjectives (e.g., "sleeping dog")
/// - Third person singular present (-s/-es)
///   Used for third person singular subjects (e.g., "he sleeps", "she reads")
///
/// Important notes:
/// 1. English expresses time through auxiliary verbs, not verb form alone
/// 2. Irregular verbs can have different forms for past participle and simple past
/// 3. Future is always expressed through auxiliary verbs (e.g., "will sleep", "going to sleep")
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u32)]
pub enum VerbForm {
    /// The uninflected verb form: "walk", "eat"
    LemmaForm = 1 << 0,
    /// The past form for regular verbs: "walked"
    PastForm = 1 << 1,
    /// The simple past/preterite form for irregular verbs: "ate"
    SimplePastForm = 1 << 2,
    /// The past participle form for irregular verbs: "eaten"
    PastParticipleForm = 1 << 3,
    /// The progressive/continuous/gerund/present participle form: "walking", "eating"
    ProgressiveForm = 1 << 4,
    /// The third person singular present form: "walks", "eats"
    ThirdPersonSingularPresentForm = 1 << 5,
}
797
/// The underlying type used for verb form flags.
///
/// It is the same integer type as the `#[repr(u32)]` of [`VerbForm`], whose discriminants are
/// cast to it to define the individual flags.
pub type VerbFormFlagsUnderlyingType = u32;
800
bitflags::bitflags! {
    /// A collection of bit flags used to represent verb forms.
    ///
    /// This allows a word to be tagged with multiple verb forms when applicable.
    /// Every flag takes its bit from the matching [`VerbForm`] variant.
    #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
    #[serde(transparent)]
    pub struct VerbFormFlags: VerbFormFlagsUnderlyingType {
        /// The uninflected form; see [`VerbForm::LemmaForm`].
        const LEMMA = VerbForm::LemmaForm as VerbFormFlagsUnderlyingType;
        /// The past form; see [`VerbForm::PastForm`].
        const PAST = VerbForm::PastForm as VerbFormFlagsUnderlyingType;
        /// The simple past form; see [`VerbForm::SimplePastForm`].
        const PRETERITE = VerbForm::SimplePastForm as VerbFormFlagsUnderlyingType;
        /// The past participle form; see [`VerbForm::PastParticipleForm`].
        const PAST_PARTICIPLE = VerbForm::PastParticipleForm as VerbFormFlagsUnderlyingType;
        /// The progressive form; see [`VerbForm::ProgressiveForm`].
        const PROGRESSIVE = VerbForm::ProgressiveForm as VerbFormFlagsUnderlyingType;
        /// The third person singular present form; see
        /// [`VerbForm::ThirdPersonSingularPresentForm`].
        const THIRD_PERSON_SINGULAR = VerbForm::ThirdPersonSingularPresentForm as VerbFormFlagsUnderlyingType;
    }
}
816
/// Extra metadata for words that can be verbs.
///
/// Every property is optional; `None` means the dictionary does not say either way.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct VerbData {
    /// Whether the verb is labeled as a linking verb.
    pub is_linking: Option<bool>,
    /// Whether the verb is labeled as an auxiliary verb.
    pub is_auxiliary: Option<bool>,
    /// The verb forms this entry covers. (De)serialized under the name `verb_form`, and
    /// defaults to `None` when absent from the serialized data.
    #[serde(rename = "verb_form", default)]
    pub verb_forms: Option<VerbFormFlags>,
}
824
825impl VerbData {
826    /// Produce a copy of `self` with the known properties of `other` set.
827    pub fn or(&self, other: &Self) -> Self {
828        let verb_forms = match (self.verb_forms, other.verb_forms) {
829            (Some(self_verb_forms), Some(other_verb_forms)) => {
830                Some(self_verb_forms | other_verb_forms)
831            }
832            (Some(self_verb_forms), None) => Some(self_verb_forms),
833            (None, Some(other_verb_forms)) => Some(other_verb_forms),
834            (None, None) => None,
835        };
836
837        Self {
838            is_linking: self.is_linking.or(other.is_linking),
839            is_auxiliary: self.is_auxiliary.or(other.is_auxiliary),
840            verb_forms,
841        }
842    }
843}
844
845// nouns can be both singular and plural: "aircraft", "biceps", "fish", "sheep"
846// TODO other noun properties may be worth adding: abstract
847#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
848pub struct NounData {
849    pub is_proper: Option<bool>,
850    pub is_singular: Option<bool>,
851    pub is_plural: Option<bool>,
852    pub is_countable: Option<bool>,
853    pub is_mass: Option<bool>,
854    pub is_possessive: Option<bool>,
855}
856
857impl NounData {
858    /// Produce a copy of `self` with the known properties of `other` set.
859    pub fn or(&self, other: &Self) -> Self {
860        Self {
861            is_proper: self.is_proper.or(other.is_proper),
862            is_singular: self.is_singular.or(other.is_singular),
863            is_plural: self.is_plural.or(other.is_plural),
864            is_countable: self.is_countable.or(other.is_countable),
865            is_mass: self.is_mass.or(other.is_mass),
866            is_possessive: self.is_possessive.or(other.is_possessive),
867        }
868    }
869}
870
/// Grammatical person.
///
/// Person is a property of pronouns. It is also reflected by the verb "be", and by all
/// other verbs in the third person singular, which is marked with -s.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
pub enum Person {
    /// First person.
    First,
    /// Second person.
    Second,
    /// Third person.
    Third,
}
878
879// TODO for now focused on personal pronouns?
880#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
881pub struct PronounData {
882    pub is_personal: Option<bool>,
883    pub is_singular: Option<bool>,
884    pub is_plural: Option<bool>,
885    pub is_possessive: Option<bool>,
886    pub is_reflexive: Option<bool>,
887    pub person: Option<Person>,
888    pub is_subject: Option<bool>,
889    pub is_object: Option<bool>,
890}
891
892impl PronounData {
893    /// Produce a copy of `self` with the known properties of `other` set.
894    pub fn or(&self, other: &Self) -> Self {
895        Self {
896            is_personal: self.is_personal.or(other.is_personal),
897            is_singular: self.is_singular.or(other.is_singular),
898            is_plural: self.is_plural.or(other.is_plural),
899            is_possessive: self.is_possessive.or(other.is_possessive),
900            is_reflexive: self.is_reflexive.or(other.is_reflexive),
901            person: self.person.or(other.person),
902            is_subject: self.is_subject.or(other.is_subject),
903            is_object: self.is_object.or(other.is_object),
904        }
905    }
906}
907
908/// Additional metadata for determiners
909#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
910pub struct DeterminerData {
911    pub is_demonstrative: Option<bool>,
912    pub is_possessive: Option<bool>,
913    pub is_quantifier: Option<bool>,
914}
915
916impl DeterminerData {
917    /// Produce a copy of `self` with the known properties of `other` set.
918    pub fn or(&self, other: &Self) -> Self {
919        Self {
920            is_demonstrative: self.is_demonstrative.or(other.is_demonstrative),
921            is_possessive: self.is_possessive.or(other.is_possessive),
922            is_quantifier: self.is_quantifier.or(other.is_quantifier),
923        }
924    }
925}
926
/// Degree is a property of adjectives.
///
/// - Positive is not inflected.
/// - Comparative is inflected with -er or comes after the word "more".
/// - Superlative is inflected with -est or comes after the word "most".
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
pub enum Degree {
    /// The uninflected base degree.
    Positive,
    /// The -er / "more" degree.
    Comparative,
    /// The -est / "most" degree.
    Superlative,
}
936
937/// Some adjectives are not comparable so don't have -er or -est forms and can't be used with "more" or "most".
938/// Some adjectives can only be used "attributively" (before a noun); some only predicatively (after "is" etc.).
939/// In old grammars words like the articles and determiners are classified as adjectives but behave differently.
940#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
941pub struct AdjectiveData {
942    pub degree: Option<Degree>,
943}
944
945impl AdjectiveData {
946    /// Produce a copy of `self` with the known properties of `other` set.
947    pub fn or(&self, other: &Self) -> Self {
948        Self {
949            degree: self.degree.or(other.degree),
950        }
951    }
952}
953
954/// Adverb can be a "junk drawer" category for words which don't fit the other major categories.
955/// The typical adverbs are "adverbs of manner", those derived from adjectives in -ly
956/// other adverbs (time, place, etc) should probably not be considered adverbs for Harper's purposes
957#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
958pub struct AdverbData {
959    pub is_manner: Option<bool>,
960    pub is_frequency: Option<bool>,
961    pub is_degree: Option<bool>,
962}
963
964impl AdverbData {
965    /// Produce a copy of `self` with the known properties of `other` set.
966    pub fn or(&self, _other: &Self) -> Self {
967        Self {
968            is_manner: self.is_manner.or(_other.is_manner),
969            is_frequency: self.is_frequency.or(_other.is_frequency),
970            is_degree: self.is_degree.or(_other.is_degree),
971        }
972    }
973}
974
975#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
976pub struct ConjunctionData {}
977
978impl ConjunctionData {
979    /// Produce a copy of `self` with the known properties of `other` set.
980    pub fn or(&self, _other: &Self) -> Self {
981        Self {}
982    }
983}
984
985#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
986pub struct AffixData {
987    pub is_prefix: Option<bool>,
988    pub is_suffix: Option<bool>,
989}
990
991impl AffixData {
992    /// Produce a copy of `self` with the known properties of `other` set.
993    pub fn or(&self, _other: &Self) -> Self {
994        Self {
995            is_prefix: self.is_prefix.or(_other.is_prefix),
996            is_suffix: self.is_suffix.or(_other.is_suffix),
997        }
998    }
999}
1000
1001/// A regional dialect.
1002///
1003/// Note: these have bit-shifted values so that they can ergonomically integrate with
1004/// `DialectFlags`. Each value here must have a unique bit index inside
1005/// `DialectsUnderlyingType`.
1006#[derive(
1007    Debug,
1008    Clone,
1009    Copy,
1010    Serialize,
1011    Deserialize,
1012    PartialEq,
1013    PartialOrd,
1014    Eq,
1015    Hash,
1016    EnumCount,
1017    EnumString,
1018    EnumIter,
1019    Display,
1020    VariantArray,
1021)]
1022pub enum Dialect {
1023    American = 1 << 0,
1024    Canadian = 1 << 1,
1025    Australian = 1 << 2,
1026    British = 1 << 3,
1027    Indian = 1 << 4,
1028}
1029impl Dialect {
1030    /// Tries to guess the dialect used in the document by finding which dialect is used the most.
1031    /// Returns `None` if it fails to find a single dialect that is used the most.
1032    #[must_use]
1033    pub fn try_guess_from_document(document: &Document) -> Option<Self> {
1034        Self::try_from(DialectFlags::get_most_used_dialects_from_document(document)).ok()
1035    }
1036
1037    /// Tries to get a dialect from its abbreviation. Returns `None` if the abbreviation is not
1038    /// recognized.
1039    ///
1040    /// # Examples
1041    ///
1042    /// ```
1043    /// use harper_core::Dialect;
1044    ///
1045    /// let abbrs = ["US", "CA", "AU", "GB", "IN"];
1046    /// let mut dialects = abbrs.iter().map(|abbr| Dialect::try_from_abbr(abbr));
1047    ///
1048    /// assert_eq!(Some(Dialect::American), dialects.next().unwrap()); // US
1049    /// assert_eq!(Some(Dialect::Canadian), dialects.next().unwrap()); // CA
1050    /// assert_eq!(Some(Dialect::Australian), dialects.next().unwrap()); // AU
1051    /// assert_eq!(Some(Dialect::British), dialects.next().unwrap()); // GB
1052    /// assert_eq!(Some(Dialect::Indian), dialects.next().unwrap()); // IN
1053    /// ```
1054    #[must_use]
1055    pub fn try_from_abbr(abbr: &str) -> Option<Self> {
1056        match abbr {
1057            "US" => Some(Self::American),
1058            "CA" => Some(Self::Canadian),
1059            "AU" => Some(Self::Australian),
1060            "GB" => Some(Self::British),
1061            "IN" => Some(Self::Indian),
1062            _ => None,
1063        }
1064    }
1065}
1066impl TryFrom<DialectFlags> for Dialect {
1067    type Error = ();
1068
1069    /// Attempts to convert `DialectFlags` to a single `Dialect`.
1070    ///
1071    /// # Errors
1072    ///
1073    /// Will return `Err` if more than one dialect is enabled or if an undefined dialect is
1074    /// enabled.
1075    fn try_from(dialect_flags: DialectFlags) -> Result<Self, Self::Error> {
1076        // Ensure only one dialect is enabled before converting.
1077        if dialect_flags.bits().count_ones() == 1 {
1078            match dialect_flags {
1079                df if df.is_dialect_enabled_strict(Dialect::American) => Ok(Dialect::American),
1080                df if df.is_dialect_enabled_strict(Dialect::Canadian) => Ok(Dialect::Canadian),
1081                df if df.is_dialect_enabled_strict(Dialect::Australian) => Ok(Dialect::Australian),
1082                df if df.is_dialect_enabled_strict(Dialect::British) => Ok(Dialect::British),
1083                df if df.is_dialect_enabled_strict(Dialect::Indian) => Ok(Dialect::Indian),
1084                _ => Err(()),
1085            }
1086        } else {
1087            // More than one dialect enabled; can't soundly convert.
1088            Err(())
1089        }
1090    }
1091}
1092
// The underlying type used for DialectFlags.
// At the time of writing, this is currently a `u8`. If we want to define more than 8 dialects in
// the future, we will need to switch this to a larger type.
type DialectFlagsUnderlyingType = u8;

bitflags::bitflags! {
    /// A collection of bit flags used to represent enabled dialects.
    ///
    /// This is generally used to allow a word (or similar) to be tagged with multiple dialects.
    /// Each constant mirrors exactly one `Dialect` variant.
    #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash)]
    #[serde(transparent)]
    pub struct DialectFlags: DialectFlagsUnderlyingType {
        /// Mirrors `Dialect::American`.
        const AMERICAN = Dialect::American as DialectFlagsUnderlyingType;
        /// Mirrors `Dialect::Canadian`.
        const CANADIAN = Dialect::Canadian as DialectFlagsUnderlyingType;
        /// Mirrors `Dialect::Australian`.
        const AUSTRALIAN = Dialect::Australian as DialectFlagsUnderlyingType;
        /// Mirrors `Dialect::British`.
        const BRITISH = Dialect::British as DialectFlagsUnderlyingType;
        /// Mirrors `Dialect::Indian`.
        const INDIAN = Dialect::Indian as DialectFlagsUnderlyingType;
    }
}
1112impl DialectFlags {
1113    /// Checks if the provided dialect is enabled.
1114    /// If no dialect is explicitly enabled, it is assumed that all dialects are enabled.
1115    #[must_use]
1116    pub fn is_dialect_enabled(self, dialect: Dialect) -> bool {
1117        self.is_empty() || self.intersects(Self::from_dialect(dialect))
1118    }
1119
1120    /// Checks if the provided dialect is ***explicitly*** enabled.
1121    ///
1122    /// Unlike `is_dialect_enabled`, this will return false when no dialects are explicitly
1123    /// enabled.
1124    #[must_use]
1125    pub fn is_dialect_enabled_strict(self, dialect: Dialect) -> bool {
1126        self.intersects(Self::from_dialect(dialect))
1127    }
1128
1129    /// Constructs a `DialectFlags` from the provided `Dialect`, with only that dialect being
1130    /// enabled.
1131    ///
1132    /// # Panics
1133    ///
1134    /// This will panic if `dialect` represents a dialect that is not defined in
1135    /// `DialectFlags`.
1136    #[must_use]
1137    pub fn from_dialect(dialect: Dialect) -> Self {
1138        let Some(out) = Self::from_bits(dialect as DialectFlagsUnderlyingType) else {
1139            panic!("The '{dialect}' dialect isn't defined in DialectFlags!");
1140        };
1141        out
1142    }
1143
1144    /// Gets the most commonly used dialect(s) in the document.
1145    ///
1146    /// If multiple dialects are used equally often, they will all be enabled in the returned
1147    /// `DialectFlags`. On the other hand, if there is a single dialect that is used the most, it
1148    /// will be the only one enabled.
1149    #[must_use]
1150    pub fn get_most_used_dialects_from_document(document: &Document) -> Self {
1151        // Initialize counters.
1152        let mut dialect_counters: [(Dialect, usize); Dialect::COUNT] = Dialect::VARIANTS
1153            .iter()
1154            .map(|d| (*d, 0))
1155            .collect_array()
1156            .unwrap();
1157
1158        // Count word dialects.
1159        document.iter_words().for_each(|w| {
1160            if let TokenKind::Word(Some(lexeme_metadata)) = &w.kind {
1161                // If the token is a word, iterate though the dialects in `dialect_counters` and
1162                // increment those counters where the word has the respective dialect enabled.
1163                dialect_counters.iter_mut().for_each(|(dialect, count)| {
1164                    if lexeme_metadata.dialects.is_dialect_enabled(*dialect) {
1165                        *count += 1;
1166                    }
1167                });
1168            }
1169        });
1170
1171        // Find max counter.
1172        let max_counter = dialect_counters
1173            .iter()
1174            .map(|(_, count)| count)
1175            .max()
1176            .unwrap();
1177        // Get and convert the collection of most used dialects into a `DialectFlags`.
1178        dialect_counters
1179            .into_iter()
1180            .filter(|(_, count)| count == max_counter)
1181            .fold(DialectFlags::empty(), |acc, dialect| {
1182                // Fold most used dialects into `DialectFlags` via bitwise or.
1183                acc | Self::from_dialect(dialect.0)
1184            })
1185    }
1186}
1187impl Default for DialectFlags {
1188    /// A default value with no dialects explicitly enabled.
1189    /// Implicitly, this state corresponds to all dialects being enabled.
1190    fn default() -> Self {
1191        Self::empty()
1192    }
1193}
1194
1195#[cfg(test)]
1196pub mod tests {
1197    use crate::DictWordMetadata;
1198    use crate::spell::{Dictionary, FstDictionary};
1199
1200    // Helper function to get metadata from the curated dictionary
1201    pub fn md(word: &str) -> DictWordMetadata {
1202        FstDictionary::curated()
1203            .get_word_metadata_str(word)
1204            .unwrap_or_else(|| panic!("Word '{word}' not found in dictionary"))
1205            .into_owned()
1206    }
1207
1208    mod dialect {
1209        use super::super::{Dialect, DialectFlags};
1210        use crate::Document;
1211
1212        #[test]
1213        fn guess_british_dialect() {
1214            let document = Document::new_plain_english_curated("Aluminium was used.");
1215            let df = DialectFlags::get_most_used_dialects_from_document(&document);
1216            assert!(
1217                df.is_dialect_enabled_strict(Dialect::British)
1218                    && !df.is_dialect_enabled_strict(Dialect::American)
1219            );
1220        }
1221
1222        #[test]
1223        fn guess_american_dialect() {
1224            let document = Document::new_plain_english_curated("Aluminum was used.");
1225            let df = DialectFlags::get_most_used_dialects_from_document(&document);
1226            assert!(
1227                df.is_dialect_enabled_strict(Dialect::American)
1228                    && !df.is_dialect_enabled_strict(Dialect::British)
1229            );
1230        }
1231    }
1232
1233    mod noun {
1234        use crate::dict_word_metadata::tests::md;
1235
1236        #[test]
1237        fn puppy_is_noun() {
1238            assert!(md("puppy").is_noun());
1239        }
1240
1241        #[test]
1242        fn prepare_is_not_noun() {
1243            assert!(!md("prepare").is_noun());
1244        }
1245
1246        #[test]
1247        fn paris_is_proper_noun() {
1248            assert!(md("Paris").is_proper_noun());
1249        }
1250
1251        #[test]
1252        fn permit_is_non_proper_noun() {
1253            assert!(md("lapdog").is_non_proper_noun());
1254        }
1255
1256        #[test]
1257        fn hound_is_singular_noun() {
1258            assert!(md("hound").is_singular_noun());
1259        }
1260
1261        #[test]
1262        fn pooches_is_non_singular_noun() {
1263            assert!(md("pooches").is_non_singular_noun());
1264        }
1265
1266        // Make sure is_non_xxx_noun methods don't behave like is_not_xxx_noun.
1267        // In other words, make sure they don't return true for words that are not nouns.
1268        // They must only pass for words that are nouns but not singular etc.
1269        #[test]
1270        fn loyal_doesnt_pass_is_non_singular_noun() {
1271            assert!(!md("loyal").is_non_singular_noun());
1272        }
1273
1274        #[test]
1275        fn hounds_is_plural_noun() {
1276            assert!(md("hounds").is_plural_noun());
1277        }
1278
1279        #[test]
1280        fn pooch_is_non_plural_noun() {
1281            assert!(md("pooch").is_non_plural_noun());
1282        }
1283
1284        #[test]
1285        fn fish_is_singular_noun() {
1286            assert!(md("fish").is_singular_noun());
1287        }
1288
1289        #[test]
1290        fn fish_is_plural_noun() {
1291            assert!(md("fish").is_plural_noun());
1292        }
1293
1294        #[test]
1295        fn fishes_is_plural_noun() {
1296            assert!(md("fishes").is_plural_noun());
1297        }
1298
1299        #[test]
1300        fn sheep_is_singular_noun() {
1301            assert!(md("sheep").is_singular_noun());
1302        }
1303
1304        #[test]
1305        fn sheep_is_plural_noun() {
1306            assert!(md("sheep").is_plural_noun());
1307        }
1308
1309        #[test]
1310        #[should_panic]
1311        fn sheeps_is_not_word() {
1312            md("sheeps");
1313        }
1314
1315        #[test]
1316        fn bicep_is_singular_noun() {
1317            assert!(md("bicep").is_singular_noun());
1318        }
1319
1320        #[test]
1321        fn biceps_is_singular_noun() {
1322            assert!(md("biceps").is_singular_noun());
1323        }
1324
1325        #[test]
1326        fn biceps_is_plural_noun() {
1327            assert!(md("biceps").is_plural_noun());
1328        }
1329
1330        #[test]
1331        fn aircraft_is_singular_noun() {
1332            assert!(md("aircraft").is_singular_noun());
1333        }
1334
1335        #[test]
1336        fn aircraft_is_plural_noun() {
1337            assert!(md("aircraft").is_plural_noun());
1338        }
1339
1340        #[test]
1341        #[should_panic]
1342        fn aircrafts_is_not_word() {
1343            md("aircrafts");
1344        }
1345
1346        #[test]
1347        fn dog_apostrophe_s_is_possessive_noun() {
1348            assert!(md("dog's").is_possessive_noun());
1349        }
1350
1351        #[test]
1352        fn dogs_is_non_possessive_noun() {
1353            assert!(md("dogs").is_non_possessive_noun());
1354        }
1355
1356        // noun countability
1357
1358        #[test]
1359        fn dog_is_countable() {
1360            assert!(md("dog").is_countable_noun());
1361        }
1362        #[test]
1363        fn dog_is_non_mass_noun() {
1364            assert!(md("dog").is_non_mass_noun());
1365        }
1366
1367        #[test]
1368        fn furniture_is_mass_noun() {
1369            assert!(md("furniture").is_mass_noun());
1370        }
1371        #[test]
1372        fn furniture_is_non_countable_noun() {
1373            assert!(md("furniture").is_non_countable_noun());
1374        }
1375
1376        #[test]
1377        fn equipment_is_mass_noun() {
1378            assert!(md("equipment").is_mass_noun());
1379        }
1380        #[test]
1381        fn equipment_is_non_countable_noun() {
1382            assert!(md("equipment").is_non_countable_noun());
1383        }
1384
1385        #[test]
1386        fn beer_is_countable_noun() {
1387            assert!(md("beer").is_countable_noun());
1388        }
1389        #[test]
1390        fn beer_is_mass_noun() {
1391            assert!(md("beer").is_mass_noun());
1392        }
1393    }
1394
1395    mod pronoun {
1396        use crate::dict_word_metadata::tests::md;
1397
1398        mod i_me_myself {
1399            use crate::dict_word_metadata::tests::md;
1400
1401            #[test]
1402            fn i_is_pronoun() {
1403                assert!(md("I").is_pronoun());
1404            }
1405            #[test]
1406            fn i_is_personal_pronoun() {
1407                assert!(md("I").is_personal_pronoun());
1408            }
1409            #[test]
1410            fn i_is_singular_pronoun() {
1411                assert!(md("I").is_singular_pronoun());
1412            }
1413            #[test]
1414            fn i_is_subject_pronoun() {
1415                assert!(md("I").is_subject_pronoun());
1416            }
1417
1418            #[test]
1419            fn me_is_pronoun() {
1420                assert!(md("me").is_pronoun());
1421            }
1422            #[test]
1423            fn me_is_personal_pronoun() {
1424                assert!(md("me").is_personal_pronoun());
1425            }
1426            #[test]
1427            fn me_is_singular_pronoun() {
1428                assert!(md("me").is_singular_pronoun());
1429            }
1430            #[test]
1431            fn me_is_object_pronoun() {
1432                assert!(md("me").is_object_pronoun());
1433            }
1434
1435            #[test]
1436            fn myself_is_pronoun() {
1437                assert!(md("myself").is_pronoun());
1438            }
1439            #[test]
1440            fn myself_is_personal_pronoun() {
1441                assert!(md("myself").is_personal_pronoun());
1442            }
1443            #[test]
1444            fn myself_is_singular_pronoun() {
1445                assert!(md("myself").is_singular_pronoun());
1446            }
1447            #[test]
1448            fn myself_is_reflexive_pronoun() {
1449                assert!(md("myself").is_reflexive_pronoun());
1450            }
1451        }
1452
1453        mod we_us_ourselves {
1454            use crate::dict_word_metadata::tests::md;
1455
1456            #[test]
1457            fn we_is_pronoun() {
1458                assert!(md("we").is_pronoun());
1459            }
1460            #[test]
1461            fn we_is_personal_pronoun() {
1462                assert!(md("we").is_personal_pronoun());
1463            }
1464            #[test]
1465            fn we_is_plural_pronoun() {
1466                assert!(md("we").is_plural_pronoun());
1467            }
1468            #[test]
1469            fn we_is_subject_pronoun() {
1470                assert!(md("we").is_subject_pronoun());
1471            }
1472
1473            #[test]
1474            fn us_is_pronoun() {
1475                assert!(md("us").is_pronoun());
1476            }
1477            #[test]
1478            fn us_is_personal_pronoun() {
1479                assert!(md("us").is_personal_pronoun());
1480            }
1481            #[test]
1482            fn us_is_plural_pronoun() {
1483                assert!(md("us").is_plural_pronoun());
1484            }
1485            #[test]
1486            fn us_is_object_pronoun() {
1487                assert!(md("us").is_object_pronoun());
1488            }
1489
1490            #[test]
1491            fn ourselves_is_pronoun() {
1492                assert!(md("ourselves").is_pronoun());
1493            }
1494            #[test]
1495            fn ourselves_is_personal_pronoun() {
1496                assert!(md("ourselves").is_personal_pronoun());
1497            }
1498            #[test]
1499            fn ourselves_is_plural_pronoun() {
1500                assert!(md("ourselves").is_plural_pronoun());
1501            }
1502            #[test]
1503            fn ourselves_is_reflexive_pronoun() {
1504                assert!(md("ourselves").is_reflexive_pronoun());
1505            }
1506        }
1507
1508        mod you_yourself {
1509            use crate::dict_word_metadata::tests::md;
1510
1511            #[test]
1512            fn you_is_pronoun() {
1513                assert!(md("you").is_pronoun());
1514            }
1515            #[test]
1516            fn you_is_personal_pronoun() {
1517                assert!(md("you").is_personal_pronoun());
1518            }
1519            #[test]
1520            fn you_is_singular_pronoun() {
1521                assert!(md("you").is_singular_pronoun());
1522            }
1523            #[test]
1524            fn you_is_plural_pronoun() {
1525                assert!(md("you").is_plural_pronoun());
1526            }
1527            #[test]
1528            fn you_is_subject_pronoun() {
1529                assert!(md("you").is_subject_pronoun());
1530            }
1531            #[test]
1532            fn you_is_object_pronoun() {
1533                assert!(md("you").is_object_pronoun());
1534            }
1535            #[test]
1536            fn yourself_is_pronoun() {
1537                assert!(md("yourself").is_pronoun());
1538            }
1539            #[test]
1540            fn yourself_is_personal_pronoun() {
1541                assert!(md("yourself").is_personal_pronoun());
1542            }
1543            #[test]
1544            fn yourself_is_singular_pronoun() {
1545                assert!(md("yourself").is_singular_pronoun());
1546            }
1547            #[test]
1548            fn yourself_is_reflexive_pronoun() {
1549                assert!(md("yourself").is_reflexive_pronoun());
1550            }
1551        }
1552
1553        mod he_him_himself {
1554            use crate::dict_word_metadata::tests::md;
1555
1556            #[test]
1557            fn he_is_pronoun() {
1558                assert!(md("he").is_pronoun());
1559            }
1560            #[test]
1561            fn he_is_personal_pronoun() {
1562                assert!(md("he").is_personal_pronoun());
1563            }
1564            #[test]
1565            fn he_is_singular_pronoun() {
1566                assert!(md("he").is_singular_pronoun());
1567            }
1568            #[test]
1569            fn he_is_subject_pronoun() {
1570                assert!(md("he").is_subject_pronoun());
1571            }
1572
1573            #[test]
1574            fn him_is_pronoun() {
1575                assert!(md("him").is_pronoun());
1576            }
1577            #[test]
1578            fn him_is_personal_pronoun() {
1579                assert!(md("him").is_personal_pronoun());
1580            }
1581            #[test]
1582            fn him_is_singular_pronoun() {
1583                assert!(md("him").is_singular_pronoun());
1584            }
1585            #[test]
1586            fn him_is_object_pronoun() {
1587                assert!(md("him").is_object_pronoun());
1588            }
1589
1590            #[test]
1591            fn himself_is_pronoun() {
1592                assert!(md("himself").is_pronoun());
1593            }
1594            #[test]
1595            fn himself_is_personal_pronoun() {
1596                assert!(md("himself").is_personal_pronoun());
1597            }
1598            #[test]
1599            fn himself_is_singular_pronoun() {
1600                assert!(md("himself").is_singular_pronoun());
1601            }
1602            #[test]
1603            fn himself_is_reflexive_pronoun() {
1604                assert!(md("himself").is_reflexive_pronoun());
1605            }
1606        }
1607
1608        mod she_her_herself {
1609            use crate::dict_word_metadata::tests::md;
1610
1611            #[test]
1612            fn she_is_pronoun() {
1613                assert!(md("she").is_pronoun());
1614            }
1615            #[test]
1616            fn she_is_personal_pronoun() {
1617                assert!(md("she").is_personal_pronoun());
1618            }
1619            #[test]
1620            fn she_is_singular_pronoun() {
1621                assert!(md("she").is_singular_pronoun());
1622            }
1623            #[test]
1624            fn she_is_subject_pronoun() {
1625                assert!(md("she").is_subject_pronoun());
1626            }
1627
1628            #[test]
1629            fn her_is_pronoun() {
1630                assert!(md("her").is_pronoun());
1631            }
1632            #[test]
1633            fn her_is_personal_pronoun() {
1634                assert!(md("her").is_personal_pronoun());
1635            }
1636            #[test]
1637            fn her_is_singular_pronoun() {
1638                assert!(md("her").is_singular_pronoun());
1639            }
1640            #[test]
1641            fn her_is_object_pronoun() {
1642                assert!(md("her").is_object_pronoun());
1643            }
1644
1645            #[test]
1646            fn herself_is_pronoun() {
1647                assert!(md("herself").is_pronoun());
1648            }
1649            #[test]
1650            fn herself_is_personal_pronoun() {
1651                assert!(md("herself").is_personal_pronoun());
1652            }
1653            #[test]
1654            fn herself_is_singular_pronoun() {
1655                assert!(md("herself").is_singular_pronoun());
1656            }
1657            #[test]
1658            fn herself_is_reflexive_pronoun() {
1659                assert!(md("herself").is_reflexive_pronoun());
1660            }
1661        }
1662
1663        mod it_itself {
1664            use crate::dict_word_metadata::tests::md;
1665
1666            #[test]
1667            fn it_is_pronoun() {
1668                assert!(md("it").is_pronoun());
1669            }
1670            #[test]
1671            fn it_is_personal_pronoun() {
1672                assert!(md("it").is_personal_pronoun());
1673            }
1674            #[test]
1675            fn it_is_singular_pronoun() {
1676                assert!(md("it").is_singular_pronoun());
1677            }
1678            #[test]
1679            fn it_is_subject_pronoun() {
1680                assert!(md("it").is_subject_pronoun());
1681            }
1682            #[test]
1683            fn it_is_object_pronoun() {
1684                assert!(md("it").is_object_pronoun());
1685            }
1686
1687            #[test]
1688            fn itself_is_pronoun() {
1689                assert!(md("itself").is_pronoun());
1690            }
1691            #[test]
1692            fn itself_is_personal_pronoun() {
1693                assert!(md("itself").is_personal_pronoun());
1694            }
1695            #[test]
1696            fn itself_is_singular_pronoun() {
1697                assert!(md("itself").is_singular_pronoun());
1698            }
1699            #[test]
1700            fn itself_is_reflexive_pronoun() {
1701                assert!(md("itself").is_reflexive_pronoun());
1702            }
1703        }
1704
1705        mod they_them_themselves {
1706            use crate::dict_word_metadata::tests::md;
1707
1708            #[test]
1709            fn they_is_pronoun() {
1710                assert!(md("they").is_pronoun());
1711            }
1712            #[test]
1713            fn they_is_personal_pronoun() {
1714                assert!(md("they").is_personal_pronoun());
1715            }
1716            #[test]
1717            fn they_is_plural_pronoun() {
1718                assert!(md("they").is_plural_pronoun());
1719            }
1720            #[test]
1721            fn they_is_subject_pronoun() {
1722                assert!(md("they").is_subject_pronoun());
1723            }
1724
1725            #[test]
1726            fn them_is_pronoun() {
1727                assert!(md("them").is_pronoun());
1728            }
1729            #[test]
1730            fn them_is_personal_pronoun() {
1731                assert!(md("them").is_personal_pronoun());
1732            }
1733            #[test]
1734            fn them_is_plural_pronoun() {
1735                assert!(md("them").is_plural_pronoun());
1736            }
1737            #[test]
1738            fn them_is_object_pronoun() {
1739                assert!(md("them").is_object_pronoun());
1740            }
1741
1742            #[test]
1743            fn themselves_is_pronoun() {
1744                assert!(md("themselves").is_pronoun());
1745            }
1746            #[test]
1747            fn themselves_is_personal_pronoun() {
1748                assert!(md("themselves").is_personal_pronoun());
1749            }
1750            #[test]
1751            fn themselves_is_plural_pronoun() {
1752                assert!(md("themselves").is_plural_pronoun());
1753            }
1754            #[test]
1755            fn themselves_is_reflexive_pronoun() {
1756                assert!(md("themselves").is_reflexive_pronoun());
1757            }
1758        }
1759
1760        // Possessive pronouns (not to be confused with possessive adjectives/determiners)
1761        #[test]
1762        fn mine_is_pronoun() {
1763            assert!(md("mine").is_pronoun());
1764        }
1765        #[test]
1766        fn ours_is_pronoun() {
1767            assert!(md("ours").is_pronoun());
1768        }
1769        #[test]
1770        fn yours_is_pronoun() {
1771            assert!(md("yours").is_pronoun());
1772        }
1773        #[test]
1774        fn his_is_pronoun() {
1775            assert!(md("his").is_pronoun());
1776        }
1777        #[test]
1778        fn hers_is_pronoun() {
1779            assert!(md("hers").is_pronoun());
1780        }
1781        #[test]
1782        fn its_is_pronoun() {
1783            assert!(md("its").is_pronoun());
1784        }
1785        #[test]
1786        fn theirs_is_pronoun() {
1787            assert!(md("theirs").is_pronoun());
1788        }
1789
1790        // archaic pronouns
1791        #[test]
1792        fn archaic_pronouns() {
1793            assert!(md("thou").is_pronoun());
1794            assert!(md("thee").is_pronoun());
1795            assert!(md("thyself").is_pronoun());
1796            assert!(md("thine").is_pronoun());
1797        }
1798
1799        // generic pronouns
1800        #[test]
1801        fn generic_pronouns() {
1802            assert!(md("one").is_pronoun());
1803            assert!(md("oneself").is_pronoun());
1804        }
1805
1806        // relative and interrogative pronouns
1807        #[test]
1808        fn relative_and_interrogative_pronouns() {
1809            assert!(md("who").is_pronoun());
1810            assert!(md("whom").is_pronoun());
1811            assert!(md("whose").is_pronoun());
1812            assert!(md("which").is_pronoun());
1813            assert!(md("what").is_pronoun());
1814        }
1815
1816        // nonstandard pronouns
1817        #[test]
1818        #[ignore = "not in dictionary"]
1819        fn nonstandard_pronouns() {
1820            assert!(md("themself").pronoun.is_some());
1821            assert!(md("y'all'").pronoun.is_some());
1822        }
1823    }
1824
1825    mod nominal {
1826        use crate::dict_word_metadata::tests::md;
1827
1828        #[test]
1829        fn my_is_possessive_nominal() {
1830            assert!(md("my").is_possessive_nominal());
1831        }
1832
1833        #[test]
1834        fn mine_is_not_possessive_nominal() {
1835            assert!(!md("mine").is_possessive_nominal());
1836        }
1837
1838        #[test]
1839        fn freds_is_possessive_nominal() {
1840            assert!(md("Fred's").is_possessive_nominal());
1841        }
1842
1843        #[test]
1844        fn fred_is_not_possessive_nominal() {
1845            assert!(!md("Fred").is_possessive_nominal());
1846        }
1847
1848        #[test]
1849        fn dogs_is_possessive_nominal() {
1850            assert!(md("dog's").is_possessive_nominal());
1851        }
1852
1853        #[test]
1854        fn microsofts_is_possessive_nominal() {
1855            assert!(md("Microsoft's").is_possessive_nominal());
1856        }
1857    }
1858
1859    mod adjective {
1860        use crate::{Degree, dict_word_metadata::tests::md};
1861
1862        // Getting degrees
1863
1864        #[test]
1865        #[ignore = "not marked yet because it might not be reliable"]
1866        fn big_is_positive() {
1867            assert_eq!(md("big").get_degree(), Some(Degree::Positive));
1868        }
1869
1870        #[test]
1871        fn bigger_is_comparative() {
1872            assert_eq!(md("bigger").get_degree(), Some(Degree::Comparative));
1873        }
1874
1875        #[test]
1876        fn biggest_is_superlative() {
1877            assert_eq!(md("biggest").get_degree(), Some(Degree::Superlative));
1878        }
1879
1880        #[test]
1881        #[should_panic(expected = "Word 'bigly' not found in dictionary")]
1882        fn bigly_is_not_an_adjective_form_we_track() {
1883            assert_eq!(md("bigly").get_degree(), None);
1884        }
1885
1886        // Calling is_ methods
1887
1888        // TODO: positive degree not implemented
1889
1890        #[test]
1891        fn bigger_is_comparative_adjective() {
1892            assert!(md("bigger").is_comparative_adjective());
1893        }
1894
1895        #[test]
1896        fn biggest_is_superlative_adjective() {
1897            assert!(md("biggest").is_superlative_adjective());
1898        }
1899    }
1900
1901    #[test]
1902    fn the_is_determiner() {
1903        assert!(md("the").is_determiner());
1904    }
1905    #[test]
1906    fn this_is_demonstrative_determiner() {
1907        assert!(md("this").is_demonstrative_determiner());
1908    }
1909    #[test]
1910    fn your_is_possessive_determiner() {
1911        assert!(md("your").is_possessive_determiner());
1912    }
1913
1914    #[test]
1915    fn every_is_quantifier() {
1916        assert!(md("every").is_quantifier());
1917    }
1918
1919    #[test]
1920    fn the_isnt_quantifier() {
1921        assert!(!md("the").is_quantifier());
1922    }
1923
1924    #[test]
1925    fn equipment_is_mass_noun() {
1926        assert!(md("equipment").is_mass_noun());
1927    }
1928
1929    #[test]
1930    fn equipment_is_non_countable_noun() {
1931        assert!(md("equipment").is_non_countable_noun());
1932    }
1933
1934    #[test]
1935    fn equipment_isnt_countable_noun() {
1936        assert!(!md("equipment").is_countable_noun());
1937    }
1938
1939    mod verb {
1940        use crate::dict_word_metadata::tests::md;
1941
1942        #[test]
1943        fn lemma_walk() {
1944            let md = md("walk");
1945            assert!(md.is_verb_lemma())
1946        }
1947
1948        #[test]
1949        fn lemma_fix() {
1950            let md = md("fix");
1951            assert!(md.is_verb_lemma())
1952        }
1953
1954        #[test]
1955        fn progressive_walking() {
1956            let md = md("walking");
1957            assert!(md.is_verb_progressive_form())
1958        }
1959
1960        #[test]
1961        fn past_walked() {
1962            let md = md("walked");
1963            assert!(md.is_verb_past_form())
1964        }
1965
1966        #[test]
1967        fn simple_past_ate() {
1968            let md = md("ate");
1969            assert!(md.is_verb_simple_past_form())
1970        }
1971
1972        #[test]
1973        fn past_participle_eaten() {
1974            let md = md("eaten");
1975            assert!(md.is_verb_past_participle_form())
1976        }
1977
1978        #[test]
1979        fn third_pers_sing_walks() {
1980            let md = md("walks");
1981            assert!(md.is_verb_third_person_singular_present_form())
1982        }
1983    }
1984}