harper_core/
dict_word_metadata.rs

1use harper_brill::UPOS;
2use is_macro::Is;
3use itertools::Itertools;
4use paste::paste;
5use serde::{Deserialize, Serialize};
6use smallvec::SmallVec;
7use strum::{EnumCount as _, VariantArray as _};
8use strum_macros::{Display, EnumCount, EnumIter, EnumString, VariantArray};
9
10use std::convert::TryFrom;
11
12use crate::dict_word_metadata_orthography::OrthFlags;
13use crate::spell::WordId;
14use crate::{Document, TokenKind, TokenStringExt};
15
/// This represents a "lexeme" or "headword" which is case-folded but affix-expanded.
/// So not only lemmata but also inflected forms are stored here, with "horn" and "horns" each
/// having their own lexeme, but "Ivy" and "ivy" sharing the same lexeme.
///
/// Each part-of-speech field is `Some` when the word can take that part of speech and `None`
/// otherwise. A single entry may carry several of them at once.
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Hash)]
pub struct DictWordMetadata {
    // --- The main parts of speech which have extra data. ---
    /// Noun-specific data, present when the word can be a noun.
    pub noun: Option<NounData>,
    /// Pronoun-specific data, present when the word can be a pronoun.
    pub pronoun: Option<PronounData>,
    /// Verb-specific data, present when the word can be a verb.
    pub verb: Option<VerbData>,
    /// Adjective-specific data, present when the word can be an adjective.
    pub adjective: Option<AdjectiveData>,
    /// Adverb-specific data, present when the word can be an adverb.
    pub adverb: Option<AdverbData>,
    /// Conjunction-specific data, present when the word can be a conjunction.
    pub conjunction: Option<ConjunctionData>,
    /// Determiner-specific data, present when the word can be a determiner.
    pub determiner: Option<DeterminerData>,
    /// Affix-specific data, present when the entry can be an affix.
    pub affix: Option<AffixData>,
    // --- Parts of speech which don't have extra data. ---
    /// Whether the word is a [preposition](https://www.merriam-webster.com/dictionary/preposition).
    /// Defaults to `false` when absent from the serialized form.
    #[serde(default = "default_false")]
    pub preposition: bool,
    /// Whether the word is an offensive word. `None` means this is not recorded.
    pub swear: Option<bool>,
    /// The dialects this word belongs to.
    /// If no dialects are defined, it can be assumed that the word is
    /// valid in all dialects of English.
    #[serde(default = "default_default")]
    pub dialects: DialectFlags,
    /// Orthographic information: letter case, spaces, hyphens, etc.
    /// Defaults to the empty flag set when absent from the serialized form.
    #[serde(default = "OrthFlags::empty")]
    pub orth_info: OrthFlags,
    /// Whether the word is considered especially common.
    #[serde(default = "default_false")]
    pub common: bool,
    /// The [`WordId`] of the word this entry was derived from, if any.
    // NOTE(review): meaning inferred from the field name and type; confirm against the code that
    // populates it.
    #[serde(default = "default_none")]
    pub derived_from: Option<WordId>,
    /// Generated by a chunker. Declares whether the word is a member of a nominal phrase. Using
    /// this should be preferred over the similarly named `Pattern`.
    ///
    /// For more details, see [the announcement blog post](https://elijahpotter.dev/articles/training_a_chunker_with_burn).
    pub np_member: Option<bool>,
    /// Generated by a POS tagger. Declares what it inferred the word's part of speech to be.
    /// When set, [`DictWordMetadata::infer_pos_tag`] returns it without further inference.
    pub pos_tag: Option<UPOS>,
}
57
/// Needed for `serde`: supplies `false` for fields annotated with
/// `#[serde(default = "default_false")]` when they are missing from the input.
fn default_false() -> bool {
    false
}
62
/// Needed for `serde`: supplies `None` for fields annotated with
/// `#[serde(default = "default_none")]` when they are missing from the input.
fn default_none<T>() -> Option<T> {
    None
}
67
/// Needed for `serde`: supplies `T::default()` for fields annotated with
/// `#[serde(default = "default_default")]` when they are missing from the input.
fn default_default<T: Default>() -> T {
    T::default()
}
72
/// Generates the boolean part-of-speech queries for [`DictWordMetadata`].
///
/// The input is a `.`-separated list of clauses of the form `category has sub, sub, ...`
/// (the list of `sub`s may be empty). For every clause it emits:
///
/// - `is_<category>()`: the `category` field is `Some`.
/// - `is_<sub>_<category>()`: the field is `Some` and its `is_<sub>` flag is `Some(true)`.
/// - `is_non_<sub>_<category>()`: the field is `Some` and its `is_<sub>` flag is `None` or
///   `Some(false)`.
///
/// It additionally emits a single `is_likely_homograph()` covering all listed categories.
///
/// Each `category` must name an `Option<<Category>Data>` field of `self`, and each `sub` must
/// correspond to an `is_<sub>: Option<bool>` field of that data struct.
macro_rules! generate_metadata_queries {
    ($($category:ident has $($sub:ident),*).*) => {
        paste! {
            /// Checks whether the word belongs to more than one part of speech, counting the
            /// preposition flag plus every category passed to the generating macro.
            pub fn is_likely_homograph(&self) -> bool {
                // `preposition` is a plain `bool`, so it is listed by hand. Every other
                // part of speech (including `determiner`) comes from the category list and
                // must NOT be repeated here: a duplicate would count that part of speech
                // twice and flag every such word as a homograph.
                [self.preposition, $(
                    self.[< is_ $category >](),
                )*].iter().filter(|&&is_member| is_member).count() > 1
            }

            $(
                #[doc = concat!("Checks if the word is definitely a ", stringify!($category), ".")]
                pub fn [< is_ $category >](&self) -> bool {
                    self.$category.is_some()
                }

                $(
                    #[doc = concat!("Checks if the word is definitely a ", stringify!($category), " and more specifically is labeled as (a) ", stringify!($sub), ".")]
                    pub fn [< is_ $sub _ $category >](&self) -> bool {
                        matches!(
                            self.$category,
                            Some([< $category:camel Data >]{
                                [< is_ $sub >]: Some(true),
                                ..
                            })
                        )
                    }

                    #[doc = concat!("Checks if the word is definitely a ", stringify!($category), " and more specifically is labeled as __not__ (a) ", stringify!($sub), ".")]
                    pub fn [< is_non_ $sub _ $category >](&self) -> bool {
                        matches!(
                            self.$category,
                            Some([< $category:camel Data >]{
                                [< is_ $sub >]: None | Some(false),
                                ..
                            })
                        )
                    }
                )*
            )*
        }
    };
}
114
115impl DictWordMetadata {
116    /// If there is only one possible interpretation of the metadata, infer its UPOS tag.
117    pub fn infer_pos_tag(&self) -> Option<UPOS> {
118        // If an explicit POS tag exists, return it immediately.
119        if let Some(pos) = self.pos_tag {
120            return Some(pos);
121        }
122
123        // Collect all possible POS tags from metadata
124        let mut candidates = SmallVec::<[UPOS; 14]>::with_capacity(14);
125
126        if self.is_proper_noun() {
127            candidates.push(UPOS::PROPN);
128        }
129
130        if self.is_pronoun() {
131            candidates.push(UPOS::PRON);
132        }
133        if self.is_noun() {
134            candidates.push(UPOS::NOUN);
135        }
136        if self.is_verb() {
137            // Distinguish auxiliary verbs
138            if let Some(data) = &self.verb {
139                if data.is_auxiliary == Some(true) {
140                    candidates.push(UPOS::AUX);
141                } else {
142                    candidates.push(UPOS::VERB);
143                }
144            } else {
145                candidates.push(UPOS::VERB);
146            }
147        }
148        if self.is_adjective() {
149            candidates.push(UPOS::ADJ);
150        }
151        if self.is_adverb() {
152            candidates.push(UPOS::ADV);
153        }
154        if self.is_conjunction() {
155            candidates.push(UPOS::CCONJ);
156        }
157        if self.is_determiner() {
158            candidates.push(UPOS::DET);
159        }
160        if self.preposition {
161            candidates.push(UPOS::ADP);
162        }
163
164        // Remove duplicates
165        candidates.sort();
166        candidates.dedup();
167
168        if candidates.len() == 1 {
169            candidates.first().copied()
170        } else {
171            None
172        }
173    }
174
175    /// Produce a copy of `self` with the known properties of `other` set.
176    pub fn or(&self, other: &Self) -> Self {
177        macro_rules! merge {
178            ($a:expr, $b:expr) => {
179                match ($a, $b) {
180                    (Some(a), Some(b)) => Some(a.or(&b)),
181                    (Some(a), None) => Some(a),
182                    (None, Some(b)) => Some(b),
183                    (None, None) => None,
184                }
185            };
186        }
187
188        Self {
189            noun: merge!(self.noun, other.noun),
190            pronoun: merge!(self.pronoun, other.pronoun),
191            verb: merge!(self.verb, other.verb),
192            adjective: merge!(self.adjective, other.adjective),
193            adverb: merge!(self.adverb, other.adverb),
194            conjunction: merge!(self.conjunction, other.conjunction),
195            determiner: merge!(self.determiner, other.determiner),
196            affix: merge!(self.affix, other.affix),
197            preposition: self.preposition || other.preposition,
198            dialects: self.dialects | other.dialects,
199            orth_info: self.orth_info | other.orth_info,
200            swear: self.swear.or(other.swear),
201            common: self.common || other.common,
202            derived_from: self.derived_from.or(other.derived_from),
203            pos_tag: self.pos_tag.or(other.pos_tag),
204            np_member: self.np_member.or(other.np_member),
205        }
206    }
207
208    /// Given a UPOS tag, discard any metadata that would disagree with the given POS tag.
209    /// For example, if the metadata suggests a word could either be a noun or an adjective, and we
210    /// provide a [`UPOS::NOUN`], this function will remove the adjective data.
211    ///
212    /// Additionally, if the metadata does not currently declare the potential of the word to be
213    /// the specific POS, it becomes so. That means if we provide a [`UPOS::ADJ`] to the function
214    /// for a metadata whose `Self::adjective = None`, it will become `Some`.
215    pub fn enforce_pos_exclusivity(&mut self, pos: &UPOS) {
216        use UPOS::*;
217        match pos {
218            NOUN => {
219                if let Some(noun) = self.noun {
220                    self.noun = Some(NounData {
221                        is_proper: Some(false),
222                        ..noun
223                    })
224                } else {
225                    self.noun = Some(NounData {
226                        is_proper: Some(false),
227                        is_singular: None,
228                        is_plural: None,
229                        is_countable: None,
230                        is_mass: None,
231                        is_possessive: None,
232                    })
233                }
234
235                self.pronoun = None;
236                self.verb = None;
237                self.adjective = None;
238                self.adverb = None;
239                self.conjunction = None;
240                self.determiner = None;
241                self.affix = None;
242                self.preposition = false;
243            }
244            PROPN => {
245                if let Some(noun) = self.noun {
246                    self.noun = Some(NounData {
247                        is_proper: Some(true),
248                        ..noun
249                    })
250                } else {
251                    self.noun = Some(NounData {
252                        is_proper: Some(true),
253                        is_singular: None,
254                        is_plural: None,
255                        is_countable: None,
256                        is_mass: None,
257                        is_possessive: None,
258                    })
259                }
260
261                self.pronoun = None;
262                self.verb = None;
263                self.adjective = None;
264                self.adverb = None;
265                self.conjunction = None;
266                self.determiner = None;
267                self.affix = None;
268                self.preposition = false;
269            }
270            PRON => {
271                if self.pronoun.is_none() {
272                    self.pronoun = Some(PronounData::default())
273                }
274
275                self.noun = None;
276                self.verb = None;
277                self.adjective = None;
278                self.adverb = None;
279                self.conjunction = None;
280                self.determiner = None;
281                self.affix = None;
282                self.preposition = false;
283            }
284            VERB => {
285                if let Some(verb) = self.verb {
286                    self.verb = Some(VerbData {
287                        is_auxiliary: Some(false),
288                        ..verb
289                    })
290                } else {
291                    self.verb = Some(VerbData {
292                        is_auxiliary: Some(false),
293                        ..Default::default()
294                    })
295                }
296
297                self.noun = None;
298                self.pronoun = None;
299                self.adjective = None;
300                self.adverb = None;
301                self.conjunction = None;
302                self.determiner = None;
303                self.affix = None;
304                self.preposition = false;
305            }
306            AUX => {
307                if let Some(verb) = self.verb {
308                    self.verb = Some(VerbData {
309                        is_auxiliary: Some(true),
310                        ..verb
311                    })
312                } else {
313                    self.verb = Some(VerbData {
314                        is_auxiliary: Some(true),
315                        ..Default::default()
316                    })
317                }
318
319                self.noun = None;
320                self.pronoun = None;
321                self.adjective = None;
322                self.adverb = None;
323                self.conjunction = None;
324                self.determiner = None;
325                self.affix = None;
326                self.preposition = false;
327            }
328            ADJ => {
329                if self.adjective.is_none() {
330                    self.adjective = Some(AdjectiveData::default())
331                }
332
333                self.noun = None;
334                self.pronoun = None;
335                self.verb = None;
336                self.adverb = None;
337                self.conjunction = None;
338                self.determiner = None;
339                self.affix = None;
340                self.preposition = false;
341            }
342            ADV => {
343                if self.adverb.is_none() {
344                    self.adverb = Some(AdverbData::default())
345                }
346
347                self.noun = None;
348                self.pronoun = None;
349                self.verb = None;
350                self.adjective = None;
351                self.conjunction = None;
352                self.determiner = None;
353                self.affix = None;
354                self.preposition = false;
355            }
356            ADP => {
357                self.noun = None;
358                self.pronoun = None;
359                self.verb = None;
360                self.adjective = None;
361                self.adverb = None;
362                self.conjunction = None;
363                self.determiner = None;
364                self.affix = None;
365                self.preposition = true;
366            }
367            DET => {
368                self.noun = None;
369                self.pronoun = None;
370                self.verb = None;
371                self.adjective = None;
372                self.adverb = None;
373                self.conjunction = None;
374                self.affix = None;
375                self.preposition = false;
376                self.determiner = Some(DeterminerData::default());
377            }
378            CCONJ | SCONJ => {
379                if self.conjunction.is_none() {
380                    self.conjunction = Some(ConjunctionData::default())
381                }
382
383                self.noun = None;
384                self.pronoun = None;
385                self.verb = None;
386                self.adjective = None;
387                self.adverb = None;
388                self.determiner = None;
389                self.affix = None;
390                self.preposition = false;
391            }
392            _ => {}
393        }
394    }
395
    // Generates `is_<category>()` for every category listed below, plus
    // `is_<sub>_<category>()` and `is_non_<sub>_<category>()` for each listed sub-property
    // (e.g. `is_noun()`, `is_proper_noun()`, `is_non_proper_noun()`), and a single
    // `is_likely_homograph()`. Categories followed by a bare `has` only get `is_<category>()`.
    generate_metadata_queries!(
        // Singular and countable default to true, so their metadata queries are not generated.
        noun has proper, plural, mass, possessive.
        pronoun has personal, singular, plural, possessive, reflexive, subject, object.
        determiner has demonstrative, possessive, quantifier.
        verb has linking, auxiliary.
        conjunction has.
        adjective has.
        adverb has manner, frequency, degree
    );
406
407    // Manual metadata queries
408
409    // Pronoun metadata queries
410
411    pub fn get_person(&self) -> Option<Person> {
412        self.pronoun.as_ref().and_then(|p| p.person)
413    }
414
415    pub fn is_first_person_plural_pronoun(&self) -> bool {
416        matches!(
417            self.pronoun,
418            Some(PronounData {
419                person: Some(Person::First),
420                is_plural: Some(true),
421                ..
422            })
423        )
424    }
425
426    pub fn is_first_person_singular_pronoun(&self) -> bool {
427        matches!(
428            self.pronoun,
429            Some(PronounData {
430                person: Some(Person::First),
431                is_singular: Some(true),
432                ..
433            })
434        )
435    }
436
437    pub fn is_third_person_plural_pronoun(&self) -> bool {
438        matches!(
439            self.pronoun,
440            Some(PronounData {
441                person: Some(Person::Third),
442                is_plural: Some(true),
443                ..
444            })
445        )
446    }
447
448    pub fn is_third_person_singular_pronoun(&self) -> bool {
449        matches!(
450            self.pronoun,
451            Some(PronounData {
452                person: Some(Person::Third),
453                is_singular: Some(true),
454                ..
455            })
456        )
457    }
458
459    pub fn is_third_person_pronoun(&self) -> bool {
460        matches!(
461            self.pronoun,
462            Some(PronounData {
463                person: Some(Person::Third),
464                ..
465            })
466        )
467    }
468
469    pub fn is_second_person_pronoun(&self) -> bool {
470        matches!(
471            self.pronoun,
472            Some(PronounData {
473                person: Some(Person::Second),
474                ..
475            })
476        )
477    }
478
479    // Lemma is default if no verb form is specified in the dictionary
480    pub fn is_verb_lemma(&self) -> bool {
481        if let Some(verb) = self.verb {
482            if let Some(forms) = verb.verb_forms {
483                return forms.is_empty() || forms.contains(VerbFormFlags::LEMMA);
484            } else {
485                return true;
486            }
487        }
488        false
489    }
490
491    pub fn is_verb_past_form(&self) -> bool {
492        self.verb.is_some_and(|v| {
493            v.verb_forms
494                .is_some_and(|vf| vf.contains(VerbFormFlags::PAST))
495        })
496    }
497
498    pub fn is_verb_simple_past_form(&self) -> bool {
499        self.verb.is_some_and(|v| {
500            v.verb_forms
501                .is_some_and(|vf| vf.contains(VerbFormFlags::PRETERITE))
502        })
503    }
504
505    pub fn is_verb_past_participle_form(&self) -> bool {
506        self.verb.is_some_and(|v| {
507            v.verb_forms
508                .is_some_and(|vf| vf.contains(VerbFormFlags::PAST_PARTICIPLE))
509        })
510    }
511
512    pub fn is_verb_progressive_form(&self) -> bool {
513        self.verb.is_some_and(|v| {
514            v.verb_forms
515                .is_some_and(|vf| vf.contains(VerbFormFlags::PROGRESSIVE))
516        })
517    }
518
519    pub fn is_verb_third_person_singular_present_form(&self) -> bool {
520        self.verb.is_some_and(|v| {
521            v.verb_forms
522                .is_some_and(|vf| vf.contains(VerbFormFlags::THIRD_PERSON_SINGULAR))
523        })
524    }
525
526    // Noun metadata queries
527
528    // Singular is default if number is not marked in the dictionary.
529    pub fn is_singular_noun(&self) -> bool {
530        if let Some(noun) = self.noun {
531            matches!(
532                (noun.is_singular, noun.is_plural),
533                (Some(true), _) | (None | Some(false), None | Some(false))
534            )
535        } else {
536            false
537        }
538    }
539    pub fn is_non_singular_noun(&self) -> bool {
540        if let Some(noun) = self.noun {
541            !matches!(
542                (noun.is_singular, noun.is_plural),
543                (Some(true), _) | (None | Some(false), None | Some(false))
544            )
545        } else {
546            false
547        }
548    }
549
550    // Countable is default if countability is not marked in the dictionary.
551    pub fn is_countable_noun(&self) -> bool {
552        if let Some(noun) = self.noun {
553            matches!(
554                (noun.is_countable, noun.is_mass),
555                (Some(true), _) | (None | Some(false), None | Some(false))
556            )
557        } else {
558            false
559        }
560    }
561    pub fn is_non_countable_noun(&self) -> bool {
562        if let Some(noun) = self.noun {
563            !matches!(
564                (noun.is_countable, noun.is_mass),
565                (Some(true), _) | (None | Some(false), None | Some(false))
566            )
567        } else {
568            false
569        }
570    }
571
572    // Most mass nouns also have countable senses. Match those that are only mass nouns.
573    pub fn is_mass_noun_only(&self) -> bool {
574        if let Some(noun) = self.noun {
575            matches!(
576                (noun.is_countable, noun.is_mass),
577                (None | Some(false), Some(true))
578            )
579        } else {
580            false
581        }
582    }
583
584    // Nominal metadata queries (noun + pronoun)
585
    /// Checks if the word is definitely nominal, i.e. it has noun data or pronoun data.
    pub fn is_nominal(&self) -> bool {
        self.is_noun() || self.is_pronoun()
    }
590
    /// Checks if the word is definitely a nominal and more specifically is labeled as (a) singular.
    ///
    /// The two halves differ: a noun with no number marking counts as singular (see
    /// [`Self::is_singular_noun`]), whereas a pronoun must be explicitly labeled singular.
    pub fn is_singular_nominal(&self) -> bool {
        self.is_singular_noun() || self.is_singular_pronoun()
    }
595
    /// Checks if the word is definitely a nominal and more specifically is labeled as (a) plural.
    ///
    /// True when either the noun data or the pronoun data is explicitly labeled plural.
    pub fn is_plural_nominal(&self) -> bool {
        self.is_plural_noun() || self.is_plural_pronoun()
    }
600
    /// Checks if the word is definitely a nominal and more specifically is labeled as (a) possessive.
    ///
    /// True when either the noun data or the pronoun data is explicitly labeled possessive.
    pub fn is_possessive_nominal(&self) -> bool {
        self.is_possessive_noun() || self.is_possessive_pronoun()
    }
605
    /// Checks if the word is definitely a nominal and more specifically is labeled as __not__ (a) singular.
    ///
    /// The two halves differ: a noun qualifies only when it is labeled plural and not labeled
    /// singular (see [`Self::is_non_singular_noun`]), whereas a pronoun qualifies whenever its
    /// singular flag is unset or `false`.
    pub fn is_non_singular_nominal(&self) -> bool {
        self.is_non_singular_noun() || self.is_non_singular_pronoun()
    }
610
    /// Checks if the word is definitely a nominal and more specifically is labeled as __not__ (a) plural.
    ///
    /// True when noun data or pronoun data exists whose plural flag is unset or `false`.
    pub fn is_non_plural_nominal(&self) -> bool {
        self.is_non_plural_noun() || self.is_non_plural_pronoun()
    }
615
    /// Checks if the word is definitely a nominal and more specifically is labeled as __not__ (a) possessive.
    ///
    /// True when noun data or pronoun data exists whose possessive flag is unset or `false`.
    pub fn is_non_possessive_nominal(&self) -> bool {
        self.is_non_possessive_noun() || self.is_non_possessive_pronoun()
    }
620
621    // Adjective metadata queries
622
623    pub fn get_degree(&self) -> Option<Degree> {
624        self.adjective.as_ref().and_then(|a| a.degree)
625    }
626
627    pub fn is_comparative_adjective(&self) -> bool {
628        matches!(
629            self.adjective,
630            Some(AdjectiveData {
631                degree: Some(Degree::Comparative)
632            })
633        )
634    }
635
636    pub fn is_superlative_adjective(&self) -> bool {
637        matches!(
638            self.adjective,
639            Some(AdjectiveData {
640                degree: Some(Degree::Superlative)
641            })
642        )
643    }
644
645    // Degree::Positive is the default if degree is not marked in the dictionary.
646    pub fn is_positive_adjective(&self) -> bool {
647        match self.adjective {
648            Some(AdjectiveData {
649                degree: Some(Degree::Positive),
650            }) => true,
651            Some(AdjectiveData { degree: None }) => true,
652            Some(AdjectiveData {
653                degree: Some(degree),
654            }) => !matches!(degree, Degree::Comparative | Degree::Superlative),
655            _ => false,
656        }
657    }
658
659    // Determiner metadata queries
660
    /// Checks if the word is definitely a determiner and more specifically is labeled as (a) quantifier.
    ///
    /// Shorthand for `is_quantifier_determiner()`.
    pub fn is_quantifier(&self) -> bool {
        self.is_quantifier_determiner()
    }
665
666    // Non-POS queries
667
668    /// Checks whether a word is _definitely_ a swear.
669    pub fn is_swear(&self) -> bool {
670        matches!(self.swear, Some(true))
671    }
672
673    // Orthographic queries
674
    /// Does the metadata for this word cover an all-lowercase variant? (e.g., "hello")
    ///
    /// This returns true if all letters in the word are lowercase. Words containing
    /// non-letter characters (like numbers or symbols) are only considered if all
    /// letter characters are lowercase.
    ///
    /// The method itself only tests whether `orth_info` contains [`OrthFlags::LOWERCASE`];
    /// the criteria above describe that flag.
    pub fn is_lowercase(&self) -> bool {
        self.orth_info.contains(OrthFlags::LOWERCASE)
    }
    /// Does the metadata for this word cover a titlecase variant? (e.g., "Hello")
    ///
    /// This returns true if the word is in titlecase form, which means:
    /// - The first letter is uppercase
    /// - All other letters are lowercase
    /// - The word is at least 2 characters long
    ///
    /// Examples: "Hello", "World"
    ///
    /// Note: Words with internal capital letters (like "McDonald") or apostrophes (like "O'Reilly")
    /// are not considered titlecase - they are classified as UPPER_CAMEL instead.
    ///
    /// The method itself only tests whether `orth_info` contains [`OrthFlags::TITLECASE`];
    /// the criteria above describe that flag.
    pub fn is_titlecase(&self) -> bool {
        self.orth_info.contains(OrthFlags::TITLECASE)
    }
    /// Does the metadata for this word cover an all-uppercase variant? (e.g., "HELLO")
    ///
    /// This returns true if all letters in the word are uppercase. Words containing
    /// non-letter characters (like numbers or symbols) are only considered if all
    /// letter characters are uppercase.
    ///
    /// Examples: "HELLO", "NASA", "I"
    ///
    /// The method itself only tests whether `orth_info` contains [`OrthFlags::ALLCAPS`];
    /// the criteria above describe that flag.
    pub fn is_allcaps(&self) -> bool {
        self.orth_info.contains(OrthFlags::ALLCAPS)
    }
    /// Does the metadata for this word cover a lower camel case variant? (e.g., "helloWorld")
    ///
    /// This returns true if the word is in lower camel case, which means:
    /// - The first letter is lowercase
    /// - There is at least one uppercase letter after the first character
    /// - The word must be at least 2 characters long
    ///
    /// Examples: "helloWorld", "getHTTPResponse", "eBay"
    ///
    /// Note: Single words that are all lowercase will return false.
    /// Words starting with an uppercase letter will return false (those would be UpperCamel).
    ///
    /// The method itself only tests whether `orth_info` contains [`OrthFlags::LOWER_CAMEL`];
    /// the criteria above describe that flag.
    pub fn is_lower_camel(&self) -> bool {
        self.orth_info.contains(OrthFlags::LOWER_CAMEL)
    }
    /// Does the metadata for this word cover an upper camel case / pascal case variant? (e.g., "HelloWorld")
    ///
    /// This returns true if the word is in upper camel case (also known as Pascal case), which means:
    /// - The first letter is uppercase
    /// - There is at least one other uppercase letter after the first character
    /// - There is at least one lowercase letter after the first uppercase letter
    /// - The word must be at least 3 characters long
    ///
    /// Examples:
    /// - "HelloWorld" (standard Pascal case)
    /// - "McDonald" (name with internal caps)
    /// - "O'Reilly" (name with apostrophe and internal caps)
    /// - "HttpRequest" (initialism followed by word)
    ///
    /// Note: Single words that are titlecase (like "Hello") will return false.
    /// Words that are all uppercase (like "NASA") will also return false.
    ///
    /// The method itself only tests whether `orth_info` contains [`OrthFlags::UPPER_CAMEL`];
    /// the criteria above describe that flag.
    pub fn is_upper_camel(&self) -> bool {
        self.orth_info.contains(OrthFlags::UPPER_CAMEL)
    }
740
    /// Does the metadata for this word cover an apostrophized variant? (e.g., "doesn't")
    ///
    /// Tests whether `orth_info` contains [`OrthFlags::APOSTROPHE`].
    pub fn is_apostrophized(&self) -> bool {
        self.orth_info.contains(OrthFlags::APOSTROPHE)
    }
745
    /// Tests whether `orth_info` contains [`OrthFlags::ROMAN_NUMERALS`].
    // NOTE(review): presumably set for words that can be read as Roman numerals; confirm
    // against the code that assigns `OrthFlags`.
    pub fn is_roman_numerals(&self) -> bool {
        self.orth_info.contains(OrthFlags::ROMAN_NUMERALS)
    }
749
750    /// Same thing as [`Self::or`], except in-place rather than a clone.
751    pub fn append(&mut self, other: &Self) -> &mut Self {
752        *self = self.or(other);
753        self
754    }
755}
756
/// The morphological forms a verb can take. Each variant occupies its own bit so that several
/// forms can be combined in a [`VerbFormFlags`] value.
///
/// These verb forms are morphological variations, distinct from TAM (Tense-Aspect-Mood).
/// Each form can be used in various TAM combinations:
/// - Lemma form (infinitive, citation form, dictionary form)
///   Used in infinitives (e.g., "to sleep"), imperatives (e.g., "sleep!"), and with modals (e.g., "will sleep")
/// - Past form (past participle and simple past)
///   Used as verbs (e.g., "slept") or adjectives (e.g., "closed door")
/// - Progressive form (present participle and gerund)
///   Used as verbs (e.g., "sleeping"), nouns (e.g., "sleeping is important"), or adjectives (e.g., "sleeping dog")
/// - Third person singular present (-s/-es)
///   Used for third person singular subjects (e.g., "he sleeps", "she reads")
///
/// Important notes:
/// 1. English expresses time through auxiliary verbs, not verb form alone
/// 2. Irregular verbs can have different forms for past participle and simple past, which is
///    why separate `SimplePastForm` and `PastParticipleForm` variants exist
/// 3. Future is always expressed through auxiliary verbs (e.g., "will sleep", "going to sleep")
#[repr(u32)]
pub enum VerbForm {
    /// The uninflected verb form: "walk", "eat"
    LemmaForm = 1 << 0,
    /// The past form for regular verbs: "walked"
    PastForm = 1 << 1,
    /// The simple past/preterite form for irregular verbs: "ate"
    SimplePastForm = 1 << 2,
    /// The past participle form for irregular verbs: "eaten"
    PastParticipleForm = 1 << 3,
    /// The progressive/continuous/gerund/present participle form: "walking", "eating"
    ProgressiveForm = 1 << 4,
    /// The third person singular present form: "walks", "eats"
    ThirdPersonSingularPresentForm = 1 << 5,
}
787
/// The underlying type used for verb form flags.
///
/// [`VerbForm`] discriminants are cast to this type when the flags are defined, so it should
/// stay in step with that enum's `#[repr(u32)]`.
pub type VerbFormFlagsUnderlyingType = u32;
790
bitflags::bitflags! {
    /// A collection of bit flags used to represent verb forms.
    ///
    /// This allows a word to be tagged with multiple verb forms when applicable.
    /// Each flag takes its bit value from the corresponding [`VerbForm`] variant.
    #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
    #[serde(transparent)]
    pub struct VerbFormFlags: VerbFormFlagsUnderlyingType {
        /// See [`VerbForm::LemmaForm`].
        const LEMMA = VerbForm::LemmaForm as VerbFormFlagsUnderlyingType;
        /// See [`VerbForm::PastForm`].
        const PAST = VerbForm::PastForm as VerbFormFlagsUnderlyingType;
        /// See [`VerbForm::SimplePastForm`].
        const PRETERITE = VerbForm::SimplePastForm as VerbFormFlagsUnderlyingType;
        /// See [`VerbForm::PastParticipleForm`].
        const PAST_PARTICIPLE = VerbForm::PastParticipleForm as VerbFormFlagsUnderlyingType;
        /// See [`VerbForm::ProgressiveForm`].
        const PROGRESSIVE = VerbForm::ProgressiveForm as VerbFormFlagsUnderlyingType;
        /// See [`VerbForm::ThirdPersonSingularPresentForm`].
        const THIRD_PERSON_SINGULAR = VerbForm::ThirdPersonSingularPresentForm as VerbFormFlagsUnderlyingType;
    }
}
806
/// Verb-specific metadata stored in [`DictWordMetadata::verb`].
///
/// For each `Option` field, `None` means the property is not recorded.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct VerbData {
    /// Whether the verb is labeled as a linking verb.
    pub is_linking: Option<bool>,
    /// Whether the verb is labeled as an auxiliary verb.
    pub is_auxiliary: Option<bool>,
    /// The set of forms this verb entry covers. Serialized under the key `verb_form`; a
    /// missing key deserializes to `None`.
    #[serde(rename = "verb_form", default)]
    pub verb_forms: Option<VerbFormFlags>,
}
814
815impl VerbData {
816    /// Produce a copy of `self` with the known properties of `other` set.
817    pub fn or(&self, other: &Self) -> Self {
818        let verb_forms = match (self.verb_forms, other.verb_forms) {
819            (Some(self_verb_forms), Some(other_verb_forms)) => {
820                Some(self_verb_forms | other_verb_forms)
821            }
822            (Some(self_verb_forms), None) => Some(self_verb_forms),
823            (None, Some(other_verb_forms)) => Some(other_verb_forms),
824            (None, None) => None,
825        };
826
827        Self {
828            is_linking: self.is_linking.or(other.is_linking),
829            is_auxiliary: self.is_auxiliary.or(other.is_auxiliary),
830            verb_forms,
831        }
832    }
833}
834
/// Noun-specific metadata attached to a dictionary word.
///
/// A noun can be both singular and plural at once: "aircraft", "biceps", "fish", "sheep".
/// Every property is optional; [`NounData::or`] treats `None` as "not known" when merging.
// TODO other noun properties may be worth adding: abstract
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct NounData {
    /// Whether this is a proper noun, if known.
    pub is_proper: Option<bool>,
    /// Whether this noun is singular, if known.
    pub is_singular: Option<bool>,
    /// Whether this noun is plural, if known.
    pub is_plural: Option<bool>,
    /// Whether this is a countable noun, if known.
    pub is_countable: Option<bool>,
    /// Whether this is a mass noun, if known.
    pub is_mass: Option<bool>,
    /// Whether this noun is in its possessive form, if known.
    pub is_possessive: Option<bool>,
}
846
847impl NounData {
848    /// Produce a copy of `self` with the known properties of `other` set.
849    pub fn or(&self, other: &Self) -> Self {
850        Self {
851            is_proper: self.is_proper.or(other.is_proper),
852            is_singular: self.is_singular.or(other.is_singular),
853            is_plural: self.is_plural.or(other.is_plural),
854            is_countable: self.is_countable.or(other.is_countable),
855            is_mass: self.is_mass.or(other.is_mass),
856            is_possessive: self.is_possessive.or(other.is_possessive),
857        }
858    }
859}
860
/// Grammatical person.
///
/// Person is a property of pronouns. It is also reflected by the verb "be", and by all verbs
/// in the third person singular, which is marked with -s.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
pub enum Person {
    /// First person.
    First,
    /// Second person.
    Second,
    /// Third person.
    Third,
}
868
/// Pronoun-specific metadata attached to a dictionary word.
///
/// Every property is optional; [`PronounData::or`] treats `None` as "not known" when merging.
// TODO for now focused on personal pronouns?
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct PronounData {
    /// Whether this is a personal pronoun, if known.
    pub is_personal: Option<bool>,
    /// Whether this pronoun is singular, if known.
    pub is_singular: Option<bool>,
    /// Whether this pronoun is plural, if known.
    pub is_plural: Option<bool>,
    /// Whether this is a possessive pronoun, if known.
    pub is_possessive: Option<bool>,
    /// Whether this is a reflexive pronoun, if known.
    pub is_reflexive: Option<bool>,
    /// The grammatical person of this pronoun, if known.
    pub person: Option<Person>,
    /// Whether this is a subject pronoun, if known.
    pub is_subject: Option<bool>,
    /// Whether this is an object pronoun, if known.
    pub is_object: Option<bool>,
}
881
882impl PronounData {
883    /// Produce a copy of `self` with the known properties of `other` set.
884    pub fn or(&self, other: &Self) -> Self {
885        Self {
886            is_personal: self.is_personal.or(other.is_personal),
887            is_singular: self.is_singular.or(other.is_singular),
888            is_plural: self.is_plural.or(other.is_plural),
889            is_possessive: self.is_possessive.or(other.is_possessive),
890            is_reflexive: self.is_reflexive.or(other.is_reflexive),
891            person: self.person.or(other.person),
892            is_subject: self.is_subject.or(other.is_subject),
893            is_object: self.is_object.or(other.is_object),
894        }
895    }
896}
897
/// Additional metadata for determiners.
///
/// Every property is optional; [`DeterminerData::or`] treats `None` as "not known" when
/// merging.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct DeterminerData {
    /// Whether this is a demonstrative determiner, if known.
    pub is_demonstrative: Option<bool>,
    /// Whether this is a possessive determiner, if known.
    pub is_possessive: Option<bool>,
    /// Whether this determiner is a quantifier, if known.
    pub is_quantifier: Option<bool>,
}
905
906impl DeterminerData {
907    /// Produce a copy of `self` with the known properties of `other` set.
908    pub fn or(&self, other: &Self) -> Self {
909        Self {
910            is_demonstrative: self.is_demonstrative.or(other.is_demonstrative),
911            is_possessive: self.is_possessive.or(other.is_possessive),
912            is_quantifier: self.is_quantifier.or(other.is_quantifier),
913        }
914    }
915}
916
/// Degree is a property of adjectives.
///
/// The positive degree is not inflected, the comparative is inflected with -er or comes after
/// the word "more", and the superlative is inflected with -est or comes after the word "most".
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
pub enum Degree {
    /// The uninflected form.
    Positive,
    /// The -er form, or the form used after "more".
    Comparative,
    /// The -est form, or the form used after "most".
    Superlative,
}
926
/// Adjective-specific metadata attached to a dictionary word.
///
/// Some adjectives are not comparable so don't have -er or -est forms and can't be used with "more" or "most".
/// Some adjectives can only be used "attributively" (before a noun); some only predicatively (after "is" etc.).
/// In old grammars words like the articles and determiners are classified as adjectives but behave differently.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct AdjectiveData {
    /// The degree of comparison of this adjective form, if known.
    pub degree: Option<Degree>,
}
934
935impl AdjectiveData {
936    /// Produce a copy of `self` with the known properties of `other` set.
937    pub fn or(&self, other: &Self) -> Self {
938        Self {
939            degree: self.degree.or(other.degree),
940        }
941    }
942}
943
/// Adverb-specific metadata attached to a dictionary word.
///
/// Adverb can be a "junk drawer" category for words which don't fit the other major categories.
/// The typical adverbs are "adverbs of manner", those derived from adjectives in -ly.
/// Other adverbs (time, place, etc.) should probably not be considered adverbs for Harper's
/// purposes.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct AdverbData {
    /// Whether this is an adverb of manner, if known.
    pub is_manner: Option<bool>,
    /// Whether this is flagged as a frequency adverb, if known.
    pub is_frequency: Option<bool>,
    /// Whether this is flagged as a degree adverb, if known.
    pub is_degree: Option<bool>,
}
953
954impl AdverbData {
955    /// Produce a copy of `self` with the known properties of `other` set.
956    pub fn or(&self, _other: &Self) -> Self {
957        Self {
958            is_manner: self.is_manner.or(_other.is_manner),
959            is_frequency: self.is_frequency.or(_other.is_frequency),
960            is_degree: self.is_degree.or(_other.is_degree),
961        }
962    }
963}
964
/// Conjunction-specific metadata attached to a dictionary word.
///
/// It currently has no fields.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct ConjunctionData {}
967
968impl ConjunctionData {
969    /// Produce a copy of `self` with the known properties of `other` set.
970    pub fn or(&self, _other: &Self) -> Self {
971        Self {}
972    }
973}
974
/// Affix-specific metadata attached to a dictionary word.
///
/// Every property is optional; [`AffixData::or`] treats `None` as "not known" when merging.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct AffixData {
    /// Whether this affix is a prefix, if known.
    pub is_prefix: Option<bool>,
    /// Whether this affix is a suffix, if known.
    pub is_suffix: Option<bool>,
}
980
981impl AffixData {
982    /// Produce a copy of `self` with the known properties of `other` set.
983    pub fn or(&self, _other: &Self) -> Self {
984        Self {
985            is_prefix: self.is_prefix.or(_other.is_prefix),
986            is_suffix: self.is_suffix.or(_other.is_suffix),
987        }
988    }
989}
990
/// A regional dialect.
///
/// Note: these have bit-shifted values so that they can ergonomically integrate with
/// `DialectFlags`. Each value here must have a unique bit index inside
/// `DialectsUnderlyingType`.
#[derive(
    Debug,
    Clone,
    Copy,
    Serialize,
    Deserialize,
    PartialEq,
    PartialOrd,
    Eq,
    Hash,
    EnumCount,
    EnumString,
    EnumIter,
    Display,
    VariantArray,
)]
pub enum Dialect {
    /// American English; abbreviated "US" in [`Dialect::try_from_abbr`].
    American = 1 << 0,
    /// Canadian English; abbreviated "CA" in [`Dialect::try_from_abbr`].
    Canadian = 1 << 1,
    /// Australian English; abbreviated "AU" in [`Dialect::try_from_abbr`].
    Australian = 1 << 2,
    /// British English; abbreviated "GB" in [`Dialect::try_from_abbr`].
    British = 1 << 3,
}
1018impl Dialect {
1019    /// Tries to guess the dialect used in the document by finding which dialect is used the most.
1020    /// Returns `None` if it fails to find a single dialect that is used the most.
1021    #[must_use]
1022    pub fn try_guess_from_document(document: &Document) -> Option<Self> {
1023        Self::try_from(DialectFlags::get_most_used_dialects_from_document(document)).ok()
1024    }
1025
1026    /// Tries to get a dialect from its abbreviation. Returns `None` if the abbreviation is not
1027    /// recognized.
1028    ///
1029    /// # Examples
1030    ///
1031    /// ```
1032    /// use harper_core::Dialect;
1033    ///
1034    /// let abbrs = ["US", "CA", "AU", "GB"];
1035    /// let mut dialects = abbrs.iter().map(|abbr| Dialect::try_from_abbr(abbr));
1036    ///
1037    /// assert_eq!(Some(Dialect::American), dialects.next().unwrap()); // US
1038    /// assert_eq!(Some(Dialect::Canadian), dialects.next().unwrap()); // CA
1039    /// assert_eq!(Some(Dialect::Australian), dialects.next().unwrap()); // AU
1040    /// assert_eq!(Some(Dialect::British), dialects.next().unwrap()); // GB
1041    /// ```
1042    #[must_use]
1043    pub fn try_from_abbr(abbr: &str) -> Option<Self> {
1044        match abbr {
1045            "US" => Some(Self::American),
1046            "CA" => Some(Self::Canadian),
1047            "AU" => Some(Self::Australian),
1048            "GB" => Some(Self::British),
1049            _ => None,
1050        }
1051    }
1052}
1053impl TryFrom<DialectFlags> for Dialect {
1054    type Error = ();
1055
1056    /// Attempts to convert `DialectFlags` to a single `Dialect`.
1057    ///
1058    /// # Errors
1059    ///
1060    /// Will return `Err` if more than one dialect is enabled or if an undefined dialect is
1061    /// enabled.
1062    fn try_from(dialect_flags: DialectFlags) -> Result<Self, Self::Error> {
1063        // Ensure only one dialect is enabled before converting.
1064        if dialect_flags.bits().count_ones() == 1 {
1065            match dialect_flags {
1066                df if df.is_dialect_enabled_strict(Dialect::American) => Ok(Dialect::American),
1067                df if df.is_dialect_enabled_strict(Dialect::Canadian) => Ok(Dialect::Canadian),
1068                df if df.is_dialect_enabled_strict(Dialect::Australian) => Ok(Dialect::Australian),
1069                df if df.is_dialect_enabled_strict(Dialect::British) => Ok(Dialect::British),
1070                _ => Err(()),
1071            }
1072        } else {
1073            // More than one dialect enabled; can't soundly convert.
1074            Err(())
1075        }
1076    }
1077}
1078
// The integer type backing `DialectFlags`.
// It is a `u8`, which has room for at most 8 dialect bits. Defining more than 8 dialects
// requires switching this to a larger type.
type DialectFlagsUnderlyingType = u8;
1083
bitflags::bitflags! {
    /// A collection of bit flags used to represent enabled dialects.
    ///
    /// This is generally used to allow a word (or similar) to be tagged with multiple dialects.
    ///
    /// Each constant takes its bit value from the [`Dialect`] variant it is cast from.
    #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash)]
    #[serde(transparent)]
    pub struct DialectFlags: DialectFlagsUnderlyingType {
        /// Bit for [`Dialect::American`].
        const AMERICAN = Dialect::American as DialectFlagsUnderlyingType;
        /// Bit for [`Dialect::Canadian`].
        const CANADIAN = Dialect::Canadian as DialectFlagsUnderlyingType;
        /// Bit for [`Dialect::Australian`].
        const AUSTRALIAN = Dialect::Australian as DialectFlagsUnderlyingType;
        /// Bit for [`Dialect::British`].
        const BRITISH = Dialect::British as DialectFlagsUnderlyingType;
    }
}
1097impl DialectFlags {
1098    /// Checks if the provided dialect is enabled.
1099    /// If no dialect is explicitly enabled, it is assumed that all dialects are enabled.
1100    #[must_use]
1101    pub fn is_dialect_enabled(self, dialect: Dialect) -> bool {
1102        self.is_empty() || self.intersects(Self::from_dialect(dialect))
1103    }
1104
1105    /// Checks if the provided dialect is ***explicitly*** enabled.
1106    ///
1107    /// Unlike `is_dialect_enabled`, this will return false when no dialects are explicitly
1108    /// enabled.
1109    #[must_use]
1110    pub fn is_dialect_enabled_strict(self, dialect: Dialect) -> bool {
1111        self.intersects(Self::from_dialect(dialect))
1112    }
1113
1114    /// Constructs a `DialectFlags` from the provided `Dialect`, with only that dialect being
1115    /// enabled.
1116    ///
1117    /// # Panics
1118    ///
1119    /// This will panic if `dialect` represents a dialect that is not defined in
1120    /// `DialectFlags`.
1121    #[must_use]
1122    pub fn from_dialect(dialect: Dialect) -> Self {
1123        let Some(out) = Self::from_bits(dialect as DialectFlagsUnderlyingType) else {
1124            panic!("The '{dialect}' dialect isn't defined in DialectFlags!");
1125        };
1126        out
1127    }
1128
1129    /// Gets the most commonly used dialect(s) in the document.
1130    ///
1131    /// If multiple dialects are used equally often, they will all be enabled in the returned
1132    /// `DialectFlags`. On the other hand, if there is a single dialect that is used the most, it
1133    /// will be the only one enabled.
1134    #[must_use]
1135    pub fn get_most_used_dialects_from_document(document: &Document) -> Self {
1136        // Initialize counters.
1137        let mut dialect_counters: [(Dialect, usize); Dialect::COUNT] = Dialect::VARIANTS
1138            .iter()
1139            .map(|d| (*d, 0))
1140            .collect_array()
1141            .unwrap();
1142
1143        // Count word dialects.
1144        document.iter_words().for_each(|w| {
1145            if let TokenKind::Word(Some(lexeme_metadata)) = &w.kind {
1146                // If the token is a word, iterate though the dialects in `dialect_counters` and
1147                // increment those counters where the word has the respective dialect enabled.
1148                dialect_counters.iter_mut().for_each(|(dialect, count)| {
1149                    if lexeme_metadata.dialects.is_dialect_enabled(*dialect) {
1150                        *count += 1;
1151                    }
1152                });
1153            }
1154        });
1155
1156        // Find max counter.
1157        let max_counter = dialect_counters
1158            .iter()
1159            .map(|(_, count)| count)
1160            .max()
1161            .unwrap();
1162        // Get and convert the collection of most used dialects into a `DialectFlags`.
1163        dialect_counters
1164            .into_iter()
1165            .filter(|(_, count)| count == max_counter)
1166            .fold(DialectFlags::empty(), |acc, dialect| {
1167                // Fold most used dialects into `DialectFlags` via bitwise or.
1168                acc | Self::from_dialect(dialect.0)
1169            })
1170    }
1171}
1172impl Default for DialectFlags {
1173    /// A default value with no dialects explicitly enabled.
1174    /// Implicitly, this state corresponds to all dialects being enabled.
1175    fn default() -> Self {
1176        Self::empty()
1177    }
1178}
1179
1180#[cfg(test)]
1181pub mod tests {
1182    use crate::DictWordMetadata;
1183    use crate::spell::{Dictionary, FstDictionary};
1184
1185    // Helper function to get metadata from the curated dictionary
1186    pub fn md(word: &str) -> DictWordMetadata {
1187        FstDictionary::curated()
1188            .get_word_metadata_str(word)
1189            .unwrap_or_else(|| panic!("Word '{word}' not found in dictionary"))
1190            .into_owned()
1191    }
1192
    // Dialect detection on short documents containing a word whose spelling differs by dialect.
    mod dialect {
        use super::super::{Dialect, DialectFlags};
        use crate::Document;

        // "Aluminium" should leave British among the most-used dialects and American out.
        #[test]
        fn guess_british_dialect() {
            let document = Document::new_plain_english_curated("Aluminium was used.");
            let df = DialectFlags::get_most_used_dialects_from_document(&document);
            assert!(
                df.is_dialect_enabled_strict(Dialect::British)
                    && !df.is_dialect_enabled_strict(Dialect::American)
            );
        }

        // "Aluminum" should leave American among the most-used dialects and British out.
        #[test]
        fn guess_american_dialect() {
            let document = Document::new_plain_english_curated("Aluminum was used.");
            let df = DialectFlags::get_most_used_dialects_from_document(&document);
            assert!(
                df.is_dialect_enabled_strict(Dialect::American)
                    && !df.is_dialect_enabled_strict(Dialect::British)
            );
        }
    }
1217
1218    mod noun {
1219        use crate::dict_word_metadata::tests::md;
1220
1221        #[test]
1222        fn puppy_is_noun() {
1223            assert!(md("puppy").is_noun());
1224        }
1225
1226        #[test]
1227        fn prepare_is_not_noun() {
1228            assert!(!md("prepare").is_noun());
1229        }
1230
1231        #[test]
1232        fn paris_is_proper_noun() {
1233            assert!(md("Paris").is_proper_noun());
1234        }
1235
1236        #[test]
1237        fn permit_is_non_proper_noun() {
1238            assert!(md("lapdog").is_non_proper_noun());
1239        }
1240
1241        #[test]
1242        fn hound_is_singular_noun() {
1243            assert!(md("hound").is_singular_noun());
1244        }
1245
1246        #[test]
1247        fn pooches_is_non_singular_noun() {
1248            assert!(md("pooches").is_non_singular_noun());
1249        }
1250
1251        // Make sure is_non_xxx_noun methods don't behave like is_not_xxx_noun.
1252        // In other words, make sure they don't return true for words that are not nouns.
1253        // They must only pass for words that are nouns but not singular etc.
1254        #[test]
1255        fn loyal_doesnt_pass_is_non_singular_noun() {
1256            assert!(!md("loyal").is_non_singular_noun());
1257        }
1258
1259        #[test]
1260        fn hounds_is_plural_noun() {
1261            assert!(md("hounds").is_plural_noun());
1262        }
1263
1264        #[test]
1265        fn pooch_is_non_plural_noun() {
1266            assert!(md("pooch").is_non_plural_noun());
1267        }
1268
1269        #[test]
1270        fn fish_is_singular_noun() {
1271            assert!(md("fish").is_singular_noun());
1272        }
1273
1274        #[test]
1275        fn fish_is_plural_noun() {
1276            assert!(md("fish").is_plural_noun());
1277        }
1278
1279        #[test]
1280        fn fishes_is_plural_noun() {
1281            assert!(md("fishes").is_plural_noun());
1282        }
1283
1284        #[test]
1285        fn sheep_is_singular_noun() {
1286            assert!(md("sheep").is_singular_noun());
1287        }
1288
1289        #[test]
1290        fn sheep_is_plural_noun() {
1291            assert!(md("sheep").is_plural_noun());
1292        }
1293
1294        #[test]
1295        #[should_panic]
1296        fn sheeps_is_not_word() {
1297            md("sheeps");
1298        }
1299
1300        #[test]
1301        fn bicep_is_singular_noun() {
1302            assert!(md("bicep").is_singular_noun());
1303        }
1304
1305        #[test]
1306        fn biceps_is_singular_noun() {
1307            assert!(md("biceps").is_singular_noun());
1308        }
1309
1310        #[test]
1311        fn biceps_is_plural_noun() {
1312            assert!(md("biceps").is_plural_noun());
1313        }
1314
1315        #[test]
1316        fn aircraft_is_singular_noun() {
1317            assert!(md("aircraft").is_singular_noun());
1318        }
1319
1320        #[test]
1321        fn aircraft_is_plural_noun() {
1322            assert!(md("aircraft").is_plural_noun());
1323        }
1324
1325        #[test]
1326        #[should_panic]
1327        fn aircrafts_is_not_word() {
1328            md("aircrafts");
1329        }
1330
1331        #[test]
1332        fn dog_apostrophe_s_is_possessive_noun() {
1333            assert!(md("dog's").is_possessive_noun());
1334        }
1335
1336        #[test]
1337        fn dogs_is_non_possessive_noun() {
1338            assert!(md("dogs").is_non_possessive_noun());
1339        }
1340
1341        // noun countability
1342
1343        #[test]
1344        fn dog_is_countable() {
1345            assert!(md("dog").is_countable_noun());
1346        }
1347        #[test]
1348        fn dog_is_non_mass_noun() {
1349            assert!(md("dog").is_non_mass_noun());
1350        }
1351
1352        #[test]
1353        fn furniture_is_mass_noun() {
1354            assert!(md("furniture").is_mass_noun());
1355        }
1356        #[test]
1357        fn furniture_is_non_countable_noun() {
1358            assert!(md("furniture").is_non_countable_noun());
1359        }
1360
1361        #[test]
1362        fn equipment_is_mass_noun() {
1363            assert!(md("equipment").is_mass_noun());
1364        }
1365        #[test]
1366        fn equipment_is_non_countable_noun() {
1367            assert!(md("equipment").is_non_countable_noun());
1368        }
1369
1370        #[test]
1371        fn beer_is_countable_noun() {
1372            assert!(md("beer").is_countable_noun());
1373        }
1374        #[test]
1375        fn beer_is_mass_noun() {
1376            assert!(md("beer").is_mass_noun());
1377        }
1378    }
1379
1380    mod pronoun {
1381        use crate::dict_word_metadata::tests::md;
1382
        // "I" (subject), "me" (object) and "myself" (reflexive): each is expected to be a
        // singular personal pronoun.
        mod i_me_myself {
            use crate::dict_word_metadata::tests::md;

            #[test]
            fn i_is_pronoun() {
                assert!(md("I").is_pronoun());
            }
            #[test]
            fn i_is_personal_pronoun() {
                assert!(md("I").is_personal_pronoun());
            }
            #[test]
            fn i_is_singular_pronoun() {
                assert!(md("I").is_singular_pronoun());
            }
            #[test]
            fn i_is_subject_pronoun() {
                assert!(md("I").is_subject_pronoun());
            }

            #[test]
            fn me_is_pronoun() {
                assert!(md("me").is_pronoun());
            }
            #[test]
            fn me_is_personal_pronoun() {
                assert!(md("me").is_personal_pronoun());
            }
            #[test]
            fn me_is_singular_pronoun() {
                assert!(md("me").is_singular_pronoun());
            }
            #[test]
            fn me_is_object_pronoun() {
                assert!(md("me").is_object_pronoun());
            }

            #[test]
            fn myself_is_pronoun() {
                assert!(md("myself").is_pronoun());
            }
            #[test]
            fn myself_is_personal_pronoun() {
                assert!(md("myself").is_personal_pronoun());
            }
            #[test]
            fn myself_is_singular_pronoun() {
                assert!(md("myself").is_singular_pronoun());
            }
            #[test]
            fn myself_is_reflexive_pronoun() {
                assert!(md("myself").is_reflexive_pronoun());
            }
        }
1437
        // "we" (subject), "us" (object) and "ourselves" (reflexive): each is expected to be a
        // plural personal pronoun.
        mod we_us_ourselves {
            use crate::dict_word_metadata::tests::md;

            #[test]
            fn we_is_pronoun() {
                assert!(md("we").is_pronoun());
            }
            #[test]
            fn we_is_personal_pronoun() {
                assert!(md("we").is_personal_pronoun());
            }
            #[test]
            fn we_is_plural_pronoun() {
                assert!(md("we").is_plural_pronoun());
            }
            #[test]
            fn we_is_subject_pronoun() {
                assert!(md("we").is_subject_pronoun());
            }

            #[test]
            fn us_is_pronoun() {
                assert!(md("us").is_pronoun());
            }
            #[test]
            fn us_is_personal_pronoun() {
                assert!(md("us").is_personal_pronoun());
            }
            #[test]
            fn us_is_plural_pronoun() {
                assert!(md("us").is_plural_pronoun());
            }
            #[test]
            fn us_is_object_pronoun() {
                assert!(md("us").is_object_pronoun());
            }

            #[test]
            fn ourselves_is_pronoun() {
                assert!(md("ourselves").is_pronoun());
            }
            #[test]
            fn ourselves_is_personal_pronoun() {
                assert!(md("ourselves").is_personal_pronoun());
            }
            #[test]
            fn ourselves_is_plural_pronoun() {
                assert!(md("ourselves").is_plural_pronoun());
            }
            #[test]
            fn ourselves_is_reflexive_pronoun() {
                assert!(md("ourselves").is_reflexive_pronoun());
            }
        }
1492
        // "you" is expected to be a personal pronoun that is both singular and plural, and both
        // subject and object; "yourself" is expected to be singular and reflexive.
        mod you_yourself {
            use crate::dict_word_metadata::tests::md;

            #[test]
            fn you_is_pronoun() {
                assert!(md("you").is_pronoun());
            }
            #[test]
            fn you_is_personal_pronoun() {
                assert!(md("you").is_personal_pronoun());
            }
            #[test]
            fn you_is_singular_pronoun() {
                assert!(md("you").is_singular_pronoun());
            }
            #[test]
            fn you_is_plural_pronoun() {
                assert!(md("you").is_plural_pronoun());
            }
            #[test]
            fn you_is_subject_pronoun() {
                assert!(md("you").is_subject_pronoun());
            }
            #[test]
            fn you_is_object_pronoun() {
                assert!(md("you").is_object_pronoun());
            }
            #[test]
            fn yourself_is_pronoun() {
                assert!(md("yourself").is_pronoun());
            }
            #[test]
            fn yourself_is_personal_pronoun() {
                assert!(md("yourself").is_personal_pronoun());
            }
            #[test]
            fn yourself_is_singular_pronoun() {
                assert!(md("yourself").is_singular_pronoun());
            }
            #[test]
            fn yourself_is_reflexive_pronoun() {
                assert!(md("yourself").is_reflexive_pronoun());
            }
        }
1537
        // "he" (subject), "him" (object) and "himself" (reflexive): each is expected to be a
        // singular personal pronoun.
        mod he_him_himself {
            use crate::dict_word_metadata::tests::md;

            #[test]
            fn he_is_pronoun() {
                assert!(md("he").is_pronoun());
            }
            #[test]
            fn he_is_personal_pronoun() {
                assert!(md("he").is_personal_pronoun());
            }
            #[test]
            fn he_is_singular_pronoun() {
                assert!(md("he").is_singular_pronoun());
            }
            #[test]
            fn he_is_subject_pronoun() {
                assert!(md("he").is_subject_pronoun());
            }

            #[test]
            fn him_is_pronoun() {
                assert!(md("him").is_pronoun());
            }
            #[test]
            fn him_is_personal_pronoun() {
                assert!(md("him").is_personal_pronoun());
            }
            #[test]
            fn him_is_singular_pronoun() {
                assert!(md("him").is_singular_pronoun());
            }
            #[test]
            fn him_is_object_pronoun() {
                assert!(md("him").is_object_pronoun());
            }

            #[test]
            fn himself_is_pronoun() {
                assert!(md("himself").is_pronoun());
            }
            #[test]
            fn himself_is_personal_pronoun() {
                assert!(md("himself").is_personal_pronoun());
            }
            #[test]
            fn himself_is_singular_pronoun() {
                assert!(md("himself").is_singular_pronoun());
            }
            #[test]
            fn himself_is_reflexive_pronoun() {
                assert!(md("himself").is_reflexive_pronoun());
            }
        }
1592
        // "she" (subject), "her" (object) and "herself" (reflexive): each is expected to be a
        // singular personal pronoun.
        mod she_her_herself {
            use crate::dict_word_metadata::tests::md;

            #[test]
            fn she_is_pronoun() {
                assert!(md("she").is_pronoun());
            }
            #[test]
            fn she_is_personal_pronoun() {
                assert!(md("she").is_personal_pronoun());
            }
            #[test]
            fn she_is_singular_pronoun() {
                assert!(md("she").is_singular_pronoun());
            }
            #[test]
            fn she_is_subject_pronoun() {
                assert!(md("she").is_subject_pronoun());
            }

            #[test]
            fn her_is_pronoun() {
                assert!(md("her").is_pronoun());
            }
            #[test]
            fn her_is_personal_pronoun() {
                assert!(md("her").is_personal_pronoun());
            }
            #[test]
            fn her_is_singular_pronoun() {
                assert!(md("her").is_singular_pronoun());
            }
            #[test]
            fn her_is_object_pronoun() {
                assert!(md("her").is_object_pronoun());
            }

            #[test]
            fn herself_is_pronoun() {
                assert!(md("herself").is_pronoun());
            }
            #[test]
            fn herself_is_personal_pronoun() {
                assert!(md("herself").is_personal_pronoun());
            }
            #[test]
            fn herself_is_singular_pronoun() {
                assert!(md("herself").is_singular_pronoun());
            }
            #[test]
            fn herself_is_reflexive_pronoun() {
                assert!(md("herself").is_reflexive_pronoun());
            }
        }
1647
        // "it" is expected to be a singular personal pronoun usable as both subject and object;
        // "itself" is expected to be singular and reflexive.
        mod it_itself {
            use crate::dict_word_metadata::tests::md;

            #[test]
            fn it_is_pronoun() {
                assert!(md("it").is_pronoun());
            }
            #[test]
            fn it_is_personal_pronoun() {
                assert!(md("it").is_personal_pronoun());
            }
            #[test]
            fn it_is_singular_pronoun() {
                assert!(md("it").is_singular_pronoun());
            }
            #[test]
            fn it_is_subject_pronoun() {
                assert!(md("it").is_subject_pronoun());
            }
            #[test]
            fn it_is_object_pronoun() {
                assert!(md("it").is_object_pronoun());
            }

            #[test]
            fn itself_is_pronoun() {
                assert!(md("itself").is_pronoun());
            }
            #[test]
            fn itself_is_personal_pronoun() {
                assert!(md("itself").is_personal_pronoun());
            }
            #[test]
            fn itself_is_singular_pronoun() {
                assert!(md("itself").is_singular_pronoun());
            }
            #[test]
            fn itself_is_reflexive_pronoun() {
                assert!(md("itself").is_reflexive_pronoun());
            }
        }
1689
        // "they" (subject), "them" (object) and "themselves" (reflexive): each is expected to
        // be a plural personal pronoun.
        mod they_them_themselves {
            use crate::dict_word_metadata::tests::md;

            #[test]
            fn they_is_pronoun() {
                assert!(md("they").is_pronoun());
            }
            #[test]
            fn they_is_personal_pronoun() {
                assert!(md("they").is_personal_pronoun());
            }
            #[test]
            fn they_is_plural_pronoun() {
                assert!(md("they").is_plural_pronoun());
            }
            #[test]
            fn they_is_subject_pronoun() {
                assert!(md("they").is_subject_pronoun());
            }

            #[test]
            fn them_is_pronoun() {
                assert!(md("them").is_pronoun());
            }
            #[test]
            fn them_is_personal_pronoun() {
                assert!(md("them").is_personal_pronoun());
            }
            #[test]
            fn them_is_plural_pronoun() {
                assert!(md("them").is_plural_pronoun());
            }
            #[test]
            fn them_is_object_pronoun() {
                assert!(md("them").is_object_pronoun());
            }

            #[test]
            fn themselves_is_pronoun() {
                assert!(md("themselves").is_pronoun());
            }
            #[test]
            fn themselves_is_personal_pronoun() {
                assert!(md("themselves").is_personal_pronoun());
            }
            #[test]
            fn themselves_is_plural_pronoun() {
                assert!(md("themselves").is_plural_pronoun());
            }
            #[test]
            fn themselves_is_reflexive_pronoun() {
                assert!(md("themselves").is_reflexive_pronoun());
            }
        }
1744
1745        // Possessive pronouns (not to be confused with possessive adjectives/determiners)
1746        #[test]
1747        fn mine_is_pronoun() {
1748            assert!(md("mine").is_pronoun());
1749        }
1750        #[test]
1751        fn ours_is_pronoun() {
1752            assert!(md("ours").is_pronoun());
1753        }
1754        #[test]
1755        fn yours_is_pronoun() {
1756            assert!(md("yours").is_pronoun());
1757        }
1758        #[test]
1759        fn his_is_pronoun() {
1760            assert!(md("his").is_pronoun());
1761        }
1762        #[test]
1763        fn hers_is_pronoun() {
1764            assert!(md("hers").is_pronoun());
1765        }
1766        #[test]
1767        fn its_is_pronoun() {
1768            assert!(md("its").is_pronoun());
1769        }
1770        #[test]
1771        fn theirs_is_pronoun() {
1772            assert!(md("theirs").is_pronoun());
1773        }
1774
1775        // archaic pronouns
1776        #[test]
1777        fn archaic_pronouns() {
1778            assert!(md("thou").is_pronoun());
1779            assert!(md("thee").is_pronoun());
1780            assert!(md("thyself").is_pronoun());
1781            assert!(md("thine").is_pronoun());
1782        }
1783
1784        // generic pronouns
1785        #[test]
1786        fn generic_pronouns() {
1787            assert!(md("one").is_pronoun());
1788            assert!(md("oneself").is_pronoun());
1789        }
1790
1791        // relative and interrogative pronouns
1792        #[test]
1793        fn relative_and_interrogative_pronouns() {
1794            assert!(md("who").is_pronoun());
1795            assert!(md("whom").is_pronoun());
1796            assert!(md("whose").is_pronoun());
1797            assert!(md("which").is_pronoun());
1798            assert!(md("what").is_pronoun());
1799        }
1800
1801        // nonstandard pronouns
1802        #[test]
1803        #[ignore = "not in dictionary"]
1804        fn nonstandard_pronouns() {
1805            assert!(md("themself").pronoun.is_some());
1806            assert!(md("y'all'").pronoun.is_some());
1807        }
1808    }
1809
1810    mod adjective {
1811        use crate::{Degree, dict_word_metadata::tests::md};
1812
1813        // Getting degrees
1814
1815        #[test]
1816        #[ignore = "not marked yet because it might not be reliable"]
1817        fn big_is_positive() {
1818            assert_eq!(md("big").get_degree(), Some(Degree::Positive));
1819        }
1820
1821        #[test]
1822        fn bigger_is_comparative() {
1823            assert_eq!(md("bigger").get_degree(), Some(Degree::Comparative));
1824        }
1825
1826        #[test]
1827        fn biggest_is_superlative() {
1828            assert_eq!(md("biggest").get_degree(), Some(Degree::Superlative));
1829        }
1830
1831        #[test]
1832        #[should_panic(expected = "Word 'bigly' not found in dictionary")]
1833        fn bigly_is_not_an_adjective_form_we_track() {
1834            assert_eq!(md("bigly").get_degree(), None);
1835        }
1836
1837        // Calling is_ methods
1838
1839        // TODO: positive degree not implemented
1840
1841        #[test]
1842        fn bigger_is_comparative_adjective() {
1843            assert!(md("bigger").is_comparative_adjective());
1844        }
1845
1846        #[test]
1847        fn biggest_is_superlative_adjective() {
1848            assert!(md("biggest").is_superlative_adjective());
1849        }
1850    }
1851
1852    #[test]
1853    fn the_is_determiner() {
1854        assert!(md("the").is_determiner());
1855    }
1856    #[test]
1857    fn this_is_demonstrative_determiner() {
1858        assert!(md("this").is_demonstrative_determiner());
1859    }
1860    #[test]
1861    fn your_is_possessive_determiner() {
1862        assert!(md("your").is_possessive_determiner());
1863    }
1864
1865    #[test]
1866    fn every_is_quantifier() {
1867        assert!(md("every").is_quantifier());
1868    }
1869
1870    #[test]
1871    fn the_isnt_quantifier() {
1872        assert!(!md("the").is_quantifier());
1873    }
1874
1875    #[test]
1876    fn equipment_is_mass_noun() {
1877        assert!(md("equipment").is_mass_noun());
1878    }
1879
1880    #[test]
1881    fn equipment_is_non_countable_noun() {
1882        assert!(md("equipment").is_non_countable_noun());
1883    }
1884
1885    #[test]
1886    fn equipment_isnt_countable_noun() {
1887        assert!(!md("equipment").is_countable_noun());
1888    }
1889
1890    mod verb {
1891        use crate::dict_word_metadata::tests::md;
1892
1893        #[test]
1894        fn lemma_walk() {
1895            let md = md("walk");
1896            assert!(md.is_verb_lemma())
1897        }
1898
1899        #[test]
1900        fn lemma_fix() {
1901            let md = md("fix");
1902            assert!(md.is_verb_lemma())
1903        }
1904
1905        #[test]
1906        fn progressive_walking() {
1907            let md = md("walking");
1908            assert!(md.is_verb_progressive_form())
1909        }
1910
1911        #[test]
1912        fn past_walked() {
1913            let md = md("walked");
1914            assert!(md.is_verb_past_form())
1915        }
1916
1917        #[test]
1918        fn simple_past_ate() {
1919            let md = md("ate");
1920            assert!(md.is_verb_simple_past_form())
1921        }
1922
1923        #[test]
1924        fn past_participle_eaten() {
1925            let md = md("eaten");
1926            assert!(md.is_verb_past_participle_form())
1927        }
1928
1929        #[test]
1930        fn third_pers_sing_walks() {
1931            let md = md("walks");
1932            assert!(md.is_verb_third_person_singular_present_form())
1933        }
1934    }
1935}