harper_core/
dict_word_metadata.rs

1use harper_brill::UPOS;
2use is_macro::Is;
3use itertools::Itertools;
4use paste::paste;
5use serde::{Deserialize, Serialize};
6use smallvec::SmallVec;
7use strum::{EnumCount as _, VariantArray as _};
8use strum_macros::{Display, EnumCount, EnumIter, EnumString, VariantArray};
9
10use std::convert::TryFrom;
11
12use crate::dict_word_metadata_orthography::OrthFlags;
13use crate::spell::WordId;
14use crate::{Document, TokenKind, TokenStringExt};
15
/// This represents a "lexeme" or "headword" which is case-folded but affix-expanded.
/// So not only lemmata but also inflected forms are stored here, with "horn" and "horns" each
/// having their own lexeme, but "Ivy" and "ivy" sharing the same lexeme.
///
/// Each part-of-speech field is `Some` when the word can act as that part of speech; the
/// contained data carries the finer-grained properties known for that reading.
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Hash)]
pub struct DictWordMetadata {
    /// Noun properties; `Some` if the word can be a noun.
    pub noun: Option<NounData>,
    /// Pronoun properties; `Some` if the word can be a pronoun.
    pub pronoun: Option<PronounData>,
    /// Verb properties; `Some` if the word can be a verb.
    pub verb: Option<VerbData>,
    /// Adjective properties; `Some` if the word can be an adjective.
    pub adjective: Option<AdjectiveData>,
    /// Adverb properties; `Some` if the word can be an adverb.
    pub adverb: Option<AdverbData>,
    /// Conjunction properties; `Some` if the word can be a conjunction.
    pub conjunction: Option<ConjunctionData>,
    /// Whether the word is a swear. Only `Some(true)` counts as a swear; `None` means unmarked.
    pub swear: Option<bool>,
    /// The dialects this word belongs to.
    /// If no dialects are defined, it can be assumed that the word is
    /// valid in all dialects of English.
    #[serde(default = "default_default")]
    pub dialects: DialectFlags,
    /// Orthographic information: letter case, spaces, hyphens, etc.
    #[serde(default = "OrthFlags::empty")]
    pub orth_info: OrthFlags,
    /// Whether the word is a [determiner](https://en.wikipedia.org/wiki/English_determiners).
    pub determiner: Option<DeterminerData>,
    /// Whether the word is a [preposition](https://www.merriam-webster.com/dictionary/preposition).
    #[serde(default = "default_false")]
    pub preposition: bool,
    /// Whether the word is considered especially common.
    #[serde(default = "default_false")]
    pub common: bool,
    /// NOTE(review): presumably the id of the word this entry was derived from — confirm
    /// against the code that populates it.
    #[serde(default = "default_none")]
    pub derived_from: Option<WordId>,
    /// Generated by a chunker
    pub np_member: Option<bool>,
    /// Generated by a POS tagger
    pub pos_tag: Option<UPOS>,
}
51
/// Supplies `false` as the `serde` fallback for boolean fields missing from the input.
fn default_false() -> bool {
    bool::default()
}
56
/// Supplies `None` as the `serde` fallback for optional fields missing from the input.
fn default_none<T>() -> Option<T> {
    Option::default()
}
61
/// Supplies the type's own `Default` value as the `serde` fallback for missing fields.
fn default_default<T: Default>() -> T {
    Default::default()
}
66
// Generates the boolean part-of-speech query methods of `DictWordMetadata`.
//
// The input is a `.`-separated list of `category has sub, sub, ...` entries. `category` names an
// `Option<...Data>` field of the metadata and each `sub` names an `is_<sub>: Option<bool>` field
// of the corresponding `<Category>Data` struct. For every entry the macro emits:
//
// - `is_<category>()`: the category field is `Some`.
// - `is_<sub>_<category>()`: the property is explicitly `Some(true)`.
// - `is_non_<sub>_<category>()`: the category is present and the property is `Some(false)` or
//   unmarked (`None`).
//
// It also emits `is_likely_homograph()`, which counts how many categories apply.
macro_rules! generate_metadata_queries {
    ($($category:ident has $($sub:ident),*).*) => {
        paste! {
            /// Checks if the word belongs to more than one part-of-speech category.
            pub fn is_likely_homograph(&self) -> bool {
                // Only `preposition` lives outside the generated categories. `determiner` is a
                // generated category, so listing `is_determiner()` here as well would count
                // every determiner twice and flag determiner-only words as homographs.
                [self.preposition, $(
                    self.[< is_ $category >](),
                )*].iter().filter(|applies| **applies).count() > 1
            }

            $(
                #[doc = concat!("Checks if the word is definitely a ", stringify!($category), ".")]
                pub fn [< is_ $category >](&self) -> bool {
                    self.$category.is_some()
                }

                $(
                    #[doc = concat!("Checks if the word is definitely a ", stringify!($category), " and more specifically is labeled as (a) ", stringify!($sub), ".")]
                    pub fn [< is_ $sub _ $category >](&self) -> bool {
                        matches!(
                            self.$category,
                            Some([< $category:camel Data >]{
                                [< is_ $sub >]: Some(true),
                                ..
                            })
                        )
                    }

                    #[doc = concat!("Checks if the word is definitely a ", stringify!($category), " and more specifically is labeled as __not__ (a) ", stringify!($sub), ".")]
                    pub fn [< is_non_ $sub _ $category >](&self) -> bool {
                        // An unmarked property (`None`) is treated the same as `Some(false)`.
                        matches!(
                            self.$category,
                            Some([< $category:camel Data >]{
                                [< is_ $sub >]: None | Some(false),
                                ..
                            })
                        )
                    }
                )*
            )*
        }
    };
}
108
109impl DictWordMetadata {
110    /// If there is only one possible interpretation of the metadata, infer its UPOS tag.
111    pub fn infer_pos_tag(&self) -> Option<UPOS> {
112        // If an explicit POS tag exists, return it immediately.
113        if let Some(pos) = self.pos_tag {
114            return Some(pos);
115        }
116
117        // Collect all possible POS tags from metadata
118        let mut candidates = SmallVec::<[UPOS; 14]>::with_capacity(14);
119
120        if self.is_proper_noun() {
121            candidates.push(UPOS::PROPN);
122        }
123
124        if self.is_pronoun() {
125            candidates.push(UPOS::PRON);
126        }
127        if self.is_noun() {
128            candidates.push(UPOS::NOUN);
129        }
130        if self.is_verb() {
131            // Distinguish auxiliary verbs
132            if let Some(data) = &self.verb {
133                if data.is_auxiliary == Some(true) {
134                    candidates.push(UPOS::AUX);
135                } else {
136                    candidates.push(UPOS::VERB);
137                }
138            } else {
139                candidates.push(UPOS::VERB);
140            }
141        }
142        if self.is_adjective() {
143            candidates.push(UPOS::ADJ);
144        }
145        if self.is_adverb() {
146            candidates.push(UPOS::ADV);
147        }
148        if self.is_conjunction() {
149            candidates.push(UPOS::CCONJ);
150        }
151        if self.is_determiner() {
152            candidates.push(UPOS::DET);
153        }
154        if self.preposition {
155            candidates.push(UPOS::ADP);
156        }
157
158        // Remove duplicates
159        candidates.sort();
160        candidates.dedup();
161
162        if candidates.len() == 1 {
163            candidates.first().copied()
164        } else {
165            None
166        }
167    }
168
169    /// Produce a copy of `self` with the known properties of `other` set.
170    pub fn or(&self, other: &Self) -> Self {
171        macro_rules! merge {
172            ($a:expr, $b:expr) => {
173                match ($a, $b) {
174                    (Some(a), Some(b)) => Some(a.or(&b)),
175                    (Some(a), None) => Some(a),
176                    (None, Some(b)) => Some(b),
177                    (None, None) => None,
178                }
179            };
180        }
181
182        Self {
183            noun: merge!(self.noun, other.noun),
184            pronoun: merge!(self.pronoun, other.pronoun),
185            verb: merge!(self.verb, other.verb),
186            adjective: merge!(self.adjective, other.adjective),
187            adverb: merge!(self.adverb, other.adverb),
188            conjunction: merge!(self.conjunction, other.conjunction),
189            dialects: self.dialects | other.dialects,
190            orth_info: self.orth_info | other.orth_info,
191            swear: self.swear.or(other.swear),
192            determiner: merge!(self.determiner, other.determiner),
193            preposition: self.preposition || other.preposition,
194            common: self.common || other.common,
195            derived_from: self.derived_from.or(other.derived_from),
196            pos_tag: self.pos_tag.or(other.pos_tag),
197            np_member: self.np_member.or(other.np_member),
198        }
199    }
200
201    /// Given a UPOS tag, discard any metadata that would disagree with the given POS tag.
202    /// For example, if the metadata suggests a word could either be a noun or an adjective, and we
203    /// provide a [`UPOS::NOUN`], this function will remove the adjective data.
204    ///
205    /// Additionally, if the metadata does not currently declare the potential of the word to be
206    /// the specific POS, it becomes so. That means if we provide a [`UPOS::ADJ`] to the function
207    /// for a metadata whose `Self::adjective = None`, it will become `Some`.
208    pub fn enforce_pos_exclusivity(&mut self, pos: &UPOS) {
209        use UPOS::*;
210        match pos {
211            NOUN => {
212                if let Some(noun) = self.noun {
213                    self.noun = Some(NounData {
214                        is_proper: Some(false),
215                        ..noun
216                    })
217                } else {
218                    self.noun = Some(NounData {
219                        is_proper: Some(false),
220                        is_singular: None,
221                        is_plural: None,
222                        is_countable: None,
223                        is_mass: None,
224                        is_possessive: None,
225                    })
226                }
227
228                self.pronoun = None;
229                self.verb = None;
230                self.adjective = None;
231                self.adverb = None;
232                self.conjunction = None;
233                self.determiner = None;
234                self.preposition = false;
235            }
236            PROPN => {
237                if let Some(noun) = self.noun {
238                    self.noun = Some(NounData {
239                        is_proper: Some(true),
240                        ..noun
241                    })
242                } else {
243                    self.noun = Some(NounData {
244                        is_proper: Some(true),
245                        is_singular: None,
246                        is_plural: None,
247                        is_countable: None,
248                        is_mass: None,
249                        is_possessive: None,
250                    })
251                }
252
253                self.pronoun = None;
254                self.verb = None;
255                self.adjective = None;
256                self.adverb = None;
257                self.conjunction = None;
258                self.determiner = None;
259                self.preposition = false;
260            }
261            PRON => {
262                if self.pronoun.is_none() {
263                    self.pronoun = Some(PronounData::default())
264                }
265
266                self.noun = None;
267                self.verb = None;
268                self.adjective = None;
269                self.adverb = None;
270                self.conjunction = None;
271                self.determiner = None;
272                self.preposition = false;
273            }
274            VERB => {
275                if let Some(verb) = self.verb {
276                    self.verb = Some(VerbData {
277                        is_auxiliary: Some(false),
278                        ..verb
279                    })
280                } else {
281                    self.verb = Some(VerbData {
282                        is_auxiliary: Some(false),
283                        ..Default::default()
284                    })
285                }
286
287                self.noun = None;
288                self.pronoun = None;
289                self.adjective = None;
290                self.adverb = None;
291                self.conjunction = None;
292                self.determiner = None;
293                self.preposition = false;
294            }
295            AUX => {
296                if let Some(verb) = self.verb {
297                    self.verb = Some(VerbData {
298                        is_auxiliary: Some(true),
299                        ..verb
300                    })
301                } else {
302                    self.verb = Some(VerbData {
303                        is_auxiliary: Some(true),
304                        ..Default::default()
305                    })
306                }
307
308                self.noun = None;
309                self.pronoun = None;
310                self.adjective = None;
311                self.adverb = None;
312                self.conjunction = None;
313                self.determiner = None;
314                self.preposition = false;
315            }
316            ADJ => {
317                if self.adjective.is_none() {
318                    self.adjective = Some(AdjectiveData::default())
319                }
320
321                self.noun = None;
322                self.pronoun = None;
323                self.verb = None;
324                self.adverb = None;
325                self.conjunction = None;
326                self.determiner = None;
327                self.preposition = false;
328            }
329            ADV => {
330                if self.adverb.is_none() {
331                    self.adverb = Some(AdverbData::default())
332                }
333
334                self.noun = None;
335                self.pronoun = None;
336                self.verb = None;
337                self.adjective = None;
338                self.conjunction = None;
339                self.determiner = None;
340                self.preposition = false;
341            }
342            ADP => {
343                self.noun = None;
344                self.pronoun = None;
345                self.verb = None;
346                self.adjective = None;
347                self.adverb = None;
348                self.conjunction = None;
349                self.determiner = None;
350                self.preposition = true;
351            }
352            DET => {
353                self.noun = None;
354                self.pronoun = None;
355                self.verb = None;
356                self.adjective = None;
357                self.adverb = None;
358                self.conjunction = None;
359                self.preposition = false;
360                self.determiner = Some(DeterminerData::default());
361            }
362            CCONJ | SCONJ => {
363                if self.conjunction.is_none() {
364                    self.conjunction = Some(ConjunctionData::default())
365                }
366
367                self.noun = None;
368                self.pronoun = None;
369                self.verb = None;
370                self.adjective = None;
371                self.adverb = None;
372                self.determiner = None;
373                self.preposition = false;
374            }
375            _ => {}
376        }
377    }
378
    // Generates `is_likely_homograph` and, for every category listed below, `is_<category>`
    // plus `is_<property>_<category>` / `is_non_<property>_<category>` for each property.
    // Categories with an empty property list (`conjunction has.`) only get `is_<category>`.
    generate_metadata_queries!(
        // For nouns, singular and countable default to true, so their metadata queries are
        // not generated here; they are written by hand further down.
        noun has proper, plural, mass, possessive.
        pronoun has personal, singular, plural, possessive, reflexive, subject, object.
        determiner has demonstrative, possessive, quantifier.
        verb has linking, auxiliary.
        conjunction has.
        adjective has.
        adverb has manner, frequency, degree
    );
389
390    // Manual metadata queries
391
392    // Pronoun metadata queries
393
394    pub fn get_person(&self) -> Option<Person> {
395        self.pronoun.as_ref().and_then(|p| p.person)
396    }
397
398    pub fn is_first_person_plural_pronoun(&self) -> bool {
399        matches!(
400            self.pronoun,
401            Some(PronounData {
402                person: Some(Person::First),
403                is_plural: Some(true),
404                ..
405            })
406        )
407    }
408
409    pub fn is_first_person_singular_pronoun(&self) -> bool {
410        matches!(
411            self.pronoun,
412            Some(PronounData {
413                person: Some(Person::First),
414                is_singular: Some(true),
415                ..
416            })
417        )
418    }
419
420    pub fn is_third_person_plural_pronoun(&self) -> bool {
421        matches!(
422            self.pronoun,
423            Some(PronounData {
424                person: Some(Person::Third),
425                is_plural: Some(true),
426                ..
427            })
428        )
429    }
430
431    pub fn is_third_person_singular_pronoun(&self) -> bool {
432        matches!(
433            self.pronoun,
434            Some(PronounData {
435                person: Some(Person::Third),
436                is_singular: Some(true),
437                ..
438            })
439        )
440    }
441
442    pub fn is_third_person_pronoun(&self) -> bool {
443        matches!(
444            self.pronoun,
445            Some(PronounData {
446                person: Some(Person::Third),
447                ..
448            })
449        )
450    }
451
452    pub fn is_second_person_pronoun(&self) -> bool {
453        matches!(
454            self.pronoun,
455            Some(PronounData {
456                person: Some(Person::Second),
457                ..
458            })
459        )
460    }
461
462    // Lemma is default if no verb form is specified in the dictionary
463    pub fn is_verb_lemma(&self) -> bool {
464        if let Some(verb) = self.verb {
465            if let Some(forms) = verb.verb_forms {
466                return forms.is_empty() || forms.contains(VerbFormFlags::LEMMA);
467            } else {
468                return true;
469            }
470        }
471        false
472    }
473
474    pub fn is_verb_past_form(&self) -> bool {
475        self.verb.is_some_and(|v| {
476            v.verb_forms
477                .is_some_and(|vf| vf.contains(VerbFormFlags::PAST))
478        })
479    }
480
481    pub fn is_verb_simple_past_form(&self) -> bool {
482        self.verb.is_some_and(|v| {
483            v.verb_forms
484                .is_some_and(|vf| vf.contains(VerbFormFlags::PRETERITE))
485        })
486    }
487
488    pub fn is_verb_past_participle_form(&self) -> bool {
489        self.verb.is_some_and(|v| {
490            v.verb_forms
491                .is_some_and(|vf| vf.contains(VerbFormFlags::PAST_PARTICIPLE))
492        })
493    }
494
495    pub fn is_verb_progressive_form(&self) -> bool {
496        self.verb.is_some_and(|v| {
497            v.verb_forms
498                .is_some_and(|vf| vf.contains(VerbFormFlags::PROGRESSIVE))
499        })
500    }
501
502    pub fn is_verb_third_person_singular_present_form(&self) -> bool {
503        self.verb.is_some_and(|v| {
504            v.verb_forms
505                .is_some_and(|vf| vf.contains(VerbFormFlags::THIRD_PERSON_SINGULAR))
506        })
507    }
508
509    // Noun metadata queries
510
511    // Singular is default if number is not marked in the dictionary.
512    pub fn is_singular_noun(&self) -> bool {
513        if let Some(noun) = self.noun {
514            matches!(
515                (noun.is_singular, noun.is_plural),
516                (Some(true), _) | (None | Some(false), None | Some(false))
517            )
518        } else {
519            false
520        }
521    }
522    pub fn is_non_singular_noun(&self) -> bool {
523        if let Some(noun) = self.noun {
524            !matches!(
525                (noun.is_singular, noun.is_plural),
526                (Some(true), _) | (None | Some(false), None | Some(false))
527            )
528        } else {
529            false
530        }
531    }
532
533    // Countable is default if countability is not marked in the dictionary.
534    pub fn is_countable_noun(&self) -> bool {
535        if let Some(noun) = self.noun {
536            matches!(
537                (noun.is_countable, noun.is_mass),
538                (Some(true), _) | (None | Some(false), None | Some(false))
539            )
540        } else {
541            false
542        }
543    }
544    pub fn is_non_countable_noun(&self) -> bool {
545        if let Some(noun) = self.noun {
546            !matches!(
547                (noun.is_countable, noun.is_mass),
548                (Some(true), _) | (None | Some(false), None | Some(false))
549            )
550        } else {
551            false
552        }
553    }
554
555    // Most mass nouns also have countable senses. Match those that are only mass nouns.
556    pub fn is_mass_noun_only(&self) -> bool {
557        if let Some(noun) = self.noun {
558            matches!(
559                (noun.is_countable, noun.is_mass),
560                (None | Some(false), Some(true))
561            )
562        } else {
563            false
564        }
565    }
566
567    // Nominal metadata queries (noun + pronoun)
568
569    /// Checks if the word is definitely nominal.
570    pub fn is_nominal(&self) -> bool {
571        self.is_noun() || self.is_pronoun()
572    }
573
574    /// Checks if the word is definitely a nominal and more specifically is labeled as (a) singular.
575    pub fn is_singular_nominal(&self) -> bool {
576        self.is_singular_noun() || self.is_singular_pronoun()
577    }
578
579    /// Checks if the word is definitely a nominal and more specifically is labeled as (a) plural.
580    pub fn is_plural_nominal(&self) -> bool {
581        self.is_plural_noun() || self.is_plural_pronoun()
582    }
583
584    /// Checks if the word is definitely a nominal and more specifically is labeled as (a) possessive.
585    pub fn is_possessive_nominal(&self) -> bool {
586        self.is_possessive_noun() || self.is_possessive_pronoun()
587    }
588
589    /// Checks if the word is definitely a nominal and more specifically is labeled as __not__ (a) singular.
590    pub fn is_non_singular_nominal(&self) -> bool {
591        self.is_non_singular_noun() || self.is_non_singular_pronoun()
592    }
593
594    /// Checks if the word is definitely a nominal and more specifically is labeled as __not__ (a) plural.
595    pub fn is_non_plural_nominal(&self) -> bool {
596        self.is_non_plural_noun() || self.is_non_plural_pronoun()
597    }
598
599    /// Checks if the word is definitely a nominal and more specifically is labeled as __not__ (a) possessive.
600    pub fn is_non_possessive_nominal(&self) -> bool {
601        self.is_non_possessive_noun() || self.is_non_possessive_pronoun()
602    }
603
604    // Adjective metadata queries
605
606    pub fn get_degree(&self) -> Option<Degree> {
607        self.adjective.as_ref().and_then(|a| a.degree)
608    }
609
610    pub fn is_comparative_adjective(&self) -> bool {
611        matches!(
612            self.adjective,
613            Some(AdjectiveData {
614                degree: Some(Degree::Comparative)
615            })
616        )
617    }
618
619    pub fn is_superlative_adjective(&self) -> bool {
620        matches!(
621            self.adjective,
622            Some(AdjectiveData {
623                degree: Some(Degree::Superlative)
624            })
625        )
626    }
627
628    // Degree::Positive is the default if degree is not marked in the dictionary.
629    pub fn is_positive_adjective(&self) -> bool {
630        match self.adjective {
631            Some(AdjectiveData {
632                degree: Some(Degree::Positive),
633            }) => true,
634            Some(AdjectiveData { degree: None }) => true,
635            Some(AdjectiveData {
636                degree: Some(degree),
637            }) => !matches!(degree, Degree::Comparative | Degree::Superlative),
638            _ => false,
639        }
640    }
641
642    // Determiner metadata queries
643
    /// Checks if the word is definitely a determiner and more specifically is labeled as (a) quantifier.
    ///
    /// Shorthand for `is_quantifier_determiner`.
    pub fn is_quantifier(&self) -> bool {
        self.is_quantifier_determiner()
    }
648
649    // Non-POS queries
650
651    /// Checks whether a word is _definitely_ a swear.
652    pub fn is_swear(&self) -> bool {
653        matches!(self.swear, Some(true))
654    }
655
656    // Orthographic queries
657
658    /// Does the metadata for this word cover an all-lowercase variant? (e.g., "hello")
659    ///
660    /// This returns true if all letters in the word are lowercase. Words containing
661    /// non-letter characters (like numbers or symbols) are only considered if all
662    /// letter characters are lowercase.
663    pub fn is_lowercase(&self) -> bool {
664        self.orth_info.contains(OrthFlags::LOWERCASE)
665    }
666    /// Does the metadata for this word cover a titlecase variant? (e.g., "Hello")
667    ///
668    /// This returns true if the word is in titlecase form, which means:
669    /// - The first letter is uppercase
670    /// - All other letters are lowercase
671    /// - The word is at least 2 characters long
672    ///
673    /// Examples: "Hello", "World"
674    ///
675    /// Note: Words with internal capital letters (like "McDonald") or apostrophes (like "O'Reilly")
676    /// are not considered titlecase - they are classified as UPPER_CAMEL instead.
677    pub fn is_titlecase(&self) -> bool {
678        self.orth_info.contains(OrthFlags::TITLECASE)
679    }
680    /// Does the metadata for this word cover an all-uppercase variant? (e.g., "HELLO")
681    ///
682    /// This returns true if all letters in the word are uppercase. Words containing
683    /// non-letter characters (like numbers or symbols) are only considered if all
684    /// letter characters are uppercase.
685    ///
686    /// Examples: "HELLO", "NASA", "I"
687    pub fn is_allcaps(&self) -> bool {
688        self.orth_info.contains(OrthFlags::ALLCAPS)
689    }
690    /// Does the metadata for this word cover a lower camel case variant? (e.g., "helloWorld")
691    ///
692    /// This returns true if the word is in lower camel case, which means:
693    /// - The first letter is lowercase
694    /// - There is at least one uppercase letter after the first character
695    /// - The word must be at least 2 characters long
696    ///
697    /// Examples: "helloWorld", "getHTTPResponse", "eBay"
698    ///
699    /// Note: Single words that are all lowercase will return false.
700    /// Words starting with an uppercase letter will return false (those would be UpperCamel).
701    pub fn is_lower_camel(&self) -> bool {
702        self.orth_info.contains(OrthFlags::LOWER_CAMEL)
703    }
704    /// Does the metadata for this word cover an upper camel case / pascal case variant? (e.g., "HelloWorld")
705    ///
706    /// This returns true if the word is in upper camel case (also known as Pascal case), which means:
707    /// - The first letter is uppercase
708    /// - There is at least one other uppercase letter after the first character
709    /// - There is at least one lowercase letter after the first uppercase letter
710    /// - The word must be at least 3 characters long
711    ///
712    /// Examples:
713    /// - "HelloWorld" (standard Pascal case)
714    /// - "McDonald" (name with internal caps)
715    /// - "O'Reilly" (name with apostrophe and internal caps)
716    /// - "HttpRequest" (initialism followed by word)
717    ///
718    /// Note: Single words that are titlecase (like "Hello") will return false.
719    /// Words that are all uppercase (like "NASA") will also return false.
720    pub fn is_upper_camel(&self) -> bool {
721        self.orth_info.contains(OrthFlags::UPPER_CAMEL)
722    }
723
724    /// Does the metadata for this word cover an apostrophized variant? (e.g., "doesn't")
725    pub fn is_apostrophized(&self) -> bool {
726        self.orth_info.contains(OrthFlags::APOSTROPHE)
727    }
728
729    pub fn is_roman_numerals(&self) -> bool {
730        self.orth_info.contains(OrthFlags::ROMAN_NUMERALS)
731    }
732
733    /// Same thing as [`Self::or`], except in-place rather than a clone.
734    pub fn append(&mut self, other: &Self) -> &mut Self {
735        *self = self.or(other);
736        self
737    }
738}
739
// These verb forms are morphological variations, distinct from TAM (Tense-Aspect-Mood)
// Each form can be used in various TAM combinations:
// - Lemma form (infinitive, citation form, dictionary form)
//   Used in infinitives (e.g., "to sleep"), imperatives (e.g., "sleep!"), and with modals (e.g., "will sleep")
// - Past form (past participle and simple past)
//   Used as verbs (e.g., "slept") or adjectives (e.g., "closed door")
// - Progressive form (present participle and gerund)
//   Used as verbs (e.g., "sleeping"), nouns (e.g., "sleeping is important"), or adjectives (e.g., "sleeping dog")
// - Third person singular present (-s/-es)
//   Used for third person singular subjects (e.g., "he sleeps", "she reads")
//
// Important notes:
// 1. English expresses time through auxiliary verbs, not verb form alone
// 2. Irregular verbs can have different forms for past participle and simple past
// 3. Future is always expressed through auxiliary verbs (e.g., "will sleep", "going to sleep")
/// A single morphological verb form.
///
/// Every variant has its own bit as discriminant, so the values can be combined into a
/// [`VerbFormFlags`] set.
#[repr(u32)]
pub enum VerbForm {
    /// The uninflected verb form: "walk", "eat"
    LemmaForm = 1 << 0,
    /// The past form for regular verbs: "walked"
    PastForm = 1 << 1,
    /// The simple past/preterite form for irregular verbs: "ate"
    SimplePastForm = 1 << 2,
    /// The past participle form for irregular verbs: "eaten"
    PastParticipleForm = 1 << 3,
    /// The progressive/continuous/gerund/present participle form: "walking", "eating"
    ProgressiveForm = 1 << 4,
    /// The third person singular present form: "walks", "eats"
    ThirdPersonSingularPresentForm = 1 << 5,
}
770
/// The underlying type used for verb form flags.
///
/// It is the same integer type as the `#[repr(u32)]` of [`VerbForm`], whose discriminants
/// are cast to it to define the individual flags.
pub type VerbFormFlagsUnderlyingType = u32;
773
bitflags::bitflags! {
    /// A collection of bit flags used to represent verb forms.
    ///
    /// This allows a word to be tagged with multiple verb forms when applicable.
    /// Each flag takes its bit from the matching [`VerbForm`] variant.
    #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
    #[serde(transparent)]
    pub struct VerbFormFlags: VerbFormFlagsUnderlyingType {
        /// Flag for [`VerbForm::LemmaForm`].
        const LEMMA = VerbForm::LemmaForm as VerbFormFlagsUnderlyingType;
        /// Flag for [`VerbForm::PastForm`].
        const PAST = VerbForm::PastForm as VerbFormFlagsUnderlyingType;
        /// Flag for [`VerbForm::SimplePastForm`].
        const PRETERITE = VerbForm::SimplePastForm as VerbFormFlagsUnderlyingType;
        /// Flag for [`VerbForm::PastParticipleForm`].
        const PAST_PARTICIPLE = VerbForm::PastParticipleForm as VerbFormFlagsUnderlyingType;
        /// Flag for [`VerbForm::ProgressiveForm`].
        const PROGRESSIVE = VerbForm::ProgressiveForm as VerbFormFlagsUnderlyingType;
        /// Flag for [`VerbForm::ThirdPersonSingularPresentForm`].
        const THIRD_PERSON_SINGULAR = VerbForm::ThirdPersonSingularPresentForm as VerbFormFlagsUnderlyingType;
    }
}
789
/// Additional metadata for verbs.
///
/// Every property is optional; `None` means the property is not marked.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct VerbData {
    /// Whether the verb is labeled as a linking verb.
    pub is_linking: Option<bool>,
    /// Whether the verb is labeled as an auxiliary verb.
    pub is_auxiliary: Option<bool>,
    /// The set of morphological forms this word represents. Serialized under the name
    /// `verb_form`; a missing value deserializes to `None`.
    #[serde(rename = "verb_form", default)]
    pub verb_forms: Option<VerbFormFlags>,
}
797
798impl VerbData {
799    /// Produce a copy of `self` with the known properties of `other` set.
800    pub fn or(&self, other: &Self) -> Self {
801        let verb_forms = match (self.verb_forms, other.verb_forms) {
802            (Some(self_verb_forms), Some(other_verb_forms)) => {
803                Some(self_verb_forms | other_verb_forms)
804            }
805            (Some(self_verb_forms), None) => Some(self_verb_forms),
806            (None, Some(other_verb_forms)) => Some(other_verb_forms),
807            (None, None) => None,
808        };
809
810        Self {
811            is_linking: self.is_linking.or(other.is_linking),
812            is_auxiliary: self.is_auxiliary.or(other.is_auxiliary),
813            verb_forms,
814        }
815    }
816}
817
// nouns can be both singular and plural: "aircraft", "biceps", "fish", "sheep"
// TODO other noun properties may be worth adding: abstract
/// Additional metadata for nouns.
///
/// Every property is optional; `None` means the property is not marked in the dictionary.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct NounData {
    /// Whether the noun is labeled as a proper noun.
    pub is_proper: Option<bool>,
    /// Whether the noun is labeled as singular.
    pub is_singular: Option<bool>,
    /// Whether the noun is labeled as plural.
    pub is_plural: Option<bool>,
    /// Whether the noun is labeled as countable.
    pub is_countable: Option<bool>,
    /// Whether the noun is labeled as a mass noun.
    pub is_mass: Option<bool>,
    /// Whether the noun is labeled as possessive.
    pub is_possessive: Option<bool>,
}
829
830impl NounData {
831    /// Produce a copy of `self` with the known properties of `other` set.
832    pub fn or(&self, other: &Self) -> Self {
833        Self {
834            is_proper: self.is_proper.or(other.is_proper),
835            is_singular: self.is_singular.or(other.is_singular),
836            is_plural: self.is_plural.or(other.is_plural),
837            is_countable: self.is_countable.or(other.is_countable),
838            is_mass: self.is_mass.or(other.is_mass),
839            is_possessive: self.is_possessive.or(other.is_possessive),
840        }
841    }
842}
843
844// Person is a property of pronouns; the verb 'be', plus all verbs reflect 3rd person singular with -s
845#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
846pub enum Person {
847    First,
848    Second,
849    Third,
850}
851
852// TODO for now focused on personal pronouns?
853#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
854pub struct PronounData {
855    pub is_personal: Option<bool>,
856    pub is_singular: Option<bool>,
857    pub is_plural: Option<bool>,
858    pub is_possessive: Option<bool>,
859    pub is_reflexive: Option<bool>,
860    pub person: Option<Person>,
861    pub is_subject: Option<bool>,
862    pub is_object: Option<bool>,
863}
864
865impl PronounData {
866    /// Produce a copy of `self` with the known properties of `other` set.
867    pub fn or(&self, other: &Self) -> Self {
868        Self {
869            is_personal: self.is_personal.or(other.is_personal),
870            is_singular: self.is_singular.or(other.is_singular),
871            is_plural: self.is_plural.or(other.is_plural),
872            is_possessive: self.is_possessive.or(other.is_possessive),
873            is_reflexive: self.is_reflexive.or(other.is_reflexive),
874            person: self.person.or(other.person),
875            is_subject: self.is_subject.or(other.is_subject),
876            is_object: self.is_object.or(other.is_object),
877        }
878    }
879}
880
881/// Additional metadata for determiners
882#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
883pub struct DeterminerData {
884    pub is_demonstrative: Option<bool>,
885    pub is_possessive: Option<bool>,
886    pub is_quantifier: Option<bool>,
887}
888
889impl DeterminerData {
890    /// Produce a copy of `self` with the known properties of `other` set.
891    pub fn or(&self, other: &Self) -> Self {
892        Self {
893            is_demonstrative: self.is_demonstrative.or(other.is_demonstrative),
894            is_possessive: self.is_possessive.or(other.is_possessive),
895            is_quantifier: self.is_quantifier.or(other.is_quantifier),
896        }
897    }
898}
899
900/// Degree is a property of adjectives: positive is not inflected
901/// Comparative is inflected with -er or comes after the word "more"
902/// Superlative is inflected with -est or comes after the word "most"
903#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
904pub enum Degree {
905    Positive,
906    Comparative,
907    Superlative,
908}
909
910/// Some adjectives are not comparable so don't have -er or -est forms and can't be used with "more" or "most".
911/// Some adjectives can only be used "attributively" (before a noun); some only predicatively (after "is" etc.).
912/// In old grammars words like the articles and determiners are classified as adjectives but behave differently.
913#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
914pub struct AdjectiveData {
915    pub degree: Option<Degree>,
916}
917
918impl AdjectiveData {
919    /// Produce a copy of `self` with the known properties of `other` set.
920    pub fn or(&self, other: &Self) -> Self {
921        Self {
922            degree: self.degree.or(other.degree),
923        }
924    }
925}
926
927/// Adverb can be a "junk drawer" category for words which don't fit the other major categories.
928/// The typical adverbs are "adverbs of manner", those derived from adjectives in -ly
929/// other adverbs (time, place, etc) should probably not be considered adverbs for Harper's purposes
930#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
931pub struct AdverbData {
932    pub is_manner: Option<bool>,
933    pub is_frequency: Option<bool>,
934    pub is_degree: Option<bool>,
935}
936
937impl AdverbData {
938    /// Produce a copy of `self` with the known properties of `other` set.
939    pub fn or(&self, _other: &Self) -> Self {
940        Self {
941            is_manner: self.is_manner.or(_other.is_manner),
942            is_frequency: self.is_frequency.or(_other.is_frequency),
943            is_degree: self.is_degree.or(_other.is_degree),
944        }
945    }
946}
947
948#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
949pub struct ConjunctionData {}
950
951impl ConjunctionData {
952    /// Produce a copy of `self` with the known properties of `other` set.
953    pub fn or(&self, _other: &Self) -> Self {
954        Self {}
955    }
956}
957
958/// A regional dialect.
959///
960/// Note: these have bit-shifted values so that they can ergonomically integrate with
961/// `DialectFlags`. Each value here must have a unique bit index inside
962/// `DialectsUnderlyingType`.
963#[derive(
964    Debug,
965    Clone,
966    Copy,
967    Serialize,
968    Deserialize,
969    PartialEq,
970    PartialOrd,
971    Eq,
972    Hash,
973    EnumCount,
974    EnumString,
975    EnumIter,
976    Display,
977    VariantArray,
978)]
979pub enum Dialect {
980    American = 1 << 0,
981    Canadian = 1 << 1,
982    Australian = 1 << 2,
983    British = 1 << 3,
984}
985impl Dialect {
986    /// Tries to guess the dialect used in the document by finding which dialect is used the most.
987    /// Returns `None` if it fails to find a single dialect that is used the most.
988    #[must_use]
989    pub fn try_guess_from_document(document: &Document) -> Option<Self> {
990        Self::try_from(DialectFlags::get_most_used_dialects_from_document(document)).ok()
991    }
992
993    /// Tries to get a dialect from its abbreviation. Returns `None` if the abbreviation is not
994    /// recognized.
995    ///
996    /// # Examples
997    ///
998    /// ```
999    /// use harper_core::Dialect;
1000    ///
1001    /// let abbrs = ["US", "CA", "AU", "GB"];
1002    /// let mut dialects = abbrs.iter().map(|abbr| Dialect::try_from_abbr(abbr));
1003    ///
1004    /// assert_eq!(Some(Dialect::American), dialects.next().unwrap()); // US
1005    /// assert_eq!(Some(Dialect::Canadian), dialects.next().unwrap()); // CA
1006    /// assert_eq!(Some(Dialect::Australian), dialects.next().unwrap()); // AU
1007    /// assert_eq!(Some(Dialect::British), dialects.next().unwrap()); // GB
1008    /// ```
1009    #[must_use]
1010    pub fn try_from_abbr(abbr: &str) -> Option<Self> {
1011        match abbr {
1012            "US" => Some(Self::American),
1013            "CA" => Some(Self::Canadian),
1014            "AU" => Some(Self::Australian),
1015            "GB" => Some(Self::British),
1016            _ => None,
1017        }
1018    }
1019}
1020impl TryFrom<DialectFlags> for Dialect {
1021    type Error = ();
1022
1023    /// Attempts to convert `DialectFlags` to a single `Dialect`.
1024    ///
1025    /// # Errors
1026    ///
1027    /// Will return `Err` if more than one dialect is enabled or if an undefined dialect is
1028    /// enabled.
1029    fn try_from(dialect_flags: DialectFlags) -> Result<Self, Self::Error> {
1030        // Ensure only one dialect is enabled before converting.
1031        if dialect_flags.bits().count_ones() == 1 {
1032            match dialect_flags {
1033                df if df.is_dialect_enabled_strict(Dialect::American) => Ok(Dialect::American),
1034                df if df.is_dialect_enabled_strict(Dialect::Canadian) => Ok(Dialect::Canadian),
1035                df if df.is_dialect_enabled_strict(Dialect::Australian) => Ok(Dialect::Australian),
1036                df if df.is_dialect_enabled_strict(Dialect::British) => Ok(Dialect::British),
1037                _ => Err(()),
1038            }
1039        } else {
1040            // More than one dialect enabled; can't soundly convert.
1041            Err(())
1042        }
1043    }
1044}
1045
1046// The underlying type used for DialectFlags.
1047// At the time of writing, this is currently a `u8`. If we want to define more than 8 dialects in
1048// the future, we will need to switch this to a larger type.
1049type DialectFlagsUnderlyingType = u8;
1050
1051bitflags::bitflags! {
1052    /// A collection of bit flags used to represent enabled dialects.
1053    ///
1054    /// This is generally used to allow a word (or similar) to be tagged with multiple dialects.
1055    #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash)]
1056    #[serde(transparent)]
1057    pub struct DialectFlags: DialectFlagsUnderlyingType {
1058        const AMERICAN = Dialect::American as DialectFlagsUnderlyingType;
1059        const CANADIAN = Dialect::Canadian as DialectFlagsUnderlyingType;
1060        const AUSTRALIAN = Dialect::Australian as DialectFlagsUnderlyingType;
1061        const BRITISH = Dialect::British as DialectFlagsUnderlyingType;
1062    }
1063}
1064impl DialectFlags {
1065    /// Checks if the provided dialect is enabled.
1066    /// If no dialect is explicitly enabled, it is assumed that all dialects are enabled.
1067    #[must_use]
1068    pub fn is_dialect_enabled(self, dialect: Dialect) -> bool {
1069        self.is_empty() || self.intersects(Self::from_dialect(dialect))
1070    }
1071
1072    /// Checks if the provided dialect is ***explicitly*** enabled.
1073    ///
1074    /// Unlike `is_dialect_enabled`, this will return false when no dialects are explicitly
1075    /// enabled.
1076    #[must_use]
1077    pub fn is_dialect_enabled_strict(self, dialect: Dialect) -> bool {
1078        self.intersects(Self::from_dialect(dialect))
1079    }
1080
1081    /// Constructs a `DialectFlags` from the provided `Dialect`, with only that dialect being
1082    /// enabled.
1083    ///
1084    /// # Panics
1085    ///
1086    /// This will panic if `dialect` represents a dialect that is not defined in
1087    /// `DialectFlags`.
1088    #[must_use]
1089    pub fn from_dialect(dialect: Dialect) -> Self {
1090        let Some(out) = Self::from_bits(dialect as DialectFlagsUnderlyingType) else {
1091            panic!("The '{dialect}' dialect isn't defined in DialectFlags!");
1092        };
1093        out
1094    }
1095
1096    /// Gets the most commonly used dialect(s) in the document.
1097    ///
1098    /// If multiple dialects are used equally often, they will all be enabled in the returned
1099    /// `DialectFlags`. On the other hand, if there is a single dialect that is used the most, it
1100    /// will be the only one enabled.
1101    #[must_use]
1102    pub fn get_most_used_dialects_from_document(document: &Document) -> Self {
1103        // Initialize counters.
1104        let mut dialect_counters: [(Dialect, usize); Dialect::COUNT] = Dialect::VARIANTS
1105            .iter()
1106            .map(|d| (*d, 0))
1107            .collect_array()
1108            .unwrap();
1109
1110        // Count word dialects.
1111        document.iter_words().for_each(|w| {
1112            if let TokenKind::Word(Some(lexeme_metadata)) = &w.kind {
1113                // If the token is a word, iterate though the dialects in `dialect_counters` and
1114                // increment those counters where the word has the respective dialect enabled.
1115                dialect_counters.iter_mut().for_each(|(dialect, count)| {
1116                    if lexeme_metadata.dialects.is_dialect_enabled(*dialect) {
1117                        *count += 1;
1118                    }
1119                });
1120            }
1121        });
1122
1123        // Find max counter.
1124        let max_counter = dialect_counters
1125            .iter()
1126            .map(|(_, count)| count)
1127            .max()
1128            .unwrap();
1129        // Get and convert the collection of most used dialects into a `DialectFlags`.
1130        dialect_counters
1131            .into_iter()
1132            .filter(|(_, count)| count == max_counter)
1133            .fold(DialectFlags::empty(), |acc, dialect| {
1134                // Fold most used dialects into `DialectFlags` via bitwise or.
1135                acc | Self::from_dialect(dialect.0)
1136            })
1137    }
1138}
1139impl Default for DialectFlags {
1140    /// A default value with no dialects explicitly enabled.
1141    /// Implicitly, this state corresponds to all dialects being enabled.
1142    fn default() -> Self {
1143        Self::empty()
1144    }
1145}
1146
1147#[cfg(test)]
1148pub mod tests {
1149    use crate::DictWordMetadata;
1150    use crate::spell::{Dictionary, FstDictionary};
1151
1152    // Helper function to get metadata from the curated dictionary
1153    pub fn md(word: &str) -> DictWordMetadata {
1154        FstDictionary::curated()
1155            .get_word_metadata_str(word)
1156            .unwrap_or_else(|| panic!("Word '{word}' not found in dictionary"))
1157            .into_owned()
1158    }
1159
1160    mod dialect {
1161        use super::super::{Dialect, DialectFlags};
1162        use crate::Document;
1163
1164        #[test]
1165        fn guess_british_dialect() {
1166            let document = Document::new_plain_english_curated("Aluminium was used.");
1167            let df = DialectFlags::get_most_used_dialects_from_document(&document);
1168            assert!(
1169                df.is_dialect_enabled_strict(Dialect::British)
1170                    && !df.is_dialect_enabled_strict(Dialect::American)
1171            );
1172        }
1173
1174        #[test]
1175        fn guess_american_dialect() {
1176            let document = Document::new_plain_english_curated("Aluminum was used.");
1177            let df = DialectFlags::get_most_used_dialects_from_document(&document);
1178            assert!(
1179                df.is_dialect_enabled_strict(Dialect::American)
1180                    && !df.is_dialect_enabled_strict(Dialect::British)
1181            );
1182        }
1183    }
1184
1185    mod noun {
1186        use crate::dict_word_metadata::tests::md;
1187
1188        #[test]
1189        fn puppy_is_noun() {
1190            assert!(md("puppy").is_noun());
1191        }
1192
1193        #[test]
1194        fn prepare_is_not_noun() {
1195            assert!(!md("prepare").is_noun());
1196        }
1197
1198        #[test]
1199        fn paris_is_proper_noun() {
1200            assert!(md("Paris").is_proper_noun());
1201        }
1202
1203        #[test]
1204        fn permit_is_non_proper_noun() {
1205            assert!(md("lapdog").is_non_proper_noun());
1206        }
1207
1208        #[test]
1209        fn hound_is_singular_noun() {
1210            assert!(md("hound").is_singular_noun());
1211        }
1212
1213        #[test]
1214        fn pooches_is_non_singular_noun() {
1215            assert!(md("pooches").is_non_singular_noun());
1216        }
1217
1218        // Make sure is_non_xxx_noun methods don't behave like is_not_xxx_noun.
1219        // In other words, make sure they don't return true for words that are not nouns.
1220        // They must only pass for words that are nouns but not singular etc.
1221        #[test]
1222        fn loyal_doesnt_pass_is_non_singular_noun() {
1223            assert!(!md("loyal").is_non_singular_noun());
1224        }
1225
1226        #[test]
1227        fn hounds_is_plural_noun() {
1228            assert!(md("hounds").is_plural_noun());
1229        }
1230
1231        #[test]
1232        fn pooch_is_non_plural_noun() {
1233            assert!(md("pooch").is_non_plural_noun());
1234        }
1235
1236        #[test]
1237        fn fish_is_singular_noun() {
1238            assert!(md("fish").is_singular_noun());
1239        }
1240
1241        #[test]
1242        fn fish_is_plural_noun() {
1243            assert!(md("fish").is_plural_noun());
1244        }
1245
1246        #[test]
1247        fn fishes_is_plural_noun() {
1248            assert!(md("fishes").is_plural_noun());
1249        }
1250
1251        #[test]
1252        fn sheep_is_singular_noun() {
1253            assert!(md("sheep").is_singular_noun());
1254        }
1255
1256        #[test]
1257        fn sheep_is_plural_noun() {
1258            assert!(md("sheep").is_plural_noun());
1259        }
1260
1261        #[test]
1262        #[should_panic]
1263        fn sheeps_is_not_word() {
1264            md("sheeps");
1265        }
1266
1267        #[test]
1268        fn bicep_is_singular_noun() {
1269            assert!(md("bicep").is_singular_noun());
1270        }
1271
1272        #[test]
1273        fn biceps_is_singular_noun() {
1274            assert!(md("biceps").is_singular_noun());
1275        }
1276
1277        #[test]
1278        fn biceps_is_plural_noun() {
1279            assert!(md("biceps").is_plural_noun());
1280        }
1281
1282        #[test]
1283        fn aircraft_is_singular_noun() {
1284            assert!(md("aircraft").is_singular_noun());
1285        }
1286
1287        #[test]
1288        fn aircraft_is_plural_noun() {
1289            assert!(md("aircraft").is_plural_noun());
1290        }
1291
1292        #[test]
1293        #[should_panic]
1294        fn aircrafts_is_not_word() {
1295            md("aircrafts");
1296        }
1297
1298        #[test]
1299        fn dog_apostrophe_s_is_possessive_noun() {
1300            assert!(md("dog's").is_possessive_noun());
1301        }
1302
1303        #[test]
1304        fn dogs_is_non_possessive_noun() {
1305            assert!(md("dogs").is_non_possessive_noun());
1306        }
1307
1308        // noun countability
1309
1310        #[test]
1311        fn dog_is_countable() {
1312            assert!(md("dog").is_countable_noun());
1313        }
1314        #[test]
1315        fn dog_is_non_mass_noun() {
1316            assert!(md("dog").is_non_mass_noun());
1317        }
1318
1319        #[test]
1320        fn furniture_is_mass_noun() {
1321            assert!(md("furniture").is_mass_noun());
1322        }
1323        #[test]
1324        fn furniture_is_non_countable_noun() {
1325            assert!(md("furniture").is_non_countable_noun());
1326        }
1327
1328        #[test]
1329        fn equipment_is_mass_noun() {
1330            assert!(md("equipment").is_mass_noun());
1331        }
1332        #[test]
1333        fn equipment_is_non_countable_noun() {
1334            assert!(md("equipment").is_non_countable_noun());
1335        }
1336
1337        #[test]
1338        fn beer_is_countable_noun() {
1339            assert!(md("beer").is_countable_noun());
1340        }
1341        #[test]
1342        fn beer_is_mass_noun() {
1343            assert!(md("beer").is_mass_noun());
1344        }
1345    }
1346
1347    mod pronoun {
1348        use crate::dict_word_metadata::tests::md;
1349
1350        mod i_me_myself {
1351            use crate::dict_word_metadata::tests::md;
1352
1353            #[test]
1354            fn i_is_pronoun() {
1355                assert!(md("I").is_pronoun());
1356            }
1357            #[test]
1358            fn i_is_personal_pronoun() {
1359                assert!(md("I").is_personal_pronoun());
1360            }
1361            #[test]
1362            fn i_is_singular_pronoun() {
1363                assert!(md("I").is_singular_pronoun());
1364            }
1365            #[test]
1366            fn i_is_subject_pronoun() {
1367                assert!(md("I").is_subject_pronoun());
1368            }
1369
1370            #[test]
1371            fn me_is_pronoun() {
1372                assert!(md("me").is_pronoun());
1373            }
1374            #[test]
1375            fn me_is_personal_pronoun() {
1376                assert!(md("me").is_personal_pronoun());
1377            }
1378            #[test]
1379            fn me_is_singular_pronoun() {
1380                assert!(md("me").is_singular_pronoun());
1381            }
1382            #[test]
1383            fn me_is_object_pronoun() {
1384                assert!(md("me").is_object_pronoun());
1385            }
1386
1387            #[test]
1388            fn myself_is_pronoun() {
1389                assert!(md("myself").is_pronoun());
1390            }
1391            #[test]
1392            fn myself_is_personal_pronoun() {
1393                assert!(md("myself").is_personal_pronoun());
1394            }
1395            #[test]
1396            fn myself_is_singular_pronoun() {
1397                assert!(md("myself").is_singular_pronoun());
1398            }
1399            #[test]
1400            fn myself_is_reflexive_pronoun() {
1401                assert!(md("myself").is_reflexive_pronoun());
1402            }
1403        }
1404
1405        mod we_us_ourselves {
1406            use crate::dict_word_metadata::tests::md;
1407
1408            #[test]
1409            fn we_is_pronoun() {
1410                assert!(md("we").is_pronoun());
1411            }
1412            #[test]
1413            fn we_is_personal_pronoun() {
1414                assert!(md("we").is_personal_pronoun());
1415            }
1416            #[test]
1417            fn we_is_plural_pronoun() {
1418                assert!(md("we").is_plural_pronoun());
1419            }
1420            #[test]
1421            fn we_is_subject_pronoun() {
1422                assert!(md("we").is_subject_pronoun());
1423            }
1424
1425            #[test]
1426            fn us_is_pronoun() {
1427                assert!(md("us").is_pronoun());
1428            }
1429            #[test]
1430            fn us_is_personal_pronoun() {
1431                assert!(md("us").is_personal_pronoun());
1432            }
1433            #[test]
1434            fn us_is_plural_pronoun() {
1435                assert!(md("us").is_plural_pronoun());
1436            }
1437            #[test]
1438            fn us_is_object_pronoun() {
1439                assert!(md("us").is_object_pronoun());
1440            }
1441
1442            #[test]
1443            fn ourselves_is_pronoun() {
1444                assert!(md("ourselves").is_pronoun());
1445            }
1446            #[test]
1447            fn ourselves_is_personal_pronoun() {
1448                assert!(md("ourselves").is_personal_pronoun());
1449            }
1450            #[test]
1451            fn ourselves_is_plural_pronoun() {
1452                assert!(md("ourselves").is_plural_pronoun());
1453            }
1454            #[test]
1455            fn ourselves_is_reflexive_pronoun() {
1456                assert!(md("ourselves").is_reflexive_pronoun());
1457            }
1458        }
1459
1460        mod you_yourself {
1461            use crate::dict_word_metadata::tests::md;
1462
1463            #[test]
1464            fn you_is_pronoun() {
1465                assert!(md("you").is_pronoun());
1466            }
1467            #[test]
1468            fn you_is_personal_pronoun() {
1469                assert!(md("you").is_personal_pronoun());
1470            }
1471            #[test]
1472            fn you_is_singular_pronoun() {
1473                assert!(md("you").is_singular_pronoun());
1474            }
1475            #[test]
1476            fn you_is_plural_pronoun() {
1477                assert!(md("you").is_plural_pronoun());
1478            }
1479            #[test]
1480            fn you_is_subject_pronoun() {
1481                assert!(md("you").is_subject_pronoun());
1482            }
1483            #[test]
1484            fn you_is_object_pronoun() {
1485                assert!(md("you").is_object_pronoun());
1486            }
1487            #[test]
1488            fn yourself_is_pronoun() {
1489                assert!(md("yourself").is_pronoun());
1490            }
1491            #[test]
1492            fn yourself_is_personal_pronoun() {
1493                assert!(md("yourself").is_personal_pronoun());
1494            }
1495            #[test]
1496            fn yourself_is_singular_pronoun() {
1497                assert!(md("yourself").is_singular_pronoun());
1498            }
1499            #[test]
1500            fn yourself_is_reflexive_pronoun() {
1501                assert!(md("yourself").is_reflexive_pronoun());
1502            }
1503        }
1504
1505        mod he_him_himself {
1506            use crate::dict_word_metadata::tests::md;
1507
1508            #[test]
1509            fn he_is_pronoun() {
1510                assert!(md("he").is_pronoun());
1511            }
1512            #[test]
1513            fn he_is_personal_pronoun() {
1514                assert!(md("he").is_personal_pronoun());
1515            }
1516            #[test]
1517            fn he_is_singular_pronoun() {
1518                assert!(md("he").is_singular_pronoun());
1519            }
1520            #[test]
1521            fn he_is_subject_pronoun() {
1522                assert!(md("he").is_subject_pronoun());
1523            }
1524
1525            #[test]
1526            fn him_is_pronoun() {
1527                assert!(md("him").is_pronoun());
1528            }
1529            #[test]
1530            fn him_is_personal_pronoun() {
1531                assert!(md("him").is_personal_pronoun());
1532            }
1533            #[test]
1534            fn him_is_singular_pronoun() {
1535                assert!(md("him").is_singular_pronoun());
1536            }
1537            #[test]
1538            fn him_is_object_pronoun() {
1539                assert!(md("him").is_object_pronoun());
1540            }
1541
1542            #[test]
1543            fn himself_is_pronoun() {
1544                assert!(md("himself").is_pronoun());
1545            }
1546            #[test]
1547            fn himself_is_personal_pronoun() {
1548                assert!(md("himself").is_personal_pronoun());
1549            }
1550            #[test]
1551            fn himself_is_singular_pronoun() {
1552                assert!(md("himself").is_singular_pronoun());
1553            }
1554            #[test]
1555            fn himself_is_reflexive_pronoun() {
1556                assert!(md("himself").is_reflexive_pronoun());
1557            }
1558        }
1559
1560        mod she_her_herself {
1561            use crate::dict_word_metadata::tests::md;
1562
1563            #[test]
1564            fn she_is_pronoun() {
1565                assert!(md("she").is_pronoun());
1566            }
1567            #[test]
1568            fn she_is_personal_pronoun() {
1569                assert!(md("she").is_personal_pronoun());
1570            }
1571            #[test]
1572            fn she_is_singular_pronoun() {
1573                assert!(md("she").is_singular_pronoun());
1574            }
1575            #[test]
1576            fn she_is_subject_pronoun() {
1577                assert!(md("she").is_subject_pronoun());
1578            }
1579
1580            #[test]
1581            fn her_is_pronoun() {
1582                assert!(md("her").is_pronoun());
1583            }
1584            #[test]
1585            fn her_is_personal_pronoun() {
1586                assert!(md("her").is_personal_pronoun());
1587            }
1588            #[test]
1589            fn her_is_singular_pronoun() {
1590                assert!(md("her").is_singular_pronoun());
1591            }
1592            #[test]
1593            fn her_is_object_pronoun() {
1594                assert!(md("her").is_object_pronoun());
1595            }
1596
1597            #[test]
1598            fn herself_is_pronoun() {
1599                assert!(md("herself").is_pronoun());
1600            }
1601            #[test]
1602            fn herself_is_personal_pronoun() {
1603                assert!(md("herself").is_personal_pronoun());
1604            }
1605            #[test]
1606            fn herself_is_singular_pronoun() {
1607                assert!(md("herself").is_singular_pronoun());
1608            }
1609            #[test]
1610            fn herself_is_reflexive_pronoun() {
1611                assert!(md("herself").is_reflexive_pronoun());
1612            }
1613        }
1614
1615        mod it_itself {
1616            use crate::dict_word_metadata::tests::md;
1617
1618            #[test]
1619            fn it_is_pronoun() {
1620                assert!(md("it").is_pronoun());
1621            }
1622            #[test]
1623            fn it_is_personal_pronoun() {
1624                assert!(md("it").is_personal_pronoun());
1625            }
1626            #[test]
1627            fn it_is_singular_pronoun() {
1628                assert!(md("it").is_singular_pronoun());
1629            }
1630            #[test]
1631            fn it_is_subject_pronoun() {
1632                assert!(md("it").is_subject_pronoun());
1633            }
1634            #[test]
1635            fn it_is_object_pronoun() {
1636                assert!(md("it").is_object_pronoun());
1637            }
1638
1639            #[test]
1640            fn itself_is_pronoun() {
1641                assert!(md("itself").is_pronoun());
1642            }
1643            #[test]
1644            fn itself_is_personal_pronoun() {
1645                assert!(md("itself").is_personal_pronoun());
1646            }
1647            #[test]
1648            fn itself_is_singular_pronoun() {
1649                assert!(md("itself").is_singular_pronoun());
1650            }
1651            #[test]
1652            fn itself_is_reflexive_pronoun() {
1653                assert!(md("itself").is_reflexive_pronoun());
1654            }
1655        }
1656
1657        mod they_them_themselves {
1658            use crate::dict_word_metadata::tests::md;
1659
1660            #[test]
1661            fn they_is_pronoun() {
1662                assert!(md("they").is_pronoun());
1663            }
1664            #[test]
1665            fn they_is_personal_pronoun() {
1666                assert!(md("they").is_personal_pronoun());
1667            }
1668            #[test]
1669            fn they_is_plural_pronoun() {
1670                assert!(md("they").is_plural_pronoun());
1671            }
1672            #[test]
1673            fn they_is_subject_pronoun() {
1674                assert!(md("they").is_subject_pronoun());
1675            }
1676
1677            #[test]
1678            fn them_is_pronoun() {
1679                assert!(md("them").is_pronoun());
1680            }
1681            #[test]
1682            fn them_is_personal_pronoun() {
1683                assert!(md("them").is_personal_pronoun());
1684            }
1685            #[test]
1686            fn them_is_plural_pronoun() {
1687                assert!(md("them").is_plural_pronoun());
1688            }
1689            #[test]
1690            fn them_is_object_pronoun() {
1691                assert!(md("them").is_object_pronoun());
1692            }
1693
1694            #[test]
1695            fn themselves_is_pronoun() {
1696                assert!(md("themselves").is_pronoun());
1697            }
1698            #[test]
1699            fn themselves_is_personal_pronoun() {
1700                assert!(md("themselves").is_personal_pronoun());
1701            }
1702            #[test]
1703            fn themselves_is_plural_pronoun() {
1704                assert!(md("themselves").is_plural_pronoun());
1705            }
1706            #[test]
1707            fn themselves_is_reflexive_pronoun() {
1708                assert!(md("themselves").is_reflexive_pronoun());
1709            }
1710        }
1711
1712        // Possessive pronouns (not to be confused with possessive adjectives/determiners)
1713        #[test]
1714        fn mine_is_pronoun() {
1715            assert!(md("mine").is_pronoun());
1716        }
1717        #[test]
1718        fn ours_is_pronoun() {
1719            assert!(md("ours").is_pronoun());
1720        }
1721        #[test]
1722        fn yours_is_pronoun() {
1723            assert!(md("yours").is_pronoun());
1724        }
1725        #[test]
1726        fn his_is_pronoun() {
1727            assert!(md("his").is_pronoun());
1728        }
1729        #[test]
1730        fn hers_is_pronoun() {
1731            assert!(md("hers").is_pronoun());
1732        }
1733        #[test]
1734        fn its_is_pronoun() {
1735            assert!(md("its").is_pronoun());
1736        }
1737        #[test]
1738        fn theirs_is_pronoun() {
1739            assert!(md("theirs").is_pronoun());
1740        }
1741
1742        // archaic pronouns
1743        #[test]
1744        fn archaic_pronouns() {
1745            assert!(md("thou").is_pronoun());
1746            assert!(md("thee").is_pronoun());
1747            assert!(md("thyself").is_pronoun());
1748            assert!(md("thine").is_pronoun());
1749        }
1750
1751        // generic pronouns
1752        #[test]
1753        fn generic_pronouns() {
1754            assert!(md("one").is_pronoun());
1755            assert!(md("oneself").is_pronoun());
1756        }
1757
1758        // relative and interrogative pronouns
1759        #[test]
1760        fn relative_and_interrogative_pronouns() {
1761            assert!(md("who").is_pronoun());
1762            assert!(md("whom").is_pronoun());
1763            assert!(md("whose").is_pronoun());
1764            assert!(md("which").is_pronoun());
1765            assert!(md("what").is_pronoun());
1766        }
1767
1768        // nonstandard pronouns
1769        #[test]
1770        #[ignore = "not in dictionary"]
1771        fn nonstandard_pronouns() {
1772            assert!(md("themself").pronoun.is_some());
1773            assert!(md("y'all'").pronoun.is_some());
1774        }
1775    }
1776
1777    mod adjective {
1778        use crate::{Degree, dict_word_metadata::tests::md};
1779
1780        // Getting degrees
1781
1782        #[test]
1783        #[ignore = "not marked yet because it might not be reliable"]
1784        fn big_is_positive() {
1785            assert_eq!(md("big").get_degree(), Some(Degree::Positive));
1786        }
1787
1788        #[test]
1789        fn bigger_is_comparative() {
1790            assert_eq!(md("bigger").get_degree(), Some(Degree::Comparative));
1791        }
1792
1793        #[test]
1794        fn biggest_is_superlative() {
1795            assert_eq!(md("biggest").get_degree(), Some(Degree::Superlative));
1796        }
1797
1798        #[test]
1799        #[should_panic(expected = "Word 'bigly' not found in dictionary")]
1800        fn bigly_is_not_an_adjective_form_we_track() {
1801            assert_eq!(md("bigly").get_degree(), None);
1802        }
1803
1804        // Calling is_ methods
1805
1806        // TODO: positive degree not implemented
1807
1808        #[test]
1809        fn bigger_is_comparative_adjective() {
1810            assert!(md("bigger").is_comparative_adjective());
1811        }
1812
1813        #[test]
1814        fn biggest_is_superlative_adjective() {
1815            assert!(md("biggest").is_superlative_adjective());
1816        }
1817    }
1818
1819    #[test]
1820    fn the_is_determiner() {
1821        assert!(md("the").is_determiner());
1822    }
1823    #[test]
1824    fn this_is_demonstrative_determiner() {
1825        assert!(md("this").is_demonstrative_determiner());
1826    }
1827    #[test]
1828    fn your_is_possessive_determiner() {
1829        assert!(md("your").is_possessive_determiner());
1830    }
1831
1832    #[test]
1833    fn every_is_quantifier() {
1834        assert!(md("every").is_quantifier());
1835    }
1836
1837    #[test]
1838    fn the_isnt_quantifier() {
1839        assert!(!md("the").is_quantifier());
1840    }
1841
1842    #[test]
1843    fn equipment_is_mass_noun() {
1844        assert!(md("equipment").is_mass_noun());
1845    }
1846
1847    #[test]
1848    fn equipment_is_non_countable_noun() {
1849        assert!(md("equipment").is_non_countable_noun());
1850    }
1851
1852    #[test]
1853    fn equipment_isnt_countable_noun() {
1854        assert!(!md("equipment").is_countable_noun());
1855    }
1856
1857    mod verb {
1858        use crate::dict_word_metadata::tests::md;
1859
1860        #[test]
1861        fn lemma_walk() {
1862            let md = md("walk");
1863            assert!(md.is_verb_lemma())
1864        }
1865
1866        #[test]
1867        fn lemma_fix() {
1868            let md = md("fix");
1869            assert!(md.is_verb_lemma())
1870        }
1871
1872        #[test]
1873        fn progressive_walking() {
1874            let md = md("walking");
1875            assert!(md.is_verb_progressive_form())
1876        }
1877
1878        #[test]
1879        fn past_walked() {
1880            let md = md("walked");
1881            assert!(md.is_verb_past_form())
1882        }
1883
1884        #[test]
1885        fn simple_past_ate() {
1886            let md = md("ate");
1887            assert!(md.is_verb_simple_past_form())
1888        }
1889
1890        #[test]
1891        fn past_participle_eaten() {
1892            let md = md("eaten");
1893            assert!(md.is_verb_past_participle_form())
1894        }
1895
1896        #[test]
1897        fn third_pers_sing_walks() {
1898            let md = md("walks");
1899            assert!(md.is_verb_third_person_singular_present_form())
1900        }
1901    }
1902}