Skip to main content

harper_core/
dict_word_metadata.rs

1use harper_brill::UPOS;
2use is_macro::Is;
3use itertools::Itertools;
4use paste::paste;
5use serde::{Deserialize, Serialize};
6use smallvec::SmallVec;
7use strum::{EnumCount as _, VariantArray as _};
8use strum_macros::{Display, EnumCount, EnumIter, EnumString, VariantArray};
9
10use std::convert::TryFrom;
11
12use crate::dict_word_metadata_orthography::OrthFlags;
13use crate::spell::WordId;
14use crate::{Document, TokenKind, TokenStringExt};
15
16/// This represents a "lexeme" or "headword" which is case-folded but affix-expanded.
17/// So not only lemmata but also inflected forms are stored here, with "horn" and "horns" each
18/// having their own lexeme, but "Ivy" and "ivy" sharing the same lexeme.
19#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Hash)]
20pub struct DictWordMetadata {
21    /// The main parts of speech which have extra data.
22    pub noun: Option<NounData>,
23    pub pronoun: Option<PronounData>,
24    pub verb: Option<VerbData>,
25    pub adjective: Option<AdjectiveData>,
26    pub adverb: Option<AdverbData>,
27    pub conjunction: Option<ConjunctionData>,
28    pub determiner: Option<DeterminerData>,
29    pub affix: Option<AffixData>,
30    /// Parts of speech which don't have extra data.
31    /// Whether the word is a [preposition](https://www.merriam-webster.com/dictionary/preposition).
32    #[serde(default = "default_false")]
33    pub preposition: bool,
34    /// Whether the word is an offensive word.
35    pub swear: Option<bool>,
36    /// The dialects this word belongs to.
37    /// If no dialects are defined, it can be assumed that the word is
38    /// valid in all dialects of English.
39    #[serde(default = "default_default")]
40    pub dialects: DialectFlags,
41    /// Orthographic information: letter case, spaces, hyphens, etc.
42    #[serde(default = "OrthFlags::empty")]
43    pub orth_info: OrthFlags,
44    /// Whether the word is considered especially common.
45    #[serde(default = "default_false")]
46    pub common: bool,
47    #[serde(default = "default_none")]
48    pub derived_from: Option<WordId>,
49    /// Generated by a chunker. Declares whether the word is a member of a nominal phrase. Using
50    /// this should be preferred over the similarly named `Pattern`.
51    ///
52    /// For more details, see [the announcement blog post](https://elijahpotter.dev/articles/training_a_chunker_with_burn).
53    pub np_member: Option<bool>,
54    /// Generated by a POS tagger. Declares what it inferred the word's part of speech to be.
55    pub pos_tag: Option<UPOS>,
56}
57
/// Default provider for `#[serde(default = "default_false")]` fields: always `false`.
fn default_false() -> bool {
    bool::default()
}
62
/// Default provider for `#[serde(default = "default_none")]` fields: always `None`.
fn default_none<T>() -> Option<T> {
    Option::default()
}
67
/// Default provider for `#[serde(default = "default_default")]` fields: the type's own
/// [`Default`] value.
fn default_default<T>() -> T
where
    T: Default,
{
    Default::default()
}
72
/// Generates the bulk of the part-of-speech query methods of `DictWordMetadata`.
///
/// It is meant to be invoked inside `impl DictWordMetadata` with a `.`-separated list of
/// `category has sub, sub, ...` entries. Each `category` must name an `Option<...Data>` field
/// of the metadata, and each `sub` must name an `is_<sub>: Option<bool>` field of that data
/// struct. The expansion contains:
///
/// - `is_likely_homograph` and `difference`, which look at every listed category;
/// - `is_<category>` for each category;
/// - `is_<sub>_<category>` and `is_non_<sub>_<category>` for each sub-property.
macro_rules! generate_metadata_queries {
    ($($category:ident has $($sub:ident),*).*) => {
        paste! {
            /// Checks if the word belongs to more than one part of speech, considering the
            /// preposition flag and every generated category.
            pub fn is_likely_homograph(&self) -> bool {
                // Only parts of speech that are NOT generated categories may be listed by
                // hand here. `determiner` is a generated category, so listing it again would
                // count it twice and make every plain determiner look like a homograph.
                [self.preposition, $(
                    self.[< is_ $category >](),
                )*].iter().filter(|&&is_pos| is_pos).count() > 1
            }

            /// How different is this word from another?
            ///
            /// Counts the generated boolean queries whose answer differs between `self` and
            /// `other`.
            pub fn difference(&self, other: &Self) -> u32 {
                [
                    $(
                        Self::[< is_ $category >],
                        $(
                            Self::[< is_ $sub _ $category >],
                            Self::[< is_non_ $sub _ $category >],
                        )*
                    )*
                ]
                .iter()
                .fold(0, |acc, func| acc + (func(self) ^ func(other)) as u32)
            }

            $(
                #[doc = concat!("Checks if the word is definitely a ", stringify!($category), ".")]
                pub fn [< is_ $category >](&self) -> bool {
                    self.$category.is_some()
                }

                $(
                    #[doc = concat!("Checks if the word is definitely a ", stringify!($category), " and more specifically is labeled as (a) ", stringify!($sub), ".")]
                    pub fn [< is_ $sub _ $category >](&self) -> bool {
                        matches!(
                            self.$category,
                            Some([< $category:camel Data >]{
                                [< is_ $sub >]: Some(true),
                                ..
                            })
                        )
                    }

                    #[doc = concat!("Checks if the word is definitely a ", stringify!($category), " and more specifically is __not__ labeled as (a) ", stringify!($sub), " (explicitly `false` or left unspecified).")]
                    pub fn [< is_non_ $sub _ $category >](&self) -> bool {
                        matches!(
                            self.$category,
                            Some([< $category:camel Data >]{
                                [< is_ $sub >]: None | Some(false),
                                ..
                            })
                        )
                    }
                )*
            )*
        }
    };
}
129
130impl DictWordMetadata {
131    /// If there is only one possible interpretation of the metadata, infer its UPOS tag.
132    pub fn infer_pos_tag(&self) -> Option<UPOS> {
133        // If an explicit POS tag exists, return it immediately.
134        if let Some(pos) = self.pos_tag {
135            return Some(pos);
136        }
137
138        // Collect all possible POS tags from metadata
139        let mut candidates = SmallVec::<[UPOS; 14]>::with_capacity(14);
140
141        if self.is_proper_noun() {
142            candidates.push(UPOS::PROPN);
143        }
144
145        if self.is_pronoun() {
146            candidates.push(UPOS::PRON);
147        }
148        if self.is_noun() {
149            candidates.push(UPOS::NOUN);
150        }
151        if self.is_verb() {
152            // Distinguish auxiliary verbs
153            if let Some(data) = &self.verb {
154                if data.is_auxiliary == Some(true) {
155                    candidates.push(UPOS::AUX);
156                } else {
157                    candidates.push(UPOS::VERB);
158                }
159            } else {
160                candidates.push(UPOS::VERB);
161            }
162        }
163        if self.is_adjective() {
164            candidates.push(UPOS::ADJ);
165        }
166        if self.is_adverb() {
167            candidates.push(UPOS::ADV);
168        }
169        if self.is_conjunction() {
170            candidates.push(UPOS::CCONJ);
171        }
172        if self.is_determiner() {
173            candidates.push(UPOS::DET);
174        }
175        if self.preposition {
176            candidates.push(UPOS::ADP);
177        }
178
179        // Remove duplicates
180        candidates.sort();
181        candidates.dedup();
182
183        candidates.into_iter().exactly_one().ok()
184    }
185
186    /// Produce a copy of `self` with the known properties of `other` set.
187    pub fn or(&self, other: &Self) -> Self {
188        macro_rules! merge {
189            ($a:expr, $b:expr) => {
190                match ($a, $b) {
191                    (Some(a), Some(b)) => Some(a.or(&b)),
192                    (Some(a), None) => Some(a),
193                    (None, Some(b)) => Some(b),
194                    (None, None) => None,
195                }
196            };
197        }
198
199        Self {
200            noun: merge!(self.noun, other.noun),
201            pronoun: merge!(self.pronoun, other.pronoun),
202            verb: merge!(self.verb, other.verb),
203            adjective: merge!(self.adjective, other.adjective),
204            adverb: merge!(self.adverb, other.adverb),
205            conjunction: merge!(self.conjunction, other.conjunction),
206            determiner: merge!(self.determiner, other.determiner),
207            affix: merge!(self.affix, other.affix),
208            preposition: self.preposition || other.preposition,
209            dialects: self.dialects | other.dialects,
210            orth_info: self.orth_info | other.orth_info,
211            swear: self.swear.or(other.swear),
212            common: self.common || other.common,
213            derived_from: self.derived_from.or(other.derived_from),
214            pos_tag: self.pos_tag.or(other.pos_tag),
215            np_member: self.np_member.or(other.np_member),
216        }
217    }
218
219    /// Given a UPOS tag, discard any metadata that would disagree with the given POS tag.
220    /// For example, if the metadata suggests a word could either be a noun or an adjective, and we
221    /// provide a [`UPOS::NOUN`], this function will remove the adjective data.
222    ///
223    /// Additionally, if the metadata does not currently declare the potential of the word to be
224    /// the specific POS, it becomes so. That means if we provide a [`UPOS::ADJ`] to the function
225    /// for a metadata whose `Self::adjective = None`, it will become `Some`.
226    pub fn enforce_pos_exclusivity(&mut self, pos: &UPOS) {
227        use UPOS::*;
228        match pos {
229            NOUN => {
230                if let Some(noun) = self.noun {
231                    self.noun = Some(NounData {
232                        is_proper: Some(false),
233                        ..noun
234                    })
235                } else {
236                    self.noun = Some(NounData {
237                        is_proper: Some(false),
238                        is_singular: None,
239                        is_plural: None,
240                        is_countable: None,
241                        is_mass: None,
242                        is_possessive: None,
243                    })
244                }
245
246                self.pronoun = None;
247                self.verb = None;
248                self.adjective = None;
249                self.adverb = None;
250                self.conjunction = None;
251                self.determiner = None;
252                self.affix = None;
253                self.preposition = false;
254            }
255            PROPN => {
256                if let Some(noun) = self.noun {
257                    self.noun = Some(NounData {
258                        is_proper: Some(true),
259                        ..noun
260                    })
261                } else {
262                    self.noun = Some(NounData {
263                        is_proper: Some(true),
264                        is_singular: None,
265                        is_plural: None,
266                        is_countable: None,
267                        is_mass: None,
268                        is_possessive: None,
269                    })
270                }
271
272                self.pronoun = None;
273                self.verb = None;
274                self.adjective = None;
275                self.adverb = None;
276                self.conjunction = None;
277                self.determiner = None;
278                self.affix = None;
279                self.preposition = false;
280            }
281            PRON => {
282                if self.pronoun.is_none() {
283                    self.pronoun = Some(PronounData::default())
284                }
285
286                self.noun = None;
287                self.verb = None;
288                self.adjective = None;
289                self.adverb = None;
290                self.conjunction = None;
291                self.determiner = None;
292                self.affix = None;
293                self.preposition = false;
294            }
295            VERB => {
296                if let Some(verb) = self.verb {
297                    self.verb = Some(VerbData {
298                        is_auxiliary: Some(false),
299                        ..verb
300                    })
301                } else {
302                    self.verb = Some(VerbData {
303                        is_auxiliary: Some(false),
304                        ..Default::default()
305                    })
306                }
307
308                self.noun = None;
309                self.pronoun = None;
310                self.adjective = None;
311                self.adverb = None;
312                self.conjunction = None;
313                self.determiner = None;
314                self.affix = None;
315                self.preposition = false;
316            }
317            AUX => {
318                if let Some(verb) = self.verb {
319                    self.verb = Some(VerbData {
320                        is_auxiliary: Some(true),
321                        ..verb
322                    })
323                } else {
324                    self.verb = Some(VerbData {
325                        is_auxiliary: Some(true),
326                        ..Default::default()
327                    })
328                }
329
330                self.noun = None;
331                self.pronoun = None;
332                self.adjective = None;
333                self.adverb = None;
334                self.conjunction = None;
335                self.determiner = None;
336                self.affix = None;
337                self.preposition = false;
338            }
339            ADJ => {
340                if self.adjective.is_none() {
341                    self.adjective = Some(AdjectiveData::default())
342                }
343
344                self.noun = None;
345                self.pronoun = None;
346                self.verb = None;
347                self.adverb = None;
348                self.conjunction = None;
349                self.determiner = None;
350                self.affix = None;
351                self.preposition = false;
352            }
353            ADV => {
354                if self.adverb.is_none() {
355                    self.adverb = Some(AdverbData::default())
356                }
357
358                self.noun = None;
359                self.pronoun = None;
360                self.verb = None;
361                self.adjective = None;
362                self.conjunction = None;
363                self.determiner = None;
364                self.affix = None;
365                self.preposition = false;
366            }
367            ADP => {
368                self.noun = None;
369                self.pronoun = None;
370                self.verb = None;
371                self.adjective = None;
372                self.adverb = None;
373                self.conjunction = None;
374                self.determiner = None;
375                self.affix = None;
376                self.preposition = true;
377            }
378            DET => {
379                self.noun = None;
380                self.pronoun = None;
381                self.verb = None;
382                self.adjective = None;
383                self.adverb = None;
384                self.conjunction = None;
385                self.affix = None;
386                self.preposition = false;
387                self.determiner = Some(DeterminerData::default());
388            }
389            CCONJ | SCONJ => {
390                if self.conjunction.is_none() {
391                    self.conjunction = Some(ConjunctionData::default())
392                }
393
394                self.noun = None;
395                self.pronoun = None;
396                self.verb = None;
397                self.adjective = None;
398                self.adverb = None;
399                self.determiner = None;
400                self.affix = None;
401                self.preposition = false;
402            }
403            _ => {}
404        }
405    }
406
407    generate_metadata_queries!(
408        // Singular and countable default to true, so their metadata queries are not generated.
409        noun has proper, plural, mass, possessive.
410        pronoun has personal, singular, plural, possessive, reflexive, subject, object.
411        determiner has demonstrative, possessive, quantifier.
412        verb has linking, auxiliary.
413        conjunction has.
414        adjective has.
415        adverb has manner, frequency, degree
416    );
417
418    // Manual metadata queries
419
420    // Pronoun metadata queries
421
422    pub fn get_person(&self) -> Option<Person> {
423        self.pronoun.as_ref().and_then(|p| p.person)
424    }
425
426    pub fn is_first_person_plural_pronoun(&self) -> bool {
427        matches!(
428            self.pronoun,
429            Some(PronounData {
430                person: Some(Person::First),
431                is_plural: Some(true),
432                ..
433            })
434        )
435    }
436
437    pub fn is_first_person_singular_pronoun(&self) -> bool {
438        matches!(
439            self.pronoun,
440            Some(PronounData {
441                person: Some(Person::First),
442                is_singular: Some(true),
443                ..
444            })
445        )
446    }
447
448    pub fn is_third_person_plural_pronoun(&self) -> bool {
449        matches!(
450            self.pronoun,
451            Some(PronounData {
452                person: Some(Person::Third),
453                is_plural: Some(true),
454                ..
455            })
456        )
457    }
458
459    pub fn is_third_person_singular_pronoun(&self) -> bool {
460        matches!(
461            self.pronoun,
462            Some(PronounData {
463                person: Some(Person::Third),
464                is_singular: Some(true),
465                ..
466            })
467        )
468    }
469
470    pub fn is_third_person_pronoun(&self) -> bool {
471        matches!(
472            self.pronoun,
473            Some(PronounData {
474                person: Some(Person::Third),
475                ..
476            })
477        )
478    }
479
480    pub fn is_second_person_pronoun(&self) -> bool {
481        matches!(
482            self.pronoun,
483            Some(PronounData {
484                person: Some(Person::Second),
485                ..
486            })
487        )
488    }
489
490    // Lemma is default if no verb form is specified in the dictionary
491    pub fn is_verb_lemma(&self) -> bool {
492        if let Some(verb) = self.verb {
493            if let Some(forms) = verb.verb_forms {
494                return forms.is_empty() || forms.contains(VerbFormFlags::LEMMA);
495            } else {
496                return true;
497            }
498        }
499        false
500    }
501
502    pub fn is_verb_past_form(&self) -> bool {
503        self.verb.is_some_and(|v| {
504            v.verb_forms
505                .is_some_and(|vf| vf.contains(VerbFormFlags::PAST))
506        })
507    }
508
509    pub fn is_verb_simple_past_form(&self) -> bool {
510        self.verb.is_some_and(|v| {
511            v.verb_forms
512                .is_some_and(|vf| vf.contains(VerbFormFlags::PRETERITE))
513        })
514    }
515
516    pub fn is_verb_past_participle_form(&self) -> bool {
517        self.verb.is_some_and(|v| {
518            v.verb_forms
519                .is_some_and(|vf| vf.contains(VerbFormFlags::PAST_PARTICIPLE))
520        })
521    }
522
523    pub fn is_verb_progressive_form(&self) -> bool {
524        self.verb.is_some_and(|v| {
525            v.verb_forms
526                .is_some_and(|vf| vf.contains(VerbFormFlags::PROGRESSIVE))
527        })
528    }
529
530    pub fn is_verb_third_person_singular_present_form(&self) -> bool {
531        self.verb.is_some_and(|v| {
532            v.verb_forms
533                .is_some_and(|vf| vf.contains(VerbFormFlags::THIRD_PERSON_SINGULAR))
534        })
535    }
536
537    // Noun metadata queries
538
539    // Singular is default if number is not marked in the dictionary.
540    pub fn is_singular_noun(&self) -> bool {
541        if let Some(noun) = self.noun {
542            matches!(
543                (noun.is_singular, noun.is_plural),
544                (Some(true), _) | (None | Some(false), None | Some(false))
545            )
546        } else {
547            false
548        }
549    }
550    pub fn is_non_singular_noun(&self) -> bool {
551        if let Some(noun) = self.noun {
552            !matches!(
553                (noun.is_singular, noun.is_plural),
554                (Some(true), _) | (None | Some(false), None | Some(false))
555            )
556        } else {
557            false
558        }
559    }
560
561    // Countable is default if countability is not marked in the dictionary.
562    pub fn is_countable_noun(&self) -> bool {
563        if let Some(noun) = self.noun {
564            matches!(
565                (noun.is_countable, noun.is_mass),
566                (Some(true), _) | (None | Some(false), None | Some(false))
567            )
568        } else {
569            false
570        }
571    }
572    pub fn is_non_countable_noun(&self) -> bool {
573        if let Some(noun) = self.noun {
574            !matches!(
575                (noun.is_countable, noun.is_mass),
576                (Some(true), _) | (None | Some(false), None | Some(false))
577            )
578        } else {
579            false
580        }
581    }
582
583    // Most mass nouns also have countable senses. Match those that are only mass nouns.
584    pub fn is_mass_noun_only(&self) -> bool {
585        if let Some(noun) = self.noun {
586            matches!(
587                (noun.is_countable, noun.is_mass),
588                (None | Some(false), Some(true))
589            )
590        } else {
591            false
592        }
593    }
594
595    // Nominal metadata queries (noun + pronoun)
596
597    /// Checks if the word is definitely nominal.
598    pub fn is_nominal(&self) -> bool {
599        self.is_noun() || self.is_pronoun()
600    }
601
602    /// Checks if the word is definitely a nominal and more specifically is labeled as (a) singular.
603    pub fn is_singular_nominal(&self) -> bool {
604        self.is_singular_noun() || self.is_singular_pronoun()
605    }
606
607    /// Checks if the word is definitely a nominal and more specifically is labeled as (a) plural.
608    pub fn is_plural_nominal(&self) -> bool {
609        self.is_plural_noun() || self.is_plural_pronoun()
610    }
611
612    /// Checks if the word is definitely a nominal and more specifically is labeled as (a) possessive.
613    /// NOTE: `possessive pronoun`s are not qualifiers, but words like `mine`, `yours`, etc.
614    /// The terminology of `possessive noun`, `possessive pronoun` and `possessive determiner` only
615    /// tends to reinforce this confusion.
616    pub fn is_possessive_nominal(&self) -> bool {
617        self.is_possessive_noun() || self.is_possessive_determiner()
618    }
619
620    /// Checks if the word is definitely a nominal and more specifically is labeled as __not__ (a) singular.
621    pub fn is_non_singular_nominal(&self) -> bool {
622        self.is_non_singular_noun() || self.is_non_singular_pronoun()
623    }
624
625    /// Checks if the word is definitely a nominal and more specifically is labeled as __not__ (a) plural.
626    pub fn is_non_plural_nominal(&self) -> bool {
627        self.is_non_plural_noun() || self.is_non_plural_pronoun()
628    }
629
630    // Adjective metadata queries
631
632    pub fn get_degree(&self) -> Option<Degree> {
633        self.adjective.as_ref().and_then(|a| a.degree)
634    }
635
636    pub fn is_comparative_adjective(&self) -> bool {
637        matches!(
638            self.adjective,
639            Some(AdjectiveData {
640                degree: Some(Degree::Comparative)
641            })
642        )
643    }
644
645    pub fn is_superlative_adjective(&self) -> bool {
646        matches!(
647            self.adjective,
648            Some(AdjectiveData {
649                degree: Some(Degree::Superlative)
650            })
651        )
652    }
653
654    // Degree::Positive is the default if degree is not marked in the dictionary.
655    pub fn is_positive_adjective(&self) -> bool {
656        match self.adjective {
657            Some(AdjectiveData {
658                degree: Some(Degree::Positive),
659            }) => true,
660            Some(AdjectiveData { degree: None }) => true,
661            Some(AdjectiveData {
662                degree: Some(degree),
663            }) => !matches!(degree, Degree::Comparative | Degree::Superlative),
664            _ => false,
665        }
666    }
667
668    // Determiner metadata queries
669
670    // Checks if the word is definitely a determiner and more specifically is labeled as (a) quantifier.
671    pub fn is_quantifier(&self) -> bool {
672        self.is_quantifier_determiner()
673    }
674
675    // Non-POS queries
676
677    /// Checks whether a word is _definitely_ a swear.
678    pub fn is_swear(&self) -> bool {
679        matches!(self.swear, Some(true))
680    }
681
682    // Orthographic queries
683
684    /// Does the metadata for this word cover an all-lowercase variant? (e.g., "hello")
685    ///
686    /// This returns true if all letters in the word are lowercase. Words containing
687    /// non-letter characters (like numbers or symbols) are only considered if all
688    /// letter characters are lowercase.
689    pub fn is_lowercase(&self) -> bool {
690        self.orth_info.contains(OrthFlags::LOWERCASE)
691    }
692    /// Does the metadata for this word cover a titlecase variant? (e.g., "Hello")
693    ///
694    /// This returns true if the word is in titlecase form, which means:
695    /// - The first letter is uppercase
696    /// - All other letters are lowercase
697    /// - The word is at least 2 characters long
698    ///
699    /// Examples: "Hello", "World"
700    ///
701    /// Note: Words with internal capital letters (like "McDonald") or apostrophes (like "O'Reilly")
702    /// are not considered titlecase - they are classified as UPPER_CAMEL instead.
703    pub fn is_titlecase(&self) -> bool {
704        self.orth_info.contains(OrthFlags::TITLECASE)
705    }
706    /// Does the metadata for this word cover an all-uppercase variant? (e.g., "HELLO")
707    ///
708    /// This returns true if all letters in the word are uppercase. Words containing
709    /// non-letter characters (like numbers or symbols) are only considered if all
710    /// letter characters are uppercase.
711    ///
712    /// Examples: "HELLO", "NASA", "I"
713    pub fn is_allcaps(&self) -> bool {
714        self.orth_info.contains(OrthFlags::ALLCAPS)
715    }
716    /// Does the metadata for this word cover a lower camel case variant? (e.g., "helloWorld")
717    ///
718    /// This returns true if the word is in lower camel case, which means:
719    /// - The first letter is lowercase
720    /// - There is at least one uppercase letter after the first character
721    /// - The word must be at least 2 characters long
722    ///
723    /// Examples: "helloWorld", "getHTTPResponse", "eBay"
724    ///
725    /// Note: Single words that are all lowercase will return false.
726    /// Words starting with an uppercase letter will return false (those would be UpperCamel).
727    pub fn is_lower_camel(&self) -> bool {
728        self.orth_info.contains(OrthFlags::LOWER_CAMEL)
729    }
730    /// Does the metadata for this word cover an upper camel case / pascal case variant? (e.g., "HelloWorld")
731    ///
732    /// This returns true if the word is in upper camel case (also known as Pascal case), which means:
733    /// - The first letter is uppercase
734    /// - There is at least one other uppercase letter after the first character
735    /// - There is at least one lowercase letter after the first uppercase letter
736    /// - The word must be at least 3 characters long
737    ///
738    /// Examples:
739    /// - "HelloWorld" (standard Pascal case)
740    /// - "McDonald" (name with internal caps)
741    /// - "O'Reilly" (name with apostrophe and internal caps)
742    /// - "HttpRequest" (initialism followed by word)
743    ///
744    /// Note: Single words that are titlecase (like "Hello") will return false.
745    /// Words that are all uppercase (like "NASA") will also return false.
746    pub fn is_upper_camel(&self) -> bool {
747        self.orth_info.contains(OrthFlags::UPPER_CAMEL)
748    }
749
750    /// Does the metadata for this word cover an apostrophized variant? (e.g., "doesn't")
751    pub fn is_apostrophized(&self) -> bool {
752        self.orth_info.contains(OrthFlags::APOSTROPHE)
753    }
754
755    pub fn is_roman_numerals(&self) -> bool {
756        self.orth_info.contains(OrthFlags::ROMAN_NUMERALS)
757    }
758
759    /// Same thing as [`Self::or`], except in-place rather than a clone.
760    pub fn append(&mut self, other: &Self) -> &mut Self {
761        *self = self.or(other);
762        self
763    }
764}
765
/// The morphological forms an English verb can take.
///
/// These verb forms are morphological variations, distinct from TAM (Tense-Aspect-Mood).
/// Each form can be used in various TAM combinations:
/// - Lemma form (infinitive, citation form, dictionary form):
///   used in infinitives (e.g., "to sleep"), imperatives (e.g., "sleep!"), and with modals
///   (e.g., "will sleep")
/// - Past form (past participle and simple past):
///   used as verbs (e.g., "slept") or adjectives (e.g., "closed door")
/// - Progressive form (present participle and gerund):
///   used as verbs (e.g., "sleeping"), nouns (e.g., "sleeping is important"), or adjectives
///   (e.g., "sleeping dog")
/// - Third person singular present (-s/-es):
///   used for third person singular subjects (e.g., "he sleeps", "she reads")
///
/// Important notes:
/// 1. English expresses time through auxiliary verbs, not verb form alone
/// 2. Irregular verbs can have different forms for past participle and simple past
/// 3. Future is always expressed through auxiliary verbs (e.g., "will sleep", "going to sleep")
///
/// Every variant occupies its own bit so that variants can be combined into a flag set.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u32)]
pub enum VerbForm {
    /// The uninflected verb form: "walk", "eat"
    LemmaForm = 1 << 0,
    /// The past form for regular verbs: "walked"
    PastForm = 1 << 1,
    /// The simple past/preterite form for irregular verbs: "ate"
    SimplePastForm = 1 << 2,
    /// The past participle form for irregular verbs: "eaten"
    PastParticipleForm = 1 << 3,
    /// The progressive/continuous/gerund/present participle form: "walking", "eating"
    ProgressiveForm = 1 << 4,
    /// The third person singular present form: "walks", "eats"
    ThirdPersonSingularPresentForm = 1 << 5,
}
796
/// The underlying integer type used for verb form flags.
///
/// It matches the `#[repr(u32)]` of `VerbForm`, whose discriminants are cast to this type to
/// define the `VerbFormFlags` constants.
pub type VerbFormFlagsUnderlyingType = u32;
799
800bitflags::bitflags! {
801    /// A collection of bit flags used to represent verb forms.
802    ///
803    /// This allows a word to be tagged with multiple verb forms when applicable.
804    #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
805    #[serde(transparent)]
806    pub struct VerbFormFlags: VerbFormFlagsUnderlyingType {
807        const LEMMA = VerbForm::LemmaForm as VerbFormFlagsUnderlyingType;
808        const PAST = VerbForm::PastForm as VerbFormFlagsUnderlyingType;
809        const PRETERITE = VerbForm::SimplePastForm as VerbFormFlagsUnderlyingType;
810        const PAST_PARTICIPLE = VerbForm::PastParticipleForm as VerbFormFlagsUnderlyingType;
811        const PROGRESSIVE = VerbForm::ProgressiveForm as VerbFormFlagsUnderlyingType;
812        const THIRD_PERSON_SINGULAR = VerbForm::ThirdPersonSingularPresentForm as VerbFormFlagsUnderlyingType;
813    }
814}
815
816#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
817pub struct VerbData {
818    pub is_linking: Option<bool>,
819    pub is_auxiliary: Option<bool>,
820    #[serde(rename = "verb_form", default)]
821    pub verb_forms: Option<VerbFormFlags>,
822}
823
824impl VerbData {
825    /// Produce a copy of `self` with the known properties of `other` set.
826    pub fn or(&self, other: &Self) -> Self {
827        let verb_forms = match (self.verb_forms, other.verb_forms) {
828            (Some(self_verb_forms), Some(other_verb_forms)) => {
829                Some(self_verb_forms | other_verb_forms)
830            }
831            (Some(self_verb_forms), None) => Some(self_verb_forms),
832            (None, Some(other_verb_forms)) => Some(other_verb_forms),
833            (None, None) => None,
834        };
835
836        Self {
837            is_linking: self.is_linking.or(other.is_linking),
838            is_auxiliary: self.is_auxiliary.or(other.is_auxiliary),
839            verb_forms,
840        }
841    }
842}
843
844// nouns can be both singular and plural: "aircraft", "biceps", "fish", "sheep"
845// TODO other noun properties may be worth adding: abstract
846#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
847pub struct NounData {
848    pub is_proper: Option<bool>,
849    pub is_singular: Option<bool>,
850    pub is_plural: Option<bool>,
851    pub is_countable: Option<bool>,
852    pub is_mass: Option<bool>,
853    pub is_possessive: Option<bool>,
854}
855
856impl NounData {
857    /// Produce a copy of `self` with the known properties of `other` set.
858    pub fn or(&self, other: &Self) -> Self {
859        Self {
860            is_proper: self.is_proper.or(other.is_proper),
861            is_singular: self.is_singular.or(other.is_singular),
862            is_plural: self.is_plural.or(other.is_plural),
863            is_countable: self.is_countable.or(other.is_countable),
864            is_mass: self.is_mass.or(other.is_mass),
865            is_possessive: self.is_possessive.or(other.is_possessive),
866        }
867    }
868}
869
870// Person is a property of pronouns; the verb 'be', plus all verbs reflect 3rd person singular with -s
871#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
872pub enum Person {
873    First,
874    Second,
875    Third,
876}
877
878// TODO for now focused on personal pronouns?
879#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
880pub struct PronounData {
881    pub is_personal: Option<bool>,
882    pub is_singular: Option<bool>,
883    pub is_plural: Option<bool>,
884    pub is_possessive: Option<bool>,
885    pub is_reflexive: Option<bool>,
886    pub person: Option<Person>,
887    pub is_subject: Option<bool>,
888    pub is_object: Option<bool>,
889}
890
891impl PronounData {
892    /// Produce a copy of `self` with the known properties of `other` set.
893    pub fn or(&self, other: &Self) -> Self {
894        Self {
895            is_personal: self.is_personal.or(other.is_personal),
896            is_singular: self.is_singular.or(other.is_singular),
897            is_plural: self.is_plural.or(other.is_plural),
898            is_possessive: self.is_possessive.or(other.is_possessive),
899            is_reflexive: self.is_reflexive.or(other.is_reflexive),
900            person: self.person.or(other.person),
901            is_subject: self.is_subject.or(other.is_subject),
902            is_object: self.is_object.or(other.is_object),
903        }
904    }
905}
906
907/// Additional metadata for determiners
908#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
909pub struct DeterminerData {
910    pub is_demonstrative: Option<bool>,
911    pub is_possessive: Option<bool>,
912    pub is_quantifier: Option<bool>,
913}
914
915impl DeterminerData {
916    /// Produce a copy of `self` with the known properties of `other` set.
917    pub fn or(&self, other: &Self) -> Self {
918        Self {
919            is_demonstrative: self.is_demonstrative.or(other.is_demonstrative),
920            is_possessive: self.is_possessive.or(other.is_possessive),
921            is_quantifier: self.is_quantifier.or(other.is_quantifier),
922        }
923    }
924}
925
926/// Degree is a property of adjectives: positive is not inflected
927/// Comparative is inflected with -er or comes after the word "more"
928/// Superlative is inflected with -est or comes after the word "most"
929#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
930pub enum Degree {
931    Positive,
932    Comparative,
933    Superlative,
934}
935
936/// Some adjectives are not comparable so don't have -er or -est forms and can't be used with "more" or "most".
937/// Some adjectives can only be used "attributively" (before a noun); some only predicatively (after "is" etc.).
938/// In old grammars words like the articles and determiners are classified as adjectives but behave differently.
939#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
940pub struct AdjectiveData {
941    pub degree: Option<Degree>,
942}
943
944impl AdjectiveData {
945    /// Produce a copy of `self` with the known properties of `other` set.
946    pub fn or(&self, other: &Self) -> Self {
947        Self {
948            degree: self.degree.or(other.degree),
949        }
950    }
951}
952
953/// Adverb can be a "junk drawer" category for words which don't fit the other major categories.
954/// The typical adverbs are "adverbs of manner", those derived from adjectives in -ly
955/// other adverbs (time, place, etc) should probably not be considered adverbs for Harper's purposes
956#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
957pub struct AdverbData {
958    pub is_manner: Option<bool>,
959    pub is_frequency: Option<bool>,
960    pub is_degree: Option<bool>,
961}
962
963impl AdverbData {
964    /// Produce a copy of `self` with the known properties of `other` set.
965    pub fn or(&self, _other: &Self) -> Self {
966        Self {
967            is_manner: self.is_manner.or(_other.is_manner),
968            is_frequency: self.is_frequency.or(_other.is_frequency),
969            is_degree: self.is_degree.or(_other.is_degree),
970        }
971    }
972}
973
974#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
975pub struct ConjunctionData {}
976
977impl ConjunctionData {
978    /// Produce a copy of `self` with the known properties of `other` set.
979    pub fn or(&self, _other: &Self) -> Self {
980        Self {}
981    }
982}
983
984#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
985pub struct AffixData {
986    pub is_prefix: Option<bool>,
987    pub is_suffix: Option<bool>,
988}
989
990impl AffixData {
991    /// Produce a copy of `self` with the known properties of `other` set.
992    pub fn or(&self, _other: &Self) -> Self {
993        Self {
994            is_prefix: self.is_prefix.or(_other.is_prefix),
995            is_suffix: self.is_suffix.or(_other.is_suffix),
996        }
997    }
998}
999
1000/// A regional dialect.
1001///
1002/// Note: these have bit-shifted values so that they can ergonomically integrate with
1003/// `DialectFlags`. Each value here must have a unique bit index inside
1004/// `DialectsUnderlyingType`.
1005#[derive(
1006    Debug,
1007    Clone,
1008    Copy,
1009    Serialize,
1010    Deserialize,
1011    PartialEq,
1012    PartialOrd,
1013    Eq,
1014    Hash,
1015    EnumCount,
1016    EnumString,
1017    EnumIter,
1018    Display,
1019    VariantArray,
1020)]
1021pub enum Dialect {
1022    American = 1 << 0,
1023    Canadian = 1 << 1,
1024    Australian = 1 << 2,
1025    British = 1 << 3,
1026    Indian = 1 << 4,
1027}
1028impl Dialect {
1029    /// Tries to guess the dialect used in the document by finding which dialect is used the most.
1030    /// Returns `None` if it fails to find a single dialect that is used the most.
1031    #[must_use]
1032    pub fn try_guess_from_document(document: &Document) -> Option<Self> {
1033        Self::try_from(DialectFlags::get_most_used_dialects_from_document(document)).ok()
1034    }
1035
1036    /// Tries to get a dialect from its abbreviation. Returns `None` if the abbreviation is not
1037    /// recognized.
1038    ///
1039    /// # Examples
1040    ///
1041    /// ```
1042    /// use harper_core::Dialect;
1043    ///
1044    /// let abbrs = ["US", "CA", "AU", "GB", "IN"];
1045    /// let mut dialects = abbrs.iter().map(|abbr| Dialect::try_from_abbr(abbr));
1046    ///
1047    /// assert_eq!(Some(Dialect::American), dialects.next().unwrap()); // US
1048    /// assert_eq!(Some(Dialect::Canadian), dialects.next().unwrap()); // CA
1049    /// assert_eq!(Some(Dialect::Australian), dialects.next().unwrap()); // AU
1050    /// assert_eq!(Some(Dialect::British), dialects.next().unwrap()); // GB
1051    /// assert_eq!(Some(Dialect::Indian), dialects.next().unwrap()); // IN
1052    /// ```
1053    #[must_use]
1054    pub fn try_from_abbr(abbr: &str) -> Option<Self> {
1055        match abbr {
1056            "US" => Some(Self::American),
1057            "CA" => Some(Self::Canadian),
1058            "AU" => Some(Self::Australian),
1059            "GB" => Some(Self::British),
1060            "IN" => Some(Self::Indian),
1061            _ => None,
1062        }
1063    }
1064}
1065impl TryFrom<DialectFlags> for Dialect {
1066    type Error = ();
1067
1068    /// Attempts to convert `DialectFlags` to a single `Dialect`.
1069    ///
1070    /// # Errors
1071    ///
1072    /// Will return `Err` if more than one dialect is enabled or if an undefined dialect is
1073    /// enabled.
1074    fn try_from(dialect_flags: DialectFlags) -> Result<Self, Self::Error> {
1075        // Ensure only one dialect is enabled before converting.
1076        if dialect_flags.bits().count_ones() == 1 {
1077            match dialect_flags {
1078                df if df.is_dialect_enabled_strict(Dialect::American) => Ok(Dialect::American),
1079                df if df.is_dialect_enabled_strict(Dialect::Canadian) => Ok(Dialect::Canadian),
1080                df if df.is_dialect_enabled_strict(Dialect::Australian) => Ok(Dialect::Australian),
1081                df if df.is_dialect_enabled_strict(Dialect::British) => Ok(Dialect::British),
1082                df if df.is_dialect_enabled_strict(Dialect::Indian) => Ok(Dialect::Indian),
1083                _ => Err(()),
1084            }
1085        } else {
1086            // More than one dialect enabled; can't soundly convert.
1087            Err(())
1088        }
1089    }
1090}
1091
1092// The underlying type used for DialectFlags.
1093// At the time of writing, this is currently a `u8`. If we want to define more than 8 dialects in
1094// the future, we will need to switch this to a larger type.
1095type DialectFlagsUnderlyingType = u8;
1096
1097bitflags::bitflags! {
1098    /// A collection of bit flags used to represent enabled dialects.
1099    ///
1100    /// This is generally used to allow a word (or similar) to be tagged with multiple dialects.
1101    #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash)]
1102    #[serde(transparent)]
1103    pub struct DialectFlags: DialectFlagsUnderlyingType {
1104        const AMERICAN = Dialect::American as DialectFlagsUnderlyingType;
1105        const CANADIAN = Dialect::Canadian as DialectFlagsUnderlyingType;
1106        const AUSTRALIAN = Dialect::Australian as DialectFlagsUnderlyingType;
1107        const BRITISH = Dialect::British as DialectFlagsUnderlyingType;
1108        const INDIAN = Dialect::Indian as DialectFlagsUnderlyingType;
1109    }
1110}
1111impl DialectFlags {
1112    /// Checks if the provided dialect is enabled.
1113    /// If no dialect is explicitly enabled, it is assumed that all dialects are enabled.
1114    #[must_use]
1115    pub fn is_dialect_enabled(self, dialect: Dialect) -> bool {
1116        self.is_empty() || self.intersects(Self::from_dialect(dialect))
1117    }
1118
1119    /// Checks if the provided dialect is ***explicitly*** enabled.
1120    ///
1121    /// Unlike `is_dialect_enabled`, this will return false when no dialects are explicitly
1122    /// enabled.
1123    #[must_use]
1124    pub fn is_dialect_enabled_strict(self, dialect: Dialect) -> bool {
1125        self.intersects(Self::from_dialect(dialect))
1126    }
1127
1128    /// Constructs a `DialectFlags` from the provided `Dialect`, with only that dialect being
1129    /// enabled.
1130    ///
1131    /// # Panics
1132    ///
1133    /// This will panic if `dialect` represents a dialect that is not defined in
1134    /// `DialectFlags`.
1135    #[must_use]
1136    pub fn from_dialect(dialect: Dialect) -> Self {
1137        let Some(out) = Self::from_bits(dialect as DialectFlagsUnderlyingType) else {
1138            panic!("The '{dialect}' dialect isn't defined in DialectFlags!");
1139        };
1140        out
1141    }
1142
1143    /// Gets the most commonly used dialect(s) in the document.
1144    ///
1145    /// If multiple dialects are used equally often, they will all be enabled in the returned
1146    /// `DialectFlags`. On the other hand, if there is a single dialect that is used the most, it
1147    /// will be the only one enabled.
1148    #[must_use]
1149    pub fn get_most_used_dialects_from_document(document: &Document) -> Self {
1150        // Initialize counters.
1151        let mut dialect_counters: [(Dialect, usize); Dialect::COUNT] = Dialect::VARIANTS
1152            .iter()
1153            .map(|d| (*d, 0))
1154            .collect_array()
1155            .unwrap();
1156
1157        // Count word dialects.
1158        document.iter_words().for_each(|w| {
1159            if let TokenKind::Word(Some(lexeme_metadata)) = &w.kind {
1160                // If the token is a word, iterate though the dialects in `dialect_counters` and
1161                // increment those counters where the word has the respective dialect enabled.
1162                dialect_counters.iter_mut().for_each(|(dialect, count)| {
1163                    if lexeme_metadata.dialects.is_dialect_enabled(*dialect) {
1164                        *count += 1;
1165                    }
1166                });
1167            }
1168        });
1169
1170        // Find max counter.
1171        let max_counter = dialect_counters
1172            .iter()
1173            .map(|(_, count)| count)
1174            .max()
1175            .unwrap();
1176        // Get and convert the collection of most used dialects into a `DialectFlags`.
1177        dialect_counters
1178            .into_iter()
1179            .filter(|(_, count)| count == max_counter)
1180            .fold(DialectFlags::empty(), |acc, dialect| {
1181                // Fold most used dialects into `DialectFlags` via bitwise or.
1182                acc | Self::from_dialect(dialect.0)
1183            })
1184    }
1185}
1186impl Default for DialectFlags {
1187    /// A default value with no dialects explicitly enabled.
1188    /// Implicitly, this state corresponds to all dialects being enabled.
1189    fn default() -> Self {
1190        Self::empty()
1191    }
1192}
1193
1194#[cfg(test)]
1195pub mod tests {
1196    use crate::DictWordMetadata;
1197    use crate::spell::{Dictionary, FstDictionary};
1198
1199    // Helper function to get metadata from the curated dictionary
1200    pub fn md(word: &str) -> DictWordMetadata {
1201        FstDictionary::curated()
1202            .get_word_metadata_str(word)
1203            .unwrap_or_else(|| panic!("Word '{word}' not found in dictionary"))
1204            .into_owned()
1205    }
1206
1207    mod dialect {
1208        use super::super::{Dialect, DialectFlags};
1209        use crate::Document;
1210
1211        #[test]
1212        fn guess_british_dialect() {
1213            let document = Document::new_plain_english_curated("Aluminium was used.");
1214            let df = DialectFlags::get_most_used_dialects_from_document(&document);
1215            assert!(
1216                df.is_dialect_enabled_strict(Dialect::British)
1217                    && !df.is_dialect_enabled_strict(Dialect::American)
1218            );
1219        }
1220
1221        #[test]
1222        fn guess_american_dialect() {
1223            let document = Document::new_plain_english_curated("Aluminum was used.");
1224            let df = DialectFlags::get_most_used_dialects_from_document(&document);
1225            assert!(
1226                df.is_dialect_enabled_strict(Dialect::American)
1227                    && !df.is_dialect_enabled_strict(Dialect::British)
1228            );
1229        }
1230    }
1231
1232    mod noun {
1233        use crate::dict_word_metadata::tests::md;
1234
1235        #[test]
1236        fn puppy_is_noun() {
1237            assert!(md("puppy").is_noun());
1238        }
1239
1240        #[test]
1241        fn prepare_is_not_noun() {
1242            assert!(!md("prepare").is_noun());
1243        }
1244
1245        #[test]
1246        fn paris_is_proper_noun() {
1247            assert!(md("Paris").is_proper_noun());
1248        }
1249
1250        #[test]
1251        fn permit_is_non_proper_noun() {
1252            assert!(md("lapdog").is_non_proper_noun());
1253        }
1254
1255        #[test]
1256        fn hound_is_singular_noun() {
1257            assert!(md("hound").is_singular_noun());
1258        }
1259
1260        #[test]
1261        fn pooches_is_non_singular_noun() {
1262            assert!(md("pooches").is_non_singular_noun());
1263        }
1264
1265        // Make sure is_non_xxx_noun methods don't behave like is_not_xxx_noun.
1266        // In other words, make sure they don't return true for words that are not nouns.
1267        // They must only pass for words that are nouns but not singular etc.
1268        #[test]
1269        fn loyal_doesnt_pass_is_non_singular_noun() {
1270            assert!(!md("loyal").is_non_singular_noun());
1271        }
1272
1273        #[test]
1274        fn hounds_is_plural_noun() {
1275            assert!(md("hounds").is_plural_noun());
1276        }
1277
1278        #[test]
1279        fn pooch_is_non_plural_noun() {
1280            assert!(md("pooch").is_non_plural_noun());
1281        }
1282
1283        #[test]
1284        fn fish_is_singular_noun() {
1285            assert!(md("fish").is_singular_noun());
1286        }
1287
1288        #[test]
1289        fn fish_is_plural_noun() {
1290            assert!(md("fish").is_plural_noun());
1291        }
1292
1293        #[test]
1294        fn fishes_is_plural_noun() {
1295            assert!(md("fishes").is_plural_noun());
1296        }
1297
1298        #[test]
1299        fn sheep_is_singular_noun() {
1300            assert!(md("sheep").is_singular_noun());
1301        }
1302
1303        #[test]
1304        fn sheep_is_plural_noun() {
1305            assert!(md("sheep").is_plural_noun());
1306        }
1307
1308        #[test]
1309        #[should_panic]
1310        fn sheeps_is_not_word() {
1311            md("sheeps");
1312        }
1313
1314        #[test]
1315        fn bicep_is_singular_noun() {
1316            assert!(md("bicep").is_singular_noun());
1317        }
1318
1319        #[test]
1320        fn biceps_is_singular_noun() {
1321            assert!(md("biceps").is_singular_noun());
1322        }
1323
1324        #[test]
1325        fn biceps_is_plural_noun() {
1326            assert!(md("biceps").is_plural_noun());
1327        }
1328
1329        #[test]
1330        fn aircraft_is_singular_noun() {
1331            assert!(md("aircraft").is_singular_noun());
1332        }
1333
1334        #[test]
1335        fn aircraft_is_plural_noun() {
1336            assert!(md("aircraft").is_plural_noun());
1337        }
1338
1339        #[test]
1340        #[should_panic]
1341        fn aircrafts_is_not_word() {
1342            md("aircrafts");
1343        }
1344
1345        #[test]
1346        fn dog_apostrophe_s_is_possessive_noun() {
1347            assert!(md("dog's").is_possessive_noun());
1348        }
1349
1350        #[test]
1351        fn dogs_is_non_possessive_noun() {
1352            assert!(md("dogs").is_non_possessive_noun());
1353        }
1354
1355        // noun countability
1356
1357        #[test]
1358        fn dog_is_countable() {
1359            assert!(md("dog").is_countable_noun());
1360        }
1361        #[test]
1362        fn dog_is_non_mass_noun() {
1363            assert!(md("dog").is_non_mass_noun());
1364        }
1365
1366        #[test]
1367        fn furniture_is_mass_noun() {
1368            assert!(md("furniture").is_mass_noun());
1369        }
1370        #[test]
1371        fn furniture_is_non_countable_noun() {
1372            assert!(md("furniture").is_non_countable_noun());
1373        }
1374
1375        #[test]
1376        fn equipment_is_mass_noun() {
1377            assert!(md("equipment").is_mass_noun());
1378        }
1379        #[test]
1380        fn equipment_is_non_countable_noun() {
1381            assert!(md("equipment").is_non_countable_noun());
1382        }
1383
1384        #[test]
1385        fn beer_is_countable_noun() {
1386            assert!(md("beer").is_countable_noun());
1387        }
1388        #[test]
1389        fn beer_is_mass_noun() {
1390            assert!(md("beer").is_mass_noun());
1391        }
1392    }
1393
1394    mod pronoun {
1395        use crate::dict_word_metadata::tests::md;
1396
1397        mod i_me_myself {
1398            use crate::dict_word_metadata::tests::md;
1399
1400            #[test]
1401            fn i_is_pronoun() {
1402                assert!(md("I").is_pronoun());
1403            }
1404            #[test]
1405            fn i_is_personal_pronoun() {
1406                assert!(md("I").is_personal_pronoun());
1407            }
1408            #[test]
1409            fn i_is_singular_pronoun() {
1410                assert!(md("I").is_singular_pronoun());
1411            }
1412            #[test]
1413            fn i_is_subject_pronoun() {
1414                assert!(md("I").is_subject_pronoun());
1415            }
1416
1417            #[test]
1418            fn me_is_pronoun() {
1419                assert!(md("me").is_pronoun());
1420            }
1421            #[test]
1422            fn me_is_personal_pronoun() {
1423                assert!(md("me").is_personal_pronoun());
1424            }
1425            #[test]
1426            fn me_is_singular_pronoun() {
1427                assert!(md("me").is_singular_pronoun());
1428            }
1429            #[test]
1430            fn me_is_object_pronoun() {
1431                assert!(md("me").is_object_pronoun());
1432            }
1433
1434            #[test]
1435            fn myself_is_pronoun() {
1436                assert!(md("myself").is_pronoun());
1437            }
1438            #[test]
1439            fn myself_is_personal_pronoun() {
1440                assert!(md("myself").is_personal_pronoun());
1441            }
1442            #[test]
1443            fn myself_is_singular_pronoun() {
1444                assert!(md("myself").is_singular_pronoun());
1445            }
1446            #[test]
1447            fn myself_is_reflexive_pronoun() {
1448                assert!(md("myself").is_reflexive_pronoun());
1449            }
1450        }
1451
1452        mod we_us_ourselves {
1453            use crate::dict_word_metadata::tests::md;
1454
1455            #[test]
1456            fn we_is_pronoun() {
1457                assert!(md("we").is_pronoun());
1458            }
1459            #[test]
1460            fn we_is_personal_pronoun() {
1461                assert!(md("we").is_personal_pronoun());
1462            }
1463            #[test]
1464            fn we_is_plural_pronoun() {
1465                assert!(md("we").is_plural_pronoun());
1466            }
1467            #[test]
1468            fn we_is_subject_pronoun() {
1469                assert!(md("we").is_subject_pronoun());
1470            }
1471
1472            #[test]
1473            fn us_is_pronoun() {
1474                assert!(md("us").is_pronoun());
1475            }
1476            #[test]
1477            fn us_is_personal_pronoun() {
1478                assert!(md("us").is_personal_pronoun());
1479            }
1480            #[test]
1481            fn us_is_plural_pronoun() {
1482                assert!(md("us").is_plural_pronoun());
1483            }
1484            #[test]
1485            fn us_is_object_pronoun() {
1486                assert!(md("us").is_object_pronoun());
1487            }
1488
1489            #[test]
1490            fn ourselves_is_pronoun() {
1491                assert!(md("ourselves").is_pronoun());
1492            }
1493            #[test]
1494            fn ourselves_is_personal_pronoun() {
1495                assert!(md("ourselves").is_personal_pronoun());
1496            }
1497            #[test]
1498            fn ourselves_is_plural_pronoun() {
1499                assert!(md("ourselves").is_plural_pronoun());
1500            }
1501            #[test]
1502            fn ourselves_is_reflexive_pronoun() {
1503                assert!(md("ourselves").is_reflexive_pronoun());
1504            }
1505        }
1506
1507        mod you_yourself {
1508            use crate::dict_word_metadata::tests::md;
1509
1510            #[test]
1511            fn you_is_pronoun() {
1512                assert!(md("you").is_pronoun());
1513            }
1514            #[test]
1515            fn you_is_personal_pronoun() {
1516                assert!(md("you").is_personal_pronoun());
1517            }
1518            #[test]
1519            fn you_is_singular_pronoun() {
1520                assert!(md("you").is_singular_pronoun());
1521            }
1522            #[test]
1523            fn you_is_plural_pronoun() {
1524                assert!(md("you").is_plural_pronoun());
1525            }
1526            #[test]
1527            fn you_is_subject_pronoun() {
1528                assert!(md("you").is_subject_pronoun());
1529            }
1530            #[test]
1531            fn you_is_object_pronoun() {
1532                assert!(md("you").is_object_pronoun());
1533            }
1534            #[test]
1535            fn yourself_is_pronoun() {
1536                assert!(md("yourself").is_pronoun());
1537            }
1538            #[test]
1539            fn yourself_is_personal_pronoun() {
1540                assert!(md("yourself").is_personal_pronoun());
1541            }
1542            #[test]
1543            fn yourself_is_singular_pronoun() {
1544                assert!(md("yourself").is_singular_pronoun());
1545            }
1546            #[test]
1547            fn yourself_is_reflexive_pronoun() {
1548                assert!(md("yourself").is_reflexive_pronoun());
1549            }
1550        }
1551
1552        mod he_him_himself {
1553            use crate::dict_word_metadata::tests::md;
1554
1555            #[test]
1556            fn he_is_pronoun() {
1557                assert!(md("he").is_pronoun());
1558            }
1559            #[test]
1560            fn he_is_personal_pronoun() {
1561                assert!(md("he").is_personal_pronoun());
1562            }
1563            #[test]
1564            fn he_is_singular_pronoun() {
1565                assert!(md("he").is_singular_pronoun());
1566            }
1567            #[test]
1568            fn he_is_subject_pronoun() {
1569                assert!(md("he").is_subject_pronoun());
1570            }
1571
1572            #[test]
1573            fn him_is_pronoun() {
1574                assert!(md("him").is_pronoun());
1575            }
1576            #[test]
1577            fn him_is_personal_pronoun() {
1578                assert!(md("him").is_personal_pronoun());
1579            }
1580            #[test]
1581            fn him_is_singular_pronoun() {
1582                assert!(md("him").is_singular_pronoun());
1583            }
1584            #[test]
1585            fn him_is_object_pronoun() {
1586                assert!(md("him").is_object_pronoun());
1587            }
1588
1589            #[test]
1590            fn himself_is_pronoun() {
1591                assert!(md("himself").is_pronoun());
1592            }
1593            #[test]
1594            fn himself_is_personal_pronoun() {
1595                assert!(md("himself").is_personal_pronoun());
1596            }
1597            #[test]
1598            fn himself_is_singular_pronoun() {
1599                assert!(md("himself").is_singular_pronoun());
1600            }
1601            #[test]
1602            fn himself_is_reflexive_pronoun() {
1603                assert!(md("himself").is_reflexive_pronoun());
1604            }
1605        }
1606
1607        mod she_her_herself {
1608            use crate::dict_word_metadata::tests::md;
1609
1610            #[test]
1611            fn she_is_pronoun() {
1612                assert!(md("she").is_pronoun());
1613            }
1614            #[test]
1615            fn she_is_personal_pronoun() {
1616                assert!(md("she").is_personal_pronoun());
1617            }
1618            #[test]
1619            fn she_is_singular_pronoun() {
1620                assert!(md("she").is_singular_pronoun());
1621            }
1622            #[test]
1623            fn she_is_subject_pronoun() {
1624                assert!(md("she").is_subject_pronoun());
1625            }
1626
1627            #[test]
1628            fn her_is_pronoun() {
1629                assert!(md("her").is_pronoun());
1630            }
1631            #[test]
1632            fn her_is_personal_pronoun() {
1633                assert!(md("her").is_personal_pronoun());
1634            }
1635            #[test]
1636            fn her_is_singular_pronoun() {
1637                assert!(md("her").is_singular_pronoun());
1638            }
1639            #[test]
1640            fn her_is_object_pronoun() {
1641                assert!(md("her").is_object_pronoun());
1642            }
1643
1644            #[test]
1645            fn herself_is_pronoun() {
1646                assert!(md("herself").is_pronoun());
1647            }
1648            #[test]
1649            fn herself_is_personal_pronoun() {
1650                assert!(md("herself").is_personal_pronoun());
1651            }
1652            #[test]
1653            fn herself_is_singular_pronoun() {
1654                assert!(md("herself").is_singular_pronoun());
1655            }
1656            #[test]
1657            fn herself_is_reflexive_pronoun() {
1658                assert!(md("herself").is_reflexive_pronoun());
1659            }
1660        }
1661
1662        mod it_itself {
1663            use crate::dict_word_metadata::tests::md;
1664
1665            #[test]
1666            fn it_is_pronoun() {
1667                assert!(md("it").is_pronoun());
1668            }
1669            #[test]
1670            fn it_is_personal_pronoun() {
1671                assert!(md("it").is_personal_pronoun());
1672            }
1673            #[test]
1674            fn it_is_singular_pronoun() {
1675                assert!(md("it").is_singular_pronoun());
1676            }
1677            #[test]
1678            fn it_is_subject_pronoun() {
1679                assert!(md("it").is_subject_pronoun());
1680            }
1681            #[test]
1682            fn it_is_object_pronoun() {
1683                assert!(md("it").is_object_pronoun());
1684            }
1685
1686            #[test]
1687            fn itself_is_pronoun() {
1688                assert!(md("itself").is_pronoun());
1689            }
1690            #[test]
1691            fn itself_is_personal_pronoun() {
1692                assert!(md("itself").is_personal_pronoun());
1693            }
1694            #[test]
1695            fn itself_is_singular_pronoun() {
1696                assert!(md("itself").is_singular_pronoun());
1697            }
1698            #[test]
1699            fn itself_is_reflexive_pronoun() {
1700                assert!(md("itself").is_reflexive_pronoun());
1701            }
1702        }
1703
1704        mod they_them_themselves {
1705            use crate::dict_word_metadata::tests::md;
1706
1707            #[test]
1708            fn they_is_pronoun() {
1709                assert!(md("they").is_pronoun());
1710            }
1711            #[test]
1712            fn they_is_personal_pronoun() {
1713                assert!(md("they").is_personal_pronoun());
1714            }
1715            #[test]
1716            fn they_is_plural_pronoun() {
1717                assert!(md("they").is_plural_pronoun());
1718            }
1719            #[test]
1720            fn they_is_subject_pronoun() {
1721                assert!(md("they").is_subject_pronoun());
1722            }
1723
1724            #[test]
1725            fn them_is_pronoun() {
1726                assert!(md("them").is_pronoun());
1727            }
1728            #[test]
1729            fn them_is_personal_pronoun() {
1730                assert!(md("them").is_personal_pronoun());
1731            }
1732            #[test]
1733            fn them_is_plural_pronoun() {
1734                assert!(md("them").is_plural_pronoun());
1735            }
1736            #[test]
1737            fn them_is_object_pronoun() {
1738                assert!(md("them").is_object_pronoun());
1739            }
1740
1741            #[test]
1742            fn themselves_is_pronoun() {
1743                assert!(md("themselves").is_pronoun());
1744            }
1745            #[test]
1746            fn themselves_is_personal_pronoun() {
1747                assert!(md("themselves").is_personal_pronoun());
1748            }
1749            #[test]
1750            fn themselves_is_plural_pronoun() {
1751                assert!(md("themselves").is_plural_pronoun());
1752            }
1753            #[test]
1754            fn themselves_is_reflexive_pronoun() {
1755                assert!(md("themselves").is_reflexive_pronoun());
1756            }
1757        }
1758
1759        // Possessive pronouns (not to be confused with possessive adjectives/determiners)
1760        #[test]
1761        fn mine_is_pronoun() {
1762            assert!(md("mine").is_pronoun());
1763        }
1764        #[test]
1765        fn ours_is_pronoun() {
1766            assert!(md("ours").is_pronoun());
1767        }
1768        #[test]
1769        fn yours_is_pronoun() {
1770            assert!(md("yours").is_pronoun());
1771        }
1772        #[test]
1773        fn his_is_pronoun() {
1774            assert!(md("his").is_pronoun());
1775        }
1776        #[test]
1777        fn hers_is_pronoun() {
1778            assert!(md("hers").is_pronoun());
1779        }
1780        #[test]
1781        fn its_is_pronoun() {
1782            assert!(md("its").is_pronoun());
1783        }
1784        #[test]
1785        fn theirs_is_pronoun() {
1786            assert!(md("theirs").is_pronoun());
1787        }
1788
1789        // archaic pronouns
1790        #[test]
1791        fn archaic_pronouns() {
1792            assert!(md("thou").is_pronoun());
1793            assert!(md("thee").is_pronoun());
1794            assert!(md("thyself").is_pronoun());
1795            assert!(md("thine").is_pronoun());
1796        }
1797
1798        // generic pronouns
1799        #[test]
1800        fn generic_pronouns() {
1801            assert!(md("one").is_pronoun());
1802            assert!(md("oneself").is_pronoun());
1803        }
1804
1805        // relative and interrogative pronouns
1806        #[test]
1807        fn relative_and_interrogative_pronouns() {
1808            assert!(md("who").is_pronoun());
1809            assert!(md("whom").is_pronoun());
1810            assert!(md("whose").is_pronoun());
1811            assert!(md("which").is_pronoun());
1812            assert!(md("what").is_pronoun());
1813        }
1814
1815        // nonstandard pronouns
1816        #[test]
1817        #[ignore = "not in dictionary"]
1818        fn nonstandard_pronouns() {
1819            assert!(md("themself").pronoun.is_some());
1820            assert!(md("y'all'").pronoun.is_some());
1821        }
1822    }
1823
1824    mod nominal {
1825        use crate::dict_word_metadata::tests::md;
1826
1827        #[test]
1828        fn my_is_possessive_nominal() {
1829            assert!(md("my").is_possessive_nominal());
1830        }
1831
1832        #[test]
1833        fn mine_is_not_possessive_nominal() {
1834            assert!(!md("mine").is_possessive_nominal());
1835        }
1836
1837        #[test]
1838        fn freds_is_possessive_nominal() {
1839            assert!(md("Fred's").is_possessive_nominal());
1840        }
1841
1842        #[test]
1843        fn fred_is_not_possessive_nominal() {
1844            assert!(!md("Fred").is_possessive_nominal());
1845        }
1846
1847        #[test]
1848        fn dogs_is_possessive_nominal() {
1849            assert!(md("dog's").is_possessive_nominal());
1850        }
1851
1852        #[test]
1853        fn microsofts_is_possessive_nominal() {
1854            assert!(md("Microsoft's").is_possessive_nominal());
1855        }
1856    }
1857
1858    mod adjective {
1859        use crate::{Degree, dict_word_metadata::tests::md};
1860
1861        // Getting degrees
1862
1863        #[test]
1864        #[ignore = "not marked yet because it might not be reliable"]
1865        fn big_is_positive() {
1866            assert_eq!(md("big").get_degree(), Some(Degree::Positive));
1867        }
1868
1869        #[test]
1870        fn bigger_is_comparative() {
1871            assert_eq!(md("bigger").get_degree(), Some(Degree::Comparative));
1872        }
1873
1874        #[test]
1875        fn biggest_is_superlative() {
1876            assert_eq!(md("biggest").get_degree(), Some(Degree::Superlative));
1877        }
1878
1879        #[test]
1880        #[should_panic(expected = "Word 'bigly' not found in dictionary")]
1881        fn bigly_is_not_an_adjective_form_we_track() {
1882            assert_eq!(md("bigly").get_degree(), None);
1883        }
1884
1885        // Calling is_ methods
1886
1887        // TODO: positive degree not implemented
1888
1889        #[test]
1890        fn bigger_is_comparative_adjective() {
1891            assert!(md("bigger").is_comparative_adjective());
1892        }
1893
1894        #[test]
1895        fn biggest_is_superlative_adjective() {
1896            assert!(md("biggest").is_superlative_adjective());
1897        }
1898    }
1899
1900    #[test]
1901    fn the_is_determiner() {
1902        assert!(md("the").is_determiner());
1903    }
1904    #[test]
1905    fn this_is_demonstrative_determiner() {
1906        assert!(md("this").is_demonstrative_determiner());
1907    }
1908    #[test]
1909    fn your_is_possessive_determiner() {
1910        assert!(md("your").is_possessive_determiner());
1911    }
1912
1913    #[test]
1914    fn every_is_quantifier() {
1915        assert!(md("every").is_quantifier());
1916    }
1917
1918    #[test]
1919    fn the_isnt_quantifier() {
1920        assert!(!md("the").is_quantifier());
1921    }
1922
1923    #[test]
1924    fn equipment_is_mass_noun() {
1925        assert!(md("equipment").is_mass_noun());
1926    }
1927
1928    #[test]
1929    fn equipment_is_non_countable_noun() {
1930        assert!(md("equipment").is_non_countable_noun());
1931    }
1932
1933    #[test]
1934    fn equipment_isnt_countable_noun() {
1935        assert!(!md("equipment").is_countable_noun());
1936    }
1937
1938    mod verb {
1939        use crate::dict_word_metadata::tests::md;
1940
1941        #[test]
1942        fn lemma_walk() {
1943            let md = md("walk");
1944            assert!(md.is_verb_lemma())
1945        }
1946
1947        #[test]
1948        fn lemma_fix() {
1949            let md = md("fix");
1950            assert!(md.is_verb_lemma())
1951        }
1952
1953        #[test]
1954        fn progressive_walking() {
1955            let md = md("walking");
1956            assert!(md.is_verb_progressive_form())
1957        }
1958
1959        #[test]
1960        fn past_walked() {
1961            let md = md("walked");
1962            assert!(md.is_verb_past_form())
1963        }
1964
1965        #[test]
1966        fn simple_past_ate() {
1967            let md = md("ate");
1968            assert!(md.is_verb_simple_past_form())
1969        }
1970
1971        #[test]
1972        fn past_participle_eaten() {
1973            let md = md("eaten");
1974            assert!(md.is_verb_past_participle_form())
1975        }
1976
1977        #[test]
1978        fn third_pers_sing_walks() {
1979            let md = md("walks");
1980            assert!(md.is_verb_third_person_singular_present_form())
1981        }
1982    }
1983}