harper_core/
dict_word_metadata.rs

1use harper_brill::UPOS;
2use is_macro::Is;
3use itertools::Itertools;
4use paste::paste;
5use serde::{Deserialize, Serialize};
6use smallvec::SmallVec;
7use strum::{EnumCount as _, VariantArray as _};
8use strum_macros::{Display, EnumCount, EnumIter, EnumString, VariantArray};
9
10use std::convert::TryFrom;
11
12use crate::dict_word_metadata_orthography::OrthFlags;
13use crate::spell::WordId;
14use crate::{Document, TokenKind, TokenStringExt};
15
/// This represents a "lexeme" or "headword" which is case-folded but affix-expanded.
/// So not only lemmata but also inflected forms are stored here, with "horn" and "horns" each
/// having their own lexeme, but "Ivy" and "ivy" sharing the same lexeme.
///
/// Each part of speech that carries extra data is stored as an `Option`: `Some` means the word
/// is recorded as that part of speech, `None` means it is not.
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Hash)]
pub struct DictWordMetadata {
    // The main parts of speech which have extra data.
    /// Noun data, present when the word is recorded as a noun.
    pub noun: Option<NounData>,
    /// Pronoun data, present when the word is recorded as a pronoun.
    pub pronoun: Option<PronounData>,
    /// Verb data, present when the word is recorded as a verb.
    pub verb: Option<VerbData>,
    /// Adjective data, present when the word is recorded as an adjective.
    pub adjective: Option<AdjectiveData>,
    /// Adverb data, present when the word is recorded as an adverb.
    pub adverb: Option<AdverbData>,
    /// Conjunction data, present when the word is recorded as a conjunction.
    pub conjunction: Option<ConjunctionData>,
    /// Determiner data, present when the word is recorded as a determiner.
    pub determiner: Option<DeterminerData>,
    /// Affix-specific data, if any.
    pub affix: Option<AffixData>,
    // Parts of speech which don't have extra data.
    /// Whether the word is a [preposition](https://www.merriam-webster.com/dictionary/preposition).
    /// Defaults to `false` when missing from the serialized form.
    #[serde(default = "default_false")]
    pub preposition: bool,
    /// Whether the word is an offensive word. `None` means this is not recorded.
    pub swear: Option<bool>,
    /// The dialects this word belongs to.
    /// If no dialects are defined, it can be assumed that the word is
    /// valid in all dialects of English.
    #[serde(default = "default_default")]
    pub dialects: DialectFlags,
    /// Orthographic information: letter case, spaces, hyphens, etc.
    /// Defaults to the empty flag set when missing from the serialized form.
    #[serde(default = "OrthFlags::empty")]
    pub orth_info: OrthFlags,
    /// Whether the word is considered especially common.
    /// Defaults to `false` when missing from the serialized form.
    #[serde(default = "default_false")]
    pub common: bool,
    /// Identifier of a related word, defaulting to `None` when missing from the serialized form.
    /// NOTE(review): presumably the word this entry was derived from — confirm against the
    /// code that populates it.
    #[serde(default = "default_none")]
    pub derived_from: Option<WordId>,
    /// Generated by a chunker. Declares whether the word is a member of a nominal phrase. Using
    /// this should be preferred over the similarly named `Pattern`.
    ///
    /// For more details, see [the announcement blog post](https://elijahpotter.dev/articles/training_a_chunker_with_burn).
    pub np_member: Option<bool>,
    /// Generated by a POS tagger. Declares what it inferred the word's part of speech to be.
    pub pos_tag: Option<UPOS>,
}
57
/// Default provider for `bool` fields, referenced by name from `#[serde(default = "...")]`.
///
/// Always yields `false`.
fn default_false() -> bool {
    bool::default()
}
62
/// Default provider for `Option` fields, referenced by name from `#[serde(default = "...")]`.
///
/// Always yields `None`, whatever the payload type.
fn default_none<T>() -> Option<T> {
    Option::<T>::None
}
67
/// Default provider for any `Default` type, referenced by name from
/// `#[serde(default = "...")]`.
///
/// Yields the type's own `Default::default()` value.
fn default_default<T>() -> T
where
    T: Default,
{
    Default::default()
}
72
/// Generates the routine part-of-speech queries on [`DictWordMetadata`].
///
/// The input is a `.`-separated list of clauses of the form `category has sub, sub, ...`.
/// `category` must name an `Option<…Data>` field of the struct (`noun` pairs with `NounData`,
/// and so on), and every `sub` must name an `is_<sub>: Option<bool>` field of that data type.
///
/// For each clause the macro emits `is_<category>`, and for each `sub` it emits
/// `is_<sub>_<category>` and `is_non_<sub>_<category>`. It also emits `is_likely_homograph` and
/// `difference`, which range over every listed category.
macro_rules! generate_metadata_queries {
    ($($category:ident has $($sub:ident),*).*) => {
        paste! {
            /// Checks whether the word is recorded under more than one part of speech.
            ///
            /// Each category given to the macro counts once, and so does the `preposition`
            /// flag, which has no category of its own.
            pub fn is_likely_homograph(&self) -> bool {
                // Only `preposition` is listed by hand: every other part of speech, including
                // `determiner`, comes from the category list. Listing one of them here as well
                // would count it twice and flag words that have a single part of speech.
                [self.preposition, $(
                    self.[< is_ $category >](),
                )*].iter().filter(|is_set| **is_set).count() > 1
            }

            /// How different is this word from another?
            ///
            /// Returns the number of generated queries on which `self` and `other` disagree.
            pub fn difference(&self, other: &Self) -> u32 {
                [
                    $(
                        Self::[< is_ $category >],
                        $(
                            Self::[< is_ $sub _ $category >],
                            Self::[< is_non_ $sub _ $category >],
                        )*
                    )*
                ]
                .iter()
                .fold(0, |acc, func| acc + (func(self) ^ func(other)) as u32)
            }

            $(
                #[doc = concat!("Checks if the word is definitely a ", stringify!($category), ".")]
                pub fn [< is_ $category >](&self) -> bool {
                    self.$category.is_some()
                }

                $(
                    #[doc = concat!("Checks if the word is definitely a ", stringify!($category), " and more specifically is labeled as (a) ", stringify!($sub), ".")]
                    pub fn [< is_ $sub _ $category >](&self) -> bool {
                        matches!(
                            self.$category,
                            Some([< $category:camel Data >]{
                                [< is_ $sub >]: Some(true),
                                ..
                            })
                        )
                    }

                    #[doc = concat!("Checks if the word is definitely a ", stringify!($category), " and more specifically is labeled as __not__ (a) ", stringify!($sub), ".")]
                    pub fn [< is_non_ $sub _ $category >](&self) -> bool {
                        // An unset label is treated the same as an explicit `false`.
                        matches!(
                            self.$category,
                            Some([< $category:camel Data >]{
                                [< is_ $sub >]: None | Some(false),
                                ..
                            })
                        )
                    }
                )*
            )*
        }
    };
}
129
130impl DictWordMetadata {
131    /// If there is only one possible interpretation of the metadata, infer its UPOS tag.
132    pub fn infer_pos_tag(&self) -> Option<UPOS> {
133        // If an explicit POS tag exists, return it immediately.
134        if let Some(pos) = self.pos_tag {
135            return Some(pos);
136        }
137
138        // Collect all possible POS tags from metadata
139        let mut candidates = SmallVec::<[UPOS; 14]>::with_capacity(14);
140
141        if self.is_proper_noun() {
142            candidates.push(UPOS::PROPN);
143        }
144
145        if self.is_pronoun() {
146            candidates.push(UPOS::PRON);
147        }
148        if self.is_noun() {
149            candidates.push(UPOS::NOUN);
150        }
151        if self.is_verb() {
152            // Distinguish auxiliary verbs
153            if let Some(data) = &self.verb {
154                if data.is_auxiliary == Some(true) {
155                    candidates.push(UPOS::AUX);
156                } else {
157                    candidates.push(UPOS::VERB);
158                }
159            } else {
160                candidates.push(UPOS::VERB);
161            }
162        }
163        if self.is_adjective() {
164            candidates.push(UPOS::ADJ);
165        }
166        if self.is_adverb() {
167            candidates.push(UPOS::ADV);
168        }
169        if self.is_conjunction() {
170            candidates.push(UPOS::CCONJ);
171        }
172        if self.is_determiner() {
173            candidates.push(UPOS::DET);
174        }
175        if self.preposition {
176            candidates.push(UPOS::ADP);
177        }
178
179        // Remove duplicates
180        candidates.sort();
181        candidates.dedup();
182
183        candidates.into_iter().exactly_one().ok()
184    }
185
186    /// Produce a copy of `self` with the known properties of `other` set.
187    pub fn or(&self, other: &Self) -> Self {
188        macro_rules! merge {
189            ($a:expr, $b:expr) => {
190                match ($a, $b) {
191                    (Some(a), Some(b)) => Some(a.or(&b)),
192                    (Some(a), None) => Some(a),
193                    (None, Some(b)) => Some(b),
194                    (None, None) => None,
195                }
196            };
197        }
198
199        Self {
200            noun: merge!(self.noun, other.noun),
201            pronoun: merge!(self.pronoun, other.pronoun),
202            verb: merge!(self.verb, other.verb),
203            adjective: merge!(self.adjective, other.adjective),
204            adverb: merge!(self.adverb, other.adverb),
205            conjunction: merge!(self.conjunction, other.conjunction),
206            determiner: merge!(self.determiner, other.determiner),
207            affix: merge!(self.affix, other.affix),
208            preposition: self.preposition || other.preposition,
209            dialects: self.dialects | other.dialects,
210            orth_info: self.orth_info | other.orth_info,
211            swear: self.swear.or(other.swear),
212            common: self.common || other.common,
213            derived_from: self.derived_from.or(other.derived_from),
214            pos_tag: self.pos_tag.or(other.pos_tag),
215            np_member: self.np_member.or(other.np_member),
216        }
217    }
218
219    /// Given a UPOS tag, discard any metadata that would disagree with the given POS tag.
220    /// For example, if the metadata suggests a word could either be a noun or an adjective, and we
221    /// provide a [`UPOS::NOUN`], this function will remove the adjective data.
222    ///
223    /// Additionally, if the metadata does not currently declare the potential of the word to be
224    /// the specific POS, it becomes so. That means if we provide a [`UPOS::ADJ`] to the function
225    /// for a metadata whose `Self::adjective = None`, it will become `Some`.
226    pub fn enforce_pos_exclusivity(&mut self, pos: &UPOS) {
227        use UPOS::*;
228        match pos {
229            NOUN => {
230                if let Some(noun) = self.noun {
231                    self.noun = Some(NounData {
232                        is_proper: Some(false),
233                        ..noun
234                    })
235                } else {
236                    self.noun = Some(NounData {
237                        is_proper: Some(false),
238                        is_singular: None,
239                        is_plural: None,
240                        is_countable: None,
241                        is_mass: None,
242                        is_possessive: None,
243                    })
244                }
245
246                self.pronoun = None;
247                self.verb = None;
248                self.adjective = None;
249                self.adverb = None;
250                self.conjunction = None;
251                self.determiner = None;
252                self.affix = None;
253                self.preposition = false;
254            }
255            PROPN => {
256                if let Some(noun) = self.noun {
257                    self.noun = Some(NounData {
258                        is_proper: Some(true),
259                        ..noun
260                    })
261                } else {
262                    self.noun = Some(NounData {
263                        is_proper: Some(true),
264                        is_singular: None,
265                        is_plural: None,
266                        is_countable: None,
267                        is_mass: None,
268                        is_possessive: None,
269                    })
270                }
271
272                self.pronoun = None;
273                self.verb = None;
274                self.adjective = None;
275                self.adverb = None;
276                self.conjunction = None;
277                self.determiner = None;
278                self.affix = None;
279                self.preposition = false;
280            }
281            PRON => {
282                if self.pronoun.is_none() {
283                    self.pronoun = Some(PronounData::default())
284                }
285
286                self.noun = None;
287                self.verb = None;
288                self.adjective = None;
289                self.adverb = None;
290                self.conjunction = None;
291                self.determiner = None;
292                self.affix = None;
293                self.preposition = false;
294            }
295            VERB => {
296                if let Some(verb) = self.verb {
297                    self.verb = Some(VerbData {
298                        is_auxiliary: Some(false),
299                        ..verb
300                    })
301                } else {
302                    self.verb = Some(VerbData {
303                        is_auxiliary: Some(false),
304                        ..Default::default()
305                    })
306                }
307
308                self.noun = None;
309                self.pronoun = None;
310                self.adjective = None;
311                self.adverb = None;
312                self.conjunction = None;
313                self.determiner = None;
314                self.affix = None;
315                self.preposition = false;
316            }
317            AUX => {
318                if let Some(verb) = self.verb {
319                    self.verb = Some(VerbData {
320                        is_auxiliary: Some(true),
321                        ..verb
322                    })
323                } else {
324                    self.verb = Some(VerbData {
325                        is_auxiliary: Some(true),
326                        ..Default::default()
327                    })
328                }
329
330                self.noun = None;
331                self.pronoun = None;
332                self.adjective = None;
333                self.adverb = None;
334                self.conjunction = None;
335                self.determiner = None;
336                self.affix = None;
337                self.preposition = false;
338            }
339            ADJ => {
340                if self.adjective.is_none() {
341                    self.adjective = Some(AdjectiveData::default())
342                }
343
344                self.noun = None;
345                self.pronoun = None;
346                self.verb = None;
347                self.adverb = None;
348                self.conjunction = None;
349                self.determiner = None;
350                self.affix = None;
351                self.preposition = false;
352            }
353            ADV => {
354                if self.adverb.is_none() {
355                    self.adverb = Some(AdverbData::default())
356                }
357
358                self.noun = None;
359                self.pronoun = None;
360                self.verb = None;
361                self.adjective = None;
362                self.conjunction = None;
363                self.determiner = None;
364                self.affix = None;
365                self.preposition = false;
366            }
367            ADP => {
368                self.noun = None;
369                self.pronoun = None;
370                self.verb = None;
371                self.adjective = None;
372                self.adverb = None;
373                self.conjunction = None;
374                self.determiner = None;
375                self.affix = None;
376                self.preposition = true;
377            }
378            DET => {
379                self.noun = None;
380                self.pronoun = None;
381                self.verb = None;
382                self.adjective = None;
383                self.adverb = None;
384                self.conjunction = None;
385                self.affix = None;
386                self.preposition = false;
387                self.determiner = Some(DeterminerData::default());
388            }
389            CCONJ | SCONJ => {
390                if self.conjunction.is_none() {
391                    self.conjunction = Some(ConjunctionData::default())
392                }
393
394                self.noun = None;
395                self.pronoun = None;
396                self.verb = None;
397                self.adjective = None;
398                self.adverb = None;
399                self.determiner = None;
400                self.affix = None;
401                self.preposition = false;
402            }
403            _ => {}
404        }
405    }
406
    // Generated metadata queries.
    //
    // Each `category has sub, ...` clause below produces `is_<category>()` plus, for every
    // `sub`, `is_<sub>_<category>()` and `is_non_<sub>_<category>()`. The invocation also
    // produces `is_likely_homograph()` and `difference()`.
    generate_metadata_queries!(
        // For nouns, singular and countable default to true, so their metadata queries are not
        // generated here; they are written by hand further down.
        noun has proper, plural, mass, possessive.
        pronoun has personal, singular, plural, possessive, reflexive, subject, object.
        determiner has demonstrative, possessive, quantifier.
        verb has linking, auxiliary.
        conjunction has.
        adjective has.
        adverb has manner, frequency, degree
    );
417
418    // Manual metadata queries
419
420    // Pronoun metadata queries
421
422    pub fn get_person(&self) -> Option<Person> {
423        self.pronoun.as_ref().and_then(|p| p.person)
424    }
425
426    pub fn is_first_person_plural_pronoun(&self) -> bool {
427        matches!(
428            self.pronoun,
429            Some(PronounData {
430                person: Some(Person::First),
431                is_plural: Some(true),
432                ..
433            })
434        )
435    }
436
437    pub fn is_first_person_singular_pronoun(&self) -> bool {
438        matches!(
439            self.pronoun,
440            Some(PronounData {
441                person: Some(Person::First),
442                is_singular: Some(true),
443                ..
444            })
445        )
446    }
447
448    pub fn is_third_person_plural_pronoun(&self) -> bool {
449        matches!(
450            self.pronoun,
451            Some(PronounData {
452                person: Some(Person::Third),
453                is_plural: Some(true),
454                ..
455            })
456        )
457    }
458
459    pub fn is_third_person_singular_pronoun(&self) -> bool {
460        matches!(
461            self.pronoun,
462            Some(PronounData {
463                person: Some(Person::Third),
464                is_singular: Some(true),
465                ..
466            })
467        )
468    }
469
470    pub fn is_third_person_pronoun(&self) -> bool {
471        matches!(
472            self.pronoun,
473            Some(PronounData {
474                person: Some(Person::Third),
475                ..
476            })
477        )
478    }
479
480    pub fn is_second_person_pronoun(&self) -> bool {
481        matches!(
482            self.pronoun,
483            Some(PronounData {
484                person: Some(Person::Second),
485                ..
486            })
487        )
488    }
489
490    // Lemma is default if no verb form is specified in the dictionary
491    pub fn is_verb_lemma(&self) -> bool {
492        if let Some(verb) = self.verb {
493            if let Some(forms) = verb.verb_forms {
494                return forms.is_empty() || forms.contains(VerbFormFlags::LEMMA);
495            } else {
496                return true;
497            }
498        }
499        false
500    }
501
502    pub fn is_verb_past_form(&self) -> bool {
503        self.verb.is_some_and(|v| {
504            v.verb_forms
505                .is_some_and(|vf| vf.contains(VerbFormFlags::PAST))
506        })
507    }
508
509    pub fn is_verb_simple_past_form(&self) -> bool {
510        self.verb.is_some_and(|v| {
511            v.verb_forms
512                .is_some_and(|vf| vf.contains(VerbFormFlags::PRETERITE))
513        })
514    }
515
516    pub fn is_verb_past_participle_form(&self) -> bool {
517        self.verb.is_some_and(|v| {
518            v.verb_forms
519                .is_some_and(|vf| vf.contains(VerbFormFlags::PAST_PARTICIPLE))
520        })
521    }
522
523    pub fn is_verb_progressive_form(&self) -> bool {
524        self.verb.is_some_and(|v| {
525            v.verb_forms
526                .is_some_and(|vf| vf.contains(VerbFormFlags::PROGRESSIVE))
527        })
528    }
529
530    pub fn is_verb_third_person_singular_present_form(&self) -> bool {
531        self.verb.is_some_and(|v| {
532            v.verb_forms
533                .is_some_and(|vf| vf.contains(VerbFormFlags::THIRD_PERSON_SINGULAR))
534        })
535    }
536
537    // Noun metadata queries
538
539    // Singular is default if number is not marked in the dictionary.
540    pub fn is_singular_noun(&self) -> bool {
541        if let Some(noun) = self.noun {
542            matches!(
543                (noun.is_singular, noun.is_plural),
544                (Some(true), _) | (None | Some(false), None | Some(false))
545            )
546        } else {
547            false
548        }
549    }
550    pub fn is_non_singular_noun(&self) -> bool {
551        if let Some(noun) = self.noun {
552            !matches!(
553                (noun.is_singular, noun.is_plural),
554                (Some(true), _) | (None | Some(false), None | Some(false))
555            )
556        } else {
557            false
558        }
559    }
560
561    // Countable is default if countability is not marked in the dictionary.
562    pub fn is_countable_noun(&self) -> bool {
563        if let Some(noun) = self.noun {
564            matches!(
565                (noun.is_countable, noun.is_mass),
566                (Some(true), _) | (None | Some(false), None | Some(false))
567            )
568        } else {
569            false
570        }
571    }
572    pub fn is_non_countable_noun(&self) -> bool {
573        if let Some(noun) = self.noun {
574            !matches!(
575                (noun.is_countable, noun.is_mass),
576                (Some(true), _) | (None | Some(false), None | Some(false))
577            )
578        } else {
579            false
580        }
581    }
582
583    // Most mass nouns also have countable senses. Match those that are only mass nouns.
584    pub fn is_mass_noun_only(&self) -> bool {
585        if let Some(noun) = self.noun {
586            matches!(
587                (noun.is_countable, noun.is_mass),
588                (None | Some(false), Some(true))
589            )
590        } else {
591            false
592        }
593    }
594
595    // Nominal metadata queries (noun + pronoun)
596
    /// Checks if the word is definitely nominal, i.e. recorded as a noun or as a pronoun.
    pub fn is_nominal(&self) -> bool {
        self.is_noun() || self.is_pronoun()
    }
601
    /// Checks if the word is definitely a nominal and more specifically is singular.
    ///
    /// The two halves are not symmetric: a noun counts as singular by default (see
    /// [`Self::is_singular_noun`]), whereas a pronoun must be explicitly labeled singular.
    pub fn is_singular_nominal(&self) -> bool {
        self.is_singular_noun() || self.is_singular_pronoun()
    }
606
    /// Checks if the word is definitely a nominal and more specifically is explicitly labeled
    /// as plural, either as a noun or as a pronoun.
    pub fn is_plural_nominal(&self) -> bool {
        self.is_plural_noun() || self.is_plural_pronoun()
    }
611
    /// Checks if the word is definitely a nominal and more specifically is explicitly labeled
    /// as possessive, either as a noun or as a pronoun.
    pub fn is_possessive_nominal(&self) -> bool {
        self.is_possessive_noun() || self.is_possessive_pronoun()
    }
616
    /// Checks if the word is definitely a nominal and more specifically is __not__ singular.
    ///
    /// The two halves are not symmetric: a noun qualifies only when it is marked plural and not
    /// marked singular (see [`Self::is_non_singular_noun`]), whereas a pronoun qualifies as soon
    /// as its singular label is unset or `false`.
    pub fn is_non_singular_nominal(&self) -> bool {
        self.is_non_singular_noun() || self.is_non_singular_pronoun()
    }
621
    /// Checks if the word is definitely a nominal and more specifically is __not__ labeled as
    /// plural: it is a noun or a pronoun whose plural label is unset or `false`.
    pub fn is_non_plural_nominal(&self) -> bool {
        self.is_non_plural_noun() || self.is_non_plural_pronoun()
    }
626
    /// Checks if the word is definitely a nominal and more specifically is __not__ labeled as
    /// possessive: it is a noun or a pronoun whose possessive label is unset or `false`.
    pub fn is_non_possessive_nominal(&self) -> bool {
        self.is_non_possessive_noun() || self.is_non_possessive_pronoun()
    }
631
632    // Adjective metadata queries
633
634    pub fn get_degree(&self) -> Option<Degree> {
635        self.adjective.as_ref().and_then(|a| a.degree)
636    }
637
638    pub fn is_comparative_adjective(&self) -> bool {
639        matches!(
640            self.adjective,
641            Some(AdjectiveData {
642                degree: Some(Degree::Comparative)
643            })
644        )
645    }
646
647    pub fn is_superlative_adjective(&self) -> bool {
648        matches!(
649            self.adjective,
650            Some(AdjectiveData {
651                degree: Some(Degree::Superlative)
652            })
653        )
654    }
655
656    // Degree::Positive is the default if degree is not marked in the dictionary.
657    pub fn is_positive_adjective(&self) -> bool {
658        match self.adjective {
659            Some(AdjectiveData {
660                degree: Some(Degree::Positive),
661            }) => true,
662            Some(AdjectiveData { degree: None }) => true,
663            Some(AdjectiveData {
664                degree: Some(degree),
665            }) => !matches!(degree, Degree::Comparative | Degree::Superlative),
666            _ => false,
667        }
668    }
669
670    // Determiner metadata queries
671
    /// Checks if the word is definitely a determiner and more specifically is labeled as (a)
    /// quantifier.
    ///
    /// Shorthand for [`Self::is_quantifier_determiner`].
    pub fn is_quantifier(&self) -> bool {
        self.is_quantifier_determiner()
    }
676
677    // Non-POS queries
678
679    /// Checks whether a word is _definitely_ a swear.
680    pub fn is_swear(&self) -> bool {
681        matches!(self.swear, Some(true))
682    }
683
684    // Orthographic queries
685
    /// Does the metadata for this word cover an all-lowercase variant? (e.g., "hello")
    ///
    /// Reports whether `OrthFlags::LOWERCASE` is set in `orth_info`. That flag stands for a word
    /// in which all letters are lowercase. Words containing non-letter characters (like numbers
    /// or symbols) are only considered if all letter characters are lowercase.
    pub fn is_lowercase(&self) -> bool {
        self.orth_info.contains(OrthFlags::LOWERCASE)
    }
    /// Does the metadata for this word cover a titlecase variant? (e.g., "Hello")
    ///
    /// Reports whether `OrthFlags::TITLECASE` is set in `orth_info`. That flag stands for a word
    /// in titlecase form, which means:
    /// - The first letter is uppercase
    /// - All other letters are lowercase
    /// - The word is at least 2 characters long
    ///
    /// Examples: "Hello", "World"
    ///
    /// Note: Words with internal capital letters (like "McDonald") or apostrophes (like "O'Reilly")
    /// are not considered titlecase - they are classified as UPPER_CAMEL instead.
    pub fn is_titlecase(&self) -> bool {
        self.orth_info.contains(OrthFlags::TITLECASE)
    }
    /// Does the metadata for this word cover an all-uppercase variant? (e.g., "HELLO")
    ///
    /// Reports whether `OrthFlags::ALLCAPS` is set in `orth_info`. That flag stands for a word
    /// in which all letters are uppercase. Words containing non-letter characters (like numbers
    /// or symbols) are only considered if all letter characters are uppercase.
    ///
    /// Examples: "HELLO", "NASA", "I"
    pub fn is_allcaps(&self) -> bool {
        self.orth_info.contains(OrthFlags::ALLCAPS)
    }
    /// Does the metadata for this word cover a lower camel case variant? (e.g., "helloWorld")
    ///
    /// Reports whether `OrthFlags::LOWER_CAMEL` is set in `orth_info`. That flag stands for a
    /// word in lower camel case, which means:
    /// - The first letter is lowercase
    /// - There is at least one uppercase letter after the first character
    /// - The word must be at least 2 characters long
    ///
    /// Examples: "helloWorld", "getHTTPResponse", "eBay"
    ///
    /// Note: Single words that are all lowercase will return false.
    /// Words starting with an uppercase letter will return false (those would be UpperCamel).
    pub fn is_lower_camel(&self) -> bool {
        self.orth_info.contains(OrthFlags::LOWER_CAMEL)
    }
    /// Does the metadata for this word cover an upper camel case / pascal case variant? (e.g., "HelloWorld")
    ///
    /// Reports whether `OrthFlags::UPPER_CAMEL` is set in `orth_info`. That flag stands for a
    /// word in upper camel case (also known as Pascal case), which means:
    /// - The first letter is uppercase
    /// - There is at least one other uppercase letter after the first character
    /// - There is at least one lowercase letter after the first uppercase letter
    /// - The word must be at least 3 characters long
    ///
    /// Examples:
    /// - "HelloWorld" (standard Pascal case)
    /// - "McDonald" (name with internal caps)
    /// - "O'Reilly" (name with apostrophe and internal caps)
    /// - "HttpRequest" (initialism followed by word)
    ///
    /// Note: Single words that are titlecase (like "Hello") will return false.
    /// Words that are all uppercase (like "NASA") will also return false.
    pub fn is_upper_camel(&self) -> bool {
        self.orth_info.contains(OrthFlags::UPPER_CAMEL)
    }
751
    /// Does the metadata for this word cover an apostrophized variant? (e.g., "doesn't")
    ///
    /// Reports whether `OrthFlags::APOSTROPHE` is set in `orth_info`.
    pub fn is_apostrophized(&self) -> bool {
        self.orth_info.contains(OrthFlags::APOSTROPHE)
    }
756
    /// Reports whether `OrthFlags::ROMAN_NUMERALS` is set in `orth_info`.
    ///
    /// NOTE(review): presumably this marks words written as Roman numerals — confirm against
    /// the code that computes `OrthFlags`.
    pub fn is_roman_numerals(&self) -> bool {
        self.orth_info.contains(OrthFlags::ROMAN_NUMERALS)
    }
760
761    /// Same thing as [`Self::or`], except in-place rather than a clone.
762    pub fn append(&mut self, other: &Self) -> &mut Self {
763        *self = self.or(other);
764        self
765    }
766}
767
/// The morphological forms of an English verb, one distinct bit per form.
///
/// These forms are morphological variations, distinct from TAM (Tense-Aspect-Mood).
/// Each form can be used in various TAM combinations:
/// - Lemma form (infinitive, citation form, dictionary form):
///   used in infinitives (e.g., "to sleep"), imperatives (e.g., "sleep!"), and with modals
///   (e.g., "will sleep")
/// - Past form (past participle and simple past):
///   used as verbs (e.g., "slept") or adjectives (e.g., "closed door")
/// - Progressive form (present participle and gerund):
///   used as verbs (e.g., "sleeping"), nouns (e.g., "sleeping is important"), or adjectives
///   (e.g., "sleeping dog")
/// - Third person singular present (-s/-es):
///   used for third person singular subjects (e.g., "he sleeps", "she reads")
///
/// Important notes:
/// 1. English expresses time through auxiliary verbs, not verb form alone
/// 2. Irregular verbs can have different forms for past participle and simple past
/// 3. Future is always expressed through auxiliary verbs (e.g., "will sleep", "going to sleep")
#[repr(u32)]
pub enum VerbForm {
    /// The uninflected verb form: "walk", "eat"
    LemmaForm = 0b00_0001,
    /// The past form for regular verbs: "walked"
    PastForm = 0b00_0010,
    /// The simple past/preterite form for irregular verbs: "ate"
    SimplePastForm = 0b00_0100,
    /// The past participle form for irregular verbs: "eaten"
    PastParticipleForm = 0b00_1000,
    /// The progressive/continuous/gerund/present participle form: "walking", "eating"
    ProgressiveForm = 0b01_0000,
    /// The third person singular present form: "walks", "eats"
    ThirdPersonSingularPresentForm = 0b10_0000,
}
798
/// The underlying integer type used to store verb form flags.
///
/// It matches the `#[repr(u32)]` of [`VerbForm`], whose discriminants supply the flag bits.
pub type VerbFormFlagsUnderlyingType = u32;
801
bitflags::bitflags! {
    /// A collection of bit flags used to represent verb forms.
    ///
    /// This allows a word to be tagged with multiple verb forms when applicable.
    /// Every flag takes its bit from the [`VerbForm`] variant of the same meaning.
    #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
    #[serde(transparent)]
    pub struct VerbFormFlags: VerbFormFlagsUnderlyingType {
        /// The uninflected form; bit of [`VerbForm::LemmaForm`].
        const LEMMA = VerbForm::LemmaForm as VerbFormFlagsUnderlyingType;
        /// The past form for regular verbs; bit of [`VerbForm::PastForm`].
        const PAST = VerbForm::PastForm as VerbFormFlagsUnderlyingType;
        /// The simple past form for irregular verbs; bit of [`VerbForm::SimplePastForm`].
        const PRETERITE = VerbForm::SimplePastForm as VerbFormFlagsUnderlyingType;
        /// The past participle form for irregular verbs; bit of [`VerbForm::PastParticipleForm`].
        const PAST_PARTICIPLE = VerbForm::PastParticipleForm as VerbFormFlagsUnderlyingType;
        /// The progressive form; bit of [`VerbForm::ProgressiveForm`].
        const PROGRESSIVE = VerbForm::ProgressiveForm as VerbFormFlagsUnderlyingType;
        /// The third person singular present form; bit of
        /// [`VerbForm::ThirdPersonSingularPresentForm`].
        const THIRD_PERSON_SINGULAR = VerbForm::ThirdPersonSingularPresentForm as VerbFormFlagsUnderlyingType;
    }
}
817
/// Verb-specific metadata stored in the `verb` field of [`DictWordMetadata`].
///
/// Every field is optional; `None` means the property is not recorded.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct VerbData {
    /// Whether the verb is labeled as a linking verb.
    pub is_linking: Option<bool>,
    /// Whether the verb is labeled as an auxiliary verb.
    pub is_auxiliary: Option<bool>,
    /// The morphological forms this word represents. Serialized under the key `verb_form`;
    /// when the key is missing, the field deserializes to `None`.
    #[serde(rename = "verb_form", default)]
    pub verb_forms: Option<VerbFormFlags>,
}
825
826impl VerbData {
827    /// Produce a copy of `self` with the known properties of `other` set.
828    pub fn or(&self, other: &Self) -> Self {
829        let verb_forms = match (self.verb_forms, other.verb_forms) {
830            (Some(self_verb_forms), Some(other_verb_forms)) => {
831                Some(self_verb_forms | other_verb_forms)
832            }
833            (Some(self_verb_forms), None) => Some(self_verb_forms),
834            (None, Some(other_verb_forms)) => Some(other_verb_forms),
835            (None, None) => None,
836        };
837
838        Self {
839            is_linking: self.is_linking.or(other.is_linking),
840            is_auxiliary: self.is_auxiliary.or(other.is_auxiliary),
841            verb_forms,
842        }
843    }
844}
845
// Nouns can be both singular and plural: "aircraft", "biceps", "fish", "sheep".
// That is why `is_singular` and `is_plural` are separate fields rather than one "number" value.
// TODO other noun properties may be worth adding: abstract
/// Noun-specific metadata attached to a dictionary word.
///
/// Every field is optional; [`NounData::or`] fills a `None` field from the other operand when
/// two values are merged.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct NounData {
    /// Whether the noun is a proper noun, if recorded.
    pub is_proper: Option<bool>,
    /// Whether the noun can be singular, if recorded.
    pub is_singular: Option<bool>,
    /// Whether the noun can be plural, if recorded.
    pub is_plural: Option<bool>,
    /// Whether the noun is countable, if recorded.
    pub is_countable: Option<bool>,
    /// Whether the noun is a mass noun, if recorded.
    pub is_mass: Option<bool>,
    /// Whether the noun is in a possessive form, if recorded.
    pub is_possessive: Option<bool>,
}
857
858impl NounData {
859    /// Produce a copy of `self` with the known properties of `other` set.
860    pub fn or(&self, other: &Self) -> Self {
861        Self {
862            is_proper: self.is_proper.or(other.is_proper),
863            is_singular: self.is_singular.or(other.is_singular),
864            is_plural: self.is_plural.or(other.is_plural),
865            is_countable: self.is_countable.or(other.is_countable),
866            is_mass: self.is_mass.or(other.is_mass),
867            is_possessive: self.is_possessive.or(other.is_possessive),
868        }
869    }
870}
871
/// Grammatical person.
///
/// Person is a property of pronouns. Verbs also reflect it: the verb "be" does, and all other
/// verbs mark the third person singular with -s.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
pub enum Person {
    /// First person.
    First,
    /// Second person.
    Second,
    /// Third person.
    Third,
}
879
// TODO for now focused on personal pronouns?
/// Pronoun-specific metadata attached to a dictionary word.
///
/// Every field is optional; [`PronounData::or`] fills a `None` field from the other operand
/// when two values are merged.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct PronounData {
    /// Whether this is a personal pronoun, if recorded.
    pub is_personal: Option<bool>,
    /// Whether the pronoun can be singular, if recorded.
    pub is_singular: Option<bool>,
    /// Whether the pronoun can be plural, if recorded.
    pub is_plural: Option<bool>,
    /// Whether the pronoun is possessive, if recorded.
    pub is_possessive: Option<bool>,
    /// Whether the pronoun is reflexive, if recorded.
    pub is_reflexive: Option<bool>,
    /// The grammatical person of the pronoun, if recorded.
    pub person: Option<Person>,
    /// Whether the pronoun can be used as a subject, if recorded.
    pub is_subject: Option<bool>,
    /// Whether the pronoun can be used as an object, if recorded.
    pub is_object: Option<bool>,
}
892
893impl PronounData {
894    /// Produce a copy of `self` with the known properties of `other` set.
895    pub fn or(&self, other: &Self) -> Self {
896        Self {
897            is_personal: self.is_personal.or(other.is_personal),
898            is_singular: self.is_singular.or(other.is_singular),
899            is_plural: self.is_plural.or(other.is_plural),
900            is_possessive: self.is_possessive.or(other.is_possessive),
901            is_reflexive: self.is_reflexive.or(other.is_reflexive),
902            person: self.person.or(other.person),
903            is_subject: self.is_subject.or(other.is_subject),
904            is_object: self.is_object.or(other.is_object),
905        }
906    }
907}
908
/// Additional metadata for determiners.
///
/// Every field is optional; [`DeterminerData::or`] fills a `None` field from the other operand
/// when two values are merged.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct DeterminerData {
    /// Whether the determiner is demonstrative, if recorded.
    pub is_demonstrative: Option<bool>,
    /// Whether the determiner is possessive, if recorded.
    pub is_possessive: Option<bool>,
    /// Whether the determiner is a quantifier, if recorded.
    pub is_quantifier: Option<bool>,
}
916
917impl DeterminerData {
918    /// Produce a copy of `self` with the known properties of `other` set.
919    pub fn or(&self, other: &Self) -> Self {
920        Self {
921            is_demonstrative: self.is_demonstrative.or(other.is_demonstrative),
922            is_possessive: self.is_possessive.or(other.is_possessive),
923            is_quantifier: self.is_quantifier.or(other.is_quantifier),
924        }
925    }
926}
927
/// Degree is a property of adjectives.
///
/// - Positive is not inflected.
/// - Comparative is inflected with -er or comes after the word "more".
/// - Superlative is inflected with -est or comes after the word "most".
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
pub enum Degree {
    /// The uninflected degree.
    Positive,
    /// The -er / "more" degree.
    Comparative,
    /// The -est / "most" degree.
    Superlative,
}
937
/// Adjective-specific metadata attached to a dictionary word.
///
/// Some adjectives are not comparable so don't have -er or -est forms and can't be used with "more" or "most".
/// Some adjectives can only be used "attributively" (before a noun); some only predicatively (after "is" etc.).
/// In old grammars words like the articles and determiners are classified as adjectives but behave differently.
///
/// Of these properties, only the degree is currently stored.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct AdjectiveData {
    /// The degree of comparison of this adjective form, if recorded.
    pub degree: Option<Degree>,
}
945
946impl AdjectiveData {
947    /// Produce a copy of `self` with the known properties of `other` set.
948    pub fn or(&self, other: &Self) -> Self {
949        Self {
950            degree: self.degree.or(other.degree),
951        }
952    }
953}
954
/// Adverb-specific metadata attached to a dictionary word.
///
/// Adverb can be a "junk drawer" category for words which don't fit the other major categories.
/// The typical adverbs are "adverbs of manner", those derived from adjectives in -ly.
/// Other adverbs (time, place, etc.) should probably not be considered adverbs for Harper's purposes.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct AdverbData {
    /// Whether this is an adverb of manner, if recorded.
    pub is_manner: Option<bool>,
    /// Whether this is an adverb of frequency, if recorded.
    pub is_frequency: Option<bool>,
    /// Whether this is an adverb of degree, if recorded.
    pub is_degree: Option<bool>,
}
964
965impl AdverbData {
966    /// Produce a copy of `self` with the known properties of `other` set.
967    pub fn or(&self, _other: &Self) -> Self {
968        Self {
969            is_manner: self.is_manner.or(_other.is_manner),
970            is_frequency: self.is_frequency.or(_other.is_frequency),
971            is_degree: self.is_degree.or(_other.is_degree),
972        }
973    }
974}
975
/// Conjunction-specific metadata attached to a dictionary word.
///
/// The struct currently has no fields; its presence alone marks a word as a conjunction.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct ConjunctionData {}

impl ConjunctionData {
    /// Merge `self` with `other`.
    ///
    /// There are no properties to merge yet, so this always yields an empty `ConjunctionData`.
    pub fn or(&self, _other: &Self) -> Self {
        // Built with a struct literal so that adding a field later becomes a compile error here.
        Self {}
    }
}
985
/// Affix-specific metadata attached to a dictionary word.
///
/// Every field is optional; [`AffixData::or`] fills a `None` field from the other operand when
/// two values are merged.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct AffixData {
    /// Whether the affix is a prefix, if recorded.
    pub is_prefix: Option<bool>,
    /// Whether the affix is a suffix, if recorded.
    pub is_suffix: Option<bool>,
}
991
992impl AffixData {
993    /// Produce a copy of `self` with the known properties of `other` set.
994    pub fn or(&self, _other: &Self) -> Self {
995        Self {
996            is_prefix: self.is_prefix.or(_other.is_prefix),
997            is_suffix: self.is_suffix.or(_other.is_suffix),
998        }
999    }
1000}
1001
/// A regional dialect.
///
/// Note: these have bit-shifted values so that they can ergonomically integrate with
/// `DialectFlags`. Each value here must have a unique bit index inside
/// `DialectFlagsUnderlyingType`.
#[derive(
    Debug,
    Clone,
    Copy,
    Serialize,
    Deserialize,
    PartialEq,
    PartialOrd,
    Eq,
    Hash,
    EnumCount,
    EnumString,
    EnumIter,
    Display,
    VariantArray,
)]
pub enum Dialect {
    /// Abbreviated `"US"` in [`Dialect::try_from_abbr`].
    American = 1 << 0,
    /// Abbreviated `"CA"` in [`Dialect::try_from_abbr`].
    Canadian = 1 << 1,
    /// Abbreviated `"AU"` in [`Dialect::try_from_abbr`].
    Australian = 1 << 2,
    /// Abbreviated `"GB"` in [`Dialect::try_from_abbr`].
    British = 1 << 3,
    /// Abbreviated `"IN"` in [`Dialect::try_from_abbr`].
    Indian = 1 << 4,
}
1030impl Dialect {
1031    /// Tries to guess the dialect used in the document by finding which dialect is used the most.
1032    /// Returns `None` if it fails to find a single dialect that is used the most.
1033    #[must_use]
1034    pub fn try_guess_from_document(document: &Document) -> Option<Self> {
1035        Self::try_from(DialectFlags::get_most_used_dialects_from_document(document)).ok()
1036    }
1037
1038    /// Tries to get a dialect from its abbreviation. Returns `None` if the abbreviation is not
1039    /// recognized.
1040    ///
1041    /// # Examples
1042    ///
1043    /// ```
1044    /// use harper_core::Dialect;
1045    ///
1046    /// let abbrs = ["US", "CA", "AU", "GB", "IN"];
1047    /// let mut dialects = abbrs.iter().map(|abbr| Dialect::try_from_abbr(abbr));
1048    ///
1049    /// assert_eq!(Some(Dialect::American), dialects.next().unwrap()); // US
1050    /// assert_eq!(Some(Dialect::Canadian), dialects.next().unwrap()); // CA
1051    /// assert_eq!(Some(Dialect::Australian), dialects.next().unwrap()); // AU
1052    /// assert_eq!(Some(Dialect::British), dialects.next().unwrap()); // GB
1053    /// assert_eq!(Some(Dialect::Indian), dialects.next().unwrap()); // IN
1054    /// ```
1055    #[must_use]
1056    pub fn try_from_abbr(abbr: &str) -> Option<Self> {
1057        match abbr {
1058            "US" => Some(Self::American),
1059            "CA" => Some(Self::Canadian),
1060            "AU" => Some(Self::Australian),
1061            "GB" => Some(Self::British),
1062            "IN" => Some(Self::Indian),
1063            _ => None,
1064        }
1065    }
1066}
1067impl TryFrom<DialectFlags> for Dialect {
1068    type Error = ();
1069
1070    /// Attempts to convert `DialectFlags` to a single `Dialect`.
1071    ///
1072    /// # Errors
1073    ///
1074    /// Will return `Err` if more than one dialect is enabled or if an undefined dialect is
1075    /// enabled.
1076    fn try_from(dialect_flags: DialectFlags) -> Result<Self, Self::Error> {
1077        // Ensure only one dialect is enabled before converting.
1078        if dialect_flags.bits().count_ones() == 1 {
1079            match dialect_flags {
1080                df if df.is_dialect_enabled_strict(Dialect::American) => Ok(Dialect::American),
1081                df if df.is_dialect_enabled_strict(Dialect::Canadian) => Ok(Dialect::Canadian),
1082                df if df.is_dialect_enabled_strict(Dialect::Australian) => Ok(Dialect::Australian),
1083                df if df.is_dialect_enabled_strict(Dialect::British) => Ok(Dialect::British),
1084                df if df.is_dialect_enabled_strict(Dialect::Indian) => Ok(Dialect::Indian),
1085                _ => Err(()),
1086            }
1087        } else {
1088            // More than one dialect enabled; can't soundly convert.
1089            Err(())
1090        }
1091    }
1092}
1093
// The underlying type used for DialectFlags.
// This is a `u8`, which has room for 8 single-bit dialects. If we want to define more than 8
// dialects in the future, we will need to switch this to a larger type.
type DialectFlagsUnderlyingType = u8;
1098
bitflags::bitflags! {
    /// A collection of bit flags used to represent enabled dialects.
    ///
    /// This is generally used to allow a word (or similar) to be tagged with multiple dialects.
    /// Each constant takes its bit value from the [`Dialect`] variant it mirrors.
    #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash)]
    #[serde(transparent)]
    pub struct DialectFlags: DialectFlagsUnderlyingType {
        /// Bit of [`Dialect::American`].
        const AMERICAN = Dialect::American as DialectFlagsUnderlyingType;
        /// Bit of [`Dialect::Canadian`].
        const CANADIAN = Dialect::Canadian as DialectFlagsUnderlyingType;
        /// Bit of [`Dialect::Australian`].
        const AUSTRALIAN = Dialect::Australian as DialectFlagsUnderlyingType;
        /// Bit of [`Dialect::British`].
        const BRITISH = Dialect::British as DialectFlagsUnderlyingType;
        /// Bit of [`Dialect::Indian`].
        const INDIAN = Dialect::Indian as DialectFlagsUnderlyingType;
    }
}
1113impl DialectFlags {
1114    /// Checks if the provided dialect is enabled.
1115    /// If no dialect is explicitly enabled, it is assumed that all dialects are enabled.
1116    #[must_use]
1117    pub fn is_dialect_enabled(self, dialect: Dialect) -> bool {
1118        self.is_empty() || self.intersects(Self::from_dialect(dialect))
1119    }
1120
1121    /// Checks if the provided dialect is ***explicitly*** enabled.
1122    ///
1123    /// Unlike `is_dialect_enabled`, this will return false when no dialects are explicitly
1124    /// enabled.
1125    #[must_use]
1126    pub fn is_dialect_enabled_strict(self, dialect: Dialect) -> bool {
1127        self.intersects(Self::from_dialect(dialect))
1128    }
1129
1130    /// Constructs a `DialectFlags` from the provided `Dialect`, with only that dialect being
1131    /// enabled.
1132    ///
1133    /// # Panics
1134    ///
1135    /// This will panic if `dialect` represents a dialect that is not defined in
1136    /// `DialectFlags`.
1137    #[must_use]
1138    pub fn from_dialect(dialect: Dialect) -> Self {
1139        let Some(out) = Self::from_bits(dialect as DialectFlagsUnderlyingType) else {
1140            panic!("The '{dialect}' dialect isn't defined in DialectFlags!");
1141        };
1142        out
1143    }
1144
1145    /// Gets the most commonly used dialect(s) in the document.
1146    ///
1147    /// If multiple dialects are used equally often, they will all be enabled in the returned
1148    /// `DialectFlags`. On the other hand, if there is a single dialect that is used the most, it
1149    /// will be the only one enabled.
1150    #[must_use]
1151    pub fn get_most_used_dialects_from_document(document: &Document) -> Self {
1152        // Initialize counters.
1153        let mut dialect_counters: [(Dialect, usize); Dialect::COUNT] = Dialect::VARIANTS
1154            .iter()
1155            .map(|d| (*d, 0))
1156            .collect_array()
1157            .unwrap();
1158
1159        // Count word dialects.
1160        document.iter_words().for_each(|w| {
1161            if let TokenKind::Word(Some(lexeme_metadata)) = &w.kind {
1162                // If the token is a word, iterate though the dialects in `dialect_counters` and
1163                // increment those counters where the word has the respective dialect enabled.
1164                dialect_counters.iter_mut().for_each(|(dialect, count)| {
1165                    if lexeme_metadata.dialects.is_dialect_enabled(*dialect) {
1166                        *count += 1;
1167                    }
1168                });
1169            }
1170        });
1171
1172        // Find max counter.
1173        let max_counter = dialect_counters
1174            .iter()
1175            .map(|(_, count)| count)
1176            .max()
1177            .unwrap();
1178        // Get and convert the collection of most used dialects into a `DialectFlags`.
1179        dialect_counters
1180            .into_iter()
1181            .filter(|(_, count)| count == max_counter)
1182            .fold(DialectFlags::empty(), |acc, dialect| {
1183                // Fold most used dialects into `DialectFlags` via bitwise or.
1184                acc | Self::from_dialect(dialect.0)
1185            })
1186    }
1187}
impl Default for DialectFlags {
    /// A default value with no dialects explicitly enabled.
    ///
    /// Implicitly, this state corresponds to all dialects being enabled: `is_dialect_enabled`
    /// returns `true` for every dialect when the flags are empty, whereas
    /// `is_dialect_enabled_strict` returns `false`.
    fn default() -> Self {
        Self::empty()
    }
}
1195
1196#[cfg(test)]
1197pub mod tests {
1198    use crate::DictWordMetadata;
1199    use crate::spell::{Dictionary, FstDictionary};
1200
1201    // Helper function to get metadata from the curated dictionary
1202    pub fn md(word: &str) -> DictWordMetadata {
1203        FstDictionary::curated()
1204            .get_word_metadata_str(word)
1205            .unwrap_or_else(|| panic!("Word '{word}' not found in dictionary"))
1206            .into_owned()
1207    }
1208
1209    mod dialect {
1210        use super::super::{Dialect, DialectFlags};
1211        use crate::Document;
1212
1213        #[test]
1214        fn guess_british_dialect() {
1215            let document = Document::new_plain_english_curated("Aluminium was used.");
1216            let df = DialectFlags::get_most_used_dialects_from_document(&document);
1217            assert!(
1218                df.is_dialect_enabled_strict(Dialect::British)
1219                    && !df.is_dialect_enabled_strict(Dialect::American)
1220            );
1221        }
1222
1223        #[test]
1224        fn guess_american_dialect() {
1225            let document = Document::new_plain_english_curated("Aluminum was used.");
1226            let df = DialectFlags::get_most_used_dialects_from_document(&document);
1227            assert!(
1228                df.is_dialect_enabled_strict(Dialect::American)
1229                    && !df.is_dialect_enabled_strict(Dialect::British)
1230            );
1231        }
1232    }
1233
1234    mod noun {
1235        use crate::dict_word_metadata::tests::md;
1236
1237        #[test]
1238        fn puppy_is_noun() {
1239            assert!(md("puppy").is_noun());
1240        }
1241
1242        #[test]
1243        fn prepare_is_not_noun() {
1244            assert!(!md("prepare").is_noun());
1245        }
1246
1247        #[test]
1248        fn paris_is_proper_noun() {
1249            assert!(md("Paris").is_proper_noun());
1250        }
1251
1252        #[test]
1253        fn permit_is_non_proper_noun() {
1254            assert!(md("lapdog").is_non_proper_noun());
1255        }
1256
1257        #[test]
1258        fn hound_is_singular_noun() {
1259            assert!(md("hound").is_singular_noun());
1260        }
1261
1262        #[test]
1263        fn pooches_is_non_singular_noun() {
1264            assert!(md("pooches").is_non_singular_noun());
1265        }
1266
1267        // Make sure is_non_xxx_noun methods don't behave like is_not_xxx_noun.
1268        // In other words, make sure they don't return true for words that are not nouns.
1269        // They must only pass for words that are nouns but not singular etc.
1270        #[test]
1271        fn loyal_doesnt_pass_is_non_singular_noun() {
1272            assert!(!md("loyal").is_non_singular_noun());
1273        }
1274
1275        #[test]
1276        fn hounds_is_plural_noun() {
1277            assert!(md("hounds").is_plural_noun());
1278        }
1279
1280        #[test]
1281        fn pooch_is_non_plural_noun() {
1282            assert!(md("pooch").is_non_plural_noun());
1283        }
1284
1285        #[test]
1286        fn fish_is_singular_noun() {
1287            assert!(md("fish").is_singular_noun());
1288        }
1289
1290        #[test]
1291        fn fish_is_plural_noun() {
1292            assert!(md("fish").is_plural_noun());
1293        }
1294
1295        #[test]
1296        fn fishes_is_plural_noun() {
1297            assert!(md("fishes").is_plural_noun());
1298        }
1299
1300        #[test]
1301        fn sheep_is_singular_noun() {
1302            assert!(md("sheep").is_singular_noun());
1303        }
1304
1305        #[test]
1306        fn sheep_is_plural_noun() {
1307            assert!(md("sheep").is_plural_noun());
1308        }
1309
1310        #[test]
1311        #[should_panic]
1312        fn sheeps_is_not_word() {
1313            md("sheeps");
1314        }
1315
1316        #[test]
1317        fn bicep_is_singular_noun() {
1318            assert!(md("bicep").is_singular_noun());
1319        }
1320
1321        #[test]
1322        fn biceps_is_singular_noun() {
1323            assert!(md("biceps").is_singular_noun());
1324        }
1325
1326        #[test]
1327        fn biceps_is_plural_noun() {
1328            assert!(md("biceps").is_plural_noun());
1329        }
1330
1331        #[test]
1332        fn aircraft_is_singular_noun() {
1333            assert!(md("aircraft").is_singular_noun());
1334        }
1335
1336        #[test]
1337        fn aircraft_is_plural_noun() {
1338            assert!(md("aircraft").is_plural_noun());
1339        }
1340
1341        #[test]
1342        #[should_panic]
1343        fn aircrafts_is_not_word() {
1344            md("aircrafts");
1345        }
1346
1347        #[test]
1348        fn dog_apostrophe_s_is_possessive_noun() {
1349            assert!(md("dog's").is_possessive_noun());
1350        }
1351
1352        #[test]
1353        fn dogs_is_non_possessive_noun() {
1354            assert!(md("dogs").is_non_possessive_noun());
1355        }
1356
1357        // noun countability
1358
1359        #[test]
1360        fn dog_is_countable() {
1361            assert!(md("dog").is_countable_noun());
1362        }
1363        #[test]
1364        fn dog_is_non_mass_noun() {
1365            assert!(md("dog").is_non_mass_noun());
1366        }
1367
1368        #[test]
1369        fn furniture_is_mass_noun() {
1370            assert!(md("furniture").is_mass_noun());
1371        }
1372        #[test]
1373        fn furniture_is_non_countable_noun() {
1374            assert!(md("furniture").is_non_countable_noun());
1375        }
1376
1377        #[test]
1378        fn equipment_is_mass_noun() {
1379            assert!(md("equipment").is_mass_noun());
1380        }
1381        #[test]
1382        fn equipment_is_non_countable_noun() {
1383            assert!(md("equipment").is_non_countable_noun());
1384        }
1385
1386        #[test]
1387        fn beer_is_countable_noun() {
1388            assert!(md("beer").is_countable_noun());
1389        }
1390        #[test]
1391        fn beer_is_mass_noun() {
1392            assert!(md("beer").is_mass_noun());
1393        }
1394    }
1395
1396    mod pronoun {
1397        use crate::dict_word_metadata::tests::md;
1398
        // Metadata checks for "I", "me" and "myself": each must be a personal, singular pronoun;
        // additionally "I" must be a subject, "me" an object, and "myself" a reflexive pronoun.
        mod i_me_myself {
            use crate::dict_word_metadata::tests::md;

            #[test]
            fn i_is_pronoun() {
                assert!(md("I").is_pronoun());
            }
            #[test]
            fn i_is_personal_pronoun() {
                assert!(md("I").is_personal_pronoun());
            }
            #[test]
            fn i_is_singular_pronoun() {
                assert!(md("I").is_singular_pronoun());
            }
            #[test]
            fn i_is_subject_pronoun() {
                assert!(md("I").is_subject_pronoun());
            }

            #[test]
            fn me_is_pronoun() {
                assert!(md("me").is_pronoun());
            }
            #[test]
            fn me_is_personal_pronoun() {
                assert!(md("me").is_personal_pronoun());
            }
            #[test]
            fn me_is_singular_pronoun() {
                assert!(md("me").is_singular_pronoun());
            }
            #[test]
            fn me_is_object_pronoun() {
                assert!(md("me").is_object_pronoun());
            }

            #[test]
            fn myself_is_pronoun() {
                assert!(md("myself").is_pronoun());
            }
            #[test]
            fn myself_is_personal_pronoun() {
                assert!(md("myself").is_personal_pronoun());
            }
            #[test]
            fn myself_is_singular_pronoun() {
                assert!(md("myself").is_singular_pronoun());
            }
            #[test]
            fn myself_is_reflexive_pronoun() {
                assert!(md("myself").is_reflexive_pronoun());
            }
        }
1453
        // Metadata checks for "we", "us" and "ourselves": each must be a personal, plural pronoun;
        // additionally "we" must be a subject, "us" an object, and "ourselves" a reflexive pronoun.
        mod we_us_ourselves {
            use crate::dict_word_metadata::tests::md;

            #[test]
            fn we_is_pronoun() {
                assert!(md("we").is_pronoun());
            }
            #[test]
            fn we_is_personal_pronoun() {
                assert!(md("we").is_personal_pronoun());
            }
            #[test]
            fn we_is_plural_pronoun() {
                assert!(md("we").is_plural_pronoun());
            }
            #[test]
            fn we_is_subject_pronoun() {
                assert!(md("we").is_subject_pronoun());
            }

            #[test]
            fn us_is_pronoun() {
                assert!(md("us").is_pronoun());
            }
            #[test]
            fn us_is_personal_pronoun() {
                assert!(md("us").is_personal_pronoun());
            }
            #[test]
            fn us_is_plural_pronoun() {
                assert!(md("us").is_plural_pronoun());
            }
            #[test]
            fn us_is_object_pronoun() {
                assert!(md("us").is_object_pronoun());
            }

            #[test]
            fn ourselves_is_pronoun() {
                assert!(md("ourselves").is_pronoun());
            }
            #[test]
            fn ourselves_is_personal_pronoun() {
                assert!(md("ourselves").is_personal_pronoun());
            }
            #[test]
            fn ourselves_is_plural_pronoun() {
                assert!(md("ourselves").is_plural_pronoun());
            }
            #[test]
            fn ourselves_is_reflexive_pronoun() {
                assert!(md("ourselves").is_reflexive_pronoun());
            }
        }
1508
        // Metadata checks for "you" and "yourself". "you" must be a personal pronoun that is both
        // singular and plural, and both subject and object; "yourself" must be a personal,
        // singular, reflexive pronoun.
        mod you_yourself {
            use crate::dict_word_metadata::tests::md;

            #[test]
            fn you_is_pronoun() {
                assert!(md("you").is_pronoun());
            }
            #[test]
            fn you_is_personal_pronoun() {
                assert!(md("you").is_personal_pronoun());
            }
            #[test]
            fn you_is_singular_pronoun() {
                assert!(md("you").is_singular_pronoun());
            }
            #[test]
            fn you_is_plural_pronoun() {
                assert!(md("you").is_plural_pronoun());
            }
            #[test]
            fn you_is_subject_pronoun() {
                assert!(md("you").is_subject_pronoun());
            }
            #[test]
            fn you_is_object_pronoun() {
                assert!(md("you").is_object_pronoun());
            }
            #[test]
            fn yourself_is_pronoun() {
                assert!(md("yourself").is_pronoun());
            }
            #[test]
            fn yourself_is_personal_pronoun() {
                assert!(md("yourself").is_personal_pronoun());
            }
            #[test]
            fn yourself_is_singular_pronoun() {
                assert!(md("yourself").is_singular_pronoun());
            }
            #[test]
            fn yourself_is_reflexive_pronoun() {
                assert!(md("yourself").is_reflexive_pronoun());
            }
        }
1553
        // Metadata checks for "he", "him" and "himself": each must be a personal, singular pronoun;
        // additionally "he" must be a subject, "him" an object, and "himself" a reflexive pronoun.
        mod he_him_himself {
            use crate::dict_word_metadata::tests::md;

            #[test]
            fn he_is_pronoun() {
                assert!(md("he").is_pronoun());
            }
            #[test]
            fn he_is_personal_pronoun() {
                assert!(md("he").is_personal_pronoun());
            }
            #[test]
            fn he_is_singular_pronoun() {
                assert!(md("he").is_singular_pronoun());
            }
            #[test]
            fn he_is_subject_pronoun() {
                assert!(md("he").is_subject_pronoun());
            }

            #[test]
            fn him_is_pronoun() {
                assert!(md("him").is_pronoun());
            }
            #[test]
            fn him_is_personal_pronoun() {
                assert!(md("him").is_personal_pronoun());
            }
            #[test]
            fn him_is_singular_pronoun() {
                assert!(md("him").is_singular_pronoun());
            }
            #[test]
            fn him_is_object_pronoun() {
                assert!(md("him").is_object_pronoun());
            }

            #[test]
            fn himself_is_pronoun() {
                assert!(md("himself").is_pronoun());
            }
            #[test]
            fn himself_is_personal_pronoun() {
                assert!(md("himself").is_personal_pronoun());
            }
            #[test]
            fn himself_is_singular_pronoun() {
                assert!(md("himself").is_singular_pronoun());
            }
            #[test]
            fn himself_is_reflexive_pronoun() {
                assert!(md("himself").is_reflexive_pronoun());
            }
        }
1608
        // Metadata checks for "she", "her" and "herself": each must be a personal, singular pronoun;
        // additionally "she" must be a subject, "her" an object, and "herself" a reflexive pronoun.
        mod she_her_herself {
            use crate::dict_word_metadata::tests::md;

            #[test]
            fn she_is_pronoun() {
                assert!(md("she").is_pronoun());
            }
            #[test]
            fn she_is_personal_pronoun() {
                assert!(md("she").is_personal_pronoun());
            }
            #[test]
            fn she_is_singular_pronoun() {
                assert!(md("she").is_singular_pronoun());
            }
            #[test]
            fn she_is_subject_pronoun() {
                assert!(md("she").is_subject_pronoun());
            }

            #[test]
            fn her_is_pronoun() {
                assert!(md("her").is_pronoun());
            }
            #[test]
            fn her_is_personal_pronoun() {
                assert!(md("her").is_personal_pronoun());
            }
            #[test]
            fn her_is_singular_pronoun() {
                assert!(md("her").is_singular_pronoun());
            }
            #[test]
            fn her_is_object_pronoun() {
                assert!(md("her").is_object_pronoun());
            }

            #[test]
            fn herself_is_pronoun() {
                assert!(md("herself").is_pronoun());
            }
            #[test]
            fn herself_is_personal_pronoun() {
                assert!(md("herself").is_personal_pronoun());
            }
            #[test]
            fn herself_is_singular_pronoun() {
                assert!(md("herself").is_singular_pronoun());
            }
            #[test]
            fn herself_is_reflexive_pronoun() {
                assert!(md("herself").is_reflexive_pronoun());
            }
        }
1663
        // Metadata checks for "it" and "itself". "it" must be a personal, singular pronoun that is
        // both subject and object; "itself" must be a personal, singular, reflexive pronoun.
        mod it_itself {
            use crate::dict_word_metadata::tests::md;

            #[test]
            fn it_is_pronoun() {
                assert!(md("it").is_pronoun());
            }
            #[test]
            fn it_is_personal_pronoun() {
                assert!(md("it").is_personal_pronoun());
            }
            #[test]
            fn it_is_singular_pronoun() {
                assert!(md("it").is_singular_pronoun());
            }
            #[test]
            fn it_is_subject_pronoun() {
                assert!(md("it").is_subject_pronoun());
            }
            #[test]
            fn it_is_object_pronoun() {
                assert!(md("it").is_object_pronoun());
            }

            #[test]
            fn itself_is_pronoun() {
                assert!(md("itself").is_pronoun());
            }
            #[test]
            fn itself_is_personal_pronoun() {
                assert!(md("itself").is_personal_pronoun());
            }
            #[test]
            fn itself_is_singular_pronoun() {
                assert!(md("itself").is_singular_pronoun());
            }
            #[test]
            fn itself_is_reflexive_pronoun() {
                assert!(md("itself").is_reflexive_pronoun());
            }
        }
1705
1706        mod they_them_themselves {
1707            use crate::dict_word_metadata::tests::md;
1708
1709            #[test]
1710            fn they_is_pronoun() {
1711                assert!(md("they").is_pronoun());
1712            }
1713            #[test]
1714            fn they_is_personal_pronoun() {
1715                assert!(md("they").is_personal_pronoun());
1716            }
1717            #[test]
1718            fn they_is_plural_pronoun() {
1719                assert!(md("they").is_plural_pronoun());
1720            }
1721            #[test]
1722            fn they_is_subject_pronoun() {
1723                assert!(md("they").is_subject_pronoun());
1724            }
1725
1726            #[test]
1727            fn them_is_pronoun() {
1728                assert!(md("them").is_pronoun());
1729            }
1730            #[test]
1731            fn them_is_personal_pronoun() {
1732                assert!(md("them").is_personal_pronoun());
1733            }
1734            #[test]
1735            fn them_is_plural_pronoun() {
1736                assert!(md("them").is_plural_pronoun());
1737            }
1738            #[test]
1739            fn them_is_object_pronoun() {
1740                assert!(md("them").is_object_pronoun());
1741            }
1742
1743            #[test]
1744            fn themselves_is_pronoun() {
1745                assert!(md("themselves").is_pronoun());
1746            }
1747            #[test]
1748            fn themselves_is_personal_pronoun() {
1749                assert!(md("themselves").is_personal_pronoun());
1750            }
1751            #[test]
1752            fn themselves_is_plural_pronoun() {
1753                assert!(md("themselves").is_plural_pronoun());
1754            }
1755            #[test]
1756            fn themselves_is_reflexive_pronoun() {
1757                assert!(md("themselves").is_reflexive_pronoun());
1758            }
1759        }
1760
1761        // Possessive pronouns (not to be confused with possessive adjectives/determiners)
1762        #[test]
1763        fn mine_is_pronoun() {
1764            assert!(md("mine").is_pronoun());
1765        }
1766        #[test]
1767        fn ours_is_pronoun() {
1768            assert!(md("ours").is_pronoun());
1769        }
1770        #[test]
1771        fn yours_is_pronoun() {
1772            assert!(md("yours").is_pronoun());
1773        }
1774        #[test]
1775        fn his_is_pronoun() {
1776            assert!(md("his").is_pronoun());
1777        }
1778        #[test]
1779        fn hers_is_pronoun() {
1780            assert!(md("hers").is_pronoun());
1781        }
1782        #[test]
1783        fn its_is_pronoun() {
1784            assert!(md("its").is_pronoun());
1785        }
1786        #[test]
1787        fn theirs_is_pronoun() {
1788            assert!(md("theirs").is_pronoun());
1789        }
1790
1791        // archaic pronouns
1792        #[test]
1793        fn archaic_pronouns() {
1794            assert!(md("thou").is_pronoun());
1795            assert!(md("thee").is_pronoun());
1796            assert!(md("thyself").is_pronoun());
1797            assert!(md("thine").is_pronoun());
1798        }
1799
1800        // generic pronouns
1801        #[test]
1802        fn generic_pronouns() {
1803            assert!(md("one").is_pronoun());
1804            assert!(md("oneself").is_pronoun());
1805        }
1806
1807        // relative and interrogative pronouns
1808        #[test]
1809        fn relative_and_interrogative_pronouns() {
1810            assert!(md("who").is_pronoun());
1811            assert!(md("whom").is_pronoun());
1812            assert!(md("whose").is_pronoun());
1813            assert!(md("which").is_pronoun());
1814            assert!(md("what").is_pronoun());
1815        }
1816
1817        // nonstandard pronouns
1818        #[test]
1819        #[ignore = "not in dictionary"]
1820        fn nonstandard_pronouns() {
1821            assert!(md("themself").pronoun.is_some());
1822            assert!(md("y'all'").pronoun.is_some());
1823        }
1824    }
1825
1826    mod adjective {
1827        use crate::{Degree, dict_word_metadata::tests::md};
1828
1829        // Getting degrees
1830
1831        #[test]
1832        #[ignore = "not marked yet because it might not be reliable"]
1833        fn big_is_positive() {
1834            assert_eq!(md("big").get_degree(), Some(Degree::Positive));
1835        }
1836
1837        #[test]
1838        fn bigger_is_comparative() {
1839            assert_eq!(md("bigger").get_degree(), Some(Degree::Comparative));
1840        }
1841
1842        #[test]
1843        fn biggest_is_superlative() {
1844            assert_eq!(md("biggest").get_degree(), Some(Degree::Superlative));
1845        }
1846
1847        #[test]
1848        #[should_panic(expected = "Word 'bigly' not found in dictionary")]
1849        fn bigly_is_not_an_adjective_form_we_track() {
1850            assert_eq!(md("bigly").get_degree(), None);
1851        }
1852
1853        // Calling is_ methods
1854
1855        // TODO: positive degree not implemented
1856
1857        #[test]
1858        fn bigger_is_comparative_adjective() {
1859            assert!(md("bigger").is_comparative_adjective());
1860        }
1861
1862        #[test]
1863        fn biggest_is_superlative_adjective() {
1864            assert!(md("biggest").is_superlative_adjective());
1865        }
1866    }
1867
1868    #[test]
1869    fn the_is_determiner() {
1870        assert!(md("the").is_determiner());
1871    }
1872    #[test]
1873    fn this_is_demonstrative_determiner() {
1874        assert!(md("this").is_demonstrative_determiner());
1875    }
1876    #[test]
1877    fn your_is_possessive_determiner() {
1878        assert!(md("your").is_possessive_determiner());
1879    }
1880
1881    #[test]
1882    fn every_is_quantifier() {
1883        assert!(md("every").is_quantifier());
1884    }
1885
1886    #[test]
1887    fn the_isnt_quantifier() {
1888        assert!(!md("the").is_quantifier());
1889    }
1890
1891    #[test]
1892    fn equipment_is_mass_noun() {
1893        assert!(md("equipment").is_mass_noun());
1894    }
1895
1896    #[test]
1897    fn equipment_is_non_countable_noun() {
1898        assert!(md("equipment").is_non_countable_noun());
1899    }
1900
1901    #[test]
1902    fn equipment_isnt_countable_noun() {
1903        assert!(!md("equipment").is_countable_noun());
1904    }
1905
1906    mod verb {
1907        use crate::dict_word_metadata::tests::md;
1908
1909        #[test]
1910        fn lemma_walk() {
1911            let md = md("walk");
1912            assert!(md.is_verb_lemma())
1913        }
1914
1915        #[test]
1916        fn lemma_fix() {
1917            let md = md("fix");
1918            assert!(md.is_verb_lemma())
1919        }
1920
1921        #[test]
1922        fn progressive_walking() {
1923            let md = md("walking");
1924            assert!(md.is_verb_progressive_form())
1925        }
1926
1927        #[test]
1928        fn past_walked() {
1929            let md = md("walked");
1930            assert!(md.is_verb_past_form())
1931        }
1932
1933        #[test]
1934        fn simple_past_ate() {
1935            let md = md("ate");
1936            assert!(md.is_verb_simple_past_form())
1937        }
1938
1939        #[test]
1940        fn past_participle_eaten() {
1941            let md = md("eaten");
1942            assert!(md.is_verb_past_participle_form())
1943        }
1944
1945        #[test]
1946        fn third_pers_sing_walks() {
1947            let md = md("walks");
1948            assert!(md.is_verb_third_person_singular_present_form())
1949        }
1950    }
1951}