harper_core/
word_metadata.rs

1use harper_brill::UPOS;
2use is_macro::Is;
3use itertools::Itertools;
4use paste::paste;
5use serde::{Deserialize, Serialize};
6use strum::{EnumCount, VariantArray};
7use strum_macros::{Display, EnumCount, EnumString, VariantArray};
8
9use std::convert::TryFrom;
10
11use crate::{Document, TokenKind, TokenStringExt, WordId};
12
/// Everything known about a single word: which parts of speech it can act as, plus dialect,
/// frequency, derivation, and tagger-generated information.
///
/// Each `Option<...Data>` field is `Some` when the word can act as that part of speech; the
/// contained struct carries the finer-grained properties.
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Hash)]
pub struct WordMetadata {
    /// Noun properties; `None` when the word is not known to be a noun.
    pub noun: Option<NounData>,
    /// Pronoun properties; `None` when the word is not known to be a pronoun.
    pub pronoun: Option<PronounData>,
    /// Verb properties; `None` when the word is not known to be a verb.
    pub verb: Option<VerbData>,
    /// Adjective properties; `None` when the word is not known to be an adjective.
    pub adjective: Option<AdjectiveData>,
    /// Adverb marker; `None` when the word is not known to be an adverb.
    pub adverb: Option<AdverbData>,
    /// Conjunction marker; `None` when the word is not known to be a conjunction.
    pub conjunction: Option<ConjunctionData>,
    /// Whether the word is a swear; `None` when that is unknown.
    pub swear: Option<bool>,
    /// The dialects this word belongs to.
    /// If no dialects are defined, it can be assumed that the word is
    /// valid in all dialects of English.
    #[serde(default = "default_default")]
    pub dialects: DialectFlags,
    /// [Determiner](https://en.wikipedia.org/wiki/English_determiners) properties; `None` when
    /// the word is not known to be a determiner.
    pub determiner: Option<DeterminerData>,
    /// Whether the word is a [preposition](https://www.merriam-webster.com/dictionary/preposition).
    #[serde(default = "default_false")]
    pub preposition: bool,
    /// Whether the word is considered especially common.
    #[serde(default = "default_false")]
    pub common: bool,
    // NOTE(review): presumably the id of the word this entry was derived from — confirm against
    // the code that populates it.
    #[serde(default = "default_none")]
    pub derived_from: Option<WordId>,
    /// Generated by a chunker
    // NOTE(review): presumably marks membership in a noun phrase — confirm against the chunker.
    pub np_member: Option<bool>,
    /// Generated by a POS tagger
    pub pos_tag: Option<UPOS>,
}
42
/// Fallback used by `serde` for boolean fields that are absent from the serialized data.
/// Always yields `false`.
fn default_false() -> bool {
    bool::default()
}
47
/// Fallback used by `serde` for optional fields that are absent from the serialized data.
/// Always yields `None`, whatever the payload type is.
fn default_none<T>() -> Option<T> {
    Option::default()
}
52
/// Fallback used by `serde` for fields that are absent from the serialized data and whose type
/// implements [`Default`]. Yields that type's default value.
fn default_default<T: Default>() -> T {
    Default::default()
}
57
// Generates the boolean part-of-speech query methods; it is meant to be invoked inside an
// `impl WordMetadata` block.
//
// Input: a `.`-separated list of `<category> has <sub>, <sub>, ...` clauses. `<category>` must
// name an `Option<...>` field of the struct whose payload type is `<Category>Data`, and every
// `<sub>` must name an `is_<sub>: Option<bool>` field of that payload type.
//
// Output:
// - `is_<category>()` for every category,
// - `is_<sub>_<category>()` and `is_non_<sub>_<category>()` for every sub-property,
// - a single `is_likely_homograph()`.
macro_rules! generate_metadata_queries {
    ($($category:ident has $($sub:ident),*).*) => {
        paste! {
            // True when more than one of the following holds: the word is a determiner, the
            // word is a preposition, or the word belongs to one of the listed categories.
            // Each flag is cast to `u8` so the flags can be counted by summing.
            pub fn is_likely_homograph(&self) -> bool {
                [self.is_determiner(), self.preposition, $(
                    self.[< is_ $category >](),
                )*].iter().map(|b| *b as u8).sum::<u8>() > 1
            }

            $(
                // `is_<category>()`: the category's field is `Some`.
                #[doc = concat!("Checks if the word is definitely a ", stringify!($category), ".")]
                pub fn [< is_ $category >](&self) -> bool {
                    self.$category.is_some()
                }

                $(
                    // `is_<sub>_<category>()`: the category is present and the sub-property is
                    // explicitly `Some(true)`.
                    #[doc = concat!("Checks if the word is definitely a ", stringify!($category), " and more specifically is labeled as (a) ", stringify!($sub), ".")]
                    pub fn [< is_ $sub _ $category >](&self) -> bool {
                        matches!(
                            self.$category,
                            Some([< $category:camel Data >]{
                                [< is_ $sub >]: Some(true),
                                ..
                            })
                        ) }

                    // `is_non_<sub>_<category>()`: the category is present and the sub-property
                    // is either unlabeled (`None`) or explicitly `Some(false)`.
                    #[doc = concat!("Checks if the word is definitely a ", stringify!($category), " and more specifically is labeled as __not__ (a) ", stringify!($sub), ".")]
                    pub fn [< is_non_ $sub _ $category >](&self) -> bool {
                        matches!(
                            self.$category,
                            Some([< $category:camel Data >]{
                                [< is_ $sub >]: None | Some(false),
                                ..
                            })
                        )
                    }
                )*
            )*
        }
    };
}
99
100impl WordMetadata {
101    /// Produce a copy of `self` with the known properties of `other` set.
102    pub fn or(&self, other: &Self) -> Self {
103        macro_rules! merge {
104            ($a:expr, $b:expr) => {
105                match ($a, $b) {
106                    (Some(a), Some(b)) => Some(a.or(&b)),
107                    (Some(a), None) => Some(a),
108                    (None, Some(b)) => Some(b),
109                    (None, None) => None,
110                }
111            };
112        }
113
114        Self {
115            noun: merge!(self.noun, other.noun),
116            pronoun: merge!(self.pronoun, other.pronoun),
117            verb: merge!(self.verb, other.verb),
118            adjective: merge!(self.adjective, other.adjective),
119            adverb: merge!(self.adverb, other.adverb),
120            conjunction: merge!(self.conjunction, other.conjunction),
121            dialects: self.dialects | other.dialects,
122            swear: self.swear.or(other.swear),
123            determiner: merge!(self.determiner, other.determiner),
124            preposition: self.preposition || other.preposition,
125            common: self.common || other.common,
126            derived_from: self.derived_from.or(other.derived_from),
127            pos_tag: self.pos_tag.or(other.pos_tag),
128            np_member: self.np_member.or(other.np_member),
129        }
130    }
131
132    /// Given a UPOS tag, discard any metadata that would disagree with the given POS tag.
133    /// For example, if the metadata suggests a word could either be a noun or an adjective, and we
134    /// provide a [`UPOS::NOUN`], this function will remove the adjective data.
135    ///
136    /// Additionally, if the metadata does not currently declare the potential of the word to be
137    /// the specific POS, it becomes so. That means if we provide a [`UPOS::ADJ`] to the function
138    /// for a metadata whose `Self::adjective = None`, it will become `Some`.
139    pub fn enforce_pos_exclusivity(&mut self, pos: &UPOS) {
140        use UPOS::*;
141        match pos {
142            NOUN => {
143                if let Some(noun) = self.noun {
144                    self.noun = Some(NounData {
145                        is_proper: Some(false),
146                        ..noun
147                    })
148                } else {
149                    self.noun = Some(NounData {
150                        is_proper: Some(false),
151                        is_singular: None,
152                        is_plural: None,
153                        is_possessive: None,
154                    })
155                }
156
157                self.pronoun = None;
158                self.verb = None;
159                self.adjective = None;
160                self.adverb = None;
161                self.conjunction = None;
162                self.determiner = None;
163                self.preposition = false;
164            }
165            PROPN => {
166                if let Some(noun) = self.noun {
167                    self.noun = Some(NounData {
168                        is_proper: Some(true),
169                        ..noun
170                    })
171                } else {
172                    self.noun = Some(NounData {
173                        is_proper: Some(true),
174                        is_singular: None,
175                        is_plural: None,
176                        is_possessive: None,
177                    })
178                }
179
180                self.pronoun = None;
181                self.verb = None;
182                self.adjective = None;
183                self.adverb = None;
184                self.conjunction = None;
185                self.determiner = None;
186                self.preposition = false;
187            }
188            PRON => {
189                if self.pronoun.is_none() {
190                    self.pronoun = Some(PronounData::default())
191                }
192
193                self.noun = None;
194                self.verb = None;
195                self.adjective = None;
196                self.adverb = None;
197                self.conjunction = None;
198                self.determiner = None;
199                self.preposition = false;
200            }
201            VERB => {
202                if let Some(verb) = self.verb {
203                    self.verb = Some(VerbData {
204                        is_auxiliary: Some(false),
205                        ..verb
206                    })
207                } else {
208                    self.verb = Some(VerbData {
209                        is_auxiliary: Some(false),
210                        ..Default::default()
211                    })
212                }
213
214                self.noun = None;
215                self.pronoun = None;
216                self.adjective = None;
217                self.adverb = None;
218                self.conjunction = None;
219                self.determiner = None;
220                self.preposition = false;
221            }
222            AUX => {
223                if let Some(verb) = self.verb {
224                    self.verb = Some(VerbData {
225                        is_auxiliary: Some(true),
226                        ..verb
227                    })
228                } else {
229                    self.verb = Some(VerbData {
230                        is_auxiliary: Some(true),
231                        ..Default::default()
232                    })
233                }
234
235                self.noun = None;
236                self.pronoun = None;
237                self.adjective = None;
238                self.adverb = None;
239                self.conjunction = None;
240                self.determiner = None;
241                self.preposition = false;
242            }
243            ADJ => {
244                if self.adjective.is_none() {
245                    self.adjective = Some(AdjectiveData::default())
246                }
247
248                self.noun = None;
249                self.pronoun = None;
250                self.verb = None;
251                self.adverb = None;
252                self.conjunction = None;
253                self.determiner = None;
254                self.preposition = false;
255            }
256            ADV => {
257                if self.adverb.is_none() {
258                    self.adverb = Some(AdverbData::default())
259                }
260
261                self.noun = None;
262                self.pronoun = None;
263                self.verb = None;
264                self.adjective = None;
265                self.conjunction = None;
266                self.determiner = None;
267                self.preposition = false;
268            }
269            ADP => {
270                self.noun = None;
271                self.pronoun = None;
272                self.verb = None;
273                self.adjective = None;
274                self.adverb = None;
275                self.conjunction = None;
276                self.determiner = None;
277                self.preposition = true;
278            }
279            DET => {
280                self.noun = None;
281                self.pronoun = None;
282                self.verb = None;
283                self.adjective = None;
284                self.adverb = None;
285                self.conjunction = None;
286                self.preposition = false;
287                self.determiner = Some(DeterminerData::default());
288            }
289            CCONJ | SCONJ => {
290                if self.conjunction.is_none() {
291                    self.conjunction = Some(ConjunctionData::default())
292                }
293
294                self.noun = None;
295                self.pronoun = None;
296                self.verb = None;
297                self.adjective = None;
298                self.adverb = None;
299                self.determiner = None;
300                self.preposition = false;
301            }
302            _ => {}
303        }
304    }
305
    // Generates `is_<category>()` for every category listed below, plus
    // `is_<sub>_<category>()` and `is_non_<sub>_<category>()` for every listed sub-property
    // (e.g. `is_noun()`, `is_proper_noun()`, `is_non_proper_noun()`), as well as
    // `is_likely_homograph()`. Categories followed by a bare `has` get only `is_<category>()`.
    generate_metadata_queries!(
        noun has proper, plural, possessive.
        pronoun has personal, singular, plural, possessive, reflexive, object.
        determiner has demonstrative, possessive.
        verb has linking, auxiliary.
        conjunction has.
        adjective has.
        adverb has
    );
315
316    pub fn is_first_person_plural_pronoun(&self) -> bool {
317        matches!(
318            self.pronoun,
319            Some(PronounData {
320                person: Some(Person::First),
321                is_plural: Some(true),
322                ..
323            })
324        )
325    }
326
327    pub fn is_first_person_singular_pronoun(&self) -> bool {
328        matches!(
329            self.pronoun,
330            Some(PronounData {
331                person: Some(Person::First),
332                is_singular: Some(true),
333                ..
334            })
335        )
336    }
337
338    pub fn is_third_person_plural_pronoun(&self) -> bool {
339        matches!(
340            self.pronoun,
341            Some(PronounData {
342                person: Some(Person::Third),
343                is_plural: Some(true),
344                ..
345            })
346        )
347    }
348
349    pub fn is_third_person_singular_pronoun(&self) -> bool {
350        matches!(
351            self.pronoun,
352            Some(PronounData {
353                person: Some(Person::Third),
354                is_singular: Some(true),
355                ..
356            })
357        )
358    }
359
360    pub fn is_third_person_pronoun(&self) -> bool {
361        matches!(
362            self.pronoun,
363            Some(PronounData {
364                person: Some(Person::Third),
365                ..
366            })
367        )
368    }
369
370    pub fn is_second_person_pronoun(&self) -> bool {
371        matches!(
372            self.pronoun,
373            Some(PronounData {
374                person: Some(Person::Second),
375                ..
376            })
377        )
378    }
379
380    pub fn is_verb_lemma(&self) -> bool {
381        matches!(
382            self.verb,
383            Some(VerbData {
384                verb_form: Some(VerbForm::LemmaForm),
385                ..
386            })
387        )
388    }
389
390    pub fn is_verb_past_form(&self) -> bool {
391        matches!(
392            self.verb,
393            Some(VerbData {
394                verb_form: Some(VerbForm::PastForm),
395                ..
396            })
397        )
398    }
399
400    pub fn is_verb_progressive_form(&self) -> bool {
401        matches!(
402            self.verb,
403            Some(VerbData {
404                verb_form: Some(VerbForm::ProgressiveForm),
405                ..
406            })
407        )
408    }
409
410    pub fn is_verb_third_person_singular_present_form(&self) -> bool {
411        matches!(
412            self.verb,
413            Some(VerbData {
414                verb_form: Some(VerbForm::ThirdPersonSingularPresentForm),
415                ..
416            })
417        )
418    }
419
420    /// Checks if the word is definitely nominal.
421    pub fn is_nominal(&self) -> bool {
422        self.noun.is_some() || self.pronoun.is_some()
423    }
424
425    /// Checks if the word is definitely a nominal and more specifically is labeled as (a) singular.
426    pub fn is_singular_nominal(&self) -> bool {
427        matches!(
428            self.noun,
429            Some(NounData {
430                is_singular: None | Some(true),
431                ..
432            })
433        ) || matches!(
434            self.pronoun,
435            Some(PronounData {
436                is_singular: None | Some(true),
437                ..
438            })
439        )
440    }
441
442    /// Checks if the word is definitely a nominal and more specifically is labeled as (a) plural.
443    pub fn is_plural_nominal(&self) -> bool {
444        matches!(
445            self.noun,
446            Some(NounData {
447                is_plural: Some(true),
448                ..
449            })
450        ) || matches!(
451            self.pronoun,
452            Some(PronounData {
453                is_plural: Some(true),
454                ..
455            })
456        )
457    }
458
459    /// Checks if the word is definitely a nominal and more specifically is labeled as (a) possessive.
460    pub fn is_possessive_nominal(&self) -> bool {
461        matches!(
462            self.noun,
463            Some(NounData {
464                is_possessive: Some(true),
465                ..
466            })
467        ) || matches!(
468            self.pronoun,
469            Some(PronounData {
470                is_possessive: Some(true),
471                ..
472            })
473        )
474    }
475
476    /// Checks if the word is definitely a nominal and more specifically is labeled as __not__ (a) singular.
477    pub fn is_non_singular_nominal(&self) -> bool {
478        matches!(
479            self.noun,
480            Some(NounData {
481                is_singular: Some(false),
482                ..
483            })
484        ) || matches!(
485            self.pronoun,
486            Some(PronounData {
487                is_singular: Some(false),
488                ..
489            })
490        )
491    }
492
493    /// Checks if the word is definitely a nominal and more specifically is labeled as __not__ (a) plural.
494    pub fn is_non_plural_nominal(&self) -> bool {
495        matches!(
496            self.noun,
497            Some(NounData {
498                is_plural: None | Some(false),
499                ..
500            })
501        ) || matches!(
502            self.pronoun,
503            Some(PronounData {
504                is_plural: None | Some(false),
505                ..
506            })
507        )
508    }
509
510    /// Checks if the word is definitely a nominal and more specifically is labeled as __not__ (a) possessive.
511    pub fn is_non_possessive_nominal(&self) -> bool {
512        matches!(
513            self.noun,
514            Some(NounData {
515                is_possessive: Some(false),
516                ..
517            })
518        ) && matches!(
519            self.pronoun,
520            Some(PronounData {
521                is_possessive: Some(false),
522                ..
523            })
524        )
525    }
526
527    /// Checks whether a word is _definitely_ a swear.
528    pub fn is_swear(&self) -> bool {
529        matches!(self.swear, Some(true))
530    }
531
532    /// Same thing as [`Self::or`], except in-place rather than a clone.
533    pub fn append(&mut self, other: &Self) -> &mut Self {
534        *self = self.or(other);
535        self
536    }
537}
538
// These verb forms are morphological variations, distinct from TAM (Tense-Aspect-Mood)
// Each form can be used in various TAM combinations:
// - Lemma form (infinitive, citation form, dictionary form)
//   Used in infinitives (e.g., "to sleep"), imperatives (e.g., "sleep!"), and with modals (e.g., "will sleep")
// - Past form (past participle and simple past)
//   Used as verbs (e.g., "slept") or adjectives (e.g., "closed door")
// - Progressive form (present participle and gerund)
//   Used as verbs (e.g., "sleeping"), nouns (e.g., "sleeping is important"), or adjectives (e.g., "sleeping dog")
// - Third person singular present (-s/-es)
//   Used for third person singular subjects (e.g., "he sleeps", "she reads")
//
// Important notes:
// 1. English expresses time through auxiliary verbs, not verb form alone
// 2. Irregular verbs can have different forms for past participle and simple past
// 3. Future is always expressed through auxiliary verbs (e.g., "will sleep", "going to sleep")
/// The morphological form of a verb.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
pub enum VerbForm {
    /// The infinitive / dictionary form, e.g. "sleep".
    LemmaForm,
    /// The past participle or simple past, e.g. "slept".
    PastForm,
    /// The present participle or gerund, e.g. "sleeping".
    ProgressiveForm,
    /// The `-s`/`-es` form used with third person singular subjects, e.g. "sleeps".
    ThirdPersonSingularPresentForm,
}
561
/// Properties of a word in its role as a verb. Every property is `None` when it is unlabeled.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct VerbData {
    /// Whether the verb is labeled as a linking verb.
    pub is_linking: Option<bool>,
    /// Whether the verb is labeled as an auxiliary verb.
    pub is_auxiliary: Option<bool>,
    /// The morphological form of the verb, if known.
    pub verb_form: Option<VerbForm>,
}
568
569impl VerbData {
570    /// Produce a copy of `self` with the known properties of `other` set.
571    pub fn or(&self, other: &Self) -> Self {
572        Self {
573            is_linking: self.is_linking.or(other.is_linking),
574            is_auxiliary: self.is_auxiliary.or(other.is_auxiliary),
575            verb_form: self.verb_form.or(other.verb_form),
576        }
577    }
578}
579
// Nouns can be both singular and plural: "aircraft", "biceps", "fish", "sheep"
// TODO other noun properties may be worth adding:
// TODO count vs mass; abstract
/// Properties of a word in its role as a noun. Every property is `None` when it is unlabeled.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct NounData {
    /// Whether the noun is labeled as a proper noun.
    pub is_proper: Option<bool>,
    /// Whether the noun is labeled as singular.
    pub is_singular: Option<bool>,
    /// Whether the noun is labeled as plural. Not mutually exclusive with `is_singular`.
    pub is_plural: Option<bool>,
    /// Whether the noun is labeled as possessive.
    pub is_possessive: Option<bool>,
}
590
591impl NounData {
592    /// Produce a copy of `self` with the known properties of `other` set.
593    pub fn or(&self, other: &Self) -> Self {
594        Self {
595            is_proper: self.is_proper.or(other.is_proper),
596            is_singular: self.is_singular.or(other.is_singular),
597            is_plural: self.is_plural.or(other.is_plural),
598            is_possessive: self.is_possessive.or(other.is_possessive),
599        }
600    }
601}
602
// Person is a property of pronouns; the verb 'be', plus all verbs reflect 3rd person singular with -s
/// Grammatical person, as recorded on pronouns.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
pub enum Person {
    /// First person.
    First,
    /// Second person.
    Second,
    /// Third person.
    Third,
}
610
// TODO for now focused on personal pronouns?
/// Properties of a word in its role as a pronoun. Every property is `None` when it is
/// unlabeled.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct PronounData {
    /// Whether the pronoun is labeled as a personal pronoun.
    pub is_personal: Option<bool>,
    /// Whether the pronoun is labeled as singular.
    pub is_singular: Option<bool>,
    /// Whether the pronoun is labeled as plural.
    pub is_plural: Option<bool>,
    /// Whether the pronoun is labeled as possessive.
    pub is_possessive: Option<bool>,
    /// Whether the pronoun is labeled as reflexive.
    pub is_reflexive: Option<bool>,
    /// The grammatical person of the pronoun, if known.
    pub person: Option<Person>,
    /// Whether the pronoun is labeled as a subject pronoun.
    pub is_subject: Option<bool>,
    /// Whether the pronoun is labeled as an object pronoun.
    pub is_object: Option<bool>,
}
623
624impl PronounData {
625    /// Produce a copy of `self` with the known properties of `other` set.
626    pub fn or(&self, other: &Self) -> Self {
627        Self {
628            is_personal: self.is_personal.or(other.is_personal),
629            is_singular: self.is_singular.or(other.is_singular),
630            is_plural: self.is_plural.or(other.is_plural),
631            is_possessive: self.is_possessive.or(other.is_possessive),
632            is_reflexive: self.is_reflexive.or(other.is_reflexive),
633            person: self.person.or(other.person),
634            is_subject: self.is_subject.or(other.is_subject),
635            is_object: self.is_object.or(other.is_object),
636        }
637    }
638}
639
/// Properties of a word in its role as a determiner. Every property is `None` when it is
/// unlabeled.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct DeterminerData {
    /// Whether the determiner is labeled as demonstrative.
    pub is_demonstrative: Option<bool>,
    /// Whether the determiner is labeled as possessive.
    pub is_possessive: Option<bool>,
}
645
646impl DeterminerData {
647    /// Produce a copy of `self` with the known properties of `other` set.
648    pub fn or(&self, other: &Self) -> Self {
649        Self {
650            is_demonstrative: self.is_demonstrative.or(other.is_demonstrative),
651            is_possessive: self.is_possessive.or(other.is_possessive),
652        }
653    }
654}
655
// Degree is a property of adjectives: positive is not inflected
// Comparative is inflected with -er or comes after the word "more"
// Superlative is inflected with -est or comes after the word "most"
/// The degree of comparison of an adjective.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
pub enum Degree {
    /// The uninflected base form.
    Positive,
    /// Inflected with `-er`, or used after "more".
    Comparative,
    /// Inflected with `-est`, or used after "most".
    Superlative,
}
665
// Some adjectives are not comparable so don't have -er or -est forms and can't be used with "more" or "most".
// Some adjectives can only be used "attributively" (before a noun); some only predicatively (after "is" etc.).
// In old grammars words like the articles and determiners are classified as adjectives but behave differently.
/// Properties of a word in its role as an adjective.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct AdjectiveData {
    /// The degree of comparison; `None` when it is unlabeled.
    pub degree: Option<Degree>,
}
673
674impl AdjectiveData {
675    /// Produce a copy of `self` with the known properties of `other` set.
676    pub fn or(&self, other: &Self) -> Self {
677        Self {
678            degree: self.degree.or(other.degree),
679        }
680    }
681}
682
// Adverb can be a "junk drawer" category for words which don't fit the other major categories.
// The typical adverbs are "adverbs of manner", those derived from adjectives in -ly
// other adverbs (time, place, etc) should probably not be considered adverbs for Harper's purposes
/// Marker for a word in its role as an adverb. It currently carries no properties; its
/// presence alone records that the word can be an adverb.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct AdverbData {}
688
689impl AdverbData {
690    /// Produce a copy of `self` with the known properties of `other` set.
691    pub fn or(&self, _other: &Self) -> Self {
692        Self {}
693    }
694}
695
/// Marker for a word in its role as a conjunction. It currently carries no properties; its
/// presence alone records that the word can be a conjunction.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct ConjunctionData {}
698
699impl ConjunctionData {
700    /// Produce a copy of `self` with the known properties of `other` set.
701    pub fn or(&self, _other: &Self) -> Self {
702        Self {}
703    }
704}
705
/// A regional dialect.
///
/// The discriminant of each variant is a single distinct bit, so a variant can be cast directly
/// into the matching `DialectFlags` constant.
#[derive(
    Debug,
    Clone,
    Copy,
    Serialize,
    Deserialize,
    PartialEq,
    PartialOrd,
    Eq,
    Hash,
    EnumCount,
    EnumString,
    Display,
    VariantArray,
)]
pub enum Dialect {
    // Note: these have bit-shifted values so that they can ergonomically integrate with
    // `DialectFlags`. Each value here must have a unique bit index inside
    // `DialectsUnderlyingType`.
    /// Abbreviated `US`.
    American = 1 << 0,
    /// Abbreviated `CA`.
    Canadian = 1 << 1,
    /// Abbreviated `AU`.
    Australian = 1 << 2,
    /// Abbreviated `GB`.
    British = 1 << 3,
}
731impl Dialect {
732    /// Tries to guess the dialect used in the document by finding which dialect is used the most.
733    /// Returns `None` if it fails to find a single dialect that is used the most.
734    #[must_use]
735    pub fn try_guess_from_document(document: &Document) -> Option<Self> {
736        Self::try_from(DialectFlags::get_most_used_dialects_from_document(document)).ok()
737    }
738
739    /// Tries to get a dialect from its abbreviation. Returns `None` if the abbreviation is not
740    /// recognized.
741    ///
742    /// # Examples
743    ///
744    /// ```
745    /// use harper_core::Dialect;
746    ///
747    /// let abbrs = ["US", "CA", "AU", "GB"];
748    /// let mut dialects = abbrs.iter().map(|abbr| Dialect::try_from_abbr(abbr));
749    ///
750    /// assert_eq!(Some(Dialect::American), dialects.next().unwrap()); // US
751    /// assert_eq!(Some(Dialect::Canadian), dialects.next().unwrap()); // CA
752    /// assert_eq!(Some(Dialect::Australian), dialects.next().unwrap()); // AU
753    /// assert_eq!(Some(Dialect::British), dialects.next().unwrap()); // GB
754    /// ```
755    #[must_use]
756    pub fn try_from_abbr(abbr: &str) -> Option<Self> {
757        match abbr {
758            "US" => Some(Self::American),
759            "CA" => Some(Self::Canadian),
760            "AU" => Some(Self::Australian),
761            "GB" => Some(Self::British),
762            _ => None,
763        }
764    }
765}
766impl TryFrom<DialectFlags> for Dialect {
767    type Error = ();
768
769    /// Attempts to convert `DialectFlags` to a single `Dialect`.
770    ///
771    /// # Errors
772    ///
773    /// Will return `Err` if more than one dialect is enabled or if an undefined dialect is
774    /// enabled.
775    fn try_from(dialect_flags: DialectFlags) -> Result<Self, Self::Error> {
776        // Ensure only one dialect is enabled before converting.
777        if dialect_flags.bits().count_ones() == 1 {
778            match dialect_flags {
779                df if df.is_dialect_enabled_strict(Dialect::American) => Ok(Dialect::American),
780                df if df.is_dialect_enabled_strict(Dialect::Canadian) => Ok(Dialect::Canadian),
781                df if df.is_dialect_enabled_strict(Dialect::Australian) => Ok(Dialect::Australian),
782                df if df.is_dialect_enabled_strict(Dialect::British) => Ok(Dialect::British),
783                _ => Err(()),
784            }
785        } else {
786            // More than one dialect enabled; can't soundly convert.
787            Err(())
788        }
789    }
790}
791
// The underlying integer type used for `DialectFlags`.
// At the time of writing, this is a `u8`. Each dialect occupies one bit, so if we want to
// define more than 8 dialects in the future, we will need to switch this to a larger type.
type DialectFlagsUnderlyingType = u8;
796
bitflags::bitflags! {
    /// A collection of bit flags used to represent enabled dialects.
    ///
    /// This is generally used to allow a word (or similar) to be tagged with multiple dialects.
    /// Each constant reuses the discriminant of the corresponding `Dialect` variant as its bit.
    #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash)]
    #[serde(transparent)]
    pub struct DialectFlags: DialectFlagsUnderlyingType {
        /// Flag for `Dialect::American`.
        const AMERICAN = Dialect::American as DialectFlagsUnderlyingType;
        /// Flag for `Dialect::Canadian`.
        const CANADIAN = Dialect::Canadian as DialectFlagsUnderlyingType;
        /// Flag for `Dialect::Australian`.
        const AUSTRALIAN = Dialect::Australian as DialectFlagsUnderlyingType;
        /// Flag for `Dialect::British`.
        const BRITISH = Dialect::British as DialectFlagsUnderlyingType;
    }
}
810impl DialectFlags {
811    /// Checks if the provided dialect is enabled.
812    /// If no dialect is explicitly enabled, it is assumed that all dialects are enabled.
813    #[must_use]
814    pub fn is_dialect_enabled(self, dialect: Dialect) -> bool {
815        self.is_empty() || self.intersects(Self::from_dialect(dialect))
816    }
817
818    /// Checks if the provided dialect is ***explicitly*** enabled.
819    ///
820    /// Unlike `is_dialect_enabled`, this will return false when no dialects are explicitly
821    /// enabled.
822    #[must_use]
823    pub fn is_dialect_enabled_strict(self, dialect: Dialect) -> bool {
824        self.intersects(Self::from_dialect(dialect))
825    }
826
827    /// Constructs a `DialectFlags` from the provided `Dialect`, with only that dialect being
828    /// enabled.
829    ///
830    /// # Panics
831    ///
832    /// This will panic if `dialect` represents a dialect that is not defined in
833    /// `DialectFlags`.
834    #[must_use]
835    pub fn from_dialect(dialect: Dialect) -> Self {
836        let Some(out) = Self::from_bits(dialect as DialectFlagsUnderlyingType) else {
837            panic!("The '{dialect}' dialect isn't defined in DialectFlags!");
838        };
839        out
840    }
841
842    /// Gets the most commonly used dialect(s) in the document.
843    ///
844    /// If multiple dialects are used equally often, they will all be enabled in the returned
845    /// `DialectFlags`. On the other hand, if there is a single dialect that is used the most, it
846    /// will be the only one enabled.
847    #[must_use]
848    pub fn get_most_used_dialects_from_document(document: &Document) -> Self {
849        // Initialize counters.
850        let mut dialect_counters: [(Dialect, usize); Dialect::COUNT] = Dialect::VARIANTS
851            .iter()
852            .map(|d| (*d, 0))
853            .collect_array()
854            .unwrap();
855
856        // Count word dialects.
857        document.iter_words().for_each(|w| {
858            if let TokenKind::Word(Some(word_metadata)) = &w.kind {
859                // If the token is a word, iterate though the dialects in `dialect_counters` and
860                // increment those counters where the word has the respective dialect enabled.
861                dialect_counters.iter_mut().for_each(|(dialect, count)| {
862                    if word_metadata.dialects.is_dialect_enabled(*dialect) {
863                        *count += 1;
864                    }
865                });
866            }
867        });
868
869        // Find max counter.
870        let max_counter = dialect_counters
871            .iter()
872            .map(|(_, count)| count)
873            .max()
874            .unwrap();
875        // Get and convert the collection of most used dialects into a `DialectFlags`.
876        dialect_counters
877            .into_iter()
878            .filter(|(_, count)| count == max_counter)
879            .fold(DialectFlags::empty(), |acc, dialect| {
880                // Fold most used dialects into `DialectFlags` via bitwise or.
881                acc | Self::from_dialect(dialect.0)
882            })
883    }
884}
885impl Default for DialectFlags {
886    /// A default value with no dialects explicitly enabled.
887    /// Implicitly, this state corresponds to all dialects being enabled.
888    fn default() -> Self {
889        Self::empty()
890    }
891}
892
893#[cfg(test)]
894mod tests {
895    use crate::{Dictionary, FstDictionary, WordMetadata};
896
897    // Helper function to get word metadata from the curated dictionary
898    fn md(word: &str) -> WordMetadata {
899        FstDictionary::curated()
900            .get_word_metadata_str(word)
901            .unwrap_or_else(|| panic!("Word '{word}' not found in dictionary"))
902            .clone()
903    }
904
905    mod dialect {
906        use super::super::{Dialect, DialectFlags};
907        use crate::Document;
908
909        #[test]
910        fn guess_british_dialect() {
911            let document = Document::new_plain_english_curated("Aluminium was used.");
912            let df = DialectFlags::get_most_used_dialects_from_document(&document);
913            assert!(
914                df.is_dialect_enabled_strict(Dialect::British)
915                    && !df.is_dialect_enabled_strict(Dialect::American)
916            );
917        }
918
919        #[test]
920        fn guess_american_dialect() {
921            let document = Document::new_plain_english_curated("Aluminum was used.");
922            let df = DialectFlags::get_most_used_dialects_from_document(&document);
923            assert!(
924                df.is_dialect_enabled_strict(Dialect::American)
925                    && !df.is_dialect_enabled_strict(Dialect::British)
926            );
927        }
928    }
929
930    mod noun {
931        use crate::word_metadata::tests::md;
932
933        #[test]
934        fn puppy_is_noun() {
935            assert!(md("puppy").noun.is_some());
936        }
937
938        #[test]
939        fn prepare_is_not_noun() {
940            assert!(md("prepare").noun.is_none());
941        }
942
943        #[test]
944        fn paris_is_proper_noun() {
945            assert!(md("Paris").noun.unwrap().is_proper.unwrap());
946        }
947
948        #[test]
949        fn permit_is_not_proper_noun() {
950            assert!(matches!(
951                md("lapdog").noun.and_then(|n| n.is_proper),
952                None | Some(false)
953            ));
954        }
955
956        #[test]
957        #[ignore = "noun singular property not implemented yet"]
958        fn hound_is_singular_noun() {
959            assert!(md("hound").noun.unwrap().is_singular.unwrap());
960        }
961
962        #[test]
963        fn pooches_is_not_singular_noun() {
964            assert!(matches!(
965                md("pooches").noun.and_then(|n| n.is_singular),
966                None | Some(false)
967            ));
968        }
969
970        #[test]
971        fn hounds_is_plural_noun() {
972            assert!(md("hounds").noun.unwrap().is_plural.unwrap());
973        }
974
975        #[test]
976        fn pooch_is_not_plural_noun() {
977            assert!(matches!(
978                md("pooch").noun.and_then(|n| n.is_plural),
979                None | Some(false)
980            ));
981        }
982
983        #[test]
984        #[ignore = "noun singular property not implemented yet"]
985        fn fish_is_singular_noun() {
986            assert!(md("fish").noun.unwrap().is_singular.unwrap());
987        }
988
989        #[test]
990        fn fish_is_plural_noun() {
991            assert!(md("fish").noun.unwrap().is_plural.unwrap());
992        }
993
994        #[test]
995        fn fishes_is_plural_noun() {
996            assert!(md("fishes").noun.unwrap().is_plural.unwrap());
997        }
998
999        #[test]
1000        #[ignore = "noun singular property not implemented yet"]
1001        fn sheep_is_singular_noun() {
1002            assert!(md("sheep").noun.unwrap().is_singular.unwrap());
1003        }
1004
1005        #[test]
1006        fn sheep_is_plural_noun() {
1007            assert!(md("sheep").noun.unwrap().is_plural.unwrap());
1008        }
1009
1010        #[test]
1011        #[should_panic]
1012        fn sheeps_is_not_word() {
1013            md("sheeps");
1014        }
1015
1016        #[test]
1017        #[ignore = "noun singular property not implemented yet"]
1018        fn bicep_is_singular_noun() {
1019            assert!(md("bicep").noun.unwrap().is_singular.unwrap());
1020        }
1021
1022        #[test]
1023        #[ignore = "noun singular property not implemented yet"]
1024        fn biceps_is_singular_noun() {
1025            assert!(md("biceps").noun.unwrap().is_singular.unwrap());
1026        }
1027
1028        #[test]
1029        fn biceps_is_plural_noun() {
1030            assert!(md("biceps").noun.unwrap().is_plural.unwrap());
1031        }
1032
1033        #[test]
1034        #[ignore = "noun singular property not implemented yet"]
1035        fn aircraft_is_singular_noun() {
1036            assert!(md("aircraft").noun.unwrap().is_singular.unwrap());
1037        }
1038
1039        #[test]
1040        #[ignore = "noun plural property not implemented yet"]
1041        fn aircraft_is_plural_noun() {
1042            assert!(md("aircraft").noun.unwrap().is_plural.unwrap());
1043        }
1044
1045        #[test]
1046        #[should_panic]
1047        fn aircrafts_is_not_word() {
1048            md("aircrafts");
1049        }
1050
1051        #[test]
1052        fn dog_apostrophe_s_is_possessive_noun() {
1053            assert!(md("dog's").noun.unwrap().is_possessive.unwrap());
1054        }
1055
1056        #[test]
1057        fn dogs_is_not_possessive_noun() {
1058            assert!(md("dogs").noun.unwrap().is_possessive.is_none());
1059        }
1060    }
1061
1062    mod pronoun {
1063        use crate::word_metadata::tests::md;
1064
1065        mod i_me_myself {
1066            use crate::word_metadata::tests::md;
1067
1068            #[test]
1069            fn i_is_pronoun() {
1070                assert!(md("I").pronoun.is_some());
1071            }
1072            #[test]
1073            fn i_is_personal_pronoun() {
1074                assert!(md("I").pronoun.unwrap().is_personal.unwrap());
1075            }
1076            #[test]
1077            fn i_is_singular_pronoun() {
1078                assert!(md("I").pronoun.unwrap().is_singular.unwrap());
1079            }
1080            #[test]
1081            fn i_is_subject_pronoun() {
1082                assert!(md("I").pronoun.unwrap().is_subject.unwrap());
1083            }
1084
1085            #[test]
1086            fn me_is_pronoun() {
1087                assert!(md("me").pronoun.is_some());
1088            }
1089            #[test]
1090            fn me_is_personal_pronoun() {
1091                assert!(md("me").pronoun.unwrap().is_personal.unwrap());
1092            }
1093            #[test]
1094            fn me_is_singular_pronoun() {
1095                assert!(md("me").pronoun.unwrap().is_singular.unwrap());
1096            }
1097            #[test]
1098            fn me_is_object_pronoun() {
1099                assert!(md("me").pronoun.unwrap().is_object.unwrap());
1100            }
1101
1102            #[test]
1103            fn myself_is_pronoun() {
1104                assert!(md("myself").pronoun.is_some());
1105            }
1106            #[test]
1107            fn myself_is_personal_pronoun() {
1108                assert!(md("myself").pronoun.unwrap().is_personal.unwrap());
1109            }
1110            #[test]
1111            fn myself_is_singular_pronoun() {
1112                assert!(md("myself").pronoun.unwrap().is_singular.unwrap());
1113            }
1114            #[test]
1115            fn myself_is_reflexive_pronoun() {
1116                assert!(md("myself").pronoun.unwrap().is_reflexive.unwrap());
1117            }
1118        }
1119
1120        mod we_us_ourselves {
1121            use crate::word_metadata::tests::md;
1122
1123            #[test]
1124            fn we_is_pronoun() {
1125                assert!(md("we").pronoun.is_some());
1126            }
1127            #[test]
1128            fn we_is_personal_pronoun() {
1129                assert!(md("we").pronoun.unwrap().is_personal.unwrap());
1130            }
1131            #[test]
1132            fn we_is_plural_pronoun() {
1133                assert!(md("we").pronoun.unwrap().is_plural.unwrap());
1134            }
1135            #[test]
1136            fn we_is_subject_pronoun() {
1137                assert!(md("we").pronoun.unwrap().is_subject.unwrap());
1138            }
1139
1140            #[test]
1141            fn us_is_pronoun() {
1142                assert!(md("us").pronoun.is_some());
1143            }
1144            #[test]
1145            fn us_is_personal_pronoun() {
1146                assert!(md("us").pronoun.unwrap().is_personal.unwrap());
1147            }
1148            #[test]
1149            fn us_is_plural_pronoun() {
1150                assert!(md("us").pronoun.unwrap().is_plural.unwrap());
1151            }
1152            #[test]
1153            fn us_is_object_pronoun() {
1154                assert!(md("us").pronoun.unwrap().is_object.unwrap());
1155            }
1156
1157            #[test]
1158            fn ourselves_is_pronoun() {
1159                assert!(md("ourselves").pronoun.is_some());
1160            }
1161            #[test]
1162            fn ourselves_is_personal_pronoun() {
1163                assert!(md("ourselves").pronoun.unwrap().is_personal.unwrap());
1164            }
1165            #[test]
1166            fn ourselves_is_plural_pronoun() {
1167                assert!(md("ourselves").pronoun.unwrap().is_plural.unwrap());
1168            }
1169            #[test]
1170            fn ourselves_is_reflexive_pronoun() {
1171                assert!(md("ourselves").pronoun.unwrap().is_reflexive.unwrap());
1172            }
1173        }
1174
1175        mod you_yourself {
1176            use crate::word_metadata::tests::md;
1177
1178            #[test]
1179            fn you_is_pronoun() {
1180                assert!(md("you").pronoun.is_some());
1181            }
1182            #[test]
1183            fn you_is_personal_pronoun() {
1184                assert!(md("you").pronoun.unwrap().is_personal.unwrap());
1185            }
1186            #[test]
1187            fn you_is_singular_pronoun() {
1188                assert!(md("you").pronoun.unwrap().is_singular.unwrap());
1189            }
1190            #[test]
1191            fn you_is_plural_pronoun() {
1192                assert!(md("you").pronoun.unwrap().is_plural.unwrap());
1193            }
1194            #[test]
1195            fn you_is_subject_pronoun() {
1196                assert!(md("you").pronoun.unwrap().is_subject.unwrap());
1197            }
1198            #[test]
1199            fn you_is_object_pronoun() {
1200                assert!(md("you").pronoun.unwrap().is_object.unwrap());
1201            }
1202            #[test]
1203            fn yourself_is_pronoun() {
1204                assert!(md("yourself").pronoun.is_some());
1205            }
1206            #[test]
1207            fn yourself_is_personal_pronoun() {
1208                assert!(md("yourself").pronoun.unwrap().is_personal.unwrap());
1209            }
1210            #[test]
1211            fn yourself_is_singular_pronoun() {
1212                assert!(md("yourself").pronoun.unwrap().is_singular.unwrap());
1213            }
1214            #[test]
1215            fn yourself_is_reflexive_pronoun() {
1216                assert!(md("yourself").pronoun.unwrap().is_reflexive.unwrap());
1217            }
1218        }
1219
1220        mod he_him_himself {
1221            use crate::word_metadata::tests::md;
1222
1223            #[test]
1224            fn he_is_pronoun() {
1225                assert!(md("he").pronoun.is_some());
1226            }
1227            #[test]
1228            fn he_is_personal_pronoun() {
1229                assert!(md("he").pronoun.unwrap().is_personal.unwrap());
1230            }
1231            #[test]
1232            fn he_is_singular_pronoun() {
1233                assert!(md("he").pronoun.unwrap().is_singular.unwrap());
1234            }
1235            #[test]
1236            fn he_is_subject_pronoun() {
1237                assert!(md("he").pronoun.unwrap().is_subject.unwrap());
1238            }
1239
1240            #[test]
1241            fn him_is_pronoun() {
1242                assert!(md("him").pronoun.is_some());
1243            }
1244            #[test]
1245            fn him_is_personal_pronoun() {
1246                assert!(md("him").pronoun.unwrap().is_personal.unwrap());
1247            }
1248            #[test]
1249            fn him_is_singular_pronoun() {
1250                assert!(md("him").pronoun.unwrap().is_singular.unwrap());
1251            }
1252            #[test]
1253            fn him_is_object_pronoun() {
1254                assert!(md("him").pronoun.unwrap().is_object.unwrap());
1255            }
1256
1257            #[test]
1258            fn himself_is_pronoun() {
1259                assert!(md("himself").pronoun.is_some());
1260            }
1261            #[test]
1262            fn himself_is_personal_pronoun() {
1263                assert!(md("himself").pronoun.unwrap().is_personal.unwrap());
1264            }
1265            #[test]
1266            fn himself_is_singular_pronoun() {
1267                assert!(md("himself").pronoun.unwrap().is_singular.unwrap());
1268            }
1269            #[test]
1270            fn himself_is_reflexive_pronoun() {
1271                assert!(md("himself").pronoun.unwrap().is_reflexive.unwrap());
1272            }
1273        }
1274
1275        mod she_her_herself {
1276            use crate::word_metadata::tests::md;
1277
1278            #[test]
1279            fn she_is_pronoun() {
1280                assert!(md("she").pronoun.is_some());
1281            }
1282            #[test]
1283            fn she_is_personal_pronoun() {
1284                assert!(md("she").pronoun.unwrap().is_personal.unwrap());
1285            }
1286            #[test]
1287            fn she_is_singular_pronoun() {
1288                assert!(md("she").pronoun.unwrap().is_singular.unwrap());
1289            }
1290            #[test]
1291            fn she_is_subject_pronoun() {
1292                assert!(md("she").pronoun.unwrap().is_subject.unwrap());
1293            }
1294
1295            #[test]
1296            fn her_is_pronoun() {
1297                assert!(md("her").pronoun.is_some());
1298            }
1299            #[test]
1300            fn her_is_personal_pronoun() {
1301                assert!(md("her").pronoun.unwrap().is_personal.unwrap());
1302            }
1303            #[test]
1304            fn her_is_singular_pronoun() {
1305                assert!(md("her").pronoun.unwrap().is_singular.unwrap());
1306            }
1307            #[test]
1308            fn her_is_object_pronoun() {
1309                assert!(md("her").pronoun.unwrap().is_object.unwrap());
1310            }
1311
1312            #[test]
1313            fn herself_is_pronoun() {
1314                assert!(md("herself").pronoun.is_some());
1315            }
1316            #[test]
1317            fn herself_is_personal_pronoun() {
1318                assert!(md("herself").pronoun.unwrap().is_personal.unwrap());
1319            }
1320            #[test]
1321            fn herself_is_singular_pronoun() {
1322                assert!(md("herself").pronoun.unwrap().is_singular.unwrap());
1323            }
1324            #[test]
1325            fn herself_is_reflexive_pronoun() {
1326                assert!(md("herself").pronoun.unwrap().is_reflexive.unwrap());
1327            }
1328        }
1329
1330        mod it_itself {
1331            use crate::word_metadata::tests::md;
1332
1333            #[test]
1334            fn it_is_pronoun() {
1335                assert!(md("it").pronoun.is_some());
1336            }
1337            #[test]
1338            fn it_is_personal_pronoun() {
1339                assert!(md("it").pronoun.unwrap().is_personal.unwrap());
1340            }
1341            #[test]
1342            fn it_is_singular_pronoun() {
1343                assert!(md("it").pronoun.unwrap().is_singular.unwrap());
1344            }
1345            #[test]
1346            fn it_is_subject_pronoun() {
1347                assert!(md("it").pronoun.unwrap().is_subject.unwrap());
1348            }
1349            #[test]
1350            fn it_is_object_pronoun() {
1351                assert!(md("it").pronoun.unwrap().is_object.unwrap());
1352            }
1353
1354            #[test]
1355            fn itself_is_pronoun() {
1356                assert!(md("itself").pronoun.is_some());
1357            }
1358            #[test]
1359            fn itself_is_personal_pronoun() {
1360                assert!(md("itself").pronoun.unwrap().is_personal.unwrap());
1361            }
1362            #[test]
1363            fn itself_is_singular_pronoun() {
1364                assert!(md("itself").pronoun.unwrap().is_singular.unwrap());
1365            }
1366            #[test]
1367            fn itself_is_reflexive_pronoun() {
1368                assert!(md("itself").pronoun.unwrap().is_reflexive.unwrap());
1369            }
1370        }
1371
1372        mod they_them_themselves {
1373            use crate::word_metadata::tests::md;
1374
1375            #[test]
1376            fn they_is_pronoun() {
1377                assert!(md("they").pronoun.is_some());
1378            }
1379            #[test]
1380            fn they_is_personal_pronoun() {
1381                assert!(md("they").pronoun.unwrap().is_personal.unwrap());
1382            }
1383            #[test]
1384            fn they_is_plural_pronoun() {
1385                assert!(md("they").pronoun.unwrap().is_plural.unwrap());
1386            }
1387            #[test]
1388            fn they_is_subject_pronoun() {
1389                assert!(md("they").pronoun.unwrap().is_subject.unwrap());
1390            }
1391
1392            #[test]
1393            fn them_is_pronoun() {
1394                assert!(md("them").pronoun.is_some());
1395            }
1396            #[test]
1397            fn them_is_personal_pronoun() {
1398                assert!(md("them").pronoun.unwrap().is_personal.unwrap());
1399            }
1400            #[test]
1401            fn them_is_plural_pronoun() {
1402                assert!(md("them").pronoun.unwrap().is_plural.unwrap());
1403            }
1404            #[test]
1405            fn them_is_object_pronoun() {
1406                assert!(md("them").pronoun.unwrap().is_object.unwrap());
1407            }
1408
1409            #[test]
1410            fn themselves_is_pronoun() {
1411                assert!(md("themselves").pronoun.is_some());
1412            }
1413            #[test]
1414            fn themselves_is_personal_pronoun() {
1415                assert!(md("themselves").pronoun.unwrap().is_personal.unwrap());
1416            }
1417            #[test]
1418            fn themselves_is_plural_pronoun() {
1419                assert!(md("themselves").pronoun.unwrap().is_plural.unwrap());
1420            }
1421            #[test]
1422            fn themselves_is_reflexive_pronoun() {
1423                assert!(md("themselves").pronoun.unwrap().is_reflexive.unwrap());
1424            }
1425        }
1426
1427        // Possessive pronouns (not to be confused with possessive adjectives/determiners)
1428        #[test]
1429        fn mine_is_pronoun() {
1430            assert!(md("mine").pronoun.is_some());
1431        }
1432        #[test]
1433        fn ours_is_pronoun() {
1434            assert!(md("ours").pronoun.is_some());
1435        }
1436        #[test]
1437        fn yours_is_pronoun() {
1438            assert!(md("yours").pronoun.is_some());
1439        }
1440        #[test]
1441        fn his_is_pronoun() {
1442            assert!(md("his").pronoun.is_some());
1443        }
1444        #[test]
1445        fn hers_is_pronoun() {
1446            assert!(md("hers").pronoun.is_some());
1447        }
1448        #[test]
1449        fn its_is_pronoun() {
1450            assert!(md("its").pronoun.is_some());
1451        }
1452        #[test]
1453        fn theirs_is_pronoun() {
1454            assert!(md("theirs").pronoun.is_some());
1455        }
1456
1457        // archaic pronouns
1458        #[test]
1459        fn archaic_pronouns() {
1460            assert!(md("thou").pronoun.is_some());
1461            assert!(md("thee").pronoun.is_some());
1462            assert!(md("thyself").pronoun.is_some());
1463            assert!(md("thine").pronoun.is_some());
1464        }
1465
1466        // generic pronouns
1467        #[test]
1468        fn generic_pronouns() {
1469            assert!(md("one").pronoun.is_some());
1470            assert!(md("oneself").pronoun.is_some());
1471        }
1472
1473        // relative and interrogative pronouns
1474        #[test]
1475        fn relative_and_interrogative_pronouns() {
1476            assert!(md("who").pronoun.is_some());
1477            assert!(md("whom").pronoun.is_some());
1478            assert!(md("whose").pronoun.is_some());
1479            assert!(md("which").pronoun.is_some());
1480            assert!(md("what").pronoun.is_some());
1481        }
1482
1483        // nonstandard pronouns
1484        #[test]
1485        #[ignore = "not in dictionary"]
1486        fn nonstandard_pronouns() {
1487            assert!(md("themself").pronoun.is_some());
1488            assert!(md("y'all'").pronoun.is_some());
1489        }
1490    }
1491
1492    #[test]
1493    fn the_is_determiner() {
1494        assert!(md("the").determiner.is_some());
1495    }
1496    #[test]
1497    fn this_is_demonstrative_determiner() {
1498        assert!(md("this").determiner.unwrap().is_demonstrative.unwrap());
1499    }
1500    #[test]
1501    fn your_is_possessive_determiner() {
1502        assert!(md("your").determiner.unwrap().is_possessive.unwrap());
1503    }
1504}