use harper_brill::UPOS;
use is_macro::Is;
use itertools::Itertools;
use paste::paste;
use serde::{Deserialize, Serialize};
use smallvec::SmallVec;
use strum::{EnumCount as _, VariantArray as _};
use strum_macros::{Display, EnumCount, EnumIter, EnumString, VariantArray};
use std::convert::TryFrom;
use crate::dict_word_metadata_orthography::OrthFlags;
use crate::spell::WordId;
use crate::{Document, TokenKind, TokenStringExt};
/// Dictionary metadata for a single word: which parts of speech it is labeled with, plus
/// dialect, orthography and a few miscellaneous flags.
///
/// Each part-of-speech field is `Some` when the word is labeled with that part of speech.
/// Several of them may be set at once.
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Hash)]
pub struct DictWordMetadata {
/// Noun-specific data; `Some` when the word is labeled as a noun.
pub noun: Option<NounData>,
/// Pronoun-specific data; `Some` when the word is labeled as a pronoun.
pub pronoun: Option<PronounData>,
/// Verb-specific data; `Some` when the word is labeled as a verb.
pub verb: Option<VerbData>,
/// Adjective-specific data; `Some` when the word is labeled as an adjective.
pub adjective: Option<AdjectiveData>,
/// Adverb-specific data; `Some` when the word is labeled as an adverb.
pub adverb: Option<AdverbData>,
/// Conjunction marker; `Some` when the word is labeled as a conjunction.
pub conjunction: Option<ConjunctionData>,
/// Determiner-specific data; `Some` when the word is labeled as a determiner.
pub determiner: Option<DeterminerData>,
/// Affix-specific data; `Some` when the word is labeled as a prefix and/or suffix.
pub affix: Option<AffixData>,
/// Whether the word is labeled as a preposition. Defaults to `false` when absent from the
/// serialized form.
#[serde(default = "default_false")]
pub preposition: bool,
/// Swear-word marker. Only `Some(true)` is treated as a swear by `is_swear`.
pub swear: Option<bool>,
/// Dialects the word is tagged with. An empty set is treated by
/// `DialectFlags::is_dialect_enabled` as enabled for every dialect.
#[serde(default = "default_default")]
pub dialects: DialectFlags,
/// Orthography flags queried by `is_lowercase`, `is_titlecase`, `is_allcaps`, etc.
#[serde(default = "OrthFlags::empty")]
pub orth_info: OrthFlags,
/// NOTE(review): presumably marks frequently used words; confirm against the dictionary
/// builder.
#[serde(default = "default_false")]
pub common: bool,
/// NOTE(review): presumably the id of the word this entry was derived from; confirm
/// against the code that populates it.
#[serde(default = "default_none")]
pub derived_from: Option<WordId>,
/// NOTE(review): presumably whether the word was judged to be part of a noun phrase;
/// nothing in this file reads it apart from `merge`.
pub np_member: Option<bool>,
/// Explicit part-of-speech tag. When present, `infer_pos_tag` returns it unchanged.
pub pos_tag: Option<UPOS>,
}
/// Serde default helper: the value used for boolean fields missing from the input (`false`).
fn default_false() -> bool {
    bool::default()
}
/// Serde default helper: the value used for optional fields missing from the input (`None`).
fn default_none<T>() -> Option<T> {
    Option::default()
}
/// Serde default helper: forwards to the field type's own `Default` implementation.
fn default_default<T: Default>() -> T {
    <T as Default>::default()
}
/// Generates the part-of-speech query methods of `DictWordMetadata`.
///
/// Input grammar: `category has sub, sub, ...` entries separated by `.`. For every `category`
/// (a field of `DictWordMetadata` whose payload type is `<Category>Data`) this emits
/// `is_<category>()`. For every `sub` (an `is_<sub>` field of that payload type) it emits
/// `is_<sub>_<category>()` and `is_non_<sub>_<category>()`. It additionally emits
/// `is_likely_homograph()` and `difference()`, which range over all listed categories.
macro_rules! generate_metadata_queries {
    ($($category:ident has $($sub:ident),*).*) => {
        paste! {
            /// Checks whether the word is labeled with more than one part of speech.
            ///
            /// The preposition flag and every generated category are each counted once.
            pub fn is_likely_homograph(&self) -> bool {
                // Every category is covered by the repetition below. Listing one of them
                // (previously `is_determiner`) explicitly as well counts it twice and makes
                // any word with only that label look like a homograph.
                [self.preposition, $(
                    self.[< is_ $category >](),
                )*].iter().filter(|&&is_set| is_set).count() > 1
            }
            /// Counts how many of the generated queries give a different answer for `self`
            /// and `other`.
            pub fn difference(&self, other: &Self) -> u32 {
                [
                    $(
                        Self::[< is_ $category >],
                        $(
                            Self::[< is_ $sub _ $category >],
                            Self::[< is_non_ $sub _ $category >],
                        )*
                    )*
                ]
                .iter()
                .fold(0, |acc, func| acc + (func(self) ^ func(other)) as u32)
            }
            $(
                #[doc = concat!("Checks if the word is definitely a ", stringify!($category), ".")]
                pub fn [< is_ $category >](&self) -> bool {
                    self.$category.is_some()
                }
                $(
                    #[doc = concat!("Checks if the word is definitely a ", stringify!($category), " and more specifically is labeled as (a) ", stringify!($sub), ".")]
                    pub fn [< is_ $sub _ $category >](&self) -> bool {
                        matches!(
                            self.$category,
                            Some([< $category:camel Data >]{
                                [< is_ $sub >]: Some(true),
                                ..
                            })
                        )
                    }
                    #[doc = concat!("Checks if the word is definitely a ", stringify!($category), " and more specifically is labeled as __not__ (a) ", stringify!($sub), ".")]
                    pub fn [< is_non_ $sub _ $category >](&self) -> bool {
                        // An unset sub-flag (`None`) is treated the same as `Some(false)`.
                        matches!(
                            self.$category,
                            Some([< $category:camel Data >]{
                                [< is_ $sub >]: None | Some(false),
                                ..
                            })
                        )
                    }
                )*
            )*
        }
    };
}
impl DictWordMetadata {
pub fn infer_pos_tag(&self) -> Option<UPOS> {
if let Some(pos) = self.pos_tag {
return Some(pos);
}
let mut candidates = SmallVec::<[UPOS; 14]>::with_capacity(14);
if self.is_proper_noun() {
candidates.push(UPOS::PROPN);
}
if self.is_pronoun() {
candidates.push(UPOS::PRON);
}
if self.is_noun() {
candidates.push(UPOS::NOUN);
}
if self.is_verb() {
if let Some(data) = &self.verb {
if data.is_auxiliary == Some(true) {
candidates.push(UPOS::AUX);
} else {
candidates.push(UPOS::VERB);
}
} else {
candidates.push(UPOS::VERB);
}
}
if self.is_adjective() {
candidates.push(UPOS::ADJ);
}
if self.is_adverb() {
candidates.push(UPOS::ADV);
}
if self.is_conjunction() {
candidates.push(UPOS::CCONJ);
}
if self.is_determiner() {
candidates.push(UPOS::DET);
}
if self.preposition {
candidates.push(UPOS::ADP);
}
candidates.sort();
candidates.dedup();
candidates.into_iter().exactly_one().ok()
}
/// Returns a new metadata value that is `self` merged with `other` (see `merge`), leaving
/// both inputs untouched.
pub fn or(&self, other: &Self) -> Self {
    let mut merged = self.clone();
    merged.merge(other);
    merged
}
/// Rewrites the metadata so that it describes only the given part of speech.
///
/// For a handled tag, the matching field is kept (or created with default data when it was
/// missing) and every other part-of-speech field, including `affix` and `preposition`, is
/// cleared. Specifics:
/// - `NOUN` / `PROPN` force `is_proper` to `Some(false)` / `Some(true)`.
/// - `VERB` / `AUX` force `is_auxiliary` to `Some(false)` / `Some(true)`.
/// - `DET` always resets the determiner data to its default, discarding existing flags.
/// - `ADP` sets `preposition` to `true`.
///
/// Any other tag leaves the metadata unchanged.
pub fn enforce_pos_exclusivity(&mut self, pos: &UPOS) {
    use UPOS::*;
    // Tags without a corresponding metadata field must not touch anything.
    if !matches!(
        pos,
        NOUN | PROPN | PRON | VERB | AUX | ADJ | ADV | ADP | DET | CCONJ | SCONJ
    ) {
        return;
    }
    // Work out the one surviving value first, reusing existing data where the tag allows it.
    let noun = matches!(pos, NOUN | PROPN).then(|| NounData {
        is_proper: Some(matches!(pos, PROPN)),
        ..self.noun.unwrap_or_default()
    });
    let pronoun = matches!(pos, PRON).then(|| self.pronoun.unwrap_or_default());
    let verb = matches!(pos, VERB | AUX).then(|| VerbData {
        is_auxiliary: Some(matches!(pos, AUX)),
        ..self.verb.unwrap_or_default()
    });
    let adjective = matches!(pos, ADJ).then(|| self.adjective.unwrap_or_default());
    let adverb = matches!(pos, ADV).then(|| self.adverb.unwrap_or_default());
    let conjunction =
        matches!(pos, CCONJ | SCONJ).then(|| self.conjunction.unwrap_or_default());
    // Unlike the other fields, existing determiner data is never carried over.
    let determiner = matches!(pos, DET).then(DeterminerData::default);
    // Overwrite every part-of-speech field: all but the selected one become empty.
    self.noun = noun;
    self.pronoun = pronoun;
    self.verb = verb;
    self.adjective = adjective;
    self.adverb = adverb;
    self.conjunction = conjunction;
    self.determiner = determiner;
    self.affix = None;
    self.preposition = matches!(pos, ADP);
}
// Generates `is_<category>()`, `is_<sub>_<category>()` and `is_non_<sub>_<category>()` for
// every entry listed here, plus `is_likely_homograph()` and `difference()`.
// Noun `singular` and `countable` are deliberately not listed: `is_singular_noun` and
// `is_countable_noun` are hand-written in this impl because an unset flag counts as true there.
generate_metadata_queries!(
noun has proper, plural, mass, possessive.
pronoun has personal, singular, plural, possessive, reflexive, subject, object.
determiner has demonstrative, possessive, quantifier.
verb has linking, auxiliary.
conjunction has.
adjective has.
adverb has manner, frequency, degree
);
/// Returns the grammatical person recorded for the word as a pronoun, if any.
pub fn get_person(&self) -> Option<Person> {
    self.pronoun?.person
}
pub fn is_first_person_plural_pronoun(&self) -> bool {
matches!(
self.pronoun,
Some(PronounData {
person: Some(Person::First),
is_plural: Some(true),
..
})
)
}
pub fn is_first_person_singular_pronoun(&self) -> bool {
matches!(
self.pronoun,
Some(PronounData {
person: Some(Person::First),
is_singular: Some(true),
..
})
)
}
pub fn is_third_person_plural_pronoun(&self) -> bool {
matches!(
self.pronoun,
Some(PronounData {
person: Some(Person::Third),
is_plural: Some(true),
..
})
)
}
pub fn is_third_person_singular_pronoun(&self) -> bool {
matches!(
self.pronoun,
Some(PronounData {
person: Some(Person::Third),
is_singular: Some(true),
..
})
)
}
pub fn is_third_person_pronoun(&self) -> bool {
matches!(
self.pronoun,
Some(PronounData {
person: Some(Person::Third),
..
})
)
}
pub fn is_second_person_pronoun(&self) -> bool {
matches!(
self.pronoun,
Some(PronounData {
person: Some(Person::Second),
..
})
)
}
pub fn is_verb_lemma(&self) -> bool {
if let Some(verb) = self.verb {
if let Some(forms) = verb.verb_forms {
return forms.is_empty() || forms.contains(VerbFormFlags::LEMMA);
} else {
return true;
}
}
false
}
/// Checks whether the word is a verb whose recorded forms include `VerbFormFlags::PAST`.
pub fn is_verb_past_form(&self) -> bool {
    self.verb
        .and_then(|verb| verb.verb_forms)
        .is_some_and(|forms| forms.contains(VerbFormFlags::PAST))
}
/// Checks whether the word is a verb whose recorded forms include
/// `VerbFormFlags::PRETERITE`.
pub fn is_verb_simple_past_form(&self) -> bool {
    self.verb
        .and_then(|verb| verb.verb_forms)
        .is_some_and(|forms| forms.contains(VerbFormFlags::PRETERITE))
}
/// Checks whether the word is a verb whose recorded forms include
/// `VerbFormFlags::PAST_PARTICIPLE`.
pub fn is_verb_past_participle_form(&self) -> bool {
    self.verb
        .and_then(|verb| verb.verb_forms)
        .is_some_and(|forms| forms.contains(VerbFormFlags::PAST_PARTICIPLE))
}
/// Checks whether the word is a verb whose recorded forms include
/// `VerbFormFlags::PROGRESSIVE`.
pub fn is_verb_progressive_form(&self) -> bool {
    self.verb
        .and_then(|verb| verb.verb_forms)
        .is_some_and(|forms| forms.contains(VerbFormFlags::PROGRESSIVE))
}
/// Checks whether the word is a verb whose recorded forms include
/// `VerbFormFlags::THIRD_PERSON_SINGULAR`.
pub fn is_verb_third_person_singular_present_form(&self) -> bool {
    self.verb
        .and_then(|verb| verb.verb_forms)
        .is_some_and(|forms| forms.contains(VerbFormFlags::THIRD_PERSON_SINGULAR))
}
/// Checks whether the word is a noun that can be singular.
///
/// Singular is the default: a noun qualifies when it is explicitly marked singular, or
/// when it is not marked plural.
pub fn is_singular_noun(&self) -> bool {
    self.noun
        .is_some_and(|noun| noun.is_singular == Some(true) || noun.is_plural != Some(true))
}
/// Checks whether the word is a noun that cannot be singular, i.e. it is marked plural and
/// not explicitly marked singular. Always `false` for non-nouns.
pub fn is_non_singular_noun(&self) -> bool {
    self.noun
        .is_some_and(|noun| noun.is_singular != Some(true) && noun.is_plural == Some(true))
}
/// Checks whether the word is a noun that can be countable.
///
/// Countable is the default: a noun qualifies when it is explicitly marked countable, or
/// when it is not marked as a mass noun.
pub fn is_countable_noun(&self) -> bool {
    self.noun
        .is_some_and(|noun| noun.is_countable == Some(true) || noun.is_mass != Some(true))
}
/// Checks whether the word is a noun that cannot be countable, i.e. it is marked as a mass
/// noun and not explicitly marked countable. Always `false` for non-nouns.
pub fn is_non_countable_noun(&self) -> bool {
    self.noun
        .is_some_and(|noun| noun.is_countable != Some(true) && noun.is_mass == Some(true))
}
/// Checks whether the word is a noun marked as a mass noun and not marked countable.
pub fn is_mass_noun_only(&self) -> bool {
    self.noun
        .is_some_and(|noun| noun.is_mass == Some(true) && noun.is_countable != Some(true))
}
/// Checks whether the word is labeled as a noun or as a pronoun.
pub fn is_nominal(&self) -> bool {
    self.noun.is_some() || self.pronoun.is_some()
}
/// Checks whether the word is a singular noun or a singular pronoun.
pub fn is_singular_nominal(&self) -> bool {
self.is_singular_noun() || self.is_singular_pronoun()
}
/// Checks whether the word is a plural noun or a plural pronoun.
pub fn is_plural_nominal(&self) -> bool {
self.is_plural_noun() || self.is_plural_pronoun()
}
/// Checks whether the word is a possessive noun or a possessive determiner.
///
/// Note that the second alternative is the possessive *determiner* query, not the
/// possessive pronoun query.
pub fn is_possessive_nominal(&self) -> bool {
self.is_possessive_noun() || self.is_possessive_determiner()
}
/// Checks whether the word is a non-singular noun or a pronoun not marked singular.
pub fn is_non_singular_nominal(&self) -> bool {
self.is_non_singular_noun() || self.is_non_singular_pronoun()
}
/// Checks whether the word is a noun not marked plural or a pronoun not marked plural.
pub fn is_non_plural_nominal(&self) -> bool {
self.is_non_plural_noun() || self.is_non_plural_pronoun()
}
/// Returns the degree of comparison recorded for the word as an adjective, if any.
pub fn get_degree(&self) -> Option<Degree> {
    self.adjective?.degree
}
pub fn is_comparative_adjective(&self) -> bool {
matches!(
self.adjective,
Some(AdjectiveData {
degree: Some(Degree::Comparative)
})
)
}
pub fn is_superlative_adjective(&self) -> bool {
matches!(
self.adjective,
Some(AdjectiveData {
degree: Some(Degree::Superlative)
})
)
}
/// Checks whether the word is an adjective in the positive degree.
///
/// An adjective with no recorded degree counts as positive.
pub fn is_positive_adjective(&self) -> bool {
    matches!(
        self.adjective,
        Some(AdjectiveData {
            degree: None | Some(Degree::Positive)
        })
    )
}
/// Checks whether the word is a quantifier; shorthand for `is_quantifier_determiner`.
pub fn is_quantifier(&self) -> bool {
self.is_quantifier_determiner()
}
/// Checks whether the word is explicitly marked as a swear word. An unset marker counts
/// as "not a swear".
pub fn is_swear(&self) -> bool {
    self.swear == Some(true)
}
/// Checks whether the orthography flags include `OrthFlags::LOWERCASE`.
pub fn is_lowercase(&self) -> bool {
self.orth_info.contains(OrthFlags::LOWERCASE)
}
/// Checks whether the orthography flags include `OrthFlags::TITLECASE`.
pub fn is_titlecase(&self) -> bool {
self.orth_info.contains(OrthFlags::TITLECASE)
}
/// Checks whether the orthography flags include `OrthFlags::ALLCAPS`.
pub fn is_allcaps(&self) -> bool {
self.orth_info.contains(OrthFlags::ALLCAPS)
}
/// Checks whether the orthography flags include `OrthFlags::LOWER_CAMEL`.
pub fn is_lower_camel(&self) -> bool {
self.orth_info.contains(OrthFlags::LOWER_CAMEL)
}
/// Checks whether the orthography flags include `OrthFlags::UPPER_CAMEL`.
pub fn is_upper_camel(&self) -> bool {
self.orth_info.contains(OrthFlags::UPPER_CAMEL)
}
/// Checks whether the orthography flags include `OrthFlags::APOSTROPHE`.
pub fn is_apostrophized(&self) -> bool {
self.orth_info.contains(OrthFlags::APOSTROPHE)
}
/// Checks whether the orthography flags include `OrthFlags::ROMAN_NUMERALS`.
pub fn is_roman_numerals(&self) -> bool {
self.orth_info.contains(OrthFlags::ROMAN_NUMERALS)
}
/// Merges `other` into `self` in place and returns `self` for chaining.
///
/// - Part-of-speech payloads present on both sides are combined with their own `or`;
///   otherwise whichever side has one is kept.
/// - `preposition`, `common`, `dialects` and `orth_info` are OR-ed together.
/// - For `swear`, `derived_from`, `pos_tag` and `np_member` the value already on `self`
///   wins; `other`'s value is only used to fill a `None`.
pub fn merge(&mut self, other: &Self) -> &mut Self {
    macro_rules! combine {
        ($mine:expr, $theirs:expr) => {
            match ($mine, $theirs) {
                // Both sides carry data: defer to the payload type's own `or`.
                (Some(lhs), Some(rhs)) => Some(lhs.or(&rhs)),
                // At most one side carries data: plain `Option::or` picks it.
                (lhs, rhs) => lhs.or(rhs),
            }
        };
    }
    // Part-of-speech payloads.
    self.noun = combine!(self.noun, other.noun);
    self.pronoun = combine!(self.pronoun, other.pronoun);
    self.verb = combine!(self.verb, other.verb);
    self.adjective = combine!(self.adjective, other.adjective);
    self.adverb = combine!(self.adverb, other.adverb);
    self.conjunction = combine!(self.conjunction, other.conjunction);
    self.determiner = combine!(self.determiner, other.determiner);
    self.affix = combine!(self.affix, other.affix);
    // Boolean and bit-set fields accumulate.
    self.preposition |= other.preposition;
    self.common |= other.common;
    self.dialects |= other.dialects;
    self.orth_info |= other.orth_info;
    // Optional scalars: keep ours, fall back to theirs.
    self.swear = self.swear.or(other.swear);
    self.derived_from = self.derived_from.or(other.derived_from);
    self.pos_tag = self.pos_tag.or(other.pos_tag);
    self.np_member = self.np_member.or(other.np_member);
    self
}
}
/// The verb forms tracked in the dictionary.
///
/// Each variant's discriminant is a distinct single bit, so the variants can be used as the
/// bit values of `VerbFormFlags`.
#[repr(u32)]
pub enum VerbForm {
/// Bit for `VerbFormFlags::LEMMA`.
LemmaForm = 1 << 0,
/// Bit for `VerbFormFlags::PAST`.
PastForm = 1 << 1,
/// Bit for `VerbFormFlags::PRETERITE`.
SimplePastForm = 1 << 2,
/// Bit for `VerbFormFlags::PAST_PARTICIPLE`.
PastParticipleForm = 1 << 3,
/// Bit for `VerbFormFlags::PROGRESSIVE`.
ProgressiveForm = 1 << 4,
/// Bit for `VerbFormFlags::THIRD_PERSON_SINGULAR`.
ThirdPersonSingularPresentForm = 1 << 5,
}
/// Integer type backing `VerbFormFlags`; matches the `#[repr(u32)]` of `VerbForm`.
pub type VerbFormFlagsUnderlyingType = u32;
bitflags::bitflags! {
/// A set of verb forms, one bit per `VerbForm` variant.
///
/// Serialized transparently as the underlying integer.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
#[serde(transparent)]
pub struct VerbFormFlags: VerbFormFlagsUnderlyingType {
const LEMMA = VerbForm::LemmaForm as VerbFormFlagsUnderlyingType;
const PAST = VerbForm::PastForm as VerbFormFlagsUnderlyingType;
const PRETERITE = VerbForm::SimplePastForm as VerbFormFlagsUnderlyingType;
const PAST_PARTICIPLE = VerbForm::PastParticipleForm as VerbFormFlagsUnderlyingType;
const PROGRESSIVE = VerbForm::ProgressiveForm as VerbFormFlagsUnderlyingType;
const THIRD_PERSON_SINGULAR = VerbForm::ThirdPersonSingularPresentForm as VerbFormFlagsUnderlyingType;
}
}
/// Verb-specific metadata. Unset (`None`) flags are treated as "not marked" by the
/// generated `is_non_*_verb` queries.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct VerbData {
/// Whether the verb is marked as a linking verb.
pub is_linking: Option<bool>,
/// Whether the verb is marked as an auxiliary verb.
pub is_auxiliary: Option<bool>,
/// The forms this word represents. Serialized under the key `verb_form`; defaults to
/// `None` when missing.
#[serde(rename = "verb_form", default)]
pub verb_forms: Option<VerbFormFlags>,
}
impl VerbData {
    /// Combines two verb payloads.
    ///
    /// Boolean markers keep `self`'s value when set and otherwise take `other`'s. Verb form
    /// sets are unioned when both sides have one.
    pub fn or(&self, other: &Self) -> Self {
        let verb_forms = match (self.verb_forms, other.verb_forms) {
            (Some(mine), Some(theirs)) => Some(mine | theirs),
            // At most one side is set: `Option::or` returns it (or `None`).
            (mine, theirs) => mine.or(theirs),
        };
        let is_linking = self.is_linking.or(other.is_linking);
        let is_auxiliary = self.is_auxiliary.or(other.is_auxiliary);
        Self {
            is_linking,
            is_auxiliary,
            verb_forms,
        }
    }
}
/// Noun-specific metadata. Every flag is optional; `None` means the property is not marked.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct NounData {
/// Whether the noun is marked as a proper noun.
pub is_proper: Option<bool>,
/// Whether the noun is marked as singular. `is_singular_noun` treats an unmarked noun as
/// singular unless it is marked plural.
pub is_singular: Option<bool>,
/// Whether the noun is marked as plural.
pub is_plural: Option<bool>,
/// Whether the noun is marked as countable. `is_countable_noun` treats an unmarked noun
/// as countable unless it is marked as a mass noun.
pub is_countable: Option<bool>,
/// Whether the noun is marked as a mass noun.
pub is_mass: Option<bool>,
/// Whether the noun is marked as possessive.
pub is_possessive: Option<bool>,
}
impl NounData {
    /// Combines two noun payloads field by field: `self`'s value wins when set, otherwise
    /// `other`'s is used.
    pub fn or(&self, other: &Self) -> Self {
        // Exhaustive destructuring so a newly added field cannot be forgotten here.
        let Self {
            is_proper,
            is_singular,
            is_plural,
            is_countable,
            is_mass,
            is_possessive,
        } = *self;
        Self {
            is_proper: is_proper.or(other.is_proper),
            is_singular: is_singular.or(other.is_singular),
            is_plural: is_plural.or(other.is_plural),
            is_countable: is_countable.or(other.is_countable),
            is_mass: is_mass.or(other.is_mass),
            is_possessive: is_possessive.or(other.is_possessive),
        }
    }
}
/// Grammatical person of a pronoun.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
pub enum Person {
/// First person.
First,
/// Second person.
Second,
/// Third person.
Third,
}
/// Pronoun-specific metadata. Every field is optional; `None` means the property is not
/// marked.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct PronounData {
/// Whether the pronoun is marked as a personal pronoun.
pub is_personal: Option<bool>,
/// Whether the pronoun is marked as singular.
pub is_singular: Option<bool>,
/// Whether the pronoun is marked as plural.
pub is_plural: Option<bool>,
/// Whether the pronoun is marked as possessive.
pub is_possessive: Option<bool>,
/// Whether the pronoun is marked as reflexive.
pub is_reflexive: Option<bool>,
/// Grammatical person of the pronoun, when recorded.
pub person: Option<Person>,
/// Whether the pronoun is marked as a subject pronoun.
pub is_subject: Option<bool>,
/// Whether the pronoun is marked as an object pronoun.
pub is_object: Option<bool>,
}
impl PronounData {
    /// Combines two pronoun payloads field by field: `self`'s value wins when set,
    /// otherwise `other`'s is used.
    pub fn or(&self, other: &Self) -> Self {
        // Exhaustive destructuring so a newly added field cannot be forgotten here.
        let Self {
            is_personal,
            is_singular,
            is_plural,
            is_possessive,
            is_reflexive,
            person,
            is_subject,
            is_object,
        } = *self;
        Self {
            is_personal: is_personal.or(other.is_personal),
            is_singular: is_singular.or(other.is_singular),
            is_plural: is_plural.or(other.is_plural),
            is_possessive: is_possessive.or(other.is_possessive),
            is_reflexive: is_reflexive.or(other.is_reflexive),
            person: person.or(other.person),
            is_subject: is_subject.or(other.is_subject),
            is_object: is_object.or(other.is_object),
        }
    }
}
/// Determiner-specific metadata. Every flag is optional; `None` means the property is not
/// marked.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct DeterminerData {
/// Whether the determiner is marked as demonstrative.
pub is_demonstrative: Option<bool>,
/// Whether the determiner is marked as possessive.
pub is_possessive: Option<bool>,
/// Whether the determiner is marked as a quantifier.
pub is_quantifier: Option<bool>,
}
impl DeterminerData {
    /// Combines two determiner payloads field by field: `self`'s value wins when set,
    /// otherwise `other`'s is used.
    pub fn or(&self, other: &Self) -> Self {
        // Exhaustive destructuring so a newly added field cannot be forgotten here.
        let Self {
            is_demonstrative,
            is_possessive,
            is_quantifier,
        } = *self;
        Self {
            is_demonstrative: is_demonstrative.or(other.is_demonstrative),
            is_possessive: is_possessive.or(other.is_possessive),
            is_quantifier: is_quantifier.or(other.is_quantifier),
        }
    }
}
/// Degree of comparison of an adjective.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
pub enum Degree {
/// The base form.
Positive,
/// The comparative form.
Comparative,
/// The superlative form.
Superlative,
}
/// Adjective-specific metadata.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct AdjectiveData {
/// Degree of comparison, when recorded. `is_positive_adjective` treats `None` as positive.
pub degree: Option<Degree>,
}
impl AdjectiveData {
    /// Combines two adjective payloads: `self`'s degree wins when set, otherwise `other`'s
    /// is used.
    pub fn or(&self, other: &Self) -> Self {
        let degree = self.degree.or(other.degree);
        Self { degree }
    }
}
/// Adverb-specific metadata. Every flag is optional; `None` means the property is not
/// marked.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct AdverbData {
/// Whether the adverb is marked as an adverb of manner.
pub is_manner: Option<bool>,
/// Whether the adverb is marked as an adverb of frequency.
pub is_frequency: Option<bool>,
/// Whether the adverb is marked as an adverb of degree.
pub is_degree: Option<bool>,
}
impl AdverbData {
    /// Combines two adverb payloads field by field: `self`'s value wins when set, otherwise
    /// `other`'s is used.
    // The parameter was previously spelled `_other` even though it is read; the underscore
    // prefix signals "unused" and was inconsistent with the sibling `or` methods.
    pub fn or(&self, other: &Self) -> Self {
        Self {
            is_manner: self.is_manner.or(other.is_manner),
            is_frequency: self.is_frequency.or(other.is_frequency),
            is_degree: self.is_degree.or(other.is_degree),
        }
    }
}
/// Conjunction marker. It carries no fields; its presence alone labels a word as a
/// conjunction.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct ConjunctionData {}
impl ConjunctionData {
pub fn or(&self, _other: &Self) -> Self {
Self {}
}
}
/// Affix-specific metadata. Every flag is optional; `None` means the property is not
/// marked.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct AffixData {
/// Whether the affix is marked as a prefix.
pub is_prefix: Option<bool>,
/// Whether the affix is marked as a suffix.
pub is_suffix: Option<bool>,
}
impl AffixData {
    /// Combines two affix payloads field by field: `self`'s value wins when set, otherwise
    /// `other`'s is used.
    // The parameter was previously spelled `_other` even though it is read; the underscore
    // prefix signals "unused" and was inconsistent with the sibling `or` methods.
    pub fn or(&self, other: &Self) -> Self {
        Self {
            is_prefix: self.is_prefix.or(other.is_prefix),
            is_suffix: self.is_suffix.or(other.is_suffix),
        }
    }
}
/// A single English dialect.
///
/// Each variant's discriminant is a distinct single bit, so the variants can be used as the
/// bit values of `DialectFlags`.
#[derive(
Debug,
Clone,
Copy,
Serialize,
Deserialize,
PartialEq,
PartialOrd,
Eq,
Hash,
EnumCount,
EnumString,
EnumIter,
Display,
VariantArray,
)]
pub enum Dialect {
/// Abbreviated `"US"` by `try_from_abbr`.
American = 1 << 0,
/// Abbreviated `"CA"` by `try_from_abbr`.
Canadian = 1 << 1,
/// Abbreviated `"AU"` by `try_from_abbr`.
Australian = 1 << 2,
/// Abbreviated `"GB"` by `try_from_abbr`.
British = 1 << 3,
/// Abbreviated `"IN"` by `try_from_abbr`.
Indian = 1 << 4,
}
impl Dialect {
#[must_use]
pub fn try_guess_from_document(document: &Document) -> Option<Self> {
Self::try_from(DialectFlags::get_most_used_dialects_from_document(document)).ok()
}
#[must_use]
pub fn try_from_abbr(abbr: &str) -> Option<Self> {
match abbr {
"US" => Some(Self::American),
"CA" => Some(Self::Canadian),
"AU" => Some(Self::Australian),
"GB" => Some(Self::British),
"IN" => Some(Self::Indian),
_ => None,
}
}
}
impl TryFrom<DialectFlags> for Dialect {
    type Error = ();
    /// Converts a flag set into a single dialect.
    ///
    /// Succeeds only when exactly one bit is set and that bit belongs to a known dialect.
    /// Empty sets, sets with several bits, and unknown bits all yield `Err(())`.
    fn try_from(dialect_flags: DialectFlags) -> Result<Self, Self::Error> {
        if dialect_flags.bits().count_ones() != 1 {
            return Err(());
        }
        Dialect::VARIANTS
            .iter()
            .copied()
            .find(|&dialect| dialect_flags.is_dialect_enabled_strict(dialect))
            .ok_or(())
    }
}
/// Integer type backing `DialectFlags`; wide enough for the `Dialect` discriminants, which
/// go up to `1 << 4`.
type DialectFlagsUnderlyingType = u8;
bitflags::bitflags! {
/// A set of dialects, one bit per `Dialect` variant.
///
/// Serialized transparently as the underlying integer.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash)]
#[serde(transparent)]
pub struct DialectFlags: DialectFlagsUnderlyingType {
const AMERICAN = Dialect::American as DialectFlagsUnderlyingType;
const CANADIAN = Dialect::Canadian as DialectFlagsUnderlyingType;
const AUSTRALIAN = Dialect::Australian as DialectFlagsUnderlyingType;
const BRITISH = Dialect::British as DialectFlagsUnderlyingType;
const INDIAN = Dialect::Indian as DialectFlagsUnderlyingType;
}
}
impl DialectFlags {
    /// Checks whether `dialect` is enabled, treating an empty flag set as "every dialect".
    #[must_use]
    pub fn is_dialect_enabled(self, dialect: Dialect) -> bool {
        if self.is_empty() {
            return true;
        }
        self.intersects(Self::from_dialect(dialect))
    }
    /// Checks whether `dialect`'s bit is explicitly set; an empty flag set enables nothing.
    #[must_use]
    pub fn is_dialect_enabled_strict(self, dialect: Dialect) -> bool {
        let wanted = Self::from_dialect(dialect);
        self.intersects(wanted)
    }
    /// Builds the flag set containing exactly `dialect`.
    ///
    /// # Panics
    ///
    /// Panics if the dialect's discriminant is not a bit defined in `DialectFlags`.
    #[must_use]
    pub fn from_dialect(dialect: Dialect) -> Self {
        match Self::from_bits(dialect as DialectFlagsUnderlyingType) {
            Some(flags) => flags,
            None => panic!("The '{dialect}' dialect isn't defined in DialectFlags!"),
        }
    }
    /// Returns the dialect(s) that the most words of `document` are enabled for.
    ///
    /// Every word token carrying metadata adds one to the counter of each dialect it is
    /// enabled for (non-strict check, so words with no dialect flags count towards all
    /// dialects). All dialects sharing the highest count are returned, so a tie produces
    /// several flags. When no word contributes, every counter is zero and all dialects are
    /// returned.
    #[must_use]
    pub fn get_most_used_dialects_from_document(document: &Document) -> Self {
        let mut dialect_counters: [(Dialect, usize); Dialect::COUNT] = Dialect::VARIANTS
            .iter()
            .map(|dialect| (*dialect, 0))
            .collect_array()
            .unwrap();
        for word in document.iter_words() {
            // Only words with attached dictionary metadata take part in the vote.
            let TokenKind::Word(Some(lexeme_metadata)) = &word.kind else {
                continue;
            };
            for (dialect, count) in dialect_counters.iter_mut() {
                if lexeme_metadata.dialects.is_dialect_enabled(*dialect) {
                    *count += 1;
                }
            }
        }
        let highest = dialect_counters
            .iter()
            .map(|&(_, count)| count)
            .max()
            .unwrap();
        let mut most_used = DialectFlags::empty();
        for (dialect, count) in dialect_counters {
            if count == highest {
                most_used |= Self::from_dialect(dialect);
            }
        }
        most_used
    }
}
impl Default for DialectFlags {
fn default() -> Self {
Self::empty()
}
}
#[cfg(test)]
pub mod tests {
use crate::DictWordMetadata;
use crate::spell::{Dictionary, FstDictionary};
/// Test helper: looks `word` up in the curated dictionary and returns an owned copy of its
/// metadata.
///
/// # Panics
///
/// Panics with "Word '<word>' not found in dictionary" when the word has no entry; some
/// tests rely on this via `#[should_panic]`.
pub fn md(word: &str) -> DictWordMetadata {
FstDictionary::curated()
.get_word_metadata_str(word)
.unwrap_or_else(|| panic!("Word '{word}' not found in dictionary"))
.into_owned()
}
/// Dialect guessing from document contents.
mod dialect {
use super::super::{Dialect, DialectFlags};
use crate::Document;
// "Aluminium" should vote for British and not for American.
#[test]
fn guess_british_dialect() {
let document = Document::new_plain_english_curated("Aluminium was used.");
let df = DialectFlags::get_most_used_dialects_from_document(&document);
assert!(
df.is_dialect_enabled_strict(Dialect::British)
&& !df.is_dialect_enabled_strict(Dialect::American)
);
}
// "Aluminum" should vote for American and not for British.
#[test]
fn guess_american_dialect() {
let document = Document::new_plain_english_curated("Aluminum was used.");
let df = DialectFlags::get_most_used_dialects_from_document(&document);
assert!(
df.is_dialect_enabled_strict(Dialect::American)
&& !df.is_dialect_enabled_strict(Dialect::British)
);
}
}
/// Noun queries: properness, number, possessiveness and countability.
mod noun {
    use crate::dict_word_metadata::tests::md;
    #[test]
    fn puppy_is_noun() {
        assert!(md("puppy").is_noun());
    }
    #[test]
    fn prepare_is_not_noun() {
        assert!(!md("prepare").is_noun());
    }
    #[test]
    fn paris_is_proper_noun() {
        assert!(md("Paris").is_proper_noun());
    }
    // Previously named `permit_is_non_proper_noun` although the word checked is "lapdog".
    #[test]
    fn lapdog_is_non_proper_noun() {
        assert!(md("lapdog").is_non_proper_noun());
    }
    #[test]
    fn hound_is_singular_noun() {
        assert!(md("hound").is_singular_noun());
    }
    #[test]
    fn pooches_is_non_singular_noun() {
        assert!(md("pooches").is_non_singular_noun());
    }
    // A non-noun must not be reported as a non-singular noun.
    #[test]
    fn loyal_doesnt_pass_is_non_singular_noun() {
        assert!(!md("loyal").is_non_singular_noun());
    }
    #[test]
    fn hounds_is_plural_noun() {
        assert!(md("hounds").is_plural_noun());
    }
    #[test]
    fn pooch_is_non_plural_noun() {
        assert!(md("pooch").is_non_plural_noun());
    }
    // Nouns whose singular and plural share one spelling.
    #[test]
    fn fish_is_singular_noun() {
        assert!(md("fish").is_singular_noun());
    }
    #[test]
    fn fish_is_plural_noun() {
        assert!(md("fish").is_plural_noun());
    }
    #[test]
    fn fishes_is_plural_noun() {
        assert!(md("fishes").is_plural_noun());
    }
    #[test]
    fn sheep_is_singular_noun() {
        assert!(md("sheep").is_singular_noun());
    }
    #[test]
    fn sheep_is_plural_noun() {
        assert!(md("sheep").is_plural_noun());
    }
    // `md` panics for words missing from the dictionary.
    #[test]
    #[should_panic]
    fn sheeps_is_not_word() {
        md("sheeps");
    }
    #[test]
    fn bicep_is_singular_noun() {
        assert!(md("bicep").is_singular_noun());
    }
    #[test]
    fn biceps_is_singular_noun() {
        assert!(md("biceps").is_singular_noun());
    }
    #[test]
    fn biceps_is_plural_noun() {
        assert!(md("biceps").is_plural_noun());
    }
    #[test]
    fn aircraft_is_singular_noun() {
        assert!(md("aircraft").is_singular_noun());
    }
    #[test]
    fn aircraft_is_plural_noun() {
        assert!(md("aircraft").is_plural_noun());
    }
    // `md` panics for words missing from the dictionary.
    #[test]
    #[should_panic]
    fn aircrafts_is_not_word() {
        md("aircrafts");
    }
    #[test]
    fn dog_apostrophe_s_is_possessive_noun() {
        assert!(md("dog's").is_possessive_noun());
    }
    #[test]
    fn dogs_is_non_possessive_noun() {
        assert!(md("dogs").is_non_possessive_noun());
    }
    #[test]
    fn dog_is_countable() {
        assert!(md("dog").is_countable_noun());
    }
    #[test]
    fn dog_is_non_mass_noun() {
        assert!(md("dog").is_non_mass_noun());
    }
    #[test]
    fn furniture_is_mass_noun() {
        assert!(md("furniture").is_mass_noun());
    }
    #[test]
    fn furniture_is_non_countable_noun() {
        assert!(md("furniture").is_non_countable_noun());
    }
    #[test]
    fn equipment_is_mass_noun() {
        assert!(md("equipment").is_mass_noun());
    }
    #[test]
    fn equipment_is_non_countable_noun() {
        assert!(md("equipment").is_non_countable_noun());
    }
    // "beer" is expected to be both countable and a mass noun.
    #[test]
    fn beer_is_countable_noun() {
        assert!(md("beer").is_countable_noun());
    }
    #[test]
    fn beer_is_mass_noun() {
        assert!(md("beer").is_mass_noun());
    }
}
mod pronoun {
use crate::dict_word_metadata::tests::md;
/// First person singular: subject "I", object "me", reflexive "myself".
mod i_me_myself {
use crate::dict_word_metadata::tests::md;
#[test]
fn i_is_pronoun() {
assert!(md("I").is_pronoun());
}
#[test]
fn i_is_personal_pronoun() {
assert!(md("I").is_personal_pronoun());
}
#[test]
fn i_is_singular_pronoun() {
assert!(md("I").is_singular_pronoun());
}
#[test]
fn i_is_subject_pronoun() {
assert!(md("I").is_subject_pronoun());
}
#[test]
fn me_is_pronoun() {
assert!(md("me").is_pronoun());
}
#[test]
fn me_is_personal_pronoun() {
assert!(md("me").is_personal_pronoun());
}
#[test]
fn me_is_singular_pronoun() {
assert!(md("me").is_singular_pronoun());
}
#[test]
fn me_is_object_pronoun() {
assert!(md("me").is_object_pronoun());
}
#[test]
fn myself_is_pronoun() {
assert!(md("myself").is_pronoun());
}
#[test]
fn myself_is_personal_pronoun() {
assert!(md("myself").is_personal_pronoun());
}
#[test]
fn myself_is_singular_pronoun() {
assert!(md("myself").is_singular_pronoun());
}
#[test]
fn myself_is_reflexive_pronoun() {
assert!(md("myself").is_reflexive_pronoun());
}
}
/// First person plural: subject "we", object "us", reflexive "ourselves".
mod we_us_ourselves {
use crate::dict_word_metadata::tests::md;
#[test]
fn we_is_pronoun() {
assert!(md("we").is_pronoun());
}
#[test]
fn we_is_personal_pronoun() {
assert!(md("we").is_personal_pronoun());
}
#[test]
fn we_is_plural_pronoun() {
assert!(md("we").is_plural_pronoun());
}
#[test]
fn we_is_subject_pronoun() {
assert!(md("we").is_subject_pronoun());
}
#[test]
fn us_is_pronoun() {
assert!(md("us").is_pronoun());
}
#[test]
fn us_is_personal_pronoun() {
assert!(md("us").is_personal_pronoun());
}
#[test]
fn us_is_plural_pronoun() {
assert!(md("us").is_plural_pronoun());
}
#[test]
fn us_is_object_pronoun() {
assert!(md("us").is_object_pronoun());
}
#[test]
fn ourselves_is_pronoun() {
assert!(md("ourselves").is_pronoun());
}
#[test]
fn ourselves_is_personal_pronoun() {
assert!(md("ourselves").is_personal_pronoun());
}
#[test]
fn ourselves_is_plural_pronoun() {
assert!(md("ourselves").is_plural_pronoun());
}
#[test]
fn ourselves_is_reflexive_pronoun() {
assert!(md("ourselves").is_reflexive_pronoun());
}
}
/// Second person: "you" (expected to be both singular and plural, subject and object) and
/// reflexive "yourself".
mod you_yourself {
use crate::dict_word_metadata::tests::md;
#[test]
fn you_is_pronoun() {
assert!(md("you").is_pronoun());
}
#[test]
fn you_is_personal_pronoun() {
assert!(md("you").is_personal_pronoun());
}
#[test]
fn you_is_singular_pronoun() {
assert!(md("you").is_singular_pronoun());
}
#[test]
fn you_is_plural_pronoun() {
assert!(md("you").is_plural_pronoun());
}
#[test]
fn you_is_subject_pronoun() {
assert!(md("you").is_subject_pronoun());
}
#[test]
fn you_is_object_pronoun() {
assert!(md("you").is_object_pronoun());
}
#[test]
fn yourself_is_pronoun() {
assert!(md("yourself").is_pronoun());
}
#[test]
fn yourself_is_personal_pronoun() {
assert!(md("yourself").is_personal_pronoun());
}
#[test]
fn yourself_is_singular_pronoun() {
assert!(md("yourself").is_singular_pronoun());
}
#[test]
fn yourself_is_reflexive_pronoun() {
assert!(md("yourself").is_reflexive_pronoun());
}
}
/// Third person singular masculine: subject "he", object "him", reflexive "himself".
mod he_him_himself {
use crate::dict_word_metadata::tests::md;
#[test]
fn he_is_pronoun() {
assert!(md("he").is_pronoun());
}
#[test]
fn he_is_personal_pronoun() {
assert!(md("he").is_personal_pronoun());
}
#[test]
fn he_is_singular_pronoun() {
assert!(md("he").is_singular_pronoun());
}
#[test]
fn he_is_subject_pronoun() {
assert!(md("he").is_subject_pronoun());
}
#[test]
fn him_is_pronoun() {
assert!(md("him").is_pronoun());
}
#[test]
fn him_is_personal_pronoun() {
assert!(md("him").is_personal_pronoun());
}
#[test]
fn him_is_singular_pronoun() {
assert!(md("him").is_singular_pronoun());
}
#[test]
fn him_is_object_pronoun() {
assert!(md("him").is_object_pronoun());
}
#[test]
fn himself_is_pronoun() {
assert!(md("himself").is_pronoun());
}
#[test]
fn himself_is_personal_pronoun() {
assert!(md("himself").is_personal_pronoun());
}
#[test]
fn himself_is_singular_pronoun() {
assert!(md("himself").is_singular_pronoun());
}
#[test]
fn himself_is_reflexive_pronoun() {
assert!(md("himself").is_reflexive_pronoun());
}
}
/// Third person singular feminine: subject "she", object "her", reflexive "herself".
mod she_her_herself {
use crate::dict_word_metadata::tests::md;
#[test]
fn she_is_pronoun() {
assert!(md("she").is_pronoun());
}
#[test]
fn she_is_personal_pronoun() {
assert!(md("she").is_personal_pronoun());
}
#[test]
fn she_is_singular_pronoun() {
assert!(md("she").is_singular_pronoun());
}
#[test]
fn she_is_subject_pronoun() {
assert!(md("she").is_subject_pronoun());
}
#[test]
fn her_is_pronoun() {
assert!(md("her").is_pronoun());
}
#[test]
fn her_is_personal_pronoun() {
assert!(md("her").is_personal_pronoun());
}
#[test]
fn her_is_singular_pronoun() {
assert!(md("her").is_singular_pronoun());
}
#[test]
fn her_is_object_pronoun() {
assert!(md("her").is_object_pronoun());
}
#[test]
fn herself_is_pronoun() {
assert!(md("herself").is_pronoun());
}
#[test]
fn herself_is_personal_pronoun() {
assert!(md("herself").is_personal_pronoun());
}
#[test]
fn herself_is_singular_pronoun() {
assert!(md("herself").is_singular_pronoun());
}
#[test]
fn herself_is_reflexive_pronoun() {
assert!(md("herself").is_reflexive_pronoun());
}
}
/// Third person singular neuter: "it" (expected to be both subject and object) and
/// reflexive "itself".
mod it_itself {
use crate::dict_word_metadata::tests::md;
#[test]
fn it_is_pronoun() {
assert!(md("it").is_pronoun());
}
#[test]
fn it_is_personal_pronoun() {
assert!(md("it").is_personal_pronoun());
}
#[test]
fn it_is_singular_pronoun() {
assert!(md("it").is_singular_pronoun());
}
#[test]
fn it_is_subject_pronoun() {
assert!(md("it").is_subject_pronoun());
}
#[test]
fn it_is_object_pronoun() {
assert!(md("it").is_object_pronoun());
}
#[test]
fn itself_is_pronoun() {
assert!(md("itself").is_pronoun());
}
#[test]
fn itself_is_personal_pronoun() {
assert!(md("itself").is_personal_pronoun());
}
#[test]
fn itself_is_singular_pronoun() {
assert!(md("itself").is_singular_pronoun());
}
#[test]
fn itself_is_reflexive_pronoun() {
assert!(md("itself").is_reflexive_pronoun());
}
}
/// Third person plural: subject "they", object "them", reflexive "themselves".
mod they_them_themselves {
use crate::dict_word_metadata::tests::md;
#[test]
fn they_is_pronoun() {
assert!(md("they").is_pronoun());
}
#[test]
fn they_is_personal_pronoun() {
assert!(md("they").is_personal_pronoun());
}
#[test]
fn they_is_plural_pronoun() {
assert!(md("they").is_plural_pronoun());
}
#[test]
fn they_is_subject_pronoun() {
assert!(md("they").is_subject_pronoun());
}
#[test]
fn them_is_pronoun() {
assert!(md("them").is_pronoun());
}
#[test]
fn them_is_personal_pronoun() {
assert!(md("them").is_personal_pronoun());
}
#[test]
fn them_is_plural_pronoun() {
assert!(md("them").is_plural_pronoun());
}
#[test]
fn them_is_object_pronoun() {
assert!(md("them").is_object_pronoun());
}
#[test]
fn themselves_is_pronoun() {
assert!(md("themselves").is_pronoun());
}
#[test]
fn themselves_is_personal_pronoun() {
assert!(md("themselves").is_personal_pronoun());
}
#[test]
fn themselves_is_plural_pronoun() {
assert!(md("themselves").is_plural_pronoun());
}
#[test]
fn themselves_is_reflexive_pronoun() {
assert!(md("themselves").is_reflexive_pronoun());
}
}
// Possessive pronouns: each must at least be labeled as a pronoun.
#[test]
fn mine_is_pronoun() {
assert!(md("mine").is_pronoun());
}
#[test]
fn ours_is_pronoun() {
assert!(md("ours").is_pronoun());
}
#[test]
fn yours_is_pronoun() {
assert!(md("yours").is_pronoun());
}
#[test]
fn his_is_pronoun() {
assert!(md("his").is_pronoun());
}
#[test]
fn hers_is_pronoun() {
assert!(md("hers").is_pronoun());
}
#[test]
fn its_is_pronoun() {
assert!(md("its").is_pronoun());
}
#[test]
fn theirs_is_pronoun() {
assert!(md("theirs").is_pronoun());
}
// Archaic second-person forms.
#[test]
fn archaic_pronouns() {
assert!(md("thou").is_pronoun());
assert!(md("thee").is_pronoun());
assert!(md("thyself").is_pronoun());
assert!(md("thine").is_pronoun());
}
// Generic "one" and its reflexive.
#[test]
fn generic_pronouns() {
assert!(md("one").is_pronoun());
assert!(md("oneself").is_pronoun());
}
// Relative and interrogative pronouns.
#[test]
fn relative_and_interrogative_pronouns() {
assert!(md("who").is_pronoun());
assert!(md("whom").is_pronoun());
assert!(md("whose").is_pronoun());
assert!(md("which").is_pronoun());
assert!(md("what").is_pronoun());
}
// Ignored until these words are added to the dictionary (`md` panics for missing words).
// The second word was previously spelled "y'all'" with a stray trailing apostrophe, which
// could never match a dictionary entry for "y'all".
#[test]
#[ignore = "not in dictionary"]
fn nonstandard_pronouns() {
    assert!(md("themself").pronoun.is_some());
    assert!(md("y'all").pronoun.is_some());
}
}
/// `is_possessive_nominal`: true for possessive determiners ("my") and possessive nouns
/// ("Fred's"), false for the possessive pronoun "mine" and for plain nouns.
mod nominal {
use crate::dict_word_metadata::tests::md;
#[test]
fn my_is_possessive_nominal() {
assert!(md("my").is_possessive_nominal());
}
#[test]
fn mine_is_not_possessive_nominal() {
assert!(!md("mine").is_possessive_nominal());
}
#[test]
fn freds_is_possessive_nominal() {
assert!(md("Fred's").is_possessive_nominal());
}
#[test]
fn fred_is_not_possessive_nominal() {
assert!(!md("Fred").is_possessive_nominal());
}
#[test]
fn dogs_is_possessive_nominal() {
assert!(md("dog's").is_possessive_nominal());
}
#[test]
fn microsofts_is_possessive_nominal() {
assert!(md("Microsoft's").is_possessive_nominal());
}
}
/// Adjective degree queries.
mod adjective {
use crate::{Degree, dict_word_metadata::tests::md};
#[test]
#[ignore = "not marked yet because it might not be reliable"]
fn big_is_positive() {
assert_eq!(md("big").get_degree(), Some(Degree::Positive));
}
#[test]
fn bigger_is_comparative() {
assert_eq!(md("bigger").get_degree(), Some(Degree::Comparative));
}
#[test]
fn biggest_is_superlative() {
assert_eq!(md("biggest").get_degree(), Some(Degree::Superlative));
}
// "bigly" is expected to be absent from the dictionary, so `md` itself panics before the
// assertion is evaluated.
#[test]
#[should_panic(expected = "Word 'bigly' not found in dictionary")]
fn bigly_is_not_an_adjective_form_we_track() {
assert_eq!(md("bigly").get_degree(), None);
}
#[test]
fn bigger_is_comparative_adjective() {
assert!(md("bigger").is_comparative_adjective());
}
#[test]
fn biggest_is_superlative_adjective() {
assert!(md("biggest").is_superlative_adjective());
}
}
// Determiner queries.
#[test]
fn the_is_determiner() {
assert!(md("the").is_determiner());
}
#[test]
fn this_is_demonstrative_determiner() {
assert!(md("this").is_demonstrative_determiner());
}
#[test]
fn your_is_possessive_determiner() {
assert!(md("your").is_possessive_determiner());
}
#[test]
fn every_is_quantifier() {
assert!(md("every").is_quantifier());
}
#[test]
fn the_isnt_quantifier() {
assert!(!md("the").is_quantifier());
}
// Mass-noun checks for "equipment". The first two assertions also appear in the `noun`
// module; only the negative countable check is unique to this place.
#[test]
fn equipment_is_mass_noun() {
assert!(md("equipment").is_mass_noun());
}
#[test]
fn equipment_is_non_countable_noun() {
assert!(md("equipment").is_non_countable_noun());
}
#[test]
fn equipment_isnt_countable_noun() {
assert!(!md("equipment").is_countable_noun());
}
/// Verb form queries. Each test shadows the `md` helper with the looked-up metadata.
mod verb {
use crate::dict_word_metadata::tests::md;
#[test]
fn lemma_walk() {
let md = md("walk");
assert!(md.is_verb_lemma())
}
#[test]
fn lemma_fix() {
let md = md("fix");
assert!(md.is_verb_lemma())
}
#[test]
fn progressive_walking() {
let md = md("walking");
assert!(md.is_verb_progressive_form())
}
#[test]
fn past_walked() {
let md = md("walked");
assert!(md.is_verb_past_form())
}
#[test]
fn simple_past_ate() {
let md = md("ate");
assert!(md.is_verb_simple_past_form())
}
#[test]
fn past_participle_eaten() {
let md = md("eaten");
assert!(md.is_verb_past_participle_form())
}
#[test]
fn third_pers_sing_walks() {
let md = md("walks");
assert!(md.is_verb_third_person_singular_present_form())
}
}
}