// harper_core/word_metadata.rs

use is_macro::Is;
use paste::paste;
use serde::{Deserialize, Serialize};
use strum_macros::EnumString;

use crate::WordId;
7
8#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Hash)]
9pub struct WordMetadata {
10    pub noun: Option<NounData>,
11    pub pronoun: Option<PronounData>,
12    pub verb: Option<VerbData>,
13    pub adjective: Option<AdjectiveData>,
14    pub adverb: Option<AdverbData>,
15    pub conjunction: Option<ConjunctionData>,
16    pub swear: Option<bool>,
17    /// The dialect this word belongs to.
18    /// If no dialect is defined, it can be assumed that the word is
19    /// valid in all dialects of English.
20    pub dialect: Option<Dialect>,
21    /// Whether the word is a [determiner](https://en.wikipedia.org/wiki/English_determiners).
22    #[serde(default = "default_false")]
23    pub determiner: bool,
24    /// Whether the word is a [preposition](https://www.merriam-webster.com/dictionary/preposition).
25    #[serde(default = "default_false")]
26    pub preposition: bool,
27    /// Whether the word is considered especially common.
28    #[serde(default = "default_false")]
29    pub common: bool,
30    #[serde(default = "default_none")]
31    pub derived_from: Option<WordId>,
32}
33
/// Always returns `false`.
///
/// Needed for `serde`: `#[serde(default = "...")]` takes the path of a function, so the
/// boolean fields that should read as `false` when absent from the input name this one.
fn default_false() -> bool {
    false
}
38
/// Always returns `None`, for any payload type `T`.
///
/// Needed for `serde`: `#[serde(default = "...")]` takes the path of a function, so
/// optional fields that should read as `None` when absent from the input name this one.
fn default_none<T>() -> Option<T> {
    None
}
43
/// Generates the part-of-speech query methods of [`WordMetadata`].
///
/// Invoke it inside an `impl` block. The input is a list of clauses separated by `.`,
/// each of the form `category has sub1, sub2, ...` (the list after `has` may be empty).
/// `category` must name an `Option<...>` field of `self` whose payload struct is called
/// `<Category>Data`, and every `sub` must correspond to an `is_<sub>: Option<bool>` field
/// of that struct.
///
/// For every clause the macro emits:
///
/// - `is_<category>()`: `self.<category>` is `Some`.
/// - `is_<sub>_<category>()`: the category is present and its `is_<sub>` is `Some(true)`.
/// - `is_not_<sub>_<category>()`: the category is present and its `is_<sub>` is
///   `Some(false)`.
///
/// It additionally emits one `is_likely_homograph()` covering all listed categories.
macro_rules! generate_metadata_queries {
    ($($category:ident has $($sub:ident),*).*) => {
        paste! {
            /// Heuristic: `true` when more than one sub-category query (for example
            /// `is_plural_noun`) holds, or when more than one top-level category
            /// (determiner, preposition, or any generated `is_<category>`) holds.
            pub fn is_likely_homograph(&self) -> bool {
                // Count the positive sub-category labels across every category.
                let labeled_subcategories = [$($(self.[< is_ $sub _ $category >](),)*)*]
                    .iter()
                    .filter(|&&flag| flag)
                    .count();

                if labeled_subcategories > 1 {
                    return true;
                }

                // Otherwise count how many top-level categories the word belongs to.
                [self.determiner, self.preposition, $(
                    self.[< is_ $category >](),
                )*].iter().filter(|&&flag| flag).count() > 1
            }

            $(
                #[doc = concat!("Checks if the word is definitely a ", stringify!($category), ".")]
                pub fn [< is_ $category >](&self) -> bool {
                    self.$category.is_some()
                }

                $(
                    #[doc = concat!("Checks if the word is definitely a ", stringify!($category), " and more specifically is labeled as (a) ", stringify!($sub), ".")]
                    pub fn [< is_ $sub _ $category >](&self) -> bool {
                        matches!(
                            self.$category,
                            Some([< $category:camel Data >]{
                                [< is_ $sub >]: Some(true),
                                ..
                            })
                        )
                    }


                    #[doc = concat!("Checks if the word is definitely a ", stringify!($category), " and more specifically is labeled as __not__ (a) ", stringify!($sub), ".")]
                    pub fn [< is_not_ $sub _ $category >](&self) -> bool {
                        matches!(
                            self.$category,
                            Some([< $category:camel Data >]{
                                [< is_ $sub >]: Some(false),
                                ..
                            })
                        )
                    }
                )*
            )*
        }
    };
}
91
92impl WordMetadata {
93    /// Produce a copy of `self` with the known properties of `other` set.
94    pub fn or(&self, other: &Self) -> Self {
95        macro_rules! merge {
96            ($a:expr, $b:expr) => {
97                match ($a, $b) {
98                    (Some(a), Some(b)) => Some(a.or(&b)),
99                    (Some(a), None) => Some(a),
100                    (None, Some(b)) => Some(b),
101                    (None, None) => None,
102                }
103            };
104        }
105
106        Self {
107            noun: merge!(self.noun, other.noun),
108            pronoun: merge!(self.pronoun, other.pronoun),
109            verb: merge!(self.verb, other.verb),
110            adjective: merge!(self.adjective, other.adjective),
111            adverb: merge!(self.adverb, other.adverb),
112            conjunction: merge!(self.conjunction, other.conjunction),
113            dialect: self.dialect.or(other.dialect),
114            swear: self.swear.or(other.swear),
115            determiner: self.determiner || other.determiner,
116            preposition: self.preposition || other.preposition,
117            common: self.common || other.common,
118            derived_from: self.derived_from.or(other.derived_from),
119        }
120    }
121
122    generate_metadata_queries!(
123        noun has proper, plural, possessive.
124        pronoun has plural, possessive.
125        verb has linking, auxiliary.
126        conjunction has.
127        adjective has.
128        adverb has
129    );
130
131    /// Checks if the word is definitely nominalpro.
132    pub fn is_nominal(&self) -> bool {
133        self.noun.is_some() || self.pronoun.is_some()
134    }
135
136    /// Checks if the word is definitely a nominal and more specifically is labeled as (a) plural.
137    pub fn is_plural_nominal(&self) -> bool {
138        matches!(
139            self.noun,
140            Some(NounData {
141                is_plural: Some(true),
142                ..
143            })
144        ) || matches!(
145            self.pronoun,
146            Some(PronounData {
147                is_plural: Some(true),
148                ..
149            })
150        )
151    }
152
153    /// Checks if the word is definitely a nominal and more specifically is labeled as (a) possessive.
154    pub fn is_possessive_nominal(&self) -> bool {
155        matches!(
156            self.noun,
157            Some(NounData {
158                is_possessive: Some(true),
159                ..
160            })
161        ) || matches!(
162            self.pronoun,
163            Some(PronounData {
164                is_possessive: Some(true),
165                ..
166            })
167        )
168    }
169
170    /// Checks if the word is definitely a nominal and more specifically is labeled as __not__ (a) plural.
171    pub fn is_not_plural_nominal(&self) -> bool {
172        matches!(
173            self.noun,
174            Some(NounData {
175                is_plural: Some(false),
176                ..
177            })
178        ) || matches!(
179            self.pronoun,
180            Some(PronounData {
181                is_plural: Some(false),
182                ..
183            })
184        )
185    }
186
187    /// Checks if the word is definitely a nominal and more specifically is labeled as __not__ (a) possessive.
188    pub fn is_not_possessive_nominal(&self) -> bool {
189        matches!(
190            self.noun,
191            Some(NounData {
192                is_possessive: Some(false),
193                ..
194            })
195        ) && matches!(
196            self.pronoun,
197            Some(PronounData {
198                is_possessive: Some(false),
199                ..
200            })
201        )
202    }
203
204    /// Checks whether a word is _definitely_ a swear.
205    pub fn is_swear(&self) -> bool {
206        matches!(self.swear, Some(true))
207    }
208
209    /// Same thing as [`Self::or`], except in-place rather than a clone.
210    pub fn append(&mut self, other: &Self) -> &mut Self {
211        *self = self.or(other);
212        self
213    }
214}
215
216// TODO currently unused and probably should be changed to the forms of an inflected verb
217// TODO - (present, infinitive); -ed (past tense, past participle), -ing (present participle, continuous, progressive)
218// TODO irregular verbs can have different forms for past tense and past participle
219// TODO -ed forms can act as verbs and adjectives, -ing forms can act as verbs and nouns
220// TODO future shares a form with present/infinitive
221#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
222pub enum Tense {
223    // Past,
224    // Present,
225    // Future,
226}
227
228#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
229pub struct VerbData {
230    pub is_linking: Option<bool>,
231    pub is_auxiliary: Option<bool>,
232    pub tense: Option<Tense>,
233}
234
235impl VerbData {
236    /// Produce a copy of `self` with the known properties of `other` set.
237    pub fn or(&self, other: &Self) -> Self {
238        Self {
239            is_linking: self.is_linking.or(other.is_linking),
240            is_auxiliary: self.is_auxiliary.or(other.is_auxiliary),
241            tense: self.tense.or(other.tense),
242        }
243    }
244}
245
246// TODO renamed from "noun" until refactoring is complete
247// TODO other noun properties may be worth adding:
248// TODO count vs mass; abstract
249#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
250pub struct NounData {
251    pub is_proper: Option<bool>,
252    pub is_plural: Option<bool>,
253    pub is_possessive: Option<bool>,
254}
255
256impl NounData {
257    /// Produce a copy of `self` with the known properties of `other` set.
258    pub fn or(&self, other: &Self) -> Self {
259        Self {
260            is_proper: self.is_proper.or(other.is_proper),
261            is_plural: self.is_plural.or(other.is_plural),
262            is_possessive: self.is_possessive.or(other.is_possessive),
263        }
264    }
265}
266
267// Person is a property of pronouns; the verb 'be', plus all verbs reflect 3rd person singular with -s
268#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
269pub enum Person {
270    First,
271    Second,
272    Third,
273}
274
275// case is a property of pronouns
276#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
277pub enum Case {
278    Subject,
279    Object,
280}
281
282// TODO for now focused on personal pronouns?
283#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
284pub struct PronounData {
285    pub is_plural: Option<bool>,
286    pub is_possessive: Option<bool>,
287    pub person: Option<Person>,
288    pub case: Option<Case>,
289}
290
291impl PronounData {
292    /// Produce a copy of `self` with the known properties of `other` set.
293    pub fn or(&self, other: &Self) -> Self {
294        Self {
295            is_plural: self.is_plural.or(other.is_plural),
296            is_possessive: self.is_possessive.or(other.is_possessive),
297            person: self.person.or(other.person),
298            case: self.case.or(other.case),
299        }
300    }
301}
302
303// Degree is a property of adjectives: positive is not inflected
304// Comparative is inflected with -er or comes after the word "more"
305// Superlative is inflected with -est or comes after the word "most"
306#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
307pub enum Degree {
308    Positive,
309    Comparative,
310    Superlative,
311}
312
313// Some adjectives are not comparable so don't have -er or -est forms and can't be used with "more" or "most".
314// Some adjectives can only be used "attributively" (before a noun); some only predicatively (after "is" etc.).
315// In old grammars words like the articles and determiners are classified as adjectives but behave differently.
316#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
317pub struct AdjectiveData {
318    pub degree: Option<Degree>,
319}
320
321impl AdjectiveData {
322    /// Produce a copy of `self` with the known properties of `other` set.
323    pub fn or(&self, other: &Self) -> Self {
324        Self {
325            degree: self.degree.or(other.degree),
326        }
327    }
328}
329
330// Adverb can be a "junk drawer" category for words which don't fit the other major categories.
331// The typical adverbs are "adverbs of manner", those derived from adjectives in -ly
332// other adverbs (time, place, etc) should probably not be considered adverbs for Harper's purposes
333#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
334pub struct AdverbData {}
335
336impl AdverbData {
337    /// Produce a copy of `self` with the known properties of `other` set.
338    pub fn or(&self, _other: &Self) -> Self {
339        Self {}
340    }
341}
342
343#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
344pub struct ConjunctionData {}
345
346impl ConjunctionData {
347    /// Produce a copy of `self` with the known properties of `other` set.
348    pub fn or(&self, _other: &Self) -> Self {
349        Self {}
350    }
351}
352
353/// A regional dialect.
354#[derive(
355    Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, EnumString,
356)]
357pub enum Dialect {
358    American,
359    Canadian,
360    Australian,
361    British,
362}