harper_core/
word_metadata.rs

1use is_macro::Is;
2use paste::paste;
3use serde::{Deserialize, Serialize};
4use strum_macros::{Display, EnumString};
5
6use crate::WordId;
7
/// Metadata describing the grammatical categories and usage properties of a single word.
///
/// Each part-of-speech field is `Some` when the word is labeled as belonging to that
/// category and `None` otherwise; the inner data structs carry finer-grained properties.
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Hash)]
pub struct WordMetadata {
    /// Noun properties; `Some` when the word is labeled as a noun.
    pub noun: Option<NounData>,
    /// Pronoun properties; `Some` when the word is labeled as a pronoun.
    pub pronoun: Option<PronounData>,
    /// Verb properties; `Some` when the word is labeled as a verb.
    pub verb: Option<VerbData>,
    /// Adjective properties; `Some` when the word is labeled as an adjective.
    pub adjective: Option<AdjectiveData>,
    /// Adverb properties; `Some` when the word is labeled as an adverb.
    pub adverb: Option<AdverbData>,
    /// Conjunction properties; `Some` when the word is labeled as a conjunction.
    pub conjunction: Option<ConjunctionData>,
    /// Whether the word is a swear; `None` when that is not known.
    pub swear: Option<bool>,
    /// The dialect this word belongs to.
    /// If no dialect is defined, it can be assumed that the word is
    /// valid in all dialects of English.
    pub dialect: Option<Dialect>,
    /// Whether the word is a [determiner](https://en.wikipedia.org/wiki/English_determiners).
    /// Falls back to `false` when absent from the serialized data.
    #[serde(default = "default_false")]
    pub determiner: bool,
    /// Whether the word is a [preposition](https://www.merriam-webster.com/dictionary/preposition).
    /// Falls back to `false` when absent from the serialized data.
    #[serde(default = "default_false")]
    pub preposition: bool,
    /// Whether the word is considered especially common.
    /// Falls back to `false` when absent from the serialized data.
    #[serde(default = "default_false")]
    pub common: bool,
    /// NOTE(review): presumably the ID of the word this entry was derived from — confirm
    /// against the producers of `WordId`. Falls back to `None` when absent from the
    /// serialized data.
    #[serde(default = "default_none")]
    pub derived_from: Option<WordId>,
}
33
/// Supplies `false` for boolean fields that are missing from serialized input.
///
/// Referenced by name from `#[serde(default = "default_false")]` attributes.
fn default_false() -> bool {
    bool::default()
}
38
/// Supplies `None` for optional fields that are missing from serialized input.
///
/// Referenced by name from `#[serde(default = "default_none")]` attributes.
fn default_none<T>() -> Option<T> {
    Option::None
}
43
// Generates boolean query methods from a list of `category has sub, sub, ...` clauses
// separated by `.`. For every category it emits `is_<category>`, and for every
// sub-property it emits `is_<sub>_<category>` and `is_not_<sub>_<category>`.
// It also emits a single `is_likely_homograph` covering all listed categories.
macro_rules! generate_metadata_queries {
    ($($category:ident has $($sub:ident),*).*) => {
        paste! {
            /// Checks whether more than one of the determiner flag, the preposition flag
            /// and the generated category checks holds for this word.
            pub fn is_likely_homograph(&self) -> bool {
                let category_flags = [
                    self.determiner,
                    self.preposition,
                    $(self.[< is_ $category >](),)*
                ];

                category_flags.iter().filter(|flag| **flag).count() > 1
            }

            $(
                #[doc = concat!("Checks if the word is definitely a ", stringify!($category), ".")]
                pub fn [< is_ $category >](&self) -> bool {
                    matches!(self.$category, Some(_))
                }

                $(
                    #[doc = concat!("Checks if the word is definitely a ", stringify!($category), " and more specifically is labeled as (a) ", stringify!($sub), ".")]
                    pub fn [< is_ $sub _ $category >](&self) -> bool {
                        // Only an explicit `Some(true)` label counts; unknown (`None`) does not.
                        self.$category
                            .as_ref()
                            .and_then(|data| data.[< is_ $sub >])
                            == Some(true)
                    }

                    #[doc = concat!("Checks if the word is definitely a ", stringify!($category), " and more specifically is labeled as __not__ (a) ", stringify!($sub), ".")]
                    pub fn [< is_not_ $sub _ $category >](&self) -> bool {
                        // Only an explicit `Some(false)` label counts; unknown (`None`) does not.
                        self.$category
                            .as_ref()
                            .and_then(|data| data.[< is_ $sub >])
                            == Some(false)
                    }
                )*
            )*
        }
    };
}
87
88impl WordMetadata {
89    /// Produce a copy of `self` with the known properties of `other` set.
90    pub fn or(&self, other: &Self) -> Self {
91        macro_rules! merge {
92            ($a:expr, $b:expr) => {
93                match ($a, $b) {
94                    (Some(a), Some(b)) => Some(a.or(&b)),
95                    (Some(a), None) => Some(a),
96                    (None, Some(b)) => Some(b),
97                    (None, None) => None,
98                }
99            };
100        }
101
102        Self {
103            noun: merge!(self.noun, other.noun),
104            pronoun: merge!(self.pronoun, other.pronoun),
105            verb: merge!(self.verb, other.verb),
106            adjective: merge!(self.adjective, other.adjective),
107            adverb: merge!(self.adverb, other.adverb),
108            conjunction: merge!(self.conjunction, other.conjunction),
109            dialect: self.dialect.or(other.dialect),
110            swear: self.swear.or(other.swear),
111            determiner: self.determiner || other.determiner,
112            preposition: self.preposition || other.preposition,
113            common: self.common || other.common,
114            derived_from: self.derived_from.or(other.derived_from),
115        }
116    }
117
118    generate_metadata_queries!(
119        noun has proper, plural, possessive.
120        pronoun has plural, possessive.
121        verb has linking, auxiliary.
122        conjunction has.
123        adjective has.
124        adverb has
125    );
126
127    pub fn is_present_tense_verb(&self) -> bool {
128        matches!(
129            self.verb,
130            Some(VerbData {
131                tense: Some(Tense::Present),
132                ..
133            })
134        )
135    }
136
137    /// Checks if the word is definitely nominalpro.
138    pub fn is_nominal(&self) -> bool {
139        self.noun.is_some() || self.pronoun.is_some()
140    }
141
142    /// Checks if the word is definitely a nominal and more specifically is labeled as (a) plural.
143    pub fn is_plural_nominal(&self) -> bool {
144        matches!(
145            self.noun,
146            Some(NounData {
147                is_plural: Some(true),
148                ..
149            })
150        ) || matches!(
151            self.pronoun,
152            Some(PronounData {
153                is_plural: Some(true),
154                ..
155            })
156        )
157    }
158
159    /// Checks if the word is definitely a nominal and more specifically is labeled as (a) possessive.
160    pub fn is_possessive_nominal(&self) -> bool {
161        matches!(
162            self.noun,
163            Some(NounData {
164                is_possessive: Some(true),
165                ..
166            })
167        ) || matches!(
168            self.pronoun,
169            Some(PronounData {
170                is_possessive: Some(true),
171                ..
172            })
173        )
174    }
175
176    /// Checks if the word is definitely a nominal and more specifically is labeled as __not__ (a) plural.
177    pub fn is_not_plural_nominal(&self) -> bool {
178        matches!(
179            self.noun,
180            Some(NounData {
181                is_plural: Some(false),
182                ..
183            })
184        ) || matches!(
185            self.pronoun,
186            Some(PronounData {
187                is_plural: Some(false),
188                ..
189            })
190        )
191    }
192
193    /// Checks if the word is definitely a nominal and more specifically is labeled as __not__ (a) possessive.
194    pub fn is_not_possessive_nominal(&self) -> bool {
195        matches!(
196            self.noun,
197            Some(NounData {
198                is_possessive: Some(false),
199                ..
200            })
201        ) && matches!(
202            self.pronoun,
203            Some(PronounData {
204                is_possessive: Some(false),
205                ..
206            })
207        )
208    }
209
210    /// Checks whether a word is _definitely_ a swear.
211    pub fn is_swear(&self) -> bool {
212        matches!(self.swear, Some(true))
213    }
214
215    /// Same thing as [`Self::or`], except in-place rather than a clone.
216    pub fn append(&mut self, other: &Self) -> &mut Self {
217        *self = self.or(other);
218        self
219    }
220}
221
222// TODO currently unused and probably should be changed to the forms of an inflected verb
223// TODO - (present, infinitive); -ed (past tense, past participle), -ing (present participle, continuous, progressive)
224// TODO irregular verbs can have different forms for past tense and past participle
225// TODO -ed forms can act as verbs and adjectives, -ing forms can act as verbs and nouns
226// TODO future shares a form with present/infinitive
/// The tense a verb form is labeled with.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
pub enum Tense {
    /// Past tense.
    Past,
    /// Present tense.
    Present,
    // Future,
}
233
/// Properties recorded for a word that is labeled as a verb.
///
/// Every property is optional; `None` means the property is not known.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct VerbData {
    /// Whether the verb is labeled as a linking verb.
    pub is_linking: Option<bool>,
    /// Whether the verb is labeled as an auxiliary verb.
    pub is_auxiliary: Option<bool>,
    /// The tense this verb form is labeled with.
    pub tense: Option<Tense>,
}
240
241impl VerbData {
242    /// Produce a copy of `self` with the known properties of `other` set.
243    pub fn or(&self, other: &Self) -> Self {
244        Self {
245            is_linking: self.is_linking.or(other.is_linking),
246            is_auxiliary: self.is_auxiliary.or(other.is_auxiliary),
247            tense: self.tense.or(other.tense),
248        }
249    }
250}
251
252// TODO renamed from "noun" until refactoring is complete
253// TODO other noun properties may be worth adding:
254// TODO count vs mass; abstract
/// Properties recorded for a word that is labeled as a noun.
///
/// Every property is optional; `None` means the property is not known.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct NounData {
    /// Whether the noun is labeled as a proper noun.
    pub is_proper: Option<bool>,
    /// Whether the noun is labeled as plural.
    pub is_plural: Option<bool>,
    /// Whether the noun is labeled as possessive.
    pub is_possessive: Option<bool>,
}
261
262impl NounData {
263    /// Produce a copy of `self` with the known properties of `other` set.
264    pub fn or(&self, other: &Self) -> Self {
265        Self {
266            is_proper: self.is_proper.or(other.is_proper),
267            is_plural: self.is_plural.or(other.is_plural),
268            is_possessive: self.is_possessive.or(other.is_possessive),
269        }
270    }
271}
272
273// Person is a property of pronouns; the verb 'be', plus all verbs reflect 3rd person singular with -s
/// Grammatical person, recorded as a property of pronouns.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
pub enum Person {
    /// First person.
    First,
    /// Second person.
    Second,
    /// Third person.
    Third,
}
280
281// case is a property of pronouns
/// Grammatical case, recorded as a property of pronouns.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
pub enum Case {
    /// Subject case.
    Subject,
    /// Object case.
    Object,
}
287
288// TODO for now focused on personal pronouns?
/// Properties recorded for a word that is labeled as a pronoun.
///
/// Every property is optional; `None` means the property is not known.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct PronounData {
    /// Whether the pronoun is labeled as plural.
    pub is_plural: Option<bool>,
    /// Whether the pronoun is labeled as possessive.
    pub is_possessive: Option<bool>,
    /// The grammatical person of the pronoun.
    pub person: Option<Person>,
    /// The grammatical case of the pronoun.
    pub case: Option<Case>,
}
296
297impl PronounData {
298    /// Produce a copy of `self` with the known properties of `other` set.
299    pub fn or(&self, other: &Self) -> Self {
300        Self {
301            is_plural: self.is_plural.or(other.is_plural),
302            is_possessive: self.is_possessive.or(other.is_possessive),
303            person: self.person.or(other.person),
304            case: self.case.or(other.case),
305        }
306    }
307}
308
309// Degree is a property of adjectives: positive is not inflected
310// Comparative is inflected with -er or comes after the word "more"
311// Superlative is inflected with -est or comes after the word "most"
/// Degree of comparison, recorded as a property of adjectives.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
pub enum Degree {
    /// The uninflected form.
    Positive,
    /// Inflected with -er, or used after the word "more".
    Comparative,
    /// Inflected with -est, or used after the word "most".
    Superlative,
}
318
319// Some adjectives are not comparable so don't have -er or -est forms and can't be used with "more" or "most".
320// Some adjectives can only be used "attributively" (before a noun); some only predicatively (after "is" etc.).
321// In old grammars words like the articles and determiners are classified as adjectives but behave differently.
/// Properties recorded for a word that is labeled as an adjective.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct AdjectiveData {
    /// The degree of comparison of this adjective form; `None` when not known.
    pub degree: Option<Degree>,
}
326
327impl AdjectiveData {
328    /// Produce a copy of `self` with the known properties of `other` set.
329    pub fn or(&self, other: &Self) -> Self {
330        Self {
331            degree: self.degree.or(other.degree),
332        }
333    }
334}
335
336// Adverb can be a "junk drawer" category for words which don't fit the other major categories.
337// The typical adverbs are "adverbs of manner", those derived from adjectives in -ly
338// other adverbs (time, place, etc) should probably not be considered adverbs for Harper's purposes
/// Marker for a word that is labeled as an adverb.
///
/// Currently carries no properties of its own.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct AdverbData {}
341
342impl AdverbData {
343    /// Produce a copy of `self` with the known properties of `other` set.
344    pub fn or(&self, _other: &Self) -> Self {
345        Self {}
346    }
347}
348
/// Marker for a word that is labeled as a conjunction.
///
/// Currently carries no properties of its own.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct ConjunctionData {}
351
352impl ConjunctionData {
353    /// Produce a copy of `self` with the known properties of `other` set.
354    pub fn or(&self, _other: &Self) -> Self {
355        Self {}
356    }
357}
358
/// A regional dialect of English.
#[derive(
    Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, EnumString, Display,
)]
pub enum Dialect {
    /// American English.
    American,
    /// Canadian English.
    Canadian,
    /// Australian English.
    Australian,
    /// British English.
    British,
}