harper_pos_utils/
upos.rs

1use std::fmt::Display;
2
3use is_macro::Is;
4use serde::{Deserialize, Serialize};
5use strum_macros::{AsRefStr, EnumIter};
6
7/// Represents the universal parts of speech as outlined by [universaldependencies.org](https://universaldependencies.org/u/pos/index.html).
8#[derive(
9    Debug,
10    Default,
11    Hash,
12    Eq,
13    PartialEq,
14    Clone,
15    Copy,
16    EnumIter,
17    AsRefStr,
18    Serialize,
19    Deserialize,
20    PartialOrd,
21    Ord,
22    Is,
23)]
24pub enum UPOS {
25    /// Adjective
26    ADJ,
27    /// Adposition
28    ADP,
29    /// Adverb
30    ADV,
31    /// Auxiliary
32    AUX,
33    /// Coordinating conjunction
34    CCONJ,
35    /// Determiner
36    DET,
37    /// Interjection
38    INTJ,
39    /// Noun
40    #[default]
41    NOUN,
42    /// Numeral
43    NUM,
44    /// Particle
45    PART,
46    /// Pronoun
47    PRON,
48    /// Proper noun
49    PROPN,
50    /// Punctuation
51    PUNCT,
52    /// Subordinating conjunction
53    SCONJ,
54    /// Symbol
55    SYM,
56    /// Verb
57    VERB,
58}
59
60impl UPOS {
61    pub fn from_conllu(other: rs_conllu::UPOS) -> Option<Self> {
62        Some(match other {
63            rs_conllu::UPOS::ADJ => UPOS::ADJ,
64            rs_conllu::UPOS::ADP => UPOS::ADP,
65            rs_conllu::UPOS::ADV => UPOS::ADV,
66            rs_conllu::UPOS::AUX => UPOS::AUX,
67            rs_conllu::UPOS::CCONJ => UPOS::CCONJ,
68            rs_conllu::UPOS::DET => UPOS::DET,
69            rs_conllu::UPOS::INTJ => UPOS::INTJ,
70            rs_conllu::UPOS::NOUN => UPOS::NOUN,
71            rs_conllu::UPOS::NUM => UPOS::NUM,
72            rs_conllu::UPOS::PART => UPOS::PART,
73            rs_conllu::UPOS::PRON => UPOS::PRON,
74            rs_conllu::UPOS::PROPN => UPOS::PROPN,
75            rs_conllu::UPOS::PUNCT => UPOS::PUNCT,
76            rs_conllu::UPOS::SCONJ => UPOS::SCONJ,
77            rs_conllu::UPOS::SYM => UPOS::SYM,
78            rs_conllu::UPOS::VERB => UPOS::VERB,
79            rs_conllu::UPOS::X => return None,
80        })
81    }
82
83    pub fn is_nominal(&self) -> bool {
84        matches!(self, Self::NOUN | Self::PROPN | Self::PRON)
85    }
86}
87
88impl Display for UPOS {
89    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
90        let desc = match self {
91            UPOS::ADJ => "Adjective",
92            UPOS::ADP => "Adposition",
93            UPOS::ADV => "Adverb",
94            UPOS::AUX => "Auxiliary",
95            UPOS::CCONJ => "Coordinating conjunction",
96            UPOS::DET => "Determiner",
97            UPOS::INTJ => "Interjection",
98            UPOS::NOUN => "Noun",
99            UPOS::NUM => "Numeral",
100            UPOS::PART => "Particle",
101            UPOS::PRON => "Pronoun",
102            UPOS::PROPN => "Proper noun",
103            UPOS::PUNCT => "Punctuation",
104            UPOS::SCONJ => "Subordinating conjunction",
105            UPOS::SYM => "Symbol",
106            UPOS::VERB => "Verb",
107        };
108        write!(f, "{desc}")
109    }
110}