Skip to main content

wn_parser/
common.rs

1use serde_derive::{Serialize, Deserialize};
2
3#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Serialize, Deserialize)]
4pub enum PointerSymbol {
5    Antonym,               // !
6    Hypernym,              // @
7    InstanceHypernym,      // @i
8    Hyponym,               // ~
9    InstanceHyponym,       // ~i
10    Entailment,            // *
11    SimilarTo,             // &
12    MemberMeronym,         // #m
13    SubstanceMeronym,      // #s
14    PartMeronym,           // #p
15    MemberHolonym,         // %m
16    SubstanceHolonym,      // %s
17    PartHolonym,           // %p
18    Meronym,               // %
19    Holonym,               // #
20    CauseTo,               // >
21    ParticipleOf,          // <
22    SeeAlso,               // ^
23    Pertainym,             // \
24    Attribute,             // =
25    VerbGroup,             // $
26    DerivationallyRelated, // +
27    Classification,        // ;
28    ClassificationCategory,// ;c
29    ClassificationUsage,   // ;u
30    ClassificationRegional,// ;r
31    Class,                 // -
32    ClassCategory,         // -c
33    ClassUsage,            // -u
34    ClassRegional,         // -r
35    Unknown(String),
36}
37
38impl From<&str> for PointerSymbol {
39    fn from(s: &str) -> Self {
40        match s {
41            "!" => PointerSymbol::Antonym,
42            "@" => PointerSymbol::Hypernym,
43            "@i" => PointerSymbol::InstanceHypernym,
44            "~" => PointerSymbol::Hyponym,
45            "~i" => PointerSymbol::InstanceHyponym,
46            "*" => PointerSymbol::Entailment,
47            "&" => PointerSymbol::SimilarTo,
48            "#m" => PointerSymbol::MemberMeronym,
49            "#s" => PointerSymbol::SubstanceMeronym,
50            "#p" => PointerSymbol::PartMeronym,
51            "%m" => PointerSymbol::MemberHolonym,
52            "%s" => PointerSymbol::SubstanceHolonym,
53            "%p" => PointerSymbol::PartHolonym,
54            "%" => PointerSymbol::Meronym,
55            "#" => PointerSymbol::Holonym,
56            ">" => PointerSymbol::CauseTo,
57            "<" => PointerSymbol::ParticipleOf,
58            "^" => PointerSymbol::SeeAlso,
59            "\\" => PointerSymbol::Pertainym,
60            "=" => PointerSymbol::Attribute,
61            "$" => PointerSymbol::VerbGroup,
62            "+" => PointerSymbol::DerivationallyRelated,
63            ";" => PointerSymbol::Classification,
64            ";c" => PointerSymbol::ClassificationCategory,
65            ";u" => PointerSymbol::ClassificationUsage,
66            ";r" => PointerSymbol::ClassificationRegional,
67            "-" => PointerSymbol::Class,
68            "-c" => PointerSymbol::ClassCategory,
69            "-u" => PointerSymbol::ClassUsage,
70            "-r" => PointerSymbol::ClassRegional,
71            _ => PointerSymbol::Unknown(s.to_string()),
72        }
73    }
74}
75
76#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Serialize, Deserialize)]
77pub enum SynsetType {
78    Noun,         // n
79    Verb,         // v
80    Adjective,    // a
81    Adverb,       // r
82    AdjectiveSatellite, // s
83    Unknown(String),
84}
85
86impl From<&str> for SynsetType {
87    fn from(s: &str) -> Self {
88        match s {
89            "n" => SynsetType::Noun,
90            "v" => SynsetType::Verb,
91            "a" => SynsetType::Adjective,
92            "r" => SynsetType::Adverb,
93            "s" => SynsetType::AdjectiveSatellite,
94            _ => SynsetType::Unknown(s.to_string()),
95        }
96    }
97}
98
99#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Serialize, Deserialize)]
100pub struct Pointer {
101    pub symbol: PointerSymbol,
102    pub offset: u64,
103    pub pos: SynsetType,
104    pub source_target: (u16, u16), // Source and target word numbers
105}
106
107#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Serialize, Deserialize)]
108pub struct Word {
109    pub word: String,
110    pub lex_id: u8,
111}
112
113#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Serialize, Deserialize)]
114pub struct Frame {
115    pub frame_number: u16,
116    pub word_number: u16, // 0 means all words in the synset
117}
118
119#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Serialize, Deserialize)]
120pub struct Synset {
121    pub offset: u64,
122    pub lex_filenum: u8,
123    pub ss_type: SynsetType,
124    pub words: Vec<Word>,
125    pub pointers: Vec<Pointer>,
126    pub frames: Vec<Frame>,
127    pub gloss: String,
128}
129
130#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Serialize, Deserialize)]
131pub struct IndexEntry {
132    pub lemma: String,
133    pub pos: SynsetType,
134    pub synset_cnt: u32,
135    pub ptr_symbols: Vec<PointerSymbol>,
136    pub sense_cnt: u32,
137    pub tagsense_cnt: u32,
138    pub synset_offsets: Vec<u64>,
139}
140
141#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Serialize, Deserialize)]
142pub struct SenseEntry {
143    pub sense_key: String,
144    pub synset_offset: u64,
145    pub sense_number: u32,
146    pub tag_cnt: u32,
147}