jmdict_enums/
lib.rs

1/*******************************************************************************
2* Copyright 2021 Stefan Majewsky <majewsky@gmx.net>
3* SPDX-License-Identifier: Apache-2.0
4* Refer to the file "LICENSE" for details.
5*******************************************************************************/
6
7//! Autogenerated enums for the `jmdict` crate.
8//!
9//! This code is in a separate crate because, if we put it in the `jmdict` crate itself, its
10//! `build.rs` could not import it.
11//!
12//! # Compatibility promise
13//!
14//! **There is none.** This crate can disappear at any time if we choose to restructure the build
15//! system for the `jmdict` crate. To use the types from this crate, look at the re-exports of the
16//! same name in [the `jmdict` crate](https://docs.rs/jmdict/).
17
///The error produced by every fallible enum conversion of the form
///`impl TryFrom<AllFoo> for Foo`.
///
///A conversion fails with this error when the requested variant exists in the full enum, but is
///disabled in the main enum because of the compile-time configuration. For example:
///
///```
///# use jmdict_enums::*;
///use std::convert::TryInto;
///let val: Result<PartOfSpeech, _> = AllPartOfSpeech::NariAdjective.try_into();
///#[cfg(feature = "scope-archaic")]
///assert_eq!(val, Ok(PartOfSpeech::NariAdjective));
///#[cfg(not(feature = "scope-archaic"))]
///assert_eq!(val, Err(DisabledVariant));
///```
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct DisabledVariant;
34
///Internal helper functions for serialization and deserialization of enum values.
///
///This is an internal trait; it is not re-exported by the `jmdict` crate and thus not part of the
///public API.
pub trait EnumPayload {
    ///Encodes this value as a `u32`.
    fn to_u32(&self) -> u32;
    ///Decodes a value from a `u32` code. Implementations are not required to accept arbitrary
    ///codes: for example, the impl for `Priority` in this file panics when one of the fields
    ///packed into the code is invalid.
    fn from_u32(code: u32) -> Self;
}
43
///Common methods provided by all enums in this crate.
pub trait Enum: Sized {
    ///Returns a list of all variant values in this enum. No particular order is guaranteed or
    ///implied.
    fn all_variants() -> &'static [Self];

    ///Returns the string that marks this enum variant in the JMdict. For values that JMdict
    ///represents as XML entities, only the entity name is returned, e.g. `adj-n` instead of
    ///`&adj-n;`.
    fn code(&self) -> &'static str;

    ///Parses a representation from the JMdict file into a value of this enum. This is the reverse
    ///of `self.code()`, i.e. `Self::from_code(self.code()) == Some(self)`.
    fn from_code(code: &str) -> Option<Self>;

    ///Returns the variant name. This is used to generate Rust code for this enum. The `impl
    ///Display` for enums uses this same representation.
    fn constant_name(&self) -> &'static str;

    ///Returns the variant that is identified by the given name in Rust code, or `None` if there
    ///is no such variant. This is the reverse of `self.constant_name()`, i.e.
    ///`Self::from_constant_name(self.constant_name()) == Some(self)`.
    fn from_constant_name(name: &str) -> Option<Self>;
}
68
///Describes how often a dictionary entry appears in a certain corpus of text. Values of this type
///appear in struct [Priority].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PriorityInCorpus {
    ///The vocabulary appears often within the given corpus.
    Primary,
    ///The vocabulary appears within the given corpus, but not particularly often.
    Secondary,
    ///The vocabulary does not appear in the given corpus. This is the `Default::default()` value.
    Absent,
}

impl Default for PriorityInCorpus {
    ///Yields `Absent`, i.e. "does not appear in the corpus".
    fn default() -> PriorityInCorpus {
        PriorityInCorpus::Absent
    }
}

impl PriorityInCorpus {
    ///Maps this variant to its numeric code: `Absent` is 0, `Primary` is 1, `Secondary` is 2.
    fn to_repr(&self) -> u32 {
        match self {
            PriorityInCorpus::Absent => 0,
            PriorityInCorpus::Primary => 1,
            PriorityInCorpus::Secondary => 2,
        }
    }

    ///Maps a numeric code back to its variant. This is the reverse of `to_repr()`.
    ///
    ///# Panics
    ///
    ///Panics if `code` is not a value that `to_repr()` can return.
    fn from_repr(code: u32) -> Self {
        const CANDIDATES: [PriorityInCorpus; 3] = [
            PriorityInCorpus::Absent,
            PriorityInCorpus::Primary,
            PriorityInCorpus::Secondary,
        ];

        //Search by `to_repr()`, so that the code assignment is only spelled out in one place.
        let found = CANDIDATES
            .iter()
            .find(|candidate| candidate.to_repr() == code);
        match found {
            Some(&variant) => variant,
            None => panic!("invalid PriorityInCorpus code: {}", code),
        }
    }
}
105
///Relative priority of a ReadingElement or KanjiElement.
///
///The various fields indicate if the vocabulary appears in various references, which can be taken
///as an indication of the frequency with which it is used.
///
///For the sake of encoding efficiency, this struct is not a perfect representation of the data in
///the JMdict. Some entries in the JMdict are marked with contradictory priority information. In
///this case, `Priority` will only contain the values corresponding to the highest priority. For
///example, a priority of `ichi1,ichi2,news1,nf09` is represented as:
///
///```
///# use jmdict_enums::{PriorityInCorpus::*, Priority};
///let p = Priority {
///    news: Primary,
///    ichimango: Primary, //"ichi2" gets ignored
///    loanwords: Absent,
///    additional: Absent,
///    frequency_bucket: 9,
///};
///```
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub struct Priority {
    ///If not `Absent`, this vocabulary appears in the wordfreq file compiled by Alexandre Girardi
    ///from the Mainichi Shimbun. (A copy of the file can be obtained from the EDRDG.)
    pub news: PriorityInCorpus,
    ///If not `Absent`, this vocabulary appears in the book "1万語語彙分類集" (Ichimango goi
    ///bunruishuu) by Senmon Kyouiku Publishing, Tokyo, 1998. The entries with priority `Secondary`
    ///were demoted from `Primary` because they were observed to have low frequencies in the WWW
    ///and newspapers.
    pub ichimango: PriorityInCorpus,
    ///If not `Absent`, this vocabulary is a common loanword that appears in the wordfreq file.
    pub loanwords: PriorityInCorpus,
    ///This covers a small number of words when they are detected as being common, but are not
    ///included in the above corpora.
    pub additional: PriorityInCorpus,
    ///If `self.news != Absent`, this field contains a value between 1 and 48, indicating the
    ///frequency-of-use ranking for this vocabulary in the wordfreq file. The value 1 is used for
    ///the 500 most common words, the value 2 is used for the 500 next most common words, and so
    ///on. If `self.news == Absent`, this value will be 0.
    pub frequency_bucket: u16,
}
147
148impl Priority {
149    ///Indicates whether this is a common vocabulary. This follows the same logic as the `(P)`
150    ///markers in the EDICT and EDICT2 files: A word is common if any of its `PriorityInCorpus`
151    ///fields is `Primary`, or if `self.additional == Secondary`.
152    pub fn is_common(&self) -> bool {
153        use PriorityInCorpus::*;
154        self.news == Primary
155            || self.ichimango == Primary
156            || self.loanwords == Primary
157            || self.additional != Absent
158    }
159}
160
161//Priority gets serialized into u32, same as the enum types. The lower 16 bits are used for the
162//frequency buckets. The higher 16 bits are evenly distributed among the four PriorityInCorpus
163//fields. The encoding could be denser if we wanted to, but u32 is the smallest encoding unit
164//available to us anyway, so we don't need to bother.
165impl EnumPayload for Priority {
166    fn to_u32(&self) -> u32 {
167        let mut result = self.frequency_bucket as u32;
168        result |= self.news.to_repr() << 16;
169        result |= self.ichimango.to_repr() << 20;
170        result |= self.loanwords.to_repr() << 24;
171        result |= self.additional.to_repr() << 28;
172        result
173    }
174
175    fn from_u32(code: u32) -> Self {
176        Self {
177            news: PriorityInCorpus::from_repr((code & 0xF0000) >> 16),
178            ichimango: PriorityInCorpus::from_repr((code & 0xF00000) >> 20),
179            loanwords: PriorityInCorpus::from_repr((code & 0xF000000) >> 24),
180            additional: PriorityInCorpus::from_repr((code & 0xF0000000) >> 28),
181            frequency_bucket: (code & 0xFFFF) as u16,
182        }
183    }
184}
185
186include!(concat!(env!("OUT_DIR"), "/generated.rs"));