jmdict_enums/lib.rs
1/*******************************************************************************
2* Copyright 2021 Stefan Majewsky <majewsky@gmx.net>
3* SPDX-License-Identifier: Apache-2.0
4* Refer to the file "LICENSE" for details.
5*******************************************************************************/
6
7//! Autogenerated enums for the `jmdict` crate.
8//!
9//! This code is in a separate crate because, if we put it in the `jmdict` crate itself, its
10//! `build.rs` could not import it.
11//!
12//! # Compatibility promise
13//!
14//! **There is none.** This crate can disappear at any time if we choose to restructure the build
15//! system for the `jmdict` crate. To use the types from this crate, look at the re-exports of the
16//! same name in [the `jmdict` crate](https://docs.rs/jmdict/).
17
///Error returned by every enum conversion of the form `impl TryFrom<AllFoo> for Foo`.
///
///A conversion fails with this error when the requested variant of the full enum is disabled in
///the main enum because of the compile-time configuration. For example:
///
///```
///# use jmdict_enums::*;
///use std::convert::TryInto;
///let val: Result<PartOfSpeech, _> = AllPartOfSpeech::NariAdjective.try_into();
///#[cfg(feature = "scope-archaic")]
///assert_eq!(val, Ok(PartOfSpeech::NariAdjective));
///#[cfg(not(feature = "scope-archaic"))]
///assert_eq!(val, Err(DisabledVariant));
///```
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct DisabledVariant;
34
///Conversion between a value and the `u32` code that is used to serialize and deserialize it.
///
///This trait is internal: the `jmdict` crate does not re-export it, so it is not part of the
///public API.
pub trait EnumPayload {
    ///Encodes this value as a `u32`.
    fn to_u32(&self) -> u32;

    ///Builds a value from a `u32` code.
    fn from_u32(code: u32) -> Self;
}
43
///Methods shared by every enum in this crate.
pub trait Enum: Sized {
    ///Lists every variant value of this enum. The order of the returned slice is neither
    ///guaranteed nor implied.
    fn all_variants() -> &'static [Self];

    ///Returns the string that the JMdict uses to mark this variant. Where JMdict writes the
    ///value as an XML entity, only the name of the entity is returned, e.g. `adj-n` instead of
    ///`&adj-n;`.
    fn code(&self) -> &'static str;

    ///Turns a representation from the JMdict file into a value of this enum. This is the
    ///inverse of `self.code()`, i.e. `Self::from_code(self.code()) == Some(self)`.
    fn from_code(code: &str) -> Option<Self>;

    ///Returns the name of this variant, which is used to generate Rust code for this enum. The
    ///`impl Display` for enums uses this same representation.
    fn constant_name(&self) -> &'static str;

    ///Returns the variant that is identified by the given name in Rust code, or `None` if no
    ///such variant exists. This is the inverse of `self.constant_name()`, i.e.
    ///`Self::from_constant_name(self.constant_name()) == Some(self)`.
    fn from_constant_name(name: &str) -> Option<Self>;
}
68
///Describes how often a dictionary entry appears in a certain corpus of text. Values of this
///type appear in the fields of struct [Priority].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PriorityInCorpus {
    ///The given corpus contains the vocabulary often.
    Primary,
    ///The given corpus contains the vocabulary, but not particularly often.
    Secondary,
    ///The given corpus does not contain the vocabulary. This is the `Default::default()` value.
    Absent,
}
80
81impl Default for PriorityInCorpus {
82 fn default() -> Self {
83 Self::Absent
84 }
85}
86
87impl PriorityInCorpus {
88 fn to_repr(&self) -> u32 {
89 match *self {
90 Self::Absent => 0,
91 Self::Primary => 1,
92 Self::Secondary => 2,
93 }
94 }
95
96 fn from_repr(code: u32) -> Self {
97 match code {
98 0 => Self::Absent,
99 1 => Self::Primary,
100 2 => Self::Secondary,
101 _ => panic!("invalid PriorityInCorpus code: {}", code),
102 }
103 }
104}
105
106///Relative priority of a ReadingElement or KanjiElement.
107///
108///The various fields indicate if the vocabulary appears in various references, which can be taken
109///as an indivication of the frequency with which it is used.
110///
111///For the sake of encoding efficiency, this struct is not a perfect representation of the data in
112///the JMdict. Some entries in the JMdict are marked with contradictory priority information. In
113///this case, `Priority` will only contain the values corresponding to the highest priority. For
114///example, a priority of `ichi1,ichi2,news1,nf09` is represented as:
115///
116///```
117///# use jmdict_enums::{PriorityInCorpus::*, Priority};
118///let p = Priority {
119/// news: Primary,
120/// ichimango: Primary, //"ichi2" gets ignored
121/// loanwords: Absent,
122/// additional: Absent,
123/// frequency_bucket: 9,
124///};
125///```
126#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
127pub struct Priority {
128 ///If not `Absent`, this vocabulary appears in the wordfreq file compiled by Alexandre Girardi
129 ///from the Mainichi Shimbun. (A copy of the file can be obtained from the EDRDG.)
130 pub news: PriorityInCorpus,
131 ///If not `Absent`, this vocabulary appears in the book "1万語語彙分類集" (Ichimango goi
132 ///bunruishuu) by Senmon Kyouiku Publishing, Tokyo, 1998. The entries with priority `Secondary`
133 ///were demoted from `Primary` because they were observed to have low frequencies in the WWW
134 ///and newspapers.
135 pub ichimango: PriorityInCorpus,
136 ///If not `Absent`, this vocabulary is a common loanword that appears in the wordfreq file.
137 pub loanwords: PriorityInCorpus,
138 ///This covers a small number of words when they are detected as being common, but are not
139 ///included in the above corpora.
140 pub additional: PriorityInCorpus,
141 ///If `self.news != Absent`, this field contains a value between 1 and 48, indicating the
142 ///frequency-of-use ranking for this vocabulary in the wordfreq file. The value 1 is used for
143 ///the 500 most common words, the value 2 is used for the 500 next most common words, and so
144 ///on. If `self.news == Absent`, this value will be 0.
145 pub frequency_bucket: u16,
146}
147
148impl Priority {
149 ///Indicates whether this is a common vocabulary. This follows the same logic as the `(P)`
150 ///markers in the EDICT and EDICT2 files: A word is common if any of its `PriorityInCorpus`
151 ///fields is `Primary`, or if `self.additional == Secondary`.
152 pub fn is_common(&self) -> bool {
153 use PriorityInCorpus::*;
154 self.news == Primary
155 || self.ichimango == Primary
156 || self.loanwords == Primary
157 || self.additional != Absent
158 }
159}
160
161//Priority gets serialized into u32, same as the enum types. The lower 16 bits are used for the
162//frequency buckets. The higher 16 bits are evenly distributed among the four PriorityInCorpus
163//fields. The encoding could be denser if we wanted to, but u32 is the smallest encoding unit
164//available to us anyway, so we don't need to bother.
165impl EnumPayload for Priority {
166 fn to_u32(&self) -> u32 {
167 let mut result = self.frequency_bucket as u32;
168 result |= self.news.to_repr() << 16;
169 result |= self.ichimango.to_repr() << 20;
170 result |= self.loanwords.to_repr() << 24;
171 result |= self.additional.to_repr() << 28;
172 result
173 }
174
175 fn from_u32(code: u32) -> Self {
176 Self {
177 news: PriorityInCorpus::from_repr((code & 0xF0000) >> 16),
178 ichimango: PriorityInCorpus::from_repr((code & 0xF00000) >> 20),
179 loanwords: PriorityInCorpus::from_repr((code & 0xF000000) >> 24),
180 additional: PriorityInCorpus::from_repr((code & 0xF0000000) >> 28),
181 frequency_bucket: (code & 0xFFFF) as u16,
182 }
183 }
184}
185
186include!(concat!(env!("OUT_DIR"), "/generated.rs"));