1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
/*******************************************************************************
* Copyright 2021 Stefan Majewsky <majewsky@gmx.net>
* SPDX-License-Identifier: Apache-2.0
* Refer to the file "LICENSE" for details.
*******************************************************************************/

//! Autogenerated enums for the `jmdict` crate.
//!
//! This code is in a separate crate because, if we put it in the `jmdict` crate itself, its
//! `build.rs` could not import it.
//!
//! # Compatibility promise
//!
//! **There is none.** This crate can disappear at any time if we choose to restructure the build
//! system for the `jmdict` crate. To use the types from this crate, look at the re-exports of the
//! same name in [the `jmdict` crate](https://docs.rs/jmdict/).

///Error type for all enum conversions of the form `impl TryFrom<AllFoo> for Foo`.
///
///The error is returned for variants from the full enum that are disabled in the main enum because
///of the compile-time configuration. For example:
///
///```
///# use jmdict_enums::*;
///use std::convert::TryInto;
///let val: Result<PartOfSpeech, _> = AllPartOfSpeech::NariAdjective.try_into();
///#[cfg(feature = "scope-archaic")]
///assert_eq!(val, Ok(PartOfSpeech::NariAdjective));
///#[cfg(not(feature = "scope-archaic"))]
///assert_eq!(val, Err(DisabledVariant));
///```
///
///This type implements [`std::error::Error`], so it can be propagated with `?` into
///`Box<dyn std::error::Error>` and comparable error containers.
#[derive(Clone, Copy, Default, Hash, PartialEq, Eq, Debug)]
pub struct DisabledVariant;

//Human-readable message for the error. The type carries no payload, so the text is fixed.
impl std::fmt::Display for DisabledVariant {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("enum variant is disabled by the compile-time configuration")
    }
}

//There is no underlying cause to report, so the default method implementations suffice.
impl std::error::Error for DisabledVariant {}

///Internal helper functions for serialization and deserialization of enum values.
///
///This is an internal trait; it is not re-exported by the `jmdict` crate and thus not part of the
///public API.
pub trait EnumPayload {
    ///Encodes `self` as a `u32`.
    fn to_u32(&self) -> u32;
    ///Decodes a value from a `u32`. NOTE(review): presumably the inverse of `to_u32()`. The
    ///signature leaves no room for reporting failure, and the implementation for `Priority` in
    ///this file panics on codes it does not recognize, so only pass codes that were produced by
    ///`to_u32()`.
    fn from_u32(code: u32) -> Self;
}

///Common methods provided by all enums in this crate.
///
///The methods convert between an enum value, its marker string in the JMdict, and its Rust
///variant name.
pub trait Enum: Sized {
    ///Returns the string that marks this enum variant in the JMdict. For values that JMdict
    ///represents as XML entities, only the entity name is returned, e.g. `adj-n` instead of
    ///`&adj-n;`.
    fn code(&self) -> &'static str;

    ///Parses a representation from the JMdict file into a value of this enum. This is the reverse
    ///of `self.code()`, i.e. `Self::from_code(self.code()) == Some(self)`.
    ///
    ///NOTE(review): presumably `None` is returned for strings that do not match any variant; the
    ///implementations are not visible in this file -- confirm against the generated code.
    fn from_code(code: &str) -> Option<Self>;

    ///Returns the variant name. This is used to generate Rust code for this enum. The `impl
    ///Display` for enums uses this same representation.
    fn constant_name(&self) -> &'static str;
}

///PriorityInCorpus appears in struct [Priority]. It tells how frequently a dictionary entry
///occurs in one particular corpus of text.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum PriorityInCorpus {
    ///The vocabulary appears often within the given corpus.
    Primary,
    ///The vocabulary appears within the given corpus, but not particularly often.
    Secondary,
    ///The vocabulary does not appear in the given corpus. This is the `Default::default()` value.
    Absent,
}

//`Absent` is the default value, even though it is declared last.
impl Default for PriorityInCorpus {
    fn default() -> PriorityInCorpus {
        PriorityInCorpus::Absent
    }
}

impl PriorityInCorpus {
    //Encodes this value as a small integer: `Absent` is 0, `Primary` is 1, `Secondary` is 2.
    //Note that this does not follow the declaration order of the variants.
    fn to_repr(&self) -> u32 {
        match self {
            PriorityInCorpus::Absent => 0,
            PriorityInCorpus::Primary => 1,
            PriorityInCorpus::Secondary => 2,
        }
    }

    //Decodes an integer produced by `to_repr()`. Panics on any code other than 0, 1 or 2.
    fn from_repr(code: u32) -> Self {
        match code {
            0 => PriorityInCorpus::Absent,
            1 => PriorityInCorpus::Primary,
            2 => PriorityInCorpus::Secondary,
            unknown => panic!("invalid PriorityInCorpus code: {}", unknown),
        }
    }
}

///Relative priority of a ReadingElement or KanjiElement.
///
///The various fields indicate if the vocabulary appears in various references, which can be taken
///as an indication of the frequency with which it is used.
///
///For the sake of encoding efficiency, this struct is not a perfect representation of the data in
///the JMdict. Some entries in the JMdict are marked with contradictory priority information. In
///this case, `Priority` will only contain the values corresponding to the highest priority. For
///example, a priority of `ichi1,ichi2,news1,nf09` is represented as:
///
///```
///# use jmdict_enums::{PriorityInCorpus::*, Priority};
///let p = Priority {
///    news: Primary,
///    ichimango: Primary, //"ichi2" gets ignored
///    loanwords: Absent,
///    additional: Absent,
///    frequency_bucket: 9,
///};
///```
///
///The derived `Default` value has every `PriorityInCorpus` field set to `Absent` and a
///`frequency_bucket` of 0.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub struct Priority {
    ///If not `Absent`, this vocabulary appears in the wordfreq file compiled by Alexandre Girardi
    ///from the Mainichi Shimbun. (A copy of the file can be obtained from the EDRDG.)
    pub news: PriorityInCorpus,
    ///If not `Absent`, this vocabulary appears in the book "1万語語彙分類集" (Ichimango goi
    ///bunruishuu) by Senmon Kyouiku Publishing, Tokyo, 1998. The entries with priority `Secondary`
    ///were demoted from `Primary` because they were observed to have low frequencies in the WWW
    ///and newspapers.
    pub ichimango: PriorityInCorpus,
    ///If not `Absent`, this vocabulary is a common loanword that appears in the wordfreq file.
    pub loanwords: PriorityInCorpus,
    ///This covers a small number of words when they are detected as being common, but are not
    ///included in the above corpora.
    pub additional: PriorityInCorpus,
    ///If `self.news != Absent`, this field contains a value between 1 and 48, indicating the
    ///frequency-of-use ranking for this vocabulary in the wordfreq file. The value 1 is used for
    ///the 500 most common words, the value 2 is used for the 500 next most common words, and so
    ///on. If `self.news == Absent`, this value will be 0.
    pub frequency_bucket: u16,
}

impl Priority {
    ///Indicates whether this is a common vocabulary. This follows the same logic as the `(P)`
    ///markers in the EDICT and EDICT2 files: A word is common if any of its `PriorityInCorpus`
    ///fields is `Primary`, or if `self.additional == Secondary`.
    pub fn is_common(&self) -> bool {
        use PriorityInCorpus::*;
        self.news == Primary
            || self.ichimango == Primary
            || self.loanwords == Primary
            || self.additional != Absent
    }
}

//Priority is packed into a u32, same as the enum types. Bits 0-15 hold the frequency bucket. The
//upper 16 bits are split into four 4-bit fields, one per PriorityInCorpus field: news starts at
//bit 16, ichimango at bit 20, loanwords at bit 24 and additional at bit 28. A denser encoding
//would be possible, but u32 is the smallest encoding unit available to us anyway, so there is
//nothing to gain from it.
impl EnumPayload for Priority {
    fn to_u32(&self) -> u32 {
        u32::from(self.frequency_bucket)
            | (self.news.to_repr() << 16)
            | (self.ichimango.to_repr() << 20)
            | (self.loanwords.to_repr() << 24)
            | (self.additional.to_repr() << 28)
    }

    fn from_u32(code: u32) -> Self {
        //Extracts the 4-bit field starting at bit `shift` and decodes it. `from_repr()` panics if
        //the field holds a value that `to_repr()` never produces.
        let corpus_at = |shift: u32| PriorityInCorpus::from_repr((code >> shift) & 0xF);
        Self {
            news: corpus_at(16),
            ichimango: corpus_at(20),
            loanwords: corpus_at(24),
            additional: corpus_at(28),
            frequency_bucket: (code & 0xFFFF) as u16,
        }
    }
}

//Textually includes the file `generated.rs` from the directory named by the `OUT_DIR`
//environment variable at compile time. NOTE(review): presumably this file is written by the
//crate's build script and contains the autogenerated enums mentioned in the crate docs -- confirm.
include!(concat!(env!("OUT_DIR"), "/generated.rs"));