wareki_conv/
conv.rs

1//! Converts Wareki (JIS X 0301) based date into ISO 8601 based one
2
3use chrono::prelude::*;
4use kana::*;
5use regex::Regex;
6
/// Western calendar year that corresponds to year 1 of the Meiji era.
const START_YEAR_OF_MEIJI: i32 = 1868;
/// Western calendar year that corresponds to year 1 of the Taisho era.
const START_YEAR_OF_TAISHO: i32 = 1912;
/// Western calendar year that corresponds to year 1 of the Showa era.
const START_YEAR_OF_SHOWA: i32 = 1926;
/// Western calendar year that corresponds to year 1 of the Heisei era.
const START_YEAR_OF_HEISEI: i32 = 1989;
/// Western calendar year that corresponds to year 1 of the Reiwa era.
const START_YEAR_OF_REIWA: i32 = 2019;
12
/// Struct for date
///
/// A plain year / month / day triple. No validation is performed: the
/// components are stored exactly as given.
///
/// The derived ordering compares `year`, then `month`, then `day`, so two
/// dates compare in chronological order.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Date {
    /// Year
    pub year: i32,
    /// Month
    pub month: u32,
    /// Day
    pub day: u32,
}

impl Date {
    /// Returns new Date built from the given components as-is
    pub const fn new(year: i32, month: u32, day: u32) -> Self {
        Self { year, month, day }
    }

    /// Returns year
    pub const fn year(&self) -> i32 {
        self.year
    }

    /// Returns month
    pub const fn month(&self) -> u32 {
        self.month
    }

    /// Returns day
    pub const fn day(&self) -> u32 {
        self.day
    }
}
42
43/// List of Gengo
44#[derive(Clone, Debug, PartialEq, Eq)]
45pub enum Gengo {
46    /// Meiji
47    Meiji,
48    /// Taisho
49    Taisho,
50    /// Showa
51    Showa,
52    /// Heisei
53    Heisei,
54    /// Reiwa
55    Reiwa,
56}
57
58impl Gengo {
59    /// Returns the first year of the Gengo
60    ///
61    /// ```rust
62    /// use wareki_conv::conv::Gengo;
63    ///
64    /// assert_eq!(Gengo::Meiji.first_year(), 1868)
65    /// ```
66    pub const fn first_year(&self) -> i32 {
67        match *self {
68            Gengo::Meiji => START_YEAR_OF_MEIJI,
69            Gengo::Taisho => START_YEAR_OF_TAISHO,
70            Gengo::Showa => START_YEAR_OF_SHOWA,
71            Gengo::Heisei => START_YEAR_OF_HEISEI,
72            Gengo::Reiwa => START_YEAR_OF_REIWA,
73        }
74    }
75
76    /// Get the name of the Gengo
77    ///
78    /// ```rust
79    /// use wareki_conv::conv::Gengo;
80    ///
81    /// assert_eq!(Gengo::Meiji.name(), "Meiji")
82    /// ```
83    pub const fn name(&self) -> &'static str {
84        match *self {
85            Gengo::Meiji => "Meiji",
86            Gengo::Taisho => "Taisho",
87            Gengo::Showa => "Showa",
88            Gengo::Heisei => "Heisei",
89            Gengo::Reiwa => "Reiwa",
90        }
91    }
92}
93
/// Notation in which a Wareki date string is written
///
/// The supported notations and an example of each:
///
/// |             Variant             |     Example     |
/// | :------------------------------ | :-------------- |
/// | `JisX0301Basic`                 | `01.02.03`      |
/// | `JisX0301Extended`              | `R01.02.03`     |
/// | `JisX0301ExtendedWithKanji`     | `令01.02.03`    |
/// | `SeparatedWithKanji`            | `令和1年2月3日` |
///
/// ## Remark
/// JIS X 0301 requires a 1-digit year, month or day to be padded with a
/// leading 0.
///
/// Ref: <https://kikakurui.com/x0/X0301-2019-01.html>
///
/// This library nevertheless accepts un-padded values as well, because the
/// padding rule is not always followed, even in official documents.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum DateType {
    /// `.`-separated numbers without any era marker, e.g. `01.02.03`
    JisX0301Basic,
    /// `.`-separated numbers prefixed with a Latin era letter, e.g. `R01.02.03`
    JisX0301Extended,
    /// `.`-separated numbers prefixed with a Kanji era character, e.g. `令01.02.03`
    JisX0301ExtendedWithKanji,
    /// Full era name with `年` / `月` / `日` separators, e.g. `令和1年2月3日`
    SeparatedWithKanji,
}
120
/// Normalize input data
///
/// Japanese text can write the same character in Zenkaku (Full-width) or
/// Hankaku (Half-width) form. For example,
///
/// | Zenkaku | Hankaku |
/// | :-----: | :-----: |
/// |  `１`   |   `1`   |
/// |  `Ａ`   |   `A`   |
///
/// Both forms are commonly mixed in practice, so input data should be
/// normalized before it is inspected.
///
/// The conversion itself is delegated to `wide2ascii` (presumably provided by
/// the `kana` glob import); the result is returned as a new `String`.
///
/// ## Example
/// ```rust
/// use wareki_conv::conv::to_half_width;
///
/// assert_eq!(to_half_width("R01.02.03"), "R01.02.03");
/// assert_eq!(to_half_width("昭和15年12月31日"), "昭和15年12月31日");
/// ```
pub fn to_half_width(input: &str) -> String {
    wide2ascii(input)
}
144
145/// Finds structure type by pattern matching
146///
147/// ## Example:
148/// ```rust
149/// use wareki_conv::conv::find_type;
150/// use wareki_conv::conv::DateType;
151///
152/// assert_eq!(
153///     find_type("R01.02.03").unwrap(),
154///     Some(DateType::JisX0301Extended)
155/// )
156/// ```
157pub fn find_type(wareki: &str) -> Result<Option<DateType>, regex::Error> {
158    let wareki_half = to_half_width(wareki);
159    let elm: Vec<&str> = wareki_half.split('.').collect();
160    let re_begin_with_digit = Regex::new(r"^\d")?;
161    let re_begin_with_char = Regex::new(r"^(M|T|S|H|R)")?;
162    let re_begin_with_kanji = Regex::new(r"^(明|大|昭|平|令)")?;
163    let re_separated_with_kanji = Regex::new(r"^(明治|大正|昭和|平成|令和)\d+年\d+月\d+日")?;
164
165    if elm.len() == 1 {
166        // A minimum syntax assertion
167        assert!(re_separated_with_kanji.is_match(elm.get(0).unwrap()));
168        return Ok(Some(DateType::SeparatedWithKanji));
169    }
170
171    assert_eq!(elm.len(), 3);
172    let date_type = match elm.get(0) {
173        Some(x) if re_begin_with_digit.is_match(x) => Some(DateType::JisX0301Basic),
174        Some(x) if re_begin_with_char.is_match(x) => Some(DateType::JisX0301Extended),
175        Some(x) if re_begin_with_kanji.is_match(x) => Some(DateType::JisX0301ExtendedWithKanji),
176        _ => None,
177    };
178
179    Ok(date_type)
180}
181
182/// Maps meta character to corresponding Gengo
183///
184/// ## Example
185/// ```rust
186/// use wareki_conv::conv::gengo_resolve;
187/// use wareki_conv::conv::Gengo;
188///
189/// assert_eq!(gengo_resolve("R01.02.03"), Some(Gengo::Reiwa))
190/// ```
191pub fn gengo_resolve(wareki: &str) -> Option<Gengo> {
192    let meiji = vec!['M', '明'];
193    let taisho = vec!['T', '大'];
194    let showa = vec!['S', '昭'];
195    let heisei = vec!['H', '平'];
196    #[allow(unused_variables)]
197    // Currently, date with no meta attribute is mapped to this value.
198    let reiwa = vec!['R', '令'];
199
200    let wareki_half = to_half_width(wareki);
201    let first_char = wareki_half.chars().nth(0);
202    let gengo = match first_char {
203        Some(x) if meiji.contains(&x) => Some(Gengo::Meiji),
204        Some(x) if taisho.contains(&x) => Some(Gengo::Taisho),
205        Some(x) if showa.contains(&x) => Some(Gengo::Showa),
206        Some(x) if heisei.contains(&x) => Some(Gengo::Heisei),
207        // If no meta attribute is appended, the Gengo is assumed to be the current one.
208        _ => Some(Gengo::Reiwa),
209    };
210
211    gengo
212}
213
214/// Converts Wareki (JIS X 0301) based date into ISO based one
215///
216/// Adding to the JIS X 0301 standard, some additional features are
217/// implemented for utility. such as:
218/// * Accepts Full-width numbers
219///   * Full-width numbers are also used along with Half-width.
220/// * Accepts Non 0-padded patterns
221///   * A leading 0 is generally omitted in practical use.
222/// * Accepts first year notation in `"元年"`
223///   * NOTE: In Japanese calendar system, the first year of each Gengo(元号; An
224///     era name) is sometimes noted in `"元年"` instead of `<Era name>1年`.
225///
226/// ## Example
227/// ```rust
228/// use chrono::prelude::*;
229/// use wareki_conv::conv::convert;
230///
231/// assert_eq!(
232///     convert("明治1年2月3日").unwrap(),
233///     Some(Utc.with_ymd_and_hms(1868, 2, 3, 0, 0, 0).unwrap())
234/// );
235///
236/// assert_eq!(
237///     convert("明治元年2月3日").unwrap(),
238///     Some(Utc.with_ymd_and_hms(1868, 2, 3, 0, 0, 0).unwrap())
239/// );
240///
241/// assert_eq!(
242///     convert("令01.02.03").unwrap(),
243///     Some(Utc.with_ymd_and_hms(2019, 2, 3, 0, 0, 0).unwrap())
244/// );
245/// ```
246///
247/// ## Remark
248/// Actually, the first day of each era is not January 1 and it differs for each
249/// era. For example, the first day of the Heisei is January 8. This
250/// library does not take such conditions into account and assumes that the
251/// input values are correct.
252pub fn convert(wareki: &str) -> Result<Option<DateTime<Utc>>, regex::Error> {
253    let mut wareki_half = to_half_width(wareki);
254    // Replace `"元年"` to `"1年"`
255    wareki_half = wareki_half.replace("元", "1");
256    let date_type = match find_type(&wareki_half) {
257        Ok(x) => x,
258        Err(e) => return Err(e),
259    };
260    let gengo = gengo_resolve(&wareki_half);
261    let ymd_elements: Vec<u32>;
262
263    match date_type {
264        Some(DateType::SeparatedWithKanji) => {
265            let tmp: String = wareki_half
266                .chars()
267                .skip(2)
268                .filter(|x| x != &'日')
269                .map(|x| if x.is_ascii_digit() { x } else { '.' })
270                .collect();
271            ymd_elements = tmp
272                .split('.')
273                .into_iter()
274                .map(|x| x.parse().unwrap())
275                .collect();
276            assert_eq!(ymd_elements.len(), 3);
277        }
278        Some(DateType::JisX0301Basic) => {
279            ymd_elements = wareki_half
280                .split('.')
281                .into_iter()
282                .map(|x| x.parse().unwrap())
283                .collect();
284            assert_eq!(ymd_elements.len(), 3);
285        }
286        Some(DateType::JisX0301Extended) | Some(DateType::JisX0301ExtendedWithKanji) => {
287            ymd_elements = wareki_half
288                .chars()
289                .skip(1)
290                .collect::<String>()
291                .split('.')
292                .into_iter()
293                .map(|x| x.parse().unwrap())
294                .collect();
295            assert_eq!(ymd_elements.len(), 3);
296        }
297        None => return Ok(None),
298    }
299
300    // Converts year corresponding to Gengo
301    let year = match gengo {
302        Some(Gengo::Meiji) => {
303            ymd_elements.get(0).unwrap().clone() as i32 + Gengo::first_year(&Gengo::Meiji) - 1
304        }
305        Some(Gengo::Taisho) => {
306            ymd_elements.get(0).unwrap().clone() as i32 + Gengo::first_year(&Gengo::Taisho) - 1
307        }
308        Some(Gengo::Showa) => {
309            ymd_elements.get(0).unwrap().clone() as i32 + Gengo::first_year(&Gengo::Showa) - 1
310        }
311        Some(Gengo::Heisei) => {
312            ymd_elements.get(0).unwrap().clone() as i32 + Gengo::first_year(&Gengo::Heisei) - 1
313        }
314        Some(Gengo::Reiwa) => {
315            ymd_elements.get(0).unwrap().clone() as i32 + Gengo::first_year(&Gengo::Reiwa) - 1
316        }
317        None => return Ok(None),
318    };
319
320    let date = Date::new(
321        year,
322        *ymd_elements.get(1).unwrap(),
323        *ymd_elements.get(2).unwrap(),
324    );
325
326    let date_time: DateTime<Utc> = Utc
327        .with_ymd_and_hms(date.year(), date.month(), date.day(), 00, 00, 00)
328        .unwrap();
329
330    Ok(Some(date_time))
331}