wareki_conv/conv.rs
1//! Converts Wareki (JIS X 0301) based date into ISO 8601 based one
2
3use chrono::prelude::*;
4use kana::*;
5use regex::Regex;
6
/// Gregorian year in which the Meiji (明治) era starts.
const START_YEAR_OF_MEIJI: i32 = 1868;
/// Gregorian year in which the Taisho (大正) era starts.
const START_YEAR_OF_TAISHO: i32 = 1912;
/// Gregorian year in which the Showa (昭和) era starts.
const START_YEAR_OF_SHOWA: i32 = 1926;
/// Gregorian year in which the Heisei (平成) era starts.
const START_YEAR_OF_HEISEI: i32 = 1989;
/// Gregorian year in which the Reiwa (令和) era starts.
const START_YEAR_OF_REIWA: i32 = 2019;
12
/// A plain calendar date made of a year, a month and a day.
///
/// The components are stored exactly as supplied; no range or calendar
/// validation is performed by this type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Date {
    /// Year component
    pub year: i32,
    /// Month component
    pub month: u32,
    /// Day-of-month component
    pub day: u32,
}

impl Date {
    /// Builds a `Date` from its three components, storing them unchanged.
    pub fn new(year: i32, month: u32, day: u32) -> Self {
        Date { year, month, day }
    }

    /// Year component of this date
    pub fn year(&self) -> i32 {
        let Date { year, .. } = *self;
        year
    }

    /// Month component of this date
    pub fn month(&self) -> u32 {
        let Date { month, .. } = *self;
        month
    }

    /// Day-of-month component of this date
    pub fn day(&self) -> u32 {
        let Date { day, .. } = *self;
        day
    }
}
42
/// Japanese era names (Gengo, 元号) known to this module, oldest first.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Gengo {
    /// Meiji (明治) era
    Meiji,
    /// Taisho (大正) era
    Taisho,
    /// Showa (昭和) era
    Showa,
    /// Heisei (平成) era
    Heisei,
    /// Reiwa (令和) era
    Reiwa,
}
57
58impl Gengo {
59 /// Returns the first year of the Gengo
60 ///
61 /// ```rust
62 /// use wareki_conv::conv::Gengo;
63 ///
64 /// assert_eq!(Gengo::Meiji.first_year(), 1868)
65 /// ```
66 pub const fn first_year(&self) -> i32 {
67 match *self {
68 Gengo::Meiji => START_YEAR_OF_MEIJI,
69 Gengo::Taisho => START_YEAR_OF_TAISHO,
70 Gengo::Showa => START_YEAR_OF_SHOWA,
71 Gengo::Heisei => START_YEAR_OF_HEISEI,
72 Gengo::Reiwa => START_YEAR_OF_REIWA,
73 }
74 }
75
76 /// Get the name of the Gengo
77 ///
78 /// ```rust
79 /// use wareki_conv::conv::Gengo;
80 ///
81 /// assert_eq!(Gengo::Meiji.name(), "Meiji")
82 /// ```
83 pub const fn name(&self) -> &'static str {
84 match *self {
85 Gengo::Meiji => "Meiji",
86 Gengo::Taisho => "Taisho",
87 Gengo::Showa => "Showa",
88 Gengo::Heisei => "Heisei",
89 Gengo::Reiwa => "Reiwa",
90 }
91 }
92}
93
/// Notation in which a Wareki date string is written
///
/// The recognized notations and an example of each:
///
/// | Type                                | Format Example  |
/// | :---------------------------------- | :-------------- |
/// | `JisX0301Basic`                     | `01.02.03`      |
/// | `JisX0301Extended`                  | `R01.02.03`     |
/// | `JisX0301ExtendedWithKanji`         | `令01.02.03`    |
/// | `SeparatedWithKanji`                | `令和1年2月3日` |
///
/// ## Remark
/// JIS X 0301 requires every component (year, month and day) to be zero-padded
/// when it has a single digit.
///
/// Ref: <https://kikakurui.com/x0/X0301-2019-01.html>
///
/// This library accepts un-padded components as well, because zero-padding is
/// not always observed, even in official documents.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum DateType {
    /// Dot-separated digits without an era marker, e.g. `01.02.03`
    JisX0301Basic,
    /// Dot-separated digits prefixed with a Latin era letter, e.g. `R01.02.03`
    JisX0301Extended,
    /// Dot-separated digits prefixed with a one-kanji era marker, e.g. `令01.02.03`
    JisX0301ExtendedWithKanji,
    /// Full era name with `年`/`月`/`日` separators, e.g. `令和1年2月3日`
    SeparatedWithKanji,
}
120
/// Normalizes input text to half-width characters
///
/// Japanese text uses both Zenkaku (full-width) and Hankaku (half-width)
/// forms of digits and Latin letters. For example,
///
/// | Zenkaku | Hankaku |
/// | :-----: | :-----: |
/// | `1`    | `1`     |
/// | `A`    | `A`     |
///
/// Both forms are commonly mixed in practice, so input should be normalized
/// before it is inspected. The conversion itself is delegated to
/// `kana::wide2ascii`; characters without a half-width counterpart (such as
/// the kanji in the second example below) are returned unchanged.
///
/// ## Example
/// ```rust
/// use wareki_conv::conv::to_half_width;
///
/// assert_eq!(to_half_width("R01.02.03"), "R01.02.03");
/// assert_eq!(to_half_width("昭和15年12月31日"), "昭和15年12月31日");
/// ```
pub fn to_half_width(input: &str) -> String {
    wide2ascii(input)
}
144
145/// Finds structure type by pattern matching
146///
147/// ## Example:
148/// ```rust
149/// use wareki_conv::conv::find_type;
150/// use wareki_conv::conv::DateType;
151///
152/// assert_eq!(
153/// find_type("R01.02.03").unwrap(),
154/// Some(DateType::JisX0301Extended)
155/// )
156/// ```
157pub fn find_type(wareki: &str) -> Result<Option<DateType>, regex::Error> {
158 let wareki_half = to_half_width(wareki);
159 let elm: Vec<&str> = wareki_half.split('.').collect();
160 let re_begin_with_digit = Regex::new(r"^\d")?;
161 let re_begin_with_char = Regex::new(r"^(M|T|S|H|R)")?;
162 let re_begin_with_kanji = Regex::new(r"^(明|大|昭|平|令)")?;
163 let re_separated_with_kanji = Regex::new(r"^(明治|大正|昭和|平成|令和)\d+年\d+月\d+日")?;
164
165 if elm.len() == 1 {
166 // A minimum syntax assertion
167 assert!(re_separated_with_kanji.is_match(elm.get(0).unwrap()));
168 return Ok(Some(DateType::SeparatedWithKanji));
169 }
170
171 assert_eq!(elm.len(), 3);
172 let date_type = match elm.get(0) {
173 Some(x) if re_begin_with_digit.is_match(x) => Some(DateType::JisX0301Basic),
174 Some(x) if re_begin_with_char.is_match(x) => Some(DateType::JisX0301Extended),
175 Some(x) if re_begin_with_kanji.is_match(x) => Some(DateType::JisX0301ExtendedWithKanji),
176 _ => None,
177 };
178
179 Ok(date_type)
180}
181
182/// Maps meta character to corresponding Gengo
183///
184/// ## Example
185/// ```rust
186/// use wareki_conv::conv::gengo_resolve;
187/// use wareki_conv::conv::Gengo;
188///
189/// assert_eq!(gengo_resolve("R01.02.03"), Some(Gengo::Reiwa))
190/// ```
191pub fn gengo_resolve(wareki: &str) -> Option<Gengo> {
192 let meiji = vec!['M', '明'];
193 let taisho = vec!['T', '大'];
194 let showa = vec!['S', '昭'];
195 let heisei = vec!['H', '平'];
196 #[allow(unused_variables)]
197 // Currently, date with no meta attribute is mapped to this value.
198 let reiwa = vec!['R', '令'];
199
200 let wareki_half = to_half_width(wareki);
201 let first_char = wareki_half.chars().nth(0);
202 let gengo = match first_char {
203 Some(x) if meiji.contains(&x) => Some(Gengo::Meiji),
204 Some(x) if taisho.contains(&x) => Some(Gengo::Taisho),
205 Some(x) if showa.contains(&x) => Some(Gengo::Showa),
206 Some(x) if heisei.contains(&x) => Some(Gengo::Heisei),
207 // If no meta attribute is appended, the Gengo is assumed to be the current one.
208 _ => Some(Gengo::Reiwa),
209 };
210
211 gengo
212}
213
214/// Converts Wareki (JIS X 0301) based date into ISO based one
215///
216/// Adding to the JIS X 0301 standard, some additional features are
217/// implemented for utility. such as:
218/// * Accepts Full-width numbers
219/// * Full-width numbers are also used along with Half-width.
220/// * Accepts Non 0-padded patterns
221/// * A leading 0 is generally omitted in practical use.
222/// * Accepts first year notation in `"元年"`
223/// * NOTE: In Japanese calendar system, the first year of each Gengo(元号; An
224/// era name) is sometimes noted in `"元年"` instead of `<Era name>1年`.
225///
226/// ## Example
227/// ```rust
228/// use chrono::prelude::*;
229/// use wareki_conv::conv::convert;
230///
231/// assert_eq!(
232/// convert("明治1年2月3日").unwrap(),
233/// Some(Utc.with_ymd_and_hms(1868, 2, 3, 0, 0, 0).unwrap())
234/// );
235///
236/// assert_eq!(
237/// convert("明治元年2月3日").unwrap(),
238/// Some(Utc.with_ymd_and_hms(1868, 2, 3, 0, 0, 0).unwrap())
239/// );
240///
241/// assert_eq!(
242/// convert("令01.02.03").unwrap(),
243/// Some(Utc.with_ymd_and_hms(2019, 2, 3, 0, 0, 0).unwrap())
244/// );
245/// ```
246///
247/// ## Remark
248/// Actually, the first day of each era is not January 1 and it differs for each
249/// era. For example, the first day of the Heisei is January 8. This
250/// library does not take such conditions into account and assumes that the
251/// input values are correct.
252pub fn convert(wareki: &str) -> Result<Option<DateTime<Utc>>, regex::Error> {
253 let mut wareki_half = to_half_width(wareki);
254 // Replace `"元年"` to `"1年"`
255 wareki_half = wareki_half.replace("元", "1");
256 let date_type = match find_type(&wareki_half) {
257 Ok(x) => x,
258 Err(e) => return Err(e),
259 };
260 let gengo = gengo_resolve(&wareki_half);
261 let ymd_elements: Vec<u32>;
262
263 match date_type {
264 Some(DateType::SeparatedWithKanji) => {
265 let tmp: String = wareki_half
266 .chars()
267 .skip(2)
268 .filter(|x| x != &'日')
269 .map(|x| if x.is_ascii_digit() { x } else { '.' })
270 .collect();
271 ymd_elements = tmp
272 .split('.')
273 .into_iter()
274 .map(|x| x.parse().unwrap())
275 .collect();
276 assert_eq!(ymd_elements.len(), 3);
277 }
278 Some(DateType::JisX0301Basic) => {
279 ymd_elements = wareki_half
280 .split('.')
281 .into_iter()
282 .map(|x| x.parse().unwrap())
283 .collect();
284 assert_eq!(ymd_elements.len(), 3);
285 }
286 Some(DateType::JisX0301Extended) | Some(DateType::JisX0301ExtendedWithKanji) => {
287 ymd_elements = wareki_half
288 .chars()
289 .skip(1)
290 .collect::<String>()
291 .split('.')
292 .into_iter()
293 .map(|x| x.parse().unwrap())
294 .collect();
295 assert_eq!(ymd_elements.len(), 3);
296 }
297 None => return Ok(None),
298 }
299
300 // Converts year corresponding to Gengo
301 let year = match gengo {
302 Some(Gengo::Meiji) => {
303 ymd_elements.get(0).unwrap().clone() as i32 + Gengo::first_year(&Gengo::Meiji) - 1
304 }
305 Some(Gengo::Taisho) => {
306 ymd_elements.get(0).unwrap().clone() as i32 + Gengo::first_year(&Gengo::Taisho) - 1
307 }
308 Some(Gengo::Showa) => {
309 ymd_elements.get(0).unwrap().clone() as i32 + Gengo::first_year(&Gengo::Showa) - 1
310 }
311 Some(Gengo::Heisei) => {
312 ymd_elements.get(0).unwrap().clone() as i32 + Gengo::first_year(&Gengo::Heisei) - 1
313 }
314 Some(Gengo::Reiwa) => {
315 ymd_elements.get(0).unwrap().clone() as i32 + Gengo::first_year(&Gengo::Reiwa) - 1
316 }
317 None => return Ok(None),
318 };
319
320 let date = Date::new(
321 year,
322 *ymd_elements.get(1).unwrap(),
323 *ymd_elements.get(2).unwrap(),
324 );
325
326 let date_time: DateTime<Utc> = Utc
327 .with_ymd_and_hms(date.year(), date.month(), date.day(), 00, 00, 00)
328 .unwrap();
329
330 Ok(Some(date_time))
331}