Skip to main content

whichtime_sys/parsers/ja/
standard_date.rs

1//! Japanese standard date parser
2//!
3//! Handles Japanese date formats like:
4//! - "2012年3月31日" (YYYY年M月D日)
//! - "９月３日" (M月D日 with full-width numbers)
6//! - "平成26年12月29日" (Era year format)
7//! - "令和元年5月1日" (Reiwa era with gannen)
8//! - "同年7月27日", "本年7月27日", "今年7月27日" (relative year)
9//! - Date ranges: "2013年12月26日-2014年1月7日"
10
11use crate::components::Component;
12use crate::context::ParsingContext;
13use crate::dictionaries::ja::to_hankaku;
14use crate::error::Result;
15use crate::parsers::Parser;
16use crate::results::ParsedResult;
17use chrono::Datelike;
18use fancy_regex::Regex;
19use std::sync::LazyLock;
20
21// Pattern for standard Japanese date: [Era/Year]年[M]月[D]日
22static PATTERN: LazyLock<Regex> = LazyLock::new(|| {
23    Regex::new(
24        r"(?:(?P<era>平成|令和|昭和|大正|明治)(?P<era_year>[0-90-9]+|元)年|(?P<year_prefix>同年|本年|今年)|(?P<year>[0-90-9]{2,4})年)?(?P<month>[0-90-9]{1,2})月(?P<day>[0-90-9]{1,2})日"
25    ).unwrap()
26});
27
28// Pattern for date range
29static RANGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
30    Regex::new(
31        r"(?P<year1>[0-90-9]{2,4})年(?P<month1>[0-90-9]{1,2})月(?P<day1>[0-90-9]{1,2})日\s*[-~~ー]\s*(?P<year2>[0-90-9]{2,4})年(?P<month2>[0-90-9]{1,2})月(?P<day2>[0-90-9]{1,2})日"
32    ).unwrap()
33});
34
/// Japanese standard date parser.
///
/// A stateless unit type: all matching state lives in the module-level
/// regular expressions, so values can be freely copied.
#[derive(Debug, Clone, Copy)]
pub struct JAStandardDateParser;
37
38impl JAStandardDateParser {
39    pub fn new() -> Self {
40        Self
41    }
42
43    fn parse_number(s: &str) -> i32 {
44        let hankaku = to_hankaku(s);
45        hankaku.parse().unwrap_or(0)
46    }
47
48    fn era_to_year(era: &str, era_year: &str) -> Option<i32> {
49        let year_num = if era_year == "元" {
50            1
51        } else {
52            Self::parse_number(era_year)
53        };
54
55        let base_year = match era {
56            "令和" => 2018, // Reiwa 1 = 2019
57            "平成" => 1988, // Heisei 1 = 1989
58            "昭和" => 1925, // Showa 1 = 1926
59            "大正" => 1911, // Taisho 1 = 1912
60            "明治" => 1867, // Meiji 1 = 1868
61            _ => return None,
62        };
63
64        Some(base_year + year_num)
65    }
66
67    fn is_valid_date(year: i32, month: i32, day: i32) -> bool {
68        if !(1..=12).contains(&month) || !(1..=31).contains(&day) {
69            return false;
70        }
71        let days_in_month = match month {
72            1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
73            4 | 6 | 9 | 11 => 30,
74            2 => {
75                if (year % 4 == 0 && year % 100 != 0) || (year % 400 == 0) {
76                    29
77                } else {
78                    28
79                }
80            }
81            _ => return false,
82        };
83        day <= days_in_month
84    }
85}
86
87impl Parser for JAStandardDateParser {
88    fn name(&self) -> &'static str {
89        "JAStandardDateParser"
90    }
91
92    fn should_apply(&self, context: &ParsingContext) -> bool {
93        context.text.contains('月') && context.text.contains('日')
94    }
95
96    fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
97        let mut results = Vec::new();
98        let ref_date = context.reference.instant;
99
100        let mut start = 0;
101        while start < context.text.len() {
102            let search_text = &context.text[start..];
103
104            // Try range pattern first
105            if let Ok(Some(caps)) = RANGE_PATTERN.captures(search_text) {
106                let full_match = caps.get(0).unwrap();
107                let match_start = start + full_match.start();
108                let match_end = start + full_match.end();
109
110                let year1 = caps
111                    .name("year1")
112                    .map(|m| Self::parse_number(m.as_str()))
113                    .unwrap_or(0);
114                let month1 = caps
115                    .name("month1")
116                    .map(|m| Self::parse_number(m.as_str()))
117                    .unwrap_or(0);
118                let day1 = caps
119                    .name("day1")
120                    .map(|m| Self::parse_number(m.as_str()))
121                    .unwrap_or(0);
122
123                let year2 = caps
124                    .name("year2")
125                    .map(|m| Self::parse_number(m.as_str()))
126                    .unwrap_or(0);
127                let month2 = caps
128                    .name("month2")
129                    .map(|m| Self::parse_number(m.as_str()))
130                    .unwrap_or(0);
131                let day2 = caps
132                    .name("day2")
133                    .map(|m| Self::parse_number(m.as_str()))
134                    .unwrap_or(0);
135
136                if Self::is_valid_date(year1, month1, day1)
137                    && Self::is_valid_date(year2, month2, day2)
138                {
139                    let mut components = context.create_components();
140                    components.assign(Component::Year, year1);
141                    components.assign(Component::Month, month1);
142                    components.assign(Component::Day, day1);
143
144                    let mut end_comp = context.create_components();
145                    end_comp.assign(Component::Year, year2);
146                    end_comp.assign(Component::Month, month2);
147                    end_comp.assign(Component::Day, day2);
148
149                    results.push(context.create_result(
150                        match_start,
151                        match_end,
152                        components,
153                        Some(end_comp),
154                    ));
155                    start = match_end;
156                    continue;
157                }
158            }
159
160            // Try standard pattern
161            if let Ok(Some(caps)) = PATTERN.captures(search_text) {
162                let full_match = caps.get(0).unwrap();
163                let match_start = start + full_match.start();
164                let match_end = start + full_match.end();
165
166                let month = caps
167                    .name("month")
168                    .map(|m| Self::parse_number(m.as_str()))
169                    .unwrap_or(0);
170                let day = caps
171                    .name("day")
172                    .map(|m| Self::parse_number(m.as_str()))
173                    .unwrap_or(0);
174
175                // Determine year
176                let year = if let (Some(era), Some(era_year)) =
177                    (caps.name("era"), caps.name("era_year"))
178                {
179                    Self::era_to_year(era.as_str(), era_year.as_str()).unwrap_or(ref_date.year())
180                } else if let Some(_year_prefix) = caps.name("year_prefix") {
181                    // 同年, 本年, 今年 all mean current year
182                    ref_date.year()
183                } else if let Some(year_match) = caps.name("year") {
184                    let y = Self::parse_number(year_match.as_str());
185                    if y < 100 {
186                        if y > 50 { 1900 + y } else { 2000 + y }
187                    } else {
188                        y
189                    }
190                } else {
191                    // No year specified - imply current year
192                    ref_date.year()
193                };
194
195                if Self::is_valid_date(year, month, day) {
196                    let mut components = context.create_components();
197
198                    // If year was explicitly specified, assign it; otherwise imply it
199                    if caps.name("year").is_some() || caps.name("era").is_some() {
200                        components.assign(Component::Year, year);
201                    } else if caps.name("year_prefix").is_some() {
202                        // 同年, 本年, 今年 - assign the year
203                        components.assign(Component::Year, year);
204                    } else {
205                        components.imply(Component::Year, year);
206                    }
207                    components.assign(Component::Month, month);
208                    components.assign(Component::Day, day);
209
210                    results.push(context.create_result(match_start, match_end, components, None));
211                    start = match_end;
212                    continue;
213                }
214            }
215
216            // No match - advance
217            if let Some(c) = search_text.chars().next() {
218                start += c.len_utf8();
219            } else {
220                break;
221            }
222        }
223
224        Ok(results)
225    }
226}
227
228impl Default for JAStandardDateParser {
229    fn default() -> Self {
230        Self::new()
231    }
232}