whichtime_sys/parsers/ja/
standard_date.rs1use crate::components::Component;
12use crate::context::ParsingContext;
13use crate::dictionaries::ja::to_hankaku;
14use crate::error::Result;
15use crate::parsers::Parser;
16use crate::results::ParsedResult;
17use chrono::Datelike;
18use fancy_regex::Regex;
19use std::sync::LazyLock;
20
21static PATTERN: LazyLock<Regex> = LazyLock::new(|| {
23 Regex::new(
24 r"(?:(?P<era>平成|令和|昭和|大正|明治)(?P<era_year>[0-90-9]+|元)年|(?P<year_prefix>同年|本年|今年)|(?P<year>[0-90-9]{2,4})年)?(?P<month>[0-90-9]{1,2})月(?P<day>[0-90-9]{1,2})日"
25 ).unwrap()
26});
27
28static RANGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
30 Regex::new(
31 r"(?P<year1>[0-90-9]{2,4})年(?P<month1>[0-90-9]{1,2})月(?P<day1>[0-90-9]{1,2})日\s*[-~~ー]\s*(?P<year2>[0-90-9]{2,4})年(?P<month2>[0-90-9]{1,2})月(?P<day2>[0-90-9]{1,2})日"
32 ).unwrap()
33});
34
/// Parser for Japanese dates written with the 年 / 月 / 日 markers, either as
/// a single date (optionally prefixed by a numeric year, an era year, or a
/// relative year marker) or as a range of two fully specified dates.
///
/// The type carries no state; all matching is driven by the module-level
/// regular expressions.
pub struct JAStandardDateParser;
37
38impl JAStandardDateParser {
39 pub fn new() -> Self {
40 Self
41 }
42
43 fn parse_number(s: &str) -> i32 {
44 let hankaku = to_hankaku(s);
45 hankaku.parse().unwrap_or(0)
46 }
47
48 fn era_to_year(era: &str, era_year: &str) -> Option<i32> {
49 let year_num = if era_year == "元" {
50 1
51 } else {
52 Self::parse_number(era_year)
53 };
54
55 let base_year = match era {
56 "令和" => 2018, "平成" => 1988, "昭和" => 1925, "大正" => 1911, "明治" => 1867, _ => return None,
62 };
63
64 Some(base_year + year_num)
65 }
66
67 fn is_valid_date(year: i32, month: i32, day: i32) -> bool {
68 if !(1..=12).contains(&month) || !(1..=31).contains(&day) {
69 return false;
70 }
71 let days_in_month = match month {
72 1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
73 4 | 6 | 9 | 11 => 30,
74 2 => {
75 if (year % 4 == 0 && year % 100 != 0) || (year % 400 == 0) {
76 29
77 } else {
78 28
79 }
80 }
81 _ => return false,
82 };
83 day <= days_in_month
84 }
85}
86
87impl Parser for JAStandardDateParser {
88 fn name(&self) -> &'static str {
89 "JAStandardDateParser"
90 }
91
92 fn should_apply(&self, context: &ParsingContext) -> bool {
93 context.text.contains('月') && context.text.contains('日')
94 }
95
96 fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
97 let mut results = Vec::new();
98 let ref_date = context.reference.instant;
99
100 let mut start = 0;
101 while start < context.text.len() {
102 let search_text = &context.text[start..];
103
104 if let Ok(Some(caps)) = RANGE_PATTERN.captures(search_text) {
106 let full_match = caps.get(0).unwrap();
107 let match_start = start + full_match.start();
108 let match_end = start + full_match.end();
109
110 let year1 = caps
111 .name("year1")
112 .map(|m| Self::parse_number(m.as_str()))
113 .unwrap_or(0);
114 let month1 = caps
115 .name("month1")
116 .map(|m| Self::parse_number(m.as_str()))
117 .unwrap_or(0);
118 let day1 = caps
119 .name("day1")
120 .map(|m| Self::parse_number(m.as_str()))
121 .unwrap_or(0);
122
123 let year2 = caps
124 .name("year2")
125 .map(|m| Self::parse_number(m.as_str()))
126 .unwrap_or(0);
127 let month2 = caps
128 .name("month2")
129 .map(|m| Self::parse_number(m.as_str()))
130 .unwrap_or(0);
131 let day2 = caps
132 .name("day2")
133 .map(|m| Self::parse_number(m.as_str()))
134 .unwrap_or(0);
135
136 if Self::is_valid_date(year1, month1, day1)
137 && Self::is_valid_date(year2, month2, day2)
138 {
139 let mut components = context.create_components();
140 components.assign(Component::Year, year1);
141 components.assign(Component::Month, month1);
142 components.assign(Component::Day, day1);
143
144 let mut end_comp = context.create_components();
145 end_comp.assign(Component::Year, year2);
146 end_comp.assign(Component::Month, month2);
147 end_comp.assign(Component::Day, day2);
148
149 results.push(context.create_result(
150 match_start,
151 match_end,
152 components,
153 Some(end_comp),
154 ));
155 start = match_end;
156 continue;
157 }
158 }
159
160 if let Ok(Some(caps)) = PATTERN.captures(search_text) {
162 let full_match = caps.get(0).unwrap();
163 let match_start = start + full_match.start();
164 let match_end = start + full_match.end();
165
166 let month = caps
167 .name("month")
168 .map(|m| Self::parse_number(m.as_str()))
169 .unwrap_or(0);
170 let day = caps
171 .name("day")
172 .map(|m| Self::parse_number(m.as_str()))
173 .unwrap_or(0);
174
175 let year = if let (Some(era), Some(era_year)) =
177 (caps.name("era"), caps.name("era_year"))
178 {
179 Self::era_to_year(era.as_str(), era_year.as_str()).unwrap_or(ref_date.year())
180 } else if let Some(_year_prefix) = caps.name("year_prefix") {
181 ref_date.year()
183 } else if let Some(year_match) = caps.name("year") {
184 let y = Self::parse_number(year_match.as_str());
185 if y < 100 {
186 if y > 50 { 1900 + y } else { 2000 + y }
187 } else {
188 y
189 }
190 } else {
191 ref_date.year()
193 };
194
195 if Self::is_valid_date(year, month, day) {
196 let mut components = context.create_components();
197
198 if caps.name("year").is_some() || caps.name("era").is_some() {
200 components.assign(Component::Year, year);
201 } else if caps.name("year_prefix").is_some() {
202 components.assign(Component::Year, year);
204 } else {
205 components.imply(Component::Year, year);
206 }
207 components.assign(Component::Month, month);
208 components.assign(Component::Day, day);
209
210 results.push(context.create_result(match_start, match_end, components, None));
211 start = match_end;
212 continue;
213 }
214 }
215
216 if let Some(c) = search_text.chars().next() {
218 start += c.len_utf8();
219 } else {
220 break;
221 }
222 }
223
224 Ok(results)
225 }
226}
227
228impl Default for JAStandardDateParser {
229 fn default() -> Self {
230 Self::new()
231 }
232}