whichtime_sys/parsers/zh/
standard_date.rs1use crate::components::Component;
10use crate::context::ParsingContext;
11use crate::dictionaries::zh::{fullwidth_to_halfwidth, zh_string_to_number};
12use crate::error::Result;
13use crate::parsers::Parser;
14use crate::results::ParsedResult;
15use chrono::Datelike;
16use fancy_regex::Regex;
17use std::sync::LazyLock;
18
19static PATTERN: LazyLock<Regex> = LazyLock::new(|| {
23 Regex::new(
24 r"(?P<year>[0-90-9一二三四五六七八九十零〇]+)年[,,\s]*(?P<month>[0-90-9一二三四五六七八九十]+)月(?P<day>[0-90-9一二三四五六七八九十]+)(?:号|號|日)"
25 ).unwrap()
26});
27
28static MONTH_DAY_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
30 Regex::new(
31 r"(?P<month>[0-90-9一二三四五六七八九十]+)月(?P<day>[0-90-9一二三四五六七八九十]+)(?:号|號|日)"
32 ).unwrap()
33});
34
/// Parser for standard Chinese calendar dates such as `2024年1月2日` or
/// `3月5号`.
///
/// The type carries no state, so it is cheap to copy and every instance
/// behaves identically.
#[derive(Debug, Clone, Copy)]
pub struct ZHStandardDateParser;
37
38impl ZHStandardDateParser {
39 pub fn new() -> Self {
40 Self
41 }
42
43 fn parse_number(s: &str) -> i32 {
44 let hankaku = fullwidth_to_halfwidth(s);
46
47 if let Ok(n) = hankaku.parse::<i32>() {
49 return n;
50 }
51
52 zh_string_to_number(s) as i32
54 }
55
56 fn parse_year(s: &str) -> i32 {
57 let hankaku = fullwidth_to_halfwidth(s);
60
61 if let Ok(n) = hankaku.parse::<i32>() {
63 if n < 100 {
65 return if n > 50 { 1900 + n } else { 2000 + n };
66 }
67 return n;
68 }
69
70 let mut result = 0i32;
73 let mut has_chinese = false;
74
75 for c in s.chars() {
76 let c_str = c.to_string();
77 let digit = match c_str.as_str() {
78 "零" | "〇" => {
79 has_chinese = true;
80 0
81 }
82 "一" => {
83 has_chinese = true;
84 1
85 }
86 "二" => {
87 has_chinese = true;
88 2
89 }
90 "三" => {
91 has_chinese = true;
92 3
93 }
94 "四" => {
95 has_chinese = true;
96 4
97 }
98 "五" => {
99 has_chinese = true;
100 5
101 }
102 "六" => {
103 has_chinese = true;
104 6
105 }
106 "七" => {
107 has_chinese = true;
108 7
109 }
110 "八" => {
111 has_chinese = true;
112 8
113 }
114 "九" => {
115 has_chinese = true;
116 9
117 }
118 _ => continue,
119 };
120 result = result * 10 + digit;
121 }
122
123 if has_chinese && result > 0 {
124 if result < 100 {
126 return if result > 50 {
127 1900 + result
128 } else {
129 2000 + result
130 };
131 }
132 return result;
133 }
134
135 zh_string_to_number(s) as i32
137 }
138
139 fn is_valid_date(year: i32, month: i32, day: i32) -> bool {
140 if !(1..=12).contains(&month) || !(1..=31).contains(&day) {
141 return false;
142 }
143 let days_in_month = match month {
144 1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
145 4 | 6 | 9 | 11 => 30,
146 2 => {
147 if (year % 4 == 0 && year % 100 != 0) || (year % 400 == 0) {
148 29
149 } else {
150 28
151 }
152 }
153 _ => return false,
154 };
155 day <= days_in_month
156 }
157}
158
159impl Parser for ZHStandardDateParser {
160 fn name(&self) -> &'static str {
161 "ZHStandardDateParser"
162 }
163
164 fn should_apply(&self, context: &ParsingContext) -> bool {
165 context.text.contains('月')
166 && (context.text.contains('号')
167 || context.text.contains('號')
168 || context.text.contains('日'))
169 }
170
171 fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
172 let mut results = Vec::new();
173 let ref_date = context.reference.instant;
174
175 let mut start = 0;
176 while start < context.text.len() {
177 let search_text = &context.text[start..];
178
179 if let Ok(Some(caps)) = PATTERN.captures(search_text) {
181 let full_match = caps.get(0).unwrap();
182 let match_start = start + full_match.start();
183 let match_end = start + full_match.end();
184
185 let year = caps
186 .name("year")
187 .map(|m| Self::parse_year(m.as_str()))
188 .unwrap_or(0);
189 let month = caps
190 .name("month")
191 .map(|m| Self::parse_number(m.as_str()))
192 .unwrap_or(0);
193 let day = caps
194 .name("day")
195 .map(|m| Self::parse_number(m.as_str()))
196 .unwrap_or(0);
197
198 if Self::is_valid_date(year, month, day) {
199 let mut components = context.create_components();
200 components.assign(Component::Year, year);
201 components.assign(Component::Month, month);
202 components.assign(Component::Day, day);
203
204 results.push(context.create_result(match_start, match_end, components, None));
205 start = match_end;
206 continue;
207 }
208 }
209
210 if let Ok(Some(caps)) = MONTH_DAY_PATTERN.captures(search_text) {
212 let full_match = caps.get(0).unwrap();
213 let match_start = start + full_match.start();
214 let match_end = start + full_match.end();
215
216 let month = caps
217 .name("month")
218 .map(|m| Self::parse_number(m.as_str()))
219 .unwrap_or(0);
220 let day = caps
221 .name("day")
222 .map(|m| Self::parse_number(m.as_str()))
223 .unwrap_or(0);
224 let year = ref_date.year();
225
226 if Self::is_valid_date(year, month, day) {
227 let mut components = context.create_components();
228 components.imply(Component::Year, year);
229 components.assign(Component::Month, month);
230 components.assign(Component::Day, day);
231
232 results.push(context.create_result(match_start, match_end, components, None));
233 start = match_end;
234 continue;
235 }
236 }
237
238 if let Some(c) = search_text.chars().next() {
240 start += c.len_utf8();
241 } else {
242 break;
243 }
244 }
245
246 Ok(results)
247 }
248}
249
250impl Default for ZHStandardDateParser {
251 fn default() -> Self {
252 Self::new()
253 }
254}