whichtime_sys/parsers/common/
month_name.rs1use crate::components::Component;
7use crate::context::ParsingContext;
8use crate::dictionaries::Locale;
9use crate::error::Result;
10use crate::parsers::Parser;
11use crate::results::ParsedResult;
12use crate::scanner::TokenType;
13use chrono::Datelike;
14use regex::Regex;
15use std::sync::LazyLock;
16
17static EN_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
19 Regex::new(r"(?i)(?:^|\W)(\d{1,2})(?:st|nd|rd|th)?\s+(jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|june?|july?|aug(?:ust)?|sep(?:t(?:ember)?)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\s*,?\s*(\d{2,4})?").unwrap()
20});
21
22static DE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
23 Regex::new(r"(?i)(?:^|\W)(\d{1,2})\.?\s+(januar|jänner|janner|jan|februar|feber|feb|märz|maerz|mär|mrz|april|apr|mai|juni|jun|juli|jul|august|aug|september|sep|sept|oktober|okt|november|nov|dezember|dez)\s*,?\s*(\d{2,4})?").unwrap()
24});
25
26static ES_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
27 Regex::new(r"(?i)(?:^|\W)(?:el\s+)?(\d{1,2})(?:\s+de)?\s+(enero|ene|febrero|feb|marzo|mar|abril|abr|mayo|may|junio|jun|julio|jul|agosto|ago|septiembre|sep|sept|octubre|oct|noviembre|nov|diciembre|dic)(?:\s+(?:de(?:l)?\s+)?(\d{2,4}))?").unwrap()
28});
29
30static FR_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
31 Regex::new(r"(?i)(?:^|\W)(?:le\s+)?(\d{1,2})(?:er|ème|eme|e)?\s+(janvier|janv?|février|fevrier|févr?|fevr?|mars|avril|avr|mai|juin|juillet|juil?|août|aout|aou|septembre|sept?|octobre|oct|novembre|nov|décembre|decembre|déc|dec)\s*(\d{2,4})?").unwrap()
32});
33
34static IT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
35 Regex::new(r"(?i)(?:^|\W)(?:il\s+)?(\d{1,2})(?:°)?\s+(gennaio|gen|febbraio|feb|marzo|mar|aprile|apr|maggio|mag|giugno|giu|luglio|lug|agosto|ago|settembre|set|sett|ottobre|ott|novembre|nov|dicembre|dic)\s*(\d{2,4})?").unwrap()
36});
37
38static JA_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
39 Regex::new(r"(\d{2,4})?年?\s*(1|2|3|4|5|6|7|8|9|10|11|12|一|二|三|四|五|六|七|八|九|十|十一|十二)\s*月\s*(\d{1,2}|一|二|三|四|五|六|七|八|九|十|十一|十二|十三|十四|十五|十六|十七|十八|十九|二十|二十一|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|三十|三十一)\s*[日号]").unwrap()
40});
41
42static NL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
43 Regex::new(r"(?i)(?:^|\W)(\d{1,2})(?:e|ste|de)?\s+(januari|jan|februari|feb|maart|mrt|april|apr|mei|juni|jun|juli|jul|augustus|aug|september|sep|sept|oktober|okt|november|nov|december|dec)\s*(\d{2,4})?").unwrap()
44});
45
46static PT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
47 Regex::new(r"(?i)(?:^|\W)(\d{1,2})(?:\s+de)?\s+(janeiro|jan|fevereiro|fev|março|marco|mar|abril|abr|maio|mai|junho|jun|julho|jul|agosto|ago|setembro|set|outubro|out|novembro|nov|dezembro|dez)(?:\s+(?:de\s+)?(\d{2,4}))?").unwrap()
48});
49
50static RU_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
51 Regex::new(r"(?i)(?:^|\W)(\d{1,2})(?:-?[ео]?е?|го)?\s+(января|янв|февраля|фев|марта|мар|апреля|апр|мая|июня|июн|июля|июл|августа|авг|сентября|сен|сент|октября|окт|ноября|ноя|декабря|дек)\s*(\d{2,4})?").unwrap()
52});
53
54static SV_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
55 Regex::new(r"(?i)(?:^|\W)(?:den\s+)?(\d{1,2})(?::?[ae]?)?\s+(januari|jan|februari|feb|mars|april|apr|maj|juni|jun|juli|jul|augusti|aug|september|sep|sept|oktober|okt|november|nov|december|dec)\s*(\d{2,4})?").unwrap()
56});
57
58static UK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
59 Regex::new(r"(?i)(?:^|\W)(\d{1,2})(?:-?[еиого]?)?\s+(січня|січ|лютого|лют|березня|бер|квітня|квіт|травня|трав|червня|черв|липня|лип|серпня|серп|вересня|вер|жовтня|жовт|листопада|лист|грудня|груд)\s*(\d{2,4})?").unwrap()
60});
61
62static ZH_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
63 Regex::new(r"(\d{2,4})?年?\s*(1|2|3|4|5|6|7|8|9|10|11|12|一|二|三|四|五|六|七|八|九|十|十一|十二)\s*月\s*(\d{1,2}|一|二|三|四|五|六|七|八|九|十|十一|十二|十三|十四|十五|十六|十七|十八|十九|二十|二十一|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|三十|三十一)\s*[日号號]").unwrap()
64});
65
66pub struct MultiLocaleMonthNameParser {
68 locale: Locale,
69}
70
71impl MultiLocaleMonthNameParser {
72 pub fn new(locale: Locale) -> Self {
73 Self { locale }
74 }
75
76 fn get_pattern(&self) -> &'static Regex {
77 match self.locale {
78 Locale::En => &EN_PATTERN,
79 Locale::De => &DE_PATTERN,
80 Locale::Es => &ES_PATTERN,
81 Locale::Fr => &FR_PATTERN,
82 Locale::It => &IT_PATTERN,
83 Locale::Ja => &JA_PATTERN,
84 Locale::Nl => &NL_PATTERN,
85 Locale::Pt => &PT_PATTERN,
86 Locale::Ru => &RU_PATTERN,
87 Locale::Sv => &SV_PATTERN,
88 Locale::Uk => &UK_PATTERN,
89 Locale::Zh => &ZH_PATTERN,
90 }
91 }
92
93 fn lookup_month(&self, text: &str) -> Option<u32> {
94 let lower = text.to_lowercase();
95 match self.locale {
96 Locale::En => crate::dictionaries::en::get_month(&lower),
97 Locale::De => crate::dictionaries::de::get_month(&lower),
98 Locale::Es => crate::dictionaries::es::get_month(&lower),
99 Locale::Fr => crate::dictionaries::fr::get_month(&lower),
100 Locale::It => crate::dictionaries::it::get_month(&lower),
101 Locale::Ja => crate::dictionaries::ja::get_month(text)
102 .or_else(|| crate::dictionaries::ja::get_month(&lower)),
103 Locale::Nl => crate::dictionaries::nl::get_month(&lower),
104 Locale::Pt => crate::dictionaries::pt::get_month(&lower),
105 Locale::Ru => crate::dictionaries::ru::get_month(&lower),
106 Locale::Sv => crate::dictionaries::sv::get_month(&lower),
107 Locale::Uk => crate::dictionaries::uk::get_month(&lower),
108 Locale::Zh => crate::dictionaries::zh::get_month(text)
109 .or_else(|| crate::dictionaries::zh::get_month(&lower)),
110 }
111 }
112
113 fn parse_day(&self, text: &str) -> Option<i32> {
114 if let Ok(n) = text.parse::<i32>() {
116 return Some(n);
117 }
118
119 let num = match self.locale {
121 Locale::Ja => crate::dictionaries::ja::parse_number_pattern(text),
122 Locale::Zh => crate::dictionaries::zh::parse_number_pattern(text),
123 _ => return None,
124 };
125
126 if num > 0.0 { Some(num as i32) } else { None }
127 }
128}
129
130impl Parser for MultiLocaleMonthNameParser {
131 fn name(&self) -> &'static str {
132 "MultiLocaleMonthNameParser"
133 }
134
135 fn should_apply(&self, context: &ParsingContext) -> bool {
136 context.has_token_type(TokenType::Month)
137 }
138
139 fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
140 let mut results = Vec::new();
141 let pattern = self.get_pattern();
142 let ref_date = context.reference.instant;
143
144 for mat in pattern.find_iter(context.text) {
145 let matched_text = mat.as_str();
146 let index = mat.start();
147
148 let Some(caps) = pattern.captures(matched_text) else {
149 continue;
150 };
151
152 let (day, month, year_str) = match self.locale {
154 Locale::Ja | Locale::Zh => {
155 let year_str = caps.get(1).map(|m| m.as_str());
157 let month_str = caps.get(2).map(|m| m.as_str()).unwrap_or_default();
158 let day_str = caps.get(3).map(|m| m.as_str()).unwrap_or_default();
159
160 let month = self
161 .lookup_month(month_str)
162 .or_else(|| month_str.parse::<u32>().ok());
163 let day = self.parse_day(day_str);
164
165 (day, month, year_str)
166 }
167 _ => {
168 let day_str = caps.get(1).map(|m| m.as_str()).unwrap_or_default();
170 let month_str = caps.get(2).map(|m| m.as_str()).unwrap_or_default();
171 let year_str = caps.get(3).map(|m| m.as_str());
172
173 let day: Option<i32> = day_str.parse().ok();
174 let month = self.lookup_month(month_str);
175
176 (day, month, year_str)
177 }
178 };
179
180 let Some(month) = month else {
181 continue;
182 };
183
184 let day = day.unwrap_or(1);
185
186 if !(1..=31).contains(&day) {
187 continue;
188 }
189
190 let year = if let Some(y) = year_str {
191 parse_year(y)
192 } else {
193 let current_month = ref_date.month() as i32;
195 if (month as i32) < current_month {
196 ref_date.year() + 1
197 } else {
198 ref_date.year()
199 }
200 };
201
202 let mut components = context.create_components();
203 components.assign(Component::Year, year);
204 components.assign(Component::Month, month as i32);
205 components.assign(Component::Day, day);
206
207 if !components.is_valid_date() {
208 continue;
209 }
210
211 let actual_start = matched_text
213 .find(|c: char| c.is_alphanumeric())
214 .unwrap_or(0);
215 results.push(context.create_result(
216 index + actual_start,
217 index + matched_text.len(),
218 components,
219 None,
220 ));
221 }
222
223 Ok(results)
224 }
225}
226
/// Converts a matched year token to a full year.
///
/// Values of 100 or more are returned unchanged. Smaller values are treated
/// as two-digit years: those above 50 map into the 1900s, the rest into the
/// 2000s. A token that fails to parse is treated as 0 and therefore yields
/// 2000.
fn parse_year(s: &str) -> i32 {
    match s.parse::<i32>().unwrap_or(0) {
        full if full >= 100 => full,
        short if short > 50 => 1900 + short,
        short => 2000 + short,
    }
}