whichtime_sys/parsers/nl/
month_name.rs1use crate::components::Component;
11use crate::context::ParsingContext;
12use crate::dictionaries::nl::{get_month, get_weekday};
13use crate::error::Result;
14use crate::parsers::Parser;
15use crate::results::ParsedResult;
16use chrono::Datelike;
17use fancy_regex::Regex;
18use std::sync::LazyLock;
19
20static PATTERN: LazyLock<Regex> = LazyLock::new(|| {
23 Regex::new(
24 r"(?ix)
25 (?:
26 (?P<weekday>zondag|zon|zo|maandag|maan|ma|dinsdag|dins|di|woensdag|woens|wo|donderdag|donder|do|vrijdag|vrij|vr|zaterdag|zater|za)
27 (?:\s*,?\s*)?
28 )?
29 (?P<day>\d{1,2})(?:e|ste|de)?\s*
30 (?:
31 (?:(?:tot|\-|–)\s*(?P<end_day>\d{1,2})(?:e|ste|de)?\s*)?
32 )?
33 (?P<month>januari|jan\.?|februari|feb\.?|maart|mrt\.?|april|apr\.?|mei|juni|jun\.?|juli|jul\.?|augustus|aug\.?|september|sep\.?|sept\.?|oktober|okt\.?|november|nov\.?|december|dec\.?)
34 (?:
35 (?:\s*[\-/,]?\s*)?
36 (?P<year>\d{1,4}(?!:))?
37 (?:\s*(?P<era>
38 (?:voor|v\.?)\s*(?:Christus|Chr\.?)
39 |(?:na|n\.?)\s*(?:Christus|Chr\.?)
40 ))?
41 )?
42 (?=\W|$)
43 "
44 ).unwrap()
45});
46
47static ABBREV_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
49 Regex::new(
50 r"(?ix)(?P<weekday>zondag|zon|zo|maandag|maan|ma|dinsdag|dins|di|woensdag|woens|wo|donderdag|donder|do|vrijdag|vrij|vr|zaterdag|zater|za)\.?\s+(?P<day>\d{1,2})(?:e|ste|de)?\s+(?P<month>januari|jan\.?|februari|feb\.?|maart|mrt\.?|april|apr\.?|mei|juni|jun\.?|juli|jul\.?|augustus|aug\.?|september|sep\.?|sept\.?|oktober|okt\.?|november|nov\.?|december|dec\.?)(?:\s*(?P<year>\d{2,4}))?(?=\W|$)"
51 ).unwrap()
52});
53
/// Parser for Dutch dates written as a day number followed by a month name,
/// optionally with a leading weekday, a day range, a year and an era marker.
///
/// The type is a stateless unit struct; `new()` and `default()` are equivalent.
#[derive(Debug, Clone, Copy, Default)]
pub struct NLMonthNameParser;

impl NLMonthNameParser {
    /// Creates a new parser instance.
    pub fn new() -> Self {
        Self
    }

    /// Converts the textual year (and optional era marker) into a signed year.
    ///
    /// * Returns `None` when `year_str` is absent or is not a valid `i32`.
    /// * Without an era, values below 100 are treated as two-digit years:
    ///   values above 50 map to `1900 + year`, all others to `2000 + year`.
    /// * With a "voor Christus" style era (see [`Self::is_before_christ`]) the
    ///   year is negated; with any other era it is returned unchanged, so a
    ///   two-digit year next to an era marker is taken literally.
    fn parse_year_with_era(year_str: Option<&str>, era_str: Option<&str>) -> Option<i32> {
        let year: i32 = year_str?.parse().ok()?;

        match era_str {
            Some(era) if Self::is_before_christ(era) => Some(-year),
            Some(_) => Some(year),
            None if year < 100 => Some(if year > 50 { 1900 + year } else { 2000 + year }),
            None => Some(year),
        }
    }

    /// Returns `true` when the era text denotes "voor Christus": it either
    /// starts with `v` or contains `voor`, compared case-insensitively.
    ///
    /// Works on bytes with ASCII case folding so no lowercased copy of the
    /// text has to be allocated.
    fn is_before_christ(era: &str) -> bool {
        let bytes = era.as_bytes();
        bytes.first().is_some_and(|b| b.eq_ignore_ascii_case(&b'v'))
            || bytes.windows(4).any(|w| w.eq_ignore_ascii_case(b"voor"))
    }
}
103
104impl Parser for NLMonthNameParser {
105 fn name(&self) -> &'static str {
106 "NLMonthNameParser"
107 }
108
109 fn should_apply(&self, _context: &ParsingContext) -> bool {
110 true
111 }
112
113 fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
114 let mut results = Vec::new();
115 let ref_date = context.reference.instant;
116
117 let mut start = 0;
119 while start < context.text.len() {
120 let search_text = &context.text[start..];
121 let captures = match ABBREV_PATTERN.captures(search_text) {
122 Ok(Some(caps)) => caps,
123 Ok(None) => break,
124 Err(_) => break,
125 };
126
127 let full_match = match captures.get(0) {
128 Some(m) => m,
129 None => break,
130 };
131
132 let match_start = start + full_match.start();
133 let match_end = start + full_match.end();
134
135 let weekday_str = captures.name("weekday").map(|m| m.as_str().to_lowercase());
136 let day_str = captures.name("day").map(|m| m.as_str()).unwrap_or("1");
137 let month_str = captures
138 .name("month")
139 .map(|m| m.as_str().to_lowercase())
140 .unwrap_or_default();
141 let year_str = captures.name("year").map(|m| m.as_str());
142
143 let month_clean = month_str.trim_end_matches('.');
145
146 let Some(month) = get_month(month_clean) else {
147 start = match_end;
148 continue;
149 };
150
151 let day: i32 = day_str.parse().unwrap_or(1);
152 if !(1..=31).contains(&day) {
153 start = match_end;
154 continue;
155 }
156
157 let mut components = context.create_components();
158
159 if let Some(y) = year_str {
160 let year = Self::parse_year_with_era(Some(y), None).unwrap_or(ref_date.year());
161 components.assign(Component::Year, year);
162 } else {
163 components.imply(Component::Year, ref_date.year());
164 }
165
166 components.assign(Component::Month, month as i32);
167 components.assign(Component::Day, day);
168
169 if let Some(ref wd_str) = weekday_str
170 && let Some(weekday) = get_weekday(wd_str)
171 {
172 components.assign(Component::Weekday, weekday as i32);
173 }
174
175 if !components.is_valid_date() {
176 start = match_end;
177 continue;
178 }
179
180 results.push(context.create_result(match_start, match_end, components, None));
181 start = match_end;
182 }
183
184 start = 0;
186 while start < context.text.len() {
187 let search_text = &context.text[start..];
188 let captures = match PATTERN.captures(search_text) {
189 Ok(Some(caps)) => caps,
190 Ok(None) => break,
191 Err(_) => break,
192 };
193
194 let full_match = match captures.get(0) {
195 Some(m) => m,
196 None => break,
197 };
198
199 let match_start = start + full_match.start();
200 let match_end = start + full_match.end();
201
202 let weekday_str = captures.name("weekday").map(|m| m.as_str().to_lowercase());
203 let day_str = captures.name("day").map(|m| m.as_str()).unwrap_or("1");
204 let month_str = captures
205 .name("month")
206 .map(|m| m.as_str().to_lowercase())
207 .unwrap_or_default();
208 let year_str = captures.name("year").map(|m| m.as_str());
209 let era_str = captures.name("era").map(|m| m.as_str());
210 let end_day_str = captures.name("end_day").map(|m| m.as_str());
211
212 let month_clean = month_str.trim_end_matches('.');
213
214 let Some(month) = get_month(month_clean) else {
215 start = match_end;
216 continue;
217 };
218
219 let day: i32 = day_str.parse().unwrap_or(1);
220 if !(1..=31).contains(&day) {
221 start = match_end;
222 continue;
223 }
224
225 let mut components = context.create_components();
226
227 if year_str.is_some() || era_str.is_some() {
228 let year = Self::parse_year_with_era(year_str, era_str).unwrap_or(ref_date.year());
229 components.assign(Component::Year, year);
230 } else {
231 components.imply(Component::Year, ref_date.year());
232 }
233
234 components.assign(Component::Month, month as i32);
235 components.assign(Component::Day, day);
236
237 if let Some(ref wd_str) = weekday_str
238 && let Some(weekday) = get_weekday(wd_str)
239 {
240 components.assign(Component::Weekday, weekday as i32);
241 }
242
243 if !components.is_valid_date() {
244 start = match_end;
245 continue;
246 }
247
248 let end_components = if let Some(end_day_text) = end_day_str {
250 let end_day: i32 = end_day_text.parse().unwrap_or(0);
251 if end_day > 0 && end_day <= 31 {
252 let mut end_comp = context.create_components();
253 if let Some(start_year) = components.get(Component::Year) {
254 if year_str.is_some() || era_str.is_some() {
255 end_comp.assign(Component::Year, start_year);
256 } else {
257 end_comp.imply(Component::Year, start_year);
258 }
259 }
260 end_comp.assign(Component::Month, month as i32);
261 end_comp.assign(Component::Day, end_day);
262
263 if end_comp.is_valid_date() {
264 Some(end_comp)
265 } else {
266 None
267 }
268 } else {
269 None
270 }
271 } else {
272 None
273 };
274
275 results.push(context.create_result(match_start, match_end, components, end_components));
276 start = match_end;
277 }
278
279 Ok(results)
280 }
281}