whichtime_sys/parsers/de/
month_name.rs1use crate::components::Component;
12use crate::context::ParsingContext;
13use crate::dictionaries::de::{get_month, get_weekday};
14use crate::error::Result;
15use crate::parsers::Parser;
16use crate::results::ParsedResult;
17use chrono::Datelike;
18use fancy_regex::Regex;
19use std::sync::LazyLock;
20
21static PATTERN: LazyLock<Regex> = LazyLock::new(|| {
24 Regex::new(
25 r"(?ix)
26 (?:
27 (?:am\s+)?
28 (?:(?P<weekday>sonntag|so|montag|mo|dienstag|di|mittwoch|mi|donnerstag|do|freitag|fr|samstag|sa)
29 (?:\s*,?\s*(?:den\s+)?)?
30 )?
31 )?
32 (?P<day>\d{1,2})\.?\s*
33 (?:
34 (?:(?:bis(?:\s*(?:am|zum))?|\-|–)\s*(?P<end_day>\d{1,2})\.?\s*)?
35 )?
36 (?P<month>januar|jänner|janner|jan\.?|februar|feber|feb\.?|märz|maerz|mär\.?|mrz\.?|april|apr\.?|mai|juni|jun\.?|juli|jul\.?|august|aug\.?|september|sep\.?|sept\.?|oktober|okt\.?|november|nov\.?|dezember|dez\.?)
37 (?:
38 (?:\s*[\-/,]?\s*)?
39 (?P<year>\d{1,4}(?!:))?
40 (?:\s*(?P<era>
41 v\.?\s*(?:Chr\.?|u\.?\s*Z\.?|d\.?\s*(?:g\.?\s*)?Z\.?)
42 |n\.?\s*(?:Chr\.?|C|u\.?\s*Z\.?|d\.?\s*(?:g\.?\s*)?Z\.?)
43 |u\.?\s*Z\.?
44 |d\.?\s*g\.?\s*Z\.?
45 ))?
46 )?
47 (?=\W|$)
48 "
49 ).unwrap()
50});
51
52static RANGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
54 Regex::new(
55 r"(?ix)
56 (?P<start_day>\d{1,2})\.?\s*
57 (?P<start_month>januar|jänner|janner|jan\.?|februar|feber|feb\.?|märz|maerz|mär\.?|mrz\.?|april|apr\.?|mai|juni|jun\.?|juli|jul\.?|august|aug\.?|september|sep\.?|sept\.?|oktober|okt\.?|november|nov\.?|dezember|dez\.?)
58 \s*(?:-|–|bis)\s*
59 (?P<end_day>\d{1,2})\.?\s*
60 (?P<end_month>januar|jänner|janner|jan\.?|februar|feber|feb\.?|märz|maerz|mär\.?|mrz\.?|april|apr\.?|mai|juni|jun\.?|juli|jul\.?|august|aug\.?|september|sep\.?|sept\.?|oktober|okt\.?|november|nov\.?|dezember|dez\.?)
61 (?:\s*(?P<year>\d{1,4}))?
62 (?=\W|$)
63 "
64 ).unwrap()
65});
66
67static ABBREV_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
69 Regex::new(
70 r"(?i)(?P<weekday>so|mo|di|mi|do|fr|sa)\s+(?P<day>\d{1,2})\.(?P<month>jan\.?|feb\.?|mär\.?|mrz\.?|maerz|apr\.?|mai|jun\.?|jul\.?|aug\.?|sep\.?|sept\.?|okt\.?|nov\.?|dez\.?)(?:\s*(?P<year>\d{2,4}))?(?=\W|$)"
71 ).unwrap()
72});
73
/// Parser for German dates written with a month name, such as
/// "5. Januar 2024", "Mo 5.Jan" or "5. Januar - 10. Februar".
///
/// The parser holds no state; every value of this type behaves identically.
#[derive(Debug, Clone, Copy)]
pub struct DEMonthNameParser;
76
77impl DEMonthNameParser {
78 pub fn new() -> Self {
79 Self
80 }
81
82 fn parse_year_with_era(year_str: Option<&str>, era_str: Option<&str>) -> Option<i32> {
83 let year_text = year_str?;
84 let mut year: i32 = year_text.parse().ok()?;
85
86 if year < 100 && era_str.is_none() {
88 year = if year > 50 { 1900 + year } else { 2000 + year };
89 }
90
91 if let Some(era) = era_str {
93 let era_lower = era.to_lowercase().replace([' ', '.'], "");
94
95 if era_lower.starts_with('v') {
97 return Some(-year);
98 }
99 }
102
103 Some(year)
104 }
105}
106
107impl Default for DEMonthNameParser {
108 fn default() -> Self {
109 Self::new()
110 }
111}
112
113impl Parser for DEMonthNameParser {
114 fn name(&self) -> &'static str {
115 "DEMonthNameParser"
116 }
117
118 fn should_apply(&self, _context: &ParsingContext) -> bool {
119 true
120 }
121
122 fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
123 let mut results = Vec::new();
124 let ref_date = context.reference.instant;
125
126 let mut start = 0;
128 while start < context.text.len() {
129 let search_text = &context.text[start..];
130 let captures = match RANGE_PATTERN.captures(search_text) {
131 Ok(Some(caps)) => caps,
132 Ok(None) => break,
133 Err(_) => break,
134 };
135
136 let full_match = match captures.get(0) {
137 Some(m) => m,
138 None => break,
139 };
140
141 let match_start = start + full_match.start();
142 let match_end = start + full_match.end();
143
144 let start_day_str = captures
145 .name("start_day")
146 .map(|m| m.as_str())
147 .unwrap_or("1");
148 let start_month_str = captures
149 .name("start_month")
150 .map(|m| m.as_str().to_lowercase())
151 .unwrap_or_default();
152 let end_day_str = captures.name("end_day").map(|m| m.as_str()).unwrap_or("1");
153 let end_month_str = captures
154 .name("end_month")
155 .map(|m| m.as_str().to_lowercase())
156 .unwrap_or_default();
157 let year_str = captures.name("year").map(|m| m.as_str());
158
159 let start_month_clean = start_month_str.trim_end_matches('.');
160 let end_month_clean = end_month_str.trim_end_matches('.');
161
162 let Some(start_month) = get_month(start_month_clean) else {
163 start = match_end;
164 continue;
165 };
166 let Some(end_month) = get_month(end_month_clean) else {
167 start = match_end;
168 continue;
169 };
170
171 let start_day: i32 = start_day_str.parse().unwrap_or(1);
172 let end_day: i32 = end_day_str.parse().unwrap_or(1);
173
174 if !(1..=31).contains(&start_day) || !(1..=31).contains(&end_day) {
175 start = match_end;
176 continue;
177 }
178
179 let year = if let Some(y) = year_str {
180 Self::parse_year_with_era(Some(y), None).unwrap_or(ref_date.year())
181 } else {
182 ref_date.year()
183 };
184
185 let mut start_components = context.create_components();
186 start_components.assign(Component::Year, year);
187 start_components.assign(Component::Month, start_month as i32);
188 start_components.assign(Component::Day, start_day);
189
190 let mut end_components = context.create_components();
191 end_components.assign(Component::Year, year);
192 end_components.assign(Component::Month, end_month as i32);
193 end_components.assign(Component::Day, end_day);
194
195 if start_components.is_valid_date() && end_components.is_valid_date() {
196 results.push(context.create_result(
197 match_start,
198 match_end,
199 start_components,
200 Some(end_components),
201 ));
202 }
203
204 start = match_end;
205 }
206
207 start = 0;
209 while start < context.text.len() {
210 let search_text = &context.text[start..];
211 let captures = match ABBREV_PATTERN.captures(search_text) {
212 Ok(Some(caps)) => caps,
213 Ok(None) => break,
214 Err(_) => break,
215 };
216
217 let full_match = match captures.get(0) {
218 Some(m) => m,
219 None => break,
220 };
221
222 let match_start = start + full_match.start();
223 let match_end = start + full_match.end();
224
225 let weekday_str = captures.name("weekday").map(|m| m.as_str().to_lowercase());
226 let day_str = captures.name("day").map(|m| m.as_str()).unwrap_or("1");
227 let month_str = captures
228 .name("month")
229 .map(|m| m.as_str().to_lowercase())
230 .unwrap_or_default();
231 let year_str = captures.name("year").map(|m| m.as_str());
232
233 let month_clean = month_str.trim_end_matches('.');
235
236 let Some(month) = get_month(month_clean) else {
237 start = match_end;
238 continue;
239 };
240
241 let day: i32 = day_str.parse().unwrap_or(1);
242 if !(1..=31).contains(&day) {
243 start = match_end;
244 continue;
245 }
246
247 let mut components = context.create_components();
248
249 if let Some(y) = year_str {
250 let year = Self::parse_year_with_era(Some(y), None).unwrap_or(ref_date.year());
251 components.assign(Component::Year, year);
252 } else {
253 components.imply(Component::Year, ref_date.year());
255 }
256
257 components.assign(Component::Month, month as i32);
258 components.assign(Component::Day, day);
259
260 if let Some(ref wd_str) = weekday_str
261 && let Some(weekday) = get_weekday(wd_str)
262 {
263 components.assign(Component::Weekday, weekday as i32);
264 }
265
266 if !components.is_valid_date() {
267 start = match_end;
268 continue;
269 }
270
271 results.push(context.create_result(match_start, match_end, components, None));
272 start = match_end;
273 }
274
275 start = 0;
277 while start < context.text.len() {
278 let search_text = &context.text[start..];
279 let captures = match PATTERN.captures(search_text) {
280 Ok(Some(caps)) => caps,
281 Ok(None) => break,
282 Err(_) => break,
283 };
284
285 let full_match = match captures.get(0) {
286 Some(m) => m,
287 None => break,
288 };
289
290 let match_start = start + full_match.start();
291 let match_end = start + full_match.end();
292
293 let weekday_str = captures.name("weekday").map(|m| m.as_str().to_lowercase());
294 let day_str = captures.name("day").map(|m| m.as_str()).unwrap_or("1");
295 let month_str = captures
296 .name("month")
297 .map(|m| m.as_str().to_lowercase())
298 .unwrap_or_default();
299 let year_str = captures.name("year").map(|m| m.as_str());
300 let era_str = captures.name("era").map(|m| m.as_str());
301 let end_day_str = captures
302 .name("end_day")
303 .or_else(|| captures.name("end_day2"))
304 .map(|m| m.as_str());
305 let end_month_str = captures
306 .name("end_month")
307 .map(|m| m.as_str().to_lowercase());
308
309 let month_clean = month_str.trim_end_matches('.');
311
312 let Some(month) = get_month(month_clean) else {
313 start = match_end;
314 continue;
315 };
316
317 let day: i32 = day_str.parse().unwrap_or(1);
318 if !(1..=31).contains(&day) {
319 start = match_end;
320 continue;
321 }
322
323 let mut components = context.create_components();
324
325 if year_str.is_some() || era_str.is_some() {
326 let year = Self::parse_year_with_era(year_str, era_str).unwrap_or(ref_date.year());
327 components.assign(Component::Year, year);
328 } else {
329 components.imply(Component::Year, ref_date.year());
332 }
333
334 components.assign(Component::Month, month as i32);
335 components.assign(Component::Day, day);
336
337 if let Some(ref wd_str) = weekday_str
338 && let Some(weekday) = get_weekday(wd_str)
339 {
340 components.assign(Component::Weekday, weekday as i32);
341 }
342
343 if !components.is_valid_date() {
344 start = match_end;
345 continue;
346 }
347
348 let end_components = if let Some(end_day_text) = end_day_str {
350 let end_day: i32 = end_day_text.parse().unwrap_or(0);
351 if end_day > 0 && end_day <= 31 {
352 let end_month = if let Some(ref em_str) = end_month_str {
353 get_month(em_str.trim_end_matches('.')).unwrap_or(month)
354 } else {
355 month
356 };
357
358 let mut end_comp = context.create_components();
359 if let Some(start_year) = components.get(Component::Year) {
361 if year_str.is_some() || era_str.is_some() {
362 end_comp.assign(Component::Year, start_year);
363 } else {
364 end_comp.imply(Component::Year, start_year);
365 }
366 }
367 end_comp.assign(Component::Month, end_month as i32);
368 end_comp.assign(Component::Day, end_day);
369
370 if end_comp.is_valid_date() {
371 Some(end_comp)
372 } else {
373 None
374 }
375 } else {
376 None
377 }
378 } else {
379 None
380 };
381
382 results.push(context.create_result(match_start, match_end, components, end_components));
383 start = match_end;
384 }
385
386 Ok(results)
387 }
388}