Skip to main content

whichtime_sys/parsers/de/
month_name.rs

//! German month-name parser for little-endian (day-month-year) dates.
//!
//! Handles German date expressions with month names like:
//! - "10. August 2012"
//! - "10. August 85 n. Chr." / "10. August 113 v. Chr."
//! - "10. - 22. August 2012" (date ranges)
//! - "am 10. August", "am Dienstag, den 10. Januar"
//! - "So 15.Sep", "Di, 10. Januar"
//! - Various year suffixes: v.u.Z., n.u.Z., d.g.Z., v.d.Z., etc.
10
11use crate::components::Component;
12use crate::context::ParsingContext;
13use crate::dictionaries::de::{get_month, get_weekday};
14use crate::error::Result;
15use crate::parsers::Parser;
16use crate::results::ParsedResult;
17use chrono::Datelike;
18use fancy_regex::Regex;
19use std::sync::LazyLock;
20
21// Main pattern for German month name dates
22// Supports: "10. August 2012", "am 10. August", "Di, 10. Januar", "10. August 85 n. Chr."
23static PATTERN: LazyLock<Regex> = LazyLock::new(|| {
24    Regex::new(
25        r"(?ix)
26        (?:
27            (?:am\s+)?
28            (?:(?P<weekday>sonntag|so|montag|mo|dienstag|di|mittwoch|mi|donnerstag|do|freitag|fr|samstag|sa)
29                (?:\s*,?\s*(?:den\s+)?)?
30            )?
31        )?
32        (?P<day>\d{1,2})\.?\s*
33        (?:
34            (?:(?:bis(?:\s*(?:am|zum))?|\-|–)\s*(?P<end_day>\d{1,2})\.?\s*)?
35        )?
36        (?P<month>januar|jänner|janner|jan\.?|februar|feber|feb\.?|märz|maerz|mär\.?|mrz\.?|april|apr\.?|mai|juni|jun\.?|juli|jul\.?|august|aug\.?|september|sep\.?|sept\.?|oktober|okt\.?|november|nov\.?|dezember|dez\.?)
37        (?:
38            (?:\s*[\-/,]?\s*)?
39            (?P<year>\d{1,4}(?!:))?
40            (?:\s*(?P<era>
41                v\.?\s*(?:Chr\.?|u\.?\s*Z\.?|d\.?\s*(?:g\.?\s*)?Z\.?)
42                |n\.?\s*(?:Chr\.?|C|u\.?\s*Z\.?|d\.?\s*(?:g\.?\s*)?Z\.?)
43                |u\.?\s*Z\.?
44                |d\.?\s*g\.?\s*Z\.?
45            ))?
46        )?
47        (?=\W|$)
48        "
49    ).unwrap()
50});
51
52// Pattern for cross-month date ranges: "10. Oktober - 12. Dezember", "10. August - 12. Oktober 2013"
53static RANGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
54    Regex::new(
55        r"(?ix)
56        (?P<start_day>\d{1,2})\.?\s*
57        (?P<start_month>januar|jänner|janner|jan\.?|februar|feber|feb\.?|märz|maerz|mär\.?|mrz\.?|april|apr\.?|mai|juni|jun\.?|juli|jul\.?|august|aug\.?|september|sep\.?|sept\.?|oktober|okt\.?|november|nov\.?|dezember|dez\.?)
58        \s*(?:-|–|bis)\s*
59        (?P<end_day>\d{1,2})\.?\s*
60        (?P<end_month>januar|jänner|janner|jan\.?|februar|feber|feb\.?|märz|maerz|mär\.?|mrz\.?|april|apr\.?|mai|juni|jun\.?|juli|jul\.?|august|aug\.?|september|sep\.?|sept\.?|oktober|okt\.?|november|nov\.?|dezember|dez\.?)
61        (?:\s*(?P<year>\d{1,4}))?
62        (?=\W|$)
63        "
64    ).unwrap()
65});
66
67// Pattern for abbreviated weekday + date format: "So 15.Sep", "SO 15.SEPT"
68static ABBREV_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
69    Regex::new(
70        r"(?i)(?P<weekday>so|mo|di|mi|do|fr|sa)\s+(?P<day>\d{1,2})\.(?P<month>jan\.?|feb\.?|mär\.?|mrz\.?|maerz|apr\.?|mai|jun\.?|jul\.?|aug\.?|sep\.?|sept\.?|okt\.?|nov\.?|dez\.?)(?:\s*(?P<year>\d{2,4}))?(?=\W|$)"
71    ).unwrap()
72});
73
/// Parser for German day-first dates whose month is written out by name.
pub struct DEMonthNameParser;

impl DEMonthNameParser {
    /// Creates the parser; the type carries no state.
    pub fn new() -> Self {
        DEMonthNameParser
    }

    /// Turns a captured year and an optional era suffix into a signed year.
    ///
    /// Returns `None` when `year_str` is absent or does not parse as an `i32`.
    ///
    /// * Without an era, numbers below 100 are treated as two-digit years:
    ///   values above 50 land in the 1900s, all others in the 2000s.
    /// * With an era, the number is taken literally. It is negated when the
    ///   first character of the suffix that is neither a space nor a dot is
    ///   `v`/`V` (v. Chr., v.u.Z., v.d.Z., ...); any other suffix
    ///   (n. Chr., u.Z., d.g.Z., ...) leaves it positive.
    fn parse_year_with_era(year_str: Option<&str>, era_str: Option<&str>) -> Option<i32> {
        let number: i32 = year_str?.parse().ok()?;

        // No era: only the two-digit expansion applies.
        let Some(era) = era_str else {
            return Some(match number {
                51..=99 => 1900 + number,
                n if n < 100 => 2000 + n,
                _ => number,
            });
        };

        // Spaces and dots are punctuation inside the suffix; the first
        // remaining letter decides between "before" (v...) and everything else.
        let leading = era.chars().find(|c| !matches!(*c, ' ' | '.'));
        if matches!(leading, Some('v' | 'V')) {
            Some(-number)
        } else {
            Some(number)
        }
    }
}

impl Default for DEMonthNameParser {
    /// Equivalent to [`DEMonthNameParser::new`].
    fn default() -> Self {
        Self::new()
    }
}
112
113impl Parser for DEMonthNameParser {
114    fn name(&self) -> &'static str {
115        "DEMonthNameParser"
116    }
117
118    fn should_apply(&self, _context: &ParsingContext) -> bool {
119        true
120    }
121
122    fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
123        let mut results = Vec::new();
124        let ref_date = context.reference.instant;
125
126        // Try cross-month range pattern first (10. Oktober - 12. Dezember)
127        let mut start = 0;
128        while start < context.text.len() {
129            let search_text = &context.text[start..];
130            let captures = match RANGE_PATTERN.captures(search_text) {
131                Ok(Some(caps)) => caps,
132                Ok(None) => break,
133                Err(_) => break,
134            };
135
136            let full_match = match captures.get(0) {
137                Some(m) => m,
138                None => break,
139            };
140
141            let match_start = start + full_match.start();
142            let match_end = start + full_match.end();
143
144            let start_day_str = captures
145                .name("start_day")
146                .map(|m| m.as_str())
147                .unwrap_or("1");
148            let start_month_str = captures
149                .name("start_month")
150                .map(|m| m.as_str().to_lowercase())
151                .unwrap_or_default();
152            let end_day_str = captures.name("end_day").map(|m| m.as_str()).unwrap_or("1");
153            let end_month_str = captures
154                .name("end_month")
155                .map(|m| m.as_str().to_lowercase())
156                .unwrap_or_default();
157            let year_str = captures.name("year").map(|m| m.as_str());
158
159            let start_month_clean = start_month_str.trim_end_matches('.');
160            let end_month_clean = end_month_str.trim_end_matches('.');
161
162            let Some(start_month) = get_month(start_month_clean) else {
163                start = match_end;
164                continue;
165            };
166            let Some(end_month) = get_month(end_month_clean) else {
167                start = match_end;
168                continue;
169            };
170
171            let start_day: i32 = start_day_str.parse().unwrap_or(1);
172            let end_day: i32 = end_day_str.parse().unwrap_or(1);
173
174            if !(1..=31).contains(&start_day) || !(1..=31).contains(&end_day) {
175                start = match_end;
176                continue;
177            }
178
179            let year = if let Some(y) = year_str {
180                Self::parse_year_with_era(Some(y), None).unwrap_or(ref_date.year())
181            } else {
182                ref_date.year()
183            };
184
185            let mut start_components = context.create_components();
186            start_components.assign(Component::Year, year);
187            start_components.assign(Component::Month, start_month as i32);
188            start_components.assign(Component::Day, start_day);
189
190            let mut end_components = context.create_components();
191            end_components.assign(Component::Year, year);
192            end_components.assign(Component::Month, end_month as i32);
193            end_components.assign(Component::Day, end_day);
194
195            if start_components.is_valid_date() && end_components.is_valid_date() {
196                results.push(context.create_result(
197                    match_start,
198                    match_end,
199                    start_components,
200                    Some(end_components),
201                ));
202            }
203
204            start = match_end;
205        }
206
207        // Try abbreviated pattern (So 15.Sep)
208        start = 0;
209        while start < context.text.len() {
210            let search_text = &context.text[start..];
211            let captures = match ABBREV_PATTERN.captures(search_text) {
212                Ok(Some(caps)) => caps,
213                Ok(None) => break,
214                Err(_) => break,
215            };
216
217            let full_match = match captures.get(0) {
218                Some(m) => m,
219                None => break,
220            };
221
222            let match_start = start + full_match.start();
223            let match_end = start + full_match.end();
224
225            let weekday_str = captures.name("weekday").map(|m| m.as_str().to_lowercase());
226            let day_str = captures.name("day").map(|m| m.as_str()).unwrap_or("1");
227            let month_str = captures
228                .name("month")
229                .map(|m| m.as_str().to_lowercase())
230                .unwrap_or_default();
231            let year_str = captures.name("year").map(|m| m.as_str());
232
233            // Clean up month string (remove trailing dot)
234            let month_clean = month_str.trim_end_matches('.');
235
236            let Some(month) = get_month(month_clean) else {
237                start = match_end;
238                continue;
239            };
240
241            let day: i32 = day_str.parse().unwrap_or(1);
242            if !(1..=31).contains(&day) {
243                start = match_end;
244                continue;
245            }
246
247            let mut components = context.create_components();
248
249            if let Some(y) = year_str {
250                let year = Self::parse_year_with_era(Some(y), None).unwrap_or(ref_date.year());
251                components.assign(Component::Year, year);
252            } else {
253                // When no year is specified, imply the reference year
254                components.imply(Component::Year, ref_date.year());
255            }
256
257            components.assign(Component::Month, month as i32);
258            components.assign(Component::Day, day);
259
260            if let Some(ref wd_str) = weekday_str
261                && let Some(weekday) = get_weekday(wd_str)
262            {
263                components.assign(Component::Weekday, weekday as i32);
264            }
265
266            if !components.is_valid_date() {
267                start = match_end;
268                continue;
269            }
270
271            results.push(context.create_result(match_start, match_end, components, None));
272            start = match_end;
273        }
274
275        // Try main pattern
276        start = 0;
277        while start < context.text.len() {
278            let search_text = &context.text[start..];
279            let captures = match PATTERN.captures(search_text) {
280                Ok(Some(caps)) => caps,
281                Ok(None) => break,
282                Err(_) => break,
283            };
284
285            let full_match = match captures.get(0) {
286                Some(m) => m,
287                None => break,
288            };
289
290            let match_start = start + full_match.start();
291            let match_end = start + full_match.end();
292
293            let weekday_str = captures.name("weekday").map(|m| m.as_str().to_lowercase());
294            let day_str = captures.name("day").map(|m| m.as_str()).unwrap_or("1");
295            let month_str = captures
296                .name("month")
297                .map(|m| m.as_str().to_lowercase())
298                .unwrap_or_default();
299            let year_str = captures.name("year").map(|m| m.as_str());
300            let era_str = captures.name("era").map(|m| m.as_str());
301            let end_day_str = captures
302                .name("end_day")
303                .or_else(|| captures.name("end_day2"))
304                .map(|m| m.as_str());
305            let end_month_str = captures
306                .name("end_month")
307                .map(|m| m.as_str().to_lowercase());
308
309            // Clean up month string (remove trailing dot)
310            let month_clean = month_str.trim_end_matches('.');
311
312            let Some(month) = get_month(month_clean) else {
313                start = match_end;
314                continue;
315            };
316
317            let day: i32 = day_str.parse().unwrap_or(1);
318            if !(1..=31).contains(&day) {
319                start = match_end;
320                continue;
321            }
322
323            let mut components = context.create_components();
324
325            if year_str.is_some() || era_str.is_some() {
326                let year = Self::parse_year_with_era(year_str, era_str).unwrap_or(ref_date.year());
327                components.assign(Component::Year, year);
328            } else {
329                // When no year is specified, imply the reference year
330                // The ForwardDateRefiner will handle forward-looking behavior if needed
331                components.imply(Component::Year, ref_date.year());
332            }
333
334            components.assign(Component::Month, month as i32);
335            components.assign(Component::Day, day);
336
337            if let Some(ref wd_str) = weekday_str
338                && let Some(weekday) = get_weekday(wd_str)
339            {
340                components.assign(Component::Weekday, weekday as i32);
341            }
342
343            if !components.is_valid_date() {
344                start = match_end;
345                continue;
346            }
347
348            // Handle end date for ranges
349            let end_components = if let Some(end_day_text) = end_day_str {
350                let end_day: i32 = end_day_text.parse().unwrap_or(0);
351                if end_day > 0 && end_day <= 31 {
352                    let end_month = if let Some(ref em_str) = end_month_str {
353                        get_month(em_str.trim_end_matches('.')).unwrap_or(month)
354                    } else {
355                        month
356                    };
357
358                    let mut end_comp = context.create_components();
359                    // Copy year from start components
360                    if let Some(start_year) = components.get(Component::Year) {
361                        if year_str.is_some() || era_str.is_some() {
362                            end_comp.assign(Component::Year, start_year);
363                        } else {
364                            end_comp.imply(Component::Year, start_year);
365                        }
366                    }
367                    end_comp.assign(Component::Month, end_month as i32);
368                    end_comp.assign(Component::Day, end_day);
369
370                    if end_comp.is_valid_date() {
371                        Some(end_comp)
372                    } else {
373                        None
374                    }
375                } else {
376                    None
377                }
378            } else {
379                None
380            };
381
382            results.push(context.create_result(match_start, match_end, components, end_components));
383            start = match_end;
384        }
385
386        Ok(results)
387    }
388}