Skip to main content

whichtime_sys/parsers/nl/
month_name.rs

1//! Dutch month name little endian parser
2//!
3//! Handles Dutch date expressions with month names like:
4//! - "10 januari 2012"
5//! - "10 augustus 88 na Christus" / "10 augustus 234 voor Christus"
6//! - "10 - 22 augustus 2012" (date ranges)
7//! - "10 tot 22 augustus 2012"
8//! - "Zon 15 Sept", "di, 10 januari"
9
10use crate::components::Component;
11use crate::context::ParsingContext;
12use crate::dictionaries::nl::{get_month, get_weekday};
13use crate::error::Result;
14use crate::parsers::Parser;
15use crate::results::ParsedResult;
16use chrono::Datelike;
17use fancy_regex::Regex;
18use std::sync::LazyLock;
19
20// Main pattern for Dutch month name dates
21// Supports: "10 augustus 2012", "10 tot 22 augustus", "di, 10 januari", "10 augustus 88 na Christus"
22static PATTERN: LazyLock<Regex> = LazyLock::new(|| {
23    Regex::new(
24        r"(?ix)
25        (?:
26            (?P<weekday>zondag|zon|zo|maandag|maan|ma|dinsdag|dins|di|woensdag|woens|wo|donderdag|donder|do|vrijdag|vrij|vr|zaterdag|zater|za)
27            (?:\s*,?\s*)?
28        )?
29        (?P<day>\d{1,2})(?:e|ste|de)?\s*
30        (?:
31            (?:(?:tot|\-|–)\s*(?P<end_day>\d{1,2})(?:e|ste|de)?\s*)?
32        )?
33        (?P<month>januari|jan\.?|februari|feb\.?|maart|mrt\.?|april|apr\.?|mei|juni|jun\.?|juli|jul\.?|augustus|aug\.?|september|sep\.?|sept\.?|oktober|okt\.?|november|nov\.?|december|dec\.?)
34        (?:
35            (?:\s*[\-/,]?\s*)?
36            (?P<year>\d{1,4}(?!:))?
37            (?:\s*(?P<era>
38                (?:voor|v\.?)\s*(?:Christus|Chr\.?)
39                |(?:na|n\.?)\s*(?:Christus|Chr\.?)
40            ))?
41        )?
42        (?=\W|$)
43        "
44    ).unwrap()
45});
46
47// Pattern for abbreviated weekday + date format: "Zon 15 Sept"
48static ABBREV_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
49    Regex::new(
50        r"(?ix)(?P<weekday>zondag|zon|zo|maandag|maan|ma|dinsdag|dins|di|woensdag|woens|wo|donderdag|donder|do|vrijdag|vrij|vr|zaterdag|zater|za)\.?\s+(?P<day>\d{1,2})(?:e|ste|de)?\s+(?P<month>januari|jan\.?|februari|feb\.?|maart|mrt\.?|april|apr\.?|mei|juni|jun\.?|juli|jul\.?|augustus|aug\.?|september|sep\.?|sept\.?|oktober|okt\.?|november|nov\.?|december|dec\.?)(?:\s*(?P<year>\d{2,4}))?(?=\W|$)"
51    ).unwrap()
52});
53
/// Dutch month name parser.
///
/// A stateless unit type: all matching is driven by the module-level regex
/// statics, so values are free to copy and carry no configuration.
#[derive(Debug, Clone, Copy)]
pub struct NLMonthNameParser;
56
57impl NLMonthNameParser {
58    pub fn new() -> Self {
59        Self
60    }
61
62    fn parse_year_with_era(year_str: Option<&str>, era_str: Option<&str>) -> Option<i32> {
63        let year_text = year_str?;
64        let mut year: i32 = year_text.parse().ok()?;
65
66        // Handle two-digit years
67        if year < 100 && era_str.is_none() {
68            year = if year > 50 { 1900 + year } else { 2000 + year };
69        }
70
71        // Handle era suffixes
72        if let Some(era) = era_str {
73            let era_lower = era.to_lowercase();
74
75            // BC: voor Christus, v. Chr.
76            if era_lower.contains("voor") || era_lower.starts_with('v') {
77                // 1 BC is year 0 in chrono? No, chrono uses astronomical year numbering: 1 BC is 0, 2 BC is -1.
78                // Wait, chrono::Datelike::year() returns i32. 1 CE = 1. 1 BCE = 0.
79                // If input says "234 voor Christus" (234 BC), we want year -233 (astronomical) or just let chrono handle it?
80                // Whichtime usually returns the year number.
81                // If I return -234, it means 235 BC.
82                // Let's stick to negative for BC. 1 BC -> 0. 234 BC -> -233.
83                // Formula: 1 - year_bc
84                // year 234 BC -> 1 - 234 = -233.
85
86                // BUT, let's check how other parsers handle it.
87                // DE parser: starts_with('v') -> returns -year.
88                // If year is 234, returns -234.
89                // Let's assume simplistic -year for now.
90                return Some(-year);
91            }
92        }
93
94        Some(year)
95    }
96}
97
98impl Default for NLMonthNameParser {
99    fn default() -> Self {
100        Self::new()
101    }
102}
103
104impl Parser for NLMonthNameParser {
105    fn name(&self) -> &'static str {
106        "NLMonthNameParser"
107    }
108
109    fn should_apply(&self, _context: &ParsingContext) -> bool {
110        true
111    }
112
113    fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
114        let mut results = Vec::new();
115        let ref_date = context.reference.instant;
116
117        // Try abbreviated pattern (Zon 15 Sept)
118        let mut start = 0;
119        while start < context.text.len() {
120            let search_text = &context.text[start..];
121            let captures = match ABBREV_PATTERN.captures(search_text) {
122                Ok(Some(caps)) => caps,
123                Ok(None) => break,
124                Err(_) => break,
125            };
126
127            let full_match = match captures.get(0) {
128                Some(m) => m,
129                None => break,
130            };
131
132            let match_start = start + full_match.start();
133            let match_end = start + full_match.end();
134
135            let weekday_str = captures.name("weekday").map(|m| m.as_str().to_lowercase());
136            let day_str = captures.name("day").map(|m| m.as_str()).unwrap_or("1");
137            let month_str = captures
138                .name("month")
139                .map(|m| m.as_str().to_lowercase())
140                .unwrap_or_default();
141            let year_str = captures.name("year").map(|m| m.as_str());
142
143            // Clean up month string (remove trailing dot)
144            let month_clean = month_str.trim_end_matches('.');
145
146            let Some(month) = get_month(month_clean) else {
147                start = match_end;
148                continue;
149            };
150
151            let day: i32 = day_str.parse().unwrap_or(1);
152            if !(1..=31).contains(&day) {
153                start = match_end;
154                continue;
155            }
156
157            let mut components = context.create_components();
158
159            if let Some(y) = year_str {
160                let year = Self::parse_year_with_era(Some(y), None).unwrap_or(ref_date.year());
161                components.assign(Component::Year, year);
162            } else {
163                components.imply(Component::Year, ref_date.year());
164            }
165
166            components.assign(Component::Month, month as i32);
167            components.assign(Component::Day, day);
168
169            if let Some(ref wd_str) = weekday_str
170                && let Some(weekday) = get_weekday(wd_str)
171            {
172                components.assign(Component::Weekday, weekday as i32);
173            }
174
175            if !components.is_valid_date() {
176                start = match_end;
177                continue;
178            }
179
180            results.push(context.create_result(match_start, match_end, components, None));
181            start = match_end;
182        }
183
184        // Try main pattern
185        start = 0;
186        while start < context.text.len() {
187            let search_text = &context.text[start..];
188            let captures = match PATTERN.captures(search_text) {
189                Ok(Some(caps)) => caps,
190                Ok(None) => break,
191                Err(_) => break,
192            };
193
194            let full_match = match captures.get(0) {
195                Some(m) => m,
196                None => break,
197            };
198
199            let match_start = start + full_match.start();
200            let match_end = start + full_match.end();
201
202            let weekday_str = captures.name("weekday").map(|m| m.as_str().to_lowercase());
203            let day_str = captures.name("day").map(|m| m.as_str()).unwrap_or("1");
204            let month_str = captures
205                .name("month")
206                .map(|m| m.as_str().to_lowercase())
207                .unwrap_or_default();
208            let year_str = captures.name("year").map(|m| m.as_str());
209            let era_str = captures.name("era").map(|m| m.as_str());
210            let end_day_str = captures.name("end_day").map(|m| m.as_str());
211
212            let month_clean = month_str.trim_end_matches('.');
213
214            let Some(month) = get_month(month_clean) else {
215                start = match_end;
216                continue;
217            };
218
219            let day: i32 = day_str.parse().unwrap_or(1);
220            if !(1..=31).contains(&day) {
221                start = match_end;
222                continue;
223            }
224
225            let mut components = context.create_components();
226
227            if year_str.is_some() || era_str.is_some() {
228                let year = Self::parse_year_with_era(year_str, era_str).unwrap_or(ref_date.year());
229                components.assign(Component::Year, year);
230            } else {
231                components.imply(Component::Year, ref_date.year());
232            }
233
234            components.assign(Component::Month, month as i32);
235            components.assign(Component::Day, day);
236
237            if let Some(ref wd_str) = weekday_str
238                && let Some(weekday) = get_weekday(wd_str)
239            {
240                components.assign(Component::Weekday, weekday as i32);
241            }
242
243            if !components.is_valid_date() {
244                start = match_end;
245                continue;
246            }
247
248            // Handle end date for ranges (10 tot 22 augustus)
249            let end_components = if let Some(end_day_text) = end_day_str {
250                let end_day: i32 = end_day_text.parse().unwrap_or(0);
251                if end_day > 0 && end_day <= 31 {
252                    let mut end_comp = context.create_components();
253                    if let Some(start_year) = components.get(Component::Year) {
254                        if year_str.is_some() || era_str.is_some() {
255                            end_comp.assign(Component::Year, start_year);
256                        } else {
257                            end_comp.imply(Component::Year, start_year);
258                        }
259                    }
260                    end_comp.assign(Component::Month, month as i32);
261                    end_comp.assign(Component::Day, end_day);
262
263                    if end_comp.is_valid_date() {
264                        Some(end_comp)
265                    } else {
266                        None
267                    }
268                } else {
269                    None
270                }
271            } else {
272                None
273            };
274
275            results.push(context.create_result(match_start, match_end, components, end_components));
276            start = match_end;
277        }
278
279        Ok(results)
280    }
281}