Skip to main content

whichtime_sys/parsers/fr/
month_name.rs

1//! French month name parser
2//!
3//! Handles French date formats with month names like:
4//! - "10 août 2012"
5//! - "le 10 août"
6//! - "10 - 22 août 2012" (ranges)
7//! - "10 au 22 août 2012" (ranges with "au")
8//! - "10 août 234 AC" (year suffixes)
9//! - "Dim 15 Sept" (abbreviated weekday + date)
10//! - "Mardi 10 janvier" (weekday prefix)
11
12use crate::components::Component;
13use crate::context::ParsingContext;
14use crate::dictionaries::fr as dict;
15use crate::error::Result;
16use crate::parsers::Parser;
17use crate::results::ParsedResult;
18use crate::scanner::TokenType;
19use chrono::Datelike;
20use fancy_regex::Regex;
21use std::sync::LazyLock;
22
23// Main pattern for date with month name
24// Year pattern requires digits NOT followed by : (to avoid matching time like "12:00")
25static PATTERN: LazyLock<Regex> = LazyLock::new(|| {
26    Regex::new(
27        r"(?i)(?:le\s+)?(?:(?P<weekday>lundi|mardi|mercredi|jeudi|vendredi|samedi|dimanche|lun|mar|mer|jeu|ven|sam|dim)\.?\s+)?(?P<day>\d{1,2})(?:er|ème|e)?(?:\s*(?:-|–|au?)\s*(?P<end_day>\d{1,2})(?:er|ème|e)?)?\s+(?P<month>janvier|février|fevrier|mars|avril|mai|juin|juillet|août|aout|septembre|octobre|novembre|décembre|decembre|jan|janv|fév|fev|mar|avr|jun|juil|jul|aoû|aou|sep|sept|oct|nov|déc|dec)\.?(?:\s+(?P<year>\d{1,4})(?![\d:]))?(?:\s*(?P<era>a\.?\s*c\.?|p\.?\s*chr\.?\s*n\.?|av\.?\s*j\.?-?c\.?))?"
28    ).unwrap()
29});
30
31// Pattern for abbreviated weekday + day + month (Dim 15 Sept, DIM 15SEPT)
32static ABBREV_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
33    Regex::new(
34        r"(?i)(?P<weekday>lun|mar|mer|jeu|ven|sam|dim)\.?\s*(?P<day>\d{1,2})\s*(?P<month>jan|janv|fév|fev|mar|avr|mai|jun|juil|jul|aoû|aou|sep|sept|oct|nov|déc|dec)t?\.?"
35    ).unwrap()
36});
37
38// Pattern for cross-month ranges (handles "24 août 2023 au 26 août 2023")
39static RANGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
40    Regex::new(
41        r"(?i)(?P<start_day>\d{1,2})\s*(?P<start_month>janvier|février|fevrier|mars|avril|mai|juin|juillet|août|aout|septembre|octobre|novembre|décembre|decembre|jan|janv|fév|fev|mar|avr|jun|juil|jul|aoû|aou|sep|sept|oct|nov|déc|dec)\.?(?:\s+(?P<start_year>\d{4}))?\s*(?:-|–|au?)\s*(?P<end_day>\d{1,2})\s*(?P<end_month>janvier|février|fevrier|mars|avril|mai|juin|juillet|août|aout|septembre|octobre|novembre|décembre|decembre|jan|janv|fév|fev|mar|avr|jun|juil|jul|aoû|aou|sep|sept|oct|nov|déc|dec)\.?(?:\s+(?P<end_year>\d{1,4}))?"
42    ).unwrap()
43});
44
/// French month name parser.
///
/// Stateless unit struct; all parsing logic lives in its `Parser` implementation and
/// in the helper functions below.
pub struct FRMonthNameParser;

impl FRMonthNameParser {
    /// Creates the parser.
    pub fn new() -> Self {
        Self
    }

    /// Returns `true` when `day` exists in `month` of `year`.
    ///
    /// `month` must be in `1..=12`; February has 29 days in leap years (divisible by 4
    /// and not by 100, or divisible by 400) and 28 otherwise.
    fn is_valid_date(year: i32, month: u32, day: u32) -> bool {
        let is_leap_year = (year % 4 == 0 && year % 100 != 0) || year % 400 == 0;
        let days_in_month = match month {
            1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
            4 | 6 | 9 | 11 => 30,
            2 if is_leap_year => 29,
            2 => 28,
            // Month 0 or anything above 12.
            _ => return false,
        };
        (1..=days_in_month).contains(&day)
    }

    /// Converts the textual year (plus optional era suffix) into a signed year.
    ///
    /// * `year_str` - the year digits; unparsable input is treated as `0`.
    /// * `era_str` - the era suffix as matched in the text, if any.
    ///
    /// Without an era, a value below 100 is read as a two-digit year: `51..=99` maps to
    /// `1951..=1999`, anything else to `20xx`. With an era the value is taken literally.
    /// An era that normalises to something containing "ac" or "av" ("AC", "a. c.",
    /// "av. J.-C.") negates the year; any other era ("p. Chr. n.") leaves it positive.
    fn parse_year_with_era(year_str: &str, era_str: Option<&str>) -> i32 {
        let mut year: i32 = year_str.parse().unwrap_or(0);

        // A blank era counts as "no era" for the two-digit expansion.
        let has_era = era_str.is_some_and(|era| !era.trim().is_empty());
        if !has_era && year < 100 {
            year += if year > 50 { 1900 } else { 2000 };
        }

        let is_before_era = era_str.is_some_and(|era| {
            // Drop dots, hyphens and *every* kind of whitespace: the patterns accept any
            // `\s` between the letters (tab, non-breaking space, ...), so stripping only
            // the ASCII space would leave "a.\u{a0}c." unrecognised.
            let compact: String = era
                .to_lowercase()
                .chars()
                .filter(|&c| !(c.is_whitespace() || c == '.' || c == '-'))
                .collect();
            compact.contains("ac") || compact.contains("av")
        });

        if is_before_era { -year } else { year }
    }
}
97
98impl Parser for FRMonthNameParser {
99    fn name(&self) -> &'static str {
100        "FRMonthNameParser"
101    }
102
103    fn should_apply(&self, context: &ParsingContext) -> bool {
104        context.has_token_type(TokenType::Month) || context.text.bytes().any(|b| b.is_ascii_digit())
105    }
106
107    fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
108        let mut results = Vec::new();
109        let ref_date = context.reference.instant;
110
111        let mut start = 0;
112        while start < context.text.len() {
113            let search_text = &context.text[start..];
114
115            // Try cross-month range pattern first
116            if let Ok(Some(caps)) = RANGE_PATTERN.captures(search_text)
117                && let (Some(sd), Some(sm), Some(ed), Some(em)) = (
118                    caps.name("start_day"),
119                    caps.name("start_month"),
120                    caps.name("end_day"),
121                    caps.name("end_month"),
122                )
123            {
124                let start_day: u32 = sd.as_str().parse().unwrap_or(0);
125                let start_month = dict::get_month(&sm.as_str().to_lowercase()).unwrap_or(0);
126                let end_day: u32 = ed.as_str().parse().unwrap_or(0);
127                let end_month = dict::get_month(&em.as_str().to_lowercase()).unwrap_or(0);
128
129                if start_day > 0
130                    && start_day <= 31
131                    && start_month > 0
132                    && end_day > 0
133                    && end_day <= 31
134                    && end_month > 0
135                {
136                    // Get years - start_year or end_year, preferring end_year if both present
137                    let start_year = caps
138                        .name("start_year")
139                        .map(|y| Self::parse_year_with_era(y.as_str(), None));
140                    let end_year = caps
141                        .name("end_year")
142                        .map(|y| Self::parse_year_with_era(y.as_str(), None));
143
144                    let year = end_year.or(start_year).unwrap_or(ref_date.year());
145
146                    let full_match = caps.get(0).unwrap();
147                    let match_start = start + full_match.start();
148                    let match_end = start + full_match.end();
149
150                    let mut components = context.create_components();
151                    components.assign(Component::Year, start_year.unwrap_or(year));
152                    components.assign(Component::Month, start_month as i32);
153                    components.assign(Component::Day, start_day as i32);
154
155                    let mut end_comp = context.create_components();
156                    end_comp.assign(Component::Year, year);
157                    end_comp.assign(Component::Month, end_month as i32);
158                    end_comp.assign(Component::Day, end_day as i32);
159
160                    results.push(context.create_result(
161                        match_start,
162                        match_end,
163                        components,
164                        Some(end_comp),
165                    ));
166                    start = match_end;
167                    continue;
168                }
169            }
170
171            // Try abbreviated weekday pattern
172            if let Ok(Some(caps)) = ABBREV_PATTERN.captures(search_text)
173                && let (Some(d), Some(m)) = (caps.name("day"), caps.name("month"))
174            {
175                let day: u32 = d.as_str().parse().unwrap_or(0);
176                let month = dict::get_month(&m.as_str().to_lowercase()).unwrap_or(0);
177
178                if day > 0 && day <= 31 && month > 0 {
179                    let full_match = caps.get(0).unwrap();
180                    let match_start = start + full_match.start();
181                    let match_end = start + full_match.end();
182
183                    let mut components = context.create_components();
184                    components.imply(Component::Year, ref_date.year());
185                    components.assign(Component::Month, month as i32);
186                    components.assign(Component::Day, day as i32);
187
188                    // Add weekday if present
189                    if let Some(wd) = caps.name("weekday")
190                        && let Some(weekday) = dict::get_weekday(&wd.as_str().to_lowercase())
191                    {
192                        components.assign(Component::Weekday, weekday as i32);
193                    }
194
195                    results.push(context.create_result(match_start, match_end, components, None));
196                    start = match_end;
197                    continue;
198                }
199            }
200
201            // Try main pattern
202            if let Ok(Some(caps)) = PATTERN.captures(search_text)
203                && let (Some(d), Some(m)) = (caps.name("day"), caps.name("month"))
204            {
205                let day: u32 = d.as_str().parse().unwrap_or(0);
206                let month = dict::get_month(&m.as_str().to_lowercase()).unwrap_or(0);
207
208                let full_match = caps.get(0).unwrap();
209                let matched_text = full_match.as_str();
210
211                // Skip "le " prefix if present
212                let prefix_offset = if matched_text.to_lowercase().starts_with("le ") {
213                    3
214                } else {
215                    0
216                };
217
218                let match_start = start + full_match.start() + prefix_offset;
219                let match_end = start + full_match.end();
220
221                let year = caps.name("year").map(|y| {
222                    let era = caps.name("era").map(|e| e.as_str());
223                    Self::parse_year_with_era(y.as_str(), era)
224                });
225
226                let actual_year = year.unwrap_or(ref_date.year());
227
228                // Validate the date
229                if day == 0 || month == 0 || !Self::is_valid_date(actual_year, month, day) {
230                    start = match_end;
231                    continue;
232                }
233
234                let mut components = context.create_components();
235                if let Some(y) = year {
236                    components.assign(Component::Year, y);
237                } else {
238                    // No explicit year - imply it so ForwardDateRefiner can adjust
239                    components.imply(Component::Year, ref_date.year());
240                }
241                components.assign(Component::Month, month as i32);
242                components.assign(Component::Day, day as i32);
243
244                // Add weekday if present
245                if let Some(wd) = caps.name("weekday")
246                    && let Some(weekday) = dict::get_weekday(&wd.as_str().to_lowercase())
247                {
248                    components.assign(Component::Weekday, weekday as i32);
249                }
250
251                // Handle date range within same month
252                let end_comp = if let Some(end_day_match) = caps.name("end_day") {
253                    let end_day: u32 = end_day_match.as_str().parse().unwrap_or(0);
254                    if end_day > 0 && end_day <= 31 {
255                        let mut ec = context.create_components();
256                        if let Some(y) = year {
257                            ec.assign(Component::Year, y);
258                        } else {
259                            ec.imply(Component::Year, ref_date.year());
260                        }
261                        ec.assign(Component::Month, month as i32);
262                        ec.assign(Component::Day, end_day as i32);
263                        Some(ec)
264                    } else {
265                        None
266                    }
267                } else {
268                    None
269                };
270
271                results.push(context.create_result(match_start, match_end, components, end_comp));
272                start = match_end;
273                continue;
274            }
275
276            // No match at this position - advance to next character (UTF-8 safe)
277            if let Some(c) = search_text.chars().next() {
278                start += c.len_utf8();
279            } else {
280                break;
281            }
282        }
283
284        Ok(results)
285    }
286}
287
288impl Default for FRMonthNameParser {
289    fn default() -> Self {
290        Self::new()
291    }
292}