Skip to main content

whichtime_sys/parsers/pt/
month_name.rs

1//! Portuguese month name little endian parser
2//!
3//! Handles Portuguese date expressions with month names like:
4//! - "10 Agosto 2012", "10 de Agosto de 2012"
5//! - "10 a 22 Agosto 2012" (date ranges)
6//! - "Dom 15Set", "DOM 15SET"
7//! - "Qua, 10 Janeiro"
8
9use crate::components::Component;
10use crate::context::ParsingContext;
11use crate::dictionaries::pt::{get_month, get_weekday};
12use crate::error::Result;
13use crate::parsers::Parser;
14use crate::results::ParsedResult;
15use chrono::Datelike;
16use fancy_regex::Regex;
17use std::sync::LazyLock;
18
19static PATTERN: LazyLock<Regex> = LazyLock::new(|| {
20    Regex::new(
21        r"(?ix)
22        (?:
23            (?P<weekday>domingo|dom\.?|segunda(?:-feira)?|seg\.?|terça(?:-feira)?|terca(?:-feira)?|ter\.?|quarta(?:-feira)?|qua\.?|quinta(?:-feira)?|qui\.?|sexta(?:-feira)?|sex\.?|sábado|sabado|sab\.?)\s*,?\s*
24        )?
25        (?P<day>\d{1,2})(?:º|°)?\s*
26        (?:
27            (?:(?:a|até|\-|–)\s*(?P<end_day>\d{1,2})(?:º|°)?\s*)?
28        )?
29        (?:de\s+)?
30        (?P<month>janeiro|jan\.?|fevereiro|fev\.?|março|marco|mar\.?|abril|abr\.?|maio|mai\.?|junho|jun\.?|julho|jul\.?|agosto|ago\.?|setembro|set\.?|outubro|out\.?|novembro|nov\.?|dezembro|dez\.?)
31        (?:
32            (?:\s*(?:de|,|-)?\s*)
33            (?P<year>\d{1,4}(?!:))
34            (?:\s*(?P<era>AC|d\.?\s*C\.?|A\.?\s*C\.?))?
35        )?
36        (?=\W|$)
37        "
38    ).unwrap()
39});
40
41// Pattern for abbreviated weekday + date format: "Dom 15Set"
42static ABBREV_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
43    Regex::new(
44        r"(?ix)(?P<weekday>dom|seg|ter|qua|qui|sex|sab)\.?\s*(?P<day>\d{1,2})(?P<month>jan|fev|mar|abr|mai|jun|jul|ago|set|out|nov|dez)\.?(?:\s*(?P<year>\d{2,4}))?(?=\W|$)"
45    ).unwrap()
46});
47
48// Pattern for cross-month date ranges: "10 Agosto - 12 Setembro"
49static RANGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
50    Regex::new(
51        r"(?ix)
52        (?P<start_day>\d{1,2})(?:º|°)?\s*(?:de\s+)?
53        (?P<start_month>janeiro|jan\.?|fevereiro|fev\.?|março|marco|mar\.?|abril|abr\.?|maio|mai\.?|junho|jun\.?|julho|jul\.?|agosto|ago\.?|setembro|set\.?|outubro|out\.?|novembro|nov\.?|dezembro|dez\.?)
54        \s*(?:-|a|até)\s*
55        (?P<end_day>\d{1,2})(?:º|°)?\s*(?:de\s+)?
56        (?P<end_month>janeiro|jan\.?|fevereiro|fev\.?|março|marco|mar\.?|abril|abr\.?|maio|mai\.?|junho|jun\.?|julho|jul\.?|agosto|ago\.?|setembro|set\.?|outubro|out\.?|novembro|nov\.?|dezembro|dez\.?)
57        (?:\s*(?:de\s+)?(?P<year>\d{1,4}))?
58        (?=\W|$)
59        "
60    ).unwrap()
61});
62
/// Portuguese month name parser.
///
/// A stateless unit type; all parsing state lives in the `ParsingContext`
/// handed to the `Parser` implementation.
#[derive(Debug, Clone, Copy)]
pub struct PTMonthNameParser;

impl PTMonthNameParser {
    /// Creates a new parser instance.
    pub fn new() -> Self {
        Self
    }

    /// Converts a captured year (and optional era suffix) into a signed year.
    ///
    /// * Returns `None` when `year_str` is absent or is not a valid `i32`.
    /// * Without an era, years below 100 are treated as two-digit shorthand:
    ///   values above 50 map to 19xx, all others to 20xx.
    /// * With an era, the year is taken literally; "AC" in any casing, with or
    ///   without dots or whitespace between the letters, negates it. Any other
    ///   era (e.g. "d.C.") leaves the year positive.
    fn parse_year_with_era(year_str: Option<&str>, era_str: Option<&str>) -> Option<i32> {
        let mut year: i32 = year_str?.parse().ok()?;

        let Some(era) = era_str else {
            // No era: expand two-digit shorthand into a full year.
            if year < 100 {
                year = if year > 50 { 1900 + year } else { 2000 + year };
            }
            return Some(year);
        };

        // The era regex allows arbitrary whitespace (`\s*`) and optional dots
        // between the letters, so strip every whitespace character, not just
        // the ASCII space, before comparing.
        let era_key: String = era
            .to_lowercase()
            .chars()
            .filter(|c| !c.is_whitespace() && *c != '.')
            .collect();

        if era_key == "ac" {
            // `checked_neg` avoids an overflow panic on `i32::MIN`.
            return year.checked_neg();
        }

        Some(year)
    }
}

impl Default for PTMonthNameParser {
    fn default() -> Self {
        Self::new()
    }
}
100
101impl Parser for PTMonthNameParser {
102    fn name(&self) -> &'static str {
103        "PTMonthNameParser"
104    }
105
106    fn should_apply(&self, _context: &ParsingContext) -> bool {
107        true
108    }
109
110    fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
111        let mut results = Vec::new();
112        let ref_date = context.reference.instant;
113
114        // Try abbreviated pattern (Dom 15Set)
115        let mut start = 0;
116        while start < context.text.len() {
117            let search_text = &context.text[start..];
118            let captures = match ABBREV_PATTERN.captures(search_text) {
119                Ok(Some(caps)) => caps,
120                Ok(None) => break,
121                Err(_) => break,
122            };
123
124            let full_match = match captures.get(0) {
125                Some(m) => m,
126                None => break,
127            };
128
129            let match_start = start + full_match.start();
130            let match_end = start + full_match.end();
131
132            let weekday_str = captures.name("weekday").map(|m| m.as_str().to_lowercase());
133            let day_str = captures.name("day").map(|m| m.as_str()).unwrap_or("1");
134            let month_str = captures
135                .name("month")
136                .map(|m| m.as_str().to_lowercase())
137                .unwrap_or_default();
138            let year_str = captures.name("year").map(|m| m.as_str());
139
140            let month_clean = month_str.trim_end_matches('.');
141            let Some(month) = get_month(month_clean) else {
142                start = match_end;
143                continue;
144            };
145
146            let day: i32 = day_str.parse().unwrap_or(1);
147            if !(1..=31).contains(&day) {
148                start = match_end;
149                continue;
150            }
151
152            let mut components = context.create_components();
153
154            if let Some(y) = year_str {
155                let year = Self::parse_year_with_era(Some(y), None).unwrap_or(ref_date.year());
156                components.assign(Component::Year, year);
157            } else {
158                components.imply(Component::Year, ref_date.year());
159            }
160
161            components.assign(Component::Month, month as i32);
162            components.assign(Component::Day, day);
163
164            if let Some(ref wd_str) = weekday_str
165                && let Some(weekday) = get_weekday(wd_str)
166            {
167                components.assign(Component::Weekday, weekday as i32);
168            }
169
170            if !components.is_valid_date() {
171                start = match_end;
172                continue;
173            }
174
175            results.push(context.create_result(match_start, match_end, components, None));
176            start = match_end;
177        }
178
179        // Try cross-month range pattern
180        start = 0;
181        while start < context.text.len() {
182            let search_text = &context.text[start..];
183            let captures = match RANGE_PATTERN.captures(search_text) {
184                Ok(Some(caps)) => caps,
185                Ok(None) => break,
186                Err(_) => break,
187            };
188
189            let full_match = match captures.get(0) {
190                Some(m) => m,
191                None => break,
192            };
193
194            let match_start = start + full_match.start();
195            let match_end = start + full_match.end();
196
197            let start_day_str = captures
198                .name("start_day")
199                .map(|m| m.as_str())
200                .unwrap_or("1");
201            let start_month_str = captures
202                .name("start_month")
203                .map(|m| m.as_str().to_lowercase())
204                .unwrap_or_default();
205            let end_day_str = captures.name("end_day").map(|m| m.as_str()).unwrap_or("1");
206            let end_month_str = captures
207                .name("end_month")
208                .map(|m| m.as_str().to_lowercase())
209                .unwrap_or_default();
210            let year_str = captures.name("year").map(|m| m.as_str());
211
212            let start_month = get_month(start_month_str.trim_end_matches('.')).unwrap_or(1);
213            let end_month = get_month(end_month_str.trim_end_matches('.')).unwrap_or(1);
214
215            let start_day: i32 = start_day_str.parse().unwrap_or(1);
216            let end_day: i32 = end_day_str.parse().unwrap_or(1);
217
218            let mut start_components = context.create_components();
219            let mut end_components = context.create_components();
220
221            if let Some(y) = year_str {
222                let year = y.parse::<i32>().unwrap_or(ref_date.year());
223                start_components.assign(Component::Year, year);
224                end_components.assign(Component::Year, year);
225            } else {
226                start_components.imply(Component::Year, ref_date.year());
227                end_components.imply(Component::Year, ref_date.year());
228            }
229
230            start_components.assign(Component::Month, start_month as i32);
231            start_components.assign(Component::Day, start_day);
232
233            end_components.assign(Component::Month, end_month as i32);
234            end_components.assign(Component::Day, end_day);
235
236            results.push(context.create_result(
237                match_start,
238                match_end,
239                start_components,
240                Some(end_components),
241            ));
242            start = match_end;
243        }
244
245        // Try main pattern
246        start = 0;
247        while start < context.text.len() {
248            let search_text = &context.text[start..];
249            let captures = match PATTERN.captures(search_text) {
250                Ok(Some(caps)) => caps,
251                Ok(None) => break,
252                Err(_) => break,
253            };
254
255            let full_match = match captures.get(0) {
256                Some(m) => m,
257                None => break,
258            };
259
260            let match_start = start + full_match.start();
261            let match_end = start + full_match.end();
262
263            let weekday_str = captures.name("weekday").map(|m| m.as_str().to_lowercase());
264            let day_str = captures.name("day").map(|m| m.as_str()).unwrap_or("1");
265            let month_str = captures
266                .name("month")
267                .map(|m| m.as_str().to_lowercase())
268                .unwrap_or_default();
269            let year_str = captures.name("year").map(|m| m.as_str());
270            let era_str = captures.name("era").map(|m| m.as_str());
271            let end_day_str = captures.name("end_day").map(|m| m.as_str());
272
273            let month_clean = month_str.trim_end_matches('.');
274
275            let Some(month) = get_month(month_clean) else {
276                start = match_end;
277                continue;
278            };
279
280            let day: i32 = day_str.parse().unwrap_or(1);
281            if !(1..=31).contains(&day) {
282                start = match_end;
283                continue;
284            }
285
286            let mut components = context.create_components();
287
288            if year_str.is_some() || era_str.is_some() {
289                let year = Self::parse_year_with_era(year_str, era_str).unwrap_or(ref_date.year());
290                components.assign(Component::Year, year);
291            } else {
292                components.imply(Component::Year, ref_date.year());
293            }
294
295            components.assign(Component::Month, month as i32);
296            components.assign(Component::Day, day);
297
298            if let Some(ref wd_str) = weekday_str {
299                // Need to strip punctuation/trailing space from weekday match if captured roughly?
300                // Regex should capture clean word.
301                // But regex `domingo|dom\.?|...` matches "dom." or "dom".
302                // Check if trailing comma/space is outside capture group. Yes, `\s*,?\s*` is outside `weekday` group.
303                let wd_clean = wd_str.trim_end_matches(['.', ',']);
304                if let Some(weekday) = get_weekday(wd_clean) {
305                    components.assign(Component::Weekday, weekday as i32);
306                }
307            }
308
309            if !components.is_valid_date() {
310                start = match_end;
311                continue;
312            }
313
314            // Handle end date for ranges
315            let end_components = if let Some(end_day_text) = end_day_str {
316                let end_day: i32 = end_day_text.parse().unwrap_or(0);
317                if end_day > 0 && end_day <= 31 {
318                    let mut end_comp = context.create_components();
319                    if let Some(start_year) = components.get(Component::Year) {
320                        if year_str.is_some() || era_str.is_some() {
321                            end_comp.assign(Component::Year, start_year);
322                        } else {
323                            end_comp.imply(Component::Year, start_year);
324                        }
325                    }
326                    end_comp.assign(Component::Month, month as i32);
327                    end_comp.assign(Component::Day, end_day);
328
329                    if end_comp.is_valid_date() {
330                        Some(end_comp)
331                    } else {
332                        None
333                    }
334                } else {
335                    None
336                }
337            } else {
338                None
339            };
340
341            results.push(context.create_result(match_start, match_end, components, end_components));
342            start = match_end;
343        }
344
345        Ok(results)
346    }
347}