Skip to main content

whichtime_sys/parsers/it/
month_name.rs

1//! Italian month name parser
2//!
3//! Handles Italian date formats with month names like:
4//! - Little endian: "10 Agosto 2012", "il 10 Agosto"
5//! - Middle endian: "Agosto 10, 2012", "Agosto 2017"
6//! - Date ranges: "10 - 22 Agosto 2012"
7//! - Combined with time: "12 Luglio alle 19:00"
8
9use crate::components::Component;
10use crate::context::ParsingContext;
11use crate::dictionaries::it as dict;
12use crate::error::Result;
13use crate::parsers::Parser;
14use crate::results::ParsedResult;
15use crate::scanner::TokenType;
16use chrono::Datelike;
17use fancy_regex::Regex;
18use std::sync::LazyLock;
19
20// Little endian pattern: "10 Agosto 2012", "il 10 Agosto", "10 - 22 Agosto 2012"
21static LITTLE_ENDIAN_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
22    Regex::new(
23        r"(?i)(?:il\s+)?(?P<day>\d{1,2})(?:\s*(?:-|–|a)\s*(?P<end_day>\d{1,2}))?\s+(?P<month>gennaio|febbraio|febraio|marzo|aprile|maggio|giugno|luglio|agosto|settembre|ottobre|novembre|dicembre|gen|feb|febb|mar|apr|mag|giu|lug|lugl|ago|set|sett|ott|nov|dic|dice)\.?(?:\s+(?P<year>\d{1,4})(?![\d:]))?(?:\s+(?:alle?\s+)?(?P<hour>\d{1,2})(?:[:\.](?P<minute>\d{2}))?)?"
24    ).unwrap()
25});
26
27// Middle endian pattern: "Agosto 10, 2012", "Agosto 2017", "Agosto 10"
28// Note: "Agosto 10" (day without year) needs to be distinguished from year-only "Agosto 2017"
29static MIDDLE_ENDIAN_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
30    Regex::new(
31        r"(?i)(?P<month>gennaio|febbraio|febraio|marzo|aprile|maggio|giugno|luglio|agosto|settembre|ottobre|novembre|dicembre|gen|feb|febb|mar|apr|mag|giu|lug|lugl|ago|set|sett|ott|nov|dic|dice)\.?(?:\s+(?P<day>\d{1,2})(?:,\s*(?P<year>\d{4}))?|\s+(?P<year_only>\d{4}))(?![\d])"
32    ).unwrap()
33});
34
/// Parser for Italian dates that spell out the month name.
pub struct ITMonthNameParser;

impl ITMonthNameParser {
    /// Creates the parser; it carries no state.
    pub fn new() -> Self {
        ITMonthNameParser
    }

    /// Returns `true` when `day` exists in `month` of `year` (Gregorian leap-year rules).
    ///
    /// A `month` outside 1..=12 or a `day` of 0 is rejected.
    fn is_valid_date(year: i32, month: u32, day: u32) -> bool {
        let is_leap = year % 400 == 0 || (year % 4 == 0 && year % 100 != 0);
        let month_len = match month {
            2 if is_leap => 29,
            2 => 28,
            4 | 6 | 9 | 11 => 30,
            1..=12 => 31,
            _ => return false,
        };
        (1..=month_len).contains(&day)
    }

    /// Converts the captured year digits into a full year.
    ///
    /// Values below 100 are treated as two-digit years: 51..=99 map to 1951..=1999 and
    /// everything else below 100 maps to 2000 + value. Text that does not parse as an
    /// `i32` is treated as 0 (i.e. 2000).
    fn parse_year(year_str: &str) -> i32 {
        match year_str.parse::<i32>().unwrap_or(0) {
            full if full >= 100 => full,
            short if short > 50 => 1900 + short,
            short => 2000 + short,
        }
    }
}
71
72impl Parser for ITMonthNameParser {
73    fn name(&self) -> &'static str {
74        "ITMonthNameParser"
75    }
76
77    fn should_apply(&self, context: &ParsingContext) -> bool {
78        context.has_token_type(TokenType::Month) || context.text.bytes().any(|b| b.is_ascii_digit())
79    }
80
81    fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
82        let mut results = Vec::new();
83        let ref_date = context.reference.instant;
84
85        let mut start = 0;
86        while start < context.text.len() {
87            let search_text = &context.text[start..];
88
89            // Try little endian pattern first (more specific)
90            if let Ok(Some(caps)) = LITTLE_ENDIAN_PATTERN.captures(search_text)
91                && let (Some(d), Some(m)) = (caps.name("day"), caps.name("month"))
92            {
93                let day: u32 = d.as_str().parse().unwrap_or(0);
94                let month = dict::get_month(&m.as_str().to_lowercase()).unwrap_or(0);
95
96                let full_match = caps.get(0).unwrap();
97                let matched_text = full_match.as_str();
98
99                // Skip "il " prefix for the result text
100                let prefix_offset = if matched_text.to_lowercase().starts_with("il ") {
101                    3
102                } else {
103                    0
104                };
105
106                let match_start = start + full_match.start() + prefix_offset;
107                let match_end = start + full_match.end();
108
109                let year = caps.name("year").map(|y| Self::parse_year(y.as_str()));
110
111                let actual_year = year.unwrap_or(ref_date.year());
112
113                // Validate the date
114                if day == 0 || month == 0 || !Self::is_valid_date(actual_year, month, day) {
115                    start = match_end;
116                    continue;
117                }
118
119                // Check if time is present
120                let has_time = caps.name("hour").is_some();
121
122                let mut components = context.create_components();
123                if let Some(y) = year {
124                    components.assign(Component::Year, y);
125                } else if has_time {
126                    // When time is specified, assign year to prevent ForwardDateRefiner from moving it
127                    components.assign(Component::Year, ref_date.year());
128                } else {
129                    components.imply(Component::Year, ref_date.year());
130                }
131                components.assign(Component::Month, month as i32);
132                components.assign(Component::Day, day as i32);
133
134                // Handle time if present
135                if let Some(hour_match) = caps.name("hour") {
136                    let hour: i32 = hour_match.as_str().parse().unwrap_or(0);
137                    let minute: i32 = caps
138                        .name("minute")
139                        .and_then(|m| m.as_str().parse().ok())
140                        .unwrap_or(0);
141                    components.assign(Component::Hour, hour);
142                    components.assign(Component::Minute, minute);
143                }
144
145                // Handle date range within same month
146                let end_comp = if let Some(end_day_match) = caps.name("end_day") {
147                    let end_day: u32 = end_day_match.as_str().parse().unwrap_or(0);
148                    if end_day > 0 && end_day <= 31 {
149                        let mut ec = context.create_components();
150                        if let Some(y) = year {
151                            ec.assign(Component::Year, y);
152                        } else {
153                            ec.imply(Component::Year, ref_date.year());
154                        }
155                        ec.assign(Component::Month, month as i32);
156                        ec.assign(Component::Day, end_day as i32);
157                        Some(ec)
158                    } else {
159                        None
160                    }
161                } else {
162                    None
163                };
164
165                results.push(context.create_result(match_start, match_end, components, end_comp));
166                start = match_end;
167                continue;
168            }
169
170            // Try middle endian pattern
171            if let Ok(Some(caps)) = MIDDLE_ENDIAN_PATTERN.captures(search_text)
172                && let Some(m) = caps.name("month")
173            {
174                let month = dict::get_month(&m.as_str().to_lowercase()).unwrap_or(0);
175
176                if month > 0 {
177                    let full_match = caps.get(0).unwrap();
178                    let match_start = start + full_match.start();
179                    let match_end = start + full_match.end();
180
181                    let day: u32 = caps
182                        .name("day")
183                        .and_then(|d| d.as_str().parse().ok())
184                        .unwrap_or(1); // Default to 1st if only month/year
185
186                    let year = caps
187                        .name("year")
188                        .or(caps.name("year_only"))
189                        .map(|y| Self::parse_year(y.as_str()));
190
191                    let actual_year = year.unwrap_or(ref_date.year());
192
193                    // Validate the date
194                    if !Self::is_valid_date(actual_year, month, day) {
195                        start = match_end;
196                        continue;
197                    }
198
199                    let mut components = context.create_components();
200                    if let Some(y) = year {
201                        components.assign(Component::Year, y);
202                    } else {
203                        components.imply(Component::Year, ref_date.year());
204                    }
205                    components.assign(Component::Month, month as i32);
206                    components.assign(Component::Day, day as i32);
207
208                    results.push(context.create_result(match_start, match_end, components, None));
209                    start = match_end;
210                    continue;
211                }
212            }
213
214            // No match at this position - advance to next character (UTF-8 safe)
215            if let Some(c) = search_text.chars().next() {
216                start += c.len_utf8();
217            } else {
218                break;
219            }
220        }
221
222        Ok(results)
223    }
224}
225
226impl Default for ITMonthNameParser {
227    fn default() -> Self {
228        Self::new()
229    }
230}