whichtime_sys/parsers/it/
month_name.rs1use crate::components::Component;
10use crate::context::ParsingContext;
11use crate::dictionaries::it as dict;
12use crate::error::Result;
13use crate::parsers::Parser;
14use crate::results::ParsedResult;
15use crate::scanner::TokenType;
16use chrono::Datelike;
17use fancy_regex::Regex;
18use std::sync::LazyLock;
19
20static LITTLE_ENDIAN_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
22 Regex::new(
23 r"(?i)(?:il\s+)?(?P<day>\d{1,2})(?:\s*(?:-|–|a)\s*(?P<end_day>\d{1,2}))?\s+(?P<month>gennaio|febbraio|febraio|marzo|aprile|maggio|giugno|luglio|agosto|settembre|ottobre|novembre|dicembre|gen|feb|febb|mar|apr|mag|giu|lug|lugl|ago|set|sett|ott|nov|dic|dice)\.?(?:\s+(?P<year>\d{1,4})(?![\d:]))?(?:\s+(?:alle?\s+)?(?P<hour>\d{1,2})(?:[:\.](?P<minute>\d{2}))?)?"
24 ).unwrap()
25});
26
27static MIDDLE_ENDIAN_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
30 Regex::new(
31 r"(?i)(?P<month>gennaio|febbraio|febraio|marzo|aprile|maggio|giugno|luglio|agosto|settembre|ottobre|novembre|dicembre|gen|feb|febb|mar|apr|mag|giu|lug|lugl|ago|set|sett|ott|nov|dic|dice)\.?(?:\s+(?P<day>\d{1,2})(?:,\s*(?P<year>\d{4}))?|\s+(?P<year_only>\d{4}))(?![\d])"
32 ).unwrap()
33});
34
/// Parser for Italian dates that spell out the month by name or abbreviation.
pub struct ITMonthNameParser;

impl ITMonthNameParser {
    /// Creates the (stateless) parser.
    pub fn new() -> Self {
        ITMonthNameParser
    }

    /// Returns `true` when `day` exists in `month` of `year`.
    ///
    /// `month` must be in `1..=12` and `day` in `1..=31`; February length
    /// follows the Gregorian leap-year rule.
    fn is_valid_date(year: i32, month: u32, day: u32) -> bool {
        let month_in_range = (1..=12).contains(&month);
        let day_in_range = (1..=31).contains(&day);
        if !(month_in_range && day_in_range) {
            return false;
        }

        let leap_year = year % 400 == 0 || (year % 4 == 0 && year % 100 != 0);
        // `month` is known to be 1..=12 here, so the wildcard arm covers
        // exactly the seven 31-day months.
        let last_day = match month {
            2 if leap_year => 29,
            2 => 28,
            4 | 6 | 9 | 11 => 30,
            _ => 31,
        };
        day <= last_day
    }

    /// Converts a year string to a full year.
    ///
    /// Values below 100 are treated as two-digit years: above 50 they map to
    /// the 1900s, otherwise to the 2000s. Unparseable input is treated as 0
    /// (and therefore yields 2000). Values of 100 or more are returned as-is.
    fn parse_year(year_str: &str) -> i32 {
        match year_str.parse::<i32>().unwrap_or(0) {
            full if full >= 100 => full,
            short if short > 50 => 1900 + short,
            short => 2000 + short,
        }
    }
}
71
72impl Parser for ITMonthNameParser {
73 fn name(&self) -> &'static str {
74 "ITMonthNameParser"
75 }
76
77 fn should_apply(&self, context: &ParsingContext) -> bool {
78 context.has_token_type(TokenType::Month) || context.text.bytes().any(|b| b.is_ascii_digit())
79 }
80
81 fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
82 let mut results = Vec::new();
83 let ref_date = context.reference.instant;
84
85 let mut start = 0;
86 while start < context.text.len() {
87 let search_text = &context.text[start..];
88
89 if let Ok(Some(caps)) = LITTLE_ENDIAN_PATTERN.captures(search_text)
91 && let (Some(d), Some(m)) = (caps.name("day"), caps.name("month"))
92 {
93 let day: u32 = d.as_str().parse().unwrap_or(0);
94 let month = dict::get_month(&m.as_str().to_lowercase()).unwrap_or(0);
95
96 let full_match = caps.get(0).unwrap();
97 let matched_text = full_match.as_str();
98
99 let prefix_offset = if matched_text.to_lowercase().starts_with("il ") {
101 3
102 } else {
103 0
104 };
105
106 let match_start = start + full_match.start() + prefix_offset;
107 let match_end = start + full_match.end();
108
109 let year = caps.name("year").map(|y| Self::parse_year(y.as_str()));
110
111 let actual_year = year.unwrap_or(ref_date.year());
112
113 if day == 0 || month == 0 || !Self::is_valid_date(actual_year, month, day) {
115 start = match_end;
116 continue;
117 }
118
119 let has_time = caps.name("hour").is_some();
121
122 let mut components = context.create_components();
123 if let Some(y) = year {
124 components.assign(Component::Year, y);
125 } else if has_time {
126 components.assign(Component::Year, ref_date.year());
128 } else {
129 components.imply(Component::Year, ref_date.year());
130 }
131 components.assign(Component::Month, month as i32);
132 components.assign(Component::Day, day as i32);
133
134 if let Some(hour_match) = caps.name("hour") {
136 let hour: i32 = hour_match.as_str().parse().unwrap_or(0);
137 let minute: i32 = caps
138 .name("minute")
139 .and_then(|m| m.as_str().parse().ok())
140 .unwrap_or(0);
141 components.assign(Component::Hour, hour);
142 components.assign(Component::Minute, minute);
143 }
144
145 let end_comp = if let Some(end_day_match) = caps.name("end_day") {
147 let end_day: u32 = end_day_match.as_str().parse().unwrap_or(0);
148 if end_day > 0 && end_day <= 31 {
149 let mut ec = context.create_components();
150 if let Some(y) = year {
151 ec.assign(Component::Year, y);
152 } else {
153 ec.imply(Component::Year, ref_date.year());
154 }
155 ec.assign(Component::Month, month as i32);
156 ec.assign(Component::Day, end_day as i32);
157 Some(ec)
158 } else {
159 None
160 }
161 } else {
162 None
163 };
164
165 results.push(context.create_result(match_start, match_end, components, end_comp));
166 start = match_end;
167 continue;
168 }
169
170 if let Ok(Some(caps)) = MIDDLE_ENDIAN_PATTERN.captures(search_text)
172 && let Some(m) = caps.name("month")
173 {
174 let month = dict::get_month(&m.as_str().to_lowercase()).unwrap_or(0);
175
176 if month > 0 {
177 let full_match = caps.get(0).unwrap();
178 let match_start = start + full_match.start();
179 let match_end = start + full_match.end();
180
181 let day: u32 = caps
182 .name("day")
183 .and_then(|d| d.as_str().parse().ok())
184 .unwrap_or(1); let year = caps
187 .name("year")
188 .or(caps.name("year_only"))
189 .map(|y| Self::parse_year(y.as_str()));
190
191 let actual_year = year.unwrap_or(ref_date.year());
192
193 if !Self::is_valid_date(actual_year, month, day) {
195 start = match_end;
196 continue;
197 }
198
199 let mut components = context.create_components();
200 if let Some(y) = year {
201 components.assign(Component::Year, y);
202 } else {
203 components.imply(Component::Year, ref_date.year());
204 }
205 components.assign(Component::Month, month as i32);
206 components.assign(Component::Day, day as i32);
207
208 results.push(context.create_result(match_start, match_end, components, None));
209 start = match_end;
210 continue;
211 }
212 }
213
214 if let Some(c) = search_text.chars().next() {
216 start += c.len_utf8();
217 } else {
218 break;
219 }
220 }
221
222 Ok(results)
223 }
224}
225
226impl Default for ITMonthNameParser {
227 fn default() -> Self {
228 Self::new()
229 }
230}