whichtime_sys/parsers/pt/
month_name.rs1use crate::components::Component;
10use crate::context::ParsingContext;
11use crate::dictionaries::pt::{get_month, get_weekday};
12use crate::error::Result;
13use crate::parsers::Parser;
14use crate::results::ParsedResult;
15use chrono::Datelike;
16use fancy_regex::Regex;
17use std::sync::LazyLock;
18
19static PATTERN: LazyLock<Regex> = LazyLock::new(|| {
20 Regex::new(
21 r"(?ix)
22 (?:
23 (?P<weekday>domingo|dom\.?|segunda(?:-feira)?|seg\.?|terça(?:-feira)?|terca(?:-feira)?|ter\.?|quarta(?:-feira)?|qua\.?|quinta(?:-feira)?|qui\.?|sexta(?:-feira)?|sex\.?|sábado|sabado|sab\.?)\s*,?\s*
24 )?
25 (?P<day>\d{1,2})(?:º|°)?\s*
26 (?:
27 (?:(?:a|até|\-|–)\s*(?P<end_day>\d{1,2})(?:º|°)?\s*)?
28 )?
29 (?:de\s+)?
30 (?P<month>janeiro|jan\.?|fevereiro|fev\.?|março|marco|mar\.?|abril|abr\.?|maio|mai\.?|junho|jun\.?|julho|jul\.?|agosto|ago\.?|setembro|set\.?|outubro|out\.?|novembro|nov\.?|dezembro|dez\.?)
31 (?:
32 (?:\s*(?:de|,|-)?\s*)
33 (?P<year>\d{1,4}(?!:))
34 (?:\s*(?P<era>AC|d\.?\s*C\.?|A\.?\s*C\.?))?
35 )?
36 (?=\W|$)
37 "
38 ).unwrap()
39});
40
41static ABBREV_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
43 Regex::new(
44 r"(?ix)(?P<weekday>dom|seg|ter|qua|qui|sex|sab)\.?\s*(?P<day>\d{1,2})(?P<month>jan|fev|mar|abr|mai|jun|jul|ago|set|out|nov|dez)\.?(?:\s*(?P<year>\d{2,4}))?(?=\W|$)"
45 ).unwrap()
46});
47
48static RANGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
50 Regex::new(
51 r"(?ix)
52 (?P<start_day>\d{1,2})(?:º|°)?\s*(?:de\s+)?
53 (?P<start_month>janeiro|jan\.?|fevereiro|fev\.?|março|marco|mar\.?|abril|abr\.?|maio|mai\.?|junho|jun\.?|julho|jul\.?|agosto|ago\.?|setembro|set\.?|outubro|out\.?|novembro|nov\.?|dezembro|dez\.?)
54 \s*(?:-|a|até)\s*
55 (?P<end_day>\d{1,2})(?:º|°)?\s*(?:de\s+)?
56 (?P<end_month>janeiro|jan\.?|fevereiro|fev\.?|março|marco|mar\.?|abril|abr\.?|maio|mai\.?|junho|jun\.?|julho|jul\.?|agosto|ago\.?|setembro|set\.?|outubro|out\.?|novembro|nov\.?|dezembro|dez\.?)
57 (?:\s*(?:de\s+)?(?P<year>\d{1,4}))?
58 (?=\W|$)
59 "
60 ).unwrap()
61});
62
/// Parser for Portuguese dates written with a month name (for example `5 de março de 2024`).
pub struct PTMonthNameParser;

impl PTMonthNameParser {
    /// Creates the parser. The type is stateless, so this is free.
    pub fn new() -> Self {
        Self
    }

    /// Converts the captured year text (and optional era marker) into a signed year.
    ///
    /// * `year_str` – the digits captured for the year; `None` yields `None`.
    /// * `era_str` – the captured era marker, if any (`AC`, `A.C.`, `d.C.`, …).
    ///
    /// Returns `None` when there is no year text or it does not parse as an `i32`.
    ///
    /// Without an era, a value below 100 is treated as a two-digit year: values above 50
    /// map to `19xx`, everything else to `20xx`. With an era the number is taken literally,
    /// and it is negated when the era — ignoring case, dots and any whitespace — is `ac`.
    fn parse_year_with_era(year_str: Option<&str>, era_str: Option<&str>) -> Option<i32> {
        let year: i32 = year_str?.parse().ok()?;

        let Some(era) = era_str else {
            // No era marker: apply the two-digit-year pivot.
            return Some(if year >= 100 {
                year
            } else if year > 50 {
                1900 + year
            } else {
                2000 + year
            });
        };

        // The regex allows arbitrary whitespace (`\s*`) between the letters of the era,
        // so strip every whitespace character, not just the ASCII space.
        let is_before_christ = era
            .chars()
            .filter(|c| !c.is_whitespace() && *c != '.')
            .flat_map(char::to_lowercase)
            .eq("ac".chars());

        if is_before_christ {
            // `checked_neg` avoids an overflow panic on `i32::MIN`.
            year.checked_neg()
        } else {
            Some(year)
        }
    }
}

impl Default for PTMonthNameParser {
    /// Equivalent to [`PTMonthNameParser::new`].
    fn default() -> Self {
        Self::new()
    }
}
100
101impl Parser for PTMonthNameParser {
102 fn name(&self) -> &'static str {
103 "PTMonthNameParser"
104 }
105
106 fn should_apply(&self, _context: &ParsingContext) -> bool {
107 true
108 }
109
110 fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
111 let mut results = Vec::new();
112 let ref_date = context.reference.instant;
113
114 let mut start = 0;
116 while start < context.text.len() {
117 let search_text = &context.text[start..];
118 let captures = match ABBREV_PATTERN.captures(search_text) {
119 Ok(Some(caps)) => caps,
120 Ok(None) => break,
121 Err(_) => break,
122 };
123
124 let full_match = match captures.get(0) {
125 Some(m) => m,
126 None => break,
127 };
128
129 let match_start = start + full_match.start();
130 let match_end = start + full_match.end();
131
132 let weekday_str = captures.name("weekday").map(|m| m.as_str().to_lowercase());
133 let day_str = captures.name("day").map(|m| m.as_str()).unwrap_or("1");
134 let month_str = captures
135 .name("month")
136 .map(|m| m.as_str().to_lowercase())
137 .unwrap_or_default();
138 let year_str = captures.name("year").map(|m| m.as_str());
139
140 let month_clean = month_str.trim_end_matches('.');
141 let Some(month) = get_month(month_clean) else {
142 start = match_end;
143 continue;
144 };
145
146 let day: i32 = day_str.parse().unwrap_or(1);
147 if !(1..=31).contains(&day) {
148 start = match_end;
149 continue;
150 }
151
152 let mut components = context.create_components();
153
154 if let Some(y) = year_str {
155 let year = Self::parse_year_with_era(Some(y), None).unwrap_or(ref_date.year());
156 components.assign(Component::Year, year);
157 } else {
158 components.imply(Component::Year, ref_date.year());
159 }
160
161 components.assign(Component::Month, month as i32);
162 components.assign(Component::Day, day);
163
164 if let Some(ref wd_str) = weekday_str
165 && let Some(weekday) = get_weekday(wd_str)
166 {
167 components.assign(Component::Weekday, weekday as i32);
168 }
169
170 if !components.is_valid_date() {
171 start = match_end;
172 continue;
173 }
174
175 results.push(context.create_result(match_start, match_end, components, None));
176 start = match_end;
177 }
178
179 start = 0;
181 while start < context.text.len() {
182 let search_text = &context.text[start..];
183 let captures = match RANGE_PATTERN.captures(search_text) {
184 Ok(Some(caps)) => caps,
185 Ok(None) => break,
186 Err(_) => break,
187 };
188
189 let full_match = match captures.get(0) {
190 Some(m) => m,
191 None => break,
192 };
193
194 let match_start = start + full_match.start();
195 let match_end = start + full_match.end();
196
197 let start_day_str = captures
198 .name("start_day")
199 .map(|m| m.as_str())
200 .unwrap_or("1");
201 let start_month_str = captures
202 .name("start_month")
203 .map(|m| m.as_str().to_lowercase())
204 .unwrap_or_default();
205 let end_day_str = captures.name("end_day").map(|m| m.as_str()).unwrap_or("1");
206 let end_month_str = captures
207 .name("end_month")
208 .map(|m| m.as_str().to_lowercase())
209 .unwrap_or_default();
210 let year_str = captures.name("year").map(|m| m.as_str());
211
212 let start_month = get_month(start_month_str.trim_end_matches('.')).unwrap_or(1);
213 let end_month = get_month(end_month_str.trim_end_matches('.')).unwrap_or(1);
214
215 let start_day: i32 = start_day_str.parse().unwrap_or(1);
216 let end_day: i32 = end_day_str.parse().unwrap_or(1);
217
218 let mut start_components = context.create_components();
219 let mut end_components = context.create_components();
220
221 if let Some(y) = year_str {
222 let year = y.parse::<i32>().unwrap_or(ref_date.year());
223 start_components.assign(Component::Year, year);
224 end_components.assign(Component::Year, year);
225 } else {
226 start_components.imply(Component::Year, ref_date.year());
227 end_components.imply(Component::Year, ref_date.year());
228 }
229
230 start_components.assign(Component::Month, start_month as i32);
231 start_components.assign(Component::Day, start_day);
232
233 end_components.assign(Component::Month, end_month as i32);
234 end_components.assign(Component::Day, end_day);
235
236 results.push(context.create_result(
237 match_start,
238 match_end,
239 start_components,
240 Some(end_components),
241 ));
242 start = match_end;
243 }
244
245 start = 0;
247 while start < context.text.len() {
248 let search_text = &context.text[start..];
249 let captures = match PATTERN.captures(search_text) {
250 Ok(Some(caps)) => caps,
251 Ok(None) => break,
252 Err(_) => break,
253 };
254
255 let full_match = match captures.get(0) {
256 Some(m) => m,
257 None => break,
258 };
259
260 let match_start = start + full_match.start();
261 let match_end = start + full_match.end();
262
263 let weekday_str = captures.name("weekday").map(|m| m.as_str().to_lowercase());
264 let day_str = captures.name("day").map(|m| m.as_str()).unwrap_or("1");
265 let month_str = captures
266 .name("month")
267 .map(|m| m.as_str().to_lowercase())
268 .unwrap_or_default();
269 let year_str = captures.name("year").map(|m| m.as_str());
270 let era_str = captures.name("era").map(|m| m.as_str());
271 let end_day_str = captures.name("end_day").map(|m| m.as_str());
272
273 let month_clean = month_str.trim_end_matches('.');
274
275 let Some(month) = get_month(month_clean) else {
276 start = match_end;
277 continue;
278 };
279
280 let day: i32 = day_str.parse().unwrap_or(1);
281 if !(1..=31).contains(&day) {
282 start = match_end;
283 continue;
284 }
285
286 let mut components = context.create_components();
287
288 if year_str.is_some() || era_str.is_some() {
289 let year = Self::parse_year_with_era(year_str, era_str).unwrap_or(ref_date.year());
290 components.assign(Component::Year, year);
291 } else {
292 components.imply(Component::Year, ref_date.year());
293 }
294
295 components.assign(Component::Month, month as i32);
296 components.assign(Component::Day, day);
297
298 if let Some(ref wd_str) = weekday_str {
299 let wd_clean = wd_str.trim_end_matches(['.', ',']);
304 if let Some(weekday) = get_weekday(wd_clean) {
305 components.assign(Component::Weekday, weekday as i32);
306 }
307 }
308
309 if !components.is_valid_date() {
310 start = match_end;
311 continue;
312 }
313
314 let end_components = if let Some(end_day_text) = end_day_str {
316 let end_day: i32 = end_day_text.parse().unwrap_or(0);
317 if end_day > 0 && end_day <= 31 {
318 let mut end_comp = context.create_components();
319 if let Some(start_year) = components.get(Component::Year) {
320 if year_str.is_some() || era_str.is_some() {
321 end_comp.assign(Component::Year, start_year);
322 } else {
323 end_comp.imply(Component::Year, start_year);
324 }
325 }
326 end_comp.assign(Component::Month, month as i32);
327 end_comp.assign(Component::Day, end_day);
328
329 if end_comp.is_valid_date() {
330 Some(end_comp)
331 } else {
332 None
333 }
334 } else {
335 None
336 }
337 } else {
338 None
339 };
340
341 results.push(context.create_result(match_start, match_end, components, end_components));
342 start = match_end;
343 }
344
345 Ok(results)
346 }
347}