Skip to main content

whichtime_sys/parsers/pt/
time_expression.rs

1//! Portuguese time expression parser
2//!
3//! Handles Portuguese time expressions like:
4//! - "6.13 AM" (dot separator)
5//! - "às 6:30pm"
6//! - "de 6:30pm a 11:00pm" (ranges)
7//! - "8:10 - 12.32" (ranges)
8
9use crate::components::Component;
10use crate::context::ParsingContext;
11use crate::error::Result;
12use crate::parsers::Parser;
13use crate::results::ParsedResult;
14use crate::types::Meridiem;
15use fancy_regex::Regex;
16use std::sync::LazyLock;
17
18// Primary pattern: handles single times and "de X a Y" ranges
19// Supports both : and . as separators
20// Matches:
21// - às 6.13 AM
22// - 8:10
23// - de 1pm a 3
24static PRIMARY_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
25    Regex::new(
26        r"(?ix)
27        (?<!\d[:\.])(?<!\w)
28        (?:de\s+)?
29        (?:às|as|a|ao)?\s*
30        (?P<hour1>\d{1,2})
31        (?:[:\.](?P<minute1>\d{2}))?
32        (?:[:\.](?P<second1>\d{2}))?
33        (?:\s*(?P<meridiem1>a\.?m\.?|p\.?m\.?))?
34        (?:
35            \s*(?:a|às|as|[\-–~])\s*
36            (?P<hour2>\d{1,2})
37            (?:[:\.](?P<minute2>\d{2}))?
38            (?:[:\.](?P<second2>\d{2}))?
39            (?:\s*(?P<meridiem2>a\.?m\.?|p\.?m\.?))?
40        )?
41        (?=\W|$)
42        ",
43    )
44    .unwrap()
45});
46
47/// Portuguese time expression parser
48pub struct PTTimeExpressionParser;
49
50impl PTTimeExpressionParser {
51    pub fn new() -> Self {
52        Self
53    }
54
55    fn parse_meridiem(s: &str) -> Option<Meridiem> {
56        let lower = s.to_lowercase();
57        if lower.starts_with('p') {
58            Some(Meridiem::PM)
59        } else if lower.starts_with('a') {
60            Some(Meridiem::AM)
61        } else {
62            None
63        }
64    }
65
66    fn adjust_hour(hour: i32, meridiem: Option<Meridiem>) -> i32 {
67        match meridiem {
68            Some(Meridiem::PM) => {
69                if hour < 12 {
70                    hour + 12
71                } else {
72                    hour
73                }
74            }
75            Some(Meridiem::AM) => {
76                if hour == 12 {
77                    0
78                } else {
79                    hour
80                }
81            }
82            None => hour,
83        }
84    }
85}
86
87impl Parser for PTTimeExpressionParser {
88    fn name(&self) -> &'static str {
89        "PTTimeExpressionParser"
90    }
91
92    fn should_apply(&self, context: &ParsingContext) -> bool {
93        let text = context.text;
94        text.bytes().any(|b| b.is_ascii_digit())
95    }
96
97    fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
98        let mut results = Vec::new();
99        let ref_date = context.reference.instant;
100
101        let mut start = 0;
102        while start < context.text.len() {
103            let search_text = &context.text[start..];
104            let captures = match PRIMARY_PATTERN.captures(search_text) {
105                Ok(Some(caps)) => caps,
106                Ok(None) => break,
107                Err(_) => break,
108            };
109
110            let full_match = match captures.get(0) {
111                Some(m) => m,
112                None => break,
113            };
114
115            let match_start = start + full_match.start();
116            let match_end = start + full_match.end();
117            let matched_text = full_match.as_str();
118
119            let hour1: i32 = captures
120                .name("hour1")
121                .and_then(|m| m.as_str().parse().ok())
122                .unwrap_or(0);
123            let minute1: i32 = captures
124                .name("minute1")
125                .and_then(|m| m.as_str().parse().ok())
126                .unwrap_or(0);
127            let second1: i32 = captures
128                .name("second1")
129                .and_then(|m| m.as_str().parse().ok())
130                .unwrap_or(0);
131            let meridiem1 = captures
132                .name("meridiem1")
133                .map(|m| m.as_str())
134                .and_then(Self::parse_meridiem);
135
136            // Validate hours
137            if hour1 > 23 {
138                start = match_end;
139                continue;
140            }
141
142            let hour2_opt = captures.name("hour2").and_then(|m| m.as_str().parse().ok());
143            let minute2: i32 = captures
144                .name("minute2")
145                .and_then(|m| m.as_str().parse().ok())
146                .unwrap_or(0);
147            let second2: i32 = captures
148                .name("second2")
149                .and_then(|m| m.as_str().parse().ok())
150                .unwrap_or(0);
151            let meridiem2 = captures
152                .name("meridiem2")
153                .map(|m| m.as_str())
154                .and_then(Self::parse_meridiem);
155
156            // Heuristic: If no meridiem and no minutes/seconds, ensure context supports it (like "às", "de")
157            // or regex handles it via `(?:de\s+)?...`. But regex allows loose matches.
158            // If simple number "6", regex matches. We want to avoid false positives.
159            let has_context_prefix = matched_text.to_lowercase().contains("às")
160                || matched_text.to_lowercase().contains("as")
161                || matched_text.to_lowercase().contains("de ")
162                || matched_text.to_lowercase().starts_with("a "); // careful with "a"
163
164            let has_time_separator = matched_text.contains(':')
165                || (matched_text.contains('.')
166                    && matched_text
167                        .chars()
168                        .any(|c| c == 'a' || c == 'p' || c == 'A' || c == 'P')); // Dot usually only with AM/PM or specific contexts?
169
170            let has_meridiem = meridiem1.is_some() || meridiem2.is_some();
171
172            if !has_time_separator && !has_meridiem && !has_context_prefix && hour2_opt.is_none() {
173                // Just a number? e.g. "10" in "10 Agosto" might be matched?
174                // Regex `(?<!\d)` helps but `Agosto` follows.
175                // If we match "10" in "10 Agosto", we shouldn't treat it as time.
176                // We should let date parser handle "10 Agosto".
177                // But date parser runs separately.
178                // OverlapRemovalRefiner should handle conflicts if ranges overlap.
179                // But simpler is to skip if not confident.
180                start = match_end;
181                continue;
182            }
183
184            // Adjust hours
185            let adj_hour1 = Self::adjust_hour(hour1, meridiem1);
186
187            let mut components = context.create_components();
188            components.assign(Component::Hour, adj_hour1);
189            components.assign(Component::Minute, minute1);
190            components.assign(Component::Second, second1);
191
192            if let Some(m) = meridiem1 {
193                components.assign(Component::Meridiem, m as i32);
194            } else {
195                // If hours > 12, imply PM (already handled by 24h input, but set flag?)
196                if hour1 >= 12 {
197                    components.assign(Component::Meridiem, Meridiem::PM as i32);
198                } else if hour1 < 12 {
199                    // Ambiguous. Leave unset.
200                }
201            }
202
203            // Handle Range
204            let end_components = if let Some(hour2) = hour2_opt {
205                if hour2 > 23 {
206                    None
207                } else {
208                    // Inherit meridiem from end to start if start missing?
209                    // e.g. "1 to 3 pm" -> 1pm to 3pm.
210                    // "de 1pm a 3" -> 1pm to 3pm?
211
212                    let final_meridiem2 = meridiem2.or(meridiem1);
213
214                    // Also check start time inheritance?
215                    // "5 - 7pm" -> 5pm - 7pm.
216
217                    // Adjust start hour if needed (logic from `en` parser)
218                    // If start is ambiguous (no meridiem, < 12) and end is PM, maybe start is PM?
219                    // Or if end is AM, start is AM.
220                    // But here we construct end components separately.
221
222                    let mut end_comp = context.create_components();
223                    let adj_hour2 = Self::adjust_hour(hour2, final_meridiem2);
224
225                    end_comp.assign(Component::Hour, adj_hour2);
226                    end_comp.assign(Component::Minute, minute2);
227                    end_comp.assign(Component::Second, second2);
228                    if let Some(m) = final_meridiem2 {
229                        end_comp.assign(Component::Meridiem, m as i32);
230                    } else if hour2 >= 12 {
231                        end_comp.assign(Component::Meridiem, Meridiem::PM as i32);
232                    }
233
234                    // Copy date from reference
235                    use chrono::Datelike;
236                    end_comp.imply(Component::Year, ref_date.year());
237                    end_comp.imply(Component::Month, ref_date.month() as i32);
238                    end_comp.imply(Component::Day, ref_date.day() as i32);
239
240                    Some(end_comp)
241                }
242            } else {
243                None
244            };
245
246            // Re-adjust start hour if we learned from end meridiem?
247            // "de 1pm a 3" -> 1pm to 3pm (test case). Here mer1=PM, mer2=None.
248            // My logic: `effective_meridiem2 = meridiem2.or(meridiem1)`. So mer2 becomes PM. Correct.
249
250            // What about "5 - 7pm"? mer1=None, mer2=PM.
251            // effective_meridiem2 = PM.
252            // Does start inherit PM?
253            // If hour1 < 12 and hour2 < 12 and mer2=PM -> likely start is PM too?
254            // e.g. 5 - 7pm -> 17:00 - 19:00.
255            // e.g. 10 - 2pm -> 10am - 2pm? Or 10pm - 2am? Probably 10am.
256            // Only imply if start < end?
257            // Standard whichtime logic usually implies start meridiem from end if sensible.
258            // But let's stick to basics unless test fails.
259
260            results.push(context.create_result(match_start, match_end, components, end_components));
261
262            start = match_end;
263        }
264
265        Ok(results)
266    }
267}
268
269impl Default for PTTimeExpressionParser {
270    fn default() -> Self {
271        Self::new()
272    }
273}