Skip to main content

whichtime_sys/parsers/en/
time_expression.rs

1//! Time expression parser: 3:30 PM, 15:00, 10:30:45, etc.
2
3use crate::components::Component;
4use crate::context::ParsingContext;
5use crate::error::Result;
6use crate::parsers::Parser;
7use crate::results::ParsedResult;
8use crate::types::Meridiem;
9use regex::Regex;
10use std::sync::LazyLock;
11
12// Time pattern: HH:MM[:SS] [AM/PM] or HH[h]MM
13static PATTERN: LazyLock<Regex> = LazyLock::new(|| {
14    Regex::new(
15        r"(?i)(?:^|[^\d])(\d{1,2})(?::(\d{2}))?(?::(\d{2}))?(?:\s*(a\.?m\.?|p\.?m\.?))?(?:[^\d]|$)",
16    )
17    .unwrap()
18});
19
20// 24-hour time pattern
21static PATTERN_24H: LazyLock<Regex> = LazyLock::new(|| {
22    Regex::new(r"(?i)(?:^|[^\d])(\d{1,2}):(\d{2})(?::(\d{2}))?(?:[^\d]|$)").unwrap()
23});
24
/// Recognizes English clock times such as `3:30 PM`, `15:00` or `10:30:45`.
///
/// This is a stateless unit type; all behavior lives in its `Parser` impl.
pub struct TimeExpressionParser;
27
28impl Parser for TimeExpressionParser {
29    fn name(&self) -> &'static str {
30        "TimeExpressionParser"
31    }
32
33    fn should_apply(&self, context: &ParsingContext) -> bool {
34        // Must contain digits and either : or AM/PM
35        let text = context.lower_text();
36        text.bytes().any(|b| b.is_ascii_digit())
37            && (text.contains(':') || text.contains("am") || text.contains("pm"))
38    }
39
40    fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
41        let mut results = Vec::new();
42
43        // Try 24-hour pattern first
44        for mat in PATTERN_24H.find_iter(context.text) {
45            let matched_text = mat.as_str();
46            let index = mat.start();
47
48            let Some(caps) = PATTERN_24H.captures(matched_text) else {
49                continue;
50            };
51
52            let hour: i32 = caps
53                .get(1)
54                .and_then(|m| m.as_str().parse().ok())
55                .unwrap_or(-1);
56            let minute: i32 = caps
57                .get(2)
58                .and_then(|m| m.as_str().parse().ok())
59                .unwrap_or(0);
60            let second: i32 = caps
61                .get(3)
62                .and_then(|m| m.as_str().parse().ok())
63                .unwrap_or(0);
64
65            if !(0..=23).contains(&hour) || !(0..=59).contains(&minute) {
66                continue;
67            }
68
69            let mut components = context.create_components();
70            components.assign(Component::Hour, hour);
71            components.assign(Component::Minute, minute);
72            if caps.get(3).is_some() {
73                components.assign(Component::Second, second);
74            }
75
76            // Set meridiem based on hour
77            if hour >= 12 {
78                components.assign(Component::Meridiem, Meridiem::PM as i32);
79            } else {
80                components.assign(Component::Meridiem, Meridiem::AM as i32);
81            }
82
83            let actual_start = matched_text.find(|c: char| c.is_ascii_digit()).unwrap_or(0);
84            let actual_end = matched_text
85                .rfind(|c: char| c.is_ascii_digit() || c == 'm' || c == 'M')
86                .map(|i| i + matched_text[i..].chars().next().map_or(1, char::len_utf8))
87                .unwrap_or(matched_text.len());
88
89            results.push(context.create_result(
90                index + actual_start,
91                index + actual_end,
92                components,
93                None,
94            ));
95        }
96
97        // Try AM/PM pattern
98        for mat in PATTERN.find_iter(context.text) {
99            let matched_text = mat.as_str();
100            let index = mat.start();
101
102            // Skip if already matched
103            if results
104                .iter()
105                .any(|r| r.index <= index && r.end_index > index)
106            {
107                continue;
108            }
109
110            let Some(caps) = PATTERN.captures(matched_text) else {
111                continue;
112            };
113
114            let meridiem_match = caps.get(4);
115
116            // Only process if has AM/PM (otherwise 24h pattern should have caught it)
117            if meridiem_match.is_none() && caps.get(2).is_none() {
118                continue;
119            }
120
121            let mut hour: i32 = caps
122                .get(1)
123                .and_then(|m| m.as_str().parse().ok())
124                .unwrap_or(-1);
125            let minute: i32 = caps
126                .get(2)
127                .and_then(|m| m.as_str().parse().ok())
128                .unwrap_or(0);
129            let second: i32 = caps
130                .get(3)
131                .and_then(|m| m.as_str().parse().ok())
132                .unwrap_or(0);
133
134            if !(0..=23).contains(&hour) || minute > 59 {
135                continue;
136            }
137
138            // Handle AM/PM
139            let meridiem = if let Some(m) = meridiem_match {
140                let m_str = m.as_str().to_lowercase();
141                if m_str.starts_with('p') {
142                    if hour > 12 {
143                        continue; // Invalid: 14PM
144                    }
145                    if hour < 12 {
146                        hour += 12;
147                    }
148                    Some(Meridiem::PM)
149                } else {
150                    if hour > 12 {
151                        continue; // Invalid: 14AM
152                    }
153                    if hour == 12 {
154                        hour = 0;
155                    }
156                    Some(Meridiem::AM)
157                }
158            } else if hour >= 12 {
159                Some(Meridiem::PM)
160            } else {
161                Some(Meridiem::AM)
162            };
163
164            let mut components = context.create_components();
165            components.assign(Component::Hour, hour);
166            components.assign(Component::Minute, minute);
167            if caps.get(3).is_some() {
168                components.assign(Component::Second, second);
169            }
170            if let Some(m) = meridiem {
171                components.assign(Component::Meridiem, m as i32);
172            }
173
174            let actual_start = matched_text.find(|c: char| c.is_ascii_digit()).unwrap_or(0);
175            let actual_end = matched_text
176                .rfind(|c: char| c.is_ascii_alphanumeric())
177                .map(|i| i + matched_text[i..].chars().next().map_or(1, char::len_utf8))
178                .unwrap_or(matched_text.len());
179
180            results.push(context.create_result(
181                index + actual_start,
182                index + actual_end,
183                components,
184                None,
185            ));
186        }
187
188        Ok(results)
189    }
190}