text_processing_rs/taggers/
time.rs1use super::cardinal::words_to_number;
10
11pub fn parse(input: &str) -> Option<String> {
13 let original = input.trim();
14 let input_lower = original.to_lowercase();
15
16 let (time_part, period, timezone) = extract_period_and_tz(original, &input_lower);
18
19 if let Some(result) = parse_quarter_half(&time_part, &period, &timezone) {
21 return Some(result);
22 }
23
24 if let Some(result) = parse_oclock(&time_part, &period, &timezone) {
25 return Some(result);
26 }
27
28 if let Some(result) = parse_to_pattern(&time_part, &period, &timezone) {
29 return Some(result);
30 }
31
32 if let Some(result) = parse_standard_time(&time_part, &period, &timezone) {
33 return Some(result);
34 }
35
36 None
37}
38
/// Splits a trailing time zone and a.m./p.m. marker off a time expression.
///
/// `original` is the text in the caller's casing and `input_lower` is the
/// same text lowercased. All matching is done on the lowercase text; the
/// original text is consulted only to recover how a matched suffix was
/// spelled (its casing).
///
/// Returns `(time_part, period, timezone)`:
/// * `time_part` - the lowercase text left after removing both suffixes.
/// * `period` - `"a.m."` / `"p.m."` (upper-cased when the spoken marker was
///   written in all caps), or an empty string when no marker was found.
/// * `timezone` - the zone as written in `original` with spaces removed, or
///   an empty string when no zone was found.
fn extract_period_and_tz(original: &str, input_lower: &str) -> (String, String, String) {
    const TZ_PATTERNS: [&str; 10] = [
        "g m t", "gmt", "e s t", "est", "p s t", "pst", "c s t", "cst", "m s t", "mst",
    ];
    // Every period pattern starts with a space, so it can only match a whole
    // trailing word. Lengths are always taken from the pattern itself.
    const PERIOD_PATTERNS: [&str; 7] = [
        " a m",
        " am",
        " p m",
        " pm",
        " in the morning",
        " in the afternoon",
        " in the evening",
    ];

    /// Splits `text` so that the second half is exactly its last `tail_len`
    /// bytes. Yields `None` when `text` is too short or the cut would not
    /// land on a character boundary.
    fn split_tail(text: &str, tail_len: usize) -> Option<(&str, &str)> {
        let cut = text.len().checked_sub(tail_len)?;
        if text.is_char_boundary(cut) {
            Some(text.split_at(cut))
        } else {
            None
        }
    }

    let mut lower_rest = input_lower;
    // Original-case counterpart of `lower_rest`; becomes `None` if the two
    // texts can no longer be aligned, in which case the lowercase spelling
    // is used instead.
    let mut orig_rest = Some(original);
    let mut timezone = String::new();
    let mut period = String::new();

    for &tz in TZ_PATTERNS.iter() {
        if let Some(before) = lower_rest.strip_suffix(tz) {
            // Only accept the zone as a whole trailing word ("nine s t" must
            // not be read as "nin" + "e s t").
            if !(before.is_empty() || before.ends_with(char::is_whitespace)) {
                continue;
            }
            let split = orig_rest.and_then(|text| split_tail(text, tz.len()));
            let spelled = split.map_or(tz, |(_, tail)| tail);
            timezone = spelled.replace(" ", "");
            orig_rest = split.map(|(head, _)| head.trim_end());
            lower_rest = before.trim();
            break;
        }
    }

    for &pattern in PERIOD_PATTERNS.iter() {
        if let Some(before) = lower_rest.strip_suffix(pattern) {
            // The marker sits at the very end of what is left of the
            // original text once the zone has been removed.
            let spelled = orig_rest
                .and_then(|text| split_tail(text, pattern.len()))
                .map_or(pattern, |(_, tail)| tail);
            period = format_period_with_case(spelled, pattern);
            lower_rest = before.trim();
            break;
        }
    }

    (lower_rest.to_string(), period, timezone)
}

/// Renders the a.m./p.m. marker for a matched period pattern.
///
/// `pattern` is the lowercase pattern that matched and `orig_suffix` is the
/// same stretch of text as it was originally written. "in the ..." phrases
/// always produce lowercase output ("morning" gives a.m., anything else
/// p.m.). For the "a m" / "p m" forms the pattern decides between a.m. and
/// p.m., and the result is upper-cased only when every letter of
/// `orig_suffix` is uppercase.
fn format_period_with_case(orig_suffix: &str, pattern: &str) -> String {
    if pattern.contains("in the") {
        let marker = if pattern.contains("morning") {
            "a.m."
        } else {
            "p.m."
        };
        return marker.to_string();
    }

    let all_caps = orig_suffix
        .chars()
        .filter(|c| c.is_alphabetic())
        .all(|c| c.is_uppercase());
    let is_morning = pattern.contains('a');

    let marker = match (is_morning, all_caps) {
        (true, true) => "A.M.",
        (true, false) => "a.m.",
        (false, true) => "P.M.",
        (false, false) => "p.m.",
    };
    marker.to_string()
}
141
/// Builds the written time: `hour` and `minute` zero-padded to two digits
/// and joined by a colon, followed by `period` and then `timezone`, each
/// preceded by a single space and each omitted when empty.
fn format_time(hour: i64, minute: i64, period: &str, timezone: &str) -> String {
    let clock = format!("{:02}:{:02}", hour, minute);

    let mut pieces = vec![clock.as_str()];
    pieces.extend(
        [period, timezone]
            .iter()
            .copied()
            .filter(|part| !part.is_empty()),
    );
    pieces.join(" ")
}
158
159fn parse_quarter_half(input: &str, period: &str, timezone: &str) -> Option<String> {
161 if input.starts_with("quarter past ") {
162 let hour_part = input.trim_start_matches("quarter past ");
163 let hour = words_to_number(hour_part)? as i64;
164 return Some(format_time(hour, 15, period, timezone));
165 }
166
167 if input.starts_with("half past ") {
168 let hour_part = input.trim_start_matches("half past ");
169 let hour = words_to_number(hour_part)? as i64;
170 return Some(format_time(hour, 30, period, timezone));
171 }
172
173 None
174}
175
176fn parse_oclock(input: &str, period: &str, timezone: &str) -> Option<String> {
178 if input.ends_with(" o'clock") || input.ends_with(" oclock") {
179 let hour_part = input
180 .trim_end_matches(" o'clock")
181 .trim_end_matches(" oclock");
182 let hour = words_to_number(hour_part)? as i64;
183 return Some(format_time(hour, 0, period, timezone));
184 }
185
186 None
187}
188
189fn parse_to_pattern(input: &str, period: &str, timezone: &str) -> Option<String> {
191 if input.starts_with("quarter to ") {
192 let hour_part = input.trim_start_matches("quarter to ");
193 let hour = words_to_number(hour_part)? as i64;
194 let prev_hour = if hour == 1 { 12 } else { hour - 1 };
195 return Some(format_time(prev_hour, 45, period, timezone));
196 }
197
198 if input.contains(" to ") {
200 let parts: Vec<&str> = input.split(" to ").collect();
201 if parts.len() == 2 {
202 let min_part = parts[0]
203 .trim_end_matches(" min")
204 .trim_end_matches(" mins")
205 .trim_end_matches(" minute")
206 .trim_end_matches(" minutes");
207 let minutes_before = words_to_number(min_part)? as i64;
208 let hour = words_to_number(parts[1])? as i64;
209 let prev_hour = if hour == 1 { 12 } else { hour - 1 };
210 let minute = 60 - minutes_before;
211 return Some(format_time(prev_hour, minute, period, timezone));
212 }
213 }
214
215 None
216}
217
218fn parse_standard_time(input: &str, period: &str, timezone: &str) -> Option<String> {
220 let words: Vec<&str> = input.split_whitespace().collect();
221
222 if words.is_empty() {
223 return None;
224 }
225
226 if words.len() == 1 {
229 if period.is_empty() && timezone.is_empty() {
230 return None;
231 }
232 let hour = words_to_number(words[0])? as i64;
233 if hour >= 1 && hour <= 24 {
234 return Some(format_time(hour, 0, period, timezone));
235 }
236 return None;
237 }
238
239 let hour_word = words[0];
243 let hour = parse_simple_hour(hour_word)?;
244
245 if period.is_empty() && timezone.is_empty() && (hour < 1 || hour > 12) {
247 return None;
248 }
249
250 let minute_words = words[1..].join(" ");
252 let minute = parse_minute(&minute_words)?;
253
254 if period.is_empty() && timezone.is_empty() {
258 if hour >= 10 && hour <= 19 && minute >= 10 && minute <= 99 {
259 return None;
260 }
261 }
262
263 if minute >= 0 && minute < 60 {
264 Some(format_time(hour, minute, period, timezone))
265 } else {
266 None
267 }
268}
269
/// Maps the lowercase words "one" through "twelve" to `1..=12`.
///
/// Any other text, including differently cased words, yields `None`.
fn parse_simple_hour(word: &str) -> Option<i64> {
    // Position in this table plus one is the hour value.
    const HOUR_WORDS: [&str; 12] = [
        "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "eleven",
        "twelve",
    ];

    HOUR_WORDS
        .iter()
        .position(|&name| name == word)
        .map(|index| index as i64 + 1)
}
288
289fn parse_minute(input: &str) -> Option<i64> {
292 let words: Vec<&str> = input.split_whitespace().collect();
293
294 if words.is_empty() {
295 return None;
296 }
297
298 if words.len() == 2 && (words[0] == "o" || words[0] == "oh") {
300 let digit_word = words[1];
301 let minute = words_to_number(digit_word).map(|n| n as i64)?;
302 if minute >= 0 && minute <= 9 {
303 return Some(minute);
304 }
305 return None;
306 }
307
308 if words.len() == 1 {
310 let minute = words_to_number(words[0]).map(|n| n as i64)?;
311 if minute >= 0 && minute <= 59 {
312 return Some(minute);
313 }
314 return None;
315 }
316
317 if words.len() == 2 {
320 let is_tens = matches!(words[0], "twenty" | "thirty" | "forty" | "fifty");
322 if !is_tens {
323 return None;
324 }
325 let is_units = matches!(
327 words[1],
328 "one" | "two" | "three" | "four" | "five" | "six" | "seven" | "eight" | "nine"
329 );
330 if !is_units {
331 return None;
332 }
333 let minute = words_to_number(input).map(|n| n as i64)?;
334 if minute >= 0 && minute <= 59 {
335 return Some(minute);
336 }
337 }
338
339 None
340}
341
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `parse` on each spoken input and compares the result with the
    /// paired written form; `None` means the input must be rejected.
    fn check(cases: &[(&str, Option<&str>)]) {
        for &(spoken, written) in cases {
            assert_eq!(
                parse(spoken),
                written.map(String::from),
                "input: {:?}",
                spoken
            );
        }
    }

    #[test]
    fn test_standard_time() {
        check(&[
            ("two thirty", Some("02:30")),
            ("eight fifty one", Some("08:51")),
            // Without a period this is left unparsed.
            ("eleven forty five", None),
            ("eleven forty five a m", Some("11:45 a.m.")),
        ]);
    }

    #[test]
    fn test_with_period() {
        check(&[
            ("two p m", Some("02:00 p.m.")),
            ("eleven fifty five p m", Some("11:55 p.m.")),
            ("seven a m", Some("07:00 a.m.")),
        ]);
    }

    #[test]
    fn test_quarter_half() {
        check(&[
            ("quarter past one", Some("01:15")),
            ("half past three", Some("03:30")),
            ("half past twelve", Some("12:30")),
        ]);
    }

    #[test]
    fn test_quarter_to() {
        check(&[
            ("quarter to one", Some("12:45")),
            ("quarter to twelve", Some("11:45")),
        ]);
    }

    #[test]
    fn test_oclock() {
        check(&[("three o'clock", Some("03:00"))]);
    }

    #[test]
    fn test_oh_minutes() {
        check(&[
            ("eight o six", Some("08:06")),
            ("twelve oh five", Some("12:05")),
        ]);
    }

    #[test]
    fn test_with_timezone() {
        check(&[
            ("eight oclock g m t", Some("08:00 gmt")),
            ("seven a m e s t", Some("07:00 a.m. est")),
        ]);
    }

    #[test]
    fn test_rejects_phone_like_input() {
        check(&[
            ("one two three one two three five six seven eight", None),
            ("seven nine nine", None),
        ]);
    }
}