// whichtime_sys/parsers/de/time_expression.rs

//! German time expression parser
//!
//! Handles German time expressions like:
//! - "14 Uhr", "14:30", "14h30", "um 16h"
//! - "um 7 morgens", "8 Uhr abends"
//! - "11:00 Uhr vormittags", "um 8 Uhr nachmittags"
//! - "um 5 Uhr in der Nacht"
//! - Time ranges: "18:10 - 22.32", "von 6:30 bis 23:00"

use crate::components::Component;
use crate::context::ParsingContext;
use crate::error::Result;
use crate::parsers::Parser;
use crate::results::ParsedResult;
use crate::types::Meridiem;
use fancy_regex::Regex;
use std::sync::LazyLock;

19// Pattern for German time expressions
20// Matches: "14 Uhr", "14:30", "14h30", "um 14 Uhr", "um 16h", "8 Uhr abends", etc.
21static PATTERN: LazyLock<Regex> = LazyLock::new(|| {
22    Regex::new(
23        r"(?ix)
24        (?:(?:von|um)\s+)?
25        (\d{1,2})
26        (?:[:h\.](\d{2}))?
27        (?:\s*uhr)?
28        (?:\s*(morgens?|vormittags?|nachmittags?|abends?|nachts?|in\s+der\s+nacht))?
29        ",
30    )
31    .unwrap()
32});
33
34// Pattern for time ranges: "18:10 - 22.32", "von 6:30 bis 23:00", "von 6h30 bis 23h00"
35static RANGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
36    Regex::new(
37        r"(?ix)
38        (?:von\s+)?
39        (\d{1,2})[:h\.](\d{2})
40        (?:\s*uhr)?
41        \s*
42        (?:-|–|bis)\s*
43        (\d{1,2})[:h\.](\d{2})
44        (?:\s*uhr)?
45        ",
46    )
47    .unwrap()
48});
49
50// Pattern for "um Xh" format (e.g., "um 16h")
51static H_FORMAT_PATTERN: LazyLock<Regex> =
52    LazyLock::new(|| Regex::new(r"(?i)(?:um\s+)?(\d{1,2})h(?:(\d{2}))?(?=\W|$)").unwrap());
53
/// German time expression parser.
///
/// A stateless unit type; all matching is driven by the module-level regexes.
#[derive(Debug, Clone, Copy)]
pub struct DETimeExpressionParser;

57impl DETimeExpressionParser {
58    pub fn new() -> Self {
59        Self
60    }
61}
62
63impl Parser for DETimeExpressionParser {
64    fn name(&self) -> &'static str {
65        "DETimeExpressionParser"
66    }
67
68    fn should_apply(&self, context: &ParsingContext) -> bool {
69        context.text.bytes().any(|b| b.is_ascii_digit())
70    }
71
72    fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
73        let mut results = Vec::new();
74
75        // First, try to match time ranges
76        let mut start = 0;
77        while start < context.text.len() {
78            let search_text = &context.text[start..];
79            let captures = match RANGE_PATTERN.captures(search_text) {
80                Ok(Some(caps)) => caps,
81                Ok(None) => break,
82                Err(_) => break,
83            };
84
85            let full_match = match captures.get(0) {
86                Some(m) => m,
87                None => break,
88            };
89
90            let match_start = start + full_match.start();
91            let match_end = start + full_match.end();
92
93            let start_hour: i32 = captures
94                .get(1)
95                .and_then(|m| m.as_str().parse().ok())
96                .unwrap_or(-1);
97            let start_minute: i32 = captures
98                .get(2)
99                .and_then(|m| m.as_str().parse().ok())
100                .unwrap_or(0);
101            let end_hour: i32 = captures
102                .get(3)
103                .and_then(|m| m.as_str().parse().ok())
104                .unwrap_or(-1);
105            let end_minute: i32 = captures
106                .get(4)
107                .and_then(|m| m.as_str().parse().ok())
108                .unwrap_or(0);
109
110            if (0..=23).contains(&start_hour)
111                && start_minute <= 59
112                && (0..=23).contains(&end_hour)
113                && end_minute <= 59
114            {
115                let mut start_components = context.create_components();
116                start_components.assign(Component::Hour, start_hour);
117                start_components.assign(Component::Minute, start_minute);
118                if start_hour >= 12 {
119                    start_components.assign(Component::Meridiem, Meridiem::PM as i32);
120                } else {
121                    start_components.assign(Component::Meridiem, Meridiem::AM as i32);
122                }
123
124                let mut end_components = context.create_components();
125                end_components.assign(Component::Hour, end_hour);
126                end_components.assign(Component::Minute, end_minute);
127                if end_hour >= 12 {
128                    end_components.assign(Component::Meridiem, Meridiem::PM as i32);
129                } else {
130                    end_components.assign(Component::Meridiem, Meridiem::AM as i32);
131                }
132
133                results.push(context.create_result(
134                    match_start,
135                    match_end,
136                    start_components,
137                    Some(end_components),
138                ));
139            }
140
141            start = match_end;
142        }
143
144        // Try "um Xh" format
145        start = 0;
146        while start < context.text.len() {
147            let search_text = &context.text[start..];
148            let captures = match H_FORMAT_PATTERN.captures(search_text) {
149                Ok(Some(caps)) => caps,
150                Ok(None) => break,
151                Err(_) => break,
152            };
153
154            let full_match = match captures.get(0) {
155                Some(m) => m,
156                None => break,
157            };
158
159            let match_start = start + full_match.start();
160            let match_end = start + full_match.end();
161
162            // Skip if this range was already captured by the range pattern
163            if results
164                .iter()
165                .any(|r| r.index <= match_start && r.end_index >= match_end)
166            {
167                start = match_end;
168                continue;
169            }
170
171            let hour: i32 = captures
172                .get(1)
173                .and_then(|m| m.as_str().parse().ok())
174                .unwrap_or(-1);
175            let minute: i32 = captures
176                .get(2)
177                .and_then(|m| m.as_str().parse().ok())
178                .unwrap_or(0);
179
180            if (0..=23).contains(&hour) && minute <= 59 {
181                let mut components = context.create_components();
182                components.assign(Component::Hour, hour);
183                components.assign(Component::Minute, minute);
184                if hour >= 12 {
185                    components.assign(Component::Meridiem, Meridiem::PM as i32);
186                } else {
187                    components.assign(Component::Meridiem, Meridiem::AM as i32);
188                }
189
190                results.push(context.create_result(match_start, match_end, components, None));
191            }
192
193            start = match_end;
194        }
195
196        // Standard time patterns
197        start = 0;
198        while start < context.text.len() {
199            let search_text = &context.text[start..];
200            let captures = match PATTERN.captures(search_text) {
201                Ok(Some(caps)) => caps,
202                Ok(None) => break,
203                Err(_) => break,
204            };
205
206            let full_match = match captures.get(0) {
207                Some(m) => m,
208                None => break,
209            };
210
211            let matched_text = full_match.as_str();
212            let match_start = start + full_match.start();
213            let match_end = start + full_match.end();
214
215            // Skip if this range was already captured
216            if results.iter().any(|r| {
217                (r.index <= match_start && r.end_index > match_start)
218                    || (match_start <= r.index && match_end > r.index)
219            }) {
220                start = match_end.max(start + 1);
221                continue;
222            }
223
224            // Skip if match doesn't contain "uhr", "h" with minutes, ":", or a time modifier
225            // This prevents matching bare numbers
226            let matched_lower = matched_text.to_lowercase();
227            let has_time_indicator = matched_lower.contains("uhr")
228                || matched_lower.contains(':')
229                || (matched_text.to_lowercase().contains('h') && captures.get(2).is_some())
230                || captures.get(3).is_some();
231
232            if !has_time_indicator {
233                start = match_end.max(start + 1);
234                continue;
235            }
236
237            let mut hour: i32 = captures
238                .get(1)
239                .and_then(|m| m.as_str().parse().ok())
240                .unwrap_or(-1);
241
242            let minute: i32 = captures
243                .get(2)
244                .and_then(|m| m.as_str().parse().ok())
245                .unwrap_or(0);
246
247            let modifier = captures.get(3).map(|m| m.as_str().to_lowercase());
248
249            // Validate hour and minute
250            if !(0..=23).contains(&hour) || minute > 59 {
251                start = match_end;
252                continue;
253            }
254
255            // Determine meridiem and adjust hour based on modifier
256            let meridiem = if let Some(ref mod_str) = modifier {
257                if mod_str.starts_with("morgen") || mod_str.starts_with("vormittag") {
258                    // Morning: keep hour as is (assumed AM)
259                    if hour == 12 {
260                        hour = 0;
261                    }
262                    Some(Meridiem::AM)
263                } else if mod_str.starts_with("nachmittag") || mod_str.starts_with("abend") {
264                    // Afternoon/Evening: add 12 if hour < 12
265                    if hour < 12 {
266                        hour += 12;
267                    }
268                    Some(Meridiem::PM)
269                } else if mod_str.starts_with("nacht") || mod_str.contains("in der nacht") {
270                    // Night: depends on hour
271                    // 8 Uhr in der Nacht = 20:00 (PM)
272                    // 5 Uhr in der Nacht = 05:00 (AM)
273                    if hour <= 6 {
274                        Some(Meridiem::AM)
275                    } else if hour < 12 {
276                        hour += 12;
277                        Some(Meridiem::PM)
278                    } else {
279                        Some(Meridiem::PM)
280                    }
281                } else {
282                    None
283                }
284            } else if hour >= 12 {
285                Some(Meridiem::PM)
286            } else {
287                Some(Meridiem::AM)
288            };
289
290            let mut components = context.create_components();
291            components.assign(Component::Hour, hour);
292            components.assign(Component::Minute, minute);
293
294            if let Some(m) = meridiem {
295                components.assign(Component::Meridiem, m as i32);
296            }
297
298            results.push(context.create_result(match_start, match_end, components, None));
299
300            start = match_end;
301        }
302
303        Ok(results)
304    }
305}
306
307impl Default for DETimeExpressionParser {
308    fn default() -> Self {
309        Self::new()
310    }
311}