Skip to main content

whichtime_sys/parsers/common/
casual_date.rs

1//! Multi-locale casual date parser: now, today, tomorrow, yesterday, etc.
2//!
3//! This parser handles casual date expressions across all supported locales
4//! by using locale-specific patterns and dictionary lookups. It also handles
5//! combined casual date + time expressions like "heute Morgen" (today morning).
6
7use crate::components::Component;
8use crate::context::ParsingContext;
9use crate::dictionaries::{CasualDateType, CasualTimeType, Locale};
10use crate::error::Result;
11use crate::parsers::Parser;
12use crate::results::ParsedResult;
13use crate::scanner::TokenType;
14use crate::types::Meridiem;
15use chrono::{Datelike, Duration, Timelike};
16use regex::Regex;
17use std::sync::LazyLock;
18
19// Locale-specific patterns - now with optional trailing casual time
20static EN_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
21    Regex::new(r"(?i)\b(now|today|tonight|tomorrow|overmorrow|tmr|tmrw|yesterday)(?:\s+(morning|afternoon|evening|night))?\b").unwrap()
22});
23
24static DE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
25    Regex::new(
26        r"(?i)\b(jetzt|heute|morgen|gestern|übermorgen|uebermorgen|vorgestern)(?:\s+(Morgen|Morgens|Vormittag|Vormittags|Nachmittag|Nachmittags|Abend|Abends|Nacht|Nachts))?\b",
27    )
28    .unwrap()
29});
30
31static ES_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
32    Regex::new(r"(?i)\b(esta\s+mañana|esta\s+manana|esta\s+tarde|esta\s+noche|ahora|hoy|mañana|manana|ayer|pasado\s*mañana|pasado\s*manana|anteayer)(?:\s+(?:de\s+)?(mañana|manana|tarde|noche))?\b").unwrap()
33});
34
35static FR_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
36    Regex::new(r"(?i)\b(ce\s+matin|cet\s+après-?midi|cet\s+apres-?midi|cet\s+aprem|ce\s+soir|maintenant|aujourd'?hui|demain|hier|après-?demain|apres-?demain|avant-?hier)(?:\s+(matin|après-?midi|apres-?midi|soir|nuit))?\b").unwrap()
37});
38
39static IT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
40    Regex::new(r"(?i)\b(adesso|ora|oggi|stanotte|stasera|stamattina|domani|ieri|dopodomani|l'?altro\s*ieri|altroieri)(?:\s+(mattina|pomeriggio|sera|notte))?\b")
41        .unwrap()
42});
43
44static JA_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
45    Regex::new(r"(今日|きょう|本日|ほんじつ|今夜|こんや|今晩|こんばん|明日|あした|あす|昨日|きのう|さくじつ|明後日|あさって|一昨日|おととい)(?:の(朝|午前|午後|夕方|夜|深夜))?").unwrap()
46});
47
48static NL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
49    Regex::new(r"(?i)\b(nu|vandaag|morgen|gisteren|overmorgen|eergisteren)(?:\s+(ochtend|middag|avond|nacht))?\b").unwrap()
50});
51
52static PT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
53    Regex::new(r"(?i)\b(agora|hoje|amanhã|amanha|ontem|depois\s*de\s*amanhã|depois\s*de\s*amanha|anteontem)(?:\s+(?:de\s+|à\s+)?(manhã|manha|tarde|noite))?\b").unwrap()
54});
55
56static RU_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
57    Regex::new(r"(?i)\b(сейчас|сегодня|завтра|вчера|послезавтра|послепослезавтра|позавчера|позапозавчера)(?:\s+(утром|днём|днем|вечером|ночью))?\b").unwrap()
58});
59
60static SV_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
61    Regex::new(r"(?i)\b(nu|idag|imorgon|igår|igar|i\s*övermorgon|i\s*overmorgon|förrgår|forrgar)(?:\s+(?:på\s+)?(morgonen|förmiddagen|formiddagen|eftermiddagen|kvällen|kvallen|natten|midnatt))?\b").unwrap()
62});
63
64static UK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
65    Regex::new(r"(?i)\b(зараз|сьогодні|завтра|вчора|післязавтра|післяпіслязавтра|позавчора|позапозавчора)(?:\s+(вранці|вдень|ввечері|вночі))?\b").unwrap()
66});
67
68static ZH_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
69    Regex::new(r"(现在|現在|今天|今日|今晚|明天|明日|聽日|昨天|昨日|后天|後天|前天|而家)(早上|上午|中午|下午|傍晚|晚上)?").unwrap()
70});
71
72/// Multi-locale casual date parser
73pub struct MultiLocaleCasualDateParser {
74    locale: Locale,
75}
76
77impl MultiLocaleCasualDateParser {
78    pub fn new(locale: Locale) -> Self {
79        Self { locale }
80    }
81
82    fn get_pattern(&self) -> &'static Regex {
83        match self.locale {
84            Locale::En => &EN_PATTERN,
85            Locale::De => &DE_PATTERN,
86            Locale::Es => &ES_PATTERN,
87            Locale::Fr => &FR_PATTERN,
88            Locale::It => &IT_PATTERN,
89            Locale::Ja => &JA_PATTERN,
90            Locale::Nl => &NL_PATTERN,
91            Locale::Pt => &PT_PATTERN,
92            Locale::Ru => &RU_PATTERN,
93            Locale::Sv => &SV_PATTERN,
94            Locale::Uk => &UK_PATTERN,
95            Locale::Zh => &ZH_PATTERN,
96        }
97    }
98
99    fn lookup_casual_date(&self, text: &str) -> Option<CasualDateType> {
100        let lower = text.to_lowercase();
101        let normalized: String = lower.split_whitespace().collect::<Vec<_>>().join(" ");
102
103        match self.locale {
104            Locale::En => crate::dictionaries::en::get_casual_date(&normalized),
105            Locale::De => crate::dictionaries::de::get_casual_date(&normalized),
106            Locale::Es => crate::dictionaries::es::get_casual_date(&normalized),
107            Locale::Fr => crate::dictionaries::fr::get_casual_date(&normalized),
108            Locale::It => crate::dictionaries::it::get_casual_date(&normalized),
109            Locale::Ja => crate::dictionaries::ja::get_casual_date(&normalized)
110                .or_else(|| crate::dictionaries::ja::get_casual_date(text)), // Japanese doesn't need lowercase
111            Locale::Nl => crate::dictionaries::nl::get_casual_date(&normalized),
112            Locale::Pt => crate::dictionaries::pt::get_casual_date(&normalized),
113            Locale::Ru => crate::dictionaries::ru::get_casual_date(&normalized),
114            Locale::Sv => crate::dictionaries::sv::get_casual_date(&normalized),
115            Locale::Uk => crate::dictionaries::uk::get_casual_date(&normalized),
116            Locale::Zh => crate::dictionaries::zh::get_casual_date(&normalized)
117                .or_else(|| crate::dictionaries::zh::get_casual_date(text)), // Chinese doesn't need lowercase
118        }
119    }
120
121    fn lookup_casual_time(&self, text: &str) -> Option<CasualTimeType> {
122        let lower = text.to_lowercase();
123        let normalized: String = lower.split_whitespace().collect::<Vec<_>>().join(" ");
124
125        match self.locale {
126            Locale::En => crate::dictionaries::en::get_casual_time(&normalized),
127            Locale::De => crate::dictionaries::de::get_casual_time(&normalized),
128            Locale::Es => crate::dictionaries::es::get_casual_time(&normalized),
129            Locale::Fr => crate::dictionaries::fr::get_casual_time(&normalized),
130            Locale::It => crate::dictionaries::it::get_casual_time(&normalized),
131            Locale::Ja => crate::dictionaries::ja::get_casual_time(text)
132                .or_else(|| crate::dictionaries::ja::get_casual_time(&normalized)),
133            Locale::Nl => crate::dictionaries::nl::get_casual_time(&normalized),
134            Locale::Pt => crate::dictionaries::pt::get_casual_time(&normalized),
135            Locale::Ru => crate::dictionaries::ru::get_casual_time(&normalized),
136            Locale::Sv => crate::dictionaries::sv::get_casual_time(&normalized),
137            Locale::Uk => crate::dictionaries::uk::get_casual_time(&normalized),
138            Locale::Zh => crate::dictionaries::zh::get_casual_time(text)
139                .or_else(|| crate::dictionaries::zh::get_casual_time(&normalized)),
140        }
141    }
142
143    fn apply_casual_time(
144        &self,
145        components: &mut crate::components::FastComponents,
146        time_type: CasualTimeType,
147        time_word: Option<&str>,
148    ) {
149        match time_type {
150            CasualTimeType::Noon => {
151                components.assign(Component::Hour, 12);
152                components.assign(Component::Minute, 0);
153                components.assign(Component::Meridiem, Meridiem::PM as i32);
154            }
155            CasualTimeType::Midnight => {
156                components.assign(Component::Hour, 0);
157                components.assign(Component::Minute, 0);
158            }
159            CasualTimeType::Morning => {
160                // German "Vormittag" (forenoon) is ~9 AM, while "Morgen" is ~6 AM
161                let hour = if self.locale == Locale::De {
162                    match time_word {
163                        Some(w) if w.starts_with("vormittag") => 9,
164                        _ => 6,
165                    }
166                } else {
167                    6
168                };
169                components.imply(Component::Hour, hour);
170                components.assign(Component::Meridiem, Meridiem::AM as i32);
171            }
172            CasualTimeType::Afternoon => {
173                components.imply(Component::Hour, 15);
174                components.assign(Component::Meridiem, Meridiem::PM as i32);
175            }
176            CasualTimeType::Evening => {
177                components.imply(Component::Hour, 18);
178                components.assign(Component::Meridiem, Meridiem::PM as i32);
179            }
180            CasualTimeType::Night => {
181                components.imply(Component::Hour, 22);
182                components.assign(Component::Meridiem, Meridiem::PM as i32);
183            }
184        }
185    }
186}
187
188impl Parser for MultiLocaleCasualDateParser {
189    fn name(&self) -> &'static str {
190        "MultiLocaleCasualDateParser"
191    }
192
193    fn should_apply(&self, context: &ParsingContext) -> bool {
194        context.has_token_type(TokenType::CasualDate)
195    }
196
197    fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
198        let mut results = Vec::new();
199        let pattern = self.get_pattern();
200
201        for caps in pattern.captures_iter(context.text) {
202            let mat = caps.get(0).unwrap();
203            let matched_text = mat.as_str();
204            let index = mat.start();
205
206            // Get the date part (first capture group)
207            let date_text = caps.get(1).map(|m| m.as_str()).unwrap_or(matched_text);
208
209            let Some(casual_type) = self.lookup_casual_date(date_text) else {
210                continue;
211            };
212
213            // Get optional time part (second capture group)
214            let time_type = caps
215                .get(2)
216                .and_then(|m| self.lookup_casual_time(m.as_str()));
217
218            let mut components = context.create_components();
219            let ref_date = context.reference.instant;
220
221            match casual_type {
222                CasualDateType::Now => {
223                    components.assign(Component::Year, ref_date.year());
224                    components.assign(Component::Month, ref_date.month() as i32);
225                    components.assign(Component::Day, ref_date.day() as i32);
226                    components.assign(Component::Hour, ref_date.hour() as i32);
227                    components.assign(Component::Minute, ref_date.minute() as i32);
228                    components.assign(Component::Second, ref_date.second() as i32);
229                }
230                CasualDateType::Today => {
231                    components.assign(Component::Year, ref_date.year());
232                    components.assign(Component::Month, ref_date.month() as i32);
233                    components.assign(Component::Day, ref_date.day() as i32);
234                }
235                CasualDateType::Tonight => {
236                    components.assign(Component::Year, ref_date.year());
237                    components.assign(Component::Month, ref_date.month() as i32);
238                    components.assign(Component::Day, ref_date.day() as i32);
239                    components.imply(Component::Hour, 22);
240                }
241                CasualDateType::Tomorrow => {
242                    let tomorrow = ref_date + Duration::days(1);
243                    components.assign(Component::Year, tomorrow.year());
244                    components.assign(Component::Month, tomorrow.month() as i32);
245                    components.assign(Component::Day, tomorrow.day() as i32);
246                }
247                CasualDateType::Yesterday => {
248                    let yesterday = ref_date - Duration::days(1);
249                    components.assign(Component::Year, yesterday.year());
250                    components.assign(Component::Month, yesterday.month() as i32);
251                    components.assign(Component::Day, yesterday.day() as i32);
252                }
253                CasualDateType::Overmorrow => {
254                    let day_after = ref_date + Duration::days(2);
255                    components.assign(Component::Year, day_after.year());
256                    components.assign(Component::Month, day_after.month() as i32);
257                    components.assign(Component::Day, day_after.day() as i32);
258                }
259                CasualDateType::DayBeforeYesterday => {
260                    let day_before = ref_date - Duration::days(2);
261                    components.assign(Component::Year, day_before.year());
262                    components.assign(Component::Month, day_before.month() as i32);
263                    components.assign(Component::Day, day_before.day() as i32);
264                }
265                CasualDateType::ThisMorning => {
266                    components.assign(Component::Year, ref_date.year());
267                    components.assign(Component::Month, ref_date.month() as i32);
268                    components.assign(Component::Day, ref_date.day() as i32);
269                    components.imply(Component::Hour, 6);
270                }
271                CasualDateType::ThisAfternoon => {
272                    components.assign(Component::Year, ref_date.year());
273                    components.assign(Component::Month, ref_date.month() as i32);
274                    components.assign(Component::Day, ref_date.day() as i32);
275                    components.imply(Component::Hour, 15);
276                }
277                CasualDateType::ThisEvening => {
278                    components.assign(Component::Year, ref_date.year());
279                    components.assign(Component::Month, ref_date.month() as i32);
280                    components.assign(Component::Day, ref_date.day() as i32);
281                    components.imply(Component::Hour, 20);
282                }
283            }
284
285            // Apply time component if present
286            if let Some(time) = time_type {
287                // Get the actual matched time word for locale-specific hour handling
288                let time_word = caps.get(2).map(|m| m.as_str().to_lowercase());
289                self.apply_casual_time(&mut components, time, time_word.as_deref());
290            }
291
292            results.push(context.create_result(
293                index,
294                index + matched_text.len(),
295                components,
296                None,
297            ));
298        }
299
300        Ok(results)
301    }
302}