Skip to main content

whichtime_sys/parsers/common/
time_unit_within.rs

1//! Multi-locale time unit within parser: "in 5 days", "within 2 hours", etc.
2//!
3//! Handles time expressions with "in/within" semantics across all supported locales.
4
5use crate::components::Component;
6use crate::context::ParsingContext;
7use crate::dictionaries::Locale;
8use crate::error::Result;
9use crate::parsers::Parser;
10use crate::results::ParsedResult;
11use crate::scanner::TokenType;
12use crate::types::{Duration, TimeUnit, add_duration};
13use chrono::Datelike;
14use regex::Regex;
15use std::sync::LazyLock;
16
17// Locale-specific patterns
18static EN_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
19    Regex::new(r"(?i)\b(?:in|within)\s+(\d+|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|a|an|few|several|couple)\s*(seconds?|minutes?|hours?|days?|weeks?|months?|years?|mins?|hrs?|secs?)\b").unwrap()
20});
21
22static DE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
23    Regex::new(r"(?i)(?:in|für|fuer|innerhalb(?:\s+von)?)\s+(\d+|ein(?:e[mnrs]?)?|zwei|drei|vier|fünf|fuenf|sechs|sieben|acht|neun|zehn|elf|zwölf|zwoelf|einige[nm]?|paar)\s+(sekunden?|minuten?|stunden?|tagen?|wochen?|monaten?|monats?|jahren?|jahres?|jahr|sek\.?|min\.?|std\.?)(?:\b|$)").unwrap()
24});
25
26static ES_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
27    Regex::new(r"(?i)(?:en|dentro\s+de|por|de)\s+(un|una|uno|\d+|dos|tres|cuatro|cinco|seis|siete|ocho|nueve|diez|once|doce|algunos?)\s*(segundos?|minutos?|horas?|días?|dias?|semanas?|mes|meses|años?|anos?)\b").unwrap()
28});
29
30static FR_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
31    Regex::new(r"(?i)(?:dans|d'ici|pour|en)\s+(\d+|une?|deux|trois|quatre|cinq|six|sept|huit|neuf|dix|onze|douze)\s*(secondes?|minutes?|heures?|jours?|semaines?|mois|ans?|années?|annees?)\b").unwrap()
32});
33
34static IT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
35    Regex::new(r"(?i)(?:tra|fra|entro|in)\s+(\d+|un[oa']?|due|tre|quattro|cinque|sei|sette|otto|nove|dieci|undici|dodici)\s*(second[oi]|minut[oi]|or[ae]|giorn[oi]|settiman[ae]|mes[ei]|ann[oi])\b").unwrap()
36});
37
38static JA_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
39    Regex::new(
40        r"(\d+|一|二|三|四|五|六|七|八|九|十)\s*(秒|分|時間|日|週間|ヶ月|か月|年)\s*(?:後|以内|内)",
41    )
42    .unwrap()
43});
44
45static NL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
46    Regex::new(r"(?i)(?:over|binnen)\s+(\d+|een|twee|drie|vier|vijf|zes|zeven|acht|negen|tien|elf|twaalf)\s*(seconden?|minuten?|uren?|dagen?|weken?|maanden?|jaren?)\b").unwrap()
47});
48
49static PT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
50    Regex::new(r"(?i)(?:em|daqui\s+a|dentro\s+de)\s+(\d+|um[a]?|dois|duas|três|tres|quatro|cinco|seis|sete|oito|nove|dez|onze|doze)\s*(segundos?|minutos?|horas?|dias?|semanas?|meses?|anos?)\b").unwrap()
51});
52
53static RU_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
54    Regex::new(r"(?i)(?:через|в\s+течени[ие])\s+(\d+|один|одну|одна|одной|два|две|три|четыре|пять|шесть|семь|восемь|девять|десять|)?\s*(секунд[уы]?|минут[уы]?|час(?:ов|а)?|дн[яей]|недел[юиь]|месяц(?:ев|а)?|год(?:а|ов)?|лет)\b").unwrap()
55});
56
57static SV_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
58    Regex::new(r"(?i)(?:om|inom)\s+(\d+|en|ett|två|tva|tre|fyra|fem|sex|sju|åtta|atta|nio|tio|elva|tolv)\s*(sekunder?|minuter?|timm(?:ar|e)?|dagar?|veckor?|månader?|manader?|år|ar)\b").unwrap()
59});
60
61static UK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
62    Regex::new(r"(?i)(?:через|за|протягом)\s+(\d+|один|одну|одна|два|дві|три|чотири|п'ять|шість|сім|вісім|дев'ять|десять)\s*(секунд[уи]?|хвилин[уи]?|годин[уи]?|дн[яів]|тижн[яів]|місяц[яів]|рок(?:ів|у)?)\b").unwrap()
63});
64
65static ZH_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
66    Regex::new(r"(\d+|一|二|两|兩|三|四|五|六|七|八|九|十)\s*(秒[钟鐘]?|分[钟鐘]?|[小个個]?时|時|天|[个個]?星期|周|週|[个個]?月|年)\s*(?:后|後|内|內|以内|以內)").unwrap()
67});
68
69/// Multi-locale time unit within parser
70pub struct MultiLocaleTimeUnitWithinParser {
71    locale: Locale,
72}
73
74impl MultiLocaleTimeUnitWithinParser {
75    pub fn new(locale: Locale) -> Self {
76        Self { locale }
77    }
78
79    fn get_pattern(&self) -> &'static Regex {
80        match self.locale {
81            Locale::En => &EN_PATTERN,
82            Locale::De => &DE_PATTERN,
83            Locale::Es => &ES_PATTERN,
84            Locale::Fr => &FR_PATTERN,
85            Locale::It => &IT_PATTERN,
86            Locale::Ja => &JA_PATTERN,
87            Locale::Nl => &NL_PATTERN,
88            Locale::Pt => &PT_PATTERN,
89            Locale::Ru => &RU_PATTERN,
90            Locale::Sv => &SV_PATTERN,
91            Locale::Uk => &UK_PATTERN,
92            Locale::Zh => &ZH_PATTERN,
93        }
94    }
95
96    fn parse_number(&self, text: &str) -> f64 {
97        match self.locale {
98            Locale::En => crate::dictionaries::en::parse_number_pattern(text),
99            Locale::De => crate::dictionaries::de::parse_number_pattern(text),
100            Locale::Es => crate::dictionaries::es::parse_number_pattern(text),
101            Locale::Fr => crate::dictionaries::fr::parse_number_pattern(text),
102            Locale::It => crate::dictionaries::it::parse_number_pattern(text),
103            Locale::Ja => crate::dictionaries::ja::parse_number_pattern(text),
104            Locale::Nl => crate::dictionaries::nl::parse_number_pattern(text),
105            Locale::Pt => crate::dictionaries::pt::parse_number_pattern(text),
106            Locale::Ru => crate::dictionaries::ru::parse_number_pattern(text),
107            Locale::Sv => crate::dictionaries::sv::parse_number_pattern(text),
108            Locale::Uk => crate::dictionaries::uk::parse_number_pattern(text),
109            Locale::Zh => crate::dictionaries::zh::parse_number_pattern(text),
110        }
111    }
112
113    fn lookup_time_unit(&self, text: &str) -> Option<TimeUnit> {
114        let lower = text.to_lowercase();
115        match self.locale {
116            Locale::En => crate::dictionaries::en::get_time_unit(&lower),
117            Locale::De => crate::dictionaries::de::get_time_unit(&lower),
118            Locale::Es => crate::dictionaries::es::get_time_unit(&lower),
119            Locale::Fr => crate::dictionaries::fr::get_time_unit(&lower),
120            Locale::It => crate::dictionaries::it::get_time_unit(&lower),
121            Locale::Ja => crate::dictionaries::ja::get_time_unit(text)
122                .or_else(|| crate::dictionaries::ja::get_time_unit(&lower)),
123            Locale::Nl => crate::dictionaries::nl::get_time_unit(&lower),
124            Locale::Pt => crate::dictionaries::pt::get_time_unit(&lower),
125            Locale::Ru => crate::dictionaries::ru::get_time_unit(&lower),
126            Locale::Sv => crate::dictionaries::sv::get_time_unit(&lower),
127            Locale::Uk => crate::dictionaries::uk::get_time_unit(&lower),
128            Locale::Zh => crate::dictionaries::zh::get_time_unit(text)
129                .or_else(|| crate::dictionaries::zh::get_time_unit(&lower)),
130        }
131    }
132}
133
134impl Parser for MultiLocaleTimeUnitWithinParser {
135    fn name(&self) -> &'static str {
136        "MultiLocaleTimeUnitWithinParser"
137    }
138
139    fn should_apply(&self, context: &ParsingContext) -> bool {
140        context.has_token_type(TokenType::TimeUnit)
141    }
142
143    fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
144        let mut results = Vec::new();
145        let pattern = self.get_pattern();
146        let ref_date = context.reference.instant;
147
148        for mat in pattern.find_iter(context.text) {
149            let matched_text = mat.as_str();
150            let index = mat.start();
151
152            let Some(caps) = pattern.captures(matched_text) else {
153                continue;
154            };
155
156            let num_str = caps.get(1).map(|m| m.as_str()).unwrap_or("1");
157            let unit_str = caps.get(2).map(|m| m.as_str()).unwrap_or_default();
158
159            let num = self.parse_number(num_str);
160            let Some(unit) = self.lookup_time_unit(unit_str) else {
161                continue;
162            };
163
164            // Create positive duration
165            let mut duration = Duration::new();
166            match unit {
167                TimeUnit::Second => duration.second = Some(num),
168                TimeUnit::Minute => duration.minute = Some(num),
169                TimeUnit::Hour => duration.hour = Some(num),
170                TimeUnit::Day => duration.day = Some(num),
171                TimeUnit::Week => duration.week = Some(num),
172                TimeUnit::Month => duration.month = Some(num),
173                TimeUnit::Year => duration.year = Some(num),
174                TimeUnit::Quarter => duration.quarter = Some(num),
175                TimeUnit::Millisecond => duration.millisecond = Some(num),
176            }
177
178            let target_date = add_duration(ref_date, &duration);
179
180            let mut components = context.create_components();
181            components.assign(Component::Year, target_date.year());
182            components.assign(Component::Month, target_date.month() as i32);
183            components.assign(Component::Day, target_date.day() as i32);
184
185            use chrono::Timelike;
186            if duration.has_time_component() {
187                components.assign(Component::Hour, target_date.hour() as i32);
188                components.assign(Component::Minute, target_date.minute() as i32);
189                components.assign(Component::Second, target_date.second() as i32);
190            } else {
191                components.imply(Component::Hour, target_date.hour() as i32);
192                components.imply(Component::Minute, target_date.minute() as i32);
193                components.imply(Component::Second, target_date.second() as i32);
194            }
195
196            results.push(context.create_result(
197                index,
198                index + matched_text.len(),
199                components,
200                None,
201            ));
202        }
203
204        Ok(results)
205    }
206}