whichtime_sys/parsers/common/
time_unit_ago.rs1use crate::components::Component;
6use crate::context::ParsingContext;
7use crate::dictionaries::Locale;
8use crate::error::Result;
9use crate::parsers::Parser;
10use crate::results::ParsedResult;
11use crate::scanner::TokenType;
12use crate::types::{Duration, TimeUnit, add_duration};
13use chrono::Datelike;
14use regex::Regex;
15use std::sync::LazyLock;
16
17static EN_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
19 Regex::new(r"(?i)(\d+|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|a|an|the|few|several|couple)\s*(seconds?|minutes?|hours?|days?|weeks?|months?|years?|mins?|hrs?|secs?)\s*(?:ago|before|earlier)\b").unwrap()
20});
21
22static DE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
23 Regex::new(r"(?i)(?:vor\s+)?(\d+|ein(?:e[mnrs]?)?|zwei|drei|vier|fünf|fuenf|sechs|sieben|acht|neun|zehn|elf|zwölf|zwoelf)\s*(sekunden?|minuten?|stunden?|tagen?|wochen?|monaten?|monats?|jahren?|jahres?|sek|min|std)(?:\s+(?:her|früher|frueher|zuvor))?\b").unwrap()
24});
25
26static ES_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
27 Regex::new(r"(?i)(?:hace\s+)?(\d+|un[oa]?|dos|tres|cuatro|cinco|seis|siete|ocho|nueve|diez|once|doce)\s*(segundos?|minutos?|horas?|días?|dias?|semanas?|meses?|años?|anos?|seg|min|hr?s?)\s*(?:atrás|atras)?\b").unwrap()
28});
29
30static FR_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
31 Regex::new(r"(?i)(?:il\s+y\s+a\s+)?(\d+|une?|deux|trois|quatre|cinq|six|sept|huit|neuf|dix|onze|douze)\s*(secondes?|minutes?|heures?|jours?|semaines?|mois|ans?|années?|annees?|sec|min|h)\b").unwrap()
32});
33
34static IT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
35 Regex::new(r"(?i)(\d+|un[oa']?|due|tre|quattro|cinque|sei|sette|otto|nove|dieci|undici|dodici)\s*(second[oi]|minut[oi]|or[ae]|giorn[oi]|settiman[ae]|mes[ei]|ann[oi]|sec|min|h)\s*fa\b").unwrap()
36});
37
38static JA_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
39 Regex::new(r"(\d+|一|二|三|四|五|六|七|八|九|十)\s*(秒|分|時間|日|週間|ヶ月|か月|年)\s*前")
40 .unwrap()
41});
42
43static NL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
44 Regex::new(r"(?i)(\d+|een|twee|drie|vier|vijf|zes|zeven|acht|negen|tien|elf|twaalf)\s*(seconden?|minuten?|uren?|dagen?|weken?|maanden?|jaren?|sec|min|u)\s*(?:geleden|terug)\b").unwrap()
45});
46
47static PT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
48 Regex::new(r"(?i)(?:há\s+|ha\s+)?(\d+|um[a]?|dois|duas|três|tres|quatro|cinco|seis|sete|oito|nove|dez|onze|doze)\s*(segundos?|minutos?|horas?|dias?|semanas?|meses?|anos?|seg|min|h)\s*(?:atrás|atras)?\b").unwrap()
49});
50
51static RU_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
52 Regex::new(r"(?i)(\d+|один|одну|одна|два|две|три|четыре|пять|шесть|семь|восемь|девять|десять|одиннадцать|двенадцать)\s*(секунд[уы]?|минут[уы]?|час(?:ов|а)?|дн[яей]|недел[юиь]|месяц(?:ев|а)?|год(?:а|ов)?|лет)\s*назад\b").unwrap()
53});
54
55static SV_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
56 Regex::new(r"(?i)(?:för\s+|for\s+)?(\d+|en|ett|två|tva|tre|fyra|fem|sex|sju|åtta|atta|nio|tio|elva|tolv)\s*(sekunder?|minuter?|timm(?:ar|e)?|dagar?|veckor?|månader?|manader?|år|ar)\s*(?:sedan)?\b").unwrap()
57});
58
59static UK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
60 Regex::new(r"(?i)(\d+|один|одну|одна|два|дві|три|чотири|п'ять|шість|сім|вісім|дев'ять|десять|одинадцять|дванадцять)\s*(секунд[уи]?|хвилин[уи]?|годин[уи]?|дн[яів]|тижн[яів]|місяц[яів]|рок(?:ів|у)?)\s*(?:тому|назад)\b").unwrap()
61});
62
63static ZH_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
64 Regex::new(r"(\d+|一|二|两|兩|三|四|五|六|七|八|九|十)\s*(秒[钟鐘]?|分[钟鐘]?|[小个個]?时|時|天|[个個]?星期|周|週|[个個]?月|年)\s*(?:前|以前)").unwrap()
65});
66
/// Parser for relative "<quantity> <unit> ago" expressions in one configured locale.
pub struct MultiLocaleTimeUnitAgoParser {
    /// Locale that selects the regex pattern and the dictionary lookups used while parsing.
    locale: Locale,
}
71
72impl MultiLocaleTimeUnitAgoParser {
73 pub fn new(locale: Locale) -> Self {
74 Self { locale }
75 }
76
77 fn get_pattern(&self) -> &'static Regex {
78 match self.locale {
79 Locale::En => &EN_PATTERN,
80 Locale::De => &DE_PATTERN,
81 Locale::Es => &ES_PATTERN,
82 Locale::Fr => &FR_PATTERN,
83 Locale::It => &IT_PATTERN,
84 Locale::Ja => &JA_PATTERN,
85 Locale::Nl => &NL_PATTERN,
86 Locale::Pt => &PT_PATTERN,
87 Locale::Ru => &RU_PATTERN,
88 Locale::Sv => &SV_PATTERN,
89 Locale::Uk => &UK_PATTERN,
90 Locale::Zh => &ZH_PATTERN,
91 }
92 }
93
94 fn parse_number(&self, text: &str) -> f64 {
95 let _lower = text.to_lowercase();
96 match self.locale {
97 Locale::En => crate::dictionaries::en::parse_number_pattern(text),
98 Locale::De => crate::dictionaries::de::parse_number_pattern(text),
99 Locale::Es => crate::dictionaries::es::parse_number_pattern(text),
100 Locale::Fr => crate::dictionaries::fr::parse_number_pattern(text),
101 Locale::It => crate::dictionaries::it::parse_number_pattern(text),
102 Locale::Ja => crate::dictionaries::ja::parse_number_pattern(text),
103 Locale::Nl => crate::dictionaries::nl::parse_number_pattern(text),
104 Locale::Pt => crate::dictionaries::pt::parse_number_pattern(text),
105 Locale::Ru => crate::dictionaries::ru::parse_number_pattern(text),
106 Locale::Sv => crate::dictionaries::sv::parse_number_pattern(text),
107 Locale::Uk => crate::dictionaries::uk::parse_number_pattern(text),
108 Locale::Zh => crate::dictionaries::zh::parse_number_pattern(text),
109 }
110 }
111
112 fn lookup_time_unit(&self, text: &str) -> Option<TimeUnit> {
113 let lower = text.to_lowercase();
114 match self.locale {
115 Locale::En => crate::dictionaries::en::get_time_unit(&lower),
116 Locale::De => crate::dictionaries::de::get_time_unit(&lower),
117 Locale::Es => crate::dictionaries::es::get_time_unit(&lower),
118 Locale::Fr => crate::dictionaries::fr::get_time_unit(&lower),
119 Locale::It => crate::dictionaries::it::get_time_unit(&lower),
120 Locale::Ja => crate::dictionaries::ja::get_time_unit(text)
121 .or_else(|| crate::dictionaries::ja::get_time_unit(&lower)),
122 Locale::Nl => crate::dictionaries::nl::get_time_unit(&lower),
123 Locale::Pt => crate::dictionaries::pt::get_time_unit(&lower),
124 Locale::Ru => crate::dictionaries::ru::get_time_unit(&lower),
125 Locale::Sv => crate::dictionaries::sv::get_time_unit(&lower),
126 Locale::Uk => crate::dictionaries::uk::get_time_unit(&lower),
127 Locale::Zh => crate::dictionaries::zh::get_time_unit(text)
128 .or_else(|| crate::dictionaries::zh::get_time_unit(&lower)),
129 }
130 }
131}
132
133impl Parser for MultiLocaleTimeUnitAgoParser {
134 fn name(&self) -> &'static str {
135 "MultiLocaleTimeUnitAgoParser"
136 }
137
138 fn should_apply(&self, context: &ParsingContext) -> bool {
139 context.has_token_type(TokenType::Ago) && context.has_token_type(TokenType::TimeUnit)
140 }
141
142 fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
143 let mut results = Vec::new();
144 let pattern = self.get_pattern();
145 let ref_date = context.reference.instant;
146
147 for mat in pattern.find_iter(context.text) {
148 let matched_text = mat.as_str();
149 let index = mat.start();
150
151 let Some(caps) = pattern.captures(matched_text) else {
152 continue;
153 };
154
155 let num_str = caps.get(1).map(|m| m.as_str()).unwrap_or("1");
156 let unit_str = caps.get(2).map(|m| m.as_str()).unwrap_or_default();
157
158 let num = self.parse_number(num_str);
159 let Some(unit) = self.lookup_time_unit(unit_str) else {
160 continue;
161 };
162
163 let mut duration = Duration::new();
165 match unit {
166 TimeUnit::Second => duration.second = Some(-num),
167 TimeUnit::Minute => duration.minute = Some(-num),
168 TimeUnit::Hour => duration.hour = Some(-num),
169 TimeUnit::Day => duration.day = Some(-num),
170 TimeUnit::Week => duration.week = Some(-num),
171 TimeUnit::Month => duration.month = Some(-num),
172 TimeUnit::Year => duration.year = Some(-num),
173 TimeUnit::Quarter => duration.quarter = Some(-num),
174 TimeUnit::Millisecond => duration.millisecond = Some(-num),
175 }
176
177 let target_date = add_duration(ref_date, &duration);
178
179 let mut components = context.create_components();
180 components.assign(Component::Year, target_date.year());
181 components.assign(Component::Month, target_date.month() as i32);
182 components.assign(Component::Day, target_date.day() as i32);
183
184 if duration.has_time_component() {
185 use chrono::Timelike;
186 components.assign(Component::Hour, target_date.hour() as i32);
187 components.assign(Component::Minute, target_date.minute() as i32);
188 components.assign(Component::Second, target_date.second() as i32);
189 }
190
191 results.push(context.create_result(
192 index,
193 index + matched_text.len(),
194 components,
195 None,
196 ));
197 }
198
199 Ok(results)
200 }
201}