whichtime_sys/parsers/uk/
month_name.rs1use crate::components::Component;
12use crate::context::ParsingContext;
13use crate::dictionaries::uk::{get_month, get_weekday};
14use crate::error::Result;
15use crate::parsers::Parser;
16use crate::results::ParsedResult;
17use chrono::Datelike;
18use fancy_regex::Regex;
19use std::sync::LazyLock;
20
21static DOT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
23 Regex::new(r"(?<![0-9])(?P<day>\d{1,2})\.(?P<month>\d{1,2})\.(?P<year>\d{4}|\d{2})(?![0-9])")
24 .unwrap()
25});
26
27static MONTH_NAME_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
29 Regex::new(
30 r"(?ix)
31 (?:
32 (?P<weekday>понеділок|вівторок|середа|середу|четвер|п'ятниця|п'ятницю|пятниця|субота|суботу|неділя|неділю|пн|вт|ср|чт|пт|сб|нд)(?:\.|,)?\s*
33 (?:,\s*)?
34 )?
35 (?P<day>\d{1,2})
36 (?:
37 \s*(?:-|–|до)\s*
38 (?P<end_day>\d{1,2})
39 )?
40 \s+
41 (?P<month>січня?|лютого?|лют\.?|березня?|бер\.?|квітня?|квіт\.?|травня?|трав\.?|червня?|черв\.?|липня?|лип\.?|серпня?|серп\.?|вересня?|вер\.?|жовтня?|жовт\.?|листопада?|лист\.?|грудня?|груд\.?)
42 (?:
43 \s+
44 (?P<year>\d{4}|\d{2})
45 (?:\s*р\.?)?
46 )?
47 (?![а-яА-ЯіїєґІЇЄҐ])"
48 ).unwrap()
49});
50
/// Parser for Ukrainian dates written either numerically (`DD.MM.YYYY`) or with a month word.
///
/// The type carries no state, so it is free to copy and to construct in constant contexts.
#[derive(Debug, Clone, Copy, Default)]
pub struct UKMonthNameParser;

impl UKMonthNameParser {
    /// Creates a new parser instance; equivalent to [`Default::default`].
    #[must_use]
    pub const fn new() -> Self {
        Self
    }
}
65
66impl Parser for UKMonthNameParser {
67 fn name(&self) -> &'static str {
68 "UKMonthNameParser"
69 }
70
71 fn should_apply(&self, _context: &ParsingContext) -> bool {
72 true
73 }
74
75 fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
76 let mut results = Vec::new();
77 let ref_date = context.reference.instant;
78
79 let mut start = 0;
81 while start < context.text.len() {
82 let search_text = &context.text[start..];
83 let captures = match DOT_PATTERN.captures(search_text) {
84 Ok(Some(caps)) => caps,
85 Ok(None) => break,
86 Err(_) => break,
87 };
88
89 let full_match = match captures.get(0) {
90 Some(m) => m,
91 None => break,
92 };
93
94 let match_start = start + full_match.start();
95 let match_end = start + full_match.end();
96
97 let day: i32 = captures
98 .name("day")
99 .and_then(|m| m.as_str().parse().ok())
100 .unwrap_or(0);
101 let month: i32 = captures
102 .name("month")
103 .and_then(|m| m.as_str().parse().ok())
104 .unwrap_or(0);
105 let year_str = captures.name("year").map(|m| m.as_str());
106
107 if !(1..=31).contains(&day) || !(1..=12).contains(&month) {
108 start = match_end;
109 continue;
110 }
111
112 let mut components = context.create_components();
113
114 if let Some(y) = year_str {
115 let mut year: i32 = y.parse().unwrap_or(ref_date.year());
116 if year < 100 {
117 year = if year > 50 { 1900 + year } else { 2000 + year };
118 }
119 components.assign(Component::Year, year);
120 } else {
121 components.imply(Component::Year, ref_date.year());
122 }
123
124 components.assign(Component::Month, month);
125 components.assign(Component::Day, day);
126
127 if components.is_valid_date() {
128 results.push(context.create_result(match_start, match_end, components, None));
129 }
130
131 start = match_end;
132 }
133
134 start = 0;
136 while start < context.text.len() {
137 let search_text = &context.text[start..];
138 let captures = match MONTH_NAME_PATTERN.captures(search_text) {
139 Ok(Some(caps)) => caps,
140 Ok(None) => break,
141 Err(_) => break,
142 };
143
144 let full_match = match captures.get(0) {
145 Some(m) => m,
146 None => break,
147 };
148
149 let match_start = start + full_match.start();
150 let match_end = start + full_match.end();
151
152 let overlaps = results.iter().any(|r| {
154 (match_start >= r.index && match_start < r.index + r.text.len())
155 || (r.index >= match_start && r.index < match_end)
156 });
157 if overlaps {
158 start = match_end;
159 continue;
160 }
161
162 let weekday_str = captures.name("weekday").map(|m| m.as_str().to_lowercase());
163 let day: i32 = captures
164 .name("day")
165 .and_then(|m| m.as_str().parse().ok())
166 .unwrap_or(0);
167 let month_str = captures
168 .name("month")
169 .map(|m| m.as_str().to_lowercase())
170 .unwrap_or_default();
171 let year_str = captures.name("year").map(|m| m.as_str());
172 let end_day_str = captures.name("end_day").map(|m| m.as_str());
173
174 let clean_month = month_str.trim_end_matches('.');
176 let month = get_month(clean_month).unwrap_or(0);
177
178 if month == 0 || !(1..=31).contains(&day) {
179 start = match_end;
180 continue;
181 }
182
183 let mut components = context.create_components();
184
185 if let Some(y) = year_str {
187 let mut year: i32 = y.parse().unwrap_or(ref_date.year());
188 if year < 100 {
189 year = if year > 50 { 1900 + year } else { 2000 + year };
190 }
191 components.assign(Component::Year, year);
192 } else {
193 components.imply(Component::Year, ref_date.year());
194 }
195
196 components.assign(Component::Month, month as i32);
197 components.assign(Component::Day, day);
198
199 if let Some(ref wd_str) = weekday_str {
201 let clean_wd = wd_str.trim_end_matches('.').trim_end_matches(',');
202 if let Some(weekday) = get_weekday(clean_wd) {
203 components.assign(Component::Weekday, weekday as i32);
204 }
205 }
206
207 if !components.is_valid_date() {
208 start = match_end;
209 continue;
210 }
211
212 let end_components = if let Some(end_day_text) = end_day_str {
214 let end_day: i32 = end_day_text.parse().unwrap_or(0);
215 if end_day > 0 && end_day <= 31 {
216 let mut end_comp = context.create_components();
217 if let Some(start_year) = components.get(Component::Year) {
218 if year_str.is_some() {
219 end_comp.assign(Component::Year, start_year);
220 } else {
221 end_comp.imply(Component::Year, start_year);
222 }
223 }
224 end_comp.assign(Component::Month, month as i32);
225 end_comp.assign(Component::Day, end_day);
226
227 if end_comp.is_valid_date() {
228 Some(end_comp)
229 } else {
230 None
231 }
232 } else {
233 None
234 }
235 } else {
236 None
237 };
238
239 results.push(context.create_result(match_start, match_end, components, end_components));
240 start = match_end;
241 }
242
243 Ok(results)
244 }
245}