Skip to main content

whichtime_sys/parsers/uk/
month_name.rs

//! Ukrainian month name parser
//!
//! Handles Ukrainian date expressions with month names like:
//! - "10 серпня 2012" (day, month name, year)
//! - "3 лют 82" (abbreviated month, two-digit year)
//! - "10.08.2012" (European dot format)
//! - "Четвер, 10 січня" (weekday + date, no year)
//! - "10 - 22 серпня 2012" (date ranges)
//! - "неділя, 7 грудня 2014" (weekday + full date)
10
11use crate::components::Component;
12use crate::context::ParsingContext;
13use crate::dictionaries::uk::{get_month, get_weekday};
14use crate::error::Result;
15use crate::parsers::Parser;
16use crate::results::ParsedResult;
17use chrono::Datelike;
18use fancy_regex::Regex;
19use std::sync::LazyLock;
20
21// Pattern for "DD.MM.YYYY" European format
22static DOT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
23    Regex::new(r"(?<![0-9])(?P<day>\d{1,2})\.(?P<month>\d{1,2})\.(?P<year>\d{4}|\d{2})(?![0-9])")
24        .unwrap()
25});
26
27// Pattern for dates with month names
28static MONTH_NAME_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
29    Regex::new(
30        r"(?ix)
31        (?:
32            (?P<weekday>понеділок|вівторок|середа|середу|четвер|п'ятниця|п'ятницю|пятниця|субота|суботу|неділя|неділю|пн|вт|ср|чт|пт|сб|нд)(?:\.|,)?\s*
33            (?:,\s*)?
34        )?
35        (?P<day>\d{1,2})
36        (?:
37            \s*(?:-|–|до)\s*
38            (?P<end_day>\d{1,2})
39        )?
40        \s+
41        (?P<month>січня?|лютого?|лют\.?|березня?|бер\.?|квітня?|квіт\.?|травня?|трав\.?|червня?|черв\.?|липня?|лип\.?|серпня?|серп\.?|вересня?|вер\.?|жовтня?|жовт\.?|листопада?|лист\.?|грудня?|груд\.?)
42        (?:
43            \s+
44            (?P<year>\d{4}|\d{2})
45            (?:\s*р\.?)?
46        )?
47        (?![а-яА-ЯіїєґІЇЄҐ])"
48    ).unwrap()
49});
50
/// Ukrainian month name parser.
///
/// A stateless unit type: all behaviour lives in its trait implementations, so values are
/// freely copyable and printable for diagnostics.
#[derive(Debug, Clone, Copy)]
pub struct UKMonthNameParser;
53
54impl UKMonthNameParser {
55    pub fn new() -> Self {
56        Self
57    }
58}
59
60impl Default for UKMonthNameParser {
61    fn default() -> Self {
62        Self::new()
63    }
64}
65
66impl Parser for UKMonthNameParser {
67    fn name(&self) -> &'static str {
68        "UKMonthNameParser"
69    }
70
71    fn should_apply(&self, _context: &ParsingContext) -> bool {
72        true
73    }
74
75    fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
76        let mut results = Vec::new();
77        let ref_date = context.reference.instant;
78
79        // Parse DD.MM.YYYY format first
80        let mut start = 0;
81        while start < context.text.len() {
82            let search_text = &context.text[start..];
83            let captures = match DOT_PATTERN.captures(search_text) {
84                Ok(Some(caps)) => caps,
85                Ok(None) => break,
86                Err(_) => break,
87            };
88
89            let full_match = match captures.get(0) {
90                Some(m) => m,
91                None => break,
92            };
93
94            let match_start = start + full_match.start();
95            let match_end = start + full_match.end();
96
97            let day: i32 = captures
98                .name("day")
99                .and_then(|m| m.as_str().parse().ok())
100                .unwrap_or(0);
101            let month: i32 = captures
102                .name("month")
103                .and_then(|m| m.as_str().parse().ok())
104                .unwrap_or(0);
105            let year_str = captures.name("year").map(|m| m.as_str());
106
107            if !(1..=31).contains(&day) || !(1..=12).contains(&month) {
108                start = match_end;
109                continue;
110            }
111
112            let mut components = context.create_components();
113
114            if let Some(y) = year_str {
115                let mut year: i32 = y.parse().unwrap_or(ref_date.year());
116                if year < 100 {
117                    year = if year > 50 { 1900 + year } else { 2000 + year };
118                }
119                components.assign(Component::Year, year);
120            } else {
121                components.imply(Component::Year, ref_date.year());
122            }
123
124            components.assign(Component::Month, month);
125            components.assign(Component::Day, day);
126
127            if components.is_valid_date() {
128                results.push(context.create_result(match_start, match_end, components, None));
129            }
130
131            start = match_end;
132        }
133
134        // Parse month name patterns
135        start = 0;
136        while start < context.text.len() {
137            let search_text = &context.text[start..];
138            let captures = match MONTH_NAME_PATTERN.captures(search_text) {
139                Ok(Some(caps)) => caps,
140                Ok(None) => break,
141                Err(_) => break,
142            };
143
144            let full_match = match captures.get(0) {
145                Some(m) => m,
146                None => break,
147            };
148
149            let match_start = start + full_match.start();
150            let match_end = start + full_match.end();
151
152            // Skip if overlaps with dot pattern results
153            let overlaps = results.iter().any(|r| {
154                (match_start >= r.index && match_start < r.index + r.text.len())
155                    || (r.index >= match_start && r.index < match_end)
156            });
157            if overlaps {
158                start = match_end;
159                continue;
160            }
161
162            let weekday_str = captures.name("weekday").map(|m| m.as_str().to_lowercase());
163            let day: i32 = captures
164                .name("day")
165                .and_then(|m| m.as_str().parse().ok())
166                .unwrap_or(0);
167            let month_str = captures
168                .name("month")
169                .map(|m| m.as_str().to_lowercase())
170                .unwrap_or_default();
171            let year_str = captures.name("year").map(|m| m.as_str());
172            let end_day_str = captures.name("end_day").map(|m| m.as_str());
173
174            // Parse month - remove trailing dots
175            let clean_month = month_str.trim_end_matches('.');
176            let month = get_month(clean_month).unwrap_or(0);
177
178            if month == 0 || !(1..=31).contains(&day) {
179                start = match_end;
180                continue;
181            }
182
183            let mut components = context.create_components();
184
185            // Parse year
186            if let Some(y) = year_str {
187                let mut year: i32 = y.parse().unwrap_or(ref_date.year());
188                if year < 100 {
189                    year = if year > 50 { 1900 + year } else { 2000 + year };
190                }
191                components.assign(Component::Year, year);
192            } else {
193                components.imply(Component::Year, ref_date.year());
194            }
195
196            components.assign(Component::Month, month as i32);
197            components.assign(Component::Day, day);
198
199            // Add weekday if present
200            if let Some(ref wd_str) = weekday_str {
201                let clean_wd = wd_str.trim_end_matches('.').trim_end_matches(',');
202                if let Some(weekday) = get_weekday(clean_wd) {
203                    components.assign(Component::Weekday, weekday as i32);
204                }
205            }
206
207            if !components.is_valid_date() {
208                start = match_end;
209                continue;
210            }
211
212            // Handle end date for ranges
213            let end_components = if let Some(end_day_text) = end_day_str {
214                let end_day: i32 = end_day_text.parse().unwrap_or(0);
215                if end_day > 0 && end_day <= 31 {
216                    let mut end_comp = context.create_components();
217                    if let Some(start_year) = components.get(Component::Year) {
218                        if year_str.is_some() {
219                            end_comp.assign(Component::Year, start_year);
220                        } else {
221                            end_comp.imply(Component::Year, start_year);
222                        }
223                    }
224                    end_comp.assign(Component::Month, month as i32);
225                    end_comp.assign(Component::Day, end_day);
226
227                    if end_comp.is_valid_date() {
228                        Some(end_comp)
229                    } else {
230                        None
231                    }
232                } else {
233                    None
234                }
235            } else {
236                None
237            };
238
239            results.push(context.create_result(match_start, match_end, components, end_components));
240            start = match_end;
241        }
242
243        Ok(results)
244    }
245}