// Source file: whichtime_sys/parsers/ru/month_name.rs

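//! Russian little-endian (day, month, year) month-name parser.
//!
//! Handles Russian date expressions such as:
//! - "10 января 2012" (day, month name, year)
//! - "10.08.2012" (numeric month with dot separators)
//! - "10 - 22 августа 2012" (day range within one month)
//! - "четверг, 10 января" (leading weekday)
//! - "Сентябрь 2012" (month name and year only)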
8
9use crate::components::Component;
10use crate::context::ParsingContext;
11use crate::dictionaries::ru::{get_month, get_weekday, parse_ordinal_pattern};
12use crate::error::Result;
13use crate::parsers::Parser;
14use crate::results::ParsedResult;
15use chrono::Datelike;
16use fancy_regex::Regex;
17use std::sync::LazyLock;
18
19// Main pattern for dates like "10 августа 2012", "Четверг, 10 января"
20static PATTERN: LazyLock<Regex> = LazyLock::new(|| {
21    Regex::new(
22        r"(?ix)
23        (?:
24            (?P<weekday>понедельник|вторник|среда|четверг|пятница|суббота|воскресенье|пн|вт|ср|чт|пт|сб|вс|вск)(?:\.|,)?\s*
25            (?:,\s*)?
26        )?
27        (?:
28            (?P<ordinal_day>первое|второе|третье|четвертое|пятое|шестое|седьмое|восьмое|девятое|десятое|[\w]+oe|[\w]+ье|[\w]+ое)\s+
29            |
30            (?P<day>\d{1,2})(?:\.|,|\s+)?
31        )
32        (?:
33            (?:(?:\-|\–|по|до)\s*(?P<end_day>\d{1,2})(?:\.|,|\s+)?)?
34        )
35        (?P<month>января?|февраля?|марта?|апреля?|мая|июня?|июля?|августа?|сентября?|октября?|ноября?|декабря?|янв\.?|фев\.?|мар\.?|апр\.?|май|июн\.?|июл\.?|авг\.?|сен\.?|окт\.?|ноя\.?|дек\.?|01|02|03|04|05|06|07|08|09|10|11|12)(?:\.|,|\s+)?
36        (?:
37            (?:\s*[\-/,]?\s*)?
38            (?:(?P<year>\d{1,4})|(?P<year_short>'\d{2}))?
39            (?:\s*(?:г\.?|года?))?
40        )?
41        (?=\W|$)"
42    ).unwrap()
43});
44
45// Pattern for "Month Year" format like "Сентябрь 2012"
46static MONTH_YEAR_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
47    Regex::new(
48        r"(?ix)
49        (?P<month>январь|февраль|март|апрель|май|июнь|июль|август|сентябрь|октябрь|ноябрь|декабрь)\s+
50        (?P<year>\d{4})"
51    ).unwrap()
52});
53
/// Parser for Russian dates written with a month name or number in
/// little-endian order (day before month), e.g. "10 января 2012".
///
/// The type carries no state; all pattern data lives in module-level statics.
#[derive(Debug, Clone, Copy)]
pub struct RUMonthNameParser;

impl RUMonthNameParser {
    /// Creates a new parser instance.
    #[must_use]
    pub const fn new() -> Self {
        Self
    }
}
62
63impl Default for RUMonthNameParser {
64    fn default() -> Self {
65        Self::new()
66    }
67}
68
69impl Parser for RUMonthNameParser {
70    fn name(&self) -> &'static str {
71        "RUMonthNameParser"
72    }
73
74    fn should_apply(&self, _context: &ParsingContext) -> bool {
75        true
76    }
77
78    fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
79        let mut results = Vec::new();
80        let ref_date = context.reference.instant;
81
82        // First, try month-year pattern (e.g., "Сентябрь 2012")
83        let mut start = 0;
84        while start < context.text.len() {
85            let search_text = &context.text[start..];
86            let captures = match MONTH_YEAR_PATTERN.captures(search_text) {
87                Ok(Some(caps)) => caps,
88                Ok(None) => break,
89                Err(_) => break,
90            };
91
92            let full_match = match captures.get(0) {
93                Some(m) => m,
94                None => break,
95            };
96
97            let match_start = start + full_match.start();
98            let match_end = start + full_match.end();
99
100            let month_str = captures
101                .name("month")
102                .map(|m| m.as_str().to_lowercase())
103                .unwrap_or_default();
104            let year_str = captures.name("year").map(|m| m.as_str());
105
106            if let Some(month) = get_month(&month_str)
107                && let Some(year) = year_str.and_then(|y| y.parse::<i32>().ok())
108            {
109                let mut components = context.create_components();
110                components.assign(Component::Year, year);
111                components.assign(Component::Month, month as i32);
112                components.assign(Component::Day, 1);
113
114                results.push(context.create_result(match_start, match_end, components, None));
115            }
116
117            start = match_end;
118        }
119
120        // Then, try the main pattern for day-month-year
121        start = 0;
122        while start < context.text.len() {
123            let search_text = &context.text[start..];
124            let captures = match PATTERN.captures(search_text) {
125                Ok(Some(caps)) => caps,
126                Ok(None) => break,
127                Err(_) => break,
128            };
129
130            let full_match = match captures.get(0) {
131                Some(m) => m,
132                None => break,
133            };
134
135            let match_start = start + full_match.start();
136            let match_end = start + full_match.end();
137
138            // Skip if this match overlaps with a month-year result
139            let overlaps = results.iter().any(|r| {
140                (match_start >= r.index && match_start < r.index + r.text.len())
141                    || (r.index >= match_start && r.index < match_end)
142            });
143            if overlaps {
144                start = match_end;
145                continue;
146            }
147
148            let weekday_str = captures.name("weekday").map(|m| m.as_str().to_lowercase());
149            let day_str = captures.name("day").map(|m| m.as_str());
150            let ordinal_day_str = captures.name("ordinal_day").map(|m| m.as_str());
151            let month_str = captures
152                .name("month")
153                .map(|m| m.as_str().to_lowercase())
154                .unwrap_or_default();
155            let year_str = captures.name("year").map(|m| m.as_str());
156            let year_short_str = captures.name("year_short").map(|m| m.as_str());
157            let end_day_str = captures.name("end_day").map(|m| m.as_str());
158
159            // Parse day
160            let day = if let Some(d) = day_str {
161                d.parse::<i32>().unwrap_or(1)
162            } else if let Some(od) = ordinal_day_str {
163                parse_ordinal_pattern(od).map(|v| v as i32).unwrap_or(1)
164            } else {
165                1
166            };
167
168            // Parse month
169            let month = if let Ok(m_num) = month_str.trim_end_matches('.').parse::<u32>() {
170                m_num
171            } else {
172                get_month(month_str.trim_end_matches('.')).unwrap_or(0)
173            };
174
175            if month == 0 {
176                start = match_end;
177                continue;
178            }
179
180            let mut components = context.create_components();
181
182            // Parse year
183            if let Some(y) = year_str {
184                let mut year: i32 = y.parse().unwrap_or(ref_date.year());
185                if year < 100 {
186                    year = if year > 50 { 1900 + year } else { 2000 + year };
187                }
188                components.assign(Component::Year, year);
189            } else if let Some(y_short) = year_short_str {
190                let val: i32 = y_short.trim_start_matches('\'').parse().unwrap_or(0);
191                let year = if val > 50 { 1900 + val } else { 2000 + val };
192                components.assign(Component::Year, year);
193            } else {
194                components.imply(Component::Year, ref_date.year());
195            }
196
197            components.assign(Component::Month, month as i32);
198            components.assign(Component::Day, day);
199
200            if let Some(ref wd_str) = weekday_str {
201                // Clean up dots/commas
202                let clean_wd = wd_str.trim_end_matches('.').trim_end_matches(',');
203                if let Some(weekday) = get_weekday(clean_wd) {
204                    components.assign(Component::Weekday, weekday as i32);
205                }
206            }
207
208            if !components.is_valid_date() {
209                start = match_end;
210                continue;
211            }
212
213            // Handle end date for ranges
214            let end_components = if let Some(end_day_text) = end_day_str {
215                let end_day: i32 = end_day_text.parse().unwrap_or(0);
216                if end_day > 0 && end_day <= 31 {
217                    let mut end_comp = context.create_components();
218                    if let Some(start_year) = components.get(Component::Year) {
219                        // If year was explicit, copy it. If implied, imply it.
220                        if year_str.is_some() || year_short_str.is_some() {
221                            end_comp.assign(Component::Year, start_year);
222                        } else {
223                            end_comp.imply(Component::Year, start_year);
224                        }
225                    }
226                    end_comp.assign(Component::Month, month as i32);
227                    end_comp.assign(Component::Day, end_day);
228
229                    if end_comp.is_valid_date() {
230                        Some(end_comp)
231                    } else {
232                        None
233                    }
234                } else {
235                    None
236                }
237            } else {
238                None
239            };
240
241            results.push(context.create_result(match_start, match_end, components, end_components));
242            start = match_end;
243        }
244
245        Ok(results)
246    }
247}