use crate::components::Component;
use crate::context::ParsingContext;
use crate::dictionaries::ru::{get_month, get_weekday, parse_ordinal_pattern};
use crate::error::Result;
use crate::parsers::Parser;
use crate::results::ParsedResult;
use chrono::Datelike;
use fancy_regex::Regex;
use std::sync::LazyLock;
/// Lazily compiled regex for Russian "day + month" date expressions.
///
/// Built on first access with the `i` (case-insensitive) and `x` (whitespace in
/// the pattern is ignored) flags. Compilation failure panics via `unwrap`.
///
/// Named capture groups, in the order they appear in the pattern:
/// - `weekday` (optional): a leading weekday name or abbreviation, optionally
///   followed by `.` or `,`.
/// - `ordinal_day` or `day`: either a spelled-out ordinal word followed by
///   whitespace, or one to two digits optionally followed by `.`, `,` or
///   whitespace.
/// - `end_day` (optional): one to two digits introduced by `-`, `–`, `по` or
///   `до`, i.e. the end of a day range.
/// - `month`: one of the listed month words, an abbreviation with an optional
///   trailing `.`, or a two-digit number `01`–`12`.
/// - `year` (one to four digits) or `year_short` (an apostrophe plus two
///   digits), both optional, optionally followed by `г`, `г.`, `год` or `года`.
///
/// The match must be followed by a non-word character or the end of input.
///
/// NOTE(review): every separator between day, month and year is optional and
/// `month` accepts bare two-digit numbers, so a plain digit run such as `2024`
/// appears to be matchable as day `2`, month `02`, year `4` — confirm whether
/// this is intended.
/// NOTE(review): `ordinal_day` lists two visually identical `[\w]+…` suffix
/// alternatives; check whether one of them uses Latin letters by mistake.
static PATTERN: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r"(?ix)
(?:
(?P<weekday>понедельник|вторник|среда|четверг|пятница|суббота|воскресенье|пн|вт|ср|чт|пт|сб|вс|вск)(?:\.|,)?\s*
(?:,\s*)?
)?
(?:
(?P<ordinal_day>первое|второе|третье|четвертое|пятое|шестое|седьмое|восьмое|девятое|десятое|[\w]+oe|[\w]+ье|[\w]+ое)\s+
|
(?P<day>\d{1,2})(?:\.|,|\s+)?
)
(?:
(?:(?:\-|\–|по|до)\s*(?P<end_day>\d{1,2})(?:\.|,|\s+)?)?
)
(?P<month>января?|февраля?|марта?|апреля?|мая|июня?|июля?|августа?|сентября?|октября?|ноября?|декабря?|янв\.?|фев\.?|мар\.?|апр\.?|май|июн\.?|июл\.?|авг\.?|сен\.?|окт\.?|ноя\.?|дек\.?|01|02|03|04|05|06|07|08|09|10|11|12)(?:\.|,|\s+)?
(?:
(?:\s*[\-/,]?\s*)?
(?:(?P<year>\d{1,4})|(?P<year_short>'\d{2}))?
(?:\s*(?:г\.?|года?))?
)?
(?=\W|$)"
).unwrap()
});
/// Lazily compiled regex for a bare "month year" expression.
///
/// Built on first access with the `i` (case-insensitive) and `x` (whitespace in
/// the pattern is ignored) flags. Compilation failure panics via `unwrap`.
///
/// Named capture groups:
/// - `month`: one of the twelve month words listed in the alternation.
/// - `year`: exactly four digits, separated from the month by whitespace.
///
/// The pattern carries no boundary assertions on either side, so it can match
/// inside a longer run of word characters or digits.
static MONTH_YEAR_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r"(?ix)
(?P<month>январь|февраль|март|апрель|май|июнь|июль|август|сентябрь|октябрь|ноябрь|декабрь)\s+
(?P<year>\d{4})"
).unwrap()
});
/// Parser for Russian date expressions that name a month.
///
/// This is a unit struct: it carries no fields, so values are free to copy and
/// compare by type alone. `Debug`, `Clone` and `Copy` are derived so the parser
/// can be logged and stored in containers that require those traits.
#[derive(Debug, Clone, Copy)]
pub struct RUMonthNameParser;
impl RUMonthNameParser {
pub fn new() -> Self {
Self
}
}
impl Default for RUMonthNameParser {
fn default() -> Self {
Self::new()
}
}
impl Parser for RUMonthNameParser {
    /// Identifier reported for this parser.
    fn name(&self) -> &'static str {
        "RUMonthNameParser"
    }

    /// Always `true`: this parser sets no precondition on the context.
    fn should_apply(&self, _context: &ParsingContext) -> bool {
        true
    }

    /// Scans `context.text` in two passes and returns every date it recognises.
    ///
    /// 1. `MONTH_YEAR_PATTERN` ("month year"): each match yields a result with
    ///    the year and month assigned and the day assigned to `1`.
    /// 2. `PATTERN` ("day month [year]"): matches that overlap a result already
    ///    collected are skipped. Otherwise the day, month, year (explicit,
    ///    apostrophe-short, or implied from the reference instant) and optional
    ///    weekday are assigned; an optional end day produces end components in
    ///    the same month.
    ///
    /// Results are returned in the order they were found: all pass-1 results,
    /// then all pass-2 results. A regex runtime error ends the affected pass
    /// early and is not reported to the caller.
    fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
        let text_len = context.text.len();
        let reference_year = context.reference.instant.year();
        // Two-digit years above 50 land in the 1900s, the rest in the 2000s.
        let expand_two_digit = |yy: i32| if yy > 50 { 1900 + yy } else { 2000 + yy };

        let mut found: Vec<ParsedResult> = Vec::new();

        // Pass 1: "<month name> <four-digit year>".
        let mut cursor = 0;
        while cursor < text_len {
            let haystack = &context.text[cursor..];
            let Ok(Some(caps)) = MONTH_YEAR_PATTERN.captures(haystack) else {
                break;
            };
            let Some(whole) = caps.get(0) else {
                break;
            };
            let span_start = cursor + whole.start();
            let span_end = cursor + whole.end();

            let month_name = caps
                .name("month")
                .map(|m| m.as_str().to_lowercase())
                .unwrap_or_default();
            let year_value = caps
                .name("year")
                .and_then(|m| m.as_str().parse::<i32>().ok());

            if let Some((month, year)) = get_month(&month_name).zip(year_value) {
                let mut components = context.create_components();
                components.assign(Component::Year, year);
                components.assign(Component::Month, month as i32);
                components.assign(Component::Day, 1);
                found.push(context.create_result(span_start, span_end, components, None));
            }

            cursor = span_end;
        }

        // Pass 2: "[weekday] <day> [- end day] <month> [year]".
        let mut cursor = 0;
        while cursor < text_len {
            let haystack = &context.text[cursor..];
            let Ok(Some(caps)) = PATTERN.captures(haystack) else {
                break;
            };
            let Some(whole) = caps.get(0) else {
                break;
            };
            let span_start = cursor + whole.start();
            let span_end = cursor + whole.end();
            // Whether this match is accepted or rejected, scanning resumes right after it.
            cursor = span_end;

            // Skip anything that intersects a result collected earlier.
            let collides = found.iter().any(|prev| {
                let prev_end = prev.index + prev.text.len();
                (prev.index..prev_end).contains(&span_start)
                    || (span_start..span_end).contains(&prev.index)
            });
            if collides {
                continue;
            }

            // Numeric day wins over the ordinal word; both fall back to 1.
            let day = match (caps.name("day"), caps.name("ordinal_day")) {
                (Some(digits), _) => digits.as_str().parse::<i32>().unwrap_or(1),
                (None, Some(word)) => parse_ordinal_pattern(word.as_str())
                    .map(|v| v as i32)
                    .unwrap_or(1),
                (None, None) => 1,
            };

            // The month token is either a number or a dictionary word; 0 marks "unknown".
            let month_token = caps
                .name("month")
                .map(|m| m.as_str().to_lowercase())
                .unwrap_or_default();
            let month_key = month_token.trim_end_matches('.');
            let month = match month_key.parse::<u32>() {
                Ok(number) => number,
                Err(_) => get_month(month_key).unwrap_or(0),
            };
            if month == 0 {
                continue;
            }

            let explicit_year = caps.name("year").map(|m| m.as_str());
            let apostrophe_year = caps.name("year_short").map(|m| m.as_str());
            let year_is_written = explicit_year.is_some() || apostrophe_year.is_some();

            let mut components = context.create_components();
            match (explicit_year, apostrophe_year) {
                (Some(digits), _) => {
                    let parsed: i32 = digits.parse().unwrap_or(reference_year);
                    let year = if parsed < 100 {
                        expand_two_digit(parsed)
                    } else {
                        parsed
                    };
                    components.assign(Component::Year, year);
                }
                (None, Some(quoted)) => {
                    let short: i32 = quoted.trim_start_matches('\'').parse().unwrap_or(0);
                    components.assign(Component::Year, expand_two_digit(short));
                }
                (None, None) => {
                    // No year in the text: fall back to the reference year as an implied value.
                    components.imply(Component::Year, reference_year);
                }
            }
            components.assign(Component::Month, month as i32);
            components.assign(Component::Day, day);

            let weekday_name = caps.name("weekday").map(|m| m.as_str().to_lowercase());
            if let Some(name) = weekday_name.as_deref() {
                let bare = name.trim_end_matches('.').trim_end_matches(',');
                if let Some(weekday) = get_weekday(bare) {
                    components.assign(Component::Weekday, weekday as i32);
                }
            }

            if !components.is_valid_date() {
                continue;
            }

            // Optional range end: same month, year certainty mirrors the start date.
            let range_end = caps
                .name("end_day")
                .map(|m| m.as_str().parse::<i32>().unwrap_or(0))
                .filter(|last_day| (1..=31).contains(last_day))
                .and_then(|last_day| {
                    let mut end = context.create_components();
                    if let Some(start_year) = components.get(Component::Year) {
                        if year_is_written {
                            end.assign(Component::Year, start_year);
                        } else {
                            end.imply(Component::Year, start_year);
                        }
                    }
                    end.assign(Component::Month, month as i32);
                    end.assign(Component::Day, last_day);
                    end.is_valid_date().then_some(end)
                });

            found.push(context.create_result(span_start, span_end, components, range_end));
        }

        Ok(found)
    }
}