Skip to main content

whichtime_sys/parsers/es/
month_name.rs

//! Spanish month name parser
//!
//! Handles Spanish date formats with month names like:
//! - "10 Agosto 2012"
//! - "10 de Agosto de 2012"
//! - "10 - 22 Agosto 2012" (ranges)
//! - "10 a 22 Agosto 2012" (ranges with "a")
//! - "10 Agosto 234 AC" (year suffixes)
//! - "Dom 15Sep" (abbreviated weekday + date)
//! - "martes, 10 de enero" (weekday prefix)
11
12use crate::components::Component;
13use crate::context::ParsingContext;
14use crate::dictionaries::es as dict;
15use crate::error::Result;
16use crate::parsers::Parser;
17use crate::results::ParsedResult;
18use crate::scanner::TokenType;
19use chrono::Datelike;
20use fancy_regex::Regex;
21use std::sync::LazyLock;
22
23// Main pattern for date with month name (with optional "el" that won't be captured)
24static PATTERN: LazyLock<Regex> = LazyLock::new(|| {
25    Regex::new(
26        r"(?i)(?:el\s+)?(?:(?P<weekday>lunes|martes|miércoles|miercoles|jueves|viernes|sábado|sabado|domingo|lun|mar|mié|mie|jue|vie|sáb|sab|dom)[,\s]+)?(?P<day>\d{1,2})(?:º|ª|°)?(?:\s*(?:de|desde|-|–|a)\s*(?P<end_day>\d{1,2})(?:º|ª|°)?)?\s*(?:de\s+)?(?P<month>enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|setiembre|octubre|noviembre|diciembre|ene|feb|mar|abr|may|jun|jul|ago|sep|set|oct|nov|dic)(?:\s*(?:de|,)?\s*(?P<year>\d{1,4}))?(?:\s*(?P<era>a\.?\s*c\.?|d\.?\s*c\.?|a\.?\s*d\.?))?"
27    ).unwrap()
28});
29
30// Pattern for abbreviated weekday + day + month (Dom 15Sep, DOM 15SEP)
31static ABBREV_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
32    Regex::new(
33        r"(?i)(?P<weekday>lun\.?|mar\.?|mié\.?|mie\.?|jue\.?|vie\.?|sáb\.?|sab\.?|dom\.?)\s*(?P<day>\d{1,2})\s*(?P<month>ene\.?|feb\.?|mar\.?|abr\.?|may\.?|jun\.?|jul\.?|ago\.?|sep\.?|set\.?|oct\.?|nov\.?|dic\.?)"
34    ).unwrap()
35});
36
37// Pattern for cross-month ranges
38static RANGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
39    Regex::new(
40        r"(?i)(?P<start_day>\d{1,2})\s*(?P<start_month>enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|setiembre|octubre|noviembre|diciembre|ene|feb|mar|abr|may|jun|jul|ago|sep|set|oct|nov|dic)\s*(?:-|–|a)\s*(?P<end_day>\d{1,2})\s*(?P<end_month>enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|setiembre|octubre|noviembre|diciembre|ene|feb|mar|abr|may|jun|jul|ago|sep|set|oct|nov|dic)(?:\s*(?:de|,)?\s*(?P<year>\d{1,4}))?"
41    ).unwrap()
42});
43
44/// Spanish month name parser
45pub struct ESMonthNameParser;
46
47impl ESMonthNameParser {
48    pub fn new() -> Self {
49        Self
50    }
51
52    fn parse_year_with_era(year_str: &str, era_str: Option<&str>) -> i32 {
53        let mut year: i32 = year_str.parse().unwrap_or(0);
54
55        // Check if era is present - if so, don't convert 2-digit years
56        let has_era = era_str.is_some() && !era_str.unwrap().trim().is_empty();
57
58        // Handle 2-digit years only if no era suffix
59        if !has_era && year < 100 {
60            year = if year > 50 { 1900 + year } else { 2000 + year };
61        }
62
63        // Handle era suffixes
64        if let Some(era) = era_str {
65            let era_lower = era.to_lowercase().replace(['.', ' '], "");
66            if era_lower == "ac" {
67                year = -year;
68            }
69            // "d.c." and "a.d." keep positive year
70        }
71
72        year
73    }
74}
75
76impl Parser for ESMonthNameParser {
77    fn name(&self) -> &'static str {
78        "ESMonthNameParser"
79    }
80
81    fn should_apply(&self, context: &ParsingContext) -> bool {
82        // Check for month token or digit patterns that might contain abbreviated months
83        context.has_token_type(TokenType::Month) || context.text.bytes().any(|b| b.is_ascii_digit())
84    }
85
86    fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
87        let mut results = Vec::new();
88        let ref_date = context.reference.instant;
89
90        // Try cross-month range pattern first
91        let mut start = 0;
92        while start < context.text.len() {
93            let search_text = &context.text[start..];
94
95            if let Ok(Some(caps)) = RANGE_PATTERN.captures(search_text)
96                && let (Some(sd), Some(sm), Some(ed), Some(em)) = (
97                    caps.name("start_day"),
98                    caps.name("start_month"),
99                    caps.name("end_day"),
100                    caps.name("end_month"),
101                )
102            {
103                let start_day: u32 = sd.as_str().parse().unwrap_or(0);
104                let start_month = dict::get_month(&sm.as_str().to_lowercase()).unwrap_or(0);
105                let end_day: u32 = ed.as_str().parse().unwrap_or(0);
106                let end_month = dict::get_month(&em.as_str().to_lowercase()).unwrap_or(0);
107
108                if start_day > 0
109                    && start_day <= 31
110                    && start_month > 0
111                    && end_day > 0
112                    && end_day <= 31
113                    && end_month > 0
114                {
115                    let year = caps
116                        .name("year")
117                        .map(|y| Self::parse_year_with_era(y.as_str(), None))
118                        .unwrap_or(ref_date.year());
119
120                    let full_match = caps.get(0).unwrap();
121                    let match_start = start + full_match.start();
122                    let match_end = start + full_match.end();
123
124                    let mut components = context.create_components();
125                    components.assign(Component::Year, year);
126                    components.assign(Component::Month, start_month as i32);
127                    components.assign(Component::Day, start_day as i32);
128
129                    let mut end_comp = context.create_components();
130                    end_comp.assign(Component::Year, year);
131                    end_comp.assign(Component::Month, end_month as i32);
132                    end_comp.assign(Component::Day, end_day as i32);
133
134                    results.push(context.create_result(
135                        match_start,
136                        match_end,
137                        components,
138                        Some(end_comp),
139                    ));
140                    start = match_end;
141                    continue;
142                }
143            }
144
145            // Try abbreviated weekday pattern
146            if let Ok(Some(caps)) = ABBREV_PATTERN.captures(search_text)
147                && let (Some(d), Some(m)) = (caps.name("day"), caps.name("month"))
148            {
149                let day: u32 = d.as_str().parse().unwrap_or(0);
150                let month = dict::get_month(&m.as_str().to_lowercase()).unwrap_or(0);
151
152                if day > 0 && day <= 31 && month > 0 {
153                    let full_match = caps.get(0).unwrap();
154                    let match_start = start + full_match.start();
155                    let match_end = start + full_match.end();
156
157                    let mut components = context.create_components();
158                    components.imply(Component::Year, ref_date.year());
159                    components.assign(Component::Month, month as i32);
160                    components.assign(Component::Day, day as i32);
161
162                    // Add weekday if present
163                    if let Some(wd) = caps.name("weekday")
164                        && let Some(weekday) = dict::get_weekday(&wd.as_str().to_lowercase())
165                    {
166                        components.assign(Component::Weekday, weekday as i32);
167                    }
168
169                    results.push(context.create_result(match_start, match_end, components, None));
170                    start = match_end;
171                    continue;
172                }
173            }
174
175            // Try main pattern
176            if let Ok(Some(caps)) = PATTERN.captures(search_text)
177                && let (Some(d), Some(m)) = (caps.name("day"), caps.name("month"))
178            {
179                let day: u32 = d.as_str().parse().unwrap_or(0);
180                let month = dict::get_month(&m.as_str().to_lowercase()).unwrap_or(0);
181
182                if day > 0 && day <= 31 && month > 0 {
183                    let full_match = caps.get(0).unwrap();
184                    let matched_text = full_match.as_str();
185
186                    // Skip "el " prefix if present
187                    let prefix_offset = if matched_text.to_lowercase().starts_with("el ") {
188                        3
189                    } else {
190                        0
191                    };
192
193                    let match_start = start + full_match.start() + prefix_offset;
194                    let match_end = start + full_match.end();
195
196                    let year = caps.name("year").map(|y| {
197                        let era = caps.name("era").map(|e| e.as_str());
198                        Self::parse_year_with_era(y.as_str(), era)
199                    });
200
201                    let mut components = context.create_components();
202                    let has_weekday = caps.name("weekday").is_some();
203
204                    if let Some(y) = year {
205                        components.assign(Component::Year, y);
206                    } else if has_weekday {
207                        // When weekday is specified, assign year to prevent ForwardDateRefiner from moving it
208                        components.assign(Component::Year, ref_date.year());
209                    } else {
210                        components.imply(Component::Year, ref_date.year());
211                    }
212                    components.assign(Component::Month, month as i32);
213                    components.assign(Component::Day, day as i32);
214
215                    // Add weekday if present
216                    if let Some(wd) = caps.name("weekday")
217                        && let Some(weekday) = dict::get_weekday(&wd.as_str().to_lowercase())
218                    {
219                        components.assign(Component::Weekday, weekday as i32);
220                    }
221
222                    // Handle date range within same month
223                    let end_comp = if let Some(end_day_match) = caps.name("end_day") {
224                        let end_day: u32 = end_day_match.as_str().parse().unwrap_or(0);
225                        if end_day > 0 && end_day <= 31 {
226                            let mut ec = context.create_components();
227                            if let Some(y) = year {
228                                ec.assign(Component::Year, y);
229                            } else {
230                                ec.imply(Component::Year, ref_date.year());
231                            }
232                            ec.assign(Component::Month, month as i32);
233                            ec.assign(Component::Day, end_day as i32);
234                            Some(ec)
235                        } else {
236                            None
237                        }
238                    } else {
239                        None
240                    };
241
242                    results.push(context.create_result(
243                        match_start,
244                        match_end,
245                        components,
246                        end_comp,
247                    ));
248                    start = match_end;
249                    continue;
250                }
251            }
252
253            // No match at this position - advance to next character (UTF-8 safe)
254            if let Some(c) = search_text.chars().next() {
255                start += c.len_utf8();
256            } else {
257                break;
258            }
259        }
260
261        Ok(results)
262    }
263}
264
265impl Default for ESMonthNameParser {
266    fn default() -> Self {
267        Self::new()
268    }
269}