Skip to main content

whichtime_sys/parsers/en/
month_name.rs

1//! Month name parser: "January 15, 2024", "15 January 2024", etc.
2
3use crate::components::Component;
4use crate::context::ParsingContext;
5use crate::dictionaries::en::{get_month, parse_ordinal_pattern};
6use crate::error::Result;
7use crate::parsers::Parser;
8use crate::results::ParsedResult;
9use crate::scanner::TokenType;
10use chrono::Datelike;
11use regex::Regex;
12use std::sync::LazyLock;
13
14// Month name patterns
15static MONTH_NAME_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
16    Regex::new(
17        r"(?i)(?:^|\W)((?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|june?|july?|aug(?:ust)?|sep(?:t(?:ember)?)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?))\s*(?:(\d{1,2})(?:st|nd|rd|th)?\s*,?\s*)?(\d{2,4})?"
18    ).unwrap()
19});
20
21// Little endian: "15 January 2024" or "15th January 2024"
22static LITTLE_ENDIAN_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
23    Regex::new(
24        r"(?i)(?:^|\W)(\d{1,2})(?:st|nd|rd|th)?\s+(jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|june?|july?|aug(?:ust)?|sep(?:t(?:ember)?)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\s*,?\s*(\d{2,4})?"
25    ).unwrap()
26});
27
28/// Parser for English month-name date expressions.
29pub struct MonthNameParser;
30
31impl Parser for MonthNameParser {
32    fn name(&self) -> &'static str {
33        "MonthNameParser"
34    }
35
36    fn should_apply(&self, context: &ParsingContext) -> bool {
37        context.has_token_type(TokenType::Month)
38    }
39
40    fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
41        let mut results = Vec::new();
42        let ref_date = context.reference.instant;
43
44        // Try little endian first (15 January 2024)
45        for mat in LITTLE_ENDIAN_PATTERN.find_iter(context.text) {
46            let matched_text = mat.as_str();
47            let index = mat.start();
48
49            let Some(caps) = LITTLE_ENDIAN_PATTERN.captures(matched_text) else {
50                continue;
51            };
52
53            let day: i32 = caps
54                .get(1)
55                .and_then(|m| m.as_str().parse().ok())
56                .unwrap_or(0);
57            let month_str = caps
58                .get(2)
59                .map(|m| m.as_str().to_lowercase())
60                .unwrap_or_default();
61            let year_str = caps.get(3).map(|m| m.as_str());
62
63            let Some(month) = get_month(&month_str) else {
64                continue;
65            };
66
67            let year = if let Some(y) = year_str {
68                parse_year(y)
69            } else {
70                ref_date.year()
71            };
72
73            if !(1..=31).contains(&day) {
74                continue;
75            }
76
77            let mut components = context.create_components();
78            components.assign(Component::Year, year);
79            components.assign(Component::Month, month as i32);
80            components.assign(Component::Day, day);
81
82            if !components.is_valid_date() {
83                continue;
84            }
85
86            // Find actual text bounds
87            let actual_start = matched_text
88                .find(|c: char| c.is_alphanumeric())
89                .unwrap_or(0);
90            results.push(context.create_result(
91                index + actual_start,
92                index + matched_text.len(),
93                components,
94                None,
95            ));
96        }
97
98        // Try month-first pattern (January 15, 2024)
99        for mat in MONTH_NAME_PATTERN.find_iter(context.text) {
100            let matched_text = mat.as_str();
101            let index = mat.start();
102
103            // Skip if we already matched this region
104            if results
105                .iter()
106                .any(|r| r.index <= index && r.end_index > index)
107            {
108                continue;
109            }
110
111            let Some(caps) = MONTH_NAME_PATTERN.captures(matched_text) else {
112                continue;
113            };
114
115            let month_str = caps
116                .get(1)
117                .map(|m| m.as_str().to_lowercase())
118                .unwrap_or_default();
119            let day_str = caps.get(2).map(|m| m.as_str());
120            let year_str = caps.get(3).map(|m| m.as_str());
121
122            let Some(month) = get_month(&month_str) else {
123                continue;
124            };
125
126            let day = if let Some(d) = day_str {
127                parse_ordinal_pattern(d).unwrap_or(1) as i32
128            } else {
129                1 // Default to 1st of month
130            };
131
132            let year = if let Some(y) = year_str {
133                parse_year(y)
134            } else {
135                // Determine year based on whether month is in future or past
136                let current_month = ref_date.month() as i32;
137                if (month as i32) < current_month {
138                    ref_date.year() + 1
139                } else {
140                    ref_date.year()
141                }
142            };
143
144            if !(1..=31).contains(&day) {
145                continue;
146            }
147
148            let mut components = context.create_components();
149            components.assign(Component::Year, year);
150            components.assign(Component::Month, month as i32);
151            components.assign(Component::Day, day);
152
153            if !components.is_valid_date() {
154                continue;
155            }
156
157            let actual_start = matched_text
158                .find(|c: char| c.is_alphanumeric())
159                .unwrap_or(0);
160            results.push(context.create_result(
161                index + actual_start,
162                index + matched_text.len(),
163                components,
164                None,
165            ));
166        }
167
168        Ok(results)
169    }
170}
171
/// Converts the textual year of a date into a full calendar year.
///
/// Values below 100 are treated as two-digit years with a pivot at 50:
/// 51-99 map to 1951-1999, everything up to 50 maps into the 2000s.
/// Values of 100 and above are returned unchanged. Text that does not parse
/// as an `i32` is treated as 0 and therefore yields 2000.
fn parse_year(s: &str) -> i32 {
    let parsed = s.parse::<i32>().unwrap_or(0);
    match parsed {
        51..=99 => parsed + 1900,
        short if short <= 50 => short + 2000,
        full => full,
    }
}