whichtime_sys/parsers/en/
month_name.rs1use crate::components::Component;
4use crate::context::ParsingContext;
5use crate::dictionaries::en::{get_month, parse_ordinal_pattern};
6use crate::error::Result;
7use crate::parsers::Parser;
8use crate::results::ParsedResult;
9use crate::scanner::TokenType;
10use chrono::Datelike;
11use regex::Regex;
12use std::sync::LazyLock;
13
14static MONTH_NAME_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
16 Regex::new(
17 r"(?i)(?:^|\W)((?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|june?|july?|aug(?:ust)?|sep(?:t(?:ember)?)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?))\s*(?:(\d{1,2})(?:st|nd|rd|th)?\s*,?\s*)?(\d{2,4})?"
18 ).unwrap()
19});
20
21static LITTLE_ENDIAN_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
23 Regex::new(
24 r"(?i)(?:^|\W)(\d{1,2})(?:st|nd|rd|th)?\s+(jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|june?|july?|aug(?:ust)?|sep(?:t(?:ember)?)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\s*,?\s*(\d{2,4})?"
25 ).unwrap()
26});
27
/// Stateless parser for English dates written with a month name, in either
/// day-first ("5 March 2024") or month-first ("March 5, 2024") order.
#[derive(Debug, Clone, Copy, Default)]
pub struct MonthNameParser;
30
31impl Parser for MonthNameParser {
32 fn name(&self) -> &'static str {
33 "MonthNameParser"
34 }
35
36 fn should_apply(&self, context: &ParsingContext) -> bool {
37 context.has_token_type(TokenType::Month)
38 }
39
40 fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
41 let mut results = Vec::new();
42 let ref_date = context.reference.instant;
43
44 for mat in LITTLE_ENDIAN_PATTERN.find_iter(context.text) {
46 let matched_text = mat.as_str();
47 let index = mat.start();
48
49 let Some(caps) = LITTLE_ENDIAN_PATTERN.captures(matched_text) else {
50 continue;
51 };
52
53 let day: i32 = caps
54 .get(1)
55 .and_then(|m| m.as_str().parse().ok())
56 .unwrap_or(0);
57 let month_str = caps
58 .get(2)
59 .map(|m| m.as_str().to_lowercase())
60 .unwrap_or_default();
61 let year_str = caps.get(3).map(|m| m.as_str());
62
63 let Some(month) = get_month(&month_str) else {
64 continue;
65 };
66
67 let year = if let Some(y) = year_str {
68 parse_year(y)
69 } else {
70 ref_date.year()
71 };
72
73 if !(1..=31).contains(&day) {
74 continue;
75 }
76
77 let mut components = context.create_components();
78 components.assign(Component::Year, year);
79 components.assign(Component::Month, month as i32);
80 components.assign(Component::Day, day);
81
82 if !components.is_valid_date() {
83 continue;
84 }
85
86 let actual_start = matched_text
88 .find(|c: char| c.is_alphanumeric())
89 .unwrap_or(0);
90 results.push(context.create_result(
91 index + actual_start,
92 index + matched_text.len(),
93 components,
94 None,
95 ));
96 }
97
98 for mat in MONTH_NAME_PATTERN.find_iter(context.text) {
100 let matched_text = mat.as_str();
101 let index = mat.start();
102
103 if results
105 .iter()
106 .any(|r| r.index <= index && r.end_index > index)
107 {
108 continue;
109 }
110
111 let Some(caps) = MONTH_NAME_PATTERN.captures(matched_text) else {
112 continue;
113 };
114
115 let month_str = caps
116 .get(1)
117 .map(|m| m.as_str().to_lowercase())
118 .unwrap_or_default();
119 let day_str = caps.get(2).map(|m| m.as_str());
120 let year_str = caps.get(3).map(|m| m.as_str());
121
122 let Some(month) = get_month(&month_str) else {
123 continue;
124 };
125
126 let day = if let Some(d) = day_str {
127 parse_ordinal_pattern(d).unwrap_or(1) as i32
128 } else {
129 1 };
131
132 let year = if let Some(y) = year_str {
133 parse_year(y)
134 } else {
135 let current_month = ref_date.month() as i32;
137 if (month as i32) < current_month {
138 ref_date.year() + 1
139 } else {
140 ref_date.year()
141 }
142 };
143
144 if !(1..=31).contains(&day) {
145 continue;
146 }
147
148 let mut components = context.create_components();
149 components.assign(Component::Year, year);
150 components.assign(Component::Month, month as i32);
151 components.assign(Component::Day, day);
152
153 if !components.is_valid_date() {
154 continue;
155 }
156
157 let actual_start = matched_text
158 .find(|c: char| c.is_alphanumeric())
159 .unwrap_or(0);
160 results.push(context.create_result(
161 index + actual_start,
162 index + matched_text.len(),
163 components,
164 None,
165 ));
166 }
167
168 Ok(results)
169 }
170}
171
/// Converts a textual year into a full calendar year.
///
/// Values below 100 are treated as two-digit years: anything above 50 lands in
/// the 1900s, everything else in the 2000s. Values of 100 or more are returned
/// unchanged. Input that does not parse as an `i32` is treated as `0`, which
/// maps to 2000.
fn parse_year(s: &str) -> i32 {
    match s.parse::<i32>().unwrap_or(0) {
        full if full >= 100 => full,
        short if short > 50 => 1900 + short,
        short => 2000 + short,
    }
}