Skip to main content

whichtime_sys/parsers/en/
slash_date.rs

1//! Slash date format parser: MM/DD/YYYY or DD/MM/YYYY
2
3use crate::components::Component;
4use crate::context::ParsingContext;
5use crate::error::Result;
6use crate::parsers::Parser;
7use crate::results::ParsedResult;
8use regex::Regex;
9use std::sync::LazyLock;
10
// Matches numeric dates written as three digit groups joined by `/`, `-` or `.`
// (e.g. M/D/YYYY, MM/DD/YYYY, M-D-YY, D.M.YYYY). Each separator is matched
// independently, so mixed forms such as `1/2-2020` also match.
//
// Capture groups: 1 = first number (1-2 digits), 2 = second number (1-2 digits),
// 3 = year (2-4 digits). The pattern itself does not decide which of groups 1
// and 2 is the month.
//
// The leading `(?:^|[^\d])` and trailing `(?:[^\d]|$)` groups stop a date from
// starting or ending inside a longer run of digits. They are ordinary consuming
// groups, so the overall match can be one character wider than the date on
// either side. The `(?i)` flag has no effect because the pattern has no letters.
//
// Compiled lazily on first use; `unwrap` can only panic if the literal below is
// not a valid regex.
static PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"(?i)(?:^|[^\d])(\d{1,2})[/\-.](\d{1,2})[/\-.](\d{2,4})(?:[^\d]|$)").unwrap()
});
15
/// Parser for numeric dates whose parts are separated by `/`, `-` or `.`.
///
/// The order of the first two numbers is ambiguous in such dates, so it is
/// fixed per parser instance at construction time.
#[derive(Debug, Clone, Copy)]
pub struct SlashDateParser {
    /// If true, interpret as DD/MM/YYYY (little endian, UK style).
    /// If false, interpret as MM/DD/YYYY (middle endian, US style).
    little_endian: bool,
}

impl SlashDateParser {
    /// Create a slash-date parser with either little-endian (`true`, DD/MM)
    /// or middle-endian (`false`, MM/DD) ordering.
    pub fn new(little_endian: bool) -> Self {
        Self { little_endian }
    }

    /// Convert the textual year of a date into a full year number.
    ///
    /// Years written with at most two digits are expanded around a fixed
    /// pivot: `51..=99` become `1951..=1999` and `00..=50` become
    /// `2000..=2050`. Years written with more than two digits are taken
    /// literally, so `"0050"` is the year 50 and not 2050.
    ///
    /// Returns `None` when `year_str` is not a valid `i32`.
    fn parse_year(year_str: &str) -> Option<i32> {
        let year: i32 = year_str.parse().ok()?;

        // Whether a year is "short" depends on how it was written, not on its
        // numeric value: an explicitly padded year such as "0050" must not be
        // pushed through the two-digit pivot.
        if year_str.len() > 2 {
            return Some(year);
        }

        Some(if year > 50 { 1900 + year } else { 2000 + year })
    }
}
38
39impl Parser for SlashDateParser {
40    fn name(&self) -> &'static str {
41        "SlashDateParser"
42    }
43
44    fn should_apply(&self, context: &ParsingContext) -> bool {
45        let text = context.text;
46        // Quick check: must contain / or - and digits
47        // For non-English locales, we might support space as separator? But regex doesn't.
48        // Also, we might need to check if text is long enough.
49        (text.contains('/') || text.contains('-') || text.contains('.'))
50            && text.bytes().any(|b| b.is_ascii_digit())
51    }
52
53    fn parse(&self, context: &ParsingContext) -> Result<Vec<ParsedResult>> {
54        let mut results = Vec::new();
55
56        for mat in PATTERN.find_iter(context.text) {
57            let matched_text = mat.as_str();
58            let index = mat.start();
59
60            let Some(caps) = PATTERN.captures(matched_text) else {
61                continue;
62            };
63
64            let first: i32 = caps
65                .get(1)
66                .and_then(|m| m.as_str().parse().ok())
67                .unwrap_or(0);
68            let second: i32 = caps
69                .get(2)
70                .and_then(|m| m.as_str().parse().ok())
71                .unwrap_or(0);
72            let year = caps
73                .get(3)
74                .and_then(|m| Self::parse_year(m.as_str()))
75                .unwrap_or(0);
76
77            let (month, day) = if self.little_endian {
78                (second, first) // DD/MM/YYYY
79            } else {
80                (first, second) // MM/DD/YYYY
81            };
82
83            // Validate
84            if !(1..=12).contains(&month) || !(1..=31).contains(&day) || year == 0 {
85                continue;
86            }
87
88            let mut components = context.create_components();
89            components.assign(Component::Year, year);
90            components.assign(Component::Month, month);
91            components.assign(Component::Day, day);
92
93            if !components.is_valid_date() {
94                continue;
95            }
96
97            // Trim the leading/trailing non-digit characters from the matched text
98            let actual_start = matched_text.find(|c: char| c.is_ascii_digit()).unwrap_or(0);
99            let actual_end = matched_text
100                .rfind(|c: char| c.is_ascii_digit())
101                .map(|i| i + matched_text[i..].chars().next().map_or(1, char::len_utf8))
102                .unwrap_or(matched_text.len());
103            let clean_text = &matched_text[actual_start..actual_end];
104
105            results.push(context.create_result(
106                index + actual_start,
107                index + actual_start + clean_text.len(),
108                components,
109                None,
110            ));
111        }
112
113        Ok(results)
114    }
115}