sac13/
parse.rs

1use core::{fmt::Display, iter::Peekable};
2
3use crate::{Date, GregorianDate};
4
5#[derive(Debug, Clone)]
6pub enum GregorianOrSac13 {
7    GregorianDate(GregorianDate),
8    Sac13Date(Date),
9}
10
/// The order in which year, month and day appear in a written date.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ComponentOrder {
    /// Year, then month, then day.
    YMD,
    /// Day, then month, then year.
    DMY,
    /// Month, then day, then year.
    MDY,
}
17
18#[derive(Debug, Clone)]
19pub struct ParsedDate {
20    pub date: GregorianOrSac13,
21    pub format: ParsedFormat,
22}
23
24#[derive(Debug, Clone)]
25pub struct ParsedFormat {
26    pub separator: u8,
27    pub component_order: ComponentOrder,
28    pub len_day: u8,
29    pub len_month: u8,
30    pub len_year: u8,
31}
32
/// A single date component (day, month or year) as read from the raw input,
/// plus the bookkeeping needed to classify it afterwards.
#[derive(Debug)]
struct ComponentParse {
    /// `true` if the component started with an uppercase ASCII letter.
    letter: bool,
    /// Number of input bytes that belong to the component: an optional
    /// leading `-`, an optional prefix letter, and all digits. The
    /// terminating separator is not counted.
    char_cnt: u8,
    /// Numeric value of the component. A prefix letter contributes its
    /// zero-based alphabet index (`A` = 0) as the leading "digit".
    value: i16,
    /// The separator byte that terminated the component, or `0` if the
    /// component ran up to the end of the input.
    end: u8,
}

impl ComponentParse {
    /// Reads one component from `i`, consuming it including its trailing
    /// separator (`.`, `/` or `-`) if there is one.
    ///
    /// Returns `None` when:
    /// - the input is already exhausted, or ends right after a leading `-`,
    /// - a prefix letter follows a minus sign,
    /// - a byte that is neither a digit nor a separator is encountered,
    /// - the value does not fit into an `i16`, or
    /// - the component is longer than `u8::MAX` characters.
    pub fn parse<T>(i: &mut Peekable<T>) -> Option<Self>
    where
        T: Iterator<Item = u8>,
    {
        let mut result = Self {
            letter: false,
            char_cnt: 0,
            value: 0,
            end: 0,
        };

        let negative = *i.peek()? == b'-';

        if negative {
            // Consume the peeked minus sign; it counts towards the length.
            i.next();
            result.char_cnt = result.char_cnt.checked_add(1)?;
        }

        let prefix = *i.peek()?;

        if prefix.is_ascii_uppercase() {
            if negative {
                // negative SAC13 years are not allowed
                return None;
            }

            // Consume the prefix letter and seed the value with its index.
            i.next();
            result.value = i16::from(prefix - b'A');
            result.letter = true;
            result.char_cnt = result.char_cnt.checked_add(1)?;
        }

        loop {
            match i.next() {
                // End of input: `end` keeps its initial value of 0.
                None => break,
                Some(separator @ (b'.' | b'/' | b'-')) => {
                    result.end = separator;
                    break;
                }
                Some(digit @ b'0'..=b'9') => {
                    // The counter must be checked as well: a long run of
                    // leading zeros never overflows `value` but would
                    // overflow the `u8` length counter.
                    result.char_cnt = result.char_cnt.checked_add(1)?;
                    result.value = result
                        .value
                        .checked_mul(10)?
                        .checked_add(i16::from(digit - b'0'))?;
                }
                Some(_) => return None,
            }
        }

        if negative {
            result.value = result.value.checked_neg()?;
        }

        Some(result)
    }
}
101
102/// Parses various SAC13 and Gregorian Calendar formats.
103///
104/// ## Supported Formats
105///
106/// Even if the year is less than 100 it must be written
107/// with leading zeros, to at least be three characters long.
108///
109/// - YYYY-MM-DD
110/// - DD-MM-YYYY
111pub fn parse_date_str(input: &str) -> Option<ParsedDate> {
112    const MIN_YEAR_LENGTH: u8 = 4;
113
114    let mut input = input.as_bytes().iter().copied().peekable();
115
116    let c1 = ComponentParse::parse(&mut input)?;
117    let c2 = ComponentParse::parse(&mut input)?;
118    let c3: ComponentParse = ComponentParse::parse(&mut input)?;
119
120    if c1.char_cnt == 3 || c2.char_cnt == 3 || c1.char_cnt == 3 {
121        // No component is allowed to be three digits.
122        // Days and months must be 1 or 2, and years must be 4 or more.
123        return None;
124    }
125
126    if c1.end != c2.end {
127        // different delimiters in same date are not allowed
128        return None;
129    }
130
131    let separator = c1.end;
132
133    if c3.end != 0 {
134        // c3 must be the last component (delimiter zero)
135        return None;
136    }
137
138    let year_first = c1.char_cnt >= MIN_YEAR_LENGTH;
139    let year_last = c3.char_cnt >= MIN_YEAR_LENGTH;
140
141    if c2.char_cnt >= MIN_YEAR_LENGTH {
142        // middle part is never allowed to be a year
143        return None;
144    }
145
146    if year_first == year_last {
147        // either both ends or neither seem to be a year which is not allowed
148        return None;
149    }
150
151    // determine sort order
152    let (year, month, day, order) = if year_first {
153        (c1, c2, c3, ComponentOrder::YMD)
154    } else if c1.end == b'/' && !c3.letter {
155        // US format only for Gregorian (no SAC13 millennium indicator letter)
156        (c3, c1, c2, ComponentOrder::MDY)
157    } else {
158        (c3, c2, c1, ComponentOrder::DMY)
159    };
160
161    if day.letter || month.letter {
162        return None;
163    }
164
165    if !(1..=31).contains(&day.value) || !(1..=13).contains(&month.value) {
166        return None;
167    }
168
169    let format = ParsedFormat {
170        separator,
171        component_order: order,
172        len_day: day.char_cnt,
173        len_month: month.char_cnt,
174        len_year: year.char_cnt,
175    };
176
177    let day = day.value as u8;
178    let month = month.value as u8;
179
180    let date = if year.letter {
181        if year.value < 0 {
182            return None;
183        }
184
185        GregorianOrSac13::Sac13Date(Date::from_ymd_untyped(year.value as u16, month, day)?)
186    } else {
187        GregorianOrSac13::GregorianDate(GregorianDate::from_ymd(year.value, month, day)?)
188    };
189
190    Some(ParsedDate { date, format })
191}
192
193impl Display for ParsedFormat {
194    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
195        let separator = self.separator as char;
196
197        let format_order = match self.component_order {
198            ComponentOrder::YMD => [
199                ('Y', self.len_year),
200                ('M', self.len_month),
201                ('D', self.len_day),
202            ],
203            ComponentOrder::DMY => [
204                ('D', self.len_day),
205                ('M', self.len_month),
206                ('Y', self.len_year),
207            ],
208            ComponentOrder::MDY => [
209                ('M', self.len_month),
210                ('D', self.len_day),
211                ('Y', self.len_year),
212            ],
213        };
214
215        for (i, &(c, count)) in format_order.iter().enumerate() {
216            if i != 0 {
217                write!(f, "{}", separator)?;
218            }
219
220            for _ in 0..count {
221                write!(f, "{}", c)?;
222            }
223        }
224
225        Ok(())
226    }
227}
228
229impl Display for GregorianOrSac13 {
230    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
231        match self {
232            GregorianOrSac13::GregorianDate(x) => write!(f, "{}", x),
233            GregorianOrSac13::Sac13Date(x) => write!(f, "{}", x),
234        }
235    }
236}
237
#[cfg(test)]
mod tests {
    use super::*;

    // Asserts that an expression matches the given pattern.
    macro_rules! assert_matches {
        ($left:expr, $right:pat) => {
            assert!(matches!($left, $right));
        };
    }

    // Asserts that the given input is rejected by the parser.
    macro_rules! assert_parse_error {
        ($inp:literal) => {
            assert_matches!(parse_date_str($inp), None);
        };
    }

    // Parses the input and yields the Gregorian date, panicking otherwise.
    macro_rules! parse_expect_greg {
        ($inp:expr) => {
            match parse_date_str($inp) {
                Some(ParsedDate {
                    date: GregorianOrSac13::GregorianDate(x),
                    ..
                }) => x,
                _ => panic!(concat!(
                    "Expected ",
                    stringify!($inp),
                    " to parse as Gregorian date."
                )),
            }
        };
    }

    // Parses the input and yields the SAC13 date, panicking otherwise.
    macro_rules! parse_expect_sac13 {
        ($inp:expr) => {
            match parse_date_str($inp) {
                Some(ParsedDate {
                    date: GregorianOrSac13::Sac13Date(x),
                    ..
                }) => x,
                _ => panic!(concat!(
                    "Expected ",
                    stringify!($inp),
                    " to parse as SAC13 date."
                )),
            }
        };
    }

    // Asserts that the input parses to the given SAC13 date.
    macro_rules! assert_sac13 {
        ($inp:expr, $y:ident - $m:literal - $d:literal) => {
            assert_eq!(parse_expect_sac13!($inp), date!($y - $m - $d));
        };
    }

    // Asserts that the input parses to the given Gregorian date.
    macro_rules! assert_greg {
        ($inp:expr, $y:literal - $m:literal - $d:literal) => {
            assert_eq!(parse_expect_greg!($inp), date_greg!($y - $m - $d));
        };
    }

    #[test]
    fn parsing_gregorian() {
        // DD-MM-YYYY
        assert_greg!("11-12-2000", 2000 - 12 - 11);
        assert_greg!("11.12.2000", 2000 - 12 - 11);

        // YYYY-MM-DD
        assert_greg!("2000-12-11", 2000 - 12 - 11);
        assert_greg!("2000.12.11", 2000 - 12 - 11);
        assert_greg!("2000/12/11", 2000 - 12 - 11);

        // Gregorian US Format:
        assert_greg!("12/11/2000", 2000 - 12 - 11);
    }

    #[test]
    fn negative_year_greg() {
        assert_greg!("-2000-12-11", -2000 - 12 - 11);
        assert_greg!("11-12--2000", -2000 - 12 - 11);
        assert_greg!("-2000.12.11", -2000 - 12 - 11);
        assert_greg!("11.12.-2000", -2000 - 12 - 11);
    }

    #[test]
    fn parsing_sac13() {
        assert_sac13!("M003-02-01", M003 - 02 - 01);
        assert_sac13!("M003.02.01", M003 - 02 - 01);
        assert_sac13!("M003/02/01", M003 - 02 - 01);

        assert_sac13!("01-02-M003", M003 - 02 - 01);
        assert_sac13!("01.02.M003", M003 - 02 - 01);
        assert_sac13!("01/02/M003", M003 - 02 - 01);

        // Note: SAC13 is always YMD or DMY and never the US format MDY
    }

    #[test]
    fn no_letter_allowed_as_month() {
        assert_parse_error!("2001-L-03");
    }

    #[test]
    fn no_letter_allowed_as_day() {
        assert_parse_error!("2001-02-L");
    }

    #[test]
    fn ambiguous_year_end_fails_to_parse() {
        assert_parse_error!("2020-12-2020");
    }

    #[test]
    fn no_year_end_fails_to_parse() {
        assert_parse_error!("01-01-01");
    }

    #[test]
    fn three_digit_components_fail_to_parse() {
        assert_parse_error!("001-01-01");
        assert_parse_error!("01-01-001");

        assert_parse_error!("01-001-2000");
        assert_parse_error!("001-01-2000");
    }
}