mail_parser/parsers/fields/
date.rs

1/*
2 * SPDX-FileCopyrightText: 2020 Stalwart Labs LLC <hello@stalw.art>
3 *
4 * SPDX-License-Identifier: Apache-2.0 OR MIT
5 */
6
7use std::fmt;
8
9use crate::{parsers::MessageStream, DateTime, HeaderValue};
10
/// Three-letter day-of-week abbreviations, indexed `0` (Sunday) through `6` (Saturday).
pub static DOW: &[&str] = &[
    "Sun", // 0
    "Mon", // 1
    "Tue", // 2
    "Wed", // 3
    "Thu", // 4
    "Fri", // 5
    "Sat", // 6
];

/// Three-letter month abbreviations; index `0` is January and index `11` is December.
pub static MONTH: &[&str] = &[
    "Jan", "Feb", "Mar", "Apr", "May", "Jun", // first half of the year
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", // second half of the year
];
15
16impl DateTime {
17    /// Parses an RFC822 date
18    pub fn parse_rfc822(value: &str) -> Option<Self> {
19        match MessageStream::new(value.as_bytes()).parse_date() {
20            HeaderValue::DateTime(dt) => dt.into(),
21            _ => None,
22        }
23    }
24
25    /// Parses an RFC3339 date
26    pub fn parse_rfc3339(value: &str) -> Option<Self> {
27        // 2004 - 06 - 28 T 23 : 43 : 45 . 000 Z
28        // 1969 - 02 - 13 T 23 : 32 : 00 - 03 : 30
29        //   0     1    2    3    4    5    6    7
30
31        let mut pos = 0;
32        let mut parts = [0u32; 8];
33        let mut parts_sizes = [
34            4u32, // Year (0)
35            2u32, // Month (1)
36            2u32, // Day (2)
37            2u32, // Hour (3)
38            2u32, // Minute (4)
39            2u32, // Second (5)
40            2u32, // TZ Hour (6)
41            2u32, // TZ Minute (7)
42        ];
43        let mut skip_digits = false;
44        let mut is_plus = true;
45
46        for ch in value.as_bytes() {
47            match ch {
48                b'0'..=b'9' if !skip_digits => {
49                    if parts_sizes[pos] > 0 {
50                        parts_sizes[pos] -= 1;
51                        parts[pos] += (*ch - b'0') as u32 * u32::pow(10, parts_sizes[pos]);
52                    } else {
53                        return None;
54                    }
55                }
56                b'-' => {
57                    if pos <= 1 {
58                        pos += 1;
59                    } else if pos == 5 {
60                        pos += 1;
61                        is_plus = false;
62                        skip_digits = false;
63                    } else {
64                        return None;
65                    }
66                }
67                b'T' => {
68                    if pos == 2 {
69                        pos += 1;
70                    } else {
71                        return None;
72                    }
73                }
74                b':' => {
75                    if [3, 4, 6].contains(&pos) {
76                        pos += 1;
77                    } else {
78                        return None;
79                    }
80                }
81                b'+' => {
82                    if pos == 5 {
83                        pos += 1;
84                        skip_digits = false;
85                    } else {
86                        return None;
87                    }
88                }
89                b'.' => {
90                    if pos == 5 {
91                        skip_digits = true;
92                    } else {
93                        return None;
94                    }
95                }
96
97                _ => (),
98            }
99        }
100
101        if pos >= 5 {
102            DateTime {
103                year: parts[0] as u16,
104                month: parts[1] as u8,
105                day: parts[2] as u8,
106                hour: parts[3] as u8,
107                minute: parts[4] as u8,
108                second: parts[5] as u8,
109                tz_hour: parts[6] as u8,
110                tz_minute: parts[7] as u8,
111                tz_before_gmt: !is_plus,
112            }
113            .into()
114        } else {
115            None
116        }
117    }
118
119    /// Return an RFC822 date
120    pub fn to_rfc822(&self) -> String {
121        format!(
122            "{}, {} {} {:04} {:02}:{:02}:{:02} {}{:02}{:02}",
123            DOW[self.day_of_week() as usize],
124            self.day,
125            MONTH
126                .get(self.month.saturating_sub(1) as usize)
127                .unwrap_or(&""),
128            self.year,
129            self.hour,
130            self.minute,
131            self.second,
132            if self.tz_before_gmt && (self.tz_hour > 0 || self.tz_minute > 0) {
133                "-"
134            } else {
135                "+"
136            },
137            self.tz_hour,
138            self.tz_minute
139        )
140    }
141
142    /// Returns an RFC3339 representation of the parsed RFC5322 datetime field
143    pub fn to_rfc3339(&self) -> String {
144        if self.tz_hour != 0 || self.tz_minute != 0 {
145            format!(
146                "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}{}{:02}:{:02}",
147                self.year,
148                self.month,
149                self.day,
150                self.hour,
151                self.minute,
152                self.second,
153                if self.tz_before_gmt && (self.tz_hour > 0 || self.tz_minute > 0) {
154                    "-"
155                } else {
156                    "+"
157                },
158                self.tz_hour,
159                self.tz_minute
160            )
161        } else {
162            format!(
163                "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z",
164                self.year, self.month, self.day, self.hour, self.minute, self.second,
165            )
166        }
167    }
168
169    /// Returns true if the date is valid
170    pub fn is_valid(&self) -> bool {
171        (0..=23).contains(&self.tz_hour)
172            && (1900..=3000).contains(&self.year)
173            && (0..=59).contains(&self.tz_minute)
174            && (1..=12).contains(&self.month)
175            && (1..=31).contains(&self.day)
176            && (0..=23).contains(&self.hour)
177            && (0..=59).contains(&self.minute)
178            && (0..=59).contains(&self.second)
179    }
180
181    /// Returns the numbers of seconds since 1970-01-01T00:00:00Z (Unix epoch)
182    /// or None if the date is invalid.
183    pub fn to_timestamp(&self) -> i64 {
184        self.to_timestamp_local()
185            + ((self.tz_hour as i64 * 3600 + self.tz_minute as i64 * 60)
186                * if self.tz_before_gmt { 1 } else { -1 })
187    }
188
189    /// Returns the numbers of seconds since 1970-01-01T00:00:00Z (Unix epoch) in local time
190    /// or None if the date is invalid.
191    pub fn to_timestamp_local(&self) -> i64 {
192        // Ported from https://github.com/protocolbuffers/upb/blob/22182e6e/upb/json_decode.c#L982-L992
193        let month = self.month as u32;
194        let year_base = 4800; /* Before min year, multiple of 400. */
195        let m_adj = month.wrapping_sub(3); /* March-based month. */
196        let carry = i64::from(m_adj > month);
197        let adjust = if carry > 0 { 12 } else { 0 };
198        let y_adj = self.year as i64 + year_base - carry;
199        let month_days = ((m_adj.wrapping_add(adjust)) * 62719 + 769) / 2048;
200        let leap_days = y_adj / 4 - y_adj / 100 + y_adj / 400;
201        (y_adj * 365 + leap_days + month_days as i64 + (self.day as i64 - 1) - 2472632) * 86400
202            + self.hour as i64 * 3600
203            + self.minute as i64 * 60
204            + self.second as i64
205    }
206
207    /// Creates a DateTime object from a timestamp
208    pub fn from_timestamp(timestamp: i64) -> Self {
209        // Ported from http://howardhinnant.github.io/date_algorithms.html#civil_from_days
210        let (z, seconds) = (
211            (timestamp.div_euclid(86400)) + 719468,
212            timestamp.rem_euclid(86400),
213        );
214        let era: i64 = (if z >= 0 { z } else { z - 146096 }) / 146097;
215        let doe: u64 = (z - era * 146097) as u64; // [0, 146096]
216        let yoe: u64 = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; // [0, 399]
217        let y: i64 = (yoe as i64) + era * 400;
218        let doy: u64 = doe - (365 * yoe + yoe / 4 - yoe / 100); // [0, 365]
219        let mp = (5 * doy + 2) / 153; // [0, 11]
220        let d: u64 = doy - (153 * mp + 2) / 5 + 1; // [1, 31]
221        let m: u64 = if mp < 10 { mp + 3 } else { mp - 9 }; // [1, 12]
222        let (h, mn, s) = (seconds / 3600, (seconds / 60) % 60, seconds % 60);
223
224        DateTime {
225            year: (y + i64::from(m <= 2)) as u16,
226            month: m as u8,
227            day: d as u8,
228            hour: h as u8,
229            minute: mn as u8,
230            second: s as u8,
231            tz_before_gmt: false,
232            tz_hour: 0,
233            tz_minute: 0,
234        }
235    }
236
237    /// Returns the day of week where [0, 6] represents [Sun, Sat].
238    pub fn day_of_week(&self) -> u8 {
239        (((self.to_timestamp_local() as f64 / 86400.0).floor() as i64 + 4).rem_euclid(7)) as u8
240    }
241
242    /// Returns the julian day
243    pub fn julian_day(&self) -> i64 {
244        let day = self.day as i64;
245        let (month, year) = if self.month > 2 {
246            ((self.month - 3) as i64, self.year as i64)
247        } else {
248            ((self.month + 9) as i64, (self.year - 1) as i64)
249        };
250
251        let c = year / 100;
252        c * 146097 / 4 + (year - c * 100) * 1461 / 4 + (month * 153 + 2) / 5 + day + 1721119
253    }
254
255    /// Converts the DateTime to the given timezone
256    pub fn to_timezone(&self, tz: i64) -> DateTime {
257        let mut dt = DateTime::from_timestamp(self.to_timestamp() + tz);
258        dt.tz_before_gmt = tz < 0;
259        let tz = tz.abs();
260        dt.tz_hour = (tz / 3600) as u8;
261        dt.tz_minute = (tz % 3600) as u8;
262        dt
263    }
264}
265
266impl PartialOrd for DateTime {
267    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
268        Some(self.cmp(other))
269    }
270}
271
272impl Ord for DateTime {
273    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
274        match self.to_timestamp() - other.to_timestamp() {
275            0 => std::cmp::Ordering::Equal,
276            x if x > 0 => std::cmp::Ordering::Greater,
277            _ => std::cmp::Ordering::Less,
278        }
279    }
280}
281
282impl fmt::Display for DateTime {
283    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
284        fmt.write_str(&self.to_rfc3339())
285    }
286}
287
288impl<'x> MessageStream<'x> {
289    pub fn parse_date(&mut self) -> HeaderValue<'x> {
290        let mut pos = 0;
291        let mut parts = [0u32; 7];
292        let mut parts_sizes = [
293            2u32, // Day (0)
294            2u32, // Month (1)
295            4u32, // Year (2)
296            2u32, // Hour (3)
297            2u32, // Minute (4)
298            2u32, // Second (5)
299            4u32, // TZ (6)
300        ];
301        let mut month_hash: usize = 0;
302        let mut month_pos: usize = 0;
303
304        let mut is_plus = true;
305        let mut is_new_token = true;
306        let mut ignore = true;
307        let mut comment_count = 0;
308
309        while let Some(ch) = self.next() {
310            let mut next_part = false;
311
312            match ch {
313                b'\n' => {
314                    if self.try_next_is_space() {
315                        if !is_new_token && !ignore && comment_count == 0 {
316                            next_part = true;
317                        } else {
318                            continue;
319                        }
320                    } else {
321                        break;
322                    }
323                }
324                _ if comment_count > 0 => {
325                    if *ch == b')' {
326                        comment_count -= 1;
327                    } else if *ch == b'(' {
328                        comment_count += 1;
329                    } else if *ch == b'\\' {
330                        self.try_skip_char(b')');
331                    }
332                    continue;
333                }
334                b'0'..=b'9' => {
335                    if pos < 7 && parts_sizes[pos] > 0 {
336                        parts_sizes[pos] -= 1;
337                        parts[pos] += (*ch - b'0') as u32 * u32::pow(10, parts_sizes[pos]);
338
339                        ignore = false;
340                    }
341                    is_new_token = false;
342                }
343                b':' => {
344                    if !is_new_token && !ignore && (pos == 3 || pos == 4) {
345                        next_part = true;
346                    }
347                }
348                b'+' => {
349                    pos = 6;
350                }
351                b'-' => {
352                    is_plus = false;
353                    pos = 6;
354                }
355                b' ' | b'\t' => {
356                    if !is_new_token && !ignore {
357                        next_part = true;
358                    }
359                }
360                b'a'..=b'z' | b'A'..=b'Z' => {
361                    if pos == 1 {
362                        if (1..=2).contains(&month_pos) {
363                            month_hash += MONTH_HASH
364                                [(if *ch <= b'Z' { *ch + 32 } else { *ch }) as usize]
365                                as usize;
366                        }
367                        month_pos += 1;
368                    }
369                    if pos == 6 {
370                        let mut buf = [*ch, 0, 0];
371                        let zone = self.obs_zone(&mut buf);
372                        is_plus = !zone.is_negative();
373                        parts[pos] = 100 * zone.unsigned_abs();
374                        parts_sizes[pos] = 0;
375                        next_part = true;
376                    }
377                    is_new_token = false;
378                }
379                b'(' => {
380                    comment_count += 1;
381                    is_new_token = true;
382                    continue;
383                }
384                b',' | b'\r' => (),
385                b';' => {
386                    // May be parsing Received field, reset state.
387                    pos = 0;
388                    parts = [0u32; 7];
389                    parts_sizes = [
390                        2u32, // Day (0)
391                        2u32, // Month (1)
392                        4u32, // Year (2)
393                        2u32, // Hour (3)
394                        2u32, // Minute (4)
395                        2u32, // Second (5)
396                        4u32, // TZ (6)
397                    ];
398                    month_hash = 0;
399                    month_pos = 0;
400
401                    is_plus = true;
402                    is_new_token = true;
403                    ignore = true;
404                    continue;
405                }
406                _ => (),
407            }
408
409            if next_part {
410                if pos < 7 && parts_sizes[pos] > 0 {
411                    parts[pos] /= u32::pow(10, parts_sizes[pos]);
412                }
413                pos += 1;
414                is_new_token = true;
415            }
416        }
417
418        if pos >= 6 {
419            HeaderValue::DateTime(DateTime {
420                year: if (0..=49).contains(&parts[2]) {
421                    parts[2] + 2000
422                } else if (50..=99).contains(&parts[2]) {
423                    parts[2] + 1900
424                } else {
425                    parts[2]
426                } as u16,
427                month: if month_pos == 3 && month_hash <= 30 {
428                    MONTH_MAP[month_hash]
429                } else {
430                    parts[1] as u8
431                },
432                day: parts[0] as u8,
433                hour: parts[3] as u8,
434                minute: parts[4] as u8,
435                second: parts[5] as u8,
436                tz_hour: ((parts[6] / 100) % 12) as u8,
437                tz_minute: ((parts[6] % 100) % 60) as u8,
438                tz_before_gmt: !is_plus,
439            })
440        } else {
441            HeaderValue::Empty
442        }
443    }
444    // 4.3 obsolete date and time
445    fn obs_zone(&mut self, buf: &mut [u8; 3]) -> i32 {
446        let mut i = 1;
447        for &b in self.by_ref() {
448            buf[i] = b;
449            i += 1;
450            if i == 3 {
451                break;
452            }
453        }
454
455        hashify::tiny_map! { buf.as_ref(),
456            "EDT" => -4,
457            "EST" => -5,
458            "CDT" => -5,
459            "CST" => -6,
460            "MDT" => -6,
461            "MST" => -7,
462            "PDT" => -7,
463            "PST" => -8,
464        }
465        .unwrap_or(0)
466    }
467}
468
/// Per-byte weights used to hash a month abbreviation.
///
/// The table is indexed by byte value. Lowercase ASCII letters that occur as
/// the second or third letter of an English month abbreviation carry a small
/// weight; every other entry is `31`. Adding the weights of the second and
/// third letters of a month abbreviation gives an index into [`MONTH_MAP`].
#[rustfmt::skip]
static MONTH_HASH: &[u8] = &[
    // 0x00..=0x5F: no month letters
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
    // 0x60..=0x6F:  `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o
    31,  0, 14,  4, 31, 10, 31, 14, 31, 31, 31, 31,  4, 31, 10, 15,
    // 0x70..=0x7F:  p   q   r   s   t   u   v   w   x   y   z   {   |   }   ~  DEL
    15, 31,  5, 31,  0,  5, 15, 31, 31,  0, 31, 31, 31, 31, 31, 31,
    // 0x80..=0xFF: no month letters
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
];

/// Maps a month hash (sum of two [`MONTH_HASH`] weights) to the month number
/// `1..=12`; `0` marks a hash that no month abbreviation produces.
#[rustfmt::skip]
pub static MONTH_MAP: &[u8; 31] = &[
    5, 0, 0, 0, 10, //  0..=4
    3, 0, 0, 0, 7,  //  5..=9
    1, 0, 0, 0, 12, // 10..=14
    6, 0, 0, 0, 8,  // 15..=19
    4, 0, 0, 0, 2,  // 20..=24
    9, 0, 0, 0, 0,  // 25..=29
    11,             // 30
];
486
#[cfg(test)]
mod tests {
    use chrono::{FixedOffset, LocalResult, SecondsFormat, TimeZone, Utc};

    use crate::{
        parsers::{fields::load_tests, MessageStream},
        DateTime,
    };

    /// Runs every case in `date.json` through `parse_date`, compares the
    /// result with the expected value and, for valid dates, cross-checks
    /// `to_timestamp` against chrono and round-trips through `from_timestamp`.
    #[test]
    fn parse_dates() {
        for test in load_tests("date.json") {
            let datetime = MessageStream::new(test.header.as_bytes())
                .parse_date()
                .into_datetime();
            assert_eq!(datetime, test.expected, "failed for {:?}", test.header);

            // Only dates that pass validation are compared against chrono.
            let datetime = match datetime {
                Some(datetime) if datetime.is_valid() => datetime,
                _ => continue,
            };

            // Offset in seconds west of GMT, as expected by `west_opt`.
            let west_seconds = ((datetime.tz_hour as i32 * 3600i32)
                + datetime.tz_minute as i32 * 60)
                * if datetime.tz_before_gmt { 1i32 } else { -1i32 };
            let offset = FixedOffset::west_opt(west_seconds)
                .unwrap_or_else(|| FixedOffset::east_opt(0).unwrap());

            let chrono_datetime = match offset.with_ymd_and_hms(
                datetime.year as i32,
                datetime.month as u32,
                datetime.day as u32,
                datetime.hour as u32,
                datetime.minute as u32,
                datetime.second as u32,
            ) {
                LocalResult::Single(resolved) | LocalResult::Ambiguous(resolved, _) => resolved,
                _ => continue,
            };

            assert_eq!(
                chrono_datetime.timestamp(),
                datetime.to_timestamp(),
                "{} -> {} ({}) -> {} ({})",
                test.header.escape_debug(),
                datetime.to_timestamp(),
                Utc.timestamp_opt(datetime.to_timestamp(), 0)
                    .unwrap()
                    .to_rfc3339_opts(SecondsFormat::Secs, true),
                chrono_datetime.timestamp(),
                Utc.timestamp_opt(chrono_datetime.timestamp(), 0)
                    .unwrap()
                    .to_rfc3339_opts(SecondsFormat::Secs, true)
            );

            let ts = datetime.to_timestamp();
            assert_eq!(DateTime::from_timestamp(ts).to_timestamp(), ts);
        }
    }
}