Skip to main content

mail_parser/parsers/fields/
date.rs

1/*
2 * SPDX-FileCopyrightText: 2020 Stalwart Labs LLC <hello@stalw.art>
3 *
4 * SPDX-License-Identifier: Apache-2.0 OR MIT
5 */
6
7use std::fmt;
8
9use crate::{parsers::MessageStream, DateTime, HeaderValue};
10
/// Three-letter English day-of-week abbreviations, indexed `0..=6` from
/// Sunday to Saturday (the same numbering `DateTime::day_of_week` returns).
pub static DOW: &[&str] = &["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];
/// Three-letter English month abbreviations; index 0 is January and
/// index 11 is December.
pub static MONTH: &[&str] = &[
    "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
];
15
16impl DateTime {
17    /// Parses an RFC822 date
18    pub fn parse_rfc822(value: &str) -> Option<Self> {
19        match MessageStream::new(value.as_bytes()).parse_date() {
20            HeaderValue::DateTime(dt) => dt.into(),
21            _ => None,
22        }
23    }
24
25    /// Parses an RFC3339 date
26    pub fn parse_rfc3339(value: &str) -> Option<Self> {
27        // 2004 - 06 - 28 T 23 : 43 : 45 . 000 Z
28        // 1969 - 02 - 13 T 23 : 32 : 00 - 03 : 30
29        //   0     1    2    3    4    5    6    7
30
31        let mut pos = 0;
32        let mut parts = [0u32; 8];
33        let mut parts_sizes = [
34            4u32, // Year (0)
35            2u32, // Month (1)
36            2u32, // Day (2)
37            2u32, // Hour (3)
38            2u32, // Minute (4)
39            2u32, // Second (5)
40            2u32, // TZ Hour (6)
41            2u32, // TZ Minute (7)
42        ];
43        let mut skip_digits = false;
44        let mut is_plus = true;
45
46        for ch in value.as_bytes() {
47            match ch {
48                b'0'..=b'9' if !skip_digits => {
49                    if parts_sizes[pos] > 0 {
50                        parts_sizes[pos] -= 1;
51                        parts[pos] += (*ch - b'0') as u32 * u32::pow(10, parts_sizes[pos]);
52                    } else {
53                        return None;
54                    }
55                }
56                b'-' => {
57                    if pos <= 1 {
58                        pos += 1;
59                    } else if pos == 5 {
60                        pos += 1;
61                        is_plus = false;
62                        skip_digits = false;
63                    } else {
64                        return None;
65                    }
66                }
67                b'T' => {
68                    if pos == 2 {
69                        pos += 1;
70                    } else {
71                        return None;
72                    }
73                }
74                b':' => {
75                    if [3, 4, 6].contains(&pos) {
76                        pos += 1;
77                    } else {
78                        return None;
79                    }
80                }
81                b'+' => {
82                    if pos == 5 {
83                        pos += 1;
84                        skip_digits = false;
85                    } else {
86                        return None;
87                    }
88                }
89                b'.' => {
90                    if pos == 5 {
91                        skip_digits = true;
92                    } else {
93                        return None;
94                    }
95                }
96
97                _ => (),
98            }
99        }
100
101        if pos >= 5 {
102            DateTime {
103                year: parts[0] as u16,
104                month: parts[1] as u8,
105                day: parts[2] as u8,
106                hour: parts[3] as u8,
107                minute: parts[4] as u8,
108                second: parts[5] as u8,
109                tz_hour: parts[6] as u8,
110                tz_minute: parts[7] as u8,
111                tz_before_gmt: !is_plus,
112            }
113            .into()
114        } else {
115            None
116        }
117    }
118
119    /// Return an RFC822 date
120    pub fn to_rfc822(&self) -> String {
121        format!(
122            "{}, {} {} {:04} {:02}:{:02}:{:02} {}{:02}{:02}",
123            DOW[self.day_of_week() as usize],
124            self.day,
125            MONTH
126                .get(self.month.saturating_sub(1) as usize)
127                .unwrap_or(&""),
128            self.year,
129            self.hour,
130            self.minute,
131            self.second,
132            if self.tz_before_gmt && (self.tz_hour > 0 || self.tz_minute > 0) {
133                "-"
134            } else {
135                "+"
136            },
137            self.tz_hour,
138            self.tz_minute
139        )
140    }
141
142    /// Returns an RFC3339 representation of the parsed RFC5322 datetime field
143    pub fn to_rfc3339(&self) -> String {
144        if self.tz_hour != 0 || self.tz_minute != 0 {
145            format!(
146                "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}{}{:02}:{:02}",
147                self.year,
148                self.month,
149                self.day,
150                self.hour,
151                self.minute,
152                self.second,
153                if self.tz_before_gmt && (self.tz_hour > 0 || self.tz_minute > 0) {
154                    "-"
155                } else {
156                    "+"
157                },
158                self.tz_hour,
159                self.tz_minute
160            )
161        } else {
162            format!(
163                "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z",
164                self.year, self.month, self.day, self.hour, self.minute, self.second,
165            )
166        }
167    }
168
169    /// Returns true if the date is valid
170    pub fn is_valid(&self) -> bool {
171        (0..=23).contains(&self.tz_hour)
172            && (1900..=3000).contains(&self.year)
173            && (0..=59).contains(&self.tz_minute)
174            && (1..=12).contains(&self.month)
175            && (1..=31).contains(&self.day)
176            && (0..=23).contains(&self.hour)
177            && (0..=59).contains(&self.minute)
178            && (0..=59).contains(&self.second)
179    }
180
181    /// Returns the numbers of seconds since 1970-01-01T00:00:00Z (Unix epoch)
182    /// or None if the date is invalid.
183    pub fn to_timestamp(&self) -> i64 {
184        self.to_timestamp_local()
185            + ((self.tz_hour as i64 * 3600 + self.tz_minute as i64 * 60)
186                * if self.tz_before_gmt { 1 } else { -1 })
187    }
188
189    /// Returns the numbers of seconds since 1970-01-01T00:00:00Z (Unix epoch) in local time
190    /// or None if the date is invalid.
191    pub fn to_timestamp_local(&self) -> i64 {
192        // Ported from https://github.com/protocolbuffers/upb/blob/22182e6e/upb/json_decode.c#L982-L992
193        let month = self.month as u32;
194        let year_base = 4800; /* Before min year, multiple of 400. */
195        let m_adj = month.wrapping_sub(3); /* March-based month. */
196        let carry = i64::from(m_adj > month);
197        let adjust = if carry > 0 { 12 } else { 0 };
198        let y_adj = self.year as i64 + year_base - carry;
199        let month_days = ((m_adj.wrapping_add(adjust)) * 62719 + 769) / 2048;
200        let leap_days = y_adj / 4 - y_adj / 100 + y_adj / 400;
201        (y_adj * 365 + leap_days + month_days as i64 + (self.day as i64 - 1) - 2472632) * 86400
202            + self.hour as i64 * 3600
203            + self.minute as i64 * 60
204            + self.second as i64
205    }
206
207    /// Creates a DateTime object from a timestamp
208    pub fn from_timestamp(timestamp: i64) -> Self {
209        // Ported from http://howardhinnant.github.io/date_algorithms.html#civil_from_days
210        let (z, seconds) = (
211            (timestamp.div_euclid(86400)) + 719468,
212            timestamp.rem_euclid(86400),
213        );
214        let era: i64 = (if z >= 0 { z } else { z - 146096 }) / 146097;
215        let doe: u64 = (z - era * 146097) as u64; // [0, 146096]
216        let yoe: u64 = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; // [0, 399]
217        let y: i64 = (yoe as i64) + era * 400;
218        let doy: u64 = doe - (365 * yoe + yoe / 4 - yoe / 100); // [0, 365]
219        let mp = (5 * doy + 2) / 153; // [0, 11]
220        let d: u64 = doy - (153 * mp + 2) / 5 + 1; // [1, 31]
221        let m: u64 = if mp < 10 { mp + 3 } else { mp - 9 }; // [1, 12]
222        let (h, mn, s) = (seconds / 3600, (seconds / 60) % 60, seconds % 60);
223
224        DateTime {
225            year: (y + i64::from(m <= 2)) as u16,
226            month: m as u8,
227            day: d as u8,
228            hour: h as u8,
229            minute: mn as u8,
230            second: s as u8,
231            tz_before_gmt: false,
232            tz_hour: 0,
233            tz_minute: 0,
234        }
235    }
236
237    /// Returns the day of week where [0, 6] represents [Sun, Sat].
238    pub fn day_of_week(&self) -> u8 {
239        (((self.to_timestamp_local() as f64 / 86400.0).floor() as i64 + 4).rem_euclid(7)) as u8
240    }
241
242    /// Returns the julian day
243    pub fn julian_day(&self) -> i64 {
244        let day = self.day as i64;
245        let (month, year) = if self.month > 2 {
246            ((self.month - 3) as i64, self.year as i64)
247        } else {
248            ((self.month + 9) as i64, (self.year - 1) as i64)
249        };
250
251        let c = year / 100;
252        c * 146097 / 4 + (year - c * 100) * 1461 / 4 + (month * 153 + 2) / 5 + day + 1721119
253    }
254
255    /// Converts the DateTime to the given timezone
256    pub fn to_timezone(&self, tz: i64) -> DateTime {
257        let mut dt = DateTime::from_timestamp(self.to_timestamp() + tz);
258        dt.tz_before_gmt = tz < 0;
259        let tz = tz.abs();
260        dt.tz_hour = (tz / 3600) as u8;
261        dt.tz_minute = (tz % 3600) as u8;
262        dt
263    }
264}
265
266impl PartialOrd for DateTime {
267    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
268        Some(self.cmp(other))
269    }
270}
271
272impl Ord for DateTime {
273    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
274        match self.to_timestamp() - other.to_timestamp() {
275            0 => std::cmp::Ordering::Equal,
276            x if x > 0 => std::cmp::Ordering::Greater,
277            _ => std::cmp::Ordering::Less,
278        }
279    }
280}
281
282impl fmt::Display for DateTime {
283    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
284        fmt.write_str(&self.to_rfc3339())
285    }
286}
287
288impl<'x> MessageStream<'x> {
    /// Parses a date from the stream, in the style of an RFC822/RFC5322 date
    /// header (`day month year hour:minute:second zone`).
    ///
    /// Bytes are consumed until the stream ends or a line break that is not
    /// followed by whitespace is found. Returns `HeaderValue::DateTime` when
    /// the field cursor reached the time-zone position (6) or beyond, and
    /// `HeaderValue::Empty` otherwise. The resulting fields are not range
    /// checked here.
    pub fn parse_date(&mut self) -> HeaderValue<'x> {
        // Index of the field currently being filled (see `parts_sizes`).
        let mut pos = 0;
        let mut parts = [0u32; 7];
        // Number of digits each field may still accept.
        let mut parts_sizes = [
            2u32, // Day (0)
            2u32, // Month (1)
            4u32, // Year (2)
            2u32, // Hour (3)
            2u32, // Minute (4)
            2u32, // Second (5)
            4u32, // TZ (6)
        ];
        // Sum of the MONTH_HASH weights of the 2nd and 3rd letters of the month name.
        let mut month_hash: usize = 0;
        // Number of letters seen while in the month field.
        let mut month_pos: usize = 0;

        let mut is_plus = true;
        // True until the first digit or letter of the current token is seen.
        let mut is_new_token = true;
        // True until the first digit has been stored; separators are ignored while set.
        let mut ignore = true;
        // Nesting depth of parenthesised comments.
        let mut comment_count = 0;

        while let Some(ch) = self.next() {
            let mut next_part = false;

            match ch {
                b'\n' => {
                    // A line break followed by whitespace continues the value
                    // (presumably header folding; see `try_next_is_space`);
                    // otherwise the value ends here.
                    if self.try_next_is_space() {
                        if !is_new_token && !ignore && comment_count == 0 {
                            next_part = true;
                        } else {
                            continue;
                        }
                    } else {
                        break;
                    }
                }
                // Inside a comment: only track nesting, everything else is skipped.
                _ if comment_count > 0 => {
                    if *ch == b')' {
                        comment_count -= 1;
                    } else if *ch == b'(' {
                        comment_count += 1;
                    } else if *ch == b'\\' {
                        // Presumably skips an escaped ')' so it does not close the comment.
                        self.try_skip_char(b')');
                    }
                    continue;
                }
                b'0'..=b'9' => {
                    // Digits are stored from the most significant position down;
                    // shorter tokens are rescaled when the field is closed.
                    // Surplus digits are dropped.
                    if pos < 7 && parts_sizes[pos] > 0 {
                        parts_sizes[pos] -= 1;
                        parts[pos] += (*ch - b'0') as u32 * u32::pow(10, parts_sizes[pos]);

                        ignore = false;
                    }
                    is_new_token = false;
                }
                // ':' only separates hour/minute and minute/second.
                b':' if !is_new_token && !ignore && (pos == 3 || pos == 4) => {
                    next_part = true;
                }
                // A sign jumps straight to the time-zone field.
                b'+' => {
                    pos = 6;
                }
                b'-' => {
                    is_plus = false;
                    pos = 6;
                }
                b' ' | b'\t' if !is_new_token && !ignore => {
                    next_part = true;
                }
                b'a'..=b'z' | b'A'..=b'Z' => {
                    if pos == 1 {
                        // Month name: hash the lower-cased 2nd and 3rd letters.
                        if (1..=2).contains(&month_pos) {
                            month_hash += MONTH_HASH
                                [(if *ch <= b'Z' { *ch + 32 } else { *ch }) as usize]
                                as usize;
                        }
                        month_pos += 1;
                    }
                    if pos == 6 {
                        // Alphabetic zone name: `obs_zone` returns the offset in
                        // hours, stored here in HHMM form.
                        let mut buf = [*ch, 0, 0];
                        let zone = self.obs_zone(&mut buf);
                        is_plus = !zone.is_negative();
                        parts[pos] = 100 * zone.unsigned_abs();
                        parts_sizes[pos] = 0;
                        next_part = true;
                    }
                    is_new_token = false;
                }
                b'(' => {
                    comment_count += 1;
                    is_new_token = true;
                    continue;
                }
                b',' | b'\r' => (),
                b';' => {
                    // May be parsing Received field, reset state.
                    pos = 0;
                    parts = [0u32; 7];
                    parts_sizes = [
                        2u32, // Day (0)
                        2u32, // Month (1)
                        4u32, // Year (2)
                        2u32, // Hour (3)
                        2u32, // Minute (4)
                        2u32, // Second (5)
                        4u32, // TZ (6)
                    ];
                    month_hash = 0;
                    month_pos = 0;

                    is_plus = true;
                    is_new_token = true;
                    ignore = true;
                    continue;
                }
                _ => (),
            }

            if next_part {
                // The field received fewer digits than its width: drop the
                // unused low-order positions.
                if pos < 7 && parts_sizes[pos] > 0 {
                    parts[pos] /= u32::pow(10, parts_sizes[pos]);
                }
                pos += 1;
                is_new_token = true;
            }
        }

        if pos >= 6 {
            HeaderValue::DateTime(DateTime {
                // Years 0-49 are read as 2000-2049 and 50-99 as 1950-1999.
                year: if (0..=49).contains(&parts[2]) {
                    parts[2] + 2000
                } else if (50..=99).contains(&parts[2]) {
                    parts[2] + 1900
                } else {
                    parts[2]
                } as u16,
                // Prefer the month name (exactly three letters with a hash in
                // range); otherwise fall back to a numeric month.
                month: if month_pos == 3 && month_hash <= 30 {
                    MONTH_MAP[month_hash]
                } else {
                    parts[1] as u8
                },
                day: parts[0] as u8,
                hour: parts[3] as u8,
                minute: parts[4] as u8,
                second: parts[5] as u8,
                // The zone is held in HHMM form.
                // NOTE(review): `% 12` maps offsets such as +1200 or +1300 to 0
                // and 1 hours; confirm this wrap-around is intended.
                tz_hour: ((parts[6] / 100) % 12) as u8,
                tz_minute: ((parts[6] % 100) % 60) as u8,
                tz_before_gmt: !is_plus,
            })
        } else {
            HeaderValue::Empty
        }
    }
440    // 4.3 obsolete date and time
441    fn obs_zone(&mut self, buf: &mut [u8; 3]) -> i32 {
442        let mut i = 1;
443        for &b in self.by_ref() {
444            buf[i] = b;
445            i += 1;
446            if i == 3 {
447                break;
448            }
449        }
450
451        hashify::tiny_map! { buf.as_ref(),
452            "EDT" => -4,
453            "EST" => -5,
454            "CDT" => -5,
455            "CST" => -6,
456            "MDT" => -6,
457            "MST" => -7,
458            "PDT" => -7,
459            "PST" => -8,
460        }
461        .unwrap_or(0)
462    }
463}
464
/// Per-byte weights used by `parse_date` to hash month names.
///
/// The parser adds up the entries for the lower-cased second and third
/// letters of the month token and uses the sum as an index into `MONTH_MAP`.
/// The value 31 marks bytes that do not occur in those positions of any
/// English month abbreviation; because the parser rejects sums above 30, a
/// single such byte invalidates the hash.
static MONTH_HASH: &[u8] = &[
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
    31, 0, 14, 4, 31, 10, 31, 14, 31, 31, 31, 31, 4, 31, 10, 15, 15, 31, 5, 31, 0, 5, 15, 31, 31,
    0, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
];
478
/// Maps a month-name hash (the sum of two `MONTH_HASH` weights, `0..=30`) to
/// the month number `1..=12`. Slots holding 0 do not correspond to any month.
pub static MONTH_MAP: &[u8; 31] = &[
    5, 0, 0, 0, 10, 3, 0, 0, 0, 7, 1, 0, 0, 0, 12, 6, 0, 0, 0, 8, 4, 0, 0, 0, 2, 9, 0, 0, 0, 0, 11,
];
482
#[cfg(test)]
mod tests {
    use chrono::{FixedOffset, LocalResult, SecondsFormat, TimeZone, Utc};

    use crate::{
        parsers::{fields::load_tests, MessageStream},
        DateTime,
    };

    /// Runs every case loaded from `date.json` through `parse_date`, compares
    /// the result with the expected value, and for valid dates cross-checks
    /// the computed timestamp against chrono.
    #[test]
    fn parse_dates() {
        for test in load_tests("date.json") {
            let datetime = MessageStream::new(test.header.as_bytes())
                .parse_date()
                .into_datetime();
            assert_eq!(datetime, test.expected, "failed for {:?}", test.header);

            match datetime {
                Some(datetime) if datetime.is_valid() => {
                    // Build the same instant with chrono. The offset is passed
                    // as seconds west of UTC, so it is positive when the zone
                    // is before GMT; offsets chrono rejects fall back to UTC.
                    if let LocalResult::Single(chrono_datetime)
                    | LocalResult::Ambiguous(chrono_datetime, _) = FixedOffset::west_opt(
                        ((datetime.tz_hour as i32 * 3600i32) + datetime.tz_minute as i32 * 60)
                            * if datetime.tz_before_gmt { 1i32 } else { -1i32 },
                    )
                    .unwrap_or_else(|| FixedOffset::east_opt(0).unwrap())
                    .with_ymd_and_hms(
                        datetime.year as i32,
                        datetime.month as u32,
                        datetime.day as u32,
                        datetime.hour as u32,
                        datetime.minute as u32,
                        datetime.second as u32,
                    ) {
                        // Both implementations must agree on the Unix timestamp.
                        assert_eq!(
                            chrono_datetime.timestamp(),
                            datetime.to_timestamp(),
                            "{} -> {} ({}) -> {} ({})",
                            test.header.escape_debug(),
                            datetime.to_timestamp(),
                            Utc.timestamp_opt(datetime.to_timestamp(), 0)
                                .unwrap()
                                .to_rfc3339_opts(SecondsFormat::Secs, true),
                            chrono_datetime.timestamp(),
                            Utc.timestamp_opt(chrono_datetime.timestamp(), 0)
                                .unwrap()
                                .to_rfc3339_opts(SecondsFormat::Secs, true)
                        );
                        // Converting the timestamp back and forth must be lossless.
                        let ts = datetime.to_timestamp();
                        assert_eq!(DateTime::from_timestamp(ts).to_timestamp(), ts);
                    }
                }
                _ => {}
            }
        }
    }
}