mail_parser/parsers/fields/
date.rs

1/*
2 * SPDX-FileCopyrightText: 2020 Stalwart Labs LLC <hello@stalw.art>
3 *
4 * SPDX-License-Identifier: Apache-2.0 OR MIT
5 */
6
7use std::fmt;
8
9use crate::{parsers::MessageStream, DateTime, HeaderValue};
10
/// Three-letter English day-of-week abbreviations, indexed from Sunday (`0`)
/// to Saturday (`6`).
pub static DOW: &[&str] = &[
    "Sun", // 0
    "Mon", // 1
    "Tue", // 2
    "Wed", // 3
    "Thu", // 4
    "Fri", // 5
    "Sat", // 6
];

/// Three-letter English month abbreviations, indexed by `month - 1`
/// (`0` is January, `11` is December).
pub static MONTH: &[&str] = &[
    "Jan", // 1
    "Feb", // 2
    "Mar", // 3
    "Apr", // 4
    "May", // 5
    "Jun", // 6
    "Jul", // 7
    "Aug", // 8
    "Sep", // 9
    "Oct", // 10
    "Nov", // 11
    "Dec", // 12
];
15
16impl DateTime {
17    /// Parses an RFC822 date
18    pub fn parse_rfc822(value: &str) -> Option<Self> {
19        match MessageStream::new(value.as_bytes()).parse_date() {
20            HeaderValue::DateTime(dt) => dt.into(),
21            _ => None,
22        }
23    }
24
25    /// Parses an RFC3339 date
26    pub fn parse_rfc3339(value: &str) -> Option<Self> {
27        // 2004 - 06 - 28 T 23 : 43 : 45 . 000 Z
28        // 1969 - 02 - 13 T 23 : 32 : 00 - 03 : 30
29        //   0     1    2    3    4    5    6    7
30
31        let mut pos = 0;
32        let mut parts = [0u32; 8];
33        let mut parts_sizes = [
34            4u32, // Year (0)
35            2u32, // Month (1)
36            2u32, // Day (2)
37            2u32, // Hour (3)
38            2u32, // Minute (4)
39            2u32, // Second (5)
40            2u32, // TZ Hour (6)
41            2u32, // TZ Minute (7)
42        ];
43        let mut skip_digits = false;
44        let mut is_plus = true;
45
46        for ch in value.as_bytes() {
47            match ch {
48                b'0'..=b'9' if !skip_digits => {
49                    if parts_sizes[pos] > 0 {
50                        parts_sizes[pos] -= 1;
51                        parts[pos] += (*ch - b'0') as u32 * u32::pow(10, parts_sizes[pos]);
52                    } else {
53                        return None;
54                    }
55                }
56                b'-' => {
57                    if pos <= 1 {
58                        pos += 1;
59                    } else if pos == 5 {
60                        pos += 1;
61                        is_plus = false;
62                        skip_digits = false;
63                    } else {
64                        return None;
65                    }
66                }
67                b'T' => {
68                    if pos == 2 {
69                        pos += 1;
70                    } else {
71                        return None;
72                    }
73                }
74                b':' => {
75                    if [3, 4, 6].contains(&pos) {
76                        pos += 1;
77                    } else {
78                        return None;
79                    }
80                }
81                b'+' => {
82                    if pos == 5 {
83                        pos += 1;
84                        skip_digits = false;
85                    } else {
86                        return None;
87                    }
88                }
89                b'.' => {
90                    if pos == 5 {
91                        skip_digits = true;
92                    } else {
93                        return None;
94                    }
95                }
96
97                _ => (),
98            }
99        }
100
101        if pos >= 5 {
102            DateTime {
103                year: parts[0] as u16,
104                month: parts[1] as u8,
105                day: parts[2] as u8,
106                hour: parts[3] as u8,
107                minute: parts[4] as u8,
108                second: parts[5] as u8,
109                tz_hour: parts[6] as u8,
110                tz_minute: parts[7] as u8,
111                tz_before_gmt: !is_plus,
112            }
113            .into()
114        } else {
115            None
116        }
117    }
118
119    /// Return an RFC822 date
120    pub fn to_rfc822(&self) -> String {
121        format!(
122            "{}, {} {} {:04} {:02}:{:02}:{:02} {}{:02}{:02}",
123            DOW[self.day_of_week() as usize],
124            self.day,
125            MONTH
126                .get(self.month.saturating_sub(1) as usize)
127                .unwrap_or(&""),
128            self.year,
129            self.hour,
130            self.minute,
131            self.second,
132            if self.tz_before_gmt && (self.tz_hour > 0 || self.tz_minute > 0) {
133                "-"
134            } else {
135                "+"
136            },
137            self.tz_hour,
138            self.tz_minute
139        )
140    }
141
142    /// Returns an RFC3339 representation of the parsed RFC5322 datetime field
143    pub fn to_rfc3339(&self) -> String {
144        if self.tz_hour != 0 || self.tz_minute != 0 {
145            format!(
146                "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}{}{:02}:{:02}",
147                self.year,
148                self.month,
149                self.day,
150                self.hour,
151                self.minute,
152                self.second,
153                if self.tz_before_gmt && (self.tz_hour > 0 || self.tz_minute > 0) {
154                    "-"
155                } else {
156                    "+"
157                },
158                self.tz_hour,
159                self.tz_minute
160            )
161        } else {
162            format!(
163                "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z",
164                self.year, self.month, self.day, self.hour, self.minute, self.second,
165            )
166        }
167    }
168
169    /// Returns true if the date is valid
170    pub fn is_valid(&self) -> bool {
171        (0..=23).contains(&self.tz_hour)
172            && (1900..=3000).contains(&self.year)
173            && (0..=59).contains(&self.tz_minute)
174            && (1..=12).contains(&self.month)
175            && (1..=31).contains(&self.day)
176            && (0..=23).contains(&self.hour)
177            && (0..=59).contains(&self.minute)
178            && (0..=59).contains(&self.second)
179    }
180
181    /// Returns the numbers of seconds since 1970-01-01T00:00:00Z (Unix epoch)
182    /// or None if the date is invalid.
183    pub fn to_timestamp(&self) -> i64 {
184        self.to_timestamp_local()
185            + ((self.tz_hour as i64 * 3600 + self.tz_minute as i64 * 60)
186                * if self.tz_before_gmt { 1 } else { -1 })
187    }
188
189    /// Returns the numbers of seconds since 1970-01-01T00:00:00Z (Unix epoch) in local time
190    /// or None if the date is invalid.
191    pub fn to_timestamp_local(&self) -> i64 {
192        // Ported from https://github.com/protocolbuffers/upb/blob/22182e6e/upb/json_decode.c#L982-L992
193        let month = self.month as u32;
194        let year_base = 4800; /* Before min year, multiple of 400. */
195        let m_adj = month.wrapping_sub(3); /* March-based month. */
196        let carry = i64::from(m_adj > month);
197        let adjust = if carry > 0 { 12 } else { 0 };
198        let y_adj = self.year as i64 + year_base - carry;
199        let month_days = ((m_adj.wrapping_add(adjust)) * 62719 + 769) / 2048;
200        let leap_days = y_adj / 4 - y_adj / 100 + y_adj / 400;
201        (y_adj * 365 + leap_days + month_days as i64 + (self.day as i64 - 1) - 2472632) * 86400
202            + self.hour as i64 * 3600
203            + self.minute as i64 * 60
204            + self.second as i64
205    }
206
207    /// Creates a DateTime object from a timestamp
208    pub fn from_timestamp(timestamp: i64) -> Self {
209        // Ported from http://howardhinnant.github.io/date_algorithms.html#civil_from_days
210        let (z, seconds) = ((timestamp / 86400) + 719468, timestamp % 86400);
211        let era: i64 = (if z >= 0 { z } else { z - 146096 }) / 146097;
212        let doe: u64 = (z - era * 146097) as u64; // [0, 146096]
213        let yoe: u64 = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; // [0, 399]
214        let y: i64 = (yoe as i64) + era * 400;
215        let doy: u64 = doe - (365 * yoe + yoe / 4 - yoe / 100); // [0, 365]
216        let mp = (5 * doy + 2) / 153; // [0, 11]
217        let d: u64 = doy - (153 * mp + 2) / 5 + 1; // [1, 31]
218        let m: u64 = if mp < 10 { mp + 3 } else { mp - 9 }; // [1, 12]
219        let (h, mn, s) = (seconds / 3600, (seconds / 60) % 60, seconds % 60);
220
221        DateTime {
222            year: (y + i64::from(m <= 2)) as u16,
223            month: m as u8,
224            day: d as u8,
225            hour: h as u8,
226            minute: mn as u8,
227            second: s as u8,
228            tz_before_gmt: false,
229            tz_hour: 0,
230            tz_minute: 0,
231        }
232    }
233
234    /// Returns the day of week where [0, 6] represents [Sun, Sat].
235    pub fn day_of_week(&self) -> u8 {
236        (((self.to_timestamp_local() as f64 / 86400.0).floor() as i64 + 4).rem_euclid(7)) as u8
237    }
238
239    /// Returns the julian day
240    pub fn julian_day(&self) -> i64 {
241        let day = self.day as i64;
242        let (month, year) = if self.month > 2 {
243            ((self.month - 3) as i64, self.year as i64)
244        } else {
245            ((self.month + 9) as i64, (self.year - 1) as i64)
246        };
247
248        let c = year / 100;
249        c * 146097 / 4 + (year - c * 100) * 1461 / 4 + (month * 153 + 2) / 5 + day + 1721119
250    }
251
252    /// Converts the DateTime to the given timezone
253    pub fn to_timezone(&self, tz: i64) -> DateTime {
254        let mut dt = DateTime::from_timestamp(self.to_timestamp() + tz);
255        dt.tz_before_gmt = tz < 0;
256        let tz = tz.abs();
257        dt.tz_hour = (tz / 3600) as u8;
258        dt.tz_minute = (tz % 3600) as u8;
259        dt
260    }
261}
262
263impl PartialOrd for DateTime {
264    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
265        Some(self.cmp(other))
266    }
267}
268
269impl Ord for DateTime {
270    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
271        match self.to_timestamp() - other.to_timestamp() {
272            0 => std::cmp::Ordering::Equal,
273            x if x > 0 => std::cmp::Ordering::Greater,
274            _ => std::cmp::Ordering::Less,
275        }
276    }
277}
278
279impl fmt::Display for DateTime {
280    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
281        fmt.write_str(&self.to_rfc3339())
282    }
283}
284
285impl<'x> MessageStream<'x> {
286    pub fn parse_date(&mut self) -> HeaderValue<'x> {
287        let mut pos = 0;
288        let mut parts = [0u32; 7];
289        let mut parts_sizes = [
290            2u32, // Day (0)
291            2u32, // Month (1)
292            4u32, // Year (2)
293            2u32, // Hour (3)
294            2u32, // Minute (4)
295            2u32, // Second (5)
296            4u32, // TZ (6)
297        ];
298        let mut month_hash: usize = 0;
299        let mut month_pos: usize = 0;
300
301        let mut is_plus = true;
302        let mut is_new_token = true;
303        let mut ignore = true;
304        let mut comment_count = 0;
305
306        while let Some(ch) = self.next() {
307            let mut next_part = false;
308
309            match ch {
310                b'\n' => {
311                    if self.try_next_is_space() {
312                        if !is_new_token && !ignore && comment_count == 0 {
313                            next_part = true;
314                        } else {
315                            continue;
316                        }
317                    } else {
318                        break;
319                    }
320                }
321                _ if comment_count > 0 => {
322                    if *ch == b')' {
323                        comment_count -= 1;
324                    } else if *ch == b'(' {
325                        comment_count += 1;
326                    } else if *ch == b'\\' {
327                        self.try_skip_char(b')');
328                    }
329                    continue;
330                }
331                b'0'..=b'9' => {
332                    if pos < 7 && parts_sizes[pos] > 0 {
333                        parts_sizes[pos] -= 1;
334                        parts[pos] += (*ch - b'0') as u32 * u32::pow(10, parts_sizes[pos]);
335
336                        ignore = false;
337                    }
338                    is_new_token = false;
339                }
340                b':' => {
341                    if !is_new_token && !ignore && (pos == 3 || pos == 4) {
342                        next_part = true;
343                    }
344                }
345                b'+' => {
346                    pos = 6;
347                }
348                b'-' => {
349                    is_plus = false;
350                    pos = 6;
351                }
352                b' ' | b'\t' => {
353                    if !is_new_token && !ignore {
354                        next_part = true;
355                    }
356                }
357                b'a'..=b'z' | b'A'..=b'Z' => {
358                    if pos == 1 {
359                        if (1..=2).contains(&month_pos) {
360                            month_hash += MONTH_HASH
361                                [(if *ch <= b'Z' { *ch + 32 } else { *ch }) as usize]
362                                as usize;
363                        }
364                        month_pos += 1;
365                    }
366                    if pos == 6 {
367                        let mut buf = [*ch, 0, 0];
368                        let zone = self.obs_zone(&mut buf);
369                        is_plus = !zone.is_negative();
370                        parts[pos] = 100 * zone.unsigned_abs();
371                        parts_sizes[pos] = 0;
372                        next_part = true;
373                    }
374                    is_new_token = false;
375                }
376                b'(' => {
377                    comment_count += 1;
378                    is_new_token = true;
379                    continue;
380                }
381                b',' | b'\r' => (),
382                b';' => {
383                    // May be parsing Received field, reset state.
384                    pos = 0;
385                    parts = [0u32; 7];
386                    parts_sizes = [
387                        2u32, // Day (0)
388                        2u32, // Month (1)
389                        4u32, // Year (2)
390                        2u32, // Hour (3)
391                        2u32, // Minute (4)
392                        2u32, // Second (5)
393                        4u32, // TZ (6)
394                    ];
395                    month_hash = 0;
396                    month_pos = 0;
397
398                    is_plus = true;
399                    is_new_token = true;
400                    ignore = true;
401                    continue;
402                }
403                _ => (),
404            }
405
406            if next_part {
407                if pos < 7 && parts_sizes[pos] > 0 {
408                    parts[pos] /= u32::pow(10, parts_sizes[pos]);
409                }
410                pos += 1;
411                is_new_token = true;
412            }
413        }
414
415        if pos >= 6 {
416            HeaderValue::DateTime(DateTime {
417                year: if (0..=49).contains(&parts[2]) {
418                    parts[2] + 2000
419                } else if (50..=99).contains(&parts[2]) {
420                    parts[2] + 1900
421                } else {
422                    parts[2]
423                } as u16,
424                month: if month_pos == 3 && month_hash <= 30 {
425                    MONTH_MAP[month_hash]
426                } else {
427                    parts[1] as u8
428                },
429                day: parts[0] as u8,
430                hour: parts[3] as u8,
431                minute: parts[4] as u8,
432                second: parts[5] as u8,
433                tz_hour: ((parts[6] / 100) % 12) as u8,
434                tz_minute: ((parts[6] % 100) % 60) as u8,
435                tz_before_gmt: !is_plus,
436            })
437        } else {
438            HeaderValue::Empty
439        }
440    }
441    // 4.3 obsolete date and time
442    fn obs_zone(&mut self, buf: &mut [u8; 3]) -> i32 {
443        let mut i = 1;
444        for &b in self.by_ref() {
445            buf[i] = b;
446            i += 1;
447            if i == 3 {
448                break;
449            }
450        }
451
452        hashify::tiny_map! { buf.as_ref(),
453            "EDT" => -4,
454            "EST" => -5,
455            "CDT" => -5,
456            "CST" => -6,
457            "MDT" => -6,
458            "MST" => -7,
459            "PDT" => -7,
460            "PST" => -8,
461        }
462        .unwrap_or(0)
463    }
464}
465
/// Per-byte hash values used by `parse_date` to recognise month names.
///
/// The table has 256 entries and is indexed by a lowercased ASCII letter.
/// The values of the second and third letters of a month name are added, and
/// the sum is used as an index into `MONTH_MAP`.
static MONTH_HASH: &[u8] = &build_month_hash();

/// Builds the 256-entry table behind `MONTH_HASH`.
const fn build_month_hash() -> [u8; 256] {
    // 31 marks every byte that is not the second or third letter of a month
    // name. Any sum containing it exceeds 30, the largest index that
    // `parse_date` accepts.
    let mut table = [31u8; 256];
    table[b'a' as usize] = 0;
    table[b'b' as usize] = 14;
    table[b'c' as usize] = 4;
    table[b'e' as usize] = 10;
    table[b'g' as usize] = 14;
    table[b'l' as usize] = 4;
    table[b'n' as usize] = 10;
    table[b'o' as usize] = 15;
    table[b'p' as usize] = 15;
    table[b'r' as usize] = 5;
    table[b't' as usize] = 0;
    table[b'u' as usize] = 5;
    table[b'v' as usize] = 15;
    table[b'y' as usize] = 0;
    table
}
479
/// Maps a month-name hash (the sum of two `MONTH_HASH` values, `0..=30`) to
/// the month number `1..=12`. Slots that no month name hashes to hold `0`.
pub static MONTH_MAP: &[u8; 31] = &build_month_map();

/// Builds the 31-entry table behind `MONTH_MAP`.
const fn build_month_map() -> [u8; 31] {
    let mut map = [0u8; 31];
    map[10] = 1; // Jan
    map[24] = 2; // Feb
    map[5] = 3; // Mar
    map[20] = 4; // Apr
    map[0] = 5; // May
    map[15] = 6; // Jun
    map[9] = 7; // Jul
    map[19] = 8; // Aug
    map[25] = 9; // Sep
    map[4] = 10; // Oct
    map[30] = 11; // Nov
    map[14] = 12; // Dec
    map
}
483
#[cfg(test)]
mod tests {
    use chrono::{FixedOffset, LocalResult, SecondsFormat, TimeZone, Utc};

    use crate::parsers::{fields::load_tests, MessageStream};

    /// Parses every header in `date.json`, compares the result with the
    /// recorded expectation and, for valid dates, cross-checks
    /// `to_timestamp` against chrono.
    #[test]
    fn parse_dates() {
        for test in load_tests("date.json") {
            let datetime = MessageStream::new(test.header.as_bytes())
                .parse_date()
                .into_datetime();
            assert_eq!(datetime, test.expected, "failed for {:?}", test.header);

            // Only dates that pass validation can be rebuilt with chrono.
            let datetime = match datetime {
                Some(datetime) if datetime.is_valid() => datetime,
                _ => continue,
            };

            // chrono's "west" offsets are positive for zones behind GMT.
            let direction = if datetime.tz_before_gmt { 1i32 } else { -1i32 };
            let west_seconds =
                ((datetime.tz_hour as i32 * 3600i32) + datetime.tz_minute as i32 * 60) * direction;
            let offset = FixedOffset::west_opt(west_seconds)
                .unwrap_or_else(|| FixedOffset::east_opt(0).unwrap());

            let chrono_datetime = match offset.with_ymd_and_hms(
                datetime.year as i32,
                datetime.month as u32,
                datetime.day as u32,
                datetime.hour as u32,
                datetime.minute as u32,
                datetime.second as u32,
            ) {
                LocalResult::Single(value) | LocalResult::Ambiguous(value, _) => value,
                // Dates chrono cannot represent are not cross-checked.
                _ => continue,
            };

            assert_eq!(
                chrono_datetime.timestamp(),
                datetime.to_timestamp(),
                "{} -> {} ({}) -> {} ({})",
                test.header.escape_debug(),
                datetime.to_timestamp(),
                Utc.timestamp_opt(datetime.to_timestamp(), 0)
                    .unwrap()
                    .to_rfc3339_opts(SecondsFormat::Secs, true),
                chrono_datetime.timestamp(),
                Utc.timestamp_opt(chrono_datetime.timestamp(), 0)
                    .unwrap()
                    .to_rfc3339_opts(SecondsFormat::Secs, true)
            );
        }
    }
}