// mail_parser/mailbox/mbox.rs

/*
 * SPDX-FileCopyrightText: 2020 Stalwart Labs LLC <hello@stalw.art>
 *
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 */

use crate::DateTime;
use std::io::BufRead;

10/// Parses an Mbox mailbox from a `Read` stream, returning each message as a
11/// `Vec<u8>`.
12///
13/// Supports >From  quoting as defined in the [QMail mbox specification](http://qmail.org/qmail-manual-html/man5/mbox.html).
14pub struct MessageIterator<T> {
15    reader: T,
16    message: Option<Message>,
17}
18
/// Mbox message contents and metadata
// NOTE: the derived `PartialOrd`/`Ord` compare fields in declaration order
// (`internal_date`, then `from`, then `contents`); reordering the fields
// changes how messages sort.
#[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord)]
pub struct Message {
    /// Date taken from the `From ` separator line, in seconds since the UNIX
    /// epoch; `0` when the separator carried no parseable date.
    internal_date: u64,
    /// Sender taken from the `From ` separator line.
    from: String,
    /// Raw message bytes, with one level of `>From ` quoting removed.
    contents: Vec<u8>,
}

27impl<T> MessageIterator<T>
28where
29    T: BufRead,
30{
31    pub fn new(reader: T) -> MessageIterator<T> {
32        MessageIterator {
33            reader,
34            message: None,
35        }
36    }
37}
38
39impl<T> Iterator for MessageIterator<T>
40where
41    T: BufRead,
42{
43    type Item = std::io::Result<Message>;
44
45    fn next(&mut self) -> Option<Self::Item> {
46        let mut message_line = Vec::with_capacity(80);
47
48        loop {
49            match self.reader.read_until(b'\n', &mut message_line) {
50                Ok(0) => return self.message.take().map(Ok),
51                Ok(_) => {}
52                Err(e) => return Some(Err(e)),
53            }
54
55            let is_from = message_line.starts_with(b"From ");
56
57            if is_from {
58                let message = self.message.take().map(Ok);
59                self.message =
60                    Message::new(std::str::from_utf8(&message_line).unwrap_or("")).into();
61                if message.is_some() {
62                    return message;
63                }
64                message_line.clear();
65                continue;
66            }
67
68            if let Some(message) = &mut self.message {
69                if message_line[0] != b'>' {
70                    message.contents.extend_from_slice(&message_line);
71                    message_line.clear();
72                    continue;
73                }
74                // can become split_once once slice_split_once becomes stable
75                let i = message_line
76                    .iter()
77                    .position(|&ch| ch != b'>')
78                    .unwrap_or(message_line.len());
79                if message_line[i..].starts_with(b"From ") {
80                    message.contents.extend_from_slice(&message_line[1..]);
81                } else {
82                    message.contents.extend_from_slice(&message_line);
83                }
84            }
85            message_line.clear();
86        }
87    }
88}
89
90impl Message {
91    fn new(hdr: &str) -> Self {
92        let (internal_date, from) = if let Some((from, date)) = hdr
93            .strip_prefix("From ")
94            .and_then(|hdr| hdr.split_once(' '))
95        {
96            let mut dt = DateTime {
97                year: u16::MAX,
98                month: u8::MAX,
99                day: u8::MAX,
100                hour: u8::MAX,
101                minute: u8::MAX,
102                second: u8::MAX,
103                tz_before_gmt: false,
104                tz_hour: 0,
105                tz_minute: 0,
106            };
107
108            for (pos, part) in date.split_whitespace().enumerate() {
109                match pos {
110                    1 => {
111                        dt.month = if part.eq_ignore_ascii_case("jan") {
112                            1
113                        } else if part.eq_ignore_ascii_case("feb") {
114                            2
115                        } else if part.eq_ignore_ascii_case("mar") {
116                            3
117                        } else if part.eq_ignore_ascii_case("apr") {
118                            4
119                        } else if part.eq_ignore_ascii_case("may") {
120                            5
121                        } else if part.eq_ignore_ascii_case("jun") {
122                            6
123                        } else if part.eq_ignore_ascii_case("jul") {
124                            7
125                        } else if part.eq_ignore_ascii_case("aug") {
126                            8
127                        } else if part.eq_ignore_ascii_case("sep") {
128                            9
129                        } else if part.eq_ignore_ascii_case("oct") {
130                            10
131                        } else if part.eq_ignore_ascii_case("nov") {
132                            11
133                        } else if part.eq_ignore_ascii_case("dec") {
134                            12
135                        } else {
136                            u8::MAX
137                        };
138                    }
139                    2 => {
140                        dt.day = part.parse().unwrap_or(u8::MAX);
141                    }
142                    3 => {
143                        for (pos, part) in part.split(':').enumerate() {
144                            match pos {
145                                0 => {
146                                    dt.hour = part.parse().unwrap_or(u8::MAX);
147                                }
148                                1 => {
149                                    dt.minute = part.parse().unwrap_or(u8::MAX);
150                                }
151                                2 => {
152                                    dt.second = part.parse().unwrap_or(u8::MAX);
153                                }
154                                _ => {
155                                    break;
156                                }
157                            }
158                        }
159                    }
160                    4 => {
161                        dt.year = part.parse().unwrap_or(u16::MAX);
162                    }
163                    _ => (),
164                }
165            }
166
167            (
168                if dt.is_valid() {
169                    dt.to_timestamp() as u64
170                } else {
171                    0
172                },
173                from.trim().to_string(),
174            )
175        } else {
176            (0, "".to_string())
177        };
178
179        Self {
180            internal_date,
181            from,
182            contents: Vec::with_capacity(1024),
183        }
184    }
185
186    /// Returns the message creation date in UTC seconds since UNIX epoch
187    pub fn internal_date(&self) -> u64 {
188        self.internal_date
189    }
190
191    /// Returns the message sender address
192    pub fn from(&self) -> &str {
193        &self.from
194    }
195
196    /// Returns the message contents
197    pub fn contents(&self) -> &[u8] {
198        &self.contents
199    }
200
201    /// Unwraps the message contents
202    pub fn unwrap_contents(self) -> Vec<u8> {
203        self.contents
204    }
205}
206
#[cfg(test)]
mod tests {
    use super::{Message, MessageIterator};

    /// Parses a four-message mailbox and checks senders, dates, contents and
    /// `>From ` unquoting.
    #[test]
    fn parse_mbox() {
        let message = br#"From god@heaven.af.mil Sat Jan  3 01:05:34 1996
Message 1

From cras@irccrew.org  Tue Jul 23 19:39:23 2002
Message 2

From test@test.com Tue Aug  6 13:34:34 2002
Message 3
>From hello
>>From world
>>>From test

From other@domain.com Mon Jan 15  15:30:00  2018
Message 4
> From
>F
"#;

        let expected_messages = vec![
            Message {
                internal_date: 820631134,
                from: "god@heaven.af.mil".to_string(),
                contents: b"Message 1\n\n".to_vec(),
            },
            Message {
                internal_date: 1027453163,
                from: "cras@irccrew.org".to_string(),
                contents: b"Message 2\n\n".to_vec(),
            },
            Message {
                internal_date: 1028640874,
                from: "test@test.com".to_string(),
                contents: b"Message 3\nFrom hello\n>From world\n>>From test\n\n".to_vec(),
            },
            Message {
                internal_date: 1516030200,
                from: "other@domain.com".to_string(),
                contents: b"Message 4\n> From\n>F\n".to_vec(),
            },
        ];

        // Compare the full sequence: zipping against the expectations would
        // stop at the shorter side and hide missing or extra messages.
        let messages = MessageIterator::new(&message[..])
            .collect::<std::io::Result<Vec<_>>>()
            .unwrap();
        assert_eq!(messages, expected_messages);
    }
}