mail_parser/mailbox/
mbox.rs1use crate::DateTime;
8use std::io::BufRead;
9
10pub struct MessageIterator<T> {
15 reader: T,
16 message: Option<Message>,
17}
18
/// A single message extracted from an mbox stream.
///
/// The derived comparison traits follow field declaration order:
/// `internal_date` first, then `from`, then `contents`.
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub struct Message {
    /// Date taken from the `From ` separator line; `0` when it could not be
    /// parsed.
    internal_date: u64,
    /// Sender taken from the `From ` separator line.
    from: String,
    /// Raw bytes of the message that followed the separator line.
    contents: Vec<u8>,
}
26
27impl<T> MessageIterator<T>
28where
29 T: BufRead,
30{
31 pub fn new(reader: T) -> MessageIterator<T> {
32 MessageIterator {
33 reader,
34 message: None,
35 }
36 }
37}
38
39impl<T> Iterator for MessageIterator<T>
40where
41 T: BufRead,
42{
43 type Item = std::io::Result<Message>;
44
45 fn next(&mut self) -> Option<Self::Item> {
46 let mut message_line = Vec::with_capacity(80);
47
48 loop {
49 match self.reader.read_until(b'\n', &mut message_line) {
50 Ok(0) => return self.message.take().map(Ok),
51 Ok(_) => {}
52 Err(e) => return Some(Err(e)),
53 }
54
55 let is_from = message_line.starts_with(b"From ");
56
57 if is_from {
58 let message = self.message.take().map(Ok);
59 self.message =
60 Message::new(std::str::from_utf8(&message_line).unwrap_or("")).into();
61 if message.is_some() {
62 return message;
63 }
64 message_line.clear();
65 continue;
66 }
67
68 if let Some(message) = &mut self.message {
69 if message_line[0] != b'>' {
70 message.contents.extend_from_slice(&message_line);
71 message_line.clear();
72 continue;
73 }
74 let i = message_line
76 .iter()
77 .position(|&ch| ch != b'>')
78 .unwrap_or(message_line.len());
79 if message_line[i..].starts_with(b"From ") {
80 message.contents.extend_from_slice(&message_line[1..]);
81 } else {
82 message.contents.extend_from_slice(&message_line);
83 }
84 }
85 message_line.clear();
86 }
87 }
88}
89
90impl Message {
91 fn new(hdr: &str) -> Self {
92 let (internal_date, from) = if let Some((from, date)) = hdr
93 .strip_prefix("From ")
94 .and_then(|hdr| hdr.split_once(' '))
95 {
96 let mut dt = DateTime {
97 year: u16::MAX,
98 month: u8::MAX,
99 day: u8::MAX,
100 hour: u8::MAX,
101 minute: u8::MAX,
102 second: u8::MAX,
103 tz_before_gmt: false,
104 tz_hour: 0,
105 tz_minute: 0,
106 };
107
108 for (pos, part) in date.split_whitespace().enumerate() {
109 match pos {
110 1 => {
111 dt.month = if part.eq_ignore_ascii_case("jan") {
112 1
113 } else if part.eq_ignore_ascii_case("feb") {
114 2
115 } else if part.eq_ignore_ascii_case("mar") {
116 3
117 } else if part.eq_ignore_ascii_case("apr") {
118 4
119 } else if part.eq_ignore_ascii_case("may") {
120 5
121 } else if part.eq_ignore_ascii_case("jun") {
122 6
123 } else if part.eq_ignore_ascii_case("jul") {
124 7
125 } else if part.eq_ignore_ascii_case("aug") {
126 8
127 } else if part.eq_ignore_ascii_case("sep") {
128 9
129 } else if part.eq_ignore_ascii_case("oct") {
130 10
131 } else if part.eq_ignore_ascii_case("nov") {
132 11
133 } else if part.eq_ignore_ascii_case("dec") {
134 12
135 } else {
136 u8::MAX
137 };
138 }
139 2 => {
140 dt.day = part.parse().unwrap_or(u8::MAX);
141 }
142 3 => {
143 for (pos, part) in part.split(':').enumerate() {
144 match pos {
145 0 => {
146 dt.hour = part.parse().unwrap_or(u8::MAX);
147 }
148 1 => {
149 dt.minute = part.parse().unwrap_or(u8::MAX);
150 }
151 2 => {
152 dt.second = part.parse().unwrap_or(u8::MAX);
153 }
154 _ => {
155 break;
156 }
157 }
158 }
159 }
160 4 => {
161 dt.year = part.parse().unwrap_or(u16::MAX);
162 }
163 _ => (),
164 }
165 }
166
167 (
168 if dt.is_valid() {
169 dt.to_timestamp() as u64
170 } else {
171 0
172 },
173 from.trim().to_string(),
174 )
175 } else {
176 (0, "".to_string())
177 };
178
179 Self {
180 internal_date,
181 from,
182 contents: Vec::with_capacity(1024),
183 }
184 }
185
186 pub fn internal_date(&self) -> u64 {
188 self.internal_date
189 }
190
191 pub fn from(&self) -> &str {
193 &self.from
194 }
195
196 pub fn contents(&self) -> &[u8] {
198 &self.contents
199 }
200
201 pub fn unwrap_contents(self) -> Vec<u8> {
203 self.contents
204 }
205}
206
#[cfg(test)]
mod tests {
    use crate::mailbox::mbox::Message;

    use super::MessageIterator;

    /// Parses a small mbox sample and checks every resulting message,
    /// including the un-quoting of `>From ` lines.
    #[test]
    fn parse_mbox() {
        let message = br#"From god@heaven.af.mil Sat Jan 3 01:05:34 1996
Message 1

From cras@irccrew.org Tue Jul 23 19:39:23 2002
Message 2

From test@test.com Tue Aug 6 13:34:34 2002
Message 3
>From hello
>>From world
>>>From test

From other@domain.com Mon Jan 15 15:30:00 2018
Message 4
> From
>F
"#;

        let expected_messages = vec![
            Message {
                internal_date: 820631134,
                from: "god@heaven.af.mil".to_string(),
                contents: b"Message 1\n\n".to_vec(),
            },
            Message {
                internal_date: 1027453163,
                from: "cras@irccrew.org".to_string(),
                contents: b"Message 2\n\n".to_vec(),
            },
            Message {
                internal_date: 1028640874,
                from: "test@test.com".to_string(),
                contents: b"Message 3\nFrom hello\n>From world\n>>From test\n\n".to_vec(),
            },
            Message {
                internal_date: 1516030200,
                from: "other@domain.com".to_string(),
                contents: b"Message 4\n> From\n>F\n".to_vec(),
            },
        ];

        // Collect everything before comparing: zipping the parser with the
        // expectations would stop at the shorter side and let a parser that
        // yields too few (or no) messages pass unnoticed.
        let parsed_messages = MessageIterator::new(&message[..])
            .collect::<std::io::Result<Vec<_>>>()
            .unwrap();

        assert_eq!(parsed_messages, expected_messages);
    }
}