1use std::borrow::Cow;
8
9use crate::{
10 decoders::{charsets::map::charset_decoder, DecodeFnc},
11 ContentType, Encoding, GetHeader, HeaderName, HeaderValue, Message, MessageParser, MessagePart,
12 MessagePartId, PartType,
13};
14
15use super::MessageStream;
16
17const MAX_NESTED_ENCODED: usize = 3;
18
/// Coarse classification of a part's `Content-Type`, used by the parser to
/// decide how a part (and its children) must be handled.
#[derive(Default, PartialEq, Debug)]
enum MimeType {
    /// `multipart/mixed`.
    MultipartMixed,
    /// `multipart/alternative`.
    MultipartAlternative,
    /// `multipart/related`.
    MultipartRelated,
    /// `multipart/digest`.
    MultipartDigest,
    /// `text/plain`, also used when a part has no content type outside of a digest.
    TextPlain,
    /// `text/html`.
    TextHtml,
    /// Any other `text/*` subtype.
    TextOther,
    /// `image/*`, `audio/*` or `video/*` content.
    Inline,
    /// `message/rfc822` or `message/global`; also the type of the top-level state.
    #[default]
    Message,
    /// Everything else, including unrecognised `multipart/*` subtypes.
    Other,
}
33
34#[inline(always)]
35fn mime_type(
36 content_type: Option<&ContentType<'_>>,
37 parent_content_type: &MimeType,
38) -> (bool, bool, bool, MimeType) {
39 if let Some(content_type) = content_type {
40 match content_type.ctype() {
41 "multipart" => (
42 true,
43 false,
44 false,
45 match content_type.subtype() {
46 Some("mixed") => MimeType::MultipartMixed,
47 Some("alternative") => MimeType::MultipartAlternative,
48 Some("related") => MimeType::MultipartRelated,
49 Some("digest") => MimeType::MultipartDigest,
50 _ => MimeType::Other,
51 },
52 ),
53 "text" => match content_type.subtype() {
54 Some("plain") => (false, true, true, MimeType::TextPlain),
55 Some("html") => (false, true, true, MimeType::TextHtml),
56 _ => (false, false, true, MimeType::TextOther),
57 },
58 "image" | "audio" | "video" => (false, true, false, MimeType::Inline),
59 "message" if [Some("rfc822"), Some("global")].contains(&content_type.subtype()) => {
60 (false, false, false, MimeType::Message)
61 }
62 _ => (false, false, false, MimeType::Other),
63 }
64 } else if let MimeType::MultipartDigest = parent_content_type {
65 (false, false, false, MimeType::Message)
66 } else {
67 (false, true, true, MimeType::TextPlain)
68 }
69}
70
71#[derive(Default, Debug)]
72struct MessageParserState {
73 mime_type: MimeType,
74 mime_boundary: Option<Vec<u8>>,
75 in_alternative: bool,
76 parts: usize,
77 html_parts: usize,
78 text_parts: usize,
79 need_html_body: bool,
80 need_text_body: bool,
81 part_id: MessagePartId,
82 sub_part_ids: Vec<MessagePartId>,
83 offset_header: usize,
84 offset_body: usize,
85 offset_end: usize,
86}
87
88impl MessageParserState {
89 fn new() -> MessageParserState {
90 MessageParserState {
91 mime_type: MimeType::Message,
92 mime_boundary: None,
93 in_alternative: false,
94 parts: 0,
95 html_parts: 0,
96 text_parts: 0,
97 need_text_body: true,
98 need_html_body: true,
99 ..Default::default()
100 }
101 }
102}
103
104impl MessageParser {
105 pub fn parse<'x>(&self, raw_message: &'x (impl AsRef<[u8]> + ?Sized)) -> Option<Message<'x>> {
112 self.parse_(raw_message.as_ref(), MAX_NESTED_ENCODED, false)
113 }
114
115 pub fn parse_headers<'x>(
118 &self,
119 raw_message: &'x (impl AsRef<[u8]> + ?Sized),
120 ) -> Option<Message<'x>> {
121 self.parse_(raw_message.as_ref(), MAX_NESTED_ENCODED, true)
122 }
123
124 fn parse_<'x>(
125 &self,
126 raw_message: &'x [u8],
127 depth: usize,
128 skip_body: bool,
129 ) -> Option<Message<'x>> {
130 let mut stream = MessageStream::new(raw_message);
131
132 let mut message = Message::new();
133
134 let mut state = MessageParserState::new();
135 let mut state_stack = Vec::with_capacity(4);
136
137 let mut part_headers = Vec::new();
138
139 'outer: loop {
140 state.offset_header = stream.offset();
142 if !stream.parse_headers(self, &mut part_headers) {
143 break;
144 }
145 state.offset_body = stream.offset();
146 if skip_body {
147 break;
148 }
149
150 state.parts += 1;
151 state.sub_part_ids.push(message.parts.len() as u32);
152
153 let content_type = part_headers
154 .header_value(&HeaderName::ContentType)
155 .and_then(|c| c.as_content_type());
156
157 let (is_multipart, mut is_inline, mut is_text, mut mime_type) =
158 mime_type(content_type, &state.mime_type);
159
160 if is_multipart {
161 if let Some(mime_boundary) = content_type.and_then(|f| f.attribute("boundary")) {
162 if stream.seek_next_part(mime_boundary.as_bytes()) {
163 let part_id = message.parts.len();
164 let new_state = MessageParserState {
165 in_alternative: state.in_alternative
166 || mime_type == MimeType::MultipartAlternative,
167 mime_type,
168 mime_boundary: mime_boundary.as_bytes().to_vec().into(),
169 html_parts: message.html_body.len(),
170 text_parts: message.text_body.len(),
171 need_html_body: state.need_html_body,
172 need_text_body: state.need_text_body,
173 part_id: part_id as u32,
174 ..Default::default()
175 };
176 message.parts.push(MessagePart {
178 headers: std::mem::take(&mut part_headers),
179 offset_header: state.offset_header as u32,
180 offset_body: state.offset_body as u32,
181 offset_end: 0,
182 is_encoding_problem: false,
183 encoding: Encoding::None,
184 body: PartType::default(),
185 });
186 state_stack.push((state, None));
187 state = new_state;
188 stream.skip_crlf();
189 continue;
190 } else {
191 mime_type = MimeType::TextOther;
192 is_text = true;
193 }
194 }
195 }
196
197 let (mut encoding, decode_fnc): (Encoding, DecodeFnc<'_>) = match part_headers
198 .header_value(&HeaderName::ContentTransferEncoding)
199 {
200 Some(HeaderValue::Text(encoding)) if encoding.eq_ignore_ascii_case("base64") => {
201 (Encoding::Base64, MessageStream::decode_base64_mime)
202 }
203 Some(HeaderValue::Text(encoding))
204 if encoding.eq_ignore_ascii_case("quoted-printable") =>
205 {
206 (
207 Encoding::QuotedPrintable,
208 MessageStream::decode_quoted_printable_mime,
209 )
210 }
211 _ => (Encoding::None, MessageStream::mime_part),
212 };
213
214 if mime_type == MimeType::Message && encoding == Encoding::None {
215 let new_state = MessageParserState {
216 mime_type: MimeType::Message,
217 mime_boundary: state.mime_boundary.take(),
218 need_html_body: true,
219 need_text_body: true,
220 part_id: message.parts.len() as u32,
221 ..Default::default()
222 };
223 message.attachments.push(message.parts.len() as u32);
224 message.parts.push(MessagePart {
225 headers: std::mem::take(&mut part_headers),
226 encoding,
227 is_encoding_problem: false,
228 offset_header: state.offset_header as u32,
229 offset_body: state.offset_body as u32,
230 offset_end: 0,
231 body: PartType::default(), });
233 state_stack.push((state, message.into()));
234 message = Message::new();
235 state = new_state;
236 continue;
237 }
238
239 let (offset_end, mut bytes) = decode_fnc(
240 &mut stream,
241 state.mime_boundary.as_deref().unwrap_or(&b""[..]),
242 );
243
244 let mut is_encoding_problem = offset_end == usize::MAX;
246 if is_encoding_problem {
247 encoding = Encoding::None;
248 if mime_type != MimeType::TextPlain {
249 mime_type = MimeType::TextOther;
250 }
251 is_inline = false;
252 is_text = true;
253
254 let (offset_end, boundary_found) =
255 stream.seek_part_end(state.mime_boundary.as_deref());
256 state.offset_end = offset_end;
257 bytes = stream.data[state.offset_body..state.offset_end].into();
258
259 if !boundary_found {
260 state.mime_boundary = None;
261 }
262 } else {
263 state.offset_end = offset_end;
264 }
265
266 let body_part = if mime_type != MimeType::Message {
267 let is_inline = is_inline
268 && part_headers
269 .header_value(&HeaderName::ContentDisposition)
270 .is_none_or(|d| !d.as_content_type().is_some_and(|ct| ct.is_attachment()))
271 && (state.parts == 1
272 || state.mime_type != MimeType::MultipartRelated
273 && (mime_type == MimeType::Inline
274 || content_type.is_none_or(|c| !c.has_attribute("name"))));
275
276 let is_inline = is_inline
279 || state.parts == 1
280 && state.mime_type == MimeType::Message
281 && mime_type == MimeType::TextPlain
282 && is_encoding_problem;
283
284 let (add_to_html, add_to_text) =
285 if let MimeType::MultipartAlternative = state.mime_type {
286 match mime_type {
287 MimeType::TextHtml => (true, false),
288 MimeType::TextPlain => (false, true),
289 _ => (false, false),
290 }
291 } else if is_inline {
292 if state.in_alternative && (state.need_text_body || state.need_html_body) {
293 match mime_type {
294 MimeType::TextHtml => {
295 state.need_text_body = false;
296 }
297 MimeType::TextPlain => {
298 state.need_html_body = false;
299 }
300 _ => (),
301 }
302 }
303 (state.need_html_body, state.need_text_body)
304 } else {
305 (false, false)
306 };
307
308 if add_to_html {
309 message.html_body.push(message.parts.len() as u32);
310 }
311 if add_to_text {
312 message.text_body.push(message.parts.len() as u32);
313 }
314
315 if is_text {
316 let text = match (
317 bytes,
318 content_type.and_then(|ct| {
319 ct.attribute("charset")
320 .and_then(|c| charset_decoder(c.as_bytes()))
321 }),
322 ) {
323 (Cow::Owned(vec), Some(charset_decoder)) => charset_decoder(&vec).into(),
324 (Cow::Owned(vec), None) => String::from_utf8(vec)
325 .unwrap_or_else(|e| String::from_utf8_lossy(e.as_bytes()).into_owned())
326 .into(),
327 (Cow::Borrowed(bytes), Some(charset_decoder)) => {
328 charset_decoder(bytes).into()
329 }
330 (Cow::Borrowed(bytes), None) => String::from_utf8_lossy(bytes),
331 };
332
333 let is_html = mime_type == MimeType::TextHtml;
334
335 if !add_to_html && is_html || !add_to_text && !is_html {
336 message.attachments.push(message.parts.len() as u32);
337 }
338
339 if is_html {
340 PartType::Html(text)
341 } else {
342 PartType::Text(text)
343 }
344 } else {
345 message.attachments.push(message.parts.len() as u32);
346
347 if !is_inline {
348 PartType::Binary(bytes)
349 } else {
350 PartType::InlineBinary(bytes)
351 }
352 }
353 } else {
354 message.attachments.push(message.parts.len() as u32);
355
356 if depth != 0 {
357 if let Some(nested_message) = self.parse_(bytes.as_ref(), depth - 1, false) {
358 PartType::Message(Message {
359 html_body: nested_message.html_body,
360 text_body: nested_message.text_body,
361 attachments: nested_message.attachments,
362 parts: nested_message
363 .parts
364 .into_iter()
365 .map(|p| p.into_owned())
366 .collect(),
367 raw_message: bytes.into_owned().into(),
368 })
369 } else {
370 is_encoding_problem = true;
371 PartType::Binary(bytes)
372 }
373 } else {
374 is_encoding_problem = true;
375 PartType::Binary(bytes)
376 }
377 };
378
379 message.parts.push(MessagePart {
381 headers: std::mem::take(&mut part_headers),
382 encoding,
383 is_encoding_problem,
384 body: body_part,
385 offset_header: state.offset_header as u32,
386 offset_body: state.offset_body as u32,
387 offset_end: state.offset_end as u32,
388 });
389
390 if state.mime_boundary.is_some() {
391 'inner: loop {
393 if let MimeType::Message = state.mime_type {
394 if let Some((mut prev_state, Some(mut prev_message))) = state_stack.pop() {
396 let offset_end = state
397 .mime_boundary
398 .as_ref()
399 .map(|b| {
400 let pos = stream.offset().saturating_sub(b.len() + 2);
401 stream.data.get(pos - 2).map_or(pos - 1, |&ch| {
402 if ch == b'\r' {
403 pos - 2
404 } else {
405 pos - 1
406 }
407 })
408 })
409 .unwrap_or_else(|| stream.offset());
410 message.raw_message = raw_message.into();
411 if let Some(part) = prev_message.parts.get_mut(state.part_id as usize) {
414 part.body = PartType::Message(message);
415 part.offset_end = offset_end as u32;
416 } else {
417 debug_assert!(false, "Invalid part ID, could not find message.");
418 }
419
420 message = prev_message;
421 prev_state.mime_boundary = state.mime_boundary;
422 state = prev_state;
423 } else {
424 debug_assert!(false, "Failed to restore parent message. Aborting.");
425 break 'outer;
426 }
427 }
428
429 if stream.is_multipart_end() {
430 if MimeType::MultipartAlternative == state.mime_type
433 && state.need_html_body
434 && state.need_text_body
435 {
436 if state.text_parts == message.text_body.len()
438 && state.html_parts != message.html_body.len()
439 {
440 for &part_id in &message.html_body[state.html_parts..] {
441 message.text_body.push(part_id);
442 }
443 }
444
445 if state.html_parts == message.html_body.len()
447 && state.text_parts != message.text_body.len()
448 {
449 for &part_id in &message.text_body[state.html_parts..] {
450 message.html_body.push(part_id);
451 }
452 }
453 }
454
455 if let Some(part) = message.parts.get_mut(state.part_id as usize) {
456 part.body =
458 PartType::Multipart(std::mem::take(&mut state.sub_part_ids));
459
460 if let Some((prev_state, _)) = state_stack.pop() {
462 state = prev_state;
463
464 if let Some(ref mime_boundary) = state.mime_boundary {
465 if let Some(offset) =
467 stream.seek_next_part_offset(mime_boundary)
468 {
469 part.offset_end = offset as u32;
470 continue 'inner;
471 }
472 }
473 }
474
475 part.offset_end = stream.offset() as u32;
477 } else {
478 debug_assert!(false, "Invalid part ID, could not find multipart.");
479 }
480
481 break 'outer;
482 } else {
483 break 'inner;
485 }
486 }
487 } else if stream.offset() >= stream.data.len() {
488 break 'outer;
489 }
490 }
491
492 while let Some((prev_state, prev_message)) = state_stack.pop() {
494 if let Some(mut prev_message) = prev_message {
495 message.raw_message = raw_message.into(); if let Some(part) = prev_message.parts.get_mut(state.part_id as usize) {
498 part.body = PartType::Message(message);
499 part.offset_end = stream.offset() as u32;
500 } else {
501 debug_assert!(false, "Invalid part ID, could not find message.");
502 }
503
504 message = prev_message;
505 } else if let Some(part) = message.parts.get_mut(state.part_id as usize) {
506 part.offset_end = stream.offset() as u32;
507 part.body = PartType::Multipart(state.sub_part_ids);
508 } else {
509 debug_assert!(false, "This should not have happened.");
510 }
511 state = prev_state;
512 }
513
514 message.raw_message = raw_message.into();
515
516 if !message.is_empty() {
517 message.parts[0].offset_end = message.raw_message.len() as u32;
518 Some(message)
519 } else if !part_headers.is_empty() {
520 message.parts.push(MessagePart {
522 headers: part_headers,
523 encoding: Encoding::None,
524 is_encoding_problem: true,
525 body: PartType::Text("".into()),
526 offset_header: 0,
527 offset_body: message.raw_message.len() as u32,
528 offset_end: message.raw_message.len() as u32,
529 });
530 Some(message)
531 } else {
532 None
533 }
534 }
535}
536
537impl<'x> Message<'x> {
538 fn new() -> Message<'x> {
539 Message {
540 ..Default::default()
541 }
542 }
543
544 pub fn is_empty(&self) -> bool {
546 self.parts.is_empty()
547 }
548}
549
550#[cfg(test)]
551mod tests {
552 use std::{fs, path::PathBuf};
553
554 use crate::MessageParser;
555
/// Parses every `.eml` fixture under `resources/eml/<suite>` twice: once on
/// the output of `strip_crlf` (compared with `<name>.json`) and once on the
/// output of `add_crlf` (compared with `<name>.crlf.json`).
///
/// The first JSON rendering is also deserialized back into a `Message`. On a
/// mismatch the actual JSON is written next to the fixture with a `failed`
/// extension before the test panics. Each suite must contain at least one fixture.
#[test]
fn parse_full_messages() {
    for suite in ["rfc", "legacy", "thirdparty", "malformed"] {
        let suite_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("resources")
            .join("eml")
            .join(suite);

        let mut executed = 0;

        for entry in fs::read_dir(&suite_dir).unwrap() {
            let mut path = entry.unwrap().path();
            if !path.extension().is_some_and(|ext| ext == "eml") {
                continue;
            }

            let original = fs::read(&path).unwrap();
            executed += 1;

            // First pass: input produced by `strip_crlf`.
            let lf_input = strip_crlf(&original);
            path.set_extension("json");
            let expected = fs::read(&path).unwrap();

            let parsed = MessageParser::default().parse(&lf_input).unwrap();
            let actual = serde_json::to_string_pretty(&parsed).unwrap();

            // The serialized form must deserialize back into a message.
            let _: super::Message<'_> = serde_json::from_str(&actual).unwrap();

            if actual.as_bytes() != expected {
                path.set_extension("failed");
                fs::write(&path, actual.as_bytes()).unwrap();
                panic!(
                    "Test failed, parsed message saved to {}",
                    path.display()
                );
            }

            // Second pass: input produced by `add_crlf`.
            let crlf_input = add_crlf(&original);
            path.set_extension("crlf.json");
            let expected = fs::read(&path).unwrap();

            let parsed = MessageParser::default().parse(&crlf_input).unwrap();
            let actual = serde_json::to_string_pretty(&parsed).unwrap();

            if actual.as_bytes() != expected {
                path.set_extension("failed");
                fs::write(&path, actual.as_bytes()).unwrap();
                panic!(
                    "Test failed, parsed message saved to {}",
                    path.display()
                );
            }
        }

        assert!(
            executed > 0,
            "Did not find any tests to run in folder {}.",
            suite_dir.display()
        );
    }
}
618
/// Returns a copy of `bytes` in which every `\n` that is not already
/// preceded by `\r` gets a `\r` inserted in front of it.
fn add_crlf(bytes: &[u8]) -> Vec<u8> {
    let mut out = Vec::with_capacity(bytes.len());
    // Pair each byte with its predecessor; the first byte is paired with 0.
    let predecessors = std::iter::once(0u8).chain(bytes.iter().copied());
    for (byte, prev) in bytes.iter().copied().zip(predecessors) {
        let needs_cr = byte == b'\n' && prev != b'\r';
        if needs_cr {
            out.push(b'\r');
        }
        out.push(byte);
    }

    out
}
632
/// Returns a copy of `bytes` with every carriage return (`\r`) removed, so
/// that CRLF line endings become plain LF. All other bytes are kept as is.
fn strip_crlf(bytes: &[u8]) -> Vec<u8> {
    let mut result = Vec::with_capacity(bytes.len());
    // Compare the byte itself with `\r`; negating the byte first (`!ch`) is a
    // bitwise complement and would filter out 0xF2 instead.
    result.extend(bytes.iter().copied().filter(|&ch| ch != b'\r'));

    result
}
643}