#![allow(clippy::unwrap_used, clippy::expect_used)]
use super::{parse_email, parse_headers_only};
use super::wire::{parse_headers, split_header_body, split_mime_parts};
#[allow(unused_imports)]
use super::interpret::{
contains_at_outside_quotes, decode_body, decode_encoded_words, decode_hex_pair,
decode_q_encoding, decode_quoted_printable, decode_transfer_encoding, extract_comment_text,
extract_filename, extract_mime_type, extract_param, extract_rfc2231_continuation,
extract_rfc2231_param, find_closing_quote, find_param_value, hex_digit, is_disposition_type,
is_inside_quotes, normalize_display_name_phrase, parse_address_list, parse_rfc5322_date,
parse_single_address, parse_timezone, parse_year, percent_decode, strip_comments,
strip_outer_quotes, unescape_quoted_string,
};
use crate::error::Error;
#[allow(unused_imports)]
use crate::types::Address;
/// A minimal single-part `text/plain` message: checks the envelope fields,
/// the decoded body, the (empty) attachment list and the reported size.
#[test]
fn parse_simple_text_email() {
    let wire = b"From: sender@example.com\r\n\
        To: recipient@example.com\r\n\
        Subject: Test\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Message-ID: <abc123@example.com>\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        \r\n\
        Hello, World!";
    let mail = parse_email(wire).unwrap();

    // Envelope fields.
    assert_eq!(mail.from[0].email, "sender@example.com");
    let recipients = &mail.to;
    assert_eq!(recipients.len(), 1);
    assert_eq!(recipients[0].email, "recipient@example.com");
    assert_eq!(mail.subject.as_deref(), Some("Test"));
    assert_eq!(mail.message_id.as_deref(), Some("abc123@example.com"));

    // Content.
    assert_eq!(mail.body_text.as_deref(), Some("Hello, World!"));
    assert!(mail.body_html.is_none());
    assert!(mail.attachments.is_empty());
    assert_eq!(mail.size, wire.len() as u64);
}
/// `multipart/alternative` with one plain and one HTML part must fill both
/// body fields and yield no attachments.
#[test]
fn parse_multipart_alternative() {
    let wire = b"From: sender@example.com\r\n\
        To: recipient@example.com\r\n\
        Subject: Multi\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        MIME-Version: 1.0\r\n\
        Content-Type: multipart/alternative; boundary=\"bound42\"\r\n\
        \r\n\
        --bound42\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        \r\n\
        Plain text body\r\n\
        --bound42\r\n\
        Content-Type: text/html; charset=utf-8\r\n\
        \r\n\
        <html><body>HTML body</body></html>\r\n\
        --bound42--";
    let mail = parse_email(wire).unwrap();

    let plain = mail.body_text.as_deref();
    let html = mail.body_html.as_deref();
    assert_eq!(plain, Some("Plain text body"));
    assert_eq!(html, Some("<html><body>HTML body</body></html>"));
    assert!(mail.attachments.is_empty());
}
/// A base64 (`B`) encoded-word in `Subject` is decoded to plain text.
#[test]
fn parse_encoded_words_base64_subject() {
    let wire = b"From: sender@example.com\r\n\
        Subject: =?UTF-8?B?SGVsbG8gV29ybGQ=?=\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n\
        body";
    let mail = parse_email(wire).unwrap();
    let subject = mail.subject.as_deref();
    assert_eq!(subject, Some("Hello World"));
}
/// A `Q` encoded-word in `Subject` is decoded, with `_` becoming a space.
#[test]
fn parse_encoded_words_q_subject() {
    let wire = b"From: sender@example.com\r\n\
        Subject: =?UTF-8?Q?Hello_World?=\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n\
        body";
    let mail = parse_email(wire).unwrap();
    let subject = mail.subject.as_deref();
    assert_eq!(subject, Some("Hello World"));
}
/// An encoded-word used as the `From` display name is decoded, and the
/// angle-bracketed address is extracted separately.
#[test]
fn parse_encoded_words_in_display_name() {
    let wire = b"From: =?UTF-8?B?Sm9obiBEb2U=?= <john@example.com>\r\n\
        Subject: Test\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let mail = parse_email(wire).unwrap();
    let sender = &mail.from[0];
    assert_eq!(sender.name.as_deref(), Some("John Doe"));
    assert_eq!(sender.email, "john@example.com");
}
/// An ISO-8859-1 `Q` encoded-word in `Subject` is transcoded to UTF-8.
#[test]
fn parse_non_utf8_charset() {
    let wire = b"From: sender@example.com\r\n\
        Subject: =?ISO-8859-1?Q?H=E9llo?=\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let mail = parse_email(wire).unwrap();
    let subject = mail.subject.as_deref();
    assert_eq!(subject, Some("Héllo"));
}
/// The stored `Message-ID` must not include the surrounding `<` `>`.
#[test]
fn parse_message_id_strips_brackets() {
    let wire = b"From: a@b.com\r\n\
        Message-ID: <unique-id@host.com>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let mail = parse_email(wire).unwrap();
    let id = mail.message_id.as_deref();
    assert_eq!(id, Some("unique-id@host.com"));
}
/// Two message ids in `In-Reply-To` are both kept, in order and without
/// angle brackets.
#[test]
fn parse_in_reply_to_multiple() {
    let wire = b"From: a@b.com\r\n\
        In-Reply-To: <first@host> <second@host>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let mail = parse_email(wire).unwrap();
    let ids = &mail.in_reply_to;
    assert_eq!(ids.len(), 2);
    assert_eq!(ids[0], "first@host");
    assert_eq!(ids[1], "second@host");
}
/// Every id listed in `References` is collected in order, brackets removed.
#[test]
fn parse_references_all_ids() {
    let wire = b"From: a@b.com\r\n\
        References: <ref1@host> <ref2@host> <ref3@host>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let mail = parse_email(wire).unwrap();
    let refs = &mail.references;
    assert_eq!(refs.len(), 3);
    assert_eq!(refs[0], "ref1@host");
    assert_eq!(refs[1], "ref2@host");
    assert_eq!(refs[2], "ref3@host");
}
/// `In-Reply-To` ids with dotted domains are all preserved in order.
#[test]
fn parse_in_reply_to_all_ids() {
    let wire = b"From: a@b.com\r\n\
        In-Reply-To: <id1@a.com> <id2@b.com>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let mail = parse_email(wire).unwrap();
    let ids = &mail.in_reply_to;
    assert_eq!(ids.len(), 2);
    assert_eq!(ids[0], "id1@a.com");
    assert_eq!(ids[1], "id2@b.com");
}
/// A top-level attachment with a zero-length body must still be reported,
/// with its filename, a size of zero and section `"1"`.
#[test]
fn parse_empty_top_level_attachment_is_preserved() {
    let wire = b"From: sender@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        MIME-Version: 1.0\r\n\
        Content-Type: application/octet-stream\r\n\
        Content-Disposition: attachment; filename=\"empty.bin\"\r\n\
        \r\n";
    let mail = parse_email(wire).unwrap();
    assert_eq!(
        mail.attachments.len(),
        1,
        "empty top-level attachment must not be dropped"
    );
    let attachment = &mail.attachments[0];
    assert_eq!(attachment.filename.as_deref(), Some("empty.bin"));
    assert_eq!(attachment.size, Some(0));
    assert_eq!(attachment.section.as_deref(), Some("1"));
}
/// A `Date` header with a `+0530` zone yields the written wall-clock fields
/// and an offset of 330 minutes.
#[test]
fn parse_date_with_numeric_timezone() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0530\r\n\
        \r\n";
    let when = parse_email(wire).unwrap().date.unwrap();

    // Calendar part.
    assert_eq!(when.year, 2025);
    assert_eq!(when.month, 2);
    assert_eq!(when.day, 13);
    // Time-of-day part.
    assert_eq!(when.hour, 15);
    assert_eq!(when.minute, 47);
    assert_eq!(when.second, 33);
    // Zone.
    assert_eq!(when.tz_offset_minutes, 330);
}
/// The named zone `EST` maps to an offset of -300 minutes.
#[test]
fn parse_date_named_timezone() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 10:30:00 EST\r\n\
        \r\n";
    let when = parse_email(wire).unwrap().date.unwrap();
    assert_eq!(when.tz_offset_minutes, -300);
}
/// A quoted display name followed by an angle-addr is split into name and
/// email, with the quotes removed from the name.
#[test]
fn parse_address_with_display_name() {
    let wire = b"From: \"John Doe\" <john@example.com>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let mail = parse_email(wire).unwrap();
    let sender = &mail.from[0];
    assert_eq!(sender.name.as_deref(), Some("John Doe"));
    assert_eq!(sender.email, "john@example.com");
}
/// Comma-separated `To` and `Cc` lists are split into individual addresses,
/// including an entry that carries a quoted display name.
#[test]
fn parse_multiple_recipients() {
    let wire = b"From: a@b.com\r\n\
        To: one@x.com, \"Two\" <two@x.com>, three@x.com\r\n\
        Cc: cc1@x.com, cc2@x.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let mail = parse_email(wire).unwrap();
    let to = &mail.to;
    assert_eq!(to.len(), 3);
    assert_eq!(to[1].name.as_deref(), Some("Two"));
    assert_eq!(mail.cc.len(), 2);
}
/// `multipart/mixed` with a text part and a PDF attachment: the text becomes
/// the body and the PDF is reported as a non-inline attachment in section 2.
#[test]
fn parse_multipart_with_attachment() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        MIME-Version: 1.0\r\n\
        Content-Type: multipart/mixed; boundary=\"mixbound\"\r\n\
        \r\n\
        --mixbound\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Message body\r\n\
        --mixbound\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename=\"doc.pdf\"\r\n\
        \r\n\
        PDF_CONTENT_HERE\r\n\
        --mixbound--";
    let mail = parse_email(wire).unwrap();
    assert_eq!(mail.body_text.as_deref(), Some("Message body"));
    assert_eq!(mail.attachments.len(), 1);

    let pdf = &mail.attachments[0];
    assert_eq!(pdf.filename.as_deref(), Some("doc.pdf"));
    assert_eq!(pdf.content_type, "application/pdf");
    assert!(!pdf.is_inline);
    assert_eq!(pdf.section.as_deref(), Some("2"));
}
/// A part marked `Content-Disposition: inline` with a `Content-ID` is listed
/// as an inline attachment whose content id has no angle brackets.
#[test]
fn parse_inline_attachment() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"bound\"\r\n\
        \r\n\
        --bound\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --bound\r\n\
        Content-Type: image/png\r\n\
        Content-Disposition: inline\r\n\
        Content-ID: <img001>\r\n\
        \r\n\
        PNG_DATA\r\n\
        --bound--";
    let mail = parse_email(wire).unwrap();
    assert_eq!(mail.attachments.len(), 1);
    let image = &mail.attachments[0];
    assert!(image.is_inline);
    assert_eq!(image.content_id.as_deref(), Some("img001"));
}
/// Input that ends after the header block (no blank line, no body) still
/// parses; both body fields stay empty.
#[test]
fn parse_headers_only_no_body() {
    let wire = b"From: a@b.com\r\n\
        Subject: Headers only\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n";
    let mail = parse_email(wire).unwrap();
    assert_eq!(mail.subject.as_deref(), Some("Headers only"));
    let (text, html) = (&mail.body_text, &mail.body_html);
    assert!(text.is_none());
    assert!(html.is_none());
}
/// Zero-length input is rejected with `Error::EmptyInput`.
#[test]
fn parse_empty_input() {
    assert!(matches!(parse_email(b""), Err(Error::EmptyInput)));
}
/// A message lacking `From` is accepted: the sender list is empty and the
/// remaining headers are still parsed.
#[test]
fn parse_missing_from() {
    let wire = b"Subject: No from\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let mail = parse_email(wire).expect(
        "messages without From should still parse when other headers are usable \
         (consumer robustness / Postel's law)",
    );
    let senders = &mail.from;
    assert!(
        senders.is_empty(),
        "missing From should produce an empty from list, got {:?}",
        senders
    );
    assert_eq!(mail.subject.as_deref(), Some("No from"));
}
/// Input with no header lines at all is treated as body text; headers are
/// empty and the size equals the input length.
#[test]
fn parse_body_only_message_without_headers() {
    let wire = b"Hello from the body only parser path";
    let mail = parse_email(wire).expect(
        "body-only top-level messages should still parse so consumers can \
         inspect truncated or malformed maildrops",
    );
    assert!(mail.from.is_empty());
    assert!(mail.raw_headers.is_empty());
    let body = mail.body_text.as_deref();
    assert_eq!(body, Some("Hello from the body only parser path"));
    assert_eq!(mail.size, wire.len() as u64);
}
/// Input that starts with the blank separator line (an empty header block)
/// parses, and everything after the separator is the body.
#[test]
fn parse_blank_line_then_body_without_headers() {
    let wire = b"\r\nBody after an empty top-level header block";
    let outcome = parse_email(wire);
    let mail =
        outcome.expect("an empty top-level header block followed by body text should still parse");
    assert!(mail.from.is_empty());
    assert!(mail.raw_headers.is_empty());
    let body = mail.body_text.as_deref();
    assert_eq!(body, Some("Body after an empty top-level header block"));
}
/// A message carrying only non-standard headers parses; those headers are
/// exposed through `extra_headers` under lower-cased names.
#[test]
fn parse_custom_headers_without_well_known_headers() {
    let wire = b"X-Trace: 12345\r\n\
        List-Id: Example List <list.example>\r\n\
        \r\n\
        body";
    let mail = parse_email(wire).expect(
        "messages with only optional fields should still parse so consumers \
         can inspect extra headers and body content",
    );
    assert!(mail.from.is_empty());
    assert!(mail.subject.is_none());
    assert_eq!(mail.body_text.as_deref(), Some("body"));

    let has_trace = mail
        .extra_headers
        .iter()
        .any(|(key, val)| key == "x-trace" && val == "12345");
    assert!(has_trace, "X-Trace must be preserved in extra_headers");

    let has_list_id = mail
        .extra_headers
        .iter()
        .any(|(key, val)| key == "list-id" && val == "Example List <list.example>");
    assert!(has_list_id, "List-Id must be preserved in extra_headers");
}
/// Quoted-printable body decoding: `=20`, `=0D=0A` and a soft line break
/// (`=` followed by CRLF) are all handled.
#[test]
fn parse_quoted_printable_body() {
    // The space before "break" follows the `\r\n` escape on the same source
    // line, so it is part of the message bytes.
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: quoted-printable\r\n\
        \r\n\
        Hello=20World=0D=0ASoft=\r\n break";
    let mail = parse_email(wire).unwrap();
    let body = mail.body_text.as_deref();
    assert_eq!(body, Some("Hello World\r\nSoft break"));
}
/// A base64-encoded `text/plain` body is decoded into `body_text`.
#[test]
fn parse_base64_body() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: base64\r\n\
        \r\n\
        SGVsbG8gV29ybGQ=\r\n";
    let mail = parse_email(wire).unwrap();
    let body = mail.body_text.as_deref();
    assert_eq!(body, Some("Hello World"));
}
/// `multipart/mixed` wrapping a `multipart/alternative` plus a PDF: both
/// bodies come from the inner container and the PDF is section `"2"`.
#[test]
fn parse_nested_multipart_section_numbers() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"outer\"\r\n\
        \r\n\
        --outer\r\n\
        Content-Type: multipart/alternative; boundary=\"inner\"\r\n\
        \r\n\
        --inner\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Plain\r\n\
        --inner\r\n\
        Content-Type: text/html\r\n\
        \r\n\
        <b>HTML</b>\r\n\
        --inner--\r\n\
        --outer\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename=\"f.pdf\"\r\n\
        \r\n\
        DATA\r\n\
        --outer--";
    let mail = parse_email(wire).unwrap();
    assert_eq!(mail.body_text.as_deref(), Some("Plain"));
    assert_eq!(mail.body_html.as_deref(), Some("<b>HTML</b>"));

    let attachments = &mail.attachments;
    assert_eq!(attachments.len(), 1);
    assert_eq!(attachments[0].section.as_deref(), Some("2"));
}
/// An RFC 2231 extended `filename*` parameter (charset plus percent-encoding)
/// is decoded into the attachment filename.
#[test]
fn parse_rfc2231_filename() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename*=UTF-8''r%C3%A9sum%C3%A9.pdf\r\n\
        \r\n\
        DATA\r\n\
        --b--";
    let mail = parse_email(wire).unwrap();
    assert_eq!(mail.attachments.len(), 1);
    let filename = mail.attachments[0].filename.as_deref();
    assert_eq!(filename, Some("résumé.pdf"));
}
/// `raw_headers` keeps the original header lines as text.
#[test]
fn parse_raw_headers_preserved() {
    let wire = b"From: a@b.com\r\n\
        Subject: Test\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n\
        Body";
    let mail = parse_email(wire).unwrap();
    let headers = &mail.raw_headers;
    assert!(headers.contains("From: a@b.com"));
    assert!(headers.contains("Subject: Test"));
}
/// Bare-LF line endings are accepted for both the headers and the
/// header/body separator.
#[test]
fn parse_lf_only_line_endings() {
    let wire = b"From: a@b.com\n\
        Subject: LF\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\n\
        \n\
        Body with LF";
    let mail = parse_email(wire).unwrap();
    let (subject, body) = (mail.subject.as_deref(), mail.body_text.as_deref());
    assert_eq!(subject, Some("LF"));
    assert_eq!(body, Some("Body with LF"));
}
/// A folded `Subject` (continuation line starting with a space) is unfolded
/// into a single line.
#[test]
fn parse_header_continuation_lines() {
    // The fold's leading space must stay on the same source line as the
    // preceding `\r\n`; a `\`-newline continuation would strip it.
    let wire = b"From: a@b.com\r\n\
        Subject: This is a very long\r\n subject line that wraps\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let mail = parse_email(wire).unwrap();
    let subject = mail.subject.as_deref();
    assert_eq!(subject, Some("This is a very long subject line that wraps"));
}
/// Six bytes of binary junk must be rejected with an error (despite the
/// "best effort" in the name, the assertion is that parsing fails).
#[test]
fn parse_garbage_input_best_effort() {
    let junk = b"\x00\x01\x02\x03\xff\xfe";
    assert!(parse_email(junk).is_err());
}
/// A multipart message cut off before its closing boundary still yields the
/// text of the part that was seen.
#[test]
fn parse_truncated_multipart() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"trunc\"\r\n\
        \r\n\
        --trunc\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Some text here";
    let mail = parse_email(wire).unwrap();
    let body = mail.body_text.as_deref();
    assert_eq!(body, Some("Some text here"));
}
/// Whitespace between two adjacent encoded-words is dropped when decoding.
#[test]
fn decode_adjacent_encoded_words() {
    assert_eq!(
        decode_encoded_words("=?UTF-8?B?SGVs?= =?UTF-8?B?bG8=?="),
        "Hello"
    );
}
/// An ISO-8859-1 `Q` encoded-word decodes to the matching UTF-8 text.
#[test]
fn decode_iso8859_encoded_word() {
    assert_eq!(decode_encoded_words("=?ISO-8859-1?Q?caf=E9?="), "café");
}
/// A time written as `HH:MM` (no seconds) parses with `second == 0`.
#[test]
fn parse_date_without_seconds() {
    let when = parse_rfc5322_date("Thu, 13 Feb 2025 15:47 +0000").unwrap();
    assert_eq!(when.hour, 15);
    assert_eq!(when.minute, 47);
    assert_eq!(when.second, 0);
}
/// Two-digit years are expanded: `99` becomes 1999 and `25` becomes 2025.
#[test]
fn parse_two_digit_year() {
    let nineties = parse_rfc5322_date("13 Feb 99 12:00:00 +0000").unwrap();
    assert_eq!(nineties.year, 1999);

    let twenties = parse_rfc5322_date("13 Feb 25 12:00:00 +0000").unwrap();
    assert_eq!(twenties.year, 2025);
}
/// Three-digit obsolete years are interpreted by adding 1900, which is what
/// the expected values below encode (107 -> 2007, 100 -> 2000, 999 -> 2899).
#[test]
fn parse_three_digit_year_rfc5322_section_4_3() {
    let mid = parse_rfc5322_date("13 Feb 107 12:00:00 +0000").unwrap();
    assert_eq!(
        mid.year, 2007,
        "3-digit year 107 must map to 2007 per RFC 5322 Section 4.3"
    );

    let lowest = parse_rfc5322_date("13 Feb 100 12:00:00 +0000").unwrap();
    assert_eq!(
        lowest.year, 2000,
        "3-digit year 100 must map to 2000 per RFC 5322 Section 4.3"
    );

    let highest = parse_rfc5322_date("13 Feb 999 12:00:00 +0000").unwrap();
    assert_eq!(
        highest.year, 2899,
        "3-digit year 999 must map to 2899 per RFC 5322 Section 4.3"
    );
}
/// Pins the two-digit-year pivot: 50 and 69 land in the 1900s, while 49
/// lands in the 2000s.
#[test]
fn parse_two_digit_year_rfc5322_section_4_3_cutoff() {
    let at_pivot = parse_rfc5322_date("13 Feb 50 12:00:00 +0000").unwrap();
    assert_eq!(
        at_pivot.year, 1950,
        "2-digit year 50 must map to 1950 per RFC 5322 Section 4.3"
    );

    let above_pivot = parse_rfc5322_date("13 Feb 69 12:00:00 +0000").unwrap();
    assert_eq!(
        above_pivot.year, 1969,
        "2-digit year 69 must map to 1969 per RFC 5322 Section 4.3"
    );

    let below_pivot = parse_rfc5322_date("13 Feb 49 12:00:00 +0000").unwrap();
    assert_eq!(
        below_pivot.year, 2049,
        "2-digit year 49 must map to 2049 per RFC 5322 Section 4.3"
    );
}
/// A non-text part with no `Content-Disposition` is still reported as an
/// attachment.
#[test]
fn parse_non_text_part_is_attachment() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Text\r\n\
        --b\r\n\
        Content-Type: image/jpeg\r\n\
        \r\n\
        JPEG_DATA\r\n\
        --b--";
    let mail = parse_email(wire).unwrap();
    let attachments = &mail.attachments;
    assert_eq!(attachments.len(), 1);
    assert_eq!(attachments[0].content_type, "image/jpeg");
}
/// Bytes 0x93/0x94 in a windows-1252 body decode to curly quotation marks
/// (at least one of U+201C / U+201D must appear).
#[test]
fn parse_windows1252_body() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=windows-1252\r\n\
        \r\n\
        \x93Hello\x94";
    let mail = parse_email(wire).unwrap();
    let body = mail.body_text.unwrap();
    assert!(body.contains("Hello"));
    let has_open_quote = body.contains('\u{201c}');
    let has_close_quote = body.contains('\u{201d}');
    assert!(has_open_quote || has_close_quote);
}
/// A single-part `text/html` message fills `body_html` only.
#[test]
fn parse_html_only_body() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/html; charset=utf-8\r\n\
        \r\n\
        <html><body>Hello</body></html>";
    let mail = parse_email(wire).unwrap();
    assert!(mail.body_text.is_none());
    let html = mail.body_html.as_deref();
    assert_eq!(html, Some("<html><body>Hello</body></html>"));
}
/// `Bcc` is parsed as an address list like the other recipient headers.
#[test]
fn parse_bcc_addresses() {
    let wire = b"From: a@b.com\r\n\
        To: to@x.com\r\n\
        Bcc: hidden@x.com, secret@x.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let mail = parse_email(wire).unwrap();
    let blind = &mail.bcc;
    assert_eq!(blind.len(), 2);
    assert_eq!(blind[0].email, "hidden@x.com");
}
/// Builds a message with 70 levels of nested `multipart/mixed` containers
/// around a single `text/plain` leaf and checks that parsing returns `Ok`
/// instead of failing.
///
/// Whether the leaf text is surfaced in `body_text` is intentionally not
/// asserted: this test only pins that deep nesting is survivable.
#[test]
fn mime_depth_limit() {
    let mut msg = b"From: a@b.com\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b0\"\r\n\r\n"
        .to_vec();
    // Level `i` opens with boundary `b{i}` and declares `b{i+1}` for its child.
    for i in 0..70 {
        msg.extend_from_slice(
            format!(
                "--b{i}\r\nContent-Type: multipart/mixed; boundary=\"b{}\"\r\n\r\n",
                i + 1
            )
            .as_bytes(),
        );
    }
    msg.extend_from_slice(b"--b70\r\nContent-Type: text/plain\r\n\r\nDeep\r\n--b70--\r\n");

    // The earlier `body_text.is_none() || body_text.is_some()` assertion was a
    // tautology and could never fail; the successful parse is the real check.
    let _parsed =
        parse_email(&msg).expect("deeply nested multipart input must parse without error");
}
/// `Reply-To` may list several addresses; all are kept in order.
#[test]
fn parse_reply_to() {
    let wire = b"From: a@b.com\r\n\
        Reply-To: noreply@example.com, support@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let mail = parse_email(wire).unwrap();
    let targets = &mail.reply_to;
    assert_eq!(targets.len(), 2);
    assert_eq!(targets[0].email, "noreply@example.com");
    assert_eq!(targets[1].email, "support@example.com");
}
/// A GB2312 base64 encoded-word in `Subject` is transcoded to UTF-8.
#[test]
fn parse_gb2312_encoded_word() {
    let wire = b"From: sender@example.com\r\n\
        Subject: =?GB2312?B?xOO6ww==?=\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let mail = parse_email(wire).unwrap();
    let subject = mail.subject.as_deref();
    assert_eq!(subject, Some("你好"));
}
/// Only the angle brackets are removed from `Content-ID`; the rest of the
/// value (including a `cid:` prefix) is kept verbatim.
#[test]
fn parse_content_id_strips_brackets() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: image/png\r\n\
        Content-ID: <cid:image001@01D00000.00000000>\r\n\
        \r\n\
        PNG\r\n\
        --b--";
    let mail = parse_email(wire).unwrap();
    let content_id = mail.attachments[0].content_id.as_deref();
    assert_eq!(content_id, Some("cid:image001@01D00000.00000000"));
}
/// An attachment with no filename hint anywhere is still listed, with
/// `filename == None` and not marked inline.
#[test]
fn parse_attachment_without_filename() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/octet-stream\r\n\
        Content-Disposition: attachment\r\n\
        \r\n\
        BINARY\r\n\
        --b--";
    let mail = parse_email(wire).unwrap();
    assert_eq!(mail.attachments.len(), 1);

    let blob = &mail.attachments[0];
    assert!(blob.filename.is_none());
    assert_eq!(blob.content_type, "application/octet-stream");
    assert!(!blob.is_inline);
}
/// `text/plain` with no `charset` parameter still decodes an ASCII body.
#[test]
fn parse_content_type_without_charset_defaults() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Hello ASCII";
    let mail = parse_email(wire).unwrap();
    let body = mail.body_text.as_deref();
    assert_eq!(body, Some("Hello ASCII"));
}
/// A MIME part that has headers but no `Content-Type` is treated as plain
/// text and lands in `body_text`.
#[test]
fn parse_mime_part_no_content_type_defaults_to_us_ascii() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Transfer-Encoding: 7bit\r\n\
        \r\n\
        Hello ASCII\r\n\
        --b--";
    let mail = parse_email(wire).unwrap();
    let body = mail.body_text.as_deref();
    assert_eq!(body, Some("Hello ASCII"));
}
/// A MIME part with an empty header block is treated as plain text.
#[test]
fn parse_mime_part_no_headers_at_all() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        \r\n\
        Headerless body\r\n\
        --b--";
    let mail = parse_email(wire).unwrap();
    let body = mail.body_text.as_deref();
    assert_eq!(body, Some("Headerless body"));
}
/// A multipart message made only of attachments has no body text or HTML,
/// and the attachments are numbered `"1"` and `"2"`.
#[test]
fn parse_multipart_only_attachments() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename=\"a.pdf\"\r\n\
        \r\n\
        PDF1\r\n\
        --b\r\n\
        Content-Type: image/png\r\n\
        Content-Disposition: attachment; filename=\"b.png\"\r\n\
        \r\n\
        PNG2\r\n\
        --b--";
    let mail = parse_email(wire).unwrap();
    assert!(mail.body_text.is_none());
    assert!(mail.body_html.is_none());

    let files = &mail.attachments;
    assert_eq!(files.len(), 2);
    assert_eq!(files[0].section.as_deref(), Some("1"));
    assert_eq!(files[1].section.as_deref(), Some("2"));
}
/// An unrecognised charset label must not prevent the body from being
/// decoded; ASCII content should survive.
#[test]
fn parse_unknown_charset_body_fallback() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=x-unknown-fake\r\n\
        \r\n\
        Plain text in unknown charset";
    let mail = parse_email(wire).unwrap();
    let body = mail.body_text;
    assert!(body.is_some());
    assert!(body.unwrap().contains("Plain text"));
}
/// A part with a `Content-ID` but no `Content-Disposition` is classified as
/// an inline attachment.
#[test]
fn parse_content_id_without_disposition_is_inline() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: image/gif\r\n\
        Content-ID: <img42>\r\n\
        \r\n\
        GIF89a\r\n\
        --b--";
    let mail = parse_email(wire).unwrap();
    assert_eq!(mail.attachments.len(), 1);
    let gif = &mail.attachments[0];
    assert!(gif.is_inline);
    assert_eq!(gif.content_id.as_deref(), Some("img42"));
}
/// A 10,000-character `Subject` on one line is returned without truncation.
#[test]
fn parse_overlong_subject() {
    let long_subject = "A".repeat(10_000);
    let wire = format!(
        "From: a@b.com\r\n\
         Subject: {long_subject}\r\n\
         Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
         \r\n"
    );
    let mail = parse_email(wire.as_bytes()).unwrap();
    let expected = Some(long_subject.as_str());
    assert_eq!(mail.subject.as_deref(), expected);
}
/// `From` may hold several mailboxes; each is kept with its display name.
#[test]
fn parse_multiple_from_preserves_all() {
    let wire = b"From: Alice <alice@example.com>, Bob <bob@example.com>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let mail = parse_email(wire).unwrap();
    assert_eq!(
        mail.from.len(),
        2,
        "RFC 5322 Section 3.6.2: all From mailboxes must be preserved"
    );

    let alice = &mail.from[0];
    assert_eq!(alice.email, "alice@example.com");
    assert_eq!(alice.name.as_deref(), Some("Alice"));

    let bob = &mail.from[1];
    assert_eq!(bob.email, "bob@example.com");
    assert_eq!(bob.name.as_deref(), Some("Bob"));
}
/// Two bare addresses in `From` are both kept, in order.
///
/// Note: despite the name, this test asserts that *both* mailboxes are
/// returned, not only the first one.
#[test]
fn parse_multiple_from_takes_first() {
    let wire = b"From: first@example.com, second@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let mail = parse_email(wire).unwrap();
    let senders = &mail.from;
    assert_eq!(senders.len(), 2);
    assert_eq!(senders[0].email, "first@example.com");
    assert_eq!(senders[1].email, "second@example.com");
}
/// A `multipart/mixed` header with no `boundary` parameter still produces
/// some body text instead of dropping the content.
#[test]
fn parse_multipart_no_boundary_param() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed\r\n\
        \r\n\
        Some text content";
    let body = parse_email(wire).unwrap().body_text;
    assert!(body.is_some());
}
/// Headers followed by the blank separator and nothing else give
/// `body_text == None`.
#[test]
fn parse_empty_body_after_headers() {
    let wire = b"From: a@b.com\r\n\
        Subject: Empty body\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let mail = parse_email(wire).unwrap();
    let subject = mail.subject.as_deref();
    assert_eq!(subject, Some("Empty body"));
    assert!(mail.body_text.is_none());
}
/// Adjacent encoded-words with different charsets and encodings are each
/// decoded and then concatenated.
#[test]
fn parse_mixed_charset_encoded_words() {
    let wire = b"From: a@b.com\r\n\
        Subject: =?UTF-8?B?SGVsbG8=?= =?ISO-8859-1?Q?_caf=E9?=\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let mail = parse_email(wire).unwrap();
    let subject = mail.subject.as_deref();
    assert_eq!(subject, Some("Hello caf\u{e9}"));
}
/// With no `Date` header, `date` is `None` and other fields still parse.
#[test]
fn parse_no_date_header() {
    let wire = b"From: a@b.com\r\n\
        Subject: No date\r\n\
        \r\n\
        Body";
    let mail = parse_email(wire).unwrap();
    assert!(mail.date.is_none());
    let subject = mail.subject.as_deref();
    assert_eq!(subject, Some("No date"));
}
/// A `text/plain` part explicitly marked as an attachment is listed as one
/// and does not replace the body taken from the first text part.
#[test]
fn parse_explicit_attachment_text_plain() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body text\r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        Content-Disposition: attachment; filename=\"log.txt\"\r\n\
        \r\n\
        Log file content\r\n\
        --b--";
    let mail = parse_email(wire).unwrap();
    assert_eq!(mail.body_text.as_deref(), Some("Body text"));
    assert_eq!(mail.attachments.len(), 1);

    let log = &mail.attachments[0];
    assert_eq!(log.filename.as_deref(), Some("log.txt"));
    assert_eq!(log.content_type, "text/plain");
}
/// A `-0800` zone yields an offset of -480 minutes.
#[test]
fn parse_date_negative_timezone() {
    let wire = b"From: a@b.com\r\n\
        Date: Fri, 14 Feb 2025 09:15:00 -0800\r\n\
        \r\n";
    let when = parse_email(wire).unwrap().date.unwrap();
    assert_eq!(when.tz_offset_minutes, -480);
}
/// `size` reports the byte length of the raw input.
#[test]
fn parse_size_equals_input_length() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n\
        Body";
    let expected_size = wire.len() as u64;
    let mail = parse_email(wire).unwrap();
    assert_eq!(mail.size, expected_size);
}
/// A buffer holding every byte value 0..=255 once is rejected with an error.
#[test]
fn parse_binary_garbage_returns_error() {
    let every_byte: Vec<u8> = (u8::MIN..=u8::MAX).collect();
    assert!(parse_email(&every_byte).is_err());
}
/// Two encoded-words separated only by a header fold are joined with no
/// whitespace between them.
#[test]
fn parse_folded_encoded_word_subject() {
    // The fold's leading space must stay on the same source line as the
    // preceding `\r\n`; a `\`-newline continuation would strip it.
    let wire = b"From: a@b.com\r\n\
        Subject: =?UTF-8?B?SGVsbG8=?=\r\n =?UTF-8?B?V29ybGQ=?=\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let mail = parse_email(wire).unwrap();
    let subject = mail.subject.as_deref();
    assert_eq!(subject, Some("HelloWorld"));
}
/// Charset and encoding tokens in encoded-words are case-insensitive.
#[test]
fn parse_encoded_word_lowercase_encoding() {
    let wire = b"From: sender@example.com\r\n\
        Subject: =?utf-8?b?SGVsbG8=?= =?utf-8?q?_World?=\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let mail = parse_email(wire).unwrap();
    let subject = mail.subject.as_deref();
    assert_eq!(subject, Some("Hello World"));
}
/// An unterminated encoded-word is left in the subject as-is (the `=?`
/// marker is still present).
#[test]
fn parse_malformed_encoded_word_passthrough() {
    let wire = b"From: a@b.com\r\n\
        Subject: =?UTF-8?B?broken\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let subject = parse_email(wire).unwrap().subject;
    assert!(subject.is_some());
    assert!(subject.unwrap().contains("=?"));
}
/// An encoded-word whose encoding letter is neither `B` nor `Q` is passed
/// through undecoded (the `=?` marker is still present).
#[test]
fn parse_encoded_word_unknown_encoding_type() {
    let wire = b"From: a@b.com\r\n\
        Subject: =?UTF-8?X?data?=\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let subject = parse_email(wire).unwrap().subject;
    assert!(subject.is_some());
    assert!(subject.unwrap().contains("=?"));
}
/// Raw UTF-8 in header values (no encoded-words) is accepted for both the
/// subject and a display name.
#[test]
fn parse_utf8_directly_in_headers_rfc6532() {
    let wire = "From: José <jose@example.com>\r\n\
        Subject: Ñoño café\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n\
        Body";
    let mail = parse_email(wire.as_bytes()).unwrap();
    assert_eq!(mail.subject.as_deref(), Some("Ñoño café"));

    let sender = &mail.from[0];
    assert_eq!(sender.name.as_deref(), Some("José"));
    assert_eq!(sender.email, "jose@example.com");
}
/// Text before the first boundary (the preamble) is not treated as body.
#[test]
fn parse_multipart_with_preamble() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"preamble-test\"\r\n\
        \r\n\
        This is the preamble, which should be ignored.\r\n\
        --preamble-test\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Actual body\r\n\
        --preamble-test--";
    let mail = parse_email(wire).unwrap();
    let body = mail.body_text.as_deref();
    assert_eq!(body, Some("Actual body"));
}
/// When `Content-Disposition` has no filename, the `name` parameter of
/// `Content-Type` supplies the attachment filename.
#[test]
fn parse_attachment_name_from_content_type() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/pdf; name=\"report.pdf\"\r\n\
        Content-Disposition: attachment\r\n\
        \r\n\
        PDF\r\n\
        --b--";
    let mail = parse_email(wire).unwrap();
    assert_eq!(mail.attachments.len(), 1);
    let filename = mail.attachments[0].filename.as_deref();
    assert_eq!(filename, Some("report.pdf"));
}
/// A single-part `text/plain` message carrying a `name` parameter (and no
/// disposition) is classified as an attachment, not as body text.
#[test]
fn text_plain_name_parameter_is_treated_as_attachment() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; name=\"notes.txt\"\r\n\
        Content-Transfer-Encoding: base64\r\n\
        \r\n\
        SGVsbG8=\r\n";
    let mail = parse_email(wire).unwrap();
    assert!(
        mail.body_text.is_none(),
        "filename-bearing text/plain part should not become body_text"
    );
    assert_eq!(mail.attachments.len(), 1);

    let notes = &mail.attachments[0];
    assert_eq!(notes.content_type, "text/plain");
    assert_eq!(notes.filename.as_deref(), Some("notes.txt"));
}
/// Inside a multipart container, a `text/plain` part with a `name` parameter
/// is classified as an attachment, not as body text.
#[test]
fn multipart_text_plain_name_parameter_is_treated_as_attachment() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain; name=\"notes.txt\"\r\n\
        Content-Transfer-Encoding: base64\r\n\
        \r\n\
        SGVsbG8=\r\n\
        --b--";
    let mail = parse_email(wire).unwrap();
    assert!(
        mail.body_text.is_none(),
        "filename-bearing multipart text/plain part should not become body_text"
    );
    assert_eq!(mail.attachments.len(), 1);

    let notes = &mail.attachments[0];
    assert_eq!(notes.content_type, "text/plain");
    assert_eq!(notes.filename.as_deref(), Some("notes.txt"));
}
/// A quoted-printable soft line break written as `=` + bare LF is removed.
#[test]
fn parse_qp_soft_break_lf_only() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: quoted-printable\r\n\
        \r\n\
        Hello=\nWorld";
    let mail = parse_email(wire).unwrap();
    let body = mail.body_text.as_deref();
    assert_eq!(body, Some("HelloWorld"));
}
/// Plain text around an encoded-word keeps its separating spaces.
#[test]
fn parse_subject_mixed_encoded_and_plain() {
    let wire = b"From: a@b.com\r\n\
        Subject: Re: =?UTF-8?B?SGVsbG8=?= there\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let mail = parse_email(wire).unwrap();
    let subject = mail.subject.as_deref();
    assert_eq!(subject, Some("Re: Hello there"));
}
/// A body consisting only of whitespace still counts as a body
/// (`body_text` is `Some`).
#[test]
fn parse_whitespace_only_body() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        \r\n\
        \r\n \r\n";
    let body = parse_email(wire).unwrap().body_text;
    assert!(body.is_some());
}
/// A `Date` with no zone still parses, with the offset reported as 0.
#[test]
fn parse_date_missing_timezone() {
    let wire = b"From: a@b.com\r\n\
        Date: 13 Feb 2025 12:00:00\r\n\
        \r\n";
    let when = parse_email(wire).unwrap().date.unwrap();
    assert_eq!(when.year, 2025);
    assert_eq!(when.tz_offset_minutes, 0);
}
/// Section numbers use dot notation for nested parts: the image inside the
/// first (`multipart/related`) part is `"1.2"`, the outer PDF is `"2"`.
#[test]
fn parse_deeply_nested_section_dot_notation() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"outer\"\r\n\
        \r\n\
        --outer\r\n\
        Content-Type: multipart/related; boundary=\"rel\"\r\n\
        \r\n\
        --rel\r\n\
        Content-Type: text/html\r\n\
        \r\n\
        <img src=\"cid:img1\">\r\n\
        --rel\r\n\
        Content-Type: image/png\r\n\
        Content-ID: <img1>\r\n\
        \r\n\
        PNG_DATA\r\n\
        --rel--\r\n\
        --outer\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename=\"doc.pdf\"\r\n\
        \r\n\
        PDF\r\n\
        --outer--";
    let mail = parse_email(wire).unwrap();

    // The HTML body references the inline image by content id.
    let html = mail.body_html.as_deref();
    assert!(html.is_some());
    assert!(html.unwrap().contains("cid:img1"));

    // Inline image nested inside the first top-level part.
    let png = mail
        .attachments
        .iter()
        .find(|part| part.content_type == "image/png")
        .unwrap();
    assert_eq!(png.section.as_deref(), Some("1.2"));
    assert!(png.is_inline);

    // Regular attachment at the top level.
    let pdf = mail
        .attachments
        .iter()
        .find(|part| part.content_type == "application/pdf")
        .unwrap();
    assert_eq!(pdf.section.as_deref(), Some("2"));
}
/// Bytes 0xFF/0xFE in a charset-less `text/plain` body must not make the
/// body disappear.
#[test]
fn parse_non_ascii_bytes_in_body() {
    let wire = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Hello \xff\xfe world";
    let body = parse_email(wire).unwrap().body_text;
    assert!(body.is_some());
}
/// A base64 body split across CRLF-separated lines decodes to "Hello World".
#[test]
fn parse_base64_body_with_line_breaks() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: base64\r\n\
        \r\n\
        SGVs\r\nbG8g\r\nV29y\r\nbGQ=";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(body, Some("Hello World"));
}
/// A Date header carrying extra whitespace still yields 2025-02-13.
#[test]
fn parse_date_extra_whitespace() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000 \r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let when = email.date.unwrap();
    assert_eq!(when.year, 2025);
    assert_eq!(when.month, 2);
    assert_eq!(when.day, 13);
}
/// multipart/related with an HTML part and an inline JPEG: the JPEG is the
/// single attachment, flagged inline, with its Content-ID and filename.
#[test]
fn parse_multipart_related_with_inline_images() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/related; boundary=\"rel\"\r\n\
        \r\n\
        --rel\r\n\
        Content-Type: text/html\r\n\
        \r\n\
        <html><img src=\"cid:logo\"></html>\r\n\
        --rel\r\n\
        Content-Type: image/jpeg\r\n\
        Content-ID: <logo>\r\n\
        Content-Disposition: inline; filename=\"logo.jpg\"\r\n\
        \r\n\
        JPEG_DATA\r\n\
        --rel--";
    let email = parse_email(message).unwrap();
    assert!(email.body_html.is_some());
    assert_eq!(email.attachments.len(), 1);

    let logo = &email.attachments[0];
    assert!(logo.is_inline);
    assert_eq!(logo.content_id.as_deref(), Some("logo"));
    assert_eq!(logo.filename.as_deref(), Some("logo.jpg"));
}
/// A single-part text/plain message marked `inline` with a filename stays in
/// `body_text` and produces no attachment.
#[test]
fn parse_single_part_text_inline_filename_stays_body_text() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8; name=\"notice.txt\"\r\n\
        Content-Disposition: inline; filename=\"notice.txt\"\r\n\
        \r\n\
        Inline notice";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Inline notice"),
        "RFC 2183 Sections 2.1 and 2.3: explicit inline text/plain with a filename hint must still populate body_text"
    );
    let no_attachments = email.attachments.is_empty();
    assert!(
        no_attachments,
        "explicit inline text/plain with a filename hint must not be reclassified as an attachment"
    );
}
/// Inside multipart/related, a text/html part marked `inline` with a filename
/// stays in `body_html` and produces no attachment.
#[test]
fn parse_multipart_inline_html_filename_stays_body_html() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/related; boundary=\"rel\"\r\n\
        \r\n\
        --rel\r\n\
        Content-Type: text/html; charset=utf-8; name=\"body.html\"\r\n\
        Content-Disposition: inline; filename=\"body.html\"\r\n\
        \r\n\
        <p>Inline html body</p>\r\n\
        --rel--";
    let email = parse_email(message).unwrap();
    let html = email.body_html.as_deref();
    assert_eq!(
        html,
        Some("<p>Inline html body</p>"),
        "RFC 2183 Sections 2.1 and 2.3: explicit inline text/html with a filename hint must still populate body_html"
    );
    let no_attachments = email.attachments.is_empty();
    assert!(
        no_attachments,
        "explicit inline text/html with a filename hint must not be reclassified as an attachment"
    );
}
/// A message holding only a From header parses; subject, date and body text
/// are all absent.
#[test]
fn parse_minimal_message_from_only() {
    let message = b"From: a@b.com\r\n\r\n";
    let email = parse_email(message).unwrap();
    let sender = &email.from[0];
    assert_eq!(sender.email, "a@b.com");
    assert!(email.subject.is_none());
    assert!(email.date.is_none());
    assert!(email.body_text.is_none());
}
/// With duplicated From and Subject headers, the first occurrence is what the
/// parsed result exposes.
#[test]
fn parse_multiple_same_headers() {
    let message = b"From: first@example.com\r\n\
        From: second@example.com\r\n\
        Subject: First\r\n\
        Subject: Second\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let sender = &email.from[0];
    assert_eq!(sender.email, "first@example.com");
    assert_eq!(email.subject.as_deref(), Some("First"));
}
/// Each named zone abbreviation in the table must map to the listed offset
/// in minutes.
#[test]
fn parse_date_all_named_timezones() {
    // (zone abbreviation, expected offset in minutes)
    let zones = [
        ("EST", -300),
        ("EDT", -240),
        ("CST", -360),
        ("CDT", -300),
        ("MST", -420),
        ("MDT", -360),
        ("PST", -480),
        ("PDT", -420),
        ("GMT", 0),
        ("UTC", 0),
        ("UT", 0),
    ];
    for &(tz_name, want_offset) in &zones {
        let message =
            format!("From: a@b.com\r\nDate: Thu, 13 Feb 2025 12:00:00 {tz_name}\r\n\r\n");
        let email = parse_email(message.as_bytes()).unwrap();
        let when = email.date.unwrap();
        assert_eq!(
            when.tz_offset_minutes, want_offset,
            "Failed for timezone {tz_name}"
        );
    }
}
/// A quoted boundary containing `-`, `=`, `_` and `+` still delimits parts.
#[test]
fn parse_boundary_with_special_chars() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"----=_Part_123+abc\"\r\n\
        \r\n\
        ------=_Part_123+abc\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body text\r\n\
        ------=_Part_123+abc--";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(body, Some("Body text"));
}
/// Malformed base64 content must not fail parsing; `body_text` is still set.
#[test]
fn parse_truncated_base64_body() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: base64\r\n\
        \r\n\
        SGVsbG8gV29yb===invalid";
    let email = parse_email(message).unwrap();
    let has_text = email.body_text.is_some();
    assert!(has_text);
}
/// A To header using empty group syntax must not disturb parsing of From.
#[test]
fn parse_address_group_syntax() {
    let message = b"From: sender@example.com\r\n\
        To: Undisclosed:;\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let sender = &email.from[0];
    assert_eq!(sender.email, "sender@example.com");
}
/// An ISO-2022-JP base64 encoded-word in Subject decodes to "テスト".
#[test]
fn parse_iso2022jp_encoded_word() {
    let message = b"From: a@b.com\r\n\
        Subject: =?ISO-2022-JP?B?GyRCJUYlOSVIGyhC?=\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert!(subject.is_some());
    assert_eq!(subject, Some("テスト"));
}
/// A multipart message whose declared boundary never occurs falls back to
/// treating the whole body as text, with no HTML and no attachments.
#[test]
fn parse_multipart_missing_parts_tolerance() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"never-appears\"\r\n\
        \r\n\
        This body doesn't contain any boundaries at all.";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("This body doesn't contain any boundaries at all.")
    );
    assert!(email.body_html.is_none());
    assert!(email.attachments.is_empty());
}
/// A nested multipart part lacking a boundary parameter must surface its
/// payload as `body_text` instead of dropping it.
#[test]
fn parse_nested_multipart_without_boundary_falls_back_to_text() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"outer\"\r\n\
        \r\n\
        --outer\r\n\
        Content-Type: multipart/alternative\r\n\
        \r\n\
        Inner text that should not disappear.\r\n\
        --outer--\r\n";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Inner text that should not disappear."),
        "malformed nested multipart parts should fall back to simple text"
    );
}
/// A boundary-less nested multipart with attachment disposition is recovered
/// as one attachment that keeps section "2" of the outer message.
#[test]
fn parse_nested_multipart_without_boundary_preserves_outer_section_number() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"outer\"\r\n\
        \r\n\
        --outer\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        First part.\r\n\
        --outer\r\n\
        Content-Type: multipart/mixed\r\n\
        Content-Disposition: attachment; filename=\"nested.txt\"\r\n\
        \r\n\
        Recovered nested payload.\r\n\
        --outer--\r\n";
    let email = parse_email(message).unwrap();
    assert_eq!(email.attachments.len(), 1);
    let recovered = &email.attachments[0];
    assert_eq!(
        recovered.section.as_deref(),
        Some("2"),
        "recovered malformed nested multipart should keep its outer section number"
    );
}
/// Encoded-words are decoded in From and To display names and in Subject.
#[test]
fn parse_encoded_word_in_multiple_header_types() {
    let message = b"From: =?UTF-8?Q?M=C3=BCller?= <mueller@example.com>\r\n\
        To: =?UTF-8?B?U21pdGg=?= <smith@example.com>\r\n\
        Subject: =?UTF-8?Q?Caf=C3=A9?=\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let sender_name = email.from[0].name.as_deref();
    let recipient_name = email.to[0].name.as_deref();
    assert_eq!(sender_name, Some("Müller"));
    assert_eq!(recipient_name, Some("Smith"));
    assert_eq!(email.subject.as_deref(), Some("Café"));
}
/// The attachment `size` equals the 10 payload bytes of its part body.
#[test]
fn parse_attachment_size_reflects_part_body() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename=\"f.pdf\"\r\n\
        \r\n\
        0123456789\r\n\
        --b--";
    let email = parse_email(message).unwrap();
    assert_eq!(email.attachments.len(), 1);
    let pdf = &email.attachments[0];
    assert_eq!(pdf.size, Some(10));
}
/// A boundary parameter given as a bare token (no quotes) is honoured.
#[test]
fn parse_unquoted_boundary() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=simple_boundary\r\n\
        \r\n\
        --simple_boundary\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Text\r\n\
        --simple_boundary--";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(body, Some("Text"));
}
/// Stray text following an unquoted boundary token must not stop the parser
/// from splitting on the token itself.
#[test]
fn parse_boundary_ignores_stray_bare_word_after_token() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=actual unexpected\r\n\
        \r\n\
        --actual\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Hello\r\n\
        --actual--";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello"),
        "RFC 2046 Section 5.1.1: stray bare text after boundary token must not \
         prevent multipart parsing when the actual delimiter is still unambiguous"
    );
}
/// A bare `local@host` Message-ID without angle brackets is accepted.
#[test]
fn parse_message_id_without_angle_brackets() {
    let message = b"From: a@b.com\r\n\
        Message-ID: bare-id@host.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let id = email.message_id.as_deref();
    assert_eq!(id, Some("bare-id@host.com"));
}
/// A bare Message-ID value that is not `left@right` shaped is discarded.
#[test]
fn parse_message_id_without_angle_brackets_requires_msg_id_syntax() {
    let message = b"From: a@b.com\r\n\
        Message-ID: not-a-msg-id\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let absent = email.message_id.is_none();
    assert!(
        absent,
        "invalid bare Message-ID fallback must be ignored, got {:?}",
        email.message_id
    );
}
/// A bracketed Message-ID whose content is not a valid msg-id is discarded.
#[test]
fn parse_message_id_with_invalid_bracketed_syntax_is_ignored() {
    let message = b"From: a@b.com\r\n\
        Message-ID: <not a msg-id>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let absent = email.message_id.is_none();
    assert!(
        absent,
        "invalid bracketed Message-ID must be ignored, got {:?}",
        email.message_id
    );
}
/// A Message-ID whose left side is a quoted string (containing `@`) is kept
/// verbatim, quotes included.
#[test]
fn parse_message_id_with_quoted_id_left() {
    let message = b"From: a@b.com\r\n\
        Message-ID: <\"user@inner\"@example.com>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let id = email.message_id.as_deref();
    assert_eq!(id, Some("\"user@inner\"@example.com"));
}
/// Whitespace around the dots of a bracketed Message-ID is removed, giving
/// `foo.bar@example.com`.
#[test]
fn parse_message_id_with_obs_cfws_between_atoms() {
    let message = b"From: a@b.com\r\n\
        Message-ID: <foo . bar@example . com>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let id = email.message_id.as_deref();
    assert_eq!(
        id,
        Some("foo.bar@example.com"),
        "obsolete CFWS inside a bracketed Message-ID must be normalized \
         per RFC 5322 Section 4.5.4"
    );
}
/// Bare, whitespace-separated ids in In-Reply-To are each collected,
/// including one with a domain-literal right side.
#[test]
fn parse_in_reply_to_without_angle_brackets() {
    let message = b"From: a@b.com\r\n\
        In-Reply-To: first@host second@[127.0.0.1]\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    assert_eq!(
        email.in_reply_to,
        vec!["first@host", "second@[127.0.0.1]"]
    );
}
/// Bare ids in References are collected, including an IPv6 domain literal.
#[test]
fn parse_references_without_angle_brackets() {
    let message = b"From: a@b.com\r\n\
        References: ref1@host ref2@[IPv6:2001:db8::1]\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    assert_eq!(
        email.references,
        vec!["ref1@host", "ref2@[IPv6:2001:db8::1]"]
    );
}
/// Invalid bracketed entries in References are dropped while the valid ones
/// on either side are kept in order.
#[test]
fn parse_references_filters_invalid_bracketed_msg_ids() {
    let message = b"From: a@b.com\r\n\
        References: <good@example.com> <not a msg-id> <also.good@[127.0.0.1]>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    assert_eq!(
        email.references,
        vec!["good@example.com", "also.good@[127.0.0.1]"]
    );
}
/// A References header with no value yields an empty list.
#[test]
fn parse_empty_references_header() {
    let message = b"From: a@b.com\r\n\
        References: \r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let no_refs = email.references.is_empty();
    assert!(no_refs);
}
/// One text part followed by five attachments: the attachments are numbered
/// sections "2".."6" and keep their `fileN.bin` names in order.
#[test]
fn parse_large_multipart_many_attachments() {
    let mut message = b"From: a@b.com\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"multi\"\r\n\r\n"
        .to_vec();
    message.extend_from_slice(b"--multi\r\nContent-Type: text/plain\r\n\r\nBody\r\n");
    for i in 1..=5 {
        let part = format!(
            "--multi\r\nContent-Type: application/octet-stream\r\n\
             Content-Disposition: attachment; filename=\"file{i}.bin\"\r\n\r\n\
             DATA{i}\r\n"
        );
        message.extend_from_slice(part.as_bytes());
    }
    message.extend_from_slice(b"--multi--");

    let email = parse_email(&message).unwrap();
    assert_eq!(email.body_text.as_deref(), Some("Body"));
    assert_eq!(email.attachments.len(), 5);
    for (i, att) in email.attachments.iter().enumerate() {
        // Section 1 is the text part, so attachments start at section 2.
        let want_section = (i + 2).to_string();
        assert_eq!(
            att.section.as_deref(),
            Some(want_section.as_str()),
            "Wrong section for attachment {i}"
        );
        let want_name = format!("file{}.bin", i + 1);
        assert_eq!(att.filename.as_deref(), Some(want_name.as_str()));
    }
}
/// `Message-ID: <>` yields no message id.
#[test]
fn parse_message_id_empty_brackets() {
    let message = b"From: a@b.com\r\n\
        Message-ID: <>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let absent = email.message_id.is_none();
    assert!(absent);
}
/// A Message-ID header with an empty value yields no message id.
#[test]
fn parse_message_id_empty_value() {
    let message = b"From: a@b.com\r\n\
        Message-ID: \r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let absent = email.message_id.is_none();
    assert!(absent);
}
/// `xfilename*` must not be mistaken for `filename*`; the real parameter's
/// value is returned.
#[test]
fn rfc2231_param_boundary_check() {
    let header = "attachment; xfilename*=UTF-8''bad.pdf; filename*=UTF-8''good.pdf";
    let found = extract_rfc2231_param(header, "filename");
    assert_eq!(found.as_deref(), Some("good.pdf"));
}
/// The extended parameter is found even when it starts the header value.
#[test]
fn rfc2231_param_at_start() {
    let header = "filename*=UTF-8''test.pdf";
    let found = extract_rfc2231_param(header, "filename");
    assert_eq!(found.as_deref(), Some("test.pdf"));
}
/// A comma inside a quoted display name belongs to the name, not to the
/// address-list separator.
#[test]
fn parse_quoted_display_name_with_comma() {
    let message = b"From: \"Doe, John\" <john@example.com>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let sender = &email.from[0];
    assert_eq!(sender.name.as_deref(), Some("Doe, John"));
    assert_eq!(sender.email, "john@example.com");
}
/// Backslash-escaped quotes inside a quoted display name are unescaped.
#[test]
fn parse_quoted_display_name_with_escaped_chars() {
    let message = b"From: \"John \\\"Doc\\\" Doe\" <john@example.com>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let sender = &email.from[0];
    assert_eq!(sender.name.as_deref(), Some("John \"Doc\" Doe"));
    assert_eq!(sender.email, "john@example.com");
}
/// Table-driven check of `unescape_quoted_string`: plain text is unchanged,
/// `\\` and `\"` collapse to one character, and a lone trailing backslash
/// is kept.
#[test]
fn unescape_quoted_string_backslash() {
    // (input, expected output)
    let cases = [
        ("hello", "hello"),
        ("a\\\\b", "a\\b"),
        ("a\\\"b", "a\"b"),
        ("trailing\\", "trailing\\"),
    ];
    for (input, expected) in cases {
        assert_eq!(unescape_quoted_string(input), expected);
    }
}
/// An escaped quote inside a display name must not confuse list splitting:
/// two addresses are expected.
#[test]
fn parse_address_list_with_escaped_quotes_in_display_name() {
    let message = b"From: a@b.com\r\n\
        To: \"A\\\"B\" <a@x.com>, c@d.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    assert_eq!(
        email.to.len(),
        2,
        "Expected 2 To addresses but got {:?}",
        email.to
    );
    let (first, second) = (&email.to[0], &email.to[1]);
    assert_eq!(first.email, "a@x.com");
    assert_eq!(first.name.as_deref(), Some("A\"B"));
    assert_eq!(second.email, "c@d.com");
}
/// `filename*0` / `filename*1` continuation segments are concatenated into
/// a single attachment filename.
#[test]
fn parse_rfc2231_continuation_filename() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename*0=\"very_long_\"; filename*1=\"filename.pdf\"\r\n\
        \r\n\
        DATA\r\n\
        --b--";
    let email = parse_email(message).unwrap();
    assert_eq!(email.attachments.len(), 1);
    let pdf = &email.attachments[0];
    assert_eq!(
        pdf.filename.as_deref(),
        Some("very_long_filename.pdf"),
        "RFC 2231 continuation filename not reassembled"
    );
}
/// Percent-encoded `filename*0*` / `filename*1*` segments with a UTF-8
/// charset are joined and decoded to "résumé.pdf".
#[test]
fn parse_rfc2231_continuation_with_charset() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename*0*=UTF-8''r%C3%A9sum; filename*1*=%C3%A9.pdf\r\n\
        \r\n\
        DATA\r\n\
        --b--";
    let email = parse_email(message).unwrap();
    assert_eq!(email.attachments.len(), 1);
    let pdf = &email.attachments[0];
    assert_eq!(
        pdf.filename.as_deref(),
        Some("résumé.pdf"),
        "RFC 2231 continuation with charset not reassembled"
    );
}
/// A comma inside a trailing comment must not split the address list, and
/// the comment text becomes the display name.
#[test]
fn parse_address_comment_with_comma() {
    let message = b"From: sender@example.com\r\n\
        To: user@example.com (Doe, John), other@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    assert_eq!(
        email.to.len(),
        2,
        "Expected 2 To addresses but got {:?}",
        email.to
    );
    let (first, second) = (&email.to[0], &email.to[1]);
    assert_eq!(first.email, "user@example.com");
    assert_eq!(
        first.name.as_deref(),
        Some("Doe, John"),
        "Display name from comment should be preserved intact"
    );
    assert_eq!(second.email, "other@example.com");
}
/// Unfolding a Subject continued on a tab-indented line keeps the tab.
#[test]
fn parse_header_unfolding_preserves_wsp() {
    let message = b"From: a@b.com\r\n\
        Subject: Hello\r\n\tWorld\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(
        subject,
        Some("Hello\tWorld"),
        "Tab from continuation line should be preserved per RFC 5322 Section 2.2.3"
    );
}
/// 2025-02-13 15:47:33 UTC maps to 1_739_461_653, and the same instant
/// written at +05:30 produces an identical timestamp.
#[test]
fn datetime_to_unix_timestamp() {
    use crate::types::DateTime;

    let utc = DateTime {
        year: 2025,
        month: 2,
        day: 13,
        hour: 15,
        minute: 47,
        second: 33,
        tz_offset_minutes: 0,
    };
    let utc_ts = utc.to_unix_timestamp();
    assert_eq!(utc_ts, 1_739_461_653);

    // Same instant expressed with a +330 minute offset.
    let shifted = DateTime {
        year: 2025,
        month: 2,
        day: 13,
        hour: 21,
        minute: 17,
        second: 33,
        tz_offset_minutes: 330,
    };
    assert_eq!(shifted.to_unix_timestamp(), utc.to_unix_timestamp());
}
/// Converting 1_739_461_653 back at offset 0 gives 2025-02-13 15:47:33; at
/// offset 330 the wall-clock time reads 21:17.
#[test]
fn datetime_from_unix_timestamp() {
    use crate::types::DateTime;

    let ts = 1_739_461_653_i64;

    let utc = DateTime::from_unix_timestamp(ts, 0);
    assert_eq!(utc.year, 2025);
    assert_eq!(utc.month, 2);
    assert_eq!(utc.day, 13);
    assert_eq!(utc.hour, 15);
    assert_eq!(utc.minute, 47);
    assert_eq!(utc.second, 33);
    assert_eq!(utc.tz_offset_minutes, 0);

    let shifted = DateTime::from_unix_timestamp(ts, 330);
    assert_eq!(shifted.hour, 21);
    assert_eq!(shifted.minute, 17);
}
/// A `DateTime` at offset -480 survives conversion to a Unix timestamp and
/// back at the same offset.
#[test]
fn datetime_round_trip_timestamp() {
    use crate::types::DateTime;

    let original = DateTime {
        year: 2025,
        month: 12,
        day: 31,
        hour: 23,
        minute: 59,
        second: 59,
        tz_offset_minutes: -480,
    };
    let seconds = original.to_unix_timestamp();
    let rebuilt = DateTime::from_unix_timestamp(seconds, -480);
    assert_eq!(original, rebuilt);
}
/// Ordering compares instants: 12:00 at offset 0 equals 07:00 at offset
/// -300, and 13:00 at offset 0 sorts after 12:00 at offset 0.
#[test]
fn datetime_ord_comparison() {
    use crate::types::DateTime;

    let noon_utc = DateTime {
        year: 2025,
        month: 1,
        day: 1,
        hour: 12,
        minute: 0,
        second: 0,
        tz_offset_minutes: 0,
    };
    let seven_est = DateTime {
        year: 2025,
        month: 1,
        day: 1,
        hour: 7,
        minute: 0,
        second: 0,
        tz_offset_minutes: -300,
    };
    let ordering = noon_utc.cmp(&seven_est);
    assert_eq!(ordering, std::cmp::Ordering::Equal);

    let one_pm_utc = DateTime {
        year: 2025,
        month: 1,
        day: 1,
        hour: 13,
        minute: 0,
        second: 0,
        tz_offset_minutes: 0,
    };
    assert!(one_pm_utc > noon_utc);
}
/// Timestamp 0 at offset 0 is 1970-01-01 00:00:00 and converts back to 0.
#[test]
fn datetime_epoch() {
    use crate::types::DateTime;

    let origin = DateTime::from_unix_timestamp(0, 0);
    assert_eq!(origin.year, 1970);
    assert_eq!(origin.month, 1);
    assert_eq!(origin.day, 1);
    assert_eq!(origin.hour, 0);
    assert_eq!(origin.minute, 0);
    assert_eq!(origin.second, 0);

    let back = origin.to_unix_timestamp();
    assert_eq!(back, 0);
}
/// `parse_headers_only` fills in header metadata but leaves body text, HTML
/// and attachments empty even when the message has a multipart body.
#[test]
fn parse_headers_only_extracts_metadata() {
    let message = b"From: sender@example.com\r\n\
        To: recipient@example.com\r\n\
        Subject: Test\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Message-ID: <abc123@example.com>\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        This body should NOT be parsed\r\n\
        --b\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename=\"doc.pdf\"\r\n\
        \r\n\
        PDF_DATA\r\n\
        --b--";
    let headers = parse_headers_only(message).unwrap();

    // Header-derived fields are present.
    let sender = &headers.from[0];
    assert_eq!(sender.email, "sender@example.com");
    assert_eq!(headers.to.len(), 1);
    assert_eq!(headers.subject.as_deref(), Some("Test"));
    assert_eq!(headers.message_id.as_deref(), Some("abc123@example.com"));
    assert!(headers.date.is_some());

    // Body-derived fields stay empty.
    assert!(headers.body_text.is_none());
    assert!(headers.body_html.is_none());
    assert!(headers.attachments.is_empty());
}
/// Empty input is rejected with `Error::EmptyInput`.
#[test]
fn parse_headers_only_empty_input() {
    let outcome = parse_headers_only(b"");
    let is_empty_input = matches!(outcome, Err(Error::EmptyInput));
    assert!(is_empty_input);
}
/// Headers-only parsing succeeds without a From header, giving an empty
/// `from` list while other headers are still read.
#[test]
fn parse_headers_only_missing_from() {
    let message = b"Subject: No From\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let headers = parse_headers_only(message).expect(
        "headers-only parsing should tolerate missing From when the header block \
         is otherwise usable",
    );
    let no_sender = headers.from.is_empty();
    assert!(
        no_sender,
        "missing From should produce an empty from list, got {:?}",
        headers.from
    );
    assert_eq!(headers.subject.as_deref(), Some("No From"));
}
/// Input with no header section at all still parses in headers-only mode:
/// every field is empty and `size` equals the input length.
#[test]
fn parse_headers_only_body_only_message_without_headers() {
    let message = b"Body-only message with no header section";
    let headers = parse_headers_only(message).expect(
        "headers-only parsing should accept body-only malformed messages so \
         callers can still inspect size and raw header state",
    );
    assert!(headers.from.is_empty());
    assert!(headers.raw_headers.is_empty());
    assert!(headers.subject.is_none());
    assert!(headers.body_text.is_none());
    assert!(headers.body_html.is_none());
    assert!(headers.attachments.is_empty());

    let input_len = message.len() as u64;
    assert_eq!(headers.size, input_len);
}
/// A header block holding only custom fields parses; both fields appear in
/// `extra_headers` under lowercase names.
#[test]
fn parse_headers_only_custom_headers_without_well_known_headers() {
    let message = b"X-Trace: 12345\r\n\
        List-Id: Example List <list.example>\r\n\
        \r\n";
    let headers = parse_headers_only(message).expect(
        "headers-only parsing should accept header blocks containing only \
         optional fields",
    );
    assert!(headers.from.is_empty());
    assert!(headers.subject.is_none());

    let has_trace = headers
        .extra_headers
        .iter()
        .any(|(name, value)| name == "x-trace" && value == "12345");
    assert!(has_trace, "X-Trace must be preserved in extra_headers");

    let has_list_id = headers
        .extra_headers
        .iter()
        .any(|(name, value)| name == "list-id" && value == "Example List <list.example>");
    assert!(has_list_id, "List-Id must be preserved in extra_headers");
}
/// When a header's value begins on a continuation line, exactly one leading
/// space is structural and is stripped; any further whitespace is content.
///
/// `X-Note` is continued with one space (value "hello"), `X-Indent` with two
/// (value " value"). The X-Indent fixture needs that second space: with a
/// single space, stripping the structural SP would leave "value" and
/// contradict the assertion below.
#[test]
fn parse_headers_only_extra_header_starting_on_continuation_strips_structural_wsp() {
    // Each physical line of the literal starts with a non-space character so
    // the `\` line continuation cannot swallow significant leading whitespace.
    let raw = b"From: sender@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        X-Note:\r\n hello\r\n\
        X-Indent:\r\n  value\r\n\
        \r\n";
    let parsed = parse_headers_only(raw).unwrap();
    assert!(
        parsed
            .extra_headers
            .iter()
            .any(|(name, value)| name == "x-note" && value == "hello"),
        "first continuation SP must be treated as structural, got {:?}",
        parsed.extra_headers
    );
    assert!(
        parsed
            .extra_headers
            .iter()
            .any(|(name, value)| name == "x-indent" && value == " value"),
        "only one structural SP should be stripped from the first continuation line, got {:?}",
        parsed.extra_headers
    );
}
/// An escaped backslash (`\\`) inside a quoted filename parameter becomes a
/// single backslash in the attachment filename.
#[test]
fn extract_param_unescapes_backslash_in_filename() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename=\"path\\\\file.pdf\"\r\n\
        \r\n\
        DATA\r\n\
        --b--";
    let email = parse_email(message).unwrap();
    assert_eq!(email.attachments.len(), 1);
    let pdf = &email.attachments[0];
    assert_eq!(
        pdf.filename.as_deref(),
        Some("path\\file.pdf"),
        "Backslash in quoted-string filename must be unescaped per RFC 5322 Section 3.2.4"
    );
}
/// An escaped double quote inside a quoted filename parameter does not end
/// the value and appears unescaped in the attachment filename.
#[test]
fn extract_param_handles_escaped_quote_in_filename() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename=\"file\\\"name.pdf\"\r\n\
        \r\n\
        DATA\r\n\
        --b--";
    let email = parse_email(message).unwrap();
    assert_eq!(email.attachments.len(), 1);
    let pdf = &email.attachments[0];
    assert_eq!(
        pdf.filename.as_deref(),
        Some("file\"name.pdf"),
        "Escaped quote in quoted-string filename must be handled per RFC 5322 Section 3.2.4"
    );
}
/// Builds a message whose attachment filename contains a backslash, parses
/// the built bytes, and checks the filename comes back unchanged.
#[test]
fn build_then_parse_filename_with_backslash_round_trip() {
    let sender = Address {
        name: None,
        email: "a@b.com".into(),
    };
    let recipient = Address {
        name: None,
        email: "to@b.com".into(),
    };
    let attachment = crate::types::OutgoingAttachment {
        filename: "path\\file.pdf".into(),
        content_type: "application/pdf".into(),
        data: b"data".to_vec(),
        is_inline: false,
        content_id: None,
    };
    let outgoing = crate::types::OutgoingEmail {
        from: vec![sender],
        sender: None,
        to: vec![recipient],
        cc: vec![],
        bcc: vec![],
        reply_to: vec![],
        date: None,
        subject: "test".into(),
        body_text: Some("Body".into()),
        body_html: None,
        in_reply_to: vec![],
        references: vec![],
        attachments: vec![attachment],
        extra_headers: vec![],
    };

    let wire = crate::build_message(&outgoing).unwrap();
    let round_tripped = parse_email(&wire.raw).unwrap();
    assert_eq!(round_tripped.attachments.len(), 1);
    let first = &round_tripped.attachments[0];
    assert_eq!(
        first.filename.as_deref(),
        Some("path\\file.pdf"),
        "Round-trip filename with backslash must be preserved"
    );
}
/// Builds a message whose attachment filename contains a double quote,
/// parses the built bytes, and checks the filename comes back unchanged.
#[test]
fn build_then_parse_filename_with_quote_round_trip() {
    let sender = Address {
        name: None,
        email: "a@b.com".into(),
    };
    let recipient = Address {
        name: None,
        email: "to@b.com".into(),
    };
    let attachment = crate::types::OutgoingAttachment {
        filename: "file\"name.pdf".into(),
        content_type: "application/pdf".into(),
        data: b"data".to_vec(),
        is_inline: false,
        content_id: None,
    };
    let outgoing = crate::types::OutgoingEmail {
        from: vec![sender],
        sender: None,
        to: vec![recipient],
        cc: vec![],
        bcc: vec![],
        reply_to: vec![],
        date: None,
        subject: "test".into(),
        body_text: Some("Body".into()),
        body_html: None,
        in_reply_to: vec![],
        references: vec![],
        attachments: vec![attachment],
        extra_headers: vec![],
    };

    let wire = crate::build_message(&outgoing).unwrap();
    let round_tripped = parse_email(&wire.raw).unwrap();
    assert_eq!(round_tripped.attachments.len(), 1);
    let first = &round_tripped.attachments[0];
    assert_eq!(
        first.filename.as_deref(),
        Some("file\"name.pdf"),
        "Round-trip filename with double-quote must be preserved"
    );
}
/// A U+0130 character ahead of the target parameter must not break the
/// lookup of `filename`.
#[test]
fn extract_param_with_non_ascii_before_param() {
    let header = "attachment; description=\"\u{0130}stanbul\"; filename=\"report.pdf\"";
    let found = extract_param(header, "filename");
    assert_eq!(
        found.as_deref(),
        Some("report.pdf"),
        "extract_param must work when non-ASCII chars that change byte length \
         under Unicode lowercasing appear before the target parameter (RFC 6532)"
    );
}
/// An hour of 25 makes the Date header unparseable (`date` is `None`).
#[test]
fn parse_date_rejects_invalid_hour() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 25:00:00 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let rejected = email.date.is_none();
    assert!(
        rejected,
        "Date with hour=25 should be rejected per RFC 5322 Section 3.3"
    );
}
/// A minute of 60 makes the Date header unparseable (`date` is `None`).
#[test]
fn parse_date_rejects_invalid_minute() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 12:60:00 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let rejected = email.date.is_none();
    assert!(
        rejected,
        "Date with minute=60 should be rejected per RFC 5322 Section 3.3"
    );
}
/// A second of 61 makes the Date header unparseable (`date` is `None`).
#[test]
fn parse_date_rejects_invalid_second() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 12:00:61 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let rejected = email.date.is_none();
    assert!(
        rejected,
        "Date with second=61 should be rejected per RFC 5322 Section 3.3"
    );
}
/// A day of 32 makes the Date header unparseable (`date` is `None`).
#[test]
fn parse_date_rejects_invalid_day() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 32 Feb 2025 12:00:00 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let rejected = email.date.is_none();
    assert!(
        rejected,
        "Date with day=32 should be rejected per RFC 5322 Section 3.3"
    );
}
/// A day of 00 makes the Date header unparseable (`date` is `None`).
#[test]
fn parse_date_rejects_day_zero() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 00 Feb 2025 12:00:00 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let rejected = email.date.is_none();
    assert!(
        rejected,
        "Date with day=0 should be rejected per RFC 5322 Section 3.3"
    );
}
/// A seconds field of 60 is accepted and kept as 60.
#[test]
fn parse_date_allows_leap_second() {
    let message = b"From: a@b.com\r\n\
        Date: Tue, 30 Jun 2015 23:59:60 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let when = email.date.expect("Leap second (60) should be accepted");
    assert_eq!(when.second, 60);
}
/// Days that fit the two-digit syntax but do not exist in the calendar
/// (31 Feb, 31 Apr, 31 Jun, 29 Feb 2023) are accepted as-is, both through
/// `parse_email` and through `parse_rfc5322_date` directly.
#[test]
fn parse_date_accepts_calendar_invalid_but_syntactically_valid_day() {
    let message = b"From: a@b.com\r\n\
        Date: Wed, 31 Feb 2025 12:00:00 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let when = email
        .date
        .expect("Calendar-invalid but syntactically valid date (31 Feb) must be accepted");
    assert_eq!(when.day, 31);
    assert_eq!(when.month, 2);
    assert_eq!(when.year, 2025);

    // (header value, expected day, expected month, failure message)
    let direct_cases = [
        (
            "Mon, 31 Apr 2025 12:00:00 +0000",
            31,
            4,
            "31 Apr must be accepted",
        ),
        (
            "Mon, 31 Jun 2025 12:00:00 +0000",
            31,
            6,
            "31 Jun must be accepted",
        ),
        (
            "Mon, 29 Feb 2023 12:00:00 +0000",
            29,
            2,
            "29 Feb in non-leap year must be accepted",
        ),
    ];
    for (value, want_day, want_month, why) in direct_cases {
        let dt = parse_rfc5322_date(value).expect(why);
        assert_eq!(dt.day, want_day);
        assert_eq!(dt.month, want_month);
    }
}
/// A parenthesised comment between date tokens is skipped and the date
/// still parses as 2025-02-13.
#[test]
fn parse_date_with_comment_between_tokens() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 (February) Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let when = email
        .date
        .expect("Date with CFWS comment must parse per RFC 5322 Section 4.3");
    assert_eq!(when.year, 2025);
    assert_eq!(when.month, 2);
    assert_eq!(when.day, 13);
}
/// A trailing `(UTC)` comment after the zone does not affect parsing.
#[test]
fn parse_date_with_trailing_comment() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000 (UTC)\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let when = email.date.expect("Date with trailing comment must parse");
    assert_eq!(when.year, 2025);
    assert_eq!(when.tz_offset_minutes, 0);
}
/// A nested comment after the zone does not affect parsing.
#[test]
fn parse_date_with_nested_comments() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000 (UTC (nested))\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let when = email.date.expect("Date with nested comment must parse");
    assert_eq!(when.year, 2025);
}
/// A quoted display name whose last character is an escaped quote is read
/// in full and unescaped.
#[test]
fn parse_display_name_ending_with_escaped_quote() {
    let message = b"From: \"She said \\\"hello\\\"\" <she@example.com>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let display_name = email.from[0].name.as_deref();
    assert_eq!(
        display_name,
        Some("She said \"hello\""),
        "Display name ending with escaped quote must be parsed correctly \
         per RFC 5322 Section 3.2.4"
    );
}
/// Parsing an `Address` from a string handles a display name that ends with
/// an escaped quote.
#[test]
fn address_from_str_ending_with_escaped_quote() {
    let text = "\"She said \\\"hello\\\"\" <she@example.com>";
    let address = text.parse::<Address>().unwrap();
    assert_eq!(
        address.name.as_deref(),
        Some("She said \"hello\""),
        "Address::from_str must handle display names ending with escaped quotes"
    );
}
/// A boundary string appearing mid-line is body content, not a delimiter,
/// so it must survive in `body_text`.
#[test]
fn boundary_must_be_at_line_start() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"BOUND\"\r\n\
        \r\n\
        --BOUND\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        This line mentions --BOUND in the middle\r\n\
        --BOUND--";
    let email = parse_email(message).unwrap();
    // A missing body is treated as empty so the assertion below reports it.
    let text = email.body_text.as_deref().unwrap_or("");
    let kept_literal = text.contains("--BOUND");
    assert!(
        kept_literal,
        "Mid-line boundary must be treated as literal text per RFC 2046 Section 5.1.1, \
         but body_text was: {text:?}"
    );
}
/// `text/plaintext` only shares a prefix with `text/plain`; it must be
/// classified as an attachment, not as the text body.
#[test]
fn mime_type_exact_match_not_prefix() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plaintext\r\n\
        \r\n\
        Not really plain text\r\n\
        --b--";
    let email = parse_email(message).unwrap();
    let no_text_body = email.body_text.is_none();
    assert!(
        no_text_body,
        "text/plaintext must not be treated as text/plain body"
    );
    let attachment_count = email.attachments.len();
    assert_eq!(
        attachment_count,
        1,
        "text/plaintext should be treated as an attachment"
    );
}
/// A single-part image/jpeg message becomes one attachment at section "1"
/// and leaves `body_text` empty.
#[test]
fn parse_single_part_non_text_is_attachment() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: image/jpeg\r\n\
        Content-Transfer-Encoding: base64\r\n\
        \r\n\
        /9j/4AAQSkZJRg==";
    let email = parse_email(message).unwrap();
    let no_text_body = email.body_text.is_none();
    assert!(
        no_text_body,
        "image/jpeg single-part must not populate body_text"
    );
    let attachment_count = email.attachments.len();
    assert_eq!(
        attachment_count,
        1,
        "image/jpeg single-part must be treated as an attachment"
    );
    let image = &email.attachments[0];
    assert_eq!(image.content_type, "image/jpeg");
    assert_eq!(image.section.as_deref(), Some("1"));
}
/// A single-part application/pdf with attachment disposition becomes one
/// non-inline attachment carrying its filename; `body_text` stays empty.
#[test]
fn parse_single_part_application_pdf_is_attachment() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: application/pdf; name=\"doc.pdf\"\r\n\
        Content-Disposition: attachment; filename=\"doc.pdf\"\r\n\
        Content-Transfer-Encoding: base64\r\n\
        \r\n\
        JVBERi0xLjQK";
    let email = parse_email(message).unwrap();
    let no_text_body = email.body_text.is_none();
    assert!(
        no_text_body,
        "application/pdf must not populate body_text"
    );
    assert_eq!(email.attachments.len(), 1);
    let pdf = &email.attachments[0];
    assert_eq!(pdf.content_type, "application/pdf");
    assert_eq!(pdf.filename.as_deref(), Some("doc.pdf"));
    assert!(!pdf.is_inline);
}
/// `text/plain` content marked `Content-Disposition: attachment` is an attachment, not the body.
#[test]
fn parse_single_part_text_plain_with_attachment_disposition() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Disposition: attachment; filename=\"log.txt\"\r\n\
        \r\n\
        Server log data here";
    let email = parse_email(message).unwrap();
    let body_absent = email.body_text.is_none();
    assert!(
        body_absent,
        "text/plain with disposition:attachment must not populate body_text"
    );
    assert_eq!(email.attachments.len(), 1);
    let log = &email.attachments[0];
    assert_eq!(log.content_type, "text/plain");
    assert_eq!(log.filename.as_deref(), Some("log.txt"));
}
/// The disposition token `attachmentfoo` must not be read as `attachment`: the body stays
/// in `body_text` and no attachment is produced.
#[test]
fn disposition_type_requires_token_boundary() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Disposition: attachmentfoo\r\n\
        \r\n\
        This is body text";
    let email = parse_email(message).unwrap();
    let body_present = email.body_text.is_some();
    assert!(
        body_present,
        "text/plain with disposition:attachmentfoo must populate body_text \
         (RFC 2183 Section 2: disposition-type is a complete token, not a prefix)"
    );
    let attachment_count = email.attachments.len();
    assert_eq!(
        attachment_count, 0,
        "extension-token 'attachmentfoo' must not be classified as 'attachment'"
    );
}
/// An empty group (`undisclosed-recipients:;`) in `To` must produce no recipients.
#[test]
fn parse_group_address_empty_undisclosed() {
    let message = b"From: a@b.com\r\n\
        To: undisclosed-recipients:;\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let recipients = &email.to;
    assert!(
        recipients.is_empty(),
        "empty group undisclosed-recipients:; must produce no addresses, got {:?}",
        recipients
    );
}
/// A group with two members in `To` must expand to those two addresses, in order.
#[test]
fn parse_group_address_with_members() {
    let message = b"From: a@b.com\r\n\
        To: friends:one@x.com, two@x.com;\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let recipients = &email.to;
    assert_eq!(
        recipients.len(),
        2,
        "group with 2 members must produce 2 addresses, got {:?}",
        recipients
    );
    assert_eq!(recipients[0].email, "one@x.com");
    assert_eq!(recipients[1].email, "two@x.com");
}
/// Plain mailboxes and a group in the same `To` header must all be extracted, in header order.
#[test]
fn parse_group_address_mixed_with_regular() {
    let message = b"From: a@b.com\r\n\
        To: solo@x.com, friends:one@x.com, two@x.com;, last@x.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let recipients: Vec<&str> = email
        .to
        .iter()
        .map(|recipient| recipient.email.as_str())
        .collect();
    let expected = vec!["solo@x.com", "one@x.com", "two@x.com", "last@x.com"];
    assert_eq!(
        recipients, expected,
        "must extract all 4 addresses from mixed regular+group syntax"
    );
}
/// A lone `=` at the end of quoted-printable input is dropped from the output.
#[test]
fn decode_qp_trailing_equals_is_soft_break() {
    let decoded = decode_quoted_printable(b"Hello=");
    assert_eq!(
        decoded, b"Hello",
        "trailing '=' must be treated as soft line break per RFC 2045 Section 6.7"
    );
}
/// `=` followed by a bare CR at the end of the input is dropped from the output.
#[test]
fn decode_qp_trailing_equals_cr_is_soft_break() {
    let decoded = decode_quoted_printable(b"Hello=\r");
    assert_eq!(
        decoded, b"Hello",
        "trailing '=\\r' must be treated as soft line break"
    );
}
/// `=` followed by a bare CR in the middle of the data is removed, joining the two halves.
#[test]
fn decode_qp_bare_cr_soft_break_mid_data() {
    let decoded = decode_quoted_printable(b"Hello=\rWorld");
    assert_eq!(
        decoded, b"HelloWorld",
        "'=\\r' followed by non-LF byte must be treated as soft line break"
    );
}
/// For `user@example.com (Display Name)` the comment becomes the display name and is
/// excluded from the email address.
#[test]
fn parse_bare_address_with_trailing_comment() {
    let message = b"From: sender@example.com\r\n\
        To: user@example.com (Display Name)\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    assert_eq!(email.to.len(), 1);
    let recipient = &email.to[0];
    assert_eq!(
        recipient.email, "user@example.com",
        "email must not contain the trailing comment"
    );
    assert_eq!(
        recipient.name.as_deref(),
        Some("Display Name"),
        "trailing comment should become display name per RFC 5322 Section 3.4.1"
    );
}
/// For `(Comment) user@example.com` the comment becomes the display name and is
/// excluded from the email address.
#[test]
fn parse_bare_address_with_leading_comment() {
    let message = b"From: sender@example.com\r\n\
        To: (Comment) user@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    assert_eq!(email.to.len(), 1);
    let recipient = &email.to[0];
    assert_eq!(
        recipient.email, "user@example.com",
        "email must not contain the leading comment"
    );
    assert_eq!(
        recipient.name.as_deref(),
        Some("Comment"),
        "leading comment should be extracted as display name \
         (RFC 5322 Section 3.2.2 / RFC 822 convention)"
    );
}
/// `charset=` text embedded in another parameter's quoted value must not be picked up.
#[test]
fn extract_param_skips_quoted_values() {
    let content_type = "text/html; boundary=\"has charset=bad inside\"; charset=utf-8";
    let found = extract_param(content_type, "charset");
    assert_eq!(
        found.as_deref(),
        Some("utf-8"),
        "Should skip match inside quoted boundary value"
    );
}
/// A parenthesised comment placed before an unquoted parameter value is not part of the value.
#[test]
fn extract_param_skips_comment_before_unquoted_value() {
    let content_type = "text/plain; charset=(legacy default)windows-1252";
    let found = extract_param(content_type, "charset");
    assert_eq!(
        found.as_deref(),
        Some("windows-1252"),
        "RFC 2045 Section 5.1 comments before an unquoted value must be ignored"
    );
}
/// A parenthesised comment placed before a quoted parameter value is not part of the value.
#[test]
fn extract_param_skips_comment_before_quoted_value() {
    let disposition = "attachment; filename=(human note)\"report.pdf\"";
    let found = extract_param(disposition, "filename");
    assert_eq!(
        found.as_deref(),
        Some("report.pdf"),
        "RFC 2045 Section 5.1 comments before a quoted-string value must be ignored"
    );
}
/// A charset-less `text/plain` part containing byte 0x93 must decode to text that holds
/// U+201C and no U+FFFD replacement character.
#[test]
fn multipart_part_without_charset_uses_us_ascii_default() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Hello \x93World\r\n\
        --b--";
    let email = parse_email(message).unwrap();
    let text = email.body_text.unwrap();
    let has_left_quote = text.contains('\u{201c}');
    assert!(
        has_left_quote,
        "Part with text/plain (no charset) should use US-ASCII default per \
         RFC 2045 Section 5.2, decoding 0x93 as U+201C. Got: {text:?}"
    );
    let has_replacement = text.contains('\u{FFFD}');
    assert!(
        !has_replacement,
        "Part with text/plain (no charset) should not produce UTF-8 replacement \
         characters. Got: {text:?}"
    );
}
/// Checks the subject produced by unfolding a `Subject` header that is split across two lines.
#[test]
fn parse_header_unfold_preserves_trailing_whitespace() {
    let message = b"From: a@b.com\r\nSubject: Hello \r\n World\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(
        subject,
        Some("Hello World"),
        "Trailing whitespace on first line must be preserved during unfolding \
         (RFC 5322 Section 2.2.3)"
    );
}
/// The final CRLF of a single-part `text/plain` message is not part of `body_text`.
#[test]
fn parse_single_part_body_no_trailing_crlf() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        \r\n\
        Hello, World!\r\n";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello, World!"),
        "Single-part body text must not include trailing CRLF"
    );
}
/// The final CRLF of a single-part `text/html` message is not part of `body_html`.
#[test]
fn parse_single_part_html_no_trailing_crlf() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/html; charset=utf-8\r\n\
        \r\n\
        <p>Hello</p>\r\n";
    let email = parse_email(message).unwrap();
    let html = email.body_html.as_deref();
    assert_eq!(
        html,
        Some("<p>Hello</p>"),
        "Single-part HTML body must not include trailing CRLF"
    );
}
/// Builds a text-only message with `crate::build_message`, parses it back, and checks
/// that the text body is returned unchanged.
#[test]
fn round_trip_single_part_body_text() {
    // Nameless mailbox used for both the sender and the recipient.
    let bare_address = |mailbox: &'static str| crate::types::Address {
        name: None,
        email: mailbox.into(),
    };
    let outgoing = crate::types::OutgoingEmail {
        from: vec![bare_address("a@b.com")],
        sender: None,
        to: vec![bare_address("c@d.com")],
        cc: vec![],
        bcc: vec![],
        reply_to: vec![],
        date: None,
        subject: "Test".into(),
        body_text: Some("Hello, World!".into()),
        body_html: None,
        in_reply_to: vec![],
        references: vec![],
        attachments: vec![],
        extra_headers: vec![],
    };
    let built = crate::build_message(&outgoing).unwrap();
    let reparsed = parse_email(&built.raw).unwrap();
    let body = reparsed.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello, World!"),
        "Single-part body text must round-trip without trailing CRLF"
    );
}
/// An encoded-word display name in `From` that decodes to `John, Doe` must stay one name
/// attached to `john@example.com`.
#[test]
fn parse_encoded_word_display_name_with_comma() {
    let message = b"From: =?UTF-8?B?Sm9obiwgRG9l?= <john@example.com>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let display_name = email.from[0].name.as_deref();
    assert_eq!(
        display_name,
        Some("John, Doe"),
        "RFC 2047 encoded display name with comma must be preserved \
         (RFC 2047 Section 5 rule 3): decode AFTER address parsing"
    );
    assert_eq!(email.from[0].email, "john@example.com");
}
/// A base64 body interspersed with `!`, `#` and `~` must still decode to `Hello World`.
#[test]
fn parse_base64_body_ignores_non_alphabet_chars() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: base64\r\n\
        \r\n\
        SGVs!bG8#gV29~ybGQ=";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello World"),
        "RFC 2045 Section 6.8: non-alphabet characters must be ignored in base64 data"
    );
}
/// In a two-recipient `To` header, a comma inside an encoded-word display name must not
/// create a third address.
#[test]
fn parse_encoded_word_display_name_with_comma_in_to() {
    let message = b"From: sender@example.com\r\n\
        To: =?UTF-8?B?Sm9obiwgRG9l?= <john@example.com>, other@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let recipients = &email.to;
    assert_eq!(
        recipients.len(),
        2,
        "Must parse exactly 2 addresses, not 3 (encoded comma is not a separator)"
    );
    assert_eq!(
        recipients[0].name.as_deref(),
        Some("John, Doe"),
        "First recipient display name must be 'John, Doe'"
    );
    assert_eq!(recipients[0].email, "john@example.com");
    assert_eq!(recipients[1].email, "other@example.com");
}
/// Builds a message that has one attachment and no bodies, parses it back, and checks
/// that `body_text` comes back as `None`.
#[test]
fn round_trip_empty_body_text_is_none() {
    // Nameless mailbox used for both the sender and the recipient.
    let bare_address = |mailbox: &'static str| crate::types::Address {
        name: None,
        email: mailbox.into(),
    };
    let text_attachment = crate::types::OutgoingAttachment {
        filename: "test.txt".into(),
        content_type: "text/plain".into(),
        data: b"attachment data".to_vec(),
        is_inline: false,
        content_id: None,
    };
    let outgoing = crate::types::OutgoingEmail {
        from: vec![bare_address("a@b.com")],
        sender: None,
        to: vec![bare_address("c@d.com")],
        cc: vec![],
        bcc: vec![],
        reply_to: vec![],
        date: None,
        subject: "Empty body".into(),
        body_text: None,
        body_html: None,
        in_reply_to: vec![],
        references: vec![],
        attachments: vec![text_attachment],
        extra_headers: vec![],
    };
    let built = crate::build_message(&outgoing).unwrap();
    let reparsed = parse_email(&built.raw).unwrap();
    assert_eq!(
        reparsed.body_text, None,
        "Empty body_text must round-trip as None, not Some(\"\")"
    );
}
/// Builds a message with a text body and an empty HTML body, parses it back, and checks
/// that `body_html` is `None` while `body_text` is kept.
#[test]
fn round_trip_empty_body_html_in_alternative_is_none() {
    // Nameless mailbox used for both the sender and the recipient.
    let bare_address = |mailbox: &'static str| crate::types::Address {
        name: None,
        email: mailbox.into(),
    };
    let outgoing = crate::types::OutgoingEmail {
        from: vec![bare_address("a@b.com")],
        sender: None,
        to: vec![bare_address("c@d.com")],
        cc: vec![],
        bcc: vec![],
        reply_to: vec![],
        date: None,
        subject: "Text only".into(),
        body_text: Some("Plain text".into()),
        body_html: Some(String::new()),
        in_reply_to: vec![],
        references: vec![],
        attachments: vec![],
        extra_headers: vec![],
    };
    let built = crate::build_message(&outgoing).unwrap();
    let reparsed = parse_email(&built.raw).unwrap();
    assert_eq!(
        reparsed.body_html, None,
        "Empty body_html must parse as None, not Some(\"\")"
    );
    let text = reparsed.body_text.as_deref();
    assert_eq!(text, Some("Plain text"), "body_text must be preserved");
}
/// A comment inside the `Content-Type` value must not stop the body from being returned
/// as `body_text`, and must not create an attachment.
#[test]
fn extract_mime_type_strips_rfc5322_comments() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain (this is a comment); charset=utf-8\r\n\
        \r\n\
        Hello with comment";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello with comment"),
        "Body must be extracted as body_text when Content-Type has an RFC 5322 comment"
    );
    let no_attachments = email.attachments.is_empty();
    assert!(
        no_attachments,
        "No attachments expected for a plain text/plain message with a comment"
    );
}
/// A `multipart/digest` part with no `Content-Type` of its own must surface as a single
/// `message/rfc822` attachment rather than as the text body.
#[test]
fn multipart_digest_default_content_type_is_message_rfc822() {
    let message = b"From: sender@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Subject: Digest\r\n\
        Content-Type: multipart/digest; boundary=\"digestboundary\"\r\n\
        \r\n\
        --digestboundary\r\n\
        \r\n\
        From: nested@example.com\r\n\
        Subject: Nested message\r\n\
        \r\n\
        Nested body text\r\n\
        --digestboundary--\r\n";
    let email = parse_email(message).unwrap();
    let body_absent = email.body_text.is_none();
    assert!(
        body_absent,
        "multipart/digest parts without Content-Type should default to \
         message/rfc822, not text/plain — body_text should be None"
    );
    let attachment_count = email.attachments.len();
    assert_eq!(
        attachment_count, 1,
        "multipart/digest part should be treated as message/rfc822 attachment"
    );
    let nested = &email.attachments[0];
    assert_eq!(
        nested.content_type, "message/rfc822",
        "default Content-Type in multipart/digest must be message/rfc822 \
         (RFC 2046 Section 5.1.5)"
    );
}
/// Whitespace just inside the angle brackets of `Content-ID` must be absent from the parsed
/// value, both for a multipart attachment and for a single-part message.
#[test]
fn content_id_whitespace_inside_brackets_trimmed() {
    let multipart_message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: image/png\r\n\
        Content-ID: < cid@example.com >\r\n\
        \r\n\
        PNG\r\n\
        --b--";
    let multipart_email = parse_email(multipart_message).unwrap();
    let multipart_cid = multipart_email.attachments[0].content_id.as_deref();
    assert_eq!(
        multipart_cid,
        Some("cid@example.com"),
        "Content-ID must be trimmed after bracket stripping (RFC 2392)"
    );

    let single_message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: image/png\r\n\
        Content-ID: < cid2@example.com >\r\n\
        \r\n\
        PNG";
    let single_email = parse_email(single_message).unwrap();
    let single_cid = single_email.attachments[0].content_id.as_deref();
    assert_eq!(
        single_cid,
        Some("cid2@example.com"),
        "Content-ID in single-part message must be trimmed (RFC 2392)"
    );
}
/// `parse_headers_only` must fill every header-derived field and leave the bodies and
/// attachments empty even though the input carries a body.
#[test]
fn parse_headers_only_all_fields_verified() {
    let message = b"From: sender@example.com\r\n\
        To: to@example.com\r\n\
        Cc: cc@example.com\r\n\
        Bcc: bcc@example.com\r\n\
        Reply-To: reply@example.com\r\n\
        Subject: Full test\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Message-ID: <msg1@example.com>\r\n\
        In-Reply-To: <parent@example.com>\r\n\
        References: <ref1@example.com> <ref2@example.com>\r\n\
        \r\n\
        Body that should be ignored";
    let parsed = parse_headers_only(message).unwrap();

    // Address headers.
    assert_eq!(parsed.from[0].email, "sender@example.com");
    assert_eq!(parsed.to.len(), 1);
    assert_eq!(parsed.to[0].email, "to@example.com");
    assert_eq!(parsed.cc.len(), 1);
    assert_eq!(parsed.cc[0].email, "cc@example.com");
    assert_eq!(parsed.bcc.len(), 1);
    assert_eq!(parsed.bcc[0].email, "bcc@example.com");
    assert_eq!(parsed.reply_to.len(), 1);
    assert_eq!(parsed.reply_to[0].email, "reply@example.com");

    // Subject, date and threading headers.
    assert_eq!(parsed.subject.as_deref(), Some("Full test"));
    assert!(parsed.date.is_some());
    assert_eq!(parsed.message_id.as_deref(), Some("msg1@example.com"));
    assert_eq!(parsed.in_reply_to, vec!["parent@example.com"]);
    assert_eq!(
        parsed.references,
        vec!["ref1@example.com", "ref2@example.com"]
    );

    // Nothing from the body may leak into a headers-only parse.
    assert!(parsed.body_text.is_none());
    assert!(parsed.body_html.is_none());
    assert!(parsed.attachments.is_empty());
}
/// With only a `From` header present, every other header field must be `None` or empty.
#[test]
fn parse_missing_optional_headers_returns_none_or_empty() {
    let message = b"From: a@b.com\r\n\r\n";
    let parsed = parse_email(message).unwrap();
    assert_eq!(parsed.from[0].email, "a@b.com");

    // Optional scalar headers.
    assert!(parsed.subject.is_none());
    assert!(parsed.date.is_none());
    assert!(parsed.message_id.is_none());

    // Optional list headers.
    assert!(parsed.in_reply_to.is_empty());
    assert!(parsed.references.is_empty());
    assert!(parsed.to.is_empty());
    assert!(parsed.cc.is_empty());
    assert!(parsed.bcc.is_empty());
    assert!(parsed.reply_to.is_empty());
}
/// Looking up `filename` must skip `xfilename` and return the real `filename` value.
#[test]
fn extract_param_rejects_substring_match() {
    let disposition = "attachment; xfilename=\"bad.pdf\"; filename=\"good.pdf\"";
    let found = extract_param(disposition, "filename");
    assert_eq!(
        found.as_deref(),
        Some("good.pdf"),
        "Must not match xfilename as filename"
    );
}
/// Looking up `filename` must return nothing when only `notfilename` is present.
#[test]
fn extract_param_rejects_suffix_only_match() {
    let disposition = "attachment; notfilename=\"only.pdf\"";
    let found = extract_param(disposition, "filename");
    let is_missing = found.is_none();
    assert!(
        is_missing,
        "Must not match 'filename' inside 'notfilename'"
    );
}
/// `parse_address_list` must return no addresses for an empty group.
#[test]
fn parse_group_address_empty() {
    let addrs = parse_address_list("undisclosed-recipients:;");
    let none_found = addrs.is_empty();
    assert!(
        none_found,
        "empty group must produce no addresses, got {addrs:?}"
    );
}
/// `parse_address_list` must return both members of a two-member group, in order.
#[test]
fn parse_group_address_with_two_members() {
    let addrs = parse_address_list("Friends: a@x.com, b@x.com;");
    let member_count = addrs.len();
    assert_eq!(member_count, 2, "group with 2 members: {addrs:?}");
    let (first, second) = (&addrs[0], &addrs[1]);
    assert_eq!(first.email, "a@x.com");
    assert_eq!(second.email, "b@x.com");
}
/// Two groups followed by a plain mailbox must yield four addresses in input order.
#[test]
fn parse_multiple_groups_and_solo() {
    let header = "Team A: a1@x.com, a2@x.com;, Team B: b1@x.com;, solo@x.com";
    let addrs = parse_address_list(header);
    let total = addrs.len();
    assert_eq!(total, 4, "2 groups + 1 solo: {addrs:?}");
    assert_eq!(addrs[0].email, "a1@x.com");
    assert_eq!(addrs[1].email, "a2@x.com");
    assert_eq!(addrs[2].email, "b1@x.com");
    assert_eq!(addrs[3].email, "solo@x.com");
}
/// A comma inside a parenthesised comment must not act as an address separator.
#[test]
fn parse_address_comment_with_comma_audit() {
    let addrs = parse_address_list("user@x.com (Last, First), other@x.com");
    let total = addrs.len();
    assert_eq!(
        total, 2,
        "comma inside comment must not split: {addrs:?}"
    );
    let (first, second) = (&addrs[0], &addrs[1]);
    assert_eq!(first.email, "user@x.com");
    assert_eq!(second.email, "other@x.com");
}
/// With sections 0 and 2 present but 1 missing, only section 0 contributes to the value.
#[test]
fn rfc2231_continuation_gap_collects_available() {
    let disposition = "attachment; filename*0=\"hello\"; filename*2=\"skipped\"";
    let reassembled = extract_rfc2231_continuation(disposition, "filename");
    assert_eq!(
        reassembled.as_deref(),
        Some("hello"),
        "RFC 2231 Section 3: reassembly must stop at the first gap"
    );
}
/// With sections 0 and 3 present but 1 and 2 missing, only section 0 contributes to the value.
#[test]
fn rfc2231_continuation_double_gap_collected() {
    let disposition = "attachment; filename*0=\"abc\"; filename*3=\"xyz\"";
    let reassembled = extract_rfc2231_continuation(disposition, "filename");
    assert_eq!(
        reassembled.as_deref(),
        Some("abc"),
        "RFC 2231 Section 3: reassembly must ignore sections after a gap"
    );
}
/// A lone section 0 is returned as the complete value.
#[test]
fn rfc2231_continuation_single_section() {
    let disposition = "attachment; filename*0=\"report.pdf\"";
    let reassembled = extract_rfc2231_continuation(disposition, "filename");
    let filename = reassembled.as_deref();
    assert_eq!(filename, Some("report.pdf"));
}
/// A continuation that starts at section 1 (no section 0) must produce no value.
#[test]
fn rfc2231_continuation_requires_section_zero() {
    let disposition = "attachment; filename*1=\"tail.txt\"";
    let reassembled = extract_rfc2231_continuation(disposition, "filename");
    assert_eq!(
        reassembled, None,
        "RFC 2231 Section 3: continuation reassembly must require section 0"
    );
}
/// Reassembly must end at the first missing section index (here, index 1).
#[test]
fn rfc2231_continuation_stops_at_first_gap() {
    let disposition = "attachment; filename*0=\"hello\"; filename*2=\"tail\"";
    let reassembled = extract_rfc2231_continuation(disposition, "filename");
    assert_eq!(
        reassembled.as_deref(),
        Some("hello"),
        "RFC 2231 Section 3: reassembly must stop at the first missing index"
    );
}
/// A three-section continuation filename without charset information must be joined into
/// `annual_report_2025.pdf` on the parsed attachment.
#[test]
fn rfc2231_continuation_no_charset_defaults_to_utf8() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/octet-stream\r\n\
        Content-Disposition: attachment; filename*0=\"annual_\"; filename*1=\"report_\"; filename*2=\"2025.pdf\"\r\n\
        \r\n\
        DATA\r\n\
        --b--";
    let email = parse_email(message).unwrap();
    assert_eq!(email.attachments.len(), 1);
    let filename = email.attachments[0].filename.as_deref();
    assert_eq!(
        filename,
        Some("annual_report_2025.pdf"),
        "RFC 2231 continuation without charset should decode as UTF-8"
    );
}
/// Spaces embedded in base64 data must not affect the decoded bytes.
#[test]
fn base64_with_embedded_spaces() {
    let encoded = b"SGVs bG8g V29y bGQ=";
    let bytes = decode_transfer_encoding(encoded, "base64");
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        text, "Hello World",
        "base64 decoder must strip non-alphabet characters (RFC 2045 Section 6.8)"
    );
}
/// Tabs embedded in base64 data must not affect the decoded bytes.
#[test]
fn base64_with_tabs() {
    let encoded = b"SGVs\tbG8g\tV29ybGQ=";
    let bytes = decode_transfer_encoding(encoded, "base64");
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        text, "Hello World",
        "base64 decoder must strip tabs (RFC 2045 Section 6.8)"
    );
}
/// A trailing `=` in quoted-printable data is removed, leaving valid UTF-8 `Hello`.
#[test]
fn qp_trailing_equals_stripped() {
    let encoded = b"Hello=";
    let bytes = decode_quoted_printable(encoded);
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        text, "Hello",
        "trailing '=' is a soft break (RFC 2045 Section 6.7)"
    );
}
/// `=ZZ` is not a valid hex escape, so quoted-printable decoding must leave it untouched.
#[test]
fn qp_malformed_hex_passthrough() {
    let encoded = b"Hello=ZZ World";
    let bytes = decode_quoted_printable(encoded);
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        text, "Hello=ZZ World",
        "malformed =ZZ must pass through literally (Postel's law)"
    );
}
/// `=ZZ` is not a valid hex escape, so Q-encoding decoding must leave it untouched.
#[test]
fn q_encoding_malformed_hex_passthrough() {
    let bytes = decode_q_encoding("Hello=ZZWorld");
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        text, "Hello=ZZWorld",
        "malformed =ZZ in Q-encoding must pass through literally"
    );
}
/// Unlike quoted-printable bodies, a trailing `=` in Q-encoding is kept in the output.
#[test]
fn q_encoding_trailing_equals() {
    let bytes = decode_q_encoding("Hello=");
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        text, "Hello=",
        "trailing '=' in Q-encoding must pass through literally"
    );
}
/// A Q-encoded word containing the invalid escape `=b?` must appear undecoded in the subject.
#[test]
fn malformed_q_encoded_word_stays_literal() {
    let message = b"From: test@example.com\r\nSubject: =?utf-8?Q?a=b?=\r\n\r\nbody";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(
        subject,
        Some("=?utf-8?Q?a=b?="),
        "malformed RFC 2047 Q-encoded words must remain literal"
    );
}
/// A header-less part inside `multipart/digest` must become one `message/rfc822` attachment
/// and must not populate the text body.
#[test]
fn multipart_digest_default_content_type_full_email() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/digest; boundary=\"dg\"\r\n\
        \r\n\
        --dg\r\n\
        \r\n\
        From: nested@example.com\r\n\
        Subject: Inner\r\n\
        \r\n\
        Inner body\r\n\
        --dg--";
    let email = parse_email(message).unwrap();
    let body_absent = email.body_text.is_none();
    assert!(body_absent, "digest part must NOT be treated as text/plain");
    assert_eq!(email.attachments.len(), 1);
    let nested = &email.attachments[0];
    assert_eq!(nested.content_type, "message/rfc822");
}
/// A space-indented line before any header must not prevent the later headers and the
/// body from being parsed.
#[test]
fn parse_headers_leading_space_skipped() {
    let message = b" continuation without header\r\nFrom: a@b.com\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\nBody";
    let email = parse_email(message).unwrap();
    let sender = &email.from[0];
    assert_eq!(sender.email, "a@b.com");
    let body = email.body_text.as_deref();
    assert_eq!(body, Some("Body"));
}
/// A tab-indented line before any header must not prevent the later `From` from being parsed.
#[test]
fn parse_headers_leading_tab_skipped() {
    let message = b"\tcontinuation without header\r\nFrom: a@b.com\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";
    let email = parse_email(message).unwrap();
    let sender = &email.from[0];
    assert_eq!(sender.email, "a@b.com");
}
/// An encoded word whose base64 payload is only padding must keep its `=?` marker in the output.
#[test]
fn encoded_word_bad_base64_passthrough() {
    let result = decode_encoded_words("=?UTF-8?B?=====?=");
    let marker_kept = result.contains("=?");
    assert!(
        marker_kept,
        "Bad base64 encoded word should pass through literally, got: {result:?}"
    );
}
/// An encoded word naming an unknown charset must still decode its payload to `Hello`.
#[test]
fn encoded_word_unknown_charset_fallback() {
    let result = decode_encoded_words("=?x-nonexistent-charset?B?SGVsbG8=?=");
    let payload_decoded = result.contains("Hello");
    assert!(
        payload_decoded,
        "Unknown charset should fall back to UTF-8, got: {result:?}"
    );
}
/// An encoded word with no closing `?=` must keep its `=?` marker in the output.
#[test]
fn encoded_word_truncated_no_closing() {
    let result = decode_encoded_words("Start =?UTF-8?B?SGVsbG8= End");
    let marker_kept = result.contains("=?");
    assert!(
        marker_kept,
        "Truncated encoded word should pass through, got: {result:?}"
    );
}
/// A percent-encoded section 0 followed by a plain quoted section 1 must be joined into
/// one decoded filename.
#[test]
fn rfc2231_continuation_mixed_encoded_and_plain() {
    let disposition = "attachment; filename*0*=UTF-8''r%C3%A9sum; filename*1=\"e.pdf\"";
    let reassembled = extract_rfc2231_continuation(disposition, "filename");
    assert_eq!(
        reassembled.as_deref(),
        Some("r\u{e9}sume.pdf"),
        "RFC 2231 mixed encoded/plain continuation should reassemble correctly"
    );
}
/// Three consecutive sections (0, 1, 2) must be concatenated in index order.
#[test]
fn rfc2231_continuation_three_sections() {
    let disposition =
        "attachment; filename*0=\"part1_\"; filename*1=\"part2_\"; filename*2=\"part3.pdf\"";
    let reassembled = extract_rfc2231_continuation(disposition, "filename");
    let filename = reassembled.as_deref();
    assert_eq!(filename, Some("part1_part2_part3.pdf"));
}
/// A quoted value with no closing quote must yield the text up to the end of the header.
#[test]
fn extract_param_unterminated_quoted_value() {
    let content_type = "text/plain; charset=\"utf-8";
    let found = extract_param(content_type, "charset");
    assert_eq!(
        found.as_deref(),
        Some("utf-8"),
        "Unterminated quoted-string should extract to end of string"
    );
}
/// An escaped backslash inside a quoted value must come back as a single backslash.
#[test]
fn extract_param_quoted_with_backslash_escape() {
    let disposition = "attachment; filename=\"file\\\\name.txt\"";
    let found = extract_param(disposition, "filename");
    assert_eq!(
        found.as_deref(),
        Some("file\\name.txt"),
        "Backslash escape in quoted param value must be unescaped"
    );
}
/// An empty quoted value (`""`) must be reported as absent.
#[test]
fn extract_param_empty_quoted_value() {
    let result = extract_param("attachment; filename=\"\"", "filename");
    let is_missing = result.is_none();
    assert!(
        is_missing,
        "Empty quoted-string value should return None, got: {result:?}"
    );
}
/// When a quoted value is never closed, it must end at the next `;` and the parameter
/// that follows must remain retrievable.
#[test]
fn extract_param_unterminated_quote_does_not_absorb_next_param() {
    let content_type = "text/plain; charset=\"utf-8; name=test.txt";

    let charset_value = extract_param(content_type, "charset");
    assert_eq!(
        charset_value.as_deref(),
        Some("utf-8"),
        "unterminated quote must fall back to ';' terminator \
         (RFC 5322 Section 3.2.4, Postel's law)"
    );

    let name_value = extract_param(content_type, "name");
    assert_eq!(
        name_value.as_deref(),
        Some("test.txt"),
        "parameter after unterminated quote must still be accessible"
    );
}
/// The text of an outer comment must include a nested comment, parentheses and all.
#[test]
fn extract_comment_text_nested_parens() {
    let comment = extract_comment_text("(outer (inner) text)");
    assert_eq!(
        comment.as_deref(),
        Some("outer (inner) text"),
        "Nested parens should be included in comment text"
    );
}
/// Backslash-escaped parentheses inside a comment must come back without the backslashes.
#[test]
fn extract_comment_text_escaped_chars() {
    let comment = extract_comment_text("(hello \\(world\\))");
    assert_eq!(
        comment.as_deref(),
        Some("hello (world)"),
        "Escaped parens inside comments should be unescaped"
    );
}
/// An empty comment `()` must yield no text.
#[test]
fn extract_comment_text_empty() {
    let comment = extract_comment_text("()");
    let is_missing = comment.is_none();
    assert!(is_missing, "Empty comment should return None");
}
/// Input that is not wrapped in parentheses must yield no comment text.
#[test]
fn extract_comment_text_no_paren() {
    let comment = extract_comment_text("not a comment");
    let is_missing = comment.is_none();
    assert!(
        is_missing,
        "Non-parenthesized input should return None"
    );
}
/// Runs `strip_comments` over nested comments, escaped parentheses inside comments, and
/// escaped backslashes outside any comment, checking each output in turn.
#[test]
fn strip_comments_nested_and_escaped() {
    // (input, expected output), checked in this order.
    let cases = [
        ("Hello (outer (inner) comment) World", "Hello World"),
        ("Hello (comment with \\) escaped) World", "Hello World"),
        ("Hello \\\\ World", "Hello \\\\ World"),
        ("Before (escaped \\( paren) After", "Before After"),
    ];
    for (input, expected) in cases {
        let result = strip_comments(input);
        assert_eq!(result, expected);
    }
}
/// Backslash-escaped parentheses outside a comment must be returned unchanged.
#[test]
fn strip_comments_escaped_outside_comment() {
    let stripped = strip_comments("no \\(comment\\) here");
    assert_eq!(
        stripped, "no \\(comment\\) here",
        "Escaped parens outside comments should not open/close comments"
    );
}
/// A date string consisting only of day and month must be rejected.
#[test]
fn parse_date_too_few_parts() {
    let rejected = parse_rfc5322_date("13 Feb").is_none();
    assert!(rejected, "Date with too few parts should return None");
}
/// A time field written without colons (`1547`) must make the date unparseable.
#[test]
fn parse_date_time_no_colon() {
    let rejected = parse_rfc5322_date("13 Feb 2025 1547 +0000").is_none();
    assert!(rejected, "Time without colon should return None");
}
/// An unrecognised month name (`Foo`) must make the date unparseable.
#[test]
fn parse_date_unknown_month() {
    let rejected = parse_rfc5322_date("13 Foo 2025 12:00:00 +0000").is_none();
    assert!(rejected, "Unknown month name should return None");
}
/// Free text, the empty string, and a whitespace-only string must all be rejected.
#[test]
fn parse_date_completely_malformed() {
    // Arbitrary prose.
    assert!(parse_rfc5322_date("not a date at all").is_none());
    // Empty input.
    assert!(parse_rfc5322_date("").is_none());
    // Whitespace only.
    assert!(parse_rfc5322_date(" ").is_none());
}
/// An unrecognised zone name (`ZULU`) must parse with a zero offset.
#[test]
fn parse_date_unknown_timezone_defaults_zero() {
    let parsed_date = parse_rfc5322_date("13 Feb 2025 12:00:00 ZULU").unwrap();
    let offset = parsed_date.tz_offset_minutes;
    assert_eq!(
        offset, 0,
        "Unknown timezone abbreviation should default to +0000"
    );
}
/// A non-numeric day field (`XX`) must make the date unparseable.
#[test]
fn parse_date_non_numeric_day() {
    let rejected = parse_rfc5322_date("XX Feb 2025 12:00:00 +0000").is_none();
    assert!(rejected, "Non-numeric day should return None");
}
/// A non-numeric year field (`XXXX`) must make the date unparseable.
#[test]
fn parse_date_non_numeric_year() {
    let rejected = parse_rfc5322_date("13 Feb XXXX 12:00:00 +0000").is_none();
    assert!(rejected, "Non-numeric year should return None");
}
/// Boundary lines terminated by bare LF must still split the body into two parts.
#[test]
fn split_mime_parts_lf_only_boundaries() {
    let payload = b"--boundary\nContent-Type: text/plain\n\nPart 1\n--boundary\nContent-Type: text/plain\n\nPart 2\n--boundary--";
    let segments = split_mime_parts(payload, "boundary");
    let segment_count = segments.len();
    assert_eq!(
        segment_count, 2,
        "Should find 2 parts with LF-only boundaries"
    );
}
/// A body that opens directly with the boundary line must yield its single part.
#[test]
fn split_mime_parts_boundary_at_start() {
    let payload = b"--b\r\nContent-Type: text/plain\r\n\r\nOnly part\r\n--b--";
    let segments = split_mime_parts(payload, "b");
    let segment_count = segments.len();
    assert_eq!(
        segment_count, 1,
        "Should find 1 part when boundary is at start"
    );
    let text = String::from_utf8_lossy(segments[0]);
    assert!(text.contains("Only part"));
}
/// Boundary text in the middle of a line must not split the part and must stay in its content.
#[test]
fn split_mime_parts_midline_boundary_ignored() {
    let payload = b"--b\r\nContent-Type: text/plain\r\n\r\nText mentioning --b in the middle\r\n--b--";
    let segments = split_mime_parts(payload, "b");
    assert_eq!(segments.len(), 1, "Mid-line boundary must not split");
    let content = String::from_utf8_lossy(segments[0]);
    let keeps_marker = content.contains("--b in the middle");
    assert!(keeps_marker, "Mid-line boundary text should be preserved");
}
/// A boundary line followed by a space and a tab before CRLF must still be recognised.
#[test]
fn split_mime_parts_boundary_with_trailing_whitespace() {
    let payload = b"--b \t\r\nContent-Type: text/plain\r\n\r\nBody text\r\n--b--";
    let segments = split_mime_parts(payload, "b");
    let segment_count = segments.len();
    assert_eq!(
        segment_count, 1,
        "Boundary with trailing whitespace should be recognized"
    );
}
/// Boundary text embedded in a body line must stay inside the single resulting part.
#[test]
fn split_mime_parts_boundary_not_at_line_start_skipped() {
    let payload = b"--bound\r\n\r\nSome text has --bound embedded\r\n--bound--";
    let segments = split_mime_parts(payload, "bound");
    assert_eq!(segments.len(), 1);
    let text = String::from_utf8_lossy(segments[0]);
    assert!(text.contains("--bound embedded"));
}
/// A `Content-Transfer-Encoding` value of `"base64"` (in quotes) must still decode the body.
#[test]
fn parse_quoted_transfer_encoding() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: \"base64\"\r\n\
        \r\n\
        SGVsbG8gV29ybGQ=\r\n";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello World"),
        "Quoted CTE 'base64' should decode the body correctly"
    );
}
/// A `Content-Transfer-Encoding` value of `"quoted-printable"` (in quotes) must still
/// decode the body.
#[test]
fn parse_quoted_transfer_encoding_qp() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: \"quoted-printable\"\r\n\
        \r\n\
        Hello=20World\r\n";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello World"),
        "Quoted CTE 'quoted-printable' should decode the body correctly"
    );
}
/// Whitespace after the `base64` token in `Content-Transfer-Encoding` must not stop decoding.
#[test]
fn parse_transfer_encoding_with_whitespace() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: base64 \r\n\
        \r\n\
        SGVsbG8gV29ybGQ=\r\n";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello World"),
        "CTE with whitespace should still decode correctly"
    );
}
/// A `; name=foo` suffix after `base64` in `Content-Transfer-Encoding` must not stop decoding.
#[test]
fn parse_transfer_encoding_with_trailing_garbage() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: base64; name=foo\r\n\
        \r\n\
        SGVsbG8gV29ybGQ=\r\n";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello World"),
        "CTE with trailing semicolon+params should still decode as base64"
    );
}
/// A parenthesised comment after `quoted-printable` in `Content-Transfer-Encoding` must
/// not stop decoding.
#[test]
fn parse_transfer_encoding_with_trailing_comment() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: quoted-printable (standard)\r\n\
        \r\n\
        Hello=20World\r\n";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello World"),
        "CTE with trailing comment should still decode as quoted-printable"
    );
}
/// A quoted `"base64"` token followed by `; name=foo` must still decode the body.
#[test]
fn parse_quoted_transfer_encoding_with_trailing_garbage() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: \"base64\"; name=foo\r\n\
        \r\n\
        SGVsbG8gV29ybGQ=\r\n";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello World"),
        "Quoted CTE with trailing garbage should still decode as base64"
    );
}
/// `=` followed by CRLF is removed; the space that starts the next line is kept.
#[test]
fn qp_soft_line_break_crlf() {
    let encoded = b"Hello=\r\n World";
    let bytes = decode_quoted_printable(encoded);
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        text, "Hello World",
        "=\\r\\n soft break should be removed (RFC 2045 Section 6.7)"
    );
}
/// `=` followed by a bare LF is removed, joining the two lines.
#[test]
fn qp_soft_line_break_lf_only() {
    let encoded = b"Hello=\nWorld";
    let bytes = decode_quoted_printable(encoded);
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(text, "HelloWorld", "=\\n soft break should be removed");
}
/// `=` followed by a bare LF at the very end of the data is removed.
#[test]
fn qp_soft_break_lf_at_end() {
    let encoded = b"Hi=\n";
    let bytes = decode_quoted_printable(encoded);
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(text, "Hi", "=\\n at end of data should be a soft break");
}
/// An invalid escape (`=GG`) is kept verbatim while the valid escapes after it are decoded.
#[test]
fn qp_invalid_hex_passthrough() {
    let encoded = b"=GG=4F=4B";
    let bytes = decode_quoted_printable(encoded);
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        text, "=GGOK",
        "Invalid hex =GG should pass through, valid =4F=4B should decode"
    );
}
/// Decoding empty base64 input must produce no bytes.
#[test]
fn base64_empty_body() {
    let bytes = decode_transfer_encoding(b"", "base64");
    let nothing_decoded = bytes.is_empty();
    assert!(
        nothing_decoded,
        "Empty base64 input should produce empty output"
    );
}
/// Decoding base64 input that holds only spaces and line breaks must produce no bytes.
#[test]
fn base64_whitespace_only() {
    let bytes = decode_transfer_encoding(b" \r\n \r\n", "base64");
    let nothing_decoded = bytes.is_empty();
    assert!(
        nothing_decoded,
        "Whitespace-only base64 input should produce empty output"
    );
}
/// With no quote character in the input, the expected index is 21, which is
/// the byte length of the input string.
#[test]
fn find_closing_quote_unterminated() {
    let index = find_closing_quote("no closing quote here");
    assert_eq!(index, 21);
}
/// The backslash-escaped quote at index 6 is skipped; the unescaped quote
/// at index 12 is reported.
#[test]
fn find_closing_quote_skips_escaped() {
    let index = find_closing_quote("hello\\\"world\"");
    assert_eq!(index, 12);
}
/// The CRLF immediately before each boundary line is not part of the
/// preceding part's content.
#[test]
fn multipart_crlf_before_boundary() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"mp\"\r\n\
        \r\n\
        --mp\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Part A\r\n\
        --mp\r\n\
        Content-Type: text/html\r\n\
        \r\n\
        <b>Part B</b>\r\n\
        --mp--";
    let email = parse_email(message).unwrap();
    let plain = email.body_text.as_deref();
    let html = email.body_html.as_deref();
    assert_eq!(plain, Some("Part A"));
    assert_eq!(html, Some("<b>Part B</b>"));
}
/// Same layout as the CRLF variant, but every line ending is a bare LF.
#[test]
fn multipart_lf_only_before_boundary() {
    let message = b"From: a@b.com\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\n\
        Content-Type: multipart/mixed; boundary=\"mp\"\n\
        \n\
        --mp\n\
        Content-Type: text/plain\n\
        \n\
        Part A\n\
        --mp\n\
        Content-Type: text/html\n\
        \n\
        <b>Part B</b>\n\
        --mp--";
    let email = parse_email(message).unwrap();
    let plain = email.body_text.as_deref();
    let html = email.body_html.as_deref();
    assert_eq!(plain, Some("Part A"));
    assert_eq!(html, Some("<b>Part B</b>"));
}
/// A multipart body cut off before its closing delimiter still yields both
/// parts.
#[test]
fn multipart_truncated_no_closing() {
    let payload = b"--bnd\r\n\r\nFirst part\r\n--bnd\r\n\r\nSecond part with no closing boundary";
    let sections = split_mime_parts(payload, "bnd");
    assert_eq!(
        sections.len(),
        2,
        "Should find 2 parts even without closing boundary"
    );
    let second = String::from_utf8_lossy(sections[1]);
    assert!(second.contains("Second part"));
}
/// A `\)` inside a comment is a quoted pair and does not close the comment.
#[test]
fn strip_comments_escaped_paren_inside_comment() {
    let stripped = strip_comments("Before (escaped \\) paren) After");
    let trimmed = stripped.trim();
    assert_eq!(
        trimmed, "Before After",
        "Escaped close-paren inside comment must not end the comment"
    );
}
/// A `\(` inside a comment is a quoted pair and does not open a nested
/// comment.
#[test]
fn strip_comments_escaped_open_paren_inside_comment() {
    let stripped = strip_comments("X (comment \\( not nested) Y");
    let trimmed = stripped.trim();
    assert_eq!(
        trimmed, "X Y",
        "Escaped open-paren inside comment must not increase nesting depth"
    );
}
/// Parentheses inside a quoted-string are literal text, so the input comes
/// back unchanged.
#[test]
fn strip_comments_respects_quoted_strings() {
    let stripped = strip_comments("\"value (not a comment)\" rest");
    assert_eq!(
        stripped, "\"value (not a comment)\" rest",
        "Parentheses inside quoted-string must not be treated as comments"
    );
}
/// Only the comment outside the quoted-string is removed; the quoted
/// parentheses survive.
#[test]
fn strip_comments_quoted_string_with_real_comment() {
    let stripped = strip_comments("\"value (literal)\" (real comment) end");
    assert_eq!(
        stripped, "\"value (literal)\" end",
        "Real comment stripped, quoted parens preserved"
    );
}
/// `hex_digit` accepts lowercase, uppercase and decimal digits and rejects
/// everything else.
#[test]
fn hex_digit_lowercase() {
    // Lowercase letters.
    assert_eq!(hex_digit(b'a'), Some(10));
    assert_eq!(hex_digit(b'f'), Some(15));
    assert_eq!(hex_digit(b'c'), Some(12));
    // Uppercase letters.
    assert_eq!(hex_digit(b'A'), Some(10));
    assert_eq!(hex_digit(b'F'), Some(15));
    // Decimal digits.
    assert_eq!(hex_digit(b'0'), Some(0));
    assert_eq!(hex_digit(b'9'), Some(9));
    // Bytes outside the hex alphabet.
    assert_eq!(hex_digit(b'g'), None);
    assert_eq!(hex_digit(b'G'), None);
    assert_eq!(hex_digit(b' '), None);
}
/// Lowercase hex digits in quoted-printable escapes decode to the same
/// bytes as uppercase ones would.
#[test]
fn qp_lowercase_hex_digits() {
    let bytes = decode_quoted_printable(b"caf=c3=a9");
    assert_eq!(bytes, b"caf\xc3\xa9");
    let rendered = String::from_utf8_lossy(&bytes);
    assert_eq!(
        rendered, "caf\u{e9}",
        "Lowercase hex digits in QP should decode correctly (RFC 2045 Section 6.7)"
    );
}
/// Pairs containing lowercase hex digits combine into the expected byte.
#[test]
fn decode_hex_pair_lowercase() {
    let both_letters = decode_hex_pair(b'f', b'f');
    let letter_then_digit = decode_hex_pair(b'a', b'0');
    let digit_then_letter = decode_hex_pair(b'0', b'a');
    assert_eq!(both_letters, Some(0xFF));
    assert_eq!(letter_then_digit, Some(0xA0));
    assert_eq!(digit_then_letter, Some(0x0A));
}
/// `<>` carries no addr-spec, so no address is produced.
#[test]
fn parse_single_address_empty_angle_brackets() {
    let parsed = parse_single_address("Display Name <>");
    assert!(
        parsed.is_none(),
        "Empty angle brackets should not produce an address"
    );
}
/// Input with `>` before `<` still produces some address; the test does
/// not pin which one.
#[test]
fn parse_single_address_reversed_angles() {
    let parsed = parse_single_address(">bad<user@example.com");
    let produced_address = parsed.is_some();
    assert!(produced_address);
}
/// Free text with neither `@` nor angle brackets is not an address.
#[test]
fn parse_single_address_no_at_no_brackets() {
    let parsed = parse_single_address("just plain text");
    assert!(
        parsed.is_none(),
        "Text without @ or <> should not produce an address"
    );
}
/// Checks `is_inside_quotes` at three positions: index 15 of a string with
/// an escaped quote inside its quoted section (the final quote), index 0
/// (the opening quote) and an index after a closed quoted section.
#[test]
fn is_inside_quotes_with_escapes() {
    let at_closing_quote = is_inside_quotes("\"hello \\\" world\"end", 15);
    let at_opening_quote = is_inside_quotes("\"hello\"", 0);
    let after_quoted_section = is_inside_quotes("\"hello\" world", 8);
    assert!(at_closing_quote);
    assert!(!at_opening_quote);
    assert!(!after_quoted_section);
}
/// A backslash that appears before any quoted section is literal, so the
/// quote right after it opens a quoted-string normally.
#[test]
fn is_inside_quotes_backslash_outside_quotes_is_literal() {
    let input = r#"x=\"y"z""#;
    let inside = is_inside_quotes(input, 6);
    assert!(
        !inside,
        "RFC 5322 Section 3.2.4: backslash outside quotes must not escape \
         the next character — position 6 ('z') should be outside quotes"
    );
}
/// Inputs shorter than two characters are returned unchanged.
#[test]
fn strip_outer_quotes_short_input() {
    let lone_quote = strip_outer_quotes("\"");
    let empty = strip_outer_quotes("");
    let single_char = strip_outer_quotes("x");
    assert_eq!(lone_quote, "\"");
    assert_eq!(empty, "");
    assert_eq!(single_char, "x");
}
/// A quote on only one end is not treated as a surrounding pair.
#[test]
fn strip_outer_quotes_one_sided() {
    let leading_only = strip_outer_quotes("\"hello");
    let trailing_only = strip_outer_quotes("hello\"");
    assert_eq!(leading_only, "\"hello");
    assert_eq!(trailing_only, "hello\"");
}
/// Input that opens with a bare LF has an empty header section; the rest
/// is the body.
#[test]
fn split_header_body_starts_with_lf() {
    let (head, rest) = split_header_body(b"\nBody text here");
    assert!(
        head.is_empty(),
        "Headers should be empty when input starts with \\n"
    );
    assert_eq!(rest, b"Body text here");
}
/// Input that opens with CRLF has an empty header section; the rest is the
/// body.
#[test]
fn split_header_body_starts_with_crlf() {
    let (head, rest) = split_header_body(b"\r\nBody text here");
    assert!(
        head.is_empty(),
        "Headers should be empty when input starts with \\r\\n"
    );
    assert_eq!(rest, b"Body text here");
}
/// A single trailing bare LF is removed from the decoded body.
#[test]
fn decode_body_strips_trailing_lf_only() {
    let text = decode_body(b"Hello\n", "", "text/plain; charset=utf-8");
    assert_eq!(text, "Hello", "Trailing bare LF should be stripped");
}
/// A body without a trailing newline passes through untouched.
#[test]
fn decode_body_no_trailing_newline() {
    let text = decode_body(b"Hello", "", "text/plain; charset=utf-8");
    assert_eq!(
        text, "Hello",
        "No trailing newline should leave content unchanged"
    );
}
/// Lowercase hex digits are accepted in percent escapes.
#[test]
fn percent_decode_lowercase_hex() {
    let bytes = percent_decode("%c3%a9");
    assert_eq!(bytes, vec![0xC3, 0xA9]);
}
/// A percent escape with non-hex digits is emitted literally.
#[test]
fn percent_decode_invalid_hex() {
    let bytes = percent_decode("%ZZ");
    assert_eq!(bytes, b"%ZZ");
}
/// A percent escape cut short at the end of input is emitted literally.
#[test]
fn percent_decode_truncated() {
    let bytes = percent_decode("hello%2");
    assert_eq!(bytes, b"hello%2");
}
/// A colon in front of the `@` must not make the whole entry disappear;
/// the test only requires a non-empty result.
#[test]
fn parse_address_colon_with_at_sign() {
    let parsed = parse_address_list("user:tag@example.com");
    let found_any = !parsed.is_empty();
    assert!(found_any, "Should parse at least one address");
}
/// An extended `filename*` value wrapped in double quotes is still decoded.
#[test]
fn extract_rfc2231_param_handles_quoted_value() {
    let header = "attachment; filename*=\"utf-8''caf%C3%A9.txt\"";
    let name = extract_filename(header, "application/octet-stream");
    assert_eq!(
        name.as_deref(),
        Some("café.txt"),
        "RFC 2231 filename with outer quotes must be decoded correctly \
         (Postel's law: be liberal in what you accept)"
    );
}
/// A semicolon inside the quoted extended value belongs to the value and
/// does not end the parameter.
#[test]
fn extract_rfc2231_param_quoted_value_with_semicolon() {
    let header = "attachment; filename*=\"UTF-8''file;name.txt\"";
    let value = extract_rfc2231_param(header, "filename");
    assert_eq!(
        value.as_deref(),
        Some("file;name.txt"),
        "RFC 2231 quoted value containing a literal semicolon must not \
         be truncated at the semicolon (Postel's law)"
    );
}
/// When the opening quote is never closed, the value ends at the next `;`
/// rather than absorbing the parameters that follow.
#[test]
fn extract_rfc2231_param_unterminated_quoted_value_stops_at_separator() {
    let header = "attachment; filename*=\"UTF-8''report.txt; size=123";
    let value = extract_rfc2231_param(header, "filename");
    assert_eq!(
        value.as_deref(),
        Some("report.txt"),
        "unterminated quoted RFC 2231 values must stop at the next \
         parameter separator instead of swallowing later parameters"
    );
}
/// A `filename*` with an invalid percent escape loses to a valid plain
/// `filename` in the same header.
#[test]
fn extract_filename_malformed_rfc2231_param_falls_back_to_plain_filename() {
    let header = "attachment; filename*=UTF-8''report%ZZ.txt; filename=\"report.txt\"";
    let name = extract_filename(header, "application/octet-stream");
    assert_eq!(
        name.as_deref(),
        Some("report.txt"),
        "malformed RFC 2231 filename* must not override a valid plain filename fallback"
    );
}
/// An `@` inside a group's quoted display-name must not hide the real
/// member address.
#[test]
fn group_address_with_at_in_quoted_display_name() {
    let members = parse_address_list(r#""user@host": addr@example.com;"#);
    assert_eq!(
        members.len(),
        1,
        "group with quoted display-name containing '@' must parse the member address"
    );
    assert_eq!(members[0].email, "addr@example.com");
}
/// The obsolete source-route prefix (`@hop,...:`) inside angle brackets is
/// dropped, leaving only the final mailbox.
#[test]
fn obs_route_stripped_from_angle_bracket_address() {
    // Single relay hop.
    let single_hop = parse_address_list("<@relay:user@example.com>");
    assert_eq!(single_hop.len(), 1);
    assert_eq!(single_hop[0].email, "user@example.com");

    // Several comma-separated hops.
    let multi_hop = parse_address_list("<@hop1,@hop2:alice@domain.org>");
    assert_eq!(multi_hop.len(), 1);
    assert_eq!(multi_hop[0].email, "alice@domain.org");

    // Route combined with a display name.
    let named = parse_address_list("Alice <@relay:alice@example.com>");
    assert_eq!(named.len(), 1);
    assert_eq!(named[0].name.as_deref(), Some("Alice"));
    assert_eq!(named[0].email, "alice@example.com");
}
/// An encoded first section without the `charset'language'` prefix is kept
/// as-is instead of being percent-decoded.
#[test]
fn rfc2231_continuation_malformed_first_encoded_section_not_percent_decoded() {
    let assembled =
        extract_rfc2231_continuation("attachment; filename*0*=hello%20world.txt", "filename");
    assert_eq!(
        assembled.as_deref(),
        Some("hello%20world.txt"),
        "malformed charset'language'value must not be percent-decoded"
    );
}
/// When section 0 appears twice, a malformed encoded duplicate must not
/// replace the earlier valid plain section.
#[test]
fn rfc2231_continuation_malformed_encoded_duplicate_does_not_override_plain() {
    let disposition =
        "attachment; filename*0=\"report\"; filename*0*=broken%20value; filename*1=\".txt\"";
    let assembled = extract_rfc2231_continuation(disposition, "filename");
    assert_eq!(
        assembled.as_deref(),
        Some("report.txt"),
        "malformed encoded duplicate section must not override the first valid plain section"
    );
}
/// An out-of-range numeric zone (`+9999`) yields an offset of zero.
#[test]
fn test_invalid_timezone_offset_defaults_to_zero() {
    let message = b"From: test@example.com\r\nDate: Thu, 13 Feb 2025 12:00:00 +9999\r\n\r\n";
    let parsed = parse_email(message).unwrap();
    let offset = parsed.date.unwrap().tz_offset_minutes;
    assert_eq!(offset, 0);
}
/// Offsets of +14:00, -12:00 and +00:00 are converted to minutes as-is.
#[test]
fn test_valid_edge_case_timezone_offsets() {
    // +1400 -> 840 minutes.
    let plus_fourteen = b"From: test@example.com\r\nDate: Thu, 13 Feb 2025 12:00:00 +1400\r\n\r\n";
    let parsed = parse_email(plus_fourteen).unwrap();
    assert_eq!(parsed.date.unwrap().tz_offset_minutes, 840);

    // -1200 -> -720 minutes.
    let minus_twelve = b"From: test@example.com\r\nDate: Thu, 13 Feb 2025 12:00:00 -1200\r\n\r\n";
    let parsed = parse_email(minus_twelve).unwrap();
    assert_eq!(parsed.date.unwrap().tz_offset_minutes, -720);

    // +0000 -> 0 minutes.
    let utc = b"From: test@example.com\r\nDate: Thu, 13 Feb 2025 12:00:00 +0000\r\n\r\n";
    let parsed = parse_email(utc).unwrap();
    assert_eq!(parsed.date.unwrap().tz_offset_minutes, 0);
}
/// A five-digit zone is rejected outright instead of being cut down to its
/// first four digits.
#[test]
fn test_overlong_timezone_offset_defaults_to_zero() {
    let message = b"From: test@example.com\r\nDate: Thu, 13 Feb 2025 12:00:00 +12345\r\n\r\n";
    let parsed = parse_email(message).unwrap();
    let offset = parsed.date.unwrap().tz_offset_minutes;
    assert_eq!(
        offset, 0,
        "zone = sign 4DIGIT, so +12345 must not be truncated to +1234 \
         (RFC 5322 Section 3.3)"
    );
}
/// Single-letter military zones resolve to a zero offset, while the civil
/// abbreviation `EDT` keeps its real offset of -240 minutes.
#[test]
fn test_military_timezone_codes_treated_as_unknown() {
    // Military zone "A".
    let zone_a = b"From: test@example.com\r\n\
        Date: Thu, 13 Feb 2025 12:00:00 A\r\n\r\n";
    let parsed = parse_email(zone_a).unwrap();
    assert_eq!(
        parsed.date.unwrap().tz_offset_minutes,
        0,
        "military zone 'A' must be treated as -0000 (RFC 5322 Section 4.3)"
    );

    // Military zone "N".
    let zone_n = b"From: test@example.com\r\n\
        Date: Thu, 13 Feb 2025 12:00:00 N\r\n\r\n";
    let parsed = parse_email(zone_n).unwrap();
    assert_eq!(
        parsed.date.unwrap().tz_offset_minutes,
        0,
        "military zone 'N' must be treated as -0000 (RFC 5322 Section 4.3)"
    );

    // Military zone "M".
    let zone_m = b"From: test@example.com\r\n\
        Date: Thu, 13 Feb 2025 12:00:00 M\r\n\r\n";
    let parsed = parse_email(zone_m).unwrap();
    assert_eq!(
        parsed.date.unwrap().tz_offset_minutes,
        0,
        "military zone 'M' must be treated as -0000 (RFC 5322 Section 4.3)"
    );

    // Civil zone abbreviation.
    let zone_edt = b"From: test@example.com\r\n\
        Date: Thu, 13 Feb 2025 12:00:00 EDT\r\n\r\n";
    let parsed = parse_email(zone_edt).unwrap();
    assert_eq!(
        parsed.date.unwrap().tz_offset_minutes,
        -240,
        "EDT is a well-known civil zone and should still resolve to -0400"
    );
}
/// The date parser does not validate day-of-month against the calendar:
/// impossible dates such as 31 Feb are accepted alongside valid ones.
#[test]
fn spec_audit_date_accepts_calendar_invalid_days() {
    // Day 31 in months that have fewer days.
    assert!(parse_rfc5322_date("Thu, 31 Feb 2025 12:00:00 +0000").is_some());
    assert!(parse_rfc5322_date("Mon, 31 Apr 2025 12:00:00 +0000").is_some());
    assert!(parse_rfc5322_date("Mon, 31 Jun 2025 12:00:00 +0000").is_some());
    // 29 Feb in a non-leap year and in a leap year, then an ordinary date.
    assert!(parse_rfc5322_date("Mon, 29 Feb 2023 12:00:00 +0000").is_some());
    assert!(parse_rfc5322_date("Thu, 29 Feb 2024 12:00:00 +0000").is_some());
    assert!(parse_rfc5322_date("Tue, 28 Feb 2023 12:00:00 +0000").is_some());
}
/// A day-of-week that disagrees with the calendar date is tolerated and
/// the date fields are still extracted.
#[test]
fn spec_audit_date_accepts_mismatched_day_of_week() {
    let maybe_date = parse_rfc5322_date("Mon, 13 Feb 2025 12:00:00 +0000");
    assert!(
        maybe_date.is_some(),
        "Parser must accept dates with incorrect day-of-week (Postel's law)"
    );
    let date = maybe_date.unwrap();
    assert_eq!(date.year, 2025);
    assert_eq!(date.month, 2);
    assert_eq!(date.day, 13);
}
/// An encoded-word inside a quoted display name stays literal.
#[test]
fn spec_audit_encoded_word_in_quoted_string_not_decoded() {
    let message = b"From: \"=?UTF-8?B?SGVsbG8=?=\" <test@example.com>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";
    let email = parse_email(message).unwrap();
    let display_name = email.from[0].name.as_deref();
    assert_eq!(
        display_name,
        Some("=?UTF-8?B?SGVsbG8=?="),
        "Encoded words inside quoted-strings must NOT be decoded (RFC 2047 Section 5)"
    );
}
/// The same encoded-word, unquoted, is decoded in the display name.
#[test]
fn spec_audit_encoded_word_in_unquoted_phrase_decoded() {
    let message = b"From: =?UTF-8?B?SGVsbG8=?= <test@example.com>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";
    let email = parse_email(message).unwrap();
    let display_name = email.from[0].name.as_deref();
    assert_eq!(display_name, Some("Hello"));
}
/// An unusually long encoded-word in the Subject is still decoded.
#[test]
fn spec_audit_overlong_encoded_word_decoded() {
    let message = b"Subject: =?UTF-8?Q?AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA?=\r\n\
        From: test@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(
        subject,
        Some("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"),
        "Overlong encoded-word must be decoded (Postel's law, RFC 2047 Section 6.1)"
    );
}
/// The requested parameter name is matched without regard to case.
#[test]
fn extract_param_case_insensitive_param_name() {
    let content_type = "text/plain; charset=utf-8";

    let upper = extract_param(content_type, "CHARSET");
    assert_eq!(
        upper,
        Some("utf-8".to_string()),
        "extract_param must match param_name case-insensitively (RFC 2045 Section 5.1)"
    );

    let mixed = extract_param(content_type, "Charset");
    assert_eq!(
        mixed,
        Some("utf-8".to_string()),
        "extract_param must match mixed-case param_name (RFC 2045 Section 5.1)"
    );
}
/// An upper-case lookup name still finds the lower-case `filename*`.
#[test]
fn extract_rfc2231_param_case_insensitive_param_name() {
    let disposition = "attachment; filename*=utf-8''hello%20world.txt";
    let value = extract_rfc2231_param(disposition, "FILENAME");
    assert_eq!(
        value,
        Some("hello world.txt".to_string()),
        "extract_rfc2231_param must match param_name case-insensitively (RFC 2045 Section 5.1)"
    );
}
/// An upper-case lookup name still finds the lower-case continuation
/// sections and joins them.
#[test]
fn extract_rfc2231_continuation_case_insensitive_param_name() {
    let disposition = "attachment; filename*0=\"hello \"; filename*1=\"world.txt\"";
    let assembled = extract_rfc2231_continuation(disposition, "FILENAME");
    assert_eq!(
        assembled,
        Some("hello world.txt".to_string()),
        "extract_rfc2231_continuation must match param_name case-insensitively \
         (RFC 2045 Section 5.1)"
    );
}
/// A charset given in extended form (`charset*=`) is honoured when
/// decoding the body.
#[test]
fn decode_body_rfc2231_charset_star_encoded() {
    let payload = b"caf\xc3\xa9";
    let content_type = "text/plain; charset*=UTF-8''utf-8";
    let text = decode_body(payload, "8bit", content_type);
    assert_eq!(
        text, "caf\u{e9}",
        "RFC 2231 Section 4: charset*=UTF-8''utf-8 must be recognized \
         by decode_body so non-ASCII bodies are decoded correctly"
    );
}
/// A charset split over `charset*0` / `charset*1` is reassembled before
/// the body is decoded.
#[test]
fn decode_body_rfc2231_charset_continuation() {
    let payload = b"caf\xc3\xa9";
    let content_type = "text/plain; charset*0=ut; charset*1=f-8";
    let text = decode_body(payload, "8bit", content_type);
    assert_eq!(
        text, "caf\u{e9}",
        "RFC 2231 Section 3: charset*0=ut; charset*1=f-8 must be \
         assembled and recognized by decode_body"
    );
}
/// Whitespace on either side of the `/` in a media type is removed, and
/// parameters are not part of the returned type.
#[test]
fn extract_mime_type_normalizes_whitespace_around_slash() {
    let after_comment = extract_mime_type("text (comment) /plain");
    assert_eq!(
        after_comment, "text/plain",
        "whitespace before '/' (left by comment stripping) must be removed"
    );

    let space_after = extract_mime_type("text/ plain");
    assert_eq!(
        space_after, "text/plain",
        "whitespace after '/' must be removed"
    );

    let space_both = extract_mime_type("text / plain");
    assert_eq!(
        space_both, "text/plain",
        "whitespace around '/' must be removed"
    );

    let with_params = extract_mime_type("text/plain; charset=utf-8");
    assert_eq!(with_params, "text/plain");
}
/// With section 1 missing, only section 0 is returned; section 2 is not
/// appended.
#[test]
fn extract_rfc2231_continuation_non_contiguous_sections() {
    let disposition = "attachment; filename*0=\"hello \"; filename*2=\"world.txt\"";
    let assembled = extract_rfc2231_continuation(disposition, "filename");
    assert_eq!(
        assembled,
        Some("hello ".to_string()),
        "RFC 2231 Section 3: reassembly must stop before section 2 when section 1 is missing"
    );
}
/// A jump from section 0 straight to section 5 ends reassembly after
/// section 0.
#[test]
fn rfc2231_gap_larger_than_three() {
    let disposition = "attachment; filename*0=\"hello\"; filename*5=\"world\"";
    let assembled = extract_rfc2231_continuation(disposition, "filename");
    assert_eq!(
        assembled,
        Some("hello".to_string()),
        "RFC 2231 Section 3: reassembly must stop at the first missing section"
    );
}
/// The charset prefix may sit on a later section when section 0 is plain;
/// it is still used to decode the percent-encoded bytes.
#[test]
fn rfc2231_continuation_charset_from_non_first_encoded_section() {
    let disposition = "attachment; filename*0=\"hello \"; filename*1*=UTF-8''w%C3%B6rld.txt";
    let assembled = extract_rfc2231_continuation(disposition, "filename");
    assert_eq!(
        assembled,
        Some("hello wörld.txt".to_string()),
        "RFC 2231 Section 3: charset from the first encoded section (not section 0) \
         must be used to decode percent-encoded bytes"
    );
}
/// Spaces on either side of `=` in continuation sections are accepted.
#[test]
fn rfc2231_continuation_tolerates_spaces_around_eq() {
    let disposition = "attachment; filename*0 = \"hello\"; filename*1 = \"world.txt\"";
    let assembled = extract_rfc2231_continuation(disposition, "filename");
    assert_eq!(
        assembled,
        Some("helloworld.txt".to_string()),
        "RFC 2231 Section 3 + Postel's law: spaces around '=' in continuation \
         parameters must be tolerated"
    );
}
/// With two `text/html` alternatives, the later one becomes the HTML body.
#[test]
fn multipart_alternative_prefers_last_html() {
    let message = b"From: sender@example.com\r\n\
        Subject: test\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/alternative; boundary=\"alt\"\r\n\
        \r\n\
        --alt\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        plain text\r\n\
        --alt\r\n\
        Content-Type: text/html\r\n\
        \r\n\
        <p>first html</p>\r\n\
        --alt\r\n\
        Content-Type: text/html\r\n\
        \r\n\
        <p>second html - better version</p>\r\n\
        --alt--";
    let email = parse_email(message).unwrap();
    let html = email.body_html.as_deref();
    assert_eq!(
        html,
        Some("<p>second html - better version</p>"),
        "RFC 2046 Section 5.1.4: should prefer LAST text/html in multipart/alternative"
    );
}
/// With two `text/plain` alternatives, the later one becomes the text body.
#[test]
fn multipart_alternative_prefers_last_plain() {
    let message = b"From: sender@example.com\r\n\
        Subject: test\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/alternative; boundary=\"alt\"\r\n\
        \r\n\
        --alt\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        first plain\r\n\
        --alt\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        second plain - better version\r\n\
        --alt\r\n\
        Content-Type: text/html\r\n\
        \r\n\
        <p>html version</p>\r\n\
        --alt--";
    let email = parse_email(message).unwrap();
    let plain = email.body_text.as_deref();
    assert_eq!(
        plain,
        Some("second plain - better version"),
        "RFC 2046 Section 5.1.4: should prefer LAST text/plain in multipart/alternative"
    );
}
/// A line starting with `--abcdef` is body content when the boundary is
/// `abc`; it must not be taken for a delimiter.
#[test]
fn boundary_not_matched_as_prefix() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"abc\"\r\n\
        \r\n\
        --abc\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body text here\r\n\
        --abcdef\r\n\
        This is NOT a boundary\r\n\
        --abc--";
    let email = parse_email(message).unwrap();
    let plain = email.body_text.as_deref();
    assert_eq!(
        plain,
        Some("Body text here\r\n--abcdef\r\nThis is NOT a boundary")
    );
}
/// `--abc--junk` is not a closing delimiter because of the trailing text,
/// so it stays part of the body.
#[test]
fn closing_boundary_requires_valid_terminator() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"abc\"\r\n\
        \r\n\
        --abc\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body text here\r\n\
        --abc--junk\r\n\
        This is NOT a closing boundary\r\n\
        --abc--";
    let email = parse_email(message).unwrap();
    let plain = email.body_text.as_deref();
    assert_eq!(
        plain,
        Some("Body text here\r\n--abc--junk\r\nThis is NOT a closing boundary")
    );
}
/// A Content-Type with no subtype (`text`) is handled as plain text.
#[test]
fn invalid_content_type_defaults_to_text_plain() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text\r\n\
        \r\n\
        Body text";
    let email = parse_email(message).unwrap();
    let plain = email.body_text.as_deref();
    assert_eq!(plain, Some("Body text"));
}
/// The space between a valid encoded-word and a broken one is kept, since
/// the second token is not an encoded-word.
#[test]
fn whitespace_preserved_before_invalid_encoded_word() {
    let output = decode_encoded_words("=?UTF-8?B?SGVsbG8=?= =?broken");
    assert_eq!(output, "Hello =?broken");
}
/// An encoded-word glued to surrounding text in the Subject is left
/// undecoded.
#[test]
fn encoded_word_without_lwsp_boundary_stays_literal_in_subject() {
    let message = b"From: sender@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Subject: Prefix=?UTF-8?Q?caf=C3=A9?=Suffix\r\n\
        \r\n\
        Body\r\n";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(
        subject,
        Some("Prefix=?UTF-8?Q?caf=C3=A9?=Suffix"),
        "RFC 2047 Section 5: Subject encoded-words must not decode \
         when they are glued to adjacent text without linear whitespace"
    );
}
/// An encoded-word glued to a preceding word in a display-name phrase is
/// left undecoded.
#[test]
fn encoded_word_without_lwsp_boundary_stays_literal_in_phrase() {
    let message = b"From: Prefix=?UTF-8?Q?caf=C3=A9?= <sender@example.com>\r\n\
        To: recipient@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n\
        Body\r\n";
    let email = parse_email(message).unwrap();
    let display_name = email.from[0].name.as_deref();
    assert_eq!(
        display_name,
        Some("Prefix=?UTF-8?Q?caf=C3=A9?="),
        "RFC 2047 Section 5: phrase encoded-words must not decode \
         when they are glued to adjacent words without linear whitespace"
    );
}
/// A space inside the base64 text of an encoded-word is ignored when
/// decoding.
#[test]
fn encoded_word_base64_with_space_in_text() {
    let output = decode_encoded_words("=?UTF-8?B?SGVs bG8=?=");
    assert_eq!(
        output, "Hello",
        "space in B-encoded-text should be stripped as fold artifact"
    );
}
/// Delimiter lines preceded by a bare CR (no LF) are still recognised.
#[test]
fn split_mime_parts_bare_cr_boundaries() {
    let payload = b"--boundary\r\nContent-Type: text/plain\r\n\r\nPart 1\r--boundary\r\nContent-Type: text/plain\r\n\r\nPart 2\r--boundary--";
    let sections = split_mime_parts(payload, "boundary");
    assert_eq!(
        sections.len(),
        2,
        "Should find 2 parts with bare-CR-only boundaries"
    );
}
/// A stray `>` in one list entry must not stop the comma from separating
/// the next, valid address.
#[test]
fn regression_unmatched_angle_bracket_does_not_break_separator() {
    let addrs = parse_address_list("bad> addr, user@example.com");
    let has_valid_address = addrs.iter().any(|entry| entry.email == "user@example.com");
    assert!(
        has_valid_address,
        "unmatched '>' must not prevent comma from splitting addresses: {addrs:?}"
    );
}
/// A comment placed before a bare addr-spec supplies the display name.
#[test]
fn regression_leading_comment_display_name_extracted() {
    let mailbox =
        parse_single_address("(John Doe) user@example.com").expect("should parse successfully");
    assert_eq!(mailbox.email, "user@example.com");
    assert_eq!(
        mailbox.name.as_deref(),
        Some("John Doe"),
        "leading comment should be used as display name (RFC 5322 Section 3.2.2)"
    );
}
/// An encoded-word inside the leading comment is decoded before being used
/// as the display name.
#[test]
fn regression_leading_comment_rfc2047_decoded() {
    let mailbox = parse_single_address("(=?UTF-8?B?Sm9obg==?=) user@example.com")
        .expect("should parse successfully");
    assert_eq!(mailbox.email, "user@example.com");
    assert_eq!(
        mailbox.name.as_deref(),
        Some("John"),
        "encoded-words in leading comment must be decoded (RFC 2047 Section 5 rule 2)"
    );
}
/// A comment embedded in a name-addr display name is dropped from the
/// resulting name.
#[test]
fn regression_name_addr_comment_not_retained_in_display_name() {
    let message = b"From: John (Boss) Doe <john@example.com>\r\n\
        To: recipient@example.com\r\n\
        Subject: Commented name\r\n\
        \r\n\
        body";
    let email = parse_email(message).expect("message should parse successfully");
    assert_eq!(email.from.len(), 1);
    let display_name = email.from[0].name.as_deref();
    assert_eq!(
        display_name,
        Some("John Doe"),
        "display-name comments are CFWS and must not be retained"
    );
}
/// A phrase mixing a quoted word and an atom has the quotes removed from
/// the quoted word.
#[test]
fn regression_name_addr_mixed_phrase_unquotes_quoted_word() {
    let message = b"From: \"John\" Doe <john@example.com>\r\n\
        To: recipient@example.com\r\n\
        Subject: Mixed phrase\r\n\
        \r\n\
        body";
    let email = parse_email(message).expect("message should parse successfully");
    assert_eq!(email.from.len(), 1);
    let display_name = email.from[0].name.as_deref();
    assert_eq!(
        display_name,
        Some("John Doe"),
        "quoted-string words in a display-name phrase must be unquoted"
    );
}
/// `_` inside a Q-encoded word stands for a space.
#[test]
fn edge_q_encoding_underscore_is_space() {
    let output = decode_encoded_words("=?utf-8?Q?hello_world?=");
    assert_eq!(
        output, "hello world",
        "RFC 2047 Section 4.2: underscore in Q-encoding must decode to space"
    );
}
/// Whitespace between two adjacent encoded-words is dropped even when
/// their charsets differ.
#[test]
fn edge_adjacent_encoded_words_collapse_whitespace_different_charsets() {
    let output = decode_encoded_words("=?utf-8?B?SGVsbG8=?= =?iso-8859-1?B?V29ybGQ=?=");
    assert_eq!(
        output, "HelloWorld",
        "RFC 2047 Section 6.2: whitespace between adjacent encoded words \
         with different charsets must be collapsed"
    );
}
/// The delimiter text appearing in the middle of a line is ordinary
/// content.
#[test]
fn edge_boundary_in_content_not_at_line_start() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"BOUND\"\r\n\
        \r\n\
        --BOUND\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        This has --BOUND in the middle\r\n\
        --BOUND--";
    let email = parse_email(message).unwrap();
    let plain = email.body_text.as_deref();
    assert_eq!(
        plain,
        Some("This has --BOUND in the middle"),
        "Boundary mid-line must not split the part (RFC 2046 Section 5.1.1)"
    );
}
/// A `;` inside a quoted parameter value is part of the value.
#[test]
fn edge_semicolon_in_quoted_param_value() {
    let content_type = "text/plain; charset=\"utf-8; extra\"";
    let value = extract_param(content_type, "charset");
    assert_eq!(
        value.as_deref(),
        Some("utf-8; extra"),
        "Semicolons inside quoted parameter values must not split params \
         (RFC 5322 Section 3.2.4)"
    );
}
/// An encoded-word whose payload is only `=20` escapes decodes to exactly
/// that many spaces.
///
/// The input carries two `=20` escapes, so the expected output is two
/// spaces; the previous expectation of a single space did not match the
/// input.
#[test]
fn edge_subject_only_whitespace_after_decode() {
    let result = decode_encoded_words("=?utf-8?Q?=20=20?=");
    // Two `=20` escapes -> two space characters.
    assert_eq!(
        result, "  ",
        "RFC 2047 Section 4.2: =20 in Q-encoding must decode to space"
    );
}
/// A single unfolded header line of 1000 characters is kept intact in
/// `raw_headers`.
#[test]
fn edge_very_long_header_unfolded() {
    let long_value = "x".repeat(1000);
    let message = format!(
        "From: a@b.com\r\n\
         Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
         Received: {long_value}\r\n\
         \r\n"
    );
    let email = parse_email(message.as_bytes()).unwrap();
    let preserved = email.raw_headers.contains(&long_value);
    assert!(
        preserved,
        "Very long unfolded header line (>998 chars) must be preserved"
    );
}
/// Comments scattered through the Date header are skipped and every date
/// and time field is still read.
#[test]
fn edge_date_cfws_before_day() {
    let message = b"From: a@b.com\r\n\
        Date: (Mon, ) 13 Feb (year) 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let stamp = email
        .date
        .expect("Date with CFWS in various positions must parse per RFC 5322 Section 4.3");
    assert_eq!(stamp.day, 13);
    assert_eq!(stamp.month, 2);
    assert_eq!(stamp.year, 2025);
    assert_eq!(stamp.hour, 15);
    assert_eq!(stamp.minute, 47);
    assert_eq!(stamp.second, 33);
}
/// A `message/rfc822` part that itself holds a multipart message parses
/// without error and still yields a text body.
#[test]
fn edge_nested_message_rfc822() {
    // The embedded message: a text part plus a PDF attachment.
    let inner_msg = "From: inner@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"inner-bound\"\r\n\
        \r\n\
        --inner-bound\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Inner text\r\n\
        --inner-bound\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename=\"inner.pdf\"\r\n\
        \r\n\
        PDF-data\r\n\
        --inner-bound--";
    // The enclosing message wraps it in a message/rfc822 part.
    let outer = format!(
        "From: outer@example.com\r\n\
         Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
         Content-Type: multipart/mixed; boundary=\"outer-bound\"\r\n\
         \r\n\
         --outer-bound\r\n\
         Content-Type: text/plain\r\n\
         \r\n\
         Outer text\r\n\
         --outer-bound\r\n\
         Content-Type: message/rfc822\r\n\
         \r\n\
         {inner_msg}\r\n\
         --outer-bound--"
    );
    let email = parse_email(outer.as_bytes()).unwrap();
    let has_text = email.body_text.is_some();
    assert!(
        has_text,
        "Nested message/rfc822 must be parsed without error"
    );
}
/// One space between two encoded-words is dropped from the output.
#[test]
fn edge_adjacent_encoded_words_single_space() {
    let output = decode_encoded_words("=?utf-8?Q?A?= =?utf-8?Q?B?=");
    assert_eq!(
        output, "AB",
        "RFC 2047 Section 6.2: single space between adjacent encoded words must be collapsed"
    );
}
/// The space between an encoded-word and ordinary text is kept.
#[test]
fn edge_encoded_word_followed_by_plain_text() {
    let output = decode_encoded_words("=?utf-8?Q?Hello?= World");
    assert_eq!(
        output, "Hello World",
        "RFC 2047 Section 6.2: whitespace between encoded word and plain text must be preserved"
    );
}
/// Every `_` in a Q-encoded word decodes to exactly one space, so a run of
/// underscores produces a run of spaces of the same length.
///
/// The input has a run of two and a run of three underscores; the previous
/// expectation of single spaces contradicted the assertion message.
#[test]
fn edge_q_encoding_multiple_underscores() {
    let result = decode_encoded_words("=?utf-8?Q?a__b___c?=");
    // "a" + 2 spaces + "b" + 3 spaces + "c".
    assert_eq!(
        result, "a  b   c",
        "RFC 2047 Section 4.2: each underscore in Q-encoding represents one space"
    );
}
/// A Q-encoded word with no payload is passed through as literal text.
#[test]
fn edge_encoded_word_empty_payload() {
    let output = decode_encoded_words("=?utf-8?Q??=");
    assert_eq!(
        output, "=?utf-8?Q??=",
        "RFC 2047: empty Q-encoded payload is malformed and must be left as literal text"
    );
}
/// A B-encoded word with no payload is passed through as literal text.
#[test]
fn edge_encoded_word_empty_base64_payload() {
    let output = decode_encoded_words("=?utf-8?B??=");
    assert_eq!(
        output, "=?utf-8?B??=",
        "RFC 2047: empty B-encoded payload is malformed and must be left as literal text"
    );
}
/// An unrecognised encoding letter (`X`) must not panic; some of the
/// original token has to remain visible in the output.
#[test]
fn edge_encoded_word_unknown_encoding() {
    let output = decode_encoded_words("=?utf-8?X?test?=");
    let keeps_some_text = output.contains("utf-8") || output.contains("test");
    assert!(
        keeps_some_text,
        "Unknown encoding letter should not cause a panic"
    );
}
/// A content line beginning with `--abcdef` survives in the body when the
/// boundary is only `abc`.
#[test]
fn edge_boundary_prefix_not_false_match() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"abc\"\r\n\
        \r\n\
        --abc\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        --abcdef is not a real boundary\r\n\
        --abc--";
    let email = parse_email(message).unwrap();
    // A missing body is treated as empty text, which fails the check below.
    let plain = email.body_text.as_deref().unwrap_or("");
    assert!(
        plain.contains("--abcdef"),
        "Boundary 'abc' must not falsely match '--abcdef' (RFC 2046 Section 5.1.1)"
    );
}
/// A Subject folded with a tab-led continuation line keeps the text from
/// both physical lines.
#[test]
fn edge_header_unfolding_with_tab() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Subject: Hello\r\n\
        \tWorld\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_ref().unwrap();
    assert!(
        subject.contains("Hello"),
        "Subject must contain 'Hello' after unfolding"
    );
    assert!(
        subject.contains("World"),
        "Subject must contain 'World' from continuation line"
    );
}
/// A multipart body holding one empty part gives either no text body or an
/// empty one.
#[test]
fn edge_empty_multipart_body() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        --b--";
    let email = parse_email(message).unwrap();
    let absent_or_empty = matches!(email.body_text.as_deref(), None | Some(""));
    assert!(
        absent_or_empty,
        "Empty multipart body should produce no body text"
    );
}
/// A `=` not followed by two hex digits in Q-encoded text must not panic
/// and must leave some output.
#[test]
fn edge_q_encoding_bare_equals() {
    let output = decode_encoded_words("=?utf-8?Q?a=b?=");
    let has_output = !output.is_empty();
    assert!(
        has_output,
        "Bare equals in Q-encoding should not cause panic"
    );
}
/// Both gaps in a run of three encoded-words are dropped.
#[test]
fn edge_three_adjacent_encoded_words() {
    let output = decode_encoded_words("=?utf-8?Q?A?= =?utf-8?Q?B?= =?utf-8?Q?C?=");
    assert_eq!(
        output, "ABC",
        "RFC 2047 Section 6.2: whitespace between all three adjacent \
         encoded words must be collapsed"
    );
}
/// The obsolete two-digit year `99` is read as 1999.
#[test]
fn edge_date_two_digit_year_obsolete() {
    let stamp = parse_rfc5322_date("13 Feb 99 15:47:33 +0000")
        .expect("Two-digit year 99 must parse per RFC 5322 Section 4.3");
    assert_eq!(
        stamp.year, 1999,
        "RFC 5322 Section 4.3: two-digit year 99 must map to 1999"
    );
}
/// The obsolete two-digit year `05` is read as 2005.
#[test]
fn edge_date_two_digit_year_2000s() {
    let stamp = parse_rfc5322_date("13 Feb 05 15:47:33 +0000")
        .expect("Two-digit year 05 must parse per RFC 5322 Section 4.3");
    assert_eq!(
        stamp.year, 2005,
        "RFC 5322 Section 4.3: two-digit year 05 must map to 2005"
    );
}
/// A fold made of CRLF plus a space inside the date is treated as
/// whitespace.
#[test]
fn parse_rfc5322_date_fws_space_folding() {
    let stamp = parse_rfc5322_date("Thu, 01 Jan\r\n 2015 12:00:00 +0000")
        .expect("RFC 5322 Section 2.2.3: FWS (CRLF SP) within date must be normalised");
    // Calendar fields.
    assert_eq!(stamp.year, 2015);
    assert_eq!(stamp.month, 1);
    assert_eq!(stamp.day, 1);
    // Time-of-day and zone.
    assert_eq!(stamp.hour, 12);
    assert_eq!(stamp.minute, 0);
    assert_eq!(stamp.second, 0);
    assert_eq!(stamp.tz_offset_minutes, 0);
}
/// A fold made of CRLF plus a tab inside the date is treated as
/// whitespace.
#[test]
fn parse_rfc5322_date_fws_tab_folding() {
    let stamp = parse_rfc5322_date("Thu, 01 Jan\r\n\t2015 12:00:00 +0000")
        .expect("RFC 5322 Section 2.2.3: FWS (CRLF HTAB) within date must be normalised");
    // Calendar fields.
    assert_eq!(stamp.year, 2015);
    assert_eq!(stamp.month, 1);
    assert_eq!(stamp.day, 1);
    // Time-of-day and zone.
    assert_eq!(stamp.hour, 12);
    assert_eq!(stamp.minute, 0);
    assert_eq!(stamp.second, 0);
    assert_eq!(stamp.tz_offset_minutes, 0);
}
/// A date folded between every token still parses to the same fields.
#[test]
fn parse_rfc5322_date_fws_multiple_folds() {
    let folded = "Thu,\r\n 01\r\n Jan\r\n 2015\r\n 12:00:00\r\n +0000";
    let stamp = parse_rfc5322_date(folded)
        .expect("RFC 5322 Section 4.3: multiple FWS sequences in date must all be normalised");
    // Calendar fields.
    assert_eq!(stamp.year, 2015);
    assert_eq!(stamp.month, 1);
    assert_eq!(stamp.day, 1);
    // Time-of-day and zone.
    assert_eq!(stamp.hour, 12);
    assert_eq!(stamp.minute, 0);
    assert_eq!(stamp.second, 0);
    assert_eq!(stamp.tz_offset_minutes, 0);
}
/// A stray comma after the month name is accepted.
#[test]
fn parse_rfc5322_date_month_trailing_comma() {
    let stamp = parse_rfc5322_date("Thu, 13 Feb, 2025 15:47:33 +0000")
        .expect("Postel's law: trailing comma after month must be tolerated");
    // Calendar fields.
    assert_eq!(stamp.day, 13);
    assert_eq!(stamp.month, 2);
    assert_eq!(stamp.year, 2025);
    // Time-of-day and zone.
    assert_eq!(stamp.hour, 15);
    assert_eq!(stamp.minute, 47);
    assert_eq!(stamp.second, 33);
    assert_eq!(stamp.tz_offset_minutes, 0);
}
/// A period after the month abbreviation is accepted.
#[test]
fn parse_rfc5322_date_month_trailing_period() {
    let stamp = parse_rfc5322_date("13 Feb. 2025 12:00:00 +0000")
        .expect("Postel's law: trailing period after month must be tolerated");
    // Calendar fields.
    assert_eq!(stamp.day, 13);
    assert_eq!(stamp.month, 2);
    assert_eq!(stamp.year, 2025);
    // Time-of-day and zone.
    assert_eq!(stamp.hour, 12);
    assert_eq!(stamp.minute, 0);
    assert_eq!(stamp.second, 0);
    assert_eq!(stamp.tz_offset_minutes, 0);
}
/// Parses a date that has no day-of-week but carries a comma after the month
/// name, then checks each date and time field.
#[test]
fn parse_date_no_dow_comma_after_month() {
    // `expect` replaces the former `is_some()` assertion + `unwrap()` pair:
    // a single failure path that carries the same diagnostic text.
    let dt = parse_rfc5322_date("13 Feb, 2025 12:00:00 +0000")
        .expect("date without DOW but with comma after month should parse");
    assert_eq!(dt.day, 13);
    assert_eq!(dt.month, 2);
    assert_eq!(dt.year, 2025);
    assert_eq!(dt.hour, 12);
    assert_eq!(dt.minute, 0);
    assert_eq!(dt.second, 0);
}
/// The ordinary form with a leading day-of-week must parse and yield the
/// expected day, month and year.
#[test]
fn parse_date_with_dow_still_works() {
    // `expect` gives a readable failure instead of a bare
    // `assert!(result.is_some())` followed by `unwrap()`.
    let dt = parse_rfc5322_date("Thu, 13 Feb 2025 12:00:00 +0000")
        .expect("date with day-of-week prefix should parse");
    assert_eq!(dt.day, 13);
    assert_eq!(dt.month, 2);
    assert_eq!(dt.year, 2025);
}
/// A date with a comma after both the day-of-week and the month name must
/// parse and yield the expected day, month and year.
#[test]
fn parse_date_dow_comma_month_comma() {
    // `expect` gives a readable failure instead of a bare
    // `assert!(result.is_some())` followed by `unwrap()`.
    let dt = parse_rfc5322_date("Thu, 13 Feb, 2025 15:47:33 +0000")
        .expect("date with commas after day-of-week and month should parse");
    assert_eq!(dt.day, 13);
    assert_eq!(dt.month, 2);
    assert_eq!(dt.year, 2025);
}
/// Q-encoded word mixing `=XX` hex escapes with an underscore.
#[test]
fn edge_q_encoding_mixed_hex_and_underscore() {
    let encoded = "=?utf-8?Q?=48=65=6C=6C=6F_=57=6F=72=6C=64?=";
    assert_eq!(
        decode_encoded_words(encoded),
        "Hello World",
        "RFC 2047 Section 4.2: mixed hex encoding and underscore in Q-encoding"
    );
}
/// A base64-encoded text/plain part inside multipart/mixed must surface as
/// decoded `body_text`.
#[test]
fn edge_multipart_base64_text_part() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: base64\r\n\
        \r\n\
        SGVsbG8gV29ybGQ=\r\n\
        --b--";
    let email = parse_email(message).unwrap();
    let text = email.body_text.as_deref();
    assert_eq!(
        text,
        Some("Hello World"),
        "Base64-encoded text/plain in multipart must be decoded correctly"
    );
}
/// An encoded-word naming an unrecognised charset must still yield the
/// decoded ASCII payload.
#[test]
fn edge_encoded_word_unknown_charset() {
    let encoded = "=?x-unknown?B?SGVsbG8=?=";
    assert_eq!(
        decode_encoded_words(encoded),
        "Hello",
        "Unknown charset in encoded-word should fall back gracefully"
    );
}
/// A quoted display name containing '@' must not be mistaken for the address.
#[test]
fn edge_from_display_name_with_at_sign() {
    let message = b"From: \"user@company\" <real@example.com>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let author = &email.from[0];
    assert_eq!(
        author.email, "real@example.com",
        "Email must be extracted from angle brackets, not from quoted display name"
    );
    assert_eq!(
        author.name.as_deref(),
        Some("user@company"),
        "Display name containing '@' in quotes must be preserved"
    );
}
/// Input with no blank line between the headers and the following text must
/// still parse and expose the Subject.
#[test]
fn edge_no_blank_line_separator() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Subject: Test\r\n\
        This is body text without blank line";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(subject, Some("Test"));
}
/// Two adjacent encoded-words in different charsets (ISO-8859-1, then UTF-8)
/// must decode and join without the separating space.
#[test]
fn edge_encoded_word_charset_switch() {
    let encoded = "=?iso-8859-1?B?Y2Fm6Q==?= =?utf-8?B?csOpc3Vtw6k=?=";
    assert_eq!(
        decode_encoded_words(encoded),
        "caférésumé",
        "RFC 2047 Section 6.2: encoded words with different charsets must \
         decode correctly with whitespace collapsed"
    );
}
/// With two From headers, the address from the first one must sit at index 0.
#[test]
fn edge_duplicate_from_header() {
    let message = b"From: first@example.com\r\n\
        From: second@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let leading = &email.from[0];
    assert_eq!(
        leading.email, "first@example.com",
        "First From header should be used when duplicates exist"
    );
}
/// A quoted boundary value containing `\"` must come back with the quote
/// unescaped.
#[test]
fn edge_boundary_with_escaped_quote() {
    let content_type = r#"multipart/mixed; boundary="a\"b""#;
    let extracted = extract_param(content_type, "boundary");
    assert_eq!(
        extracted.as_deref(),
        Some("a\"b"),
        "Boundary with escaped quote must be correctly unescaped \
         (RFC 5322 Section 3.2.4)"
    );
}
/// A NUL byte inside the Subject value must not prevent the subject from
/// being extracted.
#[test]
fn edge_nul_byte_in_subject() {
    // Assembled from separate lines so the embedded NUL is easy to spot.
    let lines: [&[u8]; 4] = [
        b"From: a@b.com\r\n",
        b"Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n",
        b"Subject: Hello\x00World\r\n",
        b"\r\n",
    ];
    let message: Vec<u8> = lines.concat();
    let email = parse_email(&message).unwrap();
    assert!(
        email.subject.is_some(),
        "Subject with NUL byte must still be extracted (Postel's law)"
    );
}
/// A To header using group syntax must yield at least the two member
/// addresses.
#[test]
fn edge_address_group_syntax() {
    let message = b"From: sender@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        To: Friends: alice@a.com, bob@b.com;\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let recipient_count = email.to.len();
    assert!(
        recipient_count >= 2,
        "Group syntax 'Friends: alice, bob;' must extract both addresses. \
         Got: {:?}",
        email.to
    );
}
/// A boundary containing `+`, `/` and `=` must still delimit the parts.
#[test]
fn edge_boundary_special_chars() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"a+b/c=d\"\r\n\
        \r\n\
        --a+b/c=d\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        body text\r\n\
        --a+b/c=d--";
    let email = parse_email(message).unwrap();
    let text = email.body_text.as_deref();
    assert_eq!(
        text,
        Some("body text"),
        "Boundary with special chars +/= must be handled correctly \
         (RFC 2046 Section 5.1.1)"
    );
}
/// `charset =utf-8` (space before '=') must be honoured: the UTF-8 body
/// decodes to "café".
#[test]
fn parse_param_space_before_equals() {
    let head: &[u8] = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset =utf-8\r\n\
        \r\n";
    // The non-ASCII body is appended as raw UTF-8 bytes.
    let message = [head, "café".as_bytes()].concat();
    let email = parse_email(&message).unwrap();
    assert_eq!(
        email.body_text.as_deref(),
        Some("café"),
        "charset parameter with space before '=' must be recognized"
    );
}
/// `charset = utf-8` (spaces on both sides of '=') must be honoured: the
/// UTF-8 body decodes to "café".
#[test]
fn parse_param_spaces_around_equals() {
    let head: &[u8] = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset = utf-8\r\n\
        \r\n";
    // The non-ASCII body is appended as raw UTF-8 bytes.
    let message = [head, "café".as_bytes()].concat();
    let email = parse_email(&message).unwrap();
    assert_eq!(
        email.body_text.as_deref(),
        Some("café"),
        "charset with spaces around '=' must be parsed (Postel's law)"
    );
}
/// `boundary = "bound42"` (spaces around '=') must still be used to split
/// the multipart body.
#[test]
fn parse_boundary_space_around_equals() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary = \"bound42\"\r\n\
        \r\n\
        --bound42\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body text\r\n\
        --bound42--";
    let email = parse_email(message).unwrap();
    let text = email.body_text.as_deref();
    assert_eq!(
        text,
        Some("Body text"),
        "boundary with spaces around '=' must be parsed (Postel's law)"
    );
}
/// `filename* = UTF-8''...` (spaces around '=') on an attachment part must
/// yield the percent-decoded filename.
#[test]
fn parse_rfc2231_param_space_around_equals() {
    let raw = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename* = UTF-8''r%C3%A9sum%C3%A9.pdf\r\n\
        \r\n\
        DATA\r\n\
        --b--";
    let parsed = parse_email(raw).unwrap();
    // `.first()` instead of `[0]`: if no attachment is produced, the test
    // fails through the assertion below (with its message) rather than
    // through an index-out-of-bounds panic.
    let filename = parsed
        .attachments
        .first()
        .and_then(|attachment| attachment.filename.as_deref());
    assert_eq!(
        filename,
        Some("résumé.pdf"),
        "RFC 2231 filename* with space around '=' must be parsed (Postel's law)"
    );
}
/// A bare-address Sender header must populate `sender` with that address
/// and no display name.
#[test]
fn parse_sender_header() {
    let message = b"From: author@example.com\r\n\
        Sender: transmitter@example.com\r\n\
        To: recipient@example.com\r\n\
        Subject: Test Sender\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n\
        Body";
    let email = parse_email(message).unwrap();
    let sender = email.sender.as_ref();
    assert_eq!(
        sender.map(|addr| addr.email.as_str()),
        Some("transmitter@example.com"),
        "Sender header should be parsed into parsed.sender"
    );
    assert_eq!(
        sender.and_then(|addr| addr.name.as_deref()),
        None,
        "Sender with bare address should have no display name"
    );
}
/// Without a Sender header, `sender` must be `None`.
#[test]
fn parse_sender_header_absent() {
    let message = b"From: author@example.com\r\n\
        To: recipient@example.com\r\n\
        Subject: No Sender\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n\
        Body";
    let email = parse_email(message).unwrap();
    let sender = &email.sender;
    assert!(sender.is_none(), "Missing Sender header should yield None");
}
/// A Sender header in `Name <addr>` form must expose both the address and
/// the display name.
#[test]
fn parse_sender_header_with_display_name() {
    let message = b"From: author@example.com\r\n\
        Sender: Secretary <secretary@example.com>\r\n\
        To: recipient@example.com\r\n\
        Subject: Test\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n\
        Body";
    let email = parse_email(message).unwrap();
    let secretary = email.sender.as_ref().unwrap();
    assert_eq!(secretary.email, "secretary@example.com");
    assert_eq!(secretary.name.as_deref(), Some("Secretary"));
}
/// `parse_headers_only` must populate `sender` from the Sender header.
#[test]
fn parse_headers_only_extracts_sender() {
    let message = b"From: author@example.com\r\n\
        Sender: transmitter@example.com\r\n\
        To: recipient@example.com\r\n\
        Subject: Test\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n\
        Body";
    let headers = parse_headers_only(message).unwrap();
    let sender_email = headers.sender.as_ref().map(|addr| addr.email.as_str());
    assert_eq!(
        sender_email,
        Some("transmitter@example.com"),
        "parse_headers_only should also extract Sender"
    );
}
/// Headers without a dedicated field must appear in `extra_headers` under
/// their lowercase name, while From/To/Subject must not.
#[test]
fn parse_extra_headers_accessible() {
    let message = b"From: sender@example.com\r\n\
        To: recipient@example.com\r\n\
        Subject: Test\r\n\
        X-Mailer: TestMailer\r\n\
        List-Unsubscribe: <mailto:unsub@example.com>\r\n\
        DKIM-Signature: v=1; a=rsa-sha256; d=example.com\r\n\
        \r\n\
        Body text\r\n";
    let email = parse_email(message).unwrap();
    // True when an exact (name, value) pair is present in extra_headers.
    let has_pair = |name: &str, value: &str| {
        email
            .extra_headers
            .iter()
            .any(|(k, v)| k == name && v == value)
    };
    assert!(
        has_pair("x-mailer", "TestMailer"),
        "extra_headers must contain X-Mailer (RFC 5322 Section 3.6.8)"
    );
    assert!(
        has_pair("list-unsubscribe", "<mailto:unsub@example.com>"),
        "extra_headers must contain List-Unsubscribe (RFC 5322 Section 3.6.8)"
    );
    assert!(
        has_pair("dkim-signature", "v=1; a=rsa-sha256; d=example.com"),
        "extra_headers must contain DKIM-Signature (RFC 5322 Section 3.6.8)"
    );
    let no_well_known = email
        .extra_headers
        .iter()
        .all(|(k, _)| k != "from" && k != "to" && k != "subject");
    assert!(
        no_well_known,
        "Well-known headers must not appear in extra_headers"
    );
}
/// Single-letter military zones (and the unused "J") must all map to a zero
/// offset, both via `parse_timezone` and through a full message parse.
#[test]
fn military_single_letter_timezones_rfc5322_section_4_3() {
    // (zone letter, failure message)
    let zones = [
        ("A", "A should be -0000 (RFC 5322 Section 4.3 SHOULD)"),
        ("M", "M should be -0000 (RFC 5322 Section 4.3 SHOULD)"),
        ("N", "N should be -0000 (RFC 5322 Section 4.3 SHOULD)"),
        ("Y", "Y should be -0000 (RFC 5322 Section 4.3 SHOULD)"),
        ("Z", "Z should be +0000 (RFC 5322 Section 4.3)"),
        (
            "J",
            "J is not used and should fall back to +0000 (RFC 5322 Section 4.3)",
        ),
    ];
    for (zone, why) in zones {
        assert_eq!(parse_timezone(zone), 0, "{why}");
    }

    let message = b"From: sender@example.com\r\n\
        To: recipient@example.com\r\n\
        Subject: Test\r\n\
        Date: Thu, 01 Jan 2015 12:00:00 A\r\n\
        \r\n\
        Body\r\n";
    let email = crate::parse_email(message).unwrap();
    let sent = email.date.expect("date must be present");
    assert_eq!(
        sent.tz_offset_minutes, 0,
        "Full date with military zone A should have tz_offset_minutes = 0 \
         (RFC 5322 Section 4.3: military zones are -0000)"
    );
}
/// A single-part message with `Content-Disposition: attachment` plus a
/// Content-ID must produce one attachment that is not flagged inline.
#[test]
fn explicit_attachment_with_content_id_is_not_inline() {
    let message = b"From: sender@example.com\r\n\
        To: recipient@example.com\r\n\
        Subject: Test\r\n\
        Content-Type: image/png\r\n\
        Content-Disposition: attachment; filename=\"image.png\"\r\n\
        Content-ID: <img001@example.com>\r\n\
        Content-Transfer-Encoding: base64\r\n\
        \r\n\
        iVBORw0KGgo=\r\n";
    let email = crate::parse_email(message).unwrap();
    assert_eq!(email.attachments.len(), 1);
    let image = &email.attachments[0];
    assert!(
        !image.is_inline,
        "Content-Disposition: attachment with Content-ID must not be marked inline \
         (RFC 2183 Section 2)"
    );
}
/// Inside multipart/mixed, a part with `Content-Disposition: attachment`
/// plus a Content-ID must produce one attachment that is not flagged inline.
#[test]
fn multipart_explicit_attachment_with_content_id_is_not_inline() {
    let message = b"From: sender@example.com\r\n\
        To: recipient@example.com\r\n\
        Subject: Test\r\n\
        Content-Type: multipart/mixed; boundary=\"BOUND\"\r\n\
        \r\n\
        --BOUND\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Hello\r\n\
        --BOUND\r\n\
        Content-Type: image/png\r\n\
        Content-Disposition: attachment; filename=\"image.png\"\r\n\
        Content-ID: <img001@example.com>\r\n\
        Content-Transfer-Encoding: base64\r\n\
        \r\n\
        iVBORw0KGgo=\r\n\
        --BOUND--\r\n";
    let email = crate::parse_email(message).unwrap();
    assert_eq!(email.attachments.len(), 1);
    let image = &email.attachments[0];
    assert!(
        !image.is_inline,
        "multipart: Content-Disposition: attachment with Content-ID must not be marked inline \
         (RFC 2183 Section 2)"
    );
}
/// '@' characters that appear only inside comments or quoted strings must
/// not confuse group parsing or `contains_at_outside_quotes`.
#[test]
fn group_address_with_at_in_comment() {
    // Group whose comment looks like an address.
    let members = parse_address_list("Group (user@host): member@example.com;");
    assert_eq!(
        members.len(),
        1,
        "group with comment containing '@' must parse the member address"
    );
    assert_eq!(members[0].email, "member@example.com");

    // Group whose comment contains a lone '@'.
    let members = parse_address_list("Group (has @ sign): member@example.com;");
    assert_eq!(
        members.len(),
        1,
        "group with comment containing bare '@' must parse the member address"
    );
    assert_eq!(members[0].email, "member@example.com");

    // Plain address, no group.
    let plain = parse_address_list("user@host.com");
    assert_eq!(plain.len(), 1);
    assert_eq!(plain[0].email, "user@host.com");

    // Direct checks of the helper.
    let only_in_comment = contains_at_outside_quotes("Group (user@host)");
    assert!(
        !only_in_comment,
        "'@' only inside a comment must return false"
    );
    let outside_comment = contains_at_outside_quotes("user@host.com (Display Name)");
    assert!(outside_comment, "'@' outside a comment must return true");
    let only_in_quotes = contains_at_outside_quotes(r#""user@host""#);
    assert!(
        !only_in_quotes,
        "'@' only inside a quoted string must return false"
    );
    let after_escaped_paren = contains_at_outside_quotes("Group (escaped \\) user@host)");
    assert!(
        !after_escaped_paren,
        "'@' after an escaped paren inside a comment must return false"
    );
}
/// Spaces inside encoded-text must be dropped before decoding, and
/// well-formed B and Q words must keep decoding as usual.
#[test]
fn regression_encoded_word_space_in_encoded_text_decoded() {
    // (input, expected output, failure message)
    let cases = [
        (
            "=?UTF-8?B?SGVs bG8=?=",
            "Hello",
            "space in B-encoded-text should be stripped as fold artifact",
        ),
        (
            "=?UTF-8?B?SGVsbG8=?=",
            "Hello",
            "valid base64 encoded word must decode normally",
        ),
        (
            "=?UTF-8?Q?caf=C3=A9?=",
            "caf\u{e9}",
            "valid Q-encoded word must decode normally",
        ),
        (
            "=?UTF-8?Q?hel lo?=",
            "hello",
            "space in Q-encoded-text should be stripped as fold artifact",
        ),
    ];
    for (input, expected, why) in cases {
        assert_eq!(decode_encoded_words(input), expected, "{why}");
    }
}
/// When section 0 is followed directly by section 15, only section 0 may be
/// returned.
#[test]
fn rfc2231_continuation_gap_larger_than_ten() {
    let disposition = "attachment; filename*0=\"part1\"; filename*15=\"part2\"";
    let joined = extract_rfc2231_continuation(disposition, "filename");
    assert_eq!(
        joined,
        Some("part1".to_string()),
        "RFC 2231 Section 3: reassembly must stop before section 15 when sections 1-14 are absent"
    );
}
/// A 5,600-character filename split into 50-character sections (112 of
/// them) must be reassembled in full.
#[test]
fn rfc2231_continuation_more_than_one_hundred_sections() {
    use std::fmt::Write as _;

    const SECTION_LEN: usize = 50;
    let expected = "a".repeat(5_600);
    let mut header = String::from("attachment");
    // 5_600 is an exact multiple of SECTION_LEN, so plain slicing covers
    // the whole string with no short tail.
    for idx in 0..expected.len() / SECTION_LEN {
        let chunk = &expected[idx * SECTION_LEN..(idx + 1) * SECTION_LEN];
        let _ = write!(header, "; filename*{idx}=\"{chunk}\"");
    }
    let joined = extract_rfc2231_continuation(&header, "filename");
    assert_eq!(
        joined.as_deref(),
        Some(expected.as_str()),
        "RFC 2231 Section 3: continuation parsing must not truncate after \
         100 sections"
    );
}
/// Repeated To and Cc headers must contribute all their addresses, in
/// header order.
#[test]
fn regression_duplicate_address_headers_concatenated() {
    // Two To headers, one address each.
    let two_to = b"From: sender@example.com\r\n\
        To: alice@example.com\r\n\
        To: bob@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(two_to).unwrap();
    let to = &email.to;
    assert_eq!(
        to.len(),
        2,
        "both To: headers must be parsed — Postel's law requires concatenation"
    );
    assert_eq!(to[0].email, "alice@example.com");
    assert_eq!(to[1].email, "bob@example.com");

    // Two Cc headers: the first holds two addresses, the second one.
    let two_cc = b"From: sender@example.com\r\n\
        Cc: carol@example.com, dave@example.com\r\n\
        Cc: eve@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email_cc = parse_email(two_cc).unwrap();
    let cc = &email_cc.cc;
    assert_eq!(cc.len(), 3, "all Cc: headers must be concatenated");
    assert_eq!(cc[0].email, "carol@example.com");
    assert_eq!(cc[1].email, "dave@example.com");
    assert_eq!(cc[2].email, "eve@example.com");
}
/// Two From headers must yield two entries in `from`, in header order.
#[test]
fn test_duplicate_from_headers_concatenated() {
    let message = b"From: alice@example.com\r\n\
        From: bob@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let authors = &email.from;
    assert_eq!(authors.len(), 2, "both From: headers must be concatenated");
    assert_eq!(authors[0].email, "alice@example.com");
    assert_eq!(authors[1].email, "bob@example.com");
}
/// Received, Authentication-Results and DKIM-Signature values must keep any
/// encoded-word-looking text verbatim in `extra_headers`.
#[test]
fn structured_headers_not_rfc2047_decoded() {
    let message = b"From: sender@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Received: from =?utf-8?Q?server?= (mx.example.com)\r\n\
        Authentication-Results: dkim=pass header.d==?utf-8?B?ZXhhbXBsZQ==?=.com\r\n\
        DKIM-Signature: v=1; d==?utf-8?Q?example?=.com\r\n\
        \r\n\
        body";
    let email = parse_email(message).unwrap();
    // Looks up the first extra header stored under the given lowercase name.
    let lookup = |name: &str| email.extra_headers.iter().find(|(k, _)| k == name);

    let received = lookup("received").expect("Received header must be present");
    assert!(
        received.1.contains("=?utf-8?Q?server?="),
        "Received header must NOT be RFC 2047 decoded; got: {:?}",
        received.1
    );

    let auth =
        lookup("authentication-results").expect("Authentication-Results header must be present");
    assert!(
        auth.1.contains("=?utf-8?B?ZXhhbXBsZQ==?="),
        "Authentication-Results header must NOT be RFC 2047 decoded; got: {:?}",
        auth.1
    );

    let dkim = lookup("dkim-signature").expect("DKIM-Signature header must be present");
    assert!(
        dkim.1.contains("=?utf-8?Q?example?="),
        "DKIM-Signature header must NOT be RFC 2047 decoded; got: {:?}",
        dkim.1
    );
}
/// Resent-Date and Resent-Message-ID values must keep encoded-word text
/// verbatim in `extra_headers`.
#[test]
fn resent_structured_headers_not_rfc2047_decoded() {
    let message = b"From: sender@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Resent-Date: =?utf-8?Q?Thu,_13_Feb_2025_15:47:33_+0000?=\r\n\
        Resent-Message-ID: =?utf-8?Q?<resent@example.com>?=\r\n\
        \r\n\
        body";
    let email = parse_email(message).unwrap();
    // Looks up the first extra header stored under the given lowercase name.
    let lookup = |name: &str| email.extra_headers.iter().find(|(k, _)| k == name);

    let resent_date = lookup("resent-date").expect("Resent-Date header must be present");
    let date_untouched = resent_date
        .1
        .contains("=?utf-8?Q?Thu,_13_Feb_2025_15:47:33_+0000?=");
    assert!(
        date_untouched,
        "Resent-Date must NOT be RFC 2047 decoded; got: {:?}",
        resent_date.1
    );

    let resent_message_id =
        lookup("resent-message-id").expect("Resent-Message-ID header must be present");
    let id_untouched = resent_message_id
        .1
        .contains("=?utf-8?Q?<resent@example.com>?=");
    assert!(
        id_untouched,
        "Resent-Message-ID must NOT be RFC 2047 decoded; got: {:?}",
        resent_message_id.1
    );
}
/// Resent-From / Resent-To / Resent-Cc display names must appear decoded in
/// `extra_headers`.
#[test]
fn resent_address_headers_rfc2047_decoded_in_extra_headers() {
    let message = b"From: sender@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Resent-From: =?utf-8?Q?Alice_Sender?= <alice@example.com>\r\n\
        Resent-To: =?utf-8?Q?Bob_Recipient?= <bob@example.com>\r\n\
        Resent-Cc: =?utf-8?Q?Carol_Copy?= <carol@example.com>\r\n\
        \r\n\
        body";
    let email = parse_email(message).unwrap();
    // Looks up the first extra header stored under the given lowercase name.
    let lookup = |name: &str| email.extra_headers.iter().find(|(k, _)| k == name);

    let resent_from = lookup("resent-from").expect("Resent-From header must be present");
    assert!(
        resent_from.1.contains("Alice Sender"),
        "Resent-From display name must be RFC 2047 decoded; got {:?}",
        resent_from.1
    );

    let resent_to = lookup("resent-to").expect("Resent-To header must be present");
    assert!(
        resent_to.1.contains("Bob Recipient"),
        "Resent-To display name must be RFC 2047 decoded; got {:?}",
        resent_to.1
    );

    let resent_cc = lookup("resent-cc").expect("Resent-Cc header must be present");
    assert!(
        resent_cc.1.contains("Carol Copy"),
        "Resent-Cc display name must be RFC 2047 decoded; got {:?}",
        resent_cc.1
    );
}
/// The Resent-Reply-To display name must appear decoded in `extra_headers`.
#[test]
fn obsolete_resent_reply_to_rfc2047_decoded_in_extra_headers() {
    let message = b"From: sender@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Resent-Reply-To: =?utf-8?Q?Relay_Desk?= <relay@example.com>\r\n\
        \r\n\
        body";
    let email = parse_email(message).unwrap();
    let mut extras = email.extra_headers.iter();
    let resent_reply_to = extras
        .find(|(k, _)| k == "resent-reply-to")
        .expect("Resent-Reply-To header must be present");
    let value = &resent_reply_to.1;
    assert!(
        value.contains("Relay Desk"),
        "Resent-Reply-To display name must be RFC 2047 decoded; got {:?}",
        resent_reply_to.1
    );
}
/// Commas inside a bracketed domain-literal must not be treated as address
/// separators.
#[test]
fn domain_literal_with_commas_not_split() {
    let parsed = parse_address_list("user@[10,0,0,1], other@example.com");
    let count = parsed.len();
    assert_eq!(
        count, 2,
        "expected 2 addresses, got {}: {:?}",
        parsed.len(),
        parsed
    );
    let (literal, plain) = (&parsed[0], &parsed[1]);
    assert_eq!(
        literal.email, "user@[10,0,0,1]",
        "first address must preserve domain-literal including commas"
    );
    assert_eq!(plain.email, "other@example.com");
}
/// A Date header with fractional seconds ("15:30:45.123") must keep the
/// whole-second value.
#[test]
fn parse_date_with_fractional_seconds() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:30:45.123 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let sent = email.date.unwrap();
    assert_eq!((sent.year, sent.month, sent.day), (2025, 2, 13));
    assert_eq!((sent.hour, sent.minute), (15, 30));
    assert_eq!(
        sent.second, 45,
        "fractional seconds must not cause second to drop to 0"
    );
    assert_eq!(sent.tz_offset_minutes, 0);
}
/// Encoded-words with an empty charset, empty encoding, or empty
/// encoded-text must be left in the Subject as literal text.
#[test]
fn test_rfc2047_empty_fields() {
    // Case 1: empty charset.
    {
        let message = b"From: sender@example.com\r\n\
            To: recipient@example.com\r\n\
            Subject: =??Q?test?=\r\n\
            Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
            \r\n";
        let email = parse_email(message).unwrap();
        assert_eq!(
            email.subject.as_deref(),
            Some("=??Q?test?="),
            "empty charset must not be decoded"
        );
    }
    // Case 2: empty encoding.
    {
        let message = b"From: sender@example.com\r\n\
            To: recipient@example.com\r\n\
            Subject: =?UTF-8??test?=\r\n\
            Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
            \r\n";
        let email = parse_email(message).unwrap();
        assert_eq!(
            email.subject.as_deref(),
            Some("=?UTF-8??test?="),
            "empty encoding must not be decoded"
        );
    }
    // Case 3: empty encoded-text.
    {
        let message = b"From: sender@example.com\r\n\
            To: recipient@example.com\r\n\
            Subject: =?UTF-8?Q??=\r\n\
            Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
            \r\n";
        let email = parse_email(message).unwrap();
        assert_eq!(
            email.subject.as_deref(),
            Some("=?UTF-8?Q??="),
            "empty encoded-text must not be decoded"
        );
    }
}
/// A '?' inside the encoded-text must leave the whole would-be encoded-word
/// untouched in the Subject.
#[test]
fn test_rfc2047_question_mark_in_encoded_text() {
    let message = b"From: sender@example.com\r\n\
        To: recipient@example.com\r\n\
        Subject: =?UTF-8?Q?hello?world?=\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(
        subject,
        Some("=?UTF-8?Q?hello?world?="),
        "'?' in encoded-text must cause the entire encoded-word to be \
         left as literal text (RFC 2047 Section 2)"
    );
}
/// An underscore in a Q-encoded Subject must decode to a space.
#[test]
fn edge_rfc2047_q_encoding_underscore_to_space() {
    let message = b"From: sender@example.com\r\n\
        To: recipient@example.com\r\n\
        Subject: =?UTF-8?Q?hello_world?=\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(
        subject,
        Some("hello world"),
        "RFC 2047 Section 4.2: underscore in Q-encoding must decode to space"
    );
}
/// Adjacent encoded-words in UTF-8 (B) and ISO-8859-1 (Q) must decode to
/// "éé" with the separating space removed.
#[test]
fn edge_rfc2047_adjacent_encoded_words_different_charsets() {
    let message = b"From: sender@example.com\r\n\
        To: recipient@example.com\r\n\
        Subject: =?UTF-8?B?w6k=?= =?ISO-8859-1?Q?=E9?=\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(
        subject,
        Some("\u{00E9}\u{00E9}"),
        "RFC 2047 Section 6.2: whitespace between adjacent encoded words \
         with different charsets must be collapsed"
    );
}
/// A body line starting with `--abcdef` must not be taken as a delimiter
/// for boundary `abc`.
#[test]
fn edge_multipart_boundary_prefix_of_another() {
    let message = b"From: sender@example.com\r\n\
        To: recipient@example.com\r\n\
        Content-Type: multipart/mixed; boundary=\"abc\"\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n\
        --abc\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        \r\n\
        Line with --abcdef in the body should not split\r\n\
        --abc\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        \r\n\
        Second part\r\n\
        --abc--\r\n";
    let email = parse_email(message).unwrap();
    let first_part = email.body_text.as_deref();
    assert_eq!(
        first_part,
        Some("Line with --abcdef in the body should not split"),
        "RFC 2046 Section 5.1.1: boundary 'abc' must not match '--abcdef' \
         in the body — the delimiter must be followed by a valid terminator"
    );
    let attachment_count = email.attachments.len();
    assert_eq!(
        attachment_count, 1,
        "Two text/plain parts: first is body_text, second becomes an \
         attachment — this proves boundary splitting was correct"
    );
}
/// With sections 0 and 2 present but 1 missing, only section 0 may be
/// returned.
#[test]
fn edge_rfc2231_continuation_with_gap_missing_index_1() {
    let disposition = "attachment; filename*0=\"part1\"; filename*2=\"part3\"";
    let joined = extract_rfc2231_continuation(disposition, "filename");
    assert_eq!(
        joined.as_deref(),
        Some("part1"),
        "RFC 2231 Section 3: continuation must stop at the first missing index"
    );
}
/// Builds a message whose subject contains a literal "=?" plus a non-ASCII
/// character, parses it back, and expects the subject unchanged.
#[test]
fn edge_builder_roundtrip_subject_with_encoded_word_marker() {
    use crate::build_message;
    use crate::types::OutgoingEmail;

    // Address with no display name.
    let bare = |addr: &str| Address {
        name: None,
        email: addr.to_string(),
    };
    let subject = "Price is =? 50\u{20AC}";
    let sent_at = crate::types::DateTime {
        year: 2025,
        month: 2,
        day: 13,
        hour: 15,
        minute: 47,
        second: 33,
        tz_offset_minutes: 0,
    };
    let email = OutgoingEmail {
        from: vec![bare("sender@example.com")],
        sender: None,
        to: vec![bare("recipient@example.com")],
        cc: vec![],
        bcc: vec![],
        reply_to: vec![],
        date: Some(sent_at),
        subject: subject.to_string(),
        body_text: Some("test".to_string()),
        body_html: None,
        in_reply_to: vec![],
        references: vec![],
        attachments: vec![],
        extra_headers: vec![],
    };

    let built = build_message(&email).unwrap();
    let reparsed = parse_email(&built.raw).unwrap();
    assert_eq!(
        reparsed.subject.as_deref(),
        Some(subject),
        "Subject containing literal '=?' and non-ASCII must survive \
         build->parse round-trip without being mangled by RFC 2047 encoding"
    );
}
/// Through the full parser, a Date header with year "99" must come back as
/// 1999.
#[test]
fn edge_date_two_digit_year_via_parse_email() {
    let message = b"From: sender@example.com\r\n\
        To: recipient@example.com\r\n\
        Subject: Test\r\n\
        Date: Sat, 13 Feb 99 12:00:00 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let sent = email.date.expect("date must parse");
    assert_eq!(
        sent.year, 1999,
        "RFC 5322 Section 4.3: two-digit year 99 must map to 1999"
    );
    assert_eq!((sent.month, sent.day), (2, 13));
}
/// `addr (Comment)` form: the address is taken from the bare addr-spec and
/// the comment text becomes the display name.
#[test]
fn edge_address_with_trailing_comment() {
    let message = b"From: user@example.com (John Doe)\r\n\
        To: recipient@example.com\r\n\
        Subject: Test\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let author = &email.from[0];
    assert_eq!(
        author.email, "user@example.com",
        "RFC 5322 Section 3.4.1: email must be extracted from bare addr-spec \
         before parenthesized comment"
    );
    assert_eq!(
        author.name.as_deref(),
        Some("John Doe"),
        "RFC 822 convention: trailing parenthesized comment should be used \
         as display name"
    );
}
/// In multipart/digest, a part with no Content-Type must be reported as a
/// message/rfc822 attachment and must not become `body_text`.
#[test]
fn edge_multipart_digest_default_content_type() {
    // The embedded message placed inside the single digest part.
    let inner_message = "From: inner@example.com\r\n\
        To: recipient@example.com\r\n\
        Subject: Inner\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n\
        Inner body text";
    let digest = format!(
        "From: sender@example.com\r\n\
         To: recipient@example.com\r\n\
         Content-Type: multipart/digest; boundary=\"digestbound\"\r\n\
         Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
         \r\n\
         --digestbound\r\n\
         \r\n\
         {inner_message}\r\n\
         --digestbound--\r\n"
    );
    let email = parse_email(digest.as_bytes()).unwrap();
    assert!(
        email.body_text.is_none(),
        "RFC 2046 Section 5.1.5: parts in multipart/digest without \
         Content-Type must default to message/rfc822, not text/plain — \
         body_text should be None"
    );
    let parts = &email.attachments;
    assert_eq!(
        parts.len(),
        1,
        "The message/rfc822 part should appear as an attachment"
    );
    assert_eq!(
        parts[0].content_type, "message/rfc822",
        "RFC 2046 Section 5.1.5: default Content-Type in multipart/digest \
         is message/rfc822"
    );
}
/// A base64 body broken by CRLF mid-stream must still decode to "Hello".
#[test]
fn edge_base64_body_with_embedded_whitespace() {
    let message = b"From: sender@example.com\r\n\
        To: recipient@example.com\r\n\
        Subject: Test\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: base64\r\n\
        \r\n\
        SGVs\r\nbG8=";
    let email = parse_email(message).unwrap();
    let text = email.body_text.as_deref();
    assert_eq!(
        text,
        Some("Hello"),
        "RFC 2045 Section 6.8: whitespace in base64 body must be ignored \
         during decoding"
    );
}
/// A Subject whose encoded-word decodes to " Hello" must keep the leading
/// space.
#[test]
fn regression_subject_trim_preserves_rfc2047_encoded_leading_space() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Subject: =?UTF-8?B?IEhlbGxv?=\r\n\
        \r\n\
        body";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(
        subject,
        Some(" Hello"),
        "RFC 2047 Section 6.2: leading whitespace encoded inside an \
         encoded-word is intentional content, not a fold artifact — \
         trim_start() must not strip it"
    );
}
/// A Subject whose encoded-word decodes to a single space must come back as
/// exactly that space.
#[test]
fn regression_subject_trim_preserves_encoded_whitespace_only() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Subject: =?UTF-8?B?IA==?=\r\n\
        \r\n\
        body";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(
        subject,
        Some(" "),
        "RFC 2047 Section 6.2: a Subject consisting entirely of encoded \
         whitespace must not be stripped to empty"
    );
}
/// Charset field `ISO-8859-1*de` (with language suffix) must decode using
/// ISO-8859-1.
#[test]
fn regression_rfc2231_language_tag_iso8859() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Subject: =?ISO-8859-1*de?Q?Pr=FCfung?=\r\n\
        \r\n\
        body";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(
        subject,
        Some("Pr\u{00FC}fung"),
        "RFC 2231 Section 5: language tag in charset field must be \
         stripped before charset lookup — ISO-8859-1*de should decode \
         as ISO-8859-1"
    );
}
/// Charset field `UTF-8*en` (with language suffix) must decode using UTF-8.
#[test]
fn regression_rfc2231_language_tag_utf8() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Subject: =?UTF-8*en?B?SGVsbG8gV29ybGQ=?=\r\n\
        \r\n\
        body";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(
        subject,
        Some("Hello World"),
        "RFC 2231 Section 5: UTF-8*en should decode as UTF-8"
    );
}
/// Charset field `ISO-8859-1*` (asterisk with nothing after it) must still
/// decode using ISO-8859-1.
#[test]
fn regression_rfc2231_empty_language_tag() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Subject: =?ISO-8859-1*?Q?Pr=FCfung?=\r\n\
        \r\n\
        body";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(
        subject,
        Some("Pr\u{00FC}fung"),
        "RFC 2231 Section 5: empty language tag (ISO-8859-1*) must \
         still decode as ISO-8859-1"
    );
}
/// When the Subject value starts on a folded continuation line, the fold's
/// whitespace must not leak into the decoded subject.
#[test]
fn regression_subject_trim_still_strips_fold_artifact() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Subject:\r\n =?UTF-8?B?SGVsbG8=?=\r\n\
        \r\n\
        body";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(
        subject,
        Some("Hello"),
        "Fold-artifact whitespace before an encoded-word must still be \
         stripped (RFC 5322 Section 2.2.3)"
    );
}
/// `is_disposition_type` must match the disposition token whether or not it
/// is wrapped in double quotes.
#[test]
fn is_disposition_type_quoted() {
    // (header value, disposition type to match, failure message)
    let cases = [
        (
            "\"attachment\"; filename=\"test.txt\"",
            "attachment",
            "quoted attachment with params should match",
        ),
        ("\"inline\"", "inline", "quoted inline should match"),
        (
            "attachment; filename=\"test.txt\"",
            "attachment",
            "unquoted attachment should still match",
        ),
        ("inline", "inline", "unquoted inline should still match"),
    ];
    for (value, kind, why) in cases {
        assert!(is_disposition_type(value, kind), "{why}");
    }
}
/// A comment placed before the `attachment` token in Content-Disposition
/// must not stop the part from being treated as an attachment.
#[test]
fn parse_email_attachment_with_leading_disposition_comment() {
    let message = b"From: sender@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        MIME-Version: 1.0\r\n\
        Content-Type: text/plain; charset=us-ascii\r\n\
        Content-Disposition: (saved copy) attachment; filename=\"notes.txt\"\r\n\
        \r\n\
        Quarterly report\r\n";
    let email = parse_email(message).expect("message with attachment comment must parse");
    assert!(
        email.body_text.is_none(),
        "Content-Disposition CFWS before attachment must not demote the part to body text"
    );
    let files = &email.attachments;
    assert_eq!(
        files.len(),
        1,
        "attachment with leading disposition comment must still be detected"
    );
    let notes = &files[0];
    assert_eq!(
        notes.filename.as_deref(),
        Some("notes.txt"),
        "filename parameter must still be preserved"
    );
    assert!(
        !notes.is_inline,
        "attachment disposition must not be treated as inline"
    );
}
/// `parse_headers_only` must expose the top-level Content-Disposition value
/// through `extra_headers`.
#[test]
fn parse_headers_only_preserves_top_level_content_disposition() {
    let message = b"From: sender@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        MIME-Version: 1.0\r\n\
        Content-Type: text/plain; charset=us-ascii\r\n\
        Content-Disposition: attachment; filename=\"message.eml\"\r\n\
        \r\n\
        Quarterly report\r\n";
    let headers = parse_headers_only(message)
        .expect("message with top-level Content-Disposition must parse");
    let kept = headers.extra_headers.iter().any(|(name, value)| {
        name == "content-disposition" && value == "attachment; filename=\"message.eml\""
    });
    assert!(
        kept,
        "top-level Content-Disposition must be preserved in extra_headers for header-only consumers"
    );
}
/// `parse_headers_only` must expose the top-level Content-ID value through
/// `extra_headers`.
#[test]
fn parse_headers_only_preserves_top_level_content_id() {
    let message = b"From: sender@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        MIME-Version: 1.0\r\n\
        Content-Type: text/plain; charset=us-ascii\r\n\
        Content-ID: <top-level@example.com>\r\n\
        \r\n\
        Quarterly report\r\n";
    let headers =
        parse_headers_only(message).expect("message with top-level Content-ID must parse");
    let kept = headers
        .extra_headers
        .iter()
        .any(|(name, value)| name == "content-id" && value == "<top-level@example.com>");
    assert!(
        kept,
        "top-level Content-ID must be preserved in extra_headers for header-only consumers"
    );
}
/// Regression: an `@` inside a quoted local part must not confuse address
/// parsing. Covers the bare form, the angle-bracketed form, a lone quoted
/// string (not an address), and the form with a trailing comment used as the
/// display name.
#[test]
fn regression_quoted_local_part_with_at_in_bare_address() {
    // The addr-spec every successful parse below is expected to yield.
    const ADDR_SPEC: &str = "\"user@internal\"@example.com";

    // Bare addr-spec.
    let bare = parse_single_address("\"user@internal\"@example.com")
        .expect("bare quoted-local-part addr should parse");
    assert_eq!(
        bare.email, ADDR_SPEC,
        "email field must contain the full addr-spec including quotes"
    );
    assert!(
        bare.name.is_none(),
        "bare address should have no display name"
    );

    // Same addr-spec wrapped in angle brackets.
    let angle = parse_single_address("<\"user@internal\"@example.com>")
        .expect("angle-bracketed quoted-local-part addr should parse");
    assert_eq!(
        angle.email, ADDR_SPEC,
        "angle-bracketed form must produce the same addr-spec"
    );
    assert!(
        angle.name.is_none(),
        "no display name in angle-bracket-only form"
    );

    // A quoted string alone has no domain and therefore is not an address.
    assert!(
        parse_single_address("\"user@internal\"").is_none(),
        "a quoted-string with '@' but no domain must not be treated as an addr-spec"
    );

    // Trailing comment supplies the display name.
    let with_comment = parse_single_address("\"user@internal\"@example.com (Display Name)")
        .expect("quoted-local with trailing comment should parse");
    assert_eq!(
        with_comment.email, ADDR_SPEC,
        "email field must be the full addr-spec"
    );
    assert_eq!(
        with_comment.name.as_deref(),
        Some("Display Name"),
        "display name must be extracted from trailing comment"
    );
}
/// A charset token split by whitespace (as left behind by header unfolding)
/// must be joined back into a single value.
#[test]
fn extract_param_folded_mid_token_charset() {
    let charset = extract_param("text/plain; charset=ut f-8", "charset");

    assert_eq!(
        charset.as_deref(),
        Some("utf-8"),
        "folded mid-token value must be reassembled (RFC 5322 Section 2.2.3, \
        RFC 2045 Section 5.1)"
    );
}
/// A purely alphabetic boundary token split by whitespace must be joined back
/// into a single value.
#[test]
fn extract_param_folded_mid_token_boundary_all_alpha() {
    let boundary = extract_param("multipart/mixed; boundary=abc def", "boundary");

    assert_eq!(
        boundary.as_deref(),
        Some("abcdef"),
        "alphabetic folded boundary token must be reassembled after unfolding"
    );
}
/// A comment with no surrounding whitespace between `;` and the parameter
/// name must still be accepted as the separator before that name.
#[test]
fn extract_param_accepts_comment_only_cfws_before_name() {
    let charset = extract_param("text/plain;(note)charset=utf-8", "charset");

    assert_eq!(
        charset.as_deref(),
        Some("utf-8"),
        "comment-only CFWS before a parameter name must still count as a valid boundary"
    );
}
/// Fold reassembly must not glue an unrelated trailing word onto a complete
/// parameter value.
#[test]
fn extract_param_no_false_fold_concat() {
    let content_type = "text/plain; charset=utf-8 unexpected; name=test.txt";

    let charset = extract_param(content_type, "charset");

    assert_eq!(
        charset.as_deref(),
        Some("utf-8"),
        "should not concatenate 'unexpected' with 'utf-8'"
    );
}
/// A parenthesized comment following a token value is not part of that value.
#[test]
fn extract_param_does_not_absorb_trailing_comment() {
    let content_type = "text/plain; charset=utf-8 (comment); name=test.txt";

    let charset = extract_param(content_type, "charset");

    assert_eq!(
        charset.as_deref(),
        Some("utf-8"),
        "trailing comment must not be absorbed into MIME token value"
    );
}
/// Text that looks like `charset=...` inside a comment is a decoy; only the
/// real parameter after the comment may be returned.
#[test]
fn extract_param_ignores_comment_content() {
    let content_type = "text/plain; ( charset=iso-8859-1) charset=utf-8";

    let charset = extract_param(content_type, "charset");

    assert_eq!(
        charset.as_deref(),
        Some("utf-8"),
        "must not match 'charset' inside a parenthesized comment"
    );
}
/// Same decoy scenario as the single-level comment case, but with the fake
/// parameter buried inside a nested comment.
#[test]
fn extract_param_ignores_nested_comment() {
    let content_type = "text/plain; (outer (charset=iso) comment) charset=utf-8";

    let charset = extract_param(content_type, "charset");

    assert_eq!(
        charset.as_deref(),
        Some("utf-8"),
        "must not match 'charset' inside a nested comment"
    );
}
/// End-to-end check: a charset name folded in the middle of the token
/// (`windows` / `-1252`) must be reassembled so that bytes 0x93 / 0x94 decode
/// to the curly quotes U+201C / U+201D.
#[test]
fn parse_email_folded_charset_mid_token() {
    let raw: &[u8] = b"From: test@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=windows\r\n \
        -1252\r\n\
        \r\n\
        \x93Hello\x94";

    let body = parse_email(raw).unwrap().body_text.unwrap();

    let has_opening_quote = body.contains('\u{201C}');
    let has_closing_quote = body.contains('\u{201D}');
    assert!(
        has_opening_quote && has_closing_quote,
        "body must be decoded with reassembled charset windows-1252, got: {body:?}"
    );
}
/// End-to-end check: a charset name folded twice inside the token
/// (`win` / `dows-` / `1252`) must still be reassembled so that bytes
/// 0x93 / 0x94 decode to the curly quotes U+201C / U+201D.
#[test]
fn parse_email_folded_charset_multiple_mid_token_folds() {
    let raw: &[u8] = b"From: test@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=win\r\n \
        dows-\r\n \
        1252\r\n\
        \r\n\
        \x93Hello\x94";

    let body = parse_email(raw).unwrap().body_text.unwrap();

    let has_opening_quote = body.contains('\u{201C}');
    let has_closing_quote = body.contains('\u{201D}');
    assert!(
        has_opening_quote && has_closing_quote,
        "body must be decoded with fully reassembled charset windows-1252, \
        got: {body:?}"
    );
}
/// End-to-end check: a charset folded right after the hyphen (`koi8-` / `r`)
/// must be reassembled so a KOI8-R encoded body decodes back to the original
/// Cyrillic text.
#[test]
fn parse_email_folded_charset_alpha_tail_after_hyphen() {
    let header: &[u8] = b"From: test@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=koi8-\r\n \
        r\r\n\
        \r\n";
    let (koi8_body, _, _) = encoding_rs::KOI8_R.encode("Привет");

    // Header section followed directly by the KOI8-R encoded payload.
    let raw: Vec<u8> = [header, &koi8_body[..]].concat();

    let parsed = parse_email(&raw).unwrap();
    assert_eq!(
        parsed.body_text.as_deref(),
        Some("Привет"),
        "body must be decoded with reassembled charset koi8-r"
    );
}
/// An encoded-word whose encoded-text contains a tab must not be decoded to
/// the payload it would otherwise yield.
#[test]
fn encoded_word_rejects_control_chars_in_encoded_text() {
    let parsed =
        parse_email(b"From: test@example.com\r\nSubject: =?utf-8?B?\tSGVsbG8=?=\r\n\r\n").unwrap();

    let decoded_anyway = parsed.subject.as_deref() == Some("Hello");
    assert!(
        !decoded_anyway,
        "encoded word with tab in encoded-text must be rejected (RFC 2047 Section 2), \
        got subject: {:?}",
        parsed.subject
    );
}
/// An encoded-word whose encoded-text contains a byte >= 0x80 must not be
/// decoded to the payload it would otherwise yield.
#[test]
fn encoded_word_rejects_high_bytes_in_encoded_text() {
    let parsed =
        parse_email(b"From: test@example.com\r\nSubject: =?utf-8?B?\x80SGVsbG8=?=\r\n\r\n").unwrap();

    let decoded_anyway = parsed.subject.as_deref() == Some("Hello");
    assert!(
        !decoded_anyway,
        "encoded word with high byte in encoded-text must be rejected (RFC 2047 Section 2), \
        got subject: {:?}",
        parsed.subject
    );
}
/// A B-encoded word interrupted by a CRLF + space fold must still decode.
#[test]
fn encoded_word_folded_b_encoding() {
    let message = b"From: test@example.com\r\nSubject: =?UTF-8?B?SGVs\r\n bG8=?=\r\n\r\nbody";

    let subject = parse_email(message).unwrap().subject;

    assert_eq!(
        subject.as_deref(),
        Some("Hello"),
        "B-encoded word split by header folding must decode to 'Hello'"
    );
}
/// A Q-encoded word interrupted by a CRLF + space fold must still decode.
#[test]
fn encoded_word_folded_q_encoding() {
    let message = b"From: test@example.com\r\nSubject: =?UTF-8?Q?Hel\r\n lo?=\r\n\r\nbody";

    let subject = parse_email(message).unwrap().subject;

    assert_eq!(
        subject.as_deref(),
        Some("Hello"),
        "Q-encoded word split by header folding must decode to 'Hello'"
    );
}
/// A B-encoded word interrupted by a CRLF + tab fold must still decode.
#[test]
fn encoded_word_folded_b_encoding_with_tab() {
    let message = b"From: test@example.com\r\nSubject: =?UTF-8?B?SGVs\r\n\tbG8=?=\r\n\r\nbody";

    let subject = parse_email(message).unwrap().subject;

    assert_eq!(
        subject.as_deref(),
        Some("Hello"),
        "B-encoded word split by tab folding must decode to 'Hello'"
    );
}
/// Fuzz-style regression input: a `Date` header whose timezone field ends in
/// two 0xFF bytes. Only the absence of a panic is checked; the parse result
/// is deliberately ignored.
#[test]
fn fuzz_parse_timezone_non_ascii_no_panic() {
    let input: &[u8] = b"From: a@b\nDate: Thu, 3 Feb 2025 15:43 +0\xFF\xFF";
    let _ = parse_email(input);
}
// Property-based tests (proptest) for the parser entry points and a few
// interpretation helpers. Most properties only require that the call returns
// without panicking; the rest pin one invariant each.
mod prop_invariants {
use super::*;
use proptest::prelude::*;
proptest! {
// Configure every property in this block with 1000 cases.
#![proptest_config(ProptestConfig::with_cases(1000))]
// Arbitrary byte vectors of length 0..1000 must never panic the full parser;
// the result itself is ignored.
#[test]
fn parse_email_never_panics(data in prop::collection::vec(any::<u8>(), 0..1000)) {
let _ = parse_email(&data);
}
// Same no-panic property for the header-only entry point.
#[test]
fn parse_headers_only_never_panics(data in prop::collection::vec(any::<u8>(), 0..1000)) {
let _ = parse_headers_only(&data);
}
// Strings generated from the regex `.*` must never panic encoded-word decoding.
#[test]
fn decode_encoded_words_never_panics(s in ".*") {
let _ = decode_encoded_words(&s);
}
// Strings generated from the regex `.*` must never panic date parsing.
#[test]
fn parse_date_never_panics(s in ".*") {
let _ = parse_rfc5322_date(&s);
}
// Whenever parsing succeeds, the reported size must equal the input length.
// Inputs that fail to parse are skipped, not treated as failures.
#[test]
fn parsed_size_matches_input(data in prop::collection::vec(any::<u8>(), 0..1000)) {
if let Ok(parsed) = parse_email(&data) {
prop_assert_eq!(
parsed.size,
data.len() as u64,
"ParsedEmail.size must match input byte count"
);
}
}
// A `charset=` decoy inside a comment must be ignored in favour of the real
// parameter that follows the comment. Both values are lowercase tokens that
// start with a letter and are 1 to 16 characters long.
#[test]
fn extract_param_ignores_comments(
real_value in "[a-z][a-z0-9-]{0,15}",
decoy_value in "[a-z][a-z0-9-]{0,15}",
) {
let ct = format!("text/plain; (charset={decoy_value}) charset={real_value}");
let result = extract_param(&ct, "charset");
prop_assert_eq!(
result.as_deref(),
Some(real_value.as_str()),
"must extract real value, not value inside comment, for: {:?}",
ct
);
}
// A backslash in an unquoted parameter value must survive extraction.
// NOTE(review): the property holds vacuously when `extract_param` returns
// `None`; only a returned value is checked for the backslash.
#[test]
fn unquoted_param_preserves_backslash(
prefix in "[a-z]{1,5}",
suffix in "[a-z]{1,5}",
) {
let ct = format!("application/octet-stream; filename={prefix}\\{suffix}.txt");
let result = extract_param(&ct, "filename");
if let Some(val) = result {
prop_assert!(
val.contains('\\'),
"backslash in unquoted value must be preserved: got {:?} from {:?}",
val, ct
);
}
}
}
}
/// Tests feeding large or pathological inputs to the parser and checking that
/// each call finishes within a fixed time budget, plus two small checks on
/// header-name and year parsing that live in the same module.
mod stuck_tests {
    use super::*;
    use std::time::{Duration, Instant};

    /// Time budget a single parser call is allowed to consume.
    const MAX_PARSE_TIME: Duration = Duration::from_secs(5);

    /// Runs `f` to completion and fails if it took `MAX_PARSE_TIME` or longer.
    ///
    /// The duration is measured only after `f` returns, so a call that never
    /// returns hangs the test instead of failing it.
    fn assert_terminates<F: FnOnce()>(name: &str, f: F) {
        let started = Instant::now();
        f();
        let elapsed = started.elapsed();
        assert!(
            elapsed < MAX_PARSE_TIME,
            "{name} took {elapsed:?}, exceeds {MAX_PARSE_TIME:?} — parser may be stuck"
        );
    }

    /// 100 000 bytes of 0xFF.
    #[test]
    fn parse_email_repeated_bytes_100kb() {
        let garbage = vec![0xFFu8; 100_000];
        assert_terminates("100KB 0xFF", || {
            let _ = parse_email(&garbage);
        });
    }

    /// 100 levels of `multipart/mixed`, each nested inside the previous one,
    /// with a single `text/plain` leaf at the bottom.
    #[test]
    fn parse_email_deeply_nested_multipart() {
        let boundaries: Vec<String> = (0..100).map(|i| format!("boundary{i:03}")).collect();

        let mut message: Vec<u8> =
            b"From: a@b.com\r\nContent-Type: multipart/mixed; boundary=\"boundary000\"\r\n\r\n"
                .to_vec();

        // Each level opens a part of the outer boundary that declares the next one.
        for pair in boundaries.windows(2) {
            let opener = format!(
                "--{}\r\nContent-Type: multipart/mixed; boundary=\"{}\"\r\n\r\n",
                pair[0], pair[1]
            );
            message.extend_from_slice(opener.as_bytes());
        }

        // Innermost level: one text part, then its closing delimiter.
        let innermost = &boundaries[99];
        message.extend_from_slice(format!("--{innermost}\r\n").as_bytes());
        message.extend_from_slice(b"Content-Type: text/plain\r\n\r\nHello\r\n");
        message.extend_from_slice(format!("--{innermost}--\r\n").as_bytes());

        // Close the remaining levels from the inside out.
        for boundary in boundaries[..99].iter().rev() {
            message.extend_from_slice(format!("--{boundary}--\r\n").as_bytes());
        }

        assert_terminates("100-deep nested multipart", || {
            let _ = parse_email(&message);
        });
    }

    /// 10 000 distinct `X-Header-N` lines after the `From` header.
    #[test]
    fn parse_email_huge_header_count() {
        let mut message: Vec<u8> = b"From: a@b.com\r\n".to_vec();
        for i in 0..10_000 {
            message.extend_from_slice(format!("X-Header-{i}: value{i}\r\n").as_bytes());
        }
        message.extend_from_slice(b"\r\nBody\r\n");

        assert_terminates("10K headers", || {
            let _ = parse_email(&message);
        });
    }

    /// One `Subject` value made of 14 000 folded lines of 76 `A` bytes each.
    #[test]
    fn parse_email_long_header_value() {
        let line = [b'A'; 76];

        let mut message: Vec<u8> = b"From: a@b.com\r\nSubject: ".to_vec();
        // The first line follows the header name directly; every later line is
        // introduced by a fold (CRLF + space).
        message.extend_from_slice(&line);
        for _ in 1..14_000 {
            message.extend_from_slice(b"\r\n ");
            message.extend_from_slice(&line);
        }
        message.extend_from_slice(b"\r\n\r\nBody\r\n");

        assert_terminates("1MB folded header", || {
            let _ = parse_email(&message);
        });
    }

    /// 10 000 encoded-word look-alikes using the encoding letter `X`.
    #[test]
    fn decode_encoded_words_pathological_pattern() {
        let input: String = (0..10_000)
            .map(|i| format!("=?UTF-8?X?invalid{i}?= "))
            .collect();

        assert_terminates("10K invalid encoded words", || {
            let _ = decode_encoded_words(&input);
        });
    }

    /// A header line whose name contains U+0130 must produce no headers.
    #[test]
    fn parse_headers_non_ascii_name_skipped() {
        let raw = "X-\u{0130}: value\r\n\r\n";
        let parsed = parse_headers(raw.as_bytes());
        assert!(parsed.is_empty(), "non-ASCII header names must be skipped");
    }

    /// Pins year interpretation for four-, two- and three-digit inputs.
    #[test]
    fn parse_year_four_digit_low_value_not_obs_year() {
        // Four digits are taken literally, even when the value is small.
        assert_eq!(parse_year("0050"), Some(50));
        assert_eq!(parse_year("0000"), Some(0));
        assert_eq!(parse_year("0500"), Some(500));

        // Two digits: 50..=99 map to 19xx, 00..=49 map to 20xx.
        assert_eq!(parse_year("50"), Some(1950));
        assert_eq!(parse_year("99"), Some(1999));
        assert_eq!(parse_year("00"), Some(2000));
        assert_eq!(parse_year("49"), Some(2049));

        // Three digits are offset by 1900.
        assert_eq!(parse_year("100"), Some(2000));

        assert_eq!(parse_year("2025"), Some(2025));
    }

    /// A valid date followed by 100 000 trailing spaces.
    #[test]
    fn parse_date_repeated_whitespace() {
        let input = format!("Thu, 13 Feb 2025 15:47:33 +0000{}", " ".repeat(100_000));

        assert_terminates("100K trailing spaces in date", || {
            let _ = parse_rfc5322_date(&input);
        });
    }
}
/// Two `In-Reply-To` headers must both contribute their message id, in header
/// order and without angle brackets.
#[test]
fn duplicate_in_reply_to_headers_concatenated() {
    let message = b"From: a@b.com\r\n\
        In-Reply-To: <id1@host>\r\n\
        In-Reply-To: <id2@host>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";
    let expected_ids = vec!["id1@host", "id2@host"];

    let email = parse_email(message).unwrap();

    assert_eq!(email.in_reply_to, expected_ids);
}
/// Two `References` headers must both contribute their message id, in header
/// order and without angle brackets.
#[test]
fn duplicate_references_headers_concatenated() {
    let message = b"From: a@b.com\r\n\
        References: <ref1@host>\r\n\
        References: <ref2@host>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";
    let expected_ids = vec!["ref1@host", "ref2@host"];

    let email = parse_email(message).unwrap();

    assert_eq!(email.references, expected_ids);
}
/// An extended (`filename*=`) parameter inside a comment is a decoy; the one
/// after the comment must be returned.
#[test]
fn extract_rfc2231_param_skips_comment_match() {
    let disposition = "attachment; ( filename*=UTF-8''wrong.pdf) filename*=UTF-8''correct.pdf";

    let filename = extract_rfc2231_param(disposition, "filename");

    assert_eq!(filename.as_deref(), Some("correct.pdf"));
}
/// A `filename*0=` segment inside a comment is a decoy; the one after the
/// comment must be returned. The helper receives both the lowercased and the
/// original header text.
#[test]
fn find_param_value_skips_comment_match() {
    let original = "attachment; ( filename*0=\"wrong\") filename*0=\"correct\"";
    let lowercased = original.to_ascii_lowercase();

    let value = find_param_value(&lowercased, original, "filename*0=");

    assert_eq!(value.as_deref(), Some("correct"));
}
/// Regression: a date labelled `Wed` is parsed even though the test regards
/// that day-of-week as incorrect, and every numeric field comes through
/// unchanged.
#[test]
fn regression_msg001_date_accepts_wrong_day_of_week() {
    let dt = parse_rfc5322_date("Wed, 13 Feb 2025 15:47:33 +0000")
        .expect("Parser must accept dates with incorrect day-of-week (Postel's law)");

    assert_eq!((dt.year, dt.month, dt.day), (2025, 2, 13));
    assert_eq!((dt.hour, dt.minute, dt.second), (15, 47, 33));
    assert_eq!(dt.tz_offset_minutes, 0);
}
/// Regression: an encoded-word longer than 75 characters must still be
/// decoded rather than left as-is.
#[test]
fn regression_msg019_overlong_encoded_word_decoded() {
    const OVERLONG: &str = "=?UTF-8?B?VGhpcyBpcyBhIGxvbmcgVVRGLTggc3ViamVjdCB0aGF0IHdpbGwgZXhjZWVkIHNldmVudHktZml2ZSBjaGFyYWN0ZXJzIGVhc2lseQ==?=";

    // Guard the fixture itself: it only exercises the case if it is overlong.
    let length = OVERLONG.len();
    assert!(
        length > 75,
        "Test setup: encoded word must be > 75 chars, got {}",
        length
    );

    let decoded = decode_encoded_words(OVERLONG);
    assert_eq!(
        decoded, "This is a long UTF-8 subject that will exceed seventy-five characters easily",
        "Overlong encoded words must be decoded (Postel's law, RFC 2047 Section 6.1)"
    );
}
/// Regression: every `Resent-*` address header must appear in `extra_headers`
/// with its display name RFC 2047-decoded and no raw encoded-word left behind.
#[test]
fn regression_msg100_resent_address_headers_decode_display_names() {
    // Lowercase names under which the headers are looked up.
    const RESENT_HEADERS: [&str; 6] = [
        "resent-from",
        "resent-sender",
        "resent-to",
        "resent-cc",
        "resent-bcc",
        "resent-reply-to",
    ];

    let raw = b"From: sender@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Resent-From: =?UTF-8?B?w6TDtg==?= <user@example.com>\r\n\
        Resent-Sender: =?UTF-8?B?w6TDtg==?= <sender2@example.com>\r\n\
        Resent-To: =?UTF-8?B?w6TDtg==?= <to@example.com>\r\n\
        Resent-Cc: =?UTF-8?B?w6TDtg==?= <cc@example.com>\r\n\
        Resent-Bcc: =?UTF-8?B?w6TDtg==?= <bcc@example.com>\r\n\
        Resent-Reply-To: =?UTF-8?B?w6TDtg==?= <reply@example.com>\r\n\
        \r\n\
        body";
    let parsed = parse_email(raw).unwrap();

    for header_name in &RESENT_HEADERS {
        let value = parsed
            .extra_headers
            .iter()
            .find(|(k, _)| k == header_name)
            .map(|(_, v)| v)
            .unwrap_or_else(|| panic!("{header_name} header must be present in extra_headers"));

        let has_decoded_name = value.contains("\u{e4}\u{f6}");
        assert!(
            has_decoded_name,
            "{header_name} display name must be RFC 2047 decoded to 'äö'; got: {value:?}"
        );

        let has_raw_encoded_word = value.contains("=?UTF-8?B?");
        assert!(
            !has_raw_encoded_word,
            "{header_name} must not contain raw encoded-word syntax; got: {value:?}"
        );
    }
}
/// Regression: quoted-printable decoding drops literal trailing spaces/tabs at
/// the end of a line (or of the input) but keeps whitespace that the sender
/// encoded explicitly as `=20` / `=09`.
#[test]
fn regression_msg119_qp_strip_trailing_whitespace_before_hard_break() {
    // (encoded input, expected decoded bytes, failure message)
    let cases: [(&[u8], &[u8], &str); 7] = [
        (
            b"hello \r\nworld",
            b"hello\r\nworld",
            "trailing literal spaces before CRLF must be stripped (RFC 2045 Section 6.7 Rule #3)",
        ),
        (
            b"hello\t\t\r\nworld",
            b"hello\r\nworld",
            "trailing literal tabs before CRLF must be stripped (RFC 2045 Section 6.7 Rule #3)",
        ),
        (
            b"hello=20\r\nworld",
            b"hello \r\nworld",
            "hex-encoded =20 before CRLF must be preserved (explicitly encoded by sender)",
        ),
        (
            b"hello ",
            b"hello",
            "trailing literal spaces at end of input must be stripped (RFC 2045 Section 6.7 Rule #3)",
        ),
        (
            b"hello\r\n \r\nworld",
            b"hello\r\n\r\nworld",
            "lines containing only whitespace must be stripped (RFC 2045 Section 6.7 Rule #3)",
        ),
        (
            b"hello \t \r\nworld",
            b"hello\r\nworld",
            "mixed trailing tabs and spaces before CRLF must be stripped",
        ),
        (
            b"hello=09\r\nworld",
            b"hello\t\r\nworld",
            "hex-encoded =09 before CRLF must be preserved (explicitly encoded by sender)",
        ),
    ];

    for (encoded, expected, message) in cases.iter().copied() {
        assert_eq!(decode_quoted_printable(encoded), expected, "{message}");
    }
}
/// Whitespace on both sides of each colon in the time-of-day is accepted, and
/// the date and zone fields still parse.
#[test]
fn parse_date_obs_time_whitespace_around_colons() {
    let parsed = parse_rfc5322_date("Fri, 21 Nov 1997 09 : 55 : 06 -0600").unwrap();

    assert_eq!((parsed.hour, parsed.minute, parsed.second), (9, 55, 6));
    assert_eq!(parsed.tz_offset_minutes, -360);
    assert_eq!((parsed.year, parsed.month, parsed.day), (1997, 11, 21));
}
/// Whitespace only before each colon in the time-of-day is accepted.
#[test]
fn parse_date_obs_time_whitespace_before_colons() {
    let parsed = parse_rfc5322_date("Fri, 21 Nov 1997 09 :55 :06 -0600").unwrap();

    assert_eq!((parsed.hour, parsed.minute, parsed.second), (9, 55, 6));
    assert_eq!(parsed.tz_offset_minutes, -360);
}
/// Whitespace only after each colon in the time-of-day is accepted.
#[test]
fn parse_date_obs_time_whitespace_after_colons() {
    let parsed = parse_rfc5322_date("Fri, 21 Nov 1997 09: 55: 06 -0600").unwrap();

    assert_eq!((parsed.hour, parsed.minute, parsed.second), (9, 55, 6));
    assert_eq!(parsed.tz_offset_minutes, -360);
}
/// Whitespace before the first colon and after the second one is accepted.
#[test]
fn parse_date_obs_time_mixed_whitespace() {
    let parsed = parse_rfc5322_date("Fri, 21 Nov 1997 09 :55: 06 -0600").unwrap();

    assert_eq!((parsed.hour, parsed.minute, parsed.second), (9, 55, 6));
    assert_eq!(parsed.tz_offset_minutes, -360);
}
/// Control case: the compact time form, with no day-of-week and no extra
/// whitespace, continues to parse.
#[test]
fn parse_date_standard_time_still_works_after_obs_fix() {
    let parsed = parse_rfc5322_date("21 Nov 1997 09:55:06 -0600").unwrap();

    assert_eq!((parsed.hour, parsed.minute, parsed.second), (9, 55, 6));
    assert_eq!(parsed.tz_offset_minutes, -360);
}
/// With whitespace around the colon and no seconds field, seconds are
/// reported as zero.
#[test]
fn parse_date_obs_time_whitespace_no_seconds() {
    let parsed = parse_rfc5322_date("Fri, 21 Nov 1997 09 : 55 -0600").unwrap();

    assert_eq!((parsed.hour, parsed.minute, parsed.second), (9, 55, 0));
    assert_eq!(parsed.tz_offset_minutes, -360);
}
/// The zone names `UT`, `GMT` and `UTC` all map to a zero offset.
#[test]
fn parse_timezone_ut_gmt_are_well_defined_rfc5322_section_4_3() {
    for zone in ["UT", "GMT"].iter().copied() {
        assert_eq!(
            parse_timezone(zone),
            0,
            "{zone} is well-defined as +0000 (RFC 5322 Section 4.3)"
        );
    }

    // `UTC` is checked separately: its message carries no RFC reference.
    assert_eq!(parse_timezone("UTC"), 0, "UTC is well-defined as +0000");
}
/// The zone letter `Z` maps to a zero offset.
#[test]
fn parse_timezone_z_is_well_defined_utc() {
    let offset = parse_timezone("Z");

    assert_eq!(
        offset, 0,
        "Z is well-defined as +0000 (RFC 5322 Section 4.3)"
    );
}
/// The sampled military zone letters (`A`, `K`, `N`, `Y`) all yield a zero
/// offset rather than a letter-specific one.
#[test]
fn parse_timezone_military_zones_are_unreliable() {
    for zone in ["A", "K", "N", "Y"].iter().copied() {
        assert_eq!(
            parse_timezone(zone),
            0,
            "military zone {zone} is unreliable, maps to -0000 (RFC 5322 Section 4.3)"
        );
    }
}