use base64::Engine as _;
use crate::error::Error;
use crate::types::{Address, DateTime, ParsedAttachment, ParsedEmail};
/// Deepest multipart nesting level `walk_mime_tree` will descend into; once the
/// recursion depth exceeds this value the nested part yields no body or attachments.
const MAX_MIME_DEPTH: u32 = 64;
/// Base64 engine over the standard alphabet, configured with
/// `DecodePaddingMode::Indifferent` so that decoding does not depend on whether the
/// trailing `=` padding is present. Used for RFC 2047 "B" encoded-words and for
/// base64 transfer-encoded bodies.
const LENIENT_BASE64: base64::engine::GeneralPurpose = base64::engine::GeneralPurpose::new(
&base64::alphabet::STANDARD,
base64::engine::GeneralPurposeConfig::new()
.with_decode_padding_mode(base64::engine::DecodePaddingMode::Indifferent),
);
/// Header-derived fields shared by `parse_email` and `parse_headers_only`.
/// Populated by `extract_header_fields`.
struct HeaderFields {
// `Message-ID` value with the surrounding angle brackets removed.
message_id: Option<String>,
// First `<...>` identifier found in `In-Reply-To`.
in_reply_to: Option<String>,
// All `<...>` identifiers from `References`, joined with single spaces.
references: Option<String>,
// `Subject` with encoded-words decoded.
subject: Option<String>,
// First address of the `From` header (required).
from: Address,
// Addresses parsed from `To`.
to: Vec<Address>,
// Addresses parsed from `Cc`.
cc: Vec<Address>,
// Addresses parsed from `Bcc`.
bcc: Vec<Address>,
// Addresses parsed from `Reply-To`.
reply_to: Vec<Address>,
// Parsed `Date` header, if present and well-formed.
date: Option<DateTime>,
}
/// Builds a [`HeaderFields`] from already-parsed `(lowercase-name, value)` pairs.
///
/// # Errors
/// Propagates the error from `extract_from` when no usable `From` address exists.
fn extract_header_fields(headers: &[(String, String)]) -> Result<HeaderFields, Error> {
    let from = extract_from(headers)?;
    let subject = get_header_value(headers, "subject").map(|raw| decode_encoded_words(&raw));
    let recipients = |field: &str| extract_address_list(headers, field);

    Ok(HeaderFields {
        message_id: extract_message_id(headers),
        in_reply_to: extract_in_reply_to(headers),
        references: extract_references(headers),
        subject,
        from,
        to: recipients("to"),
        cc: recipients("cc"),
        bcc: recipients("bcc"),
        reply_to: recipients("reply-to"),
        date: extract_date(headers),
    })
}
pub fn parse_email(raw: &[u8]) -> Result<ParsedEmail, Error> {
if raw.is_empty() {
return Err(Error::EmptyInput);
}
let size = raw.len() as u64;
let (header_bytes, body_bytes) = split_header_body(raw);
let raw_headers = String::from_utf8_lossy(header_bytes).into_owned();
let headers = parse_headers(header_bytes);
let hf = extract_header_fields(&headers)?;
let content_type = get_header_value(&headers, "content-type")
.unwrap_or_else(|| "text/plain; charset=us-ascii".to_string());
let transfer_encoding =
get_header_value(&headers, "content-transfer-encoding").unwrap_or_default();
let content_disposition = get_header_value(&headers, "content-disposition").unwrap_or_default();
let content_id = get_header_value(&headers, "content-id");
let (body_text, body_html, attachments) = if is_multipart(&content_type) {
match extract_boundary(&content_type) {
Some(boundary) => {
let is_digest = extract_mime_type(&content_type) == "multipart/digest";
walk_mime_tree(body_bytes, &boundary, "", 0, is_digest)
}
None => extract_simple_body(
body_bytes,
"text/plain; charset=us-ascii",
&transfer_encoding,
&content_disposition,
content_id.as_deref(),
),
}
} else {
extract_simple_body(
body_bytes,
&content_type,
&transfer_encoding,
&content_disposition,
content_id.as_deref(),
)
};
Ok(ParsedEmail {
message_id: hf.message_id,
in_reply_to: hf.in_reply_to,
references: hf.references,
subject: hf.subject,
from: hf.from,
to: hf.to,
cc: hf.cc,
bcc: hf.bcc,
reply_to: hf.reply_to,
date: hf.date,
body_text,
body_html,
attachments,
raw_headers,
size,
})
}
pub fn parse_headers_only(raw: &[u8]) -> Result<ParsedEmail, Error> {
if raw.is_empty() {
return Err(Error::EmptyInput);
}
let size = raw.len() as u64;
let (header_bytes, _) = split_header_body(raw);
let raw_headers = String::from_utf8_lossy(header_bytes).into_owned();
let headers = parse_headers(header_bytes);
let hf = extract_header_fields(&headers)?;
Ok(ParsedEmail {
message_id: hf.message_id,
in_reply_to: hf.in_reply_to,
references: hf.references,
subject: hf.subject,
from: hf.from,
to: hf.to,
cc: hf.cc,
bcc: hf.bcc,
reply_to: hf.reply_to,
date: hf.date,
body_text: None,
body_html: None,
attachments: Vec::new(),
raw_headers,
size,
})
}
/// Splits a raw message (or MIME part) into `(header_bytes, body_bytes)`.
///
/// The split happens at the *first* blank line, whether it is written as
/// `\r\n\r\n` or `\n\n`; the separator itself belongs to neither half. Input that
/// starts with a line break has an empty header section. If no blank line exists
/// the whole input is treated as headers and the body is empty.
fn split_header_body(raw: &[u8]) -> (&[u8], &[u8]) {
    if raw.starts_with(b"\r\n") {
        return (&[], &raw[2..]);
    }
    if raw.starts_with(b"\n") {
        return (&[], &raw[1..]);
    }
    // Single pass so that whichever separator style occurs earliest wins; a CRLF
    // blank line inside the body must not override an earlier LF-only separator.
    for idx in 0..raw.len() {
        let tail = &raw[idx..];
        if tail.starts_with(b"\r\n\r\n") {
            return (&raw[..idx], &raw[idx + 4..]);
        }
        if tail.starts_with(b"\n\n") {
            return (&raw[..idx], &raw[idx + 2..]);
        }
    }
    (raw, &[])
}
/// Parses a header section into `(lowercased-name, value)` pairs in order of appearance.
///
/// * Bytes are decoded as UTF-8 with lossy replacement.
/// * Parsing stops at the first empty line.
/// * Lines starting with a space or tab are appended verbatim (leading whitespace
///   included) to the value of the header currently being collected.
/// * Lines without a colon that are not continuations are ignored.
/// * A header whose name is empty after trimming is dropped, along with its continuations.
fn parse_headers(raw: &[u8]) -> Vec<(String, String)> {
    let decoded = String::from_utf8_lossy(raw);
    let mut parsed: Vec<(String, String)> = Vec::new();
    // Header currently being assembled (original-case name, value so far).
    let mut pending: Option<(String, String)> = None;

    let lines = decoded
        .split('\n')
        .map(|l| l.strip_suffix('\r').unwrap_or(l))
        .take_while(|l| !l.is_empty());

    for line in lines {
        if line.starts_with([' ', '\t']) {
            if let Some((_, value)) = pending.as_mut() {
                value.push_str(line);
            }
            continue;
        }
        let Some((name, value)) = line.split_once(':') else {
            continue;
        };
        if let Some((done_name, done_value)) = pending.take() {
            parsed.push((done_name.to_lowercase(), done_value));
        }
        let name = name.trim();
        pending = (!name.is_empty()).then(|| (name.to_string(), value.trim_start().to_string()));
    }

    if let Some((done_name, done_value)) = pending {
        parsed.push((done_name.to_lowercase(), done_value));
    }
    parsed
}
/// Returns a clone of the value of the first header whose stored name equals `name`.
///
/// The comparison is exact; `parse_headers` stores names lowercased, so `name`
/// is expected to be lowercase as well.
fn get_header_value(headers: &[(String, String)], name: &str) -> Option<String> {
    for (key, value) in headers {
        if key.as_str() == name {
            return Some(value.clone());
        }
    }
    None
}
pub(crate) fn decode_encoded_words(input: &str) -> String {
let mut result = String::new();
let mut remaining = input;
while !remaining.is_empty() {
if let Some(start) = remaining.find("=?") {
result.push_str(&remaining[..start]);
remaining = &remaining[start..];
if let Some((decoded, consumed)) = try_decode_encoded_word(remaining) {
result.push_str(&decoded);
remaining = &remaining[consumed..];
let trimmed = remaining.trim_start_matches([' ', '\t']);
if trimmed.starts_with("=?") {
remaining = trimmed;
}
} else {
result.push_str("=?");
remaining = &remaining[2..];
}
} else {
result.push_str(remaining);
break;
}
}
result
}
/// Attempts to decode one encoded-word located at the very start of `input`.
///
/// Returns the decoded text and the number of bytes of `input` the encoded-word
/// occupied, or `None` when `input` does not start with a well-formed
/// `=?charset?B|Q?text?=` token or the base64 payload is invalid.
fn try_decode_encoded_word(input: &str) -> Option<(String, usize)> {
    let after_open = input.strip_prefix("=?")?;
    let (charset, after_charset) = after_open.split_once('?')?;
    let (encoding, after_encoding) = after_charset.split_once('?')?;
    let text_len = after_encoding.find("?=")?;
    let payload = &after_encoding[..text_len];

    // "=?" + charset + "?" + encoding + "?" + payload + "?="
    let consumed = 2 + charset.len() + 1 + encoding.len() + 1 + text_len + 2;

    let bytes = if encoding.eq_ignore_ascii_case("b") {
        LENIENT_BASE64.decode(payload.as_bytes()).ok()?
    } else if encoding.eq_ignore_ascii_case("q") {
        decode_q_encoding(payload)
    } else {
        return None;
    };
    Some((decode_charset(charset, &bytes), consumed))
}
/// Decodes the "Q" encoding used inside encoded-words.
///
/// `=XX` (two hex digits) becomes the byte `0xXX`, `_` becomes a space, and every
/// other byte — including a `=` not followed by two hex digits — is copied as-is.
fn decode_q_encoding(input: &str) -> Vec<u8> {
    let mut out = Vec::with_capacity(input.len());
    let mut rest = input.as_bytes();

    while let [first, tail @ ..] = rest {
        if let [b'=', hi, lo, ..] = rest {
            if let Some(byte) = decode_hex_pair(*hi, *lo) {
                out.push(byte);
                rest = &rest[3..];
                continue;
            }
        }
        out.push(if *first == b'_' { b' ' } else { *first });
        rest = tail;
    }
    out
}
fn extract_from(headers: &[(String, String)]) -> Result<Address, Error> {
let value = get_header_value(headers, "from").ok_or(Error::MissingFrom)?;
let addrs = decode_address_names(parse_address_list(&value));
addrs.into_iter().next().ok_or(Error::MissingFrom)
}
/// Parses the address-list header called `name`; returns an empty list when the
/// header is absent. Display names have their encoded-words decoded.
fn extract_address_list(headers: &[(String, String)], name: &str) -> Vec<Address> {
    match get_header_value(headers, name) {
        Some(raw) => decode_address_names(parse_address_list(&raw)),
        None => Vec::new(),
    }
}
/// Runs encoded-word decoding over the display name of every address that has one.
fn decode_address_names(addrs: Vec<Address>) -> Vec<Address> {
    let mut decoded = addrs;
    for addr in &mut decoded {
        addr.name = addr.name.take().map(|raw| decode_encoded_words(&raw));
    }
    decoded
}
/// Splits an address-list header value into individual addresses.
///
/// Commas separate addresses unless they occur inside a quoted string, an
/// angle-bracketed address, or a parenthesised comment. Group syntax
/// (`label: a@x, b@y;`) is flattened: the label is discarded and the members are
/// returned like ordinary addresses. Entries that `parse_single_address` rejects
/// are skipped.
fn parse_address_list(input: &str) -> Vec<Address> {
    let mut addresses = Vec::new();
    let mut current = String::new();
    let mut in_quotes = false;
    let mut escaped = false;
    let mut angle_depth: i32 = 0;
    let mut paren_depth: i32 = 0;
    let mut in_group = false;

    for ch in input.chars() {
        if escaped {
            // Character following a backslash is taken literally.
            current.push(ch);
            escaped = false;
            continue;
        }
        match ch {
            '\\' if in_quotes || paren_depth > 0 => {
                escaped = true;
                current.push(ch);
            }
            '"' if paren_depth == 0 => {
                in_quotes = !in_quotes;
                current.push(ch);
            }
            '(' if !in_quotes => {
                paren_depth += 1;
                current.push(ch);
            }
            ')' if !in_quotes && paren_depth > 0 => {
                paren_depth -= 1;
                current.push(ch);
            }
            '<' if !in_quotes && paren_depth == 0 => {
                angle_depth += 1;
                current.push(ch);
            }
            // Only close a bracket that was opened: a stray '>' must not push the
            // depth below zero, which would stop all later commas from splitting.
            '>' if !in_quotes && paren_depth == 0 && angle_depth > 0 => {
                angle_depth -= 1;
                current.push(ch);
            }
            ':' if !in_quotes && angle_depth == 0 && paren_depth == 0 && !in_group => {
                if current.trim().contains('@') {
                    // Colon inside something that already looks like an address.
                    current.push(ch);
                } else {
                    // Start of a group: drop the label collected so far.
                    in_group = true;
                    current.clear();
                }
            }
            ';' if !in_quotes && angle_depth == 0 && paren_depth == 0 && in_group => {
                if let Some(addr) = parse_single_address(&current) {
                    addresses.push(addr);
                }
                current.clear();
                in_group = false;
            }
            ',' if !in_quotes && angle_depth == 0 && paren_depth == 0 => {
                if let Some(addr) = parse_single_address(&current) {
                    addresses.push(addr);
                }
                current.clear();
            }
            _ => current.push(ch),
        }
    }

    // Flush whatever follows the last separator.
    if let Some(addr) = parse_single_address(&current) {
        addresses.push(addr);
    }
    addresses
}
fn parse_single_address(input: &str) -> Option<Address> {
let input = input.trim();
if input.is_empty() {
return None;
}
if let Some(angle_start) = input.rfind('<') {
if let Some(angle_end) = input.rfind('>') {
if angle_end > angle_start {
let email = input[angle_start + 1..angle_end].trim().to_string();
let name_part = input[..angle_start].trim();
let name = if name_part.is_empty() {
None
} else {
let name = strip_outer_quotes(name_part).trim().to_string();
if name.is_empty() {
None
} else {
Some(unescape_quoted_string(&name))
}
};
if !email.is_empty() {
return Some(Address { name, email });
}
}
}
}
if input.contains('@') {
if let Some(paren_start) = input.find('(') {
let email_part = input[..paren_start].trim();
let after_email = input[paren_start..].trim();
let name = if !email_part.is_empty() && email_part.contains('@') {
extract_comment_text(after_email)
} else {
None
};
let stripped = strip_comments(input);
let email = stripped.trim().to_string();
if !email.is_empty() && email.contains('@') {
return Some(Address { name, email });
}
}
return Some(Address {
name: None,
email: input.to_string(),
});
}
None
}
/// Returns the trimmed contents of the parenthesised comment that `s` starts with.
///
/// Nested parentheses are kept in the output, a backslash makes the next character
/// literal (the backslash itself is dropped), and scanning stops at the parenthesis
/// closing the outermost comment. `None` if `s` (after trimming) does not start with
/// `(` or the comment contains only whitespace.
fn extract_comment_text(s: &str) -> Option<String> {
    let inner = s.trim().strip_prefix('(')?;
    let mut nesting: u32 = 1;
    let mut text = String::new();
    let mut chars = inner.chars();

    while let Some(c) = chars.next() {
        match c {
            '\\' => {
                if let Some(literal) = chars.next() {
                    text.push(literal);
                }
            }
            '(' => {
                nesting = nesting.saturating_add(1);
                text.push(c);
            }
            ')' => {
                nesting -= 1;
                if nesting == 0 {
                    break;
                }
                text.push(c);
            }
            _ => text.push(c),
        }
    }

    let text = text.trim();
    (!text.is_empty()).then(|| text.to_string())
}
/// Extracts the `Message-ID` without angle brackets.
///
/// Prefers the first `<...>` identifier; otherwise accepts a bare, non-empty value
/// provided it contains no stray `<` or `>`.
fn extract_message_id(headers: &[(String, String)]) -> Option<String> {
    let raw = get_header_value(headers, "message-id")?;
    extract_first_msg_id(&raw).or_else(|| {
        let bare = raw.trim();
        let usable = !bare.is_empty() && !bare.contains(['<', '>']);
        usable.then(|| bare.to_string())
    })
}
/// Returns the first `<...>` identifier of the `In-Reply-To` header, if any.
fn extract_in_reply_to(headers: &[(String, String)]) -> Option<String> {
    let raw = get_header_value(headers, "in-reply-to")?;
    extract_first_msg_id(&raw)
}
/// Returns every `<...>` identifier of the `References` header joined by single
/// spaces, or `None` when the header is absent or holds no identifiers.
fn extract_references(headers: &[(String, String)]) -> Option<String> {
    let raw = get_header_value(headers, "references")?;
    let ids = extract_all_msg_ids(&raw);
    (!ids.is_empty()).then(|| ids.join(" "))
}
/// Returns the trimmed text between the first `<` and the next `>` that follows it.
/// `None` if either bracket is missing or the enclosed text is blank.
fn extract_first_msg_id(value: &str) -> Option<String> {
    let (_, after_open) = value.split_once('<')?;
    let (inner, _) = after_open.split_once('>')?;
    let id = inner.trim();
    (!id.is_empty()).then(|| id.to_string())
}
/// Collects, in order, the trimmed contents of every `<...>` pair in `value`.
/// Blank identifiers are skipped; scanning stops at an unterminated `<`.
fn extract_all_msg_ids(value: &str) -> Vec<String> {
    let mut ids = Vec::new();
    let mut rest = value;

    while let Some((_, after_open)) = rest.split_once('<') {
        let Some((inner, after_close)) = after_open.split_once('>') else {
            break;
        };
        let id = inner.trim();
        if !id.is_empty() {
            ids.push(id.to_string());
        }
        rest = after_close;
    }
    ids
}
/// Parses the `Date` header; `None` when it is absent or cannot be parsed.
fn extract_date(headers: &[(String, String)]) -> Option<DateTime> {
    let raw = get_header_value(headers, "date")?;
    parse_rfc5322_date(&raw)
}
pub(crate) fn parse_rfc5322_date(input: &str) -> Option<DateTime> {
let input = strip_comments(input);
let input = input.trim();
let input = if let Some(comma_pos) = input.find(',') {
input[comma_pos + 1..].trim()
} else {
input
};
let parts: Vec<&str> = input.split_whitespace().collect();
if parts.len() < 4 {
return None;
}
let day: u8 = parts[0].parse().ok()?;
let month = parse_month_name(parts[1])?;
let year: u16 = parse_year(parts[2])?;
let time_parts: Vec<&str> = parts[3].split(':').collect();
if time_parts.len() < 2 {
return None;
}
let hour: u8 = time_parts[0].parse().ok()?;
let minute: u8 = time_parts[1].parse().ok()?;
let second: u8 = time_parts.get(2).and_then(|s| s.parse().ok()).unwrap_or(0);
if day == 0 || day > 31 || hour > 23 || minute > 59 || second > 60 {
return None;
}
let tz_offset_minutes = parts.get(4).map_or(0, |tz| parse_timezone(tz));
Some(DateTime {
year,
month,
day,
hour,
minute,
second,
tz_offset_minutes,
})
}
/// Maps a three-letter English month abbreviation (ASCII case-insensitive) to 1–12.
fn parse_month_name(s: &str) -> Option<u8> {
    const MONTHS: [&str; 12] = [
        "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec",
    ];
    MONTHS
        .iter()
        .zip(1u8..)
        .find(|(abbr, _)| s.eq_ignore_ascii_case(abbr))
        .map(|(_, number)| number)
}
/// Parses a year, expanding short forms by numeric value: 0–49 → 2000+, 50–999 →
/// 1900+, anything larger is taken as-is. `None` if `s` is not a valid `u16`.
fn parse_year(s: &str) -> Option<u16> {
    let value: u16 = s.parse().ok()?;
    Some(match value {
        0..=49 => 2000 + value,
        50..=999 => 1900 + value,
        _ => value,
    })
}
/// Converts a zone token into an offset from UTC in minutes.
///
/// Accepts numeric zones (`+HHMM` / `-HHMM`; anything after the four digits is
/// ignored) and the North American abbreviations EST/EDT/CST/CDT/MST/MDT/PST/PDT
/// (ASCII case-insensitive). Every other token — including `UT`, `GMT`, and
/// malformed numeric zones — yields 0.
///
/// The numeric form is parsed on bytes and requires four ASCII digits, so arbitrary
/// (possibly non-ASCII) header content can never trigger a string-slicing panic.
fn parse_timezone(s: &str) -> i16 {
    let s = s.trim();
    let bytes = s.as_bytes();

    if bytes.len() >= 5
        && matches!(bytes[0], b'+' | b'-')
        && bytes[1..5].iter().all(u8::is_ascii_digit)
    {
        let digit = |idx: usize| i16::from(bytes[idx] - b'0');
        let hours = digit(1) * 10 + digit(2);
        let minutes = digit(3) * 10 + digit(4);
        let offset = hours * 60 + minutes;
        return if bytes[0] == b'-' { -offset } else { offset };
    }

    match s.to_ascii_uppercase().as_str() {
        "EST" | "CDT" => -300,
        "EDT" => -240,
        "CST" | "MDT" => -360,
        "MST" | "PDT" => -420,
        "PST" => -480,
        _ => 0,
    }
}
/// Recursively walks a multipart body and collects `(text body, html body, attachments)`.
///
/// * `body` – raw bytes of the multipart entity (without its own headers).
/// * `boundary` – boundary parameter of that entity.
/// * `section_prefix` – dotted section number of the enclosing part (`""` at top level);
///   children are numbered `prefix.1`, `prefix.2`, ….
/// * `depth` – current nesting level; beyond `MAX_MIME_DEPTH` nothing is returned.
/// * `is_digest` – when true, parts without a `Content-Type` default to
///   `message/rfc822` instead of `text/plain; charset=us-ascii`.
///
/// The first non-attachment `text/plain` part with non-empty decoded content becomes
/// the text body and likewise for `text/html`; every other leaf part is reported as an
/// attachment. Nested multiparts without a boundary parameter are skipped.
fn walk_mime_tree(
    body: &[u8],
    boundary: &str,
    section_prefix: &str,
    depth: u32,
    is_digest: bool,
) -> (Option<String>, Option<String>, Vec<ParsedAttachment>) {
    if depth > MAX_MIME_DEPTH {
        return (None, None, Vec::new());
    }
    let parts = split_mime_parts(body, boundary);
    let mut body_text: Option<String> = None;
    let mut body_html: Option<String> = None;
    let mut attachments: Vec<ParsedAttachment> = Vec::new();

    for (i, part) in parts.iter().enumerate() {
        // Section numbers are 1-based.
        let section_num = i + 1;
        let section = if section_prefix.is_empty() {
            section_num.to_string()
        } else {
            format!("{section_prefix}.{section_num}")
        };

        let (part_header_bytes, part_body) = split_header_body(part);
        let part_headers = parse_headers(part_header_bytes);
        let default_ct = if is_digest {
            "message/rfc822"
        } else {
            "text/plain; charset=us-ascii"
        };
        let ct = get_header_value(&part_headers, "content-type")
            .unwrap_or_else(|| default_ct.to_string());
        let cte = get_header_value(&part_headers, "content-transfer-encoding").unwrap_or_default();
        let cd = get_header_value(&part_headers, "content-disposition").unwrap_or_default();
        let content_id = get_header_value(&part_headers, "content-id");

        if is_multipart(&ct) {
            if let Some(inner_boundary) = extract_boundary(&ct) {
                let inner_digest = extract_mime_type(&ct) == "multipart/digest";
                let (t, h, a) = walk_mime_tree(
                    part_body,
                    &inner_boundary,
                    &section,
                    depth + 1,
                    inner_digest,
                );
                // Bodies found earlier take precedence over nested ones.
                if body_text.is_none() {
                    body_text = t;
                }
                if body_html.is_none() {
                    body_html = h;
                }
                attachments.extend(a);
            }
        } else {
            let mime = extract_mime_type(&ct);
            let cd_lower = cd.to_lowercase();
            let is_explicit_attachment = cd_lower.starts_with("attachment");
            if !is_explicit_attachment && mime == "text/plain" && body_text.is_none() {
                let decoded = decode_body(part_body, &cte, &ct);
                if !decoded.is_empty() {
                    body_text = Some(decoded);
                }
            } else if !is_explicit_attachment && mime == "text/html" && body_html.is_none() {
                let decoded = decode_body(part_body, &cte, &ct);
                if !decoded.is_empty() {
                    body_html = Some(decoded);
                }
            } else if !mime.starts_with("multipart/") {
                // A Content-ID marks the part as referenced from the body, so it
                // counts as inline even without an explicit disposition.
                let is_inline = cd_lower.starts_with("inline") || content_id.is_some();
                let filename = extract_filename(&cd, &ct);
                attachments.push(ParsedAttachment {
                    filename,
                    content_type: mime,
                    content_id: content_id
                        .map(|s| s.trim_matches(|c| c == '<' || c == '>').trim().to_string()),
                    is_inline,
                    // Size of the part as transmitted (still transfer-encoded).
                    size: Some(part_body.len() as u64),
                    section: Some(section),
                });
            }
        }
    }
    (body_text, body_html, attachments)
}
/// Splits a multipart body into the raw bytes of its parts (headers + body each).
///
/// A delimiter is `--boundary` at the start of a line, followed either by `--`
/// (closing delimiter) or by optional spaces/tabs and a line break or end of input.
/// Requiring that terminator keeps a boundary that is merely a *prefix* of another
/// boundary (e.g. `b` vs. `bb` in a nested multipart) from matching the longer one.
///
/// The preamble before the first delimiter and everything after the closing
/// delimiter are discarded. The line break preceding a delimiter belongs to the
/// delimiter, not to the part. If the closing delimiter is missing, the remainder of
/// the body forms the last part.
fn split_mime_parts<'a>(body: &'a [u8], boundary: &str) -> Vec<&'a [u8]> {
    let delim = format!("--{boundary}");
    let delim_bytes = delim.as_bytes();
    let mut parts: Vec<&'a [u8]> = Vec::new();
    let mut search_from: usize = 0;
    let mut part_start: Option<usize> = None;

    loop {
        let Some(rel_pos) = find_subsequence(&body[search_from..], delim_bytes) else {
            // No further delimiter: an open part runs to the end of the body.
            if let Some(start) = part_start {
                if start < body.len() {
                    parts.push(&body[start..]);
                }
            }
            break;
        };
        let pos = search_from + rel_pos;
        let after = pos + delim_bytes.len();
        let tail = &body[after..];

        let at_line_start = pos == 0 || body[pos - 1] == b'\n';
        let is_close = tail.starts_with(b"--");
        let line_terminated = is_close || {
            let padding = tail
                .iter()
                .take_while(|&&b| b == b' ' || b == b'\t')
                .count();
            tail.get(padding)
                .map_or(true, |&b| b == b'\r' || b == b'\n')
        };
        if !at_line_start || !line_terminated {
            // Boundary text inside content, or a longer boundary sharing this prefix.
            search_from = after;
            continue;
        }

        if let Some(start) = part_start {
            // Strip the line break that introduces the delimiter line.
            let end = if pos >= 2 && body[pos - 2] == b'\r' && body[pos - 1] == b'\n' {
                pos - 2
            } else if pos >= 1 && body[pos - 1] == b'\n' {
                pos - 1
            } else {
                pos
            };
            if start <= end {
                parts.push(&body[start..end]);
            }
        }
        if is_close {
            break;
        }

        // Skip transport padding and the line break ending the delimiter line.
        let mut next = after;
        while next < body.len() && (body[next] == b' ' || body[next] == b'\t') {
            next += 1;
        }
        if next < body.len() && body[next] == b'\r' {
            next += 1;
        }
        if next < body.len() && body[next] == b'\n' {
            next += 1;
        }
        part_start = Some(next);
        search_from = next;
    }
    parts
}
/// Interprets a non-multipart body as `(text body, html body, attachments)`.
///
/// An empty body yields nothing. A body explicitly marked `attachment`, or of any
/// type other than `text/plain` / `text/html`, becomes a single attachment with
/// section `"1"`. Otherwise the decoded text is returned as the text or HTML body,
/// unless it decodes to an empty string.
fn extract_simple_body(
    body: &[u8],
    content_type: &str,
    transfer_encoding: &str,
    content_disposition: &str,
    content_id: Option<&str>,
) -> (Option<String>, Option<String>, Vec<ParsedAttachment>) {
    if body.is_empty() {
        return (None, None, Vec::new());
    }

    let mime = extract_mime_type(content_type);
    let disposition = content_disposition.to_lowercase();
    let renders_as_body = !disposition.starts_with("attachment")
        && matches!(mime.as_str(), "text/plain" | "text/html");

    if !renders_as_body {
        let is_inline = disposition.starts_with("inline") || content_id.is_some();
        let cleaned_id = content_id.map(|id| id.trim_matches(['<', '>']).trim().to_string());
        let attachment = ParsedAttachment {
            filename: extract_filename(content_disposition, content_type),
            content_type: mime,
            content_id: cleaned_id,
            is_inline,
            size: Some(body.len() as u64),
            section: Some("1".to_string()),
        };
        return (None, None, vec![attachment]);
    }

    let text = decode_body(body, transfer_encoding, content_type);
    if text.is_empty() {
        return (None, None, Vec::new());
    }
    match mime.as_str() {
        "text/html" => (None, Some(text), Vec::new()),
        _ => (Some(text), None, Vec::new()),
    }
}
/// Decodes a text part: undoes the transfer encoding, converts from the charset
/// named in `content_type` (default `us-ascii`), and removes one trailing line
/// break (`\r\n` or `\n`).
fn decode_body(data: &[u8], transfer_encoding: &str, content_type: &str) -> String {
    let raw = decode_transfer_encoding(data, transfer_encoding);
    let charset = extract_param(content_type, "charset").unwrap_or_else(|| "us-ascii".to_string());
    let mut text = decode_charset(&charset, &raw);

    if text.ends_with('\n') {
        text.pop();
        if text.ends_with('\r') {
            text.pop();
        }
    }
    text
}
/// Reverses a `Content-Transfer-Encoding` (matched ASCII case-insensitively after trimming).
///
/// * `base64` – bytes outside the base64 alphabet and `=` are discarded before
///   decoding; if decoding still fails the input is returned unchanged.
/// * `quoted-printable` – decoded via `decode_quoted_printable`.
/// * anything else – returned unchanged.
fn decode_transfer_encoding(data: &[u8], encoding: &str) -> Vec<u8> {
    let kind = encoding.trim();
    if kind.eq_ignore_ascii_case("base64") {
        let mut cleaned = data.to_vec();
        cleaned.retain(|b| {
            matches!(*b, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'+' | b'/' | b'=')
        });
        LENIENT_BASE64
            .decode(&cleaned)
            .unwrap_or_else(|_| data.to_vec())
    } else if kind.eq_ignore_ascii_case("quoted-printable") {
        decode_quoted_printable(data)
    } else {
        data.to_vec()
    }
}
/// Decodes quoted-printable content.
///
/// `=XX` becomes the byte `0xXX`; a soft line break (`=` + `\r\n` or `=` + `\n`) is
/// removed; at the very end of the input `=\r` is removed and a lone `=` is
/// dropped. Any other `=` is copied literally, as is every other byte.
fn decode_quoted_printable(data: &[u8]) -> Vec<u8> {
    let mut out = Vec::with_capacity(data.len());
    let mut i = 0;

    while i < data.len() {
        match &data[i..] {
            // Soft line breaks.
            [b'=', b'\r', b'\n', ..] => i += 3,
            [b'=', b'\n', ..] => i += 2,
            // Truncated soft break / dangling '=' at end of input.
            [b'=', b'\r'] => i += 2,
            [b'='] => break,
            [b'=', hi, lo, ..] => match decode_hex_pair(*hi, *lo) {
                Some(byte) => {
                    out.push(byte);
                    i += 3;
                }
                None => {
                    out.push(b'=');
                    i += 1;
                }
            },
            [other, ..] => {
                out.push(*other);
                i += 1;
            }
            [] => break,
        }
    }
    out
}
/// Converts `bytes` from the named charset to a `String`.
///
/// UTF-8 (`utf-8` / `utf8`, any case) is decoded lossily by the standard library.
/// Other labels are resolved through `encoding_rs`; labels it does not recognise
/// fall back to UTF-8.
fn decode_charset(charset: &str, bytes: &[u8]) -> String {
    if matches!(charset.to_lowercase().as_str(), "utf-8" | "utf8") {
        return String::from_utf8_lossy(bytes).into_owned();
    }
    let encoding = match encoding_rs::Encoding::for_label(charset.as_bytes()) {
        Some(known) => known,
        None => encoding_rs::UTF_8,
    };
    encoding.decode(bytes).0.into_owned()
}
/// True when the media type of `content_type` is any `multipart/*` type.
fn is_multipart(content_type: &str) -> bool {
    let mime = extract_mime_type(content_type);
    mime.starts_with("multipart/")
}
/// Returns the lowercased `type/subtype` of a `Content-Type` value: everything
/// before the first `;`, with comments and surrounding whitespace removed.
fn extract_mime_type(content_type: &str) -> String {
    let trimmed = content_type.trim();
    let media = trimmed.split(';').next().unwrap_or(trimmed).trim();
    strip_comments(media).trim().to_lowercase()
}
/// Returns the `boundary` parameter of a `Content-Type` value, if present.
fn extract_boundary(content_type: &str) -> Option<String> {
    const PARAM: &str = "boundary";
    extract_param(content_type, PARAM)
}
/// True when a parameter name may start at byte offset `pos` of `lower`: either at
/// the very beginning, or directly after `;`, a space, or a tab.
fn is_param_boundary(lower: &str, pos: usize) -> bool {
    let Some(prev) = pos.checked_sub(1) else {
        return true;
    };
    matches!(
        lower.as_bytes().get(prev).copied(),
        Some(b';' | b' ' | b'\t')
    )
}
/// Reads a parameter value from the text immediately following `name=`.
///
/// A value starting with `"` runs to the closing quote (or end of input) and has
/// backslash escapes removed; otherwise it runs to the next `;` or whitespace.
/// Empty values yield `None`.
fn extract_param_value(rest: &str) -> Option<String> {
    match rest.strip_prefix('"') {
        Some(quoted) => {
            let inner = &quoted[..find_closing_quote(quoted)];
            (!inner.is_empty()).then(|| unescape_quoted_string(inner))
        }
        None => {
            let end = rest
                .find(|c: char| c == ';' || c.is_whitespace())
                .unwrap_or(rest.len());
            let token = &rest[..end];
            (!token.is_empty()).then(|| token.to_string())
        }
    }
}
fn extract_param(header_value: &str, param_name: &str) -> Option<String> {
let lower = header_value.to_ascii_lowercase();
let pattern = format!("{param_name}=");
let mut search_from = 0;
loop {
let idx = lower[search_from..].find(&pattern)?;
let abs_idx = search_from + idx;
if is_param_boundary(&lower, abs_idx) {
if is_inside_quotes(&lower, abs_idx) {
search_from = abs_idx + pattern.len();
continue;
}
let rest = &header_value[abs_idx + pattern.len()..];
return extract_param_value(rest);
}
search_from = abs_idx + pattern.len();
}
}
/// Returns the byte index of the first unescaped `"` in `s`, or `s.len()` if none.
/// A backslash escapes the byte that follows it.
fn find_closing_quote(s: &str) -> usize {
    let mut bytes = s.bytes().enumerate();
    while let Some((idx, byte)) = bytes.next() {
        match byte {
            b'\\' => {
                // Skip the escaped byte.
                bytes.next();
            }
            b'"' => return idx,
            _ => {}
        }
    }
    s.len()
}
/// True when byte offset `pos` of `s` lies inside a double-quoted string, i.e. an
/// odd number of unescaped `"` precede it. A backslash escapes the next byte.
fn is_inside_quotes(s: &str, pos: usize) -> bool {
    let limit = pos.min(s.len());
    let mut inside = false;
    let mut skip_next = false;

    for &byte in &s.as_bytes()[..limit] {
        if skip_next {
            skip_next = false;
            continue;
        }
        match byte {
            b'\\' => skip_next = true,
            b'"' => inside = !inside,
            _ => {}
        }
    }
    inside
}
/// Determines an attachment's file name.
///
/// Looks at the `filename` parameter of `Content-Disposition` first and the `name`
/// parameter of `Content-Type` second. For each, the RFC 2231 extended form
/// (`param*=`) is preferred, then RFC 2231 continuations (`param*0=`, …), then the
/// plain parameter with encoded-words decoded.
fn extract_filename(disposition: &str, content_type: &str) -> Option<String> {
    let lookup = |header: &str, param: &str| {
        extract_rfc2231_param(header, param)
            .or_else(|| extract_rfc2231_continuation(header, param))
            .or_else(|| extract_param(header, param).map(|raw| decode_encoded_words(&raw)))
    };
    lookup(disposition, "filename").or_else(|| lookup(content_type, "name"))
}
/// Decodes a single-section RFC 2231 extended parameter:
/// `param*=charset'language'percent-encoded-text`.
///
/// The value runs up to the next `;`. Returns `None` if the parameter is absent or
/// the value does not contain both `'` separators.
fn extract_rfc2231_param(header_value: &str, param_name: &str) -> Option<String> {
    let lower = header_value.to_ascii_lowercase();
    let pattern = format!("{param_name}*=");

    // Locate the first occurrence that starts a parameter and is not quoted text.
    let mut cursor = 0;
    let value_start = loop {
        let hit = cursor + lower[cursor..].find(&pattern)?;
        cursor = hit + pattern.len();
        if is_param_boundary(&lower, hit) && !is_inside_quotes(&lower, hit) {
            break cursor;
        }
    };

    let raw_value = header_value[value_start..]
        .split(';')
        .next()
        .unwrap_or("")
        .trim();

    let mut fields = raw_value.splitn(3, '\'');
    let (Some(charset), Some(_language), Some(encoded)) =
        (fields.next(), fields.next(), fields.next())
    else {
        return None;
    };
    Some(decode_charset(charset, &percent_decode(encoded)))
}
/// Reassembles an RFC 2231 parameter split into numbered sections
/// (`param*0=`, `param*1*=`, …).
///
/// Sections are read in ascending order starting at 0 and collection stops at the
/// first missing index (at most 100 sections). Sections marked with a trailing `*`
/// are percent-decoded; section 0 may additionally carry a `charset'language'`
/// prefix. The concatenated bytes are decoded with that charset, defaulting to
/// UTF-8. Returns `None` when section 0 is absent.
fn extract_rfc2231_continuation(header_value: &str, param_name: &str) -> Option<String> {
    let lower = header_value.to_ascii_lowercase();
    // (is percent-encoded, raw value), already in ascending section order.
    let mut sections: Vec<(bool, String)> = Vec::new();
    let mut charset = String::new();

    for section_idx in 0u32..100 {
        let encoded_pattern = format!("{param_name}*{section_idx}*=");
        if let Some(val) = find_param_value(&lower, header_value, &encoded_pattern) {
            if section_idx == 0 {
                // Only the first section may announce charset and language.
                let mut parts = val.splitn(3, '\'');
                if let (Some(cs), Some(_lang), Some(encoded)) =
                    (parts.next(), parts.next(), parts.next())
                {
                    charset = cs.to_string();
                    sections.push((true, encoded.to_string()));
                    continue;
                }
            }
            sections.push((true, val));
            continue;
        }

        let plain_pattern = format!("{param_name}*{section_idx}=");
        if let Some(val) = find_param_value(&lower, header_value, &plain_pattern) {
            sections.push((false, val));
            continue;
        }
        break;
    }

    if sections.is_empty() {
        return None;
    }

    let mut raw_bytes: Vec<u8> = Vec::new();
    for (is_encoded, value) in &sections {
        if *is_encoded {
            raw_bytes.extend(percent_decode(value));
        } else {
            raw_bytes.extend(value.as_bytes());
        }
    }

    if charset.is_empty() {
        charset = "utf-8".to_string();
    }
    Some(decode_charset(&charset, &raw_bytes))
}
/// Finds `pattern` (e.g. `"name="`) in `lower` at a parameter boundary and outside
/// quoted text, then reads the value from the same offset in `original`.
///
/// `lower` must be the ASCII-lowercased form of `original` so byte offsets agree.
fn find_param_value(lower: &str, original: &str, pattern: &str) -> Option<String> {
    let mut cursor = 0;
    while let Some(offset) = lower[cursor..].find(pattern) {
        let hit = cursor + offset;
        cursor = hit + pattern.len();
        if is_param_boundary(lower, hit) && !is_inside_quotes(lower, hit) {
            return extract_param_value(&original[cursor..]);
        }
    }
    None
}
/// Decodes `%XX` escapes into bytes. A `%` not followed by two hex digits, and
/// every other byte, is copied unchanged.
fn percent_decode(input: &str) -> Vec<u8> {
    let mut out = Vec::with_capacity(input.len());
    let mut rest = input.as_bytes();

    while let Some((&first, tail)) = rest.split_first() {
        let escaped = match rest {
            [b'%', hi, lo, ..] => decode_hex_pair(*hi, *lo),
            _ => None,
        };
        match escaped {
            Some(byte) => {
                out.push(byte);
                rest = &rest[3..];
            }
            None => {
                out.push(first);
                rest = tail;
            }
        }
    }
    out
}
/// Removes parenthesised comments (which may nest) from `input`.
///
/// A backslash escapes the following character: outside comments both are kept,
/// inside comments both are dropped. An unmatched `)` is kept as ordinary text.
fn strip_comments(input: &str) -> String {
    let mut kept = String::with_capacity(input.len());
    let mut nesting: u32 = 0;
    let mut chars = input.chars();

    while let Some(c) = chars.next() {
        match c {
            '\\' => {
                let escaped = chars.next();
                if nesting == 0 {
                    kept.push('\\');
                    if let Some(e) = escaped {
                        kept.push(e);
                    }
                }
            }
            '(' => nesting = nesting.saturating_add(1),
            ')' if nesting > 0 => nesting -= 1,
            _ => {
                if nesting == 0 {
                    kept.push(c);
                }
            }
        }
    }
    kept
}
/// Returns the index of the first occurrence of `needle` in `haystack`.
///
/// An empty `needle` matches at index 0 (mirroring `str::find("")`); without this
/// guard `slice::windows(0)` would panic.
fn find_subsequence(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    if needle.is_empty() {
        return Some(0);
    }
    haystack.windows(needle.len()).position(|w| w == needle)
}
/// Combines two ASCII hex digits into the byte they encode (`high` is the upper
/// nibble). `None` if either is not a hex digit.
fn decode_hex_pair(high: u8, low: u8) -> Option<u8> {
    hex_digit(high)
        .zip(hex_digit(low))
        .map(|(upper, lower)| (upper << 4) | lower)
}
/// Value (0–15) of an ASCII hexadecimal digit of either case; `None` for any other byte.
fn hex_digit(b: u8) -> Option<u8> {
    // `to_digit(16)` yields at most 15, so narrowing to `u8` is lossless.
    char::from(b).to_digit(16).map(|value| value as u8)
}
/// Removes one pair of enclosing double quotes; input not wrapped in a matching
/// pair is returned unchanged.
fn strip_outer_quotes(input: &str) -> &str {
    input
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .unwrap_or(input)
}
/// Resolves backslash escapes: `\x` becomes `x`. A trailing backslash with nothing
/// after it is kept.
fn unescape_quoted_string(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    let mut after_backslash = false;

    for ch in input.chars() {
        if after_backslash {
            out.push(ch);
            after_backslash = false;
        } else if ch == '\\' {
            after_backslash = true;
        } else {
            out.push(ch);
        }
    }
    if after_backslash {
        out.push('\\');
    }
    out
}
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
use super::*;
#[test]
fn parse_simple_text_email() {
let raw = b"From: sender@example.com\r\n\
To: recipient@example.com\r\n\
Subject: Test\r\n\
Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
Message-ID: <abc123@example.com>\r\n\
Content-Type: text/plain; charset=utf-8\r\n\
\r\n\
Hello, World!";
let parsed = parse_email(raw).unwrap();
assert_eq!(parsed.from.email, "sender@example.com");
assert_eq!(parsed.to.len(), 1);
assert_eq!(parsed.to[0].email, "recipient@example.com");
assert_eq!(parsed.subject.as_deref(), Some("Test"));
assert_eq!(parsed.message_id.as_deref(), Some("abc123@example.com"));
assert_eq!(parsed.body_text.as_deref(), Some("Hello, World!"));
assert!(parsed.body_html.is_none());
assert!(parsed.attachments.is_empty());
assert_eq!(parsed.size, raw.len() as u64);
}
#[test]
fn parse_multipart_alternative() {
let raw = b"From: sender@example.com\r\n\
To: recipient@example.com\r\n\
Subject: Multi\r\n\
Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
MIME-Version: 1.0\r\n\
Content-Type: multipart/alternative; boundary=\"bound42\"\r\n\
\r\n\
--bound42\r\n\
Content-Type: text/plain; charset=utf-8\r\n\
\r\n\
Plain text body\r\n\
--bound42\r\n\
Content-Type: text/html; charset=utf-8\r\n\
\r\n\
<html><body>HTML body</body></html>\r\n\
--bound42--";
let parsed = parse_email(raw).unwrap();
assert_eq!(parsed.body_text.as_deref(), Some("Plain text body"));
assert_eq!(
parsed.body_html.as_deref(),
Some("<html><body>HTML body</body></html>")
);
assert!(parsed.attachments.is_empty());
}
#[test]
fn parse_encoded_words_base64_subject() {
let raw = b"From: sender@example.com\r\n\
Subject: =?UTF-8?B?SGVsbG8gV29ybGQ=?=\r\n\
Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
\r\n\
body";
let parsed = parse_email(raw).unwrap();
assert_eq!(parsed.subject.as_deref(), Some("Hello World"));
}
#[test]
fn parse_encoded_words_q_subject() {
let raw = b"From: sender@example.com\r\n\
Subject: =?UTF-8?Q?Hello_World?=\r\n\
Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
\r\n\
body";
let parsed = parse_email(raw).unwrap();
assert_eq!(parsed.subject.as_deref(), Some("Hello World"));
}
#[test]
fn parse_encoded_words_in_display_name() {
let raw = b"From: =?UTF-8?B?Sm9obiBEb2U=?= <john@example.com>\r\n\
Subject: Test\r\n\
Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
\r\n";
let parsed = parse_email(raw).unwrap();
assert_eq!(parsed.from.name.as_deref(), Some("John Doe"));
assert_eq!(parsed.from.email, "john@example.com");
}
#[test]
fn parse_non_utf8_charset() {
let raw = b"From: sender@example.com\r\n\
Subject: =?ISO-8859-1?Q?H=E9llo?=\r\n\
Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
\r\n";
let parsed = parse_email(raw).unwrap();
assert_eq!(parsed.subject.as_deref(), Some("Héllo"));
}
#[test]
fn parse_message_id_strips_brackets() {
let raw = b"From: a@b.com\r\n\
Message-ID: <unique-id@host.com>\r\n\
Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
\r\n";
let parsed = parse_email(raw).unwrap();
assert_eq!(parsed.message_id.as_deref(), Some("unique-id@host.com"));
}
#[test]
fn parse_in_reply_to_first_only() {
let raw = b"From: a@b.com\r\n\
In-Reply-To: <first@host> <second@host>\r\n\
Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
\r\n";
let parsed = parse_email(raw).unwrap();
assert_eq!(parsed.in_reply_to.as_deref(), Some("first@host"));
}
#[test]
fn parse_references_all_ids() {
let raw = b"From: a@b.com\r\n\
References: <ref1@host> <ref2@host> <ref3@host>\r\n\
Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
\r\n";
let parsed = parse_email(raw).unwrap();
assert_eq!(
parsed.references.as_deref(),
Some("ref1@host ref2@host ref3@host")
);
}
#[test]
fn parse_date_with_numeric_timezone() {
let raw = b"From: a@b.com\r\n\
Date: Thu, 13 Feb 2025 15:47:33 +0530\r\n\
\r\n";
let parsed = parse_email(raw).unwrap();
let date = parsed.date.unwrap();
assert_eq!(date.year, 2025);
assert_eq!(date.month, 2);
assert_eq!(date.day, 13);
assert_eq!(date.hour, 15);
assert_eq!(date.minute, 47);
assert_eq!(date.second, 33);
assert_eq!(date.tz_offset_minutes, 330);
}
#[test]
fn parse_date_named_timezone() {
let raw = b"From: a@b.com\r\n\
Date: Thu, 13 Feb 2025 10:30:00 EST\r\n\
\r\n";
let parsed = parse_email(raw).unwrap();
let date = parsed.date.unwrap();
assert_eq!(date.tz_offset_minutes, -300);
}
#[test]
fn parse_address_with_display_name() {
let raw = b"From: \"John Doe\" <john@example.com>\r\n\
Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
\r\n";
let parsed = parse_email(raw).unwrap();
assert_eq!(parsed.from.name.as_deref(), Some("John Doe"));
assert_eq!(parsed.from.email, "john@example.com");
}
/// Comma-separated `To` and `Cc` lists produce one entry per address, and a
/// quoted display name inside the list is retained.
#[test]
fn parse_multiple_recipients() {
    let message = b"From: a@b.com\r\n\
        To: one@x.com, \"Two\" <two@x.com>, three@x.com\r\n\
        Cc: cc1@x.com, cc2@x.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    assert_eq!(email.to.len(), 3);
    let second_recipient = &email.to[1];
    assert_eq!(second_recipient.name.as_deref(), Some("Two"));
    assert_eq!(email.cc.len(), 2);
}
/// A `multipart/mixed` message with a text part and a PDF part yields the text
/// as the body and the PDF as a non-inline attachment in section "2".
#[test]
fn parse_multipart_with_attachment() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        MIME-Version: 1.0\r\n\
        Content-Type: multipart/mixed; boundary=\"mixbound\"\r\n\
        \r\n\
        --mixbound\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Message body\r\n\
        --mixbound\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename=\"doc.pdf\"\r\n\
        \r\n\
        PDF_CONTENT_HERE\r\n\
        --mixbound--";
    let email = parse_email(message).unwrap();
    assert_eq!(email.body_text.as_deref(), Some("Message body"));
    assert_eq!(email.attachments.len(), 1);
    let pdf = &email.attachments[0];
    assert_eq!(pdf.filename.as_deref(), Some("doc.pdf"));
    assert_eq!(pdf.content_type, "application/pdf");
    assert!(!pdf.is_inline);
    assert_eq!(pdf.section.as_deref(), Some("2"));
}
/// A part with `Content-Disposition: inline` is reported as an inline
/// attachment and its `Content-ID` is returned without angle brackets.
#[test]
fn parse_inline_attachment() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"bound\"\r\n\
        \r\n\
        --bound\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --bound\r\n\
        Content-Type: image/png\r\n\
        Content-Disposition: inline\r\n\
        Content-ID: <img001>\r\n\
        \r\n\
        PNG_DATA\r\n\
        --bound--";
    let email = parse_email(message).unwrap();
    assert_eq!(email.attachments.len(), 1);
    let image = &email.attachments[0];
    assert!(image.is_inline);
    assert_eq!(image.content_id.as_deref(), Some("img001"));
}
/// Input that ends right after the header lines (no blank separator line)
/// still parses, with neither a text nor an HTML body.
#[test]
fn parse_headers_only_no_body() {
    let message = b"From: a@b.com\r\n\
        Subject: Headers only\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n";
    let email = parse_email(message).unwrap();
    assert_eq!(email.subject.as_deref(), Some("Headers only"));
    assert!(email.body_text.is_none());
    assert!(email.body_html.is_none());
}
/// Zero-length input is rejected with `Error::EmptyInput`.
#[test]
fn parse_empty_input() {
    assert!(matches!(parse_email(b""), Err(Error::EmptyInput)));
}
/// A message lacking a `From` header is rejected with `Error::MissingFrom`.
#[test]
fn parse_missing_from() {
    let message = b"Subject: No from\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let outcome = parse_email(message);
    assert!(matches!(outcome, Err(Error::MissingFrom)));
}
/// A quoted-printable body is decoded: `=XX` escapes become bytes and a
/// trailing `=` + CRLF soft break joins the two physical lines.
#[test]
fn parse_quoted_printable_body() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: quoted-printable\r\n\
        \r\n\
        Hello=20World=0D=0ASoft=\r\n break";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(body, Some("Hello World\r\nSoft break"));
}
/// A base64-encoded text body is decoded to its plain text.
#[test]
fn parse_base64_body() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: base64\r\n\
        \r\n\
        SGVsbG8gV29ybGQ=\r\n";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(body, Some("Hello World"));
}
/// With a `multipart/alternative` nested inside `multipart/mixed`, both text
/// and HTML bodies are found and the sibling PDF is numbered as section "2".
#[test]
fn parse_nested_multipart_section_numbers() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"outer\"\r\n\
        \r\n\
        --outer\r\n\
        Content-Type: multipart/alternative; boundary=\"inner\"\r\n\
        \r\n\
        --inner\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Plain\r\n\
        --inner\r\n\
        Content-Type: text/html\r\n\
        \r\n\
        <b>HTML</b>\r\n\
        --inner--\r\n\
        --outer\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename=\"f.pdf\"\r\n\
        \r\n\
        DATA\r\n\
        --outer--";
    let email = parse_email(message).unwrap();
    assert_eq!(email.body_text.as_deref(), Some("Plain"));
    assert_eq!(email.body_html.as_deref(), Some("<b>HTML</b>"));
    assert_eq!(email.attachments.len(), 1);
    let pdf = &email.attachments[0];
    assert_eq!(pdf.section.as_deref(), Some("2"));
}
/// An RFC 2231 extended `filename*=UTF-8''...` parameter is percent-decoded
/// into the attachment's UTF-8 filename.
#[test]
fn parse_rfc2231_filename() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename*=UTF-8''r%C3%A9sum%C3%A9.pdf\r\n\
        \r\n\
        DATA\r\n\
        --b--";
    let email = parse_email(message).unwrap();
    assert_eq!(email.attachments.len(), 1);
    let filename = email.attachments[0].filename.as_deref();
    assert_eq!(filename, Some("résumé.pdf"));
}
/// The original header text remains available through `raw_headers`.
#[test]
fn parse_raw_headers_preserved() {
    let message = b"From: a@b.com\r\n\
        Subject: Test\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n\
        Body";
    let email = parse_email(message).unwrap();
    let headers = &email.raw_headers;
    assert!(headers.contains("From: a@b.com"));
    assert!(headers.contains("Subject: Test"));
}
/// Messages using bare LF line endings parse the same as CRLF ones.
#[test]
fn parse_lf_only_line_endings() {
    let message = b"From: a@b.com\n\
        Subject: LF\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\n\
        \n\
        Body with LF";
    let email = parse_email(message).unwrap();
    assert_eq!(email.subject.as_deref(), Some("LF"));
    assert_eq!(email.body_text.as_deref(), Some("Body with LF"));
}
/// A folded `Subject` (continuation line starting with a space) is unfolded
/// into a single logical value.
#[test]
fn parse_header_continuation_lines() {
    let message = b"From: a@b.com\r\nSubject: This is a very long\r\n subject line that wraps\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(subject, Some("This is a very long subject line that wraps"));
}
/// A handful of non-header binary bytes makes `parse_email` return an error.
#[test]
fn parse_garbage_input_best_effort() {
    assert!(parse_email(b"\x00\x01\x02\x03\xff\xfe").is_err());
}
/// A multipart message cut off before its closing boundary still yields the
/// text of the part that was started.
#[test]
fn parse_truncated_multipart() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"trunc\"\r\n\
        \r\n\
        --trunc\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Some text here";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(body, Some("Some text here"));
}
/// Whitespace between two adjacent encoded-words is dropped when decoding.
#[test]
fn decode_adjacent_encoded_words() {
    let text = decode_encoded_words("=?UTF-8?B?SGVs?= =?UTF-8?B?bG8=?=");
    assert_eq!(text, "Hello");
}
/// A Q-encoded ISO-8859-1 encoded-word is decoded to the matching Unicode text.
#[test]
fn decode_iso8859_encoded_word() {
    let text = decode_encoded_words("=?ISO-8859-1?Q?caf=E9?=");
    assert_eq!(text, "café");
}
/// A time of day written as `HH:MM` parses with the seconds field set to 0.
#[test]
fn parse_date_without_seconds() {
    let when = parse_rfc5322_date("Thu, 13 Feb 2025 15:47 +0000").unwrap();
    assert_eq!(when.hour, 15);
    assert_eq!(when.minute, 47);
    assert_eq!(when.second, 0);
}
/// Two-digit years are widened to four digits: `99` becomes 1999 and `25`
/// becomes 2025.
#[test]
fn parse_two_digit_year() {
    let nineties = parse_rfc5322_date("13 Feb 99 12:00:00 +0000").unwrap();
    assert_eq!(nineties.year, 1999);

    let twenties = parse_rfc5322_date("13 Feb 25 12:00:00 +0000").unwrap();
    assert_eq!(twenties.year, 2025);
}
/// Three-digit years are interpreted by adding 1900 (RFC 5322 Section 4.3):
/// 107 -> 2007, 100 -> 2000, 999 -> 2899.
#[test]
fn parse_three_digit_year_rfc5322_section_4_3() {
    let year_107 = parse_rfc5322_date("13 Feb 107 12:00:00 +0000").unwrap().year;
    assert_eq!(
        year_107, 2007,
        "3-digit year 107 must map to 2007 per RFC 5322 Section 4.3"
    );

    let year_100 = parse_rfc5322_date("13 Feb 100 12:00:00 +0000").unwrap().year;
    assert_eq!(
        year_100, 2000,
        "3-digit year 100 must map to 2000 per RFC 5322 Section 4.3"
    );

    let year_999 = parse_rfc5322_date("13 Feb 999 12:00:00 +0000").unwrap().year;
    assert_eq!(
        year_999, 2899,
        "3-digit year 999 must map to 2899 per RFC 5322 Section 4.3"
    );
}
/// Pins the two-digit-year pivot from RFC 5322 Section 4.3: 50 and 69 land in
/// the 1900s, while 49 lands in the 2000s.
#[test]
fn parse_two_digit_year_rfc5322_section_4_3_cutoff() {
    let year_50 = parse_rfc5322_date("13 Feb 50 12:00:00 +0000").unwrap().year;
    assert_eq!(
        year_50, 1950,
        "2-digit year 50 must map to 1950 per RFC 5322 Section 4.3"
    );

    let year_69 = parse_rfc5322_date("13 Feb 69 12:00:00 +0000").unwrap().year;
    assert_eq!(
        year_69, 1969,
        "2-digit year 69 must map to 1969 per RFC 5322 Section 4.3"
    );

    let year_49 = parse_rfc5322_date("13 Feb 49 12:00:00 +0000").unwrap().year;
    assert_eq!(
        year_49, 2049,
        "2-digit year 49 must map to 2049 per RFC 5322 Section 4.3"
    );
}
/// A non-text part without any `Content-Disposition` is still collected as an
/// attachment.
#[test]
fn parse_non_text_part_is_attachment() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Text\r\n\
        --b\r\n\
        Content-Type: image/jpeg\r\n\
        \r\n\
        JPEG_DATA\r\n\
        --b--";
    let email = parse_email(message).unwrap();
    assert_eq!(email.attachments.len(), 1);
    let image = &email.attachments[0];
    assert_eq!(image.content_type, "image/jpeg");
}
/// A windows-1252 body is transcoded: bytes 0x93/0x94 come out as curly
/// double quotes around the ASCII text.
#[test]
fn parse_windows1252_body() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=windows-1252\r\n\
        \r\n\
        \x93Hello\x94";
    let email = parse_email(message).unwrap();
    let body = email.body_text.unwrap();
    assert!(body.contains("Hello"));
    let has_curly_quote = ['\u{201c}', '\u{201d}']
        .iter()
        .any(|&quote| body.contains(quote));
    assert!(has_curly_quote);
}
/// A single-part `text/html` message fills `body_html` and leaves `body_text`
/// empty.
#[test]
fn parse_html_only_body() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/html; charset=utf-8\r\n\
        \r\n\
        <html><body>Hello</body></html>";
    let email = parse_email(message).unwrap();
    assert!(email.body_text.is_none());
    let html = email.body_html.as_deref();
    assert_eq!(html, Some("<html><body>Hello</body></html>"));
}
/// `Bcc` recipients are parsed into their own list, in header order.
#[test]
fn parse_bcc_addresses() {
    let message = b"From: a@b.com\r\n\
        To: to@x.com\r\n\
        Bcc: hidden@x.com, secret@x.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let blind_copies = &email.bcc;
    assert_eq!(blind_copies.len(), 2);
    assert_eq!(blind_copies[0].email, "hidden@x.com");
}
/// Builds a message whose multipart containers are nested 70 levels deep
/// (more than `MAX_MIME_DEPTH`, which is 64) and checks that parsing still
/// completes successfully instead of failing.
///
/// The test deliberately does not pin whether the innermost text part is
/// surfaced as `body_text`. The previous final assertion
/// (`is_none() || is_some()`) was a tautology that could never fail, so it was
/// removed; the successful parse is the only real check.
#[test]
fn mime_depth_limit() {
    // Outer headers declaring the first boundary, `b0`.
    let mut msg = b"From: a@b.com\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b0\"\r\n\r\n"
        .to_vec();
    // Each level opens boundary `b{i}` and declares the next one, `b{i+1}`.
    for i in 0..70 {
        msg.extend_from_slice(
            format!(
                "--b{i}\r\nContent-Type: multipart/mixed; boundary=\"b{}\"\r\n\r\n",
                i + 1
            )
            .as_bytes(),
        );
    }
    // Innermost leaf: a plain text part closed by its own boundary.
    msg.extend_from_slice(b"--b70\r\nContent-Type: text/plain\r\n\r\nDeep\r\n--b70--\r\n");
    // `unwrap` keeps the error's Debug output in the failure message.
    let _parsed = parse_email(&msg).unwrap();
}
/// Both addresses of a two-entry `Reply-To` header are returned, in order.
#[test]
fn parse_reply_to() {
    let message = b"From: a@b.com\r\n\
        Reply-To: noreply@example.com, support@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let reply_targets = &email.reply_to;
    assert_eq!(reply_targets.len(), 2);
    assert_eq!(reply_targets[0].email, "noreply@example.com");
    assert_eq!(reply_targets[1].email, "support@example.com");
}
/// A base64 encoded-word in the GB2312 charset decodes to the expected
/// Chinese text.
#[test]
fn parse_gb2312_encoded_word() {
    let message = b"From: sender@example.com\r\n\
        Subject: =?GB2312?B?xOO6ww==?=\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(subject, Some("你好"));
}
/// Only the angle brackets are stripped from a part's `Content-ID`; the rest
/// of the value (including a `cid:` prefix) is kept verbatim.
#[test]
fn parse_content_id_strips_brackets() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: image/png\r\n\
        Content-ID: <cid:image001@01D00000.00000000>\r\n\
        \r\n\
        PNG\r\n\
        --b--";
    let email = parse_email(message).unwrap();
    let content_id = email.attachments[0].content_id.as_deref();
    assert_eq!(content_id, Some("cid:image001@01D00000.00000000"));
}
/// An attachment part that names no file is still collected, with `filename`
/// left as `None` and `is_inline` false.
#[test]
fn parse_attachment_without_filename() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/octet-stream\r\n\
        Content-Disposition: attachment\r\n\
        \r\n\
        BINARY\r\n\
        --b--";
    let email = parse_email(message).unwrap();
    assert_eq!(email.attachments.len(), 1);
    let blob = &email.attachments[0];
    assert!(blob.filename.is_none());
    assert_eq!(blob.content_type, "application/octet-stream");
    assert!(!blob.is_inline);
}
/// `text/plain` with no `charset` parameter still decodes an ASCII body.
#[test]
fn parse_content_type_without_charset_defaults() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Hello ASCII";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(body, Some("Hello ASCII"));
}
/// A MIME part that carries no `Content-Type` header is treated as plain text
/// and becomes the text body.
#[test]
fn parse_mime_part_no_content_type_defaults_to_us_ascii() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Transfer-Encoding: 7bit\r\n\
        \r\n\
        Hello ASCII\r\n\
        --b--";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(body, Some("Hello ASCII"));
}
/// A MIME part with an empty header section is still read as the text body.
#[test]
fn parse_mime_part_no_headers_at_all() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        \r\n\
        Headerless body\r\n\
        --b--";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(body, Some("Headerless body"));
}
/// A multipart message made solely of attachments has no text or HTML body,
/// and the attachments are numbered "1" and "2" in order.
#[test]
fn parse_multipart_only_attachments() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename=\"a.pdf\"\r\n\
        \r\n\
        PDF1\r\n\
        --b\r\n\
        Content-Type: image/png\r\n\
        Content-Disposition: attachment; filename=\"b.png\"\r\n\
        \r\n\
        PNG2\r\n\
        --b--";
    let email = parse_email(message).unwrap();
    assert!(email.body_text.is_none());
    assert!(email.body_html.is_none());
    assert_eq!(email.attachments.len(), 2);
    let first = &email.attachments[0];
    let second = &email.attachments[1];
    assert_eq!(first.section.as_deref(), Some("1"));
    assert_eq!(second.section.as_deref(), Some("2"));
}
/// An unrecognised charset label does not prevent the body from being
/// returned; its ASCII content is still readable.
#[test]
fn parse_unknown_charset_body_fallback() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=x-unknown-fake\r\n\
        \r\n\
        Plain text in unknown charset";
    let email = parse_email(message).unwrap();
    let body = email.body_text;
    assert!(body.is_some());
    assert!(body.unwrap().contains("Plain text"));
}
/// A part that has a `Content-ID` but no `Content-Disposition` is classified
/// as an inline attachment.
#[test]
fn parse_content_id_without_disposition_is_inline() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: image/gif\r\n\
        Content-ID: <img42>\r\n\
        \r\n\
        GIF89a\r\n\
        --b--";
    let email = parse_email(message).unwrap();
    assert_eq!(email.attachments.len(), 1);
    let image = &email.attachments[0];
    assert!(image.is_inline);
    assert_eq!(image.content_id.as_deref(), Some("img42"));
}
/// A 10,000-character `Subject` on a single line is returned in full.
#[test]
fn parse_overlong_subject() {
    let long_subject = "A".repeat(10_000);
    let message = format!(
        "From: a@b.com\r\n\
        Subject: {long_subject}\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n"
    );
    let email = parse_email(message.as_bytes()).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(subject, Some(long_subject.as_str()));
}
/// When `From` lists several mailboxes, the first one is used as the sender.
#[test]
fn parse_multiple_from_takes_first() {
    let message = b"From: first@example.com, second@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let sender = &email.from;
    assert_eq!(sender.email, "first@example.com");
}
/// A `multipart/mixed` content type lacking a `boundary` parameter still
/// produces some text body rather than failing.
#[test]
fn parse_multipart_no_boundary_param() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed\r\n\
        \r\n\
        Some text content";
    let email = parse_email(message).unwrap();
    let body = &email.body_text;
    assert!(body.is_some());
}
/// Headers followed by the blank separator and nothing else give no text body.
#[test]
fn parse_empty_body_after_headers() {
    let message = b"From: a@b.com\r\n\
        Subject: Empty body\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    assert_eq!(email.subject.as_deref(), Some("Empty body"));
    assert!(email.body_text.is_none());
}
/// Adjacent encoded-words using different charsets and encodings are each
/// decoded and concatenated; `_` in the Q-encoded word becomes a space.
#[test]
fn parse_mixed_charset_encoded_words() {
    let message = b"From: a@b.com\r\n\
        Subject: =?UTF-8?B?SGVsbG8=?= =?ISO-8859-1?Q?_caf=E9?=\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(subject, Some("Hello caf\u{e9}"));
}
/// A missing `Date` header is not an error; `date` is simply `None`.
#[test]
fn parse_no_date_header() {
    let message = b"From: a@b.com\r\n\
        Subject: No date\r\n\
        \r\n\
        Body";
    let email = parse_email(message).unwrap();
    assert!(email.date.is_none());
    assert_eq!(email.subject.as_deref(), Some("No date"));
}
/// A `text/plain` part explicitly marked `attachment` is kept as an
/// attachment and does not replace the real text body.
#[test]
fn parse_explicit_attachment_text_plain() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body text\r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        Content-Disposition: attachment; filename=\"log.txt\"\r\n\
        \r\n\
        Log file content\r\n\
        --b--";
    let email = parse_email(message).unwrap();
    assert_eq!(email.body_text.as_deref(), Some("Body text"));
    assert_eq!(email.attachments.len(), 1);
    let log = &email.attachments[0];
    assert_eq!(log.filename.as_deref(), Some("log.txt"));
    assert_eq!(log.content_type, "text/plain");
}
/// A `-0800` zone is reported as an offset of -480 minutes.
#[test]
fn parse_date_negative_timezone() {
    let message = b"From: a@b.com\r\n\
        Date: Fri, 14 Feb 2025 09:15:00 -0800\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let when = email.date.unwrap();
    assert_eq!(when.tz_offset_minutes, -480);
}
/// `size` on the parsed email equals the byte length of the raw input.
#[test]
fn parse_size_equals_input_length() {
    let message = b"From: a@b.com\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\nBody";
    let expected_size = message.len() as u64;
    let email = parse_email(message).unwrap();
    assert_eq!(email.size, expected_size);
}
/// Feeding every byte value 0..=255 once, in order, makes parsing fail.
#[test]
fn parse_binary_garbage_returns_error() {
    let every_byte: Vec<u8> = (u8::MIN..=u8::MAX).collect();
    assert!(parse_email(&every_byte).is_err());
}
/// Two encoded-words split across a folded `Subject` are joined with no
/// whitespace between them.
#[test]
fn parse_folded_encoded_word_subject() {
    let message = b"From: a@b.com\r\nSubject: =?UTF-8?B?SGVsbG8=?=\r\n =?UTF-8?B?V29ybGQ=?=\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(subject, Some("HelloWorld"));
}
/// Charset names and the `b`/`q` encoding letter in encoded-words are
/// accepted in lowercase.
#[test]
fn parse_encoded_word_lowercase_encoding() {
    let message = b"From: sender@example.com\r\n\
        Subject: =?utf-8?b?SGVsbG8=?= =?utf-8?q?_World?=\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(subject, Some("Hello World"));
}
/// An encoded-word missing its closing `?=` is left in the subject as
/// literal text (the `=?` marker is still present).
#[test]
fn parse_malformed_encoded_word_passthrough() {
    let message = b"From: a@b.com\r\n\
        Subject: =?UTF-8?B?broken\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let subject = email.subject;
    assert!(subject.is_some());
    assert!(subject.unwrap().contains("=?"));
}
/// An encoded-word whose encoding letter is neither B nor Q is passed
/// through undecoded (the `=?` marker is still present).
#[test]
fn parse_encoded_word_unknown_encoding_type() {
    let message = b"From: a@b.com\r\n\
        Subject: =?UTF-8?X?data?=\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let subject = email.subject;
    assert!(subject.is_some());
    assert!(subject.unwrap().contains("=?"));
}
/// Raw UTF-8 in header values (RFC 6532 style) is preserved in both the
/// subject and the sender's display name.
#[test]
fn parse_utf8_directly_in_headers_rfc6532() {
    let message = "From: José <jose@example.com>\r\n\
        Subject: Ñoño café\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n\
        Body";
    let email = parse_email(message.as_bytes()).unwrap();
    assert_eq!(email.subject.as_deref(), Some("Ñoño café"));
    let sender = &email.from;
    assert_eq!(sender.name.as_deref(), Some("José"));
    assert_eq!(sender.email, "jose@example.com");
}
/// Text before the first boundary (the preamble) is not treated as body
/// content.
#[test]
fn parse_multipart_with_preamble() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"preamble-test\"\r\n\
        \r\n\
        This is the preamble, which should be ignored.\r\n\
        --preamble-test\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Actual body\r\n\
        --preamble-test--";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(body, Some("Actual body"));
}
/// If `Content-Disposition` carries no filename, the `name` parameter of
/// `Content-Type` supplies the attachment's filename.
#[test]
fn parse_attachment_name_from_content_type() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/pdf; name=\"report.pdf\"\r\n\
        Content-Disposition: attachment\r\n\
        \r\n\
        PDF\r\n\
        --b--";
    let email = parse_email(message).unwrap();
    assert_eq!(email.attachments.len(), 1);
    let filename = email.attachments[0].filename.as_deref();
    assert_eq!(filename, Some("report.pdf"));
}
/// A quoted-printable soft line break terminated by a bare LF is removed
/// just like one terminated by CRLF.
#[test]
fn parse_qp_soft_break_lf_only() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: quoted-printable\r\n\
        \r\n\
        Hello=\nWorld";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(body, Some("HelloWorld"));
}
/// Plain text on either side of an encoded-word keeps its separating spaces.
#[test]
fn parse_subject_mixed_encoded_and_plain() {
    let message = b"From: a@b.com\r\n\
        Subject: Re: =?UTF-8?B?SGVsbG8=?= there\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(subject, Some("Re: Hello there"));
}
/// A body made only of whitespace and line breaks is still reported as a
/// present text body.
#[test]
fn parse_whitespace_only_body() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        \r\n\
        \r\n \r\n";
    let email = parse_email(message).unwrap();
    let body = &email.body_text;
    assert!(body.is_some());
}
/// A `Date` value with no zone at all parses with a zero offset.
#[test]
fn parse_date_missing_timezone() {
    let message = b"From: a@b.com\r\n\
        Date: 13 Feb 2025 12:00:00\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let when = email.date.unwrap();
    assert_eq!(when.year, 2025);
    assert_eq!(when.tz_offset_minutes, 0);
}
/// Parts inside a nested multipart get dotted section numbers: the inline
/// PNG inside `multipart/related` is "1.2", while the top-level PDF is "2".
#[test]
fn parse_deeply_nested_section_dot_notation() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"outer\"\r\n\
        \r\n\
        --outer\r\n\
        Content-Type: multipart/related; boundary=\"rel\"\r\n\
        \r\n\
        --rel\r\n\
        Content-Type: text/html\r\n\
        \r\n\
        <img src=\"cid:img1\">\r\n\
        --rel\r\n\
        Content-Type: image/png\r\n\
        Content-ID: <img1>\r\n\
        \r\n\
        PNG_DATA\r\n\
        --rel--\r\n\
        --outer\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename=\"doc.pdf\"\r\n\
        \r\n\
        PDF\r\n\
        --outer--";
    let email = parse_email(message).unwrap();

    // The HTML part nested in multipart/related becomes the HTML body.
    assert!(email.body_html.is_some());
    assert!(email.body_html.unwrap().contains("cid:img1"));

    // Second child of the first top-level part.
    let png = email
        .attachments
        .iter()
        .find(|att| att.content_type == "image/png")
        .unwrap();
    assert_eq!(png.section.as_deref(), Some("1.2"));
    assert!(png.is_inline);

    // Second top-level part.
    let pdf = email
        .attachments
        .iter()
        .find(|att| att.content_type == "application/pdf")
        .unwrap();
    assert_eq!(pdf.section.as_deref(), Some("2"));
}
/// Stray high bytes (0xFF 0xFE) in a charset-less text body do not prevent a
/// text body from being produced.
#[test]
fn parse_non_ascii_bytes_in_body() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Hello \xff\xfe world";
    let email = parse_email(message).unwrap();
    let body = &email.body_text;
    assert!(body.is_some());
}
/// Base64 content wrapped across several CRLF-separated lines decodes as
/// one continuous stream.
#[test]
fn parse_base64_body_with_line_breaks() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: base64\r\n\
        \r\n\
        SGVs\r\nbG8g\r\nV29y\r\nbGQ=";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(body, Some("Hello World"));
}
/// Extra spaces around and inside the `Date` value do not break date parsing.
#[test]
fn parse_date_extra_whitespace() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000 \r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let when = email.date.unwrap();
    assert_eq!(when.year, 2025);
    assert_eq!(when.month, 2);
    assert_eq!(when.day, 13);
}
/// In a top-level `multipart/related` message the HTML part becomes the HTML
/// body and the image is an inline attachment with its content id and
/// filename.
#[test]
fn parse_multipart_related_with_inline_images() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/related; boundary=\"rel\"\r\n\
        \r\n\
        --rel\r\n\
        Content-Type: text/html\r\n\
        \r\n\
        <html><img src=\"cid:logo\"></html>\r\n\
        --rel\r\n\
        Content-Type: image/jpeg\r\n\
        Content-ID: <logo>\r\n\
        Content-Disposition: inline; filename=\"logo.jpg\"\r\n\
        \r\n\
        JPEG_DATA\r\n\
        --rel--";
    let email = parse_email(message).unwrap();
    assert!(email.body_html.is_some());
    assert_eq!(email.attachments.len(), 1);
    let logo = &email.attachments[0];
    assert!(logo.is_inline);
    assert_eq!(logo.content_id.as_deref(), Some("logo"));
    assert_eq!(logo.filename.as_deref(), Some("logo.jpg"));
}
/// The smallest accepted message: a lone `From` header. All optional fields
/// come back empty.
#[test]
fn parse_minimal_message_from_only() {
    let message = b"From: a@b.com\r\n\r\n";
    let email = parse_email(message).unwrap();
    assert_eq!(email.from.email, "a@b.com");
    assert!(email.subject.is_none());
    assert!(email.date.is_none());
    assert!(email.body_text.is_none());
}
/// When a header appears more than once, the first occurrence is the one
/// used (shown here for `From` and `Subject`).
#[test]
fn parse_multiple_same_headers() {
    let message = b"From: first@example.com\r\n\
        From: second@example.com\r\n\
        Subject: First\r\n\
        Subject: Second\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    assert_eq!(email.from.email, "first@example.com");
    assert_eq!(email.subject.as_deref(), Some("First"));
}
/// Table-driven check of every supported named zone and its offset in
/// minutes.
#[test]
fn parse_date_all_named_timezones() {
    // (zone abbreviation, expected offset in minutes)
    let zone_table = [
        ("EST", -300),
        ("EDT", -240),
        ("CST", -360),
        ("CDT", -300),
        ("MST", -420),
        ("MDT", -360),
        ("PST", -480),
        ("PDT", -420),
        ("GMT", 0),
        ("UTC", 0),
        ("UT", 0),
    ];
    for (tz_name, expected_offset) in zone_table {
        let message = format!("From: a@b.com\r\nDate: Thu, 13 Feb 2025 12:00:00 {tz_name}\r\n\r\n");
        let email = parse_email(message.as_bytes()).unwrap();
        let when = email.date.unwrap();
        assert_eq!(
            when.tz_offset_minutes, expected_offset,
            "Failed for timezone {tz_name}"
        );
    }
}
/// A boundary containing characters such as `-`, `=`, `_` and `+` is matched
/// literally.
#[test]
fn parse_boundary_with_special_chars() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"----=_Part_123+abc\"\r\n\
        \r\n\
        ------=_Part_123+abc\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body text\r\n\
        ------=_Part_123+abc--";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(body, Some("Body text"));
}
/// Malformed base64 content does not make parsing fail; some text body is
/// still produced.
#[test]
fn parse_truncated_base64_body() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: base64\r\n\
        \r\n\
        SGVsbG8gV29yb===invalid";
    let email = parse_email(message).unwrap();
    let body = &email.body_text;
    assert!(body.is_some());
}
/// An empty group (`Undisclosed:;`) in `To` does not stop the message from
/// parsing; the sender is still read correctly.
#[test]
fn parse_address_group_syntax() {
    let message = b"From: sender@example.com\r\n\
        To: Undisclosed:;\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let sender = &email.from;
    assert_eq!(sender.email, "sender@example.com");
}
/// A base64 encoded-word in ISO-2022-JP decodes to the expected Japanese
/// text.
#[test]
fn parse_iso2022jp_encoded_word() {
    let message = b"From: a@b.com\r\n\
        Subject: =?ISO-2022-JP?B?GyRCJUYlOSVIGyhC?=\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert!(subject.is_some());
    assert_eq!(subject, Some("テスト"));
}
/// If the declared boundary never occurs in the body, parsing succeeds with
/// no text or HTML body.
#[test]
fn parse_multipart_missing_parts_tolerance() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"never-appears\"\r\n\
        \r\n\
        This body doesn't contain any boundaries at all.";
    let email = parse_email(message).unwrap();
    assert!(email.body_text.is_none());
    assert!(email.body_html.is_none());
}
/// Encoded-words are decoded in address display names (`From`, `To`) as well
/// as in `Subject`.
#[test]
fn parse_encoded_word_in_multiple_header_types() {
    let message = b"From: =?UTF-8?Q?M=C3=BCller?= <mueller@example.com>\r\n\
        To: =?UTF-8?B?U21pdGg=?= <smith@example.com>\r\n\
        Subject: =?UTF-8?Q?Caf=C3=A9?=\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let sender_name = email.from.name.as_deref();
    assert_eq!(sender_name, Some("Müller"));
    let recipient_name = email.to[0].name.as_deref();
    assert_eq!(recipient_name, Some("Smith"));
    assert_eq!(email.subject.as_deref(), Some("Café"));
}
/// An attachment's `size` counts the bytes of its part body only (10 here),
/// not the CRLF that precedes the next boundary.
#[test]
fn parse_attachment_size_reflects_part_body() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename=\"f.pdf\"\r\n\
        \r\n\
        0123456789\r\n\
        --b--";
    let email = parse_email(message).unwrap();
    assert_eq!(email.attachments.len(), 1);
    let pdf = &email.attachments[0];
    assert_eq!(pdf.size, Some(10));
}
/// A `boundary` parameter given without quotes is honoured.
#[test]
fn parse_unquoted_boundary() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=simple_boundary\r\n\
        \r\n\
        --simple_boundary\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Text\r\n\
        --simple_boundary--";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(body, Some("Text"));
}
/// A `Message-ID` written without angle brackets is accepted as-is.
#[test]
fn parse_message_id_without_angle_brackets() {
    let message = b"From: a@b.com\r\n\
        Message-ID: bare-id@host.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let message_id = email.message_id.as_deref();
    assert_eq!(message_id, Some("bare-id@host.com"));
}
/// A `References` header with an empty value yields `None` rather than an
/// empty string.
#[test]
fn parse_empty_references_header() {
    let message = b"From: a@b.com\r\n\
        References: \r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let references = &email.references;
    assert!(references.is_none());
}
/// One text part followed by five generated attachments: the body is the
/// text, and attachment `k` (1-based) is `file{k}.bin` in section `k + 1`.
#[test]
fn parse_large_multipart_many_attachments() {
    let mut message = b"From: a@b.com\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"multi\"\r\n\r\n"
        .to_vec();
    // Section 1: the text body.
    message.extend_from_slice(b"--multi\r\nContent-Type: text/plain\r\n\r\nBody\r\n");
    // Sections 2..=6: five binary attachments.
    for i in 1..=5 {
        let part = format!(
            "--multi\r\nContent-Type: application/octet-stream\r\n\
            Content-Disposition: attachment; filename=\"file{i}.bin\"\r\n\r\n\
            DATA{i}\r\n"
        );
        message.extend_from_slice(part.as_bytes());
    }
    message.extend_from_slice(b"--multi--");

    let email = parse_email(&message).unwrap();
    assert_eq!(email.body_text.as_deref(), Some("Body"));
    assert_eq!(email.attachments.len(), 5);
    for (i, att) in email.attachments.iter().enumerate() {
        // Attachments start at section 2 because section 1 is the text part.
        let expected_section = (i + 2).to_string();
        assert_eq!(
            att.section.as_deref(),
            Some(expected_section.as_str()),
            "Wrong section for attachment {i}"
        );
        let expected_filename = format!("file{}.bin", i + 1);
        assert_eq!(att.filename.as_deref(), Some(expected_filename.as_str()));
    }
}
/// A `Message-ID` of just `<>` is treated as absent.
#[test]
fn parse_message_id_empty_brackets() {
    let message = b"From: a@b.com\r\n\
        Message-ID: <>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let message_id = &email.message_id;
    assert!(message_id.is_none());
}
/// A `Message-ID` header with an empty value is treated as absent.
#[test]
fn parse_message_id_empty_value() {
    let message = b"From: a@b.com\r\n\
        Message-ID: \r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let message_id = &email.message_id;
    assert!(message_id.is_none());
}
/// Looking up `filename` must not match a longer parameter name that merely
/// ends with it (`xfilename*`); the real `filename*` value is returned.
#[test]
fn rfc2231_param_boundary_check() {
    let header_value = "attachment; xfilename*=UTF-8''bad.pdf; filename*=UTF-8''good.pdf";
    let found = extract_rfc2231_param(header_value, "filename");
    assert_eq!(found.as_deref(), Some("good.pdf"));
}
/// The extended parameter is found even when it is the very first thing in
/// the value, with no preceding `;`.
#[test]
fn rfc2231_param_at_start() {
    let found = extract_rfc2231_param("filename*=UTF-8''test.pdf", "filename");
    assert_eq!(found.as_deref(), Some("test.pdf"));
}
/// A comma inside a quoted display name is part of the name, not an address
/// separator.
#[test]
fn parse_quoted_display_name_with_comma() {
    let message = b"From: \"Doe, John\" <john@example.com>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let sender = &email.from;
    assert_eq!(sender.name.as_deref(), Some("Doe, John"));
    assert_eq!(sender.email, "john@example.com");
}
/// Backslash-escaped quotes inside a quoted display name are unescaped in
/// the parsed name.
#[test]
fn parse_quoted_display_name_with_escaped_chars() {
    let message = b"From: \"John \\\"Doc\\\" Doe\" <john@example.com>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    let sender = &email.from;
    assert_eq!(sender.name.as_deref(), Some("John \"Doc\" Doe"));
    assert_eq!(sender.email, "john@example.com");
}
/// `unescape_quoted_string` removes the backslash from `\\` and `\"` pairs,
/// leaves unescaped text alone, and keeps a lone trailing backslash.
#[test]
fn unescape_quoted_string_backslash() {
    // No escapes: unchanged.
    let plain = unescape_quoted_string("hello");
    assert_eq!(plain, "hello");
    // Escaped backslash collapses to one backslash.
    let backslash = unescape_quoted_string("a\\\\b");
    assert_eq!(backslash, "a\\b");
    // Escaped quote becomes a bare quote.
    let quote = unescape_quoted_string("a\\\"b");
    assert_eq!(quote, "a\"b");
    // A dangling backslash at the end is preserved.
    let dangling = unescape_quoted_string("trailing\\");
    assert_eq!(dangling, "trailing\\");
}
/// An escaped quote inside a quoted display name must not confuse address
/// list splitting: two addresses are found and the name is unescaped.
#[test]
fn parse_address_list_with_escaped_quotes_in_display_name() {
    let message = b"From: a@b.com\r\n\
        To: \"A\\\"B\" <a@x.com>, c@d.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let email = parse_email(message).unwrap();
    assert_eq!(
        email.to.len(),
        2,
        "Expected 2 To addresses but got {:?}",
        email.to
    );
    let first = &email.to[0];
    assert_eq!(first.email, "a@x.com");
    assert_eq!(first.name.as_deref(), Some("A\"B"));
    let second = &email.to[1];
    assert_eq!(second.email, "c@d.com");
}
/// RFC 2231 parameter continuations (`filename*0`, `filename*1`) are
/// concatenated into a single filename.
#[test]
fn parse_rfc2231_continuation_filename() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename*0=\"very_long_\"; filename*1=\"filename.pdf\"\r\n\
        \r\n\
        DATA\r\n\
        --b--";
    let email = parse_email(message).unwrap();
    assert_eq!(email.attachments.len(), 1);
    let filename = email.attachments[0].filename.as_deref();
    assert_eq!(
        filename,
        Some("very_long_filename.pdf"),
        "RFC 2231 continuation filename not reassembled"
    );
}
/// Percent-encoded RFC 2231 continuations (`filename*0*`, `filename*1*`) are
/// joined and decoded using the charset declared on the first segment.
#[test]
fn parse_rfc2231_continuation_with_charset() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename*0*=UTF-8''r%C3%A9sum; filename*1*=%C3%A9.pdf\r\n\
        \r\n\
        DATA\r\n\
        --b--";
    let email = parse_email(message).unwrap();
    assert_eq!(email.attachments.len(), 1);
    let filename = email.attachments[0].filename.as_deref();
    assert_eq!(
        filename,
        Some("résumé.pdf"),
        "RFC 2231 continuation with charset not reassembled"
    );
}
/// A comma inside a parenthesised comment must not split the address list,
/// and the comment text is expected to surface as the display name.
#[test]
fn parse_address_comment_with_comma() {
    let input = b"From: sender@example.com\r\n\
        To: user@example.com (Doe, John), other@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let msg = parse_email(input).unwrap();
    let recipients = &msg.to;
    assert_eq!(
        recipients.len(),
        2,
        "Expected 2 To addresses but got {:?}",
        recipients
    );
    let (first, second) = (&recipients[0], &recipients[1]);
    assert_eq!(first.email, "user@example.com");
    assert_eq!(
        first.name.as_deref(),
        Some("Doe, John"),
        "Display name from comment should be preserved intact"
    );
    assert_eq!(second.email, "other@example.com");
}
/// Unfolding a header continued with a tab must keep the tab in the value.
#[test]
fn parse_header_unfolding_preserves_wsp() {
    let input = b"From: a@b.com\r\nSubject: Hello\r\n\tWorld\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";
    let msg = parse_email(input).unwrap();
    let subject = msg.subject.as_deref();
    assert_eq!(
        subject,
        Some("Hello\tWorld"),
        "Tab from continuation line should be preserved per RFC 5322 Section 2.2.3"
    );
}
/// `to_unix_timestamp` must yield the known epoch value for a UTC time and
/// the same value for the equivalent wall-clock time at +05:30.
#[test]
fn datetime_to_unix_timestamp() {
    use crate::types::DateTime;
    let at_utc = DateTime {
        year: 2025,
        month: 2,
        day: 13,
        hour: 15,
        minute: 47,
        second: 33,
        tz_offset_minutes: 0,
    };
    assert_eq!(at_utc.to_unix_timestamp(), 1_739_461_653);
    // Same instant expressed with a +330 minute offset.
    let at_plus_0530 = DateTime {
        year: 2025,
        month: 2,
        day: 13,
        hour: 21,
        minute: 17,
        second: 33,
        tz_offset_minutes: 330,
    };
    assert_eq!(
        at_plus_0530.to_unix_timestamp(),
        at_utc.to_unix_timestamp()
    );
}
/// `from_unix_timestamp` must reconstruct the calendar fields for offset 0
/// and shift hour/minute for a +330 minute offset.
#[test]
fn datetime_from_unix_timestamp() {
    use crate::types::DateTime;
    let epoch_secs = 1_739_461_653_i64;
    let at_utc = DateTime::from_unix_timestamp(epoch_secs, 0);
    assert_eq!(at_utc.year, 2025);
    assert_eq!(at_utc.month, 2);
    assert_eq!(at_utc.day, 13);
    assert_eq!(at_utc.hour, 15);
    assert_eq!(at_utc.minute, 47);
    assert_eq!(at_utc.second, 33);
    assert_eq!(at_utc.tz_offset_minutes, 0);
    let shifted = DateTime::from_unix_timestamp(epoch_secs, 330);
    assert_eq!(shifted.hour, 21);
    assert_eq!(shifted.minute, 17);
}
/// Converting to a Unix timestamp and back with the same offset must
/// reproduce the original `DateTime`.
#[test]
fn datetime_round_trip_timestamp() {
    use crate::types::DateTime;
    let original = DateTime {
        year: 2025,
        month: 12,
        day: 31,
        hour: 23,
        minute: 59,
        second: 59,
        tz_offset_minutes: -480,
    };
    let epoch_secs = original.to_unix_timestamp();
    let round_tripped = DateTime::from_unix_timestamp(epoch_secs, -480);
    assert_eq!(original, round_tripped);
}
/// Ordering must compare instants: 12:00 at offset 0 equals 07:00 at
/// offset -300, and 13:00 at offset 0 sorts after 12:00 at offset 0.
#[test]
fn datetime_ord_comparison() {
    use crate::types::DateTime;
    let noon_utc = DateTime {
        year: 2025,
        month: 1,
        day: 1,
        hour: 12,
        minute: 0,
        second: 0,
        tz_offset_minutes: 0,
    };
    let seven_est = DateTime {
        year: 2025,
        month: 1,
        day: 1,
        hour: 7,
        minute: 0,
        second: 0,
        tz_offset_minutes: -300,
    };
    assert_eq!(noon_utc.cmp(&seven_est), std::cmp::Ordering::Equal);
    let one_pm_utc = DateTime {
        year: 2025,
        month: 1,
        day: 1,
        hour: 13,
        minute: 0,
        second: 0,
        tz_offset_minutes: 0,
    };
    assert!(one_pm_utc > noon_utc);
}
/// Timestamp 0 at offset 0 must map to 1970-01-01 00:00:00 and convert
/// back to 0.
#[test]
fn datetime_epoch() {
    use crate::types::DateTime;
    let origin = DateTime::from_unix_timestamp(0, 0);
    assert_eq!(origin.year, 1970);
    assert_eq!(origin.month, 1);
    assert_eq!(origin.day, 1);
    assert_eq!(origin.hour, 0);
    assert_eq!(origin.minute, 0);
    assert_eq!(origin.second, 0);
    assert_eq!(origin.to_unix_timestamp(), 0);
}
/// `parse_headers_only` must populate header-derived fields while leaving
/// bodies and attachments empty, even for a multipart message.
#[test]
fn parse_headers_only_extracts_metadata() {
    let input = b"From: sender@example.com\r\n\
        To: recipient@example.com\r\n\
        Subject: Test\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Message-ID: <abc123@example.com>\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        This body should NOT be parsed\r\n\
        --b\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename=\"doc.pdf\"\r\n\
        \r\n\
        PDF_DATA\r\n\
        --b--";
    let msg = parse_headers_only(input).unwrap();
    // Header-derived metadata is present.
    assert_eq!(msg.from.email, "sender@example.com");
    assert_eq!(msg.to.len(), 1);
    assert_eq!(msg.subject.as_deref(), Some("Test"));
    assert_eq!(msg.message_id.as_deref(), Some("abc123@example.com"));
    assert!(msg.date.is_some());
    // Body-derived data is absent.
    assert!(msg.body_text.is_none());
    assert!(msg.body_html.is_none());
    assert!(msg.attachments.is_empty());
}
/// Empty input to `parse_headers_only` must yield `Error::EmptyInput`.
#[test]
fn parse_headers_only_empty_input() {
    let outcome = parse_headers_only(b"");
    assert!(matches!(outcome, Err(Error::EmptyInput)));
}
/// Headers without a `From` field must yield `Error::MissingFrom`.
#[test]
fn parse_headers_only_missing_from() {
    let input = b"Subject: No From\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";
    let outcome = parse_headers_only(input);
    assert!(matches!(outcome, Err(Error::MissingFrom)));
}
/// An escaped backslash in a quoted `filename` parameter must come out as
/// a single backslash in the parsed attachment filename.
#[test]
fn extract_param_unescapes_backslash_in_filename() {
    let input = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename=\"path\\\\file.pdf\"\r\n\
        \r\n\
        DATA\r\n\
        --b--";
    let msg = parse_email(input).unwrap();
    assert_eq!(msg.attachments.len(), 1);
    let attachment = &msg.attachments[0];
    assert_eq!(
        attachment.filename.as_deref(),
        Some("path\\file.pdf"),
        "Backslash in quoted-string filename must be unescaped per RFC 5322 Section 3.2.4"
    );
}
/// An escaped double quote in a quoted `filename` parameter must not end
/// the value early and must appear unescaped in the result.
#[test]
fn extract_param_handles_escaped_quote_in_filename() {
    let input = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename=\"file\\\"name.pdf\"\r\n\
        \r\n\
        DATA\r\n\
        --b--";
    let msg = parse_email(input).unwrap();
    assert_eq!(msg.attachments.len(), 1);
    let attachment = &msg.attachments[0];
    assert_eq!(
        attachment.filename.as_deref(),
        Some("file\"name.pdf"),
        "Escaped quote in quoted-string filename must be handled per RFC 5322 Section 3.2.4"
    );
}
/// Building a message whose attachment filename contains a backslash and
/// parsing it back must return the same filename.
#[test]
fn build_then_parse_filename_with_backslash_round_trip() {
    let sender = Address {
        name: None,
        email: "a@b.com".into(),
    };
    let recipient = Address {
        name: None,
        email: "to@b.com".into(),
    };
    let pdf = crate::types::OutgoingAttachment {
        filename: "path\\file.pdf".into(),
        content_type: "application/pdf".into(),
        data: b"data".to_vec(),
    };
    let outgoing = crate::types::OutgoingEmail {
        from: sender,
        to: vec![recipient],
        cc: vec![],
        bcc: vec![],
        reply_to: None,
        subject: "test".into(),
        body_text: Some("Body".into()),
        body_html: None,
        in_reply_to: None,
        references: None,
        attachments: vec![pdf],
    };
    let wire = crate::build_message(&outgoing).unwrap();
    let msg = parse_email(&wire.raw).unwrap();
    assert_eq!(msg.attachments.len(), 1);
    assert_eq!(
        msg.attachments[0].filename.as_deref(),
        Some("path\\file.pdf"),
        "Round-trip filename with backslash must be preserved"
    );
}
/// Building a message whose attachment filename contains a double quote
/// and parsing it back must return the same filename.
#[test]
fn build_then_parse_filename_with_quote_round_trip() {
    let sender = Address {
        name: None,
        email: "a@b.com".into(),
    };
    let recipient = Address {
        name: None,
        email: "to@b.com".into(),
    };
    let pdf = crate::types::OutgoingAttachment {
        filename: "file\"name.pdf".into(),
        content_type: "application/pdf".into(),
        data: b"data".to_vec(),
    };
    let outgoing = crate::types::OutgoingEmail {
        from: sender,
        to: vec![recipient],
        cc: vec![],
        bcc: vec![],
        reply_to: None,
        subject: "test".into(),
        body_text: Some("Body".into()),
        body_html: None,
        in_reply_to: None,
        references: None,
        attachments: vec![pdf],
    };
    let wire = crate::build_message(&outgoing).unwrap();
    let msg = parse_email(&wire.raw).unwrap();
    assert_eq!(msg.attachments.len(), 1);
    assert_eq!(
        msg.attachments[0].filename.as_deref(),
        Some("file\"name.pdf"),
        "Round-trip filename with double-quote must be preserved"
    );
}
/// `extract_param` must still find `filename` when an earlier parameter
/// value contains U+0130.
#[test]
fn extract_param_with_non_ascii_before_param() {
    let disposition = "attachment; description=\"\u{0130}stanbul\"; filename=\"report.pdf\"";
    let filename = extract_param(disposition, "filename");
    assert_eq!(
        filename.as_deref(),
        Some("report.pdf"),
        "extract_param must work when non-ASCII chars that change byte length \
         under Unicode lowercasing appear before the target parameter (RFC 6532)"
    );
}
/// A `Date` header with hour 25 must leave `date` unset while the message
/// itself still parses.
#[test]
fn parse_date_rejects_invalid_hour() {
    let input = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 25:00:00 +0000\r\n\
        \r\n";
    let msg = parse_email(input).unwrap();
    let rejected = msg.date.is_none();
    assert!(
        rejected,
        "Date with hour=25 should be rejected per RFC 5322 Section 3.3"
    );
}
/// A `Date` header with minute 60 must leave `date` unset.
#[test]
fn parse_date_rejects_invalid_minute() {
    let input = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 12:60:00 +0000\r\n\
        \r\n";
    let msg = parse_email(input).unwrap();
    let rejected = msg.date.is_none();
    assert!(
        rejected,
        "Date with minute=60 should be rejected per RFC 5322 Section 3.3"
    );
}
/// A `Date` header with second 61 must leave `date` unset.
#[test]
fn parse_date_rejects_invalid_second() {
    let input = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 12:00:61 +0000\r\n\
        \r\n";
    let msg = parse_email(input).unwrap();
    let rejected = msg.date.is_none();
    assert!(
        rejected,
        "Date with second=61 should be rejected per RFC 5322 Section 3.3"
    );
}
/// A `Date` header with day 32 must leave `date` unset.
#[test]
fn parse_date_rejects_invalid_day() {
    let input = b"From: a@b.com\r\n\
        Date: Thu, 32 Feb 2025 12:00:00 +0000\r\n\
        \r\n";
    let msg = parse_email(input).unwrap();
    let rejected = msg.date.is_none();
    assert!(
        rejected,
        "Date with day=32 should be rejected per RFC 5322 Section 3.3"
    );
}
/// A `Date` header with day 00 must leave `date` unset.
#[test]
fn parse_date_rejects_day_zero() {
    let input = b"From: a@b.com\r\n\
        Date: Thu, 00 Feb 2025 12:00:00 +0000\r\n\
        \r\n";
    let msg = parse_email(input).unwrap();
    let rejected = msg.date.is_none();
    assert!(
        rejected,
        "Date with day=0 should be rejected per RFC 5322 Section 3.3"
    );
}
/// A seconds value of 60 (leap second) must be accepted and preserved.
///
/// The fixture uses the correct weekday for 30 Jun 2015 (a Tuesday) so the
/// test exercises only the leap-second handling and cannot be affected by
/// any day-of-week consistency checking in the date parser.
#[test]
fn parse_date_allows_leap_second() {
    let raw = b"From: a@b.com\r\n\
        Date: Tue, 30 Jun 2015 23:59:60 +0000\r\n\
        \r\n";
    let parsed = parse_email(raw).unwrap();
    let date = parsed.date.expect("Leap second (60) should be accepted");
    assert_eq!(date.second, 60);
}
/// A parenthesised comment between date tokens must not prevent the date
/// from being parsed.
#[test]
fn parse_date_with_comment_between_tokens() {
    let input = b"From: a@b.com\r\n\
        Date: Thu, 13 (February) Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let msg = parse_email(input).unwrap();
    let when = msg
        .date
        .expect("Date with CFWS comment must parse per RFC 5322 Section 4.3");
    assert_eq!(when.year, 2025);
    assert_eq!(when.month, 2);
    assert_eq!(when.day, 13);
}
/// A trailing `(UTC)` comment after the zone must not prevent parsing.
#[test]
fn parse_date_with_trailing_comment() {
    let input = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000 (UTC)\r\n\
        \r\n";
    let msg = parse_email(input).unwrap();
    let when = msg.date.expect("Date with trailing comment must parse");
    assert_eq!(when.year, 2025);
    assert_eq!(when.tz_offset_minutes, 0);
}
/// A nested parenthesised comment after the zone must not prevent parsing.
#[test]
fn parse_date_with_nested_comments() {
    let input = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000 (UTC (nested))\r\n\
        \r\n";
    let msg = parse_email(input).unwrap();
    let when = msg.date.expect("Date with nested comment must parse");
    assert_eq!(when.year, 2025);
}
/// A quoted display name whose last character is an escaped quote must be
/// unescaped in full.
#[test]
fn parse_display_name_ending_with_escaped_quote() {
    let input = b"From: \"She said \\\"hello\\\"\" <she@example.com>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let msg = parse_email(input).unwrap();
    let display_name = msg.from.name.as_deref();
    assert_eq!(
        display_name,
        Some("She said \"hello\""),
        "Display name ending with escaped quote must be parsed correctly \
         per RFC 5322 Section 3.2.4"
    );
}
/// Parsing an `Address` from a string must handle a display name that ends
/// with an escaped quote.
#[test]
fn address_from_str_ending_with_escaped_quote() {
    let text_form = "\"She said \\\"hello\\\"\" <she@example.com>";
    let parsed_addr: Address = text_form.parse().unwrap();
    assert_eq!(
        parsed_addr.name.as_deref(),
        Some("She said \"hello\""),
        "Address::from_str must handle display names ending with escaped quotes"
    );
}
/// A boundary string appearing mid-line must stay in the body text rather
/// than being treated as a part delimiter.
#[test]
fn boundary_must_be_at_line_start() {
    let input = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"BOUND\"\r\n\
        \r\n\
        --BOUND\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        This line mentions --BOUND in the middle\r\n\
        --BOUND--";
    let msg = parse_email(input).unwrap();
    // Missing body text is treated as empty so the assertion below reports it.
    let text = msg.body_text.as_deref().unwrap_or("");
    assert!(
        text.contains("--BOUND"),
        "Mid-line boundary must be treated as literal text per RFC 2046 Section 5.1.1, \
         but body_text was: {text:?}"
    );
}
/// `text/plaintext` must not be matched as `text/plain`: it should produce
/// no body text and one attachment.
#[test]
fn mime_type_exact_match_not_prefix() {
    let input = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plaintext\r\n\
        \r\n\
        Not really plain text\r\n\
        --b--";
    let msg = parse_email(input).unwrap();
    let has_no_text_body = msg.body_text.is_none();
    assert!(
        has_no_text_body,
        "text/plaintext must not be treated as text/plain body"
    );
    let attachment_count = msg.attachments.len();
    assert_eq!(
        attachment_count,
        1,
        "text/plaintext should be treated as an attachment"
    );
}
/// A single-part `image/jpeg` message must become one attachment with
/// section "1" and no body text.
#[test]
fn parse_single_part_non_text_is_attachment() {
    let input = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: image/jpeg\r\n\
        Content-Transfer-Encoding: base64\r\n\
        \r\n\
        /9j/4AAQSkZJRg==";
    let msg = parse_email(input).unwrap();
    assert!(
        msg.body_text.is_none(),
        "image/jpeg single-part must not populate body_text"
    );
    assert_eq!(
        msg.attachments.len(),
        1,
        "image/jpeg single-part must be treated as an attachment"
    );
    let image = &msg.attachments[0];
    assert_eq!(image.content_type, "image/jpeg");
    assert_eq!(image.section.as_deref(), Some("1"));
}
/// A single-part `application/pdf` message with an attachment disposition
/// must become one non-inline attachment carrying its filename.
#[test]
fn parse_single_part_application_pdf_is_attachment() {
    let input = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: application/pdf; name=\"doc.pdf\"\r\n\
        Content-Disposition: attachment; filename=\"doc.pdf\"\r\n\
        Content-Transfer-Encoding: base64\r\n\
        \r\n\
        JVBERi0xLjQK";
    let msg = parse_email(input).unwrap();
    assert!(
        msg.body_text.is_none(),
        "application/pdf must not populate body_text"
    );
    assert_eq!(msg.attachments.len(), 1);
    let pdf = &msg.attachments[0];
    assert_eq!(pdf.content_type, "application/pdf");
    assert_eq!(pdf.filename.as_deref(), Some("doc.pdf"));
    assert!(!pdf.is_inline);
}
/// A single-part `text/plain` message marked `Content-Disposition:
/// attachment` must be an attachment, not body text.
#[test]
fn parse_single_part_text_plain_with_attachment_disposition() {
    let input = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Disposition: attachment; filename=\"log.txt\"\r\n\
        \r\n\
        Server log data here";
    let msg = parse_email(input).unwrap();
    assert!(
        msg.body_text.is_none(),
        "text/plain with disposition:attachment must not populate body_text"
    );
    assert_eq!(msg.attachments.len(), 1);
    let log = &msg.attachments[0];
    assert_eq!(log.content_type, "text/plain");
    assert_eq!(log.filename.as_deref(), Some("log.txt"));
}
/// The empty group `undisclosed-recipients:;` in `To` must yield no
/// addresses.
#[test]
fn parse_group_address_empty_undisclosed() {
    let input = b"From: a@b.com\r\n\
        To: undisclosed-recipients:;\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let msg = parse_email(input).unwrap();
    let recipients = &msg.to;
    assert!(
        recipients.is_empty(),
        "empty group undisclosed-recipients:; must produce no addresses, got {:?}",
        recipients
    );
}
/// A group with two members in `To` must yield both member addresses in
/// order.
#[test]
fn parse_group_address_with_members() {
    let input = b"From: a@b.com\r\n\
        To: friends:one@x.com, two@x.com;\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let msg = parse_email(input).unwrap();
    let recipients = &msg.to;
    assert_eq!(
        recipients.len(),
        2,
        "group with 2 members must produce 2 addresses, got {:?}",
        recipients
    );
    assert_eq!(recipients[0].email, "one@x.com");
    assert_eq!(recipients[1].email, "two@x.com");
}
/// A `To` list mixing plain addresses with a group must yield all four
/// addresses in their original order.
#[test]
fn parse_group_address_mixed_with_regular() {
    let input = b"From: a@b.com\r\n\
        To: solo@x.com, friends:one@x.com, two@x.com;, last@x.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let msg = parse_email(input).unwrap();
    let found: Vec<&str> = msg.to.iter().map(|addr| addr.email.as_str()).collect();
    let expected = vec!["solo@x.com", "one@x.com", "two@x.com", "last@x.com"];
    assert_eq!(
        found, expected,
        "must extract all 4 addresses from mixed regular+group syntax"
    );
}
/// A lone `=` at the end of quoted-printable input must be dropped.
#[test]
fn decode_qp_trailing_equals_is_soft_break() {
    let decoded = decode_quoted_printable(b"Hello=");
    assert_eq!(
        decoded, b"Hello",
        "trailing '=' must be treated as soft line break per RFC 2045 Section 6.7"
    );
}
/// A trailing `=` followed by a bare CR must also be dropped.
#[test]
fn decode_qp_trailing_equals_cr_is_soft_break() {
    let decoded = decode_quoted_printable(b"Hello=\r");
    assert_eq!(
        decoded, b"Hello",
        "trailing '=\\r' must be treated as soft line break"
    );
}
/// A comment after a bare address must be excluded from the email and
/// used as the display name.
#[test]
fn parse_bare_address_with_trailing_comment() {
    let input = b"From: sender@example.com\r\n\
        To: user@example.com (Display Name)\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let msg = parse_email(input).unwrap();
    assert_eq!(msg.to.len(), 1);
    let recipient = &msg.to[0];
    assert_eq!(
        recipient.email, "user@example.com",
        "email must not contain the trailing comment"
    );
    assert_eq!(
        recipient.name.as_deref(),
        Some("Display Name"),
        "trailing comment should become display name per RFC 5322 Section 3.4.1"
    );
}
/// A comment before a bare address must be excluded from the email and
/// must not become the display name.
#[test]
fn parse_bare_address_with_leading_comment() {
    let input = b"From: sender@example.com\r\n\
        To: (Comment) user@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let msg = parse_email(input).unwrap();
    assert_eq!(msg.to.len(), 1);
    let recipient = &msg.to[0];
    assert_eq!(
        recipient.email, "user@example.com",
        "email must not contain the leading comment"
    );
    assert_eq!(
        recipient.name, None,
        "leading comment must not become display name"
    );
}
/// Text that looks like `charset=` inside another parameter's quoted value
/// must be ignored in favour of the real `charset` parameter.
#[test]
fn extract_param_skips_quoted_values() {
    let content_type = "text/html; boundary=\"has charset=bad inside\"; charset=utf-8";
    let found = extract_param(content_type, "charset");
    assert_eq!(
        found.as_deref(),
        Some("utf-8"),
        "Should skip match inside quoted boundary value"
    );
}
/// A `text/plain` part with no charset containing byte 0x93 must decode to
/// U+201C and must not contain U+FFFD.
#[test]
fn multipart_part_without_charset_uses_us_ascii_default() {
    let input = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Hello \x93World\r\n\
        --b--";
    let msg = parse_email(input).unwrap();
    let text = msg.body_text.unwrap();
    let has_left_quote = text.contains('\u{201c}');
    assert!(
        has_left_quote,
        "Part with text/plain (no charset) should use US-ASCII default per \
         RFC 2045 Section 5.2, decoding 0x93 as U+201C. Got: {text:?}"
    );
    let has_replacement = text.contains('\u{FFFD}');
    assert!(
        !has_replacement,
        "Part with text/plain (no charset) should not produce UTF-8 replacement \
         characters. Got: {text:?}"
    );
}
/// Unfolding a `Subject` whose first line ends in whitespace must produce
/// the expected joined value.
#[test]
fn parse_header_unfold_preserves_trailing_whitespace() {
    let input = b"From: a@b.com\r\nSubject: Hello \r\n World\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";
    let msg = parse_email(input).unwrap();
    let subject = msg.subject.as_deref();
    assert_eq!(
        subject,
        Some("Hello World"),
        "Trailing whitespace on first line must be preserved during unfolding \
         (RFC 5322 Section 2.2.3)"
    );
}
/// The final CRLF of a single-part text body must be stripped.
#[test]
fn parse_single_part_body_no_trailing_crlf() {
    let input = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        \r\n\
        Hello, World!\r\n";
    let msg = parse_email(input).unwrap();
    let body = msg.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello, World!"),
        "Single-part body text must not include trailing CRLF"
    );
}
/// The final CRLF of a single-part HTML body must be stripped.
#[test]
fn parse_single_part_html_no_trailing_crlf() {
    let input = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/html; charset=utf-8\r\n\
        \r\n\
        <p>Hello</p>\r\n";
    let msg = parse_email(input).unwrap();
    let markup = msg.body_html.as_deref();
    assert_eq!(
        markup,
        Some("<p>Hello</p>"),
        "Single-part HTML body must not include trailing CRLF"
    );
}
/// A text-only message built with `build_message` must parse back to the
/// same body text with no trailing CRLF.
#[test]
fn round_trip_single_part_body_text() {
    let sender = crate::types::Address {
        name: None,
        email: "a@b.com".into(),
    };
    let recipient = crate::types::Address {
        name: None,
        email: "c@d.com".into(),
    };
    let outgoing = crate::types::OutgoingEmail {
        from: sender,
        to: vec![recipient],
        cc: vec![],
        bcc: vec![],
        reply_to: None,
        subject: "Test".into(),
        body_text: Some("Hello, World!".into()),
        body_html: None,
        in_reply_to: None,
        references: None,
        attachments: vec![],
    };
    let wire = crate::build_message(&outgoing).unwrap();
    let msg = parse_email(&wire.raw).unwrap();
    assert_eq!(
        msg.body_text.as_deref(),
        Some("Hello, World!"),
        "Single-part body text must round-trip without trailing CRLF"
    );
}
/// An encoded-word display name that decodes to text containing a comma
/// must stay a single `From` address.
#[test]
fn parse_encoded_word_display_name_with_comma() {
    let input = b"From: =?UTF-8?B?Sm9obiwgRG9l?= <john@example.com>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let msg = parse_email(input).unwrap();
    let sender = &msg.from;
    assert_eq!(
        sender.name.as_deref(),
        Some("John, Doe"),
        "RFC 2047 encoded display name with comma must be preserved \
         (RFC 2047 Section 5 rule 3): decode AFTER address parsing"
    );
    assert_eq!(sender.email, "john@example.com");
}
/// Characters outside the base64 alphabet in a base64 body must be skipped
/// so the payload still decodes.
#[test]
fn parse_base64_body_ignores_non_alphabet_chars() {
    let input = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: base64\r\n\
        \r\n\
        SGVs!bG8#gV29~ybGQ=";
    let msg = parse_email(input).unwrap();
    let body = msg.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello World"),
        "RFC 2045 Section 6.8: non-alphabet characters must be ignored in base64 data"
    );
}
/// In a `To` list, a comma hidden inside an encoded-word display name must
/// not count as an address separator.
#[test]
fn parse_encoded_word_display_name_with_comma_in_to() {
    let input = b"From: sender@example.com\r\n\
        To: =?UTF-8?B?Sm9obiwgRG9l?= <john@example.com>, other@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";
    let msg = parse_email(input).unwrap();
    let recipients = &msg.to;
    assert_eq!(
        recipients.len(),
        2,
        "Must parse exactly 2 addresses, not 3 (encoded comma is not a separator)"
    );
    let (first, second) = (&recipients[0], &recipients[1]);
    assert_eq!(
        first.name.as_deref(),
        Some("John, Doe"),
        "First recipient display name must be 'John, Doe'"
    );
    assert_eq!(first.email, "john@example.com");
    assert_eq!(second.email, "other@example.com");
}
/// A message built with no text body (only an attachment) must parse back
/// with `body_text == None`.
#[test]
fn round_trip_empty_body_text_is_none() {
    let sender = crate::types::Address {
        name: None,
        email: "a@b.com".into(),
    };
    let recipient = crate::types::Address {
        name: None,
        email: "c@d.com".into(),
    };
    let text_file = crate::types::OutgoingAttachment {
        filename: "test.txt".into(),
        content_type: "text/plain".into(),
        data: b"attachment data".to_vec(),
    };
    let outgoing = crate::types::OutgoingEmail {
        from: sender,
        to: vec![recipient],
        cc: vec![],
        bcc: vec![],
        reply_to: None,
        subject: "Empty body".into(),
        body_text: None,
        body_html: None,
        in_reply_to: None,
        references: None,
        attachments: vec![text_file],
    };
    let wire = crate::build_message(&outgoing).unwrap();
    let msg = parse_email(&wire.raw).unwrap();
    assert_eq!(
        msg.body_text, None,
        "Empty body_text must round-trip as None, not Some(\"\")"
    );
}
/// A message built with an empty HTML body alongside real text must parse
/// back with `body_html == None` and the text intact.
#[test]
fn round_trip_empty_body_html_in_alternative_is_none() {
    let sender = crate::types::Address {
        name: None,
        email: "a@b.com".into(),
    };
    let recipient = crate::types::Address {
        name: None,
        email: "c@d.com".into(),
    };
    let outgoing = crate::types::OutgoingEmail {
        from: sender,
        to: vec![recipient],
        cc: vec![],
        bcc: vec![],
        reply_to: None,
        subject: "Text only".into(),
        body_text: Some("Plain text".into()),
        body_html: Some(String::new()),
        in_reply_to: None,
        references: None,
        attachments: vec![],
    };
    let wire = crate::build_message(&outgoing).unwrap();
    let msg = parse_email(&wire.raw).unwrap();
    assert_eq!(
        msg.body_html, None,
        "Empty body_html must parse as None, not Some(\"\")"
    );
    assert_eq!(
        msg.body_text.as_deref(),
        Some("Plain text"),
        "body_text must be preserved"
    );
}
/// A parenthesised comment inside `Content-Type` must not stop the part
/// from being recognised as the `text/plain` body.
#[test]
fn extract_mime_type_strips_rfc5322_comments() {
    let input = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain (this is a comment); charset=utf-8\r\n\
        \r\n\
        Hello with comment";
    let msg = parse_email(input).unwrap();
    let body = msg.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello with comment"),
        "Body must be extracted as body_text when Content-Type has an RFC 5322 comment"
    );
    let no_attachments = msg.attachments.is_empty();
    assert!(
        no_attachments,
        "No attachments expected for a plain text/plain message with a comment"
    );
}
/// In `multipart/digest`, a part with no `Content-Type` must be treated as
/// a `message/rfc822` attachment rather than as body text.
#[test]
fn multipart_digest_default_content_type_is_message_rfc822() {
    let input = b"From: sender@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Subject: Digest\r\n\
        Content-Type: multipart/digest; boundary=\"digestboundary\"\r\n\
        \r\n\
        --digestboundary\r\n\
        \r\n\
        From: nested@example.com\r\n\
        Subject: Nested message\r\n\
        \r\n\
        Nested body text\r\n\
        --digestboundary--\r\n";
    let msg = parse_email(input).unwrap();
    assert!(
        msg.body_text.is_none(),
        "multipart/digest parts without Content-Type should default to \
         message/rfc822, not text/plain — body_text should be None"
    );
    assert_eq!(
        msg.attachments.len(),
        1,
        "multipart/digest part should be treated as message/rfc822 attachment"
    );
    let nested = &msg.attachments[0];
    assert_eq!(
        nested.content_type, "message/rfc822",
        "default Content-Type in multipart/digest must be message/rfc822 \
         (RFC 2046 Section 5.1.5)"
    );
}
/// Whitespace inside the angle brackets of a `Content-ID` must be trimmed,
/// both for a part of a multipart message and for a single-part message.
///
/// The first attachment is reached through `first()` rather than `[0]`, so
/// a missing attachment fails the assertion with its message instead of
/// panicking on an out-of-bounds index.
#[test]
fn content_id_whitespace_inside_brackets_trimmed() {
    let raw = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: image/png\r\n\
        Content-ID: < cid@example.com >\r\n\
        \r\n\
        PNG\r\n\
        --b--";
    let parsed = parse_email(raw).unwrap();
    assert_eq!(
        parsed
            .attachments
            .first()
            .and_then(|a| a.content_id.as_deref()),
        Some("cid@example.com"),
        "Content-ID must be trimmed after bracket stripping (RFC 2392)"
    );
    let raw_single = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: image/png\r\n\
        Content-ID: < cid2@example.com >\r\n\
        \r\n\
        PNG";
    let parsed_single = parse_email(raw_single).unwrap();
    assert_eq!(
        parsed_single
            .attachments
            .first()
            .and_then(|a| a.content_id.as_deref()),
        Some("cid2@example.com"),
        "Content-ID in single-part message must be trimmed (RFC 2392)"
    );
}
/// `parse_headers_only` must populate every supported header field and
/// leave all body-derived fields empty.
#[test]
fn parse_headers_only_all_fields_verified() {
    let input = b"From: sender@example.com\r\n\
        To: to@example.com\r\n\
        Cc: cc@example.com\r\n\
        Bcc: bcc@example.com\r\n\
        Reply-To: reply@example.com\r\n\
        Subject: Full test\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Message-ID: <msg1@example.com>\r\n\
        In-Reply-To: <parent@example.com>\r\n\
        References: <ref1@example.com> <ref2@example.com>\r\n\
        \r\n\
        Body that should be ignored";
    let msg = parse_headers_only(input).unwrap();
    // Address fields.
    assert_eq!(msg.from.email, "sender@example.com");
    assert_eq!(msg.to.len(), 1);
    assert_eq!(msg.to[0].email, "to@example.com");
    assert_eq!(msg.cc.len(), 1);
    assert_eq!(msg.cc[0].email, "cc@example.com");
    assert_eq!(msg.bcc.len(), 1);
    assert_eq!(msg.bcc[0].email, "bcc@example.com");
    assert_eq!(msg.reply_to.len(), 1);
    assert_eq!(msg.reply_to[0].email, "reply@example.com");
    // Scalar metadata and threading identifiers.
    assert_eq!(msg.subject.as_deref(), Some("Full test"));
    assert!(msg.date.is_some());
    assert_eq!(msg.message_id.as_deref(), Some("msg1@example.com"));
    assert_eq!(msg.in_reply_to.as_deref(), Some("parent@example.com"));
    let references = msg.references.as_deref();
    assert_eq!(references, Some("ref1@example.com ref2@example.com"));
    // Nothing from the body.
    assert!(msg.body_text.is_none());
    assert!(msg.body_html.is_none());
    assert!(msg.attachments.is_empty());
}
/// With only a `From` header, every optional field must be `None` or
/// empty.
#[test]
fn parse_missing_optional_headers_returns_none_or_empty() {
    let input = b"From: a@b.com\r\n\r\n";
    let msg = parse_email(input).unwrap();
    assert_eq!(msg.from.email, "a@b.com");
    // Optional scalar headers.
    assert!(msg.subject.is_none());
    assert!(msg.date.is_none());
    assert!(msg.message_id.is_none());
    assert!(msg.in_reply_to.is_none());
    assert!(msg.references.is_none());
    // Optional address lists.
    assert!(msg.to.is_empty());
    assert!(msg.cc.is_empty());
    assert!(msg.bcc.is_empty());
    assert!(msg.reply_to.is_empty());
}
/// `xfilename` must not be mistaken for `filename`; the real parameter
/// later in the header wins.
#[test]
fn extract_param_rejects_substring_match() {
    let disposition = "attachment; xfilename=\"bad.pdf\"; filename=\"good.pdf\"";
    let found = extract_param(disposition, "filename");
    assert_eq!(
        found.as_deref(),
        Some("good.pdf"),
        "Must not match xfilename as filename"
    );
}
/// When `filename` appears only as the suffix of another parameter name,
/// no value must be returned.
#[test]
fn extract_param_rejects_suffix_only_match() {
    let disposition = "attachment; notfilename=\"only.pdf\"";
    let found = extract_param(disposition, "filename");
    assert!(
        found.is_none(),
        "Must not match 'filename' inside 'notfilename'"
    );
}
/// `parse_address_list` must return nothing for an empty group.
#[test]
fn parse_group_address_empty() {
    let addrs = parse_address_list("undisclosed-recipients:;");
    let none_found = addrs.is_empty();
    assert!(
        none_found,
        "empty group must produce no addresses, got {addrs:?}"
    );
}
/// `parse_address_list` must return both members of a two-member group,
/// in order.
#[test]
fn parse_group_address_with_two_members() {
    let addrs = parse_address_list("Friends: a@x.com, b@x.com;");
    assert_eq!(addrs.len(), 2, "group with 2 members: {addrs:?}");
    let (first, second) = (&addrs[0], &addrs[1]);
    assert_eq!(first.email, "a@x.com");
    assert_eq!(second.email, "b@x.com");
}
/// Two groups followed by a standalone address must flatten into four
/// addresses in source order.
#[test]
fn parse_multiple_groups_and_solo() {
    let header = "Team A: a1@x.com, a2@x.com;, Team B: b1@x.com;, solo@x.com";
    let addrs = parse_address_list(header);
    assert_eq!(addrs.len(), 4, "2 groups + 1 solo: {addrs:?}");
    assert_eq!(addrs[0].email, "a1@x.com");
    assert_eq!(addrs[1].email, "a2@x.com");
    assert_eq!(addrs[2].email, "b1@x.com");
    assert_eq!(addrs[3].email, "solo@x.com");
}
/// A comma inside a comment must not split the list when calling
/// `parse_address_list` directly.
#[test]
fn parse_address_comment_with_comma_audit() {
    let addrs = parse_address_list("user@x.com (Last, First), other@x.com");
    assert_eq!(
        addrs.len(),
        2,
        "comma inside comment must not split: {addrs:?}"
    );
    let (first, second) = (&addrs[0], &addrs[1]);
    assert_eq!(first.email, "user@x.com");
    assert_eq!(second.email, "other@x.com");
}
/// Reassembly must stop at the first missing section index (`*1` absent),
/// ignoring the later `*2` section.
#[test]
fn rfc2231_continuation_gap_stops() {
    let disposition = "attachment; filename*0=\"hello\"; filename*2=\"skipped\"";
    let joined = extract_rfc2231_continuation(disposition, "filename");
    assert_eq!(
        joined.as_deref(),
        Some("hello"),
        "continuation must stop at missing section index"
    );
}
/// A lone `filename*0` section must be returned as the whole value.
#[test]
fn rfc2231_continuation_single_section() {
    let disposition = "attachment; filename*0=\"report.pdf\"";
    let joined = extract_rfc2231_continuation(disposition, "filename");
    assert_eq!(joined.as_deref(), Some("report.pdf"));
}
/// Three plain (non-encoded) continuation sections must be concatenated
/// into the attachment filename.
#[test]
fn rfc2231_continuation_no_charset_defaults_to_utf8() {
    let input = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/octet-stream\r\n\
        Content-Disposition: attachment; filename*0=\"annual_\"; filename*1=\"report_\"; filename*2=\"2025.pdf\"\r\n\
        \r\n\
        DATA\r\n\
        --b--";
    let msg = parse_email(input).unwrap();
    assert_eq!(msg.attachments.len(), 1);
    let attachment = &msg.attachments[0];
    assert_eq!(
        attachment.filename.as_deref(),
        Some("annual_report_2025.pdf"),
        "RFC 2231 continuation without charset should decode as UTF-8"
    );
}
/// Spaces embedded in base64 input must be skipped during decoding.
#[test]
fn base64_with_embedded_spaces() {
    let encoded = b"SGVs bG8g V29y bGQ=";
    let bytes = decode_transfer_encoding(encoded, "base64");
    let as_text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        as_text,
        "Hello World",
        "base64 decoder must strip non-alphabet characters (RFC 2045 Section 6.8)"
    );
}
/// Tabs embedded in base64 input must be skipped during decoding.
#[test]
fn base64_with_tabs() {
    let encoded = b"SGVs\tbG8g\tV29ybGQ=";
    let bytes = decode_transfer_encoding(encoded, "base64");
    let as_text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        as_text,
        "Hello World",
        "base64 decoder must strip tabs (RFC 2045 Section 6.8)"
    );
}
/// A trailing `=` in quoted-printable data must be removed from the
/// decoded output.
#[test]
fn qp_trailing_equals_stripped() {
    let encoded = b"Hello=";
    let bytes = decode_quoted_printable(encoded);
    let as_text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        as_text,
        "Hello",
        "trailing '=' is a soft break (RFC 2045 Section 6.7)"
    );
}
/// An `=` followed by non-hex characters must be copied through unchanged
/// by the quoted-printable decoder.
#[test]
fn qp_malformed_hex_passthrough() {
    let encoded = b"Hello=ZZ World";
    let bytes = decode_quoted_printable(encoded);
    let as_text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        as_text,
        "Hello=ZZ World",
        "malformed =ZZ must pass through literally (Postel's law)"
    );
}
/// An `=` followed by non-hex characters must be copied through unchanged
/// by the Q-encoding decoder.
#[test]
fn q_encoding_malformed_hex_passthrough() {
    let bytes = decode_q_encoding("Hello=ZZWorld");
    let as_text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        as_text,
        "Hello=ZZWorld",
        "malformed =ZZ in Q-encoding must pass through literally"
    );
}
/// A trailing `=` in Q-encoded text must be kept in the output.
#[test]
fn q_encoding_trailing_equals() {
    let bytes = decode_q_encoding("Hello=");
    let as_text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        as_text,
        "Hello=",
        "trailing '=' in Q-encoding must pass through literally"
    );
}
/// A header-less part of a `multipart/digest` message must become a
/// `message/rfc822` attachment and leave the body text unset.
#[test]
fn multipart_digest_default_content_type_full_email() {
    let input = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/digest; boundary=\"dg\"\r\n\
        \r\n\
        --dg\r\n\
        \r\n\
        From: nested@example.com\r\n\
        Subject: Inner\r\n\
        \r\n\
        Inner body\r\n\
        --dg--";
    let msg = parse_email(input).unwrap();
    assert!(
        msg.body_text.is_none(),
        "digest part must NOT be treated as text/plain"
    );
    assert_eq!(msg.attachments.len(), 1);
    let nested = &msg.attachments[0];
    assert_eq!(nested.content_type, "message/rfc822");
}
/// A space-indented line before any header must not stop the following
/// headers and body from parsing.
#[test]
fn parse_headers_leading_space_skipped() {
    let input = b" continuation without header\r\nFrom: a@b.com\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\nBody";
    let msg = parse_email(input).unwrap();
    let body = msg.body_text.as_deref();
    assert_eq!(msg.from.email, "a@b.com");
    assert_eq!(body, Some("Body"));
}
/// A tab-indented line before any header must not stop `From` from being
/// parsed.
#[test]
fn parse_headers_leading_tab_skipped() {
    let input = b"\tcontinuation without header\r\nFrom: a@b.com\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";
    let msg = parse_email(input).unwrap();
    let sender = &msg.from;
    assert_eq!(sender.email, "a@b.com");
}
/// An encoded word whose base64 payload is invalid must keep its `=?`
/// marker in the output.
#[test]
fn encoded_word_bad_base64_passthrough() {
    let encoded = "=?UTF-8?B?=====?=";
    let result = decode_encoded_words(encoded);
    let kept_marker = result.contains("=?");
    assert!(
        kept_marker,
        "Bad base64 encoded word should pass through literally, got: {result:?}"
    );
}
/// An encoded word naming an unknown charset must still decode its
/// payload to "Hello".
#[test]
fn encoded_word_unknown_charset_fallback() {
    let encoded = "=?x-nonexistent-charset?B?SGVsbG8=?=";
    let result = decode_encoded_words(encoded);
    let decoded_payload = result.contains("Hello");
    assert!(
        decoded_payload,
        "Unknown charset should fall back to UTF-8, got: {result:?}"
    );
}
/// An encoded word with no closing `?=` must keep its `=?` marker in the
/// output.
#[test]
fn encoded_word_truncated_no_closing() {
    let encoded = "Start =?UTF-8?B?SGVsbG8= End";
    let result = decode_encoded_words(encoded);
    let kept_marker = result.contains("=?");
    assert!(
        kept_marker,
        "Truncated encoded word should pass through, got: {result:?}"
    );
}
/// Section 0 is percent-encoded with a charset prefix, section 1 is a plain
/// quoted string; the two must be joined into one decoded filename.
#[test]
fn rfc2231_continuation_mixed_encoded_and_plain() {
    let filename = extract_rfc2231_continuation(
        "attachment; filename*0*=UTF-8''r%C3%A9sum; filename*1=\"e.pdf\"",
        "filename",
    );
    assert_eq!(
        filename.as_deref(),
        Some("r\u{e9}sume.pdf"),
        "RFC 2231 mixed encoded/plain continuation should reassemble correctly"
    );
}
/// Three plain quoted sections (`*0`, `*1`, `*2`) must be concatenated in
/// order into a single filename.
#[test]
fn rfc2231_continuation_three_sections() {
    let filename = extract_rfc2231_continuation(
        "attachment; filename*0=\"part1_\"; filename*1=\"part2_\"; filename*2=\"part3.pdf\"",
        "filename",
    );
    assert_eq!(filename.as_deref(), Some("part1_part2_part3.pdf"));
}
/// A quoted parameter value that is missing its closing quote must be read up
/// to the end of the header string.
#[test]
fn extract_param_unterminated_quoted_value() {
    let charset = extract_param("text/plain; charset=\"utf-8", "charset");
    assert_eq!(
        charset.as_deref(),
        Some("utf-8"),
        "Unterminated quoted-string should extract to end of string"
    );
}
/// The header carries an escaped backslash (`\\`) inside the quoted filename;
/// the extracted value must contain a single backslash.
#[test]
fn extract_param_quoted_with_backslash_escape() {
    let filename = extract_param("attachment; filename=\"file\\\\name.txt\"", "filename");
    assert_eq!(
        filename.as_deref(),
        Some("file\\name.txt"),
        "Backslash escape in quoted param value must be unescaped"
    );
}
/// A parameter whose quoted value is empty (`""`) must be reported as absent.
#[test]
fn extract_param_empty_quoted_value() {
    // `result` is captured by name in the assertion message below.
    let result = extract_param("attachment; filename=\"\"", "filename");
    let absent = result.is_none();
    assert!(
        absent,
        "Empty quoted-string value should return None, got: {result:?}"
    );
}
/// Only the outermost parentheses are removed; a nested `(inner)` group stays
/// part of the returned comment text.
#[test]
fn extract_comment_text_nested_parens() {
    let comment = extract_comment_text("(outer (inner) text)");
    assert_eq!(
        comment.as_deref(),
        Some("outer (inner) text"),
        "Nested parens should be included in comment text"
    );
}
/// Backslash-escaped parentheses inside a comment must come back without the
/// backslashes.
#[test]
fn extract_comment_text_escaped_chars() {
    let comment = extract_comment_text("(hello \\(world\\))");
    assert_eq!(
        comment.as_deref(),
        Some("hello (world)"),
        "Escaped parens inside comments should be unescaped"
    );
}
/// `()` contains no text, so no comment is returned.
#[test]
fn extract_comment_text_empty() {
    let comment = extract_comment_text("()");
    let absent = comment.is_none();
    assert!(absent, "Empty comment should return None");
}
/// Input that is not wrapped in parentheses is not a comment at all.
#[test]
fn extract_comment_text_no_paren() {
    let comment = extract_comment_text("not a comment");
    let absent = comment.is_none();
    assert!(absent, "Non-parenthesized input should return None");
}
/// Table of `strip_comments` inputs and the exact output each must produce:
/// nested comments, escaped parentheses inside a comment, and a backslash pair
/// outside any comment.
#[test]
fn strip_comments_nested_and_escaped() {
    let cases = [
        // Nested comment is removed as a whole.
        ("Hello (outer (inner) comment) World", "Hello World"),
        // Escaped `)` does not terminate the comment early.
        ("Hello (comment with \\) escaped) World", "Hello World"),
        // Backslashes outside a comment are left alone.
        ("Hello \\\\ World", "Hello \\\\ World"),
        // Escaped `(` does not open a nested comment.
        ("Before (escaped \\( paren) After", "Before After"),
    ];
    for (input, expected) in cases {
        assert_eq!(strip_comments(input), expected);
    }
}
/// Backslash-escaped parentheses that appear outside any comment must leave
/// the input completely unchanged.
#[test]
fn strip_comments_escaped_outside_comment() {
    let stripped = strip_comments("no \\(comment\\) here");
    assert_eq!(
        stripped, "no \\(comment\\) here",
        "Escaped parens outside comments should not open/close comments"
    );
}
/// A date string holding only a day and a month is rejected.
#[test]
fn parse_date_too_few_parts() {
    let parsed = parse_rfc5322_date("13 Feb");
    let rejected = parsed.is_none();
    assert!(rejected, "Date with too few parts should return None");
}
/// A time field written without any `:` separator is rejected.
#[test]
fn parse_date_time_no_colon() {
    let parsed = parse_rfc5322_date("13 Feb 2025 1547 +0000");
    let rejected = parsed.is_none();
    assert!(rejected, "Time without colon should return None");
}
/// A month token that is not a known month name is rejected.
#[test]
fn parse_date_unknown_month() {
    let parsed = parse_rfc5322_date("13 Foo 2025 12:00:00 +0000");
    let rejected = parsed.is_none();
    assert!(rejected, "Unknown month name should return None");
}
/// Inputs with no date structure at all must each make `parse_rfc5322_date`
/// return `None` instead of panicking or producing a value.
#[test]
fn parse_date_completely_malformed() {
// Free-form text with no numeric fields.
assert!(parse_rfc5322_date("not a date at all").is_none());
// Empty string.
assert!(parse_rfc5322_date("").is_none());
// Whitespace only.
assert!(parse_rfc5322_date(" ").is_none());
}
/// A zone token the parser does not recognise (`ZULU`) must not fail the
/// parse; the resulting offset is zero minutes.
#[test]
fn parse_date_unknown_timezone_defaults_zero() {
    let parsed = parse_rfc5322_date("13 Feb 2025 12:00:00 ZULU").unwrap();
    let offset = parsed.tz_offset_minutes;
    assert_eq!(
        offset, 0,
        "Unknown timezone abbreviation should default to +0000"
    );
}
/// A day field that is not a number is rejected.
#[test]
fn parse_date_non_numeric_day() {
    let parsed = parse_rfc5322_date("XX Feb 2025 12:00:00 +0000");
    let rejected = parsed.is_none();
    assert!(rejected, "Non-numeric day should return None");
}
/// A year field that is not a number is rejected.
#[test]
fn parse_date_non_numeric_year() {
    let parsed = parse_rfc5322_date("13 Feb XXXX 12:00:00 +0000");
    let rejected = parsed.is_none();
    assert!(rejected, "Non-numeric year should return None");
}
/// Boundary lines terminated by bare `\n` (no `\r`) must still delimit parts.
#[test]
fn split_mime_parts_lf_only_boundaries() {
    let parts = split_mime_parts(
        b"--boundary\nContent-Type: text/plain\n\nPart 1\n--boundary\nContent-Type: text/plain\n\nPart 2\n--boundary--",
        "boundary",
    );
    let count = parts.len();
    assert_eq!(count, 2, "Should find 2 parts with LF-only boundaries");
}
/// The body begins directly with the opening boundary (no preamble); exactly
/// one part must be returned and it must carry the part's content.
#[test]
fn split_mime_parts_boundary_at_start() {
    let parts = split_mime_parts(
        b"--b\r\nContent-Type: text/plain\r\n\r\nOnly part\r\n--b--",
        "b",
    );
    let count = parts.len();
    assert_eq!(count, 1, "Should find 1 part when boundary is at start");

    let text = String::from_utf8_lossy(parts[0]);
    assert!(text.contains("Only part"));
}
/// The delimiter text `--b` occurring in the middle of a content line is not a
/// boundary: the part must not be split and the text must survive intact.
#[test]
fn split_mime_parts_midline_boundary_ignored() {
    let parts = split_mime_parts(
        b"--b\r\nContent-Type: text/plain\r\n\r\nText mentioning --b in the middle\r\n--b--",
        "b",
    );
    assert_eq!(parts.len(), 1, "Mid-line boundary must not split");

    let part_text = String::from_utf8_lossy(parts[0]);
    let kept = part_text.contains("--b in the middle");
    assert!(kept, "Mid-line boundary text should be preserved");
}
/// A boundary line followed by a space and a tab before the line break must
/// still be recognised as a boundary.
#[test]
fn split_mime_parts_boundary_with_trailing_whitespace() {
    let parts = split_mime_parts(
        b"--b \t\r\nContent-Type: text/plain\r\n\r\nBody text\r\n--b--",
        "b",
    );
    let count = parts.len();
    assert_eq!(
        count, 1,
        "Boundary with trailing whitespace should be recognized"
    );
}
/// Delimiter text embedded inside a content line must be skipped: one part is
/// returned and the embedded text remains in it.
#[test]
fn split_mime_parts_boundary_not_at_line_start_skipped() {
    let parts = split_mime_parts(
        b"--bound\r\n\r\nSome text has --bound embedded\r\n--bound--",
        "bound",
    );
    let count = parts.len();
    assert_eq!(count, 1);

    let text = String::from_utf8_lossy(parts[0]);
    assert!(text.contains("--bound embedded"));
}
/// The Content-Transfer-Encoding value is wrapped in double quotes. Only the
/// presence of a text body is pinned here, not its decoded content.
#[test]
fn parse_quoted_transfer_encoding() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: \"base64\"\r\n\
        \r\n\
        SGVsbG8gV29ybGQ=\r\n";
    let email = parse_email(message).unwrap();
    let has_text = email.body_text.is_some();
    assert!(
        has_text,
        "Message with quoted CTE should still produce body_text"
    );
}
/// The Content-Transfer-Encoding value has trailing whitespace before the line
/// break; the base64 body must still decode to "Hello World".
#[test]
fn parse_transfer_encoding_with_whitespace() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: base64 \r\n\
        \r\n\
        SGVsbG8gV29ybGQ=\r\n";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello World"),
        "CTE with whitespace should still decode correctly"
    );
}
/// `=` immediately followed by CRLF is a soft line break and is dropped; the
/// space that starts the next line is kept.
#[test]
fn qp_soft_line_break_crlf() {
    let bytes = decode_quoted_printable(b"Hello=\r\n World");
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        text, "Hello World",
        "=\\r\\n soft break should be removed (RFC 2045 Section 6.7)"
    );
}
/// `=` immediately followed by a bare LF is also treated as a soft line break.
#[test]
fn qp_soft_line_break_lf_only() {
    let bytes = decode_quoted_printable(b"Hello=\nWorld");
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(text, "HelloWorld", "=\\n soft break should be removed");
}
/// A soft break (`=` + LF) that is the last thing in the input is removed,
/// leaving only the preceding text.
#[test]
fn qp_soft_break_lf_at_end() {
    let bytes = decode_quoted_printable(b"Hi=\n");
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(text, "Hi", "=\\n at end of data should be a soft break");
}
/// `=GG` is not a valid hex escape and is copied through verbatim, while the
/// valid escapes after it (`=4F`, `=4B`) decode to "OK".
#[test]
fn qp_invalid_hex_passthrough() {
    let bytes = decode_quoted_printable(b"=GG=4F=4B");
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        text, "=GGOK",
        "Invalid hex =GG should pass through, valid =4F=4B should decode"
    );
}
/// Decoding a zero-length base64 body yields zero bytes.
#[test]
fn base64_empty_body() {
    let output = decode_transfer_encoding(b"", "base64");
    let nothing_decoded = output.is_empty();
    assert!(
        nothing_decoded,
        "Empty base64 input should produce empty output"
    );
}
/// A base64 body made up solely of spaces and line breaks yields zero bytes.
#[test]
fn base64_whitespace_only() {
    let output = decode_transfer_encoding(b" \r\n \r\n", "base64");
    let nothing_decoded = output.is_empty();
    assert!(
        nothing_decoded,
        "Whitespace-only base64 input should produce empty output"
    );
}
/// With no quote character in the input, the returned index is 21, which is
/// the length of the input string.
#[test]
fn find_closing_quote_unterminated() {
    let end = find_closing_quote("no closing quote here");
    assert_eq!(end, 21);
}
/// The input is `hello\"world"`: the backslash-escaped quote at offset 6 is
/// skipped and the unescaped quote at offset 12 is reported.
#[test]
fn find_closing_quote_skips_escaped() {
    let end = find_closing_quote("hello\\\"world\"");
    assert_eq!(end, 12);
}
/// Two-part CRLF message: the CRLF that precedes each boundary line must not
/// end up in the text or HTML body.
#[test]
fn multipart_crlf_before_boundary() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"mp\"\r\n\
        \r\n\
        --mp\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Part A\r\n\
        --mp\r\n\
        Content-Type: text/html\r\n\
        \r\n\
        <b>Part B</b>\r\n\
        --mp--";
    let email = parse_email(message).unwrap();

    let plain = email.body_text.as_deref();
    assert_eq!(plain, Some("Part A"));

    let html = email.body_html.as_deref();
    assert_eq!(html, Some("<b>Part B</b>"));
}
/// Same two-part message as the CRLF variant, but every line ends in a bare
/// LF; the extracted bodies must be identical.
#[test]
fn multipart_lf_only_before_boundary() {
    let email = parse_email(
        b"From: a@b.com\nDate: Thu, 13 Feb 2025 15:47:33 +0000\nContent-Type: multipart/mixed; boundary=\"mp\"\n\n--mp\nContent-Type: text/plain\n\nPart A\n--mp\nContent-Type: text/html\n\n<b>Part B</b>\n--mp--",
    )
    .unwrap();

    let plain = email.body_text.as_deref();
    assert_eq!(plain, Some("Part A"));

    let html = email.body_html.as_deref();
    assert_eq!(html, Some("<b>Part B</b>"));
}
/// The body ends without a closing `--bnd--` line; the trailing content must
/// still be returned as the second part.
#[test]
fn multipart_truncated_no_closing() {
    let parts = split_mime_parts(
        b"--bnd\r\n\r\nFirst part\r\n--bnd\r\n\r\nSecond part with no closing boundary",
        "bnd",
    );
    let count = parts.len();
    assert_eq!(
        count, 2,
        "Should find 2 parts even without closing boundary"
    );

    let text2 = String::from_utf8_lossy(parts[1]);
    assert!(text2.contains("Second part"));
}
/// An escaped `)` inside a comment must not close it; after trimming, only the
/// text around the comment remains.
#[test]
fn strip_comments_escaped_paren_inside_comment() {
    let stripped = strip_comments("Before (escaped \\) paren) After");
    let trimmed = stripped.trim();
    assert_eq!(
        trimmed, "Before After",
        "Escaped close-paren inside comment must not end the comment"
    );
}
/// An escaped `(` inside a comment must not start a nested comment; the single
/// closing paren ends the comment.
#[test]
fn strip_comments_escaped_open_paren_inside_comment() {
    let stripped = strip_comments("X (comment \\( not nested) Y");
    let trimmed = stripped.trim();
    assert_eq!(
        trimmed, "X Y",
        "Escaped open-paren inside comment must not increase nesting depth"
    );
}
/// Table of input bytes and the value `hex_digit` must return for each:
/// lowercase and uppercase letters, decimal digits, and non-hex bytes.
#[test]
fn hex_digit_lowercase() {
    let expectations = [
        // Lowercase letters.
        (b'a', Some(10)),
        (b'f', Some(15)),
        (b'c', Some(12)),
        // Uppercase letters.
        (b'A', Some(10)),
        (b'F', Some(15)),
        // Decimal digits.
        (b'0', Some(0)),
        (b'9', Some(9)),
        // Bytes outside the hex alphabet.
        (b'g', None),
        (b'G', None),
        (b' ', None),
    ];
    for (byte, expected) in expectations {
        assert_eq!(hex_digit(byte), expected);
    }
}
/// Quoted-printable escapes written with lowercase hex digits (`=c3=a9`) must
/// decode to the same bytes as their uppercase form.
#[test]
fn qp_lowercase_hex_digits() {
    let bytes = decode_quoted_printable(b"caf=c3=a9");
    // Raw bytes first, then the same bytes interpreted as UTF-8.
    assert_eq!(bytes, b"caf\xc3\xa9");

    let rendered = String::from_utf8_lossy(&bytes);
    assert_eq!(
        rendered, "caf\u{e9}",
        "Lowercase hex digits in QP should decode correctly (RFC 2045 Section 6.7)"
    );
}
/// Table of (high digit, low digit, expected byte) for `decode_hex_pair` with
/// lowercase hex letters in either position.
#[test]
fn decode_hex_pair_lowercase() {
    let expectations = [
        (b'f', b'f', Some(0xFF)),
        (b'a', b'0', Some(0xA0)),
        (b'0', b'a', Some(0x0A)),
    ];
    for (high, low, expected) in expectations {
        assert_eq!(decode_hex_pair(high, low), expected);
    }
}
/// A display name followed by empty angle brackets carries no address.
#[test]
fn parse_single_address_empty_angle_brackets() {
    let address = parse_single_address("Display Name <>");
    let absent = address.is_none();
    assert!(absent, "Empty angle brackets should not produce an address");
}
/// Input where `>` appears before `<` must still yield some address.
#[test]
fn parse_single_address_reversed_angles() {
let result = parse_single_address(">bad<user@example.com");
// Only the presence of a result is pinned; the extracted address itself is not asserted.
assert!(result.is_some());
}
/// Plain words with neither `@` nor angle brackets are not an address.
#[test]
fn parse_single_address_no_at_no_brackets() {
    let address = parse_single_address("just plain text");
    let absent = address.is_none();
    assert!(absent, "Text without @ or <> should not produce an address");
}
/// Exercises `is_inside_quotes` at three offsets, including one that follows a
/// backslash-escaped quote.
#[test]
fn is_inside_quotes_with_escapes() {
// Input is `"hello \" world"end`; offset 15 is the final quote. The escaped
// quote at offset 8 must not have ended the quoted run before it.
assert!(is_inside_quotes("\"hello \\\" world\"end", 15));
// Offset 0 is the opening quote itself: nothing precedes it.
assert!(!is_inside_quotes("\"hello\"", 0));
// Offset 8 is the `w` after the quoted run has been closed.
assert!(!is_inside_quotes("\"hello\" world", 8));
}
/// Inputs too short to hold a pair of quotes (a lone quote, the empty string,
/// a single character) must be returned unchanged.
#[test]
fn strip_outer_quotes_short_input() {
    for unchanged in ["\"", "", "x"] {
        assert_eq!(strip_outer_quotes(unchanged), unchanged);
    }
}
/// A quote on only one end of the input is not an outer pair, so the input
/// must be returned unchanged.
#[test]
fn strip_outer_quotes_one_sided() {
    for unchanged in ["\"hello", "hello\""] {
        assert_eq!(strip_outer_quotes(unchanged), unchanged);
    }
}
/// Input that starts with a bare LF has an empty header section; everything
/// after that LF is the body.
#[test]
fn split_header_body_starts_with_lf() {
    let (header_part, body_part) = split_header_body(b"\nBody text here");
    let no_headers = header_part.is_empty();
    assert!(
        no_headers,
        "Headers should be empty when input starts with \\n"
    );
    assert_eq!(body_part, b"Body text here");
}
/// Input that starts with CRLF has an empty header section; everything after
/// that CRLF is the body.
#[test]
fn split_header_body_starts_with_crlf() {
    let (header_part, body_part) = split_header_body(b"\r\nBody text here");
    let no_headers = header_part.is_empty();
    assert!(
        no_headers,
        "Headers should be empty when input starts with \\r\\n"
    );
    assert_eq!(body_part, b"Body text here");
}
/// With no transfer encoding, a single trailing LF is removed from the decoded
/// body text.
#[test]
fn decode_body_strips_trailing_lf_only() {
    let raw = b"Hello\n";
    let text = decode_body(raw, "", "text/plain; charset=utf-8");
    assert_eq!(text, "Hello", "Trailing bare LF should be stripped");
}
/// A body with no trailing line break comes back exactly as given.
#[test]
fn decode_body_no_trailing_newline() {
    let raw = b"Hello";
    let text = decode_body(raw, "", "text/plain; charset=utf-8");
    assert_eq!(
        text, "Hello",
        "No trailing newline should leave content unchanged"
    );
}
/// Percent escapes written with lowercase hex digits decode to the matching
/// bytes.
#[test]
fn percent_decode_lowercase_hex() {
    let bytes = percent_decode("%c3%a9");
    assert_eq!(bytes, vec![0xC3, 0xA9]);
}
/// A `%` followed by non-hex characters is copied through unchanged.
#[test]
fn percent_decode_invalid_hex() {
    let bytes = percent_decode("%ZZ");
    assert_eq!(bytes, b"%ZZ");
}
/// A `%` escape cut off after one hex digit at the end of the input is copied
/// through unchanged.
#[test]
fn percent_decode_truncated() {
    let bytes = percent_decode("hello%2");
    assert_eq!(bytes, b"hello%2");
}
/// An address whose local part contains a `:` must still produce at least one
/// parsed address. Only the count is pinned, not the parsed value.
#[test]
fn parse_address_colon_with_at_sign() {
    let parsed = parse_address_list("user:tag@example.com");
    let found_any = !parsed.is_empty();
    assert!(found_any, "Should parse at least one address");
}
}