mod address;
mod date;
mod encoded_words;
mod message_id;
mod mime;
mod params;
use crate::error::Error;
use crate::types::{Address, DateTime, ParsedEmail};
use super::wire::WireMessage;
pub(crate) use address::find_paren_outside_quotes;
pub(crate) use address::normalize_display_name_phrase;
pub use address::parse_address_list;
pub(crate) use address::strip_comments;
pub(crate) use date::parse_rfc5322_date;
pub(crate) use encoded_words::decode_encoded_words;
#[cfg(test)]
pub(super) use address::contains_at_outside_quotes;
#[cfg(test)]
pub(super) use address::extract_comment_text;
#[cfg(test)]
pub(super) use address::parse_single_address;
#[cfg(test)]
pub(super) use address::unescape_quoted_string;
#[cfg(test)]
pub(super) use date::parse_timezone;
#[cfg(test)]
pub(super) use date::parse_year;
#[cfg(test)]
pub(super) use encoded_words::decode_q_encoding;
#[cfg(test)]
pub(super) use mime::decode_body;
#[cfg(test)]
pub(super) use mime::decode_quoted_printable;
#[cfg(test)]
pub(super) use mime::decode_transfer_encoding;
#[cfg(test)]
pub(super) use params::decode_hex_pair;
#[cfg(test)]
pub(super) use params::extract_filename;
#[cfg(test)]
pub(super) use params::extract_mime_type;
#[cfg(test)]
pub(super) use params::extract_param;
#[cfg(test)]
pub(super) use params::extract_rfc2231_continuation;
#[cfg(test)]
pub(super) use params::extract_rfc2231_param;
#[cfg(test)]
pub(super) use params::find_closing_quote;
#[cfg(test)]
pub(super) use params::find_param_value;
#[cfg(test)]
pub(super) use params::hex_digit;
#[cfg(test)]
pub(super) use params::is_disposition_type;
#[cfg(test)]
pub(super) use params::is_inside_quotes;
#[cfg(test)]
pub(super) use params::percent_decode;
#[cfg(test)]
pub(super) use params::strip_outer_quotes;
/// Maximum MIME nesting depth.
///
/// NOTE(review): not referenced in this file; presumably the recursion limit
/// enforced while walking nested multipart bodies in `mime` — confirm there.
pub(super) const MAX_MIME_DEPTH: u32 = 64;
/// Base64 engine built on the standard alphabet with
/// `DecodePaddingMode::Indifferent`, so decoding does not insist on a
/// particular padding style (padded and unpadded input are both accepted).
pub(super) const LENIENT_BASE64: base64::engine::GeneralPurpose =
base64::engine::GeneralPurpose::new(
&base64::alphabet::STANDARD,
base64::engine::GeneralPurposeConfig::new()
.with_decode_padding_mode(base64::engine::DecodePaddingMode::Indifferent),
);
/// Header-derived values gathered by `extract_header_fields` and copied
/// field-for-field into `ParsedEmail` by `interpret`.
///
/// `Default` yields the "nothing found" state (all `None` / empty), which
/// `interpret` uses for messages flagged as headerless.
#[derive(Default)]
struct HeaderFields {
message_id: Option<String>,
in_reply_to: Vec<String>,
references: Vec<String>,
// Subject after encoded-word decoding.
subject: Option<String>,
from: Vec<Address>,
sender: Option<Address>,
to: Vec<Address>,
cc: Vec<Address>,
bcc: Vec<Address>,
reply_to: Vec<Address>,
date: Option<DateTime>,
// `(name, value)` pairs for every header whose name is not listed in
// `WELL_KNOWN_HEADERS`, in the order they appear in the input.
extra_headers: Vec<(String, String)>,
}
/// Header names that `extract_header_fields` leaves out of `extra_headers`.
///
/// Matching is an exact string comparison against these lowercase names.
/// NOTE(review): this assumes header keys have already been lowercased by the
/// time they reach this module — confirm against the wire parser.
const WELL_KNOWN_HEADERS: &[&str] = &[
"from",
"to",
"cc",
"bcc",
"reply-to",
"sender",
"subject",
"date",
"message-id",
"in-reply-to",
"references",
"content-type",
"content-transfer-encoding",
"mime-version",
];
/// Header names whose values are stored in `extra_headers` without
/// encoded-word decoding; every other extra header is passed through
/// `decode_encoded_words`.
///
/// Matching is an exact string comparison against these lowercase names.
const STRUCTURED_HEADERS: &[&str] = &[
"content-disposition",
"content-id",
"received",
"return-path",
"resent-date",
"resent-message-id",
"dkim-signature",
"domainkey-signature",
"arc-seal",
"arc-message-signature",
"arc-authentication-results",
"authentication-results",
];
/// Converts a tokenized wire message into a [`ParsedEmail`].
///
/// Header-derived fields come from `extract_header_fields`; when the message
/// is flagged `headerless` that step is skipped and every such field keeps its
/// `Default` value.
///
/// With `headers_only` set the body is never inspected: `body_text` and
/// `body_html` are `None` and `attachments` is empty. Otherwise the body is
/// decoded according to the `content-type`, `content-transfer-encoding`,
/// `content-disposition` and `content-id` headers, falling back to
/// `text/plain; charset=us-ascii` and `7bit` when the first two are absent.
///
/// # Errors
///
/// Returns whatever error `extract_header_fields` reports.
pub(crate) fn interpret(wire_msg: &WireMessage, headers_only: bool) -> Result<ParsedEmail, Error> {
    let fields = if wire_msg.headerless {
        HeaderFields::default()
    } else {
        extract_header_fields(&wire_msg.headers, &wire_msg.raw_headers)?
    };

    let (body_text, body_html, attachments) = if headers_only {
        // Header-only parsing never touches the body bytes.
        (None, None, Vec::new())
    } else {
        let header = |name: &str| get_header_value(&wire_msg.headers, name);
        let content_type =
            header("content-type").unwrap_or_else(|| "text/plain; charset=us-ascii".to_string());
        let transfer_encoding =
            header("content-transfer-encoding").unwrap_or_else(|| "7bit".to_string());
        let content_disposition = header("content-disposition").unwrap_or_default();
        let content_id = header("content-id");
        let body = &wire_msg.body;

        let multipart = params::is_multipart(&content_type);
        let boundary = if multipart {
            params::extract_boundary_for_body(&content_type, body)
        } else {
            None
        };

        match boundary {
            Some(boundary) => {
                let mime_type = params::extract_mime_type(&content_type);
                let is_digest = mime_type == "multipart/digest";
                let is_alternative = mime_type == "multipart/alternative";
                mime::walk_mime_tree(body, &boundary, "", 0, is_digest, is_alternative)
            }
            None => {
                // A multipart message with no usable boundary is handled as a
                // single plain-text part instead of being split.
                let effective_type = if multipart {
                    "text/plain; charset=us-ascii"
                } else {
                    content_type.as_str()
                };
                mime::extract_simple_body(
                    body,
                    effective_type,
                    &transfer_encoding,
                    &content_disposition,
                    content_id.as_deref(),
                )
            }
        }
    };

    Ok(ParsedEmail {
        message_id: fields.message_id,
        in_reply_to: fields.in_reply_to,
        references: fields.references,
        subject: fields.subject,
        from: fields.from,
        sender: fields.sender,
        to: fields.to,
        cc: fields.cc,
        bcc: fields.bcc,
        reply_to: fields.reply_to,
        date: fields.date,
        body_text,
        body_html,
        attachments,
        raw_headers: wire_msg.raw_headers.clone(),
        extra_headers: fields.extra_headers,
        size: wire_msg.size,
    })
}
fn extract_header_fields(
headers: &[(String, String)],
raw_headers: &str,
) -> Result<HeaderFields, Error> {
if headers.is_empty() {
return Err(Error::MissingFrom);
}
let continuation_flags = header_body_starts_on_continuation_flags(raw_headers);
let extra_headers: Vec<(String, String)> = headers
.iter()
.zip(
continuation_flags
.iter()
.copied()
.chain(std::iter::repeat(false)),
)
.filter(|((k, _), _)| !WELL_KNOWN_HEADERS.contains(&k.as_str()))
.map(|((k, v), starts_on_continuation)| {
let normalized = if starts_on_continuation {
strip_leading_structural_wsp(v)
} else {
v.as_str()
};
let decoded = if STRUCTURED_HEADERS.contains(&k.as_str()) {
normalized.to_string()
} else {
decode_encoded_words(normalized)
};
(k.clone(), decoded)
})
.collect();
Ok(HeaderFields {
message_id: message_id::extract_message_id(headers),
in_reply_to: message_id::extract_in_reply_to(headers),
references: message_id::extract_references(headers),
subject: get_header_value_with_continuation_flag(headers, &continuation_flags, "subject")
.map(|(v, starts_on_continuation)| {
let normalized = if starts_on_continuation {
strip_leading_structural_wsp(&v)
} else {
v.as_str()
};
decode_encoded_words(normalized)
}),
from: address::extract_from(headers),
sender: address::extract_sender(headers),
to: address::extract_address_list(headers, "to"),
cc: address::extract_address_list(headers, "cc"),
bcc: address::extract_address_list(headers, "bcc"),
reply_to: address::extract_address_list(headers, "reply-to"),
date: date::extract_date(headers),
extra_headers,
})
}
/// Returns a copy of the value of the first header whose name equals `name`.
///
/// The comparison is an exact string match (no case folding). Returns `None`
/// when no header carries that name.
pub(super) fn get_header_value(headers: &[(String, String)], name: &str) -> Option<String> {
    headers
        .iter()
        .find_map(|(key, value)| (key == name).then(|| value.clone()))
}
/// Looks up the first header whose name equals `name` and returns a copy of
/// its value together with the continuation flag found at the same position
/// in `continuation_flags`.
///
/// The flag defaults to `false` when `continuation_flags` has no entry for
/// that position. Returns `None` when no header carries that name.
fn get_header_value_with_continuation_flag(
    headers: &[(String, String)],
    continuation_flags: &[bool],
    name: &str,
) -> Option<(String, bool)> {
    let idx = headers.iter().position(|(key, _)| key == name)?;
    let starts_on_continuation = continuation_flags.get(idx).copied().unwrap_or(false);
    Some((headers[idx].1.clone(), starts_on_continuation))
}
/// Removes at most one leading space or horizontal tab from `value`.
///
/// Any further whitespace, and any other leading character, is left in place.
fn strip_leading_structural_wsp(value: &str) -> &str {
    match value.as_bytes().first() {
        // Space and tab are single-byte ASCII, so slicing at 1 stays on a
        // character boundary.
        Some(b' ' | b'\t') => &value[1..],
        _ => value,
    }
}
/// Scans the raw header text and produces one flag per recognised header
/// line, in order of appearance.
///
/// A line counts as a header line when it is non-empty, does not begin with a
/// space or tab, contains a `:`, and the trimmed text before the first `:` is
/// accepted by `HeaderName::new`. All other lines contribute no flag.
///
/// The flag is `true` when everything after the first `:` on that line is
/// spaces/tabs (or nothing) and the next physical line begins with a space or
/// tab — i.e. the header's value starts on a continuation line.
///
/// Lines are split on `\n`; a single trailing `\r` is ignored on each line.
fn header_body_starts_on_continuation_flags(raw_headers: &str) -> Vec<bool> {
    /// True for a line that begins with a space or a tab.
    fn is_continuation(line: &str) -> bool {
        line.starts_with(' ') || line.starts_with('\t')
    }

    let physical_lines = || {
        raw_headers
            .split('\n')
            .map(|raw| raw.strip_suffix('\r').unwrap_or(raw))
    };
    // Pair each line with the one that follows it (`None` after the last).
    let followers = physical_lines()
        .skip(1)
        .map(Some)
        .chain(std::iter::repeat(None));

    physical_lines()
        .zip(followers)
        .filter_map(|(line, following)| {
            if line.is_empty() || is_continuation(line) {
                return None;
            }
            let (name_part, value_part) = line.split_once(':')?;
            if crate::types::HeaderName::new(name_part.trim()).is_err() {
                return None;
            }
            let value_is_blank = value_part.bytes().all(|b| matches!(b, b' ' | b'\t'));
            Some(value_is_blank && following.is_some_and(is_continuation))
        })
        .collect()
}