use crate::part::ParsedPart;
/// Part-ID lists produced by `compute_body_structure`.
///
/// Every entry is the `part_id` of a parsed MIME part, stored in the order in
/// which the tree walk reached that part.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub(crate) struct BodyStructure {
    /// Part IDs gathered for the text view of the body. Besides `text/plain`
    /// parts this can hold `text/html` or inline media parts, e.g. when a
    /// `multipart/alternative` offered no plain-text variant.
    pub(crate) text_body: Vec<String>,
    /// Part IDs gathered for the HTML view of the body. Besides `text/html`
    /// parts this can hold `text/plain` or inline media parts, e.g. when a
    /// `multipart/alternative` offered no HTML variant.
    pub(crate) html_body: Vec<String>,
    /// Part IDs classified as attachments. An inline media part may be listed
    /// here in addition to one of the body lists.
    pub(crate) attachments: Vec<String>,
}
/// Classifies every part of the MIME tree rooted at `root` into text-body,
/// HTML-body and attachment lists.
///
/// The root is handed to `parse_structure` as the only element of a slice,
/// with the enclosing multipart type given as `"mixed"`, the "inside an
/// alternative" flag cleared, and both body lists enabled.
///
/// Returns a `BodyStructure` holding the collected part IDs.
pub fn compute_body_structure(root: &ParsedPart) -> BodyStructure {
    let mut structure = BodyStructure {
        text_body: Vec::new(),
        html_body: Vec::new(),
        attachments: Vec::new(),
    };

    // `parse_structure` can switch a body list off by overwriting its slot with
    // `None`; IDs pushed before that point remain in the underlying vector.
    let mut text_slot = Some(&mut structure.text_body);
    let mut html_slot = Some(&mut structure.html_body);
    parse_structure(
        std::slice::from_ref(root),
        "mixed",
        false,
        &mut text_slot,
        &mut html_slot,
        &mut structure.attachments,
    );

    structure
}
/// Returns `true` when `media_type` begins with `image/`, `audio/` or `video/`.
///
/// The comparison is an exact, case-sensitive prefix match.
fn is_inline_media_type(media_type: &str) -> bool {
    const INLINE_PREFIXES: [&str; 3] = ["image/", "audio/", "video/"];
    INLINE_PREFIXES
        .iter()
        .any(|prefix| media_type.starts_with(prefix))
}
/// Appends `part_id` to `list`, unless that list has been switched off (`None`).
fn push_if_enabled(list: &mut Option<&mut Vec<String>>, part_id: &str) {
    if let Some(ids) = list.as_deref_mut() {
        ids.push(part_id.to_owned());
    }
}

/// Walks `parts` (the children of one container) and files each leaf part's
/// ID into the text-body, HTML-body or attachment list, recursing into nested
/// `multipart/*` parts.
///
/// # Parameters
/// - `parts`: sibling parts to classify, in order.
/// - `multipart_type`: subtype of the container that directly holds `parts`
///   (`"mixed"`, `"alternative"`, `"related"`, ...).
/// - `in_alternative`: `true` once any enclosing container was a
///   `multipart/alternative`.
/// - `text_body` / `html_body`: the body lists; `None` means the list is
///   switched off and receives nothing further.
/// - `attachments`: receives every part that is not treated as body content.
///
/// NOTE(review): the shape closely resembles the body-part algorithm in
/// RFC 8621 section 4.1.4 - confirm. One visible difference: switching a list
/// off writes through the shared `&mut Option`, so it stays off for the caller
/// and for all later siblings; the in-file test
/// `alternative_mixed_subtree_nullifies_html_body` depends on that.
fn parse_structure<'a>(
    parts: &[ParsedPart],
    multipart_type: &str,
    in_alternative: bool,
    text_body: &mut Option<&'a mut Vec<String>>,
    html_body: &mut Option<&'a mut Vec<String>>,
    attachments: &mut Vec<String>,
) {
    let directly_in_alternative = multipart_type == "alternative";

    // Remember how long each list was on entry so the fix-up after the loop can
    // tell which lists grew during this call. A switched-off list counts as 0.
    let text_start = text_body.as_deref().map_or(0, |ids| ids.len());
    let html_start = html_body.as_deref().map_or(0, |ids| ids.len());

    for (index, part) in parts.iter().enumerate() {
        let content_type = part.content_type.as_str();

        // Containers contribute nothing themselves; descend into their children.
        if content_type.starts_with("multipart/") {
            let subtype = content_type.split_once('/').map_or("mixed", |(_, sub)| sub);
            parse_structure(
                &part.children,
                subtype,
                in_alternative || subtype == "alternative",
                text_body,
                html_body,
                attachments,
            );
            continue;
        }

        let flagged_attachment = part
            .disposition
            .as_deref()
            .is_some_and(|d| d.eq_ignore_ascii_case("attachment"));
        let is_media = is_inline_media_type(content_type);
        let is_text = matches!(content_type, "text/plain" | "text/html");
        // The first child may always be body content. Later children qualify
        // only outside `multipart/related`, and then only if they are media or
        // carry no filename.
        let position_allows = index == 0
            || (multipart_type != "related" && (is_media || part.filename.is_none()));

        if flagged_attachment || !(is_text || is_media) || !position_allows {
            attachments.push(part.part_id.clone());
            continue;
        }

        // Direct child of an alternative: each text flavour goes to its own
        // list only; any other inline type is filed as an attachment.
        if directly_in_alternative {
            match content_type {
                "text/plain" => push_if_enabled(text_body, &part.part_id),
                "text/html" => push_if_enabled(html_body, &part.part_id),
                _ => attachments.push(part.part_id.clone()),
            }
            continue;
        }

        // Nested somewhere below an alternative: a text flavour claims this
        // branch for its own list and switches the other list off.
        if in_alternative {
            match content_type {
                "text/plain" => *html_body = None,
                "text/html" => *text_body = None,
                _ => {}
            }
        }

        push_if_enabled(text_body, &part.part_id);
        push_if_enabled(html_body, &part.part_id);

        // Media that reaches only one (or neither) body list is also listed as
        // an attachment.
        if is_media && (text_body.is_none() || html_body.is_none()) {
            attachments.push(part.part_id.clone());
        }
    }

    if !directly_in_alternative {
        return;
    }

    // When both lists are still enabled and exactly one of them grew, copy the
    // newly added IDs into the other so it is not left without this content.
    if let (Some(text_ids), Some(html_ids)) =
        (text_body.as_deref_mut(), html_body.as_deref_mut())
    {
        let text_grew = text_ids.len() != text_start;
        let html_grew = html_ids.len() != html_start;
        if html_grew && !text_grew {
            text_ids.extend_from_slice(&html_ids[html_start..]);
        } else if text_grew && !html_grew {
            html_ids.extend_from_slice(&text_ids[text_start..]);
        }
    }
}
// End-to-end tests: each case feeds a raw message to `crate::parse` and
// asserts on the `text_body`, `html_body` and `attachments` part-ID lists of
// the result.
// NOTE(review): presumably `parse` fills those lists through
// `compute_body_structure` - confirm against the caller.
#[cfg(test)]
mod tests {
use crate::parse;
// Single-part text/plain message: part "1" is expected in both body lists and
// nothing in attachments.
#[test]
fn simple_text_plain() {
let raw =
b"From: a@b.com\r\nMIME-Version: 1.0\r\nContent-Type: text/plain\r\n\r\nHello\r\n";
let msg = parse(raw).expect("parse failed");
assert_eq!(msg.text_body, vec!["1".to_owned()]);
assert_eq!(msg.html_body, vec!["1".to_owned()]);
assert!(msg.attachments.is_empty(), "attachments should be empty");
}
// multipart/alternative offering both flavours: text/plain (part "1") goes to
// text_body only, text/html (part "2") to html_body only.
#[test]
fn multipart_alternative_text_and_html() {
let raw = concat!(
"From: a@b.com\r\n",
"MIME-Version: 1.0\r\n",
"Content-Type: multipart/alternative; boundary=\"b\"\r\n",
"\r\n",
"--b\r\n",
"Content-Type: text/plain\r\n",
"\r\n",
"Hello text\r\n",
"--b\r\n",
"Content-Type: text/html\r\n",
"\r\n",
"<p>Hello html</p>\r\n",
"--b--\r\n"
)
.as_bytes();
let msg = parse(raw).expect("parse failed");
assert_eq!(msg.text_body, vec!["1".to_owned()]);
assert_eq!(msg.html_body, vec!["2".to_owned()]);
assert!(msg.attachments.is_empty(), "attachments should be empty");
}
// multipart/mixed with a text part and a PDF marked `attachment`: the text
// part lands in both body lists, the PDF (part "2") in attachments.
#[test]
fn multipart_mixed_text_and_attachment() {
let raw = concat!(
"From: a@b.com\r\n",
"MIME-Version: 1.0\r\n",
"Content-Type: multipart/mixed; boundary=\"b\"\r\n",
"\r\n",
"--b\r\n",
"Content-Type: text/plain\r\n",
"\r\n",
"Body text\r\n",
"--b\r\n",
"Content-Type: application/pdf\r\n",
"Content-Disposition: attachment; filename=\"doc.pdf\"\r\n",
"\r\n",
"<pdf content>\r\n",
"--b--\r\n"
)
.as_bytes();
let msg = parse(raw).expect("parse failed");
assert_eq!(msg.text_body, vec!["1".to_owned()]);
assert_eq!(msg.html_body, vec!["1".to_owned()]);
assert_eq!(msg.attachments, vec!["2".to_owned()]);
}
// multipart/alternative containing only text/html: the HTML part is expected
// to be copied into text_body as well.
#[test]
fn alternative_html_only_mirrors_to_text_body() {
let raw = concat!(
"From: a@b.com\r\n",
"MIME-Version: 1.0\r\n",
"Content-Type: multipart/alternative; boundary=\"b\"\r\n",
"\r\n",
"--b\r\n",
"Content-Type: text/html\r\n",
"\r\n",
"<p>HTML only</p>\r\n",
"--b--\r\n"
)
.as_bytes();
let msg = parse(raw).expect("parse failed");
assert_eq!(msg.text_body, vec!["1".to_owned()]);
assert_eq!(msg.html_body, vec!["1".to_owned()]);
assert!(msg.attachments.is_empty());
}
// multipart/alternative containing only text/plain: the text part is expected
// to be copied into html_body as well.
#[test]
fn alternative_text_only_mirrors_to_html_body() {
let raw = concat!(
"From: a@b.com\r\n",
"MIME-Version: 1.0\r\n",
"Content-Type: multipart/alternative; boundary=\"b\"\r\n",
"\r\n",
"--b\r\n",
"Content-Type: text/plain\r\n",
"\r\n",
"Text only\r\n",
"--b--\r\n"
)
.as_bytes();
let msg = parse(raw).expect("parse failed");
assert_eq!(msg.text_body, vec!["1".to_owned()]);
assert_eq!(msg.html_body, vec!["1".to_owned()]);
assert!(msg.attachments.is_empty());
}
// multipart/related: the first child (text/html) fills both body lists; the
// second child (image/gif, no Content-Disposition) is expected in attachments.
#[test]
fn related_non_first_child_goes_to_attachments() {
let raw = concat!(
"From: a@b.com\r\n",
"MIME-Version: 1.0\r\n",
"Content-Type: multipart/related; boundary=\"b\"\r\n",
"\r\n",
"--b\r\n",
"Content-Type: text/html\r\n",
"\r\n",
"<p>HTML with inline image</p>\r\n",
"--b\r\n",
"Content-Type: image/gif\r\n",
"Content-ID: <img@example.com>\r\n",
"\r\n",
"<gif data>\r\n",
"--b--\r\n"
)
.as_bytes();
let msg = parse(raw).expect("parse failed");
assert_eq!(msg.text_body, vec!["1".to_owned()]);
assert_eq!(msg.html_body, vec!["1".to_owned()]);
assert_eq!(msg.attachments, vec!["2".to_owned()]);
}
// multipart/alternative whose first child is a multipart/mixed holding a
// text/plain part ("1.1"), followed by a sibling text/html part. Expected
// outcome: text_body is ["1.1"], html_body is empty and attachments is empty,
// i.e. the sibling text/html part appears in none of the three lists.
#[test]
fn alternative_mixed_subtree_nullifies_html_body() {
let raw = concat!(
"From: a@b.com\r\n",
"MIME-Version: 1.0\r\n",
"Content-Type: multipart/alternative; boundary=\"outer\"\r\n",
"\r\n",
"--outer\r\n",
"Content-Type: multipart/mixed; boundary=\"inner\"\r\n",
"\r\n",
"--inner\r\n",
"Content-Type: text/plain\r\n",
"\r\n",
"Plain text in mixed\r\n",
"--inner--\r\n",
"--outer\r\n",
"Content-Type: text/html\r\n",
"\r\n",
"<p>This html is suppressed because html_body was nullified</p>\r\n",
"--outer--\r\n"
)
.as_bytes();
let msg = parse(raw).expect("parse failed");
assert_eq!(msg.text_body, vec!["1.1".to_owned()]);
assert!(
msg.html_body.is_empty(),
"html_body should be empty after nullification; got: {:?}",
msg.html_body
);
assert!(msg.attachments.is_empty());
}
}