use mailrs_rfc5322::Message;
use crate::content_type::{ContentType, Disposition};
use crate::decoder::TransferEncoding;
/// One node of a parsed MIME entity tree: either a leaf part or a multipart container.
///
/// The field notes below describe how `parse` in this module fills the struct in.
#[derive(Debug, Clone)]
pub struct Part {
/// Parsed `Content-Type` header. When the header is missing, `parse` stores
/// `ContentType::default_for_missing_header()` instead.
pub content_type: ContentType,
/// Parsed `Content-Disposition` header, or `None` when the header is absent.
pub disposition: Option<Disposition>,
/// `Content-ID` header value, trimmed of surrounding whitespace and of any leading or
/// trailing `<` / `>` characters; `None` when the header is absent.
pub content_id: Option<String>,
/// Parsed `Content-Transfer-Encoding` header; `TransferEncoding::SevenBit` when absent.
pub transfer_encoding: TransferEncoding,
/// For a non-multipart part: the body after `transfer_encoding.decode`.
/// For a multipart part: the raw, undecoded body, delimiter lines included.
pub body: Vec<u8>,
/// Sub-parts of a multipart part, in document order. Empty for non-multipart parts and
/// for multipart parts whose `Content-Type` carries no boundary.
pub children: Vec<Part>,
}
impl Part {
    /// Returns the first part in [`Part::walk`] order (so `self` is checked first) whose
    /// `content_type.mime_type()` equals `mime_type`.
    ///
    /// The argument is ASCII-lowercased before the comparison, so callers may pass any case.
    pub fn find_by_content_type(&self, mime_type: &str) -> Option<&Part> {
        let wanted = mime_type.to_ascii_lowercase();
        for part in self.walk() {
            if part.content_type.mime_type() == wanted {
                return Some(part);
            }
        }
        None
    }

    /// Returns an iterator over this part and every descendant, parents before children.
    pub fn walk(&self) -> Walker<'_> {
        let mut stack = Vec::new();
        stack.push(self);
        Walker { stack }
    }

    /// Decodes the body to a `String` when the top-level content type is `text`.
    ///
    /// Returns `None` for every other top-level type. The charset label from the content type
    /// is looked up with `encoding_rs`; when the label is not recognised, the body is decoded
    /// as lossy UTF-8 instead.
    pub fn body_text(&self) -> Option<String> {
        if self.content_type.type_ != "text" {
            return None;
        }
        let label = self.content_type.charset();
        let text = match encoding_rs::Encoding::for_label(label.as_bytes()) {
            Some(encoding) => encoding.decode(&self.body).0.into_owned(),
            None => String::from_utf8_lossy(&self.body).into_owned(),
        };
        Some(text)
    }

    /// Iterates over every part of the tree (including `self`) for which
    /// [`Part::is_attachment`] is true.
    pub fn attachments(&self) -> impl Iterator<Item = &Part> {
        self.walk().filter(|part| part.is_attachment())
    }

    /// Returns the file name advertised for this part.
    ///
    /// The `filename` of the disposition wins; the `name` of the content type is the fallback.
    pub fn attachment_filename(&self) -> Option<&str> {
        match self.disposition.as_ref().and_then(|d| d.filename()) {
            Some(name) => Some(name),
            None => self.content_type.name(),
        }
    }

    /// Reports whether this part should be treated as an attachment.
    ///
    /// True when the disposition says so, or when any file name is advertised (disposition
    /// `filename` or content-type `name`), regardless of the disposition kind.
    pub fn is_attachment(&self) -> bool {
        let declared = self
            .disposition
            .as_ref()
            .is_some_and(|d| d.is_attachment());
        declared || self.attachment_filename().is_some()
    }
}
/// Iterator over a [`Part`] and all of its descendants, created by [`Part::walk`].
///
/// Parts are yielded depth-first in pre-order: a part first, then each of its children
/// (together with their own subtrees) in the order they appear in `children`.
pub struct Walker<'a> {
/// Parts still waiting to be yielded; the next one is at the end of the vector.
stack: Vec<&'a Part>,
}
impl<'a> Iterator for Walker<'a> {
    type Item = &'a Part;

    /// Yields the part on top of the stack and schedules its children for later visits.
    fn next(&mut self) -> Option<Self::Item> {
        let current = self.stack.pop()?;
        // Children go on in reverse so that the first child ends up on top and is
        // therefore the next part to be yielded.
        self.stack.extend(current.children.iter().rev());
        Some(current)
    }
}
/// Parses a raw MIME entity (headers, blank line, body) into a [`Part`] tree.
///
/// * Header values are decoded as lossy UTF-8 before being handed to the header parsers.
/// * A missing `Content-Type` falls back to `ContentType::default_for_missing_header()`;
///   a missing `Content-Transfer-Encoding` falls back to `TransferEncoding::SevenBit`.
/// * Multipart entities keep their raw body and get their children from `split_multipart`,
///   which calls back into this function for every sub-part. A multipart entity without a
///   boundary parameter has no children.
/// * Every other entity stores its body after transfer decoding and has no children.
pub fn parse(raw: &[u8]) -> Part {
    let msg = Message::new(raw);
    // All structured headers are read the same way: raw bytes -> lossy UTF-8 text.
    let header_text = |name: &str| msg.header(name).map(String::from_utf8_lossy);

    let content_type = header_text("Content-Type")
        .map(|value| ContentType::parse(&value))
        .unwrap_or_else(ContentType::default_for_missing_header);
    let disposition = header_text("Content-Disposition").map(|value| Disposition::parse(&value));
    let content_id = header_text("Content-ID")
        .map(|value| value.trim().trim_matches(['<', '>']).to_string());
    let transfer_encoding = header_text("Content-Transfer-Encoding")
        .map_or(TransferEncoding::SevenBit, |value| {
            TransferEncoding::parse(&value)
        });

    let raw_body = msg.body().unwrap_or(b"");
    let (body, children) = if content_type.is_multipart() {
        let children = content_type
            .boundary()
            .map(|boundary| split_multipart(raw_body, boundary))
            .unwrap_or_default();
        // Containers keep the undecoded body; the content lives in the children.
        (raw_body.to_vec(), children)
    } else {
        (transfer_encoding.decode(raw_body), Vec::new())
    };

    Part {
        content_type,
        disposition,
        content_id,
        transfer_encoding,
        body,
        children,
    }
}
/// Splits a multipart body on `boundary` and parses every enclosed part with `parse`.
///
/// A delimiter line is `--boundary` at the start of a line, optionally followed by `--`
/// (which makes it the close delimiter), optional spaces or tabs, and then CRLF, LF or the
/// end of the input. Lines that merely begin with `--boundary` but carry other text are
/// ordinary content, so a nested boundary that extends the outer one is not mistaken for
/// an outer delimiter.
///
/// * Bytes before the first delimiter (preamble) and after the close delimiter (epilogue)
///   are ignored.
/// * The line break directly in front of a delimiter belongs to the delimiter and is not
///   part of the preceding part's content.
/// * Two adjacent delimiter lines produce an empty part.
/// * If the input ends without a further delimiter, the bytes after the last delimiter are
///   not returned as a part.
fn split_multipart(body: &[u8], boundary: &str) -> Vec<Part> {
    let delim = format!("--{boundary}");
    let delim = delim.as_bytes();
    let mut parts = Vec::new();
    let mut cursor = 0usize;
    let mut current_start: Option<usize> = None;
    while cursor < body.len() {
        let Some(pos) = find_at_line_start(body, cursor, delim) else {
            break;
        };
        let Some((is_close, after)) = delimiter_line_end(body, pos + delim.len()) else {
            // Only a prefix match (e.g. `--xylophone` for boundary `x`): keep scanning.
            cursor = pos + 1;
            continue;
        };
        if let Some(start) = current_start {
            // `pos >= start` always holds because the search began at `start`. Trimming
            // inside `body[start..pos]` keeps the slice valid even when the part is empty
            // (two delimiter lines in a row), where stepping back from `pos` would reach
            // into the previous delimiter line.
            let segment = &body[start..pos];
            let segment = segment
                .strip_suffix(b"\r\n")
                .or_else(|| segment.strip_suffix(b"\n"))
                .unwrap_or(segment);
            parts.push(parse(segment));
        }
        if is_close {
            break;
        }
        current_start = Some(after);
        cursor = after;
    }
    parts
}

/// Checks that the bytes following a `--boundary` match really finish a delimiter line.
///
/// `tail` is the index just past the matched `--boundary` and must not exceed `body.len()`.
/// Returns `None` when other text follows on the same line. Otherwise returns whether the
/// line is the close delimiter (`--boundary--`) together with the index of the first byte
/// after the line terminator (or `body.len()` when the input ends on this line).
fn delimiter_line_end(body: &[u8], tail: usize) -> Option<(bool, usize)> {
    let mut i = tail;
    let is_close = body[i..].starts_with(b"--");
    if is_close {
        i += 2;
    }
    // Transport padding: trailing spaces/tabs are tolerated on delimiter lines.
    while matches!(body.get(i), Some(b' ' | b'\t')) {
        i += 1;
    }
    match &body[i..] {
        [] => Some((is_close, i)),
        [b'\r', b'\n', ..] => Some((is_close, i + 2)),
        [b'\n', ..] => Some((is_close, i + 1)),
        _ => None,
    }
}
/// Finds the first occurrence of `pattern` at or after `cursor` that begins a line.
///
/// A position begins a line when it is index 0 or directly follows a `\n` byte (which also
/// covers CRLF line endings). Returns `None` when there is no such occurrence, including
/// when `cursor` is past the last position where `pattern` could still fit.
fn find_at_line_start(body: &[u8], cursor: usize, pattern: &[u8]) -> Option<usize> {
    // Last index at which `pattern` still fits; `None` means the body is too short.
    let last = body.len().checked_sub(pattern.len())?;
    (cursor..=last).find(|&at| {
        let begins_line = at == 0 || body[at - 1] == b'\n';
        begins_line && body[at..].starts_with(pattern)
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    // A single text/plain entity has no children and a readable body.
    #[test]
    fn parse_simple_text_plain() {
        let part = parse(b"Content-Type: text/plain\r\n\r\nhello");
        assert_eq!(part.content_type.mime_type(), "text/plain");
        assert!(part.children.is_empty());
        assert_eq!(part.body_text().as_deref(), Some("hello"));
    }

    // Without a Content-Type header the part is treated as text/plain.
    #[test]
    fn parse_no_content_type_defaults_text_plain() {
        let part = parse(b"From: a\r\n\r\nhello");
        assert_eq!(part.content_type.mime_type(), "text/plain");
        assert_eq!(part.body_text().as_deref(), Some("hello"));
    }

    // multipart/alternative splits into its two bodies, without trailing line breaks.
    #[test]
    fn parse_multipart_alternative() {
        let raw = b"Content-Type: multipart/alternative; boundary=\"xx\"\r\n\
            \r\n\
            --xx\r\n\
            Content-Type: text/plain\r\n\
            \r\n\
            hello plain\r\n\
            --xx\r\n\
            Content-Type: text/html\r\n\
            \r\n\
            <p>hello</p>\r\n\
            --xx--\r\n";
        let root = parse(raw);
        assert!(root.content_type.is_multipart());
        assert_eq!(root.children.len(), 2);
        let plain = &root.children[0];
        let html = &root.children[1];
        assert_eq!(plain.body_text().as_deref(), Some("hello plain"));
        assert_eq!(html.body_text().as_deref(), Some("<p>hello</p>"));
    }

    // Lookup by MIME type finds each alternative and reports absent types as None.
    #[test]
    fn find_by_content_type_returns_first_match() {
        let raw = b"Content-Type: multipart/alternative; boundary=\"xx\"\r\n\
            \r\n\
            --xx\r\n\
            Content-Type: text/plain\r\n\
            \r\n\
            plain body\r\n\
            --xx\r\n\
            Content-Type: text/html\r\n\
            \r\n\
            html body\r\n\
            --xx--\r\n";
        let root = parse(raw);
        let html = root.find_by_content_type("text/html").unwrap();
        assert_eq!(html.body_text().as_deref(), Some("html body"));
        let plain = root.find_by_content_type("text/plain").unwrap();
        assert_eq!(plain.body_text().as_deref(), Some("plain body"));
        assert!(root.find_by_content_type("text/calendar").is_none());
    }

    // A calendar invite part is reachable by type even with extra parameters.
    #[test]
    fn find_text_calendar_in_invite() {
        let raw = b"Content-Type: multipart/alternative; boundary=\"x\"\r\n\
            \r\n\
            --x\r\n\
            Content-Type: text/plain\r\n\
            \r\n\
            Meeting invite.\r\n\
            --x\r\n\
            Content-Type: text/calendar; method=REQUEST; charset=utf-8\r\n\
            \r\n\
            BEGIN:VCALENDAR\r\nEND:VCALENDAR\r\n\
            --x--\r\n";
        let root = parse(raw);
        let calendar = root.find_by_content_type("text/calendar").unwrap();
        let text = calendar.body_text().unwrap();
        assert!(text.contains("VCALENDAR"));
    }

    // A base64 attachment is listed once, named, and transfer-decoded.
    #[test]
    fn parse_base64_attachment() {
        let raw = b"Content-Type: multipart/mixed; boundary=\"xx\"\r\n\
            \r\n\
            --xx\r\n\
            Content-Type: text/plain\r\n\
            \r\n\
            See attached.\r\n\
            --xx\r\n\
            Content-Type: application/pdf; name=\"report.pdf\"\r\n\
            Content-Disposition: attachment; filename=\"report.pdf\"\r\n\
            Content-Transfer-Encoding: base64\r\n\
            \r\n\
            SGVsbG8gd29ybGQ=\r\n\
            --xx--\r\n";
        let root = parse(raw);
        let found: Vec<&Part> = root.attachments().collect();
        assert_eq!(found.len(), 1);
        let pdf = found[0];
        assert_eq!(pdf.attachment_filename(), Some("report.pdf"));
        assert_eq!(pdf.body, b"Hello world");
    }

    // Quoted-printable escapes are decoded before text conversion.
    #[test]
    fn parse_quoted_printable_body() {
        let raw = b"Content-Type: text/plain; charset=utf-8\r\n\
            Content-Transfer-Encoding: quoted-printable\r\n\
            \r\n\
            Hello=20world=21";
        let part = parse(raw);
        assert_eq!(part.body_text().as_deref(), Some("Hello world!"));
    }

    // A multipart nested inside another multipart yields a two-level tree.
    #[test]
    fn parse_nested_multipart() {
        let raw = b"Content-Type: multipart/mixed; boundary=\"outer\"\r\n\
            \r\n\
            --outer\r\n\
            Content-Type: multipart/alternative; boundary=\"inner\"\r\n\
            \r\n\
            --inner\r\n\
            Content-Type: text/plain\r\n\
            \r\n\
            plain\r\n\
            --inner\r\n\
            Content-Type: text/html\r\n\
            \r\n\
            <p>html</p>\r\n\
            --inner--\r\n\
            --outer\r\n\
            Content-Type: application/pdf; name=\"x.pdf\"\r\n\
            Content-Disposition: attachment; filename=\"x.pdf\"\r\n\
            \r\n\
            PDFBYTES\r\n\
            --outer--\r\n";
        let root = parse(raw);
        assert!(root.content_type.is_multipart());
        assert_eq!(root.children.len(), 2);
        assert_eq!(root.children[0].children.len(), 2);
        let found: Vec<&Part> = root.attachments().collect();
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].attachment_filename(), Some("x.pdf"));
    }

    // walk() is pre-order: the container first, then children in document order.
    #[test]
    fn walk_visits_self_first_then_children_in_order() {
        let raw = b"Content-Type: multipart/alternative; boundary=\"x\"\r\n\
            \r\n\
            --x\r\n\
            Content-Type: text/plain\r\n\
            \r\n\
            a\r\n\
            --x\r\n\
            Content-Type: text/html\r\n\
            \r\n\
            b\r\n\
            --x--\r\n";
        let root = parse(raw);
        let visited: Vec<String> = root
            .walk()
            .map(|part| part.content_type.mime_type())
            .collect();
        assert_eq!(
            visited,
            ["multipart/alternative", "text/plain", "text/html"]
        );
    }

    // The angle brackets around a Content-ID are not part of the stored id.
    #[test]
    fn content_id_strips_angle_brackets() {
        let raw = b"Content-Type: image/png\r\n\
            Content-ID: <abc@example.com>\r\n\
            \r\n\
            binary";
        let part = parse(raw);
        assert_eq!(part.content_id.as_deref(), Some("abc@example.com"));
    }

    // Headers followed by nothing give an empty body rather than an error.
    #[test]
    fn empty_body_handled_gracefully() {
        let part = parse(b"Content-Type: text/plain\r\n\r\n");
        assert_eq!(part.body, b"");
    }

    // An explicit attachment disposition is enough, even without a file name.
    #[test]
    fn is_attachment_via_disposition() {
        let raw = b"Content-Type: text/plain\r\n\
            Content-Disposition: attachment\r\n\
            \r\n\
            hi";
        let part = parse(raw);
        assert!(part.is_attachment());
    }

    // A content-type `name` alone marks the part as an attachment and names it.
    #[test]
    fn is_attachment_via_filename_no_disposition() {
        let part = parse(b"Content-Type: application/pdf; name=\"x.pdf\"\r\n\r\nBYTES");
        assert!(part.is_attachment());
        assert_eq!(part.attachment_filename(), Some("x.pdf"));
    }

    // Non-UTF-8 charsets are decoded through the declared charset label.
    #[test]
    fn text_with_iso_2022_jp_charset() {
        let raw = b"Content-Type: text/plain; charset=iso-2022-jp\r\n\
            \r\n\
            \x1b$B%F%9%H\x1b(B";
        let part = parse(raw);
        assert_eq!(part.body_text().as_deref(), Some("テスト"));
    }

    // Anything after the close delimiter is epilogue and never becomes a part.
    #[test]
    fn boundary_with_close_marker_terminates() {
        let raw = b"Content-Type: multipart/mixed; boundary=\"x\"\r\n\
            \r\n\
            --x\r\n\
            Content-Type: text/plain\r\n\
            \r\n\
            part1\r\n\
            --x--\r\n\
            EPILOGUE - SHOULD NOT BE A PART";
        let root = parse(raw);
        assert_eq!(root.children.len(), 1);
        assert_eq!(root.children[0].body_text().as_deref(), Some("part1"));
    }

    // A multipart type with no boundary parameter cannot be split.
    #[test]
    fn multipart_without_boundary_yields_no_children() {
        let root = parse(b"Content-Type: multipart/mixed\r\n\r\nbody");
        assert!(root.children.is_empty());
    }
}