use std::collections::HashMap;
/// Parsed structure of one MIME entity, as produced by `parse_mime`.
///
/// Leaf variants record where their body lives (`body_offset`, `body_len`)
/// rather than copying the bytes; the offsets are relative to the buffer that
/// was handed to `parse_mime`.
#[derive(Debug, Clone)]
// NOTE(review): `boundary` is stored but never read by the code in this file,
// which is presumably why the lint is silenced — confirm before removing.
#[allow(dead_code)]
pub enum MimePart {
/// An entity whose media type is `text` (also used as the fallback when a
/// `multipart` entity carries no usable `boundary` parameter).
Text {
/// Lower-cased subtype, e.g. `plain` or `html`.
subtype: String,
/// Value of the `charset` parameter exactly as it appeared, if present.
charset: Option<String>,
/// Upper-cased `Content-Transfer-Encoding`; `7BIT` when the header is absent.
encoding: String,
/// Body length in bytes.
size: usize,
/// Number of `\n` bytes in the body.
lines: usize,
/// Offset of the first body byte.
body_offset: usize,
/// Body length in bytes (same value as `size`).
body_len: usize,
},
/// An entity whose media type is `multipart` and that declares a boundary.
Multipart {
/// Lower-cased subtype, e.g. `mixed` or `alternative`.
subtype: String,
/// The `boundary` parameter value, without the leading `--`.
boundary: String,
/// Child entities in the order their delimiters were found.
parts: Vec<MimePart>,
},
/// Any entity that is neither `text` nor `multipart`.
Other {
/// Lower-cased top-level media type, e.g. `image`.
media_type: String,
/// Lower-cased subtype, e.g. `png`.
subtype: String,
/// Upper-cased `Content-Transfer-Encoding`; `7BIT` when the header is absent.
encoding: String,
/// Body length in bytes.
size: usize,
/// Offset of the first body byte.
body_offset: usize,
/// Body length in bytes (same value as `size`).
body_len: usize,
},
}
/// Parses a `Content-Type` header value into `(media_type, subtype, params)`.
///
/// * `media_type` and `subtype` are lower-cased; a value without a `/`
///   falls back to `text/plain`.
/// * Parameter names are lower-cased; parameter values keep their case and
///   have surrounding whitespace and double quotes removed.
/// * A `;` inside a double-quoted parameter value does not terminate the
///   parameter, so `boundary="a;b"` yields `a;b`.
/// * When a parameter name repeats, the last occurrence wins.
fn parse_content_type(header: &str) -> (String, String, HashMap<String, String>) {
    /// Splits `header` on `;` characters that are outside double quotes.
    fn split_params(header: &str) -> Vec<&str> {
        let mut segments = Vec::new();
        let mut in_quotes = false;
        let mut segment_start = 0;
        for (idx, ch) in header.char_indices() {
            match ch {
                '"' => in_quotes = !in_quotes,
                ';' if !in_quotes => {
                    segments.push(&header[segment_start..idx]);
                    segment_start = idx + 1;
                }
                _ => {}
            }
        }
        if in_quotes {
            // Unbalanced quotes: the header is malformed, so keep the lenient
            // behaviour of splitting on every `;` instead of swallowing the
            // remaining parameters into one value.
            return header.split(';').collect();
        }
        segments.push(&header[segment_start..]);
        segments
    }

    let mut segments = split_params(header.trim()).into_iter();

    let type_part = segments.next().map_or("", str::trim);
    let (media_type, subtype) = match type_part.split_once('/') {
        Some((media, sub)) => (media.trim().to_lowercase(), sub.trim().to_lowercase()),
        None => ("text".to_string(), "plain".to_string()),
    };

    // Segments without `=` are ignored rather than treated as errors.
    let params = segments
        .filter_map(|segment| {
            let (key, value) = segment.trim().split_once('=')?;
            Some((
                key.trim().to_lowercase(),
                value.trim().trim_matches('"').to_string(),
            ))
        })
        .collect();

    (media_type, subtype, params)
}
/// Normalises a `Content-Transfer-Encoding` header value.
///
/// The value is trimmed and upper-cased; a missing header yields `7BIT`.
fn parse_encoding(header: Option<&str>) -> String {
    match header {
        Some(raw) => raw.trim().to_uppercase(),
        None => String::from("7BIT"),
    }
}
/// Returns the trimmed value of the first non-empty `name` header in `data`.
///
/// Only the header section is searched: scanning stops at the first empty
/// line. Lines may end in `\r\n` or a bare `\n`.
///
/// * The header name is compared byte-wise, ignoring ASCII case, so the
///   returned slice is always cut at the right position of the original bytes.
/// * Folded continuation lines (lines starting with a space or tab) are part
///   of the value; the returned slice then still contains the raw line breaks,
///   since it borrows directly from `data`.
/// * Lines that are not valid UTF-8 are skipped instead of aborting the lookup.
///
/// Returns `None` when no matching header with a non-empty value exists.
fn header_value<'a>(data: &'a [u8], name: &str) -> Option<&'a str> {
    /// Index of the `\n` ending the line that starts at `from`, or `data.len()`.
    fn line_end(data: &[u8], from: usize) -> usize {
        data[from..]
            .iter()
            .position(|&b| b == b'\n')
            .map_or(data.len(), |i| from + i)
    }

    let needle = format!("{}:", name);
    let needle = needle.as_bytes();

    let mut line_start = 0;
    while line_start < data.len() {
        let mut end = line_end(data, line_start);
        let mut next_line = (end + 1).min(data.len());

        let line = &data[line_start..end];
        let line = line.strip_suffix(b"\r").unwrap_or(line);
        if line.is_empty() {
            // Blank line: end of the header section.
            break;
        }

        let is_match = line
            .get(..needle.len())
            .map_or(false, |prefix| prefix.eq_ignore_ascii_case(needle));
        if is_match {
            // Pull folded continuation lines into the value.
            while next_line < data.len() && matches!(data[next_line], b' ' | b'\t') {
                end = line_end(data, next_line);
                next_line = (end + 1).min(data.len());
            }
            let raw = &data[line_start + needle.len()..end];
            if let Ok(text) = std::str::from_utf8(raw) {
                let value = text.trim();
                if !value.is_empty() {
                    return Some(value);
                }
            }
        }
        line_start = next_line;
    }
    None
}
/// Returns the offset of the first body byte of a MIME entity.
///
/// The body starts right after the first empty line, where a line may end in
/// `\r\n` or a bare `\n`. This is the same notion of "end of headers" that
/// `header_value` uses, so an entity that begins with a blank line (no
/// headers at all) has its body at the byte after that line.
///
/// Returns `data.len()` when there is no empty line, i.e. the body is empty.
fn find_body_offset(data: &[u8]) -> usize {
    let mut line_start = 0;
    while line_start < data.len() {
        let line_end = data[line_start..]
            .iter()
            .position(|&b| b == b'\n')
            .map_or(data.len(), |i| line_start + i);
        let next_line = (line_end + 1).min(data.len());

        let line = &data[line_start..line_end];
        let line = line.strip_suffix(b"\r").unwrap_or(line);
        if line.is_empty() {
            return next_line;
        }
        line_start = next_line;
    }
    data.len()
}
/// Counts the `\n` bytes in `data`.
#[allow(clippy::naive_bytecount)]
fn count_lines(data: &[u8]) -> usize {
    data.iter().map(|&byte| usize::from(byte == b'\n')).sum()
}
/// Locates the parts of a multipart body.
///
/// Returns one `(start, end)` byte range per part, relative to `data`:
/// `start` is the first byte after a `--boundary` delimiter line and `end`
/// is the first byte of the next delimiter line. Scanning stops at the
/// closing `--boundary--` line; anything after it is ignored. A part that
/// is not followed by another delimiter line is not reported.
///
/// The scan works on the raw bytes, so offsets stay exact for `\r\n` line
/// endings and for bodies that are not valid UTF-8. A line counts as a
/// delimiter only when, after trimming surrounding ASCII whitespace, it is
/// exactly `--boundary` or `--boundary--`; a longer boundary that merely
/// shares the prefix does not match.
fn find_boundary_positions(data: &[u8], boundary: &str) -> Vec<(usize, usize)> {
    /// Strips leading and trailing ASCII whitespace (including `\r`/`\n`).
    fn trim_ascii_ws(mut bytes: &[u8]) -> &[u8] {
        while let [first, rest @ ..] = bytes {
            if !first.is_ascii_whitespace() {
                break;
            }
            bytes = rest;
        }
        while let [rest @ .., last] = bytes {
            if !last.is_ascii_whitespace() {
                break;
            }
            bytes = rest;
        }
        bytes
    }

    let open = format!("--{}", boundary);
    let close = format!("--{}--", boundary);
    let (open, close) = (open.as_bytes(), close.as_bytes());

    let mut positions = Vec::new();
    let mut part_start: Option<usize> = None;
    let mut line_start = 0;

    while line_start < data.len() {
        // `next_line` is the offset just past this line's `\n` (or end of data).
        let next_line = data[line_start..]
            .iter()
            .position(|&b| b == b'\n')
            .map_or(data.len(), |i| line_start + i + 1);
        let line = trim_ascii_ws(&data[line_start..next_line]);

        let is_close = line == close;
        if is_close || line == open {
            if let Some(start) = part_start {
                positions.push((start, line_start));
            }
            if is_close {
                break;
            }
            part_start = Some(next_line);
        }
        line_start = next_line;
    }
    positions
}

/// Returns the byte offset at which line number `target_line` (0-based)
/// starts in `s`, or `s.len()` when `s` has fewer lines.
///
/// Line terminators (`\n` or `\r\n`) are counted with their real length.
// `find_boundary_positions` tracks offsets itself; this helper is kept for
// any other caller, hence the lint exemption.
#[allow(dead_code)]
fn find_line_offset(s: &str, target_line: usize) -> usize {
    s.split_inclusive('\n')
        .take(target_line)
        .map(str::len)
        .sum()
}
/// Parses a complete message (headers plus body) into its MIME structure.
///
/// All offsets stored in the returned tree are relative to the start of `data`.
pub fn parse_mime(data: &[u8]) -> MimePart {
    // The top-level entity starts at the beginning of the buffer.
    let message_start = 0;
    parse_mime_part(data, message_start)
}
/// Parses one MIME entity (its headers followed by its body).
///
/// `data` is the entity's own bytes; `base_offset` is where `data` begins
/// inside the buffer the caller ultimately indexes, so the offsets stored in
/// the result are `base_offset` plus the position within `data`.
///
/// * A missing `Content-Type` is treated as `text/plain`.
/// * `multipart` entities are split at their boundary and parsed recursively;
///   a multipart entity without a `boundary` parameter is reported as
///   `text/plain`.
/// * Everything that is neither `text` nor `multipart` becomes `Other`.
fn parse_mime_part(data: &[u8], base_offset: usize) -> MimePart {
    let raw_content_type = match header_value(data, "Content-Type") {
        Some(value) => value.to_owned(),
        None => String::from("text/plain"),
    };
    let encoding = parse_encoding(header_value(data, "Content-Transfer-Encoding"));
    let (media_type, subtype, params) = parse_content_type(&raw_content_type);

    let header_len = find_body_offset(data);
    let body = &data[header_len..];
    let absolute_body_offset = base_offset + header_len;

    if media_type == "text" {
        return MimePart::Text {
            subtype,
            charset: params.get("charset").cloned(),
            encoding,
            size: body.len(),
            lines: count_lines(body),
            body_offset: absolute_body_offset,
            body_len: body.len(),
        };
    }

    if media_type != "multipart" {
        return MimePart::Other {
            media_type,
            subtype,
            encoding,
            size: body.len(),
            body_offset: absolute_body_offset,
            body_len: body.len(),
        };
    }

    // Multipart from here on; without a boundary the body cannot be split.
    let boundary = match params.get("boundary") {
        Some(value) if !value.is_empty() => value.clone(),
        _ => {
            return MimePart::Text {
                subtype: "plain".to_string(),
                charset: params.get("charset").cloned(),
                encoding,
                size: body.len(),
                lines: count_lines(body),
                body_offset: absolute_body_offset,
                body_len: body.len(),
            }
        }
    };

    let parts = find_boundary_positions(body, &boundary)
        .into_iter()
        .filter(|&(start, end)| start < end && end <= body.len())
        .map(|(start, end)| parse_mime_part(&body[start..end], absolute_body_offset + start))
        .collect();

    MimePart::Multipart {
        subtype,
        boundary,
        parts,
    }
}
/// Renders `part` as an IMAP `BODYSTRUCTURE` parenthesised list.
///
/// * `Text` parts: type, subtype, a `CHARSET` parameter (defaulting to
///   `US-ASCII`), `NIL` id and description, encoding, size and line count.
/// * `Other` parts: type, subtype, `NIL` parameters/id/description, encoding
///   and size.
/// * `Multipart` parts: the rendered children followed by the subtype; a
///   multipart without children is rendered as an empty `TEXT/PLAIN` part.
///
/// Type, subtype and charset are upper-cased. Every string comes from message
/// headers and is therefore untrusted: it is emitted as an IMAP quoted string
/// with `"` and `\` backslash-escaped and CR/LF removed, so header content
/// cannot break out of the quoted string.
pub fn format_bodystructure(part: &MimePart) -> String {
    /// Wraps `raw` in double quotes as an IMAP quoted string.
    fn quoted(raw: &str) -> String {
        let mut out = String::with_capacity(raw.len() + 2);
        out.push('"');
        for ch in raw.chars() {
            match ch {
                // CR and LF cannot appear inside a quoted string at all.
                '\r' | '\n' => {}
                '"' | '\\' => {
                    out.push('\\');
                    out.push(ch);
                }
                _ => out.push(ch),
            }
        }
        out.push('"');
        out
    }

    match part {
        MimePart::Text {
            subtype,
            charset,
            encoding,
            size,
            lines,
            ..
        } => {
            let cs = charset.as_deref().unwrap_or("US-ASCII");
            format!(
                "(\"TEXT\" {} (\"CHARSET\" {}) NIL NIL {} {} {})",
                quoted(&subtype.to_uppercase()),
                quoted(&cs.to_uppercase()),
                quoted(encoding),
                size,
                lines
            )
        }
        MimePart::Multipart { subtype, parts, .. } => {
            if parts.is_empty() {
                return "(\"TEXT\" \"PLAIN\" (\"CHARSET\" \"US-ASCII\") NIL NIL \"7BIT\" 0 0)"
                    .to_string();
            }
            let parts_str: String = parts.iter().map(format_bodystructure).collect();
            format!("({} {})", parts_str, quoted(&subtype.to_uppercase()))
        }
        MimePart::Other {
            media_type,
            subtype,
            encoding,
            size,
            ..
        } => {
            format!(
                "({} {} NIL NIL NIL {} {})",
                quoted(&media_type.to_uppercase()),
                quoted(&subtype.to_uppercase()),
                quoted(encoding),
                size
            )
        }
    }
}
/// Returns the bytes of `data` selected by an IMAP-style section specifier.
///
/// * `""` selects the whole message.
/// * `HEADER` and anything starting with `HEADER.FIELDS` select the header
///   block (the field list is not applied); `TEXT` selects the body.
/// * Otherwise the specifier is a dot-separated path of 1-based part numbers,
///   optionally ending in `MIME`, `HEADER` or `TEXT`.
///
/// Returns `None` when a path component is not a positive number or does not
/// exist in the parsed structure.
pub fn extract_section<'a>(data: &'a [u8], section: &str) -> Option<&'a [u8]> {
    let section = section.trim();
    if section.is_empty() {
        return Some(data);
    }

    // Whole-message specifiers.
    let whole_upper = section.to_uppercase();
    match whole_upper.as_str() {
        "HEADER" => return Some(extract_headers(data)),
        "TEXT" => return Some(extract_body(data)),
        other if other.starts_with("HEADER.FIELDS") => return Some(extract_headers(data)),
        _ => {}
    }

    // Separate the numeric path from an optional trailing keyword.
    let tokens: Vec<&str> = section.split('.').collect();
    let (last_token, leading_tokens) = tokens.split_last()?;
    let last_upper = last_token.to_uppercase();
    let has_keyword = matches!(last_upper.as_str(), "MIME" | "HEADER" | "TEXT");
    let (numbers, suffix): (&[&str], Option<String>) = if has_keyword {
        (leading_tokens, Some(last_upper))
    } else {
        (&tokens[..], None)
    };

    // Walk the part tree along the numeric path.
    let structure = parse_mime(data);
    let mut node = &structure;
    let mut scope = data;
    for token in numbers {
        let index: usize = token.parse().ok()?;
        if index == 0 {
            return None;
        }
        match node {
            MimePart::Multipart { parts, .. } => node = parts.get(index - 1)?,
            MimePart::Text {
                body_offset,
                body_len,
                ..
            }
            | MimePart::Other {
                body_offset,
                body_len,
                ..
            } => {
                // A leaf only answers to the single-component path "1".
                if index != 1 || numbers.len() != 1 {
                    return None;
                }
                let start = *body_offset;
                let end = start + body_len;
                if end <= data.len() {
                    scope = &data[start..end];
                }
                break;
            }
        }
    }

    match suffix.as_deref() {
        Some("HEADER") | Some("MIME") => Some(extract_headers(scope)),
        Some("TEXT") => Some(extract_body(scope)),
        Some(_) => None,
        None => match node {
            MimePart::Multipart { .. } => Some(scope),
            MimePart::Text {
                body_offset,
                body_len,
                ..
            }
            | MimePart::Other {
                body_offset,
                body_len,
                ..
            } => {
                let start = *body_offset;
                let end = start + body_len;
                Some(if end <= data.len() {
                    &data[start..end]
                } else {
                    &data[start..]
                })
            }
        },
    }
}
/// Returns the header block of `data`, including the terminating
/// `\r\n\r\n`; when no such terminator exists the whole input is returned.
fn extract_headers(data: &[u8]) -> &[u8] {
    const SEPARATOR: &[u8] = b"\r\n\r\n";
    match data
        .windows(SEPARATOR.len())
        .position(|window| window == SEPARATOR)
    {
        Some(idx) => &data[..idx + SEPARATOR.len()],
        None => data,
    }
}
/// Returns everything after the first `\r\n\r\n` in `data`, or an empty
/// slice when there is no such separator.
fn extract_body(data: &[u8]) -> &[u8] {
    const SEPARATOR: &[u8] = b"\r\n\r\n";
    match data
        .windows(SEPARATOR.len())
        .position(|window| window == SEPARATOR)
    {
        Some(idx) => &data[idx + SEPARATOR.len()..],
        None => &[],
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A bare `type/subtype` yields both names and no parameters.
    #[test]
    fn test_parse_content_type_simple() {
        let (media, subtype, params) = parse_content_type("text/plain");
        assert_eq!((media.as_str(), subtype.as_str()), ("text", "plain"));
        assert!(params.is_empty());
    }

    /// An unquoted `charset` parameter keeps its original case.
    #[test]
    fn test_parse_content_type_with_charset() {
        let (media, subtype, params) = parse_content_type("text/html; charset=UTF-8");
        assert_eq!((media.as_str(), subtype.as_str()), ("text", "html"));
        assert_eq!(params.get("charset").map(String::as_str), Some("UTF-8"));
    }

    /// Quotes around a `boundary` parameter are stripped.
    #[test]
    fn test_parse_content_type_multipart() {
        let header = "multipart/mixed; boundary=\"boundary123\"";
        let (media, subtype, params) = parse_content_type(header);
        assert_eq!((media.as_str(), subtype.as_str()), ("multipart", "mixed"));
        assert_eq!(
            params.get("boundary").map(String::as_str),
            Some("boundary123")
        );
    }

    /// A single-part text message parses to a `Text` node.
    #[test]
    fn test_parse_simple_text_message() {
        let msg = b"Content-Type: text/plain; charset=UTF-8\r\n\r\nHello, World!\r\n";
        if let MimePart::Text {
            subtype, charset, ..
        } = parse_mime(msg)
        {
            assert_eq!(subtype, "plain");
            assert_eq!(charset.as_deref(), Some("UTF-8"));
        } else {
            panic!("Expected Text part");
        }
    }

    /// The rendered structure of a text part contains every quoted field.
    #[test]
    fn test_format_bodystructure_text() {
        let part = MimePart::Text {
            subtype: "plain".to_string(),
            charset: Some("UTF-8".to_string()),
            encoding: "7BIT".to_string(),
            size: 100,
            lines: 5,
            body_offset: 0,
            body_len: 100,
        };
        let rendered = format_bodystructure(&part);
        for expected in &["\"TEXT\"", "\"PLAIN\"", "\"UTF-8\"", "\"7BIT\""] {
            assert!(rendered.contains(*expected));
        }
    }

    /// `HEADER` returns the header block including its terminator.
    #[test]
    fn test_extract_section_header() {
        let msg = b"Subject: Test\r\nFrom: test@test.com\r\n\r\nBody here";
        let headers = extract_section(msg, "HEADER").unwrap();
        assert!(headers.starts_with(b"Subject:"));
        assert!(headers.ends_with(b"\r\n\r\n"));
    }

    /// `TEXT` returns only the bytes after the header terminator.
    #[test]
    fn test_extract_section_text() {
        let msg = b"Subject: Test\r\n\r\nBody here";
        let body = extract_section(msg, "TEXT").unwrap();
        assert_eq!(body, b"Body here");
    }
}