use super::*;
/// Reports whether `value` contains something shaped like an RFC 2047
/// encoded-word: `=?charset?X?text?=`, where `X` is `B` or `Q` in either case.
///
/// Only the shape is checked. The charset and the text must each be a
/// non-empty run containing neither `?` nor a space; the charset name and the
/// payload are not validated or decoded.
fn contains_rfc2047_encoded_word(value: &str) -> bool {
    /// Scans the run of bytes starting at `from` that are neither `?` nor a
    /// space, and returns the index of the `?` that terminates it. Yields
    /// `None` when the run is empty or is not immediately followed by `?`.
    fn token_end(bytes: &[u8], from: usize) -> Option<usize> {
        let run = bytes[from..]
            .iter()
            .take_while(|&&b| b != b'?' && b != b' ')
            .count();
        let end = from + run;
        (run > 0 && bytes.get(end) == Some(&b'?')).then_some(end)
    }

    /// Checks whether a complete encoded-word follows the `=?` located at `at`.
    fn encoded_word_at(bytes: &[u8], at: usize) -> bool {
        let Some(charset_end) = token_end(bytes, at + 2) else {
            return false;
        };
        let encoding_ok = matches!(
            bytes.get(charset_end + 1),
            Some(b'B' | b'b' | b'Q' | b'q')
        );
        if !encoding_ok || bytes.get(charset_end + 2) != Some(&b'?') {
            return false;
        }
        let Some(text_end) = token_end(bytes, charset_end + 3) else {
            return false;
        };
        bytes.get(text_end + 1) == Some(&b'=')
    }

    let bytes = value.as_bytes();
    // A candidate is only considered when at least five bytes remain from its
    // `=`, so the two-byte `=?` probe below never reads past the end.
    (0..bytes.len().saturating_sub(4))
        .any(|i| bytes[i] == b'=' && bytes[i + 1] == b'?' && encoded_word_at(bytes, i))
}
/// Dry-runs the folding of `name: value` and fails if any resulting line
/// would exceed `HARD_LINE_LIMIT`.
///
/// The value is split with `split_header_words`; a fold is simulated before a
/// word whenever appending its separator and the word would push the current
/// line past `fold_limit`. Nothing is written anywhere.
///
/// # Errors
///
/// Returns `Error::HeaderLineTooLong` when a single word, or the trailing
/// whitespace of the value, cannot be placed on a line within
/// `HARD_LINE_LIMIT`.
fn validate_header_folding(name: &str, value: &str, fold_limit: usize) -> Result<(), Error> {
    // The first line starts after the "{name}: " prefix.
    let mut column = name.len() + ": ".len();
    let (words, trailing_sep) = split_header_words(value);

    for (index, (word, sep)) in words.into_iter().enumerate() {
        let sep_len = sep.map_or(0, str::len);
        let word_len = word.len();
        let folds = column + sep_len + word_len > fold_limit && column > 0;
        column = match (folds, index == 0) {
            // Folding before the very first word adds one extra leading space.
            (true, true) => 1 + sep_len,
            (true, false) => sep_len,
            (false, _) => column + sep_len,
        };
        if column + word_len > HARD_LINE_LIMIT {
            return Err(Error::HeaderLineTooLong(format!(
                "{name} header contains a token that cannot be folded within the 998-octet line limit required by RFC 5322 Sections 2.1.1 and 2.2.3"
            )));
        }
        column += word_len;
    }

    let Some(trailing_sep) = trailing_sep.filter(|_| !value.is_empty()) else {
        return Ok(());
    };
    let trailing_len = trailing_sep.len();
    let wraps = column + trailing_len > fold_limit && column > 0;
    let final_line_len = if wraps {
        trailing_len
    } else {
        column + trailing_len
    };
    if final_line_len > HARD_LINE_LIMIT {
        return Err(Error::HeaderLineTooLong(format!(
            "{name} header contains whitespace that cannot be folded within the 998-octet line limit required by RFC 5322 Sections 2.1.1 and 2.2.3"
        )));
    }
    Ok(())
}
/// Appends the header `name: value` to `output`, folding it across lines and
/// terminating it with CRLF.
///
/// The fold limit is `RFC2047_LINE_LIMIT` when the value contains an RFC 2047
/// encoded-word and `MAX_LINE_LEN` otherwise. Folds are only inserted at the
/// word boundaries reported by `split_header_words`, and the original
/// separator whitespace is re-emitted at the start of the continuation line.
///
/// # Errors
///
/// Propagates the error from `validate_header_folding`; in that case nothing
/// has been written to `output`.
pub(super) fn try_write_header(output: &mut Vec<u8>, name: &str, value: &str) -> Result<(), Error> {
    let fold_limit = if contains_rfc2047_encoded_word(value) {
        RFC2047_LINE_LIMIT
    } else {
        MAX_LINE_LEN
    };
    // Validate up front so a failure leaves `output` untouched.
    validate_header_folding(name, value, fold_limit)?;

    output.extend_from_slice(name.as_bytes());
    output.extend_from_slice(b": ");
    let mut column = name.len() + 2;

    let (words, trailing_sep) = split_header_words(value);
    for (index, (word, sep)) in words.into_iter().enumerate() {
        let sep = sep.unwrap_or("");
        let piece_len = sep.len() + word.len();
        if column + piece_len > fold_limit && column > 0 {
            if index == 0 {
                // Folding straight after the field name: drop the space that
                // followed the colon and start the continuation with one.
                if output.last() == Some(&b' ') {
                    output.pop();
                }
                output.extend_from_slice(b"\r\n ");
                column = 1;
            } else {
                output.extend_from_slice(b"\r\n");
                column = 0;
            }
        }
        output.extend_from_slice(sep.as_bytes());
        output.extend_from_slice(word.as_bytes());
        column += piece_len;
    }

    if let Some(tail) = trailing_sep.filter(|_| !value.is_empty()) {
        if column + tail.len() > fold_limit && column > 0 {
            output.extend_from_slice(b"\r\n");
        }
        output.extend_from_slice(tail.as_bytes());
    }
    output.extend_from_slice(b"\r\n");
    Ok(())
}
/// Splits a header value into foldable words.
///
/// Returns `(words, trailing_sep)`:
/// * each entry of `words` is `(word, separator)`, where `separator` is the
///   run of spaces/tabs that preceded the word in `value` (`None` when the
///   word starts the value);
/// * `trailing_sep` is the whitespace run left over at the end of `value`
///   when it does not end in a word.
///
/// Spaces and tabs inside a double-quoted section or between `<` and `>` do
/// not split. Inside quotes, a backslash shields the character after it, so
/// an escaped `"` does not close the quoted section.
pub(super) fn split_header_words(value: &str) -> (Vec<(&str, Option<&str>)>, Option<&str>) {
    let bytes = value.as_bytes();
    let mut words = Vec::new();
    let mut word_start = 0;
    let mut sep_start: Option<usize> = None;
    let (mut quoted, mut bracketed) = (false, false);

    let mut pos = 0;
    while let Some(&byte) = bytes.get(pos) {
        if quoted && byte == b'\\' {
            // Step over the backslash and the (possibly multi-byte) character
            // it escapes.
            pos += 1;
            if let Some(escaped) = value[pos..].chars().next() {
                pos += escaped.len_utf8();
            }
            continue;
        }

        if byte == b'"' {
            quoted = !quoted;
        } else if quoted {
            // Nothing else is structural inside a quoted section.
        } else if byte == b'<' {
            bracketed = true;
        } else if byte == b'>' {
            bracketed = false;
        } else if !bracketed && matches!(byte, b' ' | b'\t') {
            if pos > word_start {
                // A word just ended: emit it with the separator that led to it.
                let leading = sep_start.map(|from| &value[from..word_start]);
                words.push((&value[word_start..pos], leading));
                sep_start = Some(pos);
            } else {
                // Still inside a whitespace run; remember where it began.
                sep_start = sep_start.or(Some(pos));
            }
            word_start = pos + 1;
        }
        pos += 1;
    }

    if word_start < bytes.len() {
        let leading = sep_start.map(|from| &value[from..word_start]);
        words.push((&value[word_start..], leading));
        (words, None)
    } else {
        let trailing = sep_start.map(|from| &value[from..]);
        (words, trailing)
    }
}
/// Appends a MIME boundary delimiter line to `output`.
///
/// Writes `--{boundary}` followed by CRLF; when `closing` is true the line is
/// `--{boundary}--` instead.
pub(super) fn write_boundary(output: &mut Vec<u8>, boundary: &str, closing: bool) {
    let terminator: &[u8] = if closing {
        &b"--\r\n"[..]
    } else {
        &b"\r\n"[..]
    };
    output.extend_from_slice(b"--");
    output.extend_from_slice(boundary.as_bytes());
    output.extend_from_slice(terminator);
}
/// Returns `true` when any CRLF-delimited line of `text` is longer than
/// `HARD_LINE_LIMIT` bytes.
fn needs_quoted_printable(text: &str) -> bool {
    text.split("\r\n")
        .map(str::len)
        .any(|octets| octets > HARD_LINE_LIMIT)
}
/// Returns `true` when any line of `data` exceeds `HARD_LINE_LIMIT` bytes.
///
/// Lines are delimited by CRLF pairs only; a lone CR or LF counts as ordinary
/// line content. The final, unterminated segment is checked as well.
fn has_overlong_lines(data: &[u8]) -> bool {
    let mut rest = data;
    loop {
        match rest.windows(2).position(|pair| pair == b"\r\n") {
            // `eol` is the offset of the CR, i.e. the length of this line.
            Some(eol) if eol > HARD_LINE_LIMIT => return true,
            Some(eol) => rest = &rest[eol + 2..],
            None => return rest.len() > HARD_LINE_LIMIT,
        }
    }
}
/// Encodes `data` as quoted-printable.
///
/// * CRLF pairs are copied through as hard line breaks.
/// * `=`, every byte outside `33..=126`, and any space/tab that
///   `is_trailing_whitespace` reports as trailing are written as `=XX` with
///   uppercase hex digits.
/// * A soft line break (`=` CRLF) is inserted before a literal or escape that
///   would make the line content exceed 75 characters, so no encoded line is
///   longer than 76 characters including the soft-break `=`.
pub(super) fn encode_quoted_printable(data: &[u8]) -> Vec<u8> {
    const QP_LINE_LIMIT: usize = 76;
    // One column is kept free for the `=` of a soft line break.
    const MAX_CONTENT_WIDTH: usize = QP_LINE_LIMIT - 1;

    let mut encoded = Vec::with_capacity(data.len() * 2);
    let mut column: usize = 0;
    let mut idx = 0;
    while let Some(&byte) = data.get(idx) {
        if byte == b'\r' && data.get(idx + 1) == Some(&b'\n') {
            encoded.extend_from_slice(b"\r\n");
            column = 0;
            idx += 2;
            continue;
        }

        let escape = match byte {
            b'\t' | b' ' => is_trailing_whitespace(data, idx),
            b'=' => true,
            33..=126 => false,
            _ => true,
        };
        let triplet = [
            b'=',
            HEX_UPPER[usize::from(byte >> 4)],
            HEX_UPPER[usize::from(byte & 0x0F)],
        ];
        let token: &[u8] = if escape {
            &triplet[..]
        } else {
            &data[idx..idx + 1]
        };

        if column + token.len() > MAX_CONTENT_WIDTH {
            encoded.extend_from_slice(b"=\r\n");
            column = 0;
        }
        encoded.extend_from_slice(token);
        column += token.len();
        idx += 1;
    }
    encoded
}
/// Uppercase hexadecimal digits indexed by nibble value (0-15); used to build
/// the `=XX` escapes of the quoted-printable encoder.
const HEX_UPPER: [u8; 16] = *b"0123456789ABCDEF";
/// Reports whether everything after `pos` up to the end of the line is
/// whitespace.
///
/// Looks at the bytes following `data[pos]`: returns `true` if only spaces and
/// tabs occur before the next CR or LF, or before the end of `data`, and
/// `false` as soon as any other byte is seen first. The byte at `pos` itself
/// is not inspected.
pub(super) fn is_trailing_whitespace(data: &[u8], pos: usize) -> bool {
    data.iter()
        .skip(pos + 1)
        .find(|&&byte| !matches!(byte, b' ' | b'\t'))
        .map_or(true, |&byte| matches!(byte, b'\r' | b'\n'))
}
/// Writes the headers and body of a text part with `charset=utf-8`.
///
/// The text is passed through `normalize_line_endings` first. The body is
/// emitted as quoted-printable when the normalized text contains a non-ASCII
/// byte, a NUL byte, or a line longer than `HARD_LINE_LIMIT`; otherwise it is
/// written verbatim as `7bit`.
///
/// # Errors
///
/// Propagates any error from writing the headers.
pub(super) fn write_text_part(
    output: &mut Vec<u8>,
    text: &str,
    mime_type: &str,
) -> Result<(), Error> {
    let content_type = format!("{mime_type}; charset=utf-8");
    try_write_header(output, "Content-Type", &content_type)?;

    let normalized = normalize_line_endings(text);
    let has_unsafe_byte = normalized
        .as_bytes()
        .iter()
        .any(|&b| b > 127 || b == 0x00);
    let needs_encoding = has_unsafe_byte || needs_quoted_printable(&normalized);

    if needs_encoding {
        write_qp_body(output, &normalized)
    } else {
        write_raw_body(output, "7bit", &normalized)
    }
}
/// Writes a `Content-Transfer-Encoding: quoted-printable` header, the blank
/// line ending the header block, and the quoted-printable encoding of
/// `normalized`, making sure the body ends with CRLF.
fn write_qp_body(output: &mut Vec<u8>, normalized: &str) -> Result<(), Error> {
    try_write_header(output, "Content-Transfer-Encoding", "quoted-printable")?;
    output.extend_from_slice(b"\r\n");

    let mut body = encode_quoted_printable(normalized.as_bytes());
    if !body.ends_with(b"\r\n") {
        body.extend_from_slice(b"\r\n");
    }
    output.append(&mut body);
    Ok(())
}
/// Writes a `Content-Transfer-Encoding: {cte}` header, the blank line ending
/// the header block, and `normalized` verbatim, appending CRLF when the text
/// does not already end with one.
fn write_raw_body(output: &mut Vec<u8>, cte: &str, normalized: &str) -> Result<(), Error> {
    try_write_header(output, "Content-Transfer-Encoding", cte)?;
    output.extend_from_slice(b"\r\n");

    let body = normalized.as_bytes();
    output.extend_from_slice(body);
    if !body.ends_with(b"\r\n") {
        output.extend_from_slice(b"\r\n");
    }
    Ok(())
}
/// Checks that `data` can be emitted unencoded as a `message/*` part.
///
/// # Errors
///
/// Returns `Error::InvalidAttachment` when `data`
/// * contains a NUL byte,
/// * contains a CR not followed by LF, or an LF not preceded by CR (the first
///   offending byte in document order decides which is reported), or
/// * has a line that `has_overlong_lines` rejects.
fn validate_message_attachment_bytes(filename: &str, data: &[u8]) -> Result<(), Error> {
    if data.iter().any(|&byte| byte == 0x00) {
        return Err(Error::InvalidAttachment(format!(
            "message/* attachment \"{filename}\" contains NUL octets; \
             emitted 7bit/8bit message parts must not contain NUL \
             (RFC 2045 Sections 2.7-2.8 / RFC 2046 Section 5.2.1)"
        )));
    }

    let mut previous: Option<u8> = None;
    for (idx, &byte) in data.iter().enumerate() {
        let bare_cr = byte == b'\r' && data.get(idx + 1) != Some(&b'\n');
        if bare_cr {
            return Err(Error::InvalidAttachment(format!(
                "message/* attachment \"{filename}\" contains bare CR; \
                 CR and LF may only appear as CRLF in 7bit/8bit data \
                 (RFC 2045 Sections 2.7-2.8 / RFC 5322 Section 2.3)"
            )));
        }
        let bare_lf = byte == b'\n' && previous != Some(b'\r');
        if bare_lf {
            return Err(Error::InvalidAttachment(format!(
                "message/* attachment \"{filename}\" contains bare LF; \
                 CR and LF may only appear as CRLF in 7bit/8bit data \
                 (RFC 2045 Sections 2.7-2.8 / RFC 5322 Section 2.3)"
            )));
        }
        previous = Some(byte);
    }

    if has_overlong_lines(data) {
        return Err(Error::InvalidAttachment(format!(
            "message/* attachment \"{filename}\" contains lines exceeding 998 octets; \
             base64/quoted-printable encoding is forbidden for message/* types \
             (RFC 2046 Section 5.2.1) so line lengths must conform to \
             RFC 2045 Sections 2.7-2.8",
        )));
    }
    Ok(())
}
fn validate_message_rfc822_headers_are_ascii(filename: &str, data: &[u8]) -> Result<(), Error> {
let header_end = data
.windows(4)
.position(|window| window == b"\r\n\r\n")
.map_or(data.len(), |pos| pos + 2);
if let Some(non_ascii) = data[..header_end].iter().copied().find(|b| !b.is_ascii()) {
return Err(Error::InvalidAttachment(format!(
"message/rfc822 attachment \"{filename}\" contains raw non-ASCII byte 0x{non_ascii:02X} \
in the encapsulated header block; RFC 2046 Section 5.2.1 requires \
message/rfc822 headers to stay US-ASCII, and RFC 6532 Section 3.7 \
requires message/global for encapsulated messages with UTF-8 headers"
)));
}
Ok(())
}
fn validate_message_rfc822_required_headers(filename: &str, data: &[u8]) -> Result<(), Error> {
let header_end = data
.windows(4)
.position(|window| window == b"\r\n\r\n")
.unwrap_or(data.len());
let has_required_header = data[..header_end]
.split(|&byte| byte == b'\n')
.map(|line| line.strip_suffix(b"\r").unwrap_or(line))
.filter(|line| !line.is_empty())
.filter(|line| !matches!(line.first(), Some(b' ' | b'\t')))
.filter_map(|line| {
line.iter()
.position(|&byte| byte == b':')
.map(|colon| &line[..colon])
})
.any(|field_name| {
field_name.eq_ignore_ascii_case(b"from")
|| field_name.eq_ignore_ascii_case(b"subject")
|| field_name.eq_ignore_ascii_case(b"date")
});
if !has_required_header {
return Err(Error::InvalidAttachment(format!(
"message/rfc822 attachment \"{filename}\" must include at least one of \
From, Subject, or Date in the encapsulated header block \
(RFC 2046 Section 5.2.1)"
)));
}
Ok(())
}
/// Writes the MIME headers of an attachment part, in order: `Content-Type`,
/// `Content-Disposition`, and `Content-ID` when the attachment has one.
///
/// # Errors
///
/// Stops at and returns the first error reported by one of the three header
/// writers.
fn write_attachment_headers(
    output: &mut Vec<u8>,
    attachment: &OutgoingAttachment,
    content_type: &str,
    filename: &str,
) -> Result<(), Error> {
    write_attachment_content_type_header(output, content_type, filename)?;
    write_attachment_disposition_header(output, attachment.is_inline, filename)?;
    write_attachment_content_id_header(output, attachment.content_id.as_deref())
}
fn write_attachment_content_type_header(
output: &mut Vec<u8>,
content_type: &str,
filename: &str,
) -> Result<(), Error> {
let is_non_ascii = filename.bytes().any(|b| !b.is_ascii());
let legacy: String = if is_non_ascii {
filename
.chars()
.map(|c| if c.is_ascii() { c } else { '_' })
.collect()
} else {
filename.to_owned()
};
let ct_name = escape_quoted_string(&legacy);
let legacy_name_needs_rfc2231 = quoted_param_word_needs_rfc2231("name", &ct_name);
if is_non_ascii || legacy_name_needs_rfc2231 {
let encoded = percent_encode_filename(filename);
let single_param_len = "name*=UTF-8''".len() + encoded.len();
let legacy_prefix = if is_non_ascii && !legacy_name_needs_rfc2231 {
format!("{content_type}; name=\"{ct_name}\"")
} else {
content_type.to_string()
};
if single_param_len > MAX_LINE_LEN {
use std::fmt::Write;
let chunks = split_percent_encoded(&encoded, RFC2231_CHUNK_MAX);
let mut value = legacy_prefix;
for (i, chunk) in chunks.iter().enumerate() {
if i == 0 {
let _ = write!(value, "; name*0*=UTF-8''{chunk}");
} else {
let _ = write!(value, "; name*{i}*={chunk}");
}
}
try_write_header(output, "Content-Type", &value)?;
} else {
try_write_header(
output,
"Content-Type",
&format!("{legacy_prefix}; name*=UTF-8''{encoded}"),
)?;
}
} else {
try_write_header(
output,
"Content-Type",
&format!("{content_type}; name=\"{ct_name}\""),
)?;
}
Ok(())
}
fn write_attachment_disposition_header(
output: &mut Vec<u8>,
is_inline: bool,
filename: &str,
) -> Result<(), Error> {
let disposition = if is_inline { "inline" } else { "attachment" };
let is_non_ascii = filename.bytes().any(|b| !b.is_ascii());
let legacy: String = if is_non_ascii {
filename
.chars()
.map(|c| if c.is_ascii() { c } else { '_' })
.collect()
} else {
filename.to_owned()
};
let escaped_legacy = escape_quoted_string(&legacy);
let legacy_filename_needs_rfc2231 =
quoted_param_word_needs_rfc2231("filename", &escaped_legacy);
if is_non_ascii || legacy_filename_needs_rfc2231 {
let encoded = percent_encode_filename(filename);
let single_param_len = "filename*=UTF-8''".len() + encoded.len();
let legacy_prefix = if is_non_ascii && !legacy_filename_needs_rfc2231 {
format!("{disposition}; filename=\"{escaped_legacy}\"")
} else {
disposition.to_owned()
};
if single_param_len > MAX_LINE_LEN {
use std::fmt::Write;
let chunks = split_percent_encoded(&encoded, RFC2231_CHUNK_MAX);
let mut value = legacy_prefix;
for (i, chunk) in chunks.iter().enumerate() {
if i == 0 {
let _ = write!(value, "; filename*0*=UTF-8''{chunk}");
} else {
let _ = write!(value, "; filename*{i}*={chunk}");
}
}
try_write_header(output, "Content-Disposition", &value)?;
} else {
try_write_header(
output,
"Content-Disposition",
&format!("{legacy_prefix}; filename*=UTF-8''{encoded}"),
)?;
}
} else {
try_write_header(
output,
"Content-Disposition",
&format!("{disposition}; filename=\"{escaped_legacy}\""),
)?;
}
Ok(())
}
/// Writes a `Content-ID: <...>` header when `content_id` is present; does
/// nothing for `None`.
///
/// # Errors
///
/// Propagates the error from `normalize_content_id` or `try_write_header`.
fn write_attachment_content_id_header(
    output: &mut Vec<u8>,
    content_id: Option<&str>,
) -> Result<(), Error> {
    let Some(cid) = content_id else {
        return Ok(());
    };
    let normalized_cid = normalize_content_id(cid)?;
    try_write_header(output, "Content-ID", &format!("<{normalized_cid}>"))
}
/// Writes the `Content-Transfer-Encoding` header, the blank line ending the
/// header block, and the attachment body.
///
/// * `message/*` types other than `message/global` are validated and written
///   unencoded, labelled `7bit` or `8bit` depending on whether the data
///   contains non-ASCII bytes. `message/rfc822` additionally goes through the
///   two header checks; `message/partial` and `message/external-body` must be
///   pure ASCII.
/// * Everything else is base64-encoded in lines of 76 characters.
///
/// # Errors
///
/// Returns `Error::InvalidAttachment` when a `message/*` payload fails
/// validation, and propagates header-writing errors.
fn write_attachment_body(
    output: &mut Vec<u8>,
    attachment: &OutgoingAttachment,
    media_type: &str,
) -> Result<(), Error> {
    let is_message_family = media_type
        .as_bytes()
        .get(..8)
        .is_some_and(|prefix| prefix.eq_ignore_ascii_case(b"message/"));

    if !is_message_family || media_type.eq_ignore_ascii_case("message/global") {
        try_write_header(output, "Content-Transfer-Encoding", "base64")?;
        output.extend_from_slice(b"\r\n");
        let encoded = base64::engine::general_purpose::STANDARD.encode(&attachment.data);
        for line in encoded.as_bytes().chunks(76) {
            output.extend_from_slice(line);
            output.extend_from_slice(b"\r\n");
        }
        return Ok(());
    }

    validate_message_attachment_bytes(&attachment.filename, &attachment.data)?;
    if media_type.eq_ignore_ascii_case("message/rfc822") {
        validate_message_rfc822_headers_are_ascii(&attachment.filename, &attachment.data)?;
        validate_message_rfc822_required_headers(&attachment.filename, &attachment.data)?;
    }

    let seven_bit_only = media_type.eq_ignore_ascii_case("message/partial")
        || media_type.eq_ignore_ascii_case("message/external-body");
    let has_non_ascii = attachment.data.iter().any(|&b| !b.is_ascii());
    if seven_bit_only && has_non_ascii {
        return Err(Error::InvalidAttachment(format!(
            "{media_type} attachment \"{}\" contains non-ASCII bytes; \
             RFC 2046 requires this subtype to use 7bit transport only",
            attachment.filename
        )));
    }
    let encoding = if has_non_ascii { "8bit" } else { "7bit" };

    try_write_header(output, "Content-Transfer-Encoding", encoding)?;
    output.extend_from_slice(b"\r\n");
    output.extend_from_slice(&attachment.data);
    if !attachment.data.ends_with(b"\r\n") {
        output.extend_from_slice(b"\r\n");
    }
    Ok(())
}
/// Writes one complete attachment part (headers followed by body) to `output`.
///
/// The attachment's declared content type is used when
/// `parse_attachment_content_type` accepts it; otherwise the part falls back
/// to `application/octet-stream`. The file name is passed through
/// `sanitize_header_value` before it is placed in any header.
///
/// # Errors
///
/// Propagates errors from writing the headers or the body.
pub(super) fn write_attachment_part(
    output: &mut Vec<u8>,
    attachment: &OutgoingAttachment,
) -> Result<(), Error> {
    const FALLBACK_TYPE: &str = "application/octet-stream";

    let (content_type, media_type) =
        match parse_attachment_content_type(&attachment.content_type) {
            Some(parsed) => parsed,
            None => (FALLBACK_TYPE, FALLBACK_TYPE.to_string()),
        };
    let filename = sanitize_header_value(&attachment.filename);
    write_attachment_headers(output, attachment, content_type, &filename)?;
    write_attachment_body(output, attachment, &media_type)
}
fn normalize_content_id(content_id: &str) -> Result<String, Error> {
let trimmed = content_id.trim();
let bare = strip_angle_brackets(trimmed).trim();
if is_valid_msg_id(bare) {
Ok(bare.to_string())
} else {
Err(Error::InvalidAttachment(format!(
"attachment Content-ID must be a valid bare msg-id body without angle brackets \
(RFC 2392 / RFC 5322 Section 3.6.4): {content_id:?}"
)))
}
}
/// Percent-encodes the UTF-8 bytes of `filename`.
///
/// ASCII letters, digits and `-`, `.`, `_`, `~` are copied unchanged; every
/// other byte becomes `%XX` with uppercase hex digits.
fn percent_encode_filename(filename: &str) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    let mut encoded = String::with_capacity(filename.len() * 3);
    for byte in filename.bytes() {
        match byte {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~' => {
                encoded.push(char::from(byte));
            }
            _ => {
                encoded.push('%');
                encoded.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
                encoded.push(char::from(HEX_DIGITS[usize::from(byte & 0x0F)]));
            }
        }
    }
    encoded
}
/// Target maximum length, in bytes, of each percent-encoded piece when a long
/// parameter value is split into RFC 2231 continuations.
const RFC2231_CHUNK_MAX: usize = 50;
/// Splits a percent-encoded string into pieces of at most `max_chunk_len`
/// bytes without cutting a `%XX` triplet in half.
///
/// A string that already fits is returned as a single piece (including the
/// empty string). A `%` with at least two bytes after it is treated as an
/// indivisible three-byte unit; if such a unit does not fit in the space left
/// it starts the next piece. A piece always contains at least one unit, so it
/// can exceed `max_chunk_len` when the limit is smaller than the unit.
///
/// The input is expected to be ASCII: pieces are cut at byte offsets.
fn split_percent_encoded(encoded: &str, max_chunk_len: usize) -> Vec<&str> {
    if encoded.len() <= max_chunk_len {
        return vec![encoded];
    }

    let bytes = encoded.as_bytes();
    let total = bytes.len();
    let mut pieces = Vec::new();
    let mut cursor = 0;
    while cursor < total {
        let begin = cursor;
        let mut taken = 0;
        while cursor < total && taken < max_chunk_len {
            let is_triplet = bytes[cursor] == b'%' && cursor + 2 < total;
            // A triplet that would overflow a non-empty piece is deferred.
            if is_triplet && taken > 0 && taken + 3 > max_chunk_len {
                break;
            }
            let step = if is_triplet { 3 } else { 1 };
            cursor += step;
            taken += step;
        }
        // Guarantee progress even when the limit admits nothing.
        cursor = cursor.max(begin + 1);
        pieces.push(&encoded[begin..cursor]);
    }
    pieces
}
/// Reports whether the quoted parameter `param_name="escaped_value"` is too
/// long to be emitted as a single word.
///
/// The word length counts the name, `="`, the escaped value and the closing
/// quote, and is compared against `MAX_LINE_LEN - 1`.
// NOTE(review): the `- 1` presumably reserves the leading space of a folded
// continuation line; confirm against the folding code.
fn quoted_param_word_needs_rfc2231(param_name: &str, escaped_value: &str) -> bool {
    let budget = MAX_LINE_LEN - 1;
    let quoted_word_len = param_name.len() + "=\"".len() + escaped_value.len() + "\"".len();
    quoted_word_len > budget
}
/// Reports whether `ct`, after trimming, has the form `type/subtype` with
/// both halves non-empty and made only of MIME token characters (as decided
/// by `is_mime_token_char`).
pub(super) fn is_valid_mime_type(ct: &str) -> bool {
    let is_token = |part: &str| !part.is_empty() && part.chars().all(is_mime_token_char);
    ct.trim()
        .split_once('/')
        .is_some_and(|(type_part, subtype_part)| is_token(type_part) && is_token(subtype_part))
}
/// Validates an attachment's `Content-Type` value.
///
/// On success returns `(trimmed_value, media_type)`: the trimmed input
/// including any parameters, and the `type/subtype` portion after
/// `strip_comments` and trimming.
///
/// Returns `None` when the trimmed value is empty, is not pure ASCII,
/// contains a control character other than tab, has an invalid
/// `type/subtype`, or has parameters that
/// `validate_content_type_parameters` rejects.
fn parse_attachment_content_type(ct: &str) -> Option<(&str, String)> {
    let ct = ct.trim();
    let has_forbidden_byte = ct
        .bytes()
        .any(|b| matches!(b, b'\r' | b'\n') || (b.is_ascii_control() && b != b'\t'));
    if ct.is_empty() || !ct.is_ascii() || has_forbidden_byte {
        return None;
    }

    // Everything before the first `;` is the media type, the rest parameters.
    let (media_type, params) = match ct.split_once(';') {
        Some((head, tail)) => (head, Some(tail)),
        None => (ct, None),
    };

    let normalized_media_type = strip_comments(media_type).trim().to_string();
    if !is_valid_mime_type(&normalized_media_type) {
        return None;
    }
    if let Some(params) = params {
        validate_content_type_parameters(params)?;
    }
    Some((ct, normalized_media_type))
}
/// Validates the parameter list of a `Content-Type` value, i.e. the text that
/// follows the first `;`.
///
/// The input is passed through `strip_comments` and must then be a
/// `;`-separated sequence of `attribute=value` pairs, where the attribute is
/// a non-empty MIME token and the value is either a non-empty token or a
/// properly terminated quoted string. Spaces and tabs are allowed around
/// attributes, `=`, values and `;`. A trailing `;` and an empty list are
/// accepted.
///
/// Returns `Some(())` when the list is well-formed and `None` otherwise.
///
/// A quoted string must be closed by an unescaped `"`. Termination is tracked
/// explicitly, so a value consisting only of an opening quote (`name="`) or
/// one ending in an escaped quote (`name="abc\"`) is rejected instead of
/// being mistaken for a closed string.
fn validate_content_type_parameters(params: &str) -> Option<()> {
    let params_without_comments = strip_comments(params);
    let bytes = params_without_comments.as_bytes();
    let mut i = 0;
    while i < bytes.len() {
        // Optional whitespace before the attribute name.
        while i < bytes.len() && matches!(bytes[i], b' ' | b'\t') {
            i += 1;
        }
        if i == bytes.len() {
            // Only whitespace remained (e.g. after a trailing `;`).
            return Some(());
        }

        // Attribute name: a non-empty token.
        let attr_start = i;
        while i < bytes.len() && is_mime_token_char(bytes[i] as char) {
            i += 1;
        }
        if i == attr_start {
            return None;
        }

        // `=` with optional whitespace on either side.
        while i < bytes.len() && matches!(bytes[i], b' ' | b'\t') {
            i += 1;
        }
        if i == bytes.len() || bytes[i] != b'=' {
            return None;
        }
        i += 1;
        while i < bytes.len() && matches!(bytes[i], b' ' | b'\t') {
            i += 1;
        }
        if i == bytes.len() {
            return None;
        }

        if bytes[i] == b'"' {
            // Quoted-string value.
            i += 1;
            let mut closed = false;
            while i < bytes.len() {
                match bytes[i] {
                    b'\\' => {
                        // Quoted pair: the escaped byte must exist and be ASCII.
                        i += 1;
                        if i == bytes.len() || !bytes[i].is_ascii() {
                            return None;
                        }
                        i += 1;
                    }
                    b'"' => {
                        i += 1;
                        closed = true;
                        break;
                    }
                    b'\r' | b'\n' => return None,
                    b if !b.is_ascii() || (b.is_ascii_control() && b != b'\t') => return None,
                    _ => i += 1,
                }
            }
            // Only an unescaped closing quote terminates the string; looking
            // at the previous byte is not enough because it may be the
            // opening quote or an escaped one.
            if !closed {
                return None;
            }
        } else {
            // Token value: must be non-empty.
            let value_start = i;
            while i < bytes.len() && is_mime_token_char(bytes[i] as char) {
                i += 1;
            }
            if i == value_start {
                return None;
            }
        }

        // Optional whitespace, then either the end or a `;` separator.
        while i < bytes.len() && matches!(bytes[i], b' ' | b'\t') {
            i += 1;
        }
        if i == bytes.len() {
            return Some(());
        }
        if bytes[i] != b';' {
            return None;
        }
        i += 1;
    }
    Some(())
}
/// Reports whether `c` may appear in a MIME token: a printable, non-space
/// ASCII character that is not one of the separators
/// `( ) < > @ , ; : \ " / [ ] ? =`.
fn is_mime_token_char(c: char) -> bool {
    const TSPECIALS: &str = "()<>@,;:\\\"/[]?=";
    c.is_ascii_graphic() && !TSPECIALS.contains(c)
}