use super::{address, encoded_words};
use super::super::wire;
/// Reports whether `content_type` names a `multipart/*` media type.
///
/// The value is normalised through [`extract_mime_type`] first, so parameters
/// after the first `;` and letter case do not influence the prefix check.
pub(crate) fn is_multipart(content_type: &str) -> bool {
    let mime = extract_mime_type(content_type);
    mime.starts_with("multipart/")
}
/// Checks whether the disposition token at the start of `header_value` equals
/// `expected`.
///
/// Comments are stripped and the value is trimmed and ASCII-lowercased before
/// the leading token (everything up to the first `;` or ASCII whitespace) is
/// taken. A token wrapped in a matching pair of double quotes is unwrapped.
/// `expected` is compared verbatim, so it only matches when it is already
/// lowercase.
pub(crate) fn is_disposition_type(header_value: &str, expected: &str) -> bool {
    let normalized = address::strip_comments(header_value)
        .trim()
        .to_ascii_lowercase();
    // `split` always yields at least one item, so the fallback is never used.
    let bare = normalized
        .split(|c: char| c == ';' || c.is_ascii_whitespace())
        .next()
        .unwrap_or("");
    let unquoted = match bare.strip_prefix('"').and_then(|t| t.strip_suffix('"')) {
        Some(inner) => inner,
        None => bare,
    };
    unquoted == expected
}
/// Returns the lowercased `type/subtype` portion of a Content-Type value.
///
/// Everything from the first `;` onwards is discarded, comments are stripped,
/// and whitespace around the type and subtype is removed. When the remaining
/// text contains no `/`, the result is `"text/plain"`.
pub(crate) fn extract_mime_type(content_type: &str) -> String {
    // `split` always yields at least one item, so the fallback is never used.
    let before_params = content_type
        .trim()
        .split(';')
        .next()
        .unwrap_or("")
        .trim();
    let normalized = address::strip_comments(before_params)
        .trim()
        .to_lowercase();
    match normalized.split_once('/') {
        Some((kind, subtype)) => format!("{}/{}", kind.trim(), subtype.trim()),
        None => "text/plain".to_string(),
    }
}
/// Extracts the `boundary` parameter from a Content-Type value, always
/// allowing purely alphabetic fragments to be joined onto an unquoted value.
fn extract_boundary(content_type: &str) -> Option<String> {
    let policy = AlphaContinuationPolicy::Always;
    extract_param_with_policy(content_type, "boundary", policy)
}
/// Picks the `boundary` parameter reading that actually occurs in `body`.
///
/// The reading that joins alphabetic continuation fragments is tried first.
/// If it does not match the body, the reading that never joins them is tried,
/// but only when it differs from the first one. Returns `None` when the header
/// has no boundary or neither reading matches the body.
pub(crate) fn extract_boundary_for_body(content_type: &str, body: &[u8]) -> Option<String> {
    let lenient = extract_boundary(content_type)?;
    if boundary_matches_body(body, &lenient) {
        return Some(lenient);
    }
    extract_param_with_policy(content_type, "boundary", AlphaContinuationPolicy::Never)
        .filter(|strict| *strict != lenient && boundary_matches_body(body, strict))
}
/// Returns `true` when `boundary` is usable for `body`: either splitting the
/// body on it yields at least one part, or a delimiter line for it is present.
fn boundary_matches_body(body: &[u8], boundary: &str) -> bool {
    if !wire::split_mime_parts(body, boundary).is_empty() {
        return true;
    }
    body_contains_boundary_delimiter(body, boundary)
}
/// Scans `body` for a `--boundary` delimiter that sits on its own line.
///
/// An occurrence counts when:
/// * it is at offset 0 or directly preceded by `\n` or `\r`, and
/// * it is followed by end of input, by CR/LF/space/tab, or by `--` that is in
///   turn followed by end of input or CR/LF/space/tab (the closing delimiter).
///
/// Occurrences that fail these checks are skipped and the scan continues.
fn body_contains_boundary_delimiter(body: &[u8], boundary: &str) -> bool {
    let ends_token = |b: u8| matches!(b, b'\r' | b'\n' | b' ' | b'\t');
    let marker = format!("--{boundary}");
    let marker = marker.as_bytes();
    let mut cursor = 0;
    while let Some(offset) = wire::find_subsequence(&body[cursor..], marker) {
        let start = cursor + offset;
        let end = start + marker.len();
        let at_line_start = start == 0 || matches!(body[start - 1], b'\n' | b'\r');
        if !at_line_start {
            cursor = end;
            continue;
        }
        match body.get(end).copied() {
            // Delimiter runs to the very end of the body.
            None => return true,
            Some(b'-') => {
                // A single trailing '-' means this is a longer, different boundary.
                if body.get(end + 1) != Some(&b'-') {
                    cursor = end + 1;
                    continue;
                }
                let tail = end + 2;
                match body.get(tail).copied() {
                    Some(b) if !ends_token(b) => cursor = tail,
                    _ => return true,
                }
            }
            Some(b) if ends_token(b) => return true,
            // The boundary text is only a prefix of a longer token.
            Some(_) => cursor = end,
        }
    }
    false
}
/// Reports whether byte offset `pos` in `lower` can start a parameter name.
///
/// That is the case at offset 0, or when the text before `pos`, with comments
/// stripped and trailing spaces/tabs removed, ends in `;`.
fn is_param_boundary(lower: &str, pos: usize) -> bool {
    pos == 0
        || address::strip_comments(&lower[..pos])
            .trim_end_matches([' ', '\t'])
            .ends_with(';')
}
/// Parses a parameter value from `rest`, the text right after `name=`.
///
/// * Quoted value: the text up to the closing quote is taken and unescaped.
///   If the quote is never closed, the text up to the next `;` (or the end) is
///   used with trailing whitespace removed.
/// * Unquoted value: the text up to the first `;` or whitespace is taken.
///   Whitespace-separated fragments that follow are appended while they look
///   like pieces of a folded value: the segment up to the next `;` holds no
///   `=`, the fragment consists only of token characters, and it either has a
///   non-alphabetic character or `alpha_policy` accepts joining it.
///
/// Returns `None` when the value is empty.
fn extract_param_value_internal(
    rest: &str,
    alpha_policy: AlphaContinuationPolicy,
) -> Option<String> {
    if let Some(inner) = rest.strip_prefix('"') {
        let close = find_closing_quote(inner);
        let quoted = if close < inner.len() {
            &inner[..close]
        } else {
            // Unterminated quote: fall back to the next parameter separator.
            let cut = inner.find(';').unwrap_or(inner.len());
            inner[..cut].trim_end()
        };
        if quoted.is_empty() {
            return None;
        }
        return Some(address::unescape_quoted_string(quoted));
    }

    let is_terminator = |c: char| c == ';' || c.is_whitespace();
    let head_end = rest.find(is_terminator).unwrap_or(rest.len());
    let (head, mut remaining) = rest.split_at(head_end);
    if head.is_empty() {
        return None;
    }

    let mut assembled = String::from(head);
    loop {
        let candidate = remaining.trim_start();
        if candidate.is_empty() || candidate.starts_with(';') {
            break;
        }
        // An '=' before the next ';' means another parameter starts here.
        let upto_semicolon = match candidate.find(';') {
            Some(cut) => &candidate[..cut],
            None => candidate,
        };
        if upto_semicolon.contains('=') {
            break;
        }
        let fragment_end = candidate.find(is_terminator).unwrap_or(candidate.len());
        let (fragment, next) = candidate.split_at(fragment_end);
        let joinable = fragment.chars().all(is_unquoted_mime_token_char)
            && (fragment.chars().any(|c| !c.is_ascii_alphabetic())
                || alpha_policy.allows_alpha_continuation(&assembled, fragment));
        if !joinable {
            break;
        }
        assembled.push_str(fragment);
        remaining = next;
    }
    Some(assembled)
}
/// Decides whether a purely alphabetic fragment that follows an unquoted
/// parameter value may be appended to that value (see
/// `allows_alpha_continuation`).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum AlphaContinuationPolicy {
/// Alphabetic fragments are never appended.
Never,
/// Alphabetic fragments are always appended.
Always,
/// Alphabetic fragments are appended only when the joined text is a label
/// that `encoding_rs` recognises.
CharsetLabel,
}
impl AlphaContinuationPolicy {
    /// Decides whether `continuation` may be appended to `assembled`.
    ///
    /// `Always` and `Never` answer unconditionally. `CharsetLabel` accepts the
    /// join only when `assembled` followed by `continuation` is a label known
    /// to `encoding_rs`.
    fn allows_alpha_continuation(self, assembled: &str, continuation: &str) -> bool {
        match self {
            Self::Always => true,
            Self::Never => false,
            Self::CharsetLabel => {
                let joined = [assembled, continuation].concat();
                encoding_rs::Encoding::for_label(joined.as_bytes()).is_some()
            }
        }
    }
}
/// Parses a parameter value from `rest` without ever joining purely
/// alphabetic continuation fragments.
fn extract_param_value(rest: &str) -> Option<String> {
    let policy = AlphaContinuationPolicy::Never;
    extract_param_value_internal(rest, policy)
}
/// Returns the index of the first byte at or after `pos` that is neither a
/// space nor a tab. If `pos` is past the end of `bytes`, it is returned as is.
fn skip_lwsp(bytes: &[u8], pos: usize) -> usize {
    let run = bytes.get(pos..).map_or(0, |tail| {
        tail.iter()
            .take_while(|&&b| b == b' ' || b == b'\t')
            .count()
    });
    pos + run
}
/// Advances past any run of spaces, tabs and parenthesised comments starting
/// at `pos` and returns the index where that run ends.
///
/// An unterminated comment stops the scan at its opening parenthesis. If the
/// current index is not a valid slice start for `input`, `input.len()` is
/// returned.
fn skip_cfws(input: &str, pos: usize) -> usize {
    let mut cursor = pos;
    loop {
        cursor = skip_lwsp(input.as_bytes(), cursor);
        match input.get(cursor..) {
            None => return input.len(),
            Some(tail) if !tail.starts_with('(') => return cursor,
            Some(_) => match find_comment_end(input, cursor) {
                Some(end) => cursor = end,
                None => return cursor,
            },
        }
    }
}
/// Finds the index just past the comment that opens at `pos`.
///
/// * Returns `Some(pos)` when the text at `pos` does not start with `(`.
/// * Returns `None` when `pos` is not a valid slice start or the comment is
///   never closed.
///
/// Nested parentheses are tracked, and a backslash makes the following
/// character literal.
fn find_comment_end(input: &str, pos: usize) -> Option<usize> {
    let tail = input.get(pos..)?;
    if !tail.starts_with('(') {
        return Some(pos);
    }
    let mut nesting = 0u32;
    let mut chars = tail.char_indices();
    while let Some((offset, ch)) = chars.next() {
        match ch {
            '\\' => {
                // Consume the escaped character so it cannot open or close anything.
                chars.next();
            }
            '(' => nesting = nesting.saturating_add(1),
            ')' => {
                nesting = nesting.saturating_sub(1);
                if nesting == 0 {
                    return Some(pos + offset + ch.len_utf8());
                }
            }
            _ => {}
        }
    }
    None
}
/// Reports whether `c` may appear in an unquoted MIME parameter token: a
/// printable, non-space ASCII character that is not one of the separator
/// characters `( ) < > @ , ; : \ " / [ ] ? =`.
fn is_unquoted_mime_token_char(c: char) -> bool {
    const SEPARATORS: &str = "()<>@,;:\\\"/[]?=";
    // `is_ascii_graphic` covers '!'..='~', i.e. ASCII minus controls and whitespace.
    c.is_ascii_graphic() && !SEPARATORS.contains(c)
}
/// Extracts the value of `param_name` from a structured header value.
///
/// The rule for joining alphabetic continuation fragments depends on the
/// parameter (name compared ASCII case-insensitively): `boundary` always joins
/// them, `charset` joins them only when the result is a known charset label,
/// and every other parameter never joins them.
pub(crate) fn extract_param(header_value: &str, param_name: &str) -> Option<String> {
    let is_named = |candidate: &str| param_name.eq_ignore_ascii_case(candidate);
    let alpha_policy = if is_named("boundary") {
        AlphaContinuationPolicy::Always
    } else if is_named("charset") {
        AlphaContinuationPolicy::CharsetLabel
    } else {
        AlphaContinuationPolicy::Never
    };
    extract_param_with_policy(header_value, param_name, alpha_policy)
}
/// Finds `param_name` in `header_value` (ASCII case-insensitively) and parses
/// its value, using `alpha_policy` to decide whether alphabetic continuation
/// fragments are joined onto an unquoted value.
///
/// An occurrence of the name is accepted only when it sits at a parameter
/// boundary, is outside quoted strings and comments, and is followed
/// (after optional whitespace/comments) by `=`. Other occurrences are skipped.
///
/// Returns `None` when no acceptable occurrence exists, when the value is
/// empty, or when `param_name` is empty.
fn extract_param_with_policy(
    header_value: &str,
    param_name: &str,
    alpha_policy: AlphaContinuationPolicy,
) -> Option<String> {
    // An empty needle matches at every offset without advancing the cursor,
    // which would make the search loop below spin forever.
    if param_name.is_empty() {
        return None;
    }
    // ASCII lowercasing keeps byte offsets identical between `lower` and
    // `header_value`, so indices found in one are valid in the other.
    let lower = header_value.to_ascii_lowercase();
    let param_lower = param_name.to_ascii_lowercase();
    let mut search_from = 0;
    loop {
        let abs_idx = search_from + lower[search_from..].find(&param_lower)?;
        let after_name = abs_idx + param_lower.len();
        // Whatever happens with this occurrence, the next search resumes after it.
        search_from = after_name;

        let is_candidate = is_param_boundary(&lower, abs_idx)
            && !is_inside_quotes(&lower, abs_idx)
            && !is_inside_comment(&lower, abs_idx);
        if !is_candidate {
            continue;
        }
        let eq_pos = skip_cfws(header_value, after_name);
        if lower.as_bytes().get(eq_pos) != Some(&b'=') {
            continue;
        }
        let val_start = skip_cfws(header_value, eq_pos + 1);
        return extract_param_value_internal(&header_value[val_start..], alpha_policy);
    }
}
/// Returns the byte index of the first unescaped `"` in `s`, or `s.len()` when
/// there is none. A backslash causes the byte after it to be skipped.
pub(crate) fn find_closing_quote(s: &str) -> usize {
    let mut bytes = s.bytes().enumerate();
    while let Some((idx, byte)) = bytes.next() {
        match byte {
            b'\\' => {
                // Skip the escaped byte.
                bytes.next();
            }
            b'"' => return idx,
            _ => {}
        }
    }
    s.len()
}
/// Reports whether byte offset `pos` in `s` lies inside an unclosed
/// parenthesised comment.
///
/// The text before `pos` is scanned while tracking parenthesis depth. A
/// backslash skips the byte after it, and parentheses inside double-quoted
/// strings are ignored.
pub(crate) fn is_inside_comment(s: &str, pos: usize) -> bool {
    let bytes = s.as_bytes();
    let limit = pos.min(bytes.len());
    let mut nesting: u32 = 0;
    let mut cursor = 0;
    while cursor < limit {
        cursor = match bytes[cursor] {
            b'\\' => cursor + 2,
            b'"' => {
                // Jump past the whole quoted string (or to the end if unterminated).
                let mut scan = cursor + 1;
                loop {
                    match bytes.get(scan) {
                        None => break scan,
                        Some(b'\\') => scan += 2,
                        Some(b'"') => break scan + 1,
                        Some(_) => scan += 1,
                    }
                }
            }
            b'(' => {
                nesting = nesting.saturating_add(1);
                cursor + 1
            }
            b')' => {
                nesting = nesting.saturating_sub(1);
                cursor + 1
            }
            _ => cursor + 1,
        };
    }
    nesting > 0
}
/// Reports whether byte offset `pos` in `s` falls within a properly closed
/// double-quoted string.
///
/// Quoted strings that open before `pos` are examined in order. A backslash
/// inside a quoted string skips the byte after it. An opening quote without a
/// matching close makes the function return `false`.
pub(crate) fn is_inside_quotes(s: &str, pos: usize) -> bool {
    let bytes = s.as_bytes();
    let limit = pos.min(bytes.len());
    let mut cursor = 0;
    while cursor < limit {
        if bytes[cursor] != b'"' {
            cursor += 1;
            continue;
        }
        // Locate the index just past the matching closing quote.
        let mut scan = cursor + 1;
        let after_close = loop {
            match bytes.get(scan) {
                None => return false,
                Some(b'\\') => scan += 2,
                Some(b'"') => break scan + 1,
                Some(_) => scan += 1,
            }
        };
        if after_close > pos {
            return true;
        }
        cursor = after_close;
    }
    false
}
/// Determines an attachment file name from the Content-Disposition and
/// Content-Type header values.
///
/// `filename` in `disposition` is consulted first, then `name` in
/// `content_type`. For each, the lookups are tried in this order: the RFC 2231
/// extended form, the RFC 2231 continuation form, and finally the plain
/// parameter, whose value is passed through encoded-word decoding.
pub(crate) fn extract_filename(disposition: &str, content_type: &str) -> Option<String> {
    let sources = [(disposition, "filename"), (content_type, "name")];
    sources.iter().find_map(|&(header, param)| {
        extract_rfc2231_param(header, param)
            .or_else(|| extract_rfc2231_continuation(header, param))
            .or_else(|| {
                extract_param(header, param)
                    .map(|raw| encoded_words::decode_encoded_words(&raw))
            })
    })
}
/// Extracts and decodes a single-section RFC 2231 extended parameter
/// (`name*=charset'language'percent-encoded`).
///
/// An occurrence of `name*` is accepted only at a parameter boundary, outside
/// quoted strings and comments, and when followed by `=`. The value must have
/// the three `'`-separated fields and a well-formed percent encoding;
/// otherwise `None` is returned.
pub(crate) fn extract_rfc2231_param(header_value: &str, param_name: &str) -> Option<String> {
    // ASCII lowercasing keeps byte offsets aligned with `header_value`.
    let lower = header_value.to_ascii_lowercase();
    let name_star = format!("{}*", param_name.to_ascii_lowercase());

    let mut cursor = 0;
    let eq_pos = loop {
        let found = cursor + lower[cursor..].find(&name_star)?;
        let after_star = found + name_star.len();
        cursor = after_star;
        if !is_param_boundary(&lower, found)
            || is_inside_quotes(&lower, found)
            || is_inside_comment(&lower, found)
        {
            continue;
        }
        let candidate = skip_cfws(header_value, after_star);
        if lower.as_bytes().get(candidate) == Some(&b'=') {
            break candidate;
        }
    };

    let rest = &header_value[skip_cfws(header_value, eq_pos + 1)..];
    let value = match rest.strip_prefix('"') {
        Some(inner) => {
            let close = find_closing_quote(inner);
            if close < inner.len() {
                inner[..close].trim()
            } else {
                // Unterminated quote: stop at the next parameter separator.
                let cut = inner.find(';').unwrap_or(inner.len());
                inner[..cut].trim_end()
            }
        }
        None => {
            let cut = rest.find(';').unwrap_or(rest.len());
            rest[..cut].trim()
        }
    };

    let mut fields = value.splitn(3, '\'');
    let (charset, _language, encoded) = (fields.next()?, fields.next()?, fields.next()?);
    let decoded = strict_percent_decode(encoded)?;
    Some(encoded_words::decode_charset(charset, &decoded))
}
/// Reassembles and decodes an RFC 2231 continuation parameter
/// (`name*0`, `name*1*`, ...).
///
/// Sections are ordered by index; the run must start at 0 and only the
/// gap-free prefix is used. The first encoded section supplies the charset via
/// its `charset'language'value` prefix. Later encoded sections are
/// percent-decoded and plain sections are taken byte for byte. The assembled
/// bytes are decoded with that charset, or `utf-8` when none was supplied.
///
/// Returns `None` when there are no sections or section 0 is missing.
pub(crate) fn extract_rfc2231_continuation(header_value: &str, param_name: &str) -> Option<String> {
    let mut sections = collect_rfc2231_continuation_sections(header_value, param_name);
    sections.sort_by_key(|(idx, _, _)| *idx);
    // `first()` is `None` for an empty list; a non-zero first index means
    // section 0 is missing.
    if sections.first()?.0 != 0 {
        return None;
    }

    // Keep only the leading run whose indices are exactly 0, 1, 2, ...
    let contiguous_len = sections
        .iter()
        .enumerate()
        .take_while(|(expected, (idx, _, _))| {
            u32::try_from(*expected).is_ok_and(|expected_idx| expected_idx == *idx)
        })
        .count();
    sections.truncate(contiguous_len);

    let first_encoded_idx = sections
        .iter()
        .find_map(|(idx, _, encoded)| encoded.as_ref().map(|_| *idx));

    let mut charset = String::new();
    let mut raw_bytes: Vec<u8> = Vec::new();
    for (idx, plain, encoded) in &sections {
        if first_encoded_idx == Some(*idx) {
            if let Some(value) = encoded {
                let mut parts = value.splitn(3, '\'');
                if let (Some(cs), Some(_lang), Some(encoded_value)) =
                    (parts.next(), parts.next(), parts.next())
                {
                    charset = cs.to_string();
                    raw_bytes.extend(percent_decode(encoded_value));
                    continue;
                }
            }
            // The first encoded section lacks the charset'language' prefix:
            // prefer a plain value for this index, else keep the raw text.
            if let Some(value) = plain {
                raw_bytes.extend(value.as_bytes());
                continue;
            }
            if let Some(value) = encoded {
                raw_bytes.extend(value.as_bytes());
                continue;
            }
        }
        if let Some(value) = encoded {
            raw_bytes.extend(percent_decode(value));
        } else if let Some(value) = plain {
            raw_bytes.extend(value.as_bytes());
        }
    }

    if charset.is_empty() {
        charset = "utf-8".to_string();
    }
    Some(encoded_words::decode_charset(&charset, &raw_bytes))
}
/// Collects every RFC 2231 continuation section (`name*N=` / `name*N*=`) of
/// `param_name` found in `header_value`.
///
/// Each returned tuple is `(index, plain_value, encoded_value)`, ordered by
/// index. For a given index and form, the first occurrence wins. Occurrences
/// are ignored when they are not at a parameter boundary, are inside quotes or
/// comments, have no digits, have a leading zero on a multi-digit index, are
/// not followed by `=`, or carry an index that does not fit in `u32`.
fn collect_rfc2231_continuation_sections(
    header_value: &str,
    param_name: &str,
) -> Vec<(u32, Option<String>, Option<String>)> {
    use std::collections::BTreeMap;

    // ASCII lowercasing keeps byte offsets aligned with `header_value`.
    let lower = header_value.to_ascii_lowercase();
    let lower_bytes = lower.as_bytes();
    let prefix = format!("{}*", param_name.to_ascii_lowercase());
    let mut by_index: BTreeMap<u32, (Option<String>, Option<String>)> = BTreeMap::new();

    let mut cursor = 0;
    while let Some(offset) = lower[cursor..].find(&prefix) {
        let found = cursor + offset;
        let digits_start = found + prefix.len();
        // Every outcome below resumes the search right after this prefix.
        cursor = digits_start;

        if !is_param_boundary(&lower, found)
            || is_inside_quotes(&lower, found)
            || is_inside_comment(&lower, found)
        {
            continue;
        }

        let digit_count = lower_bytes[digits_start..]
            .iter()
            .take_while(|b| b.is_ascii_digit())
            .count();
        if digit_count == 0 {
            continue;
        }
        let digits_end = digits_start + digit_count;
        let digits = &lower[digits_start..digits_end];
        if digits.len() > 1 && digits.starts_with('0') {
            continue;
        }

        // A '*' right after the index marks a percent-encoded section.
        let is_encoded = lower_bytes.get(digits_end) == Some(&b'*');
        let name_end = if is_encoded { digits_end + 1 } else { digits_end };

        let eq_pos = skip_cfws(header_value, name_end);
        if lower_bytes.get(eq_pos) != Some(&b'=') {
            continue;
        }
        let Ok(index) = digits.parse::<u32>() else {
            continue;
        };

        let rest = &header_value[skip_cfws(header_value, eq_pos + 1)..];
        if let Some(value) = extract_param_value(rest) {
            let slot = by_index.entry(index).or_insert((None, None));
            let target = if is_encoded { &mut slot.1 } else { &mut slot.0 };
            target.get_or_insert(value);
        }
    }

    by_index
        .into_iter()
        .map(|(idx, (plain, encoded))| (idx, plain, encoded))
        .collect()
}
/// Test-only lookup of a parameter value.
///
/// `lower` is the lowercased header used for searching and `original` is the
/// header the value is sliced from. `pattern` is the parameter name with an
/// optional trailing `=`. Occurrences that are not at a parameter boundary,
/// are inside quotes or comments, or are not followed by `=` are skipped.
#[cfg(test)]
pub(crate) fn find_param_value(lower: &str, original: &str, pattern: &str) -> Option<String> {
    let name = pattern.strip_suffix('=').unwrap_or(pattern);
    let mut cursor = 0;
    loop {
        let found = cursor + lower[cursor..].find(name)?;
        let after_name = found + name.len();
        cursor = after_name;
        if !is_param_boundary(lower, found)
            || is_inside_quotes(lower, found)
            || is_inside_comment(lower, found)
        {
            continue;
        }
        let eq_pos = skip_cfws(original, after_name);
        if lower.as_bytes().get(eq_pos) != Some(&b'=') {
            continue;
        }
        let val_start = skip_cfws(original, eq_pos + 1);
        return extract_param_value(&original[val_start..]);
    }
}
/// Leniently percent-decodes `input`.
///
/// Each `%XX` with two hexadecimal digits becomes the corresponding byte. A
/// `%` that is not followed by two hex digits is copied through unchanged, as
/// is every other byte.
pub(crate) fn percent_decode(input: &str) -> Vec<u8> {
    let mut out = Vec::with_capacity(input.len());
    let mut rest = input.as_bytes();
    while let Some((&first, tail)) = rest.split_first() {
        if first == b'%' {
            if let [hi, lo, after @ ..] = tail {
                if let Some(byte) = decode_hex_pair(*hi, *lo) {
                    out.push(byte);
                    rest = after;
                    continue;
                }
            }
        }
        out.push(first);
        rest = tail;
    }
    out
}
/// Strictly percent-decodes `input`.
///
/// Returns `None` as soon as a `%` is not followed by two hexadecimal digits.
/// All other bytes are copied through unchanged.
fn strict_percent_decode(input: &str) -> Option<Vec<u8>> {
    let mut out = Vec::with_capacity(input.len());
    let mut rest = input.as_bytes();
    while let Some((&first, tail)) = rest.split_first() {
        if first != b'%' {
            out.push(first);
            rest = tail;
            continue;
        }
        let [hi, lo, after @ ..] = tail else {
            return None;
        };
        out.push(decode_hex_pair(*hi, *lo)?);
        rest = after;
    }
    Some(out)
}
/// Combines two ASCII hexadecimal digits into the byte they encode, with
/// `high` as the upper nibble. Returns `None` if either is not a hex digit.
pub(crate) fn decode_hex_pair(high: u8, low: u8) -> Option<u8> {
    hex_digit(high)
        .zip(hex_digit(low))
        .map(|(upper, lower)| upper * 16 + lower)
}
/// Converts one ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) to its numeric
/// value, or returns `None` for any other byte.
pub(crate) fn hex_digit(b: u8) -> Option<u8> {
    char::from(b)
        .to_digit(16)
        .and_then(|value| u8::try_from(value).ok())
}
/// Test-only helper that removes one pair of enclosing double quotes from
/// `input`; the input is returned unchanged when it is not wrapped in quotes.
#[cfg(test)]
pub(crate) fn strip_outer_quotes(input: &str) -> &str {
    input
        .strip_prefix('"')
        .and_then(|inner| inner.strip_suffix('"'))
        .unwrap_or(input)
}