use std::collections::btree_map::Entry;
use std::collections::{BTreeMap, HashSet};
/// Resolves RFC 2231 parameter extensions in an already-parsed parameter list.
///
/// Every `(name, value)` pair is classified with `classify_key`:
///
/// * `name*` values are decoded with `decode_charset_value` and emitted under
///   the base name.
/// * `name*N` / `name*N*` segments are grouped by ASCII-lowercased base name
///   and concatenated in index order. The reassembled parameter takes the
///   position of the first segment seen for that name. A group without
///   index 0 is dropped, reassembly stops at the first index gap, and a
///   duplicated index keeps the first value.
/// * All other pairs are copied through unchanged.
///
/// After reassembly, any plain parameter whose name matches (ASCII
/// case-insensitively) a parameter that was decoded with an RFC 2231 charset
/// is removed, and the remaining non-RFC-2231 values that contain both `=?`
/// and `?=` are passed through `decode_rfc2047`.
///
/// Returns the surviving parameters in their original relative order.
#[allow(clippy::too_many_lines)]
pub(crate) fn decode_rfc2231_params(params: &[(String, String)]) -> Vec<(String, String)> {
    // (lowercased base name, slot reserved in `result`, segments keyed by index).
    // Each segment stores its raw value and whether its key ended in `*`.
    type ContinuationGroup = (String, usize, BTreeMap<u32, (String, bool)>);

    let mut result: Vec<Option<(String, String)>> = Vec::with_capacity(params.len());
    let mut continuations: Vec<ContinuationGroup> = Vec::new();
    // Indices into `result` whose values were produced by RFC 2231 decoding.
    let mut rfc2231_decoded: HashSet<usize> = HashSet::new();

    // Pass 1: classify every parameter and collect continuation segments.
    for (key, value) in params {
        match classify_key(key) {
            None => result.push(Some((key.clone(), value.clone()))),
            Some(KeyClass::StandaloneEncoded { base_name }) => {
                let decoded = decode_charset_value(value);
                rfc2231_decoded.insert(result.len());
                result.push(Some((base_name, decoded)));
            }
            Some(KeyClass::Continuation {
                base_name,
                index,
                encoded,
            }) => {
                let lower = base_name.to_ascii_lowercase();
                let group = continuations.iter_mut().find(|(name, _, _)| *name == lower);
                if let Some((_, _, segments)) = group {
                    match segments.entry(index) {
                        Entry::Vacant(e) => {
                            e.insert((value.clone(), encoded));
                        }
                        Entry::Occupied(_) => {
                            tracing::warn!(
                                base_name = lower.as_str(),
                                index = index,
                                "RFC 2231 Section 3: duplicate continuation index {}, keeping first value",
                                index,
                            );
                        }
                    }
                } else {
                    // Reserve the output slot now so the reassembled value
                    // lands where the first segment appeared.
                    let insert_pos = result.len();
                    result.push(None);
                    let mut segments = BTreeMap::new();
                    segments.insert(index, (value.clone(), encoded));
                    continuations.push((lower, insert_pos, segments));
                }
            }
        }
    }

    // Pass 2: reassemble each continuation group into a single value.
    for (lower_name, insert_pos, segments) in continuations {
        // A group that does not start at index 0 leaves its slot as `None`,
        // which is discarded when the result is flattened.
        let Some((&0, _)) = segments.first_key_value() else {
            continue;
        };
        let mut charset: Option<String> = None;
        // Only the first encoded segment may carry the `charset'language'`
        // prefix. This is tracked explicitly because `charset` stays `None`
        // when that prefix declares an empty charset; inferring "prefix not
        // seen yet" from `charset.is_none()` would send every later encoded
        // segment through the prefix splitter instead of percent-decoding it.
        let mut prefix_consumed = false;
        let mut raw_bytes = Vec::new();
        let mut expected_idx: u32 = 0;
        for (idx, (value, is_encoded)) in &segments {
            if *idx != expected_idx {
                tracing::warn!(
                    base_name = lower_name.as_str(),
                    expected = expected_idx,
                    actual = *idx,
                    "RFC 2231 Section 3: stopping continuation reassembly at gap \
                     (expected index {}, found {})",
                    expected_idx,
                    idx,
                );
                break;
            }
            expected_idx = idx + 1;
            if !*is_encoded {
                raw_bytes.extend_from_slice(value.as_bytes());
            } else if prefix_consumed {
                raw_bytes.extend_from_slice(&percent_decode(value));
            } else {
                prefix_consumed = true;
                let (cs, bytes) = split_charset_value(value);
                charset = cs;
                raw_bytes.extend_from_slice(&bytes);
            }
        }
        let decoded = match &charset {
            Some(cs) => decode_bytes_with_charset(cs, &raw_bytes),
            None => String::from_utf8_lossy(&raw_bytes).into_owned(),
        };
        let original_base = find_original_base_name(params, &lower_name);
        // A continuation counts as RFC 2231-decoded only when a non-empty
        // charset was declared.
        if charset.is_some() {
            rfc2231_decoded.insert(insert_pos);
        }
        result[insert_pos] = Some((original_base, decoded));
    }

    // Pass 3: drop plain parameters shadowed by an RFC 2231-decoded parameter
    // of the same (case-insensitive) name, re-indexing the decoded set.
    let mut rfc2231_names: HashSet<String> = HashSet::new();
    for &idx in &rfc2231_decoded {
        if let Some(Some((key, _))) = result.get(idx) {
            rfc2231_names.insert(key.to_ascii_lowercase());
        }
    }
    if !rfc2231_names.is_empty() {
        let mut new_result: Vec<Option<(String, String)>> = Vec::with_capacity(result.len());
        let mut new_decoded: HashSet<usize> = HashSet::new();
        for (i, entry) in result.into_iter().enumerate() {
            let Some(entry) = entry else {
                continue;
            };
            let dominated = !rfc2231_decoded.contains(&i)
                && rfc2231_names.contains(&entry.0.to_ascii_lowercase());
            if !dominated {
                if rfc2231_decoded.contains(&i) {
                    new_decoded.insert(new_result.len());
                }
                new_result.push(Some(entry));
            }
        }
        result = new_result;
        rfc2231_decoded = new_decoded;
    }

    // Pass 4: values that did not go through RFC 2231 decoding may still hold
    // RFC 2047 encoded-words; decode those that look like they do.
    for (i, entry) in result.iter_mut().enumerate() {
        let Some((_key, value)) = entry.as_mut() else {
            continue;
        };
        if rfc2231_decoded.contains(&i) {
            continue;
        }
        if value.contains("=?") && value.contains("?=") {
            *value = crate::codec::decode::decode_rfc2047(value.as_bytes());
        }
    }
    result.into_iter().flatten().collect()
}
/// How a parameter name participates in RFC 2231 processing, as reported by
/// `classify_key`.
#[derive(Debug, PartialEq, Eq)]
enum KeyClass {
    /// `name*`: a single value whose key ends in a bare `*`.
    StandaloneEncoded { base_name: String },
    /// `name*N` or `name*N*`: one numbered segment of a continued value.
    Continuation {
        /// Text before the first `*`, in its original case.
        base_name: String,
        /// Segment number parsed from the digits after the first `*`.
        index: u32,
        /// `true` when the key carries the trailing `*`.
        encoded: bool,
    },
}

/// Classifies a parameter name according to its RFC 2231 suffix.
///
/// Returns `None` for names without `*` and for names whose suffix is not a
/// valid section number: the index must consist solely of ASCII digits, must
/// not have a leading zero unless it is exactly `0`, and must fit in a `u32`.
fn classify_key(key: &str) -> Option<KeyClass> {
    let (base, suffix) = key.split_once('*')?;
    let base_name = base.to_owned();
    if suffix.is_empty() {
        return Some(KeyClass::StandaloneEncoded { base_name });
    }
    let (digits, encoded) = match suffix.strip_suffix('*') {
        Some(stripped) => (stripped, true),
        None => (suffix, false),
    };
    // `str::parse::<u32>` also accepts a leading `+`, so `name*+1` would be
    // treated as section 1. Require plain decimal digits before parsing.
    if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }
    // Reject zero-padded indices such as `*01`.
    if digits.len() > 1 && digits.starts_with('0') {
        return None;
    }
    let index: u32 = digits.parse().ok()?;
    Some(KeyClass::Continuation {
        base_name,
        index,
        encoded,
    })
}
/// Decodes a standalone extended value into a `String`.
///
/// The value is split with `split_charset_value`. When a charset is reported
/// the bytes are decoded with `decode_bytes_with_charset`; otherwise they are
/// interpreted as UTF-8, with invalid sequences replaced.
fn decode_charset_value(value: &str) -> String {
    let (declared_charset, payload) = split_charset_value(value);
    declared_charset.map_or_else(
        || String::from_utf8_lossy(&payload).into_owned(),
        |label| decode_bytes_with_charset(&label, &payload),
    )
}
fn split_charset_value(value: &str) -> (Option<String>, Vec<u8>) {
let Some(first_quote) = value.find('\'') else {
return (None, value.as_bytes().to_vec());
};
let Some(offset) = value[first_quote + 1..].find('\'') else {
return (None, value.as_bytes().to_vec());
};
let second_quote = first_quote + 1 + offset;
let charset = &value[..first_quote];
let encoded_part = &value[second_quote + 1..];
let bytes = percent_decode(encoded_part);
let cs = if charset.is_empty() {
None
} else {
Some(charset.to_owned())
};
(cs, bytes)
}
/// Replaces every `%XX` escape (two hex digits, either case) with the byte it
/// encodes.
///
/// A `%` that is not followed by two hex digits is copied through literally,
/// as is every other byte.
fn percent_decode(input: &str) -> Vec<u8> {
    let src = input.as_bytes();
    let mut out = Vec::with_capacity(src.len());
    let mut pos = 0;
    while let Some(&byte) = src.get(pos) {
        if byte == b'%' {
            // `get` yields `None` when fewer than two bytes follow the `%`.
            let escaped = src
                .get(pos + 1..pos + 3)
                .and_then(|pair| hex_val(pair[0]).zip(hex_val(pair[1])));
            if let Some((hi, lo)) = escaped {
                out.push((hi << 4) | lo);
                pos += 3;
                continue;
            }
        }
        out.push(byte);
        pos += 1;
    }
    out
}
/// Converts one ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) to its numeric
/// value, or returns `None` for any other byte.
fn hex_val(b: u8) -> Option<u8> {
    if b.is_ascii_digit() {
        Some(b - b'0')
    } else if b.is_ascii_hexdigit() {
        // Only letters remain here; fold case so one offset covers both.
        Some(b.to_ascii_lowercase() - b'a' + 10)
    } else {
        None
    }
}
/// Decodes `bytes` to a `String` using the charset named by `charset`.
///
/// The labels `utf-8` and `utf8` (ASCII case-insensitive) are decoded as
/// UTF-8 with invalid sequences replaced. Any other label is looked up via
/// `encoding_rs::Encoding::for_label`; if the label is unknown the bytes are
/// likewise decoded as lossy UTF-8.
fn decode_bytes_with_charset(charset: &str, bytes: &[u8]) -> String {
    let is_utf8 = ["utf-8", "utf8"]
        .iter()
        .any(|label| charset.eq_ignore_ascii_case(label));
    if !is_utf8 {
        if let Some(encoding) = encoding_rs::Encoding::for_label(charset.as_bytes()) {
            let (text, _had_errors) = encoding.decode_without_bom_handling(bytes);
            return text.into_owned();
        }
    }
    String::from_utf8_lossy(bytes).into_owned()
}
/// Looks up the original-case spelling of a continuation's base name.
///
/// Scans `params` in order for the first key that contains `*` and whose text
/// before the first `*` equals `lower_name` ignoring ASCII case, and returns
/// that text. Falls back to `lower_name` itself when no key matches.
fn find_original_base_name(params: &[(String, String)], lower_name: &str) -> String {
    params
        .iter()
        .filter_map(|(key, _)| key.split_once('*').map(|(base, _)| base))
        .find(|base| base.eq_ignore_ascii_case(lower_name))
        .unwrap_or(lower_name)
        .to_owned()
}
// Unit tests for this module are loaded from `rfc2231_tests.rs` through the
// `#[path]` attribute and are compiled only in test builds.
#[cfg(test)]
#[path = "rfc2231_tests.rs"]
mod tests;