#![allow(dead_code)]
use std::borrow::Cow;
/// Maximum length, in characters, of one encoded-word including its
/// `=?UTF-8?B?` and `?=` delimiters.
const MAX_ENCODED_WORD_LEN: usize = 75;
/// Opening delimiter of every encoded-word produced by `encode`
/// (UTF-8 charset, Base64 encoding).
const PREFIX: &str = "=?UTF-8?B?";
/// Closing delimiter of an encoded-word.
const SUFFIX: &str = "?=";
/// Largest input, in bytes, accepted by `encode` and `decode` (1 MiB).
const MAX_INPUT_LEN: usize = 1024 * 1024;
/// Reports whether `s` consists solely of printable ASCII, i.e. every byte
/// lies in `0x20..=0x7E`.
///
/// Control characters, DEL (`0x7F`) and any non-ASCII byte make the string
/// unsafe. The empty string is trivially safe.
fn is_ascii_header_safe(s: &str) -> bool {
    // Look for the first offending byte instead of validating each one.
    !s.as_bytes()
        .iter()
        .any(|&byte| !(0x20..=0x7e).contains(&byte))
}
/// Encodes a header value as RFC 2047 Base64 encoded-words when needed.
///
/// Input made only of printable ASCII (see `is_ascii_header_safe`) is returned
/// borrowed and unchanged. Anything else is wrapped as `=?UTF-8?B?<base64>?=`.
/// Input that does not fit in one encoded-word is split on character
/// boundaries into several encoded-words joined by a single space, so that no
/// encoded-word exceeds `MAX_ENCODED_WORD_LEN` characters.
///
/// # Errors
///
/// * [`EncodeError::InputTooLarge`] if `s` is longer than `MAX_INPUT_LEN` bytes.
/// * [`EncodeError::InvalidUtf8Boundary`] if no character boundary can be found
///   inside a chunk. This is a defensive check: a chunk holds 45 bytes while a
///   UTF-8 character occupies at most 4.
pub fn encode(s: &str) -> Result<Cow<'_, str>, EncodeError> {
    if s.len() > MAX_INPUT_LEN {
        return Err(EncodeError::InputTooLarge);
    }
    if is_ascii_header_safe(s) {
        return Ok(Cow::Borrowed(s));
    }

    let overhead = PREFIX.len() + SUFFIX.len();
    // Padded Base64 emits exactly 4 characters per started group of 3 input
    // bytes, so the payload budget must be counted in whole groups. Comparing
    // the *unpadded* length against the limit would let 46- and 47-byte inputs
    // through as a 76-character encoded-word.
    let max_input_bytes = (MAX_ENCODED_WORD_LEN - overhead) / 4 * 3;

    if s.len() <= max_input_bytes {
        let payload = base64_simd::STANDARD.encode_to_string(s.as_bytes());
        return Ok(Cow::Owned(format!("{PREFIX}{payload}{SUFFIX}")));
    }

    // Estimate: padded Base64 payload plus delimiters and one separator per
    // chunk. Chunks shortened to a character boundary may add a few more.
    let estimated_chunks = s.len().div_ceil(max_input_bytes);
    let estimated_capacity = s.len().div_ceil(3) * 4 + (overhead + 1) * estimated_chunks;
    let mut result = String::with_capacity(estimated_capacity);

    let mut rest = s;
    while !rest.is_empty() {
        // Take as many bytes as fit, then back off to a character boundary so
        // that no multi-byte character is split across two encoded-words.
        let mut end = rest.len().min(max_input_bytes);
        while !rest.is_char_boundary(end) {
            end -= 1;
        }
        if end == 0 {
            return Err(EncodeError::InvalidUtf8Boundary);
        }
        let (chunk, tail) = rest.split_at(end);

        if !result.is_empty() {
            result.push(' ');
        }
        result.push_str(PREFIX);
        result.push_str(&base64_simd::STANDARD.encode_to_string(chunk.as_bytes()));
        result.push_str(SUFFIX);
        rest = tail;
    }
    Ok(Cow::Owned(result))
}
/// Decodes an RFC 2047 header value made of one or more encoded-words.
///
/// The input is trimmed first. If the trimmed text does not both start with
/// `=?` and end with `?=`, it is returned borrowed and unchanged. Otherwise:
///
/// * when fewer than two whitespace-separated tokens look like encoded-words,
///   the whole trimmed text is decoded as a single encoded-word;
/// * when two or more do, each encoded-word is decoded and the results are
///   concatenated. Whitespace between two adjacent encoded-words is dropped.
///   Plain tokens between encoded-words are kept verbatim, together with the
///   whitespace around them, instead of being silently discarded.
///
/// # Errors
///
/// * [`DecodeError::InputTooLarge`] if `s` is longer than `MAX_INPUT_LEN` bytes.
/// * Any error reported by `decode_single_word` for a malformed encoded-word.
pub fn decode(s: &str) -> Result<Cow<'_, str>, DecodeError> {
    // Cheap shape test: does `token` carry the encoded-word delimiters?
    fn looks_encoded(token: &str) -> bool {
        token.starts_with("=?") && token.ends_with("?=")
    }

    if s.len() > MAX_INPUT_LEN {
        return Err(DecodeError::InputTooLarge);
    }
    let s = s.trim();
    if !looks_encoded(s) {
        return Ok(Cow::Borrowed(s));
    }

    // Only "fewer than two" vs. "two or more" matters, so stop counting at two.
    let encoded_tokens = s
        .split_whitespace()
        .filter(|token| looks_encoded(token))
        .take(2)
        .count();
    if encoded_tokens < 2 {
        return decode_single_word(s).map(Cow::Owned);
    }

    let mut decoded = String::with_capacity(s.len());
    let mut previous_was_encoded = false;
    let mut rest = s;
    while !rest.is_empty() {
        // Split off the whitespace gap and the token that follows it.
        let after_gap = rest.trim_start();
        let gap = &rest[..rest.len() - after_gap.len()];
        let token_len = after_gap
            .find(char::is_whitespace)
            .unwrap_or(after_gap.len());
        let (token, tail) = after_gap.split_at(token_len);
        rest = tail;

        let is_encoded = looks_encoded(token);
        // Whitespace separating two encoded-words is only a folding artifact;
        // any other gap belongs to the text and is preserved.
        if !(is_encoded && previous_was_encoded) {
            decoded.push_str(gap);
        }
        if is_encoded {
            decoded.push_str(&decode_single_word(token)?);
        } else {
            decoded.push_str(token);
        }
        previous_was_encoded = is_encoded;
    }
    Ok(Cow::Owned(decoded))
}
/// Decodes one encoded-word of the form `=?charset?encoding?text?=`.
///
/// Surrounding whitespace is trimmed. The encoding marker is matched
/// case-insensitively: `B` selects Base64, `Q` selects the quoted-printable
/// variant handled by `decode_quoted_printable`. The charset field is not
/// consulted; the decoded bytes are always interpreted as UTF-8.
///
/// # Errors
///
/// * [`DecodeError::InvalidFormat`] if the delimiters or any of the three
///   `?`-separated fields are missing (including inputs such as `"=?="` that
///   are too short to hold both delimiters).
/// * [`DecodeError::UnsupportedEncoding`] if the encoding is neither `B` nor `Q`.
/// * [`DecodeError::Base64Error`] if the Base64 payload is invalid.
/// * [`DecodeError::InvalidUtf8`] if the decoded bytes are not valid UTF-8.
/// * Any error reported by `decode_quoted_printable`.
fn decode_single_word(s: &str) -> Result<String, DecodeError> {
    // Strip the delimiters one after the other so they cannot overlap: on
    // "=?=" both `starts_with("=?")` and `ends_with("?=")` hold, and slicing
    // `[2..len - 2]` would panic.
    let inner = s
        .trim()
        .strip_prefix("=?")
        .and_then(|rest| rest.strip_suffix("?="))
        .ok_or(DecodeError::InvalidFormat)?;

    // Limit the split to three fields so a '?' inside the payload is kept.
    let mut fields = inner.splitn(3, '?');
    let (Some(_charset), Some(encoding), Some(encoded_text)) =
        (fields.next(), fields.next(), fields.next())
    else {
        return Err(DecodeError::InvalidFormat);
    };

    let decoded_bytes = if encoding.eq_ignore_ascii_case("B") {
        base64_simd::STANDARD
            .decode_to_vec(encoded_text)
            .map_err(|_| DecodeError::Base64Error)?
    } else if encoding.eq_ignore_ascii_case("Q") {
        decode_quoted_printable(encoded_text)?
    } else {
        return Err(DecodeError::UnsupportedEncoding);
    };

    String::from_utf8(decoded_bytes).map_err(|_| DecodeError::InvalidUtf8)
}
/// Decodes the text of a `Q`-encoded word into raw bytes.
///
/// * `=XY` becomes the byte whose hexadecimal value is `XY`.
/// * `_` becomes a space.
/// * Any other ASCII character is copied through unchanged.
///
/// # Errors
///
/// * [`DecodeError::InvalidFormat`] if an `=` is not followed by two more
///   characters, or if a non-ASCII character appears outside an escape.
/// * [`DecodeError::InvalidHex`] if either character after `=` is not a
///   hexadecimal digit.
fn decode_quoted_printable(s: &str) -> Result<Vec<u8>, DecodeError> {
    // Converts one character of an `=XY` escape into its 4-bit value.
    fn hex_value(digit: char) -> Result<u8, DecodeError> {
        digit
            .to_digit(16)
            .and_then(|value| u8::try_from(value).ok())
            .ok_or(DecodeError::InvalidHex)
    }

    let mut out = Vec::with_capacity(s.len().min(64));
    let mut input = s.chars();
    while let Some(ch) = input.next() {
        if ch == '=' {
            // Both characters must be present before either is validated, so a
            // truncated escape is a format error rather than a hex error.
            let (Some(first), Some(second)) = (input.next(), input.next()) else {
                return Err(DecodeError::InvalidFormat);
            };
            let high_nibble = hex_value(first)?;
            let low_nibble = hex_value(second)?;
            out.push((high_nibble << 4) | low_nibble);
        } else if ch == '_' {
            out.push(b' ');
        } else {
            match u8::try_from(ch) {
                Ok(byte) if byte.is_ascii() => out.push(byte),
                _ => return Err(DecodeError::InvalidFormat),
            }
        }
    }
    Ok(out)
}
/// Errors returned by `encode`.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum EncodeError {
/// The input is longer than `MAX_INPUT_LEN` bytes.
#[error("input string too large (max {MAX_INPUT_LEN} bytes)")]
InputTooLarge,
/// While splitting the input into chunks, no character boundary was found
/// after the start of a chunk.
#[error("failed to find valid UTF-8 character boundary")]
InvalidUtf8Boundary,
}
/// Errors returned by `decode` and its helpers.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum DecodeError {
/// The input is longer than `MAX_INPUT_LEN` bytes.
#[error("input string too large (max {MAX_INPUT_LEN} bytes)")]
InputTooLarge,
/// The text is not shaped like `=?charset?encoding?text?=`, a `=XY` escape is
/// truncated, or a non-ASCII character appears in quoted-printable text.
#[error("invalid RFC 2047 encoded-word format")]
InvalidFormat,
/// The payload of a `B`-encoded word is not valid Base64.
#[error("base64 decoding failed")]
Base64Error,
/// A character following `=` in quoted-printable text is not a hex digit.
#[error("invalid hex in quoted-printable encoding")]
InvalidHex,
/// The decoded bytes do not form valid UTF-8.
#[error("decoded bytes are not valid UTF-8")]
InvalidUtf8,
/// The encoding marker is neither `B` nor `Q` (compared case-insensitively).
#[error("unsupported encoding type")]
UnsupportedEncoding,
}
// Unit tests for `encode`, `decode`, encode/decode round trips, and the
// trait implementations of the two error enums.
#[cfg(test)]
mod tests {
use super::*;
// ---- encode: pass-through versus Base64 wrapping ----
#[test]
fn test_encode_ascii() {
let input = "hello world";
let encoded = encode(input).unwrap();
assert_eq!(encoded, "hello world");
}
#[test]
fn test_encode_non_ascii() {
let input = "你好世界";
let encoded = encode(input).unwrap();
assert!(encoded.starts_with("=?UTF-8?B?"));
assert!(encoded.ends_with("?="));
}
// Control characters are ASCII but still force encoding.
#[test]
fn test_encode_control_characters() {
let input = "hello\x00world";
let encoded = encode(input).unwrap();
assert!(encoded.starts_with("=?UTF-8?B?"));
}
#[test]
fn test_encode_del_character() {
let input = "hello\x7fworld";
let encoded = encode(input).unwrap();
assert!(encoded.starts_with("=?UTF-8?B?"));
}
#[test]
fn test_encode_empty_string() {
let input = "";
let encoded = encode(input).unwrap();
assert_eq!(encoded, "");
}
#[test]
fn test_encode_mixed_content() {
let input = "Hello 世界";
let encoded = encode(input).unwrap();
assert!(encoded.starts_with("=?UTF-8?B?"));
}
// ---- encode: splitting into encoded-words of at most 75 characters ----
// Every space-separated encoded-word must stay within the length limit.
#[test]
fn test_encode_respects_75_char_limit() {
let input = "这是一个非常长的中文字符串,用于测试RFC2047的75字符限制功能是否正常工作";
let encoded = encode(input).unwrap();
for word in encoded.split(' ') {
assert!(word.len() <= 75, "Encoded word exceeds 75 characters: {} (len={})", word, word.len());
}
}
#[test]
fn test_encode_short_string_single_word() {
let input = "你好";
let encoded = encode(input).unwrap();
assert!(!encoded.contains(' ')); assert!(encoded.len() <= 75);
}
#[test]
fn test_encode_long_string_multiple_words() {
let input = "あいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわをん";
let encoded = encode(input).unwrap();
let word_count = encoded.split(' ').count();
assert!(word_count > 1, "Long string should be split into multiple words");
for word in encoded.split(' ') {
assert!(word.starts_with("=?UTF-8?B?"));
assert!(word.ends_with("?="));
assert!(word.len() <= 75);
}
}
#[test]
fn test_roundtrip_long_string() {
let original = "这是一个非常长的中文字符串,用于测试RFC2047的75字符限制功能是否正常工作,包括多个编码字的拆分和合并";
let encoded = encode(original).unwrap();
let decoded = decode(&encoded).unwrap();
assert_eq!(decoded, original);
}
// Plain ASCII input must not allocate: the result borrows the input.
#[test]
fn test_encode_ascii_returns_borrowed() {
let input = "hello world";
let encoded = encode(input).unwrap();
assert!(matches!(encoded, Cow::Borrowed(_)));
}
#[test]
fn test_encode_input_too_large() {
let large_input = "a".repeat(MAX_INPUT_LEN + 1);
let result = encode(&large_input);
assert_eq!(result, Err(EncodeError::InputTooLarge));
}
// ---- decode: plain text and size limit ----
#[test]
fn test_decode_plain() {
let input = "hello world";
let decoded = decode(input).unwrap();
assert_eq!(decoded, "hello world");
}
#[test]
fn test_decode_input_too_large() {
let large_input = "a".repeat(MAX_INPUT_LEN + 1);
let result = decode(&large_input);
assert_eq!(result, Err(DecodeError::InputTooLarge));
}
#[test]
fn test_decode_plain_returns_borrowed() {
let input = "hello world";
let decoded = decode(input).unwrap();
assert!(matches!(decoded, Cow::Borrowed(_)));
}
// ---- decode: single encoded-word, B and Q encodings ----
// The Base64 payload decodes to text that itself contains "?=".
#[test]
fn test_decode_single_word_with_question_equals_in_content() {
let input = "=?UTF-8?B?dGVzdD89dGVzdA==?="; let decoded = decode(input).unwrap();
assert_eq!(decoded, "test?=test");
}
#[test]
fn test_decode_base64() {
let input = "=?UTF-8?B?5L2g5aW9?=";
let decoded = decode(input).unwrap();
assert_eq!(decoded, "你好");
}
#[test]
fn test_decode_quoted_printable() {
let input = "=?UTF-8?Q?caf=C3=A9?=";
let decoded = decode(input).unwrap();
assert_eq!(decoded, "café");
}
#[test]
fn test_decode_underscore_as_space() {
let input = "=?UTF-8?Q?hello_world?=";
let decoded = decode(input).unwrap();
assert_eq!(decoded, "hello world");
}
// The encoding marker is matched case-insensitively.
#[test]
fn test_decode_lowercase_encoding() {
let input = "=?utf-8?b?5L2g5aW9?=";
let decoded = decode(input).unwrap();
assert_eq!(decoded, "你好");
}
#[test]
fn test_decode_lowercase_q_encoding() {
let input = "=?UTF-8?q?hello_world?=";
let decoded = decode(input).unwrap();
assert_eq!(decoded, "hello world");
}
// ---- decode: several encoded-words separated by whitespace ----
#[test]
fn test_decode_multiple_encoded_words() {
let input = "=?UTF-8?B?5L2g?= =?UTF-8?B?5aW9?=";
let decoded = decode(input).unwrap();
assert_eq!(decoded, "你好");
}
#[test]
fn test_decode_multiple_encoded_words_with_tabs() {
let input = "=?UTF-8?B?5L2g?=\t=?UTF-8?B?5aW9?=";
let decoded = decode(input).unwrap();
assert_eq!(decoded, "你好");
}
#[test]
fn test_decode_multiple_encoded_words_with_mixed_whitespace() {
let input = "=?UTF-8?B?5L2g?= \t =?UTF-8?B?5aW9?=";
let decoded = decode(input).unwrap();
assert_eq!(decoded, "你好");
}
// No whitespace-separated token here is a complete encoded-word, so the
// whole input is parsed as one word and rejected.
#[test]
fn test_decode_malformed_multiple_words_no_valid_parts() {
let input = "=?test ?= another ?=";
let result = decode(input);
assert_eq!(result, Err(DecodeError::InvalidFormat));
}
#[test]
fn test_decode_with_whitespace_trim() {
let input = " =?UTF-8?B?5L2g5aW9?= ";
let decoded = decode(input).unwrap();
assert_eq!(decoded, "你好");
}
// ---- decode: charset handling ----
#[test]
fn test_decode_utf8_charset_variant() {
let input = "=?UTF8?B?5L2g5aW9?=";
let decoded = decode(input).unwrap();
assert_eq!(decoded, "你好");
}
// The declared charset is not consulted; the payload is read as UTF-8.
#[test]
fn test_decode_other_charset_as_utf8() {
let input = "=?ISO-8859-1?B?SGVsbG8=?="; let decoded = decode(input).unwrap();
assert_eq!(decoded, "Hello");
}
// ---- round trips ----
#[test]
fn test_roundtrip() {
let original = "Hello 世界 🌍";
let encoded = encode(original).unwrap();
let decoded = decode(&encoded).unwrap();
assert_eq!(decoded, original);
}
#[test]
fn test_roundtrip_ascii() {
let original = "plain ascii text";
let encoded = encode(original).unwrap();
let decoded = decode(&encoded).unwrap();
assert_eq!(decoded, original);
}
#[test]
fn test_roundtrip_emoji() {
let original = "🎉🎊🎁";
let encoded = encode(original).unwrap();
let decoded = decode(&encoded).unwrap();
assert_eq!(decoded, original);
}
// ---- decode: error cases ----
#[test]
fn test_decode_invalid_base64() {
let input = "=?UTF-8?B?!!!?=";
let result = decode(input);
assert_eq!(result, Err(DecodeError::Base64Error));
}
#[test]
fn test_decode_unsupported_encoding() {
let input = "=?UTF-8?X?dGVzdA==?=";
let result = decode(input);
assert_eq!(result, Err(DecodeError::UnsupportedEncoding));
}
#[test]
fn test_decode_missing_encoding_part() {
let input = "=?UTF-8?=";
let result = decode(input);
assert_eq!(result, Err(DecodeError::InvalidFormat));
}
#[test]
fn test_decode_missing_encoded_text() {
let input = "=?UTF-8?B?=";
let result = decode(input);
assert_eq!(result, Err(DecodeError::InvalidFormat));
}
#[test]
fn test_decode_qp_non_ascii_rejected() {
let input = "=?UTF-8?Q?café?="; let result = decode(input);
assert_eq!(result, Err(DecodeError::InvalidFormat));
}
// A truncated "=X" escape is reported as InvalidFormat, not InvalidHex.
#[test]
fn test_decode_qp_incomplete_hex_one_char() {
let input = "=?UTF-8?Q?test=A?=";
let result = decode(input);
assert_eq!(result, Err(DecodeError::InvalidFormat));
}
#[test]
fn test_decode_qp_incomplete_hex_no_chars() {
let input = "=?UTF-8?Q?test=?=";
let result = decode(input);
assert_eq!(result, Err(DecodeError::InvalidFormat));
}
#[test]
fn test_decode_qp_invalid_hex() {
let input = "=?UTF-8?Q?test=GG?=";
let result = decode(input);
assert_eq!(result, Err(DecodeError::InvalidHex));
}
// "//4=" is Base64 for the bytes 0xFF 0xFE, which are not valid UTF-8.
#[test]
fn test_decode_invalid_utf8_bytes() {
let input = "=?UTF-8?B?//4=?=";
let result = decode(input);
assert_eq!(result, Err(DecodeError::InvalidUtf8));
}
// Text carrying only one of the two delimiters is returned unchanged.
#[test]
fn test_decode_not_starting_with_marker() {
let input = "hello?=";
let decoded = decode(input).unwrap();
assert_eq!(decoded, "hello?=");
}
#[test]
fn test_decode_not_ending_with_marker() {
let input = "=?hello";
let decoded = decode(input).unwrap();
assert_eq!(decoded, "=?hello");
}
// ---- DecodeError: Display, Error, Debug, Clone, PartialEq ----
#[test]
fn test_decode_error_display_invalid_format() {
let err = DecodeError::InvalidFormat;
assert_eq!(err.to_string(), "invalid RFC 2047 encoded-word format");
}
#[test]
fn test_decode_error_display_base64_error() {
let err = DecodeError::Base64Error;
assert_eq!(err.to_string(), "base64 decoding failed");
}
#[test]
fn test_decode_error_display_invalid_hex() {
let err = DecodeError::InvalidHex;
assert_eq!(err.to_string(), "invalid hex in quoted-printable encoding");
}
#[test]
fn test_decode_error_display_invalid_utf8() {
let err = DecodeError::InvalidUtf8;
assert_eq!(err.to_string(), "decoded bytes are not valid UTF-8");
}
#[test]
fn test_decode_error_display_unsupported_encoding() {
let err = DecodeError::UnsupportedEncoding;
assert_eq!(err.to_string(), "unsupported encoding type");
}
#[test]
fn test_decode_error_display_input_too_large() {
let err = DecodeError::InputTooLarge;
assert_eq!(err.to_string(), "input string too large (max 1048576 bytes)");
}
#[test]
fn test_decode_error_is_error() {
let err: &dyn std::error::Error = &DecodeError::InvalidFormat;
assert!(err.source().is_none());
}
#[test]
fn test_decode_error_debug() {
let err = DecodeError::InvalidFormat;
assert_eq!(format!("{err:?}"), "InvalidFormat");
}
#[test]
fn test_decode_error_clone() {
let err = DecodeError::Base64Error;
let cloned = err.clone();
assert_eq!(err, cloned);
}
#[test]
fn test_decode_error_eq() {
assert_eq!(DecodeError::InvalidFormat, DecodeError::InvalidFormat);
assert_ne!(DecodeError::InvalidFormat, DecodeError::Base64Error);
}
// ---- EncodeError: Display, Error, Debug, Clone, PartialEq ----
#[test]
fn test_encode_error_display_input_too_large() {
let err = EncodeError::InputTooLarge;
assert_eq!(err.to_string(), "input string too large (max 1048576 bytes)");
}
#[test]
fn test_encode_error_display_invalid_utf8_boundary() {
let err = EncodeError::InvalidUtf8Boundary;
assert_eq!(err.to_string(), "failed to find valid UTF-8 character boundary");
}
#[test]
fn test_encode_error_is_error() {
let err: &dyn std::error::Error = &EncodeError::InputTooLarge;
assert!(err.source().is_none());
}
#[test]
fn test_encode_error_debug() {
let err = EncodeError::InputTooLarge;
assert_eq!(format!("{err:?}"), "InputTooLarge");
}
#[test]
fn test_encode_error_clone() {
let err = EncodeError::InvalidUtf8Boundary;
let cloned = err.clone();
assert_eq!(err, cloned);
}
#[test]
fn test_encode_error_eq() {
assert_eq!(EncodeError::InputTooLarge, EncodeError::InputTooLarge);
assert_ne!(EncodeError::InputTooLarge, EncodeError::InvalidUtf8Boundary);
}
}