use base64::alphabet::Alphabet;
use base64::engine::general_purpose::{GeneralPurpose, GeneralPurposeConfig};
use base64::engine::DecodePaddingMode;
use base64::Engine;
/// Builds the Base64 engine used for the shifted segments of IMAP modified UTF-7.
///
/// The engine differs from stock Base64 in its symbol table (the last symbol is
/// `,` rather than `/`) and in its configuration: it never writes `=` padding
/// when encoding, and when decoding it accepts input with or without padding
/// and tolerates non-zero trailing bits.
///
/// # Panics
///
/// Panics if the hard-coded symbol table is rejected by [`Alphabet::new`].
fn imap_b64_engine() -> GeneralPurpose {
    // 64 symbols: A-Z, a-z, 0-9, '+', ','.
    const IMAP_SYMBOLS: &str =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";

    let settings = GeneralPurposeConfig::new()
        .with_encode_padding(false)
        .with_decode_padding_mode(DecodePaddingMode::Indifferent)
        .with_decode_allow_trailing_bits(true);

    // The table is a fixed literal, so a failure here is a programming error
    // rather than a runtime condition worth propagating.
    #[allow(clippy::expect_used)]
    let symbols = Alphabet::new(IMAP_SYMBOLS).expect("IMAP modified Base64 alphabet is valid");

    GeneralPurpose::new(&symbols, settings)
}
/// Encodes `input` as IMAP modified UTF-7.
///
/// * `&` is written as the two-character escape `&-`.
/// * Printable ASCII (U+0020 through U+007E) other than `&` is copied verbatim.
/// * Every other character is converted to big-endian UTF-16 and buffered;
///   consecutive such characters share one `&<base64>-` segment, which is
///   written out as soon as a verbatim character, an `&`, or the end of the
///   input is reached.
pub fn encode_utf7(input: &str) -> String {
    let b64 = imap_b64_engine();
    let mut encoded = String::with_capacity(input.len());
    // UTF-16BE bytes waiting to be emitted as a single Base64 segment.
    let mut pending: Vec<u8> = Vec::new();

    for c in input.chars() {
        match c {
            '&' => {
                flush_utf16(&b64, &mut pending, &mut encoded);
                encoded.push_str("&-");
            }
            // U+0020 ..= U+007E; '&' was already handled by the arm above.
            ' '..='~' => {
                flush_utf16(&b64, &mut pending, &mut encoded);
                encoded.push(c);
            }
            _ => {
                // One code unit for BMP characters, a surrogate pair otherwise.
                let mut units = [0u16; 2];
                pending.extend(
                    c.encode_utf16(&mut units)
                        .iter()
                        .flat_map(|unit| unit.to_be_bytes()),
                );
            }
        }
    }

    // Close a segment that runs up to the end of the input.
    flush_utf16(&b64, &mut pending, &mut encoded);
    encoded
}
/// Writes the buffered UTF-16BE bytes to `out` as one `&<base64>-` segment and
/// empties the buffer.
///
/// Does nothing when `utf16_buf` is empty, so it is safe to call before every
/// verbatim character without producing empty `&-` segments.
fn flush_utf16(engine: &GeneralPurpose, utf16_buf: &mut Vec<u8>, out: &mut String) {
    if !utf16_buf.is_empty() {
        let payload = engine.encode(utf16_buf.as_slice());
        out.push('&');
        out.push_str(&payload);
        out.push('-');
        // `clear` keeps the allocation so the next segment can reuse it.
        utf16_buf.clear();
    }
}
/// Decodes an IMAP modified UTF-7 mailbox name (RFC 3501, section 5.1.3) into
/// a `String`. The decoder is lenient: it never fails, and malformed input is
/// either passed through or replaced with U+FFFD.
///
/// Handling per input byte:
///
/// * `&-` decodes to a literal `&`.
/// * `&<base64>-` is Base64-decoded and the result interpreted as big-endian
///   UTF-16. A missing closing `-` at the end of the input is tolerated.
/// * A shift sequence is re-emitted **exactly as it appeared in the input**
///   (including whether or not it had a closing `-`) when its payload is
///   empty, is not valid Base64, or decodes to text containing printable
///   ASCII (which must not be Base64-encoded); the last case also logs a
///   warning.
/// * Runs of bytes `>= 0x80` outside a shift sequence are interpreted as UTF-8,
///   with invalid sequences replaced by U+FFFD.
/// * Printable ASCII (0x20 through 0x7E) is copied verbatim.
/// * Any other byte (control characters and DEL) becomes U+FFFD.
pub fn decode_utf7(input: &[u8]) -> String {
    let engine = imap_b64_engine();
    let mut out = String::with_capacity(input.len());
    let mut i = 0;

    while i < input.len() {
        let byte = input[i];

        if byte == b'&' {
            i += 1;

            // "&-" is the escape for a literal ampersand.
            if input.get(i) == Some(&b'-') {
                out.push('&');
                i += 1;
                continue;
            }

            // Collect the payload up to (not including) the closing '-'.
            let start = i;
            while i < input.len() && input[i] != b'-' {
                i += 1;
            }
            let b64_slice = &input[start..i];

            // Remember whether the terminator was really present so the raw
            // fallback does not invent one.
            let terminated = i < input.len();
            if terminated {
                i += 1;
            }

            // An empty payload can only mean '&' was the final input byte.
            // The Base64 engine would accept it and yield nothing, silently
            // dropping the '&', so route it to the raw fallback instead.
            let decoded = if b64_slice.is_empty() {
                None
            } else {
                engine.decode(b64_slice).ok()
            };

            match decoded {
                Some(utf16_bytes) => {
                    let mut temp = String::new();
                    decode_utf16be(&utf16_bytes, &mut temp);

                    let has_printable_ascii = temp.chars().any(|ch| (' '..='~').contains(&ch));
                    if has_printable_ascii {
                        tracing::warn!(
                            "Modified UTF-7: Base64 segment encodes printable ASCII \
                             which MUST NOT be Base64-encoded per RFC 3501 Section 5.1.3"
                        );
                        push_raw_shift_sequence(&mut out, b64_slice, terminated);
                    } else {
                        out.push_str(&temp);
                    }
                }
                None => push_raw_shift_sequence(&mut out, b64_slice, terminated),
            }
        } else if byte >= 0x80 {
            // Consume the whole run of high bytes so multi-byte UTF-8
            // sequences are decoded as a unit.
            let start = i;
            while i < input.len() && input[i] >= 0x80 {
                i += 1;
            }
            // Lossy conversion borrows valid UTF-8 unchanged and substitutes
            // U+FFFD for anything invalid.
            out.push_str(&String::from_utf8_lossy(&input[start..i]));
        } else if (0x20..=0x7E).contains(&byte) {
            out.push(char::from(byte));
            i += 1;
        } else {
            // Control characters and DEL are not passed through.
            out.push('\u{FFFD}');
            i += 1;
        }
    }

    out
}

/// Appends a shift sequence to `out` the way it was spelled in the input:
/// `&`, the payload bytes (lossily converted to UTF-8), and the closing `-`
/// only if `terminated` says the input actually contained one.
fn push_raw_shift_sequence(out: &mut String, payload: &[u8], terminated: bool) {
    out.push('&');
    out.push_str(&String::from_utf8_lossy(payload));
    if terminated {
        out.push('-');
    }
}
/// Appends the big-endian UTF-16 text in `bytes` to `out`.
///
/// Each unpaired surrogate is replaced with U+FFFD. If `bytes` has an odd
/// length, the final orphan byte cannot form a code unit and contributes one
/// additional U+FFFD at the end.
fn decode_utf16be(bytes: &[u8], out: &mut String) {
    let pairs = bytes.chunks_exact(2);
    // Must be read before `pairs` is consumed by `map` below.
    let orphan = !pairs.remainder().is_empty();

    let units = pairs.map(|pair| u16::from_be_bytes([pair[0], pair[1]]));
    out.extend(
        char::decode_utf16(units).map(|unit| unit.unwrap_or(char::REPLACEMENT_CHARACTER)),
    );

    if orphan {
        out.push(char::REPLACEMENT_CHARACTER);
    }
}
// Unit tests for the modified UTF-7 encoder/decoder and the UTF-16BE helper.
// `unwrap_used` is allowed for the whole module via the attribute below.
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
use super::*;
// Plain printable ASCII is unchanged in both directions.
#[test]
fn ascii_passthrough() {
assert_eq!(encode_utf7("INBOX"), "INBOX");
assert_eq!(decode_utf7(b"INBOX"), "INBOX");
}
// A literal '&' is escaped as "&-" and unescaped again.
#[test]
fn ampersand_encoding() {
assert_eq!(encode_utf7("A&B"), "A&-B");
assert_eq!(decode_utf7(b"A&-B"), "A&B");
}
// encode followed by decode must return the original name for a mix of
// ASCII-only and non-ASCII folder names.
#[test]
fn non_ascii_roundtrip() {
let names = [
"Langstrumpf", "Strstrumpf", "Français", "日本語", "Strstrumpf/Langstrumpf", "Папка", "مجلد", "フォルダ", ];
for name in &names {
let encoded = encode_utf7(name);
let decoded = decode_utf7(encoded.as_bytes());
assert_eq!(
&decoded, name,
"round-trip failed for {name:?}: encoded={encoded:?}"
);
}
}
// Checks the shape of the encoded form: the ASCII prefix stays verbatim and
// the non-ASCII part introduces a '&' shift.
#[test]
fn known_encodings() {
assert_eq!(encode_utf7("Langstrumpf"), "Langstrumpf");
let encoded = encode_utf7("Français");
assert!(encoded.starts_with("Fran"));
assert!(encoded.contains('&'));
assert_eq!(decode_utf7(encoded.as_bytes()), "Français");
}
// ASCII and non-ASCII path components interleaved in one name.
#[test]
fn mixed_ascii_non_ascii() {
let name = "INBOX/日本語/test";
let encoded = encode_utf7(name);
let decoded = decode_utf7(encoded.as_bytes());
assert_eq!(decoded, name);
}
// U+1F4E7 lies outside the BMP, so it is encoded as a UTF-16 surrogate pair.
#[test]
fn supplementary_plane() {
let name = "Emoji\u{1F4E7}Folder"; let encoded = encode_utf7(name);
let decoded = decode_utf7(encoded.as_bytes());
assert_eq!(decoded, name);
}
// Empty input yields empty output in both directions.
#[test]
fn empty_string() {
assert_eq!(encode_utf7(""), "");
assert_eq!(decode_utf7(b""), "");
}
// Every '&' in the input is escaped independently.
#[test]
fn multiple_ampersands() {
assert_eq!(encode_utf7("A&B&C"), "A&-B&-C");
assert_eq!(decode_utf7(b"A&-B&-C"), "A&B&C");
}
// An unterminated shift sequence must not panic and must produce some output.
#[test]
fn malformed_base64_graceful() {
let result = decode_utf7(b"&INVALID");
assert!(!result.is_empty());
}
// Raw high bytes between ASCII characters must not disturb the ASCII around them.
#[test]
fn non_ascii_passthrough_outside_base64() {
let result = decode_utf7(&[b'A', 0xC3, 0xA9, b'B']);
assert!(result.contains('A'));
assert!(result.contains('B'));
}
// Text before a shift sequence that lacks its closing '-' is still decoded.
#[test]
fn unterminated_base64_segment() {
let result = decode_utf7(b"test&AE4");
assert!(result.starts_with("test"));
}
// Raw bytes >= 0x80 outside a shift sequence are interpreted as UTF-8.
#[test]
fn spec_audit_raw_utf8_outside_base64() {
let input = b"INBOX/caf\xc3\xa9";
let result = decode_utf7(input);
assert_eq!(
result, "INBOX/café",
"raw UTF-8 bytes should be decoded as UTF-8, not Latin-1"
);
}
// Same as above, with three-byte UTF-8 sequences.
#[test]
fn spec_audit_raw_utf8_cjk_outside_base64() {
let input = b"\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e";
let result = decode_utf7(input);
assert_eq!(
result, "日本語",
"raw UTF-8 CJK should be decoded correctly"
);
}
// "&AEE-" is the Base64 form of U+0041 ('A'); the decoder must not return "A".
#[test]
fn rejects_base64_encoded_printable_ascii() {
let result = decode_utf7(b"&AEE-");
assert_ne!(
result, "A",
"Decoder should not silently accept Base64-encoded printable ASCII"
);
}
// Three bytes of UTF-16BE input: one full code unit plus an orphan byte.
// "&Thb,-" is the Base64 form of the same three bytes 4E 16 FF.
#[test]
fn spec_audit_utf16be_trailing_odd_byte_produces_replacement() {
let mut out = String::new();
decode_utf16be(&[0x4E, 0x16, 0xFF], &mut out);
assert_eq!(
out, "世\u{FFFD}",
"orphan trailing byte must produce U+FFFD, not be silently dropped"
);
let result = decode_utf7(b"&Thb,-");
assert!(
result.contains('\u{FFFD}'),
"decode_utf7 must emit U+FFFD for orphan byte in Base64 segment"
);
assert!(
result.contains('世'),
"valid code unit before orphan byte must still decode"
);
}
// "&AEFl5Q-" encodes 'A' followed by U+65E5; the printable-ASCII check must
// trigger even when the segment also contains legitimate non-ASCII text.
#[test]
fn regression_mixed_ascii_nonascii_base64_segment() {
let result = decode_utf7(b"&AEFl5Q-");
assert_ne!(
result, "A\u{65E5}",
"Decoder must not silently accept Base64-encoded printable ASCII \
mixed with non-ASCII (RFC 3501 Section 5.1.3)"
);
assert_eq!(
result, "&AEFl5Q-",
"Decoder should emit raw fallback for non-conformant segment"
);
}
// The final Base64 symbol 'n' carries non-zero bits beyond the last full byte.
#[test]
fn regression_nonzero_trailing_bits_accepted() {
assert_eq!(
decode_utf7(b"&AOn-"),
"\u{00E9}",
"Non-zero trailing bits in Base64 must be accepted (RFC 3501 Section 5.1.3)"
);
}
// The encoder never writes '=' padding, but the decoder accepts it on input.
#[test]
fn regression_base64_padding_tolerated() {
assert_eq!(
decode_utf7(b"&AOk=-"),
"\u{00E9}",
"Base64 padding must be tolerated (RFC 3501 Section 5.1.3, Postel's law)"
);
}
// "&ACY-" is the Base64 form of U+0026 ('&'), which must be written as "&-".
#[test]
fn rejects_base64_encoded_ampersand() {
let result = decode_utf7(b"&ACY-");
assert_ne!(
result, "&",
"Decoder must not silently accept Base64-encoded '&' (RFC 3501 Section 5.1.3)"
);
}
// Bytes below 0x20 and 0x7F outside a shift sequence become U+FFFD while the
// surrounding printable text is kept.
#[test]
fn regression_control_chars_replaced() {
let input = b"\x00hello\x07world\x7F";
let result = decode_utf7(input);
assert!(
!result.contains('\0'),
"NUL (0x00) must not pass through verbatim (RFC 3501 Section 5.1.3)"
);
assert!(
!result.contains('\x07'),
"BEL (0x07) must not pass through verbatim (RFC 3501 Section 5.1.3)"
);
assert!(
!result.contains('\x7F'),
"DEL (0x7F) must not pass through verbatim (RFC 3501 Section 5.1.3)"
);
assert!(
result.contains("hello"),
"printable ASCII must be preserved"
);
assert!(
result.contains("world"),
"printable ASCII must be preserved"
);
assert!(
result.contains('\u{FFFD}'),
"control characters must be replaced with U+FFFD"
);
}
// A terminated shift sequence whose payload is not Base64 is echoed unchanged,
// and decoding continues normally afterwards.
#[test]
fn invalid_base64_in_shift_falls_back_to_raw() {
let result = decode_utf7(b"test&!!!-end");
assert_eq!(
result, "test&!!!-end",
"Invalid Base64 within shift must emit raw fallback (RFC 3501 Section 5.1.3)"
);
}
// Same fallback when the whole input is one invalid shift sequence.
#[test]
fn invalid_base64_chars_in_shift_falls_back() {
let result = decode_utf7(b"&@#$-");
assert_eq!(
result, "&@#$-",
"Base64 with invalid alphabet chars must produce raw fallback"
);
}
// 0xFF and 0xFE can never appear in valid UTF-8.
#[test]
fn invalid_utf8_high_bytes_outside_base64() {
let result = decode_utf7(&[b'A', 0xFF, 0xFE, b'B']);
assert!(
result.starts_with('A'),
"printable ASCII before invalid bytes must be preserved"
);
assert!(
result.ends_with('B'),
"printable ASCII after invalid bytes must be preserved"
);
assert!(
result.contains('\u{FFFD}'),
"invalid UTF-8 high bytes must produce U+FFFD (RFC 3501 Section 5.1.3)"
);
}
// 0x80 is a UTF-8 continuation byte with no lead byte before it.
#[test]
fn lone_continuation_byte_outside_base64() {
let result = decode_utf7(&[0x80]);
assert_eq!(
result, "\u{FFFD}",
"lone continuation byte must produce U+FFFD"
);
}
// 0xD800 is a high surrogate with no low surrogate following it.
#[test]
fn unpaired_high_surrogate_produces_replacement() {
let mut out = String::new();
decode_utf16be(&[0xD8, 0x00], &mut out);
assert_eq!(
out, "\u{FFFD}",
"unpaired high surrogate must produce U+FFFD (RFC 3501 Section 5.1.3)"
);
}
// 0xDC00 is a low surrogate with no high surrogate before it.
#[test]
fn unpaired_low_surrogate_produces_replacement() {
let mut out = String::new();
decode_utf16be(&[0xDC, 0x00], &mut out);
assert_eq!(
out, "\u{FFFD}",
"unpaired low surrogate must produce U+FFFD (RFC 3501 Section 5.1.3)"
);
}
// Two high surrogates in a row do not form a pair; each is replaced separately.
#[test]
fn two_high_surrogates_produce_two_replacements() {
let mut out = String::new();
decode_utf16be(&[0xD8, 0x00, 0xD8, 0x00], &mut out);
assert_eq!(
out, "\u{FFFD}\u{FFFD}",
"two unpaired high surrogates must each produce U+FFFD"
);
}
// Builds "&<base64 of D8 00>-" with the real engine and checks the end-to-end
// decoder replaces the unpaired surrogate as well.
#[test]
fn unpaired_surrogate_in_base64_segment_produces_replacement() {
let engine = imap_b64_engine();
let encoded_b64 = engine.encode([0xD8, 0x00]);
let mut input = Vec::new();
input.push(b'&');
input.extend_from_slice(encoded_b64.as_bytes());
input.push(b'-');
let result = decode_utf7(&input);
assert!(
result.contains('\u{FFFD}'),
"unpaired surrogate in Base64 segment must produce U+FFFD (RFC 3501 Section 5.1.3)"
);
}
}