// daaki-imap 0.1.0

//! Modified UTF-7 encoding for IMAP mailbox names.
//!
//! RFC 3501 Section 5.1.3 defines a modified UTF-7 encoding for mailbox names.
//! Printable US-ASCII characters (0x20-0x7E) except `&` are sent as-is.
//! All other characters are encoded in modified Base64 (shifted with `&`…`-`).
//! The `&` character itself is represented as `&-`.
//!
//! The modified Base64 alphabet replaces `/` with `,` and omits padding `=`.
//!
//! When the server advertises `UTF8=ACCEPT` (RFC 6855) and the client has
//! enabled it, mailbox names are sent as raw UTF-8 instead.

use base64::alphabet::Alphabet;
use base64::engine::general_purpose::{GeneralPurpose, GeneralPurposeConfig};
use base64::engine::DecodePaddingMode;
use base64::Engine;

/// Construct the Base64 engine used for the `&`…`-` segments of IMAP
/// modified UTF-7 (RFC 3501 Section 5.1.3).
///
/// The alphabet is the standard Base64 alphabet with `,` in place of `/`.
///
/// Encoding never emits `=` padding. Decoding is deliberately lenient so that
/// output from non-conformant peers is still readable (Postel's law):
/// - `=` padding is accepted whether present or absent;
/// - unused trailing bits in the final Base64 character may be non-zero.
fn imap_b64_engine() -> GeneralPurpose {
    // Strict on the way out (no padding), forgiving on the way in.
    let lenient_config = GeneralPurposeConfig::new()
        .with_decode_allow_trailing_bits(true)
        .with_decode_padding_mode(DecodePaddingMode::Indifferent)
        .with_encode_padding(false);

    // Invariant (logic, not `unsafe`): the literal below is a fixed string of
    // 64 distinct printable ASCII characters, so alphabet construction cannot
    // fail at runtime and the `expect` is unreachable.
    #[allow(clippy::expect_used)]
    let imap_alphabet =
        Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,")
            .expect("IMAP modified Base64 alphabet is valid");

    GeneralPurpose::new(&imap_alphabet, lenient_config)
}

/// Encode a UTF-8 mailbox name as IMAP modified UTF-7 (RFC 3501 Section 5.1.3).
///
/// - `&` becomes the two-character sequence `&-`.
/// - Every other printable US-ASCII character (0x20..=0x7E) is copied as-is.
/// - Anything else (control characters and all non-ASCII) is collected into a
///   run of UTF-16BE bytes; each run is emitted as one modified-Base64
///   segment wrapped in `&`…`-`.
pub fn encode_utf7(input: &str) -> String {
    let engine = imap_b64_engine();

    let mut encoded = String::with_capacity(input.len());
    // UTF-16BE bytes of the shift run currently being collected.
    let mut pending: Vec<u8> = Vec::new();

    for ch in input.chars() {
        match ch {
            '&' => {
                // Close any open run, then emit the dedicated escape for `&`.
                flush_utf16(&engine, &mut pending, &mut encoded);
                encoded.push_str("&-");
            }
            ' '..='~' => {
                // Printable US-ASCII represents itself; close any open run first.
                flush_utf16(&engine, &mut pending, &mut encoded);
                encoded.push(ch);
            }
            _ => {
                // One or two UTF-16 code units (two for supplementary-plane
                // characters), each appended big-endian.
                let mut units = [0u16; 2];
                pending.extend(
                    ch.encode_utf16(&mut units)
                        .iter()
                        .flat_map(|unit| unit.to_be_bytes()),
                );
            }
        }
    }

    // The input may end inside a run.
    flush_utf16(&engine, &mut pending, &mut encoded);

    encoded
}

/// Emit the pending UTF-16BE bytes as one `&`…`-` modified-Base64 segment
/// (RFC 3501 Section 5.1.3) and reset the buffer.
///
/// Does nothing when `utf16_buf` is empty, so callers may invoke it
/// unconditionally at every run boundary.
fn flush_utf16(engine: &GeneralPurpose, utf16_buf: &mut Vec<u8>, out: &mut String) {
    if !utf16_buf.is_empty() {
        let segment = engine.encode(utf16_buf.as_slice());
        out.push('&');
        out.push_str(&segment);
        out.push('-');
        // Keep the allocation for the next run.
        utf16_buf.clear();
    }
}

/// Decode an IMAP modified UTF-7 mailbox name into a UTF-8 string
/// per RFC 3501 Section 5.1.3.
///
/// Returns the decoded string, or the input lossily converted if decoding fails.
pub fn decode_utf7(input: &[u8]) -> String {
    let engine = imap_b64_engine();

    let mut out = String::with_capacity(input.len());
    let mut i = 0;

    while i < input.len() {
        if input[i] == b'&' {
            i += 1;
            if i < input.len() && input[i] == b'-' {
                // `&-` → literal `&`
                out.push('&');
                i += 1;
            } else {
                // Find the closing `-`.
                let start = i;
                while i < input.len() && input[i] != b'-' {
                    i += 1;
                }
                // Decode the modified Base64 segment.
                let b64_slice = &input[start..i];
                if i < input.len() {
                    i += 1; // skip the `-`
                }
                if let Ok(utf16_bytes) = engine.decode(b64_slice) {
                    // Validate: decoded chars must NOT contain ANY printable ASCII,
                    // including '&' which has its own dedicated encoding (`&-`).
                    // RFC 3501 Section 5.1.3: "Modified BASE64 MUST NOT be used to
                    // represent any printing US-ASCII character which can represent
                    // itself."
                    let mut temp = String::new();
                    decode_utf16be(&utf16_bytes, &mut temp);
                    let has_printable_ascii = !temp.is_empty()
                        && temp.chars().any(|ch| {
                            // RFC 3501 Section 5.1.3: '&' (U+0026) is printable ASCII
                            // and MUST be encoded as `&-`, not via Base64.
                            ch.is_ascii() && (0x20u32..=0x7Eu32).contains(&(ch as u32))
                        });
                    if has_printable_ascii {
                        // Malformed: emit raw fallback instead of decoded ASCII.
                        // RFC 3501 Section 5.1.3 violation by the sender.
                        tracing::warn!(
                            "Modified UTF-7: Base64 segment encodes printable ASCII \
                             which MUST NOT be Base64-encoded per RFC 3501 Section 5.1.3"
                        );
                        out.push('&');
                        out.push_str(&String::from_utf8_lossy(b64_slice));
                        out.push('-');
                    } else {
                        out.push_str(&temp);
                    }
                } else {
                    // Malformed — emit the raw bytes lossily.
                    out.push('&');
                    out.push_str(&String::from_utf8_lossy(b64_slice));
                    out.push('-');
                }
            }
        } else if input[i] >= 0x80 {
            // Non-ASCII byte outside a Base64 segment.
            // RFC 3501 §5.1.3: bytes 0x80-0xFF should be Base64-encoded,
            // but non-conformant servers (e.g. Exchange) send raw UTF-8.
            // Try to decode as UTF-8 per Postel's law.
            let start = i;
            // Gather the full run of high bytes (potential multi-byte UTF-8).
            while i < input.len() && input[i] >= 0x80 {
                i += 1;
            }
            let raw = &input[start..i];
            match std::str::from_utf8(raw) {
                Ok(s) => out.push_str(s),
                Err(_) => {
                    // Not valid UTF-8 — fall back to lossy conversion.
                    out.push_str(&String::from_utf8_lossy(raw));
                }
            }
        } else if input[i] >= 0x20 && input[i] <= 0x7E {
            // Printable ASCII (0x20-0x7E) — pass through (RFC 3501 Section 5.1.3).
            out.push(input[i] as char);
            i += 1;
        } else {
            // Control characters (0x00-0x1F, 0x7F) are not printable US-ASCII
            // per RFC 3501 Section 5.1.3. Replace with U+FFFD.
            out.push('\u{FFFD}');
            i += 1;
        }
    }

    out
}

/// Append the text encoded by `bytes` (UTF-16, big-endian) to `out`
/// (RFC 3501 Section 5.1.3).
///
/// Invalid input never aborts decoding:
/// - each unpaired surrogate code unit becomes U+FFFD;
/// - if `bytes` has odd length, the final byte cannot form a code unit and
///   one U+FFFD is appended after everything else.
fn decode_utf16be(bytes: &[u8], out: &mut String) {
    let pairs = bytes.chunks_exact(2);
    // A leftover byte means the payload was truncated or corrupt.
    let has_orphan = !pairs.remainder().is_empty();

    // Each pair is one big-endian code unit.
    let code_units = pairs.map(|pair| u16::from_be_bytes([pair[0], pair[1]]));
    out.extend(char::decode_utf16(code_units).map(|decoded| decoded.unwrap_or('\u{FFFD}')));

    if has_orphan {
        out.push('\u{FFFD}');
    }
}

// Unit tests for the modified UTF-7 encoder and decoder. They cover the
// conformant paths as well as the lenient handling of malformed input.
#[cfg(test)]
// Panicking on failure is the desired behavior inside tests.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// RFC 3501 Section 5.1.3 example: plain ASCII mailbox.
    #[test]
    fn ascii_passthrough() {
        assert_eq!(encode_utf7("INBOX"), "INBOX");
        assert_eq!(decode_utf7(b"INBOX"), "INBOX");
    }

    /// RFC 3501 Section 5.1.3: `&` encodes as `&-`.
    #[test]
    fn ampersand_encoding() {
        assert_eq!(encode_utf7("A&B"), "A&-B");
        assert_eq!(decode_utf7(b"A&-B"), "A&B");
    }

    /// Round-trip for non-ASCII mailbox names.
    #[test]
    fn non_ascii_roundtrip() {
        let names = [
            "Langstrumpf",            // ASCII only
            "Strstrumpf",             // ASCII only
            "Français",               // Latin-1 supplement
            "日本語",                 // CJK
            "Strstrumpf/Langstrumpf", // With slash
            "Папка",                  // Cyrillic
            "مجلد",                   // Arabic
            "フォルダ",               // Katakana
        ];

        for name in &names {
            let encoded = encode_utf7(name);
            let decoded = decode_utf7(encoded.as_bytes());
            assert_eq!(
                &decoded, name,
                "round-trip failed for {name:?}: encoded={encoded:?}"
            );
        }
    }

    /// Verify specific known encodings.
    #[test]
    fn known_encodings() {
        // "Langstrumpf" is pure ASCII
        assert_eq!(encode_utf7("Langstrumpf"), "Langstrumpf");

        // "Français" — the `ç` (U+00E7) is non-ASCII
        let encoded = encode_utf7("Français");
        assert!(encoded.starts_with("Fran"));
        assert!(encoded.contains('&'));
        assert_eq!(decode_utf7(encoded.as_bytes()), "Français");
    }

    /// Mixed ASCII and non-ASCII in same name.
    #[test]
    fn mixed_ascii_non_ascii() {
        let name = "INBOX/日本語/test";
        let encoded = encode_utf7(name);
        let decoded = decode_utf7(encoded.as_bytes());
        assert_eq!(decoded, name);
    }

    /// Supplementary plane characters (surrogate pairs in UTF-16).
    #[test]
    fn supplementary_plane() {
        let name = "Emoji\u{1F4E7}Folder"; // 📧
        let encoded = encode_utf7(name);
        let decoded = decode_utf7(encoded.as_bytes());
        assert_eq!(decoded, name);
    }

    /// Empty string.
    #[test]
    fn empty_string() {
        assert_eq!(encode_utf7(""), "");
        assert_eq!(decode_utf7(b""), "");
    }

    /// Multiple `&` characters.
    #[test]
    fn multiple_ampersands() {
        assert_eq!(encode_utf7("A&B&C"), "A&-B&-C");
        assert_eq!(decode_utf7(b"A&-B&-C"), "A&B&C");
    }

    /// Malformed modified Base64 is handled gracefully.
    #[test]
    fn malformed_base64_graceful() {
        // Unterminated shift sequence — no closing `-`
        let result = decode_utf7(b"&INVALID");
        // Should not panic; produces some output.
        assert!(!result.is_empty());
    }

    // Decoder accepts non-ASCII bytes outside Base64 segments.
    // RFC 3501 Section 5.1.3 says bytes 0x80-0xFF should be Base64-encoded,
    // but as a client library we accept them gracefully (Postel's law).
    #[test]
    fn non_ascii_passthrough_outside_base64() {
        // 0xC3 0xA9 is UTF-8 for 'é' — not valid in modified UTF-7 but
        // should be accepted gracefully by the decoder.
        let result = decode_utf7(&[b'A', 0xC3, 0xA9, b'B']);
        assert!(result.contains('A'));
        assert!(result.contains('B'));
        // Should not panic or error
    }

    // Unterminated Base64 segment (missing trailing '-').
    // RFC 3501 Section 5.1.3 requires '-' at the end but we accept
    // gracefully for robustness against non-conformant servers.
    #[test]
    fn unterminated_base64_segment() {
        // The payload "AE4" decodes to UTF-16BE [0x00, 0x4E] = U+004E ('N').
        // Here the closing '-' is missing; only the ASCII prefix is checked.
        let result = decode_utf7(b"test&AE4");
        assert!(result.starts_with("test"));
        // Should not panic
    }

    /// RFC 3501 §5.1.3 / Postel's law: non-conformant servers send raw UTF-8
    /// bytes outside Base64 segments. The decoder should interpret consecutive
    /// high bytes as UTF-8 rather than treating each byte as Latin-1.
    #[test]
    fn spec_audit_raw_utf8_outside_base64() {
        // 0xC3 0xA9 is UTF-8 for 'é' (U+00E9).
        // A non-conformant server might send: INBOX/café
        let input = b"INBOX/caf\xc3\xa9";
        let result = decode_utf7(input);
        assert_eq!(
            result, "INBOX/café",
            "raw UTF-8 bytes should be decoded as UTF-8, not Latin-1"
        );
    }

    /// Same leniency as above for three-byte UTF-8 sequences: a run of raw
    /// CJK bytes outside any Base64 segment decodes as UTF-8.
    #[test]
    fn spec_audit_raw_utf8_cjk_outside_base64() {
        // 0xE6 0x97 0xA5 0xE6 0x9C 0xAC 0xE8 0xAA 0x9E is UTF-8 for '日本語'.
        let input = b"\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e";
        let result = decode_utf7(input);
        assert_eq!(
            result, "日本語",
            "raw UTF-8 CJK should be decoded correctly"
        );
    }

    /// RFC 3501 Section 5.1.3: "Modified BASE64 MUST NOT be used to represent
    /// any printing US-ASCII character which can represent itself."
    /// `&AEE-` encodes U+0041 ('A') which is printable ASCII — the decoder
    /// must not silently accept this as valid.
    #[test]
    fn rejects_base64_encoded_printable_ascii() {
        // &AEE- encodes U+0041 ('A') which is printable ASCII.
        let result = decode_utf7(b"&AEE-");
        // The decoded value must NOT silently produce "A" — it should indicate
        // the malformed encoding. The decoder detects printable ASCII in the
        // Base64 segment and emits the raw fallback ("&AEE-") instead.
        assert_ne!(
            result, "A",
            "Decoder should not silently accept Base64-encoded printable ASCII"
        );
    }

    /// RFC 3501 Section 5.1.3: Modified UTF-7 uses UTF-16BE encoding, which
    /// requires byte pairs. A Base64 segment that decodes to an odd number of
    /// bytes has an orphaned trailing byte — this must emit U+FFFD (replacement
    /// character), not silently drop the corrupt byte.
    #[test]
    fn spec_audit_utf16be_trailing_odd_byte_produces_replacement() {
        // Test decode_utf16be directly: [0x4E, 0x16] = U+4E16 ('世'),
        // 0xFF is the orphan byte that cannot form a code unit.
        let mut out = String::new();
        decode_utf16be(&[0x4E, 0x16, 0xFF], &mut out);
        assert_eq!(
            out, "世\u{FFFD}",
            "orphan trailing byte must produce U+FFFD, not be silently dropped"
        );

        // Test via decode_utf7: "&Thb,-" → Base64 "Thb," decodes to
        // [0x4E, 0x16, 0xFF] (one valid code unit + one orphan byte).
        let result = decode_utf7(b"&Thb,-");
        assert!(
            result.contains('\u{FFFD}'),
            "decode_utf7 must emit U+FFFD for orphan byte in Base64 segment"
        );
        assert!(
            result.contains('世'),
            "valid code unit before orphan byte must still decode"
        );
    }

    /// RFC 3501 Section 5.1.3: "Modified BASE64 MUST NOT be used to represent
    /// ANY printing US-ASCII character which can represent itself."
    /// A Base64 segment encoding a mix of printable ASCII and
    /// non-ASCII characters (e.g., U+0041 'A' + U+65E5 '日') must still be
    /// flagged as non-conformant. Previously the decoder used `.all()` which
    /// only flagged segments where EVERY character was printable ASCII,
    /// missing the mixed case.
    #[test]
    fn regression_mixed_ascii_nonascii_base64_segment() {
        // &AEFl5Q- encodes UTF-16BE [0x00, 0x41, 0x65, 0xE5] = 'A' + '日'
        // in a single Base64 segment. 'A' is printable ASCII and MUST NOT be
        // encoded in Base64 per RFC 3501 Section 5.1.3.
        let result = decode_utf7(b"&AEFl5Q-");
        // A single printable ASCII character is enough to make the whole
        // segment non-conformant, so the decoder must take the warning path
        // and emit the raw fallback rather than the decoded text.
        assert_ne!(
            result, "A\u{65E5}",
            "Decoder must not silently accept Base64-encoded printable ASCII \
             mixed with non-ASCII (RFC 3501 Section 5.1.3)"
        );
        // The raw fallback should be the original segment verbatim.
        assert_eq!(
            result, "&AEFl5Q-",
            "Decoder should emit raw fallback for non-conformant segment"
        );
    }

    /// RFC 3501 Section 5.1.3 — the modified Base64 engine must
    /// tolerate non-zero trailing bits in the final Base64 character. When a
    /// non-conformant encoder sets unused trailing bits, the decoder should
    /// still produce the correct UTF-16BE output.
    #[test]
    fn regression_nonzero_trailing_bits_accepted() {
        // U+00E9 ('é') = UTF-16BE [0x00, 0xE9] = 16 bits.
        // Base64 groups of 6: 000000 001110 1001xx (18 bits, 2 trailing).
        // Standard encoding (trailing=00): 'A','O','k' → &AOk-
        // Non-zero trailing (trailing=11): 'A','O','n' → &AOn-
        // The decoder must accept &AOn- and produce 'é'.
        assert_eq!(
            decode_utf7(b"&AOn-"),
            "\u{00E9}",
            "Non-zero trailing bits in Base64 must be accepted (RFC 3501 Section 5.1.3)"
        );
    }

    /// RFC 3501 Section 5.1.3 — the modified Base64 alphabet omits
    /// padding ('='), but the decoder should tolerate padding from
    /// non-conformant servers per Postel's law.
    #[test]
    fn regression_base64_padding_tolerated() {
        // &AOk=- encodes U+00E9 ('é') with padding. The decoder must accept it.
        assert_eq!(
            decode_utf7(b"&AOk=-"),
            "\u{00E9}",
            "Base64 padding must be tolerated (RFC 3501 Section 5.1.3, Postel's law)"
        );
    }

    /// RFC 3501 Section 5.1.3 — `&` MUST be represented as `&-`,
    /// NOT Base64-encoded. The decoder must reject `&ACY-` (Base64 encoding
    /// of U+0026 '&') and emit a raw fallback, not silently decode it to '&'.
    #[test]
    fn rejects_base64_encoded_ampersand() {
        // &ACY- encodes U+0026 ('&') in Base64. Per RFC 3501 Section 5.1.3,
        // '&' MUST be represented as '&-', not Base64-encoded.
        let result = decode_utf7(b"&ACY-");
        assert_ne!(
            result, "&",
            "Decoder must not silently accept Base64-encoded '&' (RFC 3501 Section 5.1.3)"
        );
    }

    /// Control characters (0x00-0x1F, 0x7F) in the input are not
    /// printable US-ASCII per RFC 3501 Section 5.1.3 and must be replaced
    /// with U+FFFD, not silently passed through.
    #[test]
    fn regression_control_chars_replaced() {
        // NUL (0x00), BEL (0x07), DEL (0x7F) in input stream
        let input = b"\x00hello\x07world\x7F";
        let result = decode_utf7(input);
        assert!(
            !result.contains('\0'),
            "NUL (0x00) must not pass through verbatim (RFC 3501 Section 5.1.3)"
        );
        assert!(
            !result.contains('\x07'),
            "BEL (0x07) must not pass through verbatim (RFC 3501 Section 5.1.3)"
        );
        assert!(
            !result.contains('\x7F'),
            "DEL (0x7F) must not pass through verbatim (RFC 3501 Section 5.1.3)"
        );
        // The printable ASCII parts should still be decoded.
        assert!(
            result.contains("hello"),
            "printable ASCII must be preserved"
        );
        assert!(
            result.contains("world"),
            "printable ASCII must be preserved"
        );
        // Control chars should be replaced with U+FFFD.
        assert!(
            result.contains('\u{FFFD}'),
            "control characters must be replaced with U+FFFD"
        );
    }

    /// RFC 3501 Section 5.1.3: when the Base64 data inside a `&...-`
    /// shift sequence is not valid modified Base64, the decoder falls back
    /// to emitting the raw `&<base64>-` literally (Postel's law).
    #[test]
    fn invalid_base64_in_shift_falls_back_to_raw() {
        // `!!!` is not valid Base64 — none of those characters appear in the
        // modified Base64 alphabet. engine.decode() will return Err,
        // triggering the raw-fallback branch of decode_utf7.
        let result = decode_utf7(b"test&!!!-end");
        assert_eq!(
            result, "test&!!!-end",
            "Invalid Base64 within shift must emit raw fallback (RFC 3501 Section 5.1.3)"
        );
    }

    /// RFC 3501 Section 5.1.3: a shift sequence made up entirely of
    /// characters outside the modified Base64 alphabet (`@#$`) cannot be
    /// decoded, so the raw fallback must fire even when the segment is the
    /// whole input.
    #[test]
    fn invalid_base64_chars_in_shift_falls_back() {
        let result = decode_utf7(b"&@#$-");
        assert_eq!(
            result, "&@#$-",
            "Base64 with invalid alphabet chars must produce raw fallback"
        );
    }

    /// RFC 3501 Section 5.1.3: bytes 0x80-0xFF outside a Base64 segment
    /// that do not form valid UTF-8 must be handled via `from_utf8_lossy`,
    /// producing U+FFFD replacement characters.
    #[test]
    fn invalid_utf8_high_bytes_outside_base64() {
        // 0xFF is not a valid start byte for any UTF-8 sequence.
        // 0xFE is similarly invalid. A sequence of such bytes triggers
        // the from_utf8_lossy fallback.
        let result = decode_utf7(&[b'A', 0xFF, 0xFE, b'B']);
        assert!(
            result.starts_with('A'),
            "printable ASCII before invalid bytes must be preserved"
        );
        assert!(
            result.ends_with('B'),
            "printable ASCII after invalid bytes must be preserved"
        );
        assert!(
            result.contains('\u{FFFD}'),
            "invalid UTF-8 high bytes must produce U+FFFD (RFC 3501 Section 5.1.3)"
        );
    }

    /// RFC 3501 Section 5.1.3: a lone continuation byte (0x80-0xBF) without
    /// a leading byte is invalid UTF-8 and must produce U+FFFD via the
    /// `from_utf8_lossy` path.
    #[test]
    fn lone_continuation_byte_outside_base64() {
        // 0x80 is a continuation byte that doesn't follow a valid start byte.
        let result = decode_utf7(&[0x80]);
        assert_eq!(
            result, "\u{FFFD}",
            "lone continuation byte must produce U+FFFD"
        );
    }

    /// RFC 3501 Section 5.1.3: UTF-16BE requires valid code units.
    /// An unpaired high surrogate (0xD800) cannot be decoded to a
    /// character, so `decode_utf16be` must emit U+FFFD.
    #[test]
    fn unpaired_high_surrogate_produces_replacement() {
        // 0xD800 is a high surrogate. Without a following low surrogate,
        // char::decode_utf16 yields Err, triggering the replacement path.
        let mut out = String::new();
        decode_utf16be(&[0xD8, 0x00], &mut out);
        assert_eq!(
            out, "\u{FFFD}",
            "unpaired high surrogate must produce U+FFFD (RFC 3501 Section 5.1.3)"
        );
    }

    /// RFC 3501 Section 5.1.3: an unpaired low surrogate (0xDC00)
    /// is also invalid UTF-16 and must produce U+FFFD.
    #[test]
    fn unpaired_low_surrogate_produces_replacement() {
        // 0xDC00 is a low surrogate without a preceding high surrogate.
        let mut out = String::new();
        decode_utf16be(&[0xDC, 0x00], &mut out);
        assert_eq!(
            out, "\u{FFFD}",
            "unpaired low surrogate must produce U+FFFD (RFC 3501 Section 5.1.3)"
        );
    }

    /// RFC 3501 Section 5.1.3: a high surrogate followed by another high
    /// surrogate (instead of a low surrogate) must produce two U+FFFD
    /// replacement characters via the `decode_utf16be` error path.
    #[test]
    fn two_high_surrogates_produce_two_replacements() {
        // Two consecutive high surrogates: 0xD800, 0xD800.
        let mut out = String::new();
        decode_utf16be(&[0xD8, 0x00, 0xD8, 0x00], &mut out);
        assert_eq!(
            out, "\u{FFFD}\u{FFFD}",
            "two unpaired high surrogates must each produce U+FFFD"
        );
    }

    /// RFC 3501 Section 5.1.3: verify the surrogate replacement path
    /// works end-to-end via `decode_utf7`. We manually encode a lone
    /// high surrogate (0xD800) as modified Base64 inside a `&...-` block.
    #[test]
    fn unpaired_surrogate_in_base64_segment_produces_replacement() {
        // UTF-16BE for lone high surrogate 0xD800 = [0xD8, 0x00].
        // Modified Base64 encoding of those bytes via the IMAP engine.
        let engine = imap_b64_engine();
        let encoded_b64 = engine.encode([0xD8, 0x00]);
        let mut input = Vec::new();
        input.push(b'&');
        input.extend_from_slice(encoded_b64.as_bytes());
        input.push(b'-');

        let result = decode_utf7(&input);
        assert!(
            result.contains('\u{FFFD}'),
            "unpaired surrogate in Base64 segment must produce U+FFFD (RFC 3501 Section 5.1.3)"
        );
    }
}