daaki-imap 0.1.0

//! RFC 2231 MIME parameter decoding (continuations and charset-encoded values).
//!
//! RFC 2231 extends MIME parameter handling (RFC 2045) with three mechanisms:
//! - **Section 3 (Continuations):** Long parameter values split across multiple
//!   segments: `name*0="part1"; name*1="part2"`. "The count starts at 0 and
//!   increments by 1 for each subsequent section."
//! - **Section 4 (Charset/Language):** Charset-encoded values using
//!   `parameter*=charset'language'percent-encoded`. The ABNF is:
//!   `extended-initial-value = [charset] "'" [language] "'" extended-other-values`
//!   `ext-octet = "%" 2(DIGIT / "A" / "B" / "C" / "D" / "E" / "F")`
//! - **Combined:** Continuations with charset encoding on the first segment:
//!   `name*0*=charset'language'part1; name*1*=part2`
//!   "Language and character set information only appear at the beginning of a
//!   given parameter value."

use std::collections::btree_map::Entry;
use std::collections::{BTreeMap, HashSet};

/// Decode RFC 2231 continuation and charset-encoded parameters.
///
/// Takes raw MIME parameters (as produced by the BODYSTRUCTURE parser) and returns
/// decoded parameters with continuations reassembled and charset-encoded values
/// converted to UTF-8.
///
/// RFC 2231 Section 3 (Continuations):
/// `name*0="first_part"; name*1="second_part"` → `name="first_partsecond_part"`
/// "The count starts at 0 and increments by 1 for each subsequent section."
///
/// RFC 2231 Section 4 (Charset/Language encoding):
/// `name*=charset'language'percent-encoded` → `name="decoded_value"`
/// `extended-initial-value = [charset] "'" [language] "'" extended-other-values`
///
/// Non-continuation parameters are passed through unchanged. Continuation groups
/// appear at the position of their first segment.
///
/// A value counts as "RFC 2231-decoded" when it came from a standalone `name*`
/// key or from a continuation group in which at least one assembled segment
/// used the extended `name*N*` syntax (with or without an explicit charset).
/// Such values supersede a plain `name` duplicate and are never re-interpreted
/// by the RFC 2047 encoded-word fallback. Values from continuation groups made
/// up solely of unencoded `name*N` segments are still eligible for the fallback.
// The phases below share the `result` / `rfc2231_decoded` bookkeeping, so they
// are kept together in one function even though it is long.
#[allow(clippy::too_many_lines)]
pub fn decode_rfc2231_params(params: &[(String, String)]) -> Vec<(String, String)> {
    // Phase 1: classify each parameter.
    //
    // We track three categories:
    // - Plain: no RFC 2231 markers, pass through unchanged.
    // - Standalone charset-encoded: `name*=charset'lang'value` (no digit index).
    // - Continuation: `name*N` or `name*N*` where N is a decimal index.
    //
    // For continuations we group segments by base name (case-insensitive) and
    // record the original insertion position of the first segment seen.

    /// A continuation group: (lowercase base name, result index, segments).
    type ContinuationGroup = (String, usize, BTreeMap<u32, (String, bool)>);

    let mut result: Vec<(String, String)> = Vec::with_capacity(params.len());
    let mut continuations: Vec<ContinuationGroup> = Vec::new();
    // Track indices of values decoded via RFC 2231 charset mechanisms so the
    // RFC 2047 fallback (which handles non-conformant servers) does not
    // double-decode them. RFC 2047 Section 5 / RFC 2231 Section 4.
    let mut rfc2231_decoded: HashSet<usize> = HashSet::new();

    for (key, value) in params {
        match classify_key(key) {
            None => {
                // Plain parameter — pass through unchanged.
                result.push((key.clone(), value.clone()));
            }
            Some(KeyClass::StandaloneEncoded { base_name }) => {
                // RFC 2231 Section 4: standalone `name*=charset'lang'encoded`
                let decoded = decode_charset_value(value);
                rfc2231_decoded.insert(result.len());
                result.push((base_name, decoded));
            }
            Some(KeyClass::Continuation {
                base_name,
                index,
                encoded,
            }) => {
                let lower = base_name.to_ascii_lowercase();
                // Find or create the continuation group.
                let group = continuations.iter_mut().find(|(name, _, _)| *name == lower);
                if let Some((_, _, segments)) = group {
                    // RFC 2231 Section 3: each index appears exactly once.
                    // Keep the first value if a duplicate index is encountered.
                    match segments.entry(index) {
                        Entry::Vacant(e) => {
                            e.insert((value.clone(), encoded));
                        }
                        Entry::Occupied(_) => {
                            tracing::warn!(
                                base_name = lower.as_str(),
                                index = index,
                                "RFC 2231 Section 3: duplicate continuation index {}, keeping first value",
                                index,
                            );
                        }
                    }
                } else {
                    let insert_pos = result.len();
                    // Reserve a slot in the result vector; it is filled in
                    // during phase 2 once all segments have been collected.
                    result.push((String::new(), String::new()));
                    let mut segments = BTreeMap::new();
                    segments.insert(index, (value.clone(), encoded));
                    continuations.push((lower, insert_pos, segments));
                }
            }
        }
    }

    // Phase 2: reassemble continuation groups.
    for (lower_name, insert_pos, segments) in continuations {
        // Determine charset from the first segment (index 0) if it is charset-encoded.
        // RFC 2231 Section 4: "Language and character set information only appear at
        // the beginning of a given parameter value."
        let first_encoded = segments.get(&0).is_some_and(|(_, enc)| *enc);

        let mut charset: Option<String> = None;
        let mut raw_bytes = Vec::new();
        let mut expected_idx: u32 = 0;
        // Set as soon as any assembled segment used the extended (`*N*`)
        // syntax, regardless of whether an explicit charset was supplied.
        let mut used_extended_syntax = false;

        // BTreeMap iteration yields the segments in ascending index order.
        for (idx, (value, is_encoded)) in &segments {
            // RFC 2231 Section 3: "neither leading zeroes nor gaps in the
            // sequence are allowed". When a gap is detected, the continuation
            // is malformed — stop assembly at the gap point and discard all
            // segments after the gap. Only the contiguous prefix (indices
            // 0..gap_start) is reliable; data after the gap may be misaligned.
            if *idx > expected_idx {
                tracing::warn!(
                    base_name = lower_name.as_str(),
                    expected = expected_idx,
                    actual = *idx,
                    "RFC 2231 Section 3: gap in continuation (expected index {}, found {}); \
                     truncating assembly at segment {}",
                    expected_idx,
                    idx,
                    expected_idx,
                );
                break;
            }
            expected_idx = idx.saturating_add(1);

            if *idx == 0 && first_encoded && *is_encoded {
                // Parse charset'language'encoded from the first segment.
                let (cs, bytes) = split_charset_value(value);
                charset = cs;
                raw_bytes.extend_from_slice(&bytes);
                used_extended_syntax = true;
            } else if *is_encoded {
                // Subsequent encoded segments: just percent-decode (no charset prefix).
                raw_bytes.extend_from_slice(&percent_decode(value));
                used_extended_syntax = true;
            } else {
                // Unencoded segment — raw bytes.
                raw_bytes.extend_from_slice(value.as_bytes());
            }
        }

        // Convert to UTF-8.
        let decoded = match &charset {
            Some(cs) => decode_bytes_with_charset(cs, &raw_bytes),
            None => String::from_utf8_lossy(&raw_bytes).into_owned(),
        };

        // Reconstruct the base name preserving original case from the first segment.
        // We stored lowercase for grouping, but we need to recover the original.
        // Use the first key's base name from the original params.
        let original_base = find_original_base_name(params, &lower_name);
        // Track continuation groups that went through RFC 2231 extended
        // decoding so the RFC 2047 fallback does not double-decode them. This
        // deliberately includes groups with an empty charset (`name*0*=''...`),
        // matching how standalone `name*` values are always tracked.
        if used_extended_syntax {
            rfc2231_decoded.insert(insert_pos);
        }
        result[insert_pos] = (original_base, decoded);
    }

    // Phase 2.5: RFC 2231 Section 5 — when both `name` (plain) and `name*`
    // (or `name*0*` continuations) exist for the same parameter, the RFC 2231
    // charset-encoded form takes precedence. The plain form is merely "a
    // default for clients that do not understand the extended syntax."
    // Remove plain duplicates that are superseded by RFC 2231-decoded values.
    let mut rfc2231_names: HashSet<String> = HashSet::new();
    for &idx in &rfc2231_decoded {
        if let Some((key, _)) = result.get(idx) {
            rfc2231_names.insert(key.to_ascii_lowercase());
        }
    }
    if !rfc2231_names.is_empty() {
        let mut new_result: Vec<(String, String)> = Vec::with_capacity(result.len());
        // Removing entries shifts positions, so the decoded-index set has to be
        // rebuilt against the compacted vector.
        let mut new_decoded: HashSet<usize> = HashSet::new();
        for (i, entry) in result.into_iter().enumerate() {
            // A plain entry is "dominated" if an RFC 2231-decoded entry exists
            // for the same base name (case-insensitive).
            let dominated = !rfc2231_decoded.contains(&i)
                && rfc2231_names.contains(&entry.0.to_ascii_lowercase());
            if !dominated {
                if rfc2231_decoded.contains(&i) {
                    new_decoded.insert(new_result.len());
                }
                new_result.push(entry);
            }
        }
        result = new_result;
        rfc2231_decoded = new_decoded;
    }

    // Fallback pass: some servers (non-conformantly) emit RFC 2047 encoded
    // words inside BODYSTRUCTURE parameter values instead of using RFC 2231
    // charset encoding. Decode any such values so callers get plain text.
    // RFC 2047 Section 1: "encoded-word = =?charset?encoding?encoded-text?="
    //
    // Skip values already decoded via RFC 2231 charset mechanisms to avoid
    // double-decoding. A legitimate RFC 2231-decoded value may contain literal
    // `=?...?=` sequences that must not be reinterpreted as RFC 2047 encoded
    // words. RFC 2047 Section 5 / RFC 2231 Section 4.
    for (i, (_key, value)) in result.iter_mut().enumerate() {
        if rfc2231_decoded.contains(&i) {
            continue;
        }
        if value.contains("=?") && value.contains("?=") {
            *value = crate::codec::decode::decode_rfc2047(value.as_bytes());
        }
    }

    result
}

/// Classification of an RFC 2231 parameter key.
///
/// Only keys that carry RFC 2231 markers are represented here; plain keys are
/// signalled by `classify_key` returning `None`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum KeyClass {
    /// `name*` — standalone charset-encoded (no continuation index).
    StandaloneEncoded {
        /// Key text before the first `*`, in its original case.
        base_name: String,
    },
    /// `name*N` or `name*N*` — continuation segment.
    Continuation {
        /// Key text before the first `*`, in its original case.
        base_name: String,
        /// Decimal segment index `N`.
        index: u32,
        /// `true` when the key ends in a trailing `*` (`name*N*`), i.e. the
        /// segment value is percent-encoded.
        encoded: bool,
    },
}

/// Classify a parameter key as plain, standalone charset-encoded, or continuation.
///
/// RFC 2231 Section 3: continuation keys have the form `name*N` or `name*N*`.
/// RFC 2231 Section 4: standalone charset keys have the form `name*` (no digit).
///
/// Returns `None` for plain keys, i.e. keys without a `*` or whose text after
/// the first `*` is not a valid continuation index: anything other than ASCII
/// digits, an index with leading zeroes, or a number that does not fit `u32`.
fn classify_key(key: &str) -> Option<KeyClass> {
    // Must contain at least one '*' to be RFC 2231; split at the first one.
    let (base_name, suffix) = key.split_once('*')?;

    if suffix.is_empty() {
        // `name*` — standalone charset-encoded.
        return Some(KeyClass::StandaloneEncoded {
            base_name: base_name.to_owned(),
        });
    }

    // Check for `name*N` or `name*N*`; a trailing '*' marks an encoded segment.
    let (digits, encoded) = match suffix.strip_suffix('*') {
        Some(stripped) => (stripped, true),
        None => (suffix, false),
    };

    // The index must consist of ASCII digits only. `u32::from_str` would
    // otherwise accept a leading '+', turning a key such as `name*+1` into
    // continuation index 1.
    if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }

    // RFC 2231 Section 3: "neither leading zeroes nor gaps in the sequence
    // are allowed." Reject indices with leading zeroes (e.g. *00, *01, *007)
    // while keeping *0 valid.
    if digits.len() > 1 && digits.starts_with('0') {
        return None;
    }

    // Parsing can still fail when the number overflows `u32`.
    let index: u32 = digits.parse().ok()?;

    Some(KeyClass::Continuation {
        base_name: base_name.to_owned(),
        index,
        encoded,
    })
}

/// Decode a standalone RFC 2231 Section 4 extended parameter value into a `String`.
///
/// Expected input shape: `charset'language'percent-encoded-value`
/// (`extended-initial-value = [charset] "'" [language] "'" extended-other-values`;
/// "Single quote delimiters MUST be present even when one of the field values is
/// omitted.")
///
/// The value is split with `split_charset_value`. When that yields a charset
/// label, the bytes are converted with `decode_bytes_with_charset`; when it
/// yields none, the bytes are interpreted as UTF-8 with lossy replacement.
fn decode_charset_value(value: &str) -> String {
    let (label, payload) = split_charset_value(value);
    label.map_or_else(
        || String::from_utf8_lossy(&payload).into_owned(),
        |cs| decode_bytes_with_charset(&cs, &payload),
    )
}

/// Break an RFC 2231 extended value into its charset label and decoded payload.
///
/// RFC 2231 Section 4:
/// `extended-initial-value = [charset] "'" [language] "'" extended-other-values`
///
/// The value is cut at its first two single quotes. Everything after the
/// second quote is percent-decoded; the language field is ignored. An empty
/// charset field is reported as `None`.
///
/// If fewer than two single quotes are present the value is malformed and is
/// returned untouched as `(None, raw bytes of value)`.
fn split_charset_value(value: &str) -> (Option<String>, Vec<u8>) {
    // At most three fields, so quotes inside the payload are left intact.
    let mut fields = value.splitn(3, '\'');
    match (fields.next(), fields.next(), fields.next()) {
        (Some(charset), Some(_language), Some(payload)) => {
            let label = (!charset.is_empty()).then(|| charset.to_owned());
            (label, percent_decode(payload))
        }
        // Missing one or both delimiters: graceful fallback.
        _ => (None, value.as_bytes().to_vec()),
    }
}

/// Expand `%XX` escapes in `input` into raw bytes (RFC 2231 Section 4).
///
/// RFC 2231 Section 4 defines `ext-octet = "%" 2(DIGIT / "A"..."F")`.
/// A `%` that is not followed by two hex digits (as judged by `hex_val`),
/// including a truncated escape at the end of the input, is copied through
/// literally, as is every other byte.
fn percent_decode(input: &str) -> Vec<u8> {
    let mut decoded = Vec::with_capacity(input.len());
    let mut rest = input.as_bytes();

    while let Some((&head, tail)) = rest.split_first() {
        if head == b'%' {
            // An escape needs two more bytes, both of them hex digits.
            if let [hi, lo, after @ ..] = tail {
                if let (Some(high), Some(low)) = (hex_val(*hi), hex_val(*lo)) {
                    decoded.push((high << 4) | low);
                    rest = after;
                    continue;
                }
            }
        }
        decoded.push(head);
        rest = tail;
    }

    decoded
}

/// Map one ASCII hex digit to its numeric value (0–15).
///
/// RFC 2231 Section 4: `ext-octet = "%" 2(DIGIT / "A" / "B" / "C" / "D" / "E" / "F")`
///
/// Lowercase `a`–`f` are accepted in addition to the uppercase digits named by
/// the grammar. Any other byte yields `None`.
fn hex_val(b: u8) -> Option<u8> {
    if b.is_ascii_digit() {
        return Some(b - b'0');
    }
    // Fold case so that 'A'..='F' and 'a'..='f' share one range check.
    let folded = b.to_ascii_lowercase();
    (b'a'..=b'f').contains(&folded).then(|| folded - b'a' + 10)
}

/// Turn `bytes` into a `String`, interpreting them in the charset named by `charset`.
///
/// Labels equal to `utf-8` or `utf8` (ASCII case-insensitive) are decoded
/// directly as lossy UTF-8. Any other label is looked up through
/// `encoding_rs::Encoding::for_label`; if the lookup fails the bytes are
/// likewise decoded as lossy UTF-8.
fn decode_bytes_with_charset(charset: &str, bytes: &[u8]) -> String {
    let lossy_utf8 = || String::from_utf8_lossy(bytes).into_owned();

    // UTF-8 fast path.
    if charset.eq_ignore_ascii_case("utf-8") || charset.eq_ignore_ascii_case("utf8") {
        return lossy_utf8();
    }

    // Everything else goes through encoding_rs; unknown labels use the lossy fallback.
    encoding_rs::Encoding::for_label(charset.as_bytes()).map_or_else(lossy_utf8, |encoding| {
        // decode_without_bom_handling keeps a leading U+FEFF that is genuinely
        // part of the value. RFC 2231 values are parameter fragments, not
        // standalone documents, so stripping it would corrupt legitimate content.
        let (text, _had_errors) = encoding.decode_without_bom_handling(bytes);
        text.into_owned()
    })
}

/// Recover the original-case base name for a lowercased continuation group name.
///
/// Scans `params` in order and returns the text before the first `*` of the
/// first key whose base name matches `lower_name` ASCII case-insensitively.
/// Keys without a `*` are ignored. If nothing matches, `lower_name` itself is
/// returned.
fn find_original_base_name(params: &[(String, String)], lower_name: &str) -> String {
    params
        .iter()
        .filter_map(|(key, _)| key.split_once('*'))
        .map(|(base, _suffix)| base)
        .find(|base| base.eq_ignore_ascii_case(lower_name))
        // Should not happen, but fall back to the lowercase name.
        .unwrap_or(lower_name)
        .to_owned()
}

#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;

    fn p(pairs: &[(&str, &str)]) -> Vec<(String, String)> {
        pairs
            .iter()
            .map(|&(k, v)| (k.to_owned(), v.to_owned()))
            .collect()
    }

    // --- Plain passthrough ---

    #[test]
    fn plain_passthrough() {
        let params = p(&[("charset", "utf-8"), ("name", "file.txt")]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result, p(&[("charset", "utf-8"), ("name", "file.txt")]));
    }

    // --- Standalone charset-encoded ---

    #[test]
    fn standalone_charset_encoded() {
        // RFC 2231 Section 4 example: title*=us-ascii'en-us'This%20is%20fun
        let params = p(&[("title*", "us-ascii'en-us'This%20is%20fun")]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].0, "title");
        assert_eq!(result[0].1, "This is fun");
    }

    // --- Continuation reassembly ---

    #[test]
    fn continuation_reassembly() {
        // RFC 2231 Section 3: name*0="first"; name*1="second"
        let params = p(&[("name*0", "first"), ("name*1", "second")]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].0, "name");
        assert_eq!(result[0].1, "firstsecond");
    }

    // --- Combined charset + continuation ---

    #[test]
    fn charset_continuation_combined() {
        // RFC 2231 combined: first segment has charset, subsequent are percent-encoded.
        // "Language and character set information only appear at the beginning"
        let params = p(&[
            ("title*0*", "us-ascii'en'This%20is"),
            ("title*1*", "%20fun"),
        ]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].0, "title");
        assert_eq!(result[0].1, "This is fun");
    }

    // --- Out-of-order indices ---

    #[test]
    fn out_of_order_indices() {
        let params = p(&[("name*1", "second"), ("name*0", "first")]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].0, "name");
        // BTreeMap orders by key, so segments are reassembled in order.
        assert_eq!(result[0].1, "firstsecond");
    }

    // --- Non-UTF-8 charset (ISO-8859-1) ---

    #[test]
    fn non_utf8_charset_iso8859_1() {
        // ISO-8859-1: 0xe9 = 'é'
        let params = p(&[("title*", "iso-8859-1'en'caf%E9")]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].0, "title");
        assert_eq!(result[0].1, "café");
    }

    // --- Unknown charset (lossy fallback) ---

    #[test]
    fn unknown_charset_lossy_fallback() {
        let params = p(&[("title*", "x-nonexistent'en'hello%20world")]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].0, "title");
        assert_eq!(result[0].1, "hello world");
    }

    // --- Mixed plain + encoded ordering ---

    #[test]
    fn mixed_plain_and_encoded_ordering() {
        let params = p(&[
            ("charset", "utf-8"),
            ("name*0", "long"),
            ("name*1", "file.txt"),
            ("disposition", "inline"),
        ]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result.len(), 3);
        assert_eq!(result[0].0, "charset");
        assert_eq!(result[0].1, "utf-8");
        // Continuation group appears at position of first segment.
        assert_eq!(result[1].0, "name");
        assert_eq!(result[1].1, "longfile.txt");
        assert_eq!(result[2].0, "disposition");
        assert_eq!(result[2].1, "inline");
    }

    // --- Empty params ---

    #[test]
    fn empty_params() {
        let params: Vec<(String, String)> = Vec::new();
        let result = decode_rfc2231_params(&params);
        assert!(result.is_empty());
    }

    // --- Missing language tag ---

    #[test]
    fn missing_language_tag() {
        // charset present, language empty: charset''encoded
        let params = p(&[("title*", "utf-8''hello%20world")]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].0, "title");
        assert_eq!(result[0].1, "hello world");
    }

    // --- Malformed value (graceful passthrough) ---

    #[test]
    fn malformed_value_no_quotes() {
        // No single quotes — graceful fallback: value returned as-is.
        let params = p(&[("title*", "just-some-value")]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].0, "title");
        assert_eq!(result[0].1, "just-some-value");
    }

    // --- Case-insensitive key grouping ---

    #[test]
    fn case_insensitive_key_grouping() {
        let params = p(&[("Name*0", "hello"), ("NAME*1", " world")]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result.len(), 1);
        // Base name uses the case from the first occurrence.
        assert_eq!(result[0].0, "Name");
        assert_eq!(result[0].1, "hello world");
    }

    // --- percent_decode unit tests ---

    #[test]
    fn percent_decode_basic() {
        assert_eq!(percent_decode("hello%20world"), b"hello world");
        assert_eq!(percent_decode("%2A%2A%2A"), b"***");
        assert_eq!(percent_decode("no-encoding"), b"no-encoding");
    }

    #[test]
    fn percent_decode_truncated_sequence() {
        // Truncated % at end — pass through.
        assert_eq!(percent_decode("abc%2"), b"abc%2");
        assert_eq!(percent_decode("abc%"), b"abc%");
    }

    #[test]
    fn percent_decode_invalid_hex() {
        // Invalid hex chars — pass through.
        assert_eq!(percent_decode("%GG"), b"%GG");
    }

    // --- Edge cases ---

    #[test]
    fn continuation_missing_segment_0() {
        // Only segment 1 exists — no segment 0. RFC 2231 Section 3 requires
        // the sequence to start at 0 with no gaps. Since the gap is at the very
        // start, assembly stops immediately and the result is empty.
        let params = p(&[("name*1", "world")]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].0, "name");
        assert_eq!(result[0].1, "");
    }

    #[test]
    fn continuation_mixed_encoded_plain() {
        // Segment 0 is charset-encoded, segment 1 is plain (unencoded).
        // RFC 2231 Section 4: "Language and character set information only appear
        // at the beginning of a given parameter value."
        // Charset from segment 0 applies to the whole reassembled value.
        let params = p(&[("name*0*", "utf-8'en'caf%C3%A9"), ("name*1", ".txt")]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].0, "name");
        assert_eq!(result[0].1, "café.txt");
    }

    #[test]
    fn standalone_empty_charset_empty_language() {
        // Both charset and language empty: `title*=''hello%20world`
        // split_charset_value returns charset=None, so lossy UTF-8 applies.
        let params = p(&[("title*", "''hello%20world")]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].0, "title");
        assert_eq!(result[0].1, "hello world");
    }

    #[test]
    fn empty_base_name_key() {
        // Key is `*0` — empty base name. classify_key extracts base_name="".
        // Should not panic; produces a parameter with empty key name.
        let params = p(&[("*0", "value")]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].0, "");
        assert_eq!(result[0].1, "value");
    }

    // --- L10: RFC 2231 continuation gap detection ---

    #[test]
    fn continuation_gap_truncates_at_first_gap() {
        // RFC 2231 Section 3: "neither leading zeroes nor gaps in the sequence
        // are allowed". Segments 0 and 2 present, segment 1 missing.
        // A conformant decoder must not silently concatenate "first" + "third"
        // into "firstthird" — the gap makes the continuation malformed.
        // Instead, assembly stops at the gap and only segment 0's value is kept.
        let params = p(&[("name*0", "first"), ("name*2", "third")]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].0, "name");
        // Only the contiguous prefix (segment 0) is used; segments after the gap
        // are discarded because the continuation is malformed.
        assert_eq!(result[0].1, "first");
    }

    #[test]
    fn continuation_gap_mid_sequence_truncates() {
        // RFC 2231 Section 3: gaps are not allowed. Segments 0, 1, and 3 present
        // but segment 2 missing. Assembly stops after segment 1 (the last
        // contiguous segment), producing "AB" and discarding "D".
        let params = p(&[("f*0", "A"), ("f*1", "B"), ("f*3", "D")]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].1, "AB");
    }

    #[test]
    fn continuation_no_gap() {
        // Contiguous segments — no gap, straightforward reassembly.
        let params = p(&[("f*0", "A"), ("f*1", "B"), ("f*2", "C")]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].1, "ABC");
    }

    // --- L11: RFC 2047 encoded-word fallback in BODYSTRUCTURE parameters ---

    #[test]
    fn rfc2047_encoded_word_fallback_base64() {
        // Some servers put RFC 2047 encoded words in parameter values instead
        // of using RFC 2231. Verify the fallback decodes them.
        // "test.txt" in base64 = "dGVzdC50eHQ="
        let params = p(&[("filename", "=?UTF-8?B?dGVzdC50eHQ=?=")]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].0, "filename");
        assert_eq!(result[0].1, "test.txt");
    }

    #[test]
    fn rfc2047_encoded_word_fallback_quoted_printable() {
        // RFC 2047 Q-encoding: =?UTF-8?Q?caf=C3=A9.txt?=
        let params = p(&[("filename", "=?UTF-8?Q?caf=C3=A9.txt?=")]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].0, "filename");
        assert_eq!(result[0].1, "caf\u{e9}.txt");
    }

    #[test]
    fn rfc2047_fallback_does_not_corrupt_plain_values() {
        // Plain values without =? ... ?= markers must be left untouched.
        let params = p(&[("filename", "report.pdf")]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result[0].1, "report.pdf");
    }

    // --- L12: decode_without_bom_handling preserves leading U+FEFF ---

    #[test]
    fn non_utf8_charset_preserves_leading_feff() {
        // U+FEFF encoded in UTF-16LE is 0xFF 0xFE. When present as genuine
        // content (not a BOM), it must be preserved.
        // Build: iso-8859-1 value with byte 0xEF 0xBB 0xBF (UTF-8 BOM) should
        // NOT be stripped — decode_without_bom_handling preserves it.
        //
        // We use windows-1252 which maps bytes 1:1 for 0x00-0xFF.
        // Byte 0xC0 in windows-1252 = U+00C0 (À).
        // We verify a BOM-like prefix (0xEF 0xBB 0xBF) is preserved when
        // re-decoded from windows-1252.
        let bom_bytes_hex = "%EF%BB%BF%C0"; // 0xEF 0xBB 0xBF 0xC0
        let params = p(&[("title*", &format!("windows-1252''{bom_bytes_hex}"))]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result.len(), 1);
        // In windows-1252: 0xEF=ï, 0xBB=», 0xBF=¿, 0xC0=À
        // With decode_without_bom_handling, all bytes are preserved.
        assert_eq!(result[0].1, "ï»¿À");
    }

    // --- Issue #10: RFC 2231-decoded values must not be double-decoded as RFC 2047 ---

    #[test]
    fn rfc2231_decoded_value_not_double_decoded_as_rfc2047() {
        // A value that literally contains =?...?= after RFC 2231 decoding
        // should NOT be re-decoded by the RFC 2047 fallback.
        // "=?UTF-8?B?dGVzdA==?=" percent-encoded: %3D%3FUTF-8%3FB%3FdGVzdA%3D%3D%3F%3D
        let params = p(&[("name*", "utf-8''%3D%3FUTF-8%3FB%3FdGVzdA%3D%3D%3F%3D")]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].0, "name");
        // Should be the literal string, NOT "test"
        assert_eq!(result[0].1, "=?UTF-8?B?dGVzdA==?=");
    }

    // --- Issue #11: Duplicate continuation indices must keep first value ---

    #[test]
    fn duplicate_continuation_index_keeps_first() {
        // RFC 2231 Section 3: each index appears exactly once.
        // When duplicates exist, first value should win.
        let params = p(&[("name*0", "correct"), ("name*0", "wrong")]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].0, "name");
        assert_eq!(result[0].1, "correct"); // First value should win
    }

    // --- RFC 2231 Section 5: encoded params override plain duplicates ---

    #[test]
    fn rfc2231_section5_encoded_overrides_plain_duplicate() {
        // RFC 2231 Section 5: when both `name` (plain) and `name*` (charset-
        // encoded) exist, the RFC 2231 form takes precedence. The plain form
        // is a fallback for clients that don't understand RFC 2231.
        let params = p(&[
            ("name", "fallback.txt"),
            ("name*", "utf-8''encoded%2Dname.txt"),
        ]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(
            result,
            p(&[("name", "encoded-name.txt")]),
            "RFC 2231 Section 5: encoded form should override plain duplicate"
        );
    }

    #[test]
    fn rfc2231_section5_plain_without_encoded_kept() {
        // Plain parameter without any RFC 2231 version must be kept as-is.
        let params = p(&[("name", "plain.txt")]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result, p(&[("name", "plain.txt")]));
    }

    #[test]
    fn rfc2231_section5_encoded_without_plain_kept() {
        // RFC 2231 parameter without any plain version must be kept as-is.
        let params = p(&[("name*", "utf-8''encoded.txt")]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(result, p(&[("name", "encoded.txt")]));
    }

    #[test]
    fn rfc2231_section5_case_insensitive_dedup() {
        // RFC 2231 Section 5: deduplication should be case-insensitive.
        let params = p(&[("Name", "fallback.txt"), ("NAME*", "utf-8''encoded.txt")]);
        let result = decode_rfc2231_params(&params);
        assert_eq!(
            result.len(),
            1,
            "RFC 2231 Section 5: case-insensitive dedup should produce one entry; got {result:?}"
        );
        assert_eq!(result[0].1, "encoded.txt");
    }

    #[test]
    fn rfc2231_section5_continuation_overrides_plain() {
        // A continuation group (`name*0*` + `name*1`) counts as the RFC 2231
        // form of `name`, so the plain `name` fallback must be discarded and
        // only the reassembled, decoded value may remain.
        let input = p(&[
            ("name", "fallback.txt"),
            ("name*0*", "utf-8''encoded"),
            ("name*1", ".txt"),
        ]);
        let result = decode_rfc2231_params(&input);
        let entry_count = result.len();
        assert_eq!(
            entry_count, 1,
            "RFC 2231 Section 5: continuation group should override plain; got {result:?}"
        );
        let (_, value) = &result[0];
        assert_eq!(value.as_str(), "encoded.txt");
    }

    #[test]
    fn rfc2231_section5_plain_before_encoded_preserves_order() {
        // The plain `name` precedes its encoded form and both sit between two
        // unrelated parameters. The output must hold exactly three entries:
        // `charset` first, `disposition` last, and a single `name` carrying the
        // decoded value. With the outer slots pinned, that single `name` entry
        // can only be the middle one, so one whole-vector comparison checks
        // all of it.
        let input = p(&[
            ("charset", "utf-8"),
            ("name", "fallback.txt"),
            ("name*", "utf-8''real%2Dname.txt"),
            ("disposition", "attachment"),
        ]);
        let expected = p(&[
            ("charset", "utf-8"),
            ("name", "real-name.txt"),
            ("disposition", "attachment"),
        ]);
        let result = decode_rfc2231_params(&input);
        assert_eq!(result, expected);
    }

    // --- split_charset_value: single-quote edge cases ---

    #[test]
    fn split_charset_value_one_quote_only() {
        // RFC 2231 Section 4 requires two single quotes
        // (`charset'language'data`). With only one quote present the value is
        // malformed; the expected graceful fallback is "no charset" plus the
        // raw input bytes, untouched.
        let malformed = "utf-8'hello";
        let (charset, payload) = split_charset_value(malformed);
        assert_eq!(payload, b"utf-8'hello");
        assert!(charset.is_none());
    }

    // --- hex_val: lowercase hex digits ---

    #[test]
    fn percent_decode_lowercase_hex() {
        // The RFC 2231 Section 4 grammar for `ext-octet` only lists uppercase
        // hex digits ("A".."F"), but senders in the wild also emit lowercase.
        // Being liberal in what we accept, both must decode identically.
        let ascii = percent_decode("%2a%2b%2c");
        assert_eq!(ascii, b"*+,");

        // Multi-byte UTF-8 sequence (`é` = C3 A9) written with lowercase hex.
        let utf8 = percent_decode("caf%c3%a9");
        assert_eq!(utf8, "café".as_bytes());
    }

    // --- find_original_base_name: fallback when no key has a '*' matching lower_name ---

    #[test]
    fn find_original_base_name_fallback_to_lowercase() {
        // None of the keys below contains a '*', so no key can supply an
        // original spelling for the requested base name. The lookup is then
        // expected to hand back the name it was given.
        let params = vec![
            (String::from("plain_key"), String::from("value")),
            (String::from("another"), String::from("value2")),
        ];
        assert_eq!(
            find_original_base_name(&params, "nonexistent"),
            "nonexistent"
        );
    }

    #[test]
    fn find_original_base_name_no_star_keys() {
        // The requested name matches a key, but that key has no '*' suffix.
        // The expected outcome is still the fallback: the name as passed in.
        let params = vec![(String::from("charset"), String::from("utf-8"))];
        assert_eq!(find_original_base_name(&params, "charset"), "charset");
    }

    // --- standalone charset-encoded value with no encoded parts ---

    #[test]
    fn standalone_charset_no_encoded_bytes() {
        // RFC 2231 Section 4: `name*=charset'lang'value` is still valid when
        // the value contains no `%XX` escapes at all. The charset/language
        // prefix must be stripped and the plain ASCII remainder emitted under
        // the base name `filename`.
        let input = p(&[("filename*", "us-ascii'en'plain-text-file.txt")]);
        let expected = p(&[("filename", "plain-text-file.txt")]);
        assert_eq!(decode_rfc2231_params(&input), expected);
    }

    // ===== Spec audit: prior deviations =====

    #[test]
    fn spec_audit_m6_leading_zeroes_in_continuation_rejected() {
        // RFC 2231 Section 3: "neither leading zeroes nor gaps in the
        // sequence are allowed."
        //
        // `name*00` / `name*01` carry leading zeroes in their section numbers,
        // so they are not valid continuation segments. This test only pins the
        // negative outcome: the two values must never be stitched together
        // into `name = "firstsecond"`. What the decoder emits for the rejected
        // keys instead is intentionally left unconstrained.
        let input = p(&[("name*00", "first"), ("name*01", "second")]);
        let result = decode_rfc2231_params(&input);

        // The one output pair that would prove the indices were normalized.
        let reassembled = ("name".to_owned(), "firstsecond".to_owned());
        assert!(
            !result.contains(&reassembled),
            "Leading zeroes in continuation indices (*00, *01) should be rejected \
             per RFC 2231 Section 3, not silently normalized; got {result:?}"
        );
    }

    #[test]
    fn spec_audit_m6_leading_zero_collision() {
        // RFC 2231 Section 3: "neither leading zeroes nor gaps in the
        // sequence are allowed."
        //
        // Both `name*0` (valid) and `name*00` (invalid: leading zero) are
        // supplied. The invalid key must not displace or overwrite the valid
        // segment, so the decoded `name` parameter has to carry the value
        // that came from `name*0`.
        let params = p(&[("name*0", "correct"), ("name*00", "wrong")]);
        let result = decode_rfc2231_params(&params);

        // `expect` both checks presence and yields the entry, replacing the
        // separate `is_some()` assertion followed by `unwrap()`.
        let name_entry = result
            .iter()
            .find(|(k, _)| k == "name")
            .expect("Expected a 'name' parameter in the result");
        assert_eq!(
            name_entry.1,
            "correct",
            "name*0 (valid) should take precedence over name*00 (leading zero); \
             got {result:?}"
        );
    }
}