daaki-message 0.2.0

//! Header writing, folding, MIME part construction, and RFC 2047 encoding.
//!
//! # References
//! - RFC 5322 Section 2.2.3 (long header fields / folding)
//! - RFC 2045 (MIME Content-Transfer-Encoding)
//! - RFC 2046 (MIME multipart boundaries and message/* restrictions)
//! - RFC 2047 Section 2 (encoded-word syntax)
//! - RFC 2183 (Content-Disposition)
//! - RFC 2231 (MIME parameter encoding)

use super::*;

/// Reports whether `value` holds at least one syntactically complete
/// RFC 2047 encoded-word.
///
/// The shape being matched is `=?charset?encoding?encoded-text?=`:
/// - **charset**: one or more bytes, none of which is `?` or a space
/// - **encoding**: a single `B` or `Q`, in either case
/// - **encoded-text**: one or more bytes, none of which is `?` or a space
///
/// A strict structural match is used instead of merely looking for `=?` and
/// `?=` somewhere in the text, because ordinary prose can contain both
/// substrings without being an encoded-word.
///
/// # References
/// - RFC 2047 Section 2 (encoded-word syntax)
fn contains_rfc2047_encoded_word(value: &str) -> bool {
    /// Length of the leading run of bytes that are neither `?` nor space.
    fn token_len(bytes: &[u8]) -> usize {
        bytes
            .iter()
            .take_while(|&&b| b != b'?' && b != b' ')
            .count()
    }

    /// Matches `charset?encoding?encoded-text?=` at the start of `rest`
    /// (i.e. everything that must follow the opening `=?`).
    fn matches_after_opener(rest: &[u8]) -> bool {
        let charset_len = token_len(rest);
        if charset_len == 0 {
            return false;
        }
        match &rest[charset_len..] {
            [b'?', b'B' | b'b' | b'Q' | b'q', b'?', tail @ ..] => {
                let text_len = token_len(tail);
                text_len > 0 && tail[text_len..].starts_with(b"?=")
            }
            _ => false,
        }
    }

    let bytes = value.as_bytes();
    // Candidate start offsets are limited to those with more than four bytes
    // remaining; anything shorter cannot hold a complete encoded-word.
    (0..bytes.len().saturating_sub(4))
        .any(|start| bytes[start..].starts_with(b"=?") && matches_after_opener(&bytes[start + 2..]))
}

/// Simulates folding of `name: value` and fails if any resulting line would
/// exceed the hard line limit.
///
/// Folding may only happen at whitespace that already exists in the value
/// (RFC 5322 Section 2.2.3), and no emitted line may exceed 998 octets
/// (RFC 5322 Section 2.1.1). This walks the same tokens the writer will emit,
/// tracking the current column, and rejects the header when a token (or the
/// trailing whitespace run) is still too long after being moved to a fresh
/// continuation line.
///
/// # Errors
///
/// Returns [`Error::HeaderLineTooLong`] when a token or the trailing
/// whitespace cannot be placed on a line within `HARD_LINE_LIMIT`.
fn validate_header_folding(name: &str, value: &str, fold_limit: usize) -> Result<(), Error> {
    let (tokens, tail_wsp) = split_header_words(value);
    // The first line starts with "{name}: ".
    let mut column = name.len() + ": ".len();

    for (index, (token, wsp)) in tokens.into_iter().enumerate() {
        let wsp_len = wsp.map_or(0, str::len);
        let token_len = token.len();
        let must_fold = column > 0 && column + wsp_len + token_len > fold_limit;

        column = match (must_fold, index == 0) {
            // Folding before the first token adds one space after the CRLF.
            (true, true) => 1 + wsp_len,
            (true, false) => wsp_len,
            (false, _) => column + wsp_len,
        };

        if column + token_len > HARD_LINE_LIMIT {
            return Err(Error::HeaderLineTooLong(format!(
                "{name} header contains a token that cannot be folded within the 998-octet line limit required by RFC 5322 Sections 2.1.1 and 2.2.3"
            )));
        }
        column += token_len;
    }

    let Some(tail) = tail_wsp.filter(|_| !value.is_empty()) else {
        return Ok(());
    };

    let tail_len = tail.len();
    let tail_folds = column > 0 && column + tail_len > fold_limit;
    let last_line_len = if tail_folds { tail_len } else { column + tail_len };
    if last_line_len > HARD_LINE_LIMIT {
        return Err(Error::HeaderLineTooLong(format!(
            "{name} header contains whitespace that cannot be folded within the 998-octet line limit required by RFC 5322 Sections 2.1.1 and 2.2.3"
        )));
    }

    Ok(())
}

/// Writes a header, folding long lines per RFC 5322 Section 2.2.3.
///
/// Tries to keep lines under [`MAX_LINE_LEN`] (78) by inserting `\r\n `
/// (CRLF + space) at whitespace boundaries. When the value contains
/// RFC 2047 encoded-words, the stricter 76-char limit from RFC 2047
/// Section 2 is used instead.
///
/// The header is appended to `output` as `name: value` followed by CRLF.
/// Folds are only placed where the value already has whitespace, so the
/// original whitespace runs are written back verbatim.
///
/// # Errors
///
/// Propagates the error from `validate_header_folding` when a token or the
/// trailing whitespace cannot fit on a line; in that case nothing has been
/// written to `output` yet because validation runs first.
pub(super) fn try_write_header(output: &mut Vec<u8>, name: &str, value: &str) -> Result<(), Error> {
    // RFC 2047 Section 2: lines containing encoded-words MUST be ≤76 chars.
    let has_encoded_words = contains_rfc2047_encoded_word(value);
    let fold_limit = if has_encoded_words {
        RFC2047_LINE_LIMIT
    } else {
        MAX_LINE_LEN
    };
    // Validate before emitting anything so a rejected header leaves `output`
    // untouched.
    validate_header_folding(name, value, fold_limit)?;

    let prefix = format!("{name}: ");
    // `line_len` tracks the length of the line currently being written.
    let mut line_len = prefix.len();
    output.extend_from_slice(prefix.as_bytes());

    let (words, trailing_sep) = split_header_words(value);
    let mut first_word = true;
    for (word, sep) in words {
        let sep_str = sep.unwrap_or("");
        let word_with_sep_len = sep_str.len() + word.len();

        if line_len + word_with_sep_len > fold_limit && line_len > 0 {
            if first_word {
                // Fold before the first value token. The prefix "Name: "
                // already wrote a trailing space; replace it with the fold
                // point so that unfolding (RFC 5322 Section 2.2.3) yields
                // exactly one space between the colon and the value.
                if output.last() == Some(&b' ') {
                    output.pop();
                }
            }
            // Fold at the existing WSP boundary so unfolding preserves the
            // caller's original whitespace exactly. For the first word, the
            // fold itself contributes the mandatory separator after the colon.
            output.extend_from_slice(b"\r\n");
            if first_word {
                output.extend_from_slice(b" ");
                output.extend_from_slice(sep_str.as_bytes());
                line_len = 1 + sep_str.len();
            } else {
                output.extend_from_slice(sep_str.as_bytes());
                line_len = sep_str.len();
            }
        } else {
            // The token fits: keep it on the current line with its separator.
            output.extend_from_slice(sep_str.as_bytes());
            line_len += sep_str.len();
        }

        output.extend_from_slice(word.as_bytes());
        line_len += word.len();

        first_word = false;
    }

    // Whitespace after the final token is kept; it is moved to a continuation
    // line of its own when appending it would cross the fold limit.
    if let Some(trailing_sep) = trailing_sep.filter(|_| !value.is_empty()) {
        if line_len + trailing_sep.len() > fold_limit && line_len > 0 {
            output.extend_from_slice(b"\r\n");
        }
        output.extend_from_slice(trailing_sep.as_bytes());
    }

    // Terminate the header field.
    output.extend_from_slice(b"\r\n");

    Ok(())
}

/// Breaks a header value into foldable tokens.
///
/// Tokens are delimited by WSP (SP / HTAB) that occurs outside quoted strings
/// and outside `<...>` brackets, so a quoted string or an angle-bracketed
/// message-id is never split. Each returned token is paired with the exact
/// whitespace run that preceded it (`None` when the token starts the value),
/// which lets the caller re-emit the original spacing untouched. The second
/// tuple element is the whitespace run that follows the last token, if any.
pub(super) fn split_header_words(value: &str) -> (Vec<(&str, Option<&str>)>, Option<&str>) {
    let mut tokens = Vec::new();
    // Byte offset at which the token currently being scanned begins.
    let mut word_start = 0usize;
    // Byte offset at which the whitespace run before that token began.
    let mut sep_start: Option<usize> = None;
    let mut quoted = false;
    let mut bracketed = false;

    let mut chars = value.char_indices();
    while let Some((pos, ch)) = chars.next() {
        match ch {
            // RFC 5322 Section 3.2.4 quoted-pair: the character after the
            // backslash is taken literally, whatever it is (including
            // multi-byte characters in user-supplied text).
            '\\' if quoted => {
                chars.next();
            }
            '"' => quoted = !quoted,
            '<' if !quoted => bracketed = true,
            '>' if !quoted => bracketed = false,
            // WSP = SP / HTAB (RFC 5322 Section 2.2.3)
            ' ' | '\t' if !quoted && !bracketed => {
                if pos > word_start {
                    let preceding = sep_start.map(|from| &value[from..word_start]);
                    tokens.push((&value[word_start..pos], preceding));
                    sep_start = Some(pos);
                } else {
                    // Still inside a whitespace run: remember only its start.
                    sep_start = sep_start.or(Some(pos));
                }
                word_start = pos + 1;
            }
            _ => {}
        }
    }

    if word_start < value.len() {
        let preceding = sep_start.map(|from| &value[from..word_start]);
        tokens.push((&value[word_start..], preceding));
        (tokens, None)
    } else {
        (tokens, sep_start.map(|from| &value[from..]))
    }
}

/// Appends a MIME boundary delimiter line to `output`.
///
/// The line is `--` + `boundary` + CRLF; when `closing` is true the
/// terminating form `--` + `boundary` + `--` + CRLF is written instead.
pub(super) fn write_boundary(output: &mut Vec<u8>, boundary: &str, closing: bool) {
    let tail: &[u8] = if closing { b"--\r\n" } else { b"\r\n" };
    for piece in [b"--" as &[u8], boundary.as_bytes(), tail] {
        output.extend_from_slice(piece);
    }
}

/// Reports whether any CRLF-delimited line of `text` is longer than the
/// 998-octet hard limit.
///
/// `text` is expected to be CRLF-normalized already. A `true` result means
/// the body cannot be sent as-is: unencoded transfer encodings cap lines at
/// 998 octets (RFC 2045 Section 2.8), so the caller has to fall back to
/// `quoted-printable` (RFC 2045 Section 6.7).
///
/// # References
/// - RFC 5322 Section 2.1.1 (998-character line length limit)
/// - RFC 2045 Section 2.8 (8bit line length constraint)
fn needs_quoted_printable(text: &str) -> bool {
    text.split("\r\n")
        .map(str::len)
        .max()
        .is_some_and(|longest| longest > HARD_LINE_LIMIT)
}

/// Reports whether any CRLF-delimited line in `data` is longer than
/// [`HARD_LINE_LIMIT`] (998) octets.
///
/// This is the byte-slice counterpart of [`needs_quoted_printable`]; it takes
/// `&[u8]` because `message/*` attachment payloads are raw bytes rather than
/// `&str`. Line length excludes the CRLF terminator, and the final segment is
/// checked even when it has no terminator.
///
/// # References
/// - RFC 5322 Section 2.1.1 (998-character line length limit)
/// - RFC 2045 Section 2.8 (8bit line length constraint)
fn has_overlong_lines(data: &[u8]) -> bool {
    let mut remaining = data;
    loop {
        match remaining.windows(2).position(|pair| pair == b"\r\n") {
            Some(line_len) => {
                if line_len > HARD_LINE_LIMIT {
                    return true;
                }
                // Continue after the CRLF that ended this line.
                remaining = &remaining[line_len + 2..];
            }
            // No further CRLF: what is left is the last line.
            None => return remaining.len() > HARD_LINE_LIMIT,
        }
    }
}

/// Encodes data using the quoted-printable encoding defined in RFC 2045
/// Section 6.7.
///
/// Encoding rules:
/// - Printable ASCII characters (33..=126 except `=`) pass through unchanged.
/// - TAB (0x09) and space (0x20) pass through unless they appear at the end
///   of a line (Rule #3 — trailing whitespace must be encoded).
/// - CRLF sequences pass through as hard line breaks (Rule #4).
/// - All other bytes are encoded as `=XX` with uppercase hex digits (Rule #1).
/// - Lines are wrapped at 76 characters using `=\r\n` soft line breaks (Rule #5).
///
/// The trailing-whitespace decision is made once per whitespace run and
/// reused for every byte of that run, so the encoder stays linear in the
/// input size even for inputs consisting of very long runs of spaces or tabs.
///
/// # References
/// - RFC 2045 Section 6.7 (quoted-printable encoding)
pub(super) fn encode_quoted_printable(data: &[u8]) -> Vec<u8> {
    /// Maximum encoded line length before a soft break (RFC 2045 Section 6.7 Rule #5).
    /// The `=` soft break marker uses 1 char, so usable content is 75 chars.
    const QP_LINE_LIMIT: usize = 76;

    let mut result = Vec::with_capacity(data.len() * 2);
    let mut line_len: usize = 0;
    // Rule #3 verdict for the whitespace run currently being emitted. Every
    // byte of one contiguous SP/HTAB run gets the same answer from
    // `is_trailing_whitespace`, so it is computed for the first byte only.
    let mut run_is_trailing: Option<bool> = None;
    let mut i = 0;

    while i < data.len() {
        // Hard line break: CRLF passes through unchanged (Rule #4)
        if data[i] == b'\r' && data.get(i + 1) == Some(&b'\n') {
            result.extend_from_slice(b"\r\n");
            line_len = 0;
            run_is_trailing = None;
            i += 2;
            continue;
        }

        let byte = data[i];

        // Determine if this byte needs encoding (Rule #1, #2, #3)
        let needs_encoding = match byte {
            // Whitespace is literal unless it ends the line (Rule #3).
            b'\t' | b' ' => {
                *run_is_trailing.get_or_insert_with(|| is_trailing_whitespace(data, i))
            }
            _ => {
                // Any other byte ends the current whitespace run.
                run_is_trailing = None;
                // `=` is always encoded; printable ASCII passes through;
                // everything else (controls, bare CR/LF, 8-bit) is encoded.
                byte == b'=' || !(33..=126).contains(&byte)
            }
        };

        // Encoded form is 3 chars (=XX), literal form is 1. One column is
        // always kept free for a potential soft break marker.
        let width = if needs_encoding { 3 } else { 1 };
        if line_len + width > QP_LINE_LIMIT - 1 {
            result.extend_from_slice(b"=\r\n");
            line_len = 0;
        }

        if needs_encoding {
            // Uppercase hex digits per RFC 2045 Section 6.7 Rule #1
            result.extend_from_slice(&[
                b'=',
                HEX_UPPER[usize::from(byte >> 4)],
                HEX_UPPER[usize::from(byte & 0x0F)],
            ]);
        } else {
            result.push(byte);
        }
        line_len += width;

        i += 1;
    }

    result
}

/// Uppercase hex digit lookup table for quoted-printable encoding.
const HEX_UPPER: [u8; 16] = *b"0123456789ABCDEF";

/// Returns `true` if the whitespace byte at position `pos` in `data` is
/// "trailing" — i.e., no non-whitespace byte follows before the next CR, LF,
/// or end of data.
///
/// Only the bytes after `pos` are inspected; the byte at `pos` itself is
/// assumed by the caller to be SP or HTAB.
///
/// RFC 2045 Section 6.7 Rule #3 requires trailing whitespace to be encoded.
pub(super) fn is_trailing_whitespace(data: &[u8], pos: usize) -> bool {
    data.get(pos + 1..)
        .unwrap_or(&[])
        .iter()
        .find(|&&b| b != b' ' && b != b'\t')
        // Reaching the end of data without meeting a non-whitespace byte
        // also counts as trailing.
        .map_or(true, |&b| b == b'\r' || b == b'\n')
}

/// Emits a textual MIME part (for example `text/plain` or `text/html`)
/// labelled with `charset=utf-8`.
///
/// The `Content-Type` header is written first, then line endings are
/// normalized to CRLF (RFC 5322 Section 2.1) and a transfer encoding is
/// chosen so that the result is always 7-bit safe — usable with SMTP servers
/// that lack 8BITMIME and storable via IMAP APPEND without re-encoding:
///
/// | Content                       | CTE                |
/// |-------------------------------|--------------------|
/// | Pure ASCII, lines ≤ 998       | `7bit`             |
/// | Non-ASCII, long lines, or NUL | `quoted-printable` |
///
/// # Errors
///
/// Propagates any error returned while writing the part's headers.
///
/// # References
/// - RFC 5322 Section 2.1 (CRLF line endings)
/// - RFC 5322 Section 2.1.1 (998-character line length limit)
/// - RFC 5321 Section 2.3.1 (7-bit transport without 8BITMIME)
/// - RFC 2045 Section 2.7 (7bit encoding)
/// - RFC 2045 Section 6.7 (quoted-printable encoding)
pub(super) fn write_text_part(
    output: &mut Vec<u8>,
    text: &str,
    mime_type: &str,
) -> Result<(), Error> {
    let content_type = format!("{mime_type}; charset=utf-8");
    try_write_header(output, "Content-Type", &content_type)?;

    // Bare LF / CR become CRLF (RFC 5322 Section 2.1).
    let body = normalize_line_endings(text);

    // "7bit" (RFC 2045 Section 2.7) is only legal when every octet is a
    // non-NUL ASCII byte and every line respects the 998-octet limit.
    let octets_are_7bit = body
        .as_bytes()
        .iter()
        .all(|&octet| octet != 0x00 && octet <= 127);
    let lines_conform = !needs_quoted_printable(&body);

    if octets_are_7bit && lines_conform {
        write_raw_body(output, "7bit", &body)
    } else {
        // Anything else is made 7-bit safe with quoted-printable
        // (RFC 2045 Section 6.7).
        write_qp_body(output, &body)
    }
}

/// Helper: emits the `Content-Transfer-Encoding: quoted-printable` header,
/// the blank separator line, and the quoted-printable encoding of
/// `normalized`, making sure the body ends with CRLF.
///
/// # References
/// - RFC 2045 Section 6.7 (quoted-printable encoding)
fn write_qp_body(output: &mut Vec<u8>, normalized: &str) -> Result<(), Error> {
    try_write_header(output, "Content-Transfer-Encoding", "quoted-printable")?;
    // Blank line separating the part headers from the body.
    output.extend_from_slice(b"\r\n");

    let mut body = encode_quoted_printable(normalized.as_bytes());
    if !body.ends_with(b"\r\n") {
        // The body must always be CRLF-terminated.
        body.extend_from_slice(b"\r\n");
    }
    output.append(&mut body);

    Ok(())
}

/// Helper: emits a `Content-Transfer-Encoding` header carrying the `cte`
/// label, the blank separator line, and `normalized` verbatim, making sure
/// the body ends with CRLF (the same guarantee `write_qp_body` gives).
///
/// # References
/// - RFC 2045 Section 2.7 (7bit encoding)
/// - RFC 2045 Section 2.8 (8bit encoding)
fn write_raw_body(output: &mut Vec<u8>, cte: &str, normalized: &str) -> Result<(), Error> {
    try_write_header(output, "Content-Transfer-Encoding", cte)?;

    let payload = normalized.as_bytes();
    // Only add a terminator when the payload does not already supply one.
    let terminator: &[u8] = if payload.ends_with(b"\r\n") {
        b""
    } else {
        b"\r\n"
    };

    // Blank separator line, body, optional final CRLF.
    for piece in [b"\r\n" as &[u8], payload, terminator] {
        output.extend_from_slice(piece);
    }

    Ok(())
}

/// Checks that the raw bytes of a restricted `message/*` attachment can be
/// emitted unencoded as `7bit` or `8bit`.
///
/// RFC 2046 Section 5.2.1 limits `message/*` entities to the `7bit`, `8bit`,
/// or `binary` transfer encodings, and this builder only ever emits the first
/// two. The payload therefore has to satisfy their data-domain rules itself
/// (RFC 2045 Sections 2.7 and 2.8; RFC 5322 Section 2.3):
/// - no NUL octets,
/// - CR and LF only ever as the pair CRLF,
/// - no line longer than 998 octets.
///
/// `message/global` (RFC 6532 Section 3.7) permits any transfer encoding and
/// is intentionally not routed through this check.
///
/// # Errors
///
/// Returns [`Error::InvalidAttachment`] naming `filename` for the first rule
/// that is violated, checked in the order listed above.
fn validate_message_attachment_bytes(filename: &str, data: &[u8]) -> Result<(), Error> {
    if data.iter().any(|&octet| octet == 0x00) {
        return Err(Error::InvalidAttachment(format!(
            "message/* attachment \"{filename}\" contains NUL octets; \
             emitted 7bit/8bit message parts must not contain NUL \
             (RFC 2045 Sections 2.7-2.8 / RFC 2046 Section 5.2.1)"
        )));
    }

    // Walk the payload once, remembering the previous octet so a lone LF can
    // be told apart from the LF of a CRLF pair.
    let mut previous: Option<u8> = None;
    for (offset, &octet) in data.iter().enumerate() {
        match octet {
            b'\r' if data.get(offset + 1) != Some(&b'\n') => {
                return Err(Error::InvalidAttachment(format!(
                    "message/* attachment \"{filename}\" contains bare CR; \
                     CR and LF may only appear as CRLF in 7bit/8bit data \
                     (RFC 2045 Sections 2.7-2.8 / RFC 5322 Section 2.3)"
                )));
            }
            b'\n' if previous != Some(b'\r') => {
                return Err(Error::InvalidAttachment(format!(
                    "message/* attachment \"{filename}\" contains bare LF; \
                     CR and LF may only appear as CRLF in 7bit/8bit data \
                     (RFC 2045 Sections 2.7-2.8 / RFC 5322 Section 2.3)"
                )));
            }
            _ => {}
        }
        previous = Some(octet);
    }

    if has_overlong_lines(data) {
        return Err(Error::InvalidAttachment(format!(
            "message/* attachment \"{filename}\" contains lines exceeding 998 octets; \
             base64/quoted-printable encoding is forbidden for message/* types \
             (RFC 2046 Section 5.2.1) so line lengths must conform to \
             RFC 2045 Sections 2.7-2.8",
        )));
    }

    Ok(())
}

/// Validates that the top-level encapsulated headers of a `message/rfc822`
/// attachment remain US-ASCII.
///
/// RFC 2046 Section 5.2.1 requires `message/rfc822` header fields to be
/// US-ASCII, using RFC 2047 for non-ASCII header text. RFC 6532 Section 3.7
/// defines `message/global` for encapsulated messages with raw UTF-8 header
/// values. This check is intentionally limited to the top-level encapsulated
/// header block; raw 8bit octets in the encapsulated body remain valid when
/// the embedded message uses 8bit transport.
fn validate_message_rfc822_headers_are_ascii(filename: &str, data: &[u8]) -> Result<(), Error> {
    let header_end = data
        .windows(4)
        .position(|window| window == b"\r\n\r\n")
        .map_or(data.len(), |pos| pos + 2);

    if let Some(non_ascii) = data[..header_end].iter().copied().find(|b| !b.is_ascii()) {
        return Err(Error::InvalidAttachment(format!(
            "message/rfc822 attachment \"{filename}\" contains raw non-ASCII byte 0x{non_ascii:02X} \
             in the encapsulated header block; RFC 2046 Section 5.2.1 requires \
             message/rfc822 headers to stay US-ASCII, and RFC 6532 Section 3.7 \
             requires message/global for encapsulated messages with UTF-8 headers"
        )));
    }

    Ok(())
}

/// Validates that an encapsulated `message/rfc822` header block includes at
/// least one of `From`, `Subject`, or `Date`.
///
/// RFC 2046 Section 5.2.1 relaxes the top-level RFC 822 requirements for
/// encapsulated messages, but still requires at least one of these fields to
/// be present. Folded continuation lines are skipped per RFC 5322
/// Section 2.2.3, and field-name matching remains case-insensitive per RFC
/// 5322 Section 2.2.
///
/// The header block ends at the first empty line. Data that *begins* with
/// CRLF therefore has an empty header block: everything after that CRLF is
/// body, and body lines that merely look like `Subject: ...` must not
/// satisfy the requirement.
///
/// # Errors
///
/// Returns [`Error::InvalidAttachment`] naming `filename` when none of the
/// three fields is present in the header block.
fn validate_message_rfc822_required_headers(filename: &str, data: &[u8]) -> Result<(), Error> {
    let header_block: &[u8] = if data.starts_with(b"\r\n") {
        // The very first line is the blank separator: there are no headers.
        &[]
    } else {
        let header_end = data
            .windows(4)
            .position(|window| window == b"\r\n\r\n")
            .unwrap_or(data.len());
        &data[..header_end]
    };

    let has_required_header = header_block
        .split(|&byte| byte == b'\n')
        .map(|line| line.strip_suffix(b"\r").unwrap_or(line))
        .filter(|line| !line.is_empty())
        // Lines starting with WSP continue the previous field (folding).
        .filter(|line| !matches!(line.first(), Some(b' ' | b'\t')))
        // The field name is everything before the first colon.
        .filter_map(|line| {
            line.iter()
                .position(|&byte| byte == b':')
                .map(|colon| &line[..colon])
        })
        .any(|field_name| {
            field_name.eq_ignore_ascii_case(b"from")
                || field_name.eq_ignore_ascii_case(b"subject")
                || field_name.eq_ignore_ascii_case(b"date")
        });

    if !has_required_header {
        return Err(Error::InvalidAttachment(format!(
            "message/rfc822 attachment \"{filename}\" must include at least one of \
             From, Subject, or Date in the encapsulated header block \
             (RFC 2046 Section 5.2.1)"
        )));
    }

    Ok(())
}

/// Writes the descriptive headers of an attachment part.
///
/// Three headers are emitted, in this order:
/// 1. `Content-Type`, built from `content_type` and `filename`;
/// 2. `Content-Disposition`, `inline` or `attachment` depending on
///    `attachment.is_inline`, carrying `filename`;
/// 3. `Content-ID`, only when `attachment.content_id` is set.
///
/// The transfer encoding and the body are not written here.
///
/// # Errors
///
/// Stops at, and returns, the first error produced by any of the three
/// header writers.
///
/// # References
/// - RFC 2045 Section 5 (Content-Type `name` parameter)
/// - RFC 2183 Section 2 (Content-Disposition: attachment)
/// - RFC 2392 (Content-ID)
fn write_attachment_headers(
    output: &mut Vec<u8>,
    attachment: &OutgoingAttachment,
    content_type: &str,
    filename: &str,
) -> Result<(), Error> {
    let inline = attachment.is_inline;
    let content_id = attachment.content_id.as_deref();

    write_attachment_content_type_header(output, content_type, filename)?;
    write_attachment_disposition_header(output, inline, filename)?;
    write_attachment_content_id_header(output, content_id)
}

/// Emits the attachment's `Content-Type` header with its file name.
///
/// The file name is carried as a legacy quoted `name="..."` parameter, as an
/// RFC 2231 `name*=UTF-8''...` parameter, or both:
/// - ASCII name that fits the quoted form: `name="..."` only.
/// - Non-ASCII name: `name="..."` with every non-ASCII character replaced by
///   `_`, followed by the RFC 2231 form of the real name.
/// - Name whose quoted form is flagged by `quoted_param_word_needs_rfc2231`:
///   RFC 2231 form only.
///
/// When the single RFC 2231 parameter would exceed [`MAX_LINE_LEN`], it is
/// split into `name*0*`, `name*1*`, … continuations.
///
/// # References
/// - RFC 2045 Section 5 (Content-Type parameters)
/// - RFC 2231 Sections 3-4 (parameter continuations and encoded values)
/// - RFC 5322 Section 2.2 (header bodies are ASCII)
fn write_attachment_content_type_header(
    output: &mut Vec<u8>,
    content_type: &str,
    filename: &str,
) -> Result<(), Error> {
    let is_non_ascii = !filename.is_ascii();
    // ASCII-only stand-in for the name; identical to `filename` when the
    // name is already ASCII.
    let legacy: String = filename
        .chars()
        .map(|c| if c.is_ascii() { c } else { '_' })
        .collect();
    let ct_name = escape_quoted_string(&legacy);
    let legacy_name_needs_rfc2231 = quoted_param_word_needs_rfc2231("name", &ct_name);

    // The quoted legacy parameter is included whenever it is usable.
    let mut value = if legacy_name_needs_rfc2231 {
        content_type.to_string()
    } else {
        format!("{content_type}; name=\"{ct_name}\"")
    };

    if is_non_ascii || legacy_name_needs_rfc2231 {
        use std::fmt::Write;

        let encoded = percent_encode_filename(filename);
        if "name*=UTF-8''".len() + encoded.len() > MAX_LINE_LEN {
            // Too long for one parameter: emit RFC 2231 continuations.
            let chunks = split_percent_encoded(&encoded, RFC2231_CHUNK_MAX);
            for (i, chunk) in chunks.iter().enumerate() {
                let _ = match i {
                    0 => write!(value, "; name*0*=UTF-8''{chunk}"),
                    _ => write!(value, "; name*{i}*={chunk}"),
                };
            }
        } else {
            let _ = write!(value, "; name*=UTF-8''{encoded}");
        }
    }

    try_write_header(output, "Content-Type", &value)
}

/// Emits the `Content-Disposition` header (`inline` or `attachment`) with the
/// part's file name.
///
/// The file name is carried as a legacy quoted `filename="..."` parameter, as
/// an RFC 2231 `filename*=UTF-8''...` parameter, or both:
/// - ASCII name that fits the quoted form: `filename="..."` only.
/// - Non-ASCII name: `filename="..."` with every non-ASCII character replaced
///   by `_`, followed by the RFC 2231 form of the real name.
/// - Name whose quoted form is flagged by `quoted_param_word_needs_rfc2231`:
///   RFC 2231 form only.
///
/// When the single RFC 2231 parameter would exceed [`MAX_LINE_LEN`], it is
/// split into `filename*0*`, `filename*1*`, … continuations.
///
/// # References
/// - RFC 2183 Section 2 (attachment vs inline disposition)
/// - RFC 2231 Sections 3-5 (parameter continuations and encoded values)
/// - RFC 5322 Section 3.2.4 (quoted-string escaping)
fn write_attachment_disposition_header(
    output: &mut Vec<u8>,
    is_inline: bool,
    filename: &str,
) -> Result<(), Error> {
    let disposition = match is_inline {
        true => "inline",
        false => "attachment",
    };
    let is_non_ascii = !filename.is_ascii();
    // ASCII-only stand-in for the name; identical to `filename` when the
    // name is already ASCII.
    let legacy: String = filename
        .chars()
        .map(|c| if c.is_ascii() { c } else { '_' })
        .collect();
    let escaped_legacy = escape_quoted_string(&legacy);
    let legacy_filename_needs_rfc2231 =
        quoted_param_word_needs_rfc2231("filename", &escaped_legacy);

    // The quoted legacy parameter is included whenever it is usable.
    let mut value = if legacy_filename_needs_rfc2231 {
        disposition.to_owned()
    } else {
        format!("{disposition}; filename=\"{escaped_legacy}\"")
    };

    if is_non_ascii || legacy_filename_needs_rfc2231 {
        use std::fmt::Write;

        let encoded = percent_encode_filename(filename);
        if "filename*=UTF-8''".len() + encoded.len() > MAX_LINE_LEN {
            // Too long for one parameter: emit RFC 2231 continuations.
            let chunks = split_percent_encoded(&encoded, RFC2231_CHUNK_MAX);
            for (i, chunk) in chunks.iter().enumerate() {
                let _ = match i {
                    0 => write!(value, "; filename*0*=UTF-8''{chunk}"),
                    _ => write!(value, "; filename*{i}*={chunk}"),
                };
            }
        } else {
            let _ = write!(value, "; filename*=UTF-8''{encoded}");
        }
    }

    try_write_header(output, "Content-Disposition", &value)
}

/// Emits a `Content-ID: <...>` header when the attachment has a content id;
/// writes nothing otherwise.
///
/// The id is passed through `normalize_content_id` before being wrapped in
/// angle brackets.
///
/// # Errors
///
/// Propagates a failure from `normalize_content_id` or from writing the
/// header.
///
/// # References
/// - RFC 2392 (Content-ID)
/// - RFC 5322 Section 2.1 (header injection safety)
fn write_attachment_content_id_header(
    output: &mut Vec<u8>,
    content_id: Option<&str>,
) -> Result<(), Error> {
    let Some(cid) = content_id else {
        return Ok(());
    };
    let normalized_cid = normalize_content_id(cid)?;
    let bracketed = format!("<{normalized_cid}>");
    try_write_header(output, "Content-ID", &bracketed)
}

/// Appends the `Content-Transfer-Encoding` header, the blank separator line,
/// and the attachment payload to `output`.
///
/// Two strategies are used, selected by `media_type` (compared
/// case-insensitively):
///
/// - `message/*` other than `message/global`: the payload is validated and
///   then copied verbatim with a `7bit` or `8bit` transfer encoding. A final
///   CRLF is appended when the payload does not already end with one.
/// - Everything else, including `message/global`: the payload is base64
///   encoded and wrapped at 76 characters per line.
///
/// # Errors
/// Returns the error from any of the `validate_message_*` helpers, an
/// `Error::InvalidAttachment` when a `message/partial` or
/// `message/external-body` payload contains non-ASCII bytes, and any error
/// reported by `try_write_header`.
///
/// # References
/// - RFC 2046 Section 5.2.1 (message/* encoding restrictions)
/// - RFC 2046 Sections 5.2.2-5.2.3 (7bit-only message subtypes)
/// - RFC 6532 Section 3.7 (`message/global` allows any transfer encoding)
/// - RFC 2045 Section 6.8 (base64 line wrapping)
fn write_attachment_body(
    output: &mut Vec<u8>,
    attachment: &OutgoingAttachment,
    media_type: &str,
) -> Result<(), Error> {
    const CRLF: &[u8] = b"\r\n";

    let in_message_family = media_type
        .as_bytes()
        .get(..8)
        .is_some_and(|prefix| prefix.eq_ignore_ascii_case(b"message/"));
    let media_type_is = |candidate: &str| media_type.eq_ignore_ascii_case(candidate);

    if !in_message_family || media_type_is("message/global") {
        // Generic path: base64 with 76-character lines.
        try_write_header(output, "Content-Transfer-Encoding", "base64")?;
        output.extend_from_slice(CRLF);

        let encoded = base64::engine::general_purpose::STANDARD.encode(&attachment.data);
        for line in encoded.as_bytes().chunks(76) {
            output.extend_from_slice(line);
            output.extend_from_slice(CRLF);
        }
        return Ok(());
    }

    // Restricted `message/*` path: the payload must pass validation because
    // it is emitted without any content transformation.
    validate_message_attachment_bytes(&attachment.filename, &attachment.data)?;
    if media_type_is("message/rfc822") {
        validate_message_rfc822_headers_are_ascii(&attachment.filename, &attachment.data)?;
        validate_message_rfc822_required_headers(&attachment.filename, &attachment.data)?;
    }

    let requires_seven_bit =
        media_type_is("message/partial") || media_type_is("message/external-body");
    let has_non_ascii = attachment.data.iter().any(|&b| !b.is_ascii());
    let encoding = match (requires_seven_bit, has_non_ascii) {
        (true, true) => {
            return Err(Error::InvalidAttachment(format!(
                "{media_type} attachment \"{}\" contains non-ASCII bytes; \
                 RFC 2046 requires this subtype to use 7bit transport only",
                attachment.filename
            )));
        }
        (false, true) => "8bit",
        (_, false) => "7bit",
    };

    try_write_header(output, "Content-Transfer-Encoding", encoding)?;
    output.extend_from_slice(CRLF);
    output.extend_from_slice(&attachment.data);
    // Guarantee the part ends on a line boundary.
    if !attachment.data.ends_with(CRLF) {
        output.extend_from_slice(CRLF);
    }

    Ok(())
}

/// Serializes one attachment as a MIME part: its headers followed by its
/// encoded body, both appended to `output`.
///
/// The attachment's declared content type is run through
/// `parse_attachment_content_type`; when that rejects the value, both the
/// emitted header value and the base media type fall back to
/// `application/octet-stream`.
///
/// # Errors
/// Propagates any error from `write_attachment_headers` or
/// `write_attachment_body`.
pub(super) fn write_attachment_part(
    output: &mut Vec<u8>,
    attachment: &OutgoingAttachment,
) -> Result<(), Error> {
    const FALLBACK_MEDIA_TYPE: &str = "application/octet-stream";

    let (header_value, base_media_type) =
        match parse_attachment_content_type(&attachment.content_type) {
            Some(parsed) => parsed,
            None => (FALLBACK_MEDIA_TYPE, FALLBACK_MEDIA_TYPE.to_string()),
        };

    // The filename ends up inside header values, so it goes through
    // `sanitize_header_value` first to guard against CR/LF header injection
    // (RFC 5322 Section 2.1 — header lines are terminated by CRLF).
    let safe_filename = sanitize_header_value(&attachment.filename);

    write_attachment_headers(output, attachment, header_value, &safe_filename)?;
    write_attachment_body(output, attachment, &base_media_type)
}

/// Normalizes and validates an attachment `Content-ID` value.
///
/// The public API stores a bare ID without angle brackets; the wire format is
/// `Content-ID: <id>` (RFC 2392 / RFC 5322 Section 3.6.4). Accept bracketed
/// caller input for tolerance, but reject malformed IDs rather than emitting an
/// invalid header.
///
/// # References
/// - RFC 2392 (Content-ID syntax)
/// - RFC 5322 Section 3.6.4 (msg-id)
fn normalize_content_id(content_id: &str) -> Result<String, Error> {
    let trimmed = content_id.trim();
    let bare = strip_angle_brackets(trimmed).trim();
    if is_valid_msg_id(bare) {
        Ok(bare.to_string())
    } else {
        Err(Error::InvalidAttachment(format!(
            "attachment Content-ID must be a valid bare msg-id body without angle brackets \
             (RFC 2392 / RFC 5322 Section 3.6.4): {content_id:?}"
        )))
    }
}

/// Percent-encodes `filename` for use as an RFC 2231 extended parameter value.
///
/// The input is processed byte by byte over its UTF-8 representation. The
/// RFC 3986 Section 2.3 unreserved characters (ASCII letters, digits, `-`,
/// `.`, `_`, `~`) are copied through unchanged; every other byte becomes a
/// `%HH` triplet with uppercase hexadecimal digits. The result is therefore
/// always pure ASCII.
///
/// # References
/// - RFC 2231 Section 4 (parameter value character set and language)
/// - RFC 3986 Section 2.1 (percent-encoding)
fn percent_encode_filename(filename: &str) -> String {
    const UPPER_HEX: &[u8; 16] = b"0123456789ABCDEF";

    // Worst case: every input byte expands to a three-byte triplet.
    let mut out = String::with_capacity(filename.len() * 3);
    for byte in filename.bytes() {
        match byte {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~' => {
                out.push(char::from(byte));
            }
            _ => {
                out.push('%');
                out.push(char::from(UPPER_HEX[usize::from(byte >> 4)]));
                out.push(char::from(UPPER_HEX[usize::from(byte & 0x0F)]));
            }
        }
    }
    out
}

/// Maximum length, in bytes, of the encoded value carried by a single
/// RFC 2231 continuation parameter.
///
/// Chosen so that a segment such as `filename*N*=UTF-8''<chunk>;` fits well
/// within the 78-character recommended line length (RFC 5322 Section
/// 2.1.1). The per-segment overhead is:
///
/// - the charset/language prefix `UTF-8''` (7 chars), on chunk 0 only;
/// - the longest parameter name `filename*NN*=` (~14 chars);
/// - the trailing `;` (1 char).
///
/// That leaves roughly 56 chars for the encoded value on the first chunk
/// and 63 on subsequent chunks. We use 50 as a conservative limit so every
/// chunk stays within 78 characters with margin to spare.
const RFC2231_CHUNK_MAX: usize = 50;

/// Splits a percent-encoded string into chunks for RFC 2231 continuation
/// parameters (RFC 2231 Section 3), ensuring no chunk splits a `%HH`
/// percent-encoding triplet.
///
/// Returns a `Vec` of string slices. If the input fits within a single
/// chunk, returns a single-element vector.
fn split_percent_encoded(encoded: &str, max_chunk_len: usize) -> Vec<&str> {
    if encoded.len() <= max_chunk_len {
        return vec![encoded];
    }

    let bytes = encoded.as_bytes();
    let mut chunks = Vec::new();
    let mut pos = 0;

    while pos < bytes.len() {
        let chunk_start = pos;
        let mut used = 0;

        while pos < bytes.len() && used < max_chunk_len {
            if bytes[pos] == b'%' && pos + 2 < bytes.len() {
                // RFC 3986 Section 2.1: a percent triplet is `%HEXDIG HEXDIG`.
                // Never split in the middle of one.
                if used + 3 > max_chunk_len && used > 0 {
                    break; // triplet would exceed the chunk budget
                }
                pos += 3;
                used += 3;
            } else {
                pos += 1;
                used += 1;
            }
        }

        // Safety: if we made no progress (shouldn't happen — a single char
        // or triplet always fits when used == 0), force at least one byte.
        if pos == chunk_start {
            pos += 1;
        }

        chunks.push(&encoded[chunk_start..pos]);
    }

    chunks
}

/// Decides whether a quoted parameter (`param="value"`) is too long to be
/// emitted as a plain quoted-string and must use RFC 2231 encoding instead.
///
/// RFC 2183 page 3 says parameter values longer than 78 characters "MUST be
/// encoded" using the RFC 2231 mechanism, and RFC 2231 Section 3 defines the
/// continuation parameters (`filename*0*=...`, `name*0*=...`) for that case.
/// A quoted-string parameter is emitted as one unbreakable word, so once the
/// whole word no longer fits the folded-line budget there is no safe fold
/// point left inside it.
///
/// The word length counted here is the parameter name, two bytes of
/// punctuation before the value, the already-escaped value, and one byte
/// after it. The result is `true` when that length exceeds
/// `MAX_LINE_LEN - 1`.
fn quoted_param_word_needs_rfc2231(param_name: &str, escaped_value: &str) -> bool {
    // Presumably `="` before the value and the closing `"` after it.
    let leading = param_name.len() + 2;
    let trailing = 1;
    let budget = MAX_LINE_LEN - 1;

    leading + escaped_value.len() + trailing > budget
}

/// Checks that `ct` has the shape `type "/" subtype` (RFC 2045 Section 5.1).
///
/// Surrounding whitespace is ignored. The value is split at the first `/`;
/// both halves must be non-empty and made up solely of MIME token
/// characters, i.e. any ASCII character except SPACE, CTLs, and tspecials:
/// `( ) < > @ , ; : \ " / [ ] ? =`
///
/// Because `/` is itself a tspecial, a value containing more than one slash
/// is rejected. Parameters (`; name=value`) are not accepted here.
pub(super) fn is_valid_mime_type(ct: &str) -> bool {
    let Some((major, minor)) = ct.trim().split_once('/') else {
        return false;
    };

    let is_token = |part: &str| !part.is_empty() && part.chars().all(is_mime_token_char);
    is_token(major) && is_token(minor)
}

/// Splits an attachment `Content-Type` value into the full (trimmed) header
/// value and its base media type.
///
/// The input must be non-empty ASCII with no control characters other than
/// horizontal tab. Everything before the first `;` is treated as the media
/// type: it is passed through `strip_comments`, trimmed, and checked with
/// `is_valid_mime_type`. Everything after the first `;`, if present, is
/// checked with `validate_content_type_parameters`.
///
/// Returns `Some((full_value, media_type))` on success, where `full_value`
/// borrows from `ct` and still includes any parameters, and `media_type` is
/// the comment-stripped `type/subtype`. Returns `None` for any value that
/// fails the checks above, so the builder never emits an invalid header.
///
/// # References
/// - RFC 2045 Section 5.1 (media type and parameter syntax)
/// - RFC 5322 Section 2.2 (header fields are ASCII, excluding CR/LF in values)
fn parse_attachment_content_type(ct: &str) -> Option<(&str, String)> {
    let full_value = ct.trim();

    // CR and LF are control characters too, so this single test also covers
    // header-injection attempts.
    let has_forbidden_control = full_value
        .bytes()
        .any(|byte| byte.is_ascii_control() && byte != b'\t');
    if full_value.is_empty() || !full_value.is_ascii() || has_forbidden_control {
        return None;
    }

    let (type_section, param_section) = match full_value.split_once(';') {
        Some((head, tail)) => (head, Some(tail)),
        None => (full_value, None),
    };

    let base_media_type = strip_comments(type_section).trim().to_string();
    if !is_valid_mime_type(&base_media_type) {
        return None;
    }

    if let Some(param_section) = param_section {
        validate_content_type_parameters(param_section)?;
    }

    Some((full_value, base_media_type))
}

/// Validates the parameter list portion of a MIME `Content-Type`.
///
/// `params` is the text that follows the first `;` of the header value.
/// After `strip_comments` has been applied, it must be a `;`-separated
/// sequence of `attribute=value` pairs where the attribute is a token and
/// the value is either a token or a quoted-string. Spaces and tabs are
/// tolerated around the attribute, the `=`, and the value. An empty list and
/// a trailing `;` are both accepted.
///
/// Inside a quoted-string, a backslash escapes the following ASCII byte;
/// unescaped bytes must be ASCII and must not be control characters other
/// than horizontal tab. The quoted-string must be terminated by an unescaped
/// `"`: inputs such as `charset="` or `name="abc\"` are rejected, since
/// emitting them would produce a header with an unbalanced quote.
///
/// Returns `Some(())` when the list is well-formed and `None` otherwise.
///
/// # References
/// - RFC 2045 Section 5.1 (parameter syntax)
/// - RFC 2045 Section 5.1 (tspecial handling in tokens and quoted-strings)
fn validate_content_type_parameters(params: &str) -> Option<()> {
    let params_without_comments = strip_comments(params);
    let bytes = params_without_comments.as_bytes();

    // Returns the index of the first byte at or after `from` that is not a
    // space or tab.
    let skip_whitespace = |from: usize| -> usize {
        let mut at = from;
        while at < bytes.len() && matches!(bytes[at], b' ' | b'\t') {
            at += 1;
        }
        at
    };
    // Returns the index just past the run of token characters starting at
    // `from` (equal to `from` when there is none).
    let skip_token = |from: usize| -> usize {
        let mut at = from;
        while at < bytes.len() && is_mime_token_char(bytes[at] as char) {
            at += 1;
        }
        at
    };

    let mut i = 0;
    while i < bytes.len() {
        i = skip_whitespace(i);
        if i == bytes.len() {
            // Only whitespace remained (e.g. after a trailing `;`).
            return Some(());
        }

        // attribute := token
        let attr_end = skip_token(i);
        if attr_end == i {
            return None;
        }

        i = skip_whitespace(attr_end);
        if i == bytes.len() || bytes[i] != b'=' {
            return None;
        }

        i = skip_whitespace(i + 1);
        if i == bytes.len() {
            // `attribute=` with no value.
            return None;
        }

        if bytes[i] == b'"' {
            // value := quoted-string
            i += 1;
            // Track termination explicitly: looking at the previous byte is
            // not enough, because it may be the opening quote or an escaped
            // quote rather than a real closing delimiter.
            let mut closed = false;
            while i < bytes.len() {
                match bytes[i] {
                    b'\\' => {
                        // quoted-pair: the escaped byte must exist and be ASCII.
                        i += 1;
                        if i == bytes.len() || !bytes[i].is_ascii() {
                            return None;
                        }
                        i += 1;
                    }
                    b'"' => {
                        i += 1;
                        closed = true;
                        break;
                    }
                    // Covers CR and LF as well as every other control
                    // character except horizontal tab.
                    b if !b.is_ascii() || (b.is_ascii_control() && b != b'\t') => return None,
                    _ => i += 1,
                }
            }
            if !closed {
                return None;
            }
        } else {
            // value := token
            let value_end = skip_token(i);
            if value_end == i {
                return None;
            }
            i = value_end;
        }

        i = skip_whitespace(i);
        if i == bytes.len() {
            return Some(());
        }
        if bytes[i] != b';' {
            return None;
        }
        i += 1;
    }

    Some(())
}

/// Reports whether `c` may appear in a MIME token (RFC 2045 Section 5.1).
///
/// A token character is any ASCII character except SPACE, CTLs, and the
/// tspecials `( ) < > @ , ; : \ " / [ ] ? =`. In other words: a visible
/// ASCII character (`!` through `~`) that is not a tspecial.
fn is_mime_token_char(c: char) -> bool {
    /// The RFC 2045 `tspecials` set.
    const TSPECIALS: &str = "()<>@,;:\\\"/[]?=";

    // `is_ascii_graphic` is exactly U+0021..=U+007E, which already excludes
    // non-ASCII, whitespace, and control characters.
    c.is_ascii_graphic() && !TSPECIALS.contains(c)
}