// daaki-message 0.2.0

#![allow(clippy::unwrap_used, clippy::expect_used)]
// Re-import the public entry points from this module.
use super::{parse_email, parse_headers_only};

// Re-import wire-layer functions used by tests.
use super::wire::{parse_headers, split_header_body, split_mime_parts};

// Re-import interpret-layer functions used by tests.
#[allow(unused_imports)]
use super::interpret::{
    contains_at_outside_quotes, decode_body, decode_encoded_words, decode_hex_pair,
    decode_q_encoding, decode_quoted_printable, decode_transfer_encoding, extract_comment_text,
    extract_filename, extract_mime_type, extract_param, extract_rfc2231_continuation,
    extract_rfc2231_param, find_closing_quote, find_param_value, hex_digit, is_disposition_type,
    is_inside_quotes, normalize_display_name_phrase, parse_address_list, parse_rfc5322_date,
    parse_single_address, parse_timezone, parse_year, percent_decode, strip_comments,
    strip_outer_quotes, unescape_quoted_string,
};

// Types needed by tests.
use crate::error::Error;
#[allow(unused_imports)]
use crate::types::Address;

/// A minimal single-part `text/plain` message: the addressing headers, subject,
/// message-id and body must all be surfaced, with no HTML body and no attachments.
#[test]
fn parse_simple_text_email() {
    let wire = b"From: sender@example.com\r\n\
                 To: recipient@example.com\r\n\
                 Subject: Test\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Message-ID: <abc123@example.com>\r\n\
                 Content-Type: text/plain; charset=utf-8\r\n\
                 \r\n\
                 Hello, World!";

    let message = parse_email(wire).unwrap();

    // Addressing headers.
    let sender = &message.from[0];
    assert_eq!(sender.email, "sender@example.com");
    let recipients = &message.to;
    assert_eq!(recipients.len(), 1);
    assert_eq!(recipients[0].email, "recipient@example.com");

    // Scalar headers; the message-id is expected without its angle brackets.
    assert_eq!(message.subject.as_deref(), Some("Test"));
    assert_eq!(message.message_id.as_deref(), Some("abc123@example.com"));

    // Body classification.
    assert_eq!(message.body_text.as_deref(), Some("Hello, World!"));
    assert!(message.body_html.is_none());
    assert!(message.attachments.is_empty());

    // The reported size is the length of the raw input.
    assert_eq!(message.size, wire.len() as u64);
}

/// `multipart/alternative` carrying a plain-text and an HTML part: each alternative
/// must land in its own body field and neither may be reported as an attachment.
#[test]
fn parse_multipart_alternative() {
    let wire = b"From: sender@example.com\r\n\
                 To: recipient@example.com\r\n\
                 Subject: Multi\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 MIME-Version: 1.0\r\n\
                 Content-Type: multipart/alternative; boundary=\"bound42\"\r\n\
                 \r\n\
                 --bound42\r\n\
                 Content-Type: text/plain; charset=utf-8\r\n\
                 \r\n\
                 Plain text body\r\n\
                 --bound42\r\n\
                 Content-Type: text/html; charset=utf-8\r\n\
                 \r\n\
                 <html><body>HTML body</body></html>\r\n\
                 --bound42--";

    let message = parse_email(wire).unwrap();

    let plain = message.body_text.as_deref();
    let html = message.body_html.as_deref();
    assert_eq!(plain, Some("Plain text body"));
    assert_eq!(html, Some("<html><body>HTML body</body></html>"));
    assert!(message.attachments.is_empty());
}

/// A base64 ("B") encoded-word in the Subject header must be decoded to plain text.
#[test]
fn parse_encoded_words_base64_subject() {
    let wire = b"From: sender@example.com\r\n\
                 Subject: =?UTF-8?B?SGVsbG8gV29ybGQ=?=\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n\
                 body";

    let message = parse_email(wire).unwrap();
    let subject = message.subject.as_deref();
    assert_eq!(subject, Some("Hello World"));
}

/// A "Q" encoded-word in the Subject header must be decoded, with `_` read as a space.
#[test]
fn parse_encoded_words_q_subject() {
    let wire = b"From: sender@example.com\r\n\
                 Subject: =?UTF-8?Q?Hello_World?=\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n\
                 body";

    let message = parse_email(wire).unwrap();
    let subject = message.subject.as_deref();
    assert_eq!(subject, Some("Hello World"));
}

/// An encoded-word used as the display name of a From mailbox must be decoded,
/// while the address inside the angle brackets is taken as-is.
#[test]
fn parse_encoded_words_in_display_name() {
    let wire = b"From: =?UTF-8?B?Sm9obiBEb2U=?= <john@example.com>\r\n\
                 Subject: Test\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let message = parse_email(wire).unwrap();
    let sender = &message.from[0];
    assert_eq!(sender.name.as_deref(), Some("John Doe"));
    assert_eq!(sender.email, "john@example.com");
}

/// Encoded-words that declare a non-UTF-8 charset must be transcoded:
/// `=E9` in ISO-8859-1 is "é", so the subject reads "Héllo".
#[test]
fn parse_non_utf8_charset() {
    let wire = b"From: sender@example.com\r\n\
                 Subject: =?ISO-8859-1?Q?H=E9llo?=\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let message = parse_email(wire).unwrap();
    let subject = message.subject.as_deref();
    assert_eq!(subject, Some("Héllo"));
}

/// The Message-ID value must be exposed without its surrounding `<` `>`.
#[test]
fn parse_message_id_strips_brackets() {
    let wire = b"From: a@b.com\r\n\
                 Message-ID: <unique-id@host.com>\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let message = parse_email(wire).unwrap();
    let id = message.message_id.as_deref();
    assert_eq!(id, Some("unique-id@host.com"));
}

/// RFC 5322 Section 3.6.4: in-reply-to = "In-Reply-To:" 1*msg-id CRLF.
/// Two msg-ids in one header must yield two entries, in order, without brackets.
#[test]
fn parse_in_reply_to_multiple() {
    let wire = b"From: a@b.com\r\n\
                 In-Reply-To: <first@host> <second@host>\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let message = parse_email(wire).unwrap();
    let reply_ids = &message.in_reply_to;
    assert_eq!(reply_ids.len(), 2);
    assert_eq!(reply_ids[0], "first@host");
    assert_eq!(reply_ids[1], "second@host");
}

/// RFC 5322 Section 3.6.4: references = "References:" 1*msg-id CRLF.
/// Every message-id must become its own Vec element, in header order.
#[test]
fn parse_references_all_ids() {
    let wire = b"From: a@b.com\r\n\
                 References: <ref1@host> <ref2@host> <ref3@host>\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let message = parse_email(wire).unwrap();
    // Comparing the whole list checks both the count (3) and each element.
    assert_eq!(message.references, ["ref1@host", "ref2@host", "ref3@host"]);
}

/// In-Reply-To may carry more than one message-ID
/// (RFC 5322 Section 3.6.4: in-reply-to = "In-Reply-To:" 1*msg-id CRLF);
/// none of them may be discarded.
#[test]
fn parse_in_reply_to_all_ids() {
    let wire = b"From: a@b.com\r\n\
                 In-Reply-To: <id1@a.com> <id2@b.com>\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let message = parse_email(wire).unwrap();
    assert_eq!(message.in_reply_to.len(), 2);

    let (first, second) = (&message.in_reply_to[0], &message.in_reply_to[1]);
    assert_eq!(*first, "id1@a.com");
    assert_eq!(*second, "id2@b.com");
}

/// A zero-byte attachment body is legal: RFC 2046 Section 5.1.1 defines
/// `body-part = MIME-part-headers [CRLF *OCTET]`. When the top-level entity is
/// an attachment and no body octets follow the header block, the attachment
/// metadata must still be reported rather than the body being dropped.
#[test]
fn parse_empty_top_level_attachment_is_preserved() {
    let wire = b"From: sender@example.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 MIME-Version: 1.0\r\n\
                 Content-Type: application/octet-stream\r\n\
                 Content-Disposition: attachment; filename=\"empty.bin\"\r\n\
                 \r\n";

    let message = parse_email(wire).unwrap();
    assert_eq!(
        message.attachments.len(),
        1,
        "empty top-level attachment must not be dropped"
    );

    let attachment = &message.attachments[0];
    assert_eq!(attachment.filename.as_deref(), Some("empty.bin"));
    assert_eq!(attachment.size, Some(0));
    assert_eq!(attachment.section.as_deref(), Some("1"));
}

/// A Date header with a numeric zone: the calendar and clock fields are kept as
/// written and `+0530` is reported as an offset of 330 minutes.
#[test]
fn parse_date_with_numeric_timezone() {
    let wire = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0530\r\n\
                 \r\n";

    let stamp = parse_email(wire).unwrap().date.unwrap();
    assert_eq!((stamp.year, stamp.month, stamp.day), (2025, 2, 13));
    assert_eq!((stamp.hour, stamp.minute, stamp.second), (15, 47, 33));
    assert_eq!(stamp.tz_offset_minutes, 330);
}

/// The named zone `EST` must be reported as an offset of -300 minutes.
#[test]
fn parse_date_named_timezone() {
    let wire = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 10:30:00 EST\r\n\
                 \r\n";

    let message = parse_email(wire).unwrap();
    let offset = message.date.unwrap().tz_offset_minutes;
    assert_eq!(offset, -300);
}

/// A quoted display name in front of an angle-addr: the name is reported without
/// its quotes and the address without its brackets.
#[test]
fn parse_address_with_display_name() {
    let wire = b"From: \"John Doe\" <john@example.com>\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let message = parse_email(wire).unwrap();
    let sender = &message.from[0];
    assert_eq!(sender.name.as_deref(), Some("John Doe"));
    assert_eq!(sender.email, "john@example.com");
}

/// Comma-separated address lists in To and Cc must be split into one entry per
/// mailbox, keeping a display name where one is present.
#[test]
fn parse_multiple_recipients() {
    let wire = b"From: a@b.com\r\n\
                 To: one@x.com, \"Two\" <two@x.com>, three@x.com\r\n\
                 Cc: cc1@x.com, cc2@x.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let message = parse_email(wire).unwrap();

    let to = &message.to;
    assert_eq!(to.len(), 3);
    assert_eq!(to[1].name.as_deref(), Some("Two"));

    assert_eq!(message.cc.len(), 2);
}

/// `multipart/mixed` with a text part followed by a PDF attachment: the text
/// becomes the body, and the PDF is listed as a non-inline attachment in section "2".
#[test]
fn parse_multipart_with_attachment() {
    let wire = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 MIME-Version: 1.0\r\n\
                 Content-Type: multipart/mixed; boundary=\"mixbound\"\r\n\
                 \r\n\
                 --mixbound\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 Message body\r\n\
                 --mixbound\r\n\
                 Content-Type: application/pdf\r\n\
                 Content-Disposition: attachment; filename=\"doc.pdf\"\r\n\
                 \r\n\
                 PDF_CONTENT_HERE\r\n\
                 --mixbound--";

    let message = parse_email(wire).unwrap();
    assert_eq!(message.body_text.as_deref(), Some("Message body"));
    assert_eq!(message.attachments.len(), 1);

    let pdf = &message.attachments[0];
    assert_eq!(pdf.filename.as_deref(), Some("doc.pdf"));
    assert_eq!(pdf.content_type, "application/pdf");
    assert!(!pdf.is_inline);
    assert_eq!(pdf.section.as_deref(), Some("2"));
}

/// A part with `Content-Disposition: inline` and a Content-ID must be listed as an
/// inline attachment, with the Content-ID reported without angle brackets.
#[test]
fn parse_inline_attachment() {
    let wire = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"bound\"\r\n\
                 \r\n\
                 --bound\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 Body\r\n\
                 --bound\r\n\
                 Content-Type: image/png\r\n\
                 Content-Disposition: inline\r\n\
                 Content-ID: <img001>\r\n\
                 \r\n\
                 PNG_DATA\r\n\
                 --bound--";

    let message = parse_email(wire).unwrap();
    assert_eq!(message.attachments.len(), 1);

    let image = &message.attachments[0];
    assert!(image.is_inline);
    assert_eq!(image.content_id.as_deref(), Some("img001"));
}

/// Input that ends right after the header lines (no blank separator line, no body)
/// must still parse: the headers are available and both body fields stay empty.
#[test]
fn parse_headers_only_no_body() {
    let wire = b"From: a@b.com\r\n\
                 Subject: Headers only\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n";

    let message = parse_email(wire).unwrap();
    assert_eq!(message.subject.as_deref(), Some("Headers only"));

    let (plain, html) = (&message.body_text, &message.body_html);
    assert!(plain.is_none());
    assert!(html.is_none());
}

/// Zero bytes of input must be rejected with `Error::EmptyInput`.
#[test]
fn parse_empty_input() {
    assert!(matches!(parse_email(b""), Err(Error::EmptyInput)));
}

/// A message without a From header is still accepted: `from` comes back empty and
/// the remaining headers are parsed as usual.
#[test]
fn parse_missing_from() {
    let wire = b"Subject: No from\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let message = parse_email(wire).expect(
        "messages without From should still parse when other headers are usable \
         (consumer robustness / Postel's law)",
    );

    let originators = &message.from;
    assert!(
        originators.is_empty(),
        "missing From should produce an empty from list, got {:?}",
        originators
    );
    assert_eq!(message.subject.as_deref(), Some("No from"));
}

/// A top-level message with no header fields at all is malformed, yet usable body
/// text must still be surfaced. This matches the documented "partial message"
/// behavior of the public API and the way MIME subparts tolerate empty headers.
#[test]
fn parse_body_only_message_without_headers() {
    let wire = b"Hello from the body only parser path";

    let message = parse_email(wire).expect(
        "body-only top-level messages should still parse so consumers can \
         inspect truncated or malformed maildrops",
    );

    // Nothing header-derived is available.
    assert!(message.from.is_empty());
    assert!(message.raw_headers.is_empty());

    // The whole input is treated as the text body.
    let body = message.body_text.as_deref();
    assert_eq!(body, Some("Hello from the body only parser path"));
    assert_eq!(message.size, wire.len() as u64);
}

/// RFC 2046 allows a MIME body-part to have an empty header block. The same
/// leniency is expected for a malformed top-level message that starts with a
/// blank line followed directly by body text.
#[test]
fn parse_blank_line_then_body_without_headers() {
    let wire = b"\r\nBody after an empty top-level header block";

    let message = parse_email(wire)
        .expect("an empty top-level header block followed by body text should still parse");

    assert!(message.from.is_empty());
    assert!(message.raw_headers.is_empty());

    let body = message.body_text.as_deref();
    assert_eq!(body, Some("Body after an empty top-level header block"));
}

/// RFC 5322 Section 3.6.8 permits optional fields beyond the well-known header
/// set. Even when a message carries none of the standard headers, those optional
/// fields must be exposed (under lowercased names) together with the body, so
/// consumers can inspect partial or malformed messages.
#[test]
fn parse_custom_headers_without_well_known_headers() {
    let wire = b"X-Trace: 12345\r\n\
                 List-Id: Example List <list.example>\r\n\
                 \r\n\
                 body";

    let message = parse_email(wire).expect(
        "messages with only optional fields should still parse so consumers \
         can inspect extra headers and body content",
    );

    assert!(message.from.is_empty());
    assert!(message.subject.is_none());
    assert_eq!(message.body_text.as_deref(), Some("body"));

    // True when `extra_headers` holds exactly this (name, value) pair.
    let has_extra = |wanted_name: &str, wanted_value: &str| {
        message
            .extra_headers
            .iter()
            .any(|(name, value)| name == wanted_name && value == wanted_value)
    };

    assert!(
        has_extra("x-trace", "12345"),
        "X-Trace must be preserved in extra_headers"
    );
    assert!(
        has_extra("list-id", "Example List <list.example>"),
        "List-Id must be preserved in extra_headers"
    );
}

/// A quoted-printable body must be decoded: `=20` becomes a space, `=0D=0A` a CRLF,
/// and a trailing `=` before the line break is a soft break that joins the lines.
#[test]
fn parse_quoted_printable_body() {
    let wire = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: text/plain; charset=utf-8\r\n\
                 Content-Transfer-Encoding: quoted-printable\r\n\
                 \r\n\
                 Hello=20World=0D=0ASoft=\r\n break";

    let message = parse_email(wire).unwrap();
    let body = message.body_text.as_deref();
    assert_eq!(body, Some("Hello World\r\nSoft break"));
}

/// A base64 transfer-encoded text body must be decoded to its plain text.
#[test]
fn parse_base64_body() {
    let wire = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: text/plain; charset=utf-8\r\n\
                 Content-Transfer-Encoding: base64\r\n\
                 \r\n\
                 SGVsbG8gV29ybGQ=\r\n";

    let message = parse_email(wire).unwrap();
    let body = message.body_text.as_deref();
    assert_eq!(body, Some("Hello World"));
}

/// `multipart/mixed` whose first part is a nested `multipart/alternative`: both
/// alternatives are picked up as bodies, and the sibling PDF is numbered relative
/// to the outer multipart.
#[test]
fn parse_nested_multipart_section_numbers() {
    let wire = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"outer\"\r\n\
                 \r\n\
                 --outer\r\n\
                 Content-Type: multipart/alternative; boundary=\"inner\"\r\n\
                 \r\n\
                 --inner\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 Plain\r\n\
                 --inner\r\n\
                 Content-Type: text/html\r\n\
                 \r\n\
                 <b>HTML</b>\r\n\
                 --inner--\r\n\
                 --outer\r\n\
                 Content-Type: application/pdf\r\n\
                 Content-Disposition: attachment; filename=\"f.pdf\"\r\n\
                 \r\n\
                 DATA\r\n\
                 --outer--";

    let message = parse_email(wire).unwrap();

    let (plain, html) = (message.body_text.as_deref(), message.body_html.as_deref());
    assert_eq!(plain, Some("Plain"));
    assert_eq!(html, Some("<b>HTML</b>"));

    let attachments = &message.attachments;
    assert_eq!(attachments.len(), 1);
    // The PDF is the second part of the outer multipart, hence section "2".
    assert_eq!(attachments[0].section.as_deref(), Some("2"));
}

/// An RFC 2231 extended `filename*` parameter (charset, empty language,
/// percent-encoded bytes) must be decoded into the Unicode file name.
#[test]
fn parse_rfc2231_filename() {
    let wire = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
                 \r\n\
                 --b\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 Body\r\n\
                 --b\r\n\
                 Content-Type: application/pdf\r\n\
                 Content-Disposition: attachment; filename*=UTF-8''r%C3%A9sum%C3%A9.pdf\r\n\
                 \r\n\
                 DATA\r\n\
                 --b--";

    let message = parse_email(wire).unwrap();
    assert_eq!(message.attachments.len(), 1);

    let file_name = message.attachments[0].filename.as_deref();
    assert_eq!(file_name, Some("résumé.pdf"));
}

/// The original header lines must remain available verbatim in `raw_headers`.
#[test]
fn parse_raw_headers_preserved() {
    let wire = b"From: a@b.com\r\n\
                 Subject: Test\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n\
                 Body";

    let message = parse_email(wire).unwrap();
    let headers = &message.raw_headers;
    for expected_line in ["From: a@b.com", "Subject: Test"] {
        assert!(headers.contains(expected_line));
    }
}

/// Messages that use bare LF line endings instead of CRLF must parse the same way.
#[test]
fn parse_lf_only_line_endings() {
    let wire = b"From: a@b.com\n\
                 Subject: LF\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\n\
                 \n\
                 Body with LF";

    let message = parse_email(wire).unwrap();
    let (subject, body) = (message.subject.as_deref(), message.body_text.as_deref());
    assert_eq!(subject, Some("LF"));
    assert_eq!(body, Some("Body with LF"));
}

/// A folded header (RFC 5322 Section 2.2.3: the continuation line starts with
/// whitespace) must be unfolded into a single logical value.
#[test]
fn parse_header_continuation_lines() {
    // The literal stays on one source line on purpose: a trailing `\` line
    // continuation would strip the leading space that marks the folded line.
    let wire = b"From: a@b.com\r\nSubject: This is a very long\r\n subject line that wraps\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";

    let message = parse_email(wire).unwrap();
    let subject = message.subject.as_deref();
    assert_eq!(subject, Some("This is a very long subject line that wraps"));
}

/// Binary garbage that contains no header structure is expected to be rejected.
#[test]
fn parse_garbage_input_best_effort() {
    // NOTE(review): this was previously explained as "no valid From header", but
    // `parse_missing_from` shows a missing From alone is tolerated — confirm which
    // check actually rejects this input.
    let outcome = parse_email(b"\x00\x01\x02\x03\xff\xfe");
    assert!(outcome.is_err());
}

/// A multipart message cut off before its closing boundary must still yield the
/// text of the part that was started.
#[test]
fn parse_truncated_multipart() {
    let wire = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"trunc\"\r\n\
                 \r\n\
                 --trunc\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 Some text here";

    let message = parse_email(wire).unwrap();
    let body = message.body_text.as_deref();
    assert_eq!(body, Some("Some text here"));
}

/// RFC 2047 Section 6.2: whitespace that separates two adjacent encoded-words is
/// dropped, so "Hel" + "lo" decodes to "Hello".
#[test]
fn decode_adjacent_encoded_words() {
    assert_eq!(
        decode_encoded_words("=?UTF-8?B?SGVs?= =?UTF-8?B?bG8=?="),
        "Hello"
    );
}

/// A Q-encoded ISO-8859-1 word: `=E9` is "é", so the result is "café".
#[test]
fn decode_iso8859_encoded_word() {
    assert_eq!(decode_encoded_words("=?ISO-8859-1?Q?caf=E9?="), "café");
}

/// The seconds field of the time is optional; when absent it is reported as 0.
#[test]
fn parse_date_without_seconds() {
    let stamp = parse_rfc5322_date("Thu, 13 Feb 2025 15:47 +0000").unwrap();
    assert_eq!((stamp.hour, stamp.minute, stamp.second), (15, 47, 0));
}

/// Two-digit years are expanded to four digits: 99 becomes 1999, 25 becomes 2025.
#[test]
fn parse_two_digit_year() {
    let cases = [
        ("13 Feb 99 12:00:00 +0000", 1999),
        ("13 Feb 25 12:00:00 +0000", 2025),
    ];

    for (input, expected_year) in cases {
        let stamp = parse_rfc5322_date(input).unwrap();
        assert_eq!(stamp.year, expected_year);
    }
}

/// RFC 5322 Section 4.3: a three-digit year is interpreted by adding 1900.
#[test]
fn parse_three_digit_year_rfc5322_section_4_3() {
    // Parses a date string and returns only the resolved year.
    let year_of = |input: &str| parse_rfc5322_date(input).unwrap().year;

    assert_eq!(
        year_of("13 Feb 107 12:00:00 +0000"),
        2007,
        "3-digit year 107 must map to 2007 per RFC 5322 Section 4.3"
    );
    assert_eq!(
        year_of("13 Feb 100 12:00:00 +0000"),
        2000,
        "3-digit year 100 must map to 2000 per RFC 5322 Section 4.3"
    );
    assert_eq!(
        year_of("13 Feb 999 12:00:00 +0000"),
        2899,
        "3-digit year 999 must map to 2899 per RFC 5322 Section 4.3"
    );
}

/// RFC 5322 Section 4.3: two-digit years 00-49 get 2000 added and 50-99 get 1900
/// added. The pivot is 50 (not 70), which the values on both sides of it pin down.
#[test]
fn parse_two_digit_year_rfc5322_section_4_3_cutoff() {
    // Parses a date string and returns only the resolved year.
    let year_of = |input: &str| parse_rfc5322_date(input).unwrap().year;

    // 50 is the first year of the 1900 range: 1950, not 2050.
    assert_eq!(
        year_of("13 Feb 50 12:00:00 +0000"),
        1950,
        "2-digit year 50 must map to 1950 per RFC 5322 Section 4.3"
    );

    // 69 would be 2069 under a pivot of 70; it must be 1969.
    assert_eq!(
        year_of("13 Feb 69 12:00:00 +0000"),
        1969,
        "2-digit year 69 must map to 1969 per RFC 5322 Section 4.3"
    );

    // 49 is the last year of the 2000 range.
    assert_eq!(
        year_of("13 Feb 49 12:00:00 +0000"),
        2049,
        "2-digit year 49 must map to 2049 per RFC 5322 Section 4.3"
    );
}

/// A non-text part (here `image/jpeg`) without any Content-Disposition header must
/// still be reported as an attachment.
#[test]
fn parse_non_text_part_is_attachment() {
    let wire = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
                 \r\n\
                 --b\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 Text\r\n\
                 --b\r\n\
                 Content-Type: image/jpeg\r\n\
                 \r\n\
                 JPEG_DATA\r\n\
                 --b--";

    let message = parse_email(wire).unwrap();
    let attachments = &message.attachments;
    assert_eq!(attachments.len(), 1);
    assert_eq!(attachments[0].content_type, "image/jpeg");
}

/// A body declared as `charset=windows-1252` must be transcoded to Unicode.
///
/// Bytes 0x93 and 0x94 are the Windows-1252 left and right double quotation
/// marks (U+201C and U+201D). The whole decoded body is compared, so losing or
/// mis-decoding either quote fails the test.
#[test]
fn parse_windows1252_body() {
    let raw = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: text/plain; charset=windows-1252\r\n\
                 \r\n\
                 \x93Hello\x94"; // Smart double quotes in Windows-1252

    let parsed = parse_email(raw).unwrap();
    // Both smart quotes must survive, each as its own code point and in order.
    assert_eq!(
        parsed.body_text.as_deref(),
        Some("\u{201c}Hello\u{201d}")
    );
}

/// A single-part `text/html` message fills only `body_html`; `body_text` stays empty.
#[test]
fn parse_html_only_body() {
    let wire = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: text/html; charset=utf-8\r\n\
                 \r\n\
                 <html><body>Hello</body></html>";

    let message = parse_email(wire).unwrap();
    assert!(message.body_text.is_none());

    let html = message.body_html.as_deref();
    assert_eq!(html, Some("<html><body>Hello</body></html>"));
}

/// The Bcc header is parsed as an address list like To and Cc.
#[test]
fn parse_bcc_addresses() {
    let wire = b"From: a@b.com\r\n\
                 To: to@x.com\r\n\
                 Bcc: hidden@x.com, secret@x.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let message = parse_email(wire).unwrap();
    let blind_copies = &message.bcc;
    assert_eq!(blind_copies.len(), 2);
    assert_eq!(blind_copies[0].email, "hidden@x.com");
}

/// Pathologically deep MIME nesting must not panic or overflow the stack.
///
/// The message wraps a single `text/plain` leaf in 70 nested `multipart/mixed`
/// levels (boundaries `b0` through `b70`), which is meant to exceed the parser's
/// MIME depth limit. Whether the leaf body is still surfaced is deliberately left
/// unspecified; the test only requires that parsing succeeds and that the
/// top-level headers are still interpreted.
#[test]
fn mime_depth_limit() {
    let mut msg = b"From: a@b.com\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                    Content-Type: multipart/mixed; boundary=\"b0\"\r\n\r\n"
        .to_vec();

    // Level i opens boundary b{i} and declares the nested boundary b{i+1}.
    for i in 0..70 {
        msg.extend_from_slice(
            format!(
                "--b{i}\r\nContent-Type: multipart/mixed; boundary=\"b{}\"\r\n\r\n",
                i + 1
            )
            .as_bytes(),
        );
    }
    msg.extend_from_slice(b"--b70\r\nContent-Type: text/plain\r\n\r\nDeep\r\n--b70--\r\n");

    // Returning `Ok` at all shows there was no panic or stack overflow.
    let parsed = parse_email(&msg).unwrap();

    // The top-level headers must be unaffected by the depth handling. (The body
    // may or may not be found, so it is intentionally not asserted.)
    assert_eq!(parsed.from[0].email, "a@b.com");
}

/// Reply-To is an address list; every listed mailbox is kept, in order.
#[test]
fn parse_reply_to() {
    let wire = b"From: a@b.com\r\n\
                 Reply-To: noreply@example.com, support@example.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let message = parse_email(wire).unwrap();
    let targets = &message.reply_to;
    assert_eq!(targets.len(), 2);
    assert_eq!(targets[0].email, "noreply@example.com");
    assert_eq!(targets[1].email, "support@example.com");
}

/// A base64 encoded-word in GB2312 must be transcoded to Unicode.
/// The payload bytes 0xC4 0xE3 0xBA 0xC3 spell "你好" (nǐ hǎo) in GB2312.
#[test]
fn parse_gb2312_encoded_word() {
    let wire = b"From: sender@example.com\r\n\
                 Subject: =?GB2312?B?xOO6ww==?=\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let message = parse_email(wire).unwrap();
    let subject = message.subject.as_deref();
    assert_eq!(subject, Some("你好"));
}

/// The Content-ID of a part is reported without its angle brackets; everything
/// between them (including a `cid:` prefix) is kept unchanged.
#[test]
fn parse_content_id_strips_brackets() {
    let wire = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
                 \r\n\
                 --b\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 Body\r\n\
                 --b\r\n\
                 Content-Type: image/png\r\n\
                 Content-ID: <cid:image001@01D00000.00000000>\r\n\
                 \r\n\
                 PNG\r\n\
                 --b--";

    let message = parse_email(wire).unwrap();
    let content_id = message.attachments[0].content_id.as_deref();
    assert_eq!(content_id, Some("cid:image001@01D00000.00000000"));
}

/// `Content-Disposition: attachment` without a filename parameter still produces
/// an attachment entry; it just has no file name and is not inline.
#[test]
fn parse_attachment_without_filename() {
    let wire = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
                 \r\n\
                 --b\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 Body\r\n\
                 --b\r\n\
                 Content-Type: application/octet-stream\r\n\
                 Content-Disposition: attachment\r\n\
                 \r\n\
                 BINARY\r\n\
                 --b--";

    let message = parse_email(wire).unwrap();
    assert_eq!(message.attachments.len(), 1);

    let blob = &message.attachments[0];
    assert!(blob.filename.is_none());
    assert_eq!(blob.content_type, "application/octet-stream");
    assert!(!blob.is_inline);
}

/// `Content-Type: text/plain` with no charset parameter: an ASCII body must come
/// through unchanged under the default charset handling.
#[test]
fn parse_content_type_without_charset_defaults() {
    let wire = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 Hello ASCII";

    let message = parse_email(wire).unwrap();
    let body = message.body_text.as_deref();
    assert_eq!(body, Some("Hello ASCII"));
}

/// RFC 2045 Section 5.2: a part without a Content-Type header defaults to
/// "text/plain; charset=us-ascii". The part below carries only a
/// Content-Transfer-Encoding header and must therefore become the text body.
#[test]
fn parse_mime_part_no_content_type_defaults_to_us_ascii() {
    let wire = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
                 \r\n\
                 --b\r\n\
                 Content-Transfer-Encoding: 7bit\r\n\
                 \r\n\
                 Hello ASCII\r\n\
                 --b--";

    let message = parse_email(wire).unwrap();
    let body = message.body_text.as_deref();
    assert_eq!(body, Some("Hello ASCII"));
}

/// A part may have no headers at all: the blank line right after the boundary
/// delimiter starts the body (a valid RFC 2046 construct). Such a part falls back
/// to text/plain per RFC 2045 Section 5.2 and must be parsed as the text body.
#[test]
fn parse_mime_part_no_headers_at_all() {
    let wire = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
                 \r\n\
                 --b\r\n\
                 \r\n\
                 Headerless body\r\n\
                 --b--";

    let message = parse_email(wire).unwrap();
    let body = message.body_text.as_deref();
    assert_eq!(body, Some("Headerless body"));
}

/// A multipart message made up solely of attachments: no text or HTML body is
/// reported, and the attachments are numbered "1" and "2" in part order.
#[test]
fn parse_multipart_only_attachments() {
    let wire = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
                 \r\n\
                 --b\r\n\
                 Content-Type: application/pdf\r\n\
                 Content-Disposition: attachment; filename=\"a.pdf\"\r\n\
                 \r\n\
                 PDF1\r\n\
                 --b\r\n\
                 Content-Type: image/png\r\n\
                 Content-Disposition: attachment; filename=\"b.png\"\r\n\
                 \r\n\
                 PNG2\r\n\
                 --b--";

    let message = parse_email(wire).unwrap();
    assert!(message.body_text.is_none());
    assert!(message.body_html.is_none());

    let files = &message.attachments;
    assert_eq!(files.len(), 2);
    assert_eq!(files[0].section.as_deref(), Some("1"));
    assert_eq!(files[1].section.as_deref(), Some("2"));
}

/// A body labelled with an unrecognised charset must still produce text: the
/// decoder is expected to fall back to a lossy UTF-8 conversion instead of
/// dropping the body.
#[test]
fn parse_unknown_charset_body_fallback() {
    let wire = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: text/plain; charset=x-unknown-fake\r\n\
                 \r\n\
                 Plain text in unknown charset";

    let message = parse_email(wire).unwrap();
    // A body must exist and contain the ASCII prefix.
    assert!(matches!(
        message.body_text.as_deref(),
        Some(text) if text.contains("Plain text")
    ));
}

/// A part that has a Content-ID but no Content-Disposition header is classified
/// as an inline attachment.
#[test]
fn parse_content_id_without_disposition_is_inline() {
    let wire = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
                 \r\n\
                 --b\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 Body\r\n\
                 --b\r\n\
                 Content-Type: image/gif\r\n\
                 Content-ID: <img42>\r\n\
                 \r\n\
                 GIF89a\r\n\
                 --b--";

    let message = parse_email(wire).unwrap();
    assert_eq!(message.attachments.len(), 1);

    let image = &message.attachments[0];
    assert!(image.is_inline);
    assert_eq!(image.content_id.as_deref(), Some("img42"));
}

/// A 10,000-character Subject value must be parsed without panicking and
/// returned in full.
#[test]
fn parse_overlong_subject() {
    let long_subject = "A".repeat(10_000);
    let message = format!(
        "From: a@b.com\r\n\
         Subject: {long_subject}\r\n\
         Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
         \r\n"
    );

    let email = parse_email(message.as_bytes()).unwrap();

    // Compare against the complete generated value so truncation is caught.
    let expected = Some(long_subject.as_str());
    assert_eq!(email.subject.as_deref(), expected);
}

/// RFC 5322 Section 3.6.2 defines `From:` as a mailbox-list
/// (`mailbox *("," mailbox)`), so every originator must be kept along with
/// its display name.
#[test]
fn parse_multiple_from_preserves_all() {
    let message = b"From: Alice <alice@example.com>, Bob <bob@example.com>\r\n\
                Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                \r\n";

    let email = parse_email(message).unwrap();
    let originators = &email.from;
    assert_eq!(
        originators.len(),
        2,
        "RFC 5322 Section 3.6.2: all From mailboxes must be preserved"
    );

    let (alice, bob) = (&originators[0], &originators[1]);
    assert_eq!(alice.email, "alice@example.com");
    assert_eq!(alice.name.as_deref(), Some("Alice"));
    assert_eq!(bob.email, "bob@example.com");
    assert_eq!(bob.name.as_deref(), Some("Bob"));
}

/// Bare-address variant of the mailbox-list check (RFC 5322 Section 3.6.2).
///
/// Despite the `takes_first` in the name, this test asserts that *both*
/// addresses in the From header are preserved, in order.
#[test]
fn parse_multiple_from_takes_first() {
    let message = b"From: first@example.com, second@example.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let email = parse_email(message).unwrap();
    let originators = &email.from;
    assert_eq!(originators.len(), 2);
    assert_eq!(originators[0].email, "first@example.com");
    assert_eq!(originators[1].email, "second@example.com");
}

/// A top-level `multipart/mixed` Content-Type with no `boundary` parameter
/// must not panic, and the body must still be exposed as text.
#[test]
fn parse_multipart_no_boundary_param() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed\r\n\
                 \r\n\
                 Some text content";

    let email = parse_email(message).unwrap();

    // Only the presence of a text body is pinned, not its exact content.
    assert!(email.body_text.is_some());
}

/// Headers followed by the blank separator line and nothing else: headers
/// still parse, and no text body is reported.
#[test]
fn parse_empty_body_after_headers() {
    let message = b"From: a@b.com\r\n\
                 Subject: Empty body\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let email = parse_email(message).unwrap();

    assert!(email.body_text.is_none());
    assert_eq!(email.subject.as_deref(), Some("Empty body"));
}

/// Two adjacent encoded words using different charsets and encodings
/// (UTF-8/B followed by ISO-8859-1/Q) decode into a single subject string.
#[test]
fn parse_mixed_charset_encoded_words() {
    let message = b"From: a@b.com\r\n\
                 Subject: =?UTF-8?B?SGVsbG8=?= =?ISO-8859-1?Q?_caf=E9?=\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let email = parse_email(message).unwrap();

    // The space comes from the `_` inside the Q-encoded word, not from the
    // whitespace separating the two encoded words.
    let subject = email.subject.as_deref();
    assert_eq!(subject, Some("Hello caf\u{e9}"));
}

/// A message with no Date header parses successfully with `date` unset.
#[test]
fn parse_no_date_header() {
    let message = b"From: a@b.com\r\n\
                 Subject: No date\r\n\
                 \r\n\
                 Body";

    let email = parse_email(message).unwrap();

    assert!(email.date.is_none());
    // The remaining headers are unaffected by the missing Date.
    assert_eq!(email.subject.as_deref(), Some("No date"));
}

/// A text/plain part with `Content-Disposition: attachment` is reported as
/// an attachment and does not replace or join the message body.
#[test]
fn parse_explicit_attachment_text_plain() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
                 \r\n\
                 --b\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 Body text\r\n\
                 --b\r\n\
                 Content-Type: text/plain\r\n\
                 Content-Disposition: attachment; filename=\"log.txt\"\r\n\
                 \r\n\
                 Log file content\r\n\
                 --b--";

    let email = parse_email(message).unwrap();

    // Only the first, disposition-less part feeds the body.
    assert_eq!(email.body_text.as_deref(), Some("Body text"));

    assert_eq!(email.attachments.len(), 1);
    let log = &email.attachments[0];
    assert_eq!(log.filename.as_deref(), Some("log.txt"));
    assert_eq!(log.content_type, "text/plain");
}

/// A `-0800` zone in the Date header is reported as -480 minutes.
#[test]
fn parse_date_negative_timezone() {
    let message = b"From: a@b.com\r\n\
                 Date: Fri, 14 Feb 2025 09:15:00 -0800\r\n\
                 \r\n";

    let email = parse_email(message).unwrap();
    let offset_minutes = email.date.unwrap().tz_offset_minutes;
    assert_eq!(offset_minutes, -480);
}

/// The `size` field of a parsed message equals the byte length of the input.
#[test]
fn parse_size_equals_input_length() {
    let message = b"From: a@b.com\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\nBody";
    let expected_size = message.len() as u64;

    let email = parse_email(message).unwrap();
    assert_eq!(email.size, expected_size);
}

/// Feeding every byte value 0x00..=0xFF once, in order, must produce an
/// error rather than a parsed message.
///
/// The original note expects the failure to be a missing-From error; only
/// the `Err` outcome is asserted here, not the specific variant.
#[test]
fn parse_binary_garbage_returns_error() {
    let garbage: Vec<u8> = (u8::MIN..=u8::MAX).collect();
    assert!(parse_email(&garbage).is_err());
}

/// Two encoded words separated only by a header fold (CRLF + space) are
/// joined with no whitespace between their decoded values.
#[test]
fn parse_folded_encoded_word_subject() {
    let message = b"From: a@b.com\r\nSubject: =?UTF-8?B?SGVsbG8=?=\r\n =?UTF-8?B?V29ybGQ=?=\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";

    let email = parse_email(message).unwrap();

    // "Hello" + "World" with the folding whitespace dropped.
    let subject = email.subject.as_deref();
    assert_eq!(subject, Some("HelloWorld"));
}

// -----------------------------------------------------------------------
// Additional edge case tests
// -----------------------------------------------------------------------

/// RFC 2047 treats the charset and encoding indicators as case-insensitive,
/// so lowercase `utf-8`, `b` and `q` must decode like their uppercase forms.
#[test]
fn parse_encoded_word_lowercase_encoding() {
    let message = b"From: sender@example.com\r\n\
                 Subject: =?utf-8?b?SGVsbG8=?= =?utf-8?q?_World?=\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(subject, Some("Hello World"));
}

/// An encoded word that is never terminated with `?=` must not break
/// parsing; the raw `=?` marker stays visible in the subject.
#[test]
fn parse_malformed_encoded_word_passthrough() {
    let message = b"From: a@b.com\r\n\
                 Subject: =?UTF-8?B?broken\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let email = parse_email(message).unwrap();

    // The malformed token is kept as literal text instead of being decoded.
    let subject = email.subject.as_deref();
    assert!(subject.is_some());
    assert!(subject.unwrap().contains("=?"));
}

/// An encoded word whose encoding indicator is neither `B` nor `Q` (here
/// `X`) is left undecoded, so the `=?` marker remains in the subject.
#[test]
fn parse_encoded_word_unknown_encoding_type() {
    let message = b"From: a@b.com\r\n\
                 Subject: =?UTF-8?X?data?=\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let email = parse_email(message).unwrap();

    let subject = email.subject.as_deref();
    assert!(subject.is_some());
    // The unsupported token passes through as literal text.
    assert!(subject.unwrap().contains("=?"));
}

/// RFC 6532 permits raw UTF-8 in header values; such text must come through
/// unchanged in both the subject and the From display name.
#[test]
fn parse_utf8_directly_in_headers_rfc6532() {
    let message = "From: José <jose@example.com>\r\n\
                Subject: Ñoño café\r\n\
                Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                \r\n\
                Body";

    let email = parse_email(message.as_bytes()).unwrap();
    assert_eq!(email.subject.as_deref(), Some("Ñoño café"));

    let sender = &email.from[0];
    assert_eq!(sender.name.as_deref(), Some("José"));
    assert_eq!(sender.email, "jose@example.com");
}

/// RFC 2046 Section 5.1.1: text before the first boundary delimiter is a
/// preamble and must not leak into the extracted body.
#[test]
fn parse_multipart_with_preamble() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"preamble-test\"\r\n\
                 \r\n\
                 This is the preamble, which should be ignored.\r\n\
                 --preamble-test\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 Actual body\r\n\
                 --preamble-test--";

    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(body, Some("Actual body"));
}

/// When `Content-Disposition: attachment` has no `filename`, the attachment
/// filename is taken from the Content-Type `name` parameter.
#[test]
fn parse_attachment_name_from_content_type() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
                 \r\n\
                 --b\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 Body\r\n\
                 --b\r\n\
                 Content-Type: application/pdf; name=\"report.pdf\"\r\n\
                 Content-Disposition: attachment\r\n\
                 \r\n\
                 PDF\r\n\
                 --b--";

    let email = parse_email(message).unwrap();
    assert_eq!(email.attachments.len(), 1);

    let filename = email.attachments[0].filename.as_deref();
    assert_eq!(filename, Some("report.pdf"));
}

/// A single-part `text/plain` message whose Content-Type carries a `name`
/// parameter, with no Content-Disposition header, is expected to surface as
/// an attachment and leave `body_text` unset.  The rationale recorded with
/// this test is the filename hint described in RFC 2183 Section 2.3.
#[test]
fn text_plain_name_parameter_is_treated_as_attachment() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: text/plain; name=\"notes.txt\"\r\n\
                 Content-Transfer-Encoding: base64\r\n\
                 \r\n\
                 SGVsbG8=\r\n";

    let email = parse_email(message).unwrap();
    assert!(
        email.body_text.is_none(),
        "filename-bearing text/plain part should not become body_text"
    );

    assert_eq!(email.attachments.len(), 1);
    let notes = &email.attachments[0];
    assert_eq!(notes.content_type, "text/plain");
    assert_eq!(notes.filename.as_deref(), Some("notes.txt"));
}

/// Multipart counterpart of the `name`-parameter check: a `text/plain` part
/// inside multipart/mixed that carries `name=...` must be listed as an
/// attachment and must not be promoted to the message body.
#[test]
fn multipart_text_plain_name_parameter_is_treated_as_attachment() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
                 \r\n\
                 --b\r\n\
                 Content-Type: text/plain; name=\"notes.txt\"\r\n\
                 Content-Transfer-Encoding: base64\r\n\
                 \r\n\
                 SGVsbG8=\r\n\
                 --b--";

    let email = parse_email(message).unwrap();
    assert!(
        email.body_text.is_none(),
        "filename-bearing multipart text/plain part should not become body_text"
    );

    assert_eq!(email.attachments.len(), 1);
    let notes = &email.attachments[0];
    assert_eq!(notes.content_type, "text/plain");
    assert_eq!(notes.filename.as_deref(), Some("notes.txt"));
}

/// A quoted-printable soft line break written as `=` + bare LF (instead of
/// `=` + CRLF) is still removed when decoding the body.
#[test]
fn parse_qp_soft_break_lf_only() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: text/plain; charset=utf-8\r\n\
                 Content-Transfer-Encoding: quoted-printable\r\n\
                 \r\n\
                 Hello=\nWorld";

    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(body, Some("HelloWorld"));
}

/// An encoded word sandwiched between plain text keeps the surrounding
/// spaces: `Re: <encoded> there` becomes `Re: Hello there`.
#[test]
fn parse_subject_mixed_encoded_and_plain() {
    let message = b"From: a@b.com\r\n\
                 Subject: Re: =?UTF-8?B?SGVsbG8=?= there\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(subject, Some("Re: Hello there"));
}

/// A body that contains nothing but line breaks and spaces is still
/// reported as `body_text` rather than being treated as absent.
#[test]
fn parse_whitespace_only_body() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: text/plain; charset=utf-8\r\n\
                 \r\n\
                 \r\n  \r\n";

    let email = parse_email(message).unwrap();

    // Only presence is pinned; the exact whitespace returned is not.
    assert!(email.body_text.is_some());
}

/// A Date header with no zone (and no day-of-week) still parses, with the
/// offset defaulting to zero minutes.
#[test]
fn parse_date_missing_timezone() {
    let message = b"From: a@b.com\r\n\
                 Date: 13 Feb 2025 12:00:00\r\n\
                 \r\n";

    let email = parse_email(message).unwrap();
    let parsed_date = email.date.unwrap();

    assert_eq!(parsed_date.year, 2025);
    // A missing zone is treated as +0000.
    assert_eq!(parsed_date.tz_offset_minutes, 0);
}

/// Section identifiers for nested multipart structures use dot notation:
/// parts inside the first outer part are `1.x`, and the second outer part
/// is `2`.
#[test]
fn parse_deeply_nested_section_dot_notation() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"outer\"\r\n\
                 \r\n\
                 --outer\r\n\
                 Content-Type: multipart/related; boundary=\"rel\"\r\n\
                 \r\n\
                 --rel\r\n\
                 Content-Type: text/html\r\n\
                 \r\n\
                 <img src=\"cid:img1\">\r\n\
                 --rel\r\n\
                 Content-Type: image/png\r\n\
                 Content-ID: <img1>\r\n\
                 \r\n\
                 PNG_DATA\r\n\
                 --rel--\r\n\
                 --outer\r\n\
                 Content-Type: application/pdf\r\n\
                 Content-Disposition: attachment; filename=\"doc.pdf\"\r\n\
                 \r\n\
                 PDF\r\n\
                 --outer--";

    let email = parse_email(message).unwrap();

    // The HTML part (nested at 1.1) becomes the HTML body.
    let html = email.body_html.as_deref();
    assert!(html.is_some());
    assert!(html.unwrap().contains("cid:img1"));

    // The inline image is the second child of the first outer part: 1.2.
    let image = email
        .attachments
        .iter()
        .find(|att| att.content_type == "image/png")
        .unwrap();
    assert_eq!(image.section.as_deref(), Some("1.2"));
    assert!(image.is_inline);

    // The PDF is the second top-level part: 2.
    let pdf = email
        .attachments
        .iter()
        .find(|att| att.content_type == "application/pdf")
        .unwrap();
    assert_eq!(pdf.section.as_deref(), Some("2"));
}

/// Bytes that are not valid UTF-8 (0xFF 0xFE) in a body with no declared
/// charset must not cause a panic, and a text body is still produced.
#[test]
fn parse_non_ascii_bytes_in_body() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 Hello \xff\xfe world";

    let email = parse_email(message).unwrap();

    // The original note expects a lossy conversion; only presence is pinned.
    assert!(email.body_text.is_some());
}

/// Base64 content split across several CRLF-terminated lines (RFC 2045
/// Section 6.8) decodes as if the line breaks were absent.
#[test]
fn parse_base64_body_with_line_breaks() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: text/plain; charset=utf-8\r\n\
                 Content-Transfer-Encoding: base64\r\n\
                 \r\n\
                 SGVs\r\nbG8g\r\nV29y\r\nbGQ=";

    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(body, Some("Hello World"));
}

/// Doubled spaces between Date fields, plus a trailing space, do not
/// disturb the parsed calendar date.
#[test]
fn parse_date_extra_whitespace() {
    let message = b"From: a@b.com\r\n\
                 Date:  Thu,  13  Feb  2025  15:47:33  +0000 \r\n\
                 \r\n";

    let email = parse_email(message).unwrap();
    let parsed_date = email.date.unwrap();

    assert_eq!(parsed_date.year, 2025);
    assert_eq!(parsed_date.month, 2);
    assert_eq!(parsed_date.day, 13);
}

/// The common HTML-with-inline-image layout: multipart/related holding a
/// text/html part and an image with both a Content-ID and an explicit
/// `inline` disposition.
#[test]
fn parse_multipart_related_with_inline_images() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/related; boundary=\"rel\"\r\n\
                 \r\n\
                 --rel\r\n\
                 Content-Type: text/html\r\n\
                 \r\n\
                 <html><img src=\"cid:logo\"></html>\r\n\
                 --rel\r\n\
                 Content-Type: image/jpeg\r\n\
                 Content-ID: <logo>\r\n\
                 Content-Disposition: inline; filename=\"logo.jpg\"\r\n\
                 \r\n\
                 JPEG_DATA\r\n\
                 --rel--";

    let email = parse_email(message).unwrap();
    assert!(email.body_html.is_some());

    // The image is the only attachment and keeps its inline metadata.
    assert_eq!(email.attachments.len(), 1);
    let logo = &email.attachments[0];
    assert!(logo.is_inline);
    assert_eq!(logo.content_id.as_deref(), Some("logo"));
    assert_eq!(logo.filename.as_deref(), Some("logo.jpg"));
}

/// RFC 2183 Sections 2.1 and 2.3: an explicit `inline` disposition may carry
/// a `filename` parameter without turning the part into an attachment.  For
/// a single-part text/plain message the content must therefore still land
/// in `body_text`.
#[test]
fn parse_single_part_text_inline_filename_stays_body_text() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: text/plain; charset=utf-8; name=\"notice.txt\"\r\n\
                 Content-Disposition: inline; filename=\"notice.txt\"\r\n\
                 \r\n\
                 Inline notice";

    let email = parse_email(message).unwrap();

    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Inline notice"),
        "RFC 2183 Sections 2.1 and 2.3: explicit inline text/plain with a filename hint must still populate body_text"
    );
    assert!(
        email.attachments.is_empty(),
        "explicit inline text/plain with a filename hint must not be reclassified as an attachment"
    );
}

/// RFC 2183 Sections 2.1 and 2.3, multipart case: a text/html part marked
/// `inline` with a `filename` parameter must still populate `body_html` and
/// must not be listed as an attachment.
#[test]
fn parse_multipart_inline_html_filename_stays_body_html() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/related; boundary=\"rel\"\r\n\
                 \r\n\
                 --rel\r\n\
                 Content-Type: text/html; charset=utf-8; name=\"body.html\"\r\n\
                 Content-Disposition: inline; filename=\"body.html\"\r\n\
                 \r\n\
                 <p>Inline html body</p>\r\n\
                 --rel--";

    let email = parse_email(message).unwrap();

    let html = email.body_html.as_deref();
    assert_eq!(
        html,
        Some("<p>Inline html body</p>"),
        "RFC 2183 Sections 2.1 and 2.3: explicit inline text/html with a filename hint must still populate body_html"
    );
    assert!(
        email.attachments.is_empty(),
        "explicit inline text/html with a filename hint must not be reclassified as an attachment"
    );
}

/// The smallest accepted message: one From header and the blank separator.
/// Every optional field checked here is left unset.
#[test]
fn parse_minimal_message_from_only() {
    let message = b"From: a@b.com\r\n\r\n";

    let email = parse_email(message).unwrap();
    assert_eq!(email.from[0].email, "a@b.com");

    // Nothing else was supplied, so nothing else is populated.
    assert!(email.subject.is_none());
    assert!(email.date.is_none());
    assert!(email.body_text.is_none());
}

/// When From and Subject each appear twice, the first occurrence of each
/// header wins.
#[test]
fn parse_multiple_same_headers() {
    let message = b"From: first@example.com\r\n\
                 From: second@example.com\r\n\
                 Subject: First\r\n\
                 Subject: Second\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let email = parse_email(message).unwrap();

    let first_sender = &email.from[0];
    assert_eq!(first_sender.email, "first@example.com");
    assert_eq!(email.subject.as_deref(), Some("First"));
}

/// Table-driven check that each supported zone abbreviation in a Date
/// header maps to the expected offset in minutes.
#[test]
fn parse_date_all_named_timezones() {
    // (zone abbreviation, expected offset in minutes)
    let zones = [
        ("EST", -300),
        ("EDT", -240),
        ("CST", -360),
        ("CDT", -300),
        ("MST", -420),
        ("MDT", -360),
        ("PST", -480),
        ("PDT", -420),
        ("GMT", 0),
        ("UTC", 0),
        ("UT", 0),
    ];

    for (tz_name, want_minutes) in zones {
        let message = format!("From: a@b.com\r\nDate: Thu, 13 Feb 2025 12:00:00 {tz_name}\r\n\r\n");
        let email = parse_email(message.as_bytes()).unwrap();
        let got_minutes = email.date.unwrap().tz_offset_minutes;
        assert_eq!(
            got_minutes, want_minutes,
            "Failed for timezone {tz_name}"
        );
    }
}

/// A quoted boundary containing `-`, `=`, `_` and `+` is matched literally
/// against the delimiter lines in the body.
#[test]
fn parse_boundary_with_special_chars() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"----=_Part_123+abc\"\r\n\
                 \r\n\
                 ------=_Part_123+abc\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 Body text\r\n\
                 ------=_Part_123+abc--";

    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(body, Some("Body text"));
}

/// A base64 body with malformed padding and trailing junk must not panic;
/// some text body is still produced.
#[test]
fn parse_truncated_base64_body() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: text/plain; charset=utf-8\r\n\
                 Content-Transfer-Encoding: base64\r\n\
                 \r\n\
                 SGVsbG8gV29yb===invalid";

    let email = parse_email(message).unwrap();

    // Either a partial decode or a raw fallback is acceptable; only the
    // presence of a body is pinned.
    assert!(email.body_text.is_some());
}

/// An empty group in the To header (`Undisclosed:;`) must not prevent the
/// message from parsing.
///
/// The contents of `to` are deliberately left unasserted; only successful
/// parsing and the From address are checked.
#[test]
fn parse_address_group_syntax() {
    let message = b"From: sender@example.com\r\n\
                 To: Undisclosed:;\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let email = parse_email(message).unwrap();

    let sender = &email.from[0];
    assert_eq!(sender.email, "sender@example.com");
}

/// An ISO-2022-JP, base64-encoded subject (common in Japanese mail) decodes
/// to the katakana string "テスト".
#[test]
fn parse_iso2022jp_encoded_word() {
    let message = b"From: a@b.com\r\n\
                 Subject: =?ISO-2022-JP?B?GyRCJUYlOSVIGyhC?=\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let email = parse_email(message).unwrap();

    // The original note credits encoding_rs with the ISO-2022-JP support.
    let subject = email.subject.as_deref();
    assert!(subject.is_some());
    assert_eq!(subject, Some("テスト"));
}

/// A message that declares a multipart boundary which never occurs in the
/// body (for example after a partial fetch) keeps its content as plain
/// text instead of losing it.
#[test]
fn parse_multipart_missing_parts_tolerance() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"never-appears\"\r\n\
                 \r\n\
                 This body doesn't contain any boundaries at all.";

    let email = parse_email(message).unwrap();

    // The whole body is recovered verbatim as text.
    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("This body doesn't contain any boundaries at all.")
    );
    // Nothing else is invented from the malformed structure.
    assert!(email.body_html.is_none());
    assert!(email.attachments.is_empty());
}

/// RFC 2046 Section 5.1.1 makes the boundary parameter mandatory for
/// multipart parts.  A nested multipart part that omits it gets the same
/// best-effort treatment as the top level, so its text is not silently lost.
#[test]
fn parse_nested_multipart_without_boundary_falls_back_to_text() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"outer\"\r\n\
                 \r\n\
                 --outer\r\n\
                 Content-Type: multipart/alternative\r\n\
                 \r\n\
                 Inner text that should not disappear.\r\n\
                 --outer--\r\n";

    let email = parse_email(message).unwrap();

    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Inner text that should not disappear."),
        "malformed nested multipart parts should fall back to simple text"
    );
}

/// RFC 2046 Section 5.1.1 makes the boundary parameter mandatory for
/// multipart parts.  When a boundary-less nested multipart is recovered as
/// a simple part, its attachment metadata must carry the part's position in
/// the outer message (here `2`) rather than a hardcoded top-level `1`.
#[test]
fn parse_nested_multipart_without_boundary_preserves_outer_section_number() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"outer\"\r\n\
                 \r\n\
                 --outer\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 First part.\r\n\
                 --outer\r\n\
                 Content-Type: multipart/mixed\r\n\
                 Content-Disposition: attachment; filename=\"nested.txt\"\r\n\
                 \r\n\
                 Recovered nested payload.\r\n\
                 --outer--\r\n";

    let email = parse_email(message).unwrap();
    assert_eq!(email.attachments.len(), 1);

    let section = email.attachments[0].section.as_deref();
    assert_eq!(
        section,
        Some("2"),
        "recovered malformed nested multipart should keep its outer section number"
    );
}

/// Encoded words are decoded wherever they appear: in the From and To
/// display names as well as in the Subject.
#[test]
fn parse_encoded_word_in_multiple_header_types() {
    let message = b"From: =?UTF-8?Q?M=C3=BCller?= <mueller@example.com>\r\n\
                 To: =?UTF-8?B?U21pdGg=?= <smith@example.com>\r\n\
                 Subject: =?UTF-8?Q?Caf=C3=A9?=\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let email = parse_email(message).unwrap();

    let sender_name = email.from[0].name.as_deref();
    assert_eq!(sender_name, Some("Müller"));
    let recipient_name = email.to[0].name.as_deref();
    assert_eq!(recipient_name, Some("Smith"));
    assert_eq!(email.subject.as_deref(), Some("Café"));
}

/// The attachment `size` matches the byte length of the part body: ten
/// bytes for `0123456789`, with the CRLF before the boundary not counted.
#[test]
fn parse_attachment_size_reflects_part_body() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
                 \r\n\
                 --b\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 Body\r\n\
                 --b\r\n\
                 Content-Type: application/pdf\r\n\
                 Content-Disposition: attachment; filename=\"f.pdf\"\r\n\
                 \r\n\
                 0123456789\r\n\
                 --b--";

    let email = parse_email(message).unwrap();
    assert_eq!(email.attachments.len(), 1);

    let pdf = &email.attachments[0];
    assert_eq!(pdf.size, Some(10));
}

/// RFC 2046 allows the boundary parameter to be given as a bare token
/// without quotes; such a boundary must split the body normally.
#[test]
fn parse_unquoted_boundary() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=simple_boundary\r\n\
                 \r\n\
                 --simple_boundary\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 Text\r\n\
                 --simple_boundary--";

    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(body, Some("Text"));
}

/// RFC 2046 Section 5.1.1: the boundary is a single MIME token.  If a
/// malformed sender leaves stray bare text after it
/// (`boundary=actual unexpected`), recovery should still honour the
/// delimiter that actually appears in the body (`--actual`).
#[test]
fn parse_boundary_ignores_stray_bare_word_after_token() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=actual unexpected\r\n\
                 \r\n\
                 --actual\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 Hello\r\n\
                 --actual--";

    let email = parse_email(message).unwrap();

    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello"),
        "RFC 2046 Section 5.1.1: stray bare text after boundary token must not \
         prevent multipart parsing when the actual delimiter is still unambiguous"
    );
}

/// Some broken mailers emit Message-ID without the surrounding angle
/// brackets; the bare value is accepted and returned as-is.
#[test]
fn parse_message_id_without_angle_brackets() {
    let message = b"From: a@b.com\r\n\
                 Message-ID: bare-id@host.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let email = parse_email(message).unwrap();
    let message_id = email.message_id.as_deref();
    assert_eq!(message_id, Some("bare-id@host.com"));
}

/// The lenient no-brackets fallback still requires a syntactically valid
/// msg-id body (RFC 5322 Section 3.6.4); a bare value with no `@` is
/// dropped.
#[test]
fn parse_message_id_without_angle_brackets_requires_msg_id_syntax() {
    let message = b"From: a@b.com\r\n\
                 Message-ID: not-a-msg-id\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let email = parse_email(message).unwrap();
    assert!(
        email.message_id.is_none(),
        "invalid bare Message-ID fallback must be ignored, got {:?}",
        email.message_id
    );
}

/// RFC 5322 Section 3.6.4: angle brackets alone do not make an identifier
/// valid.  Bracketed text that is not a well-formed msg-id body is ignored.
#[test]
fn parse_message_id_with_invalid_bracketed_syntax_is_ignored() {
    let message = b"From: a@b.com\r\n\
                 Message-ID: <not a msg-id>\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let email = parse_email(message).unwrap();
    assert!(
        email.message_id.is_none(),
        "invalid bracketed Message-ID must be ignored, got {:?}",
        email.message_id
    );
}

/// RFC 5322 Section 4.5.4: the obsolete `obs-id-left` form allows a quoted
/// local-part.  Such a Message-ID is accepted, and the quotes (including
/// the `@` inside them) are kept in the returned value.
#[test]
fn parse_message_id_with_quoted_id_left() {
    let message = b"From: a@b.com\r\n\
                 Message-ID: <\"user@inner\"@example.com>\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let email = parse_email(message).unwrap();
    let message_id = email.message_id.as_deref();
    assert_eq!(
        message_id,
        Some("\"user@inner\"@example.com")
    );
}

/// RFC 5322 Section 4.5.4: with `obs-id-left = local-part` and
/// `obs-id-right = domain`, CFWS may appear between atoms.  That whitespace
/// carries no meaning and is removed from the returned msg-id.
#[test]
fn parse_message_id_with_obs_cfws_between_atoms() {
    let message = b"From: a@b.com\r\n\
                 Message-ID: <foo . bar@example . com>\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let email = parse_email(message).unwrap();
    let message_id = email.message_id.as_deref();
    assert_eq!(
        message_id,
        Some("foo.bar@example.com"),
        "obsolete CFWS inside a bracketed Message-ID must be normalized \
         per RFC 5322 Section 4.5.4"
    );
}

#[test]
fn parse_in_reply_to_without_angle_brackets() {
    // Broken mailers sometimes emit In-Reply-To without `<...>`. The bare
    // msg-id bodies should still be recovered, in order, so threads can be
    // reconstructed.
    let wire = b"From: a@b.com\r\n\
                 In-Reply-To: first@host second@[127.0.0.1]\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";
    let expected = vec!["first@host", "second@[127.0.0.1]"];

    let message = parse_email(wire).unwrap();
    assert_eq!(message.in_reply_to, expected);
}

#[test]
fn parse_references_without_angle_brackets() {
    // References written without `<...>` by broken mailers: every bare
    // msg-id body that can be recovered is kept, preserving input order.
    let wire = b"From: a@b.com\r\n\
                 References: ref1@host ref2@[IPv6:2001:db8::1]\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";
    let expected = vec!["ref1@host", "ref2@[IPv6:2001:db8::1]"];

    let message = parse_email(wire).unwrap();
    assert_eq!(message.references, expected);
}

#[test]
fn parse_references_filters_invalid_bracketed_msg_ids() {
    // Each `<...>` token in References has to be a valid msg-id body
    // (RFC 5322 Section 3.6.4). The invalid middle token must be dropped
    // while its valid neighbours are kept in order.
    let wire = b"From: a@b.com\r\n\
                 References: <good@example.com> <not a msg-id> <also.good@[127.0.0.1]>\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";
    let expected = vec!["good@example.com", "also.good@[127.0.0.1]"];

    let message = parse_email(wire).unwrap();
    assert_eq!(message.references, expected);
}

#[test]
fn parse_empty_references_header() {
    // A References field whose value holds no message-ids at all yields an
    // empty list.
    let message = parse_email(
        b"From: a@b.com\r\n\
          References: \r\n\
          Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
          \r\n",
    )
    .unwrap();

    assert!(message.references.is_empty());
}

#[test]
fn parse_large_multipart_many_attachments() {
    // One text part followed by five attachment parts: checks that each
    // attachment gets the expected section number and filename.
    let mut raw = Vec::new();
    raw.extend_from_slice(
        b"From: a@b.com\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
          Content-Type: multipart/mixed; boundary=\"multi\"\r\n\r\n",
    );

    // Section 1: the plain-text body.
    raw.extend_from_slice(b"--multi\r\nContent-Type: text/plain\r\n\r\nBody\r\n");

    // Sections 2..=6: the attachments.
    for i in 1..=5 {
        let part = format!(
            "--multi\r\nContent-Type: application/octet-stream\r\n\
             Content-Disposition: attachment; filename=\"file{i}.bin\"\r\n\r\n\
             DATA{i}\r\n"
        );
        raw.extend_from_slice(part.as_bytes());
    }
    raw.extend_from_slice(b"--multi--");

    let parsed = parse_email(&raw).unwrap();
    assert_eq!(parsed.body_text.as_deref(), Some("Body"));
    assert_eq!(parsed.attachments.len(), 5);

    for (i, att) in parsed.attachments.iter().enumerate() {
        // Attachment index 0 lives in section "2" because the body is "1".
        let expected_section = (i + 2).to_string();
        let expected_name = format!("file{}.bin", i + 1);

        assert_eq!(
            att.section.as_deref(),
            Some(expected_section.as_str()),
            "Wrong section for attachment {i}"
        );
        assert_eq!(att.filename.as_deref(), Some(expected_name.as_str()));
    }
}

#[test]
fn parse_message_id_empty_brackets() {
    // `<>` carries no identifier, so no Message-ID is reported.
    let message = parse_email(
        b"From: a@b.com\r\n\
          Message-ID: <>\r\n\
          Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
          \r\n",
    )
    .unwrap();

    assert!(message.message_id.is_none());
}

#[test]
fn parse_message_id_empty_value() {
    // A Message-ID field with nothing after the colon yields no identifier.
    let message = parse_email(
        b"From: a@b.com\r\n\
          Message-ID: \r\n\
          Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
          \r\n",
    )
    .unwrap();

    assert!(message.message_id.is_none());
}

#[test]
fn rfc2231_param_boundary_check() {
    // `filename*=` has to be matched at a parameter boundary. The decoy
    // `xfilename*=` merely ends with the same text and must be skipped.
    let header = "attachment; xfilename*=UTF-8''bad.pdf; filename*=UTF-8''good.pdf";

    let filename = extract_rfc2231_param(header, "filename");
    assert_eq!(filename.as_deref(), Some("good.pdf"));
}

#[test]
fn rfc2231_param_at_start() {
    // The parameter is the first thing in the value, with no `;` before it.
    let filename = extract_rfc2231_param("filename*=UTF-8''test.pdf", "filename");
    assert_eq!(filename.as_deref(), Some("test.pdf"));
}

#[test]
fn parse_quoted_display_name_with_comma() {
    // A display name containing a comma is carried in a quoted-string
    // (RFC 5322 Section 3.4); the comma must not split the mailbox.
    let wire = b"From: \"Doe, John\" <john@example.com>\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let message = parse_email(wire).unwrap();
    let author = &message.from[0];
    assert_eq!(author.name.as_deref(), Some("Doe, John"));
    assert_eq!(author.email, "john@example.com");
}

#[test]
fn parse_quoted_display_name_with_escaped_chars() {
    // Quoted-pairs (`\"`) inside a quoted display name stand for the literal
    // character (RFC 5322 Section 3.2.4) and are unescaped in the result.
    let wire = b"From: \"John \\\"Doc\\\" Doe\" <john@example.com>\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let message = parse_email(wire).unwrap();
    let author = &message.from[0];
    assert_eq!(author.name.as_deref(), Some("John \"Doc\" Doe"));
    assert_eq!(author.email, "john@example.com");
}

#[test]
fn unescape_quoted_string_backslash() {
    // (escaped input, expected output) pairs: plain text is untouched,
    // `\\` and `\"` collapse to the escaped character, and a lone trailing
    // backslash is kept as-is.
    let cases = [
        ("hello", "hello"),
        ("a\\\\b", "a\\b"),
        ("a\\\"b", "a\"b"),
        ("trailing\\", "trailing\\"),
    ];

    for &(escaped, expected) in &cases {
        assert_eq!(unescape_quoted_string(escaped), expected);
    }
}

#[test]
fn parse_address_list_with_escaped_quotes_in_display_name() {
    // While splitting an address list, a backslash-escaped quote inside a
    // quoted-string (RFC 5322 Section 3.2.4) must not be taken as the end of
    // the quoted context, or the following comma would be misread.
    let wire = b"From: a@b.com\r\n\
                 To: \"A\\\"B\" <a@x.com>, c@d.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let message = parse_email(wire).unwrap();
    let recipients = &message.to;
    assert_eq!(
        recipients.len(),
        2,
        "Expected 2 To addresses but got {:?}",
        recipients
    );

    let (first, second) = (&recipients[0], &recipients[1]);
    assert_eq!(first.email, "a@x.com");
    assert_eq!(first.name.as_deref(), Some("A\"B"));
    assert_eq!(second.email, "c@d.com");
}

#[test]
fn parse_rfc2231_continuation_filename() {
    // RFC 2231 Section 3 splits long parameter values into numbered pieces:
    //   filename*0="very_long_"; filename*1="filename.pdf"
    // The attachment filename should be the pieces joined in order.
    let wire = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
                 \r\n\
                 --b\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 Body\r\n\
                 --b\r\n\
                 Content-Type: application/pdf\r\n\
                 Content-Disposition: attachment; filename*0=\"very_long_\"; filename*1=\"filename.pdf\"\r\n\
                 \r\n\
                 DATA\r\n\
                 --b--";

    let message = parse_email(wire).unwrap();
    assert_eq!(message.attachments.len(), 1);

    let attachment = &message.attachments[0];
    assert_eq!(
        attachment.filename.as_deref(),
        Some("very_long_filename.pdf"),
        "RFC 2231 continuation filename not reassembled"
    );
}

#[test]
fn parse_rfc2231_continuation_with_charset() {
    // RFC 2231 Sections 3 and 4 combined: numbered continuation pieces that
    // are also charset-tagged and percent-encoded:
    //   filename*0*=UTF-8''r%C3%A9sum; filename*1*=%C3%A9.pdf
    let wire = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
                 \r\n\
                 --b\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 Body\r\n\
                 --b\r\n\
                 Content-Type: application/pdf\r\n\
                 Content-Disposition: attachment; filename*0*=UTF-8''r%C3%A9sum; filename*1*=%C3%A9.pdf\r\n\
                 \r\n\
                 DATA\r\n\
                 --b--";

    let message = parse_email(wire).unwrap();
    assert_eq!(message.attachments.len(), 1);

    let attachment = &message.attachments[0];
    assert_eq!(
        attachment.filename.as_deref(),
        Some("résumé.pdf"),
        "RFC 2231 continuation with charset not reassembled"
    );
}

/// Commas inside a parenthesized comment (RFC 5322 Section 3.2.2) must NOT
/// split the address list. The comment `(Doe, John)` is one unit, so its
/// comma is not an address separator.
#[test]
fn parse_address_comment_with_comma() {
    let wire = b"From: sender@example.com\r\n\
                 To: user@example.com (Doe, John), other@example.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let message = parse_email(wire).unwrap();
    let recipients = &message.to;
    assert_eq!(
        recipients.len(),
        2,
        "Expected 2 To addresses but got {:?}",
        recipients
    );

    let (first, second) = (&recipients[0], &recipients[1]);
    assert_eq!(first.email, "user@example.com");
    assert_eq!(
        first.name.as_deref(),
        Some("Doe, John"),
        "Display name from comment should be preserved intact"
    );
    assert_eq!(second.email, "other@example.com");
}

#[test]
fn parse_header_unfolding_preserves_wsp() {
    // Unfolding (RFC 5322 Section 2.2.3) removes only the CRLF. The tab that
    // opens the continuation line belongs to the FWS and stays in the value,
    // so it should end up between "Hello" and "World".
    let wire = b"From: a@b.com\r\n\
                 Subject: Hello\r\n\
                 \tWorld\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let message = parse_email(wire).unwrap();
    let subject = message.subject.as_deref();
    assert_eq!(
        subject,
        Some("Hello\tWorld"),
        "Tab from continuation line should be preserved per RFC 5322 Section 2.2.3"
    );
}

#[test]
fn datetime_to_unix_timestamp() {
    use crate::types::DateTime;

    // 2025-02-13T15:47:33Z corresponds to Unix time 1739461653.
    let utc = DateTime {
        year: 2025,
        month: 2,
        day: 13,
        hour: 15,
        minute: 47,
        second: 33,
        tz_offset_minutes: 0,
    };
    let utc_seconds = utc.to_unix_timestamp();
    assert_eq!(utc_seconds, 1_739_461_653);

    // The same instant written with a +0530 offset (local time 21:17:33)
    // must convert to the identical timestamp.
    let shifted = DateTime {
        hour: 21,
        minute: 17,
        tz_offset_minutes: 330,
        ..utc
    };
    assert_eq!(shifted.to_unix_timestamp(), utc.to_unix_timestamp());
}

#[test]
fn datetime_from_unix_timestamp() {
    use crate::types::DateTime;

    // 2025-02-13T15:47:33Z
    let instant = 1_739_461_653_i64;

    // Zero offset: all fields match the UTC wall clock.
    let utc = DateTime::from_unix_timestamp(instant, 0);
    assert_eq!((utc.year, utc.month, utc.day), (2025, 2, 13));
    assert_eq!((utc.hour, utc.minute, utc.second), (15, 47, 33));
    assert_eq!(utc.tz_offset_minutes, 0);

    // +0530 offset: the wall clock moves forward by 5h30m.
    let shifted = DateTime::from_unix_timestamp(instant, 330);
    assert_eq!((shifted.hour, shifted.minute), (21, 17));
}

#[test]
fn datetime_round_trip_timestamp() {
    use crate::types::DateTime;

    // Last second of 2025 at UTC-08:00; converting to a timestamp and back
    // with the same offset must reproduce every field.
    let offset_minutes = -480;
    let original = DateTime {
        year: 2025,
        month: 12,
        day: 31,
        hour: 23,
        minute: 59,
        second: 59,
        tz_offset_minutes: offset_minutes,
    };

    let round_tripped =
        DateTime::from_unix_timestamp(original.to_unix_timestamp(), offset_minutes);
    assert_eq!(original, round_tripped);
}

#[test]
fn datetime_ord_comparison() {
    use crate::types::DateTime;
    use std::cmp::Ordering;

    // Noon UTC on 2025-01-01 is the reference instant.
    let utc = DateTime {
        year: 2025,
        month: 1,
        day: 1,
        hour: 12,
        minute: 0,
        second: 0,
        tz_offset_minutes: 0,
    };

    // 07:00 at UTC-05:00 is the very same instant, so ordering reports Equal
    // even though the field values differ.
    let est = DateTime {
        hour: 7,
        tz_offset_minutes: -300,
        ..utc
    };
    assert_eq!(utc.cmp(&est), Ordering::Equal);

    // One hour later in the same zone compares as greater.
    let later = DateTime { hour: 13, ..utc };
    assert!(later > utc);
}

#[test]
fn datetime_epoch() {
    use crate::types::DateTime;

    // Timestamp 0 at zero offset is 1970-01-01 00:00:00, and converts back
    // to 0.
    let origin = DateTime::from_unix_timestamp(0, 0);
    assert_eq!((origin.year, origin.month, origin.day), (1970, 1, 1));
    assert_eq!((origin.hour, origin.minute, origin.second), (0, 0, 0));
    assert_eq!(origin.to_unix_timestamp(), 0);
}

#[test]
fn parse_headers_only_extracts_metadata() {
    // A full multipart message: the header-only entry point should fill in
    // the header-derived fields and leave everything body-derived empty.
    let wire = b"From: sender@example.com\r\n\
                 To: recipient@example.com\r\n\
                 Subject: Test\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Message-ID: <abc123@example.com>\r\n\
                 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
                 \r\n\
                 --b\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 This body should NOT be parsed\r\n\
                 --b\r\n\
                 Content-Type: application/pdf\r\n\
                 Content-Disposition: attachment; filename=\"doc.pdf\"\r\n\
                 \r\n\
                 PDF_DATA\r\n\
                 --b--";

    let headers = parse_headers_only(wire).unwrap();

    // Populated from the header block.
    assert_eq!(headers.from[0].email, "sender@example.com");
    assert_eq!(headers.to.len(), 1);
    assert_eq!(headers.subject.as_deref(), Some("Test"));
    assert_eq!(headers.message_id.as_deref(), Some("abc123@example.com"));
    assert!(headers.date.is_some());

    // Left empty because the body is skipped.
    assert!(headers.body_text.is_none());
    assert!(headers.body_html.is_none());
    assert!(headers.attachments.is_empty());
}

#[test]
fn parse_headers_only_empty_input() {
    // Zero bytes of input is reported as `Error::EmptyInput`.
    let outcome = parse_headers_only(b"");
    let is_empty_input_error = matches!(outcome, Err(Error::EmptyInput));
    assert!(is_empty_input_error);
}

#[test]
fn parse_headers_only_missing_from() {
    // No From field at all: header-only parsing should still succeed, with
    // an empty `from` list and the other fields intact.
    let wire = b"Subject: No From\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let headers = parse_headers_only(wire).expect(
        "headers-only parsing should tolerate missing From when the header block \
         is otherwise usable",
    );

    let authors = &headers.from;
    assert!(
        authors.is_empty(),
        "missing From should produce an empty from list, got {:?}",
        authors
    );
    assert_eq!(headers.subject.as_deref(), Some("No From"));
}

/// Malformed input with no header fields whatsoever is still accepted by
/// `parse_headers_only`: the result is an empty header view rather than a
/// structural error.
#[test]
fn parse_headers_only_body_only_message_without_headers() {
    let raw = b"Body-only message with no header section";

    let headers = parse_headers_only(raw).expect(
        "headers-only parsing should accept body-only malformed messages so \
         callers can still inspect size and raw header state",
    );

    // Nothing header-derived is present...
    assert!(headers.from.is_empty());
    assert!(headers.raw_headers.is_empty());
    assert!(headers.subject.is_none());

    // ...nothing body-derived is produced...
    assert!(headers.body_text.is_none());
    assert!(headers.body_html.is_none());
    assert!(headers.attachments.is_empty());

    // ...but the reported size still covers the whole input.
    assert_eq!(headers.size, raw.len() as u64);
}

/// A header block made up solely of optional/custom fields
/// (RFC 5322 Section 3.6.8) is also tolerated by headers-only parsing.
#[test]
fn parse_headers_only_custom_headers_without_well_known_headers() {
    let wire = b"X-Trace: 12345\r\n\
                 List-Id: Example List <list.example>\r\n\
                 \r\n";

    let headers = parse_headers_only(wire).expect(
        "headers-only parsing should accept header blocks containing only \
         optional fields",
    );
    assert!(headers.from.is_empty());
    assert!(headers.subject.is_none());

    // True when `extra_headers` holds the given (name, value) pair.
    let has_extra = |wanted_name: &str, wanted_value: &str| {
        headers
            .extra_headers
            .iter()
            .any(|(name, value)| name == wanted_name && value == wanted_value)
    };

    assert!(
        has_extra("x-trace", "12345"),
        "X-Trace must be preserved in extra_headers"
    );
    assert!(
        has_extra("list-id", "Example List <list.example>"),
        "List-Id must be preserved in extra_headers"
    );
}

/// RFC 5322 Section 2.2.3: if a field body only starts on the first folded
/// continuation line, unfolding drops the CRLF and exactly one leading
/// SP/HTAB, which acts purely as the structural separator in front of the
/// field body. Any further leading whitespace is part of the value.
#[test]
fn parse_headers_only_extra_header_starting_on_continuation_strips_structural_wsp() {
    // Kept on one source line: the continuation lines begin with significant
    // spaces that a `\`-newline string continuation would swallow.
    let wire = b"From: sender@example.com\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\nX-Note:\r\n hello\r\nX-Indent:\r\n  value\r\n\r\n";

    let headers = parse_headers_only(wire).unwrap();

    // True when `extra_headers` holds the given (name, value) pair.
    let has_extra = |wanted_name: &str, wanted_value: &str| {
        headers
            .extra_headers
            .iter()
            .any(|(name, value)| name == wanted_name && value == wanted_value)
    };

    assert!(
        has_extra("x-note", "hello"),
        "first continuation SP must be treated as structural, got {:?}",
        headers.extra_headers
    );
    assert!(
        has_extra("x-indent", " value"),
        "only one structural SP should be stripped from the first continuation line, got {:?}",
        headers.extra_headers
    );
}

// -----------------------------------------------------------------------
// extract_param quoted-string handling (RFC 5322 §3.2.4)
// -----------------------------------------------------------------------

#[test]
fn extract_param_unescapes_backslash_in_filename() {
    // In a quoted-string the quoted-pair `\\` denotes one literal backslash
    // (RFC 5322 Section 3.2.4). On the wire the filename is `path\\file.pdf`;
    // after unescaping it should read `path\file.pdf`.
    let wire = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
                 \r\n\
                 --b\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 Body\r\n\
                 --b\r\n\
                 Content-Type: application/pdf\r\n\
                 Content-Disposition: attachment; filename=\"path\\\\file.pdf\"\r\n\
                 \r\n\
                 DATA\r\n\
                 --b--";

    let message = parse_email(wire).unwrap();
    assert_eq!(message.attachments.len(), 1);

    let attachment = &message.attachments[0];
    assert_eq!(
        attachment.filename.as_deref(),
        Some("path\\file.pdf"),
        "Backslash in quoted-string filename must be unescaped per RFC 5322 Section 3.2.4"
    );
}

#[test]
fn extract_param_handles_escaped_quote_in_filename() {
    // In a quoted-string the quoted-pair `\"` denotes a literal double-quote
    // (RFC 5322 Section 3.2.4). The escaped quote must not be mistaken for
    // the closing quote, and the value is unescaped afterwards: the wire
    // form `file\"name.pdf` should come out as `file"name.pdf`.
    let wire = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
                 \r\n\
                 --b\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 Body\r\n\
                 --b\r\n\
                 Content-Type: application/pdf\r\n\
                 Content-Disposition: attachment; filename=\"file\\\"name.pdf\"\r\n\
                 \r\n\
                 DATA\r\n\
                 --b--";

    let message = parse_email(wire).unwrap();
    assert_eq!(message.attachments.len(), 1);

    let attachment = &message.attachments[0];
    assert_eq!(
        attachment.filename.as_deref(),
        Some("file\"name.pdf"),
        "Escaped quote in quoted-string filename must be handled per RFC 5322 Section 3.2.4"
    );
}

#[test]
fn build_then_parse_filename_with_backslash_round_trip() {
    // Build a message whose attachment filename contains a backslash, then
    // parse the built bytes again. The builder escapes `\` as `\\`
    // (RFC 5322 Section 3.2.4) and the parser has to undo that so the
    // original filename comes back.
    let sender = Address {
        name: None,
        email: "a@b.com".into(),
    };
    let recipient = Address {
        name: None,
        email: "to@b.com".into(),
    };
    let attachment = crate::types::OutgoingAttachment {
        filename: "path\\file.pdf".into(),
        content_type: "application/pdf".into(),
        data: b"data".to_vec(),
        is_inline: false,
        content_id: None,
    };

    let outgoing = crate::types::OutgoingEmail {
        from: vec![sender],
        sender: None,
        to: vec![recipient],
        cc: Vec::new(),
        bcc: Vec::new(),
        reply_to: Vec::new(),
        date: None,
        subject: "test".into(),
        body_text: Some("Body".into()),
        body_html: None,
        in_reply_to: Vec::new(),
        references: Vec::new(),
        attachments: vec![attachment],
        extra_headers: Vec::new(),
    };

    let built = crate::build_message(&outgoing).unwrap();
    let reparsed = parse_email(&built.raw).unwrap();

    assert_eq!(reparsed.attachments.len(), 1);
    let recovered = reparsed.attachments[0].filename.as_deref();
    assert_eq!(
        recovered,
        Some("path\\file.pdf"),
        "Round-trip filename with backslash must be preserved"
    );
}

#[test]
fn build_then_parse_filename_with_quote_round_trip() {
    // Build a message whose attachment filename contains a double-quote,
    // parse the built bytes, and expect the filename to come back unchanged.
    let sender = Address {
        name: None,
        email: "a@b.com".into(),
    };
    let recipient = Address {
        name: None,
        email: "to@b.com".into(),
    };
    let attachment = crate::types::OutgoingAttachment {
        filename: "file\"name.pdf".into(),
        content_type: "application/pdf".into(),
        data: b"data".to_vec(),
        is_inline: false,
        content_id: None,
    };

    let outgoing = crate::types::OutgoingEmail {
        from: vec![sender],
        sender: None,
        to: vec![recipient],
        cc: Vec::new(),
        bcc: Vec::new(),
        reply_to: Vec::new(),
        date: None,
        subject: "test".into(),
        body_text: Some("Body".into()),
        body_html: None,
        in_reply_to: Vec::new(),
        references: Vec::new(),
        attachments: vec![attachment],
        extra_headers: Vec::new(),
    };

    let built = crate::build_message(&outgoing).unwrap();
    let reparsed = parse_email(&built.raw).unwrap();

    assert_eq!(reparsed.attachments.len(), 1);
    let recovered = reparsed.attachments[0].filename.as_deref();
    assert_eq!(
        recovered,
        Some("file\"name.pdf"),
        "Round-trip filename with double-quote must be preserved"
    );
}

// -----------------------------------------------------------------------
// to_ascii_lowercase for non-ASCII safety (RFC 6532)
// -----------------------------------------------------------------------

#[test]
fn extract_param_with_non_ascii_before_param() {
    // RFC 6532 permits raw UTF-8 in headers. İ (U+0130) takes 2 bytes in
    // UTF-8 but becomes 3 bytes ('i' + combining dot) under full Unicode
    // lowercasing, so an implementation that lowercases with to_lowercase()
    // and reuses byte offsets would be misaligned for anything after it.
    // to_ascii_lowercase() keeps the byte length unchanged.
    //
    // extract_param is called directly, with İ placed in a parameter that
    // precedes the one being looked up.
    let header = "attachment; description=\"\u{0130}stanbul\"; filename=\"report.pdf\"";

    let filename = extract_param(header, "filename");
    assert_eq!(
        filename.as_deref(),
        Some("report.pdf"),
        "extract_param must work when non-ASCII chars that change byte length \
         under Unicode lowercasing appear before the target parameter (RFC 6532)"
    );
}

// -----------------------------------------------------------------------
// date field range validation (RFC 5322 Section 3.3)
// -----------------------------------------------------------------------

#[test]
fn parse_date_rejects_invalid_hour() {
    // Hours run 0-23 (RFC 5322 Section 3.3); an hour of 25 means the message
    // still parses but carries no date.
    let message = parse_email(
        b"From: a@b.com\r\n\
          Date: Thu, 13 Feb 2025 25:00:00 +0000\r\n\
          \r\n",
    )
    .unwrap();

    assert!(
        message.date.is_none(),
        "Date with hour=25 should be rejected per RFC 5322 Section 3.3"
    );
}

#[test]
fn parse_date_rejects_invalid_minute() {
    // Minutes run 0-59 (RFC 5322 Section 3.3); 60 is out of range.
    let message = parse_email(
        b"From: a@b.com\r\n\
          Date: Thu, 13 Feb 2025 12:60:00 +0000\r\n\
          \r\n",
    )
    .unwrap();

    assert!(
        message.date.is_none(),
        "Date with minute=60 should be rejected per RFC 5322 Section 3.3"
    );
}

#[test]
fn parse_date_rejects_invalid_second() {
    // Seconds run 0-60, where 60 covers a leap second
    // (RFC 5322 Section 3.3); 61 is out of range.
    let message = parse_email(
        b"From: a@b.com\r\n\
          Date: Thu, 13 Feb 2025 12:00:61 +0000\r\n\
          \r\n",
    )
    .unwrap();

    assert!(
        message.date.is_none(),
        "Date with second=61 should be rejected per RFC 5322 Section 3.3"
    );
}

#[test]
fn parse_date_rejects_invalid_day() {
    // Day-of-month runs 1-31 (RFC 5322 Section 3.3); 32 is out of range.
    let message = parse_email(
        b"From: a@b.com\r\n\
          Date: Thu, 32 Feb 2025 12:00:00 +0000\r\n\
          \r\n",
    )
    .unwrap();

    assert!(
        message.date.is_none(),
        "Date with day=32 should be rejected per RFC 5322 Section 3.3"
    );
}

#[test]
fn parse_date_rejects_day_zero() {
    // Day-of-month begins at 1 (RFC 5322 Section 3.3); 00 is out of range.
    let message = parse_email(
        b"From: a@b.com\r\n\
          Date: Thu, 00 Feb 2025 12:00:00 +0000\r\n\
          \r\n",
    )
    .unwrap();

    assert!(
        message.date.is_none(),
        "Date with day=0 should be rejected per RFC 5322 Section 3.3"
    );
}

#[test]
fn parse_date_allows_leap_second() {
    // A seconds value of 60 is legal (leap second, RFC 5322 Section 3.3)
    // and is kept verbatim in the parsed date.
    let message = parse_email(
        b"From: a@b.com\r\n\
          Date: Tue, 30 Jun 2015 23:59:60 +0000\r\n\
          \r\n",
    )
    .unwrap();

    let when = message.date.expect("Leap second (60) should be accepted");
    assert_eq!(when.second, 60);
}

#[test]
fn parse_date_accepts_calendar_invalid_but_syntactically_valid_day() {
    // The RFC 5322 Section 3.3 grammar is `day = ([FWS] 1*2DIGIT FWS)`, so
    // any day 1-31 is syntactically fine whatever the month. Following
    // Postel's law, calendar-impossible dates such as 31 Feb are accepted
    // because they are well-formed and do occur in real mail.
    let message = parse_email(
        b"From: a@b.com\r\n\
          Date: Wed, 31 Feb 2025 12:00:00 +0000\r\n\
          \r\n",
    )
    .unwrap();
    let when = message
        .date
        .expect("Calendar-invalid but syntactically valid date (31 Feb) must be accepted");
    assert_eq!(when.day, 31);
    assert_eq!(when.month, 2);
    assert_eq!(when.year, 2025);

    // The same holds when the date parser is called directly: 31 Apr,
    // 31 Jun, and 29 Feb in a non-leap year.
    // Columns: input, failure message, expected day, expected month.
    let direct_cases = [
        (
            "Mon, 31 Apr 2025 12:00:00 +0000",
            "31 Apr must be accepted",
            31,
            4,
        ),
        (
            "Mon, 31 Jun 2025 12:00:00 +0000",
            "31 Jun must be accepted",
            31,
            6,
        ),
        (
            "Mon, 29 Feb 2023 12:00:00 +0000",
            "29 Feb in non-leap year must be accepted",
            29,
            2,
        ),
    ];
    for (input, why, day, month) in direct_cases {
        let dt = parse_rfc5322_date(input).expect(why);
        assert_eq!(dt.day, day);
        assert_eq!(dt.month, month);
    }
}

#[test]
fn parse_date_with_comment_between_tokens() {
    // The obsolete date syntax (RFC 5322 Section 4.3) permits CFWS, i.e.
    // comments and folding white space, between date tokens. Here the
    // comment "(February)" sits between the day and the month and must not
    // prevent the date from parsing.
    let message = parse_email(
        b"From: a@b.com\r\n\
          Date: Thu, 13 (February) Feb 2025 15:47:33 +0000\r\n\
          \r\n",
    )
    .unwrap();

    let when = message
        .date
        .expect("Date with CFWS comment must parse per RFC 5322 Section 4.3");
    assert_eq!(when.year, 2025);
    assert_eq!(when.month, 2);
    assert_eq!(when.day, 13);
}

#[test]
fn parse_date_with_trailing_comment() {
    // A comment such as "(UTC)" after the zone is common in practice.
    // This is a regression guard for behavior that already works.
    let message = parse_email(
        b"From: a@b.com\r\n\
          Date: Thu, 13 Feb 2025 15:47:33 +0000 (UTC)\r\n\
          \r\n",
    )
    .unwrap();

    let when = message.date.expect("Date with trailing comment must parse");
    assert_eq!(when.year, 2025);
    assert_eq!(when.tz_offset_minutes, 0);
}

#[test]
fn parse_date_with_nested_comments() {
    // Comments may nest (RFC 5322 Section 3.2.2); a nested trailing comment
    // must not break date parsing.
    let message = parse_email(
        b"From: a@b.com\r\n\
          Date: Thu, 13 Feb 2025 15:47:33 +0000 (UTC (nested))\r\n\
          \r\n",
    )
    .unwrap();

    let when = message.date.expect("Date with nested comment must parse");
    assert_eq!(when.year, 2025);
}

#[test]
fn parse_display_name_ending_with_escaped_quote() {
    // RFC 5322 Section 3.2.4: in `"She said \"hello\""` the outermost quotes
    // delimit the quoted-string, while each inner `\"` is a quoted-pair
    // standing for a literal `"`. The name therefore ends in a literal
    // quote, which must survive parsing.
    let wire = b"From: \"She said \\\"hello\\\"\" <she@example.com>\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let message = parse_email(wire).unwrap();
    let display_name = message.from[0].name.as_deref();
    assert_eq!(
        display_name,
        Some("She said \"hello\""),
        "Display name ending with escaped quote must be parsed correctly \
         per RFC 5322 Section 3.2.4"
    );
}

#[test]
fn address_from_str_ending_with_escaped_quote() {
    // Address::from_str has the same pitfall: stripping with
    // trim_matches('"') would remove too many quotes from a name that ends
    // in an escaped quote.
    let input = "\"She said \\\"hello\\\"\" <she@example.com>";

    let address = input.parse::<Address>().unwrap();
    assert_eq!(
        address.name.as_deref(),
        Some("She said \"hello\""),
        "Address::from_str must handle display names ending with escaped quotes"
    );
}

#[test]
fn boundary_must_be_at_line_start() {
    // Per RFC 2046 Section 5.1.1 a boundary delimiter only counts at the
    // beginning of a line (after a CRLF, or at the very start of the body).
    // The same characters appearing in the middle of a body line are plain
    // content and must NOT split the part.
    let wire = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"BOUND\"\r\n\
                 \r\n\
                 --BOUND\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 This line mentions --BOUND in the middle\r\n\
                 --BOUND--";

    let message = parse_email(wire).unwrap();

    // If the mid-line "--BOUND" had been treated as a delimiter, the text
    // body would have been cut before it and would not contain the marker.
    // A missing body is treated as empty text so the assertion fails with a
    // readable message instead of panicking on unwrap.
    let text = message.body_text.as_deref().unwrap_or("");
    let kept_literal = text.contains("--BOUND");
    assert!(
        kept_literal,
        "Mid-line boundary must be treated as literal text per RFC 2046 Section 5.1.1, \
         but body_text was: {text:?}"
    );
}

/// MIME type comparison must be exact: `text/plaintext` is not `text/plain`.
#[test]
fn mime_type_exact_match_not_prefix() {
    // A starts_with("text/plain") check would wrongly accept the made-up
    // type "text/plaintext"; only full equality is correct.
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
                 \r\n\
                 --b\r\n\
                 Content-Type: text/plaintext\r\n\
                 \r\n\
                 Not really plain text\r\n\
                 --b--";

    let msg = parse_email(message).unwrap();
    // The part is expected to land in `attachments`, leaving `body_text` unset.
    assert!(
        msg.body_text.is_none(),
        "text/plaintext must not be treated as text/plain body"
    );
    let attachment_count = msg.attachments.len();
    assert_eq!(
        attachment_count,
        1,
        "text/plaintext should be treated as an attachment"
    );
}

/// A single-part `image/jpeg` message is reported as an attachment.
#[test]
fn parse_single_part_non_text_is_attachment() {
    // Per RFC 2046 and the project requirements ("A part is an attachment
    // if... a non-text/non-multipart part"), a lone image/jpeg body must not
    // be surfaced as body_text.
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: image/jpeg\r\n\
                 Content-Transfer-Encoding: base64\r\n\
                 \r\n\
                 /9j/4AAQSkZJRg==";

    let msg = parse_email(message).unwrap();
    assert!(
        msg.body_text.is_none(),
        "image/jpeg single-part must not populate body_text"
    );
    assert_eq!(
        msg.attachments.len(),
        1,
        "image/jpeg single-part must be treated as an attachment"
    );

    // Only index into the list once its length has been checked.
    let image = &msg.attachments[0];
    assert_eq!(image.content_type, "image/jpeg");
    assert_eq!(image.section.as_deref(), Some("1"));
}

/// A single-part `application/pdf` message is reported as a non-inline
/// attachment carrying its filename.
#[test]
fn parse_single_part_application_pdf_is_attachment() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: application/pdf; name=\"doc.pdf\"\r\n\
                 Content-Disposition: attachment; filename=\"doc.pdf\"\r\n\
                 Content-Transfer-Encoding: base64\r\n\
                 \r\n\
                 JVBERi0xLjQK";

    let msg = parse_email(message).unwrap();
    assert!(
        msg.body_text.is_none(),
        "application/pdf must not populate body_text"
    );
    assert_eq!(msg.attachments.len(), 1);

    // Only index into the list once its length has been checked.
    let pdf = &msg.attachments[0];
    assert_eq!(pdf.content_type, "application/pdf");
    assert_eq!(pdf.filename.as_deref(), Some("doc.pdf"));
    assert!(!pdf.is_inline);
}

/// `Content-Disposition: attachment` wins over a `text/plain` content type.
#[test]
fn parse_single_part_text_plain_with_attachment_disposition() {
    // Project requirements: "A part is an attachment if it has
    // Content-Disposition: attachment" - so this text/plain body must be
    // reported as an attachment and leave body_text unset.
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: text/plain; charset=utf-8\r\n\
                 Content-Disposition: attachment; filename=\"log.txt\"\r\n\
                 \r\n\
                 Server log data here";

    let msg = parse_email(message).unwrap();
    assert!(
        msg.body_text.is_none(),
        "text/plain with disposition:attachment must not populate body_text"
    );
    assert_eq!(msg.attachments.len(), 1);

    // Only index into the list once its length has been checked.
    let log_file = &msg.attachments[0];
    assert_eq!(log_file.content_type, "text/plain");
    assert_eq!(log_file.filename.as_deref(), Some("log.txt"));
}

/// Regression: the Content-Disposition type must be compared as a whole
/// token, never as a prefix.  RFC 2183 Section 2 defines:
///   disposition-type = "inline" / "attachment" / extension-token
/// so an extension-token such as "attachmentfoo" is a different type from
/// "attachment".  A `starts_with("attachment")` check would match it and
/// wrongly turn a text/plain body into an attachment instead of filling
/// `body_text`.
#[test]
fn disposition_type_requires_token_boundary() {
    // "attachmentfoo" is a made-up extension-token; the text/plain body
    // must therefore still be delivered through body_text.
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: text/plain; charset=utf-8\r\n\
                 Content-Disposition: attachmentfoo\r\n\
                 \r\n\
                 This is body text";

    let msg = parse_email(message).unwrap();
    assert!(
        msg.body_text.is_some(),
        "text/plain with disposition:attachmentfoo must populate body_text \
         (RFC 2183 Section 2: disposition-type is a complete token, not a prefix)"
    );
    let attachment_count = msg.attachments.len();
    assert_eq!(
        attachment_count,
        0,
        "extension-token 'attachmentfoo' must not be classified as 'attachment'"
    );
}

/// The empty group `undisclosed-recipients:;` in `To` yields no addresses.
#[test]
fn parse_group_address_empty_undisclosed() {
    // RFC 5322 Section 3.4: group = display-name ":" [group-list] ";"
    // This common empty group must neither fail the parse nor produce
    // malformed addresses.
    let message = b"From: a@b.com\r\n\
                 To: undisclosed-recipients:;\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let msg = parse_email(message).unwrap();
    // The group has no members, so the recipient list stays empty.
    assert!(
        msg.to.is_empty(),
        "empty group undisclosed-recipients:; must produce no addresses, got {:?}",
        msg.to
    );
}

/// A group's member addresses are extracted as individual recipients.
#[test]
fn parse_group_address_with_members() {
    // RFC 5322 Section 3.4: "friends:one@x.com, two@x.com;" is a group with
    // two members, and both must appear in `to`, in order.
    let message = b"From: a@b.com\r\n\
                 To: friends:one@x.com, two@x.com;\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let msg = parse_email(message).unwrap();
    assert_eq!(
        msg.to.len(),
        2,
        "group with 2 members must produce 2 addresses, got {:?}",
        msg.to
    );
    for (idx, expected) in ["one@x.com", "two@x.com"].iter().enumerate() {
        assert_eq!(msg.to[idx].email, *expected);
    }
}

/// Plain addresses and group syntax may be mixed within one header.
#[test]
fn parse_group_address_mixed_with_regular() {
    // One solo address, a two-member group, then another solo address.
    let message = b"From: a@b.com\r\n\
                 To: solo@x.com, friends:one@x.com, two@x.com;, last@x.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let msg = parse_email(message).unwrap();
    let mut emails: Vec<&str> = Vec::with_capacity(msg.to.len());
    for recipient in &msg.to {
        emails.push(recipient.email.as_str());
    }
    assert_eq!(
        emails,
        vec!["solo@x.com", "one@x.com", "two@x.com", "last@x.com"],
        "must extract all 4 addresses from mixed regular+group syntax"
    );
}

/// A lone `=` at the very end of quoted-printable data is dropped.
#[test]
fn decode_qp_trailing_equals_is_soft_break() {
    // RFC 2045 Section 6.7: '=' at end-of-data is a soft line break, so it
    // is removed instead of being emitted as a literal '='.
    let decoded = decode_quoted_printable(b"Hello=");
    assert_eq!(
        decoded, b"Hello",
        "trailing '=' must be treated as soft line break per RFC 2045 Section 6.7"
    );
}

/// `=` followed by a bare CR at end-of-data is also a soft line break.
#[test]
fn decode_qp_trailing_equals_cr_is_soft_break() {
    // The data ends in '=\r' with no LF; both bytes must be removed.
    let decoded = decode_quoted_printable(b"Hello=\r");
    assert_eq!(
        decoded, b"Hello",
        "trailing '=\\r' must be treated as soft line break"
    );
}

/// `=` followed by a bare CR is a soft line break in the middle of the
/// data too, not only at its end.
#[test]
fn decode_qp_bare_cr_soft_break_mid_data() {
    // RFC 2045 Section 6.7 defines '=' + line break as a soft break.
    // Following Postel's law, '=\r' without a trailing '\n' is accepted as
    // such a break even when more data follows it.
    let decoded = decode_quoted_printable(b"Hello=\rWorld");
    assert_eq!(
        decoded, b"HelloWorld",
        "'=\\r' followed by non-LF byte must be treated as soft line break"
    );
}

/// A comment after a bare address becomes the display name and is kept
/// out of the email.
#[test]
fn parse_bare_address_with_trailing_comment() {
    // RFC 5322 Section 3.4.1 lets CFWS follow an addr-spec, and Section
    // 3.2.2 defines parenthesized text as a comment. "(Display Name)" must
    // therefore be removed from the address and reused as its name.
    let message = b"From: sender@example.com\r\n\
                 To: user@example.com (Display Name)\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let msg = parse_email(message).unwrap();
    assert_eq!(msg.to.len(), 1);

    let recipient = &msg.to[0];
    assert_eq!(
        recipient.email, "user@example.com",
        "email must not contain the trailing comment"
    );
    assert_eq!(
        recipient.name.as_deref(),
        Some("Display Name"),
        "trailing comment should become display name per RFC 5322 Section 3.4.1"
    );
}

/// A comment before a bare address becomes the display name and is kept
/// out of the email.
#[test]
fn parse_bare_address_with_leading_comment() {
    // RFC 5322 Section 3.2.2 / RFC 822 convention: a parenthesized comment
    // next to a bare addr-spec acts as the display name regardless of which
    // side it is on. Both placements occur in real mail, and handling the
    // leading form keeps it consistent with the trailing-comment case.
    let message = b"From: sender@example.com\r\n\
                 To: (Comment) user@example.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let msg = parse_email(message).unwrap();
    assert_eq!(msg.to.len(), 1);

    let recipient = &msg.to[0];
    assert_eq!(
        recipient.email, "user@example.com",
        "email must not contain the leading comment"
    );
    assert_eq!(
        recipient.name.as_deref(),
        Some("Comment"),
        "leading comment should be extracted as display name \
         (RFC 5322 Section 3.2.2 / RFC 822 convention)"
    );
}

/// A parameter name that appears inside another parameter's quoted value
/// is not a match.
#[test]
fn extract_param_skips_quoted_values() {
    // RFC 5322 Section 3.2.4: the "charset=bad" text lives inside the
    // quoted boundary value, so only the real charset parameter counts.
    let content_type = "text/html; boundary=\"has charset=bad inside\"; charset=utf-8";
    assert_eq!(
        extract_param(content_type, "charset").as_deref(),
        Some("utf-8"),
        "Should skip match inside quoted boundary value"
    );
}

/// A comment between `=` and an unquoted token is not part of the value.
#[test]
fn extract_param_skips_comment_before_unquoted_value() {
    // RFC 2045 Section 5.1 permits RFC 822 / RFC 5322 comments in
    // structured fields, so the CFWS before the token must be dropped.
    let content_type = "text/plain; charset=(legacy default)windows-1252";
    assert_eq!(
        extract_param(content_type, "charset").as_deref(),
        Some("windows-1252"),
        "RFC 2045 Section 5.1 comments before an unquoted value must be ignored"
    );
}

/// A comment between `=` and a quoted-string is not part of the value.
#[test]
fn extract_param_skips_comment_before_quoted_value() {
    // RFC 2045 Section 5.1 + RFC 5322 Section 3.2.2: CFWS comments are
    // equally allowed in front of a quoted-string parameter value.
    let disposition = "attachment; filename=(human note)\"report.pdf\"";
    assert_eq!(
        extract_param(disposition, "filename").as_deref(),
        Some("report.pdf"),
        "RFC 2045 Section 5.1 comments before a quoted-string value must be ignored"
    );
}

/// A `text/plain` MIME part with no charset parameter is decoded with the
/// US-ASCII default, the same as a top-level body.
#[test]
fn multipart_part_without_charset_uses_us_ascii_default() {
    // RFC 2045 Section 5.2: the default Content-Type is
    // text/plain; charset=us-ascii.
    //
    // The probe byte is 0x93, which Windows-1252 defines as the left double
    // quotation mark (U+201C). encoding_rs maps us-ascii to Windows-1252,
    // so the byte decodes to U+201C; decoded as UTF-8 it would be invalid
    // and turn into U+FFFD.
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
                 \r\n\
                 --b\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 Hello \x93World\r\n\
                 --b--";

    let text = parse_email(message).unwrap().body_text.unwrap();

    // Expect the Windows-1252 interpretation of 0x93 ...
    assert!(
        text.contains('\u{201c}'),
        "Part with text/plain (no charset) should use US-ASCII default per \
         RFC 2045 Section 5.2, decoding 0x93 as U+201C. Got: {text:?}"
    );
    // ... and no replacement character from a UTF-8 decode attempt.
    assert!(
        !text.contains('\u{FFFD}'),
        "Part with text/plain (no charset) should not produce UTF-8 replacement \
         characters. Got: {text:?}"
    );
}

/// Header unfolding must keep the whitespace that ends the first line of
/// a folded header.
///
/// RFC 5322 Section 2.2.3: "Unfolding is accomplished by simply removing
/// any CRLF that is immediately followed by WSP." Nothing but the CRLF is
/// dropped; every other whitespace character, including spaces at the end
/// of the first line, stays in the value.
#[test]
fn parse_header_unfold_preserves_trailing_whitespace() {
    // "Subject: Hello  \r\n World" unfolds to "Hello   World": two spaces
    // that end the first line plus the one that starts the continuation.
    // The folded header is kept on a single source line so the literal's
    // line continuations cannot swallow any of that whitespace.
    let message = b"From: a@b.com\r\n\
                 Subject: Hello  \r\n World\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let msg = parse_email(message).unwrap();
    assert_eq!(
        msg.subject.as_deref(),
        Some("Hello   World"),
        "Trailing whitespace on first line must be preserved during unfolding \
         (RFC 5322 Section 2.2.3)"
    );
}

/// The CRLF that terminates a single-part message is not part of its body
/// text. It is a format artifact (RFC 5322 Section 3.5) rather than
/// semantic content; keeping it would make a build→parse round-trip
/// append a spurious "\r\n" to single-part bodies.
#[test]
fn parse_single_part_body_no_trailing_crlf() {
    // The body line below ends with the terminating "\r\n".
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: text/plain; charset=utf-8\r\n\
                 \r\n\
                 Hello, World!\r\n";

    let msg = parse_email(message).unwrap();
    assert_eq!(
        msg.body_text.as_deref(),
        Some("Hello, World!"),
        "Single-part body text must not include trailing CRLF"
    );
}

/// The terminating CRLF is likewise excluded from a single-part HTML body.
#[test]
fn parse_single_part_html_no_trailing_crlf() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: text/html; charset=utf-8\r\n\
                 \r\n\
                 <p>Hello</p>\r\n";

    let msg = parse_email(message).unwrap();
    assert_eq!(
        msg.body_html.as_deref(),
        Some("<p>Hello</p>"),
        "Single-part HTML body must not include trailing CRLF"
    );
}

/// Building a text-only message and parsing it back must return the body
/// text unchanged.
#[test]
fn round_trip_single_part_body_text() {
    // Address with no display name.
    let bare = |email: &str| crate::types::Address {
        name: None,
        email: email.into(),
    };

    let outgoing = crate::types::OutgoingEmail {
        from: vec![bare("a@b.com")],
        sender: None,
        to: vec![bare("c@d.com")],
        cc: vec![],
        bcc: vec![],
        reply_to: vec![],
        date: None,
        subject: "Test".into(),
        body_text: Some("Hello, World!".into()),
        body_html: None,
        in_reply_to: vec![],
        references: vec![],
        attachments: vec![],
        extra_headers: vec![],
    };

    let rendered = crate::build_message(&outgoing).unwrap();
    let msg = parse_email(&rendered.raw).unwrap();
    assert_eq!(
        msg.body_text.as_deref(),
        Some("Hello, World!"),
        "Single-part body text must round-trip without trailing CRLF"
    );
}

/// RFC 2047 encoded words inside a display name have to be decoded only
/// once the address structure has been parsed. Decoding first would let
/// address-significant characters in the decoded text (commas, for
/// instance) corrupt the address split.
///
/// RFC 2047 Section 5 rule (3) allows encoded-words in a 'phrase' such as
/// a display name in address headers. Here `=?UTF-8?B?Sm9obiwgRG9l?=`
/// decodes to `John, Doe`; decoding it too early would split the address
/// at that comma.
#[test]
fn parse_encoded_word_display_name_with_comma() {
    // The display name is "John, Doe" in base64 form. Its comma belongs to
    // the name and is not an address separator.
    let message = b"From: =?UTF-8?B?Sm9obiwgRG9l?= <john@example.com>\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let msg = parse_email(message).unwrap();
    let sender = &msg.from[0];
    assert_eq!(
        sender.name.as_deref(),
        Some("John, Doe"),
        "RFC 2047 encoded display name with comma must be preserved \
         (RFC 2047 Section 5 rule 3): decode AFTER address parsing"
    );
    assert_eq!(sender.email, "john@example.com");
}

/// RFC 2045 Section 6.8: "Any characters outside of the base64 alphabet
/// are to be ignored in base64-encoded data." That alphabet consists of
/// A-Z, a-z, 0-9, +, / and the = padding. Stray characters such as `!`,
/// `#` or `~` therefore have to be discarded before decoding, just like
/// whitespace.
#[test]
fn parse_base64_body_ignores_non_alphabet_chars() {
    // "SGVsbG8gV29ybGQ=" is the base64 form of "Hello World". The body
    // below sprinkles '!', '#' and '~' into it, none of which is whitespace,
    // to prove that every non-alphabet byte is skipped.
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: text/plain; charset=utf-8\r\n\
                 Content-Transfer-Encoding: base64\r\n\
                 \r\n\
                 SGVs!bG8#gV29~ybGQ=";

    let msg = parse_email(message).unwrap();
    assert_eq!(
        msg.body_text.as_deref(),
        Some("Hello World"),
        "RFC 2045 Section 6.8: non-alphabet characters must be ignored in base64 data"
    );
}

/// A comma hidden inside an RFC 2047 encoded display name must not add an
/// extra recipient to a multi-address `To` header.
#[test]
fn parse_encoded_word_display_name_with_comma_in_to() {
    // Recipient one carries the encoded name "John, Doe"; recipient two is
    // a bare address. The header holds exactly two addresses.
    let message = b"From: sender@example.com\r\n\
                 To: =?UTF-8?B?Sm9obiwgRG9l?= <john@example.com>, other@example.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";

    let msg = parse_email(message).unwrap();
    assert_eq!(
        msg.to.len(),
        2,
        "Must parse exactly 2 addresses, not 3 (encoded comma is not a separator)"
    );

    let john = &msg.to[0];
    assert_eq!(
        john.name.as_deref(),
        Some("John, Doe"),
        "First recipient display name must be 'John, Doe'"
    );
    assert_eq!(john.email, "john@example.com");
    assert_eq!(msg.to[1].email, "other@example.com");
}

/// A message built with `body_text: None` plus an attachment (which forces
/// multipart/mixed with an empty text/plain part) must parse back with
/// `body_text: None` rather than `Some("")`.
///
/// The builder's `write_text_part` always writes a trailing `\r\n` after
/// the body content, because RFC 2046 Section 5.1.1 requires a CRLF in
/// front of the boundary. For an empty body the MIME part body is then
/// just `\r\n`, which `decode_body` strips down to `""`. The parser has to
/// report such an empty decoded body as absent (`None`), not as
/// present-but-empty (`Some("")`).
///
/// # References
/// - RFC 2046 Section 5.1.1 (CRLF before boundary delimiter)
/// - RFC 5322 Section 3.5 (message body)
#[test]
fn round_trip_empty_body_text_is_none() {
    // Address with no display name.
    let bare = |email: &str| crate::types::Address {
        name: None,
        email: email.into(),
    };

    // The attachment is what forces multipart/mixed, and with it the empty
    // text/plain part the builder emits for a missing body_text.
    let text_attachment = crate::types::OutgoingAttachment {
        filename: "test.txt".into(),
        content_type: "text/plain".into(),
        data: b"attachment data".to_vec(),
        is_inline: false,
        content_id: None,
    };

    let outgoing = crate::types::OutgoingEmail {
        from: vec![bare("a@b.com")],
        sender: None,
        to: vec![bare("c@d.com")],
        cc: vec![],
        bcc: vec![],
        reply_to: vec![],
        date: None,
        subject: "Empty body".into(),
        body_text: None,
        body_html: None,
        in_reply_to: vec![],
        references: vec![],
        attachments: vec![text_attachment],
        extra_headers: vec![],
    };

    let rendered = crate::build_message(&outgoing).unwrap();
    let msg = parse_email(&rendered.raw).unwrap();

    assert_eq!(
        msg.body_text, None,
        "Empty body_text must round-trip as None, not Some(\"\")"
    );
}

/// Empty-body handling for multipart/alternative: a message built with
/// `body_html: Some("")` must parse back with `body_html: None`, because
/// the decoded HTML content is empty, while the non-empty `body_text`
/// is returned unchanged.
///
/// # References
/// - RFC 2046 Section 5.1.1 (CRLF before boundary delimiter)
#[test]
fn round_trip_empty_body_html_in_alternative_is_none() {
    // Address with no display name.
    let bare = |email: &str| crate::types::Address {
        name: None,
        email: email.into(),
    };

    // Supplying both bodies forces multipart/alternative; the HTML one is
    // deliberately an empty string.
    let outgoing = crate::types::OutgoingEmail {
        from: vec![bare("a@b.com")],
        sender: None,
        to: vec![bare("c@d.com")],
        cc: vec![],
        bcc: vec![],
        reply_to: vec![],
        date: None,
        subject: "Text only".into(),
        body_text: Some("Plain text".into()),
        body_html: Some(String::new()),
        in_reply_to: vec![],
        references: vec![],
        attachments: vec![],
        extra_headers: vec![],
    };

    let rendered = crate::build_message(&outgoing).unwrap();
    let msg = parse_email(&rendered.raw).unwrap();

    assert_eq!(
        msg.body_html, None,
        "Empty body_html must parse as None, not Some(\"\")"
    );
    assert_eq!(
        msg.body_text.as_deref(),
        Some("Plain text"),
        "body_text must be preserved"
    );
}

/// A comment placed after the type/subtype in `Content-Type` must not
/// change how the part is classified.
#[test]
fn extract_mime_type_strips_rfc5322_comments() {
    // RFC 5322 Section 3.2.2: parenthesized comments are allowed in CFWS
    // positions of the type/subtype production, so extract_mime_type has to
    // reduce "text/plain (comment)" to "text/plain".
    //
    // In this single-part message an unstripped comment made the MIME type
    // comparison fail, and the body was reported as an attachment instead
    // of body_text.
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: text/plain (this is a comment); charset=utf-8\r\n\
                 \r\n\
                 Hello with comment";

    let msg = parse_email(message).unwrap();
    assert_eq!(
        msg.body_text.as_deref(),
        Some("Hello with comment"),
        "Body must be extracted as body_text when Content-Type has an RFC 5322 comment"
    );
    assert!(
        msg.attachments.is_empty(),
        "No attachments expected for a plain text/plain message with a comment"
    );
}

/// RFC 2046 Section 5.1.5: inside multipart/digest the default
/// Content-Type of a body part is "message/rfc822" instead of
/// "text/plain; charset=us-ascii". A part that carries no Content-Type
/// header must be handled as message/rfc822 and listed as an attachment,
/// never as `body_text`.
#[test]
fn multipart_digest_default_content_type_is_message_rfc822() {
    // The single digest part starts with an empty header block (no
    // Content-Type) and then holds a nested message.
    let message = b"From: sender@example.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Subject: Digest\r\n\
                 Content-Type: multipart/digest; boundary=\"digestboundary\"\r\n\
                 \r\n\
                 --digestboundary\r\n\
                 \r\n\
                 From: nested@example.com\r\n\
                 Subject: Nested message\r\n\
                 \r\n\
                 Nested body text\r\n\
                 --digestboundary--\r\n";

    let msg = parse_email(message).unwrap();

    // With the digest default in effect, the part is a message/rfc822
    // attachment and nothing is left to fill body_text.
    assert!(
        msg.body_text.is_none(),
        "multipart/digest parts without Content-Type should default to \
         message/rfc822, not text/plain — body_text should be None"
    );
    assert_eq!(
        msg.attachments.len(),
        1,
        "multipart/digest part should be treated as message/rfc822 attachment"
    );

    let nested = &msg.attachments[0];
    assert_eq!(
        nested.content_type, "message/rfc822",
        "default Content-Type in multipart/digest must be message/rfc822 \
         (RFC 2046 Section 5.1.5)"
    );
}

/// Whitespace found inside the angle brackets of a Content-ID must be
/// trimmed once the brackets themselves have been removed.
///
/// RFC 2392 defines Content-ID as `"<" addr-spec ">"`, yet some mailers
/// pad the addr-spec with whitespace inside the brackets. Stripping `<`
/// and `>` alone is not enough: the remainder has to be trimmed so the
/// identifier is clean for matching (e.g., against CID references in HTML).
#[test]
fn content_id_whitespace_inside_brackets_trimmed() {
    // Case 1: multipart message (the walk_mime_tree path).
    let multipart_message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
                 \r\n\
                 --b\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 Body\r\n\
                 --b\r\n\
                 Content-Type: image/png\r\n\
                 Content-ID: < cid@example.com >\r\n\
                 \r\n\
                 PNG\r\n\
                 --b--";
    let multipart = parse_email(multipart_message).unwrap();
    let inline_image = &multipart.attachments[0];
    assert_eq!(
        inline_image.content_id.as_deref(),
        Some("cid@example.com"),
        "Content-ID must be trimmed after bracket stripping (RFC 2392)"
    );

    // Case 2: single-part message (the extract_simple_body path).
    let single_message = b"From: a@b.com\r\n\
                       Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                       Content-Type: image/png\r\n\
                       Content-ID: < cid2@example.com >\r\n\
                       \r\n\
                       PNG";
    let single = parse_email(single_message).unwrap();
    let lone_image = &single.attachments[0];
    assert_eq!(
        lone_image.content_id.as_deref(),
        Some("cid2@example.com"),
        "Content-ID in single-part message must be trimmed (RFC 2392)"
    );
}

/// `parse_headers_only` fills every header-derived field and leaves all
/// body-derived fields empty.
#[test]
fn parse_headers_only_all_fields_verified() {
    // The message sets each supported header once, including cc, bcc,
    // reply_to, in_reply_to and references, and has a body that the
    // header-only parse must not report.
    let message = b"From: sender@example.com\r\n\
                 To: to@example.com\r\n\
                 Cc: cc@example.com\r\n\
                 Bcc: bcc@example.com\r\n\
                 Reply-To: reply@example.com\r\n\
                 Subject: Full test\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Message-ID: <msg1@example.com>\r\n\
                 In-Reply-To: <parent@example.com>\r\n\
                 References: <ref1@example.com> <ref2@example.com>\r\n\
                 \r\n\
                 Body that should be ignored";

    let headers = parse_headers_only(message).unwrap();

    assert_eq!(headers.from[0].email, "sender@example.com");

    // Each recipient-style header holds exactly one address.
    for (list, expected) in [
        (&headers.to, "to@example.com"),
        (&headers.cc, "cc@example.com"),
        (&headers.bcc, "bcc@example.com"),
        (&headers.reply_to, "reply@example.com"),
    ] {
        assert_eq!(list.len(), 1);
        assert_eq!(list[0].email, expected);
    }

    assert_eq!(headers.subject.as_deref(), Some("Full test"));
    assert!(headers.date.is_some());
    assert_eq!(headers.message_id.as_deref(), Some("msg1@example.com"));
    assert_eq!(headers.in_reply_to, vec!["parent@example.com"]);
    assert_eq!(
        headers.references,
        vec!["ref1@example.com", "ref2@example.com"]
    );

    // Nothing derived from the body may be present.
    assert!(headers.body_text.is_none());
    assert!(headers.body_html.is_none());
    assert!(headers.attachments.is_empty());
}

/// Headers that are absent come back as `None` or as an empty list.
#[test]
fn parse_missing_optional_headers_returns_none_or_empty() {
    // From is the only required header here (RFC 5322 Section 3.6.2);
    // everything else is missing and must degrade gracefully.
    let message = b"From: a@b.com\r\n\r\n";

    let msg = parse_email(message).unwrap();

    assert_eq!(msg.from[0].email, "a@b.com");

    // Optional single-valued headers.
    assert!(msg.subject.is_none());
    assert!(msg.date.is_none());
    assert!(msg.message_id.is_none());

    // Optional list-valued headers.
    assert!(msg.in_reply_to.is_empty());
    assert!(msg.references.is_empty());
    assert!(msg.to.is_empty());
    assert!(msg.cc.is_empty());
    assert!(msg.bcc.is_empty());
    assert!(msg.reply_to.is_empty());
}

/// A longer parameter name that merely ends in the requested name is
/// skipped in favour of the real parameter.
#[test]
fn extract_param_rejects_substring_match() {
    // "filename=" may only match when preceded by `;`, space, tab, or the
    // start of the string, so the "xfilename=" parameter is not a hit.
    let disposition = "attachment; xfilename=\"bad.pdf\"; filename=\"good.pdf\"";
    assert_eq!(
        extract_param(disposition, "filename").as_deref(),
        Some("good.pdf"),
        "Must not match xfilename as filename"
    );
}

/// When the requested name occurs only as the tail of another parameter
/// name, no value is returned.
#[test]
fn extract_param_rejects_suffix_only_match() {
    let disposition = "attachment; notfilename=\"only.pdf\"";
    let lookup = extract_param(disposition, "filename");
    assert!(
        lookup.is_none(),
        "Must not match 'filename' inside 'notfilename'"
    );
}

// -----------------------------------------------------------------------
// Audit coverage: Group address parsing (RFC 5322 Section 3.4)
// -----------------------------------------------------------------------

/// An empty group such as `undisclosed-recipients:;` (RFC 5322
/// Section 3.4) contains no mailboxes, so the parsed list must be empty.
#[test]
fn parse_group_address_empty() {
    let group = "undisclosed-recipients:;";
    let addrs = parse_address_list(group);
    assert!(
        addrs.is_empty(),
        "empty group must produce no addresses, got {addrs:?}"
    );
}

/// A group with two members (RFC 5322 Section 3.4) must be flattened into
/// its two member addresses, in order.
#[test]
fn parse_group_address_with_two_members() {
    let addrs = parse_address_list("Friends: a@x.com, b@x.com;");
    assert_eq!(addrs.len(), 2, "group with 2 members: {addrs:?}");

    // The length is already pinned above, so zipping cannot hide a
    // missing or extra entry.
    for (addr, expected) in addrs.iter().zip(["a@x.com", "b@x.com"]) {
        assert_eq!(addr.email, expected);
    }
}

/// Several groups plus a standalone mailbox in one header value
/// (RFC 5322 Section 3.4) must flatten into a single ordered list.
#[test]
fn parse_multiple_groups_and_solo() {
    let header = "Team A: a1@x.com, a2@x.com;, Team B: b1@x.com;, solo@x.com";
    let addrs = parse_address_list(header);
    assert_eq!(addrs.len(), 4, "2 groups + 1 solo: {addrs:?}");

    // Members are expected in source order: Team A, Team B, then the
    // ungrouped mailbox.
    let expected = ["a1@x.com", "a2@x.com", "b1@x.com", "solo@x.com"];
    for (addr, want) in addrs.iter().zip(expected) {
        assert_eq!(addr.email, want);
    }
}

/// A comma inside a parenthesized comment (RFC 5322 Section 3.2.2) is
/// part of the comment and must not be treated as an address separator.
#[test]
fn parse_address_comment_with_comma_audit() {
    let header = "user@x.com (Last, First), other@x.com";
    let addrs = parse_address_list(header);
    assert_eq!(
        addrs.len(),
        2,
        "comma inside comment must not split: {addrs:?}"
    );

    let (first, second) = (&addrs[0], &addrs[1]);
    assert_eq!(first.email, "user@x.com");
    assert_eq!(second.email, "other@x.com");
}

// -----------------------------------------------------------------------
// Audit coverage: RFC 2231 Section 3 — continuation edge cases
// -----------------------------------------------------------------------

/// RFC 2231 Section 3: continuation sections are numbered from 0 with no
/// gaps. With `*0` and `*2` present but `*1` missing, only the contiguous
/// prefix (`*0`) may be returned.
///
/// Despite the test name, the section after the gap must NOT be collected.
#[test]
fn rfc2231_continuation_gap_collects_available() {
    let header = "attachment; filename*0=\"hello\"; filename*2=\"skipped\"";
    let reassembled = extract_rfc2231_continuation(header, "filename");
    assert_eq!(
        reassembled.as_deref(),
        Some("hello"),
        "RFC 2231 Section 3: reassembly must stop at the first gap"
    );
}

/// RFC 2231 Section 3: a wider gap (`*0` followed directly by `*3`) is
/// handled the same way as a single missing index. Only section 0
/// contributes to the result.
///
/// Despite the test name, the section after the gap must NOT be collected.
#[test]
fn rfc2231_continuation_double_gap_collected() {
    let header = "attachment; filename*0=\"abc\"; filename*3=\"xyz\"";
    assert_eq!(
        extract_rfc2231_continuation(header, "filename").as_deref(),
        Some("abc"),
        "RFC 2231 Section 3: reassembly must ignore sections after a gap"
    );
}

/// RFC 2231 Section 3: a lone `*0` section is a complete value on its own.
#[test]
fn rfc2231_continuation_single_section() {
    let value = extract_rfc2231_continuation("attachment; filename*0=\"report.pdf\"", "filename");
    assert_eq!(value.as_deref(), Some("report.pdf"));
}

/// RFC 2231 Section 3: numbering starts at 0, so a parameter whose only
/// section is `*1` is malformed and must yield no value at all.
#[test]
fn rfc2231_continuation_requires_section_zero() {
    let header = "attachment; filename*1=\"tail.txt\"";
    let reassembled = extract_rfc2231_continuation(header, "filename");
    assert_eq!(
        reassembled, None,
        "RFC 2231 Section 3: continuation reassembly must require section 0"
    );
}

/// RFC 2231 Section 3: because gaps are forbidden, reassembly keeps only
/// the run of sections that starts at `*0` and is unbroken. Anything
/// after the first missing index is dropped rather than spliced on.
#[test]
fn rfc2231_continuation_stops_at_first_gap() {
    // `*1` is missing, so `*2` ("tail") must not appear in the output.
    let header = "attachment; filename*0=\"hello\"; filename*2=\"tail\"";
    assert_eq!(
        extract_rfc2231_continuation(header, "filename").as_deref(),
        Some("hello"),
        "RFC 2231 Section 3: reassembly must stop at the first missing index"
    );
}

/// Plain RFC 2231 continuation sections (no `*` encoding marker and no
/// charset declaration) must be concatenated into the attachment filename.
///
/// Per RFC 2231 Section 4 and RFC 2045 Section 5.2 the nominal default
/// charset would be US-ASCII. This parser is documented as defaulting to
/// UTF-8 instead. The values here are pure ASCII, so they must come
/// through unchanged under either default.
#[test]
fn rfc2231_continuation_no_charset_defaults_to_utf8() {
    // Two-part multipart/mixed message: a text body and one attachment
    // whose filename is split across sections *0, *1 and *2.
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/octet-stream\r\n\
        Content-Disposition: attachment; filename*0=\"annual_\"; filename*1=\"report_\"; filename*2=\"2025.pdf\"\r\n\
        \r\n\
        DATA\r\n\
        --b--";

    let email = parse_email(message).unwrap();
    assert_eq!(email.attachments.len(), 1);

    let attachment = &email.attachments[0];
    assert_eq!(
        attachment.filename.as_deref(),
        Some("annual_report_2025.pdf"),
        "RFC 2231 continuation without charset should decode as UTF-8"
    );
}

// -----------------------------------------------------------------------
// Audit coverage: Base64 with embedded whitespace (RFC 2045 Section 6.8)
// -----------------------------------------------------------------------

/// RFC 2045 Section 6.8 tells decoders to ignore characters outside the
/// base64 alphabet, so spaces inside the encoded data must not affect
/// the decoded output.
#[test]
fn base64_with_embedded_spaces() {
    // `SGVsbG8gV29ybGQ=` ("Hello World") with a space after every 4 chars.
    let decoded = decode_transfer_encoding(b"SGVs bG8g V29y bGQ=", "base64");
    let text = std::str::from_utf8(&decoded).unwrap();
    assert_eq!(
        text,
        "Hello World",
        "base64 decoder must strip non-alphabet characters (RFC 2045 Section 6.8)"
    );
}

/// RFC 2045 Section 6.8: horizontal tabs inside base64 data are outside
/// the alphabet and must be skipped by the decoder.
#[test]
fn base64_with_tabs() {
    let decoded = decode_transfer_encoding(b"SGVs\tbG8g\tV29ybGQ=", "base64");
    let text = std::str::from_utf8(&decoded).unwrap();
    assert_eq!(
        text,
        "Hello World",
        "base64 decoder must strip tabs (RFC 2045 Section 6.8)"
    );
}

// -----------------------------------------------------------------------
// Audit coverage: Quoted-printable edge cases (RFC 2045 Section 6.7)
// -----------------------------------------------------------------------

/// RFC 2045 Section 6.7: an `=` that is the last byte of the input is
/// treated as a soft line break and dropped, not emitted as a literal.
#[test]
fn qp_trailing_equals_stripped() {
    let decoded = decode_quoted_printable(b"Hello=");
    let text = std::str::from_utf8(&decoded).unwrap();
    assert_eq!(
        text,
        "Hello",
        "trailing '=' is a soft break (RFC 2045 Section 6.7)"
    );
}

/// RFC 2045 Section 6.7: `=` followed by two non-hex characters is not a
/// valid escape. The decoder is lenient (Postel's law) and copies the
/// `=` and the characters after it through unchanged.
#[test]
fn qp_malformed_hex_passthrough() {
    let encoded = b"Hello=ZZ World";
    let decoded = decode_quoted_printable(encoded);
    let text = std::str::from_utf8(&decoded).unwrap();
    assert_eq!(
        text,
        "Hello=ZZ World",
        "malformed =ZZ must pass through literally (Postel's law)"
    );
}

// -----------------------------------------------------------------------
// Audit coverage: RFC 2047 Q-encoding edge cases
// -----------------------------------------------------------------------

/// RFC 2047 Section 4.2: inside Q-encoded text, `=` followed by non-hex
/// characters is malformed. The decoder is lenient and keeps the bytes
/// exactly as they were written.
#[test]
fn q_encoding_malformed_hex_passthrough() {
    let bytes = decode_q_encoding("Hello=ZZWorld");
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        text,
        "Hello=ZZWorld",
        "malformed =ZZ in Q-encoding must pass through literally"
    );
}

/// RFC 2047 Section 4.2: a dangling `=` at the very end of Q-encoded text
/// has no hex digits to consume and must be kept as a literal `=`.
#[test]
fn q_encoding_trailing_equals() {
    let bytes = decode_q_encoding("Hello=");
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        text,
        "Hello=",
        "trailing '=' in Q-encoding must pass through literally"
    );
}

/// RFC 2047 Sections 2 and 4.2: a Q-encoded word may contain `=` only as
/// the start of a full `=XX` escape. An encoded word that breaks this
/// rule must reach the caller untouched rather than half-decoded.
#[test]
fn malformed_q_encoded_word_stays_literal() {
    // `a=b` has an `=` that is not followed by two hex digits.
    let message = b"From: test@example.com\r\nSubject: =?utf-8?Q?a=b?=\r\n\r\nbody";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(
        subject,
        Some("=?utf-8?Q?a=b?="),
        "malformed RFC 2047 Q-encoded words must remain literal"
    );
}

// -----------------------------------------------------------------------
// Audit coverage: multipart/digest default Content-Type
// -----------------------------------------------------------------------

/// RFC 2046 Section 5.1.5: inside `multipart/digest`, a part that omits
/// `Content-Type` defaults to `message/rfc822`, not `text/plain`.
/// Checked end to end through `parse_email`.
#[test]
fn multipart_digest_default_content_type_full_email() {
    // The single digest part has an empty header block (note the blank
    // line right after `--dg`) followed by a nested message.
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/digest; boundary=\"dg\"\r\n\
        \r\n\
        --dg\r\n\
        \r\n\
        From: nested@example.com\r\n\
        Subject: Inner\r\n\
        \r\n\
        Inner body\r\n\
        --dg--";

    let email = parse_email(message).unwrap();

    // Had the part been defaulted to text/plain it would have become the
    // text body. It must surface as an attachment instead.
    assert!(
        email.body_text.is_none(),
        "digest part must NOT be treated as text/plain"
    );
    assert_eq!(email.attachments.len(), 1);

    let nested = &email.attachments[0];
    assert_eq!(nested.content_type, "message/rfc822");
}

// -----------------------------------------------------------------------
// Coverage: header line starting with space (L208)
// -----------------------------------------------------------------------

/// RFC 5322 Section 2.2.3: a line beginning with a space is a folded
/// continuation. When such a line is the very first line of the message
/// it has no header to continue, and the parser must discard it without
/// disturbing the real headers or the body.
#[test]
fn parse_headers_leading_space_skipped() {
    let message = b" continuation without header\r\nFrom: a@b.com\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\nBody";
    let email = parse_email(message).unwrap();

    let sender = &email.from[0];
    assert_eq!(sender.email, "a@b.com");
    assert_eq!(email.body_text.as_deref(), Some("Body"));
}

/// Same scenario as the leading-space case, but the orphaned continuation
/// line starts with a tab. The `From` header after it must still parse.
#[test]
fn parse_headers_leading_tab_skipped() {
    let message = b"\tcontinuation without header\r\nFrom: a@b.com\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";
    let email = parse_email(message).unwrap();

    let sender = &email.from[0];
    assert_eq!(sender.email, "a@b.com");
}

// -----------------------------------------------------------------------
// Coverage: RFC 2047 decoding edge cases (L491, L542, L550, L558-560)
// -----------------------------------------------------------------------

/// RFC 2047 Section 2: when the payload of a `B` encoded word is not
/// valid base64, the word cannot be decoded and the output must still
/// contain the literal `=?` marker.
#[test]
fn encoded_word_bad_base64_passthrough() {
    // The payload "=====" is padding only, which is not valid base64.
    let result = decode_encoded_words("=?UTF-8?B?=====?=");
    assert!(
        result.contains("=?"),
        "Bad base64 encoded word should pass through literally, got: {result:?}"
    );
}

/// RFC 2047 Section 2: an encoded word that names an unrecognized charset
/// must still have its payload decoded, not be rejected outright.
///
/// NOTE(review): the original author attributed this to a UTF-8 fallback
/// in the charset decoder. That is not visible from this test, so confirm
/// it against `decode_charset`.
#[test]
fn encoded_word_unknown_charset_fallback() {
    // Payload `SGVsbG8=` is base64 for "Hello".
    let result = decode_encoded_words("=?x-nonexistent-charset?B?SGVsbG8=?=");
    assert!(
        result.contains("Hello"),
        "Unknown charset should fall back to UTF-8, got: {result:?}"
    );
}

/// RFC 2047: an encoded word that never reaches its closing `?=` cannot
/// be decoded. The literal `=?` opener must survive in the output.
#[test]
fn encoded_word_truncated_no_closing() {
    // There is no `?=` terminator after the base64 payload.
    let truncated = "Start =?UTF-8?B?SGVsbG8= End";
    let result = decode_encoded_words(truncated);
    assert!(
        result.contains("=?"),
        "Truncated encoded word should pass through, got: {result:?}"
    );
}

// -----------------------------------------------------------------------
// Coverage: RFC 2231 parameter continuation (L586, L592, L594)
// -----------------------------------------------------------------------

/// RFC 2231 Section 3: one parameter may mix percent-encoded sections
/// (`name*N*=`) with plain quoted sections (`name*N=`). Each section is
/// decoded according to its own form before they are concatenated.
#[test]
fn rfc2231_continuation_mixed_encoded_and_plain() {
    // *0* carries the charset prefix and percent-encoded "résum".
    // *1 is an ordinary quoted string supplying "e.pdf".
    let header = "attachment; filename*0*=UTF-8''r%C3%A9sum; filename*1=\"e.pdf\"";
    let reassembled = extract_rfc2231_continuation(header, "filename");
    assert_eq!(
        reassembled.as_deref(),
        Some("r\u{e9}sume.pdf"),
        "RFC 2231 mixed encoded/plain continuation should reassemble correctly"
    );
}

/// RFC 2231 Section 3: three consecutive plain sections (`*0`, `*1`,
/// `*2`) are joined in index order.
#[test]
fn rfc2231_continuation_three_sections() {
    let header =
        "attachment; filename*0=\"part1_\"; filename*1=\"part2_\"; filename*2=\"part3.pdf\"";
    let joined = extract_rfc2231_continuation(header, "filename");
    assert_eq!(joined.as_deref(), Some("part1_part2_part3.pdf"));
}

// -----------------------------------------------------------------------
// Coverage: extract_quoted_value / quoted-string edge cases
// (L605, L614-616, L619-621, L625, L634, L636, L647)
// -----------------------------------------------------------------------

/// RFC 5322 Section 3.2.4: a quoted parameter value whose closing `"` is
/// missing must still yield the text after the opening quote, up to the
/// end of the header value.
#[test]
fn extract_param_unterminated_quoted_value() {
    // The opening quote before `utf-8` is never closed.
    let header = "text/plain; charset=\"utf-8";
    assert_eq!(
        extract_param(header, "charset").as_deref(),
        Some("utf-8"),
        "Unterminated quoted-string should extract to end of string"
    );
}

/// RFC 5322 Section 3.2.4: a quoted-pair inside a quoted parameter value
/// is unescaped, so `\\` on the wire becomes a single backslash.
#[test]
fn extract_param_quoted_with_backslash_escape() {
    // On the wire the value is `file\\name.txt` (two backslashes).
    let header = "attachment; filename=\"file\\\\name.txt\"";
    let filename = extract_param(header, "filename");
    assert_eq!(
        filename.as_deref(),
        Some("file\\name.txt"),
        "Backslash escape in quoted param value must be unescaped"
    );
}

/// RFC 5322 Section 3.2.4: an empty quoted-string (`""`) carries no value,
/// so the lookup reports `None` rather than `Some("")`.
#[test]
fn extract_param_empty_quoted_value() {
    let result = extract_param("attachment; filename=\"\"", "filename");
    assert!(
        result.is_none(),
        "Empty quoted-string value should return None, got: {result:?}"
    );
}

/// Regression test: a parameter value with an opening `"` but no closing
/// one must not swallow the parameters after it. The value ends at the
/// next `;`, so later parameters can still be looked up.
///
/// # References
/// - RFC 5322 Section 3.2.4 (quoted-string)
/// - Postel's law (RFC 1122 Section 1.2.2)
#[test]
fn extract_param_unterminated_quote_does_not_absorb_next_param() {
    // `charset` opens a quote that is never closed. `name` follows it.
    let header = "text/plain; charset=\"utf-8; name=test.txt";

    // The broken parameter itself stops at the semicolon.
    assert_eq!(
        extract_param(header, "charset").as_deref(),
        Some("utf-8"),
        "unterminated quote must fall back to ';' terminator \
         (RFC 5322 Section 3.2.4, Postel's law)"
    );

    // The parameter after the broken one can still be found.
    assert_eq!(
        extract_param(header, "name").as_deref(),
        Some("test.txt"),
        "parameter after unterminated quote must still be accessible"
    );
}

/// `extract_comment_text` keeps a nested comment's parentheses as part of
/// the outer comment's text (RFC 5322 Section 3.2.2).
#[test]
fn extract_comment_text_nested_parens() {
    let text = extract_comment_text("(outer (inner) text)");
    assert_eq!(
        text.as_deref(),
        Some("outer (inner) text"),
        "Nested parens should be included in comment text"
    );
}

/// `extract_comment_text` resolves backslash escapes, so `\(` and `\)`
/// inside a comment come out as plain parentheses.
#[test]
fn extract_comment_text_escaped_chars() {
    let comment = "(hello \\(world\\))";
    assert_eq!(
        extract_comment_text(comment).as_deref(),
        Some("hello (world)"),
        "Escaped parens inside comments should be unescaped"
    );
}

/// `extract_comment_text` reports `None` for a comment with no content.
#[test]
fn extract_comment_text_empty() {
    let text = extract_comment_text("()");
    assert!(text.is_none(), "Empty comment should return None");
}

/// `extract_comment_text` reports `None` when the input is not wrapped in
/// parentheses at all.
#[test]
fn extract_comment_text_no_paren() {
    let text = extract_comment_text("not a comment");
    assert!(
        text.is_none(),
        "Non-parenthesized input should return None"
    );
}

// -----------------------------------------------------------------------
// Coverage: comment stripping (L720, L1593-1604)
// -----------------------------------------------------------------------

/// RFC 5322 Section 3.2.2: `strip_comments` removes comments, including
/// nested ones and ones that contain backslash escapes, and leaves all
/// text outside comments exactly as it was.
#[test]
fn strip_comments_nested_and_escaped() {
    // Each entry is (input, expected output). The order matches the
    // original sequence of assertions.
    let cases = [
        // A comment nested inside another is removed with its parent.
        ("Hello (outer (inner) comment) World", "Hello  World"),
        // `\)` inside a comment is an escape and does not end the comment.
        ("Hello (comment with \\) escaped) World", "Hello  World"),
        // An escaped backslash outside any comment is left untouched.
        ("Hello \\\\ World", "Hello \\\\ World"),
        // `\(` inside a comment does not open a nested comment.
        ("Before (escaped \\( paren) After", "Before  After"),
    ];

    for (input, expected) in cases {
        assert_eq!(strip_comments(input), expected);
    }
}

/// RFC 5322 Section 3.2.2: backslash-escaped parentheses outside any
/// comment are copied through verbatim. The escaped `(` does not open a
/// comment and the escaped `)` does not close one, so the input comes
/// back unchanged, backslashes included.
#[test]
fn strip_comments_escaped_outside_comment() {
    let input = "no \\(comment\\) here";
    let stripped = strip_comments(input);
    assert_eq!(
        stripped, "no \\(comment\\) here",
        "Escaped parens outside comments should not open/close comments"
    );
}

// -----------------------------------------------------------------------
// Coverage: date parsing edge cases (L767, L808, L834, L867)
// -----------------------------------------------------------------------

/// RFC 5322 Section 3.3: a date with only a day and month, and no year or
/// time, is incomplete and must be rejected.
#[test]
fn parse_date_too_few_parts() {
    let parsed = parse_rfc5322_date("13 Feb");
    assert!(
        parsed.is_none(),
        "Date with too few parts should return None"
    );
}

/// RFC 5322 Section 3.3: the time-of-day field must contain a colon. A
/// bare run of digits such as `1547` is not a time and the date must be
/// rejected.
#[test]
fn parse_date_time_no_colon() {
    let parsed = parse_rfc5322_date("13 Feb 2025 1547 +0000");
    assert!(parsed.is_none(), "Time without colon should return None");
}

/// RFC 5322 Section 3.3: a month token that is not a recognized
/// abbreviation makes the whole date unparseable.
#[test]
fn parse_date_unknown_month() {
    let parsed = parse_rfc5322_date("13 Foo 2025 12:00:00 +0000");
    assert!(parsed.is_none(), "Unknown month name should return None");
}

/// RFC 5322 Section 3.3: inputs that look nothing like a date (free text,
/// the empty string, whitespace only) must all be rejected.
#[test]
fn parse_date_completely_malformed() {
    for garbage in ["not a date at all", "", "   "] {
        assert!(parse_rfc5322_date(garbage).is_none());
    }
}

/// RFC 5322 Section 4.3: an unrecognized alphabetic zone does not make the
/// date invalid. It parses with a UTC offset of zero minutes.
#[test]
fn parse_date_unknown_timezone_defaults_zero() {
    // "ZULU" is not one of the zone names the parser is expected to know.
    let offset = parse_rfc5322_date("13 Feb 2025 12:00:00 ZULU")
        .unwrap()
        .tz_offset_minutes;
    assert_eq!(
        offset, 0,
        "Unknown timezone abbreviation should default to +0000"
    );
}

/// RFC 5322 Section 3.3: the day-of-month must be numeric. Letters in
/// that position cause the date to be rejected.
#[test]
fn parse_date_non_numeric_day() {
    let parsed = parse_rfc5322_date("XX Feb 2025 12:00:00 +0000");
    assert!(parsed.is_none(), "Non-numeric day should return None");
}

/// RFC 5322 Section 3.3: the year must be numeric. Letters in that
/// position cause the date to be rejected.
#[test]
fn parse_date_non_numeric_year() {
    let parsed = parse_rfc5322_date("13 Feb XXXX 12:00:00 +0000");
    assert!(parsed.is_none(), "Non-numeric year should return None");
}

// -----------------------------------------------------------------------
// Coverage: boundary detection edge cases
// (L918, L954, L1003-1006, L1022-1023)
// -----------------------------------------------------------------------

/// RFC 2046 Section 5.1.1: boundary lines separated by a bare `\n` (no
/// `\r`) must still be detected, so a body using LF-only line endings
/// splits into its parts.
#[test]
fn split_mime_parts_lf_only_boundaries() {
    let lf_body = b"--boundary\nContent-Type: text/plain\n\nPart 1\n--boundary\nContent-Type: text/plain\n\nPart 2\n--boundary--";
    let part_count = split_mime_parts(lf_body, "boundary").len();
    assert_eq!(
        part_count,
        2,
        "Should find 2 parts with LF-only boundaries"
    );
}

/// RFC 2046 Section 5.1.1: a boundary at offset 0 of the body has no
/// preceding line break, yet it must still open the first part.
#[test]
fn split_mime_parts_boundary_at_start() {
    let parts = split_mime_parts(
        b"--b\r\nContent-Type: text/plain\r\n\r\nOnly part\r\n--b--",
        "b",
    );
    assert_eq!(
        parts.len(),
        1,
        "Should find 1 part when boundary is at start"
    );

    // The part's payload must be carried inside the returned slice.
    assert!(String::from_utf8_lossy(parts[0]).contains("Only part"));
}

/// RFC 2046 Section 5.1.1: the delimiter only counts at the start of a
/// line. The same characters in the middle of a line are ordinary content
/// and must be left inside the part.
#[test]
fn split_mime_parts_midline_boundary_ignored() {
    let parts = split_mime_parts(
        b"--b\r\nContent-Type: text/plain\r\n\r\nText mentioning --b in the middle\r\n--b--",
        "b",
    );
    assert_eq!(parts.len(), 1, "Mid-line boundary must not split");

    let content = String::from_utf8_lossy(parts[0]);
    assert!(
        content.contains("--b in the middle"),
        "Mid-line boundary text should be preserved"
    );
}

/// RFC 2046 Section 5.1.1 allows linear whitespace after the boundary
/// marker on a delimiter line. Spaces and tabs there must not stop the
/// line from being recognized as a boundary.
#[test]
fn split_mime_parts_boundary_with_trailing_whitespace() {
    // The opening delimiter is followed by two spaces and a tab.
    let padded = b"--b  \t\r\nContent-Type: text/plain\r\n\r\nBody text\r\n--b--";
    let part_count = split_mime_parts(padded, "b").len();
    assert_eq!(
        part_count,
        1,
        "Boundary with trailing whitespace should be recognized"
    );
}

/// A delimiter string that occurs at a non-zero offset without a line
/// break before it is a mid-line match. It must be skipped, leaving the
/// surrounding text inside the part.
#[test]
fn split_mime_parts_boundary_not_at_line_start_skipped() {
    // "--bound" appears inside a content line as well as on real
    // delimiter lines.
    let parts = split_mime_parts(
        b"--bound\r\n\r\nSome text has --bound embedded\r\n--bound--",
        "bound",
    );
    assert_eq!(parts.len(), 1);
    assert!(String::from_utf8_lossy(parts[0]).contains("--bound embedded"));
}

// -----------------------------------------------------------------------
// Coverage: Content-Transfer-Encoding quoted value (L1130)
// -----------------------------------------------------------------------

/// RFC 2045 Section 6.1 defines the Content-Transfer-Encoding value as a
/// bare token, but some mailers wrap it in double quotes. A quoted
/// `"base64"` must be treated exactly like `base64` (Postel's law).
#[test]
fn parse_quoted_transfer_encoding() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: \"base64\"\r\n\
        \r\n\
        SGVsbG8gV29ybGQ=\r\n";
    let email = parse_email(message).unwrap();

    // The body is only readable if the quoted token selected base64.
    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello World"),
        "Quoted CTE 'base64' should decode the body correctly"
    );
}

/// A Content-Transfer-Encoding value of `"quoted-printable"` wrapped in
/// double quotes must select the quoted-printable decoder, just as the
/// unquoted token would.
#[test]
fn parse_quoted_transfer_encoding_qp() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: \"quoted-printable\"\r\n\
        \r\n\
        Hello=20World\r\n";
    let email = parse_email(message).unwrap();

    // `=20` only becomes a space if quoted-printable decoding ran.
    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello World"),
        "Quoted CTE 'quoted-printable' should decode the body correctly"
    );
}

/// Extra whitespace around the Content-Transfer-Encoding token must not
/// stop the encoding from being recognized.
#[test]
fn parse_transfer_encoding_with_whitespace() {
    // There are two spaces after the colon and one space before the CRLF.
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding:  base64 \r\n\
        \r\n\
        SGVsbG8gV29ybGQ=\r\n";
    let email = parse_email(message).unwrap();

    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello World"),
        "CTE with whitespace should still decode correctly"
    );
}

/// RFC 2045 Section 6.1: the Content-Transfer-Encoding value is a single
/// token, but broken mailers sometimes append parameters after it (for
/// example `base64; name=foo`). Only the leading token may decide the
/// encoding. Whatever follows the semicolon is ignored.
#[test]
fn parse_transfer_encoding_with_trailing_garbage() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: base64; name=foo\r\n\
        \r\n\
        SGVsbG8gV29ybGQ=\r\n";
    let email = parse_email(message).unwrap();

    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello World"),
        "CTE with trailing semicolon+params should still decode as base64"
    );
}

/// A parenthesized comment after the Content-Transfer-Encoding token (for
/// example `quoted-printable (standard)`) must be ignored when choosing
/// the decoder.
#[test]
fn parse_transfer_encoding_with_trailing_comment() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: quoted-printable (standard)\r\n\
        \r\n\
        Hello=20World\r\n";
    let email = parse_email(message).unwrap();

    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello World"),
        "CTE with trailing comment should still decode as quoted-printable"
    );
}

/// Combines two non-conformances in one header: the CTE token is wrapped
/// in quotes AND followed by stray parameters (`"base64"; name=foo`).
/// RFC 2045 Section 6.1 allows neither, but in the spirit of Postel's law
/// (RFC 1122 Section 1.2.2) the parser must still decode the body as
/// base64.
#[test]
fn parse_quoted_transfer_encoding_with_trailing_garbage() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: \"base64\"; name=foo\r\n\
        \r\n\
        SGVsbG8gV29ybGQ=\r\n";
    let email = parse_email(message).unwrap();

    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello World"),
        "Quoted CTE with trailing garbage should still decode as base64"
    );
}

// -----------------------------------------------------------------------
// Coverage: quoted-printable decoding edge cases
// (L1183-1184, L1193)
// -----------------------------------------------------------------------

/// RFC 2045 Section 6.7: `=` immediately followed by CRLF is a soft line
/// break. All three bytes disappear and the adjacent text is joined with
/// nothing inserted between.
#[test]
fn qp_soft_line_break_crlf() {
    // The space in the expected output comes from the leading space of
    // the second line, not from the soft break.
    let decoded = decode_quoted_printable(b"Hello=\r\n World");
    let text = std::str::from_utf8(&decoded).unwrap();
    assert_eq!(
        text,
        "Hello World",
        "=\\r\\n soft break should be removed (RFC 2045 Section 6.7)"
    );
}

/// RFC 2045 Section 6.7: a soft break written with a bare LF (`=\n`)
/// instead of CRLF is tolerated and removed the same way (Postel's law).
#[test]
fn qp_soft_line_break_lf_only() {
    let decoded = decode_quoted_printable(b"Hello=\nWorld");
    let text = std::str::from_utf8(&decoded).unwrap();
    assert_eq!(
        text,
        "HelloWorld",
        "=\\n soft break should be removed"
    );
}

/// RFC 2045 Section 6.7: `=\n` as the final two bytes of the input is
/// still a soft break, so nothing may be emitted for it.
#[test]
fn qp_soft_break_lf_at_end() {
    let decoded = decode_quoted_printable(b"Hi=\n");
    let text = std::str::from_utf8(&decoded).unwrap();
    assert_eq!(
        text,
        "Hi",
        "=\\n at end of data should be a soft break"
    );
}

/// RFC 2045 Section 6.7 + Postel's law: an `=` that is not followed by
/// two valid hex digits is emitted literally, while well-formed
/// `=XX` pairs in the same input are still decoded.
#[test]
fn qp_invalid_hex_passthrough() {
    // `=GG` is not hex and stays as-is; `=4F=4B` decodes to "OK".
    let output = decode_quoted_printable(b"=GG=4F=4B");
    let text = std::str::from_utf8(&output).unwrap();
    assert_eq!(
        text, "=GGOK",
        "Invalid hex =GG should pass through, valid =4F=4B should decode"
    );
}

// -----------------------------------------------------------------------
// Coverage: base64 content length edge cases (L1285, L1343)
// -----------------------------------------------------------------------

/// RFC 2045 Section 6.8: decoding a zero-length base64 body yields a
/// zero-length result.
#[test]
fn base64_empty_body() {
    let output = decode_transfer_encoding(b"", "base64");
    assert!(
        output.is_empty(),
        "Empty base64 input should produce empty output"
    );
}

/// RFC 2045 Section 6.8: a base64 body made up solely of spaces and
/// line breaks carries no alphabet characters, so nothing is decoded.
#[test]
fn base64_whitespace_only() {
    let output = decode_transfer_encoding(b"  \r\n  \r\n", "base64");
    assert!(
        output.is_empty(),
        "Whitespace-only base64 input should produce empty output"
    );
}

/// `find_closing_quote`: with no closing quote present, the result is
/// the length of the input (21 bytes here).
#[test]
fn find_closing_quote_unterminated() {
    let input = "no closing quote here";
    assert_eq!(find_closing_quote(input), 21);
}

/// `find_closing_quote`: a backslash-escaped `"` does not terminate the
/// quoted string; the search continues to the next unescaped quote.
#[test]
fn find_closing_quote_skips_escaped() {
    // Input is `hello\"world"`: the `\"` at bytes 5-6 is skipped and the
    // real closing quote is the final byte, index 12.
    let input = "hello\\\"world\"";
    assert_eq!(find_closing_quote(input), 12);
}

// -----------------------------------------------------------------------
// Coverage: multipart boundary search paths
// (L1361-1362, L1400, L1404, L1429-1430, L1478-1479, L1544-1545,
//  L1550, L1552, L1567)
// -----------------------------------------------------------------------

/// Two-part multipart message in which every boundary line is preceded
/// by CRLF. The CRLF belongs to the boundary delimiter, so neither part
/// body may end with a trailing line break.
#[test]
fn multipart_crlf_before_boundary() {
    let message = b"From: a@b.com\r\n\
                     Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                     Content-Type: multipart/mixed; boundary=\"mp\"\r\n\
                     \r\n\
                     --mp\r\n\
                     Content-Type: text/plain\r\n\
                     \r\n\
                     Part A\r\n\
                     --mp\r\n\
                     Content-Type: text/html\r\n\
                     \r\n\
                     <b>Part B</b>\r\n\
                     --mp--";
    let email = parse_email(message).unwrap();
    let plain = email.body_text.as_deref();
    let html = email.body_html.as_deref();
    assert_eq!(plain, Some("Part A"));
    assert_eq!(html, Some("<b>Part B</b>"));
}

/// Same two-part message as the CRLF variant, but every line ending is
/// a bare LF. The single LF before each boundary must be stripped from
/// the preceding part body.
#[test]
fn multipart_lf_only_before_boundary() {
    let message = b"From: a@b.com\nDate: Thu, 13 Feb 2025 15:47:33 +0000\nContent-Type: multipart/mixed; boundary=\"mp\"\n\n--mp\nContent-Type: text/plain\n\nPart A\n--mp\nContent-Type: text/html\n\n<b>Part B</b>\n--mp--";
    let email = parse_email(message).unwrap();
    let plain = email.body_text.as_deref();
    let html = email.body_html.as_deref();
    assert_eq!(plain, Some("Part A"));
    assert_eq!(html, Some("<b>Part B</b>"));
}

/// A multipart body that ends without the closing `--bnd--` delimiter:
/// whatever follows the last boundary is still returned as a part.
#[test]
fn multipart_truncated_no_closing() {
    let truncated =
        b"--bnd\r\n\r\nFirst part\r\n--bnd\r\n\r\nSecond part with no closing boundary";
    let parts = split_mime_parts(truncated, "bnd");
    assert_eq!(
        parts.len(),
        2,
        "Should find 2 parts even without closing boundary"
    );
    let last_part = String::from_utf8_lossy(parts[1]);
    assert!(last_part.contains("Second part"));
}

// -----------------------------------------------------------------------
// Coverage: comment removal with escapes (L1593-1604)
// -----------------------------------------------------------------------

/// RFC 5322 Section 3.2.2: a backslash inside a comment escapes the
/// next character. An escaped `\)` inside a comment must NOT close the
/// comment (it must not decrement the nesting depth); only the later
/// unescaped `)` ends it.
#[test]
fn strip_comments_escaped_paren_inside_comment() {
    let result = strip_comments("Before (escaped \\) paren) After");
    // The `\)` is an escape: its `)` is consumed as comment content, so
    // the comment runs up to the final unescaped `)` and is removed whole.
    assert_eq!(
        result.trim(),
        "Before  After",
        "Escaped close-paren inside comment must not end the comment"
    );
}

/// RFC 5322 Section 3.2.2: an escaped `\(` inside a comment is plain
/// comment content and must not open a nested comment level.
#[test]
fn strip_comments_escaped_open_paren_inside_comment() {
    let stripped = strip_comments("X (comment \\( not nested) Y");
    let trimmed = stripped.trim();
    assert_eq!(
        trimmed, "X  Y",
        "Escaped open-paren inside comment must not increase nesting depth"
    );
}

/// RFC 5322 Sections 3.2.2 and 3.2.4: `(` and `)` inside a
/// quoted-string are ordinary characters, so the input must come back
/// unchanged.
#[test]
fn strip_comments_respects_quoted_strings() {
    let input = "\"value (not a comment)\" rest";
    let stripped = strip_comments(input);
    assert_eq!(
        stripped, "\"value (not a comment)\" rest",
        "Parentheses inside quoted-string must not be treated as comments"
    );
}

/// RFC 5322 Section 3.2.4: when a quoted-string containing parentheses
/// is followed by a genuine comment, only the genuine comment is
/// removed; the quoted text keeps its parentheses.
#[test]
fn strip_comments_quoted_string_with_real_comment() {
    let input = "\"value (literal)\" (real comment) end";
    let stripped = strip_comments(input);
    assert_eq!(
        stripped, "\"value (literal)\"  end",
        "Real comment stripped, quoted parens preserved"
    );
}

// -----------------------------------------------------------------------
// Coverage: hex_digit lowercase (L1632)
// -----------------------------------------------------------------------

/// `hex_digit` accepts lowercase `a`-`f` in addition to uppercase
/// letters and decimal digits, and rejects everything else.
#[test]
fn hex_digit_lowercase() {
    let cases = [
        // Lowercase letters
        (b'a', Some(10)),
        (b'f', Some(15)),
        (b'c', Some(12)),
        // Uppercase letters and decimal digits keep working
        (b'A', Some(10)),
        (b'F', Some(15)),
        (b'0', Some(0)),
        (b'9', Some(9)),
        // Bytes outside the hex alphabet
        (b'g', None),
        (b'G', None),
        (b' ', None),
    ];
    for (byte, expected) in cases {
        assert_eq!(hex_digit(byte), expected);
    }
}

/// Quoted-printable escapes written with lowercase hex digits are
/// decoded like their uppercase forms (RFC 2045 Section 6.7).
#[test]
fn qp_lowercase_hex_digits() {
    // `=c3=a9` is the UTF-8 byte sequence for U+00E9 (e-acute).
    let decoded = decode_quoted_printable(b"caf=c3=a9");
    assert_eq!(decoded, b"caf\xc3\xa9");
    let as_text = String::from_utf8_lossy(&decoded);
    assert_eq!(
        as_text, "caf\u{e9}",
        "Lowercase hex digits in QP should decode correctly (RFC 2045 Section 6.7)"
    );
}

/// `decode_hex_pair` combines two lowercase (or mixed digit/letter)
/// hex characters into the corresponding byte value.
#[test]
fn decode_hex_pair_lowercase() {
    let cases = [
        ((b'f', b'f'), 0xFF),
        ((b'a', b'0'), 0xA0),
        ((b'0', b'a'), 0x0A),
    ];
    for ((high, low), byte) in cases {
        assert_eq!(decode_hex_pair(high, low), Some(byte));
    }
}

// -----------------------------------------------------------------------
// Coverage: parse_single_address edge cases
// -----------------------------------------------------------------------

/// A display name followed by `<>` carries no addr-spec, so no address
/// may be produced.
#[test]
fn parse_single_address_empty_angle_brackets() {
    let parsed = parse_single_address("Display Name <>");
    assert!(
        parsed.is_none(),
        "Empty angle brackets should not produce an address"
    );
}

/// Malformed input where `>` appears before `<`: the angle-bracket
/// form cannot apply, yet the text still contains `@` and must yield
/// an address.
#[test]
fn parse_single_address_reversed_angles() {
    // In ">bad<user@example.com" the last '>' is at index 0 and the last
    // '<' at index 4, so the closing bracket precedes the opening one.
    // NOTE(review): per the original comment the parser then falls through
    // to its bare-email check (contains '@') — confirm against
    // `parse_single_address`.
    let parsed = parse_single_address(">bad<user@example.com");
    assert!(parsed.is_some());
}

/// Free text that contains neither `@` nor angle brackets is not an
/// address and must produce `None`.
#[test]
fn parse_single_address_no_at_no_brackets() {
    let parsed = parse_single_address("just plain text");
    assert!(
        parsed.is_none(),
        "Text without @ or <> should not produce an address"
    );
}

// -----------------------------------------------------------------------
// Coverage: is_inside_quotes
// -----------------------------------------------------------------------

/// `is_inside_quotes` has to treat `\"` within a quoted string as an
/// escaped character rather than as the closing quote.
#[test]
fn is_inside_quotes_with_escapes() {
    // `"hello \" world"end`: index 15 lies past the escaped quote and is
    // still within the quoted string.
    let escaped = "\"hello \\\" world\"end";
    assert!(is_inside_quotes(escaped, 15));

    // Index 0 comes before any quote has been opened.
    let at_start = "\"hello\"";
    assert!(!is_inside_quotes(at_start, 0));

    // Index 8 comes after the closing quote.
    let after_close = "\"hello\" world";
    assert!(!is_inside_quotes(after_close, 8));
}

/// RFC 5322 Section 3.2.4: `quoted-pair` (backslash escape) is only valid
/// inside `quoted-string` and `comment`, not outside them. A backslash
/// outside quotes must be treated as a literal character, not as an escape
/// that consumes the next byte.
#[test]
fn is_inside_quotes_backslash_outside_quotes_is_literal() {
    // Input: x=\"y"z"
    // Byte layout: x = \ " y " z "
    //              0 1 2 3 4 5 6 7
    // The `\` at index 2 is outside any quoted string, so it is literal
    // and does NOT escape the `"` at index 3. That `"` opens a quoted
    // string, `y` (index 4) is inside it, and the `"` at index 5 closes
    // it. `z` at index 6 is therefore outside quotes.
    let s = r#"x=\"y"z""#;
    assert!(
        !is_inside_quotes(s, 6),
        "RFC 5322 Section 3.2.4: backslash outside quotes must not escape \
         the next character — position 6 ('z') should be outside quotes"
    );
}

// -----------------------------------------------------------------------
// Coverage: strip_outer_quotes
// -----------------------------------------------------------------------

/// Inputs shorter than two characters cannot hold a matching quote
/// pair, so `strip_outer_quotes` returns them untouched.
#[test]
fn strip_outer_quotes_short_input() {
    for input in ["\"", "", "x"] {
        assert_eq!(strip_outer_quotes(input), input);
    }
}

/// A quote on just one end (leading only, or trailing only) is not a
/// pair; `strip_outer_quotes` leaves such input unchanged.
#[test]
fn strip_outer_quotes_one_sided() {
    for input in ["\"hello", "hello\""] {
        assert_eq!(strip_outer_quotes(input), input);
    }
}

// -----------------------------------------------------------------------
// Coverage: split_header_body edge case — starts with \n
// -----------------------------------------------------------------------

/// RFC 2046: a part whose very first byte is a bare `\n` has an empty
/// header section; everything after that LF is the body.
#[test]
fn split_header_body_starts_with_lf() {
    let (header_bytes, body_bytes) = split_header_body(b"\nBody text here");
    assert!(
        header_bytes.is_empty(),
        "Headers should be empty when input starts with \\n"
    );
    assert_eq!(body_bytes, b"Body text here");
}

/// RFC 2046: a part that opens with `\r\n` has an empty header section;
/// everything after that CRLF is the body.
#[test]
fn split_header_body_starts_with_crlf() {
    let (header_bytes, body_bytes) = split_header_body(b"\r\nBody text here");
    assert!(
        header_bytes.is_empty(),
        "Headers should be empty when input starts with \\r\\n"
    );
    assert_eq!(body_bytes, b"Body text here");
}

// -----------------------------------------------------------------------
// Coverage: CTE decode_body stripping trailing LF (L1130)
// -----------------------------------------------------------------------

/// `decode_body` removes one trailing bare LF (no preceding CR) from
/// the decoded text.
#[test]
fn decode_body_strips_trailing_lf_only() {
    let content_type = "text/plain; charset=utf-8";
    let text = decode_body(b"Hello\n", "", content_type);
    assert_eq!(text, "Hello", "Trailing bare LF should be stripped");
}

/// `decode_body` leaves content that has no trailing line break
/// exactly as it was.
#[test]
fn decode_body_no_trailing_newline() {
    let content_type = "text/plain; charset=utf-8";
    let text = decode_body(b"Hello", "", content_type);
    assert_eq!(
        text, "Hello",
        "No trailing newline should leave content unchanged"
    );
}

/// Percent-escapes written with lowercase hex digits decode to the
/// expected bytes (RFC 2231 / RFC 3986).
#[test]
fn percent_decode_lowercase_hex() {
    // `%c3%a9` is the UTF-8 byte sequence for U+00E9 (e-acute).
    let bytes = percent_decode("%c3%a9");
    assert_eq!(bytes, vec![0xC3, 0xA9]);
}

/// A `%` followed by non-hex characters is not an escape and is copied
/// to the output verbatim.
#[test]
fn percent_decode_invalid_hex() {
    let bytes = percent_decode("%ZZ");
    assert_eq!(bytes, b"%ZZ");
}

/// A `%` escape cut short by the end of input (only one hex digit
/// left) is copied to the output verbatim.
#[test]
fn percent_decode_truncated() {
    let bytes = percent_decode("hello%2");
    assert_eq!(bytes, b"hello%2");
}

// -----------------------------------------------------------------------
// Coverage: address with colon that looks like group but has @
// -----------------------------------------------------------------------

/// A colon that precedes the `@` of a bare address
/// (`user:tag@example.com`) resembles the RFC 5322 Section 3.4 group
/// syntax `display-name ":"`. Whichever interpretation the parser
/// picks, the input must still yield at least one address.
#[test]
fn parse_address_colon_with_at_sign() {
    // NOTE(review): per the original comment, the heuristic inspects the
    // text accumulated before ':' — here "user", which has no '@' — so
    // group mode is entered and "tag@example.com" is parsed as a group
    // member. Confirm against `parse_address_list`.
    let parsed = parse_address_list("user:tag@example.com");
    assert!(!parsed.is_empty(), "Should parse at least one address");
}

/// RFC 2231 Section 4 + Postel's law: some mailers incorrectly wrap an
/// extended parameter value in double quotes
/// (`filename*="utf-8''%C3%A9.txt"`). The outer quotes have to be
/// dropped before the charset prefix and percent-escapes are decoded.
#[test]
fn extract_rfc2231_param_handles_quoted_value() {
    // Conformant spelling would be: filename*=utf-8''caf%C3%A9.txt
    let header = "attachment; filename*=\"utf-8''caf%C3%A9.txt\"";
    let name = extract_filename(header, "application/octet-stream");
    assert_eq!(
        name.as_deref(),
        Some("café.txt"),
        "RFC 2231 filename with outer quotes must be decoded correctly \
         (Postel's law: be liberal in what you accept)"
    );
}

/// RFC 2231 Section 4 / Postel's law: if a non-conformant mailer quotes
/// an extended value and that value contains a `;`, the value must be
/// delimited by its closing quote rather than cut at the semicolon.
#[test]
fn extract_rfc2231_param_quoted_value_with_semicolon() {
    // The raw `;` sits inside the (incorrectly) quoted extended value.
    let header = "attachment; filename*=\"UTF-8''file;name.txt\"";
    let value = extract_rfc2231_param(header, "filename");
    assert_eq!(
        value.as_deref(),
        Some("file;name.txt"),
        "RFC 2231 quoted value containing a literal semicolon must not \
         be truncated at the semicolon (Postel's law)"
    );
}

/// RFC 2231 Section 4 + Postel's law: when a quoted extended value is
/// missing its closing quote, the value ends at the next `;` so that
/// following parameters are not absorbed into it.
#[test]
fn extract_rfc2231_param_unterminated_quoted_value_stops_at_separator() {
    let header = "attachment; filename*=\"UTF-8''report.txt; size=123";
    let value = extract_rfc2231_param(header, "filename");
    assert_eq!(
        value.as_deref(),
        Some("report.txt"),
        "unterminated quoted RFC 2231 values must stop at the next \
         parameter separator instead of swallowing later parameters"
    );
}

/// RFC 2231 Section 4 requires valid percent-encoding in a `filename*=`
/// ext-value. If that value is malformed (`%ZZ`) while a well-formed
/// plain `filename=` is also present, the plain one is used
/// (Postel's law).
#[test]
fn extract_filename_malformed_rfc2231_param_falls_back_to_plain_filename() {
    let header = "attachment; filename*=UTF-8''report%ZZ.txt; filename=\"report.txt\"";
    let name = extract_filename(header, "application/octet-stream");
    assert_eq!(
        name.as_deref(),
        Some("report.txt"),
        "malformed RFC 2231 filename* must not override a valid plain filename fallback"
    );
}

/// RFC 5322 Section 3.4: `display-name ":" [group-list] ";"` is group
/// syntax even if the display-name is a quoted-string that happens to
/// contain `@`; an `@` inside quotes does not signal an addr-spec.
#[test]
fn group_address_with_at_in_quoted_display_name() {
    let members = parse_address_list(r#""user@host": addr@example.com;"#);
    assert_eq!(
        members.len(),
        1,
        "group with quoted display-name containing '@' must parse the member address"
    );
    assert_eq!(members[0].email, "addr@example.com");
}

/// RFC 5322 Section 4.4: an obsolete source route such as
/// `<@host1,@host2:user@domain>` is discarded, leaving `user@domain`
/// as the resulting addr-spec.
#[test]
fn obs_route_stripped_from_angle_bracket_address() {
    // Route with a single relay hop.
    let single_hop = parse_address_list("<@relay:user@example.com>");
    assert_eq!(single_hop.len(), 1);
    assert_eq!(single_hop[0].email, "user@example.com");

    // Route listing several hops.
    let multi_hop = parse_address_list("<@hop1,@hop2:alice@domain.org>");
    assert_eq!(multi_hop.len(), 1);
    assert_eq!(multi_hop[0].email, "alice@domain.org");

    // Route combined with a display name.
    let named = parse_address_list("Alice <@relay:alice@example.com>");
    assert_eq!(named.len(), 1);
    assert_eq!(named[0].name.as_deref(), Some("Alice"));
    assert_eq!(named[0].email, "alice@example.com");
}

/// RFC 2231 Section 4: the first encoded continuation section
/// (`param*0*=`) has to be of the form `charset'language'value`. If
/// the apostrophes are missing the section is malformed, and its text
/// is taken literally instead of being percent-decoded.
#[test]
fn rfc2231_continuation_malformed_first_encoded_section_not_percent_decoded() {
    // `filename*0*=` without the charset'language' prefix: the value
    // "hello%20world.txt" must stay as written and not turn into
    // "hello world.txt".
    let disposition = "attachment; filename*0*=hello%20world.txt";
    let assembled = extract_rfc2231_continuation(disposition, "filename");
    assert_eq!(
        assembled.as_deref(),
        Some("hello%20world.txt"),
        "malformed charset'language'value must not be percent-decoded"
    );
}

/// Duplicate continuation sections are undefined by RFC 2231
/// Sections 3-4. Under Postel's law, a malformed encoded duplicate of
/// section 0 must not displace an earlier, usable plain section 0.
#[test]
fn rfc2231_continuation_malformed_encoded_duplicate_does_not_override_plain() {
    let disposition =
        "attachment; filename*0=\"report\"; filename*0*=broken%20value; filename*1=\".txt\"";
    let assembled = extract_rfc2231_continuation(disposition, "filename");
    assert_eq!(
        assembled.as_deref(),
        Some("report.txt"),
        "malformed encoded duplicate section must not override the first valid plain section"
    );
}

/// RFC 5322 Section 3.3 defines zone as ("+" / "-") 4DIGIT read as HHMM.
/// An offset with hours > 23 or minutes > 59 has no valid meaning (and
/// would not survive a round trip through `to_rfc5322_string()` as a
/// 4-digit field), so it is treated as unknown and mapped to +0000 per
/// RFC 5322 Section 4.3.
#[test]
fn test_invalid_timezone_offset_defaults_to_zero() {
    let raw = b"From: test@example.com\r\nDate: Thu, 13 Feb 2025 12:00:00 +9999\r\n\r\n";
    let date = parse_email(raw).unwrap().date.unwrap();
    // RFC 5322 Section 4.3: unknown/invalid timezone -> +0000
    assert_eq!(date.tz_offset_minutes, 0);
}

/// RFC 5322 Section 3.3: offsets at the extremes of the real-world
/// range, plus UTC itself, must pass the zone range check and keep
/// their exact value in minutes.
#[test]
fn test_valid_edge_case_timezone_offsets() {
    // Parses a raw message and returns the Date header's offset in minutes.
    let offset_minutes = |raw: &[u8]| parse_email(raw).unwrap().date.unwrap().tz_offset_minutes;

    // +1400 (Line Islands, max real-world offset) -> 14*60 + 0 = 840
    assert_eq!(
        offset_minutes(b"From: test@example.com\r\nDate: Thu, 13 Feb 2025 12:00:00 +1400\r\n\r\n"),
        840
    );

    // -1200 (Baker Island, min real-world offset) -> -(12*60 + 0) = -720
    assert_eq!(
        offset_minutes(b"From: test@example.com\r\nDate: Thu, 13 Feb 2025 12:00:00 -1200\r\n\r\n"),
        -720
    );

    // +0000 (UTC) -> 0
    assert_eq!(
        offset_minutes(b"From: test@example.com\r\nDate: Thu, 13 Feb 2025 12:00:00 +0000\r\n\r\n"),
        0
    );
}

/// RFC 5322 Section 3.3: a numeric zone is exactly a sign followed by
/// four digits. A five-digit offset such as `+12345` is malformed; it
/// must not be read as `+1234` and instead falls back to `+0000`.
#[test]
fn test_overlong_timezone_offset_defaults_to_zero() {
    let raw = b"From: test@example.com\r\nDate: Thu, 13 Feb 2025 12:00:00 +12345\r\n\r\n";
    let date = parse_email(raw).unwrap().date.unwrap();
    assert_eq!(
        date.tz_offset_minutes,
        0,
        "zone = sign 4DIGIT, so +12345 must not be truncated to +1234 \
         (RFC 5322 Section 3.3)"
    );
}

/// RFC 5322 Section 4.3: the single-letter military zones (A–I, K–Y)
/// "SHOULD all be considered equivalent to '-0000'" since their
/// historical use is unreliable, so they resolve to offset 0. Named
/// civil zones such as EDT keep their conventional offsets.
#[test]
fn test_military_timezone_codes_treated_as_unknown() {
    // Parses a raw message and returns the Date header's offset in minutes.
    let offset_minutes = |raw: &[u8]| parse_email(raw).unwrap().date.unwrap().tz_offset_minutes;

    // 'A' nominally meant +0100 but is treated as -0000 (offset 0).
    let zone_a = b"From: test@example.com\r\n\
                   Date: Thu, 13 Feb 2025 12:00:00 A\r\n\r\n";
    assert_eq!(
        offset_minutes(zone_a),
        0,
        "military zone 'A' must be treated as -0000 (RFC 5322 Section 4.3)"
    );

    // 'N' nominally meant -0100.
    let zone_n = b"From: test@example.com\r\n\
                   Date: Thu, 13 Feb 2025 12:00:00 N\r\n\r\n";
    assert_eq!(
        offset_minutes(zone_n),
        0,
        "military zone 'N' must be treated as -0000 (RFC 5322 Section 4.3)"
    );

    // 'M' nominally meant +1200.
    let zone_m = b"From: test@example.com\r\n\
                   Date: Thu, 13 Feb 2025 12:00:00 M\r\n\r\n";
    assert_eq!(
        offset_minutes(zone_m),
        0,
        "military zone 'M' must be treated as -0000 (RFC 5322 Section 4.3)"
    );

    // A named civil zone retains its well-known offset.
    let zone_edt = b"From: test@example.com\r\n\
                     Date: Thu, 13 Feb 2025 12:00:00 EDT\r\n\r\n";
    assert_eq!(
        offset_minutes(zone_edt),
        -240,
        "EDT is a well-known civil zone and should still resolve to -0400"
    );
}

#[test]
fn spec_audit_date_accepts_calendar_invalid_days() {
    // RFC 5322 Section 3.3 defines day = 1*2DIGIT, so the grammar admits
    // 1-31 in every month. Dates that cannot exist on a calendar are
    // still syntactically valid and do show up in real mail, so under
    // Postel's law they are accepted rather than rejected.
    let dates = [
        "Thu, 31 Feb 2025 12:00:00 +0000",
        "Mon, 31 Apr 2025 12:00:00 +0000",
        "Mon, 31 Jun 2025 12:00:00 +0000",
        "Mon, 29 Feb 2023 12:00:00 +0000", // non-leap
        "Thu, 29 Feb 2024 12:00:00 +0000", // leap year
        "Tue, 28 Feb 2023 12:00:00 +0000",
    ];
    for date in dates {
        assert!(parse_rfc5322_date(date).is_some());
    }
}

#[test]
fn spec_audit_date_accepts_mismatched_day_of_week() {
    // RFC 5322 Section 3.3 says a day-of-week, when given, MUST match
    // the date — but that binds whoever generates the header. Following
    // Postel's law (RFC 1122 Section 1.2.2) the parser tolerates a wrong
    // day name, which real mailers emit often, and trusts the numeric
    // fields. 2025-02-13 is a Thursday, yet "Mon" must be accepted.
    let parsed = parse_rfc5322_date("Mon, 13 Feb 2025 12:00:00 +0000");
    assert!(
        parsed.is_some(),
        "Parser must accept dates with incorrect day-of-week (Postel's law)"
    );
    let date = parsed.unwrap();
    assert_eq!(date.year, 2025);
    assert_eq!(date.month, 2);
    assert_eq!(date.day, 13);
}

#[test]
fn spec_audit_encoded_word_in_quoted_string_not_decoded() {
    // RFC 2047 Section 5 forbids encoded-words inside a quoted-string,
    // so text that looks like one there is kept literally.
    let message = b"From: \"=?UTF-8?B?SGVsbG8=?=\" <test@example.com>\r\n\
                    Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";
    let email = parse_email(message).unwrap();
    let display_name = email.from[0].name.as_deref();
    assert_eq!(
        display_name,
        Some("=?UTF-8?B?SGVsbG8=?="),
        "Encoded words inside quoted-strings must NOT be decoded (RFC 2047 Section 5)"
    );
}

#[test]
fn spec_audit_encoded_word_in_unquoted_phrase_decoded() {
    // RFC 2047 Section 5 allows encoded-words in an unquoted phrase,
    // where they are decoded.
    let message = b"From: =?UTF-8?B?SGVsbG8=?= <test@example.com>\r\n\
                    Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";
    let email = parse_email(message).unwrap();
    let display_name = email.from[0].name.as_deref();
    assert_eq!(display_name, Some("Hello"));
}

#[test]
fn spec_audit_overlong_encoded_word_decoded() {
    // The 75-character cap of RFC 2047 Section 2 binds generators only.
    // Under Postel's law (RFC 1122 Section 1.2.2) an encoded-word longer
    // than that is still decoded, because real mailers often exceed it.
    let message = b"Subject: =?UTF-8?Q?AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA?=\r\n\
                    From: test@example.com\r\n\
                    Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(
        subject,
        Some("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"),
        "Overlong encoded-word must be decoded (Postel's law, RFC 2047 Section 6.1)"
    );
}

#[test]
fn extract_param_case_insensitive_param_name() {
    // RFC 2045 Section 5.1: parameter names compare case-insensitively,
    // so the requested name must match regardless of the case the
    // caller uses.
    let content_type = "text/plain; charset=utf-8";
    let expected = Some("utf-8".to_string());

    // All-uppercase name
    assert_eq!(
        extract_param(content_type, "CHARSET"),
        expected,
        "extract_param must match param_name case-insensitively (RFC 2045 Section 5.1)"
    );
    // Mixed-case name
    assert_eq!(
        extract_param(content_type, "Charset"),
        expected,
        "extract_param must match mixed-case param_name (RFC 2045 Section 5.1)"
    );
}

#[test]
fn extract_rfc2231_param_case_insensitive_param_name() {
    // RFC 2045 Section 5.1: parameter names compare case-insensitively.
    let disposition = "attachment; filename*=utf-8''hello%20world.txt";
    let value = extract_rfc2231_param(disposition, "FILENAME");
    assert_eq!(
        value,
        Some("hello world.txt".to_string()),
        "extract_rfc2231_param must match param_name case-insensitively (RFC 2045 Section 5.1)"
    );
}

#[test]
fn extract_rfc2231_continuation_case_insensitive_param_name() {
    // RFC 2045 Section 5.1: parameter names compare case-insensitively.
    let disposition = "attachment; filename*0=\"hello \"; filename*1=\"world.txt\"";
    let assembled = extract_rfc2231_continuation(disposition, "FILENAME");
    assert_eq!(
        assembled,
        Some("hello world.txt".to_string()),
        "extract_rfc2231_continuation must match param_name case-insensitively \
         (RFC 2045 Section 5.1)"
    );
}

/// RFC 2231 Section 4: the charset may be given in the extended form
/// `charset*=charset'lang'value`. `decode_body()` has to honour it, or
/// a non-ASCII body would not be decoded with the right charset.
#[test]
fn decode_body_rfc2231_charset_star_encoded() {
    // UTF-8 bytes for "café" (`c3 a9` encodes é).
    let utf8_bytes = b"caf\xc3\xa9";
    let content_type = "text/plain; charset*=UTF-8''utf-8";
    let text = decode_body(utf8_bytes, "8bit", content_type);
    assert_eq!(
        text, "caf\u{e9}",
        "RFC 2231 Section 4: charset*=UTF-8''utf-8 must be recognized \
         by decode_body so non-ASCII bodies are decoded correctly"
    );
}

/// RFC 2231 Section 3: the charset may be split across continuation
/// sections (`charset*0=...; charset*1=...`). `decode_body()` has to
/// join them before choosing the charset, or a non-ASCII body would
/// not be decoded correctly.
#[test]
fn decode_body_rfc2231_charset_continuation() {
    // UTF-8 bytes for "café" (`c3 a9` encodes é).
    let utf8_bytes = b"caf\xc3\xa9";
    let content_type = "text/plain; charset*0=ut; charset*1=f-8";
    let text = decode_body(utf8_bytes, "8bit", content_type);
    assert_eq!(
        text, "caf\u{e9}",
        "RFC 2231 Section 3: charset*0=ut; charset*1=f-8 must be \
         assembled and recognized by decode_body"
    );
}

/// RFC 2045 Section 5.1: once comments are removed from the
/// type/subtype, any whitespace next to the `/` must be dropped too.
/// `text (comment) /plain`, `text/ plain` and `text / plain` all
/// normalize to `"text/plain"`.
#[test]
fn extract_mime_type_normalizes_whitespace_around_slash() {
    // Gap before the slash, left behind by the stripped comment
    let before_slash = extract_mime_type("text (comment) /plain");
    assert_eq!(
        before_slash, "text/plain",
        "whitespace before '/' (left by comment stripping) must be removed"
    );

    // Gap after the slash
    let after_slash = extract_mime_type("text/ plain");
    assert_eq!(
        after_slash, "text/plain",
        "whitespace after '/' must be removed"
    );

    // Gaps on both sides of the slash
    let both_sides = extract_mime_type("text / plain");
    assert_eq!(
        both_sides, "text/plain",
        "whitespace around '/' must be removed"
    );

    // Well-formed input with a parameter is unaffected
    let well_formed = extract_mime_type("text/plain; charset=utf-8");
    assert_eq!(well_formed, "text/plain");
}

/// RFC 2231 Section 3: continuation indices with a hole in them are
/// malformed. Only the unbroken run starting at `*0` is kept.
#[test]
fn extract_rfc2231_continuation_non_contiguous_sections() {
    // Section 1 is absent, so section 2 must be ignored.
    let disposition = "attachment; filename*0=\"hello \"; filename*2=\"world.txt\"";
    let assembled = extract_rfc2231_continuation(disposition, "filename");
    assert_eq!(
        assembled,
        Some("hello ".to_string()),
        "RFC 2231 Section 3: reassembly must stop before section 2 when section 1 is missing"
    );
}

/// RFC 2231 Section 3: a large gap in the continuation indices is just as
/// malformed as a small one. Even if the scanner encounters a later
/// segment, only the contiguous prefix may be reassembled.
#[test]
fn rfc2231_gap_larger_than_three() {
    // Sections 1 through 4 are absent; *5 must be ignored.
    let disposition = "attachment; filename*0=\"hello\"; filename*5=\"world\"";
    let expected = Some("hello".to_string());
    let reassembled = extract_rfc2231_continuation(disposition, "filename");
    assert_eq!(
        reassembled, expected,
        "RFC 2231 Section 3: reassembly must stop at the first missing section"
    );
}

/// RFC 2231 Section 3: section 0 may be plain (non-encoded) while a later
/// section is the first one carrying a charset. In that case the charset
/// has to be taken from that later section. This is a regression test:
/// charset extraction used to fire only for section 0 or when
/// `sections.is_empty()`, so a plain section 0 blocked it for section 1.
#[test]
fn rfc2231_continuation_charset_from_non_first_encoded_section() {
    // *0 is a plain quoted value; *1* is the first encoded section and
    // is the one that declares the charset.
    let disposition = "attachment; filename*0=\"hello \"; filename*1*=UTF-8''w%C3%B6rld.txt";
    let expected = Some("hello wörld.txt".to_string());
    let reassembled = extract_rfc2231_continuation(disposition, "filename");
    assert_eq!(
        reassembled, expected,
        "RFC 2231 Section 3: charset from the first encoded section (not section 0) \
         must be used to decode percent-encoded bytes"
    );
}

/// RFC 2231 Section 3 + Postel's law (RFC 1122 Section 1.2.2):
/// continuation parameters must be parsed leniently with respect to
/// optional whitespace around `=`, in the same way `extract_param`
/// already treats simple parameters. The lookup helper involved is
/// `find_param_value`.
#[test]
fn rfc2231_continuation_tolerates_spaces_around_eq() {
    // Non-conformant but seen in the wild: " = " instead of "=".
    let disposition = "attachment; filename*0 = \"hello\"; filename*1 = \"world.txt\"";
    let reassembled = extract_rfc2231_continuation(disposition, "filename");
    let expected = Some("helloworld.txt".to_string());
    assert_eq!(
        reassembled, expected,
        "RFC 2231 Section 3 + Postel's law: spaces around '=' in continuation \
         parameters must be tolerated"
    );
}

/// RFC 2046 Section 5.1.4: within multipart/alternative the LAST part of
/// a given type is the "best choice". With two text/html alternatives
/// the parser must therefore report the second one, not the first.
#[test]
fn multipart_alternative_prefers_last_html() {
    // One text/plain part followed by two competing text/html parts.
    let message = b"From: sender@example.com\r\n\
Subject: test\r\n\
Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
Content-Type: multipart/alternative; boundary=\"alt\"\r\n\
\r\n\
--alt\r\n\
Content-Type: text/plain\r\n\
\r\n\
plain text\r\n\
--alt\r\n\
Content-Type: text/html\r\n\
\r\n\
<p>first html</p>\r\n\
--alt\r\n\
Content-Type: text/html\r\n\
\r\n\
<p>second html - better version</p>\r\n\
--alt--";
    let email = parse_email(message).unwrap();
    let html = email.body_html.as_deref();
    assert_eq!(
        html,
        Some("<p>second html - better version</p>"),
        "RFC 2046 Section 5.1.4: should prefer LAST text/html in multipart/alternative"
    );
}

/// RFC 2046 Section 5.1.4, applied to text/plain: when a
/// multipart/alternative container holds several text/plain parts, the
/// last one wins.
#[test]
fn multipart_alternative_prefers_last_plain() {
    // Two competing text/plain parts followed by one text/html part.
    let message = b"From: sender@example.com\r\n\
Subject: test\r\n\
Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
Content-Type: multipart/alternative; boundary=\"alt\"\r\n\
\r\n\
--alt\r\n\
Content-Type: text/plain\r\n\
\r\n\
first plain\r\n\
--alt\r\n\
Content-Type: text/plain\r\n\
\r\n\
second plain - better version\r\n\
--alt\r\n\
Content-Type: text/html\r\n\
\r\n\
<p>html version</p>\r\n\
--alt--";
    let email = parse_email(message).unwrap();
    let plain = email.body_text.as_deref();
    assert_eq!(
        plain,
        Some("second plain - better version"),
        "RFC 2046 Section 5.1.4: should prefer LAST text/plain in multipart/alternative"
    );
}

/// RFC 2046 Section 5.1.1: a delimiter line is exactly `--{boundary}`.
/// A line that only begins with those characters (for example `--abcdef`
/// with boundary `abc`) is ordinary part content, not a delimiter.
#[test]
fn boundary_not_matched_as_prefix() {
    let message = b"From: a@b.com\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\nContent-Type: multipart/mixed; boundary=\"abc\"\r\n\r\n--abc\r\nContent-Type: text/plain\r\n\r\nBody text here\r\n--abcdef\r\nThis is NOT a boundary\r\n--abc--";
    let email = parse_email(message).unwrap();
    // The look-alike line and the text after it stay inside the single part.
    let expected = Some("Body text here\r\n--abcdef\r\nThis is NOT a boundary");
    assert_eq!(email.body_text.as_deref(), expected);
}

/// RFC 2046 Section 5.1.1: the closing delimiter line has the shape
/// `--boundary-- [LWSP] CRLF`. If non-whitespace text follows the final
/// `--`, the line is not a closing delimiter and belongs to the part
/// data.
#[test]
fn closing_boundary_requires_valid_terminator() {
    let message = b"From: a@b.com\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\nContent-Type: multipart/mixed; boundary=\"abc\"\r\n\r\n--abc\r\nContent-Type: text/plain\r\n\r\nBody text here\r\n--abc--junk\r\nThis is NOT a closing boundary\r\n--abc--";
    let email = parse_email(message).unwrap();
    // `--abc--junk` is kept verbatim as body content.
    let expected = Some("Body text here\r\n--abc--junk\r\nThis is NOT a closing boundary");
    assert_eq!(email.body_text.as_deref(), expected);
}

/// RFC 2045 Section 5.2: a Content-Type value lacking the `/` separator
/// (such as the bare word `text`) is invalid, and the message is then
/// handled as `text/plain`.
#[test]
fn invalid_content_type_defaults_to_text_plain() {
    let message = b"From: a@b.com\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\nContent-Type: text\r\n\r\nBody text";
    let email = parse_email(message).unwrap();
    // The body must surface as plain text despite the bogus header.
    let plain = email.body_text.as_deref();
    assert_eq!(plain, Some("Body text"));
}

/// RFC 2047 Section 6.2: the whitespace separating two tokens is dropped
/// only if BOTH are valid encoded-words. Here the second token is
/// truncated, so the space in front of it must survive.
#[test]
fn whitespace_preserved_before_invalid_encoded_word() {
    // Valid B-encoded "Hello" followed by an incomplete "=?broken".
    let header_value = "=?UTF-8?B?SGVsbG8=?= =?broken";
    assert_eq!(decode_encoded_words(header_value), "Hello =?broken");
}

/// RFC 2047 Section 5: inside `*text` headers like `Subject`, linear
/// whitespace MUST separate an encoded-word from neighbouring text. A
/// token glued directly to other characters is left undecoded.
#[test]
fn encoded_word_without_lwsp_boundary_stays_literal_in_subject() {
    // The encoded-word touches both "Prefix" and "Suffix".
    let message = b"From: sender@example.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Subject: Prefix=?UTF-8?Q?caf=C3=A9?=Suffix\r\n\
                 \r\n\
                 Body\r\n";

    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(
        subject,
        Some("Prefix=?UTF-8?Q?caf=C3=A9?=Suffix"),
        "RFC 2047 Section 5: Subject encoded-words must not decode \
         when they are glued to adjacent text without linear whitespace"
    );
}

/// RFC 2047 Section 5: inside a `phrase`, linear whitespace MUST separate
/// an encoded-word from neighbouring words or specials. A display-name
/// token attached to preceding text is therefore kept as literal text.
#[test]
fn encoded_word_without_lwsp_boundary_stays_literal_in_phrase() {
    // "Prefix" runs straight into the encoded-word in the display name.
    let message = b"From: Prefix=?UTF-8?Q?caf=C3=A9?= <sender@example.com>\r\n\
                 To: recipient@example.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n\
                 Body\r\n";

    let email = parse_email(message).unwrap();
    let display_name = email.from[0].name.as_deref();
    assert_eq!(
        display_name,
        Some("Prefix=?UTF-8?Q?caf=C3=A9?="),
        "RFC 2047 Section 5: phrase encoded-words must not decode \
         when they are glued to adjacent words without linear whitespace"
    );
}

/// RFC 2047 Section 2 forbids spaces inside encoded-text, so a token such
/// as `=?UTF-8?B?SGVs bG8=?=` is not strictly conformant. The decoder is
/// deliberately lenient here (Postel's law, RFC 1122 Section 1.2.2): a
/// space inside B-encoded-text is treated as a header-folding artifact
/// (RFC 5322 Section 2.2.3) and removed before base64 decoding, so the
/// word still decodes instead of being passed through literally.
#[test]
fn encoded_word_base64_with_space_in_text() {
    // "SGVsbG8=" is base64 for "Hello"; a stray space splits it in two.
    let input = "=?UTF-8?B?SGVs bG8=?=";
    let decoded = decode_encoded_words(input);
    assert_eq!(
        decoded, "Hello",
        "space in B-encoded-text should be stripped as fold artifact"
    );
}

/// RFC 2046 Section 5.1.1 / Postel's law: a boundary delimiter preceded
/// by a bare CR line ending should still be recognized. Bare LF is
/// already covered by `split_mime_parts_lf_only_boundaries`; this test
/// covers the bare-CR counterpart.
#[test]
fn split_mime_parts_bare_cr_boundaries() {
    // Same layout as the LF-only test, except that the second and the
    // closing delimiter are preceded by a lone \r (no \n).
    let multipart_body = b"--boundary\r\nContent-Type: text/plain\r\n\r\nPart 1\r--boundary\r\nContent-Type: text/plain\r\n\r\nPart 2\r--boundary--";
    let part_count = split_mime_parts(multipart_body, "boundary").len();
    assert_eq!(
        part_count,
        2,
        "Should find 2 parts with bare-CR-only boundaries"
    );
}

/// RFC 5322 Section 3.4: a stray `>` in a malformed address list must not
/// drive `angle_depth` below zero, because a negative depth would stop
/// later commas from acting as address separators.
/// Regression: `parse_address_list` used to decrement `angle_depth` on
/// every `>` unconditionally.
#[test]
fn regression_unmatched_angle_bracket_does_not_break_separator() {
    // The unmatched `>` sits before the comma. The address after the
    // comma must still be split off and recognized on its own.
    let parsed = parse_address_list("bad> addr, user@example.com");
    let second_found = parsed.iter().any(|entry| entry.email == "user@example.com");
    assert!(
        second_found,
        "unmatched '>' must not prevent comma from splitting addresses: {parsed:?}"
    );
}

/// RFC 5322 Section 3.2.2: when a parenthesized comment comes BEFORE a
/// bare addr-spec, its text is used as the display name (an RFC 822-era
/// convention some mailers still follow).
/// Regression: `parse_single_address` used to look only at trailing
/// comments and silently discarded leading ones.
#[test]
fn regression_leading_comment_display_name_extracted() {
    let parsed =
        parse_single_address("(John Doe) user@example.com").expect("should parse successfully");
    assert_eq!(parsed.email, "user@example.com");
    let display_name = parsed.name.as_deref();
    assert_eq!(
        display_name,
        Some("John Doe"),
        "leading comment should be used as display name (RFC 5322 Section 3.2.2)"
    );
}

/// RFC 2047 Section 5 rule (2): comments are one of the places where
/// encoded-words are allowed, so an encoded-word inside a leading comment
/// has to be decoded when the comment becomes the display name.
#[test]
fn regression_leading_comment_rfc2047_decoded() {
    // The comment holds a single B-encoded word that decodes to "John".
    let parsed = parse_single_address("(=?UTF-8?B?Sm9obg==?=) user@example.com")
        .expect("should parse successfully");
    assert_eq!(parsed.email, "user@example.com");
    let display_name = parsed.name.as_deref();
    assert_eq!(
        display_name,
        Some("John"),
        "encoded-words in leading comment must be decoded (RFC 2047 Section 5 rule 2)"
    );
}

/// RFC 5322 Section 3.2.2: a comment embedded in a `display-name` counts
/// as CFWS, so it must be absent from the semantic display name handed
/// back to callers.
#[test]
fn regression_name_addr_comment_not_retained_in_display_name() {
    // "(Boss)" sits between the two words of the display name.
    let message = b"From: John (Boss) Doe <john@example.com>\r\n\
                To: recipient@example.com\r\n\
                Subject: Commented name\r\n\
                \r\n\
                body";

    let email = parse_email(message).expect("message should parse successfully");
    assert_eq!(email.from.len(), 1);
    let display_name = email.from[0].name.as_deref();
    assert_eq!(
        display_name,
        Some("John Doe"),
        "display-name comments are CFWS and must not be retained"
    );
}

/// RFC 5322 Section 3.2.5: a display-name is a `phrase`, i.e. one or more
/// `word`s, and any `word` may be a quoted-string. For a mixed phrase
/// like `\"John\" Doe <...>` the quoted word is unquoted on its own; the
/// literal quote characters must not leak into the result.
#[test]
fn regression_name_addr_mixed_phrase_unquotes_quoted_word() {
    // First word quoted, second word a bare atom.
    let message = b"From: \"John\" Doe <john@example.com>\r\n\
                To: recipient@example.com\r\n\
                Subject: Mixed phrase\r\n\
                \r\n\
                body";

    let email = parse_email(message).expect("message should parse successfully");
    assert_eq!(email.from.len(), 1);
    let display_name = email.from[0].name.as_deref();
    assert_eq!(
        display_name,
        Some("John Doe"),
        "quoted-string words in a display-name phrase must be unquoted"
    );
}

// ===== Edge-case bug-hunting tests =====

/// RFC 2047 Section 4.2: Q-encoding uses `_` to stand for a space, so
/// `=?utf-8?Q?hello_world?=` decodes to "hello world" rather than
/// `hello_world`.
#[test]
fn edge_q_encoding_underscore_is_space() {
    let encoded = "=?utf-8?Q?hello_world?=";
    let decoded = decode_encoded_words(encoded);
    assert_eq!(
        decoded, "hello world",
        "RFC 2047 Section 4.2: underscore in Q-encoding must decode to space"
    );
}

/// RFC 2047 Section 6.2: whitespace separating two adjacent encoded-words
/// is dropped, and that holds even if the two words declare different
/// charsets.
#[test]
fn edge_adjacent_encoded_words_collapse_whitespace_different_charsets() {
    // First word: base64("Hello") = SGVsbG8= labelled UTF-8.
    // Second word: base64("World") = V29ybGQ= labelled ISO-8859-1.
    // Three spaces separate them.
    let encoded = "=?utf-8?B?SGVsbG8=?=   =?iso-8859-1?B?V29ybGQ=?=";
    let decoded = decode_encoded_words(encoded);
    assert_eq!(
        decoded, "HelloWorld",
        "RFC 2047 Section 6.2: whitespace between adjacent encoded words \
         with different charsets must be collapsed"
    );
}

/// RFC 2046 Section 5.1.1: a boundary delimiter only counts at the start
/// of a line. When `--BOUND` shows up in the middle of a content line it
/// must NOT terminate or split the part.
#[test]
fn edge_boundary_in_content_not_at_line_start() {
    // The body line contains the delimiter text mid-line.
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"BOUND\"\r\n\
                 \r\n\
                 --BOUND\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 This has --BOUND in the middle\r\n\
                 --BOUND--";
    let email = parse_email(message).unwrap();
    let plain = email.body_text.as_deref();
    assert_eq!(
        plain,
        Some("This has --BOUND in the middle"),
        "Boundary mid-line must not split the part (RFC 2046 Section 5.1.1)"
    );
}

/// RFC 5322 Section 3.2.4: a `;` inside a quoted parameter value is part
/// of the value and does not start a new parameter. For
/// `charset="utf-8; extra"` the complete quoted string is the charset
/// value.
#[test]
fn edge_semicolon_in_quoted_param_value() {
    let content_type = "text/plain; charset=\"utf-8; extra\"";
    let charset = extract_param(content_type, "charset");
    assert_eq!(
        charset.as_deref(),
        Some("utf-8; extra"),
        "Semicolons inside quoted parameter values must not split params \
         (RFC 5322 Section 3.2.4)"
    );
}

/// RFC 2047: in Q-encoding `=20` is the hex escape for a space. An
/// encoded-word made up solely of such escapes decodes to that many
/// spaces.
#[test]
fn edge_subject_only_whitespace_after_decode() {
    // Two escaped spaces and nothing else.
    let encoded = "=?utf-8?Q?=20=20?=";
    let decoded = decode_encoded_words(encoded);
    assert_eq!(
        decoded, "  ",
        "RFC 2047 Section 4.2: =20 in Q-encoding must decode to space"
    );
}

/// RFC 5322 Section 2.1.1 caps a line at 998 characters, and Section
/// 2.2.3 describes folding as the way to stay under that limit. This test
/// feeds the parser a header that violates the limit: a `Received` value
/// of 1000 characters on a single line with NO folding. The parser must
/// accept the message anyway and keep the full value in `raw_headers`.
#[test]
fn edge_very_long_header_unfolded() {
    // 1000 'x' characters, deliberately longer than the 998 limit.
    let long_value = "x".repeat(1000);
    let message = format!(
        "From: a@b.com\r\n\
         Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
         Received: {long_value}\r\n\
         \r\n"
    );
    let parsed = parse_email(message.as_bytes()).unwrap();
    assert!(
        parsed.raw_headers.contains(&long_value),
        "Very long unfolded header line (>998 chars) must be preserved"
    );
}

/// RFC 5322 Section 4.3: CFWS (comments and folding white space) is
/// allowed between most date-time tokens. The Date value here carries a
/// comment in the day-of-week position (the comment itself contains a
/// comma) and another comment before the year; it must parse regardless.
#[test]
fn edge_date_cfws_before_day() {
    let message = b"From: a@b.com\r\n\
                 Date: (Mon, ) 13 Feb (year) 2025 15:47:33 +0000\r\n\
                 \r\n";
    let email = parse_email(message).unwrap();
    let parsed_date = email
        .date
        .expect("Date with CFWS in various positions must parse per RFC 5322 Section 4.3");
    // Calendar fields.
    assert_eq!(parsed_date.day, 13);
    assert_eq!(parsed_date.month, 2);
    assert_eq!(parsed_date.year, 2025);
    // Time-of-day fields.
    assert_eq!(parsed_date.hour, 15);
    assert_eq!(parsed_date.minute, 47);
    assert_eq!(parsed_date.second, 33);
}

/// RFC 2046 Section 5.1: a `message/rfc822` part nested inside
/// multipart/mixed, where the embedded message is itself multipart/mixed
/// (a text part plus a PDF attachment).
///
/// This is a smoke test: it only checks that parsing succeeds and that
/// some body text is extracted. It does NOT verify which text is chosen,
/// nor the part/section numbering (RFC 3501 Section 6.4.5) of the nested
/// structure.
#[test]
fn edge_nested_message_rfc822() {
    // Embedded message: its own headers plus a two-part multipart body.
    let inner_msg = "From: inner@example.com\r\n\
                     Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                     Content-Type: multipart/mixed; boundary=\"inner-bound\"\r\n\
                     \r\n\
                     --inner-bound\r\n\
                     Content-Type: text/plain\r\n\
                     \r\n\
                     Inner text\r\n\
                     --inner-bound\r\n\
                     Content-Type: application/pdf\r\n\
                     Content-Disposition: attachment; filename=\"inner.pdf\"\r\n\
                     \r\n\
                     PDF-data\r\n\
                     --inner-bound--";

    // Outer message: a text part followed by the embedded message.
    let raw = format!(
        "From: outer@example.com\r\n\
         Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
         Content-Type: multipart/mixed; boundary=\"outer-bound\"\r\n\
         \r\n\
         --outer-bound\r\n\
         Content-Type: text/plain\r\n\
         \r\n\
         Outer text\r\n\
         --outer-bound\r\n\
         Content-Type: message/rfc822\r\n\
         \r\n\
         {inner_msg}\r\n\
         --outer-bound--"
    );
    let parsed = parse_email(raw.as_bytes()).unwrap();
    // Reaching this point means no panic and no parse error; beyond that
    // we only require that a text body was found.
    assert!(
        parsed.body_text.is_some(),
        "Nested message/rfc822 must be parsed without error"
    );
}

/// RFC 2047 Section 6.2: when exactly one space separates two adjacent
/// encoded-words, that space is dropped from the decoded output.
#[test]
fn edge_adjacent_encoded_words_single_space() {
    let encoded = "=?utf-8?Q?A?= =?utf-8?Q?B?=";
    let decoded = decode_encoded_words(encoded);
    assert_eq!(
        decoded, "AB",
        "RFC 2047 Section 6.2: single space between adjacent encoded words must be collapsed"
    );
}

/// RFC 2047 Section 6.2: the whitespace separating an encoded-word from
/// ordinary (non-encoded) text is significant and stays in the output.
#[test]
fn edge_encoded_word_followed_by_plain_text() {
    let mixed = "=?utf-8?Q?Hello?= World";
    let decoded = decode_encoded_words(mixed);
    assert_eq!(
        decoded, "Hello World",
        "RFC 2047 Section 6.2: whitespace between encoded word and plain text must be preserved"
    );
}

/// RFC 2047 Q-encoding: a run of underscores yields the same number of
/// spaces, one space per underscore.
#[test]
fn edge_q_encoding_multiple_underscores() {
    // Two underscores, then three.
    let encoded = "=?utf-8?Q?a__b___c?=";
    let decoded = decode_encoded_words(encoded);
    assert_eq!(
        decoded, "a  b   c",
        "RFC 2047 Section 4.2: each underscore in Q-encoding represents one space"
    );
}

/// RFC 2047 Section 2 defines `encoded-text = 1*<Any printable ASCII ...>`,
/// so an encoded-word with an empty Q-encoded payload (`=?utf-8?Q??=`) is
/// malformed. It must neither panic nor be decoded to an empty string;
/// the token is left in the output as literal text.
#[test]
fn edge_encoded_word_empty_payload() {
    let result = decode_encoded_words("=?utf-8?Q??=");
    assert_eq!(
        result, "=?utf-8?Q??=",
        "RFC 2047: empty Q-encoded payload is malformed and must be left as literal text"
    );
}

/// RFC 2047 Section 2: `encoded-text = 1*<...>` requires at least one
/// character, so an encoded-word whose B-encoded payload is empty is
/// malformed and stays in the output as literal text.
#[test]
fn edge_encoded_word_empty_base64_payload() {
    let malformed = "=?utf-8?B??=";
    let decoded = decode_encoded_words(malformed);
    assert_eq!(
        decoded, "=?utf-8?B??=",
        "RFC 2047: empty B-encoded payload is malformed and must be left as literal text"
    );
}

/// RFC 2047: an encoded-word whose encoding letter is neither Q nor B is
/// meant to be left undecoded. The check here is intentionally loose: it
/// only requires that decoding does not panic and that the output still
/// contains either the charset label or the payload text.
#[test]
fn edge_encoded_word_unknown_encoding() {
    // 'X' is not a defined encoding.
    let decoded = decode_encoded_words("=?utf-8?X?test?=");
    // Accepts a literal pass-through as well as "=?" followed by the rest.
    let recognizable = ["utf-8", "test"]
        .iter()
        .any(|fragment| decoded.contains(*fragment));
    assert!(
        recognizable,
        "Unknown encoding letter should not cause a panic"
    );
}

/// RFC 2046 Section 5.1.1: a multipart boundary delimiter sits at the
/// start of a line and must match the boundary exactly. With boundary
/// `abc`, a content line starting with `--abcdef` is NOT a delimiter even
/// though it begins with the delimiter's characters.
#[test]
fn edge_boundary_prefix_not_false_match() {
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"abc\"\r\n\
                 \r\n\
                 --abc\r\n\
                 Content-Type: text/plain\r\n\
                 \r\n\
                 --abcdef is not a real boundary\r\n\
                 --abc--";
    let email = parse_email(message).unwrap();
    // A missing body counts as "does not contain" rather than a panic.
    let plain = email.body_text.as_deref().unwrap_or("");
    assert!(
        plain.contains("--abcdef"),
        "Boundary 'abc' must not falsely match '--abcdef' (RFC 2046 Section 5.1.1)"
    );
}

/// RFC 5322 Section 2.2.3: unfolding a header whose continuation line
/// starts with a tab. The Subject is split over two lines, and both
/// halves have to appear in the parsed value.
#[test]
fn edge_header_unfolding_with_tab() {
    // "World" is on a continuation line introduced by HTAB.
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Subject: Hello\r\n\
                 \tWorld\r\n\
                 \r\n";
    let email = parse_email(message).unwrap();
    // Only the presence of both words is checked; the exact whitespace
    // joining them (RFC 5322 Section 2.2.3 keeps the tab) is not asserted.
    let subject = email.subject.as_ref().unwrap();
    assert!(
        subject.contains("Hello"),
        "Subject must contain 'Hello' after unfolding"
    );
    assert!(
        subject.contains("World"),
        "Subject must contain 'World' from continuation line"
    );
}

/// RFC 2046 Section 5.1.1: a multipart body with nothing between the
/// opening and closing delimiters must be handled without panicking.
#[test]
fn edge_empty_multipart_body() {
    // Opening delimiter immediately followed by the closing delimiter.
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
                 \r\n\
                 --b\r\n\
                 --b--";
    let email = parse_email(message).unwrap();
    // Graceful handling is what matters: either no body text at all or
    // an empty string is acceptable.
    let no_text = matches!(email.body_text.as_deref(), None | Some(""));
    assert!(no_text, "Empty multipart body should produce no body text");
}

/// RFC 2047 Section 4.2: a literal equals sign in Q-encoding has to be
/// written as `=3D`. This test feeds the decoder a bare `=` that is not
/// followed by two hex digits and checks that the decoder copes.
#[test]
fn edge_q_encoding_bare_equals() {
    // `=b` is an incomplete hex escape. Whether it is passed through or
    // handled some other way is left open; the decoder just must not
    // panic and must produce some output.
    let malformed = "=?utf-8?Q?a=b?=";
    let decoded = decode_encoded_words(malformed);
    let produced_output = !decoded.is_empty();
    assert!(
        produced_output,
        "Bare equals in Q-encoding should not cause panic"
    );
}

/// RFC 2047 Section 6.2: with three encoded-words in a row, the
/// whitespace in both gaps is dropped, leaving the decoded pieces joined
/// together.
#[test]
fn edge_three_adjacent_encoded_words() {
    let encoded = "=?utf-8?Q?A?= =?utf-8?Q?B?= =?utf-8?Q?C?=";
    let decoded = decode_encoded_words(encoded);
    assert_eq!(
        decoded, "ABC",
        "RFC 2047 Section 6.2: whitespace between all three adjacent \
         encoded words must be collapsed"
    );
}

/// RFC 5322 Section 3.3 / obsolete year rule (RFC 5322 Section 4.3): a
/// two-digit year in the range 50-99 is interpreted by adding 1900.
#[test]
fn edge_date_two_digit_year_obsolete() {
    let parsed = parse_rfc5322_date("13 Feb 99 15:47:33 +0000")
        .expect("Two-digit year 99 must parse per RFC 5322 Section 4.3");
    assert_eq!(
        parsed.year, 1999,
        "RFC 5322 Section 4.3: two-digit year 99 must map to 1999"
    );
}

/// RFC 5322 Section 4.3: a two-digit year in the range 00-49 is
/// interpreted by adding 2000.
#[test]
fn edge_date_two_digit_year_2000s() {
    let parsed = parse_rfc5322_date("13 Feb 05 15:47:33 +0000")
        .expect("Two-digit year 05 must parse per RFC 5322 Section 4.3");
    assert_eq!(
        parsed.year, 2005,
        "RFC 5322 Section 4.3: two-digit year 05 must map to 2005"
    );
}

/// RFC 5322 Section 2.2.3: folding white space (CRLF followed by SP or
/// HTAB) inside a date value has to be reduced to a single space before
/// the date is parsed. RFC 5322 Section 4.3 permits CFWS between all
/// tokens of the obsolete date-time syntax.
#[test]
fn parse_rfc5322_date_fws_space_folding() {
    // Fold (CRLF + SP) between the month and the year.
    let folded = "Thu, 01 Jan\r\n 2015 12:00:00 +0000";
    let parsed = parse_rfc5322_date(folded)
        .expect("RFC 5322 Section 2.2.3: FWS (CRLF SP) within date must be normalised");
    assert_eq!(parsed.year, 2015);
    assert_eq!(parsed.month, 1);
    assert_eq!(parsed.day, 1);
    assert_eq!(parsed.hour, 12);
    assert_eq!(parsed.minute, 0);
    assert_eq!(parsed.second, 0);
    assert_eq!(parsed.tz_offset_minutes, 0);
}

/// RFC 5322 Section 2.2.3: the whitespace character after the CRLF of a
/// fold can be HTAB rather than SP; the date must parse the same way.
#[test]
fn parse_rfc5322_date_fws_tab_folding() {
    // Fold (CRLF + HTAB) between the month and the year.
    let folded = "Thu, 01 Jan\r\n\t2015 12:00:00 +0000";
    let parsed = parse_rfc5322_date(folded)
        .expect("RFC 5322 Section 2.2.3: FWS (CRLF HTAB) within date must be normalised");
    assert_eq!(parsed.year, 2015);
    assert_eq!(parsed.month, 1);
    assert_eq!(parsed.day, 1);
    assert_eq!(parsed.hour, 12);
    assert_eq!(parsed.minute, 0);
    assert_eq!(parsed.second, 0);
    assert_eq!(parsed.tz_offset_minutes, 0);
}

/// RFC 5322 Section 4.3: CFWS is allowed between almost all tokens of the
/// obsolete date-time syntax. When a single date value contains several
/// folds, every one of them has to be normalised.
#[test]
fn parse_rfc5322_date_fws_multiple_folds() {
    // A fold (CRLF + SP) after every token.
    let folded = "Thu,\r\n 01\r\n Jan\r\n 2015\r\n 12:00:00\r\n +0000";
    let parsed = parse_rfc5322_date(folded)
        .expect("RFC 5322 Section 4.3: multiple FWS sequences in date must all be normalised");
    assert_eq!(parsed.year, 2015);
    assert_eq!(parsed.month, 1);
    assert_eq!(parsed.day, 1);
    assert_eq!(parsed.hour, 12);
    assert_eq!(parsed.minute, 0);
    assert_eq!(parsed.second, 0);
    assert_eq!(parsed.tz_offset_minutes, 0);
}

/// RFC 5322 Section 3.3 / Postel's law: certain non-conformant mailers
/// put a comma after the month name, as in
/// `"Thu, 13 Feb, 2025 15:47:33 +0000"`. Trailing punctuation on the
/// month token has to be stripped before the month is matched.
#[test]
fn parse_rfc5322_date_month_trailing_comma() {
    let with_comma = "Thu, 13 Feb, 2025 15:47:33 +0000";
    let parsed = parse_rfc5322_date(with_comma)
        .expect("Postel's law: trailing comma after month must be tolerated");
    assert_eq!(parsed.day, 13);
    assert_eq!(parsed.month, 2);
    assert_eq!(parsed.year, 2025);
    assert_eq!(parsed.hour, 15);
    assert_eq!(parsed.minute, 47);
    assert_eq!(parsed.second, 33);
    assert_eq!(parsed.tz_offset_minutes, 0);
}

/// RFC 5322 Section 3.3 / Postel's law: certain non-conformant mailers
/// put a period after the abbreviated month name, as in
/// `"13 Feb. 2025 12:00:00 +0000"`. Trailing punctuation on the month
/// token has to be stripped.
#[test]
fn parse_rfc5322_date_month_trailing_period() {
    let with_period = "13 Feb. 2025 12:00:00 +0000";
    let parsed = parse_rfc5322_date(with_period)
        .expect("Postel's law: trailing period after month must be tolerated");
    assert_eq!(parsed.day, 13);
    assert_eq!(parsed.month, 2);
    assert_eq!(parsed.year, 2025);
    assert_eq!(parsed.hour, 12);
    assert_eq!(parsed.minute, 0);
    assert_eq!(parsed.second, 0);
    assert_eq!(parsed.tz_offset_minutes, 0);
}

/// RFC 5322 Section 3.3: `date-time = [ day-of-week "," ] date time CFWS`.
/// A date with no day-of-week but with a comma after the month
/// (non-conformant but seen in the wild) should still parse: the text in
/// front of that comma must not be mistaken for a day-of-week.
#[test]
fn parse_date_no_dow_comma_after_month() {
    // `expect` replaces the former `assert!(is_some())` + `unwrap()` pair;
    // it fails with the same message and matches the sibling date tests.
    let dt = parse_rfc5322_date("13 Feb, 2025 12:00:00 +0000")
        .expect("date without DOW but with comma after month should parse");
    assert_eq!(dt.day, 13);
    assert_eq!(dt.month, 2);
    assert_eq!(dt.year, 2025);
    assert_eq!(dt.hour, 12);
    assert_eq!(dt.minute, 0);
    assert_eq!(dt.second, 0);
}

/// Guards the ordinary case after the day-of-week detection fix: a date
/// that does start with a day-of-week must keep parsing.
/// RFC 5322 Section 3.3: `day-of-week = ([FWS] day-name)`.
#[test]
fn parse_date_with_dow_still_works() {
    // `expect` gives a descriptive failure instead of a bare
    // `assert!(is_some())` followed by `unwrap()`.
    let dt = parse_rfc5322_date("Thu, 13 Feb 2025 12:00:00 +0000")
        .expect("date with a leading day-of-week should parse");
    assert_eq!(dt.day, 13);
    assert_eq!(dt.month, 2);
    assert_eq!(dt.year, 2025);
}

/// A date carrying both a proper day-of-week comma and a stray comma
/// after the month: the day-of-week has to be stripped correctly, and the
/// month's trailing comma then has to be tolerated.
/// RFC 5322 Section 3.3.
#[test]
fn parse_date_dow_comma_month_comma() {
    // `expect` gives a descriptive failure instead of a bare
    // `assert!(is_some())` followed by `unwrap()`.
    let dt = parse_rfc5322_date("Thu, 13 Feb, 2025 15:47:33 +0000")
        .expect("date with DOW comma and trailing month comma should parse");
    assert_eq!(dt.day, 13);
    assert_eq!(dt.month, 2);
    assert_eq!(dt.year, 2025);
}

/// RFC 2047 Section 4.2: a Q-encoded payload mixing `=XX` hex escapes
/// with an underscore. `=48=65=6C=6C=6F_=57=6F=72=6C=64` spells
/// "Hello World".
#[test]
fn edge_q_encoding_mixed_hex_and_underscore() {
    // Every letter is hex-escaped; the word gap is a single underscore.
    let encoded = "=?utf-8?Q?=48=65=6C=6C=6F_=57=6F=72=6C=64?=";
    let decoded = decode_encoded_words(encoded);
    assert_eq!(
        decoded, "Hello World",
        "RFC 2047 Section 4.2: mixed hex encoding and underscore in Q-encoding"
    );
}

/// A multipart message whose text/plain part is sent with
/// `Content-Transfer-Encoding: base64`; the part has to be decoded
/// before it is exposed as the body text.
#[test]
fn edge_multipart_base64_text_part() {
    // base64("Hello World") = "SGVsbG8gV29ybGQ="
    let message = b"From: a@b.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
                 \r\n\
                 --b\r\n\
                 Content-Type: text/plain; charset=utf-8\r\n\
                 Content-Transfer-Encoding: base64\r\n\
                 \r\n\
                 SGVsbG8gV29ybGQ=\r\n\
                 --b--";
    let email = parse_email(message).unwrap();
    let plain = email.body_text.as_deref();
    assert_eq!(
        plain,
        Some("Hello World"),
        "Base64-encoded text/plain in multipart must be decoded correctly"
    );
}

/// RFC 2047: an encoded-word naming a charset the decoder does not know
/// must not panic; the payload is still decoded through a fallback
/// (lossy UTF-8 conversion).
#[test]
fn edge_encoded_word_unknown_charset() {
    // The payload is base64("Hello"), which is plain ASCII, so the
    // UTF-8 fallback used for unknown charsets yields it unchanged.
    let encoded = "=?x-unknown?B?SGVsbG8=?=";
    let decoded = decode_encoded_words(encoded);
    assert_eq!(
        decoded, "Hello",
        "Unknown charset in encoded-word should fall back gracefully"
    );
}

/// A From header whose quoted display name itself contains an `@`. That
/// `@` must NOT be taken for part of an address: the email comes from
/// the angle brackets and the quoted text is the name.
#[test]
fn edge_from_display_name_with_at_sign() {
    let message = b"From: \"user@company\" <real@example.com>\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";
    let email = parse_email(message).unwrap();
    let sender = &email.from[0];
    assert_eq!(
        sender.email, "real@example.com",
        "Email must be extracted from angle brackets, not from quoted display name"
    );
    assert_eq!(
        sender.name.as_deref(),
        Some("user@company"),
        "Display name containing '@' in quotes must be preserved"
    );
}

/// Input that never provides the blank line RFC 5322 Section 2.1 uses to
/// separate headers from the body. Parsing must still succeed and the
/// real headers must still be read.
#[test]
fn edge_no_blank_line_separator() {
    let message = concat!(
        "From: a@b.com\r\n",
        "Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n",
        "Subject: Test\r\n",
        "This is body text without blank line",
    )
    .as_bytes();

    // The last line has no colon and is not a folded continuation. Whether
    // the parser drops it as header junk or treats it as the start of a
    // body is deliberately left open here; the only requirements are that
    // nothing panics and that Subject is still extracted.
    let email = parse_email(message).unwrap();
    assert_eq!(email.subject.as_deref(), Some("Test"));
}

/// Two adjacent encoded-words using different charsets (ISO-8859-1, then
/// UTF-8). Each must be decoded with its own charset, and the whitespace
/// separating them must be dropped (RFC 2047 Section 6.2).
#[test]
fn edge_encoded_word_charset_switch() {
    // First word:  "café" as ISO-8859-1 bytes 63 61 66 E9 -> base64 "Y2Fm6Q=="
    // Second word: "résumé" as UTF-8                      -> base64 "csOpc3Vtw6k="
    let header_value = "=?iso-8859-1?B?Y2Fm6Q==?= =?utf-8?B?csOpc3Vtw6k=?=";
    let decoded = decode_encoded_words(header_value);
    assert_eq!(
        decoded, "caférésumé",
        "RFC 2047 Section 6.2: encoded words with different charsets must \
         decode correctly with whitespace collapsed"
    );
}

/// Multiple `From:` headers. RFC 5322 Section 3.6 allows `From` to occur
/// only once, but per Postel's law the parser must accept such a message
/// instead of erroring.
///
/// Addresses from duplicate `From` headers are concatenated rather than
/// discarded (see `test_duplicate_from_headers_concatenated`), so this test
/// pins the ordering: the address from the first header must stay at
/// index 0 of `parsed.from`.
#[test]
fn edge_duplicate_from_header() {
    let raw = b"From: first@example.com\r\n\
                 From: second@example.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 \r\n";
    let parsed = parse_email(raw).unwrap();
    assert_eq!(
        parsed.from[0].email, "first@example.com",
        "Address from the first From header must come first when duplicates exist"
    );
}

/// A quoted `boundary` parameter whose value contains a backslash-escaped
/// double quote. Extraction must honour the quoted-pair escaping of
/// RFC 5322 Section 3.2.4 and return the unescaped value.
#[test]
fn edge_boundary_with_escaped_quote() {
    // On the wire the parameter reads boundary="a\"b"; the logical
    // boundary value is the three characters: a " b
    let content_type = r#"multipart/mixed; boundary="a\"b""#;
    let extracted = extract_param(content_type, "boundary");
    assert_eq!(
        extracted.as_deref(),
        Some(r#"a"b"#),
        "Boundary with escaped quote must be correctly unescaped \
         (RFC 5322 Section 3.2.4)"
    );
}

/// A NUL byte (0x00) embedded in a header value must neither panic the
/// parser nor make the header disappear; lenient handling is preferred
/// per Postel's law.
#[test]
fn edge_nul_byte_in_subject() {
    let message = b"From: a@b.com\r\n\
                    Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                    Subject: Hello\x00World\r\n\
                    \r\n";
    let email = parse_email(message).unwrap();

    // How the NUL itself is represented in the decoded subject is
    // implementation-defined, so only the presence of a subject is checked.
    let has_subject = email.subject.is_some();
    assert!(
        has_subject,
        "Subject with NUL byte must still be extracted (Postel's law)"
    );
}

/// Group syntax in an address header (`Group: addr1@a.com, addr2@b.com;`):
/// the member addresses listed inside the group must be extracted.
#[test]
fn edge_address_group_syntax() {
    let message = concat!(
        "From: sender@example.com\r\n",
        "Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n",
        "To: Friends: alice@a.com, bob@b.com;\r\n",
        "\r\n",
    )
    .as_bytes();

    let email = parse_email(message).unwrap();
    let recipients = &email.to;
    assert!(
        recipients.len() >= 2,
        "Group syntax 'Friends: alice, bob;' must extract both addresses. \
         Got: {:?}",
        recipients
    );
}

/// A `multipart/mixed` boundary built from characters that are legal in a
/// boundary but rarely seen (`+`, `/`, `=`), per RFC 2046 Section 5.1.1.
#[test]
fn edge_boundary_special_chars() {
    let message = concat!(
        "From: a@b.com\r\n",
        "Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n",
        "Content-Type: multipart/mixed; boundary=\"a+b/c=d\"\r\n",
        "\r\n",
        "--a+b/c=d\r\n",
        "Content-Type: text/plain\r\n",
        "\r\n",
        "body text\r\n",
        "--a+b/c=d--",
    )
    .as_bytes();

    let email = parse_email(message).unwrap();
    let text = email.body_text.as_deref();
    assert_eq!(
        text,
        Some("body text"),
        "Boundary with special chars +/= must be handled correctly \
         (RFC 2046 Section 5.1.1)"
    );
}

/// Whitespace around the `=` of a MIME parameter must be tolerated
/// (Postel's law, RFC 1122 §1.2.2): some non-conformant mailers write
/// `charset = utf-8` where `charset=utf-8` is meant. This case has the
/// space only before the `=`.
#[test]
fn parse_param_space_before_equals() {
    let header: &[u8] = b"From: a@b.com\r\n\
                          Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                          Content-Type: text/plain; charset =utf-8\r\n\
                          \r\n";
    // The body is non-ASCII UTF-8 ("café" = 63 61 66 c3 a9, five bytes) so
    // that a parser which missed the charset parameter and fell back to
    // the default US-ASCII would yield a different string.
    let body = "café".as_bytes();
    let message = [header, body].concat();

    let email = parse_email(&message).unwrap();
    let text = email.body_text.as_deref();
    assert_eq!(
        text,
        Some("café"),
        "charset parameter with space before '=' must be recognized"
    );
}

/// `charset = utf-8` with a space on both sides of the `=` must still be
/// recognised as the charset parameter.
#[test]
fn parse_param_spaces_around_equals() {
    let header: &[u8] = b"From: a@b.com\r\n\
                          Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                          Content-Type: text/plain; charset = utf-8\r\n\
                          \r\n";
    // Non-ASCII UTF-8 body, appended as raw bytes after the header block.
    let message = [header, "café".as_bytes()].concat();

    let email = parse_email(&message).unwrap();
    let text = email.body_text.as_deref();
    assert_eq!(
        text,
        Some("café"),
        "charset with spaces around '=' must be parsed (Postel's law)"
    );
}

/// `boundary = "bound42"` with spaces around the `=` must still be used to
/// split the multipart body.
#[test]
fn parse_boundary_space_around_equals() {
    let message = concat!(
        "From: a@b.com\r\n",
        "Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n",
        "Content-Type: multipart/mixed; boundary = \"bound42\"\r\n",
        "\r\n",
        "--bound42\r\n",
        "Content-Type: text/plain\r\n",
        "\r\n",
        "Body text\r\n",
        "--bound42--",
    )
    .as_bytes();

    let email = parse_email(message).unwrap();
    let text = email.body_text.as_deref();
    assert_eq!(
        text,
        Some("Body text"),
        "boundary with spaces around '=' must be parsed (Postel's law)"
    );
}

/// An RFC 2231 extended parameter (`filename*`) written with spaces around
/// the `=` must still be found and percent-decoded.
#[test]
fn parse_rfc2231_param_space_around_equals() {
    let message = concat!(
        "From: a@b.com\r\n",
        "Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n",
        "Content-Type: multipart/mixed; boundary=\"b\"\r\n",
        "\r\n",
        "--b\r\n",
        "Content-Type: text/plain\r\n",
        "\r\n",
        "Body\r\n",
        "--b\r\n",
        "Content-Type: application/pdf\r\n",
        "Content-Disposition: attachment; filename* = UTF-8''r%C3%A9sum%C3%A9.pdf\r\n",
        "\r\n",
        "DATA\r\n",
        "--b--",
    )
    .as_bytes();

    let email = parse_email(message).unwrap();
    // The PDF part is the first (and only) attachment in the message.
    let filename = email.attachments[0].filename.as_deref();
    assert_eq!(
        filename,
        Some("résumé.pdf"),
        "RFC 2231 filename* with space around '=' must be parsed (Postel's law)"
    );
}

/// RFC 5322 Section 3.6.2: `Sender` names the mailbox of the agent that
/// actually transmitted the message. Unlike `From` (a mailbox-list) it
/// holds exactly one mailbox, which is exposed as `parsed.sender`.
#[test]
fn parse_sender_header() {
    let message = concat!(
        "From: author@example.com\r\n",
        "Sender: transmitter@example.com\r\n",
        "To: recipient@example.com\r\n",
        "Subject: Test Sender\r\n",
        "Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n",
        "\r\n",
        "Body",
    )
    .as_bytes();

    let email = parse_email(message).unwrap();
    let sender = email.sender.as_ref();
    assert_eq!(
        sender.map(|addr| addr.email.as_str()),
        Some("transmitter@example.com"),
        "Sender header should be parsed into parsed.sender"
    );
    assert_eq!(
        sender.and_then(|addr| addr.name.as_deref()),
        None,
        "Sender with bare address should have no display name"
    );
}

/// RFC 5322 Section 3.6.2: `Sender` is optional; a message without it must
/// leave `parsed.sender` as `None`.
#[test]
fn parse_sender_header_absent() {
    let message = concat!(
        "From: author@example.com\r\n",
        "To: recipient@example.com\r\n",
        "Subject: No Sender\r\n",
        "Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n",
        "\r\n",
        "Body",
    )
    .as_bytes();

    let email = parse_email(message).unwrap();
    let sender_missing = email.sender.is_none();
    assert!(sender_missing, "Missing Sender header should yield None");
}

/// RFC 5322 Section 3.6.2: a `Sender` given in `Name <addr>` form must
/// yield both the address and the display name.
#[test]
fn parse_sender_header_with_display_name() {
    let message = concat!(
        "From: author@example.com\r\n",
        "Sender: Secretary <secretary@example.com>\r\n",
        "To: recipient@example.com\r\n",
        "Subject: Test\r\n",
        "Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n",
        "\r\n",
        "Body",
    )
    .as_bytes();

    let email = parse_email(message).unwrap();
    let sender = email.sender.as_ref().unwrap();
    assert_eq!(sender.email, "secretary@example.com");
    assert_eq!(sender.name.as_deref(), Some("Secretary"));
}

/// The header-only entry point, `parse_headers_only`, must populate
/// `sender` (RFC 5322 Section 3.6.2) just like the full parser.
#[test]
fn parse_headers_only_extracts_sender() {
    let message = concat!(
        "From: author@example.com\r\n",
        "Sender: transmitter@example.com\r\n",
        "To: recipient@example.com\r\n",
        "Subject: Test\r\n",
        "Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n",
        "\r\n",
        "Body",
    )
    .as_bytes();

    let headers = parse_headers_only(message).unwrap();
    let sender_email = headers.sender.as_ref().map(|addr| addr.email.as_str());
    assert_eq!(
        sender_email,
        Some("transmitter@example.com"),
        "parse_headers_only should also extract Sender"
    );
}

/// RFC 5322 Section 3.6.8: optional fields (headers the spec does not
/// define) must be reachable through `extra_headers`, so callers never
/// have to re-parse `raw_headers` themselves.
#[test]
fn parse_extra_headers_accessible() {
    let message = concat!(
        "From: sender@example.com\r\n",
        "To: recipient@example.com\r\n",
        "Subject: Test\r\n",
        "X-Mailer: TestMailer\r\n",
        "List-Unsubscribe: <mailto:unsub@example.com>\r\n",
        "DKIM-Signature: v=1; a=rsa-sha256; d=example.com\r\n",
        "\r\n",
        "Body text\r\n",
    )
    .as_bytes();

    let email = parse_email(message).unwrap();

    // True when `extra_headers` holds an entry with exactly this name and
    // value. Names are matched in the lowercase form the assertions use.
    let has_header = |name: &str, value: &str| {
        email
            .extra_headers
            .iter()
            .any(|(k, v)| k == name && v == value)
    };

    // Every non-well-known header must be present with its value.
    assert!(
        has_header("x-mailer", "TestMailer"),
        "extra_headers must contain X-Mailer (RFC 5322 Section 3.6.8)"
    );
    assert!(
        has_header("list-unsubscribe", "<mailto:unsub@example.com>"),
        "extra_headers must contain List-Unsubscribe (RFC 5322 Section 3.6.8)"
    );
    assert!(
        has_header("dkim-signature", "v=1; a=rsa-sha256; d=example.com"),
        "extra_headers must contain DKIM-Signature (RFC 5322 Section 3.6.8)"
    );

    // Well-known headers must not show up in extra_headers.
    let well_known_leaked = email
        .extra_headers
        .iter()
        .any(|(k, _)| k == "from" || k == "to" || k == "subject");
    assert!(
        !well_known_leaked,
        "Well-known headers must not appear in extra_headers"
    );
}

/// RFC 5322 Section 4.3: the single-letter military zones (A–I, K–Y)
/// "SHOULD all be considered equivalent to '-0000'" because their
/// historical usage is unreliable, while Z is explicitly UTC. Every
/// single-letter zone is therefore expected to produce an offset of 0.
#[test]
fn military_single_letter_timezones_rfc5322_section_4_3() {
    // (zone, failure message). A/M/N/Y sample the military range, Z is
    // UTC, and J is unused by the RFC and expected to fall back to +0000.
    let expectations = [
        ("A", "A should be -0000 (RFC 5322 Section 4.3 SHOULD)"),
        ("M", "M should be -0000 (RFC 5322 Section 4.3 SHOULD)"),
        ("N", "N should be -0000 (RFC 5322 Section 4.3 SHOULD)"),
        ("Y", "Y should be -0000 (RFC 5322 Section 4.3 SHOULD)"),
        ("Z", "Z should be +0000 (RFC 5322 Section 4.3)"),
        (
            "J",
            "J is not used and should fall back to +0000 (RFC 5322 Section 4.3)",
        ),
    ];
    for (zone, why) in expectations {
        assert_eq!(parse_timezone(zone), 0, "{why}");
    }

    // End-to-end check: a complete Date header that uses military zone A.
    let message = concat!(
        "From: sender@example.com\r\n",
        "To: recipient@example.com\r\n",
        "Subject: Test\r\n",
        "Date: Thu, 01 Jan 2015 12:00:00 A\r\n",
        "\r\n",
        "Body\r\n",
    )
    .as_bytes();
    let email = crate::parse_email(message).unwrap();
    let date = email.date.expect("date must be present");
    assert_eq!(
        date.tz_offset_minutes, 0,
        "Full date with military zone A should have tz_offset_minutes = 0 \
         (RFC 5322 Section 4.3: military zones are -0000)"
    );
}

/// RFC 2183 Section 2: an explicit `Content-Disposition: attachment` must
/// not be overridden just because the part also carries a Content-ID
/// (RFC 2392). Content-ID is only an identifier, not a disposition hint.
#[test]
fn explicit_attachment_with_content_id_is_not_inline() {
    let message = concat!(
        "From: sender@example.com\r\n",
        "To: recipient@example.com\r\n",
        "Subject: Test\r\n",
        "Content-Type: image/png\r\n",
        "Content-Disposition: attachment; filename=\"image.png\"\r\n",
        "Content-ID: <img001@example.com>\r\n",
        "Content-Transfer-Encoding: base64\r\n",
        "\r\n",
        "iVBORw0KGgo=\r\n",
    )
    .as_bytes();

    let email = crate::parse_email(message).unwrap();
    let attachments = &email.attachments;
    assert_eq!(attachments.len(), 1);

    let image = &attachments[0];
    assert!(
        !image.is_inline,
        "Content-Disposition: attachment with Content-ID must not be marked inline \
         (RFC 2183 Section 2)"
    );
}

/// Multipart variant of the Content-ID check: inside a `multipart/mixed`
/// message, `Content-Disposition: attachment` must still win over the
/// presence of a Content-ID (RFC 2183 Section 2).
#[test]
fn multipart_explicit_attachment_with_content_id_is_not_inline() {
    let message = concat!(
        "From: sender@example.com\r\n",
        "To: recipient@example.com\r\n",
        "Subject: Test\r\n",
        "Content-Type: multipart/mixed; boundary=\"BOUND\"\r\n",
        "\r\n",
        "--BOUND\r\n",
        "Content-Type: text/plain\r\n",
        "\r\n",
        "Hello\r\n",
        "--BOUND\r\n",
        "Content-Type: image/png\r\n",
        "Content-Disposition: attachment; filename=\"image.png\"\r\n",
        "Content-ID: <img001@example.com>\r\n",
        "Content-Transfer-Encoding: base64\r\n",
        "\r\n",
        "iVBORw0KGgo=\r\n",
        "--BOUND--\r\n",
    )
    .as_bytes();

    let email = crate::parse_email(message).unwrap();
    let attachments = &email.attachments;
    assert_eq!(attachments.len(), 1);

    let image = &attachments[0];
    assert!(
        !image.is_inline,
        "multipart: Content-Disposition: attachment with Content-ID must not be marked inline \
         (RFC 2183 Section 2)"
    );
}

/// RFC 5322 Section 3.2.2 / 3.4: an `@` that appears inside a
/// parenthesized comment next to a group's display name must not stop the
/// input from being recognised as group syntax. Comments carry no
/// semantic meaning, so an `@` inside one does not indicate an addr-spec.
#[test]
fn group_address_with_at_in_comment() {
    // Group inputs whose comment contains an '@': first a full address,
    // then a bare '@' sign. Each must yield exactly the one member.
    let group_cases = [
        (
            "Group (user@host): member@example.com;",
            "group with comment containing '@' must parse the member address",
        ),
        (
            "Group (has @ sign): member@example.com;",
            "group with comment containing bare '@' must parse the member address",
        ),
    ];
    for (input, why) in group_cases {
        let members = parse_address_list(input);
        assert_eq!(members.len(), 1, "{why}");
        assert_eq!(members[0].email, "member@example.com");
    }

    // No-regression check: a plain address still parses.
    let plain = parse_address_list("user@host.com");
    assert_eq!(plain.len(), 1);
    assert_eq!(plain[0].email, "user@host.com");

    // Direct checks of `contains_at_outside_quotes`: an '@' counts only
    // when it sits outside both comments and quoted strings.
    let in_comment_only = contains_at_outside_quotes("Group (user@host)");
    assert!(
        !in_comment_only,
        "'@' only inside a comment must return false"
    );

    let outside_comment = contains_at_outside_quotes("user@host.com (Display Name)");
    assert!(outside_comment, "'@' outside a comment must return true");

    let in_quotes_only = contains_at_outside_quotes(r#""user@host""#);
    assert!(
        !in_quotes_only,
        "'@' only inside a quoted string must return false"
    );

    // The escaped ')' must not end the comment early.
    let after_escaped_paren = contains_at_outside_quotes("Group (escaped \\) user@host)");
    assert!(
        !after_escaped_paren,
        "'@' after an escaped paren inside a comment must return false"
    );
}

/// A space inside the encoded-text of an encoded-word is treated as a
/// folding artifact (RFC 5322 Section 2.2.3) and removed before decoding,
/// per Postel's law. Per the original note, HTAB and other control
/// characters are still rejected; that path is not exercised here.
#[test]
fn regression_encoded_word_space_in_encoded_text_decoded() {
    // (input, expected output, failure message)
    let cases = [
        // Base64 payload with an embedded space.
        (
            "=?UTF-8?B?SGVs bG8=?=",
            "Hello",
            "space in B-encoded-text should be stripped as fold artifact",
        ),
        // Well-formed base64 word: must be unaffected.
        (
            "=?UTF-8?B?SGVsbG8=?=",
            "Hello",
            "valid base64 encoded word must decode normally",
        ),
        // Well-formed Q word: must be unaffected.
        (
            "=?UTF-8?Q?caf=C3=A9?=",
            "caf\u{e9}",
            "valid Q-encoded word must decode normally",
        ),
        // Q payload with an embedded space.
        (
            "=?UTF-8?Q?hel lo?=",
            "hello",
            "space in Q-encoded-text should be stripped as fold artifact",
        ),
    ];

    for (input, expected, why) in cases {
        assert_eq!(decode_encoded_words(input), expected, "{why}");
    }
}

/// RFC 2231 Section 3: a far-away fragment (here section 15) must not be
/// glued onto section 0 when none of the sections in between exist.
#[test]
fn rfc2231_continuation_gap_larger_than_ten() {
    // Only sections 0 and 15 are present; 1 through 14 are missing.
    let disposition = "attachment; filename*0=\"part1\"; filename*15=\"part2\"";
    let reassembled = extract_rfc2231_continuation(disposition, "filename");
    assert_eq!(
        reassembled.as_deref(),
        Some("part1"),
        "RFC 2231 Section 3: reassembly must stop before section 15 when sections 1-14 are absent"
    );
}

/// RFC 2231 Section 3 places no two-digit limit on continuation indices.
/// A long parameter value can legitimately need more than 100 sections,
/// and all of them must be reassembled rather than cut off at section 99.
#[test]
fn rfc2231_continuation_more_than_one_hundred_sections() {
    use std::fmt::Write as _;

    // 112 sections of 50 'a' characters each: 5_600 characters in total.
    const SECTION_LEN: usize = 50;
    const SECTION_COUNT: usize = 112;

    let section = "a".repeat(SECTION_LEN);
    let header = (0..SECTION_COUNT).fold(String::from("attachment"), |mut acc, idx| {
        let _ = write!(acc, "; filename*{idx}=\"{section}\"");
        acc
    });
    let expected = "a".repeat(SECTION_LEN * SECTION_COUNT);

    let reassembled = extract_rfc2231_continuation(&header, "filename");
    assert_eq!(
        reassembled.as_deref(),
        Some(expected.as_str()),
        "RFC 2231 Section 3: continuation parsing must not truncate after \
         100 sections"
    );
}

/// RFC 5322 Section 3.6 + Postel's law: broken mailers sometimes repeat an
/// address-list header (To, Cc, Bcc, Reply-To). The recipients of every
/// occurrence must be concatenated in order; later headers must not be
/// silently dropped.
#[test]
fn regression_duplicate_address_headers_concatenated() {
    // Case 1: two separate To: headers (broken, but seen in the wild).
    let to_message = concat!(
        "From: sender@example.com\r\n",
        "To: alice@example.com\r\n",
        "To: bob@example.com\r\n",
        "Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n",
        "\r\n",
    )
    .as_bytes();

    let to_email = parse_email(to_message).unwrap();
    let to_addrs: Vec<&str> = to_email.to.iter().map(|a| a.email.as_str()).collect();
    assert_eq!(
        to_addrs.len(),
        2,
        "both To: headers must be parsed — Postel's law requires concatenation"
    );
    assert_eq!(to_addrs, ["alice@example.com", "bob@example.com"]);

    // Case 2: two separate Cc: headers, the first listing two addresses.
    let cc_message = concat!(
        "From: sender@example.com\r\n",
        "Cc: carol@example.com, dave@example.com\r\n",
        "Cc: eve@example.com\r\n",
        "Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n",
        "\r\n",
    )
    .as_bytes();

    let cc_email = parse_email(cc_message).unwrap();
    let cc_addrs: Vec<&str> = cc_email.cc.iter().map(|a| a.email.as_str()).collect();
    assert_eq!(cc_addrs.len(), 3, "all Cc: headers must be concatenated");
    assert_eq!(
        cc_addrs,
        ["carol@example.com", "dave@example.com", "eve@example.com"]
    );
}

/// Regression test for MSG-001: addresses were lost when `From` was
/// repeated.
///
/// `extract_from` used to go through `get_header_value`, which yields only
/// the first matching header. RFC 5322 Section 3.6 limits `From` to a
/// single occurrence, yet broken mailers do repeat it; per Postel's law
/// the addresses of all occurrences are concatenated, matching the
/// handling of To/Cc/Bcc.
#[test]
fn test_duplicate_from_headers_concatenated() {
    let message = concat!(
        "From: alice@example.com\r\n",
        "From: bob@example.com\r\n",
        "Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n",
        "\r\n",
    )
    .as_bytes();

    let email = parse_email(message).unwrap();
    let authors: Vec<&str> = email.from.iter().map(|a| a.email.as_str()).collect();
    assert_eq!(authors.len(), 2, "both From: headers must be concatenated");
    assert_eq!(authors, ["alice@example.com", "bob@example.com"]);
}

/// Regression test: per RFC 2047 Section 5, encoded-words MUST NOT appear
/// in structured header fields such as `Received`,
/// `Authentication-Results` or `DKIM-Signature`. Any `=?...?=` sequence in
/// those headers must therefore be left exactly as written.
#[test]
fn structured_headers_not_rfc2047_decoded() {
    let message = concat!(
        "From: sender@example.com\r\n",
        "Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n",
        "Received: from =?utf-8?Q?server?= (mx.example.com)\r\n",
        "Authentication-Results: dkim=pass header.d==?utf-8?B?ZXhhbXBsZQ==?=.com\r\n",
        "DKIM-Signature: v=1; d==?utf-8?Q?example?=.com\r\n",
        "\r\n",
        "body",
    )
    .as_bytes();

    let email = parse_email(message).unwrap();

    // For each structured header: look it up in extra_headers and check
    // that the encoded-word syntax survived verbatim.
    let (_, received) = email
        .extra_headers
        .iter()
        .find(|(name, _)| name == "received")
        .expect("Received header must be present");
    assert!(
        received.contains("=?utf-8?Q?server?="),
        "Received header must NOT be RFC 2047 decoded; got: {:?}",
        received
    );

    let (_, auth_results) = email
        .extra_headers
        .iter()
        .find(|(name, _)| name == "authentication-results")
        .expect("Authentication-Results header must be present");
    assert!(
        auth_results.contains("=?utf-8?B?ZXhhbXBsZQ==?="),
        "Authentication-Results header must NOT be RFC 2047 decoded; got: {:?}",
        auth_results
    );

    let (_, dkim_signature) = email
        .extra_headers
        .iter()
        .find(|(name, _)| name == "dkim-signature")
        .expect("DKIM-Signature header must be present");
    assert!(
        dkim_signature.contains("=?utf-8?Q?example?="),
        "DKIM-Signature header must NOT be RFC 2047 decoded; got: {:?}",
        dkim_signature
    );
}

/// RFC 5322 Section 3.6.6: `Resent-Date` and `Resent-Message-ID` are
/// structured fields, so RFC 2047 Section 5 rules out encoded-word
/// decoding for them. Any `=?...?=` sequence must reach `extra_headers`
/// literally.
#[test]
fn resent_structured_headers_not_rfc2047_decoded() {
    let message = concat!(
        "From: sender@example.com\r\n",
        "Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n",
        "Resent-Date: =?utf-8?Q?Thu,_13_Feb_2025_15:47:33_+0000?=\r\n",
        "Resent-Message-ID: =?utf-8?Q?<resent@example.com>?=\r\n",
        "\r\n",
        "body",
    )
    .as_bytes();

    let email = parse_email(message).unwrap();

    let (_, resent_date) = email
        .extra_headers
        .iter()
        .find(|(name, _)| name == "resent-date")
        .expect("Resent-Date header must be present");
    assert!(
        resent_date.contains("=?utf-8?Q?Thu,_13_Feb_2025_15:47:33_+0000?="),
        "Resent-Date must NOT be RFC 2047 decoded; got: {:?}",
        resent_date
    );

    let (_, resent_message_id) = email
        .extra_headers
        .iter()
        .find(|(name, _)| name == "resent-message-id")
        .expect("Resent-Message-ID header must be present");
    assert!(
        resent_message_id.contains("=?utf-8?Q?<resent@example.com>?="),
        "Resent-Message-ID must NOT be RFC 2047 decoded; got: {:?}",
        resent_message_id
    );
}

/// RFC 5322 Section 3.6.6: the resent address fields share the mailbox /
/// address-list syntax of their non-Resent counterparts. RFC 2047
/// Section 5 rule (3) allows encoded-words in `phrase` productions
/// (display names), so those must be decoded in `extra_headers`.
#[test]
fn resent_address_headers_rfc2047_decoded_in_extra_headers() {
    let message = concat!(
        "From: sender@example.com\r\n",
        "Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n",
        "Resent-From: =?utf-8?Q?Alice_Sender?= <alice@example.com>\r\n",
        "Resent-To: =?utf-8?Q?Bob_Recipient?= <bob@example.com>\r\n",
        "Resent-Cc: =?utf-8?Q?Carol_Copy?= <carol@example.com>\r\n",
        "\r\n",
        "body",
    )
    .as_bytes();

    let email = parse_email(message).unwrap();

    let (_, resent_from) = email
        .extra_headers
        .iter()
        .find(|(name, _)| name == "resent-from")
        .expect("Resent-From header must be present");
    assert!(
        resent_from.contains("Alice Sender"),
        "Resent-From display name must be RFC 2047 decoded; got {:?}",
        resent_from
    );

    let (_, resent_to) = email
        .extra_headers
        .iter()
        .find(|(name, _)| name == "resent-to")
        .expect("Resent-To header must be present");
    assert!(
        resent_to.contains("Bob Recipient"),
        "Resent-To display name must be RFC 2047 decoded; got {:?}",
        resent_to
    );

    let (_, resent_cc) = email
        .extra_headers
        .iter()
        .find(|(name, _)| name == "resent-cc")
        .expect("Resent-Cc header must be present");
    assert!(
        resent_cc.contains("Carol Copy"),
        "Resent-Cc display name must be RFC 2047 decoded; got {:?}",
        resent_cc
    );
}

/// RFC 5322 Section 4.5.6: the obsolete `Resent-Reply-To` field uses
/// address-list syntax, so RFC 2047 Section 5 rule (3) allows
/// encoded-words in its `phrase` (display name) and they must be decoded.
#[test]
fn obsolete_resent_reply_to_rfc2047_decoded_in_extra_headers() {
    let message = concat!(
        "From: sender@example.com\r\n",
        "Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n",
        "Resent-Reply-To: =?utf-8?Q?Relay_Desk?= <relay@example.com>\r\n",
        "\r\n",
        "body",
    )
    .as_bytes();

    let email = parse_email(message).unwrap();

    let (_, resent_reply_to) = email
        .extra_headers
        .iter()
        .find(|(name, _)| name == "resent-reply-to")
        .expect("Resent-Reply-To header must be present");
    assert!(
        resent_reply_to.contains("Relay Desk"),
        "Resent-Reply-To display name must be RFC 2047 decoded; got {:?}",
        resent_reply_to
    );
}

/// Regression test: a domain-literal such as `user@[10,0,0,1]` must not be
/// split at the commas between its brackets. RFC 5321 Section 4.1.3
/// defines a domain-literal as `"[" *dtext "]"`, so the address parser has
/// to track bracket depth and ignore separators found inside `[...]`.
#[test]
fn domain_literal_with_commas_not_split() {
    let parsed = parse_address_list("user@[10,0,0,1], other@example.com");

    let count = parsed.len();
    assert_eq!(
        count, 2,
        "expected 2 addresses, got {}: {:?}",
        count, parsed
    );

    let literal_addr = &parsed[0];
    assert_eq!(
        literal_addr.email, "user@[10,0,0,1]",
        "first address must preserve domain-literal including commas"
    );
    let plain_addr = &parsed[1];
    assert_eq!(plain_addr.email, "other@example.com");
}

/// Regression test: a time with fractional seconds ("45.123") must not
/// end up with its seconds silently reset to 0. RFC 5322 Section 3.3 has
/// no fractional seconds, but non-conformant mailers emit them; per
/// Postel's law the fraction is discarded and the integer part kept.
#[test]
fn parse_date_with_fractional_seconds() {
    let message = concat!(
        "From: a@b.com\r\n",
        "Date: Thu, 13 Feb 2025 15:30:45.123 +0000\r\n",
        "\r\n",
    )
    .as_bytes();

    let email = parse_email(message).unwrap();
    let stamp = email.date.unwrap();

    // Calendar date and hour/minute are unaffected by the fraction.
    assert_eq!((stamp.year, stamp.month, stamp.day), (2025, 2, 13));
    assert_eq!((stamp.hour, stamp.minute), (15, 30));
    // The whole-second part survives.
    assert_eq!(
        stamp.second, 45,
        "fractional seconds must not cause second to drop to 0"
    );
    assert_eq!(stamp.tz_offset_minutes, 0);
}

/// RFC 2047 Section 2 requires each component of an encoded-word to be
/// non-empty:
///
/// ```text
/// charset      = token          (token = 1*<...>)
/// encoding     = token          (token = 1*<...>)
/// encoded-text = 1*<Any printable ASCII character other than "?" or SPACE>
/// ```
///
/// An encoded-word with an empty charset, encoding, or encoded-text is
/// malformed and must be passed through as literal text, not decoded.
#[test]
fn test_rfc2047_empty_fields() {
    // (Subject value as sent, failure message). The expected subject is the
    // input itself, because nothing may be decoded.
    let cases = [
        ("=??Q?test?=", "empty charset must not be decoded"),
        ("=?UTF-8??test?=", "empty encoding must not be decoded"),
        ("=?UTF-8?Q??=", "empty encoded-text must not be decoded"),
    ];

    for (subject, why) in cases {
        let message = format!(
            "From: sender@example.com\r\n\
             To: recipient@example.com\r\n\
             Subject: {subject}\r\n\
             Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
             \r\n"
        );
        let email = parse_email(message.as_bytes()).unwrap();
        assert_eq!(email.subject.as_deref(), Some(subject), "{why}");
    }
}

/// RFC 2047 Section 2 excludes `?` from encoded-text (`1*<Any printable
/// ASCII character other than "?" or SPACE>`).  A word such as
/// `=?UTF-8?Q?hello?world?=` is therefore malformed and must be passed
/// through verbatim rather than decoded in part.
#[test]
fn test_rfc2047_question_mark_in_encoded_text() {
    let message: &[u8] = b"From: sender@example.com\r\n\
        To: recipient@example.com\r\n\
        Subject: =?UTF-8?Q?hello?world?=\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let email = parse_email(message).unwrap();

    // The whole token must survive untouched; a partial decode such as
    // "hello" would mean the stray '?' was accepted as a delimiter.
    let subject = email.subject.as_deref();
    assert_eq!(
        subject,
        Some("=?UTF-8?Q?hello?world?="),
        "'?' in encoded-text must cause the entire encoded-word to be \
         left as literal text (RFC 2047 Section 2)"
    );
}

// -----------------------------------------------------------------------
// Edge-case probe tests
// -----------------------------------------------------------------------

/// RFC 2047 Section 4.2: inside a Q-encoded word `_` stands for a space,
/// so `=?UTF-8?Q?hello_world?=` has to decode to "hello world".
#[test]
fn edge_rfc2047_q_encoding_underscore_to_space() {
    let message: &[u8] = b"From: sender@example.com\r\n\
        To: recipient@example.com\r\n\
        Subject: =?UTF-8?Q?hello_world?=\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();

    assert_eq!(
        subject,
        Some("hello world"),
        "RFC 2047 Section 4.2: underscore in Q-encoding must decode to space"
    );
}

/// RFC 2047 Section 6.2: linear whitespace separating two adjacent
/// encoded-words is dropped on display, and that holds even when the two
/// words declare different charsets.
///
/// `=?UTF-8?B?w6k=?= =?ISO-8859-1?Q?=E9?=` must therefore yield two
/// U+00E9 characters back to back, with no space between them.
#[test]
fn edge_rfc2047_adjacent_encoded_words_different_charsets() {
    // First word:  UTF-8 bytes 0xC3 0xA9 (U+00E9), base64 "w6k=".
    // Second word: ISO-8859-1 byte 0xE9 (U+00E9), Q-encoded "=E9".
    let message: &[u8] = b"From: sender@example.com\r\n\
        To: recipient@example.com\r\n\
        Subject: =?UTF-8?B?w6k=?= =?ISO-8859-1?Q?=E9?=\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();

    assert_eq!(
        subject,
        Some("\u{00E9}\u{00E9}"),
        "RFC 2047 Section 6.2: whitespace between adjacent encoded words \
         with different charsets must be collapsed"
    );
}

/// RFC 2046 Section 5.1.1: with boundary `abc`, the text `--abcdef` is not
/// a delimiter, because a delimiter line is `--` + boundary followed only
/// by optional whitespace and the line end (or `--` for the closing
/// delimiter).
///
/// The message has two `text/plain` parts; the first part's body contains
/// `--abcdef` and must come back intact.
///
/// NOTE(review): `--abcdef` appears mid-line here, so the variant where it
/// starts a line is not exercised by this test — consider adding it.
#[test]
fn edge_multipart_boundary_prefix_of_another() {
    let message: &[u8] = b"From: sender@example.com\r\n\
        To: recipient@example.com\r\n\
        Content-Type: multipart/mixed; boundary=\"abc\"\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n\
        --abc\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        \r\n\
        Line with --abcdef in the body should not split\r\n\
        --abc\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        \r\n\
        Second part\r\n\
        --abc--\r\n";

    let email = parse_email(message).unwrap();

    let first_body = email.body_text.as_deref();
    assert_eq!(
        first_body,
        Some("Line with --abcdef in the body should not split"),
        "RFC 2046 Section 5.1.1: boundary 'abc' must not match '--abcdef' \
         in the body — the delimiter must be followed by a valid terminator"
    );

    // Only the first text/plain part can be body_text; the second one is
    // expected to surface as an attachment. Exactly one attachment means
    // the message was split into exactly two parts.
    let attachment_count = email.attachments.len();
    assert_eq!(
        attachment_count, 1,
        "Two text/plain parts: first is body_text, second becomes an \
         attachment — this proves boundary splitting was correct"
    );
}

/// RFC 2231 Section 3: when the continuation sections of a parameter have
/// a hole (here `*0` and `*2` with no `*1`), only the contiguous run that
/// starts at section 0 may be joined.
#[test]
fn edge_rfc2231_continuation_with_gap_missing_index_1() {
    let joined = extract_rfc2231_continuation(
        "attachment; filename*0=\"part1\"; filename*2=\"part3\"",
        "filename",
    );

    // Section 2 must be ignored because section 1 is absent.
    assert_eq!(
        joined.as_deref(),
        Some("part1"),
        "RFC 2231 Section 3: continuation must stop at the first missing index"
    );
}

/// Build → parse round-trip for a subject that contains the literal text
/// `=?` (the opening marker of an RFC 2047 encoded-word) together with a
/// non-ASCII character.  `Price is =? 50€` must come back unchanged.
#[test]
fn edge_builder_roundtrip_subject_with_encoded_word_marker() {
    use crate::build_message;
    use crate::types::OutgoingEmail;

    let expected = "Price is =? 50\u{20AC}";

    // Address with no display name.
    let mailbox = |addr: &str| Address {
        name: None,
        email: addr.to_string(),
    };
    let sent_at = crate::types::DateTime {
        year: 2025,
        month: 2,
        day: 13,
        hour: 15,
        minute: 47,
        second: 33,
        tz_offset_minutes: 0,
    };

    let outgoing = OutgoingEmail {
        from: vec![mailbox("sender@example.com")],
        sender: None,
        to: vec![mailbox("recipient@example.com")],
        cc: Vec::new(),
        bcc: Vec::new(),
        reply_to: Vec::new(),
        date: Some(sent_at),
        subject: expected.to_string(),
        body_text: Some("test".to_string()),
        body_html: None,
        in_reply_to: Vec::new(),
        references: Vec::new(),
        attachments: Vec::new(),
        extra_headers: Vec::new(),
    };

    let wire = build_message(&outgoing).unwrap();
    let reparsed = parse_email(&wire.raw).unwrap();

    assert_eq!(
        reparsed.subject.as_deref(),
        Some(expected),
        "Subject containing literal '=?' and non-ASCII must survive \
         build->parse round-trip without being mangled by RFC 2047 encoding"
    );
}

/// RFC 5322 Section 4.3 (obsolete date syntax): a two-digit year in the
/// range 50-99 is read as 19xx, so `99` means 1999.
/// This goes through the complete `parse_email` path rather than calling
/// `parse_rfc5322_date` directly.
#[test]
fn edge_date_two_digit_year_via_parse_email() {
    let message: &[u8] = b"From: sender@example.com\r\n\
        To: recipient@example.com\r\n\
        Subject: Test\r\n\
        Date: Sat, 13 Feb 99 12:00:00 +0000\r\n\
        \r\n";

    let stamp = parse_email(message)
        .unwrap()
        .date
        .expect("date must parse");

    assert_eq!(
        stamp.year, 1999,
        "RFC 5322 Section 4.3: two-digit year 99 must map to 1999"
    );
    assert_eq!(stamp.month, 2);
    assert_eq!(stamp.day, 13);
}

/// A bare addr-spec (RFC 5322 Section 3.4.1) followed by a parenthesized
/// comment: `user@example.com (John Doe)`.
///
/// The address must be extracted without the comment, and — following the
/// older RFC 822 habit — the comment text becomes the display name, giving
/// email `user@example.com` and name `Some("John Doe")`.
#[test]
fn edge_address_with_trailing_comment() {
    let message: &[u8] = b"From: user@example.com (John Doe)\r\n\
        To: recipient@example.com\r\n\
        Subject: Test\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let email = parse_email(message).unwrap();
    let sender = &email.from[0];

    assert_eq!(
        sender.email, "user@example.com",
        "RFC 5322 Section 3.4.1: email must be extracted from bare addr-spec \
         before parenthesized comment"
    );
    assert_eq!(
        sender.name.as_deref(),
        Some("John Doe"),
        "RFC 822 convention: trailing parenthesized comment should be used \
         as display name"
    );
}

/// RFC 2046 Section 5.1.5: inside `multipart/digest`, a part that carries
/// no `Content-Type` header defaults to `message/rfc822` instead of
/// `text/plain`.  Such a part must not be picked up as `body_text`.
#[test]
fn edge_multipart_digest_default_content_type() {
    // One digest part with an empty header section; its body is a complete
    // RFC 5322 message (From/To/Subject/Date, blank line, body text).
    let message: &[u8] = b"From: sender@example.com\r\n\
        To: recipient@example.com\r\n\
        Content-Type: multipart/digest; boundary=\"digestbound\"\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n\
        --digestbound\r\n\
        \r\n\
        From: inner@example.com\r\n\
        To: recipient@example.com\r\n\
        Subject: Inner\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n\
        Inner body text\r\n\
        --digestbound--\r\n";

    let email = parse_email(message).unwrap();

    // A message/rfc822 part is not text/plain, so no body text is expected.
    let has_body_text = email.body_text.is_some();
    assert!(
        !has_body_text,
        "RFC 2046 Section 5.1.5: parts in multipart/digest without \
         Content-Type must default to message/rfc822, not text/plain — \
         body_text should be None"
    );

    // Instead the embedded message is reported as the sole attachment.
    assert_eq!(
        email.attachments.len(),
        1,
        "The message/rfc822 part should appear as an attachment"
    );
    let embedded = &email.attachments[0];
    assert_eq!(
        embedded.content_type, "message/rfc822",
        "RFC 2046 Section 5.1.5: default Content-Type in multipart/digest \
         is message/rfc822"
    );
}

/// RFC 2045 Section 6.8: line breaks inside base64 data are not part of
/// the encoding and must be skipped by the decoder.
/// The body `SGVs\r\nbG8=` therefore decodes to "Hello".
#[test]
fn edge_base64_body_with_embedded_whitespace() {
    let message: &[u8] = b"From: sender@example.com\r\n\
        To: recipient@example.com\r\n\
        Subject: Test\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: base64\r\n\
        \r\n\
        SGVs\r\nbG8=";

    let email = parse_email(message).unwrap();
    let text = email.body_text.as_deref();

    assert_eq!(
        text,
        Some("Hello"),
        "RFC 2045 Section 6.8: whitespace in base64 body must be ignored \
         during decoding"
    );
}

// -----------------------------------------------------------------------
// Regression: Subject trim_start must not strip RFC 2047 encoded whitespace
// -----------------------------------------------------------------------

/// RFC 2047 Section 6.2 / RFC 5322 Section 3.6.5: whitespace that sits
/// *inside* an encoded-word is message content, not a folding artifact,
/// so trimming the decoded Subject would lose data.
///
/// `=?UTF-8?B?IEhlbGxv?=` is the base64 form of " Hello" (one leading
/// space); the parsed subject must be " Hello", not "Hello".
#[test]
fn regression_subject_trim_preserves_rfc2047_encoded_leading_space() {
    let message: &[u8] = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Subject: =?UTF-8?B?IEhlbGxv?=\r\n\
        \r\n\
        body";

    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();

    assert_eq!(
        subject,
        Some(" Hello"),
        "RFC 2047 Section 6.2: leading whitespace encoded inside an \
         encoded-word is intentional content, not a fold artifact — \
         trim_start() must not strip it"
    );
}

/// Companion case for encoded leading whitespace: a Subject whose only
/// content is an encoded space (`=?UTF-8?B?IA==?=` is base64 for " ")
/// must stay a single space and not become the empty string.
#[test]
fn regression_subject_trim_preserves_encoded_whitespace_only() {
    let message: &[u8] = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Subject: =?UTF-8?B?IA==?=\r\n\
        \r\n\
        body";

    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();

    assert_eq!(
        subject,
        Some(" "),
        "RFC 2047 Section 6.2: a Subject consisting entirely of encoded \
         whitespace must not be stripped to empty"
    );
}

// -----------------------------------------------------------------------
// Regression: RFC 2231 Section 5 language tags in encoded-word charsets
// -----------------------------------------------------------------------

/// RFC 2231 Section 5 extends RFC 2047 encoded-words with an optional
/// language tag after the charset: `=?charset*language?encoding?text?=`.
/// The `*language` suffix has to be removed before the charset is looked
/// up; here `ISO-8859-1*de` must decode byte 0xFC as U+00FC.
#[test]
fn regression_rfc2231_language_tag_iso8859() {
    let message: &[u8] = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Subject: =?ISO-8859-1*de?Q?Pr=FCfung?=\r\n\
        \r\n\
        body";

    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();

    assert_eq!(
        subject,
        Some("Pr\u{00FC}fung"),
        "RFC 2231 Section 5: language tag in charset field must be \
         stripped before charset lookup — ISO-8859-1*de should decode \
         as ISO-8859-1"
    );
}

/// RFC 2231 Section 5: a language tag on a UTF-8 encoded-word
/// (`UTF-8*en`) must not change the result; the word decodes exactly as
/// plain UTF-8 would.  Kept as an explicit case next to the ISO-8859-1
/// variant.
#[test]
fn regression_rfc2231_language_tag_utf8() {
    let message: &[u8] = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Subject: =?UTF-8*en?B?SGVsbG8gV29ybGQ=?=\r\n\
        \r\n\
        body";

    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();

    assert_eq!(
        subject,
        Some("Hello World"),
        "RFC 2231 Section 5: UTF-8*en should decode as UTF-8"
    );
}

/// RFC 2231 Section 5, degenerate form: the charset is followed by `*`
/// but no language (`ISO-8859-1*`).  Everything before the `*` is still
/// the charset, so the word must decode as ISO-8859-1.
#[test]
fn regression_rfc2231_empty_language_tag() {
    let message: &[u8] = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Subject: =?ISO-8859-1*?Q?Pr=FCfung?=\r\n\
        \r\n\
        body";

    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();

    assert_eq!(
        subject,
        Some("Pr\u{00FC}fung"),
        "RFC 2231 Section 5: empty language tag (ISO-8859-1*) must \
         still decode as ISO-8859-1"
    );
}

/// Counterpart to the "preserve encoded whitespace" tests: whitespace
/// that comes from header folding is still an artifact and must be
/// removed.  When the header is folded right before the first
/// encoded-word, unfolding leaves a leading space that is not part of the
/// decoded content.
#[test]
fn regression_subject_trim_still_strips_fold_artifact() {
    // "Subject:" is immediately followed by a fold; the continuation line
    // starts with the single space that must not reach the subject.
    let message: &[u8] = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Subject:\r\n =?UTF-8?B?SGVsbG8=?=\r\n\
        \r\n\
        body";

    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();

    assert_eq!(
        subject,
        Some("Hello"),
        "Fold-artifact whitespace before an encoded-word must still be \
         stripped (RFC 5322 Section 2.2.3)"
    );
}

/// Regression: RFC 2183 Section 2 defines disposition-type as a bare
/// token, yet some mailers send it double-quoted (`"attachment"`).
/// Quoted values must be tolerated, and the unquoted forms must keep
/// matching.
#[test]
fn is_disposition_type_quoted() {
    // (Content-Disposition value, expected type, assertion message)
    let cases = [
        // Quoted disposition-type, with and without parameters.
        (
            "\"attachment\"; filename=\"test.txt\"",
            "attachment",
            "quoted attachment with params should match",
        ),
        ("\"inline\"", "inline", "quoted inline should match"),
        // Plain tokens, to guard against regressions.
        (
            "attachment; filename=\"test.txt\"",
            "attachment",
            "unquoted attachment should still match",
        ),
        ("inline", "inline", "unquoted inline should still match"),
    ];

    for (value, kind, why) in cases {
        assert!(is_disposition_type(value, kind), "{why}");
    }
}

/// `Content-Disposition` (RFC 2183 Section 2) is a structured field, so a
/// comment (RFC 5322 Section 3.2.2, CFWS) may precede the disposition
/// token.  `(saved copy) attachment; filename="notes.txt"` must still be
/// classified as an attachment.
#[test]
fn parse_email_attachment_with_leading_disposition_comment() {
    let message: &[u8] = b"From: sender@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        MIME-Version: 1.0\r\n\
        Content-Type: text/plain; charset=us-ascii\r\n\
        Content-Disposition: (saved copy) attachment; filename=\"notes.txt\"\r\n\
        \r\n\
        Quarterly report\r\n";

    let email = parse_email(message).expect("message with attachment comment must parse");

    // The part is an attachment, so it must not double as the body text.
    let has_body_text = email.body_text.is_some();
    assert!(
        !has_body_text,
        "Content-Disposition CFWS before attachment must not demote the part to body text"
    );

    assert_eq!(
        email.attachments.len(),
        1,
        "attachment with leading disposition comment must still be detected"
    );

    let part = &email.attachments[0];
    assert_eq!(
        part.filename.as_deref(),
        Some("notes.txt"),
        "filename parameter must still be preserved"
    );
    assert!(
        !part.is_inline,
        "attachment disposition must not be treated as inline"
    );
}

/// RFC 2183 Section 2.10 permits `Content-Disposition` on the main
/// message.  Callers of `parse_headers_only()` need to see that header,
/// so it has to be present in `extra_headers` with its value intact.
#[test]
fn parse_headers_only_preserves_top_level_content_disposition() {
    let message = b"From: sender@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        MIME-Version: 1.0\r\n\
        Content-Type: text/plain; charset=us-ascii\r\n\
        Content-Disposition: attachment; filename=\"message.eml\"\r\n\
        \r\n\
        Quarterly report\r\n";

    let headers = parse_headers_only(message)
        .expect("message with top-level Content-Disposition must parse");

    // Header names are matched in their lowercase form.
    let preserved = headers.extra_headers.iter().any(|(name, value)| {
        name == "content-disposition" && value == "attachment; filename=\"message.eml\""
    });

    assert!(
        preserved,
        "top-level Content-Disposition must be preserved in extra_headers for header-only consumers"
    );
}

/// RFC 2045 Section 7 allows `Content-ID` on any MIME entity, including
/// the top-level message.  `parse_headers_only()` must expose it through
/// `extra_headers` instead of dropping it.
#[test]
fn parse_headers_only_preserves_top_level_content_id() {
    let message = b"From: sender@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        MIME-Version: 1.0\r\n\
        Content-Type: text/plain; charset=us-ascii\r\n\
        Content-ID: <top-level@example.com>\r\n\
        \r\n\
        Quarterly report\r\n";

    let headers =
        parse_headers_only(message).expect("message with top-level Content-ID must parse");

    // The value is expected verbatim, angle brackets included.
    let preserved = headers
        .extra_headers
        .iter()
        .any(|(name, value)| name == "content-id" && value == "<top-level@example.com>");

    assert!(
        preserved,
        "top-level Content-ID must be preserved in extra_headers for header-only consumers"
    );
}

/// Regression: addresses whose quoted local-part itself contains `@`,
/// e.g. `"user@internal"@example.com`.
///
/// RFC 5322 Section 3.4.1 defines `addr-spec = local-part "@" domain`
/// with `local-part = dot-atom / quoted-string`, so only the `@` outside
/// the quoted-string separates local-part from domain.  Four forms are
/// checked: bare, angle-bracketed, quoted-string without a domain (must
/// be rejected), and bare with a trailing comment.
#[test]
fn regression_quoted_local_part_with_at_in_bare_address() {
    // 1. Bare addr-spec.
    let bare = parse_single_address("\"user@internal\"@example.com")
        .expect("bare quoted-local-part addr should parse");
    assert_eq!(
        bare.email, "\"user@internal\"@example.com",
        "email field must contain the full addr-spec including quotes"
    );
    assert!(
        bare.name.is_none(),
        "bare address should have no display name"
    );

    // 2. Same addr-spec wrapped in angle brackets.
    let angle = parse_single_address("<\"user@internal\"@example.com>")
        .expect("angle-bracketed quoted-local-part addr should parse");
    assert_eq!(
        angle.email, "\"user@internal\"@example.com",
        "angle-bracketed form must produce the same addr-spec"
    );
    assert!(
        angle.name.is_none(),
        "no display name in angle-bracket-only form"
    );

    // 3. A quoted-string on its own: the only '@' is inside the quotes,
    //    so there is no domain and this is not an addr-spec.
    let lone_quoted = parse_single_address("\"user@internal\"");
    assert!(
        lone_quoted.is_none(),
        "a quoted-string with '@' but no domain must not be treated as an addr-spec"
    );

    // 4. Bare addr-spec followed by a comment (RFC 5322 Section 3.2.2):
    //    the comment supplies the display name, the addr-spec stays whole.
    let commented = parse_single_address("\"user@internal\"@example.com (Display Name)")
        .expect("quoted-local with trailing comment should parse");
    assert_eq!(
        commented.email, "\"user@internal\"@example.com",
        "email field must be the full addr-spec"
    );
    assert_eq!(
        commented.name.as_deref(),
        Some("Display Name"),
        "display name must be extracted from trailing comment"
    );
}

/// Regression test: an unquoted parameter value that was folded in the
/// middle of the token.
///
/// Unfolding (RFC 5322 Section 2.2.3) turns `charset=ut\r\n f-8` into
/// `charset=ut f-8`.  RFC 2045 Section 5.1 does not allow spaces inside a
/// token, so the space can only come from the fold and the value must be
/// read as "utf-8", not "ut".
#[test]
fn extract_param_folded_mid_token_charset() {
    // Input is the header value as it looks after unfolding.
    let charset = extract_param("text/plain; charset=ut f-8", "charset");

    assert_eq!(
        charset.as_deref(),
        Some("utf-8"),
        "folded mid-token value must be reassembled (RFC 5322 Section 2.2.3, \
         RFC 2045 Section 5.1)"
    );
}

/// Folded-token reassembly (RFC 5322 Section 2.2.3 unfolding combined
/// with RFC 2045 Section 5.1 token syntax) must also work when the
/// continuation fragment is purely alphabetic: `boundary=abc def` is the
/// unfolded form of a split `abcdef`.
#[test]
fn extract_param_folded_mid_token_boundary_all_alpha() {
    let boundary = extract_param("multipart/mixed; boundary=abc def", "boundary");

    assert_eq!(
        boundary.as_deref(),
        Some("abcdef"),
        "alphabetic folded boundary token must be reassembled after unfolding"
    );
}

/// MIME parameter syntax (RFC 2045 Section 5.1) allows CFWS between
/// tokens, and CFWS may consist of a comment alone.  In
/// `text/plain;(note)charset=utf-8` the parameter name directly follows
/// `)`, with no space or tab before it; the comment must still count as
/// the separator preceding `charset`.
#[test]
fn extract_param_accepts_comment_only_cfws_before_name() {
    let charset = extract_param("text/plain;(note)charset=utf-8", "charset");

    assert_eq!(
        charset.as_deref(),
        Some("utf-8"),
        "comment-only CFWS before a parameter name must still count as a valid boundary"
    );
}

/// Guard against over-eager fold reassembly: a bare word that follows an
/// unquoted value and is not itself a parameter (no `=`) is stray text,
/// not the continuation of a folded token.
///
/// RFC 2045 Section 5.1: an unquoted value is a token, ended by `;` or
/// whitespace.  For `charset=utf-8 unexpected; name=test.txt` the charset
/// is "utf-8" and `unexpected` must not be appended to it.
#[test]
fn extract_param_no_false_fold_concat() {
    let charset = extract_param(
        "text/plain; charset=utf-8 unexpected; name=test.txt",
        "charset",
    );

    assert_eq!(
        charset.as_deref(),
        Some("utf-8"),
        "should not concatenate 'unexpected' with 'utf-8'"
    );
}

/// A comment that follows a parameter value is CFWS (RFC 5322 Section
/// 3.2.2), not part of the value.
///
/// RFC 2045 Section 5.1 uses RFC 822-style parameter syntax, so for
/// `charset=utf-8 (comment); name=test.txt` the value ends at the token
/// boundary and `(comment)` is left out.
#[test]
fn extract_param_does_not_absorb_trailing_comment() {
    let charset = extract_param(
        "text/plain; charset=utf-8 (comment); name=test.txt",
        "charset",
    );

    assert_eq!(
        charset.as_deref(),
        Some("utf-8"),
        "trailing comment must not be absorbed into MIME token value"
    );
}

/// Regression test: text inside a CFWS comment must never be taken for a
/// parameter, even when it looks like `name=value`.
///
/// Here the comment `( charset=iso-8859-1)` precedes the real
/// `charset=utf-8`; the lookup has to skip the comment and return the
/// real value.
///
/// # References
/// - RFC 2045 Section 5.1 (Content-Type parameter syntax)
/// - RFC 5322 Section 3.2.2 (CFWS / comment)
#[test]
fn extract_param_ignores_comment_content() {
    // Inside the comment, "charset" is preceded by a space, which makes
    // it look like the start of a parameter.
    let charset = extract_param(
        "text/plain; ( charset=iso-8859-1) charset=utf-8",
        "charset",
    );

    assert_eq!(
        charset.as_deref(),
        Some("utf-8"),
        "must not match 'charset' inside a parenthesized comment"
    );
}

/// Regression test: comments can nest, and a parameter-like string inside
/// an inner comment must be skipped just like one in a flat comment.
///
/// # References
/// - RFC 5322 Section 3.2.2 (nested comments)
#[test]
fn extract_param_ignores_nested_comment() {
    let charset = extract_param(
        "text/plain; (outer (charset=iso) comment) charset=utf-8",
        "charset",
    );

    assert_eq!(
        charset.as_deref(),
        Some("utf-8"),
        "must not match 'charset' inside a nested comment"
    );
}

/// End-to-end check of folded-charset reassembly through `parse_email`,
/// where the charset actually drives body decoding.  windows-1252 is
/// used because a truncated label ("windows") cannot decode the
/// non-ASCII body bytes correctly.
#[test]
fn parse_email_folded_charset_mid_token() {
    // The header is folded between "windows" and "-1252".
    // Body bytes 0x93 / 0x94 are the left / right double quotation marks
    // (U+201C / U+201D) in windows-1252.  With the wrong charset they
    // cannot come out as those characters (presumably U+FFFD appears
    // instead — not asserted here).
    let message: &[u8] = b"From: test@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=windows\r\n \
        -1252\r\n\
        \r\n\
        \x93Hello\x94";

    let body = parse_email(message).unwrap().body_text.unwrap();

    let has_open_quote = body.contains('\u{201C}');
    let has_close_quote = body.contains('\u{201D}');
    assert!(
        has_open_quote && has_close_quote,
        "body must be decoded with reassembled charset windows-1252, got: {body:?}"
    );
}

/// Folded-token tolerance must extend to a value split over more than one
/// continuation line.  A parser that rejoins only a single fragment would
/// truncate `windows-1252` when it arrives as `win` / `dows-` / `1252`.
#[test]
fn parse_email_folded_charset_multiple_mid_token_folds() {
    // Two folds inside the charset token; body bytes 0x93 / 0x94 decode
    // to U+201C / U+201D only under windows-1252.
    let message: &[u8] = b"From: test@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=win\r\n \
        dows-\r\n \
        1252\r\n\
        \r\n\
        \x93Hello\x94";

    let body = parse_email(message).unwrap().body_text.unwrap();

    let has_open_quote = body.contains('\u{201C}');
    let has_close_quote = body.contains('\u{201D}');
    assert!(
        has_open_quote && has_close_quote,
        "body must be decoded with fully reassembled charset windows-1252, \
         got: {body:?}"
    );
}

/// Regression test: a folded token whose continuation fragment is
/// alphabetic and whose first fragment ends in a non-alphabetic token
/// character (`-`).
///
/// RFC 2045 Section 5.1 forbids spaces inside tokens, so the space left
/// by unfolding (RFC 5322 Section 2.2.3) is a fold artifact: `koi8-` and
/// `r` must be rejoined into `koi8-r` rather than cut at the fold.
#[test]
fn parse_email_folded_charset_alpha_tail_after_hyphen() {
    // Body text encoded as KOI8-R; it decodes back only when the charset
    // label is reassembled correctly.
    let (body_bytes, _, _) = encoding_rs::KOI8_R.encode("Привет");

    let head: &[u8] = b"From: test@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=koi8-\r\n \
        r\r\n\
        \r\n";
    let message = [head, &*body_bytes].concat();

    let email = parse_email(&message).unwrap();
    let text = email.body_text.as_deref();

    assert_eq!(
        text,
        Some("Привет"),
        "body must be decoded with reassembled charset koi8-r"
    );
}

/// RFC 2047 Section 2 limits encoded-text to printable ASCII (33-126)
/// other than `?`.  A horizontal tab (0x09) inside the encoded-text makes
/// the encoded-word invalid, so it must not be decoded.
///
/// Only the negative is asserted: the subject must not be the decoded
/// "Hello".  The exact literal fallback is not pinned down.
#[test]
fn encoded_word_rejects_control_chars_in_encoded_text() {
    let message: &[u8] = b"From: test@example.com\r\n\
        Subject: =?utf-8?B?\tSGVsbG8=?=\r\n\
        \r\n";

    let email = parse_email(message).unwrap();

    // True only if the tab was ignored and the base64 payload decoded.
    let was_decoded = email.subject.as_deref() == Some("Hello");
    assert!(
        !was_decoded,
        "encoded word with tab in encoded-text must be rejected (RFC 2047 Section 2), \
         got subject: {:?}",
        email.subject
    );
}

/// RFC 2047 Section 2: encoded-text consists of printable ASCII (33-126)
/// only. Byte 0x80 lies outside that range, so the encoded-word must not be
/// decoded to "Hello".
#[test]
fn encoded_word_rejects_high_bytes_in_encoded_text() {
    let input = b"From: test@example.com\r\n\
                  Subject: =?utf-8?B?\x80SGVsbG8=?=\r\n\
                  \r\n";
    let parsed = parse_email(input).unwrap();
    assert_ne!(
        parsed.subject.as_deref(),
        Some("Hello"),
        "encoded word with high byte in encoded-text must be rejected (RFC 2047 Section 2), \
         got subject: {:?}",
        parsed.subject
    );
}

/// Regression test: a B-encoded RFC 2047 word that header folding
/// (RFC 5322 Section 2.2.3) has split in two must still decode.
///
/// Unfolding removes the CRLF but keeps the continuation line's leading
/// whitespace, which then sits inside the encoded-text. That fold-introduced
/// whitespace has to be dropped before base64 decoding, in the same way body
/// base64 decoding already ignores it.
///
/// # References
/// - RFC 2047 Section 2 (encoded-word structure)
/// - RFC 5322 Section 2.2.3 (header unfolding)
#[test]
fn encoded_word_folded_b_encoding() {
    // base64("Hello") = "SGVsbG8=", folded as "SGVs" CRLF SP "bG8=".
    // The fold stays on one source line so the SP after CRLF is preserved.
    let input = b"From: test@example.com\r\n\
                  Subject: =?UTF-8?B?SGVs\r\n bG8=?=\r\n\
                  \r\n\
                  body";
    let subject = parse_email(input).unwrap().subject;
    assert_eq!(
        subject.as_deref(),
        Some("Hello"),
        "B-encoded word split by header folding must decode to 'Hello'"
    );
}

/// Regression test: the Q-encoding counterpart of the folded encoded-word
/// case. A Q-encoded word split by header folding must decode as if it had
/// never been folded.
///
/// # References
/// - RFC 2047 Section 4.2 (Q encoding)
/// - RFC 5322 Section 2.2.3 (header unfolding)
#[test]
fn encoded_word_folded_q_encoding() {
    // Q-encoded "Hello", folded as "Hel" CRLF SP "lo".
    // The fold stays on one source line so the SP after CRLF is preserved.
    let input = b"From: test@example.com\r\n\
                  Subject: =?UTF-8?Q?Hel\r\n lo?=\r\n\
                  \r\n\
                  body";
    let subject = parse_email(input).unwrap().subject;
    assert_eq!(
        subject.as_deref(),
        Some("Hello"),
        "Q-encoded word split by header folding must decode to 'Hello'"
    );
}

/// A folded header line may continue with either SP or HTAB
/// (RFC 5322 Section 2.2.3). This is the HTAB variant of the folded
/// B-encoding case: the fold-introduced tab inside the encoded-word must be
/// stripped before decoding, exactly like the SP variant.
#[test]
fn encoded_word_folded_b_encoding_with_tab() {
    // base64("Hello") = "SGVsbG8=", folded as "SGVs" CRLF HTAB "bG8=".
    let input = b"From: test@example.com\r\n\
                  Subject: =?UTF-8?B?SGVs\r\n\tbG8=?=\r\n\
                  \r\n\
                  body";
    let subject = parse_email(input).unwrap().subject;
    assert_eq!(
        subject.as_deref(),
        Some("Hello"),
        "B-encoded word split by tab folding must decode to 'Hello'"
    );
}

// ========================================================================
// Fuzz regression tests
// ========================================================================

/// Regression: `parse_timezone` used to slice the zone string at fixed byte
/// offsets (`s[1..3]`, `s[3..5]`) to pull hours and minutes out of values
/// such as `+0530`. With multi-byte UTF-8 in the input (e.g. the U+FFFD
/// replacement characters `from_utf8_lossy` produces for non-ASCII header
/// bytes) those offsets could fall inside a character and panic.
///
/// The fix switched to `s.get(1..3)` / `s.get(3..5)`, which yield `None`
/// for a non-char-boundary index instead of panicking.
///
/// Discovered by cargo-fuzz on the `parse_email` target.
#[test]
fn fuzz_parse_timezone_non_ascii_no_panic() {
    // Minimized crash input. The Date value ends in `+0\xff\xff`, which turns
    // into `+0\u{FFFD}\u{FFFD}` after lossy UTF-8 conversion.
    let input: &[u8] = b"From: a@b\nDate: Thu, 3 Feb 2025 15:43 +0\xFF\xFF";

    // Ok or Err are both acceptable; the only requirement is no panic.
    let _ = parse_email(input);
}

// ========================================================================
// Property-based invariant tests
// ========================================================================

mod prop_invariants {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        #![proptest_config(ProptestConfig::with_cases(1000))]

        /// Arbitrary bytes must never make the full email parser panic
        /// (Postel's law). The parse result itself is ignored.
        #[test]
        fn parse_email_never_panics(data in prop::collection::vec(any::<u8>(), 0..1000)) {
            let _ = parse_email(&data);
        }

        /// Arbitrary bytes must never make the headers-only parser panic.
        #[test]
        fn parse_headers_only_never_panics(data in prop::collection::vec(any::<u8>(), 0..1000)) {
            let _ = parse_headers_only(&data);
        }

        /// Arbitrary UTF-8 text must never make the RFC 2047 decoder panic.
        #[test]
        fn decode_encoded_words_never_panics(s in ".*") {
            let _ = decode_encoded_words(&s);
        }

        /// Arbitrary UTF-8 text must never make the date parser panic.
        #[test]
        fn parse_date_never_panics(s in ".*") {
            let _ = parse_rfc5322_date(&s);
        }

        /// Whenever parsing succeeds, `size` equals the raw input length.
        ///
        /// Callers rely on `size` reflecting the raw byte count (quota
        /// checks, progress bars, etc.). Inputs that fail to parse are
        /// not checked.
        #[test]
        fn parsed_size_matches_input(data in prop::collection::vec(any::<u8>(), 0..1000)) {
            let input_len = data.len() as u64;
            if let Ok(parsed) = parse_email(&data) {
                prop_assert_eq!(
                    parsed.size,
                    input_len,
                    "ParsedEmail.size must match input byte count"
                );
            }
        }

        /// Regression: a parameter that appears only inside an
        /// RFC 5322 Section 3.2.2 comment must not be picked up by
        /// `extract_param`.
        ///
        /// Bug 823a19e: `extract_param` returned the `charset` found inside
        /// a `(charset=fake)` comment.
        ///
        /// # References
        /// - RFC 2045 Section 5.1 (Content-Type parameter syntax)
        /// - RFC 5322 Section 3.2.2 (CFWS / comment)
        #[test]
        fn extract_param_ignores_comments(
            real_value in "[a-z][a-z0-9-]{0,15}",
            decoy_value in "[a-z][a-z0-9-]{0,15}",
        ) {
            // The decoy lives inside the comment; the real parameter follows it.
            let ct = format!("text/plain; (charset={decoy_value}) charset={real_value}");
            let extracted = extract_param(&ct, "charset");
            let expected = Some(real_value.as_str());
            prop_assert_eq!(
                extracted.as_deref(),
                expected,
                "must extract real value, not value inside comment, for: {:?}",
                ct
            );
        }

        /// Regression: a backslash in an *unquoted* parameter value is a
        /// literal character, not an escape.
        ///
        /// Bug 86922a5: backslash was treated as escape outside quoted strings.
        ///
        /// Only checked when `extract_param` returns a value; a `None`
        /// result passes without assertion.
        ///
        /// # References
        /// - RFC 2045 Section 5.1 (Content-Type parameter values)
        #[test]
        fn unquoted_param_preserves_backslash(
            prefix in "[a-z]{1,5}",
            suffix in "[a-z]{1,5}",
        ) {
            let ct = format!("application/octet-stream; filename={prefix}\\{suffix}.txt");
            if let Some(val) = extract_param(&ct, "filename") {
                prop_assert!(
                    val.contains('\\'),
                    "backslash in unquoted value must be preserved: got {:?} from {:?}",
                    val, ct
                );
            }
        }
    }
}

/// Tests that parsers finish within a fixed time budget on adversarial
/// inputs, to surface excessive recursion and algorithmic-complexity
/// blow-ups that single-call property tests would not notice.
///
/// The elapsed time is checked only after the call under test returns, so a
/// parse that truly never terminates hangs the test instead of failing it;
/// what these tests reliably flag is a call that returns, but too slowly.
mod stuck_tests {
    use super::*;
    use std::time::{Duration, Instant};

    /// Maximum time any single parse call should take.
    /// If it exceeds this, the parser is effectively "stuck".
    const MAX_PARSE_TIME: Duration = Duration::from_secs(5);

    /// Runs `f` once and panics if it took `MAX_PARSE_TIME` or longer.
    ///
    /// `name` labels the scenario in the failure message. The measurement
    /// happens after `f` returns; `f` is not interrupted.
    fn assert_terminates<F: FnOnce()>(name: &str, f: F) {
        let start = Instant::now();
        f();
        let elapsed = start.elapsed();
        assert!(
            elapsed < MAX_PARSE_TIME,
            "{name} took {elapsed:?}, exceeds {MAX_PARSE_TIME:?} — parser may be stuck"
        );
    }

    #[test]
    fn parse_email_repeated_bytes_100kb() {
        // 100,000 bytes of 0xFF: no header structure and no valid UTF-8.
        let data = vec![0xFFu8; 100_000];
        assert_terminates("100KB 0xFF", || {
            let _ = parse_email(&data);
        });
    }

    #[test]
    fn parse_email_deeply_nested_multipart() {
        // 100 levels of nested multipart — tests recursion depth limits
        let boundaries: Vec<String> = (0..100).map(|i| format!("boundary{i:03}")).collect();

        // Top-level headers declare the outermost boundary.
        let mut data = Vec::new();
        data.extend_from_slice(
            b"From: a@b.com\r\nContent-Type: multipart/mixed; boundary=\"boundary000\"\r\n\r\n",
        );

        // Each adjacent (outer, inner) pair opens a part delimited by `outer`
        // whose own Content-Type declares `inner` as the next boundary.
        for pair in boundaries.windows(2) {
            data.extend_from_slice(
                format!(
                    "--{}\r\nContent-Type: multipart/mixed; boundary=\"{}\"\r\n\r\n",
                    pair[0], pair[1]
                )
                .as_bytes(),
            );
        }

        // Innermost part: a plain-text leaf inside the deepest boundary.
        data.extend_from_slice(format!("--{}\r\n", boundaries[99]).as_bytes());
        data.extend_from_slice(b"Content-Type: text/plain\r\n\r\nHello\r\n");

        // Close every boundary, innermost first.
        for boundary in boundaries.iter().rev() {
            data.extend_from_slice(format!("--{boundary}--\r\n").as_bytes());
        }

        assert_terminates("100-deep nested multipart", || {
            let _ = parse_email(&data);
        });
    }

    #[test]
    fn parse_email_huge_header_count() {
        // 10,000 headers — tests that header parsing is linear
        let mut data = b"From: a@b.com\r\n".to_vec();
        for i in 0..10_000 {
            data.extend_from_slice(format!("X-Header-{i}: value{i}\r\n").as_bytes());
        }
        data.extend_from_slice(b"\r\nBody\r\n");
        assert_terminates("10K headers", || {
            let _ = parse_email(&data);
        });
    }

    #[test]
    fn parse_email_long_header_value() {
        // Single header with a roughly 1MB value — tests that folded line
        // processing is linear. 14,000 fragments of 76 'A' bytes, each
        // fragment after the first preceded by a fold (CRLF + SP).
        let fragment = [b'A'; 76];
        let mut data = b"From: a@b.com\r\nSubject: ".to_vec();
        for chunk in 0..14_000 {
            if chunk > 0 {
                data.extend_from_slice(b"\r\n ");
            }
            data.extend_from_slice(&fragment);
        }
        data.extend_from_slice(b"\r\n\r\nBody\r\n");
        assert_terminates("1MB folded header", || {
            let _ = parse_email(&data);
        });
    }

    #[test]
    fn decode_encoded_words_pathological_pattern() {
        use std::fmt::Write;

        // Many encoded-word-like patterns that fail to decode —
        // tests that the decoder doesn't backtrack quadratically.
        let mut input = String::new();
        for i in 0..10_000 {
            // Writing into a `String` cannot fail; the result is ignored.
            let _ = write!(input, "=?UTF-8?X?invalid{i}?= ");
        }
        assert_terminates("10K invalid encoded words", || {
            let _ = decode_encoded_words(&input);
        });
    }

    // Correctness check rather than a timing check: it does not go through
    // `assert_terminates`.
    #[test]
    fn parse_headers_non_ascii_name_skipped() {
        // RFC 5322 Section 2.2: header field names are ASCII `ftext`.
        // RFC 6532 extends field bodies, not field names, so malformed
        // UTF-8 names must not be parsed as real headers.
        let input = "X-\u{0130}: value\r\n\r\n";
        let headers = parse_headers(input.as_bytes());
        assert!(headers.is_empty(), "non-ASCII header names must be skipped");
    }

    // Correctness check rather than a timing check: it does not go through
    // `assert_terminates`.
    #[test]
    fn parse_year_four_digit_low_value_not_obs_year() {
        // RFC 5322 Section 4.3: obs-year rules only apply to 2-digit and
        // 3-digit year strings. A 4-digit "0050" must parse as year 50,
        // not year 1950, to allow round-tripping via to_rfc5322_string().
        assert_eq!(parse_year("0050"), Some(50));
        assert_eq!(parse_year("0000"), Some(0));
        assert_eq!(parse_year("0500"), Some(500));
        // Existing 2-digit behavior preserved
        assert_eq!(parse_year("50"), Some(1950));
        assert_eq!(parse_year("99"), Some(1999));
        assert_eq!(parse_year("00"), Some(2000));
        assert_eq!(parse_year("49"), Some(2049));
        // Existing 3-digit behavior preserved
        assert_eq!(parse_year("100"), Some(2000));
        // Existing 4-digit behavior preserved
        assert_eq!(parse_year("2025"), Some(2025));
    }

    #[test]
    fn parse_date_repeated_whitespace() {
        // Date with enormous amounts of whitespace — tests strip_comments
        // and split_whitespace performance.
        let mut input = String::from("Thu, 13 Feb 2025 15:47:33 +0000");
        input.push_str(&" ".repeat(100_000));
        assert_terminates("100K trailing spaces in date", || {
            let _ = parse_rfc5322_date(&input);
        });
    }
}

/// RFC 5322 Section 3.6.4 + Postel's law: broken mailers may emit more than
/// one In-Reply-To header. The message-IDs of all occurrences are collected
/// in order, matching how duplicate address headers are treated.
#[test]
fn duplicate_in_reply_to_headers_concatenated() {
    let raw = b"From: a@b.com\r\n\
                In-Reply-To: <id1@host>\r\n\
                In-Reply-To: <id2@host>\r\n\
                Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                \r\n";
    let expected = vec!["id1@host", "id2@host"];
    let parsed = parse_email(raw).unwrap();
    assert_eq!(parsed.in_reply_to, expected);
}

/// RFC 5322 Section 3.6.4 + Postel's law: broken mailers may emit more than
/// one References header. The message-IDs of all occurrences are collected
/// in order, matching how duplicate address headers are treated.
#[test]
fn duplicate_references_headers_concatenated() {
    let raw = b"From: a@b.com\r\n\
                References: <ref1@host>\r\n\
                References: <ref2@host>\r\n\
                Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                \r\n";
    let expected = vec!["ref1@host", "ref2@host"];
    let parsed = parse_email(raw).unwrap();
    assert_eq!(parsed.references, expected);
}

/// RFC 5322 Section 3.2.2: a parameter name that occurs inside a
/// parenthesized comment must be ignored when extracting parameters.
#[test]
fn extract_rfc2231_param_skips_comment_match() {
    // The decoy inside the comment is preceded by a space so that it passes
    // the is_param_boundary check (requires ; or whitespace) and is rejected
    // only because it sits inside the comment.
    let header = concat!(
        "attachment; ",
        "( filename*=UTF-8''wrong.pdf) ",
        "filename*=UTF-8''correct.pdf",
    );
    let filename = extract_rfc2231_param(header, "filename");
    assert_eq!(filename.as_deref(), Some("correct.pdf"));
}

/// RFC 5322 Section 3.2.2: a parameter name that occurs inside a
/// parenthesized comment must be ignored when extracting parameters.
#[test]
fn find_param_value_skips_comment_match() {
    // The decoy inside the comment is preceded by a space so that it passes
    // the is_param_boundary check (requires ; or whitespace) and is rejected
    // only because it sits inside the comment.
    let header = concat!(
        "attachment; ",
        "( filename*0=\"wrong\") ",
        "filename*0=\"correct\"",
    );
    // The function receives a lowercased copy alongside the original text.
    let lowered = header.to_ascii_lowercase();
    let value = find_param_value(&lowered, header, "filename*0=");
    assert_eq!(value.as_deref(), Some("correct"));
}

/// MSG-001: a date whose day-of-week does not match the calendar date must
/// still be accepted (Postel's law, RFC 1122 Section 1.2.2). The MUST in
/// RFC 5322 Section 3.3 constrains generators; real-world mailers
/// frequently compute the wrong weekday.
#[test]
fn regression_msg001_date_accepts_wrong_day_of_week() {
    // 2025-02-13 is a Thursday, yet the header claims Wednesday. The numeric
    // fields must come back unchanged.
    let dt = parse_rfc5322_date("Wed, 13 Feb 2025 15:47:33 +0000")
        .expect("Parser must accept dates with incorrect day-of-week (Postel's law)");

    assert_eq!((dt.year, dt.month, dt.day), (2025, 2, 13));
    assert_eq!((dt.hour, dt.minute, dt.second), (15, 47, 33));
    assert_eq!(dt.tz_offset_minutes, 0);
}

/// MSG-019: encoded words longer than 75 characters must still be decoded
/// (Postel's law, RFC 1122 Section 1.2.2). The limit in RFC 2047 Section 2
/// constrains generators; real-world mailers (Thunderbird, Asian-locale
/// clients) frequently produce overlong encoded words.
#[test]
fn regression_msg019_overlong_encoded_word_decoded() {
    let expected = "This is a long UTF-8 subject that will exceed seventy-five characters easily";
    // base64 of `expected` wrapped as a single encoded word: 116 chars,
    // well over the 75-char limit.
    let encoded_word = "=?UTF-8?B?VGhpcyBpcyBhIGxvbmcgVVRGLTggc3ViamVjdCB0aGF0IHdpbGwgZXhjZWVkIHNldmVudHktZml2ZSBjaGFyYWN0ZXJzIGVhc2lseQ==?=";

    // Guard the fixture itself before exercising the decoder.
    let encoded_len = encoded_word.len();
    assert!(
        encoded_len > 75,
        "Test setup: encoded word must be > 75 chars, got {}",
        encoded_len
    );

    assert_eq!(
        decode_encoded_words(encoded_word),
        expected,
        "Overlong encoded words must be decoded (Postel's law, RFC 2047 Section 6.1)"
    );
}

/// MSG-100: the Resent address headers (`Resent-From`, `Resent-Sender`,
/// `Resent-To`, `Resent-Cc`, `Resent-Bcc`, `Resent-Reply-To`) share the
/// address-list / mailbox syntax of their non-Resent counterparts
/// (RFC 5322 Section 3.6.6). Because RFC 2047 Section 5 rule (3) allows
/// encoded-words in `phrase` productions (display names), their display
/// names must be decoded instead of being left as raw `=?...?=` text.
#[test]
fn regression_msg100_resent_address_headers_decode_display_names() {
    // Every Resent-* display name is "äö", i.e. UTF-8 base64 "w6TDtg==".
    let raw = b"From: sender@example.com\r\n\
                 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
                 Resent-From: =?UTF-8?B?w6TDtg==?= <user@example.com>\r\n\
                 Resent-Sender: =?UTF-8?B?w6TDtg==?= <sender2@example.com>\r\n\
                 Resent-To: =?UTF-8?B?w6TDtg==?= <to@example.com>\r\n\
                 Resent-Cc: =?UTF-8?B?w6TDtg==?= <cc@example.com>\r\n\
                 Resent-Bcc: =?UTF-8?B?w6TDtg==?= <bcc@example.com>\r\n\
                 Resent-Reply-To: =?UTF-8?B?w6TDtg==?= <reply@example.com>\r\n\
                 \r\n\
                 body";

    let parsed = parse_email(raw).unwrap();

    // The headers are looked up in `extra_headers` under lowercase names.
    let resent_headers = [
        "resent-from",
        "resent-sender",
        "resent-to",
        "resent-cc",
        "resent-bcc",
        "resent-reply-to",
    ];

    for header_name in &resent_headers {
        let value = parsed
            .extra_headers
            .iter()
            .find(|(k, _)| k == header_name)
            .map(|(_, v)| v)
            .unwrap_or_else(|| panic!("{header_name} header must be present in extra_headers"));

        let is_decoded = value.contains("\u{e4}\u{f6}");
        assert!(
            is_decoded,
            "{header_name} display name must be RFC 2047 decoded to 'äö'; got: {value:?}"
        );

        let has_raw_syntax = value.contains("=?UTF-8?B?");
        assert!(
            !has_raw_syntax,
            "{header_name} must not contain raw encoded-word syntax; got: {value:?}"
        );
    }
}

/// MSG-119: RFC 2045 Section 6.7 Rule #3 — literal SP/HTAB that trails a
/// line right before a hard line break (CRLF) must be removed during QP
/// decoding: an encoder MUST NOT leave trailing whitespace at the end of an
/// encoded line, so any that is present may have been added by a transport
/// gateway. Hex-encoded whitespace (`=20`, `=09`) is kept on purpose
/// because the sender encoded it explicitly.
#[test]
fn regression_msg119_qp_strip_trailing_whitespace_before_hard_break() {
    /// Decodes `encoded` and compares the result with `expected`,
    /// reporting `why` on mismatch.
    fn expect_decoded(encoded: &[u8], expected: &[u8], why: &str) {
        assert_eq!(decode_quoted_printable(encoded), expected, "{why}");
    }

    // 1. Trailing literal spaces before CRLF are stripped
    expect_decoded(
        b"hello   \r\nworld",
        b"hello\r\nworld",
        "trailing literal spaces before CRLF must be stripped (RFC 2045 Section 6.7 Rule #3)",
    );

    // 2. Trailing literal tabs before CRLF are stripped
    expect_decoded(
        b"hello\t\t\r\nworld",
        b"hello\r\nworld",
        "trailing literal tabs before CRLF must be stripped (RFC 2045 Section 6.7 Rule #3)",
    );

    // 3. Hex-encoded =20 before CRLF is preserved (intentionally encoded)
    expect_decoded(
        b"hello=20\r\nworld",
        b"hello \r\nworld",
        "hex-encoded =20 before CRLF must be preserved (explicitly encoded by sender)",
    );

    // 4. Trailing literal spaces at end of input (no CRLF) are stripped
    expect_decoded(
        b"hello   ",
        b"hello",
        "trailing literal spaces at end of input must be stripped (RFC 2045 Section 6.7 Rule #3)",
    );

    // 5. Lines with only whitespace have it stripped
    expect_decoded(
        b"hello\r\n   \r\nworld",
        b"hello\r\n\r\nworld",
        "lines containing only whitespace must be stripped (RFC 2045 Section 6.7 Rule #3)",
    );

    // 6. Mixed tabs and spaces before CRLF
    expect_decoded(
        b"hello \t \r\nworld",
        b"hello\r\nworld",
        "mixed trailing tabs and spaces before CRLF must be stripped",
    );

    // 7. Hex-encoded =09 (tab) before CRLF is preserved
    expect_decoded(
        b"hello=09\r\nworld",
        b"hello\t\r\nworld",
        "hex-encoded =09 before CRLF must be preserved (explicitly encoded by sender)",
    );
}

/// RFC 5322 Section 4.3: the obsolete time syntax permits CFWS on either
/// side of the colon separators in time-of-day:
///   obs-hour   = [CFWS] 2DIGIT [CFWS]
///   obs-minute = [CFWS] 2DIGIT [CFWS]
///   obs-second = [CFWS] 2DIGIT [CFWS]
///
/// With whitespace around the colons the time is spread over several
/// whitespace-delimited tokens, e.g. `"09 : 55 : 06"` becomes
/// `["09", ":", "55", ":", "06"]`.
#[test]
fn parse_date_obs_time_whitespace_around_colons() {
    let parsed = parse_rfc5322_date("Fri, 21 Nov 1997 09 : 55 : 06 -0600").unwrap();
    assert_eq!((parsed.hour, parsed.minute, parsed.second), (9, 55, 6));
    assert_eq!(parsed.tz_offset_minutes, -360);
    assert_eq!((parsed.year, parsed.month, parsed.day), (1997, 11, 21));
}

/// Variant of the obs-time case (RFC 5322 Section 4.3) with whitespace in
/// front of each colon but none after it.
#[test]
fn parse_date_obs_time_whitespace_before_colons() {
    let parsed = parse_rfc5322_date("Fri, 21 Nov 1997 09 :55 :06 -0600").unwrap();
    assert_eq!((parsed.hour, parsed.minute, parsed.second), (9, 55, 6));
    assert_eq!(parsed.tz_offset_minutes, -360);
}

/// Variant of the obs-time case (RFC 5322 Section 4.3) with whitespace
/// following each colon but none in front of it.
#[test]
fn parse_date_obs_time_whitespace_after_colons() {
    let parsed = parse_rfc5322_date("Fri, 21 Nov 1997 09: 55: 06 -0600").unwrap();
    assert_eq!((parsed.hour, parsed.minute, parsed.second), (9, 55, 6));
    assert_eq!(parsed.tz_offset_minutes, -360);
}

/// Variant of the obs-time case (RFC 5322 Section 4.3) where the first
/// colon has whitespace before it and the second has whitespace after it.
#[test]
fn parse_date_obs_time_mixed_whitespace() {
    let parsed = parse_rfc5322_date("Fri, 21 Nov 1997 09 :55: 06 -0600").unwrap();
    assert_eq!((parsed.hour, parsed.minute, parsed.second), (9, 55, 6));
    assert_eq!(parsed.tz_offset_minutes, -360);
}

/// Guard for the obs-time fix: the ordinary `HH:MM:SS` form without any
/// whitespace around the colons (RFC 5322 Section 3.3) must keep parsing.
/// The input here also omits the optional day-of-week.
#[test]
fn parse_date_standard_time_still_works_after_obs_fix() {
    let parsed = parse_rfc5322_date("21 Nov 1997 09:55:06 -0600").unwrap();
    assert_eq!((parsed.hour, parsed.minute, parsed.second), (9, 55, 6));
    assert_eq!(parsed.tz_offset_minutes, -360);
}

/// Obs-time with the seconds left out. RFC 5322 Section 4.3 makes
/// obs-second optional: `obs-time = obs-hour ":" obs-minute [":" obs-second]`.
/// The missing seconds field is expected to come back as 0.
#[test]
fn parse_date_obs_time_whitespace_no_seconds() {
    let parsed = parse_rfc5322_date("Fri, 21 Nov 1997 09 : 55 -0600").unwrap();
    assert_eq!((parsed.hour, parsed.minute, parsed.second), (9, 55, 0));
    assert_eq!(parsed.tz_offset_minutes, -360);
}

/// RFC 5322 Section 4.3: "UT" and "GMT" are well-defined as +0000
/// (semantically identical to Universal Time) and must NOT be lumped
/// together with the unreliable military single-letter zones.
#[test]
fn parse_timezone_ut_gmt_are_well_defined_rfc5322_section_4_3() {
    // RFC 5322 Section 4.3: "'UT' and 'GMT' are indications of
    // 'Universal Time' and 'Greenwich Mean Time', respectively, and
    // are both semantically identical to '+0000'."
    //
    // UTC is not in RFC 5322 but is universally understood as +0000.
    let well_defined = [
        ("UT", "UT is well-defined as +0000 (RFC 5322 Section 4.3)"),
        ("GMT", "GMT is well-defined as +0000 (RFC 5322 Section 4.3)"),
        ("UTC", "UTC is well-defined as +0000"),
    ];
    for &(zone, why) in &well_defined {
        assert_eq!(parse_timezone(zone), 0, "{why}");
    }
}

/// RFC 5322 Section 4.3: "Z" is listed as +0000 (UTC) in the military zone
/// table and is universally understood, so unlike the other single-letter
/// zones it is treated as well-defined.
#[test]
fn parse_timezone_z_is_well_defined_utc() {
    let offset = parse_timezone("Z");
    assert_eq!(
        offset, 0,
        "Z is well-defined as +0000 (RFC 5322 Section 4.3)"
    );
}

/// RFC 5322 Section 4.3: the military single-letter zones (A-I, K-Y) are
/// unreliable and "SHOULD all be considered equivalent to '-0000'". They
/// map to an offset of 0 while remaining semantically distinct from the
/// well-defined UT/GMT/UTC/Z zones.
#[test]
fn parse_timezone_military_zones_are_unreliable() {
    // A sample of letters from the A-I and K-Y ranges.
    for zone in ["A", "K", "N", "Y"].iter() {
        assert_eq!(
            parse_timezone(zone),
            0,
            "military zone {zone} is unreliable, maps to -0000 (RFC 5322 Section 4.3)"
        );
    }
}