email_message_wire/
rfc822.rs

1use std::borrow::Cow;
2use std::str::FromStr;
3
4use base64::Engine;
5use email_message::{
6    Address, AddressList, Attachment, AttachmentBody, Body, ContentDisposition,
7    ContentTransferEncoding, ContentType, Header, Mailbox, Message, MessageId,
8    MessageValidationError, MimePart,
9};
10use time::OffsetDateTime;
11use time::format_description::well_known::Rfc2822;
12
/// Errors produced while parsing RFC 822 / MIME input into a typed message.
///
/// The enum and its data-carrying variants are `#[non_exhaustive]`, so
/// downstream code must match with a wildcard arm and `..` field patterns.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum MessageParseError {
    /// The input bytes were not valid UTF-8.
    #[error("input is not valid UTF-8")]
    InvalidUtf8,
    /// A header line was rejected.
    ///
    /// `parse_rfc822` also returns this variant when `Header::new` refuses
    /// a generic header; in that case `line` holds the display text of that
    /// error rather than the raw header line.
    #[error("invalid header line `{line}`")]
    #[non_exhaustive]
    InvalidHeaderLine { line: String },
    /// A single-mailbox header failed to parse. `parse_rfc822` sets
    /// `header` to `"From"` or `"Sender"`.
    #[error("failed to parse mailbox from `{header}` header")]
    #[non_exhaustive]
    MailboxHeaderParse { header: &'static str },
    /// An address-list header failed to parse. `parse_rfc822` sets
    /// `header` to `"To"`, `"Cc"`, `"Bcc"`, or `"Reply-To"`.
    #[error("failed to parse address list from `{header}` header")]
    #[non_exhaustive]
    AddressHeaderParse { header: &'static str },
    /// The `Date` header was not a valid RFC 2822 datetime; `source` is the
    /// error reported by the `time` crate.
    #[error("failed to parse Date header as RFC 2822 datetime")]
    #[non_exhaustive]
    Date {
        #[source]
        source: time::error::Parse,
    },
    /// The `Message-ID` header was rejected by `MessageId::try_from`.
    #[error("failed to parse Message-ID header")]
    #[non_exhaustive]
    MessageId {
        #[source]
        source: email_message::MessageIdParseError,
    },
    /// MIME-level failure with a free-form description.
    ///
    /// `parse_rfc822` uses this for oversized input, an invalid top-level
    /// `Content-Transfer-Encoding`, a multipart body without a `boundary`
    /// parameter, and an unparseable content type.
    #[error("failed to parse MIME body: {details}")]
    #[non_exhaustive]
    MimeBodyParse { details: String },
}
43
44impl PartialEq for MessageParseError {
45    /// Pragmatic equality: variants compare by tag, ignoring the
46    /// boxed `source` chains on `Date` and `MessageId`. Sufficient
47    /// for tests and avoids forcing `Eq` on third-party error types.
48    fn eq(&self, other: &Self) -> bool {
49        match (self, other) {
50            (Self::InvalidUtf8, Self::InvalidUtf8)
51            | (Self::Date { .. }, Self::Date { .. })
52            | (Self::MessageId { .. }, Self::MessageId { .. }) => true,
53            (Self::InvalidHeaderLine { line: a }, Self::InvalidHeaderLine { line: b })
54            | (Self::MimeBodyParse { details: a }, Self::MimeBodyParse { details: b }) => a == b,
55            (Self::MailboxHeaderParse { header: a }, Self::MailboxHeaderParse { header: b })
56            | (Self::AddressHeaderParse { header: a }, Self::AddressHeaderParse { header: b }) => {
57                a == b
58            }
59            _ => false,
60        }
61    }
62}
63
64impl Eq for MessageParseError {}
65
/// Maximum input byte length accepted by [`parse_rfc822`]. 16 MiB is far
/// above any practical RFC 5322 message including base64-inflated
/// attachments; anything larger is treated as adversarial and rejected
/// before allocation.
pub const MAX_INPUT_BYTES: usize = 16 * 1024 * 1024;

/// Maximum nesting depth for `multipart/*` parts during inbound parse.
/// Real-world archive formats nest at most ~10 levels; 100 leaves
/// generous headroom while preventing stack-overflow on adversarial
/// input with deeply-nested multipart parts.
pub const MAX_MULTIPART_DEPTH: usize = 100;

/// Maximum number of sibling parts inside a single multipart body
/// during inbound parse. Adversarial input could otherwise produce
/// millions of empty parts (a "fan-out bomb") at one level deep.
pub const MAX_MULTIPART_PARTS: usize = 1024;

/// RFC 5322 §2.1.1 hard limit on the length of a single line, in
/// characters. This is the same 998 figure that the
/// [`RenderOptions::soft_fold_at`] documentation refers to.
// NOTE(review): the enforcement sites are outside this view — confirm
// whether the comparison there counts the trailing CRLF.
const RFC5322_HARD_LINE_LEN: usize = 998;
84
/// Errors produced while rendering a message to RFC 822 / MIME bytes.
///
/// The enum and its data-carrying variants are `#[non_exhaustive]`, so
/// downstream code must match with a wildcard arm and `..` field patterns.
/// In the header-related variants, `name` is the name of the offending
/// header.
#[derive(Clone, Debug, PartialEq, Eq, thiserror::Error)]
#[non_exhaustive]
pub enum MessageRenderError {
    /// A header contains a raw newline.
    #[error("header `{name}` contains raw newline characters")]
    #[non_exhaustive]
    HeaderContainsRawNewline { name: String },
    /// A header contains a disallowed control character.
    #[error("header `{name}` contains invalid control characters")]
    #[non_exhaustive]
    HeaderContainsControlCharacter { name: String },
    /// A header still contains non-ASCII characters at emission time.
    #[error("header `{name}` contains non-ASCII characters")]
    #[non_exhaustive]
    HeaderContainsNonAscii { name: String },
    /// The header name itself is not acceptable.
    #[error("header name `{name}` is invalid")]
    #[non_exhaustive]
    InvalidHeaderName { name: String },
    /// A header line is longer than the RFC 5322 hard line-length limit.
    #[error("header `{name}` exceeds RFC 5322 hard line length limit")]
    #[non_exhaustive]
    HeaderLineTooLong { name: String },
    /// The message date could not be formatted with the RFC 2822 format.
    #[error("failed to format Date header as RFC 2822 datetime")]
    DateFormat,
    /// A multipart boundary was the empty string.
    #[error("MIME boundary cannot be empty")]
    EmptyMimeBoundary,
    /// A multipart boundary contains characters that are not permitted.
    #[error("MIME boundary contains forbidden characters")]
    InvalidMimeBoundary,
    /// The `boundary` parameter in a content type disagrees with the
    /// boundary stored on the part.
    #[error("multipart boundary parameter does not match part boundary")]
    MismatchedMimeBoundary,
    /// A multipart part has no children.
    #[error("multipart parts cannot be empty")]
    EmptyMultipartParts,
    /// The MIME tree nests deeper than [`MAX_MULTIPART_DEPTH`].
    #[error("multipart nesting exceeds maximum depth of {MAX_MULTIPART_DEPTH}")]
    MimeNestingTooDeep,
    /// A multipart part carries a content type that is not `multipart/*`.
    #[error("multipart part must use a multipart content type")]
    InvalidMultipartContentType,
    /// The attachment body is a variant this renderer does not handle.
    #[error("attachment body variant is not supported")]
    UnsupportedAttachmentBody,
    /// An attachment's content-id is not valid.
    #[error("attachment content-id is invalid")]
    InvalidContentId,
    /// The message body is a variant this renderer does not handle.
    #[error("message body variant is not supported")]
    UnsupportedBody,
    /// Basic message validation failed; [`render_rfc822_with`] propagates
    /// the error from `Message::validate_basic` through this variant.
    #[error(transparent)]
    MessageValidation(#[from] MessageValidationError),
}
126
/// Ordered `(name, value)` header pairs, as owned strings.
type HeaderFields = Vec<(String, String)>;
/// Header fields paired with body bytes.
// NOTE(review): presumably the return shape of `render_part` — its
// signature is outside this view.
type RenderedPart = (HeaderFields, Vec<u8>);
/// Result of `build_render_payload`: MIME header fields, body bytes, and a
/// flag that is `true` when the message must be emitted as MIME (with
/// `MIME-Version` and the MIME header fields) rather than as bare text.
type RenderPayload = (HeaderFields, Vec<u8>, bool);
130
/// Render-time options for [`render_rfc822_with`].
///
/// The struct is `#[non_exhaustive]`; future fields will be additive.
/// Construct via [`Self::new`] or [`Self::default`] and chain
/// `with_*` setters.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[non_exhaustive]
pub struct RenderOptions {
    /// When `true`, the rendered message includes a `Bcc:` header line
    /// listing the message's BCC recipients. Defaults to `false`.
    ///
    /// Most SMTP relays strip `Bcc:` on submission anyway; rendering
    /// the field is occasionally useful for archival, `.eml` fixtures,
    /// or clients that consume the rendered bytes outside the SMTP
    /// path.
    pub include_bcc: bool,
    /// Optional soft-fold target for header lines, in characters.
    ///
    /// `None` (the default) emits header lines at the RFC 5322 §2.1.1
    /// hard limit of 998 characters with no soft folding: long values
    /// flow on a single physical line. `Some(n)` instructs the renderer
    /// to fold longer lines at `n` characters via the standard
    /// folding-whitespace mechanism (CRLF + leading SP/HTAB), targeting
    /// the SHOULD ≤ 78 recommendation when `n == 78`.
    ///
    /// The default is `None` because correct soft folding requires
    /// per-header-grammar awareness (encoded-word boundaries,
    /// address-list comma discipline, structured-header whitespace
    /// rules) that the simple folding helper cannot guarantee in every
    /// case. Callers who want SHOULD-compliant output for archival or
    /// for strict legacy MTAs can opt in via `with_soft_fold(78)`; the
    /// renderer still respects the 998 hard limit regardless.
    pub soft_fold_at: Option<usize>,
}

impl RenderOptions {
    /// Options with every field at its default: no `Bcc:` line and no
    /// soft folding. Equal to [`Self::default`], but usable in `const`
    /// contexts.
    #[must_use]
    pub const fn new() -> Self {
        Self {
            include_bcc: false,
            soft_fold_at: None,
        }
    }

    /// Return a copy with [`Self::include_bcc`] set to `value`; the
    /// soft-fold setting is carried over unchanged.
    #[must_use]
    pub const fn with_include_bcc(self, value: bool) -> Self {
        Self {
            include_bcc: value,
            soft_fold_at: self.soft_fold_at,
        }
    }

    /// Return a copy that soft-folds header lines at `soft_fold_at`
    /// characters.
    ///
    /// Pass `78` for the RFC 5322 §2.1.1 SHOULD-compliant recommendation,
    /// or any other positive integer up to `997` for a custom target. The
    /// value is stored as given; this setter performs no range check.
    #[must_use]
    pub const fn with_soft_fold(self, soft_fold_at: usize) -> Self {
        Self {
            include_bcc: self.include_bcc,
            soft_fold_at: Some(soft_fold_at),
        }
    }

    /// Return a copy with soft folding disabled. Long header values then
    /// flow on one physical line up to the 998-character hard limit.
    #[must_use]
    pub const fn without_soft_fold(self) -> Self {
        Self {
            include_bcc: self.include_bcc,
            soft_fold_at: None,
        }
    }
}
198
199/// Parse RFC822/MIME bytes into a structured [`Message`].
200///
201/// # Decoding behavior
202///
203/// - **Body charset.** Bodies declared `utf-8`, `us-ascii`, `iso-8859-1`,
204///   or `latin1` are decoded faithfully. Bodies in other charsets, or
205///   bodies declared `utf-8` with invalid UTF-8 byte sequences, are
206///   passed through `String::from_utf8_lossy`, invalid bytes become
207///   `U+FFFD`. The parser does not error on undecodable bytes; users
208///   needing strict decode semantics should pre-validate.
209/// - **Encoded words.** RFC 2047 encoded words (`=?charset?Q?…?=` /
210///   `=?charset?B?…?=`) are decoded for the same charset allowlist.
211///   Encoded words in other charsets (e.g. `windows-1252`, `gbk`,
212///   `shift_jis`) pass through as the raw `=?…?=` literal.
213/// - **Duplicate headers.** Multiple `To:`, `Cc:`, `Bcc:`, or `Reply-To:`
214///   header lines are merged into a single recipient list. RFC 5322 §3.6
215///   forbids duplicates, but real MTAs occasionally emit them; the
216///   parser is liberal in what it accepts. Outbound rendering emits one
217///   line per category.
218/// - **RFC 6532 (SMTPUTF8).** Header *lines* must be ASCII-only. Senders
219///   that put UTF-8 directly in header bodies (without RFC 2047 encoding)
220///   are rejected with [`MessageParseError::InvalidHeaderLine`]. Most
221///   senders RFC 2047-encode for compat; this rarely surfaces.
222///
223/// # Returned message
224///
225/// The returned [`Message`] has not been promoted through outbound
226/// validation. Wrapping it via [`email_message::OutboundMessage::new`]
227/// may reject inbound-shaped messages that lack a `From:` header or
228/// have no recipients, both legitimate states for an inbound parse.
229///
230/// # Round-trip caveats
231///
232/// `parse_rfc822` is a typed-model deserializer, not a byte-faithful
233/// re-emitter. A `parse → render_rfc822` round-trip is **not** guaranteed
234/// to produce identical bytes:
235///
236/// - **Header order.** Headers are emitted in a fixed canonical order
237///   (`From`, `Sender`, `To`, `Cc`, `Bcc`, `Reply-To`, `Subject`, `Date`,
238///   `Message-ID`, generic headers, MIME headers). Trace metadata such
239///   as `Received:` is preserved as a generic header but appears below
240///   the typed fields rather than at its original parse position.
241/// - **Generic-header decoding asymmetry.** RFC 2047 encoded-words are
242///   decoded for `Subject` and the address headers (`From`, `Sender`,
243///   `To`, `Cc`, `Bcc`, `Reply-To`). For arbitrary other headers, values
244///   are preserved literally, a header value emitted as
245///   `X-Note: =?utf-8?B?w6Fy?=` round-trips as the literal bytes
246///   `=?utf-8?B?w6Fy?=`, *not* the decoded text `ár`. Auto-decoding
247///   every unstructured header would be a security regression because
248///   opaque-bytes headers (`X-Auth-Token`, `DKIM-Signature`,
249///   `Authentication-Results`, `ARC-*`) carry data that must not be
250///   silently rewritten. Callers who *know* a header is unstructured-text
251///   shaped can opt into decoding via [`decode_rfc2047_phrase`].
252///
253/// # Resource bounds
254///
255/// The parser is best-effort and bounded against adversarial input:
256///
257/// - **Input length.** Inputs larger than [`MAX_INPUT_BYTES`] (16 MiB)
258///   are rejected outright with [`MessageParseError::MimeBodyParse`].
259/// - **Multipart depth.** Nested `multipart/*` parts are limited to
260///   [`MAX_MULTIPART_DEPTH`] (100 levels). Deeper inputs would otherwise
261///   stack-overflow on the mutual recursion between the multipart body
262///   parser and the part parser.
263/// - **Multipart fan-out.** A single multipart body cannot contain more
264///   than [`MAX_MULTIPART_PARTS`] (1024) sibling parts.
265///
266/// These caps cover the recursive *parser* surface. The renderer
267/// (`render_rfc822` and `render_rfc822_with`) enforces the symmetric
268/// [`MAX_MULTIPART_DEPTH`] cap on outbound trees, including up to two
269/// frames of attachment-wrapping added by the renderer itself when
270/// inline and/or regular attachments are present (one
271/// `multipart/related` frame for inline parts, one `multipart/mixed`
272/// frame for regular parts). It returns
273/// [`MessageRenderError::MimeNestingTooDeep`] when a `Body::Mime` value
274/// plus those wrap frames exceeds the cap. A `Body::Mime` value at
275/// exactly [`MAX_MULTIPART_DEPTH`] therefore renders cleanly when no
276/// attachments are present but errors when wrapped.
277///
278/// The kernel does **not** depth-cap `serde::Deserialize<Body>` /
279/// `Deserialize<MimePart>` because the recursive
280/// `MimePart::Multipart { parts: Vec<Self> }` shape is the data model,
281/// not a parser artifact. Callers who deserialize untrusted JSON into
282/// [`email_message::Body`] are responsible for pre-bounding the input
283/// themselves (e.g. via `serde_json::de::Deserializer::disable_recursion_limit`
284/// left at its 128-level default, or a separate length cap). The render
285/// path enforces its own cap regardless, so an unbounded deserialize
286/// followed by `render_rfc822` errors cleanly rather than overflowing
287/// the stack.
288///
289/// # Errors
290///
291/// Returns [`MessageParseError`] when headers, mailbox fields, dates,
292/// message ids, MIME metadata, or transfer-encoded bodies are malformed.
293#[allow(clippy::too_many_lines)]
294pub fn parse_rfc822(input: &[u8]) -> Result<Message, MessageParseError> {
295    if input.len() > MAX_INPUT_BYTES {
296        return Err(MessageParseError::MimeBodyParse {
297            details: format!(
298                "input is {} bytes, exceeding maximum of {MAX_INPUT_BYTES}",
299                input.len()
300            ),
301        });
302    }
303
304    let (raw_headers, raw_body) = split_headers_and_body_bytes(input);
305    let parsed_headers = parse_header_lines_bytes(raw_headers)?;
306
307    let mut from: Option<Mailbox> = None;
308    let mut sender: Option<Mailbox> = None;
309    let mut to: Vec<Address> = Vec::new();
310    let mut cc: Vec<Address> = Vec::new();
311    let mut bcc: Vec<Address> = Vec::new();
312    let mut reply_to: Vec<Address> = Vec::new();
313    let mut subject: Option<String> = None;
314    let mut date: Option<OffsetDateTime> = None;
315    let mut message_id: Option<MessageId> = None;
316    let mut root_content_type: Option<ContentTypeHeader> = None;
317    let mut root_content_transfer_encoding: Option<ContentTransferEncoding> = None;
318    let mut headers = Vec::new();
319
320    for (header_name, header_value) in parsed_headers {
321        let header_name_ref = header_name.as_str();
322        let header_value_ref = header_value.as_str();
323        let decoded_header_value = decode_rfc2047_words(header_value_ref);
324
325        // Address-typed headers route the *raw* header value to the
326        // address parser, after escaping encoded-words inside any
327        // quoted-string regions (see
328        // `escape_encoded_words_inside_quoted_strings`). The kernel's
329        // own `decode_rfc2047_words` pass would unconditionally decode
330        // them and the upstream `mail_parser` does the same; the
331        // pre-escape is the only place where the RFC 2047 §5(3) rule
332        // is enforced.
333        let address_value = escape_encoded_words_inside_quoted_strings(header_value_ref);
334        if header_name_ref.eq_ignore_ascii_case("from") {
335            from = Some(
336                address_value
337                    .parse::<Mailbox>()
338                    .map_err(|_| MessageParseError::MailboxHeaderParse { header: "From" })?,
339            );
340            continue;
341        }
342
343        if header_name_ref.eq_ignore_ascii_case("sender") {
344            sender = Some(
345                address_value
346                    .parse::<Mailbox>()
347                    .map_err(|_| MessageParseError::MailboxHeaderParse { header: "Sender" })?,
348            );
349            continue;
350        }
351
352        if header_name_ref.eq_ignore_ascii_case("to") {
353            let mut parsed = AddressList::from_str(&address_value)
354                .map_err(|_| MessageParseError::AddressHeaderParse { header: "To" })?
355                .into_vec();
356            to.append(&mut parsed);
357            continue;
358        }
359
360        if header_name_ref.eq_ignore_ascii_case("cc") {
361            let mut parsed = AddressList::from_str(&address_value)
362                .map_err(|_| MessageParseError::AddressHeaderParse { header: "Cc" })?
363                .into_vec();
364            cc.append(&mut parsed);
365            continue;
366        }
367
368        if header_name_ref.eq_ignore_ascii_case("bcc") {
369            let mut parsed = AddressList::from_str(&address_value)
370                .map_err(|_| MessageParseError::AddressHeaderParse { header: "Bcc" })?
371                .into_vec();
372            bcc.append(&mut parsed);
373            continue;
374        }
375
376        if header_name_ref.eq_ignore_ascii_case("reply-to") {
377            let mut parsed = AddressList::from_str(&address_value)
378                .map_err(|_| MessageParseError::AddressHeaderParse { header: "Reply-To" })?
379                .into_vec();
380            reply_to.append(&mut parsed);
381            continue;
382        }
383
384        if header_name_ref.eq_ignore_ascii_case("subject") {
385            subject = Some(decoded_header_value.into_owned());
386            continue;
387        }
388
389        if header_name_ref.eq_ignore_ascii_case("date") {
390            date = Some(
391                OffsetDateTime::parse(header_value_ref.trim(), &Rfc2822)
392                    .map_err(|source| MessageParseError::Date { source })?,
393            );
394            continue;
395        }
396
397        if header_name_ref.eq_ignore_ascii_case("message-id") {
398            message_id = Some(
399                MessageId::try_from(header_value_ref.trim())
400                    .map_err(|source| MessageParseError::MessageId { source })?,
401            );
402            continue;
403        }
404
405        if header_name_ref.eq_ignore_ascii_case("content-type") {
406            root_content_type = Some(ContentTypeHeader::parse(header_value_ref));
407            continue;
408        }
409
410        if header_name_ref.eq_ignore_ascii_case("content-transfer-encoding") {
411            root_content_transfer_encoding = Some(
412                ContentTransferEncoding::from_str(header_value_ref).map_err(|_| {
413                    MessageParseError::MimeBodyParse {
414                        details: format!(
415                            "invalid top-level content-transfer-encoding `{header_value_ref}`"
416                        ),
417                    }
418                })?,
419            );
420            continue;
421        }
422
423        headers.push(Header::new(header_name, header_value).map_err(|error| {
424            MessageParseError::InvalidHeaderLine {
425                line: error.to_string(),
426            }
427        })?);
428    }
429
430    let body = if let Some(content_type) = root_content_type {
431        if content_type.media_type == "text/plain" {
432            let decoded_root_body = decode_transfer_encoded_body(
433                raw_body,
434                root_content_transfer_encoding
435                    .as_ref()
436                    .map(ContentTransferEncoding::as_str),
437            )?;
438            Body::Text(decode_text_body(
439                &decoded_root_body,
440                content_type.charset.as_deref(),
441            ))
442        } else if content_type.media_type == "text/html" {
443            let decoded_root_body = decode_transfer_encoded_body(
444                raw_body,
445                root_content_transfer_encoding
446                    .as_ref()
447                    .map(ContentTransferEncoding::as_str),
448            )?;
449            Body::Html(decode_text_body(
450                &decoded_root_body,
451                content_type.charset.as_deref(),
452            ))
453        } else if content_type.media_type.starts_with("multipart/") {
454            validate_multipart_transfer_encoding(root_content_transfer_encoding.as_ref())?;
455            let boundary =
456                content_type
457                    .boundary
458                    .ok_or_else(|| MessageParseError::MimeBodyParse {
459                        details: "multipart body is missing boundary parameter".to_owned(),
460                    })?;
461            Body::Mime(parse_multipart_body(
462                raw_body,
463                &content_type.normalized,
464                Some(boundary),
465                0,
466            )?)
467        } else {
468            let decoded_root_body = decode_transfer_encoded_body(
469                raw_body,
470                root_content_transfer_encoding
471                    .as_ref()
472                    .map(ContentTransferEncoding::as_str),
473            )?;
474            Body::Mime(MimePart::Leaf {
475                content_type: ContentType::from_str(&content_type.normalized).map_err(|_| {
476                    MessageParseError::MimeBodyParse {
477                        details: format!("invalid content type `{}`", content_type.normalized),
478                    }
479                })?,
480                content_transfer_encoding: root_content_transfer_encoding,
481                content_disposition: None,
482                body: decoded_root_body,
483            })
484        }
485    } else {
486        let decoded_root_body = decode_transfer_encoded_body(
487            raw_body,
488            root_content_transfer_encoding
489                .as_ref()
490                .map(ContentTransferEncoding::as_str),
491        )?;
492        Body::Text(String::from_utf8_lossy(&decoded_root_body).into_owned())
493    };
494
495    let mut builder = Message::builder(body)
496        .to(to)
497        .cc(cc)
498        .bcc(bcc)
499        .reply_to(reply_to)
500        .headers(headers)
501        .attachments(Vec::new());
502
503    if let Some(from) = from {
504        builder = builder.from_mailbox(from);
505    }
506
507    if let Some(sender) = sender {
508        builder = builder.sender(sender);
509    }
510
511    if let Some(subject) = subject {
512        builder = builder.subject(subject);
513    }
514
515    if let Some(date) = date {
516        builder = builder.date(date);
517    }
518
519    if let Some(message_id) = message_id {
520        builder = builder.message_id(message_id);
521    }
522
523    Ok(builder.build_unchecked())
524}
525
526/// Render a structured [`Message`] as RFC822/MIME bytes.
527///
528/// # Encoding choices
529///
530/// Non-ASCII [`Body::Text`](email_message::Body) and `Body::Html` values are
531/// always rendered with `Content-Transfer-Encoding: base64`. ASCII text bodies
532/// whose physical lines would exceed RFC 5322's 998-octet hard limit are
533/// rendered with `Content-Transfer-Encoding: quoted-printable`. A message
534/// parsed from quoted-printable bytes through [`parse_rfc822`] and rendered
535/// back through this function will therefore round-trip with a different
536/// `Content-Transfer-Encoding`. Callers that need quoted-printable for
537/// near-ASCII bodies can construct a [`MimePart::Leaf`](email_message::MimePart)
538/// with an explicit `content_transfer_encoding` and use [`Body::Mime`].
539///
540/// # Errors
541///
542/// Returns [`MessageRenderError`] when headers or MIME parts cannot be rendered
543/// according to this crate's RFC822 constraints.
544pub fn render_rfc822(message: &Message) -> Result<Vec<u8>, MessageRenderError> {
545    render_rfc822_with(message, &RenderOptions::default())
546}
547
548/// Render a structured [`Message`] as RFC822/MIME bytes with custom options.
549///
550/// See [`render_rfc822`] for the encoding-choice notes; the same trade-offs
551/// apply.
552///
553/// # Errors
554///
555/// Returns [`MessageRenderError`] when headers or MIME parts cannot be rendered
556/// according to this crate's RFC822 constraints.
557pub fn render_rfc822_with(
558    message: &Message,
559    options: &RenderOptions,
560) -> Result<Vec<u8>, MessageRenderError> {
561    message.validate_basic()?;
562
563    let mut out = Vec::new();
564
565    if let Some(from) = message.from_mailbox() {
566        push_header_line(
567            &mut out,
568            "From",
569            &render_mailbox_header(from),
570            options.soft_fold_at,
571        )?;
572    }
573
574    if let Some(sender) = message.sender() {
575        push_header_line(
576            &mut out,
577            "Sender",
578            &render_mailbox_header(sender),
579            options.soft_fold_at,
580        )?;
581    }
582
583    if !message.to().is_empty() {
584        push_header_line(
585            &mut out,
586            "To",
587            &render_address_list_header(message.to()),
588            options.soft_fold_at,
589        )?;
590    }
591
592    if !message.cc().is_empty() {
593        push_header_line(
594            &mut out,
595            "Cc",
596            &render_address_list_header(message.cc()),
597            options.soft_fold_at,
598        )?;
599    }
600
601    if options.include_bcc && !message.bcc().is_empty() {
602        push_header_line(
603            &mut out,
604            "Bcc",
605            &render_address_list_header(message.bcc()),
606            options.soft_fold_at,
607        )?;
608    }
609
610    if !message.reply_to().is_empty() {
611        push_header_line(
612            &mut out,
613            "Reply-To",
614            &render_address_list_header(message.reply_to()),
615            options.soft_fold_at,
616        )?;
617    }
618
619    if let Some(subject) = message.subject() {
620        push_header_line(
621            &mut out,
622            "Subject",
623            &encode_rfc2047_unstructured(subject),
624            options.soft_fold_at,
625        )?;
626    }
627
628    if let Some(date) = message.date() {
629        let formatted = date
630            .format(&Rfc2822)
631            .map_err(|_| MessageRenderError::DateFormat)?;
632        push_header_line(&mut out, "Date", &formatted, options.soft_fold_at)?;
633    }
634
635    if let Some(message_id) = message.message_id() {
636        push_header_line(
637            &mut out,
638            "Message-ID",
639            message_id.as_str(),
640            options.soft_fold_at,
641        )?;
642    }
643
644    let (mime_headers, body_out, is_mime) = build_render_payload(message, options.soft_fold_at)?;
645
646    for header in message.headers() {
647        if is_mime
648            && (header.name().eq_ignore_ascii_case("content-type")
649                || header
650                    .name()
651                    .eq_ignore_ascii_case("content-transfer-encoding")
652                || header.name().eq_ignore_ascii_case("mime-version"))
653        {
654            continue;
655        }
656        // RFC 2047 only applies to *unstructured* fields. Structured
657        // headers (Message-ID, In-Reply-To, References, List-*, Received,
658        // and the standard structured fields) carry their own grammar and
659        // would be corrupted by encoded-word substitution. Generic
660        // headers default to unstructured; a small allowlist below
661        // bypasses the encoder for the structured ones.
662        let value_owned;
663        let value: &str = if header.value().is_ascii() || is_structured_header(header.name()) {
664            header.value()
665        } else {
666            value_owned = encode_rfc2047_unstructured(header.value());
667            &value_owned
668        };
669        push_header_line(&mut out, header.name(), value, options.soft_fold_at)?;
670    }
671
672    if is_mime {
673        push_header_line(&mut out, "MIME-Version", "1.0", options.soft_fold_at)?;
674        for (name, value) in mime_headers {
675            push_header_line(&mut out, &name, &value, options.soft_fold_at)?;
676        }
677    }
678
679    out.extend_from_slice(b"\r\n");
680    out.extend_from_slice(&body_out);
681
682    Ok(out)
683}
684
685fn build_render_payload(
686    message: &Message,
687    soft_fold_at: Option<usize>,
688) -> Result<RenderPayload, MessageRenderError> {
689    if message.attachments().is_empty() {
690        return match message.body() {
691            Body::Text(text) => {
692                let canonical_body = canonicalize_text_line_endings(text);
693                if text.is_ascii() && !contains_overlong_physical_line(&canonical_body) {
694                    Ok((Vec::new(), canonical_body, false))
695                } else {
696                    let root = renderable_text_leaf("text/plain", text);
697                    let mut boundary_counter = 0usize;
698                    let (headers, body) =
699                        render_part(root, &mut boundary_counter, soft_fold_at, 0)?;
700                    Ok((headers, body, true))
701                }
702            }
703            Body::Html(html) => {
704                let root = renderable_text_leaf("text/html", html);
705                let mut boundary_counter = 0usize;
706                let (headers, body) = render_part(root, &mut boundary_counter, soft_fold_at, 0)?;
707                Ok((headers, body, true))
708            }
709            Body::TextAndHtml { .. } | Body::Mime(_) => {
710                let root = body_to_root_part(message.body())?;
711                let mut boundary_counter = 0usize;
712                let (headers, body) = render_part(root, &mut boundary_counter, soft_fold_at, 0)?;
713                Ok((headers, body, true))
714            }
715            _ => Err(MessageRenderError::UnsupportedBody),
716        };
717    }
718
719    let root_body = body_to_root_part(message.body())?;
720    let (inline, regular) = partition_attachments(message.attachments());
721
722    let mut content_root = root_body;
723
724    if !inline.is_empty() {
725        let related_type = media_type_of_render_part(&content_root);
726        let mut parts = vec![content_root];
727        for attachment in inline {
728            parts.push(attachment_to_mime_part(attachment)?);
729        }
730
731        content_root = RenderPart::Multipart {
732            content_type: format!("multipart/related; type=\"{related_type}\""),
733            boundary: None,
734            parts,
735        };
736    }
737
738    if !regular.is_empty() {
739        let mut parts = vec![content_root];
740        for attachment in regular {
741            parts.push(attachment_to_mime_part(attachment)?);
742        }
743
744        content_root = RenderPart::Multipart {
745            content_type: String::from("multipart/mixed"),
746            boundary: None,
747            parts,
748        };
749    }
750
751    let mut boundary_counter = 0usize;
752    let (headers, body) = render_part(content_root, &mut boundary_counter, soft_fold_at, 0)?;
753    Ok((headers, body, true))
754}
755
/// Intermediate tree handed to `render_part`: the message body (and any
/// attachment wrappers) in a form that is ready to be written out.
enum RenderPart {
    /// A terminal part: its header fields and its body bytes. Leaves built
    /// by `mime_to_render_part` have already had their declared transfer
    /// encoding applied to `body`.
    Leaf {
        headers: HeaderFields,
        body: Vec<u8>,
    },
    /// A `multipart/*` container.
    Multipart {
        /// The content type value, e.g. `multipart/mixed`.
        content_type: String,
        /// Explicit boundary. The wrappers built in this file pass `None`.
        // NOTE(review): presumably `render_part` generates a boundary from
        // its counter when this is `None` — confirm against `render_part`.
        boundary: Option<String>,
        /// Child parts, in emission order.
        parts: Vec<Self>,
    },
}
767
768fn body_to_root_part(body: &Body) -> Result<RenderPart, MessageRenderError> {
769    match body {
770        Body::Text(text) => Ok(renderable_text_leaf("text/plain", text)),
771        Body::Html(html) => Ok(renderable_text_leaf("text/html", html)),
772        Body::TextAndHtml { text, html } => Ok(RenderPart::Multipart {
773            content_type: String::from("multipart/alternative"),
774            boundary: None,
775            parts: vec![
776                renderable_text_leaf("text/plain", text),
777                renderable_text_leaf("text/html", html),
778            ],
779        }),
780        Body::Mime(mime) => mime_to_render_part(mime, 0),
781        _ => Err(MessageRenderError::UnsupportedBody),
782    }
783}
784
785fn mime_to_render_part(part: &MimePart, depth: usize) -> Result<RenderPart, MessageRenderError> {
786    if depth > MAX_MULTIPART_DEPTH {
787        return Err(MessageRenderError::MimeNestingTooDeep);
788    }
789    match part {
790        MimePart::Leaf {
791            content_type,
792            content_transfer_encoding,
793            content_disposition,
794            body,
795        } => {
796            let mut headers = vec![(
797                String::from("Content-Type"),
798                content_type.as_str().to_owned(),
799            )];
800            if let Some(value) = content_transfer_encoding {
801                headers.push((
802                    String::from("Content-Transfer-Encoding"),
803                    value.as_str().to_owned(),
804                ));
805            }
806            if let Some(value) = content_disposition {
807                headers.push((
808                    String::from("Content-Disposition"),
809                    value.as_str().to_owned(),
810                ));
811            }
812
813            let rendered_body = encode_body_for_transfer_encoding(
814                body,
815                content_transfer_encoding
816                    .as_ref()
817                    .map(ContentTransferEncoding::as_str),
818            );
819
820            Ok(RenderPart::Leaf {
821                headers,
822                body: rendered_body,
823            })
824        }
825        MimePart::Multipart {
826            content_type,
827            boundary,
828            parts,
829        } => {
830            let rendered_parts = parts
831                .iter()
832                .map(|part| mime_to_render_part(part, depth + 1))
833                .collect::<Result<Vec<_>, _>>()?;
834            Ok(RenderPart::Multipart {
835                content_type: content_type.as_str().to_owned(),
836                boundary: boundary.clone(),
837                parts: rendered_parts,
838            })
839        }
840    }
841}
842
843fn encode_body_for_transfer_encoding(body: &[u8], encoding: Option<&str>) -> Vec<u8> {
844    let Some(encoding) = encoding else {
845        return body.to_vec();
846    };
847
848    if encoding.eq_ignore_ascii_case("base64") {
849        return encode_base64(body);
850    }
851
852    if encoding.eq_ignore_ascii_case("quoted-printable") {
853        return encode_quoted_printable_body(body);
854    }
855
856    body.to_vec()
857}
858
859fn renderable_text_leaf(content_type: &str, value: &str) -> RenderPart {
860    let canonical_body = canonicalize_text_line_endings(value);
861    let mut content_type_value = String::from(content_type);
862    if value.is_ascii() {
863        let mut headers = vec![(String::from("Content-Type"), content_type_value)];
864        if contains_overlong_physical_line(&canonical_body) {
865            headers.push((
866                String::from("Content-Transfer-Encoding"),
867                String::from("quoted-printable"),
868            ));
869            return RenderPart::Leaf {
870                headers,
871                body: encode_quoted_printable_body(&canonical_body),
872            };
873        }
874
875        return RenderPart::Leaf {
876            headers,
877            body: canonical_body,
878        };
879    }
880
881    content_type_value.push_str("; charset=utf-8");
882    let mut headers = vec![(String::from("Content-Type"), content_type_value)];
883
884    headers.push((
885        String::from("Content-Transfer-Encoding"),
886        String::from("base64"),
887    ));
888
889    RenderPart::Leaf {
890        headers,
891        body: encode_base64(&canonical_body),
892    }
893}
894
/// Returns the bytes of `value` with every line ending rewritten to CRLF.
///
/// A bare `\n`, a bare `\r`, and an existing `\r\n` pair each become exactly
/// one `\r\n`; all other bytes are copied unchanged.
fn canonicalize_text_line_endings(value: &str) -> Vec<u8> {
    let mut canonical = Vec::with_capacity(value.len());
    let mut pending = value.bytes().peekable();

    while let Some(byte) = pending.next() {
        match byte {
            b'\r' => {
                // Swallow the LF of an existing CRLF so it is not doubled.
                if pending.peek() == Some(&b'\n') {
                    pending.next();
                }
                canonical.extend_from_slice(b"\r\n");
            }
            b'\n' => canonical.extend_from_slice(b"\r\n"),
            other => canonical.push(other),
        }
    }

    canonical
}
923
924fn contains_overlong_physical_line(body: &[u8]) -> bool {
925    body.split(|byte| *byte == b'\n').any(|line| {
926        let line = line.strip_suffix(b"\r").unwrap_or(line);
927        line.len() > RFC5322_HARD_LINE_LEN
928    })
929}
930
931fn partition_attachments(attachments: &[Attachment]) -> (Vec<&Attachment>, Vec<&Attachment>) {
932    let mut inline = Vec::new();
933    let mut regular = Vec::new();
934
935    for attachment in attachments {
936        if attachment.is_inline() || attachment.content_id().is_some() {
937            inline.push(attachment);
938        } else {
939            regular.push(attachment);
940        }
941    }
942
943    (inline, regular)
944}
945
946fn attachment_to_mime_part(attachment: &Attachment) -> Result<RenderPart, MessageRenderError> {
947    let AttachmentBody::Bytes(raw) = attachment.body() else {
948        return Err(MessageRenderError::UnsupportedAttachmentBody);
949    };
950
951    let mut disposition = if attachment.is_inline() || attachment.content_id().is_some() {
952        String::from("inline")
953    } else {
954        String::from("attachment")
955    };
956
957    if let Some(filename) = attachment.filename() {
958        let encoded = encode_filename_parameter(filename);
959        if let Some(legacy) = encoded.legacy {
960            disposition.push_str("; ");
961            disposition.push_str(&legacy);
962        }
963        if let Some(star) = encoded.extended {
964            disposition.push_str("; ");
965            disposition.push_str(&star);
966        }
967    }
968
969    let mut headers = vec![(
970        String::from("Content-Type"),
971        attachment.content_type().to_string(),
972    )];
973    headers.push((
974        String::from("Content-Transfer-Encoding"),
975        String::from("base64"),
976    ));
977    headers.push((String::from("Content-Disposition"), disposition));
978
979    if let Some(content_id) = attachment.content_id() {
980        headers.push((
981            String::from("Content-ID"),
982            normalize_content_id(content_id)?,
983        ));
984    }
985
986    Ok(RenderPart::Leaf {
987        headers,
988        body: encode_base64(raw),
989    })
990}
991
/// Rendered `Content-Disposition` filename parameter(s) produced by
/// `encode_filename_parameter`.
struct EncodedFilenameParameter {
    /// Quoted-string form: `filename="..."`.
    legacy: Option<String>,
    /// Percent-encoded extended form: `filename*=utf-8''...`.
    extended: Option<String>,
}
996
997fn encode_filename_parameter(filename: &str) -> EncodedFilenameParameter {
998    let escaped = filename.replace('\\', "\\\\").replace('"', "\\\"");
999    let plain_ascii = filename
1000        .bytes()
1001        .all(|b| b.is_ascii() && !b.is_ascii_control());
1002    if plain_ascii {
1003        return EncodedFilenameParameter {
1004            legacy: Some(format!("filename=\"{escaped}\"")),
1005            extended: None,
1006        };
1007    }
1008
1009    // Filenames containing control bytes (including TAB, CR, LF) take the
1010    // RFC 2231 percent-encoded path even when the bytes are otherwise ASCII.
1011    // RFC 6266 §4.1 nominally permits TAB inside a quoted-string, but real
1012    // MUAs misinterpret tabs in `filename=` parameters; force the
1013    // unambiguous encoding.
1014    let mut extended = String::from("filename*=utf-8''");
1015    // Writing into a String is infallible.
1016    let _ = write_percent_encoded(filename.as_bytes(), &mut extended);
1017    EncodedFilenameParameter {
1018        legacy: None,
1019        extended: Some(extended),
1020    }
1021}
1022
/// Writes `input` to `out`, percent-encoding every byte that is not an ASCII
/// alphanumeric or one of ``! # $ & + - . ^ _ ` | ~``.
///
/// Encoded bytes are written as `%XX` with uppercase hex digits.
///
/// # Errors
///
/// Propagates any error reported by `out`.
fn write_percent_encoded<W: std::fmt::Write>(input: &[u8], out: &mut W) -> std::fmt::Result {
    const UNESCAPED_SYMBOLS: &[u8] = b"!#$&+-.^_`|~";

    input.iter().try_for_each(|&byte| {
        if byte.is_ascii_alphanumeric() || UNESCAPED_SYMBOLS.contains(&byte) {
            out.write_char(char::from(byte))
        } else {
            write!(out, "%{byte:02X}")
        }
    })
}
1039
1040fn normalize_content_id(content_id: &str) -> Result<String, MessageRenderError> {
1041    let value = content_id.trim();
1042    if value.is_empty()
1043        || value
1044            .chars()
1045            .any(|ch| ch.is_ascii_control() || ch.is_ascii_whitespace())
1046    {
1047        return Err(MessageRenderError::InvalidContentId);
1048    }
1049
1050    let left = value.matches('<').count();
1051    let right = value.matches('>').count();
1052    if left > 1 || right > 1 {
1053        return Err(MessageRenderError::InvalidContentId);
1054    }
1055    if (left == 1 || right == 1) && !(value.starts_with('<') && value.ends_with('>')) {
1056        return Err(MessageRenderError::InvalidContentId);
1057    }
1058
1059    let addr_spec = if value.starts_with('<') && value.ends_with('>') {
1060        &value[1..value.len() - 1]
1061    } else {
1062        value
1063    };
1064
1065    if addr_spec.is_empty()
1066        || addr_spec
1067            .chars()
1068            .any(|ch| ch.is_ascii_control() || ch.is_ascii_whitespace() || ch == '<' || ch == '>')
1069    {
1070        return Err(MessageRenderError::InvalidContentId);
1071    }
1072
1073    let rendered = if value.starts_with('<') && value.ends_with('>') {
1074        value.to_owned()
1075    } else {
1076        format!("<{value}>")
1077    };
1078
1079    rendered
1080        .parse::<MessageId>()
1081        .map_err(|_| MessageRenderError::InvalidContentId)?;
1082
1083    Ok(rendered)
1084}
1085
1086fn encode_base64(input: &[u8]) -> Vec<u8> {
1087    let encoded = base64::engine::general_purpose::STANDARD.encode(input);
1088    let mut output = Vec::with_capacity(encoded.len() + (encoded.len() / 76 + 2) * 2);
1089
1090    for chunk in encoded.as_bytes().chunks(76) {
1091        output.extend_from_slice(chunk);
1092        output.extend_from_slice(b"\r\n");
1093    }
1094
1095    output
1096}
1097
/// RFC 2047 §5(3): an encoded-word MUST NOT appear within a `quoted-string`;
/// implementations MUST treat such occurrences as literal. The address
/// parser the kernel delegates to (`mail_parser` 0.11.2) decodes
/// encoded-word tokens unconditionally, including inside quoted-strings,
/// which silently rewrites a display name shaped like `"=?utf-8?B?Zm9v?="`
/// into its decoded form. Until the upstream parser grows a quoted-string
/// guard, the kernel pre-processes address-typed header values to escape
/// the encoded-word lead-in (`=?`) inside quoted regions. The escape is
/// the RFC 5322 §3.2.4 quoted-pair `\=` form, which the address parser
/// strips on unquote, so the literal text reaches the caller intact.
///
/// Returns the input borrowed when nothing needs escaping. Text outside
/// quoted regions and already-escaped pairs (`\x`) are left untouched.
/// Non-ASCII input is copied as whole UTF-8 slices, never byte-by-byte, so
/// multi-byte characters survive unchanged.
fn escape_encoded_words_inside_quoted_strings(input: &str) -> Cow<'_, str> {
    let bytes = input.as_bytes();
    // Allocated lazily on the first `=?` found inside a quoted region.
    let mut out: Option<String> = None;
    // Everything before this byte offset has already been copied to `out`.
    let mut copied_up_to = 0usize;
    let mut in_quotes = false;
    let mut escaped_pair = false;

    for (idx, &byte) in bytes.iter().enumerate() {
        if escaped_pair {
            // Second byte of a quoted-pair: never interpreted.
            escaped_pair = false;
            continue;
        }
        match byte {
            b'\\' if in_quotes => escaped_pair = true,
            b'"' => in_quotes = !in_quotes,
            b'=' if in_quotes && bytes.get(idx + 1) == Some(&b'?') => {
                let buffer = out.get_or_insert_with(|| String::with_capacity(input.len() + 4));
                // `idx` points at an ASCII byte, so it is a char boundary.
                buffer.push_str(&input[copied_up_to..idx]);
                buffer.push('\\');
                // The `=` itself is copied with the next slice.
                copied_up_to = idx;
            }
            _ => {}
        }
    }

    match out {
        Some(mut buffer) => {
            buffer.push_str(&input[copied_up_to..]);
            Cow::Owned(buffer)
        }
        None => Cow::Borrowed(input),
    }
}
1169
1170/// Opt-in RFC 2047 decoder for header values that the parser preserved as
1171/// raw `=?charset?encoding?text?=` tokens.
1172///
1173/// [`parse_rfc822`] decodes encoded-words for `Subject` and the address
1174/// headers (`From`, `Sender`, `To`, `Cc`, `Bcc`, `Reply-To`) but
1175/// deliberately leaves arbitrary other headers untouched, because
1176/// silently rewriting `=?…?=`-shaped content in opaque-bytes headers
1177/// such as `X-Auth-Token`, `DKIM-Signature`, `Authentication-Results`,
1178/// or `ARC-*` would be a security regression. Callers who *know* a
1179/// header is unstructured-text-shaped and want round-trip semantic
1180/// equality across `parse → render` cycles can opt into decoding by
1181/// calling this function on the header value.
1182///
1183/// ```rust
1184/// use email_message_wire::{decode_rfc2047_phrase, parse_rfc822};
1185///
1186/// let bytes = b"From: from@example.com\r\nTo: to@example.com\r\nX-Note: =?utf-8?B?w6Fy?=\r\n\r\n";
1187/// let message = parse_rfc822(bytes).unwrap();
1188/// let header = message
1189///     .headers()
1190///     .iter()
1191///     .find(|h| h.name().eq_ignore_ascii_case("x-note"))
1192///     .unwrap();
1193/// assert_eq!(header.value(), "=?utf-8?B?w6Fy?=");
1194/// assert_eq!(decode_rfc2047_phrase(header.value()), "ár");
1195/// ```
1196#[must_use]
1197pub fn decode_rfc2047_phrase(input: &str) -> Cow<'_, str> {
1198    decode_rfc2047_words(input)
1199}
1200
1201fn decode_rfc2047_words(input: &str) -> Cow<'_, str> {
1202    // Fast path: no encoded-word marker anywhere → return the input borrowed.
1203    if !input.contains("=?") {
1204        return Cow::Borrowed(input);
1205    }
1206
1207    let mut out: Option<String> = None;
1208    let mut idx = 0usize;
1209    let mut prev_was_encoded_word = false;
1210
1211    while idx < input.len() {
1212        let rest = &input[idx..];
1213        let Some(start_rel) = rest.find("=?") else {
1214            if let Some(buffer) = out.as_mut() {
1215                buffer.push_str(rest);
1216            }
1217            break;
1218        };
1219
1220        let plain = &rest[..start_rel];
1221        let candidate = &rest[start_rel..];
1222
1223        if prev_was_encoded_word
1224            && !plain.is_empty()
1225            && plain.bytes().all(|byte| byte == b' ' || byte == b'\t')
1226            && try_decode_rfc2047_word(candidate).is_some()
1227        {
1228            idx += start_rel;
1229            continue;
1230        }
1231
1232        let buffer = out.get_or_insert_with(|| String::with_capacity(input.len()));
1233        // Keep the buffer in sync with everything we've consumed up to this point.
1234        if buffer.is_empty() && idx > 0 {
1235            buffer.push_str(&input[..idx]);
1236        }
1237        buffer.push_str(plain);
1238
1239        if let Some((decoded, consumed)) = try_decode_rfc2047_word(candidate) {
1240            buffer.push_str(&decoded);
1241            idx += start_rel + consumed;
1242            prev_was_encoded_word = true;
1243        } else {
1244            buffer.push_str("=?");
1245            idx += start_rel + 2;
1246            prev_was_encoded_word = false;
1247        }
1248    }
1249
1250    match out {
1251        Some(buffer) => Cow::Owned(buffer),
1252        None => Cow::Borrowed(input),
1253    }
1254}
1255
1256fn try_decode_rfc2047_word(input: &str) -> Option<(String, usize)> {
1257    let end_rel = input.find("?=")?;
1258    let consumed = end_rel + 2;
1259    let word = &input[..consumed];
1260    Some((decode_rfc2047_word(word)?, consumed))
1261}
1262
1263fn decode_rfc2047_word(word: &str) -> Option<String> {
1264    if !word.starts_with("=?") || !word.ends_with("?=") {
1265        return None;
1266    }
1267
1268    let inner = &word[2..word.len() - 2];
1269    let mut parts = inner.splitn(3, '?');
1270    let charset = parts.next()?;
1271    let encoding = parts.next()?;
1272    let encoded = parts.next()?;
1273
1274    let bytes = if encoding.eq_ignore_ascii_case("B") {
1275        base64::engine::general_purpose::STANDARD
1276            .decode(encoded)
1277            .ok()?
1278    } else if encoding.eq_ignore_ascii_case("Q") {
1279        decode_rfc2047_q(encoded)?
1280    } else {
1281        return None;
1282    };
1283
1284    if charset.eq_ignore_ascii_case("utf-8") || charset.eq_ignore_ascii_case("us-ascii") {
1285        return String::from_utf8(bytes).ok();
1286    }
1287
1288    if charset.eq_ignore_ascii_case("iso-8859-1") || charset.eq_ignore_ascii_case("latin1") {
1289        return Some(bytes.into_iter().map(char::from).collect());
1290    }
1291
1292    None
1293}
1294
1295fn decode_rfc2047_q(input: &str) -> Option<Vec<u8>> {
1296    let mut out = Vec::with_capacity(input.len());
1297    let bytes = input.as_bytes();
1298    let mut idx = 0usize;
1299
1300    while idx < bytes.len() {
1301        let byte = bytes[idx];
1302        if byte == b'_' {
1303            out.push(b' ');
1304            idx += 1;
1305            continue;
1306        }
1307
1308        if byte == b'=' {
1309            if idx + 2 >= bytes.len() {
1310                return None;
1311            }
1312            let hi = hex_val(bytes[idx + 1])?;
1313            let lo = hex_val(bytes[idx + 2])?;
1314            out.push((hi << 4) | lo);
1315            idx += 3;
1316            continue;
1317        }
1318
1319        out.push(byte);
1320        idx += 1;
1321    }
1322
1323    Some(out)
1324}
1325
/// Returns the numeric value (0–15) of an ASCII hex digit in either letter
/// case, or `None` for any other byte.
const fn hex_val(byte: u8) -> Option<u8> {
    if byte.is_ascii_digit() {
        Some(byte - b'0')
    } else if matches!(byte, b'a'..=b'f' | b'A'..=b'F') {
        // Setting bit 0x20 folds an ASCII letter to lowercase.
        Some((byte | 0x20) - b'a' + 10)
    } else {
        None
    }
}
1334
1335fn encode_rfc2047_unstructured(input: &str) -> String {
1336    if input.is_ascii() {
1337        return input.to_owned();
1338    }
1339
1340    encode_rfc2047_utf8_base64_words(input)
1341}
1342
1343fn encode_rfc2047_phrase(input: &str) -> String {
1344    if input.is_ascii() {
1345        return quote_phrase(input);
1346    }
1347
1348    encode_rfc2047_utf8_base64_words(input)
1349}
1350
1351fn encode_rfc2047_utf8_base64_words(input: &str) -> String {
1352    const ENCODED_WORD_OVERHEAD: usize = 12; // =?utf-8?B? + ?=
1353    const MAX_ENCODED_WORD_LEN: usize = 75;
1354    const MAX_BASE64_LEN: usize = MAX_ENCODED_WORD_LEN - ENCODED_WORD_OVERHEAD;
1355    const MAX_CHUNK_BYTES: usize = (MAX_BASE64_LEN / 4) * 3;
1356
1357    let bytes = input.as_bytes();
1358    let mut idx = 0usize;
1359    let mut words = Vec::new();
1360
1361    while idx < bytes.len() {
1362        let mut end = (idx + MAX_CHUNK_BYTES).min(bytes.len());
1363        while end > idx && !input.is_char_boundary(end) {
1364            end -= 1;
1365        }
1366
1367        if end == idx {
1368            end = bytes.len();
1369            while end > idx && !input.is_char_boundary(end) {
1370                end -= 1;
1371            }
1372        }
1373
1374        let encoded = base64::engine::general_purpose::STANDARD.encode(&bytes[idx..end]);
1375        words.push(format!("=?utf-8?B?{encoded}?="));
1376        idx = end;
1377    }
1378
1379    words.join(" ")
1380}
1381
/// Wraps `input` in double quotes, backslash-escaping every `\` and `"`.
fn quote_phrase(input: &str) -> String {
    let escaped = input.chars().fold(
        String::with_capacity(input.len() + 2),
        |mut acc, ch| {
            if matches!(ch, '"' | '\\') {
                acc.push('\\');
            }
            acc.push(ch);
            acc
        },
    );

    format!("\"{escaped}\"")
}
1394
1395fn render_mailbox_header(mailbox: &Mailbox) -> String {
1396    mailbox.name().map_or_else(
1397        || mailbox.email().as_str().to_owned(),
1398        |name| {
1399            format!(
1400                "{} <{}>",
1401                encode_rfc2047_phrase(name),
1402                mailbox.email().as_str()
1403            )
1404        },
1405    )
1406}
1407
1408fn render_group_header(group: &email_message::Group) -> String {
1409    let mut out = String::new();
1410    out.push_str(&encode_rfc2047_phrase(group.name()));
1411    out.push(':');
1412    for (idx, member) in group.members().iter().enumerate() {
1413        if idx > 0 {
1414            out.push_str(", ");
1415        }
1416        out.push_str(&render_mailbox_header(member));
1417    }
1418    out.push(';');
1419    out
1420}
1421
1422fn render_address_list_header(addresses: &[Address]) -> String {
1423    let mut out = String::new();
1424    for (idx, address) in addresses.iter().enumerate() {
1425        if idx > 0 {
1426            out.push_str(", ");
1427        }
1428        match address {
1429            Address::Mailbox(mailbox) => out.push_str(&render_mailbox_header(mailbox)),
1430            Address::Group(group) => out.push_str(&render_group_header(group)),
1431        }
1432    }
1433    out
1434}
1435
/// Splits a raw entity into `(headers, body)` at the first blank line.
///
/// - Input that starts with a line break (`\r\n` or `\n`) has no headers; the
///   rest is the body.
/// - Otherwise the split happens at the *earliest* `\r\n\r\n` or `\n\n`,
///   whichever comes first. The separator belongs to neither half.
/// - Without any blank line the whole input is headers and the body is empty.
///
/// Taking the earliest separator matters for LF-delimited input whose body
/// happens to contain `\r\n\r\n`: searching for CRLFCRLF first would place
/// the split inside the body and treat body text as headers.
fn split_headers_and_body_bytes(input: &[u8]) -> (&[u8], &[u8]) {
    if let Some(rest) = input.strip_prefix(b"\r\n") {
        return (&[], rest);
    }

    if let Some(rest) = input.strip_prefix(b"\n") {
        return (&[], rest);
    }

    // Single pass over the line feeds. Candidates are visited in increasing
    // order of their start offset, so the first hit is the earliest
    // separator of either kind.
    for (idx, byte) in input.iter().enumerate() {
        if *byte != b'\n' {
            continue;
        }

        let after = &input[idx + 1..];
        if after.first() == Some(&b'\n') {
            // "\n\n" starting at `idx`.
            return (&input[..idx], &after[1..]);
        }
        if idx > 0 && input[idx - 1] == b'\r' && after.starts_with(b"\r\n") {
            // "\r\n\r\n" starting at `idx - 1`.
            return (&input[..idx - 1], &after[2..]);
        }
    }

    (input, &[])
}
1455
1456fn parse_header_lines_bytes(
1457    raw_headers: &[u8],
1458) -> Result<Vec<(String, String)>, MessageParseError> {
1459    let normalized = raw_headers
1460        .split(|byte| *byte == b'\n')
1461        .map(|line| line.strip_suffix(b"\r").unwrap_or(line));
1462    let mut output = Vec::new();
1463    let mut current: Option<(String, String)> = None;
1464
1465    for line in normalized {
1466        if line.is_empty() {
1467            continue;
1468        }
1469
1470        let line_str = std::str::from_utf8(line).map_err(|_| MessageParseError::InvalidUtf8)?;
1471
1472        if !line_str.is_ascii() {
1473            return Err(MessageParseError::InvalidHeaderLine {
1474                line: line_str.to_owned(),
1475            });
1476        }
1477
1478        if line_str
1479            .chars()
1480            .any(|ch| ch != '\t' && ch.is_ascii_control())
1481        {
1482            return Err(MessageParseError::InvalidHeaderLine {
1483                line: line_str.to_owned(),
1484            });
1485        }
1486
1487        if line_str.starts_with(' ') || line_str.starts_with('\t') {
1488            let (_, value) =
1489                current
1490                    .as_mut()
1491                    .ok_or_else(|| MessageParseError::InvalidHeaderLine {
1492                        line: line_str.to_owned(),
1493                    })?;
1494            value.push_str(line_str);
1495            continue;
1496        }
1497
1498        if let Some(entry) = current.take() {
1499            output.push(entry);
1500        }
1501
1502        let Some((name, value)) = line_str.split_once(':') else {
1503            return Err(MessageParseError::InvalidHeaderLine {
1504                line: line_str.to_owned(),
1505            });
1506        };
1507        if !is_valid_header_name(name) {
1508            return Err(MessageParseError::InvalidHeaderLine {
1509                line: line_str.to_owned(),
1510            });
1511        }
1512        current = Some((name.trim().to_owned(), value.trim_start().to_owned()));
1513    }
1514
1515    if let Some(entry) = current.take() {
1516        output.push(entry);
1517    }
1518
1519    Ok(output)
1520}
1521
1522#[derive(Clone, Debug)]
1523struct ContentTypeHeader {
1524    normalized: String,
1525    media_type: String,
1526    boundary: Option<String>,
1527    charset: Option<String>,
1528}
1529
1530impl ContentTypeHeader {
1531    fn parse(value: &str) -> Self {
1532        let trimmed = value.trim();
1533        let mut parts = split_unquoted_semicolons(trimmed);
1534        let media_type_segment_raw = parts.next().unwrap_or_default();
1535        let media_type_segment = media_type_segment_raw.trim();
1536        let media_type = media_type_segment.to_ascii_lowercase();
1537        let mut boundary = None;
1538        let mut charset = None;
1539        let mut normalized_parts = vec![media_type_segment.to_owned()];
1540
1541        for param in parts {
1542            let Some((name, value)) = param.trim().split_once('=') else {
1543                continue;
1544            };
1545            if name.trim().eq_ignore_ascii_case("boundary") {
1546                let boundary_value = unquote_parameter_value(value.trim());
1547                if !boundary_value.is_empty() {
1548                    boundary = Some(boundary_value);
1549                }
1550                continue;
1551            }
1552
1553            normalized_parts.push(format!("{}={}", name.trim(), value.trim()));
1554
1555            if name.trim().eq_ignore_ascii_case("charset") {
1556                let charset_value = unquote_parameter_value(value.trim());
1557                if !charset_value.is_empty() {
1558                    charset = Some(charset_value);
1559                }
1560            }
1561        }
1562
1563        Self {
1564            normalized: normalized_parts.join(";"),
1565            media_type,
1566            boundary,
1567            charset,
1568        }
1569    }
1570}
1571
/// Splits `input` on `;` characters that are outside double-quoted regions.
///
/// Inside quotes a backslash escapes the following byte, so `\"` does not
/// close the quoted region. Segments are yielded untrimmed. The iterator
/// always yields at least one segment: empty input gives one empty segment,
/// and a trailing `;` gives a final empty segment.
fn split_unquoted_semicolons(input: &str) -> impl Iterator<Item = &str> {
    // `None` once the final segment has been handed out.
    let mut remaining = Some(input);

    std::iter::from_fn(move || {
        let current = remaining?;
        let mut quoted = false;
        let mut skip_next = false;

        for (pos, byte) in current.bytes().enumerate() {
            if skip_next {
                skip_next = false;
                continue;
            }
            match byte {
                b'\\' if quoted => skip_next = true,
                b'"' => quoted = !quoted,
                b';' if !quoted => {
                    // A split only happens outside quotes with no pending
                    // escape, so the next segment starts from a clean state.
                    remaining = Some(&current[pos + 1..]);
                    return Some(&current[..pos]);
                }
                _ => {}
            }
        }

        remaining = None;
        Some(current)
    })
}
1620
/// Returns the trimmed parameter value with one layer of double quotes
/// removed.
///
/// Inside the quotes a backslash makes the following character literal; a
/// trailing lone backslash is dropped. Values not wrapped in a pair of
/// double quotes are returned trimmed but otherwise unchanged.
fn unquote_parameter_value(input: &str) -> String {
    let trimmed = input.trim();
    let Some(inner) = trimmed
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
    else {
        return trimmed.to_owned();
    };

    let mut unescaped = String::with_capacity(inner.len());
    let mut pending_escape = false;
    for ch in inner.chars() {
        if ch == '\\' && !pending_escape {
            pending_escape = true;
        } else {
            unescaped.push(ch);
            pending_escape = false;
        }
    }
    unescaped
}
1640
1641fn parse_multipart_body(
1642    body: &[u8],
1643    content_type_value: &str,
1644    boundary: Option<String>,
1645    depth: usize,
1646) -> Result<MimePart, MessageParseError> {
1647    if depth > MAX_MULTIPART_DEPTH {
1648        return Err(MessageParseError::MimeBodyParse {
1649            details: format!("multipart nesting exceeds maximum depth of {MAX_MULTIPART_DEPTH}"),
1650        });
1651    }
1652
1653    let boundary = boundary.ok_or_else(|| MessageParseError::MimeBodyParse {
1654        details: "multipart part is missing boundary parameter".to_owned(),
1655    })?;
1656
1657    let parts = split_multipart_parts(body, &boundary)?;
1658    let mut parsed_parts = Vec::with_capacity(parts.len());
1659    for part in parts {
1660        parsed_parts.push(parse_mime_part(&part, depth + 1)?);
1661    }
1662
1663    Ok(MimePart::Multipart {
1664        content_type: ContentType::from_str(content_type_value).map_err(|_| {
1665            MessageParseError::MimeBodyParse {
1666                details: format!("invalid multipart content type `{content_type_value}`"),
1667            }
1668        })?,
1669        boundary: Some(boundary),
1670        parts: parsed_parts,
1671    })
1672}
1673
1674fn split_multipart_parts(body: &[u8], boundary: &str) -> Result<Vec<Vec<u8>>, MessageParseError> {
1675    let delimiter = {
1676        let mut value = Vec::with_capacity(boundary.len() + 2);
1677        value.extend_from_slice(b"--");
1678        value.extend_from_slice(boundary.as_bytes());
1679        value
1680    };
1681    let end_delimiter = {
1682        let mut value = delimiter.clone();
1683        value.extend_from_slice(b"--");
1684        value
1685    };
1686
1687    let mut parts = Vec::new();
1688    let mut current = Vec::new();
1689    let mut in_part = false;
1690    let mut found_opening = false;
1691    let mut found_closing = false;
1692
1693    for raw_line in body.split(|byte| *byte == b'\n') {
1694        let line = raw_line.strip_suffix(b"\r").unwrap_or(raw_line);
1695        let line = trim_lwsp_end(line);
1696
1697        if line == delimiter.as_slice() {
1698            if in_part {
1699                if parts.len() >= MAX_MULTIPART_PARTS {
1700                    return Err(MessageParseError::MimeBodyParse {
1701                        details: format!(
1702                            "multipart body exceeds maximum of {MAX_MULTIPART_PARTS} parts"
1703                        ),
1704                    });
1705                }
1706                strip_boundary_separator_newline(&mut current);
1707                parts.push(std::mem::take(&mut current));
1708            }
1709            in_part = true;
1710            found_opening = true;
1711            continue;
1712        }
1713
1714        if line == end_delimiter.as_slice() {
1715            if in_part {
1716                if parts.len() >= MAX_MULTIPART_PARTS {
1717                    return Err(MessageParseError::MimeBodyParse {
1718                        details: format!(
1719                            "multipart body exceeds maximum of {MAX_MULTIPART_PARTS} parts"
1720                        ),
1721                    });
1722                }
1723                strip_boundary_separator_newline(&mut current);
1724                parts.push(std::mem::take(&mut current));
1725            }
1726            found_closing = true;
1727            break;
1728        }
1729
1730        if in_part {
1731            current.extend_from_slice(raw_line);
1732            current.push(b'\n');
1733        }
1734    }
1735
1736    if !found_closing {
1737        return Err(MessageParseError::MimeBodyParse {
1738            details: "multipart body missing closing boundary".to_owned(),
1739        });
1740    }
1741
1742    if !found_opening {
1743        return Err(MessageParseError::MimeBodyParse {
1744            details: "multipart body missing opening boundary".to_owned(),
1745        });
1746    }
1747
1748    Ok(parts)
1749}
1750
1751fn parse_mime_part(part: &[u8], depth: usize) -> Result<MimePart, MessageParseError> {
1752    if depth > MAX_MULTIPART_DEPTH {
1753        return Err(MessageParseError::MimeBodyParse {
1754            details: format!("multipart nesting exceeds maximum depth of {MAX_MULTIPART_DEPTH}"),
1755        });
1756    }
1757
1758    let (raw_headers, raw_body) = split_headers_and_body_bytes(part);
1759    let parsed_headers = parse_header_lines_bytes(raw_headers)?;
1760
1761    let mut content_type = ContentTypeHeader {
1762        normalized: "text/plain".to_owned(),
1763        media_type: "text/plain".to_owned(),
1764        boundary: None,
1765        charset: None,
1766    };
1767    let mut content_transfer_encoding = None;
1768    let mut content_disposition = None;
1769
1770    for (name, value) in parsed_headers {
1771        if name.eq_ignore_ascii_case("content-type") {
1772            content_type = ContentTypeHeader::parse(&value);
1773            continue;
1774        }
1775        if name.eq_ignore_ascii_case("content-transfer-encoding") {
1776            content_transfer_encoding =
1777                Some(ContentTransferEncoding::from_str(&value).map_err(|_| {
1778                    MessageParseError::MimeBodyParse {
1779                        details: format!("invalid content-transfer-encoding `{value}`"),
1780                    }
1781                })?);
1782            continue;
1783        }
1784        if name.eq_ignore_ascii_case("content-disposition") {
1785            content_disposition = Some(ContentDisposition::from_str(&value).map_err(|_| {
1786                MessageParseError::MimeBodyParse {
1787                    details: format!("invalid content-disposition `{value}`"),
1788                }
1789            })?);
1790        }
1791    }
1792
1793    if content_type.media_type.starts_with("multipart/") {
1794        validate_multipart_transfer_encoding(content_transfer_encoding.as_ref())?;
1795        return parse_multipart_body(
1796            raw_body,
1797            &content_type.normalized,
1798            content_type.boundary,
1799            depth,
1800        );
1801    }
1802
1803    let decoded_body = decode_transfer_encoded_body(
1804        raw_body,
1805        content_transfer_encoding
1806            .as_ref()
1807            .map(ContentTransferEncoding::as_str),
1808    )?;
1809
1810    Ok(MimePart::Leaf {
1811        content_type: ContentType::from_str(&content_type.normalized).map_err(|_| {
1812            MessageParseError::MimeBodyParse {
1813                details: format!("invalid content type `{}`", content_type.normalized),
1814            }
1815        })?,
1816        content_transfer_encoding,
1817        content_disposition,
1818        body: decoded_body,
1819    })
1820}
1821
/// Reports whether `name` is a header with a structured grammar (RFC 5322
/// §3.6.4 / §3.6.7, RFC 2369, RFC 5321) that must NOT pass through RFC 2047
/// encoded-word substitution. Any other header is treated as unstructured
/// (RFC 5322 §3.6.5).
///
/// The comparison is ASCII case-insensitive. Besides the exact names listed
/// below, every name starting with `list-` or `x-original-` is considered
/// structured.
///
/// The list is intentionally small and covers the structured headers most
/// commonly found in real workflows. Less-common structured headers (e.g.
/// `Disposition-Notification-To`, `MT-Priority`, `Original-Recipient`) are
/// not on the list; if a custom header with such a name carries non-ASCII
/// content, RFC 2047 encoding it corrupts the structured grammar. Encode
/// such values ASCII-clean upstream.
fn is_structured_header(name: &str) -> bool {
    const STRUCTURED_NAMES: &[&str] = &[
        "message-id",
        "in-reply-to",
        "references",
        "received",
        "return-path",
        "delivered-to",
        "envelope-from",
        "envelope-to",
        "auto-submitted",
        "content-id",
        "content-location",
        "resent-message-id",
        "dkim-signature",
        "arc-seal",
        "arc-message-signature",
        "arc-authentication-results",
        "authentication-results",
    ];
    const STRUCTURED_PREFIXES: &[&str] = &["list-", "x-original-"];

    // Compare on bytes so a multi-byte character straddling the prefix
    // length cannot cause a slicing panic.
    let starts_with_ignore_case = |prefix: &str| {
        name.as_bytes()
            .get(..prefix.len())
            .map_or(false, |head| head.eq_ignore_ascii_case(prefix.as_bytes()))
    };

    STRUCTURED_NAMES
        .iter()
        .any(|&known| name.eq_ignore_ascii_case(known))
        || STRUCTURED_PREFIXES
            .iter()
            .any(|&prefix| starts_with_ignore_case(prefix))
}
1858
1859fn push_header_line(
1860    out: &mut Vec<u8>,
1861    name: &str,
1862    value: &str,
1863    soft_fold_at: Option<usize>,
1864) -> Result<(), MessageRenderError> {
1865    validate_header_name(name)?;
1866    if contains_raw_newlines(value) {
1867        return Err(MessageRenderError::HeaderContainsRawNewline {
1868            name: name.to_owned(),
1869        });
1870    }
1871    if contains_invalid_header_control_chars(value) {
1872        return Err(MessageRenderError::HeaderContainsControlCharacter {
1873            name: name.to_owned(),
1874        });
1875    }
1876    if !value.is_ascii() {
1877        return Err(MessageRenderError::HeaderContainsNonAscii {
1878            name: name.to_owned(),
1879        });
1880    }
1881
1882    let name_len = name.len();
1883    let first_hard = RFC5322_HARD_LINE_LEN.saturating_sub(name_len + 2);
1884    let continuation_hard = RFC5322_HARD_LINE_LEN.saturating_sub(1);
1885    // When soft-folding is enabled, target the caller's preferred width;
1886    // otherwise pin preferred to the hard limit so the helper emits one
1887    // line per header up to the RFC 5322 ceiling.
1888    let first_preferred = soft_fold_at
1889        .map(|target| target.saturating_sub(name_len + 2).min(first_hard))
1890        .unwrap_or(first_hard);
1891    let continuation_preferred = soft_fold_at
1892        .map(|target| target.saturating_sub(1).min(continuation_hard))
1893        .unwrap_or(continuation_hard);
1894
1895    let lines = split_header_value_for_folding(
1896        value,
1897        first_preferred,
1898        first_hard,
1899        continuation_preferred,
1900        continuation_hard,
1901    )
1902    .ok_or_else(|| MessageRenderError::HeaderLineTooLong {
1903        name: name.to_owned(),
1904    })?;
1905
1906    for (idx, line) in lines.iter().enumerate() {
1907        if idx == 0 {
1908            out.extend_from_slice(name.as_bytes());
1909            out.extend_from_slice(b": ");
1910            out.extend_from_slice(line.as_bytes());
1911            out.extend_from_slice(b"\r\n");
1912            continue;
1913        }
1914
1915        out.extend_from_slice(b" ");
1916        out.extend_from_slice(line.as_bytes());
1917        out.extend_from_slice(b"\r\n");
1918    }
1919
1920    Ok(())
1921}
1922
/// Splits `value` into the text segments of a folded header, one segment
/// per physical line.
///
/// Each fold consumes exactly one space or tab from `value`; the caller is
/// expected to re-insert a single whitespace character in front of every
/// continuation segment when it writes the lines out.
///
/// For each line the function first tries to fit within the *preferred*
/// width (`first_preferred` for the first line, `continuation_preferred`
/// afterwards). If no fold point exists inside the preferred width, the
/// rest of the value is emitted unfolded when it fits the *hard* width;
/// otherwise a fold point inside the hard width is used.
///
/// On continuation lines a fold point is only accepted when the segment in
/// front of it contains at least one non-whitespace character. Without that
/// rule a remaining text that starts with whitespace (for example after a
/// double space straddled the previous fold column) would yield an empty or
/// whitespace-only segment, which renders as a whitespace-only line. RFC
/// 5322 only permits such lines in its obsolete syntax (`obs-FWS`), which
/// must not be generated. The first line is exempt because it always
/// carries the header name.
///
/// Returns `None` when the applicable hard width is `0` or when a line
/// cannot be brought within the hard width at any acceptable fold point.
/// An empty `value` yields a single empty segment.
fn split_header_value_for_folding(
    value: &str,
    first_preferred: usize,
    first_hard: usize,
    continuation_preferred: usize,
    continuation_hard: usize,
) -> Option<Vec<String>> {
    if value.is_empty() {
        return Some(vec![String::new()]);
    }

    let mut remaining = value;
    let mut lines = Vec::new();
    let mut is_first = true;

    while !remaining.is_empty() {
        let on_continuation_line = !is_first;
        let (preferred, hard) = if is_first {
            (first_preferred, first_hard)
        } else {
            (continuation_preferred, continuation_hard)
        };
        is_first = false;

        if hard == 0 {
            return None;
        }

        if remaining.len() <= preferred {
            lines.push(remaining.to_owned());
            break;
        }

        if let Some(split_at) = last_fold_point(remaining, preferred, on_continuation_line) {
            lines.push(remaining[..split_at].to_owned());
            // Skip the whitespace character the fold consumed.
            remaining = &remaining[split_at + 1..];
            continue;
        }

        if remaining.len() <= hard {
            lines.push(remaining.to_owned());
            break;
        }

        if let Some(split_at) = last_fold_point(remaining, hard, on_continuation_line) {
            lines.push(remaining[..split_at].to_owned());
            remaining = &remaining[split_at + 1..];
            continue;
        }

        return None;
    }

    Some(lines)
}

/// Returns the last space/tab within the first `max_len` bytes of `value`
/// that may be used as a fold point.
///
/// When `require_visible_text` is set, the boundary is rejected unless the
/// text in front of it contains a non-whitespace character. Earlier
/// boundaries need not be examined in that case: their prefixes are
/// prefixes of the rejected one and therefore whitespace-only as well.
fn last_fold_point(value: &str, max_len: usize, require_visible_text: bool) -> Option<usize> {
    let split_at = last_lwsp_boundary(value, max_len)?;
    if !require_visible_text {
        return Some(split_at);
    }

    let has_visible_text = value[..split_at]
        .bytes()
        .any(|byte| byte != b' ' && byte != b'\t');
    has_visible_text.then_some(split_at)
}

/// Returns the byte index of the last space or tab located within the first
/// `max_len` bytes of `value`, or `None` when there is none (or `max_len`
/// is `0`).
///
/// If `max_len` does not fall on a character boundary (or lies past the end
/// of `value`), the search window is shrunk to the nearest boundary below it.
fn last_lwsp_boundary(value: &str, max_len: usize) -> Option<usize> {
    if max_len == 0 {
        return None;
    }

    let mut limit = max_len;
    while limit > 0 && !value.is_char_boundary(limit) {
        limit -= 1;
    }

    value[..limit].rfind([' ', '\t'])
}
2004
2005fn validate_header_name(name: &str) -> Result<(), MessageRenderError> {
2006    if !is_valid_header_name(name) {
2007        return Err(MessageRenderError::InvalidHeaderName {
2008            name: name.to_owned(),
2009        });
2010    }
2011
2012    Ok(())
2013}
2014
/// Reports whether `name` is non-empty and made up solely of printable
/// ASCII characters (`!` through `~`) other than `:`.
///
/// Spaces, tabs, line breaks, other control characters (including DEL) and
/// any non-ASCII character make the name invalid.
fn is_valid_header_name(name: &str) -> bool {
    // 33..=126 is printable ASCII without the space; 58 is ':'.
    !name.is_empty() && name.bytes().all(|byte| matches!(byte, 33..=57 | 59..=126))
}
2026
/// Reports whether `value` contains a carriage return or a line feed.
fn contains_raw_newlines(value: &str) -> bool {
    value.bytes().any(|byte| byte == b'\r' || byte == b'\n')
}
2030
/// Reports whether `value` contains an ASCII control character other than
/// tab, line feed or carriage return (that is, U+0000–U+0008, U+000B,
/// U+000C, U+000E–U+001F or U+007F).
fn contains_invalid_header_control_chars(value: &str) -> bool {
    value
        .bytes()
        .any(|byte| byte.is_ascii_control() && !matches!(byte, b'\t' | b'\n' | b'\r'))
}
2036
/// Returns `value` without its trailing run of spaces and tabs.
fn trim_lwsp_end(value: &[u8]) -> &[u8] {
    let kept = value
        .iter()
        .rposition(|&byte| byte != b' ' && byte != b'\t')
        .map_or(0, |last_visible| last_visible + 1);

    &value[..kept]
}
2045
/// Removes one trailing line break from `value` in place: a final `\r\n`
/// if present, otherwise a final `\n`. Anything else is left untouched.
fn strip_boundary_separator_newline(value: &mut Vec<u8>) {
    let line_break_len = if value.ends_with(b"\r\n") {
        2
    } else if value.ends_with(b"\n") {
        1
    } else {
        0
    };

    value.truncate(value.len() - line_break_len);
}
2056
2057fn validate_boundary(value: &str) -> Result<(), MessageRenderError> {
2058    if value.is_empty() {
2059        return Err(MessageRenderError::EmptyMimeBoundary);
2060    }
2061
2062    if value.len() > 70
2063        || value
2064            .chars()
2065            .any(|ch| ch.is_ascii_control() || ch == '\r' || ch == '\n' || !ch.is_ascii())
2066    {
2067        return Err(MessageRenderError::InvalidMimeBoundary);
2068    }
2069
2070    if value.ends_with(' ') {
2071        return Err(MessageRenderError::InvalidMimeBoundary);
2072    }
2073
2074    if value.chars().any(|ch| {
2075        !(ch.is_ascii_alphanumeric()
2076            || matches!(
2077                ch,
2078                '\'' | '(' | ')' | '+' | '_' | ',' | '-' | '.' | '/' | ':' | '=' | '?' | ' '
2079            ))
2080    }) {
2081        return Err(MessageRenderError::InvalidMimeBoundary);
2082    }
2083
2084    Ok(())
2085}
2086
2087fn decode_transfer_encoded_body(
2088    body: &[u8],
2089    encoding: Option<&str>,
2090) -> Result<Vec<u8>, MessageParseError> {
2091    let Some(encoding) = encoding else {
2092        return Ok(body.to_vec());
2093    };
2094
2095    if encoding.eq_ignore_ascii_case("base64") {
2096        return decode_base64_body(body).ok_or_else(|| MessageParseError::MimeBodyParse {
2097            details: "invalid base64 content-transfer-encoding payload".to_owned(),
2098        });
2099    }
2100
2101    if encoding.eq_ignore_ascii_case("quoted-printable") {
2102        return decode_quoted_printable_body(body).ok_or_else(|| {
2103            MessageParseError::MimeBodyParse {
2104                details: "invalid quoted-printable content-transfer-encoding payload".to_owned(),
2105            }
2106        });
2107    }
2108
2109    Ok(body.to_vec())
2110}
2111
2112fn validate_multipart_transfer_encoding(
2113    encoding: Option<&ContentTransferEncoding>,
2114) -> Result<(), MessageParseError> {
2115    let Some(encoding) = encoding else {
2116        return Ok(());
2117    };
2118
2119    let value = encoding.as_str();
2120    if value.eq_ignore_ascii_case("7bit")
2121        || value.eq_ignore_ascii_case("8bit")
2122        || value.eq_ignore_ascii_case("binary")
2123    {
2124        return Ok(());
2125    }
2126
2127    Err(MessageParseError::MimeBodyParse {
2128        details: format!("multipart part cannot use content-transfer-encoding `{value}`"),
2129    })
2130}
2131
/// Converts `body` to a `String` using the given charset label.
///
/// `iso-8859-1` and `latin1` (matched ASCII case-insensitively) map every
/// byte to the Unicode code point of the same value. Every other label,
/// as well as a missing one, is decoded as UTF-8 with invalid sequences
/// replaced by U+FFFD.
fn decode_text_body(body: &[u8], charset: Option<&str>) -> String {
    let is_latin1 = match charset {
        Some(label) => {
            label.eq_ignore_ascii_case("iso-8859-1") || label.eq_ignore_ascii_case("latin1")
        }
        None => false,
    };

    if is_latin1 {
        body.iter().map(|&byte| char::from(byte)).collect()
    } else {
        String::from_utf8_lossy(body).into_owned()
    }
}
2147
2148fn decode_base64_body(body: &[u8]) -> Option<Vec<u8>> {
2149    let mut filtered = Vec::with_capacity(body.len());
2150    for byte in body.iter().copied() {
2151        if matches!(byte, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'+' | b'/' | b'=') {
2152            filtered.push(byte);
2153        }
2154    }
2155
2156    base64::engine::general_purpose::STANDARD
2157        .decode(filtered)
2158        .ok()
2159}
2160
2161fn decode_quoted_printable_body(body: &[u8]) -> Option<Vec<u8>> {
2162    let mut out = Vec::with_capacity(body.len());
2163    let mut idx = 0usize;
2164
2165    while idx < body.len() {
2166        let line_start = idx;
2167        while idx < body.len() && body[idx] != b'\r' && body[idx] != b'\n' {
2168            idx += 1;
2169        }
2170
2171        let line = &body[line_start..idx];
2172        let mut line_end = line.len();
2173        while line_end > 0 && matches!(line[line_end - 1], b' ' | b'\t') {
2174            line_end -= 1;
2175        }
2176        let line = &line[..line_end];
2177
2178        let mut newline = &[][..];
2179        if idx < body.len() {
2180            if body[idx] == b'\r' {
2181                if idx + 1 < body.len() && body[idx + 1] == b'\n' {
2182                    newline = b"\r\n";
2183                    idx += 2;
2184                } else {
2185                    newline = b"\r";
2186                    idx += 1;
2187                }
2188            } else {
2189                newline = b"\n";
2190                idx += 1;
2191            }
2192        }
2193
2194        let soft_break = line.ends_with(b"=");
2195        let encoded = if soft_break {
2196            &line[..line.len().saturating_sub(1)]
2197        } else {
2198            line
2199        };
2200
2201        let mut line_idx = 0usize;
2202        while line_idx < encoded.len() {
2203            if encoded[line_idx] != b'=' {
2204                if !is_valid_quoted_printable_literal(encoded[line_idx]) {
2205                    return None;
2206                }
2207                out.push(encoded[line_idx]);
2208                line_idx += 1;
2209                continue;
2210            }
2211
2212            if line_idx + 2 >= encoded.len() {
2213                return None;
2214            }
2215
2216            let hi = hex_val(encoded[line_idx + 1])?;
2217            let lo = hex_val(encoded[line_idx + 2])?;
2218            out.push((hi << 4) | lo);
2219            line_idx += 3;
2220        }
2221
2222        if soft_break {
2223            if newline.is_empty() {
2224                return None;
2225            }
2226            continue;
2227        }
2228
2229        out.extend_from_slice(newline);
2230    }
2231
2232    Some(out)
2233}
2234
/// Reports whether `byte` may appear unescaped in quoted-printable text:
/// a tab, a space, or any printable ASCII character except `=`.
const fn is_valid_quoted_printable_literal(byte: u8) -> bool {
    byte == b'\t' || (byte >= b' ' && byte <= b'~' && byte != b'=')
}
2238
/// Encodes `body` as quoted-printable text.
///
/// * A `\r\n` pair is a hard line break and is copied through unchanged.
/// * A lone `\r` or `\n` is escaped (`=0D` / `=0A`).
/// * A space or tab is escaped when it is the last byte of the input or is
///   directly followed by `\r` or `\n`; otherwise it is written literally.
/// * Printable ASCII other than `=` is written literally; every other byte
///   is written as `=XX` with upper-case hex digits.
///
/// Encoded lines never exceed 76 characters. RFC 2045 §6.7 rule (5) counts
/// the `=` of a soft line break towards that limit, so a token that is not
/// the last one on its encoded line may only extend the line to 75
/// characters, leaving one column for the `=`. Only a token that is
/// directly followed by a hard line break or by the end of the input may
/// fill column 76.
fn encode_quoted_printable_body(body: &[u8]) -> Vec<u8> {
    const MAX_ENCODED_LINE_LEN: usize = 76;

    let mut out = Vec::with_capacity(body.len() + body.len() / 2);
    let mut idx = 0usize;
    let mut line_len = 0usize;

    while idx < body.len() {
        if body[idx..].starts_with(b"\r\n") {
            out.extend_from_slice(b"\r\n");
            idx += 2;
            line_len = 0;
            continue;
        }

        let byte = body[idx];
        let rest = &body[idx + 1..];

        // Only affects spaces and tabs; every other byte encodes the same
        // way regardless of what follows it.
        let before_line_break = rest
            .first()
            .map_or(true, |&next| next == b'\r' || next == b'\n');
        let token = quoted_printable_token(byte, before_line_break);

        // Reserve one column for the soft-break `=` unless this token is
        // the last thing on the encoded line.
        let ends_encoded_line = rest.is_empty() || rest.starts_with(b"\r\n");
        let budget = if ends_encoded_line {
            MAX_ENCODED_LINE_LEN
        } else {
            MAX_ENCODED_LINE_LEN - 1
        };

        if line_len + token.len() > budget {
            out.extend_from_slice(b"=\r\n");
            line_len = 0;
        }

        out.extend_from_slice(token.as_bytes());
        line_len += token.len();
        idx += 1;
    }

    out
}

/// Returns the quoted-printable representation of a single byte.
///
/// Printable ASCII other than `=` is returned as-is. A space or tab is
/// returned as-is unless `at_line_end` is set. Everything else, including
/// `=`, is returned as `=XX` with upper-case hex digits.
fn quoted_printable_token(byte: u8, at_line_end: bool) -> String {
    if matches!(byte, 33..=60 | 62..=126) {
        return (byte as char).to_string();
    }

    if (byte == b' ' || byte == b'\t') && !at_line_end {
        return (byte as char).to_string();
    }

    format!("={byte:02X}")
}
2306
/// Produces the boundary string for the current value of `counter` and
/// then advances `counter` by one.
fn next_boundary(counter: &mut usize) -> String {
    let sequence = *counter;
    *counter += 1;
    format!("=_email_message_boundary_{sequence}")
}
2312
/// Reports whether any line of `body` would be read as a delimiter for
/// `boundary`.
///
/// Lines are separated by `\n`. A line matches when, after dropping one
/// trailing `\r` and any trailing spaces or tabs, it equals `--boundary`
/// or `--boundary--`.
fn contains_boundary_delimiter_line(body: &[u8], boundary: &str) -> bool {
    let opening = [&b"--"[..], boundary.as_bytes()].concat();
    let closing = [opening.as_slice(), &b"--"[..]].concat();

    for raw_line in body.split(|&byte| byte == b'\n') {
        let without_cr = raw_line.strip_suffix(b"\r").unwrap_or(raw_line);
        let visible_len = without_cr
            .iter()
            .rposition(|&byte| byte != b' ' && byte != b'\t')
            .map_or(0, |last_visible| last_visible + 1);
        let candidate = &without_cr[..visible_len];

        if candidate == opening.as_slice() || candidate == closing.as_slice() {
            return true;
        }
    }

    false
}
2327
2328fn multipart_parts_conflict_with_boundary(parts: &[RenderPart], boundary: &str) -> bool {
2329    parts.iter().any(|part| match part {
2330        RenderPart::Leaf { body, .. } => contains_boundary_delimiter_line(body, boundary),
2331        RenderPart::Multipart {
2332            content_type,
2333            boundary: nested_boundary,
2334            parts,
2335        } => {
2336            let header_boundary = extract_boundary_param(content_type);
2337            if nested_boundary.as_deref() == Some(boundary)
2338                || header_boundary.as_deref() == Some(boundary)
2339            {
2340                return true;
2341            }
2342
2343            multipart_parts_conflict_with_boundary(parts, boundary)
2344        }
2345    })
2346}
2347
2348fn media_type_of_render_part(part: &RenderPart) -> String {
2349    match part {
2350        RenderPart::Leaf { headers, .. } => headers
2351            .iter()
2352            .find(|(name, _)| name.eq_ignore_ascii_case("content-type"))
2353            .map_or_else(
2354                || String::from("application/octet-stream"),
2355                |(_, value)| {
2356                    value
2357                        .split(';')
2358                        .next()
2359                        .unwrap_or("application/octet-stream")
2360                        .trim()
2361                        .to_owned()
2362                },
2363            ),
2364        RenderPart::Multipart { content_type, .. } => content_type
2365            .split(';')
2366            .next()
2367            .unwrap_or("multipart/mixed")
2368            .trim()
2369            .to_owned(),
2370    }
2371}
2372
2373fn render_part(
2374    part: RenderPart,
2375    boundary_counter: &mut usize,
2376    soft_fold_at: Option<usize>,
2377    depth: usize,
2378) -> Result<RenderedPart, MessageRenderError> {
2379    if depth > MAX_MULTIPART_DEPTH {
2380        return Err(MessageRenderError::MimeNestingTooDeep);
2381    }
2382    match part {
2383        RenderPart::Leaf { headers, body } => Ok((headers, body)),
2384        RenderPart::Multipart {
2385            content_type,
2386            boundary,
2387            parts,
2388        } => {
2389            let media_type = content_type
2390                .split(';')
2391                .next()
2392                .unwrap_or_default()
2393                .trim()
2394                .to_ascii_lowercase();
2395            if !media_type.starts_with("multipart/") {
2396                return Err(MessageRenderError::InvalidMultipartContentType);
2397            }
2398
2399            if parts.is_empty() {
2400                return Err(MessageRenderError::EmptyMultipartParts);
2401            }
2402
2403            let mut content_type_value = content_type;
2404            let header_boundary = extract_boundary_param(&content_type_value);
2405            let has_header_boundary = header_boundary.is_some();
2406
2407            let boundary_value = if let Some(header_boundary_value) = header_boundary {
2408                validate_boundary(&header_boundary_value)?;
2409                if let Some(explicit_boundary) = boundary.as_ref() {
2410                    validate_boundary(explicit_boundary)?;
2411                    if header_boundary_value != explicit_boundary.as_str() {
2412                        return Err(MessageRenderError::MismatchedMimeBoundary);
2413                    }
2414                }
2415                header_boundary_value
2416            } else {
2417                match boundary {
2418                    Some(value) => {
2419                        validate_boundary(&value)?;
2420                        value
2421                    }
2422                    None => {
2423                        // Cap auto-generation attempts so an adversarial body whose
2424                        // bytes contain successive `--=_email_message_boundary_N` lines
2425                        // cannot spin the renderer indefinitely.
2426                        const MAX_AUTO_BOUNDARY_ATTEMPTS: usize = 128;
2427                        let mut chosen = None;
2428                        for _ in 0..MAX_AUTO_BOUNDARY_ATTEMPTS {
2429                            let candidate = next_boundary(boundary_counter);
2430                            validate_boundary(&candidate)?;
2431                            if !multipart_parts_conflict_with_boundary(&parts, &candidate) {
2432                                chosen = Some(candidate);
2433                                break;
2434                            }
2435                        }
2436                        match chosen {
2437                            Some(value) => value,
2438                            None => return Err(MessageRenderError::InvalidMimeBoundary),
2439                        }
2440                    }
2441                }
2442            };
2443
2444            if multipart_parts_conflict_with_boundary(&parts, &boundary_value) {
2445                return Err(MessageRenderError::InvalidMimeBoundary);
2446            }
2447
2448            if !has_header_boundary {
2449                content_type_value.push_str("; boundary=\"");
2450                content_type_value.push_str(&boundary_value);
2451                content_type_value.push('"');
2452            }
2453            let headers = vec![(String::from("Content-Type"), content_type_value)];
2454
2455            let mut body = Vec::new();
2456
2457            for part in parts {
2458                body.extend_from_slice(b"--");
2459                body.extend_from_slice(boundary_value.as_bytes());
2460                body.extend_from_slice(b"\r\n");
2461                let (part_headers, part_body) =
2462                    render_part(part, boundary_counter, soft_fold_at, depth + 1)?;
2463                // The pre-render `multipart_parts_conflict_with_boundary` walk
2464                // checks `RenderPart::Multipart` nodes against `boundary_value`
2465                // by inspecting their declared `content_type` and `boundary`
2466                // fields, but it cannot see the bytes a nested multipart will
2467                // produce (those are only known after `render_part` returns).
2468                // Re-scan the rendered child bytes here so a nested multipart
2469                // whose own auto-generated or leaf body contains a line
2470                // matching the outer boundary cannot slip through.
2471                if contains_boundary_delimiter_line(&part_body, &boundary_value) {
2472                    return Err(MessageRenderError::InvalidMimeBoundary);
2473                }
2474                for (name, value) in part_headers {
2475                    push_header_line(&mut body, &name, &value, soft_fold_at)?;
2476                }
2477                body.extend_from_slice(b"\r\n");
2478                body.extend_from_slice(&part_body);
2479                body.extend_from_slice(b"\r\n");
2480            }
2481
2482            body.extend_from_slice(b"--");
2483            body.extend_from_slice(boundary_value.as_bytes());
2484            body.extend_from_slice(b"--");
2485            body.extend_from_slice(b"\r\n");
2486
2487            Ok((headers, body))
2488        }
2489    }
2490}
2491
2492fn extract_boundary_param(value: &str) -> Option<String> {
2493    let mut params = split_unquoted_semicolons(value);
2494    let _ = params.next();
2495
2496    params.find_map(|param| {
2497        let (name, _) = param.trim().split_once('=')?;
2498        if !name.trim().eq_ignore_ascii_case("boundary") {
2499            return None;
2500        }
2501
2502        let (_, value) = param.trim().split_once('=')?;
2503        let boundary = unquote_parameter_value(value.trim());
2504        if boundary.is_empty() {
2505            return None;
2506        }
2507
2508        Some(boundary)
2509    })
2510}
2511
#[cfg(test)]
mod tests {
    use email_message::{Body, Message, MessageId};
    use time::OffsetDateTime;
    use time::format_description::well_known::Rfc2822;

    use super::{parse_rfc822, render_rfc822};

    /// Parsing a minimal message exposes subject, recipients, date,
    /// message id and the plain-text body.
    #[test]
    fn parse_rfc822_extracts_core_headers_and_body() {
        let wire = concat!(
            "From: Mary Smith <mary@x.test>\r\n",
            "To: jdoe@one.test\r\n",
            "Subject: Test\r\n",
            "Date: Fri, 06 Mar 2026 12:00:00 +0000\r\n",
            "Message-ID: <test@example.com>\r\n",
            "X-Custom: demo\r\n",
            "\r\n",
            "hello"
        );
        let expected_date = OffsetDateTime::parse("Fri, 06 Mar 2026 12:00:00 +0000", &Rfc2822)
            .expect("date should parse");
        let expected_id = "<test@example.com>"
            .parse::<MessageId>()
            .expect("message id should parse");

        let message = parse_rfc822(wire.as_bytes()).expect("message should parse");

        assert_eq!(message.subject(), Some("Test"));
        assert_eq!(message.to().len(), 1);
        assert_eq!(message.date(), Some(&expected_date));
        assert_eq!(message.message_id(), Some(&expected_id));
        assert_eq!(message.body(), &Body::Text("hello".to_owned()));
    }

    /// Rendering a simple text message emits the expected header lines and
    /// ends with the body after a blank line.
    #[test]
    fn render_rfc822_writes_expected_lines() {
        let sender = "Mary Smith <mary@x.test>".parse().expect("valid mailbox");
        let recipient =
            email_message::Address::Mailbox("jdoe@one.test".parse().expect("valid mailbox"));

        let message = Message::builder(Body::Text("hello".to_owned()))
            .from_mailbox(sender)
            .to(vec![recipient])
            .subject("Test")
            .build()
            .expect("message should validate");

        let rendered = render_rfc822(&message).expect("render should succeed");
        let text = String::from_utf8(rendered).expect("rendered text should be utf8");

        for expected_line in [
            "From: \"Mary Smith\" <mary@x.test>\r\n",
            "To: jdoe@one.test\r\n",
            "Subject: Test\r\n",
        ] {
            assert!(text.contains(expected_line));
        }
        assert!(text.ends_with("\r\n\r\nhello"));
    }
}
email_message_wire/rfc822.rs

email_message_wire/
rfc822.rs