Skip to main content

mime_tree/
header_typed.rs

1//! Typed header value parsing for RFC 8621 JMAP `As*` header forms.
2//!
3//! RFC 8621 §4.1.2 defines several parsed-form selectors that a JMAP server
4//! may apply to a single header field's raw bytes:
5//!
6//! | RFC 8621 form        | Section    | mime-tree result variant                |
7//! |----------------------|------------|------------------------------------------|
8//! | `asRaw`              | §4.1.2.1   | [`HeaderValueTyped::Raw`]                |
9//! | `asText`             | §4.1.2.2   | [`HeaderValueTyped::Text`]               |
10//! | `asAddresses`        | §4.1.2.3   | [`HeaderValueTyped::Addresses`]          |
11//! | `asGroupedAddresses` | §4.1.2.4   | [`HeaderValueTyped::GroupedAddresses`]   |
12//! | `asMessageIds`       | §4.1.2.5   | [`HeaderValueTyped::MessageIds`]         |
13//! | `asDate`             | §4.1.2.6   | [`HeaderValueTyped::DateTime`]           |
14//! | `asURLs`             | §4.1.2.7   | [`HeaderValueTyped::URLs`]               |
15//!
16//! The entry point is [`parse_header_typed`]. It takes the [`HeaderForm`]
17//! selector and the raw bytes of the header field value (the portion to the
18//! right of the `:` in the header line, including any folded continuation
19//! lines but excluding the header name and the trailing CRLF).
20//!
21//! Parsing is best-effort. On failure the function returns the appropriate
22//! empty value (an empty `Vec`, an empty `Raw` string, or `DateTime(None)`
23//! for an unparseable date) — it never panics and never returns an error.
24//!
25//! These types are independent of the [`crate::ParsedHeader`] surface,
26//! which exposes both a decoded `value` string and the original wire
27//! bytes in `raw_value`. To layer a typed view on top of an existing
28//! `ParsedHeader`, use [`parse_header_typed_from`] or feed its
29//! `raw_value` bytes to [`parse_header_typed`]:
30//!
31//! ```ignore
32//! let msg = mime_tree::parse(raw)?;
33//! if let Some(h) = msg.headers.iter().find(|h| h.name.eq_ignore_ascii_case("From")) {
34//!     // Option A: convenience wrapper
35//!     let typed = mime_tree::parse_header_typed_from(h, mime_tree::HeaderForm::Addresses);
36//!     // Option B: direct call with raw bytes (equivalent)
//!     let same = mime_tree::parse_header_typed(mime_tree::HeaderForm::Addresses, &h.raw_value);
38//! }
39//! ```
40//!
41//! Always use `raw_value` — not `value.as_bytes()` — when calling
42//! [`parse_header_typed`], because `value` undergoes lossy UTF-8
43//! conversion for structured headers and non-UTF-8 bytes would be
44//! silently corrupted.
45
46use std::borrow::Cow;
47use std::fmt;
48
49use mail_parser::{parsers::MessageStream, Address, HeaderValue};
50use serde::{Deserialize, Serialize};
51use unicode_normalization::UnicodeNormalization;
52
53use crate::ParsedHeader;
54
55/// A single RFC 5322 `mailbox` parsed from an `address-list`.
56///
57/// Mirrors the JMAP `EmailAddress` object defined in RFC 8621 §4.1.2.3.
58///
59/// `name` is the optional display name. `address` is the `addr-spec`. Both
60/// are populated best-effort; either may be `None` if the original header
61/// is malformed.
62///
63/// # Equality semantics
64///
65/// The derived `PartialEq`/`Eq`/`Hash` is byte-exact on both fields. In
66/// particular, `address` comparison is case-sensitive across the entire
67/// addr-spec, even though RFC 5321 §2.4 defines the *domain* part of an
68/// addr-spec as case-insensitive — so `alice@example.com` and
69/// `alice@EXAMPLE.COM` compare as not equal and hash differently. Callers
70/// that need RFC-5321-conformant equality (HashSet dedup of recipient
71/// lists, etc.) MUST canonicalise the domain part themselves before
72/// comparing or hashing.
73#[derive(Debug, Clone, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
74#[non_exhaustive]
75pub struct EmailAddress {
76    /// Display name from the `mailbox`, RFC 2047 encoded-words already
77    /// decoded.
78    ///
79    /// Parser-produced values are RFC 8621 §4.1.2.3 normalised:
80    /// surrounding ASCII whitespace is trimmed from the decoded
81    /// display name. A name that is empty after trimming is mapped to
82    /// `None` (a lone empty quoted-string never surfaces as
83    /// `Some(String::new())`).
84    pub name: Option<String>,
85    /// `addr-spec` of the `mailbox`.
86    pub address: Option<String>,
87}
88
89impl EmailAddress {
90    /// Construct an `EmailAddress` from optional display name and
91    /// addr-spec.
92    ///
93    /// `EmailAddress` is `#[non_exhaustive]` so external callers cannot
94    /// use struct expression syntax. Use this constructor — or
95    /// `Default::default()` followed by field assignment — instead.
96    #[must_use]
97    pub fn new(name: Option<String>, address: Option<String>) -> Self {
98        Self { name, address }
99    }
100
101    /// Whether this `EmailAddress` carries an `addr-spec`.
102    ///
103    /// `parse_header_typed` produces `EmailAddress` values with
104    /// `address == None` for malformed mailboxes (most commonly,
105    /// display-name-only mailboxes from non-spec-conformant clients —
106    /// e.g. a draft saved with just a typed-but-incomplete `To:`).
107    /// Such entries are unusable for sending mail, address comparison,
108    /// or addr-spec-keyed lookup.
109    ///
110    /// Use this helper to filter parsed address lists down to the
111    /// usable subset:
112    ///
113    /// ```
114    /// use mime_tree::EmailAddress;
115    ///
116    /// let parsed = vec![
117    ///     EmailAddress::new(
118    ///         Some("Alice".to_owned()),
119    ///         Some("alice@example.com".to_owned()),
120    ///     ),
121    ///     EmailAddress::new(Some("Display-Name Only".to_owned()), None),
122    /// ];
123    /// let usable: Vec<EmailAddress> = parsed
124    ///     .into_iter()
125    ///     .filter(EmailAddress::is_addressable)
126    ///     .collect();
127    /// assert_eq!(usable.len(), 1);
128    /// assert_eq!(usable[0].address.as_deref(), Some("alice@example.com"));
129    /// ```
130    #[must_use]
131    pub fn is_addressable(&self) -> bool {
132        self.address.is_some()
133    }
134}
135
136impl fmt::Display for EmailAddress {
137    /// Render in RFC 5322 §3.4 mailbox-ish form.
138    ///
139    /// * Both `name` and `address` present: `Display Name <addr@host>`.
140    /// * `address` only: `addr@host` (bare addr-spec, no angle brackets).
141    /// * `name` only: `Display Name` (degenerate; not a valid RFC 5322
142    ///   mailbox, but the best a Display impl can do).
143    /// * Neither present: the empty string.
144    ///
145    /// Names are emitted verbatim. This Display impl prioritises human
146    /// readability over RFC 5322 round-trippability — names containing
147    /// `<`, `>`, `,`, or other RFC 5322 specials are not quoted. Callers
148    /// that need byte-stable round-trip into a header field MUST roll
149    /// their own serializer with proper quoting.
150    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
151        match (self.name.as_deref(), self.address.as_deref()) {
152            (Some(n), Some(a)) => write!(f, "{n} <{a}>"),
153            (None, Some(a)) => f.write_str(a),
154            (Some(n), None) => f.write_str(n),
155            (None, None) => Ok(()),
156        }
157    }
158}
159
160/// A group of `EmailAddress` values, optionally named.
161///
162/// Mirrors the JMAP `EmailAddressGroup` object defined in RFC 8621 §4.1.2.4.
163///
164/// Per RFC 8621 §4.1.2.4, consecutive mailboxes that are not part of a
165/// declared RFC 5322 `group` are still collected under an `AddressGroup`
166/// whose `name` is `None`, "to provide a uniform type".
167#[derive(Debug, Clone, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
168#[non_exhaustive]
169pub struct AddressGroup {
170    /// Display name of the group, or `None` for ungrouped mailboxes.
171    ///
172    /// Parser-produced values follow the same normalisation as
173    /// [`EmailAddress::name`]: surrounding ASCII whitespace trimmed; an
174    /// empty result is mapped to `None`, not `Some(String::new())`.
175    pub name: Option<String>,
176    /// Mailboxes belonging to this group.
177    pub addresses: Vec<EmailAddress>,
178}
179
180impl AddressGroup {
181    /// Construct an `AddressGroup` from an optional group name and a
182    /// vector of mailboxes.
183    ///
184    /// `AddressGroup` is `#[non_exhaustive]` so external callers cannot
185    /// use struct expression syntax. Use this constructor — or
186    /// `Default::default()` followed by field assignment — instead.
187    #[must_use]
188    pub fn new(name: Option<String>, addresses: Vec<EmailAddress>) -> Self {
189        Self { name, addresses }
190    }
191}
192
193impl fmt::Display for AddressGroup {
194    /// Render in RFC 5322 §3.4 group form: `name: mb1, mb2;`.
195    ///
196    /// * Named group: `Friends: alice@example.com, bob@example.com;`.
197    /// * Anonymous group (`name == None`): just the comma-joined
198    ///   mailbox list, no `:` and no terminating `;`.
199    /// * Empty group: just `name:;` (or empty string for an anonymous
200    ///   empty group).
201    ///
202    /// Same caveat as `EmailAddress` Display: prioritises human
203    /// readability; not guaranteed RFC 5322 round-trippable.
204    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
205        if let Some(n) = &self.name {
206            write!(f, "{n}:")?;
207            if !self.addresses.is_empty() {
208                f.write_str(" ")?;
209            }
210        }
211        for (i, addr) in self.addresses.iter().enumerate() {
212            if i > 0 {
213                f.write_str(", ")?;
214            }
215            write!(f, "{addr}")?;
216        }
217        if self.name.is_some() {
218            f.write_str(";")?;
219        }
220        Ok(())
221    }
222}
223
224/// Sign of a `date-time` timezone offset from GMT (RFC 5322 §3.3).
225///
226/// East of GMT corresponds to positive `+HHMM` offsets (e.g. `+0100`).
227/// West of GMT corresponds to negative `-HHMM` offsets (e.g. `-0600`).
228#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
229#[non_exhaustive]
230pub enum TzSign {
231    /// Offset is east of GMT (`+HHMM`).
232    East,
233    /// Offset is west of GMT (`-HHMM`).
234    West,
235}
236
237/// An RFC 5322 §3.3 `date-time` value parsed from a header.
238///
239/// Public fields permit serde transparency and direct field access from
240/// JMAP-shaped code. The fields mirror `mail_parser::DateTime` 1-to-1
241/// **except** for `tz_sign`, which is an explicit enum rather than a
242/// bool. This is a deliberate API choice — see `TzSign` — and means
243/// `HeaderDateTime` and `mail_parser::DateTime` are not bit-identical
244/// even though they round-trip via [`HeaderDateTime::from_mail_parser`]
245/// / [`HeaderDateTime::to_mail_parser`].
246///
247/// # Wire-format dependency on mail-parser
248///
249/// [`Self::to_rfc3339`] and [`Self::to_timestamp`] delegate to
250/// `mail_parser::DateTime`'s formatters. The exact strings produced by
251/// `to_rfc3339`, and the exact value produced by `to_timestamp` for
252/// edge-case input, are therefore defined by the pinned mail-parser
253/// version. mime-tree's Cargo.toml uses a caret range (`mail-parser =
254/// "0.11"`) so 0.11.x patch updates can in principle change the output
255/// without a mime-tree version bump. Downstream callers that persist
256/// these strings (database keys, JMAP wire responses, indexed columns)
257/// SHOULD pin mail-parser tightly if they require byte-stable output
258/// across mime-tree patch bumps.
259///
260/// # Field invariants
261///
262/// `parse_header_typed` only constructs `HeaderDateTime` values that
263/// passed mail-parser's validation: `year >= 1900`, `month ∈ 1..=12`,
264/// `day ∈ 1..=31` (calendar-validated), `hour ∈ 0..=23`,
265/// `minute ∈ 0..=59`, `second ∈ 0..=60` (RFC 5322 §4.3 leap second),
266/// `tz_hour ∈ 0..=23`, `tz_minute ∈ 0..=59`.
267///
268/// Direct construction with public fields can produce out-of-range
269/// values. The behaviour of `to_rfc3339` and `to_timestamp` on such
270/// values is unspecified — output may be syntactically malformed
271/// RFC 3339 or a meaningless `i64`. Callers that build `HeaderDateTime`
272/// from external sources should validate ranges themselves.
273///
274/// # Equality semantics
275///
276/// The derived `PartialEq`/`Eq`/`Hash` is **field-wise**, not
277/// **instant-wise**. Two `HeaderDateTime` values representing the same
278/// moment in time at different offsets compare as not-equal and hash
279/// differently. For example:
280///
281/// ```text
282///   2024-01-01T12:00:00+00:00   (12:00 UTC)
283///   2024-01-01T13:00:00+01:00   (12:00 UTC, expressed +01:00)
284/// ```
285///
286/// are the same instant but compare `!=`. Callers needing
287/// instant-equality (deduping timestamps across clients in different
288/// time zones, time-series bucketing) MUST compare
289/// [`Self::to_timestamp`] values rather than relying on the derived
290/// `PartialEq`.
291#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
292#[non_exhaustive]
293pub struct HeaderDateTime {
294    /// Four-digit calendar year. Parser-produced values: `1900..=3000`.
295    pub year: u16,
296    /// Month of the year, `1..=12` for parser-produced values.
297    pub month: u8,
298    /// Day of the month, `1..=31` (calendar-validated against
299    /// `year`/`month`) for parser-produced values.
300    pub day: u8,
301    /// Hour of the day, `0..=23` for parser-produced values.
302    pub hour: u8,
303    /// Minute, `0..=59` for parser-produced values.
304    pub minute: u8,
305    /// Second, `0..=60` for parser-produced values (RFC 5322 §4.3
306    /// allows 60 to represent a leap second).
307    pub second: u8,
308    /// Sign of the timezone offset from GMT.
309    pub tz_sign: TzSign,
310    /// Hours component of the timezone offset, `0..=23` for
311    /// parser-produced values.
312    pub tz_hour: u8,
313    /// Minutes component of the timezone offset, `0..=59` for
314    /// parser-produced values.
315    pub tz_minute: u8,
316}
317
318impl HeaderDateTime {
319    /// Render as an RFC 3339 / ISO 8601 §5.6 date-time string.
320    ///
321    /// # Output format
322    ///
323    /// * Non-UTC offset (any of `tz_hour`, `tz_minute` non-zero):
324    ///   `YYYY-MM-DDTHH:MM:SS±HH:MM`. Each component is zero-padded;
325    ///   `±` is `-` for west-of-GMT, `+` otherwise.
326    /// * UTC (`tz_hour == 0 && tz_minute == 0`):
327    ///   `YYYY-MM-DDTHH:MM:SSZ`. Zulu form, not `+00:00`.
328    ///
329    /// No subsecond fraction is emitted (the seconds-fraction extension
330    /// of RFC 3339 is not represented in `HeaderDateTime`).
331    ///
332    /// # Examples
333    ///
334    /// * `1997-11-21T09:55:06-06:00` for `21 Nov 1997 09:55:06 -0600`.
335    /// * `2024-01-15T12:34:56Z` for `15 Jan 2024 12:34:56 +0000`.
336    ///
337    /// # Behaviour on out-of-range input
338    ///
339    /// The exact string for out-of-range field values
340    /// (e.g. `month = 13`) is unspecified — it depends on the pinned
341    /// mail-parser version and may not be syntactically valid RFC 3339.
342    /// See the type-level docs.
343    #[must_use]
344    pub fn to_rfc3339(&self) -> String {
345        self.to_mail_parser().to_rfc3339()
346    }
347
348    /// Render as a Unix timestamp (seconds since 1970-01-01T00:00:00Z).
349    ///
350    /// Pre-epoch dates return negative values. The result is computed
351    /// linearly from the field values without validation; on
352    /// out-of-range or otherwise invalid input (e.g. `month = 0`,
353    /// `day = 99`, year overflowing the calendar arithmetic) the
354    /// returned `i64` is unspecified and SHOULD NOT be relied upon.
355    /// See the type-level docs.
356    #[must_use]
357    pub fn to_timestamp(&self) -> i64 {
358        self.to_mail_parser().to_timestamp()
359    }
360
361    fn to_mail_parser(&self) -> mail_parser::DateTime {
362        mail_parser::DateTime {
363            year: self.year,
364            month: self.month,
365            day: self.day,
366            hour: self.hour,
367            minute: self.minute,
368            second: self.second,
369            tz_before_gmt: matches!(self.tz_sign, TzSign::West),
370            tz_hour: self.tz_hour,
371            tz_minute: self.tz_minute,
372        }
373    }
374
375    fn from_mail_parser(dt: mail_parser::DateTime) -> Self {
376        Self {
377            year: dt.year,
378            month: dt.month,
379            day: dt.day,
380            hour: dt.hour,
381            minute: dt.minute,
382            second: dt.second,
383            tz_sign: if dt.tz_before_gmt {
384                TzSign::West
385            } else {
386                TzSign::East
387            },
388            tz_hour: dt.tz_hour,
389            tz_minute: dt.tz_minute,
390        }
391    }
392}
393
394impl fmt::Display for HeaderDateTime {
395    /// Render as an RFC 3339 / ISO 8601 §5.6 date-time string by
396    /// delegating to [`HeaderDateTime::to_rfc3339`]. See that method
397    /// for the exact output format and behaviour on out-of-range input.
398    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
399        f.write_str(&self.to_rfc3339())
400    }
401}
402
403/// Selector for the RFC 8621 parsed-form of a header value.
404///
405/// This is the form-token from a JMAP `header:<name>:as<form>` property
406/// selector, normalised to an enum.
407///
408/// [`Display`](fmt::Display) emits the canonical JMAP form-token string
409/// (`asRaw`, `asAddresses`, …, `asURLs`).
410/// [`FromStr`](std::str::FromStr) accepts exactly that set of strings;
411/// any other input yields [`UnknownHeaderForm`].
412#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
413#[non_exhaustive]
414pub enum HeaderForm {
415    /// Trim surrounding whitespace; return the bytes as a UTF-8 string.
416    /// (§4.1.2.1)
417    ///
418    /// Non-UTF-8 bytes — legal in raw RFC 5322 but not in JMAP wire
419    /// format — are replaced with U+FFFD REPLACEMENT CHARACTER
420    /// (lossy conversion). This preserves the position and rough shape
421    /// of malformed input so callers can flag a mojibake header
422    /// without losing the rest of the field body.
423    Raw,
424    /// RFC 8621 §4.1.2.2 Text form. Unfold whitespace, strip the
425    /// trailing CRLF and leading SP, decode all syntactically-correct
426    /// RFC 2047 encoded-words, then Unicode-normalise the result to
427    /// NFC.
428    ///
429    /// This is the form most commonly used by JMAP clients fetching
430    /// human-readable header fields like Subject, Comments, Keywords,
431    /// and List-Id.
432    Text,
433    /// Parse as an RFC 5322 `address-list`. Group structure is discarded;
434    /// only the flat list of mailboxes is returned. (§4.1.2.3)
435    Addresses,
436    /// Parse as an RFC 5322 `address-list`, preserving group structure.
437    /// (§4.1.2.4)
438    GroupedAddresses,
439    /// Parse as a list of RFC 5322 `msg-id` values. Surrounding angle
440    /// brackets and CFWS are stripped. (§4.1.2.5)
441    MessageIds,
442    /// Parse as an RFC 5322 §3.3 `date-time`. (§4.1.2.6)
443    Date,
444    /// Parse as an RFC 2369 list of URLs. Surrounding angle brackets and
445    /// comments are stripped. (§4.1.2.7)
446    URLs,
447}
448
449impl HeaderForm {
450    /// Return the canonical RFC 8621 §4.1.2 form-token string for this
451    /// variant.
452    ///
453    /// The token starts with `as` (the convention used in JMAP property
454    /// selectors such as `header:Subject:asText`). Inverse of
455    /// [`HeaderForm`]'s [`FromStr`](std::str::FromStr) impl.
456    #[must_use]
457    pub fn as_jmap_token(&self) -> &'static str {
458        match self {
459            HeaderForm::Raw => "asRaw",
460            HeaderForm::Text => "asText",
461            HeaderForm::Addresses => "asAddresses",
462            HeaderForm::GroupedAddresses => "asGroupedAddresses",
463            HeaderForm::MessageIds => "asMessageIds",
464            HeaderForm::Date => "asDate",
465            HeaderForm::URLs => "asURLs",
466        }
467    }
468}
469
470impl fmt::Display for HeaderForm {
471    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
472        f.write_str(self.as_jmap_token())
473    }
474}
475
476/// Error returned by [`HeaderForm`]'s [`FromStr`](std::str::FromStr) impl
477/// when the input is not a recognised JMAP form-token.
478///
479/// The wrapped string is the input as given (case-sensitive); JMAP
480/// form-tokens are case-sensitive per RFC 8621 §4.1.2.
481#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
482pub struct UnknownHeaderForm(pub String);
483
484impl fmt::Display for UnknownHeaderForm {
485    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
486        write!(f, "unknown JMAP header form token: {:?}", self.0)
487    }
488}
489
490impl std::error::Error for UnknownHeaderForm {}
491
492impl std::str::FromStr for HeaderForm {
493    type Err = UnknownHeaderForm;
494
495    fn from_str(s: &str) -> Result<Self, Self::Err> {
496        match s {
497            "asRaw" => Ok(HeaderForm::Raw),
498            "asText" => Ok(HeaderForm::Text),
499            "asAddresses" => Ok(HeaderForm::Addresses),
500            "asGroupedAddresses" => Ok(HeaderForm::GroupedAddresses),
501            "asMessageIds" => Ok(HeaderForm::MessageIds),
502            "asDate" => Ok(HeaderForm::Date),
503            "asURLs" => Ok(HeaderForm::URLs),
504            _ => Err(UnknownHeaderForm(s.to_owned())),
505        }
506    }
507}
508
509/// A header field value rendered in one of the RFC 8621 parsed forms.
510///
511/// # Serde wire format
512///
513/// `HeaderValueTyped` and [`HeaderForm`] use serde's *default*
514/// representation: externally-tagged for the enum, capitalised Rust
515/// variant names. Examples:
516///
517/// ```json
518/// {"Addresses": [{"name": "Alice", "address": "alice@example.com"}]}
519/// {"DateTime": null}
520/// {"Raw": "Subject line"}
521/// "Addresses"               // serialized HeaderForm
522/// ```
523///
524/// This is a **deliberate** choice for in-crate serialization (between
525/// services, into databases). It is **not** the RFC 8621 wire format
526/// used over JMAP HTTP/JSON. RFC 8621 §4.1.2 uses property-selector
527/// strings such as `header:Subject:asAddresses` rather than serializing
528/// the form name as an enum tag. Callers exposing parsed headers to a
529/// JMAP client SHOULD map between this representation and the JMAP
530/// wire format at the API boundary; relying on the in-crate serde
531/// shape as the wire format will produce a non-conformant JMAP
532/// response.
533///
534/// Pre-1.0, this representation is subject to change. From 1.0 onward
535/// the in-crate serde format will be a stability surface and will be
536/// changed only with a major version bump.
537#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
538#[non_exhaustive]
539pub enum HeaderValueTyped {
540    /// Result of [`HeaderForm::Raw`]: the trimmed UTF-8 string.
541    Raw(String),
542    /// Result of [`HeaderForm::Text`]: whitespace-unfolded, RFC 2047
543    /// decoded, NFC-normalised UTF-8 string.
544    Text(String),
545    /// Result of [`HeaderForm::Addresses`].
546    Addresses(Vec<EmailAddress>),
547    /// Result of [`HeaderForm::GroupedAddresses`].
548    GroupedAddresses(Vec<AddressGroup>),
549    /// Result of [`HeaderForm::MessageIds`]: bare msg-id strings with no
550    /// angle brackets.
551    MessageIds(Vec<String>),
552    /// Result of [`HeaderForm::Date`], or `None` if the header value did
553    /// not parse as a `date-time`.
554    DateTime(Option<HeaderDateTime>),
555    /// Result of [`HeaderForm::URLs`]: bare URL strings with no angle
556    /// brackets.
557    URLs(Vec<String>),
558}
559
560/// Parse a header field value into the requested RFC 8621 parsed form.
561///
562/// `raw_value` is the bytes of the header field value — the portion to
563/// the right of the `:` in the header line, including any folded
564/// continuation lines.
565///
566/// # Trailing line ending
567///
568/// A trailing CRLF (or bare LF) is **permitted but not required**:
569///
570/// * Input ending in `\r\n` is used as-is.
571/// * Input ending in `\n` is converted to `\r\n` internally.
572/// * Input with no trailing line ending has `\r\n` appended internally.
573///
574/// All three shapes produce the same result. Callers may pass the field
575/// body with or without the line ending — pick whichever is easiest to
576/// extract from the source.
577///
578/// # Best-effort parsing
579///
580/// Malformed input yields the empty result for the requested form
581/// (empty `Vec`, empty string, or `DateTime(None)`). The function never
582/// panics and never returns an error.
583///
584/// # Empty-result ambiguity
585///
586/// The empty result is the **same value** regardless of cause:
587///
588/// | Form                | Empty value             | Triggered by                                       |
589/// |---------------------|-------------------------|----------------------------------------------------|
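/// | `Raw`               | `Raw("")`               | empty or all-whitespace input (malformed UTF-8 becomes U+FFFD and is kept) |
/// | `Text`              | `Text("")`              | empty input, or input from which no text is decoded |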
591/// | `Addresses`         | `Addresses(vec![])`     | empty, malformed, or zero-mailbox input            |
592/// | `GroupedAddresses`  | `GroupedAddresses(vec![])` | same as above                                   |
593/// | `MessageIds`        | `MessageIds(vec![])`    | empty, no `<...>` brackets, all garbage            |
594/// | `Date`              | `DateTime(None)`        | empty, malformed, or all-zero day/month            |
595/// | `URLs`              | `URLs(vec![])`          | empty, no `<...>` brackets, all garbage            |
596///
597/// Callers cannot distinguish "header was present but empty" from
598/// "header was present but malformed" from "input was non-UTF-8 noise"
599/// using this API. Out-of-band signalling (e.g. recording a warning
600/// alongside the parse result) is the caller's responsibility. Future
601/// minor releases may add a parallel `parse_header_typed_strict`
602/// returning `Result` or a tuple `(HeaderValueTyped, Warnings)` —
603/// neither is exposed today.
604///
605/// # Examples
606///
607/// ## Addresses (RFC 8621 §4.1.2.3)
608///
609/// ```
610/// use mime_tree::{parse_header_typed, EmailAddress, HeaderForm, HeaderValueTyped};
611///
612/// // RFC 8621 §4.1.2.3 example (the "James Smythe" address-list, simplified).
613/// let raw = b" \"James Smythe\" <james@example.com>";
614/// let parsed = parse_header_typed(HeaderForm::Addresses, raw);
615/// assert_eq!(
616///     parsed,
617///     HeaderValueTyped::Addresses(vec![EmailAddress::new(
618///         Some("James Smythe".to_owned()),
619///         Some("james@example.com".to_owned()),
620///     )]),
621/// );
622/// ```
623///
624/// ## GroupedAddresses (RFC 8621 §4.1.2.4)
625///
626/// ```
627/// use mime_tree::{parse_header_typed, AddressGroup, EmailAddress, HeaderForm, HeaderValueTyped};
628///
629/// let raw = b"Friends: alice@example.com, bob@example.com;";
630/// let parsed = parse_header_typed(HeaderForm::GroupedAddresses, raw);
631/// assert_eq!(
632///     parsed,
633///     HeaderValueTyped::GroupedAddresses(vec![AddressGroup::new(
634///         Some("Friends".to_owned()),
635///         vec![
636///             EmailAddress::new(None, Some("alice@example.com".to_owned())),
637///             EmailAddress::new(None, Some("bob@example.com".to_owned())),
638///         ],
639///     )]),
640/// );
641/// ```
642///
643/// ## MessageIds (RFC 8621 §4.1.2.5)
644///
645/// ```
646/// use mime_tree::{parse_header_typed, HeaderForm, HeaderValueTyped};
647///
648/// let raw = b"<abc@example.com> <def@example.com>";
649/// let parsed = parse_header_typed(HeaderForm::MessageIds, raw);
650/// assert_eq!(
651///     parsed,
652///     HeaderValueTyped::MessageIds(vec![
653///         "abc@example.com".to_owned(),
654///         "def@example.com".to_owned(),
655///     ]),
656/// );
657/// ```
658///
659/// ## Date (RFC 5322 §3.3 / RFC 3339)
660///
661/// ```
662/// use mime_tree::{parse_header_typed, HeaderForm, HeaderValueTyped};
663///
664/// let raw = b" Fri, 21 Nov 1997 09:55:06 -0600";
665/// let parsed = parse_header_typed(HeaderForm::Date, raw);
666/// if let HeaderValueTyped::DateTime(Some(dt)) = parsed {
667///     assert_eq!(dt.to_rfc3339(), "1997-11-21T09:55:06-06:00");
668/// } else {
669///     panic!("expected DateTime");
670/// }
671/// ```
672///
673/// ## URLs (RFC 8621 §4.1.2.7 / RFC 2369)
674///
675/// ```
676/// use mime_tree::{parse_header_typed, HeaderForm, HeaderValueTyped};
677///
678/// // RFC 2369 List-Help with comment after the URL.
679/// let raw = b" <mailto:list@host.com?subject=help> (List Instructions)";
680/// let parsed = parse_header_typed(HeaderForm::URLs, raw);
681/// assert_eq!(
682///     parsed,
683///     HeaderValueTyped::URLs(vec!["mailto:list@host.com?subject=help".to_owned()]),
684/// );
685/// ```
686///
687/// ## Raw (RFC 8621 §4.1.2.1)
688///
689/// ```
690/// use mime_tree::{parse_header_typed, HeaderForm, HeaderValueTyped};
691///
692/// // Surrounding whitespace stripped; no other transformation.
693/// // Encoded-words survive verbatim.
694/// let raw = b"  Subject line with =?UTF-8?Q?encoded?= words  ";
695/// let parsed = parse_header_typed(HeaderForm::Raw, raw);
696/// assert_eq!(
697///     parsed,
698///     HeaderValueTyped::Raw("Subject line with =?UTF-8?Q?encoded?= words".to_owned()),
699/// );
700/// ```
701#[must_use]
702pub fn parse_header_typed(form: HeaderForm, raw_value: &[u8]) -> HeaderValueTyped {
703    // Why the crlf_terminated() round-trip below: mail-parser's typed
704    // parsing APIs (parse_address, parse_date, etc.) only accept raw
705    // bytes via MessageStream — there is no "parse from &str" path.
706    // The CRLF re-termination is also required because MessageStream
707    // expects RFC 5322 wire-format input. Do not remove either step.
708    match form {
709        HeaderForm::Raw => {
710            // RFC 8621 §4.1.2.1: the value is the header field value
711            // with surrounding white space removed. Non-UTF-8 bytes are
712            // replaced with U+FFFD via `from_utf8_lossy` so
713            // malformed-but-non-empty input does not collapse into an
714            // indistinguishable empty string. This matches mail-parser's
715            // own handling of non-UTF-8 header bytes (`HeaderValue::Text`
716            // is `Cow<str>` populated via `from_utf8_lossy`).
717            let s = String::from_utf8_lossy(raw_value);
718            HeaderValueTyped::Raw(s.trim().to_owned())
719        }
720        HeaderForm::Text => {
721            // RFC 8621 §4.1.2.2: unfold whitespace, strip trailing CRLF
722            // and leading SP, decode RFC 2047 encoded-words with known
723            // charsets, NFC-normalise the result.
724            //
725            // mail-parser's `parse_unstructured` handles unfolding,
726            // CRLF stripping, leading-SP removal, and RFC 2047 decoding
727            // in one pass — that's steps 1-4 of RFC 8621 §4.1.2.2.
728            // Step 5 (NFC) is applied here via the unicode-normalization
729            // crate.
730            let buf = crlf_terminated(raw_value);
731            let hv = MessageStream::new(&buf).parse_unstructured();
732            let decoded = match hv {
733                HeaderValue::Text(s) => s.into_owned(),
734                _ => String::new(),
735            };
736            HeaderValueTyped::Text(decoded.nfc().collect())
737        }
738        HeaderForm::URLs => {
739            // RFC 8621 §4.1.2.7 / RFC 2369 §2: each URL is wrapped in
740            // angle brackets. RFC 8621 §4.1.2.7 mandates that any
741            // value outside of the angle-bracket arguments MUST be
742            // ignored. mail-parser's address parser doesn't honour
743            // that contract (e.g. bare `https://example.com/u/abc`
744            // is treated as a malformed address with `https` as a
745            // group name), so we extract bracket contents directly
746            // rather than delegating.
747            HeaderValueTyped::URLs(extract_bracketed_urls(raw_value))
748        }
749        HeaderForm::Addresses => {
750            let buf = crlf_terminated(raw_value);
751            let hv = MessageStream::new(&buf).parse_address();
752            HeaderValueTyped::Addresses(flatten_addresses(&hv))
753        }
754        HeaderForm::GroupedAddresses => {
755            let buf = crlf_terminated(raw_value);
756            let hv = MessageStream::new(&buf).parse_address();
757            HeaderValueTyped::GroupedAddresses(group_addresses(&hv))
758        }
759        HeaderForm::MessageIds => {
760            // mail-parser's `parse_id` has a broken-client recovery
761            // branch (mail-parser-0.11/src/parsers/fields/id.rs) that
762            // returns `HeaderValue::Text` containing the unparsed bytes
763            // when no `<...>` tokens were found in the input. From the
764            // result type alone we cannot tell that case apart from the
765            // single-valid-msg-id case (`<x>` → `Text("x")`).
766            //
767            // Discriminator: a Text result is the result of bracket
768            // stripping iff the original input contained at least one
769            // `<` byte. mail-parser does not insert angle brackets that
770            // were not present in the input, so absence of `<` in the
771            // raw bytes is a sufficient signal that mail-parser cannot
772            // have produced Text via the stripping branch.
773            let buf = crlf_terminated(raw_value);
774            let hv = MessageStream::new(&buf).parse_id();
775            let had_angle_brackets = raw_value.contains(&b'<');
776            HeaderValueTyped::MessageIds(extract_msg_ids(&hv, had_angle_brackets))
777        }
778        HeaderForm::Date => {
779            let buf = crlf_terminated(raw_value);
780            let hv = MessageStream::new(&buf).parse_date();
781            // mail-parser's `parse_date` returns `HeaderValue::Empty`
782            // when it cannot recover 6 numeric components. Belt-and-
783            // braces: also reject zero month/day, which RFC 5322 §3.3
784            // does not permit and which mail-parser can emit when its
785            // position counter advances past slots that never got
786            // numeric digits. (A zero `year` is unreachable in
787            // mail-parser-0.11: years are remapped 0..=49 → +2000,
788            // 50..=99 → +1900, so `parts[2]` is never copied through
789            // as 0.)
790            let dt = match hv {
791                HeaderValue::DateTime(dt) if dt.month != 0 && dt.day != 0 => {
792                    Some(HeaderDateTime::from_mail_parser(dt))
793                }
794                _ => None,
795            };
796            HeaderValueTyped::DateTime(dt)
797        }
798    }
799}
800
801// ---------------------------------------------------------------------------
802// Per-form entry points
803// ---------------------------------------------------------------------------
804//
805// Convenience wrappers over `parse_header_typed` for callers that know
806// the form statically. Each wrapper unwraps the `HeaderValueTyped`
807// variant that `parse_header_typed` is contractually required to return
808// for the matching `HeaderForm`, eliminating the boilerplate match at
809// every call site. If `parse_header_typed` ever broke that contract, the
810// wrapper returns the empty result for the form (defensive, not panic).
811
812/// Parse the header field value as an RFC 8621 §4.1.2.1 Raw form: trim
813/// surrounding whitespace, decode bytes as UTF-8 (lossy with U+FFFD on
814/// invalid sequences).
815///
816/// Convenience wrapper over [`parse_header_typed`] with
817/// [`HeaderForm::Raw`].
818#[must_use]
819pub fn parse_raw(raw_value: &[u8]) -> String {
820    match parse_header_typed(HeaderForm::Raw, raw_value) {
821        HeaderValueTyped::Raw(s) => s,
822        _ => String::new(),
823    }
824}
825
826/// Parse the header field value as an RFC 8621 §4.1.2.2 Text form:
827/// whitespace unfolded, RFC 2047 encoded-words decoded, Unicode
828/// normalised to NFC.
829///
830/// Convenience wrapper over [`parse_header_typed`] with
831/// [`HeaderForm::Text`].
832#[must_use]
833pub fn parse_text(raw_value: &[u8]) -> String {
834    match parse_header_typed(HeaderForm::Text, raw_value) {
835        HeaderValueTyped::Text(s) => s,
836        _ => String::new(),
837    }
838}
839
840/// Parse the header field value as an RFC 8621 §4.1.2.3 Addresses form:
841/// a flat list of mailboxes with group structure discarded.
842///
843/// Convenience wrapper over [`parse_header_typed`] with
844/// [`HeaderForm::Addresses`].
845#[must_use]
846pub fn parse_addresses(raw_value: &[u8]) -> Vec<EmailAddress> {
847    match parse_header_typed(HeaderForm::Addresses, raw_value) {
848        HeaderValueTyped::Addresses(v) => v,
849        _ => Vec::new(),
850    }
851}
852
853/// Parse the header field value as an RFC 8621 §4.1.2.4 GroupedAddresses
854/// form: preserves group structure; a flat mailbox-list is wrapped in a
855/// single anonymous group.
856///
857/// Convenience wrapper over [`parse_header_typed`] with
858/// [`HeaderForm::GroupedAddresses`].
859#[must_use]
860pub fn parse_grouped_addresses(raw_value: &[u8]) -> Vec<AddressGroup> {
861    match parse_header_typed(HeaderForm::GroupedAddresses, raw_value) {
862        HeaderValueTyped::GroupedAddresses(v) => v,
863        _ => Vec::new(),
864    }
865}
866
867/// Parse the header field value as an RFC 8621 §4.1.2.5 MessageIds form:
868/// `<...>`-stripped msg-id strings.
869///
870/// Convenience wrapper over [`parse_header_typed`] with
871/// [`HeaderForm::MessageIds`].
872#[must_use]
873pub fn parse_message_ids(raw_value: &[u8]) -> Vec<String> {
874    match parse_header_typed(HeaderForm::MessageIds, raw_value) {
875        HeaderValueTyped::MessageIds(v) => v,
876        _ => Vec::new(),
877    }
878}
879
880/// Parse the header field value as an RFC 8621 §4.1.2.6 Date form: an
881/// RFC 5322 §3.3 date-time, or `None` on parse failure.
882///
883/// Convenience wrapper over [`parse_header_typed`] with
884/// [`HeaderForm::Date`].
885#[must_use]
886pub fn parse_date(raw_value: &[u8]) -> Option<HeaderDateTime> {
887    match parse_header_typed(HeaderForm::Date, raw_value) {
888        HeaderValueTyped::DateTime(dt) => dt,
889        _ => None,
890    }
891}
892
893/// Parse the header field value as an RFC 8621 §4.1.2.7 URLs form: bare
894/// URL strings with surrounding angle brackets stripped (RFC 2369).
895///
896/// Convenience wrapper over [`parse_header_typed`] with
897/// [`HeaderForm::URLs`].
898#[must_use]
899pub fn parse_urls(raw_value: &[u8]) -> Vec<String> {
900    match parse_header_typed(HeaderForm::URLs, raw_value) {
901        HeaderValueTyped::URLs(v) => v,
902        _ => Vec::new(),
903    }
904}
905
906/// Parse an existing [`ParsedHeader`]'s value into the requested
907/// RFC 8621 parsed form.
908///
909/// Uses `header.raw_value` (the original wire bytes) rather than
910/// `header.value.as_bytes()`, so non-UTF-8 bytes in structured headers
911/// (From, To, Date, etc.) are preserved faithfully. This matters for
912/// display names containing ISO-8859-1 or other non-UTF-8 encodings,
913/// which would be silently corrupted by lossy UTF-8 round-tripping.
914#[must_use]
915pub fn parse_header_typed_from(header: &ParsedHeader, form: HeaderForm) -> HeaderValueTyped {
916    parse_header_typed(form, &header.raw_value)
917}
918
919// ---------------------------------------------------------------------------
920// Conversion helpers
921// ---------------------------------------------------------------------------
922
/// Strip leading and trailing whitespace from `s` (using [`str::trim`],
/// which trims Unicode whitespace) and return the remainder as
/// `Some(String)`, or `None` when nothing is left.
///
/// Single home for the RFC 8621 §4.1.2.3 / §4.1.2.4 display-name rule:
/// surrounding white space is removed, and a name that ends up empty is
/// reported as `None`.
fn trim_or_none(s: &str) -> Option<String> {
    let s = s.trim();
    (!s.is_empty()).then(|| s.to_owned())
}
937
/// Return `raw_value` terminated by exactly one trailing CRLF.
///
/// * Input already ending in `\r\n` is handed back borrowed, without
///   allocating.
/// * Input ending in a bare `\n` has that LF replaced by `\r\n`.
/// * Any other input gets `\r\n` appended.
///
/// The last two cases return a newly allocated buffer.
///
/// mail-parser's `MessageStream` parsers expect a header field body as
/// it appears in a real RFC 5322 stream, i.e. CRLF-terminated, whereas
/// callers pass the field value without its line ending. This helper
/// bridges the two.
fn crlf_terminated(raw_value: &[u8]) -> Cow<'_, [u8]> {
    if raw_value.ends_with(b"\r\n") {
        return Cow::Borrowed(raw_value);
    }
    // Drop a lone trailing LF (if any) so it is replaced, not doubled.
    let body = raw_value.strip_suffix(b"\n").unwrap_or(raw_value);
    let mut terminated = Vec::with_capacity(body.len() + 2);
    terminated.extend_from_slice(body);
    terminated.extend_from_slice(b"\r\n");
    Cow::Owned(terminated)
}
963
964fn convert_addr(addr: &mail_parser::Addr<'_>) -> EmailAddress {
965    // RFC 8621 §4.1.2.3 mandates that for a quoted-string display name,
966    // surrounding DQUOTE characters be removed, quoted-pairs decoded, and
967    // white space unfolded with leading/trailing white space removed.
968    // mail-parser already does the dequoting and quoted-pair decoding, but
969    // leaves surrounding white space inside the quoted-string in place
970    // (e.g. `"  James Smythe"` parses to `Some("  James Smythe")`). Strip
971    // here. An empty trimmed result is mapped to `None` so a lone empty
972    // quoted-string does not surface as a phantom display name.
973    let name = addr.name.as_ref().and_then(|s| trim_or_none(s.as_ref()));
974    EmailAddress {
975        name,
976        address: addr.address.as_ref().map(|s| s.as_ref().to_owned()),
977    }
978}
979
980/// Flatten an `Address` (which is either a flat list of mailboxes or a
981/// list of groups) into a single `Vec<EmailAddress>`. Used for
982/// [`HeaderForm::Addresses`], which per RFC 8621 §4.1.2.3 discards group
983/// structure and produces one item per mailbox.
984fn flatten_addresses(hv: &HeaderValue<'_>) -> Vec<EmailAddress> {
985    match hv {
986        HeaderValue::Address(Address::List(list)) => list.iter().map(convert_addr).collect(),
987        HeaderValue::Address(Address::Group(groups)) => groups
988            .iter()
989            .flat_map(|g| g.addresses.iter().map(convert_addr))
990            .collect(),
991        _ => Vec::new(),
992    }
993}
994
995/// Convert an `Address` into a list of groups, per RFC 8621 §4.1.2.4. A
996/// flat list of mailboxes is wrapped in a single group with `name = None`.
997fn group_addresses(hv: &HeaderValue<'_>) -> Vec<AddressGroup> {
998    match hv {
999        HeaderValue::Address(Address::List(list)) if !list.is_empty() => {
1000            vec![AddressGroup {
1001                name: None,
1002                addresses: list.iter().map(convert_addr).collect(),
1003            }]
1004        }
1005        HeaderValue::Address(Address::Group(groups)) => groups
1006            .iter()
1007            .map(|g| AddressGroup {
1008                // RFC 8621 §4.1.2.4: the group `name` is "processed the
1009                // same as the name in the EmailAddress type" — trim white
1010                // space; empty after trimming becomes None.
1011                name: g.name.as_ref().and_then(|s| trim_or_none(s.as_ref())),
1012                addresses: g.addresses.iter().map(convert_addr).collect(),
1013            })
1014            .collect(),
1015        _ => Vec::new(),
1016    }
1017}
1018
1019/// Extract bare msg-id strings from a `HeaderValue` produced by
1020/// mail-parser's `parse_id`.
1021///
1022/// `had_angle_brackets` indicates whether the original raw input
1023/// contained at least one `<` byte. mail-parser's `parse_id` returns
1024/// `HeaderValue::Text` both for a single valid bracket-stripped msg-id
1025/// and for its broken-client recovery branch on input with no brackets at
1026/// all (returning the lossy UTF-8 of the unparsed bytes). Without this
1027/// discriminator, malformed input would leak the unparsed bytes into the
1028/// result vec, violating the RFC 8621 §4.1.2.5 empty-on-malformed
1029/// contract.
1030fn extract_msg_ids(hv: &HeaderValue<'_>, had_angle_brackets: bool) -> Vec<String> {
1031    match hv {
1032        HeaderValue::Text(s) if had_angle_brackets => vec![s.as_ref().to_owned()],
1033        HeaderValue::TextList(list) => list.iter().map(|s| s.as_ref().to_owned()).collect(),
1034        _ => Vec::new(),
1035    }
1036}
1037
/// Pull the URLs out of an RFC 2369 / RFC 8621 §4.1.2.7 list-URL header
/// value.
///
/// The value is scanned left to right; each `<` is paired with the next
/// `>` after it and the bytes in between form one URL. Everything
/// outside a bracket pair — comments (`(...)`), CFWS, commas, malformed
/// framing — is skipped, following RFC 8621 §4.1.2.7: "Any value outside
/// of the angle bracket arguments MUST be ignored."
///
/// Inside a pair, space, tab, CR and LF are removed: an RFC 3986 URI
/// cannot contain literal whitespace, so whatever is there comes from
/// CRLF folding. Bracket contents that are not valid UTF-8 are dropped.
///
/// A `<` with no `>` after it contributes nothing, and neither does an
/// empty `<>`.
fn extract_bracketed_urls(raw_value: &[u8]) -> Vec<String> {
    let mut urls = Vec::new();
    let mut rest = raw_value;

    while let Some(open) = rest.iter().position(|&byte| byte == b'<') {
        let after_open = &rest[open + 1..];
        let close = match after_open.iter().position(|&byte| byte == b'>') {
            Some(idx) => idx,
            // Unclosed bracket: nothing further can be extracted.
            None => break,
        };

        // Resume scanning right after the closing `>`.
        rest = &after_open[close + 1..];

        // Keep everything except folding whitespace.
        let inner: Vec<u8> = after_open[..close]
            .iter()
            .copied()
            .filter(|&byte| !matches!(byte, b' ' | b'\t' | b'\r' | b'\n'))
            .collect();
        if inner.is_empty() {
            continue;
        }
        if let Ok(url) = String::from_utf8(inner) {
            urls.push(url);
        }
    }

    urls
}