Skip to main content

email_message/
mime_types.rs

1//! MIME content-type, content-disposition, and content-transfer-encoding
2//! types.
3//!
4//! Most of this module, `ContentType`, `MediaType`, `ContentDisposition`,
5//! `ContentTransferEncoding`, `ParameterValue`, is **always available**
6//! regardless of feature flags. The `mime` Cargo feature gates only
7//! [`MimePart`], the multipart/leaf MIME tree used by full-message
8//! rendering. A consumer that just wants typed content-type validation
9//! can use `email-message` with `default-features = false` and skip the
10//! `mime` feature.
11
12use std::fmt::Display;
13use std::str::FromStr;
14
15/// MIME content type.
16///
17/// # Equality and hashing
18///
19/// `PartialEq` / `Eq` / `Hash` are derived. To make derived equality
20/// match RFC 2045 §5.1 semantics (type, subtype, and parameter names
21/// are case-insensitive), construction lowercases those tokens. Parameter
22/// values are preserved as-is because their case sensitivity depends on
23/// the parameter (`boundary` is case-sensitive per RFC 2046 §5.1.1;
24/// `charset` is case-insensitive per RFC 2046 §4.1.2 but the kernel
25/// leaves the caller's bytes intact for round-trip fidelity).
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
// The wrapped string is private. Within this file it is only ever produced by
// `normalize_parameterized_value` (through `FromStr`), so it always holds the
// normalized header text that the derived equality and hashing rely on.
// (Plain `//` on purpose: `///` text on a `JsonSchema`-derived item may be
// picked up by the generated schema.)
pub struct ContentType(String);
29
30impl ContentType {
31    #[must_use]
32    pub fn as_str(&self) -> &str {
33        self.0.as_str()
34    }
35
36    /// Borrowed type/subtype view, with no parameters.
37    ///
38    /// Cheap: it slices the stored string; no allocation. Validation guarantees
39    /// a well-formed `type/subtype` prefix exists.
40    #[must_use]
41    pub fn media_type(&self) -> MediaType<'_> {
42        let head = self.0.split(';').next().unwrap_or("").trim();
43        let (type_, subtype) = head.split_once('/').unwrap_or((head, ""));
44        MediaType { type_, subtype }
45    }
46
47    /// Iterate `(name, value)` parameter pairs in declaration order.
48    ///
49    /// Quoted values are returned with surrounding quotes stripped and
50    /// backslash escapes resolved.
51    pub fn parameters(&self) -> impl Iterator<Item = (&str, ParameterValue<'_>)> {
52        let mut segments = split_content_type_segments(self.0.as_str()).into_iter();
53        // Skip the type/subtype segment.
54        let _ = segments.next();
55        segments.filter_map(|segment| {
56            let (name, value) = segment.trim().split_once('=')?;
57            Some((name.trim(), ParameterValue::from_raw(value.trim())))
58        })
59    }
60
61    /// Look up a parameter by case-insensitive name.
62    #[must_use]
63    pub fn parameter(&self, name: &str) -> Option<ParameterValue<'_>> {
64        self.parameters()
65            .find(|(key, _)| key.eq_ignore_ascii_case(name))
66            .map(|(_, value)| value)
67    }
68
69    /// Convenience accessor for the `boundary` parameter (multipart only).
70    #[must_use]
71    pub fn boundary(&self) -> Option<ParameterValue<'_>> {
72        self.parameter("boundary")
73    }
74
75    /// Convenience accessor for the `charset` parameter.
76    #[must_use]
77    pub fn charset(&self) -> Option<ParameterValue<'_>> {
78        self.parameter("charset")
79    }
80}
81
/// Borrowed `type/subtype` pair sliced out of a content-type string.
///
/// The derived `PartialEq` / `Hash` compare the two slices exactly as
/// stored; use [`MediaType::matches`] for a case-insensitive comparison
/// against a literal.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct MediaType<'a> {
    /// Top-level type: the text before the `/`.
    type_: &'a str,
    /// Subtype: the text after the `/`.
    subtype: &'a str,
}
88
89impl<'a> MediaType<'a> {
90    #[must_use]
91    pub const fn type_(&self) -> &'a str {
92        self.type_
93    }
94
95    #[must_use]
96    pub const fn subtype(&self) -> &'a str {
97        self.subtype
98    }
99
100    #[must_use]
101    pub fn is_text(&self) -> bool {
102        self.type_.eq_ignore_ascii_case("text")
103    }
104
105    #[must_use]
106    pub fn is_multipart(&self) -> bool {
107        self.type_.eq_ignore_ascii_case("multipart")
108    }
109
110    #[must_use]
111    pub fn is_image(&self) -> bool {
112        self.type_.eq_ignore_ascii_case("image")
113    }
114
115    /// Case-insensitive compare against a `"type/subtype"` literal.
116    #[must_use]
117    pub fn matches(&self, expected: &str) -> bool {
118        let Some((ty, sub)) = expected.split_once('/') else {
119            return false;
120        };
121        self.type_.eq_ignore_ascii_case(ty) && self.subtype.eq_ignore_ascii_case(sub)
122    }
123}
124
/// Borrowed header parameter value.
///
/// Keeps the raw text (quotes and backslash escapes included) and only
/// resolves them when [`ParameterValue::unquoted`] is called.
#[derive(Debug, Clone)]
pub struct ParameterValue<'a> {
    /// The value exactly as it appears in the header string.
    raw: &'a str,
}
130
131impl<'a> ParameterValue<'a> {
132    fn from_raw(raw: &'a str) -> Self {
133        Self { raw }
134    }
135
136    /// Raw textual form as it appears in the header (still quoted/escaped if it
137    /// was emitted that way).
138    #[must_use]
139    pub const fn as_raw(&self) -> &'a str {
140        self.raw
141    }
142
143    /// Returns the unquoted, unescaped string. For unquoted values this is a
144    /// borrow; for quoted values it allocates only to materialize the escapes.
145    #[must_use]
146    pub fn unquoted(&self) -> std::borrow::Cow<'a, str> {
147        let raw = self.raw;
148        if !raw.starts_with('"') || !raw.ends_with('"') || raw.len() < 2 {
149            return std::borrow::Cow::Borrowed(raw);
150        }
151
152        let inner = &raw[1..raw.len() - 1];
153        if !inner.contains('\\') {
154            return std::borrow::Cow::Borrowed(inner);
155        }
156
157        let mut out = String::with_capacity(inner.len());
158        let mut escaped = false;
159        for ch in inner.chars() {
160            if escaped {
161                out.push(ch);
162                escaped = false;
163            } else if ch == '\\' {
164                escaped = true;
165            } else {
166                out.push(ch);
167            }
168        }
169        std::borrow::Cow::Owned(out)
170    }
171}
172
173impl PartialEq<&str> for ParameterValue<'_> {
174    fn eq(&self, other: &&str) -> bool {
175        self.unquoted().as_ref() == *other
176    }
177}
178
179impl Display for ContentType {
180    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
181        f.write_str(self.as_str())
182    }
183}
184
185#[derive(Clone, Debug, PartialEq, Eq, thiserror::Error)]
186#[error("content type must have a type/subtype form")]
187pub struct ContentTypeParseError;
188
189impl FromStr for ContentType {
190    type Err = ContentTypeParseError;
191
192    fn from_str(s: &str) -> Result<Self, Self::Err> {
193        normalize_parameterized_value(s, true)
194            .map(Self)
195            .ok_or(ContentTypeParseError)
196    }
197}
198
199fn is_mime_token(value: &str) -> bool {
200    value.bytes().all(is_mime_token_byte)
201}
202
/// Splits a parameterized header value at every `;` that is outside a
/// double-quoted string.
///
/// Inside quotes a backslash escapes the following character, so an escaped
/// `"` does not end the quoted string. The returned slices are not trimmed,
/// and there is always at least one (the whole input when no separator is
/// found; a trailing `;` produces a final empty slice).
fn split_content_type_segments(value: &str) -> Vec<&str> {
    let mut pieces = Vec::new();
    let mut piece_start = 0;
    let mut quoted = false;
    let mut skip_next = false;

    for (pos, ch) in value.char_indices() {
        if skip_next {
            // This character was escaped by the preceding backslash.
            skip_next = false;
            continue;
        }

        match ch {
            '\\' if quoted => skip_next = true,
            '"' => quoted = !quoted,
            ';' if !quoted => {
                pieces.push(&value[piece_start..pos]);
                piece_start = pos + ch.len_utf8();
            }
            _ => {}
        }
    }

    pieces.push(&value[piece_start..]);
    pieces
}
234
/// Returns `true` for bytes allowed in a MIME token: ASCII letters and
/// digits plus ``! # $ % & ' * + - . ^ _ ` | ~``.
const fn is_mime_token_byte(byte: u8) -> bool {
    // `#`..=`'` covers `# $ % & '`; `^`..=`` ` `` covers `^ _` and the backtick.
    byte.is_ascii_alphanumeric()
        || matches!(
            byte,
            b'!' | b'#'..=b'\'' | b'*' | b'+' | b'-' | b'.' | b'^'..=b'`' | b'|' | b'~'
        )
}
257
258fn is_parameter_value(value: &str) -> bool {
259    if value.starts_with('"') {
260        return is_quoted_parameter_value(value);
261    }
262
263    is_mime_token(value)
264}
265
266fn is_quoted_parameter_value(value: &str) -> bool {
267    if !(value.ends_with('"') && value.len() >= 2) {
268        return false;
269    }
270
271    let mut escaped = false;
272    for byte in value[1..value.len() - 1].bytes() {
273        if escaped {
274            if is_forbidden_quoted_parameter_byte(byte) {
275                return false;
276            }
277            escaped = false;
278            continue;
279        }
280
281        if byte == b'\\' {
282            escaped = true;
283            continue;
284        }
285
286        if byte == b'"' || is_forbidden_quoted_parameter_byte(byte) {
287            return false;
288        }
289    }
290
291    !escaped
292}
293
/// Returns `true` for bytes that may not appear inside a MIME quoted
/// parameter: every ASCII control character (NUL, CR, LF, DEL, ...) except
/// horizontal tab.
///
/// Matches the byte-discipline `validate_header` (in `crate::message`) and
/// `push_header_line` (in `email_message_wire::rfc822`) apply to header
/// values, so a parsed `ContentType` cannot carry bytes the wire renderer
/// would later reject (META-001 R3 invariant).
const fn is_forbidden_quoted_parameter_byte(byte: u8) -> bool {
    // 0x09 (tab) is the single gap in the C0 control range; 0x7F is DEL.
    matches!(byte, 0x00..=0x08 | 0x0A..=0x1F | 0x7F)
}
303
304impl TryFrom<&str> for ContentType {
305    type Error = ContentTypeParseError;
306
307    fn try_from(value: &str) -> Result<Self, Self::Error> {
308        Self::from_str(value)
309    }
310}
311
312impl From<ContentType> for String {
313    fn from(value: ContentType) -> Self {
314        value.0
315    }
316}
317
/// Serializes as a plain string holding the stored header value.
#[cfg(feature = "serde")]
impl serde::Serialize for ContentType {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let text: &str = &self.0;
        serializer.serialize_str(text)
    }
}
327
/// Deserializes from a string and validates it through `FromStr`, so
/// deserialized values obey the same rules as parsed ones.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for ContentType {
    /// # Errors
    ///
    /// Propagates the deserializer's own error when the input is not a
    /// string, and reports a custom error carrying the parse failure's
    /// message when the string is not a valid content type.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        // Fully qualified so the call does not depend on `serde::Deserialize`
        // having been imported into this module.
        let raw = <String as serde::Deserialize<'de>>::deserialize(deserializer)?;
        raw.parse::<Self>().map_err(serde::de::Error::custom)
    }
}
338
/// Generates one of five fixed, known-valid content types.
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for ContentType {
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        const SAMPLES: [&str; 5] = [
            "text/plain",
            "text/html; charset=utf-8",
            "application/octet-stream",
            "image/png",
            "multipart/mixed; boundary=boundary",
        ];

        // The requested range matches the table length, so the index is in bounds.
        let index = usize::from(u.int_in_range::<u8>(0..=4)?);
        SAMPLES[index]
            .parse()
            .map_err(|_| arbitrary::Error::IncorrectFormat)
    }
}
352
353/// MIME content-transfer-encoding (RFC 2045 §6).
354///
355/// The five RFC-defined values are explicit variants; any other syntactically
356/// valid mime-token (e.g. an `x-` extension) round-trips through `Other`.
357///
358/// # Casing
359///
360/// RFC 2045 §6.1 says encoding names are case-insensitive. Both the
361/// known-variant parser and the [`Other`] branch normalize to ASCII
362/// lowercase on construction, so equality and hashing through the
363/// derived impls are case-insensitive automatically: `Other("Base64")`
364/// is unreachable (parses to [`Base64`] instead) and `Other("X-MyEnc")`
365/// stores `"x-myenc"`.
366///
367/// [`Base64`]: Self::Base64
368/// [`Other`]: Self::Other
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[non_exhaustive]
// Variant notes are plain `//` comments on purpose: `///` text on a
// `JsonSchema`-derived item may be picked up by the generated schema.
pub enum ContentTransferEncoding {
    // Rendered by `as_str` as `7bit`.
    SevenBit,
    // Rendered by `as_str` as `8bit`.
    EightBit,
    // Rendered by `as_str` as `binary`.
    Binary,
    // Rendered by `as_str` as `quoted-printable`.
    QuotedPrintable,
    // Rendered by `as_str` as `base64`.
    Base64,
    // Any other MIME token. `FromStr` stores it ASCII-lowercased; because the
    // variant is public, a directly constructed value is kept exactly as given.
    Other(String),
}
380
381impl ContentTransferEncoding {
382    #[must_use]
383    pub fn as_str(&self) -> &str {
384        match self {
385            Self::SevenBit => "7bit",
386            Self::EightBit => "8bit",
387            Self::Binary => "binary",
388            Self::QuotedPrintable => "quoted-printable",
389            Self::Base64 => "base64",
390            Self::Other(value) => value.as_str(),
391        }
392    }
393}
394
395impl Display for ContentTransferEncoding {
396    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
397        f.write_str(self.as_str())
398    }
399}
400
401#[derive(Clone, Debug, PartialEq, Eq, thiserror::Error)]
402#[error("content-transfer-encoding cannot be empty")]
403pub struct ContentTransferEncodingParseError;
404
405impl FromStr for ContentTransferEncoding {
406    type Err = ContentTransferEncodingParseError;
407
408    fn from_str(s: &str) -> Result<Self, Self::Err> {
409        let value = s.trim();
410        if value.is_empty() || !is_mime_token(value) {
411            return Err(ContentTransferEncodingParseError);
412        }
413        Ok(if value.eq_ignore_ascii_case("7bit") {
414            Self::SevenBit
415        } else if value.eq_ignore_ascii_case("8bit") {
416            Self::EightBit
417        } else if value.eq_ignore_ascii_case("binary") {
418            Self::Binary
419        } else if value.eq_ignore_ascii_case("quoted-printable") {
420            Self::QuotedPrintable
421        } else if value.eq_ignore_ascii_case("base64") {
422            Self::Base64
423        } else {
424            Self::Other(value.to_ascii_lowercase())
425        })
426    }
427}
428
429impl TryFrom<&str> for ContentTransferEncoding {
430    type Error = ContentTransferEncodingParseError;
431
432    fn try_from(value: &str) -> Result<Self, Self::Error> {
433        Self::from_str(value)
434    }
435}
436
/// Serializes as a plain string holding the token returned by `as_str`.
#[cfg(feature = "serde")]
impl serde::Serialize for ContentTransferEncoding {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let token: &str = self.as_str();
        serializer.serialize_str(token)
    }
}
446
/// Deserializes from a string and validates it through `FromStr`, so
/// deserialized values get the same validation and case normalization as
/// parsed ones.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for ContentTransferEncoding {
    /// # Errors
    ///
    /// Propagates the deserializer's own error when the input is not a
    /// string, and reports a custom error carrying the parse failure's
    /// message when the string is not a valid encoding token.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        // Fully qualified so the call does not depend on `serde::Deserialize`
        // having been imported into this module.
        let raw = <String as serde::Deserialize<'de>>::deserialize(deserializer)?;
        raw.parse::<Self>().map_err(serde::de::Error::custom)
    }
}
457
/// Generates one of the five known encodings or a fixed `x-experimental`
/// extension token.
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for ContentTransferEncoding {
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let selector = u.int_in_range::<u8>(0..=5)?;
        let encoding = match selector {
            0 => Self::SevenBit,
            1 => Self::EightBit,
            2 => Self::Binary,
            3 => Self::QuotedPrintable,
            4 => Self::Base64,
            _ => Self::Other("x-experimental".to_owned()),
        };
        Ok(encoding)
    }
}
471
472/// MIME content-disposition token (RFC 2183).
473///
474/// # Equality and hashing
475///
476/// Same shape as [`ContentType`]: construction lowercases the disposition
477/// kind and parameter names, then `PartialEq` / `Eq` / `Hash` compare that
478/// normalized string. RFC 2183 §3 makes the disposition type and parameter
479/// names case-insensitive but leaves parameter value case sensitivity
480/// dependent on the parameter. The kernel preserves parameter values
481/// verbatim; for semantic comparison route through the disposition's
482/// accessors rather than comparing raw input strings.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
// The wrapped string is private. Within this file it is only ever produced by
// `normalize_parameterized_value` (through `FromStr`), so it always holds the
// normalized header text that the derived equality and hashing rely on.
// (Plain `//` on purpose: `///` text on a `JsonSchema`-derived item may be
// picked up by the generated schema.)
pub struct ContentDisposition(String);
486
487impl ContentDisposition {
488    #[must_use]
489    pub fn as_str(&self) -> &str {
490        self.0.as_str()
491    }
492
493    /// Borrowed disposition kind (`"inline"`, `"attachment"`, or an
494    /// `x-` extension), with no parameters.
495    ///
496    /// Cheap: it slices the stored string; no allocation. Validation
497    /// guarantees a well-formed disposition token prefix exists.
498    #[must_use]
499    pub fn kind(&self) -> &str {
500        self.0.split(';').next().unwrap_or("").trim()
501    }
502
503    /// Iterate `(name, value)` parameter pairs in declaration order.
504    ///
505    /// Quoted values are returned with surrounding quotes stripped and
506    /// backslash escapes resolved, mirroring [`ContentType::parameters`].
507    pub fn parameters(&self) -> impl Iterator<Item = (&str, ParameterValue<'_>)> {
508        let mut segments = split_content_type_segments(self.0.as_str()).into_iter();
509        // Skip the disposition-kind segment.
510        let _ = segments.next();
511        segments.filter_map(|segment| {
512            let (name, value) = segment.trim().split_once('=')?;
513            Some((name.trim(), ParameterValue::from_raw(value.trim())))
514        })
515    }
516
517    /// Look up a parameter by case-insensitive name.
518    #[must_use]
519    pub fn parameter(&self, name: &str) -> Option<ParameterValue<'_>> {
520        self.parameters()
521            .find(|(key, _)| key.eq_ignore_ascii_case(name))
522            .map(|(_, value)| value)
523    }
524
525    /// Convenience accessor for the `filename` parameter.
526    ///
527    /// RFC 2183 §2.3 defines this as the suggested filename a recipient's
528    /// mail client should use when saving the attachment to disk. For
529    /// non-ASCII filenames the kernel emits `filename*` (RFC 2231
530    /// charset/language extension); this accessor returns `filename` when
531    /// present and otherwise falls back to `filename*`.
532    #[must_use]
533    pub fn filename(&self) -> Option<ParameterValue<'_>> {
534        self.parameter("filename")
535            .or_else(|| self.parameter("filename*"))
536    }
537
538    /// Returns `true` if the disposition kind is `inline`
539    /// (case-insensitive).
540    #[must_use]
541    pub fn is_inline(&self) -> bool {
542        self.kind().eq_ignore_ascii_case("inline")
543    }
544
545    /// Returns `true` if the disposition kind is `attachment`
546    /// (case-insensitive).
547    #[must_use]
548    pub fn is_attachment(&self) -> bool {
549        self.kind().eq_ignore_ascii_case("attachment")
550    }
551}
552
553impl Display for ContentDisposition {
554    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
555        f.write_str(self.as_str())
556    }
557}
558
559#[derive(Clone, Debug, PartialEq, Eq, thiserror::Error)]
560#[error("content-disposition cannot be empty")]
561pub struct ContentDispositionParseError;
562
563impl FromStr for ContentDisposition {
564    type Err = ContentDispositionParseError;
565
566    fn from_str(s: &str) -> Result<Self, Self::Err> {
567        normalize_parameterized_value(s, false)
568            .map(Self)
569            .ok_or(ContentDispositionParseError)
570    }
571}
572
573impl TryFrom<&str> for ContentDisposition {
574    type Error = ContentDispositionParseError;
575
576    fn try_from(value: &str) -> Result<Self, Self::Error> {
577        Self::from_str(value)
578    }
579}
580
581/// Validate and normalize a parameterized header value (`Content-Type` shape
582/// or `Content-Disposition` shape). When `with_subtype` is true, the head
583/// must be `type/subtype`; otherwise it must be a single MIME token.
584///
585/// Lowercases the type/subtype tokens and parameter names so derived
586/// equality matches RFC 2045 §5.1 semantics. Parameter values are
587/// preserved verbatim. Returns `None` if the input fails any validation
588/// rule the previous bool-returning checks enforced.
589fn normalize_parameterized_value(value: &str, with_subtype: bool) -> Option<String> {
590    let value = value.trim();
591    if value.is_empty() {
592        return None;
593    }
594
595    let segments = split_content_type_segments(value);
596    let mut parts = segments.into_iter();
597    let head = parts.next()?.trim();
598
599    let canonical_head = if with_subtype {
600        let (ty, subtype) = head.split_once('/')?;
601        if ty.is_empty()
602            || subtype.is_empty()
603            || subtype.contains('/')
604            || !is_mime_token(ty)
605            || !is_mime_token(subtype)
606        {
607            return None;
608        }
609        format!(
610            "{}/{}",
611            ty.to_ascii_lowercase(),
612            subtype.to_ascii_lowercase()
613        )
614    } else {
615        if head.is_empty() || !is_mime_token(head) {
616            return None;
617        }
618        head.to_ascii_lowercase()
619    };
620
621    let mut canonical = canonical_head;
622    for parameter in parts {
623        let parameter = parameter.trim();
624        let (name, raw_value) = parameter.split_once('=')?;
625        let name = name.trim();
626        let raw_value = raw_value.trim();
627        if name.is_empty()
628            || raw_value.is_empty()
629            || !is_mime_token(name)
630            || !is_parameter_value(raw_value)
631        {
632            return None;
633        }
634        canonical.push_str("; ");
635        canonical.push_str(&name.to_ascii_lowercase());
636        canonical.push('=');
637        canonical.push_str(raw_value);
638    }
639
640    Some(canonical)
641}
642
/// Serializes as a plain string holding the stored header value.
#[cfg(feature = "serde")]
impl serde::Serialize for ContentDisposition {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let text: &str = &self.0;
        serializer.serialize_str(text)
    }
}
652
/// Deserializes from a string and validates it through `FromStr`, so
/// deserialized values obey the same rules as parsed ones.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for ContentDisposition {
    /// # Errors
    ///
    /// Propagates the deserializer's own error when the input is not a
    /// string, and reports a custom error carrying the parse failure's
    /// message when the string is not a valid content disposition.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        // Fully qualified so the call does not depend on `serde::Deserialize`
        // having been imported into this module.
        let raw = <String as serde::Deserialize<'de>>::deserialize(deserializer)?;
        raw.parse::<Self>().map_err(serde::de::Error::custom)
    }
}
663
/// Generates one of three fixed, known-valid dispositions.
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for ContentDisposition {
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        const SAMPLES: [&str; 3] = [
            "inline",
            "attachment",
            "attachment; filename=example.txt",
        ];

        // The requested range matches the table length, so the index is in bounds.
        let index = usize::from(u.int_in_range::<u8>(0..=2)?);
        SAMPLES[index]
            .parse()
            .map_err(|_| arbitrary::Error::IncorrectFormat)
    }
}
675
676/// Low-level MIME tree node, gated behind the `mime` Cargo feature.
677///
678/// `MimePart` is the kernel's escape hatch for callers building exotic
679/// MIME structures (custom multipart shapes, hand-rolled
680/// transfer-encoding choices, etc.). High-level paths through
681/// [`Body::Text`](crate::Body) / `Body::Html` / `Body::TextAndHtml` cover
682/// the common cases and apply byte-discipline (auto-promote non-ASCII
683/// text to base64, etc.) on the caller's behalf.
684///
685/// # Body byte-discipline is the caller's responsibility
686///
687/// Constructing `MimePart::Leaf` directly bypasses the kernel's
688/// auto-promotion path. The wire renderer enforces *header* invariants
689/// strictly (rejects raw CR / LF / NUL / non-tab control chars in any
690/// header value, regardless of `Content-Transfer-Encoding`), but it
691/// **trusts the caller's bytes** for body content under any transfer
692/// encoding other than `base64` / `quoted-printable`. That includes
693/// `7bit`, `8bit`, `binary`, and any `Other(...)` value: the renderer
694/// emits the body verbatim. RFC 2045 §6.2 forbids bytes > 127 under
695/// `7bit` and forbids bare CR / LF under both `7bit` and `8bit`;
696/// callers building `MimePart::Leaf` with a non-base64 / non-QP
697/// encoding must satisfy those invariants themselves, or downstream
698/// MTAs may reject the message.
699///
700/// # Variant set
701///
702/// Deliberately *not* `#[non_exhaustive]`. RFC 2046 closes MIME
703/// parts to exactly `discrete` (Leaf) and `composite` (Multipart);
704/// the kernel cannot honestly add a third variant without an RFC
705/// update. The exhaustive `match` shape lets downstream callers
706/// type-cover both arms without an `_ =>` clause.
707///
708/// # Untrusted-deserialize caveat
709///
710/// `MimePart::Multipart { parts: Vec<Self> }` is recursive: any
711/// caller deserializing a `MimePart` (or a `Body` containing one)
712/// from untrusted input must pre-bound the input length and the
713/// recursion depth. `serde_json` defaults to a 128-frame recursion
714/// limit which is safe; other formats (e.g. `serde_yaml`,
715/// `bincode`, `rmp-serde`, `serde_cbor`) may not, and a deeply
716/// nested attacker payload yields a `MimePart` value of arbitrary
717/// depth. The wire renderer (`email_message_wire::render_rfc822`)
718/// enforces a `MAX_MULTIPART_DEPTH` cap on outbound trees, including
719/// up to two frames of attachment-wrapping when inline and/or regular
720/// attachments are present, but other consumers of a deserialized
721/// `MimePart` (e.g. arbitrary caller code that walks the tree) must
722/// defend themselves.
#[cfg(feature = "mime")]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
#[derive(Clone, Debug, PartialEq, Eq)]
// Notes below are plain `//` comments on purpose: `///` text on a
// `JsonSchema`-derived item may be picked up by the generated schema.
pub enum MimePart {
    // A single part that carries body bytes.
    Leaf {
        // The header wrapper types serialize as plain strings, so the schema
        // describes them as `String` / `Option<String>`.
        #[cfg_attr(feature = "schemars", schemars(with = "String"))]
        content_type: ContentType,
        #[cfg_attr(feature = "schemars", schemars(with = "Option<String>"))]
        content_transfer_encoding: Option<ContentTransferEncoding>,
        #[cfg_attr(feature = "schemars", schemars(with = "Option<String>"))]
        content_disposition: Option<ContentDisposition>,
        // Raw body bytes. Per the type-level docs, the caller is responsible
        // for their byte-discipline unless the encoding is base64 or
        // quoted-printable.
        body: Vec<u8>,
    },
    // A container part holding child parts.
    Multipart {
        #[cfg_attr(feature = "schemars", schemars(with = "String"))]
        content_type: ContentType,
        // NOTE(review): presumably a boundary is chosen elsewhere when this is
        // `None`; confirm against the renderer.
        boundary: Option<String>,
        // Children may themselves be `Multipart`, so the tree depth is
        // unbounded at the type level.
        parts: Vec<Self>,
    },
}
745
#[cfg(test)]
mod tests {
    use std::collections::hash_map::DefaultHasher;
    use std::collections::HashSet;
    use std::hash::{Hash, Hasher};

    use super::{ContentDisposition, ContentTransferEncoding, ContentType};

    /// Feeds `value` through a fresh `DefaultHasher` and returns the digest.
    fn digest<T: Hash>(value: &T) -> u64 {
        let mut hasher = DefaultHasher::new();
        value.hash(&mut hasher);
        hasher.finish()
    }

    #[test]
    fn content_type_accepts_valid_media_types_and_parameters() {
        let accepted = [
            "text/plain",
            "text/plain;charset=utf-8",
            "multipart/related; type=\"text/html\"",
            "application/octet-stream; name=\"a;b.txt\"",
        ];
        for value in accepted {
            let parsed = ContentType::try_from(value);
            assert!(parsed.is_ok(), "expected valid content type: {value}");
        }
    }

    #[test]
    fn content_type_rejects_invalid_media_types() {
        let rejected = [
            "text/",
            "/plain",
            "text/plain/html",
            "text /plain",
            "text/plain; charset",
            "text/plain; charset=\"unterminated",
        ];
        for value in rejected {
            let parsed = ContentType::try_from(value);
            assert!(parsed.is_err(), "expected invalid content type: {value}");
        }
    }

    #[test]
    fn content_type_rejects_quoted_parameter_with_control_chars() {
        // Raw NUL, BEL, VT and ESC bytes must be rejected to match the wire
        // renderer's `push_header_line` byte discipline.
        let rejected = [
            "text/plain; name=\"x\u{0}y\"",
            "text/plain; name=\"x\u{07}y\"",
            "text/plain; name=\"x\u{0B}y\"",
            "text/plain; name=\"x\u{1B}y\"",
        ];
        for value in rejected {
            let parsed = ContentType::try_from(value);
            assert!(parsed.is_err(), "expected control-char rejection: {value:?}");
        }
    }

    #[test]
    fn content_type_rejects_quoted_parameter_with_escaped_control_chars() {
        // A preceding `\` does not make a control character acceptable.
        let rejected = [
            "text/plain; name=\"x\\\u{0}y\"",
            "text/plain; name=\"x\\\u{07}y\"",
        ];
        for value in rejected {
            let parsed = ContentType::try_from(value);
            assert!(
                parsed.is_err(),
                "expected escaped-control-char rejection: {value:?}"
            );
        }
    }

    #[test]
    fn content_type_accepts_tab_inside_quoted_parameter() {
        // Tab is the one control character the byte-discipline rule allows.
        let parsed = ContentType::try_from("text/plain; name=\"a\tb\"");
        assert!(parsed.is_ok());
    }

    #[test]
    fn content_type_media_type_view_splits_type_and_subtype() {
        let content_type: ContentType = "text/plain; charset=utf-8".parse().unwrap();
        let view = content_type.media_type();

        assert_eq!(view.type_(), "text");
        assert_eq!(view.subtype(), "plain");
        assert!(view.is_text());
        assert!(!view.is_multipart());
        assert!(view.matches("text/plain"));
        assert!(view.matches("TEXT/PLAIN"));
    }

    #[test]
    fn content_type_parameter_lookup_is_case_insensitive_and_unquotes() {
        let content_type: ContentType =
            "multipart/mixed; Boundary=\"abc\\\"def\"".parse().unwrap();
        let boundary = content_type.boundary().expect("boundary present");

        assert_eq!(boundary.as_raw(), "\"abc\\\"def\"");
        assert_eq!(boundary.unquoted().as_ref(), "abc\"def");
    }

    #[test]
    fn content_type_parameters_iterates_in_declaration_order() {
        let content_type: ContentType = "text/html; charset=utf-8; boundary=x".parse().unwrap();
        let mut seen: Vec<(String, String)> = Vec::new();
        for (name, value) in content_type.parameters() {
            seen.push((name.to_owned(), value.unquoted().into_owned()));
        }

        let expected = vec![
            ("charset".to_owned(), "utf-8".to_owned()),
            ("boundary".to_owned(), "x".to_owned()),
        ];
        assert_eq!(seen, expected);
    }

    #[test]
    fn content_transfer_encoding_canonicalizes_known_tokens() {
        let base64: ContentTransferEncoding = "Base64".parse().unwrap();
        assert_eq!(base64.as_str(), "base64");

        let seven_bit: ContentTransferEncoding = "7BIT".parse().unwrap();
        assert_eq!(seven_bit.as_str(), "7bit");

        let quoted_printable: ContentTransferEncoding = "Quoted-Printable".parse().unwrap();
        assert_eq!(quoted_printable, ContentTransferEncoding::QuotedPrintable);

        let other: ContentTransferEncoding = "x-my-encoding".parse().unwrap();
        assert_eq!(
            other,
            ContentTransferEncoding::Other("x-my-encoding".to_owned())
        );
        assert_eq!(other.as_str(), "x-my-encoding");
    }

    #[test]
    fn content_disposition_kind_and_parameter_accessors() {
        let disposition: ContentDisposition = "attachment; filename=\"report.pdf\""
            .parse()
            .expect("disposition should parse");

        assert_eq!(disposition.kind(), "attachment");
        assert!(disposition.is_attachment());
        assert!(!disposition.is_inline());

        let filename = disposition.filename().expect("filename present");
        assert_eq!(filename.unquoted().as_ref(), "report.pdf");
    }

    #[test]
    fn content_disposition_filename_falls_back_to_extended_parameter() {
        let disposition: ContentDisposition = "attachment; filename*=utf-8''f%C3%A1jl.txt"
            .parse()
            .expect("disposition should parse");

        let filename = disposition.filename().expect("filename* present");
        assert_eq!(filename.as_raw(), "utf-8''f%C3%A1jl.txt");
    }

    #[test]
    fn content_disposition_inline_kind_is_case_insensitive() {
        let disposition: ContentDisposition =
            "INLINE".parse().expect("disposition should parse");

        assert!(disposition.is_inline());
        assert!(!disposition.is_attachment());
    }

    #[test]
    fn content_disposition_parameters_iterates_in_declaration_order() {
        let disposition: ContentDisposition =
            "attachment; filename=report.pdf; size=42".parse().unwrap();
        let mut seen: Vec<(String, String)> = Vec::new();
        for (name, value) in disposition.parameters() {
            seen.push((name.to_owned(), value.unquoted().into_owned()));
        }

        let expected = vec![
            ("filename".to_owned(), "report.pdf".to_owned()),
            ("size".to_owned(), "42".to_owned()),
        ];
        assert_eq!(seen, expected);
    }

    #[test]
    fn content_disposition_parameter_lookup_is_case_insensitive() {
        let disposition: ContentDisposition =
            "attachment; FileName=\"x.txt\"".parse().unwrap();

        for lookup in ["filename", "FILENAME"] {
            let found = disposition.parameter(lookup).unwrap();
            assert_eq!(found.unquoted().as_ref(), "x.txt");
        }
    }

    #[test]
    fn content_transfer_encoding_other_is_case_insensitive() {
        // RFC 2045 §6.1: encoding names are case-insensitive, so differently
        // cased spellings of one x-* extension must compare equal and hash
        // to the same value.
        let mixed: ContentTransferEncoding = "X-MyEnc".parse().unwrap();
        let lower: ContentTransferEncoding = "x-myenc".parse().unwrap();
        let upper: ContentTransferEncoding = "X-MYENC".parse().unwrap();

        assert_eq!(mixed, lower);
        assert_eq!(mixed, upper);
        assert_eq!(mixed.as_str(), "x-myenc");
        assert_eq!(upper.as_str(), "x-myenc");

        // The value is therefore safe to use as a HashMap/HashSet key.
        let mut set: HashSet<ContentTransferEncoding> = HashSet::new();
        set.insert(mixed);
        assert!(set.contains(&lower));
        assert!(set.contains(&upper));
    }

    #[test]
    fn content_type_eq_is_case_insensitive_after_normalize() {
        let upper = ContentType::try_from("TEXT/PLAIN; CHARSET=UTF-8").unwrap();
        let lower = ContentType::try_from("text/plain; charset=UTF-8").unwrap();

        assert_eq!(upper, lower);
        assert_eq!(upper.as_str(), "text/plain; charset=UTF-8");
        assert_eq!(digest(&upper), digest(&lower));

        // Parameter values are preserved as-is (`boundary` is case-sensitive
        // per RFC 2046 §5.1.1).
        let preserved = ContentType::try_from("multipart/mixed; BOUNDARY=\"AbC\"").unwrap();
        assert_eq!(preserved.as_str(), "multipart/mixed; boundary=\"AbC\"");
    }

    #[test]
    fn content_disposition_eq_is_case_insensitive_after_normalize() {
        let upper = ContentDisposition::try_from("ATTACHMENT; FILENAME=\"x.pdf\"").unwrap();
        let lower = ContentDisposition::try_from("attachment; filename=\"x.pdf\"").unwrap();

        assert_eq!(upper, lower);
        assert_eq!(upper.as_str(), "attachment; filename=\"x.pdf\"");
    }
}