Skip to main content

email_message/
mime_types.rs

1//! MIME content-type, content-disposition, and content-transfer-encoding
2//! types.
3//!
4//! Most of this module, `ContentType`, `MediaType`, `ContentDisposition`,
5//! `ContentTransferEncoding`, `ParameterValue`, is **always available**
6//! regardless of feature flags. The `mime` Cargo feature gates only
7//! [`MimePart`], the multipart/leaf MIME tree used by full-message
8//! rendering. A consumer that just wants typed content-type validation
9//! can use `email-message` with `default-features = false` and skip the
10//! `mime` feature.
11
12use std::fmt::Display;
13use std::str::FromStr;
14
/// MIME content type: a `type/subtype` head plus optional `; name=value`
/// parameters, stored as one string.
///
/// Values built through [`FromStr`] hold the normalized form produced by
/// `normalize_parameterized_value`.
///
/// # Equality and hashing
///
/// `PartialEq` / `Eq` / `Hash` are derived. To make derived equality
/// match RFC 2045 §5.1 semantics (type, subtype, and parameter names
/// are case-insensitive), construction lowercases those tokens. Parameter
/// values are preserved as-is because their case sensitivity depends on
/// the parameter (`boundary` is case-sensitive per RFC 2046 §5.1.1;
/// `charset` is case-insensitive per RFC 2046 §4.1.2 but the kernel
/// leaves the caller's bytes intact for round-trip fidelity).
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct ContentType(String);
28
29impl ContentType {
30    #[must_use]
31    pub fn as_str(&self) -> &str {
32        self.0.as_str()
33    }
34
35    /// Borrowed type/subtype view, with no parameters.
36    ///
37    /// Cheap: it slices the stored string; no allocation. Validation guarantees
38    /// a well-formed `type/subtype` prefix exists.
39    #[must_use]
40    pub fn media_type(&self) -> MediaType<'_> {
41        let head = self.0.split(';').next().unwrap_or("").trim();
42        let (type_, subtype) = head.split_once('/').unwrap_or((head, ""));
43        MediaType { type_, subtype }
44    }
45
46    /// Iterate `(name, value)` parameter pairs in declaration order.
47    ///
48    /// Quoted values are returned with surrounding quotes stripped and
49    /// backslash escapes resolved.
50    pub fn parameters(&self) -> impl Iterator<Item = (&str, ParameterValue<'_>)> {
51        let mut segments = split_content_type_segments(self.0.as_str()).into_iter();
52        // Skip the type/subtype segment.
53        let _ = segments.next();
54        segments.filter_map(|segment| {
55            let (name, value) = segment.trim().split_once('=')?;
56            Some((name.trim(), ParameterValue::from_raw(value.trim())))
57        })
58    }
59
60    /// Look up a parameter by case-insensitive name.
61    #[must_use]
62    pub fn parameter(&self, name: &str) -> Option<ParameterValue<'_>> {
63        self.parameters()
64            .find(|(key, _)| key.eq_ignore_ascii_case(name))
65            .map(|(_, value)| value)
66    }
67
68    /// Convenience accessor for the `boundary` parameter (multipart only).
69    #[must_use]
70    pub fn boundary(&self) -> Option<ParameterValue<'_>> {
71        self.parameter("boundary")
72    }
73
74    /// Convenience accessor for the `charset` parameter.
75    #[must_use]
76    pub fn charset(&self) -> Option<ParameterValue<'_>> {
77        self.parameter("charset")
78    }
79}
80
/// Borrowed `type/subtype` pair taken from a content type.
///
/// Both halves are string slices; all comparisons offered here ignore ASCII
/// case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct MediaType<'a> {
    type_: &'a str,
    subtype: &'a str,
}

impl<'a> MediaType<'a> {
    /// The top-level type, e.g. `text` in `text/plain`.
    #[must_use]
    pub const fn type_(&self) -> &'a str {
        self.type_
    }

    /// The subtype, e.g. `plain` in `text/plain`.
    #[must_use]
    pub const fn subtype(&self) -> &'a str {
        self.subtype
    }

    /// `true` when the top-level type is `text` (ASCII case-insensitive).
    #[must_use]
    pub fn is_text(&self) -> bool {
        "text".eq_ignore_ascii_case(self.type_)
    }

    /// `true` when the top-level type is `multipart` (ASCII case-insensitive).
    #[must_use]
    pub fn is_multipart(&self) -> bool {
        "multipart".eq_ignore_ascii_case(self.type_)
    }

    /// `true` when the top-level type is `image` (ASCII case-insensitive).
    #[must_use]
    pub fn is_image(&self) -> bool {
        "image".eq_ignore_ascii_case(self.type_)
    }

    /// Compares against a `"type/subtype"` literal, ignoring ASCII case.
    ///
    /// Returns `false` when `expected` contains no `/`. Only the first `/`
    /// splits `expected`; anything after it is compared as the subtype.
    #[must_use]
    pub fn matches(&self, expected: &str) -> bool {
        match expected.split_once('/') {
            Some((ty, sub)) => {
                self.type_.eq_ignore_ascii_case(ty) && self.subtype.eq_ignore_ascii_case(sub)
            }
            None => false,
        }
    }
}
123
/// Borrowed parameter value that keeps the raw header text and resolves
/// quoted-string syntax only on request.
#[derive(Debug, Clone)]
pub struct ParameterValue<'a> {
    raw: &'a str,
}

impl<'a> ParameterValue<'a> {
    /// Wraps a raw parameter value without inspecting it.
    fn from_raw(raw: &'a str) -> Self {
        Self { raw }
    }

    /// The text exactly as wrapped: still quoted and escaped if it was
    /// written that way.
    #[must_use]
    pub const fn as_raw(&self) -> &'a str {
        self.raw
    }

    /// Returns the value with surrounding double quotes removed and backslash
    /// escapes resolved.
    ///
    /// Values that are not wrapped in a pair of double quotes are returned
    /// borrowed and untouched. Quoted values without any backslash are
    /// returned as a borrowed slice of the inner text. Only quoted values
    /// containing a backslash allocate.
    #[must_use]
    pub fn unquoted(&self) -> std::borrow::Cow<'a, str> {
        use std::borrow::Cow;

        let raw = self.raw;
        let is_quoted = raw.len() >= 2 && raw.starts_with('"') && raw.ends_with('"');
        if !is_quoted {
            return Cow::Borrowed(raw);
        }

        let inner = &raw[1..raw.len() - 1];
        if !inner.contains('\\') {
            return Cow::Borrowed(inner);
        }

        let mut resolved = String::with_capacity(inner.len());
        let mut chars = inner.chars();
        while let Some(ch) = chars.next() {
            if ch == '\\' {
                // A backslash makes the next character literal; a dangling
                // trailing backslash contributes nothing.
                if let Some(literal) = chars.next() {
                    resolved.push(literal);
                }
            } else {
                resolved.push(ch);
            }
        }
        Cow::Owned(resolved)
    }
}

/// Compares the unquoted, unescaped form of the value against a string slice.
impl PartialEq<&str> for ParameterValue<'_> {
    fn eq(&self, other: &&str) -> bool {
        let unquoted = self.unquoted();
        &*unquoted == *other
    }
}
177
178impl Display for ContentType {
179    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
180        f.write_str(self.as_str())
181    }
182}
183
/// Error returned when a string cannot be parsed as a [`ContentType`].
///
/// The `FromStr` impl returns this for every validation failure reported by
/// `normalize_parameterized_value`: empty input, a head that is not
/// `type/subtype`, or a malformed parameter. The message only mentions the
/// `type/subtype` form.
#[derive(Clone, Debug, PartialEq, Eq, thiserror::Error)]
#[error("content type must have a type/subtype form")]
pub struct ContentTypeParseError;
187
188impl FromStr for ContentType {
189    type Err = ContentTypeParseError;
190
191    fn from_str(s: &str) -> Result<Self, Self::Err> {
192        normalize_parameterized_value(s, true)
193            .map(Self)
194            .ok_or(ContentTypeParseError)
195    }
196}
197
198fn is_mime_token(value: &str) -> bool {
199    value.bytes().all(is_mime_token_byte)
200}
201
/// Splits a parameterized header value on `;`, leaving semicolons inside
/// double-quoted strings alone.
///
/// Inside quotes a backslash makes the following character literal, so an
/// escaped `"` does not close the string. Segments are returned untrimmed and
/// without the separating `;`; the result always has at least one element
/// (the empty input yields one empty segment).
fn split_content_type_segments(value: &str) -> Vec<&str> {
    let mut pieces = Vec::new();
    let mut segment_start = 0;
    let mut quoted = false;
    let mut skip_next = false;

    for (pos, ch) in value.char_indices() {
        if skip_next {
            // This character was escaped by the preceding backslash.
            skip_next = false;
            continue;
        }

        match ch {
            '\\' if quoted => skip_next = true,
            '"' => quoted = !quoted,
            ';' if !quoted => {
                pieces.push(&value[segment_start..pos]);
                segment_start = pos + ch.len_utf8();
            }
            _ => {}
        }
    }

    pieces.push(&value[segment_start..]);
    pieces
}
233
/// Returns `true` when `byte` may appear in an RFC 2045 §5.1 `token`.
///
/// A token character is any US-ASCII character except SPACE, control
/// characters, and the `tspecials`
/// `( ) < > @ , ; : \ " / [ ] ? =`. That is exactly the printable range
/// `0x21..=0x7E` minus the tspecials, so `{` and `}` are accepted; they are
/// not tspecials, and an explicit allow-list is easy to leave them off of.
const fn is_mime_token_byte(byte: u8) -> bool {
    byte.is_ascii_graphic()
        && !matches!(
            byte,
            b'(' | b')'
                | b'<'
                | b'>'
                | b'@'
                | b','
                | b';'
                | b':'
                | b'\\'
                | b'"'
                | b'/'
                | b'['
                | b']'
                | b'?'
                | b'='
        )
}
256
257fn is_parameter_value(value: &str) -> bool {
258    if value.starts_with('"') {
259        return is_quoted_parameter_value(value);
260    }
261
262    is_mime_token(value)
263}
264
265fn is_quoted_parameter_value(value: &str) -> bool {
266    if !(value.ends_with('"') && value.len() >= 2) {
267        return false;
268    }
269
270    let mut escaped = false;
271    for byte in value[1..value.len() - 1].bytes() {
272        if escaped {
273            if is_forbidden_quoted_parameter_byte(byte) {
274                return false;
275            }
276            escaped = false;
277            continue;
278        }
279
280        if byte == b'\\' {
281            escaped = true;
282            continue;
283        }
284
285        if byte == b'"' || is_forbidden_quoted_parameter_byte(byte) {
286            return false;
287        }
288    }
289
290    !escaped
291}
292
/// Returns `true` for bytes that may not appear inside a MIME quoted
/// parameter: every ASCII control character (NUL, CR, LF, DEL, ...) except
/// horizontal tab.
///
/// The rule is meant to match the byte-discipline that `validate_header` (in
/// `crate::message`) and `push_header_line` (in
/// `email_message_wire::rfc822`) apply to header values, so a parsed
/// `ContentType` cannot carry bytes the wire renderer would later reject
/// (META-001 R3 invariant).
const fn is_forbidden_quoted_parameter_byte(byte: u8) -> bool {
    // 0x09 (tab) is deliberately missing from the first range.
    matches!(byte, 0x00..=0x08 | 0x0A..=0x1F | 0x7F)
}
302
303impl TryFrom<&str> for ContentType {
304    type Error = ContentTypeParseError;
305
306    fn try_from(value: &str) -> Result<Self, Self::Error> {
307        Self::from_str(value)
308    }
309}
310
311impl From<ContentType> for String {
312    fn from(value: ContentType) -> Self {
313        value.0
314    }
315}
316
/// Serializes the content type as its stored header string.
#[cfg(feature = "serde")]
impl serde::Serialize for ContentType {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        serializer.serialize_str(&self.0)
    }
}
326
/// Deserializes from a string and runs it through the [`FromStr`] parser, so
/// deserialized values are validated and normalized like parsed ones.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for ContentType {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let raw = <String as serde::Deserialize>::deserialize(deserializer)?;
        raw.parse::<Self>()
            .map_err(<D::Error as serde::de::Error>::custom)
    }
}
337
/// JSON Schema: a plain string, inlined at each use site.
#[cfg(feature = "schemars")]
impl schemars::JsonSchema for ContentType {
    fn inline_schema() -> bool {
        true
    }

    fn schema_name() -> std::borrow::Cow<'static, str> {
        std::borrow::Cow::Borrowed("ContentType")
    }

    fn schema_id() -> std::borrow::Cow<'static, str> {
        std::borrow::Cow::Borrowed(concat!(module_path!(), "::ContentType"))
    }

    fn json_schema(_generator: &mut schemars::SchemaGenerator) -> schemars::Schema {
        schemars::json_schema!({
            "type": "string",
            "description": "MIME Content-Type field value"
        })
    }
}
359
/// Generates one of five fixed, known-valid content types.
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for ContentType {
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        const SAMPLES: [&str; 5] = [
            "text/plain",
            "text/html; charset=utf-8",
            "application/octet-stream",
            "image/png",
            "multipart/mixed; boundary=boundary",
        ];

        // The range is bounded to the table size, so indexing cannot go out
        // of bounds.
        let index = usize::from(u.int_in_range::<u8>(0..=4)?);
        SAMPLES[index]
            .parse()
            .map_err(|_| arbitrary::Error::IncorrectFormat)
    }
}
373
/// MIME content-transfer-encoding (RFC 2045 §6).
///
/// Each of the five encodings defined by the RFC has its own variant. Any
/// other syntactically valid mime-token (for example an `x-` extension) is
/// carried by [`Other`].
///
/// # Casing
///
/// RFC 2045 §6.1 makes encoding names case-insensitive. Parsing maps the
/// known names to their variants regardless of case and lowercases whatever
/// ends up in [`Other`], so the derived equality and hashing behave
/// case-insensitively for parsed values: `"Base64"` parses to [`Base64`],
/// never to `Other("Base64")`, and `"X-MyEnc"` is stored as `"x-myenc"`.
///
/// [`Base64`]: Self::Base64
/// [`Other`]: Self::Other
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum ContentTransferEncoding {
    SevenBit,
    EightBit,
    Binary,
    QuotedPrintable,
    Base64,
    Other(String),
}

impl ContentTransferEncoding {
    /// The canonical lowercase token for the known variants, or the stored
    /// token for [`Self::Other`].
    #[must_use]
    pub fn as_str(&self) -> &str {
        match self {
            Self::Other(token) => token.as_str(),
            Self::Base64 => "base64",
            Self::QuotedPrintable => "quoted-printable",
            Self::Binary => "binary",
            Self::EightBit => "8bit",
            Self::SevenBit => "7bit",
        }
    }
}

/// Writes the same token as [`ContentTransferEncoding::as_str`]; formatter
/// width and padding are not applied.
impl Display for ContentTransferEncoding {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let token = self.as_str();
        f.write_str(token)
    }
}
420
/// Error returned when a string cannot be parsed as a
/// [`ContentTransferEncoding`].
///
/// The `FromStr` impl returns this both for empty (or all-whitespace) input
/// and for input that is not a single MIME token, even though the message
/// only mentions the empty case.
#[derive(Clone, Debug, PartialEq, Eq, thiserror::Error)]
#[error("content-transfer-encoding cannot be empty")]
pub struct ContentTransferEncodingParseError;
424
425impl FromStr for ContentTransferEncoding {
426    type Err = ContentTransferEncodingParseError;
427
428    fn from_str(s: &str) -> Result<Self, Self::Err> {
429        let value = s.trim();
430        if value.is_empty() || !is_mime_token(value) {
431            return Err(ContentTransferEncodingParseError);
432        }
433        Ok(if value.eq_ignore_ascii_case("7bit") {
434            Self::SevenBit
435        } else if value.eq_ignore_ascii_case("8bit") {
436            Self::EightBit
437        } else if value.eq_ignore_ascii_case("binary") {
438            Self::Binary
439        } else if value.eq_ignore_ascii_case("quoted-printable") {
440            Self::QuotedPrintable
441        } else if value.eq_ignore_ascii_case("base64") {
442            Self::Base64
443        } else {
444            Self::Other(value.to_ascii_lowercase())
445        })
446    }
447}
448
449impl TryFrom<&str> for ContentTransferEncoding {
450    type Error = ContentTransferEncodingParseError;
451
452    fn try_from(value: &str) -> Result<Self, Self::Error> {
453        Self::from_str(value)
454    }
455}
456
/// Serializes the encoding as its token string.
#[cfg(feature = "serde")]
impl serde::Serialize for ContentTransferEncoding {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let token = self.as_str();
        serializer.serialize_str(token)
    }
}
466
/// Deserializes from a string and runs it through the [`FromStr`] parser, so
/// known names map to their variants and extensions are lowercased.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for ContentTransferEncoding {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let raw = <String as serde::Deserialize>::deserialize(deserializer)?;
        raw.parse::<Self>()
            .map_err(<D::Error as serde::de::Error>::custom)
    }
}
477
/// JSON Schema: a plain string, inlined at each use site.
#[cfg(feature = "schemars")]
impl schemars::JsonSchema for ContentTransferEncoding {
    fn inline_schema() -> bool {
        true
    }

    fn schema_name() -> std::borrow::Cow<'static, str> {
        std::borrow::Cow::Borrowed("ContentTransferEncoding")
    }

    fn schema_id() -> std::borrow::Cow<'static, str> {
        std::borrow::Cow::Borrowed(concat!(module_path!(), "::ContentTransferEncoding"))
    }

    fn json_schema(_generator: &mut schemars::SchemaGenerator) -> schemars::Schema {
        schemars::json_schema!({
            "type": "string",
            "description": "RFC 2045 Content-Transfer-Encoding token"
        })
    }
}
499
/// Generates one of the five known encodings or a fixed `x-experimental`
/// extension token.
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for ContentTransferEncoding {
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let choice = u.int_in_range::<u8>(0..=5)?;
        let encoding = match choice {
            0 => Self::SevenBit,
            1 => Self::EightBit,
            2 => Self::Binary,
            3 => Self::QuotedPrintable,
            4 => Self::Base64,
            _ => Self::Other("x-experimental".to_owned()),
        };
        Ok(encoding)
    }
}
513
/// MIME content-disposition value (RFC 2183): a disposition kind plus
/// optional `; name=value` parameters, stored as one string.
///
/// Values built through [`FromStr`] hold the normalized form produced by
/// `normalize_parameterized_value`.
///
/// # Equality and hashing
///
/// Same shape as [`ContentType`]: construction lowercases the disposition
/// kind and parameter names, then `PartialEq` / `Eq` / `Hash` compare that
/// normalized string. RFC 2183 §3 makes the disposition type and parameter
/// names case-insensitive but leaves parameter value case sensitivity
/// dependent on the parameter. The kernel preserves parameter values
/// verbatim; for semantic comparison route through the disposition's
/// accessors rather than comparing raw input strings.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct ContentDisposition(String);
527
528impl ContentDisposition {
529    #[must_use]
530    pub fn as_str(&self) -> &str {
531        self.0.as_str()
532    }
533
534    /// Borrowed disposition kind (`"inline"`, `"attachment"`, or an
535    /// `x-` extension), with no parameters.
536    ///
537    /// Cheap: it slices the stored string; no allocation. Validation
538    /// guarantees a well-formed disposition token prefix exists.
539    #[must_use]
540    pub fn kind(&self) -> &str {
541        self.0.split(';').next().unwrap_or("").trim()
542    }
543
544    /// Iterate `(name, value)` parameter pairs in declaration order.
545    ///
546    /// Quoted values are returned with surrounding quotes stripped and
547    /// backslash escapes resolved, mirroring [`ContentType::parameters`].
548    pub fn parameters(&self) -> impl Iterator<Item = (&str, ParameterValue<'_>)> {
549        let mut segments = split_content_type_segments(self.0.as_str()).into_iter();
550        // Skip the disposition-kind segment.
551        let _ = segments.next();
552        segments.filter_map(|segment| {
553            let (name, value) = segment.trim().split_once('=')?;
554            Some((name.trim(), ParameterValue::from_raw(value.trim())))
555        })
556    }
557
558    /// Look up a parameter by case-insensitive name.
559    #[must_use]
560    pub fn parameter(&self, name: &str) -> Option<ParameterValue<'_>> {
561        self.parameters()
562            .find(|(key, _)| key.eq_ignore_ascii_case(name))
563            .map(|(_, value)| value)
564    }
565
566    /// Convenience accessor for the `filename` parameter.
567    ///
568    /// RFC 2183 §2.3 defines this as the suggested filename a recipient's
569    /// mail client should use when saving the attachment to disk. For
570    /// non-ASCII filenames the kernel emits `filename*` (RFC 2231
571    /// charset/language extension); this accessor returns `filename` when
572    /// present and otherwise falls back to `filename*`.
573    #[must_use]
574    pub fn filename(&self) -> Option<ParameterValue<'_>> {
575        self.parameter("filename")
576            .or_else(|| self.parameter("filename*"))
577    }
578
579    /// Returns `true` if the disposition kind is `inline`
580    /// (case-insensitive).
581    #[must_use]
582    pub fn is_inline(&self) -> bool {
583        self.kind().eq_ignore_ascii_case("inline")
584    }
585
586    /// Returns `true` if the disposition kind is `attachment`
587    /// (case-insensitive).
588    #[must_use]
589    pub fn is_attachment(&self) -> bool {
590        self.kind().eq_ignore_ascii_case("attachment")
591    }
592}
593
594impl Display for ContentDisposition {
595    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
596        f.write_str(self.as_str())
597    }
598}
599
/// Error returned when a string cannot be parsed as a
/// [`ContentDisposition`].
///
/// The `FromStr` impl returns this for every validation failure reported by
/// `normalize_parameterized_value` (empty input, a kind that is not a MIME
/// token, or a malformed parameter), even though the message only mentions
/// the empty case.
#[derive(Clone, Debug, PartialEq, Eq, thiserror::Error)]
#[error("content-disposition cannot be empty")]
pub struct ContentDispositionParseError;
603
604impl FromStr for ContentDisposition {
605    type Err = ContentDispositionParseError;
606
607    fn from_str(s: &str) -> Result<Self, Self::Err> {
608        normalize_parameterized_value(s, false)
609            .map(Self)
610            .ok_or(ContentDispositionParseError)
611    }
612}
613
614impl TryFrom<&str> for ContentDisposition {
615    type Error = ContentDispositionParseError;
616
617    fn try_from(value: &str) -> Result<Self, Self::Error> {
618        Self::from_str(value)
619    }
620}
621
622/// Validate and normalize a parameterized header value (`Content-Type` shape
623/// or `Content-Disposition` shape). When `with_subtype` is true, the head
624/// must be `type/subtype`; otherwise it must be a single MIME token.
625///
626/// Lowercases the type/subtype tokens and parameter names so derived
627/// equality matches RFC 2045 §5.1 semantics. Parameter values are
628/// preserved verbatim. Returns `None` if the input fails any validation
629/// rule the previous bool-returning checks enforced.
630fn normalize_parameterized_value(value: &str, with_subtype: bool) -> Option<String> {
631    let value = value.trim();
632    if value.is_empty() {
633        return None;
634    }
635
636    let segments = split_content_type_segments(value);
637    let mut parts = segments.into_iter();
638    let head = parts.next()?.trim();
639
640    let canonical_head = if with_subtype {
641        let (ty, subtype) = head.split_once('/')?;
642        if ty.is_empty()
643            || subtype.is_empty()
644            || subtype.contains('/')
645            || !is_mime_token(ty)
646            || !is_mime_token(subtype)
647        {
648            return None;
649        }
650        format!(
651            "{}/{}",
652            ty.to_ascii_lowercase(),
653            subtype.to_ascii_lowercase()
654        )
655    } else {
656        if head.is_empty() || !is_mime_token(head) {
657            return None;
658        }
659        head.to_ascii_lowercase()
660    };
661
662    let mut canonical = canonical_head;
663    for parameter in parts {
664        let parameter = parameter.trim();
665        let (name, raw_value) = parameter.split_once('=')?;
666        let name = name.trim();
667        let raw_value = raw_value.trim();
668        if name.is_empty()
669            || raw_value.is_empty()
670            || !is_mime_token(name)
671            || !is_parameter_value(raw_value)
672        {
673            return None;
674        }
675        canonical.push_str("; ");
676        canonical.push_str(&name.to_ascii_lowercase());
677        canonical.push('=');
678        canonical.push_str(raw_value);
679    }
680
681    Some(canonical)
682}
683
/// Serializes the disposition as its stored header string.
#[cfg(feature = "serde")]
impl serde::Serialize for ContentDisposition {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        serializer.serialize_str(&self.0)
    }
}
693
/// Deserializes from a string and runs it through the [`FromStr`] parser, so
/// deserialized values are validated and normalized like parsed ones.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for ContentDisposition {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let raw = <String as serde::Deserialize>::deserialize(deserializer)?;
        raw.parse::<Self>()
            .map_err(<D::Error as serde::de::Error>::custom)
    }
}
704
/// JSON Schema: a plain string, inlined at each use site.
#[cfg(feature = "schemars")]
impl schemars::JsonSchema for ContentDisposition {
    fn inline_schema() -> bool {
        true
    }

    fn schema_name() -> std::borrow::Cow<'static, str> {
        std::borrow::Cow::Borrowed("ContentDisposition")
    }

    fn schema_id() -> std::borrow::Cow<'static, str> {
        std::borrow::Cow::Borrowed(concat!(module_path!(), "::ContentDisposition"))
    }

    fn json_schema(_generator: &mut schemars::SchemaGenerator) -> schemars::Schema {
        schemars::json_schema!({
            "type": "string",
            "description": "RFC 2183 Content-Disposition field value"
        })
    }
}
726
/// Generates one of three fixed, known-valid dispositions.
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for ContentDisposition {
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        const SAMPLES: [&str; 3] = [
            "inline",
            "attachment",
            "attachment; filename=example.txt",
        ];

        // The range is bounded to the table size, so indexing cannot go out
        // of bounds.
        let index = usize::from(u.int_in_range::<u8>(0..=2)?);
        SAMPLES[index]
            .parse()
            .map_err(|_| arbitrary::Error::IncorrectFormat)
    }
}
738
/// Low-level MIME tree node, gated behind the `mime` Cargo feature.
///
/// `MimePart` is the kernel's escape hatch for callers building exotic
/// MIME structures (custom multipart shapes, hand-rolled
/// transfer-encoding choices, etc.). High-level paths through
/// [`Body::Text`](crate::Body) / `Body::Html` / `Body::TextAndHtml` cover
/// the common cases and apply byte-discipline (auto-promote non-ASCII
/// text to base64, etc.) on the caller's behalf.
///
/// # Body byte-discipline is the caller's responsibility
///
/// Constructing `MimePart::Leaf` directly bypasses the kernel's
/// auto-promotion path. The wire renderer enforces *header* invariants
/// strictly (rejects raw CR / LF / NUL / non-tab control chars in any
/// header value, regardless of `Content-Transfer-Encoding`), but it
/// **trusts the caller's bytes** for body content under any transfer
/// encoding other than `base64` / `quoted-printable`. That includes
/// `7bit`, `8bit`, `binary`, and any `Other(...)` value: the renderer
/// emits the body verbatim. RFC 2045 §6.2 forbids bytes > 127 under
/// `7bit` and forbids bare CR / LF under both `7bit` and `8bit`;
/// callers building `MimePart::Leaf` with a non-base64 / non-QP
/// encoding must satisfy those invariants themselves, or downstream
/// MTAs may reject the message.
///
/// # Variant set
///
/// Deliberately *not* `#[non_exhaustive]`. RFC 2046 closes MIME
/// parts to exactly `discrete` (Leaf) and `composite` (Multipart);
/// the kernel cannot honestly add a third variant without an RFC
/// update. The exhaustive `match` shape lets downstream callers
/// type-cover both arms without an `_ =>` clause.
///
/// # Untrusted-deserialize caveat
///
/// `MimePart::Multipart { parts: Vec<Self> }` is recursive: any
/// caller deserializing a `MimePart` (or a `Body` containing one)
/// from untrusted input must pre-bound the input length and the
/// recursion depth. `serde_json` defaults to a 128-frame recursion
/// limit which is safe; other formats (e.g. `serde_yaml`,
/// `bincode`, `rmp-serde`, `serde_cbor`) may not, and a deeply
/// nested attacker payload yields a `MimePart` value of arbitrary
/// depth. The wire renderer (`email_message_wire::render_rfc822`)
/// enforces a `MAX_MULTIPART_DEPTH` cap on outbound trees, including
/// up to two frames of attachment-wrapping when inline and/or regular
/// attachments are present, but other consumers of a deserialized
/// `MimePart` (e.g. arbitrary caller code that walks the tree) must
/// defend themselves.
#[cfg(feature = "mime")]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum MimePart {
    /// A discrete part: headers plus a body.
    Leaf {
        /// `Content-Type` of this part.
        content_type: ContentType,
        /// Optional `Content-Transfer-Encoding`; `None` leaves it unspecified.
        content_transfer_encoding: Option<ContentTransferEncoding>,
        /// Optional `Content-Disposition`; `None` leaves it unspecified.
        content_disposition: Option<ContentDisposition>,
        /// Body bytes as supplied by the caller. The serde representation
        /// in this file carries them as a standard base64 string.
        body: Vec<u8>,
    },
    /// A composite part containing child parts.
    Multipart {
        /// `Content-Type` of the container.
        content_type: ContentType,
        /// Optional explicit boundary string. The serde representation in
        /// this file omits the field when it is `None`.
        boundary: Option<String>,
        /// Child parts; each may itself be a `Multipart`.
        parts: Vec<Self>,
    },
}
802
/// Serializes a part as a struct tagged by a `type` field (`"leaf"` or
/// `"multipart"`).
///
/// Optional fields are omitted when `None`, and the declared field count is
/// adjusted to match. A leaf body is emitted as a standard base64 string.
#[cfg(all(feature = "mime", feature = "serde"))]
impl serde::Serialize for MimePart {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        use base64::Engine as _;
        use serde::ser::SerializeStruct as _;

        match self {
            Self::Leaf {
                content_type,
                content_transfer_encoding,
                content_disposition,
                body,
            } => {
                // Always present: type, content_type, body.
                let field_count = 3
                    + usize::from(content_transfer_encoding.is_some())
                    + usize::from(content_disposition.is_some());

                let mut state = serializer.serialize_struct("MimePart", field_count)?;
                state.serialize_field("type", "leaf")?;
                state.serialize_field("content_type", content_type)?;
                if let Some(cte) = content_transfer_encoding {
                    state.serialize_field("content_transfer_encoding", cte)?;
                }
                if let Some(cd) = content_disposition {
                    state.serialize_field("content_disposition", cd)?;
                }
                let encoded = base64::engine::general_purpose::STANDARD.encode(body);
                state.serialize_field("body", &encoded)?;
                state.end()
            }
            Self::Multipart {
                content_type,
                boundary,
                parts,
            } => {
                // Always present: type, content_type, parts.
                let field_count = 3 + usize::from(boundary.is_some());

                let mut state = serializer.serialize_struct("MimePart", field_count)?;
                state.serialize_field("type", "multipart")?;
                state.serialize_field("content_type", content_type)?;
                if let Some(b) = boundary {
                    state.serialize_field("boundary", b)?;
                }
                state.serialize_field("parts", parts)?;
                state.end()
            }
        }
    }
}
860
/// Deserializes the `type`-tagged representation written by the `Serialize`
/// impl.
///
/// Optional fields default to `None` and a missing `parts` list defaults to
/// empty. A leaf body must be a standard base64 string; a decoding failure is
/// reported as a custom deserialization error.
#[cfg(all(feature = "mime", feature = "serde"))]
impl<'de> serde::Deserialize<'de> for MimePart {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use base64::Engine as _;

        // Mirror of `MimePart` whose leaf body is still the base64 text.
        #[derive(serde::Deserialize)]
        #[serde(tag = "type", rename_all = "snake_case")]
        enum RawMimePart {
            Leaf {
                content_type: ContentType,
                #[serde(default)]
                content_transfer_encoding: Option<ContentTransferEncoding>,
                #[serde(default)]
                content_disposition: Option<ContentDisposition>,
                body: String,
            },
            Multipart {
                content_type: ContentType,
                #[serde(default)]
                boundary: Option<String>,
                #[serde(default)]
                parts: Vec<MimePart>,
            },
        }

        match <RawMimePart as serde::Deserialize>::deserialize(deserializer)? {
            RawMimePart::Multipart {
                content_type,
                boundary,
                parts,
            } => Ok(Self::Multipart {
                content_type,
                boundary,
                parts,
            }),
            RawMimePart::Leaf {
                content_type,
                content_transfer_encoding,
                content_disposition,
                body,
            } => base64::engine::general_purpose::STANDARD
                .decode(body.as_bytes())
                .map(|decoded| Self::Leaf {
                    content_type,
                    content_transfer_encoding,
                    content_disposition,
                    body: decoded,
                })
                .map_err(|err| {
                    serde::de::Error::custom(format!("invalid base64 MIME body: {err}"))
                }),
        }
    }
}
920
#[cfg(all(feature = "mime", feature = "schemars"))]
impl schemars::JsonSchema for MimePart {
    /// Short schema name used for the definition entry.
    fn schema_name() -> std::borrow::Cow<'static, str> {
        std::borrow::Cow::Borrowed("MimePart")
    }

    /// Module-qualified identifier for this schema.
    fn schema_id() -> std::borrow::Cow<'static, str> {
        std::borrow::Cow::Borrowed(concat!(module_path!(), "::MimePart"))
    }

    /// Describes the tagged JSON shape of a MIME part as a `oneOf` over the
    /// `leaf` and `multipart` object forms.
    ///
    /// Unlike `Mailbox` / `Group` / `Address`, this schema is *not* wrapped
    /// in an `rfc5322-string-compat` `oneOf: [object, string]`. That
    /// asymmetry is intentional: a MIME part has no RFC 5322 string form, so
    /// there is no producer-side "MIME part as a header-like string" wire
    /// shape to migrate from.
    fn json_schema(generator: &mut schemars::SchemaGenerator) -> schemars::Schema {
        // Leaf node: typed headers plus a base64-encoded body.
        let leaf = schemars::json_schema!({
            "type": "object",
            "properties": {
                "type": {"const": "leaf"},
                "content_type": {
                    "type": "string",
                    "description": "MIME Content-Type field value"
                },
                "content_transfer_encoding": {
                    "type": "string",
                    "description": "RFC 2045 Content-Transfer-Encoding token"
                },
                "content_disposition": {
                    "type": "string",
                    "description": "RFC 2183 Content-Disposition field value"
                },
                "body": {
                    "type": "string",
                    "contentEncoding": "base64",
                    "description": "Base64-encoded MIME part body (RFC 4648, with padding)"
                }
            },
            "required": ["type", "content_type", "body"]
        });

        // Multipart node: children refer back to this same schema through
        // the generator, which is what makes the definition recursive.
        let nested_part = generator.subschema_for::<MimePart>();
        let multipart = schemars::json_schema!({
            "type": "object",
            "properties": {
                "type": {"const": "multipart"},
                "content_type": {
                    "type": "string",
                    "description": "MIME Content-Type field value"
                },
                "boundary": {"type": "string"},
                "parts": {"type": "array", "items": nested_part}
            },
            "required": ["type", "content_type", "parts"]
        });

        schemars::json_schema!({
            "oneOf": [leaf, multipart]
        })
    }
}
981
#[cfg(test)]
mod tests {
    use std::collections::hash_map::DefaultHasher;
    use std::collections::HashSet;
    use std::hash::{Hash, Hasher};

    use super::{ContentDisposition, ContentTransferEncoding, ContentType};

    /// Hashes `value` with a fresh `DefaultHasher` and returns the digest.
    fn hash_of<T: Hash>(value: &T) -> u64 {
        let mut hasher = DefaultHasher::new();
        value.hash(&mut hasher);
        hasher.finish()
    }

    // ---- ContentType: validation -------------------------------------

    #[test]
    fn content_type_accepts_valid_media_types_and_parameters() {
        let well_formed = [
            "text/plain",
            "text/plain;charset=utf-8",
            "multipart/related; type=\"text/html\"",
            "application/octet-stream; name=\"a;b.txt\"",
        ];

        for value in well_formed {
            let parsed = ContentType::try_from(value);
            assert!(parsed.is_ok(), "expected valid content type: {value}");
        }
    }

    #[test]
    fn content_type_rejects_invalid_media_types() {
        let malformed = [
            "text/",
            "/plain",
            "text/plain/html",
            "text /plain",
            "text/plain; charset",
            "text/plain; charset=\"unterminated",
        ];

        for value in malformed {
            let parsed = ContentType::try_from(value);
            assert!(parsed.is_err(), "expected invalid content type: {value}");
        }
    }

    #[test]
    fn content_type_rejects_quoted_parameter_with_control_chars() {
        // Raw NUL, BEL, VT and ESC bytes inside a quoted value have to be
        // refused so validation agrees with the byte discipline of the wire
        // renderer's `push_header_line`.
        let with_raw_controls = [
            "text/plain; name=\"x\u{0}y\"",
            "text/plain; name=\"x\u{07}y\"",
            "text/plain; name=\"x\u{0B}y\"",
            "text/plain; name=\"x\u{1B}y\"",
        ];

        for value in with_raw_controls {
            let parsed = ContentType::try_from(value);
            assert!(
                parsed.is_err(),
                "expected control-char rejection: {value:?}"
            );
        }
    }

    #[test]
    fn content_type_rejects_quoted_parameter_with_escaped_control_chars() {
        // A preceding backslash does not make a control character legal.
        let with_escaped_controls = [
            "text/plain; name=\"x\\\u{0}y\"",
            "text/plain; name=\"x\\\u{07}y\"",
        ];

        for value in with_escaped_controls {
            let parsed = ContentType::try_from(value);
            assert!(
                parsed.is_err(),
                "expected escaped-control-char rejection: {value:?}"
            );
        }
    }

    #[test]
    fn content_type_accepts_tab_inside_quoted_parameter() {
        // Horizontal tab is the one documented exception to the
        // control-character ban.
        let parsed = ContentType::try_from("text/plain; name=\"a\tb\"");
        assert!(parsed.is_ok());
    }

    // ---- ContentType: accessors --------------------------------------

    #[test]
    fn content_type_media_type_view_splits_type_and_subtype() {
        let content_type = "text/plain; charset=utf-8"
            .parse::<ContentType>()
            .unwrap();
        let view = content_type.media_type();

        assert_eq!(view.type_(), "text");
        assert_eq!(view.subtype(), "plain");
        assert!(view.is_text());
        assert!(!view.is_multipart());
        assert!(view.matches("text/plain"));
        assert!(view.matches("TEXT/PLAIN"));
    }

    #[test]
    fn content_type_parameter_lookup_is_case_insensitive_and_unquotes() {
        let content_type = "multipart/mixed; Boundary=\"abc\\\"def\""
            .parse::<ContentType>()
            .unwrap();

        let boundary = content_type.boundary().expect("boundary present");
        assert_eq!(boundary.as_raw(), "\"abc\\\"def\"");
        assert_eq!(boundary.unquoted().as_ref(), "abc\"def");
    }

    #[test]
    fn content_type_parameters_iterates_in_declaration_order() {
        let content_type = "text/html; charset=utf-8; boundary=x"
            .parse::<ContentType>()
            .unwrap();

        let mut pairs: Vec<(String, String)> = Vec::new();
        for (name, value) in content_type.parameters() {
            pairs.push((name.to_owned(), value.unquoted().into_owned()));
        }

        let expected = vec![
            (String::from("charset"), String::from("utf-8")),
            (String::from("boundary"), String::from("x")),
        ];
        assert_eq!(pairs, expected);
    }

    // ---- ContentTransferEncoding -------------------------------------

    #[test]
    fn content_transfer_encoding_canonicalizes_known_tokens() {
        let base64 = "Base64".parse::<ContentTransferEncoding>().unwrap();
        assert_eq!(base64.as_str(), "base64");

        let seven_bit = "7BIT".parse::<ContentTransferEncoding>().unwrap();
        assert_eq!(seven_bit.as_str(), "7bit");

        let quoted_printable = "Quoted-Printable"
            .parse::<ContentTransferEncoding>()
            .unwrap();
        assert_eq!(quoted_printable, ContentTransferEncoding::QuotedPrintable);

        // Unknown tokens land in the `Other` variant.
        let other = "x-my-encoding".parse::<ContentTransferEncoding>().unwrap();
        assert_eq!(
            other,
            ContentTransferEncoding::Other("x-my-encoding".to_owned())
        );
        assert_eq!(other.as_str(), "x-my-encoding");
    }

    // ---- ContentDisposition ------------------------------------------

    #[test]
    fn content_disposition_kind_and_parameter_accessors() {
        let disposition = "attachment; filename=\"report.pdf\""
            .parse::<ContentDisposition>()
            .expect("disposition should parse");

        assert_eq!(disposition.kind(), "attachment");
        assert!(disposition.is_attachment());
        assert!(!disposition.is_inline());

        let filename = disposition.filename().expect("filename present");
        assert_eq!(filename.unquoted().as_ref(), "report.pdf");
    }

    #[test]
    fn content_disposition_filename_falls_back_to_extended_parameter() {
        let disposition = "attachment; filename*=utf-8''f%C3%A1jl.txt"
            .parse::<ContentDisposition>()
            .expect("disposition should parse");

        let filename = disposition.filename().expect("filename* present");
        assert_eq!(filename.as_raw(), "utf-8''f%C3%A1jl.txt");
    }

    #[test]
    fn content_disposition_inline_kind_is_case_insensitive() {
        let disposition = "INLINE"
            .parse::<ContentDisposition>()
            .expect("disposition should parse");

        assert!(disposition.is_inline());
        assert!(!disposition.is_attachment());
    }

    #[test]
    fn content_disposition_parameters_iterates_in_declaration_order() {
        let disposition = "attachment; filename=report.pdf; size=42"
            .parse::<ContentDisposition>()
            .unwrap();

        let mut pairs: Vec<(String, String)> = Vec::new();
        for (name, value) in disposition.parameters() {
            pairs.push((name.to_owned(), value.unquoted().into_owned()));
        }

        let expected = vec![
            (String::from("filename"), String::from("report.pdf")),
            (String::from("size"), String::from("42")),
        ];
        assert_eq!(pairs, expected);
    }

    #[test]
    fn content_disposition_parameter_lookup_is_case_insensitive() {
        let disposition = "attachment; FileName=\"x.txt\""
            .parse::<ContentDisposition>()
            .unwrap();

        let by_lowercase = disposition.parameter("filename").unwrap();
        assert_eq!(by_lowercase.unquoted().as_ref(), "x.txt");

        let by_uppercase = disposition.parameter("FILENAME").unwrap();
        assert_eq!(by_uppercase.unquoted().as_ref(), "x.txt");
    }

    // ---- Equality and hashing ----------------------------------------

    #[test]
    fn content_transfer_encoding_other_is_case_insensitive() {
        // Encoding names are case-insensitive (RFC 2045 §6.1), so every
        // spelling of one x-* extension has to be equal and hash alike.
        let mixed = "X-MyEnc".parse::<ContentTransferEncoding>().unwrap();
        let lower = "x-myenc".parse::<ContentTransferEncoding>().unwrap();
        let upper = "X-MYENC".parse::<ContentTransferEncoding>().unwrap();

        assert_eq!(mixed, lower);
        assert_eq!(mixed, upper);
        assert_eq!(mixed.as_str(), "x-myenc");
        assert_eq!(upper.as_str(), "x-myenc");

        // Consequently the value is usable as a HashMap/HashSet key.
        let mut seen: HashSet<ContentTransferEncoding> = HashSet::new();
        seen.insert(mixed);
        assert!(seen.contains(&lower));
        assert!(seen.contains(&upper));
    }

    #[test]
    fn content_type_eq_is_case_insensitive_after_normalize() {
        let upper = ContentType::try_from("TEXT/PLAIN; CHARSET=UTF-8").unwrap();
        let lower = ContentType::try_from("text/plain; charset=UTF-8").unwrap();

        assert_eq!(upper, lower);
        assert_eq!(upper.as_str(), "text/plain; charset=UTF-8");

        // Equal values must also produce equal hashes.
        assert_eq!(hash_of(&upper), hash_of(&lower));

        // Only the parameter *name* is lowercased; the value keeps its case
        // (`boundary` is case-sensitive per RFC 2046 §5.1.1).
        let preserved = ContentType::try_from("multipart/mixed; BOUNDARY=\"AbC\"").unwrap();
        assert_eq!(preserved.as_str(), "multipart/mixed; boundary=\"AbC\"");
    }

    #[test]
    fn content_disposition_eq_is_case_insensitive_after_normalize() {
        let upper = ContentDisposition::try_from("ATTACHMENT; FILENAME=\"x.pdf\"").unwrap();
        let lower = ContentDisposition::try_from("attachment; filename=\"x.pdf\"").unwrap();

        assert_eq!(upper, lower);
        assert_eq!(upper.as_str(), "attachment; filename=\"x.pdf\"");
    }
}