Skip to main content

mime_tree/
part.rs

1use std::fmt;
2
3use serde::{Deserialize, Serialize};
4
/// Transfer encoding of a MIME body part.
///
/// Each variant stands for one `Content-Transfer-Encoding` token.  The
/// `Display` impl for this type renders the lowercase token noted on each
/// variant below.
///
/// The enum is `#[non_exhaustive]`: code outside this crate must include a
/// wildcard arm when matching, because further variants may be added.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum TransferEncoding {
    /// Displayed as `identity`.
    ///
    /// NOTE(review): presumably the "no transformation applied" case —
    /// confirm how the parser chooses this over `7bit`/`8bit`/`binary`.
    Identity,
    /// Displayed as `quoted-printable`.
    QuotedPrintable,
    /// Displayed as `base64`.
    Base64,
    /// Displayed as `7bit`.
    SevenBit,
    /// Displayed as `8bit`.
    EightBit,
    /// Displayed as `binary`.
    Binary,
    /// UUencode, as used in `Content-Transfer-Encoding: x-uuencode`,
    /// `x-uue`, or `uuencode`.  RFC 2045 permits x-token CTE values.
    ///
    /// Whichever spelling appeared in the message, `Display` always emits
    /// `x-uuencode` for this variant.
    UUEncode,
}
19
20impl fmt::Display for TransferEncoding {
21    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
22        match self {
23            TransferEncoding::Identity => f.write_str("identity"),
24            TransferEncoding::QuotedPrintable => f.write_str("quoted-printable"),
25            TransferEncoding::Base64 => f.write_str("base64"),
26            TransferEncoding::SevenBit => f.write_str("7bit"),
27            TransferEncoding::EightBit => f.write_str("8bit"),
28            TransferEncoding::Binary => f.write_str("binary"),
29            TransferEncoding::UUEncode => f.write_str("x-uuencode"),
30        }
31    }
32}
33
/// A decoded RFC 5322 / MIME header field.
///
/// The struct carries the header in two forms: `value`, a UTF-8 string
/// suitable for display, and `raw_value`, the untouched bytes from the wire.
///
/// For headers whose value mail-parser parses as plain text (`Subject`,
/// `Comments`, `Content-Description`, and any unstructured header), `value`
/// contains the fully decoded Unicode string (RFC 2047 encoded-words are
/// already resolved).
///
/// For all other header types (`Address`, `DateTime`, `ContentType`,
/// `Received`), `value` is the raw bytes sliced from the original message
/// and converted with `String::from_utf8_lossy`.  These structured values
/// require their own dedicated parsers — see
/// [`parse_header_typed`][crate::parse_header_typed].
///
/// `raw_value` always contains the original bytes of the header field
/// value from the wire message (the portion after the `:` separator).
/// Use `raw_value` — not `value.as_bytes()` — when feeding a header
/// into [`parse_header_typed`][crate::parse_header_typed], because
/// `value` may have undergone lossy UTF-8 conversion for structured
/// headers.
///
/// The struct is `#[non_exhaustive]`: code outside this crate cannot build
/// it with a struct literal and must use `..` when destructuring it.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub struct ParsedHeader {
    /// Header field name (e.g. `"From"`, `"Subject"`).
    pub name: String,
    /// Decoded or lossy-converted header field value as a UTF-8 string.
    ///
    /// For text headers, this is the fully decoded value (RFC 2047
    /// encoded-words resolved). For structured headers, this is a
    /// lossy UTF-8 conversion of the raw bytes — non-UTF-8 bytes are
    /// replaced with U+FFFD, so the original bytes cannot be recovered
    /// from this field.
    pub value: String,
    /// Raw bytes of the header field value from the original message.
    ///
    /// This is the byte sequence after the `:` separator, preserving the
    /// original encoding without lossy UTF-8 conversion. For accurate
    /// typed parsing, always use these bytes with
    /// [`parse_header_typed`][crate::parse_header_typed] rather than
    /// `value.as_bytes()`.
    pub raw_value: Vec<u8>,
}
74
/// A single MIME part in the parsed tree.
///
/// Byte ranges (`header_range`, `body_range`) are `(offset, length)` indices
/// into the caller's original `&[u8]`. A `ParsedPart` holds no body or header
/// bytes itself — only these ranges plus decoded metadata strings — so the
/// caller must keep the original buffer to read the content.
///
/// Both fields use `u32` to guarantee identical serialized representation on
/// 32-bit and 64-bit hosts.  The 4 GiB `u32` range covers all realistic
/// messages.  Callers processing raw input exceeding 4 GiB MUST reject it
/// before calling [`parse`][crate::parse] — `mail-parser` uses `usize`
/// offsets internally and `parse` narrows them to `u32`, which would produce
/// incorrect byte ranges without error on oversized input.
///
/// NOTE(review): this comment previously said RFC 5321 recommends a 10 MB
/// message size limit.  RFC 5321 §4.5.3.1.7 appears to specify only a
/// 64K-octet *minimum* that servers must accept; the 10 MB figure looks like
/// a common deployment limit rather than an RFC recommendation — confirm.
///
/// NOTE(review): this comment previously said the narrowing happens "via
/// `saturating_sub`".  `saturating_sub` clamps at zero rather than narrowing
/// an integer type, so the exact conversion should be checked against the
/// `parse` implementation.
///
/// For `multipart/*` parts, `children` is non-empty and `body_range` covers
/// the entire multipart body including boundaries.
///
/// The struct is `#[non_exhaustive]`: code outside this crate cannot build
/// it with a struct literal and must use `..` when destructuring it.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub struct ParsedPart {
    /// IMAP dotted-path part ID: `"1"`, `"1.1"`, `"1.2"`, etc.
    pub part_id: String,
    /// Content-Type media type/subtype (e.g. `"text/plain"`).
    pub content_type: String,
    /// Charset parameter from Content-Type, if present.
    ///
    /// `None` means no explicit `charset=` parameter was present on the
    /// Content-Type header. Per RFC 2045 §5.2 the default is US-ASCII, but
    /// `decode_body_value()` defaults to UTF-8 instead (a strict superset)
    /// for better handling of the modern email corpus.
    pub charset: Option<String>,
    /// Content-Transfer-Encoding.
    pub transfer_encoding: TransferEncoding,
    /// Content-Disposition value (e.g. `"attachment"`, `"inline"`).
    pub disposition: Option<String>,
    /// Filename from Content-Disposition or Content-Type.
    pub filename: Option<String>,
    /// Content-ID header value, if present.
    pub cid: Option<String>,
    /// `(offset, length)` of this part's headers in the original bytes.
    ///
    /// To access individual typed headers for a part, slice
    /// `raw[offset..offset+length]` and pass the result to
    /// [`parse_header_typed`][crate::parse_header_typed].
    pub header_range: (u32, u32),
    /// `(offset, length)` of this part's body (pre-decode) in the original bytes.
    ///
    /// "Pre-decode" means the range addresses the bytes as they appear in the
    /// message, before any Content-Transfer-Encoding is undone.
    pub body_range: (u32, u32),
    /// Child parts. Non-empty only for `multipart/*` content types.
    pub children: Vec<ParsedPart>,
    /// True if mail-parser flagged this part as having a structural encoding
    /// problem (e.g., invalid base64 padding in the raw transfer encoding).
    ///
    /// This is a parse-time flag, distinct from
    /// [`DecodedBodyValue::is_encoding_problem`] which is set during
    /// charset conversion in `decode_body_value()`.
    pub is_encoding_problem: bool,
}
130
131impl ParsedPart {
132    /// Find a descendant part by its dotted IMAP part ID.
133    ///
134    /// Searches this part and all descendants depth-first.  Returns `None` if
135    /// no part with the given ID exists in the tree.
136    ///
137    /// # Part ID conventions
138    ///
139    /// - **Non-multipart root**: the root part has `part_id = "1"`.
140    /// - **Multipart root**: the root part has `part_id = ""` (empty string);
141    ///   its children are `"1"`, `"2"`, etc.
142    /// - **Nested multipart**: children use dotted paths like `"1.1"`, `"1.2"`.
143    ///
144    /// ```
145    /// # use mime_tree::parse;
146    /// // Non-multipart: root is "1"
147    /// let raw = b"Content-Type: text/plain\r\n\r\nHello\r\n";
148    /// let msg = parse(raw).unwrap();
149    /// let part = msg.part_index.find_by_id("1").unwrap();
150    /// assert_eq!(part.content_type, "text/plain");
151    /// ```
152    pub fn find_by_id(&self, id: &str) -> Option<&ParsedPart> {
153        if self.part_id == id {
154            return Some(self);
155        }
156        self.children.iter().find_map(|child| child.find_by_id(id))
157    }
158}