mime_tree/part.rs
1use std::fmt;
2
3use serde::{Deserialize, Serialize};
4
5/// Transfer encoding of a MIME body part.
6#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
7#[non_exhaustive]
8pub enum TransferEncoding {
9 Identity,
10 QuotedPrintable,
11 Base64,
12 SevenBit,
13 EightBit,
14 Binary,
15 /// UUencode, as used in `Content-Transfer-Encoding: x-uuencode`,
16 /// `x-uue`, or `uuencode`. RFC 2045 permits x-token CTE values.
17 UUEncode,
18}
19
20impl fmt::Display for TransferEncoding {
21 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
22 match self {
23 TransferEncoding::Identity => f.write_str("identity"),
24 TransferEncoding::QuotedPrintable => f.write_str("quoted-printable"),
25 TransferEncoding::Base64 => f.write_str("base64"),
26 TransferEncoding::SevenBit => f.write_str("7bit"),
27 TransferEncoding::EightBit => f.write_str("8bit"),
28 TransferEncoding::Binary => f.write_str("binary"),
29 TransferEncoding::UUEncode => f.write_str("x-uuencode"),
30 }
31 }
32}
33
34/// A decoded RFC 5322 / MIME header field.
35///
36/// For headers whose value mail-parser parses as plain text (`Subject`,
37/// `Comments`, `Content-Description`, and any unstructured header), `value`
38/// contains the fully decoded Unicode string (RFC 2047 encoded-words are
39/// already resolved).
40///
41/// For all other header types (`Address`, `DateTime`, `ContentType`,
42/// `Received`), `value` is the raw bytes sliced from the original message
43/// and converted with `String::from_utf8_lossy`. These structured values
44/// require their own dedicated parsers — see
45/// [`parse_header_typed`][crate::parse_header_typed].
46///
47/// `raw_value` always contains the original bytes of the header field
48/// value from the wire message (the portion after the `:` separator).
49/// Use `raw_value` — not `value.as_bytes()` — when feeding a header
50/// into [`parse_header_typed`][crate::parse_header_typed], because
51/// `value` may have undergone lossy UTF-8 conversion for structured
52/// headers.
53#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
54#[non_exhaustive]
55pub struct ParsedHeader {
56 /// Header field name (e.g. `"From"`, `"Subject"`).
57 pub name: String,
58 /// Decoded or lossy-converted header field value as a UTF-8 string.
59 ///
60 /// For text headers, this is the fully decoded value (RFC 2047
61 /// encoded-words resolved). For structured headers, this is a
62 /// lossy UTF-8 conversion of the raw bytes — non-UTF-8 bytes are
63 /// replaced with U+FFFD.
64 pub value: String,
65 /// Raw bytes of the header field value from the original message.
66 ///
67 /// This is the byte sequence after the `:` separator, preserving the
68 /// original encoding without lossy UTF-8 conversion. For accurate
69 /// typed parsing, always use these bytes with
70 /// [`parse_header_typed`][crate::parse_header_typed] rather than
71 /// `value.as_bytes()`.
72 pub raw_value: Vec<u8>,
73}
74
75/// A single MIME part in the parsed tree.
76///
77/// Byte ranges (`header_range`, `body_range`) are `(offset, length)` indices
78/// into the caller's original `&[u8]`. The crate never stores raw bytes.
79///
80/// Both fields use `u32` to guarantee identical serialized representation on
81/// 32-bit and 64-bit hosts. RFC 5321 recommends a 10 MB message size limit;
82/// the 4 GiB `u32` range covers all realistic messages. Callers processing
83/// raw input exceeding 4 GiB MUST reject it before calling [`parse`][crate::parse]
84/// — `mail-parser` uses `usize` offsets internally and `parse` truncates them
85/// to `u32` via `saturating_sub`, which would produce incorrect byte ranges
86/// without error on oversized input.
87///
88/// For `multipart/*` parts, `children` is non-empty and `body_range` covers
89/// the entire multipart body including boundaries.
90#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
91#[non_exhaustive]
92pub struct ParsedPart {
93 /// IMAP dotted-path part ID: `"1"`, `"1.1"`, `"1.2"`, etc.
94 pub part_id: String,
95 /// Content-Type media type/subtype (e.g. `"text/plain"`).
96 pub content_type: String,
97 /// Charset parameter from Content-Type, if present.
98 ///
99 /// `None` means no explicit `charset=` parameter was present on the
100 /// Content-Type header. Per RFC 2045 §5.2 the default is US-ASCII, but
101 /// `decode_body_value()` defaults to UTF-8 instead (a strict superset)
102 /// for better handling of the modern email corpus.
103 pub charset: Option<String>,
104 /// Content-Transfer-Encoding.
105 pub transfer_encoding: TransferEncoding,
106 /// Content-Disposition value (e.g. `"attachment"`, `"inline"`).
107 pub disposition: Option<String>,
108 /// Filename from Content-Disposition or Content-Type.
109 pub filename: Option<String>,
110 /// Content-ID header value, if present.
111 pub cid: Option<String>,
112 /// `(offset, length)` of this part's headers in the original bytes.
113 ///
114 /// To access individual typed headers for a part, slice
115 /// `raw[offset..offset+length]` and pass the result to
116 /// [`parse_header_typed`][crate::parse_header_typed].
117 pub header_range: (u32, u32),
118 /// `(offset, length)` of this part's body (pre-decode) in the original bytes.
119 pub body_range: (u32, u32),
120 /// Child parts. Non-empty only for `multipart/*` content types.
121 pub children: Vec<ParsedPart>,
122 /// True if mail-parser flagged this part as having a structural encoding
123 /// problem (e.g., invalid base64 padding in the raw transfer encoding).
124 ///
125 /// This is a parse-time flag, distinct from
126 /// [`DecodedBodyValue::is_encoding_problem`] which is set during
127 /// charset conversion in `decode_body_value()`.
128 pub is_encoding_problem: bool,
129}
130
131impl ParsedPart {
132 /// Find a descendant part by its dotted IMAP part ID.
133 ///
134 /// Searches this part and all descendants depth-first. Returns `None` if
135 /// no part with the given ID exists in the tree.
136 ///
137 /// # Part ID conventions
138 ///
139 /// - **Non-multipart root**: the root part has `part_id = "1"`.
140 /// - **Multipart root**: the root part has `part_id = ""` (empty string);
141 /// its children are `"1"`, `"2"`, etc.
142 /// - **Nested multipart**: children use dotted paths like `"1.1"`, `"1.2"`.
143 ///
144 /// ```
145 /// # use mime_tree::parse;
146 /// // Non-multipart: root is "1"
147 /// let raw = b"Content-Type: text/plain\r\n\r\nHello\r\n";
148 /// let msg = parse(raw).unwrap();
149 /// let part = msg.part_index.find_by_id("1").unwrap();
150 /// assert_eq!(part.content_type, "text/plain");
151 /// ```
152 pub fn find_by_id(&self, id: &str) -> Option<&ParsedPart> {
153 if self.part_id == id {
154 return Some(self);
155 }
156 self.children.iter().find_map(|child| child.find_by_id(id))
157 }
158}