Skip to main content

mailrs_mime/
content_type.rs

//! Content-Type + Content-Disposition header value parsing.
//!
//! Goes only as deep as the MIME body tree walker needs: split
//! `type/subtype` and pick out a `boundary=` / `charset=` / `filename=`
//! / `name=` parameter. RFC 2231 extended forms (`filename*=...`) are
//! decoded via [`mailrs-rfc2231`](mailrs_rfc2231).
7
8use std::collections::HashMap;
9
/// Parsed `Content-Type:` header value.
///
/// Implements `PartialEq`/`Eq` so parsed values can be compared directly
/// (e.g. with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ContentType {
    /// Top-level type ("text", "multipart", "application", ...), lowercased.
    pub type_: String,
    /// Subtype ("plain", "html", "alternative", "mixed", "calendar", ...),
    /// lowercased.
    pub subtype: String,
    /// Parameter map keyed by lowercased parameter name (boundary,
    /// charset, name, ...). Values keep their original case and are
    /// RFC 2231-decoded when the on-wire shape was
    /// `name*=charset''pct-encoded`.
    pub params: HashMap<String, String>,
}
23
24impl ContentType {
25    /// Default per RFC 2045 §5.2 when no Content-Type is present:
26    /// `text/plain; charset=us-ascii`.
27    pub fn default_for_missing_header() -> Self {
28        let mut params = HashMap::new();
29        params.insert("charset".into(), "us-ascii".into());
30        Self {
31            type_: "text".into(),
32            subtype: "plain".into(),
33            params,
34        }
35    }
36
37    /// Parse from raw header value (e.g. `multipart/mixed; boundary="xyz"`).
38    pub fn parse(value: &str) -> Self {
39        let trimmed = value.trim();
40        // type/subtype is everything up to the first `;`.
41        let (kind, rest) = match trimmed.split_once(';') {
42            Some((k, r)) => (k.trim(), r),
43            None => (trimmed, ""),
44        };
45        let (type_, subtype) = match kind.split_once('/') {
46            Some((t, s)) => (t.trim().to_ascii_lowercase(), s.trim().to_ascii_lowercase()),
47            None => (kind.to_ascii_lowercase(), String::new()),
48        };
49        let params = parse_params(rest);
50        Self {
51            type_,
52            subtype,
53            params,
54        }
55    }
56
57    /// `true` if this is a multipart/* type.
58    pub fn is_multipart(&self) -> bool {
59        self.type_ == "multipart"
60    }
61
62    /// Convenience: `"<type>/<subtype>"`.
63    pub fn mime_type(&self) -> String {
64        format!("{}/{}", self.type_, self.subtype)
65    }
66
67    /// `boundary=` parameter for multipart parts. `None` for non-multipart
68    /// or malformed multipart.
69    pub fn boundary(&self) -> Option<&str> {
70        self.params.get("boundary").map(String::as_str)
71    }
72
73    /// `charset=` parameter for text/* parts. Defaults to "us-ascii"
74    /// per RFC 2045 §5.2 when absent.
75    pub fn charset(&self) -> &str {
76        self.params
77            .get("charset")
78            .map(String::as_str)
79            .unwrap_or("us-ascii")
80    }
81
82    /// `name=` parameter — historical attachment filename source.
83    /// See also Content-Disposition `filename=`.
84    pub fn name(&self) -> Option<&str> {
85        self.params.get("name").map(String::as_str)
86    }
87}
88
/// Parsed `Content-Disposition:` header value.
///
/// Implements `PartialEq`/`Eq` so parsed values can be compared directly
/// (e.g. with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Disposition {
    /// `"inline"`, `"attachment"`, or other disposition type, lowercased.
    pub kind: String,
    /// Parameter map keyed by lowercased parameter name (e.g. `filename`);
    /// values keep their original case.
    pub params: HashMap<String, String>,
}
97
98impl Disposition {
99    /// Parse from raw header value (e.g.
100    /// `attachment; filename="report.pdf"`).
101    pub fn parse(value: &str) -> Self {
102        let trimmed = value.trim();
103        let (kind, rest) = match trimmed.split_once(';') {
104            Some((k, r)) => (k.trim().to_ascii_lowercase(), r),
105            None => (trimmed.to_ascii_lowercase(), ""),
106        };
107        let params = parse_params(rest);
108        Self { kind, params }
109    }
110
111    /// `filename=` parameter (RFC 2183) — preferred attachment name.
112    pub fn filename(&self) -> Option<&str> {
113        self.params.get("filename").map(String::as_str)
114    }
115
116    /// `true` if `kind == "attachment"`.
117    pub fn is_attachment(&self) -> bool {
118        self.kind == "attachment"
119    }
120
121    /// `true` if `kind == "inline"`.
122    pub fn is_inline(&self) -> bool {
123        self.kind == "inline"
124    }
125}
126
127/// Parse the `; name=value; name2=value2` parameter tail of a
128/// Content-Type / Content-Disposition header value.
129///
130/// Handles both legacy quoted (`name="value"`) and RFC 2231 extended
131/// (`name*=UTF-8''pct-encoded`) forms via [`mailrs-rfc2231`].
132fn parse_params(input: &str) -> HashMap<String, String> {
133    let mut out = HashMap::new();
134    // Naive split on `;`. Values may legitimately contain `;` inside
135    // quoted strings — RFC-strict parsing would tokenize MIME-style.
136    // We accept the simple split; if a quoted value contains `;` we'd
137    // truncate, which is rare in practice.
138    for token in input.split(';') {
139        let token = token.trim();
140        if token.is_empty() {
141            continue;
142        }
143        let Some((name, value)) = token.split_once('=') else {
144            continue;
145        };
146        let mut name = name.trim().to_ascii_lowercase();
147        // RFC 2231 extended form: `filename*=UTF-8''...` — trailing
148        // `*` marks the value as extended-form. Strip it so callers
149        // look up by the base name (`filename`, not `filename*`).
150        if let Some(base) = name.strip_suffix('*') {
151            name = base.to_string();
152        }
153        let value_decoded = mailrs_rfc2231::decode_param_value(value.trim())
154            .map(|c| c.into_owned())
155            .unwrap_or_else(|| value.trim().to_string());
156        // Trim quotes if the value came back quoted but decode didn't
157        // strip them (fallback path).
158        let value_clean = value_decoded
159            .trim()
160            .trim_matches('"')
161            .to_string();
162        out.insert(name, value_clean);
163    }
164    out
165}
166
#[cfg(test)]
mod tests {
    use super::*;

    /// Look up a raw parameter value by its (lowercased) key.
    fn param<'a>(ct: &'a ContentType, key: &str) -> Option<&'a str> {
        ct.params.get(key).map(|v| v.as_str())
    }

    /// A bare media type parses into type/subtype with no parameters.
    #[test]
    fn parse_simple_text_plain() {
        let parsed = ContentType::parse("text/plain");
        assert_eq!(
            (parsed.type_.as_str(), parsed.subtype.as_str()),
            ("text", "plain")
        );
        assert!(parsed.params.is_empty());
    }

    /// An explicit charset parameter is surfaced by `charset()`.
    #[test]
    fn parse_text_plain_with_charset() {
        let parsed = ContentType::parse("text/plain; charset=utf-8");
        assert_eq!(parsed.charset(), "utf-8");
    }

    /// A quoted boundary is returned without its quotes.
    #[test]
    fn parse_multipart_with_boundary() {
        let parsed = ContentType::parse("multipart/mixed; boundary=\"abc-123\"");
        assert!(parsed.is_multipart());
        assert_eq!(parsed.boundary(), Some("abc-123"));
    }

    /// An unquoted boundary is returned as-is.
    #[test]
    fn parse_multipart_unquoted_boundary() {
        let parsed = ContentType::parse("multipart/alternative; boundary=xyz");
        assert_eq!(parsed.boundary(), Some("xyz"));
    }

    /// Type and subtype are lowercased.
    #[test]
    fn parse_case_insensitive_type() {
        let parsed = ContentType::parse("TEXT/HTML");
        assert_eq!(parsed.type_, "text");
        assert_eq!(parsed.subtype, "html");
    }

    /// The legacy `name=` parameter is exposed through `name()`.
    #[test]
    fn parse_attachment_filename_quoted() {
        let parsed = ContentType::parse("application/pdf; name=\"report.pdf\"");
        assert_eq!(parsed.name(), Some("report.pdf"));
    }

    /// An RFC 2231 extended `name*=` value is percent-decoded.
    #[test]
    fn parse_rfc2231_filename_decoded() {
        let raw = "application/pdf; name*=UTF-8''%E6%97%A5%E6%9C%AC.pdf";
        let parsed = ContentType::parse(raw);
        assert_eq!(parsed.name(), Some("日本.pdf"));
    }

    /// `attachment` with a quoted filename.
    #[test]
    fn parse_disposition_attachment() {
        let disp = Disposition::parse("attachment; filename=\"report.pdf\"");
        assert!(disp.is_attachment());
        assert_eq!(disp.filename(), Some("report.pdf"));
    }

    /// A bare `inline` disposition has no filename.
    #[test]
    fn parse_disposition_inline() {
        let disp = Disposition::parse("inline");
        assert!(disp.is_inline());
        assert_eq!(disp.filename(), None);
    }

    /// An RFC 2231 extended `filename*=` value is percent-decoded.
    #[test]
    fn parse_disposition_rfc2231_filename() {
        let disp = Disposition::parse("attachment; filename*=UTF-8''%E6%97%A5.pdf");
        assert_eq!(disp.filename(), Some("日.pdf"));
    }

    /// The missing-header default is `text/plain` with `us-ascii`.
    #[test]
    fn default_for_missing_header_is_text_plain_ascii() {
        let fallback = ContentType::default_for_missing_header();
        assert_eq!(fallback.mime_type(), "text/plain");
        assert_eq!(fallback.charset(), "us-ascii");
    }

    /// Surrounding and interior whitespace is tolerated.
    #[test]
    fn parse_handles_extra_whitespace() {
        let parsed = ContentType::parse("  multipart/mixed ;  boundary=\"xx\"  ");
        assert!(parsed.is_multipart());
        assert_eq!(parsed.boundary(), Some("xx"));
    }

    /// A value without `/` keeps the type and leaves the subtype empty.
    #[test]
    fn parse_no_subtype_yields_empty() {
        let parsed = ContentType::parse("application");
        assert_eq!(parsed.type_, "application");
        assert_eq!(parsed.subtype, "");
    }

    /// Several parameters in one header are all captured.
    #[test]
    fn parse_handles_multiple_params() {
        let raw = "text/plain; charset=utf-8; format=flowed; delsp=yes";
        let parsed = ContentType::parse(raw);
        assert_eq!(parsed.charset(), "utf-8");
        assert_eq!(param(&parsed, "format"), Some("flowed"));
        assert_eq!(param(&parsed, "delsp"), Some("yes"));
    }
}