content_disposition/
lib.rs

1//
2//  MIT OR BSD-0-Clause; https://github.com/staktrace/mailparse.git
3//
4
5use std::collections::{BTreeMap, HashMap};
6
7use charset::Charset;
8
/// The possible disposition types in a Content-Disposition header.
///
/// A more comprehensive list of IANA-recognized types can be found at
/// <https://www.iana.org/assignments/cont-disp/cont-disp.xhtml>. This library
/// only enumerates the types most commonly found in email messages, and
/// provides the `Extension` variant for holding all other types.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DispositionType {
    /// Default value, indicating the content is to be displayed inline as
    /// part of the enclosing document.
    Inline,
    /// A disposition indicating the content is not meant for inline display,
    /// but whose content can be accessed for use.
    Attachment,
    /// A disposition indicating the content contains a form submission.
    FormData,
    /// Extension type to hold any disposition not explicitly enumerated.
    /// The payload is the disposition type string itself.
    Extension(String),
}
27
28impl Default for DispositionType {
29    fn default() -> Self {
30        DispositionType::Inline
31    }
32}
33
34/// Convert the string represented disposition type to enum.
35fn parse_disposition_type(disposition: &str) -> DispositionType {
36    match &disposition.to_lowercase()[..] {
37        "inline" => DispositionType::Inline,
38        "attachment" => DispositionType::Attachment,
39        "form-data" => DispositionType::FormData,
40        extension => DispositionType::Extension(extension.to_string()),
41    }
42}
43
44/// A struct to hold a more structured representation of the Content-Disposition header.
45/// This is provided mostly as a convenience since this metadata is usually
46/// needed to interpret the message body properly.
47#[derive(Debug, Clone, Default)]
48pub struct ParsedContentDisposition {
49    /// The disposition type of the Content-Disposition header. If this
50    /// is an extension type, the string will be lowercased.
51    pub disposition: DispositionType,
52    /// The additional params of Content-Disposition, e.g. filename. The
53    /// keys in the map will be lowercased, and the values will have any
54    /// enclosing quotes stripped.
55    pub params: BTreeMap<String, String>,
56}
57
58impl ParsedContentDisposition {
59    #[allow(dead_code)]
60    pub fn name(&self) -> Option<String> {
61        self.params.get("name").cloned()
62    }
63    #[allow(dead_code)]
64    pub fn filename_full(&self) -> Option<String> {
65        self.params.get("filename").cloned()
66    }
67    #[allow(dead_code)]
68    pub fn filename(&self) -> Option<(String, Option<String>)> {
69        let clone = self.params.get("filename").cloned();
70        match clone {
71            Some(c) => {
72                let mut arr: Vec<&str> = c.split(".").collect();
73                let last = arr.pop();
74                let first = arr.join(".");
75                Some(match last {
76                    Some(l) => (first, Some(l.to_owned())),
77                    None => (first, None),
78                })
79            }
80            None => None,
81        }
82    }
83}
84
85pub fn parse_content_disposition(header: &str) -> ParsedContentDisposition {
86    let params = parse_param_content(header);
87    let disposition = parse_disposition_type(&params.value);
88    ParsedContentDisposition {
89        disposition,
90        params: params.params,
91    }
92}
93
/// Result of splitting a parameterized header value (content-type or
/// content-disposition) into its leading value and its parameters.
struct ParamContent {
    /// The trimmed text that precedes the first `;`.
    value: String,
    /// The `key=value` parameters that follow, keyed by lowercased name.
    params: BTreeMap<String, String>,
}
99
100/// Parse parameterized header values such as that for Content-Type
101/// e.g. `multipart/alternative; boundary=foobar`
102/// Note: this function is not made public as it may require
103/// significant changes to be fully correct. For instance,
104/// it does not handle quoted parameter values containing the
105/// semicolon (';') character. It also produces a BTreeMap,
106/// which implicitly does not support multiple parameters with
107/// the same key. Also, the parameter values may contain language
108/// information in a format specified by RFC 2184 which is thrown
109/// away. The format for parameterized header values doesn't
110/// appear to be strongly specified anywhere.
111fn parse_param_content(content: &str) -> ParamContent {
112    let mut tokens = content.split(';');
113    // There must be at least one token produced by split, even if it's empty.
114    let value = tokens.next().unwrap().trim();
115    let mut map: BTreeMap<String, String> = tokens
116        .filter_map(|kv| {
117            kv.find('=').map(|idx| {
118                let key = kv[0..idx].trim().to_lowercase();
119                let mut value = kv[idx + 1..].trim();
120                if value.starts_with('"') && value.ends_with('"') && value.len() > 1 {
121                    value = &value[1..value.len() - 1];
122                }
123                (key, value.to_string())
124            })
125        })
126        .collect();
127
128    // Decode charset encoding, as described in RFC 2184, Section 4.
129    let decode_key_list: Vec<String> = map
130        .keys()
131        .filter_map(|k| k.strip_suffix('*'))
132        .map(String::from)
133        // Skip encoded keys where there is already an equivalent decoded key in the map
134        .filter(|k| !map.contains_key(k))
135        .collect();
136    let encodings = compute_parameter_encodings(&map, &decode_key_list);
137    // Note that when we get here, we might still have entries in `encodings` for continuation segments
138    // that didn't have a *0 segment at all. These shouldn't exist per spec so we can do whatever we want,
139    // as long as we don't panic.
140    for (k, (e, strip)) in encodings {
141        if let Some(charset) = Charset::for_label_no_replacement(e.as_bytes()) {
142            let key = format!("{}*", k);
143            let percent_encoded_value = map.remove(&key).unwrap();
144            let encoded_value = if strip {
145                percent_decode(percent_encoded_value.splitn(3, '\'').nth(2).unwrap_or(""))
146            } else {
147                percent_decode(&percent_encoded_value)
148            };
149            let decoded_value = charset.decode_without_bom_handling(&encoded_value).0;
150            map.insert(k, decoded_value.to_string());
151        }
152    }
153
154    // Unwrap parameter value continuations, as described in RFC 2184, Section 3.
155    let unwrap_key_list: Vec<String> = map
156        .keys()
157        .filter_map(|k| k.strip_suffix("*0"))
158        .map(String::from)
159        // Skip wrapped keys where there is already an unwrapped equivalent in the map
160        .filter(|k| !map.contains_key(k))
161        .collect();
162    for unwrap_key in unwrap_key_list {
163        let mut unwrapped_value = String::new();
164        let mut index = 0;
165        while let Some(wrapped_value_part) = map.remove(&format!("{}*{}", &unwrap_key, index)) {
166            index += 1;
167            unwrapped_value.push_str(&wrapped_value_part);
168        }
169        let old_value = map.insert(unwrap_key, unwrapped_value);
170        assert!(old_value.is_none());
171    }
172
173    ParamContent {
174        value: value.into(),
175        params: map,
176    }
177}
178
/// Works out which charset label applies to each encoded parameter
/// (RFC 2184, Section 4, including Section 4.1 continuations).
///
/// `map` is the raw parameter map. `decode_key_list` holds the encoded
/// parameter keys with their trailing `*` removed; every entry must have a
/// matching `<key>*` entry in `map`.
///
/// In the returned map, the key is one of the entries from `decode_key_list`.
/// The value is a tuple of:
/// * the encoding label: the text before the first `'` of the raw value
///   (the whole value when it contains no `'`). For continuation segments
///   this is the label taken from the `*0` segment;
/// * a flag telling whether the `charset'language'` prefix has to be stripped
///   from that entry's value. It is `true` for non-continuation parameters
///   and for the `*0` segment, and `false` for later continuation segments.
///
/// # Panics
///
/// Panics if an entry of `decode_key_list` has no `<key>*` entry in `map`.
fn compute_parameter_encodings(
    map: &BTreeMap<String, String>,
    decode_key_list: &[String],
) -> HashMap<String, (String, bool)> {
    // Extracts the label that precedes the first `'` of `<decode_key>*`.
    let declared_encoding = |decode_key: &str| -> String {
        map.get(&format!("{}*", decode_key))
            .expect("every decode key must have a `<key>*` entry in the parameter map")
            .split('\'')
            .next()
            .unwrap_or("")
            .to_string()
    };

    // To handle section 4.1 (combining encodings with continuations), we first
    // compute the encoding for each parameter value or parameter value segment
    // that is encoded. For continuation segments the encoding from the *0 segment
    // overwrites the continuation segment's encoding, if there is one.
    let mut encodings: HashMap<String, (String, bool)> = HashMap::new();
    for decode_key in decode_key_list {
        if let Some(unwrap_key) = decode_key.strip_suffix("*0") {
            let encoding = declared_encoding(decode_key);
            let continuation_prefix = format!("{}*", unwrap_key);
            let segments = decode_key_list
                .iter()
                .filter(|key| key.starts_with(&continuation_prefix));
            for continuation_key in segments {
                // This may (intentionally) overwrite encodings previously found
                // for the continuation segments (which are bogus). In those
                // cases the flag in the tuple changes from true to false.
                encodings.insert(
                    continuation_key.clone(),
                    (encoding.clone(), continuation_key == decode_key),
                );
            }
        } else if !encodings.contains_key(decode_key) {
            encodings.insert(decode_key.clone(), (declared_encoding(decode_key), true));
        }
        // else this is a continuation segment and the encoding has already been
        // populated by the initial *0 segment, so we can ignore it.
    }
    encodings
}
231
232fn percent_decode(encoded: &str) -> Vec<u8> {
233    let mut decoded = Vec::with_capacity(encoded.len());
234    let mut bytes = encoded.bytes();
235    let mut next = bytes.next();
236    while next.is_some() {
237        let b = next.unwrap();
238        if b != b'%' {
239            decoded.push(b);
240            next = bytes.next();
241            continue;
242        }
243
244        let top = match bytes.next() {
245            Some(n) if n.is_ascii_hexdigit() => n,
246            n => {
247                decoded.push(b);
248                next = n;
249                continue;
250            }
251        };
252        let bottom = match bytes.next() {
253            Some(n) if n.is_ascii_hexdigit() => n,
254            n => {
255                decoded.push(b);
256                decoded.push(top);
257                next = n;
258                continue;
259            }
260        };
261        let decoded_byte = (hex_to_nybble(top) << 4) | hex_to_nybble(bottom);
262        decoded.push(decoded_byte);
263
264        next = bytes.next();
265    }
266    decoded
267}
268
/// Convert one ASCII hex digit (`0-9`, `a-f`, `A-F`) to its numeric value.
///
/// # Panics
///
/// Panics when `byte` is not an ASCII hex digit.
fn hex_to_nybble(byte: u8) -> u8 {
    match char::from(byte).to_digit(16) {
        // A base-16 digit is always below 16, so the cast cannot truncate.
        Some(digit) => digit as u8,
        None => panic!("Not a hex character!"),
    }
}
277
#[cfg(test)]
mod tests {
    use super::*;

    /// Covers a bare disposition type, an attachment with assorted quoted
    /// and unquoted parameters, and a form-data header with a filename.
    #[test]
    fn test_parse_content_disposition() {
        // A bare type carries no parameters at all.
        let bare = parse_content_disposition("inline");
        assert_eq!(bare.disposition, DispositionType::Inline);
        assert!(bare.params.get("name").is_none());
        assert!(bare.params.get("filename").is_none());

        // Surrounding whitespace and quotes are removed from parameters.
        let attachment = parse_content_disposition(
            " attachment; x=y; charset=\"fake\" ; x2=y2; name=\"King Joffrey.death\"",
        );
        assert_eq!(attachment.disposition, DispositionType::Attachment);
        assert_eq!(
            attachment.params.get("name").map(String::as_str),
            Some("King Joffrey.death")
        );
        assert!(attachment.params.get("filename").is_none());

        // The accessors expose `name` and `filename`, split on the last dot.
        let form = parse_content_disposition(" form-data; name=\"cover\"; filename=\"exif.jpg\"");
        assert_eq!(form.disposition, DispositionType::FormData);
        assert_eq!(form.name().as_deref(), Some("cover"));
        assert_eq!(form.filename_full().as_deref(), Some("exif.jpg"));
        let (stem, extension) = form.filename().unwrap();
        assert_eq!(stem, "exif");
        assert_eq!(extension.as_deref(), Some("jpg"));
    }
}
307}