Skip to main content

multipart_any/server/
field.rs

1// Copyright 2016 `multipart` Crate Developers
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// http://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8//! `multipart` field header parsing.
9use mime::Mime;
10
11use std::error::Error;
12use std::io::{self, Read, BufRead};
13use std::{str, fmt};
14
15use std::sync::Arc;
16
17use super::httparse::{self, EMPTY_HEADER, Header, Status, Error as HttparseError};
18
19use self::ReadEntryResult::*;
20
21use super::save::SaveBuilder;
22
23
/// Placeholder header with an empty name and value, used to initialise the
/// fixed-size `StrHeader` array before parsed headers are copied into it.
const EMPTY_STR_HEADER: StrHeader<'static> = StrHeader {
    name: "",
    val: "",
};
28
// Returns early from the enclosing function with
// `ParseHeaderError::InvalidContDisp`.
//
// `$reason` is the static explanation and `$cause` is the offending text,
// which is converted with `to_string()`. Because the expansion is a `return`,
// this may only be used inside functions returning
// `Result<_, ParseHeaderError>`.
macro_rules! invalid_cont_disp {
    ($reason: expr, $cause: expr) => {
        return Err(
            ParseHeaderError::InvalidContDisp($reason, $cause.to_string())
        );
    }
}
36
/// A single header as a borrowed name/value pair of string slices.
///
/// Not exposed
#[derive(Copy, Clone, Debug)]
pub struct StrHeader<'a> {
    // Header field name, e.g. `Content-Type`.
    name: &'a str,
    // Header field value as text.
    val: &'a str,
}
43
44struct DisplayHeaders<'s, 'a: 's>(&'s [StrHeader<'a>]);
45
46impl <'s, 'a: 's> fmt::Display for  DisplayHeaders<'s, 'a> {
47    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
48        for hdr in self.0 {
49            writeln!(f, "{}: {}", hdr.name, hdr.val)?;
50        }
51
52        Ok(())
53    }
54}
55
/// Parse the header section at the front of `r` and hand the parsed headers to
/// `closure`, returning whatever the closure returns.
///
/// The bytes making up the header section are consumed from `r` only after the
/// closure has run, because the headers passed to it borrow directly from the
/// reader's buffer.
///
/// At most `HEADER_LEN` (4) headers fit in the parse buffers.
/// NOTE(review): a section with more headers is presumably rejected by
/// `httparse` with an error — confirm against its documentation.
///
/// # Errors
/// * `ParseHeaderError::TooLarge` if `fill_buf()` returns a buffer of the same
///   length as on the previous attempt (including an empty buffer on the first
///   attempt) before a complete header section was seen.
/// * Errors from the reader, from `httparse`, and from `copy_headers`
///   (non-UTF-8 header values) are propagated with `?`.
fn with_headers<R, F, Ret>(r: &mut R, closure: F) -> Result<Ret, ParseHeaderError>
where R: BufRead, F: FnOnce(&[StrHeader]) -> Ret {
    const HEADER_LEN: usize = 4;

    // Assigned exactly once inside the loop, right before `break`; declared
    // here so they outlive the borrow of `r` held by `buf`.
    let consume;
    let ret;

    // Length of the buffer on the previous (incomplete) parse attempt.
    let mut last_len = 0;

    loop {
        // this should return a larger buffer each time
        let buf = r.fill_buf()?;

        // buffer has stopped growing
        if buf.len() == last_len {
            return Err(ParseHeaderError::TooLarge);
        }

        let mut raw_headers = [EMPTY_HEADER; HEADER_LEN];

        match httparse::parse_headers(buf, &mut raw_headers)? {
            // read more and try again
            Status::Partial => last_len = buf.len(),
            Status::Complete((consume_, raw_headers)) => {
                let mut headers = [EMPTY_STR_HEADER; HEADER_LEN];
                let headers = copy_headers(raw_headers, &mut headers)?;
                debug!("Parsed headers: {:?}", headers);
                consume = consume_;
                // Run the closure while `buf` is still borrowed; the headers
                // point into it.
                ret = closure(headers);
                break;
            },
        }
    }

    // Only now is it safe to discard the header bytes from the reader.
    r.consume(consume);
    Ok(ret)
}
93
94fn copy_headers<'h, 'b: 'h>(raw: &[Header<'b>], headers: &'h mut [StrHeader<'b>]) -> io::Result<&'h [StrHeader<'b>]> {
95    for (raw, header) in raw.iter().zip(&mut *headers) {
96        header.name = raw.name;
97        header.val = io_str_utf8(raw.value)?;
98    }
99
100    Ok(&headers[..raw.len()])
101}
102
/// The headers that (may) appear before a `multipart/form-data` field.
///
/// ### Warning: Values are Client-Provided
/// All values in this struct come from the client and should be considered **untrustworthy**.
/// This crate makes no effort to validate or sanitize any client inputs.
#[derive(Clone, Debug)]
pub struct FieldHeaders {
    /// The field's name from the form.
    ///
    /// This is the empty string when no usable `Content-Disposition` header
    /// was found for the field.
    pub name: Arc<str>,

    /// The filename of this entry, if supplied. This is not guaranteed to match the original file
    /// or even to be a valid filename for the current platform.
    pub filename: Option<String>,

    /// The MIME type (`Content-Type` value) of this file, if supplied by the client.
    ///
    /// If this is not supplied, the content-type of the field should default to `text/plain` as
    /// per [IETF RFC 7578, section 4.4](https://tools.ietf.org/html/rfc7578#section-4.4), but this
    /// should not be implicitly trusted. This crate makes no attempt to identify or validate
    /// the content-type of the actual field data.
    pub content_type: Option<Mime>,
}
125
126impl FieldHeaders {
127    /// Parse the field headers from the passed `BufRead`, consuming the relevant bytes.
128    fn read_from<R: BufRead>(r: &mut R) -> Result<Self, ParseHeaderError> {
129        with_headers(r, Self::parse)?
130    }
131
132    fn parse(headers: &[StrHeader]) -> Result<FieldHeaders, ParseHeaderError> {
133        // Does not require `Content-Disposition` header in FieldHearders.
134        // So, you can parse any `multipart/*` body.
135        match ContentDisp::parse_required(headers) {
136            Ok(cont_disp) =>
137                Ok(FieldHeaders {
138                    name: cont_disp.field_name.into(),
139                    filename: cont_disp.filename,
140                    content_type: parse_content_type(headers)?,
141                }),
142            Err(_) =>
143                Ok(FieldHeaders {
144                    name: "".into(),
145                    filename: None,
146                    content_type: parse_content_type(headers)?,
147                }),
148        }
149    }
150}
151
/// The parsed parameters of a `Content-Disposition: form-data` header.
struct ContentDisp {
    /// The name of the `multipart/form-data` field.
    field_name: String,
    /// The optional filename for this field.
    filename: Option<String>,
}
159
160impl ContentDisp {
161    fn parse_required(headers: &[StrHeader]) -> Result<ContentDisp, ParseHeaderError> {
162        let header = if let Some(header) = find_header(headers, "Content-Disposition") {
163            header
164        } else {
165            return Err(ParseHeaderError::MissingContentDisposition(
166                DisplayHeaders(headers).to_string()
167            ));
168        };
169
170        // Content-Disposition: ?
171        let after_disp_type = match split_once(header.val, ';') {
172            Some((disp_type, after_disp_type)) => {
173                // assert Content-Disposition: form-data
174                // but needs to be parsed out to trim the spaces (allowed by spec IIRC)
175                if disp_type.trim() != "form-data" {
176                    invalid_cont_disp!("unexpected Content-Disposition value", disp_type);
177                }
178                after_disp_type
179            },
180            None => invalid_cont_disp!("expected additional data after Content-Disposition type",
181                                       header.val),
182        };
183
184        // Content-Disposition: form-data; name=?
185        let (field_name, filename) = match get_str_after("name=", ';', after_disp_type) {
186            None => invalid_cont_disp!("expected field name and maybe filename, got",
187                                       after_disp_type),
188            // Content-Disposition: form-data; name={field_name}; filename=?
189            Some((field_name, after_field_name)) => {
190                let field_name = trim_quotes(field_name);
191                let filename = get_str_after("filename=", ';', after_field_name)
192                    .map(|(filename, _)| trim_quotes(filename).to_owned());
193                (field_name, filename)
194            },
195        };
196
197        Ok(ContentDisp { field_name: field_name.to_owned(), filename })
198    }
199}
200
201fn parse_content_type(headers: &[StrHeader]) -> Result<Option<Mime>, ParseHeaderError> {
202    if let Some(header) = find_header(headers, "Content-Type") {
203        // Boundary parameter will be parsed into the `Mime`
204        debug!("Found Content-Type: {:?}", header.val);
205        Ok(Some(header.val.parse::<Mime>()
206            .map_err(|_| ParseHeaderError::MimeError(header.val.into()))?))
207    } else {
208        Ok(None)
209    }
210}
211
/// A field in a multipart request with its associated headers and data.
#[derive(Debug)]
pub struct MultipartField<M: ReadEntry> {
    /// The headers for this field, including the name, filename, and content-type, if provided.
    ///
    /// ### Warning: Values are Client-Provided
    /// All values in this struct come from the client and should be considered **untrustworthy**.
    /// This crate makes no effort to validate or sanitize any client inputs.
    pub headers: FieldHeaders,

    /// The field's data.
    pub data: MultipartData<M>,
}
225
226impl<M: ReadEntry> MultipartField<M> {
227    /// Returns `true` if this field has no content-type or the content-type is `text/...`.
228    ///
229    /// This typically means it can be read to a string, but it could still be using an unsupported
230    /// character encoding, so decoding to `String` needs to ensure that the data is valid UTF-8.
231    ///
232    /// Note also that the field contents may be too large to reasonably fit in memory.
233    /// The `.save()` adapter can be used to enforce a size limit.
234    ///
235    /// Detecting character encodings by any means is (currently) beyond the scope of this crate.
236    pub fn is_text(&self) -> bool {
237        self.headers.content_type.as_ref().map_or(true, |ct| ct.type_() == mime::TEXT)
238    }
239
240    /// Read the next entry in the request.
241    pub fn next_entry(self) -> ReadEntryResult<M> {
242        self.data.into_inner().read_entry()
243    }
244
245    /// Update `self` as the next entry.
246    ///
247    /// Returns `Ok(Some(self))` if another entry was read, `Ok(None)` if the end of the body was
248    /// reached, and `Err(e)` for any errors that occur.
249    pub fn next_entry_inplace(&mut self) -> io::Result<Option<&mut Self>> where for<'a> &'a mut M: ReadEntry {
250        let multipart = self.data.take_inner();
251
252        match multipart.read_entry() {
253            Entry(entry) => {
254                *self = entry;
255                Ok(Some(self))
256            },
257            End(multipart) => {
258                self.data.give_inner(multipart);
259                Ok(None)
260            },
261            Error(multipart, err) => {
262                self.data.give_inner(multipart);
263                Err(err)
264            }
265        }
266    }
267}
268
/// The data of a field in a `multipart/form-data` request.
///
/// You can read it to EOF, or use the `save()` adaptor to save it to disk/memory.
#[derive(Debug)]
pub struct MultipartData<M> {
    // The underlying multipart reader. It is `None` only while it has been
    // temporarily taken out via `take_inner()` and not yet given back.
    inner: Option<M>,
}
276
/// Panic message used when `MultipartData::inner` is unexpectedly `None`.
const DATA_INNER_ERR: &str = "MultipartFile::inner taken and not replaced; this is likely \
                              caused by a logic error in `multipart` or by resuming after \
                              a previously caught panic.\nPlease open an issue with the \
                              relevant backtrace and debug logs at \
                              https://github.com/abonander/multipart";
282
impl<M> MultipartData<M> where M: ReadEntry {
    /// Get a builder type which can save the field with or without a size limit.
    pub fn save(&mut self) -> SaveBuilder<&mut Self> {
        SaveBuilder::new(self)
    }

    /// Take the inner `Multipart` or `&mut Multipart`
    ///
    /// # Panics
    /// Panics with `DATA_INNER_ERR` if the inner value was taken and not replaced.
    pub fn into_inner(self) -> M {
        self.inner.expect(DATA_INNER_ERR)
    }

    /// Set the minimum buffer size that `BufRead::fill_buf(self)` will return
    /// until the end of the stream is reached. Set this as small as you can tolerate
    /// to minimize `read()` calls (`read()` won't be called again until the buffer
    /// is smaller than this).
    ///
    /// This value is reset between fields.
    pub fn set_min_buf_size(&mut self, min_buf_size: usize) {
        self.inner_mut().set_min_buf_size(min_buf_size)
    }

    /// Mutably borrow the inner reader.
    ///
    /// Panics with `DATA_INNER_ERR` if it has been taken and not replaced.
    fn inner_mut(&mut self) -> &mut M {
        self.inner.as_mut().expect(DATA_INNER_ERR)
    }

    /// Move the inner reader out, leaving `None` behind; the caller is
    /// expected to restore it with `give_inner()`.
    ///
    /// Panics with `DATA_INNER_ERR` if it has already been taken.
    fn take_inner(&mut self) -> M {
        self.inner.take().expect(DATA_INNER_ERR)
    }

    /// Put a reader (back) into this value, replacing whatever was stored.
    fn give_inner(&mut self, inner: M) {
        self.inner = Some(inner);
    }
}
316
/// Reads are forwarded to the inner reader's `source_mut()`.
impl<M: ReadEntry> Read for MultipartData<M> {
    // Panics (via `inner_mut()`) if the inner reader has been taken and not replaced.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize>{
        self.inner_mut().source_mut().read(buf)
    }
}
322
/// In this implementation, `fill_buf()` can return more data with each call.
///
/// Use `set_min_buf_size()` if you require a minimum buffer length.
impl<M: ReadEntry> BufRead for MultipartData<M> {
    // Both methods forward to the inner reader's `source_mut()` and panic (via
    // `inner_mut()`) if the inner reader has been taken and not replaced.
    fn fill_buf(&mut self) -> io::Result<&[u8]> {
        self.inner_mut().source_mut().fill_buf()
    }

    fn consume(&mut self, amt: usize) {
        self.inner_mut().source_mut().consume(amt)
    }
}
335
/// Split `s` at the first occurrence of `delim`.
///
/// Returns the text before the delimiter and the remainder *starting with*
/// the delimiter itself, or `None` if `delim` does not occur in `s`.
fn split_once(s: &str, delim: char) -> Option<(&str, &str)> {
    let idx = s.find(delim)?;
    Some((&s[..idx], &s[idx..]))
}
339
/// Strip every leading and trailing `"` character from `s`.
fn trim_quotes(s: &str) -> &str {
    s.trim_matches(|c: char| c == '"')
}
343
/// Get the string after `needle` in `haystack`, stopping before `end_val_delim`
///
/// Returns `(value, rest)`, where `value` is the text between the first
/// occurrence of `needle` and the next `end_val_delim` (or the end of
/// `haystack` if the delimiter does not follow), and `rest` is the remainder
/// of `haystack` starting at that delimiter. Returns `None` if `needle` does
/// not occur in `haystack`.
fn get_str_after<'a>(needle: &str, end_val_delim: char, haystack: &'a str) -> Option<(&'a str, &'a str)> {
    let val_start_idx = haystack.find(needle)? + needle.len();
    let val_end_idx = haystack[val_start_idx..].find(end_val_delim)
        .map_or(haystack.len(), |end_idx| end_idx + val_start_idx);
    Some((&haystack[val_start_idx..val_end_idx], &haystack[val_end_idx..]))
}
351
/// Interpret `buf` as UTF-8 text, reporting invalid data as an
/// `io::ErrorKind::InvalidData` error that wraps the `Utf8Error`.
fn io_str_utf8(buf: &[u8]) -> io::Result<&str> {
    match str::from_utf8(buf) {
        Ok(text) => Ok(text),
        Err(utf8_err) => Err(io::Error::new(io::ErrorKind::InvalidData, utf8_err)),
    }
}
355
356fn find_header<'a, 'b>(headers: &'a [StrHeader<'b>], name: &str) -> Option<&'a StrHeader<'b>> {
357    // Field names are case insensitive and consist of ASCII characters
358    // only (see https://tools.ietf.org/html/rfc822#section-3.2).
359    headers.iter().find(|header| header.name.eq_ignore_ascii_case(name))
360}
361
/// Common trait for `Multipart` and `&mut Multipart`
pub trait ReadEntry: PrivReadEntry + Sized {
    /// Attempt to read the next entry in the multipart stream.
    ///
    /// Returns `End(self)` when `consume_boundary()` reports that no field
    /// follows, and `Entry(..)` wrapping `self` together with the parsed
    /// headers otherwise.
    /// NOTE(review): failures of `consume_boundary()` / `read_headers()` are
    /// handled by the `try_read_entry!` macro, which presumably returns
    /// `Error(self, err)` — confirm against the macro definition.
    fn read_entry(mut self) -> ReadEntryResult<Self> {
        // Reset the minimum buffer size for the new field.
        self.set_min_buf_size(super::boundary::MIN_BUF_SIZE);

        debug!("ReadEntry::read_entry()");

        if !try_read_entry!(self; self.consume_boundary()) {
            return End(self);
        }

        let field_headers: FieldHeaders = try_read_entry!(self; self.read_headers());

        if let Some(ct) = field_headers.content_type.as_ref() {
            if ct.type_() == mime::MULTIPART {
                // fields of this type are sent by (supposedly) no known clients
                // (https://tools.ietf.org/html/rfc7578#appendix-A) so I'd be fascinated
                // to hear about any in the wild
                info!("Found nested multipart field: {:?}:\r\n\
                       Please report this client's User-Agent and any other available details \
                       at https://github.com/abonander/multipart/issues/56",
                       field_headers);
            }
        }

        // The returned field takes ownership of the reader so its data can be
        // read through `MultipartData`.
        Entry(
            MultipartField {
                headers: field_headers,
                data: MultipartData {
                    inner: Some(self),
                },
            }
        )
    }

    /// Equivalent to `read_entry()` but takes `&mut self`
    fn read_entry_mut(&mut self) -> ReadEntryResult<&mut Self> {
        ReadEntry::read_entry(self)
    }
}
403
// Blanket impl: every `PrivReadEntry` implementor gets `ReadEntry`'s provided methods.
impl<T> ReadEntry for T where T: PrivReadEntry {}
405
406/// Public trait but not re-exported.
407pub trait PrivReadEntry {
408    type Source: BufRead;
409
410    fn source_mut(&mut self) -> &mut Self::Source;
411
412    fn set_min_buf_size(&mut self, min_buf_size: usize);
413
414    /// Consume the next boundary.
415    /// Returns `true` if a field should follow, `false` otherwise.
416    fn consume_boundary(&mut self) -> io::Result<bool>;
417
418    fn read_headers(&mut self) -> Result<FieldHeaders, io::Error> {
419        FieldHeaders::read_from(self.source_mut())
420            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
421    }
422
423    fn read_to_string(&mut self) -> io::Result<String> {
424        let mut buf = String::new();
425
426        match self.source_mut().read_to_string(&mut buf) {
427            Ok(_) => Ok(buf),
428            Err(err) => Err(err),
429        }
430    }
431}
432
/// Lets a mutable reference to a reader act as a reader itself by forwarding
/// every required method to the referenced value.
impl<'a, M: ReadEntry> PrivReadEntry for &'a mut M {
    type Source = M::Source;

    fn source_mut(&mut self) -> &mut M::Source {
        (**self).source_mut()
    }

    fn set_min_buf_size(&mut self, min_buf_size: usize) {
        (**self).set_min_buf_size(min_buf_size)
    }

    fn consume_boundary(&mut self) -> io::Result<bool> {
        (**self).consume_boundary()
    }
}
448
/// Ternary result type returned by `ReadEntry::read_entry()`,
/// `Multipart::into_entry()` and `MultipartField::next_entry()`.
///
/// The `End` and `Error` variants hand the reader `M` back to the caller.
pub enum ReadEntryResult<M: ReadEntry, Entry = MultipartField<M>> {
    /// The next entry was found.
    Entry(Entry),
    /// No more entries could be read.
    End(M),
    /// An error occurred.
    Error(M, io::Error),
}
459
460impl<M: ReadEntry, Entry> ReadEntryResult<M, Entry> {
461    /// Convert `self` into `Result<Option<Entry>>` as follows:
462    ///
463    /// * `Entry(entry) -> Ok(Some(entry))`
464    /// * `End(_) -> Ok(None)`
465    /// * `Error(_, err) -> Err(err)`
466    pub fn into_result(self) -> io::Result<Option<Entry>> {
467        match self {
468            ReadEntryResult::Entry(entry) => Ok(Some(entry)),
469            ReadEntryResult::End(_) => Ok(None),
470            ReadEntryResult::Error(_, err) => Err(err),
471        }
472    }
473
474    /// Attempt to unwrap `Entry`, panicking if this is `End` or `Error`.
475    pub fn unwrap(self) -> Entry {
476        self.expect_alt("`ReadEntryResult::unwrap()` called on `End` value",
477                        "`ReadEntryResult::unwrap()` called on `Error` value: {:?}")
478    }
479
480    /// Attempt to unwrap `Entry`, panicking if this is `End` or `Error`
481    /// with the given message. Adds the error's message in the `Error` case.
482    pub fn expect(self, msg: &str) -> Entry {
483        self.expect_alt(msg, msg)
484    }
485
486    /// Attempt to unwrap `Entry`, panicking if this is `End` or `Error`.
487    /// If this is `End`, panics with `end_msg`; if `Error`, panics with `err_msg`
488    /// as well as the error's message.
489    pub fn expect_alt(self, end_msg: &str, err_msg: &str) -> Entry {
490        match self {
491            Entry(entry) => entry,
492            End(_) => panic!("{}", end_msg),
493            Error(_, err) => panic!("{}: {:?}", err_msg, err),
494        }
495    }
496
497    /// Attempt to unwrap as `Option<Entry>`, panicking in the `Error` case.
498    pub fn unwrap_opt(self) -> Option<Entry> {
499        self.expect_opt("`ReadEntryResult::unwrap_opt()` called on `Error` value")
500    }
501
502    /// Attempt to unwrap as `Option<Entry>`, panicking in the `Error` case
503    /// with the given message as well as the error's message.
504    pub fn expect_opt(self, msg: &str) -> Option<Entry> {
505        match self {
506            Entry(entry) => Some(entry),
507            End(_) => None,
508            Error(_, err) => panic!("{}: {:?}", msg, err),
509        }
510    }
511}
512
/// Description text used by `ParseHeaderError::TokenizeError`.
const GENERIC_PARSE_ERR: &str = "an error occurred while parsing field headers";
514
// Errors that can occur while reading and parsing the headers of a field.
// `TokenizeError` and `Io` declare `from()`, which is what lets `?` convert
// `httparse` and `io` errors into this type.
quick_error! {
    #[derive(Debug)]
    enum ParseHeaderError {
        /// The `Content-Disposition` header was not found
        MissingContentDisposition(headers: String) {
            display(x) -> ("{}:\n{}", x.description(), headers)
            description("\"Content-Disposition\" header not found in field headers")
        }
        // The `Content-Disposition` header was present but malformed; `reason`
        // says what was wrong and `cause` holds the offending text.
        InvalidContDisp(reason: &'static str, cause: String) {
            display(x) -> ("{}: {}: {}", x.description(), reason, cause)
            description("invalid \"Content-Disposition\" header")
        }
        /// The header was found but could not be parsed
        TokenizeError(err: HttparseError) {
            description(GENERIC_PARSE_ERR)
            display(x) -> ("{}: {}", x.description(), err)
            cause(err)
            from()
        }
        // The `Content-Type` value (kept in `cont_type`) is not a valid MIME type.
        MimeError(cont_type: String) {
            description("Failed to parse Content-Type")
            display(this) -> ("{}: {}", this.description(), cont_type)
        }
        // The reader's buffer stopped growing before the headers were complete.
        TooLarge {
            description("field headers section ridiculously long or missing trailing CRLF-CRLF")
        }
        /// IO error
        Io(err: io::Error) {
            description("an io error occurred while parsing the headers")
            display(x) -> ("{}: {}", x.description(), err)
            cause(err)
            from()
        }
    }
}
550
/// `find_header` must match header names regardless of ASCII case.
#[test]
fn test_find_header() {
    let headers = [
        StrHeader { name: "Content-Type", val: "text/plain" },
        StrHeader { name: "Content-disposition", val: "form-data" },
        StrHeader { name: "content-transfer-encoding", val: "binary" }
    ];

    // (name to look up, value the matching header must carry)
    let lookups = [
        ("Content-Type", "text/plain"),
        ("Content-Disposition", "form-data"),
        ("Content-Transfer-Encoding", "binary"),
    ];

    for &(name, expected) in &lookups {
        let found = find_header(&headers, name).unwrap();
        assert_eq!(found.val, expected);
    }
}