multipart_2021/server/
field.rs

1// Copyright 2016 `multipart` Crate Developers
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// http://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8//! `multipart` field header parsing.
9use mime::Mime;
10
11use std::io::{self, BufRead, Read};
12use std::{fmt, str};
13
14use std::sync::Arc;
15
16use super::httparse::{self, Error as HttparseError, Header, Status, EMPTY_HEADER};
17
18use self::ReadEntryResult::*;
19
20use super::save::SaveBuilder;
21
/// Placeholder header with an empty name and value; used to initialise the
/// fixed-size `StrHeader` array before the parsed headers are copied into it.
const EMPTY_STR_HEADER: StrHeader<'static> = StrHeader { name: "", val: "" };
23
// Return early from the enclosing function with
// `ParseHeaderError::InvalidContDisp`.
//
// `$reason` is passed through unchanged; `$cause` is converted to an owned
// `String` with `to_string()`.
macro_rules! invalid_cont_disp {
    ($reason: expr, $cause: expr) => {
        return Err(ParseHeaderError::InvalidContDisp(
            $reason,
            $cause.to_string(),
        ))
    };
}
32
/// A single header as a pair of borrowed string slices. Not exposed.
#[derive(Copy, Clone, Debug)]
pub struct StrHeader<'a> {
    name: &'a str,
    val: &'a str,
}

/// Adapter that renders a slice of headers as `name: value` lines.
struct DisplayHeaders<'s, 'a: 's>(&'s [StrHeader<'a>]);

impl<'s, 'a: 's> fmt::Display for DisplayHeaders<'s, 'a> {
    /// Writes every header on its own line, stopping at the first write error.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.0
            .iter()
            .try_for_each(|header| writeln!(f, "{}: {}", header.name, header.val))
    }
}
51
52fn with_headers<R, F, Ret>(r: &mut R, closure: F) -> Result<Ret, ParseHeaderError>
53where
54    R: BufRead,
55    F: FnOnce(&[StrHeader]) -> Ret,
56{
57    const HEADER_LEN: usize = 4;
58
59    let consume;
60    let ret;
61
62    let mut last_len = 0;
63
64    loop {
65        // this should return a larger buffer each time
66        let buf = r.fill_buf()?;
67
68        // buffer has stopped growing
69        if buf.len() == last_len {
70            return Err(ParseHeaderError::TooLarge);
71        }
72
73        let mut raw_headers = [EMPTY_HEADER; HEADER_LEN];
74
75        match httparse::parse_headers(buf, &mut raw_headers)? {
76            // read more and try again
77            Status::Partial => last_len = buf.len(),
78            Status::Complete((consume_, raw_headers)) => {
79                let mut headers = [EMPTY_STR_HEADER; HEADER_LEN];
80                let headers = copy_headers(raw_headers, &mut headers)?;
81                debug!("Parsed headers: {:?}", headers);
82                consume = consume_;
83                ret = closure(headers);
84                break;
85            }
86        }
87    }
88
89    r.consume(consume);
90    Ok(ret)
91}
92
93fn copy_headers<'h, 'b: 'h>(
94    raw: &[Header<'b>],
95    headers: &'h mut [StrHeader<'b>],
96) -> io::Result<&'h [StrHeader<'b>]> {
97    for (raw, header) in raw.iter().zip(&mut *headers) {
98        header.name = raw.name;
99        header.val = io_str_utf8(raw.value)?;
100    }
101
102    Ok(&headers[..raw.len()])
103}
104
/// The headers that (may) appear before a `multipart/form-data` field.
///
/// ### Warning: Values are Client-Provided
/// All values in this struct come from the client and should be considered **untrustworthy**.
/// This crate makes no effort to validate or sanitize any client inputs.
#[derive(Clone, Debug)]
pub struct FieldHeaders {
    /// The field's name from the form.
    pub name: Arc<str>,

    /// The filename of this entry, if supplied. This is not guaranteed to match the original file
    /// or even to be a valid filename for the current platform.
    pub filename: Option<String>,

    /// The MIME type (`Content-Type` value) of this file, if supplied by the client.
    ///
    /// If this is not supplied, the content-type of the field should default to `text/plain` as
    /// per [IETF RFC 7578, section 4.4](https://tools.ietf.org/html/rfc7578#section-4.4), but this
    /// should not be implicitly trusted. This crate makes no attempt to identify or validate
    /// the content-type of the actual field data.
    pub content_type: Option<Mime>,
}
127
128impl FieldHeaders {
129    /// Parse the field headers from the passed `BufRead`, consuming the relevant bytes.
130    fn read_from<R: BufRead>(r: &mut R) -> Result<Self, ParseHeaderError> {
131        with_headers(r, Self::parse)?
132    }
133
134    fn parse(headers: &[StrHeader]) -> Result<FieldHeaders, ParseHeaderError> {
135        let cont_disp = ContentDisp::parse_required(headers)?;
136
137        Ok(FieldHeaders {
138            name: cont_disp.field_name.into(),
139            filename: cont_disp.filename,
140            content_type: parse_content_type(headers)?,
141        })
142    }
143}
144
/// The parsed contents of a field's `Content-Disposition` header.
struct ContentDisp {
    /// The name of the `multipart/form-data` field (surrounding quotes removed).
    field_name: String,
    /// The optional filename for this field (surrounding quotes removed).
    filename: Option<String>,
}
152
153impl ContentDisp {
154    fn parse_required(headers: &[StrHeader]) -> Result<ContentDisp, ParseHeaderError> {
155        let header = if let Some(header) = find_header(headers, "Content-Disposition") {
156            header
157        } else {
158            return Err(ParseHeaderError::MissingContentDisposition(
159                DisplayHeaders(headers).to_string(),
160            ));
161        };
162
163        // Content-Disposition: ?
164        let after_disp_type = match split_once(header.val, ';') {
165            Some((disp_type, after_disp_type)) => {
166                // assert Content-Disposition: form-data
167                // but needs to be parsed out to trim the spaces (allowed by spec IIRC)
168                if disp_type.trim() != "form-data" {
169                    invalid_cont_disp!("unexpected Content-Disposition value", disp_type);
170                }
171                after_disp_type
172            }
173            None => invalid_cont_disp!(
174                "expected additional data after Content-Disposition type",
175                header.val
176            ),
177        };
178
179        // Content-Disposition: form-data; name=?
180        let (field_name, filename) = match get_str_after("name=", ';', after_disp_type) {
181            None => invalid_cont_disp!(
182                "expected field name and maybe filename, got",
183                after_disp_type
184            ),
185            // Content-Disposition: form-data; name={field_name}; filename=?
186            Some((field_name, after_field_name)) => {
187                let field_name = trim_quotes(field_name);
188                let filename = get_str_after("filename=", ';', after_field_name)
189                    .map(|(filename, _)| trim_quotes(filename).to_owned());
190                (field_name, filename)
191            }
192        };
193
194        Ok(ContentDisp {
195            field_name: field_name.to_owned(),
196            filename,
197        })
198    }
199}
200
201fn parse_content_type(headers: &[StrHeader]) -> Result<Option<Mime>, ParseHeaderError> {
202    if let Some(header) = find_header(headers, "Content-Type") {
203        // Boundary parameter will be parsed into the `Mime`
204        debug!("Found Content-Type: {:?}", header.val);
205        Ok(Some(header.val.parse::<Mime>().map_err(|_| {
206            ParseHeaderError::MimeError(header.val.into())
207        })?))
208    } else {
209        Ok(None)
210    }
211}
212
/// A field in a multipart request with its associated headers and data.
#[derive(Debug)]
pub struct MultipartField<M: ReadEntry> {
    /// The headers for this field, including the name, filename, and content-type, if provided.
    ///
    /// ### Warning: Values are Client-Provided
    /// All values in this struct come from the client and should be considered **untrustworthy**.
    /// This crate makes no effort to validate or sanitize any client inputs.
    pub headers: FieldHeaders,

    /// The field's data.
    pub data: MultipartData<M>,
}
226
227impl<M: ReadEntry> MultipartField<M> {
228    /// Returns `true` if this field has no content-type or the content-type is `text/...`.
229    ///
230    /// This typically means it can be read to a string, but it could still be using an unsupported
231    /// character encoding, so decoding to `String` needs to ensure that the data is valid UTF-8.
232    ///
233    /// Note also that the field contents may be too large to reasonably fit in memory.
234    /// The `.save()` adapter can be used to enforce a size limit.
235    ///
236    /// Detecting character encodings by any means is (currently) beyond the scope of this crate.
237    pub fn is_text(&self) -> bool {
238        self.headers
239            .content_type
240            .as_ref()
241            .map_or(true, |ct| ct.type_() == mime::TEXT)
242    }
243
244    /// Read the next entry in the request.
245    pub fn next_entry(self) -> ReadEntryResult<M> {
246        self.data.into_inner().read_entry()
247    }
248
249    /// Update `self` as the next entry.
250    ///
251    /// Returns `Ok(Some(self))` if another entry was read, `Ok(None)` if the end of the body was
252    /// reached, and `Err(e)` for any errors that occur.
253    pub fn next_entry_inplace(&mut self) -> io::Result<Option<&mut Self>>
254    where
255        for<'a> &'a mut M: ReadEntry,
256    {
257        let multipart = self.data.take_inner();
258
259        match multipart.read_entry() {
260            Entry(entry) => {
261                *self = entry;
262                Ok(Some(self))
263            }
264            End(multipart) => {
265                self.data.give_inner(multipart);
266                Ok(None)
267            }
268            Error(multipart, err) => {
269                self.data.give_inner(multipart);
270                Err(err)
271            }
272        }
273    }
274}
275
/// The data of a field in a `multipart/form-data` request.
///
/// You can read it to EOF, or use the `save()` adaptor to save it to disk/memory.
#[derive(Debug)]
pub struct MultipartData<M> {
    // The wrapped reader. It is an `Option` so it can be temporarily taken out
    // (`take_inner`) and put back (`give_inner`); accessors panic if it is absent.
    inner: Option<M>,
}
283
/// Panic message used when `MultipartData::inner` is unexpectedly `None`.
const DATA_INNER_ERR: &str = "MultipartData::inner taken and not replaced; this is likely \
                              caused by a logic error in `multipart` or by resuming after \
                              a previously caught panic.\nPlease open an issue with the \
                              relevant backtrace and debug logs at \
                              https://github.com/abonander/multipart";
289
290impl<M> MultipartData<M>
291where
292    M: ReadEntry,
293{
294    /// Get a builder type which can save the field with or without a size limit.
295    pub fn save(&mut self) -> SaveBuilder<&mut Self> {
296        SaveBuilder::new(self)
297    }
298
299    /// Take the inner `Multipart` or `&mut Multipart`
300    pub fn into_inner(self) -> M {
301        self.inner.expect(DATA_INNER_ERR)
302    }
303
304    /// Set the minimum buffer size that `BufRead::fill_buf(self)` will return
305    /// until the end of the stream is reached. Set this as small as you can tolerate
306    /// to minimize `read()` calls (`read()` won't be called again until the buffer
307    /// is smaller than this).
308    ///
309    /// This value is reset between fields.
310    pub fn set_min_buf_size(&mut self, min_buf_size: usize) {
311        self.inner_mut().set_min_buf_size(min_buf_size)
312    }
313
314    fn inner_mut(&mut self) -> &mut M {
315        self.inner.as_mut().expect(DATA_INNER_ERR)
316    }
317
318    fn take_inner(&mut self) -> M {
319        self.inner.take().expect(DATA_INNER_ERR)
320    }
321
322    fn give_inner(&mut self, inner: M) {
323        self.inner = Some(inner);
324    }
325}
326
327impl<M: ReadEntry> Read for MultipartData<M> {
328    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
329        self.inner_mut().source_mut().read(buf)
330    }
331}
332
333/// In this implementation, `fill_buf()` can return more data with each call.
334///
335/// Use `set_min_buf_size()` if you require a minimum buffer length.
336impl<M: ReadEntry> BufRead for MultipartData<M> {
337    fn fill_buf(&mut self) -> io::Result<&[u8]> {
338        self.inner_mut().source_mut().fill_buf()
339    }
340
341    fn consume(&mut self, amt: usize) {
342        self.inner_mut().source_mut().consume(amt)
343    }
344}
345
/// Split `s` at the first occurrence of `delim`.
///
/// The delimiter is kept at the start of the second half. Returns `None` if `delim`
/// does not occur in `s`.
fn split_once(s: &str, delim: char) -> Option<(&str, &str)> {
    let delim_idx = s.find(delim)?;
    Some(s.split_at(delim_idx))
}
349
/// Strip every leading and trailing `"` character from `s`.
fn trim_quotes(s: &str) -> &str {
    s.trim_start_matches('"').trim_end_matches('"')
}
353
/// Get the string after `needle` in `haystack`, stopping before `end_val_delim`.
///
/// Returns `(value, remainder)`, where `remainder` starts at the delimiter that ended the
/// value (or is empty if the value ran to the end of `haystack`). Returns `None` if
/// `needle` is not found.
///
/// `needle` only matches at a parameter boundary: at the very start of `haystack`, or
/// directly after `end_val_delim` or whitespace. This keeps a search for `name=` from
/// matching the tail of `filename=`.
///
/// If the value starts with a double quote, the delimiter is only searched for after the
/// closing quote, so a quoted value may contain `end_val_delim`. The quotes are left in
/// the returned value. Backslash escapes are not interpreted, and an unterminated quote
/// falls back to splitting at the first delimiter.
fn get_str_after<'a>(
    needle: &str,
    end_val_delim: char,
    haystack: &'a str,
) -> Option<(&'a str, &'a str)> {
    let needle_idx = haystack
        .match_indices(needle)
        .map(|(idx, _)| idx)
        .find(|&idx| {
            haystack[..idx]
                .chars()
                .next_back()
                .map_or(true, |prev| prev == end_val_delim || prev.is_whitespace())
        })?;

    let val_start_idx = needle_idx + needle.len();
    let rest = &haystack[val_start_idx..];

    // Skip over a leading quoted span so a delimiter inside the quotes is not
    // mistaken for the end of the value.
    let search_from = quoted_prefix_len(rest).unwrap_or(0);

    let val_end_idx = rest[search_from..]
        .find(end_val_delim)
        .map_or(haystack.len(), |end_idx| {
            val_start_idx + search_from + end_idx
        });

    Some((
        &haystack[val_start_idx..val_end_idx],
        &haystack[val_end_idx..],
    ))
}

/// Length in bytes of the quoted span (both quotes included) at the start of `s`,
/// or `None` if `s` does not start with `"` or the quote is never closed.
fn quoted_prefix_len(s: &str) -> Option<usize> {
    if !s.starts_with('"') {
        return None;
    }

    // `"` is a single byte, so slicing at 1 is always on a char boundary;
    // +2 accounts for the opening and the closing quote.
    s[1..].find('"').map(|close_idx| close_idx + 2)
}
369
/// Interpret `buf` as UTF-8, reporting invalid data as an `io::Error`
/// of kind `InvalidData` that wraps the underlying `Utf8Error`.
fn io_str_utf8(buf: &[u8]) -> io::Result<&str> {
    match str::from_utf8(buf) {
        Ok(text) => Ok(text),
        Err(utf8_err) => Err(io::Error::new(io::ErrorKind::InvalidData, utf8_err)),
    }
}
373
374fn find_header<'a, 'b>(headers: &'a [StrHeader<'b>], name: &str) -> Option<&'a StrHeader<'b>> {
375    // Field names are case insensitive and consist of ASCII characters
376    // only (see https://tools.ietf.org/html/rfc822#section-3.2).
377    headers
378        .iter()
379        .find(|header| header.name.eq_ignore_ascii_case(name))
380}
381
382/// Common trait for `Multipart` and `&mut Multipart`
383pub trait ReadEntry: PrivReadEntry + Sized {
384    /// Attempt to read the next entry in the multipart stream.
385    fn read_entry(mut self) -> ReadEntryResult<Self> {
386        self.set_min_buf_size(super::boundary::MIN_BUF_SIZE);
387
388        debug!("ReadEntry::read_entry()");
389
390        if !try_read_entry!(self; self.consume_boundary()) {
391            return End(self);
392        }
393
394        let field_headers: FieldHeaders = try_read_entry!(self; self.read_headers());
395
396        if let Some(ct) = field_headers.content_type.as_ref() {
397            if ct.type_() == mime::MULTIPART {
398                // fields of this type are sent by (supposedly) no known clients
399                // (https://tools.ietf.org/html/rfc7578#appendix-A) so I'd be fascinated
400                // to hear about any in the wild
401                info!(
402                    "Found nested multipart field: {:?}:\r\n\
403                     Please report this client's User-Agent and any other available details \
404                     at https://github.com/abonander/multipart/issues/56",
405                    field_headers
406                );
407            }
408        }
409
410        Entry(MultipartField {
411            headers: field_headers,
412            data: MultipartData { inner: Some(self) },
413        })
414    }
415
416    /// Equivalent to `read_entry()` but takes `&mut self`
417    fn read_entry_mut(&mut self) -> ReadEntryResult<&mut Self> {
418        ReadEntry::read_entry(self)
419    }
420}
421
422impl<T> ReadEntry for T where T: PrivReadEntry {}
423
424/// Public trait but not re-exported.
425pub trait PrivReadEntry {
426    type Source: BufRead;
427
428    fn source_mut(&mut self) -> &mut Self::Source;
429
430    fn set_min_buf_size(&mut self, min_buf_size: usize);
431
432    /// Consume the next boundary.
433    /// Returns `true` if a field should follow, `false` otherwise.
434    fn consume_boundary(&mut self) -> io::Result<bool>;
435
436    fn read_headers(&mut self) -> Result<FieldHeaders, io::Error> {
437        FieldHeaders::read_from(self.source_mut())
438            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
439    }
440
441    fn read_to_string(&mut self) -> io::Result<String> {
442        let mut buf = String::new();
443
444        match self.source_mut().read_to_string(&mut buf) {
445            Ok(_) => Ok(buf),
446            Err(err) => Err(err),
447        }
448    }
449}
450
451impl<'a, M: ReadEntry> PrivReadEntry for &'a mut M {
452    type Source = M::Source;
453
454    fn source_mut(&mut self) -> &mut M::Source {
455        (**self).source_mut()
456    }
457
458    fn set_min_buf_size(&mut self, min_buf_size: usize) {
459        (**self).set_min_buf_size(min_buf_size)
460    }
461
462    fn consume_boundary(&mut self) -> io::Result<bool> {
463        (**self).consume_boundary()
464    }
465}
466
/// Ternary result type returned by `ReadEntry::read_entry()`,
/// `Multipart::into_entry()` and `MultipartField::next_entry()`.
///
/// The `End` and `Error` variants hand the reader `M` back to the caller.
pub enum ReadEntryResult<M: ReadEntry, Entry = MultipartField<M>> {
    /// The next entry was found.
    Entry(Entry),
    /// No more entries could be read.
    End(M),
    /// An error occurred.
    Error(M, io::Error),
}
477
478impl<M: ReadEntry, Entry> ReadEntryResult<M, Entry> {
479    /// Convert `self` into `Result<Option<Entry>>` as follows:
480    ///
481    /// * `Entry(entry) -> Ok(Some(entry))`
482    /// * `End(_) -> Ok(None)`
483    /// * `Error(_, err) -> Err(err)`
484    pub fn into_result(self) -> io::Result<Option<Entry>> {
485        match self {
486            ReadEntryResult::Entry(entry) => Ok(Some(entry)),
487            ReadEntryResult::End(_) => Ok(None),
488            ReadEntryResult::Error(_, err) => Err(err),
489        }
490    }
491
492    /// Attempt to unwrap `Entry`, panicking if this is `End` or `Error`.
493    pub fn unwrap(self) -> Entry {
494        self.expect_alt(
495            "`ReadEntryResult::unwrap()` called on `End` value",
496            "`ReadEntryResult::unwrap()` called on `Error` value: {:?}",
497        )
498    }
499
500    /// Attempt to unwrap `Entry`, panicking if this is `End` or `Error`
501    /// with the given message. Adds the error's message in the `Error` case.
502    pub fn expect(self, msg: &str) -> Entry {
503        self.expect_alt(msg, msg)
504    }
505
506    /// Attempt to unwrap `Entry`, panicking if this is `End` or `Error`.
507    /// If this is `End`, panics with `end_msg`; if `Error`, panics with `err_msg`
508    /// as well as the error's message.
509    pub fn expect_alt(self, end_msg: &str, err_msg: &str) -> Entry {
510        match self {
511            Entry(entry) => entry,
512            End(_) => panic!("{}", end_msg),
513            Error(_, err) => panic!("{}: {:?}", err_msg, err),
514        }
515    }
516
517    /// Attempt to unwrap as `Option<Entry>`, panicking in the `Error` case.
518    pub fn unwrap_opt(self) -> Option<Entry> {
519        self.expect_opt("`ReadEntryResult::unwrap_opt()` called on `Error` value")
520    }
521
522    /// Attempt to unwrap as `Option<Entry>`, panicking in the `Error` case
523    /// with the given message as well as the error's message.
524    pub fn expect_opt(self, msg: &str) -> Option<Entry> {
525        match self {
526            Entry(entry) => Some(entry),
527            End(_) => None,
528            Error(_, err) => panic!("{}: {:?}", msg, err),
529        }
530    }
531}
532
533quick_error! {
534    #[derive(Debug)]
535    enum ParseHeaderError {
536        /// The `Content-Disposition` header was not found
537        MissingContentDisposition(headers: String) {
538            display(x) -> ("{}:\n{}", x, headers)
539            description("\"Content-Disposition\" header not found in field headers")
540        }
541        InvalidContDisp(reason: &'static str, cause: String) {
542            display(x) -> ("{}: {}: {}", x, reason, cause)
543            description("invalid \"Content-Disposition\" header")
544        }
545        /// The header was found but could not be parsed
546        TokenizeError(err: HttparseError) {
547            description("an error occurred while parsing field headers")
548            display(x) -> ("{}: {}", x, err)
549            cause(err)
550            from()
551        }
552        MimeError(cont_type: String) {
553            description("Failed to parse Content-Type")
554            display(this) -> ("{}: {}", this, cont_type)
555        }
556        TooLarge {
557            description("field headers section ridiculously long or missing trailing CRLF-CRLF")
558        }
559        /// IO error
560        Io(err: io::Error) {
561            description("an io error occurred while parsing the headers")
562            display(x) -> ("{}: {}", x, err)
563            cause(err)
564            from()
565        }
566    }
567}
568
// Header lookup must ignore ASCII case in the header name.
#[test]
fn test_find_header() {
    let headers = [
        StrHeader {
            name: "Content-Type",
            val: "text/plain",
        },
        StrHeader {
            name: "Content-disposition",
            val: "form-data",
        },
        StrHeader {
            name: "content-transfer-encoding",
            val: "binary",
        },
    ];

    // (name to look up, expected value)
    let cases = [
        ("Content-Type", "text/plain"),
        ("Content-Disposition", "form-data"),
        ("Content-Transfer-Encoding", "binary"),
    ];

    for &(lookup, expected) in cases.iter() {
        let found = find_header(&headers, lookup).unwrap();
        assert_eq!(found.val, expected);
    }
}