multipart_async/server/field/
headers.rs

1use futures::{Poll, Stream};
2
3use mime::{self, Mime, Name};
4
5use std::{io, str};
6
7use server::{Multipart, BodyChunk, StreamError, httparse, twoway};
8use server::boundary::BoundaryFinder;
9
10use self::httparse::{EMPTY_HEADER, Status};
11
12use helpers::*;
13
/// Upper bound, in bytes, on the header data `ReadHeaders` buffers across chunks while it is
/// still waiting for the terminating double-CRLF.
const MAX_BUF_LEN: usize = 1024;
/// Number of header slots handed to `httparse` when parsing a single field's headers.
const MAX_HEADERS: usize = 4;
16
17/// The headers of a `Field`, including the name, filename, and `Content-Type`, if provided.
18///
19/// ### Note: Untrustworthy
20/// These values are provided directly by the client, and as such, should be considered
21/// *untrustworthy* and potentially **dangerous**. Avoid any unsanitized usage on the filesystem
22/// or in a shell or database, or performing unsafe operations with the assumption of a
23/// certain file type. Sanitizing/verifying these values is (currently) beyond the scope of this
24/// crate.
25#[derive(Clone, Default, Debug)]
26pub struct FieldHeaders {
27    /// The name of the field as provided by the client.
28    ///
29    /// ### Special Value: `_charset_`
30    /// If the client intended a different character set than UTF-8 for its text values, it may
31    /// provide the name of the charset as a text field (ASCII-encoded) with the name `_charset_`.
32    /// See [IETF RFC 7578, Section 4.6](https://tools.ietf.org/html/rfc7578#section-4.6) for more.
33    ///
34    /// Alternately, the charset can be provided for an individual field as a `charset` parameter
35    /// to its `Content-Type` header; see the `charset()` method for a convenient wrapper.
36    pub name: String,
37    /// The name of the file as it was on the client. If not provided, it may still have been a
38    /// file field.
39    pub filename: Option<String>,
40    /// The `Content-Type` of this field, as provided by the client. If `None`, then the field
41    /// is probably text, but this is not guaranteed.
42    pub content_type: Option<Mime>,
43}
44
45impl FieldHeaders {
46    /// `true` if `content_type` is `None` or `text/*` (such as `text/plain`).
47    ///
48    /// **Note**: this does not guarantee that the field data is compatible with
49    /// `FieldData::read_text()`; supporting more encodings than ASCII/UTF-8 is (currently)
50    /// beyond the scope of this crate.
51    pub fn is_text(&self) -> bool {
52        self.content_type.as_ref().map_or(true, |ct| ct.type_() == mime::TEXT)
53    }
54
55    /// The character set of this field, if provided.
56    pub fn charset(&self) -> Option<Name> {
57        self.content_type.as_ref().and_then(|ct| ct.get_param(mime::CHARSET))
58    }
59}
60
/// State for reading a field's header section when it arrives split across several chunks.
#[derive(Default, Debug)]
pub struct ReadHeaders {
    /// Header bytes received so far for which no terminating double-CRLF has been seen yet.
    accumulator: Vec<u8>,
}
65
66impl ReadHeaders {
67    pub fn read_headers<S: Stream>(&mut self, stream: &mut BoundaryFinder<S>) -> PollOpt<FieldHeaders, S::Error>
68    where S::Item: BodyChunk, S::Error: StreamError {
69        loop {
70            let chunk = match try_ready!(stream.poll()) {
71                Some(chunk) => chunk,
72                None => return if !self.accumulator.is_empty() {
73                    error("unexpected end of stream")
74                } else {
75                    ready(None)
76                },
77            };
78
79            // End of the headers section is signalled by a double-CRLF
80            if let Some(header_end) = twoway::find_bytes(chunk.as_slice(), b"\r\n\r\n") {
81                // Split after the double-CRLF because we don't want to yield it and httparse expects it
82                let (headers, rem) = chunk.split_at(header_end + 4);
83                stream.push_chunk(rem);
84
85                if !self.accumulator.is_empty() {
86                    self.accumulator.extend_from_slice(headers.as_slice());
87                    let headers = parse_headers(&self.accumulator)?;
88                    self.accumulator.clear();
89
90                    return ready(Some(headers));
91                } else {
92                    return ready(Some(parse_headers(headers.as_slice())?));
93                }
94            } else if let Some(split_idx) = header_end_split(&self.accumulator, chunk.as_slice()) {
95                let (head, tail) = chunk.split_at(split_idx);
96                self.accumulator.extend_from_slice(head.as_slice());
97                stream.push_chunk(tail);
98                continue;
99            }
100
101            if self.accumulator.len().saturating_add(chunk.len()) > MAX_BUF_LEN {
102                return error("headers section too long or trailing double-CRLF missing");
103            }
104
105            self.accumulator.extend_from_slice(chunk.as_slice());
106        }
107    }
108}
109
const CRLF2: &[u8] = b"\r\n\r\n";

/// Detect a double-CRLF that begins at the end of `first` and finishes at the start of
/// `second`.
///
/// On a match, returns the index in `second` just past the double-CRLF, i.e. where
/// `second` should be split. Returns `None` if the sequence does not straddle the two
/// slices, including when it lies wholly inside either one.
fn header_end_split(first: &[u8], second: &[u8]) -> Option<usize> {
    // `overlap` is how many bytes of the terminator sit at the tail of `first`; the longest
    // overlap is tried first
    (1..CRLF2.len())
        .rev()
        .find(|&overlap| {
            let (in_first, in_second) = CRLF2.split_at(overlap);
            first.ends_with(in_first) && second.starts_with(in_second)
        })
        .map(|overlap| CRLF2.len() - overlap)
}
129
130fn parse_headers<E: StreamError>(bytes: &[u8]) -> Result<FieldHeaders, E> {
131    debug_assert!(bytes.ends_with(b"\r\n\r\n"),
132                  "header byte sequence does not end with `\\r\\n\\r\\n`: {}",
133                  show_bytes(bytes));
134
135    let mut header_buf = [EMPTY_HEADER; MAX_HEADERS];
136
137    let headers = match httparse::parse_headers(bytes, &mut header_buf) {
138        Ok(Status::Complete((_, headers))) => headers,
139        Ok(Status::Partial) => ret_err!("field headers incomplete: {}", show_bytes(bytes)),
140        Err(e) => ret_err!("error parsing headers: {}; from buffer: {}", e, show_bytes(bytes)),
141    };
142
143    let mut out_headers = FieldHeaders::default();
144
145    for header in headers {
146        let str_val = str::from_utf8(header.value)
147            .or_else(|_| error("multipart field headers must be UTF-8 encoded"))?
148            .trim();
149
150        match header.name {
151            "Content-Disposition" => parse_cont_disp_val(str_val, &mut out_headers)?,
152            "Content-Type" => out_headers.content_type = Some(str_val.parse::<Mime>()
153                 .or_else(|_| ret_err!("could not parse MIME type from {:?}", str_val))?),
154            _ => (),
155        }
156    }
157
158    Ok(out_headers)
159}
160
161fn parse_cont_disp_val<E: StreamError>(val: &str, out: &mut FieldHeaders) -> Result<(), E> {
162    // Only take the first section, the rest can be in quoted strings that we want to handle
163    let mut sections = val.splitn(1, ';').map(str::trim);
164
165    match sections.next() {
166        Some("form-data") => (),
167        Some(other) => ret_err!("unexpected multipart field Content-Disposition: {}", other),
168        None => return error("each multipart field requires a Content-Disposition: form-data header"),
169    }
170
171    let mut rem = sections.next().unwrap_or("");
172
173    while let Some((key, val, rest)) = parse_keyval(rem) {
174        rem = rest;
175
176        match key {
177            "name" => out.name = val.to_string(),
178            "filename" => out.filename = Some(val.to_string()),
179            _ => debug!("unknown key-value pair in Content-Disposition: {:?} = {:?}", key, val),
180        }
181    }
182
183    if out.name.is_empty() {
184        ret_err!("expected 'name' attribute in Content-Disposition: {}", val);
185    }
186
187    Ok(())
188}
189
190fn parse_keyval(input: &str) -> Option<(&str, &str, &str)> {
191    let (name, rest) = try_opt!(param_name(input));
192    let (val, rest) = try_opt!(param_val(rest));
193
194    Some((name, val, rest))
195}
196
/// Split a parameter name off the front of `input`, returning `(name, remainder)`.
///
/// Leading spaces and `;` separators are skipped. The name is everything up to the first
/// `=`, trimmed; the remainder is everything after that `=`, untouched. If `input` holds no
/// `=`, the whole (trimmed) input is returned as the name with an empty remainder.
fn param_name(input: &str) -> Option<(&str, &str)> {
    // `splitn(2, ..)` yields at most two items, so only the first `=` separates name from
    // value; any later `=` stays part of the remainder
    let mut splits = input.trim_left_matches(&[' ', ';'][..]).splitn(2, '=');

    let name = match splits.next() {
        Some(name) => name.trim(),
        None => return None,
    };
    let rem = splits.next().unwrap_or("");

    Some((name, rem))
}
205
206fn param_val(input: &str) -> Option<(&str, &str)> {
207    let pat: &[char] = &['"'];
208    let mut splits = input.splitn(2, pat);
209
210    let token = try_opt!(splits.next()).trim();
211
212    // the value doesn't have to be in quotes if it doesn't contain forbidden chars like `;`
213    if !token.is_empty() {
214        let mut splits = token.splitn(1, ';');
215        let token = try_opt!(splits.next()).trim();
216        let rem = splits.next().unwrap_or("");
217
218        return Some((token, rem));
219    }
220
221    let qstr = try_opt!(splits.next()).trim();
222    let rem = splits.next().unwrap_or_else(|| { warn!("unterminated quote: {:?}", qstr); "" });
223
224    Some((qstr, rem))
225}
226
/// Checks `header_end_split` for every way the double-CRLF can straddle two slices, and for
/// the cases where it lies wholly inside one of them.
#[test]
fn test_header_end_split() {
    // (first, second, expected split index in `second`)
    let cases: [(&[u8], &[u8], Option<usize>); 5] = [
        (b"\r\n\r", b"\n", Some(1)),
        (b"\r\n", b"\r\n", Some(2)),
        (b"\r", b"\n\r\n", Some(3)),
        (b"\r\n\r\n", b"FOOBAR", None),
        (b"FOOBAR", b"\r\n\r\n", None),
    ];

    for &(first, second, expected) in cases.iter() {
        assert_eq!(header_end_split(first, second), expected);
    }
}