mime_multipart/
lib.rs

1// Copyright 2016-2020 mime-multipart Developers
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// http://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8pub mod error;
9
10#[cfg(test)]
11mod mock;
12#[cfg(test)]
13mod tests;
14
15pub use error::Error;
16
17use std::fs::File;
18use std::io::{BufRead, BufReader, Read, Write};
19use std::path::{Path, PathBuf};
20use std::borrow::Cow;
21use std::ops::Drop;
22use encoding::{all, Encoding, DecoderTrap};
23use hyper::header::{ContentType, Headers, ContentDisposition, DispositionParam,
24                    DispositionType, Charset};
25use textnonce::TextNonce;
26use mime::{Attr, Mime, TopLevel, Value};
27use buf_read_ext::BufReadExt;
28
29/// A multipart part which is not a file (stored in memory)
30#[derive(Clone, Debug, PartialEq)]
31pub struct Part {
32    pub headers: Headers,
33    pub body: Vec<u8>,
34}
35impl Part {
36    /// Mime content-type specified in the header
37    pub fn content_type(&self) -> Option<Mime> {
38        let ct: Option<&ContentType> = self.headers.get();
39        ct.map(|ref ct| ct.0.clone())
40    }
41}
42
43/// A file that is to be inserted into a `multipart/*` or alternatively an uploaded file that
44/// was received as part of `multipart/*` parsing.
45#[derive(Clone, Debug, PartialEq)]
46pub struct FilePart {
47    /// The headers of the part
48    pub headers: Headers,
49    /// A temporary file containing the file content
50    pub path: PathBuf,
51    /// Optionally, the size of the file.  This is filled when multiparts are parsed, but is
52    /// not necessary when they are generated.
53    pub size: Option<usize>,
54    // The temporary directory the upload was put into, saved for the Drop trait
55    tempdir: Option<PathBuf>,
56}
57impl FilePart {
58    pub fn new(headers: Headers, path: &Path) -> FilePart
59    {
60        FilePart {
61            headers: headers,
62            path: path.to_owned(),
63            size: None,
64            tempdir: None,
65        }
66    }
67
68    /// If you do not want the file on disk to be deleted when Self drops, call this
69    /// function.  It will become your responsability to clean up.
70    pub fn do_not_delete_on_drop(&mut self) {
71        self.tempdir = None;
72    }
73
74    /// Create a new temporary FilePart (when created this way, the file will be
75    /// deleted once the FilePart object goes out of scope).
76    pub fn create(headers: Headers) -> Result<FilePart, Error> {
77        // Setup a file to capture the contents.
78        let mut path = tempfile::Builder::new().prefix("mime_multipart").tempdir()?.into_path();
79        let tempdir = Some(path.clone());
80        path.push(TextNonce::sized_urlsafe(32).unwrap().into_string());
81        Ok(FilePart {
82            headers: headers,
83            path: path,
84            size: None,
85            tempdir: tempdir,
86        })
87    }
88
89    /// Filename that was specified when the file was uploaded.  Returns `Ok<None>` if there
90    /// was no content-disposition header supplied.
91    pub fn filename(&self) -> Result<Option<String>, Error> {
92        let cd: Option<&ContentDisposition> = self.headers.get();
93        match cd {
94            Some(cd) => get_content_disposition_filename(cd),
95            None => Ok(None),
96        }
97    }
98
99    /// Mime content-type specified in the header
100    pub fn content_type(&self) -> Option<Mime> {
101        let ct: Option<&ContentType> = self.headers.get();
102        ct.map(|ref ct| ct.0.clone())
103    }
104}
105impl Drop for FilePart {
106    fn drop(&mut self) {
107        if self.tempdir.is_some() {
108            let _ = ::std::fs::remove_file(&self.path);
109            let _ = ::std::fs::remove_dir(&self.tempdir.as_ref().unwrap());
110        }
111    }
112}
113
114/// A multipart part which could be either a file, in memory, or another multipart
115/// container containing nested parts.
116#[derive(Clone, Debug)]
117pub enum Node {
118    /// A part in memory
119    Part(Part),
120    /// A part streamed to a file
121    File(FilePart),
122    /// A container of nested multipart parts
123    Multipart((Headers, Vec<Node>)),
124}
125
126/// Parse a MIME `multipart/*` from a `Read`able stream into a `Vec` of `Node`s, streaming
127/// files to disk and keeping the rest in memory.  Recursive `multipart/*` parts will are
128/// parsed as well and returned within a `Node::Multipart` variant.
129///
130/// If `always_use_files` is true, all parts will be streamed to files.  If false, only parts
131/// with a `ContentDisposition` header set to `Attachment` or otherwise containing a `Filename`
132/// parameter will be streamed to files.
133///
134/// It is presumed that the headers are still in the stream.  If you have them separately,
135/// use `read_multipart_body()` instead.
136pub fn read_multipart<S: Read>(
137    stream: &mut S,
138    always_use_files: bool)
139    -> Result<Vec<Node>, Error>
140{
141    let mut reader = BufReader::with_capacity(4096, stream);
142    let mut nodes: Vec<Node> = Vec::new();
143
144    let mut buf: Vec<u8> = Vec::new();
145
146    let (_, found) = reader.stream_until_token(b"\r\n\r\n", &mut buf)?;
147    if ! found { return Err(Error::EofInMainHeaders); }
148
149    // Keep the CRLFCRLF as httparse will expect it
150    buf.extend(b"\r\n\r\n".iter().cloned());
151
152    // Parse the headers
153    let mut header_memory = [httparse::EMPTY_HEADER; 64];
154    let headers = match httparse::parse_headers(&buf, &mut header_memory) {
155        Ok(httparse::Status::Complete((_, raw_headers))) => {
156            Headers::from_raw(raw_headers).map_err(|e| From::from(e))
157        },
158        Ok(httparse::Status::Partial) => Err(Error::PartialHeaders),
159        Err(err) => Err(From::from(err)),
160    }?;
161
162    inner(&mut reader, &headers, &mut nodes, always_use_files)?;
163    Ok(nodes)
164}
165
166/// Parse a MIME `multipart/*` from a `Read`able stream into a `Vec` of `Node`s, streaming
167/// files to disk and keeping the rest in memory.  Recursive `multipart/*` parts will are
168/// parsed as well and returned within a `Node::Multipart` variant.
169///
170/// If `always_use_files` is true, all parts will be streamed to files.  If false, only parts
171/// with a `ContentDisposition` header set to `Attachment` or otherwise containing a `Filename`
172/// parameter will be streamed to files.
173///
174/// It is presumed that you have the `Headers` already and the stream starts at the body.
175/// If the headers are still in the stream, use `read_multipart()` instead.
176pub fn read_multipart_body<S: Read>(
177    stream: &mut S,
178    headers: &Headers,
179    always_use_files: bool)
180    -> Result<Vec<Node>, Error>
181{
182    let mut reader = BufReader::with_capacity(4096, stream);
183    let mut nodes: Vec<Node> = Vec::new();
184    inner(&mut reader, headers, &mut nodes, always_use_files)?;
185    Ok(nodes)
186}
187
188fn inner<R: BufRead>(
189    reader: &mut R,
190    headers: &Headers,
191    nodes: &mut Vec<Node>,
192    always_use_files: bool)
193    -> Result<(), Error>
194{
195    let mut buf: Vec<u8> = Vec::new();
196
197    let boundary = get_multipart_boundary(headers)?;
198
199    // Read past the initial boundary
200    let (_, found) = reader.stream_until_token(&boundary, &mut buf)?;
201    if ! found { return Err(Error::EofBeforeFirstBoundary); }
202
203    // Define the boundary, including the line terminator preceding it.
204    // Use their first line terminator to determine whether to use CRLF or LF.
205    let (lt, ltlt, lt_boundary) = {
206        let peeker = reader.fill_buf()?;
207        if peeker.len() > 1 && &peeker[..2]==b"\r\n" {
208            let mut output = Vec::with_capacity(2 + boundary.len());
209            output.push(b'\r');
210            output.push(b'\n');
211            output.extend(boundary.clone());
212            (vec![b'\r', b'\n'], vec![b'\r', b'\n', b'\r', b'\n'], output)
213        }
214        else if peeker.len() > 0 && peeker[0]==b'\n' {
215            let mut output = Vec::with_capacity(1 + boundary.len());
216            output.push(b'\n');
217            output.extend(boundary.clone());
218            (vec![b'\n'], vec![b'\n', b'\n'], output)
219        }
220        else {
221            return Err(Error::NoCrLfAfterBoundary);
222        }
223    };
224
225    loop {
226        // If the next two lookahead characters are '--', parsing is finished.
227        {
228            let peeker = reader.fill_buf()?;
229            if peeker.len() >= 2 && &peeker[..2] == b"--" {
230                return Ok(());
231            }
232        }
233
234        // Read the line terminator after the boundary
235        let (_, found) = reader.stream_until_token(&lt, &mut buf)?;
236        if ! found { return Err(Error::NoCrLfAfterBoundary); }
237
238        // Read the headers (which end in 2 line terminators)
239        buf.truncate(0); // start fresh
240        let (_, found) = reader.stream_until_token(&ltlt, &mut buf)?;
241        if ! found { return Err(Error::EofInPartHeaders); }
242
243        // Keep the 2 line terminators as httparse will expect it
244        buf.extend(ltlt.iter().cloned());
245
246        // Parse the headers
247        let part_headers = {
248            let mut header_memory = [httparse::EMPTY_HEADER; 4];
249            match httparse::parse_headers(&buf, &mut header_memory) {
250                Ok(httparse::Status::Complete((_, raw_headers))) => {
251                    Headers::from_raw(raw_headers).map_err(|e| From::from(e))
252                },
253                Ok(httparse::Status::Partial) => Err(Error::PartialHeaders),
254                Err(err) => Err(From::from(err)),
255            }?
256        };
257
258        // Check for a nested multipart
259        let nested = {
260            let ct: Option<&ContentType> = part_headers.get();
261            if let Some(ct) = ct {
262                let &ContentType(Mime(ref top_level, _, _)) = ct;
263                *top_level == TopLevel::Multipart
264            } else {
265                false
266            }
267        };
268        if nested {
269            // Recurse:
270            let mut inner_nodes: Vec<Node> = Vec::new();
271            inner(reader, &part_headers, &mut inner_nodes, always_use_files)?;
272            nodes.push(Node::Multipart((part_headers, inner_nodes)));
273            continue;
274        }
275
276        let is_file = always_use_files || {
277            let cd: Option<&ContentDisposition> = part_headers.get();
278            if cd.is_some() {
279                if cd.unwrap().disposition == DispositionType::Attachment {
280                    true
281                } else {
282                    cd.unwrap().parameters.iter().any(|x| match x {
283                        &DispositionParam::Filename(_,_,_) => true,
284                        _ => false
285                    })
286                }
287            } else {
288                false
289            }
290        };
291        if is_file {
292            // Setup a file to capture the contents.
293            let mut filepart = FilePart::create(part_headers)?;
294            let mut file = File::create(filepart.path.clone())?;
295
296            // Stream out the file.
297            let (read, found) = reader.stream_until_token(&lt_boundary, &mut file)?;
298            if ! found { return Err(Error::EofInFile); }
299            filepart.size = Some(read);
300
301            // TODO: Handle Content-Transfer-Encoding.  RFC 7578 section 4.7 deprecated
302            // this, and the authors state "Currently, no deployed implementations that
303            // send such bodies have been discovered", so this is very low priority.
304
305            nodes.push(Node::File(filepart));
306        } else {
307            buf.truncate(0); // start fresh
308            let (_, found) = reader.stream_until_token(&lt_boundary, &mut buf)?;
309            if ! found { return Err(Error::EofInPart); }
310
311            nodes.push(Node::Part(Part {
312                headers: part_headers,
313                body: buf.clone(),
314            }));
315        }
316    }
317}
318
319/// Get the `multipart/*` boundary string from `hyper::Headers`
320pub fn get_multipart_boundary(headers: &Headers) -> Result<Vec<u8>, Error> {
321    // Verify that the request is 'Content-Type: multipart/*'.
322    let ct: &ContentType = match headers.get() {
323        Some(ct) => ct,
324        None => return Err(Error::NoRequestContentType),
325    };
326    let ContentType(ref mime) = *ct;
327    let Mime(ref top_level, _, ref params) = *mime;
328
329    if *top_level != TopLevel::Multipart {
330        return Err(Error::NotMultipart);
331    }
332
333    for &(ref attr, ref val) in params.iter() {
334        if let (&Attr::Boundary, &Value::Ext(ref val)) = (attr, val) {
335            let mut boundary = Vec::with_capacity(2 + val.len());
336            boundary.extend(b"--".iter().cloned());
337            boundary.extend(val.as_bytes());
338            return Ok(boundary);
339        }
340    }
341    Err(Error::BoundaryNotSpecified)
342}
343
344#[inline]
345fn get_content_disposition_filename(cd: &ContentDisposition) -> Result<Option<String>, Error> {
346    if let Some(&DispositionParam::Filename(ref charset, _, ref bytes)) =
347        cd.parameters.iter().find(|&x| match *x {
348            DispositionParam::Filename(_,_,_) => true,
349            _ => false,
350        })
351    {
352        match charset_decode(charset, bytes) {
353            Ok(filename) => Ok(Some(filename)),
354            Err(e) => Err(Error::Decoding(e)),
355        }
356    } else {
357        Ok(None)
358    }
359}
360
361// This decodes bytes encoded according to a hyper::header::Charset encoding, using the
362// rust-encoding crate.  Only supports encodings defined in both crates.
363fn charset_decode(charset: &Charset, bytes: &[u8]) -> Result<String, Cow<'static, str>> {
364    Ok(match *charset {
365        Charset::Us_Ascii => all::ASCII.decode(bytes, DecoderTrap::Strict)?,
366        Charset::Iso_8859_1 => all::ISO_8859_1.decode(bytes, DecoderTrap::Strict)?,
367        Charset::Iso_8859_2 => all::ISO_8859_2.decode(bytes, DecoderTrap::Strict)?,
368        Charset::Iso_8859_3 => all::ISO_8859_3.decode(bytes, DecoderTrap::Strict)?,
369        Charset::Iso_8859_4 => all::ISO_8859_4.decode(bytes, DecoderTrap::Strict)?,
370        Charset::Iso_8859_5 => all::ISO_8859_5.decode(bytes, DecoderTrap::Strict)?,
371        Charset::Iso_8859_6 => all::ISO_8859_6.decode(bytes, DecoderTrap::Strict)?,
372        Charset::Iso_8859_7 => all::ISO_8859_7.decode(bytes, DecoderTrap::Strict)?,
373        Charset::Iso_8859_8 => all::ISO_8859_8.decode(bytes, DecoderTrap::Strict)?,
374        Charset::Iso_8859_9 => return Err("ISO_8859_9 is not supported".into()),
375        Charset::Iso_8859_10 => all::ISO_8859_10.decode(bytes, DecoderTrap::Strict)?,
376        Charset::Shift_Jis => return Err("Shift_Jis is not supported".into()),
377        Charset::Euc_Jp => all::EUC_JP.decode(bytes, DecoderTrap::Strict)?,
378        Charset::Iso_2022_Kr => return Err("Iso_2022_Kr is not supported".into()),
379        Charset::Euc_Kr => return Err("Euc_Kr is not supported".into()),
380        Charset::Iso_2022_Jp => all::ISO_2022_JP.decode(bytes, DecoderTrap::Strict)?,
381        Charset::Iso_2022_Jp_2 => return Err("Iso_2022_Jp_2 is not supported".into()),
382        Charset::Iso_8859_6_E => return Err("Iso_8859_6_E is not supported".into()),
383        Charset::Iso_8859_6_I => return Err("Iso_8859_6_I is not supported".into()),
384        Charset::Iso_8859_8_E => return Err("Iso_8859_8_E is not supported".into()),
385        Charset::Iso_8859_8_I => return Err("Iso_8859_8_I is not supported".into()),
386        Charset::Gb2312 => return Err("Gb2312 is not supported".into()),
387        Charset::Big5 => all::BIG5_2003.decode(bytes, DecoderTrap::Strict)?,
388        Charset::Koi8_R => all::KOI8_R.decode(bytes, DecoderTrap::Strict)?,
389        Charset::Ext(ref s) => match &**s {
390            "UTF-8" => all::UTF_8.decode(bytes, DecoderTrap::Strict)?,
391            _ => return Err("Encoding is not supported".into()),
392        },
393    })
394}
395
396/// Generate a valid multipart boundary, statistically unlikely to be found within
397/// the content of the parts.
398pub fn generate_boundary() -> Vec<u8> {
399    TextNonce::sized(68).unwrap().into_string().into_bytes().iter().map(|&ch| {
400        if ch==b'=' { return b'-'; }
401        else if ch==b'/' { return b'.'; }
402        else { return ch; }
403    }).collect()
404}
405
// Extension of `Write`: behaves like `write_all()`, but on success reports how many
// bytes were written (always the full length of `buf`), so callers can keep a total.
trait WriteAllCount {
    fn write_all_count(&mut self, buf: &[u8]) -> ::std::io::Result<usize>;
}

// Available on every writer.
impl<T: Write> WriteAllCount for T {
    fn write_all_count(&mut self, buf: &[u8]) -> ::std::io::Result<usize> {
        self.write_all(buf).map(|_| buf.len())
    }
}
417
418/// Stream a multipart body to the output `stream` given, made up of the `parts`
419/// given.  Top-level headers are NOT included in this stream; the caller must send
420/// those prior to calling write_multipart().
421/// Returns the number of bytes written, or an error.
422pub fn write_multipart<S: Write>(
423    stream: &mut S,
424    boundary: &Vec<u8>,
425    nodes: &Vec<Node>)
426    -> Result<usize, Error>
427{
428    let mut count: usize = 0;
429
430    for node in nodes {
431        // write a boundary
432        count += stream.write_all_count(b"--")?;
433        count += stream.write_all_count(&boundary)?;
434        count += stream.write_all_count(b"\r\n")?;
435
436        match node {
437            &Node::Part(ref part) => {
438                // write the part's headers
439                for header in part.headers.iter() {
440                    count += stream.write_all_count(header.name().as_bytes())?;
441                    count += stream.write_all_count(b": ")?;
442                    count += stream.write_all_count(header.value_string().as_bytes())?;
443                    count += stream.write_all_count(b"\r\n")?;
444                }
445
446                // write the blank line
447                count += stream.write_all_count(b"\r\n")?;
448
449                // Write the part's content
450                count += stream.write_all_count(&part.body)?;
451            },
452            &Node::File(ref filepart) => {
453                // write the part's headers
454                for header in filepart.headers.iter() {
455                    count += stream.write_all_count(header.name().as_bytes())?;
456                    count += stream.write_all_count(b": ")?;
457                    count += stream.write_all_count(header.value_string().as_bytes())?;
458                    count += stream.write_all_count(b"\r\n")?;
459                }
460
461                // write the blank line
462                count += stream.write_all_count(b"\r\n")?;
463
464                // Write out the files's content
465                let mut file = File::open(&filepart.path)?;
466                count += std::io::copy(&mut file, stream)? as usize;
467            },
468            &Node::Multipart((ref headers, ref subnodes)) => {
469                // Get boundary
470                let boundary = get_multipart_boundary(headers)?;
471
472                // write the multipart headers
473                for header in headers.iter() {
474                    count += stream.write_all_count(header.name().as_bytes())?;
475                    count += stream.write_all_count(b": ")?;
476                    count += stream.write_all_count(header.value_string().as_bytes())?;
477                    count += stream.write_all_count(b"\r\n")?;
478                }
479
480                // write the blank line
481                count += stream.write_all_count(b"\r\n")?;
482
483                // Recurse
484                count += write_multipart(stream, &boundary, &subnodes)?;
485            },
486        }
487
488        // write a line terminator
489        count += stream.write_all_count(b"\r\n")?;
490    }
491
492    // write a final boundary
493    count += stream.write_all_count(b"--")?;
494    count += stream.write_all_count(&boundary)?;
495    count += stream.write_all_count(b"--")?;
496
497    Ok(count)
498}
499
/// Write `chunk` to `stream` framed as a single chunk: the length in lowercase
/// hexadecimal, CRLF, the bytes themselves, CRLF.
///
/// An empty `chunk` therefore produces `0\r\n\r\n`.
pub fn write_chunk<S: Write>(
    stream: &mut S,
    chunk: &[u8]) -> Result<(), ::std::io::Error>
{
    let size_line = format!("{:x}\r\n", chunk.len());
    stream.write_all(size_line.as_bytes())?;
    stream.write_all(chunk)?;
    stream.write_all(b"\r\n")
}
509
510/// Stream a multipart body to the output `stream` given, made up of the `parts`
511/// given, using Tranfer-Encoding: Chunked.  Top-level headers are NOT included in this
512/// stream; the caller must send those prior to calling write_multipart_chunked().
513pub fn write_multipart_chunked<S: Write>(
514    stream: &mut S,
515    boundary: &Vec<u8>,
516    nodes: &Vec<Node>)
517    -> Result<(), Error>
518{
519    for node in nodes {
520        // write a boundary
521        write_chunk(stream, b"--")?;
522        write_chunk(stream, &boundary)?;
523        write_chunk(stream, b"\r\n")?;
524
525        match node {
526            &Node::Part(ref part) => {
527                // write the part's headers
528                for header in part.headers.iter() {
529                    write_chunk(stream, header.name().as_bytes())?;
530                    write_chunk(stream, b": ")?;
531                    write_chunk(stream, header.value_string().as_bytes())?;
532                    write_chunk(stream, b"\r\n")?;
533                }
534
535                // write the blank line
536                write_chunk(stream, b"\r\n")?;
537
538                // Write the part's content
539                write_chunk(stream, &part.body)?;
540            },
541            &Node::File(ref filepart) => {
542                // write the part's headers
543                for header in filepart.headers.iter() {
544                    write_chunk(stream, header.name().as_bytes())?;
545                    write_chunk(stream, b": ")?;
546                    write_chunk(stream, header.value_string().as_bytes())?;
547                    write_chunk(stream, b"\r\n")?;
548                }
549
550                // write the blank line
551                write_chunk(stream, b"\r\n")?;
552
553                // Write out the files's length
554                let metadata = std::fs::metadata(&filepart.path)?;
555                write!(stream, "{:x}\r\n", metadata.len())?;
556
557                // Write out the file's content
558                let mut file = File::open(&filepart.path)?;
559                std::io::copy(&mut file, stream)? as usize;
560                stream.write(b"\r\n")?;
561            },
562            &Node::Multipart((ref headers, ref subnodes)) => {
563                // Get boundary
564                let boundary = get_multipart_boundary(headers)?;
565
566                // write the multipart headers
567                for header in headers.iter() {
568                    write_chunk(stream, header.name().as_bytes())?;
569                    write_chunk(stream, b": ")?;
570                    write_chunk(stream, header.value_string().as_bytes())?;
571                    write_chunk(stream, b"\r\n")?;
572                }
573
574                // write the blank line
575                write_chunk(stream, b"\r\n")?;
576
577                // Recurse
578                write_multipart_chunked(stream, &boundary, &subnodes)?;
579            },
580        }
581
582        // write a line terminator
583        write_chunk(stream, b"\r\n")?;
584    }
585
586    // write a final boundary
587    write_chunk(stream, b"--")?;
588    write_chunk(stream, &boundary)?;
589    write_chunk(stream, b"--")?;
590
591    // Write an empty chunk to signal the end of the body
592    write_chunk(stream, b"")?;
593
594    Ok(())
595}