cogo_http/multipart/
mod.rs

1// Copyright 2016-2020 mime-multipart Developers
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// http://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8pub mod error;
9pub mod mult_part;
10pub mod byte_buf;
11
12#[cfg(test)]
13mod mock;
14#[cfg(test)]
15mod tests;
16
17pub use error::Error;
18
19use std::fs::File;
20use std::io::{BufRead, BufReader, Read, Write};
21use std::path::{Path, PathBuf};
22use std::borrow::{BorrowMut, Cow};
23use std::cell::RefCell;
24use std::fmt::{Debug, Formatter};
25use std::ops::{DerefMut, Drop};
26use std::sync::Arc;
27use encoding::{all, Encoding, DecoderTrap};
28use textnonce::TextNonce;
29use mime::{Attr, Mime, TopLevel, Value};
30use buf_read_ext::BufReadExt;
31use crate::header::{Charset, ContentDisposition, ContentType, DispositionParam, DispositionType, Headers};
32
33
/// Marker trait for any value that can be both read from and written to.
///
/// It exists so that a single boxed trait object (`Box<dyn ReadWrite>`) can act
/// as the sink when a multipart body is parsed and as the source when one is
/// generated.
pub trait ReadWrite: Read + Write {}

/// Every type that is both `Read` and `Write` is automatically a `ReadWrite`.
impl<T: Read + Write> ReadWrite for T {}
39
40/// A multipart part which is not a file (stored in memory)
41#[derive(Clone, Debug, PartialEq)]
42pub struct Part {
43    pub headers: Headers,
44    pub body: Vec<u8>,
45}
46
47impl Part {
48    /// Mime content-type specified in the header
49    pub fn content_type(&self) -> Option<Mime> {
50        let ct: Option<&ContentType> = self.headers.get();
51        ct.map(|ref ct| ct.0.clone())
52    }
53}
54
/// A file that is to be inserted into a `multipart/*` or alternatively an uploaded file that
/// was received as part of `multipart/*` parsing.
pub struct FilePart {
    /// The headers of the part
    pub headers: Headers,
    /// Optionally, the size of the file.  This is filled when multiparts are parsed, but is
    /// not necessary when they are generated.
    pub size: Option<usize>,

    /// Location of the file on disk.  `write_multipart_chunked()` opens this path to
    /// read the content; `FilePart::create()` leaves it empty.
    pub path:PathBuf,

    /// The `name` parameter of the part's content-disposition header, as filled in by
    /// `FilePart::create()`; empty when the part was built with `FilePart::new()`.
    pub key: String,

    /// Optional I/O handle for the part's content.  Parsing streams the received body
    /// into it, and `write_multipart()` copies the outgoing body out of it.
    pub write: Option<Box<dyn ReadWrite>>,
}
70
71impl Clone for FilePart{
72    fn clone(&self) -> Self {
73        Self{
74            headers: self.headers.clone(),
75            size: self.size.clone(),
76            path: Default::default(),
77            key: "".to_string(),
78            write: None
79        }
80    }
81}
82
83impl Debug for FilePart{
84    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
85        f.debug_struct("FilePart")
86            .field("headers",&self.headers)
87            .field("size",&self.size)
88            .finish()
89    }
90}
91
92impl FilePart {
93    pub fn new(headers: Headers, path: PathBuf) -> FilePart
94    {
95        FilePart {
96            headers: headers,
97            size: None,
98            path: path,
99            key: "".to_string(),
100            write: None,
101        }
102    }
103
104    /// set any Write and Read impl struct to FilePart
105    pub fn set_write<W: ReadWrite+'static>(&mut self, w: W) {
106        self.write = Some(Box::new(w));
107    }
108
109    pub fn set_path(&mut self, path: PathBuf) {
110        self.path = path;
111    }
112
113    /// Create a new temporary FilePart (when created this way, the file will be
114    /// deleted once the FilePart object goes out of scope).
115    pub fn create(headers: Headers) -> Result<FilePart, Error> {
116        let cd_name: Option<String> = {
117            let cd: &ContentDisposition = match headers.get() {
118                Some(cd) => cd,
119                None => return Err(Error::MissingDisposition),
120            };
121            crate::multipart::mult_part::get_content_disposition_name(&cd)
122        };
123        Ok(FilePart {
124            headers: headers,
125            size: None,
126            path: Default::default(),
127            key: cd_name.unwrap_or_default(),
128            write: None,
129        })
130    }
131
132    /// Filename that was specified when the file was uploaded.  Returns `Ok<None>` if there
133    /// was no content-disposition header supplied.
134    pub fn filename(&self) -> Result<String, Error> {
135        let cd: Option<&ContentDisposition> = self.headers.get();
136        match cd {
137            Some(cd) => match get_content_disposition_filename(cd) {
138                Ok(v) => { Ok(v.unwrap()) }
139                Err(e) => { Err(e) }
140            },
141            None => Err(Error::NoName),
142        }
143    }
144
145    /// Mime content-type specified in the header
146    pub fn content_type(&self) -> Option<Mime> {
147        let ct: Option<&ContentType> = self.headers.get();
148        ct.map(|ref ct| ct.0.clone())
149    }
150}
151
/// A multipart part which could be either a file, in memory, or another multipart
/// container containing nested parts.
///
/// Cloning a `Node::File` goes through `FilePart`'s hand-written `Clone`
/// implementation, which does not carry over the attached `write` handle.
#[derive(Clone, Debug)]
pub enum Node {
    /// A part in memory
    Part(Part),
    /// A part streamed to a file
    File(FilePart),
    /// A container of nested multipart parts: the container's own headers (which
    /// carry the nested boundary) followed by the nodes it contains.
    Multipart((Headers, Vec<Node>)),
}
163
164/// Parse a MIME `multipart/*` from a `Read`able stream into a `Vec` of `Node`s, streaming
165/// files to disk and keeping the rest in memory.  Recursive `multipart/*` parts will are
166/// parsed as well and returned within a `Node::Multipart` variant.
167///
168/// If `always_use_files` is true, all parts will be streamed to files.  If false, only parts
169/// with a `ContentDisposition` header set to `Attachment` or otherwise containing a `Filename`
170/// parameter will be streamed to files.
171///
172/// It is presumed that the headers are still in the stream.  If you have them separately,
173/// use `read_multipart_body()` instead.
174pub fn read_multipart<S: Read>(
175    stream: &mut S,
176    always_use_files: bool, f: Option<fn(name: &mut FilePart) -> std::io::Result<()>>)
177    -> Result<Vec<Node>, Error>
178{
179    let mut reader = BufReader::with_capacity(4096, stream);
180    let mut nodes: Vec<Node> = Vec::new();
181
182    let mut buf: Vec<u8> = Vec::new();
183
184    let (_, found) = reader.stream_until_token(b"\r\n\r\n", &mut buf)?;
185    if !found { return Err(Error::EofInMainHeaders); }
186
187    // Keep the CRLFCRLF as httparse will expect it
188    buf.extend(b"\r\n\r\n".iter().cloned());
189
190    // Parse the headers
191    let mut header_memory = [httparse::EMPTY_HEADER; 64];
192    let headers = match httparse::parse_headers(&buf, &mut header_memory) {
193        Ok(httparse::Status::Complete((_, raw_headers))) => {
194            Headers::from_raw(raw_headers).map_err(|e| From::from(e))
195        }
196        Ok(httparse::Status::Partial) => Err(Error::PartialHeaders),
197        Err(err) => Err(From::from(err)),
198    }?;
199
200    inner(&mut reader, &headers, &mut nodes, always_use_files, f)?;
201    Ok(nodes)
202}
203
204/// Parse a MIME `multipart/*` from a `Read`able stream into a `Vec` of `Node`s, streaming
205/// files to disk and keeping the rest in memory.  Recursive `multipart/*` parts will are
206/// parsed as well and returned within a `Node::Multipart` variant.
207///
208/// If `always_use_files` is true, all parts will be streamed to files.  If false, only parts
209/// with a `ContentDisposition` header set to `Attachment` or otherwise containing a `Filename`
210/// parameter will be streamed to files.
211///
212/// It is presumed that you have the `Headers` already and the stream starts at the body.
213/// If the headers are still in the stream, use `read_multipart()` instead.
214pub fn read_multipart_body<S: Read>(
215    stream: &mut S,
216    headers: &Headers,
217    always_use_files: bool, f: Option<fn(name: &mut FilePart) -> std::io::Result<()>>)
218    -> Result<Vec<Node>, Error>
219{
220    let mut reader = BufReader::with_capacity(4096, stream);
221    let mut nodes: Vec<Node> = Vec::new();
222    inner(&mut reader, headers, &mut nodes, always_use_files, f)?;
223    Ok(nodes)
224}
225
226fn inner<R: BufRead>(
227    reader: &mut R,
228    headers: &Headers,
229    nodes: &mut Vec<Node>,
230    always_use_files: bool,
231    f: Option<fn(name: &mut FilePart) -> std::io::Result<()>>)
232    -> Result<(), Error>
233{
234    let mut buf: Vec<u8> = Vec::new();
235
236    let boundary = get_multipart_boundary(headers)?;
237
238    // Read past the initial boundary
239    let (_, found) = reader.stream_until_token(&boundary, &mut buf)?;
240    if !found { return Err(Error::EofBeforeFirstBoundary); }
241
242    // Define the boundary, including the line terminator preceding it.
243    // Use their first line terminator to determine whether to use CRLF or LF.
244    let (lt, ltlt, lt_boundary) = {
245        let peeker = reader.fill_buf()?;
246        if peeker.len() > 1 && &peeker[..2] == b"\r\n" {
247            let mut output = Vec::with_capacity(2 + boundary.len());
248            output.push(b'\r');
249            output.push(b'\n');
250            output.extend(boundary.clone());
251            (vec![b'\r', b'\n'], vec![b'\r', b'\n', b'\r', b'\n'], output)
252        } else if peeker.len() > 0 && peeker[0] == b'\n' {
253            let mut output = Vec::with_capacity(1 + boundary.len());
254            output.push(b'\n');
255            output.extend(boundary.clone());
256            (vec![b'\n'], vec![b'\n', b'\n'], output)
257        } else {
258            return Err(Error::NoCrLfAfterBoundary);
259        }
260    };
261
262    loop {
263        // If the next two lookahead characters are '--', parsing is finished.
264        {
265            let peeker = reader.fill_buf()?;
266            if peeker.len() >= 2 && &peeker[..2] == b"--" {
267                return Ok(());
268            }
269        }
270
271        // Read the line terminator after the boundary
272        let (_, found) = reader.stream_until_token(&lt, &mut buf)?;
273        if !found { return Err(Error::NoCrLfAfterBoundary); }
274
275        // Read the headers (which end in 2 line terminators)
276        buf.truncate(0); // start fresh
277        let (_, found) = reader.stream_until_token(&ltlt, &mut buf)?;
278        if !found { return Err(Error::EofInPartHeaders); }
279
280        // Keep the 2 line terminators as httparse will expect it
281        buf.extend(ltlt.iter().cloned());
282
283        // Parse the headers
284        let part_headers = {
285            let mut header_memory = [httparse::EMPTY_HEADER; 4];
286            match httparse::parse_headers(&buf, &mut header_memory) {
287                Ok(httparse::Status::Complete((_, raw_headers))) => {
288                    Headers::from_raw(raw_headers).map_err(|e| From::from(e))
289                }
290                Ok(httparse::Status::Partial) => Err(Error::PartialHeaders),
291                Err(err) => Err(From::from(err)),
292            }?
293        };
294
295        // Check for a nested multipart
296        let nested = {
297            let ct: Option<&ContentType> = part_headers.get();
298            if let Some(ct) = ct {
299                let &ContentType(Mime(ref top_level, _, _)) = ct;
300                *top_level == TopLevel::Multipart
301            } else {
302                false
303            }
304        };
305        if nested {
306            // Recurse:
307            let mut inner_nodes: Vec<Node> = Vec::new();
308            inner(reader, &part_headers, &mut inner_nodes, always_use_files, f)?;
309            nodes.push(Node::Multipart((part_headers, inner_nodes)));
310            continue;
311        }
312
313        let is_file = always_use_files || {
314            let cd: Option<&ContentDisposition> = part_headers.get();
315            if cd.is_some() {
316                if cd.unwrap().disposition == DispositionType::Attachment {
317                    true
318                } else {
319                    cd.unwrap().parameters.iter().any(|x| match x {
320                        &DispositionParam::Filename(_, _, _) => true,
321                        _ => false
322                    })
323                }
324            } else {
325                false
326            }
327        };
328        if is_file {
329            // Setup a file to capture the contents.
330            let mut filepart = FilePart::create(part_headers)?;
331
332            match f {
333                None => {}
334                Some(f) => {
335                    f(&mut filepart)?;
336                    if let Some(w) = &mut filepart.write {
337                        // Stream out the file.
338                        let (read, found) = reader.stream_until_token(&lt_boundary, w)?;
339                        if !found { return Err(Error::EofInFile); }
340                        filepart.size = Some(read);
341                        nodes.push(Node::File(filepart));
342                    }
343                }
344            }
345            // TODO: Handle Content-Transfer-Encoding.  RFC 7578 section 4.7 deprecated
346            // this, and the authors state "Currently, no deployed implementations that
347            // send such bodies have been discovered", so this is very low priority.
348        } else {
349            buf.truncate(0); // start fresh
350            let (_, found) = reader.stream_until_token(&lt_boundary, &mut buf)?;
351            if !found { return Err(Error::EofInPart); }
352
353            nodes.push(Node::Part(Part {
354                headers: part_headers,
355                body: buf.clone(),
356            }));
357        }
358    }
359}
360
361/// Get the `multipart/*` boundary string from `hyper::Headers`
362pub fn get_multipart_boundary(headers: &Headers) -> Result<Vec<u8>, Error> {
363    // Verify that the request is 'Content-Type: multipart/*'.
364    let ct: &ContentType = match headers.get() {
365        Some(ct) => ct,
366        None => return Err(Error::NoRequestContentType),
367    };
368    let ContentType(ref mime) = *ct;
369    let Mime(ref top_level, _, ref params) = *mime;
370
371    if *top_level != TopLevel::Multipart {
372        return Err(Error::NotMultipart);
373    }
374
375    for &(ref attr, ref val) in params.iter() {
376        if let (&Attr::Boundary, &Value::Ext(ref val)) = (attr, val) {
377            let mut boundary = Vec::with_capacity(2 + val.len());
378            boundary.extend(b"--".iter().cloned());
379            boundary.extend(val.as_bytes());
380            return Ok(boundary);
381        }
382    }
383    Err(Error::BoundaryNotSpecified)
384}
385
386#[inline]
387fn get_content_disposition_filename(cd: &ContentDisposition) -> Result<Option<String>, Error> {
388    if let Some(&DispositionParam::Filename(ref charset, _, ref bytes)) =
389    cd.parameters.iter().find(|&x| match *x {
390        DispositionParam::Filename(_, _, _) => true,
391        _ => false,
392    })
393    {
394        match charset_decode(charset, bytes) {
395            Ok(filename) => Ok(Some(filename)),
396            Err(e) => Err(Error::Decoding(e)),
397        }
398    } else {
399        Ok(None)
400    }
401}
402
403// This decodes bytes encoded according to a hyper::header::Charset encoding, using the
404// rust-encoding crate.  Only supports encodings defined in both crates.
405fn charset_decode(charset: &Charset, bytes: &[u8]) -> Result<String, Cow<'static, str>> {
406    Ok(match *charset {
407        Charset::Us_Ascii => all::ASCII.decode(bytes, DecoderTrap::Strict)?,
408        Charset::Iso_8859_1 => all::ISO_8859_1.decode(bytes, DecoderTrap::Strict)?,
409        Charset::Iso_8859_2 => all::ISO_8859_2.decode(bytes, DecoderTrap::Strict)?,
410        Charset::Iso_8859_3 => all::ISO_8859_3.decode(bytes, DecoderTrap::Strict)?,
411        Charset::Iso_8859_4 => all::ISO_8859_4.decode(bytes, DecoderTrap::Strict)?,
412        Charset::Iso_8859_5 => all::ISO_8859_5.decode(bytes, DecoderTrap::Strict)?,
413        Charset::Iso_8859_6 => all::ISO_8859_6.decode(bytes, DecoderTrap::Strict)?,
414        Charset::Iso_8859_7 => all::ISO_8859_7.decode(bytes, DecoderTrap::Strict)?,
415        Charset::Iso_8859_8 => all::ISO_8859_8.decode(bytes, DecoderTrap::Strict)?,
416        Charset::Iso_8859_9 => return Err("ISO_8859_9 is not supported".into()),
417        Charset::Iso_8859_10 => all::ISO_8859_10.decode(bytes, DecoderTrap::Strict)?,
418        Charset::Shift_Jis => return Err("Shift_Jis is not supported".into()),
419        Charset::Euc_Jp => all::EUC_JP.decode(bytes, DecoderTrap::Strict)?,
420        Charset::Iso_2022_Kr => return Err("Iso_2022_Kr is not supported".into()),
421        Charset::Euc_Kr => return Err("Euc_Kr is not supported".into()),
422        Charset::Iso_2022_Jp => all::ISO_2022_JP.decode(bytes, DecoderTrap::Strict)?,
423        Charset::Iso_2022_Jp_2 => return Err("Iso_2022_Jp_2 is not supported".into()),
424        Charset::Iso_8859_6_E => return Err("Iso_8859_6_E is not supported".into()),
425        Charset::Iso_8859_6_I => return Err("Iso_8859_6_I is not supported".into()),
426        Charset::Iso_8859_8_E => return Err("Iso_8859_8_E is not supported".into()),
427        Charset::Iso_8859_8_I => return Err("Iso_8859_8_I is not supported".into()),
428        Charset::Gb2312 => return Err("Gb2312 is not supported".into()),
429        Charset::Big5 => all::BIG5_2003.decode(bytes, DecoderTrap::Strict)?,
430        Charset::Koi8_R => all::KOI8_R.decode(bytes, DecoderTrap::Strict)?,
431        Charset::Ext(ref s) => match &**s {
432            "UTF-8" => all::UTF_8.decode(bytes, DecoderTrap::Strict)?,
433            _ => return Err("Encoding is not supported".into()),
434        },
435    })
436}
437
438/// Generate a valid multipart boundary, statistically unlikely to be found within
439/// the content of the parts.
440pub fn generate_boundary() -> Vec<u8> {
441    TextNonce::sized(68).unwrap().into_string().into_bytes()
442}
443
// Extension of `Write`: behaves like `write_all()`, but on success reports how
// many bytes were written (always the full length of `buf`).
trait WriteAllCount {
    fn write_all_count(&mut self, buf: &[u8]) -> ::std::io::Result<usize>;
}

impl<T: Write> WriteAllCount for T {
    fn write_all_count(&mut self, buf: &[u8]) -> ::std::io::Result<usize>
    {
        self.write_all(buf).map(|()| buf.len())
    }
}
456
457/// Stream a multipart body to the output `stream` given, made up of the `parts`
458/// given.  Top-level headers are NOT included in this stream; the caller must send
459/// those prior to calling write_multipart().
460/// Returns the number of bytes written, or an error.
461pub fn write_multipart<S: Write>(
462    stream: &mut S,
463    boundary: &Vec<u8>,
464    nodes: &mut Vec<Node>,
465    file: Option<fn(name: &mut FilePart) -> std::io::Result<()>>)
466    -> Result<usize, Error>
467{
468    let mut count: usize = 0;
469
470    for node in nodes {
471        // write a boundary
472        count += stream.write_all_count(b"--")?;
473        count += stream.write_all_count(&boundary)?;
474        count += stream.write_all_count(b"\r\n")?;
475
476        match node {
477            &mut Node::Part(ref part) => {
478                // write the part's headers
479                for header in part.headers.iter() {
480                    count += stream.write_all_count(header.name().as_bytes())?;
481                    count += stream.write_all_count(b": ")?;
482                    count += stream.write_all_count(header.value_string().as_bytes())?;
483                    count += stream.write_all_count(b"\r\n")?;
484                }
485
486                // write the blank line
487                count += stream.write_all_count(b"\r\n")?;
488
489                // Write the part's content
490                count += stream.write_all_count(&part.body)?;
491            }
492            &mut Node::File(ref mut filepart) => {
493                // write the part's headers
494                for header in filepart.headers.iter() {
495                    count += stream.write_all_count(header.name().as_bytes())?;
496                    count += stream.write_all_count(b": ")?;
497                    count += stream.write_all_count(header.value_string().as_bytes())?;
498                    count += stream.write_all_count(b"\r\n")?;
499                }
500
501                // write the blank line
502                count += stream.write_all_count(b"\r\n")?;
503
504                // Write out the files's content
505                match file{
506                    None => {}
507                    Some(f) => {
508                        f(filepart)?;
509                        count += std::io::copy(filepart.write.as_mut().unwrap(), stream)? as usize;
510                    }
511                }
512            }
513            &mut Node::Multipart((ref headers, ref mut subnodes)) => {
514                // Get boundary
515                let boundary = get_multipart_boundary(headers)?;
516
517                // write the multipart headers
518                for header in headers.iter() {
519                    count += stream.write_all_count(header.name().as_bytes())?;
520                    count += stream.write_all_count(b": ")?;
521                    count += stream.write_all_count(header.value_string().as_bytes())?;
522                    count += stream.write_all_count(b"\r\n")?;
523                }
524
525                // write the blank line
526                count += stream.write_all_count(b"\r\n")?;
527
528                // Recurse
529                count += write_multipart(stream, &boundary,  subnodes,file)?;
530            }
531        }
532
533        // write a line terminator
534        count += stream.write_all_count(b"\r\n")?;
535    }
536
537    // write a final boundary
538    count += stream.write_all_count(b"--")?;
539    count += stream.write_all_count(&boundary)?;
540    count += stream.write_all_count(b"--")?;
541
542    Ok(count)
543}
544
/// Writes `chunk` to `stream` framed as one HTTP chunk: the length in lowercase
/// hexadecimal, CRLF, the bytes themselves, CRLF.
///
/// An empty `chunk` therefore produces `0\r\n\r\n`.
pub fn write_chunk<S: Write>(
    stream: &mut S,
    chunk: &[u8]) -> Result<(), ::std::io::Error>
{
    write!(stream, "{:x}\r\n", chunk.len())
        .and_then(|()| stream.write_all(chunk))
        .and_then(|()| stream.write_all(b"\r\n"))
}
554
555/// Stream a multipart body to the output `stream` given, made up of the `parts`
556/// given, using Tranfer-Encoding: Chunked.  Top-level headers are NOT included in this
557/// stream; the caller must send those prior to calling write_multipart_chunked().
558pub fn write_multipart_chunked<S: Write>(
559    stream: &mut S,
560    boundary: &Vec<u8>,
561    nodes: &Vec<Node>)
562    -> Result<(), Error>
563{
564    for node in nodes {
565        // write a boundary
566        write_chunk(stream, b"--")?;
567        write_chunk(stream, &boundary)?;
568        write_chunk(stream, b"\r\n")?;
569
570        match node {
571            &Node::Part(ref part) => {
572                // write the part's headers
573                for header in part.headers.iter() {
574                    write_chunk(stream, header.name().as_bytes())?;
575                    write_chunk(stream, b": ")?;
576                    write_chunk(stream, header.value_string().as_bytes())?;
577                    write_chunk(stream, b"\r\n")?;
578                }
579
580                // write the blank line
581                write_chunk(stream, b"\r\n")?;
582
583                // Write the part's content
584                write_chunk(stream, &part.body)?;
585            }
586            &Node::File(ref filepart) => {
587                // write the part's headers
588                for header in filepart.headers.iter() {
589                    write_chunk(stream, header.name().as_bytes())?;
590                    write_chunk(stream, b": ")?;
591                    write_chunk(stream, header.value_string().as_bytes())?;
592                    write_chunk(stream, b"\r\n")?;
593                }
594
595                // write the blank line
596                write_chunk(stream, b"\r\n")?;
597
598                // Write out the files's length
599                let metadata = std::fs::metadata(&filepart.path)?;
600                write!(stream, "{:x}\r\n", metadata.len())?;
601
602                // Write out the file's content
603                let mut file = File::open(&filepart.path)?;
604                std::io::copy(&mut file, stream)? as usize;
605                stream.write(b"\r\n")?;
606            }
607            &Node::Multipart((ref headers, ref subnodes)) => {
608                // Get boundary
609                let boundary = get_multipart_boundary(headers)?;
610
611                // write the multipart headers
612                for header in headers.iter() {
613                    write_chunk(stream, header.name().as_bytes())?;
614                    write_chunk(stream, b": ")?;
615                    write_chunk(stream, header.value_string().as_bytes())?;
616                    write_chunk(stream, b"\r\n")?;
617                }
618
619                // write the blank line
620                write_chunk(stream, b"\r\n")?;
621
622                // Recurse
623                write_multipart_chunked(stream, &boundary, &subnodes)?;
624            }
625        }
626
627        // write a line terminator
628        write_chunk(stream, b"\r\n")?;
629    }
630
631    // write a final boundary
632    write_chunk(stream, b"--")?;
633    write_chunk(stream, &boundary)?;
634    write_chunk(stream, b"--")?;
635
636    // Write an empty chunk to signal the end of the body
637    write_chunk(stream, b"")?;
638
639    Ok(())
640}