Skip to main content

rust_web_server/body/multipart_form_data/
mod.rs

1#[cfg(test)]
2mod tests;
3
4use std::io;
5use std::io::{BufRead, Cursor};
6use crate::ext::string_ext::StringExt;
7use crate::header::Header;
8use crate::symbol::SYMBOL;
9
/// Parser for `multipart/form-data` request bodies (file uploads).
///
/// This is a unit struct without any state: it only namespaces the associated functions
/// (`parse`, `extract_boundary`, `generate_part`, `generate`), none of which takes `self`.
pub struct FormMultipartData;
12
/// A single part from a multipart body, with its own headers and raw byte body.
pub struct Part {
    /// Headers that belong to this part. `FormMultipartData::generate_part` rejects a part
    /// whose header list is empty.
    pub headers: Vec<Header>,
    /// Content of the part as raw bytes; it is never decoded, so it may hold arbitrary
    /// binary data.
    pub body: Vec<u8>,
}
18
19impl Part {
20    pub fn get_header(&self, name: String) -> Option<&Header> {
21        let header =  self.headers.iter().find(|x| x.name.to_lowercase() == name.to_lowercase());
22        header
23    }
24}
25
26impl FormMultipartData {
27    /// Parses a raw multipart body into a list of [`Part`]s.
28    /// `boundary` is extracted from the `Content-Type` header value after `boundary=`.
29    pub fn parse(data: &[u8], boundary: String) -> Result<Vec<Part>, String> {
30
31        let cursor = io::Cursor::new(data);
32        let bytes_read : i128 = 0;
33        let total_bytes : i128 = data.len() as i128;
34
35        let part_list : Vec<Part> = vec![];
36
37        let boxed_part_list = FormMultipartData::
38            parse_form_part_recursively(
39                cursor,
40                boundary,
41                bytes_read,
42                total_bytes,
43                part_list
44            );
45
46        if boxed_part_list.is_err() {
47            let message  = boxed_part_list.err().unwrap();
48            return Err(message)
49        }
50
51        Ok(boxed_part_list.unwrap())
52    }
53
54    fn parse_form_part_recursively(
55                mut cursor: Cursor<&[u8]>,
56                boundary: String,
57                mut bytes_read: i128,
58                total_bytes: i128,
59                mut part_list: Vec<Part>) -> Result<Vec<Part>, String> {
60        let mut buf = vec![];
61        let mut part = Part { headers: vec![], body: vec![] };
62
63        // first boundary starts parsable payload
64        if bytes_read == 0 {
65            let boxed_read = cursor.read_until(b'\n', &mut buf);
66            if boxed_read.is_err() {
67                let message = boxed_read.err().unwrap().to_string();
68                return Err(message);
69            }
70            let bytes_offset = boxed_read.unwrap();
71            let b : &[u8] = &buf;
72            bytes_read = bytes_read + bytes_offset as i128;
73
74            let boxed_line = String::from_utf8(Vec::from(b));
75            if boxed_line.is_err() {
76                let error_message = boxed_line.err().unwrap().to_string();
77                return Err(error_message);
78            }
79            let string = boxed_line.unwrap();
80            let string = StringExt::filter_ascii_control_characters(&string);
81            let string = StringExt::truncate_new_line_carriage_return(&string);
82
83            let _current_string_is_boundary =
84                string.replace(SYMBOL.hyphen, SYMBOL.empty_string)
85                    .ends_with(&boundary.replace(SYMBOL.hyphen, SYMBOL.empty_string));
86
87            if !_current_string_is_boundary {
88                let message = format!("Body in multipart/form-data request needs to start with a boundary, actual string: '{}'", string);
89                return Err(message.to_string())
90            }
91        }
92
93        // headers part. by spec it shall have at least Content-Disposition header or more, following
94        // by empty line. Headers shall be valid utf-8 encoded strings
95        let mut current_string_is_empty = false;
96        while !current_string_is_empty {
97            buf = vec![];
98            let boxed_read = cursor.read_until(b'\n', &mut buf);
99            if boxed_read.is_err() {
100                let message = boxed_read.err().unwrap().to_string();
101                return Err(message);
102            }
103            let bytes_offset = boxed_read.unwrap();
104            let b : &[u8] = &buf;
105            bytes_read = bytes_read + bytes_offset as i128;
106
107            let boxed_line = String::from_utf8(Vec::from(b));
108            if boxed_line.is_err() {
109                let error_message = boxed_line.err().unwrap().to_string();
110                return Err(error_message);
111            }
112            let string = boxed_line.unwrap();
113
114            let string = StringExt::filter_ascii_control_characters(&string);
115            current_string_is_empty = string.trim().len() == 0;
116
117            let _current_string_is_boundary =
118                string.replace(SYMBOL.hyphen, SYMBOL.empty_string)
119                    .ends_with(&boundary.replace(SYMBOL.hyphen, SYMBOL.empty_string));
120
121            if _current_string_is_boundary {
122                let message = "There is at least one missing body part in the multipart/form-data request";
123                return Err(message.to_string())
124            }
125
126            if bytes_read == total_bytes as i128 {
127                return Ok(part_list)
128            }
129
130
131            // multipart/form-data part does not have any header specified
132            if current_string_is_empty && part.headers.len() == 0 {
133                let message = "One of the body parts does not have any header specified. At least Content-Disposition is required";
134                return Err(message.to_string());
135            }
136
137            if !current_string_is_empty {
138                let boxed_header = Header::parse_header(&string);
139                if boxed_header.is_err() {
140                    let message = boxed_header.err().unwrap();
141                    return Err(message)
142                }
143
144                let header = boxed_header.unwrap();
145                part.headers.push(header);
146            }
147        }
148
149
150        // multipart/form-data body part. it just arbitrary bytes. ends by delimiter.
151        let mut _boundary_position = 0;
152        let mut current_string_is_boundary = false;
153        while !current_string_is_boundary {
154            buf = vec![];
155
156            let boxed_read = cursor.read_until(b'\n', &mut buf);
157            if boxed_read.is_err() {
158                let message = boxed_read.err().unwrap().to_string();
159                return Err(message);
160            }
161
162            let bytes_offset = boxed_read.unwrap();
163
164            if bytes_offset == 0 { break };
165
166            let b : &[u8] = &buf;
167
168            bytes_read = bytes_read + bytes_offset as i128;
169
170            let escaped_dash_boundary = boundary.replace(SYMBOL.hyphen, SYMBOL.empty_string);
171
172            current_string_is_boundary = false;
173            if b.len() >= escaped_dash_boundary.len() {
174                let boxed_sequence = FormMultipartData::find_subsequence(b, escaped_dash_boundary.as_bytes());
175                if boxed_sequence.is_some() {
176                    current_string_is_boundary = true;
177                    _boundary_position = boxed_sequence.unwrap();
178                }
179            }
180
181            if !current_string_is_boundary {
182                part.body.append(&mut buf.clone());
183            }
184
185        }
186
187        if !current_string_is_boundary && bytes_read == total_bytes as i128 {
188            let message = "No end boundary present in the multipart/form-data request body";
189            return Err(message.to_string());
190        }
191
192        // body for specific part may end with a new line or carriage return and a new line
193        // in both cases new line carriage return delimiter is not part of the body
194        let body_length = part.body.len();
195        if body_length > 2 { // check if body itself is present
196            let is_new_line_carriage_return_ending =
197                *part.body.get(body_length-2).unwrap() == b'\r'
198                    && *part.body.get(body_length-1).unwrap() == b'\n';
199
200            let is_new_line_ending =
201                *part.body.get(body_length-2).unwrap() != b'\r'
202                    && *part.body.get(body_length-1).unwrap() == b'\n';
203
204            if is_new_line_carriage_return_ending {
205                part.body.remove(body_length - 1); // removing \n
206                part.body.remove(body_length - 2); // removing \r
207            }
208
209            if is_new_line_ending {
210                part.body.remove(body_length - 1); // removing \n
211            }
212        }
213
214
215
216        part_list.push(part);
217
218
219        if bytes_read == total_bytes as i128 {
220            return Ok(part_list)
221        }
222
223        FormMultipartData::parse_form_part_recursively(cursor, boundary, bytes_read, total_bytes, part_list)
224    }
225
226    pub fn extract_boundary(content_type: &str) -> Result<String, String> {
227        let boxed_split = content_type.split_once("boundary=");
228        if boxed_split.is_none() {
229            let message = "No boundary found in Content-Type header";
230            return Err(message.to_string())
231        }
232
233
234        let (_, boundary) = boxed_split.unwrap();
235        Ok(boundary.to_string())
236    }
237
238    fn find_subsequence(haystack: &[u8], needle: &[u8]) -> Option<usize> {
239        haystack.windows(needle.len()).position(|window| window == needle)
240    }
241
242    pub fn generate_part(part: Part) -> Result<Vec<u8>, String> {
243        if part.headers.len() == 0 {
244            let message = "One of the body parts does not have any header specified. At least Content-Disposition is required";
245            return Err(message.to_string())
246        }
247
248        let mut formatted_header_list : String = "".to_string();
249        for header in part.headers.into_iter() {
250            let formatted = format!("{}{}", header.as_string(), SYMBOL.new_line_carriage_return.to_string());
251            formatted_header_list = [formatted_header_list, formatted].join(SYMBOL.empty_string);
252        }
253
254        let header_body_delimiter = SYMBOL.new_line_carriage_return.to_string();
255
256        let body = part.body;
257
258        let part = [
259            formatted_header_list.as_bytes().to_vec(),
260            header_body_delimiter.as_bytes().to_vec(),
261            body
262        ].join(SYMBOL.empty_string.as_bytes());
263
264        Ok(part)
265    }
266
267    pub fn generate(part_list: Vec<Part>, boundary: &str) -> Result<Vec<u8>, String> {
268        if part_list.len() == 0 {
269            let message = "List of the multipart/form-data request body parts is empty";
270            return Err(message.to_string());
271        }
272
273        let mut bytes = vec![];
274        bytes.push(boundary.as_bytes().to_vec());
275
276        for part in part_list.into_iter() {
277            let boxed_part_as_bytes = FormMultipartData::generate_part(part);
278            if boxed_part_as_bytes.is_err() {
279                let message = boxed_part_as_bytes.err().unwrap();
280                return Err(message);
281            }
282            let part_as_bytes = boxed_part_as_bytes.unwrap();
283            bytes.push(part_as_bytes);
284            bytes.push(boundary.as_bytes().to_vec());
285        }
286
287        let result = bytes.join(SYMBOL.new_line_carriage_return.as_bytes());
288
289        Ok(result)
290    }
291}