rust_web_server/body/multipart_form_data/
mod.rs

1#[cfg(test)]
2mod tests;
3
4use std::io;
5use std::io::{BufRead, Cursor};
6use crate::ext::string_ext::StringExt;
7use crate::header::Header;
8use crate::symbol::SYMBOL;
9
/// Namespace type for the multipart/form-data helpers of this module.
///
/// It is a unit struct (carries no data); the functions attached to it are associated
/// functions that do not take `self`.
10pub struct FormMultipartData;
11
/// A single part of a multipart/form-data body.
///
/// `headers` holds the headers that introduce the part and `body` holds the raw bytes
/// that follow them; the body is kept as bytes, not text.
12pub struct Part {
13    pub headers: Vec<Header>,
14    pub body: Vec<u8>,
15}
16
17impl Part {
18    pub fn get_header(&self, name: String) -> Option<&Header> {
19        let header =  self.headers.iter().find(|x| x.name.to_lowercase() == name.to_lowercase());
20        header
21    }
22}
23
24impl FormMultipartData {
25    pub fn parse(data: &[u8], boundary: String) -> Result<Vec<Part>, String> {
26
27        let cursor = io::Cursor::new(data);
28        let bytes_read : i128 = 0;
29        let total_bytes : i128 = data.len() as i128;
30
31        let part_list : Vec<Part> = vec![];
32
33        let boxed_part_list = FormMultipartData::
34            parse_form_part_recursively(
35                cursor,
36                boundary,
37                bytes_read,
38                total_bytes,
39                part_list
40            );
41
42        if boxed_part_list.is_err() {
43            let message  = boxed_part_list.err().unwrap();
44            return Err(message)
45        }
46
47        Ok(boxed_part_list.unwrap())
48    }
49
50    fn parse_form_part_recursively(
51                mut cursor: Cursor<&[u8]>,
52                boundary: String,
53                mut bytes_read: i128,
54                total_bytes: i128,
55                mut part_list: Vec<Part>) -> Result<Vec<Part>, String> {
56        let mut buf = vec![];
57        let mut part = Part { headers: vec![], body: vec![] };
58
59        // first boundary starts parsable payload
60        if bytes_read == 0 {
61            let boxed_read = cursor.read_until(b'\n', &mut buf);
62            if boxed_read.is_err() {
63                let message = boxed_read.err().unwrap().to_string();
64                return Err(message);
65            }
66            let bytes_offset = boxed_read.unwrap();
67            let b : &[u8] = &buf;
68            bytes_read = bytes_read + bytes_offset as i128;
69
70            let boxed_line = String::from_utf8(Vec::from(b));
71            if boxed_line.is_err() {
72                let error_message = boxed_line.err().unwrap().to_string();
73                return Err(error_message);
74            }
75            let string = boxed_line.unwrap();
76            let string = StringExt::filter_ascii_control_characters(&string);
77            let string = StringExt::truncate_new_line_carriage_return(&string);
78
79            let _current_string_is_boundary =
80                string.replace(SYMBOL.hyphen, SYMBOL.empty_string)
81                    .ends_with(&boundary.replace(SYMBOL.hyphen, SYMBOL.empty_string));
82
83            if !_current_string_is_boundary {
84                let message = format!("Body in multipart/form-data request needs to start with a boundary, actual string: '{}'", string);
85                return Err(message.to_string())
86            }
87        }
88
89        // headers part. by spec it shall have at least Content-Disposition header or more, following
90        // by empty line. Headers shall be valid utf-8 encoded strings
91        let mut current_string_is_empty = false;
92        while !current_string_is_empty {
93            buf = vec![];
94            let boxed_read = cursor.read_until(b'\n', &mut buf);
95            if boxed_read.is_err() {
96                let message = boxed_read.err().unwrap().to_string();
97                return Err(message);
98            }
99            let bytes_offset = boxed_read.unwrap();
100            let b : &[u8] = &buf;
101            bytes_read = bytes_read + bytes_offset as i128;
102
103            let boxed_line = String::from_utf8(Vec::from(b));
104            if boxed_line.is_err() {
105                let error_message = boxed_line.err().unwrap().to_string();
106                return Err(error_message);
107            }
108            let string = boxed_line.unwrap();
109
110            let string = StringExt::filter_ascii_control_characters(&string);
111            current_string_is_empty = string.trim().len() == 0;
112
113            let _current_string_is_boundary =
114                string.replace(SYMBOL.hyphen, SYMBOL.empty_string)
115                    .ends_with(&boundary.replace(SYMBOL.hyphen, SYMBOL.empty_string));
116
117            if _current_string_is_boundary {
118                let message = "There is at least one missing body part in the multipart/form-data request";
119                return Err(message.to_string())
120            }
121
122            if bytes_read == total_bytes as i128 {
123                return Ok(part_list)
124            }
125
126
127            // multipart/form-data part does not have any header specified
128            if current_string_is_empty && part.headers.len() == 0 {
129                let message = "One of the body parts does not have any header specified. At least Content-Disposition is required";
130                return Err(message.to_string());
131            }
132
133            if !current_string_is_empty {
134                let boxed_header = Header::parse_header(&string);
135                if boxed_header.is_err() {
136                    let message = boxed_header.err().unwrap();
137                    return Err(message)
138                }
139
140                let header = boxed_header.unwrap();
141                part.headers.push(header);
142            }
143        }
144
145
146        // multipart/form-data body part. it just arbitrary bytes. ends by delimiter.
147        let mut _boundary_position = 0;
148        let mut current_string_is_boundary = false;
149        while !current_string_is_boundary {
150            buf = vec![];
151
152            let boxed_read = cursor.read_until(b'\n', &mut buf);
153            if boxed_read.is_err() {
154                let message = boxed_read.err().unwrap().to_string();
155                return Err(message);
156            }
157
158            let bytes_offset = boxed_read.unwrap();
159
160            if bytes_offset == 0 { break };
161
162            let b : &[u8] = &buf;
163
164            bytes_read = bytes_read + bytes_offset as i128;
165
166            let escaped_dash_boundary = boundary.replace(SYMBOL.hyphen, SYMBOL.empty_string);
167
168            current_string_is_boundary = false;
169            if b.len() >= escaped_dash_boundary.len() {
170                let boxed_sequence = FormMultipartData::find_subsequence(b, escaped_dash_boundary.as_bytes());
171                if boxed_sequence.is_some() {
172                    current_string_is_boundary = true;
173                    _boundary_position = boxed_sequence.unwrap();
174                }
175            }
176
177            if !current_string_is_boundary {
178                part.body.append(&mut buf.clone());
179            }
180
181        }
182
183        if !current_string_is_boundary && bytes_read == total_bytes as i128 {
184            let message = "No end boundary present in the multipart/form-data request body";
185            return Err(message.to_string());
186        }
187
188        // body for specific part may end with a new line or carriage return and a new line
189        // in both cases new line carriage return delimiter is not part of the body
190        let body_length = part.body.len();
191        if body_length > 2 { // check if body itself is present
192            let is_new_line_carriage_return_ending =
193                *part.body.get(body_length-2).unwrap() == b'\r'
194                    && *part.body.get(body_length-1).unwrap() == b'\n';
195
196            let is_new_line_ending =
197                *part.body.get(body_length-2).unwrap() != b'\r'
198                    && *part.body.get(body_length-1).unwrap() == b'\n';
199
200            if is_new_line_carriage_return_ending {
201                part.body.remove(body_length - 1); // removing \n
202                part.body.remove(body_length - 2); // removing \r
203            }
204
205            if is_new_line_ending {
206                part.body.remove(body_length - 1); // removing \n
207            }
208        }
209
210
211
212        part_list.push(part);
213
214
215        if bytes_read == total_bytes as i128 {
216            return Ok(part_list)
217        }
218
219        FormMultipartData::parse_form_part_recursively(cursor, boundary, bytes_read, total_bytes, part_list)
220    }
221
/// Extracts the value of the `boundary` parameter from a `Content-Type` header value.
///
/// The parameter name is matched case-insensitively. A quoted value
/// (`boundary="abc def"`) is returned without its quotes; an unquoted value ends at the
/// next `;` and is returned without surrounding whitespace.
///
/// # Errors
/// Returns an error message when the header value has no `boundary=` parameter.
pub fn extract_boundary(content_type: &str) -> Result<String, String> {
    const PARAMETER: &str = "boundary=";

    // ASCII lowercasing never changes byte offsets, so a position found in the lowered copy
    // is valid for the original string as well.
    let position = match content_type.to_ascii_lowercase().find(PARAMETER) {
        Some(position) => position,
        None => {
            let message = "No boundary found in Content-Type header";
            return Err(message.to_string());
        }
    };

    let raw_value = content_type[position + PARAMETER.len()..].trim();

    let boundary = match raw_value.strip_prefix('"') {
        // quoted form: the value runs up to the closing quote
        Some(quoted) => quoted.split('"').next().unwrap_or(quoted),
        // token form: the value ends where the next parameter starts
        None => raw_value.split(';').next().unwrap_or(raw_value).trim_end(),
    };

    Ok(boundary.to_string())
}
233
/// Returns the index of the first occurrence of `needle` inside `haystack`.
///
/// Returns `None` when `needle` does not occur (including when it is longer than
/// `haystack`). An empty `needle` is found at index 0.
fn find_subsequence(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    // `windows(0)` panics, so the empty needle has to be answered before windowing.
    if needle.is_empty() {
        return Some(0);
    }

    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}
237
238    pub fn generate_part(part: Part) -> Result<Vec<u8>, String> {
239        if part.headers.len() == 0 {
240            let message = "One of the body parts does not have any header specified. At least Content-Disposition is required";
241            return Err(message.to_string())
242        }
243
244        let mut formatted_header_list : String = "".to_string();
245        for header in part.headers.into_iter() {
246            let formatted = format!("{}{}", header.as_string(), SYMBOL.new_line_carriage_return.to_string());
247            formatted_header_list = [formatted_header_list, formatted].join(SYMBOL.empty_string);
248        }
249
250        let header_body_delimiter = SYMBOL.new_line_carriage_return.to_string();
251
252        let body = part.body;
253
254        let part = [
255            formatted_header_list.as_bytes().to_vec(),
256            header_body_delimiter.as_bytes().to_vec(),
257            body
258        ].join(SYMBOL.empty_string.as_bytes());
259
260        Ok(part)
261    }
262
263    pub fn generate(part_list: Vec<Part>, boundary: &str) -> Result<Vec<u8>, String> {
264        if part_list.len() == 0 {
265            let message = "List of the multipart/form-data request body parts is empty";
266            return Err(message.to_string());
267        }
268
269        let mut bytes = vec![];
270        bytes.push(boundary.as_bytes().to_vec());
271
272        for part in part_list.into_iter() {
273            let boxed_part_as_bytes = FormMultipartData::generate_part(part);
274            if boxed_part_as_bytes.is_err() {
275                let message = boxed_part_as_bytes.err().unwrap();
276                return Err(message);
277            }
278            let part_as_bytes = boxed_part_as_bytes.unwrap();
279            bytes.push(part_as_bytes);
280            bytes.push(boundary.as_bytes().to_vec());
281        }
282
283        let result = bytes.join(SYMBOL.new_line_carriage_return.as_bytes());
284
285        Ok(result)
286    }
287}