Skip to main content

rust_serv/file_upload/
multipart.rs

//! Multipart form-data parser
2
/// A single part extracted from a `multipart/form-data` body.
///
/// A part is either a plain form field or an uploaded file; the two are
/// distinguished by whether `filename` is present.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultipartPart {
    /// Form field name (the `name` parameter of `Content-Disposition`).
    pub name: String,
    /// Client-supplied filename; `Some` only for file uploads.
    pub filename: Option<String>,
    /// Media type declared for this part's payload.
    pub content_type: String,
    /// Raw payload bytes of the part.
    pub data: Vec<u8>,
}
15
16impl MultipartPart {
17    /// Create a new multipart part
18    pub fn new(name: impl Into<String>, data: Vec<u8>) -> Self {
19        Self {
20            name: name.into(),
21            filename: None,
22            content_type: "application/octet-stream".to_string(),
23            data,
24        }
25    }
26
27    /// Set filename
28    pub fn with_filename(mut self, filename: impl Into<String>) -> Self {
29        self.filename = Some(filename.into());
30        self
31    }
32
33    /// Set content type
34    pub fn with_content_type(mut self, content_type: impl Into<String>) -> Self {
35        self.content_type = content_type.into();
36        self
37    }
38
39    /// Check if this is a file upload
40    pub fn is_file(&self) -> bool {
41        self.filename.is_some()
42    }
43
44    /// Get data size
45    pub fn size(&self) -> usize {
46        self.data.len()
47    }
48}
49
/// Parser for `multipart/form-data` request bodies.
///
/// The parser only stores the boundary; the body to parse is supplied per call.
#[derive(Clone, Debug)]
pub struct MultipartParser {
    /// Boundary value, stored without the leading `--` used on the wire.
    boundary: String,
}
56
57impl MultipartParser {
58    /// Create a new multipart parser
59    pub fn new(boundary: impl Into<String>) -> Self {
60        Self {
61            boundary: boundary.into(),
62        }
63    }
64
65    /// Extract boundary from Content-Type header
66    pub fn from_content_type(content_type: &str) -> Option<Self> {
67        // Content-Type: multipart/form-data; boundary=----WebKitFormBoundaryXXX
68        let boundary_start = content_type.find("boundary=")?;
69        let boundary_str = &content_type[boundary_start + 9..];
70        
71        // Extract boundary (may be quoted)
72        let boundary = if boundary_str.starts_with('"') {
73            // Quoted boundary - find matching end quote
74            let rest = &boundary_str[1..]; // Skip opening quote
75            if let Some(end_pos) = rest.find('"') {
76                rest[..end_pos].to_string()
77            } else {
78                // No closing quote found, use trimmed value
79                rest.trim().trim_end_matches('"').to_string()
80            }
81        } else {
82            // Unquoted boundary - take until semicolon or end
83            boundary_str.split(';').next()?.trim().to_string()
84        };
85        
86        Some(Self { boundary })
87    }
88
89    /// Get the boundary
90    pub fn boundary(&self) -> &str {
91        &self.boundary
92    }
93
94    /// Parse multipart data
95    pub fn parse(&self, data: &[u8]) -> Result<Vec<MultipartPart>, String> {
96        let mut parts = Vec::new();
97        
98        // Find all boundaries
99        let boundary_bytes = format!("--{}", self.boundary);
100        let mut positions = Vec::new();
101        
102        for i in 0..data.len().saturating_sub(boundary_bytes.len()) {
103            if &data[i..i + boundary_bytes.len()] == boundary_bytes.as_bytes() {
104                positions.push(i);
105            }
106        }
107        
108        // Parse segments between boundaries
109        for i in 0..positions.len() {
110            let start = positions[i] + boundary_bytes.len();
111            let end = if i + 1 < positions.len() {
112                positions[i + 1]
113            } else {
114                // Last segment - find end marker or use data end
115                data.len()
116            };
117            
118            // Skip if this is the end marker
119            let segment_data = &data[start..end];
120            if segment_data.starts_with(b"--") {
121                continue;
122            }
123            
124            if let Some(part) = self.parse_part_bytes(segment_data) {
125                parts.push(part);
126            }
127        }
128        
129        Ok(parts)
130    }
131    
132    /// Parse a single part from bytes
133    fn parse_part_bytes(&self, segment: &[u8]) -> Option<MultipartPart> {
134        // Remove leading/trailing whitespace (CRLF)
135        let segment = if segment.starts_with(b"\r\n") {
136            &segment[2..]
137        } else {
138            segment
139        };
140        
141        // Find header end (double CRLF)
142        let header_end_marker = b"\r\n\r\n";
143        let header_end = segment.windows(header_end_marker.len())
144            .position(|w| w == header_end_marker)?;
145        
146        let headers = &segment[..header_end];
147        let data_start = header_end + 4;
148        let data = &segment[data_start..];
149        
150        // Remove trailing \r\n from data
151        let data = if data.ends_with(b"\r\n") {
152            &data[..data.len() - 2]
153        } else {
154            data
155        };
156        
157        let headers_str = std::str::from_utf8(headers).ok()?;
158        let name = self.extract_header_value(headers_str, "name")?;
159        let filename = self.extract_header_value(headers_str, "filename");
160        let content_type = self.extract_content_type(headers_str);
161        
162        let mut part = MultipartPart::new(name, data.to_vec());
163        
164        if let Some(fname) = filename {
165            part = part.with_filename(fname);
166        }
167        
168        if let Some(ct) = content_type {
169            part = part.with_content_type(ct);
170        }
171        
172        Some(part)
173    }
174
175    /// Extract a value from Content-Disposition header
176    fn extract_header_value(&self, headers: &str, key: &str) -> Option<String> {
177        let search = format!("{}=\"", key);
178        
179        for line in headers.lines() {
180            if line.contains("Content-Disposition") {
181                if let Some(start) = line.find(&search) {
182                    let value_start = start + search.len();
183                    if let Some(end) = line[value_start..].find('"') {
184                        return Some(line[value_start..value_start + end].to_string());
185                    }
186                }
187            }
188        }
189        
190        None
191    }
192
193    /// Extract Content-Type from headers
194    fn extract_content_type(&self, headers: &str) -> Option<String> {
195        for line in headers.lines() {
196            if line.starts_with("Content-Type:") {
197                return Some(line[13..].trim().to_string());
198            }
199        }
200        None
201    }
202}
203
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `body` with a parser whose boundary is `boundary`, panicking on error.
    fn parse_body(body: &[u8]) -> Vec<MultipartPart> {
        MultipartParser::new("boundary").parse(body).unwrap()
    }

    /// Boundary extracted from a `Content-Type` header value, if any.
    fn boundary_of(header: &str) -> Option<String> {
        MultipartParser::from_content_type(header).map(|parser| parser.boundary().to_string())
    }

    // --- MultipartPart ---

    #[test]
    fn test_multipart_part_creation() {
        let MultipartPart {
            name,
            data,
            content_type,
            ..
        } = MultipartPart::new("field", vec![1, 2, 3]);

        assert_eq!(name, "field");
        assert_eq!(data, vec![1, 2, 3]);
        assert_eq!(content_type, "application/octet-stream");
    }

    #[test]
    fn test_multipart_part_with_filename() {
        let part = MultipartPart::new("file", Vec::new()).with_filename("test.txt");

        assert_eq!(part.filename, Some(String::from("test.txt")));
        assert!(part.is_file());
    }

    #[test]
    fn test_multipart_part_with_content_type() {
        let part = MultipartPart::new("text", Vec::new()).with_content_type("text/plain");

        assert_eq!(part.content_type, "text/plain");
    }

    #[test]
    fn test_multipart_part_is_file() {
        let with_name = MultipartPart::new("file", Vec::new()).with_filename("test.txt");
        let without_name = MultipartPart::new("field", Vec::new());

        assert!(with_name.is_file());
        assert!(!without_name.is_file());
    }

    #[test]
    fn test_multipart_part_size() {
        let payload = vec![1, 2, 3, 4, 5];
        assert_eq!(MultipartPart::new("data", payload).size(), 5);
    }

    // --- MultipartParser construction ---

    #[test]
    fn test_multipart_parser_creation() {
        let expected = "----WebKitFormBoundary";
        assert_eq!(MultipartParser::new(expected).boundary(), expected);
    }

    #[test]
    fn test_from_content_type() {
        let found = boundary_of("multipart/form-data; boundary=----WebKitFormBoundary");

        assert!(found.is_some());
        assert_eq!(found.as_deref(), Some("----WebKitFormBoundary"));
    }

    #[test]
    fn test_from_content_type_quoted() {
        let found = boundary_of("multipart/form-data; boundary=\"----WebKitFormBoundary\"");

        assert!(found.is_some());
        assert_eq!(found.as_deref(), Some("----WebKitFormBoundary"));
    }

    #[test]
    fn test_from_content_type_no_boundary() {
        assert!(MultipartParser::from_content_type("multipart/form-data").is_none());
    }

    #[test]
    fn test_from_content_type_invalid() {
        assert!(MultipartParser::from_content_type("application/json").is_none());
    }

    // --- MultipartParser::parse ---

    #[test]
    fn test_parse_simple() {
        let parts = parse_body(
            b"--boundary\r\nContent-Disposition: form-data; name=\"field\"\r\n\r\nvalue\r\n--boundary--",
        );

        assert_eq!(parts.len(), 1);
        let only = &parts[0];
        assert_eq!(only.name, "field");
        assert_eq!(only.data, b"value");
    }

    #[test]
    fn test_parse_file() {
        let parts = parse_body(
            b"--boundary\r\nContent-Disposition: form-data; name=\"file\"; filename=\"test.txt\"\r\nContent-Type: text/plain\r\n\r\nHello World\r\n--boundary--",
        );

        assert_eq!(parts.len(), 1);
        let upload = &parts[0];
        assert_eq!(upload.name, "file");
        assert_eq!(upload.filename, Some(String::from("test.txt")));
        assert_eq!(upload.content_type, "text/plain");
        assert!(upload.is_file());
    }

    #[test]
    fn test_parse_multiple_parts() {
        let parts = parse_body(
            b"--boundary\r\nContent-Disposition: form-data; name=\"field1\"\r\n\r\nvalue1\r\n--boundary\r\nContent-Disposition: form-data; name=\"field2\"\r\n\r\nvalue2\r\n--boundary--",
        );

        assert_eq!(parts.len(), 2);
        assert_eq!(parts[0].name, "field1");
        assert_eq!(parts[1].name, "field2");
    }

    #[test]
    fn test_parse_binary_data() {
        let parts = parse_body(
            b"--boundary\r\nContent-Disposition: form-data; name=\"file\"; filename=\"test.bin\"\r\nContent-Type: application/octet-stream\r\n\r\n\x00\x01\x02\x03\xff\r\n--boundary--",
        );

        assert_eq!(parts.len(), 1);
        assert_eq!(parts[0].data, b"\x00\x01\x02\x03\xff");
    }

    #[test]
    fn test_parse_empty_field() {
        let parts = parse_body(
            b"--boundary\r\nContent-Disposition: form-data; name=\"empty\"\r\n\r\n\r\n--boundary--",
        );

        assert_eq!(parts.len(), 1);
        assert!(parts[0].data.is_empty());
    }
}