Skip to main content

ferro_rs/http/
multipart.rs

//! Multipart/form-data parsing utilities for HTTP requests.
//!
//! Provides the `MultipartForm` and `UploadedFile` types, the
//! `parse_multipart_body` helper, and the `validate_mime` / `validate_size`
//! free functions used by handlers to accept file uploads. Mirrors the
//! body-parsing pattern in `body.rs`, but hands the raw `hyper::body::Incoming`
//! stream to the `multer` crate so fields are parsed as the body arrives
//! instead of after the whole request has been collected. Note that the
//! content of each individual field is still buffered in memory once read.
8
9use crate::error::FrameworkError;
10use bytes::Bytes;
11use ferro_storage::{Disk, PutOptions};
12use futures_util::StreamExt;
13use http_body_util::BodyStream;
14use hyper::body::Incoming;
15use std::collections::HashMap;
16use std::path::Path;
17
18/// A single uploaded file extracted from a multipart/form-data request.
19#[derive(Debug, Clone)]
20pub struct UploadedFile {
21    /// Name of the form field this file was attached to (e.g. `"avatar"`).
22    pub field_name: String,
23    /// Original filename from the part's `Content-Disposition` header, if present.
24    pub file_name: Option<String>,
25    /// MIME type from the part headers, if present.
26    pub content_type: Option<String>,
27    /// Buffered file content.
28    pub bytes: Bytes,
29}
30
31impl UploadedFile {
32    /// Size of the uploaded payload in bytes.
33    pub fn size(&self) -> usize {
34        self.bytes.len()
35    }
36
37    /// File extension derived from `file_name` via `std::path::Path::extension()`.
38    ///
39    /// Returns `None` when `file_name` is `None` or has no extension.
40    pub fn extension(&self) -> Option<&str> {
41        self.file_name
42            .as_deref()
43            .and_then(|n| Path::new(n).extension())
44            .and_then(|e| e.to_str())
45    }
46
47    /// `true` if `content_type` is present and starts with `"image/"`.
48    pub fn is_image(&self) -> bool {
49        self.content_type
50            .as_deref()
51            .map(|ct| ct.starts_with("image/"))
52            .unwrap_or(false)
53    }
54
55    /// Persist the buffered bytes to the given storage disk.
56    ///
57    /// The content type stored alongside the object defaults to
58    /// `"application/octet-stream"` when this file has no declared MIME type.
59    /// The caller is responsible for selecting the disk via
60    /// `storage.disk("public")?` (or another configured name).
61    ///
62    /// # Security
63    ///
64    /// `path` is passed verbatim to the storage driver. Callers MUST sanitize
65    /// any user-supplied component (e.g. `self.file_name`) before constructing
66    /// the path — this method does not perform path-traversal checks.
67    pub async fn store(&self, disk: &Disk, path: &str) -> Result<(), ferro_storage::Error> {
68        let opts = PutOptions::new().content_type(
69            self.content_type
70                .as_deref()
71                .unwrap_or("application/octet-stream"),
72        );
73        disk.put_with_options(path, self.bytes.clone(), opts).await
74    }
75}
76
77/// A parsed multipart/form-data body.
78///
79/// Holds every file part keyed by form field name as well as every text part.
80#[derive(Debug)]
81pub struct MultipartForm {
82    pub(crate) files_map: HashMap<String, Vec<UploadedFile>>,
83    pub(crate) text_fields: HashMap<String, String>,
84}
85
86impl MultipartForm {
87    /// First file uploaded under `field`, if any.
88    pub fn file(&self, field: &str) -> Option<&UploadedFile> {
89        self.files_map.get(field).and_then(|v| v.first())
90    }
91
92    /// All files uploaded under `field`. Returns an empty slice if the field
93    /// is absent.
94    pub fn files(&self, field: &str) -> &[UploadedFile] {
95        self.files_map
96            .get(field)
97            .map(|v| v.as_slice())
98            .unwrap_or(&[])
99    }
100
101    /// Value of the text field `name`, if present.
102    pub fn field(&self, name: &str) -> Option<&str> {
103        self.text_fields.get(name).map(|s| s.as_str())
104    }
105
106    /// All text fields keyed by name.
107    pub fn fields(&self) -> &HashMap<String, String> {
108        &self.text_fields
109    }
110}
111
112/// Parse a `hyper::body::Incoming` request body as multipart/form-data.
113///
114/// This is the low-level entry point. `Request::multipart()` and
115/// `Request::file()` (added in plan 02) are the public-facing wrappers.
116///
117/// Bridges `Incoming` (which does not implement `futures::Stream` in
118/// hyper 1.x) to the stream interface multer expects via
119/// `http_body_util::BodyStream` + `StreamExt::filter_map`.
120pub(crate) async fn parse_multipart_body(
121    body: Incoming,
122    content_type: &str,
123    max_file_bytes: u64,
124    max_fields: usize,
125) -> Result<MultipartForm, FrameworkError> {
126    let boundary = multer::parse_boundary(content_type).map_err(|_| {
127        FrameworkError::domain(
128            "Content-Type is not multipart/form-data or missing boundary",
129            400,
130        )
131    })?;
132
133    let body_stream = BodyStream::new(body)
134        .filter_map(|result| async move { result.map(|frame| frame.into_data().ok()).transpose() });
135
136    let constraints =
137        multer::Constraints::new().size_limit(multer::SizeLimit::new().per_field(max_file_bytes));
138
139    let mut multipart = multer::Multipart::with_constraints(body_stream, boundary, constraints);
140
141    let mut files_map: HashMap<String, Vec<UploadedFile>> = HashMap::new();
142    let mut text_fields: HashMap<String, String> = HashMap::new();
143    let mut field_count: usize = 0;
144
145    while let Some(field) = multipart
146        .next_field()
147        .await
148        .map_err(|e| FrameworkError::internal(format!("Multipart parse error: {e}")))?
149    {
150        field_count += 1;
151        if field_count > max_fields {
152            return Err(FrameworkError::domain(
153                "Too many fields in multipart request",
154                400,
155            ));
156        }
157
158        let field_name = field.name().map(|s| s.to_string()).unwrap_or_default();
159        let file_name = field.file_name().map(|s| s.to_string());
160        let content_type = field.content_type().map(|m| m.to_string());
161        let bytes = field.bytes().await.map_err(|e| match e {
162            multer::Error::FieldSizeExceeded { .. } | multer::Error::StreamSizeExceeded { .. } => {
163                FrameworkError::domain("Upload field exceeds maximum size", 413)
164            }
165            _ => FrameworkError::internal(format!("Field read error: {e}")),
166        })?;
167
168        if file_name.is_some() {
169            files_map
170                .entry(field_name.clone())
171                .or_default()
172                .push(UploadedFile {
173                    field_name,
174                    file_name,
175                    content_type,
176                    bytes,
177                });
178        } else {
179            let value = String::from_utf8(bytes.to_vec()).map_err(|_| {
180                FrameworkError::internal("Multipart text field contains invalid UTF-8")
181            })?;
182            text_fields.insert(field_name, value);
183        }
184    }
185
186    Ok(MultipartForm {
187        files_map,
188        text_fields,
189    })
190}
191
192/// Reject the file if its declared MIME type is not in `allowed`.
193///
194/// A file with no `content_type` is treated as the empty string and will
195/// only pass if `allowed` contains `""` — which is never useful, so callers
196/// should treat content_type-less uploads as rejections.
197///
198/// **Security note:** this check is based solely on the client-supplied
199/// `Content-Type` header inside the multipart part, which can be forged.
200/// For security-sensitive contexts, validate the actual file magic bytes
201/// (e.g. with the `infer` crate) in addition to this check.
202pub fn validate_mime(file: &UploadedFile, allowed: &[&str]) -> Result<(), FrameworkError> {
203    let ct = file.content_type.as_deref().unwrap_or("");
204    if allowed.contains(&ct) {
205        Ok(())
206    } else {
207        Err(FrameworkError::domain(
208            format!(
209                "File type '{ct}' is not allowed; accepted: {}",
210                allowed.join(", ")
211            ),
212            422,
213        ))
214    }
215}
216
217/// Reject the file if `file.size() > max_bytes`.
218pub fn validate_size(file: &UploadedFile, max_bytes: usize) -> Result<(), FrameworkError> {
219    if file.size() <= max_bytes {
220        Ok(())
221    } else {
222        Err(FrameworkError::domain(
223            format!("File too large: {} bytes (max {max_bytes})", file.size()),
224            422,
225        ))
226    }
227}
228
/// Read the per-field byte limit from `UPLOAD_MAX_SIZE_MB` (default 10 MiB).
///
/// The env var is interpreted in mebibytes (MiB). A missing or unparsable
/// value falls back to the default. Values of 0 are clamped to 1 MiB so a
/// misconfigured operator setting does not silently reject every upload
/// without a clear error. Values so large that the byte count would not fit
/// in a `u64` saturate at `u64::MAX` instead of overflowing.
pub(crate) fn max_file_bytes() -> u64 {
    const BYTES_PER_MIB: u64 = 1024 * 1024;
    const DEFAULT_MIB: u64 = 10;

    let mib = std::env::var("UPLOAD_MAX_SIZE_MB")
        .ok()
        .and_then(|v| v.parse::<u64>().ok())
        .unwrap_or(DEFAULT_MIB);
    mib.max(1).saturating_mul(BYTES_PER_MIB)
}
241
/// Per-request cap on the number of multipart parts, taken from the
/// `UPLOAD_MAX_FIELDS` environment variable.
///
/// Falls back to 100 when the variable is unset or does not parse as a
/// `usize`.
pub(crate) fn max_fields() -> usize {
    const DEFAULT_LIMIT: usize = 100;

    match std::env::var("UPLOAD_MAX_FIELDS") {
        Ok(raw) => raw.parse::<usize>().unwrap_or(DEFAULT_LIMIT),
        Err(_) => DEFAULT_LIMIT,
    }
}
249
#[cfg(test)]
mod tests {
    use super::*;
    use bytes::Bytes;
    use http_body_util::{BodyStream, Full};

    /// Build a raw multipart/form-data body and matching Content-Type value.
    ///
    /// Each part is `(name, value, filename)`. `Some(filename)` produces a
    /// file part (Content-Disposition includes `filename="..."` and the bytes
    /// of `value` are placed in the part body); `None` produces a text part.
    fn make_multipart_body(
        boundary: &str,
        parts: &[(&str, &[u8], Option<&str>)],
    ) -> (Bytes, String) {
        let ct = format!("multipart/form-data; boundary={boundary}");
        let mut body: Vec<u8> = Vec::new();
        for (name, value, filename) in parts {
            body.extend_from_slice(format!("--{boundary}\r\n").as_bytes());
            match filename {
                Some(fname) => body.extend_from_slice(
                    format!(
                        "Content-Disposition: form-data; name=\"{name}\"; filename=\"{fname}\"\r\nContent-Type: application/octet-stream\r\n\r\n"
                    )
                    .as_bytes(),
                ),
                None => body.extend_from_slice(
                    format!("Content-Disposition: form-data; name=\"{name}\"\r\n\r\n")
                        .as_bytes(),
                ),
            }
            body.extend_from_slice(value);
            body.extend_from_slice(b"\r\n");
        }
        body.extend_from_slice(format!("--{boundary}--\r\n").as_bytes());
        (Bytes::from(body), ct)
    }

    /// Build an `UploadedFile` fixture under field `"f"` for the unit tests
    /// that exercise methods and validators directly.
    fn sample_file(
        file_name: Option<&str>,
        content_type: Option<&str>,
        payload: &'static [u8],
    ) -> UploadedFile {
        UploadedFile {
            field_name: "f".into(),
            file_name: file_name.map(str::to_owned),
            content_type: content_type.map(str::to_owned),
            bytes: Bytes::from_static(payload),
        }
    }

    /// Mirror of `parse_multipart_body` that accepts an in-memory body so
    /// tests don't need a live `hyper::body::Incoming`.
    ///
    /// The error mapping (400 for a missing boundary or too many fields, 413
    /// for an oversized field) is kept in step with the production function
    /// so the assertions below pin the messages real callers would see.
    async fn parse_for_test(
        raw: Bytes,
        content_type: &str,
        max_bytes: u64,
        max_fields_cap: usize,
    ) -> Result<MultipartForm, FrameworkError> {
        let boundary = multer::parse_boundary(content_type).map_err(|_| {
            FrameworkError::domain(
                "Content-Type is not multipart/form-data or missing boundary",
                400,
            )
        })?;

        let body = Full::new(raw);
        let stream = BodyStream::new(body).filter_map(|result| async move {
            result.map(|frame| frame.into_data().ok()).transpose()
        });

        let constraints =
            multer::Constraints::new().size_limit(multer::SizeLimit::new().per_field(max_bytes));

        let mut multipart = multer::Multipart::with_constraints(stream, boundary, constraints);

        let mut files_map: HashMap<String, Vec<UploadedFile>> = HashMap::new();
        let mut text_fields: HashMap<String, String> = HashMap::new();
        let mut field_count: usize = 0;

        while let Some(field) = multipart
            .next_field()
            .await
            .map_err(|e| FrameworkError::internal(format!("Multipart parse error: {e}")))?
        {
            field_count += 1;
            if field_count > max_fields_cap {
                return Err(FrameworkError::domain(
                    "Too many fields in multipart request",
                    400,
                ));
            }

            let field_name = field.name().map(|s| s.to_string()).unwrap_or_default();
            let file_name = field.file_name().map(|s| s.to_string());
            let content_type = field.content_type().map(|m| m.to_string());
            let bytes = field.bytes().await.map_err(|e| match e {
                multer::Error::FieldSizeExceeded { .. }
                | multer::Error::StreamSizeExceeded { .. } => {
                    FrameworkError::domain("Upload field exceeds maximum size", 413)
                }
                _ => FrameworkError::internal(format!("Field read error: {e}")),
            })?;

            if file_name.is_some() {
                files_map
                    .entry(field_name.clone())
                    .or_default()
                    .push(UploadedFile {
                        field_name,
                        file_name,
                        content_type,
                        bytes,
                    });
            } else {
                let value = String::from_utf8(bytes.to_vec()).map_err(|_| {
                    FrameworkError::internal("Multipart text field contains invalid UTF-8")
                })?;
                text_fields.insert(field_name, value);
            }
        }

        Ok(MultipartForm {
            files_map,
            text_fields,
        })
    }

    // D-03 / D-04: parsing + accessors

    #[tokio::test]
    async fn multipart_parses_fields() {
        let (raw, ct) = make_multipart_body(
            "BOUNDARY",
            &[
                ("title", b"hello", None),
                ("avatar", b"\x89PNG\r\n\x1a\n", Some("avatar.png")),
            ],
        );
        let form = parse_for_test(raw, &ct, 10 * 1024 * 1024, 100)
            .await
            .expect("parses");
        assert_eq!(form.field("title"), Some("hello"));
        let file = form.file("avatar").expect("avatar present");
        assert_eq!(file.field_name, "avatar");
        assert_eq!(file.file_name.as_deref(), Some("avatar.png"));
        assert_eq!(file.bytes.as_ref(), b"\x89PNG\r\n\x1a\n");
    }

    #[tokio::test]
    async fn multipart_form_accessors() {
        let (raw, ct) = make_multipart_body(
            "B",
            &[
                ("photos", b"AAA", Some("a.jpg")),
                ("photos", b"BBB", Some("b.jpg")),
                ("caption", b"two photos", None),
            ],
        );
        let form = parse_for_test(raw, &ct, 10 * 1024 * 1024, 100)
            .await
            .expect("parses");
        assert_eq!(form.file("photos").unwrap().bytes.as_ref(), b"AAA");
        assert_eq!(form.files("photos").len(), 2);
        assert_eq!(form.files("photos")[1].bytes.as_ref(), b"BBB");
        assert!(form.files("absent").is_empty());
        assert!(form.file("absent").is_none());
        assert_eq!(form.field("caption"), Some("two photos"));
        assert_eq!(form.fields().len(), 1);
    }

    // D-07: UploadedFile fields populated

    #[tokio::test]
    async fn uploaded_file_fields() {
        let (raw, ct) = make_multipart_body("B", &[("doc", b"PDFDATA", Some("report.pdf"))]);
        let form = parse_for_test(raw, &ct, 1024, 100).await.expect("parses");
        let file = form.file("doc").expect("present");
        assert_eq!(file.field_name, "doc");
        assert_eq!(file.file_name.as_deref(), Some("report.pdf"));
        assert_eq!(
            file.content_type.as_deref(),
            Some("application/octet-stream")
        );
        assert_eq!(file.bytes.len(), b"PDFDATA".len());
    }

    // D-08: UploadedFile method coverage

    #[test]
    fn uploaded_file_size_returns_byte_len() {
        let f = sample_file(None, None, b"12345");
        assert_eq!(f.size(), 5);
    }

    #[test]
    fn extension_from_filename() {
        let with_ext = sample_file(Some("avatar.png"), None, b"");
        let no_ext = sample_file(Some("noext"), None, b"");
        let none = sample_file(None, None, b"");
        assert_eq!(with_ext.extension(), Some("png"));
        assert_eq!(no_ext.extension(), None);
        assert_eq!(none.extension(), None);
    }

    #[test]
    fn is_image_true_false() {
        let img = sample_file(None, Some("image/jpeg"), b"");
        let pdf = sample_file(None, Some("application/pdf"), b"");
        let none = sample_file(None, None, b"");
        assert!(img.is_image());
        assert!(!pdf.is_image());
        assert!(!none.is_image());
    }

    // D-18: missing/wrong Content-Type

    #[tokio::test]
    async fn multipart_missing_boundary() {
        let raw = Bytes::from_static(b"irrelevant");
        let err = parse_for_test(raw, "application/json", 1024, 100)
            .await
            .expect_err("must error");
        let msg = format!("{err}");
        assert!(
            msg.contains("Content-Type is not multipart/form-data or missing boundary"),
            "unexpected error message: {msg}"
        );
    }

    // D-12: per-field size limit

    #[tokio::test]
    async fn multipart_size_limit_rejects_oversized_field() {
        let big = vec![b'A'; 50];
        let (raw, ct) = make_multipart_body("B", &[("blob", &big, Some("big.bin"))]);
        let err = parse_for_test(raw, &ct, 10, 100)
            .await
            .expect_err("oversized must error");
        let msg = format!("{err}");
        assert!(
            msg.contains("Upload field exceeds maximum size"),
            "expected the 413 size-limit error, got: {msg}"
        );
    }

    // D-13: per-request field count limit

    #[tokio::test]
    async fn multipart_max_fields_rejects_excess() {
        let (raw, ct) = make_multipart_body(
            "B",
            &[("a", b"1", None), ("b", b"2", None), ("c", b"3", None)],
        );
        let err = parse_for_test(raw, &ct, 1024, 2)
            .await
            .expect_err("must reject excess fields");
        let msg = format!("{err}");
        assert!(
            msg.contains("Too many fields in multipart request"),
            "unexpected error message: {msg}"
        );
    }

    // D-14: validation helpers

    #[test]
    fn validate_mime_accepts_allowed() {
        let f = sample_file(None, Some("image/png"), b"");
        validate_mime(&f, &["image/png", "image/jpeg"]).expect("png is allowed");
    }

    #[test]
    fn validate_mime_rejects_disallowed() {
        let f = sample_file(None, Some("application/x-msdownload"), b"");
        let err = validate_mime(&f, &["image/png"]).expect_err("must reject exe");
        let msg = format!("{err}");
        assert!(msg.contains("application/x-msdownload"));
        assert!(msg.contains("image/png"));
    }

    #[test]
    fn validate_size_accepts_within_cap() {
        let f = sample_file(None, None, b"hello");
        validate_size(&f, 10).expect("5 bytes is within 10");
    }

    #[test]
    fn validate_size_rejects_over_cap() {
        let f = sample_file(None, None, b"hello world!!");
        let err = validate_size(&f, 5).expect_err("13 > 5");
        let msg = format!("{err}");
        assert!(msg.contains("13 bytes"));
        assert!(msg.contains("max 5"));
    }

    // Killer-feature integration: UploadedFile::store() wires to ferro-storage

    #[tokio::test]
    async fn store_to_memory_disk() {
        use ferro_storage::{DiskConfig, Storage};

        let storage = Storage::with_config("mem", vec![("mem", DiskConfig::memory())]);
        let disk = storage.disk("mem").expect("memory disk exists");

        let file = UploadedFile {
            field_name: "avatar".into(),
            file_name: Some("photo.png".into()),
            content_type: Some("image/png".into()),
            bytes: Bytes::from_static(b"\x89PNG\r\n\x1a\n"),
        };

        file.store(&disk, "uploads/photo.png")
            .await
            .expect("store succeeds");

        let stored = disk
            .get("uploads/photo.png")
            .await
            .expect("file readable after store");
        assert_eq!(stored.as_ref(), b"\x89PNG\r\n\x1a\n");
        assert!(disk.exists("uploads/photo.png").await.unwrap());
    }
}