Skip to main content

ferro_rs/http/
multipart.rs

//! Multipart/form-data parsing utilities for HTTP requests.
//!
//! Provides the `MultipartForm` and `UploadedFile` types, the
//! `parse_multipart_body` helper, and the `validate_mime` / `validate_size`
//! free functions used by handlers to accept file uploads. Mirrors the
//! body-parsing pattern in `body.rs`, but hands the raw `hyper::body::Incoming`
//! stream to the `multer` crate so fields are discovered one at a time as the
//! body arrives. Each field's content is still buffered in memory once it is
//! read, bounded by the per-field size limit and the per-request field limit.
8
9use crate::error::FrameworkError;
10use bytes::Bytes;
11use ferro_storage::{Disk, PutOptions};
12use futures_util::StreamExt;
13use http_body_util::BodyStream;
14use hyper::body::Incoming;
15use std::collections::HashMap;
16use std::path::Path;
17
18/// A single uploaded file extracted from a multipart/form-data request.
19#[derive(Debug, Clone)]
20pub struct UploadedFile {
21    /// Name of the form field this file was attached to (e.g. `"avatar"`).
22    pub field_name: String,
23    /// Original filename from the part's `Content-Disposition` header, if present.
24    pub file_name: Option<String>,
25    /// MIME type from the part headers, if present.
26    pub content_type: Option<String>,
27    /// Buffered file content.
28    pub bytes: Bytes,
29}
30
31impl UploadedFile {
32    /// Size of the uploaded payload in bytes.
33    pub fn size(&self) -> usize {
34        self.bytes.len()
35    }
36
37    /// File extension derived from `file_name` via `std::path::Path::extension()`.
38    ///
39    /// Returns `None` when `file_name` is `None` or has no extension.
40    pub fn extension(&self) -> Option<&str> {
41        self.file_name
42            .as_deref()
43            .and_then(|n| Path::new(n).extension())
44            .and_then(|e| e.to_str())
45    }
46
47    /// `true` if `content_type` is present and starts with `"image/"`.
48    pub fn is_image(&self) -> bool {
49        self.content_type
50            .as_deref()
51            .map(|ct| ct.starts_with("image/"))
52            .unwrap_or(false)
53    }
54
55    /// Persist the buffered bytes to the given storage disk.
56    ///
57    /// The content type stored alongside the object defaults to
58    /// `"application/octet-stream"` when this file has no declared MIME type.
59    /// The caller is responsible for selecting the disk via
60    /// `storage.disk("public")?` (or another configured name).
61    ///
62    /// # Security
63    ///
64    /// `path` is passed verbatim to the storage driver. Callers MUST sanitize
65    /// any user-supplied component (e.g. `self.file_name`) before constructing
66    /// the path — this method does not perform path-traversal checks.
67    pub async fn store(&self, disk: &Disk, path: &str) -> Result<(), ferro_storage::Error> {
68        let opts = PutOptions::new().content_type(
69            self.content_type
70                .as_deref()
71                .unwrap_or("application/octet-stream"),
72        );
73        disk.put_with_options(path, self.bytes.clone(), opts).await
74    }
75}
76
77/// A parsed multipart/form-data body.
78///
79/// Holds every file part keyed by form field name as well as every text part.
80#[derive(Debug)]
81pub struct MultipartForm {
82    pub(crate) files_map: HashMap<String, Vec<UploadedFile>>,
83    pub(crate) text_fields: HashMap<String, String>,
84}
85
86impl MultipartForm {
87    /// First file uploaded under `field`, if any.
88    pub fn file(&self, field: &str) -> Option<&UploadedFile> {
89        self.files_map.get(field).and_then(|v| v.first())
90    }
91
92    /// All files uploaded under `field`. Returns an empty slice if the field
93    /// is absent.
94    pub fn files(&self, field: &str) -> &[UploadedFile] {
95        self.files_map
96            .get(field)
97            .map(|v| v.as_slice())
98            .unwrap_or(&[])
99    }
100
101    /// Value of the text field `name`, if present.
102    pub fn field(&self, name: &str) -> Option<&str> {
103        self.text_fields.get(name).map(|s| s.as_str())
104    }
105
106    /// All text fields keyed by name.
107    pub fn fields(&self) -> &HashMap<String, String> {
108        &self.text_fields
109    }
110}
111
112/// Parse a `hyper::body::Incoming` request body as multipart/form-data.
113///
114/// This is the low-level entry point. `Request::multipart()` and
115/// `Request::file()` (added in plan 02) are the public-facing wrappers.
116///
117/// Bridges `Incoming` (which does not implement `futures::Stream` in
118/// hyper 1.x) to the stream interface multer expects via
119/// `http_body_util::BodyStream` + `StreamExt::filter_map`.
120pub(crate) async fn parse_multipart_body(
121    body: Incoming,
122    content_type: &str,
123    max_file_bytes: u64,
124    max_fields: usize,
125) -> Result<MultipartForm, FrameworkError> {
126    let boundary = parse_boundary(content_type)?;
127
128    let body_stream = BodyStream::new(body)
129        .filter_map(|result| async move { result.map(|frame| frame.into_data().ok()).transpose() });
130
131    let constraints =
132        multer::Constraints::new().size_limit(multer::SizeLimit::new().per_field(max_file_bytes));
133
134    let multipart = multer::Multipart::with_constraints(body_stream, boundary, constraints);
135    drain_multipart(multipart, max_fields).await
136}
137
138/// Parse cached multipart bytes — used by `Request::multipart_mut` and
139/// `Request::file_mut` to re-parse a body that was already collected to memory
140/// by `body_bytes_mut`. Mirrors `parse_multipart_body` but takes `Bytes`
141/// instead of a streaming `Incoming` body. Both call into `drain_multipart`
142/// for the actual field-by-field iteration.
143pub(crate) async fn parse_multipart_bytes(
144    bytes: Bytes,
145    content_type: &str,
146    max_file_bytes: u64,
147    max_fields: usize,
148) -> Result<MultipartForm, FrameworkError> {
149    let boundary = parse_boundary(content_type)?;
150
151    // Single-chunk stream over the cached bytes — multer accepts any stream of
152    // `Result<Bytes, _>`. `stream::once` keeps the API surface identical to the
153    // Incoming path so `drain_multipart` doesn't need to know which constructor
154    // it was called with.
155    let body_stream = futures_util::stream::once(async move { Ok::<_, std::io::Error>(bytes) });
156
157    let constraints =
158        multer::Constraints::new().size_limit(multer::SizeLimit::new().per_field(max_file_bytes));
159
160    let multipart = multer::Multipart::with_constraints(body_stream, boundary, constraints);
161    drain_multipart(multipart, max_fields).await
162}
163
164/// Shared boundary extraction used by both `parse_multipart_body` and
165/// `parse_multipart_bytes`. Returns the same `FrameworkError::domain(400)`
166/// shape callers already match against, keeping their behaviour identical.
167fn parse_boundary(content_type: &str) -> Result<String, FrameworkError> {
168    multer::parse_boundary(content_type).map_err(|_| {
169        FrameworkError::domain(
170            "Content-Type is not multipart/form-data or missing boundary",
171            400,
172        )
173    })
174}
175
176/// Iterate every field on a constructed `multer::Multipart`, separating files
177/// from text fields. Extracted so both stream variants (`Incoming` and cached
178/// `Bytes`) share one implementation. `Multipart<'_>` erases its inner stream
179/// type, so the function is non-generic — both callers' stream types fit.
180async fn drain_multipart(
181    mut multipart: multer::Multipart<'_>,
182    max_fields: usize,
183) -> Result<MultipartForm, FrameworkError> {
184    let mut files_map: HashMap<String, Vec<UploadedFile>> = HashMap::new();
185    let mut text_fields: HashMap<String, String> = HashMap::new();
186    let mut field_count: usize = 0;
187
188    while let Some(field) = multipart
189        .next_field()
190        .await
191        .map_err(|e| FrameworkError::internal(format!("Multipart parse error: {e}")))?
192    {
193        field_count += 1;
194        if field_count > max_fields {
195            return Err(FrameworkError::domain(
196                "Too many fields in multipart request",
197                400,
198            ));
199        }
200
201        let field_name = field.name().map(|s| s.to_string()).unwrap_or_default();
202        let file_name = field.file_name().map(|s| s.to_string());
203        let content_type = field.content_type().map(|m| m.to_string());
204        let bytes = field.bytes().await.map_err(|e| match e {
205            multer::Error::FieldSizeExceeded { .. } | multer::Error::StreamSizeExceeded { .. } => {
206                FrameworkError::domain("Upload field exceeds maximum size", 413)
207            }
208            _ => FrameworkError::internal(format!("Field read error: {e}")),
209        })?;
210
211        if file_name.is_some() {
212            files_map
213                .entry(field_name.clone())
214                .or_default()
215                .push(UploadedFile {
216                    field_name,
217                    file_name,
218                    content_type,
219                    bytes,
220                });
221        } else {
222            let value = String::from_utf8(bytes.to_vec()).map_err(|_| {
223                FrameworkError::internal("Multipart text field contains invalid UTF-8")
224            })?;
225            text_fields.insert(field_name, value);
226        }
227    }
228
229    Ok(MultipartForm {
230        files_map,
231        text_fields,
232    })
233}
234
235/// Reject the file if its declared MIME type is not in `allowed`.
236///
237/// A file with no `content_type` is treated as the empty string and will
238/// only pass if `allowed` contains `""` — which is never useful, so callers
239/// should treat content_type-less uploads as rejections.
240///
241/// **Security note:** this check is based solely on the client-supplied
242/// `Content-Type` header inside the multipart part, which can be forged.
243/// For security-sensitive contexts, validate the actual file magic bytes
244/// (e.g. with the `infer` crate) in addition to this check.
245pub fn validate_mime(file: &UploadedFile, allowed: &[&str]) -> Result<(), FrameworkError> {
246    let ct = file.content_type.as_deref().unwrap_or("");
247    if allowed.contains(&ct) {
248        Ok(())
249    } else {
250        Err(FrameworkError::domain(
251            format!(
252                "File type '{ct}' is not allowed; accepted: {}",
253                allowed.join(", ")
254            ),
255            422,
256        ))
257    }
258}
259
260/// Reject the file if `file.size() > max_bytes`.
261pub fn validate_size(file: &UploadedFile, max_bytes: usize) -> Result<(), FrameworkError> {
262    if file.size() <= max_bytes {
263        Ok(())
264    } else {
265        Err(FrameworkError::domain(
266            format!("File too large: {} bytes (max {max_bytes})", file.size()),
267            422,
268        ))
269    }
270}
271
/// Read the per-field byte limit from `UPLOAD_MAX_SIZE_MB` (default 10 MiB).
///
/// The env var is interpreted in mebibytes (MiB). An unset or unparsable
/// value falls back to the default. Values of 0 are clamped to 1 MiB so a
/// misconfigured operator setting does not silently reject every upload
/// without a clear error. Values too large to express in bytes saturate at
/// `u64::MAX` instead of overflowing.
pub(crate) fn max_file_bytes() -> u64 {
    const BYTES_PER_MIB: u64 = 1024 * 1024;
    const DEFAULT_MIB: u64 = 10;

    let mib = std::env::var("UPLOAD_MAX_SIZE_MB")
        .ok()
        .and_then(|raw| raw.parse::<u64>().ok())
        .unwrap_or(DEFAULT_MIB);

    // A plain `*` would panic in debug builds and wrap to a tiny (or zero)
    // limit in release builds when the configured value is huge.
    mib.max(1).saturating_mul(BYTES_PER_MIB)
}
284
/// Per-request cap on multipart fields, taken from `UPLOAD_MAX_FIELDS`.
///
/// Falls back to 100 when the variable is unset or does not parse as a
/// `usize`.
pub(crate) fn max_fields() -> usize {
    match std::env::var("UPLOAD_MAX_FIELDS") {
        Ok(raw) => raw.parse::<usize>().unwrap_or(100),
        Err(_) => 100,
    }
}
292
#[cfg(test)]
mod tests {
    use super::*;
    use bytes::Bytes;

    /// Build a raw multipart/form-data body and matching Content-Type value.
    ///
    /// Each part is `(name, value, filename)`. `Some(filename)` produces a
    /// file part (Content-Disposition includes `filename="..."` and the bytes
    /// of `value` are placed in the part body); `None` produces a text part.
    fn make_multipart_body(
        boundary: &str,
        parts: &[(&str, &[u8], Option<&str>)],
    ) -> (Bytes, String) {
        let ct = format!("multipart/form-data; boundary={boundary}");
        let mut body: Vec<u8> = Vec::new();
        for (name, value, filename) in parts {
            body.extend_from_slice(format!("--{boundary}\r\n").as_bytes());
            match filename {
                Some(fname) => body.extend_from_slice(
                    format!(
                        "Content-Disposition: form-data; name=\"{name}\"; filename=\"{fname}\"\r\nContent-Type: application/octet-stream\r\n\r\n"
                    )
                    .as_bytes(),
                ),
                None => body.extend_from_slice(
                    format!("Content-Disposition: form-data; name=\"{name}\"\r\n\r\n")
                        .as_bytes(),
                ),
            }
            body.extend_from_slice(value);
            body.extend_from_slice(b"\r\n");
        }
        body.extend_from_slice(format!("--{boundary}--\r\n").as_bytes());
        (Bytes::from(body), ct)
    }

    /// Build an `UploadedFile` fixture for the method-level tests. The field
    /// name is fixed to `"f"` because none of those tests inspect it.
    fn upload(
        file_name: Option<&str>,
        content_type: Option<&str>,
        body: &'static [u8],
    ) -> UploadedFile {
        UploadedFile {
            field_name: "f".into(),
            file_name: file_name.map(str::to_owned),
            content_type: content_type.map(str::to_owned),
            bytes: Bytes::from_static(body),
        }
    }

    // The parsing tests below call `parse_multipart_bytes` directly: it takes
    // an in-memory body, so no live `hyper::body::Incoming` is needed, and it
    // runs the same `parse_boundary` + `drain_multipart` code as the streaming
    // entry point. Testing a hand-written copy of the parser would let the
    // copy drift from production behaviour.

    // D-03 / D-04: parsing + accessors

    #[tokio::test]
    async fn multipart_parses_fields() {
        let (raw, ct) = make_multipart_body(
            "BOUNDARY",
            &[
                ("title", b"hello", None),
                ("avatar", b"\x89PNG\r\n\x1a\n", Some("avatar.png")),
            ],
        );
        let form = parse_multipart_bytes(raw, &ct, 10 * 1024 * 1024, 100)
            .await
            .expect("parses");
        assert_eq!(form.field("title"), Some("hello"));
        let file = form.file("avatar").expect("avatar present");
        assert_eq!(file.field_name, "avatar");
        assert_eq!(file.file_name.as_deref(), Some("avatar.png"));
        assert_eq!(file.bytes.as_ref(), b"\x89PNG\r\n\x1a\n");
    }

    #[tokio::test]
    async fn multipart_form_accessors() {
        let (raw, ct) = make_multipart_body(
            "B",
            &[
                ("photos", b"AAA", Some("a.jpg")),
                ("photos", b"BBB", Some("b.jpg")),
                ("caption", b"two photos", None),
            ],
        );
        let form = parse_multipart_bytes(raw, &ct, 10 * 1024 * 1024, 100)
            .await
            .expect("parses");
        assert_eq!(form.file("photos").unwrap().bytes.as_ref(), b"AAA");
        assert_eq!(form.files("photos").len(), 2);
        assert_eq!(form.files("photos")[1].bytes.as_ref(), b"BBB");
        assert!(form.files("absent").is_empty());
        assert!(form.file("absent").is_none());
        assert_eq!(form.field("caption"), Some("two photos"));
        assert_eq!(form.fields().len(), 1);
    }

    // D-07: UploadedFile fields populated

    #[tokio::test]
    async fn uploaded_file_fields() {
        let (raw, ct) = make_multipart_body("B", &[("doc", b"PDFDATA", Some("report.pdf"))]);
        let form = parse_multipart_bytes(raw, &ct, 1024, 100)
            .await
            .expect("parses");
        let file = form.file("doc").expect("present");
        assert_eq!(file.field_name, "doc");
        assert_eq!(file.file_name.as_deref(), Some("report.pdf"));
        assert_eq!(
            file.content_type.as_deref(),
            Some("application/octet-stream")
        );
        assert_eq!(file.bytes.len(), b"PDFDATA".len());
    }

    // D-08: UploadedFile method coverage

    #[test]
    fn uploaded_file_size_returns_byte_len() {
        let f = upload(None, None, b"12345");
        assert_eq!(f.size(), 5);
    }

    #[test]
    fn extension_from_filename() {
        let with_ext = upload(Some("avatar.png"), None, b"");
        let no_ext = upload(Some("noext"), None, b"");
        let none = upload(None, None, b"");
        assert_eq!(with_ext.extension(), Some("png"));
        assert_eq!(no_ext.extension(), None);
        assert_eq!(none.extension(), None);
    }

    #[test]
    fn is_image_true_false() {
        let img = upload(None, Some("image/jpeg"), b"");
        let pdf = upload(None, Some("application/pdf"), b"");
        let none = upload(None, None, b"");
        assert!(img.is_image());
        assert!(!pdf.is_image());
        assert!(!none.is_image());
    }

    // D-18: missing/wrong Content-Type

    #[tokio::test]
    async fn multipart_missing_boundary() {
        let raw = Bytes::from_static(b"irrelevant");
        let err = parse_multipart_bytes(raw, "application/json", 1024, 100)
            .await
            .expect_err("must error");
        let msg = format!("{err}");
        assert!(
            msg.contains("Content-Type is not multipart/form-data or missing boundary"),
            "unexpected error message: {msg}"
        );
    }

    // D-12: per-field size limit

    #[tokio::test]
    async fn multipart_size_limit_rejects_oversized_field() {
        let big = vec![b'A'; 50];
        let (raw, ct) = make_multipart_body("B", &[("blob", &big, Some("big.bin"))]);
        let err = parse_multipart_bytes(raw, &ct, 10, 100)
            .await
            .expect_err("oversized must error");
        let msg = format!("{err}");
        assert!(
            msg.contains("Upload field exceeds maximum size"),
            "expected the size-limit domain error, got: {msg}"
        );
    }

    // D-13: per-request field count limit

    #[tokio::test]
    async fn multipart_max_fields_rejects_excess() {
        let (raw, ct) = make_multipart_body(
            "B",
            &[("a", b"1", None), ("b", b"2", None), ("c", b"3", None)],
        );
        let err = parse_multipart_bytes(raw, &ct, 1024, 2)
            .await
            .expect_err("must reject excess fields");
        let msg = format!("{err}");
        assert!(
            msg.contains("Too many fields in multipart request"),
            "unexpected error message: {msg}"
        );
    }

    // D-14: validation helpers

    #[test]
    fn validate_mime_accepts_allowed() {
        let f = upload(None, Some("image/png"), b"");
        validate_mime(&f, &["image/png", "image/jpeg"]).expect("png is allowed");
    }

    #[test]
    fn validate_mime_rejects_disallowed() {
        let f = upload(None, Some("application/x-msdownload"), b"");
        let err = validate_mime(&f, &["image/png"]).expect_err("must reject exe");
        let msg = format!("{err}");
        assert!(msg.contains("application/x-msdownload"));
        assert!(msg.contains("image/png"));
    }

    #[test]
    fn validate_size_accepts_within_cap() {
        let f = upload(None, None, b"hello");
        validate_size(&f, 10).expect("5 bytes is within 10");
    }

    #[test]
    fn validate_size_rejects_over_cap() {
        let f = upload(None, None, b"hello world!!");
        let err = validate_size(&f, 5).expect_err("13 > 5");
        let msg = format!("{err}");
        assert!(msg.contains("13 bytes"));
        assert!(msg.contains("max 5"));
    }

    // Killer-feature integration: UploadedFile::store() wires to ferro-storage

    #[tokio::test]
    async fn store_to_memory_disk() {
        use ferro_storage::{DiskConfig, Storage};

        let storage = Storage::with_config("mem", vec![("mem", DiskConfig::memory())]);
        let disk = storage.disk("mem").expect("memory disk exists");

        let file = UploadedFile {
            field_name: "avatar".into(),
            file_name: Some("photo.png".into()),
            content_type: Some("image/png".into()),
            bytes: Bytes::from_static(b"\x89PNG\r\n\x1a\n"),
        };

        file.store(&disk, "uploads/photo.png")
            .await
            .expect("store succeeds");

        let stored = disk
            .get("uploads/photo.png")
            .await
            .expect("file readable after store");
        assert_eq!(stored.as_ref(), b"\x89PNG\r\n\x1a\n");
        assert!(disk.exists("uploads/photo.png").await.unwrap());
    }
}