Skip to main content

umbral_core/web/
multipart.rs

1//! `multipart/form-data` parsing and the storage-merge upload helper.
2//!
3//! ## What this is
4//!
5//! A browser that POSTs a form containing a `<input type="file">` sends a
6//! `multipart/form-data` body, not the `application/x-www-form-urlencoded`
7//! body the rest of the form layer ([`crate::forms`], the admin's
8//! `serde_urlencoded` path) understands. This module turns that multipart
9//! body into the *same* flat `Vec<(String, String)>` shape the urlencoded
10//! path yields — text fields stay as `(name, value)` pairs, and each
11//! uploaded file is stored through the ambient [`Storage`] backend and
12//! reduced to a `(field_name, stored_key)` pair. A consumer (the admin's
13//! `create` / `update` handlers, wired in a later wave) can then feed the
14//! result to the ORM identically whether the body was urlencoded or
15//! multipart.
16//!
17//! ## Layering
18//!
19//! Two layers, so each is independently testable:
20//!
21//! 1. [`parse_multipart`] — pure parsing. No storage, no I/O beyond reading
22//!    the in-memory body. Returns a [`MultipartForm`] separating text
23//!    [`MultipartForm::fields`] from binary [`MultipartForm::files`].
24//! 2. [`parse_and_store_multipart`] — parse, then [`Storage::store`] every
25//!    non-empty file part and flatten everything to `Vec<(String, String)>`.
26//!
27//! [`Storage`]: crate::storage::Storage
28//! [`Storage::store`]: crate::storage::Storage::store
29
30use std::convert::Infallible;
31
32use crate::storage::StorageError;
33
/// One uploaded file part of a `multipart/form-data` body.
///
/// A multipart part is treated as a *file* iff multer reports a
/// `Content-Disposition` `filename` for it; a part with no filename is a
/// plain text field and lands in [`MultipartForm::fields`] instead. The
/// raw [`bytes`](FilePart::bytes) are kept verbatim — never lossy-decoded —
/// so binary uploads (images, PDFs) round-trip intact.
///
/// Two parts compare equal when all four fields are equal, which lets a
/// whole parsed part be checked with a single `assert_eq!`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct FilePart {
    /// The form field name (the `<input name="...">`).
    pub field_name: String,
    /// The client-supplied filename from the `Content-Disposition` header,
    /// if any. Used to derive the storage key and as a content-type hint.
    pub filename: Option<String>,
    /// The part's declared `Content-Type`, if the client sent one.
    pub content_type: Option<String>,
    /// The raw file bytes, exactly as received.
    pub bytes: Vec<u8>,
}
53
54/// A parsed `multipart/form-data` body: text fields and file parts.
55///
56/// [`fields`](MultipartForm::fields) preserves both order and repeats — a
57/// multi-select / M2M widget sends the same field name multiple times and
58/// every value has to survive — so it is a `Vec`, not a map.
59#[derive(Debug, Default)]
60pub struct MultipartForm {
61    /// The non-file text parts, as `(name, value)` pairs, in body order,
62    /// with repeats preserved.
63    pub fields: Vec<(String, String)>,
64    /// The uploaded file parts (those with a `filename`), in body order.
65    pub files: Vec<FilePart>,
66}
67
68impl MultipartForm {
69    /// The value of the text field `name`, last-wins if it repeats.
70    ///
71    /// Returns `None` if no text field by that name was sent. (File parts
72    /// are not considered; look in [`files`](MultipartForm::files) for
73    /// those.)
74    pub fn field(&self, name: &str) -> Option<&str> {
75        self.fields
76            .iter()
77            .rev()
78            .find(|(k, _)| k == name)
79            .map(|(_, v)| v.as_str())
80    }
81
82    /// Iterate over every text field as `(&name, &value)`, in body order,
83    /// including repeats.
84    pub fn iter_fields(&self) -> impl Iterator<Item = (&str, &str)> {
85        self.fields.iter().map(|(k, v)| (k.as_str(), v.as_str()))
86    }
87}
88
/// Errors [`parse_multipart`] can return.
///
/// The `Display` text of every variant starts with `multipart:`.
#[derive(Debug)]
pub enum MultipartError {
    /// The `Content-Type` header had no `boundary` parameter, so the body
    /// can't be split into parts.
    ///
    /// [`parse_multipart`] reports this variant for *any* failure of
    /// `multer::parse_boundary` on the header; the underlying multer error
    /// is discarded.
    MissingBoundary,
    /// The underlying multipart parser rejected the body (malformed part
    /// headers, truncated body, etc.). Carries multer's message.
    ///
    /// Also used when reading an individual part's bytes or text fails.
    /// Only the message is kept, as a `String`, not the multer error value.
    Parse(String),
    /// A part (or the whole body) exceeded a configured size cap.
    ///
    /// Not produced by [`parse_multipart`] today (no cap is imposed at this
    /// layer yet); reserved so a future size-limited entry point can report
    /// it without a breaking API change.
    TooLarge {
        /// The configured limit, in bytes.
        limit: usize,
        /// The actual size that was rejected, in bytes.
        actual: usize,
    },
}
110
111impl std::fmt::Display for MultipartError {
112    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
113        match self {
114            MultipartError::MissingBoundary => {
115                write!(f, "multipart: Content-Type has no boundary parameter")
116            }
117            MultipartError::Parse(s) => write!(f, "multipart: parse error: {s}"),
118            MultipartError::TooLarge { limit, actual } => write!(
119                f,
120                "multipart: body {actual}B exceeds configured cap of {limit}B"
121            ),
122        }
123    }
124}
125
// No `source` override: every variant is self-contained — `Parse` keeps
// multer's message as a `String` rather than the original error value — so
// the trait's default methods are sufficient.
impl std::error::Error for MultipartError {}
127
/// Errors [`parse_and_store_multipart`] can return: a parse failure, a
/// storage failure, or the absence of a registered storage backend.
///
/// The two wrapping variants have `From` conversions, so `?` lifts a
/// [`MultipartError`] or a `StorageError` into this type directly. Their
/// `Display` output is the wrapped error's own text, and the wrapped error
/// is also available through `std::error::Error::source`.
#[derive(Debug)]
pub enum MultipartUploadError {
    /// Parsing the multipart body failed. See [`MultipartError`].
    Multipart(MultipartError),
    /// Storing an uploaded file through the [`Storage`] backend failed.
    ///
    /// [`Storage`]: crate::storage::Storage
    Storage(StorageError),
    /// No [`Storage`] backend was registered, but the body carried a file
    /// part that needed storing.
    ///
    /// A stray multipart POST against a server with no media backend lands
    /// here rather than panicking the worker; the boot-time system check
    /// (Wave 2) is what guarantees a backend exists whenever a model
    /// declares a file field.
    ///
    /// This variant wraps no underlying error, so `source()` is `None`.
    ///
    /// [`Storage`]: crate::storage::Storage
    NoStorageBackend,
}
149
150impl std::fmt::Display for MultipartUploadError {
151    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
152        match self {
153            MultipartUploadError::Multipart(e) => write!(f, "{e}"),
154            MultipartUploadError::Storage(e) => write!(f, "{e}"),
155            MultipartUploadError::NoStorageBackend => write!(
156                f,
157                "multipart upload: no Storage backend registered; add StoragePlugin \
158                 or call umbral::storage::set_storage"
159            ),
160        }
161    }
162}
163
164impl std::error::Error for MultipartUploadError {
165    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
166        match self {
167            MultipartUploadError::Multipart(e) => Some(e),
168            MultipartUploadError::Storage(e) => Some(e),
169            MultipartUploadError::NoStorageBackend => None,
170        }
171    }
172}
173
174impl From<MultipartError> for MultipartUploadError {
175    fn from(e: MultipartError) -> Self {
176        MultipartUploadError::Multipart(e)
177    }
178}
179
180impl From<StorageError> for MultipartUploadError {
181    fn from(e: StorageError) -> Self {
182        MultipartUploadError::Storage(e)
183    }
184}
185
/// Whether a `Content-Type` header value denotes a `multipart/form-data`
/// body.
///
/// True when the header's media type — the text before the first `;`, with
/// surrounding whitespace ignored — is exactly `multipart/form-data`,
/// compared ASCII-case-insensitively. Parameters after the `;` (such as
/// `boundary=...`) are ignored here and parsed later by
/// [`parse_multipart`].
///
/// A media type that merely *begins* with `multipart/form-data` (for
/// example `multipart/form-dataset`) is a different type and yields
/// `false`.
pub fn is_multipart(content_type: &str) -> bool {
    // `split` always yields at least one item, so the fallback is never
    // taken; it only avoids an `unwrap`.
    let media_type = content_type
        .split(';')
        .next()
        .unwrap_or(content_type)
        .trim();
    // Compare in place rather than allocating a lowercased copy.
    media_type.eq_ignore_ascii_case("multipart/form-data")
}
198
199/// Parse a `multipart/form-data` body into text fields and file parts.
200///
201/// `content_type_header` is the full `Content-Type` header value (it must
202/// carry the `boundary=...` parameter). `body` is the complete request body
203/// in memory.
204///
205/// Text parts (no `filename`) land in [`MultipartForm::fields`] preserving
206/// order and repeats; parts with a `filename` land in
207/// [`MultipartForm::files`] as [`FilePart`]s with their bytes kept verbatim.
208///
209/// # Errors
210///
211/// - [`MultipartError::MissingBoundary`] if the header has no boundary.
212/// - [`MultipartError::Parse`] on a malformed body.
213pub async fn parse_multipart(
214    content_type_header: &str,
215    body: impl Into<bytes::Bytes>,
216) -> Result<MultipartForm, MultipartError> {
217    let boundary =
218        multer::parse_boundary(content_type_header).map_err(|_| MultipartError::MissingBoundary)?;
219
220    let body: bytes::Bytes = body.into();
221    // multer's constructor wants a Bytes stream; the whole body is already
222    // in memory, so a single-chunk, never-erroring stream is enough.
223    let stream = futures_util::stream::once(async move { Ok::<_, Infallible>(body) });
224    let mut multipart = multer::Multipart::new(stream, boundary);
225
226    let mut form = MultipartForm::default();
227
228    while let Some(field) = multipart
229        .next_field()
230        .await
231        .map_err(|e| MultipartError::Parse(e.to_string()))?
232    {
233        // Capture all metadata BEFORE reading the body: multer's `bytes()`
234        // / `text()` consume the field handle, after which name/filename/
235        // content_type are gone.
236        let field_name = field.name().map(str::to_owned).unwrap_or_default();
237        let filename = field.file_name().map(str::to_owned);
238        let content_type = field.content_type().map(|m| m.to_string());
239
240        if filename.is_some() {
241            // A part with a filename is a file: keep raw bytes, never decode.
242            let bytes = field
243                .bytes()
244                .await
245                .map_err(|e| MultipartError::Parse(e.to_string()))?;
246            form.files.push(FilePart {
247                field_name,
248                filename,
249                content_type,
250                bytes: bytes.to_vec(),
251            });
252        } else {
253            // A part with no filename is a plain text field.
254            let value = field
255                .text()
256                .await
257                .map_err(|e| MultipartError::Parse(e.to_string()))?;
258            form.fields.push((field_name, value));
259        }
260    }
261
262    Ok(form)
263}
264
265/// Parse a `multipart/form-data` body, store its file parts, and return a
266/// flat `Vec<(String, String)>` of every field — text values plus the
267/// storage key of each uploaded file.
268///
269/// This is the upload entry point a handler calls instead of
270/// `serde_urlencoded::from_str::<Vec<(String, String)>>` when the body is
271/// multipart: the return shape is identical, so the rest of the form
272/// pipeline doesn't care which encoding arrived.
273///
274/// Each non-empty [`FilePart`] is stored via the ambient [`Storage`]
275/// backend and contributes one `(field_name, stored_key)` pair, using the
276/// part's `filename` (falling back to the field name) and its
277/// `content_type` (falling back to `application/octet-stream`).
278///
279/// ## Empty file parts are skipped — "keep current file on edit"
280///
281/// When a user edits a record with a file field but does *not* choose a new
282/// file, the browser still sends the file part — with an empty body. Such a
283/// part is **skipped entirely**: no pair is emitted for it. This is
284/// deliberate. Emitting `(field, "")` would overwrite the stored key with
285/// an empty string and lose the existing file; omitting the pair leaves the
286/// current value untouched downstream.
287///
288/// # Errors
289///
290/// - [`MultipartUploadError::Multipart`] on a parse failure.
291/// - [`MultipartUploadError::NoStorageBackend`] if a file needs storing but
292///   no backend is registered (returned, never panicked).
293/// - [`MultipartUploadError::Storage`] if the backend's `store` fails.
294///
295/// [`Storage`]: crate::storage::Storage
296pub async fn parse_and_store_multipart(
297    content_type_header: &str,
298    body: impl Into<bytes::Bytes>,
299) -> Result<Vec<(String, String)>, MultipartUploadError> {
300    let form = parse_multipart(content_type_header, body).await?;
301
302    let mut pairs: Vec<(String, String)> = Vec::new();
303
304    for file in &form.files {
305        // Skip empty file parts: the user submitted the edit form without
306        // choosing a new file, so leave the existing stored value alone.
307        if file.bytes.is_empty() {
308            continue;
309        }
310
311        // Resolve the backend lazily and only when a file actually needs
312        // storing, so a multipart POST with no file (or only empty parts)
313        // never trips on a missing backend.
314        let backend =
315            crate::storage::storage_opt().ok_or(MultipartUploadError::NoStorageBackend)?;
316
317        let filename = file
318            .filename
319            .as_deref()
320            .filter(|s| !s.is_empty())
321            .unwrap_or(&file.field_name);
322        let content_type = file
323            .content_type
324            .as_deref()
325            .unwrap_or("application/octet-stream");
326
327        let stored = backend.store(filename, content_type, &file.bytes).await?;
328        pairs.push((file.field_name.clone(), stored.key));
329    }
330
331    // Text fields always pass through, after the file keys.
332    pairs.extend(form.fields);
333
334    Ok(pairs)
335}
336
#[cfg(test)]
mod tests {
    use super::*;

    /// Boundary token used by both [`build_body`] and [`ct_header`].
    const BOUNDARY: &str = "X-UMBRAL-BOUNDARY";

    /// Description of one part for [`build_body`], as `(name, filename,
    /// content_type, value)`. `filename == None` describes a text field;
    /// `Some` describes a file.
    type PartSpec<'a> = (&'a str, Option<&'a str>, Option<&'a str>, &'a [u8]);

    /// Assemble a real `multipart/form-data` body from part specs. Text
    /// parts get only a `Content-Disposition` header; file parts also get
    /// a `filename` and, when a content type is given, a `Content-Type`
    /// line.
    fn build_body(parts: &[PartSpec<'_>]) -> Vec<u8> {
        let mut body = Vec::new();
        for &(name, filename, content_type, value) in parts {
            let mut head =
                format!("--{BOUNDARY}\r\nContent-Disposition: form-data; name=\"{name}\"");
            if let Some(fname) = filename {
                head.push_str(&format!("; filename=\"{fname}\"\r\n"));
                if let Some(ct) = content_type {
                    head.push_str(&format!("Content-Type: {ct}\r\n"));
                }
            } else {
                head.push_str("\r\n");
            }
            // A blank line ends the part headers.
            head.push_str("\r\n");

            body.extend_from_slice(head.as_bytes());
            body.extend_from_slice(value);
            body.extend_from_slice(b"\r\n");
        }
        body.extend_from_slice(format!("--{BOUNDARY}--\r\n").as_bytes());
        body
    }

    /// The `Content-Type` header value matching [`build_body`]'s boundary.
    fn ct_header() -> String {
        format!("multipart/form-data; boundary={BOUNDARY}")
    }

    #[test]
    fn is_multipart_matches_form_data_content_types() {
        let accepted = [
            "multipart/form-data; boundary=abc",
            "multipart/form-data",
            "  Multipart/Form-Data; boundary=Z", // case + leading ws
        ];
        for header in accepted {
            assert!(is_multipart(header), "expected multipart: {header:?}");
        }

        let rejected = [
            "application/x-www-form-urlencoded",
            "application/json",
            "multipart/mixed; boundary=abc",
        ];
        for header in rejected {
            assert!(!is_multipart(header), "expected non-multipart: {header:?}");
        }
    }

    #[tokio::test]
    async fn parse_separates_text_and_file_parts() {
        let png = b"\x89PNG\r\n\x1a\nfake-image-bytes";
        let body = build_body(&[
            ("title", None, None, b"Hello"),
            ("cover", Some("p.png"), Some("image/png"), png),
        ]);

        let parsed = parse_multipart(&ct_header(), body).await.unwrap();

        assert_eq!(
            parsed.fields,
            vec![("title".to_string(), "Hello".to_string())]
        );

        assert_eq!(parsed.files.len(), 1);
        let cover = &parsed.files[0];
        assert_eq!(cover.field_name, "cover");
        assert_eq!(cover.filename.as_deref(), Some("p.png"));
        assert_eq!(cover.content_type.as_deref(), Some("image/png"));
        assert_eq!(cover.bytes, png);
    }

    #[tokio::test]
    async fn parse_preserves_repeated_text_field_names() {
        let body = build_body(&[
            ("tags", None, None, b"red"),
            ("tags", None, None, b"blue"),
            ("name", None, None, b"shirt"),
        ]);

        let parsed = parse_multipart(&ct_header(), body).await.unwrap();

        // Every `tags` entry is kept, in order — needed for M2M /
        // multi-select widgets.
        let expected: Vec<(String, String)> = [("tags", "red"), ("tags", "blue"), ("name", "shirt")]
            .into_iter()
            .map(|(k, v)| (k.to_string(), v.to_string()))
            .collect();
        assert_eq!(parsed.fields, expected);

        // `field()` resolves repeats last-wins.
        assert_eq!(parsed.field("tags"), Some("blue"));
        assert_eq!(parsed.field("name"), Some("shirt"));
        assert_eq!(parsed.field("missing"), None);

        // `iter_fields()` yields each entry, the repeat included.
        let tag_count = parsed.iter_fields().filter(|(k, _)| *k == "tags").count();
        assert_eq!(tag_count, 2);
    }

    #[tokio::test]
    async fn parse_keeps_binary_bytes_intact() {
        // 0xFF / 0x80 are invalid UTF-8 and must pass through undecoded.
        // (Built at runtime rather than as a const literal: clippy
        // const-folds a literal `from_utf8` and warns it always errors.)
        let raw: Vec<u8> = vec![0x00, 0xFF, 0xFE, 0x80, 0x01, 0x7F];
        assert!(std::str::from_utf8(&raw).is_err());

        let body = build_body(&[(
            "blob",
            Some("data.bin"),
            Some("application/octet-stream"),
            &raw,
        )]);

        let parsed = parse_multipart(&ct_header(), body).await.unwrap();

        assert_eq!(parsed.files.len(), 1);
        assert_eq!(parsed.files[0].bytes, raw, "raw bytes must round-trip");
    }

    #[tokio::test]
    async fn parse_errors_on_missing_boundary() {
        let body = build_body(&[("title", None, None, b"Hi")]);

        // Header names the right media type but has no `boundary=`.
        let outcome = parse_multipart("multipart/form-data", body).await;

        let err = outcome.unwrap_err();
        assert!(matches!(err, MultipartError::MissingBoundary));
    }
}
471}