umbral_core/web/multipart.rs
1//! `multipart/form-data` parsing and the storage-merge upload helper.
2//!
3//! ## What this is
4//!
//! A browser that POSTs a form containing an `<input type="file">` sends a
6//! `multipart/form-data` body, not the `application/x-www-form-urlencoded`
7//! body the rest of the form layer ([`crate::forms`], the admin's
8//! `serde_urlencoded` path) understands. This module turns that multipart
9//! body into the *same* flat `Vec<(String, String)>` shape the urlencoded
10//! path yields — text fields stay as `(name, value)` pairs, and each
11//! uploaded file is stored through the ambient [`Storage`] backend and
12//! reduced to a `(field_name, stored_key)` pair. A consumer (the admin's
13//! `create` / `update` handlers, wired in a later wave) can then feed the
14//! result to the ORM identically whether the body was urlencoded or
15//! multipart.
16//!
17//! ## Layering
18//!
19//! Two layers, so each is independently testable:
20//!
21//! 1. [`parse_multipart`] — pure parsing. No storage, no I/O beyond reading
22//! the in-memory body. Returns a [`MultipartForm`] separating text
23//! [`MultipartForm::fields`] from binary [`MultipartForm::files`].
24//! 2. [`parse_and_store_multipart`] — parse, then [`Storage::store`] every
25//! non-empty file part and flatten everything to `Vec<(String, String)>`.
26//!
27//! [`Storage`]: crate::storage::Storage
28//! [`Storage::store`]: crate::storage::Storage::store
29
30use std::convert::Infallible;
31
32use crate::storage::StorageError;
33
/// One uploaded file part of a `multipart/form-data` body.
///
/// A multipart part is treated as a *file* iff multer reports a
/// `Content-Disposition` `filename` for it; a part with no filename is a
/// plain text field and lands in [`MultipartForm::fields`] instead. The
/// raw [`bytes`](FilePart::bytes) are kept verbatim — never lossy-decoded —
/// so binary uploads (images, PDFs) round-trip intact.
///
/// Two parts compare equal when their field name, filename, content type
/// and bytes are all equal, which lets callers and tests compare whole
/// parts instead of going field by field.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct FilePart {
    /// The form field name (the `<input name="...">`). Empty when the part
    /// carried no `name` parameter.
    pub field_name: String,
    /// The client-supplied filename from the `Content-Disposition` header,
    /// if any. Used to derive the storage key and as a content-type hint.
    /// This value comes straight from the request, so treat it as
    /// untrusted input.
    pub filename: Option<String>,
    /// The part's declared `Content-Type`, if the client sent one.
    pub content_type: Option<String>,
    /// The raw file bytes, exactly as received.
    pub bytes: Vec<u8>,
}
53
54/// A parsed `multipart/form-data` body: text fields and file parts.
55///
56/// [`fields`](MultipartForm::fields) preserves both order and repeats — a
57/// multi-select / M2M widget sends the same field name multiple times and
58/// every value has to survive — so it is a `Vec`, not a map.
59#[derive(Debug, Default)]
60pub struct MultipartForm {
61 /// The non-file text parts, as `(name, value)` pairs, in body order,
62 /// with repeats preserved.
63 pub fields: Vec<(String, String)>,
64 /// The uploaded file parts (those with a `filename`), in body order.
65 pub files: Vec<FilePart>,
66}
67
68impl MultipartForm {
69 /// The value of the text field `name`, last-wins if it repeats.
70 ///
71 /// Returns `None` if no text field by that name was sent. (File parts
72 /// are not considered; look in [`files`](MultipartForm::files) for
73 /// those.)
74 pub fn field(&self, name: &str) -> Option<&str> {
75 self.fields
76 .iter()
77 .rev()
78 .find(|(k, _)| k == name)
79 .map(|(_, v)| v.as_str())
80 }
81
82 /// Iterate over every text field as `(&name, &value)`, in body order,
83 /// including repeats.
84 pub fn iter_fields(&self) -> impl Iterator<Item = (&str, &str)> {
85 self.fields.iter().map(|(k, v)| (k.as_str(), v.as_str()))
86 }
87}
88
/// Errors [`parse_multipart`] can return.
///
/// The enum is `Clone` and comparable (`PartialEq` / `Eq`) so callers and
/// tests can match on an exact expected error value. Two
/// [`Parse`](MultipartError::Parse) errors are equal when their messages
/// are equal.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum MultipartError {
    /// The `Content-Type` header had no `boundary` parameter, so the body
    /// can't be split into parts.
    MissingBoundary,
    /// The underlying multipart parser rejected the body (malformed part
    /// headers, truncated body, etc.). Carries multer's message.
    Parse(String),
    /// A part (or the whole body) exceeded a configured size cap.
    ///
    /// Not produced by [`parse_multipart`] today (no cap is imposed at this
    /// layer yet); reserved so a future size-limited entry point can report
    /// it without a breaking API change.
    TooLarge {
        /// The configured limit, in bytes.
        limit: usize,
        /// The actual size that was rejected, in bytes.
        actual: usize,
    },
}
110
111impl std::fmt::Display for MultipartError {
112 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
113 match self {
114 MultipartError::MissingBoundary => {
115 write!(f, "multipart: Content-Type has no boundary parameter")
116 }
117 MultipartError::Parse(s) => write!(f, "multipart: parse error: {s}"),
118 MultipartError::TooLarge { limit, actual } => write!(
119 f,
120 "multipart: body {actual}B exceeds configured cap of {limit}B"
121 ),
122 }
123 }
124}
125
126impl std::error::Error for MultipartError {}
127
128/// Errors [`parse_and_store_multipart`] can return: a parse failure, a
129/// storage failure, or the absence of a registered storage backend.
130#[derive(Debug)]
131pub enum MultipartUploadError {
132 /// Parsing the multipart body failed. See [`MultipartError`].
133 Multipart(MultipartError),
134 /// Storing an uploaded file through the [`Storage`] backend failed.
135 ///
136 /// [`Storage`]: crate::storage::Storage
137 Storage(StorageError),
138 /// No [`Storage`] backend was registered, but the body carried a file
139 /// part that needed storing.
140 ///
141 /// A stray multipart POST against a server with no media backend lands
142 /// here rather than panicking the worker; the boot-time system check
143 /// (Wave 2) is what guarantees a backend exists whenever a model
144 /// declares a file field.
145 ///
146 /// [`Storage`]: crate::storage::Storage
147 NoStorageBackend,
148}
149
150impl std::fmt::Display for MultipartUploadError {
151 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
152 match self {
153 MultipartUploadError::Multipart(e) => write!(f, "{e}"),
154 MultipartUploadError::Storage(e) => write!(f, "{e}"),
155 MultipartUploadError::NoStorageBackend => write!(
156 f,
157 "multipart upload: no Storage backend registered; add StoragePlugin \
158 or call umbral::storage::set_storage"
159 ),
160 }
161 }
162}
163
164impl std::error::Error for MultipartUploadError {
165 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
166 match self {
167 MultipartUploadError::Multipart(e) => Some(e),
168 MultipartUploadError::Storage(e) => Some(e),
169 MultipartUploadError::NoStorageBackend => None,
170 }
171 }
172}
173
174impl From<MultipartError> for MultipartUploadError {
175 fn from(e: MultipartError) -> Self {
176 MultipartUploadError::Multipart(e)
177 }
178}
179
180impl From<StorageError> for MultipartUploadError {
181 fn from(e: StorageError) -> Self {
182 MultipartUploadError::Storage(e)
183 }
184}
185
/// Whether a `Content-Type` header value denotes a `multipart/form-data`
/// body.
///
/// Only the media type — the text before the first `;` — is examined. It
/// is trimmed of surrounding whitespace and compared case-insensitively
/// against `multipart/form-data`. Any parameters (`; boundary=...`) are
/// ignored here and parsed later by [`parse_multipart`].
///
/// The comparison is exact, so a different media type that merely *begins*
/// with the same characters (for example `multipart/form-data-extended`)
/// is not mistaken for a form upload.
pub fn is_multipart(content_type: &str) -> bool {
    // Split off the parameters; a header with no `;` is all media type.
    let media_type = content_type
        .split_once(';')
        .map_or(content_type, |(media_type, _params)| media_type);

    media_type
        .trim()
        .eq_ignore_ascii_case("multipart/form-data")
}
198
199/// Parse a `multipart/form-data` body into text fields and file parts.
200///
201/// `content_type_header` is the full `Content-Type` header value (it must
202/// carry the `boundary=...` parameter). `body` is the complete request body
203/// in memory.
204///
205/// Text parts (no `filename`) land in [`MultipartForm::fields`] preserving
206/// order and repeats; parts with a `filename` land in
207/// [`MultipartForm::files`] as [`FilePart`]s with their bytes kept verbatim.
208///
209/// # Errors
210///
211/// - [`MultipartError::MissingBoundary`] if the header has no boundary.
212/// - [`MultipartError::Parse`] on a malformed body.
213pub async fn parse_multipart(
214 content_type_header: &str,
215 body: impl Into<bytes::Bytes>,
216) -> Result<MultipartForm, MultipartError> {
217 let boundary =
218 multer::parse_boundary(content_type_header).map_err(|_| MultipartError::MissingBoundary)?;
219
220 let body: bytes::Bytes = body.into();
221 // multer's constructor wants a Bytes stream; the whole body is already
222 // in memory, so a single-chunk, never-erroring stream is enough.
223 let stream = futures_util::stream::once(async move { Ok::<_, Infallible>(body) });
224 let mut multipart = multer::Multipart::new(stream, boundary);
225
226 let mut form = MultipartForm::default();
227
228 while let Some(field) = multipart
229 .next_field()
230 .await
231 .map_err(|e| MultipartError::Parse(e.to_string()))?
232 {
233 // Capture all metadata BEFORE reading the body: multer's `bytes()`
234 // / `text()` consume the field handle, after which name/filename/
235 // content_type are gone.
236 let field_name = field.name().map(str::to_owned).unwrap_or_default();
237 let filename = field.file_name().map(str::to_owned);
238 let content_type = field.content_type().map(|m| m.to_string());
239
240 if filename.is_some() {
241 // A part with a filename is a file: keep raw bytes, never decode.
242 let bytes = field
243 .bytes()
244 .await
245 .map_err(|e| MultipartError::Parse(e.to_string()))?;
246 form.files.push(FilePart {
247 field_name,
248 filename,
249 content_type,
250 bytes: bytes.to_vec(),
251 });
252 } else {
253 // A part with no filename is a plain text field.
254 let value = field
255 .text()
256 .await
257 .map_err(|e| MultipartError::Parse(e.to_string()))?;
258 form.fields.push((field_name, value));
259 }
260 }
261
262 Ok(form)
263}
264
265/// Parse a `multipart/form-data` body, store its file parts, and return a
266/// flat `Vec<(String, String)>` of every field — text values plus the
267/// storage key of each uploaded file.
268///
269/// This is the upload entry point a handler calls instead of
270/// `serde_urlencoded::from_str::<Vec<(String, String)>>` when the body is
271/// multipart: the return shape is identical, so the rest of the form
272/// pipeline doesn't care which encoding arrived.
273///
274/// Each non-empty [`FilePart`] is stored via the ambient [`Storage`]
275/// backend and contributes one `(field_name, stored_key)` pair, using the
276/// part's `filename` (falling back to the field name) and its
277/// `content_type` (falling back to `application/octet-stream`).
278///
279/// ## Empty file parts are skipped — "keep current file on edit"
280///
281/// When a user edits a record with a file field but does *not* choose a new
282/// file, the browser still sends the file part — with an empty body. Such a
283/// part is **skipped entirely**: no pair is emitted for it. This is
284/// deliberate. Emitting `(field, "")` would overwrite the stored key with
285/// an empty string and lose the existing file; omitting the pair leaves the
286/// current value untouched downstream.
287///
288/// # Errors
289///
290/// - [`MultipartUploadError::Multipart`] on a parse failure.
291/// - [`MultipartUploadError::NoStorageBackend`] if a file needs storing but
292/// no backend is registered (returned, never panicked).
293/// - [`MultipartUploadError::Storage`] if the backend's `store` fails.
294///
295/// [`Storage`]: crate::storage::Storage
296pub async fn parse_and_store_multipart(
297 content_type_header: &str,
298 body: impl Into<bytes::Bytes>,
299) -> Result<Vec<(String, String)>, MultipartUploadError> {
300 let form = parse_multipart(content_type_header, body).await?;
301
302 let mut pairs: Vec<(String, String)> = Vec::new();
303
304 for file in &form.files {
305 // Skip empty file parts: the user submitted the edit form without
306 // choosing a new file, so leave the existing stored value alone.
307 if file.bytes.is_empty() {
308 continue;
309 }
310
311 // Resolve the backend lazily and only when a file actually needs
312 // storing, so a multipart POST with no file (or only empty parts)
313 // never trips on a missing backend.
314 let backend =
315 crate::storage::storage_opt().ok_or(MultipartUploadError::NoStorageBackend)?;
316
317 let filename = file
318 .filename
319 .as_deref()
320 .filter(|s| !s.is_empty())
321 .unwrap_or(&file.field_name);
322 let content_type = file
323 .content_type
324 .as_deref()
325 .unwrap_or("application/octet-stream");
326
327 let stored = backend.store(filename, content_type, &file.bytes).await?;
328 pairs.push((file.field_name.clone(), stored.key));
329 }
330
331 // Text fields always pass through, after the file keys.
332 pairs.extend(form.fields);
333
334 Ok(pairs)
335}
336
#[cfg(test)]
mod tests {
    use super::*;

    /// Boundary token shared by [`build_body`] and [`ct_header`].
    const BOUNDARY: &str = "X-UMBRAL-BOUNDARY";

    /// Description of one part handed to [`build_body`], as
    /// `(name, filename, content_type, value)`. The filename decides the
    /// kind of part: `None` is a text field, `Some` is a file upload.
    type PartSpec<'a> = (&'a str, Option<&'a str>, Option<&'a str>, &'a [u8]);

    /// Serialise part specs into a genuine `multipart/form-data` body.
    ///
    /// Text parts (filename `None`) get only a `Content-Disposition`
    /// header. File parts additionally carry the filename and, when a
    /// content type is given, a `Content-Type` header line.
    fn build_body(parts: &[PartSpec<'_>]) -> Vec<u8> {
        let mut wire: Vec<u8> = Vec::new();
        for &(name, filename, content_type, value) in parts {
            // Assemble the delimiter and header lines as text first.
            let mut head = format!("--{BOUNDARY}\r\n");
            if let Some(fname) = filename {
                head += &format!(
                    "Content-Disposition: form-data; name=\"{name}\"; filename=\"{fname}\"\r\n"
                );
                if let Some(ct) = content_type {
                    head += &format!("Content-Type: {ct}\r\n");
                }
            } else {
                head += &format!("Content-Disposition: form-data; name=\"{name}\"\r\n");
            }

            wire.extend_from_slice(head.as_bytes());
            // Blank line ends the headers; the payload is appended as raw
            // bytes so non-UTF-8 content survives.
            wire.extend_from_slice(b"\r\n");
            wire.extend_from_slice(value);
            wire.extend_from_slice(b"\r\n");
        }
        wire.extend_from_slice(format!("--{BOUNDARY}--\r\n").as_bytes());
        wire
    }

    /// A `Content-Type` header value announcing [`BOUNDARY`].
    fn ct_header() -> String {
        format!("multipart/form-data; boundary={BOUNDARY}")
    }

    #[test]
    fn is_multipart_matches_form_data_content_types() {
        let accepted = [
            "multipart/form-data; boundary=abc",
            "multipart/form-data",
            // Mixed case and leading whitespace are tolerated.
            " Multipart/Form-Data; boundary=Z",
        ];
        for header in accepted {
            assert!(is_multipart(header));
        }

        let rejected = [
            "application/x-www-form-urlencoded",
            "application/json",
            "multipart/mixed; boundary=abc",
        ];
        for header in rejected {
            assert!(!is_multipart(header));
        }
    }

    #[tokio::test]
    async fn parse_separates_text_and_file_parts() {
        let png = b"\x89PNG\r\n\x1a\nfake-image-bytes";
        let wire = build_body(&[
            ("title", None, None, b"Hello"),
            ("cover", Some("p.png"), Some("image/png"), png),
        ]);

        let parsed = parse_multipart(&ct_header(), wire).await.unwrap();

        let expected_fields = vec![("title".to_string(), "Hello".to_string())];
        assert_eq!(parsed.fields, expected_fields);

        assert_eq!(parsed.files.len(), 1);
        let upload = &parsed.files[0];
        assert_eq!(upload.field_name, "cover");
        assert_eq!(upload.filename.as_deref(), Some("p.png"));
        assert_eq!(upload.content_type.as_deref(), Some("image/png"));
        assert_eq!(upload.bytes, png);
    }

    #[tokio::test]
    async fn parse_preserves_repeated_text_field_names() {
        let wire = build_body(&[
            ("tags", None, None, b"red"),
            ("tags", None, None, b"blue"),
            ("name", None, None, b"shirt"),
        ]);

        let parsed = parse_multipart(&ct_header(), wire).await.unwrap();

        // Both `tags` values must come through, in the order they were
        // sent — this is what M2M / multi-select widgets rely on.
        let expected: Vec<(String, String)> =
            [("tags", "red"), ("tags", "blue"), ("name", "shirt")]
                .iter()
                .map(|&(k, v)| (k.to_string(), v.to_string()))
                .collect();
        assert_eq!(parsed.fields, expected);

        // `field()` resolves a repeated name to its last value.
        assert_eq!(parsed.field("tags"), Some("blue"));
        assert_eq!(parsed.field("name"), Some("shirt"));
        assert_eq!(parsed.field("missing"), None);

        // `iter_fields()` visits every entry, repeats included.
        let tag_hits = parsed.iter_fields().filter(|&(k, _)| k == "tags").count();
        assert_eq!(tag_hits, 2);
    }

    #[tokio::test]
    async fn parse_keeps_binary_bytes_intact() {
        // 0xFF and 0x80 are not valid UTF-8, so a parser that decoded the
        // payload as text would corrupt it. The vector is built at runtime
        // rather than as a const literal because clippy const-folds a
        // literal `from_utf8` and warns that it always errors.
        let raw: Vec<u8> = vec![0x00, 0xFF, 0xFE, 0x80, 0x01, 0x7F];
        assert!(std::str::from_utf8(&raw).is_err());

        let wire = build_body(&[(
            "blob",
            Some("data.bin"),
            Some("application/octet-stream"),
            &raw,
        )]);

        let parsed = parse_multipart(&ct_header(), wire).await.unwrap();

        assert_eq!(parsed.files.len(), 1);
        assert_eq!(parsed.files[0].bytes, raw, "raw bytes must round-trip");
    }

    #[tokio::test]
    async fn parse_errors_on_missing_boundary() {
        let wire = build_body(&[("title", None, None, b"Hi")]);

        // The header names the right media type but omits `boundary=`.
        let outcome = parse_multipart("multipart/form-data", wire).await;

        assert!(matches!(outcome, Err(MultipartError::MissingBoundary)));
    }
}