Skip to main content

jerrycan_core/
multipart.rs

1//! `multipart/form-data` (RFC 7578). The parser half is a pure incremental
2//! state machine — fed chunks, drained as events, no IO — so the grammar is
3//! unit-testable at every chunk straddle and fuzzable in isolation
4//! (`fuzz/fuzz_targets/multipart_parse.rs`). The extractor half (Task 7)
5//! adapts it to the request body lanes.
6
7use crate::error::{Error, Result};
8use crate::extract::{BodyLane, FromRequest, RequestCtx, StreamLane, map_stream_error};
9use bytes::{Bytes, BytesMut};
10
/// Upper bound (8 KiB) on one part's header block. Larger blocks are rejected
/// (413) — headers are attacker-controlled and have no legitimate reason to be
/// large. The parser reuses the same value to cap the transport padding that
/// may follow a boundary.
pub(crate) const MAX_PART_HEADER_BYTES: usize = 8 * 1024;
/// Upper bound on the number of parts in one body. More parts than this is
/// rejected (413) — a part-count bomb, not a form.
pub(crate) const MAX_PARTS: usize = 256;
16
/// Metadata parsed from a single part's header block.
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct PartMeta {
    /// The `name` parameter of `content-disposition`. Header parsing fails
    /// when it is absent, so this is always populated.
    pub(crate) name: String,
    /// The `filename` parameter of `content-disposition`, when present.
    pub(crate) filename: Option<String>,
    /// The trimmed value of the part's own `content-type` header, when present.
    pub(crate) content_type: Option<String>,
}
23
/// What the parser yields while walking the body, in wire order.
pub(crate) enum Event {
    /// A part's header block was parsed; that part's data follows.
    PartHeaders(PartMeta),
    /// A slice of the current part's data. One part's data may arrive as
    /// several consecutive `Data` events.
    Data(Bytes),
    /// The delimiter that ends the current part was consumed.
    EndOfPart,
    /// The closing boundary was seen. Once reached, the parser returns this
    /// on every subsequent call.
    Done,
}
30
/// Why the parser rejected the body. Converted to a response error by
/// `map_parse_error`.
#[derive(Debug)]
pub(crate) enum ParseError {
    /// The body violates the multipart grammar or is truncated; the string
    /// names the specific problem.
    Malformed(&'static str),
    /// A part's header block exceeded `MAX_PART_HEADER_BYTES`.
    HeadersTooLarge,
    /// The body contains more than `MAX_PARTS` parts.
    TooManyParts,
}
37
/// Where the parser currently is in the multipart grammar.
enum State {
    /// Before the first boundary; everything here is discardable preamble.
    Preamble,
    /// Just past `--<boundary>`: deciding between a new part (CRLF) and the
    /// closing boundary (`--`), after optional SP/HT padding.
    AfterBoundary,
    /// Accumulating a part's header block until its `\r\n\r\n` terminator.
    Headers,
    /// Emitting a part's data until the next delimiter.
    Data,
    /// The closing boundary was consumed; terminal.
    Done,
}
45
46pub(crate) struct Parser {
47    /// The delimiter as it appears mid-stream: `\r\n--<boundary>`.
48    delimiter: Vec<u8>,
49    buf: BytesMut,
50    state: State,
51    parts: usize,
52    eof: bool,
53    /// How far into `buf` the Headers-state `\r\n\r\n` scan has already looked.
54    /// Persisted across `next_event` calls so a header block fed one byte at a
55    /// time is scanned once (O(n)) instead of re-scanned from 0 each feed
56    /// (O(n²)). Reset to 0 on every transition into Headers and on a hit.
57    header_scan_from: usize,
58}
59
60impl Parser {
61    pub(crate) fn new(boundary: &str) -> Self {
62        let mut delimiter = Vec::with_capacity(boundary.len() + 4);
63        delimiter.extend_from_slice(b"\r\n--");
64        delimiter.extend_from_slice(boundary.as_bytes());
65        Self {
66            delimiter,
67            buf: BytesMut::new(),
68            state: State::Preamble,
69            parts: 0,
70            eof: false,
71            header_scan_from: 0,
72        }
73    }
74
75    pub(crate) fn feed(&mut self, chunk: &[u8]) {
76        self.buf.extend_from_slice(chunk);
77    }
78
79    /// No more input will arrive. After this, `next_event` never returns
80    /// `Ok(None)` — every state resolves to an event or a truncation error.
81    pub(crate) fn finish(&mut self) {
82        self.eof = true;
83    }
84
85    /// The next parse event, or `Ok(None)` when more input is needed.
86    pub(crate) fn next_event(&mut self) -> std::result::Result<Option<Event>, ParseError> {
87        loop {
88            match self.state {
89                State::Done => return Ok(Some(Event::Done)),
90                State::Preamble => {
91                    // The FIRST boundary may sit at offset 0 without a leading CRLF.
92                    let bare = &self.delimiter[2..];
93                    if self.buf.len() >= bare.len() && self.buf[..bare.len()] == *bare {
94                        let _ = self.buf.split_to(bare.len());
95                        self.state = State::AfterBoundary;
96                        continue;
97                    }
98                    match find(&self.buf, &self.delimiter) {
99                        Some(i) => {
100                            let _ = self.buf.split_to(i + self.delimiter.len());
101                            self.state = State::AfterBoundary;
102                        }
103                        None => {
104                            if self.eof {
105                                return Err(ParseError::Malformed("no multipart boundary found"));
106                            }
107                            // Preamble is discardable; keep only a possible
108                            // delimiter prefix at the tail.
109                            let keep = (self.delimiter.len() - 1).min(self.buf.len());
110                            let cut = self.buf.len() - keep;
111                            let _ = self.buf.split_to(cut);
112                            return Ok(None);
113                        }
114                    }
115                }
116                State::AfterBoundary => {
117                    // Past `--boundary`: optional transport padding (SP/HT),
118                    // then CRLF (a part follows) or `--` (closing boundary).
119                    let mut i = 0;
120                    while i < self.buf.len() && (self.buf[i] == b' ' || self.buf[i] == b'\t') {
121                        i += 1;
122                    }
123                    // Cap the padding run BEFORE any need-more-data return so the
124                    // buffer cannot grow unboundedly while an attacker streams
125                    // spaces after a boundary — the only otherwise-unbounded state
126                    // (Preamble/Data are bounded by holdback, Headers by its cap).
127                    // Real transport padding is a handful of bytes; reusing the
128                    // header cap keeps the constant set minimal.
129                    if i > MAX_PART_HEADER_BYTES {
130                        return Err(ParseError::Malformed(
131                            "excessive padding after multipart boundary",
132                        ));
133                    }
134                    if self.buf.len() < i + 2 {
135                        if self.eof {
136                            return Err(ParseError::Malformed("truncated multipart boundary line"));
137                        }
138                        return Ok(None);
139                    }
140                    if &self.buf[i..i + 2] == b"--" {
141                        let _ = self.buf.split_to(i + 2);
142                        self.state = State::Done;
143                        continue;
144                    }
145                    if &self.buf[i..i + 2] == b"\r\n" {
146                        let _ = self.buf.split_to(i + 2);
147                        self.parts += 1;
148                        if self.parts > MAX_PARTS {
149                            return Err(ParseError::TooManyParts);
150                        }
151                        // Entering Headers with a fresh buffer window: start the
152                        // incremental `\r\n\r\n` scan from the beginning.
153                        self.header_scan_from = 0;
154                        self.state = State::Headers;
155                        continue;
156                    }
157                    return Err(ParseError::Malformed(
158                        "invalid bytes after multipart boundary",
159                    ));
160                }
161                State::Headers => {
162                    // Resume the `\r\n\r\n` scan from where the last call stopped.
163                    // A 4-byte terminator can only newly complete within the last
164                    // 3 bytes of previously scanned input plus the freshly fed
165                    // bytes, so back up 3 from the cursor. The buffer is never
166                    // consumed while in Headers (split_to happens only on the hit
167                    // that leaves this state), so cursor positions stay valid.
168                    let start = self.header_scan_from.saturating_sub(3);
169                    match find(&self.buf[start..], b"\r\n\r\n").map(|i| i + start) {
170                        Some(i) => {
171                            let block = self.buf.split_to(i + 4);
172                            let meta = parse_part_headers(&block[..i])?;
173                            // Reset for the next part's header block.
174                            self.header_scan_from = 0;
175                            self.state = State::Data;
176                            return Ok(Some(Event::PartHeaders(meta)));
177                        }
178                        None => {
179                            if self.buf.len() > MAX_PART_HEADER_BYTES {
180                                return Err(ParseError::HeadersTooLarge);
181                            }
182                            if self.eof {
183                                return Err(ParseError::Malformed(
184                                    "truncated multipart part headers",
185                                ));
186                            }
187                            // Everything up to here has been scanned; next call
188                            // resumes from the new tail.
189                            self.header_scan_from = self.buf.len();
190                            return Ok(None);
191                        }
192                    }
193                }
194                State::Data => match find(&self.buf, &self.delimiter) {
195                    Some(0) => {
196                        let _ = self.buf.split_to(self.delimiter.len());
197                        self.state = State::AfterBoundary;
198                        return Ok(Some(Event::EndOfPart));
199                    }
200                    Some(i) => {
201                        let data = self.buf.split_to(i).freeze();
202                        return Ok(Some(Event::Data(data)));
203                    }
204                    None => {
205                        // Emit all but a possible delimiter prefix (holdback).
206                        let keep = (self.delimiter.len() - 1).min(self.buf.len());
207                        let emit = self.buf.len() - keep;
208                        if emit > 0 {
209                            let data = self.buf.split_to(emit).freeze();
210                            return Ok(Some(Event::Data(data)));
211                        }
212                        if self.eof {
213                            return Err(ParseError::Malformed("truncated multipart body"));
214                        }
215                        return Ok(None);
216                    }
217                },
218            }
219        }
220    }
221}
222
/// Offset of the first occurrence of `needle` in `haystack`, or `None` when
/// it does not occur. An empty needle never matches.
fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    let width = needle.len();
    if width == 0 || width > haystack.len() {
        return None;
    }
    // Last offset at which a full-width window still fits.
    let last_start = haystack.len() - width;
    (0..=last_start).find(|&at| haystack[at..].starts_with(needle))
}
229
230fn parse_part_headers(block: &[u8]) -> std::result::Result<PartMeta, ParseError> {
231    let text = std::str::from_utf8(block)
232        .map_err(|_| ParseError::Malformed("part headers are not valid UTF-8"))?;
233    let mut name = None;
234    let mut filename = None;
235    let mut content_type = None;
236    for line in text.split("\r\n").filter(|l| !l.is_empty()) {
237        let Some((key, value)) = line.split_once(':') else {
238            return Err(ParseError::Malformed("malformed part header line"));
239        };
240        let key = key.trim().to_ascii_lowercase();
241        let value = value.trim();
242        if key == "content-disposition" {
243            for param in value.split(';').skip(1) {
244                let Some((k, v)) = param.split_once('=') else {
245                    continue;
246                };
247                match k.trim() {
248                    "name" => name = Some(unquote(v.trim())),
249                    "filename" => filename = Some(unquote(v.trim())),
250                    _ => {}
251                }
252            }
253        } else if key == "content-type" {
254            content_type = Some(value.to_string());
255        }
256    }
257    Ok(PartMeta {
258        name: name.ok_or(ParseError::Malformed("part is missing a form-data name"))?,
259        filename,
260        content_type,
261    })
262}
263
/// Decodes an RFC 2183 parameter value. A value wrapped in double quotes has
/// the quotes removed and every backslash escape replaced by the character it
/// escapes (a trailing lone backslash is dropped). Anything not wrapped in a
/// matching pair of quotes is returned unchanged.
fn unquote(v: &str) -> String {
    let Some(inner) = v.strip_prefix('"').and_then(|rest| rest.strip_suffix('"')) else {
        return v.to_owned();
    };
    let mut decoded = String::with_capacity(inner.len());
    let mut after_backslash = false;
    for ch in inner.chars() {
        if after_backslash {
            // Escaped character: taken literally, whatever it is.
            decoded.push(ch);
            after_backslash = false;
        } else if ch == '\\' {
            after_backslash = true;
        } else {
            decoded.push(ch);
        }
    }
    decoded
}
285
/// Default per-part size cap (8 MiB) applied when a whole part is buffered via
/// `Part::bytes` or `Part::text`. Override it per request with
/// [`Multipart::set_part_cap`]. Streamed `chunk()` reads are not capped by
/// this — the route's cumulative `body_limit` governs them.
pub(crate) const DEFAULT_PART_CAP: usize = 8 * 1024 * 1024;
290
/// Streaming `multipart/form-data` extractor. Parts arrive in wire order and
/// must be consumed sequentially; [`next_part`](Multipart::next_part) discards
/// any unread remainder of the previous part. Requires
/// `content-type: multipart/form-data` with a valid boundary — anything else is
/// `415 JC0415`.
///
/// Single-consumer: the extractor takes ownership of the body, so extracting it
/// twice in one handler is a programming error (500 on stream routes).
pub struct Multipart {
    /// The incremental parser that all body bytes are fed into.
    parser: Parser,
    /// The body stream still to be read. `None` for a buffered body (fed to
    /// the parser in full up front) and once the stream has ended.
    source: Option<StreamLane>,
    /// Byte cap enforced by `Part::bytes` (and therefore `Part::text`).
    part_cap: usize,
    /// True from the moment a part is handed out until its end is seen.
    in_part: bool,
    /// True once the closing boundary has been reached.
    done: bool,
}
306
307impl FromRequest for Multipart {
308    async fn from_request(ctx: &mut RequestCtx) -> Result<Self> {
309        if ctx.is_task {
310            return Err(Error::task_context());
311        }
312        let content_type = ctx
313            .headers()
314            .get(http::header::CONTENT_TYPE)
315            .and_then(|v| v.to_str().ok())
316            .unwrap_or("");
317        let boundary =
318            boundary_from_content_type(content_type).ok_or_else(Error::unsupported_media_type)?;
319        let mut parser = Parser::new(&boundary);
320        let source = match &mut ctx.body {
321            BodyLane::Buffered(bytes) => {
322                parser.feed(bytes);
323                parser.finish();
324                None
325            }
326            BodyLane::Stream(slot) => Some(
327                slot.take()
328                    .ok_or_else(|| Error::internal("request body was already consumed"))?,
329            ),
330        };
331        Ok(Multipart {
332            parser,
333            source,
334            part_cap: DEFAULT_PART_CAP,
335            in_part: false,
336            done: false,
337        })
338    }
339}
340
341impl Multipart {
342    /// Build a buffered `Multipart` from an already-read body and its
343    /// `Content-Type` header value, or `None` if the content type is not
344    /// `multipart/form-data` with a valid boundary.
345    ///
346    /// The `Multipart` *extractor* (`FromRequest`) is the normal path and is
347    /// `415` on a non-multipart request. This constructor exists for handlers
348    /// that must accept *either* a multipart upload or some other content type
349    /// on the same route (e.g. a CSV `import` endpoint whose generated success
350    /// probe posts an empty JSON body): take `Headers` + [`RawBody`], then
351    /// `Multipart::from_buffered` when the type matches. The parsing is
352    /// identical to the extractor's buffered path.
353    ///
354    /// [`RawBody`]: crate::extract::RawBody
355    pub fn from_buffered(content_type: &str, body: impl Into<Bytes>) -> Option<Self> {
356        let boundary = boundary_from_content_type(content_type)?;
357        let mut parser = Parser::new(&boundary);
358        parser.feed(&body.into());
359        parser.finish();
360        Some(Multipart {
361            parser,
362            source: None,
363            part_cap: DEFAULT_PART_CAP,
364            in_part: false,
365            done: false,
366        })
367    }
368
369    /// Per-part byte cap enforced by [`Part::bytes`]/[`Part::text`]
370    /// (default 8 MiB).
371    pub fn set_part_cap(&mut self, bytes: usize) {
372        self.part_cap = bytes;
373    }
374
375    /// The next part, or `None` after the closing boundary. Any unread data of
376    /// the current part is discarded first.
377    pub async fn next_part(&mut self) -> Result<Option<Part<'_>>> {
378        if self.done {
379            return Ok(None);
380        }
381        while self.in_part {
382            match self.pull_event().await? {
383                Event::EndOfPart => self.in_part = false,
384                Event::Done => {
385                    self.done = true;
386                    return Ok(None);
387                }
388                Event::Data(_) => {}
389                Event::PartHeaders(_) => {
390                    return Err(Error::internal("multipart parser yielded headers mid-part"));
391                }
392            }
393        }
394        match self.pull_event().await? {
395            Event::PartHeaders(meta) => {
396                self.in_part = true;
397                Ok(Some(Part {
398                    multipart: self,
399                    meta,
400                }))
401            }
402            Event::Done => {
403                self.done = true;
404                Ok(None)
405            }
406            Event::Data(_) | Event::EndOfPart => Err(Error::internal(
407                "multipart parser yielded data outside a part",
408            )),
409        }
410    }
411
412    /// Drain the next parse event, feeding more body bytes from the stream lane
413    /// (or `finish`ing the parser at EOF) whenever the parser needs them.
414    async fn pull_event(&mut self) -> Result<Event> {
415        loop {
416            if let Some(event) = self.parser.next_event().map_err(map_parse_error)? {
417                return Ok(event);
418            }
419            match &mut self.source {
420                None => {
421                    return Err(Error::internal(
422                        "multipart parser stalled after end of input",
423                    ));
424                }
425                Some(stream) => {
426                    use http_body_util::BodyExt;
427                    match stream.frame().await {
428                        Some(Ok(frame)) => {
429                            if let Ok(data) = frame.into_data() {
430                                self.parser.feed(&data);
431                            }
432                        }
433                        Some(Err(e)) => return Err(map_stream_error(e)),
434                        None => {
435                            self.parser.finish();
436                            self.source = None;
437                        }
438                    }
439                }
440            }
441        }
442    }
443}
444
/// One part of a multipart request, borrowed from the [`Multipart`] it came
/// from (parts are sequential — finish one before asking for the next).
pub struct Part<'m> {
    /// The extractor this part reads its data through; the exclusive borrow
    /// is what prevents two parts from being live at once.
    multipart: &'m mut Multipart,
    /// The part's parsed headers (`name`, `filename`, `content-type`).
    meta: PartMeta,
}
451
452impl Part<'_> {
453    /// The `name` from `content-disposition` (always present — enforced).
454    pub fn name(&self) -> &str {
455        &self.meta.name
456    }
457    /// The `filename`, when the part is a file upload.
458    pub fn filename(&self) -> Option<&str> {
459        self.meta.filename.as_deref()
460    }
461    /// The part's own `content-type` header, when given.
462    pub fn content_type(&self) -> Option<&str> {
463        self.meta.content_type.as_deref()
464    }
465
466    /// The next chunk of this part's data, or `None` at the part's end.
467    /// Chunked reads are bounded by the route's cumulative `body_limit`, not
468    /// the per-part cap — use them to process big uploads without buffering.
469    pub async fn chunk(&mut self) -> Result<Option<Bytes>> {
470        if !self.multipart.in_part {
471            return Ok(None);
472        }
473        match self.multipart.pull_event().await? {
474            Event::Data(data) => Ok(Some(data)),
475            Event::EndOfPart => {
476                self.multipart.in_part = false;
477                Ok(None)
478            }
479            Event::Done => {
480                self.multipart.in_part = false;
481                self.multipart.done = true;
482                Ok(None)
483            }
484            Event::PartHeaders(_) => {
485                Err(Error::internal("multipart parser yielded headers mid-part"))
486            }
487        }
488    }
489
490    /// The whole part, buffered — capped at the per-part cap (413 beyond it).
491    pub async fn bytes(mut self) -> Result<Bytes> {
492        let cap = self.multipart.part_cap;
493        let mut out = BytesMut::new();
494        while let Some(chunk) = self.chunk().await? {
495            if out.len() + chunk.len() > cap {
496                return Err(Error::new(
497                    http::StatusCode::PAYLOAD_TOO_LARGE,
498                    "JC0413",
499                    format!("multipart part exceeds the per-part cap of {cap} bytes"),
500                ));
501            }
502            out.extend_from_slice(&chunk);
503        }
504        Ok(out.freeze())
505    }
506
507    /// The whole part as UTF-8 text (400 on invalid UTF-8).
508    pub async fn text(self) -> Result<String> {
509        let bytes = self.bytes().await?;
510        String::from_utf8(bytes.to_vec())
511            .map_err(|_| Error::bad_request("multipart part is not valid UTF-8"))
512    }
513}
514
515fn map_parse_error(e: ParseError) -> Error {
516    match e {
517        ParseError::Malformed(what) => {
518            Error::bad_request(format!("malformed multipart body: {what}"))
519        }
520        ParseError::HeadersTooLarge => Error::new(
521            http::StatusCode::PAYLOAD_TOO_LARGE,
522            "JC0413",
523            format!("multipart part headers exceed {MAX_PART_HEADER_BYTES} bytes"),
524        ),
525        ParseError::TooManyParts => Error::new(
526            http::StatusCode::PAYLOAD_TOO_LARGE,
527            "JC0413",
528            format!("more than {MAX_PARTS} multipart parts"),
529        ),
530    }
531}
532
/// Pulls the `boundary` parameter out of a `multipart/form-data` content type
/// and validates it against RFC 2046 §5.1.1 (1–70 characters from a restricted
/// set, not ending in a space). Returns `None` for any other media type, a
/// missing boundary, or an invalid one. Only the first `boundary` parameter is
/// considered.
fn boundary_from_content_type(value: &str) -> Option<String> {
    /// Non-alphanumeric characters a boundary may contain.
    const EXTRA_BOUNDARY_CHARS: &[u8] = b"'()+_,-./:=? ";

    let mut pieces = value.split(';');
    let media_type = pieces.next()?.trim();
    if !media_type.eq_ignore_ascii_case("multipart/form-data") {
        return None;
    }

    // Segments without `=` are skipped; the first `boundary` key decides.
    let raw = pieces
        .filter_map(|segment| segment.split_once('='))
        .find(|(key, _)| key.trim().eq_ignore_ascii_case("boundary"))
        .map(|(_, val)| val.trim())?;

    // Quotes are removed only when both the opening and closing quote exist.
    let boundary = match raw.strip_prefix('"') {
        Some(rest) => rest.strip_suffix('"').unwrap_or(raw),
        None => raw,
    };

    let length_ok = !boundary.is_empty() && boundary.len() <= 70;
    let charset_ok = boundary
        .bytes()
        .all(|b| b.is_ascii_alphanumeric() || EXTRA_BOUNDARY_CHARS.contains(&b));
    if length_ok && charset_ok && !boundary.ends_with(' ') {
        Some(boundary.to_owned())
    } else {
        None
    }
}
563
564/// Fuzzing hook: drives the parser over `input` split at `chunk` bytes until
565/// completion or error. Hidden — the fuzz crate is its only consumer.
566#[doc(hidden)]
567pub fn fuzz_drive(boundary: &str, input: &[u8], chunk: usize) {
568    let chunk = chunk.max(1);
569    let mut parser = Parser::new(boundary);
570    let mut feeds = input.chunks(chunk);
571    // The parser must terminate in events linear in the input size: every
572    // event either consumes bytes or is the terminal Done/Err. The budget
573    // asserts that — a fuzz-discovered livelock fails loudly here.
574    //
575    // Bound: worst case is chunk=1. Per fed byte the driver does at most two
576    // loop turns — an Ok(None) requesting the feed, then one Ok(Some(_)) that
577    // consumes >=1 byte (Data is holdback-bounded to >=1 byte; PartHeaders/
578    // EndOfPart consume their boundary/header bytes). Ok(None) turns total
579    // input.len()+1 (one per chunk + one finish); event turns total
580    // <=input.len() (each consumes >=1 byte). So ~3*input.len()+O(1) turns;
581    // input.len()*4 + 64 holds with margin and never underflows on empty input.
582    let mut budget = input.len() * 4 + 64;
583    loop {
584        match parser.next_event() {
585            Err(_) => return,
586            Ok(Some(Event::Done)) => return,
587            Ok(Some(_)) => {}
588            Ok(None) => match feeds.next() {
589                Some(c) => parser.feed(c),
590                None => parser.finish(),
591            },
592        }
593        budget -= 1;
594        assert!(budget > 0, "parser did not terminate in linear time");
595    }
596}
597
598#[cfg(test)]
599mod tests {
600    use super::*;
601
602    const BOUNDARY: &str = "XbOuNdArYx";
603
604    fn fixture() -> Vec<u8> {
605        let mut b = Vec::new();
606        b.extend_from_slice(b"--XbOuNdArYx\r\n");
607        b.extend_from_slice(b"content-disposition: form-data; name=\"title\"\r\n\r\n");
608        b.extend_from_slice(b"hello world\r\n");
609        b.extend_from_slice(b"--XbOuNdArYx\r\n");
610        b.extend_from_slice(
611            b"content-disposition: form-data; name=\"file\"; filename=\"a.csv\"\r\ncontent-type: text/csv\r\n\r\n",
612        );
613        b.extend_from_slice(b"col\r\n--not-a-boundary\r\nrow2\r\n"); // CRLF-- INSIDE data
614        b.extend_from_slice(b"\r\n--XbOuNdArYx--\r\n");
615        b
616    }
617
618    /// Drives the parser over `input` in `chunk`-byte steps.
619    fn run(input: &[u8], chunk: usize) -> (Vec<Vec<u8>>, Vec<PartMeta>) {
620        let mut p = Parser::new(BOUNDARY);
621        let mut feeds = input.chunks(chunk);
622        let mut datas: Vec<Vec<u8>> = Vec::new();
623        let mut metas = Vec::new();
624        loop {
625            match p.next_event().expect("valid fixture") {
626                Some(Event::PartHeaders(m)) => {
627                    metas.push(m);
628                    datas.push(Vec::new());
629                }
630                Some(Event::Data(d)) => datas.last_mut().unwrap().extend_from_slice(&d),
631                Some(Event::EndOfPart) => {}
632                Some(Event::Done) => return (datas, metas),
633                None => match feeds.next() {
634                    Some(c) => p.feed(c),
635                    None => p.finish(),
636                },
637            }
638        }
639    }
640
641    /// THE invariant: chunking must never change what is parsed. Every chunk
642    /// size from 1 byte up exercises every possible boundary straddle.
643    #[test]
644    fn every_chunking_yields_identical_parts() {
645        let input = fixture();
646        let (want_data, want_meta) = run(&input, input.len());
647        assert_eq!(want_data.len(), 2);
648        assert_eq!(want_data[0], b"hello world");
649        assert_eq!(
650            &want_data[1][..],
651            b"col\r\n--not-a-boundary\r\nrow2\r\n".as_slice()
652        );
653        assert_eq!(want_meta[1].filename.as_deref(), Some("a.csv"));
654        assert_eq!(want_meta[1].content_type.as_deref(), Some("text/csv"));
655        for chunk in 1..=input.len() {
656            let (data, meta) = run(&input, chunk);
657            assert_eq!(data, want_data, "chunk size {chunk}");
658            // Full meta equality (name/filename/content_type), not just length —
659            // chunking must not perturb any parsed header field at any straddle.
660            assert_eq!(meta, want_meta, "chunk size {chunk}");
661        }
662    }
663
664    #[test]
665    fn preamble_is_ignored_and_epilogue_is_ignored() {
666        let mut input = b"this is preamble junk\r\n".to_vec();
667        input.extend_from_slice(&fixture());
668        input.extend_from_slice(b"trailing epilogue junk");
669        let (data, _) = run(&input, 7);
670        assert_eq!(data.len(), 2);
671        assert_eq!(data[0], b"hello world");
672    }
673
674    #[test]
675    fn truncated_input_is_malformed_not_a_hang() {
676        let input = fixture();
677        for cut in [10, 40, input.len() - 5] {
678            let mut p = Parser::new(BOUNDARY);
679            p.feed(&input[..cut]);
680            p.finish();
681            let mut saw_err = false;
682            for _ in 0..1000 {
683                match p.next_event() {
684                    Err(_) => {
685                        saw_err = true;
686                        break;
687                    }
688                    Ok(Some(Event::Done)) => break,
689                    Ok(Some(_)) => {}
690                    Ok(None) => panic!("NeedMore after finish() at cut {cut}"),
691                }
692            }
693            assert!(saw_err, "cut {cut} must error (truncation), not complete");
694        }
695    }
696
697    /// Data-state truncation where the input ends mid-delimiter: the tail is a
698    /// PARTIAL delimiter (`\r\n--XbOuNdArY`, one byte short of the full token),
699    /// held back as a possible boundary prefix. On finish() this must surface as
700    /// truncation, never silent data loss (the held-back bytes dropped) or a hang.
701    #[test]
702    fn data_ending_mid_partial_delimiter_is_truncation() {
703        let mut input = Vec::new();
704        input.extend_from_slice(b"--XbOuNdArYx\r\n");
705        input.extend_from_slice(b"content-disposition: form-data; name=\"f\"\r\n\r\n");
706        input.extend_from_slice(b"payload");
707        // Full delimiter is "\r\n--XbOuNdArYx"; drop the final 'x' so the buffer
708        // ends one byte short of a boundary, all of it inside the holdback window.
709        input.extend_from_slice(b"\r\n--XbOuNdArY");
710        let mut p = Parser::new(BOUNDARY);
711        p.feed(&input);
712        p.finish();
713        // Drain any leading "payload" Data event, then require the truncation error.
714        let mut saw_err = None;
715        for _ in 0..1000 {
716            match p.next_event() {
717                Err(e) => {
718                    saw_err = Some(e);
719                    break;
720                }
721                Ok(Some(Event::Done)) => panic!("completed despite a truncated trailing delimiter"),
722                Ok(Some(_)) => {}
723                Ok(None) => panic!("NeedMore after finish()"),
724            }
725        }
726        assert!(
727            matches!(
728                saw_err,
729                Some(ParseError::Malformed("truncated multipart body"))
730            ),
731            "partial trailing delimiter must be truncated multipart body, got {saw_err:?}"
732        );
733    }
734
735    /// Important 1 regression: a boundary followed by a flood of SP padding must
736    /// be rejected before the buffer can grow past the cap, even while the
737    /// 2-byte CRLF/`--` discriminator is still pending (need-more-data path).
738    #[test]
739    fn padding_after_boundary_is_capped() {
740        let mut input = b"--XbOuNdArYx".to_vec();
741        input.extend_from_slice(&vec![b' '; 9 * 1024]);
742        let mut p = Parser::new(BOUNDARY);
743        p.feed(&input);
744        // No finish(): the cap must fire on the need-more-data path, not via eof.
745        assert!(matches!(
746            drive_to_error(&mut p),
747            ParseError::Malformed("excessive padding after multipart boundary")
748        ));
749    }
750
751    #[test]
752    fn header_block_over_cap_errors() {
753        let mut input = b"--XbOuNdArYx\r\ncontent-disposition: form-data; name=\"x".to_vec();
754        input.extend_from_slice(&vec![b'a'; MAX_PART_HEADER_BYTES + 1]);
755        let mut p = Parser::new(BOUNDARY);
756        p.feed(&input);
757        assert!(matches!(
758            drive_to_error(&mut p),
759            ParseError::HeadersTooLarge
760        ));
761    }
762
763    #[test]
764    fn part_count_over_cap_errors() {
765        let mut input = Vec::new();
766        for i in 0..=MAX_PARTS {
767            input.extend_from_slice(b"--XbOuNdArYx\r\n");
768            input.extend_from_slice(
769                format!("content-disposition: form-data; name=\"f{i}\"\r\n\r\nx\r\n").as_bytes(),
770            );
771        }
772        input.extend_from_slice(b"--XbOuNdArYx--");
773        let mut p = Parser::new(BOUNDARY);
774        p.feed(&input);
775        p.finish();
776        assert!(matches!(drive_to_error(&mut p), ParseError::TooManyParts));
777    }
778
779    #[test]
780    fn missing_name_is_malformed() {
781        let input = b"--XbOuNdArYx\r\ncontent-disposition: form-data\r\n\r\nx\r\n--XbOuNdArYx--";
782        let mut p = Parser::new(BOUNDARY);
783        p.feed(input);
784        p.finish();
785        assert!(matches!(drive_to_error(&mut p), ParseError::Malformed(_)));
786    }
787
788    #[test]
789    fn quoted_filenames_unescape() {
790        let input = b"--XbOuNdArYx\r\ncontent-disposition: form-data; name=\"f\"; filename=\"a \\\"b\\\".txt\"\r\n\r\nx\r\n--XbOuNdArYx--";
791        let mut p = Parser::new(BOUNDARY);
792        p.feed(input);
793        p.finish();
794        let meta = loop {
795            match p.next_event().unwrap() {
796                Some(Event::PartHeaders(m)) => break m,
797                Some(_) => {}
798                None => unreachable!(),
799            }
800        };
801        assert_eq!(meta.filename.as_deref(), Some("a \"b\".txt"));
802    }
803
804    /// RFC-degenerate edge: a value ending in an escaped quote (`filename="x\""`).
805    /// The naive strip-then-unescape leaves a dangling backslash; this proves the
806    /// parser neither panics nor loops, and produces a sane (lossless of `x`) result.
807    #[test]
808    fn filename_ending_in_escaped_quote_does_not_panic() {
809        let input = b"--XbOuNdArYx\r\ncontent-disposition: form-data; name=\"f\"; filename=\"x\\\"\"\r\n\r\nx\r\n--XbOuNdArYx--";
810        let mut p = Parser::new(BOUNDARY);
811        p.feed(input);
812        p.finish();
813        let meta = loop {
814            match p.next_event().unwrap() {
815                Some(Event::PartHeaders(m)) => break m,
816                Some(_) => {}
817                None => unreachable!(),
818            }
819        };
820        // Whatever the strip yields, it must contain the leading `x` and not panic.
821        assert!(meta.filename.as_deref().unwrap().starts_with('x'));
822    }
823
824    fn drive_to_error(p: &mut Parser) -> ParseError {
825        for _ in 0..100_000 {
826            match p.next_event() {
827                Err(e) => return e,
828                Ok(Some(Event::Done)) => panic!("completed without error"),
829                Ok(Some(_)) => {}
830                Ok(None) => panic!("NeedMore in drive_to_error"),
831            }
832        }
833        panic!("no error after 100k events");
834    }
835
836    // ----- Extractor tests ---------------------------------------------------
837
838    use crate::prelude::*;
839
840    const FORM_DATA_CT: &str = "multipart/form-data; boundary=XbOuNdArYx";
841
842    /// Collect every part's `(name, byte length)` — the shared handler body for
843    /// the buffered/stream parity tests.
844    async fn upload(mut mp: Multipart) -> Result<Json<Vec<(String, usize)>>> {
845        let mut out = Vec::new();
846        while let Some(part) = mp.next_part().await? {
847            let name = part.name().to_string();
848            let bytes = part.bytes().await?;
849            out.push((name, bytes.len()));
850        }
851        Ok(Json(out))
852    }
853
854    #[tokio::test]
855    async fn multipart_extracts_parts_on_a_stream_route() {
856        let t = App::new()
857            .route("/upload", post(upload).stream_body())
858            .into_test();
859        let res = t
860            .post_bytes_with("/upload", &fixture(), &[("content-type", FORM_DATA_CT)])
861            .await;
862        assert_eq!(res.status().as_u16(), 200, "body: {}", res.text());
863        // "hello world" is 11 bytes; the csv payload (with the in-data CRLF--)
864        // is 29 bytes. Stream framing must not perturb either.
865        assert_eq!(
866            res.json::<Vec<(String, usize)>>(),
867            vec![("title".to_string(), 11), ("file".to_string(), 29)]
868        );
869    }
870
871    #[tokio::test]
872    async fn multipart_works_on_buffered_routes_too() {
873        // Same handler, NO `.stream_body()`: the buffered lane feeds the parser
874        // upfront and must yield the identical parts.
875        let t = App::new().route("/upload", post(upload)).into_test();
876        let res = t
877            .post_bytes_with("/upload", &fixture(), &[("content-type", FORM_DATA_CT)])
878            .await;
879        assert_eq!(res.status().as_u16(), 200, "body: {}", res.text());
880        assert_eq!(
881            res.json::<Vec<(String, usize)>>(),
882            vec![("title".to_string(), 11), ("file".to_string(), 29)]
883        );
884    }
885
886    #[tokio::test]
887    async fn from_buffered_parses_multipart_and_returns_none_otherwise() {
888        // A multipart content type + body yields a working parser...
889        let mut mp = Multipart::from_buffered(FORM_DATA_CT, fixture()).expect("multipart");
890        let mut names = Vec::new();
891        while let Some(part) = mp.next_part().await.unwrap() {
892            names.push(part.name().to_string());
893        }
894        assert_eq!(names, vec!["title".to_string(), "file".to_string()]);
895        // ...and a non-multipart content type yields None (the caller falls back).
896        assert!(Multipart::from_buffered("application/json", b"{}".to_vec()).is_none());
897    }
898
899    #[tokio::test]
900    async fn wrong_content_type_is_415() {
901        let t = App::new().route("/upload", post(upload)).into_test();
902        // post_bytes defaults to application/octet-stream — not multipart.
903        let res = t.post_bytes("/upload", &fixture()).await;
904        assert_eq!(res.status().as_u16(), 415);
905        assert!(res.text().contains("JC0415"), "body: {}", res.text());
906    }
907
908    #[tokio::test]
909    async fn oversized_part_is_413_with_the_cap_message() {
910        async fn tiny_cap(mut mp: Multipart) -> Result<Json<usize>> {
911            mp.set_part_cap(16);
912            let mut count = 0;
913            while let Some(part) = mp.next_part().await? {
914                let _ = part.bytes().await?; // the second part (29 bytes) trips the cap
915                count += 1;
916            }
917            Ok(Json(count))
918        }
919        let t = App::new().route("/upload", post(tiny_cap)).into_test();
920        let res = t
921            .post_bytes_with("/upload", &fixture(), &[("content-type", FORM_DATA_CT)])
922            .await;
923        assert_eq!(res.status().as_u16(), 413, "body: {}", res.text());
924        assert!(res.text().contains("per-part"), "body: {}", res.text());
925    }
926
927    #[tokio::test]
928    async fn malformed_multipart_is_400() {
929        let t = App::new().route("/upload", post(upload)).into_test();
930        let body = b"--XbOuNdArYx\r\ngarbage-without-colon\r\n\r\n";
931        let res = t
932            .post_bytes_with("/upload", body, &[("content-type", FORM_DATA_CT)])
933            .await;
934        assert_eq!(res.status().as_u16(), 400, "body: {}", res.text());
935    }
936
937    #[tokio::test]
938    async fn next_part_discards_unread_remainder() {
939        // Read part 1's name but NOT its data; the next `next_part` must skip the
940        // unread remainder and still surface part 2 correctly.
941        async fn skip_first(mut mp: Multipart) -> Result<Json<Vec<String>>> {
942            let mut names = Vec::new();
943            if let Some(part) = mp.next_part().await? {
944                names.push(part.name().to_string());
945                // deliberately do not read part.bytes()/chunk()
946            }
947            while let Some(part) = mp.next_part().await? {
948                let name = part.name().to_string();
949                let data = part.bytes().await?;
950                names.push(format!("{name}:{}", data.len()));
951            }
952            Ok(Json(names))
953        }
954        let t = App::new()
955            .route("/upload", post(skip_first).stream_body())
956            .into_test();
957        let res = t
958            .post_bytes_with("/upload", &fixture(), &[("content-type", FORM_DATA_CT)])
959            .await;
960        assert_eq!(res.status().as_u16(), 200, "body: {}", res.text());
961        assert_eq!(
962            res.json::<Vec<String>>(),
963            vec!["title".to_string(), "file:29".to_string()]
964        );
965    }
966
967    #[tokio::test]
968    async fn chunked_reads_stream_without_part_cap() {
969        // A single part larger than a tiny per-part cap, read via `chunk()`:
970        // chunk() is governed by the route body_limit, NOT the per-part cap, so
971        // it must succeed even though `bytes()` would 413 at the same cap.
972        async fn stream_part(mut mp: Multipart) -> Result<Json<usize>> {
973            mp.set_part_cap(4); // far below the part's real size
974            let mut total = 0;
975            while let Some(mut part) = mp.next_part().await? {
976                while let Some(chunk) = part.chunk().await? {
977                    total += chunk.len();
978                }
979            }
980            Ok(Json(total))
981        }
982        // One part whose data is 200 bytes of 'z' — well over the 4-byte cap.
983        let payload = "z".repeat(200);
984        let mut body = Vec::new();
985        body.extend_from_slice(b"--XbOuNdArYx\r\n");
986        body.extend_from_slice(b"content-disposition: form-data; name=\"big\"\r\n\r\n");
987        body.extend_from_slice(payload.as_bytes());
988        body.extend_from_slice(b"\r\n--XbOuNdArYx--\r\n");
989        let t = App::new()
990            .route(
991                "/upload",
992                post(stream_part).stream_body().body_limit(64 * 1024),
993            )
994            .into_test();
995        let res = t
996            .post_bytes_with("/upload", &body, &[("content-type", FORM_DATA_CT)])
997            .await;
998        assert_eq!(res.status().as_u16(), 200, "body: {}", res.text());
999        assert_eq!(res.json::<usize>(), 200);
1000    }
1001
1002    #[tokio::test]
1003    async fn multipart_rejects_a_task_context_with_jc1003() {
1004        // HTTP-coupled extractor inside a task context must reject before reading
1005        // anything — mirrors the Headers/Json guard.
1006        use crate::dep::DepEnv;
1007        use crate::dep::DepResolver;
1008        use std::sync::Arc;
1009        let req = http::Request::builder()
1010            .uri("/")
1011            .header(http::header::CONTENT_TYPE, FORM_DATA_CT)
1012            .body(())
1013            .unwrap();
1014        let (parts, ()) = req.into_parts();
1015        let mut ctx = RequestCtx::new(
1016            parts,
1017            Bytes::new(),
1018            DepResolver::new(Arc::new(DepEnv::default()), Default::default()),
1019        );
1020        ctx.is_task = true;
1021        let err = Multipart::from_request(&mut ctx).await.err().unwrap();
1022        assert_eq!(err.code(), "JC1003");
1023        assert_eq!(err.status().as_u16(), 500);
1024    }
1025
1026    // ----- boundary_from_content_type unit tests -----------------------------
1027
1028    #[test]
1029    fn boundary_quoted_value_is_unquoted() {
1030        assert_eq!(
1031            boundary_from_content_type("multipart/form-data; boundary=\"abc123\"").as_deref(),
1032            Some("abc123")
1033        );
1034    }
1035
1036    #[test]
1037    fn boundary_over_70_chars_is_rejected() {
1038        let long = "x".repeat(71);
1039        let ct = format!("multipart/form-data; boundary={long}");
1040        assert_eq!(boundary_from_content_type(&ct), None);
1041        // Exactly 70 is the allowed maximum.
1042        let ok = "y".repeat(70);
1043        let ct = format!("multipart/form-data; boundary={ok}");
1044        assert_eq!(
1045            boundary_from_content_type(&ct).as_deref(),
1046            Some(ok.as_str())
1047        );
1048    }
1049
1050    #[test]
1051    fn boundary_empty_is_rejected() {
1052        // An empty boundary makes the parser grammar degenerate — must be None.
1053        assert_eq!(
1054            boundary_from_content_type("multipart/form-data; boundary="),
1055            None
1056        );
1057        assert_eq!(
1058            boundary_from_content_type("multipart/form-data; boundary=\"\""),
1059            None
1060        );
1061    }
1062
1063    #[test]
1064    fn boundary_media_type_is_case_insensitive() {
1065        assert_eq!(
1066            boundary_from_content_type("MULTIPART/FORM-DATA; BOUNDARY=x").as_deref(),
1067            Some("x")
1068        );
1069    }
1070
1071    #[test]
1072    fn boundary_missing_is_none() {
1073        assert_eq!(boundary_from_content_type("multipart/form-data"), None);
1074        // A non-multipart media type is also None (→ 415 at the call site).
1075        assert_eq!(
1076            boundary_from_content_type("application/json; boundary=x"),
1077            None
1078        );
1079    }
1080
1081    #[test]
1082    fn boundary_invalid_chars_are_rejected() {
1083        // `*` is outside the RFC 2046 §5.1.1 restricted set.
1084        assert_eq!(
1085            boundary_from_content_type("multipart/form-data; boundary=a*b"),
1086            None
1087        );
1088    }
1089
1090    #[test]
1091    fn boundary_trailing_space_is_rejected() {
1092        // A space is a valid bchars char but may not be the LAST char.
1093        assert_eq!(
1094            boundary_from_content_type("multipart/form-data; boundary=\"abc \""),
1095            None
1096        );
1097    }
1098}