#[cfg(test)]
use std::collections::HashMap;
/// One part of a parsed `multipart/form-data` body.
#[derive(Debug, Clone)]
pub(crate) struct MultipartPart {
/// Value of the `name` parameter of the part's `Content-Disposition` header.
pub name: String,
/// Value of the `filename` parameter, when the part carried a non-empty one.
pub filename: Option<String>,
/// Trimmed value of the part's `Content-Type` header; empty when the header is absent.
// NOTE(review): within this file the field is only read by tests, which is
// presumably why `dead_code` is allowed here — confirm before removing it.
#[allow(dead_code)]
pub content_type: String,
/// Raw bytes of the part body, without the line break that precedes the next boundary.
pub body: Vec<u8>,
}
/// The result of parsing a multipart body: every part that was found.
#[derive(Debug, Default)]
pub(crate) struct MultipartForm {
/// Parsed parts, in the order they appeared in the body.
pub parts: Vec<MultipartPart>,
}
impl MultipartForm {
    /// Returns the plain-text fields of the form as a `name -> value` map.
    ///
    /// A part counts as a text field when it has no `filename`. Parts whose
    /// body is not valid UTF-8 are skipped, and when several parts share a
    /// name the last one wins.
    #[cfg(test)]
    pub fn text_fields(&self) -> HashMap<String, String> {
        let mut fields = HashMap::with_capacity(self.parts.len());
        fields.extend(
            self.parts
                .iter()
                .filter(|part| part.filename.is_none())
                .filter_map(|part| {
                    let text = std::str::from_utf8(&part.body).ok()?;
                    Some((part.name.clone(), text.to_string()))
                }),
        );
        fields
    }
}
/// Reasons a multipart body can fail to parse.
#[derive(Debug)]
pub(crate) enum MultipartError {
/// No boundary was available (the parser was handed an empty boundary string).
MissingBoundary,
/// The `--boundary` delimiter does not occur anywhere in the body.
NoOpeningBoundary,
/// The header block of a part is longer than `HEADERS_MAX` bytes.
PartHeadersTooLong,
/// A part is structurally invalid; the payload describes what was wrong.
MalformedPart(String),
}
impl std::fmt::Display for MultipartError {
    /// Writes a short human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Only `MalformedPart` carries data; every other variant maps to a
        // fixed message that can be written out verbatim.
        let fixed_message = match self {
            Self::MalformedPart(msg) => return write!(f, "malformed part: {msg}"),
            Self::MissingBoundary => "Content-Type missing boundary parameter",
            Self::NoOpeningBoundary => "body does not start with the declared boundary",
            Self::PartHeadersTooLong => "part headers exceeded cap",
        };
        f.write_str(fixed_message)
    }
}
// Lets `MultipartError` be used wherever a `std::error::Error` is expected
// (for example boxed as `Box<dyn Error>`); every method keeps its trait default.
impl std::error::Error for MultipartError {}
/// Extracts the `boundary` parameter from a `multipart/form-data`
/// `Content-Type` header value.
///
/// The media type and the parameter *name* are matched ASCII
/// case-insensitively, as MIME requires; the boundary *value* is returned with
/// its original casing, with surrounding whitespace and double quotes removed.
///
/// Returns `None` when the media type is not exactly `multipart/form-data`
/// or when no non-empty `boundary` parameter is present.
pub(crate) fn boundary_from_content_type(ct: &str) -> Option<String> {
    const KEY: &str = "boundary=";

    let mut pieces = ct.split(';');
    // The first `;`-separated piece is the media type itself. Compare it as a
    // whole so that e.g. `multipart/form-database` is not accepted.
    let media_type = pieces.next()?.trim();
    if !media_type.eq_ignore_ascii_case("multipart/form-data") {
        return None;
    }

    pieces
        .map(str::trim)
        .filter_map(|piece| {
            // `get` yields `None` for pieces that are too short or would be
            // cut inside a multi-byte character, so this can never panic.
            let head = piece.get(..KEY.len())?;
            head.eq_ignore_ascii_case(KEY).then(|| &piece[KEY.len()..])
        })
        .map(|value| value.trim().trim_matches('"'))
        // An empty boundary is useless; keep looking at later parameters.
        .find(|boundary| !boundary.is_empty())
        .map(str::to_string)
}
/// Upper bound, in bytes, on the header block of a single part (16 KiB).
const HEADERS_MAX: usize = 16 * 1024;
/// Parses a `multipart/form-data` body into its parts.
///
/// `boundary` is the bare boundary token (without the leading `--`). The
/// parser is deliberately lenient: bytes before the first delimiter are
/// skipped, both CRLF and bare LF line endings are accepted, and a body that
/// simply ends after a delimiter is treated as finished.
///
/// The first occurrence of `--boundary` after a part's headers ends that
/// part, whether or not it starts a line.
///
/// # Errors
///
/// * [`MultipartError::MissingBoundary`] when `boundary` is empty.
/// * [`MultipartError::NoOpeningBoundary`] when `--boundary` never occurs.
/// * [`MultipartError::PartHeadersTooLong`] when a header block is longer
///   than `HEADERS_MAX` bytes.
/// * [`MultipartError::MalformedPart`] when a delimiter is not followed by a
///   line break, when a part's headers or body are unterminated, or when the
///   headers themselves are rejected.
pub(crate) fn parse_multipart(
    body: &[u8],
    boundary: &str,
) -> Result<MultipartForm, MultipartError> {
    if boundary.is_empty() {
        return Err(MultipartError::MissingBoundary);
    }
    let delim = format!("--{boundary}");
    let delim_bytes = delim.as_bytes();
    // Anything in front of the first delimiter is preamble and is ignored.
    let mut cursor =
        find_subsequence(body, delim_bytes).ok_or(MultipartError::NoOpeningBoundary)?;
    cursor += delim_bytes.len();
    let mut parts: Vec<MultipartPart> = Vec::new();
    loop {
        // Invariant: `cursor` sits directly behind a delimiter.
        if body.get(cursor..cursor + 2) == Some(b"--") {
            // `--boundary--` is the closing delimiter.
            break;
        }
        if body.get(cursor..cursor + 2) == Some(b"\r\n") {
            cursor += 2;
        } else if body.get(cursor..cursor + 1) == Some(b"\n") {
            cursor += 1;
        } else if cursor >= body.len() {
            // The body ends right after a delimiter: accept what was parsed.
            break;
        } else {
            return Err(MultipartError::MalformedPart(format!(
                "expected CRLF after boundary at byte {cursor}"
            )));
        }

        // Locate the blank line that ends the headers, taking whichever
        // terminator comes first. Preferring `\r\n\r\n` anywhere in the rest
        // of the body would let a blank CRLF line inside the part body (or in
        // a later part) be mistaken for the end of LF-terminated headers.
        let rest = &body[cursor..];
        let crlf_at = find_subsequence(rest, b"\r\n\r\n");
        // An earlier `\n\n` must lie entirely before the CRLF hit, so the LF
        // scan is limited to that prefix and never rescans the whole body.
        let lf_scope = crlf_at.map_or(rest, |at| &rest[..at]);
        let (headers_end_rel, header_terminator_len) =
            match (find_subsequence(lf_scope, b"\n\n"), crlf_at) {
                (Some(lf_at), _) => (lf_at, 2),
                (None, Some(crlf_at)) => (crlf_at, 4),
                (None, None) => {
                    return Err(MultipartError::MalformedPart(
                        "part headers unterminated".into(),
                    ))
                }
            };
        if headers_end_rel > HEADERS_MAX {
            return Err(MultipartError::PartHeadersTooLong);
        }
        let header_block = &body[cursor..cursor + headers_end_rel];
        cursor += headers_end_rel + header_terminator_len;
        let (name, filename, content_type) = parse_part_headers(header_block)?;

        let next_boundary_rel = find_subsequence(&body[cursor..], delim_bytes)
            .ok_or_else(|| MultipartError::MalformedPart("part body unterminated".into()))?;
        // The line break in front of the delimiter belongs to the delimiter,
        // not to the part body, so strip one CRLF (or bare LF) if present.
        let body_end = if next_boundary_rel >= 2
            && &body[cursor + next_boundary_rel - 2..cursor + next_boundary_rel] == b"\r\n"
        {
            next_boundary_rel - 2
        } else if next_boundary_rel >= 1 && body[cursor + next_boundary_rel - 1] == b'\n' {
            next_boundary_rel - 1
        } else {
            next_boundary_rel
        };
        let part_body = body[cursor..cursor + body_end].to_vec();
        cursor += next_boundary_rel + delim_bytes.len();
        parts.push(MultipartPart {
            name,
            filename,
            content_type,
            body: part_body,
        });
    }
    Ok(MultipartForm { parts })
}
/// Parses the header block of one part into `(name, filename, content_type)`.
///
/// Only `Content-Disposition` and `Content-Type` are interpreted (header names
/// are matched ASCII case-insensitively); other lines are ignored. When a
/// header repeats, the last usable value wins. `content_type` is empty when
/// no `Content-Type` header is present.
///
/// # Errors
///
/// Returns [`MultipartError::MalformedPart`] when the block is not valid
/// UTF-8 or when no `Content-Disposition` header supplies a `name`.
fn parse_part_headers(block: &[u8]) -> Result<(String, Option<String>, String), MultipartError> {
    const DISPOSITION: &str = "content-disposition:";
    const CONTENT_TYPE: &str = "content-type:";

    let text = match std::str::from_utf8(block) {
        Ok(text) => text,
        Err(e) => {
            return Err(MultipartError::MalformedPart(format!(
                "non-utf8 header: {e}"
            )))
        }
    };

    let mut field_name: Option<String> = None;
    let mut file_name: Option<String> = None;
    let mut media_type = String::new();

    // Splitting on `\n` and trimming also handles CRLF line endings: the
    // leftover `\r` is whitespace and is removed by `trim`.
    let header_lines = text
        .split('\n')
        .map(str::trim)
        .filter(|line| !line.is_empty());

    for line in header_lines {
        let lowered = line.to_ascii_lowercase();
        // ASCII lowercasing keeps byte offsets intact, so the length of the
        // matched (ASCII) header name is a valid index into the original line.
        if lowered.starts_with(DISPOSITION) {
            let params = &line[DISPOSITION.len()..];
            if let Some(found) = extract_quoted_param(params, "name=") {
                field_name = Some(found);
            }
            if let Some(found) = extract_quoted_param(params, "filename=") {
                file_name = Some(found);
            }
        } else if lowered.starts_with(CONTENT_TYPE) {
            media_type = line[CONTENT_TYPE.len()..].trim().to_string();
        }
    }

    match field_name {
        Some(name) => Ok((name, file_name, media_type)),
        None => Err(MultipartError::MalformedPart(
            "part missing Content-Disposition name".into(),
        )),
    }
}
/// Extracts the value of the parameter `key` (for example `"name="`) from a
/// header value such as ` form-data; name="x"; filename="y.png"`.
///
/// The key is matched ASCII case-insensitively and must start a parameter:
/// it has to be at the very beginning of `haystack` or directly follow a `;`
/// or whitespace. Without that rule a lookup of `name=` would also match the
/// tail of `filename=` and return the file name as the field name.
///
/// A value may be double-quoted (everything up to the next `"`) or bare
/// (everything up to the next `;` or line break, trimmed).
///
/// Returns `None` when the parameter is absent, when a quoted value has no
/// closing quote, or when the value is empty. Quoted values are not parsed
/// for escapes, and a key that appears inside another parameter's quoted
/// value after whitespace or `;` is still matched.
fn extract_quoted_param(haystack: &str, key: &str) -> Option<String> {
    let needle = key.to_ascii_lowercase();
    // ASCII lowercasing never changes byte offsets, so positions found in the
    // lowered copy index the original string directly.
    let hay_lower = haystack.to_ascii_lowercase();

    let (hit, _) = hay_lower.match_indices(needle.as_str()).find(|&(at, _)| {
        hay_lower[..at]
            .chars()
            .next_back()
            .map_or(true, |prev| prev == ';' || prev.is_ascii_whitespace())
    })?;
    let rest = &haystack[hit + key.len()..];

    let value = match rest.strip_prefix('"') {
        Some(quoted) => {
            let end = quoted.find('"')?;
            quoted[..end].to_string()
        }
        None => rest
            .chars()
            .take_while(|c| !matches!(c, ';' | '\r' | '\n'))
            .collect::<String>()
            .trim()
            .to_string(),
    };

    if value.is_empty() {
        None
    } else {
        Some(value)
    }
}
/// Returns the index of the first occurrence of `needle` in `haystack`.
///
/// Yields `None` when there is no occurrence, when `needle` is empty, or when
/// `needle` is longer than `haystack`.
fn find_subsequence(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    let width = needle.len();
    if width == 0 {
        return None;
    }
    // `checked_sub` fails exactly when the needle cannot fit at all.
    let last_start = haystack.len().checked_sub(width)?;
    (0..=last_start).find(|&at| &haystack[at..at + width] == needle)
}
// Unit tests for boundary extraction, body parsing and parameter extraction.
#[cfg(test)]
mod tests {
use super::*;
// An unquoted boundary parameter is returned as-is.
#[test]
fn boundary_extracts_simple() {
assert_eq!(
boundary_from_content_type("multipart/form-data; boundary=abc"),
Some("abc".into())
);
}
// Surrounding double quotes are stripped from the boundary value.
#[test]
fn boundary_extracts_quoted() {
assert_eq!(
boundary_from_content_type("multipart/form-data; boundary=\"a-b-c\""),
Some("a-b-c".into())
);
}
// Content types other than multipart/form-data yield no boundary.
#[test]
fn boundary_rejects_non_multipart() {
assert_eq!(boundary_from_content_type("application/json"), None);
assert_eq!(
boundary_from_content_type("application/x-www-form-urlencoded"),
None
);
}
// The media type is matched case-insensitively; the boundary keeps its casing.
#[test]
fn boundary_case_insensitive_prefix() {
assert_eq!(
boundary_from_content_type("Multipart/Form-Data; boundary=ZZ"),
Some("ZZ".into())
);
}
// A single text part: the body excludes the CRLF before the closing delimiter.
#[test]
fn parse_simple_text_field() {
let body = b"--xxx\r\n\
Content-Disposition: form-data; name=\"title\"\r\n\r\n\
Hello world\r\n\
--xxx--\r\n";
let parsed = parse_multipart(body, "xxx").unwrap();
assert_eq!(parsed.parts.len(), 1);
assert_eq!(parsed.parts[0].name, "title");
assert!(parsed.parts[0].filename.is_none());
assert_eq!(parsed.parts[0].body, b"Hello world");
assert_eq!(parsed.text_fields().get("title").unwrap(), "Hello world");
}
// A file part exposes filename and content type; non-UTF-8 body bytes are kept verbatim.
#[test]
fn parse_file_part() {
let body = b"--xxx\r\n\
Content-Disposition: form-data; name=\"photo\"; filename=\"x.png\"\r\n\
Content-Type: image/png\r\n\r\n\
\x89PNG\r\n\
--xxx--\r\n";
let parsed = parse_multipart(body, "xxx").unwrap();
assert_eq!(parsed.parts.len(), 1);
let p = &parsed.parts[0];
assert_eq!(p.name, "photo");
assert_eq!(p.filename.as_deref(), Some("x.png"));
assert_eq!(p.content_type, "image/png");
assert_eq!(p.body, b"\x89PNG");
}
// Several parts are returned in body order, text and file parts mixed.
#[test]
fn parse_mixed_text_and_file() {
let body = b"--xxx\r\n\
Content-Disposition: form-data; name=\"title\"\r\n\r\n\
hello\r\n\
--xxx\r\n\
Content-Disposition: form-data; name=\"avatar\"; filename=\"a.txt\"\r\n\
Content-Type: text/plain\r\n\r\n\
inside\r\n\
--xxx--\r\n";
let parsed = parse_multipart(body, "xxx").unwrap();
assert_eq!(parsed.parts.len(), 2);
assert_eq!(parsed.parts[0].name, "title");
assert!(parsed.parts[0].filename.is_none());
assert_eq!(parsed.parts[0].body, b"hello");
assert_eq!(parsed.parts[1].name, "avatar");
assert_eq!(parsed.parts[1].filename.as_deref(), Some("a.txt"));
assert_eq!(parsed.parts[1].body, b"inside");
}
// A body that never contains the delimiter is rejected.
#[test]
fn parse_rejects_no_opening_boundary() {
let body = b"hello no boundary anywhere\r\n";
let err = parse_multipart(body, "xxx").unwrap_err();
assert!(matches!(err, MultipartError::NoOpeningBoundary));
}
// Headers without a terminating blank line are reported as a malformed part.
#[test]
fn parse_rejects_unterminated_headers() {
let body = b"--xxx\r\nContent-Disposition: form-data; name=\"x\"";
let err = parse_multipart(body, "xxx").unwrap_err();
assert!(matches!(err, MultipartError::MalformedPart(_)));
}
// Parameter values may be quoted or bare; a missing parameter yields None.
#[test]
fn extract_param_handles_quotes_and_unquoted() {
assert_eq!(
extract_quoted_param(" form-data; name=\"x\"; filename=\"y.png\"", "name="),
Some("x".into())
);
assert_eq!(
extract_quoted_param(" form-data; name=plain; whatever", "name="),
Some("plain".into())
);
assert_eq!(extract_quoted_param(" something else", "name="), None);
}
}