use crate::content_disposition::ContentDisposition;
use crate::content_type::ContentType;
use crate::validate::is_token_char;
use core::fmt;
/// Errors reported by the multipart parser and builder in this module.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MultipartError {
    /// The multipart body is empty.
    Empty,
    /// The boundary string is not acceptable.
    InvalidBoundary,
    /// A part header could not be interpreted.
    InvalidHeader,
    /// A part is malformed.
    InvalidPart,
    /// The data seen so far is incomplete.
    Incomplete,
    /// A part has no `Content-Disposition` header (RFC 7578 Section 4.2).
    MissingContentDisposition,
    /// A part's `Content-Disposition` type is not `form-data` (RFC 7578 Section 4.2).
    InvalidContentDisposition,
    /// A part's `Content-Disposition` has no `name` parameter (RFC 7578 Section 4.2).
    MissingName,
}

impl fmt::Display for MultipartError {
    /// Writes the fixed, human-readable message associated with the variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match self {
            Self::Empty => "empty multipart body",
            Self::InvalidBoundary => "invalid boundary",
            Self::InvalidHeader => "invalid part header",
            Self::InvalidPart => "invalid part",
            Self::Incomplete => "incomplete multipart data",
            Self::MissingContentDisposition => {
                "missing Content-Disposition header (RFC 7578 Section 4.2)"
            }
            Self::InvalidContentDisposition => {
                "Content-Disposition type must be form-data (RFC 7578 Section 4.2)"
            }
            Self::MissingName => {
                "Content-Disposition must contain name parameter (RFC 7578 Section 4.2)"
            }
        };
        f.write_str(message)
    }
}

impl std::error::Error for MultipartError {}
/// One part of a multipart body: its headers plus the raw body bytes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Part {
/// The part's `Content-Disposition` header, if any.
content_disposition: Option<ContentDisposition>,
/// The part's `Content-Type` header, if any.
content_type: Option<ContentType>,
/// Remaining headers as `(name, value)` pairs, kept separately from the
/// two dedicated fields above.
headers: Vec<(String, String)>,
/// Raw body bytes of the part.
body: Vec<u8>,
}
impl Part {
    /// Creates a `form-data` part with the given field name.
    ///
    /// The new part has no content type, no extra headers and an empty body.
    pub fn new(name: &str) -> Self {
        let disposition =
            ContentDisposition::new(crate::content_disposition::DispositionType::FormData)
                .with_name(name);
        Self {
            content_disposition: Some(disposition),
            content_type: None,
            headers: Vec::new(),
            body: Vec::new(),
        }
    }

    /// Creates a `form-data` part describing a file upload.
    ///
    /// `content_type` is parsed with `ContentType::parse`; if parsing fails
    /// the part is created without a content type rather than failing.
    pub fn file(name: &str, filename: &str, content_type: &str) -> Self {
        let parsed_type = ContentType::parse(content_type).ok();
        let disposition =
            ContentDisposition::new(crate::content_disposition::DispositionType::FormData)
                .with_name(name)
                .with_filename(filename);
        Self {
            content_disposition: Some(disposition),
            content_type: parsed_type,
            headers: Vec::new(),
            body: Vec::new(),
        }
    }

    /// Replaces the body with a copy of `body` and returns the part.
    pub fn with_body(self, body: &[u8]) -> Self {
        Self {
            body: body.to_vec(),
            ..self
        }
    }

    /// Sets the content type and returns the part.
    pub fn with_content_type(self, content_type: ContentType) -> Self {
        Self {
            content_type: Some(content_type),
            ..self
        }
    }

    /// Returns the `name` reported by the content disposition, if there is one.
    pub fn name(&self) -> Option<&str> {
        self.content_disposition
            .as_ref()
            .and_then(|disposition| disposition.name())
    }

    /// Returns the `filename` reported by the content disposition, if there is one.
    pub fn filename(&self) -> Option<&str> {
        self.content_disposition
            .as_ref()
            .and_then(|disposition| disposition.filename())
    }

    /// Returns the content disposition, if present.
    pub fn content_disposition(&self) -> Option<&ContentDisposition> {
        self.content_disposition.as_ref()
    }

    /// Returns the content type, if present.
    pub fn content_type(&self) -> Option<&ContentType> {
        self.content_type.as_ref()
    }

    /// Returns the additional headers as `(name, value)` pairs.
    pub fn headers(&self) -> &[(String, String)] {
        self.headers.as_slice()
    }

    /// Returns the raw body bytes.
    pub fn body(&self) -> &[u8] {
        self.body.as_slice()
    }

    /// Returns the body as text, or `None` when it is not valid UTF-8.
    pub fn body_str(&self) -> Option<&str> {
        std::str::from_utf8(self.body.as_slice()).ok()
    }

    /// Returns `true` when the part carries a filename.
    pub fn is_file(&self) -> bool {
        self.content_disposition
            .as_ref()
            .and_then(|disposition| disposition.filename())
            .is_some()
    }
}
/// Buffering parser that extracts [`Part`]s from multipart data supplied
/// through `feed`.
#[derive(Debug, Clone)]
pub struct MultipartParser {
/// Boundary string without the leading `--` (the parser prepends it when
/// searching for delimiters).
boundary: String,
/// Bytes that have been fed and not yet consumed.
buffer: Vec<u8>,
/// Current position of the parser within the multipart structure.
state: ParserState,
/// Set once the closing delimiter has been seen; reported by `is_finished`.
finished: bool,
}
/// Internal state of [`MultipartParser`].
#[derive(Debug, Clone, PartialEq, Eq)]
enum ParserState {
/// Searching the buffer for a `--boundary` delimiter.
Initial,
/// The buffer is positioned at the headers of a part.
InPart,
/// The closing delimiter has been seen; no further parts are produced.
Finished,
}
/// Returns `true` if `b` may appear in a multipart boundary: an ASCII letter
/// or digit, one of `'()+_,-./:=?`, or a space.
fn is_valid_boundary_char(b: u8) -> bool {
    b.is_ascii_alphanumeric() || b"'()+_,-./:=? ".contains(&b)
}

/// Returns `true` if `boundary` is 1 to 70 bytes long, consists only of
/// bytes accepted by `is_valid_boundary_char`, and does not end with a space.
fn is_valid_boundary(boundary: &str) -> bool {
    match boundary.as_bytes().split_last() {
        // Empty boundaries are never valid.
        None => false,
        Some((&last, init)) => {
            // `init` excludes the final byte, so at most 69 bytes may precede it.
            init.len() < 70
                && last != b' '
                && is_valid_boundary_char(last)
                && init.iter().copied().all(is_valid_boundary_char)
        }
    }
}
impl MultipartParser {
pub fn new(boundary: &str) -> Self {
MultipartParser {
boundary: boundary.to_string(),
buffer: Vec::new(),
state: ParserState::Initial,
finished: false,
}
}
pub fn try_new(boundary: &str) -> Result<Self, MultipartError> {
if !is_valid_boundary(boundary) {
return Err(MultipartError::InvalidBoundary);
}
Ok(Self::new(boundary))
}
pub fn feed(&mut self, data: &[u8]) {
self.buffer.extend_from_slice(data);
}
pub fn is_finished(&self) -> bool {
self.finished
}
pub fn next_part(&mut self) -> Result<Option<Part>, MultipartError> {
if self.finished {
return Ok(None);
}
let delimiter = format!("--{}", self.boundary);
loop {
match self.state {
ParserState::Initial => {
if let Some(pos) = find_bytes(&self.buffer, delimiter.as_bytes()) {
let after_delim = pos + delimiter.len();
if self.buffer.len() > after_delim + 2 {
if &self.buffer[after_delim..after_delim + 2] == b"\r\n" {
self.buffer = self.buffer[after_delim + 2..].to_vec();
self.state = ParserState::InPart;
} else if &self.buffer[after_delim..after_delim + 2] == b"--" {
self.state = ParserState::Finished;
self.finished = true;
return Ok(None);
} else {
self.buffer = self.buffer[after_delim..].to_vec();
if self.buffer.starts_with(b"\r\n") {
self.buffer = self.buffer[2..].to_vec();
}
self.state = ParserState::InPart;
}
} else {
return Err(MultipartError::Incomplete);
}
} else {
return Err(MultipartError::Incomplete);
}
}
ParserState::InPart => {
if let Some(header_end) = find_bytes(&self.buffer, b"\r\n\r\n") {
let header_bytes = &self.buffer[..header_end];
let body_start = header_end + 4;
let headers_str = std::str::from_utf8(header_bytes)
.map_err(|_| MultipartError::InvalidHeader)?;
let mut content_disposition = None;
let mut content_type = None;
let mut headers = Vec::new();
for line in headers_str.split("\r\n") {
if line.is_empty() {
continue;
}
if let Some((name, value)) = line.split_once(':') {
let name = name.trim();
let value = value.trim();
if name.eq_ignore_ascii_case("Content-Disposition") {
content_disposition = ContentDisposition::parse(value).ok();
} else if name.eq_ignore_ascii_case("Content-Type") {
content_type = ContentType::parse(value).ok();
} else {
headers.push((name.to_string(), value.to_string()));
}
}
}
let content_disposition =
content_disposition.ok_or(MultipartError::MissingContentDisposition)?;
if !content_disposition.is_form_data() {
return Err(MultipartError::InvalidContentDisposition);
}
if content_disposition.name().is_none() {
return Err(MultipartError::MissingName);
}
let body_buffer = &self.buffer[body_start..];
let next_delim = format!("\r\n--{}", self.boundary);
if let Some(body_end) = find_bytes(body_buffer, next_delim.as_bytes()) {
let body = body_buffer[..body_end].to_vec();
let after_next = body_start + body_end + next_delim.len();
if self.buffer.len() >= after_next + 2 {
if &self.buffer[after_next..after_next + 2] == b"--" {
self.finished = true;
self.state = ParserState::Finished;
} else if &self.buffer[after_next..after_next + 2] == b"\r\n" {
self.buffer = self.buffer[after_next + 2..].to_vec();
} else {
self.buffer = self.buffer[after_next..].to_vec();
}
} else {
self.buffer = self.buffer[after_next..].to_vec();
}
return Ok(Some(Part {
content_disposition: Some(content_disposition),
content_type,
headers,
body,
}));
} else {
return Err(MultipartError::Incomplete);
}
} else {
return Err(MultipartError::Incomplete);
}
}
ParserState::Finished => {
return Ok(None);
}
}
}
}
}
/// Builder that collects [`Part`]s and serializes them into a multipart body.
#[derive(Debug, Clone)]
pub struct MultipartBuilder {
/// Boundary string without the leading `--`.
boundary: String,
/// Parts in the order they were added; `build` emits them in this order.
parts: Vec<Part>,
}
impl MultipartBuilder {
    /// Creates a builder whose boundary is `----FormBoundary` followed by
    /// `random_value` in decimal.
    pub fn new(random_value: u64) -> Self {
        Self::with_boundary(&format!("----FormBoundary{}", random_value))
    }

    /// Creates a builder with the given boundary, without validating it.
    pub fn with_boundary(boundary: &str) -> Self {
        Self {
            boundary: boundary.to_string(),
            parts: Vec::new(),
        }
    }

    /// Creates a builder with the given boundary after validating it.
    ///
    /// # Errors
    ///
    /// Returns [`MultipartError::InvalidBoundary`] when `is_valid_boundary`
    /// rejects `boundary`.
    pub fn try_with_boundary(boundary: &str) -> Result<Self, MultipartError> {
        if is_valid_boundary(boundary) {
            Ok(Self::with_boundary(boundary))
        } else {
            Err(MultipartError::InvalidBoundary)
        }
    }

    /// Returns the boundary string (without the leading `--`).
    pub fn boundary(&self) -> &str {
        self.boundary.as_str()
    }

    /// Returns the `Content-Type` header value for the body.
    ///
    /// The boundary is wrapped in double quotes unless every byte of it
    /// satisfies `is_token_char`.
    pub fn content_type(&self) -> String {
        let needs_quoting = !self.boundary.bytes().all(is_token_char);
        if needs_quoting {
            return format!("multipart/form-data; boundary=\"{}\"", self.boundary);
        }
        format!("multipart/form-data; boundary={}", self.boundary)
    }

    /// Appends a plain field whose body is the UTF-8 bytes of `value`.
    pub fn text_field(self, name: &str, value: &str) -> Self {
        self.part(Part::new(name).with_body(value.as_bytes()))
    }

    /// Appends a file field with the given filename, content type and data.
    pub fn file_field(
        self,
        name: &str,
        filename: &str,
        content_type: &str,
        data: &[u8],
    ) -> Self {
        self.part(Part::file(name, filename, content_type).with_body(data))
    }

    /// Appends an already constructed part.
    pub fn part(mut self, part: Part) -> Self {
        self.parts.push(part);
        self
    }

    /// Serializes all parts into a multipart body.
    ///
    /// Each part is written as a `--boundary` line, its headers
    /// (`Content-Disposition`, `Content-Type`, then the extra headers), a
    /// blank line, the body and a CRLF. The closing `--boundary--` line
    /// follows the last part.
    pub fn build(&self) -> Vec<u8> {
        /// Writes one `name: value` header line terminated by CRLF.
        fn push_header(out: &mut Vec<u8>, name: &str, value: &str) {
            out.extend_from_slice(name.as_bytes());
            out.extend_from_slice(b": ");
            out.extend_from_slice(value.as_bytes());
            out.extend_from_slice(b"\r\n");
        }

        // `--boundary`, shared by every opening delimiter and the closing one.
        let mut dash_boundary = b"--".to_vec();
        dash_boundary.extend_from_slice(self.boundary.as_bytes());

        let mut out = Vec::new();
        for part in self.parts.iter() {
            out.extend_from_slice(&dash_boundary);
            out.extend_from_slice(b"\r\n");

            if let Some(disposition) = part.content_disposition.as_ref() {
                push_header(&mut out, "Content-Disposition", &disposition.to_string());
            }
            if let Some(media_type) = part.content_type.as_ref() {
                push_header(&mut out, "Content-Type", &media_type.to_string());
            }
            for (header_name, header_value) in part.headers.iter() {
                push_header(&mut out, header_name, header_value);
            }

            // A blank line separates the headers from the body.
            out.extend_from_slice(b"\r\n");
            out.extend_from_slice(&part.body);
            out.extend_from_slice(b"\r\n");
        }
        out.extend_from_slice(&dash_boundary);
        out.extend_from_slice(b"--\r\n");
        out
    }
}
/// Returns the index of the first occurrence of `needle` in `haystack`.
///
/// An empty `needle` matches at index 0. `None` is returned when `needle`
/// does not occur or is longer than `haystack`.
fn find_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    match needle.len() {
        0 => Some(0),
        len if len > haystack.len() => None,
        // `len <= haystack.len()` here, so the subtraction cannot underflow.
        len => (0..=haystack.len() - len).find(|&start| &haystack[start..start + len] == needle),
    }
}
// Unit tests for the multipart parser, the builder and the byte-search helper.
#[cfg(test)]
mod tests {
use super::*;
// A single text field followed by the closing delimiter is parsed, after
// which the parser reports no further parts.
#[test]
fn test_parse_simple() {
let boundary = "----WebKitFormBoundary";
let body = b"------WebKitFormBoundary\r\n\
Content-Disposition: form-data; name=\"field1\"\r\n\r\n\
value1\r\n\
------WebKitFormBoundary--\r\n";
let mut parser = MultipartParser::new(boundary);
parser.feed(body);
let part = parser.next_part().unwrap().unwrap();
assert_eq!(part.name(), Some("field1"));
assert_eq!(part.body_str(), Some("value1"));
assert!(parser.next_part().unwrap().is_none());
}
// Two consecutive text fields are returned in order.
#[test]
fn test_parse_multiple_parts() {
let boundary = "boundary";
let body = b"--boundary\r\n\
Content-Disposition: form-data; name=\"field1\"\r\n\r\n\
value1\r\n\
--boundary\r\n\
Content-Disposition: form-data; name=\"field2\"\r\n\r\n\
value2\r\n\
--boundary--\r\n";
let mut parser = MultipartParser::new(boundary);
parser.feed(body);
let part1 = parser.next_part().unwrap().unwrap();
assert_eq!(part1.name(), Some("field1"));
assert_eq!(part1.body_str(), Some("value1"));
let part2 = parser.next_part().unwrap().unwrap();
assert_eq!(part2.name(), Some("field2"));
assert_eq!(part2.body_str(), Some("value2"));
assert!(parser.next_part().unwrap().is_none());
}
// A part with a filename and a Content-Type header is recognised as a file.
#[test]
fn test_parse_with_file() {
let boundary = "boundary";
let body = b"--boundary\r\n\
Content-Disposition: form-data; name=\"file\"; filename=\"test.txt\"\r\n\
Content-Type: text/plain\r\n\r\n\
file content\r\n\
--boundary--\r\n";
let mut parser = MultipartParser::new(boundary);
parser.feed(body);
let part = parser.next_part().unwrap().unwrap();
assert_eq!(part.name(), Some("file"));
assert_eq!(part.filename(), Some("test.txt"));
assert!(part.is_file());
assert_eq!(part.body_str(), Some("file content"));
assert!(part.content_type().is_some());
}
// The builder emits the exact byte sequence expected for one text field.
#[test]
fn test_builder_simple() {
let body = MultipartBuilder::with_boundary("boundary")
.text_field("field1", "value1")
.build();
let expected = b"--boundary\r\n\
Content-Disposition: form-data; name=\"field1\"\r\n\r\n\
value1\r\n\
--boundary--\r\n";
assert_eq!(body, expected);
}
// A file field produces Content-Disposition and Content-Type headers
// together with the file data.
#[test]
fn test_builder_with_file() {
let body = MultipartBuilder::with_boundary("boundary")
.file_field("file", "test.txt", "text/plain", b"content")
.build();
let body_str = String::from_utf8_lossy(&body);
assert!(
body_str
.contains("Content-Disposition: form-data; name=\"file\"; filename=\"test.txt\"")
);
assert!(body_str.contains("Content-Type: text/plain"));
assert!(body_str.contains("content"));
}
// Output of the builder is parsed back into the same fields, including a
// binary (non-UTF-8) file body.
#[test]
fn test_roundtrip() {
let original_body = MultipartBuilder::with_boundary("test-boundary")
.text_field("name", "John")
.text_field("age", "30")
.file_field("photo", "photo.jpg", "image/jpeg", b"\xFF\xD8\xFF\xE0")
.build();
let mut parser = MultipartParser::new("test-boundary");
parser.feed(&original_body);
let part1 = parser.next_part().unwrap().unwrap();
assert_eq!(part1.name(), Some("name"));
assert_eq!(part1.body_str(), Some("John"));
let part2 = parser.next_part().unwrap().unwrap();
assert_eq!(part2.name(), Some("age"));
assert_eq!(part2.body_str(), Some("30"));
let part3 = parser.next_part().unwrap().unwrap();
assert_eq!(part3.name(), Some("photo"));
assert_eq!(part3.filename(), Some("photo.jpg"));
assert_eq!(part3.body(), b"\xFF\xD8\xFF\xE0");
assert!(parser.next_part().unwrap().is_none());
}
// A boundary made only of token characters is emitted unquoted.
#[test]
fn test_content_type() {
let builder = MultipartBuilder::with_boundary("abc123");
assert_eq!(
builder.content_type(),
"multipart/form-data; boundary=abc123"
);
}
// `Part::new` creates a non-file part with the given name and body.
#[test]
fn test_part_new() {
let part = Part::new("field").with_body(b"value");
assert_eq!(part.name(), Some("field"));
assert_eq!(part.body(), b"value");
assert!(!part.is_file());
}
// `Part::file` creates a part with name, filename and content type.
#[test]
fn test_part_file() {
let part = Part::file("upload", "file.txt", "text/plain").with_body(b"data");
assert_eq!(part.name(), Some("upload"));
assert_eq!(part.filename(), Some("file.txt"));
assert!(part.is_file());
assert!(part.content_type().is_some());
}
// `find_bytes` handles a hit, a miss, an empty needle and an empty haystack.
#[test]
fn test_find_bytes() {
assert_eq!(find_bytes(b"hello world", b"world"), Some(6));
assert_eq!(find_bytes(b"hello", b"x"), None);
assert_eq!(find_bytes(b"hello", b""), Some(0));
assert_eq!(find_bytes(b"", b"x"), None);
}
// Arbitrary binary data (including 0x00 and 0xFF) survives a round trip.
#[test]
fn test_binary_content() {
let binary_data = vec![0x00, 0xFF, 0x10, 0x20];
let body = MultipartBuilder::with_boundary("boundary")
.file_field(
"data",
"binary.bin",
"application/octet-stream",
&binary_data,
)
.build();
let mut parser = MultipartParser::new("boundary");
parser.feed(&body);
let part = parser.next_part().unwrap().unwrap();
assert_eq!(part.body(), &binary_data);
}
// Boundaries built from allowed characters, including an inner space, are accepted.
#[test]
fn test_try_new_valid_boundary() {
assert!(MultipartParser::try_new("simple").is_ok());
assert!(MultipartParser::try_new("a-b_c.d").is_ok());
assert!(MultipartParser::try_new("with space").is_ok());
assert!(MultipartParser::try_new("----WebKitFormBoundary").is_ok());
}
// Rejected: empty, 71 characters, trailing space, NUL byte and `*`.
#[test]
fn test_try_new_invalid_boundary() {
assert!(MultipartParser::try_new("").is_err());
assert!(MultipartParser::try_new(&"a".repeat(71)).is_err());
assert!(MultipartParser::try_new("boundary ").is_err());
assert!(MultipartParser::try_new("bound\x00ary").is_err());
assert!(MultipartParser::try_new("bound*ary").is_err());
}
// The builder applies the same boundary validation as the parser.
#[test]
fn test_builder_try_with_boundary() {
assert!(MultipartBuilder::try_with_boundary("valid-boundary").is_ok());
assert!(MultipartBuilder::try_with_boundary("").is_err());
}
}