use crate::error::Error;
/// A top-level message split into header block and body, as produced by
/// `parse_wire`.
pub(crate) struct WireMessage {
/// Parsed `(name, value)` header pairs from `parse_headers` (names are
/// ASCII-lowercased there); empty when the message is headerless.
pub headers: Vec<(String, String)>,
/// Owned copy of the body bytes.
pub body: Vec<u8>,
/// The header block decoded lossily as UTF-8; empty when the message is
/// headerless.
pub raw_headers: String,
/// Length in bytes of the complete raw input handed to `parse_wire`.
pub size: u64,
/// `true` when the input was classified as having no header block, in which
/// case `headers` and `raw_headers` are left empty.
pub headerless: bool,
}
/// Parses a raw top-level message into a [`WireMessage`].
///
/// The input is classified by `classify_top_level_message`. For a regular
/// message the header block is kept both as lossily decoded text and as
/// parsed `(name, value)` pairs; for a headerless message both are left
/// empty. `size` always records the length of the full input.
///
/// # Errors
///
/// Returns `Error::EmptyInput` when `raw` is empty.
pub(crate) fn parse_wire(raw: &[u8]) -> Result<WireMessage, Error> {
    if raw.is_empty() {
        return Err(Error::EmptyInput);
    }

    let (header_bytes, body_bytes, headerless) = classify_top_level_message(raw);

    // A headerless message carries neither raw nor parsed headers.
    let (raw_headers, headers) = if headerless {
        (String::new(), Vec::new())
    } else {
        (
            String::from_utf8_lossy(header_bytes).into_owned(),
            parse_headers(header_bytes),
        )
    };

    Ok(WireMessage {
        headers,
        body: body_bytes.to_vec(),
        raw_headers,
        size: raw.len() as u64,
        headerless,
    })
}
pub(super) fn split_header_body(raw: &[u8]) -> (&[u8], &[u8]) {
if raw.starts_with(b"\r\n") {
return (&[], &raw[2..]);
}
if raw.starts_with(b"\n") {
return (&[], &raw[1..]);
}
if raw.starts_with(b"\r") {
let next = raw.get(1).copied();
if next.is_none() || next == Some(b'\r') || next == Some(b'\n') {
return (&[], &raw[1..]);
}
return split_header_body(&raw[1..]);
}
if let Some(pos) = find_subsequence(raw, b"\r\n\r\n") {
return (&raw[..pos], &raw[pos + 4..]);
}
if let Some(pos) = find_subsequence(raw, b"\n\n") {
return (&raw[..pos], &raw[pos + 2..]);
}
if let Some(pos) = find_subsequence(raw, b"\r\r") {
return (&raw[..pos], &raw[pos + 2..]);
}
(raw, &[])
}
/// Decides whether a top-level message has a header block at all.
///
/// Returns `(header_bytes, body_bytes, headerless)`:
/// * If the split yields an empty body, the whole input is treated as a
///   headerless body when no header can be parsed from it and it passes
///   `looks_like_headerless_body`.
/// * If the split yields an empty header block and a non-empty body, the
///   message is headerless when that body passes
///   `looks_like_headerless_body`.
/// * In every other case the split from `split_header_body` is returned
///   unchanged with `headerless == false`.
fn classify_top_level_message(raw: &[u8]) -> (&[u8], &[u8], bool) {
    let (header_bytes, body_bytes) = split_header_body(raw);

    if body_bytes.is_empty() {
        // Nothing after the header block: maybe the "headers" are really text.
        let whole_input_is_body =
            parse_headers(header_bytes).is_empty() && looks_like_headerless_body(raw);
        return if whole_input_is_body {
            (&[], raw, true)
        } else {
            (header_bytes, body_bytes, false)
        };
    }

    if header_bytes.is_empty() && looks_like_headerless_body(body_bytes) {
        return (&[], body_bytes, true);
    }

    (header_bytes, body_bytes, false)
}
/// Returns `true` when `raw` is non-empty and contains no control bytes other
/// than CR, LF and TAB.
///
/// "Control byte" here means anything below `0x20` plus DEL (`0x7F`); bytes
/// `0x80` and above are accepted.
fn looks_like_headerless_body(raw: &[u8]) -> bool {
    if raw.is_empty() {
        return false;
    }
    let has_forbidden_control = raw.iter().any(|&byte| {
        let is_control = byte < 0x20 || byte == 0x7F;
        let is_allowed_whitespace = matches!(byte, b'\t' | b'\n' | b'\r');
        is_control && !is_allowed_whitespace
    });
    !has_forbidden_control
}
/// Parses a raw header block into `(name, value)` pairs, in order of
/// appearance.
///
/// The bytes are decoded lossily as UTF-8, and CRLF, bare CR and bare LF are
/// all treated as line terminators. Parsing stops at the first empty line.
///
/// * Field names are trimmed, checked with `is_valid_header_name` and
///   returned ASCII-lowercased. A `name: value` line whose name is invalid is
///   dropped; it still closes the header collected so far, so continuation
///   lines that follow it are discarded.
/// * Lines starting with a space or tab continue the current header. They are
///   appended verbatim (leading whitespace included), except when the value
///   collected so far stops in the middle of an encoded-word's text, in which
///   case exactly one leading space or tab is removed before appending.
///   Continuation lines seen while no header is open are ignored.
/// * Lines that neither start with whitespace nor contain `:` are ignored and
///   leave the current header open.
/// * One leading space or tab after the colon is removed from the value,
///   unless the rest of the first line is whitespace only and a continuation
///   line follows, in which case it is kept exactly as written.
pub(super) fn parse_headers(raw: &[u8]) -> Vec<(String, String)> {
    let text = String::from_utf8_lossy(raw);
    let normalized = text.replace("\r\n", "\n").replace('\r', "\n");

    let mut headers: Vec<(String, String)> = Vec::new();
    let mut current_name = String::new();
    let mut current_value = String::new();
    let mut current_encoded_word_state = EncodedWordFoldState::default();
    // Peekable because the first line of a header needs to know whether a
    // continuation line follows.
    let mut lines = normalized.split('\n').peekable();

    while let Some(line) = lines.next() {
        if line.is_empty() {
            break;
        }

        if line.starts_with(' ') || line.starts_with('\t') {
            if current_name.is_empty() {
                continue;
            }
            // A fold inside encoded text must not inject the folding
            // whitespace into the encoded payload.
            let piece = if current_encoded_word_state.inside_encoded_text() {
                strip_one_leading_wsp(line)
            } else {
                line
            };
            current_value.push_str(piece);
            current_encoded_word_state.feed_str(piece);
            continue;
        }

        let Some(colon_pos) = line.find(':') else {
            continue;
        };
        let field_name = line[..colon_pos].trim();

        // Any `name:` line, valid or not, ends the header being collected.
        if !current_name.is_empty() {
            headers.push((
                current_name.to_ascii_lowercase(),
                std::mem::take(&mut current_value),
            ));
            current_name.clear();
            current_encoded_word_state = EncodedWordFoldState::default();
        }
        if !is_valid_header_name(field_name) {
            continue;
        }

        current_name.push_str(field_name);
        let raw_value = &line[colon_pos + 1..];
        let next_is_continuation = lines
            .peek()
            .is_some_and(|next| next.starts_with(' ') || next.starts_with('\t'));
        let keep_whitespace_only_value = next_is_continuation
            && !raw_value.is_empty()
            && raw_value.bytes().all(|byte| byte == b' ' || byte == b'\t');
        current_value = if keep_whitespace_only_value {
            raw_value.to_string()
        } else {
            strip_one_leading_wsp(raw_value).to_string()
        };
        current_encoded_word_state = EncodedWordFoldState::default();
        current_encoded_word_state.feed_str(&current_value);
    }

    if !current_name.is_empty() {
        headers.push((current_name.to_ascii_lowercase(), current_value));
    }
    headers
}
/// Removes at most one leading space or tab from `line`; any other input is
/// returned unchanged.
fn strip_one_leading_wsp(line: &str) -> &str {
    line.strip_prefix([' ', '\t']).unwrap_or(line)
}
/// Byte-driven scanner that tracks whether the text fed so far stops inside
/// the text portion of an encoded-word of the shape
/// `=?charset?encoding?text?=` (the RFC 2047 layout).
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
struct EncodedWordFoldState {
    phase: EncodedWordFoldPhase,
}

impl EncodedWordFoldState {
    /// Feeds every byte of `text` through [`Self::feed`].
    fn feed_str(&mut self, text: &str) {
        text.bytes().for_each(|byte| self.feed(byte));
    }

    /// Returns `true` only while the scanner is in the `EncodedText` phase,
    /// i.e. after the third `?` of an encoded-word and before a `?` that may
    /// start the closing `?=`.
    fn inside_encoded_text(self) -> bool {
        self.phase == EncodedWordFoldPhase::EncodedText
    }

    /// Advances the scanner by one byte.
    fn feed(&mut self, byte: u8) {
        use EncodedWordFoldPhase::*;

        self.phase = match (self.phase, byte) {
            (Outside, b'=') => SawEquals,
            (Outside, _) => Outside,

            (SawEquals, b'?') => Charset,
            // "==?": the second '=' may itself open an encoded-word.
            (SawEquals, b'=') => SawEquals,
            (SawEquals, _) => Outside,

            (Charset, b'?') => Encoding,
            (Charset, _) => Charset,

            (Encoding, b'?') => EncodedText,
            (Encoding, _) => Encoding,

            (EncodedText, b'?') => MaybeClose,
            (EncodedText, _) => EncodedText,

            (MaybeClose, b'=') => Outside,
            // "??=": the second '?' may itself start the closing "?=", so the
            // pending '?' must not be forgotten (mirrors the "==?" case).
            (MaybeClose, b'?') => MaybeClose,
            (MaybeClose, _) => EncodedText,
        };
    }
}

/// Position of the scanner within `=?charset?encoding?text?=`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
enum EncodedWordFoldPhase {
    /// Not inside an encoded-word.
    #[default]
    Outside,
    /// Just saw `=`, which may be followed by `?`.
    SawEquals,
    /// After `=?`, reading the charset up to the next `?`.
    Charset,
    /// Reading the encoding up to the next `?`.
    Encoding,
    /// Reading the encoded text.
    EncodedText,
    /// Saw `?` inside the encoded text; `=` closes the encoded-word.
    MaybeClose,
}
/// Returns `true` when `name` is non-empty and consists solely of printable,
/// non-space US-ASCII bytes (`0x21..=0x7E`) other than `:`.
fn is_valid_header_name(name: &str) -> bool {
    if name.is_empty() {
        return false;
    }
    name.bytes()
        .all(|byte| byte.is_ascii_graphic() && byte != b':')
}
/// Splits a multipart body into the raw slices that lie between
/// `--boundary` delimiter lines.
///
/// A delimiter only counts when it
/// * starts at offset 0 or directly after a CR or LF, and
/// * is followed by end of input, CR, LF, space or tab, or by `--` (closing
///   delimiter) that is in turn followed by end of input, CR, LF, space or
///   tab.
///
/// Anything before the first delimiter and anything after the closing
/// delimiter is not returned. The line break directly in front of a delimiter
/// (CRLF, or a single CR or LF) is not part of the preceding part. After a
/// delimiter, spaces/tabs and one line break are skipped before the next part
/// starts. If the input ends without a closing delimiter, whatever follows
/// the last delimiter is returned as the final part when it is non-empty.
pub(super) fn split_mime_parts<'a>(body: &'a [u8], boundary: &str) -> Vec<&'a [u8]> {
    let is_line_break = |byte: u8| byte == b'\r' || byte == b'\n';
    let may_follow_delimiter = |byte: u8| matches!(byte, b'\r' | b'\n' | b' ' | b'\t');

    let delimiter_text = format!("--{boundary}");
    let delimiter = delimiter_text.as_bytes();

    let mut parts: Vec<&'a [u8]> = Vec::new();
    // Start offset of the part currently being collected, if any.
    let mut open_part: Option<usize> = None;
    let mut cursor: usize = 0;

    loop {
        let pos = match find_subsequence(&body[cursor..], delimiter) {
            Some(offset) => cursor + offset,
            None => {
                // Unterminated multipart: keep the trailing part if non-empty.
                if let Some(start) = open_part.filter(|&start| start < body.len()) {
                    parts.push(&body[start..]);
                }
                break;
            }
        };
        let after = pos + delimiter.len();

        // The delimiter must begin a line.
        if body[..pos].last().is_some_and(|&prev| !is_line_break(prev)) {
            cursor = after;
            continue;
        }

        // Reject matches that are merely a prefix of a longer token.
        match body.get(after).copied() {
            Some(b'-') => {
                if body.get(after + 1) != Some(&b'-') {
                    cursor = after + 1;
                    continue;
                }
                let past_close = after + 2;
                if body
                    .get(past_close)
                    .is_some_and(|&byte| !may_follow_delimiter(byte))
                {
                    cursor = past_close;
                    continue;
                }
            }
            Some(other) if !may_follow_delimiter(other) => {
                cursor = after;
                continue;
            }
            _ => {}
        }

        if let Some(start) = open_part {
            // The line break in front of the delimiter belongs to the delimiter.
            let preceding = &body[..pos];
            let end = if preceding.ends_with(b"\r\n") {
                pos - 2
            } else if preceding.last().is_some_and(|&byte| is_line_break(byte)) {
                pos - 1
            } else {
                pos
            };
            if start <= end {
                parts.push(&body[start..end]);
            }
        }

        // `--boundary--` closes the multipart; everything after is ignored.
        if body[after..].starts_with(b"--") {
            break;
        }

        let padding = body[after..]
            .iter()
            .take_while(|&&byte| byte == b' ' || byte == b'\t')
            .count();
        let mut next = after + padding;
        if body.get(next) == Some(&b'\r') {
            next += 1;
        }
        if body.get(next) == Some(&b'\n') {
            next += 1;
        }
        open_part = Some(next);
        cursor = next;
    }

    parts
}
/// Returns the index of the first occurrence of `needle` in `haystack`, or
/// `None` when it does not occur.
///
/// An empty `needle` matches at index 0. It is handled up front because
/// `slice::windows` panics when asked for windows of size zero.
pub(super) fn find_subsequence(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    if needle.is_empty() {
        return Some(0);
    }
    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}
// Unit tests for this module are kept in the file named by `#[path]` and are
// only compiled for test builds.
#[cfg(test)]
#[path = "wire_tests.rs"]
mod tests;