use crate::ascii::HttpChar;
use crate::error::ParseErrorKind;
use crate::tchar::TABLE as TCHAR;
/// Byte offsets describing one successfully scanned header line.
///
/// Both offsets index into the buffer that was handed to the scanner.
/// The type is plain data (two `usize` values), so it is cheap to copy and
/// can be compared directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct HeaderLineSpan {
    /// Index of the first `:` in the line, i.e. the byte that ends the header name.
    pub colon: usize,
    /// Index of the `\r` of the terminating CRLF (one past the last value byte).
    pub line_end: usize,
}
/// Byte-at-a-time header-line scanner.
///
/// Scanning starts at index `start`. `colon_pos` carries a colon offset the
/// caller has already located; `None` means the scan is still inside the
/// header name.
///
/// # Errors
///
/// Returns `ParseErrorKind::MalformedHeader` when
/// * a name byte is not marked in the `TCHAR` table,
/// * the colon sits at index 0 (empty name),
/// * a `\r` in the value is not immediately followed by `\n`,
/// * the value contains a bare `\n` or a NUL byte, or
/// * the buffer ends before a CRLF is seen.
#[inline]
fn scan_header_line_scalar(
    buf: &[u8],
    start: usize,
    mut colon_pos: Option<usize>,
) -> Result<HeaderLineSpan, ParseErrorKind> {
    for (idx, &byte) in buf.iter().enumerate().skip(start) {
        // Name phase: runs until the first colon has been recorded.
        let Some(colon) = colon_pos else {
            if byte == HttpChar::Colon {
                if idx == 0 {
                    return Err(ParseErrorKind::MalformedHeader);
                }
                colon_pos = Some(idx);
            } else if !TCHAR[usize::from(byte)] {
                return Err(ParseErrorKind::MalformedHeader);
            }
            continue;
        };

        // Value phase: only CR, LF and NUL are significant.
        if byte == HttpChar::CarriageReturn {
            let lf_follows = match buf.get(idx + 1) {
                Some(&next) => next == HttpChar::LineFeed,
                None => false,
            };
            return if lf_follows {
                Ok(HeaderLineSpan {
                    colon,
                    line_end: idx,
                })
            } else {
                Err(ParseErrorKind::MalformedHeader)
            };
        }
        if byte == HttpChar::LineFeed || byte == HttpChar::Null {
            return Err(ParseErrorKind::MalformedHeader);
        }
    }

    // Ran off the end of the buffer without seeing CRLF.
    Err(ParseErrorKind::MalformedHeader)
}
/// Generates a `scan_header_line` function that processes the buffer 16 bytes
/// per iteration using per-byte equality bitmasks, then hands any remaining
/// tail to `scan_header_line_scalar`.
///
/// `$tchar` names a type providing `mask16(ptr)`. The result is compared
/// against bit masks of the chunk, so presumably bit `k` is set when byte `k`
/// of the chunk is a valid header-name byte — confirm in `crate::tchar`.
///
/// The expansion site must have `simd_splat!`, `simd_load!`, `simd_mask!`,
/// `HttpChar`, `HeaderLineSpan`, `ParseErrorKind` and
/// `scan_header_line_scalar` in scope.
macro_rules! impl_bitmask_scan_header_line {
($tchar:ty) => {
/// Scans one header line in `buf`, returning the colon offset and the
/// offset of the terminating `\r`.
///
/// # Errors
///
/// Returns `ParseErrorKind::MalformedHeader` for every rejected input.
///
/// # Safety
///
/// NOTE(review): the body performs raw 16-byte loads through `simd_load!`
/// and `mask16`; callers presumably must guarantee the CPU supports the
/// instruction set those primitives use — confirm against `crate::simd`.
#[inline]
pub(super) unsafe fn scan_header_line(
buf: &[u8],
) -> Result<HeaderLineSpan, ParseErrorKind> {
let len = buf.len();
unsafe {
// Broadcast each byte of interest so a chunk can be compared in one step.
let v_cr = simd_splat!(HttpChar::CarriageReturn.as_u8());
let v_lf = simd_splat!(HttpChar::LineFeed.as_u8());
let v_nul = simd_splat!(HttpChar::Null.as_u8());
let v_colon = simd_splat!(HttpChar::Colon.as_u8());
let mut i = 0;
// `None` while still inside the header name; `Some(offset)` once the colon is found.
let mut colon_pos: Option<usize> = None;
// Only full 16-byte chunks are handled here, so every load stays inside `buf`.
while i + 16 <= len {
let chunk = simd_load!(buf.as_ptr().add(i));
// Presumably bit k of each mask is set when byte k of the chunk equals
// the splatted byte (the masks are consumed via `trailing_zeros`) — confirm.
let cr_mask = simd_mask!(chunk, v_cr);
let lf_mask = simd_mask!(chunk, v_lf);
let nul_mask = simd_mask!(chunk, v_nul);
match colon_pos {
None => {
let colon_mask = simd_mask!(chunk, v_colon);
if colon_mask != 0 {
// First colon in this chunk ends the header name.
let colon_bit = colon_mask.trailing_zeros() as usize;
let cpos = i + colon_bit;
// A colon at absolute offset 0 means an empty header name.
if cpos == 0 {
return Err(ParseErrorKind::MalformedHeader);
}
// Every byte of this chunk before the colon must pass the tchar check.
let name_mask = (1u32 << colon_bit) - 1;
if <$tchar>::mask16(buf.as_ptr().add(i)) & name_mask != name_mask {
return Err(ParseErrorKind::MalformedHeader);
}
colon_pos = Some(cpos);
if cr_mask != 0 {
let cr_bit = cr_mask.trailing_zeros() as usize;
if cr_bit > colon_bit {
// The value may end inside this same chunk: validate the bytes
// strictly between the colon and the first CR.
let pos = i + cr_bit;
let before_cr = (1u32 << cr_bit) - 1;
let after_colon = !((1u32 << (colon_bit + 1)) - 1);
let value_mask = before_cr & after_colon;
if ((nul_mask | lf_mask) & value_mask) != 0 {
return Err(ParseErrorKind::MalformedHeader);
}
// The CR must be immediately followed by LF; `pos + 1 < len`
// keeps the unchecked read in bounds.
if pos + 1 < len
&& *buf.get_unchecked(pos + 1) == HttpChar::LineFeed
{
return Ok(HeaderLineSpan {
colon: cpos,
line_end: pos,
});
}
return Err(ParseErrorKind::MalformedHeader);
}
// First CR is not after the colon, i.e. it lies in the name part.
return Err(ParseErrorKind::MalformedHeader);
}
// No CR in this chunk: the rest of the chunk is value data and must
// not contain NUL or LF.
let after_colon = !((1u32 << (colon_bit + 1)) - 1);
if ((nul_mask | lf_mask) & after_colon) != 0 {
return Err(ParseErrorKind::MalformedHeader);
}
i += 16;
continue;
}
// No colon yet: CR, LF or NUL inside the name is an error.
if (cr_mask | lf_mask | nul_mask) != 0 {
return Err(ParseErrorKind::MalformedHeader);
}
// All 16 bytes belong to the name and must pass the tchar check.
if <$tchar>::mask16(buf.as_ptr().add(i)) != 0xFFFF {
return Err(ParseErrorKind::MalformedHeader);
}
}
Some(colon) => {
if cr_mask != 0 {
// First CR ends the value; nothing before it in this chunk may be NUL or LF.
let cr_bit = cr_mask.trailing_zeros();
let pos = i + cr_bit as usize;
let before_mask = (1u32 << cr_bit) - 1;
if ((nul_mask | lf_mask) & before_mask) != 0 {
return Err(ParseErrorKind::MalformedHeader);
}
// Same CRLF check as above; the bounds test guards the unchecked read.
if pos + 1 < len
&& *buf.get_unchecked(pos + 1) == HttpChar::LineFeed
{
return Ok(HeaderLineSpan {
colon,
line_end: pos,
});
}
return Err(ParseErrorKind::MalformedHeader);
}
// No CR in this chunk: a NUL or LF anywhere in it is an error.
if (nul_mask | lf_mask) != 0 {
return Err(ParseErrorKind::MalformedHeader);
}
}
}
i += 16;
}
// Fewer than 16 bytes remain: finish with the scalar scanner, carrying the phase over.
scan_header_line_scalar(buf, i, colon_pos)
}
}
};
}
/// AVX2 backend: scans 32 bytes per iteration using byte-equality bitmasks and
/// hands any tail shorter than 32 bytes to `scan_header_line_scalar`.
#[cfg(target_arch = "x86_64")]
#[allow(
    clippy::cast_possible_wrap,
    clippy::cast_sign_loss,
    clippy::cast_ptr_alignment
)]
mod avx2_mod {
    use std::arch::x86_64::{
        _mm256_cmpeq_epi8, _mm256_loadu_si256, _mm256_movemask_epi8, _mm256_set1_epi8,
    };

    use super::{HeaderLineSpan, ParseErrorKind, scan_header_line_scalar};
    use crate::ascii::HttpChar;
    use crate::tchar::Avx2;

    /// Which part of the header line the scanner is currently in.
    #[derive(Clone, Copy)]
    enum Phase {
        /// Still inside the header name; no colon seen yet.
        Name,
        /// Inside the value; `colon` is the absolute offset of the separator.
        Value { colon: usize },
    }

    /// Mask with the low `b` bits set, for `b` in `0..=32`.
    ///
    /// Computed in 64 bits so that `b == 32` yields `0xFFFF_FFFF` instead of
    /// overflowing the shift.
    #[inline]
    #[allow(clippy::cast_possible_truncation)]
    const fn before_bit(b: u32) -> u32 {
        ((1u64 << b) - 1) as u32
    }

    /// Scans one header line in `buf`, returning the colon offset and the
    /// offset of the terminating `\r`.
    ///
    /// # Errors
    ///
    /// Returns `ParseErrorKind::MalformedHeader` for every rejected input.
    ///
    /// # Safety
    ///
    /// The running CPU must support AVX2.
    #[target_feature(enable = "avx2")]
    #[inline]
    pub(super) unsafe fn scan_header_line_avx2(
        buf: &[u8],
    ) -> Result<HeaderLineSpan, ParseErrorKind> {
        let len = buf.len();
        // SAFETY: every 32-byte load below happens at offset `i` with
        // `i + 32 <= len`, so it stays inside `buf`. `Avx2::mask32` receives
        // the same pointer; it is assumed to read the same 32 bytes.
        unsafe {
            let v_cr = _mm256_set1_epi8(HttpChar::CarriageReturn.as_i8());
            let v_lf = _mm256_set1_epi8(HttpChar::LineFeed.as_i8());
            let v_nul = _mm256_set1_epi8(HttpChar::Null.as_i8());
            let v_colon = _mm256_set1_epi8(HttpChar::Colon.as_i8());
            let mut i = 0;
            let mut phase = Phase::Name;
            while i + 32 <= len {
                let chunk = _mm256_loadu_si256(buf.as_ptr().add(i).cast());
                // Bit k of each mask is set when byte k of the chunk matches.
                let cr_mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, v_cr)) as u32;
                let lf_mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, v_lf)) as u32;
                let nul_mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, v_nul)) as u32;
                // Bytes that are never allowed inside a value.
                let bad_value = lf_mask | nul_mask;
                match phase {
                    Phase::Name => {
                        let colon_mask =
                            _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, v_colon)) as u32;
                        let stop_mask = colon_mask | cr_mask | bad_value;
                        if stop_mask == 0 {
                            // Whole chunk is name bytes: all 32 must pass the tchar check.
                            if Avx2::mask32(buf.as_ptr().add(i)) != 0xFFFF_FFFF {
                                return Err(ParseErrorKind::MalformedHeader);
                            }
                            i += 32;
                            continue;
                        }
                        let stop_bit = stop_mask.trailing_zeros();
                        let stop_pos = i + stop_bit as usize;
                        let stop_byte_mask = 1u32 << stop_bit;
                        // The first special byte must be the colon, and the
                        // name in front of it must not be empty.
                        if (colon_mask & stop_byte_mask) == 0 || stop_pos == 0 {
                            return Err(ParseErrorKind::MalformedHeader);
                        }
                        // Every byte of this chunk before the colon must pass the tchar check.
                        let name_mask = before_bit(stop_bit);
                        if Avx2::mask32(buf.as_ptr().add(i)) & name_mask != name_mask {
                            return Err(ParseErrorKind::MalformedHeader);
                        }
                        phase = Phase::Value { colon: stop_pos };
                        // Look for the end of the value in the remainder of this chunk.
                        let after_colon = !before_bit(stop_bit + 1);
                        let value_stop = (cr_mask | bad_value) & after_colon;
                        if value_stop == 0 {
                            i += 32;
                            continue;
                        }
                        let v_bit = value_stop.trailing_zeros();
                        let v_byte = 1u32 << v_bit;
                        debug_assert_eq!(
                            (lf_mask | nul_mask) & after_colon & (v_byte - 1),
                            0,
                            "value_stop logic violated: LF/NUL before first stop byte"
                        );
                        // The first stop byte in the value must be CR; LF/NUL is an error.
                        if (cr_mask & v_byte) == 0 {
                            return Err(ParseErrorKind::MalformedHeader);
                        }
                        let pos = i + v_bit as usize;
                        return finish_line(buf, stop_pos, pos);
                    }
                    Phase::Value { colon } => {
                        let stop_mask = cr_mask | bad_value;
                        if stop_mask == 0 {
                            i += 32;
                            continue;
                        }
                        let stop_bit = stop_mask.trailing_zeros();
                        let stop_byte = 1u32 << stop_bit;
                        // The first stop byte must be CR; LF/NUL is an error.
                        if (cr_mask & stop_byte) == 0 {
                            return Err(ParseErrorKind::MalformedHeader);
                        }
                        let pos = i + stop_bit as usize;
                        return finish_line(buf, colon, pos);
                    }
                }
            }
            // Fewer than 32 bytes remain: finish with the scalar scanner,
            // carrying the current phase over.
            let colon_pos = match phase {
                Phase::Name => None,
                Phase::Value { colon } => Some(colon),
            };
            scan_header_line_scalar(buf, i, colon_pos)
        }
    }

    /// Completes a line whose `\r` was found at `cr_pos`.
    ///
    /// Succeeds only when the byte right after the CR exists and is `\n`.
    /// The lookahead uses the checked `slice::get`, so this safe function does
    /// not rely on any caller-supplied length for memory safety.
    #[inline]
    fn finish_line(
        buf: &[u8],
        colon: usize,
        cr_pos: usize,
    ) -> Result<HeaderLineSpan, ParseErrorKind> {
        if let Some(&next) = buf.get(cr_pos + 1) {
            if next == HttpChar::LineFeed {
                return Ok(HeaderLineSpan {
                    colon,
                    line_end: cr_pos,
                });
            }
        }
        Err(ParseErrorKind::MalformedHeader)
    }
}
/// x86_64 backend: the 16-byte bitmask scanner instantiated with the `Ssse3`
/// tchar classifier, plus an entry point compiled with SSSE3 enabled.
#[cfg(target_arch = "x86_64")]
// NOTE(review): these lints are presumably triggered by the code the SIMD
// macros expand to inside this module — confirm before removing any of them.
#[allow(
clippy::cast_possible_wrap,
clippy::cast_sign_loss,
clippy::cast_ptr_alignment
)]
mod sse2_ssse3 {
// NOTE(review): presumably defines the `simd_splat!` / `simd_load!` /
// `simd_mask!` macros used by the expansion below — confirm in `crate::simd`.
crate::simd::define_simd_primitives!();
use super::{HeaderLineSpan, ParseErrorKind, scan_header_line_scalar};
use crate::ascii::HttpChar;
use crate::tchar::{Ssse3, TcharCheck};
// Expands to `pub(super) unsafe fn scan_header_line(buf)` in this module.
impl_bitmask_scan_header_line!(Ssse3);
/// Entry point compiled with the `ssse3` target feature; forwards to the
/// macro-generated `scan_header_line`.
///
/// # Safety
///
/// The running CPU must support SSSE3.
#[target_feature(enable = "ssse3")]
#[inline]
pub(super) unsafe fn scan_header_line_ssse3(
buf: &[u8],
) -> Result<HeaderLineSpan, ParseErrorKind> {
unsafe { scan_header_line(buf) }
}
}
/// x86_64 backend: the 16-byte bitmask scanner instantiated with the
/// `Sse2Only` tchar classifier. The dispatcher uses it when neither AVX2 nor
/// SSSE3 is reported as available.
#[cfg(target_arch = "x86_64")]
// NOTE(review): these lints are presumably triggered by the code the SIMD
// macros expand to inside this module — confirm before removing any of them.
#[allow(
clippy::cast_possible_wrap,
clippy::cast_sign_loss,
clippy::cast_ptr_alignment
)]
mod sse2_scalar {
// NOTE(review): presumably defines the `simd_splat!` / `simd_load!` /
// `simd_mask!` macros used by the expansion below — confirm in `crate::simd`.
crate::simd::define_simd_primitives!();
use super::{HeaderLineSpan, ParseErrorKind, scan_header_line_scalar};
use crate::ascii::HttpChar;
use crate::tchar::{Sse2Only, TcharCheck};
// Expands to `pub(super) unsafe fn scan_header_line(buf)` in this module.
impl_bitmask_scan_header_line!(Sse2Only);
}
/// NEON (aarch64) backend: classifies 16 bytes at a time with horizontal
/// "any byte matches" tests and drops to a byte loop only for chunks that
/// contain a colon, CR, LF or NUL.
#[cfg(target_arch = "aarch64")]
mod neon {
    use std::arch::aarch64::{vceqq_u8, vdupq_n_u8, vld1q_u8, vmaxvq_u8};

    use super::{HeaderLineSpan, ParseErrorKind, scan_header_line_scalar};
    use crate::ascii::HttpChar;
    use crate::tchar::TcharCheck;

    /// Number of bytes consumed per NEON load.
    const CHUNK: usize = 16;

    /// Scans `buf[from..to]` as header-value bytes.
    ///
    /// Returns `Some(Ok(..))` when a `\r` immediately followed by `\n` is
    /// found, `Some(Err(..))` on a CR without LF, a bare LF or a NUL, and
    /// `None` when the range contains none of those bytes.
    #[inline]
    fn scan_value_bytes(
        buf: &[u8],
        colon: usize,
        from: usize,
        to: usize,
    ) -> Option<Result<HeaderLineSpan, ParseErrorKind>> {
        for (pos, &b) in buf.iter().enumerate().take(to).skip(from) {
            if b == HttpChar::CarriageReturn {
                // The LF may lie beyond `to`; look it up in the whole buffer.
                let lf_follows = match buf.get(pos + 1) {
                    Some(&next) => next == HttpChar::LineFeed,
                    None => false,
                };
                return Some(if lf_follows {
                    Ok(HeaderLineSpan {
                        colon,
                        line_end: pos,
                    })
                } else {
                    Err(ParseErrorKind::MalformedHeader)
                });
            }
            if b == HttpChar::LineFeed || b == HttpChar::Null {
                return Some(Err(ParseErrorKind::MalformedHeader));
            }
        }
        None
    }

    /// Scans one header line in `buf`, returning the colon offset and the
    /// offset of the terminating `\r`.
    ///
    /// # Errors
    ///
    /// Returns `ParseErrorKind::MalformedHeader` for every rejected input.
    #[inline]
    #[allow(clippy::cast_sign_loss)]
    pub(super) fn scan_header_line(buf: &[u8]) -> Result<HeaderLineSpan, ParseErrorKind> {
        let len = buf.len();
        // SAFETY: every 16-byte load below happens at offset `i` with
        // `i + CHUNK <= len`, so it stays inside `buf`. `Neon::all16` receives
        // the same pointer; it is assumed to read the same 16 bytes.
        unsafe {
            let v_cr = vdupq_n_u8(HttpChar::CarriageReturn.as_u8());
            let v_lf = vdupq_n_u8(HttpChar::LineFeed.as_u8());
            let v_nul = vdupq_n_u8(HttpChar::Null.as_u8());
            let v_colon = vdupq_n_u8(HttpChar::Colon.as_u8());
            let mut i = 0;
            let mut colon_pos: Option<usize> = None;
            while i + CHUNK <= len {
                let chunk = vld1q_u8(buf.as_ptr().add(i));
                // `vceqq_u8` yields 0xFF per matching lane; a non-zero
                // horizontal max means at least one lane matched.
                let has_cr = vmaxvq_u8(vceqq_u8(chunk, v_cr)) != 0;
                let has_lf = vmaxvq_u8(vceqq_u8(chunk, v_lf)) != 0;
                let has_nul = vmaxvq_u8(vceqq_u8(chunk, v_nul)) != 0;
                let chunk_end = i + CHUNK;

                // Name phase: no colon recorded yet.
                let Some(colon) = colon_pos else {
                    let has_colon = vmaxvq_u8(vceqq_u8(chunk, v_colon)) != 0;
                    if !(has_cr || has_lf || has_nul || has_colon) {
                        // Pure name chunk: all 16 bytes must be valid name bytes.
                        if !crate::tchar::Neon::all16(buf.as_ptr().add(i)) {
                            return Err(ParseErrorKind::MalformedHeader);
                        }
                        i = chunk_end;
                        continue;
                    }
                    // CR, LF or NUL inside the name with no colon in sight.
                    if !has_colon {
                        return Err(ParseErrorKind::MalformedHeader);
                    }

                    let bytes = &buf[i..chunk_end];
                    let Some(off) = bytes.iter().position(|&b| b == HttpChar::Colon) else {
                        // The vector compare reported a colon, so this should
                        // not happen; defer to the scalar scanner regardless.
                        return scan_header_line_scalar(buf, i, None);
                    };
                    let cpos = i + off;
                    // A colon at absolute offset 0 means an empty header name.
                    if cpos == 0 {
                        return Err(ParseErrorKind::MalformedHeader);
                    }
                    // The chunk contains a colon, so a whole-chunk tchar test
                    // cannot succeed; validate only the bytes before the colon.
                    let name_ok = bytes[..off]
                        .iter()
                        .all(|&b| crate::tchar::TABLE[usize::from(b)]);
                    if !name_ok {
                        return Err(ParseErrorKind::MalformedHeader);
                    }
                    colon_pos = Some(cpos);

                    // The rest of this chunk already belongs to the value.
                    if let Some(result) = scan_value_bytes(buf, cpos, cpos + 1, chunk_end) {
                        return result;
                    }
                    i = chunk_end;
                    continue;
                };

                // Value phase: only chunks containing CR/LF/NUL need a closer look.
                if has_cr || has_lf || has_nul {
                    if let Some(result) = scan_value_bytes(buf, colon, i, chunk_end) {
                        return result;
                    }
                    debug_assert!(
                        false,
                        "NEON vmaxvq_u8 detected CR/LF/NUL but scalar scan did not find it at offset {i}"
                    );
                    return scan_header_line_scalar(buf, i, colon_pos);
                }
                i = chunk_end;
            }
            // Fewer than 16 bytes remain: finish with the scalar scanner.
            scan_header_line_scalar(buf, i, colon_pos)
        }
    }
}
/// wasm32 backend (only built when the `simd128` target feature is enabled):
/// the 16-byte bitmask scanner instantiated with the `WasmSimd` tchar classifier.
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
// NOTE(review): presumably needed by the code the SIMD macros expand to — confirm.
#[allow(clippy::cast_sign_loss)]
mod wasm_simd {
// NOTE(review): presumably defines the `simd_splat!` / `simd_load!` /
// `simd_mask!` macros used by the expansion below — confirm in `crate::simd`.
crate::simd::define_simd_primitives!();
use super::{HeaderLineSpan, ParseErrorKind, scan_header_line_scalar};
use crate::ascii::HttpChar;
use crate::tchar::{TcharCheck, WasmSimd};
// Expands to `pub(super) unsafe fn scan_header_line(buf)` in this module.
impl_bitmask_scan_header_line!(WasmSimd);
}
/// Scans the header line at the start of `buf` and reports where its colon
/// and its terminating `\r` are.
///
/// The implementation is chosen per target:
/// * `x86_64`: AVX2 if `crate::tchar::has_avx2()` reports it, otherwise SSSE3
///   if `crate::tchar::has_ssse3()` reports it, otherwise the SSE2 variant;
/// * `aarch64`: the NEON scanner;
/// * `wasm32` built with `simd128`: the wasm SIMD scanner;
/// * anything else: the scalar scanner.
///
/// # Errors
///
/// Returns `ParseErrorKind::MalformedHeader` when the selected backend
/// rejects the line.
#[inline]
pub fn scan_header_line(buf: &[u8]) -> Result<HeaderLineSpan, ParseErrorKind> {
    #[cfg(target_arch = "x86_64")]
    {
        // Prefer the widest instruction set the CPU is reported to support.
        if crate::tchar::has_avx2() {
            return unsafe { avx2_mod::scan_header_line_avx2(buf) };
        }
        if crate::tchar::has_ssse3() {
            return unsafe { sse2_ssse3::scan_header_line_ssse3(buf) };
        }
        unsafe { sse2_scalar::scan_header_line(buf) }
    }
    #[cfg(target_arch = "aarch64")]
    {
        neon::scan_header_line(buf)
    }
    #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
    {
        wasm_simd::scan_header_line(buf)
    }
    #[cfg(not(any(
        target_arch = "x86_64",
        target_arch = "aarch64",
        all(target_arch = "wasm32", target_feature = "simd128")
    )))]
    {
        scan_header_line_scalar(buf, 0, None)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Scans `input`, expecting success, and checks both reported offsets.
    fn assert_span(input: &[u8], colon: usize, line_end: usize) {
        let span = scan_header_line(input).unwrap();
        assert_eq!(span.colon, colon);
        assert_eq!(span.line_end, line_end);
    }

    /// Scans `input`, expecting it to be rejected as a malformed header.
    fn assert_malformed(input: &[u8]) {
        let err = scan_header_line(input).unwrap_err();
        assert_eq!(err, ParseErrorKind::MalformedHeader);
    }

    // ---- accepted lines ----

    #[test]
    fn simple_header_line() {
        assert_span(b"Host: localhost\r\n", 4, 15);
    }

    #[test]
    fn header_with_no_value() {
        assert_span(b"X-Empty:\r\n", 7, 8);
    }

    #[test]
    fn header_with_spaces_in_value() {
        assert_span(b"Content-Type: text/html; charset=utf-8\r\n", 12, 38);
    }

    #[test]
    fn header_with_colon_in_value() {
        // Only the first colon separates name from value.
        assert_span(b"X-Url: http://example.com:8080\r\n", 5, 30);
    }

    #[test]
    fn colon_at_simd_boundary() {
        // The colon is the 16th byte (index 15).
        let span = scan_header_line(b"X-Long-Name-Hdr: val\r\n").unwrap();
        assert_eq!(span.colon, 15);
    }

    #[test]
    fn crlf_at_simd_boundary() {
        // The CR is the 16th byte (index 15); the LF falls outside the first 16 bytes.
        let buf = b"X-H: 0123456789\r\n";
        let span = scan_header_line(buf).unwrap();
        assert_eq!(span.line_end, 15);
    }

    #[test]
    fn header_spanning_two_simd_chunks() {
        assert_span(
            b"X-Very-Long-Header-Name-Here: some-value-that-is-long\r\n",
            28,
            53,
        );
    }

    #[test]
    fn short_header_in_scalar_path() {
        assert_span(b"A: b\r\n", 1, 4);
    }

    #[test]
    fn terminates_at_first_crlf() {
        assert_span(b"X: first\r\n second\r\n", 1, 8);
    }

    // ---- rejected lines ----

    #[test]
    fn rejects_empty_header_name() {
        assert_malformed(b": value\r\n");
    }

    #[test]
    fn rejects_no_colon() {
        assert_malformed(b"NoColonHere\r\n");
    }

    #[test]
    fn rejects_space_in_header_name() {
        assert_malformed(b"Bad Name: val\r\n");
    }

    #[test]
    fn rejects_nul_in_value() {
        assert_malformed(b"X: val\x00ue\r\n");
    }

    #[test]
    fn rejects_bare_lf_in_value() {
        assert_malformed(b"X: val\nue\r\n");
    }

    #[test]
    fn rejects_bare_cr_in_value() {
        assert_malformed(b"X: val\rue\r\n");
    }

    #[test]
    fn rejects_missing_lf_after_cr() {
        assert_malformed(b"X: value\rX");
    }

    #[test]
    fn rejects_no_terminator() {
        assert_malformed(b"X: value");
    }

    #[test]
    fn rejects_control_char_in_name() {
        assert_malformed(b"X\x01Y: val\r\n");
    }
}