/// Finds the first `\r\n` pair in `buf` whose `\r` sits at or after `start`.
///
/// Picks the fastest implementation available for the target:
/// AVX2 on x86_64 when the CPU reports it at run time, NEON on aarch64, and
/// the portable SWAR routine everywhere else.
///
/// Returns the index of the `\r`, or `None` when no pair is found.
#[inline]
pub fn find_crlf(buf: &[u8], start: usize) -> Option<usize> {
    #[cfg(target_arch = "aarch64")]
    {
        // SAFETY: the callee only requires NEON support.
        // NOTE(review): NEON is assumed to be part of the baseline for every
        // aarch64 target this crate is built for — confirm for soft-float targets.
        unsafe { find_crlf_neon(buf, start) }
    }

    #[cfg(not(target_arch = "aarch64"))]
    {
        #[cfg(target_arch = "x86_64")]
        {
            if has_avx2() {
                // SAFETY: `has_avx2()` just reported that the CPU supports AVX2,
                // which is the only requirement of `find_crlf_avx2`.
                return unsafe { find_crlf_avx2(buf, start) };
            }
        }

        // Portable fallback: no usable vector extension on this target/CPU.
        find_crlf_swar(buf, start)
    }
}
/// Reports whether the running CPU supports AVX2, remembering the answer.
///
/// The result is kept in a process-wide atomic: `-1` means "not probed yet",
/// `0` means unsupported and `1` means supported. Threads that race on the
/// first call each run the detection and store their own result, so no
/// stronger ordering than `Relaxed` is used.
#[cfg(target_arch = "x86_64")]
fn has_avx2() -> bool {
    use core::sync::atomic::{AtomicI8, Ordering};

    const NOT_PROBED: i8 = -1;
    static STATE: AtomicI8 = AtomicI8::new(NOT_PROBED);

    match STATE.load(Ordering::Relaxed) {
        // Any non-negative value is a previously stored answer.
        cached if cached >= 0 => cached == 1,
        _ => {
            let available = std::is_x86_feature_detected!("avx2");
            STATE.store(i8::from(available), Ordering::Relaxed);
            available
        }
    }
}
/// AVX2 scan for the first `\r\n` pair whose `\r` sits at or after `start`.
///
/// Compares 32 bytes per step against `\r`. On a hit, the byte after the
/// first matching `\r` is checked for `\n`; a lone `\r` just moves the scan
/// to the byte after it. Whatever is left once a full 32-byte step no longer
/// fits is handed to `find_crlf_swar`.
///
/// Returns the index of the `\r`, or `None` when no pair is found. A `start`
/// at or beyond the last byte (including `usize::MAX`) yields `None`.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn find_crlf_avx2(buf: &[u8], start: usize) -> Option<usize> {
    use core::arch::x86_64::{
        __m256i, _mm256_cmpeq_epi8, _mm256_loadu_si256, _mm256_movemask_epi8, _mm256_set1_epi8,
    };

    let n = buf.len();
    // `saturating_add` rather than `+`: with `start == usize::MAX` a wrapping
    // add would pass this guard and let the loop below load far outside `buf`.
    if start.saturating_add(1) >= n {
        return None;
    }

    let mut i = start;
    let cr = _mm256_set1_epi8(0x0D);

    // Past the guard `i < n <= isize::MAX`, so `i + 32` cannot overflow.
    // The strict `<` leaves at least one byte after the 32-byte window, which
    // keeps the `buf[pos + 1]` look-ahead in bounds even for a hit in the
    // window's last lane.
    while i + 32 < n {
        // SAFETY: `i + 32 < n`, so all 32 bytes starting at `i` lie inside
        // `buf`; the unaligned load has no alignment requirement.
        let chunk =
            unsafe { _mm256_loadu_si256(buf.as_ptr().add(i) as *const __m256i) };
        // One bit per byte lane, set where the lane equals `\r`.
        let mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, cr)) as u32;
        if mask == 0 {
            i += 32;
            continue;
        }

        // Lowest set bit == first `\r` in the window.
        let pos = i + mask.trailing_zeros() as usize;
        if buf[pos + 1] == b'\n' {
            return Some(pos);
        }
        // Lone `\r`: resume right after it.
        i = pos + 1;
    }

    find_crlf_swar(buf, i)
}
/// NEON scan for the first `\r\n` pair whose `\r` sits at or after `start`.
///
/// Compares 16 bytes per step against `\r`. When any lane matches, the first
/// `\r` in the window is located with a scalar search and the byte after it
/// is checked for `\n`; a lone `\r` moves the scan to the byte after it and
/// the vector loop continues. Whatever is left once a full 16-byte step no
/// longer fits is handed to `find_crlf_swar`.
///
/// Returns the index of the `\r`, or `None` when no pair is found. A `start`
/// at or beyond the last byte (including `usize::MAX`) yields `None`.
///
/// # Safety
///
/// The caller must ensure the running CPU supports NEON.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn find_crlf_neon(buf: &[u8], start: usize) -> Option<usize> {
    use core::arch::aarch64::{vceqq_u8, vdupq_n_u8, vld1q_u8, vmaxvq_u8};

    let n = buf.len();
    // `saturating_add` rather than `+`: with `start == usize::MAX` a wrapping
    // add would pass this guard and let the loop below load far outside `buf`.
    if start.saturating_add(1) >= n {
        return None;
    }

    let mut i = start;
    let cr = vdupq_n_u8(0x0D);

    // Past the guard `i < n <= isize::MAX`, so `i + 16` cannot overflow.
    // The strict `<` leaves at least one byte after the 16-byte window, which
    // keeps the `buf[pos + 1]` look-ahead in bounds even for a hit in the
    // window's last lane.
    while i + 16 < n {
        // SAFETY: `i + 16 < n`, so all 16 bytes starting at `i` lie inside `buf`.
        let chunk = unsafe { vld1q_u8(buf.as_ptr().add(i)) };
        // Horizontal max over the per-lane comparison: non-zero iff some lane is `\r`.
        if vmaxvq_u8(vceqq_u8(chunk, cr)) != 0 {
            if let Some(off) = buf[i..i + 16].iter().position(|&b| b == b'\r') {
                let pos = i + off;
                if buf[pos + 1] == b'\n' {
                    return Some(pos);
                }
                // Lone `\r`: stay on the vector path and resume right after it.
                i = pos + 1;
                continue;
            }
        }
        i += 16;
    }

    find_crlf_swar(buf, i)
}
/// Portable scan for the first `\r\n` pair whose `\r` sits at or after `start`.
///
/// Processes eight bytes at a time as a little-endian `u64`: XOR-ing with a
/// broadcast `\r` turns every `\r` byte into zero, and the
/// `(x - 0x01..) & !x & 0x80..` test then flags zero bytes. Only the lowest
/// flagged byte is used, because borrows can spuriously flag bytes above a
/// real match. The last few bytes are checked pair by pair.
///
/// Returns the index of the `\r`, or `None` when no pair is found. A `start`
/// at or beyond the last byte (including `usize::MAX`) yields `None`.
pub(crate) fn find_crlf_swar(buf: &[u8], start: usize) -> Option<usize> {
    const CR_BCAST: u64 = 0x0D0D_0D0D_0D0D_0D0D;
    const ONES: u64 = 0x0101_0101_0101_0101;
    const HIGH: u64 = 0x8080_8080_8080_8080;

    let n = buf.len();
    // `saturating_add` rather than `+`: `start == usize::MAX` must produce
    // `None` instead of an arithmetic overflow.
    if start.saturating_add(1) >= n {
        return None;
    }

    let mut i = start;

    // Past the guard `i < n <= isize::MAX`, so `i + 8` cannot overflow.
    // The strict `<` leaves at least one byte after the 8-byte word, which
    // keeps the `buf[pos + 1]` look-ahead in bounds for a hit in the last byte.
    while i + 8 < n {
        let mut bytes = [0u8; 8];
        bytes.copy_from_slice(&buf[i..i + 8]);
        let x = u64::from_le_bytes(bytes) ^ CR_BCAST;
        let zeroed = x.wrapping_sub(ONES) & !x & HIGH;
        if zeroed == 0 {
            i += 8;
            continue;
        }

        // Little-endian load: the lowest flagged bit belongs to the first `\r`.
        let pos = i + (zeroed.trailing_zeros() / 8) as usize;
        if buf[pos + 1] == b'\n' {
            return Some(pos);
        }
        // Lone `\r`: resume right after it.
        i = pos + 1;
    }

    // Scalar tail; `i < n` holds here, so the slice is valid (possibly 1 byte long).
    buf[i..]
        .windows(2)
        .position(|pair| pair == b"\r\n")
        .map(|off| i + off)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that the dispatched `find_crlf` and the portable
    /// `find_crlf_swar` agree for every start offset in `0..=buf.len()`.
    fn assert_matches_swar(buf: &[u8]) {
        (0..=buf.len()).for_each(|start| {
            assert_eq!(
                find_crlf(buf, start),
                find_crlf_swar(buf, start),
                "mismatch at buf={buf:?} start={start}",
            );
        });
    }

    /// Buffers of zero to two bytes must behave the same on every tier.
    #[test]
    fn empty_and_short_buffers() {
        let inputs: [&[u8]; 5] = [b"", b"a", b"\r", b"\n", b"ab"];
        for buf in inputs.iter() {
            assert_matches_swar(buf);
        }
    }

    /// A buffer of `off` filler bytes followed by `\r\n` reports `off`.
    #[test]
    fn crlf_at_every_offset() {
        for off in 0..=80 {
            let mut buf = vec![b'X'; off];
            buf.extend_from_slice(b"\r\n");
            assert_eq!(find_crlf(&buf, 0), Some(off), "off={off}");
        }
    }

    /// A `\r` without a following `\n` is skipped, not reported.
    #[test]
    fn lone_cr_does_not_terminate() {
        let mut buf = vec![b'X'; 50];
        buf[10] = b'\r';
        assert_eq!(find_crlf(&buf, 0), None);

        buf[30..32].copy_from_slice(b"\r\n");
        assert_eq!(find_crlf(&buf, 0), Some(30));
    }

    /// In a run of `\r` bytes only the one directly before `\n` counts.
    #[test]
    fn multiple_crs_in_a_row() {
        let buf = b"X\r\r\r\nY";
        assert_eq!(find_crlf(buf, 0), Some(3));
    }

    /// Starting past one pair finds the next one, or nothing.
    #[test]
    fn start_past_crlf_finds_next() {
        let buf = b"AAA\r\nBBB\r\nCCC";
        let cases: [(usize, Option<usize>); 3] = [(0, Some(3)), (4, Some(8)), (9, None)];
        for &(start, expected) in cases.iter() {
            assert_eq!(find_crlf(buf, start), expected);
        }
    }

    /// Protocol-like frames, filler-heavy buffers, a lone `\r` in the middle
    /// and a buffer without any pair, each compared against the SWAR routine.
    #[test]
    fn cross_tier_oracle_random_shapes() {
        let shapes: &[&[u8]] = &[
            b"*3\r\n$3\r\nSET\r\n$3\r\nkey\r\n$5\r\nvalue\r\n",
            b"*1\r\n$4\r\nPING\r\n",
            b"PING\r\n",
            b"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\r\n",
            b"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\r\n",
            b"\r\nXX\r\nXXXXXX\r\n",
            b"XXXXXXXXXXXXXXXX\rXXXXXXXXXXXXXXXX\r\n",
            b"only-text-no-newline-at-all-just-bytes-here-XXXX",
        ];
        shapes.iter().for_each(|buf| assert_matches_swar(buf));
    }

    /// With fewer than two bytes available from `start`, no pair can exist.
    #[test]
    fn returns_none_when_only_one_byte_after_start() {
        let buf = b"AAAAA";
        for &start in [4usize, 5].iter() {
            assert_eq!(find_crlf(buf, start), None);
        }
    }
}