/// Returns the index of the first byte in `bytes` that ends a run of plain
/// string content: a double quote (`"`), a backslash (`\`), or a control byte
/// below `0x20`. Returns `None` when the slice contains no such byte.
///
/// Inputs shorter than 32 bytes always go to the portable SWAR scanner.
/// Longer inputs go to the SIMD scanner compiled in for the target
/// architecture (aarch64 NEON, x86_64 SSE2, or 32-bit ARM with NEON); targets
/// without one fall through to the SWAR scanner as well.
#[inline(always)]
pub fn find_string_terminator(bytes: &[u8]) -> Option<usize> {
    /// Shortest input that is handed to a SIMD scanner.
    const SIMD_MIN_LEN: usize = 32;

    if bytes.len() >= SIMD_MIN_LEN {
        // At most one of the blocks below survives `cfg` evaluation, and it
        // returns unconditionally, so nothing after it inside this `if` runs.
        #[cfg(target_arch = "aarch64")]
        {
            // SAFETY: the scanner only reads inside `bytes`; it relies on NEON
            // being available, which this build assumes for aarch64.
            return unsafe { aarch64_neon::find_string_terminator(bytes) };
        }
        #[cfg(target_arch = "x86_64")]
        {
            // SAFETY: the scanner only reads inside `bytes`; it relies on SSE2
            // being available, which this build assumes for x86_64.
            return unsafe { x86_64_sse2::find_string_terminator(bytes) };
        }
        #[cfg(all(target_arch = "arm", target_feature = "neon"))]
        {
            // SAFETY: the scanner only reads inside `bytes`, and the `cfg`
            // above guarantees NEON is enabled for this build.
            return unsafe { arm_neon::find_string_terminator(bytes) };
        }
    }

    // Short inputs, and every input on targets without a SIMD scanner.
    swar::find_string_terminator(bytes)
}
/// Portable "SIMD within a register" scanner that inspects eight bytes at a
/// time using plain `u64` arithmetic.
mod swar {
    /// `0x01` replicated into each of the eight byte lanes of a `u64`.
    const LANE_LSB: u64 = 0x0101_0101_0101_0101;
    /// `0x80` replicated into each byte lane (the top bit of every lane).
    const LANE_MSB: u64 = 0x8080_8080_8080_8080;
    /// The top three bits of every lane; `lane & 0xE0 == 0` exactly when the
    /// lane holds a value below `0x20`.
    const ABOVE_CTL_BITS: u64 = 0xE0E0_E0E0_E0E0_E0E0;

    /// Sets the top bit of each lane of `x` that is zero.
    ///
    /// Borrow propagation can also flag lanes *above* a genuinely zero lane,
    /// so only the lowest set bit of the result is reliable.
    #[inline(always)]
    fn zero_lanes(x: u64) -> u64 {
        x.wrapping_sub(LANE_LSB) & !x & LANE_MSB
    }

    /// Returns a word whose lowest set bit is the top bit of the first lane
    /// of `w` that holds `"`, `\`, or a byte below `0x20`; zero if no lane
    /// does.
    #[inline(always)]
    fn terminator_lanes(w: u64) -> u64 {
        // XOR with a splatted needle turns matching lanes into zero lanes.
        let quote_diff = w ^ (u64::from(b'"') * LANE_LSB);
        let backslash_diff = w ^ (u64::from(b'\\') * LANE_LSB);
        // Control bytes have none of their top three bits set.
        let ctl_diff = w & ABOVE_CTL_BITS;
        zero_lanes(quote_diff) | zero_lanes(backslash_diff) | zero_lanes(ctl_diff)
    }

    /// Returns the index of the first `"`, `\`, or byte below `0x20` in
    /// `bytes`, or `None` if there is none.
    #[inline(always)]
    pub fn find_string_terminator(bytes: &[u8]) -> Option<usize> {
        let mut words = bytes.chunks_exact(8);
        for (word_idx, chunk) in words.by_ref().enumerate() {
            let mut lanes = [0u8; 8];
            lanes.copy_from_slice(chunk);
            // Little-endian load: lane 0 of the word is the chunk's first byte,
            // so the lowest set bit belongs to the earliest matching byte.
            let hits = terminator_lanes(u64::from_le_bytes(lanes));
            if hits != 0 {
                let lane = (hits.trailing_zeros() / 8) as usize;
                return Some(word_idx * 8 + lane);
            }
        }

        // Fewer than eight bytes remain; check them one at a time.
        let tail = words.remainder();
        let tail_start = bytes.len() - tail.len();
        tail.iter()
            .position(|&b| b == b'"' || b == b'\\' || b < 0x20)
            .map(|rel| tail_start + rel)
    }
}
#[cfg(target_arch = "aarch64")]
mod aarch64_neon {
    use core::arch::aarch64::*;

    /// Returns the index of the first `"`, `\`, or byte below `0x20` in
    /// `bytes`, or `None` if there is none.
    ///
    /// Full 16-byte chunks are checked with NEON; the remaining bytes (fewer
    /// than 16) are handed to the SWAR scanner.
    ///
    /// # Safety
    ///
    /// Uses NEON intrinsics, so the executing CPU must support NEON. All
    /// memory reads stay inside `bytes`.
    #[inline(always)]
    pub unsafe fn find_string_terminator(bytes: &[u8]) -> Option<usize> {
        // SAFETY: each `vld1q_u8` reads the 16 bytes starting at `offset`, and
        // the loop condition guarantees `offset + 16 <= bytes.len()`.
        unsafe {
            let base = bytes.as_ptr();
            let total = bytes.len();
            let quote = vdupq_n_u8(b'"');
            let backslash = vdupq_n_u8(b'\\');
            let space = vdupq_n_u8(0x20);

            let mut offset = 0;
            while total - offset >= 16 {
                let chunk = vld1q_u8(base.add(offset));
                let is_quote = vceqq_u8(chunk, quote);
                let is_backslash = vceqq_u8(chunk, backslash);
                let below_space = vcltq_u8(chunk, space);
                // Every lane is now 0xFF for a terminator and 0x00 otherwise.
                let hits = vorrq_u8(vorrq_u8(is_quote, is_backslash), below_space);
                // Shifting each 16-bit pair right by four and narrowing keeps
                // one nibble per input byte, in byte order, which packs the
                // 16 lanes into a single 64-bit value.
                let packed = vshrn_n_u16::<4>(vreinterpretq_u16_u8(hits));
                let nibbles = vget_lane_u64::<0>(vreinterpret_u64_u8(packed));
                if nibbles != 0 {
                    // Four mask bits per byte, lowest nibble = first byte.
                    let lane = (nibbles.trailing_zeros() / 4) as usize;
                    return Some(offset + lane);
                }
                offset += 16;
            }

            let rel = super::swar::find_string_terminator(&bytes[offset..])?;
            Some(offset + rel)
        }
    }
}
#[cfg(target_arch = "x86_64")]
mod x86_64_sse2 {
    use core::arch::x86_64::*;

    /// Number of bytes examined per SSE2 iteration.
    const LANES: usize = 16;

    /// Returns the index of the first `"`, `\`, or byte below `0x20` in
    /// `bytes`, or `None` if there is none.
    ///
    /// Full 16-byte chunks are checked with SSE2; the remaining bytes (fewer
    /// than 16) are handed to the SWAR scanner.
    ///
    /// # Safety
    ///
    /// Uses SSE2 intrinsics, so the executing CPU must support SSE2. All
    /// memory reads stay inside `bytes`.
    #[inline(always)]
    pub unsafe fn find_string_terminator(bytes: &[u8]) -> Option<usize> {
        // SAFETY: each unaligned load reads the 16 bytes starting at `offset`,
        // and the loop condition guarantees `offset + 16 <= bytes.len()`.
        unsafe {
            let base = bytes.as_ptr();
            let total = bytes.len();
            let quote = _mm_set1_epi8(b'"' as i8);
            let backslash = _mm_set1_epi8(b'\\' as i8);
            let last_ctl = _mm_set1_epi8(0x1F);

            let mut offset = 0;
            while total - offset >= LANES {
                let chunk = _mm_loadu_si128(base.add(offset) as *const __m128i);
                let is_quote = _mm_cmpeq_epi8(chunk, quote);
                let is_backslash = _mm_cmpeq_epi8(chunk, backslash);
                // There is no unsigned byte compare here, so test
                // `min(byte, 0x1F) == byte`, which holds exactly when
                // `byte <= 0x1F`.
                let clamped = _mm_min_epu8(chunk, last_ctl);
                let is_ctl = _mm_cmpeq_epi8(clamped, chunk);
                let hits = _mm_or_si128(_mm_or_si128(is_quote, is_backslash), is_ctl);
                // One mask bit per byte; bit 0 corresponds to the first byte.
                let bits = _mm_movemask_epi8(hits) as u32;
                if bits != 0 {
                    return Some(offset + bits.trailing_zeros() as usize);
                }
                offset += LANES;
            }

            let rel = super::swar::find_string_terminator(&bytes[offset..])?;
            Some(offset + rel)
        }
    }
}
#[cfg(all(target_arch = "arm", target_feature = "neon"))]
mod arm_neon {
    use core::arch::arm::*;

    /// Returns the index of the first `"`, `\`, or byte below `0x20` in
    /// `bytes`, or `None` if there is none.
    ///
    /// Full 16-byte chunks are checked with NEON; the remaining bytes (fewer
    /// than 16) are handed to the SWAR scanner.
    ///
    /// # Safety
    ///
    /// Compiled with the `neon` target feature enabled, so the executing CPU
    /// must support NEON. All memory reads stay inside `bytes`.
    #[target_feature(enable = "neon")]
    pub unsafe fn find_string_terminator(bytes: &[u8]) -> Option<usize> {
        // SAFETY: each `vld1q_u8` reads the 16 bytes starting at `offset`, and
        // the loop condition guarantees `offset + 16 <= bytes.len()`.
        unsafe {
            let base = bytes.as_ptr();
            let total = bytes.len();
            let quote = vdupq_n_u8(b'"');
            let backslash = vdupq_n_u8(b'\\');
            let space = vdupq_n_u8(0x20);

            let mut offset = 0;
            while total - offset >= 16 {
                let chunk = vld1q_u8(base.add(offset));
                let is_quote = vceqq_u8(chunk, quote);
                let is_backslash = vceqq_u8(chunk, backslash);
                let below_space = vcltq_u8(chunk, space);
                // Every lane is now 0xFF for a terminator and 0x00 otherwise.
                let hits = vorrq_u8(vorrq_u8(is_quote, is_backslash), below_space);
                // Shifting each 16-bit pair right by four and narrowing keeps
                // one nibble per input byte, in byte order, which packs the
                // 16 lanes into a single 64-bit value.
                let packed = vshrn_n_u16::<4>(vreinterpretq_u16_u8(hits));
                let nibbles = vget_lane_u64::<0>(vreinterpret_u64_u8(packed));
                if nibbles != 0 {
                    // Four mask bits per byte, lowest nibble = first byte.
                    let lane = (nibbles.trailing_zeros() / 4) as usize;
                    return Some(offset + lane);
                }
                offset += 16;
            }

            let rel = super::swar::find_string_terminator(&bytes[offset..])?;
            Some(offset + rel)
        }
    }
}
#[cfg(test)]
mod tests {
    use super::find_string_terminator;

    /// Byte-at-a-time reference used to cross-check the optimized scanners.
    fn scalar_reference(bytes: &[u8]) -> Option<usize> {
        bytes
            .iter()
            .position(|&b| matches!(b, b'"' | b'\\') || b < 0x20)
    }

    #[test]
    fn empty() {
        assert_eq!(find_string_terminator(b""), None);
    }

    #[test]
    fn no_terminator() {
        let s = b"abcdefghijklmnopqrstuvwxyz0123456789";
        assert_eq!(find_string_terminator(s), None);
    }

    #[test]
    fn first_byte_quote() {
        assert_eq!(find_string_terminator(b"\"abc"), Some(0));
    }

    #[test]
    fn quote_after_long_run() {
        let mut s = vec![b'x'; 30];
        s.push(b'"');
        s.push(b'y');
        assert_eq!(find_string_terminator(&s), Some(30));
    }

    #[test]
    fn backslash_in_tail() {
        let mut s = vec![b'x'; 19];
        s.push(b'\\');
        assert_eq!(find_string_terminator(&s), Some(19));
    }

    #[test]
    fn every_control_byte_at_window_boundary() {
        // A 1-byte suffix keeps the input below the 32-byte dispatch threshold
        // (SWAR path). The longer suffixes reach 32 bytes or more, so on SIMD
        // targets the control byte is the first lane of the second 16-byte
        // window.
        for suffix_len in [1usize, 15, 40] {
            for ctl in 0u8..0x20 {
                let mut s = vec![b'x'; 16];
                s.push(ctl);
                s.resize(s.len() + suffix_len, b'y');
                assert_eq!(
                    find_string_terminator(&s),
                    Some(16),
                    "control byte 0x{ctl:02x} missed with a {suffix_len}-byte suffix",
                );
            }
        }
    }

    #[test]
    fn high_bit_bytes_are_safe() {
        let s: Vec<u8> = (0x80u8..=0xFFu8).collect();
        assert_eq!(find_string_terminator(&s), None);
    }

    #[test]
    fn multibyte_utf8_safe() {
        let s = "café and more text past the SIMD window";
        assert_eq!(find_string_terminator(s.as_bytes()), None);
    }

    #[test]
    fn backslash_inside_long_input() {
        let mut s = vec![b'x'; 48];
        s[21] = b'\\';
        assert_eq!(find_string_terminator(&s), Some(21));
    }

    #[test]
    fn terminator_in_tail_after_full_windows() {
        // 35 bytes: two full 16-byte windows followed by a 3-byte tail, so the
        // reported index must include the offset of the tail.
        let mut s = vec![b'x'; 35];
        s[33] = b'"';
        assert_eq!(find_string_terminator(&s), Some(33));
    }

    #[test]
    fn earliest_terminator_wins() {
        let mut s = vec![b'x'; 64];
        s[40] = b'"';
        s[9] = b'\\';
        s[10] = 0x01;
        assert_eq!(find_string_terminator(&s), Some(9));
    }

    #[test]
    fn neighbouring_byte_values_are_not_terminators() {
        // Values adjacent to the terminators (0x20, 0x21, 0x23, 0x5B, 0x5D),
        // DEL, and the terminators with the high bit flipped (0xA2, 0xDC).
        for &plain in &[0x20u8, 0x21, 0x23, 0x5B, 0x5D, 0x7F, 0x80, 0xA2, 0xDC, 0xFF] {
            for len in [7usize, 20, 32, 45] {
                let s = vec![plain; len];
                assert_eq!(
                    find_string_terminator(&s),
                    None,
                    "byte 0x{plain:02x} wrongly reported at length {len}",
                );
            }
        }
    }

    #[test]
    fn matches_scalar_reference_at_every_position() {
        for len in 0..=72usize {
            for pos in 0..len {
                for &t in &[b'"', b'\\', 0x00u8, 0x1Fu8] {
                    let mut s = vec![b'x'; len];
                    s[pos] = t;
                    let found = find_string_terminator(&s);
                    assert_eq!(found, Some(pos), "byte 0x{t:02x} at {pos} of {len}");
                    assert_eq!(found, scalar_reference(&s));
                }
            }
        }
    }
}