/// `0x80` in each of the eight byte lanes of a `u64`; selects the top bit of every byte.
const MASK_80: u64 = u64::from_ne_bytes([0x80; 8]);
/// `0x01` in each of the eight byte lanes of a `u64`; subtracts one from every byte at once.
const MASK_01: u64 = u64::from_ne_bytes([0x01; 8]);
/// Returns the length of the longest prefix of `s` that consists only of
/// "plain" ASCII bytes: `0x20..=0x7F` excluding `"` and `\`.
///
/// Complete 8-byte groups are tested one word at a time; the remaining
/// 0–7 bytes are checked individually.
pub(crate) fn skip_plain_ascii_bytes(s: &[u8]) -> usize {
    let mut groups = s.chunks_exact(8);
    let mut consumed = 0;
    for group in groups.by_ref() {
        let bytes: [u8; 8] = group
            .try_into()
            .expect("chunks_exact(8) yields 8-byte slices");
        let w = u64::from_ne_bytes(bytes);
        if !is_all_plain_ascii(w) {
            // This word holds the first offending byte: locate it and stop.
            return consumed + first_non_plain_offset(w);
        }
        consumed += 8;
    }
    // Fewer than eight bytes are left; scan them one by one.
    let tail = groups.remainder();
    let plain_in_tail = tail
        .iter()
        .position(|&b| !is_plain_ascii_byte(b))
        .unwrap_or(tail.len());
    consumed + plain_in_tail
}
/// Returns the number of leading bytes of `s` that have their top bit set
/// (i.e. are not ASCII).
#[inline(always)]
pub(crate) fn skip_non_ascii_bytes(s: &[u8]) -> usize {
    // Fast path: count leading 8-byte groups in which every byte is >= 0x80.
    let full_groups = s
        .chunks_exact(8)
        .take_while(|&group| {
            let bytes: [u8; 8] = group.try_into().unwrap();
            u64::from_ne_bytes(bytes) & MASK_80 == MASK_80
        })
        .count();
    let start = full_groups * 8;
    // Slow path: continue byte by byte from the first group that failed
    // (or from the short tail when every full group passed).
    let rest = &s[start..];
    start + rest.iter().take_while(|&&b| b & 0x80 != 0).count()
}
/// Returns the number of leading JSON whitespace bytes in `s`:
/// space, horizontal tab, carriage return or line feed.
#[inline]
pub(crate) fn skip_json_whitespace(s: &[u8]) -> usize {
    s.iter()
        .position(|&b| !matches!(b, b' ' | b'\t' | b'\r' | b'\n'))
        // No non-whitespace byte found: the whole slice is whitespace.
        .unwrap_or(s.len())
}
/// Returns the number of leading ASCII digit bytes (`b'0'..=b'9'`) in `s`.
///
/// Complete 8-byte groups are tested one word at a time; the remaining
/// 0–7 bytes are checked individually.
pub(crate) fn skip_ascii_digits(s: &[u8]) -> usize {
    let mut groups = s.chunks_exact(8);
    let mut consumed = 0;
    for group in groups.by_ref() {
        let bytes: [u8; 8] = group
            .try_into()
            .expect("chunks_exact(8) yields 8-byte slices");
        let w = u64::from_ne_bytes(bytes);
        if !is_all_ascii_digits(w) {
            // This word holds the first non-digit: locate it and stop.
            return consumed + first_non_digit_offset(w);
        }
        consumed += 8;
    }
    // Fewer than eight bytes are left; scan them one by one.
    let digits_in_tail = groups
        .remainder()
        .iter()
        .take_while(|b| b.is_ascii_digit())
        .count();
    consumed + digits_in_tail
}
#[inline(always)]
fn non_digit_mask(w: u64) -> u64 {
let non_ascii = w & MASK_80;
let lt_30 = (w.wrapping_add(0x5050505050505050) ^ MASK_80) & MASK_80;
let gt_39 = w.wrapping_add(0x4646464646464646) & MASK_80;
non_ascii | lt_30 | gt_39
}
/// Returns `true` when all eight bytes packed in `w` are ASCII digits.
#[inline(always)]
fn is_all_ascii_digits(w: u64) -> bool {
    // No flagged lane means no byte failed the digit test.
    let offenders = non_digit_mask(w);
    offenders == 0
}
/// Returns the index (in memory order, `0..8`) of the first byte of `w` that
/// is not an ASCII digit, or `8` when every byte is a digit.
///
/// `w` is expected to hold eight input bytes loaded with
/// `u64::from_ne_bytes`.
#[inline(always)]
fn first_non_digit_offset(w: u64) -> usize {
    // Put the first memory byte in the least significant lane on every target
    // (a no-op on little-endian, a byte swap on big-endian). The word-wide
    // additions in `non_digit_mask` only carry towards more significant lanes,
    // so with this layout a carry can never disturb a byte that comes
    // *before* its source. Scanning the native big-endian word from the top
    // would let a non-ASCII byte hide or fake a failure in the byte
    // preceding it (e.g. b'/' followed by 0xFF).
    let fail = non_digit_mask(u64::from_le(w));
    (fail.trailing_zeros() / 8) as usize
}
/// Builds a per-byte failure mask for the eight bytes packed in `w`: the top
/// bit of a byte lane is set when that byte is non-ASCII, a control
/// character (`< 0x20`), a double quote, or a backslash.
///
/// The arithmetic is word-wide, so carries and borrows can leak into the next
/// more significant lane (e.g. `#` right above a `"` is flagged as well).
/// The least significant flagged lane is always exact, and the mask is zero
/// only if every byte is plain.
#[inline(always)]
fn non_plain_mask(w: u64) -> u64 {
    // Adding 0x60 pushes a byte to >= 0x80 exactly when it is >= 0x20.
    const REACH_SPACE: u64 = 0x6060606060606060;
    const QUOTES: u64 = 0x2222222222222222;
    const BACKSLASHES: u64 = 0x5C5C5C5C5C5C5C5C;

    // Top bit set in each lane of `x` that is zero (final `& MASK_80` is
    // applied once below).
    let zero_lanes = |x: u64| x.wrapping_sub(MASK_01) & !x;

    let printable = w.wrapping_add(REACH_SPACE);
    let quote = zero_lanes(w ^ QUOTES);
    let bslash = zero_lanes(w ^ BACKSLASHES);
    (w | !printable | quote | bslash) & MASK_80
}
/// Returns `true` when all eight bytes packed in `w` are plain ASCII
/// (printable, and neither `"` nor `\`).
#[inline(always)]
fn is_all_plain_ascii(w: u64) -> bool {
    // No flagged lane means no byte needs special handling.
    let offenders = non_plain_mask(w);
    offenders == 0
}
/// Returns the index (in memory order, `0..8`) of the first byte of `w` that
/// is not plain ASCII, or `8` when every byte is plain.
///
/// `w` is expected to hold eight input bytes loaded with
/// `u64::from_ne_bytes`.
#[inline(always)]
fn first_non_plain_offset(w: u64) -> usize {
    // Put the first memory byte in the least significant lane on every target
    // (a no-op on little-endian, a byte swap on big-endian). Carries and
    // borrows inside `non_plain_mask` only travel towards more significant
    // lanes, so with this layout they can never disturb a byte that comes
    // *before* their source. Scanning the native big-endian word from the top
    // would miss a control byte that precedes a non-ASCII byte and would
    // falsely flag `#` before `"` or `]` before `\`.
    let fail = non_plain_mask(u64::from_le(w));
    (fail.trailing_zeros() / 8) as usize
}
/// Returns `true` for a byte in `0x20..=0x7F` that is neither `"` (0x22)
/// nor `\` (0x5C).
#[inline(always)]
fn is_plain_ascii_byte(b: u8) -> bool {
    // The three ranges cover 0x20..=0x7F with 0x22 and 0x5C cut out.
    matches!(b, 0x20..=0x21 | 0x23..=0x5B | 0x5D..=0x7F)
}
#[cfg(test)]
mod tests {
    use super::*;

    // ---- skip_plain_ascii_bytes -------------------------------------------

    #[test]
    fn empty() {
        assert_eq!(skip_plain_ascii_bytes(b""), 0);
    }

    #[test]
    fn all_plain_short() {
        assert_eq!(skip_plain_ascii_bytes(b"hello"), 5);
    }

    #[test]
    fn all_plain_8_bytes() {
        assert_eq!(skip_plain_ascii_bytes(b"abcdefgh"), 8);
    }

    #[test]
    fn all_plain_16_bytes() {
        assert_eq!(skip_plain_ascii_bytes(b"abcdefghijklmnop"), 16);
    }

    #[test]
    fn quote_at_various_positions() {
        for pos in 0..16 {
            let mut buf = [b'a'; 16];
            buf[pos] = b'"';
            assert_eq!(
                skip_plain_ascii_bytes(&buf),
                pos,
                "quote at position {}",
                pos
            );
        }
    }

    #[test]
    fn backslash_at_various_positions() {
        for pos in 0..16 {
            let mut buf = [b'a'; 16];
            buf[pos] = b'\\';
            assert_eq!(
                skip_plain_ascii_bytes(&buf),
                pos,
                "backslash at position {}",
                pos
            );
        }
    }

    #[test]
    fn control_chars() {
        for b in 0..0x20u8 {
            let buf = [b'a', b'b', b'c', b];
            assert_eq!(skip_plain_ascii_bytes(&buf), 3, "control char 0x{:02x}", b);
        }
    }

    #[test]
    fn control_char_at_start() {
        assert_eq!(skip_plain_ascii_bytes(b"\x00abc"), 0);
        assert_eq!(skip_plain_ascii_bytes(b"\nabc"), 0);
        assert_eq!(skip_plain_ascii_bytes(b"\tabc"), 0);
    }

    #[test]
    fn non_ascii_stops() {
        assert_eq!(skip_plain_ascii_bytes("abc日本語".as_bytes()), 3);
        assert_eq!(skip_plain_ascii_bytes("あ".as_bytes()), 0);
    }

    #[test]
    fn non_ascii_run() {
        assert_eq!(skip_non_ascii_bytes("日本語x".as_bytes()), "日本語".len());
        assert_eq!(skip_non_ascii_bytes("あa".as_bytes()), "あ".len());
        assert_eq!(skip_non_ascii_bytes("abc".as_bytes()), 0);
    }

    #[test]
    fn high_ascii_boundary() {
        assert_eq!(skip_plain_ascii_bytes(&[0x7F]), 1);
        assert_eq!(skip_plain_ascii_bytes(&[0x80]), 0);
        assert_eq!(skip_plain_ascii_bytes(&[0x20]), 1);
        assert_eq!(skip_plain_ascii_bytes(&[0x1F]), 0);
    }

    #[test]
    fn mixed_content() {
        // "Hello, World!" is 13 bytes; the quote that follows stops the scan.
        let input = b"Hello, World!\"rest";
        assert_eq!(skip_plain_ascii_bytes(input), 13);
    }

    #[test]
    fn all_printable_ascii() {
        let mut count = 0;
        for b in 0x20..0x80u8 {
            if b != b'"' && b != b'\\' {
                assert_eq!(
                    skip_plain_ascii_bytes(&[b]),
                    1,
                    "byte 0x{:02x} ('{}') should be plain",
                    b,
                    b as char,
                );
                count += 1;
            }
        }
        // 96 bytes in 0x20..0x80, minus `"` and `\`.
        assert_eq!(count, 94);
    }

    #[test]
    fn long_plain_then_special() {
        let buf = [b'x'; 1024];
        assert_eq!(skip_plain_ascii_bytes(&buf), 1024);
        let mut buf2 = [b'x'; 64];
        buf2[50] = b'"';
        assert_eq!(skip_plain_ascii_bytes(&buf2), 50);
    }

    #[test]
    fn plain_neighbours_do_not_shift_offset() {
        // The word-wide path lets carries and borrows cross byte boundaries;
        // the reported offset must still be the first offending byte.
        assert_eq!(skip_plain_ascii_bytes(b"abcdef#\""), 7);
        assert_eq!(skip_plain_ascii_bytes(b"abcdef]\\"), 7);
        assert_eq!(skip_plain_ascii_bytes(b"\"#abcdef"), 0);
        assert_eq!(skip_plain_ascii_bytes(b"\\]abcdef"), 0);
        assert_eq!(skip_plain_ascii_bytes(b"abcdef\x1F\xE3"), 6);
        assert_eq!(skip_plain_ascii_bytes(b"abcdef\xE3\x1F"), 6);
    }

    // ---- skip_ascii_digits ------------------------------------------------

    #[test]
    fn digits_empty_and_short() {
        assert_eq!(skip_ascii_digits(b""), 0);
        assert_eq!(skip_ascii_digits(b"0"), 1);
        assert_eq!(skip_ascii_digits(b"9"), 1);
        assert_eq!(skip_ascii_digits(b"a"), 0);
        assert_eq!(skip_ascii_digits(b"123"), 3);
    }

    #[test]
    fn digits_full_chunk() {
        assert_eq!(skip_ascii_digits(b"01234567"), 8);
        assert_eq!(skip_ascii_digits(b"0123456789012345"), 16);
    }

    #[test]
    fn digits_stop_at_non_digit() {
        assert_eq!(skip_ascii_digits(b"12345abc"), 5);
        assert_eq!(skip_ascii_digits(b"1234567a"), 7);
        assert_eq!(skip_ascii_digits(b"12345678a"), 8);
        assert_eq!(skip_ascii_digits(b"123456789a"), 9);
    }

    #[test]
    fn digits_boundary_chars() {
        // '/' is 0x2F (just below '0') and ':' is 0x3A (just above '9').
        assert_eq!(skip_ascii_digits(b"/"), 0);
        assert_eq!(skip_ascii_digits(b":"), 0);
        assert_eq!(skip_ascii_digits(b"0/"), 1);
        assert_eq!(skip_ascii_digits(b"9:"), 1);
    }

    #[test]
    fn digits_non_ascii() {
        // Five digits, a two-byte UTF-8 sequence (U+00A0), then another digit.
        let with_nbsp = [b'1', b'2', b'3', b'4', b'5', 0xC2, 0xA0, b'0'];
        assert_eq!(skip_ascii_digits(&with_nbsp), 5);
        assert_eq!(skip_ascii_digits(b"\xFF1234"), 0);
    }

    #[test]
    fn digits_each_non_digit_position() {
        for pos in 0..16 {
            let mut buf = [b'5'; 16];
            buf[pos] = b'.';
            assert_eq!(
                skip_ascii_digits(&buf),
                pos,
                "non-digit '.' at position {pos}"
            );
        }
    }

    #[test]
    fn digits_neighbours_do_not_shift_offset() {
        // A non-ASCII byte next to a boundary character must not change the
        // verdict for that boundary character.
        assert_eq!(skip_ascii_digits(b"123456/\xFF"), 6);
        assert_eq!(skip_ascii_digits(b"1234569\xFF"), 7);
        assert_eq!(skip_ascii_digits(b"\xFF/123456"), 0);
        assert_eq!(skip_ascii_digits(b"\xFF9123456"), 0);
    }

    // ---- skip_json_whitespace ---------------------------------------------
    //
    // Runs of spaces are spelled with `\x20` or array repeats so the input
    // length is explicit and survives whitespace reformatting.

    #[test]
    fn ws_empty_and_short() {
        assert_eq!(skip_json_whitespace(b""), 0);
        assert_eq!(skip_json_whitespace(b"\x20"), 1);
        assert_eq!(skip_json_whitespace(b"\t"), 1);
        assert_eq!(skip_json_whitespace(b"\r"), 1);
        assert_eq!(skip_json_whitespace(b"\n"), 1);
        assert_eq!(skip_json_whitespace(b"\x20\x20\t\x20\n\x20"), 6);
        assert_eq!(skip_json_whitespace(b"a"), 0);
    }

    #[test]
    fn ws_full_chunk() {
        assert_eq!(skip_json_whitespace(&[b' '; 8]), 8);
        assert_eq!(skip_json_whitespace(b"\t\n\r\x20\t\n\r\x20"), 8);
        let mut spaces_with_tab = [b' '; 16];
        spaces_with_tab[7] = b'\t';
        assert_eq!(skip_json_whitespace(&spaces_with_tab), 16);
    }

    #[test]
    fn ws_stop_at_non_ws() {
        assert_eq!(skip_json_whitespace(b"\x20\x20\x20abc"), 3);
        assert_eq!(skip_json_whitespace(b"\n\n{"), 2);
        for pos in 0..16 {
            let mut buf = [b' '; 16];
            buf[pos] = b'a';
            assert_eq!(
                skip_json_whitespace(&buf),
                pos,
                "non-ws 'a' at position {pos}"
            );
        }
    }

    #[test]
    fn ws_excludes_non_json_whitespace() {
        // Vertical tab, form feed, NUL and U+00A0 are not JSON whitespace.
        assert_eq!(skip_json_whitespace(b"\x0B"), 0);
        assert_eq!(skip_json_whitespace(b"\x0C"), 0);
        assert_eq!(skip_json_whitespace(b"\x00"), 0);
        assert_eq!(skip_json_whitespace(b"\xC2\xA0"), 0);
    }

    #[test]
    fn ws_long_run() {
        let buf = [b' '; 1024];
        assert_eq!(skip_json_whitespace(&buf), 1024);
        let mut buf2 = [b' '; 256];
        buf2[200] = b'{';
        assert_eq!(skip_json_whitespace(&buf2), 200);
    }
}