use core::arch::aarch64::*;
/// Collapses the most significant bit of each of the 16 byte lanes of `v`
/// into a 16-bit mask.
///
/// Bit `i` of the result is the top bit of byte `i` of the vector when the
/// vector is viewed as two little-endian 64-bit halves (low half first).
///
/// # Safety
///
/// The CPU executing this function must support NEON.
#[inline]
#[target_feature(enable = "neon")]
unsafe fn neon_movemask(v: uint8x16_t) -> u16 {
    // Multiplying a word whose bytes are each 0 or 1 by this constant moves
    // byte `i` into bit `56 + i`. Every partial product lands on a distinct
    // bit, so nothing carries from one byte into the next.
    const GATHER: u64 = 0x0102040810204080;

    // Reduce each lane to 0 or 1, then view the vector as two 64-bit halves.
    let halves = vreinterpretq_u64_u8(vshrq_n_u8::<7>(v));
    let lo_bits = vgetq_lane_u64::<0>(halves).wrapping_mul(GATHER) >> 56;
    let hi_bits = vgetq_lane_u64::<1>(halves).wrapping_mul(GATHER) >> 56;

    // After the shift only the top byte survives, so each value is < 256.
    ((hi_bits << 8) | lo_bits) as u16
}
/// Searches `input[start..end]` for the first `"` or `\` byte.
///
/// Returns the position of the match relative to `start` (not an absolute
/// index into `input`), or `None` when neither byte occurs in the range.
///
/// # Panics
///
/// Panics if `start..end` is not a valid range of `input`.
#[inline]
pub fn find_quote_or_escape_neon(input: &[u8], start: usize, end: usize) -> Option<usize> {
// NOTE(review): this call is only sound if NEON is guaranteed on every target
// this module is compiled for — presumably the module is gated on aarch64; confirm.
unsafe { find_quote_or_escape_neon_impl(input, start, end) }
}
/// NEON scan of `input[start..end]` for the first `"` or `\` byte.
///
/// The range is consumed in 16-byte blocks; whatever is left after the last
/// full block is checked byte by byte. The returned position is relative to
/// `start`.
///
/// # Safety
///
/// The CPU executing this function must support NEON.
#[target_feature(enable = "neon")]
unsafe fn find_quote_or_escape_neon_impl(input: &[u8], start: usize, end: usize) -> Option<usize> {
    let span = end - start;
    let window = &input[start..end];
    let quote_splat = vdupq_n_u8(b'"');
    let escape_splat = vdupq_n_u8(b'\\');

    let mut blocks = window.chunks_exact(16);
    for (index, block) in blocks.by_ref().enumerate() {
        // `chunks_exact(16)` guarantees 16 readable bytes behind this pointer.
        let lanes = vld1q_u8(block.as_ptr());
        let hits = vorrq_u8(vceqq_u8(lanes, quote_splat), vceqq_u8(lanes, escape_splat));
        let bits = neon_movemask(hits);
        if bits != 0 {
            // The lowest set bit corresponds to the earliest matching byte.
            return Some(index * 16 + bits.trailing_zeros() as usize);
        }
    }

    // Fewer than 16 bytes remain: finish with a plain byte scan.
    let tail = blocks.remainder();
    let tail_base = span - tail.len();
    tail.iter()
        .position(|&byte| byte == b'"' || byte == b'\\')
        .map(|found| tail_base + found)
}
/// Searches `input[start..end]` for the first `'` byte.
///
/// Returns the position of the match relative to `start` (not an absolute
/// index into `input`), or `None` when the range contains no single quote.
///
/// # Panics
///
/// Panics if `start..end` is not a valid range of `input`.
#[inline]
pub fn find_single_quote_neon(input: &[u8], start: usize, end: usize) -> Option<usize> {
// NOTE(review): this call is only sound if NEON is guaranteed on every target
// this module is compiled for — presumably the module is gated on aarch64; confirm.
unsafe { find_single_quote_neon_impl(input, start, end) }
}
/// NEON scan of `input[start..end]` for the first `'` byte.
///
/// Full 16-byte blocks are compared with a vector of quotes; the trailing
/// bytes that do not fill a block are scanned one at a time. The returned
/// position is relative to `start`.
///
/// # Safety
///
/// The CPU executing this function must support NEON.
#[target_feature(enable = "neon")]
unsafe fn find_single_quote_neon_impl(input: &[u8], start: usize, end: usize) -> Option<usize> {
    let span = end - start;
    let window = &input[start..end];
    let quote_splat = vdupq_n_u8(b'\'');

    // Length of the prefix that can be processed in whole 16-byte blocks.
    let vector_end = span - span % 16;
    for base in (0..vector_end).step_by(16) {
        // `base + 16 <= vector_end <= window.len()`, so the load is in bounds.
        let lanes = vld1q_u8(window.as_ptr().add(base));
        let bits = neon_movemask(vceqq_u8(lanes, quote_splat));
        if bits != 0 {
            return Some(base + bits.trailing_zeros() as usize);
        }
    }

    window[vector_end..]
        .iter()
        .position(|&byte| byte == b'\'')
        .map(|found| vector_end + found)
}
/// Counts the run of consecutive space (`b' '`) bytes at the beginning of
/// `input[start..]`.
///
/// # Panics
///
/// Panics if `start > input.len()`.
#[inline]
pub fn count_leading_spaces_neon(input: &[u8], start: usize) -> usize {
// NOTE(review): this call is only sound if NEON is guaranteed on every target
// this module is compiled for — presumably the module is gated on aarch64; confirm.
unsafe { count_leading_spaces_neon_impl(input, start) }
}
/// NEON implementation of the leading-space count over `input[start..]`.
///
/// Whole 16-byte blocks are compared against a vector of spaces; the first
/// block containing a non-space byte ends the count. Bytes that do not fill
/// a block are counted one at a time.
///
/// # Safety
///
/// The CPU executing this function must support NEON.
#[target_feature(enable = "neon")]
unsafe fn count_leading_spaces_neon_impl(input: &[u8], start: usize) -> usize {
    let window = &input[start..];
    let space_splat = vdupq_n_u8(b' ');

    let mut counted = 0;
    for block in window.chunks_exact(16) {
        let is_space = vceqq_u8(vld1q_u8(block.as_ptr()), space_splat);
        // Invert the mask so that set bits mark the non-space lanes.
        let non_space_bits = !neon_movemask(is_space);
        if non_space_bits != 0 {
            return counted + non_space_bits.trailing_zeros() as usize;
        }
        counted += 16;
    }

    // Every full block was all spaces; keep counting in the short tail.
    let tail_run = window[counted..]
        .iter()
        .take_while(|&&byte| byte == b' ')
        .count();
    counted + tail_run
}
/// Replicates `b` into all eight bytes of a `u64`.
#[inline(always)]
const fn broadcast_byte(b: u8) -> u64 {
    // All eight bytes are identical, so the byte order is irrelevant.
    u64::from_ne_bytes([b; 8])
}
/// `0x01` replicated into every byte of a `u64`.
const LO_BYTES: u64 = 0x0101010101010101u64;
/// `0x80` replicated into every byte of a `u64`.
const HI_BYTES: u64 = 0x8080808080808080u64;

/// Flags every zero byte of `x`.
///
/// The result has `0x80` in each byte position where `x` holds `0x00` and
/// `0x00` everywhere else; it is non-zero exactly when `x` contains a zero
/// byte.
///
/// The flags are exact per byte. The shorter `(x - LO) & !x & HI` form is
/// only reliable for the lowest zero byte: its subtraction borrows across
/// byte boundaries and falsely flags a `0x01` byte sitting directly above a
/// zero byte, which corrupts any mask that keeps more than the first match.
#[inline(always)]
const fn has_zero_byte(x: u64) -> u64 {
    // `0x7F` in every byte.
    const LOW_SEVEN: u64 = HI_BYTES - LO_BYTES;

    // Per byte: the top bit becomes set iff any of the low seven bits is set.
    // The largest per-byte sum is 0x7F + 0x7F = 0xFE, so nothing carries into
    // the neighbouring byte.
    let low_bits_nonzero = (x & LOW_SEVEN) + LOW_SEVEN;

    // OR-ing in `x` also accounts for the top bit itself; a byte whose top
    // bit is still clear afterwards is exactly a zero byte.
    !(low_bits_nonzero | x) & HI_BYTES
}
/// Locates bytes of `x` that equal `target`.
///
/// Bytes equal to `target` become zero after XOR-ing `x` with `target`
/// repeated in every byte; the result is whatever `has_zero_byte` reports
/// for that difference, i.e. `0x80` flags in the matching byte positions.
#[inline(always)]
const fn find_byte(x: u64, target: u8) -> u64 {
    let difference = x ^ broadcast_byte(target);
    has_zero_byte(difference)
}
/// Compresses a word of per-byte `0x80` flags into an 8-bit mask.
///
/// For an input whose only set bits are the top bits of its bytes, bit `i`
/// of the result is set exactly when byte `i` (byte 0 being the least
/// significant) is flagged.
#[inline(always)]
const fn extract_mask_u64(x: u64) -> u8 {
    // Gathers byte `i` of a 0/1-per-byte word into bit `56 + i`.
    const GATHER: u64 = 0x0102040810204080u64;

    // Move each flag from bit 7 to bit 0 of its byte.
    let ones = x >> 7;
    let gathered = ones.wrapping_mul(GATHER);
    // The packed mask is the most significant byte of the product.
    gathered.to_be_bytes()[0]
}
/// Per-character-class bitmasks for one 8-byte chunk of input.
///
/// In each mask, bit `i` refers to byte `i` of the chunk.
#[derive(Debug, Clone, Copy, Default)]
#[allow(dead_code)]
pub struct YamlCharClassBroadword {
    /// Positions of `\n` bytes.
    pub newlines: u8,
    /// Positions of `:` bytes.
    pub colons: u8,
    /// Positions of `-` bytes.
    pub hyphens: u8,
    /// Positions of space bytes.
    pub spaces: u8,
    /// Positions of `"` bytes.
    pub quotes_double: u8,
    /// Positions of `'` bytes.
    pub quotes_single: u8,
    /// Positions of `\` bytes.
    pub backslashes: u8,
    /// Positions of `#` bytes.
    pub hash: u8,
}

#[allow(dead_code)]
impl YamlCharClassBroadword {
    /// Returns `true` when at least one mask has a bit set.
    #[inline(always)]
    pub fn has_any(&self) -> bool {
        // Exhaustive destructuring: a newly added field cannot be forgotten.
        let Self {
            newlines,
            colons,
            hyphens,
            spaces,
            quotes_double,
            quotes_single,
            backslashes,
            hash,
        } = *self;
        let combined = newlines
            | colons
            | hyphens
            | spaces
            | quotes_double
            | quotes_single
            | backslashes
            | hash;
        combined != 0
    }

    /// Union of the newline, colon, space and hash masks.
    #[inline(always)]
    pub fn value_terminators(&self) -> u8 {
        let separators = self.colons | self.spaces;
        let line_level = self.newlines | self.hash;
        separators | line_level
    }
}
/// Classifies the 8 bytes at `input[offset..offset + 8]`.
///
/// The bytes are read as one little-endian word, so bit `i` of every mask in
/// the result refers to `input[offset + i]`.
///
/// Returns `None` when fewer than 8 bytes are available at `offset`,
/// including when `offset + 8` would overflow `usize`.
#[inline]
#[allow(dead_code)]
pub fn classify_yaml_chars_broadword(
    input: &[u8],
    offset: usize,
) -> Option<YamlCharClassBroadword> {
    // Checked arithmetic plus `get` turns every out-of-range request into
    // `None` instead of an overflow or slice-index panic.
    let end = offset.checked_add(8)?;
    let bytes: [u8; 8] = input.get(offset..end)?.try_into().ok()?;
    let chunk = u64::from_le_bytes(bytes);

    // One 8-bit mask per character of interest.
    let mask_of = |target: u8| extract_mask_u64(find_byte(chunk, target));

    Some(YamlCharClassBroadword {
        newlines: mask_of(b'\n'),
        colons: mask_of(b':'),
        hyphens: mask_of(b'-'),
        spaces: mask_of(b' '),
        quotes_double: mask_of(b'"'),
        quotes_single: mask_of(b'\''),
        backslashes: mask_of(b'\\'),
        hash: mask_of(b'#'),
    })
}
/// Per-character-class bitmasks for one 16-byte chunk of input.
///
/// In each mask, bit `i` refers to byte `i` of the chunk.
#[derive(Debug, Clone, Copy, Default)]
#[allow(dead_code)]
pub struct YamlCharClass16 {
    /// Positions of `\n` bytes.
    pub newlines: u16,
    /// Positions of `:` bytes.
    pub colons: u16,
    /// Positions of `-` bytes.
    pub hyphens: u16,
    /// Positions of space bytes.
    pub spaces: u16,
    /// Positions of `"` bytes.
    pub quotes_double: u16,
    /// Positions of `'` bytes.
    pub quotes_single: u16,
    /// Positions of `\` bytes.
    pub backslashes: u16,
    /// Positions of `#` bytes.
    pub hash: u16,
}

#[allow(dead_code)]
impl YamlCharClass16 {
    /// Union of the newline, colon, space and hash masks.
    #[inline(always)]
    pub fn value_terminators(&self) -> u16 {
        let separators = self.colons | self.spaces;
        let line_level = self.newlines | self.hash;
        separators | line_level
    }
}
/// Classifies the 16 bytes at `input[offset..offset + 16]`.
///
/// The chunk is processed as two little-endian 64-bit words; bit `i` of
/// every mask in the result refers to `input[offset + i]`.
///
/// Returns `None` when fewer than 16 bytes are available at `offset`,
/// including when `offset + 16` would overflow `usize`.
#[inline]
pub fn classify_yaml_chars_16(input: &[u8], offset: usize) -> Option<YamlCharClass16> {
    // Checked arithmetic plus `get` turns every out-of-range request into
    // `None` instead of an overflow or slice-index panic.
    let end = offset.checked_add(16)?;
    let window = input.get(offset..end)?;
    let (first_half, second_half) = window.split_at(8);
    let word0 = u64::from_le_bytes(first_half.try_into().ok()?);
    let word1 = u64::from_le_bytes(second_half.try_into().ok()?);

    // Bytes 0..8 fill the low half of each mask, bytes 8..16 the high half.
    let mask_of = |target: u8| -> u16 {
        let low = u16::from(extract_mask_u64(find_byte(word0, target)));
        let high = u16::from(extract_mask_u64(find_byte(word1, target)));
        low | (high << 8)
    };

    Some(YamlCharClass16 {
        newlines: mask_of(b'\n'),
        colons: mask_of(b':'),
        hyphens: mask_of(b'-'),
        spaces: mask_of(b' '),
        quotes_double: mask_of(b'"'),
        quotes_single: mask_of(b'\''),
        backslashes: mask_of(b'\\'),
        hash: mask_of(b'#'),
    })
}
/// Finds the first `\n` in `input[start..]`, eight bytes at a time.
///
/// Returns the position of the newline relative to `start`, or `None` when
/// there is none.
///
/// # Panics
///
/// Panics if `start > input.len()`.
#[inline]
pub fn find_newline_broadword(input: &[u8], start: usize) -> Option<usize> {
    let window = &input[start..];

    let mut words = window.chunks_exact(8);
    for (index, word) in words.by_ref().enumerate() {
        let chunk = u64::from_le_bytes(word.try_into().unwrap());
        let flags = find_byte(chunk, b'\n');
        if flags != 0 {
            // The lowest flag sits in the top bit of the first matching
            // byte; dividing its bit index by 8 yields the byte index.
            return Some(index * 8 + (flags.trailing_zeros() / 8) as usize);
        }
    }

    // Fewer than 8 bytes remain: finish with a plain byte scan.
    let tail = words.remainder();
    let tail_base = window.len() - tail.len();
    tail.iter()
        .position(|&byte| byte == b'\n')
        .map(|found| tail_base + found)
}
/// Returns the absolute index in `input` at which the anchor name starting
/// at `start` ends.
///
/// Uses the NEON implementation when at least 16 bytes are available at
/// `start` and the scalar one otherwise. When `start` is at or beyond the
/// end of `input`, `start` itself is returned.
#[inline]
pub fn parse_anchor_name_neon(input: &[u8], start: usize) -> usize {
    match input.len().checked_sub(start) {
        // `start` is at or past the end: there is nothing to scan.
        None | Some(0) => start,
        // NOTE(review): sound only if NEON is guaranteed on every target this
        // module is compiled for — presumably gated on aarch64; confirm.
        Some(available) if available >= 16 => unsafe { parse_anchor_name_neon_impl(input, start) },
        Some(_) => parse_anchor_name_scalar(input, start),
    }
}
/// NEON scan for the end of an anchor name beginning at `start`.
///
/// Whitespace (space, tab, `\n`, `\r`) and the flow indicators `[`, `]`,
/// `{`, `}`, `,` always end the name. A `:` ends it only when the following
/// byte is whitespace; after any other colon the rest of the input is handed
/// to `parse_anchor_name_scalar`. Returns an absolute index into `input`.
///
/// # Safety
///
/// The CPU executing this function must support NEON.
#[target_feature(enable = "neon")]
unsafe fn parse_anchor_name_neon_impl(input: &[u8], start: usize) -> usize {
    let total = input.len();

    let sp = vdupq_n_u8(b' ');
    let ht = vdupq_n_u8(b'\t');
    let lf = vdupq_n_u8(b'\n');
    let cr_splat = vdupq_n_u8(b'\r');
    let open_bracket = vdupq_n_u8(b'[');
    let close_bracket = vdupq_n_u8(b']');
    let open_brace = vdupq_n_u8(b'{');
    let close_brace = vdupq_n_u8(b'}');
    let comma_splat = vdupq_n_u8(b',');
    let colon_splat = vdupq_n_u8(b':');

    let mut cursor = start;
    while cursor + 16 <= total {
        let lanes = vld1q_u8(input.as_ptr().add(cursor));

        // Bytes that end the name unconditionally.
        let blanks = vorrq_u8(vceqq_u8(lanes, sp), vceqq_u8(lanes, ht));
        let line_ends = vorrq_u8(vceqq_u8(lanes, lf), vceqq_u8(lanes, cr_splat));
        let brackets = vorrq_u8(
            vceqq_u8(lanes, open_bracket),
            vceqq_u8(lanes, close_bracket),
        );
        let braces = vorrq_u8(vceqq_u8(lanes, open_brace), vceqq_u8(lanes, close_brace));
        let flow = vorrq_u8(vorrq_u8(brackets, braces), vceqq_u8(lanes, comma_splat));
        let hard_stops = vorrq_u8(vorrq_u8(blanks, line_ends), flow);

        let hard_bits = neon_movemask(hard_stops);
        let colon_bits = neon_movemask(vceqq_u8(lanes, colon_splat));
        let candidate_bits = hard_bits | colon_bits;
        if candidate_bits == 0 {
            cursor += 16;
            continue;
        }

        // Earliest byte in this block that might end the name.
        let lane = candidate_bits.trailing_zeros() as usize;
        let candidate = cursor + lane;
        if hard_bits & (1 << lane) != 0 {
            return candidate;
        }

        // The candidate is a colon: it only terminates the name when it is
        // followed by whitespace. Otherwise continue with the scalar scanner
        // from the byte after it.
        let after_colon = candidate + 1;
        return match input.get(after_colon) {
            Some(b' ' | b'\t' | b'\n' | b'\r') => candidate,
            _ => parse_anchor_name_scalar(input, after_colon),
        };
    }

    // Fewer than 16 bytes remain.
    parse_anchor_name_scalar(input, cursor)
}
/// Byte-by-byte scan for the end of an anchor name beginning at `start`.
///
/// The name ends at the first whitespace byte (space, tab, `\n`, `\r`), at
/// the first flow indicator (`[`, `]`, `{`, `}`, `,`), at a `:` that is
/// immediately followed by whitespace, or at the end of `input`. Returns the
/// absolute index of the terminating byte; when `start` is at or beyond the
/// end of `input`, `start` is returned unchanged.
fn parse_anchor_name_scalar(input: &[u8], start: usize) -> usize {
    let mut cursor = start;
    while let Some(&byte) = input.get(cursor) {
        let ends_name = match byte {
            b' ' | b'\t' | b'\n' | b'\r' | b'[' | b']' | b'{' | b'}' | b',' => true,
            // A colon is part of the name unless whitespace follows it.
            b':' => matches!(
                input.get(cursor + 1),
                Some(b' ' | b'\t' | b'\n' | b'\r')
            ),
            _ => false,
        };
        if ends_name {
            break;
        }
        cursor += 1;
    }
    cursor
}
/// Returns the absolute index of the first byte at or after `start` that is
/// `"`, `\`, or a control character below `0x20`, or `bytes.len()` when no
/// such byte exists (including when `start` is at or beyond the end).
///
/// Uses the NEON implementation when at least 16 bytes remain and the scalar
/// one otherwise.
#[inline(always)]
pub fn find_json_escape_neon(bytes: &[u8], start: usize) -> usize {
    match bytes.len().checked_sub(start) {
        // `start` is at or past the end: nothing to scan.
        None | Some(0) => bytes.len(),
        // NOTE(review): sound only if NEON is guaranteed on every target this
        // module is compiled for — presumably gated on aarch64; confirm.
        Some(remaining) if remaining >= 16 => unsafe { find_json_escape_neon_impl(bytes, start) },
        Some(_) => find_json_escape_scalar(bytes, start),
    }
}
/// Byte-by-byte search of `bytes[start..]` for the first `"`, `\`, or
/// control character below `0x20`.
///
/// Returns the absolute index of that byte, or `bytes.len()` when there is
/// none.
///
/// # Panics
///
/// Panics if `start > bytes.len()`.
#[inline(always)]
fn find_json_escape_scalar(bytes: &[u8], start: usize) -> usize {
    bytes[start..]
        .iter()
        .position(|&byte| byte == b'"' || byte == b'\\' || byte < 0x20)
        .map_or(bytes.len(), |found| start + found)
}
/// NEON search of `bytes[start..]` for the first `"`, `\`, or control
/// character below `0x20`.
///
/// Returns the absolute index of that byte, or `bytes.len()` when there is
/// none. Whole 16-byte blocks are tested with vector compares; the trailing
/// bytes are checked one at a time.
///
/// # Safety
///
/// The CPU executing this function must support NEON.
#[target_feature(enable = "neon")]
unsafe fn find_json_escape_neon_impl(bytes: &[u8], start: usize) -> usize {
    let total = bytes.len();
    let window = &bytes[start..];
    let quote_splat = vdupq_n_u8(b'"');
    let escape_splat = vdupq_n_u8(b'\\');
    // Every byte strictly below this value is a control character.
    let first_printable = vdupq_n_u8(0x20);

    // Absolute index of the block currently being examined.
    let mut base = start;
    let mut blocks = window.chunks_exact(16);
    for block in blocks.by_ref() {
        let lanes = vld1q_u8(block.as_ptr());
        let specials = vorrq_u8(vceqq_u8(lanes, quote_splat), vceqq_u8(lanes, escape_splat));
        let hits = vorrq_u8(specials, vcltq_u8(lanes, first_printable));
        let bits = neon_movemask(hits);
        if bits != 0 {
            return base + bits.trailing_zeros() as usize;
        }
        base += 16;
    }

    // `base` now addresses the first byte of the short tail.
    blocks
        .remainder()
        .iter()
        .position(|&byte| byte == b'"' || byte == b'\\' || byte < 0x20)
        .map_or(total, |found| base + found)
}
/// Finds where a block scalar whose content starts at `start` ends.
///
/// Returns the absolute index of the first line start after a newline whose
/// line is indented by fewer than `min_indent` spaces and is not blank
/// (blank meaning its first non-space byte is `\n` or `\r`), or
/// `input.len()` when no such line exists.
#[inline]
pub fn find_block_scalar_end_neon(input: &[u8], start: usize, min_indent: usize) -> usize {
    if start < input.len() {
        // NOTE(review): sound only if NEON is guaranteed on every target this
        // module is compiled for — presumably gated on aarch64; confirm.
        unsafe { find_block_scalar_end_neon_impl(input, start, min_indent) }
    } else {
        input.len()
    }
}
/// NEON implementation of the block-scalar end search.
///
/// Newlines are located 16 bytes at a time. For every newline found, the
/// indentation of the following line is measured, and the line start is
/// returned when the line is not blank and is indented by fewer than
/// `min_indent` spaces. Once fewer than 17 bytes remain, the search is
/// finished by `find_block_scalar_end_scalar`.
///
/// # Safety
///
/// The CPU executing this function must support NEON.
#[target_feature(enable = "neon")]
unsafe fn find_block_scalar_end_neon_impl(input: &[u8], start: usize, min_indent: usize) -> usize {
    let total = input.len();
    let newline_splat = vdupq_n_u8(b'\n');
    let space_splat = vdupq_n_u8(b' ');

    let mut cursor = start;
    while cursor + 16 < total {
        let lanes = vld1q_u8(input.as_ptr().add(cursor));
        // One bit per newline in this block, visited from lowest to highest.
        let mut pending = neon_movemask(vceqq_u8(lanes, newline_splat));

        while pending != 0 {
            let line_start = cursor + pending.trailing_zeros() as usize + 1;
            if line_start >= total {
                return total;
            }

            let indent = if total - line_start >= 16 {
                // Enough bytes for a vector load: count spaces 16 at a time.
                let head = vld1q_u8(input.as_ptr().add(line_start));
                let non_space_bits = !neon_movemask(vceqq_u8(head, space_splat));
                if non_space_bits != 0 {
                    non_space_bits.trailing_zeros() as usize
                } else {
                    // All 16 bytes are spaces; count the rest of the run.
                    let extra = input[line_start + 16..]
                        .iter()
                        .take_while(|&&byte| byte == b' ')
                        .count();
                    16 + extra
                }
            } else {
                input[line_start..]
                    .iter()
                    .take_while(|&&byte| byte == b' ')
                    .count()
            };

            // A line made only of spaces up to a line break is blank and
            // never terminates the scalar.
            if let Some(&first) = input.get(line_start + indent) {
                let blank = first == b'\n' || first == b'\r';
                if !blank && indent < min_indent {
                    return line_start;
                }
            }

            // Clear the lowest set bit to move on to the next newline.
            pending &= pending - 1;
        }

        cursor += 16;
    }

    find_block_scalar_end_scalar(input, cursor, min_indent)
}
/// Byte-by-byte implementation of the block-scalar end search.
///
/// Scans `input` from `start` for newlines. For each one, the run of spaces
/// at the beginning of the next line is counted; the line start is returned
/// when the line is not blank (its first non-space byte is neither `\n` nor
/// `\r`) and its indentation is below `min_indent`. Returns `input.len()`
/// when no such line exists or when a newline is the last byte.
fn find_block_scalar_end_scalar(input: &[u8], start: usize, min_indent: usize) -> usize {
    let total = input.len();
    for newline_at in start..total {
        if input[newline_at] != b'\n' {
            continue;
        }
        let line_start = newline_at + 1;
        if line_start >= total {
            return total;
        }

        let indent = input[line_start..]
            .iter()
            .take_while(|&&byte| byte == b' ')
            .count();

        // Lines consisting only of spaces (up to a line break or the end of
        // input) never terminate the scalar.
        if let Some(&first) = input.get(line_start + indent) {
            let blank = first == b'\n' || first == b'\r';
            if !blank && indent < min_indent {
                return line_start;
            }
        }
    }
    total
}
#[cfg(test)]
mod tests {
    use super::*;

    // ---- quote / escape search -------------------------------------------

    #[test]
    fn test_neon_find_quote_basic() {
        let input = b"hello\"world";
        assert_eq!(find_quote_or_escape_neon(input, 0, input.len()), Some(5));
    }

    #[test]
    fn test_neon_find_backslash() {
        let input = b"hello\\world";
        assert_eq!(find_quote_or_escape_neon(input, 0, input.len()), Some(5));
    }

    #[test]
    fn test_neon_find_single_quote() {
        let input = b"hello'world";
        assert_eq!(find_single_quote_neon(input, 0, input.len()), Some(5));
    }

    #[test]
    fn test_neon_long_string() {
        let mut input = vec![b'a'; 100];
        input[50] = b'"';
        assert_eq!(find_quote_or_escape_neon(&input, 0, input.len()), Some(50));
    }

    #[test]
    fn test_neon_at_chunk_boundary() {
        let mut input = vec![b'a'; 32];
        input[16] = b'"';
        assert_eq!(find_quote_or_escape_neon(&input, 0, input.len()), Some(16));
    }

    #[test]
    fn test_neon_in_remainder() {
        let mut input = vec![b'a'; 20];
        input[18] = b'"';
        assert_eq!(find_quote_or_escape_neon(&input, 0, input.len()), Some(18));
    }

    // ---- leading-space counting ------------------------------------------

    #[test]
    fn test_neon_count_leading_spaces_basic() {
        // Two and four leading spaces respectively.
        assert_eq!(count_leading_spaces_neon(b"  hello", 0), 2);
        assert_eq!(count_leading_spaces_neon(b"    world", 0), 4);
        assert_eq!(count_leading_spaces_neon(b"no spaces", 0), 0);
    }

    #[test]
    fn test_neon_count_leading_spaces_long() {
        let mut input = vec![b' '; 50];
        input.extend_from_slice(b"content");
        assert_eq!(count_leading_spaces_neon(&input, 0), 50);
    }

    #[test]
    fn test_neon_count_leading_spaces_at_boundary() {
        let mut input = vec![b' '; 16];
        input.push(b'x');
        assert_eq!(count_leading_spaces_neon(&input, 0), 16);

        let mut input32 = vec![b' '; 32];
        input32.push(b'x');
        assert_eq!(count_leading_spaces_neon(&input32, 0), 32);
    }

    #[test]
    fn test_neon_count_leading_spaces_in_remainder() {
        let mut input = vec![b' '; 20];
        input.push(b'x');
        assert_eq!(count_leading_spaces_neon(&input, 0), 20);
    }

    // ---- broadword helpers -----------------------------------------------

    #[test]
    fn test_broadword_find_byte_basic() {
        let data = b"hello:world";
        let chunk = u64::from_le_bytes(data[0..8].try_into().unwrap());
        let colon_mask = find_byte(chunk, b':');
        assert_ne!(colon_mask, 0);
        assert_eq!(colon_mask.trailing_zeros() / 8, 5);
    }

    #[test]
    fn test_broadword_classify_basic() {
        let input = b"key: value\n";
        let class = classify_yaml_chars_broadword(input, 0).unwrap();
        // ':' is byte 3 and ' ' is byte 4 of the first 8-byte chunk.
        assert_eq!(class.colons, 0b00001000);
        assert_eq!(class.spaces, 0b00010000);
    }

    #[test]
    fn test_broadword_classify_multiple() {
        let input = b": - # \"\n\\";
        let class = classify_yaml_chars_broadword(input, 0).unwrap();
        assert_ne!(class.colons, 0);
        assert_ne!(class.hyphens, 0);
        assert_ne!(class.hash, 0);
        assert_ne!(class.quotes_double, 0);
        assert_ne!(class.newlines, 0);
    }

    #[test]
    fn test_broadword_classify_16_basic() {
        let input = b"0123456789abcdef";
        let class = classify_yaml_chars_16(input, 0).unwrap();
        assert_eq!(class.colons, 0);
        assert_eq!(class.newlines, 0);
    }

    #[test]
    fn test_broadword_classify_16_with_matches() {
        let input = b"key: val\nmore: x\n";
        let class = classify_yaml_chars_16(input, 0).unwrap();
        assert!(class.colons & (1 << 3) != 0);
        assert!(class.colons & (1 << 13) != 0);
        assert!(class.newlines & (1 << 8) != 0);
    }

    #[test]
    fn test_broadword_find_newline() {
        let input = b"hello\nworld";
        assert_eq!(find_newline_broadword(input, 0), Some(5));

        let input2 = b"no newline here";
        assert_eq!(find_newline_broadword(input2, 0), None);

        let mut long = vec![b'a'; 100];
        long[50] = b'\n';
        assert_eq!(find_newline_broadword(&long, 0), Some(50));
    }

    #[test]
    fn test_broadword_find_newline_in_remainder() {
        let mut input = vec![b'a'; 10];
        input[9] = b'\n';
        assert_eq!(find_newline_broadword(&input, 0), Some(9));
    }

    #[test]
    fn test_broadword_value_terminators() {
        let input = b"value: x";
        let class = classify_yaml_chars_broadword(input, 0).unwrap();
        let terminators = class.value_terminators();
        assert!(terminators & (1 << 5) != 0);
        assert!(terminators & (1 << 6) != 0);
    }

    // ---- anchor names ----------------------------------------------------

    #[test]
    fn test_parse_anchor_name_basic() {
        assert_eq!(parse_anchor_name_neon(b"anchor_name value", 0), 11);
        assert_eq!(parse_anchor_name_neon(b"anchor:value", 0), 12);
        assert_eq!(parse_anchor_name_neon(b"anchor: value", 0), 6);
        assert_eq!(parse_anchor_name_neon(b"anchor:\nvalue", 0), 6);
        assert_eq!(parse_anchor_name_neon(b"anchor\nvalue", 0), 6);
        assert_eq!(parse_anchor_name_neon(b"anchor\tvalue", 0), 6);
    }

    #[test]
    fn test_parse_anchor_name_flow_indicators() {
        assert_eq!(parse_anchor_name_neon(b"anchor[0]", 0), 6);
        assert_eq!(parse_anchor_name_neon(b"anchor]end", 0), 6);
        assert_eq!(parse_anchor_name_neon(b"anchor{key}", 0), 6);
        assert_eq!(parse_anchor_name_neon(b"anchor}end", 0), 6);
        assert_eq!(parse_anchor_name_neon(b"anchor,next", 0), 6);
    }

    #[test]
    fn test_parse_anchor_name_long() {
        let mut input = vec![b'a'; 50];
        input.push(b' ');
        input.extend_from_slice(b"value");
        assert_eq!(parse_anchor_name_neon(&input, 0), 50);
    }

    #[test]
    fn test_parse_anchor_name_no_terminator() {
        assert_eq!(parse_anchor_name_neon(b"anchor_name", 0), 11);
    }

    #[test]
    fn test_parse_anchor_name_with_offset() {
        assert_eq!(parse_anchor_name_neon(b"&anchor_name value", 1), 12);
    }

    // ---- block scalars ---------------------------------------------------
    //
    // Content lines are indented by two spaces so that they satisfy
    // `min_indent == 2`; the scalar ends at the first less-indented line.

    #[test]
    fn test_find_block_scalar_end_basic() {
        let input = b"|\n  line1\n  line2\nnext_key:";
        let result = find_block_scalar_end_neon(input, 2, 2);
        // Index of the 'n' in "next_key".
        assert_eq!(result, 18);
    }

    #[test]
    fn test_find_block_scalar_end_eof() {
        let input = b"|\n  line1\n  line2";
        let result = find_block_scalar_end_neon(input, 2, 2);
        assert_eq!(result, input.len());
    }

    #[test]
    fn test_find_block_scalar_end_long() {
        let mut input = b"|\n".to_vec();
        for _ in 0..5 {
            input.extend_from_slice(b"  ");
            input.extend_from_slice(&[b'x'; 20]);
            input.push(b'\n');
        }
        input.extend_from_slice(b"next:");
        let result = find_block_scalar_end_neon(&input, 2, 2);
        assert_eq!(result, input.len() - 5);
    }

    #[test]
    fn test_find_block_scalar_end_empty_lines() {
        let input = b"|\n  line1\n\n  line2\nnext:";
        let result = find_block_scalar_end_neon(input, 2, 2);
        // The empty line does not end the scalar; "next:" at index 19 does.
        assert_eq!(result, 19);
    }

    #[test]
    fn test_find_block_scalar_matches_scalar() {
        let test_cases: &[(&[u8], usize)] = &[
            (b"|\n  line1\n  line2\nnext:", 2),
            (b"|\n    deep\n    indent\n  shallow:", 4),
            (b"|\n  a\n  b\n  c\n", 2),
        ];
        for &(input, min_indent) in test_cases {
            let neon_result = find_block_scalar_end_neon(input, 2, min_indent);
            let scalar_result = find_block_scalar_end_scalar(input, 2, min_indent);
            assert_eq!(
                neon_result,
                scalar_result,
                "Mismatch for input {:?} with min_indent={}",
                String::from_utf8_lossy(input),
                min_indent
            );
        }
    }

    // ---- JSON escapes ----------------------------------------------------

    #[test]
    fn test_find_json_escape_quote() {
        let input = b"hello\"world";
        assert_eq!(find_json_escape_neon(input, 0), 5);
    }

    #[test]
    fn test_find_json_escape_backslash() {
        let input = b"hello\\world";
        assert_eq!(find_json_escape_neon(input, 0), 5);
    }

    #[test]
    fn test_find_json_escape_control_char() {
        let input = b"hello\tworld";
        assert_eq!(find_json_escape_neon(input, 0), 5);
        let input2 = b"hello\nworld";
        assert_eq!(find_json_escape_neon(input2, 0), 5);
        let input3 = b"hello\x00world";
        assert_eq!(find_json_escape_neon(input3, 0), 5);
    }

    #[test]
    fn test_find_json_escape_no_escape() {
        let input = b"hello world";
        assert_eq!(find_json_escape_neon(input, 0), input.len());
    }

    #[test]
    fn test_find_json_escape_long_string() {
        let mut input = vec![b'a'; 100];
        input[50] = b'"';
        assert_eq!(find_json_escape_neon(&input, 0), 50);
    }

    #[test]
    fn test_find_json_escape_at_chunk_boundary() {
        let mut input = vec![b'a'; 32];
        input[16] = b'"';
        assert_eq!(find_json_escape_neon(&input, 0), 16);
    }

    #[test]
    fn test_find_json_escape_in_remainder() {
        let mut input = vec![b'a'; 20];
        input[18] = b'\\';
        assert_eq!(find_json_escape_neon(&input, 0), 18);
    }

    #[test]
    fn test_find_json_escape_with_offset() {
        let input = b"abc\"def\"ghi";
        assert_eq!(find_json_escape_neon(input, 0), 3);
        assert_eq!(find_json_escape_neon(input, 4), 7);
    }

    #[test]
    fn test_find_json_escape_control_chars_throughout() {
        for ctrl in 0u8..0x20 {
            let mut input = vec![b'x'; 50];
            input[25] = ctrl;
            assert_eq!(
                find_json_escape_neon(&input, 0),
                25,
                "Failed for control char 0x{:02x}",
                ctrl
            );
        }
    }

    #[test]
    fn test_find_json_escape_empty() {
        assert_eq!(find_json_escape_neon(b"", 0), 0);
    }

    #[test]
    fn test_find_json_escape_start_past_end() {
        let input = b"hello";
        assert_eq!(find_json_escape_neon(input, 10), input.len());
    }

    /// Independent reference implementation used as the oracle below; it
    /// deliberately shadows the function of the same name imported from the
    /// parent module.
    fn find_json_escape_scalar(bytes: &[u8], start: usize) -> usize {
        for (i, &b) in bytes[start..].iter().enumerate() {
            if b == b'"' || b == b'\\' || b < 0x20 {
                return start + i;
            }
        }
        bytes.len()
    }

    #[test]
    fn test_find_json_escape_matches_scalar() {
        let test_cases: &[&[u8]] = &[
            b"",
            b"\"",
            b"\\",
            b"\t",
            b"\n",
            b"\r",
            b"\x00",
            b"no escape chars here",
            b"escape at end\"",
            b"\"escape at start",
            b"has\\backslash",
            b"has\ttab",
            b"has\nnewline",
            b"multiple \"escapes\" here\\",
            &[b'x'; 100],
        ];
        for &input in test_cases {
            let scalar = find_json_escape_scalar(input, 0);
            let neon = find_json_escape_neon(input, 0);
            assert_eq!(
                scalar,
                neon,
                "Mismatch for {:?}: scalar={}, neon={}",
                String::from_utf8_lossy(input),
                scalar,
                neon
            );
        }

        let input = b"abc\"def\\ghi\tjkl";
        for start in 0..input.len() {
            let scalar = find_json_escape_scalar(input, start);
            let neon = find_json_escape_neon(input, start);
            assert_eq!(
                scalar, neon,
                "Mismatch at offset {}: scalar={}, neon={}",
                start, scalar, neon
            );
        }
    }
}