use core::arch::aarch64::*;
use crate::json::simple::{SemiIndex as SimpleSemiIndex, State as SimpleState};
use crate::json::standard::{SemiIndex, State};
use crate::json::BitWriter;
// Structural-character classification.
//
// A byte `b` is classified as
// `LO_NIBBLE_TABLE[b & 0x0F] & HI_NIBBLE_TABLE[b >> 4]`: a flag bit survives
// the AND only when BOTH nibbles of `b` agree with a character that carries
// that flag. Each flag below is therefore assigned so that the set of
// (low nibble, high nibble) pairs it covers is exactly the intended bytes.

/// Set for an opening bracket: `[` (0x5B) or `{` (0x7B).
const FLAG_OPEN: u8 = 0x01;
/// Set for a closing bracket: `]` (0x5D) or `}` (0x7D).
const FLAG_CLOSE: u8 = 0x02;
/// Set for a comma `,` (0x2C).
const FLAG_COMMA: u8 = 0x04;
/// Set for a double quote `"` (0x22).
const FLAG_QUOTE: u8 = 0x08;
/// Set for a backslash `\` (0x5C).
const FLAG_BACKSLASH: u8 = 0x10;
/// Set for a colon `:` (0x3A).
const FLAG_COLON: u8 = 0x20;
/// Mask matching either delimiter (`,` or `:`).
///
/// Comma and colon deliberately use two different bits. With a single shared
/// bit the nibble product {A, C} x {2, 3} would also match `*` (0x2A) and
/// `<` (0x3C). Consumers must test this mask with "any bit set" semantics.
const FLAG_DELIM: u8 = FLAG_COMMA | FLAG_COLON;

/// Flags that are possible for a byte given only its LOW nibble.
const LO_NIBBLE_TABLE: [u8; 16] = [
    0x00,                        // 0
    0x00,                        // 1
    FLAG_QUOTE,                  // 2: `"` (0x22)
    0x00,                        // 3
    0x00,                        // 4
    0x00,                        // 5
    0x00,                        // 6
    0x00,                        // 7
    0x00,                        // 8
    0x00,                        // 9
    FLAG_COLON,                  // A: `:` (0x3A)
    FLAG_OPEN,                   // B: `[` (0x5B), `{` (0x7B)
    FLAG_COMMA | FLAG_BACKSLASH, // C: `,` (0x2C), `\` (0x5C)
    FLAG_CLOSE,                  // D: `]` (0x5D), `}` (0x7D)
    0x00,                        // E
    0x00,                        // F
];

/// Flags that are possible for a byte given only its HIGH nibble.
const HI_NIBBLE_TABLE: [u8; 16] = [
    0x00,                                    // 0
    0x00,                                    // 1
    FLAG_QUOTE | FLAG_COMMA,                 // 2: `"` (0x22), `,` (0x2C)
    FLAG_COLON,                              // 3: `:` (0x3A)
    0x00,                                    // 4
    FLAG_OPEN | FLAG_CLOSE | FLAG_BACKSLASH, // 5: `[` `\` `]`
    0x00,                                    // 6
    FLAG_OPEN | FLAG_CLOSE,                  // 7: `{` `}`
    0x00,                                    // 8
    0x00,                                    // 9
    0x00,                                    // A
    0x00,                                    // B
    0x00,                                    // C
    0x00,                                    // D
    0x00,                                    // E
    0x00,                                    // F
];
// Value-character classification tables.
//
// A byte `b` counts as a value character when
// `VALUE_LO_TABLE[b & 0x0F] & VALUE_HI_TABLE[b >> 4]` is non-zero.
// Bit usage:
//   0x01 - high nibble 3 with low nibble 0-9: the digits `0`-`9`
//   0x02 - high nibble 4 or 5 (any low nibble): bytes 0x40-0x5F
//   0x04 - high nibble 6 or 7 (any low nibble): bytes 0x60-0x7F
//   0x08 - high nibble 2 with low nibble B or D: `+` and `-`
//   0x10 - high nibble 2 with low nibble E: `.`
// Because every low-nibble entry carries bits 0x02 and 0x04, the match for
// rows 4-7 is the whole 0x40-0x7F range, i.e. it also covers non-alphanumeric
// bytes such as `@`, `_`, `[`, `\` and `{`.

/// Value-character bits that are possible for a byte given only its low nibble.
const VALUE_LO_TABLE: [u8; 16] = [
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x06, 0x0E, 0x06, 0x0E, 0x16, 0x06, ];
/// Value-character bits that are possible for a byte given only its high nibble.
const VALUE_HI_TABLE: [u8; 16] = [
0x00, 0x00, 0x18, 0x01, 0x02, 0x02, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ];
/// Collects the most significant bit of each of the 16 bytes in `v` into a
/// `u16`, with byte `i` of the vector landing in bit `i` of the result.
///
/// # Safety
///
/// The CPU executing this function must support NEON.
#[inline]
#[target_feature(enable = "neon")]
unsafe fn neon_movemask(v: uint8x16_t) -> u16 {
    // Multiplying eight 0/1 bytes by this constant moves byte `i` to bit
    // `56 + i` of the product; the partial products never overlap, so no
    // carries disturb the top byte.
    const GATHER: u64 = 0x0102040810204080;

    // Reduce every byte to 0 or 1 (its former top bit) and view as 2 x u64.
    let msb_lanes = vreinterpretq_u64_u8(vshrq_n_u8::<7>(v));
    let pack = |lane: u64| -> u16 { (lane.wrapping_mul(GATHER) >> 56) as u8 as u16 };

    let bits_0_to_7 = pack(vgetq_lane_u64::<0>(msb_lanes));
    let bits_8_to_15 = pack(vgetq_lane_u64::<1>(msb_lanes));
    bits_0_to_7 | (bits_8_to_15 << 8)
}
/// Per-category bitmasks for one 16-byte chunk: bit `i` of a field describes
/// byte `i` of the chunk.
#[derive(Debug, Clone, Copy)]
struct CharClass {
    /// Bytes classified as a double quote.
    quotes: u16,
    /// Bytes classified as a backslash.
    backslashes: u16,
    /// Bytes classified as an opening bracket.
    opens: u16,
    /// Bytes classified as a closing bracket.
    closes: u16,
    /// Bytes classified as a delimiter.
    delims: u16,
    /// Bytes classified as value characters.
    value_chars: u16,
    /// Union of `quotes` and `backslashes`.
    string_special: u16,
}

/// The masks of two adjacent 16-byte chunks fused into 32-bit masks.
///
/// Only the categories listed here are carried over; `backslashes` and
/// `delims` have no 32-bit counterpart.
#[derive(Debug, Clone, Copy)]
struct CharClass32 {
    quotes: u32,
    opens: u32,
    closes: u32,
    value_chars: u32,
    string_special: u32,
}

impl CharClass32 {
    /// Fuses two chunk classifications: `lo` fills bits 0-15 and `hi` fills
    /// bits 16-31 of every mask.
    #[inline]
    fn from_pair(lo: CharClass, hi: CharClass) -> Self {
        let fuse = |low: u16, high: u16| -> u32 { u32::from(low) | (u32::from(high) << 16) };
        Self {
            quotes: fuse(lo.quotes, hi.quotes),
            opens: fuse(lo.opens, hi.opens),
            closes: fuse(lo.closes, hi.closes),
            value_chars: fuse(lo.value_chars, hi.value_chars),
            string_special: fuse(lo.string_special, hi.string_special),
        }
    }
}
/// Classifies the 16 bytes of `chunk` and returns one bitmask per character
/// category (bit `i` of each mask describes byte `i`).
///
/// Each byte is split into its low and high nibble; both nibbles index a
/// 16-entry table and the two lookups are ANDed, once with the structural
/// tables and once with the value-character tables.
///
/// # Safety
///
/// The CPU executing this function must support NEON.
#[inline]
#[target_feature(enable = "neon")]
unsafe fn classify_chars(chunk: uint8x16_t) -> CharClass {
    unsafe {
        // Table indices shared by both classifications.
        let low = vandq_u8(chunk, vdupq_n_u8(0x0F));
        let high = vshrq_n_u8::<4>(chunk);

        // Structural flags: a flag bit survives only if both nibbles allow it.
        let structural = vandq_u8(
            vqtbl1q_u8(vld1q_u8(LO_NIBBLE_TABLE.as_ptr()), low),
            vqtbl1q_u8(vld1q_u8(HI_NIBBLE_TABLE.as_ptr()), high),
        );

        // Value characters: any non-zero AND result counts.
        let value = vandq_u8(
            vqtbl1q_u8(vld1q_u8(VALUE_LO_TABLE.as_ptr()), low),
            vqtbl1q_u8(vld1q_u8(VALUE_HI_TABLE.as_ptr()), high),
        );
        let is_value = vmvnq_u8(vceqq_u8(value, vdupq_n_u8(0)));

        // `vtstq_u8` yields 0xFF for lanes sharing a bit with the flag, which
        // is what `neon_movemask` needs (it reads each lane's top bit).
        let quotes = neon_movemask(vtstq_u8(structural, vdupq_n_u8(FLAG_QUOTE)));
        let backslashes = neon_movemask(vtstq_u8(structural, vdupq_n_u8(FLAG_BACKSLASH)));
        let opens = neon_movemask(vtstq_u8(structural, vdupq_n_u8(FLAG_OPEN)));
        let closes = neon_movemask(vtstq_u8(structural, vdupq_n_u8(FLAG_CLOSE)));
        let delims = neon_movemask(vtstq_u8(structural, vdupq_n_u8(FLAG_DELIM)));
        let value_chars = neon_movemask(is_value);

        CharClass {
            quotes,
            backslashes,
            opens,
            closes,
            delims,
            value_chars,
            string_special: quotes | backslashes,
        }
    }
}
/// Byte-at-a-time state machine over one classified chunk (at most 16 bytes).
///
/// Exactly one bit is appended to `ib` per processed byte; `bp` receives
/// zero, one or two bits depending on the byte's category and the current
/// state. Returns the state after the last processed byte.
///
/// Only `bytes.len()` is consulted; the classification comes from `class`.
#[inline]
#[allow(dead_code)]
fn process_chunk_standard_serial(
    class: CharClass,
    mut state: State,
    ib: &mut BitWriter,
    bp: &mut BitWriter,
    bytes: &[u8],
) -> State {
    let count = bytes.len().min(16);
    for pos in 0..count {
        // True when the given category mask has the bit for this byte set.
        let hit = |mask: u16| ((mask >> pos) & 1) != 0;

        state = match state {
            State::InJson => {
                if hit(class.opens) {
                    ib.write_1();
                    bp.write_1();
                    State::InJson
                } else if hit(class.closes) {
                    ib.write_0();
                    bp.write_0();
                    State::InJson
                } else if hit(class.delims) {
                    ib.write_0();
                    State::InJson
                } else if hit(class.value_chars) {
                    // First byte of a bare value: an open/close pair in `bp`.
                    ib.write_1();
                    bp.write_1();
                    bp.write_0();
                    State::InValue
                } else if hit(class.quotes) {
                    // Opening quote of a string: an open/close pair in `bp`.
                    ib.write_1();
                    bp.write_1();
                    bp.write_0();
                    State::InString
                } else {
                    ib.write_0();
                    State::InJson
                }
            }
            State::InString => {
                ib.write_0();
                if hit(class.quotes) {
                    State::InJson
                } else if hit(class.backslashes) {
                    State::InEscape
                } else {
                    State::InString
                }
            }
            State::InEscape => {
                // The escaped byte is consumed unconditionally.
                ib.write_0();
                State::InString
            }
            State::InValue => {
                if hit(class.opens) {
                    ib.write_1();
                    bp.write_1();
                    State::InJson
                } else if hit(class.closes) {
                    ib.write_0();
                    bp.write_0();
                    State::InJson
                } else if hit(class.delims) {
                    ib.write_0();
                    State::InJson
                } else if hit(class.value_chars) {
                    ib.write_0();
                    State::InValue
                } else {
                    ib.write_0();
                    State::InJson
                }
            }
        };
    }
    state
}
/// Runs the state machine over one classified chunk of at most 16 bytes,
/// skipping ahead inside strings.
///
/// Outside strings every byte is handled individually. Inside a string the
/// function jumps straight to the next quote or backslash (taken from
/// `class.string_special`) and emits the skipped `ib` zeros in bulk.
/// Exactly one `ib` bit is produced per processed byte. Returns the state
/// after the last processed byte.
///
/// Only `bytes.len()` is consulted; the classification comes from `class`.
#[inline]
fn process_chunk_standard(
    class: CharClass,
    mut state: State,
    ib: &mut BitWriter,
    bp: &mut BitWriter,
    bytes: &[u8],
) -> State {
    let end = bytes.len().min(16);
    let mut pos = 0;
    while pos < end {
        match state {
            State::InString => {
                // Quotes/backslashes at or after the current position.
                let pending = class.string_special & (u16::MAX << pos);
                if pending == 0 {
                    // The string runs past this chunk.
                    ib.write_zeros(end - pos);
                    return State::InString;
                }
                let target = pending.trailing_zeros() as usize;
                if target > pos {
                    ib.write_zeros(target - pos);
                    pos = target;
                }
                // `pos` now sits on the quote or backslash itself.
                ib.write_0();
                state = if (class.quotes & (1u16 << pos)) != 0 {
                    State::InJson
                } else {
                    State::InEscape
                };
            }
            State::InEscape => {
                // The escaped byte is consumed unconditionally.
                ib.write_0();
                state = State::InString;
            }
            State::InJson | State::InValue => {
                let inside_value = matches!(state, State::InValue);
                let here = 1u16 << pos;
                if (class.opens & here) != 0 {
                    bp.write_1();
                    ib.write_1();
                    state = State::InJson;
                } else if (class.closes & here) != 0 {
                    bp.write_0();
                    ib.write_0();
                    state = State::InJson;
                } else if (class.quotes & here) != 0 {
                    bp.write_1();
                    bp.write_0();
                    ib.write_1();
                    state = State::InString;
                } else if (class.value_chars & here) != 0 {
                    if inside_value {
                        // Continuation of the value that is already open.
                        ib.write_0();
                    } else {
                        // First byte of a new bare value.
                        bp.write_1();
                        bp.write_0();
                        ib.write_1();
                        state = State::InValue;
                    }
                } else {
                    // Anything else ends a value (if one was open).
                    ib.write_0();
                    state = State::InJson;
                }
            }
        }
        pos += 1;
    }
    state
}
/// 32-byte counterpart of the 16-byte chunk processor: runs the state machine
/// over the first `len` (capped at 32) positions described by `class`.
///
/// Outside strings every byte is handled individually. Inside a string the
/// function jumps to the next quote or backslash (taken from
/// `class.string_special`) and emits the skipped `ib` zeros in bulk.
/// Exactly one `ib` bit is produced per processed byte. Returns the state
/// after the last processed byte.
#[inline]
fn process_chunk_standard_32(
    class: CharClass32,
    mut state: State,
    ib: &mut BitWriter,
    bp: &mut BitWriter,
    len: usize,
) -> State {
    let end = len.min(32);
    let mut pos = 0;
    while pos < end {
        match state {
            State::InString => {
                // Quotes/backslashes at or after the current position.
                let pending = class.string_special & (u32::MAX << pos);
                if pending == 0 {
                    // The string runs past this chunk.
                    ib.write_zeros(end - pos);
                    return State::InString;
                }
                let target = pending.trailing_zeros() as usize;
                if target > pos {
                    ib.write_zeros(target - pos);
                    pos = target;
                }
                // `pos` now sits on the quote or backslash itself.
                ib.write_0();
                state = if (class.quotes & (1u32 << pos)) != 0 {
                    State::InJson
                } else {
                    State::InEscape
                };
            }
            State::InEscape => {
                // The escaped byte is consumed unconditionally.
                ib.write_0();
                state = State::InString;
            }
            State::InJson | State::InValue => {
                let inside_value = matches!(state, State::InValue);
                let here = 1u32 << pos;
                if (class.opens & here) != 0 {
                    bp.write_1();
                    ib.write_1();
                    state = State::InJson;
                } else if (class.closes & here) != 0 {
                    bp.write_0();
                    ib.write_0();
                    state = State::InJson;
                } else if (class.quotes & here) != 0 {
                    bp.write_1();
                    bp.write_0();
                    ib.write_1();
                    state = State::InString;
                } else if (class.value_chars & here) != 0 {
                    if inside_value {
                        // Continuation of the value that is already open.
                        ib.write_0();
                    } else {
                        // First byte of a new bare value.
                        bp.write_1();
                        bp.write_0();
                        ib.write_1();
                        state = State::InValue;
                    }
                } else {
                    // Anything else ends a value (if one was open).
                    ib.write_0();
                    state = State::InJson;
                }
            }
        }
        pos += 1;
    }
    state
}
/// Builds a [`SemiIndex`] for `json` using the NEON implementation.
///
/// This is the safe entry point; all work is done by
/// `build_semi_index_standard_neon`.
pub fn build_semi_index_standard(json: &[u8]) -> SemiIndex {
// NOTE(review): the callee is `#[target_feature(enable = "neon")]`, so this
// call is only sound when the running CPU supports NEON. Presumably this
// module is only compiled for AArch64 targets where NEON is always present —
// confirm at the module declaration.
unsafe { build_semi_index_standard_neon(json) }
}
/// NEON implementation behind `build_semi_index_standard`.
///
/// The input is consumed in three phases: as many 32-byte blocks as fit,
/// then at most one 16-byte block, then a final partial block that is copied
/// into a zero-padded 16-byte buffer so the vector load stays in bounds.
///
/// # Safety
///
/// The CPU executing this function must support NEON.
#[target_feature(enable = "neon")]
unsafe fn build_semi_index_standard_neon(json: &[u8]) -> SemiIndex {
    let total = json.len();
    let words = total.div_ceil(64);
    let mut ib = BitWriter::with_capacity(words);
    let mut bp = BitWriter::with_capacity(words * 2);
    let mut state = State::InJson;
    let mut cursor = 0;
    let base = json.as_ptr();

    // Phase 1: full 32-byte blocks, classified as two 16-byte halves.
    while total - cursor >= 32 {
        // SAFETY: at least 32 bytes remain from `cursor`, so both 16-byte
        // loads read inside `json`.
        let (lower, upper) = unsafe {
            (
                classify_chars(vld1q_u8(base.add(cursor))),
                classify_chars(vld1q_u8(base.add(cursor + 16))),
            )
        };
        let class = CharClass32::from_pair(lower, upper);
        state = process_chunk_standard_32(class, state, &mut ib, &mut bp, 32);
        cursor += 32;
    }

    // Phase 2: at most one full 16-byte block.
    if total - cursor >= 16 {
        // SAFETY: at least 16 bytes remain from `cursor`.
        let class = unsafe { classify_chars(vld1q_u8(base.add(cursor))) };
        let block = &json[cursor..cursor + 16];
        state = process_chunk_standard(class, state, &mut ib, &mut bp, block);
        cursor += 16;
    }

    // Phase 3: fewer than 16 trailing bytes, staged in a zero-padded buffer.
    let tail = &json[cursor..];
    if !tail.is_empty() {
        let mut padded = [0u8; 16];
        padded[..tail.len()].copy_from_slice(tail);
        // SAFETY: `padded` is exactly 16 bytes long.
        let class = unsafe { classify_chars(vld1q_u8(padded.as_ptr())) };
        state = process_chunk_standard(class, state, &mut ib, &mut bp, tail);
    }

    SemiIndex {
        state,
        ib: ib.finish(),
        bp: bp.finish(),
    }
}
/// Runs the simple-cursor state machine over one classified chunk of at most
/// 16 bytes.
///
/// Outside strings, brackets and delimiters each append a 1 to `ib` and a
/// two-bit code to `bp` (`11` open, `00` close, `01` delimiter, in write
/// order); every other byte appends a single 0 to `ib`. Returns the state
/// after the last processed byte.
///
/// Only `bytes.len()` is consulted; the classification comes from `class`.
#[inline]
fn process_chunk_simple(
    class: CharClass,
    mut state: SimpleState,
    ib: &mut BitWriter,
    bp: &mut BitWriter,
    bytes: &[u8],
) -> SimpleState {
    let count = bytes.len().min(16);
    for pos in 0..count {
        // True when the given category mask has the bit for this byte set.
        let hit = |mask: u16| ((mask >> pos) & 1) != 0;

        state = match state {
            SimpleState::InJson if hit(class.opens) => {
                bp.write_1();
                bp.write_1();
                ib.write_1();
                SimpleState::InJson
            }
            SimpleState::InJson if hit(class.closes) => {
                bp.write_0();
                bp.write_0();
                ib.write_1();
                SimpleState::InJson
            }
            SimpleState::InJson if hit(class.delims) => {
                bp.write_0();
                bp.write_1();
                ib.write_1();
                SimpleState::InJson
            }
            SimpleState::InJson if hit(class.quotes) => {
                ib.write_0();
                SimpleState::InString
            }
            SimpleState::InJson => {
                ib.write_0();
                SimpleState::InJson
            }
            SimpleState::InString => {
                ib.write_0();
                if hit(class.quotes) {
                    SimpleState::InJson
                } else if hit(class.backslashes) {
                    SimpleState::InEscape
                } else {
                    SimpleState::InString
                }
            }
            SimpleState::InEscape => {
                // The escaped byte is consumed unconditionally.
                ib.write_0();
                SimpleState::InString
            }
        };
    }
    state
}
/// Builds a [`SimpleSemiIndex`] for `json` using the NEON implementation.
///
/// This is the safe entry point; all work is done by
/// `build_semi_index_simple_neon`.
pub fn build_semi_index_simple(json: &[u8]) -> SimpleSemiIndex {
// NOTE(review): the callee is `#[target_feature(enable = "neon")]`, so this
// call is only sound when the running CPU supports NEON. Presumably this
// module is only compiled for AArch64 targets where NEON is always present —
// confirm at the module declaration.
unsafe { build_semi_index_simple_neon(json) }
}
/// NEON implementation behind `build_semi_index_simple`.
///
/// The input is consumed in full 16-byte blocks; a final partial block is
/// copied into a zero-padded 16-byte buffer so the vector load stays in
/// bounds.
///
/// # Safety
///
/// The CPU executing this function must support NEON.
#[target_feature(enable = "neon")]
unsafe fn build_semi_index_simple_neon(json: &[u8]) -> SimpleSemiIndex {
    let total = json.len();
    let words = total.div_ceil(64);
    let mut ib = BitWriter::with_capacity(words);
    let mut bp = BitWriter::with_capacity(words * 2);
    let mut state = SimpleState::InJson;
    let mut cursor = 0;
    let base = json.as_ptr();

    // Full 16-byte blocks.
    while total - cursor >= 16 {
        // SAFETY: at least 16 bytes remain from `cursor`.
        let class = unsafe { classify_chars(vld1q_u8(base.add(cursor))) };
        let block = &json[cursor..cursor + 16];
        state = process_chunk_simple(class, state, &mut ib, &mut bp, block);
        cursor += 16;
    }

    // Fewer than 16 trailing bytes, staged in a zero-padded buffer.
    let tail = &json[cursor..];
    if !tail.is_empty() {
        let mut padded = [0u8; 16];
        padded[..tail.len()].copy_from_slice(tail);
        // SAFETY: `padded` is exactly 16 bytes long.
        let class = unsafe { classify_chars(vld1q_u8(padded.as_ptr())) };
        state = process_chunk_simple(class, state, &mut ib, &mut bp, tail);
    }

    SimpleSemiIndex {
        state,
        ib: ib.finish(),
        bp: bp.finish(),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns bit `i` of a little-endian-bit-ordered word array; positions
    /// past the end of `words` read as `false`.
    fn get_bit(words: &[u64], i: usize) -> bool {
        let word_idx = i / 64;
        let bit_idx = i % 64;
        if word_idx < words.len() {
            (words[word_idx] >> bit_idx) & 1 == 1
        } else {
            false
        }
    }

    /// Renders the first `n` bits of `words` as a string of `'0'`/`'1'`
    /// characters so that assertion failures are readable.
    fn bits_to_string(words: &[u64], n: usize) -> String {
        (0..n)
            .map(|i| if get_bit(words, i) { '1' } else { '0' })
            .collect()
    }

    #[test]
    fn test_neon_movemask() {
        unsafe {
            let v = vdupq_n_u8(0x80);
            assert_eq!(neon_movemask(v), 0xFFFF);
            let v = vdupq_n_u8(0x7F);
            assert_eq!(neon_movemask(v), 0x0000);
            let bytes: [u8; 16] = [
                0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
                0x80, 0x00,
            ];
            let v = vld1q_u8(bytes.as_ptr());
            assert_eq!(neon_movemask(v), 0x5555);
        }
    }

    #[test]
    fn test_classify_chars() {
        let input = br#"{"hello":123} "#;
        // `vld1q_u8` always reads 16 bytes, so stage the literal in a
        // full-width, space-padded buffer instead of loading from the
        // (shorter) literal directly.
        let mut staged = [b' '; 16];
        staged[..input.len()].copy_from_slice(input);
        unsafe {
            let chunk = vld1q_u8(staged.as_ptr());
            let class = classify_chars(chunk);
            assert_ne!(class.opens & (1 << 0), 0);
            assert_ne!(class.closes & (1 << 12), 0);
            assert_ne!(class.quotes & (1 << 1), 0);
            assert_ne!(class.quotes & (1 << 7), 0);
            assert_ne!(class.delims & (1 << 8), 0);
            assert_ne!(class.value_chars & (1 << 9), 0);
            assert_ne!(class.value_chars & (1 << 10), 0);
            assert_ne!(class.value_chars & (1 << 11), 0);
        }
    }

    // ---- standard index: NEON vs scalar ----

    #[test]
    fn test_simd_matches_scalar_empty_object() {
        let json = b"{}";
        let simd_result = build_semi_index_standard(json);
        let scalar_result = crate::json::standard::build_semi_index(json);
        assert_eq!(
            bits_to_string(&simd_result.ib, json.len()),
            bits_to_string(&scalar_result.ib, json.len())
        );
        assert_eq!(simd_result.state, scalar_result.state);
    }

    #[test]
    fn test_simd_matches_scalar_simple_object() {
        let json = br#"{"a":"b"}"#;
        let simd_result = build_semi_index_standard(json);
        let scalar_result = crate::json::standard::build_semi_index(json);
        assert_eq!(
            bits_to_string(&simd_result.ib, json.len()),
            bits_to_string(&scalar_result.ib, json.len()),
            "IB mismatch"
        );
        assert_eq!(simd_result.state, scalar_result.state);
    }

    #[test]
    fn test_simd_matches_scalar_array() {
        let json = b"[1,2,3]";
        let simd_result = build_semi_index_standard(json);
        let scalar_result = crate::json::standard::build_semi_index(json);
        assert_eq!(
            bits_to_string(&simd_result.ib, json.len()),
            bits_to_string(&scalar_result.ib, json.len()),
            "IB mismatch"
        );
    }

    #[test]
    fn test_simd_matches_scalar_nested() {
        let json = br#"{"a":{"b":1}}"#;
        let simd_result = build_semi_index_standard(json);
        let scalar_result = crate::json::standard::build_semi_index(json);
        assert_eq!(
            bits_to_string(&simd_result.ib, json.len()),
            bits_to_string(&scalar_result.ib, json.len()),
            "IB mismatch"
        );
    }

    #[test]
    fn test_simd_matches_scalar_escaped() {
        let json = br#"{"a":"b\"c"}"#;
        let simd_result = build_semi_index_standard(json);
        let scalar_result = crate::json::standard::build_semi_index(json);
        assert_eq!(
            bits_to_string(&simd_result.ib, json.len()),
            bits_to_string(&scalar_result.ib, json.len()),
            "IB mismatch"
        );
    }

    #[test]
    fn test_simd_matches_scalar_long_input() {
        let json = br#"{"name":"value","number":12345,"array":[1,2,3]}"#;
        let simd_result = build_semi_index_standard(json);
        let scalar_result = crate::json::standard::build_semi_index(json);
        assert_eq!(
            bits_to_string(&simd_result.ib, json.len()),
            bits_to_string(&scalar_result.ib, json.len()),
            "IB mismatch for long input"
        );
        assert_eq!(simd_result.state, scalar_result.state);
    }

    #[test]
    fn test_simd_matches_scalar_booleans() {
        let json = br#"{"t":true,"f":false,"n":null}"#;
        let simd_result = build_semi_index_standard(json);
        let scalar_result = crate::json::standard::build_semi_index(json);
        assert_eq!(
            bits_to_string(&simd_result.ib, json.len()),
            bits_to_string(&scalar_result.ib, json.len()),
            "IB mismatch for booleans"
        );
    }

    #[test]
    fn test_simd_matches_scalar_whitespace() {
        let json = b"{ \"a\" : 1 }";
        let simd_result = build_semi_index_standard(json);
        let scalar_result = crate::json::standard::build_semi_index(json);
        assert_eq!(
            bits_to_string(&simd_result.ib, json.len()),
            bits_to_string(&scalar_result.ib, json.len()),
            "IB mismatch for whitespace"
        );
    }

    #[test]
    fn test_simd_matches_scalar_exact_16_bytes() {
        let json = br#"{"abc":"defghi"}"#;
        assert_eq!(json.len(), 16);
        let simd_result = build_semi_index_standard(json);
        let scalar_result = crate::json::standard::build_semi_index(json);
        assert_eq!(
            bits_to_string(&simd_result.ib, json.len()),
            bits_to_string(&scalar_result.ib, json.len()),
            "IB mismatch for 16-byte input"
        );
    }

    #[test]
    fn test_simd_matches_scalar_32_bytes() {
        // Exactly one 32-byte block and no tail.
        let json = br#"{"abcdefghij":"klmnopqrstuvwxy"}"#;
        assert_eq!(json.len(), 32);
        let simd_result = build_semi_index_standard(json);
        let scalar_result = crate::json::standard::build_semi_index(json);
        assert_eq!(
            bits_to_string(&simd_result.ib, json.len()),
            bits_to_string(&scalar_result.ib, json.len()),
            "IB mismatch for 32-byte input"
        );
    }

    // ---- simple index: NEON vs scalar ----

    #[test]
    fn test_simple_simd_matches_scalar_empty_object() {
        let json = b"{}";
        let simd_result = build_semi_index_simple(json);
        let scalar_result = crate::json::simple::build_semi_index(json);
        assert_eq!(
            bits_to_string(&simd_result.ib, json.len()),
            bits_to_string(&scalar_result.ib, json.len()),
            "IB mismatch"
        );
        assert_eq!(
            bits_to_string(&simd_result.bp, 4),
            bits_to_string(&scalar_result.bp, 4),
            "BP mismatch"
        );
        assert_eq!(simd_result.state, scalar_result.state);
    }

    #[test]
    fn test_simple_simd_matches_scalar_empty_array() {
        let json = b"[]";
        let simd_result = build_semi_index_simple(json);
        let scalar_result = crate::json::simple::build_semi_index(json);
        assert_eq!(
            bits_to_string(&simd_result.ib, json.len()),
            bits_to_string(&scalar_result.ib, json.len()),
            "IB mismatch"
        );
        assert_eq!(
            bits_to_string(&simd_result.bp, 4),
            bits_to_string(&scalar_result.bp, 4),
            "BP mismatch"
        );
    }

    #[test]
    fn test_simple_simd_matches_scalar_simple_object() {
        let json = br#"{"a":"b"}"#;
        let simd_result = build_semi_index_simple(json);
        let scalar_result = crate::json::simple::build_semi_index(json);
        assert_eq!(
            bits_to_string(&simd_result.ib, json.len()),
            bits_to_string(&scalar_result.ib, json.len()),
            "IB mismatch"
        );
        assert_eq!(
            bits_to_string(&simd_result.bp, 6),
            bits_to_string(&scalar_result.bp, 6),
            "BP mismatch"
        );
    }

    #[test]
    fn test_simple_simd_matches_scalar_array_with_values() {
        let json = b"[1,2,3]";
        let simd_result = build_semi_index_simple(json);
        let scalar_result = crate::json::simple::build_semi_index(json);
        assert_eq!(
            bits_to_string(&simd_result.ib, json.len()),
            bits_to_string(&scalar_result.ib, json.len()),
            "IB mismatch"
        );
        assert_eq!(
            bits_to_string(&simd_result.bp, 8),
            bits_to_string(&scalar_result.bp, 8),
            "BP mismatch"
        );
    }

    #[test]
    fn test_simple_simd_matches_scalar_nested() {
        let json = br#"{"a":{"b":1}}"#;
        let simd_result = build_semi_index_simple(json);
        let scalar_result = crate::json::simple::build_semi_index(json);
        assert_eq!(
            bits_to_string(&simd_result.ib, json.len()),
            bits_to_string(&scalar_result.ib, json.len()),
            "IB mismatch"
        );
    }

    #[test]
    fn test_simple_simd_matches_scalar_escaped() {
        let json = br#"{"a":"b\"c"}"#;
        let simd_result = build_semi_index_simple(json);
        let scalar_result = crate::json::simple::build_semi_index(json);
        assert_eq!(
            bits_to_string(&simd_result.ib, json.len()),
            bits_to_string(&scalar_result.ib, json.len()),
            "IB mismatch"
        );
        assert_eq!(simd_result.state, scalar_result.state);
    }

    #[test]
    fn test_simple_simd_matches_scalar_escaped_backslash() {
        let json = br#""a\\b""#;
        let simd_result = build_semi_index_simple(json);
        let scalar_result = crate::json::simple::build_semi_index(json);
        assert_eq!(
            bits_to_string(&simd_result.ib, json.len()),
            bits_to_string(&scalar_result.ib, json.len()),
            "IB mismatch"
        );
        assert_eq!(simd_result.state, scalar_result.state);
    }

    #[test]
    fn test_simple_simd_matches_scalar_long_input() {
        let json = br#"{"name":"value","number":12345,"array":[1,2,3]}"#;
        let simd_result = build_semi_index_simple(json);
        let scalar_result = crate::json::simple::build_semi_index(json);
        assert_eq!(
            bits_to_string(&simd_result.ib, json.len()),
            bits_to_string(&scalar_result.ib, json.len()),
            "IB mismatch for long input"
        );
        assert_eq!(simd_result.state, scalar_result.state);
    }

    #[test]
    fn test_simple_simd_matches_scalar_whitespace() {
        let json = b"{ \"a\" : 1 }";
        let simd_result = build_semi_index_simple(json);
        let scalar_result = crate::json::simple::build_semi_index(json);
        assert_eq!(
            bits_to_string(&simd_result.ib, json.len()),
            bits_to_string(&scalar_result.ib, json.len()),
            "IB mismatch for whitespace"
        );
    }

    #[test]
    fn test_simple_simd_matches_scalar_exact_16_bytes() {
        let json = br#"{"abc":"defghi"}"#;
        assert_eq!(json.len(), 16);
        let simd_result = build_semi_index_simple(json);
        let scalar_result = crate::json::simple::build_semi_index(json);
        assert_eq!(
            bits_to_string(&simd_result.ib, json.len()),
            bits_to_string(&scalar_result.ib, json.len()),
            "IB mismatch for 16-byte input"
        );
    }

    #[test]
    fn test_simple_simd_matches_scalar_32_bytes() {
        // Exactly two 16-byte blocks and no tail.
        let json = br#"{"abcdefghij":"klmnopqrstuvwxy"}"#;
        assert_eq!(json.len(), 32);
        let simd_result = build_semi_index_simple(json);
        let scalar_result = crate::json::simple::build_semi_index(json);
        assert_eq!(
            bits_to_string(&simd_result.ib, json.len()),
            bits_to_string(&scalar_result.ib, json.len()),
            "IB mismatch for 32-byte input"
        );
    }

    #[test]
    fn test_simple_simd_matches_scalar_unterminated_string() {
        let json = br#"{"a"#;
        let simd_result = build_semi_index_simple(json);
        let scalar_result = crate::json::simple::build_semi_index(json);
        assert_eq!(simd_result.state, scalar_result.state);
        assert_eq!(simd_result.state, SimpleState::InString);
    }

    #[test]
    fn test_simple_simd_matches_scalar_unterminated_escape() {
        let json = br#""\"#;
        let simd_result = build_semi_index_simple(json);
        let scalar_result = crate::json::simple::build_semi_index(json);
        assert_eq!(simd_result.state, scalar_result.state);
        assert_eq!(simd_result.state, SimpleState::InEscape);
    }
}