#[cfg(target_arch = "aarch64")]
pub mod amx;
#[cfg(target_arch = "x86_64")]
pub mod avx2;
pub mod generic;
#[cfg(target_arch = "aarch64")]
pub mod neon;
#[cfg(target_arch = "aarch64")]
pub mod sve2;
// Tape entry layout: the token tag occupies the bits from bit 28 upward and the
// low 28 bits carry a byte offset (see `OFFSET_MASK`).

/// Tag for a `"` byte.
pub const TOKEN_QUOTE: u32 = 0x1000_0000;
/// Tag for a `{` byte.
pub const TOKEN_LBRACE: u32 = 0x2000_0000;
/// Tag for a `}` byte.
pub const TOKEN_RBRACE: u32 = 0x3000_0000;
/// Tag for a `[` byte.
pub const TOKEN_LBRACKET: u32 = 0x4000_0000;
/// Tag for a `]` byte.
pub const TOKEN_RBRACKET: u32 = 0x5000_0000;
/// Tag for a `:` byte.
pub const TOKEN_COLON: u32 = 0x6000_0000;
/// Tag for a `,` byte.
pub const TOKEN_COMMA: u32 = 0x7000_0000;

/// Selects the low 28 bits of a tape entry, i.e. everything below the tag.
pub const OFFSET_MASK: u32 = u32::MAX >> 4;

/// Byte-indexed lookup table: the entry for each of the seven tagged bytes is
/// its `TOKEN_*` tag, and every other entry is `0`.
pub static TAG_TABLE: [u32; 256] = {
    // (byte, tag) pairs for every byte that receives a non-zero entry.
    const STRUCTURAL: [(u8, u32); 7] = [
        (b'"', TOKEN_QUOTE),
        (b'{', TOKEN_LBRACE),
        (b'}', TOKEN_RBRACE),
        (b'[', TOKEN_LBRACKET),
        (b']', TOKEN_RBRACKET),
        (b':', TOKEN_COLON),
        (b',', TOKEN_COMMA),
    ];

    let mut table = [0u32; 256];
    // `for` loops are not available in const initializers, hence the manual index.
    let mut i = 0;
    while i < STRUCTURAL.len() {
        let (byte, tag) = STRUCTURAL[i];
        table[byte as usize] = tag;
        i += 1;
    }
    table
};
/// Builds a tape entry for `byte` found at offset `pos`.
///
/// The result is the [`TAG_TABLE`] entry for `byte` ORed with `pos` cast to
/// `u32`. Bytes without a table entry contribute a tag of `0`, so the result
/// is then just the truncated `pos`.
///
/// `pos` is neither range-checked nor masked: a value above [`OFFSET_MASK`]
/// overlaps the tag bits of the returned entry, and bits beyond the low 32 are
/// dropped by the cast.
#[inline(always)]
pub fn tag_byte(byte: u8, pos: usize) -> u32 {
    // A `u8` can only index 0..=255 and the table has 256 entries, so ordinary
    // indexing is always in bounds; no `unsafe` is required for this lookup.
    TAG_TABLE[usize::from(byte)] | (pos as u32)
}
/// Front end that forwards a borrowed byte buffer to one of the scanning
/// backends declared in this module.
pub struct Scanner<'a> {
    /// Bytes handed to whichever backend `scan` selects.
    input: &'a [u8],
}

impl<'a> Scanner<'a> {
    /// Creates a scanner borrowing `input`; nothing is copied or scanned here.
    #[inline(always)]
    pub fn new(input: &'a [u8]) -> Self {
        Scanner { input }
    }

    /// Runs a backend over the borrowed input, passing it `tape` as the output
    /// buffer, and returns the backend's result unchanged.
    ///
    /// Backend selection:
    /// - `aarch64` builds always call `neon::scan_neon`.
    /// - `x86_64` builds call `avx2::scan_avx2` when both the `avx2` and
    ///   `pclmulqdq` CPU features are detected at runtime.
    /// - Otherwise `generic::Scanner` is used.
    ///
    /// The tests in this file use the return value as the number of leading
    /// `tape` entries that were written.
    ///
    /// NOTE(review): how a `tape` that is too small is handled depends on the
    /// backends, which are not visible from this file — confirm before
    /// relying on it.
    pub fn scan(&self, tape: &mut [u32]) -> usize {
        let bytes = self.input;

        #[cfg(target_arch = "aarch64")]
        {
            // SAFETY (NOTE(review)): presumably relies on NEON being available
            // on every aarch64 target this crate builds for; confirm against
            // the contract documented on `neon::scan_neon`.
            return unsafe { neon::scan_neon(bytes, tape) };
        }

        #[cfg(target_arch = "x86_64")]
        {
            let simd_available = std::is_x86_feature_detected!("avx2")
                && std::is_x86_feature_detected!("pclmulqdq");
            if simd_available {
                // SAFETY: both CPU features were detected on the line above.
                // NOTE(review): presumably that is the whole contract of
                // `avx2::scan_avx2`; confirm against its documentation.
                return unsafe { avx2::scan_avx2(bytes, tape) };
            }
        }

        // On aarch64 the block above always returns, so this fallback is
        // unreachable there; the lint is silenced rather than cfg-gating it.
        #[allow(unreachable_code)]
        {
            generic::Scanner::new(bytes).scan(tape)
        }
    }
}
#[cfg(test)]
// `TOKEN_X | 0` is written out on purpose so every expected entry shows its offset.
#[allow(clippy::identity_op)]
mod tests {
    use super::*;

    /// Scans `json` through the dispatching `Scanner` into a zeroed tape with
    /// `capacity` slots; returns the reported count together with the tape.
    fn run_scan(json: &[u8], capacity: usize) -> (usize, Vec<u32>) {
        let mut tape = vec![0u32; capacity];
        let count = Scanner::new(json).scan(&mut tape);
        (count, tape)
    }

    #[test]
    fn test_dynamic_scan_simple_object() {
        let (count, tape) = run_scan(br#"{"key":"value"}"#, 10);
        assert_eq!(count, 7);

        let expected = [
            TOKEN_LBRACE | 0,
            TOKEN_QUOTE | 1,
            TOKEN_QUOTE | 5,
            TOKEN_COLON | 6,
            TOKEN_QUOTE | 7,
            TOKEN_QUOTE | 13,
            TOKEN_RBRACE | 14,
        ];
        assert_eq!(&tape[..count], &expected);
    }

    #[test]
    fn test_dynamic_scan_with_escaped_quotes() {
        // The `\"` at offsets 11-12 sits inside the value and is expected to
        // produce no tape entry.
        let (count, tape) = run_scan(br#"{"key":"val\"ue"}"#, 10);
        assert_eq!(count, 7);

        let expected = [
            TOKEN_LBRACE | 0,
            TOKEN_QUOTE | 1,
            TOKEN_QUOTE | 5,
            TOKEN_COLON | 6,
            TOKEN_QUOTE | 7,
            TOKEN_QUOTE | 15,
            TOKEN_RBRACE | 16,
        ];
        assert_eq!(&tape[..count], &expected);
    }

    #[test]
    fn test_dynamic_scan_array_and_primitives() {
        // Only brackets and commas are expected; the literals themselves
        // produce no tape entries.
        let (count, tape) = run_scan(b"[1, true, null]", 10);
        assert_eq!(count, 4);

        let expected = [
            TOKEN_LBRACKET | 0,
            TOKEN_COMMA | 2,
            TOKEN_COMMA | 8,
            TOKEN_RBRACKET | 14,
        ];
        assert_eq!(&tape[..count], &expected);
    }

    #[test]
    fn test_dynamic_scan_string_spans_64byte_boundary() {
        let long_key = [b'A'; 57];
        let value_head = [b'a'; 2];
        let value_tail = [b'a'; 56];

        // Byte offsets covered by each piece are noted on the right. The `[`
        // lands on offset 64, inside the string value, and the expected tape
        // below contains no bracket entry for it.
        let pieces: [&[u8]; 8] = [
            b"{\"",           // 0..=1
            &long_key,        // 2..=58
            b"\":\"",         // 59..=61
            &value_head,      // 62..=63
            b"[",             // 64
            &value_tail,      // 65..=120
            b"\",\"k2\":",    // 121..=127
            b"\"v2\"}",       // 128..=132
        ];

        let mut json: Vec<u8> = Vec::with_capacity(140);
        for piece in pieces {
            json.extend_from_slice(piece);
        }
        assert_eq!(json.len(), 133, "input length sanity check");

        let (count, tape) = run_scan(&json, 20);
        let expected = [
            TOKEN_LBRACE | 0,
            TOKEN_QUOTE | 1,
            TOKEN_QUOTE | 59,
            TOKEN_COLON | 60,
            TOKEN_QUOTE | 61,
            TOKEN_QUOTE | 121,
            TOKEN_COMMA | 122,
            TOKEN_QUOTE | 123,
            TOKEN_QUOTE | 126,
            TOKEN_COLON | 127,
            TOKEN_QUOTE | 128,
            TOKEN_QUOTE | 131,
            TOKEN_RBRACE | 132,
        ];
        assert_eq!(
            &tape[..count],
            &expected,
            "unexpected tape; count={count}"
        );
    }
}