#![doc = include_str!("../README.md")]
pub mod json_ref;
pub mod tape;
pub use json_ref::JsonRef;
pub use tape::{Tape, TapeArrayIter, TapeEntry, TapeEntryKind, TapeObjectIter, TapeRef};
use tape::TapeWriter;
/// C-layout table of writer callbacks handed to `parse_json_zmm_dyn`.
///
/// Every entry receives the opaque `writer_data` pointer as its first argument;
/// the text-carrying entries additionally receive a pointer/length pair.
/// `parse_to_tape_zmm_dyn` fills the table with the `tw_*` trampolines, which
/// forward each call to the matching `TapeWriter` method.
///
/// The field order is part of the `#[repr(C)]` layout and must not change
/// without updating the foreign side.
#[cfg(target_arch = "x86_64")]
#[repr(C)]
struct ZmmVtab {
// Scalar values.
null: unsafe extern "C" fn(*mut ()),
bool_val: unsafe extern "C" fn(*mut (), bool),
// Text payloads: (writer, ptr, len).
number: unsafe extern "C" fn(*mut (), *const u8, usize),
string: unsafe extern "C" fn(*mut (), *const u8, usize),
escaped_string: unsafe extern "C" fn(*mut (), *const u8, usize),
key: unsafe extern "C" fn(*mut (), *const u8, usize),
escaped_key: unsafe extern "C" fn(*mut (), *const u8, usize),
// Container boundaries.
start_object: unsafe extern "C" fn(*mut ()),
end_object: unsafe extern "C" fn(*mut ()),
start_array: unsafe extern "C" fn(*mut ()),
end_array: unsafe extern "C" fn(*mut ()),
}
/// `ZmmVtab::null` trampoline: forwards to `TapeWriter::null`.
///
/// # Safety
/// `data` must point to a live `TapeWriter` that is not aliased for the
/// duration of the call (`parse_to_tape_zmm_dyn` passes its local writer).
#[cfg(target_arch = "x86_64")]
unsafe extern "C" fn tw_null(data: *mut ()) {
    // SAFETY: upheld by the caller, see `# Safety` above.
    let writer = unsafe { &mut *data.cast::<TapeWriter<'static>>() };
    writer.null();
}
/// `ZmmVtab::bool_val` trampoline: forwards `v` to `TapeWriter::bool_val`.
///
/// # Safety
/// `data` must point to a live `TapeWriter` that is not aliased for the
/// duration of the call.
#[cfg(target_arch = "x86_64")]
unsafe extern "C" fn tw_bool_val(data: *mut (), v: bool) {
    // SAFETY: upheld by the caller, see `# Safety` above.
    let writer = unsafe { &mut *data.cast::<TapeWriter<'static>>() };
    writer.bool_val(v);
}
/// `ZmmVtab::number` trampoline: forwards the `ptr`/`len` text to `TapeWriter::number`.
///
/// # Safety
/// `data` must point to a live, unaliased `TapeWriter`, and `ptr`/`len` must
/// describe valid UTF-8 that outlives the tape being built. The `'static`
/// lifetime is fabricated here; presumably the bytes are a slice of the source
/// text handed to the parser — confirm against the foreign implementation.
#[cfg(target_arch = "x86_64")]
unsafe extern "C" fn tw_number(data: *mut (), ptr: *const u8, len: usize) {
    // SAFETY: upheld by the caller, see `# Safety` above.
    unsafe {
        let bytes: &'static [u8] = std::slice::from_raw_parts(ptr, len);
        let text: &'static str = std::str::from_utf8_unchecked(bytes);
        let writer = &mut *data.cast::<TapeWriter<'static>>();
        writer.number(text);
    }
}
/// `ZmmVtab::string` trampoline: forwards the `ptr`/`len` text to `TapeWriter::string`.
///
/// # Safety
/// `data` must point to a live, unaliased `TapeWriter`, and `ptr`/`len` must
/// describe valid UTF-8 that outlives the tape being built. The `'static`
/// lifetime is fabricated here; presumably the bytes are a slice of the source
/// text handed to the parser — confirm against the foreign implementation.
#[cfg(target_arch = "x86_64")]
unsafe extern "C" fn tw_string(data: *mut (), ptr: *const u8, len: usize) {
    // SAFETY: upheld by the caller, see `# Safety` above.
    unsafe {
        let bytes: &'static [u8] = std::slice::from_raw_parts(ptr, len);
        let text: &'static str = std::str::from_utf8_unchecked(bytes);
        let writer = &mut *data.cast::<TapeWriter<'static>>();
        writer.string(text);
    }
}
/// `ZmmVtab::escaped_string` trampoline: copies the `ptr`/`len` text into a
/// fresh `Box<str>` and hands it to `TapeWriter::escaped_string`.
///
/// # Safety
/// `data` must point to a live, unaliased `TapeWriter`, and `ptr`/`len` must
/// describe valid UTF-8 that stays readable for the duration of the call
/// (the text is copied, so it need not outlive it).
#[cfg(target_arch = "x86_64")]
unsafe extern "C" fn tw_escaped_string(data: *mut (), ptr: *const u8, len: usize) {
    // SAFETY: upheld by the caller, see `# Safety` above.
    unsafe {
        let bytes = std::slice::from_raw_parts(ptr, len);
        let owned: Box<str> = std::str::from_utf8_unchecked(bytes).into();
        let writer = &mut *data.cast::<TapeWriter<'static>>();
        writer.escaped_string(owned);
    }
}
/// `ZmmVtab::key` trampoline: forwards the `ptr`/`len` text to `TapeWriter::key`.
///
/// # Safety
/// `data` must point to a live, unaliased `TapeWriter`, and `ptr`/`len` must
/// describe valid UTF-8 that outlives the tape being built. The `'static`
/// lifetime is fabricated here; presumably the bytes are a slice of the source
/// text handed to the parser — confirm against the foreign implementation.
#[cfg(target_arch = "x86_64")]
unsafe extern "C" fn tw_key(data: *mut (), ptr: *const u8, len: usize) {
    // SAFETY: upheld by the caller, see `# Safety` above.
    unsafe {
        let bytes: &'static [u8] = std::slice::from_raw_parts(ptr, len);
        let text: &'static str = std::str::from_utf8_unchecked(bytes);
        let writer = &mut *data.cast::<TapeWriter<'static>>();
        writer.key(text);
    }
}
/// `ZmmVtab::escaped_key` trampoline: copies the `ptr`/`len` text into a fresh
/// `Box<str>` and hands it to `TapeWriter::escaped_key`.
///
/// # Safety
/// `data` must point to a live, unaliased `TapeWriter`, and `ptr`/`len` must
/// describe valid UTF-8 that stays readable for the duration of the call
/// (the text is copied, so it need not outlive it).
#[cfg(target_arch = "x86_64")]
unsafe extern "C" fn tw_escaped_key(data: *mut (), ptr: *const u8, len: usize) {
    // SAFETY: upheld by the caller, see `# Safety` above.
    unsafe {
        let bytes = std::slice::from_raw_parts(ptr, len);
        let owned: Box<str> = std::str::from_utf8_unchecked(bytes).into();
        let writer = &mut *data.cast::<TapeWriter<'static>>();
        writer.escaped_key(owned);
    }
}
/// `ZmmVtab::start_object` trampoline: forwards to `TapeWriter::start_object`.
///
/// # Safety
/// `data` must point to a live `TapeWriter` that is not aliased for the
/// duration of the call.
#[cfg(target_arch = "x86_64")]
unsafe extern "C" fn tw_start_object(data: *mut ()) {
    // SAFETY: upheld by the caller, see `# Safety` above.
    let writer = unsafe { &mut *data.cast::<TapeWriter<'static>>() };
    writer.start_object();
}
/// `ZmmVtab::end_object` trampoline: forwards to `TapeWriter::end_object`.
///
/// # Safety
/// `data` must point to a live `TapeWriter` that is not aliased for the
/// duration of the call.
#[cfg(target_arch = "x86_64")]
unsafe extern "C" fn tw_end_object(data: *mut ()) {
    // SAFETY: upheld by the caller, see `# Safety` above.
    let writer = unsafe { &mut *data.cast::<TapeWriter<'static>>() };
    writer.end_object();
}
/// `ZmmVtab::start_array` trampoline: forwards to `TapeWriter::start_array`.
///
/// # Safety
/// `data` must point to a live `TapeWriter` that is not aliased for the
/// duration of the call.
#[cfg(target_arch = "x86_64")]
unsafe extern "C" fn tw_start_array(data: *mut ()) {
    // SAFETY: upheld by the caller, see `# Safety` above.
    let writer = unsafe { &mut *data.cast::<TapeWriter<'static>>() };
    writer.start_array();
}
/// `ZmmVtab::end_array` trampoline: forwards to `TapeWriter::end_array`.
///
/// # Safety
/// `data` must point to a live `TapeWriter` that is not aliased for the
/// duration of the call.
#[cfg(target_arch = "x86_64")]
unsafe extern "C" fn tw_end_array(data: *mut ()) {
    // SAFETY: upheld by the caller, see `# Safety` above.
    let writer = unsafe { &mut *data.cast::<TapeWriter<'static>>() };
    writer.end_array();
}
// Foreign parser entry points (x86_64 only). Their implementations are not in
// this file — presumably hand-written assembly linked into the crate; confirm
// against the build script.
#[cfg(target_arch = "x86_64")]
// NOTE(review): `improper_ctypes` is silenced, apparently because `*mut String`
// and `*mut TapeEntry` are not FFI-safe types. Confirm the foreign side only
// treats them as opaque pointers (or goes through helpers such as
// `tape_take_box_str`).
#[allow(improper_ctypes)]
unsafe extern "C" {
/// Parses `src_ptr[..src_len]`, reporting events through `writer_vtab` with
/// `writer_data` as the first argument of every callback.
///
/// Returns `true` on success; `parse_to_tape_zmm_dyn` maps `false` to `None`.
/// `frames_buf` is a caller-provided scratch area of `MAX_JSON_DEPTH` bytes and
/// `unescape_buf` a caller-provided scratch `String`.
fn parse_json_zmm_dyn(
src_ptr: *const u8,
src_len: usize,
writer_data: *mut (),
writer_vtab: *const ZmmVtab,
frames_buf: *mut u8,
unescape_buf: *mut String,
) -> bool;
/// Parses `src_ptr[..src_len]` directly into the tape buffer at `tape_ptr`,
/// which has room for `tape_cap` entries.
///
/// Returns a status byte; `parse_to_tape_zmm_tape` interprets 0 as success,
/// 1 as a parse error and 2 as "tape buffer too small". The number of entries
/// written is stored through `tape_len_out` and whether any escaped string was
/// seen through `has_escapes_out`.
fn parse_json_zmm_tape(
src_ptr: *const u8,
src_len: usize,
tape_ptr: *mut TapeEntry<'static>,
tape_len_out: *mut usize,
frames_buf: *mut u8,
open_buf: *mut u64,
unescape_buf: *mut String,
has_escapes_out: *mut bool,
tape_cap: usize,
) -> u8;
}
#[cfg(feature = "stats")]
pub mod stats {
    //! Global counters recording how often each parser state arm runs.
    //!
    //! The counters are bumped by the `stat!` macro in the portable parser and
    //! only exist when the `stats` feature is enabled.
    use std::sync::atomic::{AtomicU64, Ordering::Relaxed};

    /// Iterations spent in `State::ValueWhitespace`.
    pub static VALUE_WHITESPACE: AtomicU64 = AtomicU64::new(0);
    /// Iterations spent in `State::StringChars`.
    pub static STRING_CHARS: AtomicU64 = AtomicU64::new(0);
    /// Iterations spent in `State::StringEscape`.
    pub static STRING_ESCAPE: AtomicU64 = AtomicU64::new(0);
    /// Iterations spent in `State::KeyChars`.
    pub static KEY_CHARS: AtomicU64 = AtomicU64::new(0);
    /// Iterations spent in `State::KeyEscape`.
    pub static KEY_ESCAPE: AtomicU64 = AtomicU64::new(0);
    /// Iterations spent in `State::KeyEnd`.
    pub static KEY_END: AtomicU64 = AtomicU64::new(0);
    /// Iterations spent in `State::AfterColon`.
    pub static AFTER_COLON: AtomicU64 = AtomicU64::new(0);
    /// Iterations spent in `State::AtomChars`.
    pub static ATOM_CHARS: AtomicU64 = AtomicU64::new(0);
    /// Iterations spent in `State::ObjectStart`.
    pub static OBJECT_START: AtomicU64 = AtomicU64::new(0);
    /// Iterations spent in `State::ArrayStart`.
    pub static ARRAY_START: AtomicU64 = AtomicU64::new(0);
    /// Iterations spent in `State::AfterValue`.
    pub static AFTER_VALUE: AtomicU64 = AtomicU64::new(0);

    /// Sets every counter back to zero.
    pub fn reset() {
        all()
            .into_iter()
            .for_each(|counter| counter.store(0, Relaxed));
    }

    /// All counters, in declaration order.
    fn all() -> [&'static AtomicU64; 11] {
        [
            &VALUE_WHITESPACE,
            &STRING_CHARS,
            &STRING_ESCAPE,
            &KEY_CHARS,
            &KEY_ESCAPE,
            &KEY_END,
            &AFTER_COLON,
            &ATOM_CHARS,
            &OBJECT_START,
            &ARRAY_START,
            &AFTER_VALUE,
        ]
    }

    /// Plain-value snapshot of every counter, as returned by [`get`].
    pub struct StateStats {
        pub value_whitespace: u64,
        pub string_chars: u64,
        pub string_escape: u64,
        pub key_chars: u64,
        pub key_escape: u64,
        pub key_end: u64,
        pub after_colon: u64,
        pub atom_chars: u64,
        pub object_start: u64,
        pub array_start: u64,
        pub after_value: u64,
    }

    /// Reads every counter (relaxed loads, one after another — the snapshot is
    /// not atomic as a whole).
    pub fn get() -> StateStats {
        let read = |counter: &AtomicU64| counter.load(Relaxed);
        StateStats {
            value_whitespace: read(&VALUE_WHITESPACE),
            string_chars: read(&STRING_CHARS),
            string_escape: read(&STRING_ESCAPE),
            key_chars: read(&KEY_CHARS),
            key_escape: read(&KEY_ESCAPE),
            key_end: read(&KEY_END),
            after_colon: read(&AFTER_COLON),
            atom_chars: read(&ATOM_CHARS),
            object_start: read(&OBJECT_START),
            array_start: read(&ARRAY_START),
            after_value: read(&AFTER_VALUE),
        }
    }
}
// Bumps the given `stats` counter by one (relaxed ordering).
//
// The generated statement carries `#[cfg(feature = "stats")]`, so with the
// feature disabled the invocation compiles to nothing and the counter path is
// never resolved.
macro_rules! stat {
($counter:path) => {
#[cfg(feature = "stats")]
$counter.fetch_add(1, ::std::sync::atomic::Ordering::Relaxed);
};
}
/// States of the portable parser's state machine (`parse_json_impl`).
#[derive(PartialEq)]
enum State {
/// Initial state: skipping whitespace before the top-level value.
ValueWhitespace,
/// Inside a string value, scanning for a backslash or the closing quote.
StringChars,
/// On the byte right after a backslash inside a string value; it is consumed
/// without inspection.
StringEscape,
/// Inside an object key, scanning for a backslash or the closing quote.
KeyChars,
/// On the byte right after a backslash inside an object key.
KeyEscape,
/// After a key's closing quote; only whitespace and `:` are accepted.
KeyEnd,
/// After the `:` of a member; a value must follow.
AfterColon,
/// Inside a bare token (number, `true`, `false`, `null`), scanning for a delimiter.
AtomChars,
/// The input was rejected.
Error,
/// After `{`, or after `,` inside an object; a key or `}` must follow.
ObjectStart,
/// After `[`, or after `,` inside an array; a value or `]` must follow.
ArrayStart,
/// After a complete value; `,`, a closing bracket/brace or end of input may follow.
AfterValue,
}
/// Kind of an open container on the nesting stack.
///
/// `#[repr(u8)]` with fixed discriminants because the frame buffer is also
/// handed to the foreign parsers as a raw `*mut u8`.
#[derive(Copy, Clone, PartialEq)]
#[repr(u8)]
enum FrameKind {
Object = 0,
Array = 1,
}
/// Maximum number of simultaneously open objects/arrays.
///
/// Sizes the frame buffers; the portable parser rejects input that would nest
/// deeper than this.
pub const MAX_JSON_DEPTH: usize = 64;
/// Event sink driven by the parser.
///
/// `'src` is the lifetime of the source text: unescaped strings, keys and
/// numbers are passed as slices borrowed from it.
pub trait JsonWriter<'src> {
/// Value produced by [`JsonWriter::finish`].
type Output;
/// A `null` literal was parsed.
fn null(&mut self);
/// A `true` / `false` literal was parsed.
fn bool_val(&mut self, v: bool);
/// A number was parsed; `s` is its source text, already checked against the
/// JSON number grammar.
fn number(&mut self, s: &'src str);
/// A string value without escape sequences; `s` excludes the quotes.
fn string(&mut self, s: &'src str);
/// A string value that contained escape sequences; `s` is the decoded text.
fn escaped_string(&mut self, s: Box<str>);
/// An object key without escape sequences; `s` excludes the quotes.
fn key(&mut self, s: &'src str);
/// An object key that contained escape sequences; `s` is the decoded text.
fn escaped_key(&mut self, s: Box<str>);
/// `{` was parsed.
fn start_object(&mut self);
/// The `}` matching the innermost open object was parsed.
fn end_object(&mut self);
/// `[` was parsed.
fn start_array(&mut self);
/// The `]` matching the innermost open array was parsed.
fn end_array(&mut self);
/// Called once after the whole input was accepted; may still return `None`.
fn finish(self) -> Option<Self::Output>;
}
/// Returns `true` when `s` is exactly one number in JSON syntax:
/// `-? (0 | [1-9][0-9]*) (\. [0-9]+)? ([eE] [+-]? [0-9]+)?`.
fn is_valid_json_number(s: &[u8]) -> bool {
    /// Drops the leading run of ASCII digits.
    fn skip_digits(s: &[u8]) -> &[u8] {
        let run = s.iter().take_while(|b| b.is_ascii_digit()).count();
        &s[run..]
    }

    // Optional sign.
    let unsigned = s.strip_prefix(b"-").unwrap_or(s);

    // Integer part: a lone `0`, or a non-zero digit followed by more digits.
    // A digit directly after a leading `0` is left in `rest` and rejected by
    // the final emptiness check.
    let rest = match unsigned {
        [b'0', tail @ ..] => tail,
        [b'1'..=b'9', ..] => skip_digits(unsigned),
        _ => return false,
    };

    // Optional fraction: `.` followed by at least one digit.
    let rest = match rest {
        [b'.', frac @ ..] => {
            let after = skip_digits(frac);
            if after.len() == frac.len() {
                return false;
            }
            after
        }
        _ => rest,
    };

    // Optional exponent: `e`/`E`, optional sign, at least one digit.
    let rest = match rest {
        [b'e' | b'E', exp @ ..] => {
            let digits = match exp {
                [b'+' | b'-', tail @ ..] => tail,
                _ => exp,
            };
            let after = skip_digits(digits);
            if after.len() == digits.len() {
                return false;
            }
            after
        }
        _ => rest,
    };

    rest.is_empty()
}
/// C-ABI wrapper around `is_valid_json_number`.
///
/// Returns `false` for a null pointer or a zero length (an empty token is never
/// a valid number, and `slice::from_raw_parts` must not be given a null
/// pointer even for an empty slice).
///
/// The caller must guarantee that a non-null `ptr` is valid for reading `len`
/// bytes for the duration of the call.
#[unsafe(no_mangle)]
pub extern "C" fn is_valid_json_number_c(ptr: *const u8, len: usize) -> bool {
    if ptr.is_null() || len == 0 {
        return false;
    }
    // SAFETY: `ptr` is non-null and, per the contract above, readable for
    // `len` bytes; `u8` has no alignment requirement.
    let s = unsafe { std::slice::from_raw_parts(ptr, len) };
    is_valid_json_number(s)
}
/// Moves the contents of `*unescape_buf` into a leaked `Box<str>` and reports
/// its address and length through `out_ptr` / `out_len`.
///
/// `*unescape_buf` is left as an empty `String`. Ownership of the allocation
/// passes to whoever receives the pointer/length pair; presumably the foreign
/// tape parser stores it in a tape entry that later frees it — confirm against
/// that implementation.
///
/// All three pointers must be valid, aligned and writable.
#[cfg(target_arch = "x86_64")]
#[unsafe(no_mangle)]
#[inline(never)]
pub extern "C" fn tape_take_box_str(
    unescape_buf: *mut String,
    out_ptr: *mut *const u8,
    out_len: *mut usize,
) {
    // SAFETY: the caller guarantees the three pointers are valid (see above).
    unsafe {
        let taken: String = std::mem::take(&mut *unescape_buf);
        let boxed = taken.into_boxed_str();
        let byte_len = boxed.len();
        let leaked: *mut str = Box::into_raw(boxed);
        out_ptr.write(leaked.cast::<u8>().cast_const());
        out_len.write(byte_len);
    }
}
/// Emits the event for a bare token: `true`, `false`, `null` or a number.
///
/// Returns `false` (without emitting anything) when `s` is none of these.
fn write_atom<'a, W: JsonWriter<'a>>(s: &'a str, w: &mut W) -> bool {
    match s {
        "true" => w.bool_val(true),
        "false" => w.bool_val(false),
        "null" => w.null(),
        _ if is_valid_json_number(s.as_bytes()) => w.number(s),
        _ => return false,
    }
    true
}
/// Parses `src` into a [`Tape`] with the portable state machine, using
/// `classify` to build the per-chunk byte bitmaps.
///
/// Returns `None` when the input is rejected.
pub fn parse_to_tape<'a>(src: &'a str, classify: ClassifyFn) -> Option<Tape<'a>> {
    let tape_writer = TapeWriter::new();
    parse_with(src, classify, tape_writer)
}
/// Parses `src` into a [`Tape`] by running the foreign `parse_json_zmm_dyn`
/// routine, which reports events to a `TapeWriter` through a callback table.
///
/// Returns `None` when the routine reports failure or the writer's `finish`
/// yields nothing.
///
/// NOTE(review): unlike `parse_to_tape_zmm_tape`, this function does not check
/// for AVX-512 support before calling into the foreign routine — confirm
/// whether that routine needs the same guard.
#[cfg(target_arch = "x86_64")]
pub fn parse_to_tape_zmm_dyn<'a>(src: &'a str) -> Option<Tape<'a>> {
    let callbacks = ZmmVtab {
        null: tw_null,
        bool_val: tw_bool_val,
        number: tw_number,
        string: tw_string,
        escaped_string: tw_escaped_string,
        key: tw_key,
        escaped_key: tw_escaped_key,
        start_object: tw_start_object,
        end_object: tw_end_object,
        start_array: tw_start_array,
        end_array: tw_end_array,
    };
    let mut tape_writer = TapeWriter::new();
    let mut frames = [FrameKind::Object; MAX_JSON_DEPTH];
    let mut scratch = String::new();

    // SAFETY: every pointer refers to a local that outlives the call, and the
    // callbacks expect exactly the `TapeWriter` passed as `writer_data`.
    let accepted = unsafe {
        parse_json_zmm_dyn(
            src.as_ptr(),
            src.len(),
            &raw mut tape_writer as *mut (),
            &callbacks,
            frames.as_mut_ptr() as *mut u8,
            &raw mut scratch,
        )
    };
    if !accepted {
        return None;
    }
    tape_writer.finish()
}
/// Parses `src` into a [`Tape`] with the foreign `parse_json_zmm_tape` routine,
/// which writes tape entries straight into a pre-allocated buffer.
///
/// `initial_capacity` is the number of entries to reserve up front; when
/// `None`, `max(src.len() / 4, 2)` is used. If the routine reports that the
/// buffer was too small, the capacity is (at least) doubled and the parse is
/// retried from scratch.
///
/// Returns `None` on a parse error or an unknown status code.
///
/// # Panics
/// Panics when the CPU does not support AVX-512BW.
#[cfg(target_arch = "x86_64")]
pub fn parse_to_tape_zmm_tape<'a>(
    src: &'a str,
    initial_capacity: Option<usize>,
) -> Option<Tape<'a>> {
    assert!(
        std::is_x86_feature_detected!("avx512bw"),
        "parse_to_tape_zmm_tape requires AVX-512BW; \
use parse_to_tape or choose_classifier() for automatic dispatch"
    );
    // Status codes returned by the foreign routine.
    const RESULT_OK: u8 = 0;
    const RESULT_PARSE_ERROR: u8 = 1;
    const RESULT_TAPE_OVERFLOW: u8 = 2;

    let mut frames_buf = [FrameKind::Object; MAX_JSON_DEPTH];
    let mut open_buf = [0u64; MAX_JSON_DEPTH];
    let mut unescape_buf = String::new();
    let mut capacity = match initial_capacity {
        Some(requested) => requested,
        None => (src.len() / 4).max(2),
    };

    loop {
        let mut entries: Vec<TapeEntry<'a>> = Vec::with_capacity(capacity);
        let mut written: usize = 0;
        let mut saw_escapes = false;
        unescape_buf.clear();

        // SAFETY: all pointers refer to locals that outlive the call, and
        // `entries` has room for `capacity` elements.
        let status = unsafe {
            parse_json_zmm_tape(
                src.as_ptr(),
                src.len(),
                entries.as_mut_ptr() as *mut TapeEntry<'static>,
                &raw mut written,
                frames_buf.as_mut_ptr() as *mut u8,
                open_buf.as_mut_ptr(),
                &raw mut unescape_buf,
                &raw mut saw_escapes,
                capacity,
            )
        };

        match status {
            RESULT_OK => {
                // SAFETY: the routine reported `written` initialised entries.
                unsafe { entries.set_len(written) };
                return Some(Tape {
                    entries,
                    has_escapes: saw_escapes,
                });
            }
            // NOTE(review): entries written before the error are not adopted
            // here, so any owned data in them would not be dropped — confirm
            // how the foreign routine leaves the buffer on error.
            RESULT_PARSE_ERROR => return None,
            RESULT_TAPE_OVERFLOW => {
                // Adopt the entries written so far so they are dropped with
                // the vector, then retry with a larger buffer.
                // SAFETY: the routine reported `written` initialised entries.
                unsafe { entries.set_len(written) };
                capacity = capacity.saturating_mul(2).max(capacity + 1);
            }
            _ => return None,
        }
    }
}
/// Parses `src` with the portable state machine, feeding events to `writer`.
///
/// `classify` builds the per-chunk byte bitmaps (see [`choose_classifier`]).
/// Returns the writer's output, or `None` when the input is rejected.
pub fn parse_with<'a, W: JsonWriter<'a>>(
    src: &'a str,
    classify: ClassifyFn,
    writer: W,
) -> Option<W::Output> {
    // Scratch storage owned by this call: the nesting stack and the buffer
    // escaped strings are decoded into.
    let mut frames = [FrameKind::Object; MAX_JSON_DEPTH];
    let mut scratch = String::new();
    parse_json_impl(src, classify, writer, &mut frames, &mut scratch)
}
fn parse_json_impl<'a, F, W>(
src: &'a str,
classify: F,
mut writer: W,
frames_buf: &mut [FrameKind; MAX_JSON_DEPTH],
unescape_buf: &mut String,
) -> Option<W::Output>
where
F: Fn(&[u8]) -> ByteState,
W: JsonWriter<'a>,
{
let bytes = src.as_bytes();
let mut frames_depth: usize = 0;
let mut str_start: usize = 0; let mut str_escaped = false; let mut atom_start: usize = 0; let mut current_key_raw: &'a str = ""; let mut current_key_escaped = false; let mut after_comma = false; let mut state = State::ValueWhitespace;
let mut pos = 0;
while pos < bytes.len() {
let chunk_len = (bytes.len() - pos).min(64);
let chunk = &bytes[pos..pos + chunk_len];
let byte_state = classify(chunk);
let mut chunk_offset = 0;
'inner: while chunk_offset < chunk_len {
state = match state {
State::ValueWhitespace => {
stat!(crate::stats::VALUE_WHITESPACE);
let ahead = (!byte_state.whitespace) >> chunk_offset;
let skip = ahead.trailing_zeros() as usize;
chunk_offset += skip;
if chunk_offset >= chunk_len {
break 'inner;
}
let byte = chunk[chunk_offset];
match byte {
b'{' => {
if frames_depth >= MAX_JSON_DEPTH {
State::Error
} else {
frames_buf[frames_depth] = FrameKind::Object;
frames_depth += 1;
writer.start_object();
State::ObjectStart
}
}
b'[' => {
if frames_depth >= MAX_JSON_DEPTH {
State::Error
} else {
frames_buf[frames_depth] = FrameKind::Array;
frames_depth += 1;
writer.start_array();
State::ArrayStart
}
}
b'"' => {
str_start = pos + chunk_offset + 1;
str_escaped = false;
State::StringChars
}
_ => {
atom_start = pos + chunk_offset;
State::AtomChars
}
}
}
State::StringChars => {
stat!(crate::stats::STRING_CHARS);
let unescaped_quotes = byte_state.quotes & !(byte_state.backslashes << 1);
let interesting = (byte_state.backslashes | unescaped_quotes) >> chunk_offset;
let skip = interesting.trailing_zeros() as usize;
chunk_offset = (chunk_offset + skip).min(chunk_len);
if chunk_offset >= chunk_len {
break 'inner;
}
let byte = chunk[chunk_offset];
match byte {
b'\\' => State::StringEscape,
b'"' => {
let raw = &src[str_start..pos + chunk_offset];
if str_escaped {
unescape_str(raw, unescape_buf);
writer.escaped_string(unescape_buf.as_str().into());
} else {
writer.string(raw);
}
State::AfterValue
}
_ => State::StringChars,
}
}
State::StringEscape => {
stat!(crate::stats::STRING_ESCAPE);
str_escaped = true;
State::StringChars
}
State::KeyChars => {
stat!(crate::stats::KEY_CHARS);
let unescaped_quotes = byte_state.quotes & !(byte_state.backslashes << 1);
let interesting = (byte_state.backslashes | unescaped_quotes) >> chunk_offset;
let skip = interesting.trailing_zeros() as usize;
chunk_offset = (chunk_offset + skip).min(chunk_len);
if chunk_offset >= chunk_len {
break 'inner;
}
let byte = chunk[chunk_offset];
match byte {
b'\\' => State::KeyEscape,
b'"' => {
current_key_raw = &src[str_start..pos + chunk_offset];
current_key_escaped = str_escaped;
State::KeyEnd
}
_ => State::KeyChars,
}
}
State::KeyEscape => {
stat!(crate::stats::KEY_ESCAPE);
str_escaped = true;
State::KeyChars
}
State::KeyEnd => {
stat!(crate::stats::KEY_END);
let ahead = (!byte_state.whitespace) >> chunk_offset;
let skip = ahead.trailing_zeros() as usize;
chunk_offset += skip;
if chunk_offset >= chunk_len {
break 'inner;
}
let byte = chunk[chunk_offset];
match byte {
b':' => {
if current_key_escaped {
unescape_str(current_key_raw, unescape_buf);
writer.escaped_key(unescape_buf.as_str().into());
} else {
writer.key(current_key_raw);
}
State::AfterColon
}
_ => State::Error,
}
}
State::AfterColon => {
stat!(crate::stats::AFTER_COLON);
let ahead = (!byte_state.whitespace) >> chunk_offset;
let skip = ahead.trailing_zeros() as usize;
chunk_offset += skip;
if chunk_offset >= chunk_len {
break 'inner;
}
let byte = chunk[chunk_offset];
match byte {
b'{' => {
if frames_depth >= MAX_JSON_DEPTH {
State::Error
} else {
frames_buf[frames_depth] = FrameKind::Object;
frames_depth += 1;
writer.start_object();
State::ObjectStart
}
}
b'[' => {
if frames_depth >= MAX_JSON_DEPTH {
State::Error
} else {
frames_buf[frames_depth] = FrameKind::Array;
frames_depth += 1;
writer.start_array();
State::ArrayStart
}
}
b'"' => {
str_start = pos + chunk_offset + 1;
str_escaped = false;
State::StringChars
}
_ => {
atom_start = pos + chunk_offset;
State::AtomChars
}
}
}
State::AtomChars => {
stat!(crate::stats::ATOM_CHARS);
let ahead = byte_state.delimiters >> chunk_offset;
let skip = ahead.trailing_zeros() as usize;
chunk_offset += skip;
if chunk_offset >= chunk_len {
break 'inner;
}
let byte = chunk[chunk_offset];
if !write_atom(&src[atom_start..pos + chunk_offset], &mut writer) {
State::Error
} else {
match byte {
b'}' => {
if frames_depth == 0
|| frames_buf[frames_depth - 1] != FrameKind::Object
{
State::Error
} else {
frames_depth -= 1;
writer.end_object();
State::AfterValue
}
}
b']' => {
if frames_depth == 0
|| frames_buf[frames_depth - 1] != FrameKind::Array
{
State::Error
} else {
frames_depth -= 1;
writer.end_array();
State::AfterValue
}
}
b',' => {
if frames_depth == 0 {
State::Error
} else {
match frames_buf[frames_depth - 1] {
FrameKind::Array => {
after_comma = true;
State::ArrayStart
}
FrameKind::Object => {
after_comma = true;
State::ObjectStart
}
}
}
}
_ => State::AfterValue, }
}
}
State::Error => break 'inner,
State::ObjectStart => {
stat!(crate::stats::OBJECT_START);
let ahead = (!byte_state.whitespace) >> chunk_offset;
let skip = ahead.trailing_zeros() as usize;
chunk_offset += skip;
if chunk_offset >= chunk_len {
break 'inner;
}
let byte = chunk[chunk_offset];
match byte {
b'"' => {
after_comma = false;
str_start = pos + chunk_offset + 1;
str_escaped = false;
State::KeyChars
}
b'}' => {
if after_comma {
State::Error
} else if frames_depth > 0
&& frames_buf[frames_depth - 1] == FrameKind::Object
{
frames_depth -= 1;
writer.end_object();
State::AfterValue
} else {
State::Error
}
}
_ => State::Error,
}
}
State::ArrayStart => {
stat!(crate::stats::ARRAY_START);
let ahead = (!byte_state.whitespace) >> chunk_offset;
let skip = ahead.trailing_zeros() as usize;
chunk_offset += skip;
if chunk_offset >= chunk_len {
break 'inner;
}
let byte = chunk[chunk_offset];
match byte {
b']' => {
if after_comma {
State::Error
} else if frames_depth > 0
&& frames_buf[frames_depth - 1] == FrameKind::Array
{
frames_depth -= 1;
writer.end_array();
State::AfterValue
} else {
State::Error
}
}
b'{' => {
after_comma = false;
if frames_depth >= MAX_JSON_DEPTH {
State::Error
} else {
frames_buf[frames_depth] = FrameKind::Object;
frames_depth += 1;
writer.start_object();
State::ObjectStart
}
}
b'[' => {
after_comma = false;
if frames_depth >= MAX_JSON_DEPTH {
State::Error
} else {
frames_buf[frames_depth] = FrameKind::Array;
frames_depth += 1;
writer.start_array();
State::ArrayStart
}
}
b'"' => {
after_comma = false;
str_start = pos + chunk_offset + 1;
str_escaped = false;
State::StringChars
}
_ => {
after_comma = false;
atom_start = pos + chunk_offset;
State::AtomChars
}
}
}
State::AfterValue => {
stat!(crate::stats::AFTER_VALUE);
let ahead = (!byte_state.whitespace) >> chunk_offset;
let skip = ahead.trailing_zeros() as usize;
chunk_offset += skip;
if chunk_offset >= chunk_len {
break 'inner;
}
let byte = chunk[chunk_offset];
match byte {
b',' => {
if frames_depth == 0 {
State::Error
} else {
match frames_buf[frames_depth - 1] {
FrameKind::Object => {
after_comma = true;
State::ObjectStart
}
FrameKind::Array => {
after_comma = true;
State::ArrayStart
}
}
}
}
b'}' => {
if frames_depth > 0 && frames_buf[frames_depth - 1] == FrameKind::Object
{
frames_depth -= 1;
writer.end_object();
State::AfterValue
} else {
State::Error
}
}
b']' => {
if frames_depth > 0 && frames_buf[frames_depth - 1] == FrameKind::Array
{
frames_depth -= 1;
writer.end_array();
State::AfterValue
} else {
State::Error
}
}
_ => State::Error,
}
}
};
chunk_offset += 1;
}
pos += chunk_len;
}
if state == State::AtomChars {
if !write_atom(&src[atom_start..], &mut writer) {
return None;
}
} else if state != State::AfterValue {
return None;
}
if state == State::Error {
return None;
}
if frames_depth != 0 {
return None;
}
writer.finish()
}
/// Decodes the JSON escape sequences in `s` into `out`, replacing whatever
/// `out` held before.
///
/// Handled escapes: `\"`, `\\`, `\/`, `\b`, `\f`, `\n`, `\r`, `\t` and
/// `\uXXXX` (exactly four hex digits), including UTF-16 surrogate pairs
/// written as two consecutive `\u` escapes.
///
/// The function is deliberately lenient and never fails:
/// * a `\u` that is not followed by four hex digits is dropped and the
///   following text is copied as-is;
/// * a lone surrogate produces no output;
/// * an unknown escape such as `\q` is copied through unchanged;
/// * a trailing lone backslash is dropped.
///
/// It never panics, whatever `s` contains: hex digits are read from the byte
/// slice, and the character after a backslash is always consumed whole, so
/// multi-byte characters next to an escape cannot cause a slice in the middle
/// of a character.
#[unsafe(no_mangle)]
#[inline(never)]
pub fn unescape_str(s: &str, out: &mut String) {
    /// Reads exactly four ASCII hex digits starting at byte offset `at`.
    fn hex4(bytes: &[u8], at: usize) -> Option<u16> {
        let digits = bytes.get(at..at.checked_add(4)?)?;
        digits.iter().try_fold(0u16, |acc, &d| {
            let nibble = char::from(d).to_digit(16)?;
            Some((acc << 4) | nibble as u16)
        })
    }

    out.clear();
    let bytes = s.as_bytes();
    let mut i = 0;
    while i < bytes.len() {
        if bytes[i] != b'\\' {
            // Copy the whole literal run up to the next backslash at once.
            // `i` is always on a character boundary here and a backslash is
            // ASCII, so both ends of the slice are boundaries.
            let run_end = bytes[i..]
                .iter()
                .position(|&b| b == b'\\')
                .map_or(bytes.len(), |off| i + off);
            out.push_str(&s[i..run_end]);
            i = run_end;
            continue;
        }

        i += 1;
        let Some(&escape) = bytes.get(i) else {
            // Trailing lone backslash.
            break;
        };

        let simple = match escape {
            b'"' => Some('"'),
            b'\\' => Some('\\'),
            b'/' => Some('/'),
            b'b' => Some('\x08'),
            b'f' => Some('\x0C'),
            b'n' => Some('\n'),
            b'r' => Some('\r'),
            b't' => Some('\t'),
            _ => None,
        };
        if let Some(ch) = simple {
            out.push(ch);
            i += 1;
            continue;
        }

        if escape == b'u' {
            i += 1;
            let Some(hi) = hex4(bytes, i) else {
                // Malformed escape: drop `\u`, copy what follows verbatim.
                continue;
            };
            i += 4;
            let is_high_surrogate = (0xD800..0xDC00).contains(&hi);
            if is_high_surrogate
                && bytes.get(i) == Some(&b'\\')
                && bytes.get(i + 1) == Some(&b'u')
            {
                let low = hex4(bytes, i + 2).filter(|lo| (0xDC00..=0xDFFF).contains(lo));
                if let Some(lo) = low {
                    let cp = 0x1_0000u32
                        + ((u32::from(hi) - 0xD800) << 10)
                        + (u32::from(lo) - 0xDC00);
                    if let Some(ch) = char::from_u32(cp) {
                        out.push(ch);
                        i += 6;
                        continue;
                    }
                }
            }
            // `from_u32` is `None` for a lone surrogate, which is then skipped.
            if let Some(ch) = char::from_u32(u32::from(hi)) {
                out.push(ch);
            }
            continue;
        }

        // Unknown escape: keep the backslash and the complete following
        // character. `i` sits right after an ASCII backslash, so it is a
        // character boundary and the string is non-empty from here.
        out.push('\\');
        if let Some(ch) = s[i..].chars().next() {
            out.push(ch);
            i += ch.len_utf8();
        }
    }
}
/// Bitmaps describing one chunk of up to 64 source bytes; bit `i` of each field
/// refers to byte `i` of the chunk.
///
/// * `whitespace` — bytes with value `<= 0x20`. The classifiers in this file
///   treat the unused tail of a short chunk as zero bytes, so those positions
///   are set as well.
/// * `quotes` — `"` bytes.
/// * `backslashes` — `\` bytes.
/// * `delimiters` — bytes that end a bare token: `whitespace`, `,`, `}` and `]`.
#[repr(C)]
#[derive(Debug, PartialEq)]
pub struct ByteState {
whitespace: u64, quotes: u64, backslashes: u64, delimiters: u64, }
/// Comparison operands for `classify_zmm`: one 64-byte row per character of
/// interest, each filled with that character.
///
/// The inline assembly addresses the rows by fixed byte offsets
/// (`0, 64, 128, ...`), so the field order and `#[repr(C)]` layout must stay
/// as they are.
#[repr(C)]
struct ByteStateConstants {
// offset 0
space: [u8; 64],
// offset 64
quote: [u8; 64],
// offset 128
backslash: [u8; 64],
// offset 192
comma: [u8; 64],
// offset 256
close_brace: [u8; 64],
// offset 320
close_bracket: [u8; 64],
}
impl ByteStateConstants {
    /// Builds the table: every row is 64 copies of its character.
    const fn new() -> Self {
        /// One row filled with `byte`.
        const fn splat(byte: u8) -> [u8; 64] {
            [byte; 64]
        }
        Self {
            space: splat(b' '),
            quote: splat(b'"'),
            backslash: splat(b'\\'),
            comma: splat(b','),
            close_brace: splat(b'}'),
            close_bracket: splat(b']'),
        }
    }
}
/// The single comparison table read by `classify_zmm`'s inline assembly.
static ZMM_CONSTANTS: ByteStateConstants = ByteStateConstants::new();
/// A chunk classifier: maps 1 to 64 source bytes to their [`ByteState`] bitmaps.
pub type ClassifyFn = fn(&[u8]) -> ByteState;
/// AVX2 classifier: builds the [`ByteState`] bitmaps for a chunk of 1 to 64
/// bytes using two 32-byte vector halves.
///
/// The chunk is copied into a zero-padded 64-byte buffer first, so padding
/// positions read as zero bytes (and therefore count as whitespace).
///
/// NOTE(review): this safe function calls AVX2 code without checking CPU
/// support itself; `choose_classifier` only returns it after detecting AVX2.
/// Direct callers must make the same check.
///
/// # Panics
/// Panics when `src` is empty or longer than 64 bytes.
pub fn classify_ymm(src: &[u8]) -> ByteState {
#[target_feature(enable = "avx2")]
unsafe fn imp(src: &[u8]) -> ByteState {
unsafe {
use std::arch::x86_64::{
__m256i, _mm256_cmpeq_epi8, _mm256_loadu_si256, _mm256_max_epu8,
_mm256_movemask_epi8, _mm256_set1_epi8,
};
assert!(!src.is_empty() && src.len() <= 64);
// Zero-padded copy so both 32-byte loads stay in bounds.
let mut buf = [0u8; 64];
buf[..src.len()].copy_from_slice(src);
let p = buf.as_ptr();
let v0 = _mm256_loadu_si256(p.add(0).cast::<__m256i>());
let v1 = _mm256_loadu_si256(p.add(32).cast::<__m256i>());
// Broadcast comparison operands.
let c_ws = _mm256_set1_epi8(0x20_u8 as i8);
let c_q = _mm256_set1_epi8(b'"' as i8);
let c_bs = _mm256_set1_epi8(b'\\' as i8);
let c_co = _mm256_set1_epi8(b',' as i8);
let c_cb = _mm256_set1_epi8(b'}' as i8);
let c_sb = _mm256_set1_epi8(b']' as i8);
// 32-bit lane mask, zero-extended (the `as u32` avoids sign extension).
macro_rules! movmsk {
($x:expr) => {
_mm256_movemask_epi8($x) as u32 as u64
};
}
// Unsigned `v <= 0x20`, expressed as `max(v, 0x20) == 0x20`.
macro_rules! ws {
($v:expr) => {
movmsk!(_mm256_cmpeq_epi8(_mm256_max_epu8($v, c_ws), c_ws))
};
}
macro_rules! eq {
($v:expr, $c:expr) => {
movmsk!(_mm256_cmpeq_epi8($v, $c))
};
}
// Low half supplies bits 0..32, high half bits 32..64.
macro_rules! combine2 {
($m0:expr, $m1:expr) => {
$m0 | ($m1 << 32)
};
}
let whitespace = combine2!(ws!(v0), ws!(v1));
let quotes = combine2!(eq!(v0, c_q), eq!(v1, c_q));
let backslashes = combine2!(eq!(v0, c_bs), eq!(v1, c_bs));
let commas = combine2!(eq!(v0, c_co), eq!(v1, c_co));
let cl_braces = combine2!(eq!(v0, c_cb), eq!(v1, c_cb));
let cl_brackets = combine2!(eq!(v0, c_sb), eq!(v1, c_sb));
let delimiters = whitespace | commas | cl_braces | cl_brackets;
ByteState {
whitespace,
quotes,
backslashes,
delimiters,
}
}
}
unsafe { imp(src) }
}
/// AVX-512BW classifier: builds the [`ByteState`] bitmaps for a chunk of 1 to
/// 64 bytes with a single masked 64-byte load.
///
/// Bytes beyond `src.len()` are zeroed by the load mask, so padding positions
/// read as zero bytes (and therefore count as whitespace).
///
/// NOTE(review): this safe function executes AVX-512 instructions without
/// checking CPU support itself; `choose_classifier` only returns it after
/// detecting `avx512bw`. Direct callers must make the same check.
///
/// # Panics
/// Panics when `src` is empty or longer than 64 bytes.
pub fn classify_zmm(src: &[u8]) -> ByteState {
#[target_feature(enable = "avx512bw")]
unsafe fn imp(src: &[u8]) -> ByteState {
assert!(!src.is_empty() && src.len() <= 64);
// One bit per valid byte; `1 << 64` would overflow, hence the special case.
let load_mask: u64 = if src.len() == 64 {
!0u64
} else {
(1u64 << src.len()) - 1
};
let whitespace: u64;
let quotes: u64;
let backslashes: u64;
let delimiters: u64;
unsafe {
// Instruction sequence:
//   k1   = load mask; zmm0 = masked, zero-filling load of the chunk
//   k2   = bytes <= ' ' (unsigned compare, predicate 2) -> whitespace
//   k3   = bytes == '"'                                  -> quotes
//   k4   = bytes == '\'                                  -> backslashes
//   k5   = bytes == ',', k6 = bytes == '}', k7 = bytes == ']'
//   k5   = k5 | k6 | k7 | k2                             -> delimiters
// The row offsets (0, 64, ..., 320) index into `ZMM_CONSTANTS`.
std::arch::asm!(
"kmovq k1, {load_mask}",
"vmovdqu8 zmm0 {{k1}}{{z}}, zmmword ptr [{src}]",
"vpcmpub k2, zmm0, zmmword ptr [{base} ], 2", "vpcmpeqb k3, zmm0, zmmword ptr [{base} + 64]", "vpcmpeqb k4, zmm0, zmmword ptr [{base} + 128]", "vpcmpeqb k5, zmm0, zmmword ptr [{base} + 192]", "vpcmpeqb k6, zmm0, zmmword ptr [{base} + 256]", "vpcmpeqb k7, zmm0, zmmword ptr [{base} + 320]", "korq k5, k5, k6", "korq k5, k5, k7", "korq k5, k5, k2", "kmovq {whitespace}, k2",
"kmovq {quotes}, k3",
"kmovq {backslashes}, k4",
"kmovq {delimiters}, k5",
src = in(reg) src.as_ptr(),
base = in(reg) &ZMM_CONSTANTS as *const ByteStateConstants,
load_mask = in(reg) load_mask,
whitespace = out(reg) whitespace,
quotes = out(reg) quotes,
backslashes = out(reg) backslashes,
delimiters = out(reg) delimiters,
// Clobbered scratch registers.
out("zmm0") _,
out("k1") _, out("k2") _, out("k3") _,
out("k4") _, out("k5") _, out("k6") _, out("k7") _,
options(nostack, readonly),
);
}
ByteState {
whitespace,
quotes,
backslashes,
delimiters,
}
}
unsafe { imp(src) }
}
/// Portable classifier: builds the [`ByteState`] bitmaps for a chunk of 1 to
/// 64 bytes using 64-bit integer arithmetic on eight bytes at a time.
///
/// The chunk is copied into a zero-padded 64-byte buffer first, so padding
/// positions read as zero bytes (and therefore count as whitespace), matching
/// the SIMD classifiers.
///
/// Byte equality uses a carry-free zero-byte test that is exact for every byte
/// position. The shorter `(v - 0x01..01) & !v & 0x80..80` trick only answers
/// "is there any zero byte": its borrow leaks into the next byte, which would
/// for example flag the `-` in `,-` as a comma.
///
/// # Panics
/// Panics when `src` is empty or longer than 64 bytes.
pub fn classify_u64(src: &[u8]) -> ByteState {
    const LOW7: u64 = 0x7f7f_7f7f_7f7f_7f7f;
    const HIGH: u64 = 0x8080_8080_8080_8080;
    const ONES: u64 = 0x0101_0101_0101_0101;

    /// High bit of each byte set exactly where that byte of `v` is zero.
    /// `(b & 0x7f) + 0x7f` never exceeds `0xfe`, so no carry crosses a byte.
    #[inline(always)]
    fn zero_bytes(v: u64) -> u64 {
        !(((v & LOW7) + LOW7) | v) & HIGH
    }
    /// High bit of each byte set exactly where that byte of `v` equals `b`.
    #[inline(always)]
    fn eq_byte(v: u64, b: u8) -> u64 {
        zero_bytes(v ^ (b as u64 * ONES))
    }
    /// Gathers the eight per-byte high bits into one byte (bit `i` = byte `i`).
    #[inline(always)]
    fn movemask8(v: u64) -> u8 {
        ((v & HIGH).wrapping_mul(0x0002_0408_1020_4081_u64) >> 56) as u8
    }

    assert!(!src.is_empty() && src.len() <= 64);
    let mut buf = [0u8; 64];
    buf[..src.len()].copy_from_slice(src);

    let mut ws = [0u8; 8];
    let mut q = [0u8; 8];
    let mut bs = [0u8; 8];
    let mut dl = [0u8; 8];
    for (i, word) in buf.chunks_exact(8).enumerate() {
        let v = u64::from_le_bytes(word.try_into().unwrap());
        // Whitespace is `byte <= 0x20`: the high bit must be clear and the low
        // seven bits plus 0x5f must not reach 0x80 (no carry across bytes,
        // since 0x7f + 0x5f = 0xde).
        let sum = (v & LOW7) + 0x5f5f_5f5f_5f5f_5f5f_u64;
        let w = !(sum | v) & HIGH;
        let quotes = eq_byte(v, b'"');
        let backslashes = eq_byte(v, b'\\');
        let delims = w | eq_byte(v, b',') | eq_byte(v, b'}') | eq_byte(v, b']');
        ws[i] = movemask8(w);
        q[i] = movemask8(quotes);
        bs[i] = movemask8(backslashes);
        dl[i] = movemask8(delims);
    }
    ByteState {
        whitespace: u64::from_le_bytes(ws),
        quotes: u64::from_le_bytes(q),
        backslashes: u64::from_le_bytes(bs),
        delimiters: u64::from_le_bytes(dl),
    }
}
/// Picks the fastest classifier the running CPU supports.
///
/// On x86_64 the order of preference is AVX-512BW (`classify_zmm`), then AVX2
/// (`classify_ymm`), then the portable `classify_u64`; on every other
/// architecture the portable classifier is returned.
pub fn choose_classifier() -> ClassifyFn {
    #[cfg(target_arch = "x86_64")]
    let chosen: ClassifyFn = if std::is_x86_feature_detected!("avx512bw") {
        classify_zmm
    } else if std::is_x86_feature_detected!("avx2") {
        classify_ymm
    } else {
        classify_u64
    };
    #[cfg(not(target_arch = "x86_64"))]
    let chosen: ClassifyFn = classify_u64;
    chosen
}
#[cfg(test)]
mod tests {
    use super::*;

    /// True when the running CPU can execute the AVX-512BW code paths.
    ///
    /// Tests that exercise those paths return early (pass vacuously) when this
    /// is false, instead of crashing with an illegal instruction or tripping
    /// the feature assertion in `parse_to_tape_zmm_tape`.
    #[cfg(target_arch = "x86_64")]
    fn has_avx512bw() -> bool {
        std::is_x86_feature_detected!("avx512bw")
    }

    /// All classifiers available on this CPU must produce identical bitmaps.
    #[test]
    fn classifier_agreement() {
        let inputs: &[&[u8]] = &[
            b"{}[]:,",
            b"\"hello\\world\"",
            b" \t\r\n ",
            b"{ \"key\" : \"val\\ue\" , [ 1, true, false, null ] } \x01",
            b"\x80\x81\x82\xff\xfe\xfd\xaa\xbb",
            b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
            b"x",
            b"0123456789abcdef",
            b"0123456789abcdef0123456789abcdef",
        ];
        for &input in inputs {
            let src = &input[..input.len().min(64)];
            let u64_result = classify_u64(src);
            #[cfg(target_arch = "x86_64")]
            {
                // Only run the SIMD classifiers the CPU actually supports.
                let ymm = std::is_x86_feature_detected!("avx2").then(|| classify_ymm(src));
                if let Some(ymm) = &ymm {
                    assert_eq!(
                        &u64_result, ymm,
                        "U64 vs YMM mismatch on input {:?}\n u64 ws={:#018x} ymm ws={:#018x}",
                        input, u64_result.whitespace, ymm.whitespace
                    );
                }
                if has_avx512bw() {
                    let zmm = classify_zmm(src);
                    if let Some(ymm) = &ymm {
                        assert_eq!(
                            ymm, &zmm,
                            "YMM vs ZMM mismatch on input {:?}\n ymm ws={:#018x} zmm ws={:#018x}",
                            input, ymm.whitespace, zmm.whitespace
                        );
                    }
                    assert_eq!(
                        u64_result, zmm,
                        "U64 vs ZMM mismatch on input {:?}\n u64 ws={:#018x} zmm ws={:#018x}",
                        input, u64_result.whitespace, zmm.whitespace
                    );
                }
            }
        }
    }

    /// Asserts that the assembly tape parser and the portable parser (driven
    /// by `classify_zmm`) produce the same tape for `src`.
    #[cfg(target_arch = "x86_64")]
    fn zmm_tape_matches_dyn(src: &str) {
        if !has_avx512bw() {
            return;
        }
        let ref_tape = parse_to_tape(src, classify_zmm)
            .unwrap_or_else(|| panic!("reference rejected: {src:?}"));
        let asm_tape = parse_to_tape_zmm_tape(src, None)
            .unwrap_or_else(|| panic!("zmm_tape rejected: {src:?}"));
        assert_eq!(
            ref_tape.entries, asm_tape.entries,
            "tape mismatch for {src:?}"
        );
    }

    /// Asserts that the assembly tape parser rejects `src`.
    #[cfg(target_arch = "x86_64")]
    fn zmm_tape_rejects(src: &str) {
        if !has_avx512bw() {
            return;
        }
        assert!(
            parse_to_tape_zmm_tape(src, None).is_none(),
            "zmm_tape should reject {src:?}"
        );
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_tape_atoms() {
        for src in &[
            "null",
            "true",
            "false",
            "0",
            "42",
            "-7",
            "3.14",
            "1e10",
            "-0.5e-3",
            "1",
            "12",
            "123",
            "1234",
            "12345",
            "123456",
            "1234567",
            "12345678",
            "123456789",
        ] {
            zmm_tape_matches_dyn(src);
        }
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_tape_strings() {
        for src in &[
            r#""hello""#,
            r#""""#,
            r#""with \"escape\"""#,
            r#""newline\nand\ttab""#,
            r#""\u0041\u0042\u0043""#,
            r#""\u0000""#,
            r#""surrogate \uD83D\uDE00""#,
        ] {
            zmm_tape_matches_dyn(src);
        }
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_tape_simple_object() {
        zmm_tape_matches_dyn(r#"{"x":1}"#);
        zmm_tape_matches_dyn(r#"{"a":1,"b":2,"c":3}"#);
        zmm_tape_matches_dyn(r#"{}"#);
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_tape_simple_array() {
        zmm_tape_matches_dyn(r#"[1,2,3]"#);
        zmm_tape_matches_dyn(r#"[]"#);
        zmm_tape_matches_dyn(r#"[null,true,false,"x",42]"#);
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_tape_nested() {
        zmm_tape_matches_dyn(r#"{"a":{"b":[1,true,null]}}"#);
        zmm_tape_matches_dyn(r#"[[1,[2,[3]]]]"#);
        zmm_tape_matches_dyn(r#"{"k":{"k":{"k":{}}}}"#);
        zmm_tape_matches_dyn(r#"[{"a":1},{"b":2}]"#);
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_tape_escaped_keys() {
        zmm_tape_matches_dyn(r#"{"key\nname":1}"#);
        zmm_tape_matches_dyn(r#"{"key\u0041":true}"#);
        zmm_tape_matches_dyn(r#"{"a\"b":null}"#);
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_tape_whitespace() {
        zmm_tape_matches_dyn(" { \"x\" : 1 } ");
        zmm_tape_matches_dyn("[ 1 , 2 , 3 ]");
        zmm_tape_matches_dyn("\t\r\nnull\t\r\n");
    }

    /// Strings longer than one 64-byte chunk, with and without an escape.
    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_tape_long_string() {
        let long = format!(r#""{}""#, "a".repeat(200));
        zmm_tape_matches_dyn(&long);
        let long_esc = format!(r#""{}\n{}""#, "b".repeat(100), "c".repeat(100));
        zmm_tape_matches_dyn(&long_esc);
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_tape_reject_invalid() {
        zmm_tape_rejects("");
        zmm_tape_rejects("{");
        zmm_tape_rejects("[");
        zmm_tape_rejects("}");
        zmm_tape_rejects(r#"{"a":}"#);
        zmm_tape_rejects(r#"{"a":1"#);
        // Leading zeros are not valid JSON numbers.
        zmm_tape_rejects("01");
        zmm_tape_rejects("00");
        zmm_tape_rejects("007");
        zmm_tape_rejects("01234567");
    }

    /// A deliberately tiny initial capacity must be grown transparently.
    #[cfg(target_arch = "x86_64")]
    #[test]
    fn zmm_tape_overflow_retry() {
        if !has_avx512bw() {
            return;
        }
        let big: String = {
            let mut s = String::from("[");
            for i in 0..200u32 {
                if i > 0 {
                    s.push(',');
                }
                s.push_str(&format!(r#"{{"k":{i}}}"#));
            }
            s.push(']');
            s
        };
        let tape = parse_to_tape_zmm_tape(&big, Some(4)).expect("overflow retry should succeed");
        assert_eq!(tape.root().unwrap().array_iter().unwrap().count(), 200);
    }
}