#![allow(clippy::missing_safety_doc)]
use facet_format::jit::JitScratch;
use super::jit_debug;
/// Single-word result used by helpers that return through one register:
/// either an input position or an error code widened to `isize`.
pub type JsonJitResult = isize;

/// Position/error pair returned by value from the JIT helper functions.
#[repr(C)]
pub struct JsonJitPosError {
    /// Input position reported by the helper.
    pub new_pos: usize,
    /// `0` on success, otherwise an error code.
    pub error: i32,
}

impl JsonJitPosError {
    /// Collapses the pair into a [`JsonJitResult`]: the position when
    /// `error` is zero, otherwise the error code itself.
    #[inline]
    pub fn into_result(self) -> JsonJitResult {
        match self.error {
            0 => self.new_pos as isize,
            code => code as isize,
        }
    }
}
/// Result of an "is this the end of the sequence?" probe.
///
/// The position and the end flag share one word: bit 63 carries the flag,
/// the low 63 bits carry the position.
#[repr(C)]
pub struct JsonJitPosEndError {
    /// Position in the low 63 bits, end flag in bit 63.
    pub packed_pos_end: usize,
    /// `0` on success, otherwise an error code.
    pub error: i32,
}

impl JsonJitPosEndError {
    /// Bit that stores the `is_end` flag.
    const END_FLAG: usize = 1usize << 63;
    /// Mask selecting the position bits.
    const POS_MASK: usize = 0x7FFFFFFFFFFFFFFF;

    /// Packs `new_pos` and `is_end` into one word alongside `error`.
    pub fn new(new_pos: usize, is_end: bool, error: i32) -> Self {
        let flag = if is_end { Self::END_FLAG } else { 0 };
        Self {
            packed_pos_end: new_pos | flag,
            error,
        }
    }

    /// Position stored in the low 63 bits.
    #[allow(dead_code)]
    pub fn new_pos(&self) -> usize {
        self.packed_pos_end & Self::POS_MASK
    }

    /// Whether bit 63 (the end flag) is set.
    #[allow(dead_code)]
    pub fn is_end(&self) -> bool {
        self.packed_pos_end & Self::END_FLAG != 0
    }
}
/// Result of parsing a boolean.
///
/// The position and the parsed value share one word: bit 63 carries the
/// boolean, the low 63 bits carry the position.
#[repr(C)]
pub struct JsonJitPosValueError {
    /// Position in the low 63 bits, boolean value in bit 63.
    pub packed_pos_value: usize,
    /// `0` on success, otherwise an error code.
    pub error: i32,
}

impl JsonJitPosValueError {
    /// Bit that stores the boolean value.
    const VALUE_FLAG: usize = 1usize << 63;
    /// Mask selecting the position bits.
    const POS_MASK: usize = 0x7FFFFFFFFFFFFFFF;

    /// Packs `new_pos` and `value` into one word alongside `error`.
    pub fn new(new_pos: usize, value: bool, error: i32) -> Self {
        let flag = if value { Self::VALUE_FLAG } else { 0 };
        Self {
            packed_pos_value: new_pos | flag,
            error,
        }
    }

    /// Position stored in the low 63 bits.
    #[allow(dead_code)]
    pub fn new_pos(&self) -> usize {
        self.packed_pos_value & Self::POS_MASK
    }

    /// Boolean stored in bit 63.
    #[allow(dead_code)]
    pub fn value(&self) -> bool {
        self.packed_pos_value & Self::VALUE_FLAG != 0
    }
}
/// Error codes stored in the `error` field of the helper result structs, or
/// returned directly as a negative `JsonJitResult`. `0` means success.
pub mod error {
/// The input ended where more bytes were required. `json_jit_skip_value`
/// also reports this code for a value that starts with an unrecognized byte.
pub const UNEXPECTED_EOF: i32 = -100;
/// A `[` was required at the current position.
pub const EXPECTED_ARRAY_START: i32 = -101;
/// Neither `true` nor `false` was found; `skip_literal` also reports this
/// code for any literal mismatch, including `null`.
pub const EXPECTED_BOOL: i32 = -102;
/// Inside an array, something other than `,` or `]` followed an element.
pub const EXPECTED_COMMA_OR_END: i32 = -103;
/// No digits were found where a number was required.
pub const EXPECTED_NUMBER: i32 = -104;
/// The number does not fit the target type (or, in the float slow path,
/// the standard-library float parser rejected the text).
pub const NUMBER_OVERFLOW: i32 = -105;
/// A `"` was required at the current position.
pub const EXPECTED_STRING: i32 = -106;
/// A backslash escape was malformed (unknown escape character, bad or
/// truncated `\u` sequence, unpaired high surrogate).
pub const INVALID_ESCAPE: i32 = -107;
/// The string contents are not valid UTF-8.
pub const INVALID_UTF8: i32 = -108;
/// A `{` was required at the current position.
pub const EXPECTED_OBJECT_START: i32 = -109;
/// Inside an object, something other than `,` or `}` followed a value.
pub const EXPECTED_COMMA_OR_BRACE: i32 = -110;
/// A `:` was required after an object key.
pub const EXPECTED_COLON: i32 = -111;
/// A raw byte below `0x20` appeared inside a string.
pub const CONTROL_CHAR_IN_STRING: i32 = -112;
/// Not referenced by the code visible here; presumably signals a construct
/// the JIT path does not handle — TODO confirm against callers.
pub const UNSUPPORTED: i32 = -1;
}
/// Returns the first index at or after `pos` that does not hold a space,
/// tab, line feed or carriage return. Returns `pos` unchanged when
/// `pos >= len`.
///
/// # Safety
/// `input` must be valid for reads of `len` bytes.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn json_jit_skip_ws(input: *const u8, len: usize, pos: usize) -> usize {
    let mut cursor = pos;
    while cursor < len {
        // SAFETY: `cursor < len`, and the caller guarantees `len` readable bytes.
        match unsafe { *input.add(cursor) } {
            b' ' | b'\t' | b'\n' | b'\r' => cursor += 1,
            _ => break,
        }
    }
    cursor
}
#[unsafe(no_mangle)]
pub unsafe extern "C" fn json_jit_seq_begin(
input: *const u8,
len: usize,
pos: usize,
) -> JsonJitPosError {
let pos = unsafe { json_jit_skip_ws(input, len, pos) };
if pos >= len {
return JsonJitPosError {
new_pos: pos,
error: error::UNEXPECTED_EOF,
};
}
let byte = unsafe { *input.add(pos) };
if byte != b'[' {
return JsonJitPosError {
new_pos: pos,
error: error::EXPECTED_ARRAY_START,
};
}
let pos = unsafe { json_jit_skip_ws(input, len, pos + 1) };
JsonJitPosError {
new_pos: pos,
error: 0,
}
}
/// Checks whether the byte at `pos` closes the current array.
///
/// When it is `]`, the bracket and any following whitespace are consumed and
/// the end flag is set. Otherwise `pos` is returned unchanged with the flag
/// clear. No leading whitespace is skipped here.
///
/// # Safety
/// `input` must be valid for reads of `len` bytes.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn json_jit_seq_is_end(
    input: *const u8,
    len: usize,
    pos: usize,
) -> JsonJitPosEndError {
    jit_debug!("[json_jit_seq_is_end] pos={}, len={}", pos, len);
    if pos >= len {
        jit_debug!("[json_jit_seq_is_end] EOF!");
        return JsonJitPosEndError::new(pos, false, error::UNEXPECTED_EOF);
    }

    let byte = unsafe { *input.add(pos) };
    jit_debug!("[json_jit_seq_is_end] byte='{}' ({})", byte as char, byte);
    if byte != b']' {
        jit_debug!("[json_jit_seq_is_end] -> is_end=false, new_pos={}", pos);
        return JsonJitPosEndError::new(pos, false, 0);
    }

    let pos = unsafe { json_jit_skip_ws(input, len, pos + 1) };
    jit_debug!("[json_jit_seq_is_end] -> is_end=true, new_pos={}", pos);
    JsonJitPosEndError::new(pos, true, 0)
}
#[unsafe(no_mangle)]
pub unsafe extern "C" fn json_jit_seq_next(
input: *const u8,
len: usize,
pos: usize,
) -> JsonJitPosError {
let pos = unsafe { json_jit_skip_ws(input, len, pos) };
if pos >= len {
return JsonJitPosError {
new_pos: pos,
error: error::UNEXPECTED_EOF,
};
}
let byte = unsafe { *input.add(pos) };
if byte == b',' {
let pos = unsafe { json_jit_skip_ws(input, len, pos + 1) };
JsonJitPosError {
new_pos: pos,
error: 0,
}
} else if byte == b']' {
JsonJitPosError {
new_pos: pos,
error: 0,
}
} else {
JsonJitPosError {
new_pos: pos,
error: error::EXPECTED_COMMA_OR_END,
}
}
}
#[unsafe(no_mangle)]
pub unsafe extern "C" fn json_jit_parse_bool(
input: *const u8,
len: usize,
pos: usize,
) -> JsonJitPosValueError {
let pos = unsafe { json_jit_skip_ws(input, len, pos) };
if pos + 4 <= len {
let slice = unsafe { std::slice::from_raw_parts(input.add(pos), 4) };
if slice == b"true" {
return JsonJitPosValueError::new(pos + 4, true, 0);
}
}
if pos + 5 <= len {
let slice = unsafe { std::slice::from_raw_parts(input.add(pos), 5) };
if slice == b"false" {
return JsonJitPosValueError::new(pos + 5, false, 0);
}
}
JsonJitPosValueError::new(pos, false, error::EXPECTED_BOOL)
}
#[unsafe(no_mangle)]
pub unsafe extern "C" fn json_jit_parse_i64(
out: *mut JsonJitI64Result,
input: *const u8,
len: usize,
pos: usize,
) {
if pos >= len {
unsafe {
*out = JsonJitI64Result {
new_pos: pos,
value: 0,
error: error::UNEXPECTED_EOF,
};
}
return;
}
let mut p = pos;
let mut is_negative = false;
if unsafe { *input.add(p) } == b'-' {
is_negative = true;
p += 1;
if p >= len {
unsafe {
*out = JsonJitI64Result {
new_pos: pos,
value: 0,
error: error::EXPECTED_NUMBER,
};
}
return;
}
}
let mut value: u64 = 0;
let mut digit_count = 0;
while p + 8 <= len && digit_count < 19 {
let word = unsafe { (input.add(p) as *const u64).read_unaligned() };
let less_than_zero = word.wrapping_sub(0x3030303030303030);
let greater_than_nine = word | 0x4646464646464646; let is_all_digits = (less_than_zero | greater_than_nine) & 0x8080808080808080 == 0;
if !is_all_digits {
break;
}
let digits = word.wrapping_sub(0x3030303030303030);
for i in 0..8 {
let digit = (digits >> (i * 8)) & 0xFF;
value = value * 10 + digit;
digit_count += 1;
}
p += 8;
}
while p < len && digit_count < 19 {
let byte = unsafe { *input.add(p) };
if !byte.is_ascii_digit() {
break;
}
let digit = (byte - b'0') as u64;
value = value * 10 + digit;
digit_count += 1;
p += 1;
}
if digit_count == 0 {
unsafe {
*out = JsonJitI64Result {
new_pos: pos,
value: 0,
error: error::EXPECTED_NUMBER,
};
}
return;
}
if p < len {
let byte = unsafe { *input.add(p) };
if byte.is_ascii_digit() {
unsafe {
*out = JsonJitI64Result {
new_pos: pos,
value: 0,
error: error::NUMBER_OVERFLOW,
};
}
return;
}
}
let signed_value = if is_negative {
if value > 9223372036854775808u64 {
unsafe {
*out = JsonJitI64Result {
new_pos: pos,
value: 0,
error: error::NUMBER_OVERFLOW,
};
}
return;
}
-(value as i64)
} else {
if value > 9223372036854775807u64 {
unsafe {
*out = JsonJitI64Result {
new_pos: pos,
value: 0,
error: error::NUMBER_OVERFLOW,
};
}
return;
}
value as i64
};
unsafe {
*out = JsonJitI64Result {
new_pos: p,
value: signed_value,
error: 0,
};
}
}
#[unsafe(no_mangle)]
pub unsafe extern "C" fn json_jit_parse_u64(
out: *mut JsonJitI64Result,
input: *const u8,
len: usize,
pos: usize,
) {
if pos >= len {
unsafe {
*out = JsonJitI64Result {
new_pos: pos,
value: 0,
error: error::UNEXPECTED_EOF,
};
}
return;
}
let mut p = pos;
let mut value: u64 = 0;
let mut digit_count = 0;
while p < len && digit_count < 19 {
let byte = unsafe { *input.add(p) };
if !byte.is_ascii_digit() {
break;
}
let digit = (byte - b'0') as u64;
value = value * 10 + digit;
digit_count += 1;
p += 1;
}
if digit_count == 0 {
unsafe {
*out = JsonJitI64Result {
new_pos: pos,
value: 0,
error: error::EXPECTED_NUMBER,
};
}
return;
}
if p < len {
let byte = unsafe { *input.add(p) };
if byte.is_ascii_digit() {
let digit = (byte - b'0') as u64;
if value > 1844674407370955161 || (value == 1844674407370955161 && digit > 5) {
unsafe {
*out = JsonJitI64Result {
new_pos: pos,
value: 0,
error: error::NUMBER_OVERFLOW,
};
}
return;
}
value = value * 10 + digit;
p += 1;
if p < len {
let byte = unsafe { *input.add(p) };
if byte.is_ascii_digit() {
unsafe {
*out = JsonJitI64Result {
new_pos: pos,
value: 0,
error: error::NUMBER_OVERFLOW,
};
}
return;
}
}
}
}
unsafe {
*out = JsonJitI64Result {
new_pos: p,
value: value as i64,
error: 0,
};
}
}
/// Out-parameter layout written by `json_jit_parse_i64` and
/// `json_jit_parse_u64`.
#[repr(C)]
pub struct JsonJitI64Result {
/// Index just past the number on success; the starting position on failure.
pub new_pos: usize,
/// Parsed value. `json_jit_parse_u64` stores its `u64` result here via an
/// `as i64` cast, so the bit pattern must be reinterpreted by the reader.
pub value: i64,
/// `0` on success, otherwise an error code.
pub error: i32,
}
/// Result layout produced by the `f64` parsing helpers.
#[repr(C)]
pub struct JsonJitF64Result {
/// Index just past the number on success; the starting position on failure.
pub new_pos: usize,
/// Parsed value; `0.0` on failure.
pub value: f64,
/// `0` on success, otherwise an error code.
pub error: i32,
}
/// Out-parameter layout for the string parsing helpers.
///
/// The bytes are either borrowed from the input (`owned == 0`) or are the
/// raw parts of a leaked `String` (`owned == 1`) that the reader must
/// reassemble to free.
#[repr(C)]
pub struct JsonJitStringResult {
    /// Index just past the closing quote on success.
    pub new_pos: usize,
    /// Start of the string bytes; null on error.
    pub ptr: *const u8,
    /// Length of the string in bytes.
    pub len: usize,
    /// Capacity of the leaked allocation; `0` for borrowed results.
    pub cap: usize,
    /// `1` when `ptr`/`len`/`cap` came from a leaked `String`, else `0`.
    pub owned: u8,
    /// `0` on success, otherwise an error code.
    pub error: i32,
}

impl JsonJitStringResult {
    /// Failure result: null pointer, zero sizes, and the given error code.
    fn error(pos: usize, code: i32) -> Self {
        Self {
            error: code,
            ..Self::borrowed(pos, std::ptr::null(), 0)
        }
    }

    /// Successful result that points into memory owned by someone else.
    fn borrowed(new_pos: usize, ptr: *const u8, len: usize) -> Self {
        Self {
            new_pos,
            ptr,
            len,
            cap: 0,
            owned: 0,
            error: 0,
        }
    }

    /// Successful result that takes over `s`'s heap allocation; the string
    /// is deliberately not dropped here.
    fn owned(new_pos: usize, s: String) -> Self {
        let leaked = std::mem::ManuallyDrop::new(s);
        Self {
            new_pos,
            ptr: leaked.as_ptr(),
            len: leaked.len(),
            cap: leaked.capacity(),
            owned: 1,
            error: 0,
        }
    }
}
/// C-ABI entry point for string parsing: runs `json_jit_parse_string_impl`
/// and stores its result in `out`.
///
/// # Safety
/// `input` must be valid for reads of `len` bytes, `out` must be valid for a
/// write of one `JsonJitStringResult`, and `scratch` must satisfy whatever
/// the implementation requires when the string contains escapes.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn json_jit_parse_string(
    out: *mut JsonJitStringResult,
    input: *const u8,
    len: usize,
    pos: usize,
    scratch: *mut JitScratch,
) {
    // `write` does not read or drop whatever `out` held before.
    unsafe { out.write(json_jit_parse_string_impl(input, len, pos, scratch)) };
}
fn json_jit_parse_string_impl(
input: *const u8,
len: usize,
pos: usize,
scratch: *mut JitScratch,
) -> JsonJitStringResult {
if pos >= len {
return JsonJitStringResult::error(pos, error::UNEXPECTED_EOF);
}
let byte = unsafe { *input.add(pos) };
if byte != b'"' {
return JsonJitStringResult::error(pos, error::EXPECTED_STRING);
}
let start = pos + 1;
let (hit_idx, hit_byte, is_ascii) =
match find_quote_or_backslash_with_ascii(unsafe { input.add(start) }, len - start) {
Some(result) => result,
None => return JsonJitStringResult::error(pos, error::UNEXPECTED_EOF),
};
if hit_byte == b'"' {
let string_len = hit_idx;
let ptr = unsafe { input.add(start) };
if is_ascii {
JsonJitStringResult::borrowed(start + hit_idx + 1, ptr, string_len)
} else {
let slice = unsafe { std::slice::from_raw_parts(ptr, string_len) };
match std::str::from_utf8(slice) {
Ok(_) => JsonJitStringResult::borrowed(start + hit_idx + 1, ptr, string_len),
Err(_) => JsonJitStringResult::error(pos, error::INVALID_UTF8),
}
}
} else {
parse_string_with_escapes(input, len, pos, start, start + hit_idx, scratch)
}
}
/// Scans `len` bytes at `ptr` for the first `"`, `\` or control byte
/// (`< 0x20`).
///
/// Returns `Some((index, byte, is_ascii))` for the first such byte, where
/// `is_ascii` is `false` if a byte with the high bit set was seen. The flag is
/// conservative: in the word-at-a-time path it also covers bytes *after* the
/// hit that share its word. Returns `None` when no stop byte exists.
///
/// `ptr` must be non-null and valid for reads of `len` bytes.
#[inline(always)]
fn find_quote_or_backslash_with_ascii(ptr: *const u8, len: usize) -> Option<(usize, u8, bool)> {
    type Chunk = usize;
    const STEP: usize = core::mem::size_of::<Chunk>();
    /// 0x01 in every byte.
    const ONE_BYTES: Chunk = Chunk::MAX / 255;
    /// 0x80 in every byte.
    const HIGH_BITS: Chunk = ONE_BYTES << 7;

    let slice = unsafe { std::slice::from_raw_parts(ptr, len) };
    let mut i = 0;
    let mut has_non_ascii = false;

    while i + STEP <= len {
        // Normalize to little-endian: input byte `k` is then always bits
        // `8k..8k+8`. The subtract-based detectors below can leave spurious
        // markers only in bytes *more significant* than a real hit (borrow
        // propagation), so the lowest marker is exact on every target.
        let chunk = Chunk::from_le(unsafe { ptr.add(i).cast::<Chunk>().read_unaligned() });
        if (chunk & HIGH_BITS) != 0 {
            has_non_ascii = true;
        }

        // High bit set in every byte that is < 0x20.
        let contains_ctrl = chunk.wrapping_sub(ONE_BYTES * 0x20) & !chunk & HIGH_BITS;
        // XOR turns the target byte into 0x00; then the classic zero-byte test.
        let chars_quote = chunk ^ (ONE_BYTES * (b'"' as Chunk));
        let contains_quote = chars_quote.wrapping_sub(ONE_BYTES) & !chars_quote & HIGH_BITS;
        let chars_backslash = chunk ^ (ONE_BYTES * (b'\\' as Chunk));
        let contains_backslash =
            chars_backslash.wrapping_sub(ONE_BYTES) & !chars_backslash & HIGH_BITS;

        let masked = contains_ctrl | contains_quote | contains_backslash;
        if masked != 0 {
            let hit_idx = i + masked.trailing_zeros() as usize / 8;
            return Some((hit_idx, slice[hit_idx], !has_non_ascii));
        }
        i += STEP;
    }

    // Fewer than STEP bytes remain: finish one byte at a time.
    while i < len {
        let b = slice[i];
        if b & 0x80 != 0 {
            has_non_ascii = true;
        }
        if b == b'"' || b == b'\\' || b < 0x20 {
            return Some((i, b, !has_non_ascii));
        }
        i += 1;
    }
    None
}
/// Returns `true` when no byte of `slice` has its high bit set, testing one
/// machine word at a time and finishing the remainder byte by byte.
#[inline]
fn is_ascii_swar(slice: &[u8]) -> bool {
    const WORD_SIZE: usize = core::mem::size_of::<usize>();
    /// 0x80 in every byte of a word.
    const HI_MASK: usize = usize::from_ne_bytes([0x80; WORD_SIZE]);

    let mut words = slice.chunks_exact(WORD_SIZE);
    for bytes in words.by_ref() {
        let mut raw = [0u8; WORD_SIZE];
        raw.copy_from_slice(bytes);
        if usize::from_ne_bytes(raw) & HI_MASK != 0 {
            return false;
        }
    }
    // Whatever did not fill a whole word.
    words.remainder().iter().all(|&byte| byte & 0x80 == 0)
}
/// Slow path for strings that contain at least one backslash escape.
///
/// Decodes the string into a byte buffer taken from `jit_scratch`, then
/// returns a freshly allocated owned copy of the decoded text. The buffer is
/// handed back to `jit_scratch` on every exit path.
///
/// - `pos`: index of the opening quote; used as `new_pos` in every error.
/// - `start`: index of the first content byte (`pos + 1` at the call site).
/// - `first_escape_pos`: index of the first backslash, as located by the
///   caller's scan (checked by a `debug_assert!` only).
///
/// Errors: `UNEXPECTED_EOF` (input ends inside the string),
/// `INVALID_ESCAPE` (unknown escape, malformed/truncated `\u`, high surrogate
/// without a valid low surrogate), `CONTROL_CHAR_IN_STRING`, `INVALID_UTF8`.
#[inline(never)]
fn parse_string_with_escapes(
input: *const u8,
len: usize,
pos: usize,
start: usize,
first_escape_pos: usize,
jit_scratch: *mut JitScratch,
) -> JsonJitStringResult {
// Upper bound on the decoded size: everything from `start` to end of input.
let capacity_hint = len - start;
let mut scratch = unsafe { take_scratch_buffer(jit_scratch, capacity_hint) };
scratch.clear();
// Copy the escape-free prefix verbatim, noting whether it is pure ASCII.
let prefix_len = first_escape_pos - start;
let mut has_non_ascii = false;
if prefix_len > 0 {
let prefix = unsafe { std::slice::from_raw_parts(input.add(start), prefix_len) };
has_non_ascii = !is_ascii_swar(prefix);
scratch.extend_from_slice(prefix);
}
// Invariant at the top of each iteration: `p` is the index of a backslash.
let mut p = first_escape_pos;
loop {
debug_assert!(p < len && unsafe { *input.add(p) } == b'\\');
// Step onto the escape character.
p += 1;
if p >= len {
unsafe { save_scratch_buffer(jit_scratch, scratch) };
return JsonJitStringResult::error(pos, error::UNEXPECTED_EOF);
}
let escaped = unsafe { *input.add(p) };
match escaped {
b'"' => scratch.push(b'"'),
b'\\' => scratch.push(b'\\'),
b'/' => scratch.push(b'/'),
b'b' => scratch.push(b'\x08'),
b'f' => scratch.push(b'\x0C'),
b'n' => scratch.push(b'\n'),
b'r' => scratch.push(b'\r'),
b't' => scratch.push(b'\t'),
b'u' => {
// `p` is at the 'u'; the four hex digits occupy p+1..=p+4.
if p + 4 >= len {
unsafe { save_scratch_buffer(jit_scratch, scratch) };
return JsonJitStringResult::error(pos, error::INVALID_ESCAPE);
}
let slice = unsafe { std::slice::from_raw_parts(input.add(p + 1), 4) };
let code_point =
match decode_four_hex_digits(slice[0], slice[1], slice[2], slice[3]) {
Some(n) => n,
None => {
unsafe { save_scratch_buffer(jit_scratch, scratch) };
return JsonJitStringResult::error(pos, error::INVALID_ESCAPE);
}
};
// High surrogate: must be followed immediately by `\uXXXX` holding a
// low surrogate (six more bytes at p+5..=p+10).
if (0xD800..=0xDBFF).contains(&code_point) {
if p + 10 < len {
let maybe_low = unsafe { std::slice::from_raw_parts(input.add(p + 5), 6) };
if maybe_low[0] == b'\\'
&& maybe_low[1] == b'u'
&& let Some(low_point) = decode_four_hex_digits(
maybe_low[2],
maybe_low[3],
maybe_low[4],
maybe_low[5],
)
&& (0xDC00..=0xDFFF).contains(&low_point)
{
has_non_ascii = true;
// Combine the surrogate pair into one supplementary-plane scalar.
let full = 0x10000
+ ((code_point as u32 - 0xD800) << 10)
+ (low_point as u32 - 0xDC00);
push_utf8_codepoint(full, &mut scratch);
// Land on the last hex digit of the low surrogate; the shared
// `p += 1` below then steps past the whole pair.
p += 10; } else {
unsafe { save_scratch_buffer(jit_scratch, scratch) };
return JsonJitStringResult::error(pos, error::INVALID_ESCAPE);
}
} else {
unsafe { save_scratch_buffer(jit_scratch, scratch) };
return JsonJitStringResult::error(pos, error::INVALID_ESCAPE);
}
} else {
// Any other code unit is encoded as-is. A lone low surrogate is
// encoded too and sets `has_non_ascii`, so the final UTF-8
// validation rejects it.
if code_point >= 0x80 {
has_non_ascii = true;
}
push_utf8_codepoint(code_point as u32, &mut scratch);
// Land on the last hex digit; the shared `p += 1` steps past it.
p += 4; }
}
_ => {
unsafe { save_scratch_buffer(jit_scratch, scratch) };
return JsonJitStringResult::error(pos, error::INVALID_ESCAPE);
}
}
// Step past the escape sequence just handled.
p += 1;
if p >= len {
unsafe { save_scratch_buffer(jit_scratch, scratch) };
return JsonJitStringResult::error(pos, error::UNEXPECTED_EOF);
}
// Find the next quote, backslash or control byte and copy the literal
// run in front of it.
match find_special_byte_with_ascii(unsafe { input.add(p) }, len - p) {
Some((idx, hit_byte, is_ascii)) => {
if idx > 0 {
let literal = unsafe { std::slice::from_raw_parts(input.add(p), idx) };
scratch.extend_from_slice(literal);
}
if !is_ascii {
has_non_ascii = true;
}
p += idx;
if hit_byte == b'"' {
// Closing quote: validate only if something non-ASCII was seen,
// then copy the decoded bytes into a new String for the caller.
let result_string = if has_non_ascii {
match std::str::from_utf8(&scratch) {
Ok(s) => s.to_owned(),
Err(_) => {
unsafe { save_scratch_buffer(jit_scratch, scratch) };
return JsonJitStringResult::error(pos, error::INVALID_UTF8);
}
}
} else {
unsafe { std::str::from_utf8_unchecked(&scratch) }.to_owned()
};
scratch.clear();
unsafe { save_scratch_buffer(jit_scratch, scratch) };
return JsonJitStringResult::owned(p + 1, result_string);
} else if hit_byte == b'\\' {
// Another escape: fall through to the next loop iteration with
// `p` on the backslash.
} else {
unsafe { save_scratch_buffer(jit_scratch, scratch) };
return JsonJitStringResult::error(pos, error::CONTROL_CHAR_IN_STRING);
}
}
None => {
unsafe { save_scratch_buffer(jit_scratch, scratch) };
return JsonJitStringResult::error(pos, error::UNEXPECTED_EOF);
}
}
}
}
/// Scans `len` bytes at `ptr` for the first `"`, `\` or control byte
/// (`< 0x20`).
///
/// Returns `Some((index, byte, is_ascii))` for the first such byte, where
/// `is_ascii` is `false` if a byte with the high bit set was seen. The flag is
/// conservative: in the word-at-a-time path it also covers bytes *after* the
/// hit that share its word. Returns `None` when no stop byte exists.
///
/// `ptr` must be non-null and valid for reads of `len` bytes.
#[inline(always)]
fn find_special_byte_with_ascii(ptr: *const u8, len: usize) -> Option<(usize, u8, bool)> {
    type Chunk = usize;
    const STEP: usize = core::mem::size_of::<Chunk>();
    /// 0x01 in every byte.
    const ONE_BYTES: Chunk = Chunk::MAX / 255;
    /// 0x80 in every byte.
    const HIGH_BITS: Chunk = ONE_BYTES << 7;

    let slice = unsafe { std::slice::from_raw_parts(ptr, len) };
    let mut i = 0;
    let mut has_non_ascii = false;

    while i + STEP <= len {
        // Normalize to little-endian: input byte `k` is then always bits
        // `8k..8k+8`. The subtract-based detectors below can leave spurious
        // markers only in bytes *more significant* than a real hit (borrow
        // propagation), so the lowest marker is exact on every target.
        let chunk = Chunk::from_le(unsafe { ptr.add(i).cast::<Chunk>().read_unaligned() });
        if (chunk & HIGH_BITS) != 0 {
            has_non_ascii = true;
        }

        // High bit set in every byte that is < 0x20.
        let contains_ctrl = chunk.wrapping_sub(ONE_BYTES * 0x20) & !chunk & HIGH_BITS;
        // XOR turns the target byte into 0x00; then the classic zero-byte test.
        let chars_quote = chunk ^ (ONE_BYTES * (b'"' as Chunk));
        let contains_quote = chars_quote.wrapping_sub(ONE_BYTES) & !chars_quote & HIGH_BITS;
        let chars_backslash = chunk ^ (ONE_BYTES * (b'\\' as Chunk));
        let contains_backslash =
            chars_backslash.wrapping_sub(ONE_BYTES) & !chars_backslash & HIGH_BITS;

        let masked = contains_ctrl | contains_quote | contains_backslash;
        if masked != 0 {
            let hit_idx = i + masked.trailing_zeros() as usize / 8;
            return Some((hit_idx, slice[hit_idx], !has_non_ascii));
        }
        i += STEP;
    }

    // Fewer than STEP bytes remain: finish one byte at a time.
    while i < len {
        let b = slice[i];
        if b & 0x80 != 0 {
            has_non_ascii = true;
        }
        if b == b'"' || b == b'\\' || b < 0x20 {
            return Some((i, b, !has_non_ascii));
        }
        i += 1;
    }
    None
}
unsafe fn take_scratch_buffer(jit_scratch: *mut JitScratch, capacity_hint: usize) -> Vec<u8> {
let scratch = unsafe { &mut *jit_scratch };
if scratch.string_scratch_ptr.is_null() {
return Vec::with_capacity(capacity_hint);
}
let vec = unsafe {
Vec::from_raw_parts(
scratch.string_scratch_ptr,
scratch.string_scratch_len,
scratch.string_scratch_cap,
)
};
scratch.string_scratch_ptr = std::ptr::null_mut();
scratch.string_scratch_len = 0;
scratch.string_scratch_cap = 0;
vec
}
/// Stores `buf`'s raw parts (pointer, length, capacity) in `jit_scratch` and
/// relinquishes ownership of the allocation without freeing it.
///
/// Any pointer previously stored in the scratch fields is overwritten.
///
/// # Safety
/// `jit_scratch` must point to a valid `JitScratch`.
unsafe fn save_scratch_buffer(jit_scratch: *mut JitScratch, buf: Vec<u8>) {
    // ManuallyDrop keeps the allocation alive after this function returns.
    let mut parked = std::mem::ManuallyDrop::new(buf);
    let state = unsafe { &mut *jit_scratch };
    state.string_scratch_ptr = parked.as_mut_ptr();
    state.string_scratch_len = parked.len();
    state.string_scratch_cap = parked.capacity();
}
/// Lookup table for a hex digit in the low-nibble position: maps an ASCII
/// byte to its value `0..=15`, or `-1` when the byte is not a hex digit.
static HEX0: [i16; 256] = {
    // Start from "invalid" everywhere, then fill in the three digit ranges.
    let mut lut = [-1i16; 256];

    let mut n = 0u8;
    while n < 10 {
        lut[(b'0' + n) as usize] = n as i16;
        n += 1;
    }

    let mut n = 0u8;
    while n < 6 {
        lut[(b'A' + n) as usize] = (10 + n) as i16;
        lut[(b'a' + n) as usize] = (10 + n) as i16;
        n += 1;
    }

    lut
};
/// Lookup table for a hex digit in the high-nibble position: maps an ASCII
/// byte to its value shifted left by four (`0x00..=0xF0`), or `-1` when the
/// byte is not a hex digit.
static HEX1: [i16; 256] = {
    // Start from "invalid" everywhere, then fill in the three digit ranges.
    let mut lut = [-1i16; 256];

    let mut n = 0u8;
    while n < 10 {
        lut[(b'0' + n) as usize] = (n as i16) << 4;
        n += 1;
    }

    let mut n = 0u8;
    while n < 6 {
        lut[(b'A' + n) as usize] = ((10 + n) as i16) << 4;
        lut[(b'a' + n) as usize] = ((10 + n) as i16) << 4;
        n += 1;
    }

    lut
};
/// Decodes four ASCII hex digits (most significant first) into a 16-bit
/// value, or returns `None` if any of them is not a hex digit.
///
/// Invalid digits map to `-1` in the lookup tables; OR-ing in a `-1` makes
/// the combined value negative, so one sign test covers all four digits.
#[inline]
fn decode_four_hex_digits(a: u8, b: u8, c: u8, d: u8) -> Option<u16> {
    let high_byte = (HEX1[a as usize] | HEX0[b as usize]) as i32;
    let low_byte = (HEX1[c as usize] | HEX0[d as usize]) as i32;
    let combined = (high_byte << 8) | low_byte;
    if combined < 0 {
        None
    } else {
        Some(combined as u16)
    }
}
/// Appends the UTF-8 encoding of `n` to `scratch`.
///
/// Values in the surrogate range are encoded as three bytes like any other
/// BMP value (producing bytes that are not valid UTF-8). Values above
/// `0x10FFFF` append nothing.
#[inline]
fn push_utf8_codepoint(n: u32, scratch: &mut Vec<u8>) {
    if n < 0x80 {
        scratch.push(n as u8);
        return;
    }

    // Room for the longest encoding up front, as before.
    scratch.reserve(4);

    // Continuation byte carrying the six bits of `n` that start at `shift`.
    let continuation = |shift: u32| ((n >> shift) & 0b0011_1111) as u8 | 0b1000_0000;

    match n {
        0..=0x7F => unreachable!(),
        0x80..=0x7FF => scratch.extend_from_slice(&[
            ((n >> 6) & 0b0001_1111) as u8 | 0b1100_0000,
            continuation(0),
        ]),
        0x800..=0xFFFF => scratch.extend_from_slice(&[
            ((n >> 12) & 0b0000_1111) as u8 | 0b1110_0000,
            continuation(6),
            continuation(0),
        ]),
        0x1_0000..=0x10_FFFF => scratch.extend_from_slice(&[
            ((n >> 18) & 0b0000_0111) as u8 | 0b1111_0000,
            continuation(12),
            continuation(6),
            continuation(0),
        ]),
        _ => {}
    }
}
/// Returns the index of the first `"` or `\` in the `len` bytes at `input`,
/// or `-1` when neither occurs.
///
/// # Safety
/// `input` must be non-null and valid for reads of `len` bytes.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn json_jit_memchr2_quote_backslash(input: *const u8, len: usize) -> isize {
    let haystack = unsafe { std::slice::from_raw_parts(input, len) };
    memchr::memchr2(b'"', b'\\', haystack).map_or(-1, |idx| idx as isize)
}
/// Prepares the string scratch buffer for a new string: afterwards the
/// buffer is empty (`len == 0`) and has capacity for at least
/// `capacity_hint` bytes.
///
/// An existing allocation is reused (and grown if it is too small);
/// otherwise a new one is created. Previous contents are discarded.
///
/// # Safety
/// `scratch` must point to a valid `JitScratch` whose string-scratch fields
/// are either null or the raw parts of a live `Vec<u8>` allocation.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn json_jit_scratch_take(scratch: *mut JitScratch, capacity_hint: usize) {
    let jit_scratch = unsafe { &mut *scratch };

    let mut vec = if jit_scratch.string_scratch_ptr.is_null() {
        Vec::<u8>::with_capacity(capacity_hint)
    } else {
        // Length 0: whatever the buffer held before is dropped from view.
        unsafe {
            Vec::from_raw_parts(
                jit_scratch.string_scratch_ptr,
                0,
                jit_scratch.string_scratch_cap,
            )
        }
    };

    // `reserve` takes the number of bytes needed *beyond the length* (0
    // here), so the full hint is passed. It is a no-op when the capacity is
    // already sufficient.
    vec.reserve(capacity_hint);

    jit_scratch.string_scratch_ptr = vec.as_mut_ptr();
    jit_scratch.string_scratch_len = 0;
    jit_scratch.string_scratch_cap = vec.capacity();
    std::mem::forget(vec);
}
/// Appends `src_len` bytes from `src` to the string scratch buffer, growing
/// it if necessary, and stores the updated raw parts back in `scratch`.
///
/// # Safety
/// `scratch` must point to a valid `JitScratch` whose string-scratch fields
/// are the raw parts of a live `Vec<u8>` allocation (not null), and `src`
/// must be valid for reads of `src_len` bytes.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn json_jit_scratch_extend(
    scratch: *mut JitScratch,
    src: *const u8,
    src_len: usize,
) {
    let state = unsafe { &mut *scratch };
    // ManuallyDrop: the allocation stays owned by `state`, never freed here.
    let mut buffer = std::mem::ManuallyDrop::new(unsafe {
        Vec::from_raw_parts(
            state.string_scratch_ptr,
            state.string_scratch_len,
            state.string_scratch_cap,
        )
    });

    buffer.extend_from_slice(unsafe { std::slice::from_raw_parts(src, src_len) });

    state.string_scratch_ptr = buffer.as_mut_ptr();
    state.string_scratch_len = buffer.len();
    state.string_scratch_cap = buffer.capacity();
}
/// Appends one byte to the string scratch buffer, growing it if necessary,
/// and stores the updated raw parts back in `scratch`.
///
/// # Safety
/// `scratch` must point to a valid `JitScratch` whose string-scratch fields
/// are the raw parts of a live `Vec<u8>` allocation (not null).
#[unsafe(no_mangle)]
pub unsafe extern "C" fn json_jit_scratch_push_byte(scratch: *mut JitScratch, byte: u8) {
    let state = unsafe { &mut *scratch };
    // ManuallyDrop: the allocation stays owned by `state`, never freed here.
    let mut buffer = std::mem::ManuallyDrop::new(unsafe {
        Vec::from_raw_parts(
            state.string_scratch_ptr,
            state.string_scratch_len,
            state.string_scratch_cap,
        )
    });

    buffer.push(byte);

    state.string_scratch_ptr = buffer.as_mut_ptr();
    state.string_scratch_len = buffer.len();
    state.string_scratch_cap = buffer.capacity();
}
#[unsafe(no_mangle)]
pub unsafe extern "C" fn json_jit_decode_unicode_escape(
scratch: *mut JitScratch,
input: *const u8,
remaining_len: usize,
) -> isize {
if remaining_len < 4 {
return error::INVALID_ESCAPE as isize;
}
let slice = unsafe { std::slice::from_raw_parts(input, remaining_len) };
let code_point = match decode_four_hex_digits(slice[0], slice[1], slice[2], slice[3]) {
Some(n) => n,
None => return error::INVALID_ESCAPE as isize,
};
if (0xD800..=0xDBFF).contains(&code_point) {
if remaining_len < 10 || slice[4] != b'\\' || slice[5] != b'u' {
return error::INVALID_ESCAPE as isize;
}
let low_point = match decode_four_hex_digits(slice[6], slice[7], slice[8], slice[9]) {
Some(n) if (0xDC00..=0xDFFF).contains(&n) => n,
_ => return error::INVALID_ESCAPE as isize,
};
let full = 0x10000 + ((code_point as u32 - 0xD800) << 10) + (low_point as u32 - 0xDC00);
let jit_scratch = unsafe { &mut *scratch };
let mut vec = unsafe {
Vec::from_raw_parts(
jit_scratch.string_scratch_ptr,
jit_scratch.string_scratch_len,
jit_scratch.string_scratch_cap,
)
};
push_utf8_codepoint(full, &mut vec);
jit_scratch.string_scratch_ptr = vec.as_mut_ptr();
jit_scratch.string_scratch_len = vec.len();
jit_scratch.string_scratch_cap = vec.capacity();
std::mem::forget(vec);
10 } else if (0xDC00..=0xDFFF).contains(&code_point) {
error::INVALID_ESCAPE as isize
} else {
let jit_scratch = unsafe { &mut *scratch };
let mut vec = unsafe {
Vec::from_raw_parts(
jit_scratch.string_scratch_ptr,
jit_scratch.string_scratch_len,
jit_scratch.string_scratch_cap,
)
};
push_utf8_codepoint(code_point as u32, &mut vec);
jit_scratch.string_scratch_ptr = vec.as_mut_ptr();
jit_scratch.string_scratch_len = vec.len();
jit_scratch.string_scratch_cap = vec.capacity();
std::mem::forget(vec);
4 }
}
#[unsafe(no_mangle)]
pub unsafe extern "C" fn json_jit_scratch_finalize_string(
scratch: *mut JitScratch,
out: *mut JsonJitStringResult,
new_pos: usize,
is_ascii: u8,
) {
let jit_scratch = unsafe { &mut *scratch };
let vec = unsafe {
Vec::from_raw_parts(
jit_scratch.string_scratch_ptr,
jit_scratch.string_scratch_len,
jit_scratch.string_scratch_cap,
)
};
let result = if is_ascii != 0 {
let s = unsafe { String::from_utf8_unchecked(vec) };
JsonJitStringResult::owned(new_pos, s)
} else {
match String::from_utf8(vec) {
Ok(s) => JsonJitStringResult::owned(new_pos, s),
Err(e) => {
let vec = e.into_bytes();
jit_scratch.string_scratch_ptr = vec.as_ptr() as *mut u8;
jit_scratch.string_scratch_len = 0; jit_scratch.string_scratch_cap = vec.capacity();
std::mem::forget(vec);
JsonJitStringResult::error(new_pos, error::INVALID_UTF8)
}
}
};
let new_vec = Vec::<u8>::with_capacity(64);
jit_scratch.string_scratch_ptr = new_vec.as_ptr() as *mut u8;
jit_scratch.string_scratch_len = 0;
jit_scratch.string_scratch_cap = new_vec.capacity();
std::mem::forget(new_vec);
unsafe { out.write(result) };
}
/// C-ABI wrapper around `is_ascii_swar`: returns `1` when none of the `len`
/// bytes at `input` has its high bit set, otherwise `0`.
///
/// # Safety
/// `input` must be non-null and valid for reads of `len` bytes.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn json_jit_is_ascii(input: *const u8, len: usize) -> u8 {
    let bytes = unsafe { std::slice::from_raw_parts(input, len) };
    u8::from(is_ascii_swar(bytes))
}
/// Out-parameter variant of `json_jit_parse_f64`: parses a float starting at
/// `pos` and stores the result in `out`.
///
/// # Safety
/// `input` must be valid for reads of `len` bytes and `out` must be valid for
/// a write of one `JsonJitF64Result`.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn json_jit_parse_f64_out(
    out: *mut JsonJitF64Result,
    input: *const u8,
    len: usize,
    pos: usize,
) {
    unsafe { out.write(json_jit_parse_f64_impl(input, len, pos)) };
}
/// Parses a float starting at `pos` and returns the result by value.
///
/// Thin C-ABI wrapper around `json_jit_parse_f64_impl`, whose implementation
/// is selected by the `lexical-parse` feature.
///
/// # Safety
/// `input` must be valid for reads of `len` bytes.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn json_jit_parse_f64(
input: *const u8,
len: usize,
pos: usize,
) -> JsonJitF64Result {
json_jit_parse_f64_impl(input, len, pos)
}
/// Float parser used when the `lexical-parse` feature is enabled.
///
/// Hands up to 64 bytes starting at `pos` to
/// `f64::from_lexical_partial` and reports how many of them it consumed.
/// Any parser error is reported as `EXPECTED_NUMBER` with `new_pos == pos`.
#[cfg(feature = "lexical-parse")]
#[inline(always)]
fn json_jit_parse_f64_impl(input: *const u8, len: usize, pos: usize) -> JsonJitF64Result {
use lexical_parse_float::FromLexical;
// NOTE(review): `len - pos` underflows if a caller ever passes `pos > len`;
// this path has no `pos >= len` guard — confirm callers guarantee it.
let remaining = len - pos;
// Only the first 64 bytes are shown to the parser.
// NOTE(review): a number literal longer than 64 bytes would be parsed from
// its truncated prefix, leaving `new_pos` inside the literal — confirm this
// limit is intentional.
let slice_len = remaining.min(64);
let slice = unsafe { std::slice::from_raw_parts(input.add(pos), slice_len) };
match f64::from_lexical_partial(slice) {
Ok((value, consumed)) => JsonJitF64Result {
new_pos: pos + consumed,
value,
error: 0,
},
Err(_) => JsonJitF64Result {
new_pos: pos,
value: 0.0,
error: error::EXPECTED_NUMBER,
},
}
}
/// Negative powers of ten, `POW10_NEG[k] == 10^-k` for `k` in `0..=19`.
/// Indexed by the number of fractional digits accumulated in the fallback
/// float fast path (which caps that count at 19).
#[cfg(not(feature = "lexical-parse"))]
static POW10_NEG: [f64; 20] = [
1e0, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9, 1e-10, 1e-11, 1e-12, 1e-13, 1e-14,
1e-15, 1e-16, 1e-17, 1e-18, 1e-19,
];
/// Float parser used when the `lexical-parse` feature is disabled.
///
/// Fast path: accumulates up to 19 integer digits and up to 19 fractional
/// digits as integers and combines them as
/// `int_part + frac_part * 10^-frac_digits`. Inputs with more than 19 integer
/// digits or with an exponent are delegated to `json_jit_parse_f64_slow`.
///
/// Returns `EXPECTED_NUMBER` (with `new_pos == pos`) when neither an integer
/// nor a fractional digit was found.
#[cfg(not(feature = "lexical-parse"))]
fn json_jit_parse_f64_impl(input: *const u8, len: usize, pos: usize) -> JsonJitF64Result {
let mut p = pos;
let start = p;
// Optional leading minus sign.
let is_negative = if p < len && unsafe { *input.add(p) } == b'-' {
p += 1;
true
} else {
false
};
// Integer part: at most 19 digits, which always fit in a u64.
let mut int_part: u64 = 0;
let mut int_digits = 0;
while p < len && int_digits < 19 {
let byte = unsafe { *input.add(p) };
if byte.is_ascii_digit() {
let digit = (byte - b'0') as u64;
int_part = int_part * 10 + digit;
int_digits += 1;
p += 1;
} else {
break;
}
}
// A 20th integer digit: too long for the fast path, restart in the slow one.
if p < len {
let byte = unsafe { *input.add(p) };
if byte.is_ascii_digit() {
return json_jit_parse_f64_slow(input, len, start);
}
}
// Fractional part: the first 19 digits contribute to the value ...
let mut frac_part: u64 = 0;
let mut frac_digits = 0;
if p < len && unsafe { *input.add(p) } == b'.' {
p += 1;
while p < len && frac_digits < 19 {
let byte = unsafe { *input.add(p) };
if byte.is_ascii_digit() {
let digit = (byte - b'0') as u64;
frac_part = frac_part * 10 + digit;
frac_digits += 1;
p += 1;
} else {
break;
}
}
// ... and any further fractional digits are consumed but ignored.
while p < len {
let byte = unsafe { *input.add(p) };
if byte.is_ascii_digit() {
p += 1;
} else {
break;
}
}
}
// An exponent is only handled by the slow path.
if p < len {
let byte = unsafe { *input.add(p) };
if byte == b'e' || byte == b'E' {
return json_jit_parse_f64_slow(input, len, start);
}
}
if int_digits == 0 && frac_digits == 0 {
return JsonJitF64Result {
new_pos: pos,
value: 0.0,
error: error::EXPECTED_NUMBER,
};
}
let mut value = int_part as f64;
if frac_digits > 0 {
// `frac_digits <= 19`, so the index is within the 20-entry table.
value += (frac_part as f64) * POW10_NEG[frac_digits];
}
if is_negative {
value = -value;
}
JsonJitF64Result {
new_pos: p,
value,
error: 0,
}
}
#[cfg(not(feature = "lexical-parse"))]
fn json_jit_parse_f64_slow(input: *const u8, len: usize, start: usize) -> JsonJitF64Result {
let mut p = start;
let mut has_digit = false;
if p < len && unsafe { *input.add(p) } == b'-' {
p += 1;
}
while p < len {
let byte = unsafe { *input.add(p) };
if byte.is_ascii_digit() {
has_digit = true;
p += 1;
} else {
break;
}
}
if p < len && unsafe { *input.add(p) } == b'.' {
p += 1;
while p < len {
let byte = unsafe { *input.add(p) };
if byte.is_ascii_digit() {
has_digit = true;
p += 1;
} else {
break;
}
}
}
if p < len {
let byte = unsafe { *input.add(p) };
if byte == b'e' || byte == b'E' {
p += 1;
if p < len {
let sign_byte = unsafe { *input.add(p) };
if sign_byte == b'+' || sign_byte == b'-' {
p += 1;
}
}
while p < len {
let byte = unsafe { *input.add(p) };
if byte.is_ascii_digit() {
p += 1;
} else {
break;
}
}
}
}
if !has_digit {
return JsonJitF64Result {
new_pos: start,
value: 0.0,
error: error::EXPECTED_NUMBER,
};
}
let slice = unsafe { std::slice::from_raw_parts(input.add(start), p - start) };
let s = match std::str::from_utf8(slice) {
Ok(s) => s,
Err(_) => {
return JsonJitF64Result {
new_pos: start,
value: 0.0,
error: error::EXPECTED_NUMBER,
};
}
};
match s.parse::<f64>() {
Ok(value) => JsonJitF64Result {
new_pos: p,
value,
error: 0,
},
Err(_) => JsonJitF64Result {
new_pos: start,
value: 0.0,
error: error::NUMBER_OVERFLOW,
},
}
}
#[unsafe(no_mangle)]
pub unsafe extern "C" fn json_jit_skip_value(
input: *const u8,
len: usize,
pos: usize,
) -> JsonJitResult {
let pos = unsafe { json_jit_skip_ws(input, len, pos) };
if pos >= len {
return error::UNEXPECTED_EOF as isize;
}
let byte = unsafe { *input.add(pos) };
let result = match byte {
b'"' => skip_string(input, len, pos),
b'[' => skip_array(input, len, pos),
b'{' => skip_object(input, len, pos),
b'-' | b'0'..=b'9' => skip_number(input, len, pos),
b't' => skip_literal(input, len, pos, b"true"),
b'f' => skip_literal(input, len, pos, b"false"),
b'n' => skip_literal(input, len, pos, b"null"),
_ => JsonJitPosError {
new_pos: pos,
error: error::UNEXPECTED_EOF, },
};
result.into_result()
}
fn skip_string(input: *const u8, len: usize, pos: usize) -> JsonJitPosError {
if pos >= len || unsafe { *input.add(pos) } != b'"' {
return JsonJitPosError {
new_pos: pos,
error: error::EXPECTED_STRING,
};
}
let start = pos + 1;
match fast_skip_to_quote(unsafe { input.add(start) }, len - start) {
Some(quote_idx) => JsonJitPosError {
new_pos: start + quote_idx + 1, error: 0,
},
None => JsonJitPosError {
new_pos: pos,
error: error::UNEXPECTED_EOF,
},
}
}
/// Returns the index of the first unescaped `"` in the `len` bytes at `ptr`,
/// or `None` if the input ends first (including in the middle of an escape).
///
/// Escapes are skipped, not validated: `\u` is assumed to be followed by four
/// more bytes and every other escape by none.
///
/// `ptr` must be non-null and valid for reads of `len` bytes.
fn fast_skip_to_quote(ptr: *const u8, len: usize) -> Option<usize> {
    let slice = unsafe { std::slice::from_raw_parts(ptr, len) };
    let mut i = 0;
    loop {
        // A truncated `\uXXXX` can push `i` past the end; checked slicing
        // turns that into "no closing quote" instead of a panic.
        let rest = slice.get(i..)?;
        let abs_hit = i + memchr::memchr2(b'"', b'\\', rest)?;
        if slice[abs_hit] == b'"' {
            return Some(abs_hit);
        }
        // Backslash: inspect the escape character, if the input has one.
        let escaped = *slice.get(abs_hit + 1)?;
        // Resume after `\uXXXX` (6 bytes) or after a two-byte escape.
        i = abs_hit + if escaped == b'u' { 6 } else { 2 };
    }
}
fn skip_number(input: *const u8, len: usize, pos: usize) -> JsonJitPosError {
let mut p = pos;
if p < len && unsafe { *input.add(p) } == b'-' {
p += 1;
}
while p < len {
let byte = unsafe { *input.add(p) };
if byte.is_ascii_digit() {
p += 1;
} else {
break;
}
}
if p < len && unsafe { *input.add(p) } == b'.' {
p += 1;
while p < len {
let byte = unsafe { *input.add(p) };
if byte.is_ascii_digit() {
p += 1;
} else {
break;
}
}
}
if p < len {
let byte = unsafe { *input.add(p) };
if byte == b'e' || byte == b'E' {
p += 1;
if p < len {
let sign = unsafe { *input.add(p) };
if sign == b'+' || sign == b'-' {
p += 1;
}
}
while p < len {
let byte = unsafe { *input.add(p) };
if byte.is_ascii_digit() {
p += 1;
} else {
break;
}
}
}
}
if p == pos {
return JsonJitPosError {
new_pos: pos,
error: error::EXPECTED_NUMBER,
};
}
JsonJitPosError {
new_pos: p,
error: 0,
}
}
fn skip_literal(input: *const u8, len: usize, pos: usize, literal: &[u8]) -> JsonJitPosError {
if pos + literal.len() > len {
return JsonJitPosError {
new_pos: pos,
error: error::UNEXPECTED_EOF,
};
}
let slice = unsafe { std::slice::from_raw_parts(input.add(pos), literal.len()) };
if slice == literal {
JsonJitPosError {
new_pos: pos + literal.len(),
error: 0,
}
} else {
JsonJitPosError {
new_pos: pos,
error: error::EXPECTED_BOOL, }
}
}
fn skip_array(input: *const u8, len: usize, pos: usize) -> JsonJitPosError {
if pos >= len || unsafe { *input.add(pos) } != b'[' {
return JsonJitPosError {
new_pos: pos,
error: error::EXPECTED_ARRAY_START,
};
}
let mut p = pos + 1;
p = unsafe { json_jit_skip_ws(input, len, p) };
if p < len && unsafe { *input.add(p) } == b']' {
return JsonJitPosError {
new_pos: p + 1,
error: 0,
};
}
loop {
let result = unsafe { json_jit_skip_value(input, len, p) };
if result < 0 {
return JsonJitPosError {
new_pos: p,
error: result as i32,
};
}
p = result as usize;
p = unsafe { json_jit_skip_ws(input, len, p) };
if p >= len {
return JsonJitPosError {
new_pos: p,
error: error::UNEXPECTED_EOF,
};
}
let byte = unsafe { *input.add(p) };
if byte == b']' {
return JsonJitPosError {
new_pos: p + 1,
error: 0,
};
} else if byte == b',' {
p += 1;
p = unsafe { json_jit_skip_ws(input, len, p) };
} else {
return JsonJitPosError {
new_pos: p,
error: error::EXPECTED_COMMA_OR_END,
};
}
}
}
fn skip_object(input: *const u8, len: usize, pos: usize) -> JsonJitPosError {
if pos >= len || unsafe { *input.add(pos) } != b'{' {
return JsonJitPosError {
new_pos: pos,
error: error::EXPECTED_OBJECT_START,
};
}
let mut p = pos + 1;
p = unsafe { json_jit_skip_ws(input, len, p) };
if p < len && unsafe { *input.add(p) } == b'}' {
return JsonJitPosError {
new_pos: p + 1,
error: 0,
};
}
loop {
let result = skip_string(input, len, p);
if result.error != 0 {
return result;
}
p = result.new_pos;
p = unsafe { json_jit_skip_ws(input, len, p) };
if p >= len || unsafe { *input.add(p) } != b':' {
return JsonJitPosError {
new_pos: p,
error: error::EXPECTED_COLON,
};
}
p += 1;
p = unsafe { json_jit_skip_ws(input, len, p) };
let result = unsafe { json_jit_skip_value(input, len, p) };
if result < 0 {
return JsonJitPosError {
new_pos: p,
error: result as i32,
};
}
p = result as usize;
p = unsafe { json_jit_skip_ws(input, len, p) };
if p >= len {
return JsonJitPosError {
new_pos: p,
error: error::UNEXPECTED_EOF,
};
}
let byte = unsafe { *input.add(p) };
if byte == b'}' {
return JsonJitPosError {
new_pos: p + 1,
error: 0,
};
} else if byte == b',' {
p += 1;
p = unsafe { json_jit_skip_ws(input, len, p) };
} else {
return JsonJitPosError {
new_pos: p,
error: error::EXPECTED_COMMA_OR_BRACE,
};
}
}
}
/// Smoke tests that call the C-ABI helpers directly on byte-string inputs.
#[cfg(test)]
mod tests {
use super::*;
use facet_testhelpers::test;
// Both boolean literals parse, report the position just past the literal,
// and carry the right value in the packed result.
#[test]
fn test_json_jit_parse_bool() {
let input = b"true";
let result = unsafe { json_jit_parse_bool(input.as_ptr(), input.len(), 0) };
assert_eq!(result.error, 0);
assert_eq!(result.new_pos(), 4);
assert!(result.value());
let input = b"false";
let result = unsafe { json_jit_parse_bool(input.as_ptr(), input.len(), 0) };
assert_eq!(result.error, 0);
assert_eq!(result.new_pos(), 5);
assert!(!result.value());
}
// `[` is consumed and the returned position is the first element.
#[test]
fn test_json_jit_seq_begin() {
let input = b"[true]";
let result = unsafe { json_jit_seq_begin(input.as_ptr(), input.len(), 0) };
assert_eq!(result.error, 0);
assert_eq!(result.new_pos, 1); }
// `]` sets the end flag; any other byte leaves it clear without an error.
#[test]
fn test_json_jit_seq_is_end() {
let input = b"]";
let result = unsafe { json_jit_seq_is_end(input.as_ptr(), input.len(), 0) };
assert_eq!(result.error, 0);
assert!(result.is_end());
let input = b"true";
let result = unsafe { json_jit_seq_is_end(input.as_ptr(), input.len(), 0) };
assert_eq!(result.error, 0);
assert!(!result.is_end());
}
}