use std::borrow::Cow;
use super::constants::*;
use super::error::Error;
use super::error::ParseErrorCode;
use super::error::Result;
use super::number::Number;
use super::util::parse_string;
use super::value::Object;
use super::value::Value;
use crate::core::Decoder;
use crate::core::JsonAstEncoder;
#[cfg(feature = "arbitrary_precision")]
use crate::Decimal128;
#[cfg(feature = "arbitrary_precision")]
use crate::Decimal256;
#[cfg(feature = "arbitrary_precision")]
use crate::Decimal64;
use crate::OwnedJsonb;
#[cfg(feature = "arbitrary_precision")]
use ethnum::i256;
use crate::constants::MAX_DECIMAL128_PRECISION;
use crate::constants::MAX_DECIMAL256_PRECISION;
#[cfg(feature = "arbitrary_precision")]
use crate::constants::MAX_DECIMAL64_PRECISION;
#[cfg(feature = "arbitrary_precision")]
use crate::constants::DECIMAL128_MAX;
#[cfg(feature = "arbitrary_precision")]
use crate::constants::DECIMAL128_MIN;
#[cfg(feature = "arbitrary_precision")]
use crate::constants::DECIMAL64_MAX;
#[cfg(feature = "arbitrary_precision")]
use crate::constants::DECIMAL64_MIN;
use crate::constants::INT64_MAX;
use crate::constants::INT64_MIN;
use crate::constants::UINT64_MAX;
use crate::constants::UINT64_MIN;
// Byte spellings of the keyword literals. Each keyword has a lowercase and an
// uppercase table of the same length so the lenient parser can accept either
// case at every position.
const NULL_LOWERCASE: [u8; 4] = *b"null";
const NULL_UPPERCASE: [u8; 4] = *b"NULL";
const TRUE_LOWERCASE: [u8; 4] = *b"true";
const TRUE_UPPERCASE: [u8; 4] = *b"TRUE";
const FALSE_LOWERCASE: [u8; 5] = *b"false";
const FALSE_UPPERCASE: [u8; 5] = *b"FALSE";
const NAN_LOWERCASE: [u8; 3] = *b"nan";
const NAN_UPPERCASE: [u8; 3] = *b"NAN";
const INFINITY_LOWERCASE: [u8; 8] = *b"infinity";
const INFINITY_UPPERCASE: [u8; 8] = *b"INFINITY";
/// Powers of ten from `10^0` through `10^38` as `i256`, indexed by exponent.
///
/// Used to shift the high digit group of a wide decimal before the low digit
/// group is added to it.
#[cfg(feature = "arbitrary_precision")]
static POWER_TABLE: std::sync::LazyLock<[i256; 39]> = std::sync::LazyLock::new(|| {
    // The largest entry, 10^38, still fits in an `i128`, so every power is
    // computed in `i128` and then widened.
    std::array::from_fn(|exponent| i256::from(10_i128.pow(exponent as u32)))
});
/// Intermediate tree produced by `Parser` before it is converted into a
/// `Value` or encoded into an `OwnedJsonb`.
///
/// String data is held as `Cow<'a, str>`, so it can borrow from the input
/// buffer or own a separately built string.
#[derive(Clone, PartialEq, Default, Eq, Debug)]
pub(crate) enum JsonAst<'a> {
/// The `null` value; also the `Default` variant.
#[default]
Null,
/// A boolean literal.
Bool(bool),
/// A string value.
String(Cow<'a, str>),
/// A numeric value.
Number(Number),
/// Array elements in source order.
Array(Vec<JsonAst<'a>>),
/// Object entries as `(key, value, position)`. The parser records the
/// buffer index right after the key as `position` and uses it when
/// reporting a duplicate key.
Object(Vec<(Cow<'a, str>, JsonAst<'a>, usize)>),
}
impl<'a> JsonAst<'a> {
    /// Converts the tree into a [`Value`], recursing through arrays and objects.
    ///
    /// Object entries are inserted into an [`Object`] keyed by the owned key
    /// string; the recorded key position is dropped.
    fn into_value(self) -> Result<Value<'a>> {
        let value = match self {
            JsonAst::Null => Value::Null,
            JsonAst::Bool(v) => Value::Bool(v),
            JsonAst::String(v) => Value::String(v),
            JsonAst::Number(v) => Value::Number(v),
            JsonAst::Array(vals) => {
                let mut values = Vec::with_capacity(vals.len());
                for val in vals {
                    values.push(val.into_value()?);
                }
                Value::Array(values)
            }
            JsonAst::Object(kvs) => {
                let mut object = Object::new();
                for (key, val, _) in kvs {
                    // `into_owned` reuses the allocation of an already-owned key,
                    // whereas `to_string` would copy it unconditionally.
                    let key_str = key.into_owned();
                    let value = val.into_value()?;
                    object.insert(key_str, value);
                }
                Value::Object(object)
            }
        };
        Ok(value)
    }

    /// Encodes the tree into a new [`OwnedJsonb`].
    ///
    /// `size` is only the initial capacity reserved for the output buffer.
    fn into_owned_jsonb(self, size: usize) -> OwnedJsonb {
        let mut buf = Vec::with_capacity(size);
        let mut encoder = JsonAstEncoder::new(&mut buf);
        encoder.encode(&self);
        OwnedJsonb::new(buf)
    }

    /// Encodes the tree into the caller-supplied `result_buf`.
    ///
    /// `size` additional bytes are reserved in `result_buf` before encoding.
    fn into_owned_jsonb_with_buffer(self, size: usize, result_buf: &mut Vec<u8>) {
        result_buf.reserve(size);
        let mut encoder = JsonAstEncoder::new(result_buf);
        encoder.encode(&self);
    }
}
/// Reads a [`Value`] from `buf`.
///
/// The buffer is first decoded with [`Decoder`]; if decoding fails for any
/// reason, the buffer is parsed as text with [`parse_value`] instead.
pub fn from_slice(buf: &[u8]) -> Result<Value<'_>> {
    let mut decoder = Decoder::new(buf);
    decoder.decode().or_else(|_| parse_value(buf))
}
/// Parses `buf` with the lenient parser and returns the result as a [`Value`].
pub fn parse_value(buf: &[u8]) -> Result<Value<'_>> {
    Parser::new(buf).parse()?.into_value()
}
/// Parses `buf` with the standard-mode parser and returns the result as a [`Value`].
pub fn parse_value_standard_mode(buf: &[u8]) -> Result<Value<'_>> {
    Parser::new_standard_mode(buf).parse()?.into_value()
}
/// Parses `buf` with the lenient parser and encodes the result as an [`OwnedJsonb`].
///
/// The input length is used as the initial capacity of the output buffer.
pub fn parse_owned_jsonb(buf: &[u8]) -> Result<OwnedJsonb> {
    let ast = Parser::new(buf).parse()?;
    Ok(ast.into_owned_jsonb(buf.len()))
}
/// Parses `buf` with the standard-mode parser and encodes the result as an [`OwnedJsonb`].
///
/// The input length is used as the initial capacity of the output buffer.
pub fn parse_owned_jsonb_standard_mode(buf: &[u8]) -> Result<OwnedJsonb> {
    let ast = Parser::new_standard_mode(buf).parse()?;
    Ok(ast.into_owned_jsonb(buf.len()))
}
/// Parses `buf` with the lenient parser and encodes the result into `result_buf`.
///
/// `result_buf` is left untouched when parsing fails.
pub fn parse_owned_jsonb_with_buf(buf: &[u8], result_buf: &mut Vec<u8>) -> Result<()> {
    let ast = Parser::new(buf).parse()?;
    ast.into_owned_jsonb_with_buffer(buf.len(), result_buf);
    Ok(())
}
/// Parses `buf` with the standard-mode parser and encodes the result into `result_buf`.
///
/// `result_buf` is left untouched when parsing fails.
pub fn parse_owned_jsonb_standard_mode_with_buf(
    buf: &[u8],
    result_buf: &mut Vec<u8>,
) -> Result<()> {
    let ast = Parser::new_standard_mode(buf).parse()?;
    ast.into_owned_jsonb_with_buffer(buf.len(), result_buf);
    Ok(())
}
/// Cursor-based parser over a byte buffer.
///
/// The three function pointers select between the lenient and the standard
/// grammar; they are chosen once by the constructor.
struct Parser<'a> {
// Input being parsed.
buf: &'a [u8],
// Index of the next unread byte in `buf`.
idx: usize,
// Parses the root value and object member values.
parse_value_fn: fn(&mut Self) -> Result<JsonAst<'a>>,
// Parses one array element.
parse_array_value_fn: fn(&mut Self) -> Result<JsonAst<'a>>,
// Parses one object key.
parse_object_key_fn: fn(&mut Self) -> Result<Cow<'a, str>>,
}
impl<'a> Parser<'a> {
/// Creates a parser for the lenient grammar, positioned at the start of `buf`.
fn new(buf: &'a [u8]) -> Self {
    Self {
        parse_value_fn: Self::parse_json_value,
        parse_array_value_fn: Self::parse_array_value,
        parse_object_key_fn: Self::parse_object_key,
        idx: 0,
        buf,
    }
}
/// Creates a parser for the standard grammar, positioned at the start of `buf`.
///
/// Array elements use the same routine as any other value in this mode.
fn new_standard_mode(buf: &'a [u8]) -> Self {
    Self {
        parse_value_fn: Self::parse_standard_json_value,
        parse_array_value_fn: Self::parse_standard_json_value,
        parse_object_key_fn: Self::parse_standard_object_key,
        idx: 0,
        buf,
    }
}
fn parse(&mut self) -> Result<JsonAst<'a>> {
let value = (self.parse_value_fn)(self)?;
self.skip_unused();
if self.idx < self.buf.len() {
self.step();
return Err(self.error(ParseErrorCode::UnexpectedTrailingCharacters));
}
Ok(value)
}
/// Parses one value in the standard grammar, dispatching on its first byte.
///
/// Fails with `InvalidEOF` on empty input and `ExpectedSomeValue` on a byte
/// that cannot start a value.
#[inline]
fn parse_standard_json_value(&mut self) -> Result<JsonAst<'a>> {
    self.skip_unused();
    match self.next()? {
        b'{' => self.parse_json_object(),
        b'[' => self.parse_json_array(),
        b'"' => self.parse_standard_json_string(),
        b'-' | b'0'..=b'9' => self.parse_standard_json_number(),
        b't' => self.parse_standard_json_true(),
        b'f' => self.parse_standard_json_false(),
        b'n' => self.parse_standard_json_null(),
        _ => {
            self.step();
            Err(self.error(ParseErrorCode::ExpectedSomeValue))
        }
    }
}
/// Parses one value in the lenient grammar, dispatching on its first byte.
///
/// Reaching the end of the input here yields `JsonAst::Null` rather than an
/// error; a byte that cannot start a value fails with `ExpectedSomeValue`.
#[inline]
fn parse_json_value(&mut self) -> Result<JsonAst<'a>> {
    self.skip_unused();
    match self.next() {
        Err(_) => Ok(JsonAst::Null),
        Ok(b'{') => self.parse_json_object(),
        Ok(b'[') => self.parse_json_array(),
        Ok(b'"' | b'\'') => self.parse_json_string(),
        Ok(b'0'..=b'9' | b'-' | b'+' | b'.') => self.parse_json_number(),
        Ok(b'n' | b'N') => self.parse_json_null_or_nan(),
        Ok(b't' | b'T') => self.parse_json_true(),
        Ok(b'f' | b'F') => self.parse_json_false(),
        Ok(b'i' | b'I') => self.parse_json_infinity(),
        Ok(_) => {
            self.step();
            Err(self.error(ParseErrorCode::ExpectedSomeValue))
        }
    }
}
#[inline]
fn next(&mut self) -> Result<u8> {
match self.buf.get(self.idx) {
Some(c) => Ok(*c),
None => Err(self.error(ParseErrorCode::InvalidEOF)),
}
}
#[inline]
fn must_is(&mut self, c: &u8) -> Result<()> {
match self.buf.get(self.idx) {
Some(v) => {
self.step();
if v == c {
Ok(())
} else {
Err(self.error(ParseErrorCode::ExpectedSomeIdent))
}
}
None => Err(self.error(ParseErrorCode::InvalidEOF)),
}
}
#[inline]
fn must_either(&mut self, c1: &u8, c2: &u8) -> Result<u8> {
match self.buf.get(self.idx) {
Some(v) => {
self.step();
if v == c1 || v == c2 {
Ok(*v)
} else {
Err(self.error(ParseErrorCode::ExpectedSomeIdent))
}
}
None => Err(self.error(ParseErrorCode::InvalidEOF)),
}
}
/// Reports whether the byte at the cursor equals `c`, without consuming it.
#[inline]
fn check_next(&mut self, c: &u8) -> bool {
    self.buf.get(self.idx) == Some(c)
}
/// Returns the byte at the cursor if it equals `c1` or `c2`, without
/// consuming it; otherwise `None`.
#[inline]
fn check_next_either(&mut self, c1: &u8, c2: &u8) -> Option<u8> {
    self.buf
        .get(self.idx)
        .copied()
        .filter(|found| found == c1 || found == c2)
}
/// Returns the numeric value (0-9) of the ASCII digit at the cursor, without
/// consuming it; `None` if the cursor is not on a digit.
#[inline]
fn check_digit(&mut self) -> Option<u8> {
    match self.buf.get(self.idx) {
        Some(&byte @ b'0'..=b'9') => Some(byte - b'0'),
        _ => None,
    }
}
/// Consumes a run of ASCII decimal digits and returns how many were consumed.
#[inline]
fn step_digits(&mut self) -> usize {
    let begin = self.idx;
    while self.buf.get(self.idx).is_some_and(u8::is_ascii_digit) {
        self.step();
    }
    self.idx - begin
}
/// Consumes a run of ASCII hexadecimal digits and returns how many were consumed.
#[inline]
fn step_hexdigits(&mut self) -> usize {
    let begin = self.idx;
    while self.buf.get(self.idx).is_some_and(u8::is_ascii_hexdigit) {
        self.step();
    }
    self.idx - begin
}
/// Advances the cursor by one byte. No bounds check is performed.
#[inline]
fn step(&mut self) {
    self.step_by(1);
}
/// Advances the cursor by `n` bytes.
///
/// No bounds check is performed, so the cursor may end up past the end of
/// the buffer.
#[inline]
fn step_by(&mut self, n: usize) {
self.idx += n;
}
fn error(&self, code: ParseErrorCode) -> Error {
let pos = self.idx;
Error::Syntax(code, pos)
}
/// Advances the cursor past bytes that carry no meaning between tokens.
///
/// Skipped are ASCII whitespace bytes, the two-byte sequences backslash
/// followed by `n`, `r` or `t`, and the four-byte sequence `\x0C`, all as
/// they appear literally in the input.
#[inline]
fn skip_unused(&mut self) {
    loop {
        let skipped = match self.buf.get(self.idx..) {
            Some([c, ..]) if c.is_ascii_whitespace() => 1,
            Some([b'\\', b'n' | b'r' | b't', ..]) => 2,
            Some([b'\\', b'x', b'0', b'C', ..]) => 4,
            _ => return,
        };
        self.idx += skipped;
    }
}
/// Consumes the exact lowercase literal `null`.
#[inline]
fn parse_standard_json_null(&mut self) -> Result<JsonAst<'a>> {
    NULL_LOWERCASE
        .iter()
        .try_for_each(|expected| self.must_is(expected))?;
    Ok(JsonAst::Null)
}
/// Parses a literal that starts with `n`/`N`: first as `null`, and if that
/// fails, rewinds the cursor and parses it as `NaN`.
#[inline]
fn parse_json_null_or_nan(&mut self) -> Result<JsonAst<'a>> {
    let checkpoint = self.idx;
    self.parse_json_null().or_else(|_| {
        self.idx = checkpoint;
        self.parse_json_nan()
    })
}
/// Consumes the literal `null`, accepting either case at each position.
#[inline]
fn parse_json_null(&mut self) -> Result<JsonAst<'a>> {
    NULL_LOWERCASE
        .iter()
        .zip(NULL_UPPERCASE.iter())
        .try_for_each(|(lower, upper)| self.must_either(lower, upper).map(drop))?;
    Ok(JsonAst::Null)
}
/// Consumes the exact lowercase literal `true`.
#[inline]
fn parse_standard_json_true(&mut self) -> Result<JsonAst<'a>> {
    TRUE_LOWERCASE
        .iter()
        .try_for_each(|expected| self.must_is(expected))?;
    Ok(JsonAst::Bool(true))
}
/// Consumes the literal `true`, accepting either case at each position.
#[inline]
fn parse_json_true(&mut self) -> Result<JsonAst<'a>> {
    TRUE_LOWERCASE
        .iter()
        .zip(TRUE_UPPERCASE.iter())
        .try_for_each(|(lower, upper)| self.must_either(lower, upper).map(drop))?;
    Ok(JsonAst::Bool(true))
}
/// Consumes the exact lowercase literal `false`.
#[inline]
fn parse_standard_json_false(&mut self) -> Result<JsonAst<'a>> {
    FALSE_LOWERCASE
        .iter()
        .try_for_each(|expected| self.must_is(expected))?;
    Ok(JsonAst::Bool(false))
}
/// Consumes the literal `false`, accepting either case at each position.
#[inline]
fn parse_json_false(&mut self) -> Result<JsonAst<'a>> {
    FALSE_LOWERCASE
        .iter()
        .zip(FALSE_UPPERCASE.iter())
        .try_for_each(|(lower, upper)| self.must_either(lower, upper).map(drop))?;
    Ok(JsonAst::Bool(false))
}
/// Consumes the literal `infinity` (either case at each position) and yields
/// positive infinity; the caller applies any sign.
#[inline]
fn parse_json_infinity(&mut self) -> Result<JsonAst<'a>> {
    INFINITY_LOWERCASE
        .iter()
        .zip(INFINITY_UPPERCASE.iter())
        .try_for_each(|(lower, upper)| self.must_either(lower, upper).map(drop))?;
    Ok(JsonAst::Number(Number::Float64(f64::INFINITY)))
}
/// Consumes the literal `nan` (either case at each position) and yields a
/// floating-point NaN.
#[inline]
fn parse_json_nan(&mut self) -> Result<JsonAst<'a>> {
    NAN_LOWERCASE
        .iter()
        .zip(NAN_UPPERCASE.iter())
        .try_for_each(|(lower, upper)| self.must_either(lower, upper).map(drop))?;
    Ok(JsonAst::Number(Number::Float64(f64::NAN)))
}
/// Parses a number in the standard grammar: optional `-`, an integer part
/// without redundant leading zeros, an optional fraction and an optional
/// exponent.
///
/// Plain integers become `UInt64` (non-negative) or `Int64` (negative) when
/// they fit; everything else is parsed as `Float64`.
fn parse_standard_json_number(&mut self) -> Result<JsonAst<'a>> {
    let begin = self.idx;

    let negative = self.check_next(&b'-');
    if negative {
        self.step();
    }

    // Integer part: either a lone `0` or a non-empty run of digits.
    if self.check_next(&b'0') {
        self.step();
        if self.check_digit().is_some() {
            self.step();
            return Err(self.error(ParseErrorCode::InvalidNumberValue));
        }
    } else if self.step_digits() == 0 {
        return Err(self.error(ParseErrorCode::InvalidNumberValue));
    }

    // Optional fraction: `.` followed by at least one digit.
    let has_fraction = self.check_next(&b'.');
    if has_fraction {
        self.step();
        if self.step_digits() == 0 {
            self.step();
            return Err(self.error(ParseErrorCode::InvalidNumberValue));
        }
    }

    // Optional exponent: `e`/`E`, optional sign, at least one digit.
    let has_exponent = self.check_next_either(&b'E', &b'e').is_some();
    if has_exponent {
        self.step();
        if self.check_next_either(&b'+', &b'-').is_some() {
            self.step();
        }
        if self.step_digits() == 0 {
            return Err(self.error(ParseErrorCode::InvalidNumberValue));
        }
    }

    // SAFETY: every byte consumed above is one of `-`, `+`, `.`, `e`, `E` or
    // an ASCII digit, so the slice is valid UTF-8.
    let s = unsafe { std::str::from_utf8_unchecked(&self.buf[begin..self.idx]) };

    let is_integer = !(has_fraction || has_exponent);
    if is_integer {
        if negative {
            if let Ok(v) = s.parse::<i64>() {
                return Ok(JsonAst::Number(Number::Int64(v)));
            }
        } else if let Ok(v) = s.parse::<u64>() {
            return Ok(JsonAst::Number(Number::UInt64(v)));
        }
    }

    // Fractions, exponents and out-of-range integers fall back to a float.
    match fast_float2::parse(s) {
        Ok(v) => Ok(JsonAst::Number(Number::Float64(v))),
        Err(_) => Err(self.error(ParseErrorCode::InvalidNumberValue)),
    }
}
fn parse_json_number(&mut self) -> Result<JsonAst<'a>> {
let start_idx = self.idx;
let mut negative = false;
let mut leading_zeros = 0;
let c = self.next()?;
if c == b'-' {
negative = true;
self.step();
} else if c == b'+' {
self.step();
}
loop {
if self.check_next(&b'0') {
leading_zeros += 1;
self.step();
} else {
break;
}
}
let mut hi_value = 0_i128; let mut lo_value = 0_i128; let mut scale = 0_u32; let mut precision = 0; let mut has_fraction = false; let mut has_exponent = false;
while precision < MAX_DECIMAL256_PRECISION {
if let Some(digit) = self.check_digit() {
if precision < MAX_DECIMAL128_PRECISION {
hi_value = unsafe { hi_value.unchecked_mul(10_i128) };
hi_value = unsafe { hi_value.unchecked_add(digit as i128) };
} else {
lo_value = unsafe { lo_value.unchecked_mul(10_i128) };
lo_value = unsafe { lo_value.unchecked_add(digit as i128) };
}
self.step();
} else if self.check_next(&b'.') {
if has_fraction {
return Err(self.error(ParseErrorCode::InvalidNumberValue));
}
has_fraction = true;
self.step();
continue;
} else {
break;
}
precision += 1;
if has_fraction {
scale += 1;
}
}
if precision == MAX_DECIMAL256_PRECISION {
if !has_fraction {
let len = self.step_digits();
precision += len;
if self.check_next(&b'.') {
has_fraction = true;
self.step();
}
}
if has_fraction {
let len = self.step_digits();
precision += len;
scale += len as u32;
}
}
if leading_zeros == 0 && precision == 0 {
if !has_fraction {
if let Ok(c) = self.next() {
match c {
b'i' | b'I' => {
let val = self.parse_json_infinity()?;
if negative {
return Ok(JsonAst::Number(Number::Float64(f64::NEG_INFINITY)));
} else {
return Ok(val);
}
}
b'n' | b'N' => {
let val = self.parse_json_nan()?;
if negative {
return Err(self.error(ParseErrorCode::InvalidNumberValue));
} else {
return Ok(val);
}
}
_ => {}
}
}
}
return Err(self.error(ParseErrorCode::InvalidNumberValue));
} else if leading_zeros == 1 && precision == 0 && !has_fraction {
if self.check_next_either(&b'x', &b'X').is_some() {
self.step();
let hex_start = self.idx;
let int_len = self.step_hexdigits();
if int_len == 0 {
return Err(self.error(ParseErrorCode::InvalidNumberValue));
}
if self.check_next(&b'.') {
self.step();
let frac_start = self.idx;
let frac_len = self.step_hexdigits();
if frac_len == 0 {
return Err(self.error(ParseErrorCode::InvalidNumberValue));
}
let int_str = std::str::from_utf8(&self.buf[hex_start..hex_start + int_len])
.map_err(|_| self.error(ParseErrorCode::InvalidNumberValue))?;
let frac_str =
std::str::from_utf8(&self.buf[frac_start..frac_start + frac_len])
.map_err(|_| self.error(ParseErrorCode::InvalidNumberValue))?;
let int_val = u128::from_str_radix(int_str, 16)
.map_err(|_| self.error(ParseErrorCode::InvalidNumberValue))?;
let frac_val = u128::from_str_radix(frac_str, 16)
.map_err(|_| self.error(ParseErrorCode::InvalidNumberValue))?;
let frac_divisor = 16.0_f64.powi(frac_len as i32);
let mut final_val = int_val as f64 + (frac_val as f64 / frac_divisor);
if negative {
final_val = -final_val;
}
return Ok(JsonAst::Number(Number::Float64(final_val)));
} else {
let int_str = std::str::from_utf8(&self.buf[hex_start..self.idx])
.map_err(|_| self.error(ParseErrorCode::InvalidNumberValue))?;
let value = u128::from_str_radix(int_str, 16)
.map_err(|_| self.error(ParseErrorCode::InvalidNumberValue))?;
if negative {
if value <= (i64::MAX as u128 + 1) {
let i_val = -(value as i64);
return Ok(JsonAst::Number(Number::Int64(i_val)));
}
#[cfg(feature = "arbitrary_precision")]
{
if value <= (DECIMAL128_MAX as u128 + 1) {
return Ok(JsonAst::Number(Number::Decimal128(Decimal128 {
scale: 0,
value: -(value as i128),
})));
} else {
return Ok(JsonAst::Number(Number::Decimal256(Decimal256 {
scale: 0,
value: i256::from(value) * -1,
})));
}
}
#[cfg(not(feature = "arbitrary_precision"))]
{
return Ok(JsonAst::Number(Number::Float64(-(value as f64))));
}
} else {
if value <= u64::MAX as u128 {
return Ok(JsonAst::Number(Number::UInt64(value as u64)));
}
#[cfg(feature = "arbitrary_precision")]
{
if value <= DECIMAL128_MAX as u128 {
return Ok(JsonAst::Number(Number::Decimal128(Decimal128 {
scale: 0,
value: value as i128,
})));
} else {
return Ok(JsonAst::Number(Number::Decimal256(Decimal256 {
scale: 0,
value: i256::from(value),
})));
}
}
#[cfg(not(feature = "arbitrary_precision"))]
{
return Ok(JsonAst::Number(Number::Float64(value as f64)));
}
}
}
}
}
if self.check_next_either(&b'E', &b'e').is_some() {
has_exponent = true;
self.step();
if self.check_next_either(&b'+', &b'-').is_some() {
self.step();
}
let len = self.step_digits();
if len == 0 {
return Err(self.error(ParseErrorCode::InvalidNumberValue));
}
}
if !has_exponent && precision <= MAX_DECIMAL128_PRECISION {
let value = if negative { -hi_value } else { hi_value };
if scale == 0 && (UINT64_MIN..=UINT64_MAX).contains(&value) {
return Ok(JsonAst::Number(Number::UInt64(
u64::try_from(value).unwrap(),
)));
} else if scale == 0 && (INT64_MIN..=INT64_MAX).contains(&value) {
return Ok(JsonAst::Number(Number::Int64(
i64::try_from(value).unwrap(),
)));
}
#[cfg(feature = "arbitrary_precision")]
{
if (DECIMAL64_MIN..=DECIMAL64_MAX).contains(&value)
&& precision <= MAX_DECIMAL64_PRECISION
{
return Ok(JsonAst::Number(Number::Decimal64(Decimal64 {
scale: scale as u8,
value: i64::try_from(value).unwrap(),
})));
} else if (DECIMAL128_MIN..=DECIMAL128_MAX).contains(&value) {
return Ok(JsonAst::Number(Number::Decimal128(Decimal128 {
scale: scale as u8,
value,
})));
}
}
}
#[cfg(feature = "arbitrary_precision")]
if !has_exponent && precision <= MAX_DECIMAL256_PRECISION {
let multiplier = POWER_TABLE[precision - MAX_DECIMAL128_PRECISION];
let mut i256_value = i256::from(hi_value) * multiplier + i256::from(lo_value);
if negative {
i256_value *= -1;
}
return Ok(JsonAst::Number(Number::Decimal256(Decimal256 {
scale: scale as u8,
value: i256_value,
})));
}
let s = unsafe { std::str::from_utf8_unchecked(&self.buf[start_idx..self.idx]) };
match fast_float2::parse(s) {
Ok(v) => Ok(JsonAst::Number(Number::Float64(v))),
Err(_) => Err(self.error(ParseErrorCode::InvalidNumberValue)),
}
}
/// Parses a double-quoted string value.
#[inline]
fn parse_standard_json_string(&mut self) -> Result<JsonAst<'a>> {
    self.must_is(&b'"')?;
    self.parse_quoted_string(b'"').map(JsonAst::String)
}
/// Parses a string value delimited by double or single quotes; the closing
/// quote must match the opening one.
#[inline]
fn parse_json_string(&mut self) -> Result<JsonAst<'a>> {
    let end_quote = self.must_either(&b'"', &b'\'')?;
    self.parse_quoted_string(end_quote).map(JsonAst::String)
}
fn parse_quoted_string(&mut self, end_quote: u8) -> Result<Cow<'a, str>> {
let start_idx = self.idx;
let mut escapes = 0;
loop {
let c = self.next()?;
match c {
b'\\' => {
self.step();
escapes += 1;
let next_c = self.next()?;
if next_c == b'u' {
self.step();
let next_c = self.next()?;
if next_c == b'{' {
self.step_by(UNICODE_LEN + 2);
} else {
self.step_by(UNICODE_LEN);
}
} else {
self.step();
}
}
_ => {
self.step();
if c == end_quote {
break;
}
}
}
}
let data = &self.buf[start_idx..self.idx - 1];
let val = if escapes > 0 {
let len = self.idx - 1 - start_idx - escapes;
let mut idx = start_idx + 1;
let s = parse_string(data, len, &mut idx)?;
Cow::Owned(s)
} else {
std::str::from_utf8(data)
.map(Cow::Borrowed)
.map_err(|_| self.error(ParseErrorCode::InvalidStringValue))?
};
Ok(val)
}
fn parse_unquoted_string(&mut self) -> Result<Cow<'a, str>> {
let start_idx = self.idx;
let c = self.next()?;
if c.is_ascii_digit() {
self.step();
return Err(self.error(ParseErrorCode::ObjectKeyInvalidNumber));
}
loop {
let c = self.next()?;
if c.is_ascii_alphanumeric() || matches!(c, b'_' | b'$') {
self.step();
} else if c >= 0x80 {
let continuation_bytes = if c >= 0xF0 {
4 } else if c >= 0xE0 {
3 } else if c >= 0xC0 {
2 } else {
return Err(self.error(ParseErrorCode::ObjectKeyInvalidCharacter));
};
self.step_by(continuation_bytes);
} else {
break;
}
}
if self.idx == start_idx {
return Err(self.error(ParseErrorCode::ObjectKeyInvalidCharacter));
}
let data = &self.buf[start_idx..self.idx];
let val = std::str::from_utf8(data)
.map(Cow::Borrowed)
.map_err(|_| self.error(ParseErrorCode::InvalidStringValue))?;
Ok(val)
}
/// Parses one array element in the lenient grammar.
///
/// An empty slot (the cursor is already on `,` or `]`) yields
/// `JsonAst::Null` without consuming anything.
#[inline]
fn parse_array_value(&mut self) -> Result<JsonAst<'a>> {
    match self.check_next_either(&b',', &b']') {
        Some(_) => Ok(JsonAst::Null),
        None => self.parse_json_value(),
    }
}
/// Parses an array: `[`, elements separated by `,`, then `]`.
///
/// Elements are parsed with the mode-specific `parse_array_value_fn`.
fn parse_json_array(&mut self) -> Result<JsonAst<'a>> {
    self.must_is(&b'[')?;
    let mut values = Vec::with_capacity(8);
    loop {
        self.skip_unused();
        let c = self.next()?;
        if c == b']' {
            self.step();
            return Ok(JsonAst::Array(values));
        }
        // Every element after the first must be preceded by a comma.
        if !values.is_empty() {
            if c != b',' {
                return Err(self.error(ParseErrorCode::ExpectedArrayCommaOrEnd));
            }
            self.step();
        }
        self.skip_unused();
        let element = (self.parse_array_value_fn)(self)?;
        values.push(element);
    }
}
/// Parses an object key in the standard grammar: a double-quoted string.
#[inline]
fn parse_standard_object_key(&mut self) -> Result<Cow<'a, str>> {
    self.must_is(&b'"')
        .and_then(|()| self.parse_quoted_string(b'"'))
}
/// Parses an object key in the lenient grammar: a double-quoted,
/// single-quoted or unquoted string.
#[inline]
fn parse_object_key(&mut self) -> Result<Cow<'a, str>> {
    match self.check_next_either(&b'"', &b'\'') {
        Some(end_quote) => {
            self.step();
            self.parse_quoted_string(end_quote)
        }
        None => self.parse_unquoted_string(),
    }
}
fn parse_json_object(&mut self) -> Result<JsonAst<'a>> {
self.must_is(&b'{')?;
let mut first = true;
let mut obj = Vec::with_capacity(16);
loop {
self.skip_unused();
let c = self.next()?;
if c == b'}' {
self.step();
break;
}
if !first {
if c != b',' {
return Err(self.error(ParseErrorCode::ExpectedObjectCommaOrEnd));
}
self.step();
}
first = false;
self.skip_unused();
let key_str = (self.parse_object_key_fn)(self)?;
let pos = self.idx;
self.skip_unused();
let c = self.next()?;
if c != b':' {
return Err(self.error(ParseErrorCode::ExpectedColon));
}
self.step();
let value = (self.parse_value_fn)(self)?;
obj.push((key_str, value, pos));
}
obj.sort_by(|a, b| a.0.cmp(&b.0));
for i in 1..obj.len() {
if obj[i - 1].0 == obj[i].0 {
let key_str = obj[i].0.clone().to_string();
let pos = obj[i].2;
let code = ParseErrorCode::ObjectDuplicateKey(key_str);
return Err(Error::Syntax(code, pos));
}
}
Ok(JsonAst::Object(obj))
}
}
#[cfg(test)]
mod tests {
use super::*;
use proptest::prelude::*;
use std::collections::BTreeMap;
use std::fmt::Display;
use std::fmt::Formatter;
/// Test-only value model whose `Display` output is the source text fed to
/// the parsers. Separate variants exist for each quoting style so that the
/// generated text exercises double-quoted, single-quoted and unquoted forms.
#[derive(Clone, PartialEq, Default, Eq, Debug)]
pub enum Json5Value {
#[default]
Null,
Bool(bool),
Number(Number),
// Already-formatted hexadecimal literal, printed verbatim.
HexNumber(String),
DoubleQuotedString(String),
SingleQuotedString(String),
Array(Vec<Json5Value>),
// Objects differ only in how their keys are quoted when printed.
DoubleQuotedKeyObject(BTreeMap<String, Json5Value>),
SingleQuotedKeyObject(BTreeMap<String, Json5Value>),
UnquotedKeyObject(BTreeMap<String, Json5Value>),
}
/// Writes `text` wrapped in double quotes, backslash-escaping `"` and `\`.
fn write_double_quoted(f: &mut Formatter, text: &str) -> std::fmt::Result {
    write!(f, "\"")?;
    for c in text.chars() {
        match c {
            '"' => write!(f, "\\\"")?,
            '\\' => write!(f, "\\\\")?,
            c => write!(f, "{}", c)?,
        }
    }
    write!(f, "\"")
}

/// Writes `text` wrapped in single quotes, backslash-escaping `'` and `\`.
fn write_single_quoted(f: &mut Formatter, text: &str) -> std::fmt::Result {
    write!(f, "'")?;
    for c in text.chars() {
        match c {
            '\'' => write!(f, "\\\'")?,
            '\\' => write!(f, "\\\\")?,
            c => write!(f, "{}", c)?,
        }
    }
    write!(f, "'")
}

/// Renders the value as source text in the quoting style its variant names.
impl Display for Json5Value {
    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
        match self {
            Json5Value::Null => write!(f, "null"),
            Json5Value::Bool(true) => write!(f, "true"),
            Json5Value::Bool(false) => write!(f, "false"),
            Json5Value::Number(num) => write!(f, "{}", num),
            Json5Value::HexNumber(literal) => write!(f, "{}", literal),
            Json5Value::DoubleQuotedString(text) => write_double_quoted(f, text),
            Json5Value::SingleQuotedString(text) => write_single_quoted(f, text),
            Json5Value::Array(items) => {
                write!(f, "[")?;
                for (i, v) in items.iter().enumerate() {
                    if i > 0 {
                        write!(f, ",")?;
                    }
                    write!(f, "{v}")?;
                }
                write!(f, "]")
            }
            Json5Value::DoubleQuotedKeyObject(members) => {
                write!(f, "{{")?;
                for (i, (k, v)) in members.iter().enumerate() {
                    if i > 0 {
                        write!(f, ",")?;
                    }
                    write_double_quoted(f, k)?;
                    write!(f, ":{v}")?;
                }
                write!(f, "}}")
            }
            Json5Value::SingleQuotedKeyObject(members) => {
                write!(f, "{{")?;
                for (i, (k, v)) in members.iter().enumerate() {
                    if i > 0 {
                        write!(f, ",")?;
                    }
                    write_single_quoted(f, k)?;
                    write!(f, ":{v}")?;
                }
                write!(f, "}}")
            }
            Json5Value::UnquotedKeyObject(members) => {
                write!(f, "{{")?;
                for (i, (k, v)) in members.iter().enumerate() {
                    if i > 0 {
                        write!(f, ",")?;
                    }
                    write!(f, "{k}:{v}")?;
                }
                write!(f, "}}")
            }
        }
    }
}
/// Generates strings of 1 to 49 characters drawn from printable ASCII
/// (`!` through `~`) and the CJK range U+4E00..=U+9FFF.
fn string_strategy() -> impl Strategy<Value = String> {
    let alphabet: Vec<char> = ('!'..='~').chain('\u{4E00}'..='\u{9FFF}').collect();
    prop::collection::vec(prop::sample::select(alphabet), 1..50)
        .prop_map(|picked| picked.into_iter().collect())
}
/// Generates strings of 1 to 49 characters that need no escaping inside
/// quotes: ASCII from `(` through `~` without the backslash, plus the CJK
/// range U+4E00..=U+9FFF. Both quote characters lie below `(` and are
/// therefore excluded.
fn quoted_string_strategy() -> impl Strategy<Value = String> {
    let alphabet: Vec<char> = ('('..='[')
        .chain(']'..='~')
        .chain('\u{4E00}'..='\u{9FFF}')
        .collect();
    prop::collection::vec(prop::sample::select(alphabet), 1..50)
        .prop_map(|picked| picked.into_iter().collect())
}
/// Generates candidate unquoted keys of 1 to 49 characters drawn from
/// decimal digits, the letters `a`-`f` / `A`-`F`, the CJK range
/// U+4E00..=U+9FFF, `_` and `$`.
fn unquoted_string_strategy() -> impl Strategy<Value = String> {
    let alphabet: Vec<char> = ('0'..='9')
        .chain('a'..='f')
        .chain('A'..='F')
        .chain('\u{4E00}'..='\u{9FFF}')
        .chain(['_', '$'])
        .collect();
    prop::collection::vec(prop::sample::select(alphabet), 1..50)
        .prop_map(|picked| picked.into_iter().collect())
}
// Generates numbers representable without the `arbitrary_precision` feature:
// unsigned integers, signed integers and floats.
fn standard_number_strategy() -> impl Strategy<Value = Number> {
prop_oneof![
any::<u64>().prop_map(Number::UInt64),
any::<i64>().prop_map(Number::Int64),
any::<f64>()
// `-0.0 == 0.0` in floating-point comparison, so this filter removes
// positive zero as well as negative zero.
.prop_filter("Exclude -0.0", |x| *x != -0.0)
.prop_map(Number::Float64),
]
}
// Generates every `Number` variant, including the decimal variants that only
// exist with the `arbitrary_precision` feature.
#[cfg(feature = "arbitrary_precision")]
fn number_strategy() -> impl Strategy<Value = Number> {
use crate::Decimal128;
use crate::Decimal256;
use crate::Decimal64;
use ethnum::i256;
prop_oneof![
any::<u64>().prop_map(Number::UInt64),
any::<i64>().prop_map(Number::Int64),
// `-0.0 == 0.0` in floating-point comparison, so this filter removes
// positive zero as well as negative zero.
any::<f64>().prop_filter("Exclude -0.0", |x| *x != -0.0).prop_map(Number::Float64),
(0u8..=18u8, any::<i64>()).prop_map(|(scale, value)| Number::Decimal64(Decimal64 { scale, value })),
(0u8..=38u8, any::<i128>()).prop_map(|(scale, value)| Number::Decimal128(Decimal128 { scale, value })),
// Only 256-bit values of at most 76 decimal digits are kept.
(0u8..=76u8, any::<i128>(), any::<i128>()).prop_filter("Exclude big i256",
|(_, hi, lo)| {
let val = i256::from_words(*hi, *lo);
val >= ethnum::int!("-9999999999999999999999999999999999999999999999999999999999999999999999999999") &&
val <= ethnum::int!("9999999999999999999999999999999999999999999999999999999999999999999999999999")
})
.prop_map(|(scale, hi, lo)| Number::Decimal256(Decimal256 { scale, value: i256::from_words(hi, lo) })),
]
}
/// Generates hexadecimal integer literals: `0`, then `x` or `X`, then 1 to 15
/// hexadecimal digits of either case.
fn hex_number_strategy() -> impl Strategy<Value = String> {
    let digits: Vec<char> = ('0'..='9').chain('a'..='f').chain('A'..='F').collect();
    let hex_digit = prop::sample::select(digits);
    let hex_prefix = prop::sample::select(vec!['x', 'X']);
    let int_part = prop::collection::vec(hex_digit, 1..16)
        .prop_map(|picked| picked.into_iter().collect::<String>());
    (hex_prefix, int_part).prop_map(|(x, i)| format!("0{}{}", x, i))
}
// Generates recursive `Json5Value` trees for the lenient parser: scalar
// leaves (including hex literals and both quoting styles) combined into
// arrays and into objects with each of the three key styles.
fn json5_strategy() -> impl Strategy<Value = Json5Value> {
let leaf = prop_oneof![
Just(Json5Value::Null),
any::<bool>().prop_map(Json5Value::Bool),
standard_number_strategy().prop_map(Json5Value::Number),
hex_number_strategy().prop_map(Json5Value::HexNumber),
quoted_string_strategy().prop_map(Json5Value::DoubleQuotedString),
quoted_string_strategy().prop_map(Json5Value::SingleQuotedString),
];
leaf.prop_recursive(8, 256, 30, |inner| {
prop_oneof![
prop::collection::vec(inner.clone(), 0..10).prop_map(Json5Value::Array),
prop::collection::btree_map(quoted_string_strategy(), inner.clone(), 0..20)
.prop_map(Json5Value::DoubleQuotedKeyObject),
prop::collection::btree_map(quoted_string_strategy(), inner.clone(), 0..20)
.prop_map(Json5Value::SingleQuotedKeyObject),
prop::collection::btree_map(unquoted_string_strategy(), inner, 0..20)
.prop_map(Json5Value::UnquotedKeyObject),
]
})
}
// Generates recursive `Value` trees whose numbers may use any variant
// produced by `number_strategy`, including the decimal ones.
#[cfg(feature = "arbitrary_precision")]
fn json_strategy() -> impl Strategy<Value = Value<'static>> {
let leaf = prop_oneof![
Just(Value::Null),
any::<bool>().prop_map(Value::Bool),
number_strategy().prop_map(Value::Number),
string_strategy().prop_map(|v| Value::String(Cow::Owned(v))),
];
leaf.prop_recursive(8, 256, 30, |inner| {
prop_oneof![
prop::collection::vec(inner.clone(), 0..10).prop_map(Value::Array),
prop::collection::btree_map(string_strategy(), inner, 0..20)
.prop_map(Value::Object),
]
})
}
// Generates recursive `Value` trees restricted to the numbers produced by
// `standard_number_strategy`.
fn standard_json_strategy() -> impl Strategy<Value = Value<'static>> {
let leaf = prop_oneof![
Just(Value::Null),
any::<bool>().prop_map(Value::Bool),
standard_number_strategy().prop_map(Value::Number),
string_strategy().prop_map(|v| Value::String(Cow::Owned(v))),
];
leaf.prop_recursive(8, 256, 30, |inner| {
prop_oneof![
prop::collection::vec(inner.clone(), 0..10).prop_map(Value::Array),
prop::collection::btree_map(string_strategy(), inner, 0..20)
.prop_map(Value::Object),
]
})
}
// Differential test of the lenient parser against `json_five`: both must
// agree on whether the generated source parses, and on success the printed
// results must be identical.
proptest! {
#[test]
fn test_json5_parser(json in json5_strategy()) {
let source = format!("{}", json);
let res1 = json_five::from_str::<serde_json::Value>(&source);
let res2 = parse_value(source.as_bytes());
let res3 = parse_owned_jsonb(source.as_bytes());
assert_eq!(res1.is_ok(), res2.is_ok());
assert_eq!(res1.is_ok(), res3.is_ok());
if res1.is_ok() {
let res1 = format!("{}", res1.unwrap());
let res2 = format!("{}", res2.unwrap());
let res3 = format!("{}", res3.unwrap());
assert_eq!(res1, res2);
assert_eq!(res1, res3);
}
}
}
// Differential test of the lenient parser against `serde_json` on values
// that may contain decimal numbers: both must agree on whether the source
// parses, and on success the printed results must be identical.
proptest! {
#[test]
#[cfg(feature = "arbitrary_precision")]
fn test_json_parser(json in json_strategy()) {
let source = format!("{}", json);
let res1 = serde_json::from_slice::<serde_json::Value>(source.as_bytes());
let res2 = parse_value(source.as_bytes());
let res3 = parse_owned_jsonb(source.as_bytes());
assert_eq!(res1.is_ok(), res2.is_ok());
assert_eq!(res1.is_ok(), res3.is_ok());
if res1.is_ok() {
let res1 = format!("{}", res1.unwrap());
let res2 = format!("{}", res2.unwrap());
let res3 = format!("{}", res3.unwrap());
assert_eq!(res1, res2);
assert_eq!(res1, res3);
}
}
}
// Round-trip test of the standard-mode parser: it must accept exactly what
// `serde_json` accepts, and printing the parsed result must reproduce the
// generated source text.
proptest! {
#[test]
fn test_standard_json_parser(json in standard_json_strategy()) {
let source = format!("{}", json);
let res1 = serde_json::from_slice::<serde_json::Value>(source.as_bytes());
let res2 = parse_value_standard_mode(source.as_bytes());
let res3 = parse_owned_jsonb_standard_mode(source.as_bytes());
assert_eq!(res1.is_ok(), res2.is_ok());
assert_eq!(res1.is_ok(), res3.is_ok());
if res1.is_ok() {
let res2 = format!("{}", res2.unwrap());
let res3 = format!("{}", res3.unwrap());
assert_eq!(source, res2);
assert_eq!(source, res3);
}
}
}
}