use std::{str, slice};
use std::char::decode_utf16;
use std::convert::TryFrom;
use crate::object::Object;
use crate::number::Number;
use crate::{JsonValue, Error, Result};
// Threshold for the fast integer-accumulation path: `expect_number!` hands the mantissa
// over to `read_big_number` once it reaches this value, and `expect_fraction!` switches
// to checked arithmetic. Any value below it satisfies `num * 10 + 9 <= u64::MAX`, so the
// unchecked `num * 10 + digit` updates cannot overflow.
const MAX_PRECISION: u64 = 576460752303423500;
// Maximum length of the container stack in `Parser::parse`; attempting to open another
// non-empty array/object beyond it yields `Error::ExceededDepthLimit`.
const DEPTH_LIMIT: usize = 512;
// Byte-oriented JSON parser state over a borrowed source string.
struct Parser<'a> {
// Scratch storage for strings containing escapes: `read_complex_string` appends the
// decoded bytes here and returns a slice over the bytes it just appended.
buffer: Vec<u8>,
// The full input; used for error reporting and for copying the literal prefix of an
// escaped string.
source: &'a str,
// Pointer to the first byte of `source` (set from `source.as_ptr()` in `new`).
byte_ptr: *const u8,
// Offset of the next byte to be read.
index: usize,
// Length of `source` in bytes (set from `source.len()` in `new`).
length: usize,
}
// Consumes and evaluates to the next input byte. When the input is exhausted it makes the
// enclosing function return `Err(Error::UnexpectedEndOfJson)` instead.
macro_rules! expect_byte {
    ($parser:ident) => ({
        if !$parser.is_eof() {
            let byte = $parser.read_byte();
            $parser.bump();
            byte
        } else {
            return Err(Error::UnexpectedEndOfJson);
        }
    })
}
// Consumes one byte per listed pattern, in order. The first byte that does not match its
// pattern makes the enclosing function return the parser's "unexpected character" error;
// running out of input returns the end-of-JSON error via `expect_byte!`.
macro_rules! expect_sequence {
    ($parser:ident, $( $ch:pat ),*) => {
        $(
            if let $ch = expect_byte!($parser) {
                // Byte matched; move on to the next pattern.
            } else {
                return $parser.unexpected_character();
            }
        )*
    }
}
// Consumes bytes until one that is not whitespace is found and evaluates to that byte.
// Bytes 9..=13 (tab, LF, VT, FF, CR) and 32 (space) are treated as whitespace.
// End of input propagates the end-of-JSON error through `expect_byte!`.
macro_rules! expect_byte_ignore_whitespace {
    ($parser:ident) => ({
        loop {
            match expect_byte!($parser) {
                9 ..= 13 | 32 => continue,
                significant => break significant,
            }
        }
    })
}
// Verifies that only whitespace (bytes 9..=13 and 32) remains in the input. Any other
// byte is consumed and then reported through the parser's "unexpected character" error,
// which is returned from the enclosing function.
macro_rules! expect_eof {
    ($parser:ident) => ({
        loop {
            if $parser.is_eof() {
                break;
            }
            let trailing = $parser.read_byte();
            // Both the whitespace and the error path advance past the byte just read.
            $parser.bump();
            match trailing {
                9 ..= 13 | 32 => {},
                _ => return $parser.unexpected_character(),
            }
        }
    })
}
// Skips whitespace and checks the next significant byte.
//
// * `expect!(parser, byte)` requires that byte to equal `byte`.
// * `expect!{parser, pat => expr, ...}` evaluates to the expression of the first matching
//   pattern.
//
// In both forms a byte that matches nothing makes the enclosing function return the
// parser's "unexpected character" error.
macro_rules! expect {
    ($parser:ident, $byte:expr) => ({
        if expect_byte_ignore_whitespace!($parser) != $byte {
            return $parser.unexpected_character();
        }
    });

    {$parser:ident $(, $byte:pat => $then:expr )*} => ({
        match expect_byte_ignore_whitespace!($parser) {
            $(
                $byte => $then,
            )*
            _ => return $parser.unexpected_character(),
        }
    })
}
// Cell markers for the `ALLOWED` table below. `QU` (the double quote), `BS` (the
// backslash) and `CT` (control bytes 0x00..=0x1F) are all `false`; `__` marks every
// other byte and is `true`.
const QU: bool = false;
const BS: bool = false;
const CT: bool = false;
const __: bool = true;
// Lookup table indexed by byte value, 16 entries per row. `ALLOWED[b]` is `true` when
// byte `b` may be consumed verbatim inside a string literal. The `false` entries are the
// bytes the string readers must handle specially: the closing quote (index 34), the
// escape-introducing backslash (index 92) and control bytes below 0x20, which are
// rejected. All bytes >= 0x80 are allowed, so multi-byte UTF-8 sequences pass through
// untouched.
static ALLOWED: [bool; 256] = [
CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT,
CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT,
__, __, QU, __, __, __, __, __, __, __, __, __, __, __, __, __,
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
__, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __,
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
];
// Reads a string body; the opening quote must already have been consumed. Evaluates to
// a `&str` holding the string's contents.
//
// Fast path: if no escape is encountered before the closing quote, the result is a slice
// of the source text itself. On the first backslash the work is handed to
// `read_complex_string`, which decodes escapes into the parser's buffer. A control byte
// inside the string returns the "unexpected character" error from the enclosing function.
macro_rules! expect_string {
    ($parser:ident) => ({
        let begin = $parser.index;

        let text: &str = loop {
            let byte = expect_byte!($parser);
            if ALLOWED[byte as usize] {
                continue;
            }
            match byte {
                b'"' => {
                    // `index` is already past the closing quote, hence the `- 1`.
                    let len = $parser.index - 1 - begin;
                    break unsafe {
                        let ptr = $parser.byte_ptr.offset(begin as isize);
                        str::from_utf8_unchecked(slice::from_raw_parts(ptr, len))
                    };
                },
                b'\\' => break $parser.read_complex_string(begin)?,
                _ => return $parser.unexpected_character(),
            }
        };

        text
    })
}
// Parses the rest of a number whose first digit (`$first`, an ASCII digit `1`..=`9`) has
// already been consumed, and evaluates to a `Number`.
//
// Digits are accumulated into a `u64` mantissa. Once the mantissa reaches `MAX_PRECISION`
// the remaining digits are handled by `read_big_number`; a non-digit byte is passed to
// `allow_number_extensions!`, which deals with a fraction or exponent if present.
macro_rules! expect_number {
    ($parser:ident, $first:ident) => ({
        let mut mantissa = ($first - b'0') as u64;

        let parsed: Number = loop {
            if mantissa >= MAX_PRECISION {
                break $parser.read_big_number(mantissa)?;
            }
            if $parser.is_eof() {
                break mantissa.into();
            }

            let next = $parser.read_byte();
            if next.is_ascii_digit() {
                $parser.bump();
                mantissa = mantissa * 10 + (next - b'0') as u64;
            } else {
                let mut exponent = 0;
                break allow_number_extensions!($parser, mantissa, exponent, next);
            }
        };

        parsed
    })
}
// Handles what may follow the integer part of a number and evaluates to a `Number`.
//
// Four-argument form: `$ch` is the not-yet-consumed byte after the integer digits.
// A `.` starts a fraction, `e`/`E` starts an exponent, and anything else leaves the byte
// in place and converts `$num` as it stands.
//
// One-argument form: used after a leading `0`. At end of input the result is zero,
// otherwise the next byte is inspected with a zero mantissa and exponent.
macro_rules! allow_number_extensions {
    ($parser:ident, $num:ident, $e:ident, $ch:ident) => ({
        if $ch == b'.' {
            $parser.bump();
            expect_fraction!($parser, $num, $e)
        } else if $ch == b'e' || $ch == b'E' {
            $parser.bump();
            $parser.expect_exponent($num, $e)?
        } else {
            $num.into()
        }
    });

    ($parser:ident) => ({
        if !$parser.is_eof() {
            let mut mantissa = 0;
            let mut exponent = 0;
            let next = $parser.read_byte();
            allow_number_extensions!($parser, mantissa, exponent, next)
        } else {
            0.into()
        }
    })
}
// Parses the digits after a decimal point (the `.` is already consumed) and evaluates to
// a `Number`. `$num` is the mantissa so far and `$e` the decimal exponent; each digit
// that is folded into the mantissa decrements `$e`.
//
// At least one digit is required. While the mantissa is below `MAX_PRECISION` digits are
// added with plain arithmetic; above it, checked arithmetic is used and a digit that
// would overflow is dropped. An `e`/`E` hands over to `expect_exponent`; any other byte
// ends the number without being consumed.
macro_rules! expect_fraction {
    ($parser:ident, $num:ident, $e:ident) => ({
        // Mandatory first fractional digit.
        let first = expect_byte!($parser);
        match first {
            b'0' ..= b'9' => {
                let digit = (first - b'0') as u64;
                if $num < MAX_PRECISION {
                    $num = $num * 10 + digit;
                    $e -= 1;
                } else if let Some(grown) = $num
                    .checked_mul(10)
                    .and_then(|scaled| scaled.checked_add(digit))
                {
                    $num = grown;
                    $e -= 1;
                }
            },
            _ => return $parser.unexpected_character()
        }

        // Remaining fractional digits, optionally followed by an exponent.
        let parsed: Number = loop {
            if $parser.is_eof() {
                break unsafe { Number::from_parts_unchecked(true, $num, $e) };
            }

            let next = $parser.read_byte();
            match next {
                b'0' ..= b'9' => {
                    $parser.bump();
                    let digit = (next - b'0') as u64;
                    if $num < MAX_PRECISION {
                        $num = $num * 10 + digit;
                        $e -= 1;
                    } else if let Some(grown) = $num
                        .checked_mul(10)
                        .and_then(|scaled| scaled.checked_add(digit))
                    {
                        $num = grown;
                        $e -= 1;
                    }
                },
                b'e' | b'E' => {
                    $parser.bump();
                    break $parser.expect_exponent($num, $e)?;
                },
                _ => break unsafe { Number::from_parts_unchecked(true, $num, $e) },
            }
        };

        parsed
    })
}
impl<'a> Parser<'a> {
pub fn new(source: &'a str) -> Self {
Parser {
buffer: Vec::with_capacity(30),
source: source,
byte_ptr: source.as_ptr(),
index: 0,
length: source.len(),
}
}
#[inline(always)]
fn is_eof(&mut self) -> bool {
self.index == self.length
}
#[inline(always)]
fn read_byte(&mut self) -> u8 {
debug_assert!(self.index < self.length, "Reading out of bounds");
unsafe { *self.byte_ptr.offset(self.index as isize) }
}
#[inline(always)]
fn bump(&mut self) {
self.index = self.index.wrapping_add(1);
}
fn unexpected_character<T: Sized>(&mut self) -> Result<T> {
let at = self.index - 1;
let ch = self.source[at..]
.chars()
.next()
.expect("Must have a character");
let (lineno, col) = self.source[..at]
.lines()
.enumerate()
.last()
.unwrap_or((0, ""));
let colno = col.chars().count();
Err(Error::UnexpectedCharacter {
ch: ch,
line: lineno + 1,
column: colno + 1,
})
}
fn read_hexdec_digit(&mut self) -> Result<u16> {
let ch = expect_byte!(self);
Ok(match ch {
b'0' ..= b'9' => (ch - b'0'),
b'a' ..= b'f' => (ch + 10 - b'a'),
b'A' ..= b'F' => (ch + 10 - b'A'),
_ => return self.unexpected_character(),
} as u16)
}
fn read_hexdec_codepoint(&mut self) -> Result<u16> {
Ok(
self.read_hexdec_digit()? << 12 |
self.read_hexdec_digit()? << 8 |
self.read_hexdec_digit()? << 4 |
self.read_hexdec_digit()?
)
}
fn read_codepoint(&mut self) -> Result<()> {
let mut buf = [0; 4];
let codepoint = self.read_hexdec_codepoint()?;
let unicode = match char::try_from(codepoint as u32) {
Ok(code) => code,
Err(_) => {
expect_sequence!(self, b'\\', b'u');
match decode_utf16(
[codepoint, self.read_hexdec_codepoint()?].iter().copied()
).next() {
Some(Ok(code)) => code,
_ => return Err(Error::FailedUtf8Parsing),
}
}
};
self.buffer.extend_from_slice(unicode.encode_utf8(&mut buf).as_bytes());
Ok(())
}
fn read_complex_string<'b>(&mut self, start: usize) -> Result<&'b str> {
let len = self.buffer.len();
let mut ch = b'\\';
self.buffer.extend_from_slice(&self.source.as_bytes()[start .. self.index - 1]);
loop {
if ALLOWED[ch as usize] {
self.buffer.push(ch);
ch = expect_byte!(self);
continue;
}
match ch {
b'"' => break,
b'\\' => {
let escaped = expect_byte!(self);
let escaped = match escaped {
b'u' => {
self.read_codepoint()?;
ch = expect_byte!(self);
continue;
},
b'"' |
b'\\' |
b'/' => escaped,
b'b' => 0x8,
b'f' => 0xC,
b't' => b'\t',
b'r' => b'\r',
b'n' => b'\n',
_ => return self.unexpected_character()
};
self.buffer.push(escaped);
},
_ => return self.unexpected_character()
}
ch = expect_byte!(self);
}
Ok(unsafe {
str::from_utf8_unchecked(
slice::from_raw_parts(self.buffer[len .. ].as_ptr(), self.buffer.len() - len)
)
})
}
fn read_big_number(&mut self, mut num: u64) -> Result<Number> {
let mut e = 0i16;
loop {
if self.is_eof() {
return Ok(unsafe { Number::from_parts_unchecked(true, num, e) });
}
let ch = self.read_byte();
match ch {
b'0' ..= b'9' => {
self.bump();
match num.checked_mul(10).and_then(|num| {
num.checked_add((ch - b'0') as u64)
}) {
Some(result) => num = result,
None => e = e.checked_add(1).ok_or_else(|| Error::ExceededDepthLimit)?,
}
},
b'.' => {
self.bump();
return Ok(expect_fraction!(self, num, e));
},
b'e' | b'E' => {
self.bump();
return self.expect_exponent(num, e);
}
_ => break
}
}
Ok(unsafe { Number::from_parts_unchecked(true, num, e) })
}
fn expect_exponent(&mut self, num: u64, big_e: i16) -> Result<Number> {
let mut ch = expect_byte!(self);
let sign = match ch {
b'-' => {
ch = expect_byte!(self);
-1
},
b'+' => {
ch = expect_byte!(self);
1
},
_ => 1
};
let mut e = match ch {
b'0' ..= b'9' => (ch - b'0') as i16,
_ => return self.unexpected_character(),
};
loop {
if self.is_eof() {
break;
}
let ch = self.read_byte();
match ch {
b'0' ..= b'9' => {
self.bump();
e = e.saturating_mul(10).saturating_add((ch - b'0') as i16);
},
_ => break
}
}
Ok(unsafe { Number::from_parts_unchecked(true, num, big_e.saturating_add(e * sign)) })
}
fn parse(&mut self) -> Result<JsonValue> {
let mut stack = Vec::with_capacity(3);
let mut ch = expect_byte_ignore_whitespace!(self);
'parsing: loop {
let mut value = match ch {
b'[' => {
ch = expect_byte_ignore_whitespace!(self);
if ch != b']' {
if stack.len() == DEPTH_LIMIT {
return Err(Error::ExceededDepthLimit);
}
stack.push(StackBlock(JsonValue::Array(Vec::with_capacity(2)), 0));
continue 'parsing;
}
JsonValue::Array(Vec::new())
},
b'{' => {
ch = expect_byte_ignore_whitespace!(self);
if ch != b'}' {
if stack.len() == DEPTH_LIMIT {
return Err(Error::ExceededDepthLimit);
}
let mut object = Object::with_capacity(3);
if ch != b'"' {
return self.unexpected_character()
}
let index = object.insert_index(expect_string!(self), JsonValue::Null);
expect!(self, b':');
stack.push(StackBlock(JsonValue::Object(object), index));
ch = expect_byte_ignore_whitespace!(self);
continue 'parsing;
}
JsonValue::Object(Object::new())
},
b'"' => expect_string!(self).into(),
b'0' => JsonValue::Number(allow_number_extensions!(self)),
b'1' ..= b'9' => {
JsonValue::Number(expect_number!(self, ch))
},
b'-' => {
let ch = expect_byte!(self);
JsonValue::Number(- match ch {
b'0' => allow_number_extensions!(self),
b'1' ..= b'9' => expect_number!(self, ch),
_ => return self.unexpected_character()
})
}
b't' => {
expect_sequence!(self, b'r', b'u', b'e');
JsonValue::Boolean(true)
},
b'f' => {
expect_sequence!(self, b'a', b'l', b's', b'e');
JsonValue::Boolean(false)
},
b'n' => {
expect_sequence!(self, b'u', b'l', b'l');
JsonValue::Null
},
_ => return self.unexpected_character()
};
'popping: loop {
match stack.last_mut() {
None => {
expect_eof!(self);
return Ok(value);
},
Some(&mut StackBlock(JsonValue::Array(ref mut array), _)) => {
array.push(value);
ch = expect_byte_ignore_whitespace!(self);
match ch {
b',' => {
ch = expect_byte_ignore_whitespace!(self);
continue 'parsing;
},
b']' => {},
_ => return self.unexpected_character()
}
},
Some(&mut StackBlock(JsonValue::Object(ref mut object), ref mut index )) => {
object.override_at(*index, value);
ch = expect_byte_ignore_whitespace!(self);
match ch {
b',' => {
expect!(self, b'"');
*index = object.insert_index(expect_string!(self), JsonValue::Null);
expect!(self, b':');
ch = expect_byte_ignore_whitespace!(self);
continue 'parsing;
},
b'}' => {},
_ => return self.unexpected_character()
}
},
_ => unreachable!(),
}
value = match stack.pop() {
Some(StackBlock(value, _)) => value,
None => break 'popping
}
}
}
}
}
// One entry of the explicit container stack used by `Parser::parse`: the array or object
// currently being filled, plus the index returned by `Object::insert_index` for the key
// whose value is still pending (arrays are pushed with 0).
struct StackBlock(JsonValue, usize);
/// Parses `source` as a single JSON document and returns the resulting value, or the
/// error reported by the parser.
#[inline]
pub fn parse(source: &str) -> Result<JsonValue> {
    let mut parser = Parser::new(source);
    parser.parse()
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::stringify;
    use crate::JsonValue;
    use crate::object;
    use crate::array;
    use std::fs;

    /// Parsing and re-serializing the fixture file must reproduce it exactly.
    #[test]
    fn it_should_parse_escaped_forward_slashes_with_quotes() {
        let contents = fs::read_to_string("tests/test_json_slashes_quotes").unwrap();

        let actual = parse(&contents).unwrap();
        let serialized = stringify(actual);

        assert_eq!(serialized, contents);
    }

    /// Escaped quotes inside a string value survive a parse/stringify round trip.
    #[test]
    fn it_should_parse_escaped_quotes() {
        let contents = String::from("{\"ab\":\"c\\\"d\\\"e\"}");

        let actual = parse(&contents).unwrap();
        let serialized = stringify(actual);

        assert_eq!(serialized, contents);
    }

    /// Numbers, booleans and null as direct object members.
    #[test]
    fn it_should_parse_basic_json_values() {
        let s = "{\"a\":1,\"b\":true,\"c\":false,\"d\":null,\"e\":2}";
        let actual = parse(s).unwrap();
        let mut expected = object! {
            "a" => 1,
            "b" => true,
            "c" => false,
            "e" => 2
        };
        expected["d"] = JsonValue::Null;

        assert_eq!(actual, expected);
    }

    /// An array member holding scalars, an empty array and an empty object.
    #[test]
    fn it_should_parse_json_arrays() {
        let s = "{\"a\":1,\"b\":true,\"c\":false,\"d\":null,\"e\":2,\"f\":[1,2,3,false,true,[],{}]}";
        let actual = parse(s).unwrap();
        let mut expected = object! {
            "a" => 1,
            "b" => true,
            "c" => false,
            "e" => 2
        };
        expected["d"] = JsonValue::Null;
        expected["f"] = array![
            1,2,3,
            false,
            true,
            array![],
            object!{}
        ];

        assert_eq!(actual, expected);
    }

    /// Objects nested several levels deep, with members following a closed child.
    #[test]
    fn it_should_parse_json_nested_object() {
        let s = "{\"a\":1,\"b\":{\"c\":2,\"d\":{\"e\":{\"f\":{\"g\":3,\"h\":[]}}},\"i\":4,\"j\":[],\"k\":{\"l\":5,\"m\":{}}}}";
        let actual = parse(s).unwrap();
        let expected = object! {
            "a" => 1,
            "b" => object!{
                "c" => 2,
                "d" => object!{
                    "e" => object! {
                        "f" => object!{
                            "g" => 3,
                            "h" => array![]
                        }
                    }
                },
                "i" => 4,
                "j" => array![],
                "k" => object!{
                    "l" => 5,
                    "m" => object!{}
                }
            }
        };

        assert_eq!(actual, expected);
    }

    /// Arrays of objects nested inside deeply nested objects.
    #[test]
    fn it_should_parse_json_complex_object() {
        let s = "{\"a\":1,\"b\":{\"c\":2,\"d\":{\"e\":{\"f\":{\"g\":3,\"h\":[{\"z\":1},{\"y\":2,\"x\":[{},{}]}]}}},\"i\":4,\"j\":[],\"k\":{\"l\":5,\"m\":{}}}}";
        let actual = parse(s).unwrap();
        let expected = object! {
            "a" => 1,
            "b" => object!{
                "c" => 2,
                "d" => object!{
                    "e" => object! {
                        "f" => object!{
                            "g" => 3,
                            "h" => array![
                                object!{"z" => 1},
                                object!{"y" => 2, "x" => array![object!{}, object!{}]}
                            ]
                        }
                    }
                },
                "i" => 4,
                "j" => array![],
                "k" => object!{
                    "l" => 5,
                    "m" => object!{}
                }
            }
        };

        assert_eq!(actual, expected);
    }
}