#![cfg(feature="std")]
#![doc(cfg(feature="std"))]
mod number_parser;
mod unicode_char;
use {
core::fmt::{self, Display, Formatter},
alloc::{
collections::btree_map::Entry,
string::String,
vec::Vec,
},
std::io::{Bytes, Read},
crate::{Error, Json, Number, Object, Result, bytes},
self::{
number_parser::NumberParser,
unicode_char::UnicodeChar,
},
};
/// Initial capacity, in bytes, of the buffer allocated for each string being parsed.
const STRING_CAPACITY: usize = 512;
/// A structural byte that a parsing routine has consumed but leaves to its caller to interpret.
#[derive(Debug)]
enum IgnoredByte {
    /// The `:` byte.
    Colon,
    /// The `,` byte.
    Comma,
    /// The `]` byte.
    EndOfArray,
    /// The `}` byte.
    EndOfObject,
}

/// Writes the variant as the single character it stands for.
impl Display for IgnoredByte {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let symbol = match self {
            Self::Colon => ":",
            Self::Comma => ",",
            Self::EndOfArray => "]",
            Self::EndOfObject => "}",
        };
        f.write_str(symbol)
    }
}
impl TryFrom<&u8> for IgnoredByte {
type Error = Error;
fn try_from(b: &u8) -> core::result::Result<Self, Self::Error> {
match b {
b':' => Ok(IgnoredByte::Colon),
b',' => Ok(IgnoredByte::Comma),
b']' => Ok(IgnoredByte::EndOfArray),
b'}' => Ok(IgnoredByte::EndOfObject),
_ => Err(err!("Expected one of ':,]}}', got: {:?}", char::from(*b))),
}
}
}
/// Parses a JSON document from `src`.
///
/// Leading white space is skipped. The first other byte must open an array (`[`) or an object
/// (`{`), which is then parsed to its end. Only white space may follow the document.
///
/// # Errors
///
/// Fails on read errors, when the root is neither an array nor an object, on duplicate object
/// keys, on anything but white space after the document, when the nested parsers fail, and when
/// `src` holds no document at all.
pub fn parse<R>(src: &mut R) -> Result<Json> where R: Read {
    let mut bytes = src.bytes();
    let mut number_parser = NumberParser::new();
    let mut document = None;

    // `while let` rather than `for`: the nested parsers need `bytes` mutably inside the loop.
    while let Some(next) = bytes.next() {
        let root = match next.map_err(|e| err!("{}", e))? {
            b' ' | b'\r' | b'\n' | b'\t' => continue,
            b'[' => {
                let mut items = alloc::vec![];
                parse_array_content(&mut bytes, &mut number_parser, |item| items.push(item))?;
                Json::Array(items)
            },
            b'{' => {
                let mut members = Object::new();
                parse_object_content(&mut bytes, &mut number_parser, |key, value| match members.entry(key) {
                    Entry::Occupied(existing) => Err(err!("Duplicate key: {:?}", existing.key())),
                    Entry::Vacant(slot) => {
                        slot.insert(value);
                        Ok(())
                    },
                })?;
                Json::Object(members)
            },
            other => return Err(err!("Expected either '{{' or '[', got: {:?}", char::from(other))),
        };
        document = Some(root);
        break;
    }

    ensure_white_spaces(&mut bytes)?;
    document.ok_or_else(|| err!("Found no JSON document"))
}
/// Parses a JSON document from an in-memory byte sequence by handing its slice to [`parse`].
pub fn parse_bytes<B>(bytes: B) -> Result<Json> where B: AsRef<[u8]> {
    let mut slice: &[u8] = bytes.as_ref();
    parse(&mut slice)
}
/// Parses the members of an object whose opening `{` has already been consumed, up to and
/// including the closing `}`.
///
/// `handler` is called with each key/value pair in input order; an error returned by it stops
/// parsing and is returned as is.
///
/// # Errors
///
/// Fails when a key is not followed by `:`, when a value is missing, when a value is followed by
/// anything but `,` or `}`, when the input ends before the object is closed, and when the nested
/// parsers or `handler` fail.
fn parse_object_content<R, F>(bytes: &mut Bytes<R>, number_parser: &mut NumberParser, mut handler: F) -> Result<()>
where R: Read, F: FnMut(String, Json) -> Result<()> {
    loop {
        let (key, after_key) = parse_string(bytes, None)?;

        // No key: the only acceptable continuation is the end of the object.
        let key = match key {
            Some(key) => key,
            None => return match after_key {
                Some(IgnoredByte::EndOfObject) => Ok(()),
                Some(other) => Err(err!("Expected '}}', got: {:?}", other)),
                None => ensure_char(bytes, b'}'),
            },
        };

        match after_key {
            Some(IgnoredByte::Colon) => {},
            unexpected => return Err(err!("Expected ':', got: {:?}", unexpected)),
        };

        let (value, after_value) = parse_one_value(bytes, number_parser)?;
        let value = match value {
            Some(value) => value,
            None => return Err(err!("Missing value")),
        };

        match after_value {
            Some(IgnoredByte::Comma) => handler(key, value)?,
            Some(IgnoredByte::EndOfObject) => return handler(key, value),
            Some(other) => return Err(err!("Expected either ',' or '}}', got: {:?}", other)),
            None => return Err(err!("Invalid object")),
        };
    }
}
/// Parses the items of an array whose opening `[` has already been consumed, up to and including
/// the closing `]`.
///
/// `handler` is called with each item in input order.
///
/// # Errors
///
/// Fails when an item is followed by anything but `,` or `]`, when the input ends before the
/// array is closed, and when parsing an item fails.
fn parse_array_content<R, F>(bytes: &mut Bytes<R>, number_parser: &mut NumberParser, mut handler: F) -> Result<()>
where R: Read, F: FnMut(Json) {
    loop {
        match parse_one_value(bytes, number_parser)? {
            (Some(item), Some(IgnoredByte::Comma)) => handler(item),
            (Some(item), Some(IgnoredByte::EndOfArray)) => {
                handler(item);
                return Ok(());
            },
            (Some(_), unexpected) => return Err(err!("Expected either ',' or ']', got: {:?}", unexpected)),
            (None, Some(IgnoredByte::EndOfArray)) => return Ok(()),
            (None, unexpected) => return Err(err!("Expected ']', got: {:?}", unexpected)),
        };
    }
}
/// Parses the next JSON value from `bytes`, together with the structural byte that follows it.
///
/// Leading white space is skipped. `number_parser` is used as the accumulator for numbers.
///
/// # Returns
///
/// - `(Some(value), Some(byte))`: a value followed by the structural byte `byte`;
/// - `(Some(value), None)`: a value followed by the end of the input;
/// - `(None, Some(byte))`: a `]` or `}` was found where a value was expected;
/// - `(None, None)`: the input ended before any value started.
///
/// # Errors
///
/// Fails on read errors, on a byte that cannot start a value, on a malformed `true`/`false`/`null`
/// literal, on duplicate object keys, on an unexpected byte after the value, and when the nested
/// parsers fail.
fn parse_one_value<R>(bytes: &mut Bytes<R>, number_parser: &mut NumberParser) -> Result<(Option<Json>, Option<IgnoredByte>)> where R: Read {
    /// Consumes the bytes of `rest` (the literal `literal` minus its first byte) from `bytes`.
    ///
    /// Read errors are propagated like everywhere else in this module instead of being reported
    /// as a syntax error; a mismatch or the end of the input is a syntax error.
    fn expect_literal_rest<R>(bytes: &mut Bytes<R>, rest: &[u8], literal: &str) -> Result<()> where R: Read {
        for expected in rest {
            match bytes.next() {
                Some(Ok(b)) if b == *expected => {},
                Some(Err(e)) => return Err(err!("{}", e)),
                _ => return Err(err!("Expected '{}', got other", literal)),
            };
        }
        Ok(())
    }

    let mut value = None;

    // `while let` rather than `for`: the nested parsers need `bytes` mutably inside the loop.
    while let Some(b) = bytes.next() {
        let b = b.map_err(|e| err!("{}", e))?;
        value = Some(match b {
            b' ' | b'\r' | b'\n' | b'\t' => continue,
            b't' => {
                expect_literal_rest(bytes, b"rue", "true")?;
                Json::Boolean(true)
            },
            b'f' => {
                expect_literal_rest(bytes, b"alse", "false")?;
                Json::Boolean(false)
            },
            b'n' => {
                expect_literal_rest(bytes, b"ull", "null")?;
                Json::Null
            },
            b'[' => {
                let mut array = alloc::vec![];
                parse_array_content(bytes, number_parser, |v| array.push(v))?;
                Json::Array(array)
            },
            b'{' => {
                let mut object = Object::new();
                parse_object_content(bytes, number_parser, |k, v| match object.entry(k) {
                    Entry::Vacant(vacant) => {
                        vacant.insert(v);
                        Ok(())
                    },
                    Entry::Occupied(occupied) => Err(err!("Duplicate key: {:?}", occupied.key())),
                })?;
                Json::Object(object)
            },
            // A closing byte instead of a value: let the caller decide whether that is legal.
            b']' => return Ok((None, Some(IgnoredByte::EndOfArray))),
            b'}' => return Ok((None, Some(IgnoredByte::EndOfObject))),
            // `parse_string` already skips trailing white space and reports the following byte.
            b'"' => return match parse_string(bytes, Some(Vec::with_capacity(STRING_CAPACITY)))? {
                (Some(s), ignored) => Ok((Some(Json::String(s)), ignored)),
                (None, _) => Err(err!("Invalid string value")),
            },
            b'+' | b'-' | b'0'..=b'9' => {
                number_parser.add(&b)?;
                let (number, ignored) = parse_number(bytes, number_parser)?;
                // The number ended directly on a structural byte: nothing left to skip.
                if ignored.is_some() {
                    return Ok((Some(Json::Number(number)), ignored));
                }
                Json::Number(number)
            },
            other => return Err(err!("Invalid character: {:?}", char::from(other))),
        });
        break;
    }

    // Skip white space after the value and report the structural byte that follows it.
    for b in bytes {
        match b.map_err(|e| err!("{}", e))? {
            b' ' | b'\r' | b'\n' | b'\t' => continue,
            b',' => return Ok((value, Some(IgnoredByte::Comma))),
            b']' => return Ok((value, Some(IgnoredByte::EndOfArray))),
            b'}' => return Ok((value, Some(IgnoredByte::EndOfObject))),
            other => return Err(err!("Expected one of ',]}}', got: {:?}", char::from(other))),
        };
    }
    Ok((value, None))
}
/// Parses a JSON string from `bytes` and reports the first non-white-space byte found after it.
///
/// When `start` is `Some`, the opening `"` is treated as already consumed and the decoded bytes
/// are appended to the given buffer. When `start` is `None`, white space is skipped until an
/// opening `"` is found; any other byte is converted with `IgnoredByte::try_from` and returned
/// right away, with no string.
///
/// Returns `(string, ignored)`, where `ignored` is the structural byte following the string (or
/// found in its place), or `None` if the input ended first.
///
/// # Errors
///
/// Fails on read errors, on input ending inside a string, on unsupported escape sequences, on
/// errors reported by `UnicodeChar`, on content that is not valid UTF-8, and on any byte that
/// `IgnoredByte::try_from` rejects.
fn parse_string<R>(bytes: &mut Bytes<R>, start: Option<Vec<u8>>) -> Result<(Option<String>, Option<IgnoredByte>)> where R: Read {
// Maps the byte that follows a backslash to the byte it stands for.
fn escape(b: &u8) -> Result<u8> {
match b {
b'"' => Ok(*b),
b'\\' => Ok(*b),
b'/' => Ok(*b),
b'b' => Ok(bytes::BACKSPACE),
b'f' => Ok(bytes::FORM_FEED),
b'n' => Ok(bytes::LINE_FEED),
b'r' => Ok(bytes::CARRIAGE_RETURN),
b't' => Ok(bytes::HORIZONTAL_TAB),
_ => Err(err!("Escape character not supported: {:?}", char::from(*b))),
}
}
// `None` until the opening quote has been seen; afterwards, the raw bytes of the string.
let mut result: Option<Vec<u8>> = start;
// True from a backslash until its escape sequence has been fully handled.
let mut escaping = false;
// Hex digits collected so far for a pending unicode escape.
let mut unicode_char: Option<UnicodeChar> = None;
loop {
let b = match bytes.next() {
Some(b) => b.map_err(|e| err!("{}", e))?,
None => match result {
// The input ended inside a string.
Some(_) => return Err(err!("Missing end of string: \"")),
// The input ended before any string started.
None => break,
},
};
match result.as_mut() {
Some(result) => {
if escaping {
if let Some(mut uc) = unicode_char.take() {
if uc.is_full() {
// The escape is complete: flush it, then fall through so that the current byte is
// handled as regular string content below.
uc.encode_as_utf8_bytes(result)?;
escaping = false;
} else {
// Still collecting hex digits: keep the pending escape for the next byte.
uc.add_hex(&b)?;
unicode_char = Some(uc);
continue;
}
}
}
if escaping {
match b {
b'u' => unicode_char = Some(UnicodeChar::new()),
// `\xHH` is handled as a unicode escape whose first two hex digits are zero.
#[cfg(feature="x-unicode")]
b'x' => unicode_char = Some({
let mut uc = UnicodeChar::new();
uc.add_hex(&b'0')?;
uc.add_hex(&b'0')?;
uc
}),
_ => {
result.push(escape(&b)?);
escaping = false;
},
};
} else {
match b {
b'\\' => escaping = true,
b'"' => break,
_ => result.push(b),
};
}
},
// No opening quote seen yet.
None => match b {
b' ' | b'\r' | b'\n' | b'\t' => continue,
b'"' => result = Some(Vec::with_capacity(STRING_CAPACITY)),
_ => return Ok((None, Some(IgnoredByte::try_from(&b)?))),
},
};
}
let result = match result {
Some(result) => Some(String::from_utf8(result).map_err(|e| err!("{}", e))?),
None => None,
};
// Skip white space after the closing quote and hand the next byte back to the caller.
for b in bytes {
match b.map_err(|e| err!("{}", e))? {
b' ' | b'\r' | b'\n' | b'\t' => continue,
other => return Ok((result, Some(IgnoredByte::try_from(&other)?))),
};
}
Ok((result, None))
}
#[test]
fn test_parse_string() -> Result<()> {
    use alloc::string::ToString;

    /// Renders `c` as a `\uXXXX` escape sequence.
    fn escape_unicode(c: &char) -> Vec<u8> {
        // `char::escape_unicode` yields `\u{…}`: drop the braces, then left-pad the hex digits
        // with zeros (inserted right after `\u`) until the sequence is six bytes long.
        let mut escaped = c.escape_unicode().to_string().replace(|ch: char| ch == '{' || ch == '}', "");
        let missing_zeros = 6 - escaped.len();
        for _ in 0..missing_zeros {
            escaped.insert(2, '0');
        }
        escaped.into_bytes()
    }

    // Empty input: no string and nothing after it.
    let (s, ignored) = parse_string(&mut b"".bytes(), None)?;
    assert!(s.is_none() && ignored.is_none());

    // Empty input while already inside a string: the closing quote is missing.
    parse_string(&mut b"".bytes(), Some(alloc::vec![])).unwrap_err();

    // Plain content and simple escape sequences.
    let cases: Vec<(Vec<u8>, Vec<u8>)> = alloc::vec![
        (alloc::vec![], alloc::vec![]),
        (br#"some-\t\r\n--\\/\/\""#.to_vec(), "some-\t\r\n--\\//\"".as_bytes().to_vec()),
    ];
    for (content, expected) in cases {
        let mut raw = alloc::vec![b'"'];
        raw.extend(content);
        raw.push(b'"');

        let (parsed, ignored) = parse_string(&mut raw.bytes(), None)?;
        assert!(ignored.is_none());
        assert_eq!(parsed.unwrap().into_bytes(), expected);
    }

    // Unicode escape sequences.
    const SRC: &[u8] = b"some";
    for chr in &['\u{1d2d}', '\u{D55C}', '\u{a2}', '\u{0024}'] {
        let mut raw = alloc::vec![b'"'];
        raw.extend(SRC.to_vec());
        raw.extend(escape_unicode(chr));
        raw.push(b'"');

        let mut expected = SRC.to_vec();
        expected.extend(chr.to_string().into_bytes());

        let (parsed, ignored) = parse_string(&mut raw.bytes(), None)?;
        assert!(ignored.is_none());
        assert_eq!(parsed.unwrap().into_bytes(), expected);
    }

    Ok(())
}
#[test]
#[cfg(feature="x-unicode")]
fn test_parse_string_with_x_unicode_chars() -> Result<()> {
    // Each case: a quoted string holding one `\xHH` escape, and the text it must decode to.
    let cases: &[(&[u8], &str)] = &[
        (br#""\x3c""#, "<"),
        (br#""A\x3cz""#, "A<z"),
        (br#""\x39""#, "9"),
        (br#""@\x39,""#, "@9,"),
        (br#""\x7E""#, "~"),
        (br#""!!\x7E]]""#, "!!~]]"),
    ];
    for &(raw, expected) in cases {
        let (parsed, ignored_byte) = parse_string(&mut raw.bytes(), None)?;
        assert_eq!(parsed.unwrap(), expected);
        assert!(ignored_byte.is_none());
    }
    Ok(())
}
/// Skips white space and checks that the next byte is `expected`.
///
/// # Errors
///
/// Fails on read errors, when another byte is found first, and when the input ends before
/// `expected` is found.
fn ensure_char<R>(bytes: &mut Bytes<R>, expected: u8) -> Result<()> where R: Read {
    for next in bytes {
        match next.map_err(|e| err!("{}", e))? {
            b' ' | b'\r' | b'\n' | b'\t' => continue,
            found if found == expected => return Ok(()),
            found => return Err(
                err!("Expected {expected:?}, got: {other:?}", expected=char::from(expected), other=char::from(found))
            ),
        };
    }
    Err(err!("Missing {:?}", char::from(expected)))
}
/// Consumes the rest of `bytes`, checking that it holds nothing but white space.
///
/// # Errors
///
/// Fails on read errors and on the first byte that is not a space, `\r`, `\n` or `\t`.
fn ensure_white_spaces<R>(bytes: &mut Bytes<R>) -> Result<()> where R: Read {
    for next in bytes {
        let byte = next.map_err(|e| err!("{}", e))?;
        let is_white_space = match byte {
            b' ' | b'\r' | b'\n' | b'\t' => true,
            _ => false,
        };
        if !is_white_space {
            return Err(err!("Expected white spaces, got: {other:?}", other=char::from(byte)));
        }
    }
    Ok(())
}
/// Feeds the remaining bytes of a number into `number_parser`, then asks it for the result.
///
/// Reading stops at the end of the input, at the first white space (which is consumed), or at the
/// first byte that cannot belong to a number. The latter must be a structural byte and is returned
/// as the second element; otherwise that element is `None`.
///
/// # Errors
///
/// Fails on read errors, when the number is ended by a byte `IgnoredByte::try_from` rejects, and
/// when `number_parser` reports an error.
fn parse_number<R>(bytes: &mut Bytes<R>, number_parser: &mut NumberParser) -> Result<(Number, Option<IgnoredByte>)> where R: Read {
    let mut terminator = None;
    for next in bytes {
        let byte = next.map_err(|e| err!("{}", e))?;
        match byte {
            b'0'..=b'9' | b'-' | b'+' | b'e' | b'E' | b'.' => number_parser.add(&byte)?,
            b' ' | b'\r' | b'\n' | b'\t' => break,
            other => {
                terminator = Some(IgnoredByte::try_from(&other)?);
                break;
            },
        };
    }
    let number = number_parser.parse()?;
    Ok((number, terminator))
}