use crate::error::{ParseError, ParseErrorKind};
/// Kind of lexical token produced by the parser's tokenizer.
///
/// The token carries no payload; string and number contents are kept in the
/// parser's own fields.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
enum Token {
    /// Nothing valid has been lexed (initial state, or a lexing error).
    Invalid,
    /// The input is exhausted.
    Eof,
    /// `{`
    OpenCurly,
    /// `}`
    CloseCurly,
    /// `[`
    OpenBracket,
    /// `]`
    CloseBracket,
    /// `,`
    Comma,
    /// `:`
    Colon,
    /// The literal `true`.
    True,
    /// The literal `false`.
    False,
    /// The literal `null`.
    Null,
    /// A double-quoted string.
    String,
    /// A numeric literal.
    Number,
}

impl Token {
    /// Returns the human-readable name used for this token in error messages.
    fn name(self) -> &'static str {
        match self {
            Self::Invalid => "invalid",
            Self::Eof => "end of input",
            Self::OpenCurly => "{",
            Self::CloseCurly => "}",
            Self::OpenBracket => "[",
            Self::CloseBracket => "]",
            Self::Comma => ",",
            Self::Colon => ":",
            Self::True => "true",
            Self::False => "false",
            Self::Null => "null",
            Self::String => "string",
            Self::Number => "number",
        }
    }
}
/// Pull-style JSON parser over a borrowed byte slice.
///
/// `'src` is the lifetime of the input bytes; `'buf` is the lifetime of the
/// caller-provided scratch buffer that decoded string tokens are written into.
pub struct Parser<'src, 'buf> {
/// Input bytes being parsed.
src: &'src [u8],
/// Index into `src` of the next unread byte.
pos: usize,
/// Offset in `src` where the most recently lexed token began; this is the
/// value reported by `error_offset`.
token_start: usize,
/// Offset in `src` of the most recently read object key, used by
/// `unknown_field`.
key_start: usize,
/// Scratch buffer receiving the decoded bytes of string tokens.
str_buf: &'buf mut [u8],
/// Number of bytes of `str_buf` written for the most recent string token.
str_len: usize,
/// Kind of the most recently lexed token.
token: Token,
/// Start (inclusive) of the most recent number token within `src`.
number_start: usize,
/// End (exclusive) of the most recent number token within `src`.
number_end: usize,
}
impl<'src, 'buf> Parser<'src, 'buf> {
pub fn new(src: &'src [u8], str_buf: &'buf mut [u8]) -> Self {
Self {
src,
pos: 0,
token_start: 0,
key_start: 0,
str_buf,
str_len: 0,
token: Token::Invalid,
number_start: 0,
number_end: 0,
}
}
pub fn error_offset(&self) -> usize {
self.token_start
}
fn skip_whitespace(&mut self) {
while self.pos < self.src.len() {
match self.src[self.pos] {
b' ' | b'\t' | b'\n' | b'\r' => self.pos += 1,
_ => break,
}
}
}
fn get_token(&mut self) -> Result<(), ParseError> {
self.skip_whitespace();
self.token_start = self.pos;
if self.pos >= self.src.len() {
self.token = Token::Eof;
return Ok(());
}
let ch = self.src[self.pos];
let punct = match ch {
b'{' => Some(Token::OpenCurly),
b'}' => Some(Token::CloseCurly),
b'[' => Some(Token::OpenBracket),
b']' => Some(Token::CloseBracket),
b',' => Some(Token::Comma),
b':' => Some(Token::Colon),
_ => None,
};
if let Some(t) = punct {
self.token = t;
self.pos += 1;
return Ok(());
}
let keywords: [(&[u8], Token); 3] = [
(b"true", Token::True),
(b"false", Token::False),
(b"null", Token::Null),
];
for (keyword, tok) in keywords {
if self.src[self.pos..].starts_with(keyword) {
self.token = tok;
self.pos += keyword.len();
return Ok(());
}
}
if ch == b'-' || ch.is_ascii_digit() {
let start = self.pos;
if ch == b'-' { self.pos += 1; }
while self.pos < self.src.len() && self.src[self.pos].is_ascii_digit() {
self.pos += 1;
}
if self.pos < self.src.len() && self.src[self.pos] == b'.' {
self.pos += 1;
while self.pos < self.src.len() && self.src[self.pos].is_ascii_digit() {
self.pos += 1;
}
}
if self.pos < self.src.len() && matches!(self.src[self.pos], b'e' | b'E') {
self.pos += 1;
if self.pos < self.src.len() && matches!(self.src[self.pos], b'+' | b'-') {
self.pos += 1;
}
while self.pos < self.src.len() && self.src[self.pos].is_ascii_digit() {
self.pos += 1;
}
}
self.number_start = start;
self.number_end = self.pos;
self.token = Token::Number;
return Ok(());
}
if ch == b'"' {
self.pos += 1;
self.str_len = 0;
loop {
if self.pos >= self.src.len() {
self.token = Token::Invalid;
return Err(ParseError::at(
self.token_start,
ParseErrorKind::UnexpectedEof,
));
}
match self.src[self.pos] {
b'"' => {
self.pos += 1;
self.token = Token::String;
return Ok(());
}
b'\\' => {
self.pos += 1;
if self.pos >= self.src.len() {
self.token = Token::Invalid;
return Err(ParseError::at(
self.pos,
ParseErrorKind::UnexpectedEof,
));
}
let esc = self.src[self.pos];
self.pos += 1;
let decoded = match esc {
b'"' => b'"',
b'\\' => b'\\',
b'/' => b'/',
b'b' => b'\x08',
b't' => b'\t',
b'n' => b'\n',
b'v' => b'\x0B',
b'f' => b'\x0C',
b'r' => b'\r',
other => {
self.token = Token::Invalid;
return Err(ParseError::at(
self.pos - 1,
ParseErrorKind::InvalidEscape(other),
));
}
};
if self.str_len >= self.str_buf.len() {
return Err(ParseError::at(
self.token_start,
ParseErrorKind::StringBufferOverflow,
));
}
self.str_buf[self.str_len] = decoded;
self.str_len += 1;
}
_ => {
let b = self.src[self.pos];
self.pos += 1;
if self.str_len >= self.str_buf.len() {
return Err(ParseError::at(
self.token_start,
ParseErrorKind::StringBufferOverflow,
));
}
self.str_buf[self.str_len] = b;
self.str_len += 1;
}
}
}
}
self.token = Token::Invalid;
Err(ParseError::at(
self.token_start,
ParseErrorKind::UnexpectedToken { expected: "value", got: "invalid character" },
))
}
fn expect_token(&mut self, expected: Token) -> Result<(), ParseError> {
if self.token != expected {
return Err(ParseError::at(
self.token_start,
ParseErrorKind::UnexpectedToken {
expected: expected.name(),
got: self.token.name(),
},
));
}
Ok(())
}
fn get_and_expect(&mut self, expected: Token) -> Result<(), ParseError> {
self.get_token()?;
self.expect_token(expected)
}
fn current_string(&self) -> Result<&str, ParseError> {
let bytes = &self.str_buf[..self.str_len];
core::str::from_utf8(bytes).map_err(|_| {
ParseError::at(self.token_start, ParseErrorKind::InvalidUtf8)
})
}
pub fn object_begin(&mut self) -> Result<(), ParseError> {
self.get_and_expect(Token::OpenCurly)
}
pub fn object_member(&mut self) -> Result<Option<&'buf str>, ParseError> {
let saved_pos = self.pos;
self.get_token()?;
match self.token {
Token::Comma => {
self.get_and_expect(Token::String)?;
self.key_start = self.token_start; self.get_and_expect(Token::Colon)?;
let s = self.current_string()?;
let s: &'buf str = unsafe { core::mem::transmute(s) };
Ok(Some(s))
}
Token::CloseCurly => {
self.pos = saved_pos;
Ok(None)
}
Token::String => {
self.key_start = self.token_start; self.get_and_expect(Token::Colon)?;
let s = self.current_string()?;
let s: &'buf str = unsafe { core::mem::transmute(s) };
Ok(Some(s))
}
_ => Err(ParseError::at(
self.token_start,
ParseErrorKind::UnexpectedToken {
expected: "string or }",
got: self.token.name(),
},
)),
}
}
pub fn object_end(&mut self) -> Result<(), ParseError> {
self.get_and_expect(Token::CloseCurly)
}
pub fn unknown_field(&self) -> ParseError {
ParseError::at(self.key_start, ParseErrorKind::UnknownField)
}
pub fn array_begin(&mut self) -> Result<(), ParseError> {
self.get_and_expect(Token::OpenBracket)
}
pub fn array_item(&mut self) -> Result<bool, ParseError> {
let saved_pos = self.pos;
self.get_token()?;
match self.token {
Token::Comma => Ok(true),
Token::CloseBracket => {
self.pos = saved_pos;
Ok(false)
}
_ => {
self.pos = saved_pos;
Ok(true)
}
}
}
pub fn array_end(&mut self) -> Result<(), ParseError> {
self.get_and_expect(Token::CloseBracket)
}
pub fn null(&mut self) -> Result<(), ParseError> {
self.get_and_expect(Token::Null)
}
pub fn bool_val(&mut self) -> Result<bool, ParseError> {
self.get_token()?;
match self.token {
Token::True => Ok(true),
Token::False => Ok(false),
_ => Err(ParseError::at(
self.token_start,
ParseErrorKind::UnexpectedToken { expected: "boolean", got: self.token.name() },
)),
}
}
pub fn string(&mut self) -> Result<&'buf str, ParseError> {
self.get_and_expect(Token::String)?;
let s = self.current_string()?;
let s: &'buf str = unsafe { core::mem::transmute(s) };
Ok(s)
}
pub fn number_str(&mut self) -> Result<&'src str, ParseError> {
self.get_and_expect(Token::Number)?;
let bytes = &self.src[self.number_start..self.number_end];
core::str::from_utf8(bytes).map_err(|_| {
ParseError::at(self.token_start, ParseErrorKind::InvalidUtf8)
})
}
fn peek_token(&mut self) -> Token {
let saved_pos = self.pos;
let saved_token_start = self.token_start;
let saved_token = self.token;
let _ = self.get_token();
let peeked = self.token;
self.pos = saved_pos;
self.token_start = saved_token_start;
self.token = saved_token;
peeked
}
pub fn is_null_ahead(&mut self) -> bool { self.peek_token() == Token::Null }
pub fn is_bool_ahead(&mut self) -> bool { matches!(self.peek_token(), Token::True | Token::False) }
pub fn is_number_ahead(&mut self) -> bool { self.peek_token() == Token::Number }
pub fn is_string_ahead(&mut self) -> bool { self.peek_token() == Token::String }
pub fn is_array_ahead(&mut self) -> bool { self.peek_token() == Token::OpenBracket }
pub fn is_object_ahead(&mut self) -> bool { self.peek_token() == Token::OpenCurly }
}
/// Types that can be read from a [`Parser`].
///
/// `'src` and `'buf` are the parser's input and string-scratch-buffer
/// lifetimes; implementations may return values that borrow from either.
pub trait Deserialize<'src, 'buf>: Sized {
/// Reads one value of this type from `json`, consuming the tokens it uses.
fn deserialize(json: &mut Parser<'src, 'buf>) -> Result<Self, ParseError>;
}
impl<'src, 'buf> Deserialize<'src, 'buf> for bool {
fn deserialize(json: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
json.bool_val()
}
}
impl<'src, 'buf> Deserialize<'src, 'buf> for &'buf str {
fn deserialize(json: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
json.string()
}
}
/// Owned strings are copied out of the parser's string buffer.
#[cfg(feature = "alloc")]
impl<'src, 'buf> Deserialize<'src, 'buf> for alloc::string::String {
    fn deserialize(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        parser.string().map(alloc::string::String::from)
    }
}
/// `f32` values are parsed from the source text of a number token.
impl<'src, 'buf> Deserialize<'src, 'buf> for f32 {
    fn deserialize(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        let text = parser.number_str()?;
        match text.parse::<f32>() {
            Ok(value) => Ok(value),
            // Report the failure at the start of the number token.
            Err(_) => Err(ParseError::at(
                parser.error_offset(),
                ParseErrorKind::UnexpectedToken { expected: "number", got: "invalid float" },
            )),
        }
    }
}
/// `f64` values are parsed from the source text of a number token.
impl<'src, 'buf> Deserialize<'src, 'buf> for f64 {
    fn deserialize(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        let text = parser.number_str()?;
        match text.parse::<f64>() {
            Ok(value) => Ok(value),
            // Report the failure at the start of the number token.
            Err(_) => Err(ParseError::at(
                parser.error_offset(),
                ParseErrorKind::UnexpectedToken { expected: "number", got: "invalid float" },
            )),
        }
    }
}
// Implements `Deserialize` for each listed integer type by reading a number
// token and converting its text with `integer_from_str`.
macro_rules! impl_integer {
    ($($int:ty),*) => {$(
        impl<'src, 'buf> Deserialize<'src, 'buf> for $int {
            fn deserialize(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
                let text = parser.number_str()?;
                // Conversion errors point at the start of the number token.
                let offset = parser.error_offset();
                integer_from_str::<$int>(text, offset)
            }
        }
    )*};
}
impl_integer!(i8, i16, i32, i64, i128, isize, u8, u16, u32, u64, u128, usize);
/// Converts the text of a number token into the integer type `T`.
///
/// Any text `T` cannot represent (out of range, or containing anything other
/// than an optional sign and decimal digits) yields an `UnexpectedToken`
/// error located at `offset`.
fn integer_from_str<T: IntParse>(s: &str, offset: usize) -> Result<T, ParseError> {
    match T::from_str(s) {
        Some(value) => Ok(value),
        None => Err(ParseError::at(
            offset,
            ParseErrorKind::UnexpectedToken { expected: "integer", got: "number out of range" },
        )),
    }
}
/// Overflow-checked decimal integer parsing for the primitive integer types.
trait IntParse: Sized {
    /// Parses `s` as a base-10 integer, returning `None` if it is empty,
    /// contains a non-digit, or does not fit in `Self`.
    fn from_str(s: &str) -> Option<Self>;
}

macro_rules! impl_int_parse {
    (signed: $($t:ty),*) => {$(
        impl IntParse for $t {
            fn from_str(s: &str) -> Option<Self> {
                let (negative, digits) = match s.as_bytes() {
                    [] => return None,
                    [b'-', rest @ ..] => (true, rest),
                    all => (false, all),
                };
                if digits.is_empty() {
                    return None;
                }
                // Accumulate on the negative side so that the minimum value,
                // whose magnitude has no positive counterpart, can be parsed.
                let zero: $t = 0;
                let negated = digits.iter().try_fold(zero, |acc: $t, &d: &u8| -> Option<$t> {
                    if !d.is_ascii_digit() {
                        return None;
                    }
                    acc.checked_mul(10)?.checked_sub((d - b'0') as $t)
                })?;
                if negative {
                    Some(negated)
                } else {
                    negated.checked_neg()
                }
            }
        }
    )*};
    (unsigned: $($t:ty),*) => {$(
        impl IntParse for $t {
            fn from_str(s: &str) -> Option<Self> {
                let digits = s.as_bytes();
                // Empty input and negative numbers are both unrepresentable.
                if matches!(digits.first().copied(), None | Some(b'-')) {
                    return None;
                }
                let zero: $t = 0;
                digits.iter().try_fold(zero, |acc: $t, &d: &u8| -> Option<$t> {
                    if !d.is_ascii_digit() {
                        return None;
                    }
                    acc.checked_mul(10)?.checked_add((d - b'0') as $t)
                })
            }
        }
    )*};
}
impl_int_parse!(signed: i8, i16, i32, i64, i128, isize);
impl_int_parse!(unsigned: u8, u16, u32, u64, u128, usize);
/// `null` becomes `None`; anything else is read as a `T`.
impl<'src, 'buf, T: Deserialize<'src, 'buf>> Deserialize<'src, 'buf> for Option<T> {
    fn deserialize(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        if !parser.is_null_ahead() {
            return T::deserialize(parser).map(Some);
        }
        // The peek did not consume the literal; do so now.
        parser.null()?;
        Ok(None)
    }
}
/// Fixed-size arrays are read from a JSON array with exactly `N` elements.
impl<'src, 'buf, T: Deserialize<'src, 'buf>, const N: usize> Deserialize<'src, 'buf> for [T; N] {
    /// Reads `[`, exactly `N` elements and `]`.
    ///
    /// Fails with `UnexpectedToken` if the array has fewer or more than `N`
    /// elements, and forwards any error from reading an element. Elements
    /// already read are dropped on every failure path.
    fn deserialize(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        use core::mem::{ManuallyDrop, MaybeUninit};

        /// Partially filled array. Dropping it drops the initialised prefix,
        /// so early returns (`?`) and panics in `T::deserialize` do not leak
        /// the elements read so far.
        struct Partial<T, const N: usize> {
            slots: [MaybeUninit<T>; N],
            filled: usize,
        }

        impl<T, const N: usize> Drop for Partial<T, N> {
            fn drop(&mut self) {
                for slot in &mut self.slots[..self.filled] {
                    // SAFETY: exactly the first `filled` slots have been
                    // written, and each is dropped only once, here.
                    unsafe { slot.assume_init_drop() };
                }
            }
        }

        parser.array_begin()?;
        let mut partial = Partial::<T, N> {
            // SAFETY: an array of `MaybeUninit` requires no initialisation.
            slots: unsafe { MaybeUninit::<[MaybeUninit<T>; N]>::uninit().assume_init() },
            filled: 0,
        };

        while partial.filled < N {
            if !parser.array_item()? {
                return Err(ParseError::at(
                    parser.error_offset(),
                    ParseErrorKind::UnexpectedToken { expected: "array item", got: "]" },
                ));
            }
            let value = T::deserialize(parser)?;
            partial.slots[partial.filled].write(value);
            partial.filled += 1;
        }

        if parser.array_item()? {
            return Err(ParseError::at(
                parser.error_offset(),
                ParseErrorKind::UnexpectedToken { expected: "]", got: "array item" },
            ));
        }
        parser.array_end()?;

        // Success: take the slots out without running the guard's destructor.
        let done = ManuallyDrop::new(partial);
        // SAFETY: `done` is never dropped or read again, so this is the only
        // owner of the slot contents.
        let slots = unsafe { core::ptr::read(&done.slots) };
        // SAFETY: the loop above initialised all `N` slots.
        Ok(slots.map(|slot| unsafe { slot.assume_init() }))
    }
}
/// Reads a JSON array into a fixed-capacity `ArrayVec`.
///
/// An array with more than `N` elements is reported with
/// `ParseErrorKind::StringBufferOverflow`.
#[cfg(feature = "arrayvec")]
impl<'src, 'buf, T: Deserialize<'src, 'buf>, const N: usize> Deserialize<'src, 'buf>
    for arrayvec::ArrayVec<T, N>
{
    fn deserialize(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        let mut items = arrayvec::ArrayVec::new();
        parser.array_begin()?;
        loop {
            if !parser.array_item()? {
                break;
            }
            let item = T::deserialize(parser)?;
            if items.try_push(item).is_err() {
                return Err(ParseError::at(
                    parser.error_offset(),
                    ParseErrorKind::StringBufferOverflow,
                ));
            }
        }
        parser.array_end()?;
        Ok(items)
    }
}
/// Reads a JSON string into a fixed-capacity `ArrayString`.
///
/// A string that does not fit is reported with
/// `ParseErrorKind::StringBufferOverflow`.
#[cfg(feature = "arrayvec")]
impl<'src, 'buf, const N: usize> Deserialize<'src, 'buf> for arrayvec::ArrayString<N> {
    fn deserialize(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        let text = parser.string()?;
        match arrayvec::ArrayString::try_from(text) {
            Ok(copied) => Ok(copied),
            Err(_) => Err(ParseError::at(
                parser.error_offset(),
                ParseErrorKind::StringBufferOverflow,
            )),
        }
    }
}
/// Reads a JSON array of any length into a `Vec`.
#[cfg(feature = "alloc")]
impl<'src, 'buf, T: Deserialize<'src, 'buf>> Deserialize<'src, 'buf> for alloc::vec::Vec<T> {
    fn deserialize(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        let mut items = alloc::vec::Vec::new();
        parser.array_begin()?;
        loop {
            if !parser.array_item()? {
                break;
            }
            let item = T::deserialize(parser)?;
            items.push(item);
        }
        parser.array_end()?;
        Ok(items)
    }
}
/// Reads a `T` and moves it onto the heap.
#[cfg(feature = "alloc")]
impl<'src, 'buf, T: Deserialize<'src, 'buf>> Deserialize<'src, 'buf> for alloc::boxed::Box<T> {
    fn deserialize(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        let inner = T::deserialize(parser)?;
        Ok(alloc::boxed::Box::new(inner))
    }
}
/// Reads a JSON object into a `BTreeMap` keyed by owned strings.
///
/// When a key occurs more than once, the last value wins.
#[cfg(feature = "alloc")]
impl<'src, 'buf, V: Deserialize<'src, 'buf>> Deserialize<'src, 'buf>
    for alloc::collections::BTreeMap<alloc::string::String, V>
{
    fn deserialize(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        let mut entries = alloc::collections::BTreeMap::new();
        parser.object_begin()?;
        loop {
            // Copy the key immediately: reading the value may lex another
            // string into the same scratch buffer.
            let key = match parser.object_member()? {
                Some(name) => alloc::string::String::from(name),
                None => break,
            };
            let value = V::deserialize(parser)?;
            entries.insert(key, value);
        }
        parser.object_end()?;
        Ok(entries)
    }
}
/// Reads a JSON object into a `HashMap` keyed by owned strings.
///
/// When a key occurs more than once, the last value wins.
#[cfg(feature = "std")]
impl<'src, 'buf, V: Deserialize<'src, 'buf>> Deserialize<'src, 'buf>
    for std::collections::HashMap<std::string::String, V>
{
    fn deserialize(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        let mut entries = std::collections::HashMap::new();
        parser.object_begin()?;
        loop {
            // Copy the key immediately: reading the value may lex another
            // string into the same scratch buffer.
            let key = match parser.object_member()? {
                Some(name) => std::string::String::from(name),
                None => break,
            };
            let value = V::deserialize(parser)?;
            entries.insert(key, value);
        }
        parser.object_end()?;
        Ok(entries)
    }
}
/// Runs `f` against a parser over `src` that uses a `STR_BUF`-byte string
/// scratch buffer allocated on the stack.
///
/// Returns whatever `f` returns.
pub fn parse_manual_sized<'s, const STR_BUF: usize, T>(
    src: &[u8],
    f: impl for<'a, 'b> FnOnce(&mut Parser<'a, 'b>) -> Result<T, ParseError>,
) -> Result<T, ParseError> {
    let mut string_storage = [0u8; STR_BUF];
    f(&mut Parser::new(src, &mut string_storage))
}
/// Deserializes a `T` from `src` using a `STR_BUF`-byte string scratch buffer
/// allocated on the stack.
#[inline]
pub fn parse_sized<'s, const STR_BUF: usize, T>(
    src: &'s [u8],
) -> Result<T, ParseError>
where
    T: for<'b> Deserialize<'s, 'b>,
{
    let mut string_storage = [0u8; STR_BUF];
    let mut parser = Parser::new(src, string_storage.as_mut_slice());
    T::deserialize(&mut parser)
}
/// Deserializes a `T` from JSON bytes.
///
/// The string scratch buffer is heap-allocated with the same length as `src`
/// (at least one byte).
#[cfg(feature = "std")]
#[inline]
pub fn parse_bytes<T>(src: &[u8]) -> Result<T, ParseError>
where
    T: for<'s, 'b> Deserialize<'s, 'b>,
{
    let mut string_storage = std::vec![0u8; src.len().max(1)];
    let mut parser = Parser::new(src, &mut string_storage);
    T::deserialize(&mut parser)
}
/// Deserializes a `T` from JSON text by forwarding its bytes to `parse_bytes`.
#[cfg(feature = "std")]
#[inline]
pub fn parse<T>(src: &str) -> Result<T, ParseError>
where
    T: for<'s, 'b> Deserialize<'s, 'b>,
{
    let bytes = src.as_bytes();
    parse_bytes::<T>(bytes)
}
/// Runs `f` against a parser over `src` whose string scratch buffer is
/// heap-allocated with the same length as `src` (at least one byte).
///
/// Returns whatever `f` returns.
#[cfg(feature = "std")]
#[inline]
pub fn parse_manual<T>(
    src: &[u8],
    f: impl for<'a, 'b> FnOnce(&mut Parser<'a, 'b>) -> Result<T, ParseError>,
) -> Result<T, ParseError> {
    let mut string_storage = std::vec![0u8; src.len().max(1)];
    f(&mut Parser::new(src, &mut string_storage))
}