use core::str::FromStr;
use crate::error::{ParseError, ParseErrorKind};
/// Kinds of lexical tokens recognised in the input.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
enum Token {
    /// Placeholder for "no valid token".
    Invalid,
    /// The input has been exhausted.
    Eof,
    /// `{`
    OpenCurly,
    /// `}`
    CloseCurly,
    /// `[`
    OpenBracket,
    /// `]`
    CloseBracket,
    /// `,`
    Comma,
    /// `:`
    Colon,
    /// The literal `true`.
    True,
    /// The literal `false`.
    False,
    /// The literal `null`.
    Null,
    /// A double-quoted string.
    String,
    /// A numeric literal.
    Number,
}

impl Token {
    /// Returns the fixed, human-readable label for this token kind.
    ///
    /// Punctuation and literals are rendered as their spelling; the remaining
    /// kinds get a short descriptive word.
    fn name(self) -> &'static str {
        match self {
            // Punctuation is shown verbatim.
            Self::OpenCurly => "{",
            Self::CloseCurly => "}",
            Self::OpenBracket => "[",
            Self::CloseBracket => "]",
            Self::Comma => ",",
            Self::Colon => ":",
            // Literal keywords are shown as written.
            Self::True => "true",
            Self::False => "false",
            Self::Null => "null",
            // Everything else gets a descriptive label.
            Self::String => "string",
            Self::Number => "number",
            Self::Eof => "end of input",
            Self::Invalid => "invalid",
        }
    }
}
/// Tokenizer and parser state over a borrowed byte source.
///
/// `'src` is the lifetime of the input bytes; `'buf` is the lifetime of the
/// scratch buffer that receives decoded string contents.
pub struct Parser<'src, 'buf> {
/// Input bytes being parsed.
src: &'src [u8],
/// Scratch buffer that decoded string bytes are written into.
str_buf: &'buf mut [u8],
/// Index in `src` of the next unread byte.
pos: usize,
/// Offset in `src` where the current token began; reported by `error_offset`.
token_start: usize,
/// Offset in `src` of the most recent object key; used for unknown-field errors.
key_start: usize,
/// Decoded byte length of the last string token (counted even when the
/// bytes were not written to `str_buf`).
str_len: usize,
/// Offset in `src` of the first byte after the last string's opening quote.
str_start_in_src: usize,
/// Offset in `src` of the last string's closing quote.
str_end_in_src: usize,
/// Whether the last string token contained a backslash escape.
str_has_escapes: bool,
/// Kind of the most recently lexed token.
token: Token,
/// Offset in `src` of the first byte of the last number token.
number_start: usize,
/// Offset in `src` one past the last byte of the last number token.
number_end: usize,
}
impl<'src, 'buf> Parser<'src, 'buf> {
/// Creates a parser positioned at the start of `src`.
///
/// `str_buf` is the scratch space that decoded string contents are written
/// into. All offsets start at zero and the current token starts out as
/// `Token::Invalid`.
pub fn new(src: &'src [u8], str_buf: &'buf mut [u8]) -> Self {
    Self {
        // Borrowed input and scratch storage.
        src,
        str_buf,
        // Lexer position and current token.
        token: Token::Invalid,
        pos: 0,
        token_start: 0,
        key_start: 0,
        // Bookkeeping for the last string token.
        str_has_escapes: false,
        str_len: 0,
        str_start_in_src: 0,
        str_end_in_src: 0,
        // Bookkeeping for the last number token.
        number_start: 0,
        number_end: 0,
    }
}
/// Returns the byte offset in the source at which the current token began
/// (the current value of `token_start`).
pub fn error_offset(&self) -> usize {
self.token_start
}
/// Advances `pos` past any run of spaces, tabs, line feeds and carriage returns.
fn skip_whitespace(&mut self) {
    let blanks = self
        .src
        .iter()
        .skip(self.pos)
        .take_while(|&&byte| matches!(byte, b' ' | b'\t' | b'\n' | b'\r'))
        .count();
    self.pos += blanks;
}
/// Lexes the next token starting at `pos` and stores its kind in `token`.
///
/// Leading whitespace is skipped and `token_start` is set to the first byte
/// of the token. Number tokens record their byte range in
/// `number_start..number_end`. String tokens record their content range in
/// `str_start_in_src..str_end_in_src`, whether they contained escapes, and
/// their decoded length in `str_len`; when `WRITE_OUT_TOKEN` is `true` the
/// decoded bytes are also copied into `str_buf`.
///
/// # Errors
///
/// * `UnexpectedEof` for an unterminated string or truncated escape.
/// * `InvalidEscape` for an unknown escape or a malformed `\u` sequence.
/// * `StringBufferOverflow` when `str_buf` is too small (only when writing).
/// * `UnexpectedToken` when the next byte cannot start any token.
///
/// On every error path originating in string or keyword lexing, `token` is
/// left as `Token::Invalid`.
fn get_token<const WRITE_OUT_TOKEN: bool>(&mut self) -> Result<(), ParseError> {
    self.skip_whitespace();
    self.token_start = self.pos;
    let first = match self.src.get(self.pos) {
        Some(&byte) => byte,
        None => {
            self.token = Token::Eof;
            return Ok(());
        }
    };
    let punct = match first {
        b'{' => Token::OpenCurly,
        b'}' => Token::CloseCurly,
        b'[' => Token::OpenBracket,
        b']' => Token::CloseBracket,
        b',' => Token::Comma,
        b':' => Token::Colon,
        b'-' | b'0'..=b'9' => {
            self.lex_number();
            return Ok(());
        }
        b'"' => {
            let outcome = self.lex_string::<WRITE_OUT_TOKEN>();
            // Never leave a stale token kind behind after a failed string.
            if outcome.is_err() {
                self.token = Token::Invalid;
            }
            return outcome;
        }
        _ => return self.lex_keyword(),
    };
    self.token = punct;
    self.pos += 1;
    Ok(())
}

/// Matches `true`, `false` or `null` at `pos`; anything else is an invalid character.
fn lex_keyword(&mut self) -> Result<(), ParseError> {
    let keywords: [(&[u8], Token); 3] = [
        (b"true", Token::True),
        (b"false", Token::False),
        (b"null", Token::Null),
    ];
    for &(keyword, tok) in keywords.iter() {
        if self.src[self.pos..].starts_with(keyword) {
            self.token = tok;
            self.pos += keyword.len();
            return Ok(());
        }
    }
    self.token = Token::Invalid;
    Err(ParseError::at(
        self.token_start,
        ParseErrorKind::UnexpectedToken { expected: "value", got: "invalid character" },
    ))
}

/// Advances `pos` past a run of ASCII digits.
fn skip_digits(&mut self) {
    while self.pos < self.src.len() && self.src[self.pos].is_ascii_digit() {
        self.pos += 1;
    }
}

/// Scans a number (optional `-`, digits, optional fraction, optional exponent)
/// starting at `pos` and records its byte range. The caller guarantees that
/// `pos` is in bounds. The scan is permissive; the text is validated later
/// when it is converted with `FromStr`.
fn lex_number(&mut self) {
    let start = self.pos;
    if self.src[self.pos] == b'-' {
        self.pos += 1;
    }
    self.skip_digits();
    if matches!(self.src.get(self.pos), Some(&b'.')) {
        self.pos += 1;
        self.skip_digits();
    }
    if matches!(self.src.get(self.pos), Some(&b'e') | Some(&b'E')) {
        self.pos += 1;
        if matches!(self.src.get(self.pos), Some(&b'+') | Some(&b'-')) {
            self.pos += 1;
        }
        self.skip_digits();
    }
    self.number_start = start;
    self.number_end = self.pos;
    self.token = Token::Number;
}

/// Scans a string whose opening quote is at `pos`, decoding escapes as it goes.
fn lex_string<const WRITE_OUT_TOKEN: bool>(&mut self) -> Result<(), ParseError> {
    // Step over the opening quote and reset per-string bookkeeping.
    self.pos += 1;
    self.str_len = 0;
    self.str_start_in_src = self.pos;
    self.str_has_escapes = false;
    loop {
        let byte = match self.src.get(self.pos) {
            Some(&byte) => byte,
            // Ran out of input before the closing quote.
            None => {
                return Err(ParseError::at(
                    self.token_start,
                    ParseErrorKind::UnexpectedEof,
                ));
            }
        };
        match byte {
            b'"' => {
                self.str_end_in_src = self.pos;
                self.pos += 1;
                self.token = Token::String;
                return Ok(());
            }
            b'\\' => self.lex_escape::<WRITE_OUT_TOKEN>()?,
            _ => {
                self.pos += 1;
                self.push_str_byte::<WRITE_OUT_TOKEN>(byte)?;
            }
        }
    }
}

/// Decodes one escape sequence whose backslash is at `pos`.
fn lex_escape<const WRITE_OUT_TOKEN: bool>(&mut self) -> Result<(), ParseError> {
    self.str_has_escapes = true;
    self.pos += 1;
    let esc = match self.src.get(self.pos) {
        Some(&esc) => esc,
        None => return Err(ParseError::at(self.pos, ParseErrorKind::UnexpectedEof)),
    };
    self.pos += 1;
    if esc == b'u' {
        let cp = self.lex_unicode_escape()?;
        let (bytes, len) = encode_utf8_cp(cp);
        for &byte in &bytes[..len] {
            self.push_str_byte::<WRITE_OUT_TOKEN>(byte)?;
        }
        return Ok(());
    }
    let decoded = match esc {
        b'"' => b'"',
        b'\\' => b'\\',
        b'/' => b'/',
        b'b' => b'\x08',
        b't' => b'\t',
        b'n' => b'\n',
        // NOTE: `\v` is not part of the JSON grammar; it is accepted here as
        // an extension and kept for backward compatibility.
        b'v' => b'\x0B',
        b'f' => b'\x0C',
        b'r' => b'\r',
        other => {
            // `pos` is already past the escape character; point back at it.
            return Err(ParseError::at(
                self.pos - 1,
                ParseErrorKind::InvalidEscape(other),
            ));
        }
    };
    self.push_str_byte::<WRITE_OUT_TOKEN>(decoded)
}

/// Reads the four hex digits following `\u` (with `pos` just past the `u`)
/// and, for a high surrogate, the mandatory `\uXXXX` low surrogate after it.
/// Returns the resulting code point.
fn lex_unicode_escape(&mut self) -> Result<u32, ParseError> {
    if self.pos + 4 > self.src.len() {
        return Err(ParseError::at(self.pos, ParseErrorKind::UnexpectedEof));
    }
    let high = parse_hex4(&self.src[self.pos..])
        .ok_or_else(|| ParseError::at(self.pos, ParseErrorKind::InvalidEscape(b'u')))?;
    self.pos += 4;
    if (0xD800..=0xDBFF).contains(&high) {
        // A high surrogate must be followed by a full `\uXXXX` (6 bytes).
        if self.pos + 6 > self.src.len()
            || self.src[self.pos] != b'\\'
            || self.src[self.pos + 1] != b'u'
        {
            return Err(ParseError::at(self.pos, ParseErrorKind::InvalidEscape(b'u')));
        }
        self.pos += 2;
        let low = parse_hex4(&self.src[self.pos..])
            .ok_or_else(|| ParseError::at(self.pos, ParseErrorKind::InvalidEscape(b'u')))?;
        if !(0xDC00..=0xDFFF).contains(&low) {
            return Err(ParseError::at(self.pos, ParseErrorKind::InvalidEscape(b'u')));
        }
        self.pos += 4;
        Ok(0x10000 + ((u32::from(high) - 0xD800) << 10) + (u32::from(low) - 0xDC00))
    } else if (0xDC00..=0xDFFF).contains(&high) {
        // A lone low surrogate; report it at its first hex digit.
        Err(ParseError::at(self.pos - 4, ParseErrorKind::InvalidEscape(b'u')))
    } else {
        Ok(u32::from(high))
    }
}

/// Accounts for one decoded string byte, storing it in `str_buf` when
/// `WRITE_OUT_TOKEN` is set. Fails with `StringBufferOverflow` if the buffer is full.
fn push_str_byte<const WRITE_OUT_TOKEN: bool>(&mut self, byte: u8) -> Result<(), ParseError> {
    if WRITE_OUT_TOKEN {
        match self.str_buf.get_mut(self.str_len) {
            Some(slot) => *slot = byte,
            None => {
                return Err(ParseError::at(
                    self.token_start,
                    ParseErrorKind::StringBufferOverflow,
                ));
            }
        }
    }
    self.str_len += 1;
    Ok(())
}
/// Checks that the current token is `expected`.
///
/// On mismatch returns an `UnexpectedToken` error at `token_start` naming
/// both the expected and the actual token.
fn expect_token(&mut self, expected: Token) -> Result<(), ParseError> {
    if self.token == expected {
        return Ok(());
    }
    let kind = ParseErrorKind::UnexpectedToken {
        expected: expected.name(),
        got: self.token.name(),
    };
    Err(ParseError::at(self.token_start, kind))
}
/// Lexes the next token (without writing string contents to the scratch
/// buffer) and verifies that it is `expected`.
fn get_and_expect(&mut self, expected: Token) -> Result<(), ParseError> {
    self.get_token::<false>()
        .and_then(|()| self.expect_token(expected))
}
/// Returns the first `str_len` bytes of the scratch buffer as a `&str`.
///
/// Fails with `InvalidUtf8` (at `token_start`) if those bytes are not valid UTF-8.
fn current_string(&self) -> Result<&str, ParseError> {
    match core::str::from_utf8(&self.str_buf[..self.str_len]) {
        Ok(text) => Ok(text),
        Err(_) => Err(ParseError::at(self.token_start, ParseErrorKind::InvalidUtf8)),
    }
}
/// Returns the last string token as a slice borrowed directly from the source.
///
/// This is only possible when the string contained no escapes; otherwise
/// `KeyHasEscapes` is returned. Fails with `InvalidUtf8` if the source bytes
/// are not valid UTF-8. Both errors are reported at `token_start`.
fn current_string_src(&self) -> Result<&'src str, ParseError> {
    let offset = self.token_start;
    if self.str_has_escapes {
        return Err(ParseError::at(offset, ParseErrorKind::KeyHasEscapes));
    }
    let source: &'src [u8] = self.src;
    let raw = &source[self.str_start_in_src..self.str_end_in_src];
    match core::str::from_utf8(raw) {
        Ok(text) => Ok(text),
        Err(_) => Err(ParseError::at(offset, ParseErrorKind::InvalidUtf8)),
    }
}
/// Consumes the next token and requires it to be `{`.
///
/// Returns an error if lexing fails or the token is anything else.
pub fn object_begin(&mut self) -> Result<(), ParseError> {
self.get_and_expect(Token::OpenCurly)
}
/// Decides whether another object member follows.
///
/// Skips whitespace, then looks at the next byte: `}` means the object is
/// finished (`Ok(false)`, nothing consumed); `,` is consumed as a separator
/// and `Ok(true)` is returned; any other byte also yields `Ok(true)` without
/// consuming it. Fails with `UnexpectedEof` if the input ends here.
fn object_next_member(&mut self) -> Result<bool, ParseError> {
    self.skip_whitespace();
    let next = match self.src.get(self.pos) {
        Some(&byte) => byte,
        None => return Err(ParseError::at(self.pos, ParseErrorKind::UnexpectedEof)),
    };
    if next == b'}' {
        return Ok(false);
    }
    if next == b',' {
        self.token_start = self.pos;
        self.token = Token::Comma;
        self.pos += 1;
    }
    Ok(true)
}
/// Reads the next object member's key and the `:` that follows it.
///
/// Returns `Ok(None)` when the object has no further members. Otherwise the
/// key is returned as a slice borrowed from the source, and the parser is
/// left positioned at the member's value. `key_start` is updated so that
/// unknown-field errors point at the key.
///
/// # Errors
///
/// Fails if the key is not a string, contains escapes (`KeyHasEscapes`), is
/// not valid UTF-8, or is not followed by `:`. Key-related errors are
/// reported at the key's offset.
pub fn member(&mut self) -> Result<Option<&'src str>, ParseError> {
    if !self.object_next_member()? {
        return Ok(None);
    }
    self.get_and_expect(Token::String)?;
    self.key_start = self.token_start;
    // Resolve the key while `token_start` still points at it, so that a bad
    // key is reported at the key rather than at the following colon.
    let key = self.current_string_src()?;
    self.get_and_expect(Token::Colon)?;
    Ok(Some(key))
}
fn member_decoded(&mut self) -> Result<Option<&str>, ParseError> {
if !self.object_next_member()? { return Ok(None) };
self.get_token::<true>()?;
self.expect_token(Token::String)?;
self.key_start = self.token_start;
self.get_and_expect(Token::Colon)?;
Ok(Some(self.current_string()?))
}
/// Consumes the next token and requires it to be `}`.
///
/// Returns an error if lexing fails or the token is anything else.
pub fn object_end(&mut self) -> Result<(), ParseError> {
self.get_and_expect(Token::CloseCurly)
}
/// Builds an `UnknownField` error located at the most recent member key,
/// with an empty type name and no list of expected fields.
pub fn unknown_field(&self) -> ParseError {
    let kind = ParseErrorKind::UnknownField {
        type_name: "",
        expected_fields: &[],
    };
    ParseError::at(self.key_start, kind)
}
/// Builds an `UnknownField` error located at the most recent member key,
/// carrying the name of the type being parsed and the fields it accepts.
pub fn unknown_field_in(
    &self,
    type_name: &'static str,
    expected_fields: &'static [&'static str],
) -> ParseError {
    let kind = ParseErrorKind::UnknownField {
        type_name,
        expected_fields,
    };
    ParseError::at(self.key_start, kind)
}
/// Consumes the next token and requires it to be `[`.
///
/// Returns an error if lexing fails or the token is anything else.
pub fn array_begin(&mut self) -> Result<(), ParseError> {
self.get_and_expect(Token::OpenBracket)
}
/// Decides whether another array element follows.
///
/// Peeks at the next token: `]` means the array is finished (`Ok(false)`,
/// nothing consumed); `,` is consumed as a separator and `Ok(true)` is
/// returned; anything else also yields `Ok(true)` without consuming input.
pub fn array_item(&mut self) -> Result<bool, ParseError> {
    let upcoming = self.peek_token();
    if upcoming == Token::Comma {
        // Consume the separator so the element can be lexed next.
        self.skip_whitespace();
        self.token_start = self.pos;
        self.token = Token::Comma;
        self.pos += 1;
    }
    Ok(upcoming != Token::CloseBracket)
}
/// Consumes the next token and requires it to be `]`.
///
/// Returns an error if lexing fails or the token is anything else.
pub fn array_end(&mut self) -> Result<(), ParseError> {
self.get_and_expect(Token::CloseBracket)
}
/// Consumes the next token and requires it to be the literal `null`.
///
/// Returns an error if lexing fails or the token is anything else.
pub fn null(&mut self) -> Result<(), ParseError> {
self.get_and_expect(Token::Null)
}
/// Consumes the next token and interprets it as a boolean literal.
///
/// Returns `UnexpectedToken` (expected "boolean") if the token is neither
/// `true` nor `false`.
pub fn boolean(&mut self) -> Result<bool, ParseError> {
    self.get_token::<false>()?;
    let value = match self.token {
        Token::True => true,
        Token::False => false,
        other => {
            let kind = ParseErrorKind::UnexpectedToken {
                expected: "boolean",
                got: other.name(),
            };
            return Err(ParseError::at(self.token_start, kind));
        }
    };
    Ok(value)
}
/// Consumes the next token, requires it to be a string, and returns its
/// decoded contents.
///
/// The contents are written into the scratch buffer, which the returned
/// `&str` borrows.
pub fn string(&mut self) -> Result<&str, ParseError> {
    self.get_token::<true>()
        .and_then(|()| self.expect_token(Token::String))?;
    self.current_string()
}
/// Consumes the next token, requires it to be a number, and returns its
/// text exactly as it appears in the source.
pub fn number_str(&mut self) -> Result<&'src str, ParseError> {
    self.get_and_expect(Token::Number)?;
    let source: &'src [u8] = self.src;
    let literal = &source[self.number_start..self.number_end];
    match core::str::from_utf8(literal) {
        Ok(text) => Ok(text),
        Err(_) => Err(ParseError::at(self.token_start, ParseErrorKind::InvalidUtf8)),
    }
}
/// Consumes a number token and converts its text to `Num` via `FromStr`.
///
/// A failed conversion is reported as `UnexpectedToken` with the detail
/// "invalid float", located at the start of the number.
pub fn float<Num: FromStr>(&mut self) -> Result<Num, ParseError> {
    let text = self.number_str()?;
    match text.parse::<Num>() {
        Ok(value) => Ok(value),
        Err(_) => {
            let kind = ParseErrorKind::UnexpectedToken {
                expected: "number",
                got: "invalid float",
            };
            Err(ParseError::at(self.error_offset(), kind))
        }
    }
}
/// Consumes a number token and converts its text to `Num` via `FromStr`.
///
/// A failed conversion is reported as `UnexpectedToken` with the detail
/// "int out of range", located at the start of the number.
pub fn integer<Num: FromStr>(&mut self) -> Result<Num, ParseError> {
    let text = self.number_str()?;
    match text.parse::<Num>() {
        Ok(value) => Ok(value),
        Err(_) => {
            let kind = ParseErrorKind::UnexpectedToken {
                expected: "number",
                got: "int out of range",
            };
            Err(ParseError::at(self.error_offset(), kind))
        }
    }
}
/// Classifies the upcoming token from its first non-whitespace byte,
/// without consuming anything.
///
/// Only the first byte is inspected, so `t`, `f` and `n` are reported as
/// `True`, `False` and `Null` without checking the rest of the keyword.
fn peek_token(&self) -> Token {
    let first = self
        .src
        .iter()
        .skip(self.pos)
        .copied()
        .find(|byte| !matches!(byte, b' ' | b'\t' | b'\n' | b'\r'));
    match first {
        None => Token::Eof,
        Some(b'{') => Token::OpenCurly,
        Some(b'}') => Token::CloseCurly,
        Some(b'[') => Token::OpenBracket,
        Some(b']') => Token::CloseBracket,
        Some(b',') => Token::Comma,
        Some(b':') => Token::Colon,
        Some(b'"') => Token::String,
        Some(b't') => Token::True,
        Some(b'f') => Token::False,
        Some(b'n') => Token::Null,
        Some(b'-') | Some(b'0'..=b'9') => Token::Number,
        Some(_) => Token::Invalid,
    }
}
/// Returns `true` if the upcoming token looks like `null`.
pub fn is_null_ahead(&self) -> bool {
    matches!(self.peek_token(), Token::Null)
}

/// Returns `true` if the upcoming token looks like `true` or `false`.
pub fn is_bool_ahead(&self) -> bool {
    let upcoming = self.peek_token();
    upcoming == Token::True || upcoming == Token::False
}

/// Returns `true` if the upcoming token looks like a number.
pub fn is_number_ahead(&self) -> bool {
    matches!(self.peek_token(), Token::Number)
}

/// Returns `true` if the upcoming token looks like a string.
pub fn is_string_ahead(&self) -> bool {
    matches!(self.peek_token(), Token::String)
}

/// Returns `true` if the upcoming token is `[`.
pub fn is_array_ahead(&self) -> bool {
    matches!(self.peek_token(), Token::OpenBracket)
}

/// Returns `true` if the upcoming token is `{`.
pub fn is_object_ahead(&self) -> bool {
    matches!(self.peek_token(), Token::OpenCurly)
}
}
/// Types that can be constructed by reading from a [`Parser`].
///
/// `'src` is the lifetime of the parser's input, which lets implementations
/// return data borrowed from it.
pub trait Deserialize<'src>: Sized {
/// Reads one value of this type from `parser`, or returns the parse error
/// encountered while doing so.
fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError>;
}
/// Reads a `true`/`false` literal.
impl<'src> Deserialize<'src> for bool {
// Delegates to `Parser::boolean`.
fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
parser.boolean()
}
}
/// Reads a string as a slice borrowed directly from the source.
///
/// Because nothing is decoded, strings containing escapes are rejected with
/// `KeyHasEscapes`.
impl<'src> Deserialize<'src> for &'src str {
    fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        parser.get_and_expect(Token::String)?;
        parser.current_string_src()
    }
}
/// Reads a string, decoding escapes, into an owned `String`.
#[cfg(feature = "alloc")]
impl<'src> Deserialize<'src> for alloc::string::String {
    fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        parser.string().map(alloc::string::String::from)
    }
}
// Implements `Deserialize` for each listed type by delegating to
// `Parser::float`, which converts the number's text with `FromStr`.
macro_rules! impl_float {
($($t:ty),*) => {$(
impl<'src> Deserialize<'src> for $t {
fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
parser.float()
}
}
)*};
}
// Floating-point types supported out of the box.
impl_float!(f32, f64);
// Implements `Deserialize` for each listed type by delegating to
// `Parser::integer`, which converts the number's text with `FromStr`.
macro_rules! impl_integer {
($($t:ty),*) => {$(
impl<'src> Deserialize<'src> for $t {
fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
parser.integer()
}
}
)*};
}
// Signed and unsigned integer types supported out of the box.
impl_integer!(i8, i16, i32, i64, i128, isize, u8, u16, u32, u64, u128, usize);
/// Reads `null` as `None` and any other value as `Some(T)`.
impl<'src, T: Deserialize<'src>> Deserialize<'src> for Option<T> {
    fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        if !parser.is_null_ahead() {
            return T::deserialize(parser).map(Some);
        }
        // The look-ahead only inspects one byte; `null()` validates the full literal.
        parser.null().map(|()| None)
    }
}
/// Reads an array of exactly `N` elements.
///
/// Fewer or more elements than `N` produce an `UnexpectedToken` error.
impl<'src, T, const N: usize> Deserialize<'src> for [T; N]
where
    T: Deserialize<'src>,
{
    fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        parser.array_begin()?;
        // Slots start empty and are filled in order as elements are parsed.
        let mut slots: [Option<T>; N] = [(); N].map(|_| None);
        for slot in slots.iter_mut() {
            let has_item = parser.array_item()?;
            if !has_item {
                let kind = ParseErrorKind::UnexpectedToken { expected: "array item", got: "]" };
                return Err(ParseError::at(parser.error_offset(), kind));
            }
            *slot = Some(T::deserialize(parser)?);
        }
        let has_extra = parser.array_item()?;
        if has_extra {
            let kind = ParseErrorKind::UnexpectedToken { expected: "]", got: "array item" };
            return Err(ParseError::at(parser.error_offset(), kind));
        }
        parser.array_end()?;
        // Every slot was filled by the loop above.
        Ok(slots.map(|filled| filled.unwrap()))
    }
}
/// Reads an array of up to `N` elements into an `ArrayVec`.
///
/// An array with more than `N` elements is reported as `StringBufferOverflow`.
#[cfg(feature = "arrayvec")]
impl<'src, T, const N: usize> Deserialize<'src> for arrayvec::ArrayVec<T, N>
where
    T: Deserialize<'src>,
{
    fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        let mut items = arrayvec::ArrayVec::new();
        parser.array_begin()?;
        while parser.array_item()? {
            let item = T::deserialize(parser)?;
            if items.try_push(item).is_err() {
                return Err(ParseError::at(
                    parser.error_offset(),
                    ParseErrorKind::StringBufferOverflow,
                ));
            }
        }
        parser.array_end()?;
        Ok(items)
    }
}
/// Reads a string, decoding escapes, into a fixed-capacity `ArrayString`.
///
/// A decoded string longer than `N` bytes is reported as
/// `StringBufferOverflow`, located at the start of the string token.
#[cfg(feature = "arrayvec")]
impl<'src, const N: usize> Deserialize<'src> for arrayvec::ArrayString<N> {
    fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        // The capacity error borrows the decoded text (and therefore the
        // parser); discard it immediately so the parser can be queried again.
        let converted = {
            let text = parser.string()?;
            arrayvec::ArrayString::try_from(text).map_err(|_| ())
        };
        // Read the offset only now: `error_offset` points at the string token
        // once it has been lexed, not at whatever token preceded it.
        converted.map_err(|()| {
            ParseError::at(parser.error_offset(), ParseErrorKind::StringBufferOverflow)
        })
    }
}
/// Reads an array of any length into a `Vec`.
#[cfg(feature = "alloc")]
impl<'src, T> Deserialize<'src> for alloc::vec::Vec<T>
where
    T: Deserialize<'src>,
{
    fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        let mut items = alloc::vec::Vec::new();
        parser.array_begin()?;
        while parser.array_item()? {
            let item = T::deserialize(parser)?;
            items.push(item);
        }
        parser.array_end()?;
        Ok(items)
    }
}
/// Reads a `T` and moves it onto the heap.
#[cfg(feature = "alloc")]
impl<'src, T: Deserialize<'src>> Deserialize<'src> for alloc::boxed::Box<T> {
    fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        let value = T::deserialize(parser)?;
        Ok(alloc::boxed::Box::new(value))
    }
}
// Implements `Deserialize` for a string-keyed map type.
//
// `$map_ty` is the map type (generic over the value type `V`) and `$new` is
// an expression producing an empty map. Keys are decoded through the scratch
// buffer, so keys containing escapes are accepted.
macro_rules! impl_deserialize_map {
    ($map_ty:ty, $new:expr) => {
        impl<'src, V> Deserialize<'src> for $map_ty
        where
            V: Deserialize<'src>,
        {
            fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
                let mut map = $new;
                parser.object_begin()?;
                while let Some(raw_key) = parser.member_decoded()? {
                    // Copy the key out before the scratch buffer is reused
                    // while parsing the value.
                    let key = alloc::string::String::from(raw_key);
                    let value = V::deserialize(parser)?;
                    map.insert(key, value);
                }
                parser.object_end()?;
                Ok(map)
            }
        }
    };
}

#[cfg(feature = "alloc")]
impl_deserialize_map!(
    alloc::collections::BTreeMap<alloc::string::String, V>,
    alloc::collections::BTreeMap::new()
);

#[cfg(feature = "std")]
impl_deserialize_map!(
    std::collections::HashMap<std::string::String, V>,
    std::collections::HashMap::new()
);
/// Runs the closure `f` against a parser over `src`, using `buf` as the
/// scratch buffer for decoded strings, and returns whatever `f` returns.
pub fn parse_sized_as<T>(
    buf: &mut [u8],
    src: impl AsRef<[u8]>,
    f: impl for<'a, 'b> FnOnce(&mut Parser<'a, 'b>) -> Result<T, ParseError>,
) -> Result<T, ParseError> {
    let bytes = src.as_ref();
    f(&mut Parser::new(bytes, buf))
}
/// Deserializes a `T` from `src`, using `buf` as the scratch buffer for
/// decoded strings.
#[inline]
pub fn parse_sized<T: for<'s> Deserialize<'s>>(
    buf: &mut [u8],
    src: impl AsRef<[u8]>,
) -> Result<T, ParseError> {
    let bytes = src.as_ref();
    let mut parser = Parser::new(bytes, buf);
    T::deserialize(&mut parser)
}
/// Deserializes a `T` from `src`, allocating a scratch buffer for decoded strings.
///
/// The scratch buffer is as long as the input (at least one byte).
#[cfg(feature = "std")]
#[inline]
pub fn parse<T: for<'s> Deserialize<'s>>(
    src: impl AsRef<[u8]>,
) -> Result<T, ParseError> {
    let bytes = src.as_ref();
    let scratch_len = core::cmp::max(bytes.len(), 1);
    let mut scratch = std::vec![0u8; scratch_len];
    let mut parser = Parser::new(bytes, &mut scratch);
    T::deserialize(&mut parser)
}
/// Runs the closure `f` against a parser over `src`, allocating a scratch
/// buffer for decoded strings, and returns whatever `f` returns.
///
/// The scratch buffer is as long as the input (at least one byte).
#[cfg(feature = "std")]
#[inline]
pub fn parse_as<T>(
    src: impl AsRef<[u8]>,
    f: impl for<'a, 'b> FnOnce(&mut Parser<'a, 'b>) -> Result<T, ParseError>,
) -> Result<T, ParseError> {
    let bytes = src.as_ref();
    let scratch_len = core::cmp::max(bytes.len(), 1);
    let mut scratch = std::vec![0u8; scratch_len];
    f(&mut Parser::new(bytes, &mut scratch))
}
/// Parses the first four bytes of `bytes` as a big-endian hexadecimal number.
///
/// Upper- and lower-case digits are accepted. Returns `None` if fewer than
/// four bytes are available or any of the four is not a hex digit.
fn parse_hex4(bytes: &[u8]) -> Option<u16> {
    let digits = bytes.get(..4)?;
    digits.iter().try_fold(0u16, |acc, &byte| {
        let nibble = match byte {
            b'0'..=b'9' => byte - b'0',
            b'a'..=b'f' => byte - b'a' + 10,
            b'A'..=b'F' => byte - b'A' + 10,
            _ => return None,
        };
        // Four nibbles fit exactly in a u16, so this cannot overflow.
        Some(acc * 16 + u16::from(nibble))
    })
}
/// Encodes the code point `cp` as UTF-8.
///
/// Returns a four-byte array together with the number of leading bytes that
/// make up the encoding; unused trailing bytes are zero. No validation of
/// `cp` is performed.
fn encode_utf8_cp(cp: u32) -> ([u8; 4], usize) {
    // Continuation byte carrying the six bits of `cp` that start at `shift`.
    let cont = |shift: u32| 0x80 | ((cp >> shift) & 0x3F) as u8;
    if cp <= 0x7F {
        ([cp as u8, 0, 0, 0], 1)
    } else if cp <= 0x7FF {
        ([0xC0 | (cp >> 6) as u8, cont(0), 0, 0], 2)
    } else if cp <= 0xFFFF {
        ([0xE0 | (cp >> 12) as u8, cont(6), cont(0), 0], 3)
    } else {
        ([0xF0 | (cp >> 18) as u8, cont(12), cont(6), cont(0)], 4)
    }
}