use crate::error::Error;
use forma_core::de::*;
/// Nesting budget that `Deserializer::new` stores in `remaining_depth`;
/// callers can replace it afterwards through `set_depth_limit`.
const DEFAULT_DEPTH_LIMIT: usize = 128;
/// Cursor over a borrowed byte buffer that drives the `forma_core::de`
/// visitor API while tracking a line/column position for error reports.
pub struct Deserializer<'de> {
/// The complete input being parsed.
input: &'de [u8],
/// Byte offset into `input` of the next unread byte.
pos: usize,
/// Current line, starting at 1 and incremented for every consumed `\n`.
line: usize,
/// Current column, starting at 1, advanced once per consumed byte and
/// reset to 1 after a `\n`.
col: usize,
/// Number of additional nesting levels that may still be entered;
/// `check_depth` decrements it and `restore_depth` increments it.
remaining_depth: usize,
/// When `true`, the trait implementation applies extra coercions
/// (quoted numbers/bools, a bare value where a sequence is expected).
lenient: bool,
}
impl<'de> Deserializer<'de> {
pub fn new(input: &'de [u8]) -> Self {
Deserializer {
input,
pos: 0,
line: 1,
col: 1,
remaining_depth: DEFAULT_DEPTH_LIMIT,
lenient: false,
}
}
pub fn set_depth_limit(&mut self, limit: usize) {
self.remaining_depth = limit;
}
pub fn set_lenient(&mut self, lenient: bool) {
self.lenient = lenient;
}
pub fn from_str(input: &'de str) -> Self {
Self::new(input.as_bytes())
}
#[inline]
fn peek(&self) -> Option<u8> {
self.input.get(self.pos).copied()
}
#[inline]
fn advance(&mut self) -> Option<u8> {
let b = self.input.get(self.pos).copied()?;
self.pos += 1;
if b == b'\n' {
self.line += 1;
self.col = 1;
} else {
self.col += 1;
}
Some(b)
}
#[inline]
fn skip_whitespace(&mut self) {
let bytes = &self.input[self.pos..];
let mut i = 0;
while i < bytes.len() {
match bytes[i] {
b' ' | b'\t' | b'\r' => {
i += 1;
self.col += 1;
}
b'\n' => {
i += 1;
self.line += 1;
self.col = 1;
}
_ => break,
}
}
self.pos += i;
}
fn peek_or_eof(&mut self) -> Result<u8, Error> {
self.skip_whitespace();
self.peek().ok_or(Error::Eof)
}
fn next_or_eof(&mut self) -> Result<u8, Error> {
self.advance().ok_or(Error::Eof)
}
fn expect(&mut self, expected: u8) -> Result<(), Error> {
self.skip_whitespace();
match self.advance() {
Some(b) if b == expected => Ok(()),
Some(_) => Err(Error::Syntax(
format!("expected '{}'", expected as char),
self.line,
self.col - 1,
)),
None => Err(Error::Eof),
}
}
fn parse_string(&mut self) -> Result<String, Error> {
self.skip_whitespace();
self.expect(b'"')?;
let start = self.pos;
let bytes = &self.input[start..];
let mut i = 0;
loop {
if i >= bytes.len() {
break;
}
match bytes[i] {
b'"' => {
let s = unsafe { std::str::from_utf8_unchecked(&bytes[..i]) }.to_owned();
self.pos = start + i + 1;
self.col += i + 1; return Ok(s);
}
b'\\' | 0x80..=0xFF => {
break;
}
b'\n' => {
break;
}
_ => {
i += 1;
}
}
}
let mut s = String::from(
unsafe { std::str::from_utf8_unchecked(&self.input[start..start + i]) },
);
self.pos = start + i;
self.col += i;
loop {
match self.next_or_eof()? {
b'"' => return Ok(s),
b'\\' => {
match self.next_or_eof()? {
b'"' => s.push('"'),
b'\\' => s.push('\\'),
b'/' => s.push('/'),
b'n' => s.push('\n'),
b'r' => s.push('\r'),
b't' => s.push('\t'),
b'b' => s.push('\u{0008}'),
b'f' => s.push('\u{000C}'),
b'u' => {
let cp = self.parse_hex4()?;
if (0xD800..=0xDBFF).contains(&cp) {
self.expect(b'\\')?;
self.expect(b'u')?;
let low = self.parse_hex4()?;
if !(0xDC00..=0xDFFF).contains(&low) {
return Err(Error::Syntax(
"invalid surrogate pair".into(),
self.line,
self.col,
));
}
let cp = 0x10000
+ ((cp as u32 - 0xD800) << 10)
+ (low as u32 - 0xDC00);
s.push(char::from_u32(cp).ok_or_else(|| {
Error::Syntax(
"invalid unicode codepoint".into(),
self.line,
self.col,
)
})?);
} else {
s.push(char::from_u32(cp as u32).ok_or_else(|| {
Error::Syntax(
"invalid unicode codepoint".into(),
self.line,
self.col,
)
})?);
}
}
_ => {
return Err(Error::Syntax(
"invalid escape".into(),
self.line,
self.col,
))
}
}
}
b if b < 0x80 => s.push(b as char),
b => {
let len = if b & 0xE0 == 0xC0 {
2
} else if b & 0xF0 == 0xE0 {
3
} else if b & 0xF8 == 0xF0 {
4
} else {
return Err(Error::Syntax(
"invalid UTF-8 byte".into(),
self.line,
self.col,
));
};
let start = self.pos - 1; for _ in 1..len {
match self.advance() {
Some(cont) if cont & 0xC0 == 0x80 => {}
_ => {
return Err(Error::Syntax(
"invalid UTF-8 continuation byte".into(),
self.line,
self.col,
));
}
}
}
let utf8_bytes = &self.input[start..self.pos];
match std::str::from_utf8(utf8_bytes) {
Ok(ch) => s.push_str(ch),
Err(_) => {
return Err(Error::Syntax(
"invalid UTF-8 sequence".into(),
self.line,
self.col,
));
}
}
}
}
}
}
fn parse_hex4(&mut self) -> Result<u16, Error> {
let mut val = 0u16;
for _ in 0..4 {
let b = self.next_or_eof()?;
let digit = match b {
b'0'..=b'9' => b - b'0',
b'a'..=b'f' => b - b'a' + 10,
b'A'..=b'F' => b - b'A' + 10,
_ => {
return Err(Error::Syntax(
"invalid hex digit".into(),
self.line,
self.col,
))
}
};
val = (val << 4) | digit as u16;
}
Ok(val)
}
#[inline]
fn parse_number_bytes(&mut self) -> &'de [u8] {
let start = self.pos;
let bytes = &self.input[start..];
let mut i = 0;
while i < bytes.len() {
match bytes[i] {
b'0'..=b'9' | b'-' | b'.' | b'e' | b'E' => i += 1,
b'+' => {
if i > 0 && matches!(bytes[i - 1], b'e' | b'E') {
i += 1;
} else {
break;
}
}
_ => break,
}
}
self.col += i;
self.pos = start + i;
&self.input[start..self.pos]
}
#[inline]
fn check_depth(&mut self) -> Result<(), Error> {
if self.remaining_depth == 0 {
return Err(Error::Message("recursion limit exceeded".into()));
}
self.remaining_depth -= 1;
Ok(())
}
#[inline]
fn restore_depth(&mut self) {
self.remaining_depth += 1;
}
pub fn end_of_input(&mut self) -> Result<(), Error> {
self.skip_whitespace();
if self.pos < self.input.len() {
Err(Error::TrailingData)
} else {
Ok(())
}
}
}
impl<'de, 'a> forma_core::de::Deserializer<'de> for &'a mut Deserializer<'de> {
type Error = Error;
fn deserialize_any<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
match self.peek_or_eof()? {
b'"' => {
let s = self.parse_string()?;
visitor.visit_string(s)
}
b't' | b'f' => self.deserialize_bool(visitor),
b'n' => {
self.skip_whitespace();
self.expect(b'n')?;
self.expect(b'u')?;
self.expect(b'l')?;
self.expect(b'l')?;
visitor.visit_unit()
}
b'[' => self.deserialize_seq(visitor),
b'{' => self.deserialize_map(visitor),
b'0'..=b'9' | b'-' => {
self.skip_whitespace();
let num_bytes = self.parse_number_bytes();
let num_str = std::str::from_utf8(num_bytes).map_err(|_| {
Error::Syntax("invalid number".into(), self.line, self.col)
})?;
if num_str.contains('.') || num_str.contains('e') || num_str.contains('E') {
let v: f64 = num_str.parse().map_err(|_| {
Error::Syntax(format!("invalid number: {num_str}"), self.line, self.col)
})?;
visitor.visit_f64(v)
} else if num_str.starts_with('-') {
let v: i64 = num_str.parse().map_err(|_| {
Error::Syntax(format!("invalid number: {num_str}"), self.line, self.col)
})?;
visitor.visit_i64(v)
} else {
let v: u64 = num_str.parse().map_err(|_| {
Error::Syntax(format!("invalid number: {num_str}"), self.line, self.col)
})?;
visitor.visit_u64(v)
}
}
b => Err(Error::Syntax(
format!("unexpected character '{}'", b as char),
self.line,
self.col,
)),
}
}
fn deserialize_bool<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
self.skip_whitespace();
if self.lenient {
match self.peek_or_eof()? {
b'"' => {
let s = self.parse_string()?;
return match s.as_str() {
"true" | "1" => visitor.visit_bool(true),
"false" | "0" => visitor.visit_bool(false),
_ => Err(Error::Syntax(
format!("cannot coerce \"{}\" to bool", s),
self.line, self.col,
)),
};
}
b'0' | b'1' => {
let bytes = self.parse_number_bytes();
return match bytes {
[b'0'] => visitor.visit_bool(false),
[b'1'] => visitor.visit_bool(true),
_ => Err(Error::Syntax(
"cannot coerce number to bool".into(),
self.line, self.col,
)),
};
}
_ => {}
}
}
match self.peek_or_eof()? {
b't' => {
self.advance(); self.expect(b'r')?;
self.expect(b'u')?;
self.expect(b'e')?;
visitor.visit_bool(true)
}
b'f' => {
self.advance(); self.expect(b'a')?;
self.expect(b'l')?;
self.expect(b's')?;
self.expect(b'e')?;
visitor.visit_bool(false)
}
_ => Err(Error::Syntax(
"expected bool".into(),
self.line,
self.col,
)),
}
}
fn deserialize_i8<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
self.deserialize_i64(visitor)
}
fn deserialize_i16<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
self.deserialize_i64(visitor)
}
fn deserialize_i32<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
self.deserialize_i64(visitor)
}
fn deserialize_i64<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
self.skip_whitespace();
if self.lenient && self.peek() == Some(b'"') {
let s = self.parse_string()?;
let v: i64 = s.parse().map_err(|_| {
Error::Syntax(format!("cannot coerce \"{}\" to i64", s), self.line, self.col)
})?;
return visitor.visit_i64(v);
}
let bytes = self.parse_number_bytes();
let s = std::str::from_utf8(bytes)
.map_err(|_| Error::Syntax("invalid number".into(), self.line, self.col))?;
let v: i64 = s
.parse()
.map_err(|_| Error::Syntax(format!("invalid i64: {s}"), self.line, self.col))?;
visitor.visit_i64(v)
}
fn deserialize_i128<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
self.skip_whitespace();
let bytes = self.parse_number_bytes();
let s = std::str::from_utf8(bytes)
.map_err(|_| Error::Syntax("invalid number".into(), self.line, self.col))?;
let v: i128 = s
.parse()
.map_err(|_| Error::Syntax(format!("invalid i128: {s}"), self.line, self.col))?;
visitor.visit_i128(v)
}
fn deserialize_u8<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
self.deserialize_u64(visitor)
}
fn deserialize_u16<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
self.deserialize_u64(visitor)
}
fn deserialize_u32<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
self.deserialize_u64(visitor)
}
fn deserialize_u64<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
self.skip_whitespace();
if self.lenient && self.peek() == Some(b'"') {
let s = self.parse_string()?;
let v: u64 = s.parse().map_err(|_| {
Error::Syntax(format!("cannot coerce \"{}\" to u64", s), self.line, self.col)
})?;
return visitor.visit_u64(v);
}
let bytes = self.parse_number_bytes();
let s = std::str::from_utf8(bytes)
.map_err(|_| Error::Syntax("invalid number".into(), self.line, self.col))?;
let v: u64 = s
.parse()
.map_err(|_| Error::Syntax(format!("invalid u64: {s}"), self.line, self.col))?;
visitor.visit_u64(v)
}
fn deserialize_u128<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
self.skip_whitespace();
let bytes = self.parse_number_bytes();
let s = std::str::from_utf8(bytes)
.map_err(|_| Error::Syntax("invalid number".into(), self.line, self.col))?;
let v: u128 = s
.parse()
.map_err(|_| Error::Syntax(format!("invalid u128: {s}"), self.line, self.col))?;
visitor.visit_u128(v)
}
fn deserialize_f32<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
self.deserialize_f64(visitor)
}
fn deserialize_f64<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
self.skip_whitespace();
if self.lenient && self.peek() == Some(b'"') {
let s = self.parse_string()?;
let v: f64 = s.parse().map_err(|_| {
Error::Syntax(format!("cannot coerce \"{}\" to f64", s), self.line, self.col)
})?;
return visitor.visit_f64(v);
}
let bytes = self.parse_number_bytes();
let s = std::str::from_utf8(bytes)
.map_err(|_| Error::Syntax("invalid number".into(), self.line, self.col))?;
let v: f64 = s
.parse()
.map_err(|_| Error::Syntax(format!("invalid f64: {s}"), self.line, self.col))?;
visitor.visit_f64(v)
}
fn deserialize_char<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
let s = self.parse_string()?;
let mut chars = s.chars();
match (chars.next(), chars.next()) {
(Some(c), None) => visitor.visit_char(c),
_ => Err(Error::Syntax(
"expected single character".into(),
self.line,
self.col,
)),
}
}
fn deserialize_str<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
let s = self.parse_string()?;
visitor.visit_string(s)
}
fn deserialize_string<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
self.deserialize_str(visitor)
}
fn deserialize_bytes<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
self.deserialize_seq(visitor)
}
fn deserialize_byte_buf<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
self.deserialize_bytes(visitor)
}
fn deserialize_option<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
self.skip_whitespace();
if self.peek() == Some(b'n') {
self.advance(); self.expect(b'u')?;
self.expect(b'l')?;
self.expect(b'l')?;
visitor.visit_none()
} else {
visitor.visit_some(self)
}
}
fn deserialize_unit<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
self.skip_whitespace();
self.expect(b'n')?;
self.expect(b'u')?;
self.expect(b'l')?;
self.expect(b'l')?;
visitor.visit_unit()
}
fn deserialize_unit_struct<V: Visitor<'de>>(
self,
_name: &'static str,
visitor: V,
) -> Result<V::Value, Error> {
self.deserialize_unit(visitor)
}
fn deserialize_newtype_struct<V: Visitor<'de>>(
self,
_name: &'static str,
visitor: V,
) -> Result<V::Value, Error> {
visitor.visit_newtype_struct(self)
}
fn deserialize_seq<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
self.check_depth()?;
self.skip_whitespace();
if self.lenient && self.peek() != Some(b'[') {
let value = visitor.visit_seq(SingleElementSeq { de: self, done: false })?;
self.restore_depth();
return Ok(value);
}
self.expect(b'[')?;
let value = visitor.visit_seq(SeqAccess::new(self))?;
self.expect(b']')?;
self.restore_depth();
Ok(value)
}
fn deserialize_tuple<V: Visitor<'de>>(
self,
_len: usize,
visitor: V,
) -> Result<V::Value, Error> {
self.deserialize_seq(visitor)
}
fn deserialize_tuple_struct<V: Visitor<'de>>(
self,
_name: &'static str,
_len: usize,
visitor: V,
) -> Result<V::Value, Error> {
self.deserialize_seq(visitor)
}
fn deserialize_map<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
self.check_depth()?;
self.skip_whitespace();
self.expect(b'{')?;
let value = visitor.visit_map(MapAccess::new(self))?;
self.expect(b'}')?;
self.restore_depth();
Ok(value)
}
fn deserialize_struct<V: Visitor<'de>>(
self,
_name: &'static str,
_fields: &'static [&'static str],
visitor: V,
) -> Result<V::Value, Error> {
self.deserialize_map(visitor)
}
fn deserialize_enum<V: Visitor<'de>>(
self,
_name: &'static str,
_variants: &'static [&'static str],
visitor: V,
) -> Result<V::Value, Error> {
self.skip_whitespace();
match self.peek_or_eof()? {
b'"' => visitor.visit_enum(UnitVariantAccess::new(self)),
b'{' => {
self.check_depth()?;
self.advance(); let value = visitor.visit_enum(VariantMapAccess::new(self))?;
self.skip_whitespace();
self.expect(b'}')?;
self.restore_depth();
Ok(value)
}
_ => Err(Error::Syntax(
"expected string or object for enum".into(),
self.line,
self.col,
)),
}
}
fn deserialize_identifier<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
self.deserialize_str(visitor)
}
fn deserialize_ignored_any<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Error> {
self.deserialize_any(visitor)
}
}
/// Element cursor handed to a visitor while a bracketed sequence is parsed.
struct SeqAccess<'a, 'de> {
    /// Deserializer the elements are read from.
    de: &'a mut Deserializer<'de>,
    /// `true` until the first element has been requested; afterwards a `,`
    /// separator is required before each element.
    first: bool,
}

impl<'a, 'de> SeqAccess<'a, 'de> {
    /// Wraps `de` in a cursor that has not yet produced any element.
    fn new(de: &'a mut Deserializer<'de>) -> Self {
        Self { first: true, de }
    }
}
impl<'de> forma_core::de::SeqAccess<'de> for SeqAccess<'_, 'de> {
    type Error = Error;

    /// Yields the next element, or `None` once the closing `]` is next.
    ///
    /// The `]` itself is left unconsumed. Every element after the first must
    /// be preceded by a `,`.
    fn next_element_seed<T: DeserializeSeed<'de>>(
        &mut self,
        seed: T,
    ) -> Result<Option<T::Value>, Error> {
        self.de.skip_whitespace();
        match self.de.peek() {
            Some(b']') => Ok(None),
            _ => {
                // Clear the flag and learn in one step whether a separator
                // is due.
                let is_first = std::mem::replace(&mut self.first, false);
                if !is_first {
                    self.de.expect(b',')?;
                }
                let element = seed.deserialize(&mut *self.de)?;
                Ok(Some(element))
            }
        }
    }
}
/// Entry cursor handed to a visitor while a braced map is parsed.
struct MapAccess<'a, 'de> {
    /// Deserializer the keys and values are read from.
    de: &'a mut Deserializer<'de>,
    /// `true` until the first key has been requested; afterwards a `,`
    /// separator is required before each key.
    first: bool,
}

impl<'a, 'de> MapAccess<'a, 'de> {
    /// Wraps `de` in a cursor that has not yet produced any entry.
    fn new(de: &'a mut Deserializer<'de>) -> Self {
        Self { first: true, de }
    }
}
impl<'de> forma_core::de::MapAccess<'de> for MapAccess<'_, 'de> {
    type Error = Error;

    /// Yields the next key, or `None` once the closing `}` is next.
    ///
    /// The `}` itself is left unconsumed. Every key after the first must be
    /// preceded by a `,`.
    fn next_key_seed<K: DeserializeSeed<'de>>(
        &mut self,
        seed: K,
    ) -> Result<Option<K::Value>, Error> {
        self.de.skip_whitespace();
        match self.de.peek() {
            Some(b'}') => Ok(None),
            _ => {
                let is_first = std::mem::replace(&mut self.first, false);
                if !is_first {
                    self.de.expect(b',')?;
                }
                let key = seed.deserialize(&mut *self.de)?;
                Ok(Some(key))
            }
        }
    }

    /// Consumes the `:` separator and then the value belonging to the key
    /// most recently returned.
    fn next_value_seed<V: DeserializeSeed<'de>>(
        &mut self,
        seed: V,
    ) -> Result<V::Value, Error> {
        let de = &mut *self.de;
        de.expect(b':')?;
        seed.deserialize(de)
    }
}
/// Enum accessor used when the enum is written as a bare string.
struct UnitVariantAccess<'a, 'de> {
    /// Deserializer the variant name is read from.
    de: &'a mut Deserializer<'de>,
}

impl<'a, 'de> UnitVariantAccess<'a, 'de> {
    /// Wraps `de` without consuming any input.
    fn new(de: &'a mut Deserializer<'de>) -> Self {
        Self { de }
    }
}
impl<'de> forma_core::de::EnumAccess<'de> for UnitVariantAccess<'_, 'de> {
type Error = Error;
type Variant = UnitOnly;
fn variant_seed<V: DeserializeSeed<'de>>(
self,
seed: V,
) -> Result<(V::Value, UnitOnly), Error> {
let variant = seed.deserialize(&mut *self.de)?;
Ok((variant, UnitOnly))
}
}
struct UnitOnly;
impl<'de> forma_core::de::VariantAccess<'de> for UnitOnly {
type Error = Error;
fn unit_variant(self) -> Result<(), Error> {
Ok(())
}
fn newtype_variant_seed<T: DeserializeSeed<'de>>(self, _seed: T) -> Result<T::Value, Error> {
Err(Error::Message(
"expected unit variant, got newtype".into(),
))
}
fn tuple_variant<V: Visitor<'de>>(self, _len: usize, _visitor: V) -> Result<V::Value, Error> {
Err(Error::Message("expected unit variant, got tuple".into()))
}
fn struct_variant<V: Visitor<'de>>(
self,
_fields: &'static [&'static str],
_visitor: V,
) -> Result<V::Value, Error> {
Err(Error::Message(
"expected unit variant, got struct".into(),
))
}
}
/// Enum accessor used when the enum is written as an object; it is created
/// after the opening `{` has been consumed.
struct VariantMapAccess<'a, 'de> {
    /// Deserializer the variant name and content are read from.
    de: &'a mut Deserializer<'de>,
}

impl<'a, 'de> VariantMapAccess<'a, 'de> {
    /// Wraps `de` without consuming any input.
    fn new(de: &'a mut Deserializer<'de>) -> Self {
        Self { de }
    }
}
impl<'a, 'de> forma_core::de::EnumAccess<'de> for VariantMapAccess<'a, 'de> {
type Error = Error;
type Variant = VariantContentAccess<'a, 'de>;
fn variant_seed<V: DeserializeSeed<'de>>(
self,
seed: V,
) -> Result<(V::Value, VariantContentAccess<'a, 'de>), Error> {
let variant = seed.deserialize(&mut *self.de)?;
self.de.expect(b':')?;
Ok((variant, VariantContentAccess { de: self.de }))
}
}
struct VariantContentAccess<'a, 'de> {
de: &'a mut Deserializer<'de>,
}
impl<'de> forma_core::de::VariantAccess<'de> for VariantContentAccess<'_, 'de> {
type Error = Error;
fn unit_variant(self) -> Result<(), Error> {
forma_core::de::Deserialize::deserialize(self.de)
}
fn newtype_variant_seed<T: DeserializeSeed<'de>>(
self,
seed: T,
) -> Result<T::Value, Error> {
seed.deserialize(self.de)
}
fn tuple_variant<V: Visitor<'de>>(
self,
_len: usize,
visitor: V,
) -> Result<V::Value, Error> {
forma_core::de::Deserializer::deserialize_seq(self.de, visitor)
}
fn struct_variant<V: Visitor<'de>>(
self,
_fields: &'static [&'static str],
visitor: V,
) -> Result<V::Value, Error> {
forma_core::de::Deserializer::deserialize_map(self.de, visitor)
}
}
/// Sequence accessor that presents the next value of the input as a
/// one-element sequence (used by lenient `deserialize_seq` when the input
/// does not start with `[`).
struct SingleElementSeq<'a, 'de> {
    /// Deserializer the single element is read from.
    de: &'a mut Deserializer<'de>,
    /// Set once the element has been requested.
    done: bool,
}

impl<'de> forma_core::de::SeqAccess<'de> for SingleElementSeq<'_, 'de> {
    type Error = Error;

    /// Yields the wrapped value on the first call and `None` afterwards.
    fn next_element_seed<T: DeserializeSeed<'de>>(
        &mut self,
        seed: T,
    ) -> Result<Option<T::Value>, Error> {
        // Mark the element as taken and learn whether it already was.
        let already_taken = std::mem::replace(&mut self.done, true);
        if already_taken {
            return Ok(None);
        }
        let element = seed.deserialize(&mut *self.de)?;
        Ok(Some(element))
    }
}