use decoded_char::DecodedChar;
use locspan::{Meta, Span};
use std::{fmt, io};
mod array;
mod boolean;
mod null;
mod number;
mod object;
mod string;
mod value;
use crate::CodeMap;
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub struct Options {
pub accept_truncated_surrogate_pair: bool,
pub accept_invalid_codepoints: bool,
}
impl Options {
pub fn strict() -> Self {
Self {
accept_truncated_surrogate_pair: false,
accept_invalid_codepoints: false,
}
}
pub fn flexible() -> Self {
Self {
accept_truncated_surrogate_pair: true,
accept_invalid_codepoints: true,
}
}
}
impl Default for Options {
fn default() -> Self {
Self::strict()
}
}
pub trait Parse: Sized {
fn parse_slice(content: &[u8]) -> Result<(Self, CodeMap), Error> {
Self::parse_utf8(utf8_decode::Decoder::new(content.iter().copied()))
.map_err(Error::io_into_utf8)
}
fn parse_slice_with(content: &[u8], options: Options) -> Result<(Self, CodeMap), Error> {
Self::parse_utf8_with(utf8_decode::Decoder::new(content.iter().copied()), options)
.map_err(Error::io_into_utf8)
}
fn parse_str(content: &str) -> Result<(Self, CodeMap), Error> {
Self::parse_utf8(content.chars().map(Ok))
}
fn parse_str_with(content: &str, options: Options) -> Result<(Self, CodeMap), Error> {
Self::parse_utf8_with(content.chars().map(Ok), options)
}
fn parse_infallible_utf8<C>(chars: C) -> Result<(Self, CodeMap), Error>
where
C: Iterator<Item = char>,
{
Self::parse_infallible(chars.map(DecodedChar::from_utf8))
}
fn parse_utf8_infallible_with<C>(chars: C, options: Options) -> Result<(Self, CodeMap), Error>
where
C: Iterator<Item = char>,
{
Self::parse_infallible_with(chars.map(DecodedChar::from_utf8), options)
}
fn parse_utf8<C, E>(chars: C) -> Result<(Self, CodeMap), Error<E>>
where
C: Iterator<Item = Result<char, E>>,
{
Self::parse(chars.map(|c| c.map(DecodedChar::from_utf8)))
}
fn parse_utf8_with<C, E>(chars: C, options: Options) -> Result<(Self, CodeMap), Error<E>>
where
C: Iterator<Item = Result<char, E>>,
{
Self::parse_with(chars.map(|c| c.map(DecodedChar::from_utf8)), options)
}
fn parse_infallible<C>(chars: C) -> Result<(Self, CodeMap), Error>
where
C: Iterator<Item = DecodedChar>,
{
let mut parser = Parser::new(chars.map(Ok));
let value = Self::parse_in(&mut parser, Context::None)?.into_value();
Ok((value, parser.code_map))
}
fn parse_infallible_with<C>(chars: C, options: Options) -> Result<(Self, CodeMap), Error>
where
C: Iterator<Item = DecodedChar>,
{
let mut parser = Parser::new_with(chars.map(Ok), options);
let value = Self::parse_in(&mut parser, Context::None)?.into_value();
Ok((value, parser.code_map))
}
fn parse<C, E>(chars: C) -> Result<(Self, CodeMap), Error<E>>
where
C: Iterator<Item = Result<DecodedChar, E>>,
{
let mut parser = Parser::new(chars);
let value = Self::parse_in(&mut parser, Context::None)?.into_value();
Ok((value, parser.code_map))
}
fn parse_with<C, E>(chars: C, options: Options) -> Result<(Self, CodeMap), Error<E>>
where
C: Iterator<Item = Result<DecodedChar, E>>,
{
let mut parser = Parser::new_with(chars, options);
let value = Self::parse_in(&mut parser, Context::None)?.into_value();
Ok((value, parser.code_map))
}
fn parse_in<C, E>(
parser: &mut Parser<C, E>,
context: Context,
) -> Result<Meta<Self, usize>, Error<E>>
where
C: Iterator<Item = Result<DecodedChar, E>>;
}
pub struct Parser<C: Iterator<Item = Result<DecodedChar, E>>, E> {
chars: C,
pending: Option<DecodedChar>,
position: usize,
options: Options,
code_map: CodeMap,
}
#[inline(always)]
pub fn is_whitespace(c: char) -> bool {
matches!(c, ' ' | '\t' | '\r' | '\n')
}
impl<C: Iterator<Item = Result<DecodedChar, E>>, E> Parser<C, E> {
pub fn new(chars: C) -> Self {
Self {
chars,
pending: None,
position: 0,
options: Options::default(),
code_map: CodeMap::default(),
}
}
pub fn new_with(chars: C, options: Options) -> Self {
Self {
chars,
pending: None,
position: 0,
options,
code_map: CodeMap::default(),
}
}
fn begin_fragment(&mut self) -> usize {
self.code_map.reserve(self.position)
}
fn end_fragment(&mut self, i: usize) {
let entry_count = self.code_map.len();
let entry = self.code_map.get_mut(i).unwrap();
entry.span.set_end(self.position);
entry.volume = entry_count - i;
}
fn peek_char(&mut self) -> Result<Option<char>, Error<E>> {
match self.pending {
Some(c) => Ok(Some(c.chr())),
None => match self.chars.next() {
Some(Ok(c)) => {
self.pending = Some(c);
Ok(Some(c.chr()))
}
Some(Err(e)) => Err(Error::Stream(self.position, e)),
None => Ok(None),
},
}
}
fn next_char(&mut self) -> Result<(usize, Option<char>), Error<E>> {
let c = match self.pending.take() {
Some(c) => Some(c),
None => self
.chars
.next()
.transpose()
.map_err(|e| Error::Stream(self.position, e))?,
};
let p = self.position;
let c = c.map(|c| {
self.position += c.len();
c.chr()
});
Ok((p, c))
}
fn skip_whitespaces(&mut self) -> Result<(), Error<E>> {
while let Some(c) = self.peek_char()? {
if is_whitespace(c) {
self.next_char()?;
} else {
break;
}
}
Ok(())
}
}
#[derive(Debug)]
pub enum Error<E = core::convert::Infallible> {
Stream(usize, E),
Unexpected(usize, Option<char>),
InvalidUnicodeCodePoint(Span, u32),
MissingLowSurrogate(Span, u16),
InvalidLowSurrogate(Span, u16, u32),
InvalidUtf8(usize),
}
impl<E> Error<E> {
#[inline(always)]
fn unexpected(position: usize, c: Option<char>) -> Self {
Self::Unexpected(position, c)
}
pub fn position(&self) -> usize {
match self {
Self::Stream(p, _) => *p,
Self::Unexpected(p, _) => *p,
Self::InvalidUnicodeCodePoint(span, _) => span.start(),
Self::MissingLowSurrogate(span, _) => span.start(),
Self::InvalidLowSurrogate(span, _, _) => span.start(),
Self::InvalidUtf8(p) => *p,
}
}
pub fn span(&self) -> Span {
match self {
Self::Stream(p, _) => Span::new(*p, *p),
Self::Unexpected(p, _) => Span::new(*p, *p),
Self::InvalidUnicodeCodePoint(span, _) => *span,
Self::MissingLowSurrogate(span, _) => *span,
Self::InvalidLowSurrogate(span, _, _) => *span,
Self::InvalidUtf8(p) => Span::new(*p, *p),
}
}
}
impl Error<io::Error> {
fn io_into_utf8(self) -> Error {
match self {
Self::Stream(p, _) => Error::InvalidUtf8(p),
Self::Unexpected(p, e) => Error::Unexpected(p, e),
Self::InvalidUnicodeCodePoint(s, e) => Error::InvalidUnicodeCodePoint(s, e),
Self::MissingLowSurrogate(s, e) => Error::MissingLowSurrogate(s, e),
Self::InvalidLowSurrogate(s, a, b) => Error::InvalidLowSurrogate(s, a, b),
Self::InvalidUtf8(p) => Error::InvalidUtf8(p),
}
}
}
impl<E: fmt::Display> fmt::Display for Error<E> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Stream(_, e) => e.fmt(f),
Self::Unexpected(_, Some(c)) => write!(f, "unexpected character `{}`", c),
Self::Unexpected(_, None) => write!(f, "unexpected end of file"),
Self::InvalidUnicodeCodePoint(_, c) => write!(f, "invalid Unicode code point {:x}", *c),
Self::MissingLowSurrogate(_, _) => write!(f, "missing low surrogate"),
Self::InvalidLowSurrogate(_, _, _) => write!(f, "invalid low surrogate"),
Self::InvalidUtf8(_) => write!(f, "invalid UTF-8"),
}
}
}
impl<E: 'static + std::error::Error> std::error::Error for Error<E> {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
Self::Stream(_, e) => Some(e),
_ => None,
}
}
}
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum Context {
None,
Array,
ObjectKey,
ObjectValue,
}
impl Context {
pub fn follows(&self, c: char) -> bool {
match self {
Self::None => is_whitespace(c),
Self::Array => is_whitespace(c) || matches!(c, ',' | ']'),
Self::ObjectKey => is_whitespace(c) || matches!(c, ':'),
Self::ObjectValue => is_whitespace(c) || matches!(c, ',' | '}'),
}
}
}