use crate::{
Pos,
lexical::{self, Error as _, Token},
};
use bitvec::prelude::*;
use std::{fmt, iter::Take, sync::Arc};
/// The kind of a structured value: an array or an object.
///
/// The explicit `u8` discriminants line up with the `bool` conversions defined for this type:
/// `Arr` (0) corresponds to `false` and `Obj` (1) corresponds to `true`.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[repr(u8)]
pub enum StructKind {
    /// An array.
    Arr = 0,
    /// An object.
    Obj = 1,
}
impl From<StructKind> for bool {
fn from(value: StructKind) -> Self {
(value as u8) == 1
}
}
impl From<bool> for StructKind {
fn from(value: bool) -> Self {
match value {
false => StructKind::Arr,
true => StructKind::Obj,
}
}
}
impl From<BitRef<'_>> for StructKind {
    /// Reads the referenced bit and converts it the same way a plain `bool` is converted.
    fn from(value: BitRef<'_>) -> Self {
        Self::from(*value)
    }
}
/// What the parser expects to see next, given the tokens consumed so far.
///
/// The default is [`Expect::Value`].
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum Expect {
    /// An array element or the end of the array; the state right after an array begins.
    ArrElementOrEnd,
    /// A value separator or the end of the array; the state after a value inside an array.
    ArrElementSepOrEnd,
    /// End of input; the state after a value that is not nested in any array or object.
    Eof,
    /// An object member name; the state after a value separator inside an object.
    ObjName,
    /// An object member name or the end of the object; the state right after an object begins.
    ObjNameOrEnd,
    /// The name separator; the state after an object member name.
    ObjNameSep,
    /// A value separator or the end of the object; the state after a value inside an object.
    ObjValueSepOrEnd,
    /// Any value; the initial state, and the state after a name separator or an array value
    /// separator.
    #[default]
    Value,
}
impl Expect {
    /// Returns the tokens that satisfy this expectation.
    ///
    /// The slice is empty for [`Expect::Eof`]. The element order within each slice is fixed.
    pub const fn allowed_tokens(&self) -> &'static [Token] {
        // Tokens that may start (or be) a value.
        const ANY_VALUE: &[Token] = &[
            Token::ArrBegin,
            Token::LitFalse,
            Token::LitNull,
            Token::LitTrue,
            Token::Num,
            Token::ObjBegin,
            Token::Str,
        ];
        // Same as above, plus the token that closes an array.
        const ANY_VALUE_OR_ARR_END: &[Token] = &[
            Token::ArrBegin,
            Token::ArrEnd,
            Token::LitFalse,
            Token::LitNull,
            Token::LitTrue,
            Token::Num,
            Token::ObjBegin,
            Token::Str,
        ];
        const NAME: &[Token] = &[Token::Str];
        const NAME_OR_OBJ_END: &[Token] = &[Token::Str, Token::ObjEnd];
        const NAME_SEP: &[Token] = &[Token::NameSep];
        const OBJ_END_OR_SEP: &[Token] = &[Token::ObjEnd, Token::ValueSep];
        const ARR_END_OR_SEP: &[Token] = &[Token::ArrEnd, Token::ValueSep];
        const NOTHING: &[Token] = &[];

        match self {
            Self::ArrElementOrEnd => ANY_VALUE_OR_ARR_END,
            Self::ArrElementSepOrEnd => ARR_END_OR_SEP,
            Self::Eof => NOTHING,
            Self::ObjName => NAME,
            Self::ObjNameOrEnd => NAME_OR_OBJ_END,
            Self::ObjNameSep => NAME_SEP,
            Self::ObjValueSepOrEnd => OBJ_END_OR_SEP,
            Self::Value => ANY_VALUE,
        }
    }
}
impl fmt::Display for Expect {
    /// Writes a short human-readable description of the expectation, as used in syntax error
    /// messages.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            Self::Value => "value",
            Self::Eof => "EOF",
            Self::ArrElementOrEnd => "array element or ]",
            Self::ArrElementSepOrEnd => ", or ]",
            Self::ObjName => "object member name",
            Self::ObjNameOrEnd => "object member name or }",
            Self::ObjNameSep => ":",
            Self::ObjValueSepOrEnd => ", or }",
        })
    }
}
/// Size in bytes of the inline (non-heap) bit storage used to track nesting.
const INLINE_LEN_BYTES: usize = 16;
/// Number of `usize` words needed to hold [`NUM_INLINED_LEVELS`] bits.
const INLINE_LEN_USIZES: usize = NUM_INLINED_LEVELS / usize::BITS as usize;
/// Number of nesting levels (one bit each) that fit in the inline storage.
const NUM_INLINED_LEVELS: usize = INLINE_LEN_BYTES * u8::BITS as usize;
/// Stack of nested structure kinds, stored as one bit per level.
///
/// Starts out in fixed-size inline storage and switches to a heap-allocated bit vector once a
/// push no longer fits inline.
#[derive(Clone, Debug)]
enum StructContext {
    /// Inline storage: the number of levels in use, followed by the bits themselves. Only the
    /// first `len` bits are meaningful.
    Inline(usize, BitArray<[usize; INLINE_LEN_USIZES]>),
    /// Heap storage: every bit in the vector is a level in use.
    Heap(BitVec),
}
impl StructContext {
fn push(&mut self, s: StructKind) {
match self {
StructContext::Inline(len, array) => {
if *len < array.len() {
array.set(*len, s.into());
*len += 1;
} else {
let mut v = BitVec::with_capacity(2 * array.len());
v.extend_from_bitslice(array);
v.push(s.into());
*self = StructContext::Heap(v);
}
}
StructContext::Heap(v) => v.push(s.into()),
}
}
fn pop(&mut self) {
match self {
StructContext::Inline(len, _) => *len -= 1,
StructContext::Heap(v) => {
v.pop().unwrap();
}
};
}
fn peek(&self) -> Option<StructKind> {
match self {
StructContext::Inline(0, _) => None,
StructContext::Inline(len, array) => Some(array[*len - 1].into()),
StructContext::Heap(v) => v.last().map(Into::into),
}
}
fn level(&self) -> usize {
match self {
StructContext::Inline(len, _) => *len,
StructContext::Heap(v) => v.len(),
}
}
fn is_struct(&self) -> bool {
self.level() > 0
}
fn iter(&self) -> StructIter<bitvec::slice::Iter<'_, usize, Lsb0>> {
StructIter(match self {
StructContext::Inline(len, array) => array[0..*len].iter(),
StructContext::Heap(v) => v.iter(),
})
}
}
impl IntoIterator for StructContext {
    type Item = bool;
    type IntoIter = StructContextIntoIter;

    /// Consumes the context and yields one `bool` per level, in push order.
    fn into_iter(self) -> Self::IntoIter {
        match self {
            Self::Heap(bits) => StructContextIntoIter::Heap(bits.into_iter()),
            Self::Inline(used, slots) => {
                // Only the first `used` bits of the inline array are live.
                let live = slots.into_iter().take(used);
                StructContextIntoIter::Inline(live)
            }
        }
    }
}
impl PartialEq<StructContext> for StructContext {
fn eq(&self, other: &StructContext) -> bool {
match (self, other) {
(Self::Inline(m, a), Self::Inline(n, b)) => m == n && a[..*m] == b[..*m],
(Self::Inline(m, a), Self::Heap(w)) => *m == w.len() && &a[..*m] == w,
(Self::Heap(v), Self::Inline(n, b)) => v.len() == *n && &b[..*n] == v,
(Self::Heap(v), Self::Heap(w)) => v == w,
}
}
}
impl Eq for StructContext {}
/// Owning iterator over the levels of a `StructContext`, yielding one `bool` per level.
///
/// Public only because it appears in the `IntoIterator` implementation for [`Context`]; it is
/// hidden from the generated documentation.
#[doc(hidden)]
pub enum StructContextIntoIter {
    /// Iterates inline storage, truncated to the number of levels in use.
    Inline(Take<<BitArray<[usize; INLINE_LEN_USIZES]> as IntoIterator>::IntoIter>),
    /// Iterates heap storage.
    Heap(<BitVec as IntoIterator>::IntoIter),
}
impl Iterator for StructContextIntoIter {
type Item = bool;
fn next(&mut self) -> Option<Self::Item> {
match self {
StructContextIntoIter::Inline(i) => i.next(),
StructContextIntoIter::Heap(i) => i.next(),
}
}
}
impl ExactSizeIterator for StructContextIntoIter {
fn len(&self) -> usize {
match self {
StructContextIntoIter::Inline(i) => i.len(),
StructContextIntoIter::Heap(i) => i.len(),
}
}
}
impl Default for StructContext {
    /// Creates an empty context (zero levels) backed by inline storage.
    fn default() -> Self {
        let no_bits = BitArray::new([0usize; INLINE_LEN_USIZES]);
        Self::Inline(0, no_bits)
    }
}
/// The parser's structural state: the stack of enclosing arrays/objects plus what is expected
/// next.
///
/// The default value has no enclosing structures and expects a value.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct Context {
    // Stack of enclosing structure kinds, outermost first.
    inner: StructContext,
    // What the parser expects to see next.
    expect: Expect,
}
impl Context {
pub fn expect(&self) -> Expect {
self.expect
}
pub fn level(&self) -> usize {
self.inner.level()
}
pub fn is_struct(&self) -> bool {
self.inner.is_struct()
}
pub fn struct_kind(&self) -> Option<StructKind> {
self.inner.peek()
}
pub fn iter(&self) -> StructIter<bitvec::slice::Iter<'_, usize, Lsb0>> {
self.inner.iter()
}
}
impl IntoIterator for Context {
type Item = StructKind;
type IntoIter = StructIter<StructContextIntoIter>;
fn into_iter(self) -> Self::IntoIter {
StructIter(self.inner.into_iter())
}
}
/// Iterator adapter that converts every item of the wrapped iterator into a [`StructKind`].
pub struct StructIter<I>(I);
impl<I> Iterator for StructIter<I>
where
I: Iterator,
I::Item: Into<StructKind>,
{
type Item = StructKind;
fn next(&mut self) -> Option<Self::Item> {
self.0.next().map(Into::<StructKind>::into)
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.0.size_hint()
}
}
impl<I> ExactSizeIterator for StructIter<I>
where
I: ExactSizeIterator,
I::Item: Into<StructKind>,
{
fn len(&self) -> usize {
self.0.len()
}
}
/// Result state of the parser's most recent `next` call.
enum Content {
    /// No error is recorded; token content is fetched from the lexer on demand.
    Lazy,
    /// An error is recorded; the parser stays in this state and keeps returning the error token.
    Err(Error),
}
/// Category of a parser [`Error`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ErrorKind {
    /// An array or object opening token arrived while the nesting level was already at the
    /// parser's configured maximum.
    Level {
        /// Nesting level at the time the opening token was seen.
        level: usize,
        /// The opening token that was rejected.
        token: lexical::Token,
    },
    /// The lexer reported an error; wraps the lexer's own error kind.
    Lexical(lexical::ErrorKind),
    /// A token arrived that is not allowed by the parser's current expectation.
    Syntax {
        /// Snapshot of the parser context when the unexpected token was seen.
        context: Context,
        /// The unexpected token.
        token: lexical::Token,
    },
}
impl fmt::Display for ErrorKind {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Level { level, token } => {
write!(
f,
"level error: level {level} would exceed parser's configured maximum on {token}"
)
}
Self::Lexical(lexical::ErrorKind::Read) => write!(f, "read error"),
Self::Lexical(inner) => {
write!(f, "lexical error: ")?;
inner.fmt(f)
}
Self::Syntax { context, token } => {
write!(
f,
"syntax error: expected {} but got {token}",
context.expect()
)
}
}
}
}
/// An error reported by the parser: what went wrong, where, and (optionally) the underlying
/// cause.
#[derive(Debug, Clone)]
pub struct Error {
    // Category of the error.
    kind: ErrorKind,
    // Position associated with the error.
    pos: Pos,
    // Underlying cause, if any; reference-counted so the error itself stays cloneable.
    source: Option<Arc<dyn std::error::Error + Send + Sync + 'static>>,
}
impl Error {
    /// Returns the category of this error.
    pub fn kind(&self) -> &ErrorKind {
        let Self { kind, .. } = self;
        kind
    }

    /// Returns the position associated with this error.
    pub fn pos(&self) -> &Pos {
        let Self { pos, .. } = self;
        pos
    }

    /// Returns the underlying cause of this error, if one was recorded.
    pub fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match &self.source {
            Some(cause) => {
                // Borrow through the `Arc` and drop the `Send + Sync` markers from the type.
                let cause: &(dyn std::error::Error + 'static) = &**cause;
                Some(cause)
            }
            None => None,
        }
    }
}
impl fmt::Display for Error {
    /// Writes the error category followed by the position, separated by `" at "`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { kind, pos, .. } = self;
        write!(f, "{} at {}", kind, pos)
    }
}
impl std::error::Error for Error {
    /// Returns the underlying cause of this error, if any.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        // Delegates to the inherent `Error::source` method. The path `Error::source` resolves
        // to the inherent method rather than to this trait method, so this does not recurse.
        Error::source(self)
    }
}
/// Syntax parser layered over a lexical analyzer.
///
/// Pulls tokens from the lexer one at a time, tracks array/object nesting, and reports tokens
/// that are not allowed in the current context as errors.
pub struct Parser<L> {
    // Source of tokens.
    lexer: L,
    // Nesting stack and current expectation.
    context: Context,
    // Outcome of the most recent `next` call (lazy content or a recorded error).
    content: Content,
    // Maximum nesting level the parser will accept.
    max_level: usize,
}
impl<L> Parser<L>
where
    L: lexical::Analyzer,
    L::Error: 'static,
{
    /// Creates a parser over `lexer` with the default maximum nesting level
    /// (`NUM_INLINED_LEVELS`).
    pub fn new(lexer: L) -> Self {
        Self {
            lexer,
            context: Context::default(),
            content: Content::Lazy,
            max_level: NUM_INLINED_LEVELS,
        }
    }

    /// Reads the next token from the lexer, validates it against the current expectation, and
    /// updates the parser context.
    ///
    /// Returns the token on success. Returns [`Token::Err`] when the lexer reports an error,
    /// when the token is not allowed in the current context, or when an array/object opening
    /// token would nest deeper than the configured maximum level. Once an error has been
    /// recorded, every later call returns [`Token::Err`] without consulting the lexer; the
    /// error itself is available from [`Parser::err`] or [`Parser::try_content`].
    ///
    /// Whitespace tokens are passed through in any context without changing the expectation.
    // Deliberately an inherent method named `next` rather than an `Iterator` implementation.
    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> Token {
        // Errors are sticky: once recorded, the parser never advances again.
        if matches!(self.content, Content::Err(_)) {
            return Token::Err;
        }
        let mut token = self.lexer.next();
        let mut content = Content::Lazy;
        match (self.context.expect, token) {
            // Opening an object where a value is allowed.
            (e, Token::ObjBegin) if e == Expect::Value || e == Expect::ArrElementOrEnd => {
                let level = self.level();
                if level < self.max_level {
                    self.context.inner.push(StructKind::Obj);
                    self.context.expect = Expect::ObjNameOrEnd;
                } else {
                    content = Content::Err(Error {
                        kind: ErrorKind::Level { level, token },
                        pos: *self.pos(),
                        source: None,
                    });
                    token = Token::Err;
                }
            }
            // Opening an array where a value is allowed.
            (e, Token::ArrBegin) if e == Expect::Value || e == Expect::ArrElementOrEnd => {
                let level = self.level();
                if level < self.max_level {
                    self.context.inner.push(StructKind::Arr);
                    self.context.expect = Expect::ArrElementOrEnd;
                } else {
                    content = Content::Err(Error {
                        kind: ErrorKind::Level { level, token },
                        pos: *self.pos(),
                        source: None,
                    });
                    token = Token::Err;
                }
            }
            // A complete scalar value.
            (Expect::Value, t) | (Expect::ArrElementOrEnd, t)
                if t == Token::LitFalse
                    || t == Token::LitNull
                    || t == Token::LitTrue
                    || t == Token::Num
                    || t == Token::Str =>
            {
                self.got_value(false);
            }
            (Expect::ObjName, Token::Str) => {
                self.context.expect = Expect::ObjNameSep;
            }
            (Expect::ObjNameOrEnd, Token::ObjEnd) => {
                self.got_value(true);
            }
            (Expect::ObjNameOrEnd, Token::Str) => {
                self.context.expect = Expect::ObjNameSep;
            }
            (Expect::ObjNameSep, Token::NameSep) => {
                self.context.expect = Expect::Value;
            }
            (Expect::ObjValueSepOrEnd, Token::ValueSep) => {
                self.context.expect = Expect::ObjName;
            }
            (Expect::ObjValueSepOrEnd, Token::ObjEnd) => {
                self.got_value(true);
            }
            (Expect::ArrElementOrEnd, Token::ArrEnd) => {
                self.got_value(true);
            }
            (Expect::ArrElementSepOrEnd, Token::ArrEnd) => {
                self.got_value(true);
            }
            (Expect::ArrElementSepOrEnd, Token::ValueSep) => {
                self.context.expect = Expect::Value;
            }
            (Expect::Eof, Token::Eof) => (),
            // Whitespace is allowed anywhere and does not change the expectation.
            (_, Token::White) => (),
            // The lexer failed: wrap its error, keeping the original as the source.
            (_, Token::Err) => {
                let err = self
                    .lexer
                    .try_content()
                    .expect_err("lexer returned error token, must contain error value");
                let kind = ErrorKind::Lexical(err.kind());
                let source =
                    Some(Arc::new(err) as Arc<dyn std::error::Error + Send + Sync + 'static>);
                content = Content::Err(Error {
                    kind,
                    pos: *self.lexer.pos(),
                    source,
                })
            }
            // Anything else is a valid token in the wrong place.
            (_, _) => {
                content = Content::Err(Error {
                    kind: ErrorKind::Syntax {
                        context: self.context.clone(),
                        token,
                    },
                    pos: *self.lexer.pos(),
                    source: None,
                });
                token = Token::Err;
            }
        }
        self.content = content;
        token
    }

    /// Returns the next token that is not [`Token::White`].
    ///
    /// Skips every whitespace token in a row, so the result is never [`Token::White`] even if
    /// the lexer splits one run of whitespace into several tokens. The loop ends because the
    /// parser returns [`Token::Err`] on every call once an error has been recorded, and
    /// otherwise relies on the lexer eventually producing a non-whitespace token.
    pub fn next_non_white(&mut self) -> Token {
        loop {
            let token = self.next();
            if token != Token::White {
                break token;
            }
        }
    }

    /// Returns the next token that is not whitespace, a name separator, or a value separator.
    pub fn next_meaningful(&mut self) -> Token {
        let mut token = self.next();
        loop {
            match token {
                Token::NameSep | Token::ValueSep | Token::White => token = self.next(),
                _ => break token,
            }
        }
    }

    /// Returns the content of the most recent token.
    ///
    /// # Panics
    ///
    /// Panics if the parser has recorded an error; see [`Parser::try_content`].
    #[inline]
    pub fn content(&self) -> L::Content {
        self.try_content().unwrap()
    }

    /// Returns the recorded error.
    ///
    /// # Panics
    ///
    /// Panics if the parser has not recorded an error; see [`Parser::try_content`].
    #[inline]
    pub fn err(&self) -> Error {
        self.try_content().unwrap_err()
    }

    /// Returns the lexer's current position.
    #[inline(always)]
    pub fn pos(&self) -> &Pos {
        self.lexer.pos()
    }

    /// Returns the content of the most recent token, or a copy of the recorded error.
    ///
    /// # Panics
    ///
    /// Panics if no parser error is recorded but the lexer itself reports an error; the parser
    /// records an error whenever the lexer returns an error token, so this indicates a broken
    /// invariant.
    pub fn try_content(&self) -> Result<L::Content, Error> {
        match &self.content {
            Content::Lazy => match self.lexer.try_content() {
                Ok(v) => Ok(v),
                Err(_) => panic!("lexer must not be in an error state"),
            },
            Content::Err(err) => Err(err.clone()),
        }
    }

    /// Returns the parser's current context.
    pub fn context(&self) -> &Context {
        &self.context
    }

    /// Returns the current nesting level.
    #[inline(always)]
    pub fn level(&self) -> usize {
        self.context.level()
    }

    /// Returns the configured maximum nesting level.
    pub fn max_level(&self) -> usize {
        self.max_level
    }

    /// Sets the maximum nesting level.
    ///
    /// # Panics
    ///
    /// Panics if the current nesting level is already greater than `max_level`.
    pub fn set_max_level(&mut self, max_level: usize) {
        if self.level() > max_level {
            panic!(
                "current level {} exceeds new max level {max_level}",
                self.level()
            );
        }
        self.max_level = max_level;
    }

    /// Creates a parser over `lexer` with the given maximum nesting level.
    pub fn with_max_level(lexer: L, max_level: usize) -> Self {
        let mut parser = Self::new(lexer);
        parser.set_max_level(max_level);
        parser
    }

    /// Consumes the parser and returns the wrapped lexer.
    pub fn into_inner(self) -> L {
        self.lexer
    }

    /// Updates the context after a complete value has been seen.
    ///
    /// When `pop` is `true` the value was an array or object that just ended, so its level is
    /// removed first. The new expectation then depends on the enclosing structure, if any.
    fn got_value(&mut self, pop: bool) {
        if pop {
            self.context.inner.pop();
        }
        match self.context.inner.peek() {
            Some(StructKind::Arr) => self.context.expect = Expect::ArrElementSepOrEnd,
            Some(StructKind::Obj) => self.context.expect = Expect::ObjValueSepOrEnd,
            None => self.context.expect = Expect::Eof,
        }
    }
}
// Unit tests for the conversions, the nesting stack, the expectation table, and a few
// end-to-end parser regressions.
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;
    // `bool` and `StructKind` convert to each other in both directions.
    #[rstest]
    #[case(false, StructKind::Arr)]
    #[case(true, StructKind::Obj)]
    fn test_struct_kind_from_bool(#[case] t: bool, #[case] expect: StructKind) {
        assert_eq!(expect, t.into());
        assert_eq!(t, Into::<bool>::into(expect));
    }
    // NOTE(review): indexing a bit array appears to yield a plain `bool`, so this likely
    // exercises `From<bool>` rather than `From<BitRef>` — TODO confirm.
    #[test]
    fn test_struct_kind_from_bitref() {
        let bits: BitArray<[u8; 1]> = bitarr![u8, Lsb0; 0, 1];
        assert_eq!(StructKind::Arr, bits[0].into());
        assert_eq!(StructKind::Obj, bits[1].into());
    }
    // Pushes every kind in `expect` onto a fresh context and then pops them all again,
    // checking level, peek, equality, and both iteration forms at every step.
    #[rstest]
    #[case::empty(None::<StructKind>)]
    #[case::array([StructKind::Arr])]
    #[case::array_array([StructKind::Arr, StructKind::Arr])]
    #[case::array_array_array([StructKind::Arr, StructKind::Arr, StructKind::Arr])]
    #[case::array_array_object([StructKind::Arr, StructKind::Arr, StructKind::Obj])]
    #[case::array_object([StructKind::Arr, StructKind::Obj])]
    #[case::array_object_array([StructKind::Arr, StructKind::Obj, StructKind::Arr])]
    #[case::array_object_object([StructKind::Arr, StructKind::Obj, StructKind::Obj])]
    #[case::object([StructKind::Obj])]
    #[case::object_array([StructKind::Obj, StructKind::Arr])]
    #[case::object_array_array([StructKind::Obj, StructKind::Arr, StructKind::Arr])]
    #[case::object_array_object([StructKind::Obj, StructKind::Arr, StructKind::Obj])]
    #[case::object_object([StructKind::Obj, StructKind::Obj])]
    #[case::object_object_array([StructKind::Obj, StructKind::Obj, StructKind::Arr])]
    #[case::object_object_object([StructKind::Obj, StructKind::Obj, StructKind::Obj])]
    // More levels than fit inline, so the context has to switch to heap storage.
    #[case::heap(std::iter::repeat([false, true]).take(NUM_INLINED_LEVELS+3).flatten().map(Into::into))]
    fn test_struct_context<I>(#[case] expect: I)
    where
        I: IntoIterator<Item = StructKind>,
    {
        let expect = expect.into_iter().collect::<Vec<_>>();
        let mut ctx = StructContext::default();
        // A fresh context is empty.
        assert_eq!(ctx, ctx);
        assert_eq!(0, ctx.level());
        assert_eq!(None, ctx.peek());
        assert!(!ctx.is_struct());
        assert_eq!(Vec::<StructKind>::new(), ctx.iter().collect::<Vec<_>>());
        // Push phase: each push grows the level by one and changes the context.
        for (i, s) in expect.iter().enumerate() {
            let prev_ctx = ctx.clone();
            ctx.push(*s);
            assert_eq!(ctx, ctx);
            assert_ne!(prev_ctx, ctx);
            assert_ne!(ctx, prev_ctx);
            assert_eq!(i + 1, ctx.level());
            assert_eq!(Some(*s), ctx.peek());
            assert!(ctx.is_struct());
            let progress = expect[0..=i].to_vec();
            assert_eq!(progress, ctx.iter().collect::<Vec<_>>());
            let iter = ctx.clone().into_iter();
            assert_eq!(i + 1, iter.len());
            assert_eq!(progress, iter.map(Into::into).collect::<Vec<_>>());
        }
        // Pop phase: unwind in reverse, checking the state before each pop.
        for (i, s) in expect.iter().enumerate().rev() {
            assert_eq!(i + 1, ctx.level());
            assert_eq!(Some(*s), ctx.peek());
            assert!(ctx.is_struct());
            let progress = expect[0..=i].to_vec();
            assert_eq!(progress, ctx.iter().collect::<Vec<_>>());
            let iter = ctx.clone().into_iter();
            assert_eq!(i + 1, iter.len());
            assert_eq!(progress, iter.map(Into::into).collect::<Vec<_>>());
            let prev_ctx = ctx.clone();
            ctx.pop();
            assert_eq!(ctx, ctx);
            assert_ne!(prev_ctx, ctx);
            assert_ne!(ctx, prev_ctx);
        }
        // After popping everything the context is empty again.
        assert_eq!(ctx, ctx);
        assert_eq!(0, ctx.level());
        assert_eq!(None, ctx.peek());
        assert!(!ctx.is_struct());
        assert_eq!(Vec::<StructKind>::new(), ctx.iter().collect::<Vec<_>>());
    }
    // Each expectation reports exactly the listed tokens, in the listed order.
    #[rstest]
    #[case(Expect::ArrElementOrEnd, [Token::ArrBegin, Token::ArrEnd, Token::LitFalse, Token::LitNull, Token::LitTrue, Token::Num, Token::ObjBegin, Token::Str])]
    #[case(Expect::ArrElementSepOrEnd, [Token::ArrEnd, Token::ValueSep])]
    #[case(Expect::Eof, [])]
    #[case(Expect::ObjName, [Token::Str])]
    #[case(Expect::ObjNameOrEnd, [Token::Str, Token::ObjEnd])]
    #[case(Expect::ObjNameSep, [Token::NameSep])]
    #[case(Expect::ObjValueSepOrEnd, [Token::ObjEnd, Token::ValueSep])]
    #[case(Expect::Value, [Token::ArrBegin, Token::LitFalse, Token::LitNull, Token::LitTrue, Token::Num, Token::ObjBegin, Token::Str])]
    fn test_expect_allowed_tokens<const N: usize>(
        #[case] input: Expect,
        #[case] expect: [Token; N],
    ) {
        let actual = input.allowed_tokens();
        assert_eq!(expect, actual);
    }
    // Regression: token content is readable after an array start and after a number in it.
    #[test]
    fn temp_test_to_repro_bug_delete_or_replace_me_pls() {
        let mut parser = lexical::fixed::FixedAnalyzer::new(&b"[1]"[..]).into_parser();
        assert_eq!(Token::ArrBegin, parser.next());
        assert_eq!("[", parser.content().literal());
        assert_eq!(Token::Num, parser.next());
        assert_eq!("1", parser.content().literal());
    }
    // Regression: `next_meaningful` skips the value separator and whitespace between elements.
    #[test]
    fn temp_test_to_repro_bug_delete_or_replace_me_pls_2() {
        let mut parser = lexical::fixed::FixedAnalyzer::new(&b"[1, 2]"[..]).into_parser();
        assert_eq!(Token::ArrBegin, parser.next_meaningful());
        assert_eq!("[", parser.content().literal());
        assert_eq!(Token::Num, parser.next_meaningful());
        assert_eq!("1", parser.content().literal());
        assert_eq!(Token::Num, parser.next_meaningful());
        assert_eq!("2", parser.content().literal());
    }
    // Regression: a mismatched closing token inside an array yields the error token.
    #[test]
    fn temp_test_to_repro_bug_delete_or_replace_me_pls_3() {
        let mut parser = lexical::fixed::FixedAnalyzer::new(&b"[}"[..]).into_parser();
        assert_eq!(Token::ArrBegin, parser.next());
        assert_eq!("[", parser.content().literal());
        assert_eq!(Token::Err, parser.next());
    }
    // Regression: a nested document parses to EOF without error, and every string token's
    // literal is at least two characters long.
    #[test]
    fn temp_test_to_repro_bug_delete_or_replace_me_pls_4() {
        let mut parser = lexical::fixed::FixedAnalyzer::new(
            &br#"{"multiValueHeaders":{"foo":["bar"],"foo":["baz"]}}"#[..],
        )
        .into_parser();
        loop {
            match parser.next() {
                Token::Err => panic!("{:?}", parser.err()),
                Token::Str => assert!(
                    parser.content().literal().len() >= 2,
                    "literal content: {:?}",
                    parser.content().literal()
                ),
                Token::Eof => break,
                _ => (),
            };
        }
    }
}