use crate::{
Buf, EqStr, IntoBuf, OrdStr, Pos,
lexical::{self, ErrorKind, Token, Unescaped, read},
syntax,
};
use bytes::{Buf as _, Bytes};
use std::{
borrow::Cow,
cmp::Ordering,
convert::Infallible,
fmt,
hash::{Hash, Hasher},
str::FromStr,
sync::Arc,
};
/// An owned, immutable piece of JSON text backed by the lexical reader's
/// literal representation.
#[derive(Clone, Debug)]
pub struct Literal(read::Literal);

impl Literal {
    /// Wraps a `'static` string slice without copying.
    pub const fn from_static(s: &'static str) -> Self {
        Self(read::Literal::from_static(s))
    }

    /// Copies the referenced string into a new literal.
    pub fn from_ref<T: AsRef<str> + ?Sized>(s: &T) -> Self {
        Self(read::Literal::from_ref(s))
    }

    /// Takes ownership of `s` as a new literal.
    pub fn from_string(s: String) -> Self {
        Self(read::Literal::from_string(s))
    }

    /// Length of the literal text in bytes.
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// Whether the literal contains no bytes.
    pub fn is_empty(&self) -> bool {
        self.0.len() == 0
    }
}
impl IntoBuf for Literal {
    type Buf = LiteralBuf;

    /// Converts the literal into a byte buffer over its contents.
    fn into_buf(self) -> Self::Buf {
        LiteralBuf(self.0.into_buf())
    }
}

impl fmt::Display for Literal {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Delegate straight to the inner literal's `Display`.
        fmt::Display::fmt(&self.0, f)
    }
}

impl EqStr for Literal {}

impl Eq for Literal {}

impl From<Literal> for String {
    fn from(value: Literal) -> Self {
        value.0.into()
    }
}
impl<T: ?Sized + AsRef<str>> From<&T> for Literal {
    fn from(value: &T) -> Self {
        // `value` already satisfies `AsRef<str>`; the previous `&value`
        // needlessly went through the blanket `AsRef` impl for `&T`.
        Literal::from_ref(value)
    }
}

impl<'a> From<Cow<'a, str>> for Literal {
    fn from(value: Cow<'a, str>) -> Self {
        match value {
            // Borrowed text is copied; owned text is moved without copying.
            Cow::Borrowed(s) => Literal::from_ref(s),
            Cow::Owned(s) => Literal::from_string(s),
        }
    }
}

impl From<String> for Literal {
    fn from(value: String) -> Self {
        Literal::from_string(value)
    }
}

impl FromStr for Literal {
    type Err = Infallible;

    /// Building a literal from a string slice cannot fail.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Ok(Literal::from_ref(s))
    }
}
impl Hash for Literal {
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.0.hash(state)
    }
}

impl Ord for Literal {
    fn cmp(&self, other: &Self) -> Ordering {
        Ord::cmp(&self.0, &other.0)
    }
}

impl OrdStr for Literal {
    fn cmp(&self, other: &str) -> Ordering {
        OrdStr::cmp(&self.0, other)
    }
}

impl PartialEq for Literal {
    fn eq(&self, other: &Self) -> bool {
        // Cheap length reject before comparing contents; collapsed from the
        // `if cond { false } else { .. }` boolean anti-pattern.
        self.len() == other.len() && self.0 == other.0
    }
}
impl PartialEq<str> for Literal {
fn eq(&self, other: &str) -> bool {
if self.len() != other.len() {
false
} else {
self.0 == other
}
}
}
impl PartialEq<&str> for Literal {
fn eq(&self, other: &&str) -> bool {
self == *other
}
}
impl PartialEq<String> for Literal {
fn eq(&self, other: &String) -> bool {
self == other.as_str()
}
}
impl PartialEq<Literal> for str {
fn eq(&self, other: &Literal) -> bool {
other == self
}
}
impl PartialEq<Literal> for &str {
fn eq(&self, other: &Literal) -> bool {
other == self
}
}
impl PartialEq<Literal> for String {
fn eq(&self, other: &Literal) -> bool {
other == self
}
}
impl PartialOrd for Literal {
    /// Always `Some`: `Literal` has a total order via `Ord`.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}

impl PartialOrd<str> for Literal {
    fn partial_cmp(&self, other: &str) -> Option<Ordering> {
        Some(OrdStr::cmp(self, other))
    }
}

impl PartialOrd<Literal> for str {
    fn partial_cmp(&self, other: &Literal) -> Option<Ordering> {
        // Compare from the literal's side, then flip the result.
        other.partial_cmp(self).map(Ordering::reverse)
    }
}

impl PartialOrd<&str> for Literal {
    fn partial_cmp(&self, other: &&str) -> Option<Ordering> {
        Some(OrdStr::cmp(self, *other))
    }
}

impl PartialOrd<Literal> for &str {
    fn partial_cmp(&self, other: &Literal) -> Option<Ordering> {
        // Compare from the literal's side, then flip the result.
        other.partial_cmp(*self).map(Ordering::reverse)
    }
}

impl PartialOrd<String> for Literal {
    fn partial_cmp(&self, other: &String) -> Option<Ordering> {
        Some(OrdStr::cmp(self, other.as_str()))
    }
}

impl PartialOrd<Literal> for String {
    fn partial_cmp(&self, other: &Literal) -> Option<Ordering> {
        str::partial_cmp(self.as_str(), other)
    }
}
/// Byte-buffer view over a [`Literal`]'s contents.
pub struct LiteralBuf(read::LiteralBuf);

impl LiteralBuf {
    /// Advances the read position by `n` bytes.
    pub fn advance(&mut self, n: usize) {
        self.0.advance(n)
    }

    /// Returns the current contiguous run of unread bytes.
    pub fn chunk(&self) -> &[u8] {
        self.0.chunk()
    }

    /// Number of unread bytes remaining.
    pub fn remaining(&self) -> usize {
        self.0.remaining()
    }

    /// Copies bytes into `dst`, failing if fewer than `dst.len()` remain.
    pub fn try_copy_to_slice(&mut self, dst: &mut [u8]) -> Result<(), crate::BufUnderflow> {
        self.0.try_copy_to_slice(dst)
    }
}

impl Buf for LiteralBuf {
    // The trait delegates to the wrapped buffer directly; the inherent
    // methods above are thin wrappers over the same calls.
    fn advance(&mut self, n: usize) {
        self.0.advance(n);
    }

    fn chunk(&self) -> &[u8] {
        self.0.chunk()
    }

    fn remaining(&self) -> usize {
        self.0.remaining()
    }

    fn try_copy_to_slice(&mut self, dst: &mut [u8]) -> Result<(), crate::BufUnderflow> {
        self.0.try_copy_to_slice(dst)
    }
}
/// Token content captured by the analyzer: raw literal text plus its
/// unescaped form.
#[derive(Debug)]
pub struct Content(read::Content);

impl Content {
    /// Returns a copy of the raw (still-escaped) literal text.
    pub fn literal(&self) -> Literal {
        Literal(self.0.literal().clone())
    }

    /// Whether the literal contains escape sequences.
    pub fn is_escaped(&self) -> bool {
        self.0.is_escaped()
    }

    /// Returns the unescaped form, wrapping a literal variant in [`Literal`].
    pub fn unescaped(&self) -> Unescaped<Literal> {
        match self.0.unescaped() {
            Unescaped::Literal(inner) => Unescaped::Literal(Literal(inner)),
            Unescaped::Expanded(expanded) => Unescaped::Expanded(expanded),
        }
    }
}

impl fmt::Display for Content {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Displays the raw literal, escapes and all.
        fmt::Display::fmt(&self.literal(), f)
    }
}

impl super::Content for Content {
    type Literal<'a> = Literal;

    #[inline(always)]
    fn literal<'a>(&'a self) -> Self::Literal<'a> {
        Content::literal(self)
    }

    #[inline(always)]
    fn is_escaped(&self) -> bool {
        Content::is_escaped(self)
    }

    #[inline(always)]
    fn unescaped<'a>(&'a self) -> Unescaped<Self::Literal<'a>> {
        Content::unescaped(self)
    }
}
// Compile-time size guards: the build fails if these wrappers grow past 24
// bytes, keeping them cheap to move and clone.
const _: [(); 24] = [(); std::mem::size_of::<Literal>()];
const _: [(); 24] = [(); std::mem::size_of::<Content>()];
/// Analyzer error: the lexical error kind, the position at which it occurred,
/// and an optional underlying pipe error as the source.
#[derive(Clone, Debug)]
pub struct Error<E> {
    kind: ErrorKind,
    pos: Pos,
    source: Option<Arc<E>>,
}

impl<E> Error<E> {
    /// The kind of error.
    pub fn kind(&self) -> ErrorKind {
        self.kind
    }

    /// The position in the input at which the error was detected.
    pub fn pos(&self) -> &Pos {
        &self.pos
    }
}

impl<E> Error<E>
where
    E: std::error::Error,
{
    /// Builds an error for a lexical (non-read) failure at `pos`.
    fn new_lexical(kind: ErrorKind, pos: &Pos) -> Self {
        Self {
            kind,
            pos: *pos,
            source: None,
        }
    }

    /// Builds an error for a failed read from the underlying pipe at `pos`.
    fn new_read(pos: &Pos) -> Self {
        Self {
            kind: ErrorKind::Read,
            pos: *pos,
            source: None,
        }
    }
}
impl<E> fmt::Display for Error<E> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.kind.fmt_at(f, Some(&self.pos))
    }
}

impl<E> std::error::Error for Error<E>
where
    E: std::error::Error + 'static,
{
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        // Borrow through the `Arc` and coerce to a trait object.
        self.source
            .as_deref()
            .map(|e| e as &(dyn std::error::Error + 'static))
    }
}

impl<E> lexical::Error for Error<E>
where
    E: std::error::Error + Send + Sync + 'static,
{
    fn kind(&self) -> ErrorKind {
        self.kind
    }

    fn pos(&self) -> &Pos {
        &self.pos
    }
}
/// Source of input chunks for [`PipeAnalyzer`].
pub trait Pipe {
/// Error type produced when the pipe fails to deliver a chunk.
type Error: std::error::Error + Send + Sync + 'static;
/// Returns the next chunk, `None` at end of input, or `Some(Err(_))` on
/// failure.
fn recv(&mut self) -> Option<Result<Bytes, Self::Error>>;
}
impl Pipe for std::sync::mpsc::Receiver<Bytes> {
type Error = Infallible;
fn recv(&mut self) -> Option<Result<Bytes, Self::Error>> {
// A channel receive error just means the sender hung up, which is
// treated as end of input rather than a failure.
std::sync::mpsc::Receiver::recv(self).ok().map(Ok)
}
}
// Adapter exposing a `Pipe` as `std::io::Read` for the inner `ReadAnalyzer`.
struct TempReader<P> {
pipe: P,
// Partially-consumed chunk carried over between `read` calls.
chunk: Option<Bytes>,
}
impl<P> TempReader<P> {
fn new(pipe: P) -> Self {
Self { pipe, chunk: None }
}
}
impl<P: Pipe> std::io::Read for TempReader<P> {
    /// Fills `buf` from the buffered chunk, pulling a new chunk from the
    /// pipe when the buffer is empty.
    ///
    /// Returns `Ok(0)` only at true end of input (pipe returned `None`) or
    /// when `buf` is empty; pipe errors surface as `io::Error`.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        loop {
            if let Some(chunk) = self.chunk.as_mut() {
                if chunk.is_empty() {
                    // Fix: a zero-length chunk from the pipe previously made
                    // this return `Ok(0)`, which `io::Read` callers interpret
                    // as EOF. Discard it and ask the pipe for more data.
                    self.chunk = None;
                    continue;
                }
                let n = chunk.len().min(buf.len());
                buf[..n].copy_from_slice(&chunk[..n]);
                if n == chunk.len() {
                    // Chunk fully consumed; drop it.
                    self.chunk = None;
                } else {
                    // Keep the unread tail for the next call.
                    chunk.advance(n);
                }
                return Ok(n);
            }
            match self.pipe.recv() {
                None => return Ok(0),
                Some(Ok(b)) => self.chunk = Some(b),
                Some(Err(err)) => return Err(std::io::Error::other(err)),
            }
        }
    }
}
pub struct PipeAnalyzer<
P: Pipe,
> {
temp_inner: read::ReadAnalyzer<TempReader<P>>,
}
impl<P: Pipe> PipeAnalyzer<P> {
pub fn new(pipe: P) -> Self {
Self {
temp_inner: read::ReadAnalyzer::new(TempReader::new(pipe)),
}
}
#[allow(clippy::should_implement_trait)]
pub fn next(&mut self) -> Token {
self.temp_inner.next()
}
#[inline]
pub fn content(&self) -> Content {
if let Ok(content) = self.try_content() {
content
} else {
panic!("no content: last `next()` returned `Token::Err` (use `err()` instead)");
}
}
#[inline]
pub fn err(&self) -> Error<P::Error> {
if let Err(err) = self.try_content() {
err
} else {
panic!("no error: last `next()` did not return `Token::Err` (use `content()` instead)");
}
}
#[inline(always)]
pub fn pos(&self) -> &Pos {
self.temp_inner.pos()
}
pub fn try_content(&self) -> Result<Content, Error<P::Error>> {
match self.temp_inner.try_content() {
Ok(c) => Ok(Content(c)),
Err(err) if err.kind() != ErrorKind::Read => {
Err(Error::new_lexical(err.kind(), err.pos()))
}
Err(err) => Err(Error::new_read(err.pos())),
}
}
pub fn into_parser(self) -> syntax::Parser<PipeAnalyzer<P>> {
syntax::Parser::new(self)
}
}
// Delegates the `lexical::Analyzer` trait to the inherent methods above.
// Fully-qualified calls avoid any ambiguity with the trait methods being
// defined here.
impl<P: Pipe> lexical::Analyzer for PipeAnalyzer<P> {
type Content = Content;
type Error = Error<P::Error>;
#[inline(always)]
fn next(&mut self) -> Token {
PipeAnalyzer::next(self)
}
#[inline(always)]
fn try_content(&self) -> Result<Self::Content, Error<P::Error>> {
PipeAnalyzer::try_content(self)
}
#[inline(always)]
fn pos(&self) -> &Pos {
PipeAnalyzer::pos(self)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{IntoBuf, lexical::Expect};
use rstest::rstest;
use std::{
error::Error as _,
sync::mpsc::channel,
};
// An exhausted `LiteralBuf` must panic when advanced past its end.
#[rstest]
#[case(Literal::from_static(""))]
#[case(Literal::from_ref(""))]
#[case(Literal::from_string("".into()))]
#[should_panic(expected = "not enough bytes in buffer (1 requested, but only 0 remain)")]
fn test_literal_buf_advance_panic(#[case] literal: Literal) {
let _ = literal.into_buf().advance(1);
}
// An exhausted `LiteralBuf` must panic when copying out more bytes than it holds.
#[rstest]
#[case(Literal::from_static(""))]
#[case(Literal::from_ref(""))]
#[case(Literal::from_string("".into()))]
#[should_panic(expected = "not enough bytes in buffer (1 requested, but only 0 remain)")]
fn test_literal_buf_copy_to_slice_panic(#[case] literal: Literal) {
let mut dst = [0; 1];
let _ = literal.into_buf().copy_to_slice(&mut dst);
}
// Checks accessors, `Display` output, and the (currently absent) `source`
// for both `Error` constructor paths.
#[rstest]
#[case(Error::new_lexical(ErrorKind::UnexpectedEof(Token::LitTrue), &Pos::new(3, 2, 1)), ErrorKind::UnexpectedEof(Token::LitTrue), "unexpected EOF in true token at line 2, column 1 (offset: 3)", None)]
#[case(Error::new_read(&Pos::new(3, 2, 1)), ErrorKind::Read, "read error at line 2, column 1 (offset: 3)", None)] fn test_error(
#[case] err: Error<ToyError>,
#[case] expect_kind: ErrorKind,
#[case] expect_display: &str,
#[case] expect_source: Option<ToyError>,
) {
let pos = Pos::new(3, 2, 1);
assert_eq!(expect_kind, err.kind());
assert_eq!(&pos, err.pos());
assert_eq!(
expect_source.as_ref(),
err.source().and_then(|e| e.downcast_ref::<ToyError>())
);
let actual_display = format!("{err}");
assert_eq!(expect_display, actual_display);
}
// A closed, empty channel yields EOF with empty content.
#[test]
fn test_analyzer_empty() {
let (tx, rx) = channel();
let mut an = PipeAnalyzer::new(rx);
drop(tx);
assert_eq!(an.next(), Token::Eof);
assert_eq!("", an.content().literal().into_string());
assert_eq!("", an.content().unescaped().into_string());
}
// Before the first `next()`, content is empty, unescaped, and repeatable.
#[test]
fn test_analyzer_initial_state_content() {
let (_, rx) = channel();
let an = PipeAnalyzer::new(rx);
for _ in 0..5 {
let content = an.content();
assert_eq!("", content.literal().into_string());
assert!(!content.is_escaped());
assert_eq!("", content.unescaped().into_string());
let content = an.try_content().unwrap();
assert_eq!("", content.literal().into_string());
assert!(!content.is_escaped());
assert_eq!("", content.unescaped().into_string());
}
}
// `err()` must panic before any `Token::Err` has been produced.
#[test]
#[should_panic(
expected = "no error: last `next()` did not return `Token::Err` (use `content()` instead)"
)]
fn test_analyzer_initial_state_err() {
let (_, rx) = channel();
let _ = PipeAnalyzer::new(rx).err();
}
// Each case is a complete input that must lex as exactly one token followed
// by EOF, across several pipe chunk sizes. The third column holds the
// expected unescaped text for inputs containing escape sequences.
#[rstest]
#[case("", Token::Eof, None)]
#[case("{", Token::ObjBegin, None)]
#[case("}", Token::ObjEnd, None)]
#[case("[", Token::ArrBegin, None)]
#[case("]", Token::ArrEnd, None)]
#[case(":", Token::NameSep, None)]
#[case(",", Token::ValueSep, None)]
#[case("false", Token::LitFalse, None)]
#[case("null", Token::LitNull, None)]
#[case("true", Token::LitTrue, None)]
#[case("0", Token::Num, None)]
#[case("-0", Token::Num, None)]
#[case("1", Token::Num, None)]
#[case("-1", Token::Num, None)]
#[case("12", Token::Num, None)]
#[case("-12", Token::Num, None)]
#[case("0.0", Token::Num, None)]
#[case("-0.0", Token::Num, None)]
#[case("0.123456789", Token::Num, None)]
#[case("-123.456789", Token::Num, None)]
#[case("0E0", Token::Num, None)]
#[case("0e0", Token::Num, None)]
#[case("0E+0", Token::Num, None)]
#[case("0e+0", Token::Num, None)]
#[case("0E-0", Token::Num, None)]
#[case("0e-0", Token::Num, None)]
#[case("0.0E0", Token::Num, None)]
#[case("0.0e0", Token::Num, None)]
#[case("0.0E+0", Token::Num, None)]
#[case("0.0e+0", Token::Num, None)]
#[case("0.0E0", Token::Num, None)]
#[case("0.0e0", Token::Num, None)]
#[case("0E0", Token::Num, None)]
#[case("0e0", Token::Num, None)]
#[case("-0E+0", Token::Num, None)]
#[case("-0e+0", Token::Num, None)]
#[case("-0E-0", Token::Num, None)]
#[case("-0e-0", Token::Num, None)]
#[case("-0.0E0", Token::Num, None)]
#[case("-0.0e0", Token::Num, None)]
#[case("-0.0E+0", Token::Num, None)]
#[case("-0.0e+0", Token::Num, None)]
#[case("-0.0E0", Token::Num, None)]
#[case("-0.0e0", Token::Num, None)]
#[case("123E456", Token::Num, None)]
#[case("123e456", Token::Num, None)]
#[case("123.456E+7", Token::Num, None)]
#[case("123.456e+7", Token::Num, None)]
#[case("123.456E-89", Token::Num, None)]
#[case("123.456e-89", Token::Num, None)]
#[case("-123E456", Token::Num, None)]
#[case("-123e456", Token::Num, None)]
#[case("-123.456E+7", Token::Num, None)]
#[case("-123.456e+7", Token::Num, None)]
#[case("-123.456E-89", Token::Num, None)]
#[case("-123.456e-89", Token::Num, None)]
#[case(r#""""#, Token::Str, None)]
#[case(r#"" ""#, Token::Str, None)]
#[case(r#""foo""#, Token::Str, None)]
#[case(r#""The quick brown fox jumped over the lazy dog!""#, Token::Str, None)]
#[case(r#""\\""#, Token::Str, Some(r#""\""#))]
#[case(r#""\/""#, Token::Str, Some(r#""/""#))]
#[case(r#""\t""#, Token::Str, Some("\"\t\""))]
#[case(r#""\r""#, Token::Str, Some("\"\r\""))]
#[case(r#""\n""#, Token::Str, Some("\"\n\""))]
#[case(r#""\f""#, Token::Str, Some("\"\u{000c}\""))]
#[case(r#""\b""#, Token::Str, Some("\"\u{0008}\""))]
#[case(r#""\u0000""#, Token::Str, Some("\"\u{0000}\""))]
#[case(r#""\u001f""#, Token::Str, Some("\"\u{001f}\""))]
#[case(r#""\u0020""#, Token::Str, Some(r#"" ""#))]
#[case(r#""\u007E""#, Token::Str, Some(r#""~""#))]
#[case(r#""\u007F""#, Token::Str, Some("\"\u{007f}\""))]
#[case(r#""\u0080""#, Token::Str, Some("\"\u{0080}\""))]
#[case(r#""\u0100""#, Token::Str, Some("\"\u{0100}\""))]
#[case(r#""\uE000""#, Token::Str, Some("\"\u{e000}\""))]
#[case(r#""\ufDCf""#, Token::Str, Some("\"\u{fdcf}\""))]
#[case(r#""\uFdeF""#, Token::Str, Some("\"\u{fdef}\""))]
#[case(r#""\ufffd""#, Token::Str, Some("\"\u{fffd}\""))]
#[case(r#""\uFFFE""#, Token::Str, Some("\"\u{fffe}\""))]
#[case(r#""\uFFFF""#, Token::Str, Some("\"\u{ffff}\""))]
// Surrogate pairs spanning the supplementary planes.
#[case(r#""\ud800\udc00""#, Token::Str, Some("\"\u{10000}\""))] #[case(r#""\uD800\uDFFF""#, Token::Str, Some("\"\u{103ff}\""))] #[case(r#""\uDBFF\uDC00""#, Token::Str, Some("\"\u{10fc00}\""))] #[case(r#""\udbFf\udfff""#, Token::Str, Some("\"\u{10ffff}\""))] #[case(" ", Token::White, None)]
#[case("\t", Token::White, None)]
#[case(" ", Token::White, None)]
#[case("\t\t", Token::White, None)]
#[case(" \t \t \t \t\t", Token::White, None)]
fn test_analyzer_single_token(
#[case] input: &str,
#[case] expect: Token,
#[case] unescaped: Option<&str>,
) {
const CHUNK_SIZES: [usize; 3] = [
1, 2,
10,
];
for chunk_size in CHUNK_SIZES {
// First pass: fetch and verify the token's content and position.
{
let mut an = PipeAnalyzer::new(SlicePipe::new(chunk_size, input.as_bytes()));
assert_eq!(Pos::default(), *an.pos());
assert_eq!(expect, an.next());
assert_eq!(Pos::default(), *an.pos());
let content = an.content();
assert_eq!(
input,
content.literal().into_string(),
"chunk_size = {chunk_size}, input = {input:?}, content = {content}"
);
assert_eq!(unescaped.is_some(), content.is_escaped());
if let Some(u) = unescaped {
assert_eq!(u, content.unescaped().into_string());
} else {
assert_eq!(input, content.unescaped().into_string());
}
assert_eq!(Token::Eof, an.next());
assert_eq!(
Pos {
offset: input.len(),
line: 1,
col: input.len() + 1,
},
*an.pos()
);
assert_eq!(Token::Eof, an.next());
assert_eq!(
Pos {
offset: input.len(),
line: 1,
col: input.len() + 1,
},
*an.pos()
);
}
// Second pass: same token stream without ever fetching content.
{
let mut an = PipeAnalyzer::new(SlicePipe::new(chunk_size, input.as_bytes()));
assert_eq!(Pos::default(), *an.pos());
assert_eq!(expect, an.next());
assert_eq!(Pos::default(), *an.pos());
assert_eq!(Token::Eof, an.next());
assert_eq!(
Pos {
offset: input.len(),
line: 1,
col: input.len() + 1,
},
*an.pos()
);
assert_eq!(Token::Eof, an.next());
assert_eq!(
Pos {
offset: input.len(),
line: 1,
col: input.len() + 1,
},
*an.pos()
);
}
}
}
// Inputs that lex without error: `err()` must panic after a non-`Err` token.
#[rstest]
#[case(r#"["#)]
#[case(r#"]"#)]
#[case(r#"false"#)]
#[case(r#":"#)]
#[case(r#"null"#)]
#[case(r#"3.14159e+0"#)]
#[case(r#"{"#)]
#[case(r#"}"#)]
#[case(r#""foo\/\u1234\/bar""#)]
#[case(r#"true"#)]
#[case(r#","#)]
#[case("\n\n\n ")]
#[should_panic(
expected = "no error: last `next()` did not return `Token::Err` (use `content()` instead)"
)]
fn test_analyzer_single_token_panic_no_err(#[case] input: &str) {
const CHUNK_SIZES: [usize; 3] = [
1, 2,
10,
];
for chunk_size in CHUNK_SIZES {
let mut an = PipeAnalyzer::new(SlicePipe::new(chunk_size, input.as_bytes()));
let token = an.next();
assert!(!token.is_terminal(), "input = {input:?}, token = {token:?}");
let _ = an.err();
}
}
// `content()` must panic once `next()` has produced `Token::Err`.
#[test]
#[should_panic(expected = "last `next()` returned `Token::Err` (use `err()` instead)")]
fn test_analyzer_single_error_panic_no_content() {
let mut an = PipeAnalyzer::new(SlicePipe::new(1, &b"a"[..]));
assert_eq!(Token::Err, an.next());
let _ = an.content();
}
// Lexical (non-read) errors: kind, position, absent source, and stickiness
// of the error state across further `next()` calls.
#[rstest]
#[case(r#""\uDC00""#, ErrorKind::BadSurrogate { first: 0xdc00, second: None, offset: 5 }, 1)]
#[case(&[b'"', 0xc2, 0xc0], ErrorKind::BadUtf8ContByte { seq_len: 2, offset: 1, value: 0xc0 }, 1)]
#[case(&b"\"\x80", ErrorKind::UnexpectedByte { token: Some(Token::Str), expect: Expect::StrChar, actual: 0x80 }, 1)]
#[case([b'"'], ErrorKind::UnexpectedEof(Token::Str), 1)]
#[case("10.", ErrorKind::UnexpectedEof(Token::Num), 3)]
fn test_analyzer_single_lexical_error<T>(
#[case] input: T,
#[case] kind: ErrorKind,
#[case] pos_offset: usize,
) where
T: AsRef<[u8]> + fmt::Debug,
{
const CHUNK_SIZES: [usize; 3] = [
1, 2,
// TODO: FIXME: uncomment below after refactor
// INLINE_LEN - 1,
// INLINE_LEN,
// INLINE_LEN + 1,
10,
// TODO: FIXME: uncomment below after refactor
// Bufs::DEFAULT_BUF_SIZE,
];
for chunk_size in CHUNK_SIZES {
// With error fetch.
{
let mut an = PipeAnalyzer::new(SlicePipe::new(chunk_size, input.as_ref()));
assert_eq!(Pos::default(), *an.pos());
assert_eq!(Token::Err, an.next());
assert_eq!(Pos::default(), *an.pos());
let err = an.err();
assert_eq!(kind, err.kind());
assert_eq!(
Pos {
offset: pos_offset,
line: 1,
col: pos_offset + 1
},
*err.pos()
);
assert!(err.source().is_none());
assert_eq!(Token::Err, an.next());
assert_eq!(Pos::default(), *an.pos());
}
// Without error fetch.
{
let mut an = PipeAnalyzer::new(SlicePipe::new(chunk_size, input.as_ref()));
assert_eq!(Pos::default(), *an.pos());
assert_eq!(Token::Err, an.next());
assert_eq!(Pos::default(), *an.pos());
assert_eq!(Token::Err, an.next());
assert_eq!(Pos::default(), *an.pos());
}
}
}
// Pipe failures must surface as sticky `ErrorKind::Read` errors at the
// position where input was exhausted.
#[rstest]
#[case(1, r#"{"#, [Token::ObjBegin], Pos::new(1, 1, 2), Pos::new(1, 1, 2))]
#[case(1, r#"fals"#, [], Pos::default(), Pos::new(4, 1, 5))]
#[case(2, r#"fals"#, [], Pos::default(), Pos::new(4, 1, 5))]
// TODO: FIXME: Uncomment after refactor
// #[case(Bufs::DEFAULT_BUF_SIZE, r#"fals"#, [], Pos::default(), Pos::new(4, 1, 5))]
#[case(1, r#"[3.141592653589793238462643383279"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(33, 1, 34))]
#[case(2, r#"[3.141592653589793238462643383279"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(33, 1, 34))]
#[case(1, r#"[3.141592653589793238462643383279,"#, [Token::ArrBegin, Token::Num, Token::ValueSep], Pos::new(34, 1, 35), Pos::new(34, 1, 35))]
#[case(2, r#"[3.141592653589793238462643383279,"#, [Token::ArrBegin, Token::Num, Token::ValueSep], Pos::new(34, 1, 35), Pos::new(34, 1, 35))]
// TODO: FIXME: Uncomment after refactor
// #[case(INLINE_LEN-1, r#"[314.1592653589793238462643383279e-2"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(36, 1, 37))]
// #[case(INLINE_LEN-1, r#"[314.1592653589793238462643383279e-2 :"#, [Token::ArrBegin, Token::Num, Token::White, Token::NameSep], Pos::new(38, 1, 39), Pos::new(38, 1, 39))]
// #[case(INLINE_LEN, r#"[314.1592653589793238462643383279e-2"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(36, 1, 37))]
// #[case(INLINE_LEN, r#"[314.1592653589793238462643383279e-2 :"#, [Token::ArrBegin, Token::Num, Token::White, Token::NameSep], Pos::new(38, 1, 39), Pos::new(38, 1, 39))]
// #[case(INLINE_LEN+1, r#"[314.1592653589793238462643383279e-2"#, [Token::ArrBegin], Pos::new(1, 1, 2), Pos::new(36, 1, 37))]
// #[case(INLINE_LEN+1, r#"[314.1592653589793238462643383279E+999 :"#, [Token::ArrBegin, Token::Num, Token::White, Token::NameSep], Pos::new(40, 1, 41), Pos::new(40, 1, 41))]
// #[case(Bufs::DEFAULT_BUF_SIZE, r#"[3141.592653589793238462643383279e-3,{"aaaaaaaaaaaaaaaaaaaaaaaaaaaa":true}] "#, [Token::ArrBegin, Token::Num, Token::ValueSep, Token::ObjBegin, Token::Str, Token::NameSep, Token::LitTrue, Token::ObjEnd, Token::ArrEnd], Pos::new(75, 1, 76), Pos::new(79, 1, 80))]
fn test_analyzer_single_read_error<T>(
#[case] chunk_size: usize,
#[case] input: &str,
#[case] expect_tokens: T,
#[case] expect_token_pos: Pos,
#[case] expect_err_pos: Pos,
) where
T: IntoIterator<Item = Token>,
{
// Pipe that serves `input` and then fails with `PipeError` instead of EOF.
#[derive(Debug)]
struct PipeError;
impl fmt::Display for PipeError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str("there's an error in the pipe!")
}
}
impl std::error::Error for PipeError {}
struct ErrorPipe<'a> {
chunk_size: usize,
input: &'a [u8],
}
impl<'a> ErrorPipe<'a> {
fn new(chunk_size: usize, input: &'a [u8]) -> Self {
assert!(chunk_size > 0);
Self { chunk_size, input }
}
}
impl<'a> Pipe for ErrorPipe<'a> {
type Error = PipeError;
fn recv(&mut self) -> Option<Result<Bytes, Self::Error>> {
if self.input.len() > 0 {
let n = self.input.len().min(self.chunk_size);
let b = self.input[..n].to_vec().into();
self.input = &self.input[n..];
Some(Ok(b))
} else {
Some(Err(PipeError))
}
}
}
let mut an = PipeAnalyzer::new(ErrorPipe::new(chunk_size, input.as_bytes()));
for expect_token in expect_tokens.into_iter() {
let actual_token = an.next();
assert_eq!(expect_token, actual_token);
}
assert_eq!(Token::Err, an.next());
assert_eq!(expect_token_pos, *an.pos());
let err = an.err();
assert_eq!(ErrorKind::Read, err.kind());
assert_eq!(expect_err_pos, *err.pos());
assert_eq!(Token::Err, an.next());
assert_eq!(expect_token_pos, *an.pos());
let err = an.try_content().unwrap_err();
assert_eq!(ErrorKind::Read, err.kind());
assert_eq!(expect_err_pos, *err.pos());
// TODO: FIXME: Uncomment below after refactor
// assert!(
// err.source()
// .and_then(|e| e.downcast_ref::<PipeError>())
// .is_some()
// );
assert_eq!(Token::Err, an.next());
}
// Drives a full parse of a small document, checking token, literal,
// position, and nesting level at every step.
#[rstest]
#[case(1)]
#[case(2)]
// TODO: FIXME: Uncomment below after refactor
// #[case(INLINE_LEN - 1)]
// #[case(INLINE_LEN)]
// #[case(INLINE_LEN + 1)]
// #[case(Bufs::DEFAULT_BUF_SIZE)]
fn test_analyzer_into_parser(#[case] chunk_size: usize) {
const INPUT: &str = r#"{"hello":["🌍"]}"#;
let mut parser =
PipeAnalyzer::new(SlicePipe::new(chunk_size, INPUT.as_bytes())).into_parser();
assert_eq!(Token::ObjBegin, parser.next());
assert_eq!("{", parser.content().literal());
assert_eq!(Pos::default(), *parser.pos());
assert_eq!(1, parser.level());
assert_eq!(Token::Str, parser.next());
assert_eq!(r#""hello""#, parser.content().literal());
assert_eq!(Pos::new(1, 1, 2), *parser.pos());
assert_eq!(1, parser.level());
assert_eq!(Token::NameSep, parser.next());
assert_eq!(":", parser.content().literal());
assert_eq!(Pos::new(8, 1, 9), *parser.pos());
assert_eq!(1, parser.level());
assert_eq!(Token::ArrBegin, parser.next());
assert_eq!("[", parser.content().literal());
assert_eq!(Pos::new(9, 1, 10), *parser.pos());
assert_eq!(2, parser.level());
assert_eq!(Token::Str, parser.next());
assert_eq!(r#""🌍""#, parser.content().literal());
assert_eq!(Pos::new(10, 1, 11), *parser.pos());
assert_eq!(2, parser.level());
assert_eq!(Token::ArrEnd, parser.next());
assert_eq!("]", parser.content().literal());
// Byte offset and column diverge here: the emoji is 4 bytes, 1 column.
assert_eq!(Pos::new(16, 1, 14), *parser.pos());
assert_eq!(1, parser.level());
assert_eq!(Token::ObjEnd, parser.next());
assert_eq!("}", parser.content().literal());
assert_eq!(Pos::new(17, 1, 15), *parser.pos());
assert_eq!(0, parser.level());
for _ in 0..5 {
assert_eq!(Token::Eof, parser.next());
assert_eq!(Pos::new(18, 1, 16), *parser.pos());
assert_eq!(0, parser.level());
}
}
// End-to-end lexing of a multi-line document: every token, its position, its
// raw literal, and its unescaped form (where escaping applies).
#[rstest]
#[case(1)]
#[case(2)]
// TODO: FIXME: Uncomment below after refactor
// #[case(INLINE_LEN - 1)]
// #[case(INLINE_LEN)]
// #[case(INLINE_LEN + 1)]
// #[case(Bufs::DEFAULT_BUF_SIZE)]
fn test_analyzer_smoke(#[case] chunk_size: usize) {
const JSON_TEXT: &str = r#"
[
[],
{},
[true, false, null, "foo",-9, -9.9, -99.99e-99, {"❤️😊":1}, 10000000],
"\u0068\u0065\u006c\u006c\u006f\u002c\u0020\u0077\u006f\u0072\u006c\u0064",
"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt.\nUt labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco.\nLaboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in."
]"#;
// (token, position, raw literal, unescaped form if escaped).
const EXPECT: &[(Token, Pos, &str, Option<&str>)] = &[
// Line 1.
(Token::White, Pos::new(0, 1, 1), "\n\n", None),
// Line 3.
(Token::ArrBegin, Pos::new(2, 3, 1), "[", None),
(Token::White, Pos::new(3, 3, 2), "\n ", None),
// Line 4.
(Token::ArrBegin, Pos::new(6, 4, 3), "[", None),
(Token::ArrEnd, Pos::new(7, 4, 4), "]", None),
(Token::ValueSep, Pos::new(8, 4, 5), ",", None),
(Token::White, Pos::new(9, 4, 6), "\n ", None),
// Line 5.
(Token::ObjBegin, Pos::new(12, 5, 3), "{", None),
(Token::ObjEnd, Pos::new(13, 5, 4), "}", None),
(Token::ValueSep, Pos::new(14, 5, 5), ",", None),
(Token::White, Pos::new(15, 5, 6), "\n ", None),
// Line 6.
(Token::ArrBegin, Pos::new(18, 6, 3), "[", None),
(Token::LitTrue, Pos::new(19, 6, 4), "true", None),
(Token::ValueSep, Pos::new(23, 6, 8), ",", None),
(Token::White, Pos::new(24, 6, 9), " ", None),
(Token::LitFalse, Pos::new(25, 6, 10), "false", None),
(Token::ValueSep, Pos::new(30, 6, 15), ",", None),
(Token::White, Pos::new(31, 6, 16), " ", None),
(Token::LitNull, Pos::new(32, 6, 17), "null", None),
(Token::ValueSep, Pos::new(36, 6, 21), ",", None),
(Token::White, Pos::new(37, 6, 22), " ", None),
(Token::Str, Pos::new(38, 6, 23), r#""foo""#, None),
(Token::ValueSep, Pos::new(43, 6, 28), ",", None),
(Token::Num, Pos::new(44, 6, 29), "-9", None),
(Token::ValueSep, Pos::new(46, 6, 31), ",", None),
(Token::White, Pos::new(47, 6, 32), " ", None),
(Token::Num, Pos::new(48, 6, 33), "-9.9", None),
(Token::ValueSep, Pos::new(52, 6, 37), ",", None),
(Token::White, Pos::new(53, 6, 38), " ", None),
(Token::Num, Pos::new(54, 6, 39), "-99.99e-99", None),
(Token::ValueSep, Pos::new(64, 6, 49), ",", None),
(Token::White, Pos::new(65, 6, 50), " ", None),
(Token::ObjBegin, Pos::new(66, 6, 51), "{", None),
(Token::Str, Pos::new(67, 6, 52), r#""❤️😊""#, None),
(Token::NameSep, Pos::new(79, 6, 57), ":", None),
(Token::Num, Pos::new(80, 6, 58), "1", None),
(Token::ObjEnd, Pos::new(81, 6, 59), "}", None),
(Token::ValueSep, Pos::new(82, 6, 60), ",", None),
(Token::White, Pos::new(83, 6, 61), " ", None),
(Token::Num, Pos::new(84, 6, 62), "10000000", None),
(Token::ArrEnd, Pos::new(92, 6, 70), "]", None),
(Token::ValueSep, Pos::new(93, 6, 71), ",", None),
(Token::White, Pos::new(94, 6, 72), "\n ", None),
// Line 7.
(
Token::Str,
Pos::new(97, 7, 3),
r#""\u0068\u0065\u006c\u006c\u006f\u002c\u0020\u0077\u006f\u0072\u006c\u0064""#,
Some(r#""hello, world""#),
),
(Token::ValueSep, Pos::new(171, 7, 77), ",", None),
(Token::White, Pos::new(172, 7, 78), "\n ", None),
// Line 8.
(
Token::Str,
Pos::new(175, 8, 3),
concat!(
r#""Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt.\n"#,
r#"Ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco.\n"#,
r#"Laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in.""#,
),
Some(concat!(
"\"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt.\n",
"Ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco.\n",
"Laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in.\"",
)),
),
// Line 9.
(Token::White, Pos::new(455, 8, 283), "\n", None),
(Token::ArrEnd, Pos::new(456, 9, 1), "]", None),
(Token::Eof, Pos::new(457, 9, 2), "", None),
];
let mut an = PipeAnalyzer::new(SlicePipe::new(chunk_size, JSON_TEXT.as_bytes()));
for (i, (expect_token, expect_pos, expect_literal, expect_unescaped)) in
EXPECT.iter().enumerate()
{
let actual_token = an.next();
let actual_pos = *an.pos();
let content = an.content();
assert_eq!(
*expect_token, actual_token,
"i = {i}, actual_pos = {actual_pos}, expect_pos = {expect_pos}"
);
assert_eq!(
*expect_pos, actual_pos,
"i = {i}, token = {actual_token}, content = {content}"
);
assert_eq!(
*expect_literal,
content.literal(),
"i = {i}, token = {actual_token}, expect_literal = {expect_literal:?}, content.literal() = {}",
content.literal(),
);
if let Some(u) = expect_unescaped {
assert!(
content.is_escaped(),
"i = {i}, token = {actual_token}, literal = {expect_literal:?}"
);
assert_eq!(*u, content.unescaped());
} else {
assert!(
!content.is_escaped(),
"i = {i}, token = {actual_token}, literal = {expect_literal:?}"
);
assert_eq!(*expect_literal, content.unescaped());
}
}
}
// Minimal error type for exercising `Error<E>`'s source plumbing.
#[derive(Debug, Eq, PartialEq)]
struct ToyError(&'static str);
impl fmt::Display for ToyError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(self.0)
}
}
impl std::error::Error for ToyError {}
/// Test pipe that serves `input` in chunks of at most `chunk_size` bytes,
/// then signals end of input.
struct SlicePipe<'a> {
    chunk_size: usize,
    input: &'a [u8],
}

impl<'a> SlicePipe<'a> {
    fn new(chunk_size: usize, input: &'a [u8]) -> Self {
        // Consistent with `ErrorPipe::new`: a zero chunk size would make the
        // pipe yield empty chunks forever.
        assert!(chunk_size > 0);
        Self { chunk_size, input }
    }
}

impl<'a> Pipe for SlicePipe<'a> {
    type Error = Infallible;

    fn recv(&mut self) -> Option<Result<Bytes, Self::Error>> {
        if self.input.is_empty() {
            // Exhausted: signal end of input.
            None
        } else {
            let n = self.input.len().min(self.chunk_size);
            let chunk = self.input[..n].to_vec().into();
            self.input = &self.input[n..];
            Some(Ok(chunk))
        }
    }
}
/// Test helper: drains anything `IntoBuf` into an owned `String`.
trait IntoString {
    fn into_string(self) -> String;
}

impl<T: IntoBuf> IntoString for T {
    fn into_string(self) -> String {
        let mut buf = self.into_buf();
        let mut bytes = Vec::with_capacity(buf.remaining());
        // Drain chunk by chunk until the buffer is exhausted.
        while buf.remaining() != 0 {
            let chunk = buf.chunk();
            bytes.extend_from_slice(chunk);
            buf.advance(chunk.len());
        }
        String::from_utf8(bytes).expect("valid UTF-8")
    }
}
}