#![cfg_attr(feature = "read", doc = "- [`read`][`crate::lexical::read`]")]
#![cfg_attr(not(feature = "read"), doc = "- `read`")]
use crate::{Buf, EqStr, IntoBuf, OrdStr, Pos, StringBuf};
use std::{
borrow::Borrow,
cmp::{Ord, Ordering},
fmt,
hash::{Hash, Hasher},
ops::Deref,
};
pub mod fixed;
pub mod state;
#[cfg(feature = "read")]
pub mod read;
/// Kind of a lexical token.
///
/// The derived ordering follows the declaration order of the variants, so the
/// variant order below must not be rearranged.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum Token {
    /// The `[` punctuation token.
    ArrBegin,
    /// The `]` punctuation token.
    ArrEnd,
    /// Pseudo-token displayed as `EOF`.
    Eof,
    /// Pseudo-token displayed as `error`.
    Err,
    /// The literal `false`.
    LitFalse,
    /// The literal `null`.
    LitNull,
    /// The literal `true`.
    LitTrue,
    /// The `:` punctuation token.
    NameSep,
    /// A number token.
    Num,
    /// The `{` punctuation token.
    ObjBegin,
    /// The `}` punctuation token.
    ObjEnd,
    /// A string token.
    Str,
    /// The `,` punctuation token.
    ValueSep,
    /// Pseudo-token displayed as `whitespace`.
    White,
}

impl Token {
    /// Returns `true` for the literal tokens `false`, `null`, and `true`.
    #[inline]
    pub const fn is_literal(&self) -> bool {
        match *self {
            Self::LitFalse | Self::LitNull | Self::LitTrue => true,
            Self::ArrBegin
            | Self::ArrEnd
            | Self::Eof
            | Self::Err
            | Self::NameSep
            | Self::Num
            | Self::ObjBegin
            | Self::ObjEnd
            | Self::Str
            | Self::ValueSep
            | Self::White => false,
        }
    }

    /// Returns `true` for the pseudo-tokens: end of file, error, and whitespace.
    #[inline]
    pub const fn is_pseudo(&self) -> bool {
        self.is_terminal() || matches!(*self, Self::White)
    }

    /// Returns `true` for the literal tokens plus numbers and strings.
    #[inline]
    pub const fn is_primitive(&self) -> bool {
        self.is_literal() || matches!(*self, Self::Num | Self::Str)
    }

    /// Returns `true` for the six punctuation tokens `[`, `]`, `:`, `{`, `}`, and `,`.
    #[inline]
    pub const fn is_punct(&self) -> bool {
        match *self {
            Self::ArrBegin
            | Self::ArrEnd
            | Self::NameSep
            | Self::ObjBegin
            | Self::ObjEnd
            | Self::ValueSep => true,
            Self::Eof
            | Self::Err
            | Self::LitFalse
            | Self::LitNull
            | Self::LitTrue
            | Self::Num
            | Self::Str
            | Self::White => false,
        }
    }

    /// Returns `true` for the end-of-file and error tokens.
    #[inline]
    pub const fn is_terminal(&self) -> bool {
        matches!(*self, Self::Eof | Self::Err)
    }

    /// Returns the fixed text of the token, or `None` when the token kind has
    /// no single fixed text (end of file, error, number, string, whitespace).
    #[inline]
    pub const fn static_content(&self) -> Option<&'static str> {
        // Exhaustive on purpose: adding a variant must force a decision here.
        match *self {
            Self::ArrBegin => Some("["),
            Self::ArrEnd => Some("]"),
            Self::ObjBegin => Some("{"),
            Self::ObjEnd => Some("}"),
            Self::NameSep => Some(":"),
            Self::ValueSep => Some(","),
            Self::LitFalse => Some("false"),
            Self::LitNull => Some("null"),
            Self::LitTrue => Some("true"),
            Self::Eof | Self::Err | Self::Num | Self::Str | Self::White => None,
        }
    }
}

impl fmt::Display for Token {
    /// Writes the fixed text for punctuation and literals, and a descriptive
    /// name for every other token kind.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = match *self {
            // Punctuation.
            Self::ArrBegin => "[",
            Self::ArrEnd => "]",
            Self::ObjBegin => "{",
            Self::ObjEnd => "}",
            Self::NameSep => ":",
            Self::ValueSep => ",",
            // Literals.
            Self::LitFalse => "false",
            Self::LitNull => "null",
            Self::LitTrue => "true",
            // Kinds without fixed text get a descriptive name.
            Self::Eof => "EOF",
            Self::Err => "error",
            Self::Num => "number",
            Self::Str => "string",
            Self::White => "whitespace",
        };
        f.write_str(text)
    }
}
/// A value that is either held as a literal of type `T` or as an owned,
/// expanded `String`.
#[derive(Clone, Debug)]
pub enum Unescaped<T> {
    /// The value in its literal form.
    Literal(T),
    /// The value in its expanded, owned form.
    Expanded(String),
}

impl<T> Unescaped<T> {
    /// Returns the literal value, or `None` if this is the expanded form.
    pub fn literal(&self) -> Option<&T> {
        if let Self::Literal(lit) = self {
            Some(lit)
        } else {
            None
        }
    }

    /// Returns the expanded text, or `None` if this is the literal form.
    pub fn expanded(&self) -> Option<&str> {
        if let Self::Expanded(text) = self {
            Some(text.as_str())
        } else {
            None
        }
    }

    /// Returns `true` if this is the literal form.
    pub fn is_literal(&self) -> bool {
        self.literal().is_some()
    }

    /// Returns `true` if this is the expanded form.
    pub fn is_expanded(&self) -> bool {
        self.expanded().is_some()
    }
}
/// Converts either form into a single buffer type, so callers can read the
/// content without caring which variant was held.
impl<T: IntoBuf> IntoBuf for Unescaped<T> {
    type Buf = UnescapedBuf<T::Buf>;

    fn into_buf(self) -> Self::Buf {
        let inner = match self {
            Self::Literal(lit) => UnescapedBufInner::Literal(lit.into_buf()),
            Self::Expanded(text) => UnescapedBufInner::Expanded(text.into_buf()),
        };
        UnescapedBuf(inner)
    }
}
impl AsRef<str> for Unescaped<&str> {
fn as_ref(&self) -> &str {
match self {
Unescaped::Literal(t) => t,
Unescaped::Expanded(e) => e.as_str(),
}
}
}
impl AsRef<[u8]> for Unescaped<&str> {
fn as_ref(&self) -> &[u8] {
match self {
Unescaped::Literal(t) => t.as_bytes(),
Unescaped::Expanded(e) => e.as_bytes(),
}
}
}
impl Deref for Unescaped<&str> {
type Target = str;
fn deref(&self) -> &str {
match self {
Unescaped::Literal(t) => t,
Unescaped::Expanded(e) => e.as_str(),
}
}
}
impl Borrow<str> for Unescaped<&str> {
fn borrow(&self) -> &str {
match self {
Unescaped::Literal(t) => t,
Unescaped::Expanded(e) => e.as_str(),
}
}
}
impl<T> fmt::Display for Unescaped<T>
where
T: fmt::Display,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Unescaped::Literal(t) => t.fmt(f),
Unescaped::Expanded(e) => e.fmt(f),
}
}
}
/// Marker impl: `Unescaped<T>` is `Eq` whenever `T` is both `Eq` and `EqStr`.
impl<T> Eq for Unescaped<T> where T: Eq + EqStr {}
/// Converts into an owned `String`: the expanded form is moved out as-is and
/// the literal form is converted through `String: From<T>`.
impl<T> From<Unescaped<T>> for String
where
    String: From<T>,
{
    fn from(u: Unescaped<T>) -> Self {
        match u {
            Unescaped::Expanded(text) => text,
            Unescaped::Literal(lit) => String::from(lit),
        }
    }
}
impl<T> Hash for Unescaped<T>
where
T: Hash,
{
fn hash<H: Hasher>(&self, state: &mut H) {
match self {
Unescaped::Literal(t) => t.hash(state),
Unescaped::Expanded(e) => e.hash(state),
}
}
}
impl<T> Ord for Unescaped<T>
where
T: Eq + Ord + EqStr + OrdStr,
Self: Eq + PartialOrd,
{
fn cmp(&self, other: &Unescaped<T>) -> Ordering {
match (self, other) {
(Unescaped::Literal(t1), Unescaped::Literal(t2)) => Ord::cmp(t1, t2),
(Unescaped::Expanded(e1), Unescaped::Expanded(e2)) => e1.cmp(e2),
(Unescaped::Literal(t), Unescaped::Expanded(e)) => OrdStr::cmp(t, e.as_str()),
(Unescaped::Expanded(e), Unescaped::Literal(t)) => OrdStr::cmp(t, e.as_str()).reverse(),
}
}
}
impl<T> PartialEq<Unescaped<T>> for Unescaped<T>
where
T: for<'a> PartialEq<&'a str>,
T: PartialEq<T>,
{
fn eq(&self, other: &Unescaped<T>) -> bool {
match (self, other) {
(Unescaped::Literal(t1), Unescaped::Literal(t2)) => t1 == t2,
(Unescaped::Expanded(e1), Unescaped::Expanded(e2)) => e1 == e2,
(Unescaped::Literal(t1), Unescaped::Expanded(e2)) => *t1 == e2.as_str(),
(Unescaped::Expanded(e1), Unescaped::Literal(t2)) => *t2 == e1.as_str(),
}
}
}
impl<T> PartialEq<&str> for Unescaped<T>
where
T: for<'a> PartialEq<&'a str>,
{
fn eq(&self, other: &&str) -> bool {
match self {
Unescaped::Literal(t) => *t == *other,
Unescaped::Expanded(e) => e == other,
}
}
}
/// Mirror of `Unescaped<T> == &str`, with the string slice on the left.
impl<'a, 'b, T> PartialEq<Unescaped<T>> for &'a str
where
    T: PartialEq<&'b str>,
    'a: 'b,
{
    fn eq(&self, other: &Unescaped<T>) -> bool {
        let lhs: &'a str = self;
        match other {
            Unescaped::Literal(lit) => *lit == lhs,
            Unescaped::Expanded(text) => lhs == text.as_str(),
        }
    }
}
/// Compares the held value against an owned `String`.
impl<T> PartialEq<String> for Unescaped<T>
where
    T: PartialEq<String>,
{
    fn eq(&self, other: &String) -> bool {
        match self {
            Self::Expanded(text) => text == other,
            Self::Literal(lit) => lit == other,
        }
    }
}
/// Mirror of `Unescaped<T> == String`, with the `String` on the left.
impl<T> PartialEq<Unescaped<T>> for String
where
    T: PartialEq<String>,
{
    fn eq(&self, other: &Unescaped<T>) -> bool {
        match other {
            Unescaped::Expanded(text) => self == text,
            Unescaped::Literal(lit) => lit == self,
        }
    }
}
impl<T> PartialOrd<Unescaped<T>> for Unescaped<T>
where
T: for<'a> PartialOrd<&'a str>,
for<'a> &'a str: PartialOrd<T>,
T: PartialOrd<T>,
Self: PartialEq,
{
fn partial_cmp(&self, other: &Unescaped<T>) -> Option<Ordering> {
match (self, other) {
(Unescaped::Literal(t1), Unescaped::Literal(t2)) => t1.partial_cmp(t2),
(Unescaped::Expanded(e1), Unescaped::Expanded(e2)) => e1.partial_cmp(e2),
(Unescaped::Literal(t), Unescaped::Expanded(e)) => t.partial_cmp(&e.as_str()),
(Unescaped::Expanded(e), Unescaped::Literal(t)) => {
PartialOrd::<T>::partial_cmp(&e.as_str(), t)
}
}
}
}
impl<T> PartialOrd<&str> for Unescaped<T>
where
T: for<'a> PartialOrd<&'a str>,
Self: for<'a> PartialEq<&'a str>,
{
fn partial_cmp(&self, other: &&str) -> Option<Ordering> {
match self {
Unescaped::Literal(t) => t.partial_cmp(other),
Unescaped::Expanded(e) => e.as_str().partial_cmp(*other),
}
}
}
/// Mirror of `Unescaped<T>` vs `&str` ordering, with the string slice on the left.
impl<T> PartialOrd<Unescaped<T>> for &str
where
    Self: PartialOrd<T>,
    Self: for<'c> PartialEq<Unescaped<T>>,
{
    fn partial_cmp(&self, other: &Unescaped<T>) -> Option<Ordering> {
        match other {
            Unescaped::Literal(lit) => self.partial_cmp(lit),
            Unescaped::Expanded(text) => {
                let rhs: &str = text.as_str();
                PartialOrd::<&str>::partial_cmp(self, &rhs)
            }
        }
    }
}
// Private representation behind `UnescapedBuf`: one buffer variant per
// `Unescaped` variant.
#[derive(Debug)]
enum UnescapedBufInner<B> {
// Buffer produced from a literal value.
Literal(B),
// Buffer produced from an expanded `String`.
Expanded(StringBuf),
}
/// Buffer type returned by `Unescaped::into_buf`; wraps the private inner enum
/// so the two underlying buffer kinds are not exposed.
#[derive(Debug)]
pub struct UnescapedBuf<B>(UnescapedBufInner<B>);
/// Forwards every `Buf` operation to whichever underlying buffer is held.
impl<B: Buf> Buf for UnescapedBuf<B> {
    fn advance(&mut self, n: usize) {
        match self {
            Self(UnescapedBufInner::Literal(lit)) => lit.advance(n),
            Self(UnescapedBufInner::Expanded(exp)) => exp.advance(n),
        }
    }

    fn chunk(&self) -> &[u8] {
        match self {
            Self(UnescapedBufInner::Literal(lit)) => lit.chunk(),
            Self(UnescapedBufInner::Expanded(exp)) => exp.chunk(),
        }
    }

    fn remaining(&self) -> usize {
        match self {
            Self(UnescapedBufInner::Literal(lit)) => lit.remaining(),
            Self(UnescapedBufInner::Expanded(exp)) => exp.remaining(),
        }
    }

    fn try_copy_to_slice(&mut self, dst: &mut [u8]) -> Result<(), crate::BufUnderflow> {
        match self {
            Self(UnescapedBufInner::Literal(lit)) => lit.try_copy_to_slice(dst),
            Self(UnescapedBufInner::Expanded(exp)) => exp.try_copy_to_slice(dst),
        }
    }
}
/// Access to the content of a token, in literal form and in unescaped form.
pub trait Content: fmt::Debug {
/// Type of the literal content borrowed from `self`; it must be convertible
/// into a buffer.
type Literal<'a>: IntoBuf
where
Self: 'a;
/// Returns the literal content.
fn literal<'a>(&'a self) -> Self::Literal<'a>;
/// Reports whether the content is escaped.
// NOTE(review): presumably "escaped" means the literal contains at least one
// escape sequence — confirm against implementors.
fn is_escaped(&self) -> bool;
/// Returns the content as an `Unescaped` value over the literal type.
fn unescaped<'a>(&'a self) -> Unescaped<Self::Literal<'a>>;
}
/// Describes what was expected at the point where an unexpected byte was found.
///
/// The `Display` output is a phrase that reads naturally after the word
/// "expected" in an error message.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum Expect {
    /// A boundary character or end of file.
    Boundary,
    /// One specific character.
    Char(char),
    /// A digit `0`..`9`.
    Digit,
    /// A digit, an exponent character, a boundary character, or end of file.
    DigitExpOrBoundary,
    /// An exponent sign or an exponent digit.
    DigitOrExpSign,
    /// A digit, a boundary character, or end of file.
    DigitOrBoundary,
    /// A `.`, an exponent character, a boundary character, or end of file.
    DotExpOrBoundary,
    /// A character that may follow `\` in an escape sequence.
    EscChar,
    /// A string character.
    StrChar,
    /// A character that may start a token.
    TokenStartChar,
    /// A hex digit inside a Unicode escape sequence.
    UnicodeEscHexDigit,
}

impl fmt::Display for Expect {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = match self {
            // The only variant that needs formatting rather than a fixed phrase.
            Self::Char(c) => return write!(f, "character '{c}'"),
            Self::Boundary => "boundary character or EOF",
            Self::Digit => "digit character '0'..'9'",
            Self::DigitOrBoundary => "digit character '0'..'9', boundary character, or EOF",
            Self::DigitExpOrBoundary => {
                "digit character '0'..'9', exponent character 'E' or 'e', boundary character, or EOF"
            }
            Self::DigitOrExpSign => {
                "exponent sign character '+' or '-', or exponent digit character '0'..'9'"
            }
            // Fixed: the message used to contain a stray quote before "exponent".
            Self::DotExpOrBoundary => {
                "character '.', exponent character 'E' or 'e', boundary character, or EOF"
            }
            // Fixed: 'b' and 'f' are valid escape characters too and were
            // missing from the list.
            Self::EscChar => {
                "escape sequence character '\\', '\"', '/', 'b', 'f', 'n', 'r', 't', or 'u'"
            }
            Self::StrChar => "string character",
            Self::TokenStartChar => "token start character",
            Self::UnicodeEscHexDigit => {
                "Unicode escape sequence hex digit '0'..'9', 'A'..'F', or 'a'..'f'"
            }
        };
        f.write_str(text)
    }
}
/// Category of a lexical error, with the details needed to describe it.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum ErrorKind {
/// A Unicode escape sequence contained an invalid surrogate or surrogate pair.
BadSurrogate {
/// The first surrogate code unit that was read.
first: u16,
/// The second code unit, when one was read after a high surrogate.
second: Option<u16>,
// NOTE(review): meaning of `offset` is not visible in this file (it is
// ignored when formatting) — confirm against the code that builds this.
offset: u8,
},
/// A UTF-8 continuation byte was invalid.
BadUtf8ContByte {
/// Length in bytes of the UTF-8 sequence being read.
seq_len: u8,
/// Index of the offending byte, reported as "byte #offset".
offset: u8,
/// The offending byte value.
value: u8,
},
/// Reported as "read error".
Read,
/// A byte other than what was expected was found.
UnexpectedByte {
/// Token being recognized when the byte was found, if any.
token: Option<Token>,
/// What was expected instead.
expect: Expect,
/// The byte actually found.
actual: u8,
},
/// End of file was reached inside the given token.
UnexpectedEof(Token),
}
impl ErrorKind {
pub(crate) fn bad_utf8_cont_byte(seq_len: u8, offset: u8, value: u8) -> ErrorKind {
ErrorKind::BadUtf8ContByte {
seq_len,
offset,
value,
}
}
pub(crate) fn expect_boundary(token: Token, actual: u8) -> ErrorKind {
let expect = Expect::Boundary;
ErrorKind::UnexpectedByte {
token: Some(token),
expect,
actual,
}
}
pub(crate) fn expect_char(token: Token, actual: u8, expect: char) -> ErrorKind {
let expect = Expect::Char(expect);
ErrorKind::UnexpectedByte {
token: Some(token),
expect,
actual,
}
}
pub(crate) fn expect_digit(actual: u8) -> ErrorKind {
let expect = Expect::Digit;
ErrorKind::UnexpectedByte {
token: Some(Token::Num),
expect,
actual,
}
}
pub(crate) fn expect_digit_exp_or_boundary(actual: u8) -> ErrorKind {
let expect = Expect::DigitExpOrBoundary;
ErrorKind::UnexpectedByte {
token: Some(Token::Num),
expect,
actual,
}
}
pub(crate) fn expect_digit_or_boundary(actual: u8) -> ErrorKind {
let expect = Expect::DigitOrBoundary;
ErrorKind::UnexpectedByte {
token: Some(Token::Num),
expect,
actual,
}
}
pub(crate) fn expect_dot_exp_or_boundary(actual: u8) -> ErrorKind {
let expect = Expect::DotExpOrBoundary;
ErrorKind::UnexpectedByte {
token: Some(Token::Num),
expect,
actual,
}
}
pub(crate) fn expect_esc_char(actual: u8) -> ErrorKind {
let expect = Expect::EscChar;
ErrorKind::UnexpectedByte {
token: Some(Token::Str),
expect,
actual,
}
}
pub(crate) fn expect_exp_sign_or_digit(actual: u8) -> ErrorKind {
let expect = Expect::DigitOrExpSign;
ErrorKind::UnexpectedByte {
token: Some(Token::Num),
expect,
actual,
}
}
pub(crate) fn expect_str_char(actual: u8) -> ErrorKind {
let expect = Expect::StrChar;
ErrorKind::UnexpectedByte {
token: Some(Token::Str),
expect,
actual,
}
}
pub(crate) fn expect_token_start_char(actual: u8) -> ErrorKind {
let expect = Expect::TokenStartChar;
ErrorKind::UnexpectedByte {
token: None,
expect,
actual,
}
}
pub(crate) fn expect_unicode_esc_hex_digit(actual: u8) -> ErrorKind {
let expect = Expect::UnicodeEscHexDigit;
ErrorKind::UnexpectedByte {
token: Some(Token::Str),
expect,
actual,
}
}
pub(crate) fn fmt_at(&self, f: &mut fmt::Formatter, pos: Option<&Pos>) -> fmt::Result {
match self {
Self::BadSurrogate {
first: lo,
second: None,
offset: _,
} if (0xdc00..=0xdfff).contains(lo) => {
write!(
f,
"bad Unicode escape sequence: low surrogate '\\u{lo:04X}' without preceding high surrogate"
)?;
}
Self::BadSurrogate {
first: hi,
second: None,
offset: _,
} => {
write!(
f,
"bad Unicode escape sequence: high surrogate '\\u{hi:04X}' not followed by low surrogate"
)?;
}
Self::BadSurrogate {
first: hi,
second: Some(lo),
offset: _,
} => {
write!(
f,
"bad Unicode escape sequence surrogate pair: high surrogate '\\u{hi:04X}' followed by invalid low surrogate '\\u{lo:04X}'"
)?;
}
Self::BadUtf8ContByte {
seq_len,
offset,
value,
} => {
write!(
f,
"bad UTF-8 continuation byte 0x{value:02x} in {seq_len}-byte UTF-8 sequence (byte #{offset})"
)?;
}
Self::Read => write!(f, "read error")?,
Self::UnexpectedByte {
token,
expect,
actual,
} if (b' '..=0x7e).contains(actual) => {
write!(
f,
"expected {expect} but got character '{}' (ASCII 0x{actual:02x})",
*actual as char
)?;
if let Some(t) = token {
write!(f, " in {t} token")?;
}
}
Self::UnexpectedByte {
token,
expect,
actual,
} => {
write!(f, "expected {expect} but got byte {actual:02x}")?;
if let Some(t) = token {
write!(f, " in {t} token")?;
}
}
Self::UnexpectedEof(token) => {
write!(f, "unexpected EOF in {token} token")?;
}
};
if let Some(p) = pos {
write!(f, " at {}", *p)?;
}
Ok(())
}
}
impl fmt::Display for ErrorKind {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.fmt_at(f, None)
}
}
/// A lexical error: a standard error that is `Send + Sync` and additionally
/// exposes its category and position.
pub trait Error: std::error::Error + Send + Sync {
/// Returns the category of this error.
fn kind(&self) -> ErrorKind;
/// Returns the position associated with this error.
fn pos(&self) -> &Pos;
}
/// A lexical analyzer that yields tokens one at a time and exposes either the
/// content or the error associated with its current state.
pub trait Analyzer {
/// Type giving access to token content.
type Content: Content;
/// Type of error reported by the analyzer.
type Error: Error;
/// Advances the analyzer and returns the kind of the next token.
fn next(&mut self) -> Token;
/// Returns the content, via `try_content`.
///
/// # Panics
///
/// Panics if `try_content` returns an error.
#[inline]
fn content(&self) -> Self::Content {
self.try_content().unwrap()
}
/// Returns the error, via `try_content`.
///
/// # Panics
///
/// Panics if `try_content` returns content rather than an error.
#[inline]
fn err(&self) -> Self::Error {
self.try_content().unwrap_err()
}
/// Returns the analyzer's position.
fn pos(&self) -> &Pos;
/// Returns the content on success, or the error otherwise.
fn try_content(&self) -> Result<Self::Content, Self::Error>;
}
/// Converts one ASCII hex digit (`0`-`9`, `a`-`f`, `A`-`F`) to its numeric value.
///
/// # Panics
///
/// Panics if `b` is not an ASCII hex digit.
pub(crate) fn hex2u16(b: u8) -> u16 {
    // `to_digit(16)` accepts exactly the ASCII hex digits in either case.
    match char::from(b).to_digit(16) {
        Some(value) => value as u16,
        None => panic!("invalid hex character: 0x{b:02x}"),
    }
}
/// Expands the backslash escape sequences in `literal` and appends the result
/// to `dst`.
///
/// Bytes outside escape sequences are copied through unchanged, and whatever
/// `dst` already contains is kept. The recognized escapes are `\"`, `\\`, `\/`,
/// `\b`, `\t`, `\f`, `\n`, `\r`, and `\uXXXX` with four hex digits. A `\uXXXX`
/// high surrogate must be followed immediately by a `\uXXXX` low surrogate; the
/// pair is appended as one UTF-8 encoded code point.
///
/// # Panics
///
/// Panics if the byte after `\` is not a recognized escape character, if a
/// `\u` escape contains a non-hex digit, if a low surrogate appears without a
/// preceding high surrogate, if a high surrogate is not followed by a `\u` low
/// surrogate, or if the input ends inside an escape sequence.
pub fn unescape(literal: impl IntoBuf, dst: &mut Vec<u8>) {
let mut literal = literal.into_buf();
if !literal.has_remaining() {
return;
}
// The output is usually about as long as the input, so reserve that up front.
dst.reserve(literal.remaining());
// State of the escape sequence in progress. `len` counts the bytes consumed
// after the first backslash (0 = only the backslash seen so far); `hi` and
// `lo` accumulate the first and second `\uXXXX` values.
#[derive(Default)]
struct Esc {
len: u32, hi: u32, lo: u32, }
// `None` while outside an escape sequence. The state lives outside the chunk
// loop so that a sequence split across chunks is carried over.
let mut esc: Option<Esc> = None;
loop {
let chunk = literal.chunk();
// `i` is the start of the pending run of plain bytes not yet copied to
// `dst`; `j` is the read cursor within the chunk.
let (mut i, mut j) = (0usize, 0usize);
loop {
let b = chunk[j];
match &mut esc {
// Plain byte: just extend the pending run.
None if b != b'\\' => j += 1,
// Backslash: flush the pending run and start an escape sequence.
None => {
dst.extend_from_slice(&chunk[i..j]);
esc = Some(Esc::default());
j += 1;
i = j;
}
// Byte right after the backslash selects the kind of escape.
Some(e) if e.len == 0 => {
// Emits a single-byte escape and leaves escape state.
let mut single = |b: u8, esc: &mut Option<Esc>| {
dst.push(b);
*esc = None;
j += 1;
i = j;
};
match b {
b'"' | b'\\' | b'/' => single(b, &mut esc),
b'b' => single(b'\x08', &mut esc),
b't' => single(b'\t', &mut esc),
b'f' => single(b'\x0c', &mut esc),
b'n' => single(b'\n', &mut esc),
b'r' => single(b'\r', &mut esc),
b'u' => {
e.len = 1;
j += 1;
i = j;
}
_ => panic!(r#"invalid escape sequence byte after '\': 0x{b:02x}"#),
}
}
// The four hex digits of the first `\uXXXX`, most significant first.
Some(e) if (1..=4).contains(&e.len) => {
let shift = 4 * (4 - e.len);
e.hi |= (hex2u16(b) as u32) << shift;
e.len += 1;
if e.len == 5 {
match e.hi {
// High surrogate: stay in escape state and wait for the low half.
0xd800..=0xdbff => (),
0xdc00..=0xdfff => panic!(
"Unicode escape low surrogate without preceding high surrogate: 0x{:02x}",
e.hi
),
_ => {
append_code_point(e.hi, dst);
esc = None;
}
}
}
j += 1;
i = j;
}
// After a high surrogate, the next two bytes must be `\` and `u`.
Some(e) if e.len == 5 && b == b'\\' => {
e.len = 6;
j += 1;
i = j;
}
Some(e) if e.len == 5 => panic!(
r#"expected '\' to start low surrogate Unicode escape after high surrogate 0x{:04x}, found byte 0x{b:02x}"#,
e.hi
),
Some(e) if e.len == 6 && b == b'u' => {
e.len = 7;
j += 1;
i = j;
}
Some(e) if e.len == 6 => panic!(
r#"expected '\u' to start low surrogate Unicode escape after high surrogate 0x{:04x}, found '\' followed by byte {b:02x}"#,
e.hi
),
// The four hex digits of the second `\uXXXX`, most significant first.
Some(e) if (7..=10).contains(&e.len) => {
let shift = 4 * (10 - e.len);
e.lo |= (hex2u16(b) as u32) << shift;
e.len += 1;
if e.len == 11 {
match e.lo {
0xdc00..=0xdfff => {
// Combine the surrogate pair into one supplementary code point.
let code_point =
0x10000 + (((e.hi - 0xd800) << 10) | (e.lo - 0xdc00));
append_code_point(code_point, dst);
esc = None;
}
_ => {
panic!(
"Unicode escape high surrogate not followed by low surrogate: 0x{:04x} and then 0x{:04x}",
e.hi, e.lo
)
}
}
}
j += 1;
i = j;
}
_ => unreachable!(),
}
if j == chunk.len() {
break;
}
}
// Flush the plain bytes left at the end of the chunk, then move to the next one.
dst.extend_from_slice(&chunk[i..j]);
literal.advance(chunk.len());
if !literal.has_remaining() {
break;
}
}
// Still inside an escape sequence at end of input. The message says
// "Unicode escape sequence" even when only a lone backslash was seen.
if esc.is_some() {
panic!("unexpected end of input within Unicode escape sequence");
}
}
/// Appends the UTF-8 encoding of `code_point` to `dst`.
///
/// Callers must pass a valid Unicode scalar value; anything else is treated as
/// unreachable and panics.
fn append_code_point(code_point: u32, dst: &mut Vec<u8>) {
    let Some(scalar) = char::from_u32(code_point) else {
        unreachable!()
    };
    // Four bytes is the maximum length of a UTF-8 encoded scalar value.
    let mut scratch = [0u8; 4];
    let encoded: &str = scalar.encode_utf8(&mut scratch);
    dst.extend_from_slice(encoded.as_bytes());
}
#[cfg(test)]
mod tests {
use super::*;
use rstest::rstest;
use std::collections::{BTreeMap, HashMap};
// Checks `Token::is_literal` for every variant: only the three literal tokens
// are literals.
#[rstest]
#[case(Token::ArrBegin, false)]
#[case(Token::ArrEnd, false)]
#[case(Token::Eof, false)]
#[case(Token::Err, false)]
#[case(Token::LitFalse, true)]
#[case(Token::LitNull, true)]
#[case(Token::LitTrue, true)]
#[case(Token::NameSep, false)]
#[case(Token::Num, false)]
#[case(Token::ObjBegin, false)]
#[case(Token::ObjEnd, false)]
#[case(Token::Str, false)]
#[case(Token::ValueSep, false)]
#[case(Token::White, false)]
fn test_token_is_literal(#[case] token: Token, #[case] is_literal: bool) {
assert_eq!(is_literal, token.is_literal());
}
// Checks `Token::is_pseudo` for every variant: only EOF, error, and whitespace
// are pseudo-tokens.
#[rstest]
#[case(Token::ArrBegin, false)]
#[case(Token::ArrEnd, false)]
#[case(Token::Eof, true)]
#[case(Token::Err, true)]
#[case(Token::LitFalse, false)]
#[case(Token::LitNull, false)]
#[case(Token::LitTrue, false)]
#[case(Token::NameSep, false)]
#[case(Token::Num, false)]
#[case(Token::ObjBegin, false)]
#[case(Token::ObjEnd, false)]
#[case(Token::Str, false)]
#[case(Token::ValueSep, false)]
#[case(Token::White, true)]
fn test_token_is_pseudo(#[case] token: Token, #[case] is_pseudo: bool) {
assert_eq!(is_pseudo, token.is_pseudo());
}
// Checks `Token::is_primitive` for every variant: literals, numbers, and
// strings are primitive.
#[rstest]
#[case(Token::ArrBegin, false)]
#[case(Token::ArrEnd, false)]
#[case(Token::Eof, false)]
#[case(Token::Err, false)]
#[case(Token::LitFalse, true)]
#[case(Token::LitNull, true)]
#[case(Token::LitTrue, true)]
#[case(Token::NameSep, false)]
#[case(Token::Num, true)]
#[case(Token::ObjBegin, false)]
#[case(Token::ObjEnd, false)]
#[case(Token::Str, true)]
#[case(Token::ValueSep, false)]
#[case(Token::White, false)]
fn test_token_is_primitive(#[case] token: Token, #[case] is_primitive: bool) {
assert_eq!(is_primitive, token.is_primitive());
}
// Checks `Token::is_punct` for every variant: brackets, braces, and the two
// separators are punctuation.
#[rstest]
#[case(Token::ArrBegin, true)]
#[case(Token::ArrEnd, true)]
#[case(Token::Eof, false)]
#[case(Token::Err, false)]
#[case(Token::LitFalse, false)]
#[case(Token::LitNull, false)]
#[case(Token::LitTrue, false)]
#[case(Token::NameSep, true)]
#[case(Token::Num, false)]
#[case(Token::ObjBegin, true)]
#[case(Token::ObjEnd, true)]
#[case(Token::Str, false)]
#[case(Token::ValueSep, true)]
#[case(Token::White, false)]
fn test_token_is_punct(#[case] token: Token, #[case] is_punct: bool) {
assert_eq!(is_punct, token.is_punct());
}
// Checks `Token::is_terminal` for every variant: only EOF and error are
// terminal.
#[rstest]
#[case(Token::ArrBegin, false)]
#[case(Token::ArrEnd, false)]
#[case(Token::Eof, true)]
#[case(Token::Err, true)]
#[case(Token::LitFalse, false)]
#[case(Token::LitNull, false)]
#[case(Token::LitTrue, false)]
#[case(Token::NameSep, false)]
#[case(Token::Num, false)]
#[case(Token::ObjBegin, false)]
#[case(Token::ObjEnd, false)]
#[case(Token::Str, false)]
#[case(Token::ValueSep, false)]
#[case(Token::White, false)]
fn test_token_is_terminal(#[case] token: Token, #[case] is_terminal: bool) {
assert_eq!(is_terminal, token.is_terminal());
}
// Checks `Token::static_content` for every variant: punctuation and literals
// have fixed text, everything else yields `None`.
#[rstest]
#[case(Token::ArrBegin, Some("["))]
#[case(Token::ArrEnd, Some("]"))]
#[case(Token::Eof, None)]
#[case(Token::Err, None)]
#[case(Token::LitFalse, Some("false"))]
#[case(Token::LitNull, Some("null"))]
#[case(Token::LitTrue, Some("true"))]
#[case(Token::NameSep, Some(":"))]
#[case(Token::Num, None)]
#[case(Token::ObjBegin, Some("{"))]
#[case(Token::ObjEnd, Some("}"))]
#[case(Token::Str, None)]
#[case(Token::ValueSep, Some(","))]
#[case(Token::White, None)]
fn test_token_static_content(#[case] token: Token, #[case] static_content: Option<&str>) {
assert_eq!(static_content, token.static_content());
}
// Checks the `Display` output of every `Token` variant.
#[rstest]
#[case(Token::ArrBegin, "[")]
#[case(Token::ArrEnd, "]")]
#[case(Token::Eof, "EOF")]
#[case(Token::Err, "error")]
#[case(Token::LitFalse, "false")]
#[case(Token::LitNull, "null")]
#[case(Token::LitTrue, "true")]
#[case(Token::NameSep, ":")]
#[case(Token::Num, "number")]
#[case(Token::ObjBegin, "{")]
#[case(Token::ObjEnd, "}")]
#[case(Token::Str, "string")]
#[case(Token::ValueSep, ",")]
#[case(Token::White, "whitespace")]
fn test_token_display(#[case] token: Token, #[case] expect: &str) {
assert_eq!(expect, format!("{token}"));
}
// Checks that the buffer obtained from an `Unescaped<&str>` exposes the same
// bytes as the string, whichever variant it came from: initial length and
// chunk, state after advancing one byte, and state after draining the rest.
#[rstest]
#[case(Unescaped::Literal("foo"), "foo")]
#[case(Unescaped::Expanded("bar".to_string()), "bar")]
fn test_unescaped_str_into_buf(#[case] u: Unescaped<&str>, #[case] expect: &str) {
    let mut b = u.into_buf();
    assert_eq!(expect.len(), b.remaining());
    assert_eq!(expect, str::from_utf8(b.chunk()).unwrap());

    // Number of bytes skipped with `advance`; stays 0 for an empty input.
    let mut consumed = 0;
    if b.remaining() > 0 {
        b.advance(1);
        consumed = 1;
        assert_eq!(expect.len() - 1, b.remaining());
        assert_eq!(&expect[1..], str::from_utf8(b.chunk()).unwrap());
    }

    // Size the destination from what is actually left, so that an empty input
    // cannot underflow the way `expect.len() - 1` would.
    let mut rest = vec![0; b.remaining()];
    b.copy_to_slice(&mut rest);
    assert_eq!(&expect.as_bytes()[consumed..], rest.as_slice());
    assert_eq!(0, b.remaining());
    assert_eq!(b"", b.chunk())
}
// Exercises the trait surface of `Unescaped<&str>` using two pairs of values
// that hold the same text in different variants: a1/a2 hold "a" and b1/b2 hold
// "bb", each once as `Literal` and once as `Expanded`.
#[test]
fn test_unescaped_str() {
let a1 = Unescaped::Literal("a");
let b1 = Unescaped::Expanded("bb".to_string());
let a2 = Unescaped::Expanded("a".to_string());
let b2 = Unescaped::Literal("bb");
// Conversion into `String`.
assert_eq!("a", Into::<String>::into(a1.clone()));
assert_eq!("bb", Into::<String>::into(b1.clone()));
assert_eq!("a", Into::<String>::into(a2.clone()));
assert_eq!("bb", Into::<String>::into(b2.clone()));
// Variant accessors and predicates.
assert!(matches!(a1.literal(), Some(&"a")));
assert!(b1.literal().is_none());
assert!(a2.literal().is_none());
assert!(matches!(b2.literal(), Some(&"bb")));
assert!(a1.expanded().is_none());
assert!(matches!(b1.expanded(), Some("bb")));
assert!(matches!(a2.expanded(), Some("a")));
assert!(b2.expanded().is_none());
assert!(a1.is_literal());
assert!(!a1.is_expanded());
assert!(!b1.is_literal());
assert!(b1.is_expanded());
// `str` methods reached through `Deref`.
assert_eq!(1, a1.len());
assert_eq!(2, b1.len());
assert_eq!(1, a2.len());
assert_eq!(2, b2.len());
// `AsRef<str>` and `Display`.
let a3: &str = a1.as_ref();
let b3: &str = b1.as_ref();
let a4: &str = a2.as_ref();
let b4: &str = b2.as_ref();
assert_eq!("a", format!("{a1}"));
assert_eq!("bb", format!("{b1}"));
assert_eq!("a", format!("{a2}"));
assert_eq!("bb", format!("{b2}"));
assert_eq!("a", a3);
assert_eq!("bb", b3);
assert_eq!("a", a4);
assert_eq!("bb", b4);
// `AsRef<[u8]>`.
let x1: &[u8] = a1.as_ref();
let y1: &[u8] = b1.as_ref();
let x2: &[u8] = a2.as_ref();
let y2: &[u8] = b2.as_ref();
assert_eq!(b"a", x1);
assert_eq!(b"bb", y1);
assert_eq!(b"a", x2);
assert_eq!(b"bb", y2);
// Equality and ordering between `Unescaped` values, across variants.
assert_eq!(a1, a2);
assert_eq!(a2, a1);
assert_eq!(b1, b2);
assert_eq!(b2, b1);
assert_ne!(a1, b1);
assert_ne!(b1, a1);
assert_ne!(a1, b2);
assert_ne!(b1, a2);
assert!(a1 < b1);
assert!(a1 < b2);
assert!(a2 < b1);
assert!(a2 < b2);
assert!(b1 > a1);
assert!(b1 > a2);
assert!(b2 > a1);
assert!(b2 > a2);
// Equality with `&str`, in both operand orders.
assert_eq!("a", a1);
assert_eq!(a1, "a");
assert_eq!("bb", b1);
assert_eq!(b1, "bb");
assert_eq!("a", a2);
assert_eq!(a2, "a");
assert_eq!("bb", b2);
assert_eq!(b2, "bb");
// Equality with `String`, in both operand orders.
assert_eq!("a".to_string(), a1);
assert_eq!(a1, "a".to_string());
assert_eq!("bb".to_string(), b1);
assert_eq!(b1, "bb".to_string());
assert_eq!("a".to_string(), a2);
assert_eq!(a2, "a".to_string());
assert_eq!("bb".to_string(), b2);
assert_eq!(b2, "bb".to_string());
// Ordering against `&str`, in both operand orders.
assert!(a1 < "bb");
assert!("bb" > a1);
assert!(b1 > "a");
assert!("a" < b1);
// Hash map keyed by `Unescaped`: lookups by `&str` and by the other variant
// holding the same text must find the same entry.
let mut m1 = HashMap::new();
m1.insert(a1.clone(), "a1");
m1.insert(b1.clone(), "b1");
assert_eq!(Some(&"a1"), m1.get("a"));
assert_eq!(Some(&"a1"), m1.get(&a2));
assert_eq!(Some(&"b1"), m1.get("bb"));
assert_eq!(Some(&"b1"), m1.get(&b2));
assert!(!m1.contains_key("aa"));
// Ordered map keyed by `Unescaped`: same expectations, then again with the
// variants swapped between the stored keys and the lookup keys.
let mut m2 = BTreeMap::new();
m2.insert(a1.clone(), "a1");
m2.insert(b1.clone(), "b1");
assert_eq!(Some(&"a1"), m2.get("a"));
assert_eq!(Some(&"a1"), m2.get(&a2));
assert_eq!(Some(&"b1"), m2.get("bb"));
assert_eq!(Some(&"b1"), m2.get(&b2));
assert!(!m2.contains_key("aa"));
assert_eq!(Some("a1"), m2.remove(&a2));
assert_eq!(Some("b1"), m2.remove(&b2));
m2.insert(b2.clone(), "b2");
m2.insert(a2.clone(), "a2");
assert_eq!(Some(&"a2"), m2.get("a"));
assert_eq!(Some(&"a2"), m2.get(&a1));
assert_eq!(Some(&"b2"), m2.get("bb"));
assert_eq!(Some(&"b2"), m2.get(&b1));
assert!(!m2.contains_key("aa"));
assert_eq!(Some("a2"), m2.remove("a"));
assert_eq!(Some("b2"), m2.remove("bb"));
}
// Checks the `Display` text of `ErrorKind`, and that `fmt_at` with a position
// appends " at <position>" to that same text.
#[rstest]
#[case(ErrorKind::BadSurrogate {
first: 0xD800,
second: None,
offset: 5,
}, "bad Unicode escape sequence: high surrogate '\\uD800' not followed by low surrogate")]
#[case(ErrorKind::BadUtf8ContByte {
seq_len: 3,
offset: 2,
value: 0x20,
}, "bad UTF-8 continuation byte 0x20 in 3-byte UTF-8 sequence (byte #2)")]
#[case(ErrorKind::Read, "read error")]
#[case(ErrorKind::UnexpectedByte {
token: Some(Token::Num),
expect: Expect::Digit,
actual: 0x41,
}, "expected digit character '0'..'9' but got character 'A' (ASCII 0x41) in number token")]
#[case(ErrorKind::UnexpectedEof(Token::Str), "unexpected EOF in string token")]
fn test_error_kind_display(#[case] kind: ErrorKind, #[case] expect: &str) {
assert_eq!(expect, format!("{kind}"));
// `fmt_at` needs a `Formatter`, so it is reached through a small `Display`
// wrapper that supplies a default position.
struct Wrapper(ErrorKind);
impl fmt::Display for Wrapper {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let pos = Pos::default();
self.0.fmt_at(f, Some(&pos))
}
}
assert_eq!(
format!("{expect} at line 1, column 1 (offset: 0)"),
format!("{}", Wrapper(kind))
);
}
#[rstest]
#[case(r#""""#, r#""""#)]
#[case(r#""f""#, r#""f""#)]
#[case(r#""fo""#, r#""fo""#)]
#[case(r#""foo""#, r#""foo""#)]
#[case(r#""\\""#, r#""\""#)]
#[case(r#""\/""#, r#""/""#)]
#[case(r#""\"""#, r#"""""#)]
#[case(r#""\b""#, "\"\x08\"")]
#[case(r#""\t""#, "\"\t\"")]
#[case(r#""\f""#, "\"\x0c\"")]
#[case(r#""\n""#, "\"\n\"")]
#[case(r#""\r""#, "\"\r\"")]
#[case(r#""\u0000""#, "\"\0\"")]
#[case(r#""\u0008""#, "\"\x08\"")]
#[case(r#""\u0009""#, "\"\t\"")]
#[case(r#""\u000c""#, "\"\x0c\"")]
#[case(r#""\u000C""#, "\"\x0C\"")]
#[case(r#""\u000a""#, "\"\n\"")]
#[case(r#""\u000A""#, "\"\n\"")]
#[case(r#""\u000d""#, "\"\r\"")]
#[case(r#""\u000D""#, "\"\r\"")]
#[case(r#""\u000D""#, "\"\r\"")]
#[case(r#""\u0021""#, r#""!""#)]
#[case(r#""\u0030""#, r#""0""#)]
#[case(r#""\u0041""#, r#""A""#)]
#[case(r#""\u0062""#, r#""b""#)]
#[case(r#""\u007F""#, "\"\x7f\"")] #[case(r#""\u00A9""#, r#""©""#)] #[case(r#""\u03A9""#, r#""Ω""#)] #[case(r#""\u0080""#, "\"\u{80}\"")] #[case(r#""\u07FF""#, "\"\u{7ff}\"")] #[case(r#""\u20AC""#, r#""€""#)] #[case(r#""\u2603""#, r#""☃""#)] #[case(r#""\u0800""#, "\"\u{800}\"")] #[case(r#""\uFFFF""#, "\"\u{ffff}\"")] #[case(r#""\ud83D\uDe00""#, r#""😀""#)] #[case(r#""\ud800\uDC00""#, "\"\u{10000}\"")] #[case(r#""\uDBFF\udfff""#, "\"\u{10FFFF}\"")] fn test_unescape_ok(#[case] input: &str, #[case] expect: &str) {
{
let mut buf = Vec::new();
unescape(input, &mut buf);
let actual = String::from_utf8(buf).unwrap();
assert_eq!(actual, expect);
}
{
let mut buf = Vec::new();
buf.extend_from_slice(b"foo");
unescape(input, &mut buf);
let actual = String::from_utf8(buf).unwrap();
assert_eq!(actual, format!("foo{expect}"));
}
}
// `unescape` must panic when the byte after a backslash is not a recognized
// escape character.
#[rstest]
#[case(r#""\a""#)]
#[case(r#""\U""#)]
#[case(r#""\:""#)]
#[should_panic(expected = "invalid escape sequence byte after '\\'")]
fn test_unescape_panic_invalid_esc_seq_byte(#[case] literal: &str) {
let mut buf = Vec::new();
unescape(literal, &mut buf);
}
// `unescape` must panic when a `\uXXXX` escape in the low-surrogate range
// appears without a preceding high surrogate.
#[rstest]
#[case(r#"\udc00"#)]
#[case(r#"\udfff"#)]
#[case(r#"\uDC00"#)]
#[case(r#"\uDFFF"#)]
#[case(r#""\udc00\ud800""#)]
#[should_panic(expected = "Unicode escape low surrogate without preceding high surrogate")]
fn test_unescape_panic_low_surrogate_no_high(#[case] literal: &str) {
    let mut buf = Vec::new();
    unescape(literal, &mut buf);
}

// `unescape` must panic when a high surrogate is followed by a `\uXXXX` escape
// that is outside the low-surrogate range. These are the cases the test above
// used to carry under its misleading name.
#[rstest]
#[case(r#"\ud800\u0000"#)]
#[case(r#"\ud800\ud7ff"#)]
#[case(r#"\ud800\ud800"#)]
#[case(r#"\ud800\ue000"#)]
#[case(r#"\ud800\uffff"#)]
#[case(r#"\udbff\u0000"#)]
#[case(r#"\udbff\ud7ff"#)]
#[case(r#"\udbff\ud800"#)]
#[case(r#"\udbff\ue000"#)]
#[case(r#"\udbff\uffff"#)]
#[should_panic(expected = "Unicode escape high surrogate not followed by low surrogate")]
fn test_unescape_panic_high_surrogate_invalid_low(#[case] literal: &str) {
    let mut buf = Vec::new();
    unescape(literal, &mut buf);
}
// `unescape` must panic when a high surrogate is followed by a `\uXXXX` escape
// that is not a low surrogate (quoted inputs).
#[rstest]
#[case(r#""\ud800\u0000""#)]
#[case(r#""\uDBFF\ud800""#)]
#[should_panic(expected = "Unicode escape high surrogate not followed by low surrogate")]
fn test_unescape_panic_high_surrogate_no_low(#[case] literal: &str) {
let mut buf = Vec::new();
unescape(literal, &mut buf);
}
// `unescape` must panic when the byte right after a high surrogate escape is
// not a backslash.
#[rstest]
#[case(r#"\ud800 "#)]
#[case(r#"\udbff "#)]
#[should_panic(
expected = r#"expected '\' to start low surrogate Unicode escape after high surrogate"#
)]
fn test_unescape_panic_high_surrogate_no_backslash(#[case] literal: &str) {
let mut buf = Vec::new();
unescape(literal, &mut buf);
}
// `unescape` must panic when a high surrogate escape is followed by a
// backslash escape other than `\u`.
#[rstest]
#[case(r#"\ud800\n"#)]
#[case(r#"\udbff\a"#)]
#[should_panic(
expected = r#"expected '\u' to start low surrogate Unicode escape after high surrogate"#
)]
fn test_unescape_panic_high_surrogate_no_backslash_u(#[case] literal: &str) {
let mut buf = Vec::new();
unescape(literal, &mut buf);
}
// `unescape` must panic when the input ends inside an escape sequence: after a
// lone backslash, part-way through `\uXXXX`, or part-way through the second
// half of a surrogate pair.
#[rstest]
#[case(r#"\"#)]
#[case(r#"\u"#)]
#[case(r#"\u0"#)]
#[case(r#"\u00"#)]
#[case(r#"\u000"#)]
#[case(r#"\u0000\"#)]
#[case(r#"\u0000\u"#)]
#[case(r#"\u0000\u1"#)]
#[case(r#"\u0000\u11"#)]
#[case(r#"\u0000\u111"#)]
#[case(r#"\ud800\u111"#)]
#[case(r#"\udbff\u111"#)]
#[should_panic(expected = "unexpected end of input within Unicode escape sequence")]
fn test_unescape_panic_unexpected_eof(#[case] literal: &str) {
let mut buf = Vec::new();
unescape(literal, &mut buf);
}
}