#![cfg_attr(feature = "read", doc = "- [`read`][`crate::lexical::read`]")]
#![cfg_attr(not(feature = "read"), doc = "- `read`")]
#![cfg_attr(feature = "pipe", doc = "- [`pipe`][`crate::lexical::pipe`]")]
#![cfg_attr(not(feature = "pipe"), doc = "- `pipe`")]
use crate::{Buf, EqStr, IntoBuf, OrdStr, Pos, Sink, StringBuf};
#[cfg(feature = "num")]
use crate::{sink, sink::InlineSink};
use alloc::string::String;
use core::{
borrow::Borrow,
cmp::{Ord, Ordering},
fmt,
hash::{Hash, Hasher},
ops::Deref,
};
#[cfg(feature = "num")]
use core::{num::ParseFloatError, str::FromStr};
#[cfg(feature = "num")]
/// Evaluates, at compile time, to the number of decimal digits needed to
/// write the magnitude of the integer expression `$n`.
///
/// Negative values are measured by their absolute value (so `i64::MIN`
/// yields 19), and zero yields 1.
macro_rules! max_decimal_digits {
    ($n:expr) => {{
        // Widen to a signed 128-bit value so the sign can be inspected
        // uniformly for every integer type.
        const SIGNED: i128 = $n as i128;
        const MAGNITUDE: u128 = if SIGNED < 0 {
            SIGNED.unsigned_abs()
        } else {
            $n as u128
        };
        // `checked_ilog10` is `None` only for zero, which still needs one digit.
        const DIGITS: usize = match MAGNITUDE.checked_ilog10() {
            Some(exp) => exp as usize + 1,
            None => 1,
        };
        DIGITS
    }};
}
// Internal macro that expands to the body of an integer-parsing function.
//
// Arms:
// - `into_buf`: entry point. `$b` is an `IntoBuf` identifier, `$t` the target
//   integer type, `$sign` is `1` if a leading `-` is accepted (else `0`), and
//   `$limit_val` is the value of `$t` with the most decimal digits.
// - `one_slice`: parses a single contiguous byte slice; always `return`s from
//   the enclosing function.
// - `calculate`: digit accumulation. `return`s `Ok`/`Err(NumError::Range)` from
//   the enclosing function when every byte is a digit, otherwise falls through.
//
// NOTE(review): for signed types a lone "-" reaches the `negative,
// cannot_overflow` arm with an empty digit slice and returns `Ok(0)` —
// confirm callers only pass well-formed number literals.
#[cfg(feature = "num")]
macro_rules! parse_int {
(into_buf, $b:ident, $t:ty, $sign:literal, $limit_val:expr) => {{
let buf = $b.into_buf();
let rem = buf.remaining();
// Longest literal that can possibly fit: optional sign plus maximum digits.
const N: usize = $sign + max_decimal_digits!($limit_val);
if 0 < rem && rem <= N {
let chunk = buf.chunk();
if chunk.len() == rem {
// Whole literal is contiguous: parse it in place.
parse_int!(one_slice, chunk, $t, $sign, $limit_val);
} else {
// Literal spans several chunks: gather it into a fixed-size inline buffer.
let mut dst = InlineSink::<N>::new();
sink(buf, &mut dst);
let slice = dst.as_slice();
parse_int!(one_slice, slice, $t, $sign, $limit_val);
}
}
// Reached only when the literal is empty or longer than `N` bytes.
Err(parse_int_err(buf, $sign == 1))
}};
(one_slice, $slice:ident, $t:ty, $sign:literal, $limit_val:expr) => {{
const MAX_DIGITS: usize = max_decimal_digits!($limit_val);
if $sign == 1 && $slice[0] == b'-' {
let digits = &$slice[1..];
if digits.len() < MAX_DIGITS {
parse_int!(calculate, digits, $t, negative, cannot_overflow);
} else if digits.len() == MAX_DIGITS {
parse_int!(calculate, digits, $t, negative, might_overflow);
}
} else {
let digits = $slice;
if digits.len() < MAX_DIGITS {
parse_int!(calculate, digits, $t, non_negative, cannot_overflow);
} else if digits.len() == MAX_DIGITS {
parse_int!(calculate, digits, $t, non_negative, might_overflow);
}
}
// No `calculate` arm returned: classify the failure as format or range.
return Err(parse_int_err($slice, $sign == 1));
}};
(calculate, $slice:ident, $t:ty, negative, cannot_overflow) => {{
let mut acc: $t = 0;
let mut i = 0;
while i < $slice.len() {
// Non-digit bytes wrap around to a value >= 10.
let d = $slice[i].wrapping_sub(b'0');
if d >= 10 {
break;
}
// Accumulate downward so the most negative value is representable.
acc = acc * 10 - d as $t;
i += 1;
}
if i == $slice.len() {
return Ok(acc);
}
}};
(calculate, $slice:ident, $t:ty, negative, might_overflow) => {{
if $slice.iter().all(|b| b.is_ascii_digit()) {
// Only the step that adds the final digit can overflow, so check just that one.
let (head, tail) = $slice.split_at($slice.len() - 1);
let acc = head
.iter()
.fold(0 as $t, |acc, &b| acc * 10 - (b - b'0') as $t);
let d = (tail[0] - b'0') as $t;
return if let Some(v) = acc.checked_mul(10).and_then(|a| a.checked_sub(d)) {
Ok(v)
} else {
Err(NumError::Range)
};
}
}};
(calculate, $slice:ident, $t:ty, non_negative, cannot_overflow) => {{
let mut acc: $t = 0;
let mut i = 0;
while i < $slice.len() {
// Non-digit bytes wrap around to a value >= 10.
let d = $slice[i].wrapping_sub(b'0');
if d >= 10 {
break;
}
acc = acc * 10 + d as $t;
i += 1;
}
if i == $slice.len() {
return Ok(acc);
}
}};
(calculate, $slice:ident, $t:ty, non_negative, might_overflow) => {{
if $slice.iter().all(|b| b.is_ascii_digit()) {
// Only the step that adds the final digit can overflow, so check just that one.
let (head, tail) = $slice.split_at($slice.len() - 1);
let acc = head
.iter()
.fold(0 as $t, |acc, &b| acc * 10 + (b - b'0') as $t);
let d = (tail[0] - b'0') as $t;
return if let Some(v) = acc.checked_mul(10).and_then(|a| a.checked_add(d)) {
Ok(v)
} else {
Err(NumError::Range)
};
}
}};
}
/// Parses a decimal integer literal into an `i64`.
///
/// The literal may start with `-` and must otherwise consist of ASCII digits.
///
/// # Errors
///
/// - [`NumError::Format`] if the literal is empty or contains a byte that is
///   not an ASCII digit (apart from a leading `-`).
/// - [`NumError::Range`] if the literal is all digits but too long or too
///   large for an `i64`.
#[cfg(feature = "num")]
pub fn parse_i64(literal: impl IntoBuf) -> Result<i64, NumError> {
parse_int!(into_buf, literal, i64, 1, i64::MIN)
}
/// Parses a decimal integer literal into a `u64`.
///
/// The literal must consist only of ASCII digits; a leading `-` is not
/// accepted.
///
/// # Errors
///
/// - [`NumError::Format`] if the literal is empty or contains a byte that is
///   not an ASCII digit.
/// - [`NumError::Range`] if the literal is all digits but too long or too
///   large for a `u64`.
#[cfg(feature = "num")]
pub fn parse_u64(literal: impl IntoBuf) -> Result<u64, NumError> {
parse_int!(into_buf, literal, u64, 0, u64::MAX)
}
/// Parses a decimal integer literal into an `i128`.
///
/// The literal may start with `-` and must otherwise consist of ASCII digits.
///
/// # Errors
///
/// - [`NumError::Format`] if the literal is empty or contains a byte that is
///   not an ASCII digit (apart from a leading `-`).
/// - [`NumError::Range`] if the literal is all digits but too long or too
///   large for an `i128`.
#[cfg(feature = "num_ext")]
pub fn parse_i128(literal: impl IntoBuf) -> Result<i128, NumError> {
parse_int!(into_buf, literal, i128, 1, i128::MIN)
}
/// Parses a number literal into an `f64` using [`f64::from_str`].
///
/// A literal held in one contiguous chunk is parsed in place. A literal that
/// spans several chunks is first gathered into a small inline buffer, or into
/// a heap-allocated `Vec` when its total length exceeds the inline capacity.
///
/// # Errors
///
/// - [`NumError::Format`] if the bytes are not valid UTF-8 or `f64::from_str`
///   rejects them.
/// - [`NumError::Range`] if the parsed value is infinite.
#[cfg(feature = "num")]
pub fn parse_f64(literal: impl IntoBuf) -> Result<f64, NumError> {
    // A tiny capacity under test forces the heap path to be exercised.
    #[cfg(test)]
    const MAX_INLINE_PARSE_LEN: usize = 7;
    #[cfg(not(test))]
    const MAX_INLINE_PARSE_LEN: usize = 128;

    /// Validates `bytes` as UTF-8 and parses them as an `f64`.
    ///
    /// The check is deliberate: this is a safe public entry point, so the
    /// input cannot be assumed to be valid UTF-8.
    fn parse_bytes(bytes: &[u8]) -> Result<f64, NumError> {
        let s = str::from_utf8(bytes).map_err(|_| NumError::Format)?;
        parse_f64_result(f64::from_str(s))
    }

    let buf = literal.into_buf();
    let rem = buf.remaining();

    // Fast path: the whole literal is already contiguous.
    if buf.chunk().len() == rem {
        return parse_bytes(buf.chunk());
    }

    // Choose the scratch buffer from the TOTAL length, not the first chunk's
    // length, so the inline buffer can never be asked to hold more than its
    // capacity.
    if rem <= MAX_INLINE_PARSE_LEN {
        let mut dst = InlineSink::<MAX_INLINE_PARSE_LEN>::new();
        sink(buf, &mut dst);
        parse_bytes(dst.as_slice())
    } else {
        let mut dst = Vec::with_capacity(rem);
        sink(buf, &mut dst);
        parse_bytes(&dst)
    }
}
/// Classifies why an integer literal could not be parsed.
///
/// Returns [`NumError::Format`] when the buffer is empty or any byte (after an
/// optional leading `-` when `signed` is true) is not an ASCII digit.
/// Returns [`NumError::Range`] when every remaining byte is a digit.
#[cfg(feature = "num")]
pub(crate) fn parse_int_err(mut buf: impl Buf, signed: bool) -> NumError {
    // Inspect the first byte by value so the buffer can be advanced afterwards.
    let first = buf.chunk().first().copied();
    match first {
        None => return NumError::Format,
        Some(b'-') if signed => buf.advance(1),
        Some(_) => {}
    }

    let mut chunk = buf.chunk();
    loop {
        if chunk.iter().any(|b| !b.is_ascii_digit()) {
            return NumError::Format;
        }
        let consumed = chunk.len();
        buf.advance(consumed);
        chunk = buf.chunk();
        if chunk.is_empty() {
            // Ran out of input having seen nothing but digits.
            return NumError::Range;
        }
    }
}
/// Maps the outcome of `f64::from_str` onto this module's error type.
///
/// A parse failure becomes [`NumError::Format`]; a successfully parsed but
/// infinite value becomes [`NumError::Range`].
#[cfg(feature = "num")]
#[inline(always)]
fn parse_f64_result(r: Result<f64, ParseFloatError>) -> Result<f64, NumError> {
    let value = r.map_err(|_| NumError::Format)?;
    if value.is_infinite() {
        Err(NumError::Range)
    } else {
        Ok(value)
    }
}
pub mod fixed;
pub mod state;
#[cfg(feature = "pipe")]
#[cfg_attr(docsrs, doc(cfg(feature = "pipe")))]
pub mod pipe;
#[cfg(feature = "read")]
#[cfg_attr(docsrs, doc(cfg(feature = "read")))]
pub mod read;
/// Kind of lexical token recognized in JSON text.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum Token {
    /// Array start, `[`.
    ArrBegin,
    /// Array end, `]`.
    ArrEnd,
    /// End of input pseudo-token.
    Eof,
    /// Error pseudo-token.
    Err,
    /// The literal `false`.
    LitFalse,
    /// The literal `null`.
    LitNull,
    /// The literal `true`.
    LitTrue,
    /// Name separator, `:`.
    NameSep,
    /// Number value.
    Num,
    /// Object start, `{`.
    ObjBegin,
    /// Object end, `}`.
    ObjEnd,
    /// String value.
    Str,
    /// Value separator, `,`.
    ValueSep,
    /// Whitespace pseudo-token.
    White,
}

impl Token {
    /// Returns `true` for [`Token::Eof`].
    #[inline(always)]
    pub const fn is_eof(&self) -> bool {
        matches!(self, Self::Eof)
    }

    /// Returns `true` for [`Token::Err`].
    #[inline(always)]
    pub const fn is_err(&self) -> bool {
        matches!(self, Self::Err)
    }

    /// Returns `true` for the literal tokens `false`, `null`, and `true`.
    #[inline(always)]
    pub const fn is_literal(&self) -> bool {
        matches!(self, Self::LitTrue | Self::LitFalse | Self::LitNull)
    }

    /// Returns `true` for literal, number, and string tokens.
    #[inline(always)]
    pub const fn is_primitive(&self) -> bool {
        self.is_literal() || matches!(self, Self::Num | Self::Str)
    }

    /// Returns `true` for the end-of-input, error, and whitespace tokens.
    #[inline(always)]
    pub const fn is_pseudo(&self) -> bool {
        self.is_terminal() || matches!(self, Self::White)
    }

    /// Returns `true` for the name and value separators.
    #[inline(always)]
    pub const fn is_punct(&self) -> bool {
        matches!(self, Self::ValueSep | Self::NameSep)
    }

    /// Returns `true` for the array and object begin/end tokens.
    #[inline(always)]
    pub const fn is_struct(&self) -> bool {
        matches!(
            self,
            Self::ObjBegin | Self::ObjEnd | Self::ArrBegin | Self::ArrEnd
        )
    }

    /// Returns `true` for the end-of-input and error tokens.
    #[inline(always)]
    pub const fn is_terminal(&self) -> bool {
        self.is_eof() || self.is_err()
    }

    /// Returns the fixed text of the token, or `None` for tokens whose text
    /// is not fixed.
    #[inline(always)]
    pub const fn static_content(&self) -> Option<&'static str> {
        let text = match self {
            Self::Eof | Self::Err | Self::Num | Self::Str | Self::White => return None,
            Self::ArrBegin => "[",
            Self::ArrEnd => "]",
            Self::ObjBegin => "{",
            Self::ObjEnd => "}",
            Self::NameSep => ":",
            Self::ValueSep => ",",
            Self::LitFalse => "false",
            Self::LitNull => "null",
            Self::LitTrue => "true",
        };
        Some(text)
    }
}

impl fmt::Display for Token {
    /// Writes the token's fixed text, or a descriptive word for tokens that
    /// have no fixed text.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match *self {
            // Structural and punctuation tokens.
            Self::ArrBegin => "[",
            Self::ArrEnd => "]",
            Self::ObjBegin => "{",
            Self::ObjEnd => "}",
            Self::NameSep => ":",
            Self::ValueSep => ",",
            // Literals.
            Self::LitFalse => "false",
            Self::LitNull => "null",
            Self::LitTrue => "true",
            // Tokens described by name.
            Self::Num => "number",
            Self::Str => "string",
            Self::White => "whitespace",
            Self::Eof => "EOF",
            Self::Err => "error",
        })
    }
}
/// Text that is either held as an original literal of type `T` or as an
/// owned `String` produced by expansion.
#[derive(Clone, Debug)]
pub enum Unescaped<T> {
    /// The original literal value.
    Literal(T),
    /// An owned, expanded string.
    Expanded(String),
}

impl<T> Unescaped<T> {
    /// Returns the literal value, or `None` if this is the expanded variant.
    #[inline(always)]
    pub fn literal(&self) -> Option<&T> {
        if let Self::Literal(lit) = self {
            Some(lit)
        } else {
            None
        }
    }

    /// Returns the expanded string, or `None` if this is the literal variant.
    #[inline(always)]
    pub fn expanded(&self) -> Option<&str> {
        if let Self::Expanded(owned) = self {
            Some(owned.as_str())
        } else {
            None
        }
    }

    /// Returns `true` if this is the literal variant.
    #[inline(always)]
    pub fn is_literal(&self) -> bool {
        self.literal().is_some()
    }

    /// Returns `true` if this is the expanded variant.
    #[inline(always)]
    pub fn is_expanded(&self) -> bool {
        !self.is_literal()
    }
}
/// Converts either variant into a single buffer type that reads its bytes.
impl<T: IntoBuf> IntoBuf for Unescaped<T> {
    type Buf = UnescapedBuf<T::Buf>;

    #[inline]
    fn into_buf(self) -> Self::Buf {
        let inner = match self {
            Self::Literal(lit) => UnescapedBufInner::Literal(lit.into_buf()),
            Self::Expanded(owned) => UnescapedBufInner::Expanded(owned.into_buf()),
        };
        UnescapedBuf(inner)
    }
}
impl AsRef<str> for Unescaped<&str> {
#[inline]
fn as_ref(&self) -> &str {
match self {
Unescaped::Literal(t) => t,
Unescaped::Expanded(e) => e.as_str(),
}
}
}
impl AsRef<[u8]> for Unescaped<&str> {
#[inline]
fn as_ref(&self) -> &[u8] {
match self {
Unescaped::Literal(t) => t.as_bytes(),
Unescaped::Expanded(e) => e.as_bytes(),
}
}
}
impl Deref for Unescaped<&str> {
type Target = str;
#[inline]
fn deref(&self) -> &str {
match self {
Unescaped::Literal(t) => t,
Unescaped::Expanded(e) => e.as_str(),
}
}
}
impl Borrow<str> for Unescaped<&str> {
#[inline]
fn borrow(&self) -> &str {
match self {
Unescaped::Literal(t) => t,
Unescaped::Expanded(e) => e.as_str(),
}
}
}
impl<T> fmt::Display for Unescaped<T>
where
T: fmt::Display,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Unescaped::Literal(t) => t.fmt(f),
Unescaped::Expanded(e) => e.fmt(f),
}
}
}
/// Marks equality on `Unescaped<T>` as a full equivalence relation.
impl<T> Eq for Unescaped<T>
where
    T: Eq + EqStr,
{
}
/// Converts into an owned `String`, reusing the expanded string when present.
impl<T> From<Unescaped<T>> for String
where
    String: From<T>,
{
    fn from(u: Unescaped<T>) -> Self {
        match u {
            Unescaped::Expanded(owned) => owned,
            Unescaped::Literal(lit) => String::from(lit),
        }
    }
}
impl<T> Hash for Unescaped<T>
where
T: Hash,
{
fn hash<H: Hasher>(&self, state: &mut H) {
match self {
Unescaped::Literal(t) => t.hash(state),
Unescaped::Expanded(e) => e.hash(state),
}
}
}
/// Total ordering over the content, regardless of which variant holds it.
impl<T> Ord for Unescaped<T>
where
    T: Eq + Ord + EqStr + OrdStr,
    Self: Eq + PartialOrd,
{
    fn cmp(&self, other: &Unescaped<T>) -> Ordering {
        match (self, other) {
            // Same variant on both sides: compare like with like.
            (Self::Literal(lhs), Self::Literal(rhs)) => Ord::cmp(lhs, rhs),
            (Self::Expanded(lhs), Self::Expanded(rhs)) => Ord::cmp(lhs, rhs),
            // Mixed variants: compare the literal against the string, then
            // flip the result when the literal is on the right-hand side.
            (Self::Literal(lit), Self::Expanded(owned)) => OrdStr::cmp(lit, owned.as_str()),
            (Self::Expanded(owned), Self::Literal(lit)) => {
                let literal_first = OrdStr::cmp(lit, owned.as_str());
                literal_first.reverse()
            }
        }
    }
}
impl<T> PartialEq<Unescaped<T>> for Unescaped<T>
where
T: for<'a> PartialEq<&'a str>,
T: PartialEq<T>,
{
fn eq(&self, other: &Unescaped<T>) -> bool {
match (self, other) {
(Unescaped::Literal(t1), Unescaped::Literal(t2)) => t1 == t2,
(Unescaped::Expanded(e1), Unescaped::Expanded(e2)) => e1 == e2,
(Unescaped::Literal(t1), Unescaped::Expanded(e2)) => *t1 == e2.as_str(),
(Unescaped::Expanded(e1), Unescaped::Literal(t2)) => *t2 == e1.as_str(),
}
}
}
impl<T> PartialEq<&str> for Unescaped<T>
where
T: for<'a> PartialEq<&'a str>,
{
fn eq(&self, other: &&str) -> bool {
match self {
Unescaped::Literal(t) => *t == *other,
Unescaped::Expanded(e) => e == other,
}
}
}
/// Compares a string slice against the content of an `Unescaped`.
impl<'a, 'b, T> PartialEq<Unescaped<T>> for &'a str
where
    T: PartialEq<&'b str>,
    'a: 'b,
{
    fn eq(&self, other: &Unescaped<T>) -> bool {
        match other {
            Unescaped::Expanded(owned) => *self == owned.as_str(),
            Unescaped::Literal(lit) => *lit == *self,
        }
    }
}
/// Compares the content against an owned `String`.
impl<T> PartialEq<String> for Unescaped<T>
where
    T: PartialEq<String>,
{
    fn eq(&self, other: &String) -> bool {
        match self {
            Self::Expanded(owned) => owned == other,
            Self::Literal(lit) => lit == other,
        }
    }
}
/// Compares an owned `String` against the content of an `Unescaped`.
impl<T> PartialEq<Unescaped<T>> for String
where
    T: PartialEq<String>,
{
    fn eq(&self, other: &Unescaped<T>) -> bool {
        match other {
            Unescaped::Expanded(owned) => self == owned,
            Unescaped::Literal(lit) => lit == self,
        }
    }
}
impl<T> PartialOrd<Unescaped<T>> for Unescaped<T>
where
T: for<'a> PartialOrd<&'a str>,
for<'a> &'a str: PartialOrd<T>,
T: PartialOrd<T>,
Self: PartialEq,
{
fn partial_cmp(&self, other: &Unescaped<T>) -> Option<Ordering> {
match (self, other) {
(Unescaped::Literal(t1), Unescaped::Literal(t2)) => t1.partial_cmp(t2),
(Unescaped::Expanded(e1), Unescaped::Expanded(e2)) => e1.partial_cmp(e2),
(Unescaped::Literal(t), Unescaped::Expanded(e)) => t.partial_cmp(&e.as_str()),
(Unescaped::Expanded(e), Unescaped::Literal(t)) => {
PartialOrd::<T>::partial_cmp(&e.as_str(), t)
}
}
}
}
impl<T> PartialOrd<&str> for Unescaped<T>
where
T: for<'a> PartialOrd<&'a str>,
Self: for<'a> PartialEq<&'a str>,
{
fn partial_cmp(&self, other: &&str) -> Option<Ordering> {
match self {
Unescaped::Literal(t) => t.partial_cmp(other),
Unescaped::Expanded(e) => e.as_str().partial_cmp(*other),
}
}
}
/// Orders a string slice relative to the content of an `Unescaped`.
impl<T> PartialOrd<Unescaped<T>> for &str
where
    Self: PartialOrd<T>,
    Self: for<'c> PartialEq<Unescaped<T>>,
{
    fn partial_cmp(&self, other: &Unescaped<T>) -> Option<Ordering> {
        match other {
            Unescaped::Expanded(owned) => {
                let text = owned.as_str();
                PartialOrd::<&str>::partial_cmp(self, &text)
            }
            Unescaped::Literal(lit) => self.partial_cmp(lit),
        }
    }
}
// Private representation behind `UnescapedBuf`: one buffer variant per
// `Unescaped` variant.
#[derive(Debug)]
enum UnescapedBufInner<B> {
// Buffer obtained from the literal value.
Literal(B),
// Buffer over the owned, expanded string.
Expanded(StringBuf),
}
/// Buffer produced by converting an [`Unescaped`] value with `into_buf`.
///
/// The field is private, so the variant in use is hidden from callers.
#[derive(Debug)]
pub struct UnescapedBuf<B>(UnescapedBufInner<B>);
/// Forwards every `Buf` operation to whichever inner buffer is held.
impl<B: Buf> Buf for UnescapedBuf<B> {
    #[inline]
    fn advance(&mut self, n: usize) {
        let Self(inner) = self;
        match inner {
            UnescapedBufInner::Literal(lit) => lit.advance(n),
            UnescapedBufInner::Expanded(owned) => owned.advance(n),
        }
    }

    #[inline]
    fn chunk(&self) -> &[u8] {
        let Self(inner) = self;
        match inner {
            UnescapedBufInner::Literal(lit) => lit.chunk(),
            UnescapedBufInner::Expanded(owned) => owned.chunk(),
        }
    }

    #[inline]
    fn remaining(&self) -> usize {
        let Self(inner) = self;
        match inner {
            UnescapedBufInner::Literal(lit) => lit.remaining(),
            UnescapedBufInner::Expanded(owned) => owned.remaining(),
        }
    }

    #[inline]
    fn try_copy_to_slice(&mut self, dst: &mut [u8]) -> Result<(), crate::BufUnderflow> {
        let Self(inner) = self;
        match inner {
            UnescapedBufInner::Literal(lit) => lit.try_copy_to_slice(dst),
            UnescapedBufInner::Expanded(owned) => owned.try_copy_to_slice(dst),
        }
    }
}
/// Error returned by the number parsing functions in this module.
#[cfg(feature = "num")]
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum NumError {
/// The literal does not have the form required for the requested type.
Format,
/// The literal has a valid form but its value does not fit the requested type.
Range,
}
/// Access to the text content of a token.
pub trait Content: fmt::Debug {
    /// Type of the literal content, convertible into a buffer.
    type Literal<'a>: IntoBuf
    where
        Self: 'a;

    /// Returns the literal content.
    fn literal<'a>(&'a self) -> Self::Literal<'a>;

    /// Returns the number of bytes in the literal content.
    fn literal_len(&self) -> usize {
        let buf = self.literal().into_buf();
        buf.remaining()
    }

    /// Reports whether the content is escaped.
    // NOTE(review): presumably true when the literal contains escape
    // sequences — confirm against implementors.
    fn is_escaped(&self) -> bool;

    /// Returns the content as an [`Unescaped`] value.
    fn unescaped<'a>(&'a self) -> Unescaped<Self::Literal<'a>>;

    /// Compares the literal content with `other` using the free function
    /// `unescaped_cmp`.
    #[inline(always)]
    fn unescaped_cmp(&self, other: &str) -> Ordering {
        let literal = self.literal();
        unescaped_cmp(literal, other)
    }

    /// Parses the literal content as an `i64`.
    #[cfg(feature = "num")]
    #[inline]
    fn parse_i64(&self) -> Result<i64, NumError> {
        let literal = self.literal();
        parse_i64(literal)
    }

    /// Parses the literal content as a `u64`.
    #[cfg(feature = "num")]
    #[inline]
    fn parse_u64(&self) -> Result<u64, NumError> {
        let literal = self.literal();
        parse_u64(literal)
    }

    /// Parses the literal content as an `i128`.
    #[cfg(feature = "num_ext")]
    #[inline]
    fn parse_i128(&self) -> Result<i128, NumError> {
        let literal = self.literal();
        parse_i128(literal)
    }

    /// Parses the literal content as an `f64`.
    #[cfg(feature = "num")]
    #[inline]
    fn parse_f64(&self) -> Result<f64, NumError> {
        let literal = self.literal();
        parse_f64(literal)
    }
}
/// Describes what the lexer expected to see when it hit an unexpected byte.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum Expect {
    /// A boundary character or end of input.
    Boundary,
    /// One specific character.
    Char(char),
    /// A decimal digit.
    Digit,
    /// A digit, decimal point, exponent marker, boundary character, or end of input.
    DigitDotExpOrBoundary,
    /// A digit, exponent marker, boundary character, or end of input.
    DigitExpOrBoundary,
    /// A digit, boundary character, or end of input.
    DigitOrBoundary,
    /// An exponent sign or an exponent digit.
    DigitOrExpSign,
    /// A decimal point, exponent marker, boundary character, or end of input.
    DotExpOrBoundary,
    /// A character that may follow `\` in a string escape sequence.
    EscChar,
    /// A character valid inside a string.
    StrChar,
    /// A character that may start a token.
    TokenStartChar,
    /// A hexadecimal digit of a `\u` escape sequence.
    UnicodeEscHexDigit,
}

impl fmt::Display for Expect {
    /// Writes a human-readable description of the expectation.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Boundary => f.write_str("boundary character or EOF"),
            Self::Char(c) => write!(f, "character '{c}'"),
            Self::Digit => f.write_str("digit character '0'..'9'"),
            // Names every alternative in the variant: digit, dot, exponent, boundary.
            Self::DigitDotExpOrBoundary => f.write_str(
                "digit character '0'..'9', decimal point character '.', exponent character 'E' or 'e', boundary character, or EOF",
            ),
            // No decimal point here: the variant name excludes it.
            Self::DigitExpOrBoundary => f.write_str(
                "digit character '0'..'9', exponent character 'E' or 'e', boundary character, or EOF",
            ),
            Self::DigitOrBoundary => {
                f.write_str("digit character '0'..'9', boundary character, or EOF")
            }
            Self::DigitOrExpSign => f.write_str(
                "exponent sign character '+' or '-', or exponent digit character '0'..'9'",
            ),
            Self::DotExpOrBoundary => f.write_str(
                "decimal point character '.', exponent character 'E' or 'e', boundary character, or EOF",
            ),
            // Lists every escape character accepted by `unescape_byte`.
            Self::EscChar => f.write_str(
                "escape sequence character '\\', '\"', '/', 'b', 'f', 'n', 'r', 't', or 'u'",
            ),
            Self::StrChar => f.write_str("string character"),
            Self::TokenStartChar => f.write_str("token start character"),
            Self::UnicodeEscHexDigit => f.write_str(
                "Unicode escape sequence hex digit '0'..'9', 'A'..'F', or 'a'..'f'",
            ),
        }
    }
}
/// Category of lexical error, with the details needed to describe it.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum ErrorKind {
/// A Unicode escape sequence contains an invalid surrogate or surrogate pair.
BadSurrogate {
/// First surrogate value read.
first: u16,
/// Second surrogate value, when one was read.
second: Option<u16>,
},
/// A UTF-8 sequence contains an invalid continuation byte.
BadUtf8ContByte {
/// Length in bytes of the UTF-8 sequence.
seq_len: u8,
/// Position of the offending byte within the sequence.
offset: u8,
/// Value of the offending byte.
value: u8,
},
/// Displayed as "read error".
Read,
/// A byte other than the expected one was encountered.
UnexpectedByte {
/// Token being recognized, when there is one.
token: Option<Token>,
/// What was expected instead.
expect: Expect,
/// The byte actually found.
actual: u8,
},
/// Input ended inside the given token.
UnexpectedEof(Token),
}
impl ErrorKind {
pub(crate) fn bad_surrogate(first: u16, second: Option<u16>) -> ErrorKind {
ErrorKind::BadSurrogate { first, second }
}
pub(crate) fn bad_utf8_cont_byte(seq_len: u8, offset: u8, value: u8) -> ErrorKind {
ErrorKind::BadUtf8ContByte {
seq_len,
offset,
value,
}
}
pub(crate) fn expect_boundary(token: Token, actual: u8) -> ErrorKind {
let expect = Expect::Boundary;
ErrorKind::UnexpectedByte {
token: Some(token),
expect,
actual,
}
}
pub(crate) fn expect_char(token: Token, actual: u8, expect: char) -> ErrorKind {
let expect = Expect::Char(expect);
ErrorKind::UnexpectedByte {
token: Some(token),
expect,
actual,
}
}
pub(crate) fn expect_digit(actual: u8) -> ErrorKind {
let expect = Expect::Digit;
ErrorKind::UnexpectedByte {
token: Some(Token::Num),
expect,
actual,
}
}
pub(crate) fn expect_digit_dot_exp_or_boundary(actual: u8) -> ErrorKind {
let expect = Expect::DigitDotExpOrBoundary;
ErrorKind::UnexpectedByte {
token: Some(Token::Num),
expect,
actual,
}
}
pub(crate) fn expect_digit_exp_or_boundary(actual: u8) -> ErrorKind {
let expect = Expect::DigitExpOrBoundary;
ErrorKind::UnexpectedByte {
token: Some(Token::Num),
expect,
actual,
}
}
pub(crate) fn expect_digit_or_boundary(actual: u8) -> ErrorKind {
let expect = Expect::DigitOrBoundary;
ErrorKind::UnexpectedByte {
token: Some(Token::Num),
expect,
actual,
}
}
pub(crate) fn expect_dot_exp_or_boundary(actual: u8) -> ErrorKind {
let expect = Expect::DotExpOrBoundary;
ErrorKind::UnexpectedByte {
token: Some(Token::Num),
expect,
actual,
}
}
pub(crate) fn expect_esc_char(actual: u8) -> ErrorKind {
let expect = Expect::EscChar;
ErrorKind::UnexpectedByte {
token: Some(Token::Str),
expect,
actual,
}
}
pub(crate) fn expect_exp_sign_or_digit(actual: u8) -> ErrorKind {
let expect = Expect::DigitOrExpSign;
ErrorKind::UnexpectedByte {
token: Some(Token::Num),
expect,
actual,
}
}
pub(crate) fn expect_str_char(actual: u8) -> ErrorKind {
let expect = Expect::StrChar;
ErrorKind::UnexpectedByte {
token: Some(Token::Str),
expect,
actual,
}
}
pub(crate) fn expect_token_start_char(actual: u8) -> ErrorKind {
let expect = Expect::TokenStartChar;
ErrorKind::UnexpectedByte {
token: None,
expect,
actual,
}
}
pub(crate) fn expect_unicode_esc_hex_digit(actual: u8) -> ErrorKind {
let expect = Expect::UnicodeEscHexDigit;
ErrorKind::UnexpectedByte {
token: Some(Token::Str),
expect,
actual,
}
}
pub(crate) fn fmt_at(&self, f: &mut fmt::Formatter, pos: Option<&Pos>) -> fmt::Result {
match self {
Self::BadSurrogate {
first: lo,
second: None,
} if (0xdc00..=0xdfff).contains(lo) => {
write!(
f,
"bad Unicode escape sequence: low surrogate '\\u{lo:04X}' does not follow a high surrogate"
)?;
}
Self::BadSurrogate {
first: hi,
second: None,
} => {
write!(
f,
"bad Unicode escape sequence: high surrogate '\\u{hi:04X}' not followed by low surrogate"
)?;
}
Self::BadSurrogate {
first: hi,
second: Some(lo),
} => {
write!(
f,
"bad Unicode escape sequence surrogate pair: high surrogate '\\u{hi:04X}' followed by invalid low surrogate '\\u{lo:04X}'"
)?;
}
Self::BadUtf8ContByte {
seq_len,
offset,
value,
} => {
write!(
f,
"bad UTF-8 continuation byte 0x{value:02x} in {seq_len}-byte UTF-8 sequence (byte #{offset})"
)?;
}
Self::Read => write!(f, "read error")?,
Self::UnexpectedByte {
token,
expect,
actual,
} if (b' '..=0x7e).contains(actual) => {
write!(
f,
"expected {expect} but got character '{}' (ASCII 0x{actual:02x})",
*actual as char
)?;
if let Some(t) = token {
write!(f, " in {t} token")?;
}
}
Self::UnexpectedByte {
token,
expect,
actual,
} => {
write!(f, "expected {expect} but got byte {actual:02x}")?;
if let Some(t) = token {
write!(f, " in {t} token")?;
}
}
Self::UnexpectedEof(token) => {
write!(f, "unexpected EOF in {token} token")?;
}
};
if let Some(p) = pos {
write!(f, " at {}", *p)?;
}
Ok(())
}
}
/// Displays the error description without any position suffix.
impl fmt::Display for ErrorKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        Self::fmt_at(self, f, None)
    }
}
/// A lexical error that exposes its category and an associated position.
pub trait Error: core::error::Error + Send + Sync {
/// Returns the category of the error.
fn kind(&self) -> ErrorKind;
/// Returns the position associated with the error.
fn pos(&self) -> &Pos;
}
/// A lexical analyzer that yields tokens and gives access to the content or
/// error associated with its current state.
pub trait Analyzer {
    /// Type giving access to token content.
    type Content: Content;
    /// Type of error reported by the analyzer.
    type Error: Error;

    /// Returns a token.
    // NOTE(review): presumably advances to and returns the next token in the
    // input — confirm against implementors.
    fn next(&mut self) -> Token;

    /// Returns the content from [`Analyzer::try_content`].
    ///
    /// # Panics
    ///
    /// Panics if `try_content` returns an error.
    #[inline]
    fn content(&self) -> Self::Content {
        let outcome = self.try_content();
        outcome.unwrap()
    }

    /// Returns the error from [`Analyzer::try_content`].
    ///
    /// # Panics
    ///
    /// Panics if `try_content` returns content.
    #[inline]
    fn err(&self) -> Self::Error {
        let outcome = self.try_content();
        outcome.unwrap_err()
    }

    /// Returns a reference to a position.
    fn pos(&self) -> &Pos;

    /// Returns either content or an error.
    fn try_content(&self) -> Result<Self::Content, Self::Error>;
}
/// Converts one ASCII hexadecimal digit into its numeric value (0..=15).
///
/// # Panics
///
/// Panics if `b` is not an ASCII hexadecimal digit.
#[inline(always)]
fn hex2u32(b: u8) -> u32 {
    let digit = match b {
        b'0'..=b'9' => b - b'0',
        b'A'..=b'F' => b - b'A' + 10,
        b'a'..=b'f' => b - b'a' + 10,
        _ => panic!("byte 0x{b:02x?} is not an ASCII hex digit"),
    };
    u32::from(digit)
}
/// Expands the backslash escape sequences in `literal` and appends the result
/// to `dst`.
///
/// Bytes outside escape sequences are copied through unchanged. An escape
/// sequence may be split across chunks of the input buffer. An empty input
/// writes nothing.
///
/// # Panics
///
/// Panics if an escape sequence is invalid: an unknown character after `\`,
/// a non-hex digit in a `\u` escape, a lone low surrogate, a high surrogate
/// not followed by a `\u` low surrogate, or input that ends inside an escape
/// sequence.
pub fn unescape(literal: impl IntoBuf, dst: &mut impl Sink) {
let mut literal = literal.into_buf();
if !literal.has_remaining() {
return;
}
dst.reserve(literal.remaining() - 1);
// Progress through the escape sequence currently being decoded:
//   len == 0      : just consumed the backslash
//   len in 1..=4  : reading the hex digits of the first `\u` escape into `hi`
//   len == 5      : `hi` is a high surrogate; expecting `\`
//   len == 6      : expecting `u`
//   len in 7..=10 : reading the hex digits of the low surrogate into `lo`
#[derive(Default)]
struct Esc {
len: u32, hi: u32, lo: u32, }
// Declared outside the chunk loop so an escape can continue into the next chunk.
let mut esc: Option<Esc> = None;
loop {
let chunk = literal.chunk();
// `i` marks the start of the pending run of plain bytes; `j` is the scan cursor.
let (mut i, mut j) = (0usize, 0usize);
loop {
let b = chunk[j];
match &mut esc {
None if b != b'\\' => j += 1,
None => {
// Backslash found: flush the plain bytes seen so far and start an escape.
dst.extend_from_slice(&chunk[i..j]);
esc = Some(Esc::default());
j += 1;
i = j;
}
Some(e) if e.len == 0 => {
// Emits a single-byte escape and leaves the escape state.
let mut single = |b: u8, esc: &mut Option<Esc>| {
dst.push(b);
*esc = None;
j += 1;
i = j;
};
match b {
b'"' | b'\\' | b'/' => single(b, &mut esc),
b'b' => single(b'\x08', &mut esc),
b't' => single(b'\t', &mut esc),
b'f' => single(b'\x0c', &mut esc),
b'n' => single(b'\n', &mut esc),
b'r' => single(b'\r', &mut esc),
b'u' => {
e.len = 1;
j += 1;
i = j;
}
_ => panic!(r#"invalid escape sequence byte after '\': 0x{b:02x}"#),
}
}
Some(e) if (1..=4).contains(&e.len) => {
// Digits arrive most significant first.
let shift = 4 * (4 - e.len);
e.hi |= hex2u32(b) << shift;
e.len += 1;
if e.len == 5 {
match e.hi {
// High surrogate: keep the state and wait for the low surrogate.
0xd800..=0xdbff => (),
0xdc00..=0xdfff => panic!(
"Unicode escape low surrogate without preceding high surrogate: 0x{:02x}",
e.hi
),
_ => {
append_code_point(e.hi, dst);
esc = None;
}
}
}
j += 1;
i = j;
}
Some(e) if e.len == 5 && b == b'\\' => {
e.len = 6;
j += 1;
i = j;
}
Some(e) if e.len == 5 => panic!(
r#"expected '\' to start low surrogate Unicode escape after high surrogate 0x{:04x}, found byte 0x{b:02x}"#,
e.hi
),
Some(e) if e.len == 6 && b == b'u' => {
e.len = 7;
j += 1;
i = j;
}
Some(e) if e.len == 6 => panic!(
r#"expected '\u' to start low surrogate Unicode escape after high surrogate 0x{:04x}, found '\' followed by byte {b:02x}"#,
e.hi
),
Some(e) if (7..=10).contains(&e.len) => {
let shift = 4 * (10 - e.len);
e.lo |= hex2u32(b) << shift;
e.len += 1;
if e.len == 11 {
match e.lo {
0xdc00..=0xdfff => {
// Combine the surrogate pair into one supplementary code point.
let code_point =
0x10000 + (((e.hi - 0xd800) << 10) | (e.lo - 0xdc00));
append_code_point(code_point, dst);
esc = None;
}
_ => {
panic!(
"Unicode escape high surrogate not followed by low surrogate: 0x{:04x} and then 0x{:04x}",
e.hi, e.lo
)
}
}
}
j += 1;
i = j;
}
_ => unreachable!(),
}
if j == chunk.len() {
break;
}
}
// Flush any plain bytes left at the end of this chunk.
dst.extend_from_slice(&chunk[i..j]);
literal.advance(chunk.len());
if !literal.has_remaining() {
break;
}
}
if esc.is_some() {
panic!("unexpected end of input within Unicode escape sequence");
}
}
/// Compares the unescaped form of `literal` with `other`, byte by byte,
/// decoding escape sequences as they are encountered instead of first
/// building the whole unescaped string.
///
/// # Panics
///
/// Panics if `literal` contains an invalid or unterminated escape sequence.
pub fn unescaped_cmp(literal: impl IntoBuf, other: &str) -> Ordering {
let mut literal = literal.into_buf();
// `a` is the current chunk of the literal; `b` is the unmatched tail of `other`.
let mut a = literal.chunk();
let mut b = other.as_bytes();
loop {
match a.iter().position(|&b| b == b'\\') {
None => {
// No escape in this chunk: compare it against the same-length prefix of `b`.
let n = a.len();
let o = a.cmp(&b[..n.min(b.len())]);
if o != Ordering::Equal {
return o;
}
literal.advance(n);
a = literal.chunk();
b = &b[n..];
}
Some(j) => {
// Compare the plain bytes that precede the backslash.
let o = a[..j].cmp(&b[..j.min(b.len())]);
if o != Ordering::Equal {
return o;
} else if j >= b.len() {
// `other` is exhausted but the literal still has an escape to decode.
return Ordering::Greater;
}
// `i` indexes the byte after the backslash, which may be in the next chunk.
let i = if j + 1 < a.len() {
j + 1
} else {
literal.advance(j + 1);
a = literal.chunk();
if a.is_empty() {
panic!("unterminated escape sequence");
}
0
};
let y = b[j];
match unescape_byte(a[i]) {
UnescapeByte::Byte(x) if x != y => return x.cmp(&y),
UnescapeByte::Byte(_) => {
literal.advance(i + 1);
a = literal.chunk();
b = &b[j + 1..];
}
UnescapeByte::Unicode => {
literal.advance(i + 1);
// `x[..n]` holds the UTF-8 encoding of the decoded code point.
let (x, n) = unescape_unicode(&mut literal);
let n = n as usize;
let o = x[..n].cmp(&b[j..j + n.min(b.len() - j)]);
if o != Ordering::Equal {
return o;
}
a = literal.chunk();
b = &b[j + n..];
}
}
}
}
if a.is_empty() {
// Literal exhausted: equal only if `other` is exhausted too.
return 0.cmp(&b.len());
}
}
}
/// Appends the UTF-8 encoding of `code_point` to `dst`.
///
/// Callers must pass a valid Unicode scalar value; any other value hits
/// `unreachable!()`.
fn append_code_point(code_point: u32, dst: &mut impl Sink) {
    let Some(c) = char::from_u32(code_point) else {
        unreachable!()
    };
    let mut utf8 = [0u8; 4];
    let encoded = c.encode_utf8(&mut utf8);
    dst.extend_from_slice(encoded.as_bytes());
}
/// Result of decoding the byte that follows a backslash.
#[derive(Debug)]
pub(crate) enum UnescapeByte {
    /// The escape denotes this single byte.
    Byte(u8),
    /// The escape is `\u`; hex digits follow.
    Unicode,
}

/// Decodes the byte that follows a backslash in an escape sequence.
///
/// # Panics
///
/// Panics if `b` cannot follow a backslash.
#[inline]
pub(crate) fn unescape_byte(b: u8) -> UnescapeByte {
    let decoded = match b {
        b'u' => return UnescapeByte::Unicode,
        // These escape to themselves.
        b'"' | b'\\' | b'/' => b,
        b'b' => 0x08,
        b'f' => 0x0c,
        b'n' => b'\n',
        b'r' => b'\r',
        b't' => b'\t',
        _ => panic!("invalid escape sequence: byte 0x{b:02x} cannot follow '\\'"),
    };
    UnescapeByte::Byte(decoded)
}
/// Decodes the hex digits of a `\u` escape from `buf`, where the `\u` prefix
/// has already been consumed.
///
/// If the first four digits are a high surrogate, the following `\uXXXX` low
/// surrogate is consumed as well and the pair is combined. Returns the UTF-8
/// encoding of the resulting character in a 4-byte array together with the
/// number of bytes of that array that are used.
///
/// # Panics
///
/// Panics if too few bytes remain, a digit is not hexadecimal, a high
/// surrogate is not followed by a valid `\u` low surrogate, or the resulting
/// code point is not a valid character.
pub(crate) fn unescape_unicode(buf: &mut impl Buf) -> ([u8; 4], u32) {
    if buf.remaining() < 4 {
        panic!(
            "at least 4 hex digits are required to complete Unicode escape sequence, but only {} bytes remain",
            buf.remaining()
        );
    }
    let mut first = [0u8; 4];
    buf.copy_to_slice(&mut first);
    let hi = first.iter().fold(0u32, |acc, &b| acc << 4 | hex2u32(b));

    let code_point = if hi & 0xfc00 == 0xd800 {
        // High surrogate: a complete `\uXXXX` low surrogate must follow.
        if buf.remaining() < 6 {
            panic!(
                r#"at least 6 bytes are required for Unicode low surrogate escape sequence that follows "\u{}", but only {} bytes remain"#,
                str::from_utf8(&first).unwrap(),
                buf.remaining(),
            )
        }
        let mut second = [0u8; 6];
        buf.copy_to_slice(&mut second);
        if second[0] != b'\\' || second[1] != b'u' {
            panic!(
                r#"low surrogate Unicode escape sequence must start with "\u", but {second:02x?} does not"#
            );
        }
        let lo = second[2..]
            .iter()
            .fold(0u32, |acc, &b| acc << 4 | hex2u32(b));
        if !(0xdc00..=0xdfff).contains(&lo) {
            panic!(
                r#"high surrogate \u{} followed by invalid low surrogate \u{}"#,
                str::from_utf8(&first).unwrap(),
                str::from_utf8(&second[2..]).unwrap()
            );
        }
        ((hi - 0xd800) << 10 | (lo - 0xdc00)) + 0x10000
    } else {
        hi
    };

    match char::from_u32(code_point) {
        Some(c) => {
            let mut utf8 = [0u8; 4];
            let used = c.encode_utf8(&mut utf8).len() as u32;
            (utf8, used)
        }
        None => panic!(
            "invalid Unicode escape sequence(s) produced invalid code point 0x{code_point:04x}"
        ),
    }
}
#[cfg(test)]
mod tests {
use super::*;
use rstest::rstest;
use std::collections::{BTreeMap, HashMap};
#[rstest]
#[case(Token::ArrBegin, false)]
#[case(Token::ArrEnd, false)]
#[case(Token::Eof, true)]
#[case(Token::Err, false)]
#[case(Token::LitFalse, false)]
#[case(Token::LitNull, false)]
#[case(Token::LitTrue, false)]
#[case(Token::NameSep, false)]
#[case(Token::Num, false)]
#[case(Token::ObjBegin, false)]
#[case(Token::ObjEnd, false)]
#[case(Token::Str, false)]
#[case(Token::ValueSep, false)]
#[case(Token::White, false)]
fn test_token_is_eof(#[case] token: Token, #[case] is_eof: bool) {
assert_eq!(is_eof, token.is_eof());
}
#[rstest]
#[case(Token::ArrBegin, false)]
#[case(Token::ArrEnd, false)]
#[case(Token::Eof, false)]
#[case(Token::Err, true)]
#[case(Token::LitFalse, false)]
#[case(Token::LitNull, false)]
#[case(Token::LitTrue, false)]
#[case(Token::NameSep, false)]
#[case(Token::Num, false)]
#[case(Token::ObjBegin, false)]
#[case(Token::ObjEnd, false)]
#[case(Token::Str, false)]
#[case(Token::ValueSep, false)]
#[case(Token::White, false)]
fn test_token_is_err(#[case] token: Token, #[case] is_err: bool) {
assert_eq!(is_err, token.is_err());
}
#[rstest]
#[case(Token::ArrBegin, false)]
#[case(Token::ArrEnd, false)]
#[case(Token::Eof, false)]
#[case(Token::Err, false)]
#[case(Token::LitFalse, true)]
#[case(Token::LitNull, true)]
#[case(Token::LitTrue, true)]
#[case(Token::NameSep, false)]
#[case(Token::Num, false)]
#[case(Token::ObjBegin, false)]
#[case(Token::ObjEnd, false)]
#[case(Token::Str, false)]
#[case(Token::ValueSep, false)]
#[case(Token::White, false)]
fn test_token_is_literal(#[case] token: Token, #[case] is_literal: bool) {
assert_eq!(is_literal, token.is_literal());
}
#[rstest]
#[case(Token::ArrBegin, false)]
#[case(Token::ArrEnd, false)]
#[case(Token::Eof, false)]
#[case(Token::Err, false)]
#[case(Token::LitFalse, true)]
#[case(Token::LitNull, true)]
#[case(Token::LitTrue, true)]
#[case(Token::NameSep, false)]
#[case(Token::Num, true)]
#[case(Token::ObjBegin, false)]
#[case(Token::ObjEnd, false)]
#[case(Token::Str, true)]
#[case(Token::ValueSep, false)]
#[case(Token::White, false)]
fn test_token_is_primitive(#[case] token: Token, #[case] is_primitive: bool) {
assert_eq!(is_primitive, token.is_primitive());
}
#[rstest]
#[case(Token::ArrBegin, false)]
#[case(Token::ArrEnd, false)]
#[case(Token::Eof, true)]
#[case(Token::Err, true)]
#[case(Token::LitFalse, false)]
#[case(Token::LitNull, false)]
#[case(Token::LitTrue, false)]
#[case(Token::NameSep, false)]
#[case(Token::Num, false)]
#[case(Token::ObjBegin, false)]
#[case(Token::ObjEnd, false)]
#[case(Token::Str, false)]
#[case(Token::ValueSep, false)]
#[case(Token::White, true)]
fn test_token_is_pseudo(#[case] token: Token, #[case] is_pseudo: bool) {
assert_eq!(is_pseudo, token.is_pseudo());
}
#[rstest]
#[case(Token::ArrBegin, false)]
#[case(Token::ArrEnd, false)]
#[case(Token::Eof, false)]
#[case(Token::Err, false)]
#[case(Token::LitFalse, false)]
#[case(Token::LitNull, false)]
#[case(Token::LitTrue, false)]
#[case(Token::NameSep, true)]
#[case(Token::Num, false)]
#[case(Token::ObjBegin, false)]
#[case(Token::ObjEnd, false)]
#[case(Token::Str, false)]
#[case(Token::ValueSep, true)]
#[case(Token::White, false)]
fn test_token_is_punct(#[case] token: Token, #[case] is_punct: bool) {
assert_eq!(is_punct, token.is_punct());
}
#[rstest]
#[case(Token::ArrBegin, true)]
#[case(Token::ArrEnd, true)]
#[case(Token::Eof, false)]
#[case(Token::Err, false)]
#[case(Token::LitFalse, false)]
#[case(Token::LitNull, false)]
#[case(Token::LitTrue, false)]
#[case(Token::NameSep, false)]
#[case(Token::Num, false)]
#[case(Token::ObjBegin, true)]
#[case(Token::ObjEnd, true)]
#[case(Token::Str, false)]
#[case(Token::ValueSep, false)]
#[case(Token::White, false)]
fn test_token_is_struct(#[case] token: Token, #[case] is_struct: bool) {
assert_eq!(is_struct, token.is_struct());
}
#[rstest]
#[case(Token::ArrBegin, false)]
#[case(Token::ArrEnd, false)]
#[case(Token::Eof, true)]
#[case(Token::Err, true)]
#[case(Token::LitFalse, false)]
#[case(Token::LitNull, false)]
#[case(Token::LitTrue, false)]
#[case(Token::NameSep, false)]
#[case(Token::Num, false)]
#[case(Token::ObjBegin, false)]
#[case(Token::ObjEnd, false)]
#[case(Token::Str, false)]
#[case(Token::ValueSep, false)]
#[case(Token::White, false)]
fn test_token_is_terminal(#[case] token: Token, #[case] is_terminal: bool) {
assert_eq!(is_terminal, token.is_terminal());
}
#[rstest]
#[case(Token::ArrBegin, Some("["))]
#[case(Token::ArrEnd, Some("]"))]
#[case(Token::Eof, None)]
#[case(Token::Err, None)]
#[case(Token::LitFalse, Some("false"))]
#[case(Token::LitNull, Some("null"))]
#[case(Token::LitTrue, Some("true"))]
#[case(Token::NameSep, Some(":"))]
#[case(Token::Num, None)]
#[case(Token::ObjBegin, Some("{"))]
#[case(Token::ObjEnd, Some("}"))]
#[case(Token::Str, None)]
#[case(Token::ValueSep, Some(","))]
#[case(Token::White, None)]
fn test_token_static_content(#[case] token: Token, #[case] static_content: Option<&str>) {
assert_eq!(static_content, token.static_content());
}
#[rstest]
#[case(Token::ArrBegin, "[")]
#[case(Token::ArrEnd, "]")]
#[case(Token::Eof, "EOF")]
#[case(Token::Err, "error")]
#[case(Token::LitFalse, "false")]
#[case(Token::LitNull, "null")]
#[case(Token::LitTrue, "true")]
#[case(Token::NameSep, ":")]
#[case(Token::Num, "number")]
#[case(Token::ObjBegin, "{")]
#[case(Token::ObjEnd, "}")]
#[case(Token::Str, "string")]
#[case(Token::ValueSep, ",")]
#[case(Token::White, "whitespace")]
fn test_token_display(#[case] token: Token, #[case] expect: &str) {
assert_eq!(expect, format!("{token}"));
}
// Exercises the `Buf` obtained from `Unescaped::into_buf` for both variants:
// initial state, a one-byte `advance`, and draining the rest with
// `copy_to_slice`.
#[rstest]
#[case(Unescaped::Literal("foo"), "foo")]
#[case(Unescaped::Expanded("bar".to_string()), "bar")]
fn test_unescaped_str_into_buf(#[case] u: Unescaped<&str>, #[case] expect: &str) {
    let mut b = u.into_buf();

    // A fresh buffer exposes the whole string.
    assert_eq!(expect.len(), b.remaining());
    assert_eq!(expect, str::from_utf8(b.chunk()).unwrap());

    // Consuming one byte shifts the view forward by one.
    if b.remaining() > 0 {
        b.advance(1);
        assert_eq!(expect.len() - 1, b.remaining());
        assert_eq!(&expect[1..], str::from_utf8(b.chunk()).unwrap());
    }

    // Size the drain target from what is actually left instead of
    // `expect.len() - 1`, which would underflow for an empty `expect` and is
    // only correct when the `advance(1)` above ran.
    let mut v = vec![0; b.remaining()];
    b.copy_to_slice(&mut v);
    assert_eq!(&expect.as_bytes()[expect.len() - v.len()..], v.as_slice());

    // The buffer is now fully consumed.
    assert_eq!(0, b.remaining());
    assert_eq!(b"", b.chunk());
}
// Broad behavioural test for `Unescaped<&str>`: the same text held in the
// `Literal` and `Expanded` variants must convert, compare, order, hash and
// display identically.
#[test]
fn test_unescaped_str() {
// Fixtures: "a" and "bb", each in both variants.
let a1 = Unescaped::Literal("a");
let b1 = Unescaped::Expanded("bb".to_string());
let a2 = Unescaped::Expanded("a".to_string());
let b2 = Unescaped::Literal("bb");
// Conversion into an owned `String`.
assert_eq!("a", Into::<String>::into(a1.clone()));
assert_eq!("bb", Into::<String>::into(b1.clone()));
assert_eq!("a", Into::<String>::into(a2.clone()));
assert_eq!("bb", Into::<String>::into(b2.clone()));
// `literal()` yields a value only for the `Literal` variant.
assert!(matches!(a1.literal(), Some(&"a")));
assert!(b1.literal().is_none());
assert!(a2.literal().is_none());
assert!(matches!(b2.literal(), Some(&"bb")));
// `expanded()` yields a value only for the `Expanded` variant.
assert!(a1.expanded().is_none());
assert!(matches!(b1.expanded(), Some("bb")));
assert!(matches!(a2.expanded(), Some("a")));
assert!(b2.expanded().is_none());
// Variant predicates.
assert!(a1.is_literal());
assert!(!a1.is_expanded());
assert!(!b1.is_literal());
assert!(b1.is_expanded());
// Length is that of the text, regardless of variant.
assert_eq!(1, a1.len());
assert_eq!(2, b1.len());
assert_eq!(1, a2.len());
assert_eq!(2, b2.len());
// Borrowing as `&str` and rendering through `Display`.
let a3: &str = a1.as_ref();
let b3: &str = b1.as_ref();
let a4: &str = a2.as_ref();
let b4: &str = b2.as_ref();
assert_eq!("a", format!("{a1}"));
assert_eq!("bb", format!("{b1}"));
assert_eq!("a", format!("{a2}"));
assert_eq!("bb", format!("{b2}"));
assert_eq!("a", a3);
assert_eq!("bb", b3);
assert_eq!("a", a4);
assert_eq!("bb", b4);
// Borrowing as `&[u8]`.
let x1: &[u8] = a1.as_ref();
let y1: &[u8] = b1.as_ref();
let x2: &[u8] = a2.as_ref();
let y2: &[u8] = b2.as_ref();
assert_eq!(b"a", x1);
assert_eq!(b"bb", y1);
assert_eq!(b"a", x2);
assert_eq!(b"bb", y2);
// Equality ignores the variant and looks only at the text.
assert_eq!(a1, a2);
assert_eq!(a2, a1);
assert_eq!(b1, b2);
assert_eq!(b2, b1);
assert_ne!(a1, b1);
assert_ne!(b1, a1);
assert_ne!(a1, b2);
assert_ne!(b1, a2);
// Ordering between `Unescaped` values, in every variant combination.
assert!(a1 < b1);
assert!(a1 < b2);
assert!(a2 < b1);
assert!(a2 < b2);
assert!(b1 > a1);
assert!(b1 > a2);
assert!(b2 > a1);
assert!(b2 > a2);
// Equality against `&str`, in both operand orders.
assert_eq!("a", a1);
assert_eq!(a1, "a");
assert_eq!("bb", b1);
assert_eq!(b1, "bb");
assert_eq!("a", a2);
assert_eq!(a2, "a");
assert_eq!("bb", b2);
assert_eq!(b2, "bb");
// Equality against `String`, in both operand orders.
assert_eq!("a".to_string(), a1);
assert_eq!(a1, "a".to_string());
assert_eq!("bb".to_string(), b1);
assert_eq!(b1, "bb".to_string());
assert_eq!("a".to_string(), a2);
assert_eq!(a2, "a".to_string());
assert_eq!("bb".to_string(), b2);
assert_eq!(b2, "bb".to_string());
// Ordering against `&str`, in both operand orders.
assert!(a1 < "bb");
assert!("bb" > a1);
assert!(b1 > "a");
assert!("a" < b1);
// Hash map keyed by `Unescaped`: lookups work with a plain `&str` and with
// the same text held in the other variant.
let mut m1 = HashMap::new();
m1.insert(a1.clone(), "a1");
m1.insert(b1.clone(), "b1");
assert_eq!(Some(&"a1"), m1.get("a"));
assert_eq!(Some(&"a1"), m1.get(&a2));
assert_eq!(Some(&"b1"), m1.get("bb"));
assert_eq!(Some(&"b1"), m1.get(&b2));
assert!(!m1.contains_key("aa"));
// Ordered map keyed by `Unescaped`: same lookups, plus removal through the
// other variant and through `&str`.
let mut m2 = BTreeMap::new();
m2.insert(a1.clone(), "a1");
m2.insert(b1.clone(), "b1");
assert_eq!(Some(&"a1"), m2.get("a"));
assert_eq!(Some(&"a1"), m2.get(&a2));
assert_eq!(Some(&"b1"), m2.get("bb"));
assert_eq!(Some(&"b1"), m2.get(&b2));
assert!(!m2.contains_key("aa"));
assert_eq!(Some("a1"), m2.remove(&a2));
assert_eq!(Some("b1"), m2.remove(&b2));
// Re-populate with the opposite variants and repeat the lookups.
m2.insert(b2.clone(), "b2");
m2.insert(a2.clone(), "a2");
assert_eq!(Some(&"a2"), m2.get("a"));
assert_eq!(Some(&"a2"), m2.get(&a1));
assert_eq!(Some(&"b2"), m2.get("bb"));
assert_eq!(Some(&"b2"), m2.get(&b1));
assert!(!m2.contains_key("aa"));
assert_eq!(Some("a2"), m2.remove("a"));
assert_eq!(Some("b2"), m2.remove("bb"));
}
// Checks the message produced for each `ErrorKind`, both through plain
// `Display` and through `fmt_at` with a default `Pos`, which is expected to
// append " at line 1, column 1 (offset: 0)".
#[rstest]
#[case(ErrorKind::BadSurrogate {
    first: 0xD800,
    second: None,
}, "bad Unicode escape sequence: high surrogate '\\uD800' not followed by low surrogate")]
#[case(ErrorKind::BadUtf8ContByte {
    seq_len: 3,
    offset: 2,
    value: 0x20,
}, "bad UTF-8 continuation byte 0x20 in 3-byte UTF-8 sequence (byte #2)")]
#[case(ErrorKind::Read, "read error")]
#[case(ErrorKind::UnexpectedByte {
    token: Some(Token::Num),
    expect: Expect::Digit,
    actual: 0x41,
}, "expected digit character '0'..'9' but got character 'A' (ASCII 0x41) in number token")]
#[case(ErrorKind::UnexpectedEof(Token::Str), "unexpected EOF in string token")]
fn test_error_kind_display(#[case] kind: ErrorKind, #[case] expect: &str) {
    // Adapter whose `Display` routes through `ErrorKind::fmt_at` with a
    // default position, so the positioned form can be produced via formatting.
    struct AtDefaultPos(ErrorKind);

    impl fmt::Display for AtDefaultPos {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            self.0.fmt_at(f, Some(&Pos::default()))
        }
    }

    let plain = kind.to_string();
    assert_eq!(expect, plain);

    let located = AtDefaultPos(kind).to_string();
    assert_eq!(format!("{expect} at line 1, column 1 (offset: 0)"), located);
}
#[rstest]
#[case(r#""""#, r#""""#)]
#[case(r#""f""#, r#""f""#)]
#[case(r#""fo""#, r#""fo""#)]
#[case(r#""foo""#, r#""foo""#)]
#[case(r#""\\""#, r#""\""#)]
#[case(r#""\/""#, r#""/""#)]
#[case(r#""\"""#, r#"""""#)]
#[case(r#""\b""#, "\"\x08\"")]
#[case(r#""\t""#, "\"\t\"")]
#[case(r#""\f""#, "\"\x0c\"")]
#[case(r#""\n""#, "\"\n\"")]
#[case(r#""\r""#, "\"\r\"")]
#[case(r#""\u0000""#, "\"\0\"")]
#[case(r#""\u0008""#, "\"\x08\"")]
#[case(r#""\u0009""#, "\"\t\"")]
#[case(r#""\u000c""#, "\"\x0c\"")]
#[case(r#""\u000C""#, "\"\x0C\"")]
#[case(r#""\u000a""#, "\"\n\"")]
#[case(r#""\u000A""#, "\"\n\"")]
#[case(r#""\u000d""#, "\"\r\"")]
#[case(r#""\u000D""#, "\"\r\"")]
#[case(r#""\u000D""#, "\"\r\"")]
#[case(r#""\u0021""#, r#""!""#)]
#[case(r#""\u0030""#, r#""0""#)]
#[case(r#""\u0041""#, r#""A""#)]
#[case(r#""\u0062""#, r#""b""#)]
#[case(r#""\u007F""#, "\"\x7f\"")] #[case(r#""\u00A9""#, r#""©""#)] #[case(r#""\u03A9""#, r#""Ω""#)] #[case(r#""\u0080""#, "\"\u{80}\"")] #[case(r#""\u07FF""#, "\"\u{7ff}\"")] #[case(r#""\u20AC""#, r#""€""#)] #[case(r#""\u2603""#, r#""☃""#)] #[case(r#""\u0800""#, "\"\u{800}\"")] #[case(r#""\uFFFF""#, "\"\u{ffff}\"")] #[case(r#""\ud83D\uDe00""#, r#""😀""#)] #[case(r#""\ud800\uDC00""#, "\"\u{10000}\"")] #[case(r#""\uDBFF\udfff""#, "\"\u{10FFFF}\"")] fn test_unescape_ok(#[case] input: &str, #[case] expect: &str) {
{
let mut buf = Vec::new();
unescape(input, &mut buf);
let actual = String::from_utf8(buf).unwrap();
assert_eq!(actual, expect);
}
{
let mut buf = Vec::new();
buf.extend_from_slice(b"foo");
unescape(input, &mut buf);
let actual = String::from_utf8(buf).unwrap();
assert_eq!(actual, format!("foo{expect}"));
}
}
// `unescape` must panic when the byte following a backslash is not a valid
// escape character.
#[rstest]
#[case(r#""\a""#)]
#[case(r#""\U""#)]
#[case(r#""\:""#)]
#[should_panic(expected = "invalid escape sequence byte after '\\'")]
fn test_unescape_panic_invalid_esc_seq_byte(#[case] literal: &str) {
    let mut out = Vec::new();
    unescape(literal, &mut out);
}
// `unescape` must panic when a high surrogate escape (`\ud800`, `\udbff`) is
// followed by a `\u` escape outside the low surrogate range.
// NOTE(review): despite the function name, every case here starts with a high
// surrogate; none starts with a lone low surrogate. Confirm the intended
// coverage.
#[rstest]
#[case(r#"\ud800\u0000"#)]
#[case(r#"\ud800\ud7ff"#)]
#[case(r#"\ud800\ud800"#)]
#[case(r#"\ud800\ue000"#)]
#[case(r#"\ud800\uffff"#)]
#[case(r#"\udbff\u0000"#)]
#[case(r#"\udbff\ud7ff"#)]
#[case(r#"\udbff\ud800"#)]
#[case(r#"\udbff\ue000"#)]
#[case(r#"\udbff\uffff"#)]
#[should_panic(expected = "Unicode escape high surrogate not followed by low surrogate")]
fn test_unescape_panic_low_surrogate_no_high(#[case] literal: &str) {
    let mut out = Vec::new();
    unescape(literal, &mut out);
}
// `unescape` must panic when a quoted string contains a high surrogate escape
// whose following `\u` escape is not a low surrogate.
#[rstest]
#[case(r#""\ud800\u0000""#)]
#[case(r#""\uDBFF\ud800""#)]
#[should_panic(expected = "Unicode escape high surrogate not followed by low surrogate")]
fn test_unescape_panic_high_surrogate_no_low(#[case] literal: &str) {
    let mut out = Vec::new();
    unescape(literal, &mut out);
}
// `unescape` must panic when a high surrogate escape is followed by a byte
// other than the backslash that would start the low surrogate escape.
#[rstest]
#[case(r#"\ud800 "#)]
#[case(r#"\udbff "#)]
#[should_panic(
    expected = r#"expected '\' to start low surrogate Unicode escape after high surrogate"#
)]
fn test_unescape_panic_high_surrogate_no_backslash(#[case] literal: &str) {
    let mut out = Vec::new();
    unescape(literal, &mut out);
}
// `unescape` must panic when a high surrogate escape is followed by a
// backslash escape that is not a `\u` escape.
#[rstest]
#[case(r#"\ud800\n"#)]
#[case(r#"\udbff\a"#)]
#[should_panic(
    expected = r#"expected '\u' to start low surrogate Unicode escape after high surrogate"#
)]
fn test_unescape_panic_high_surrogate_no_backslash_u(#[case] literal: &str) {
    let mut out = Vec::new();
    unescape(literal, &mut out);
}
// `unescape` must panic when the input ends in the middle of an escape
// sequence, whether that is the first escape or one that follows a complete
// escape.
#[rstest]
#[case(r#"\"#)]
#[case(r#"\u"#)]
#[case(r#"\u0"#)]
#[case(r#"\u00"#)]
#[case(r#"\u000"#)]
#[case(r#"\u0000\"#)]
#[case(r#"\u0000\u"#)]
#[case(r#"\u0000\u1"#)]
#[case(r#"\u0000\u11"#)]
#[case(r#"\u0000\u111"#)]
#[case(r#"\ud800\u111"#)]
#[case(r#"\udbff\u111"#)]
#[should_panic(expected = "unexpected end of input within Unicode escape sequence")]
fn test_unescape_panic_unexpected_eof(#[case] literal: &str) {
    let mut out = Vec::new();
    unescape(literal, &mut out);
}
// Any input containing no escape sequences must compare `Equal` to itself,
// whether supplied as a single slice, as a chunked buffer, or through the
// `Content::unescaped_cmp` method.
#[rstest]
#[case::arr_begin("[")]
#[case::arr_end("]")]
#[case::empty("")]
#[case::lit_false("false")]
#[case::lit_null("null")]
#[case::lit_true("true")]
#[case::name_sep(":")]
#[case::num_0("0")]
#[case::num_1("1")]
#[case::num_pi("3.14159")]
#[case::num_minus_1("-1")]
#[case::num_big1("1234567890.1234567890")]
#[case::num_big2("1e100")]
#[case::num_big3("10.000e+99")]
#[case::str_empty(r#""""#)]
#[case::str_a(r#""a""#)]
#[case::str_utf8_2_bytes(r#""è""#)]
#[case::str_utf8_3_bytes(r#""€""#)]
#[case::str_utf8_4_bytes(r#""🇮🇹""#)]
#[case::str_utf8_all(r#""🇮🇹 Questo caffè costa 3€. ☕""#)]
#[case::obj_begin("{")]
#[case::obj_end("}")]
#[case::value_sep(",")]
#[case::white1(" ")]
#[case::white2("\t")]
#[case::white3("\r")]
#[case::white4("\n")]
#[case::white5(" \t\r\n \n\t\t\r \r\n\r")]
fn test_unescaped_cmp_identity(#[case] input: &str) {
    // Contiguous input.
    assert_eq!(Ordering::Equal, unescaped_cmp(input, input));

    // The same input split into chunks of various sizes, compared both via
    // the free function and via the `Content` wrapper.
    for chunked_str in ChunkedStr::chunkify(input) {
        assert_eq!(Ordering::Equal, unescaped_cmp(chunked_str, input));
        let content = chunked_str.into_content(false);
        assert_eq!(Ordering::Equal, content.unescaped_cmp(input));
    }
}
#[rstest]
#[case::empty_str_empty("", r#""""#, Ordering::Less)]
#[case::empty_num("", "1", Ordering::Less)]
#[case::empty_white("", " ", Ordering::Less)]
#[case::num_empty("0", "", Ordering::Greater)]
#[case::str_a_str_b(r#""a""#, r#""b""#, Ordering::Less)]
#[case::str_a_str_empty(r#""a""#, r#""""#, Ordering::Greater)]
#[case::str_b_str_a(r#""b""#, r#""a""#, Ordering::Greater)]
#[case::str_bar_str_bard(r#""bar""#, r#""bard""#, Ordering::Less)]
#[case::str_bar_str_bark(r#""bar""#, r#""bark""#, Ordering::Less)]
#[case::str_bard_str_bar(r#""bard""#, r#""bar""#, Ordering::Greater)]
#[case::str_bard_str_bark(r#""bard""#, r#""bark""#, Ordering::Less)]
#[case::str_bark_str_bar(r#""bark""#, r#""bar""#, Ordering::Greater)]
#[case::str_bark_str_bard(r#""bark""#, r#""bard""#, Ordering::Greater)]
#[case::str_empty_empty(r#""""#, "", Ordering::Greater)]
#[case::str_empty_str_a(r#""""#, r#""a""#, Ordering::Less)]
#[case::str_esc_quot_str_quot(r#""\"""#, "\"\"\"", Ordering::Equal)]
#[case::str_esc_bsol_str_bsol(r#""\\""#, "\"\\\"", Ordering::Equal)]
#[case::str_esc_sol_str_sol(r#""\/""#, "\"/\"", Ordering::Equal)]
#[case::str_esc_bs_str_bs(r#""\b""#, "\"\x08\"", Ordering::Equal)]
#[case::str_esc_ff_str_ff(r#""\f""#, "\"\x0c\"", Ordering::Equal)]
#[case::str_esc_nl_str_nl(r#""\n""#, "\"\n\"", Ordering::Equal)]
#[case::str_esc_nl_str_cr(r#""\n""#, "\"\r\"", Ordering::Less)]
#[case::str_esc_cr_str_cr(r#""\r""#, "\"\r\"", Ordering::Equal)]
#[case::str_esc_cr_str_nl(r#""\r""#, "\"\n\"", Ordering::Greater)]
#[case::str_esc_tab_str_tab(r#""\t""#, "\"\t\"", Ordering::Equal)]
#[case::str_esc_u_0008_str_bs(r#""\u0008""#, "\"\x08\"", Ordering::Equal)]
#[case::str_esc_u_0009_str_tab(r#""\u0009""#, "\"\t\"", Ordering::Equal)]
#[case::str_esc_u_000a_str_nl(r#""\u000a""#, "\"\n\"", Ordering::Equal)]
#[case::str_esc_u_000c_str_ff(r#""\u000c""#, "\"\x0c\"", Ordering::Equal)]
#[case::str_esc_u_000d_str_cr(r#""\u000d""#, "\"\r\"", Ordering::Equal)]
#[case::str_esc_u_0022_str_quot(r#""\u0022""#, "\"\"\"", Ordering::Equal)]
#[case::str_esc_u_002f_str_sol(r#""\u002f""#, "\"/\"", Ordering::Equal)]
#[case::str_esc_u_005c_str_bsol(r#""\u005c""#, "\"\\\"", Ordering::Equal)]
#[case::str_esc_u_0062_str_a(r#""\u0063""#, r#""a""#, Ordering::Greater)]
#[case::str_esc_u_00e8_str_e_grave(r#""\u00e8""#, "\"\u{00e8}\"", Ordering::Equal)]
#[case::str_esc_u_20ac_str_euro(r#""\u20ac""#, "\"\u{20ac}\"", Ordering::Equal)]
#[case::str_esc_u_surrogate_str_globe(r#""\ud83c\udf0d""#, "\"\u{1f30d}\"", Ordering::Equal)]
#[case::str_esc_at_end_of_longer_str(r#""abcde\n""#, r#""abc""#, Ordering::Greater)]
#[case::str_esc_consecutive(r#""\n\t""#, "\"\n\t\"", Ordering::Equal)]
#[case::str_where_other_terminates_early(r#""\n""#, "\"", Ordering::Greater)]
#[case::white_empty(" ", "", Ordering::Greater)]
#[case::smoke_1(
r#""Path: C:\\Users\\alice\/docs""#,
"\"Path: C:\\Users\\alice/docs\"",
Ordering::Equal
)]
#[case::smoke_2(
r#""At the caff\u00e8 counter, he paid the 10,50\u00a0\u20ac and\nquickly downed the cappucino. Then,\r\n\twith a quick\"ciao\u0022, he turned and walked out.""#,
"\"At the caffè counter, he paid the 10,50\u{00a0}€ and\nquickly downed the cappucino. Then,\r\n\twith a quick\"ciao\", he turned and walked out.\"",
Ordering::Equal,
)]
#[case::smoke_3(
r#""At the caff\u00e8 counter, he paid the 10,50\u00a0\u20ac and\nquickly downed the cappucino. Then,\r\n\twith a quick\"ciao\u0022, he turned and ran out.""#,
"\"At the caffè counter, he paid the 10,50\u{00a0}€ and\nquickly downed the cappucino. Then,\r\n\twith a quick\"ciao\", he turned and walked out.\"",
Ordering::Less,
)]
fn test_unescaped_cmp_ok(#[case] a: &str, #[case] b: &str, #[case] expect: Ordering) {
assert_eq!(expect, unescaped_cmp(a, b));
for chunked_str in ChunkedStr::chunkify(a) {
assert_eq!(expect, unescaped_cmp(chunked_str, b));
let content = chunked_str.into_content(false);
assert_eq!(expect, content.unescaped_cmp(b));
}
}
#[rstest]
#[case::unterminated_1(r#"\"#, r#"\"#, "unterminated escape sequence")]
#[case::unterminated_2(r#""\"#, r#""\"#, "unterminated escape sequence")]
#[case::unterminated_3(r#""hello\"#, r#""hello\"#, "unterminated escape sequence")]
#[case::invalid_single_byte_1(
r#"\0"#,
r#"a"#,
r#"invalid escape sequence: byte 0x30 cannot follow '\'"#
)]
#[case::invalid_single_byte_2(
r#""\a""#,
r#""a"#,
r#"invalid escape sequence: byte 0x61 cannot follow '\'"#
)]
#[case::invalid_unicode_need_4(
r#""\u"#,
r#""a"#,
"at least 4 hex digits are required to complete Unicode escape sequence, but only 0 bytes remain"
)]
#[case::invalid_unicode_need_3(
r#""\u0"#,
r#""a"#,
"at least 4 hex digits are required to complete Unicode escape sequence, but only 1 bytes remain"
)]
#[case::invalid_unicode_need_2(
r#""\u00"#,
r#""a"#,
"at least 4 hex digits are required to complete Unicode escape sequence, but only 2 bytes remain"
)]
#[case::invalid_unicode_need_1(
r#""\u000"#,
r#""a"#,
"at least 4 hex digits are required to complete Unicode escape sequence, but only 3 bytes remain"
)]
#[case::invalid_unicode_not_hex_1(r#""\uG000"#, r#""a"#, "byte 0x47 is not an ASCII hex digit")]
#[case::invalid_unicode_not_hex_2(r#""\u0H00"#, r#""a"#, "byte 0x48 is not an ASCII hex digit")]
#[case::invalid_unicode_not_hex_3(r#""\u00I0"#, r#""a"#, "byte 0x49 is not an ASCII hex digit")]
#[case::invalid_unicode_not_hex_4(r#""\u000J"#, r#""a"#, "byte 0x4a is not an ASCII hex digit")]
#[case::lo_surrogate_incomplete_1(r#"\ud800"#, "a", r#"at least 6 bytes are required for Unicode low surrogate escape sequence that follows "\ud800", but only 0 bytes remain"#)]
#[case::lo_surrogate_incomplete_2(r#"\uD801x"#, "a", r#"at least 6 bytes are required for Unicode low surrogate escape sequence that follows "\uD801", but only 1 bytes remain"#)]
#[case::lo_surrogate_incomplete_3(r#"\uD802xx"#, "a", r#"at least 6 bytes are required for Unicode low surrogate escape sequence that follows "\uD802", but only 2 bytes remain"#)]
#[case::lo_surrogate_incomplete_4(r#"\uD803xxx"#, "a", r#"at least 6 bytes are required for Unicode low surrogate escape sequence that follows "\uD803", but only 3 bytes remain"#)]
#[case::lo_surrogate_incomplete_5(r#"\udbfdxxxx"#, "a", r#"at least 6 bytes are required for Unicode low surrogate escape sequence that follows "\udbfd", but only 4 bytes remain"#)]
#[case::lo_surrogate_incomplete_6(r#"\uDBFExxxxx"#, "a", r#"at least 6 bytes are required for Unicode low surrogate escape sequence that follows "\uDBFE", but only 5 bytes remain"#)]
#[case::lo_surrogate_no_leading_bsol_u_1(
r#"\udbFFxxxxxx"#,
"a",
r#"low surrogate Unicode escape sequence must start with "\u""#
)]
#[case::lo_surrogate_no_leading_bsol_u_2(
r#"\ud800\Uxxxx"#,
"a",
r#"low surrogate Unicode escape sequence must start with "\u""#
)]
#[case::lo_surrogate_no_leading_bsol_u_2(
r#"\ud800\Uxxxx"#,
"a",
r#"low surrogate Unicode escape sequence must start with "\u""#
)]
#[case::lo_surrogate_not_hex_1(
r#""\ud800\uK000""#,
r#""a""#,
"byte 0x4b is not an ASCII hex digit"
)]
#[case::lo_surrogate_not_hex_1(
r#""\ud800\u0L00""#,
r#""a""#,
"byte 0x4c is not an ASCII hex digit"
)]
#[case::lo_surrogate_not_hex_1(
r#""\ud800\u00M0""#,
r#""a""#,
"byte 0x4d is not an ASCII hex digit"
)]
#[case::lo_surrogate_not_hex_1(
r#""\ud800\u000N""#,
r#""a""#,
"byte 0x4e is not an ASCII hex digit"
)]
#[case::lo_surrogate_alone(
r#""\udc00""#,
r#""a""#,
"invalid Unicode escape sequence(s) produced invalid code point 0xdc00"
)]
#[case::invalid_surrogate_pair(
r#""\ud800\u0041""#,
r#""a""#,
r#"high surrogate \ud800 followed by invalid low surrogate \u0041"#
)]
fn test_unescaped_cmp_panic(#[case] a: &str, #[case] b: &str, #[case] expect: &str) {
assert_panic(|| unescaped_cmp(a, b), expect);
for chunked_str in ChunkedStr::chunkify(a) {
assert_panic(|| unescaped_cmp(chunked_str, b), expect);
let content = chunked_str.into_content(false);
assert_panic(|| content.unescaped_cmp(b), expect);
}
}
// Valid `i64` texts, including both range limits, must parse to the expected
// value from contiguous input, chunked input and the `Content` wrapper.
#[cfg(feature = "num")]
#[rstest]
#[case::zero("0", 0)]
#[case::minus_zero("-0", 0)]
#[case::one("1", 1)]
#[case::minus_one("-1", -1)]
#[case::forty_two("42", 42)]
#[case::minus_forty_two("-42", -42)]
#[case::all_digits("9876543210", 9876543210)]
#[case::max_minus_1(format!("{}", i64::MAX-1), i64::MAX-1)]
#[case::max(format!("{}", i64::MAX), i64::MAX)]
#[case::min_plus_1(format!("{}", i64::MIN+1), i64::MIN+1)]
#[case::min(format!("{}", i64::MIN), i64::MIN)]
fn test_parse_i64_ok(#[case] input: impl AsRef<str>, #[case] expect: i64) {
    let text = input.as_ref();
    assert_eq!(Ok(expect), parse_i64(text));

    ChunkedStr::chunkify(text).into_iter().for_each(|chunked| {
        assert_eq!(Ok(expect), parse_i64(chunked));
        let wrapped = chunked.into_content(false);
        assert_eq!(Ok(expect), wrapped.parse_i64());
    });
}
// Texts that are not plain integers must fail with `NumError::Format`, and
// integers outside the `i64` range with `NumError::Range`, for contiguous
// input, chunked input and the `Content` wrapper.
#[cfg(feature = "num")]
#[rstest]
#[case::empty("", NumError::Format)]
#[case::token_arr_begin("[", NumError::Format)]
#[case::token_arr_end("]", NumError::Format)]
#[case::token_lit_false("false", NumError::Format)]
#[case::token_lit_null("null", NumError::Format)]
#[case::token_lit_true("true", NumError::Format)]
#[case::token_name_sep(":", NumError::Format)]
#[case::token_obj_begin("{", NumError::Format)]
#[case::token_obj_end("}", NumError::Format)]
#[case::token_str_empty(r#""""#, NumError::Format)]
#[case::token_str_one(r#""1""#, NumError::Format)]
#[case::token_value_sep(",", NumError::Format)]
#[case::token_white(" ", NumError::Format)]
#[case::space_prefix(" 1", NumError::Format)]
#[case::space_suffix("1 ", NumError::Format)]
#[case::decimal_zero("10.0", NumError::Format)]
#[case::decimal("3.14159", NumError::Format)]
#[case::exponent_zero("0e0", NumError::Format)]
#[case::exponent_positive("10E+1", NumError::Format)]
#[case::exponent_negative("1E-98", NumError::Format)]
#[case::range_i64_min_minus_1(format!("{}", i64::MIN as i128 - 1), NumError::Range)]
#[case::range_i128_min(format!("{}", i128::MIN), NumError::Range)]
#[case::range_i64_max_plus_1(format!("{}", i64::MAX as i128 + 1), NumError::Range)]
#[case::range_i128_max(format!("{}", i128::MAX), NumError::Range)]
fn test_parse_i64_err(#[case] input: impl AsRef<str>, #[case] expect: NumError) {
    let text = input.as_ref();
    assert_eq!(Err(expect), parse_i64(text));

    ChunkedStr::chunkify(text).into_iter().for_each(|chunked| {
        assert_eq!(Err(expect), parse_i64(chunked));
        let wrapped = chunked.into_content(false);
        assert_eq!(Err(expect), wrapped.parse_i64());
    });
}
// Valid `u64` texts, up to and including `u64::MAX`, must parse to the
// expected value from contiguous input, chunked input and the `Content`
// wrapper.
#[cfg(feature = "num")]
#[rstest]
#[case::zero("0", 0)]
#[case::one("1", 1)]
#[case::forty_two("42", 42)]
#[case::all_digits("9876543210", 9876543210)]
#[case::max_minus_1(format!("{}", u64::MAX-1), u64::MAX-1)]
#[case::max(format!("{}", u64::MAX), u64::MAX)]
fn test_parse_u64_ok(#[case] input: impl AsRef<str>, #[case] expect: u64) {
    let text = input.as_ref();
    assert_eq!(Ok(expect), parse_u64(text));

    ChunkedStr::chunkify(text).into_iter().for_each(|chunked| {
        assert_eq!(Ok(expect), parse_u64(chunked));
        let wrapped = chunked.into_content(false);
        assert_eq!(Ok(expect), wrapped.parse_u64());
    });
}
// Texts that are not plain unsigned integers (including anything with a
// leading minus sign) must fail with `NumError::Format`, and integers above
// `u64::MAX` with `NumError::Range`.
#[cfg(feature = "num")]
#[rstest]
#[case::empty("", NumError::Format)]
#[case::token_arr_begin("[", NumError::Format)]
#[case::token_arr_end("]", NumError::Format)]
#[case::token_lit_false("false", NumError::Format)]
#[case::token_lit_null("null", NumError::Format)]
#[case::token_lit_true("true", NumError::Format)]
#[case::token_name_sep(":", NumError::Format)]
#[case::token_obj_begin("{", NumError::Format)]
#[case::token_obj_end("}", NumError::Format)]
#[case::token_str_empty(r#""""#, NumError::Format)]
#[case::token_str_one(r#""1""#, NumError::Format)]
#[case::token_value_sep(",", NumError::Format)]
#[case::token_white(" ", NumError::Format)]
#[case::space_prefix(" 1", NumError::Format)]
#[case::space_suffix("1 ", NumError::Format)]
#[case::minus_zero("-0", NumError::Format)]
#[case::minus_one("-1", NumError::Format)]
#[case::minus_forty_two("-42", NumError::Format)]
#[case::decimal_zero("10.0", NumError::Format)]
#[case::decimal("3.14159", NumError::Format)]
#[case::exponent_zero("0e0", NumError::Format)]
#[case::exponent_positive("10E+1", NumError::Format)]
#[case::exponent_negative("1E-98", NumError::Format)]
#[case::range_u64_max_plus_1(format!("{}", u64::MAX as i128 + 1), NumError::Range)]
#[case::range_i128_max(format!("{}", i128::MAX), NumError::Range)]
fn test_parse_u64_err(#[case] input: impl AsRef<str>, #[case] expect: NumError) {
    let text = input.as_ref();
    assert_eq!(Err(expect), parse_u64(text));

    ChunkedStr::chunkify(text).into_iter().for_each(|chunked| {
        assert_eq!(Err(expect), parse_u64(chunked));
        let wrapped = chunked.into_content(false);
        assert_eq!(Err(expect), wrapped.parse_u64());
    });
}
// Valid `i128` texts, including values beyond the `i64` range and both
// `i128` limits, must parse to the expected value from contiguous input,
// chunked input and the `Content` wrapper.
#[cfg(feature = "num_ext")]
#[rstest]
#[case::zero("0", 0)]
#[case::minus_zero("-0", 0)]
#[case::one("1", 1)]
#[case::minus_one("-1", -1)]
#[case::forty_two("42", 42)]
#[case::minus_forty_two("-42", -42)]
#[case::all_digits("9876543210", 9876543210)]
#[case::i64_max(format!("{}", i64::MAX), i64::MAX as i128)]
#[case::i64_min(format!("{}", i64::MIN), i64::MIN as i128)]
#[case::beyond_i64_max(format!("{}", i64::MAX as i128 + 1), i64::MAX as i128 + 1)]
#[case::beyond_i64_min(format!("{}", i64::MIN as i128 - 1), i64::MIN as i128 - 1)]
#[case::pow10_38(format!("{}", 10_i128.pow(38)), 10_i128.pow(38))]
#[case::neg_pow10_38(format!("{}", -10_i128.pow(38)), -10_i128.pow(38))]
#[case::max_minus_1(format!("{}", i128::MAX - 1), i128::MAX - 1)]
#[case::max(format!("{}", i128::MAX), i128::MAX)]
#[case::min_plus_1(format!("{}", i128::MIN + 1), i128::MIN + 1)]
#[case::min(format!("{}", i128::MIN), i128::MIN)]
fn test_parse_i128_ok(#[case] input: impl AsRef<str>, #[case] expect: i128) {
    let text = input.as_ref();
    assert_eq!(Ok(expect), parse_i128(text));

    ChunkedStr::chunkify(text).into_iter().for_each(|chunked| {
        assert_eq!(Ok(expect), parse_i128(chunked));
        let wrapped = chunked.into_content(false);
        assert_eq!(Ok(expect), wrapped.parse_i128());
    });
}
// Texts that are not plain integers must fail with `NumError::Format`, and
// integers outside the `i128` range with `NumError::Range`, for contiguous
// input, chunked input and the `Content` wrapper.
#[cfg(feature = "num_ext")]
#[rstest]
#[case::empty("", NumError::Format)]
#[case::token_arr_begin("[", NumError::Format)]
#[case::token_arr_end("]", NumError::Format)]
#[case::token_lit_false("false", NumError::Format)]
#[case::token_lit_null("null", NumError::Format)]
#[case::token_lit_true("true", NumError::Format)]
#[case::token_name_sep(":", NumError::Format)]
#[case::token_obj_begin("{", NumError::Format)]
#[case::token_obj_end("}", NumError::Format)]
#[case::token_str_empty(r#""""#, NumError::Format)]
#[case::token_str_one(r#""1""#, NumError::Format)]
#[case::token_value_sep(",", NumError::Format)]
#[case::token_white(" ", NumError::Format)]
#[case::space_prefix(" 1", NumError::Format)]
#[case::space_suffix("1 ", NumError::Format)]
#[case::decimal_zero("10.0", NumError::Format)]
#[case::decimal("3.14159", NumError::Format)]
#[case::exponent_zero("0e0", NumError::Format)]
#[case::exponent_positive("10E+1", NumError::Format)]
#[case::exponent_negative("1E-98", NumError::Format)]
// `i128::MIN - 1` cannot be written as an `i128` expression, so it is spelled
// out. The sign must be the ASCII hyphen-minus: a Unicode minus sign (U+2212)
// would make this a format error and leave the lower range boundary untested.
#[case::range_i128_min_minus_1("-170141183460469231731687303715884105729", NumError::Range)]
#[case::range_i128_max_plus_1(format!("{}", i128::MAX as u128 + 1), NumError::Range)]
#[case::range_u128_max(format!("{}", u128::MAX), NumError::Range)]
fn test_parse_i128_err(#[case] input: impl AsRef<str>, #[case] expect: NumError) {
    let input = input.as_ref();
    assert_eq!(Err(expect), parse_i128(input));

    // Repeat with the input split into chunks of various sizes, via both the
    // free function and the `Content` wrapper.
    for chunked_str in ChunkedStr::chunkify(input) {
        assert_eq!(Err(expect), parse_i128(chunked_str));
        let content = chunked_str.into_content(false);
        assert_eq!(Err(expect), content.parse_i128());
    }
}
// Valid JSON number texts must parse to the expected `f64`. Short inputs are
// additionally parsed from chunked buffers and through the `Content` wrapper.
#[cfg(feature = "num")]
#[rstest]
#[case::zero("0", 0.0)]
#[case::minus_zero("-0", 0.0)]
#[case::one("1", 1.0)]
#[case::minus_one("-1", -1.0)]
#[case::forty_two("42", 42.0)]
#[case::pi("3.14159", 3.14159)]
#[case::negative_decimal("-2.5", -2.5)]
#[case::exponent_zero("0e0", 0.0)]
#[case::exponent_positive("1e2", 100.0)]
#[case::exponent_negative("1e-2", 0.01)]
#[case::exponent_upper("1E2", 100.0)]
#[case::exponent_plus_sign("1e+2", 100.0)]
#[case::decimal_and_exponent("1.5e2", 150.0)]
#[case::large_integer("9876543210", 9876543210.0)]
#[case::f64_max(format!("{}", f64::MAX), f64::MAX)]
#[case::f64_min_positive(format!("{}", f64::MIN_POSITIVE), f64::MIN_POSITIVE)]
#[case::subnormal("5e-324", 5e-324)]
fn test_parse_f64_ok(#[case] input: impl AsRef<str>, #[case] expect: f64) {
    let text = input.as_ref();
    assert_eq!(Ok(expect), parse_f64(text));

    // NOTE(review): chunked parsing is only exercised for inputs of at most
    // 7 bytes, unlike the other parse tests; the reason is not visible here.
    // Confirm whether the limit is intentional.
    if text.len() > 7 {
        return;
    }

    for chunked in ChunkedStr::chunkify(text) {
        assert_eq!(Ok(expect), parse_f64(chunked));
        let wrapped = chunked.into_content(false);
        assert_eq!(Ok(expect), wrapped.parse_f64());
    }
}
// Texts that are not numbers must fail with `NumError::Format`, and numbers
// whose magnitude overflows `f64` with `NumError::Range`, for contiguous
// input, chunked input and the `Content` wrapper.
#[cfg(feature = "num")]
#[rstest]
#[case::empty("", NumError::Format)]
#[case::token_lit_false("false", NumError::Format)]
#[case::token_lit_null("null", NumError::Format)]
#[case::token_lit_true("true", NumError::Format)]
#[case::token_str_empty(r#""""#, NumError::Format)]
#[case::token_str_one(r#""1""#, NumError::Format)]
#[case::space_prefix(" 1", NumError::Format)]
#[case::space_suffix("1 ", NumError::Format)]
#[case::range_positive_overflow("1e309", NumError::Range)]
#[case::range_negative_overflow("-1e309", NumError::Range)]
fn test_parse_f64_err(#[case] input: impl AsRef<str>, #[case] expect: NumError) {
    let text = input.as_ref();
    assert_eq!(Err(expect), parse_f64(text));

    ChunkedStr::chunkify(text).into_iter().for_each(|chunked| {
        assert_eq!(Err(expect), parse_f64(chunked));
        let wrapped = chunked.into_content(false);
        assert_eq!(Err(expect), wrapped.parse_f64());
    });
}
// Test-only `Content` implementation backed by a `ChunkedStr`, used to run
// the `Content` methods against input that is delivered in several chunks.
#[derive(Debug)]
struct ChunkedContent<'a> {
    // The literal (still escaped) text, exposed in chunks.
    lit: ChunkedStr<'a>,
    // Whether `lit` should be treated as containing escape sequences.
    esc: bool,
}

impl<'a> Content for ChunkedContent<'a> {
    type Literal<'b>
        = ChunkedStr<'a>
    where
        Self: 'b;

    // Returns a copy of the chunked literal, positioned as it was stored.
    fn literal<'b>(&'b self) -> Self::Literal<'b> {
        self.lit
    }

    // Length in bytes of the whole underlying text, not of a single chunk.
    fn literal_len(&self) -> usize {
        self.lit.buf.len()
    }

    fn is_escaped(&self) -> bool {
        self.esc
    }

    // Returns the literal unchanged when it is not flagged as escaped;
    // otherwise expands the escape sequences into an owned string.
    fn unescaped<'b>(&'b self) -> Unescaped<Self::Literal<'b>> {
        if !self.esc {
            return Unescaped::Literal(self.lit);
        }
        let mut buf = Vec::new();
        unescape(self.lit, &mut buf);
        // Checked conversion: this is a test helper, so a clear panic on
        // invalid UTF-8 is preferable to undefined behaviour from
        // `from_utf8_unchecked`.
        let s = String::from_utf8(buf).expect("unescape output must be valid UTF-8");
        Unescaped::Expanded(s)
    }
}
// Test-only view over a string's bytes that hands them out through `Buf` in
// chunks of at most `n` bytes, to simulate non-contiguous input.
#[derive(Clone, Copy, Debug)]
struct ChunkedStr<'a> {
    // The complete underlying text.
    buf: &'a [u8],
    // Number of bytes already consumed from the front of `buf`.
    pos: usize,
    // Maximum number of bytes a single `chunk()` call exposes.
    n: usize,
}

impl<'a> ChunkedStr<'a> {
    // Creates an unconsumed view over `s` with chunk size `n`.
    fn new(s: &'a str, n: usize) -> Self {
        let buf = s.as_bytes();
        Self { buf, pos: 0, n }
    }

    // Builds views over `s` with several chunk sizes: always 1; also 2 when
    // `s` has at least 2 bytes; also `len - 1` and `len` when it has at
    // least 4 bytes.
    fn chunkify(s: &'a str) -> Vec<Self> {
        let len = s.len();
        let mut sizes = vec![1];
        if len >= 2 {
            sizes.push(2);
        }
        if len >= 4 {
            sizes.extend([len - 1, len]);
        }
        sizes.into_iter().map(|size| Self::new(s, size)).collect()
    }

    // Wraps a copy of this view in a `ChunkedContent` with the given escape
    // flag.
    fn into_content(&self, escaped: bool) -> ChunkedContent<'a> {
        let lit = *self;
        ChunkedContent { lit, esc: escaped }
    }
}
// A `ChunkedStr` is already a `Buf`, so converting it is the identity.
impl IntoBuf for ChunkedStr<'_> {
    type Buf = Self;

    fn into_buf(self) -> Self {
        self
    }
}
// `Buf` implementation that exposes the underlying bytes at most `n` at a
// time through `chunk()`, while `remaining()` reports everything left.
impl Buf for ChunkedStr<'_> {
    // Consumes `n` bytes; panics with a `BufUnderflow` message when fewer
    // than `n` bytes remain.
    fn advance(&mut self, n: usize) {
        let target = self.pos + n;
        if self.buf.len() < target {
            panic!(
                "{}",
                &crate::BufUnderflow {
                    requested: n,
                    remaining: self.buf.len() - self.pos,
                }
            );
        }
        self.pos = target;
    }

    // Returns the next chunk: up to `n` bytes starting at the current
    // position, truncated at the end of the text.
    fn chunk(&self) -> &[u8] {
        let stop = usize::min(self.buf.len(), self.pos + self.n);
        &self.buf[self.pos..stop]
    }

    // Total number of unconsumed bytes across all future chunks.
    fn remaining(&self) -> usize {
        self.buf.len() - self.pos
    }

    // Fills `dst` from the unconsumed bytes and advances past them, or
    // reports a `BufUnderflow` (leaving the position untouched) when fewer
    // than `dst.len()` bytes remain.
    fn try_copy_to_slice(&mut self, dst: &mut [u8]) -> Result<(), crate::BufUnderflow> {
        let wanted = dst.len();
        let stop = self.pos + wanted;
        if self.buf.len() < stop {
            return Err(crate::BufUnderflow {
                requested: wanted,
                remaining: self.buf.len() - self.pos,
            });
        }
        dst.copy_from_slice(&self.buf[self.pos..stop]);
        self.pos = stop;
        Ok(())
    }
}
// Runs `f`, requires that it panics, and asserts that the panic message
// contains `expect`. The payload must be a `String` or a `&str`; any other
// payload type makes this helper itself panic.
fn assert_panic<F, T>(f: F, expect: &str)
where
    F: FnOnce() -> T,
    T: fmt::Debug,
{
    // `unwrap_err` fails (showing the returned value) if `f` did not panic.
    let payload = std::panic::catch_unwind(std::panic::AssertUnwindSafe(f)).unwrap_err();

    // Try the owned-string payload first, then the borrowed one.
    let msg: &str = match payload.downcast_ref::<String>() {
        Some(owned) => owned.as_str(),
        None => payload.downcast_ref::<&str>().copied().unwrap(),
    };

    assert!(
        msg.contains(expect),
        "expected panic containing {expect:?}, got {msg:?}"
    );
}
}