#[cfg(test)]
mod test_from_str;
use std::{borrow::Cow, fmt, iter::Peekable, ops::RangeInclusive};
use super::Element;
use crate::{
warning::{self, CaveatDeferred, IntoCaveatDeferred as _},
Caveat, IntoCaveat as _,
};
/// The character that introduces an escape sequence inside a raw string: a single backslash.
const ESCAPE_CHAR: char = '\\';
/// The problems that can be reported while inspecting or decoding a raw string.
///
/// Every variant carries the index at which the problem was detected. The variant order is
/// significant because the derived `Ord`/`PartialOrd` compare by declaration order first.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Warning {
    /// A control character was found at the given index.
    ControlCharacter(usize),

    /// A UTF-16 code unit failed to decode: the index, followed by the offending code unit.
    DecodeUtf16(usize, u16),

    /// An escape sequence at the given index is not valid.
    InvalidEscape(usize),

    /// The input ended while more characters were still required.
    UnexpectedEndOfString(usize),
}
impl crate::Warning for Warning {
    /// Returns the static identifier associated with this kind of warning.
    ///
    /// The identifier depends only on the variant, never on the index or code unit it carries.
    fn id(&self) -> warning::Id {
        let id = match self {
            Self::ControlCharacter(_) => "control_character_while_parsing_string",
            Self::DecodeUtf16(..) => "decode_utf_1_6",
            Self::InvalidEscape(_) => "invalid_escape",
            Self::UnexpectedEndOfString(_) => "unexpected_end_of_string",
        };

        warning::Id::from_static(id)
    }
}
/// Human-readable rendering of each [`Warning`].
///
/// Every message wraps the reported index in backticks.
impl fmt::Display for Warning {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::ControlCharacter(index) => {
                write!(
                    f,
                    "Control chars were found at index `{index}` while decoding a JSON string."
                )
            }
            Self::DecodeUtf16(index, code) => {
                write!(
                    f,
                    "A UTF-16 surrogate pair `{code}` failed to decode at index: `{index}`."
                )
            }
            Self::InvalidEscape(index) => {
                // The index is delimited by backticks only; a stray `)` used to be emitted
                // between the index and the closing backtick.
                write!(
                    f,
                    "String contains an invalid escape char at index: `{index}`."
                )
            }
            Self::UnexpectedEndOfString(index) => {
                write!(f, "The String ended prematurely at index: `{index}`.")
            }
        }
    }
}
/// Inspects a raw string without decoding it.
///
/// If the string contains a control character, a [`Warning::ControlCharacter`] carrying the
/// index of the first one is recorded against `elem`. The string is then classified by whether
/// it contains the escape character: `PendingStr::HasEscapes` if it does,
/// `PendingStr::NoEscapes` otherwise.
pub(super) fn analyze<'buf>(
    s: &'buf str,
    elem: &Element<'buf>,
) -> Caveat<super::PendingStr<'buf>, Warning> {
    let mut warnings = warning::Set::new();

    // Only the first control character is reported.
    let first_control = s.char_indices().find(|&(_, ch)| ch.is_control());
    if let Some((index, _)) = first_control {
        warnings.insert(elem, Warning::ControlCharacter(index));
    }

    let pending = if s.contains(ESCAPE_CHAR) {
        super::PendingStr::HasEscapes(super::EscapeStr(s))
    } else {
        super::PendingStr::NoEscapes(s)
    };

    pending.into_caveat(warnings)
}
/// Converts a raw string into its decoded form, borrowing whenever possible.
///
/// * Without any escape character the input is returned borrowed; the first control
///   character, if any, is reported as a warning.
/// * With escape characters the input is decoded into a new `String`. On the first decoding
///   problem the warning is recorded and the original, undecoded input is returned borrowed.
pub(super) fn from_raw<'buf>(s: &'buf str) -> CaveatDeferred<Cow<'buf, str>, Warning> {
    let mut warnings = warning::SetDeferred::new();

    if s.contains(ESCAPE_CHAR) {
        // The decoded text is never longer than the raw text, so one allocation suffices.
        let mut unescaped = Buffer::with_capacity(s.len());

        for item in Decoded::from_str(s) {
            match item {
                Ok(ch) => unescaped.push(ch),
                Err(kind) => {
                    // Give up on the first problem and hand back the raw input untouched.
                    warnings.insert(kind);
                    return Cow::Borrowed(s).into_caveat_deferred(warnings);
                }
            }
        }

        return Cow::<'buf, str>::Owned(unescaped.into_string()).into_caveat_deferred(warnings);
    }

    // Nothing to decode: only check for control characters.
    let first_control = s.char_indices().find(|&(_, ch)| ch.is_control());
    if let Some((index, _)) = first_control {
        warnings.insert(Warning::ControlCharacter(index));
    }

    Cow::Borrowed(s).into_caveat_deferred(warnings)
}
/// Compares the decoded form of `raw` with `other`, character by character.
///
/// Returns `Ok(true)` only when both yield exactly the same sequence of characters.
///
/// # Errors
///
/// Returns the first [`Warning`] produced while decoding `raw`, provided it is reached before a
/// mismatch is found.
pub(super) fn eq(raw: &str, other: &str) -> Result<bool, Warning> {
    let mut remaining = other.chars();

    for item in Decoded::from_str(raw) {
        let actual = item?;

        // Covers both a differing character and `other` running out first.
        if remaining.next() != Some(actual) {
            return Ok(false);
        }
    }

    // `raw` is exhausted; the strings are equal only if `other` is exhausted too.
    Ok(remaining.next().is_none())
}
/// Compares the decoded form of `raw` with `other`, ignoring ASCII case differences.
///
/// Returns `Ok(true)` only when both yield the same number of characters and each pair matches
/// under `char::eq_ignore_ascii_case`.
///
/// # Errors
///
/// Returns the first [`Warning`] produced while decoding `raw`, provided it is reached before a
/// mismatch is found.
pub(super) fn eq_ignore_ascii_case(raw: &str, other: &str) -> Result<bool, Warning> {
    let mut remaining = other.chars();

    for item in Decoded::from_str(raw) {
        let actual = item?;

        // `other` ran out before `raw` did.
        let Some(wanted) = remaining.next() else {
            return Ok(false);
        };

        if !wanted.eq_ignore_ascii_case(&actual) {
            return Ok(false);
        }
    }

    // `raw` is exhausted; the strings are equal only if `other` is exhausted too.
    Ok(remaining.next().is_none())
}
/// Decodes the escape sequence that follows an already consumed backslash.
///
/// `\u` sequences are delegated to `parse_unicode_escape`. Every other recognised escape maps
/// to a single character.
///
/// # Errors
///
/// * Whatever `Chars::next_or_eof` reports when reading the character after the backslash.
/// * [`Warning::InvalidEscape`] when that character does not name a known escape.
/// * [`Warning::ControlCharacter`] when the decoded character is a control character. This
///   applies to the `\b`, `\f`, `\n`, `\r` and `\t` escapes, which all decode to control
///   characters.
fn parse_escape(chars: &mut Chars<'_>) -> Result<char, Warning> {
    let (index, escape) = chars.next_or_eof()?;

    let decoded = match escape {
        'u' => return parse_unicode_escape(chars),
        // These escapes stand for themselves.
        '"' | '\\' | '/' => escape,
        'b' => '\u{08}',
        'f' => '\u{0c}',
        'n' => '\n',
        'r' => '\r',
        't' => '\t',
        _ => return Err(Warning::InvalidEscape(index)),
    };

    if decoded.is_control() {
        Err(Warning::ControlCharacter(index))
    } else {
        Ok(decoded)
    }
}
/// Decodes the remainder of a `\u` escape; the `\u` prefix has already been consumed.
///
/// When the first code unit lies in the high-surrogate range, a second `\uXXXX` escape must
/// follow immediately and the two are combined with `decode_surrogate_pair`. Otherwise the
/// code unit is converted to a `char` on its own.
///
/// # Errors
///
/// * Whatever `decode_hex_escape`, `Chars::is_next_escape` or `decode_surrogate_pair` report.
/// * [`Warning::InvalidEscape`] when a high surrogate is not followed by another `\u` escape,
///   or when a lone code unit is not a valid `char`.
/// * [`Warning::ControlCharacter`] when the decoded character is a control character.
fn parse_unicode_escape(chars: &mut Chars<'_>) -> Result<char, Warning> {
    const HIGH_SURROGATE: RangeInclusive<u16> = 0xD800..=0xDBFF;

    let first = decode_hex_escape(chars)?;

    let decoded = if HIGH_SURROGATE.contains(&first) {
        match chars.is_next_escape()? {
            Some(second) => decode_surrogate_pair(first, second, chars.index)?,
            None => return Err(Warning::InvalidEscape(chars.index)),
        }
    } else {
        char::from_u32(u32::from(first)).ok_or(Warning::InvalidEscape(chars.index))?
    };

    if decoded.is_control() {
        return Err(Warning::ControlCharacter(chars.index));
    }

    Ok(decoded)
}
/// A character iterator over a string slice that remembers the index of the last item it
/// yielded and supports pushing a single item back.
struct Chars<'buf> {
/// The underlying `(index, char)` iterator, peekable so that lookahead can be conditional.
char_indices: Peekable<std::str::CharIndices<'buf>>,
/// An item that was consumed during lookahead and must be yielded again by the next call
/// to `next`.
push_back: Option<(usize, char)>,
/// The index of the item most recently yielded by `next`; `0` before anything is yielded.
index: usize,
}
impl<'buf> Chars<'buf> {
    /// Creates an iterator positioned at the start of `s`.
    fn from_str(s: &'buf str) -> Self {
        let char_indices = s.char_indices().peekable();

        Self {
            char_indices,
            push_back: None,
            index: 0,
        }
    }

    /// Yields the next `(index, char)` pair, treating the end of input as an error.
    ///
    /// # Errors
    ///
    /// * [`Warning::UnexpectedEndOfString`] (with the index of the last yielded item) when the
    ///   input is exhausted.
    /// * [`Warning::ControlCharacter`] when the next character is a control character.
    fn next_or_eof(&mut self) -> Result<(usize, char), Warning> {
        match self.next() {
            None => Err(Warning::UnexpectedEndOfString(self.index)),
            Some((index, ch)) if ch.is_control() => Err(Warning::ControlCharacter(index)),
            Some(item) => Ok(item),
        }
    }

    /// Consumes a directly following `\uXXXX` escape, if there is one, and returns its value.
    ///
    /// Returns `Ok(None)` when the upcoming characters do not start with `\u`. A backslash that
    /// was consumed but is not followed by `u` is pushed back so it is yielded again.
    ///
    /// # Errors
    ///
    /// Whatever `decode_hex_escape` reports for the four characters after `\u`.
    fn is_next_escape(&mut self) -> Result<Option<u16>, Warning> {
        let Some(backslash) = self.char_indices.next_if(|&(_, ch)| ch == ESCAPE_CHAR) else {
            return Ok(None);
        };

        match self.char_indices.next_if(|&(_, ch)| ch == 'u') {
            Some(_) => decode_hex_escape(self).map(Some),
            None => {
                // Not a unicode escape: hand the backslash back to the regular iteration.
                self.push_back = Some(backslash);
                Ok(None)
            }
        }
    }
}
impl Iterator for Chars<'_> {
    type Item = (usize, char);

    /// Yields the pushed-back item if there is one, otherwise the next item of the underlying
    /// iterator, and records the index of whatever was yielded.
    fn next(&mut self) -> Option<Self::Item> {
        let item = self
            .push_back
            .take()
            .or_else(|| self.char_indices.next())?;

        self.index = item.0;
        Some(item)
    }
}
/// An iterator that yields the decoded characters of a raw string, resolving escape sequences
/// as it goes.
struct Decoded<'buf> {
    /// The raw characters still to be decoded.
    chars: Chars<'buf>,
}

impl<'buf> Decoded<'buf> {
    /// Creates a decoder positioned at the start of `s`.
    fn from_str(s: &'buf str) -> Self {
        let chars = Chars::from_str(s);
        Self { chars }
    }
}
impl Iterator for Decoded<'_> {
    type Item = Result<char, Warning>;

    /// Yields the next decoded character, or the warning that prevented decoding it.
    ///
    /// A backslash starts an escape sequence that is resolved with `parse_escape`; an unescaped
    /// control character is reported as [`Warning::ControlCharacter`]; anything else is yielded
    /// unchanged.
    fn next(&mut self) -> Option<Self::Item> {
        let (index, ch) = self.chars.next()?;

        let item = match ch {
            ESCAPE_CHAR => parse_escape(&mut self.chars),
            ch if ch.is_control() => Err(Warning::ControlCharacter(index)),
            ch => Ok(ch),
        };

        Some(item)
    }
}
/// Accumulates decoded characters into an owned `String`.
struct Buffer {
    /// The text collected so far.
    buf: String,
}

impl Buffer {
    /// Creates an empty buffer whose backing `String` is allocated with `capacity`.
    fn with_capacity(capacity: usize) -> Self {
        let buf = String::with_capacity(capacity);
        Self { buf }
    }

    /// Appends a single character.
    fn push(&mut self, ch: char) {
        let Self { buf } = self;
        buf.push(ch);
    }

    /// Consumes the buffer and returns the collected text.
    fn into_string(self) -> String {
        let Self { buf } = self;
        buf
    }
}
/// Combines two UTF-16 code units into a single `char`.
///
/// `index` is only used to build the warning when decoding fails.
///
/// # Errors
///
/// * [`Warning::DecodeUtf16`] carrying the unpaired surrogate when the first decoded item is an
///   error.
/// * [`Warning::InvalidEscape`] when the decoder yields nothing at all.
fn decode_surrogate_pair(n1: u16, n2: u16, index: usize) -> Result<char, Warning> {
    char::decode_utf16([n1, n2])
        .next()
        .ok_or(Warning::InvalidEscape(index))?
        .map_err(|err| Warning::DecodeUtf16(index, err.unpaired_surrogate()))
}
/// Reads exactly four characters and decodes them as one hexadecimal `u16`.
///
/// All four characters are consumed before any of them is validated, so a problem reported by
/// `Chars::next_or_eof` (end of input or a control character) takes precedence over a bad
/// digit.
///
/// Each character must itself be a hexadecimal digit. Parsing the four characters as one
/// string with `u16::from_str_radix` would also accept a leading `+` sign, which let an input
/// such as `\u+041` decode as if it were `\u0041`.
///
/// # Errors
///
/// * Whatever `Chars::next_or_eof` reports while reading the four characters.
/// * [`Warning::InvalidEscape`], carrying the index of the fourth character, when any of the
///   four characters is not a hexadecimal digit.
fn decode_hex_escape(chars: &mut Chars<'_>) -> Result<u16, Warning> {
    const RADIX: u32 = 16;

    let (_, one) = chars.next_or_eof()?;
    let (_, two) = chars.next_or_eof()?;
    let (_, three) = chars.next_or_eof()?;
    let (index, four) = chars.next_or_eof()?;

    let mut n: u32 = 0;
    for ch in [one, two, three, four] {
        let Some(digit) = ch.to_digit(RADIX) else {
            return Err(Warning::InvalidEscape(index));
        };
        n = n * RADIX + digit;
    }

    // Four hexadecimal digits are at most `0xFFFF`, so this conversion is not expected to fail;
    // it is checked rather than cast to avoid a silently truncating `as`.
    u16::try_from(n).map_err(|_| Warning::InvalidEscape(index))
}