#[cfg(test)]
mod test_unescape;
use std::{borrow::Cow, fmt, iter::Peekable};
use crate::{warning, Caveat, IntoCaveat};
use super::Element;
/// The character that introduces an escape sequence inside a string (a backslash).
const ESCAPE_CHAR: char = '\\';
/// Problems that can be reported while decoding a JSON string.
///
/// The `usize` carried by every variant is the index in the source string
/// that the problem was reported at.
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd)]
pub enum Warning {
/// A control character was found in the string.
ControlCharacterWhileParsingString(usize),
/// Two `\uXXXX` code units could not be decoded as UTF-16; the `u16` is the
/// unpaired surrogate reported by the decoder.
DecodeUtf16(usize, u16),
/// An escape sequence was not valid: an unknown escape letter, hex digits
/// that do not parse, or a code unit that is not a `char` on its own.
InvalidEscape(usize),
/// The string ended in the middle of an escape sequence.
UnexpectedEndOfString(usize),
}
impl crate::Warning for Warning {
    /// Returns the static identifier associated with this warning's variant.
    ///
    /// The payload of the variant does not influence the identifier.
    fn id(&self) -> warning::Id {
        let name = match self {
            Self::ControlCharacterWhileParsingString(_) => "control_character_while_parsing_string",
            Self::DecodeUtf16(..) => "decode_utf_1_6",
            Self::InvalidEscape(_) => "invalid_escape",
            Self::UnexpectedEndOfString(_) => "unexpected_end_of_string",
        };
        warning::Id::from_static(name)
    }
}
impl fmt::Display for Warning {
    /// Writes a human-readable description of the warning, including the
    /// index (and, for `DecodeUtf16`, the offending code unit) it carries.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::ControlCharacterWhileParsingString(index) => {
                write!(
                    f,
                    "Control chars were found at index `{index}` while decoding a JSON string."
                )
            }
            Self::DecodeUtf16(index, code) => {
                write!(
                    f,
                    "A UTF-16 surrogate pair `{code}` failed to decode at index: `{index}`."
                )
            }
            Self::InvalidEscape(index) => {
                // The message used to contain a stray `)` after the index
                // placeholder, which leaked into the rendered text.
                write!(
                    f,
                    "String contains an invalid escape char at index: `{index}`."
                )
            }
            Self::UnexpectedEndOfString(index) => {
                write!(f, "The String ended prematurely at index: `{index}`.")
            }
        }
    }
}
/// Classifies `s` by whether it contains any escape character.
///
/// * If `s` contains a backslash it is returned as `PendingStr::HasEscapes`
///   with no warnings; nothing else is checked here.
/// * Otherwise `s` is returned as `PendingStr::NoEscapes`, and a
///   `ControlCharacterWhileParsingString` warning is attached to `elem` for
///   the first control character found, if any.
pub(crate) fn analyze<'buf>(
    s: &'buf str,
    elem: &Element<'buf>,
) -> Caveat<PendingStr<'buf>, Warning> {
    let mut warnings = warning::Set::new();

    if s.contains(ESCAPE_CHAR) {
        return PendingStr::HasEscapes(EscapeStr(s)).into_caveat(warnings);
    }

    // `str::find` yields the byte index of the first matching char, which is
    // the same index `char_indices` would report.
    if let Some(index) = s.find(char::is_control) {
        warnings.insert(Warning::ControlCharacterWhileParsingString(index), elem);
    }

    PendingStr::NoEscapes(s).into_caveat(warnings)
}
/// A string as classified by `analyze`, before any escape decoding is done.
pub(crate) enum PendingStr<'buf> {
/// The string contains no escape character and can be used as it is.
NoEscapes(&'buf str),
/// The string contains at least one escape character and still needs decoding.
HasEscapes(EscapeStr<'buf>),
}
/// A borrowed string that is known to contain at least one escape character
/// and has not been decoded yet.
pub(crate) struct EscapeStr<'buf>(&'buf str);
impl<'buf> EscapeStr<'buf> {
pub(crate) fn decode_escapes(&self, elem: &Element<'buf>) -> Caveat<Cow<'buf, str>, Warning> {
unescape_str(self.0, elem)
}
pub(crate) fn into_raw(self) -> &'buf str {
self.0
}
}
/// Decodes the escape sequences in `s`.
///
/// * When `s` contains no backslash it is returned borrowed; a warning is
///   attached to `elem` for the first control character found, if any.
/// * Otherwise the string is decoded char by char into a new buffer and
///   returned owned.
/// * If decoding hits a problem, the warning is attached to `elem` and the
///   original `s` is returned borrowed and unchanged; decoding stops at the
///   first problem.
pub(crate) fn unescape_str<'buf>(
    s: &'buf str,
    elem: &Element<'buf>,
) -> Caveat<Cow<'buf, str>, Warning> {
    let mut warnings = warning::Set::new();

    // Fast path: nothing to decode, so no allocation is needed.
    if !s.contains(ESCAPE_CHAR) {
        if let Some(index) = s.find(char::is_control) {
            warnings.insert(Warning::ControlCharacterWhileParsingString(index), elem);
        }
        return Cow::Borrowed(s).into_caveat(warnings);
    }

    let mut chars = Chars::from_str(s);
    let mut buf = Buffer::with_capacity(s.len());

    // `chars` is also advanced inside `parse_escape`, so it cannot be driven
    // by a `for` loop.
    while let Some((index, ch)) = chars.next() {
        let step = if ch == ESCAPE_CHAR {
            parse_escape(&mut chars, &mut buf)
        } else {
            buf.push_char(ch, index)
        };

        if let Err(warn_kind) = step {
            warnings.insert(warn_kind, elem);
            return Cow::Borrowed(s).into_caveat(warnings);
        }
    }

    Cow::<'buf, str>::Owned(buf.into_string()).into_caveat(warnings)
}
/// Decodes a single escape sequence whose leading backslash has already been
/// consumed, and pushes the decoded char onto `buf`.
///
/// # Errors
///
/// Returns `InvalidEscape` with the index of the escape letter when the
/// letter is not recognised, and forwards any error from reading the next
/// char, from `parse_unicode_escape` or from `Buffer::push_char`.
fn parse_escape(chars: &mut Chars<'_>, buf: &mut Buffer) -> Result<(), Warning> {
    let (index, escape) = chars.next_or_eof()?;

    // `\uXXXX` has its own multi-char decoder.
    if escape == 'u' {
        return parse_unicode_escape(chars, buf);
    }

    let decoded = match escape {
        // These escapes stand for themselves.
        '"' | '\\' | '/' => escape,
        'b' => '\u{0008}',
        'f' => '\u{000C}',
        'n' => '\n',
        'r' => '\r',
        't' => '\t',
        _ => return Err(Warning::InvalidEscape(index)),
    };

    buf.push_char(decoded, index)
}
/// Decodes the four hex digits of a `\uXXXX` escape (the `\u` has already been
/// consumed) and pushes the resulting char onto `buf`.
///
/// A code unit that is a valid `char` on its own is pushed directly. A
/// surrogate code unit must be followed immediately by a second `\uXXXX`
/// escape; the two units are then decoded together as a UTF-16 surrogate pair.
///
/// # Errors
///
/// * `InvalidEscape` when a surrogate is not followed by a second `\uXXXX`.
/// * Any error from `decode_hex_escape`, `Chars::is_next_escape`,
///   `Buffer::push_char` or `Buffer::push_surrogate_pair`.
fn parse_unicode_escape(chars: &mut Chars<'_>, buf: &mut Buffer) -> Result<(), Warning> {
    let n1 = decode_hex_escape(chars)?;

    // Only surrogate code units fail this conversion. Everything else is a
    // complete character and must not absorb a following `\uXXXX` escape:
    // previously `\u0041\u0042` was decoded as a "pair", yielding `A` and
    // silently dropping the `B`.
    if let Some(ch) = char::from_u32(u32::from(n1)) {
        return buf.push_char(ch, chars.index);
    }

    // `n1` is a surrogate, so it is only meaningful as part of a pair.
    let Some(n2) = chars.is_next_escape()? else {
        return Err(Warning::InvalidEscape(chars.index));
    };
    buf.push_surrogate_pair(n1, n2, chars.index)?;
    Ok(())
}
/// A cursor over the chars of a string that remembers the index of the last
/// char handed out by its `Iterator::next`.
struct Chars<'buf> {
/// The underlying `(index, char)` iterator, peekable for lookahead.
char_indices: Peekable<std::str::CharIndices<'buf>>,
/// Index of the char most recently returned by `next`; `0` before any call.
index: usize,
}
impl<'buf> Chars<'buf> {
    /// Creates a cursor positioned at the start of `s`.
    fn from_str(s: &'buf str) -> Self {
        Self {
            char_indices: s.char_indices().peekable(),
            index: 0,
        }
    }

    /// Returns the next `(index, char)` pair.
    ///
    /// # Errors
    ///
    /// * `UnexpectedEndOfString` (with the index of the last char read) when
    ///   the string is exhausted.
    /// * `ControlCharacterWhileParsingString` when the next char is a control
    ///   character.
    fn next_or_eof(&mut self) -> Result<(usize, char), Warning> {
        let Some((index, ch)) = self.next() else {
            return Err(Warning::UnexpectedEndOfString(self.index));
        };
        if ch.is_control() {
            return Err(Warning::ControlCharacterWhileParsingString(index));
        }
        Ok((index, ch))
    }

    /// If the upcoming chars are `\u`, consumes them together with the four
    /// hex digits that follow and returns the decoded code unit.
    ///
    /// Returns `Ok(None)` and consumes nothing when the upcoming chars are not
    /// `\u`.
    ///
    /// # Errors
    ///
    /// Forwards any error from `decode_hex_escape`.
    fn is_next_escape(&mut self) -> Result<Option<u16>, Warning> {
        // Two chars of lookahead are needed, which `Peekable` cannot provide,
        // so probe a clone and commit only if both chars match. Consuming the
        // backslash before checking for `u` used to swallow it whenever a
        // different escape followed (for example `\n`), corrupting the output.
        let mut lookahead = self.char_indices.clone();
        let starts_unicode_escape = matches!(lookahead.next(), Some((_, ESCAPE_CHAR)))
            && matches!(lookahead.next(), Some((_, 'u')));
        if !starts_unicode_escape {
            return Ok(None);
        }

        // Commit the lookahead. `self.index` is not updated for these two
        // chars; it is next set when `decode_hex_escape` reads a hex digit.
        self.char_indices = lookahead;

        let n = decode_hex_escape(self)?;
        Ok(Some(n))
    }
}
impl Iterator for Chars<'_> {
    type Item = (usize, char);

    /// Yields the next `(index, char)` pair and records its index in
    /// `self.index`. Once the string is exhausted, `self.index` keeps its
    /// last value.
    fn next(&mut self) -> Option<Self::Item> {
        let item = self.char_indices.next()?;
        self.index = item.0;
        Some(item)
    }
}
/// Accumulates the decoded chars of a string.
struct Buffer {
/// The decoded output built up so far.
buf: String,
}
impl Buffer {
    /// Creates an empty buffer with room for `capacity` bytes.
    fn with_capacity(capacity: usize) -> Self {
        Self {
            buf: String::with_capacity(capacity),
        }
    }

    /// Appends `ch` to the buffer.
    ///
    /// # Errors
    ///
    /// Returns `ControlCharacterWhileParsingString(index)` and leaves the
    /// buffer untouched when `ch` is a control character.
    fn push_char(&mut self, ch: char, index: usize) -> Result<(), Warning> {
        if !ch.is_control() {
            self.buf.push(ch);
            return Ok(());
        }
        Err(Warning::ControlCharacterWhileParsingString(index))
    }

    /// Consumes the buffer and returns the accumulated string.
    fn into_string(self) -> String {
        self.buf
    }

    /// Decodes `[n1, n2]` as UTF-16, appends the first decoded char and
    /// returns it.
    ///
    /// # Errors
    ///
    /// * `DecodeUtf16(index, unit)` when the decoder reports an unpaired
    ///   surrogate.
    /// * `InvalidEscape(index)` when the decoder yields nothing.
    /// * Any error from `push_char`.
    fn push_surrogate_pair(&mut self, n1: u16, n2: u16, index: usize) -> Result<char, Warning> {
        let decoded = char::decode_utf16([n1, n2])
            .next()
            .ok_or(Warning::InvalidEscape(index))?
            .map_err(|err| Warning::DecodeUtf16(index, err.unpaired_surrogate()))?;

        self.push_char(decoded, index)?;
        Ok(decoded)
    }
}
/// Reads exactly four chars from `chars` and interprets them as a 16-bit
/// hexadecimal code unit, as found after `\u` in an escape sequence.
///
/// # Errors
///
/// * Any error from `Chars::next_or_eof` while reading the four chars.
/// * `InvalidEscape` with the index of the fourth char when any of the four
///   chars is not a hexadecimal digit.
fn decode_hex_escape(chars: &mut Chars<'_>) -> Result<u16, Warning> {
    const RADIX: u32 = 16;

    // All four chars are read before validation so that end-of-string and
    // control-character errors take precedence over `InvalidEscape`.
    let (_, one) = chars.next_or_eof()?;
    let (_, two) = chars.next_or_eof()?;
    let (_, three) = chars.next_or_eof()?;
    let (index, four) = chars.next_or_eof()?;

    // Each char is validated individually. Parsing the four chars with
    // `u16::from_str_radix` would accept a leading `+` (so `\u+041` would
    // decode as U+0041), which is not a valid escape.
    [one, two, three, four]
        .into_iter()
        .try_fold(0_u32, |acc, ch| {
            ch.to_digit(RADIX).map(|digit| acc * RADIX + digit)
        })
        // Four hex digits never exceed `u16::MAX`; the checked conversion
        // just avoids a truncating cast.
        .and_then(|value| u16::try_from(value).ok())
        .ok_or(Warning::InvalidEscape(index))
}