/// Error returned when a string literal cannot be unescaped.
///
/// The routines in this file produce it for a truncated or malformed escape
/// sequence, for a numeric escape that overflows or does not name a Unicode
/// scalar value, and when the unescaped bytes are not valid UTF-8.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct UnescapeError;

impl std::fmt::Display for UnescapeError {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        f.write_str("invalid escape sequence in string literal")
    }
}

// Implementing `Error` lets callers box this type or propagate it with `?`
// into `Box<dyn Error>`-style error types.
impl std::error::Error for UnescapeError {}

/// Result alias used by the unescaping routines in this file.
type UnescapeResult<T> = Result<T, UnescapeError>;
/// Accumulates unescaped output as raw bytes.
///
/// The bytes are only checked for UTF-8 validity once, in
/// [`UnescapeState::finalize`].
struct UnescapeState {
    out: Vec<u8>,
}

impl UnescapeState {
    /// Creates an empty state whose buffer can hold `capacity` bytes without
    /// reallocating.
    fn with_capacity(capacity: usize) -> Self {
        let out = Vec::with_capacity(capacity);
        Self { out }
    }

    /// Appends `c` encoded as UTF-8.
    fn push_char(&mut self, c: char) {
        // A `char` never needs more than four bytes in UTF-8.
        let mut scratch = [0u8; 4];
        let encoded = c.encode_utf8(&mut scratch);
        self.out.extend_from_slice(encoded.as_bytes());
    }

    /// Appends a single byte unchanged.
    fn push_u8(&mut self, c: u8) {
        self.out.push(c);
    }

    /// Appends the character whose code point is `c`.
    ///
    /// Fails with `UnescapeError` when `c` is not a Unicode scalar value
    /// (a surrogate, or a value above `0x10FFFF`).
    fn push_raw(&mut self, c: u32) -> UnescapeResult<()> {
        let decoded = std::char::from_u32(c).ok_or(UnescapeError)?;
        self.push_char(decoded);
        Ok(())
    }

    /// Appends all bytes of `slice` unchanged.
    fn push_slice(&mut self, slice: &[u8]) {
        self.out.extend(slice);
    }

    /// Consumes the state and returns the collected bytes as a `String`,
    /// or `UnescapeError` if they are not valid UTF-8.
    fn finalize(self) -> UnescapeResult<String> {
        match String::from_utf8(self.out) {
            Ok(text) => Ok(text),
            Err(_) => Err(UnescapeError),
        }
    }
}
/// Parses a run of digits in the given `radix` from `s`, continuing from an
/// already accumulated value.
///
/// * `s` - byte cursor; every digit that is parsed is consumed, the first
///   non-digit byte is left in place.
/// * `radix` - numeric base of the digits.
/// * `result` - initial accumulator, e.g. the value of a digit the caller has
///   already consumed.
/// * `max` - maximum number of digits to consume; `None` means `u8::MAX`.
///   `Some(0)` consumes nothing and returns `result` unchanged.
///
/// # Errors
///
/// Returns `UnescapeError` if the accumulated value does not fit in a `u32`.
///
/// # Panics
///
/// Panics if `radix` is greater than 36 (a `char::to_digit` precondition).
fn parse_u32(
    s: &mut PeekableBytes,
    radix: u32,
    mut result: u32,
    max: Option<u8>,
) -> UnescapeResult<u32> {
    // Check the budget *before* consuming a digit so that a budget of zero
    // neither reads input nor underflows the counter.
    let mut remaining = max.unwrap_or(u8::MAX);
    while remaining > 0 {
        let digit = match s.peek().and_then(|byte| char::from(byte).to_digit(radix)) {
            Some(digit) => digit,
            None => break,
        };
        let _ = s.next();
        result = result
            .checked_mul(radix)
            .and_then(|shifted| shifted.checked_add(digit))
            .ok_or(UnescapeError)?;
        remaining -= 1;
    }
    Ok(result)
}
/// Escape-handling rules for one flavour of quoted string literal.
///
/// Implementors are used as a type parameter of `unescape`, which calls
/// `handle_escape` each time it finds a backslash in the input.
trait EscapedString {
/// Processes the escape sequence at the start of `bytes`, appends the decoded
/// output to `state`, and returns the part of `bytes` that follows the
/// sequence.
///
/// The implementations in this file expect `bytes` to begin with the
/// backslash that introduces the escape, and return `UnescapeError` when the
/// sequence is incomplete or invalid.
fn handle_escape<'a>(bytes: &'a [u8], state: &mut UnescapeState) -> UnescapeResult<&'a [u8]>;
}
/// Escape rules for single-quoted string literals.
struct SingleQuoteString;

impl EscapedString for SingleQuoteString {
    /// Decodes the escape sequence at the start of `bytes` and returns the
    /// unconsumed remainder.
    ///
    /// `bytes` must begin with the backslash that introduces the escape.
    /// Only `\\` and `\'` are escapes; they produce the second character.
    /// A backslash followed by any other byte is copied to the output
    /// verbatim, backslash included.
    ///
    /// # Errors
    ///
    /// Returns `UnescapeError` when nothing follows the backslash.
    fn handle_escape<'a>(bytes: &'a [u8], state: &mut UnescapeState) -> UnescapeResult<&'a [u8]> {
        let mut ins = PeekableBytes::new(bytes);
        // The backslash must be consumed outside the assertion: the arguments
        // of `debug_assert_eq!` are not evaluated in release builds, so
        // calling `next()` inside it would leave the backslash unconsumed.
        let lead = ins.next();
        debug_assert_eq!(lead, Some(b'\\'));

        let escaped = ins.next().ok_or(UnescapeError)?;
        if !matches!(escaped, b'\\' | b'\'') {
            // Not a recognised escape: keep the backslash.
            state.push_u8(b'\\');
        }
        state.push_u8(escaped);
        Ok(ins.as_slice())
    }
}
/// Escape rules for double-quoted string literals.
struct DoubleQuoteString;

impl EscapedString for DoubleQuoteString {
    /// Decodes the escape sequence at the start of `bytes` and returns the
    /// unconsumed remainder.
    ///
    /// `bytes` must begin with the backslash that introduces the escape.
    /// Recognised sequences (these follow PHP's double-quoted string syntax,
    /// which this file appears to implement):
    ///
    /// * `\$`, `\"`, `\\` - the character itself;
    /// * `\n`, `\r`, `\t`, `\v`, `\e`, `\f` - the matching control character;
    /// * `\x` followed by one or two hex digits - the code point with that
    ///   value;
    /// * `\` followed by one to three octal digits - the code point with that
    ///   value, wrapped into `0..=0xFF`;
    /// * `\u{…}` with at least one hex digit - that Unicode code point.
    ///
    /// Numeric escapes are appended as the UTF-8 encoding of the code point,
    /// not as a raw byte. Any other sequence (including `\x` or `\u` without
    /// the required follow-up) is copied verbatim, backslash included, and
    /// only the backslash and the byte after it are consumed.
    ///
    /// # Errors
    ///
    /// Returns `UnescapeError` when nothing follows the backslash, when a
    /// `\u{` escape has no digits, overflows, is not closed by `}` or does not
    /// name a Unicode scalar value.
    fn handle_escape<'a>(bytes: &'a [u8], state: &mut UnescapeState) -> UnescapeResult<&'a [u8]> {
        let mut ins = PeekableBytes::new(bytes);
        // The backslash must be consumed outside the assertion: the arguments
        // of `debug_assert_eq!` are not evaluated in release builds, so
        // calling `next()` inside it would leave the backslash unconsumed.
        let lead = ins.next();
        debug_assert_eq!(lead, Some(b'\\'));

        let d = ins.next().ok_or(UnescapeError)?;
        match d {
            b'$' | b'"' | b'\\' => state.push_u8(d),
            b'n' => state.push_u8(b'\n'),
            b'r' => state.push_u8(b'\r'),
            b't' => state.push_u8(b'\t'),
            b'v' => state.push_u8(b'\x0B'),
            b'e' => state.push_u8(b'\x1B'),
            b'f' => state.push_u8(b'\x0C'),
            // `\x` is only an escape when a hex digit follows; otherwise it
            // falls through to the verbatim arm instead of emitting NUL.
            b'x' if ins.peek().map_or(false, |b| b.is_ascii_hexdigit()) => {
                let val = parse_u32(&mut ins, 16, 0, Some(2))?;
                state.push_raw(val)?;
            }
            // Peek rather than consume, so that a byte following a plain `\u`
            // (possibly the start of another escape) is left for the caller.
            b'u' if ins.peek() == Some(b'{') => {
                let _ = ins.next();
                if !ins.peek().map_or(false, |b| b.is_ascii_hexdigit()) {
                    // `\u{}` and `\u{<non-hex>` are malformed.
                    return Err(UnescapeError);
                }
                let val = parse_u32(&mut ins, 16, 0, None)?;
                state.push_raw(val)?;
                if ins.next() != Some(b'}') {
                    return Err(UnescapeError);
                }
            }
            b'0'..=b'7' => {
                // The first digit is already consumed, so at most two more
                // belong to the escape (three octal digits in total).
                let first = u32::from(d - b'0');
                let val = parse_u32(&mut ins, 8, first, Some(2))?;
                // Three octal digits can reach 0o777; wrap into one byte.
                state.push_raw(val & 0xFF)?;
            }
            _ => {
                state.push_u8(b'\\');
                state.push_u8(d);
            }
        }
        Ok(ins.as_slice())
    }
}
pub fn parse_string(literal: &str) -> Result<String, UnescapeError> {
let inner = &literal[1..(literal.len()) - 1];
if literal.bytes().next().unwrap() == b'\'' {
unescape::<SingleQuoteString>(inner)
} else {
unescape::<DoubleQuoteString>(inner)
}
}
/// Unescapes `s` using the escape rules of `S`.
///
/// Text between backslashes is copied through unchanged; every backslash is
/// handed to `S::handle_escape`, which appends the decoded output and reports
/// where scanning should resume.
///
/// # Errors
///
/// Propagates any `UnescapeError` from `S::handle_escape`, and fails if the
/// resulting bytes are not valid UTF-8.
fn unescape<S: EscapedString>(s: &str) -> UnescapeResult<String> {
    let mut state = UnescapeState::with_capacity(s.len());
    let mut rest = s.as_bytes();
    loop {
        match memchr::memchr(b'\\', rest) {
            Some(at) => {
                // `plain` is everything before the backslash, `escaped`
                // starts at the backslash itself.
                let (plain, escaped) = rest.split_at(at);
                state.push_slice(plain);
                rest = S::handle_escape(escaped, &mut state)?;
            }
            None => break,
        }
    }
    // Whatever follows the last escape is plain text.
    state.push_slice(rest);
    state.finalize()
}
/// Cursor over a byte slice with one-byte lookahead.
struct PeekableBytes<'a> {
    /// The full underlying input.
    slice: &'a [u8],
    /// Index of the next byte to be yielded.
    pos: usize,
}

impl<'a> Iterator for PeekableBytes<'a> {
    type Item = u8;

    /// Yields the byte under the cursor and steps past it, or `None` once the
    /// input is exhausted.
    fn next(&mut self) -> Option<u8> {
        let current = self.peek();
        if current.is_some() {
            self.pos += 1;
        }
        current
    }
}

impl<'a> PeekableBytes<'a> {
    /// Creates a cursor positioned at the start of `slice`.
    pub fn new(slice: &'a [u8]) -> Self {
        Self { pos: 0, slice }
    }

    /// Returns the byte under the cursor without advancing.
    pub fn peek(&self) -> Option<u8> {
        self.slice.get(self.pos).copied()
    }

    /// Consumes the cursor and returns the bytes that have not been yielded.
    pub fn as_slice(self) -> &'a [u8] {
        let (_, unread) = self.slice.split_at(self.pos);
        unread
    }
}
/// Returns `true` if `string` is a canonical decimal integer: `"0"`, or an
/// optional leading `-` followed by a non-zero digit and any number of further
/// digits.
///
/// Empty input, a lone `-`, a leading `+`, leading zeros, `-0` and any
/// non-digit byte all yield `false`. This appears to mirror the rule PHP uses
/// to decide whether a string array key is treated as an integer; the size of
/// the number is not checked here.
pub fn is_array_key_numeric(string: &str) -> bool {
    let bytes = string.as_bytes();
    let negative = bytes.first() == Some(&b'-');
    let digits = if negative { &bytes[1..] } else { bytes };
    match digits {
        // "" and "-" contain no digits at all.
        [] => false,
        // Zero is only canonical without a sign.
        [b'0'] => !negative,
        // Leading zeros ("0123", "-0123") are not canonical.
        [b'0', ..] => false,
        _ => digits.iter().all(u8::is_ascii_digit),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `unescape` with the escape rules of `S` over every
    /// `(input, expected)` pair and compares the outcome.
    fn check<S: EscapedString>(cases: &[(&str, Result<&str, UnescapeError>)]) {
        for (input, expected) in cases {
            let expected = expected.clone().map(String::from);
            assert_eq!(unescape::<S>(input), expected, "input: {:?}", input);
        }
    }

    #[test]
    fn test_is_numeric() {
        let cases = [
            ("123", true),
            ("-123", true),
            ("0", true),
            ("0123", false),
            ("123asd", false),
            ("+123", false),
        ];
        for &(key, expected) in cases.iter() {
            assert_eq!(expected, is_array_key_numeric(key), "key: {:?}", key);
        }
    }

    #[test]
    fn test_unescape_single() {
        // Single-quoted strings only unescape `\\` and `\'`; everything else,
        // including numeric escapes, is kept verbatim.
        check::<SingleQuoteString>(&[
            (r#"abc"#, Ok("abc")),
            (r#"ab\nc"#, Ok("ab\\nc")),
            (r#"ab\zc"#, Ok("ab\\zc")),
            (r#" \"abc\" "#, Ok(" \\\"abc\\\" ")),
            (r#"𝄞"#, Ok("𝄞")),
            (r#"\𝄞"#, Ok("\\𝄞")),
            (r#"\xD834\xDD1E"#, Ok("\\xD834\\xDD1E")),
            (r#"\xD834"#, Ok("\\xD834")),
            (r#"\xDD1E"#, Ok("\\xDD1E")),
            ("\t", Ok("\t")),
        ]);
    }

    #[test]
    fn test_unescape_double() {
        check::<DoubleQuoteString>(&[
            (r#"abc"#, Ok("abc")),
            (r#"ab\nc"#, Ok("ab\nc")),
            (r#"ab\zc"#, Ok("ab\\zc")),
            (r#" \"abc\" "#, Ok(" \"abc\" ")),
            (r#"𝄞"#, Ok("𝄞")),
            (r#"\𝄞"#, Ok("\\𝄞")),
            (r#"\u{1D11E}"#, Ok("𝄞")),
            // `\x` takes at most two hex digits; the rest is plain text.
            (r#"\xD834"#, Ok("\u{D8}34")),
            (r#"\xDD1E"#, Ok("\u{DD}1E")),
            (r#"\xD"#, Ok("\u{D}")),
            ("\t", Ok("\t")),
            (r#"\u{D834"#, Err(UnescapeError)),
            // `\u` without braces is not an escape.
            (r#"\uD834"#, Ok("\\uD834")),
            (r#"\u"#, Ok("\\u")),
            // Octal escapes stop at the first non-octal digit.
            (r#"\47foo"#, Ok("'foo")),
            (r#"\48foo"#, Ok("\u{4}8foo")),
            (r#"\87foo"#, Ok("\\87foo")),
            // Out-of-range and overflowing code points are rejected.
            (r#"\u{999999}"#, Err(UnescapeError)),
            (r#"\u{999999999999999999}"#, Err(UnescapeError)),
        ]);
    }
}