use std::borrow::Cow;
use std::iter;
use std::string::FromUtf8Error;
use crate::utils::octal_ascii_triple_to_byte;
/// Error produced by [`unquote_string`] when the unescaped bytes are not valid UTF-8.
///
/// The rejected bytes are not lost: they can be recovered with
/// [`UnquoteStringError::into_bytes`].
#[derive(Debug, From)]
pub struct UnquoteStringError {
/// Underlying conversion error, as returned by `String::from_utf8`.
pub error: FromUtf8Error
}
impl Into<Vec<u8>> for UnquoteStringError {
fn into(self) -> Vec<u8> {
self.into_bytes()
}
}
impl UnquoteStringError {
pub fn into_bytes(self) -> Vec<u8> {
self.error.into_bytes()
}
}
/// Parses a double-quoted, backslash-escaped string at the start of `text`.
///
/// Returns the pair `(offset, value)`:
///
/// * If `text` does not start with `"`, or has no unescaped closing `"`,
///   `offset` is `None` and `value` borrows `text` unchanged.
/// * Otherwise `offset` is the byte index of the closing quote within `text`
///   and `value` is the unescaped content between the two quotes. Whatever
///   follows the closing quote is ignored.
///
/// Recognised escapes are `\n`, `\t`, `\r`, `\"`, `\\` and a backslash
/// followed by an ASCII digit plus up to two more octal digits, which are
/// decoded with `octal_ascii_triple_to_byte`. Any other escaped character
/// stands for itself. Content without a backslash is returned borrowed.
///
/// # Errors
///
/// `value` is `Err` when the unescaped bytes are not valid UTF-8; the bytes
/// are still available through [`UnquoteStringError::into_bytes`].
pub fn unquote_string(text: &str) -> (Option<usize>, Result<Cow<'_, str>, UnquoteStringError>) {
    let closing_quote = match find_closing_quote(text) {
        Some(idx) => idx,
        None => return (None, Ok(Cow::Borrowed(text))),
    };
    let inner = &text[1..closing_quote];
    if !inner.contains('\\') {
        return (Some(closing_quote), Ok(Cow::Borrowed(inner)));
    }
    let value = String::from_utf8(unescape_bytes(inner))
        .map(Cow::Owned)
        .map_err(UnquoteStringError::from);
    (Some(closing_quote), value)
}

/// Returns the byte index of the first unescaped `"` after an opening `"`.
///
/// Yields `None` when `text` does not begin with a quote or is unterminated.
/// The opening quote is checked before scanning, so input that merely contains
/// a quote somewhere (possibly after multi-byte characters) is never
/// mistaken for a quoted string.
fn find_closing_quote(text: &str) -> Option<usize> {
    let mut chars = text.char_indices();
    match chars.next() {
        Some((_, '"')) => {}
        _ => return None,
    }
    let mut skip_next = false;
    for (idx, c) in chars {
        if skip_next {
            // Character right after a backslash: never terminates the string.
            skip_next = false;
        } else if c == '\\' {
            skip_next = true;
        } else if c == '"' {
            return Some(idx);
        }
    }
    None
}

/// Resolves the backslash escapes in `inner` (the text between the quotes).
///
/// The result is raw bytes because octal escapes may produce sequences that
/// are not valid UTF-8.
fn unescape_bytes(inner: &str) -> Vec<u8> {
    let mut out = Vec::with_capacity(inner.len());
    let mut escaped = false;
    let mut digits = [0u8; 3];
    let mut digit_count = 0;
    for c in inner.chars() {
        if digit_count > 0 {
            // In the middle of a numeric escape: keep collecting octal digits.
            if digit_count < digits.len() && c >= '0' && c <= '7' {
                digits[digit_count] = c as u8;
                digit_count += 1;
                continue;
            }
            // The escape is over; emit it and treat `c` as an ordinary character.
            push_octal_escape(&mut out, digits, digit_count);
            digits = [0u8; 3];
            digit_count = 0;
            escaped = false;
        }
        if escaped {
            match c {
                'n' => out.push(b'\n'),
                't' => out.push(b'\t'),
                'r' => out.push(b'\r'),
                '"' => out.push(b'"'),
                '\\' => out.push(b'\\'),
                c if c.is_ascii_digit() => {
                    // First digit of a numeric escape; stay in escaped state.
                    digits[0] = c as u8;
                    digit_count = 1;
                    continue;
                }
                // Unknown escape: drop the backslash, keep the character.
                c => push_char(&mut out, c),
            }
            escaped = false;
        } else if c == '\\' {
            escaped = true;
        } else {
            push_char(&mut out, c);
        }
    }
    if digit_count > 0 {
        push_octal_escape(&mut out, digits, digit_count);
    }
    out
}

/// Appends the byte encoded by the first `count` entries of `digits`.
///
/// The digits are right-aligned before decoding (leading slots stay zero).
/// If the helper rejects them, the digits themselves are emitted verbatim;
/// the slice is taken *after* the padding so that no padding bytes leak into
/// the output.
fn push_octal_escape(out: &mut Vec<u8>, mut digits: [u8; 3], count: usize) {
    let padding = digits.len() - count;
    digits.rotate_right(padding);
    match octal_ascii_triple_to_byte(digits) {
        Some(byte) => out.push(byte),
        None => out.extend_from_slice(&digits[padding..]),
    }
}

/// Appends the UTF-8 encoding of `c` to `out`.
fn push_char(out: &mut Vec<u8>, c: char) {
    let start = out.len();
    out.extend(iter::repeat(0).take(c.len_utf8()));
    c.encode_utf8(&mut out[start..]);
}
/// Wraps `text` in double quotes, escaping it so that [`unquote_string`] can
/// recover the original bytes.
///
/// * `\n`, `\r`, `\t`, `\` and `"` use their two-character escapes.
/// * ASCII alphanumeric and punctuation bytes are copied verbatim.
/// * Every other byte (spaces, control bytes, non-ASCII) becomes a backslash
///   followed by its octal value.
///
/// Octal escapes use the shortest form, except when the next byte is a
/// literal `0`–`7`: there the escape is zero-padded to three digits, because
/// otherwise that digit would be read as part of the escape
/// (`b"a 1"` must not become `a\401`).
pub fn quote_string(text: &[u8]) -> String {
    let mut res = String::with_capacity(text.len() + 2);
    res.push('"');
    for (i, b) in text.iter().copied().enumerate() {
        match b {
            b'\n' => res.push_str("\\n"),
            b'\r' => res.push_str("\\r"),
            b'\t' => res.push_str("\\t"),
            b'\\' => res.push_str("\\\\"),
            b'"' => res.push_str("\\\""),
            b if b.is_ascii_alphanumeric() || b.is_ascii_punctuation() => {
                res.push(b as char);
            }
            b => {
                let octal = [b'0' + (b >> 6), b'0' + ((b >> 3) & 7), b'0' + (b & 7)];
                let followed_by_octal_digit = text
                    .get(i + 1)
                    .map_or(false, |next| (b'0'..=b'7').contains(next));
                let width = if followed_by_octal_digit || b >= 0o100 {
                    3
                } else if b >= 0o10 {
                    2
                } else {
                    1
                };
                res.push('\\');
                res.extend(octal[octal.len() - width..].iter().map(|&d| d as char));
            }
        }
    }
    res.push('"');
    res
}
#[cfg(test)]
mod test {
    use super::*;

    /// Quoting a string and unquoting the result must give back the input.
    #[test]
    fn test_can_quote_and_unquote_string() {
        for input in [
            "asdf",
            "\0\0\0\0",
            "ŁŁŁ",
            r#"""""#,
            "\n\t\r",
        ].iter().cloned() {
            assert_eq!(
                input,
                unquote_string(&quote_string(input.as_bytes())).1.unwrap().as_ref()
            )
        }
    }

    /// Checks the reported closing-quote offset and the unescaped value for
    /// unquoted, quoted, escaped and non-UTF-8 inputs. `Err` cases compare
    /// the raw bytes carried by the error.
    #[test]
    fn test_can_unquote_string() {
        for (input, output) in [
            ("not quoted string", (None, Ok("not quoted string"))),
            ("\"and a quoted one\"", (Some(17), Ok("and a quoted one"))),
            ("\"esc backslash \\\\ \"", (Some(18), Ok("esc backslash \\ "))),
            (r#""\0\0\0\0\213\321\3\123\312\31\221\312""#, (
                Some(38),
                Err(&[0u8, 0, 0, 0, 0o213, 0o321, 0o3, 0o123, 0o312, 0o31, 0o221, 0o312] as &[u8])
            )),
            (r#""\0\0\0\0\213\321\3\123\312\31\221\31""#, (
                Some(37),
                Err(&[0u8, 0, 0, 0, 0o213, 0o321, 0o3, 0o123, 0o312, 0o31, 0o221, 0o31] as &[u8])
            )),
            (r#""\0\0\0\0\213\321\3\123\312\31\221\3""#, (
                Some(36),
                Err(&[0u8, 0, 0, 0, 0o213, 0o321, 0o3, 0o123, 0o312, 0o31, 0o221, 0o3] as &[u8])
            )),
            ("\"q\\\"q\"", (Some(5), Ok("q\"q"))),
            // Only the first quoted string is consumed.
            ("\"first\"\"second\"", (Some(6), Ok("first"))),
        ].iter().cloned() {
            let (expected_offset, expected_value) = output;
            let (offset, value) = unquote_string(input);
            let value = value.map_err(|e| e.into_bytes());
            assert_eq!(offset, expected_offset);
            assert_eq!(
                value
                    .as_ref()
                    .map(|v| v.as_ref())
                    .map_err(|e| e.as_ref()),
                expected_value
            );
        }
    }
}