use alloc::borrow::Cow;
use alloc::vec::Vec;
use core::fmt::Write;
/// Reasons a quoted string literal can fail to decode.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum UnescapeError {
    /// The decoded bytes were requested as a string but are not valid UTF-8.
    InvalidUtf8,
    /// The literal is malformed; the payload is a static, human-readable
    /// description of the problem (for example an unterminated string or an
    /// unrecognised escape sequence).
    BadEscape(&'static str),
}
pub fn unescape(raw: &str) -> Result<Vec<u8>, UnescapeError> {
debug_assert!(
matches!(raw.as_bytes().first(), Some(b'"' | b'\'')),
"unescape input must start with a quote; got {raw:?}"
);
let mut out = Vec::new();
let mut s = raw.as_bytes();
loop {
let Some("e) = s.first() else {
return Err(UnescapeError::BadEscape("unterminated string"));
};
if quote != b'"' && quote != b'\'' {
break;
}
s = &s[1..];
loop {
match s.first() {
None => return Err(UnescapeError::BadEscape("unterminated string")),
Some(&c) if c == quote => {
s = &s[1..];
break;
}
Some(b'\n') | Some(0) => {
return Err(UnescapeError::BadEscape(
"raw newline/NUL in string literal",
));
}
Some(b'\\') => {
s = &s[1..];
let Some(&esc) = s.first() else {
return Err(UnescapeError::BadEscape("unterminated escape"));
};
s = &s[1..];
match esc {
b'"' | b'\'' | b'\\' | b'?' => out.push(esc),
b'n' => out.push(b'\n'),
b'r' => out.push(b'\r'),
b't' => out.push(b'\t'),
b'a' => out.push(0x07),
b'b' => out.push(0x08),
b'f' => out.push(0x0C),
b'v' => out.push(0x0B),
b'0'..=b'7' => {
let mut v = (esc - b'0') as u32;
let mut n = 0;
while n < 2 {
match s.first() {
Some(&d @ b'0'..=b'7') => {
v = v * 8 + (d - b'0') as u32;
s = &s[1..];
n += 1;
}
_ => break,
}
}
if v > 0xFF {
return Err(UnescapeError::BadEscape("octal escape out of range"));
}
out.push(v as u8);
}
b'x' => {
let (v, consumed) = take_hex(s, 2);
if consumed == 0 {
return Err(UnescapeError::BadEscape("invalid \\x escape"));
}
s = &s[consumed..];
out.push(v as u8);
}
b'u' => {
let (cp, n) = take_hex(s, 4);
if n != 4 {
return Err(UnescapeError::BadEscape("invalid \\u escape"));
}
s = &s[4..];
if (0xD800..0xE000).contains(&cp) {
return Err(UnescapeError::BadEscape(
"\\u escape is surrogate; use \\U for non-BMP",
));
}
push_utf8(&mut out, cp)?;
}
b'U' => {
let (cp, n) = take_hex(s, 8);
if n != 8 {
return Err(UnescapeError::BadEscape("invalid \\U escape"));
}
s = &s[8..];
push_utf8(&mut out, cp)?;
}
_ => return Err(UnescapeError::BadEscape("unrecognised escape sequence")),
}
}
Some(&c) => {
out.push(c);
s = &s[1..];
}
}
}
while let Some(&c) = s.first() {
if super::token::is_textproto_ws(c) {
s = &s[1..];
} else {
break;
}
}
if !matches!(s.first(), Some(b'"') | Some(b'\'')) {
break;
}
}
Ok(out)
}
pub fn unescape_str(raw: &str) -> Result<Cow<'_, str>, UnescapeError> {
let bytes = raw.as_bytes();
if let Some("e) = bytes.first() {
if (quote == b'"' || quote == b'\'') && bytes.len() >= 2 {
let inner = &bytes[1..];
let mut i = 0;
while i < inner.len() {
let b = inner[i];
if b == quote {
let tail = &inner[i + 1..];
if tail.iter().all(|&c| super::token::is_textproto_ws(c)) {
return Ok(Cow::Borrowed(&raw[1..1 + i]));
}
break;
}
if b == b'\\' || b == b'\n' || b == 0 {
break;
}
i += 1;
}
}
}
let owned = unescape(raw)?;
alloc::string::String::from_utf8(owned)
.map(Cow::Owned)
.map_err(|_| UnescapeError::InvalidUtf8)
}
/// Reads up to `max` leading ASCII hex digits (either case) from `s`.
///
/// Returns the accumulated value together with the number of digits consumed;
/// scanning stops at the first non-hex byte or at the end of `s`.
#[inline]
fn take_hex(s: &[u8], max: usize) -> (u32, usize) {
    let mut value: u32 = 0;
    let mut used = 0;
    for &byte in s.iter().take(max) {
        // Bytes >= 0x80 map to non-ASCII chars, which `to_digit` rejects.
        let Some(digit) = char::from(byte).to_digit(16) else {
            break;
        };
        value = (value << 4) | digit;
        used += 1;
    }
    (value, used)
}
/// Appends the UTF-8 encoding of the code point `cp` to `out`.
///
/// # Errors
///
/// Returns [`UnescapeError::BadEscape`] when `cp` is not a valid Unicode
/// scalar value (as decided by `char::from_u32`); `out` is left untouched in
/// that case.
#[inline]
fn push_utf8(out: &mut Vec<u8>, cp: u32) -> Result<(), UnescapeError> {
    match char::from_u32(cp) {
        Some(ch) => {
            let mut utf8 = [0u8; 4];
            let encoded = ch.encode_utf8(&mut utf8);
            out.extend_from_slice(encoded.as_bytes());
            Ok(())
        }
        None => Err(UnescapeError::BadEscape("invalid unicode code point")),
    }
}
/// Writes `bytes` to `w` as a double-quoted literal.
///
/// `"` and `\` are backslash-escaped, newline, carriage return and tab use
/// their short escapes, other printable ASCII (`0x20..=0x7E`) is written
/// as-is, and every remaining byte becomes a three-digit octal escape.
///
/// # Errors
///
/// Returns the first error reported by the underlying writer.
pub fn escape_bytes<W: Write + ?Sized>(bytes: &[u8], w: &mut W) -> core::fmt::Result {
    w.write_char('"')?;
    for &byte in bytes {
        match byte {
            b'"' => w.write_str("\\\"")?,
            b'\\' => w.write_str("\\\\")?,
            b'\n' => w.write_str("\\n")?,
            b'\r' => w.write_str("\\r")?,
            b'\t' => w.write_str("\\t")?,
            printable @ 0x20..=0x7E => w.write_char(char::from(printable))?,
            other => {
                // Always three octal digits, so a following digit in the
                // output can never be absorbed into the escape.
                w.write_char('\\')?;
                for shift in [6u8, 3, 0] {
                    w.write_char(char::from(b'0' + ((other >> shift) & 7)))?;
                }
            }
        }
    }
    w.write_char('"')
}
/// Writes `s` to `w` as a double-quoted literal, keeping non-ASCII text
/// readable.
///
/// `"` and `\` are backslash-escaped, newline, carriage return and tab use
/// their short escapes, the remaining ASCII control characters
/// (`U+0000..=U+001F` and `U+007F`) become three-digit octal escapes, and
/// every other character is written unchanged.
///
/// # Errors
///
/// Returns the first error reported by the underlying writer.
pub fn escape_str<W: Write + ?Sized>(s: &str, w: &mut W) -> core::fmt::Result {
    w.write_char('"')?;
    for ch in s.chars() {
        match ch {
            '"' => w.write_str("\\\"")?,
            '\\' => w.write_str("\\\\")?,
            '\n' => w.write_str("\\n")?,
            '\r' => w.write_str("\\r")?,
            '\t' => w.write_str("\\t")?,
            ctl if ctl.is_ascii_control() => {
                // ASCII control characters are below 0x80, so the cast is lossless.
                let byte = ctl as u8;
                w.write_char('\\')?;
                for shift in [6u8, 3, 0] {
                    w.write_char(char::from(b'0' + ((byte >> shift) & 7)))?;
                }
            }
            plain => w.write_char(plain)?,
        }
    }
    w.write_char('"')
}
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::string::String;

    /// Table-driven check of `unescape`; `None` marks inputs that must be rejected.
    #[test]
    fn unescape_table() {
        #[rustfmt::skip]
        let cases: &[(&str, Option<&[u8]>)] = &[
            // Plain literals and simple escapes.
            (r#""hello""#, Some(b"hello")),
            (r#"'hello'"#, Some(b"hello")),
            (r#""""#, Some(b"")),
            (r#""\n""#, Some(b"\n")),
            (r#""\r\t""#, Some(b"\r\t")),
            (r#""\"\\\'""#, Some(b"\"\\'")),
            (r#""\?""#, Some(b"?")),
            (r#""\a\b\f\v""#, Some(&[0x07, 0x08, 0x0C, 0x0B])),
            // Octal escapes.
            (r#""\0""#, Some(&[0x00])),
            (r#""\7""#, Some(&[0x07])),
            (r#""\77""#, Some(&[0o77])),
            (r#""\377""#, Some(&[0xFF])),
            (r#""\1234""#, Some(&[0o123, b'4'])),
            // Hex escapes.
            (r#""\x41""#, Some(b"A")),
            (r#""\xa""#, Some(&[0x0A])),
            (r#""\xFF""#, Some(&[0xFF])),
            // Unicode escapes.
            (r#""\u0041""#, Some(b"A")),
            (r#""\u00e9""#, Some("é".as_bytes())),
            (r#""\U0001F600""#, Some("😀".as_bytes())),
            // Adjacent literals are concatenated.
            (r#""foo" "bar""#, Some(b"foobar")),
            (r#""foo"'bar'"baz""#, Some(b"foobarbaz")),
            (r#""a" "b""#, Some(b"ab")),
            // Rejected inputs.
            (r#""unterminated"#, None),
            (r#""\"#, None),
            (r#""\400""#, None),
            (r#""\x""#, None),
            (r#""\u00""#, None),
            (r#""\U0000""#, None),
            (r#""\uD800""#, None),
            (r#""\uDC00""#, None),
            (r#""\uD83D\uDE00""#, None),
            (r#""\z""#, None),
            ("\"line\nbreak\"", None),
        ];
        for &(input, expected) in cases {
            let got = unescape(input).ok();
            assert_eq!(got.as_deref(), expected, "input: {input:?}");
        }
    }

    /// A literal without escapes is returned as a borrow of the input.
    #[test]
    fn unescape_str_borrows_when_trivial() {
        let got = unescape_str(r#""hello""#).unwrap();
        assert!(matches!(got, Cow::Borrowed("hello")));
    }

    /// Any escape forces an owned result.
    #[test]
    fn unescape_str_owns_when_escaped() {
        let got = unescape_str(r#""hel\nlo""#).unwrap();
        assert!(matches!(got, Cow::Owned(_)));
        assert_eq!(got, "hel\nlo");
    }

    /// Concatenated literals cannot be borrowed as one slice.
    #[test]
    fn unescape_str_owns_when_concatenated() {
        let got = unescape_str(r#""foo" "bar""#).unwrap();
        assert!(matches!(got, Cow::Owned(_)));
        assert_eq!(got, "foobar");
    }

    /// Trailing whitespace after the closing quote keeps the borrowed path.
    #[test]
    fn unescape_str_borrows_with_trailing_ws() {
        let got = unescape_str("\"hello\" ").unwrap();
        assert!(matches!(got, Cow::Borrowed("hello")));
    }

    /// Bytes that decode fine may still be rejected as text.
    #[test]
    fn unescape_str_rejects_bad_utf8() {
        assert!(unescape(r#""\xFF""#).is_ok());
        assert!(unescape_str(r#""\xFF""#).is_err());
    }

    /// Table-driven check of `escape_bytes` output.
    #[test]
    fn escape_bytes_table() {
        #[rustfmt::skip]
        let cases: &[(&[u8], &str)] = &[
            (b"hello", r#""hello""#),
            (b"", r#""""#),
            (b"\n\r\t", r#""\n\r\t""#),
            (b"\"\\", r#""\"\\""#),
            (&[0x00], r#""\000""#),
            (&[0x01], r#""\001""#),
            (&[0x7F], r#""\177""#),
            (&[0xFF], r#""\377""#),
            (&[0x80, 0x81], r#""\200\201""#),
            (b"a'b", r#""a'b""#),
        ];
        for &(input, want) in cases {
            let mut out = String::new();
            escape_bytes(input, &mut out).unwrap();
            assert_eq!(out, want, "input: {input:?}");
        }
    }

    /// Non-ASCII text passes through `escape_str` unchanged.
    #[test]
    fn escape_str_preserves_unicode() {
        let mut out = String::new();
        escape_str("café 😀", &mut out).unwrap();
        assert_eq!(out, r#""café 😀""#);
    }

    /// ASCII control characters are written as octal escapes.
    #[test]
    fn escape_str_escapes_controls() {
        let mut out = String::new();
        escape_str("\x01\x7F", &mut out).unwrap();
        assert_eq!(out, r#""\001\177""#);
    }

    /// The byte and string escapers differ on non-ASCII input.
    #[test]
    fn escape_bytes_vs_str_non_ascii() {
        let mut a = String::new();
        let mut b = String::new();
        escape_bytes("é".as_bytes(), &mut a).unwrap();
        escape_str("é", &mut b).unwrap();
        assert_eq!(a, r#""\303\251""#);
        assert_eq!(b, r#""é""#);
    }

    /// `unescape` inverts `escape_bytes` for arbitrary byte content.
    #[test]
    fn escape_unescape_roundtrip() {
        #[rustfmt::skip]
        let cases: &[&[u8]] = &[
            b"",
            b"plain",
            b"\x00\x01\x02\xFD\xFE\xFF",
            b"mix\nall\"the\\things\x7F",
            &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
        ];
        for &input in cases {
            let mut escaped = String::new();
            escape_bytes(input, &mut escaped).unwrap();
            let back = unescape(&escaped).unwrap();
            assert_eq!(back, input, "escaped form: {escaped}");
        }
    }
}