use crate::DocInner;
/// Error produced when a string containing backslash escapes cannot be unescaped.
///
/// Every variant records the position of the backslash that starts the offending
/// escape together with a copy of the complete input, so the error can be reported
/// without the caller having to keep the original string around.
///
/// Despite its name, `character_index` is the offset yielded by
/// [`str::char_indices`], i.e. a byte offset into `string`. It only equals the
/// character count when everything before it is ASCII.
#[derive(Debug, PartialEq, Eq)]
pub enum UnescapeError {
    /// The backslash was followed by a character that does not start a supported escape.
    IllegalCharacterFollowingBackslash {
        /// Offset of the backslash within `string`.
        character_index: usize,
        /// The unsupported character found right after the backslash.
        found: char,
        /// The full input that was being unescaped.
        string: String,
    },
    /// The input ended immediately after a backslash.
    UnexpectedEofFollowingBackslash {
        /// Offset of the trailing backslash within `string`.
        character_index: usize,
        /// The full input that was being unescaped.
        string: String,
    },
    /// A `\x` escape was not followed by a valid two-digit hexadecimal value.
    InvalidHexEscape {
        /// Offset of the backslash that starts the escape within `string`.
        character_index: usize,
        /// The full input that was being unescaped.
        string: String,
    },
    /// A `\u` escape was malformed: missing braces, no digits, more than six digits,
    /// a non-hex character, or a value that is not a valid `char`.
    InvalidUnicodeEscape {
        /// Offset of the backslash that starts the escape within `string`.
        character_index: usize,
        /// The full input that was being unescaped.
        string: String,
    },
}

impl std::fmt::Display for UnescapeError {
    /// Writes a one-line, human-readable description of the failure, quoting the
    /// input with its `Debug` representation so embedded escapes stay visible.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            UnescapeError::IllegalCharacterFollowingBackslash {
                character_index,
                found,
                string,
            } => {
                write!(
                    f,
                    "Illegal character following a backslash at index {character_index} in {string:?}: found '{found}'"
                )
            }
            UnescapeError::UnexpectedEofFollowingBackslash {
                character_index,
                string,
            } => {
                write!(
                    f,
                    "Unexpected end of file following a backslash at index {character_index} in {string:?}"
                )
            }
            UnescapeError::InvalidHexEscape {
                character_index,
                string,
            } => {
                write!(
                    f,
                    "Invalid hex escape at index {character_index} in {string:?}"
                )
            }
            UnescapeError::InvalidUnicodeEscape {
                character_index,
                string,
            } => {
                write!(
                    f,
                    "Invalid unicode escape at index {character_index} in {string:?}"
                )
            }
        }
    }
}

// Implementing `Error` lets callers box this type as `Box<dyn std::error::Error>` and
// propagate it with `?` alongside other error types. There is no underlying cause,
// so the default `source()` (which returns `None`) is kept.
impl std::error::Error for UnescapeError {}
/// Unescapes the string held in `doc_attr.value`.
///
/// This is a thin convenience wrapper: it borrows the attribute's value as a string
/// slice and hands it to [`unescape_inner`].
///
/// # Errors
///
/// Returns whatever [`UnescapeError`] `unescape_inner` reports for that value.
pub fn unescape(doc_attr: &DocInner) -> Result<String, UnescapeError> {
    let raw = doc_attr.value.as_str();
    unescape_inner(raw)
}
/// Decodes the two hexadecimal digits of a `\xNN` escape.
///
/// `chars` must be positioned just after the `x`; exactly two items are consumed on
/// success. `escape_start` is the offset of the backslash that opened the escape and
/// `s` is the complete input; both are only used to build the error value.
///
/// # Errors
///
/// Returns [`UnescapeError::InvalidHexEscape`] when fewer than two hex digits follow,
/// or when the decoded value is above `0x7F`. Rust's `\xNN` is a 7-bit escape, so
/// larger values are rejected rather than silently reinterpreted as Latin-1 code
/// points.
fn parse_hex_escape(
    chars: &mut std::iter::Peekable<impl Iterator<Item = (usize, char)>>,
    escape_start: usize,
    s: &str,
) -> Result<char, UnescapeError> {
    let invalid = || UnescapeError::InvalidHexEscape {
        character_index: escape_start,
        string: s.to_string(),
    };

    let mut value = 0u32;
    for _ in 0..2 {
        // `to_digit(16)` is `None` for anything but an ASCII hex digit, which covers
        // both "wrong character" and (via `next()`) "input ended early".
        let digit = chars
            .next()
            .and_then(|(_, c)| c.to_digit(16))
            .ok_or_else(&invalid)?;
        value = value * 16 + digit;
    }

    // Only 0x00..=0x7F is a legal `\x` escape in a string.
    char::from_u32(value)
        .filter(char::is_ascii)
        .ok_or_else(invalid)
}
/// Decodes the braced body of a `\u{...}` escape.
///
/// `chars` must be positioned just after the `u`. The opening `{`, one to six
/// hexadecimal digits, and the closing `}` are consumed. `escape_start` is the offset
/// of the backslash that opened the escape and `s` is the complete input; both are
/// only used to build the error value.
///
/// # Errors
///
/// Returns [`UnescapeError::InvalidUnicodeEscape`] when the `{` is missing, the input
/// ends before `}`, a non-hex character appears, there are zero or more than six
/// digits, or the value is not a valid `char` (for example a surrogate or anything
/// above `0x10FFFF`).
fn parse_unicode_escape(
    chars: &mut std::iter::Peekable<impl Iterator<Item = (usize, char)>>,
    escape_start: usize,
    s: &str,
) -> Result<char, UnescapeError> {
    // Every failure in this function yields the same error value.
    let invalid = || UnescapeError::InvalidUnicodeEscape {
        character_index: escape_start,
        string: s.to_string(),
    };

    if !matches!(chars.next(), Some((_, '{'))) {
        return Err(invalid());
    }

    let mut code_point = 0u32;
    let mut digits_seen = 0usize;
    loop {
        // Running out of input before the closing brace is an error.
        let (_, c) = chars.next().ok_or_else(&invalid)?;
        if c == '}' {
            break;
        }
        let digit = c.to_digit(16).ok_or_else(&invalid)?;
        digits_seen += 1;
        if digits_seen > 6 {
            return Err(invalid());
        }
        code_point = code_point * 16 + digit;
    }

    if digits_seen == 0 {
        return Err(invalid());
    }
    char::from_u32(code_point).ok_or_else(invalid)
}
/// Replaces backslash escape sequences in `s` with the characters they denote.
///
/// Supported escapes are `\\`, `\"`, `\'`, `\n`, `\r`, `\t`, `\0`, `\xNN` and
/// `\u{...}`. Every other character is copied through unchanged.
///
/// # Errors
///
/// * [`UnescapeError::IllegalCharacterFollowingBackslash`] if a backslash is followed
///   by a character that is not one of the supported escapes.
/// * [`UnescapeError::UnexpectedEofFollowingBackslash`] if `s` ends with a lone
///   backslash.
/// * [`UnescapeError::InvalidHexEscape`] / [`UnescapeError::InvalidUnicodeEscape`] if a
///   `\x` or `\u` escape is malformed.
///
/// The reported index is the offset of the backslash as yielded by
/// [`str::char_indices`], i.e. a byte offset into `s`.
pub fn unescape_inner(s: &str) -> Result<String, UnescapeError> {
    // The output is never longer than the input, so one allocation is enough.
    let mut unescaped = String::with_capacity(s.len());
    let mut cursor = s.char_indices().peekable();

    while let Some((backslash_at, current)) = cursor.next() {
        if current != '\\' {
            unescaped.push(current);
            continue;
        }

        let selector = match cursor.next() {
            Some((_, selector)) => selector,
            None => {
                return Err(UnescapeError::UnexpectedEofFollowingBackslash {
                    character_index: backslash_at,
                    string: s.to_string(),
                });
            }
        };

        let decoded = match selector {
            // These three escapes stand for themselves.
            '\\' | '"' | '\'' => selector,
            'n' => '\n',
            'r' => '\r',
            't' => '\t',
            '0' => '\0',
            'x' => parse_hex_escape(&mut cursor, backslash_at, s)?,
            'u' => parse_unicode_escape(&mut cursor, backslash_at, s)?,
            found => {
                return Err(UnescapeError::IllegalCharacterFollowingBackslash {
                    character_index: backslash_at,
                    found,
                    string: s.to_string(),
                });
            }
        };
        unescaped.push(decoded);
    }

    Ok(unescaped)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that each `(escaped, expected)` pair unescapes successfully to `expected`.
    fn assert_unescapes(cases: &[(&str, &str)]) {
        for &(escaped, expected) in cases {
            assert_eq!(unescape_inner(escaped).unwrap(), expected);
        }
    }

    /// Plain text, quotes and an escaped backslash.
    #[test]
    fn test_unescape_basic() {
        assert_unescapes(&[
            ("hello", "hello"),
            (r#"hello \"world\""#, r#"hello "world""#),
            (r#"hello \'world\'"#, "hello 'world'"),
            (r"back\\slash", r"back\slash"),
        ]);
    }

    /// A fenced code snippet whose line breaks are written as `\n`.
    #[test]
    fn test_unescape_newline() {
        assert_unescapes(&[(
            r"```solidity\nstruct MyStruct { ... }\n```",
            "```solidity\nstruct MyStruct { ... }\n```",
        )]);
    }

    /// The single-character escapes `\n`, `\r`, `\t` and `\0`.
    #[test]
    fn test_unescape_common_escapes() {
        assert_unescapes(&[
            (r"hello\nworld", "hello\nworld"),
            (r"hello\rworld", "hello\rworld"),
            (r"hello\tworld", "hello\tworld"),
            (r"null\0char", "null\0char"),
            (r"line1\nline2\nline3", "line1\nline2\nline3"),
            (r"tab\there", "tab\there"),
            (r"cr\rlf", "cr\rlf"),
            (r"crlf\r\n", "crlf\r\n"),
        ]);
    }

    /// Two-digit `\xNN` escapes.
    #[test]
    fn test_unescape_hex() {
        assert_unescapes(&[
            (r"\x41", "A"),
            (r"\x61", "a"),
            (r"\x00", "\0"),
            (r"\x7f", "\x7f"),
            (r"hello\x20world", "hello world"),
        ]);
    }

    /// Braced `\u{...}` escapes, up to the largest valid code point.
    #[test]
    fn test_unescape_unicode() {
        assert_unescapes(&[
            (r"\u{41}", "A"),
            (r"\u{0041}", "A"),
            (r"\u{1F600}", "😀"),
            (r"\u{10FFFF}", "\u{10FFFF}"),
            (r"hello\u{20}world", "hello world"),
        ]);
    }

    /// Several different escapes in one input.
    #[test]
    fn test_unescape_mixed() {
        assert_unescapes(&[(
            r#"line1\nline2\ttab\\backslash\"quote"#,
            "line1\nline2\ttab\\backslash\"quote",
        )]);
    }

    /// Each error variant is produced for the corresponding malformed input.
    #[test]
    fn test_unescape_errors() {
        let unknown_escape = unescape_inner(r"invalid \a escape");
        assert!(matches!(
            unknown_escape,
            Err(UnescapeError::IllegalCharacterFollowingBackslash {
                character_index: 8,
                found: 'a',
                ..
            })
        ));

        let dangling_backslash = unescape_inner(r"trailing backslash \");
        assert!(matches!(
            dangling_backslash,
            Err(UnescapeError::UnexpectedEofFollowingBackslash {
                character_index: 19,
                ..
            })
        ));

        for malformed in [r"\x4", r"\xGG"] {
            assert!(matches!(
                unescape_inner(malformed),
                Err(UnescapeError::InvalidHexEscape { .. })
            ));
        }

        for malformed in [r"\u0041", r"\u{}", r"\u{1234567}", r"\u{FFFFFF}"] {
            assert!(matches!(
                unescape_inner(malformed),
                Err(UnescapeError::InvalidUnicodeEscape { .. })
            ));
        }
    }
}