1use crate::DocInner;
4
/// Errors produced while decoding backslash escape sequences in a string.
///
/// Every variant carries:
/// * `character_index` – the offset of the backslash that starts the offending
///   escape. NOTE: the parser in this file fills this from `str::char_indices`,
///   so it is a *byte* offset into `string`; it only coincides with a character
///   count when everything before it is ASCII.
/// * `string` – an owned copy of the complete input that was being unescaped.
#[derive(Debug, PartialEq, Eq)]
pub enum UnescapeError {
    /// A backslash was followed by a character that does not start a supported
    /// escape sequence.
    IllegalCharacterFollowingBackslash {
        /// Offset of the backslash (see the type-level note).
        character_index: usize,
        /// The character that came right after the backslash.
        found: char,
        /// The full input string.
        string: String,
    },
    /// The input ended immediately after a backslash.
    UnexpectedEofFollowingBackslash {
        /// Offset of the trailing backslash (see the type-level note).
        character_index: usize,
        /// The full input string.
        string: String,
    },
    /// A `\x` escape was not followed by two hexadecimal digits.
    InvalidHexEscape {
        /// Offset of the backslash (see the type-level note).
        character_index: usize,
        /// The full input string.
        string: String,
    },
    /// A `\u` escape was malformed: missing braces, no digits, more than six
    /// digits, a non-hex character, or a value that is not a valid `char`.
    InvalidUnicodeEscape {
        /// Offset of the backslash (see the type-level note).
        character_index: usize,
        /// The full input string.
        string: String,
    },
}

impl std::fmt::Display for UnescapeError {
    /// Writes a one-line, human-readable description of the error, quoting the
    /// offending input with `{:?}` so that control characters stay visible.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::IllegalCharacterFollowingBackslash {
                character_index,
                found,
                string,
            } => write!(
                f,
                "Illegal character following a backslash at index {character_index} in {string:?}: found '{found}'"
            ),
            Self::UnexpectedEofFollowingBackslash {
                character_index,
                string,
            } => write!(
                f,
                "Unexpected end of file following a backslash at index {character_index} in {string:?}"
            ),
            Self::InvalidHexEscape {
                character_index,
                string,
            } => write!(
                f,
                "Invalid hex escape at index {character_index} in {string:?}"
            ),
            Self::InvalidUnicodeEscape {
                character_index,
                string,
            } => write!(
                f,
                "Invalid unicode escape at index {character_index} in {string:?}"
            ),
        }
    }
}

// Implementing `Error` lets callers box this type as `Box<dyn Error>` and use it
// with `?` in functions returning generic error types. There is no underlying
// cause, so the default `source()` (returning `None`) is the right one.
impl std::error::Error for UnescapeError {}
83
84pub fn unescape(doc_attr: &DocInner) -> Result<String, UnescapeError> {
86 unescape_inner(doc_attr.value.as_str())
87}
88
89fn parse_hex_escape(
91 chars: &mut std::iter::Peekable<impl Iterator<Item = (usize, char)>>,
92 escape_start: usize,
93 s: &str,
94) -> Result<char, UnescapeError> {
95 let mut value = 0u8;
96 for _ in 0..2 {
97 match chars.next() {
98 Some((_, c)) if c.is_ascii_hexdigit() => {
99 value = value * 16 + c.to_digit(16).unwrap() as u8;
100 }
101 _ => {
102 return Err(UnescapeError::InvalidHexEscape {
103 character_index: escape_start,
104 string: s.to_string(),
105 });
106 }
107 }
108 }
109 Ok(value as char)
110}
111
112fn parse_unicode_escape(
114 chars: &mut std::iter::Peekable<impl Iterator<Item = (usize, char)>>,
115 escape_start: usize,
116 s: &str,
117) -> Result<char, UnescapeError> {
118 match chars.next() {
120 Some((_, '{')) => {}
121 _ => {
122 return Err(UnescapeError::InvalidUnicodeEscape {
123 character_index: escape_start,
124 string: s.to_string(),
125 });
126 }
127 }
128
129 let mut value = 0u32;
130 let mut digit_count = 0;
131
132 loop {
133 match chars.next() {
134 Some((_, '}')) => break,
135 Some((_, c)) if c.is_ascii_hexdigit() => {
136 digit_count += 1;
137 if digit_count > 6 {
138 return Err(UnescapeError::InvalidUnicodeEscape {
139 character_index: escape_start,
140 string: s.to_string(),
141 });
142 }
143 value = value * 16 + c.to_digit(16).unwrap();
144 }
145 _ => {
146 return Err(UnescapeError::InvalidUnicodeEscape {
147 character_index: escape_start,
148 string: s.to_string(),
149 });
150 }
151 }
152 }
153
154 if digit_count == 0 {
155 return Err(UnescapeError::InvalidUnicodeEscape {
156 character_index: escape_start,
157 string: s.to_string(),
158 });
159 }
160
161 char::from_u32(value).ok_or_else(|| UnescapeError::InvalidUnicodeEscape {
162 character_index: escape_start,
163 string: s.to_string(),
164 })
165}
166
167pub fn unescape_inner(s: &str) -> Result<String, UnescapeError> {
180 let mut out = String::with_capacity(s.len());
181 let mut chars = s.char_indices().peekable();
182
183 while let Some((i, c)) = chars.next() {
184 if c == '\\' {
185 match chars.next() {
186 Some((_, '\\')) => out.push('\\'),
187 Some((_, '"')) => out.push('"'),
188 Some((_, '\'')) => out.push('\''),
189 Some((_, 'n')) => out.push('\n'),
190 Some((_, 'r')) => out.push('\r'),
191 Some((_, 't')) => out.push('\t'),
192 Some((_, '0')) => out.push('\0'),
193 Some((_, 'x')) => {
194 out.push(parse_hex_escape(&mut chars, i, s)?);
195 }
196 Some((_, 'u')) => {
197 out.push(parse_unicode_escape(&mut chars, i, s)?);
198 }
199 Some((_, found)) => {
200 return Err(UnescapeError::IllegalCharacterFollowingBackslash {
201 character_index: i,
202 found,
203 string: s.to_string(),
204 });
205 }
206 None => {
207 return Err(UnescapeError::UnexpectedEofFollowingBackslash {
208 character_index: i,
209 string: s.to_string(),
210 });
211 }
212 }
213 } else {
214 out.push(c);
215 }
216 }
217 Ok(out)
218}
219
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that each `(input, expected)` pair unescapes successfully to
    /// exactly `expected`.
    fn assert_unescapes(cases: &[(&str, &str)]) {
        for (input, expected) in cases {
            assert_eq!(
                unescape_inner(input).unwrap(),
                *expected,
                "input: {:?}",
                input
            );
        }
    }

    #[test]
    fn test_unescape_basic() {
        assert_unescapes(&[
            ("hello", "hello"),
            (r#"hello \"world\""#, r#"hello "world""#),
            (r#"hello \'world\'"#, "hello 'world'"),
            (r"back\\slash", r"back\slash"),
        ]);
    }

    #[test]
    fn test_unescape_newline() {
        assert_unescapes(&[(
            r"```solidity\nstruct MyStruct { ... }\n```",
            "```solidity\nstruct MyStruct { ... }\n```",
        )]);
    }

    #[test]
    fn test_unescape_common_escapes() {
        assert_unescapes(&[
            (r"hello\nworld", "hello\nworld"),
            (r"hello\rworld", "hello\rworld"),
            (r"hello\tworld", "hello\tworld"),
            (r"null\0char", "null\0char"),
            (r"line1\nline2\nline3", "line1\nline2\nline3"),
            (r"tab\there", "tab\there"),
            (r"cr\rlf", "cr\rlf"),
            (r"crlf\r\n", "crlf\r\n"),
        ]);
    }

    #[test]
    fn test_unescape_hex() {
        assert_unescapes(&[
            (r"\x41", "A"),
            (r"\x61", "a"),
            (r"\x00", "\0"),
            (r"\x7f", "\x7f"),
            (r"hello\x20world", "hello world"),
        ]);
    }

    #[test]
    fn test_unescape_unicode() {
        assert_unescapes(&[
            (r"\u{41}", "A"),
            (r"\u{0041}", "A"),
            (r"\u{1F600}", "😀"),
            (r"\u{10FFFF}", "\u{10FFFF}"),
            (r"hello\u{20}world", "hello world"),
        ]);
    }

    #[test]
    fn test_unescape_mixed() {
        assert_unescapes(&[(
            r#"line1\nline2\ttab\\backslash\"quote"#,
            "line1\nline2\ttab\\backslash\"quote",
        )]);
    }

    #[test]
    fn test_unescape_errors() {
        // Unknown escape character: the index points at the backslash.
        let illegal = unescape_inner(r"invalid \a escape");
        assert!(matches!(
            illegal,
            Err(UnescapeError::IllegalCharacterFollowingBackslash {
                character_index: 8,
                found: 'a',
                ..
            })
        ));

        // Input ends right after a backslash.
        let truncated = unescape_inner(r"trailing backslash \");
        assert!(matches!(
            truncated,
            Err(UnescapeError::UnexpectedEofFollowingBackslash {
                character_index: 19,
                ..
            })
        ));

        // Too few digits, then non-hex digits.
        for input in [r"\x4", r"\xGG"] {
            assert!(
                matches!(
                    unescape_inner(input),
                    Err(UnescapeError::InvalidHexEscape { .. })
                ),
                "input: {:?}",
                input
            );
        }

        // Missing braces, no digits, too many digits, value above U+10FFFF.
        for input in [r"\u0041", r"\u{}", r"\u{1234567}", r"\u{FFFFFF}"] {
            assert!(
                matches!(
                    unescape_inner(input),
                    Err(UnescapeError::InvalidUnicodeEscape { .. })
                ),
                "input: {:?}",
                input
            );
        }
    }
}