1use std::str::Chars;
5use std::ops::Range;
6
// The `tests` submodule is only compiled when building with `cfg(test)`.
#[cfg(test)]
mod tests;
9
/// Errors that can be reported while unescaping the contents of a character,
/// byte, string or byte-string literal.
#[derive(Debug, PartialEq, Eq)]
pub enum EscapeError {
    /// A char or byte literal contained no characters at all.
    ZeroChars,
    /// A char or byte literal contained input after its first character or escape.
    MoreThanOneChar,

    /// A backslash was the last character of the input.
    LoneSlash,
    /// The character following a backslash does not start a recognised escape.
    InvalidEscape,
    /// An unescaped `\r` was found in a non-raw literal.
    BareCarriageReturn,
    /// A `\r` was found in a raw string or raw byte string.
    BareCarriageReturnInRawString,
    /// A character that must be written as an escape appeared unescaped:
    /// the literal's own quote character, or a tab / line feed where those
    /// are not accepted verbatim (string literals accept them directly).
    EscapeOnlyChar,

    /// The input ended before both hex digits of a `\x` escape were read.
    TooShortHexEscape,
    /// A `\x` escape contained a character that is not a hexadecimal digit.
    InvalidCharInHexEscape,
    /// A `\x` escape in a char or string literal had a value above `0x7F`.
    OutOfRangeHexEscape,

    /// `\u` was not immediately followed by `{`.
    NoBraceInUnicodeEscape,
    /// A `\u{...}` escape contained a character that is not a hexadecimal digit.
    InvalidCharInUnicodeEscape,
    /// A `\u{}` escape with nothing between the braces.
    EmptyUnicodeEscape,
    /// The input ended before the closing `}` of a `\u{...}` escape.
    UnclosedUnicodeEscape,
    /// The first character inside `\u{...}` was `_`.
    LeadingUnderscoreUnicodeEscape,
    /// A `\u{...}` escape contained more than six hexadecimal digits.
    OverlongUnicodeEscape,
    /// A `\u{...}` escape whose value is at most `0x10FFFF` but is still not a
    /// valid `char` (i.e. a surrogate code point).
    LoneSurrogateUnicodeEscape,
    /// A `\u{...}` escape whose value is greater than `0x10FFFF`.
    OutOfRangeUnicodeEscape,

    /// A syntactically valid `\u{...}` escape appeared in a byte or byte-string literal.
    UnicodeEscapeInByte,
    /// A non-ASCII character appeared in a byte literal or a non-raw byte string.
    NonAsciiCharInByte,
    /// A non-ASCII character appeared in a raw byte string.
    NonAsciiCharInByteString,
}
38
39pub fn unescape_char(literal_text: &str) -> Result<char, (usize, EscapeError)> {
42 let mut chars = literal_text.chars();
43 unescape_char_or_byte(&mut chars, Mode::Char)
44 .map_err(|err| (literal_text.len() - chars.as_str().len(), err))
45}
46
/// Unescapes the contents of a (non-raw) string literal.
///
/// `callback` is invoked once for every character or escape sequence found in
/// `literal_text`, receiving the byte range it occupies in `literal_text` and
/// either the unescaped `char` or the error detected there. A backslash
/// immediately followed by a line feed is skipped together with the ASCII
/// whitespace after it, without invoking `callback`.
pub fn unescape_str<F>(literal_text: &str, callback: &mut F)
where
    F: FnMut(Range<usize>, Result<char, EscapeError>),
{
    unescape_str_or_byte_str(literal_text, Mode::Str, callback)
}
55
56pub fn unescape_byte(literal_text: &str) -> Result<u8, (usize, EscapeError)> {
57 let mut chars = literal_text.chars();
58 unescape_char_or_byte(&mut chars, Mode::Byte)
59 .map(byte_from_char)
60 .map_err(|err| (literal_text.len() - chars.as_str().len(), err))
61}
62
63pub fn unescape_byte_str<F>(literal_text: &str, callback: &mut F)
66where
67 F: FnMut(Range<usize>, Result<u8, EscapeError>),
68{
69 unescape_str_or_byte_str(literal_text, Mode::ByteStr, &mut |range, char| {
70 callback(range, char.map(byte_from_char))
71 })
72}
73
/// Validates the contents of a raw string literal.
///
/// No escape sequences are interpreted. `callback` is invoked once per
/// character of `literal_text` with the byte range of that character and
/// either the character itself or an error (a `\r` is reported as
/// `BareCarriageReturnInRawString`).
pub fn unescape_raw_str<F>(literal_text: &str, callback: &mut F)
where
    F: FnMut(Range<usize>, Result<char, EscapeError>),
{
    unescape_raw_str_or_byte_str(literal_text, Mode::Str, callback)
}
84
85pub fn unescape_raw_byte_str<F>(literal_text: &str, callback: &mut F)
90where
91 F: FnMut(Range<usize>, Result<u8, EscapeError>),
92{
93 unescape_raw_str_or_byte_str(literal_text, Mode::ByteStr, &mut |range, char| {
94 callback(range, char.map(byte_from_char))
95 })
96}
97
/// The kind of literal whose contents are being unescaped.
#[derive(Debug, Clone, Copy)]
pub enum Mode {
    /// A character literal.
    Char,
    /// A string literal.
    Str,
    /// A byte literal.
    Byte,
    /// A byte-string literal.
    ByteStr,
}

impl Mode {
    /// Returns `true` for the single-character literal kinds (`Char`, `Byte`).
    pub fn in_single_quotes(self) -> bool {
        !self.in_double_quotes()
    }

    /// Returns `true` for the string literal kinds (`Str`, `ByteStr`).
    pub fn in_double_quotes(self) -> bool {
        match self {
            Mode::Str | Mode::ByteStr => true,
            Mode::Char | Mode::Byte => false,
        }
    }

    /// Returns `true` for the byte-oriented literal kinds (`Byte`, `ByteStr`).
    pub fn is_bytes(self) -> bool {
        match self {
            Mode::Char | Mode::Str => false,
            Mode::Byte | Mode::ByteStr => true,
        }
    }
}
125
126
/// Unescapes one character or escape sequence.
///
/// `first_char` has already been taken from `chars`; the remaining characters
/// of an escape sequence (if `first_char` is a backslash) are consumed from
/// `chars`. `mode` selects which quote character must be escaped and whether
/// byte-literal restrictions apply.
///
/// Returns the unescaped character, or the first error encountered. On error
/// `chars` is left wherever scanning stopped.
fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
    if first_char != '\\' {
        // Not an escape sequence: the character stands for itself, unless it
        // is one of the characters that may only appear in escaped form.
        return match first_char {
            '\t' | '\n' => Err(EscapeError::EscapeOnlyChar),
            '\r' => Err(EscapeError::BareCarriageReturn),
            // Only the quote character that delimits this kind of literal
            // needs escaping.
            '\'' if mode.in_single_quotes() => Err(EscapeError::EscapeOnlyChar),
            '"' if mode.in_double_quotes() => Err(EscapeError::EscapeOnlyChar),
            _ => {
                // Byte and byte-string literals may only contain ASCII.
                // Note that `NonAsciiCharInByte` is used for both byte modes here.
                if mode.is_bytes() && !first_char.is_ascii() {
                    return Err(EscapeError::NonAsciiCharInByte);
                }
                Ok(first_char)
            }
        };
    }

    // `first_char` is a backslash: the next character selects the escape.
    let second_char = chars.next().ok_or(EscapeError::LoneSlash)?;

    let res = match second_char {
        '"' => '"',
        'n' => '\n',
        'r' => '\r',
        't' => '\t',
        '\\' => '\\',
        '\'' => '\'',
        '0' => '\0',

        'x' => {
            // `\xHH`: exactly two hexadecimal digits.
            let hi = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
            let hi = hi.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;

            let lo = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
            let lo = lo.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;

            let value = hi * 16 + lo;

            // Outside byte literals a hex escape must stay within ASCII;
            // byte literals accept the full 0x00..=0xFF range.
            if !mode.is_bytes() && !is_ascii(value) {
                return Err(EscapeError::OutOfRangeHexEscape);
            }
            // Two hex digits can never exceed 0xFF, so this cast is lossless.
            let value = value as u8;

            value as char
        }

        'u' => {
            // `\u` has been read; what must follow is `{`, hex digits
            // (optionally separated by `_`) and `}`.
            if chars.next() != Some('{') {
                return Err(EscapeError::NoBraceInUnicodeEscape);
            }

            // The first character inside the braces must be a hex digit.
            let mut n_digits = 1;
            let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? {
                '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape),
                '}' => return Err(EscapeError::EmptyUnicodeEscape),
                c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?,
            };

            // Read the remaining digits up to and including the closing brace.
            loop {
                match chars.next() {
                    None => return Err(EscapeError::UnclosedUnicodeEscape),
                    // Underscores are separators and do not count as digits.
                    Some('_') => continue,
                    Some('}') => {
                        if n_digits > 6 {
                            return Err(EscapeError::OverlongUnicodeEscape);
                        }
                        // Checked only after the escape is known to be
                        // well-formed, so syntax errors are reported in
                        // preference to "not allowed in a byte literal".
                        if mode.is_bytes() {
                            return Err(EscapeError::UnicodeEscapeInByte);
                        }

                        // `from_u32` fails for values above 0x10FFFF and for
                        // surrogates; tell the two apart for the caller.
                        break std::char::from_u32(value).ok_or_else(|| {
                            if value > 0x10FFFF {
                                EscapeError::OutOfRangeUnicodeEscape
                            } else {
                                EscapeError::LoneSurrogateUnicodeEscape
                            }
                        })?;
                    }
                    Some(c) => {
                        let digit = c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?;
                        n_digits += 1;
                        if n_digits > 6 {
                            // Already known to be overlong: keep scanning to
                            // the closing brace but stop accumulating, which
                            // also keeps `value` from overflowing.
                            continue;
                        }
                        // `to_digit` already yields a `u32`; the cast is a no-op.
                        let digit = digit as u32;
                        value = value * 16 + digit;
                    }
                };
            }
        }
        _ => return Err(EscapeError::InvalidEscape),
    };
    Ok(res)
}
219
220fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
221 let first_char = chars.next().ok_or(EscapeError::ZeroChars)?;
222 let res = scan_escape(first_char, chars, mode)?;
223 if chars.next().is_some() {
224 return Err(EscapeError::MoreThanOneChar);
225 }
226 Ok(res)
227}
228
229fn unescape_str_or_byte_str<F>(src: &str, mode: Mode, callback: &mut F)
232where
233 F: FnMut(Range<usize>, Result<char, EscapeError>),
234{
235 assert!(mode.in_double_quotes());
236 let initial_len = src.len();
237 let mut chars = src.chars();
238 while let Some(first_char) = chars.next() {
239 let start = initial_len - chars.as_str().len() - first_char.len_utf8();
240
241 let unescaped_char = match first_char {
242 '\\' => {
243 let second_char = chars.clone().next();
244 match second_char {
245 Some('\n') => {
246 skip_ascii_whitespace(&mut chars);
247 continue;
248 }
249 _ => scan_escape(first_char, &mut chars, mode),
250 }
251 }
252 '\n' => Ok('\n'),
253 '\t' => Ok('\t'),
254 _ => scan_escape(first_char, &mut chars, mode),
255 };
256 let end = initial_len - chars.as_str().len();
257 callback(start..end, unescaped_char);
258 }
259
260 fn skip_ascii_whitespace(chars: &mut Chars<'_>) {
261 let str = chars.as_str();
262 let first_non_space = str
263 .bytes()
264 .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
265 .unwrap_or(str.len());
266 *chars = str[first_non_space..].chars()
267 }
268}
269
270fn unescape_raw_str_or_byte_str<F>(literal_text: &str, mode: Mode, callback: &mut F)
275where
276 F: FnMut(Range<usize>, Result<char, EscapeError>),
277{
278 assert!(mode.in_double_quotes());
279 let initial_len = literal_text.len();
280
281 let mut chars = literal_text.chars();
282 while let Some(curr) = chars.next() {
283 let start = initial_len - chars.as_str().len() - curr.len_utf8();
284
285 let result = match curr {
286 '\r' => Err(EscapeError::BareCarriageReturnInRawString),
287 c if mode.is_bytes() && !c.is_ascii() =>
288 Err(EscapeError::NonAsciiCharInByteString),
289 c => Ok(c),
290 };
291 let end = initial_len - chars.as_str().len();
292
293 callback(start..end, result);
294 }
295}
296
/// Converts a character whose code point fits in one byte into that byte.
///
/// # Panics
///
/// Panics if the code point of `c` is greater than `0xFF`.
fn byte_from_char(c: char) -> u8 {
    let code_point = u32::from(c);
    let limit = u32::from(u8::max_value());
    assert!(code_point <= limit, "guaranteed because of Mode::Byte(Str)");
    code_point as u8
}
302
/// Returns `true` when `x` lies in the ASCII range `0x00..=0x7F`.
fn is_ascii(x: u32) -> bool {
    x < 0x80
}