1use std::iter::Enumerate;
2use std::num::ParseIntError;
3use std::str::Chars;
4
5#[derive(Debug, PartialEq)]
7pub enum ParseSequenceError {
8 InvalidEscape {
10 escape: String,
11 index: usize,
12 string: String,
13 },
14 InvalidUnicode {
16 source: ParseUnicodeError,
18 index: usize,
19 string: String,
20 },
21 MissingOpeningQuote,
22 MissingClosingQuote,
23}
24
/// Reason why a numeric (`\x`, `\u`, `\U` or octal) escape could not be turned into a character.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum ParseUnicodeError {
    /// The digits of a hexadecimal escape could not be parsed as a number.
    Hex {
        /// The integer parsing failure.
        source: ParseIntError,
        /// The characters that were read as the escape's digits.
        string: String,
    },
    /// The digits of an octal escape could not be parsed as a number.
    Oct {
        /// The integer parsing failure.
        source: ParseIntError,
        /// The characters that were read as the escape's digits.
        string: String,
    },
    /// The digits parsed to a number that is not an acceptable character code.
    Unicode {
        /// The rejected numeric value.
        value: u32,
    },
}

impl std::fmt::Display for ParseUnicodeError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Hex { source, string } => {
                write!(f, "invalid hexadecimal escape `{string}`: {source}")
            }
            Self::Oct { source, string } => {
                write!(f, "invalid octal escape `{string}`: {source}")
            }
            Self::Unicode { value } => write!(f, "{value:#x} is not a valid character code"),
        }
    }
}

impl std::error::Error for ParseUnicodeError {
    /// Exposes the wrapped integer parsing error, when there is one.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::Hex { source, .. } | Self::Oct { source, .. } => Some(source),
            Self::Unicode { .. } => None,
        }
    }
}
44
45pub fn parse_bytes(s: &str) -> Result<Vec<u8>, ParseSequenceError> {
46 let mut chars = s.chars().enumerate();
47 let mut res: Vec<u8> = Vec::with_capacity(s.len());
48
49 while let Some((idx, c)) = chars.next() {
50 if c == '\\' {
51 match chars.next() {
52 None => {
53 return Err(ParseSequenceError::InvalidEscape {
54 escape: format!("{c}"),
55 index: idx,
56 string: String::from(s),
57 });
58 }
59 Some((idx, c2)) => {
60 let byte: u8 = match c2 {
61 'x' => {
62 let hex: String = [
63 chars
64 .next()
65 .ok_or(ParseSequenceError::InvalidEscape {
66 escape: "\\x".to_string(),
67 index: idx,
68 string: s.to_string(),
69 })?
70 .1,
71 chars
72 .next()
73 .ok_or(ParseSequenceError::InvalidEscape {
74 escape: "\\x".to_string(),
75 index: idx,
76 string: s.to_string(),
77 })?
78 .1,
79 ]
80 .iter()
81 .collect();
82 u8::from_str_radix(&hex, 16).map_err(|_| {
83 ParseSequenceError::InvalidEscape {
84 escape: hex,
85 index: idx,
86 string: s.to_string(),
87 }
88 })?
89 }
90 n if ('0'..='3').contains(&n) => {
91 let octal: String = [
92 n,
93 chars
94 .next()
95 .ok_or(ParseSequenceError::InvalidEscape {
96 escape: format!("\\{n}"),
97 index: idx,
98 string: s.to_string(),
99 })?
100 .1,
101 chars
102 .next()
103 .ok_or(ParseSequenceError::InvalidEscape {
104 escape: format!("\\{n}"),
105 index: idx,
106 string: s.to_string(),
107 })?
108 .1,
109 ]
110 .iter()
111 .collect();
112 u8::from_str_radix(&octal, 8).map_err(|_| {
113 ParseSequenceError::InvalidEscape {
114 escape: octal,
115 index: idx,
116 string: s.to_string(),
117 }
118 })?
119 }
120 _ => {
121 return Err(ParseSequenceError::InvalidEscape {
122 escape: format!("{c}{c2}"),
123 index: idx,
124 string: String::from(s),
125 });
126 }
127 };
128
129 res.push(byte);
130 continue;
131 }
132 };
133 }
134 let size = c.len_utf8();
135 let mut buffer = [0; 4];
136 c.encode_utf8(&mut buffer);
137 res.extend_from_slice(&buffer[..size]);
138 }
139 Ok(res)
140}
141
142pub fn parse_string(s: &str) -> Result<String, ParseSequenceError> {
178 let mut chars = s.chars().enumerate();
179 let res = String::with_capacity(s.len());
180
181 match chars.next() {
182 Some((_, c)) if c == 'r' || c == 'R' => parse_raw_string(&mut chars, res),
183 Some((_, c)) if c == '\'' || c == '"' => parse_quoted_string(s, &mut chars, res, c),
184 _ => Err(ParseSequenceError::MissingOpeningQuote),
185 }
186}
187
188fn parse_raw_string(
189 chars: &mut Enumerate<Chars>,
190 mut res: String,
191) -> Result<String, ParseSequenceError> {
192 let mut in_single_quotes = false;
193 let mut in_double_quotes = false;
194
195 while let Some((_, c)) = chars.next() {
196 let in_quotes = in_single_quotes || in_double_quotes;
197
198 if c == '\\' && in_quotes {
199 match chars.next() {
200 Some((_, c2)) => {
201 match c2 {
202 '"' => {
203 if in_single_quotes {
204 res.push(c);
205 }
206 }
207 '\'' => {
208 if in_double_quotes {
209 res.push(c);
210 }
211 }
212 _ => {
213 res.push(c);
214 }
215 };
216 res.push(c2);
217 continue;
218 }
219 _ => {
220 res.push(c);
221 continue;
222 }
223 };
224 } else if c == '\'' {
225 if in_double_quotes {
226 res.push(c);
227 continue;
228 }
229
230 in_single_quotes = !in_single_quotes;
231 continue;
232 } else if c == '"' {
233 if in_single_quotes {
234 res.push(c);
235 continue;
236 }
237
238 in_double_quotes = !in_double_quotes;
239 continue;
240 } else if !in_quotes {
241 return Err(ParseSequenceError::MissingOpeningQuote);
242 }
243
244 res.push(c);
245 }
246
247 Ok(res)
248}
249
250fn parse_quoted_string(
251 s: &str,
252 mut chars: &mut Enumerate<Chars>,
253 mut res: String,
254 quote: char,
255) -> Result<String, ParseSequenceError> {
256 let mut in_single_quotes = quote == '\'';
257 let mut in_double_quotes = quote == '"';
258
259 while let Some((idx, c)) = chars.next() {
260 let in_quotes = in_single_quotes || in_double_quotes;
261
262 if c == '\\' && in_quotes {
263 match chars.next() {
264 None => {
265 return Err(ParseSequenceError::InvalidEscape {
266 escape: format!("{c}"),
267 index: idx,
268 string: String::from(s),
269 });
270 }
271 Some((idx, c2)) => {
272 let mut push_escape_character = false;
273
274 let value = match c2 {
275 'a' => '\u{07}',
276 'b' => '\u{08}',
277 'v' => '\u{0B}',
278 'f' => '\u{0C}',
279 'n' => '\n',
280 'r' => '\r',
281 't' => '\t',
282 '\\' => c2,
283 '?' => c2,
284 '\'' => {
285 push_escape_character = in_double_quotes;
286 c2
287 }
288 '"' => {
289 push_escape_character = in_single_quotes;
290 c2
291 }
292 '`' => c2,
293 'x' | 'u' | 'U' => {
294 let length = match c2 {
295 'x' => 2,
296 'u' => 4,
297 'U' => 8,
298 _ => unreachable!(),
299 };
300
301 parse_unicode_hex(length, &mut chars).map_err(|x| {
302 ParseSequenceError::InvalidUnicode {
303 source: x.clone(),
304 index: idx,
305 string: String::from(s),
306 }
307 })?
308 }
309 n if ('0'..='3').contains(&n) => parse_unicode_oct(&n, &mut chars)
310 .map_err(|x| ParseSequenceError::InvalidUnicode {
311 source: x.clone(),
312 index: idx,
313 string: String::from(s),
314 })?,
315 _ => {
316 return Err(ParseSequenceError::InvalidEscape {
317 escape: format!("{c}{c2}"),
318 index: idx,
319 string: String::from(s),
320 });
321 }
322 };
323
324 if push_escape_character {
325 res.push(c);
326 }
327
328 res.push(value);
329
330 continue;
331 }
332 };
333 } else if c == '\'' {
334 if in_double_quotes {
335 res.push(c);
336 continue;
337 }
338
339 in_single_quotes = !in_single_quotes;
340 continue;
341 } else if c == '"' {
342 if in_single_quotes {
343 res.push(c);
344 continue;
345 }
346
347 in_double_quotes = !in_double_quotes;
348 continue;
349 } else if !in_quotes {
350 return Err(ParseSequenceError::MissingOpeningQuote);
351 }
352
353 res.push(c);
354 }
355
356 if in_single_quotes || in_double_quotes {
358 return Err(ParseSequenceError::MissingClosingQuote);
359 }
360
361 Ok(res)
362}
363
364fn parse_unicode_hex<I>(length: usize, chars: &mut I) -> Result<char, ParseUnicodeError>
365where
366 I: Iterator<Item = (usize, char)>,
367{
368 let unicode_seq: String = chars.take(length).map(|(_, c)| c).collect();
369
370 u32::from_str_radix(&unicode_seq, 16)
371 .map_err(|e| ParseUnicodeError::Hex {
372 source: e,
373 string: unicode_seq,
374 })
375 .and_then(|u| char::from_u32(u).ok_or(ParseUnicodeError::Unicode { value: u }))
376}
377
378fn parse_unicode_oct<I>(first_char: &char, chars: &mut I) -> Result<char, ParseUnicodeError>
379where
380 I: Iterator<Item = (usize, char)>,
381{
382 let mut unicode_seq: String = String::with_capacity(3);
383 unicode_seq.push(*first_char);
384 chars.take(2).for_each(|(_, c)| unicode_seq.push(c));
385
386 u32::from_str_radix(&unicode_seq, 8)
387 .map_err(|e| ParseUnicodeError::Oct {
388 source: e,
389 string: unicode_seq,
390 })
391 .and_then(|u| {
392 if u <= 255 {
393 char::from_u32(u).ok_or(ParseUnicodeError::Unicode { value: u })
394 } else {
395 Err(ParseUnicodeError::Unicode { value: u })
396 }
397 })
398}
399
#[cfg(test)]
mod tests {
    use super::{parse_bytes, parse_string, ParseSequenceError};

    /// Escapes inside single-quoted strings are decoded.
    #[test]
    fn single_quotes_interprets_escapes() {
        let tests: Vec<(&str, Result<String, ParseSequenceError>)> = vec![
            ("'Hello \\a'", Ok(String::from("Hello \u{07}"))),
            ("'Hello \\b'", Ok(String::from("Hello \u{08}"))),
            ("'Hello \\v'", Ok(String::from("Hello \u{0b}"))),
            ("'Hello \\f'", Ok(String::from("Hello \u{0c}"))),
            ("'Hello \\n'", Ok(String::from("Hello \u{0a}"))),
            ("'Hello \\r'", Ok(String::from("Hello \u{0d}"))),
            ("'Hello \\t'", Ok(String::from("Hello \u{09}"))),
            ("'Hello \\\\'", Ok(String::from("Hello \\"))),
            ("'Hello \\?'", Ok(String::from("Hello ?"))),
            ("'Hello \"'", Ok(String::from("Hello \""))),
            ("'Hello \\''", Ok(String::from("Hello '"))),
            ("'Hello \\`'", Ok(String::from("Hello `"))),
            // The decoded space is spelled `\u{20}` so it cannot be lost next to the literal one.
            ("'Hello \\x20'", Ok(String::from("Hello \u{20}"))),
            ("'Hello \\u270c'", Ok(String::from("Hello ✌"))),
            ("'Hello \\U0001f431'", Ok(String::from("Hello 🐱"))),
            ("'Hello \\040'", Ok(String::from("Hello \u{20}"))),
            (
                "Missing closing quote'",
                Err(ParseSequenceError::MissingOpeningQuote),
            ),
            (
                "'Missing closing quote",
                Err(ParseSequenceError::MissingClosingQuote),
            ),
            (
                // Octal escapes must start with 0..=3, so `\4` is an unknown escape.
                "'\\440'",
                Err(ParseSequenceError::InvalidEscape {
                    escape: String::from("\\4"),
                    index: 2,
                    string: String::from("'\\440'"),
                }),
            ),
        ];

        for (s, expected) in tests {
            let result = parse_string(s);
            assert_eq!(result, expected, "Testing {s}");
        }
    }

    /// Escapes inside double-quoted strings are decoded.
    #[test]
    fn double_quotes_interprets_escapes() {
        let tests: Vec<(&str, Result<String, ParseSequenceError>)> = vec![
            ("\"Hello \\a\"", Ok(String::from("Hello \u{07}"))),
            ("\"Hello \\b\"", Ok(String::from("Hello \u{08}"))),
            ("\"Hello \\v\"", Ok(String::from("Hello \u{0b}"))),
            ("\"Hello \\f\"", Ok(String::from("Hello \u{0c}"))),
            ("\"Hello \\n\"", Ok(String::from("Hello \u{0a}"))),
            ("\"Hello \\r\"", Ok(String::from("Hello \u{0d}"))),
            ("\"Hello \\t\"", Ok(String::from("Hello \u{09}"))),
            ("\"Hello \\\\\"", Ok(String::from("Hello \\"))),
            ("\"Hello \\?\"", Ok(String::from("Hello ?"))),
            ("\"Hello \\\"\"", Ok(String::from("Hello \""))),
            // An escaped single quote keeps its backslash inside double quotes.
            ("\"Hello \\'\"", Ok(String::from("Hello \\'"))),
            ("\"Hello \\`\"", Ok(String::from("Hello `"))),
            // Literal space, decoded space, literal space.
            ("\"Hello \\x20 \"", Ok(String::from("Hello \u{20} "))),
            ("\"Hello \\x60\"", Ok(String::from("Hello `"))),
            ("\"Hello \\u270c\"", Ok(String::from("Hello ✌"))),
            ("\"Hello \\U0001f431\"", Ok(String::from("Hello 🐱"))),
            ("\"Hello \\040\"", Ok(String::from("Hello \u{20}"))),
            (
                "Missing closing quote\"",
                Err(ParseSequenceError::MissingOpeningQuote),
            ),
            (
                "\"Missing closing quote",
                Err(ParseSequenceError::MissingClosingQuote),
            ),
            (
                // Octal escapes must start with 0..=3, so `\4` is an unknown escape.
                "\"\\440\"",
                Err(ParseSequenceError::InvalidEscape {
                    escape: String::from("\\4"),
                    index: 2,
                    string: String::from("\"\\440\""),
                }),
            ),
        ];

        for (s, expected) in tests {
            let result = parse_string(s);
            assert_eq!(result, expected, "Testing {s}");
        }
    }

    /// Raw strings keep escape sequences verbatim, except for an escaped delimiter.
    #[test]
    fn raw_string_does_not_interpret_escapes() {
        let tests: Vec<(&str, Result<String, ParseSequenceError>)> = vec![
            (
                // Double-quoted: `\"` loses its backslash, `\'` keeps it.
                "r\"Hello \\a \\\" ' \\' \\U0001f431 \"",
                Ok(String::from("Hello \\a \" ' \\' \\U0001f431 ")),
            ),
            (
                "R\"Hello \\a \\\" ' \\' \\U0001f431 \"",
                Ok(String::from("Hello \\a \" ' \\' \\U0001f431 ")),
            ),
            (
                // Single-quoted: `\'` loses its backslash, `\"` keeps it.
                "r'Hello \\a \\\" \" \\' \\U0001f431 '",
                Ok(String::from("Hello \\a \\\" \" ' \\U0001f431 ")),
            ),
            (
                "R'Hello \\a \\\" \" \\' \\U0001f431 '",
                Ok(String::from("Hello \\a \\\" \" ' \\U0001f431 ")),
            ),
        ];

        for (s, expected) in tests {
            let result = parse_string(s);
            assert_eq!(result, expected, "Testing {s}");
        }
    }

    /// Plain characters become UTF-8 bytes; hex and octal escapes become single bytes.
    #[test]
    fn parses_bytes() {
        let bytes = parse_bytes("abc💖\\xFF\\376").expect("Must parse!");
        assert_eq!([97, 98, 99, 240, 159, 146, 150, 255, 254], *bytes)
    }
}