1use std::iter::Enumerate;
2use std::num::ParseIntError;
3use std::str::Chars;
4
/// Errors produced while decoding a string or bytes literal with [`parse_string`] or
/// [`parse_bytes`].
///
/// Every `index` field is a character index obtained from `chars().enumerate()`, not a byte
/// offset, and every `string` field holds the complete input that was being parsed.
#[derive(Debug, PartialEq)]
pub enum ParseSequenceError {
    /// An invalid symbol was encountered.
    ///
    /// NOTE(review): no code visible in this file constructs this variant; confirm whether it is
    /// still used elsewhere.
    InvalidSymbol {
        symbol: String,
        index: usize,
        string: String,
    },
    /// A backslash escape was not recognised, was cut short by the end of the input, or carried
    /// digits that could not be parsed.
    InvalidEscape {
        /// Text identifying the offending escape. Depending on the failure this is the backslash
        /// alone, the backslash plus the escape character, or only the digits that failed to
        /// parse.
        escape: String,
        /// Index of the backslash when the input ends right after it, otherwise the index of the
        /// character that follows the backslash.
        index: usize,
        string: String,
    },
    /// A `\x`, `\u`, `\U` or octal escape inside a quoted string could not be turned into a
    /// character.
    InvalidUnicode {
        /// The underlying failure reported by the escape decoder.
        source: ParseUnicodeError,
        /// Index of the character that follows the backslash.
        index: usize,
        string: String,
    },
    /// The input did not start with a quote (optionally preceded by `r`/`R`), or a character
    /// appeared outside of any quotes.
    MissingOpeningQuote,
    /// The input ended while a quote was still open.
    MissingClosingQuote,
}
29
/// Reasons a numeric escape sequence could not be converted into a `char`.
#[derive(Debug, PartialEq, Clone)]
pub enum ParseUnicodeError {
    /// The characters collected for a hexadecimal escape were not a valid base-16 `u32`.
    ParseHexFailed {
        /// Error returned by the integer parser.
        source: ParseIntError,
        /// The characters that were handed to the integer parser.
        string: String,
    },
    /// The characters collected for an octal escape were not a valid base-8 `u32`.
    ParseOctFailed {
        /// Error returned by the integer parser.
        source: ParseIntError,
        /// The characters that were handed to the integer parser.
        string: String,
    },
    /// The number parsed fine but is not usable as a character: `char::from_u32` rejected it or,
    /// for octal escapes, it is greater than 255.
    ParseUnicodeFailed {
        /// The numeric value that was rejected.
        value: u32,
    },
}
49
50pub fn parse_bytes(s: &str) -> Result<Vec<u8>, ParseSequenceError> {
51 let mut chars = s.chars().enumerate();
52 let mut res: Vec<u8> = Vec::with_capacity(s.len());
53
54 while let Some((idx, c)) = chars.next() {
55 if c == '\\' {
56 match chars.next() {
57 None => {
58 return Err(ParseSequenceError::InvalidEscape {
59 escape: format!("{}", c),
60 index: idx,
61 string: String::from(s),
62 });
63 }
64 Some((idx, c2)) => {
65 let byte: u8 = match c2 {
66 'x' => {
67 let hex: String = [
68 chars
69 .next()
70 .ok_or(ParseSequenceError::InvalidEscape {
71 escape: "\\x".to_string(),
72 index: idx,
73 string: s.to_string(),
74 })?
75 .1,
76 chars
77 .next()
78 .ok_or(ParseSequenceError::InvalidEscape {
79 escape: "\\x".to_string(),
80 index: idx,
81 string: s.to_string(),
82 })?
83 .1,
84 ]
85 .iter()
86 .collect();
87 u8::from_str_radix(&hex, 16).map_err(|_| {
88 ParseSequenceError::InvalidEscape {
89 escape: hex,
90 index: idx,
91 string: s.to_string(),
92 }
93 })?
94 }
95 n if ('0'..='3').contains(&n) => {
96 let octal: String = [
97 n,
98 chars
99 .next()
100 .ok_or(ParseSequenceError::InvalidEscape {
101 escape: format!("\\{n}"),
102 index: idx,
103 string: s.to_string(),
104 })?
105 .1,
106 chars
107 .next()
108 .ok_or(ParseSequenceError::InvalidEscape {
109 escape: format!("\\{n}"),
110 index: idx,
111 string: s.to_string(),
112 })?
113 .1,
114 ]
115 .iter()
116 .collect();
117 u8::from_str_radix(&octal, 8).map_err(|_| {
118 ParseSequenceError::InvalidEscape {
119 escape: octal,
120 index: idx,
121 string: s.to_string(),
122 }
123 })?
124 }
125 _ => {
126 return Err(ParseSequenceError::InvalidEscape {
127 escape: format!("{}{}", c, c2),
128 index: idx,
129 string: String::from(s),
130 });
131 }
132 };
133
134 res.push(byte);
135 continue;
136 }
137 };
138 }
139 let size = c.len_utf8();
140 let mut buffer = [0; 4];
141 c.encode_utf8(&mut buffer);
142 res.extend_from_slice(&buffer[..size]);
143 }
144 Ok(res)
145}
146
147pub fn parse_string(s: &str) -> Result<String, ParseSequenceError> {
183 let mut chars = s.chars().enumerate();
184 let res = String::with_capacity(s.len());
185
186 match chars.next() {
187 Some((_, c)) if c == 'r' || c == 'R' => parse_raw_string(&mut chars, res),
188 Some((_, c)) if c == '\'' || c == '"' => parse_quoted_string(s, &mut chars, res, c),
189 _ => Err(ParseSequenceError::MissingOpeningQuote),
190 }
191}
192
193fn parse_raw_string(
194 chars: &mut Enumerate<Chars>,
195 mut res: String,
196) -> Result<String, ParseSequenceError> {
197 let mut in_single_quotes = false;
198 let mut in_double_quotes = false;
199
200 while let Some((_, c)) = chars.next() {
201 let in_quotes = in_single_quotes || in_double_quotes;
202
203 if c == '\\' && in_quotes {
204 match chars.next() {
205 Some((_, c2)) => {
206 match c2 {
207 '"' => {
208 if in_single_quotes {
209 res.push(c);
210 }
211 }
212 '\'' => {
213 if in_double_quotes {
214 res.push(c);
215 }
216 }
217 _ => {
218 res.push(c);
219 }
220 };
221 res.push(c2);
222 continue;
223 }
224 _ => {
225 res.push(c);
226 continue;
227 }
228 };
229 } else if c == '\'' {
230 if in_double_quotes {
231 res.push(c);
232 continue;
233 }
234
235 in_single_quotes = !in_single_quotes;
236 continue;
237 } else if c == '"' {
238 if in_single_quotes {
239 res.push(c);
240 continue;
241 }
242
243 in_double_quotes = !in_double_quotes;
244 continue;
245 } else if !in_quotes {
246 return Err(ParseSequenceError::MissingOpeningQuote);
247 }
248
249 res.push(c);
250 }
251
252 Ok(res)
253}
254
255fn parse_quoted_string(
256 s: &str,
257 mut chars: &mut Enumerate<Chars>,
258 mut res: String,
259 quote: char,
260) -> Result<String, ParseSequenceError> {
261 let mut in_single_quotes = quote == '\'';
262 let mut in_double_quotes = quote == '"';
263
264 while let Some((idx, c)) = chars.next() {
265 let in_quotes = in_single_quotes || in_double_quotes;
266
267 if c == '\\' && in_quotes {
268 match chars.next() {
269 None => {
270 return Err(ParseSequenceError::InvalidEscape {
271 escape: format!("{}", c),
272 index: idx,
273 string: String::from(s),
274 });
275 }
276 Some((idx, c2)) => {
277 let mut push_escape_character = false;
278
279 let value = match c2 {
280 'a' => '\u{07}',
281 'b' => '\u{08}',
282 'v' => '\u{0B}',
283 'f' => '\u{0C}',
284 'n' => '\n',
285 'r' => '\r',
286 't' => '\t',
287 '\\' => c2,
288 '?' => c2,
289 '\'' => {
290 push_escape_character = in_double_quotes;
291 c2
292 }
293 '"' => {
294 push_escape_character = in_single_quotes;
295 c2
296 }
297 '`' => c2,
298 'x' | 'u' | 'U' => {
299 let length = match c2 {
300 'x' => 2,
301 'u' => 4,
302 'U' => 8,
303 _ => unreachable!(),
304 };
305
306 parse_unicode_hex(length, &mut chars).map_err(|x| {
307 ParseSequenceError::InvalidUnicode {
308 source: x.clone(),
309 index: idx,
310 string: String::from(s),
311 }
312 })?
313 }
314 n if ('0'..='3').contains(&n) => parse_unicode_oct(&n, &mut chars)
315 .map_err(|x| ParseSequenceError::InvalidUnicode {
316 source: x.clone(),
317 index: idx,
318 string: String::from(s),
319 })?,
320 _ => {
321 return Err(ParseSequenceError::InvalidEscape {
322 escape: format!("{}{}", c, c2),
323 index: idx,
324 string: String::from(s),
325 });
326 }
327 };
328
329 if push_escape_character {
330 res.push(c);
331 }
332
333 res.push(value);
334
335 continue;
336 }
337 };
338 } else if c == '\'' {
339 if in_double_quotes {
340 res.push(c);
341 continue;
342 }
343
344 in_single_quotes = !in_single_quotes;
345 continue;
346 } else if c == '"' {
347 if in_single_quotes {
348 res.push(c);
349 continue;
350 }
351
352 in_double_quotes = !in_double_quotes;
353 continue;
354 } else if !in_quotes {
355 return Err(ParseSequenceError::MissingOpeningQuote);
356 }
357
358 res.push(c);
359 }
360
361 if in_single_quotes || in_double_quotes {
363 return Err(ParseSequenceError::MissingClosingQuote);
364 }
365
366 Ok(res)
367}
368
369fn parse_unicode_hex<I>(length: usize, chars: &mut I) -> Result<char, ParseUnicodeError>
370where
371 I: Iterator<Item = (usize, char)>,
372{
373 let unicode_seq: String = chars.take(length).map(|(_, c)| c).collect();
374
375 u32::from_str_radix(&unicode_seq, 16)
376 .map_err(|e| ParseUnicodeError::ParseHexFailed {
377 source: e,
378 string: unicode_seq,
379 })
380 .and_then(|u| char::from_u32(u).ok_or(ParseUnicodeError::ParseUnicodeFailed { value: u }))
381}
382
383fn parse_unicode_oct<I>(first_char: &char, chars: &mut I) -> Result<char, ParseUnicodeError>
384where
385 I: Iterator<Item = (usize, char)>,
386{
387 let mut unicode_seq: String = String::with_capacity(3);
388 unicode_seq.push(*first_char);
389 chars.take(2).for_each(|(_, c)| unicode_seq.push(c));
390
391 u32::from_str_radix(&unicode_seq, 8)
392 .map_err(|e| ParseUnicodeError::ParseOctFailed {
393 source: e,
394 string: unicode_seq,
395 })
396 .and_then(|u| {
397 if u <= 255 {
398 char::from_u32(u).ok_or(ParseUnicodeError::ParseUnicodeFailed { value: u })
399 } else {
400 Err(ParseUnicodeError::ParseUnicodeFailed { value: u })
401 }
402 })
403}
404
#[cfg(test)]
mod tests {
    use crate::parse::ParseSequenceError;
    use crate::{parse_bytes, parse_string};

    /// Escapes inside single quotes are decoded; `\"` is not needed but `\'` yields a bare quote.
    #[test]
    fn single_quotes_interprets_escapes() {
        let tests: Vec<(&str, Result<String, ParseSequenceError>)> = vec![
            ("'Hello \\a'", Ok(String::from("Hello \u{07}"))),
            ("'Hello \\b'", Ok(String::from("Hello \u{08}"))),
            ("'Hello \\v'", Ok(String::from("Hello \u{0b}"))),
            ("'Hello \\f'", Ok(String::from("Hello \u{0c}"))),
            ("'Hello \\n'", Ok(String::from("Hello \u{0a}"))),
            ("'Hello \\r'", Ok(String::from("Hello \u{0d}"))),
            ("'Hello \\t'", Ok(String::from("Hello \u{09}"))),
            ("'Hello \\\\'", Ok(String::from("Hello \\"))),
            ("'Hello \\?'", Ok(String::from("Hello ?"))),
            ("'Hello \"'", Ok(String::from("Hello \""))),
            ("'Hello \\''", Ok(String::from("Hello '"))),
            ("'Hello \\`'", Ok(String::from("Hello `"))),
            // The decoded space is spelled `\u{20}` so it cannot be lost next to the literal one.
            ("'Hello \\x20'", Ok(String::from("Hello \u{20}"))),
            ("'Hello \\u270c'", Ok(String::from("Hello ✌"))),
            ("'Hello \\U0001f431'", Ok(String::from("Hello 🐱"))),
            ("'Hello \\040'", Ok(String::from("Hello \u{20}"))),
            (
                "Missing closing quote'",
                Err(ParseSequenceError::MissingOpeningQuote),
            ),
            (
                "'Missing closing quote",
                Err(ParseSequenceError::MissingClosingQuote),
            ),
            (
                // Octal escapes must start with 0-3, so `\4` is an unknown escape.
                "'\\440'",
                Err(ParseSequenceError::InvalidEscape {
                    escape: String::from("\\4"),
                    index: 2,
                    string: String::from("'\\440'"),
                }),
            ),
        ];

        for (s, expected) in tests {
            let result = parse_string(s);
            assert_eq!(result, expected, "Testing {}", s);
        }
    }

    /// Escapes inside double quotes are decoded; `\'` keeps its backslash.
    #[test]
    fn double_quotes_interprets_escapes() {
        let tests: Vec<(&str, Result<String, ParseSequenceError>)> = vec![
            ("\"Hello \\a\"", Ok(String::from("Hello \u{07}"))),
            ("\"Hello \\b\"", Ok(String::from("Hello \u{08}"))),
            ("\"Hello \\v\"", Ok(String::from("Hello \u{0b}"))),
            ("\"Hello \\f\"", Ok(String::from("Hello \u{0c}"))),
            ("\"Hello \\n\"", Ok(String::from("Hello \u{0a}"))),
            ("\"Hello \\r\"", Ok(String::from("Hello \u{0d}"))),
            ("\"Hello \\t\"", Ok(String::from("Hello \u{09}"))),
            ("\"Hello \\\\\"", Ok(String::from("Hello \\"))),
            ("\"Hello \\?\"", Ok(String::from("Hello ?"))),
            ("\"Hello \\\"\"", Ok(String::from("Hello \""))),
            ("\"Hello \\'\"", Ok(String::from("Hello \\'"))),
            ("\"Hello \\`\"", Ok(String::from("Hello `"))),
            // Literal space, decoded space, literal space.
            ("\"Hello \\x20 \"", Ok(String::from("Hello \u{20} "))),
            ("\"Hello \\x60\"", Ok(String::from("Hello `"))),
            ("\"Hello \\u270c\"", Ok(String::from("Hello ✌"))),
            ("\"Hello \\U0001f431\"", Ok(String::from("Hello 🐱"))),
            ("\"Hello \\040\"", Ok(String::from("Hello \u{20}"))),
            (
                "Missing closing quote\"",
                Err(ParseSequenceError::MissingOpeningQuote),
            ),
            (
                "\"Missing closing quote",
                Err(ParseSequenceError::MissingClosingQuote),
            ),
            (
                // Octal escapes must start with 0-3, so `\4` is an unknown escape.
                "\"\\440\"",
                Err(ParseSequenceError::InvalidEscape {
                    escape: String::from("\\4"),
                    index: 2,
                    string: String::from("\"\\440\""),
                }),
            ),
        ];

        for (s, expected) in tests {
            let result = parse_string(s);
            assert_eq!(result, expected, "Testing {}", s);
        }
    }

    /// Raw strings copy escapes verbatim, except that a backslash before the open quote is dropped.
    #[test]
    fn raw_string_does_not_interpret_escapes() {
        let tests: Vec<(&str, Result<String, ParseSequenceError>)> = vec![
            (
                "r\"Hello \\a \\\" ' \\' \\U0001f431 \"",
                Ok(String::from("Hello \\a \" ' \\' \\U0001f431 ")),
            ),
            (
                "R\"Hello \\a \\\" ' \\' \\U0001f431 \"",
                Ok(String::from("Hello \\a \" ' \\' \\U0001f431 ")),
            ),
            (
                "r'Hello \\a \\\" \" \\' \\U0001f431 '",
                Ok(String::from("Hello \\a \\\" \" ' \\U0001f431 ")),
            ),
            (
                "R'Hello \\a \\\" \" \\' \\U0001f431 '",
                Ok(String::from("Hello \\a \\\" \" ' \\U0001f431 ")),
            ),
        ];

        for (s, expected) in tests {
            let result = parse_string(s);
            assert_eq!(result, expected, "Testing {}", s);
        }
    }

    /// Plain text is UTF-8 encoded; `\xFF` and `\376` each become one byte.
    #[test]
    fn parses_bytes() {
        let bytes = parse_bytes("abc💖\\xFF\\376").expect("Must parse!");
        assert_eq!([97, 98, 99, 240, 159, 146, 150, 255, 254], *bytes)
    }
}