1use super::core::Lexer;
20use crate::parser::core::Result;
21use crate::parser::{Error, SyntaxError};
22use crate::syntax::EscapeUnit::{self, *};
23use crate::syntax::EscapedString;
24
25impl Lexer<'_> {
26 async fn hex_digit(&mut self) -> Result<Option<u32>> {
28 if let Some(c) = self.peek_char().await? {
29 if let Some(digit) = c.to_digit(16) {
30 self.consume_char();
31 return Ok(Some(digit));
32 }
33 }
34 Ok(None)
35 }
36
37 async fn hex_digits(&mut self, count: usize) -> Result<Option<u32>> {
44 let Some(digit) = self.hex_digit().await? else {
45 return Ok(None);
46 };
47 let mut value = digit;
48 for _ in 1..count {
49 let Some(digit) = self.hex_digit().await? else {
50 break;
51 };
52 value = (value << 4) | digit;
53 }
54 Ok(Some(value))
55 }
56
57 pub async fn escape_unit(&mut self) -> Result<Option<EscapeUnit>> {
69 let Some(c1) = self.peek_char().await? else {
70 return Ok(None);
71 };
72 let start_index = self.index();
73 self.consume_char();
74 if c1 != '\\' {
75 return Ok(Some(Literal(c1)));
76 }
77
78 let Some(c2) = self.peek_char().await? else {
79 let cause = SyntaxError::IncompleteEscape.into();
80 let location = self.location().await?.clone();
81 return Err(Error { cause, location });
82 };
83 self.consume_char();
84 match c2 {
85 '"' => Ok(Some(DoubleQuote)),
86 '\'' => Ok(Some(SingleQuote)),
87 '\\' => Ok(Some(Backslash)),
88 '?' => Ok(Some(Question)),
89 'a' => Ok(Some(Alert)),
90 'b' => Ok(Some(Backspace)),
91 'e' | 'E' => Ok(Some(Escape)),
92 'f' => Ok(Some(FormFeed)),
93 'n' => Ok(Some(Newline)),
94 'r' => Ok(Some(CarriageReturn)),
95 't' => Ok(Some(Tab)),
96 'v' => Ok(Some(VerticalTab)),
97
98 'c' => {
99 let start_index = self.index();
100 let Some(c3) = self.peek_char().await? else {
101 let cause = SyntaxError::IncompleteControlEscape.into();
102 let location = self.location().await?.clone();
103 return Err(Error { cause, location });
104 };
105 self.consume_char();
106 match c3.to_ascii_uppercase() {
107 '\\' => {
108 let Some('\\') = self.peek_char().await? else {
109 let cause = SyntaxError::IncompleteControlBackslashEscape.into();
110 let location = self.location().await?.clone();
111 return Err(Error { cause, location });
112 };
113 self.consume_char();
114 Ok(Some(Control(0x1C)))
115 }
116
117 c3 @ ('\u{3F}'..'\u{60}') => Ok(Some(Control(c3 as u8 ^ 0x40))),
119
120 _ => {
121 let cause = SyntaxError::InvalidControlEscape.into();
122 let location = self.location_range(start_index..self.index());
123 Err(Error { cause, location })
124 }
125 }
126 }
127
128 'x' => {
129 let Some(value) = self.hex_digits(2).await? else {
130 let cause = SyntaxError::IncompleteHexEscape.into();
131 let location = self.location().await?.clone();
132 return Err(Error { cause, location });
133 };
134 Ok(Some(Hex(value as u8)))
136 }
137
138 'u' => {
139 let Some(value) = self.hex_digits(4).await? else {
140 let cause = SyntaxError::IncompleteShortUnicodeEscape.into();
141 let location = self.location().await?.clone();
142 return Err(Error { cause, location });
143 };
144 if let Some(c) = char::from_u32(value) {
145 Ok(Some(Unicode(c)))
146 } else {
147 let cause = SyntaxError::UnicodeEscapeOutOfRange.into();
148 let location = self.location_range(start_index..self.index());
149 Err(Error { cause, location })
150 }
151 }
152
153 'U' => {
154 let Some(value) = self.hex_digits(8).await? else {
155 let cause = SyntaxError::IncompleteLongUnicodeEscape.into();
156 let location = self.location().await?.clone();
157 return Err(Error { cause, location });
158 };
159 if let Some(c) = char::from_u32(value) {
160 Ok(Some(Unicode(c)))
161 } else {
162 let cause = SyntaxError::UnicodeEscapeOutOfRange.into();
163 let location = self.location_range(start_index..self.index());
164 Err(Error { cause, location })
165 }
166 }
167
168 _ => {
169 let Some(mut value) = c2.to_digit(8) else {
171 let cause = SyntaxError::InvalidEscape.into();
172 let location = self.location_range(start_index..self.index());
173 return Err(Error { cause, location });
174 };
175 for _ in 0..2 {
176 let Some(digit) = self.peek_char().await? else {
177 break;
178 };
179 let Some(digit) = digit.to_digit(8) else {
180 break;
181 };
182 value = value * 8 + digit;
183 self.consume_char();
184 }
185 if let Ok(value) = value.try_into() {
186 Ok(Some(Octal(value)))
187 } else {
188 let cause = SyntaxError::OctalEscapeOutOfRange.into();
189 let location = self.location_range(start_index..self.index());
190 Err(Error { cause, location })
191 }
192 }
193 }
194 }
195
196 pub async fn escaped_string<F>(&mut self, mut is_delimiter: F) -> Result<EscapedString>
210 where
211 F: FnMut(char) -> bool,
212 {
213 self.escaped_string_dyn(&mut is_delimiter).await
214 }
215
216 async fn escaped_string_dyn(
218 &mut self,
219 is_delimiter: &mut dyn FnMut(char) -> bool,
220 ) -> Result<EscapedString> {
221 let mut this = self.disable_line_continuation();
222 let mut units = Vec::new();
223
224 while let Some(c) = this.peek_char().await? {
225 if is_delimiter(c) {
226 break;
227 }
228 let Some(unit) = this.escape_unit().await? else {
229 break;
230 };
231 units.push(unit);
232 }
233
234 Ok(EscapedString(units))
235 }
236
237 pub(super) async fn single_quoted_escaped_string(&mut self) -> Result<Option<EscapedString>> {
248 let is_single_quote = |c| c == '\'';
249
250 let Some(quote) = self.consume_char_if(is_single_quote).await? else {
252 return Ok(None);
253 };
254 let opening_location = quote.location.clone();
255
256 let content = self.escaped_string(is_single_quote).await?;
257
258 if let Some(quote) = self.peek_char().await? {
260 debug_assert_eq!(quote, '\'');
261 self.consume_char();
262 Ok(Some(content))
263 } else {
264 let cause = SyntaxError::UnclosedDollarSingleQuote { opening_location }.into();
265 let location = self.location().await?.clone();
266 Err(Error { cause, location })
267 }
268 }
269}
270
271#[cfg(test)]
272mod tests {
273 use super::*;
274 use crate::parser::ErrorCause;
275 use crate::source::Source;
276 use assert_matches::assert_matches;
277 use futures_util::FutureExt;
278
279 #[test]
280 fn escape_unit_literal() {
281 let mut lexer = Lexer::with_code("bar");
282 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
283 assert_eq!(result, Some(Literal('b')));
284 assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('a')));
285 }
286
287 #[test]
288 fn escape_unit_named_escapes() {
289 let mut lexer = Lexer::with_code(r#"\""\'\\\?\a\b\e\E\f\n\r\t\v"#);
290 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
291 assert_eq!(result, Some(DoubleQuote));
292 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
293 assert_eq!(result, Some(Literal('"')));
294 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
295 assert_eq!(result, Some(SingleQuote));
296 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
297 assert_eq!(result, Some(Backslash));
298 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
299 assert_eq!(result, Some(Question));
300 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
301 assert_eq!(result, Some(Alert));
302 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
303 assert_eq!(result, Some(Backspace));
304 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
305 assert_eq!(result, Some(Escape));
306 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
307 assert_eq!(result, Some(Escape));
308 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
309 assert_eq!(result, Some(FormFeed));
310 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
311 assert_eq!(result, Some(Newline));
312 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
313 assert_eq!(result, Some(CarriageReturn));
314 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
315 assert_eq!(result, Some(Tab));
316 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
317 assert_eq!(result, Some(VerticalTab));
318 assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
319 }
320
321 #[test]
322 fn escape_unit_incomplete_escapes() {
323 let mut lexer = Lexer::with_code(r"\");
324 let error = lexer.escape_unit().now_or_never().unwrap().unwrap_err();
325 assert_matches!(
326 error.cause,
327 ErrorCause::Syntax(SyntaxError::IncompleteEscape)
328 );
329 assert_eq!(*error.location.code.value.borrow(), r"\");
330 assert_eq!(error.location.code.start_line_number.get(), 1);
331 assert_eq!(*error.location.code.source, Source::Unknown);
332 assert_eq!(error.location.range, 1..1);
333 }
334
335 #[test]
336 fn escape_unit_control_escapes() {
337 let mut lexer = Lexer::with_code(r"\cA\cz\c^\c?\c\\");
338 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
339 assert_eq!(result, Some(Control(0x01)));
340 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
341 assert_eq!(result, Some(Control(0x1A)));
342 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
343 assert_eq!(result, Some(Control(0x1E)));
344 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
345 assert_eq!(result, Some(Control(0x7F)));
346 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
347 assert_eq!(result, Some(Control(0x1C)));
348 assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
349 }
350
351 #[test]
352 fn escape_unit_incomplete_control_escape() {
353 let mut lexer = Lexer::with_code(r"\c");
354 let error = lexer.escape_unit().now_or_never().unwrap().unwrap_err();
355 assert_matches!(
356 error.cause,
357 ErrorCause::Syntax(SyntaxError::IncompleteControlEscape)
358 );
359 assert_eq!(*error.location.code.value.borrow(), r"\c");
360 assert_eq!(error.location.code.start_line_number.get(), 1);
361 assert_eq!(*error.location.code.source, Source::Unknown);
362 assert_eq!(error.location.range, 2..2);
363 }
364
365 #[test]
366 fn escape_unit_incomplete_control_backslash_escapes() {
367 let mut lexer = Lexer::with_code(r"\c\");
368 let error = lexer.escape_unit().now_or_never().unwrap().unwrap_err();
369 assert_matches!(
370 error.cause,
371 ErrorCause::Syntax(SyntaxError::IncompleteControlBackslashEscape)
372 );
373 assert_eq!(*error.location.code.value.borrow(), r"\c\");
374 assert_eq!(error.location.code.start_line_number.get(), 1);
375 assert_eq!(*error.location.code.source, Source::Unknown);
376 assert_eq!(error.location.range, 3..3);
377
378 let mut lexer = Lexer::with_code(r"\c\a");
379 let error = lexer.escape_unit().now_or_never().unwrap().unwrap_err();
380 assert_matches!(
381 error.cause,
382 ErrorCause::Syntax(SyntaxError::IncompleteControlBackslashEscape)
383 );
384 assert_eq!(*error.location.code.value.borrow(), r"\c\a");
385 assert_eq!(error.location.code.start_line_number.get(), 1);
386 assert_eq!(*error.location.code.source, Source::Unknown);
387 assert_eq!(error.location.range, 3..4);
388 }
389
390 #[test]
391 fn escape_unit_unknown_control_escape() {
392 let mut lexer = Lexer::with_code(r"\c!`");
393 let error = lexer.escape_unit().now_or_never().unwrap().unwrap_err();
394 assert_matches!(
395 error.cause,
396 ErrorCause::Syntax(SyntaxError::InvalidControlEscape)
397 );
398 assert_eq!(*error.location.code.value.borrow(), r"\c!`");
399 assert_eq!(error.location.code.start_line_number.get(), 1);
400 assert_eq!(*error.location.code.source, Source::Unknown);
401 assert_eq!(error.location.range, 2..3);
402 }
403
404 #[test]
405 fn escape_unit_octal_escapes() {
406 let mut lexer = Lexer::with_code(r"\0\07\234\0123");
407 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
408 assert_eq!(result, Some(Octal(0o0)));
409 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
410 assert_eq!(result, Some(Octal(0o7)));
411 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
412 assert_eq!(result, Some(Octal(0o234)));
413 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
414 assert_eq!(result, Some(Octal(0o12)));
415 assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('3')));
417
418 let mut lexer = Lexer::with_code(r"\787");
419 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
420 assert_eq!(result, Some(Octal(0o7)));
422
423 let mut lexer = Lexer::with_code(r"\12");
424 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
425 assert_eq!(result, Some(Octal(0o12)));
427 }
428
429 #[test]
430 fn escape_unit_non_byte_octal_escape() {
431 let mut lexer = Lexer::with_code(r"\400");
432 let error = lexer.escape_unit().now_or_never().unwrap().unwrap_err();
433 assert_matches!(
434 error.cause,
435 ErrorCause::Syntax(SyntaxError::OctalEscapeOutOfRange)
436 );
437 assert_eq!(*error.location.code.value.borrow(), r"\400");
438 assert_eq!(error.location.code.start_line_number.get(), 1);
439 assert_eq!(*error.location.code.source, Source::Unknown);
440 assert_eq!(error.location.range, 0..4);
441 }
442
443 #[test]
444 fn escape_unit_hexadecimal_escapes() {
445 let mut lexer = Lexer::with_code(r"\x0\x7F\xd4A");
446 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
447 assert_eq!(result, Some(Hex(0x0)));
448 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
449 assert_eq!(result, Some(Hex(0x7F)));
450 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
451 assert_eq!(result, Some(Hex(0xD4)));
453 assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('A')));
454
455 let mut lexer = Lexer::with_code(r"\xb");
456 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
457 assert_eq!(result, Some(Hex(0xB)));
459 }
460
461 #[test]
462 fn escape_unit_incomplete_hexadecimal_escape() {
463 let mut lexer = Lexer::with_code(r"\x");
464 let error = lexer.escape_unit().now_or_never().unwrap().unwrap_err();
465 assert_matches!(
466 error.cause,
467 ErrorCause::Syntax(SyntaxError::IncompleteHexEscape)
468 );
469 assert_eq!(*error.location.code.value.borrow(), r"\x");
470 assert_eq!(error.location.code.start_line_number.get(), 1);
471 assert_eq!(*error.location.code.source, Source::Unknown);
472 assert_eq!(error.location.range, 2..2);
473 }
474
475 #[test]
476 fn escape_unit_unicode_escapes() {
477 let mut lexer = Lexer::with_code(r"\u20\u4B9d0");
478 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
479 assert_eq!(result, Some(Unicode('\u{20}')));
480 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
481 assert_eq!(result, Some(Unicode('\u{4B9D}')));
482 assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('0')));
484
485 let mut lexer = Lexer::with_code(r"\U42\U0001f4A9b");
486 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
487 assert_eq!(result, Some(Unicode('\u{42}')));
488 let result = lexer.escape_unit().now_or_never().unwrap().unwrap();
489 assert_eq!(result, Some(Unicode('\u{1F4A9}')));
490 assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('b')));
492 }
493
494 #[test]
495 fn escape_unit_incomplete_unicode_escapes() {
496 let mut lexer = Lexer::with_code(r"\u");
497 let error = lexer.escape_unit().now_or_never().unwrap().unwrap_err();
498 assert_matches!(
499 error.cause,
500 ErrorCause::Syntax(SyntaxError::IncompleteShortUnicodeEscape)
501 );
502 assert_eq!(*error.location.code.value.borrow(), r"\u");
503 assert_eq!(error.location.code.start_line_number.get(), 1);
504 assert_eq!(*error.location.code.source, Source::Unknown);
505 assert_eq!(error.location.range, 2..2);
506
507 let mut lexer = Lexer::with_code(r"\U");
508 let error = lexer.escape_unit().now_or_never().unwrap().unwrap_err();
509 assert_matches!(
510 error.cause,
511 ErrorCause::Syntax(SyntaxError::IncompleteLongUnicodeEscape)
512 );
513 assert_eq!(*error.location.code.value.borrow(), r"\U");
514 assert_eq!(error.location.code.start_line_number.get(), 1);
515 assert_eq!(*error.location.code.source, Source::Unknown);
516 assert_eq!(error.location.range, 2..2);
517 }
518
519 #[test]
520 fn escape_unit_invalid_unicode_escapes() {
521 let mut lexer = Lexer::with_code(r"\uD800");
523 let error = lexer.escape_unit().now_or_never().unwrap().unwrap_err();
524 assert_matches!(
525 error.cause,
526 ErrorCause::Syntax(SyntaxError::UnicodeEscapeOutOfRange)
527 );
528 assert_eq!(*error.location.code.value.borrow(), r"\uD800");
529 assert_eq!(error.location.code.start_line_number.get(), 1);
530 assert_eq!(*error.location.code.source, Source::Unknown);
531 assert_eq!(error.location.range, 0..6);
532 }
533
534 #[test]
535 fn escape_unit_unknown_escape() {
536 let mut lexer = Lexer::with_code(r"\!");
537 let error = lexer.escape_unit().now_or_never().unwrap().unwrap_err();
538 assert_matches!(error.cause, ErrorCause::Syntax(SyntaxError::InvalidEscape));
539 assert_eq!(*error.location.code.value.borrow(), r"\!");
540 assert_eq!(error.location.code.start_line_number.get(), 1);
541 assert_eq!(*error.location.code.source, Source::Unknown);
542 assert_eq!(error.location.range, 0..2);
543 }
544
545 #[test]
548 fn escaped_string_literals() {
549 let mut lexer = Lexer::with_code("foo");
550 let EscapedString(content) = lexer
551 .escaped_string(|_| false)
552 .now_or_never()
553 .unwrap()
554 .unwrap();
555 assert_eq!(content, [Literal('f'), Literal('o'), Literal('o')]);
556 }
557
558 #[test]
559 fn escaped_string_mixed() {
560 let mut lexer = Lexer::with_code(r"foo\bar");
561 let EscapedString(content) = lexer
562 .escaped_string(|_| false)
563 .now_or_never()
564 .unwrap()
565 .unwrap();
566 assert_eq!(
567 content,
568 [
569 Literal('f'),
570 Literal('o'),
571 Literal('o'),
572 Backspace,
573 Literal('a'),
574 Literal('r')
575 ]
576 );
577 }
578
579 #[test]
580 fn no_line_continuations_in_escaped_string() {
581 let mut lexer = Lexer::with_code("\\\\\n");
582 let EscapedString(content) = lexer
583 .escaped_string(|_| false)
584 .now_or_never()
585 .unwrap()
586 .unwrap();
587 assert_eq!(content, [Backslash, Literal('\n')]);
588
589 let mut lexer = Lexer::with_code("\\\n");
590 let error = lexer
591 .escaped_string(|_| false)
592 .now_or_never()
593 .unwrap()
594 .unwrap_err();
595 assert_matches!(error.cause, ErrorCause::Syntax(SyntaxError::InvalidEscape));
596 assert_eq!(*error.location.code.value.borrow(), "\\\n");
597 assert_eq!(error.location.code.start_line_number.get(), 1);
598 assert_eq!(*error.location.code.source, Source::Unknown);
599 assert_eq!(error.location.range, 0..2);
600 }
601
602 #[test]
603 fn single_quoted_escaped_string_empty() {
604 let mut lexer = Lexer::with_code("''");
605 let result = lexer
606 .single_quoted_escaped_string()
607 .now_or_never()
608 .unwrap()
609 .unwrap();
610 assert_eq!(result, Some(EscapedString(vec![])));
611 }
612
613 #[test]
614 fn single_quoted_escaped_string_nonempty() {
615 let mut lexer = Lexer::with_code(r"'foo\e'x");
616 let result = lexer
617 .single_quoted_escaped_string()
618 .now_or_never()
619 .unwrap()
620 .unwrap();
621 assert_matches!(result, Some(EscapedString(content)) => {
622 assert_eq!(
623 content,
624 [
625 Literal('f'),
626 Literal('o'),
627 Literal('o'),
628 Escape,
629 ]
630 );
631 });
632 assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('x')));
633 }
634
635 #[test]
636 fn single_quoted_escaped_string_unclosed() {
637 let mut lexer = Lexer::with_code("'foo");
638 let error = lexer
639 .single_quoted_escaped_string()
640 .now_or_never()
641 .unwrap()
642 .unwrap_err();
643 assert_matches!(
644 error.cause,
645 ErrorCause::Syntax(SyntaxError::UnclosedDollarSingleQuote { opening_location }) => {
646 assert_eq!(*opening_location.code.value.borrow(), "'foo");
647 assert_eq!(opening_location.code.start_line_number.get(), 1);
648 assert_eq!(*opening_location.code.source, Source::Unknown);
649 assert_eq!(opening_location.range, 0..1);
650 }
651 );
652 assert_eq!(*error.location.code.value.borrow(), "'foo");
653 assert_eq!(error.location.code.start_line_number.get(), 1);
654 assert_eq!(*error.location.code.source, Source::Unknown);
655 assert_eq!(error.location.range, 4..4);
656 }
657}