1use bstr::ByteSlice;
4use std::fmt;
5
6use ruff_python_ast::token::TokenKind;
7use ruff_python_ast::{self as ast, AnyStringFlags, AtomicNodeIndex, Expr, StringFlags};
8use ruff_text_size::{Ranged, TextRange, TextSize};
9
10use crate::error::{LexicalError, LexicalErrorType};
11
12#[derive(Debug)]
13pub(crate) enum StringType {
14 Str(ast::StringLiteral),
15 Bytes(ast::BytesLiteral),
16 FString(ast::FString),
17 TString(ast::TString),
18}
19
20impl Ranged for StringType {
21 fn range(&self) -> TextRange {
22 match self {
23 Self::Str(node) => node.range(),
24 Self::Bytes(node) => node.range(),
25 Self::FString(node) => node.range(),
26 Self::TString(node) => node.range(),
27 }
28 }
29}
30
31impl From<StringType> for Expr {
32 fn from(string: StringType) -> Self {
33 match string {
34 StringType::Str(node) => Expr::from(node),
35 StringType::Bytes(node) => Expr::from(node),
36 StringType::FString(node) => Expr::from(node),
37 StringType::TString(node) => Expr::from(node),
38 }
39 }
40}
41
42#[derive(Debug, Clone, Copy, PartialEq, Eq)]
43pub(crate) enum InterpolatedStringKind {
44 FString,
45 TString,
46}
47
48impl InterpolatedStringKind {
49 #[inline]
50 pub(crate) const fn start_token(self) -> TokenKind {
51 match self {
52 InterpolatedStringKind::FString => TokenKind::FStringStart,
53 InterpolatedStringKind::TString => TokenKind::TStringStart,
54 }
55 }
56
57 #[inline]
58 pub(crate) const fn middle_token(self) -> TokenKind {
59 match self {
60 InterpolatedStringKind::FString => TokenKind::FStringMiddle,
61 InterpolatedStringKind::TString => TokenKind::TStringMiddle,
62 }
63 }
64
65 #[inline]
66 pub(crate) const fn end_token(self) -> TokenKind {
67 match self {
68 InterpolatedStringKind::FString => TokenKind::FStringEnd,
69 InterpolatedStringKind::TString => TokenKind::TStringEnd,
70 }
71 }
72}
73
74impl fmt::Display for InterpolatedStringKind {
75 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
76 match self {
77 InterpolatedStringKind::FString => f.write_str("f-string"),
78 InterpolatedStringKind::TString => f.write_str("t-string"),
79 }
80 }
81}
82
83enum EscapedChar {
84 Literal(char),
85 Escape(char),
86}
87
88struct StringParser {
89 source: Box<str>,
91 cursor: usize,
93 flags: AnyStringFlags,
95 offset: TextSize,
97 range: TextRange,
99}
100
101impl StringParser {
102 fn new(source: Box<str>, flags: AnyStringFlags, offset: TextSize, range: TextRange) -> Self {
103 Self {
104 source,
105 cursor: 0,
106 flags,
107 offset,
108 range,
109 }
110 }
111
112 #[inline]
113 fn skip_bytes(&mut self, bytes: usize) -> &str {
114 let skipped_str = &self.source[self.cursor..self.cursor + bytes];
115 self.cursor += bytes;
116 skipped_str
117 }
118
119 #[inline]
121 fn position(&self) -> TextSize {
122 self.compute_position(self.cursor)
123 }
124
125 #[inline]
127 fn compute_position(&self, cursor: usize) -> TextSize {
128 self.offset + TextSize::try_from(cursor).unwrap()
129 }
130
131 #[inline]
137 fn next_byte(&mut self) -> Option<u8> {
138 self.source[self.cursor..].as_bytes().first().map(|&byte| {
139 self.cursor += 1;
140 byte
141 })
142 }
143
144 #[inline]
145 fn next_char(&mut self) -> Option<char> {
146 self.source[self.cursor..].chars().next().inspect(|c| {
147 self.cursor += c.len_utf8();
148 })
149 }
150
151 #[inline]
152 fn peek_byte(&self) -> Option<u8> {
153 self.source[self.cursor..].as_bytes().first().copied()
154 }
155
156 fn parse_unicode_literal(&mut self, literal_number: usize) -> Result<char, LexicalError> {
157 let mut p: u32 = 0u32;
158 for i in 1..=literal_number {
159 let start = self.position();
160 match self.next_char() {
161 Some(c) => match c.to_digit(16) {
162 Some(d) => p += d << ((literal_number - i) * 4),
163 None => {
164 return Err(LexicalError::new(
165 LexicalErrorType::UnicodeError,
166 TextRange::at(start, TextSize::try_from(c.len_utf8()).unwrap()),
167 ));
168 }
169 },
170 None => {
171 return Err(LexicalError::new(
172 LexicalErrorType::UnicodeError,
173 TextRange::empty(self.position()),
174 ));
175 }
176 }
177 }
178 match p {
179 0xD800..=0xDFFF => Ok(std::char::REPLACEMENT_CHARACTER),
180 _ => std::char::from_u32(p).ok_or(LexicalError::new(
181 LexicalErrorType::UnicodeError,
182 TextRange::empty(self.position()),
183 )),
184 }
185 }
186
187 fn parse_octet(&mut self, o: u8) -> char {
188 let mut radix_bytes = [o, 0, 0];
189 let mut len = 1;
190
191 while len < 3 {
192 let Some(b'0'..=b'7') = self.peek_byte() else {
193 break;
194 };
195
196 radix_bytes[len] = self.next_byte().unwrap();
197 len += 1;
198 }
199
200 let radix_str = std::str::from_utf8(&radix_bytes[..len]).expect("ASCII bytes");
202 let value = u32::from_str_radix(radix_str, 8).unwrap();
203 char::from_u32(value).unwrap()
204 }
205
206 fn parse_unicode_name(&mut self) -> Result<char, LexicalError> {
207 let start_pos = self.position();
208 let Some('{') = self.next_char() else {
209 return Err(LexicalError::new(
210 LexicalErrorType::MissingUnicodeLbrace,
211 TextRange::empty(start_pos),
212 ));
213 };
214
215 let start_pos = self.position();
216 let Some(close_idx) = self.source[self.cursor..].find('}') else {
217 return Err(LexicalError::new(
218 LexicalErrorType::MissingUnicodeRbrace,
219 TextRange::empty(self.compute_position(self.source.len())),
220 ));
221 };
222
223 let name_and_ending = self.skip_bytes(close_idx + 1);
224 let name = &name_and_ending[..name_and_ending.len() - 1];
225
226 unicode_names2::character(name).ok_or_else(|| {
227 LexicalError::new(
228 LexicalErrorType::UnicodeError,
229 TextRange::new(
232 start_pos,
233 self.compute_position(self.cursor - '}'.len_utf8()),
234 ),
235 )
236 })
237 }
238
239 fn parse_escaped_char(&mut self) -> Result<Option<EscapedChar>, LexicalError> {
241 let Some(first_char) = self.next_char() else {
242 return Err(LexicalError::new(
244 LexicalErrorType::StringError,
245 TextRange::empty(self.position()),
246 ));
247 };
248
249 let new_char = match first_char {
250 '\\' => '\\',
251 '\'' => '\'',
252 '\"' => '"',
253 'a' => '\x07',
254 'b' => '\x08',
255 'f' => '\x0c',
256 'n' => '\n',
257 'r' => '\r',
258 't' => '\t',
259 'v' => '\x0b',
260 o @ '0'..='7' => self.parse_octet(o as u8),
261 'x' => self.parse_unicode_literal(2)?,
262 'u' if !self.flags.is_byte_string() => self.parse_unicode_literal(4)?,
263 'U' if !self.flags.is_byte_string() => self.parse_unicode_literal(8)?,
264 'N' if !self.flags.is_byte_string() => self.parse_unicode_name()?,
265 '\n' => return Ok(None),
267 '\r' => {
268 if self.peek_byte() == Some(b'\n') {
269 self.next_byte();
270 }
271
272 return Ok(None);
273 }
274 _ => return Ok(Some(EscapedChar::Escape(first_char))),
275 };
276
277 Ok(Some(EscapedChar::Literal(new_char)))
278 }
279
280 fn parse_interpolated_string_middle(
281 mut self,
282 ) -> Result<ast::InterpolatedStringLiteralElement, LexicalError> {
283 let Some(mut index) = memchr::memchr3(b'{', b'}', b'\\', self.source.as_bytes()) else {
285 return Ok(ast::InterpolatedStringLiteralElement {
286 value: self.source,
287 range: self.range,
288 node_index: AtomicNodeIndex::NONE,
289 });
290 };
291
292 let mut value = String::with_capacity(self.source.len());
293 loop {
294 let before_with_slash_or_brace = self.skip_bytes(index + 1);
296 let before = &before_with_slash_or_brace[..before_with_slash_or_brace.len() - 1];
297 value.push_str(before);
298
299 match &self.source.as_bytes()[self.cursor - 1] {
301 b'{' => {
305 self.offset += TextSize::from(1);
306 value.push('{');
307 }
308 b'}' => {
309 self.offset += TextSize::from(1);
310 value.push('}');
311 }
312 b'\\' => {
333 if !self.flags.is_raw_string() && self.peek_byte().is_some() {
334 match self.parse_escaped_char()? {
335 None => {}
336 Some(EscapedChar::Literal(c)) => value.push(c),
337 Some(EscapedChar::Escape(c)) => {
338 value.push('\\');
339 value.push(c);
340 }
341 }
342 } else {
343 value.push('\\');
344 }
345 }
346 ch => {
347 unreachable!("Expected '{{', '}}', or '\\' but got {:?}", ch);
348 }
349 }
350
351 let Some(next_index) =
352 memchr::memchr3(b'{', b'}', b'\\', self.source[self.cursor..].as_bytes())
353 else {
354 let rest = &self.source[self.cursor..];
356 value.push_str(rest);
357 break;
358 };
359
360 index = next_index;
361 }
362
363 Ok(ast::InterpolatedStringLiteralElement {
364 value: value.into_boxed_str(),
365 range: self.range,
366 node_index: AtomicNodeIndex::NONE,
367 })
368 }
369
370 fn parse_bytes(mut self) -> Result<StringType, LexicalError> {
371 if let Some(index) = self.source.as_bytes().find_non_ascii_byte() {
372 let ch = self.source.chars().nth(index).unwrap();
373 return Err(LexicalError::new(
374 LexicalErrorType::InvalidByteLiteral,
375 TextRange::at(
376 self.compute_position(index),
377 TextSize::try_from(ch.len_utf8()).unwrap(),
378 ),
379 ));
380 }
381
382 if self.flags.is_raw_string() {
383 return Ok(StringType::Bytes(ast::BytesLiteral {
385 value: self.source.into_boxed_bytes(),
386 range: self.range,
387 flags: self.flags.into(),
388 node_index: AtomicNodeIndex::NONE,
389 }));
390 }
391
392 let Some(mut escape) = memchr::memchr(b'\\', self.source.as_bytes()) else {
393 return Ok(StringType::Bytes(ast::BytesLiteral {
395 value: self.source.into_boxed_bytes(),
396 range: self.range,
397 flags: self.flags.into(),
398 node_index: AtomicNodeIndex::NONE,
399 }));
400 };
401
402 let mut value = Vec::with_capacity(self.source.len());
404 loop {
405 let before_with_slash = self.skip_bytes(escape + 1);
407 let before = &before_with_slash[..before_with_slash.len() - 1];
408 value.extend_from_slice(before.as_bytes());
409
410 match self.parse_escaped_char()? {
412 None => {}
413 Some(EscapedChar::Literal(c)) => value.push(c as u8),
414 Some(EscapedChar::Escape(c)) => {
415 value.push(b'\\');
416 value.push(c as u8);
417 }
418 }
419
420 let Some(next_escape) = memchr::memchr(b'\\', self.source[self.cursor..].as_bytes())
421 else {
422 let rest = &self.source[self.cursor..];
424 value.extend_from_slice(rest.as_bytes());
425 break;
426 };
427
428 escape = next_escape;
430 }
431
432 Ok(StringType::Bytes(ast::BytesLiteral {
433 value: value.into_boxed_slice(),
434 range: self.range,
435 flags: self.flags.into(),
436 node_index: AtomicNodeIndex::NONE,
437 }))
438 }
439
440 fn parse_string(mut self) -> Result<StringType, LexicalError> {
441 if self.flags.is_raw_string() {
442 return Ok(StringType::Str(ast::StringLiteral {
444 value: self.source,
445 range: self.range,
446 flags: self.flags.into(),
447 node_index: AtomicNodeIndex::NONE,
448 }));
449 }
450
451 let Some(mut escape) = memchr::memchr(b'\\', self.source.as_bytes()) else {
452 return Ok(StringType::Str(ast::StringLiteral {
454 value: self.source,
455 range: self.range,
456 flags: self.flags.into(),
457 node_index: AtomicNodeIndex::NONE,
458 }));
459 };
460
461 let mut value = String::with_capacity(self.source.len());
463
464 loop {
465 let before_with_slash = self.skip_bytes(escape + 1);
467 let before = &before_with_slash[..before_with_slash.len() - 1];
468 value.push_str(before);
469
470 match self.parse_escaped_char()? {
472 None => {}
473 Some(EscapedChar::Literal(c)) => value.push(c),
474 Some(EscapedChar::Escape(c)) => {
475 value.push('\\');
476 value.push(c);
477 }
478 }
479
480 let Some(next_escape) = self.source[self.cursor..].find('\\') else {
481 let rest = &self.source[self.cursor..];
483 value.push_str(rest);
484 break;
485 };
486
487 escape = next_escape;
489 }
490
491 Ok(StringType::Str(ast::StringLiteral {
492 value: value.into_boxed_str(),
493 range: self.range,
494 flags: self.flags.into(),
495 node_index: AtomicNodeIndex::NONE,
496 }))
497 }
498
499 fn parse(self) -> Result<StringType, LexicalError> {
500 if self.flags.is_byte_string() {
501 self.parse_bytes()
502 } else {
503 self.parse_string()
504 }
505 }
506}
507
508pub(crate) fn parse_string_literal(
509 source: Box<str>,
510 flags: AnyStringFlags,
511 range: TextRange,
512) -> Result<StringType, LexicalError> {
513 StringParser::new(source, flags, range.start() + flags.opener_len(), range).parse()
514}
515
516pub(crate) fn parse_interpolated_string_literal_element(
518 source: Box<str>,
519 flags: AnyStringFlags,
520 range: TextRange,
521) -> Result<ast::InterpolatedStringLiteralElement, LexicalError> {
522 StringParser::new(source, flags, range.start(), range).parse_interpolated_string_middle()
523}
524
#[cfg(test)]
mod tests {
    use ruff_python_ast::Suite;

    use crate::error::LexicalErrorType;
    use crate::{InterpolatedStringErrorType, ParseError, ParseErrorType, Parsed, parse_module};

    const WINDOWS_EOL: &str = "\r\n";
    const MAC_EOL: &str = "\r";
    const UNIX_EOL: &str = "\n";

    /// Parses `source` as a module and returns its statements.
    fn parse_suite(source: &str) -> Result<Suite, ParseError> {
        parse_module(source).map(Parsed::into_suite)
    }

    /// Parses a string literal in which a backslash is directly followed by `eol`.
    fn string_parser_escaped_eol(eol: &str) -> Suite {
        parse_suite(&format!(r"'text \{eol}more text'")).unwrap()
    }

    // NOTE: the snapshot tests below keep the macro invocation inside each
    // test function and keep the `suite` / `error` binding names as they are.

    #[test]
    fn test_string_parser_escaped_unix_eol() {
        let suite = string_parser_escaped_eol(UNIX_EOL);
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_string_parser_escaped_mac_eol() {
        let suite = string_parser_escaped_eol(MAC_EOL);
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_string_parser_escaped_windows_eol() {
        let suite = string_parser_escaped_eol(WINDOWS_EOL);
        insta::assert_debug_snapshot!(suite);
    }

    // ---- f-strings ----

    #[test]
    fn test_parse_fstring() {
        let suite = parse_suite(r#"f"{a}{ b }{{foo}}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_fstring_nested_spec() {
        let suite = parse_suite(r#"f"{foo:{spec}}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_fstring_not_nested_spec() {
        let suite = parse_suite(r#"f"{foo:spec}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_empty_fstring() {
        let suite = parse_suite(r#"f"""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_fstring_parse_self_documenting_base() {
        let suite = parse_suite(r#"f"{user=}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_fstring_parse_self_documenting_base_more() {
        let suite = parse_suite(r#"f"mix {user=} with text and {second=}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_fstring_parse_self_documenting_format() {
        let suite = parse_suite(r#"f"{user=:>10}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    /// Parses `source`, which must fail, and returns the f-string error it failed with.
    fn parse_fstring_error(source: &str) -> InterpolatedStringErrorType {
        let failure = parse_suite(source).expect_err("Expected error");
        match failure.error {
            ParseErrorType::Lexical(LexicalErrorType::FStringError(e)) => e,
            ParseErrorType::FStringError(e) => e,
            e => unreachable!("Expected FStringError: {:?}", e),
        }
    }

    #[test]
    fn test_parse_invalid_fstring() {
        use InterpolatedStringErrorType::{InvalidConversionFlag, LambdaWithoutParentheses};

        assert_eq!(parse_fstring_error(r#"f"{5!x}""#), InvalidConversionFlag);
        assert_eq!(
            parse_fstring_error("f'{lambda x:{x}}'"),
            LambdaWithoutParentheses
        );
        assert!(parse_suite(r#"f"{class}""#).is_err());
    }

    #[test]
    fn test_parse_fstring_not_equals() {
        let suite = parse_suite(r#"f"{1 != 2}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_fstring_equals() {
        let suite = parse_suite(r#"f"{42 == 42}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_fstring_self_doc_prec_space() {
        let suite = parse_suite(r#"f"{x =}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_fstring_self_doc_trailing_space() {
        let suite = parse_suite(r#"f"{x= }""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_fstring_yield_expr() {
        let suite = parse_suite(r#"f"{yield}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    // ---- t-strings ----

    #[test]
    fn test_parse_tstring() {
        let suite = parse_suite(r#"t"{a}{ b }{{foo}}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_tstring_nested_spec() {
        let suite = parse_suite(r#"t"{foo:{spec}}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_tstring_not_nested_spec() {
        let suite = parse_suite(r#"t"{foo:spec}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_empty_tstring() {
        let suite = parse_suite(r#"t"""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_tstring_parse_self_documenting_base() {
        let suite = parse_suite(r#"t"{user=}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_tstring_parse_self_documenting_base_more() {
        let suite = parse_suite(r#"t"mix {user=} with text and {second=}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_tstring_parse_self_documenting_format() {
        let suite = parse_suite(r#"t"{user=:>10}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    /// Parses `source`, which must fail, and returns the t-string error it failed with.
    fn parse_tstring_error(source: &str) -> InterpolatedStringErrorType {
        let failure = parse_suite(source).expect_err("Expected error");
        match failure.error {
            ParseErrorType::Lexical(LexicalErrorType::TStringError(e)) => e,
            ParseErrorType::TStringError(e) => e,
            e => unreachable!("Expected TStringError: {:?}", e),
        }
    }

    #[test]
    fn test_parse_invalid_tstring() {
        use InterpolatedStringErrorType::{InvalidConversionFlag, LambdaWithoutParentheses};

        assert_eq!(parse_tstring_error(r#"t"{5!x}""#), InvalidConversionFlag);
        assert_eq!(
            parse_tstring_error("t'{lambda x:{x}}'"),
            LambdaWithoutParentheses
        );
        assert!(parse_suite(r#"t"{class}""#).is_err());
    }

    #[test]
    fn test_parse_tstring_not_equals() {
        let suite = parse_suite(r#"t"{1 != 2}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_tstring_equals() {
        let suite = parse_suite(r#"t"{42 == 42}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_tstring_self_doc_prec_space() {
        let suite = parse_suite(r#"t"{x =}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_tstring_self_doc_trailing_space() {
        let suite = parse_suite(r#"t"{x= }""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_tstring_yield_expr() {
        let suite = parse_suite(r#"t"{yield}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    // ---- implicit concatenation ----

    #[test]
    fn test_parse_string_concat() {
        let suite = parse_suite("'Hello ' 'world'").unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_u_string_concat_1() {
        let suite = parse_suite("'Hello ' u'world'").unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_u_string_concat_2() {
        let suite = parse_suite("u'Hello ' 'world'").unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_f_string_concat_1() {
        let suite = parse_suite("'Hello ' f'world'").unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_f_string_concat_2() {
        let suite = parse_suite("'Hello ' f'world'").unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_f_string_concat_3() {
        let suite = parse_suite("'Hello ' f'world{\"!\"}'").unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_f_string_concat_4() {
        let suite = parse_suite("'Hello ' f'world{\"!\"}' 'again!'").unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_u_f_string_concat_1() {
        let suite = parse_suite("u'Hello ' f'world'").unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_u_f_string_concat_2() {
        let suite = parse_suite("u'Hello ' f'world' '!'").unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_t_string_concat_1_error() {
        let suite = parse_suite("'Hello ' t'world'").unwrap_err();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_t_string_concat_2_error() {
        let suite = parse_suite("'Hello ' t'world'").unwrap_err();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_t_string_concat_3_error() {
        let suite = parse_suite("'Hello ' t'world{\"!\"}'").unwrap_err();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_t_string_concat_4_error() {
        let suite = parse_suite("'Hello ' t'world{\"!\"}' 'again!'").unwrap_err();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_u_t_string_concat_1_error() {
        let suite = parse_suite("u'Hello ' t'world'").unwrap_err();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_u_t_string_concat_2_error() {
        let suite = parse_suite("u'Hello ' t'world' '!'").unwrap_err();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_f_t_string_concat_1_error() {
        let suite = parse_suite("f'Hello ' t'world'").unwrap_err();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_f_t_string_concat_2_error() {
        let suite = parse_suite("f'Hello ' t'world' '!'").unwrap_err();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_string_triple_quotes_with_kind() {
        let suite = parse_suite("u'''Hello, world!'''").unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    // ---- bytes literals ----

    #[test]
    fn test_single_quoted_byte() {
        let source = r##"b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'"##;
        let suite = parse_suite(source).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_double_quoted_byte() {
        let source = r##"b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff""##;
        let suite = parse_suite(source).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_escape_char_in_byte_literal() {
        let suite = parse_suite(r#"b"omkmok\Xaa""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_raw_byte_literal_1() {
        let suite = parse_suite(r"rb'\x1z'").unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_raw_byte_literal_2() {
        let suite = parse_suite(r"rb'\\'").unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_escape_octet() {
        let suite = parse_suite(r"b'\43a\4\1234'").unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    // ---- f-string literal elements ----

    #[test]
    fn test_fstring_escaped_newline() {
        let suite = parse_suite(r#"f"\n{x}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_fstring_constant_range() {
        let suite = parse_suite(r#"f"aaa{bbb}ccc{ddd}eee""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_fstring_unescaped_newline() {
        let source = r#"f"""
{x}""""#;
        let suite = parse_suite(source).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_fstring_escaped_character() {
        let suite = parse_suite(r#"f"\\{x}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_raw_fstring() {
        let suite = parse_suite(r#"rf"{x}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_triple_quoted_raw_fstring() {
        let suite = parse_suite(r#"rf"""{x}""""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_fstring_line_continuation() {
        let source = r#"rf"\
{x}""#;
        let suite = parse_suite(source).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_fstring_nested_string_spec() {
        let suite = parse_suite(r#"f"{foo:{''}}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_fstring_nested_concatenation_string_spec() {
        let suite = parse_suite(r#"f"{foo:{'' ''}}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    // ---- t-string literal elements ----

    #[test]
    fn test_tstring_escaped_newline() {
        let suite = parse_suite(r#"t"\n{x}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_tstring_constant_range() {
        let suite = parse_suite(r#"t"aaa{bbb}ccc{ddd}eee""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_tstring_unescaped_newline() {
        let source = r#"t"""
{x}""""#;
        let suite = parse_suite(source).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_tstring_escaped_character() {
        let suite = parse_suite(r#"t"\\{x}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_raw_tstring() {
        let suite = parse_suite(r#"rt"{x}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_triple_quoted_raw_tstring() {
        let suite = parse_suite(r#"rt"""{x}""""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_tstring_line_continuation() {
        let source = r#"rt"\
{x}""#;
        let suite = parse_suite(source).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_tstring_nested_string_spec() {
        let suite = parse_suite(r#"t"{foo:{''}}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_parse_tstring_nested_concatenation_string_spec() {
        let suite = parse_suite(r#"t"{foo:{'' ''}}""#).unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    // ---- escape sequences and their errors ----

    #[test]
    fn test_dont_panic_on_8_in_octal_escape() {
        let suite = parse_suite(r"bold = '\038[1m'").unwrap();
        insta::assert_debug_snapshot!(suite);
    }

    #[test]
    fn test_invalid_unicode_literal() {
        let error = parse_suite(r"'\x1ó34'").unwrap_err();
        insta::assert_debug_snapshot!(error);
    }

    #[test]
    fn test_missing_unicode_lbrace_error() {
        let error = parse_suite(r"'\N '").unwrap_err();
        insta::assert_debug_snapshot!(error);
    }

    #[test]
    fn test_missing_unicode_rbrace_error() {
        let error = parse_suite(r"'\N{SPACE'").unwrap_err();
        insta::assert_debug_snapshot!(error);
    }

    #[test]
    fn test_invalid_unicode_name_error() {
        let error = parse_suite(r"'\N{INVALID}'").unwrap_err();
        insta::assert_debug_snapshot!(error);
    }

    #[test]
    fn test_invalid_byte_literal_error() {
        let error = parse_suite(r"b'123a𝐁c'").unwrap_err();
        insta::assert_debug_snapshot!(error);
    }

    /// Generates one snapshot test per `name: alias` pair; each test parses a
    /// string literal consisting of a single `\N{alias}` escape.
    macro_rules! test_aliases_parse {
        ($($name:ident: $alias:expr,)*) => {
            $(
                #[test]
                fn $name() {
                    let source = format!(r#""\N{{{0}}}""#, $alias);
                    let suite = parse_suite(&source).unwrap();
                    insta::assert_debug_snapshot!(suite);
                }
            )*
        }
    }

    test_aliases_parse! {
        test_backspace_alias: "BACKSPACE",
        test_bell_alias: "BEL",
        test_carriage_return_alias: "CARRIAGE RETURN",
        test_delete_alias: "DELETE",
        test_escape_alias: "ESCAPE",
        test_form_feed_alias: "FORM FEED",
        test_hts_alias: "HTS",
        test_character_tabulation_with_justification_alias: "CHARACTER TABULATION WITH JUSTIFICATION",
    }
}