rustpython_ruff_python_parser/
string.rs

1//! Parsing of string literals, bytes literals, and implicit string concatenation.
2
3use bstr::ByteSlice;
4use std::fmt;
5
6use ruff_python_ast::token::TokenKind;
7use ruff_python_ast::{self as ast, AnyStringFlags, AtomicNodeIndex, Expr, StringFlags};
8use ruff_text_size::{Ranged, TextRange, TextSize};
9
10use crate::error::{LexicalError, LexicalErrorType};
11
/// A single parsed string-like literal, tagged by the kind of AST node it wraps.
#[derive(Debug)]
pub(crate) enum StringType {
    /// A string literal node.
    Str(ast::StringLiteral),
    /// A bytes literal node.
    Bytes(ast::BytesLiteral),
    /// An f-string node.
    FString(ast::FString),
    /// A t-string node.
    TString(ast::TString),
}
19
20impl Ranged for StringType {
21    fn range(&self) -> TextRange {
22        match self {
23            Self::Str(node) => node.range(),
24            Self::Bytes(node) => node.range(),
25            Self::FString(node) => node.range(),
26            Self::TString(node) => node.range(),
27        }
28    }
29}
30
31impl From<StringType> for Expr {
32    fn from(string: StringType) -> Self {
33        match string {
34            StringType::Str(node) => Expr::from(node),
35            StringType::Bytes(node) => Expr::from(node),
36            StringType::FString(node) => Expr::from(node),
37            StringType::TString(node) => Expr::from(node),
38        }
39    }
40}
41
/// The two kinds of interpolated string: f-strings and t-strings.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum InterpolatedStringKind {
    /// An f-string.
    FString,
    /// A t-string.
    TString,
}
47
48impl InterpolatedStringKind {
49    #[inline]
50    pub(crate) const fn start_token(self) -> TokenKind {
51        match self {
52            InterpolatedStringKind::FString => TokenKind::FStringStart,
53            InterpolatedStringKind::TString => TokenKind::TStringStart,
54        }
55    }
56
57    #[inline]
58    pub(crate) const fn middle_token(self) -> TokenKind {
59        match self {
60            InterpolatedStringKind::FString => TokenKind::FStringMiddle,
61            InterpolatedStringKind::TString => TokenKind::TStringMiddle,
62        }
63    }
64
65    #[inline]
66    pub(crate) const fn end_token(self) -> TokenKind {
67        match self {
68            InterpolatedStringKind::FString => TokenKind::FStringEnd,
69            InterpolatedStringKind::TString => TokenKind::TStringEnd,
70        }
71    }
72}
73
74impl fmt::Display for InterpolatedStringKind {
75    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
76        match self {
77            InterpolatedStringKind::FString => f.write_str("f-string"),
78            InterpolatedStringKind::TString => f.write_str("t-string"),
79        }
80    }
81}
82
/// Outcome of decoding the character that follows a backslash.
enum EscapedChar {
    /// A recognized escape sequence, already decoded to the character it denotes.
    Literal(char),
    /// An unrecognized escape; callers emit a backslash followed by this character.
    Escape(char),
}
87
/// Cursor-based parser over the contents of a single string-like token.
///
/// The parser walks `source` through `cursor` while decoding escape sequences, and uses
/// `offset` to translate cursor positions into file positions for error ranges.
struct StringParser {
    /// The raw content of the string e.g., the `foo` part in `"foo"`.
    source: Box<str>,
    /// Current position of the parser in the source.
    cursor: usize,
    /// Flags that can be used to query information about the string.
    flags: AnyStringFlags,
    /// The location of the first character in the source from the start of the file.
    offset: TextSize,
    /// The range of the string literal.
    range: TextRange,
}
100
101impl StringParser {
102    fn new(source: Box<str>, flags: AnyStringFlags, offset: TextSize, range: TextRange) -> Self {
103        Self {
104            source,
105            cursor: 0,
106            flags,
107            offset,
108            range,
109        }
110    }
111
112    #[inline]
113    fn skip_bytes(&mut self, bytes: usize) -> &str {
114        let skipped_str = &self.source[self.cursor..self.cursor + bytes];
115        self.cursor += bytes;
116        skipped_str
117    }
118
119    /// Returns the current position of the parser considering the offset.
120    #[inline]
121    fn position(&self) -> TextSize {
122        self.compute_position(self.cursor)
123    }
124
125    /// Computes the position of the cursor considering the offset.
126    #[inline]
127    fn compute_position(&self, cursor: usize) -> TextSize {
128        self.offset + TextSize::try_from(cursor).unwrap()
129    }
130
131    /// Returns the next byte in the string, if there is one.
132    ///
133    /// # Panics
134    ///
135    /// When the next byte is a part of a multi-byte character.
136    #[inline]
137    fn next_byte(&mut self) -> Option<u8> {
138        self.source[self.cursor..].as_bytes().first().map(|&byte| {
139            self.cursor += 1;
140            byte
141        })
142    }
143
144    #[inline]
145    fn next_char(&mut self) -> Option<char> {
146        self.source[self.cursor..].chars().next().inspect(|c| {
147            self.cursor += c.len_utf8();
148        })
149    }
150
151    #[inline]
152    fn peek_byte(&self) -> Option<u8> {
153        self.source[self.cursor..].as_bytes().first().copied()
154    }
155
156    fn parse_unicode_literal(&mut self, literal_number: usize) -> Result<char, LexicalError> {
157        let mut p: u32 = 0u32;
158        for i in 1..=literal_number {
159            let start = self.position();
160            match self.next_char() {
161                Some(c) => match c.to_digit(16) {
162                    Some(d) => p += d << ((literal_number - i) * 4),
163                    None => {
164                        return Err(LexicalError::new(
165                            LexicalErrorType::UnicodeError,
166                            TextRange::at(start, TextSize::try_from(c.len_utf8()).unwrap()),
167                        ));
168                    }
169                },
170                None => {
171                    return Err(LexicalError::new(
172                        LexicalErrorType::UnicodeError,
173                        TextRange::empty(self.position()),
174                    ));
175                }
176            }
177        }
178        match p {
179            0xD800..=0xDFFF => Ok(std::char::REPLACEMENT_CHARACTER),
180            _ => std::char::from_u32(p).ok_or(LexicalError::new(
181                LexicalErrorType::UnicodeError,
182                TextRange::empty(self.position()),
183            )),
184        }
185    }
186
187    fn parse_octet(&mut self, o: u8) -> char {
188        let mut radix_bytes = [o, 0, 0];
189        let mut len = 1;
190
191        while len < 3 {
192            let Some(b'0'..=b'7') = self.peek_byte() else {
193                break;
194            };
195
196            radix_bytes[len] = self.next_byte().unwrap();
197            len += 1;
198        }
199
200        // OK because radix_bytes is always going to be in the ASCII range.
201        let radix_str = std::str::from_utf8(&radix_bytes[..len]).expect("ASCII bytes");
202        let value = u32::from_str_radix(radix_str, 8).unwrap();
203        char::from_u32(value).unwrap()
204    }
205
206    fn parse_unicode_name(&mut self) -> Result<char, LexicalError> {
207        let start_pos = self.position();
208        let Some('{') = self.next_char() else {
209            return Err(LexicalError::new(
210                LexicalErrorType::MissingUnicodeLbrace,
211                TextRange::empty(start_pos),
212            ));
213        };
214
215        let start_pos = self.position();
216        let Some(close_idx) = self.source[self.cursor..].find('}') else {
217            return Err(LexicalError::new(
218                LexicalErrorType::MissingUnicodeRbrace,
219                TextRange::empty(self.compute_position(self.source.len())),
220            ));
221        };
222
223        let name_and_ending = self.skip_bytes(close_idx + 1);
224        let name = &name_and_ending[..name_and_ending.len() - 1];
225
226        unicode_names2::character(name).ok_or_else(|| {
227            LexicalError::new(
228                LexicalErrorType::UnicodeError,
229                // The cursor is right after the `}` character, so we subtract 1 to get the correct
230                // range of the unicode name.
231                TextRange::new(
232                    start_pos,
233                    self.compute_position(self.cursor - '}'.len_utf8()),
234                ),
235            )
236        })
237    }
238
239    /// Parse an escaped character, returning the new character.
240    fn parse_escaped_char(&mut self) -> Result<Option<EscapedChar>, LexicalError> {
241        let Some(first_char) = self.next_char() else {
242            // TODO: check when this error case happens
243            return Err(LexicalError::new(
244                LexicalErrorType::StringError,
245                TextRange::empty(self.position()),
246            ));
247        };
248
249        let new_char = match first_char {
250            '\\' => '\\',
251            '\'' => '\'',
252            '\"' => '"',
253            'a' => '\x07',
254            'b' => '\x08',
255            'f' => '\x0c',
256            'n' => '\n',
257            'r' => '\r',
258            't' => '\t',
259            'v' => '\x0b',
260            o @ '0'..='7' => self.parse_octet(o as u8),
261            'x' => self.parse_unicode_literal(2)?,
262            'u' if !self.flags.is_byte_string() => self.parse_unicode_literal(4)?,
263            'U' if !self.flags.is_byte_string() => self.parse_unicode_literal(8)?,
264            'N' if !self.flags.is_byte_string() => self.parse_unicode_name()?,
265            // Special cases where the escape sequence is not a single character
266            '\n' => return Ok(None),
267            '\r' => {
268                if self.peek_byte() == Some(b'\n') {
269                    self.next_byte();
270                }
271
272                return Ok(None);
273            }
274            _ => return Ok(Some(EscapedChar::Escape(first_char))),
275        };
276
277        Ok(Some(EscapedChar::Literal(new_char)))
278    }
279
280    fn parse_interpolated_string_middle(
281        mut self,
282    ) -> Result<ast::InterpolatedStringLiteralElement, LexicalError> {
283        // Fast-path: if the f-string or t-string doesn't contain any escape sequences, return the literal.
284        let Some(mut index) = memchr::memchr3(b'{', b'}', b'\\', self.source.as_bytes()) else {
285            return Ok(ast::InterpolatedStringLiteralElement {
286                value: self.source,
287                range: self.range,
288                node_index: AtomicNodeIndex::NONE,
289            });
290        };
291
292        let mut value = String::with_capacity(self.source.len());
293        loop {
294            // Add the characters before the escape sequence (or curly brace) to the string.
295            let before_with_slash_or_brace = self.skip_bytes(index + 1);
296            let before = &before_with_slash_or_brace[..before_with_slash_or_brace.len() - 1];
297            value.push_str(before);
298
299            // Add the escaped character to the string.
300            match &self.source.as_bytes()[self.cursor - 1] {
301                // If there are any curly braces inside a `F/TStringMiddle` token,
302                // then they were escaped (i.e. `{{` or `}}`). This means that
303                // we need increase the location by 2 instead of 1.
304                b'{' => {
305                    self.offset += TextSize::from(1);
306                    value.push('{');
307                }
308                b'}' => {
309                    self.offset += TextSize::from(1);
310                    value.push('}');
311                }
312                // We can encounter a `\` as the last character in a `F/TStringMiddle`
313                // token which is valid in this context. For example,
314                //
315                // ```python
316                // f"\{foo} \{bar:\}"
317                // # ^     ^^     ^
318                // ```
319                //
320                // Here, the `F/TStringMiddle` token content will be "\" and " \"
321                // which is invalid if we look at the content in isolation:
322                //
323                // ```python
324                // "\"
325                // ```
326                //
327                // However, the content is syntactically valid in the context of
328                // the f/t-string because it's a substring of the entire f/t-string.
329                // This is still an invalid escape sequence, but we don't want to
330                // raise a syntax error as is done by the CPython parser. It might
331                // be supported in the future, refer to point 3: https://peps.python.org/pep-0701/#rejected-ideas
332                b'\\' => {
333                    if !self.flags.is_raw_string() && self.peek_byte().is_some() {
334                        match self.parse_escaped_char()? {
335                            None => {}
336                            Some(EscapedChar::Literal(c)) => value.push(c),
337                            Some(EscapedChar::Escape(c)) => {
338                                value.push('\\');
339                                value.push(c);
340                            }
341                        }
342                    } else {
343                        value.push('\\');
344                    }
345                }
346                ch => {
347                    unreachable!("Expected '{{', '}}', or '\\' but got {:?}", ch);
348                }
349            }
350
351            let Some(next_index) =
352                memchr::memchr3(b'{', b'}', b'\\', self.source[self.cursor..].as_bytes())
353            else {
354                // Add the rest of the string to the value.
355                let rest = &self.source[self.cursor..];
356                value.push_str(rest);
357                break;
358            };
359
360            index = next_index;
361        }
362
363        Ok(ast::InterpolatedStringLiteralElement {
364            value: value.into_boxed_str(),
365            range: self.range,
366            node_index: AtomicNodeIndex::NONE,
367        })
368    }
369
370    fn parse_bytes(mut self) -> Result<StringType, LexicalError> {
371        if let Some(index) = self.source.as_bytes().find_non_ascii_byte() {
372            let ch = self.source.chars().nth(index).unwrap();
373            return Err(LexicalError::new(
374                LexicalErrorType::InvalidByteLiteral,
375                TextRange::at(
376                    self.compute_position(index),
377                    TextSize::try_from(ch.len_utf8()).unwrap(),
378                ),
379            ));
380        }
381
382        if self.flags.is_raw_string() {
383            // For raw strings, no escaping is necessary.
384            return Ok(StringType::Bytes(ast::BytesLiteral {
385                value: self.source.into_boxed_bytes(),
386                range: self.range,
387                flags: self.flags.into(),
388                node_index: AtomicNodeIndex::NONE,
389            }));
390        }
391
392        let Some(mut escape) = memchr::memchr(b'\\', self.source.as_bytes()) else {
393            // If the string doesn't contain any escape sequences, return the owned string.
394            return Ok(StringType::Bytes(ast::BytesLiteral {
395                value: self.source.into_boxed_bytes(),
396                range: self.range,
397                flags: self.flags.into(),
398                node_index: AtomicNodeIndex::NONE,
399            }));
400        };
401
402        // If the string contains escape sequences, we need to parse them.
403        let mut value = Vec::with_capacity(self.source.len());
404        loop {
405            // Add the characters before the escape sequence to the string.
406            let before_with_slash = self.skip_bytes(escape + 1);
407            let before = &before_with_slash[..before_with_slash.len() - 1];
408            value.extend_from_slice(before.as_bytes());
409
410            // Add the escaped character to the string.
411            match self.parse_escaped_char()? {
412                None => {}
413                Some(EscapedChar::Literal(c)) => value.push(c as u8),
414                Some(EscapedChar::Escape(c)) => {
415                    value.push(b'\\');
416                    value.push(c as u8);
417                }
418            }
419
420            let Some(next_escape) = memchr::memchr(b'\\', self.source[self.cursor..].as_bytes())
421            else {
422                // Add the rest of the string to the value.
423                let rest = &self.source[self.cursor..];
424                value.extend_from_slice(rest.as_bytes());
425                break;
426            };
427
428            // Update the position of the next escape sequence.
429            escape = next_escape;
430        }
431
432        Ok(StringType::Bytes(ast::BytesLiteral {
433            value: value.into_boxed_slice(),
434            range: self.range,
435            flags: self.flags.into(),
436            node_index: AtomicNodeIndex::NONE,
437        }))
438    }
439
440    fn parse_string(mut self) -> Result<StringType, LexicalError> {
441        if self.flags.is_raw_string() {
442            // For raw strings, no escaping is necessary.
443            return Ok(StringType::Str(ast::StringLiteral {
444                value: self.source,
445                range: self.range,
446                flags: self.flags.into(),
447                node_index: AtomicNodeIndex::NONE,
448            }));
449        }
450
451        let Some(mut escape) = memchr::memchr(b'\\', self.source.as_bytes()) else {
452            // If the string doesn't contain any escape sequences, return the owned string.
453            return Ok(StringType::Str(ast::StringLiteral {
454                value: self.source,
455                range: self.range,
456                flags: self.flags.into(),
457                node_index: AtomicNodeIndex::NONE,
458            }));
459        };
460
461        // If the string contains escape sequences, we need to parse them.
462        let mut value = String::with_capacity(self.source.len());
463
464        loop {
465            // Add the characters before the escape sequence to the string.
466            let before_with_slash = self.skip_bytes(escape + 1);
467            let before = &before_with_slash[..before_with_slash.len() - 1];
468            value.push_str(before);
469
470            // Add the escaped character to the string.
471            match self.parse_escaped_char()? {
472                None => {}
473                Some(EscapedChar::Literal(c)) => value.push(c),
474                Some(EscapedChar::Escape(c)) => {
475                    value.push('\\');
476                    value.push(c);
477                }
478            }
479
480            let Some(next_escape) = self.source[self.cursor..].find('\\') else {
481                // Add the rest of the string to the value.
482                let rest = &self.source[self.cursor..];
483                value.push_str(rest);
484                break;
485            };
486
487            // Update the position of the next escape sequence.
488            escape = next_escape;
489        }
490
491        Ok(StringType::Str(ast::StringLiteral {
492            value: value.into_boxed_str(),
493            range: self.range,
494            flags: self.flags.into(),
495            node_index: AtomicNodeIndex::NONE,
496        }))
497    }
498
499    fn parse(self) -> Result<StringType, LexicalError> {
500        if self.flags.is_byte_string() {
501            self.parse_bytes()
502        } else {
503            self.parse_string()
504        }
505    }
506}
507
508pub(crate) fn parse_string_literal(
509    source: Box<str>,
510    flags: AnyStringFlags,
511    range: TextRange,
512) -> Result<StringType, LexicalError> {
513    StringParser::new(source, flags, range.start() + flags.opener_len(), range).parse()
514}
515
516// TODO(dhruvmanila): Move this to the new parser
517pub(crate) fn parse_interpolated_string_literal_element(
518    source: Box<str>,
519    flags: AnyStringFlags,
520    range: TextRange,
521) -> Result<ast::InterpolatedStringLiteralElement, LexicalError> {
522    StringParser::new(source, flags, range.start(), range).parse_interpolated_string_middle()
523}
524
525#[cfg(test)]
526mod tests {
527    use ruff_python_ast::Suite;
528
529    use crate::error::LexicalErrorType;
530    use crate::{InterpolatedStringErrorType, ParseError, ParseErrorType, Parsed, parse_module};
531
    /// Windows-style line ending: carriage return followed by line feed.
    const WINDOWS_EOL: &str = "\r\n";
    /// Line ending consisting of a lone carriage return.
    const MAC_EOL: &str = "\r";
    /// Unix-style line ending: a lone line feed.
    const UNIX_EOL: &str = "\n";
535
536    fn parse_suite(source: &str) -> Result<Suite, ParseError> {
537        parse_module(source).map(Parsed::into_suite)
538    }
539
540    fn string_parser_escaped_eol(eol: &str) -> Suite {
541        let source = format!(r"'text \{eol}more text'");
542        parse_suite(&source).unwrap()
543    }
544
    /// Snapshots the AST of a string whose backslash is followed by a `\n` line ending.
    #[test]
    fn test_string_parser_escaped_unix_eol() {
        let suite = string_parser_escaped_eol(UNIX_EOL);
        insta::assert_debug_snapshot!(suite);
    }
550
    /// Snapshots the AST of a string whose backslash is followed by a `\r` line ending.
    #[test]
    fn test_string_parser_escaped_mac_eol() {
        let suite = string_parser_escaped_eol(MAC_EOL);
        insta::assert_debug_snapshot!(suite);
    }
556
    /// Snapshots the AST of a string whose backslash is followed by a `\r\n` line ending.
    #[test]
    fn test_string_parser_escaped_windows_eol() {
        let suite = string_parser_escaped_eol(WINDOWS_EOL);
        insta::assert_debug_snapshot!(suite);
    }
562
563    #[test]
564    fn test_parse_fstring() {
565        let source = r#"f"{a}{ b }{{foo}}""#;
566        let suite = parse_suite(source).unwrap();
567        insta::assert_debug_snapshot!(suite);
568    }
569
570    #[test]
571    fn test_parse_fstring_nested_spec() {
572        let source = r#"f"{foo:{spec}}""#;
573        let suite = parse_suite(source).unwrap();
574        insta::assert_debug_snapshot!(suite);
575    }
576
577    #[test]
578    fn test_parse_fstring_not_nested_spec() {
579        let source = r#"f"{foo:spec}""#;
580        let suite = parse_suite(source).unwrap();
581        insta::assert_debug_snapshot!(suite);
582    }
583
584    #[test]
585    fn test_parse_empty_fstring() {
586        let source = r#"f"""#;
587        let suite = parse_suite(source).unwrap();
588        insta::assert_debug_snapshot!(suite);
589    }
590
591    #[test]
592    fn test_fstring_parse_self_documenting_base() {
593        let source = r#"f"{user=}""#;
594        let suite = parse_suite(source).unwrap();
595        insta::assert_debug_snapshot!(suite);
596    }
597
598    #[test]
599    fn test_fstring_parse_self_documenting_base_more() {
600        let source = r#"f"mix {user=} with text and {second=}""#;
601        let suite = parse_suite(source).unwrap();
602        insta::assert_debug_snapshot!(suite);
603    }
604
605    #[test]
606    fn test_fstring_parse_self_documenting_format() {
607        let source = r#"f"{user=:>10}""#;
608        let suite = parse_suite(source).unwrap();
609        insta::assert_debug_snapshot!(suite);
610    }
611
612    fn parse_fstring_error(source: &str) -> InterpolatedStringErrorType {
613        parse_suite(source)
614            .map_err(|e| match e.error {
615                ParseErrorType::Lexical(LexicalErrorType::FStringError(e)) => e,
616                ParseErrorType::FStringError(e) => e,
617                e => unreachable!("Expected FStringError: {:?}", e),
618            })
619            .expect_err("Expected error")
620    }
621
622    #[test]
623    fn test_parse_invalid_fstring() {
624        use InterpolatedStringErrorType::{InvalidConversionFlag, LambdaWithoutParentheses};
625
626        assert_eq!(parse_fstring_error(r#"f"{5!x}""#), InvalidConversionFlag);
627        assert_eq!(
628            parse_fstring_error("f'{lambda x:{x}}'"),
629            LambdaWithoutParentheses
630        );
631        // NOTE: The parser produces the `LambdaWithoutParentheses` for this case, but
632        // since the parser only return the first error to maintain compatibility with
633        // the rest of the codebase, this test case fails. The `LambdaWithoutParentheses`
634        // error appears after the unexpected `FStringMiddle` token, which is between the
635        // `:` and the `{`.
636        // assert_eq!(parse_fstring_error("f'{lambda x: {x}}'"), LambdaWithoutParentheses);
637        assert!(parse_suite(r#"f"{class}""#).is_err());
638    }
639
640    #[test]
641    fn test_parse_fstring_not_equals() {
642        let source = r#"f"{1 != 2}""#;
643        let suite = parse_suite(source).unwrap();
644        insta::assert_debug_snapshot!(suite);
645    }
646
647    #[test]
648    fn test_parse_fstring_equals() {
649        let source = r#"f"{42 == 42}""#;
650        let suite = parse_suite(source).unwrap();
651        insta::assert_debug_snapshot!(suite);
652    }
653
654    #[test]
655    fn test_parse_fstring_self_doc_prec_space() {
656        let source = r#"f"{x   =}""#;
657        let suite = parse_suite(source).unwrap();
658        insta::assert_debug_snapshot!(suite);
659    }
660
661    #[test]
662    fn test_parse_fstring_self_doc_trailing_space() {
663        let source = r#"f"{x=   }""#;
664        let suite = parse_suite(source).unwrap();
665        insta::assert_debug_snapshot!(suite);
666    }
667
668    #[test]
669    fn test_parse_fstring_yield_expr() {
670        let source = r#"f"{yield}""#;
671        let suite = parse_suite(source).unwrap();
672        insta::assert_debug_snapshot!(suite);
673    }
674
675    #[test]
676    fn test_parse_tstring() {
677        let source = r#"t"{a}{ b }{{foo}}""#;
678        let suite = parse_suite(source).unwrap();
679        insta::assert_debug_snapshot!(suite);
680    }
681
682    #[test]
683    fn test_parse_tstring_nested_spec() {
684        let source = r#"t"{foo:{spec}}""#;
685        let suite = parse_suite(source).unwrap();
686        insta::assert_debug_snapshot!(suite);
687    }
688
689    #[test]
690    fn test_parse_tstring_not_nested_spec() {
691        let source = r#"t"{foo:spec}""#;
692        let suite = parse_suite(source).unwrap();
693        insta::assert_debug_snapshot!(suite);
694    }
695
696    #[test]
697    fn test_parse_empty_tstring() {
698        let source = r#"t"""#;
699        let suite = parse_suite(source).unwrap();
700        insta::assert_debug_snapshot!(suite);
701    }
702
703    #[test]
704    fn test_tstring_parse_self_documenting_base() {
705        let source = r#"t"{user=}""#;
706        let suite = parse_suite(source).unwrap();
707        insta::assert_debug_snapshot!(suite);
708    }
709
710    #[test]
711    fn test_tstring_parse_self_documenting_base_more() {
712        let source = r#"t"mix {user=} with text and {second=}""#;
713        let suite = parse_suite(source).unwrap();
714        insta::assert_debug_snapshot!(suite);
715    }
716
717    #[test]
718    fn test_tstring_parse_self_documenting_format() {
719        let source = r#"t"{user=:>10}""#;
720        let suite = parse_suite(source).unwrap();
721        insta::assert_debug_snapshot!(suite);
722    }
723
724    fn parse_tstring_error(source: &str) -> InterpolatedStringErrorType {
725        parse_suite(source)
726            .map_err(|e| match e.error {
727                ParseErrorType::Lexical(LexicalErrorType::TStringError(e)) => e,
728                ParseErrorType::TStringError(e) => e,
729                e => unreachable!("Expected TStringError: {:?}", e),
730            })
731            .expect_err("Expected error")
732    }
733
734    #[test]
735    fn test_parse_invalid_tstring() {
736        use InterpolatedStringErrorType::{InvalidConversionFlag, LambdaWithoutParentheses};
737
738        assert_eq!(parse_tstring_error(r#"t"{5!x}""#), InvalidConversionFlag);
739        assert_eq!(
740            parse_tstring_error("t'{lambda x:{x}}'"),
741            LambdaWithoutParentheses
742        );
743        // NOTE: The parser produces the `LambdaWithoutParentheses` for this case, but
744        // since the parser only return the first error to maintain compatibility with
745        // the rest of the codebase, this test case fails. The `LambdaWithoutParentheses`
746        // error appears after the unexpected `tStringMiddle` token, which is between the
747        // `:` and the `{`.
748        // assert_eq!(parse_tstring_error("f'{lambda x: {x}}'"), LambdaWithoutParentheses);
749        assert!(parse_suite(r#"t"{class}""#).is_err());
750    }
751
752    #[test]
753    fn test_parse_tstring_not_equals() {
754        let source = r#"t"{1 != 2}""#;
755        let suite = parse_suite(source).unwrap();
756        insta::assert_debug_snapshot!(suite);
757    }
758
759    #[test]
760    fn test_parse_tstring_equals() {
761        let source = r#"t"{42 == 42}""#;
762        let suite = parse_suite(source).unwrap();
763        insta::assert_debug_snapshot!(suite);
764    }
765
766    #[test]
767    fn test_parse_tstring_self_doc_prec_space() {
768        let source = r#"t"{x   =}""#;
769        let suite = parse_suite(source).unwrap();
770        insta::assert_debug_snapshot!(suite);
771    }
772
773    #[test]
774    fn test_parse_tstring_self_doc_trailing_space() {
775        let source = r#"t"{x=   }""#;
776        let suite = parse_suite(source).unwrap();
777        insta::assert_debug_snapshot!(suite);
778    }
779
780    #[test]
781    fn test_parse_tstring_yield_expr() {
782        let source = r#"t"{yield}""#;
783        let suite = parse_suite(source).unwrap();
784        insta::assert_debug_snapshot!(suite);
785    }
786
787    #[test]
788    fn test_parse_string_concat() {
789        let source = "'Hello ' 'world'";
790        let suite = parse_suite(source).unwrap();
791        insta::assert_debug_snapshot!(suite);
792    }
793
794    #[test]
795    fn test_parse_u_string_concat_1() {
796        let source = "'Hello ' u'world'";
797        let suite = parse_suite(source).unwrap();
798        insta::assert_debug_snapshot!(suite);
799    }
800
801    #[test]
802    fn test_parse_u_string_concat_2() {
803        let source = "u'Hello ' 'world'";
804        let suite = parse_suite(source).unwrap();
805        insta::assert_debug_snapshot!(suite);
806    }
807
808    #[test]
809    fn test_parse_f_string_concat_1() {
810        let source = "'Hello ' f'world'";
811        let suite = parse_suite(source).unwrap();
812        insta::assert_debug_snapshot!(suite);
813    }
814
815    #[test]
816    fn test_parse_f_string_concat_2() {
817        let source = "'Hello ' f'world'";
818        let suite = parse_suite(source).unwrap();
819        insta::assert_debug_snapshot!(suite);
820    }
821
822    #[test]
823    fn test_parse_f_string_concat_3() {
824        let source = "'Hello ' f'world{\"!\"}'";
825        let suite = parse_suite(source).unwrap();
826        insta::assert_debug_snapshot!(suite);
827    }
828
829    #[test]
830    fn test_parse_f_string_concat_4() {
831        let source = "'Hello ' f'world{\"!\"}' 'again!'";
832        let suite = parse_suite(source).unwrap();
833        insta::assert_debug_snapshot!(suite);
834    }
835
836    #[test]
837    fn test_parse_u_f_string_concat_1() {
838        let source = "u'Hello ' f'world'";
839        let suite = parse_suite(source).unwrap();
840        insta::assert_debug_snapshot!(suite);
841    }
842
843    #[test]
844    fn test_parse_u_f_string_concat_2() {
845        let source = "u'Hello ' f'world' '!'";
846        let suite = parse_suite(source).unwrap();
847        insta::assert_debug_snapshot!(suite);
848    }
849
850    #[test]
851    fn test_parse_t_string_concat_1_error() {
852        let source = "'Hello ' t'world'";
853        let suite = parse_suite(source).unwrap_err();
854        insta::assert_debug_snapshot!(suite);
855    }
856
857    #[test]
858    fn test_parse_t_string_concat_2_error() {
859        let source = "'Hello ' t'world'";
860        let suite = parse_suite(source).unwrap_err();
861        insta::assert_debug_snapshot!(suite);
862    }
863
864    #[test]
865    fn test_parse_t_string_concat_3_error() {
866        let source = "'Hello ' t'world{\"!\"}'";
867        let suite = parse_suite(source).unwrap_err();
868        insta::assert_debug_snapshot!(suite);
869    }
870
871    #[test]
872    fn test_parse_t_string_concat_4_error() {
873        let source = "'Hello ' t'world{\"!\"}' 'again!'";
874        let suite = parse_suite(source).unwrap_err();
875        insta::assert_debug_snapshot!(suite);
876    }
877
878    #[test]
879    fn test_parse_u_t_string_concat_1_error() {
880        let source = "u'Hello ' t'world'";
881        let suite = parse_suite(source).unwrap_err();
882        insta::assert_debug_snapshot!(suite);
883    }
884
885    #[test]
886    fn test_parse_u_t_string_concat_2_error() {
887        let source = "u'Hello ' t'world' '!'";
888        let suite = parse_suite(source).unwrap_err();
889        insta::assert_debug_snapshot!(suite);
890    }
891
892    #[test]
893    fn test_parse_f_t_string_concat_1_error() {
894        let source = "f'Hello ' t'world'";
895        let suite = parse_suite(source).unwrap_err();
896        insta::assert_debug_snapshot!(suite);
897    }
898
899    #[test]
900    fn test_parse_f_t_string_concat_2_error() {
901        let source = "f'Hello ' t'world' '!'";
902        let suite = parse_suite(source).unwrap_err();
903        insta::assert_debug_snapshot!(suite);
904    }
905
906    #[test]
907    fn test_parse_string_triple_quotes_with_kind() {
908        let source = "u'''Hello, world!'''";
909        let suite = parse_suite(source).unwrap();
910        insta::assert_debug_snapshot!(suite);
911    }
912
913    #[test]
914    fn test_single_quoted_byte() {
915        // single quote
916        let source = r##"b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'"##;
917        let suite = parse_suite(source).unwrap();
918        insta::assert_debug_snapshot!(suite);
919    }
920
921    #[test]
922    fn test_double_quoted_byte() {
923        // double quote
924        let source = r##"b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff""##;
925        let suite = parse_suite(source).unwrap();
926        insta::assert_debug_snapshot!(suite);
927    }
928
929    #[test]
930    fn test_escape_char_in_byte_literal() {
931        // backslash does not escape
932        let source = r#"b"omkmok\Xaa""#; // spell-checker:ignore omkmok
933        let suite = parse_suite(source).unwrap();
934        insta::assert_debug_snapshot!(suite);
935    }
936
937    #[test]
938    fn test_raw_byte_literal_1() {
939        let source = r"rb'\x1z'";
940        let suite = parse_suite(source).unwrap();
941        insta::assert_debug_snapshot!(suite);
942    }
943
944    #[test]
945    fn test_raw_byte_literal_2() {
946        let source = r"rb'\\'";
947        let suite = parse_suite(source).unwrap();
948        insta::assert_debug_snapshot!(suite);
949    }
950
951    #[test]
952    fn test_escape_octet() {
953        let source = r"b'\43a\4\1234'";
954        let suite = parse_suite(source).unwrap();
955        insta::assert_debug_snapshot!(suite);
956    }
957
958    #[test]
959    fn test_fstring_escaped_newline() {
960        let source = r#"f"\n{x}""#;
961        let suite = parse_suite(source).unwrap();
962        insta::assert_debug_snapshot!(suite);
963    }
964
965    #[test]
966    fn test_fstring_constant_range() {
967        let source = r#"f"aaa{bbb}ccc{ddd}eee""#;
968        let suite = parse_suite(source).unwrap();
969        insta::assert_debug_snapshot!(suite);
970    }
971
972    #[test]
973    fn test_fstring_unescaped_newline() {
974        let source = r#"f"""
975{x}""""#;
976        let suite = parse_suite(source).unwrap();
977        insta::assert_debug_snapshot!(suite);
978    }
979
980    #[test]
981    fn test_fstring_escaped_character() {
982        let source = r#"f"\\{x}""#;
983        let suite = parse_suite(source).unwrap();
984        insta::assert_debug_snapshot!(suite);
985    }
986
987    #[test]
988    fn test_raw_fstring() {
989        let source = r#"rf"{x}""#;
990        let suite = parse_suite(source).unwrap();
991        insta::assert_debug_snapshot!(suite);
992    }
993
994    #[test]
995    fn test_triple_quoted_raw_fstring() {
996        let source = r#"rf"""{x}""""#;
997        let suite = parse_suite(source).unwrap();
998        insta::assert_debug_snapshot!(suite);
999    }
1000
1001    #[test]
1002    fn test_fstring_line_continuation() {
1003        let source = r#"rf"\
1004{x}""#;
1005        let suite = parse_suite(source).unwrap();
1006        insta::assert_debug_snapshot!(suite);
1007    }
1008
1009    #[test]
1010    fn test_parse_fstring_nested_string_spec() {
1011        let source = r#"f"{foo:{''}}""#;
1012        let suite = parse_suite(source).unwrap();
1013        insta::assert_debug_snapshot!(suite);
1014    }
1015
1016    #[test]
1017    fn test_parse_fstring_nested_concatenation_string_spec() {
1018        let source = r#"f"{foo:{'' ''}}""#;
1019        let suite = parse_suite(source).unwrap();
1020        insta::assert_debug_snapshot!(suite);
1021    }
1022
1023    #[test]
1024    fn test_tstring_escaped_newline() {
1025        let source = r#"t"\n{x}""#;
1026        let suite = parse_suite(source).unwrap();
1027        insta::assert_debug_snapshot!(suite);
1028    }
1029
1030    #[test]
1031    fn test_tstring_constant_range() {
1032        let source = r#"t"aaa{bbb}ccc{ddd}eee""#;
1033        let suite = parse_suite(source).unwrap();
1034        insta::assert_debug_snapshot!(suite);
1035    }
1036
1037    #[test]
1038    fn test_tstring_unescaped_newline() {
1039        let source = r#"t"""
1040{x}""""#;
1041        let suite = parse_suite(source).unwrap();
1042        insta::assert_debug_snapshot!(suite);
1043    }
1044
1045    #[test]
1046    fn test_tstring_escaped_character() {
1047        let source = r#"t"\\{x}""#;
1048        let suite = parse_suite(source).unwrap();
1049        insta::assert_debug_snapshot!(suite);
1050    }
1051
1052    #[test]
1053    fn test_raw_tstring() {
1054        let source = r#"rt"{x}""#;
1055        let suite = parse_suite(source).unwrap();
1056        insta::assert_debug_snapshot!(suite);
1057    }
1058
1059    #[test]
1060    fn test_triple_quoted_raw_tstring() {
1061        let source = r#"rt"""{x}""""#;
1062        let suite = parse_suite(source).unwrap();
1063        insta::assert_debug_snapshot!(suite);
1064    }
1065
1066    #[test]
1067    fn test_tstring_line_continuation() {
1068        let source = r#"rt"\
1069{x}""#;
1070        let suite = parse_suite(source).unwrap();
1071        insta::assert_debug_snapshot!(suite);
1072    }
1073
1074    #[test]
1075    fn test_parse_tstring_nested_string_spec() {
1076        let source = r#"t"{foo:{''}}""#;
1077        let suite = parse_suite(source).unwrap();
1078        insta::assert_debug_snapshot!(suite);
1079    }
1080
1081    #[test]
1082    fn test_parse_tstring_nested_concatenation_string_spec() {
1083        let source = r#"t"{foo:{'' ''}}""#;
1084        let suite = parse_suite(source).unwrap();
1085        insta::assert_debug_snapshot!(suite);
1086    }
1087
1088    /// <https://github.com/astral-sh/ruff/issues/8355>
1089    #[test]
1090    fn test_dont_panic_on_8_in_octal_escape() {
1091        let source = r"bold = '\038[1m'";
1092        let suite = parse_suite(source).unwrap();
1093        insta::assert_debug_snapshot!(suite);
1094    }
1095
1096    #[test]
1097    fn test_invalid_unicode_literal() {
1098        let source = r"'\x1ó34'";
1099        let error = parse_suite(source).unwrap_err();
1100        insta::assert_debug_snapshot!(error);
1101    }
1102
1103    #[test]
1104    fn test_missing_unicode_lbrace_error() {
1105        let source = r"'\N '";
1106        let error = parse_suite(source).unwrap_err();
1107        insta::assert_debug_snapshot!(error);
1108    }
1109
1110    #[test]
1111    fn test_missing_unicode_rbrace_error() {
1112        let source = r"'\N{SPACE'";
1113        let error = parse_suite(source).unwrap_err();
1114        insta::assert_debug_snapshot!(error);
1115    }
1116
1117    #[test]
1118    fn test_invalid_unicode_name_error() {
1119        let source = r"'\N{INVALID}'";
1120        let error = parse_suite(source).unwrap_err();
1121        insta::assert_debug_snapshot!(error);
1122    }
1123
1124    #[test]
1125    fn test_invalid_byte_literal_error() {
1126        let source = r"b'123a𝐁c'";
1127        let error = parse_suite(source).unwrap_err();
1128        insta::assert_debug_snapshot!(error);
1129    }
1130
1131    macro_rules! test_aliases_parse {
1132        ($($name:ident: $alias:expr,)*) => {
1133        $(
1134            #[test]
1135            fn $name() {
1136                let source = format!(r#""\N{{{0}}}""#, $alias);
1137                let suite = parse_suite(&source).unwrap();
1138                insta::assert_debug_snapshot!(suite);
1139            }
1140        )*
1141        }
1142    }
1143
    // One generated test per entry: the identifier becomes the test function
    // name and the string is substituted into a `"\N{...}"` literal that must
    // parse successfully.
    test_aliases_parse! {
        test_backspace_alias: "BACKSPACE",
        test_bell_alias: "BEL",
        test_carriage_return_alias: "CARRIAGE RETURN",
        test_delete_alias: "DELETE",
        test_escape_alias: "ESCAPE",
        test_form_feed_alias: "FORM FEED",
        test_hts_alias: "HTS",
        test_character_tabulation_with_justification_alias: "CHARACTER TABULATION WITH JUSTIFICATION",
    }
1154}
rustpython_ruff_python_parser/string.rs

rustpython_ruff_python_parser/
string.rs