rustpython_parser/
string.rs

1// Contains the logic for parsing string literals (mostly concerned with f-strings.)
2//
3// The lexer doesn't do any special handling of f-strings, it just treats them as
4// regular strings. Since the parser has no definition of f-string formats (Pending PEP 701)
5// we have to do the parsing here, manually.
6use crate::text_size::TextRange;
7use crate::{
8    ast::{self, Constant, Expr},
9    lexer::{LexicalError, LexicalErrorType},
10    parser::{LalrpopError, Parse, ParseError, ParseErrorType},
11    token::{StringKind, Tok},
12};
13use itertools::Itertools;
14use rustpython_parser_core::{
15    text_size::{TextLen, TextSize},
16    ConversionFlag,
17};
18
// unicode_names2 does not expose `MAX_NAME_LENGTH`, so we replicate that constant here, fix #3798
//
// `parse_unicode_name` rejects any `\N{...}` name whose byte length exceeds this value
// before attempting the name lookup.
const MAX_UNICODE_NAME: usize = 88;
21
/// Character-level parser for the body of a single string literal.
///
/// Depending on `kind`, the body is decoded as a plain string, a bytes literal or an
/// f-string (see `parse`).
struct StringParser<'a> {
    /// Remaining characters of the literal's source text, with one character of lookahead.
    chars: std::iter::Peekable<std::str::Chars<'a>>,
    /// Kind of the literal; consulted for raw, bytes, f-string and `u`-prefix handling.
    kind: StringKind,
    /// Start offset handed to `new`; lower bound of the range attached to produced nodes.
    start: TextSize,
    /// End offset handed to `new`; upper bound of the range attached to produced nodes.
    end: TextSize,
    /// Current offset: starts at `start` plus the prefix and opening-quote length and is
    /// advanced by `next_char` for every consumed character.
    location: TextSize,
}
29
30impl<'a> StringParser<'a> {
31    fn new(
32        source: &'a str,
33        kind: StringKind,
34        triple_quoted: bool,
35        start: TextSize,
36        end: TextSize,
37    ) -> Self {
38        let offset = kind.prefix_len()
39            + if triple_quoted {
40                TextSize::from(3)
41            } else {
42                TextSize::from(1)
43            };
44        Self {
45            chars: source.chars().peekable(),
46            kind,
47            start,
48            end,
49            location: start + offset,
50        }
51    }
52
53    #[inline]
54    fn next_char(&mut self) -> Option<char> {
55        let c = self.chars.next()?;
56        self.location += c.text_len();
57        Some(c)
58    }
59
    /// Returns a reference to the next character without consuming it, or `None` when
    /// the input is exhausted. Does not change `location`.
    #[inline]
    fn peek(&mut self) -> Option<&char> {
        self.chars.peek()
    }
64
    /// Returns the parser's current offset (`location`), which `next_char` advances as
    /// characters are consumed. Used as the location of reported errors.
    #[inline]
    fn get_pos(&self) -> TextSize {
        self.location
    }
69
    /// Returns `node` unchanged; every expression built by this parser is passed
    /// through here.
    #[inline]
    fn expr(&self, node: Expr) -> Expr {
        node
    }
74
    /// Returns the range `start..end` of the whole literal as given to `new`.
    ///
    /// Every node this parser builds is tagged with this same range, regardless of
    /// where inside the literal it originates.
    fn range(&self) -> TextRange {
        TextRange::new(self.start, self.end)
    }
78
79    fn parse_unicode_literal(&mut self, literal_number: usize) -> Result<char, LexicalError> {
80        let mut p: u32 = 0u32;
81        let unicode_error = LexicalError::new(LexicalErrorType::UnicodeError, self.get_pos());
82        for i in 1..=literal_number {
83            match self.next_char() {
84                Some(c) => match c.to_digit(16) {
85                    Some(d) => p += d << ((literal_number - i) * 4),
86                    None => return Err(unicode_error),
87                },
88                None => return Err(unicode_error),
89            }
90        }
91        match p {
92            0xD800..=0xDFFF => Ok(std::char::REPLACEMENT_CHARACTER),
93            _ => std::char::from_u32(p).ok_or(unicode_error),
94        }
95    }
96
97    fn parse_octet(&mut self, first: char) -> char {
98        let mut octet_content = String::new();
99        octet_content.push(first);
100        while octet_content.len() < 3 {
101            if let Some('0'..='7') = self.peek() {
102                octet_content.push(self.next_char().unwrap())
103            } else {
104                break;
105            }
106        }
107        let value = u32::from_str_radix(&octet_content, 8).unwrap();
108        char::from_u32(value).unwrap()
109    }
110
111    fn parse_unicode_name(&mut self) -> Result<char, LexicalError> {
112        let start_pos = self.get_pos();
113        match self.next_char() {
114            Some('{') => {}
115            _ => return Err(LexicalError::new(LexicalErrorType::StringError, start_pos)),
116        }
117        let start_pos = self.get_pos();
118        let mut name = String::new();
119        loop {
120            match self.next_char() {
121                Some('}') => break,
122                Some(c) => name.push(c),
123                None => {
124                    return Err(LexicalError::new(
125                        LexicalErrorType::StringError,
126                        self.get_pos(),
127                    ))
128                }
129            }
130        }
131
132        if name.len() > MAX_UNICODE_NAME {
133            return Err(LexicalError::new(
134                LexicalErrorType::UnicodeError,
135                self.get_pos(),
136            ));
137        }
138
139        unicode_names2::character(&name)
140            .ok_or_else(|| LexicalError::new(LexicalErrorType::UnicodeError, start_pos))
141    }
142
143    fn parse_escaped_char(&mut self) -> Result<String, LexicalError> {
144        match self.next_char() {
145            Some(c) => {
146                let char = match c {
147                    '\\' => '\\',
148                    '\'' => '\'',
149                    '\"' => '"',
150                    'a' => '\x07',
151                    'b' => '\x08',
152                    'f' => '\x0c',
153                    'n' => '\n',
154                    'r' => '\r',
155                    't' => '\t',
156                    'v' => '\x0b',
157                    o @ '0'..='7' => self.parse_octet(o),
158                    'x' => self.parse_unicode_literal(2)?,
159                    'u' if !self.kind.is_any_bytes() => self.parse_unicode_literal(4)?,
160                    'U' if !self.kind.is_any_bytes() => self.parse_unicode_literal(8)?,
161                    'N' if !self.kind.is_any_bytes() => self.parse_unicode_name()?,
162                    // Special cases where the escape sequence is not a single character
163                    '\n' => return Ok("".to_string()),
164                    c => {
165                        if self.kind.is_any_bytes() && !c.is_ascii() {
166                            return Err(LexicalError {
167                                error: LexicalErrorType::OtherError(
168                                    "bytes can only contain ASCII literal characters".to_owned(),
169                                ),
170                                location: self.get_pos(),
171                            });
172                        }
173                        return Ok(format!("\\{c}"));
174                    }
175                };
176                Ok(char.to_string())
177            }
178            None => Err(LexicalError {
179                error: LexicalErrorType::StringError,
180                location: self.get_pos(),
181            }),
182        }
183    }
184
185    fn parse_formatted_value(&mut self, nested: u8) -> Result<Vec<Expr>, LexicalError> {
186        use FStringErrorType::*;
187
188        let mut expression = String::new();
189        let mut spec = None;
190        let mut delimiters = Vec::new();
191        let mut conversion = ConversionFlag::None;
192        let mut self_documenting = false;
193        let mut trailing_seq = String::new();
194        let location = self.get_pos();
195
196        while let Some(ch) = self.next_char() {
197            match ch {
198                // can be integrated better with the remaining code, but as a starting point ok
199                // in general I would do here a tokenizing of the fstrings to omit this peeking.
200                '!' | '=' | '>' | '<' if self.peek() == Some(&'=') => {
201                    expression.push(ch);
202                    expression.push('=');
203                    self.next_char();
204                }
205                '!' if delimiters.is_empty() && self.peek() != Some(&'=') => {
206                    if expression.trim().is_empty() {
207                        return Err(FStringError::new(EmptyExpression, self.get_pos()).into());
208                    }
209
210                    conversion = match self.next_char() {
211                        Some('s') => ConversionFlag::Str,
212                        Some('a') => ConversionFlag::Ascii,
213                        Some('r') => ConversionFlag::Repr,
214                        Some(_) => {
215                            return Err(
216                                FStringError::new(InvalidConversionFlag, self.get_pos()).into()
217                            );
218                        }
219                        None => {
220                            return Err(FStringError::new(UnclosedLbrace, self.get_pos()).into());
221                        }
222                    };
223
224                    match self.peek() {
225                        Some('}' | ':') => {}
226                        Some(_) | None => {
227                            return Err(FStringError::new(UnclosedLbrace, self.get_pos()).into());
228                        }
229                    }
230                }
231
232                // match a python 3.8 self documenting expression
233                // format '{' PYTHON_EXPRESSION '=' FORMAT_SPECIFIER? '}'
234                '=' if self.peek() != Some(&'=') && delimiters.is_empty() => {
235                    self_documenting = true;
236                }
237
238                ':' if delimiters.is_empty() => {
239                    let parsed_spec = self.parse_spec(nested)?;
240
241                    spec = Some(Box::new(
242                        self.expr(
243                            ast::ExprJoinedStr {
244                                values: parsed_spec,
245                                range: self.range(),
246                            }
247                            .into(),
248                        ),
249                    ));
250                }
251                '(' | '{' | '[' => {
252                    expression.push(ch);
253                    delimiters.push(ch);
254                }
255                ')' => {
256                    let last_delim = delimiters.pop();
257                    match last_delim {
258                        Some('(') => {
259                            expression.push(ch);
260                        }
261                        Some(c) => {
262                            return Err(FStringError::new(
263                                MismatchedDelimiter(c, ')'),
264                                self.get_pos(),
265                            )
266                            .into());
267                        }
268                        None => {
269                            return Err(FStringError::new(Unmatched(')'), self.get_pos()).into());
270                        }
271                    }
272                }
273                ']' => {
274                    let last_delim = delimiters.pop();
275                    match last_delim {
276                        Some('[') => {
277                            expression.push(ch);
278                        }
279                        Some(c) => {
280                            return Err(FStringError::new(
281                                MismatchedDelimiter(c, ']'),
282                                self.get_pos(),
283                            )
284                            .into());
285                        }
286                        None => {
287                            return Err(FStringError::new(Unmatched(']'), self.get_pos()).into());
288                        }
289                    }
290                }
291                '}' if !delimiters.is_empty() => {
292                    let last_delim = delimiters.pop();
293                    match last_delim {
294                        Some('{') => {
295                            expression.push(ch);
296                        }
297                        Some(c) => {
298                            return Err(FStringError::new(
299                                MismatchedDelimiter(c, '}'),
300                                self.get_pos(),
301                            )
302                            .into());
303                        }
304                        None => {}
305                    }
306                }
307                '}' => {
308                    if expression.trim().is_empty() {
309                        return Err(FStringError::new(EmptyExpression, self.get_pos()).into());
310                    }
311
312                    let ret = if !self_documenting {
313                        vec![self.expr(
314                            ast::ExprFormattedValue {
315                                value: Box::new(
316                                    parse_fstring_expr(&expression, location).map_err(|e| {
317                                        FStringError::new(
318                                            InvalidExpression(Box::new(e.error)),
319                                            location,
320                                        )
321                                    })?,
322                                ),
323                                conversion,
324                                format_spec: spec,
325                                range: self.range(),
326                            }
327                            .into(),
328                        )]
329                    } else {
330                        vec![
331                            self.expr(
332                                ast::ExprConstant {
333                                    value: Constant::Str(expression.to_owned() + "="),
334                                    kind: None,
335                                    range: self.range(),
336                                }
337                                .into(),
338                            ),
339                            self.expr(
340                                ast::ExprConstant {
341                                    value: trailing_seq.into(),
342                                    kind: None,
343                                    range: self.range(),
344                                }
345                                .into(),
346                            ),
347                            self.expr(
348                                ast::ExprFormattedValue {
349                                    value: Box::new(
350                                        parse_fstring_expr(&expression, location).map_err(|e| {
351                                            FStringError::new(
352                                                InvalidExpression(Box::new(e.error)),
353                                                location,
354                                            )
355                                        })?,
356                                    ),
357                                    conversion: if conversion == ConversionFlag::None
358                                        && spec.is_none()
359                                    {
360                                        ConversionFlag::Repr
361                                    } else {
362                                        conversion
363                                    },
364                                    format_spec: spec,
365                                    range: self.range(),
366                                }
367                                .into(),
368                            ),
369                        ]
370                    };
371                    return Ok(ret);
372                }
373                '"' | '\'' => {
374                    expression.push(ch);
375                    loop {
376                        let Some(c) = self.next_char() else {
377                            return Err(
378                                FStringError::new(UnterminatedString, self.get_pos()).into()
379                            );
380                        };
381                        expression.push(c);
382                        if c == ch {
383                            break;
384                        }
385                    }
386                }
387                ' ' if self_documenting => {
388                    trailing_seq.push(ch);
389                }
390                '\\' => return Err(FStringError::new(UnterminatedString, self.get_pos()).into()),
391                _ => {
392                    if self_documenting {
393                        return Err(FStringError::new(UnclosedLbrace, self.get_pos()).into());
394                    }
395
396                    expression.push(ch);
397                }
398            }
399        }
400        Err(FStringError::new(UnclosedLbrace, self.get_pos()).into())
401    }
402
403    fn parse_spec(&mut self, nested: u8) -> Result<Vec<Expr>, LexicalError> {
404        let mut spec_constructor = Vec::new();
405        let mut constant_piece = String::new();
406        while let Some(&next) = self.peek() {
407            match next {
408                '{' => {
409                    if !constant_piece.is_empty() {
410                        spec_constructor.push(
411                            self.expr(
412                                ast::ExprConstant {
413                                    value: std::mem::take(&mut constant_piece).into(),
414                                    kind: None,
415                                    range: self.range(),
416                                }
417                                .into(),
418                            ),
419                        );
420                    }
421                    let parsed_expr = self.parse_fstring(nested + 1)?;
422                    spec_constructor.extend(parsed_expr);
423                    continue;
424                }
425                '}' => {
426                    break;
427                }
428                _ => {
429                    constant_piece.push(next);
430                }
431            }
432            self.next_char();
433        }
434        if !constant_piece.is_empty() {
435            spec_constructor.push(
436                self.expr(
437                    ast::ExprConstant {
438                        value: std::mem::take(&mut constant_piece).into(),
439                        kind: None,
440                        range: self.range(),
441                    }
442                    .into(),
443                ),
444            );
445        }
446        Ok(spec_constructor)
447    }
448
449    fn parse_fstring(&mut self, nested: u8) -> Result<Vec<Expr>, LexicalError> {
450        use FStringErrorType::*;
451
452        if nested >= 2 {
453            return Err(FStringError::new(ExpressionNestedTooDeeply, self.get_pos()).into());
454        }
455
456        let mut content = String::new();
457        let mut values = vec![];
458
459        while let Some(&ch) = self.peek() {
460            match ch {
461                '{' => {
462                    self.next_char();
463                    if nested == 0 {
464                        match self.peek() {
465                            Some('{') => {
466                                self.next_char();
467                                content.push('{');
468                                continue;
469                            }
470                            None => {
471                                return Err(FStringError::new(UnclosedLbrace, self.get_pos()).into())
472                            }
473                            _ => {}
474                        }
475                    }
476                    if !content.is_empty() {
477                        values.push(
478                            self.expr(
479                                ast::ExprConstant {
480                                    value: std::mem::take(&mut content).into(),
481                                    kind: None,
482                                    range: self.range(),
483                                }
484                                .into(),
485                            ),
486                        );
487                    }
488
489                    let parsed_values = self.parse_formatted_value(nested)?;
490                    values.extend(parsed_values);
491                }
492                '}' => {
493                    if nested > 0 {
494                        break;
495                    }
496                    self.next_char();
497                    if let Some('}') = self.peek() {
498                        self.next_char();
499                        content.push('}');
500                    } else {
501                        return Err(FStringError::new(SingleRbrace, self.get_pos()).into());
502                    }
503                }
504                '\\' if !self.kind.is_raw() => {
505                    self.next_char();
506                    if let Some('{' | '}') = self.peek() {
507                        content.push('\\');
508                    } else {
509                        content.push_str(&self.parse_escaped_char()?);
510                    }
511                }
512                _ => {
513                    content.push(ch);
514                    self.next_char();
515                }
516            }
517        }
518
519        if !content.is_empty() {
520            values.push(
521                self.expr(
522                    ast::ExprConstant {
523                        value: content.into(),
524                        kind: None,
525                        range: self.range(),
526                    }
527                    .into(),
528                ),
529            )
530        }
531
532        Ok(values)
533    }
534
535    fn parse_bytes(&mut self) -> Result<Expr, LexicalError> {
536        let mut content = String::new();
537        while let Some(ch) = self.next_char() {
538            match ch {
539                '\\' if !self.kind.is_raw() => {
540                    content.push_str(&self.parse_escaped_char()?);
541                }
542                ch => {
543                    if !ch.is_ascii() {
544                        return Err(LexicalError::new(
545                            LexicalErrorType::OtherError(
546                                "bytes can only contain ASCII literal characters".to_string(),
547                            ),
548                            self.get_pos(),
549                        ));
550                    }
551                    content.push(ch);
552                }
553            }
554        }
555
556        Ok(self.expr(
557            ast::ExprConstant {
558                value: Constant::Bytes(content.chars().map(|c| c as u8).collect()),
559                kind: None,
560                range: self.range(),
561            }
562            .into(),
563        ))
564    }
565
566    fn parse_string(&mut self) -> Result<Expr, LexicalError> {
567        let mut content = String::new();
568        while let Some(ch) = self.next_char() {
569            match ch {
570                '\\' if !self.kind.is_raw() => {
571                    content.push_str(&self.parse_escaped_char()?);
572                }
573                ch => content.push(ch),
574            }
575        }
576        Ok(self.expr(
577            ast::ExprConstant {
578                value: Constant::Str(content),
579                kind: self.kind.is_unicode().then(|| "u".to_string()),
580                range: self.range(),
581            }
582            .into(),
583        ))
584    }
585
586    fn parse(&mut self) -> Result<Vec<Expr>, LexicalError> {
587        if self.kind.is_any_fstring() {
588            self.parse_fstring(0)
589        } else if self.kind.is_any_bytes() {
590            self.parse_bytes().map(|expr| vec![expr])
591        } else {
592            self.parse_string().map(|expr| vec![expr])
593        }
594    }
595}
596
597fn parse_fstring_expr(source: &str, location: TextSize) -> Result<Expr, ParseError> {
598    let fstring_body = format!("({source})");
599    let start = location - TextSize::from(1);
600    ast::Expr::parse_starts_at(&fstring_body, "<fstring>", start)
601}
602
603fn parse_string(
604    source: &str,
605    kind: StringKind,
606    triple_quoted: bool,
607    start: TextSize,
608    end: TextSize,
609) -> Result<Vec<Expr>, LexicalError> {
610    StringParser::new(source, kind, triple_quoted, start, end).parse()
611}
612
613pub(crate) fn parse_strings(
614    values: Vec<(TextSize, (String, StringKind, bool), TextSize)>,
615) -> Result<Expr, LexicalError> {
616    // Preserve the initial location and kind.
617    let initial_start = values[0].0;
618    let last_end = values.last().unwrap().2;
619    let initial_kind = (values[0].1 .1 == StringKind::Unicode).then(|| "u".to_owned());
620    let has_fstring = values
621        .iter()
622        .any(|(_, (_, kind, ..), _)| kind.is_any_fstring());
623    let num_bytes = values
624        .iter()
625        .filter(|(_, (_, kind, ..), _)| kind.is_any_bytes())
626        .count();
627    let has_bytes = num_bytes > 0;
628
629    if has_bytes && num_bytes < values.len() {
630        return Err(LexicalError {
631            error: LexicalErrorType::OtherError(
632                "cannot mix bytes and nonbytes literals".to_owned(),
633            ),
634            location: initial_start,
635        });
636    }
637
638    if has_bytes {
639        let mut content: Vec<u8> = vec![];
640        for (start, (source, kind, triple_quoted), end) in values {
641            for value in parse_string(&source, kind, triple_quoted, start, end)? {
642                match value {
643                    Expr::Constant(ast::ExprConstant {
644                        value: Constant::Bytes(value),
645                        ..
646                    }) => content.extend(value),
647                    _ => unreachable!("Unexpected non-bytes expression."),
648                }
649            }
650        }
651        return Ok(ast::ExprConstant {
652            value: Constant::Bytes(content),
653            kind: None,
654            range: TextRange::new(initial_start, last_end),
655        }
656        .into());
657    }
658
659    if !has_fstring {
660        let mut content: Vec<String> = vec![];
661        for (start, (source, kind, triple_quoted), end) in values {
662            for value in parse_string(&source, kind, triple_quoted, start, end)? {
663                match value {
664                    Expr::Constant(ast::ExprConstant {
665                        value: Constant::Str(value),
666                        ..
667                    }) => content.push(value),
668                    _ => unreachable!("Unexpected non-string expression."),
669                }
670            }
671        }
672        return Ok(ast::ExprConstant {
673            value: Constant::Str(content.join("")),
674            kind: initial_kind,
675            range: TextRange::new(initial_start, last_end),
676        }
677        .into());
678    }
679
680    // De-duplicate adjacent constants.
681    let mut deduped: Vec<Expr> = vec![];
682    let mut current: Vec<String> = vec![];
683
684    let take_current = |current: &mut Vec<String>| -> Expr {
685        Expr::Constant(ast::ExprConstant {
686            value: Constant::Str(current.drain(..).join("")),
687            kind: initial_kind.clone(),
688            range: TextRange::new(initial_start, last_end),
689        })
690    };
691
692    for (start, (source, kind, triple_quoted), end) in values {
693        for value in parse_string(&source, kind, triple_quoted, start, end)? {
694            match value {
695                Expr::FormattedValue { .. } => {
696                    if !current.is_empty() {
697                        deduped.push(take_current(&mut current));
698                    }
699                    deduped.push(value)
700                }
701                Expr::Constant(ast::ExprConstant {
702                    value: Constant::Str(value),
703                    ..
704                }) => current.push(value),
705                _ => unreachable!("Unexpected non-string expression."),
706            }
707        }
708    }
709    if !current.is_empty() {
710        deduped.push(take_current(&mut current));
711    }
712
713    Ok(Expr::JoinedStr(ast::ExprJoinedStr {
714        values: deduped,
715        range: TextRange::new(initial_start, last_end),
716    }))
717}
718
// TODO: consolidate these with ParseError
/// An error that occurred during parsing of an f-string.
///
/// Pairs the kind of failure with the offset at which it was detected. It is turned
/// into a `LexicalError` carrying `LexicalErrorType::FStringError` through the `From`
/// impls in this module.
#[derive(Debug, PartialEq)]
struct FStringError {
    /// The type of error that occurred.
    pub error: FStringErrorType,
    /// The location of the error.
    pub location: TextSize,
}
728
729impl FStringError {
730    /// Creates a new `FStringError` with the given error type and location.
731    pub fn new(error: FStringErrorType, location: TextSize) -> Self {
732        Self { error, location }
733    }
734}
735
736impl From<FStringError> for LexicalError {
737    fn from(err: FStringError) -> Self {
738        LexicalError {
739            error: LexicalErrorType::FStringError(err.error),
740            location: err.location,
741        }
742    }
743}
744
/// Represents the different types of errors that can occur during parsing of an f-string.
///
/// The user-facing message of each variant is produced by its `Display` impl.
#[derive(Debug, PartialEq)]
pub enum FStringErrorType {
    /// Expected a right brace after an opened left brace: a replacement field was
    /// opened with `{` but not properly closed.
    UnclosedLbrace,
    /// A right brace was found without a matching opening left brace.
    UnopenedRbrace,
    /// Expected a right brace after a conversion flag.
    ExpectedRbrace,
    /// An error occurred while parsing an f-string expression; carries the
    /// underlying parse error.
    InvalidExpression(Box<ParseErrorType>),
    /// An invalid conversion flag was encountered (only `!s`, `!a` and `!r` are valid).
    InvalidConversionFlag,
    /// An empty expression was encountered.
    EmptyExpression,
    /// An opening delimiter was closed by the wrong closing delimiter; holds the
    /// opening delimiter first and the offending closing delimiter second.
    MismatchedDelimiter(char, char),
    /// Too many nested expressions in an f-string.
    ExpressionNestedTooDeeply,
    /// The f-string expression cannot include the given character.
    ExpressionCannotInclude(char),
    /// A single right brace was encountered (a literal `}` must be written as `}}`).
    SingleRbrace,
    /// A closing delimiter was not opened properly; holds the closing delimiter.
    Unmatched(char),
    // TODO: Test this case.
    /// Unterminated string.
    UnterminatedString,
}
774
775impl std::fmt::Display for FStringErrorType {
776    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
777        use FStringErrorType::*;
778        match self {
779            UnclosedLbrace => write!(f, "expecting '}}'"),
780            UnopenedRbrace => write!(f, "Unopened '}}'"),
781            ExpectedRbrace => write!(f, "Expected '}}' after conversion flag."),
782            InvalidExpression(error) => {
783                write!(f, "{error}")
784            }
785            InvalidConversionFlag => write!(f, "invalid conversion character"),
786            EmptyExpression => write!(f, "empty expression not allowed"),
787            MismatchedDelimiter(first, second) => write!(
788                f,
789                "closing parenthesis '{second}' does not match opening parenthesis '{first}'"
790            ),
791            SingleRbrace => write!(f, "single '}}' is not allowed"),
792            Unmatched(delim) => write!(f, "unmatched '{delim}'"),
793            ExpressionNestedTooDeeply => {
794                write!(f, "expressions nested too deeply")
795            }
796            UnterminatedString => {
797                write!(f, "unterminated string")
798            }
799            ExpressionCannotInclude(c) => {
800                if *c == '\\' {
801                    write!(f, "f-string expression part cannot include a backslash")
802                } else {
803                    write!(f, "f-string expression part cannot include '{c}'s")
804                }
805            }
806        }
807    }
808}
809
810impl From<FStringError> for LalrpopError<TextSize, Tok, LexicalError> {
811    fn from(err: FStringError) -> Self {
812        lalrpop_util::ParseError::User {
813            error: LexicalError {
814                error: LexicalErrorType::FStringError(err.error),
815                location: err.location,
816            },
817        }
818    }
819}
820
821#[cfg(test)]
822mod tests {
823    use super::*;
824    use crate::{ast, Parse};
825
826    fn parse_fstring(source: &str) -> Result<Vec<Expr>, LexicalError> {
827        StringParser::new(
828            source,
829            StringKind::FString,
830            false,
831            TextSize::default(),
832            TextSize::default() + source.text_len() + TextSize::from(3), // 3 for prefix and quotes
833        )
834        .parse()
835    }
836
837    #[test]
838    fn test_parse_fstring() {
839        let source = "{a}{ b }{{foo}}";
840        let parse_ast = parse_fstring(source).unwrap();
841
842        insta::assert_debug_snapshot!(parse_ast);
843    }
844
845    #[test]
846    fn test_parse_fstring_nested_spec() {
847        let source = "{foo:{spec}}";
848        let parse_ast = parse_fstring(source).unwrap();
849
850        insta::assert_debug_snapshot!(parse_ast);
851    }
852
853    #[test]
854    fn test_parse_fstring_not_nested_spec() {
855        let source = "{foo:spec}";
856        let parse_ast = parse_fstring(source).unwrap();
857
858        insta::assert_debug_snapshot!(parse_ast);
859    }
860
861    #[test]
862    fn test_parse_empty_fstring() {
863        insta::assert_debug_snapshot!(parse_fstring("").unwrap());
864    }
865
866    #[test]
867    fn test_fstring_parse_self_documenting_base() {
868        let src = "{user=}";
869        let parse_ast = parse_fstring(src).unwrap();
870
871        insta::assert_debug_snapshot!(parse_ast);
872    }
873
874    #[test]
875    fn test_fstring_parse_self_documenting_base_more() {
876        let src = "mix {user=} with text and {second=}";
877        let parse_ast = parse_fstring(src).unwrap();
878
879        insta::assert_debug_snapshot!(parse_ast);
880    }
881
882    #[test]
883    fn test_fstring_parse_self_documenting_format() {
884        let src = "{user=:>10}";
885        let parse_ast = parse_fstring(src).unwrap();
886
887        insta::assert_debug_snapshot!(parse_ast);
888    }
889
890    fn parse_fstring_error(source: &str) -> FStringErrorType {
891        parse_fstring(source)
892            .map_err(|e| match e.error {
893                LexicalErrorType::FStringError(e) => e,
894                e => unreachable!("Expected FStringError: {:?}", e),
895            })
896            .expect_err("Expected error")
897    }
898
899    #[test]
900    fn test_parse_invalid_fstring() {
901        use FStringErrorType::*;
902        assert_eq!(parse_fstring_error("{5!a"), UnclosedLbrace);
903        assert_eq!(parse_fstring_error("{5!a1}"), UnclosedLbrace);
904        assert_eq!(parse_fstring_error("{5!"), UnclosedLbrace);
905        assert_eq!(parse_fstring_error("abc{!a 'cat'}"), EmptyExpression);
906        assert_eq!(parse_fstring_error("{!a"), EmptyExpression);
907        assert_eq!(parse_fstring_error("{ !a}"), EmptyExpression);
908
909        assert_eq!(parse_fstring_error("{5!}"), InvalidConversionFlag);
910        assert_eq!(parse_fstring_error("{5!x}"), InvalidConversionFlag);
911
912        assert_eq!(
913            parse_fstring_error("{a:{a:{b}}}"),
914            ExpressionNestedTooDeeply
915        );
916
917        assert_eq!(parse_fstring_error("{a:b}}"), SingleRbrace);
918        assert_eq!(parse_fstring_error("}"), SingleRbrace);
919        assert_eq!(parse_fstring_error("{a:{b}"), UnclosedLbrace);
920        assert_eq!(parse_fstring_error("{"), UnclosedLbrace);
921
922        assert_eq!(parse_fstring_error("{}"), EmptyExpression);
923
924        // TODO: check for InvalidExpression enum?
925        assert!(parse_fstring("{class}").is_err());
926    }
927
928    #[test]
929    fn test_parse_fstring_not_equals() {
930        let source = "{1 != 2}";
931        let parse_ast = parse_fstring(source).unwrap();
932        insta::assert_debug_snapshot!(parse_ast);
933    }
934
935    #[test]
936    fn test_parse_fstring_equals() {
937        let source = "{42 == 42}";
938        let parse_ast = parse_fstring(source).unwrap();
939        insta::assert_debug_snapshot!(parse_ast);
940    }
941
942    #[test]
943    fn test_parse_fstring_self_doc_prec_space() {
944        let source = "{x   =}";
945        let parse_ast = parse_fstring(source).unwrap();
946        insta::assert_debug_snapshot!(parse_ast);
947    }
948
949    #[test]
950    fn test_parse_fstring_self_doc_trailing_space() {
951        let source = "{x=   }";
952        let parse_ast = parse_fstring(source).unwrap();
953        insta::assert_debug_snapshot!(parse_ast);
954    }
955
956    #[test]
957    fn test_parse_fstring_yield_expr() {
958        let source = "{yield}";
959        let parse_ast = parse_fstring(source).unwrap();
960        insta::assert_debug_snapshot!(parse_ast);
961    }
962
963    #[test]
964    fn test_parse_fstring_escaped_brackets() {
965        let source = "\\{{x\\}}";
966        let parse_ast = parse_fstring(source).unwrap();
967        insta::assert_debug_snapshot!(parse_ast);
968    }
969
970    #[test]
971    fn test_parse_string_concat() {
972        let source = "'Hello ' 'world'";
973        let parse_ast = ast::Suite::parse(source, "<test>").unwrap();
974        insta::assert_debug_snapshot!(parse_ast);
975    }
976
977    #[test]
978    fn test_parse_u_string_concat_1() {
979        let source = "'Hello ' u'world'";
980        let parse_ast = ast::Suite::parse(source, "<test>").unwrap();
981        insta::assert_debug_snapshot!(parse_ast);
982    }
983
984    #[test]
985    fn test_parse_u_string_concat_2() {
986        let source = "u'Hello ' 'world'";
987        let parse_ast = ast::Suite::parse(source, "<test>").unwrap();
988        insta::assert_debug_snapshot!(parse_ast);
989    }
990
991    #[test]
992    fn test_parse_f_string_concat_1() {
993        let source = "'Hello ' f'world'";
994        let parse_ast = ast::Suite::parse(source, "<test>").unwrap();
995        insta::assert_debug_snapshot!(parse_ast);
996    }
997
998    #[test]
999    fn test_parse_f_string_concat_2() {
1000        let source = "'Hello ' f'world'";
1001        let parse_ast = ast::Suite::parse(source, "<test>").unwrap();
1002        insta::assert_debug_snapshot!(parse_ast);
1003    }
1004
1005    #[test]
1006    fn test_parse_f_string_concat_3() {
1007        let source = "'Hello ' f'world{\"!\"}'";
1008        let parse_ast = ast::Suite::parse(source, "<test>").unwrap();
1009        insta::assert_debug_snapshot!(parse_ast);
1010    }
1011
1012    #[test]
1013    fn test_parse_u_f_string_concat_1() {
1014        let source = "u'Hello ' f'world'";
1015        let parse_ast = ast::Suite::parse(source, "<test>").unwrap();
1016        insta::assert_debug_snapshot!(parse_ast);
1017    }
1018
1019    #[test]
1020    fn test_parse_u_f_string_concat_2() {
1021        let source = "u'Hello ' f'world' '!'";
1022        let parse_ast = ast::Suite::parse(source, "<test>").unwrap();
1023        insta::assert_debug_snapshot!(parse_ast);
1024    }
1025
1026    #[test]
1027    fn test_parse_string_triple_quotes_with_kind() {
1028        let source = "u'''Hello, world!'''";
1029        let parse_ast = ast::Suite::parse(source, "<test>").unwrap();
1030        insta::assert_debug_snapshot!(parse_ast);
1031    }
1032
1033    #[test]
1034    fn test_single_quoted_byte() {
1035        // single quote
1036        let source = r##"b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'"##;
1037        let parse_ast = ast::Suite::parse(source, "<test>").unwrap();
1038        insta::assert_debug_snapshot!(parse_ast);
1039    }
1040
1041    #[test]
1042    fn test_double_quoted_byte() {
1043        // double quote
1044        let source = r##"b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff""##;
1045        let parse_ast = ast::Suite::parse(source, "<test>").unwrap();
1046        insta::assert_debug_snapshot!(parse_ast);
1047    }
1048
1049    #[test]
1050    fn test_escape_char_in_byte_literal() {
1051        // backslash does not escape
1052        let source = r##"b"omkmok\Xaa""##; // spell-checker:ignore omkmok
1053        let parse_ast = ast::Suite::parse(source, "<test>").unwrap();
1054        insta::assert_debug_snapshot!(parse_ast);
1055    }
1056
1057    #[test]
1058    fn test_raw_byte_literal_1() {
1059        let source = r"rb'\x1z'";
1060        let parse_ast = ast::Suite::parse(source, "<test>").unwrap();
1061        insta::assert_debug_snapshot!(parse_ast);
1062    }
1063
1064    #[test]
1065    fn test_raw_byte_literal_2() {
1066        let source = r"rb'\\'";
1067        let parse_ast = ast::Suite::parse(source, "<test>").unwrap();
1068        insta::assert_debug_snapshot!(parse_ast);
1069    }
1070
1071    #[test]
1072    fn test_escape_octet() {
1073        let source = r##"b'\43a\4\1234'"##;
1074        let parse_ast = ast::Suite::parse(source, "<test>").unwrap();
1075        insta::assert_debug_snapshot!(parse_ast);
1076    }
1077
1078    #[test]
1079    fn test_fstring_escaped_newline() {
1080        let source = r#"f"\n{x}""#;
1081        let parse_ast = ast::Suite::parse(source, "<test>").unwrap();
1082        insta::assert_debug_snapshot!(parse_ast);
1083    }
1084
1085    #[test]
1086    fn test_fstring_unescaped_newline() {
1087        let source = r#"f"""
1088{x}""""#;
1089        let parse_ast = ast::Suite::parse(source, "<test>").unwrap();
1090        insta::assert_debug_snapshot!(parse_ast);
1091    }
1092
1093    #[test]
1094    fn test_fstring_escaped_character() {
1095        let source = r#"f"\\{x}""#;
1096        let parse_ast = ast::Suite::parse(source, "<test>").unwrap();
1097        insta::assert_debug_snapshot!(parse_ast);
1098    }
1099
1100    #[test]
1101    fn test_raw_fstring() {
1102        let source = r#"rf"{x}""#;
1103        let parse_ast = ast::Suite::parse(source, "<test>").unwrap();
1104        insta::assert_debug_snapshot!(parse_ast);
1105    }
1106
1107    #[test]
1108    fn test_triple_quoted_raw_fstring() {
1109        let source = r#"rf"""{x}""""#;
1110        let parse_ast = ast::Suite::parse(source, "<test>").unwrap();
1111        insta::assert_debug_snapshot!(parse_ast);
1112    }
1113
1114    #[test]
1115    fn test_fstring_line_continuation() {
1116        let source = r#"rf"\
1117{x}""#;
1118        let parse_ast = ast::Suite::parse(source, "<test>").unwrap();
1119        insta::assert_debug_snapshot!(parse_ast);
1120    }
1121
1122    macro_rules! test_aliases_parse {
1123        ($($name:ident: $alias:expr,)*) => {
1124        $(
1125            #[test]
1126            fn $name() {
1127                let source = format!(r#""\N{{{0}}}""#, $alias);
1128                let parse_ast = ast::Suite::parse(&source, "<test>").unwrap();
1129                insta::assert_debug_snapshot!(parse_ast);
1130            }
1131        )*
1132        }
1133    }
1134
1135    test_aliases_parse! {
1136        test_backspace_alias: "BACKSPACE",
1137        test_bell_alias: "BEL",
1138        test_carriage_return_alias: "CARRIAGE RETURN",
1139        test_delete_alias: "DELETE",
1140        test_escape_alias: "ESCAPE",
1141        test_form_feed_alias: "FORM FEED",
1142        test_hts_alias: "HTS",
1143        test_character_tabulation_with_justification_alias: "CHARACTER TABULATION WITH JUSTIFICATION",
1144    }
1145}