litcheck_filecheck/parse/
parser.rs

/// Helper for the grammar definition: builds a `SourceSpan` from byte offsets.
///
/// * `span!(start, end)` produces the span covering `start..end`.
/// * `span!(at)` produces the empty span `at..at`; note that the expression is
///   expanded (and therefore evaluated) twice.
macro_rules! span {
    ($start:expr, $end:expr) => {
        litcheck::diagnostics::SourceSpan::from($start..$end)
    };
    ($at:expr) => {
        span!($at, $at)
    };
}
10
11lalrpop_util::lalrpop_mod!(
12    #[allow(clippy::all)]
13    grammar,
14    "/parse/grammar.rs"
15);
16
17use litcheck::{
18    diagnostics::{SourceFile, SourceSpan, Span, Spanned},
19    StringInterner,
20};
21use std::borrow::Cow;
22
23use super::{Lexed, Lexer, ParseError, ParseResult, ParserError, Token};
24use crate::{ast::*, Config};
25
/// Pulls the next item out of `$lexer` and evaluates to `Option<(start, token, end)>`.
///
/// Must be used inside a function returning a `Result` whose error type is `ParserError`
/// (or something `ParserError`/the lexer error converts into), because it returns early:
/// * a lexer-level `Err` is propagated with `?`;
/// * a `Token::Error` is converted with `ParserError::from` and returned as `Err`.
///
/// `None` means the lexer is exhausted.
macro_rules! lex {
    ($lexer:ident) => {{
        let next = $lexer.next().transpose()?;
        if let Some((_, Token::Error(cause), _)) = next {
            return Err(ParserError::from(cause));
        }
        next
    }};
}
34
/// Renders the token named by a pattern as the string used in "expected ..." diagnostics.
///
/// * `Enum::Variant` is stringified directly.
/// * `Enum::Variant(pat)` is constructed with a `Default::default()` payload and then
///   stringified; the payload pattern itself is only used to select this arm.
macro_rules! expected_token {
    ($enum_name:ident :: $variant:ident) => {
        $enum_name::$variant.to_string()
    };

    ($enum_name:ident :: $variant:ident ($payload:pat)) => {
        $enum_name::$variant(Default::default()).to_string()
    };
}
44
/// Consumes the next token from `$lexer` and evaluates to its `(start, token, end)` triple,
/// provided the token matches one of the `|`-separated alternatives.
///
/// Otherwise the enclosing function returns early with:
/// * `ParserError::UnrecognizedToken` if a different token was found, or
/// * `ParserError::UnrecognizedEof` if the lexer is exhausted.
///
/// In both cases `expected` lists the alternatives as rendered by `expected_token!`.
/// Errors raised by `lex!` are propagated as well.
macro_rules! expect {
    ($lexer:ident, $($enum_name:ident :: $variant:ident $(($payload:pat))?)|+) => {
        match lex!($lexer) {
            None => {
                return Err(ParserError::UnrecognizedEof {
                    span: $lexer.current_offset(),
                    expected: vec![$(expected_token!($enum_name :: $variant $(($payload))?)),*],
                });
            }
            $(
                Some(hit @ (_, $enum_name :: $variant $(($payload))?, _)) => hit,
            )*
            Some((lo, found, hi)) => {
                let span = SourceSpan::from(lo..hi);
                let token = found.to_string();
                return Err(ParserError::UnrecognizedToken {
                    span,
                    token,
                    expected: vec![$(expected_token!($enum_name :: $variant $(($payload))?)),*],
                });
            }
        }
    };
}
67
/// Consumes the next token from `$lexer`, which must be a `Token::Raw`, and evaluates to
/// its text wrapped in a `Span` covering the token.
///
/// Otherwise the enclosing function returns early with `ParserError::UnrecognizedToken`
/// (some other token) or `ParserError::UnrecognizedEof` (no more input), each reporting
/// `"literal"` as the expected item. Errors raised by `lex!` are propagated as well.
macro_rules! expect_literal {
    ($lexer:ident) => {
        match lex!($lexer) {
            None => {
                return Err(ParserError::UnrecognizedEof {
                    span: $lexer.current_offset(),
                    expected: vec!["literal".to_string()],
                });
            }
            Some((lo, Token::Raw(text), hi)) => Span::new(SourceSpan::from(lo..hi), text),
            Some((lo, found, hi)) => {
                let span = SourceSpan::from(lo..hi);
                let token = found.to_string();
                return Err(ParserError::UnrecognizedToken {
                    span,
                    token,
                    expected: vec!["literal".to_string()],
                });
            }
        }
    };
}
88
/// Like `expect!`, but discards the matched token: consumes the next token from `$lexer`
/// and evaluates to `()` when it matches one of the `|`-separated alternatives.
///
/// Otherwise the enclosing function returns early with `ParserError::UnrecognizedToken`
/// (a different token) or `ParserError::UnrecognizedEof` (no more input); `expected` lists
/// the alternatives as rendered by `expected_token!`. Errors raised by `lex!` are
/// propagated as well.
macro_rules! expect_ignore {
    ($lexer:ident, $($enum_name:ident :: $variant:ident $(($payload:pat))?)|+) => {
        match lex!($lexer) {
            None => {
                return Err(ParserError::UnrecognizedEof {
                    span: $lexer.current_offset(),
                    expected: vec![$(expected_token!($enum_name :: $variant $(($payload))?)),*],
                });
            }
            // One arm per accepted alternative; the token itself is dropped
            $(
                Some((_, $enum_name :: $variant $(($payload))?, _)) => {}
            )*
            Some((lo, found, hi)) => {
                let span = SourceSpan::from(lo..hi);
                let token = found.to_string();
                return Err(ParserError::UnrecognizedToken {
                    span,
                    token,
                    expected: vec![$(expected_token!($enum_name :: $variant $(($payload))?)),*],
                });
            }
        }
    };
}
110
111pub struct CheckFileParser<'config> {
112    config: &'config Config,
113    pub interner: &'config mut StringInterner,
114}
115impl<'config> CheckFileParser<'config> {
116    pub fn new(config: &'config Config, interner: &'config mut StringInterner) -> Self {
117        Self { config, interner }
118    }
119
120    pub fn parse<'a, S>(&mut self, code: &'a S) -> ParseResult<CheckFile<'a>>
121    where
122        S: SourceFile + ?Sized + 'a,
123    {
124        let source = code.source();
125        let mut lexer = Lexer::<'a>::new(
126            code,
127            &self.config.check_prefixes,
128            &self.config.comment_prefixes,
129        );
130        let mut comment = vec![];
131        let mut lines = vec![];
132        while let Some(lexed) = lexer.next() {
133            let (start, token, end) = lexed?;
134            match token {
135                Token::Comment(s) => {
136                    comment.push(s);
137                    continue;
138                }
139                Token::Check(ty) => {
140                    let mut line =
141                        self.parse_check(Span::new(start..end, ty), source, &mut lexer)?;
142                    line.comment.append(&mut comment);
143                    lines.push(line);
144                }
145                Token::Lf => continue,
146                Token::Error(err) => return Err(ParserError::from(err)),
147                token => {
148                    return Err(ParserError::ExtraToken {
149                        span: SourceSpan::from(start..end),
150                        token: token.to_string(),
151                    })
152                }
153            }
154        }
155
156        let unused_prefixes = lexer.unused_prefixes();
157        if !unused_prefixes.is_empty() && !self.config.allow_unused_prefixes {
158            return Err(ParserError::UnusedCheckPrefixes(unused_prefixes));
159        }
160
161        Ok(CheckFile::new(lines))
162    }
163
164    fn parse_check<'a>(
165        &mut self,
166        ty: Span<Check>,
167        source: &'a str,
168        lexer: &mut Lexer<'a>,
169    ) -> ParseResult<CheckLine<'a>> {
170        let line_start = ty.start();
171        let check_end = ty.end();
172
173        // Modifiers (optional)
174        let mut modifiers = CheckModifier::default();
175        let mut modifier_start = ty.end();
176        let mut modifier_end = modifier_start;
177        while let (start, Token::Modifier(modifier), end) =
178            expect!(lexer, Token::Modifier(_) | Token::Colon)
179        {
180            modifier_start = start;
181            modifier_end = end;
182            modifiers |= modifier;
183        }
184        let modifiers = Span::new(modifier_start..modifier_end, modifiers);
185
186        // CheckType
187        let ty_span = ty.range();
188        let ty = ty.into_inner();
189        let ty = CheckType::new(ty_span.into(), ty).with_modifiers(modifiers);
190
191        // CheckPattern
192        if modifiers.contains(CheckModifier::LITERAL) {
193            match expect!(lexer, Token::Raw(_) | Token::Lf) {
194                (start, Token::Raw(pattern), end) => {
195                    let pattern = Span::new(start..end, pattern);
196                    Ok(CheckLine::new(
197                        SourceSpan::from(line_start..end),
198                        ty,
199                        CheckPattern::Literal(pattern.map(Cow::Borrowed)),
200                    ))
201                }
202                (_, _, end) if matches!(ty.kind, Check::Empty) => Ok(CheckLine::new(
203                    SourceSpan::from(line_start..end),
204                    ty,
205                    CheckPattern::Empty(SourceSpan::from(line_start..end)),
206                )),
207                (_, _, end) => {
208                    // Expected a non-empty pattern
209                    Err(ParserError::EmptyPattern {
210                        span: SourceSpan::from(line_start..end),
211                    })
212                }
213            }
214        } else {
215            let mut parts = vec![];
216            loop {
217                match lexer.peek() {
218                    Some(Token::MatchStart) => {
219                        let part = self.parse_match_block(source, lexer)?;
220                        parts.push(part);
221                    }
222                    Some(Token::RegexStart) => {
223                        expect_ignore!(lexer, Token::RegexStart);
224                        let pattern = expect_literal!(lexer);
225                        parts.push(CheckPatternPart::Regex(RegexPattern::new(
226                            pattern.map(Cow::Borrowed),
227                        )));
228                        expect_ignore!(lexer, Token::RegexEnd);
229                    }
230                    Some(Token::Raw(_)) => {
231                        let pattern = expect_literal!(lexer);
232                        parts.push(CheckPatternPart::Literal(pattern.map(Cow::Borrowed)));
233                    }
234                    Some(Token::Lf) => {
235                        expect_ignore!(lexer, Token::Lf);
236                        break;
237                    }
238                    Some(_) => {
239                        let (start, token, end) = lex!(lexer).unwrap();
240                        return Err(ParserError::UnrecognizedToken {
241                            span: SourceSpan::from(start..end),
242                            token: token.to_string(),
243                            expected: vec![
244                                "literal".to_string(),
245                                "[[".to_string(),
246                                "{{".to_string(),
247                                Token::Lf.to_string(),
248                            ],
249                        });
250                    }
251                    None => {
252                        break;
253                    }
254                }
255            }
256
257            match parts.len() {
258                0 if matches!(ty.kind, Check::Empty) => Ok(CheckLine::new(
259                    SourceSpan::from(line_start..check_end),
260                    ty,
261                    CheckPattern::Empty(SourceSpan::from(line_start..check_end)),
262                )),
263                0 => Err(ParserError::EmptyPattern {
264                    span: SourceSpan::from(line_start..check_end),
265                }),
266                1 => {
267                    let pattern = match parts.pop().unwrap() {
268                        CheckPatternPart::Literal(literal) => CheckPattern::Literal(literal),
269                        CheckPatternPart::Regex(regex) => CheckPattern::Regex(regex),
270                        part @ CheckPatternPart::Match(_) => {
271                            let span = part.span();
272                            parts.push(part);
273                            CheckPattern::Match(Span::new(span, parts))
274                        }
275                    };
276                    Ok(CheckLine::new(
277                        SourceSpan::from(line_start..pattern.end()),
278                        ty,
279                        pattern,
280                    ))
281                }
282                _ => {
283                    let start = parts.first().unwrap().start();
284                    let end = parts.last().unwrap().end();
285                    Ok(CheckLine::new(
286                        SourceSpan::from(line_start..end),
287                        ty,
288                        CheckPattern::Match(Span::new(start..end, parts)),
289                    ))
290                }
291            }
292        }
293    }
294
295    fn parse_match_block<'a>(
296        &mut self,
297        source: &'a str,
298        lexer: &mut Lexer<'a>,
299    ) -> ParseResult<CheckPatternPart<'a>> {
300        let mut tokens: Vec<Lexed<'a>> = vec![];
301        tokens.push(Ok(expect!(lexer, Token::MatchStart)));
302
303        loop {
304            if matches!(lexer.peek(), Some(Token::MatchEnd)) {
305                break;
306            }
307            match lex!(lexer) {
308                Some((start, Token::Lf, end)) => {
309                    return Err(ParserError::UnrecognizedToken {
310                        span: SourceSpan::from(start..end),
311                        token: Token::Lf.to_string(),
312                        expected: vec!["]]".to_string()],
313                    });
314                }
315                Some(token) => {
316                    tokens.push(Ok(token));
317                }
318                None => {
319                    return Err(ParserError::UnrecognizedEof {
320                        span: lexer.current_offset(),
321                        expected: vec!["]]".to_string()],
322                    });
323                }
324            }
325        }
326
327        let (match_end_start, match_end, match_end_end) = expect!(lexer, Token::MatchEnd);
328
329        if tokens.len() == 1 {
330            return Err(ParserError::UnrecognizedToken {
331                span: SourceSpan::from(match_end_start..match_end_end),
332                token: match_end.to_string(),
333                expected: vec!["a non-empty match expression".to_string()],
334            });
335        }
336
337        tokens.push(Ok((match_end_start, match_end, match_end_end)));
338
339        Ok(CheckPatternPart::Match(self.parse_match(source, tokens)?))
340    }
341
342    fn parse_match<'a>(
343        &mut self,
344        source: &'a str,
345        tokens: Vec<Lexed<'a>>,
346    ) -> ParseResult<Match<'a>> {
347        let lexer = tokens.into_iter();
348        grammar::MatchParser::new()
349            .parse(source, self.interner, lexer)
350            .map_err(handle_parse_error)
351    }
352}
353
354fn handle_parse_error(err: ParseError) -> ParserError {
355    match err {
356        ParseError::InvalidToken { location: at } => ParserError::InvalidToken {
357            span: SourceSpan::from(at..at),
358        },
359        ParseError::UnrecognizedToken {
360            token: (l, tok, r),
361            expected,
362        } => ParserError::UnrecognizedToken {
363            span: SourceSpan::from(l..r),
364            token: tok.to_string(),
365            expected,
366        },
367        ParseError::ExtraToken { token: (l, tok, r) } => ParserError::ExtraToken {
368            span: SourceSpan::from(l..r),
369            token: tok.to_string(),
370        },
371        ParseError::UnrecognizedEof {
372            location: at,
373            expected,
374        } => ParserError::UnrecognizedEof {
375            span: SourceSpan::from(at..at),
376            expected,
377        },
378        ParseError::User { error } => error,
379    }
380}