// litcheck_filecheck/parse/parser.rs

/// Convenience macro used by the grammar definition to build source spans.
///
/// `span!(start, end)` yields the span `start..end`; the single-argument
/// form `span!(at)` yields the zero-width span `at..at`.
macro_rules! span {
    ($at:expr) => {
        litcheck::diagnostics::SourceSpan::from($at..$at)
    };
    ($start:expr, $end:expr) => {
        litcheck::diagnostics::SourceSpan::from($start..$end)
    };
}
10
11lalrpop_util::lalrpop_mod!(
12    #[allow(clippy::all)]
13    grammar,
14    "/parse/grammar.rs"
15);
16
17use litcheck::{
18    diagnostics::{SourceFile, SourceSpan, Span, Spanned},
19    StringInterner,
20};
21use std::borrow::Cow;
22
23use super::{Lexed, Lexer, ParseError, ParseResult, ParserError, Token};
24use crate::{ast::*, Config};
25
/// Pull the next token from `$lexer`, propagating failures.
///
/// Evaluates to `Option<(start, Token, end)>`, where `None` means end of
/// input. Lexer `Result` failures propagate via `?`, and a `Token::Error`
/// payload is converted into a `ParserError` and returned early from the
/// enclosing function — note the hidden `return`.
macro_rules! lex {
    ($lexer:ident) => {
        match $lexer.next().transpose()? {
            // Surface lexical errors immediately as parser errors.
            Some((_, Token::Error(error), _)) => return Err(ParserError::from(error)),
            lexed => lexed,
        }
    };
}
34
/// Render the display name of an expected token variant for use in the
/// `expected` list of parser errors.
///
/// For payload-carrying variants the payload is replaced with
/// `Default::default()` so only the variant's display form is produced;
/// the matched pattern itself is discarded.
macro_rules! expected_token {
    ($expected_ty:ident :: $expected_variant:ident ($expected_pat:pat)) => {
        $expected_ty::$expected_variant(Default::default()).to_string()
    };

    ($expected_ty:ident :: $expected_variant:ident) => {
        $expected_ty::$expected_variant.to_string()
    };
}
44
/// Lex the next token and require it to match one of the listed variants,
/// e.g. `expect!(lexer, Token::Raw(_) | Token::Lf)`.
///
/// Evaluates to the matching `(start, token, end)` triple. On any other
/// token — or on end of input — it returns early from the enclosing
/// function with `UnrecognizedToken`/`UnrecognizedEof`, listing the
/// expected token names built via `expected_token!`.
macro_rules! expect {
    ($lexer:ident, $($expected_ty:ident :: $expected_variant:ident $(($expected_pat:pat))?)|+) => {
        match lex!($lexer) {
            $(
                // One arm per accepted variant; the whole triple is bound
                // via `lexed @ ..` and becomes the macro's value.
                Some(lexed @ (_, $expected_ty :: $expected_variant $(($expected_pat))?, _)) => lexed,
            )*
            Some((start, token, end)) => {
                return Err(ParserError::UnrecognizedToken {
                    span: SourceSpan::from(start..end),
                    token: token.to_string(),
                    expected: vec![$(expected_token!($expected_ty :: $expected_variant $(($expected_pat))?)),*],
                });
            }
            None => {
                return Err(ParserError::UnrecognizedEof {
                    span: $lexer.current_offset(),
                    expected: vec![$(expected_token!($expected_ty :: $expected_variant $(($expected_pat))?)),*],
                });
            }
        }
    };
}
67
/// Lex the next token and require it to be `Token::Raw`.
///
/// Evaluates to the raw text wrapped in a `Span` covering `start..end`.
/// Any other token — or end of input — returns early from the enclosing
/// function with an error whose expected list is `"literal"`.
macro_rules! expect_literal {
    ($lexer:ident) => {
        match lex!($lexer) {
            Some((start, Token::Raw(raw), end)) => Span::new(SourceSpan::from(start..end), raw),
            Some((start, token, end)) => {
                return Err(ParserError::UnrecognizedToken {
                    span: SourceSpan::from(start..end),
                    token: token.to_string(),
                    expected: vec!["literal".to_string()],
                });
            }
            None => {
                return Err(ParserError::UnrecognizedEof {
                    span: $lexer.current_offset(),
                    expected: vec!["literal".to_string()],
                });
            }
        }
    };
}
88
/// Lex the next token, require it to match one of the listed variants, and
/// discard it.
///
/// Unlike `expect!`, this evaluates to `()` — it is used when only the
/// presence of the token matters (e.g. consuming `Token::Lf` or the
/// regex delimiters). Mismatches and end of input return early from the
/// enclosing function with the corresponding parser error.
macro_rules! expect_ignore {
    ($lexer:ident, $($expected_ty:ident :: $expected_variant:ident $(($expected_pat:pat))?)|+) => {
        match lex!($lexer) {
            Some((start, token, end)) => {
                if !matches!(token, $($expected_ty :: $expected_variant $(($expected_pat))?)|*) {
                    return Err(ParserError::UnrecognizedToken {
                        span: SourceSpan::from(start..end),
                        token: token.to_string(),
                        expected: vec![$(expected_token!($expected_ty :: $expected_variant $(($expected_pat))?)),*],
                    });
                }
            }
            None => {
                return Err(ParserError::UnrecognizedEof {
                    span: $lexer.current_offset(),
                    expected: vec![$(expected_token!($expected_ty :: $expected_variant $(($expected_pat))?)),*],
                });
            }
        }
    }
}
110
/// Parser for check files: drives a [`Lexer`] over the input and hands
/// `[[..]]` match-expression token runs to the LALRPOP-generated grammar.
pub struct CheckFileParser<'config> {
    // Parser configuration; `allow_unused_prefixes` is consulted after lexing.
    config: &'config Config,
    // String interner threaded into the generated `grammar::MatchParser`.
    pub interner: &'config mut StringInterner,
}
impl<'config> CheckFileParser<'config> {
    /// Create a parser that borrows the global `config` and the string
    /// `interner` used while parsing `[[..]]` match expressions.
    pub fn new(config: &'config Config, interner: &'config mut StringInterner) -> Self {
        Self { config, interner }
    }

    /// Parse `code` into a [`CheckFile`]: the ordered list of check lines,
    /// each with any immediately-preceding comment tokens attached.
    ///
    /// Errors on lexer failures, on stray tokens outside a check directive,
    /// and — unless `config.allow_unused_prefixes` is set — when the lexer
    /// reports check prefixes that never appeared in the input.
    pub fn parse<'a, S>(&mut self, code: &'a S) -> ParseResult<CheckFile<'a>>
    where
        S: SourceFile + ?Sized + 'a,
    {
        let source = code.source();
        let mut lexer = Lexer::<'a>::new(code, self.config);
        // Comment tokens seen since the last check line; drained into the
        // next check line that gets parsed.
        let mut comment = vec![];
        let mut lines = vec![];
        while let Some(lexed) = lexer.next() {
            let (start, token, end) = lexed?;
            match token {
                Token::Comment(s) => {
                    comment.push(s);
                    continue;
                }
                Token::Check(ty) => {
                    // Parse the remainder of the check line, then attach
                    // (and clear) the pending comments.
                    let mut line =
                        self.parse_check(Span::new(start..end, ty), source, &mut lexer)?;
                    line.comment.append(&mut comment);
                    lines.push(line);
                }
                // Line breaks between directives are ignored.
                Token::Lf => continue,
                Token::Error(err) => return Err(ParserError::from(err)),
                // Anything else at the top level is unexpected.
                token => {
                    return Err(ParserError::ExtraToken {
                        span: SourceSpan::from(start..end),
                        token: token.to_string(),
                    })
                }
            }
        }

        // Reject configured-but-unused check prefixes unless explicitly allowed.
        let unused_prefixes = lexer.unused_prefixes();
        if !unused_prefixes.is_empty() && !self.config.allow_unused_prefixes {
            return Err(ParserError::UnusedCheckPrefixes(unused_prefixes));
        }

        Ok(CheckFile::new(lines))
    }

    /// Parse the rest of a check line whose directive token (`ty`) has
    /// already been lexed: optional modifiers, then the pattern up to the
    /// end of the line.
    fn parse_check<'a>(
        &mut self,
        ty: Span<Check>,
        source: &'a str,
        lexer: &mut Lexer<'a>,
    ) -> ParseResult<CheckLine<'a>> {
        let line_start = ty.start();
        let check_end = ty.end();

        // Modifiers (optional): consume `Token::Modifier` tokens, OR-ing
        // their flags together. `expect!` only admits Modifier or Colon, so
        // the Colon that ends the directive fails the refutable `while let`
        // pattern and exits the loop (the Colon itself is consumed).
        let mut modifiers = CheckModifier::default();
        let mut modifier_start = ty.end();
        let mut modifier_end = modifier_start;
        while let (start, Token::Modifier(modifier), end) =
            expect!(lexer, Token::Modifier(_) | Token::Colon)
        {
            modifier_start = start;
            modifier_end = end;
            modifiers |= modifier;
        }
        // With no modifiers this span is empty, at ty.end().
        let modifiers = Span::new(modifier_start..modifier_end, modifiers);

        // CheckType
        let ty_span = ty.range();
        let ty = ty.into_inner();
        let ty = CheckType::new(ty_span.into(), ty).with_modifiers(modifiers);

        // CheckPattern
        if modifiers.contains(CheckModifier::LITERAL) {
            // LITERAL mode: the rest of the line is a single raw pattern.
            match expect!(lexer, Token::Raw(_) | Token::Lf) {
                (start, Token::Raw(pattern), end) => {
                    let pattern = Span::new(start..end, pattern);
                    Ok(CheckLine::new(
                        SourceSpan::from(line_start..end),
                        ty,
                        CheckPattern::Literal(pattern.map(Cow::Borrowed)),
                    ))
                }
                // Only `Check::Empty` may legitimately have no pattern.
                (_, _, end) if matches!(ty.kind, Check::Empty) => Ok(CheckLine::new(
                    SourceSpan::from(line_start..end),
                    ty,
                    CheckPattern::Empty(SourceSpan::from(line_start..end)),
                )),
                (_, _, end) => {
                    // Expected a non-empty pattern
                    Err(ParserError::EmptyPattern {
                        span: SourceSpan::from(line_start..end),
                    })
                }
            }
        } else {
            // Normal mode: the pattern is a sequence of literal, regex
            // (`{{..}}`) and match (`[[..]]`) parts, ended by `Token::Lf`
            // or end of input.
            let mut parts = vec![];
            loop {
                match lexer.peek() {
                    Some(Token::MatchStart) => {
                        // `[[` .. `]]` match-expression block
                        let part = self.parse_match_block(source, lexer)?;
                        parts.push(part);
                    }
                    Some(Token::RegexStart) => {
                        // `{{` raw-regex `}}`
                        expect_ignore!(lexer, Token::RegexStart);
                        let pattern = expect_literal!(lexer);
                        parts.push(CheckPatternPart::Regex(RegexPattern::new(
                            pattern.map(Cow::Borrowed),
                        )));
                        expect_ignore!(lexer, Token::RegexEnd);
                    }
                    Some(Token::Raw(_)) => {
                        let pattern = expect_literal!(lexer);
                        parts.push(CheckPatternPart::Literal(pattern.map(Cow::Borrowed)));
                    }
                    Some(Token::Lf) => {
                        // End of the check line.
                        expect_ignore!(lexer, Token::Lf);
                        break;
                    }
                    Some(_) => {
                        // `peek` was `Some`, so `lex!` cannot yield `None` here.
                        let (start, token, end) = lex!(lexer).unwrap();
                        return Err(ParserError::UnrecognizedToken {
                            span: SourceSpan::from(start..end),
                            token: token.to_string(),
                            expected: vec![
                                "literal".to_string(),
                                "[[".to_string(),
                                "{{".to_string(),
                                Token::Lf.to_string(),
                            ],
                        });
                    }
                    None => {
                        break;
                    }
                }
            }

            // Collapse the parts into the most specific pattern shape.
            match parts.len() {
                0 if matches!(ty.kind, Check::Empty) => Ok(CheckLine::new(
                    SourceSpan::from(line_start..check_end),
                    ty,
                    CheckPattern::Empty(SourceSpan::from(line_start..check_end)),
                )),
                0 => Err(ParserError::EmptyPattern {
                    span: SourceSpan::from(line_start..check_end),
                }),
                1 => {
                    // A single literal/regex part becomes that pattern
                    // directly; a lone match part keeps the `Match` shape
                    // (it is pushed back into `parts`).
                    let pattern = match parts.pop().unwrap() {
                        CheckPatternPart::Literal(literal) => CheckPattern::Literal(literal),
                        CheckPatternPart::Regex(regex) => CheckPattern::Regex(regex),
                        part @ CheckPatternPart::Match(_) => {
                            let span = part.span();
                            parts.push(part);
                            CheckPattern::Match(Span::new(span, parts))
                        }
                    };
                    Ok(CheckLine::new(
                        SourceSpan::from(line_start..pattern.end()),
                        ty,
                        pattern,
                    ))
                }
                _ => {
                    // Multiple parts: span them from first to last.
                    let start = parts.first().unwrap().start();
                    let end = parts.last().unwrap().end();
                    Ok(CheckLine::new(
                        SourceSpan::from(line_start..end),
                        ty,
                        CheckPattern::Match(Span::new(start..end, parts)),
                    ))
                }
            }
        }
    }

    /// Parse a `[[..]]` match block: buffer every token up to the closing
    /// `Token::MatchEnd` — a line break or EOF before it is an error, as is
    /// an empty block — then feed the buffered tokens to the grammar via
    /// [`Self::parse_match`].
    fn parse_match_block<'a>(
        &mut self,
        source: &'a str,
        lexer: &mut Lexer<'a>,
    ) -> ParseResult<CheckPatternPart<'a>> {
        let mut tokens: Vec<Lexed<'a>> = vec![];
        tokens.push(Ok(expect!(lexer, Token::MatchStart)));

        loop {
            if matches!(lexer.peek(), Some(Token::MatchEnd)) {
                break;
            }
            match lex!(lexer) {
                // A match block must be closed before the end of the line.
                Some((start, Token::Lf, end)) => {
                    return Err(ParserError::UnrecognizedToken {
                        span: SourceSpan::from(start..end),
                        token: Token::Lf.to_string(),
                        expected: vec!["]]".to_string()],
                    });
                }
                Some(token) => {
                    tokens.push(Ok(token));
                }
                None => {
                    return Err(ParserError::UnrecognizedEof {
                        span: lexer.current_offset(),
                        expected: vec!["]]".to_string()],
                    });
                }
            }
        }

        let (match_end_start, match_end, match_end_end) = expect!(lexer, Token::MatchEnd);

        // Only the opening `[[` was collected, i.e. the block is empty.
        if tokens.len() == 1 {
            return Err(ParserError::UnrecognizedToken {
                span: SourceSpan::from(match_end_start..match_end_end),
                token: match_end.to_string(),
                expected: vec!["a non-empty match expression".to_string()],
            });
        }

        tokens.push(Ok((match_end_start, match_end, match_end_end)));

        Ok(CheckPatternPart::Match(self.parse_match(source, tokens)?))
    }

    /// Run the LALRPOP-generated [`grammar::MatchParser`] over the buffered
    /// tokens of a `[[..]]` block, translating grammar errors into
    /// [`ParserError`] via `handle_parse_error`.
    fn parse_match<'a>(
        &mut self,
        source: &'a str,
        tokens: Vec<Lexed<'a>>,
    ) -> ParseResult<Match<'a>> {
        let lexer = tokens.into_iter();
        grammar::MatchParser::new()
            .parse(source, self.interner, lexer)
            .map_err(handle_parse_error)
    }
}
349
350fn handle_parse_error(err: ParseError) -> ParserError {
351    match err {
352        ParseError::InvalidToken { location: at } => ParserError::InvalidToken {
353            span: SourceSpan::from(at..at),
354        },
355        ParseError::UnrecognizedToken {
356            token: (l, tok, r),
357            expected,
358        } => ParserError::UnrecognizedToken {
359            span: SourceSpan::from(l..r),
360            token: tok.to_string(),
361            expected,
362        },
363        ParseError::ExtraToken { token: (l, tok, r) } => ParserError::ExtraToken {
364            span: SourceSpan::from(l..r),
365            token: tok.to_string(),
366        },
367        ParseError::UnrecognizedEof {
368            location: at,
369            expected,
370        } => ParserError::UnrecognizedEof {
371            span: SourceSpan::from(at..at),
372            expected,
373        },
374        ParseError::User { error } => error,
375    }
376}