prqlc_parser/parser/
mod.rs

1use chumsky::{prelude::*, Stream};
2
3use self::perror::PError;
4use self::pr::{Annotation, Stmt, StmtKind};
5use crate::error::Error;
6use crate::lexer::lr;
7use crate::lexer::lr::TokenKind;
8use crate::span::Span;
9
10mod expr;
11mod interpolation;
12pub(crate) mod perror;
13pub mod pr;
14pub(crate) mod stmt;
15#[cfg(test)]
16mod test;
17mod types;
18
19// Note that `parse_source` is in `prqlc` crate, not in `prqlc-parser` crate,
20// because it logs using the logging framework in `prqlc`.
21
22pub fn parse_lr_to_pr(source_id: u16, lr: Vec<lr::Token>) -> (Option<Vec<pr::Stmt>>, Vec<Error>) {
23    let stream = prepare_stream(lr, source_id);
24    let (pr, parse_errors) = stmt::source().parse_recovery(stream);
25
26    let errors = parse_errors.into_iter().map(|e| e.into()).collect();
27    log::debug!("parse errors: {errors:?}");
28
29    (pr, errors)
30}
31
32/// Convert the output of the lexer into the input of the parser. Requires
33/// supplying the original source code.
34pub(crate) fn prepare_stream<'a>(
35    tokens: Vec<lr::Token>,
36    source_id: u16,
37) -> Stream<'a, lr::TokenKind, Span, impl Iterator<Item = (lr::TokenKind, Span)> + Sized + 'a> {
38    let final_span = tokens.last().map(|t| t.span.end).unwrap_or(0);
39
40    // We don't want comments in the AST (but we do intend to use them as part of
41    // formatting)
42    let semantic_tokens = tokens.into_iter().filter(|token| {
43        !matches!(
44            token.kind,
45            lr::TokenKind::Comment(_) | lr::TokenKind::LineWrap(_)
46        )
47    });
48
49    let tokens = semantic_tokens
50        .into_iter()
51        .map(move |token| (token.kind, Span::new(source_id, token.span)));
52    let eoi = Span {
53        start: final_span,
54        end: final_span,
55        source_id,
56    };
57    Stream::from_iter(eoi, tokens)
58}
59
60fn ident_part() -> impl Parser<TokenKind, String, Error = PError> + Clone {
61    select! {
62        TokenKind::Ident(ident) => ident,
63    }
64    .map_err(|e: PError| {
65        PError::expected_input_found(
66            e.span(),
67            [Some(TokenKind::Ident("".to_string()))],
68            e.found().cloned(),
69        )
70    })
71}
72
73fn keyword(kw: &'static str) -> impl Parser<TokenKind, (), Error = PError> + Clone {
74    just(TokenKind::Keyword(kw.to_string())).ignored()
75}
76
77/// Our approach to new lines is each item consumes new lines _before_ itself,
78/// but not newlines after itself. This allows us to enforce new lines between
79/// some items. The only place we handle new lines after an item is in the root
80/// parser.
81pub(crate) fn new_line() -> impl Parser<TokenKind, (), Error = PError> + Clone {
82    just(TokenKind::NewLine)
83        // Start is considered a new line, so we can enforce things start on a new
84        // line while allowing them to be at the beginning of a file
85        .or(just(TokenKind::Start))
86        .ignored()
87        .labelled("new line")
88}
89
90fn ctrl(char: char) -> impl Parser<TokenKind, (), Error = PError> + Clone {
91    just(TokenKind::Control(char)).ignored()
92}
93
94fn into_stmt((annotations, kind): (Vec<Annotation>, StmtKind), span: Span) -> Stmt {
95    Stmt {
96        kind,
97        span: Some(span),
98        annotations,
99        doc_comment: None,
100    }
101}
102
103fn doc_comment() -> impl Parser<TokenKind, String, Error = PError> + Clone {
104    // doc comments must start on a new line, so we enforce a new line (which
105    // can also be a file start) before the doc comment
106    //
107    // TODO: we currently lose any empty newlines between doc comments;
108    // eventually we want to retain or restrict them
109    (new_line().repeated().at_least(1).ignore_then(select! {
110        TokenKind::DocComment(dc) => dc,
111    }))
112    .repeated()
113    .at_least(1)
114    .collect()
115    .map(|lines: Vec<String>| lines.join("\n"))
116    .labelled("doc comment")
117}
118
119fn with_doc_comment<'a, P, O>(parser: P) -> impl Parser<TokenKind, O, Error = PError> + Clone + 'a
120where
121    P: Parser<TokenKind, O, Error = PError> + Clone + 'a,
122    O: SupportsDocComment + 'a,
123{
124    doc_comment()
125        .or_not()
126        .then(parser)
127        .map(|(doc_comment, inner)| inner.with_doc_comment(doc_comment))
128}
129
/// Allows us to surround a parser by `with_doc_comment` and for a doc comment
/// to be added to the result, as long as the result implements `SupportsDocComment`.
///
/// (In retrospect, we could manage without it, though probably not worth the
/// effort to remove it. We could also use it to also support Span items.)
trait SupportsDocComment {
    /// Attach `doc_comment` (if any was parsed) to `self`, returning the
    /// updated value.
    fn with_doc_comment(self, doc_comment: Option<String>) -> Self;
}
138
139/// Parse a sequence, allowing commas and new lines between items. Doesn't
140/// include the surrounding delimiters.
141fn sequence<'a, P, O>(parser: P) -> impl Parser<TokenKind, Vec<O>, Error = PError> + Clone + 'a
142where
143    P: Parser<TokenKind, O, Error = PError> + Clone + 'a,
144    O: 'a,
145{
146    parser
147        .separated_by(ctrl(',').then_ignore(new_line().repeated()))
148        .allow_trailing()
149        // Note because we pad rather than only take the ending new line, we
150        // can't put items that require a new line in a tuple, like:
151        //
152        // ```
153        // {
154        //   !# doc comment
155        //   a,
156        // }
157        // ```
158        // ...but I'm not sure there's a way around it, since we do need to
159        // consume newlines in tuples...
160        .padded_by(new_line().repeated())
161}
162
163fn pipe() -> impl Parser<TokenKind, (), Error = PError> + Clone {
164    ctrl('|')
165        .ignored()
166        .or(new_line().repeated().at_least(1).ignored())
167}
168
#[cfg(test)]
mod tests {
    use insta::assert_debug_snapshot;

    use super::*;
    use crate::test::parse_with_parser;

    #[test]
    fn test_doc_comment() {
        assert_debug_snapshot!(parse_with_parser(r#"
        #! doc comment
        #! another line

        "#, doc_comment()), @r#"
        Ok(
            " doc comment\n another line",
        )
        "#);
    }

    #[test]
    fn test_doc_comment_or_not() {
        assert_debug_snapshot!(parse_with_parser(r#"hello"#, doc_comment().or_not()).unwrap(), @"None");
        assert_debug_snapshot!(parse_with_parser(r#"hello"#, doc_comment().or_not().then_ignore(new_line().repeated()).then(ident_part())).unwrap(), @r#"
        (
            None,
            "hello",
        )
        "#);
    }

    // Test-only impl so plain `String` results can flow through
    // `with_doc_comment` (the doc comment is simply discarded). No separate
    // `#[cfg(test)]` is needed here — the enclosing module is already gated.
    impl SupportsDocComment for String {
        fn with_doc_comment(self, _doc_comment: Option<String>) -> Self {
            self
        }
    }

    #[test]
    fn test_no_doc_comment_in_with_doc_comment() {
        assert_debug_snapshot!(parse_with_parser(r#"hello"#, with_doc_comment(new_line().ignore_then(ident_part()))).unwrap(), @r#""hello""#);
    }
}