// prqlc_parser/parser/mod.rs

use chumsky::{prelude::*, Stream};

use self::perror::PError;
use self::pr::{Annotation, Stmt, StmtKind};
use crate::error::Error;
use crate::lexer::lr;
use crate::lexer::lr::TokenKind;
use crate::span::Span;

mod expr;
mod interpolation;
pub(crate) mod perror;
pub mod pr;
pub(crate) mod stmt;
#[cfg(test)]
mod test;
mod types;

// Note that `parse_source` is in the `prqlc` crate, not in the `prqlc-parser`
// crate, because it logs using the logging framework in `prqlc`.

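/// Parse lexer tokens (LR) into parser statements (PR), returning whatever
/// statements could be recovered, alongside any errors encountered.
///
/// A minimal usage sketch; `lex_source` and the shape of its output are
/// assumptions about the lexer's API, shown for illustration only:
///
/// ```ignore
/// // Hypothetical: assumes `lex_source` returns a tuple struct of tokens.
/// let tokens = crate::lexer::lex_source("from employees").unwrap();
/// let (stmts, errors) = parse_lr_to_pr(0, tokens.0);
/// assert!(errors.is_empty());
/// ```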
pub fn parse_lr_to_pr(source_id: u16, lr: Vec<lr::Token>) -> (Option<Vec<pr::Stmt>>, Vec<Error>) {
    let stream = prepare_stream(lr, source_id);
    let (pr, parse_errors) = stmt::source().parse_recovery(stream);

    let errors = parse_errors.into_iter().map(|e| e.into()).collect();
    log::debug!("parse errors: {errors:?}");

    (pr, errors)
}

/// Convert the output of the lexer into the input of the parser. Requires
/// supplying the id of the source.
pub(crate) fn prepare_stream<'a>(
    tokens: Vec<lr::Token>,
    source_id: u16,
) -> Stream<'a, lr::TokenKind, Span, impl Iterator<Item = (lr::TokenKind, Span)> + Sized + 'a> {
    let final_span = tokens.last().map(|t| t.span.end).unwrap_or(0);

    // We don't want comments in the AST (but we do intend to use them as part of
    // formatting)
    let semantic_tokens = tokens.into_iter().filter(|token| {
        !matches!(
            token.kind,
            lr::TokenKind::Comment(_) | lr::TokenKind::LineWrap(_)
        )
    });

    let tokens = semantic_tokens
        .map(move |token| (token.kind, Span::new(source_id, token.span)));
    let eoi = Span {
        start: final_span,
        end: final_span,
        source_id,
    };
    Stream::from_iter(eoi, tokens)
}

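/// Parse a single identifier part: either an `Ident` token, or the `module`
/// keyword, which is also accepted where an identifier is expected.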
fn ident_part() -> impl Parser<TokenKind, String, Error = PError> + Clone {
    select! {
        TokenKind::Ident(ident) => ident,
        TokenKind::Keyword(ident) if &ident == "module" => ident,
    }
    .map_err(|e: PError| {
        PError::expected_input_found(
            e.span(),
            [Some(TokenKind::Ident("".to_string()))],
            e.found().cloned(),
        )
    })
}

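/// Match a specific keyword token, e.g. `keyword("let")`.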
fn keyword(kw: &'static str) -> impl Parser<TokenKind, (), Error = PError> + Clone {
    just(TokenKind::Keyword(kw.to_string())).ignored()
}

/// Our approach to new lines is that each item consumes new lines _before_
/// itself, but not after. This allows us to enforce new lines between some
/// items. The only place we handle new lines after an item is in the root
/// parser.
pub(crate) fn new_line() -> impl Parser<TokenKind, (), Error = PError> + Clone {
    just(TokenKind::NewLine)
        // Start is considered a new line, so we can enforce things start on a new
        // line while allowing them to be at the beginning of a file
        .or(just(TokenKind::Start))
        .ignored()
        .labelled("new line")
}

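/// Match a single control-character token, e.g. `ctrl(',')`.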
fn ctrl(char: char) -> impl Parser<TokenKind, (), Error = PError> + Clone {
    just(TokenKind::Control(char)).ignored()
}

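/// Assemble a `Stmt` from its parsed annotations and kind, attaching the span.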
fn into_stmt((annotations, kind): (Vec<Annotation>, StmtKind), span: Span) -> Stmt {
    Stmt {
        kind,
        span: Some(span),
        annotations,
        doc_comment: None,
    }
}

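/// Parse one or more consecutive `#!` doc comment lines into a single string,
/// joined by newlines.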
fn doc_comment() -> impl Parser<TokenKind, String, Error = PError> + Clone {
    // doc comments must start on a new line, so we enforce a new line (which
    // can also be a file start) before the doc comment
    //
    // TODO: we currently lose any empty newlines between doc comments;
    // eventually we want to retain or restrict them
    (new_line().repeated().at_least(1).ignore_then(select! {
        TokenKind::DocComment(dc) => dc,
    }))
    .repeated()
    .at_least(1)
    .collect()
    .map(|lines: Vec<String>| lines.join("\n"))
    .labelled("doc comment")
}

fn with_doc_comment<'a, P, O>(parser: P) -> impl Parser<TokenKind, O, Error = PError> + Clone + 'a
where
    P: Parser<TokenKind, O, Error = PError> + Clone + 'a,
    O: SupportsDocComment + 'a,
{
    doc_comment()
        .or_not()
        .then(parser)
        .map(|(doc_comment, inner)| inner.with_doc_comment(doc_comment))
}

/// Allows a parser to be wrapped in `with_doc_comment`, adding a doc comment
/// to its result, as long as the result implements `SupportsDocComment`.
///
/// (In retrospect, we could manage without it, though it's probably not worth
/// the effort to remove. We could also use it to support `Span` items.)
trait SupportsDocComment {
    fn with_doc_comment(self, doc_comment: Option<String>) -> Self;
}

/// Parse a sequence, allowing commas and new lines between items. Doesn't
/// include the surrounding delimiters.
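///
/// For example, within delimiters this accepts `a, b, c` on a single line, or
/// one item per line with an optional trailing comma.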
fn sequence<'a, P, O>(parser: P) -> impl Parser<TokenKind, Vec<O>, Error = PError> + Clone + 'a
where
    P: Parser<TokenKind, O, Error = PError> + Clone + 'a,
    O: 'a,
{
    parser
        .separated_by(ctrl(',').then_ignore(new_line().repeated()))
        .allow_trailing()
        // Note: because we pad rather than only take the ending new line, we
        // can't put items that require a new line in a tuple, like:
        //
        // ```
        // {
        //   #! doc comment
        //   a,
        // }
        // ```
        // ...but I'm not sure there's a way around it, since we do need to
        // consume newlines in tuples...
        .padded_by(new_line().repeated())
}

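/// A pipeline separator: either `|` or at least one new line.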
fn pipe() -> impl Parser<TokenKind, (), Error = PError> + Clone {
    ctrl('|')
        .ignored()
        .or(new_line().repeated().at_least(1).ignored())
}

#[cfg(test)]
mod tests {
    use insta::assert_debug_snapshot;

    use super::*;
    use crate::test::parse_with_parser;

    #[test]
    fn test_doc_comment() {
        assert_debug_snapshot!(parse_with_parser(r#"
        #! doc comment
        #! another line

        "#, doc_comment()), @r#"
        Ok(
            " doc comment\n another line",
        )
        "#);
    }

    #[test]
    fn test_doc_comment_or_not() {
        assert_debug_snapshot!(parse_with_parser(r#"hello"#, doc_comment().or_not()).unwrap(), @"None");
        assert_debug_snapshot!(parse_with_parser(r#"hello"#, doc_comment().or_not().then_ignore(new_line().repeated()).then(ident_part())).unwrap(), @r#"
        (
            None,
            "hello",
        )
        "#);
    }

    #[cfg(test)]
    impl SupportsDocComment for String {
        fn with_doc_comment(self, _doc_comment: Option<String>) -> Self {
            self
        }
    }

    #[test]
    fn test_no_doc_comment_in_with_doc_comment() {
        assert_debug_snapshot!(parse_with_parser(r#"hello"#, with_doc_comment(new_line().ignore_then(ident_part()))).unwrap(), @r#""hello""#);
    }
}