prqlc_parser/parser/mod.rs

use chumsky;
use chumsky::input::BorrowInput;
use chumsky::prelude::*;
use chumsky::span::SimpleSpan;

use self::pr::{Annotation, Stmt, StmtKind};
use crate::error::Error;
use crate::lexer::lr;
use crate::lexer::lr::TokenKind;
use crate::span::Span;

// Type alias for parser error type to reduce verbosity
pub(crate) type ParserError<'a> = extra::Err<Rich<'a, lr::Token, Span>>;

mod expr;
mod interpolation;
pub(crate) mod perror;
pub mod pr;
pub(crate) mod stmt;
#[cfg(test)]
mod test;
mod types;

// Note that `parse_source` is in the `prqlc` crate, not the `prqlc-parser`
// crate, because it logs using the logging framework in `prqlc`.

pub fn parse_lr_to_pr(source_id: u16, lr: Vec<lr::Token>) -> (Option<Vec<pr::Stmt>>, Vec<Error>) {
    // Filter out comments and line wraps - we don't want them in the AST
    let semantic_tokens: Vec<_> = lr
        .into_iter()
        .filter(|token| {
            !matches!(
                token.kind,
                lr::TokenKind::Comment(_) | lr::TokenKind::LineWrap(_)
            )
        })
        .collect();

    // Use the built-in Input impl for `&[Token]`, then `map_span` to convert
    // token-index spans into byte spans
    let input = semantic_tokens
        .as_slice()
        .map_span(|simple_span: SimpleSpan| {
            let start_idx = simple_span.start();
            let end_idx = simple_span.end();

            // Convert token indices to byte offsets in the source file; the
            // end index is exclusive, so `end` comes from the token at
            // `end_idx - 1`
            let start = semantic_tokens
                .get(start_idx)
                .map(|t| t.span.start)
                .unwrap_or(0);
            let end = semantic_tokens
                .get(end_idx.saturating_sub(1))
                .map(|t| t.span.end)
                .unwrap_or(start);

            Span {
                start,
                end,
                source_id,
            }
        });

    let parse_result = stmt::source().parse(input);
    let (pr, parse_errors) = parse_result.into_output_errors();

    let errors = parse_errors.into_iter().map(|e| e.into()).collect();
    log::debug!("parse errors: {errors:?}");

    (pr, errors)
}
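
// As a worked example of the mapping above (illustrative numbers, not taken
// from a real lex): if the first two tokens cover bytes 0..4 and 5..9, then a
// parser span over token indices 0..2 maps to the byte span 0..9: `start`
// comes from the first token in the span and `end` from the token just before
// the exclusive end index.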

fn ident_part<'a, I>() -> impl Parser<'a, I, String, ParserError<'a>> + Clone
where
    I: Input<'a, Token = lr::Token, Span = Span> + BorrowInput<'a>,
{
    select_ref! {
        lr::Token { kind: TokenKind::Ident(ident), .. } => ident.clone(),
    }
}

fn keyword<'a, I>(kw: &'static str) -> impl Parser<'a, I, (), ParserError<'a>> + Clone
where
    I: Input<'a, Token = lr::Token, Span = Span> + BorrowInput<'a>,
{
    select_ref! {
        lr::Token { kind: TokenKind::Keyword(k), .. } if k == kw => (),
    }
}

/// Our approach to new lines is that each item consumes new lines _before_
/// itself, but not new lines after itself. This allows us to enforce new
/// lines between some items. The only place we handle new lines after an
/// item is in the root parser.
pub(crate) fn new_line<'a, I>() -> impl Parser<'a, I, (), ParserError<'a>> + Clone
where
    I: Input<'a, Token = lr::Token, Span = Span> + BorrowInput<'a>,
{
    select_ref! {
        lr::Token { kind: TokenKind::NewLine, .. } => (),
        lr::Token { kind: TokenKind::Start, .. } => (),
    }
    .labelled("new line")
}
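
// For example (illustrative): in `derive x` followed on the next line by
// `derive y`, the second statement's parser consumes the new line before
// itself. A file-start token also counts as a new line, so the first item in
// a file needs no special casing.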

fn ctrl<'a, I>(char: char) -> impl Parser<'a, I, (), ParserError<'a>> + Clone
where
    I: Input<'a, Token = lr::Token, Span = Span> + BorrowInput<'a>,
{
    select_ref! {
        lr::Token { kind: TokenKind::Control(c), .. } if *c == char => (),
    }
}

fn into_stmt((annotations, kind): (Vec<Annotation>, StmtKind), span: Span) -> Stmt {
    Stmt {
        kind,
        span: Some(span),
        annotations,
        doc_comment: None,
    }
}

fn doc_comment<'a, I>() -> impl Parser<'a, I, String, ParserError<'a>> + Clone
where
    I: Input<'a, Token = lr::Token, Span = Span> + BorrowInput<'a>,
{
    // Doc comments must start on a new line, so we enforce a new line (which
    // can also be a file start) before each doc comment line.
    //
    // TODO: we currently lose any blank lines between doc comments;
    // eventually we want to retain or restrict them
    (new_line().repeated().at_least(1).ignore_then(select_ref! {
        lr::Token { kind: TokenKind::DocComment(dc), .. } => dc.clone(),
    }))
    .repeated()
    .at_least(1)
    .collect()
    .map(|lines: Vec<String>| lines.join("\n"))
    .labelled("doc comment")
}
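
// For example (illustrative, matching the test at the bottom of this file):
// the tokens for two consecutive lines `#! first` and `#! second` parse to
// the single string " first\n second", with the `#!` prefixes stripped by the
// lexer and the leading spaces retained.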

fn with_doc_comment<'a, I, P, O>(parser: P) -> impl Parser<'a, I, O, ParserError<'a>> + Clone + 'a
where
    I: Input<'a, Token = lr::Token, Span = Span> + BorrowInput<'a>,
    P: Parser<'a, I, O, ParserError<'a>> + Clone + 'a,
    O: SupportsDocComment + 'a,
{
    doc_comment()
        .or_not()
        .then(parser)
        .map(|(doc_comment, inner)| inner.with_doc_comment(doc_comment))
}

/// Allows us to surround a parser with `with_doc_comment` and have a doc
/// comment added to the result, as long as the result implements
/// `SupportsDocComment`.
///
/// (In retrospect, we could manage without it, though it's probably not worth
/// the effort to remove it. We could also use it to support Span items.)
trait SupportsDocComment {
    fn with_doc_comment(self, doc_comment: Option<String>) -> Self;
}
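
// An implementation is typically a one-line field assignment. A sketch for
// `Stmt`, assuming only the `doc_comment` field seen in `into_stmt` above:
//
//     impl SupportsDocComment for Stmt {
//         fn with_doc_comment(mut self, doc_comment: Option<String>) -> Self {
//             self.doc_comment = doc_comment;
//             self
//         }
//     }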

/// Parse a sequence, allowing commas and new lines between items. Doesn't
/// include the surrounding delimiters.
fn sequence<'a, I, P, O>(parser: P) -> impl Parser<'a, I, Vec<O>, ParserError<'a>> + Clone + 'a
where
    I: Input<'a, Token = lr::Token, Span = Span> + BorrowInput<'a>,
    P: Parser<'a, I, O, ParserError<'a>> + Clone + 'a,
    O: 'a,
{
    parser
        .separated_by(ctrl(',').then_ignore(new_line().repeated()))
        .allow_trailing()
        .collect()
        // Note that because we pad rather than only take the ending new
        // line, we can't put items that require a new line in a tuple, like:
        //
        // ```
        // {
        //   #! doc comment
        //   a,
        // }
        // ```
        // ...but I'm not sure there's a way around it, since we do need to
        // consume newlines in tuples...
        .padded_by(new_line().repeated())
}
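
// For instance (illustrative): `sequence(ident_part())` accepts `a, b`,
// `a, b,` with a trailing comma, and `a,` followed by `b` on the next line,
// since new lines may follow each comma and pad the sequence on either side.
// New lines alone don't separate items; the comma is required.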

fn pipe<'a, I>() -> impl Parser<'a, I, (), ParserError<'a>> + Clone
where
    I: Input<'a, Token = lr::Token, Span = Span> + BorrowInput<'a>,
{
    ctrl('|')
        .ignored()
        .or(new_line().repeated().at_least(1).ignored())
}
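
// So (illustrative) `from t | take 10` and the same pipeline written with
// `take 10` on its own line parse identically: a pipe is either a `|` or at
// least one new line.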

#[cfg(test)]
mod tests {
    use insta::assert_debug_snapshot;

    use super::*;
    use crate::error::Error;

    fn parse_doc_comment(source: &str) -> Result<String, Vec<Error>> {
        let tokens = crate::lexer::lex_source(source)?;
        let semantic_tokens: Vec<_> = tokens
            .0
            .into_iter()
            .filter(|token| {
                !matches!(
                    token.kind,
                    crate::lexer::lr::TokenKind::Comment(_)
                        | crate::lexer::lr::TokenKind::LineWrap(_)
                )
            })
            .collect();

        // Mirrors the span mapping in `parse_lr_to_pr`, with `source_id`
        // fixed to 0
        let input = semantic_tokens
            .as_slice()
            .map_span(|simple_span: SimpleSpan| {
                let start_idx = simple_span.start();
                let end_idx = simple_span.end();

                let start = semantic_tokens
                    .get(start_idx)
                    .map(|t| t.span.start)
                    .unwrap_or(0);
                let end = semantic_tokens
                    .get(end_idx.saturating_sub(1))
                    .map(|t| t.span.end)
                    .unwrap_or(start);

                Span {
                    start,
                    end,
                    source_id: 0,
                }
            });

        let parser = doc_comment()
            .then_ignore(new_line().repeated())
            .then_ignore(end());
        let (ast, errors) = parser.parse(input).into_output_errors();

        if !errors.is_empty() {
            return Err(errors.into_iter().map(Into::into).collect());
        }
        Ok(ast.unwrap())
    }

    #[test]
    fn test_doc_comment() {
        assert_debug_snapshot!(parse_doc_comment(r#"
        #! doc comment
        #! another line

        "#), @r#"
        Ok(
            " doc comment\n another line",
        )
        "#);
    }
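
    #[test]
    fn test_doc_comment_single_line() {
        // A minimal single-line check alongside the snapshot test above;
        // assumes the lexer strips the `#!` prefix but keeps the following
        // space, as in `test_doc_comment`.
        assert_eq!(
            parse_doc_comment("\n#! single line\n").unwrap(),
            " single line"
        );
    }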

    // Further doc comment handling is tested in stmt.rs tests

    impl SupportsDocComment for String {
        fn with_doc_comment(self, _doc_comment: Option<String>) -> Self {
            self
        }
    }
}