Skip to main content

ludtwig_parser/
parser.rs

1use std::fmt::Write;
2
3use rowan::GreenNode;
4
5pub use parse_error::ParseError;
6pub use parse_error::ParseErrorBuilder;
7
8use crate::grammar::root;
9use crate::lexer::Token;
10use crate::parser::event::{CompletedMarker, EventCollection, Marker};
11use crate::parser::sink::Sink;
12use crate::parser::source::Source;
13use crate::syntax::untyped::{SyntaxKind, SyntaxNode, debug_tree};
14use crate::{T, lex};
15
16pub(crate) mod event;
17mod parse_error;
18mod sink;
19mod source;
20
21/// Tokens which can lead to parsing of another element
22/// (top level parsers under [`crate::grammar::parse_any_element`])
23pub(crate) static GENERAL_RECOVERY_SET: &[SyntaxKind] = &[
24    T!["{%"],
25    T!["{%-"],
26    T!["{%~"],
27    T!["{{"],
28    T!["{{-"],
29    T!["{{~"],
30    T!["{#"],
31    T!["{#-"],
32    T!["{#~"],
33    T!["<"],
34    T!["<!--"],
35    T!["<!"],
36];
37
38/// All token kinds that open a twig block tag: `{%`, `{%-`, `{%~`
39pub(crate) static TWIG_BLOCK_OPEN_SET: &[SyntaxKind] = &[T!["{%"], T!["{%-"], T!["{%~"]];
40
41/// All token kinds that close a twig block tag: `%}`, `-%}`, `~%}`
42pub(crate) static TWIG_BLOCK_CLOSE_SET: &[SyntaxKind] = &[T!["%}"], T!["-%}"], T!["~%}"]];
43
44/// All token kinds that open a twig var: `{{`, `{{-`, `{{~`
45pub(crate) static TWIG_VAR_OPEN_SET: &[SyntaxKind] = &[T!["{{"], T!["{{-"], T!["{{~"]];
46
47/// All token kinds that close a twig var: `}}`, `-}}`, `~}}`
48pub(crate) static TWIG_VAR_CLOSE_SET: &[SyntaxKind] = &[T!["}}"], T!["-}}"], T!["~}}"]];
49
50/// All token kinds that open a twig comment: `{#`, `{#-`, `{#~`
51pub(crate) static TWIG_COMMENT_OPEN_SET: &[SyntaxKind] = &[T!["{#"], T!["{#-"], T!["{#~"]];
52
53/// All token kinds that close a twig comment: `#}`, `-#}`, `~#}`
54pub(crate) static TWIG_COMMENT_CLOSE_SET: &[SyntaxKind] = &[T!["#}"], T!["-#}"], T!["~#}"]];
55
56/// Parses a given string slice (of Twig+HTML code) into a syntax tree.
57///
58/// ## Example
59/// ```
60/// use ludtwig_parser::syntax::untyped::{debug_tree, SyntaxNode};
61///
62/// let parse = ludtwig_parser::parse("{{ 42 }}");
63/// let (tree_root, errors) = parse.split();
64///
65/// assert_eq!(debug_tree(&tree_root), r##"ROOT@0..8
66///   TWIG_VAR@0..8
67///     TK_OPEN_CURLY_CURLY@0..2 "{{"
68///     TWIG_EXPRESSION@2..5
69///       TWIG_LITERAL_NUMBER@2..5
70///         TK_WHITESPACE@2..3 " "
71///         TK_NUMBER@3..5 "42"
72///     TK_WHITESPACE@5..6 " "
73///     TK_CLOSE_CURLY_CURLY@6..8 "}}""##);
74/// ```
75/// More examples can be found at the
76/// [crate level documentation](crate).
77#[must_use]
78pub fn parse(input_text: &str) -> Parse {
79    let lex_result = lex(input_text);
80    let parser = Parser::new(&lex_result);
81    let (parse_events, parse_errors) = parser.parse();
82    let sink = Sink::new(&lex_result, parse_events, parse_errors);
83    sink.finish()
84}
85
86/// Result of the parser
87pub struct Parse {
88    pub green_node: GreenNode,
89    pub errors: Vec<ParseError>,
90}
91
92impl Parse {
93    /// Split the parse result into a syntax tree root node and
94    /// the list of parse errors
95    #[must_use]
96    pub fn split(self) -> (SyntaxNode, Vec<ParseError>) {
97        let root = SyntaxNode::new_root(self.green_node);
98
99        (root, self.errors)
100    }
101
102    #[must_use]
103    pub fn debug_parse(self) -> String {
104        let syntax_node = SyntaxNode::new_root(self.green_node);
105        let mut s = debug_tree(&syntax_node);
106
107        for error in &self.errors {
108            let _ = write!(s, "\n{error}");
109        }
110
111        s
112    }
113}
114
115#[derive(Debug, Clone, Eq, PartialEq)]
116pub(crate) struct Parser<'source> {
117    source: Source<'source>,
118    event_collection: EventCollection,
119    parse_errors: Vec<ParseError>,
120}
121
122impl<'source> Parser<'source> {
123    pub(crate) fn new(tokens: &'source [Token<'source>]) -> Self {
124        // Events are roughly 2x the number of tokens (start/finish nodes + token events)
125        let estimated_events = tokens.len() * 2;
126        Self {
127            source: Source::new(tokens),
128            event_collection: EventCollection::with_capacity(estimated_events),
129            parse_errors: vec![],
130        }
131    }
132
133    fn parse(mut self) -> (EventCollection, Vec<ParseError>) {
134        root(&mut self);
135        (self.event_collection, self.parse_errors)
136    }
137
138    fn peek(&mut self) -> Option<SyntaxKind> {
139        self.source.peek_kind()
140    }
141
142    pub(crate) fn peek_token(&mut self) -> Option<&Token<'_>> {
143        self.source.peek_token()
144    }
145
146    /// Lookahead is expensive!
147    /// This lookahead doesn't skip further trivia tokens and is only there for combining the next n lexer tokens!
148    /// for n of zero use `peek_token` instead!
149    pub(crate) fn peek_nth_token(&mut self, n: usize) -> Option<&Token<'_>> {
150        self.source.peek_nth_token(n)
151    }
152
153    pub(crate) fn get_pos(&self) -> usize {
154        self.source.get_pos()
155    }
156
157    pub(crate) fn at_set(&mut self, set: &[SyntaxKind]) -> bool {
158        self.peek().is_some_and(|k| set.contains(&k))
159    }
160
161    pub(crate) fn at(&mut self, kind: SyntaxKind) -> bool {
162        self.peek() == Some(kind)
163    }
164
165    /// Only use this if absolutely necessary, because it is expensive to lookahead!
166    pub(crate) fn at_following(&mut self, set: &[SyntaxKind]) -> bool {
167        self.source.at_following(set)
168    }
169
170    /// Peeks the kind of the next non-trivia token after the current position.
171    pub(crate) fn peek_next_non_trivia_kind(&mut self) -> Option<SyntaxKind> {
172        self.source.peek_next_non_trivia_kind()
173    }
174
175    /// Efficiently checks if the parser is at a `{% keyword` sequence
176    /// (including whitespace control variants `{%-` and `{%~`).
177    pub(crate) fn at_twig_tag(&mut self, keyword: SyntaxKind) -> bool {
178        self.at_twig_block_open() && self.peek_next_non_trivia_kind() == Some(keyword)
179    }
180
181    /// Checks if the parser is at any twig block open token (`{%`, `{%-`, `{%~`).
182    pub(crate) fn at_twig_block_open(&mut self) -> bool {
183        self.at_set(TWIG_BLOCK_OPEN_SET)
184    }
185
186    /// Checks if the parser is at any twig block close token (`%}`, `-%}`, `~%}`).
187    pub(crate) fn at_twig_block_close(&mut self) -> bool {
188        self.at_set(TWIG_BLOCK_CLOSE_SET)
189    }
190
191    /// Checks if the parser is at any twig var open token (`{{`, `{{-`, `{{~`).
192    pub(crate) fn at_twig_var_open(&mut self) -> bool {
193        self.at_set(TWIG_VAR_OPEN_SET)
194    }
195
196    /// Checks if the parser is at any twig comment open token (`{#`, `{#-`, `{#~`).
197    pub(crate) fn at_twig_comment_open(&mut self) -> bool {
198        self.at_set(TWIG_COMMENT_OPEN_SET)
199    }
200
201    /// Only use this if absolutely necessary, because it is expensive to lookahead!
202    pub(crate) fn at_following_content(&mut self, set: &[(SyntaxKind, Option<&str>)]) -> bool {
203        self.source.at_following_content(set)
204    }
205
206    pub(crate) fn at_end(&mut self) -> bool {
207        self.peek().is_none()
208    }
209
210    #[track_caller]
211    pub(crate) fn bump(&mut self) -> &Token<'_> {
212        let consumed = self
213            .source
214            .next_token()
215            .expect("bump called, but there are no more tokens!");
216
217        self.event_collection.add_token(consumed.kind);
218
219        consumed
220    }
221
222    /// In most cases trivia like whitespace comes before any Tokens in a Node
223    /// But sometimes it is necessary to consume the trivia even after the last Token in a Node.
224    ///
225    /// This does exactly that and can be used to consume trailing trivia in a string parser
226    /// (where trivia should be inside as part of the string). Just call this before a call to parser.complete(...).
227    pub(crate) fn explicitly_consume_trivia(&mut self) {
228        self.event_collection.explicitly_consume_trivia();
229    }
230
231    #[track_caller]
232    pub(crate) fn bump_as(&mut self, kind: SyntaxKind) -> Token<'_> {
233        let consumed = self
234            .source
235            .next_token()
236            .expect("bump called, but there are no more tokens!");
237
238        self.event_collection.add_token(kind);
239
240        Token {
241            kind,
242            text: consumed.text,
243            range: consumed.range,
244        }
245    }
246
247    #[track_caller]
248    pub(crate) fn bump_next_n_as(&mut self, n: usize, kind: SyntaxKind) -> Vec<&Token<'_>> {
249        let consumed = self.source.next_n_tokens(n);
250        assert_eq!(
251            consumed.len(),
252            n,
253            "bump_next_n_as called, but there are not enough tokens!"
254        );
255
256        self.event_collection.add_next_n_tokens_as(n, kind);
257
258        consumed
259    }
260
261    pub(crate) fn expect(
262        &mut self,
263        kind: SyntaxKind,
264        recovery_set: &[SyntaxKind],
265    ) -> Option<&Token<'_>> {
266        if self.at(kind) {
267            Some(self.bump())
268        } else {
269            self.add_error(ParseErrorBuilder::new(format!("{kind}")));
270            self.recover_expect(&[kind], recovery_set)
271        }
272    }
273
274    /// Like [`expect`](Self::expect) but accepts any of the given token kinds.
275    /// The error message lists all accepted kinds.
276    pub(crate) fn expect_any(
277        &mut self,
278        kinds: &[SyntaxKind],
279        recovery_set: &[SyntaxKind],
280    ) -> Option<&Token<'_>> {
281        if self.at_set(kinds) {
282            Some(self.bump())
283        } else {
284            let expected = kinds
285                .iter()
286                .map(|k| format!("{k}"))
287                .collect::<Vec<_>>()
288                .join(" or ");
289            self.add_error(ParseErrorBuilder::new(expected));
290            self.recover_expect(kinds, recovery_set)
291        }
292    }
293
294    /// Recovers the parser after an error was found.
295    /// It looks for either any token in the `GENERAL_RECOVERY_SET` or the
296    /// provided `recovery_set` and wraps any tokens in between inside an error node.
297    ///
298    /// Important: in most cases this should not be called inside `parse_many`because it may
299    /// consume future children.
300    pub(crate) fn recover(&mut self, recovery_set: &[SyntaxKind]) {
301        self.recover_expect(&[], recovery_set);
302    }
303
304    fn recover_expect(
305        &mut self,
306        expected_kinds: &[SyntaxKind],
307        recovery_set: &[SyntaxKind],
308    ) -> Option<&Token<'_>> {
309        if self.at_end() || self.at_set(recovery_set) || self.at_set(GENERAL_RECOVERY_SET) {
310            return None;
311        }
312
313        let error_m = self.start();
314        loop {
315            self.bump();
316
317            if !expected_kinds.is_empty() && self.at_set(expected_kinds) {
318                self.complete(error_m, SyntaxKind::ERROR);
319                return Some(self.bump());
320            }
321
322            if self.at_end() || self.at_set(GENERAL_RECOVERY_SET) || self.at_set(recovery_set) {
323                self.complete(error_m, SyntaxKind::ERROR);
324                return None;
325            }
326        }
327    }
328
329    /// Adds a parser error but does not bump any tokens into the tree.
330    pub(crate) fn add_error(&mut self, mut error_builder: ParseErrorBuilder) {
331        // add missing information to builder
332        if error_builder.range.is_none() || error_builder.found.is_none() {
333            let current_token = self.source.peek_token();
334            let (found, range) = if let Some(Token { kind, range, .. }) = current_token {
335                (Some(*kind), *range)
336            } else {
337                // If we're at the end of the input we use the range of the very last token
338                // unwrap is fine, because error should not be called on empty file
339                (
340                    None,
341                    self.source
342                        .last_token_range()
343                        .expect("parser error called on empty file which has no last token"),
344                )
345            };
346
347            if error_builder.range.is_none() {
348                error_builder.range = Some(range);
349            }
350
351            if error_builder.found.is_none() {
352                error_builder.found = found;
353            }
354        }
355
356        self.parse_errors.push(error_builder.build());
357    }
358
359    pub(crate) fn start(&mut self) -> Marker {
360        self.event_collection.start()
361    }
362
363    #[track_caller]
364    pub(crate) fn complete(&mut self, marker: Marker, kind: SyntaxKind) -> CompletedMarker {
365        self.event_collection.complete(marker, kind)
366    }
367
368    #[track_caller]
369    pub(crate) fn precede(&mut self, completed_marker: CompletedMarker) -> Marker {
370        self.event_collection.precede(completed_marker)
371    }
372}
373
/// Test helper: parses `input` and compares the debug output of the result
/// (syntax tree followed by the parse errors) with `expected_tree`.
///
/// `Expect` is taken by value so call sites can pass `expect![...]` directly.
#[cfg(test)]
#[allow(clippy::needless_pass_by_value)]
pub(crate) fn check_parse(input: &str, expected_tree: expect_test::Expect) {
    let actual = parse(input).debug_parse();
    expected_tree.assert_eq(&actual);
}
380
#[cfg(test)]
mod tests {
    use expect_test::expect;

    use super::*;

    /// Empty input still produces a (zero length) root node and no errors.
    #[test]
    fn parse_nothing() {
        let expected = expect!["ROOT@0..0"];
        check_parse("", expected);
    }
}
391}