ludtwig_parser/
parser.rs

1use std::fmt::Write;
2
3use rowan::GreenNode;
4
5pub use parse_error::ParseError;
6pub use parse_error::ParseErrorBuilder;
7
8use crate::grammar::root;
9use crate::lexer::Token;
10use crate::parser::event::{CompletedMarker, EventCollection, Marker};
11use crate::parser::sink::Sink;
12use crate::parser::source::Source;
13use crate::syntax::untyped::{debug_tree, SyntaxKind, SyntaxNode};
14use crate::{lex, T};
15
16pub(crate) mod event;
17mod parse_error;
18mod sink;
19mod source;
20
21/// Tokens which can lead to parsing of another element
22/// (top level parsers under [`crate::grammar::parse_any_element`])
23pub(crate) static GENERAL_RECOVERY_SET: &[SyntaxKind] =
24    &[T!["{%"], T!["{{"], T!["{#"], T!["<"], T!["<!--"], T!["<!"]];
25
26/// Parses a given string slice (of Twig+HTML code) into a syntax tree.
27///
28/// ## Example
29/// ```
30/// use ludtwig_parser::syntax::untyped::{debug_tree, SyntaxNode};
31///
32/// let parse = ludtwig_parser::parse("{{ 42 }}");
33/// let (tree_root, errors) = parse.split();
34///
35/// assert_eq!(debug_tree(&tree_root), r##"ROOT@0..8
36///   TWIG_VAR@0..8
37///     TK_OPEN_CURLY_CURLY@0..2 "{{"
38///     TWIG_EXPRESSION@2..5
39///       TWIG_LITERAL_NUMBER@2..5
40///         TK_WHITESPACE@2..3 " "
41///         TK_NUMBER@3..5 "42"
42///     TK_WHITESPACE@5..6 " "
43///     TK_CLOSE_CURLY_CURLY@6..8 "}}""##);
44/// ```
45/// More examples can be found at the
46/// [crate level documentation](crate).
47#[must_use]
48pub fn parse(input_text: &str) -> Parse {
49    let lex_result = lex(input_text);
50    let parser = Parser::new(&lex_result);
51    let (parse_events, parse_errors) = parser.parse();
52    let sink = Sink::new(&lex_result, parse_events, parse_errors);
53    sink.finish()
54}
55
56/// Result of the parser
57pub struct Parse {
58    pub green_node: GreenNode,
59    pub errors: Vec<ParseError>,
60}
61
62impl Parse {
63    /// Split the parse result into a syntax tree root node and
64    /// the list of parse errors
65    #[must_use]
66    pub fn split(self) -> (SyntaxNode, Vec<ParseError>) {
67        let root = SyntaxNode::new_root(self.green_node.clone());
68
69        (root, self.errors)
70    }
71
72    #[must_use]
73    pub fn debug_parse(&self) -> String {
74        let syntax_node = SyntaxNode::new_root(self.green_node.clone());
75        let mut s = debug_tree(&syntax_node);
76
77        for error in &self.errors {
78            let _ = write!(s, "\n{error}");
79        }
80
81        s
82    }
83}
84
85#[derive(Debug, Clone, Eq, PartialEq)]
86pub(crate) struct Parser<'source> {
87    source: Source<'source>,
88    event_collection: EventCollection,
89    parse_errors: Vec<ParseError>,
90}
91
92impl<'source> Parser<'source> {
93    pub(crate) fn new(tokens: &'source [Token<'source>]) -> Self {
94        Self {
95            source: Source::new(tokens),
96            event_collection: EventCollection::new(),
97            parse_errors: vec![],
98        }
99    }
100
101    fn parse(mut self) -> (EventCollection, Vec<ParseError>) {
102        root(&mut self);
103        (self.event_collection, self.parse_errors)
104    }
105
106    fn peek(&mut self) -> Option<SyntaxKind> {
107        self.source.peek_kind()
108    }
109
110    pub(crate) fn peek_token(&mut self) -> Option<&Token> {
111        self.source.peek_token()
112    }
113
114    /// Lookahead is expensive!
115    /// This lookahead doesn't skip further trivia tokens and is only there for combining the next n lexer tokens!
116    /// for n of zero use `peek_token` instead!
117    pub(crate) fn peek_nth_token(&mut self, n: usize) -> Option<&Token> {
118        self.source.peek_nth_token(n)
119    }
120
121    pub(crate) fn get_pos(&self) -> usize {
122        self.source.get_pos()
123    }
124
125    pub(crate) fn at_set(&mut self, set: &[SyntaxKind]) -> bool {
126        self.peek().is_some_and(|k| set.contains(&k))
127    }
128
129    pub(crate) fn at(&mut self, kind: SyntaxKind) -> bool {
130        self.peek() == Some(kind)
131    }
132
133    /// Only use this if absolutely necessary, because it is expensive to lookahead!
134    pub(crate) fn at_following(&mut self, set: &[SyntaxKind]) -> bool {
135        self.source.at_following(set)
136    }
137
138    /// Only use this if absolutely necessary, because it is expensive to lookahead!
139    pub(crate) fn at_following_content(&mut self, set: &[(SyntaxKind, Option<&str>)]) -> bool {
140        self.source.at_following_content(set)
141    }
142
143    pub(crate) fn at_end(&mut self) -> bool {
144        self.peek().is_none()
145    }
146
147    #[track_caller]
148    pub(crate) fn bump(&mut self) -> &Token {
149        let consumed = self
150            .source
151            .next_token()
152            .expect("bump called, but there are no more tokens!");
153
154        self.event_collection.add_token(consumed.kind);
155
156        consumed
157    }
158
159    /// In most cases trivia like whitespace comes before any Tokens in a Node
160    /// But sometimes it is necessary to consume the trivia even after the last Token in a Node.
161    ///
162    /// This does exactly that and can be used to consume trailing trivia in a string parser
163    /// (where trivia should be inside as part of the string). Just call this before a call to parser.complete(...).
164    pub(crate) fn explicitly_consume_trivia(&mut self) {
165        self.event_collection.explicitly_consume_trivia();
166    }
167
168    #[track_caller]
169    pub(crate) fn bump_as(&mut self, kind: SyntaxKind) -> Token {
170        let consumed = self
171            .source
172            .next_token()
173            .expect("bump called, but there are no more tokens!");
174
175        self.event_collection.add_token(kind);
176
177        Token {
178            kind,
179            text: consumed.text,
180            range: consumed.range,
181        }
182    }
183
184    #[track_caller]
185    pub(crate) fn bump_next_n_as(&mut self, n: usize, kind: SyntaxKind) -> Vec<&Token> {
186        let consumed = self.source.next_n_tokens(n);
187        assert_eq!(
188            consumed.len(),
189            n,
190            "bump_next_n_as called, but there are not enough tokens!"
191        );
192
193        self.event_collection.add_next_n_tokens_as(n, kind);
194
195        consumed
196    }
197
198    pub(crate) fn expect(
199        &mut self,
200        kind: SyntaxKind,
201        recovery_set: &[SyntaxKind],
202    ) -> Option<&Token> {
203        if self.at(kind) {
204            Some(self.bump())
205        } else {
206            self.add_error(ParseErrorBuilder::new(format!("{kind}")));
207            self.recover_expect(Some(kind), recovery_set)
208        }
209    }
210
211    /// Recovers the parser after an error was found.
212    /// It looks for either any token in the `GENERAL_RECOVERY_SET` or the
213    /// provided `recovery_set` and wraps any tokens in between inside an error node.
214    ///
215    /// Important: in most cases this should not be called inside `parse_many`because it may
216    /// consume future children.
217    pub(crate) fn recover(&mut self, recovery_set: &[SyntaxKind]) {
218        self.recover_expect(None, recovery_set);
219    }
220
221    fn recover_expect(
222        &mut self,
223        expected_kind: Option<SyntaxKind>,
224        recovery_set: &[SyntaxKind],
225    ) -> Option<&Token> {
226        if self.at_end() || self.at_set(GENERAL_RECOVERY_SET) || self.at_set(recovery_set) {
227            return None;
228        }
229
230        let error_m = self.start();
231        loop {
232            self.bump();
233
234            if let Some(expected_kind) = expected_kind {
235                if self.at(expected_kind) {
236                    self.complete(error_m, SyntaxKind::ERROR);
237                    return Some(self.bump());
238                }
239            }
240
241            if self.at_end() || self.at_set(GENERAL_RECOVERY_SET) || self.at_set(recovery_set) {
242                self.complete(error_m, SyntaxKind::ERROR);
243                return None;
244            }
245        }
246    }
247
248    /// Adds a parser error but does not bump any tokens into the tree.
249    pub(crate) fn add_error(&mut self, mut error_builder: ParseErrorBuilder) {
250        // add missing information to builder
251        if error_builder.range.is_none() || error_builder.found.is_none() {
252            let current_token = self.source.peek_token();
253            let (found, range) = if let Some(Token { kind, range, .. }) = current_token {
254                (Some(*kind), *range)
255            } else {
256                // If we're at the end of the input we use the range of the very last token
257                // unwrap is fine, because error should not be called on empty file
258                (
259                    None,
260                    self.source
261                        .last_token_range()
262                        .expect("parser error called on empty file which has no last token"),
263                )
264            };
265
266            if error_builder.range.is_none() {
267                error_builder.range = Some(range);
268            }
269
270            if error_builder.found.is_none() {
271                error_builder.found = found;
272            }
273        }
274
275        self.parse_errors.push(error_builder.build());
276    }
277
278    pub(crate) fn start(&mut self) -> Marker {
279        self.event_collection.start()
280    }
281
282    #[track_caller]
283    pub(crate) fn complete(&mut self, marker: Marker, kind: SyntaxKind) -> CompletedMarker {
284        self.event_collection.complete(marker, kind)
285    }
286
287    #[track_caller]
288    pub(crate) fn precede(&mut self, completed_marker: CompletedMarker) -> Marker {
289        self.event_collection.precede(completed_marker)
290    }
291}
292
/// Test helper: parses `input` and compares the debug output (syntax tree
/// followed by the parse errors) against the expected snapshot.
#[cfg(test)]
#[allow(clippy::needless_pass_by_value)]
pub(crate) fn check_parse(input: &str, expected_tree: expect_test::Expect) {
    let actual = parse(input).debug_parse();
    expected_tree.assert_eq(&actual);
}
299
#[cfg(test)]
mod tests {
    use expect_test::expect;

    use super::*;

    /// An empty input must produce nothing but an empty root node.
    #[test]
    fn parse_nothing() {
        let expected = expect!["ROOT@0..0"];
        check_parse("", expected);
    }
}