typst_syntax/
parser.rs

1use std::mem;
2use std::ops::{DerefMut, Index, IndexMut, Range};
3
4use ecow::{EcoString, eco_format};
5use rustc_hash::{FxHashMap, FxHashSet};
6use typst_utils::{default_math_class, defer};
7use unicode_math_class::MathClass;
8
9use crate::set::{SyntaxSet, syntax_set};
10use crate::{Lexer, SyntaxKind, SyntaxMode, SyntaxNode, ast, set};
11
12// Picked by gut feeling.
13const MAX_DEPTH: u32 = 256;
14
15/// Parses a source file as top-level markup.
16pub fn parse(text: &str) -> SyntaxNode {
17    let _scope = typst_timing::TimingScope::new("parse");
18    let mut p = Parser::new(text, 0, SyntaxMode::Markup);
19    markup_exprs(&mut p, true, syntax_set!(End));
20    p.finish_into(SyntaxKind::Markup)
21}
22
23/// Parses top-level code.
24pub fn parse_code(text: &str) -> SyntaxNode {
25    let _scope = typst_timing::TimingScope::new("parse code");
26    let mut p = Parser::new(text, 0, SyntaxMode::Code);
27    code_exprs(&mut p, syntax_set!(End));
28    p.finish_into(SyntaxKind::Code)
29}
30
31/// Parses top-level math.
32pub fn parse_math(text: &str) -> SyntaxNode {
33    let _scope = typst_timing::TimingScope::new("parse math");
34    let mut p = Parser::new(text, 0, SyntaxMode::Math);
35    math_exprs(&mut p, syntax_set!(End));
36    p.finish_into(SyntaxKind::Math)
37}
38
39/// Parses markup expressions until a stop condition is met.
40fn markup(p: &mut Parser, at_start: bool, wrap_trivia: bool, stop_set: SyntaxSet) {
41    let m = if wrap_trivia { p.before_trivia() } else { p.marker() };
42    markup_exprs(p, at_start, stop_set);
43    if wrap_trivia {
44        p.flush_trivia();
45    }
46    p.wrap(m, SyntaxKind::Markup);
47}
48
49/// Parses a sequence of markup expressions.
50fn markup_exprs(p: &mut Parser, mut at_start: bool, stop_set: SyntaxSet) {
51    debug_assert!(stop_set.contains(SyntaxKind::End));
52    let Some(p) = p.check_depth_until(stop_set) else { return };
53
54    at_start |= p.had_newline();
55    let mut nesting: usize = 0;
56    // Keep going if we're at a nested right-bracket regardless of the stop set.
57    while !p.at_set(stop_set) || (nesting > 0 && p.at(SyntaxKind::RightBracket)) {
58        markup_expr(p, at_start, &mut nesting);
59        at_start = p.had_newline();
60    }
61}
62
63/// Reparses a subsection of markup incrementally.
64pub(super) fn reparse_markup(
65    text: &str,
66    range: Range<usize>,
67    at_start: &mut bool,
68    nesting: &mut usize,
69    top_level: bool,
70) -> Option<Vec<SyntaxNode>> {
71    let mut p = Parser::new(text, range.start, SyntaxMode::Markup);
72    *at_start |= p.had_newline();
73    while !p.end() && p.current_start() < range.end {
74        // If not top-level and at a new RightBracket, stop the reparse.
75        if !top_level && *nesting == 0 && p.at(SyntaxKind::RightBracket) {
76            break;
77        }
78        markup_expr(&mut p, *at_start, nesting);
79        *at_start = p.had_newline();
80    }
81    (p.balanced && p.current_start() == range.end).then(|| p.finish())
82}
83
84/// Parses a single markup expression. This includes markup elements like text,
85/// headings, strong/emph, lists/enums, etc. This is also the entry point for
86/// parsing math equations and embedded code expressions.
87fn markup_expr(p: &mut Parser, at_start: bool, nesting: &mut usize) {
88    let Some(p) = &mut p.increase_depth() else { return };
89
90    match p.current() {
91        SyntaxKind::LeftBracket => {
92            *nesting += 1;
93            p.convert_and_eat(SyntaxKind::Text);
94        }
95        SyntaxKind::RightBracket if *nesting > 0 => {
96            *nesting -= 1;
97            p.convert_and_eat(SyntaxKind::Text);
98        }
99        SyntaxKind::RightBracket => {
100            p.unexpected();
101            p.hint("try using a backslash escape: \\]");
102        }
103
104        SyntaxKind::Shebang => p.eat(),
105
106        SyntaxKind::Text
107        | SyntaxKind::Linebreak
108        | SyntaxKind::Escape
109        | SyntaxKind::Shorthand
110        | SyntaxKind::SmartQuote
111        | SyntaxKind::Link
112        | SyntaxKind::Label => p.eat(),
113
114        SyntaxKind::Raw => p.eat(), // Raw is handled entirely in the Lexer.
115
116        SyntaxKind::Hash => embedded_code_expr(p),
117        SyntaxKind::Star => strong(p),
118        SyntaxKind::Underscore => emph(p),
119        SyntaxKind::HeadingMarker if at_start => heading(p),
120        SyntaxKind::ListMarker if at_start => list_item(p),
121        SyntaxKind::EnumMarker if at_start => enum_item(p),
122        SyntaxKind::TermMarker if at_start => term_item(p),
123        SyntaxKind::RefMarker => reference(p),
124        SyntaxKind::Dollar => equation(p),
125
126        SyntaxKind::HeadingMarker
127        | SyntaxKind::ListMarker
128        | SyntaxKind::EnumMarker
129        | SyntaxKind::TermMarker
130        | SyntaxKind::Colon => p.convert_and_eat(SyntaxKind::Text),
131
132        _ => p.unexpected(),
133    }
134}
135
136/// Parses strong content: `*Strong*`.
137fn strong(p: &mut Parser) {
138    p.with_nl_mode(AtNewline::StopParBreak, |p| {
139        let m = p.marker();
140        p.assert(SyntaxKind::Star);
141        markup(p, false, true, syntax_set!(Star, RightBracket, End));
142        let had_closing = p.expect_closing_delimiter(m, SyntaxKind::Star);
143        p.wrap(m, SyntaxKind::Strong);
144        if had_closing && p[m].len() == 2 {
145            p[m].warn("no text within stars");
146            let hint = "using multiple consecutive stars (e.g. **) has no \
147                        additional effect";
148            p[m].hint(hint);
149        }
150    });
151}
152
153/// Parses emphasized content: `_Emphasized_`.
154fn emph(p: &mut Parser) {
155    p.with_nl_mode(AtNewline::StopParBreak, |p| {
156        let m = p.marker();
157        p.assert(SyntaxKind::Underscore);
158        markup(p, false, true, syntax_set!(Underscore, RightBracket, End));
159        let had_closing = p.expect_closing_delimiter(m, SyntaxKind::Underscore);
160        p.wrap(m, SyntaxKind::Emph);
161        if had_closing && p[m].len() == 2 {
162            p[m].warn("no text within underscores");
163            let hint = "using multiple consecutive underscores (e.g. __) has no \
164                        additional effect";
165            p[m].hint(hint);
166        }
167    });
168}
169
170/// Parses a section heading: `= Introduction`.
171fn heading(p: &mut Parser) {
172    p.with_nl_mode(AtNewline::Stop, |p| {
173        let m = p.marker();
174        p.assert(SyntaxKind::HeadingMarker);
175        markup(p, false, false, syntax_set!(Label, RightBracket, End));
176        p.wrap(m, SyntaxKind::Heading);
177    });
178}
179
180/// Parses an item in a bullet list: `- ...`.
181fn list_item(p: &mut Parser) {
182    p.with_nl_mode(AtNewline::RequireColumn(p.current_column()), |p| {
183        let m = p.marker();
184        p.assert(SyntaxKind::ListMarker);
185        markup(p, true, false, syntax_set!(RightBracket, End));
186        p.wrap(m, SyntaxKind::ListItem);
187    });
188}
189
190/// Parses an item in an enumeration (numbered list): `+ ...` or `1. ...`.
191fn enum_item(p: &mut Parser) {
192    p.with_nl_mode(AtNewline::RequireColumn(p.current_column()), |p| {
193        let m = p.marker();
194        p.assert(SyntaxKind::EnumMarker);
195        markup(p, true, false, syntax_set!(RightBracket, End));
196        p.wrap(m, SyntaxKind::EnumItem);
197    });
198}
199
200/// Parses an item in a term list: `/ Term: Details`.
201fn term_item(p: &mut Parser) {
202    p.with_nl_mode(AtNewline::RequireColumn(p.current_column()), |p| {
203        let m = p.marker();
204        p.with_nl_mode(AtNewline::Stop, |p| {
205            p.assert(SyntaxKind::TermMarker);
206            markup(p, false, false, syntax_set!(Colon, RightBracket, End));
207        });
208        p.expect(SyntaxKind::Colon);
209        markup(p, true, false, syntax_set!(RightBracket, End));
210        p.wrap(m, SyntaxKind::TermItem);
211    });
212}
213
214/// Parses a reference: `@target`, `@target[..]`.
215fn reference(p: &mut Parser) {
216    let m = p.marker();
217    p.assert(SyntaxKind::RefMarker);
218    if p.directly_at(SyntaxKind::LeftBracket) {
219        content_block(p);
220    }
221    p.wrap(m, SyntaxKind::Ref);
222}
223
224/// Parses a mathematical equation: `$x$`, `$ x^2 $`.
225fn equation(p: &mut Parser) {
226    let m = p.marker();
227    p.enter_modes(SyntaxMode::Math, AtNewline::Continue, |p| {
228        p.assert(SyntaxKind::Dollar);
229        math(p, syntax_set!(Dollar, End));
230        p.expect_closing_delimiter(m, SyntaxKind::Dollar);
231    });
232    p.wrap(m, SyntaxKind::Equation);
233}
234
235/// Parses the contents of a mathematical equation: `x^2 + 1`.
236fn math(p: &mut Parser, stop_set: SyntaxSet) {
237    let m = p.marker();
238    math_exprs(p, stop_set);
239    p.wrap(m, SyntaxKind::Math);
240}
241
242/// Parses a sequence of math expressions. Returns the number of expressions
243/// parsed (including errors).
244fn math_exprs(p: &mut Parser, stop_set: SyntaxSet) -> usize {
245    debug_assert!(stop_set.contains(SyntaxKind::End));
246    let Some(p) = p.check_depth_until(stop_set) else { return 1 };
247
248    let mut count = 0;
249    while !p.at_set(stop_set) {
250        if p.at_set(set::MATH_EXPR) {
251            math_expr(p);
252        } else {
253            p.unexpected();
254        }
255        count += 1;
256    }
257    count
258}
259
260/// Parses a single math expression: This includes math elements like
261/// attachment, fractions, roots, and embedded code expressions.
262fn math_expr(p: &mut Parser) {
263    math_expr_prec(p, 0, syntax_set!())
264}
265
266/// Parses a math expression with at least the given precedence, possibly
267/// chaining with another operator by returning early.
268fn math_expr_prec(p: &mut Parser, min_prec: u8, stop_set: SyntaxSet) {
269    let Some(p) = &mut p.increase_depth() else { return };
270
271    let m = p.marker();
272    let mut continuable = false;
273    match p.current() {
274        SyntaxKind::Hash => embedded_code_expr(p),
275
276        // The lexer manages creating full MathFieldAccess nodes if needed.
277        SyntaxKind::MathIdent | SyntaxKind::MathFieldAccess => {
278            continuable = true;
279            p.eat();
280            // Parse a function call for an identifier or field access.
281            if MATH_FUNC_PREC >= min_prec && p.directly_at(SyntaxKind::LeftParen) {
282                math_args(p);
283                p.wrap(m, SyntaxKind::MathCall);
284                continuable = false;
285            }
286        }
287
288        SyntaxKind::LeftBrace | SyntaxKind::LeftParen => {
289            math_delimited(p);
290        }
291        SyntaxKind::RightBrace if p.current_text() == "|]" => {
292            p.convert_and_eat(SyntaxKind::MathShorthand);
293        }
294        SyntaxKind::Dot
295        | SyntaxKind::Bang
296        | SyntaxKind::Comma
297        | SyntaxKind::Semicolon
298        | SyntaxKind::RightBrace
299        | SyntaxKind::RightParen => {
300            p.convert_and_eat(SyntaxKind::MathText);
301        }
302
303        SyntaxKind::MathText => {
304            continuable = is_math_alphabetic(p.current_text());
305            p.eat();
306        }
307
308        SyntaxKind::Linebreak
309        | SyntaxKind::MathAlignPoint
310        | SyntaxKind::MathShorthand => p.eat(),
311
312        SyntaxKind::MathPrimes | SyntaxKind::Escape | SyntaxKind::Str => {
313            continuable = true;
314            p.eat();
315        }
316
317        SyntaxKind::Root => {
318            p.eat();
319            let m2 = p.marker();
320            math_expr_prec(p, MATH_ROOT_PREC, syntax_set!());
321            math_unparen(p, m2);
322            p.wrap(m, SyntaxKind::MathRoot);
323        }
324
325        _ => p.expected("expression"),
326    }
327
328    // Maybe recognize an implicit function call: a 'continuable' token followed
329    // by delimiters will group as one with the precedence of a normal function.
330    // E.g. `a(b)/c` parses as `(a(b))/c` when `a` is continuable.
331    if continuable
332        && MATH_FUNC_PREC >= min_prec
333        && !p.had_trivia()
334        && p.at_set(syntax_set!(LeftBrace, LeftParen))
335    {
336        math_delimited(p);
337        p.wrap(m, SyntaxKind::Math);
338    }
339
340    // Parse infix and postfix operators. The general form of a parsed op looks
341    // like: `MathAttach[ MathText("x"), Hat("^"), MathText("2") ]`.
342    while !p.at_set(stop_set)
343        && let op_kind = p.current()
344        && let had_trivia = p.had_trivia()
345        && let Some((wrapper, infix_assoc, prec)) = math_op(op_kind, had_trivia)
346        && prec >= min_prec
347    {
348        // Prepare a chaining set for the attachment operators.
349        let mut chain_set = if wrapper == SyntaxKind::MathAttach {
350            // Hat can chain with Underscore, Underscore can chain with Hat, and
351            // Prime can chain with either (but prime can't interrupt a chain,
352            // see below).
353            syntax_set!(Hat, Underscore).remove(op_kind)
354        } else {
355            syntax_set!()
356        };
357
358        // Eat the operator itself.
359        if op_kind == SyntaxKind::Bang {
360            p.convert_and_eat(SyntaxKind::MathText);
361        } else {
362            p.eat();
363        }
364
365        // Slash is the only operator that removes parens from its left operand.
366        if wrapper == SyntaxKind::MathFrac {
367            math_unparen(p, m);
368        }
369
370        // Parse the operator's right operand.
371        if let Some(assoc) = infix_assoc {
372            let prec = match assoc {
373                ast::Assoc::Left => prec + 1,
374                ast::Assoc::Right => prec,
375            };
376            let m_rhs = p.marker();
377            math_expr_prec(p, prec, chain_set);
378            math_unparen(p, m_rhs);
379        }
380
381        // Avoid interrupting a chain when initially parsing a prime.
382        // For `a^b'_c^d` the grouping is `(a^(b')_c)^d` and not `a^(b'_c^d)`.
383        if !(op_kind == SyntaxKind::MathPrimes && p.at_set(stop_set)) {
384            // Parse chained attachment operators as a single attachment.
385            while p.at_set(chain_set) {
386                chain_set = chain_set.remove(p.current());
387                p.eat();
388                let m_chain_rhs = p.marker();
389                math_expr_prec(p, prec, chain_set);
390                math_unparen(p, m_chain_rhs);
391            }
392        }
393
394        // Finish the operator by wrapping from its left operand.
395        p.wrap(m, wrapper);
396    }
397}
398
399// These are declared here so they're easier to compare with `math_op`.
400const MATH_FUNC_PREC: u8 = 2;
401const MATH_ROOT_PREC: u8 = 2;
402
403/// Precedence and wrapper kinds for infix and postfix math operators.
404fn math_op(
405    kind: SyntaxKind,
406    had_trivia: bool,
407) -> Option<(SyntaxKind, Option<ast::Assoc>, u8)> {
408    let op = match kind {
409        SyntaxKind::Slash => (SyntaxKind::MathFrac, Some(ast::Assoc::Left), 1),
410        SyntaxKind::Underscore => (SyntaxKind::MathAttach, Some(ast::Assoc::Right), 2),
411        SyntaxKind::Hat => (SyntaxKind::MathAttach, Some(ast::Assoc::Right), 2),
412        SyntaxKind::MathPrimes if !had_trivia => (SyntaxKind::MathAttach, None, 2),
413        SyntaxKind::Bang if !had_trivia => (SyntaxKind::Math, None, 3),
414        _ => return None,
415    };
416    Some(op)
417}
418
419/// Whether text counts as alphabetic in math. For the `Text` and `MathText`
420/// kinds, this causes them to group with parens as an implicit function call.
421fn is_math_alphabetic(text: &str) -> bool {
422    if let Some((0, c)) = text.char_indices().next_back() {
423        // Just a single character.
424        c.is_alphabetic() || default_math_class(c) == Some(MathClass::Alphabetic)
425    } else {
426        // Multiple characters.
427        text.chars().all(char::is_alphabetic)
428    }
429}
430
431/// Parse matched delimiters in math: `[x + y]`.
432///
433/// The lexer produces `{Left,Right}{Brace,Paren}` for delimiters, and it's our
434/// job to convert them back to `MathText` or `MathShorthand` before eating.
435fn math_delimited(p: &mut Parser) {
436    let m = p.marker();
437    if p.current_text() == "[|" {
438        p.convert_and_eat(SyntaxKind::MathShorthand);
439    } else {
440        p.convert_and_eat(SyntaxKind::MathText);
441    }
442    let m_body = p.marker();
443    math_exprs(p, syntax_set!(Dollar, End, RightBrace, RightParen));
444    if p.at_set(syntax_set!(RightBrace, RightParen)) {
445        p.wrap(m_body, SyntaxKind::Math);
446        if p.current_text() == "|]" {
447            p.convert_and_eat(SyntaxKind::MathShorthand);
448        } else {
449            p.convert_and_eat(SyntaxKind::MathText);
450        }
451        p.wrap(m, SyntaxKind::MathDelimited);
452    } else {
453        // If we had no closing delimiter, just produce a math sequence.
454        p.wrap(m, SyntaxKind::Math);
455    }
456}
457
458/// Remove one set of parentheses (if any) from a previously parsed expression
459/// by converting to non-expression SyntaxKinds.
460fn math_unparen(p: &mut Parser, m: Marker) {
461    let Some(node) = p.nodes.get_mut(m.0) else { return };
462    if node.kind() != SyntaxKind::MathDelimited {
463        return;
464    }
465
466    if let [first, .., last] = node.children_mut()
467        && first.leaf_text() == "("
468        && last.leaf_text() == ")"
469    {
470        first.convert_to_kind(SyntaxKind::LeftParen);
471        last.convert_to_kind(SyntaxKind::RightParen);
472        // Only convert if we did have regular parens.
473        node.convert_to_kind(SyntaxKind::Math);
474    }
475}
476
477/// Parse an argument list in math: `(a, b; c, d; size: #50%)`.
478fn math_args(p: &mut Parser) {
479    let m = p.marker();
480    p.assert(SyntaxKind::LeftParen);
481
482    let mut seen = FxHashSet::default();
483    while !p.at_set(syntax_set!(End, Dollar, RightParen)) {
484        math_arg(p, &mut seen);
485        match p.current() {
486            SyntaxKind::End | SyntaxKind::Dollar | SyntaxKind::RightParen => {}
487            SyntaxKind::Semicolon | SyntaxKind::Comma => p.eat(),
488            _ => p.expected("comma or semicolon"),
489        }
490    }
491
492    p.expect_closing_delimiter(m, SyntaxKind::RightParen);
493    p.wrap(m, SyntaxKind::MathArgs);
494}
495
496/// Parses a single argument in a math argument list.
497fn math_arg<'s>(p: &mut Parser<'s>, seen: &mut FxHashSet<&'s str>) {
498    let m = p.marker();
499    let start = p.current_start();
500
501    let mut arg_kind = None;
502
503    if let Some(spread) = p.lexer.maybe_math_spread_arg(start) {
504        // Parses a spread argument: `..args`.
505        arg_kind = Some(SyntaxKind::Spread);
506        p.token.node = spread;
507        p.eat();
508    } else if let Some(named) = p.lexer.maybe_math_named_arg(start) {
509        // Parses a named argument: `thickness: #12pt`.
510        arg_kind = Some(SyntaxKind::Named);
511        p.token.node = named;
512        let text = p.current_text();
513        p.eat();
514        p.convert_and_eat(SyntaxKind::Colon);
515        if !seen.insert(text) {
516            p[m].convert_to_error(eco_format!("duplicate argument: {text}"));
517        }
518    }
519
520    // Parses the argument itself.
521    let m_arg = p.marker();
522    let count = math_exprs(p, syntax_set!(End, Dollar, Comma, Semicolon, RightParen));
523
524    if count == 0 {
525        // This can't happen due to checks in `Lexer::maybe_math_spread_arg`.
526        assert_ne!(arg_kind, Some(SyntaxKind::Spread));
527
528        // Named arguments require a value.
529        if arg_kind == Some(SyntaxKind::Named) {
530            p.expected("expression");
531        }
532    }
533
534    // Wrap math function arguments to join adjacent math content or create an
535    // empty 'Math' node for when we have 0 args. We don't wrap when
536    // `count == 1`, since wrapping would change the type of the expression
537    // from potentially non-content to content. E.g. `$ func(#12pt) $` would
538    // change the type of `#12pt` from size to content if wrapped.
539    if count != 1 {
540        p.wrap(m_arg, SyntaxKind::Math);
541    }
542
543    if let Some(kind) = arg_kind {
544        p.wrap(m, kind);
545    }
546}
547
548/// Parses the contents of a code block.
549fn code(p: &mut Parser, stop_set: SyntaxSet) {
550    let m = p.marker();
551    code_exprs(p, stop_set);
552    p.wrap(m, SyntaxKind::Code);
553}
554
555/// Parses a sequence of code expressions.
556fn code_exprs(p: &mut Parser, stop_set: SyntaxSet) {
557    debug_assert!(stop_set.contains(SyntaxKind::End));
558    let Some(p) = p.check_depth_until(stop_set) else { return };
559
560    while !p.at_set(stop_set) {
561        p.with_nl_mode(AtNewline::ContextualContinue, |p| {
562            if !p.at_set(set::CODE_EXPR) {
563                p.unexpected();
564                return;
565            }
566            code_expr(p);
567            if !p.at_set(stop_set) && !p.eat_if(SyntaxKind::Semicolon) {
568                p.expected("semicolon or line break");
569                if p.at(SyntaxKind::Label) {
570                    p.hint("labels can only be applied in markup mode");
571                    p.hint("try wrapping your code in a markup block (`[ ]`)");
572                }
573            }
574        });
575    }
576}
577
578/// Parses an atomic code expression embedded in markup or math.
579fn embedded_code_expr(p: &mut Parser) {
580    p.enter_modes(SyntaxMode::Code, AtNewline::Stop, |p| {
581        p.assert(SyntaxKind::Hash);
582        if p.had_trivia() || p.end() {
583            p.expected("expression");
584            return;
585        }
586
587        let stmt = p.at_set(set::STMT);
588        code_expr_prec(p, true, 0);
589
590        // Note: 2d math arguments rely on the `directly_at` check.
591        let semi = (stmt || p.directly_at(SyntaxKind::Semicolon))
592            && p.eat_if(SyntaxKind::Semicolon);
593
594        if stmt && !semi && !p.end() && !p.at(SyntaxKind::RightBracket) {
595            p.expected("semicolon or line break");
596        }
597    });
598}
599
600/// Parses a single code expression.
601fn code_expr(p: &mut Parser) {
602    code_expr_prec(p, false, 0);
603}
604
605/// Parses a code expression with at least the given precedence.
606fn code_expr_prec(p: &mut Parser, atomic: bool, min_prec: u8) {
607    let Some(p) = &mut p.increase_depth() else { return };
608
609    let m = p.marker();
610    if p.at_set(set::UNARY_OP) {
611        if !atomic {
612            let op = ast::UnOp::from_kind(p.current()).unwrap();
613            p.eat();
614            code_expr_prec(p, atomic, op.precedence());
615            p.wrap(m, SyntaxKind::Unary);
616        } else {
617            p.unexpected();
618            p.hint(
619                "to use a unary operator here, wrap the entire expression in parentheses",
620            );
621        }
622    } else {
623        code_primary(p, atomic);
624    }
625
626    loop {
627        if p.directly_at(SyntaxKind::LeftParen) || p.directly_at(SyntaxKind::LeftBracket)
628        {
629            args(p);
630            p.wrap(m, SyntaxKind::FuncCall);
631            continue;
632        }
633
634        let at_field_or_method = p.directly_at(SyntaxKind::Dot)
635            && p.lexer.clone().next().0 == SyntaxKind::Ident;
636
637        if atomic && !at_field_or_method {
638            break;
639        }
640
641        if p.eat_if(SyntaxKind::Dot) {
642            p.expect(SyntaxKind::Ident);
643            p.wrap(m, SyntaxKind::FieldAccess);
644            continue;
645        }
646
647        let binop = if p.at_set(set::BINARY_OP) {
648            ast::BinOp::from_kind(p.current())
649        } else if min_prec <= ast::BinOp::NotIn.precedence() && p.eat_if(SyntaxKind::Not)
650        {
651            if p.at(SyntaxKind::In) {
652                Some(ast::BinOp::NotIn)
653            } else {
654                p.expected("keyword `in`");
655                break;
656            }
657        } else {
658            None
659        };
660
661        if let Some(op) = binop {
662            let mut prec = op.precedence();
663            if prec < min_prec {
664                break;
665            }
666
667            match op.assoc() {
668                ast::Assoc::Left => prec += 1,
669                ast::Assoc::Right => {}
670            }
671
672            p.eat();
673            code_expr_prec(p, false, prec);
674            p.wrap(m, SyntaxKind::Binary);
675            continue;
676        }
677
678        break;
679    }
680}
681
682/// Parses a primary in a code expression. These are the atoms that unary and
683/// binary operations, functions calls, and field accesses start with / are
684/// composed of.
685fn code_primary(p: &mut Parser, atomic: bool) {
686    let m = p.marker();
687    match p.current() {
688        SyntaxKind::Ident => {
689            p.eat();
690            if !atomic && p.at(SyntaxKind::Arrow) {
691                p.wrap(m, SyntaxKind::Params);
692                p.assert(SyntaxKind::Arrow);
693                code_expr(p);
694                p.wrap(m, SyntaxKind::Closure);
695            }
696        }
697        SyntaxKind::Underscore if !atomic => {
698            p.eat();
699            if p.at(SyntaxKind::Arrow) {
700                p.wrap(m, SyntaxKind::Params);
701                p.eat();
702                code_expr(p);
703                p.wrap(m, SyntaxKind::Closure);
704            } else if p.eat_if(SyntaxKind::Eq) {
705                code_expr(p);
706                p.wrap(m, SyntaxKind::DestructAssignment);
707            } else {
708                p[m].expected("expression");
709            }
710        }
711
712        SyntaxKind::LeftBrace => code_block(p),
713        SyntaxKind::LeftBracket => content_block(p),
714        SyntaxKind::LeftParen => expr_with_paren(p, atomic),
715        SyntaxKind::Dollar => equation(p),
716        SyntaxKind::Let => let_binding(p),
717        SyntaxKind::Set => set_rule(p),
718        SyntaxKind::Show => show_rule(p),
719        SyntaxKind::Context => contextual(p, atomic),
720        SyntaxKind::If => conditional(p),
721        SyntaxKind::While => while_loop(p),
722        SyntaxKind::For => for_loop(p),
723        SyntaxKind::Import => module_import(p),
724        SyntaxKind::Include => module_include(p),
725        SyntaxKind::Break => break_stmt(p),
726        SyntaxKind::Continue => continue_stmt(p),
727        SyntaxKind::Return => return_stmt(p),
728
729        SyntaxKind::Raw => p.eat(), // Raw is handled entirely in the Lexer.
730
731        SyntaxKind::None
732        | SyntaxKind::Auto
733        | SyntaxKind::Int
734        | SyntaxKind::Float
735        | SyntaxKind::Bool
736        | SyntaxKind::Numeric
737        | SyntaxKind::Str
738        | SyntaxKind::Label => p.eat(),
739
740        // Consume erroneous tokens for things like `#12p`, `#]`, or `#"abc\"`.
741        _ if atomic => p.unexpected(),
742
743        _ => p.expected("expression"),
744    }
745}
746
747/// Reparses a full content or code block. This only succeeds if the new block
748/// contains balanced delimiters.
749pub(super) fn reparse_block(text: &str, range: Range<usize>) -> Option<SyntaxNode> {
750    let mut p = Parser::new(text, range.start, SyntaxMode::Code);
751    assert!(p.at(SyntaxKind::LeftBracket) || p.at(SyntaxKind::LeftBrace));
752    block(&mut p);
753    (p.balanced && p.prev_end() == range.end)
754        .then(|| p.finish().into_iter().next().unwrap())
755}
756
757/// Parses a content or code block.
758fn block(p: &mut Parser) {
759    match p.current() {
760        SyntaxKind::LeftBracket => content_block(p),
761        SyntaxKind::LeftBrace => code_block(p),
762        _ => p.expected("block"),
763    }
764}
765
766/// Parses a code block: `{ let x = 1; x + 2 }`.
767fn code_block(p: &mut Parser) {
768    let m = p.marker();
769    p.enter_modes(SyntaxMode::Code, AtNewline::Continue, |p| {
770        p.assert(SyntaxKind::LeftBrace);
771        code(p, syntax_set!(RightBrace, RightBracket, RightParen, End));
772        p.expect_closing_delimiter(m, SyntaxKind::RightBrace);
773    });
774    p.wrap(m, SyntaxKind::CodeBlock);
775}
776
777/// Parses a content block: `[*Hi* there!]`.
778fn content_block(p: &mut Parser) {
779    let m = p.marker();
780    p.enter_modes(SyntaxMode::Markup, AtNewline::Continue, |p| {
781        p.assert(SyntaxKind::LeftBracket);
782        markup(p, true, true, syntax_set!(RightBracket, End));
783        p.expect_closing_delimiter(m, SyntaxKind::RightBracket);
784    });
785    p.wrap(m, SyntaxKind::ContentBlock);
786}
787
788/// Parses a let binding: `let x = 1`.
789fn let_binding(p: &mut Parser) {
790    let m = p.marker();
791    p.assert(SyntaxKind::Let);
792
793    let m2 = p.marker();
794    let mut closure = false;
795    let mut other = false;
796
797    if p.eat_if(SyntaxKind::Ident) {
798        if p.directly_at(SyntaxKind::LeftParen) {
799            params(p);
800            closure = true;
801        }
802    } else {
803        pattern(p, false, &mut FxHashSet::default(), None);
804        other = true;
805    }
806
807    let f = if closure || other { Parser::expect } else { Parser::eat_if };
808    if f(p, SyntaxKind::Eq) {
809        code_expr(p);
810    }
811
812    if closure {
813        p.wrap(m2, SyntaxKind::Closure);
814    }
815
816    p.wrap(m, SyntaxKind::LetBinding);
817}
818
819/// Parses a set rule: `set text(...)`.
820fn set_rule(p: &mut Parser) {
821    let m = p.marker();
822    p.assert(SyntaxKind::Set);
823
824    let m2 = p.marker();
825    p.expect(SyntaxKind::Ident);
826    while p.eat_if(SyntaxKind::Dot) {
827        p.expect(SyntaxKind::Ident);
828        p.wrap(m2, SyntaxKind::FieldAccess);
829    }
830
831    args(p);
832    if p.eat_if(SyntaxKind::If) {
833        code_expr(p);
834    }
835    p.wrap(m, SyntaxKind::SetRule);
836}
837
838/// Parses a show rule: `show heading: it => emph(it.body)`.
839fn show_rule(p: &mut Parser) {
840    let m = p.marker();
841    p.assert(SyntaxKind::Show);
842    let m2 = p.before_trivia();
843
844    if !p.at(SyntaxKind::Colon) {
845        code_expr(p);
846    }
847
848    if p.eat_if(SyntaxKind::Colon) {
849        code_expr(p);
850    } else {
851        p.expected_at(m2, "colon");
852    }
853
854    p.wrap(m, SyntaxKind::ShowRule);
855}
856
857/// Parses a contextual expression: `context text.lang`.
858fn contextual(p: &mut Parser, atomic: bool) {
859    let m = p.marker();
860    p.assert(SyntaxKind::Context);
861    code_expr_prec(p, atomic, 0);
862    p.wrap(m, SyntaxKind::Contextual);
863}
864
865/// Parses an if-else conditional: `if x { y } else { z }`.
866fn conditional(p: &mut Parser) {
867    let m = p.marker();
868    p.assert(SyntaxKind::If);
869    code_expr(p);
870    block(p);
871    if p.eat_if(SyntaxKind::Else) {
872        if p.at(SyntaxKind::If) {
873            conditional(p);
874        } else {
875            block(p);
876        }
877    }
878    p.wrap(m, SyntaxKind::Conditional);
879}
880
881/// Parses a while loop: `while x { y }`.
882fn while_loop(p: &mut Parser) {
883    let m = p.marker();
884    p.assert(SyntaxKind::While);
885    code_expr(p);
886    block(p);
887    p.wrap(m, SyntaxKind::WhileLoop);
888}
889
890/// Parses a for loop: `for x in y { z }`.
891fn for_loop(p: &mut Parser) {
892    let m = p.marker();
893    p.assert(SyntaxKind::For);
894
895    let mut seen = FxHashSet::default();
896    pattern(p, false, &mut seen, None);
897
898    if p.at(SyntaxKind::Comma) {
899        let node = p.eat_and_get();
900        node.unexpected();
901        node.hint("destructuring patterns must be wrapped in parentheses");
902        if p.at_set(set::PATTERN) {
903            pattern(p, false, &mut seen, None);
904        }
905    }
906
907    p.expect(SyntaxKind::In);
908    code_expr(p);
909    block(p);
910    p.wrap(m, SyntaxKind::ForLoop);
911}
912
913/// Parses a module import: `import "utils.typ": a, b, c`.
914fn module_import(p: &mut Parser) {
915    let m = p.marker();
916    p.assert(SyntaxKind::Import);
917    code_expr(p);
918    if p.eat_if(SyntaxKind::As) {
919        // Allow renaming a full module import.
920        // If items are included, both the full module and the items are
921        // imported at the same time.
922        p.expect(SyntaxKind::Ident);
923    }
924
925    if p.eat_if(SyntaxKind::Colon) {
926        if p.at(SyntaxKind::LeftParen) {
927            p.with_nl_mode(AtNewline::Continue, |p| {
928                let m2 = p.marker();
929                p.assert(SyntaxKind::LeftParen);
930
931                import_items(p);
932
933                p.expect_closing_delimiter(m2, SyntaxKind::RightParen);
934            });
935        } else if !p.eat_if(SyntaxKind::Star) {
936            import_items(p);
937        }
938    }
939
940    p.wrap(m, SyntaxKind::ModuleImport);
941}
942
943/// Parses items to import from a module: `a, b, c`.
944fn import_items(p: &mut Parser) {
945    let m = p.marker();
946    while !p.current().is_terminator() {
947        let item_marker = p.marker();
948        if !p.eat_if(SyntaxKind::Ident) {
949            p.unexpected();
950        }
951
952        // Nested import path: `a.b.c`
953        while p.eat_if(SyntaxKind::Dot) {
954            p.expect(SyntaxKind::Ident);
955        }
956
957        p.wrap(item_marker, SyntaxKind::ImportItemPath);
958
959        // Rename imported item.
960        if p.eat_if(SyntaxKind::As) {
961            p.expect(SyntaxKind::Ident);
962            p.wrap(item_marker, SyntaxKind::RenamedImportItem);
963        }
964
965        if !p.current().is_terminator() {
966            p.expect(SyntaxKind::Comma);
967        }
968    }
969
970    p.wrap(m, SyntaxKind::ImportItems);
971}
972
973/// Parses a module include: `include "chapter1.typ"`.
974fn module_include(p: &mut Parser) {
975    let m = p.marker();
976    p.assert(SyntaxKind::Include);
977    code_expr(p);
978    p.wrap(m, SyntaxKind::ModuleInclude);
979}
980
981/// Parses a break from a loop: `break`.
982fn break_stmt(p: &mut Parser) {
983    let m = p.marker();
984    p.assert(SyntaxKind::Break);
985    p.wrap(m, SyntaxKind::LoopBreak);
986}
987
988/// Parses a continue in a loop: `continue`.
989fn continue_stmt(p: &mut Parser) {
990    let m = p.marker();
991    p.assert(SyntaxKind::Continue);
992    p.wrap(m, SyntaxKind::LoopContinue);
993}
994
995/// Parses a return from a function: `return`, `return x + 1`.
996fn return_stmt(p: &mut Parser) {
997    let m = p.marker();
998    p.assert(SyntaxKind::Return);
999    if p.at_set(set::CODE_EXPR) {
1000        code_expr(p);
1001    }
1002    p.wrap(m, SyntaxKind::FuncReturn);
1003}
1004
1005/// An expression that starts with a parenthesis.
1006fn expr_with_paren(p: &mut Parser, atomic: bool) {
1007    if atomic {
1008        // Atomic expressions aren't modified by operators that follow them, so
1009        // our first guess of array/dict will be correct.
1010        parenthesized_or_array_or_dict(p);
1011        return;
1012    }
1013
1014    // If we've seen this position before and have a memoized result, restore it
1015    // and return. Otherwise, get a key to this position and a checkpoint to
1016    // restart from in case we make a wrong prediction.
1017    let Some((memo_key, checkpoint)) = p.restore_memo_or_checkpoint() else { return };
1018    // The node length from when we restored.
1019    let prev_len = checkpoint.node_len;
1020
1021    // When we reach a '(', we can't be sure what it is. First, we attempt to
1022    // parse as a simple parenthesized expression, array, or dictionary as
1023    // these are the most likely things. We can handle all of those in a single
1024    // pass.
1025    let kind = parenthesized_or_array_or_dict(p);
1026
1027    // If, however, '=>' or '=' follows, we must backtrack and reparse as either
1028    // a parameter list or a destructuring. To be able to do that, we created a
1029    // parser checkpoint before our speculative parse, which we can restore.
1030    //
1031    // However, naive backtracking has a fatal flaw: It can lead to exponential
1032    // parsing time if we are constantly getting things wrong in a nested
1033    // scenario. The particular failure case for parameter parsing is the
1034    // following: `(x: (x: (x) => y) => y) => y`
1035    //
1036    // Such a structure will reparse over and over again recursively, leading to
1037    // a running time of O(2^n) for nesting depth n. To prevent this, we perform
1038    // a simple trick: When we have done the mistake of picking the wrong path
1039    // once and have subsequently parsed correctly, we save the result of that
1040    // correct parsing in the `p.memo` map. When we reach the same position
1041    // again, we can then just restore this result. In this way, no
1042    // parenthesized expression is parsed more than twice, leading to a worst
1043    // case running time of O(2n).
1044    if p.at(SyntaxKind::Arrow) {
1045        p.restore(checkpoint);
1046        let m = p.marker();
1047        params(p);
1048        if !p.expect(SyntaxKind::Arrow) {
1049            return;
1050        }
1051        code_expr(p);
1052        p.wrap(m, SyntaxKind::Closure);
1053    } else if p.at(SyntaxKind::Eq) && kind != SyntaxKind::Parenthesized {
1054        p.restore(checkpoint);
1055        let m = p.marker();
1056        destructuring_or_parenthesized(p, true, &mut FxHashSet::default());
1057        if !p.expect(SyntaxKind::Eq) {
1058            return;
1059        }
1060        code_expr(p);
1061        p.wrap(m, SyntaxKind::DestructAssignment);
1062    } else {
1063        return;
1064    }
1065
1066    // Memoize result if we backtracked.
1067    p.memoize_parsed_nodes(memo_key, prev_len);
1068}
1069
1070/// Parses either
1071/// - a parenthesized expression: `(1 + 2)`, or
1072/// - an array: `(1, "hi", 12cm)`, or
1073/// - a dictionary: `(thickness: 3pt, dash: "solid")`.
1074fn parenthesized_or_array_or_dict(p: &mut Parser) -> SyntaxKind {
1075    let mut state = GroupState {
1076        count: 0,
1077        maybe_just_parens: true,
1078        kind: None,
1079        seen: FxHashSet::default(),
1080    };
1081
1082    // An edge case with parens is whether we can interpret a leading spread
1083    // expression as a dictionary, e.g. if we want `(..dict1, ..dict2)` to join
1084    // the two dicts.
1085    //
1086    // The issue is that we decide on the type of the parenthesized expression
1087    // here in the parser by the `SyntaxKind` we wrap with, instead of in eval
1088    // based on the type of the spread item.
1089    //
1090    // The current fix is that we allow a leading colon to force the
1091    // parenthesized value into a dict:
1092    // - `(..arr1, ..arr2)` is wrapped as an `Array`.
1093    // - `(: ..dict1, ..dict2)` is wrapped as a `Dict`.
1094    //
1095    // This does allow some unexpected expressions, such as `(: key: val)`, but
1096    // it's currently intentional.
1097    let m = p.marker();
1098    p.with_nl_mode(AtNewline::Continue, |p| {
1099        p.assert(SyntaxKind::LeftParen);
1100        if p.eat_if(SyntaxKind::Colon) {
1101            state.kind = Some(SyntaxKind::Dict);
1102        }
1103
1104        while !p.current().is_terminator() {
1105            if !p.at_set(set::ARRAY_OR_DICT_ITEM) {
1106                p.unexpected();
1107                continue;
1108            }
1109
1110            array_or_dict_item(p, &mut state);
1111            state.count += 1;
1112
1113            if !p.current().is_terminator() && p.expect(SyntaxKind::Comma) {
1114                state.maybe_just_parens = false;
1115            }
1116        }
1117
1118        p.expect_closing_delimiter(m, SyntaxKind::RightParen);
1119    });
1120
1121    let kind = if state.maybe_just_parens && state.count == 1 {
1122        SyntaxKind::Parenthesized
1123    } else {
1124        state.kind.unwrap_or(SyntaxKind::Array)
1125    };
1126
1127    p.wrap(m, kind);
1128    kind
1129}
1130
1131/// State for array/dictionary parsing.
1132struct GroupState {
1133    count: usize,
1134    /// Whether this is just a single expression in parens: `(a)`. Single
1135    /// element arrays require an explicit comma: `(a,)`, unless we're
1136    /// spreading: `(..a)`.
1137    maybe_just_parens: bool,
1138    /// The `SyntaxKind` to wrap as (if we've figured it out yet).
1139    kind: Option<SyntaxKind>,
1140    /// Store named arguments so we can give an error if they're repeated.
1141    seen: FxHashSet<EcoString>,
1142}
1143
1144/// Parses a single item in an array or dictionary.
1145fn array_or_dict_item(p: &mut Parser, state: &mut GroupState) {
1146    let m = p.marker();
1147
1148    if p.eat_if(SyntaxKind::Dots) {
1149        // Parses a spread item: `..item`.
1150        code_expr(p);
1151        p.wrap(m, SyntaxKind::Spread);
1152        state.maybe_just_parens = false;
1153        return;
1154    }
1155
1156    code_expr(p);
1157
1158    if p.eat_if(SyntaxKind::Colon) {
1159        // Parses a named/keyed pair: `name: item` or `"key": item`.
1160        code_expr(p);
1161
1162        let node = &mut p[m];
1163        let pair_kind = match node.kind() {
1164            SyntaxKind::Ident => SyntaxKind::Named,
1165            _ => SyntaxKind::Keyed,
1166        };
1167
1168        if let Some(key) = match node.cast::<ast::Expr>() {
1169            Some(ast::Expr::Ident(ident)) => Some(ident.get().clone()),
1170            Some(ast::Expr::Str(s)) => Some(s.get()),
1171            _ => None,
1172        } && !state.seen.insert(key.clone())
1173        {
1174            node.convert_to_error(eco_format!("duplicate key: {key}"));
1175        }
1176
1177        p.wrap(m, pair_kind);
1178        state.maybe_just_parens = false;
1179
1180        if state.kind == Some(SyntaxKind::Array) {
1181            p[m].expected("expression");
1182        } else {
1183            state.kind = Some(SyntaxKind::Dict);
1184        }
1185    } else {
1186        // Parses a positional item.
1187        if state.kind == Some(SyntaxKind::Dict) {
1188            p[m].expected("named or keyed pair");
1189        } else {
1190            state.kind = Some(SyntaxKind::Array)
1191        }
1192    }
1193}
1194
1195/// Parses a function call's argument list: `(12pt, y)`.
1196fn args(p: &mut Parser) {
1197    if !p.directly_at(SyntaxKind::LeftParen) && !p.directly_at(SyntaxKind::LeftBracket) {
1198        p.expected("argument list");
1199        if p.at(SyntaxKind::LeftParen) || p.at(SyntaxKind::LeftBracket) {
1200            p.hint("there may not be any spaces before the argument list");
1201        }
1202    }
1203
1204    let m = p.marker();
1205    if p.at(SyntaxKind::LeftParen) {
1206        let m2 = p.marker();
1207        p.with_nl_mode(AtNewline::Continue, |p| {
1208            p.assert(SyntaxKind::LeftParen);
1209
1210            let mut seen = FxHashSet::default();
1211            while !p.current().is_terminator() {
1212                if !p.at_set(set::ARG) {
1213                    p.unexpected();
1214                    continue;
1215                }
1216
1217                arg(p, &mut seen);
1218
1219                if !p.current().is_terminator() {
1220                    p.expect(SyntaxKind::Comma);
1221                }
1222            }
1223
1224            p.expect_closing_delimiter(m2, SyntaxKind::RightParen);
1225        });
1226    }
1227
1228    while p.directly_at(SyntaxKind::LeftBracket) {
1229        content_block(p);
1230    }
1231
1232    p.wrap(m, SyntaxKind::Args);
1233}
1234
1235/// Parses a single argument in an argument list.
1236fn arg<'s>(p: &mut Parser<'s>, seen: &mut FxHashSet<&'s str>) {
1237    let m = p.marker();
1238
1239    // Parses a spread argument: `..args`.
1240    if p.eat_if(SyntaxKind::Dots) {
1241        code_expr(p);
1242        p.wrap(m, SyntaxKind::Spread);
1243        return;
1244    }
1245
1246    // Parses a normal positional argument or an argument name.
1247    let was_at_expr = p.at_set(set::CODE_EXPR);
1248    let text = p.current_text();
1249    code_expr(p);
1250
1251    // Parses a named argument: `thickness: 12pt`.
1252    if p.eat_if(SyntaxKind::Colon) {
1253        // Recover from bad argument name.
1254        if was_at_expr {
1255            if p[m].kind() != SyntaxKind::Ident {
1256                p[m].expected("identifier");
1257            } else if !seen.insert(text) {
1258                p[m].convert_to_error(eco_format!("duplicate argument: {text}"));
1259            }
1260        }
1261
1262        code_expr(p);
1263        p.wrap(m, SyntaxKind::Named);
1264    }
1265}
1266
1267/// Parses a closure's parameters: `(x, y)`.
1268fn params(p: &mut Parser) {
1269    let m = p.marker();
1270    p.with_nl_mode(AtNewline::Continue, |p| {
1271        p.assert(SyntaxKind::LeftParen);
1272
1273        let mut seen = FxHashSet::default();
1274        let mut sink = false;
1275
1276        while !p.current().is_terminator() {
1277            if !p.at_set(set::PARAM) {
1278                p.unexpected();
1279                continue;
1280            }
1281
1282            param(p, &mut seen, &mut sink);
1283
1284            if !p.current().is_terminator() {
1285                p.expect(SyntaxKind::Comma);
1286            }
1287        }
1288
1289        p.expect_closing_delimiter(m, SyntaxKind::RightParen);
1290    });
1291    p.wrap(m, SyntaxKind::Params);
1292}
1293
1294/// Parses a single parameter in a parameter list.
1295fn param<'s>(p: &mut Parser<'s>, seen: &mut FxHashSet<&'s str>, sink: &mut bool) {
1296    let m = p.marker();
1297
1298    // Parses argument sink: `..sink`.
1299    if p.eat_if(SyntaxKind::Dots) {
1300        if p.at_set(set::PATTERN_LEAF) {
1301            pattern_leaf(p, false, seen, Some("parameter"));
1302        }
1303        p.wrap(m, SyntaxKind::Spread);
1304        if mem::replace(sink, true) {
1305            p[m].convert_to_error("only one argument sink is allowed");
1306        }
1307        return;
1308    }
1309
1310    // Parses a normal positional parameter or a parameter name.
1311    let was_at_pat = p.at_set(set::PATTERN);
1312    pattern(p, false, seen, Some("parameter"));
1313
1314    // Parses a named parameter: `thickness: 12pt`.
1315    if p.eat_if(SyntaxKind::Colon) {
1316        // Recover from bad parameter name.
1317        if was_at_pat && p[m].kind() != SyntaxKind::Ident {
1318            p[m].expected("identifier");
1319        }
1320
1321        code_expr(p);
1322        p.wrap(m, SyntaxKind::Named);
1323    }
1324}
1325
1326/// Parses a binding or reassignment pattern.
1327fn pattern<'s>(
1328    p: &mut Parser<'s>,
1329    reassignment: bool,
1330    seen: &mut FxHashSet<&'s str>,
1331    dupe: Option<&'s str>,
1332) {
1333    let Some(p) = &mut p.increase_depth() else { return };
1334
1335    match p.current() {
1336        SyntaxKind::Underscore => p.eat(),
1337        SyntaxKind::LeftParen => destructuring_or_parenthesized(p, reassignment, seen),
1338        _ => pattern_leaf(p, reassignment, seen, dupe),
1339    }
1340}
1341
1342/// Parses a destructuring pattern or just a parenthesized pattern.
1343fn destructuring_or_parenthesized<'s>(
1344    p: &mut Parser<'s>,
1345    reassignment: bool,
1346    seen: &mut FxHashSet<&'s str>,
1347) {
1348    let mut sink = false;
1349    let mut count = 0;
1350    let mut maybe_just_parens = true;
1351
1352    let m = p.marker();
1353    p.with_nl_mode(AtNewline::Continue, |p| {
1354        p.assert(SyntaxKind::LeftParen);
1355
1356        while !p.current().is_terminator() {
1357            if !p.at_set(set::DESTRUCTURING_ITEM) {
1358                p.unexpected();
1359                continue;
1360            }
1361
1362            destructuring_item(p, reassignment, seen, &mut maybe_just_parens, &mut sink);
1363            count += 1;
1364
1365            if !p.current().is_terminator() && p.expect(SyntaxKind::Comma) {
1366                maybe_just_parens = false;
1367            }
1368        }
1369
1370        p.expect_closing_delimiter(m, SyntaxKind::RightParen);
1371    });
1372
1373    if maybe_just_parens && count == 1 && !sink {
1374        p.wrap(m, SyntaxKind::Parenthesized);
1375    } else {
1376        p.wrap(m, SyntaxKind::Destructuring);
1377    }
1378}
1379
1380/// Parses an item in a destructuring pattern.
1381fn destructuring_item<'s>(
1382    p: &mut Parser<'s>,
1383    reassignment: bool,
1384    seen: &mut FxHashSet<&'s str>,
1385    maybe_just_parens: &mut bool,
1386    sink: &mut bool,
1387) {
1388    let m = p.marker();
1389
1390    // Parse destructuring sink: `..rest`.
1391    if p.eat_if(SyntaxKind::Dots) {
1392        if p.at_set(set::PATTERN_LEAF) {
1393            pattern_leaf(p, reassignment, seen, None);
1394        }
1395        p.wrap(m, SyntaxKind::Spread);
1396        if mem::replace(sink, true) {
1397            p[m].convert_to_error("only one destructuring sink is allowed");
1398        }
1399        return;
1400    }
1401
1402    // Parse a normal positional pattern or a destructuring key.
1403    let was_at_pat = p.at_set(set::PATTERN);
1404
1405    // We must use a full checkpoint here (can't just clone the lexer) because
1406    // there may be trivia between the identifier and the colon we need to skip.
1407    let checkpoint = p.checkpoint();
1408    if !(p.eat_if(SyntaxKind::Ident) && p.at(SyntaxKind::Colon)) {
1409        p.restore(checkpoint);
1410        pattern(p, reassignment, seen, None);
1411    }
1412
1413    // Parse named destructuring item.
1414    if p.eat_if(SyntaxKind::Colon) {
1415        // Recover from bad named destructuring.
1416        if was_at_pat && p[m].kind() != SyntaxKind::Ident {
1417            p[m].expected("identifier");
1418        }
1419
1420        pattern(p, reassignment, seen, None);
1421        p.wrap(m, SyntaxKind::Named);
1422        *maybe_just_parens = false;
1423    }
1424}
1425
1426/// Parses a leaf in a pattern - either an identifier or an expression
1427/// depending on whether it's a binding or reassignment pattern.
1428fn pattern_leaf<'s>(
1429    p: &mut Parser<'s>,
1430    reassignment: bool,
1431    seen: &mut FxHashSet<&'s str>,
1432    dupe: Option<&'s str>,
1433) {
1434    if p.current().is_keyword() {
1435        p.eat_and_get().expected("pattern");
1436        return;
1437    } else if !p.at_set(set::PATTERN_LEAF) {
1438        p.expected("pattern");
1439        return;
1440    }
1441
1442    let m = p.marker();
1443    let text = p.current_text();
1444
1445    // We parse an atomic expression even though we only want an identifier for
1446    // better error recovery. We can mark the whole expression as unexpected
1447    // instead of going through its pieces one by one.
1448    code_expr_prec(p, true, 0);
1449
1450    if !reassignment {
1451        let node = &mut p[m];
1452        if node.kind() == SyntaxKind::Ident {
1453            if !seen.insert(text) {
1454                node.convert_to_error(eco_format!(
1455                    "duplicate {}: {text}",
1456                    dupe.unwrap_or("binding"),
1457                ));
1458            }
1459        } else {
1460            node.expected("pattern");
1461        }
1462    }
1463}
1464
1465/// Manages parsing a stream of tokens into a tree of [`SyntaxNode`]s.
1466///
1467/// The implementation presents an interface that investigates a current `token`
1468/// with a [`SyntaxKind`] and can take one of the following actions:
1469///
1470/// 1. Eat a token: push `token` onto the `nodes` vector as a [leaf
1471///    node](`SyntaxNode::leaf`) and prepare a new `token` by calling into the
1472///    lexer.
1473/// 2. Wrap nodes from a marker to the end of `nodes` (excluding `token` and any
1474///    attached trivia) into an [inner node](`SyntaxNode::inner`) of a specific
1475///    `SyntaxKind`.
1476/// 3. Produce or convert nodes into an [error node](`SyntaxNode::error`) when
1477///    something expected is missing or something unexpected is found.
1478///
1479/// Overall the parser produces a nested tree of SyntaxNodes as a "_Concrete_
1480/// Syntax Tree." The raw Concrete Syntax Tree should contain the entire source
1481/// text, and is used as-is for e.g. syntax highlighting and IDE features. In
1482/// `ast.rs` the CST is interpreted as a lazy view over an "_Abstract_ Syntax
1483/// Tree." The AST module skips over irrelevant tokens -- whitespace, comments,
1484/// code parens, commas in function args, etc. -- as it iterates through the
1485/// tree.
1486///
1487/// ### Modes
1488///
1489/// The parser manages the transitions between the three modes of Typst through
1490/// [syntax modes](`SyntaxMode`) and [newline modes](`AtNewline`).
1491///
1492/// The syntax modes map to the three Typst modes and are stored in the lexer,
1493/// changing which `SyntaxKind`s it will generate.
1494///
1495/// The newline mode is used to determine whether a newline should end the
1496/// current expression. If so, the parser temporarily changes `token`'s kind to
1497/// a fake [`SyntaxKind::End`]. When the parser exits the mode the original
1498/// `SyntaxKind` is restored.
1499struct Parser<'s> {
1500    /// The source text shared with the lexer.
1501    text: &'s str,
1502    /// A lexer over the source text with multiple modes. Defines the boundaries
1503    /// of tokens and determines their [`SyntaxKind`]. Contains the [`SyntaxMode`]
1504    /// defining our current Typst mode.
1505    lexer: Lexer<'s>,
1506    /// The newline mode: whether to insert a temporary end at newlines.
1507    nl_mode: AtNewline,
1508    /// The current token under inspection, not yet present in `nodes`. This
1509    /// acts like a single item of lookahead for the parser.
1510    ///
1511    /// When wrapping, this is _not_ included in the wrapped nodes.
1512    token: Token,
1513    /// Whether the parser has the expected set of open/close delimiters. This
1514    /// only ever transitions from `true` to `false`.
1515    balanced: bool,
1516    /// Nodes representing the concrete syntax tree of previously parsed text.
1517    /// In Code and Math, includes previously parsed trivia, but not `token`.
1518    nodes: Vec<SyntaxNode>,
1519    /// Parser checkpoints for a given text index. Used for efficient parser
1520    /// backtracking similar to packrat parsing. See comments above in
1521    /// [`expr_with_paren`].
1522    memo: MemoArena,
1523    /// The current expression nesting depth.
1524    depth: u32,
1525}
1526
1527/// A single token returned from the lexer with a cached [`SyntaxKind`] and a
1528/// record of preceding trivia.
1529#[derive(Debug, Clone)]
1530struct Token {
1531    /// The [`SyntaxKind`] of the current token.
1532    kind: SyntaxKind,
1533    /// The [`SyntaxNode`] of the current token, ready to be eaten and pushed
1534    /// onto the end of `nodes`.
1535    node: SyntaxNode,
1536    /// The number of preceding trivia before this token.
1537    n_trivia: usize,
1538    /// Whether this token's preceding trivia contained a newline.
1539    newline: Option<Newline>,
1540    /// The index into `text` of the start of our current token (the end is
1541    /// stored as the lexer's cursor).
1542    start: usize,
1543    /// The index into `text` of the end of the previous token.
1544    prev_end: usize,
1545}
1546
1547/// Information about newlines in a group of trivia.
1548#[derive(Debug, Copy, Clone)]
1549struct Newline {
1550    /// The column of the start of the next token in its line.
1551    column: Option<usize>,
1552    /// Whether any of our newlines were paragraph breaks.
1553    parbreak: bool,
1554}
1555
1556/// How to proceed with parsing when at a newline.
1557#[derive(Debug, Copy, Clone, Eq, PartialEq)]
1558enum AtNewline {
1559    /// Continue at newlines.
1560    Continue,
1561    /// Stop at any newline.
1562    Stop,
1563    /// Continue only if there is a continuation with `else` or `.` (Code only).
1564    ContextualContinue,
1565    /// Stop only at a parbreak, not normal newlines (Markup only).
1566    StopParBreak,
1567    /// Require that the token's column be greater or equal to a column (Markup
1568    /// only). If this is `0`, acts like `Continue`; if this is `usize::MAX`,
1569    /// acts like `Stop`.
1570    RequireColumn(usize),
1571}
1572
1573impl AtNewline {
1574    /// Whether to stop at a newline or continue based on the current context.
1575    fn stop_at(self, Newline { column, parbreak }: Newline, kind: SyntaxKind) -> bool {
1576        #[allow(clippy::match_like_matches_macro)]
1577        match self {
1578            AtNewline::Continue => false,
1579            AtNewline::Stop => true,
1580            AtNewline::ContextualContinue => match kind {
1581                SyntaxKind::Else | SyntaxKind::Dot => false,
1582                _ => true,
1583            },
1584            AtNewline::StopParBreak => parbreak,
1585            AtNewline::RequireColumn(min_col) => {
1586                // When the column is `None`, the newline doesn't start a
1587                // column, and we continue parsing. This may happen on the
1588                // boundary of syntax modes, since we only report a column in
1589                // Markup.
1590                column.is_some_and(|column| column <= min_col)
1591            }
1592        }
1593    }
1594}
1595
1596/// A marker representing a node's position in the parser. Mainly used for
1597/// wrapping, but can also index into the parser to access the node, like
1598/// `p[m]`.
1599#[derive(Debug, Copy, Clone, Eq, PartialEq)]
1600struct Marker(usize);
1601
1602// Index into the parser with markers.
1603impl Index<Marker> for Parser<'_> {
1604    type Output = SyntaxNode;
1605
1606    fn index(&self, m: Marker) -> &Self::Output {
1607        &self.nodes[m.0]
1608    }
1609}
1610
1611impl IndexMut<Marker> for Parser<'_> {
1612    fn index_mut(&mut self, m: Marker) -> &mut Self::Output {
1613        &mut self.nodes[m.0]
1614    }
1615}
1616
1617/// Creating/Consuming the parser and getting info about the current token.
1618impl<'s> Parser<'s> {
1619    /// Create a new parser starting from the given text offset and syntax mode.
1620    fn new(text: &'s str, offset: usize, mode: SyntaxMode) -> Self {
1621        let mut lexer = Lexer::new(text, mode);
1622        lexer.jump(offset);
1623        let nl_mode = AtNewline::Continue;
1624        let mut nodes = vec![];
1625        let token = Self::lex(&mut nodes, &mut lexer, nl_mode);
1626        Self {
1627            text,
1628            lexer,
1629            nl_mode,
1630            token,
1631            balanced: true,
1632            nodes,
1633            memo: Default::default(),
1634            depth: 0,
1635        }
1636    }
1637
1638    /// Consume the parser, yielding the full vector of parsed SyntaxNodes.
1639    fn finish(self) -> Vec<SyntaxNode> {
1640        self.nodes
1641    }
1642
1643    /// Consume the parser, generating a single top-level node.
1644    fn finish_into(self, kind: SyntaxKind) -> SyntaxNode {
1645        assert!(self.at(SyntaxKind::End));
1646        SyntaxNode::inner(kind, self.finish())
1647    }
1648
1649    /// Similar to a `peek()` function: returns the `kind` of the next token to
1650    /// be eaten.
1651    fn current(&self) -> SyntaxKind {
1652        self.token.kind
1653    }
1654
1655    /// Whether the current token is a given [`SyntaxKind`].
1656    fn at(&self, kind: SyntaxKind) -> bool {
1657        self.token.kind == kind
1658    }
1659
1660    /// Whether the current token is contained in a [`SyntaxSet`].
1661    fn at_set(&self, set: SyntaxSet) -> bool {
1662        set.contains(self.token.kind)
1663    }
1664
1665    /// Whether we're at the end of the token stream.
1666    ///
1667    /// Note: This might be a fake end due to the newline mode.
1668    fn end(&self) -> bool {
1669        self.at(SyntaxKind::End)
1670    }
1671
1672    /// If we're at the given `kind` with no preceding trivia tokens.
1673    fn directly_at(&self, kind: SyntaxKind) -> bool {
1674        self.token.kind == kind && !self.had_trivia()
1675    }
1676
1677    /// Whether `token` had any preceding trivia.
1678    fn had_trivia(&self) -> bool {
1679        self.token.n_trivia > 0
1680    }
1681
1682    /// Whether `token` had a newline among any of its preceding trivia.
1683    fn had_newline(&self) -> bool {
1684        self.token.newline.is_some()
1685    }
1686
1687    /// The number of characters until the most recent newline from the start of
1688    /// the current token. Uses a cached value from the newline mode if present.
1689    fn current_column(&self) -> usize {
1690        self.token
1691            .newline
1692            .and_then(|newline| newline.column)
1693            .unwrap_or_else(|| self.lexer.column(self.token.start))
1694    }
1695
1696    /// The current token's text.
1697    fn current_text(&self) -> &'s str {
1698        &self.text[self.token.start..self.current_end()]
1699    }
1700
1701    /// The offset into `text` of the current token's start.
1702    fn current_start(&self) -> usize {
1703        self.token.start
1704    }
1705
1706    /// The offset into `text` of the current token's end.
1707    fn current_end(&self) -> usize {
1708        self.lexer.cursor()
1709    }
1710
1711    /// The offset into `text` of the previous token's end.
1712    fn prev_end(&self) -> usize {
1713        self.token.prev_end
1714    }
1715}
1716
1717// The main parsing interface for generating tokens and eating/modifying nodes.
1718impl<'s> Parser<'s> {
1719    /// A marker that will point to the current token in the parser once it's
1720    /// been eaten.
1721    fn marker(&self) -> Marker {
1722        Marker(self.nodes.len())
1723    }
1724
1725    /// A marker that will point to first trivia before this token in the
1726    /// parser (or the token itself if no trivia precede it).
1727    fn before_trivia(&self) -> Marker {
1728        Marker(self.nodes.len() - self.token.n_trivia)
1729    }
1730
1731    /// Eat the current node and return a reference for in-place mutation.
1732    #[track_caller]
1733    fn eat_and_get(&mut self) -> &mut SyntaxNode {
1734        let offset = self.nodes.len();
1735        self.eat();
1736        &mut self.nodes[offset]
1737    }
1738
1739    /// Eat the token if at `kind`. Returns `true` if eaten.
1740    ///
1741    /// Note: In Math and Code, this will ignore trivia in front of the
1742    /// `kind`, To forbid skipping trivia, consider using `eat_if_direct`.
1743    fn eat_if(&mut self, kind: SyntaxKind) -> bool {
1744        let at = self.at(kind);
1745        if at {
1746            self.eat();
1747        }
1748        at
1749    }
1750
1751    /// Assert that we are at the given [`SyntaxKind`] and eat it. This should
1752    /// be used when moving between functions that expect to start with a
1753    /// specific token.
1754    #[track_caller]
1755    fn assert(&mut self, kind: SyntaxKind) {
1756        assert_eq!(self.token.kind, kind);
1757        self.eat();
1758    }
1759
1760    /// Convert the current token's [`SyntaxKind`] and eat it.
1761    fn convert_and_eat(&mut self, kind: SyntaxKind) {
1762        // Only need to replace the node here.
1763        self.token.node.convert_to_kind(kind);
1764        self.eat();
1765    }
1766
1767    /// Eat the current token by saving it to the `nodes` vector, then move
1768    /// the lexer forward to prepare a new token.
1769    fn eat(&mut self) {
1770        self.nodes.push(std::mem::take(&mut self.token.node));
1771        self.token = Self::lex(&mut self.nodes, &mut self.lexer, self.nl_mode);
1772    }
1773
1774    /// Detach the parsed trivia nodes from this token (but not newline info) so
1775    /// that subsequent wrapping will include the trivia.
1776    fn flush_trivia(&mut self) {
1777        self.token.n_trivia = 0;
1778        self.token.prev_end = self.token.start;
1779    }
1780
1781    /// Wrap the nodes from a marker up to (but excluding) the current token in
1782    /// a new [inner node](`SyntaxNode::inner`) of the given kind. This is an
1783    /// easy interface for creating nested syntax nodes _after_ having parsed
1784    /// their children.
1785    fn wrap(&mut self, from: Marker, kind: SyntaxKind) {
1786        let to = self.before_trivia().0;
1787        let from = from.0.min(to);
1788        let children = self.nodes.drain(from..to).collect();
1789        self.nodes.insert(from, SyntaxNode::inner(kind, children));
1790    }
1791
1792    /// Wrap the nodes from a marker up to (but excluding) the current token in
1793    /// a new [error node](`SyntaxNode::error`) with the given message. This is
1794    /// an easy interface for creating a syntax error _after_ having parsed its
1795    /// children.
1796    fn wrap_error(&mut self, from: Marker, message: impl Into<EcoString>) {
1797        let to = self.before_trivia().0;
1798        let from = from.0.min(to);
1799        let len: usize = self.nodes.drain(from..to).map(|node| node.len()).sum();
1800        let end = self.token.prev_end;
1801        let start = end - len;
1802        let text = &self.text[start..end];
1803        self.nodes.insert(from, SyntaxNode::error(message.into(), text));
1804    }
1805
1806    /// Parse within the [`SyntaxMode`] for subsequent tokens (does not change the
1807    /// current token). This may re-lex the final token on exit.
1808    ///
1809    /// This function effectively repurposes the call stack as a stack of modes.
1810    fn enter_modes(
1811        &mut self,
1812        mode: SyntaxMode,
1813        stop: AtNewline,
1814        func: impl FnOnce(&mut Parser<'s>),
1815    ) {
1816        let previous = self.lexer.mode();
1817        self.lexer.set_mode(mode);
1818        self.with_nl_mode(stop, func);
1819        if mode != previous {
1820            self.lexer.set_mode(previous);
1821            self.lexer.jump(self.token.prev_end);
1822            self.nodes.truncate(self.nodes.len() - self.token.n_trivia);
1823            self.token = Self::lex(&mut self.nodes, &mut self.lexer, self.nl_mode);
1824        }
1825    }
1826
1827    /// Parse within the [`AtNewline`] mode for subsequent tokens (does not
1828    /// change the current token). This may re-lex the final token on exit.
1829    ///
1830    /// This function effectively repurposes the call stack as a stack of modes.
1831    fn with_nl_mode(&mut self, mode: AtNewline, func: impl FnOnce(&mut Parser<'s>)) {
1832        let previous = self.nl_mode;
1833        self.nl_mode = mode;
1834        func(self);
1835        self.nl_mode = previous;
1836        if let Some(newline) = self.token.newline
1837            && mode != previous
1838        {
1839            // Restore our actual token's kind or insert a fake end.
1840            let actual_kind = self.token.node.kind();
1841            if self.nl_mode.stop_at(newline, actual_kind) {
1842                self.token.kind = SyntaxKind::End;
1843            } else {
1844                self.token.kind = actual_kind;
1845            }
1846        }
1847    }
1848
1849    /// Move the lexer forward and prepare the current token. In Code, this
1850    /// might insert a temporary [`SyntaxKind::End`] based on our newline mode.
1851    ///
1852    /// This is not a method on `self` because we need a valid token before we
1853    /// can initialize the parser.
1854    fn lex(nodes: &mut Vec<SyntaxNode>, lexer: &mut Lexer, nl_mode: AtNewline) -> Token {
1855        let prev_end = lexer.cursor();
1856        let mut start = prev_end;
1857        let (mut kind, mut node) = lexer.next();
1858        let mut n_trivia = 0;
1859        let mut had_newline = false;
1860        let mut parbreak = false;
1861
1862        while kind.is_trivia() {
1863            had_newline |= lexer.newline(); // Newlines are always trivia.
1864            parbreak |= kind == SyntaxKind::Parbreak;
1865            n_trivia += 1;
1866            nodes.push(node);
1867            start = lexer.cursor();
1868            (kind, node) = lexer.next();
1869        }
1870
1871        let newline = if had_newline {
1872            let column =
1873                (lexer.mode() == SyntaxMode::Markup).then(|| lexer.column(start));
1874            let newline = Newline { column, parbreak };
1875            if nl_mode.stop_at(newline, kind) {
1876                // Insert a temporary `SyntaxKind::End` to halt the parser.
1877                // The actual kind will be restored from `node` later.
1878                kind = SyntaxKind::End;
1879            }
1880            Some(newline)
1881        } else {
1882            None
1883        };
1884
1885        Token { kind, node, n_trivia, newline, start, prev_end }
1886    }
1887}
1888
1889/// Extra parser state for efficiently recovering from mispredicted parses.
1890///
1891/// This is the same idea as packrat parsing, but we use it only in the limited
1892/// case of parenthesized structures. See [`expr_with_paren`] for more.
1893#[derive(Default)]
1894struct MemoArena {
1895    /// A single arena of previously parsed nodes (to reduce allocations).
1896    /// Memoized ranges refer to unique sections of the arena.
1897    arena: Vec<SyntaxNode>,
1898    /// A map from the parser's current position to a range of previously parsed
1899    /// nodes in the arena and a checkpoint of the parser's state. These allow
1900    /// us to reset the parser to avoid parsing the same location again.
1901    memo_map: FxHashMap<MemoKey, (Range<usize>, PartialState)>,
1902}
1903
1904/// A type alias for the memo key so it doesn't get confused with other usizes.
1905///
1906/// The memo is keyed by the index into `text` of the current token's start.
1907type MemoKey = usize;
1908
1909/// A checkpoint of the parser which can fully restore it to a previous state.
1910struct Checkpoint {
1911    node_len: usize,
1912    state: PartialState,
1913}
1914
1915/// State needed to restore the parser's current token and the lexer (but not
1916/// the nodes vector).
1917#[derive(Clone)]
1918struct PartialState {
1919    cursor: usize,
1920    lex_mode: SyntaxMode,
1921    token: Token,
1922}
1923
1924/// The Memoization interface.
1925impl Parser<'_> {
1926    /// Store the already parsed nodes and the parser state into the memo map by
1927    /// extending the arena and storing the extended range and a checkpoint.
1928    fn memoize_parsed_nodes(&mut self, key: MemoKey, prev_len: usize) {
1929        let Checkpoint { state, node_len } = self.checkpoint();
1930        let memo_start = self.memo.arena.len();
1931        self.memo.arena.extend_from_slice(&self.nodes[prev_len..node_len]);
1932        let arena_range = memo_start..self.memo.arena.len();
1933        self.memo.memo_map.insert(key, (arena_range, state));
1934    }
1935
1936    /// Try to load a memoized result, return `None` if we did or `Some` (with a
1937    /// checkpoint and a key for the memo map) if we didn't.
1938    fn restore_memo_or_checkpoint(&mut self) -> Option<(MemoKey, Checkpoint)> {
1939        // We use the starting index of the current token as our key.
1940        let key: MemoKey = self.current_start();
1941        match self.memo.memo_map.get(&key).cloned() {
1942            Some((range, state)) => {
1943                self.nodes.extend_from_slice(&self.memo.arena[range]);
1944                // It's important that we don't truncate the nodes vector since
1945                // it may have grown or shrunk (due to other memoization or
1946                // error reporting) since we made this checkpoint.
1947                self.restore_partial(state);
1948                None
1949            }
1950            None => Some((key, self.checkpoint())),
1951        }
1952    }
1953
1954    /// Restore the parser to the state at a checkpoint.
1955    fn restore(&mut self, checkpoint: Checkpoint) {
1956        self.nodes.truncate(checkpoint.node_len);
1957        self.restore_partial(checkpoint.state);
1958    }
1959
1960    /// Restore parts of the checkpoint excluding the nodes vector.
1961    fn restore_partial(&mut self, state: PartialState) {
1962        self.lexer.jump(state.cursor);
1963        self.lexer.set_mode(state.lex_mode);
1964        self.token = state.token;
1965    }
1966
1967    /// Save a checkpoint of the parser state.
1968    fn checkpoint(&self) -> Checkpoint {
1969        let node_len = self.nodes.len();
1970        let state = PartialState {
1971            cursor: self.lexer.cursor(),
1972            lex_mode: self.lexer.mode(),
1973            token: self.token.clone(),
1974        };
1975        Checkpoint { node_len, state }
1976    }
1977}
1978
1979/// Functions for eating expected or unexpected tokens and generating errors if
1980/// we don't get what we expect.
1981impl Parser<'_> {
1982    /// Consume the given `kind` or produce an error.
1983    fn expect(&mut self, kind: SyntaxKind) -> bool {
1984        let at = self.at(kind);
1985        if at {
1986            self.eat();
1987        } else if kind == SyntaxKind::Ident && self.token.kind.is_keyword() {
1988            self.trim_errors();
1989            self.eat_and_get().expected(kind.name());
1990        } else {
1991            self.balanced &= !kind.is_grouping();
1992            self.expected(kind.name());
1993        }
1994        at
1995    }
1996
1997    /// Consume the given closing delimiter or produce an error for the matching
1998    /// opening delimiter at `open`.
1999    #[track_caller]
2000    fn expect_closing_delimiter(&mut self, open: Marker, kind: SyntaxKind) -> bool {
2001        let at = self.eat_if(kind);
2002        if !at {
2003            self.nodes[open.0].convert_to_error("unclosed delimiter");
2004        }
2005        at
2006    }
2007
2008    /// Produce an error that the given `thing` was expected. If the parser is
2009    /// at an erroneous token, this will instead eat that token and continue.
2010    fn expected(&mut self, thing: &str) {
2011        if self.token.kind.is_error() {
2012            // If we encounter an erroneous token when something was expected,
2013            // we need to actually consume the token. If we don't, and we then
2014            // proceed to exit our current lexing mode, future incremental
2015            // reparsing could fail to lex the token in the correct mode.
2016            //
2017            // Example: When parsing an unclosed string in `#import "str`,
2018            // we need to make sure the string is lexed as code, because if
2019            // it were lexed as markup, a future insert of a closing quote
2020            // would only be adjacent to markup text, and wouldn't lex as a
2021            // string when reparsing, causing a difference between the full
2022            // parse and the incremental parse.
2023            self.trim_errors();
2024            self.eat();
2025        } else if !self.after_error() {
2026            self.expected_at(self.before_trivia(), thing);
2027        }
2028    }
2029
2030    /// Whether the last non-trivia node is an error.
2031    fn after_error(&mut self) -> bool {
2032        let m = self.before_trivia();
2033        m.0 > 0 && self.nodes[m.0 - 1].kind().is_error()
2034    }
2035
2036    /// Produce an error that the given `thing` was expected at the position
2037    /// of the marker `m`.
2038    fn expected_at(&mut self, m: Marker, thing: &str) {
2039        let error = SyntaxNode::error(eco_format!("expected {thing}"), "");
2040        self.nodes.insert(m.0, error);
2041    }
2042
2043    /// Add a hint to a trailing error.
2044    fn hint(&mut self, hint: &str) {
2045        let m = self.before_trivia();
2046        if let Some(error) = self.nodes.get_mut(m.0 - 1) {
2047            error.hint(hint);
2048        }
2049    }
2050
2051    /// Consume the next token (if any) and produce an error stating that it was
2052    /// unexpected.
2053    fn unexpected(&mut self) {
2054        self.trim_errors();
2055        self.balanced &= !self.token.kind.is_grouping();
2056        self.eat_and_get().unexpected();
2057    }
2058
2059    /// Remove trailing errors with zero length.
2060    fn trim_errors(&mut self) {
2061        let Marker(end) = self.before_trivia();
2062        let mut start = end;
2063        while start > 0
2064            && self.nodes[start - 1].kind().is_error()
2065            && self.nodes[start - 1].is_empty()
2066        {
2067            start -= 1;
2068        }
2069        self.nodes.drain(start..end);
2070    }
2071
2072    /// Check if the maximum depth has been exceeded. If so, generate an error
2073    /// and try to make a best effort recovery using the `stop_set` as a guide.
2074    ///
2075    /// This function isn't strictly necessary, but it is an optimization to
2076    /// generate one combined error instead of an error message for every
2077    /// balanced set of tokens. In the pathological case an error would be
2078    /// generated by [`Self::increase_depth`] for every single token until
2079    /// the `stop_set` of the parent function is reached.
2080    fn check_depth_until(&mut self, stop_set: SyntaxSet) -> Option<&mut Self> {
2081        if self.depth < MAX_DEPTH {
2082            Some(self)
2083        } else {
2084            self.depth_check_error(Some(stop_set));
2085            None
2086        }
2087    }
2088
2089    /// Check if the maximum depth has been exceeded. If so, generate an error
2090    /// and try to make a best effort recovery. Otherwise increase the depth and
2091    /// return a handle that automatically decreases it again when dropped.
2092    fn increase_depth(&mut self) -> Option<impl DerefMut<Target = Self>> {
2093        if self.depth < MAX_DEPTH {
2094            self.depth += 1;
2095            Some(defer(self, |p| p.depth -= 1))
2096        } else {
2097            self.depth_check_error(None);
2098            None
2099        }
2100    }
2101
2102    /// Generate an error for an exceeded maximum depth check.
2103    fn depth_check_error(&mut self, stop_set: Option<SyntaxSet>) {
2104        let m = self.marker();
2105
2106        let mut balance: usize = 0;
2107        // This function has to guarantee some sort of forward progress,
2108        // otherwise the parser might loop indefinitely. One token is eaten in
2109        // all cases, if that token is an opening delimiter, try to balance the
2110        // opening and closing grouping delimiters before continuing.
2111        self.with_nl_mode(AtNewline::Continue, |p| {
2112            loop {
2113                if p.at_set(syntax_set!(LeftBracket, LeftBrace, LeftParen)) {
2114                    balance = balance.saturating_add(1);
2115                } else if p.at_set(syntax_set!(RightBracket, RightBrace, RightParen)) {
2116                    balance = balance.saturating_sub(1);
2117                }
2118                p.eat();
2119
2120                let at_stop = stop_set.is_none_or(|s| p.at_set(s));
2121                if (balance == 0 && at_stop) || p.end() {
2122                    break;
2123                }
2124            }
2125        });
2126
2127        self.wrap_error(m, "maximum parsing depth exceeded");
2128    }
2129}
typst_syntax/parser.rs

typst_syntax/
parser.rs