Skip to main content

panache_parser/
parser.rs

1//! Parser module for Pandoc/Quarto documents.
2//!
3//! This module implements a single-pass parser that constructs a lossless syntax tree (CST) for
4//! Quarto documents.
5
6use crate::options::ParserOptions;
7use crate::range_utils::find_incremental_restart_offset;
8use crate::syntax::{SyntaxKind, SyntaxNode};
9use rowan::{GreenNode, GreenToken, NodeOrToken};
10
11pub mod blocks;
12pub mod inlines;
13pub mod utils;
14pub mod yaml;
15
16mod block_dispatcher;
17mod core;
18
19// Re-export main parser
20pub use core::Parser;
21
22/// Parses a Quarto document string into a syntax tree.
23///
24/// Single-pass architecture: blocks emit inline structure during parsing.
25///
26/// # Examples
27///
28/// ```rust
29/// use panache_parser::parser::parse;
30///
31/// let input = "# Heading\n\nParagraph text.";
32/// let tree = parse(input, None);
33/// println!("{:#?}", tree);
34/// ```
35///
36/// # Arguments
37///
38/// * `input` - The Quarto document content to parse
39/// * `config` - Optional configuration. If None, uses default config.
40pub fn parse(input: &str, config: Option<ParserOptions>) -> SyntaxNode {
41    let mut config = config.unwrap_or_default();
42    populate_refdef_labels(input, &mut config);
43    Parser::new(input, &config).parse()
44}
45
46/// Pre-compute the document-level reference link label set.
47///
48/// CommonMark ยง6.3 makes reference link resolution depend on whether
49/// the label matches a definition that may appear anywhere in the
50/// document (including after the use site). The IR-based bracket
51/// resolution pass in `inlines::inline_ir` consults this set to
52/// distinguish a real shortcut/reference link from bracket-shaped
53/// literal text.
54///
55/// Pandoc-markdown agrees on the document-scoped lookup rule: a
56/// `[foo][bar]` shape with no `[bar]: ...` definition is literal text.
57/// Both dialects populate this set so the dispatcher's reference-link
58/// branch (under Pandoc) and the IR's `process_brackets` pass (under
59/// CommonMark) can consult it uniformly.
60///
61/// Only populated when the caller hasn't already supplied one.
62fn populate_refdef_labels(input: &str, config: &mut ParserOptions) {
63    if config.refdef_labels.is_some() {
64        return;
65    }
66    config.refdef_labels = Some(self::inlines::refdef_map::collect_refdef_labels(
67        input,
68        config.dialect,
69    ));
70}
71
72pub struct IncrementalParseResult {
73    pub tree: SyntaxNode,
74    pub reparse_range: (usize, usize),
75    pub strategy: &'static str,
76}
77
78/// Incrementally update a syntax tree by reparsing either a bounded section
79/// window (between top-level headings) or from a safe restart boundary to EOF.
80pub fn parse_incremental_suffix(
81    input: &str,
82    config: Option<ParserOptions>,
83    old_tree: &SyntaxNode,
84    old_edit_range: (usize, usize),
85    new_edit_range: (usize, usize),
86) -> IncrementalParseResult {
87    let mut config = config.unwrap_or_default();
88    populate_refdef_labels(input, &mut config);
89    let input_len = input.len();
90
91    let Some(old_edit) = normalize_range(old_edit_range) else {
92        return full_reparse_result(input, &config);
93    };
94    let Some(new_edit) = normalize_range(new_edit_range) else {
95        return full_reparse_result(input, &config);
96    };
97    if new_edit.1 > input_len {
98        return full_reparse_result(input, &config);
99    }
100
101    if old_tree.kind() != SyntaxKind::DOCUMENT {
102        return full_reparse_result(input, &config);
103    }
104
105    if let Some(section_window) =
106        find_top_level_heading_section_window(old_tree, old_edit, new_edit, input_len)
107        && let Some(result) = reparse_section_window(input, &config, old_tree, section_window)
108    {
109        return result;
110    }
111
112    let restart = find_incremental_restart_offset(old_tree, old_edit.0, old_edit.1);
113    let old_restart = align_to_document_child_start(old_tree, restart);
114
115    if (old_edit.0..old_edit.1).contains(&old_restart) {
116        return full_reparse_result(input, &config);
117    }
118
119    let new_restart = map_old_offset_to_new(old_restart, old_edit, new_edit, input_len);
120    if !input.is_char_boundary(new_restart) {
121        return full_reparse_result(input, &config);
122    }
123
124    let suffix_text = &input[new_restart..];
125    let suffix_tree = Parser::new(suffix_text, &config).parse();
126
127    let mut children: Vec<NodeOrToken<GreenNode, GreenToken>> = old_tree
128        .children_with_tokens()
129        .filter_map(|element| {
130            let range = element.text_range();
131            let end: usize = range.end().into();
132            if end <= old_restart {
133                Some(element_to_green(element))
134            } else {
135                None
136            }
137        })
138        .collect();
139    children.extend(suffix_tree.children_with_tokens().map(element_to_green));
140
141    let tree = SyntaxNode::new_root(GreenNode::new(SyntaxKind::DOCUMENT.into(), children));
142    let len: usize = tree.text_range().end().into();
143
144    IncrementalParseResult {
145        tree,
146        reparse_range: (new_restart, len),
147        strategy: "suffix_window",
148    }
149}
150
/// Returns `range` unchanged when its start does not exceed its end, and
/// `None` for a reversed range.
fn normalize_range(range: (usize, usize)) -> Option<(usize, usize)> {
    let (start, end) = range;
    if start > end { None } else { Some(range) }
}
154
155fn full_reparse_result(input: &str, config: &ParserOptions) -> IncrementalParseResult {
156    let tree = Parser::new(input, config).parse();
157    let len: usize = tree.text_range().end().into();
158    IncrementalParseResult {
159        tree,
160        reparse_range: (0, len),
161        strategy: "full_reparse",
162    }
163}
164
165fn align_to_document_child_start(tree: &SyntaxNode, offset: usize) -> usize {
166    for child in tree.children_with_tokens() {
167        let range = child.text_range();
168        let start: usize = range.start().into();
169        let end: usize = range.end().into();
170        if offset <= start {
171            return start;
172        }
173        if offset < end {
174            return start;
175        }
176    }
177    let len: usize = tree.text_range().end().into();
178    len
179}
180
/// Translates a byte offset in the old text into the matching offset in the
/// new text, given the replaced span (`old_edit`) and its replacement
/// (`new_edit`).
///
/// * Offsets at or before the start of the old span are returned unchanged.
/// * Offsets strictly inside the old span map to the end of the new span.
/// * Offsets at or after the end of the old span are shifted by the length
///   difference between the two spans.
///
/// Results of the last two cases are clamped to `new_len`.
fn map_old_offset_to_new(
    old_offset: usize,
    old_edit: (usize, usize),
    new_edit: (usize, usize),
    new_len: usize,
) -> usize {
    let (old_start, old_end) = old_edit;
    let (new_start, new_end) = new_edit;

    if old_offset <= old_start {
        old_offset
    } else if old_offset < old_end {
        new_end.min(new_len)
    } else {
        let shift = (new_end - new_start) as isize - (old_end - old_start) as isize;
        old_offset.saturating_add_signed(shift).min(new_len)
    }
}
198
199fn element_to_green(element: crate::syntax::SyntaxElement) -> NodeOrToken<GreenNode, GreenToken> {
200    match element {
201        NodeOrToken::Node(node) => NodeOrToken::Node(node.green().into_owned()),
202        NodeOrToken::Token(token) => NodeOrToken::Token(token.green().to_owned()),
203    }
204}
205
/// Byte range of one heading-delimited section, expressed both in the old
/// text and in the new text.
#[derive(Clone, Copy, Debug)]
struct SectionWindow {
    /// Start of the section's heading in the old text.
    old_start: usize,
    /// Start of the following top-level heading in the old text, or the old
    /// text length when the section is the last one.
    old_end: usize,
    /// `old_start` mapped into the new text.
    new_start: usize,
    /// `old_end` mapped into the new text.
    new_end: usize,
}
213
214fn find_top_level_heading_section_window(
215    old_tree: &SyntaxNode,
216    old_edit: (usize, usize),
217    new_edit: (usize, usize),
218    new_len: usize,
219) -> Option<SectionWindow> {
220    let old_len: usize = old_tree.text_range().end().into();
221    let mut previous_heading: Option<(usize, usize)> = None;
222    let mut next_heading: Option<(usize, usize)> = None;
223
224    for child in old_tree.children() {
225        if child.kind() != SyntaxKind::HEADING {
226            continue;
227        }
228
229        let range = child.text_range();
230        let start: usize = range.start().into();
231        let end: usize = range.end().into();
232
233        if start <= old_edit.0 {
234            previous_heading = Some((start, end));
235        } else {
236            next_heading = Some((start, end));
237            break;
238        }
239    }
240
241    let (previous_start, previous_end) = previous_heading?;
242    let (next_start, next_end) = next_heading.unwrap_or((old_len, old_len));
243
244    if ranges_intersect(old_edit, (previous_start, previous_end))
245        || ranges_intersect(old_edit, (next_start, next_end))
246    {
247        return None;
248    }
249
250    // Be conservative and only use the section window for edits that are
251    // strictly inside the section body (not touching heading boundaries).
252    if old_edit.0 <= previous_end || old_edit.1 >= next_start {
253        return None;
254    }
255
256    let new_start = map_old_offset_to_new(previous_start, old_edit, new_edit, new_len);
257    let new_end = map_old_offset_to_new(next_start, old_edit, new_edit, new_len);
258    if new_start >= new_end || new_end > new_len {
259        return None;
260    }
261
262    Some(SectionWindow {
263        old_start: previous_start,
264        old_end: next_start,
265        new_start,
266        new_end,
267    })
268}
269
/// Reports whether two `(start, end)` ranges overlap, i.e. each one starts
/// strictly before the other one ends.
fn ranges_intersect(a: (usize, usize), b: (usize, usize)) -> bool {
    let (a_start, a_end) = a;
    let (b_start, b_end) = b;
    !(a_start >= b_end || b_start >= a_end)
}
273
274fn reparse_section_window(
275    input: &str,
276    config: &ParserOptions,
277    old_tree: &SyntaxNode,
278    section_window: SectionWindow,
279) -> Option<IncrementalParseResult> {
280    if !input.is_char_boundary(section_window.new_start)
281        || !input.is_char_boundary(section_window.new_end)
282    {
283        return None;
284    }
285
286    let reparsed_window = Parser::new(
287        &input[section_window.new_start..section_window.new_end],
288        config,
289    )
290    .parse();
291
292    let mut children: Vec<NodeOrToken<GreenNode, GreenToken>> = Vec::new();
293    let mut inserted_window = false;
294
295    for element in old_tree.children_with_tokens() {
296        let range = element.text_range();
297        let start: usize = range.start().into();
298        let end: usize = range.end().into();
299
300        if end <= section_window.old_start {
301            children.push(element_to_green(element));
302            continue;
303        }
304
305        if start >= section_window.old_end {
306            if !inserted_window {
307                children.extend(reparsed_window.children_with_tokens().map(element_to_green));
308                inserted_window = true;
309            }
310            children.push(element_to_green(element));
311            continue;
312        }
313
314        // Overlapping element is replaced by the reparsed section window.
315    }
316
317    if !inserted_window {
318        children.extend(reparsed_window.children_with_tokens().map(element_to_green));
319    }
320
321    let tree = SyntaxNode::new_root(GreenNode::new(SyntaxKind::DOCUMENT.into(), children));
322    Some(IncrementalParseResult {
323        tree,
324        reparse_range: (section_window.new_start, section_window.new_end),
325        strategy: "section_window",
326    })
327}
328
#[cfg(test)]
mod tests {
    use super::*;

    /// Three-section document shared by several boundary tests.
    const THREE_SECTIONS: &str = "# Intro\n\nalpha\n\n# Middle\n\nbeta\n\n# End\n\nomega\n";

    /// Returns `text` with the byte range `old` replaced by `insert`.
    fn apply_edit(text: &str, old: (usize, usize), insert: &str) -> String {
        let (start, end) = old;
        let mut out = String::with_capacity(text.len() - (end - start) + insert.len());
        out.push_str(&text[..start]);
        out.push_str(insert);
        out.push_str(&text[end..]);
        out
    }

    /// Replaces `old_edit` in `input` with `insert`, runs the incremental
    /// parser against the tree of the original text and asserts that its
    /// output equals a full parse of the updated text.
    ///
    /// Returns the incremental result and the updated text so that callers
    /// can make additional assertions.
    #[track_caller]
    fn check_edit(
        input: &str,
        old_edit: (usize, usize),
        insert: &str,
    ) -> (IncrementalParseResult, String) {
        let old_tree = parse(input, None);
        let updated = apply_edit(input, old_edit, insert);
        let new_edit = (old_edit.0, old_edit.0 + insert.len());

        let inc = parse_incremental_suffix(&updated, None, &old_tree, old_edit, new_edit);
        let full = parse(&updated, None);
        assert_eq!(inc.tree.to_string(), full.to_string());
        (inc, updated)
    }

    #[test]
    fn incremental_suffix_matches_full_parse_for_tail_edit() {
        check_edit(
            "# H\n\npara one\n\npara two\n\npara three\n",
            (30, 35),
            "tail section",
        );
    }

    #[test]
    fn incremental_suffix_matches_full_parse_for_middle_edit() {
        check_edit("# H\n\n- a\n- b\n\nfinal para\n", (10, 11), "alpha");
    }

    #[test]
    fn incremental_suffix_matches_full_parse_for_setext_transition() {
        check_edit("Intro\nSecond\n\nTail\n", (5, 5), "\n-----");
    }

    #[test]
    fn incremental_suffix_matches_full_parse_for_lazy_blockquote_change() {
        check_edit("> quoted\nlazy\n\nnext\n", (9, 13), "> line");
    }

    #[test]
    fn incremental_uses_heading_section_window_when_available() {
        let input = "# Intro\n\nalpha\n\n# Middle\n\nbeta section\n\n# End\n\nomega\n";
        let start = input.find("beta").expect("beta in test input");

        let (inc, updated) = check_edit(input, (start, start + 4), "BETA");
        assert!(
            inc.reparse_range.0 > 0,
            "section reparse should not start at 0"
        );
        assert!(
            inc.reparse_range.1 < updated.len(),
            "section reparse should stop before EOF"
        );
    }

    #[test]
    fn incremental_uses_section_window_for_last_section() {
        let input = "# Intro\n\nalpha\n\n# Last\n\nbeta section\n";
        let start = input.find("beta").expect("beta in test input");

        let (inc, updated) = check_edit(input, (start, start + 4), "BETA");
        assert!(
            inc.reparse_range.0 > 0,
            "last section should start at the last heading boundary"
        );
        assert_eq!(
            inc.reparse_range.1,
            updated.len(),
            "last section should end at EOF"
        );
    }

    #[test]
    fn incremental_does_not_use_section_window_when_edit_touches_heading() {
        let middle_start = THREE_SECTIONS
            .find("# Middle")
            .expect("middle heading in test input");

        let (inc, updated) = check_edit(THREE_SECTIONS, (middle_start, middle_start + 1), "#");
        assert_eq!(
            inc.reparse_range.1,
            updated.len(),
            "edits on headings should avoid section-window reparsing"
        );
    }

    #[test]
    fn incremental_does_not_use_section_window_when_edit_crosses_next_heading() {
        let beta_start = THREE_SECTIONS.find("beta").expect("beta in test input");
        let end_start = THREE_SECTIONS
            .find("# End")
            .expect("end heading in test input");

        let (inc, updated) = check_edit(THREE_SECTIONS, (beta_start, end_start + 2), "beta\n\n# ");
        assert_eq!(
            inc.reparse_range.1,
            updated.len(),
            "cross-heading edits should avoid section-window reparsing"
        );
    }

    #[test]
    fn incremental_ignores_nested_headings_for_window_boundaries() {
        let input = "# Intro\n\n> ## Nested\n> quote body\n\n# End\n\nomega\n";
        let quote_start = input.find("quote body").expect("quote body in test input");

        let (inc, updated) = check_edit(input, (quote_start, quote_start + 5), "QUOTE");
        assert!(
            inc.reparse_range.1 < updated.len(),
            "window boundary should be the next top-level heading, not nested heading"
        );
    }

    #[test]
    fn incremental_section_window_handles_list_tight_loose_transition() {
        let input = "# Intro\n\nprelude\n\n# Middle\n\n- one\n- two\n\n# End\n\nomega\n";
        let two_start = input.find("- two").expect("list item in test input");

        let (inc, updated) = check_edit(input, (two_start, two_start), "\n");
        assert!(
            inc.reparse_range.0 > 0 && inc.reparse_range.1 < updated.len(),
            "list transition inside section should remain section-bounded"
        );
    }

    #[test]
    fn incremental_section_window_handles_blockquote_lazy_transition() {
        let input = "# Intro\n\nprelude\n\n# Middle\n\n> quoted\nlazy line\n\n# End\n\nomega\n";
        let lazy_start = input.find("lazy line").expect("lazy line in test input");

        let (inc, updated) = check_edit(input, (lazy_start, lazy_start), "> ");
        assert!(
            inc.reparse_range.0 > 0 && inc.reparse_range.1 < updated.len(),
            "blockquote continuation change inside section should remain section-bounded"
        );
    }

    #[test]
    fn incremental_section_window_handles_fenced_div_with_nested_heading() {
        let input = "# Intro\n\nprelude\n\n# Middle\n\n::: {.callout-note}\n## Nested\nbody text\n:::\n\n# End\n\nomega\n";
        let body_start = input.find("body text").expect("body text in test input");

        let (inc, updated) = check_edit(input, (body_start, body_start + 4), "BODY");
        assert!(
            inc.reparse_range.0 > 0 && inc.reparse_range.1 < updated.len(),
            "fenced div edits should use top-level heading boundaries"
        );
    }

    #[test]
    fn incremental_handles_inserting_heading_inside_section_window() {
        let beta_start = THREE_SECTIONS.find("beta").expect("beta in test input");

        let (inc, _updated) = check_edit(
            THREE_SECTIONS,
            (beta_start, beta_start),
            "## Inserted\n\n",
        );
        assert_eq!(
            inc.strategy, "section_window",
            "heading insertions within a bounded section should remain section-window mode"
        );
    }

    #[test]
    fn incremental_falls_back_when_deleting_next_heading_boundary() {
        let end_start = THREE_SECTIONS
            .find("# End\n")
            .expect("end heading in test input");

        let (inc, _updated) = check_edit(
            THREE_SECTIONS,
            (end_start, end_start + "# End\n\n".len()),
            "",
        );
        assert_ne!(
            inc.strategy, "section_window",
            "heading deletions across boundaries should avoid section-window mode"
        );
    }

    #[test]
    fn incremental_falls_back_when_editing_blank_line_after_heading() {
        let boundary = THREE_SECTIONS
            .find("# Middle\n\n")
            .expect("middle heading boundary in test input");
        let blank_line_start = boundary + "# Middle\n".len();

        let (inc, _updated) = check_edit(
            THREE_SECTIONS,
            (blank_line_start, blank_line_start + 1),
            "",
        );
        assert_ne!(
            inc.strategy, "section_window",
            "heading-adjacent blank line edits should avoid section-window mode"
        );
    }

    #[test]
    fn incremental_handles_frontmatter_to_first_heading_edit() {
        let input = "---\ntitle: Demo\n---\n\n# Intro\n\nalpha\n\n# Next\n\nomega\n";
        let title_start = input.find("Demo").expect("frontmatter value in test input");

        let (inc, _updated) = check_edit(input, (title_start, title_start + 4), "Updated Demo");
        assert_ne!(
            inc.strategy, "section_window",
            "frontmatter edits before first heading should use conservative mode"
        );
    }

    #[test]
    fn incremental_handles_frontmatter_delimiter_edit() {
        let input = "---\ntitle: Demo\n---\n\n# Intro\n\nalpha\n";
        let first_delim_start = 0;

        let (inc, _updated) = check_edit(input, (first_delim_start, first_delim_start + 3), "----");
        assert_ne!(
            inc.strategy, "section_window",
            "frontmatter delimiter edits should stay in conservative mode"
        );
    }
}