Skip to main content

panache_parser/
parser.rs

1//! Parser module for Pandoc/Quarto documents.
2//!
3//! This module implements a single-pass parser that constructs a lossless syntax tree (CST) for
4//! Quarto documents.
5
6use crate::options::ParserOptions;
7use crate::range_utils::find_incremental_restart_offset;
8use crate::syntax::{SyntaxKind, SyntaxNode};
9use rowan::{GreenNode, GreenToken, NodeOrToken};
10
11pub mod blocks;
12pub mod inlines;
13pub mod utils;
14pub mod yaml;
15
16mod block_dispatcher;
17mod core;
18
19// Re-export main parser
20pub use core::Parser;
21
22/// Parses a Quarto document string into a syntax tree.
23///
24/// Single-pass architecture: blocks emit inline structure during parsing.
25///
26/// # Examples
27///
28/// ```rust
29/// use panache_parser::parser::parse;
30///
31/// let input = "# Heading\n\nParagraph text.";
32/// let tree = parse(input, None);
33/// println!("{:#?}", tree);
34/// ```
35///
36/// # Arguments
37///
38/// * `input` - The Quarto document content to parse
39/// * `config` - Optional configuration. If None, uses default config.
40pub fn parse(input: &str, config: Option<ParserOptions>) -> SyntaxNode {
41    let config = config.unwrap_or_default();
42    Parser::new(input, &config).parse()
43}
44
45pub struct IncrementalParseResult {
46    pub tree: SyntaxNode,
47    pub reparse_range: (usize, usize),
48    pub strategy: &'static str,
49}
50
51/// Incrementally update a syntax tree by reparsing either a bounded section
52/// window (between top-level headings) or from a safe restart boundary to EOF.
53pub fn parse_incremental_suffix(
54    input: &str,
55    config: Option<ParserOptions>,
56    old_tree: &SyntaxNode,
57    old_edit_range: (usize, usize),
58    new_edit_range: (usize, usize),
59) -> IncrementalParseResult {
60    let config = config.unwrap_or_default();
61    let input_len = input.len();
62
63    let Some(old_edit) = normalize_range(old_edit_range) else {
64        return full_reparse_result(input, &config);
65    };
66    let Some(new_edit) = normalize_range(new_edit_range) else {
67        return full_reparse_result(input, &config);
68    };
69    if new_edit.1 > input_len {
70        return full_reparse_result(input, &config);
71    }
72
73    if old_tree.kind() != SyntaxKind::DOCUMENT {
74        return full_reparse_result(input, &config);
75    }
76
77    if let Some(section_window) =
78        find_top_level_heading_section_window(old_tree, old_edit, new_edit, input_len)
79        && let Some(result) = reparse_section_window(input, &config, old_tree, section_window)
80    {
81        return result;
82    }
83
84    let restart = find_incremental_restart_offset(old_tree, old_edit.0, old_edit.1);
85    let old_restart = align_to_document_child_start(old_tree, restart);
86
87    if (old_edit.0..old_edit.1).contains(&old_restart) {
88        return full_reparse_result(input, &config);
89    }
90
91    let new_restart = map_old_offset_to_new(old_restart, old_edit, new_edit, input_len);
92    if !input.is_char_boundary(new_restart) {
93        return full_reparse_result(input, &config);
94    }
95
96    let suffix_text = &input[new_restart..];
97    let suffix_tree = Parser::new(suffix_text, &config).parse();
98
99    let mut children: Vec<NodeOrToken<GreenNode, GreenToken>> = old_tree
100        .children_with_tokens()
101        .filter_map(|element| {
102            let range = element.text_range();
103            let end: usize = range.end().into();
104            if end <= old_restart {
105                Some(element_to_green(element))
106            } else {
107                None
108            }
109        })
110        .collect();
111    children.extend(suffix_tree.children_with_tokens().map(element_to_green));
112
113    let tree = SyntaxNode::new_root(GreenNode::new(SyntaxKind::DOCUMENT.into(), children));
114    let len: usize = tree.text_range().end().into();
115
116    IncrementalParseResult {
117        tree,
118        reparse_range: (new_restart, len),
119        strategy: "suffix_window",
120    }
121}
122
/// Returns `range` unchanged when its start does not exceed its end, and
/// `None` for an inverted range.
fn normalize_range(range: (usize, usize)) -> Option<(usize, usize)> {
    let (start, end) = range;
    if start > end { None } else { Some(range) }
}
126
127fn full_reparse_result(input: &str, config: &ParserOptions) -> IncrementalParseResult {
128    let tree = Parser::new(input, config).parse();
129    let len: usize = tree.text_range().end().into();
130    IncrementalParseResult {
131        tree,
132        reparse_range: (0, len),
133        strategy: "full_reparse",
134    }
135}
136
137fn align_to_document_child_start(tree: &SyntaxNode, offset: usize) -> usize {
138    for child in tree.children_with_tokens() {
139        let range = child.text_range();
140        let start: usize = range.start().into();
141        let end: usize = range.end().into();
142        if offset <= start {
143            return start;
144        }
145        if offset < end {
146            return start;
147        }
148    }
149    let len: usize = tree.text_range().end().into();
150    len
151}
152
/// Translates a byte offset in the old text into the corresponding offset in
/// the new text, given the replaced range and its replacement.
///
/// * Offsets at or before the start of the edit are returned unchanged.
/// * Offsets strictly inside the replaced range map to the end of the
///   replacement.
/// * Offsets at or after the end of the replaced range are shifted by the
///   difference between the replacement length and the replaced length.
///
/// Results derived from the replacement are clamped to `new_len`. Both ranges
/// are expected to satisfy `start <= end`; an inverted range is treated as
/// having length zero rather than causing an arithmetic panic.
fn map_old_offset_to_new(
    old_offset: usize,
    old_edit: (usize, usize),
    new_edit: (usize, usize),
    new_len: usize,
) -> usize {
    if old_offset <= old_edit.0 {
        return old_offset;
    }
    if old_offset < old_edit.1 {
        return new_edit.1.min(new_len);
    }

    let old_span = old_edit.1.saturating_sub(old_edit.0);
    let new_span = new_edit.1.saturating_sub(new_edit.0);
    // `old_offset >= old_edit.1 >= old_span`, so the subtraction cannot
    // underflow. Staying in unsigned arithmetic avoids the wrap-around that
    // `usize as isize` casts produce for very large spans.
    (old_offset - old_span)
        .saturating_add(new_span)
        .min(new_len)
}
170
171fn element_to_green(element: crate::syntax::SyntaxElement) -> NodeOrToken<GreenNode, GreenToken> {
172    match element {
173        NodeOrToken::Node(node) => NodeOrToken::Node(node.green().into_owned()),
174        NodeOrToken::Token(token) => NodeOrToken::Token(token.green().to_owned()),
175    }
176}
177
/// Byte extent of one top-level heading section, expressed both in the old
/// text and in the new (edited) text.
#[derive(Debug, Clone, Copy)]
struct SectionWindow {
    /// Start of the section's heading in the old text.
    old_start: usize,
    /// Start of the following top-level heading in the old text, or the old
    /// text length when the section is the last one.
    old_end: usize,
    /// `old_start` mapped into the new text.
    new_start: usize,
    /// `old_end` mapped into the new text.
    new_end: usize,
}
185
186fn find_top_level_heading_section_window(
187    old_tree: &SyntaxNode,
188    old_edit: (usize, usize),
189    new_edit: (usize, usize),
190    new_len: usize,
191) -> Option<SectionWindow> {
192    let old_len: usize = old_tree.text_range().end().into();
193    let mut previous_heading: Option<(usize, usize)> = None;
194    let mut next_heading: Option<(usize, usize)> = None;
195
196    for child in old_tree.children() {
197        if child.kind() != SyntaxKind::HEADING {
198            continue;
199        }
200
201        let range = child.text_range();
202        let start: usize = range.start().into();
203        let end: usize = range.end().into();
204
205        if start <= old_edit.0 {
206            previous_heading = Some((start, end));
207        } else {
208            next_heading = Some((start, end));
209            break;
210        }
211    }
212
213    let (previous_start, previous_end) = previous_heading?;
214    let (next_start, next_end) = next_heading.unwrap_or((old_len, old_len));
215
216    if ranges_intersect(old_edit, (previous_start, previous_end))
217        || ranges_intersect(old_edit, (next_start, next_end))
218    {
219        return None;
220    }
221
222    // Be conservative and only use the section window for edits that are
223    // strictly inside the section body (not touching heading boundaries).
224    if old_edit.0 <= previous_end || old_edit.1 >= next_start {
225        return None;
226    }
227
228    let new_start = map_old_offset_to_new(previous_start, old_edit, new_edit, new_len);
229    let new_end = map_old_offset_to_new(next_start, old_edit, new_edit, new_len);
230    if new_start >= new_end || new_end > new_len {
231        return None;
232    }
233
234    Some(SectionWindow {
235        old_start: previous_start,
236        old_end: next_start,
237        new_start,
238        new_end,
239    })
240}
241
/// Reports whether the half-open ranges `a` and `b` intersect, i.e. each one
/// starts strictly before the other ends.
fn ranges_intersect(a: (usize, usize), b: (usize, usize)) -> bool {
    let (a_start, a_end) = a;
    let (b_start, b_end) = b;
    b_end > a_start && a_end > b_start
}
245
246fn reparse_section_window(
247    input: &str,
248    config: &ParserOptions,
249    old_tree: &SyntaxNode,
250    section_window: SectionWindow,
251) -> Option<IncrementalParseResult> {
252    if !input.is_char_boundary(section_window.new_start)
253        || !input.is_char_boundary(section_window.new_end)
254    {
255        return None;
256    }
257
258    let reparsed_window = Parser::new(
259        &input[section_window.new_start..section_window.new_end],
260        config,
261    )
262    .parse();
263
264    let mut children: Vec<NodeOrToken<GreenNode, GreenToken>> = Vec::new();
265    let mut inserted_window = false;
266
267    for element in old_tree.children_with_tokens() {
268        let range = element.text_range();
269        let start: usize = range.start().into();
270        let end: usize = range.end().into();
271
272        if end <= section_window.old_start {
273            children.push(element_to_green(element));
274            continue;
275        }
276
277        if start >= section_window.old_end {
278            if !inserted_window {
279                children.extend(reparsed_window.children_with_tokens().map(element_to_green));
280                inserted_window = true;
281            }
282            children.push(element_to_green(element));
283            continue;
284        }
285
286        // Overlapping element is replaced by the reparsed section window.
287    }
288
289    if !inserted_window {
290        children.extend(reparsed_window.children_with_tokens().map(element_to_green));
291    }
292
293    let tree = SyntaxNode::new_root(GreenNode::new(SyntaxKind::DOCUMENT.into(), children));
294    Some(IncrementalParseResult {
295        tree,
296        reparse_range: (section_window.new_start, section_window.new_end),
297        strategy: "section_window",
298    })
299}
300
#[cfg(test)]
mod tests {
    use super::*;

    /// Replaces the byte range `old` of `text` with `insert`.
    fn apply_edit(text: &str, old: (usize, usize), insert: &str) -> String {
        let head = &text[..old.0];
        let tail = &text[old.1..];
        let mut edited = String::with_capacity(head.len() + insert.len() + tail.len());
        edited.push_str(head);
        edited.push_str(insert);
        edited.push_str(tail);
        edited
    }

    /// Parses `input`, replaces `old_edit` with `insert`, runs the incremental
    /// parser and asserts that its tree text equals that of a full parse.
    ///
    /// Returns the incremental result together with the edited text's length.
    #[track_caller]
    fn reparse_after_edit(
        input: &str,
        old_edit: (usize, usize),
        insert: &str,
    ) -> (IncrementalParseResult, usize) {
        let old_tree = parse(input, None);
        let updated = apply_edit(input, old_edit, insert);
        let new_edit = (old_edit.0, old_edit.0 + insert.len());

        let inc = parse_incremental_suffix(&updated, None, &old_tree, old_edit, new_edit);
        let full = parse(&updated, None);
        assert_eq!(inc.tree.to_string(), full.to_string());

        (inc, updated.len())
    }

    #[test]
    fn incremental_suffix_matches_full_parse_for_tail_edit() {
        let input = "# H\n\npara one\n\npara two\n\npara three\n";
        let _ = reparse_after_edit(input, (30, 35), "tail section");
    }

    #[test]
    fn incremental_suffix_matches_full_parse_for_middle_edit() {
        let input = "# H\n\n- a\n- b\n\nfinal para\n";
        let _ = reparse_after_edit(input, (10, 11), "alpha");
    }

    #[test]
    fn incremental_suffix_matches_full_parse_for_setext_transition() {
        let input = "Intro\nSecond\n\nTail\n";
        let _ = reparse_after_edit(input, (5, 5), "\n-----");
    }

    #[test]
    fn incremental_suffix_matches_full_parse_for_lazy_blockquote_change() {
        let input = "> quoted\nlazy\n\nnext\n";
        let _ = reparse_after_edit(input, (9, 13), "> line");
    }

    #[test]
    fn incremental_uses_heading_section_window_when_available() {
        let input = "# Intro\n\nalpha\n\n# Middle\n\nbeta section\n\n# End\n\nomega\n";
        let start = input.find("beta").expect("beta in test input");

        let (inc, updated_len) = reparse_after_edit(input, (start, start + 4), "BETA");
        assert!(
            inc.reparse_range.0 > 0,
            "section reparse should not start at 0"
        );
        assert!(
            inc.reparse_range.1 < updated_len,
            "section reparse should stop before EOF"
        );
    }

    #[test]
    fn incremental_uses_section_window_for_last_section() {
        let input = "# Intro\n\nalpha\n\n# Last\n\nbeta section\n";
        let start = input.find("beta").expect("beta in test input");

        let (inc, updated_len) = reparse_after_edit(input, (start, start + 4), "BETA");
        assert!(
            inc.reparse_range.0 > 0,
            "last section should start at the last heading boundary"
        );
        assert_eq!(
            inc.reparse_range.1, updated_len,
            "last section should end at EOF"
        );
    }

    #[test]
    fn incremental_does_not_use_section_window_when_edit_touches_heading() {
        let input = "# Intro\n\nalpha\n\n# Middle\n\nbeta\n\n# End\n\nomega\n";
        let middle_start = input
            .find("# Middle")
            .expect("middle heading in test input");

        let (inc, updated_len) = reparse_after_edit(input, (middle_start, middle_start + 1), "#");
        assert_eq!(
            inc.reparse_range.1, updated_len,
            "edits on headings should avoid section-window reparsing"
        );
    }

    #[test]
    fn incremental_does_not_use_section_window_when_edit_crosses_next_heading() {
        let input = "# Intro\n\nalpha\n\n# Middle\n\nbeta\n\n# End\n\nomega\n";
        let beta_start = input.find("beta").expect("beta in test input");
        let end_start = input.find("# End").expect("end heading in test input");

        let (inc, updated_len) =
            reparse_after_edit(input, (beta_start, end_start + 2), "beta\n\n# ");
        assert_eq!(
            inc.reparse_range.1, updated_len,
            "cross-heading edits should avoid section-window reparsing"
        );
    }

    #[test]
    fn incremental_ignores_nested_headings_for_window_boundaries() {
        let input = "# Intro\n\n> ## Nested\n> quote body\n\n# End\n\nomega\n";
        let quote_start = input.find("quote body").expect("quote body in test input");

        let (inc, updated_len) = reparse_after_edit(input, (quote_start, quote_start + 5), "QUOTE");
        assert!(
            inc.reparse_range.1 < updated_len,
            "window boundary should be the next top-level heading, not nested heading"
        );
    }

    #[test]
    fn incremental_section_window_handles_list_tight_loose_transition() {
        let input = "# Intro\n\nprelude\n\n# Middle\n\n- one\n- two\n\n# End\n\nomega\n";
        let two_start = input.find("- two").expect("list item in test input");

        let (inc, updated_len) = reparse_after_edit(input, (two_start, two_start), "\n");
        assert!(
            inc.reparse_range.0 > 0 && inc.reparse_range.1 < updated_len,
            "list transition inside section should remain section-bounded"
        );
    }

    #[test]
    fn incremental_section_window_handles_blockquote_lazy_transition() {
        let input = "# Intro\n\nprelude\n\n# Middle\n\n> quoted\nlazy line\n\n# End\n\nomega\n";
        let lazy_start = input.find("lazy line").expect("lazy line in test input");

        let (inc, updated_len) = reparse_after_edit(input, (lazy_start, lazy_start), "> ");
        assert!(
            inc.reparse_range.0 > 0 && inc.reparse_range.1 < updated_len,
            "blockquote continuation change inside section should remain section-bounded"
        );
    }

    #[test]
    fn incremental_section_window_handles_fenced_div_with_nested_heading() {
        let input = "# Intro\n\nprelude\n\n# Middle\n\n::: {.callout-note}\n## Nested\nbody text\n:::\n\n# End\n\nomega\n";
        let body_start = input.find("body text").expect("body text in test input");

        let (inc, updated_len) = reparse_after_edit(input, (body_start, body_start + 4), "BODY");
        assert!(
            inc.reparse_range.0 > 0 && inc.reparse_range.1 < updated_len,
            "fenced div edits should use top-level heading boundaries"
        );
    }

    #[test]
    fn incremental_handles_inserting_heading_inside_section_window() {
        let input = "# Intro\n\nalpha\n\n# Middle\n\nbeta\n\n# End\n\nomega\n";
        let beta_start = input.find("beta").expect("beta in test input");

        let (inc, _) = reparse_after_edit(input, (beta_start, beta_start), "## Inserted\n\n");
        assert_eq!(
            inc.strategy, "section_window",
            "heading insertions within a bounded section should remain section-window mode"
        );
    }

    #[test]
    fn incremental_falls_back_when_deleting_next_heading_boundary() {
        let input = "# Intro\n\nalpha\n\n# Middle\n\nbeta\n\n# End\n\nomega\n";
        let end_start = input.find("# End\n").expect("end heading in test input");
        let old_edit = (end_start, end_start + "# End\n\n".len());

        let (inc, _) = reparse_after_edit(input, old_edit, "");
        assert_ne!(
            inc.strategy, "section_window",
            "heading deletions across boundaries should avoid section-window mode"
        );
    }

    #[test]
    fn incremental_falls_back_when_editing_blank_line_after_heading() {
        let input = "# Intro\n\nalpha\n\n# Middle\n\nbeta\n\n# End\n\nomega\n";
        let boundary = input
            .find("# Middle\n\n")
            .expect("middle heading boundary in test input");
        let blank_line_start = boundary + "# Middle\n".len();

        let (inc, _) = reparse_after_edit(input, (blank_line_start, blank_line_start + 1), "");
        assert_ne!(
            inc.strategy, "section_window",
            "heading-adjacent blank line edits should avoid section-window mode"
        );
    }

    #[test]
    fn incremental_handles_frontmatter_to_first_heading_edit() {
        let input = "---\ntitle: Demo\n---\n\n# Intro\n\nalpha\n\n# Next\n\nomega\n";
        let title_start = input.find("Demo").expect("frontmatter value in test input");

        let (inc, _) = reparse_after_edit(input, (title_start, title_start + 4), "Updated Demo");
        assert_ne!(
            inc.strategy, "section_window",
            "frontmatter edits before first heading should use conservative mode"
        );
    }

    #[test]
    fn incremental_handles_frontmatter_delimiter_edit() {
        let input = "---\ntitle: Demo\n---\n\n# Intro\n\nalpha\n";
        let first_delim_start = 0;

        let (inc, _) =
            reparse_after_edit(input, (first_delim_start, first_delim_start + 3), "----");
        assert_ne!(
            inc.strategy, "section_window",
            "frontmatter delimiter edits should stay in conservative mode"
        );
    }
}