Skip to main content

marco_core/parser/
mod.rs

1//! Parser entry points and AST-facing parser modules.
2//!
3//! The parser layer consumes grammar outputs and builds the crate AST.
4
5/// AST node and document types.
6pub mod ast;
7/// Position and span utilities.
8pub mod position;
9/// Shared parser span conversion helpers.
10pub mod shared;
11
12/// Block-level parser modules.
13pub mod blocks;
14/// Inline-level parser modules.
15pub mod inlines;
16
17/// Re-export AST types.
18pub use ast::*;
19/// Re-export block parser entry point.
20pub use blocks::parse_blocks;
21/// Re-export inline parser entry point.
22pub use inlines::parse_inlines;
23/// Re-export position and span types.
24pub use position::*;
25
/// Runtime configuration for the Markdown parser.
///
/// Pass to [`parse_with_options`] to skip expensive hot-path work in
/// performance-sensitive pipelines.
///
/// All fields default to `true` (full-featured parse). Set individual fields
/// to `false` to opt out of that work at runtime (not just at compile time).
///
/// The type implements `PartialEq`/`Eq`, so a configuration can be compared
/// against [`ParseOptions::default`] or any other instance.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseOptions {
    /// Track source positions (line/column spans) on every AST node.
    ///
    /// When `false`, the O(n) string scans inside span conversion are skipped
    /// and all node `span` fields will be `None`. Use this for render-only
    /// pipelines that never inspect positions.
    ///
    /// Default: `true`.
    pub track_positions: bool,

    /// Parse inline `$...$` / `$$...$$` math and fenced ` ```math ` blocks.
    ///
    /// When `false`, math syntax falls through to plain text or regular code
    /// blocks. Skips the math parser attempts in the inline hot loop.
    ///
    /// Default: `true`.
    pub parse_math: bool,

    /// Parse fenced ` ```mermaid ` code blocks into `NodeKind::MermaidDiagram`.
    ///
    /// When `false`, mermaid blocks are emitted as regular `NodeKind::CodeBlock`
    /// nodes. Skips the diagram branch in the fenced-code-block parser.
    ///
    /// Default: `true`.
    pub parse_diagrams: bool,
}
60
61impl Default for ParseOptions {
62    fn default() -> Self {
63        Self {
64            track_positions: true,
65            parse_math: true,
66            parse_diagrams: true,
67        }
68    }
69}
70
71/// Parse Markdown text with runtime options controlling which work is performed.
72///
73/// This is the high-performance entry point. Pass a [`ParseOptions`] with
74/// fields set to `false` to skip expensive hot-path work at runtime.
75///
76/// For the default full-featured parse, use [`parse`] instead.
77///
78/// # Example
79/// ```rust
80/// let opts = marco_core::ParseOptions {
81///     track_positions: false,
82///     ..Default::default()
83/// };
84/// let doc = marco_core::parse_with_options("# Hello", opts)?;
85/// // All node spans are None — position computation was skipped.
86/// # Ok::<(), Box<dyn std::error::Error>>(())
87/// ```
88pub fn parse_with_options(
89    input: &str,
90    opts: ParseOptions,
91) -> Result<Document, Box<dyn std::error::Error>> {
92    log::info!("Starting parse: {} bytes", input.len());
93
94    // Set thread-local options for the duration of this parse call.
95    // The guard restores previous values on drop (including on error).
96    let _guard =
97        shared::ParseOptionsGuard::new(opts.track_positions, opts.parse_math, opts.parse_diagrams);
98
99    let mut document = parse_blocks(input)?;
100    log::debug!("Parsed {} blocks", document.children.len());
101
102    resolve_reference_links(&mut document);
103    blocks::gfm_admonitions::apply_gfm_admonitions(&mut document);
104
105    Ok(document)
106}
107
108/// Parse Markdown text into Document AST using default options (full-featured).
109pub fn parse(input: &str) -> Result<Document, Box<dyn std::error::Error>> {
110    parse_with_options(input, ParseOptions::default())
111}
112
113fn resolve_reference_links(document: &mut Document) {
114    resolve_reference_links_in_nodes(&mut document.children, &document.references);
115}
116
/// Removes CommonMark backslash escapes from `input`.
///
/// A backslash immediately followed by an ASCII punctuation character (the
/// CommonMark escapable set ``!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~``) is dropped
/// and the punctuation character is kept. Every other character, including a
/// backslash before a non-escapable character or at the end of the input, is
/// copied through unchanged.
fn unescape_commonmark_backslash_escapes(input: &str) -> String {
    let mut unescaped = String::with_capacity(input.len());
    let mut rest = input.chars().peekable();

    while let Some(current) = rest.next() {
        let emitted = if current == '\\' {
            // Consume the following character only when it is escapable;
            // otherwise the backslash itself is literal.
            rest.next_if(char::is_ascii_punctuation).unwrap_or(current)
        } else {
            current
        };
        unescaped.push(emitted);
    }

    unescaped
}
140
141fn resolve_reference_links_in_nodes(nodes: &mut Vec<Node>, references: &ReferenceMap) {
142    let mut i = 0;
143    while i < nodes.len() {
144        // Always resolve inside children first.
145        if !nodes[i].children.is_empty() {
146            resolve_reference_links_in_nodes(&mut nodes[i].children, references);
147        }
148
149        let is_ref = matches!(nodes[i].kind, NodeKind::LinkReference { .. });
150        if !is_ref {
151            i += 1;
152            continue;
153        }
154
155        // Temporarily take ownership of data we might need.
156        let (label, suffix) = match &nodes[i].kind {
157            NodeKind::LinkReference { label, suffix } => (label.clone(), suffix.clone()),
158            _ => unreachable!(),
159        };
160
161        if let Some((url, title)) = references.get(&label) {
162            nodes[i].kind = NodeKind::Link {
163                url: url.clone(),
164                title: title.clone(),
165            };
166            i += 1;
167            continue;
168        }
169
170        // Unresolved reference: fall back to literal bracketed text while preserving
171        // already-parsed children for the first bracket segment.
172        let mut inner_children = std::mem::take(&mut nodes[i].children);
173
174        let mut replacement: Vec<Node> = Vec::new();
175        replacement.push(Node {
176            kind: NodeKind::Text("[".to_string()),
177            span: None,
178            children: Vec::new(),
179        });
180        replacement.append(&mut inner_children);
181        replacement.push(Node {
182            kind: NodeKind::Text("]".to_string()),
183            span: None,
184            children: Vec::new(),
185        });
186        if !suffix.is_empty() {
187            replacement.push(Node {
188                kind: NodeKind::Text(unescape_commonmark_backslash_escapes(&suffix)),
189                span: None,
190                children: Vec::new(),
191            });
192        }
193
194        let replacement_len = replacement.len();
195        nodes.splice(i..i + 1, replacement);
196        i += replacement_len;
197    }
198}