marco_core/parser/mod.rs
1//! Parser entry points and AST-facing parser modules.
2//!
3//! The parser layer consumes grammar outputs and builds the crate AST.
4
5/// AST node and document types.
6pub mod ast;
7/// Position and span utilities.
8pub mod position;
9/// Shared parser span conversion helpers.
10pub mod shared;
11
12/// Block-level parser modules.
13pub mod blocks;
14/// Inline-level parser modules.
15pub mod inlines;
16
17/// Re-export AST types.
18pub use ast::*;
19/// Re-export block parser entry point.
20pub use blocks::parse_blocks;
21/// Re-export inline parser entry point.
22pub use inlines::parse_inlines;
23/// Re-export position and span types.
24pub use position::*;
25
/// Runtime configuration for the Markdown parser.
///
/// Pass to [`parse_with_options`] to skip expensive hot-path work in
/// performance-sensitive pipelines.
///
/// All fields default to `true` (full-featured parse). Set individual fields
/// to `false` to opt out of that work at runtime (not just at compile time).
///
/// The type implements `PartialEq`/`Eq`, so two configurations can be compared
/// directly (for example in tests or when deciding whether a cached parse
/// result is still valid for a given set of options).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseOptions {
    /// Track source positions (line/column spans) on every AST node.
    ///
    /// When `false`, the O(n) string scans inside span conversion are skipped
    /// and all node `span` fields will be `None`. Use this for render-only
    /// pipelines that never inspect positions.
    ///
    /// Default: `true`.
    pub track_positions: bool,

    /// Parse inline `$...$` / `$$...$$` math and fenced ` ```math ` blocks.
    ///
    /// When `false`, math syntax falls through to plain text or regular code
    /// blocks. Skips the math parser attempts in the inline hot loop.
    ///
    /// Default: `true`.
    pub parse_math: bool,

    /// Parse fenced ` ```mermaid ` code blocks into `NodeKind::MermaidDiagram`.
    ///
    /// When `false`, mermaid blocks are emitted as regular `NodeKind::CodeBlock`
    /// nodes. Skips the diagram branch in the fenced-code-block parser.
    ///
    /// Default: `true`.
    pub parse_diagrams: bool,
}
60
61impl Default for ParseOptions {
62 fn default() -> Self {
63 Self {
64 track_positions: true,
65 parse_math: true,
66 parse_diagrams: true,
67 }
68 }
69}
70
71/// Parse Markdown text with runtime options controlling which work is performed.
72///
73/// This is the high-performance entry point. Pass a [`ParseOptions`] with
74/// fields set to `false` to skip expensive hot-path work at runtime.
75///
76/// For the default full-featured parse, use [`parse`] instead.
77///
78/// # Example
79/// ```rust
80/// let opts = marco_core::ParseOptions {
81/// track_positions: false,
82/// ..Default::default()
83/// };
84/// let doc = marco_core::parse_with_options("# Hello", opts)?;
85/// // All node spans are None — position computation was skipped.
86/// # Ok::<(), Box<dyn std::error::Error>>(())
87/// ```
88pub fn parse_with_options(
89 input: &str,
90 opts: ParseOptions,
91) -> Result<Document, Box<dyn std::error::Error>> {
92 log::info!("Starting parse: {} bytes", input.len());
93
94 // Set thread-local options for the duration of this parse call.
95 // The guard restores previous values on drop (including on error).
96 let _guard =
97 shared::ParseOptionsGuard::new(opts.track_positions, opts.parse_math, opts.parse_diagrams);
98
99 let mut document = parse_blocks(input)?;
100 log::debug!("Parsed {} blocks", document.children.len());
101
102 resolve_reference_links(&mut document);
103 blocks::gfm_admonitions::apply_gfm_admonitions(&mut document);
104
105 Ok(document)
106}
107
108/// Parse Markdown text into Document AST using default options (full-featured).
109pub fn parse(input: &str) -> Result<Document, Box<dyn std::error::Error>> {
110 parse_with_options(input, ParseOptions::default())
111}
112
113fn resolve_reference_links(document: &mut Document) {
114 resolve_reference_links_in_nodes(&mut document.children, &document.references);
115}
116
/// Remove CommonMark backslash escapes from `input`.
///
/// A backslash followed by an ASCII punctuation character (the CommonMark
/// escapable set ``!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~``) is replaced by that
/// character alone. A backslash before any other character, or at the very
/// end of the input, is kept literally.
fn unescape_commonmark_backslash_escapes(input: &str) -> String {
    let mut unescaped = String::with_capacity(input.len());
    let mut rest = input.chars().peekable();

    loop {
        match rest.next() {
            None => break,
            Some('\\') => match rest.peek().copied() {
                // `is_ascii_punctuation` covers exactly the CommonMark
                // escapable characters, so no lookup table is needed.
                Some(escaped) if escaped.is_ascii_punctuation() => {
                    unescaped.push(escaped);
                    // Consume the escaped character we only peeked at.
                    rest.next();
                }
                // Not an escape sequence: the backslash stays as-is and the
                // following character (if any) is handled on the next turn.
                _ => unescaped.push('\\'),
            },
            Some(plain) => unescaped.push(plain),
        }
    }

    unescaped
}
140
141fn resolve_reference_links_in_nodes(nodes: &mut Vec<Node>, references: &ReferenceMap) {
142 let mut i = 0;
143 while i < nodes.len() {
144 // Always resolve inside children first.
145 if !nodes[i].children.is_empty() {
146 resolve_reference_links_in_nodes(&mut nodes[i].children, references);
147 }
148
149 let is_ref = matches!(nodes[i].kind, NodeKind::LinkReference { .. });
150 if !is_ref {
151 i += 1;
152 continue;
153 }
154
155 // Temporarily take ownership of data we might need.
156 let (label, suffix) = match &nodes[i].kind {
157 NodeKind::LinkReference { label, suffix } => (label.clone(), suffix.clone()),
158 _ => unreachable!(),
159 };
160
161 if let Some((url, title)) = references.get(&label) {
162 nodes[i].kind = NodeKind::Link {
163 url: url.clone(),
164 title: title.clone(),
165 };
166 i += 1;
167 continue;
168 }
169
170 // Unresolved reference: fall back to literal bracketed text while preserving
171 // already-parsed children for the first bracket segment.
172 let mut inner_children = std::mem::take(&mut nodes[i].children);
173
174 let mut replacement: Vec<Node> = Vec::new();
175 replacement.push(Node {
176 kind: NodeKind::Text("[".to_string()),
177 span: None,
178 children: Vec::new(),
179 });
180 replacement.append(&mut inner_children);
181 replacement.push(Node {
182 kind: NodeKind::Text("]".to_string()),
183 span: None,
184 children: Vec::new(),
185 });
186 if !suffix.is_empty() {
187 replacement.push(Node {
188 kind: NodeKind::Text(unescape_commonmark_backslash_escapes(&suffix)),
189 span: None,
190 children: Vec::new(),
191 });
192 }
193
194 let replacement_len = replacement.len();
195 nodes.splice(i..i + 1, replacement);
196 i += replacement_len;
197 }
198}