marco_core/parser/ast.rs
1// AST node definitions: central representation consumed by renderer and intelligence
2
3use crate::parser::Span;
4use std::collections::HashMap;
5
/// Link reference map: stores `[label]: url` definitions for later resolution.
#[derive(Clone, Debug, Default)]
pub struct ReferenceMap {
    /// Definitions keyed by normalized label (case-folded, whitespace
    /// collapsed). Each value is `(url, optional title)`.
    defs: HashMap<String, (String, Option<String>)>,
}
12
13impl ReferenceMap {
14 pub fn new() -> Self {
15 Self::default()
16 }
17
18 /// Add a link reference definition
19 pub fn insert(&mut self, label: &str, url: String, title: Option<String>) {
20 let normalized = normalize_label(label);
21 // CommonMark: when multiple definitions normalize to the same label,
22 // the first definition takes precedence.
23 self.defs.entry(normalized).or_insert((url, title));
24 }
25
26 /// Lookup a link reference by label
27 pub fn get(&self, label: &str) -> Option<&(String, Option<String>)> {
28 let normalized = normalize_label(label);
29 self.defs.get(&normalized)
30 }
31
32 /// Check if a label exists
33 pub fn contains(&self, label: &str) -> bool {
34 let normalized = normalize_label(label);
35 self.defs.contains_key(&normalized)
36 }
37}
38
/// Normalize a link label according to the CommonMark spec:
/// - Apply Unicode case-folding semantics (best-effort)
/// - Collapse consecutive whitespace to a single space
/// - Trim leading/trailing whitespace
///
/// Returns the normalized label as a new `String`; an empty or
/// whitespace-only label normalizes to the empty string.
fn normalize_label(label: &str) -> String {
    // NOTE:
    // Rust doesn't provide full Unicode case-folding in std. We apply
    // `to_lowercase()` plus the critical sharp-s expansion so labels like
    // U+1E9E (LATIN CAPITAL LETTER SHARP S) and "SS" normalize identically,
    // matching the CommonMark examples.
    //
    // The character is written as an escape on purpose: a raw non-ASCII
    // literal can be silently corrupted when the file is re-encoded, which
    // makes the comparison below match the wrong character.
    const SHARP_S: char = '\u{00DF}';

    let mut out = String::with_capacity(label.len());
    // `split_whitespace` drops leading/trailing whitespace and yields only
    // non-empty words, so joining them with one space both trims and collapses.
    for word in label.split_whitespace() {
        if !out.is_empty() {
            out.push(' ');
        }
        // U+1E9E lowercases to U+00DF, so one check covers both forms.
        for lower in word.chars().flat_map(char::to_lowercase) {
            if lower == SHARP_S {
                out.push_str("ss");
            } else {
                out.push(lower);
            }
        }
    }

    out
}
64
65// Root document node
66#[derive(Debug, Clone, Default)]
67pub struct Document {
68 pub children: Vec<Node>,
69 pub references: ReferenceMap,
70}
71
72// Generic AST node
73#[derive(Debug, Clone)]
74pub struct Node {
75 pub kind: NodeKind,
76 pub span: Option<Span>,
77 pub children: Vec<Node>,
78}
79
/// Alignment of a table column (GFM tables extension).
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum TableAlignment {
    /// No explicit alignment; this is the default value.
    #[default]
    None,
    /// Left-aligned column.
    Left,
    /// Centered column.
    Center,
    /// Right-aligned column.
    Right,
}
89
/// Kind of a GitHub-style admonition / alert (GFM extension).
///
/// The syntax is built on blockquotes, for example:
///
/// `> [!NOTE]`
/// `> body...`
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum AdmonitionKind {
    /// NOTE alert (e.g. `> [!NOTE]`).
    Note,
    /// TIP alert.
    Tip,
    /// IMPORTANT alert.
    Important,
    /// WARNING alert.
    Warning,
    /// CAUTION alert.
    Caution,
}
104
/// How an admonition is rendered.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum AdmonitionStyle {
    /// Standard GitHub-style alert coloring (NOTE/TIP/WARNING/etc).
    Alert,
    /// Quote-colored styling (neutral border/colors like regular blockquotes)
    /// while keeping the admonition title layout.
    Quote,
}
115
116// All node types
117#[derive(Debug, Clone)]
118pub enum NodeKind {
119 // Block-level
120 Heading {
121 level: u8,
122 text: String,
123 /// Explicit heading id, e.g. `### Title {#custom-id}`.
124 ///
125 /// When present, the renderer should emit it as `id="..."` on the
126 /// heading element.
127 id: Option<String>,
128 },
129 Paragraph,
130 CodeBlock {
131 language: Option<String>,
132 code: String,
133 },
134 ThematicBreak, // Horizontal rule (---, ***, ___)
135 List {
136 ordered: bool,
137 start: Option<u32>, // Starting number for ordered lists
138 tight: bool, // No blank lines between items
139 },
140 ListItem,
141
142 /// Extended definition lists (Markdown Guide / Markdown Extra-style).
143 ///
144 /// Rendering convention:
145 /// - A `DefinitionList` contains alternating `DefinitionTerm` (`<dt>`) and
146 /// `DefinitionDescription` (`<dd>`) children.
147 /// - `DefinitionTerm` should contain inline children.
148 /// - `DefinitionDescription` should contain block children.
149 DefinitionList,
150 DefinitionTerm,
151 DefinitionDescription,
152 /// GFM task list checkbox marker for a list item.
153 ///
154 /// This is emitted by the list parser when a list item begins with
155 /// `[ ]` or `[x]` / `[X]`.
156 ///
157 /// Rendering convention:
158 /// - This node is expected to appear as the first child inside a `ListItem`.
159 /// - The HTML renderer will convert it into a themed checkbox icon.
160 TaskCheckbox {
161 checked: bool,
162 },
163 Blockquote,
164 /// GitHub-style admonition / alert (GFM extension).
165 ///
166 /// This is created by a post-parse transformation that recognizes a special
167 /// first line inside a blockquote (e.g. `[!NOTE]`) and removes that marker.
168 Admonition {
169 kind: AdmonitionKind,
170 /// Optional custom title for the admonition header.
171 ///
172 /// Used by extended GFM-style admonitions (e.g. `> [๐ Happy Header]`).
173 title: Option<String>,
174 /// Optional custom icon content (typically a Unicode emoji) for the title.
175 ///
176 /// Rendered as text (not SVG) and must be styled by CSS.
177 icon: Option<String>,
178 /// Render variant.
179 style: AdmonitionStyle,
180 },
181
182 /// Marco extended tab blocks.
183 ///
184 /// Syntax (container + items):
185 /// ```text
186 /// :::tab
187 /// @tab Title
188 /// Content...
189 /// :::
190 /// ```
191 ///
192 /// Children convention:
193 /// - A `TabGroup` contains one or more `TabItem` children.
194 /// - Each `TabItem` contains block children representing the tab panel content.
195 TabGroup,
196 TabItem {
197 title: String,
198 },
199
200 /// Marco sliders (planned Reveal.js-like syntax, rendered as a simple slideshow).
201 ///
202 /// Syntax:
203 /// ```text
204 /// @slidestart
205 /// slide 1
206 /// ---
207 /// slide 2
208 /// @slideend
209 /// ```
210 ///
211 /// Optional timer (seconds per slide): `@slidestart:t5`.
212 ///
213 /// Children convention:
214 /// - A `SliderDeck` contains one or more `Slide` children.
215 /// - Each `Slide` contains block children representing the slide content.
216 SliderDeck {
217 timer_seconds: Option<u32>,
218 },
219 Slide {
220 /// True if this slide started after a vertical separator (`--`).
221 ///
222 /// The current Marco viewer treats slides as a single linear sequence
223 /// (left/right). This flag is preserved for future vertical navigation.
224 vertical: bool,
225 },
226 /// GFM table (pipe table extension).
227 ///
228 /// Children convention:
229 /// - Each child is a `TableRow`.
230 /// - Each `TableRow` contains `TableCell` children.
231 Table {
232 alignments: Vec<TableAlignment>,
233 },
234 TableRow {
235 header: bool,
236 },
237 TableCell {
238 header: bool,
239 alignment: TableAlignment,
240 },
241 HtmlBlock {
242 html: String,
243 }, // Block-level HTML (comments, tags, etc.)
244
245 /// GFM-style footnote definition (extension).
246 ///
247 /// Syntax:
248 /// - `[^label]: definition text`
249 /// - Continuation lines may be indented.
250 ///
251 /// Rendering convention:
252 /// - This node should not be rendered in place.
253 /// - Instead, the renderer collects referenced footnotes and emits a
254 /// footnotes section at the end of the document.
255 FootnoteDefinition {
256 label: String,
257 },
258
259 // Inline-level
260 Text(String),
261 /// Inline task checkbox marker (extension).
262 ///
263 /// This is emitted when a paragraph begins with a task marker like
264 /// `[ ] ` / `[x] ` / `[X] `.
265 ///
266 /// Rendering convention:
267 /// - The HTML renderer converts it into the same themed SVG checkbox icon
268 /// used for task list items.
269 TaskCheckboxInline {
270 checked: bool,
271 },
272 Emphasis,
273 Strong,
274 /// Combined strong+emphasis, e.g. `***text***` or `___text___`.
275 ///
276 /// This is parsed as a single inline node to avoid leaving dangling
277 /// delimiters that would otherwise be treated as plain text.
278 StrongEmphasis,
279 /// Strikethrough (extension), e.g. `~~text~~`.
280 Strikethrough,
281 /// Highlight/mark (extension), e.g. `==text==`.
282 Mark,
283 /// Superscript (extension), e.g. `^text^`.
284 Superscript,
285 /// Subscript (extension), e.g. `~text~`.
286 Subscript,
287 Link {
288 url: String,
289 title: Option<String>,
290 },
291 /// Reference-style link placeholder (CommonMark): `[text][label]`, `[label][]`, `[label]`.
292 ///
293 /// These cannot be fully resolved during inline parsing because reference
294 /// definitions may appear later in the document. The top-level `parse()`
295 /// performs a post-processing pass that converts this into a `Link` when a
296 /// matching definition exists in `Document.references`.
297 ///
298 /// If no matching definition is found, this should be rendered as literal
299 /// bracketed text (preserving the already-parsed `children` for the first
300 /// bracketed segment).
301 LinkReference {
302 /// Label used for reference resolution (will be normalized when looked up).
303 label: String,
304 /// Extra literal suffix after the first `]` (e.g. `"[]"` or `"[label]"`).
305 /// Empty for shortcut reference links.
306 suffix: String,
307 },
308
309 /// GFM-style footnote reference (extension), e.g. `[^label]`.
310 ///
311 /// Rendering convention:
312 /// - If a matching `FootnoteDefinition` exists, this renders as a numbered
313 /// superscript link.
314 /// - Otherwise it should fall back to literal text.
315 FootnoteReference {
316 label: String,
317 },
318 Image {
319 url: String,
320 alt: String,
321 },
322 CodeSpan(String),
323 InlineHtml(String),
324 HardBreak, // Two spaces + newline, or backslash + newline
325 SoftBreak, // Regular newline (rendered as space in HTML)
326
327 /// Marco extended user mentions.
328 ///
329 /// Syntax:
330 /// - `@username[platform]`
331 /// - `@username[platform](Display Name)`
332 ///
333 /// Rendering policy:
334 /// - The renderer may convert this to an external profile link based on
335 /// a platform mapping table.
336 PlatformMention {
337 username: String,
338 platform: String,
339 display: Option<String>,
340 },
341 /// Inline math (LaTeX), e.g. `$E = mc^2$`.
342 ///
343 /// Rendering policy:
344 /// - Rendered using KaTeX in inline mode.
345 /// - Content is raw LaTeX source code.
346 InlineMath {
347 content: String,
348 },
349
350 /// Display math (LaTeX), e.g. `$$\int_0^\infty e^{-x^2} dx$$`.
351 ///
352 /// Rendering policy:
353 /// - Rendered using KaTeX in display mode.
354 /// - Content is raw LaTeX source code.
355 DisplayMath {
356 content: String,
357 },
358
359 /// Mermaid diagram (code block with language="mermaid").
360 ///
361 /// Rendering policy:
362 /// - Rendered using mermaid-rs-renderer to SVG.
363 /// - Content is raw Mermaid diagram source code.
364 ///
365 /// This is created during parsing when a fenced code block has
366 /// info string "mermaid".
367 MermaidDiagram {
368 content: String,
369 },
370}
371
372impl Document {
373 pub fn new() -> Self {
374 Self::default()
375 }
376
377 pub fn len(&self) -> usize {
378 self.children.len()
379 }
380
381 pub fn is_empty(&self) -> bool {
382 self.children.is_empty()
383 }
384}
385
#[cfg(test)]
mod tests {
    use super::ReferenceMap;

    /// A second definition for an already-defined label must not replace the first.
    #[test]
    fn smoke_test_reference_map_first_definition_wins() {
        let mut refs = ReferenceMap::new();
        refs.insert("foo", "https://first.example".to_string(), None);
        refs.insert("foo", "https://second.example".to_string(), None);

        let (url, title) = refs.get("foo").expect("reference not found");
        assert_eq!(url, "https://first.example");
        assert_eq!(title, &None);
    }

    /// A definition stored as "SS" must be found via the capital sharp s.
    #[test]
    fn smoke_test_reference_map_casefold_sharp_s() {
        let mut refs = ReferenceMap::new();
        refs.insert("SS", "/url".to_string(), None);

        // U+1E9E LATIN CAPITAL LETTER SHARP S, written as an escape so the
        // literal cannot be corrupted when the file is re-encoded.
        let (url, _) = refs.get("\u{1E9E}").expect("reference not found");
        assert_eq!(url, "/url");
    }

    /// Lookups ignore case, surrounding whitespace and internal whitespace runs.
    #[test]
    fn smoke_test_reference_map_whitespace_collapse() {
        let mut refs = ReferenceMap::new();
        refs.insert("Foo\n\t bar", "/url".to_string(), None);

        assert!(refs.contains("foo bar"));
        assert!(refs.contains(" FOO BAR "));
    }
}
418}