marco_core/parser/ast.rs
1//! AST node definitions consumed by parser, renderer, and intelligence layers.
2
3use crate::parser::Span;
4use std::collections::HashMap;
5
/// Lookup table from normalized link reference labels to `(url, optional_title)`.
#[derive(Debug, Clone, Default)]
pub struct ReferenceMap {
    // Keys are labels after normalization (case-folded, whitespace collapsed).
    // Each value pairs the destination URL with its optional title.
    defs: HashMap<String, (String, Option<String>)>,
}
12
13impl ReferenceMap {
14 /// Create an empty reference map.
15 pub fn new() -> Self {
16 Self::default()
17 }
18
19 /// Add a link reference definition
20 pub fn insert(&mut self, label: &str, url: String, title: Option<String>) {
21 let normalized = normalize_label(label);
22 // CommonMark: when multiple definitions normalize to the same label,
23 // the first definition takes precedence.
24 self.defs.entry(normalized).or_insert((url, title));
25 }
26
27 /// Lookup a link reference by label
28 pub fn get(&self, label: &str) -> Option<&(String, Option<String>)> {
29 let normalized = normalize_label(label);
30 self.defs.get(&normalized)
31 }
32
33 /// Check if a label exists
34 pub fn contains(&self, label: &str) -> bool {
35 let normalized = normalize_label(label);
36 self.defs.contains_key(&normalized)
37 }
38}
39
/// Normalize a link reference label for matching, following the CommonMark rules:
/// - Apply Unicode case-folding semantics (best-effort)
/// - Collapse consecutive whitespace to a single space
/// - Trim leading/trailing whitespace
///
/// Returns the normalized label as a new `String`; an empty or
/// whitespace-only label yields an empty string.
fn normalize_label(label: &str) -> String {
    // U+00DF LATIN SMALL LETTER SHARP S. Written as an escape so the comparison
    // cannot be corrupted if this source file is ever re-encoded.
    const SHARP_S: char = '\u{00DF}';

    // NOTE:
    // Rust doesn't provide full Unicode case-folding in std. We apply
    // `to_lowercase()` plus the critical sharp-s expansion so labels like
    // "\u{1E9E}" (capital sharp s) and "SS" normalize identically, matching
    // the CommonMark examples.
    let mut out = String::with_capacity(label.len());

    // `split_whitespace` skips leading/trailing whitespace and yields the runs
    // between whitespace, so joining with one space both trims and collapses.
    for (index, word) in label.split_whitespace().enumerate() {
        if index > 0 {
            out.push(' ');
        }
        for ch in word.chars() {
            // A single char may lowercase to several chars, hence the inner loop.
            for lower in ch.to_lowercase() {
                if lower == SHARP_S {
                    out.push_str("ss");
                } else {
                    out.push(lower);
                }
            }
        }
    }

    out
}
74
75#[derive(Debug, Clone, Default)]
76/// Root parsed Markdown document.
77pub struct Document {
78 /// Top-level AST children in source order.
79 pub children: Vec<Node>,
80 /// Collected link reference definitions.
81 pub references: ReferenceMap,
82}
83
84#[derive(Debug, Clone)]
85/// Generic AST node.
86pub struct Node {
87 /// Semantic node kind.
88 pub kind: NodeKind,
89 /// Optional source span for this node.
90 pub span: Option<Span>,
91 /// Child nodes for hierarchical constructs.
92 pub children: Vec<Node>,
93}
94
/// Alignment of a table column (GFM tables extension).
///
/// The default is [`TableAlignment::None`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum TableAlignment {
    /// Column carries no explicit alignment.
    #[default]
    None,
    /// Column is left-aligned.
    Left,
    /// Column is center-aligned.
    Center,
    /// Column is right-aligned.
    Right,
}
108
/// Kinds of GitHub-style admonitions / alerts (GFM extension).
///
/// The syntax builds on blockquotes, for example:
///
/// `> [!NOTE]`
/// `> body...`
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AdmonitionKind {
    /// The "note" admonition.
    Note,
    /// The "tip" admonition.
    Tip,
    /// The "important" admonition.
    Important,
    /// The "warning" admonition.
    Warning,
    /// The "caution" admonition.
    Caution,
}
128
/// How an admonition is styled when rendered.
///
/// - `Alert`: standard GitHub-style alert coloring (NOTE/TIP/WARNING/etc).
/// - `Quote`: quote-colored styling (neutral border/colors like regular
///   blockquotes) that still keeps the admonition title layout.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AdmonitionStyle {
    /// GitHub alert styling.
    Alert,
    /// Neutral, quote-like styling.
    Quote,
}
141
142#[derive(Debug, Clone)]
143/// All supported block and inline AST node kinds.
144pub enum NodeKind {
145 /// ATX or setext heading node.
146 Heading {
147 /// Heading level, typically in range 1..=6.
148 level: u8,
149 /// Plain heading text content.
150 text: String,
151 /// Explicit heading id, e.g. `### Title {#custom-id}`.
152 ///
153 /// When present, the renderer should emit it as `id="..."` on the
154 /// heading element.
155 id: Option<String>,
156 },
157 /// Paragraph container.
158 Paragraph,
159 /// Fenced or indented code block.
160 CodeBlock {
161 /// Optional language/info string.
162 language: Option<String>,
163 /// Raw code block contents.
164 code: String,
165 },
166 /// Horizontal rule (`---`, `***`, `___`).
167 ThematicBreak,
168 /// Ordered or unordered list container.
169 List {
170 /// Whether this is an ordered list.
171 ordered: bool,
172 /// Starting number for ordered lists.
173 start: Option<u32>,
174 /// Whether list items are tight (no blank separators).
175 tight: bool,
176 },
177 /// List item container.
178 ListItem,
179
180 /// Extended definition lists (Markdown Guide / Markdown Extra-style).
181 ///
182 /// Rendering convention:
183 /// - A `DefinitionList` contains alternating `DefinitionTerm` (`<dt>`) and
184 /// `DefinitionDescription` (`<dd>`) children.
185 /// - `DefinitionTerm` should contain inline children.
186 /// - `DefinitionDescription` should contain block children.
187 DefinitionList,
188 /// Definition term (`dt`) item.
189 DefinitionTerm,
190 /// Definition description (`dd`) item.
191 DefinitionDescription,
192 /// GFM task list checkbox marker for a list item.
193 ///
194 /// This is emitted by the list parser when a list item begins with
195 /// `[ ]` or `[x]` / `[X]`.
196 ///
197 /// Rendering convention:
198 /// - This node is expected to appear as the first child inside a `ListItem`.
199 /// - The HTML renderer will convert it into a themed checkbox icon.
200 TaskCheckbox {
201 /// Whether checkbox is checked.
202 checked: bool,
203 },
204 /// Blockquote container.
205 Blockquote,
206 /// GitHub-style admonition / alert (GFM extension).
207 ///
208 /// This is created by a post-parse transformation that recognizes a special
209 /// first line inside a blockquote (e.g. `[!NOTE]`) and removes that marker.
210 Admonition {
211 /// Admonition semantic kind.
212 kind: AdmonitionKind,
213 /// Optional custom title for the admonition header.
214 ///
215 /// Used by extended GFM-style admonitions (e.g. `> [๐ Happy Header]`).
216 title: Option<String>,
217 /// Optional custom icon content (typically a Unicode emoji) for the title.
218 ///
219 /// Rendered as text (not SVG) and must be styled by CSS.
220 icon: Option<String>,
221 /// Render variant.
222 style: AdmonitionStyle,
223 },
224
225 /// Extended tab blocks.
226 ///
227 /// Syntax (container + items):
228 /// ```text
229 /// :::tab
230 /// @tab Title
231 /// Content...
232 /// :::
233 /// ```
234 ///
235 /// Children convention:
236 /// - A `TabGroup` contains one or more `TabItem` children.
237 /// - Each `TabItem` contains block children representing the tab panel content.
238 TabGroup,
239 /// A single tab item inside a tab group.
240 TabItem {
241 /// User-visible tab title.
242 title: String,
243 },
244
245 /// Extended slide decks (Reveal.js-like syntax, rendered as a simple slideshow).
246 ///
247 /// Syntax:
248 /// ```text
249 /// @slidestart
250 /// slide 1
251 /// ---
252 /// slide 2
253 /// @slideend
254 /// ```
255 ///
256 /// Optional timer (seconds per slide): `@slidestart:t5`.
257 ///
258 /// Children convention:
259 /// - A `SliderDeck` contains one or more `Slide` children.
260 /// - Each `Slide` contains block children representing the slide content.
261 SliderDeck {
262 /// Optional per-slide timer value in seconds.
263 timer_seconds: Option<u32>,
264 },
265 /// A single slide inside a slider deck.
266 Slide {
267 /// True if this slide started after a vertical separator (`--`).
268 ///
269 /// The current viewer treats slides as a single linear sequence
270 /// (left/right). This flag is preserved for future vertical navigation.
271 vertical: bool,
272 },
273 /// GFM table (pipe table extension).
274 ///
275 /// Children convention:
276 /// - Each child is a `TableRow`.
277 /// - Each `TableRow` contains `TableCell` children.
278 Table {
279 /// Per-column alignments.
280 alignments: Vec<TableAlignment>,
281 },
282 /// A single table row.
283 TableRow {
284 /// Whether this row is part of the table header.
285 header: bool,
286 },
287 /// A single table cell.
288 TableCell {
289 /// Whether this cell is in a header row.
290 header: bool,
291 /// Effective alignment for this cell.
292 alignment: TableAlignment,
293 },
294 /// Raw block-level HTML fragment.
295 HtmlBlock {
296 /// Raw HTML source.
297 html: String,
298 },
299
300 /// GFM-style footnote definition (extension).
301 ///
302 /// Syntax:
303 /// - `[^label]: definition text`
304 /// - Continuation lines may be indented.
305 ///
306 /// Rendering convention:
307 /// - This node should not be rendered in place.
308 /// - Instead, the renderer collects referenced footnotes and emits a
309 /// footnotes section at the end of the document.
310 FootnoteDefinition {
311 /// Footnote label (without `[^`/`]`).
312 label: String,
313 },
314
315 /// Plain text inline content.
316 Text(String),
317 /// Inline task checkbox marker (extension).
318 ///
319 /// This is emitted when a paragraph begins with a task marker like
320 /// `[ ] ` / `[x] ` / `[X] `.
321 ///
322 /// Rendering convention:
323 /// - The HTML renderer converts it into the same themed SVG checkbox icon
324 /// used for task list items.
325 TaskCheckboxInline {
326 /// Whether checkbox is checked.
327 checked: bool,
328 },
329 /// Emphasis inline container.
330 Emphasis,
331 /// Strong emphasis inline container.
332 Strong,
333 /// Combined strong+emphasis, e.g. `***text***` or `___text___`.
334 ///
335 /// This is parsed as a single inline node to avoid leaving dangling
336 /// delimiters that would otherwise be treated as plain text.
337 StrongEmphasis,
338 /// Strikethrough (extension), e.g. `~~text~~`.
339 Strikethrough,
340 /// Highlight/mark (extension), e.g. `==text==`.
341 Mark,
342 /// Superscript (extension), e.g. `^text^`.
343 Superscript,
344 /// Subscript (extension), e.g. `~text~`.
345 Subscript,
346 /// Inline link node.
347 Link {
348 /// Link destination URL.
349 url: String,
350 /// Optional link title.
351 title: Option<String>,
352 },
353 /// Reference-style link placeholder (CommonMark): `[text][label]`, `[label][]`, `[label]`.
354 ///
355 /// These cannot be fully resolved during inline parsing because reference
356 /// definitions may appear later in the document. The top-level `parse()`
357 /// performs a post-processing pass that converts this into a `Link` when a
358 /// matching definition exists in `Document.references`.
359 ///
360 /// If no matching definition is found, this should be rendered as literal
361 /// bracketed text (preserving the already-parsed `children` for the first
362 /// bracketed segment).
363 LinkReference {
364 /// Label used for reference resolution (will be normalized when looked up).
365 label: String,
366 /// Extra literal suffix after the first `]` (e.g. `"[]"` or `"[label]"`).
367 /// Empty for shortcut reference links.
368 suffix: String,
369 },
370
371 /// GFM-style footnote reference (extension), e.g. `[^label]`.
372 ///
373 /// Rendering convention:
374 /// - If a matching `FootnoteDefinition` exists, this renders as a numbered
375 /// superscript link.
376 /// - Otherwise it should fall back to literal text.
377 FootnoteReference {
378 /// Referenced footnote label.
379 label: String,
380 },
381 /// Inline image node.
382 Image {
383 /// Image source URL.
384 url: String,
385 /// Image alt text.
386 alt: String,
387 },
388 /// Inline code span.
389 CodeSpan(String),
390 /// Inline HTML fragment.
391 InlineHtml(String),
392 /// Hard line break (two spaces + newline or backslash + newline).
393 HardBreak,
394 /// Soft line break.
395 SoftBreak,
396
397 /// Extended user mentions.
398 ///
399 /// Syntax:
400 /// - `@username[platform]`
401 /// - `@username[platform](Display Name)`
402 ///
403 /// Rendering policy:
404 /// - The renderer may convert this to an external profile link based on
405 /// a platform mapping table.
406 PlatformMention {
407 /// Platform username/handle.
408 username: String,
409 /// Platform key, for example `github`.
410 platform: String,
411 /// Optional display label override.
412 display: Option<String>,
413 },
414 /// Inline math (LaTeX), e.g. `$E = mc^2$`.
415 ///
416 /// Rendering policy:
417 /// - Rendered using KaTeX in inline mode.
418 /// - Content is raw LaTeX source code.
419 InlineMath {
420 /// Raw inline LaTeX content.
421 content: String,
422 },
423
424 /// Display math (LaTeX), e.g. `$$\int_0^\infty e^{-x^2} dx$$`.
425 ///
426 /// Rendering policy:
427 /// - Rendered using KaTeX in display mode.
428 /// - Content is raw LaTeX source code.
429 DisplayMath {
430 /// Raw display LaTeX content.
431 content: String,
432 },
433
434 /// Mermaid diagram (code block with language="mermaid").
435 ///
436 /// Rendering policy:
437 /// - Rendered using mermaid-rs-renderer to SVG.
438 /// - Content is raw Mermaid diagram source code.
439 ///
440 /// This is created during parsing when a fenced code block has
441 /// info string "mermaid".
442 MermaidDiagram {
443 /// Raw Mermaid diagram source.
444 content: String,
445 },
446}
447
448impl Document {
449 /// Create an empty document.
450 pub fn new() -> Self {
451 Self::default()
452 }
453
454 /// Number of top-level nodes in the document.
455 pub fn len(&self) -> usize {
456 self.children.len()
457 }
458
459 /// Returns `true` when the document has no top-level nodes.
460 pub fn is_empty(&self) -> bool {
461 self.children.is_empty()
462 }
463}
464
#[cfg(test)]
mod tests {
    use super::ReferenceMap;

    /// CommonMark: when several definitions share a normalized label, the
    /// first one inserted must be the one returned.
    #[test]
    fn smoke_test_reference_map_first_definition_wins() {
        let mut refs = ReferenceMap::new();
        refs.insert("foo", "https://first.example".to_string(), None);
        refs.insert("foo", "https://second.example".to_string(), None);

        let (url, title) = refs.get("foo").expect("reference not found");
        assert_eq!(url, "https://first.example");
        assert_eq!(title, &None);
    }

    /// A label consisting of the capital sharp s must resolve to a
    /// definition registered as "SS".
    #[test]
    fn smoke_test_reference_map_casefold_sharp_s() {
        let mut refs = ReferenceMap::new();
        refs.insert("SS", "/url".to_string(), None);

        // U+1E9E LATIN CAPITAL LETTER SHARP S, written as an escape so the
        // test input cannot be corrupted by a re-encoding of this file.
        let (url, _) = refs.get("\u{1E9E}").expect("reference not found");
        assert_eq!(url, "/url");
    }

    /// Mixed runs of whitespace collapse to one space, surrounding
    /// whitespace is trimmed, and matching ignores case.
    #[test]
    fn smoke_test_reference_map_whitespace_collapse() {
        let mut refs = ReferenceMap::new();
        refs.insert("Foo\n\t bar", "/url".to_string(), None);

        assert!(refs.contains("foo bar"));
        assert!(refs.contains(" FOO BAR "));
    }
}